Index: head/sys/dev/cxgbe/adapter.h =================================================================== --- head/sys/dev/cxgbe/adapter.h (revision 333393) +++ head/sys/dev/cxgbe/adapter.h (revision 333394) @@ -1,1299 +1,1302 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2011 Chelsio Communications, Inc. * All rights reserved. * Written by: Navdeep Parhar * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ * */ #ifndef __T4_ADAPTER_H__ #define __T4_ADAPTER_H__ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "offload.h" #include "t4_ioctl.h" #include "common/t4_msg.h" #include "firmware/t4fw_interface.h" #define KTR_CXGBE KTR_SPARE3 MALLOC_DECLARE(M_CXGBE); #define CXGBE_UNIMPLEMENTED(s) \ panic("%s (%s, line %d) not implemented yet.", s, __FILE__, __LINE__) #if defined(__i386__) || defined(__amd64__) static __inline void prefetch(void *x) { __asm volatile("prefetcht0 %0" :: "m" (*(unsigned long *)x)); } #else #define prefetch(x) __builtin_prefetch(x) #endif #ifndef SYSCTL_ADD_UQUAD #define SYSCTL_ADD_UQUAD SYSCTL_ADD_QUAD #define sysctl_handle_64 sysctl_handle_quad #define CTLTYPE_U64 CTLTYPE_QUAD #endif #if (__FreeBSD_version >= 900030) || \ ((__FreeBSD_version >= 802507) && (__FreeBSD_version < 900000)) #define SBUF_DRAIN 1 #endif struct adapter; typedef struct adapter adapter_t; enum { /* * All ingress queues use this entry size. Note that the firmware event * queue and any iq expecting CPL_RX_PKT in the descriptor needs this to * be at least 64. */ IQ_ESIZE = 64, /* Default queue sizes for all kinds of ingress queues */ FW_IQ_QSIZE = 256, RX_IQ_QSIZE = 1024, /* All egress queues use this entry size */ EQ_ESIZE = 64, /* Default queue sizes for all kinds of egress queues */ CTRL_EQ_QSIZE = 128, TX_EQ_QSIZE = 1024, #if MJUMPAGESIZE != MCLBYTES SW_ZONE_SIZES = 4, /* cluster, jumbop, jumbo9k, jumbo16k */ #else SW_ZONE_SIZES = 3, /* cluster, jumbo9k, jumbo16k */ #endif CL_METADATA_SIZE = CACHE_LINE_SIZE, SGE_MAX_WR_NDESC = SGE_MAX_WR_LEN / EQ_ESIZE, /* max WR size in desc */ TX_SGL_SEGS = 39, TX_SGL_SEGS_TSO = 38, TX_WR_FLITS = SGE_MAX_WR_LEN / 8 }; enum { /* adapter intr_type */ INTR_INTX = (1 << 0), INTR_MSI = (1 << 1), INTR_MSIX = (1 << 2) }; enum { XGMAC_MTU = (1 << 0), XGMAC_PROMISC = (1 << 1), XGMAC_ALLMULTI = (1 << 2), XGMAC_VLANEX = (1 << 3), XGMAC_UCADDR = (1 << 4), XGMAC_MCADDRS = (1 << 5), XGMAC_ALL = 0xffff }; enum { /* flags understood by begin_synchronized_op */ HOLD_LOCK = (1 << 0), SLEEP_OK = (1 << 1), INTR_OK = (1 << 2), /* flags understood by end_synchronized_op */ LOCK_HELD = HOLD_LOCK, }; enum { /* adapter flags */ FULL_INIT_DONE = (1 << 0), FW_OK = (1 << 1), CHK_MBOX_ACCESS = (1 << 2), MASTER_PF = (1 << 3), ADAP_SYSCTL_CTX = (1 << 4), /* TOM_INIT_DONE= (1 << 5), No longer used */ BUF_PACKING_OK = (1 << 6), IS_VF = (1 << 7), CXGBE_BUSY = (1 << 9), /* port flags */ HAS_TRACEQ = (1 << 3), /* VI flags */ DOOMED = (1 << 0), VI_INIT_DONE = (1 << 1), VI_SYSCTL_CTX = (1 << 2), /* adapter debug_flags */ DF_DUMP_MBOX = (1 << 0), /* Log all mbox cmd/rpl. */ DF_LOAD_FW_ANYTIME = (1 << 1), /* Allow LOAD_FW after init */ DF_DISABLE_TCB_CACHE = (1 << 2), /* Disable TCB cache (T6+) */ }; #define IS_DOOMED(vi) ((vi)->flags & DOOMED) #define SET_DOOMED(vi) do {(vi)->flags |= DOOMED;} while (0) #define IS_BUSY(sc) ((sc)->flags & CXGBE_BUSY) #define SET_BUSY(sc) do {(sc)->flags |= CXGBE_BUSY;} while (0) #define CLR_BUSY(sc) do {(sc)->flags &= ~CXGBE_BUSY;} while (0) struct vi_info { device_t dev; struct port_info *pi; struct ifnet *ifp; unsigned long flags; int if_flags; uint16_t *rss, *nm_rss; int smt_idx; /* for convenience */ uint16_t viid; int16_t xact_addr_filt;/* index of exact MAC address filter */ uint16_t rss_size; /* size of VI's RSS table slice */ uint16_t rss_base; /* start of VI's RSS table slice */ eventhandler_tag vlan_c; int nintr; int first_intr; /* These need to be int as they are used in sysctl */ int ntxq; /* # of tx queues */ int first_txq; /* index of first tx queue */ int rsrv_noflowq; /* Reserve queue 0 for non-flowid packets */ int nrxq; /* # of rx queues */ int first_rxq; /* index of first rx queue */ int nofldtxq; /* # of offload tx queues */ int first_ofld_txq; /* index of first offload tx queue */ int nofldrxq; /* # of offload rx queues */ int first_ofld_rxq; /* index of first offload rx queue */ int nnmtxq; int first_nm_txq; int nnmrxq; int first_nm_rxq; int tmr_idx; int ofld_tmr_idx; int pktc_idx; int ofld_pktc_idx; int qsize_rxq; int qsize_txq; struct timeval last_refreshed; struct fw_vi_stats_vf stats; struct callout tick; struct sysctl_ctx_list ctx; /* from ifconfig up to driver detach */ uint8_t hw_addr[ETHER_ADDR_LEN]; /* factory MAC address, won't change */ }; struct tx_ch_rl_params { enum fw_sched_params_rate ratemode; /* %port (REL) or kbps (ABS) */ uint32_t maxrate; }; enum { TX_CLRL_REFRESH = (1 << 0), /* Need to update hardware state. */ TX_CLRL_ERROR = (1 << 1), /* Error, hardware state unknown. */ }; struct tx_cl_rl_params { int refcount; u_int flags; enum fw_sched_params_rate ratemode; /* %port REL or ABS value */ enum fw_sched_params_unit rateunit; /* kbps or pps (when ABS) */ enum fw_sched_params_mode mode; /* aggr or per-flow */ uint32_t maxrate; uint16_t pktsize; }; /* Tx scheduler parameters for a channel/port */ struct tx_sched_params { /* Channel Rate Limiter */ struct tx_ch_rl_params ch_rl; /* Class WRR */ /* XXX */ /* Class Rate Limiter */ struct tx_cl_rl_params cl_rl[]; }; struct port_info { device_t dev; struct adapter *adapter; struct vi_info *vi; int nvi; int up_vis; int uld_vis; struct tx_sched_params *sched_params; struct mtx pi_lock; char lockname[16]; unsigned long flags; uint8_t lport; /* associated offload logical port */ int8_t mdio_addr; uint8_t port_type; uint8_t mod_type; uint8_t port_id; uint8_t tx_chan; uint8_t mps_bg_map; /* rx MPS buffer group bitmap */ uint8_t rx_e_chan_map; /* rx TP e-channel bitmap */ struct link_config link_cfg; struct link_config old_link_cfg; struct ifmedia media; struct timeval last_refreshed; struct port_stats stats; u_int tnl_cong_drops; u_int tx_parse_error; u_long tx_tls_records; u_long tx_tls_octets; u_long rx_tls_records; u_long rx_tls_octets; struct callout tick; }; #define IS_MAIN_VI(vi) ((vi) == &((vi)->pi->vi[0])) /* Where the cluster came from, how it has been carved up. */ struct cluster_layout { int8_t zidx; int8_t hwidx; uint16_t region1; /* mbufs laid out within this region */ /* region2 is the DMA region */ uint16_t region3; /* cluster_metadata within this region */ }; struct cluster_metadata { u_int refcount; struct fl_sdesc *sd; /* For debug only. Could easily be stale */ }; struct fl_sdesc { caddr_t cl; uint16_t nmbuf; /* # of driver originated mbufs with ref on cluster */ struct cluster_layout cll; }; struct tx_desc { __be64 flit[8]; }; struct tx_sdesc { struct mbuf *m; /* m_nextpkt linked chain of frames */ uint8_t desc_used; /* # of hardware descriptors used by the WR */ }; #define IQ_PAD (IQ_ESIZE - sizeof(struct rsp_ctrl) - sizeof(struct rss_header)) struct iq_desc { struct rss_header rss; uint8_t cpl[IQ_PAD]; struct rsp_ctrl rsp; }; #undef IQ_PAD CTASSERT(sizeof(struct iq_desc) == IQ_ESIZE); enum { /* iq flags */ IQ_ALLOCATED = (1 << 0), /* firmware resources allocated */ IQ_HAS_FL = (1 << 1), /* iq associated with a freelist */ /* 1 << 2 Used to be IQ_INTR */ IQ_LRO_ENABLED = (1 << 3), /* iq is an eth rxq with LRO enabled */ IQ_ADJ_CREDIT = (1 << 4), /* hw is off by 1 credit for this iq */ /* iq state */ IQS_DISABLED = 0, IQS_BUSY = 1, IQS_IDLE = 2, /* netmap related flags */ NM_OFF = 0, NM_ON = 1, NM_BUSY = 2, }; enum { CPL_COOKIE_RESERVED = 0, CPL_COOKIE_FILTER, CPL_COOKIE_DDP0, CPL_COOKIE_DDP1, CPL_COOKIE_TOM, - CPL_COOKIE_AVAILABLE1, + CPL_COOKIE_HASHFILTER, CPL_COOKIE_AVAILABLE2, CPL_COOKIE_AVAILABLE3, NUM_CPL_COOKIES = 8 /* Limited by M_COOKIE. Do not increase. */ }; struct sge_iq; struct rss_header; typedef int (*cpl_handler_t)(struct sge_iq *, const struct rss_header *, struct mbuf *); typedef int (*an_handler_t)(struct sge_iq *, const struct rsp_ctrl *); typedef int (*fw_msg_handler_t)(struct adapter *, const __be64 *); /* * Ingress Queue: T4 is producer, driver is consumer. */ struct sge_iq { uint32_t flags; volatile int state; struct adapter *adapter; struct iq_desc *desc; /* KVA of descriptor ring */ int8_t intr_pktc_idx; /* packet count threshold index */ uint8_t gen; /* generation bit */ uint8_t intr_params; /* interrupt holdoff parameters */ uint8_t intr_next; /* XXX: holdoff for next interrupt */ uint16_t qsize; /* size (# of entries) of the queue */ uint16_t sidx; /* index of the entry with the status page */ uint16_t cidx; /* consumer index */ uint16_t cntxt_id; /* SGE context id for the iq */ uint16_t abs_id; /* absolute SGE id for the iq */ STAILQ_ENTRY(sge_iq) link; bus_dma_tag_t desc_tag; bus_dmamap_t desc_map; bus_addr_t ba; /* bus address of descriptor ring */ }; enum { EQ_CTRL = 1, EQ_ETH = 2, EQ_OFLD = 3, /* eq flags */ EQ_TYPEMASK = 0x3, /* 2 lsbits hold the type (see above) */ EQ_ALLOCATED = (1 << 2), /* firmware resources allocated */ EQ_ENABLED = (1 << 3), /* open for business */ EQ_QFLUSH = (1 << 4), /* if_qflush in progress */ }; /* Listed in order of preference. Update t4_sysctls too if you change these */ enum {DOORBELL_UDB, DOORBELL_WCWR, DOORBELL_UDBWC, DOORBELL_KDB}; /* * Egress Queue: driver is producer, T4 is consumer. * * Note: A free list is an egress queue (driver produces the buffers and T4 * consumes them) but it's special enough to have its own struct (see sge_fl). */ struct sge_eq { unsigned int flags; /* MUST be first */ unsigned int cntxt_id; /* SGE context id for the eq */ unsigned int abs_id; /* absolute SGE id for the eq */ struct mtx eq_lock; struct tx_desc *desc; /* KVA of descriptor ring */ uint8_t doorbells; volatile uint32_t *udb; /* KVA of doorbell (lies within BAR2) */ u_int udb_qid; /* relative qid within the doorbell page */ uint16_t sidx; /* index of the entry with the status page */ uint16_t cidx; /* consumer idx (desc idx) */ uint16_t pidx; /* producer idx (desc idx) */ uint16_t equeqidx; /* EQUEQ last requested at this pidx */ uint16_t dbidx; /* pidx of the most recent doorbell */ uint16_t iqid; /* iq that gets egr_update for the eq */ uint8_t tx_chan; /* tx channel used by the eq */ volatile u_int equiq; /* EQUIQ outstanding */ bus_dma_tag_t desc_tag; bus_dmamap_t desc_map; bus_addr_t ba; /* bus address of descriptor ring */ char lockname[16]; }; struct sw_zone_info { uma_zone_t zone; /* zone that this cluster comes from */ int size; /* size of cluster: 2K, 4K, 9K, 16K, etc. */ int type; /* EXT_xxx type of the cluster */ int8_t head_hwidx; int8_t tail_hwidx; }; struct hw_buf_info { int8_t zidx; /* backpointer to zone; -ve means unused */ int8_t next; /* next hwidx for this zone; -1 means no more */ int size; }; enum { NUM_MEMWIN = 3, MEMWIN0_APERTURE = 2048, MEMWIN0_BASE = 0x1b800, MEMWIN1_APERTURE = 32768, MEMWIN1_BASE = 0x28000, MEMWIN2_APERTURE_T4 = 65536, MEMWIN2_BASE_T4 = 0x30000, MEMWIN2_APERTURE_T5 = 128 * 1024, MEMWIN2_BASE_T5 = 0x60000, }; struct memwin { struct rwlock mw_lock __aligned(CACHE_LINE_SIZE); uint32_t mw_base; /* constant after setup_memwin */ uint32_t mw_aperture; /* ditto */ uint32_t mw_curpos; /* protected by mw_lock */ }; enum { FL_STARVING = (1 << 0), /* on the adapter's list of starving fl's */ FL_DOOMED = (1 << 1), /* about to be destroyed */ FL_BUF_PACKING = (1 << 2), /* buffer packing enabled */ FL_BUF_RESUME = (1 << 3), /* resume from the middle of the frame */ }; #define FL_RUNNING_LOW(fl) \ (IDXDIFF(fl->dbidx * 8, fl->cidx, fl->sidx * 8) <= fl->lowat) #define FL_NOT_RUNNING_LOW(fl) \ (IDXDIFF(fl->dbidx * 8, fl->cidx, fl->sidx * 8) >= 2 * fl->lowat) struct sge_fl { struct mtx fl_lock; __be64 *desc; /* KVA of descriptor ring, ptr to addresses */ struct fl_sdesc *sdesc; /* KVA of software descriptor ring */ struct cluster_layout cll_def; /* default refill zone, layout */ uint16_t lowat; /* # of buffers <= this means fl needs help */ int flags; uint16_t buf_boundary; /* The 16b idx all deal with hw descriptors */ uint16_t dbidx; /* hw pidx after last doorbell */ uint16_t sidx; /* index of status page */ volatile uint16_t hw_cidx; /* The 32b idx are all buffer idx, not hardware descriptor idx */ uint32_t cidx; /* consumer index */ uint32_t pidx; /* producer index */ uint32_t dbval; u_int rx_offset; /* offset in fl buf (when buffer packing) */ volatile uint32_t *udb; uint64_t mbuf_allocated;/* # of mbuf allocated from zone_mbuf */ uint64_t mbuf_inlined; /* # of mbuf created within clusters */ uint64_t cl_allocated; /* # of clusters allocated */ uint64_t cl_recycled; /* # of clusters recycled */ uint64_t cl_fast_recycled; /* # of clusters recycled (fast) */ /* These 3 are valid when FL_BUF_RESUME is set, stale otherwise. */ struct mbuf *m0; struct mbuf **pnext; u_int remaining; uint16_t qsize; /* # of hw descriptors (status page included) */ uint16_t cntxt_id; /* SGE context id for the freelist */ TAILQ_ENTRY(sge_fl) link; /* All starving freelists */ bus_dma_tag_t desc_tag; bus_dmamap_t desc_map; char lockname[16]; bus_addr_t ba; /* bus address of descriptor ring */ struct cluster_layout cll_alt; /* alternate refill zone, layout */ }; struct mp_ring; /* txq: SGE egress queue + what's needed for Ethernet NIC */ struct sge_txq { struct sge_eq eq; /* MUST be first */ struct ifnet *ifp; /* the interface this txq belongs to */ struct mp_ring *r; /* tx software ring */ struct tx_sdesc *sdesc; /* KVA of software descriptor ring */ struct sglist *gl; __be32 cpl_ctrl0; /* for convenience */ int tc_idx; /* traffic class */ struct task tx_reclaim_task; /* stats for common events first */ uint64_t txcsum; /* # of times hardware assisted with checksum */ uint64_t tso_wrs; /* # of TSO work requests */ uint64_t vlan_insertion;/* # of times VLAN tag was inserted */ uint64_t imm_wrs; /* # of work requests with immediate data */ uint64_t sgl_wrs; /* # of work requests with direct SGL */ uint64_t txpkt_wrs; /* # of txpkt work requests (not coalesced) */ uint64_t txpkts0_wrs; /* # of type0 coalesced tx work requests */ uint64_t txpkts1_wrs; /* # of type1 coalesced tx work requests */ uint64_t txpkts0_pkts; /* # of frames in type0 coalesced tx WRs */ uint64_t txpkts1_pkts; /* # of frames in type1 coalesced tx WRs */ /* stats for not-that-common events */ } __aligned(CACHE_LINE_SIZE); /* rxq: SGE ingress queue + SGE free list + miscellaneous items */ struct sge_rxq { struct sge_iq iq; /* MUST be first */ struct sge_fl fl; /* MUST follow iq */ struct ifnet *ifp; /* the interface this rxq belongs to */ #if defined(INET) || defined(INET6) struct lro_ctrl lro; /* LRO state */ #endif /* stats for common events first */ uint64_t rxcsum; /* # of times hardware assisted with checksum */ uint64_t vlan_extraction;/* # of times VLAN tag was extracted */ /* stats for not-that-common events */ } __aligned(CACHE_LINE_SIZE); static inline struct sge_rxq * iq_to_rxq(struct sge_iq *iq) { return (__containerof(iq, struct sge_rxq, iq)); } /* ofld_rxq: SGE ingress queue + SGE free list + miscellaneous items */ struct sge_ofld_rxq { struct sge_iq iq; /* MUST be first */ struct sge_fl fl; /* MUST follow iq */ } __aligned(CACHE_LINE_SIZE); static inline struct sge_ofld_rxq * iq_to_ofld_rxq(struct sge_iq *iq) { return (__containerof(iq, struct sge_ofld_rxq, iq)); } struct wrqe { STAILQ_ENTRY(wrqe) link; struct sge_wrq *wrq; int wr_len; char wr[] __aligned(16); }; struct wrq_cookie { TAILQ_ENTRY(wrq_cookie) link; int ndesc; int pidx; }; /* * wrq: SGE egress queue that is given prebuilt work requests. Both the control * and offload tx queues are of this type. */ struct sge_wrq { struct sge_eq eq; /* MUST be first */ struct adapter *adapter; struct task wrq_tx_task; /* Tx desc reserved but WR not "committed" yet. */ TAILQ_HEAD(wrq_incomplete_wrs , wrq_cookie) incomplete_wrs; /* List of WRs ready to go out as soon as descriptors are available. */ STAILQ_HEAD(, wrqe) wr_list; u_int nwr_pending; u_int ndesc_needed; /* stats for common events first */ uint64_t tx_wrs_direct; /* # of WRs written directly to desc ring. */ uint64_t tx_wrs_ss; /* # of WRs copied from scratch space. */ uint64_t tx_wrs_copied; /* # of WRs queued and copied to desc ring. */ /* stats for not-that-common events */ /* * Scratch space for work requests that wrap around after reaching the * status page, and some information about the last WR that used it. */ uint16_t ss_pidx; uint16_t ss_len; uint8_t ss[SGE_MAX_WR_LEN]; } __aligned(CACHE_LINE_SIZE); #define INVALID_NM_RXQ_CNTXT_ID ((uint16_t)(-1)) struct sge_nm_rxq { struct vi_info *vi; struct iq_desc *iq_desc; uint16_t iq_abs_id; uint16_t iq_cntxt_id; uint16_t iq_cidx; uint16_t iq_sidx; uint8_t iq_gen; __be64 *fl_desc; uint16_t fl_cntxt_id; uint32_t fl_cidx; uint32_t fl_pidx; uint32_t fl_sidx; uint32_t fl_db_val; u_int fl_hwidx:4; u_int nid; /* netmap ring # for this queue */ /* infrequently used items after this */ bus_dma_tag_t iq_desc_tag; bus_dmamap_t iq_desc_map; bus_addr_t iq_ba; int intr_idx; bus_dma_tag_t fl_desc_tag; bus_dmamap_t fl_desc_map; bus_addr_t fl_ba; } __aligned(CACHE_LINE_SIZE); #define INVALID_NM_TXQ_CNTXT_ID ((u_int)(-1)) struct sge_nm_txq { struct tx_desc *desc; uint16_t cidx; uint16_t pidx; uint16_t sidx; uint16_t equiqidx; /* EQUIQ last requested at this pidx */ uint16_t equeqidx; /* EQUEQ last requested at this pidx */ uint16_t dbidx; /* pidx of the most recent doorbell */ uint8_t doorbells; volatile uint32_t *udb; u_int udb_qid; u_int cntxt_id; __be32 cpl_ctrl0; /* for convenience */ u_int nid; /* netmap ring # for this queue */ /* infrequently used items after this */ bus_dma_tag_t desc_tag; bus_dmamap_t desc_map; bus_addr_t ba; int iqidx; } __aligned(CACHE_LINE_SIZE); struct sge { int nrxq; /* total # of Ethernet rx queues */ int ntxq; /* total # of Ethernet tx queues */ int nofldrxq; /* total # of TOE rx queues */ int nofldtxq; /* total # of TOE tx queues */ int nnmrxq; /* total # of netmap rx queues */ int nnmtxq; /* total # of netmap tx queues */ int niq; /* total # of ingress queues */ int neq; /* total # of egress queues */ struct sge_iq fwq; /* Firmware event queue */ struct sge_wrq mgmtq; /* Management queue (control queue) */ struct sge_wrq *ctrlq; /* Control queues */ struct sge_txq *txq; /* NIC tx queues */ struct sge_rxq *rxq; /* NIC rx queues */ struct sge_wrq *ofld_txq; /* TOE tx queues */ struct sge_ofld_rxq *ofld_rxq; /* TOE rx queues */ struct sge_nm_txq *nm_txq; /* netmap tx queues */ struct sge_nm_rxq *nm_rxq; /* netmap rx queues */ uint16_t iq_start; /* first cntxt_id */ uint16_t iq_base; /* first abs_id */ int eq_start; /* first cntxt_id */ int eq_base; /* first abs_id */ struct sge_iq **iqmap; /* iq->cntxt_id to iq mapping */ struct sge_eq **eqmap; /* eq->cntxt_id to eq mapping */ int8_t safe_hwidx1; /* may not have room for metadata */ int8_t safe_hwidx2; /* with room for metadata and maybe more */ struct sw_zone_info sw_zone_info[SW_ZONE_SIZES]; struct hw_buf_info hw_buf_info[SGE_FLBUF_SIZES]; }; struct devnames { const char *nexus_name; const char *ifnet_name; const char *vi_ifnet_name; const char *pf03_drv_name; const char *vf_nexus_name; const char *vf_ifnet_name; }; struct adapter { SLIST_ENTRY(adapter) link; device_t dev; struct cdev *cdev; const struct devnames *names; /* PCIe register resources */ int regs_rid; struct resource *regs_res; int msix_rid; struct resource *msix_res; bus_space_handle_t bh; bus_space_tag_t bt; bus_size_t mmio_len; int udbs_rid; struct resource *udbs_res; volatile uint8_t *udbs_base; unsigned int pf; unsigned int mbox; unsigned int vpd_busy; unsigned int vpd_flag; /* Interrupt information */ int intr_type; int intr_count; struct irq { struct resource *res; int rid; volatile int nm_state; /* NM_OFF, NM_ON, or NM_BUSY */ void *tag; struct sge_rxq *rxq; struct sge_nm_rxq *nm_rxq; } __aligned(CACHE_LINE_SIZE) *irq; int sge_gts_reg; int sge_kdoorbell_reg; bus_dma_tag_t dmat; /* Parent DMA tag */ struct sge sge; int lro_timeout; int sc_do_rxcopy; struct taskqueue *tq[MAX_NCHAN]; /* General purpose taskqueues */ struct port_info *port[MAX_NPORTS]; uint8_t chan_map[MAX_NCHAN]; /* channel -> port */ void *tom_softc; /* (struct tom_data *) */ struct tom_tunables tt; struct t4_offload_policy *policy; struct rwlock policy_lock; void *iwarp_softc; /* (struct c4iw_dev *) */ struct iw_tunables iwt; void *iscsi_ulp_softc; /* (struct cxgbei_data *) */ void *ccr_softc; /* (struct ccr_softc *) */ struct l2t_data *l2t; /* L2 table */ struct tid_info tids; uint8_t doorbells; int offload_map; /* ports with IFCAP_TOE enabled */ int active_ulds; /* ULDs activated on this adapter */ int flags; int debug_flags; char ifp_lockname[16]; struct mtx ifp_lock; struct ifnet *ifp; /* tracer ifp */ struct ifmedia media; int traceq; /* iq used by all tracers, -1 if none */ int tracer_valid; /* bitmap of valid tracers */ int tracer_enabled; /* bitmap of enabled tracers */ char fw_version[16]; char tp_version[16]; char er_version[16]; char bs_version[16]; char cfg_file[32]; u_int cfcsum; struct adapter_params params; const struct chip_params *chip_params; struct t4_virt_res vres; uint16_t nbmcaps; uint16_t linkcaps; uint16_t switchcaps; uint16_t niccaps; uint16_t toecaps; uint16_t rdmacaps; uint16_t cryptocaps; uint16_t iscsicaps; uint16_t fcoecaps; struct sysctl_ctx_list ctx; /* from adapter_full_init to full_uninit */ struct mtx sc_lock; char lockname[16]; /* Starving free lists */ struct mtx sfl_lock; /* same cache-line as sc_lock? but that's ok */ TAILQ_HEAD(, sge_fl) sfl; struct callout sfl_callout; struct mtx reg_lock; /* for indirect register access */ struct memwin memwin[NUM_MEMWIN]; /* memory windows */ struct mtx tc_lock; struct task tc_task; const char *last_op; const void *last_op_thr; int last_op_flags; }; #define ADAPTER_LOCK(sc) mtx_lock(&(sc)->sc_lock) #define ADAPTER_UNLOCK(sc) mtx_unlock(&(sc)->sc_lock) #define ADAPTER_LOCK_ASSERT_OWNED(sc) mtx_assert(&(sc)->sc_lock, MA_OWNED) #define ADAPTER_LOCK_ASSERT_NOTOWNED(sc) mtx_assert(&(sc)->sc_lock, MA_NOTOWNED) #define ASSERT_SYNCHRONIZED_OP(sc) \ KASSERT(IS_BUSY(sc) && \ (mtx_owned(&(sc)->sc_lock) || sc->last_op_thr == curthread), \ ("%s: operation not synchronized.", __func__)) #define PORT_LOCK(pi) mtx_lock(&(pi)->pi_lock) #define PORT_UNLOCK(pi) mtx_unlock(&(pi)->pi_lock) #define PORT_LOCK_ASSERT_OWNED(pi) mtx_assert(&(pi)->pi_lock, MA_OWNED) #define PORT_LOCK_ASSERT_NOTOWNED(pi) mtx_assert(&(pi)->pi_lock, MA_NOTOWNED) #define FL_LOCK(fl) mtx_lock(&(fl)->fl_lock) #define FL_TRYLOCK(fl) mtx_trylock(&(fl)->fl_lock) #define FL_UNLOCK(fl) mtx_unlock(&(fl)->fl_lock) #define FL_LOCK_ASSERT_OWNED(fl) mtx_assert(&(fl)->fl_lock, MA_OWNED) #define FL_LOCK_ASSERT_NOTOWNED(fl) mtx_assert(&(fl)->fl_lock, MA_NOTOWNED) #define RXQ_FL_LOCK(rxq) FL_LOCK(&(rxq)->fl) #define RXQ_FL_UNLOCK(rxq) FL_UNLOCK(&(rxq)->fl) #define RXQ_FL_LOCK_ASSERT_OWNED(rxq) FL_LOCK_ASSERT_OWNED(&(rxq)->fl) #define RXQ_FL_LOCK_ASSERT_NOTOWNED(rxq) FL_LOCK_ASSERT_NOTOWNED(&(rxq)->fl) #define EQ_LOCK(eq) mtx_lock(&(eq)->eq_lock) #define EQ_TRYLOCK(eq) mtx_trylock(&(eq)->eq_lock) #define EQ_UNLOCK(eq) mtx_unlock(&(eq)->eq_lock) #define EQ_LOCK_ASSERT_OWNED(eq) mtx_assert(&(eq)->eq_lock, MA_OWNED) #define EQ_LOCK_ASSERT_NOTOWNED(eq) mtx_assert(&(eq)->eq_lock, MA_NOTOWNED) #define TXQ_LOCK(txq) EQ_LOCK(&(txq)->eq) #define TXQ_TRYLOCK(txq) EQ_TRYLOCK(&(txq)->eq) #define TXQ_UNLOCK(txq) EQ_UNLOCK(&(txq)->eq) #define TXQ_LOCK_ASSERT_OWNED(txq) EQ_LOCK_ASSERT_OWNED(&(txq)->eq) #define TXQ_LOCK_ASSERT_NOTOWNED(txq) EQ_LOCK_ASSERT_NOTOWNED(&(txq)->eq) #define CH_DUMP_MBOX(sc, mbox, data_reg) \ do { \ if (sc->debug_flags & DF_DUMP_MBOX) { \ log(LOG_NOTICE, \ "%s mbox %u: %016llx %016llx %016llx %016llx " \ "%016llx %016llx %016llx %016llx\n", \ device_get_nameunit(sc->dev), mbox, \ (unsigned long long)t4_read_reg64(sc, data_reg), \ (unsigned long long)t4_read_reg64(sc, data_reg + 8), \ (unsigned long long)t4_read_reg64(sc, data_reg + 16), \ (unsigned long long)t4_read_reg64(sc, data_reg + 24), \ (unsigned long long)t4_read_reg64(sc, data_reg + 32), \ (unsigned long long)t4_read_reg64(sc, data_reg + 40), \ (unsigned long long)t4_read_reg64(sc, data_reg + 48), \ (unsigned long long)t4_read_reg64(sc, data_reg + 56)); \ } \ } while (0) #define for_each_txq(vi, iter, q) \ for (q = &vi->pi->adapter->sge.txq[vi->first_txq], iter = 0; \ iter < vi->ntxq; ++iter, ++q) #define for_each_rxq(vi, iter, q) \ for (q = &vi->pi->adapter->sge.rxq[vi->first_rxq], iter = 0; \ iter < vi->nrxq; ++iter, ++q) #define for_each_ofld_txq(vi, iter, q) \ for (q = &vi->pi->adapter->sge.ofld_txq[vi->first_ofld_txq], iter = 0; \ iter < vi->nofldtxq; ++iter, ++q) #define for_each_ofld_rxq(vi, iter, q) \ for (q = &vi->pi->adapter->sge.ofld_rxq[vi->first_ofld_rxq], iter = 0; \ iter < vi->nofldrxq; ++iter, ++q) #define for_each_nm_txq(vi, iter, q) \ for (q = &vi->pi->adapter->sge.nm_txq[vi->first_nm_txq], iter = 0; \ iter < vi->nnmtxq; ++iter, ++q) #define for_each_nm_rxq(vi, iter, q) \ for (q = &vi->pi->adapter->sge.nm_rxq[vi->first_nm_rxq], iter = 0; \ iter < vi->nnmrxq; ++iter, ++q) #define for_each_vi(_pi, _iter, _vi) \ for ((_vi) = (_pi)->vi, (_iter) = 0; (_iter) < (_pi)->nvi; \ ++(_iter), ++(_vi)) #define IDXINCR(idx, incr, wrap) do { \ idx = wrap - idx > incr ? idx + incr : incr - (wrap - idx); \ } while (0) #define IDXDIFF(head, tail, wrap) \ ((head) >= (tail) ? (head) - (tail) : (wrap) - (tail) + (head)) /* One for errors, one for firmware events */ #define T4_EXTRA_INTR 2 /* One for firmware events */ #define T4VF_EXTRA_INTR 1 static inline int forwarding_intr_to_fwq(struct adapter *sc) { return (sc->intr_count == 1); } static inline uint32_t t4_read_reg(struct adapter *sc, uint32_t reg) { return bus_space_read_4(sc->bt, sc->bh, reg); } static inline void t4_write_reg(struct adapter *sc, uint32_t reg, uint32_t val) { bus_space_write_4(sc->bt, sc->bh, reg, val); } static inline uint64_t t4_read_reg64(struct adapter *sc, uint32_t reg) { #ifdef __LP64__ return bus_space_read_8(sc->bt, sc->bh, reg); #else return (uint64_t)bus_space_read_4(sc->bt, sc->bh, reg) + ((uint64_t)bus_space_read_4(sc->bt, sc->bh, reg + 4) << 32); #endif } static inline void t4_write_reg64(struct adapter *sc, uint32_t reg, uint64_t val) { #ifdef __LP64__ bus_space_write_8(sc->bt, sc->bh, reg, val); #else bus_space_write_4(sc->bt, sc->bh, reg, val); bus_space_write_4(sc->bt, sc->bh, reg + 4, val>> 32); #endif } static inline void t4_os_pci_read_cfg1(struct adapter *sc, int reg, uint8_t *val) { *val = pci_read_config(sc->dev, reg, 1); } static inline void t4_os_pci_write_cfg1(struct adapter *sc, int reg, uint8_t val) { pci_write_config(sc->dev, reg, val, 1); } static inline void t4_os_pci_read_cfg2(struct adapter *sc, int reg, uint16_t *val) { *val = pci_read_config(sc->dev, reg, 2); } static inline void t4_os_pci_write_cfg2(struct adapter *sc, int reg, uint16_t val) { pci_write_config(sc->dev, reg, val, 2); } static inline void t4_os_pci_read_cfg4(struct adapter *sc, int reg, uint32_t *val) { *val = pci_read_config(sc->dev, reg, 4); } static inline void t4_os_pci_write_cfg4(struct adapter *sc, int reg, uint32_t val) { pci_write_config(sc->dev, reg, val, 4); } static inline struct port_info * adap2pinfo(struct adapter *sc, int idx) { return (sc->port[idx]); } static inline void t4_os_set_hw_addr(struct port_info *pi, uint8_t hw_addr[]) { bcopy(hw_addr, pi->vi[0].hw_addr, ETHER_ADDR_LEN); } static inline bool is_10G_port(const struct port_info *pi) { return ((pi->link_cfg.supported & FW_PORT_CAP_SPEED_10G) != 0); } static inline bool is_25G_port(const struct port_info *pi) { return ((pi->link_cfg.supported & FW_PORT_CAP_SPEED_25G) != 0); } static inline bool is_40G_port(const struct port_info *pi) { return ((pi->link_cfg.supported & FW_PORT_CAP_SPEED_40G) != 0); } static inline bool is_100G_port(const struct port_info *pi) { return ((pi->link_cfg.supported & FW_PORT_CAP_SPEED_100G) != 0); } static inline int port_top_speed(const struct port_info *pi) { if (pi->link_cfg.supported & FW_PORT_CAP_SPEED_100G) return (100); if (pi->link_cfg.supported & FW_PORT_CAP_SPEED_40G) return (40); if (pi->link_cfg.supported & FW_PORT_CAP_SPEED_25G) return (25); if (pi->link_cfg.supported & FW_PORT_CAP_SPEED_10G) return (10); if (pi->link_cfg.supported & FW_PORT_CAP_SPEED_1G) return (1); return (0); } static inline int tx_resume_threshold(struct sge_eq *eq) { /* not quite the same as qsize / 4, but this will do. */ return (eq->sidx / 4); } static inline int t4_use_ldst(struct adapter *sc) { #ifdef notyet return (sc->flags & FW_OK || !sc->use_bd); #else return (0); #endif } /* t4_main.c */ extern int t4_ntxq; extern int t4_nrxq; extern int t4_intr_types; extern int t4_tmr_idx; extern int t4_pktc_idx; extern unsigned int t4_qsize_rxq; extern unsigned int t4_qsize_txq; extern device_method_t cxgbe_methods[]; int t4_os_find_pci_capability(struct adapter *, int); int t4_os_pci_save_state(struct adapter *); int t4_os_pci_restore_state(struct adapter *); void t4_os_portmod_changed(struct port_info *); void t4_os_link_changed(struct port_info *); void t4_iterate(void (*)(struct adapter *, void *), void *); void t4_init_devnames(struct adapter *); void t4_add_adapter(struct adapter *); void t4_aes_getdeckey(void *, const void *, unsigned int); int t4_detach_common(device_t); int t4_map_bars_0_and_4(struct adapter *); int t4_map_bar_2(struct adapter *); int t4_setup_intr_handlers(struct adapter *); void t4_sysctls(struct adapter *); int begin_synchronized_op(struct adapter *, struct vi_info *, int, char *); void doom_vi(struct adapter *, struct vi_info *); void end_synchronized_op(struct adapter *, int); int update_mac_settings(struct ifnet *, int); int adapter_full_init(struct adapter *); int adapter_full_uninit(struct adapter *); uint64_t cxgbe_get_counter(struct ifnet *, ift_counter); int vi_full_init(struct vi_info *); int vi_full_uninit(struct vi_info *); void vi_sysctls(struct vi_info *); void vi_tick(void *); int rw_via_memwin(struct adapter *, int, uint32_t, uint32_t *, int, int); int alloc_atid_tab(struct tid_info *, int); void free_atid_tab(struct tid_info *); int alloc_atid(struct adapter *, void *); void *lookup_atid(struct adapter *, int); void free_atid(struct adapter *, int); void release_tid(struct adapter *, int, struct sge_wrq *); #ifdef DEV_NETMAP /* t4_netmap.c */ void cxgbe_nm_attach(struct vi_info *); void cxgbe_nm_detach(struct vi_info *); void t4_nm_intr(void *); #endif /* t4_sge.c */ void t4_sge_modload(void); void t4_sge_modunload(void); uint64_t t4_sge_extfree_refs(void); void t4_tweak_chip_settings(struct adapter *); int t4_read_chip_settings(struct adapter *); int t4_create_dma_tag(struct adapter *); void t4_sge_sysctls(struct adapter *, struct sysctl_ctx_list *, struct sysctl_oid_list *); int t4_destroy_dma_tag(struct adapter *); int t4_setup_adapter_queues(struct adapter *); int t4_teardown_adapter_queues(struct adapter *); int t4_setup_vi_queues(struct vi_info *); int t4_teardown_vi_queues(struct vi_info *); void t4_intr_all(void *); void t4_intr(void *); void t4_vi_intr(void *); void t4_intr_err(void *); void t4_intr_evt(void *); void t4_wrq_tx_locked(struct adapter *, struct sge_wrq *, struct wrqe *); void t4_update_fl_bufsize(struct ifnet *); int parse_pkt(struct adapter *, struct mbuf **); void *start_wrq_wr(struct sge_wrq *, int, struct wrq_cookie *); void commit_wrq_wr(struct sge_wrq *, void *, struct wrq_cookie *); int tnl_cong(struct port_info *, int); void t4_register_an_handler(an_handler_t); void t4_register_fw_msg_handler(int, fw_msg_handler_t); void t4_register_cpl_handler(int, cpl_handler_t); void t4_register_shared_cpl_handler(int, cpl_handler_t, int); /* t4_tracer.c */ struct t4_tracer; void t4_tracer_modload(void); void t4_tracer_modunload(void); void t4_tracer_port_detach(struct adapter *); int t4_get_tracer(struct adapter *, struct t4_tracer *); int t4_set_tracer(struct adapter *, struct t4_tracer *); int t4_trace_pkt(struct sge_iq *, const struct rss_header *, struct mbuf *); int t5_trace_pkt(struct sge_iq *, const struct rss_header *, struct mbuf *); /* t4_sched.c */ int t4_set_sched_class(struct adapter *, struct t4_sched_params *); int t4_set_sched_queue(struct adapter *, struct t4_sched_queue *); int t4_init_tx_sched(struct adapter *); int t4_free_tx_sched(struct adapter *); void t4_update_tx_sched(struct adapter *); int t4_reserve_cl_rl_kbps(struct adapter *, int, u_int, int *); void t4_release_cl_rl_kbps(struct adapter *, int, int); /* t4_filter.c */ int get_filter_mode(struct adapter *, uint32_t *); int set_filter_mode(struct adapter *, uint32_t); int get_filter(struct adapter *, struct t4_filter *); int set_filter(struct adapter *, struct t4_filter *); int del_filter(struct adapter *, struct t4_filter *); int t4_filter_rpl(struct sge_iq *, const struct rss_header *, struct mbuf *); +int t4_hashfilter_ao_rpl(struct sge_iq *, const struct rss_header *, struct mbuf *); +int t4_hashfilter_tcb_rpl(struct sge_iq *, const struct rss_header *, struct mbuf *); +int t4_del_hashfilter_rpl(struct sge_iq *, const struct rss_header *, struct mbuf *); static inline struct wrqe * alloc_wrqe(int wr_len, struct sge_wrq *wrq) { int len = offsetof(struct wrqe, wr) + wr_len; struct wrqe *wr; wr = malloc(len, M_CXGBE, M_NOWAIT); if (__predict_false(wr == NULL)) return (NULL); wr->wr_len = wr_len; wr->wrq = wrq; return (wr); } static inline void * wrtod(struct wrqe *wr) { return (&wr->wr[0]); } static inline void free_wrqe(struct wrqe *wr) { free(wr, M_CXGBE); } static inline void t4_wrq_tx(struct adapter *sc, struct wrqe *wr) { struct sge_wrq *wrq = wr->wrq; TXQ_LOCK(wrq); t4_wrq_tx_locked(sc, wrq, wr); TXQ_UNLOCK(wrq); } static inline int read_via_memwin(struct adapter *sc, int idx, uint32_t addr, uint32_t *val, int len) { return (rw_via_memwin(sc, idx, addr, val, len, 0)); } static inline int write_via_memwin(struct adapter *sc, int idx, uint32_t addr, const uint32_t *val, int len) { return (rw_via_memwin(sc, idx, addr, (void *)(uintptr_t)val, len, 1)); } #endif Index: head/sys/dev/cxgbe/common/common.h =================================================================== --- head/sys/dev/cxgbe/common/common.h (revision 333393) +++ head/sys/dev/cxgbe/common/common.h (revision 333394) @@ -1,866 +1,872 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2011 Chelsio Communications, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ * */ #ifndef __CHELSIO_COMMON_H #define __CHELSIO_COMMON_H #include "t4_hw.h" #define GLBL_INTR_MASK (F_CIM | F_MPS | F_PL | F_PCIE | F_MC0 | F_EDC0 | \ F_EDC1 | F_LE | F_TP | F_MA | F_PM_TX | F_PM_RX | F_ULP_RX | \ F_CPL_SWITCH | F_SGE | F_ULP_TX) enum { MAX_NPORTS = 4, /* max # of ports */ SERNUM_LEN = 24, /* Serial # length */ EC_LEN = 16, /* E/C length */ ID_LEN = 16, /* ID length */ PN_LEN = 16, /* Part Number length */ MD_LEN = 16, /* MFG diags version length */ MACADDR_LEN = 12, /* MAC Address length */ }; enum { T4_REGMAP_SIZE = (160 * 1024), T5_REGMAP_SIZE = (332 * 1024), }; enum { MEM_EDC0, MEM_EDC1, MEM_MC, MEM_MC0 = MEM_MC, MEM_MC1 }; enum dev_master { MASTER_CANT, MASTER_MAY, MASTER_MUST }; enum dev_state { DEV_STATE_UNINIT, DEV_STATE_INIT, DEV_STATE_ERR }; enum { PAUSE_RX = 1 << 0, PAUSE_TX = 1 << 1, PAUSE_AUTONEG = 1 << 2 }; enum { FEC_RS = 1 << 0, FEC_BASER_RS = 1 << 1, FEC_RESERVED = 1 << 2, }; enum t4_bar2_qtype { T4_BAR2_QTYPE_EGRESS, T4_BAR2_QTYPE_INGRESS }; struct port_stats { u64 tx_octets; /* total # of octets in good frames */ u64 tx_frames; /* all good frames */ u64 tx_bcast_frames; /* all broadcast frames */ u64 tx_mcast_frames; /* all multicast frames */ u64 tx_ucast_frames; /* all unicast frames */ u64 tx_error_frames; /* all error frames */ u64 tx_frames_64; /* # of Tx frames in a particular range */ u64 tx_frames_65_127; u64 tx_frames_128_255; u64 tx_frames_256_511; u64 tx_frames_512_1023; u64 tx_frames_1024_1518; u64 tx_frames_1519_max; u64 tx_drop; /* # of dropped Tx frames */ u64 tx_pause; /* # of transmitted pause frames */ u64 tx_ppp0; /* # of transmitted PPP prio 0 frames */ u64 tx_ppp1; /* # of transmitted PPP prio 1 frames */ u64 tx_ppp2; /* # of transmitted PPP prio 2 frames */ u64 tx_ppp3; /* # of transmitted PPP prio 3 frames */ u64 tx_ppp4; /* # of transmitted PPP prio 4 frames */ u64 tx_ppp5; /* # of transmitted PPP prio 5 frames */ u64 tx_ppp6; /* # of transmitted PPP prio 6 frames */ u64 tx_ppp7; /* # of transmitted PPP prio 7 frames */ u64 rx_octets; /* total # of octets in good frames */ u64 rx_frames; /* all good frames */ u64 rx_bcast_frames; /* all broadcast frames */ u64 rx_mcast_frames; /* all multicast frames */ u64 rx_ucast_frames; /* all unicast frames */ u64 rx_too_long; /* # of frames exceeding MTU */ u64 rx_jabber; /* # of jabber frames */ u64 rx_fcs_err; /* # of received frames with bad FCS */ u64 rx_len_err; /* # of received frames with length error */ u64 rx_symbol_err; /* symbol errors */ u64 rx_runt; /* # of short frames */ u64 rx_frames_64; /* # of Rx frames in a particular range */ u64 rx_frames_65_127; u64 rx_frames_128_255; u64 rx_frames_256_511; u64 rx_frames_512_1023; u64 rx_frames_1024_1518; u64 rx_frames_1519_max; u64 rx_pause; /* # of received pause frames */ u64 rx_ppp0; /* # of received PPP prio 0 frames */ u64 rx_ppp1; /* # of received PPP prio 1 frames */ u64 rx_ppp2; /* # of received PPP prio 2 frames */ u64 rx_ppp3; /* # of received PPP prio 3 frames */ u64 rx_ppp4; /* # of received PPP prio 4 frames */ u64 rx_ppp5; /* # of received PPP prio 5 frames */ u64 rx_ppp6; /* # of received PPP prio 6 frames */ u64 rx_ppp7; /* # of received PPP prio 7 frames */ u64 rx_ovflow0; /* drops due to buffer-group 0 overflows */ u64 rx_ovflow1; /* drops due to buffer-group 1 overflows */ u64 rx_ovflow2; /* drops due to buffer-group 2 overflows */ u64 rx_ovflow3; /* drops due to buffer-group 3 overflows */ u64 rx_trunc0; /* buffer-group 0 truncated packets */ u64 rx_trunc1; /* buffer-group 1 truncated packets */ u64 rx_trunc2; /* buffer-group 2 truncated packets */ u64 rx_trunc3; /* buffer-group 3 truncated packets */ }; struct lb_port_stats { u64 octets; u64 frames; u64 bcast_frames; u64 mcast_frames; u64 ucast_frames; u64 error_frames; u64 frames_64; u64 frames_65_127; u64 frames_128_255; u64 frames_256_511; u64 frames_512_1023; u64 frames_1024_1518; u64 frames_1519_max; u64 drop; u64 ovflow0; u64 ovflow1; u64 ovflow2; u64 ovflow3; u64 trunc0; u64 trunc1; u64 trunc2; u64 trunc3; }; struct tp_tcp_stats { u32 tcp_out_rsts; u64 tcp_in_segs; u64 tcp_out_segs; u64 tcp_retrans_segs; }; struct tp_usm_stats { u32 frames; u32 drops; u64 octets; }; struct tp_fcoe_stats { u32 frames_ddp; u32 frames_drop; u64 octets_ddp; }; struct tp_err_stats { u32 mac_in_errs[MAX_NCHAN]; u32 hdr_in_errs[MAX_NCHAN]; u32 tcp_in_errs[MAX_NCHAN]; u32 tnl_cong_drops[MAX_NCHAN]; u32 ofld_chan_drops[MAX_NCHAN]; u32 tnl_tx_drops[MAX_NCHAN]; u32 ofld_vlan_drops[MAX_NCHAN]; u32 tcp6_in_errs[MAX_NCHAN]; u32 ofld_no_neigh; u32 ofld_cong_defer; }; struct tp_proxy_stats { u32 proxy[MAX_NCHAN]; }; struct tp_cpl_stats { u32 req[MAX_NCHAN]; u32 rsp[MAX_NCHAN]; }; struct tp_rdma_stats { u32 rqe_dfr_pkt; u32 rqe_dfr_mod; }; struct sge_params { int timer_val[SGE_NTIMERS]; /* final, scaled values */ int counter_val[SGE_NCOUNTERS]; int fl_starve_threshold; int fl_starve_threshold2; int page_shift; int eq_s_qpp; int iq_s_qpp; int spg_len; int pad_boundary; int pack_boundary; int fl_pktshift; u32 sge_control; u32 sge_fl_buffer_size[SGE_FLBUF_SIZES]; }; struct tp_params { unsigned int tre; /* log2 of core clocks per TP tick */ unsigned int dack_re; /* DACK timer resolution */ unsigned int la_mask; /* what events are recorded by TP LA */ unsigned short tx_modq[MAX_NCHAN]; /* channel to modulation queue map */ uint32_t vlan_pri_map; uint32_t ingress_config; __be16 err_vec_mask; int8_t fcoe_shift; int8_t port_shift; int8_t vnic_shift; int8_t vlan_shift; int8_t tos_shift; int8_t protocol_shift; int8_t ethertype_shift; int8_t macmatch_shift; int8_t matchtype_shift; int8_t frag_shift; }; struct vpd_params { unsigned int cclk; u8 ec[EC_LEN + 1]; u8 sn[SERNUM_LEN + 1]; u8 id[ID_LEN + 1]; u8 pn[PN_LEN + 1]; u8 na[MACADDR_LEN + 1]; u8 md[MD_LEN + 1]; }; struct pci_params { unsigned int vpd_cap_addr; unsigned int mps; unsigned short speed; unsigned short width; }; /* * Firmware device log. */ struct devlog_params { u32 memtype; /* which memory (FW_MEMTYPE_* ) */ u32 start; /* start of log in firmware memory */ u32 size; /* size of log */ u32 addr; /* start address in flat addr space */ }; /* Stores chip specific parameters */ struct chip_params { u8 nchan; u8 pm_stats_cnt; u8 cng_ch_bits_log; /* congestion channel map bits width */ u8 nsched_cls; u8 cim_num_obq; u16 mps_rplc_size; u16 vfcount; u32 sge_fl_db; u16 mps_tcam_size; }; /* VF-only parameters. */ /* * Global Receive Side Scaling (RSS) parameters in host-native format. */ struct rss_params { unsigned int mode; /* RSS mode */ union { struct { u_int synmapen:1; /* SYN Map Enable */ u_int syn4tupenipv6:1; /* enable hashing 4-tuple IPv6 SYNs */ u_int syn2tupenipv6:1; /* enable hashing 2-tuple IPv6 SYNs */ u_int syn4tupenipv4:1; /* enable hashing 4-tuple IPv4 SYNs */ u_int syn2tupenipv4:1; /* enable hashing 2-tuple IPv4 SYNs */ u_int ofdmapen:1; /* Offload Map Enable */ u_int tnlmapen:1; /* Tunnel Map Enable */ u_int tnlalllookup:1; /* Tunnel All Lookup */ u_int hashtoeplitz:1; /* use Toeplitz hash */ } basicvirtual; } u; }; /* * Maximum resources provisioned for a PCI VF. */ struct vf_resources { unsigned int nvi; /* N virtual interfaces */ unsigned int neq; /* N egress Qs */ unsigned int nethctrl; /* N egress ETH or CTRL Qs */ unsigned int niqflint; /* N ingress Qs/w free list(s) & intr */ unsigned int niq; /* N ingress Qs */ unsigned int tc; /* PCI-E traffic class */ unsigned int pmask; /* port access rights mask */ unsigned int nexactf; /* N exact MPS filters */ unsigned int r_caps; /* read capabilities */ unsigned int wx_caps; /* write/execute capabilities */ }; struct adapter_params { struct sge_params sge; struct tp_params tp; /* PF-only */ struct vpd_params vpd; struct pci_params pci; struct devlog_params devlog; /* PF-only */ struct rss_params rss; /* VF-only */ struct vf_resources vfres; /* VF-only */ unsigned int core_vdd; unsigned int sf_size; /* serial flash size in bytes */ unsigned int sf_nsec; /* # of flash sectors */ unsigned int fw_vers; /* firmware version */ unsigned int bs_vers; /* bootstrap version */ unsigned int tp_vers; /* TP microcode version */ unsigned int er_vers; /* expansion ROM version */ unsigned int scfg_vers; /* Serial Configuration version */ unsigned int vpd_vers; /* VPD version */ unsigned short mtus[NMTUS]; unsigned short a_wnd[NCCTRL_WIN]; unsigned short b_wnd[NCCTRL_WIN]; u_int ftid_min; u_int ftid_max; u_int etid_min; u_int netids; unsigned int cim_la_size; uint8_t nports; /* # of ethernet ports */ uint8_t portvec; unsigned int chipid:4; /* chip ID. T4 = 4, T5 = 5, ... */ unsigned int rev:4; /* chip revision */ unsigned int fpga:1; /* this is an FPGA */ unsigned int offload:1; /* hw is TOE capable, fw has divvied up card resources for TOE operation. */ unsigned int bypass:1; /* this is a bypass card */ unsigned int ethoffload:1; + unsigned int hash_filter:1; unsigned int ofldq_wr_cred; unsigned int eo_wr_cred; unsigned int max_ordird_qp; unsigned int max_ird_adapter; uint32_t mps_bg_map; /* rx buffer group map for all ports (upto 4) */ bool ulptx_memwrite_dsgl; /* use of T5 DSGL allowed */ bool fr_nsmr_tpte_wr_support; /* FW support for FR_NSMR_TPTE_WR */ }; #define CHELSIO_T4 0x4 #define CHELSIO_T5 0x5 #define CHELSIO_T6 0x6 /* * State needed to monitor the forward progress of SGE Ingress DMA activities * and possible hangs. */ struct sge_idma_monitor_state { unsigned int idma_1s_thresh; /* 1s threshold in Core Clock ticks */ unsigned int idma_stalled[2]; /* synthesized stalled timers in HZ */ unsigned int idma_state[2]; /* IDMA Hang detect state */ unsigned int idma_qid[2]; /* IDMA Hung Ingress Queue ID */ unsigned int idma_warn[2]; /* time to warning in HZ */ }; struct trace_params { u32 data[TRACE_LEN / 4]; u32 mask[TRACE_LEN / 4]; unsigned short snap_len; unsigned short min_len; unsigned char skip_ofst; unsigned char skip_len; unsigned char invert; unsigned char port; }; struct link_config { /* OS-specific code owns all the requested_* fields */ unsigned char requested_aneg; /* link aneg user has requested */ unsigned char requested_fc; /* flow control user has requested */ unsigned char requested_fec; /* FEC user has requested */ unsigned int requested_speed; /* speed user has requested */ unsigned short supported; /* link capabilities */ unsigned short advertising; /* advertised capabilities */ unsigned short lp_advertising; /* peer advertised capabilities */ unsigned int speed; /* actual link speed */ unsigned char fc; /* actual link flow control */ unsigned char fec; /* actual FEC */ unsigned char link_ok; /* link up? */ unsigned char link_down_rc; /* link down reason */ }; #include "adapter.h" #ifndef PCI_VENDOR_ID_CHELSIO # define PCI_VENDOR_ID_CHELSIO 0x1425 #endif #define for_each_port(adapter, iter) \ for (iter = 0; iter < (adapter)->params.nports; ++iter) static inline int is_ftid(const struct adapter *sc, u_int tid) { return (tid >= sc->params.ftid_min && tid <= sc->params.ftid_max); } static inline int is_etid(const struct adapter *sc, u_int tid) { return (tid >= sc->params.etid_min); } static inline int is_offload(const struct adapter *adap) { return adap->params.offload; } static inline int is_ethoffload(const struct adapter *adap) { return adap->params.ethoffload; +} + +static inline int is_hashfilter(const struct adapter *adap) +{ + return adap->params.hash_filter; } static inline int chip_id(struct adapter *adap) { return adap->params.chipid; } static inline int chip_rev(struct adapter *adap) { return adap->params.rev; } static inline int is_t4(struct adapter *adap) { return adap->params.chipid == CHELSIO_T4; } static inline int is_t5(struct adapter *adap) { return adap->params.chipid == CHELSIO_T5; } static inline int is_t6(struct adapter *adap) { return adap->params.chipid == CHELSIO_T6; } static inline int is_fpga(struct adapter *adap) { return adap->params.fpga; } static inline unsigned int core_ticks_per_usec(const struct adapter *adap) { return adap->params.vpd.cclk / 1000; } static inline unsigned int us_to_core_ticks(const struct adapter *adap, unsigned int us) { return (us * adap->params.vpd.cclk) / 1000; } static inline unsigned int core_ticks_to_us(const struct adapter *adapter, unsigned int ticks) { /* add Core Clock / 2 to round ticks to nearest uS */ return ((ticks * 1000 + adapter->params.vpd.cclk/2) / adapter->params.vpd.cclk); } static inline unsigned int dack_ticks_to_usec(const struct adapter *adap, unsigned int ticks) { return (ticks << adap->params.tp.dack_re) / core_ticks_per_usec(adap); } static inline u_int us_to_tcp_ticks(const struct adapter *adap, u_long us) { return (us * adap->params.vpd.cclk / 1000 >> adap->params.tp.tre); } static inline u_int tcp_ticks_to_us(const struct adapter *adap, u_int ticks) { return ((uint64_t)ticks << adap->params.tp.tre) / core_ticks_per_usec(adap); } void t4_set_reg_field(struct adapter *adap, unsigned int addr, u32 mask, u32 val); int t4_wr_mbox_meat_timeout(struct adapter *adap, int mbox, const void *cmd, int size, void *rpl, bool sleep_ok, int timeout); int t4_wr_mbox_meat(struct adapter *adap, int mbox, const void *cmd, int size, void *rpl, bool sleep_ok); static inline int t4_wr_mbox_timeout(struct adapter *adap, int mbox, const void *cmd, int size, void *rpl, int timeout) { return t4_wr_mbox_meat_timeout(adap, mbox, cmd, size, rpl, true, timeout); } static inline int t4_wr_mbox(struct adapter *adap, int mbox, const void *cmd, int size, void *rpl) { return t4_wr_mbox_meat(adap, mbox, cmd, size, rpl, true); } static inline int t4_wr_mbox_ns(struct adapter *adap, int mbox, const void *cmd, int size, void *rpl) { return t4_wr_mbox_meat(adap, mbox, cmd, size, rpl, false); } void t4_read_indirect(struct adapter *adap, unsigned int addr_reg, unsigned int data_reg, u32 *vals, unsigned int nregs, unsigned int start_idx); void t4_write_indirect(struct adapter *adap, unsigned int addr_reg, unsigned int data_reg, const u32 *vals, unsigned int nregs, unsigned int start_idx); u32 t4_hw_pci_read_cfg4(adapter_t *adapter, int reg); struct fw_filter_wr; void t4_intr_enable(struct adapter *adapter); void t4_intr_disable(struct adapter *adapter); void t4_intr_clear(struct adapter *adapter); int t4_slow_intr_handler(struct adapter *adapter); int t4_hash_mac_addr(const u8 *addr); int t4_link_l1cfg(struct adapter *adap, unsigned int mbox, unsigned int port, struct link_config *lc); int t4_restart_aneg(struct adapter *adap, unsigned int mbox, unsigned int port); int t4_seeprom_read(struct adapter *adapter, u32 addr, u32 *data); int t4_seeprom_write(struct adapter *adapter, u32 addr, u32 data); int t4_eeprom_ptov(unsigned int phys_addr, unsigned int fn, unsigned int sz); int t4_seeprom_wp(struct adapter *adapter, int enable); int t4_read_flash(struct adapter *adapter, unsigned int addr, unsigned int nwords, u32 *data, int byte_oriented); int t4_write_flash(struct adapter *adapter, unsigned int addr, unsigned int n, const u8 *data, int byte_oriented); int t4_load_fw(struct adapter *adapter, const u8 *fw_data, unsigned int size); int t4_fwcache(struct adapter *adap, enum fw_params_param_dev_fwcache op); int t5_fw_init_extern_mem(struct adapter *adap); int t4_load_bootcfg(struct adapter *adapter, const u8 *cfg_data, unsigned int size); int t4_load_boot(struct adapter *adap, u8 *boot_data, unsigned int boot_addr, unsigned int size); int t4_flash_erase_sectors(struct adapter *adapter, int start, int end); int t4_flash_cfg_addr(struct adapter *adapter); int t4_load_cfg(struct adapter *adapter, const u8 *cfg_data, unsigned int size); int t4_get_fw_version(struct adapter *adapter, u32 *vers); int t4_get_bs_version(struct adapter *adapter, u32 *vers); int t4_get_tp_version(struct adapter *adapter, u32 *vers); int t4_get_exprom_version(struct adapter *adapter, u32 *vers); int t4_get_scfg_version(struct adapter *adapter, u32 *vers); int t4_get_vpd_version(struct adapter *adapter, u32 *vers); int t4_get_version_info(struct adapter *adapter); int t4_init_hw(struct adapter *adapter, u32 fw_params); const struct chip_params *t4_get_chip_params(int chipid); int t4_prep_adapter(struct adapter *adapter, u32 *buf); int t4_shutdown_adapter(struct adapter *adapter); int t4_init_devlog_params(struct adapter *adapter, int fw_attach); int t4_init_sge_params(struct adapter *adapter); int t4_init_tp_params(struct adapter *adap, bool sleep_ok); int t4_filter_field_shift(const struct adapter *adap, int filter_sel); int t4_port_init(struct adapter *adap, int mbox, int pf, int vf, int port_id); void t4_fatal_err(struct adapter *adapter); void t4_db_full(struct adapter *adapter); void t4_db_dropped(struct adapter *adapter); int t4_set_trace_filter(struct adapter *adapter, const struct trace_params *tp, int filter_index, int enable); void t4_get_trace_filter(struct adapter *adapter, struct trace_params *tp, int filter_index, int *enabled); int t4_config_rss_range(struct adapter *adapter, int mbox, unsigned int viid, int start, int n, const u16 *rspq, unsigned int nrspq); int t4_config_glbl_rss(struct adapter *adapter, int mbox, unsigned int mode, unsigned int flags); int t4_config_vi_rss(struct adapter *adapter, int mbox, unsigned int viid, unsigned int flags, unsigned int defq, unsigned int skeyidx, unsigned int skey); int t4_read_rss(struct adapter *adapter, u16 *entries); void t4_read_rss_key(struct adapter *adapter, u32 *key, bool sleep_ok); void t4_write_rss_key(struct adapter *adap, const u32 *key, int idx, bool sleep_ok); void t4_read_rss_pf_config(struct adapter *adapter, unsigned int index, u32 *valp, bool sleep_ok); void t4_write_rss_pf_config(struct adapter *adapter, unsigned int index, u32 val, bool sleep_ok); void t4_read_rss_vf_config(struct adapter *adapter, unsigned int index, u32 *vfl, u32 *vfh, bool sleep_ok); void t4_write_rss_vf_config(struct adapter *adapter, unsigned int index, u32 vfl, u32 vfh, bool sleep_ok); u32 t4_read_rss_pf_map(struct adapter *adapter, bool sleep_ok); void t4_write_rss_pf_map(struct adapter *adapter, u32 pfmap, bool sleep_ok); u32 t4_read_rss_pf_mask(struct adapter *adapter, bool sleep_ok); void t4_write_rss_pf_mask(struct adapter *adapter, u32 pfmask, bool sleep_ok); int t4_mps_set_active_ports(struct adapter *adap, unsigned int port_mask); void t4_pmtx_get_stats(struct adapter *adap, u32 cnt[], u64 cycles[]); void t4_pmrx_get_stats(struct adapter *adap, u32 cnt[], u64 cycles[]); void t4_read_cimq_cfg(struct adapter *adap, u16 *base, u16 *size, u16 *thres); int t4_read_cim_ibq(struct adapter *adap, unsigned int qid, u32 *data, size_t n); int t4_read_cim_obq(struct adapter *adap, unsigned int qid, u32 *data, size_t n); int t4_cim_read(struct adapter *adap, unsigned int addr, unsigned int n, unsigned int *valp); int t4_cim_write(struct adapter *adap, unsigned int addr, unsigned int n, const unsigned int *valp); int t4_cim_ctl_read(struct adapter *adap, unsigned int addr, unsigned int n, unsigned int *valp); int t4_cim_read_la(struct adapter *adap, u32 *la_buf, unsigned int *wrptr); void t4_cim_read_pif_la(struct adapter *adap, u32 *pif_req, u32 *pif_rsp, unsigned int *pif_req_wrptr, unsigned int *pif_rsp_wrptr); void t4_cim_read_ma_la(struct adapter *adap, u32 *ma_req, u32 *ma_rsp); int t4_get_flash_params(struct adapter *adapter); u32 t4_read_pcie_cfg4(struct adapter *adap, int reg, int drv_fw_attach); int t4_mc_read(struct adapter *adap, int idx, u32 addr, __be32 *data, u64 *parity); int t4_edc_read(struct adapter *adap, int idx, u32 addr, __be32 *data, u64 *parity); int t4_mem_read(struct adapter *adap, int mtype, u32 addr, u32 size, __be32 *data); void t4_idma_monitor_init(struct adapter *adapter, struct sge_idma_monitor_state *idma); void t4_idma_monitor(struct adapter *adapter, struct sge_idma_monitor_state *idma, int hz, int ticks); unsigned int t4_get_regs_len(struct adapter *adapter); void t4_get_regs(struct adapter *adap, u8 *buf, size_t buf_size); const char *t4_get_port_type_description(enum fw_port_type port_type); void t4_get_port_stats(struct adapter *adap, int idx, struct port_stats *p); void t4_get_port_stats_offset(struct adapter *adap, int idx, struct port_stats *stats, struct port_stats *offset); void t4_get_lb_stats(struct adapter *adap, int idx, struct lb_port_stats *p); void t4_clr_port_stats(struct adapter *adap, int idx); void t4_read_mtu_tbl(struct adapter *adap, u16 *mtus, u8 *mtu_log); void t4_read_cong_tbl(struct adapter *adap, u16 incr[NMTUS][NCCTRL_WIN]); void t4_read_pace_tbl(struct adapter *adap, unsigned int pace_vals[NTX_SCHED]); void t4_get_tx_sched(struct adapter *adap, unsigned int sched, unsigned int *kbps, unsigned int *ipg, bool sleep_ok); void t4_tp_wr_bits_indirect(struct adapter *adap, unsigned int addr, unsigned int mask, unsigned int val); void t4_tp_read_la(struct adapter *adap, u64 *la_buf, unsigned int *wrptr); void t4_tp_get_err_stats(struct adapter *adap, struct tp_err_stats *st, bool sleep_ok); void t4_tp_get_proxy_stats(struct adapter *adap, struct tp_proxy_stats *st, bool sleep_ok); void t4_tp_get_cpl_stats(struct adapter *adap, struct tp_cpl_stats *st, bool sleep_ok); void t4_tp_get_rdma_stats(struct adapter *adap, struct tp_rdma_stats *st, bool sleep_ok); void t4_get_usm_stats(struct adapter *adap, struct tp_usm_stats *st, bool sleep_ok); void t4_tp_get_tcp_stats(struct adapter *adap, struct tp_tcp_stats *v4, struct tp_tcp_stats *v6, bool sleep_ok); void t4_get_fcoe_stats(struct adapter *adap, unsigned int idx, struct tp_fcoe_stats *st, bool sleep_ok); void t4_load_mtus(struct adapter *adap, const unsigned short *mtus, const unsigned short *alpha, const unsigned short *beta); void t4_ulprx_read_la(struct adapter *adap, u32 *la_buf); int t4_set_sched_bps(struct adapter *adap, int sched, unsigned int kbps); int t4_set_sched_ipg(struct adapter *adap, int sched, unsigned int ipg); int t4_set_pace_tbl(struct adapter *adap, const unsigned int *pace_vals, unsigned int start, unsigned int n); void t4_get_chan_txrate(struct adapter *adap, u64 *nic_rate, u64 *ofld_rate); int t4_set_filter_mode(struct adapter *adap, unsigned int mode_map, bool sleep_ok); void t4_mk_filtdelwr(unsigned int ftid, struct fw_filter_wr *wr, int qid); void t4_wol_magic_enable(struct adapter *adap, unsigned int port, const u8 *addr); int t4_wol_pat_enable(struct adapter *adap, unsigned int port, unsigned int map, u64 mask0, u64 mask1, unsigned int crc, bool enable); int t4_fw_hello(struct adapter *adap, unsigned int mbox, unsigned int evt_mbox, enum dev_master master, enum dev_state *state); int t4_fw_bye(struct adapter *adap, unsigned int mbox); int t4_fw_reset(struct adapter *adap, unsigned int mbox, int reset); int t4_fw_halt(struct adapter *adap, unsigned int mbox, int force); int t4_fw_restart(struct adapter *adap, unsigned int mbox, int reset); int t4_fw_upgrade(struct adapter *adap, unsigned int mbox, const u8 *fw_data, unsigned int size, int force); int t4_fw_forceinstall(struct adapter *adap, const u8 *fw_data, unsigned int size); int t4_fw_initialize(struct adapter *adap, unsigned int mbox); int t4_query_params(struct adapter *adap, unsigned int mbox, unsigned int pf, unsigned int vf, unsigned int nparams, const u32 *params, u32 *val); int t4_query_params_rw(struct adapter *adap, unsigned int mbox, unsigned int pf, unsigned int vf, unsigned int nparams, const u32 *params, u32 *val, int rw); int t4_set_params_timeout(struct adapter *adap, unsigned int mbox, unsigned int pf, unsigned int vf, unsigned int nparams, const u32 *params, const u32 *val, int timeout); int t4_set_params(struct adapter *adap, unsigned int mbox, unsigned int pf, unsigned int vf, unsigned int nparams, const u32 *params, const u32 *val); int t4_cfg_pfvf(struct adapter *adap, unsigned int mbox, unsigned int pf, unsigned int vf, unsigned int txq, unsigned int txq_eth_ctrl, unsigned int rxqi, unsigned int rxq, unsigned int tc, unsigned int vi, unsigned int cmask, unsigned int pmask, unsigned int exactf, unsigned int rcaps, unsigned int wxcaps); int t4_alloc_vi_func(struct adapter *adap, unsigned int mbox, unsigned int port, unsigned int pf, unsigned int vf, unsigned int nmac, u8 *mac, u16 *rss_size, unsigned int portfunc, unsigned int idstype); int t4_alloc_vi(struct adapter *adap, unsigned int mbox, unsigned int port, unsigned int pf, unsigned int vf, unsigned int nmac, u8 *mac, u16 *rss_size); int t4_free_vi(struct adapter *adap, unsigned int mbox, unsigned int pf, unsigned int vf, unsigned int viid); int t4_set_rxmode(struct adapter *adap, unsigned int mbox, unsigned int viid, int mtu, int promisc, int all_multi, int bcast, int vlanex, bool sleep_ok); int t4_alloc_mac_filt(struct adapter *adap, unsigned int mbox, unsigned int viid, bool free, unsigned int naddr, const u8 **addr, u16 *idx, u64 *hash, bool sleep_ok); int t4_change_mac(struct adapter *adap, unsigned int mbox, unsigned int viid, int idx, const u8 *addr, bool persist, bool add_smt); int t4_set_addr_hash(struct adapter *adap, unsigned int mbox, unsigned int viid, bool ucast, u64 vec, bool sleep_ok); int t4_enable_vi_params(struct adapter *adap, unsigned int mbox, unsigned int viid, bool rx_en, bool tx_en, bool dcb_en); int t4_enable_vi(struct adapter *adap, unsigned int mbox, unsigned int viid, bool rx_en, bool tx_en); int t4_identify_port(struct adapter *adap, unsigned int mbox, unsigned int viid, unsigned int nblinks); int t4_mdio_rd(struct adapter *adap, unsigned int mbox, unsigned int phy_addr, unsigned int mmd, unsigned int reg, unsigned int *valp); int t4_mdio_wr(struct adapter *adap, unsigned int mbox, unsigned int phy_addr, unsigned int mmd, unsigned int reg, unsigned int val); int t4_i2c_rd(struct adapter *adap, unsigned int mbox, int port, unsigned int devid, unsigned int offset, unsigned int len, u8 *buf); int t4_i2c_wr(struct adapter *adap, unsigned int mbox, int port, unsigned int devid, unsigned int offset, unsigned int len, u8 *buf); int t4_iq_stop(struct adapter *adap, unsigned int mbox, unsigned int pf, unsigned int vf, unsigned int iqtype, unsigned int iqid, unsigned int fl0id, unsigned int fl1id); int t4_iq_free(struct adapter *adap, unsigned int mbox, unsigned int pf, unsigned int vf, unsigned int iqtype, unsigned int iqid, unsigned int fl0id, unsigned int fl1id); int t4_eth_eq_free(struct adapter *adap, unsigned int mbox, unsigned int pf, unsigned int vf, unsigned int eqid); int t4_ctrl_eq_free(struct adapter *adap, unsigned int mbox, unsigned int pf, unsigned int vf, unsigned int eqid); int t4_ofld_eq_free(struct adapter *adap, unsigned int mbox, unsigned int pf, unsigned int vf, unsigned int eqid); int t4_sge_ctxt_rd(struct adapter *adap, unsigned int mbox, unsigned int cid, enum ctxt_type ctype, u32 *data); int t4_sge_ctxt_rd_bd(struct adapter *adap, unsigned int cid, enum ctxt_type ctype, u32 *data); int t4_sge_ctxt_flush(struct adapter *adap, unsigned int mbox); const char *t4_link_down_rc_str(unsigned char link_down_rc); int t4_update_port_info(struct port_info *pi); int t4_handle_fw_rpl(struct adapter *adap, const __be64 *rpl); int t4_fwaddrspace_write(struct adapter *adap, unsigned int mbox, u32 addr, u32 val); int t4_sched_config(struct adapter *adapter, int type, int minmaxen, int sleep_ok); int t4_sched_params(struct adapter *adapter, int type, int level, int mode, int rateunit, int ratemode, int channel, int cl, int minrate, int maxrate, int weight, int pktsize, int sleep_ok); int t4_sched_params_ch_rl(struct adapter *adapter, int channel, int ratemode, unsigned int maxrate, int sleep_ok); int t4_sched_params_cl_wrr(struct adapter *adapter, int channel, int cl, int weight, int sleep_ok); int t4_sched_params_cl_rl_kbps(struct adapter *adapter, int channel, int cl, int mode, unsigned int maxrate, int pktsize, int sleep_ok); int t4_config_watchdog(struct adapter *adapter, unsigned int mbox, unsigned int pf, unsigned int vf, unsigned int timeout, unsigned int action); int t4_get_devlog_level(struct adapter *adapter, unsigned int *level); int t4_set_devlog_level(struct adapter *adapter, unsigned int level); void t4_sge_decode_idma_state(struct adapter *adapter, int state); void t4_tp_pio_read(struct adapter *adap, u32 *buff, u32 nregs, u32 start_index, bool sleep_ok); void t4_tp_pio_write(struct adapter *adap, const u32 *buff, u32 nregs, u32 start_index, bool sleep_ok); void t4_tp_tm_pio_read(struct adapter *adap, u32 *buff, u32 nregs, u32 start_index, bool sleep_ok); void t4_tp_mib_read(struct adapter *adap, u32 *buff, u32 nregs, u32 start_index, bool sleep_ok); static inline int t4vf_query_params(struct adapter *adapter, unsigned int nparams, const u32 *params, u32 *vals) { return t4_query_params(adapter, 0, 0, 0, nparams, params, vals); } static inline int t4vf_set_params(struct adapter *adapter, unsigned int nparams, const u32 *params, const u32 *vals) { return t4_set_params(adapter, 0, 0, 0, nparams, params, vals); } static inline int t4vf_wr_mbox(struct adapter *adap, const void *cmd, int size, void *rpl) { return t4_wr_mbox(adap, adap->mbox, cmd, size, rpl); } int t4vf_wait_dev_ready(struct adapter *adapter); int t4vf_fw_reset(struct adapter *adapter); int t4vf_get_sge_params(struct adapter *adapter); int t4vf_get_rss_glb_config(struct adapter *adapter); int t4vf_get_vfres(struct adapter *adapter); int t4vf_prep_adapter(struct adapter *adapter); int t4_bar2_sge_qregs(struct adapter *adapter, unsigned int qid, enum t4_bar2_qtype qtype, int user, u64 *pbar2_qoffset, unsigned int *pbar2_qid); #endif /* __CHELSIO_COMMON_H */ Index: head/sys/dev/cxgbe/common/t4_msg.h =================================================================== --- head/sys/dev/cxgbe/common/t4_msg.h (revision 333393) +++ head/sys/dev/cxgbe/common/t4_msg.h (revision 333394) @@ -1,3661 +1,3701 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2011, 2016 Chelsio Communications, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ * */ #ifndef T4_MSG_H #define T4_MSG_H enum { CPL_PASS_OPEN_REQ = 0x1, CPL_PASS_ACCEPT_RPL = 0x2, CPL_ACT_OPEN_REQ = 0x3, CPL_SET_TCB = 0x4, CPL_SET_TCB_FIELD = 0x5, CPL_GET_TCB = 0x6, CPL_CLOSE_CON_REQ = 0x8, CPL_CLOSE_LISTSRV_REQ = 0x9, CPL_ABORT_REQ = 0xA, CPL_ABORT_RPL = 0xB, CPL_TX_DATA = 0xC, CPL_RX_DATA_ACK = 0xD, CPL_TX_PKT = 0xE, CPL_RTE_DELETE_REQ = 0xF, CPL_RTE_WRITE_REQ = 0x10, CPL_RTE_READ_REQ = 0x11, CPL_L2T_WRITE_REQ = 0x12, CPL_L2T_READ_REQ = 0x13, CPL_SMT_WRITE_REQ = 0x14, CPL_SMT_READ_REQ = 0x15, CPL_TAG_WRITE_REQ = 0x16, CPL_BARRIER = 0x18, CPL_TID_RELEASE = 0x1A, CPL_TAG_READ_REQ = 0x1B, CPL_SRQ_TABLE_REQ = 0x1C, CPL_TX_PKT_FSO = 0x1E, CPL_TX_DATA_ISO = 0x1F, CPL_CLOSE_LISTSRV_RPL = 0x20, CPL_ERROR = 0x21, CPL_GET_TCB_RPL = 0x22, CPL_L2T_WRITE_RPL = 0x23, CPL_PASS_OPEN_RPL = 0x24, CPL_ACT_OPEN_RPL = 0x25, CPL_PEER_CLOSE = 0x26, CPL_RTE_DELETE_RPL = 0x27, CPL_RTE_WRITE_RPL = 0x28, CPL_RX_URG_PKT = 0x29, CPL_TAG_WRITE_RPL = 0x2A, CPL_ABORT_REQ_RSS = 0x2B, CPL_RX_URG_NOTIFY = 0x2C, CPL_ABORT_RPL_RSS = 0x2D, CPL_SMT_WRITE_RPL = 0x2E, CPL_TX_DATA_ACK = 0x2F, CPL_RX_PHYS_ADDR = 0x30, CPL_PCMD_READ_RPL = 0x31, CPL_CLOSE_CON_RPL = 0x32, CPL_ISCSI_HDR = 0x33, CPL_L2T_READ_RPL = 0x34, CPL_RDMA_CQE = 0x35, CPL_RDMA_CQE_READ_RSP = 0x36, CPL_RDMA_CQE_ERR = 0x37, CPL_RTE_READ_RPL = 0x38, CPL_RX_DATA = 0x39, CPL_SET_TCB_RPL = 0x3A, CPL_RX_PKT = 0x3B, CPL_TAG_READ_RPL = 0x3C, CPL_HIT_NOTIFY = 0x3D, CPL_PKT_NOTIFY = 0x3E, CPL_RX_DDP_COMPLETE = 0x3F, CPL_ACT_ESTABLISH = 0x40, CPL_PASS_ESTABLISH = 0x41, CPL_RX_DATA_DDP = 0x42, CPL_SMT_READ_RPL = 0x43, CPL_PASS_ACCEPT_REQ = 0x44, CPL_RX_ISCSI_CMP = 0x45, CPL_RX_FCOE_DDP = 0x46, CPL_FCOE_HDR = 0x47, CPL_T5_TRACE_PKT = 0x48, CPL_RX_ISCSI_DDP = 0x49, CPL_RX_FCOE_DIF = 0x4A, CPL_RX_DATA_DIF = 0x4B, CPL_ERR_NOTIFY = 0x4D, CPL_RX_TLS_CMP = 0x4E, CPL_RDMA_READ_REQ = 0x60, CPL_RX_ISCSI_DIF = 0x60, CPL_SET_LE_REQ = 0x80, CPL_PASS_OPEN_REQ6 = 0x81, CPL_ACT_OPEN_REQ6 = 0x83, CPL_TX_TLS_PDU = 0x88, CPL_TX_TLS_SFO = 0x89, CPL_TX_SEC_PDU = 0x8A, CPL_TX_TLS_ACK = 0x8B, CPL_RDMA_TERMINATE = 0xA2, CPL_RDMA_WRITE = 0xA4, CPL_SGE_EGR_UPDATE = 0xA5, CPL_SET_LE_RPL = 0xA6, CPL_FW2_MSG = 0xA7, CPL_FW2_PLD = 0xA8, CPL_T5_RDMA_READ_REQ = 0xA9, CPL_RDMA_ATOMIC_REQ = 0xAA, CPL_RDMA_ATOMIC_RPL = 0xAB, CPL_RDMA_IMM_DATA = 0xAC, CPL_RDMA_IMM_DATA_SE = 0xAD, CPL_RX_MPS_PKT = 0xAF, CPL_TRACE_PKT = 0xB0, CPL_RX2TX_DATA = 0xB1, CPL_TLS_DATA = 0xB1, CPL_ISCSI_DATA = 0xB2, CPL_FCOE_DATA = 0xB3, CPL_FW4_MSG = 0xC0, CPL_FW4_PLD = 0xC1, CPL_FW4_ACK = 0xC3, CPL_SRQ_TABLE_RPL = 0xCC, CPL_RX_PHYS_DSGL = 0xD0, CPL_FW6_MSG = 0xE0, CPL_FW6_PLD = 0xE1, CPL_TX_TNL_LSO = 0xEC, CPL_TX_PKT_LSO = 0xED, CPL_TX_PKT_XT = 0xEE, NUM_CPL_CMDS /* must be last and previous entries must be sorted */ }; enum CPL_error { CPL_ERR_NONE = 0, CPL_ERR_TCAM_PARITY = 1, CPL_ERR_TCAM_MISS = 2, CPL_ERR_TCAM_FULL = 3, CPL_ERR_BAD_LENGTH = 15, CPL_ERR_BAD_ROUTE = 18, CPL_ERR_CONN_RESET = 20, CPL_ERR_CONN_EXIST_SYNRECV = 21, CPL_ERR_CONN_EXIST = 22, CPL_ERR_ARP_MISS = 23, CPL_ERR_BAD_SYN = 24, CPL_ERR_CONN_TIMEDOUT = 30, CPL_ERR_XMIT_TIMEDOUT = 31, CPL_ERR_PERSIST_TIMEDOUT = 32, CPL_ERR_FINWAIT2_TIMEDOUT = 33, CPL_ERR_KEEPALIVE_TIMEDOUT = 34, CPL_ERR_RTX_NEG_ADVICE = 35, CPL_ERR_PERSIST_NEG_ADVICE = 36, CPL_ERR_KEEPALV_NEG_ADVICE = 37, CPL_ERR_WAIT_ARP_RPL = 41, CPL_ERR_ABORT_FAILED = 42, CPL_ERR_IWARP_FLM = 50, CPL_CONTAINS_READ_RPL = 60, CPL_CONTAINS_WRITE_RPL = 61, }; /* * Some of the error codes above implicitly indicate that there is no TID * allocated with the result of an ACT_OPEN. We use this predicate to make * that explicit. */ static inline int act_open_has_tid(int status) { return (status != CPL_ERR_TCAM_PARITY && status != CPL_ERR_TCAM_MISS && status != CPL_ERR_TCAM_FULL && status != CPL_ERR_CONN_EXIST_SYNRECV && status != CPL_ERR_CONN_EXIST); } +/* + * Convert an ACT_OPEN_RPL status to an errno. + */ +static inline int +act_open_rpl_status_to_errno(int status) +{ + + switch (status) { + case CPL_ERR_CONN_RESET: + return (ECONNREFUSED); + case CPL_ERR_ARP_MISS: + return (EHOSTUNREACH); + case CPL_ERR_CONN_TIMEDOUT: + return (ETIMEDOUT); + case CPL_ERR_TCAM_FULL: + return (EAGAIN); + case CPL_ERR_CONN_EXIST: + return (EAGAIN); + default: + return (EIO); + } +} + + enum { CPL_CONN_POLICY_AUTO = 0, CPL_CONN_POLICY_ASK = 1, CPL_CONN_POLICY_FILTER = 2, CPL_CONN_POLICY_DENY = 3 }; enum { ULP_MODE_NONE = 0, ULP_MODE_ISCSI = 2, ULP_MODE_RDMA = 4, ULP_MODE_TCPDDP = 5, ULP_MODE_FCOE = 6, ULP_MODE_TLS = 8, }; enum { ULP_CRC_HEADER = 1 << 0, ULP_CRC_DATA = 1 << 1 }; enum { CPL_PASS_OPEN_ACCEPT, CPL_PASS_OPEN_REJECT, CPL_PASS_OPEN_ACCEPT_TNL }; enum { CPL_ABORT_SEND_RST = 0, CPL_ABORT_NO_RST, }; enum { /* TX_PKT_XT checksum types */ TX_CSUM_TCP = 0, TX_CSUM_UDP = 1, TX_CSUM_CRC16 = 4, TX_CSUM_CRC32 = 5, TX_CSUM_CRC32C = 6, TX_CSUM_FCOE = 7, TX_CSUM_TCPIP = 8, TX_CSUM_UDPIP = 9, TX_CSUM_TCPIP6 = 10, TX_CSUM_UDPIP6 = 11, TX_CSUM_IP = 12, }; enum { /* packet type in CPL_RX_PKT */ PKTYPE_XACT_UCAST = 0, PKTYPE_HASH_UCAST = 1, PKTYPE_XACT_MCAST = 2, PKTYPE_HASH_MCAST = 3, PKTYPE_PROMISC = 4, PKTYPE_HPROMISC = 5, PKTYPE_BCAST = 6 }; enum { /* DMAC type in CPL_RX_PKT */ DATYPE_UCAST, DATYPE_MCAST, DATYPE_BCAST }; enum { /* TCP congestion control algorithms */ CONG_ALG_RENO, CONG_ALG_TAHOE, CONG_ALG_NEWRENO, CONG_ALG_HIGHSPEED }; enum { /* RSS hash type */ RSS_HASH_NONE = 0, /* no hash computed */ RSS_HASH_IP = 1, /* IP or IPv6 2-tuple hash */ RSS_HASH_TCP = 2, /* TCP 4-tuple hash */ RSS_HASH_UDP = 3 /* UDP 4-tuple hash */ }; enum { /* LE commands */ LE_CMD_READ = 0x4, LE_CMD_WRITE = 0xb }; enum { /* LE request size */ LE_SZ_NONE = 0, LE_SZ_33 = 1, LE_SZ_66 = 2, LE_SZ_132 = 3, LE_SZ_264 = 4, LE_SZ_528 = 5 }; union opcode_tid { __be32 opcode_tid; __u8 opcode; }; #define S_CPL_OPCODE 24 #define V_CPL_OPCODE(x) ((x) << S_CPL_OPCODE) #define G_CPL_OPCODE(x) (((x) >> S_CPL_OPCODE) & 0xFF) #define G_TID(x) ((x) & 0xFFFFFF) /* tid is assumed to be 24-bits */ #define MK_OPCODE_TID(opcode, tid) (V_CPL_OPCODE(opcode) | (tid)) #define OPCODE_TID(cmd) ((cmd)->ot.opcode_tid) /* extract the TID from a CPL command */ #define GET_TID(cmd) (G_TID(ntohl(OPCODE_TID(cmd)))) #define GET_OPCODE(cmd) ((cmd)->ot.opcode) /* partitioning of TID fields that also carry a queue id */ #define S_TID_TID 0 #define M_TID_TID 0x7ff #define V_TID_TID(x) ((x) << S_TID_TID) #define G_TID_TID(x) (((x) >> S_TID_TID) & M_TID_TID) #define S_TID_COOKIE 11 #define M_TID_COOKIE 0x7 #define V_TID_COOKIE(x) ((x) << S_TID_COOKIE) #define G_TID_COOKIE(x) (((x) >> S_TID_COOKIE) & M_TID_COOKIE) #define S_TID_QID 14 #define M_TID_QID 0x3ff #define V_TID_QID(x) ((x) << S_TID_QID) #define G_TID_QID(x) (((x) >> S_TID_QID) & M_TID_QID) union opcode_info { __be64 opcode_info; __u8 opcode; }; struct tcp_options { __be16 mss; __u8 wsf; #if defined(__LITTLE_ENDIAN_BITFIELD) __u8 :4; __u8 unknown:1; __u8 ecn:1; __u8 sack:1; __u8 tstamp:1; #else __u8 tstamp:1; __u8 sack:1; __u8 ecn:1; __u8 unknown:1; __u8 :4; #endif }; struct rss_header { __u8 opcode; #if defined(__LITTLE_ENDIAN_BITFIELD) __u8 channel:2; __u8 filter_hit:1; __u8 filter_tid:1; __u8 hash_type:2; __u8 ipv6:1; __u8 send2fw:1; #else __u8 send2fw:1; __u8 ipv6:1; __u8 hash_type:2; __u8 filter_tid:1; __u8 filter_hit:1; __u8 channel:2; #endif __be16 qid; __be32 hash_val; }; #define S_HASHTYPE 20 #define M_HASHTYPE 0x3 #define G_HASHTYPE(x) (((x) >> S_HASHTYPE) & M_HASHTYPE) #define S_QNUM 0 #define M_QNUM 0xFFFF #define G_QNUM(x) (((x) >> S_QNUM) & M_QNUM) #if defined(RSS_HDR_VLD) || defined(CHELSIO_FW) # define RSS_HDR struct rss_header rss_hdr; #else # define RSS_HDR #endif #ifndef CHELSIO_FW struct work_request_hdr { __be32 wr_hi; __be32 wr_mid; __be64 wr_lo; }; /* wr_mid fields */ #define S_WR_LEN16 0 #define M_WR_LEN16 0xFF #define V_WR_LEN16(x) ((x) << S_WR_LEN16) #define G_WR_LEN16(x) (((x) >> S_WR_LEN16) & M_WR_LEN16) /* wr_hi fields */ #define S_WR_OP 24 #define M_WR_OP 0xFF #define V_WR_OP(x) ((__u64)(x) << S_WR_OP) #define G_WR_OP(x) (((x) >> S_WR_OP) & M_WR_OP) # define WR_HDR struct work_request_hdr wr # define WR_HDR_SIZE sizeof(struct work_request_hdr) #else # define WR_HDR # define WR_HDR_SIZE 0 #endif /* option 0 fields */ #define S_ACCEPT_MODE 0 #define M_ACCEPT_MODE 0x3 #define V_ACCEPT_MODE(x) ((x) << S_ACCEPT_MODE) #define G_ACCEPT_MODE(x) (((x) >> S_ACCEPT_MODE) & M_ACCEPT_MODE) #define S_TX_CHAN 2 #define M_TX_CHAN 0x3 #define V_TX_CHAN(x) ((x) << S_TX_CHAN) #define G_TX_CHAN(x) (((x) >> S_TX_CHAN) & M_TX_CHAN) #define S_NO_CONG 4 #define V_NO_CONG(x) ((x) << S_NO_CONG) #define F_NO_CONG V_NO_CONG(1U) #define S_DELACK 5 #define V_DELACK(x) ((x) << S_DELACK) #define F_DELACK V_DELACK(1U) #define S_INJECT_TIMER 6 #define V_INJECT_TIMER(x) ((x) << S_INJECT_TIMER) #define F_INJECT_TIMER V_INJECT_TIMER(1U) #define S_NON_OFFLOAD 7 #define V_NON_OFFLOAD(x) ((x) << S_NON_OFFLOAD) #define F_NON_OFFLOAD V_NON_OFFLOAD(1U) #define S_ULP_MODE 8 #define M_ULP_MODE 0xF #define V_ULP_MODE(x) ((x) << S_ULP_MODE) #define G_ULP_MODE(x) (((x) >> S_ULP_MODE) & M_ULP_MODE) #define S_RCV_BUFSIZ 12 #define M_RCV_BUFSIZ 0x3FFU #define V_RCV_BUFSIZ(x) ((x) << S_RCV_BUFSIZ) #define G_RCV_BUFSIZ(x) (((x) >> S_RCV_BUFSIZ) & M_RCV_BUFSIZ) #define S_DSCP 22 #define M_DSCP 0x3F #define V_DSCP(x) ((x) << S_DSCP) #define G_DSCP(x) (((x) >> S_DSCP) & M_DSCP) #define S_SMAC_SEL 28 #define M_SMAC_SEL 0xFF #define V_SMAC_SEL(x) ((__u64)(x) << S_SMAC_SEL) #define G_SMAC_SEL(x) (((x) >> S_SMAC_SEL) & M_SMAC_SEL) #define S_L2T_IDX 36 #define M_L2T_IDX 0xFFF #define V_L2T_IDX(x) ((__u64)(x) << S_L2T_IDX) #define G_L2T_IDX(x) (((x) >> S_L2T_IDX) & M_L2T_IDX) #define S_TCAM_BYPASS 48 #define V_TCAM_BYPASS(x) ((__u64)(x) << S_TCAM_BYPASS) #define F_TCAM_BYPASS V_TCAM_BYPASS(1ULL) #define S_NAGLE 49 #define V_NAGLE(x) ((__u64)(x) << S_NAGLE) #define F_NAGLE V_NAGLE(1ULL) #define S_WND_SCALE 50 #define M_WND_SCALE 0xF #define V_WND_SCALE(x) ((__u64)(x) << S_WND_SCALE) #define G_WND_SCALE(x) (((x) >> S_WND_SCALE) & M_WND_SCALE) #define S_KEEP_ALIVE 54 #define V_KEEP_ALIVE(x) ((__u64)(x) << S_KEEP_ALIVE) #define F_KEEP_ALIVE V_KEEP_ALIVE(1ULL) #define S_MAX_RT 55 #define M_MAX_RT 0xF #define V_MAX_RT(x) ((__u64)(x) << S_MAX_RT) #define G_MAX_RT(x) (((x) >> S_MAX_RT) & M_MAX_RT) #define S_MAX_RT_OVERRIDE 59 #define V_MAX_RT_OVERRIDE(x) ((__u64)(x) << S_MAX_RT_OVERRIDE) #define F_MAX_RT_OVERRIDE V_MAX_RT_OVERRIDE(1ULL) #define S_MSS_IDX 60 #define M_MSS_IDX 0xF #define V_MSS_IDX(x) ((__u64)(x) << S_MSS_IDX) #define G_MSS_IDX(x) (((x) >> S_MSS_IDX) & M_MSS_IDX) /* option 1 fields */ #define S_SYN_RSS_ENABLE 0 #define V_SYN_RSS_ENABLE(x) ((x) << S_SYN_RSS_ENABLE) #define F_SYN_RSS_ENABLE V_SYN_RSS_ENABLE(1U) #define S_SYN_RSS_USE_HASH 1 #define V_SYN_RSS_USE_HASH(x) ((x) << S_SYN_RSS_USE_HASH) #define F_SYN_RSS_USE_HASH V_SYN_RSS_USE_HASH(1U) #define S_SYN_RSS_QUEUE 2 #define M_SYN_RSS_QUEUE 0x3FF #define V_SYN_RSS_QUEUE(x) ((x) << S_SYN_RSS_QUEUE) #define G_SYN_RSS_QUEUE(x) (((x) >> S_SYN_RSS_QUEUE) & M_SYN_RSS_QUEUE) #define S_LISTEN_INTF 12 #define M_LISTEN_INTF 0xFF #define V_LISTEN_INTF(x) ((x) << S_LISTEN_INTF) #define G_LISTEN_INTF(x) (((x) >> S_LISTEN_INTF) & M_LISTEN_INTF) #define S_LISTEN_FILTER 20 #define V_LISTEN_FILTER(x) ((x) << S_LISTEN_FILTER) #define F_LISTEN_FILTER V_LISTEN_FILTER(1U) #define S_SYN_DEFENSE 21 #define V_SYN_DEFENSE(x) ((x) << S_SYN_DEFENSE) #define F_SYN_DEFENSE V_SYN_DEFENSE(1U) #define S_CONN_POLICY 22 #define M_CONN_POLICY 0x3 #define V_CONN_POLICY(x) ((x) << S_CONN_POLICY) #define G_CONN_POLICY(x) (((x) >> S_CONN_POLICY) & M_CONN_POLICY) #define S_T5_FILT_INFO 24 #define M_T5_FILT_INFO 0xffffffffffULL #define V_T5_FILT_INFO(x) ((x) << S_T5_FILT_INFO) #define G_T5_FILT_INFO(x) (((x) >> S_T5_FILT_INFO) & M_T5_FILT_INFO) #define S_FILT_INFO 28 #define M_FILT_INFO 0xfffffffffULL #define V_FILT_INFO(x) ((x) << S_FILT_INFO) #define G_FILT_INFO(x) (((x) >> S_FILT_INFO) & M_FILT_INFO) /* option 2 fields */ #define S_RSS_QUEUE 0 #define M_RSS_QUEUE 0x3FF #define V_RSS_QUEUE(x) ((x) << S_RSS_QUEUE) #define G_RSS_QUEUE(x) (((x) >> S_RSS_QUEUE) & M_RSS_QUEUE) #define S_RSS_QUEUE_VALID 10 #define V_RSS_QUEUE_VALID(x) ((x) << S_RSS_QUEUE_VALID) #define F_RSS_QUEUE_VALID V_RSS_QUEUE_VALID(1U) #define S_RX_COALESCE_VALID 11 #define V_RX_COALESCE_VALID(x) ((x) << S_RX_COALESCE_VALID) #define F_RX_COALESCE_VALID V_RX_COALESCE_VALID(1U) #define S_RX_COALESCE 12 #define M_RX_COALESCE 0x3 #define V_RX_COALESCE(x) ((x) << S_RX_COALESCE) #define G_RX_COALESCE(x) (((x) >> S_RX_COALESCE) & M_RX_COALESCE) #define S_CONG_CNTRL 14 #define M_CONG_CNTRL 0x3 #define V_CONG_CNTRL(x) ((x) << S_CONG_CNTRL) #define G_CONG_CNTRL(x) (((x) >> S_CONG_CNTRL) & M_CONG_CNTRL) #define S_PACE 16 #define M_PACE 0x3 #define V_PACE(x) ((x) << S_PACE) #define G_PACE(x) (((x) >> S_PACE) & M_PACE) #define S_CONG_CNTRL_VALID 18 #define V_CONG_CNTRL_VALID(x) ((x) << S_CONG_CNTRL_VALID) #define F_CONG_CNTRL_VALID V_CONG_CNTRL_VALID(1U) #define S_T5_ISS 18 #define V_T5_ISS(x) ((x) << S_T5_ISS) #define F_T5_ISS V_T5_ISS(1U) #define S_PACE_VALID 19 #define V_PACE_VALID(x) ((x) << S_PACE_VALID) #define F_PACE_VALID V_PACE_VALID(1U) #define S_RX_FC_DISABLE 20 #define V_RX_FC_DISABLE(x) ((x) << S_RX_FC_DISABLE) #define F_RX_FC_DISABLE V_RX_FC_DISABLE(1U) #define S_RX_FC_DDP 21 #define V_RX_FC_DDP(x) ((x) << S_RX_FC_DDP) #define F_RX_FC_DDP V_RX_FC_DDP(1U) #define S_RX_FC_VALID 22 #define V_RX_FC_VALID(x) ((x) << S_RX_FC_VALID) #define F_RX_FC_VALID V_RX_FC_VALID(1U) #define S_TX_QUEUE 23 #define M_TX_QUEUE 0x7 #define V_TX_QUEUE(x) ((x) << S_TX_QUEUE) #define G_TX_QUEUE(x) (((x) >> S_TX_QUEUE) & M_TX_QUEUE) #define S_RX_CHANNEL 26 #define V_RX_CHANNEL(x) ((x) << S_RX_CHANNEL) #define F_RX_CHANNEL V_RX_CHANNEL(1U) #define S_CCTRL_ECN 27 #define V_CCTRL_ECN(x) ((x) << S_CCTRL_ECN) #define F_CCTRL_ECN V_CCTRL_ECN(1U) #define S_WND_SCALE_EN 28 #define V_WND_SCALE_EN(x) ((x) << S_WND_SCALE_EN) #define F_WND_SCALE_EN V_WND_SCALE_EN(1U) #define S_TSTAMPS_EN 29 #define V_TSTAMPS_EN(x) ((x) << S_TSTAMPS_EN) #define F_TSTAMPS_EN V_TSTAMPS_EN(1U) #define S_SACK_EN 30 #define V_SACK_EN(x) ((x) << S_SACK_EN) #define F_SACK_EN V_SACK_EN(1U) #define S_T5_OPT_2_VALID 31 #define V_T5_OPT_2_VALID(x) ((x) << S_T5_OPT_2_VALID) #define F_T5_OPT_2_VALID V_T5_OPT_2_VALID(1U) struct cpl_pass_open_req { WR_HDR; union opcode_tid ot; __be16 local_port; __be16 peer_port; __be32 local_ip; __be32 peer_ip; __be64 opt0; __be64 opt1; }; struct cpl_pass_open_req6 { WR_HDR; union opcode_tid ot; __be16 local_port; __be16 peer_port; __be64 local_ip_hi; __be64 local_ip_lo; __be64 peer_ip_hi; __be64 peer_ip_lo; __be64 opt0; __be64 opt1; }; struct cpl_pass_open_rpl { RSS_HDR union opcode_tid ot; __u8 rsvd[3]; __u8 status; }; struct cpl_pass_establish { RSS_HDR union opcode_tid ot; __be32 rsvd; __be32 tos_stid; __be16 mac_idx; __be16 tcp_opt; __be32 snd_isn; __be32 rcv_isn; }; /* cpl_pass_establish.tos_stid fields */ #define S_PASS_OPEN_TID 0 #define M_PASS_OPEN_TID 0xFFFFFF #define V_PASS_OPEN_TID(x) ((x) << S_PASS_OPEN_TID) #define G_PASS_OPEN_TID(x) (((x) >> S_PASS_OPEN_TID) & M_PASS_OPEN_TID) #define S_PASS_OPEN_TOS 24 #define M_PASS_OPEN_TOS 0xFF #define V_PASS_OPEN_TOS(x) ((x) << S_PASS_OPEN_TOS) #define G_PASS_OPEN_TOS(x) (((x) >> S_PASS_OPEN_TOS) & M_PASS_OPEN_TOS) /* cpl_pass_establish.tcp_opt fields (also applies to act_open_establish) */ #define S_TCPOPT_WSCALE_OK 5 #define M_TCPOPT_WSCALE_OK 0x1 #define V_TCPOPT_WSCALE_OK(x) ((x) << S_TCPOPT_WSCALE_OK) #define G_TCPOPT_WSCALE_OK(x) (((x) >> S_TCPOPT_WSCALE_OK) & M_TCPOPT_WSCALE_OK) #define S_TCPOPT_SACK 6 #define M_TCPOPT_SACK 0x1 #define V_TCPOPT_SACK(x) ((x) << S_TCPOPT_SACK) #define G_TCPOPT_SACK(x) (((x) >> S_TCPOPT_SACK) & M_TCPOPT_SACK) #define S_TCPOPT_TSTAMP 7 #define M_TCPOPT_TSTAMP 0x1 #define V_TCPOPT_TSTAMP(x) ((x) << S_TCPOPT_TSTAMP) #define G_TCPOPT_TSTAMP(x) (((x) >> S_TCPOPT_TSTAMP) & M_TCPOPT_TSTAMP) #define S_TCPOPT_SND_WSCALE 8 #define M_TCPOPT_SND_WSCALE 0xF #define V_TCPOPT_SND_WSCALE(x) ((x) << S_TCPOPT_SND_WSCALE) #define G_TCPOPT_SND_WSCALE(x) (((x) >> S_TCPOPT_SND_WSCALE) & M_TCPOPT_SND_WSCALE) #define S_TCPOPT_MSS 12 #define M_TCPOPT_MSS 0xF #define V_TCPOPT_MSS(x) ((x) << S_TCPOPT_MSS) #define G_TCPOPT_MSS(x) (((x) >> S_TCPOPT_MSS) & M_TCPOPT_MSS) struct cpl_pass_accept_req { RSS_HDR union opcode_tid ot; __be16 rsvd; __be16 len; __be32 hdr_len; __be16 vlan; __be16 l2info; __be32 tos_stid; struct tcp_options tcpopt; }; /* cpl_pass_accept_req.hdr_len fields */ #define S_SYN_RX_CHAN 0 #define M_SYN_RX_CHAN 0xF #define V_SYN_RX_CHAN(x) ((x) << S_SYN_RX_CHAN) #define G_SYN_RX_CHAN(x) (((x) >> S_SYN_RX_CHAN) & M_SYN_RX_CHAN) #define S_TCP_HDR_LEN 10 #define M_TCP_HDR_LEN 0x3F #define V_TCP_HDR_LEN(x) ((x) << S_TCP_HDR_LEN) #define G_TCP_HDR_LEN(x) (((x) >> S_TCP_HDR_LEN) & M_TCP_HDR_LEN) #define S_T6_TCP_HDR_LEN 8 #define V_T6_TCP_HDR_LEN(x) ((x) << S_T6_TCP_HDR_LEN) #define G_T6_TCP_HDR_LEN(x) (((x) >> S_T6_TCP_HDR_LEN) & M_TCP_HDR_LEN) #define S_IP_HDR_LEN 16 #define M_IP_HDR_LEN 0x3FF #define V_IP_HDR_LEN(x) ((x) << S_IP_HDR_LEN) #define G_IP_HDR_LEN(x) (((x) >> S_IP_HDR_LEN) & M_IP_HDR_LEN) #define S_T6_IP_HDR_LEN 14 #define V_T6_IP_HDR_LEN(x) ((x) << S_T6_IP_HDR_LEN) #define G_T6_IP_HDR_LEN(x) (((x) >> S_T6_IP_HDR_LEN) & M_IP_HDR_LEN) #define S_ETH_HDR_LEN 26 #define M_ETH_HDR_LEN 0x3F #define V_ETH_HDR_LEN(x) ((x) << S_ETH_HDR_LEN) #define G_ETH_HDR_LEN(x) (((x) >> S_ETH_HDR_LEN) & M_ETH_HDR_LEN) #define S_T6_ETH_HDR_LEN 24 #define M_T6_ETH_HDR_LEN 0xFF #define V_T6_ETH_HDR_LEN(x) ((x) << S_T6_ETH_HDR_LEN) #define G_T6_ETH_HDR_LEN(x) (((x) >> S_T6_ETH_HDR_LEN) & M_T6_ETH_HDR_LEN) /* cpl_pass_accept_req.l2info fields */ #define S_SYN_MAC_IDX 0 #define M_SYN_MAC_IDX 0x1FF #define V_SYN_MAC_IDX(x) ((x) << S_SYN_MAC_IDX) #define G_SYN_MAC_IDX(x) (((x) >> S_SYN_MAC_IDX) & M_SYN_MAC_IDX) #define S_SYN_XACT_MATCH 9 #define V_SYN_XACT_MATCH(x) ((x) << S_SYN_XACT_MATCH) #define F_SYN_XACT_MATCH V_SYN_XACT_MATCH(1U) #define S_SYN_INTF 12 #define M_SYN_INTF 0xF #define V_SYN_INTF(x) ((x) << S_SYN_INTF) #define G_SYN_INTF(x) (((x) >> S_SYN_INTF) & M_SYN_INTF) struct cpl_pass_accept_rpl { WR_HDR; union opcode_tid ot; __be32 opt2; __be64 opt0; }; struct cpl_t5_pass_accept_rpl { WR_HDR; union opcode_tid ot; __be32 opt2; __be64 opt0; __be32 iss; union { __be32 rsvd; /* T5 */ __be32 opt3; /* T6 */ } u; }; struct cpl_act_open_req { WR_HDR; union opcode_tid ot; __be16 local_port; __be16 peer_port; __be32 local_ip; __be32 peer_ip; __be64 opt0; __be32 params; __be32 opt2; }; #define S_FILTER_TUPLE 24 #define M_FILTER_TUPLE 0xFFFFFFFFFF #define V_FILTER_TUPLE(x) ((x) << S_FILTER_TUPLE) #define G_FILTER_TUPLE(x) (((x) >> S_FILTER_TUPLE) & M_FILTER_TUPLE) struct cpl_t5_act_open_req { WR_HDR; union opcode_tid ot; __be16 local_port; __be16 peer_port; __be32 local_ip; __be32 peer_ip; __be64 opt0; __be32 iss; __be32 opt2; __be64 params; }; struct cpl_t6_act_open_req { WR_HDR; union opcode_tid ot; __be16 local_port; __be16 peer_port; __be32 local_ip; __be32 peer_ip; __be64 opt0; __be32 iss; __be32 opt2; __be64 params; __be32 rsvd2; __be32 opt3; }; /* cpl_{t5,t6}_act_open_req.params field */ #define S_AOPEN_FCOEMASK 0 #define V_AOPEN_FCOEMASK(x) ((x) << S_AOPEN_FCOEMASK) #define F_AOPEN_FCOEMASK V_AOPEN_FCOEMASK(1U) struct cpl_act_open_req6 { WR_HDR; union opcode_tid ot; __be16 local_port; __be16 peer_port; __be64 local_ip_hi; __be64 local_ip_lo; __be64 peer_ip_hi; __be64 peer_ip_lo; __be64 opt0; __be32 params; __be32 opt2; }; struct cpl_t5_act_open_req6 { WR_HDR; union opcode_tid ot; __be16 local_port; __be16 peer_port; __be64 local_ip_hi; __be64 local_ip_lo; __be64 peer_ip_hi; __be64 peer_ip_lo; __be64 opt0; __be32 iss; __be32 opt2; __be64 params; }; struct cpl_t6_act_open_req6 { WR_HDR; union opcode_tid ot; __be16 local_port; __be16 peer_port; __be64 local_ip_hi; __be64 local_ip_lo; __be64 peer_ip_hi; __be64 peer_ip_lo; __be64 opt0; __be32 iss; __be32 opt2; __be64 params; __be32 rsvd2; __be32 opt3; }; struct cpl_act_open_rpl { RSS_HDR union opcode_tid ot; __be32 atid_status; }; /* cpl_act_open_rpl.atid_status fields */ #define S_AOPEN_STATUS 0 #define M_AOPEN_STATUS 0xFF #define V_AOPEN_STATUS(x) ((x) << S_AOPEN_STATUS) #define G_AOPEN_STATUS(x) (((x) >> S_AOPEN_STATUS) & M_AOPEN_STATUS) #define S_AOPEN_ATID 8 #define M_AOPEN_ATID 0xFFFFFF #define V_AOPEN_ATID(x) ((x) << S_AOPEN_ATID) #define G_AOPEN_ATID(x) (((x) >> S_AOPEN_ATID) & M_AOPEN_ATID) struct cpl_act_establish { RSS_HDR union opcode_tid ot; __be32 rsvd; __be32 tos_atid; __be16 mac_idx; __be16 tcp_opt; __be32 snd_isn; __be32 rcv_isn; }; struct cpl_get_tcb { WR_HDR; union opcode_tid ot; __be16 reply_ctrl; __be16 cookie; }; /* cpl_get_tcb.reply_ctrl fields */ #define S_QUEUENO 0 #define M_QUEUENO 0x3FF #define V_QUEUENO(x) ((x) << S_QUEUENO) #define G_QUEUENO(x) (((x) >> S_QUEUENO) & M_QUEUENO) #define S_REPLY_CHAN 14 #define V_REPLY_CHAN(x) ((x) << S_REPLY_CHAN) #define F_REPLY_CHAN V_REPLY_CHAN(1U) #define S_NO_REPLY 15 #define V_NO_REPLY(x) ((x) << S_NO_REPLY) #define F_NO_REPLY V_NO_REPLY(1U) struct cpl_get_tcb_rpl { RSS_HDR union opcode_tid ot; __u8 cookie; __u8 status; __be16 len; }; struct cpl_set_tcb { WR_HDR; union opcode_tid ot; __be16 reply_ctrl; __be16 cookie; }; struct cpl_set_tcb_field { WR_HDR; union opcode_tid ot; __be16 reply_ctrl; __be16 word_cookie; __be64 mask; __be64 val; }; struct cpl_set_tcb_field_core { union opcode_tid ot; __be16 reply_ctrl; __be16 word_cookie; __be64 mask; __be64 val; }; /* cpl_set_tcb_field.word_cookie fields */ #define S_WORD 0 #define M_WORD 0x1F #define V_WORD(x) ((x) << S_WORD) #define G_WORD(x) (((x) >> S_WORD) & M_WORD) #define S_COOKIE 5 #define M_COOKIE 0x7 #define V_COOKIE(x) ((x) << S_COOKIE) #define G_COOKIE(x) (((x) >> S_COOKIE) & M_COOKIE) struct cpl_set_tcb_rpl { RSS_HDR union opcode_tid ot; __be16 rsvd; __u8 cookie; __u8 status; __be64 oldval; }; struct cpl_close_con_req { WR_HDR; union opcode_tid ot; __be32 rsvd; }; struct cpl_close_con_rpl { RSS_HDR union opcode_tid ot; __u8 rsvd[3]; __u8 status; __be32 snd_nxt; __be32 rcv_nxt; }; struct cpl_close_listsvr_req { WR_HDR; union opcode_tid ot; __be16 reply_ctrl; __be16 rsvd; }; /* additional cpl_close_listsvr_req.reply_ctrl field */ #define S_LISTSVR_IPV6 14 #define V_LISTSVR_IPV6(x) ((x) << S_LISTSVR_IPV6) #define F_LISTSVR_IPV6 V_LISTSVR_IPV6(1U) struct cpl_close_listsvr_rpl { RSS_HDR union opcode_tid ot; __u8 rsvd[3]; __u8 status; }; struct cpl_abort_req_rss { RSS_HDR union opcode_tid ot; __u8 rsvd[3]; __u8 status; }; struct cpl_abort_req_rss6 { RSS_HDR union opcode_tid ot; __u32 srqidx_status; }; #define S_ABORT_RSS_STATUS 0 #define M_ABORT_RSS_STATUS 0xff #define V_ABORT_RSS_STATUS(x) ((x) << S_ABORT_RSS_STATUS) #define G_ABORT_RSS_STATUS(x) (((x) >> S_ABORT_RSS_STATUS) & M_ABORT_RSS_STATUS) #define S_ABORT_RSS_SRQIDX 8 #define M_ABORT_RSS_SRQIDX 0xffffff #define V_ABORT_RSS_SRQIDX(x) ((x) << S_ABORT_RSS_SRQIDX) #define G_ABORT_RSS_SRQIDX(x) (((x) >> S_ABORT_RSS_SRQIDX) & M_ABORT_RSS_SRQIDX) /* cpl_abort_req status command code in case of T6, * bit[0] specifies whether to send RST (0) to remote peer or suppress it (1) * bit[1] indicates ABORT_REQ was sent after a CLOSE_CON_REQ * bit[2] specifies whether to disable the mmgr (1) or not (0) */ struct cpl_abort_req { WR_HDR; union opcode_tid ot; __be32 rsvd0; __u8 rsvd1; __u8 cmd; __u8 rsvd2[6]; }; +struct cpl_abort_req_core { + union opcode_tid ot; + __be32 rsvd0; + __u8 rsvd1; + __u8 cmd; + __u8 rsvd2[6]; +}; + struct cpl_abort_rpl_rss { RSS_HDR union opcode_tid ot; __u8 rsvd[3]; __u8 status; }; struct cpl_abort_rpl_rss6 { RSS_HDR union opcode_tid ot; __u32 srqidx_status; }; struct cpl_abort_rpl { WR_HDR; + union opcode_tid ot; + __be32 rsvd0; + __u8 rsvd1; + __u8 cmd; + __u8 rsvd2[6]; +}; + +struct cpl_abort_rpl_core { union opcode_tid ot; __be32 rsvd0; __u8 rsvd1; __u8 cmd; __u8 rsvd2[6]; }; struct cpl_peer_close { RSS_HDR union opcode_tid ot; __be32 rcv_nxt; }; struct cpl_tid_release { WR_HDR; union opcode_tid ot; __be32 rsvd; }; struct tx_data_wr { __be32 wr_hi; __be32 wr_lo; __be32 len; __be32 flags; __be32 sndseq; __be32 param; }; /* tx_data_wr.flags fields */ #define S_TX_ACK_PAGES 21 #define M_TX_ACK_PAGES 0x7 #define V_TX_ACK_PAGES(x) ((x) << S_TX_ACK_PAGES) #define G_TX_ACK_PAGES(x) (((x) >> S_TX_ACK_PAGES) & M_TX_ACK_PAGES) /* tx_data_wr.param fields */ #define S_TX_PORT 0 #define M_TX_PORT 0x7 #define V_TX_PORT(x) ((x) << S_TX_PORT) #define G_TX_PORT(x) (((x) >> S_TX_PORT) & M_TX_PORT) #define S_TX_MSS 4 #define M_TX_MSS 0xF #define V_TX_MSS(x) ((x) << S_TX_MSS) #define G_TX_MSS(x) (((x) >> S_TX_MSS) & M_TX_MSS) #define S_TX_QOS 8 #define M_TX_QOS 0xFF #define V_TX_QOS(x) ((x) << S_TX_QOS) #define G_TX_QOS(x) (((x) >> S_TX_QOS) & M_TX_QOS) #define S_TX_SNDBUF 16 #define M_TX_SNDBUF 0xFFFF #define V_TX_SNDBUF(x) ((x) << S_TX_SNDBUF) #define G_TX_SNDBUF(x) (((x) >> S_TX_SNDBUF) & M_TX_SNDBUF) struct cpl_tx_data { union opcode_tid ot; __be32 len; __be32 rsvd; __be32 flags; }; /* cpl_tx_data.flags fields */ #define S_TX_PROXY 5 #define V_TX_PROXY(x) ((x) << S_TX_PROXY) #define F_TX_PROXY V_TX_PROXY(1U) #define S_TX_ULP_SUBMODE 6 #define M_TX_ULP_SUBMODE 0xF #define V_TX_ULP_SUBMODE(x) ((x) << S_TX_ULP_SUBMODE) #define G_TX_ULP_SUBMODE(x) (((x) >> S_TX_ULP_SUBMODE) & M_TX_ULP_SUBMODE) #define S_TX_ULP_MODE 10 #define M_TX_ULP_MODE 0x7 #define V_TX_ULP_MODE(x) ((x) << S_TX_ULP_MODE) #define G_TX_ULP_MODE(x) (((x) >> S_TX_ULP_MODE) & M_TX_ULP_MODE) #define S_TX_FORCE 13 #define V_TX_FORCE(x) ((x) << S_TX_FORCE) #define F_TX_FORCE V_TX_FORCE(1U) #define S_TX_SHOVE 14 #define V_TX_SHOVE(x) ((x) << S_TX_SHOVE) #define F_TX_SHOVE V_TX_SHOVE(1U) #define S_TX_MORE 15 #define V_TX_MORE(x) ((x) << S_TX_MORE) #define F_TX_MORE V_TX_MORE(1U) #define S_TX_URG 16 #define V_TX_URG(x) ((x) << S_TX_URG) #define F_TX_URG V_TX_URG(1U) #define S_TX_FLUSH 17 #define V_TX_FLUSH(x) ((x) << S_TX_FLUSH) #define F_TX_FLUSH V_TX_FLUSH(1U) #define S_TX_SAVE 18 #define V_TX_SAVE(x) ((x) << S_TX_SAVE) #define F_TX_SAVE V_TX_SAVE(1U) #define S_TX_TNL 19 #define V_TX_TNL(x) ((x) << S_TX_TNL) #define F_TX_TNL V_TX_TNL(1U) #define S_T6_TX_FORCE 20 #define V_T6_TX_FORCE(x) ((x) << S_T6_TX_FORCE) #define F_T6_TX_FORCE V_T6_TX_FORCE(1U) /* additional tx_data_wr.flags fields */ #define S_TX_CPU_IDX 0 #define M_TX_CPU_IDX 0x3F #define V_TX_CPU_IDX(x) ((x) << S_TX_CPU_IDX) #define G_TX_CPU_IDX(x) (((x) >> S_TX_CPU_IDX) & M_TX_CPU_IDX) #define S_TX_CLOSE 17 #define V_TX_CLOSE(x) ((x) << S_TX_CLOSE) #define F_TX_CLOSE V_TX_CLOSE(1U) #define S_TX_INIT 18 #define V_TX_INIT(x) ((x) << S_TX_INIT) #define F_TX_INIT V_TX_INIT(1U) #define S_TX_IMM_ACK 19 #define V_TX_IMM_ACK(x) ((x) << S_TX_IMM_ACK) #define F_TX_IMM_ACK V_TX_IMM_ACK(1U) #define S_TX_IMM_DMA 20 #define V_TX_IMM_DMA(x) ((x) << S_TX_IMM_DMA) #define F_TX_IMM_DMA V_TX_IMM_DMA(1U) struct cpl_tx_data_ack { RSS_HDR union opcode_tid ot; __be32 snd_una; }; struct cpl_wr_ack { /* XXX */ RSS_HDR union opcode_tid ot; __be16 credits; __be16 rsvd; __be32 snd_nxt; __be32 snd_una; }; struct cpl_tx_pkt_core { __be32 ctrl0; __be16 pack; __be16 len; __be64 ctrl1; }; struct cpl_tx_pkt { WR_HDR; struct cpl_tx_pkt_core c; }; #define cpl_tx_pkt_xt cpl_tx_pkt /* cpl_tx_pkt_core.ctrl0 fields */ #define S_TXPKT_VF 0 #define M_TXPKT_VF 0xFF #define V_TXPKT_VF(x) ((x) << S_TXPKT_VF) #define G_TXPKT_VF(x) (((x) >> S_TXPKT_VF) & M_TXPKT_VF) #define S_TXPKT_PF 8 #define M_TXPKT_PF 0x7 #define V_TXPKT_PF(x) ((x) << S_TXPKT_PF) #define G_TXPKT_PF(x) (((x) >> S_TXPKT_PF) & M_TXPKT_PF) #define S_TXPKT_VF_VLD 11 #define V_TXPKT_VF_VLD(x) ((x) << S_TXPKT_VF_VLD) #define F_TXPKT_VF_VLD V_TXPKT_VF_VLD(1U) #define S_TXPKT_OVLAN_IDX 12 #define M_TXPKT_OVLAN_IDX 0xF #define V_TXPKT_OVLAN_IDX(x) ((x) << S_TXPKT_OVLAN_IDX) #define G_TXPKT_OVLAN_IDX(x) (((x) >> S_TXPKT_OVLAN_IDX) & M_TXPKT_OVLAN_IDX) #define S_TXPKT_T5_OVLAN_IDX 12 #define M_TXPKT_T5_OVLAN_IDX 0x7 #define V_TXPKT_T5_OVLAN_IDX(x) ((x) << S_TXPKT_T5_OVLAN_IDX) #define G_TXPKT_T5_OVLAN_IDX(x) (((x) >> S_TXPKT_T5_OVLAN_IDX) & \ M_TXPKT_T5_OVLAN_IDX) #define S_TXPKT_INTF 16 #define M_TXPKT_INTF 0xF #define V_TXPKT_INTF(x) ((x) << S_TXPKT_INTF) #define G_TXPKT_INTF(x) (((x) >> S_TXPKT_INTF) & M_TXPKT_INTF) #define S_TXPKT_SPECIAL_STAT 20 #define V_TXPKT_SPECIAL_STAT(x) ((x) << S_TXPKT_SPECIAL_STAT) #define F_TXPKT_SPECIAL_STAT V_TXPKT_SPECIAL_STAT(1U) #define S_TXPKT_T5_FCS_DIS 21 #define V_TXPKT_T5_FCS_DIS(x) ((x) << S_TXPKT_T5_FCS_DIS) #define F_TXPKT_T5_FCS_DIS V_TXPKT_T5_FCS_DIS(1U) #define S_TXPKT_INS_OVLAN 21 #define V_TXPKT_INS_OVLAN(x) ((x) << S_TXPKT_INS_OVLAN) #define F_TXPKT_INS_OVLAN V_TXPKT_INS_OVLAN(1U) #define S_TXPKT_T5_INS_OVLAN 15 #define V_TXPKT_T5_INS_OVLAN(x) ((x) << S_TXPKT_T5_INS_OVLAN) #define F_TXPKT_T5_INS_OVLAN V_TXPKT_T5_INS_OVLAN(1U) #define S_TXPKT_STAT_DIS 22 #define V_TXPKT_STAT_DIS(x) ((x) << S_TXPKT_STAT_DIS) #define F_TXPKT_STAT_DIS V_TXPKT_STAT_DIS(1U) #define S_TXPKT_LOOPBACK 23 #define V_TXPKT_LOOPBACK(x) ((x) << S_TXPKT_LOOPBACK) #define F_TXPKT_LOOPBACK V_TXPKT_LOOPBACK(1U) #define S_TXPKT_TSTAMP 23 #define V_TXPKT_TSTAMP(x) ((x) << S_TXPKT_TSTAMP) #define F_TXPKT_TSTAMP V_TXPKT_TSTAMP(1U) #define S_TXPKT_OPCODE 24 #define M_TXPKT_OPCODE 0xFF #define V_TXPKT_OPCODE(x) ((x) << S_TXPKT_OPCODE) #define G_TXPKT_OPCODE(x) (((x) >> S_TXPKT_OPCODE) & M_TXPKT_OPCODE) /* cpl_tx_pkt_core.ctrl1 fields */ #define S_TXPKT_SA_IDX 0 #define M_TXPKT_SA_IDX 0xFFF #define V_TXPKT_SA_IDX(x) ((x) << S_TXPKT_SA_IDX) #define G_TXPKT_SA_IDX(x) (((x) >> S_TXPKT_SA_IDX) & M_TXPKT_SA_IDX) #define S_TXPKT_CSUM_END 12 #define M_TXPKT_CSUM_END 0xFF #define V_TXPKT_CSUM_END(x) ((x) << S_TXPKT_CSUM_END) #define G_TXPKT_CSUM_END(x) (((x) >> S_TXPKT_CSUM_END) & M_TXPKT_CSUM_END) #define S_TXPKT_CSUM_START 20 #define M_TXPKT_CSUM_START 0x3FF #define V_TXPKT_CSUM_START(x) ((x) << S_TXPKT_CSUM_START) #define G_TXPKT_CSUM_START(x) (((x) >> S_TXPKT_CSUM_START) & M_TXPKT_CSUM_START) #define S_TXPKT_IPHDR_LEN 20 #define M_TXPKT_IPHDR_LEN 0x3FFF #define V_TXPKT_IPHDR_LEN(x) ((__u64)(x) << S_TXPKT_IPHDR_LEN) #define G_TXPKT_IPHDR_LEN(x) (((x) >> S_TXPKT_IPHDR_LEN) & M_TXPKT_IPHDR_LEN) #define M_T6_TXPKT_IPHDR_LEN 0xFFF #define G_T6_TXPKT_IPHDR_LEN(x) \ (((x) >> S_TXPKT_IPHDR_LEN) & M_T6_TXPKT_IPHDR_LEN) #define S_TXPKT_CSUM_LOC 30 #define M_TXPKT_CSUM_LOC 0x3FF #define V_TXPKT_CSUM_LOC(x) ((__u64)(x) << S_TXPKT_CSUM_LOC) #define G_TXPKT_CSUM_LOC(x) (((x) >> S_TXPKT_CSUM_LOC) & M_TXPKT_CSUM_LOC) #define S_TXPKT_ETHHDR_LEN 34 #define M_TXPKT_ETHHDR_LEN 0x3F #define V_TXPKT_ETHHDR_LEN(x) ((__u64)(x) << S_TXPKT_ETHHDR_LEN) #define G_TXPKT_ETHHDR_LEN(x) (((x) >> S_TXPKT_ETHHDR_LEN) & M_TXPKT_ETHHDR_LEN) #define S_T6_TXPKT_ETHHDR_LEN 32 #define M_T6_TXPKT_ETHHDR_LEN 0xFF #define V_T6_TXPKT_ETHHDR_LEN(x) ((__u64)(x) << S_T6_TXPKT_ETHHDR_LEN) #define G_T6_TXPKT_ETHHDR_LEN(x) \ (((x) >> S_T6_TXPKT_ETHHDR_LEN) & M_T6_TXPKT_ETHHDR_LEN) #define S_TXPKT_CSUM_TYPE 40 #define M_TXPKT_CSUM_TYPE 0xF #define V_TXPKT_CSUM_TYPE(x) ((__u64)(x) << S_TXPKT_CSUM_TYPE) #define G_TXPKT_CSUM_TYPE(x) (((x) >> S_TXPKT_CSUM_TYPE) & M_TXPKT_CSUM_TYPE) #define S_TXPKT_VLAN 44 #define M_TXPKT_VLAN 0xFFFF #define V_TXPKT_VLAN(x) ((__u64)(x) << S_TXPKT_VLAN) #define G_TXPKT_VLAN(x) (((x) >> S_TXPKT_VLAN) & M_TXPKT_VLAN) #define S_TXPKT_VLAN_VLD 60 #define V_TXPKT_VLAN_VLD(x) ((__u64)(x) << S_TXPKT_VLAN_VLD) #define F_TXPKT_VLAN_VLD V_TXPKT_VLAN_VLD(1ULL) #define S_TXPKT_IPSEC 61 #define V_TXPKT_IPSEC(x) ((__u64)(x) << S_TXPKT_IPSEC) #define F_TXPKT_IPSEC V_TXPKT_IPSEC(1ULL) #define S_TXPKT_IPCSUM_DIS 62 #define V_TXPKT_IPCSUM_DIS(x) ((__u64)(x) << S_TXPKT_IPCSUM_DIS) #define F_TXPKT_IPCSUM_DIS V_TXPKT_IPCSUM_DIS(1ULL) #define S_TXPKT_L4CSUM_DIS 63 #define V_TXPKT_L4CSUM_DIS(x) ((__u64)(x) << S_TXPKT_L4CSUM_DIS) #define F_TXPKT_L4CSUM_DIS V_TXPKT_L4CSUM_DIS(1ULL) struct cpl_tx_pkt_lso_core { __be32 lso_ctrl; __be16 ipid_ofst; __be16 mss; __be32 seqno_offset; __be32 len; /* encapsulated CPL (TX_PKT, TX_PKT_XT or TX_DATA) follows here */ }; struct cpl_tx_pkt_lso { WR_HDR; struct cpl_tx_pkt_lso_core c; /* encapsulated CPL (TX_PKT, TX_PKT_XT or TX_DATA) follows here */ }; struct cpl_tx_pkt_ufo_core { __be16 ethlen; __be16 iplen; __be16 udplen; __be16 mss; __be32 len; __be32 r1; /* encapsulated CPL (TX_PKT, TX_PKT_XT or TX_DATA) follows here */ }; struct cpl_tx_pkt_ufo { WR_HDR; struct cpl_tx_pkt_ufo_core c; /* encapsulated CPL (TX_PKT, TX_PKT_XT or TX_DATA) follows here */ }; /* cpl_tx_pkt_lso_core.lso_ctrl fields */ #define S_LSO_TCPHDR_LEN 0 #define M_LSO_TCPHDR_LEN 0xF #define V_LSO_TCPHDR_LEN(x) ((x) << S_LSO_TCPHDR_LEN) #define G_LSO_TCPHDR_LEN(x) (((x) >> S_LSO_TCPHDR_LEN) & M_LSO_TCPHDR_LEN) #define S_LSO_IPHDR_LEN 4 #define M_LSO_IPHDR_LEN 0xFFF #define V_LSO_IPHDR_LEN(x) ((x) << S_LSO_IPHDR_LEN) #define G_LSO_IPHDR_LEN(x) (((x) >> S_LSO_IPHDR_LEN) & M_LSO_IPHDR_LEN) #define S_LSO_ETHHDR_LEN 16 #define M_LSO_ETHHDR_LEN 0xF #define V_LSO_ETHHDR_LEN(x) ((x) << S_LSO_ETHHDR_LEN) #define G_LSO_ETHHDR_LEN(x) (((x) >> S_LSO_ETHHDR_LEN) & M_LSO_ETHHDR_LEN) #define S_LSO_IPV6 20 #define V_LSO_IPV6(x) ((x) << S_LSO_IPV6) #define F_LSO_IPV6 V_LSO_IPV6(1U) #define S_LSO_OFLD_ENCAP 21 #define V_LSO_OFLD_ENCAP(x) ((x) << S_LSO_OFLD_ENCAP) #define F_LSO_OFLD_ENCAP V_LSO_OFLD_ENCAP(1U) #define S_LSO_LAST_SLICE 22 #define V_LSO_LAST_SLICE(x) ((x) << S_LSO_LAST_SLICE) #define F_LSO_LAST_SLICE V_LSO_LAST_SLICE(1U) #define S_LSO_FIRST_SLICE 23 #define V_LSO_FIRST_SLICE(x) ((x) << S_LSO_FIRST_SLICE) #define F_LSO_FIRST_SLICE V_LSO_FIRST_SLICE(1U) #define S_LSO_OPCODE 24 #define M_LSO_OPCODE 0xFF #define V_LSO_OPCODE(x) ((x) << S_LSO_OPCODE) #define G_LSO_OPCODE(x) (((x) >> S_LSO_OPCODE) & M_LSO_OPCODE) #define S_LSO_T5_XFER_SIZE 0 #define M_LSO_T5_XFER_SIZE 0xFFFFFFF #define V_LSO_T5_XFER_SIZE(x) ((x) << S_LSO_T5_XFER_SIZE) #define G_LSO_T5_XFER_SIZE(x) (((x) >> S_LSO_T5_XFER_SIZE) & M_LSO_T5_XFER_SIZE) /* cpl_tx_pkt_lso_core.mss fields */ #define S_LSO_MSS 0 #define M_LSO_MSS 0x3FFF #define V_LSO_MSS(x) ((x) << S_LSO_MSS) #define G_LSO_MSS(x) (((x) >> S_LSO_MSS) & M_LSO_MSS) #define S_LSO_IPID_SPLIT 15 #define V_LSO_IPID_SPLIT(x) ((x) << S_LSO_IPID_SPLIT) #define F_LSO_IPID_SPLIT V_LSO_IPID_SPLIT(1U) struct cpl_tx_pkt_fso { WR_HDR; __be32 fso_ctrl; __be16 seqcnt_ofst; __be16 mtu; __be32 param_offset; __be32 len; /* encapsulated CPL (TX_PKT or TX_PKT_XT) follows here */ }; /* cpl_tx_pkt_fso.fso_ctrl fields different from cpl_tx_pkt_lso.lso_ctrl */ #define S_FSO_XCHG_CLASS 21 #define V_FSO_XCHG_CLASS(x) ((x) << S_FSO_XCHG_CLASS) #define F_FSO_XCHG_CLASS V_FSO_XCHG_CLASS(1U) #define S_FSO_INITIATOR 20 #define V_FSO_INITIATOR(x) ((x) << S_FSO_INITIATOR) #define F_FSO_INITIATOR V_FSO_INITIATOR(1U) #define S_FSO_FCHDR_LEN 12 #define M_FSO_FCHDR_LEN 0xF #define V_FSO_FCHDR_LEN(x) ((x) << S_FSO_FCHDR_LEN) #define G_FSO_FCHDR_LEN(x) (((x) >> S_FSO_FCHDR_LEN) & M_FSO_FCHDR_LEN) struct cpl_iscsi_hdr_no_rss { union opcode_tid ot; __be16 pdu_len_ddp; __be16 len; __be32 seq; __be16 urg; __u8 rsvd; __u8 status; }; struct cpl_tx_data_iso { __be32 op_to_scsi; __u8 reserved1; __u8 ahs_len; __be16 mpdu; __be32 burst_size; __be32 len; __be32 reserved2_seglen_offset; __be32 datasn_offset; __be32 buffer_offset; __be32 reserved3; /* encapsulated CPL_TX_DATA follows here */ }; /* cpl_tx_data_iso.op_to_scsi fields */ #define S_CPL_TX_DATA_ISO_OP 24 #define M_CPL_TX_DATA_ISO_OP 0xff #define V_CPL_TX_DATA_ISO_OP(x) ((x) << S_CPL_TX_DATA_ISO_OP) #define G_CPL_TX_DATA_ISO_OP(x) \ (((x) >> S_CPL_TX_DATA_ISO_OP) & M_CPL_TX_DATA_ISO_OP) #define S_CPL_TX_DATA_ISO_FIRST 23 #define M_CPL_TX_DATA_ISO_FIRST 0x1 #define V_CPL_TX_DATA_ISO_FIRST(x) ((x) << S_CPL_TX_DATA_ISO_FIRST) #define G_CPL_TX_DATA_ISO_FIRST(x) \ (((x) >> S_CPL_TX_DATA_ISO_FIRST) & M_CPL_TX_DATA_ISO_FIRST) #define F_CPL_TX_DATA_ISO_FIRST V_CPL_TX_DATA_ISO_FIRST(1U) #define S_CPL_TX_DATA_ISO_LAST 22 #define M_CPL_TX_DATA_ISO_LAST 0x1 #define V_CPL_TX_DATA_ISO_LAST(x) ((x) << S_CPL_TX_DATA_ISO_LAST) #define G_CPL_TX_DATA_ISO_LAST(x) \ (((x) >> S_CPL_TX_DATA_ISO_LAST) & M_CPL_TX_DATA_ISO_LAST) #define F_CPL_TX_DATA_ISO_LAST V_CPL_TX_DATA_ISO_LAST(1U) #define S_CPL_TX_DATA_ISO_CPLHDRLEN 21 #define M_CPL_TX_DATA_ISO_CPLHDRLEN 0x1 #define V_CPL_TX_DATA_ISO_CPLHDRLEN(x) ((x) << S_CPL_TX_DATA_ISO_CPLHDRLEN) #define G_CPL_TX_DATA_ISO_CPLHDRLEN(x) \ (((x) >> S_CPL_TX_DATA_ISO_CPLHDRLEN) & M_CPL_TX_DATA_ISO_CPLHDRLEN) #define F_CPL_TX_DATA_ISO_CPLHDRLEN V_CPL_TX_DATA_ISO_CPLHDRLEN(1U) #define S_CPL_TX_DATA_ISO_HDRCRC 20 #define M_CPL_TX_DATA_ISO_HDRCRC 0x1 #define V_CPL_TX_DATA_ISO_HDRCRC(x) ((x) << S_CPL_TX_DATA_ISO_HDRCRC) #define G_CPL_TX_DATA_ISO_HDRCRC(x) \ (((x) >> S_CPL_TX_DATA_ISO_HDRCRC) & M_CPL_TX_DATA_ISO_HDRCRC) #define F_CPL_TX_DATA_ISO_HDRCRC V_CPL_TX_DATA_ISO_HDRCRC(1U) #define S_CPL_TX_DATA_ISO_PLDCRC 19 #define M_CPL_TX_DATA_ISO_PLDCRC 0x1 #define V_CPL_TX_DATA_ISO_PLDCRC(x) ((x) << S_CPL_TX_DATA_ISO_PLDCRC) #define G_CPL_TX_DATA_ISO_PLDCRC(x) \ (((x) >> S_CPL_TX_DATA_ISO_PLDCRC) & M_CPL_TX_DATA_ISO_PLDCRC) #define F_CPL_TX_DATA_ISO_PLDCRC V_CPL_TX_DATA_ISO_PLDCRC(1U) #define S_CPL_TX_DATA_ISO_IMMEDIATE 18 #define M_CPL_TX_DATA_ISO_IMMEDIATE 0x1 #define V_CPL_TX_DATA_ISO_IMMEDIATE(x) ((x) << S_CPL_TX_DATA_ISO_IMMEDIATE) #define G_CPL_TX_DATA_ISO_IMMEDIATE(x) \ (((x) >> S_CPL_TX_DATA_ISO_IMMEDIATE) & M_CPL_TX_DATA_ISO_IMMEDIATE) #define F_CPL_TX_DATA_ISO_IMMEDIATE V_CPL_TX_DATA_ISO_IMMEDIATE(1U) #define S_CPL_TX_DATA_ISO_SCSI 16 #define M_CPL_TX_DATA_ISO_SCSI 0x3 #define V_CPL_TX_DATA_ISO_SCSI(x) ((x) << S_CPL_TX_DATA_ISO_SCSI) #define G_CPL_TX_DATA_ISO_SCSI(x) \ (((x) >> S_CPL_TX_DATA_ISO_SCSI) & M_CPL_TX_DATA_ISO_SCSI) /* cpl_tx_data_iso.reserved2_seglen_offset fields */ #define S_CPL_TX_DATA_ISO_SEGLEN_OFFSET 0 #define M_CPL_TX_DATA_ISO_SEGLEN_OFFSET 0xffffff #define V_CPL_TX_DATA_ISO_SEGLEN_OFFSET(x) \ ((x) << S_CPL_TX_DATA_ISO_SEGLEN_OFFSET) #define G_CPL_TX_DATA_ISO_SEGLEN_OFFSET(x) \ (((x) >> S_CPL_TX_DATA_ISO_SEGLEN_OFFSET) & \ M_CPL_TX_DATA_ISO_SEGLEN_OFFSET) struct cpl_iscsi_hdr { RSS_HDR union opcode_tid ot; __be16 pdu_len_ddp; __be16 len; __be32 seq; __be16 urg; __u8 rsvd; __u8 status; }; /* cpl_iscsi_hdr.pdu_len_ddp fields */ #define S_ISCSI_PDU_LEN 0 #define M_ISCSI_PDU_LEN 0x7FFF #define V_ISCSI_PDU_LEN(x) ((x) << S_ISCSI_PDU_LEN) #define G_ISCSI_PDU_LEN(x) (((x) >> S_ISCSI_PDU_LEN) & M_ISCSI_PDU_LEN) #define S_ISCSI_DDP 15 #define V_ISCSI_DDP(x) ((x) << S_ISCSI_DDP) #define F_ISCSI_DDP V_ISCSI_DDP(1U) struct cpl_iscsi_data { RSS_HDR union opcode_tid ot; __u8 rsvd0[2]; __be16 len; __be32 seq; __be16 urg; __u8 rsvd1; __u8 status; }; struct cpl_rx_data { RSS_HDR union opcode_tid ot; __be16 rsvd; __be16 len; __be32 seq; __be16 urg; #if defined(__LITTLE_ENDIAN_BITFIELD) __u8 dack_mode:2; __u8 psh:1; __u8 heartbeat:1; __u8 ddp_off:1; __u8 :3; #else __u8 :3; __u8 ddp_off:1; __u8 heartbeat:1; __u8 psh:1; __u8 dack_mode:2; #endif __u8 status; }; struct cpl_fcoe_hdr { RSS_HDR union opcode_tid ot; __be16 oxid; __be16 len; __be32 rctl_fctl; __u8 cs_ctl; __u8 df_ctl; __u8 sof; __u8 eof; __be16 seq_cnt; __u8 seq_id; __u8 type; __be32 param; }; /* cpl_fcoe_hdr.rctl_fctl fields */ #define S_FCOE_FCHDR_RCTL 24 #define M_FCOE_FCHDR_RCTL 0xff #define V_FCOE_FCHDR_RCTL(x) ((x) << S_FCOE_FCHDR_RCTL) #define G_FCOE_FCHDR_RCTL(x) \ (((x) >> S_FCOE_FCHDR_RCTL) & M_FCOE_FCHDR_RCTL) #define S_FCOE_FCHDR_FCTL 0 #define M_FCOE_FCHDR_FCTL 0xffffff #define V_FCOE_FCHDR_FCTL(x) ((x) << S_FCOE_FCHDR_FCTL) #define G_FCOE_FCHDR_FCTL(x) \ (((x) >> S_FCOE_FCHDR_FCTL) & M_FCOE_FCHDR_FCTL) struct cpl_fcoe_data { RSS_HDR union opcode_tid ot; __u8 rsvd0[2]; __be16 len; __be32 seq; __u8 rsvd1[3]; __u8 status; }; struct cpl_rx_urg_notify { RSS_HDR union opcode_tid ot; __be32 seq; }; struct cpl_rx_urg_pkt { RSS_HDR union opcode_tid ot; __be16 rsvd; __be16 len; }; struct cpl_rx_data_ack { WR_HDR; union opcode_tid ot; __be32 credit_dack; }; struct cpl_rx_data_ack_core { union opcode_tid ot; __be32 credit_dack; }; /* cpl_rx_data_ack.ack_seq fields */ #define S_RX_CREDITS 0 #define M_RX_CREDITS 0x3FFFFFF #define V_RX_CREDITS(x) ((x) << S_RX_CREDITS) #define G_RX_CREDITS(x) (((x) >> S_RX_CREDITS) & M_RX_CREDITS) #define S_RX_MODULATE_TX 26 #define V_RX_MODULATE_TX(x) ((x) << S_RX_MODULATE_TX) #define F_RX_MODULATE_TX V_RX_MODULATE_TX(1U) #define S_RX_MODULATE_RX 27 #define V_RX_MODULATE_RX(x) ((x) << S_RX_MODULATE_RX) #define F_RX_MODULATE_RX V_RX_MODULATE_RX(1U) #define S_RX_FORCE_ACK 28 #define V_RX_FORCE_ACK(x) ((x) << S_RX_FORCE_ACK) #define F_RX_FORCE_ACK V_RX_FORCE_ACK(1U) #define S_RX_DACK_MODE 29 #define M_RX_DACK_MODE 0x3 #define V_RX_DACK_MODE(x) ((x) << S_RX_DACK_MODE) #define G_RX_DACK_MODE(x) (((x) >> S_RX_DACK_MODE) & M_RX_DACK_MODE) #define S_RX_DACK_CHANGE 31 #define V_RX_DACK_CHANGE(x) ((x) << S_RX_DACK_CHANGE) #define F_RX_DACK_CHANGE V_RX_DACK_CHANGE(1U) struct cpl_rx_ddp_complete { RSS_HDR union opcode_tid ot; __be32 ddp_report; __be32 rcv_nxt; __be32 rsvd; }; struct cpl_rx_data_ddp { RSS_HDR union opcode_tid ot; __be16 urg; __be16 len; __be32 seq; union { __be32 nxt_seq; __be32 ddp_report; } u; __be32 ulp_crc; __be32 ddpvld; }; #define cpl_rx_iscsi_ddp cpl_rx_data_ddp struct cpl_rx_fcoe_ddp { RSS_HDR union opcode_tid ot; __be16 rsvd; __be16 len; __be32 seq; __be32 ddp_report; __be32 ulp_crc; __be32 ddpvld; }; struct cpl_rx_data_dif { RSS_HDR union opcode_tid ot; __be16 ddp_len; __be16 msg_len; __be32 seq; union { __be32 nxt_seq; __be32 ddp_report; } u; __be32 err_vec; __be32 ddpvld; }; struct cpl_rx_iscsi_dif { RSS_HDR union opcode_tid ot; __be16 ddp_len; __be16 msg_len; __be32 seq; union { __be32 nxt_seq; __be32 ddp_report; } u; __be32 ulp_crc; __be32 ddpvld; __u8 rsvd0[8]; __be32 err_vec; __u8 rsvd1[4]; }; struct cpl_rx_iscsi_cmp { RSS_HDR union opcode_tid ot; __be16 pdu_len_ddp; __be16 len; __be32 seq; __be16 urg; __u8 rsvd; __u8 status; __be32 ulp_crc; __be32 ddpvld; }; struct cpl_rx_fcoe_dif { RSS_HDR union opcode_tid ot; __be16 ddp_len; __be16 msg_len; __be32 seq; __be32 ddp_report; __be32 err_vec; __be32 ddpvld; }; /* cpl_rx_{data,iscsi,fcoe}_{ddp,dif}.ddpvld fields */ #define S_DDP_VALID 15 #define M_DDP_VALID 0x1FFFF #define V_DDP_VALID(x) ((x) << S_DDP_VALID) #define G_DDP_VALID(x) (((x) >> S_DDP_VALID) & M_DDP_VALID) #define S_DDP_PPOD_MISMATCH 15 #define V_DDP_PPOD_MISMATCH(x) ((x) << S_DDP_PPOD_MISMATCH) #define F_DDP_PPOD_MISMATCH V_DDP_PPOD_MISMATCH(1U) #define S_DDP_PDU 16 #define V_DDP_PDU(x) ((x) << S_DDP_PDU) #define F_DDP_PDU V_DDP_PDU(1U) #define S_DDP_LLIMIT_ERR 17 #define V_DDP_LLIMIT_ERR(x) ((x) << S_DDP_LLIMIT_ERR) #define F_DDP_LLIMIT_ERR V_DDP_LLIMIT_ERR(1U) #define S_DDP_PPOD_PARITY_ERR 18 #define V_DDP_PPOD_PARITY_ERR(x) ((x) << S_DDP_PPOD_PARITY_ERR) #define F_DDP_PPOD_PARITY_ERR V_DDP_PPOD_PARITY_ERR(1U) #define S_DDP_PADDING_ERR 19 #define V_DDP_PADDING_ERR(x) ((x) << S_DDP_PADDING_ERR) #define F_DDP_PADDING_ERR V_DDP_PADDING_ERR(1U) #define S_DDP_HDRCRC_ERR 20 #define V_DDP_HDRCRC_ERR(x) ((x) << S_DDP_HDRCRC_ERR) #define F_DDP_HDRCRC_ERR V_DDP_HDRCRC_ERR(1U) #define S_DDP_DATACRC_ERR 21 #define V_DDP_DATACRC_ERR(x) ((x) << S_DDP_DATACRC_ERR) #define F_DDP_DATACRC_ERR V_DDP_DATACRC_ERR(1U) #define S_DDP_INVALID_TAG 22 #define V_DDP_INVALID_TAG(x) ((x) << S_DDP_INVALID_TAG) #define F_DDP_INVALID_TAG V_DDP_INVALID_TAG(1U) #define S_DDP_ULIMIT_ERR 23 #define V_DDP_ULIMIT_ERR(x) ((x) << S_DDP_ULIMIT_ERR) #define F_DDP_ULIMIT_ERR V_DDP_ULIMIT_ERR(1U) #define S_DDP_OFFSET_ERR 24 #define V_DDP_OFFSET_ERR(x) ((x) << S_DDP_OFFSET_ERR) #define F_DDP_OFFSET_ERR V_DDP_OFFSET_ERR(1U) #define S_DDP_COLOR_ERR 25 #define V_DDP_COLOR_ERR(x) ((x) << S_DDP_COLOR_ERR) #define F_DDP_COLOR_ERR V_DDP_COLOR_ERR(1U) #define S_DDP_TID_MISMATCH 26 #define V_DDP_TID_MISMATCH(x) ((x) << S_DDP_TID_MISMATCH) #define F_DDP_TID_MISMATCH V_DDP_TID_MISMATCH(1U) #define S_DDP_INVALID_PPOD 27 #define V_DDP_INVALID_PPOD(x) ((x) << S_DDP_INVALID_PPOD) #define F_DDP_INVALID_PPOD V_DDP_INVALID_PPOD(1U) #define S_DDP_ULP_MODE 28 #define M_DDP_ULP_MODE 0xF #define V_DDP_ULP_MODE(x) ((x) << S_DDP_ULP_MODE) #define G_DDP_ULP_MODE(x) (((x) >> S_DDP_ULP_MODE) & M_DDP_ULP_MODE) /* cpl_rx_{data,iscsi,fcoe}_{ddp,dif}.ddp_report fields */ #define S_DDP_OFFSET 0 #define M_DDP_OFFSET 0xFFFFFF #define V_DDP_OFFSET(x) ((x) << S_DDP_OFFSET) #define G_DDP_OFFSET(x) (((x) >> S_DDP_OFFSET) & M_DDP_OFFSET) #define S_DDP_DACK_MODE 24 #define M_DDP_DACK_MODE 0x3 #define V_DDP_DACK_MODE(x) ((x) << S_DDP_DACK_MODE) #define G_DDP_DACK_MODE(x) (((x) >> S_DDP_DACK_MODE) & M_DDP_DACK_MODE) #define S_DDP_BUF_IDX 26 #define V_DDP_BUF_IDX(x) ((x) << S_DDP_BUF_IDX) #define F_DDP_BUF_IDX V_DDP_BUF_IDX(1U) #define S_DDP_URG 27 #define V_DDP_URG(x) ((x) << S_DDP_URG) #define F_DDP_URG V_DDP_URG(1U) #define S_DDP_PSH 28 #define V_DDP_PSH(x) ((x) << S_DDP_PSH) #define F_DDP_PSH V_DDP_PSH(1U) #define S_DDP_BUF_COMPLETE 29 #define V_DDP_BUF_COMPLETE(x) ((x) << S_DDP_BUF_COMPLETE) #define F_DDP_BUF_COMPLETE V_DDP_BUF_COMPLETE(1U) #define S_DDP_BUF_TIMED_OUT 30 #define V_DDP_BUF_TIMED_OUT(x) ((x) << S_DDP_BUF_TIMED_OUT) #define F_DDP_BUF_TIMED_OUT V_DDP_BUF_TIMED_OUT(1U) #define S_DDP_INV 31 #define V_DDP_INV(x) ((x) << S_DDP_INV) #define F_DDP_INV V_DDP_INV(1U) struct cpl_rx_pkt { RSS_HDR __u8 opcode; #if defined(__LITTLE_ENDIAN_BITFIELD) __u8 iff:4; __u8 csum_calc:1; __u8 ipmi_pkt:1; __u8 vlan_ex:1; __u8 ip_frag:1; #else __u8 ip_frag:1; __u8 vlan_ex:1; __u8 ipmi_pkt:1; __u8 csum_calc:1; __u8 iff:4; #endif __be16 csum; __be16 vlan; __be16 len; __be32 l2info; __be16 hdr_len; __be16 err_vec; }; /* rx_pkt.l2info fields */ #define S_RX_ETHHDR_LEN 0 #define M_RX_ETHHDR_LEN 0x1F #define V_RX_ETHHDR_LEN(x) ((x) << S_RX_ETHHDR_LEN) #define G_RX_ETHHDR_LEN(x) (((x) >> S_RX_ETHHDR_LEN) & M_RX_ETHHDR_LEN) #define S_RX_T5_ETHHDR_LEN 0 #define M_RX_T5_ETHHDR_LEN 0x3F #define V_RX_T5_ETHHDR_LEN(x) ((x) << S_RX_T5_ETHHDR_LEN) #define G_RX_T5_ETHHDR_LEN(x) (((x) >> S_RX_T5_ETHHDR_LEN) & M_RX_T5_ETHHDR_LEN) #define M_RX_T6_ETHHDR_LEN 0xFF #define G_RX_T6_ETHHDR_LEN(x) (((x) >> S_RX_ETHHDR_LEN) & M_RX_T6_ETHHDR_LEN) #define S_RX_PKTYPE 5 #define M_RX_PKTYPE 0x7 #define V_RX_PKTYPE(x) ((x) << S_RX_PKTYPE) #define G_RX_PKTYPE(x) (((x) >> S_RX_PKTYPE) & M_RX_PKTYPE) #define S_RX_T5_DATYPE 6 #define M_RX_T5_DATYPE 0x3 #define V_RX_T5_DATYPE(x) ((x) << S_RX_T5_DATYPE) #define G_RX_T5_DATYPE(x) (((x) >> S_RX_T5_DATYPE) & M_RX_T5_DATYPE) #define S_RX_MACIDX 8 #define M_RX_MACIDX 0x1FF #define V_RX_MACIDX(x) ((x) << S_RX_MACIDX) #define G_RX_MACIDX(x) (((x) >> S_RX_MACIDX) & M_RX_MACIDX) #define S_RX_T5_PKTYPE 17 #define M_RX_T5_PKTYPE 0x7 #define V_RX_T5_PKTYPE(x) ((x) << S_RX_T5_PKTYPE) #define G_RX_T5_PKTYPE(x) (((x) >> S_RX_T5_PKTYPE) & M_RX_T5_PKTYPE) #define S_RX_DATYPE 18 #define M_RX_DATYPE 0x3 #define V_RX_DATYPE(x) ((x) << S_RX_DATYPE) #define G_RX_DATYPE(x) (((x) >> S_RX_DATYPE) & M_RX_DATYPE) #define S_RXF_PSH 20 #define V_RXF_PSH(x) ((x) << S_RXF_PSH) #define F_RXF_PSH V_RXF_PSH(1U) #define S_RXF_SYN 21 #define V_RXF_SYN(x) ((x) << S_RXF_SYN) #define F_RXF_SYN V_RXF_SYN(1U) #define S_RXF_UDP 22 #define V_RXF_UDP(x) ((x) << S_RXF_UDP) #define F_RXF_UDP V_RXF_UDP(1U) #define S_RXF_TCP 23 #define V_RXF_TCP(x) ((x) << S_RXF_TCP) #define F_RXF_TCP V_RXF_TCP(1U) #define S_RXF_IP 24 #define V_RXF_IP(x) ((x) << S_RXF_IP) #define F_RXF_IP V_RXF_IP(1U) #define S_RXF_IP6 25 #define V_RXF_IP6(x) ((x) << S_RXF_IP6) #define F_RXF_IP6 V_RXF_IP6(1U) #define S_RXF_SYN_COOKIE 26 #define V_RXF_SYN_COOKIE(x) ((x) << S_RXF_SYN_COOKIE) #define F_RXF_SYN_COOKIE V_RXF_SYN_COOKIE(1U) #define S_RXF_FCOE 26 #define V_RXF_FCOE(x) ((x) << S_RXF_FCOE) #define F_RXF_FCOE V_RXF_FCOE(1U) #define S_RXF_LRO 27 #define V_RXF_LRO(x) ((x) << S_RXF_LRO) #define F_RXF_LRO V_RXF_LRO(1U) #define S_RX_CHAN 28 #define M_RX_CHAN 0xF #define V_RX_CHAN(x) ((x) << S_RX_CHAN) #define G_RX_CHAN(x) (((x) >> S_RX_CHAN) & M_RX_CHAN) /* rx_pkt.hdr_len fields */ #define S_RX_TCPHDR_LEN 0 #define M_RX_TCPHDR_LEN 0x3F #define V_RX_TCPHDR_LEN(x) ((x) << S_RX_TCPHDR_LEN) #define G_RX_TCPHDR_LEN(x) (((x) >> S_RX_TCPHDR_LEN) & M_RX_TCPHDR_LEN) #define S_RX_IPHDR_LEN 6 #define M_RX_IPHDR_LEN 0x3FF #define V_RX_IPHDR_LEN(x) ((x) << S_RX_IPHDR_LEN) #define G_RX_IPHDR_LEN(x) (((x) >> S_RX_IPHDR_LEN) & M_RX_IPHDR_LEN) /* rx_pkt.err_vec fields */ #define S_RXERR_OR 0 #define V_RXERR_OR(x) ((x) << S_RXERR_OR) #define F_RXERR_OR V_RXERR_OR(1U) #define S_RXERR_MAC 1 #define V_RXERR_MAC(x) ((x) << S_RXERR_MAC) #define F_RXERR_MAC V_RXERR_MAC(1U) #define S_RXERR_IPVERS 2 #define V_RXERR_IPVERS(x) ((x) << S_RXERR_IPVERS) #define F_RXERR_IPVERS V_RXERR_IPVERS(1U) #define S_RXERR_FRAG 3 #define V_RXERR_FRAG(x) ((x) << S_RXERR_FRAG) #define F_RXERR_FRAG V_RXERR_FRAG(1U) #define S_RXERR_ATTACK 4 #define V_RXERR_ATTACK(x) ((x) << S_RXERR_ATTACK) #define F_RXERR_ATTACK V_RXERR_ATTACK(1U) #define S_RXERR_ETHHDR_LEN 5 #define V_RXERR_ETHHDR_LEN(x) ((x) << S_RXERR_ETHHDR_LEN) #define F_RXERR_ETHHDR_LEN V_RXERR_ETHHDR_LEN(1U) #define S_RXERR_IPHDR_LEN 6 #define V_RXERR_IPHDR_LEN(x) ((x) << S_RXERR_IPHDR_LEN) #define F_RXERR_IPHDR_LEN V_RXERR_IPHDR_LEN(1U) #define S_RXERR_TCPHDR_LEN 7 #define V_RXERR_TCPHDR_LEN(x) ((x) << S_RXERR_TCPHDR_LEN) #define F_RXERR_TCPHDR_LEN V_RXERR_TCPHDR_LEN(1U) #define S_RXERR_PKT_LEN 8 #define V_RXERR_PKT_LEN(x) ((x) << S_RXERR_PKT_LEN) #define F_RXERR_PKT_LEN V_RXERR_PKT_LEN(1U) #define S_RXERR_TCP_OPT 9 #define V_RXERR_TCP_OPT(x) ((x) << S_RXERR_TCP_OPT) #define F_RXERR_TCP_OPT V_RXERR_TCP_OPT(1U) #define S_RXERR_IPCSUM 12 #define V_RXERR_IPCSUM(x) ((x) << S_RXERR_IPCSUM) #define F_RXERR_IPCSUM V_RXERR_IPCSUM(1U) #define S_RXERR_CSUM 13 #define V_RXERR_CSUM(x) ((x) << S_RXERR_CSUM) #define F_RXERR_CSUM V_RXERR_CSUM(1U) #define S_RXERR_PING 14 #define V_RXERR_PING(x) ((x) << S_RXERR_PING) #define F_RXERR_PING V_RXERR_PING(1U) /* In T6, rx_pkt.err_vec indicates * RxError Error vector (16b) or * Encapsulating header length (8b), * Outer encapsulation type (2b) and * compressed error vector (6b) if CRxPktEnc is * enabled in TP_OUT_CONFIG */ #define S_T6_COMPR_RXERR_VEC 0 #define M_T6_COMPR_RXERR_VEC 0x3F #define V_T6_COMPR_RXERR_VEC(x) ((x) << S_T6_COMPR_RXERR_VEC) #define G_T6_COMPR_RXERR_VEC(x) \ (((x) >> S_T6_COMPR_RXERR_VEC) & M_T6_COMPR_RXERR_VEC) #define S_T6_COMPR_RXERR_MAC 0 #define V_T6_COMPR_RXERR_MAC(x) ((x) << S_T6_COMPR_RXERR_MAC) #define F_T6_COMPR_RXERR_MAC V_T6_COMPR_RXERR_MAC(1U) /* Logical OR of RX_ERROR_PKT_LEN, RX_ERROR_TCP_HDR_LEN * RX_ERROR_IP_HDR_LEN, RX_ERROR_ETH_HDR_LEN */ #define S_T6_COMPR_RXERR_LEN 1 #define V_T6_COMPR_RXERR_LEN(x) ((x) << S_T6_COMPR_RXERR_LEN) #define F_T6_COMPR_RXERR_LEN V_COMPR_T6_RXERR_LEN(1U) #define S_T6_COMPR_RXERR_TCP_OPT 2 #define V_T6_COMPR_RXERR_TCP_OPT(x) ((x) << S_T6_COMPR_RXERR_TCP_OPT) #define F_T6_COMPR_RXERR_TCP_OPT V_T6_COMPR_RXERR_TCP_OPT(1U) #define S_T6_COMPR_RXERR_IPV6_EXT 3 #define V_T6_COMPR_RXERR_IPV6_EXT(x) ((x) << S_T6_COMPR_RXERR_IPV6_EXT) #define F_T6_COMPR_RXERR_IPV6_EXT V_T6_COMPR_RXERR_IPV6_EXT(1U) /* Logical OR of RX_ERROR_CSUM, RX_ERROR_CSIP */ #define S_T6_COMPR_RXERR_SUM 4 #define V_T6_COMPR_RXERR_SUM(x) ((x) << S_T6_COMPR_RXERR_SUM) #define F_T6_COMPR_RXERR_SUM V_T6_COMPR_RXERR_SUM(1U) /* Logical OR of RX_ERROR_FPMA, RX_ERROR_PING_DROP, * RX_ERROR_ATTACK, RX_ERROR_FRAG,RX_ERROR_IPVERSION */ #define S_T6_COMPR_RXERR_MISC 5 #define V_T6_COMPR_RXERR_MISC(x) ((x) << S_T6_COMPR_RXERR_MISC) #define F_T6_COMPR_RXERR_MISC V_T6_COMPR_RXERR_MISC(1U) #define S_T6_RX_TNL_TYPE 6 #define M_T6_RX_TNL_TYPE 0x3 #define V_T6_RX_TNL_TYPE(x) ((x) << S_T6_RX_TNL_TYPE) #define G_T6_RX_TNL_TYPE(x) (((x) >> S_T6_RX_TNL_TYPE) & M_T6_RX_TNL_TYPE) #define RX_PKT_TNL_TYPE_NVGRE 1 #define RX_PKT_TNL_TYPE_VXLAN 2 #define RX_PKT_TNL_TYPE_GENEVE 3 #define S_T6_RX_TNLHDR_LEN 8 #define M_T6_RX_TNLHDR_LEN 0xFF #define V_T6_RX_TNLHDR_LEN(x) ((x) << S_T6_RX_TNLHDR_LEN) #define G_T6_RX_TNLHDR_LEN(x) (((x) >> S_T6_RX_TNLHDR_LEN) & M_T6_RX_TNLHDR_LEN) struct cpl_trace_pkt { RSS_HDR __u8 opcode; __u8 intf; #if defined(__LITTLE_ENDIAN_BITFIELD) __u8 runt:4; __u8 filter_hit:4; __u8 :6; __u8 err:1; __u8 trunc:1; #else __u8 filter_hit:4; __u8 runt:4; __u8 trunc:1; __u8 err:1; __u8 :6; #endif __be16 rsvd; __be16 len; __be64 tstamp; }; struct cpl_t5_trace_pkt { RSS_HDR __u8 opcode; __u8 intf; #if defined(__LITTLE_ENDIAN_BITFIELD) __u8 runt:4; __u8 filter_hit:4; __u8 :6; __u8 err:1; __u8 trunc:1; #else __u8 filter_hit:4; __u8 runt:4; __u8 trunc:1; __u8 err:1; __u8 :6; #endif __be16 rsvd; __be16 len; __be64 tstamp; __be64 rsvd1; }; struct cpl_rte_delete_req { WR_HDR; union opcode_tid ot; __be32 params; }; /* {cpl_rte_delete_req, cpl_rte_read_req}.params fields */ #define S_RTE_REQ_LUT_IX 8 #define M_RTE_REQ_LUT_IX 0x7FF #define V_RTE_REQ_LUT_IX(x) ((x) << S_RTE_REQ_LUT_IX) #define G_RTE_REQ_LUT_IX(x) (((x) >> S_RTE_REQ_LUT_IX) & M_RTE_REQ_LUT_IX) #define S_RTE_REQ_LUT_BASE 19 #define M_RTE_REQ_LUT_BASE 0x7FF #define V_RTE_REQ_LUT_BASE(x) ((x) << S_RTE_REQ_LUT_BASE) #define G_RTE_REQ_LUT_BASE(x) (((x) >> S_RTE_REQ_LUT_BASE) & M_RTE_REQ_LUT_BASE) #define S_RTE_READ_REQ_SELECT 31 #define V_RTE_READ_REQ_SELECT(x) ((x) << S_RTE_READ_REQ_SELECT) #define F_RTE_READ_REQ_SELECT V_RTE_READ_REQ_SELECT(1U) struct cpl_rte_delete_rpl { RSS_HDR union opcode_tid ot; __u8 status; __u8 rsvd[3]; }; struct cpl_rte_write_req { WR_HDR; union opcode_tid ot; __u32 write_sel; __be32 lut_params; __be32 l2t_idx; __be32 netmask; __be32 faddr; }; /* cpl_rte_write_req.write_sel fields */ #define S_RTE_WR_L2TIDX 31 #define V_RTE_WR_L2TIDX(x) ((x) << S_RTE_WR_L2TIDX) #define F_RTE_WR_L2TIDX V_RTE_WR_L2TIDX(1U) #define S_RTE_WR_FADDR 30 #define V_RTE_WR_FADDR(x) ((x) << S_RTE_WR_FADDR) #define F_RTE_WR_FADDR V_RTE_WR_FADDR(1U) /* cpl_rte_write_req.lut_params fields */ #define S_RTE_WR_LUT_IX 10 #define M_RTE_WR_LUT_IX 0x7FF #define V_RTE_WR_LUT_IX(x) ((x) << S_RTE_WR_LUT_IX) #define G_RTE_WR_LUT_IX(x) (((x) >> S_RTE_WR_LUT_IX) & M_RTE_WR_LUT_IX) #define S_RTE_WR_LUT_BASE 21 #define M_RTE_WR_LUT_BASE 0x7FF #define V_RTE_WR_LUT_BASE(x) ((x) << S_RTE_WR_LUT_BASE) #define G_RTE_WR_LUT_BASE(x) (((x) >> S_RTE_WR_LUT_BASE) & M_RTE_WR_LUT_BASE) struct cpl_rte_write_rpl { RSS_HDR union opcode_tid ot; __u8 status; __u8 rsvd[3]; }; struct cpl_rte_read_req { WR_HDR; union opcode_tid ot; __be32 params; }; struct cpl_rte_read_rpl { RSS_HDR union opcode_tid ot; __u8 status; __u8 rsvd; __be16 l2t_idx; #if defined(__LITTLE_ENDIAN_BITFIELD) __u32 :30; __u32 select:1; #else __u32 select:1; __u32 :30; #endif __be32 addr; }; struct cpl_l2t_write_req { WR_HDR; union opcode_tid ot; __be16 params; __be16 l2t_idx; __be16 vlan; __u8 dst_mac[6]; }; /* cpl_l2t_write_req.params fields */ #define S_L2T_W_INFO 2 #define M_L2T_W_INFO 0x3F #define V_L2T_W_INFO(x) ((x) << S_L2T_W_INFO) #define G_L2T_W_INFO(x) (((x) >> S_L2T_W_INFO) & M_L2T_W_INFO) #define S_L2T_W_PORT 8 #define M_L2T_W_PORT 0x3 #define V_L2T_W_PORT(x) ((x) << S_L2T_W_PORT) #define G_L2T_W_PORT(x) (((x) >> S_L2T_W_PORT) & M_L2T_W_PORT) #define S_L2T_W_LPBK 10 #define V_L2T_W_LPBK(x) ((x) << S_L2T_W_LPBK) #define F_L2T_W_PKBK V_L2T_W_LPBK(1U) #define S_L2T_W_ARPMISS 11 #define V_L2T_W_ARPMISS(x) ((x) << S_L2T_W_ARPMISS) #define F_L2T_W_ARPMISS V_L2T_W_ARPMISS(1U) #define S_L2T_W_NOREPLY 15 #define V_L2T_W_NOREPLY(x) ((x) << S_L2T_W_NOREPLY) #define F_L2T_W_NOREPLY V_L2T_W_NOREPLY(1U) #define CPL_L2T_VLAN_NONE 0xfff struct cpl_l2t_write_rpl { RSS_HDR union opcode_tid ot; __u8 status; __u8 rsvd[3]; }; struct cpl_l2t_read_req { WR_HDR; union opcode_tid ot; __be32 l2t_idx; }; struct cpl_l2t_read_rpl { RSS_HDR union opcode_tid ot; __u8 status; #if defined(__LITTLE_ENDIAN_BITFIELD) __u8 :4; __u8 iff:4; #else __u8 iff:4; __u8 :4; #endif __be16 vlan; __be16 info; __u8 dst_mac[6]; }; struct cpl_srq_table_req { WR_HDR; union opcode_tid ot; __u8 status; __u8 rsvd[2]; __u8 idx; __be64 rsvd_pdid; __be32 qlen_qbase; __be16 cur_msn; __be16 max_msn; }; struct cpl_srq_table_rpl { RSS_HDR union opcode_tid ot; __u8 status; __u8 rsvd[2]; __u8 idx; __be64 rsvd_pdid; __be32 qlen_qbase; __be16 cur_msn; __be16 max_msn; }; /* cpl_srq_table_{req,rpl}.params fields */ #define S_SRQT_QLEN 28 #define M_SRQT_QLEN 0xF #define V_SRQT_QLEN(x) ((x) << S_SRQT_QLEN) #define G_SRQT_QLEN(x) (((x) >> S_SRQT_QLEN) & M_SRQT_QLEN) #define S_SRQT_QBASE 0 #define M_SRQT_QBASE 0x3FFFFFF #define V_SRQT_QBASE(x) ((x) << S_SRQT_QBASE) #define G_SRQT_QBASE(x) (((x) >> S_SRQT_QBASE) & M_SRQT_QBASE) #define S_SRQT_PDID 0 #define M_SRQT_PDID 0xFF #define V_SRQT_PDID(x) ((x) << S_SRQT_PDID) #define G_SRQT_PDID(x) (((x) >> S_SRQT_PDID) & M_SRQT_PDID) #define S_SRQT_IDX 0 #define M_SRQT_IDX 0xF #define V_SRQT_IDX(x) ((x) << S_SRQT_IDX) #define G_SRQT_IDX(x) (((x) >> S_SRQT_IDX) & M_SRQT_IDX) struct cpl_smt_write_req { WR_HDR; union opcode_tid ot; __be32 params; __be16 pfvf1; __u8 src_mac1[6]; __be16 pfvf0; __u8 src_mac0[6]; }; struct cpl_t6_smt_write_req { WR_HDR; union opcode_tid ot; __be32 params; __be64 tag; __be16 pfvf0; __u8 src_mac0[6]; __be32 local_ip; __be32 rsvd; }; struct cpl_smt_write_rpl { RSS_HDR union opcode_tid ot; __u8 status; __u8 rsvd[3]; }; struct cpl_smt_read_req { WR_HDR; union opcode_tid ot; __be32 params; }; struct cpl_smt_read_rpl { RSS_HDR union opcode_tid ot; __u8 status; __u8 ovlan_idx; __be16 rsvd; __be16 pfvf1; __u8 src_mac1[6]; __be16 pfvf0; __u8 src_mac0[6]; }; /* cpl_smt_{read,write}_req.params fields */ #define S_SMTW_OVLAN_IDX 16 #define M_SMTW_OVLAN_IDX 0xF #define V_SMTW_OVLAN_IDX(x) ((x) << S_SMTW_OVLAN_IDX) #define G_SMTW_OVLAN_IDX(x) (((x) >> S_SMTW_OVLAN_IDX) & M_SMTW_OVLAN_IDX) #define S_SMTW_IDX 20 #define M_SMTW_IDX 0x7F #define V_SMTW_IDX(x) ((x) << S_SMTW_IDX) #define G_SMTW_IDX(x) (((x) >> S_SMTW_IDX) & M_SMTW_IDX) #define M_T6_SMTW_IDX 0xFF #define G_T6_SMTW_IDX(x) (((x) >> S_SMTW_IDX) & M_T6_SMTW_IDX) #define S_SMTW_NORPL 31 #define V_SMTW_NORPL(x) ((x) << S_SMTW_NORPL) #define F_SMTW_NORPL V_SMTW_NORPL(1U) /* cpl_smt_{read,write}_req.pfvf? fields */ #define S_SMTW_VF 0 #define M_SMTW_VF 0xFF #define V_SMTW_VF(x) ((x) << S_SMTW_VF) #define G_SMTW_VF(x) (((x) >> S_SMTW_VF) & M_SMTW_VF) #define S_SMTW_PF 8 #define M_SMTW_PF 0x7 #define V_SMTW_PF(x) ((x) << S_SMTW_PF) #define G_SMTW_PF(x) (((x) >> S_SMTW_PF) & M_SMTW_PF) #define S_SMTW_VF_VLD 11 #define V_SMTW_VF_VLD(x) ((x) << S_SMTW_VF_VLD) #define F_SMTW_VF_VLD V_SMTW_VF_VLD(1U) struct cpl_tag_write_req { WR_HDR; union opcode_tid ot; __be32 params; __be64 tag_val; }; struct cpl_tag_write_rpl { RSS_HDR union opcode_tid ot; __u8 status; __u8 rsvd[2]; __u8 idx; }; struct cpl_tag_read_req { WR_HDR; union opcode_tid ot; __be32 params; }; struct cpl_tag_read_rpl { RSS_HDR union opcode_tid ot; __u8 status; #if defined(__LITTLE_ENDIAN_BITFIELD) __u8 :4; __u8 tag_len:1; __u8 :2; __u8 ins_enable:1; #else __u8 ins_enable:1; __u8 :2; __u8 tag_len:1; __u8 :4; #endif __u8 rsvd; __u8 tag_idx; __be64 tag_val; }; /* cpl_tag{read,write}_req.params fields */ #define S_TAGW_IDX 0 #define M_TAGW_IDX 0x7F #define V_TAGW_IDX(x) ((x) << S_TAGW_IDX) #define G_TAGW_IDX(x) (((x) >> S_TAGW_IDX) & M_TAGW_IDX) #define S_TAGW_LEN 20 #define V_TAGW_LEN(x) ((x) << S_TAGW_LEN) #define F_TAGW_LEN V_TAGW_LEN(1U) #define S_TAGW_INS_ENABLE 23 #define V_TAGW_INS_ENABLE(x) ((x) << S_TAGW_INS_ENABLE) #define F_TAGW_INS_ENABLE V_TAGW_INS_ENABLE(1U) #define S_TAGW_NORPL 31 #define V_TAGW_NORPL(x) ((x) << S_TAGW_NORPL) #define F_TAGW_NORPL V_TAGW_NORPL(1U) struct cpl_barrier { WR_HDR; __u8 opcode; __u8 chan_map; __be16 rsvd0; __be32 rsvd1; }; /* cpl_barrier.chan_map fields */ #define S_CHAN_MAP 4 #define M_CHAN_MAP 0xF #define V_CHAN_MAP(x) ((x) << S_CHAN_MAP) #define G_CHAN_MAP(x) (((x) >> S_CHAN_MAP) & M_CHAN_MAP) struct cpl_error { RSS_HDR union opcode_tid ot; __be32 error; }; struct cpl_hit_notify { RSS_HDR union opcode_tid ot; __be32 rsvd; __be32 info; __be32 reason; }; struct cpl_pkt_notify { RSS_HDR union opcode_tid ot; __be16 rsvd; __be16 len; __be32 info; __be32 reason; }; /* cpl_{hit,pkt}_notify.info fields */ #define S_NTFY_MAC_IDX 0 #define M_NTFY_MAC_IDX 0x1FF #define V_NTFY_MAC_IDX(x) ((x) << S_NTFY_MAC_IDX) #define G_NTFY_MAC_IDX(x) (((x) >> S_NTFY_MAC_IDX) & M_NTFY_MAC_IDX) #define S_NTFY_INTF 10 #define M_NTFY_INTF 0xF #define V_NTFY_INTF(x) ((x) << S_NTFY_INTF) #define G_NTFY_INTF(x) (((x) >> S_NTFY_INTF) & M_NTFY_INTF) #define S_NTFY_TCPHDR_LEN 14 #define M_NTFY_TCPHDR_LEN 0xF #define V_NTFY_TCPHDR_LEN(x) ((x) << S_NTFY_TCPHDR_LEN) #define G_NTFY_TCPHDR_LEN(x) (((x) >> S_NTFY_TCPHDR_LEN) & M_NTFY_TCPHDR_LEN) #define S_NTFY_IPHDR_LEN 18 #define M_NTFY_IPHDR_LEN 0x1FF #define V_NTFY_IPHDR_LEN(x) ((x) << S_NTFY_IPHDR_LEN) #define G_NTFY_IPHDR_LEN(x) (((x) >> S_NTFY_IPHDR_LEN) & M_NTFY_IPHDR_LEN) #define S_NTFY_ETHHDR_LEN 27 #define M_NTFY_ETHHDR_LEN 0x1F #define V_NTFY_ETHHDR_LEN(x) ((x) << S_NTFY_ETHHDR_LEN) #define G_NTFY_ETHHDR_LEN(x) (((x) >> S_NTFY_ETHHDR_LEN) & M_NTFY_ETHHDR_LEN) #define S_NTFY_T5_IPHDR_LEN 18 #define M_NTFY_T5_IPHDR_LEN 0xFF #define V_NTFY_T5_IPHDR_LEN(x) ((x) << S_NTFY_T5_IPHDR_LEN) #define G_NTFY_T5_IPHDR_LEN(x) (((x) >> S_NTFY_T5_IPHDR_LEN) & M_NTFY_T5_IPHDR_LEN) #define S_NTFY_T5_ETHHDR_LEN 26 #define M_NTFY_T5_ETHHDR_LEN 0x3F #define V_NTFY_T5_ETHHDR_LEN(x) ((x) << S_NTFY_T5_ETHHDR_LEN) #define G_NTFY_T5_ETHHDR_LEN(x) (((x) >> S_NTFY_T5_ETHHDR_LEN) & M_NTFY_T5_ETHHDR_LEN) struct cpl_rdma_terminate { RSS_HDR union opcode_tid ot; __be16 rsvd; __be16 len; }; struct cpl_set_le_req { WR_HDR; union opcode_tid ot; __be16 reply_ctrl; __be16 params; __be64 mask_hi; __be64 mask_lo; __be64 val_hi; __be64 val_lo; }; /* cpl_set_le_req.reply_ctrl additional fields */ #define S_LE_REQ_IP6 13 #define V_LE_REQ_IP6(x) ((x) << S_LE_REQ_IP6) #define F_LE_REQ_IP6 V_LE_REQ_IP6(1U) /* cpl_set_le_req.params fields */ #define S_LE_CHAN 0 #define M_LE_CHAN 0x3 #define V_LE_CHAN(x) ((x) << S_LE_CHAN) #define G_LE_CHAN(x) (((x) >> S_LE_CHAN) & M_LE_CHAN) #define S_LE_OFFSET 5 #define M_LE_OFFSET 0x7 #define V_LE_OFFSET(x) ((x) << S_LE_OFFSET) #define G_LE_OFFSET(x) (((x) >> S_LE_OFFSET) & M_LE_OFFSET) #define S_LE_MORE 8 #define V_LE_MORE(x) ((x) << S_LE_MORE) #define F_LE_MORE V_LE_MORE(1U) #define S_LE_REQSIZE 9 #define M_LE_REQSIZE 0x7 #define V_LE_REQSIZE(x) ((x) << S_LE_REQSIZE) #define G_LE_REQSIZE(x) (((x) >> S_LE_REQSIZE) & M_LE_REQSIZE) #define S_LE_REQCMD 12 #define M_LE_REQCMD 0xF #define V_LE_REQCMD(x) ((x) << S_LE_REQCMD) #define G_LE_REQCMD(x) (((x) >> S_LE_REQCMD) & M_LE_REQCMD) struct cpl_set_le_rpl { RSS_HDR union opcode_tid ot; __u8 chan; __u8 info; __be16 len; }; /* cpl_set_le_rpl.info fields */ #define S_LE_RSPCMD 0 #define M_LE_RSPCMD 0xF #define V_LE_RSPCMD(x) ((x) << S_LE_RSPCMD) #define G_LE_RSPCMD(x) (((x) >> S_LE_RSPCMD) & M_LE_RSPCMD) #define S_LE_RSPSIZE 4 #define M_LE_RSPSIZE 0x7 #define V_LE_RSPSIZE(x) ((x) << S_LE_RSPSIZE) #define G_LE_RSPSIZE(x) (((x) >> S_LE_RSPSIZE) & M_LE_RSPSIZE) #define S_LE_RSPTYPE 7 #define V_LE_RSPTYPE(x) ((x) << S_LE_RSPTYPE) #define F_LE_RSPTYPE V_LE_RSPTYPE(1U) struct cpl_sge_egr_update { RSS_HDR __be32 opcode_qid; __be16 cidx; __be16 pidx; }; /* cpl_sge_egr_update.ot fields */ #define S_AUTOEQU 22 #define M_AUTOEQU 0x1 #define V_AUTOEQU(x) ((x) << S_AUTOEQU) #define G_AUTOEQU(x) (((x) >> S_AUTOEQU) & M_AUTOEQU) #define S_EGR_QID 0 #define M_EGR_QID 0x1FFFF #define V_EGR_QID(x) ((x) << S_EGR_QID) #define G_EGR_QID(x) (((x) >> S_EGR_QID) & M_EGR_QID) /* cpl_fw*.type values */ enum { FW_TYPE_CMD_RPL = 0, FW_TYPE_WR_RPL = 1, FW_TYPE_CQE = 2, FW_TYPE_OFLD_CONNECTION_WR_RPL = 3, FW_TYPE_RSSCPL = 4, FW_TYPE_WRERR_RPL = 5, FW_TYPE_PI_ERR = 6, FW_TYPE_TLS_KEY = 7, }; struct cpl_fw2_pld { RSS_HDR u8 opcode; u8 rsvd[5]; __be16 len; }; struct cpl_fw4_pld { RSS_HDR u8 opcode; u8 rsvd0[3]; u8 type; u8 rsvd1; __be16 len; __be64 data; __be64 rsvd2; }; struct cpl_fw6_pld { RSS_HDR u8 opcode; u8 rsvd[5]; __be16 len; __be64 data[4]; }; struct cpl_fw2_msg { RSS_HDR union opcode_info oi; }; struct cpl_fw4_msg { RSS_HDR u8 opcode; u8 type; __be16 rsvd0; __be32 rsvd1; __be64 data[2]; }; struct cpl_fw4_ack { RSS_HDR union opcode_tid ot; u8 credits; u8 rsvd0[2]; u8 flags; __be32 snd_nxt; __be32 snd_una; __be64 rsvd1; }; enum { CPL_FW4_ACK_FLAGS_SEQVAL = 0x1, /* seqn valid */ CPL_FW4_ACK_FLAGS_CH = 0x2, /* channel change complete */ CPL_FW4_ACK_FLAGS_FLOWC = 0x4, /* fw_flowc_wr complete */ }; struct cpl_fw6_msg { RSS_HDR u8 opcode; u8 type; __be16 rsvd0; __be32 rsvd1; __be64 data[4]; }; /* cpl_fw6_msg.type values */ enum { FW6_TYPE_CMD_RPL = FW_TYPE_CMD_RPL, FW6_TYPE_WR_RPL = FW_TYPE_WR_RPL, FW6_TYPE_CQE = FW_TYPE_CQE, FW6_TYPE_OFLD_CONNECTION_WR_RPL = FW_TYPE_OFLD_CONNECTION_WR_RPL, FW6_TYPE_RSSCPL = FW_TYPE_RSSCPL, FW6_TYPE_WRERR_RPL = FW_TYPE_WRERR_RPL, FW6_TYPE_PI_ERR = FW_TYPE_PI_ERR, NUM_FW6_TYPES }; struct cpl_fw6_msg_ofld_connection_wr_rpl { __u64 cookie; __be32 tid; /* or atid in case of active failure */ __u8 t_state; __u8 retval; __u8 rsvd[2]; }; /* ULP_TX opcodes */ enum { ULP_TX_MEM_READ = 2, ULP_TX_MEM_WRITE = 3, ULP_TX_PKT = 4 }; enum { ULP_TX_SC_NOOP = 0x80, ULP_TX_SC_IMM = 0x81, ULP_TX_SC_DSGL = 0x82, ULP_TX_SC_ISGL = 0x83, ULP_TX_SC_PICTRL = 0x84, ULP_TX_SC_MEMRD = 0x86 }; #define S_ULPTX_CMD 24 #define M_ULPTX_CMD 0xFF #define V_ULPTX_CMD(x) ((x) << S_ULPTX_CMD) #define S_ULPTX_LEN16 0 #define M_ULPTX_LEN16 0xFF #define V_ULPTX_LEN16(x) ((x) << S_ULPTX_LEN16) #define S_ULP_TX_SC_MORE 23 #define V_ULP_TX_SC_MORE(x) ((x) << S_ULP_TX_SC_MORE) #define F_ULP_TX_SC_MORE V_ULP_TX_SC_MORE(1U) struct ulptx_sge_pair { __be32 len[2]; __be64 addr[2]; }; struct ulptx_sgl { __be32 cmd_nsge; __be32 len0; __be64 addr0; #if !(defined C99_NOT_SUPPORTED) struct ulptx_sge_pair sge[0]; #endif }; struct ulptx_isge { __be32 stag; __be32 len; __be64 target_ofst; }; struct ulptx_isgl { __be32 cmd_nisge; __be32 rsvd; #if !(defined C99_NOT_SUPPORTED) struct ulptx_isge sge[0]; #endif }; struct ulptx_idata { __be32 cmd_more; __be32 len; }; #define S_ULPTX_NSGE 0 #define M_ULPTX_NSGE 0xFFFF #define V_ULPTX_NSGE(x) ((x) << S_ULPTX_NSGE) #define G_ULPTX_NSGE(x) (((x) >> S_ULPTX_NSGE) & M_ULPTX_NSGE) struct ulptx_sc_memrd { __be32 cmd_to_len; __be32 addr; }; struct ulp_mem_io { WR_HDR; __be32 cmd; __be32 len16; /* command length */ __be32 dlen; /* data length in 32-byte units */ __be32 lock_addr; }; /* additional ulp_mem_io.cmd fields */ #define S_ULP_MEMIO_ORDER 23 #define V_ULP_MEMIO_ORDER(x) ((x) << S_ULP_MEMIO_ORDER) #define F_ULP_MEMIO_ORDER V_ULP_MEMIO_ORDER(1U) #define S_T5_ULP_MEMIO_IMM 23 #define V_T5_ULP_MEMIO_IMM(x) ((x) << S_T5_ULP_MEMIO_IMM) #define F_T5_ULP_MEMIO_IMM V_T5_ULP_MEMIO_IMM(1U) #define S_T5_ULP_MEMIO_ORDER 22 #define V_T5_ULP_MEMIO_ORDER(x) ((x) << S_T5_ULP_MEMIO_ORDER) #define F_T5_ULP_MEMIO_ORDER V_T5_ULP_MEMIO_ORDER(1U) #define S_T5_ULP_MEMIO_FID 4 #define M_T5_ULP_MEMIO_FID 0x7ff #define V_T5_ULP_MEMIO_FID(x) ((x) << S_T5_ULP_MEMIO_FID) /* ulp_mem_io.lock_addr fields */ #define S_ULP_MEMIO_ADDR 0 #define M_ULP_MEMIO_ADDR 0x7FFFFFF #define V_ULP_MEMIO_ADDR(x) ((x) << S_ULP_MEMIO_ADDR) #define S_ULP_MEMIO_LOCK 31 #define V_ULP_MEMIO_LOCK(x) ((x) << S_ULP_MEMIO_LOCK) #define F_ULP_MEMIO_LOCK V_ULP_MEMIO_LOCK(1U) /* ulp_mem_io.dlen fields */ #define S_ULP_MEMIO_DATA_LEN 0 #define M_ULP_MEMIO_DATA_LEN 0x1F #define V_ULP_MEMIO_DATA_LEN(x) ((x) << S_ULP_MEMIO_DATA_LEN) /* ULP_TXPKT field values */ enum { ULP_TXPKT_DEST_TP = 0, ULP_TXPKT_DEST_SGE, ULP_TXPKT_DEST_UP, ULP_TXPKT_DEST_DEVNULL, }; struct ulp_txpkt { __be32 cmd_dest; __be32 len; }; /* ulp_txpkt.cmd_dest fields */ #define S_ULP_TXPKT_DATAMODIFY 23 #define M_ULP_TXPKT_DATAMODIFY 0x1 #define V_ULP_TXPKT_DATAMODIFY(x) ((x) << S_ULP_TXPKT_DATAMODIFY) #define G_ULP_TXPKT_DATAMODIFY(x) \ (((x) >> S_ULP_TXPKT_DATAMODIFY) & M_ULP_TXPKT_DATAMODIFY_) #define F_ULP_TXPKT_DATAMODIFY V_ULP_TXPKT_DATAMODIFY(1U) #define S_ULP_TXPKT_CHANNELID 22 #define M_ULP_TXPKT_CHANNELID 0x1 #define V_ULP_TXPKT_CHANNELID(x) ((x) << S_ULP_TXPKT_CHANNELID) #define G_ULP_TXPKT_CHANNELID(x) \ (((x) >> S_ULP_TXPKT_CHANNELID) & M_ULP_TXPKT_CHANNELID) #define F_ULP_TXPKT_CHANNELID V_ULP_TXPKT_CHANNELID(1U) /* ulp_txpkt.cmd_dest fields */ #define S_ULP_TXPKT_DEST 16 #define M_ULP_TXPKT_DEST 0x3 #define V_ULP_TXPKT_DEST(x) ((x) << S_ULP_TXPKT_DEST) #define S_ULP_TXPKT_FID 4 #define M_ULP_TXPKT_FID 0x7ff #define V_ULP_TXPKT_FID(x) ((x) << S_ULP_TXPKT_FID) #define S_ULP_TXPKT_RO 3 #define V_ULP_TXPKT_RO(x) ((x) << S_ULP_TXPKT_RO) #define F_ULP_TXPKT_RO V_ULP_TXPKT_RO(1U) enum cpl_tx_tnl_lso_type { TX_TNL_TYPE_OPAQUE, TX_TNL_TYPE_NVGRE, TX_TNL_TYPE_VXLAN, TX_TNL_TYPE_GENEVE, }; struct cpl_tx_tnl_lso { __be32 op_to_IpIdSplitOut; __be16 IpIdOffsetOut; __be16 UdpLenSetOut_to_TnlHdrLen; __be64 r1; __be32 Flow_to_TcpHdrLen; __be16 IpIdOffset; __be16 IpIdSplit_to_Mss; __be32 TCPSeqOffset; __be32 EthLenOffset_Size; /* encapsulated CPL (TX_PKT_XT) follows here */ }; #define S_CPL_TX_TNL_LSO_OPCODE 24 #define M_CPL_TX_TNL_LSO_OPCODE 0xff #define V_CPL_TX_TNL_LSO_OPCODE(x) ((x) << S_CPL_TX_TNL_LSO_OPCODE) #define G_CPL_TX_TNL_LSO_OPCODE(x) \ (((x) >> S_CPL_TX_TNL_LSO_OPCODE) & M_CPL_TX_TNL_LSO_OPCODE) #define S_CPL_TX_TNL_LSO_FIRST 23 #define M_CPL_TX_TNL_LSO_FIRST 0x1 #define V_CPL_TX_TNL_LSO_FIRST(x) ((x) << S_CPL_TX_TNL_LSO_FIRST) #define G_CPL_TX_TNL_LSO_FIRST(x) \ (((x) >> S_CPL_TX_TNL_LSO_FIRST) & M_CPL_TX_TNL_LSO_FIRST) #define F_CPL_TX_TNL_LSO_FIRST V_CPL_TX_TNL_LSO_FIRST(1U) #define S_CPL_TX_TNL_LSO_LAST 22 #define M_CPL_TX_TNL_LSO_LAST 0x1 #define V_CPL_TX_TNL_LSO_LAST(x) ((x) << S_CPL_TX_TNL_LSO_LAST) #define G_CPL_TX_TNL_LSO_LAST(x) \ (((x) >> S_CPL_TX_TNL_LSO_LAST) & M_CPL_TX_TNL_LSO_LAST) #define F_CPL_TX_TNL_LSO_LAST V_CPL_TX_TNL_LSO_LAST(1U) #define S_CPL_TX_TNL_LSO_ETHHDRLENXOUT 21 #define M_CPL_TX_TNL_LSO_ETHHDRLENXOUT 0x1 #define V_CPL_TX_TNL_LSO_ETHHDRLENXOUT(x) \ ((x) << S_CPL_TX_TNL_LSO_ETHHDRLENXOUT) #define G_CPL_TX_TNL_LSO_ETHHDRLENXOUT(x) \ (((x) >> S_CPL_TX_TNL_LSO_ETHHDRLENXOUT) & M_CPL_TX_TNL_LSO_ETHHDRLENXOUT) #define F_CPL_TX_TNL_LSO_ETHHDRLENXOUT V_CPL_TX_TNL_LSO_ETHHDRLENXOUT(1U) #define S_CPL_TX_TNL_LSO_IPV6OUT 20 #define M_CPL_TX_TNL_LSO_IPV6OUT 0x1 #define V_CPL_TX_TNL_LSO_IPV6OUT(x) ((x) << S_CPL_TX_TNL_LSO_IPV6OUT) #define G_CPL_TX_TNL_LSO_IPV6OUT(x) \ (((x) >> S_CPL_TX_TNL_LSO_IPV6OUT) & M_CPL_TX_TNL_LSO_IPV6OUT) #define F_CPL_TX_TNL_LSO_IPV6OUT V_CPL_TX_TNL_LSO_IPV6OUT(1U) #define S_CPL_TX_TNL_LSO_ETHHDRLENOUT 16 #define M_CPL_TX_TNL_LSO_ETHHDRLENOUT 0xf #define V_CPL_TX_TNL_LSO_ETHHDRLENOUT(x) \ ((x) << S_CPL_TX_TNL_LSO_ETHHDRLENOUT) #define G_CPL_TX_TNL_LSO_ETHHDRLENOUT(x) \ (((x) >> S_CPL_TX_TNL_LSO_ETHHDRLENOUT) & M_CPL_TX_TNL_LSO_ETHHDRLENOUT) #define S_CPL_TX_TNL_LSO_IPHDRLENOUT 4 #define M_CPL_TX_TNL_LSO_IPHDRLENOUT 0xfff #define V_CPL_TX_TNL_LSO_IPHDRLENOUT(x) ((x) << S_CPL_TX_TNL_LSO_IPHDRLENOUT) #define G_CPL_TX_TNL_LSO_IPHDRLENOUT(x) \ (((x) >> S_CPL_TX_TNL_LSO_IPHDRLENOUT) & M_CPL_TX_TNL_LSO_IPHDRLENOUT) #define S_CPL_TX_TNL_LSO_IPHDRCHKOUT 3 #define M_CPL_TX_TNL_LSO_IPHDRCHKOUT 0x1 #define V_CPL_TX_TNL_LSO_IPHDRCHKOUT(x) ((x) << S_CPL_TX_TNL_LSO_IPHDRCHKOUT) #define G_CPL_TX_TNL_LSO_IPHDRCHKOUT(x) \ (((x) >> S_CPL_TX_TNL_LSO_IPHDRCHKOUT) & M_CPL_TX_TNL_LSO_IPHDRCHKOUT) #define F_CPL_TX_TNL_LSO_IPHDRCHKOUT V_CPL_TX_TNL_LSO_IPHDRCHKOUT(1U) #define S_CPL_TX_TNL_LSO_IPLENSETOUT 2 #define M_CPL_TX_TNL_LSO_IPLENSETOUT 0x1 #define V_CPL_TX_TNL_LSO_IPLENSETOUT(x) ((x) << S_CPL_TX_TNL_LSO_IPLENSETOUT) #define G_CPL_TX_TNL_LSO_IPLENSETOUT(x) \ (((x) >> S_CPL_TX_TNL_LSO_IPLENSETOUT) & M_CPL_TX_TNL_LSO_IPLENSETOUT) #define F_CPL_TX_TNL_LSO_IPLENSETOUT V_CPL_TX_TNL_LSO_IPLENSETOUT(1U) #define S_CPL_TX_TNL_LSO_IPIDINCOUT 1 #define M_CPL_TX_TNL_LSO_IPIDINCOUT 0x1 #define V_CPL_TX_TNL_LSO_IPIDINCOUT(x) ((x) << S_CPL_TX_TNL_LSO_IPIDINCOUT) #define G_CPL_TX_TNL_LSO_IPIDINCOUT(x) \ (((x) >> S_CPL_TX_TNL_LSO_IPIDINCOUT) & M_CPL_TX_TNL_LSO_IPIDINCOUT) #define F_CPL_TX_TNL_LSO_IPIDINCOUT V_CPL_TX_TNL_LSO_IPIDINCOUT(1U) #define S_CPL_TX_TNL_LSO_IPIDSPLITOUT 0 #define M_CPL_TX_TNL_LSO_IPIDSPLITOUT 0x1 #define V_CPL_TX_TNL_LSO_IPIDSPLITOUT(x) \ ((x) << S_CPL_TX_TNL_LSO_IPIDSPLITOUT) #define G_CPL_TX_TNL_LSO_IPIDSPLITOUT(x) \ (((x) >> S_CPL_TX_TNL_LSO_IPIDSPLITOUT) & M_CPL_TX_TNL_LSO_IPIDSPLITOUT) #define F_CPL_TX_TNL_LSO_IPIDSPLITOUT V_CPL_TX_TNL_LSO_IPIDSPLITOUT(1U) #define S_CPL_TX_TNL_LSO_UDPLENSETOUT 15 #define M_CPL_TX_TNL_LSO_UDPLENSETOUT 0x1 #define V_CPL_TX_TNL_LSO_UDPLENSETOUT(x) \ ((x) << S_CPL_TX_TNL_LSO_UDPLENSETOUT) #define G_CPL_TX_TNL_LSO_UDPLENSETOUT(x) \ (((x) >> S_CPL_TX_TNL_LSO_UDPLENSETOUT) & M_CPL_TX_TNL_LSO_UDPLENSETOUT) #define F_CPL_TX_TNL_LSO_UDPLENSETOUT V_CPL_TX_TNL_LSO_UDPLENSETOUT(1U) #define S_CPL_TX_TNL_LSO_UDPCHKCLROUT 14 #define M_CPL_TX_TNL_LSO_UDPCHKCLROUT 0x1 #define V_CPL_TX_TNL_LSO_UDPCHKCLROUT(x) \ ((x) << S_CPL_TX_TNL_LSO_UDPCHKCLROUT) #define G_CPL_TX_TNL_LSO_UDPCHKCLROUT(x) \ (((x) >> S_CPL_TX_TNL_LSO_UDPCHKCLROUT) & M_CPL_TX_TNL_LSO_UDPCHKCLROUT) #define F_CPL_TX_TNL_LSO_UDPCHKCLROUT V_CPL_TX_TNL_LSO_UDPCHKCLROUT(1U) #define S_CPL_TX_TNL_LSO_TNLTYPE 12 #define M_CPL_TX_TNL_LSO_TNLTYPE 0x3 #define V_CPL_TX_TNL_LSO_TNLTYPE(x) ((x) << S_CPL_TX_TNL_LSO_TNLTYPE) #define G_CPL_TX_TNL_LSO_TNLTYPE(x) \ (((x) >> S_CPL_TX_TNL_LSO_TNLTYPE) & M_CPL_TX_TNL_LSO_TNLTYPE) #define S_CPL_TX_TNL_LSO_TNLHDRLEN 0 #define M_CPL_TX_TNL_LSO_TNLHDRLEN 0xfff #define V_CPL_TX_TNL_LSO_TNLHDRLEN(x) ((x) << S_CPL_TX_TNL_LSO_TNLHDRLEN) #define G_CPL_TX_TNL_LSO_TNLHDRLEN(x) \ (((x) >> S_CPL_TX_TNL_LSO_TNLHDRLEN) & M_CPL_TX_TNL_LSO_TNLHDRLEN) #define S_CPL_TX_TNL_LSO_FLOW 21 #define M_CPL_TX_TNL_LSO_FLOW 0x1 #define V_CPL_TX_TNL_LSO_FLOW(x) ((x) << S_CPL_TX_TNL_LSO_FLOW) #define G_CPL_TX_TNL_LSO_FLOW(x) \ (((x) >> S_CPL_TX_TNL_LSO_FLOW) & M_CPL_TX_TNL_LSO_FLOW) #define F_CPL_TX_TNL_LSO_FLOW V_CPL_TX_TNL_LSO_FLOW(1U) #define S_CPL_TX_TNL_LSO_IPV6 20 #define M_CPL_TX_TNL_LSO_IPV6 0x1 #define V_CPL_TX_TNL_LSO_IPV6(x) ((x) << S_CPL_TX_TNL_LSO_IPV6) #define G_CPL_TX_TNL_LSO_IPV6(x) \ (((x) >> S_CPL_TX_TNL_LSO_IPV6) & M_CPL_TX_TNL_LSO_IPV6) #define F_CPL_TX_TNL_LSO_IPV6 V_CPL_TX_TNL_LSO_IPV6(1U) #define S_CPL_TX_TNL_LSO_ETHHDRLEN 16 #define M_CPL_TX_TNL_LSO_ETHHDRLEN 0xf #define V_CPL_TX_TNL_LSO_ETHHDRLEN(x) ((x) << S_CPL_TX_TNL_LSO_ETHHDRLEN) #define G_CPL_TX_TNL_LSO_ETHHDRLEN(x) \ (((x) >> S_CPL_TX_TNL_LSO_ETHHDRLEN) & M_CPL_TX_TNL_LSO_ETHHDRLEN) #define S_CPL_TX_TNL_LSO_IPHDRLEN 4 #define M_CPL_TX_TNL_LSO_IPHDRLEN 0xfff #define V_CPL_TX_TNL_LSO_IPHDRLEN(x) ((x) << S_CPL_TX_TNL_LSO_IPHDRLEN) #define G_CPL_TX_TNL_LSO_IPHDRLEN(x) \ (((x) >> S_CPL_TX_TNL_LSO_IPHDRLEN) & M_CPL_TX_TNL_LSO_IPHDRLEN) #define S_CPL_TX_TNL_LSO_TCPHDRLEN 0 #define M_CPL_TX_TNL_LSO_TCPHDRLEN 0xf #define V_CPL_TX_TNL_LSO_TCPHDRLEN(x) ((x) << S_CPL_TX_TNL_LSO_TCPHDRLEN) #define G_CPL_TX_TNL_LSO_TCPHDRLEN(x) \ (((x) >> S_CPL_TX_TNL_LSO_TCPHDRLEN) & M_CPL_TX_TNL_LSO_TCPHDRLEN) #define S_CPL_TX_TNL_LSO_IPIDSPLIT 15 #define M_CPL_TX_TNL_LSO_IPIDSPLIT 0x1 #define V_CPL_TX_TNL_LSO_IPIDSPLIT(x) ((x) << S_CPL_TX_TNL_LSO_IPIDSPLIT) #define G_CPL_TX_TNL_LSO_IPIDSPLIT(x) \ (((x) >> S_CPL_TX_TNL_LSO_IPIDSPLIT) & M_CPL_TX_TNL_LSO_IPIDSPLIT) #define F_CPL_TX_TNL_LSO_IPIDSPLIT V_CPL_TX_TNL_LSO_IPIDSPLIT(1U) #define S_CPL_TX_TNL_LSO_ETHHDRLENX 14 #define M_CPL_TX_TNL_LSO_ETHHDRLENX 0x1 #define V_CPL_TX_TNL_LSO_ETHHDRLENX(x) ((x) << S_CPL_TX_TNL_LSO_ETHHDRLENX) #define G_CPL_TX_TNL_LSO_ETHHDRLENX(x) \ (((x) >> S_CPL_TX_TNL_LSO_ETHHDRLENX) & M_CPL_TX_TNL_LSO_ETHHDRLENX) #define F_CPL_TX_TNL_LSO_ETHHDRLENX V_CPL_TX_TNL_LSO_ETHHDRLENX(1U) #define S_CPL_TX_TNL_LSO_MSS 0 #define M_CPL_TX_TNL_LSO_MSS 0x3fff #define V_CPL_TX_TNL_LSO_MSS(x) ((x) << S_CPL_TX_TNL_LSO_MSS) #define G_CPL_TX_TNL_LSO_MSS(x) \ (((x) >> S_CPL_TX_TNL_LSO_MSS) & M_CPL_TX_TNL_LSO_MSS) #define S_CPL_TX_TNL_LSO_ETHLENOFFSET 28 #define M_CPL_TX_TNL_LSO_ETHLENOFFSET 0xf #define V_CPL_TX_TNL_LSO_ETHLENOFFSET(x) \ ((x) << S_CPL_TX_TNL_LSO_ETHLENOFFSET) #define G_CPL_TX_TNL_LSO_ETHLENOFFSET(x) \ (((x) >> S_CPL_TX_TNL_LSO_ETHLENOFFSET) & M_CPL_TX_TNL_LSO_ETHLENOFFSET) #define S_CPL_TX_TNL_LSO_SIZE 0 #define M_CPL_TX_TNL_LSO_SIZE 0xfffffff #define V_CPL_TX_TNL_LSO_SIZE(x) ((x) << S_CPL_TX_TNL_LSO_SIZE) #define G_CPL_TX_TNL_LSO_SIZE(x) \ (((x) >> S_CPL_TX_TNL_LSO_SIZE) & M_CPL_TX_TNL_LSO_SIZE) struct cpl_rx_mps_pkt { __be32 op_to_r1_hi; __be32 r1_lo_length; }; #define S_CPL_RX_MPS_PKT_OP 24 #define M_CPL_RX_MPS_PKT_OP 0xff #define V_CPL_RX_MPS_PKT_OP(x) ((x) << S_CPL_RX_MPS_PKT_OP) #define G_CPL_RX_MPS_PKT_OP(x) \ (((x) >> S_CPL_RX_MPS_PKT_OP) & M_CPL_RX_MPS_PKT_OP) #define S_CPL_RX_MPS_PKT_TYPE 20 #define M_CPL_RX_MPS_PKT_TYPE 0xf #define V_CPL_RX_MPS_PKT_TYPE(x) ((x) << S_CPL_RX_MPS_PKT_TYPE) #define G_CPL_RX_MPS_PKT_TYPE(x) \ (((x) >> S_CPL_RX_MPS_PKT_TYPE) & M_CPL_RX_MPS_PKT_TYPE) /* * Values for CPL_RX_MPS_PKT_TYPE, a bit-wise orthogonal field. */ #define X_CPL_RX_MPS_PKT_TYPE_PAUSE (1 << 0) #define X_CPL_RX_MPS_PKT_TYPE_PPP (1 << 1) #define X_CPL_RX_MPS_PKT_TYPE_QFC (1 << 2) #define X_CPL_RX_MPS_PKT_TYPE_PTP (1 << 3) struct cpl_tx_tls_sfo { __be32 op_to_seg_len; __be32 pld_len; __be32 type_protover; __be32 r1_lo; __be32 seqno_numivs; __be32 ivgen_hdrlen; __be64 scmd1; }; /* cpl_tx_tls_sfo macros */ #define S_CPL_TX_TLS_SFO_OPCODE 24 #define M_CPL_TX_TLS_SFO_OPCODE 0xff #define V_CPL_TX_TLS_SFO_OPCODE(x) ((x) << S_CPL_TX_TLS_SFO_OPCODE) #define G_CPL_TX_TLS_SFO_OPCODE(x) \ (((x) >> S_CPL_TX_TLS_SFO_OPCODE) & M_CPL_TX_TLS_SFO_OPCODE) #define S_CPL_TX_TLS_SFO_DATA_TYPE 20 #define M_CPL_TX_TLS_SFO_DATA_TYPE 0xf #define V_CPL_TX_TLS_SFO_DATA_TYPE(x) ((x) << S_CPL_TX_TLS_SFO_DATA_TYPE) #define G_CPL_TX_TLS_SFO_DATA_TYPE(x) \ (((x) >> S_CPL_TX_TLS_SFO_DATA_TYPE) & M_CPL_TX_TLS_SFO_DATA_TYPE) #define S_CPL_TX_TLS_SFO_CPL_LEN 16 #define M_CPL_TX_TLS_SFO_CPL_LEN 0xf #define V_CPL_TX_TLS_SFO_CPL_LEN(x) ((x) << S_CPL_TX_TLS_SFO_CPL_LEN) #define G_CPL_TX_TLS_SFO_CPL_LEN(x) \ (((x) >> S_CPL_TX_TLS_SFO_CPL_LEN) & M_CPL_TX_TLS_SFO_CPL_LEN) #define S_CPL_TX_TLS_SFO_SEG_LEN 0 #define M_CPL_TX_TLS_SFO_SEG_LEN 0xffff #define V_CPL_TX_TLS_SFO_SEG_LEN(x) ((x) << S_CPL_TX_TLS_SFO_SEG_LEN) #define G_CPL_TX_TLS_SFO_SEG_LEN(x) \ (((x) >> S_CPL_TX_TLS_SFO_SEG_LEN) & M_CPL_TX_TLS_SFO_SEG_LEN) #define S_CPL_TX_TLS_SFO_TYPE 24 #define M_CPL_TX_TLS_SFO_TYPE 0xff #define V_CPL_TX_TLS_SFO_TYPE(x) ((x) << S_CPL_TX_TLS_SFO_TYPE) #define G_CPL_TX_TLS_SFO_TYPE(x) \ (((x) >> S_CPL_TX_TLS_SFO_TYPE) & M_CPL_TX_TLS_SFO_TYPE) #define S_CPL_TX_TLS_SFO_PROTOVER 8 #define M_CPL_TX_TLS_SFO_PROTOVER 0xffff #define V_CPL_TX_TLS_SFO_PROTOVER(x) ((x) << S_CPL_TX_TLS_SFO_PROTOVER) #define G_CPL_TX_TLS_SFO_PROTOVER(x) \ (((x) >> S_CPL_TX_TLS_SFO_PROTOVER) & M_CPL_TX_TLS_SFO_PROTOVER) struct cpl_tls_data { RSS_HDR union opcode_tid ot; __be32 length_pkd; __be32 seq; __be32 r1; }; #define S_CPL_TLS_DATA_OPCODE 24 #define M_CPL_TLS_DATA_OPCODE 0xff #define V_CPL_TLS_DATA_OPCODE(x) ((x) << S_CPL_TLS_DATA_OPCODE) #define G_CPL_TLS_DATA_OPCODE(x) \ (((x) >> S_CPL_TLS_DATA_OPCODE) & M_CPL_TLS_DATA_OPCODE) #define S_CPL_TLS_DATA_TID 0 #define M_CPL_TLS_DATA_TID 0xffffff #define V_CPL_TLS_DATA_TID(x) ((x) << S_CPL_TLS_DATA_TID) #define G_CPL_TLS_DATA_TID(x) \ (((x) >> S_CPL_TLS_DATA_TID) & M_CPL_TLS_DATA_TID) #define S_CPL_TLS_DATA_LENGTH 0 #define M_CPL_TLS_DATA_LENGTH 0xffff #define V_CPL_TLS_DATA_LENGTH(x) ((x) << S_CPL_TLS_DATA_LENGTH) #define G_CPL_TLS_DATA_LENGTH(x) \ (((x) >> S_CPL_TLS_DATA_LENGTH) & M_CPL_TLS_DATA_LENGTH) struct cpl_rx_tls_cmp { RSS_HDR union opcode_tid ot; __be32 pdulength_length; __be32 seq; __be32 ddp_report; __be32 r; __be32 ddp_valid; }; #define S_CPL_RX_TLS_CMP_OPCODE 24 #define M_CPL_RX_TLS_CMP_OPCODE 0xff #define V_CPL_RX_TLS_CMP_OPCODE(x) ((x) << S_CPL_RX_TLS_CMP_OPCODE) #define G_CPL_RX_TLS_CMP_OPCODE(x) \ (((x) >> S_CPL_RX_TLS_CMP_OPCODE) & M_CPL_RX_TLS_CMP_OPCODE) #define S_CPL_RX_TLS_CMP_TID 0 #define M_CPL_RX_TLS_CMP_TID 0xffffff #define V_CPL_RX_TLS_CMP_TID(x) ((x) << S_CPL_RX_TLS_CMP_TID) #define G_CPL_RX_TLS_CMP_TID(x) \ (((x) >> S_CPL_RX_TLS_CMP_TID) & M_CPL_RX_TLS_CMP_TID) #define S_CPL_RX_TLS_CMP_PDULENGTH 16 #define M_CPL_RX_TLS_CMP_PDULENGTH 0xffff #define V_CPL_RX_TLS_CMP_PDULENGTH(x) ((x) << S_CPL_RX_TLS_CMP_PDULENGTH) #define G_CPL_RX_TLS_CMP_PDULENGTH(x) \ (((x) >> S_CPL_RX_TLS_CMP_PDULENGTH) & M_CPL_RX_TLS_CMP_PDULENGTH) #define S_CPL_RX_TLS_CMP_LENGTH 0 #define M_CPL_RX_TLS_CMP_LENGTH 0xffff #define V_CPL_RX_TLS_CMP_LENGTH(x) ((x) << S_CPL_RX_TLS_CMP_LENGTH) #define G_CPL_RX_TLS_CMP_LENGTH(x) \ (((x) >> S_CPL_RX_TLS_CMP_LENGTH) & M_CPL_RX_TLS_CMP_LENGTH) #define S_SCMD_SEQ_NO_CTRL 29 #define M_SCMD_SEQ_NO_CTRL 0x3 #define V_SCMD_SEQ_NO_CTRL(x) ((x) << S_SCMD_SEQ_NO_CTRL) #define G_SCMD_SEQ_NO_CTRL(x) \ (((x) >> S_SCMD_SEQ_NO_CTRL) & M_SCMD_SEQ_NO_CTRL) /* StsFieldPrsnt- Status field at the end of the TLS PDU */ #define S_SCMD_STATUS_PRESENT 28 #define M_SCMD_STATUS_PRESENT 0x1 #define V_SCMD_STATUS_PRESENT(x) ((x) << S_SCMD_STATUS_PRESENT) #define G_SCMD_STATUS_PRESENT(x) \ (((x) >> S_SCMD_STATUS_PRESENT) & M_SCMD_STATUS_PRESENT) #define F_SCMD_STATUS_PRESENT V_SCMD_STATUS_PRESENT(1U) /* ProtoVersion - Protocol Version 0: 1.2, 1:1.1, 2:DTLS, 3:Generic, * 3-15: Reserved. */ #define S_SCMD_PROTO_VERSION 24 #define M_SCMD_PROTO_VERSION 0xf #define V_SCMD_PROTO_VERSION(x) ((x) << S_SCMD_PROTO_VERSION) #define G_SCMD_PROTO_VERSION(x) \ (((x) >> S_SCMD_PROTO_VERSION) & M_SCMD_PROTO_VERSION) /* EncDecCtrl - Encryption/Decryption Control. 0: Encrypt, 1: Decrypt */ #define S_SCMD_ENC_DEC_CTRL 23 #define M_SCMD_ENC_DEC_CTRL 0x1 #define V_SCMD_ENC_DEC_CTRL(x) ((x) << S_SCMD_ENC_DEC_CTRL) #define G_SCMD_ENC_DEC_CTRL(x) \ (((x) >> S_SCMD_ENC_DEC_CTRL) & M_SCMD_ENC_DEC_CTRL) #define F_SCMD_ENC_DEC_CTRL V_SCMD_ENC_DEC_CTRL(1U) /* CipherAuthSeqCtrl - Cipher Authentication Sequence Control. */ #define S_SCMD_CIPH_AUTH_SEQ_CTRL 22 #define M_SCMD_CIPH_AUTH_SEQ_CTRL 0x1 #define V_SCMD_CIPH_AUTH_SEQ_CTRL(x) \ ((x) << S_SCMD_CIPH_AUTH_SEQ_CTRL) #define G_SCMD_CIPH_AUTH_SEQ_CTRL(x) \ (((x) >> S_SCMD_CIPH_AUTH_SEQ_CTRL) & M_SCMD_CIPH_AUTH_SEQ_CTRL) #define F_SCMD_CIPH_AUTH_SEQ_CTRL V_SCMD_CIPH_AUTH_SEQ_CTRL(1U) /* CiphMode - Cipher Mode. 0: NOP, 1:AES-CBC, 2:AES-GCM, 3:AES-CTR, * 4:Generic-AES, 5-15: Reserved. */ #define S_SCMD_CIPH_MODE 18 #define M_SCMD_CIPH_MODE 0xf #define V_SCMD_CIPH_MODE(x) ((x) << S_SCMD_CIPH_MODE) #define G_SCMD_CIPH_MODE(x) \ (((x) >> S_SCMD_CIPH_MODE) & M_SCMD_CIPH_MODE) /* AuthMode - Auth Mode. 0: NOP, 1:SHA1, 2:SHA2-224, 3:SHA2-256 * 4-15: Reserved */ #define S_SCMD_AUTH_MODE 14 #define M_SCMD_AUTH_MODE 0xf #define V_SCMD_AUTH_MODE(x) ((x) << S_SCMD_AUTH_MODE) #define G_SCMD_AUTH_MODE(x) \ (((x) >> S_SCMD_AUTH_MODE) & M_SCMD_AUTH_MODE) /* HmacCtrl - HMAC Control. 0:NOP, 1:No truncation, 2:Support HMAC Truncation * per RFC 4366, 3:IPSec 96 bits, 4-7:Reserved */ #define S_SCMD_HMAC_CTRL 11 #define M_SCMD_HMAC_CTRL 0x7 #define V_SCMD_HMAC_CTRL(x) ((x) << S_SCMD_HMAC_CTRL) #define G_SCMD_HMAC_CTRL(x) \ (((x) >> S_SCMD_HMAC_CTRL) & M_SCMD_HMAC_CTRL) /* IvSize - IV size in units of 2 bytes */ #define S_SCMD_IV_SIZE 7 #define M_SCMD_IV_SIZE 0xf #define V_SCMD_IV_SIZE(x) ((x) << S_SCMD_IV_SIZE) #define G_SCMD_IV_SIZE(x) \ (((x) >> S_SCMD_IV_SIZE) & M_SCMD_IV_SIZE) /* NumIVs - Number of IVs */ #define S_SCMD_NUM_IVS 0 #define M_SCMD_NUM_IVS 0x7f #define V_SCMD_NUM_IVS(x) ((x) << S_SCMD_NUM_IVS) #define G_SCMD_NUM_IVS(x) \ (((x) >> S_SCMD_NUM_IVS) & M_SCMD_NUM_IVS) /* EnbDbgId - If this is enabled upper 20 (63:44) bits if SeqNumber * (below) are used as Cid (connection id for debug status), these * bits are padded to zero for forming the 64 bit * sequence number for TLS */ #define S_SCMD_ENB_DBGID 31 #define M_SCMD_ENB_DBGID 0x1 #define V_SCMD_ENB_DBGID(x) ((x) << S_SCMD_ENB_DBGID) #define G_SCMD_ENB_DBGID(x) \ (((x) >> S_SCMD_ENB_DBGID) & M_SCMD_ENB_DBGID) /* IV generation in SW. */ #define S_SCMD_IV_GEN_CTRL 30 #define M_SCMD_IV_GEN_CTRL 0x1 #define V_SCMD_IV_GEN_CTRL(x) ((x) << S_SCMD_IV_GEN_CTRL) #define G_SCMD_IV_GEN_CTRL(x) \ (((x) >> S_SCMD_IV_GEN_CTRL) & M_SCMD_IV_GEN_CTRL) #define F_SCMD_IV_GEN_CTRL V_SCMD_IV_GEN_CTRL(1U) /* More frags */ #define S_SCMD_MORE_FRAGS 20 #define M_SCMD_MORE_FRAGS 0x1 #define V_SCMD_MORE_FRAGS(x) ((x) << S_SCMD_MORE_FRAGS) #define G_SCMD_MORE_FRAGS(x) (((x) >> S_SCMD_MORE_FRAGS) & M_SCMD_MORE_FRAGS) /*last frag */ #define S_SCMD_LAST_FRAG 19 #define M_SCMD_LAST_FRAG 0x1 #define V_SCMD_LAST_FRAG(x) ((x) << S_SCMD_LAST_FRAG) #define G_SCMD_LAST_FRAG(x) (((x) >> S_SCMD_LAST_FRAG) & M_SCMD_LAST_FRAG) /* TlsCompPdu */ #define S_SCMD_TLS_COMPPDU 18 #define M_SCMD_TLS_COMPPDU 0x1 #define V_SCMD_TLS_COMPPDU(x) ((x) << S_SCMD_TLS_COMPPDU) #define G_SCMD_TLS_COMPPDU(x) (((x) >> S_SCMD_TLS_COMPPDU) & M_SCMD_TLS_COMPPDU) /* KeyCntxtInline - Key context inline after the scmd OR PayloadOnly*/ #define S_SCMD_KEY_CTX_INLINE 17 #define M_SCMD_KEY_CTX_INLINE 0x1 #define V_SCMD_KEY_CTX_INLINE(x) ((x) << S_SCMD_KEY_CTX_INLINE) #define G_SCMD_KEY_CTX_INLINE(x) \ (((x) >> S_SCMD_KEY_CTX_INLINE) & M_SCMD_KEY_CTX_INLINE) #define F_SCMD_KEY_CTX_INLINE V_SCMD_KEY_CTX_INLINE(1U) /* TLSFragEnable - 0: Host created TLS PDUs, 1: TLS Framgmentation in ASIC */ #define S_SCMD_TLS_FRAG_ENABLE 16 #define M_SCMD_TLS_FRAG_ENABLE 0x1 #define V_SCMD_TLS_FRAG_ENABLE(x) ((x) << S_SCMD_TLS_FRAG_ENABLE) #define G_SCMD_TLS_FRAG_ENABLE(x) \ (((x) >> S_SCMD_TLS_FRAG_ENABLE) & M_SCMD_TLS_FRAG_ENABLE) #define F_SCMD_TLS_FRAG_ENABLE V_SCMD_TLS_FRAG_ENABLE(1U) /* MacOnly - Only send the MAC and discard PDU. This is valid for hash only * modes, in this case TLS_TX will drop the PDU and only * send back the MAC bytes. */ #define S_SCMD_MAC_ONLY 15 #define M_SCMD_MAC_ONLY 0x1 #define V_SCMD_MAC_ONLY(x) ((x) << S_SCMD_MAC_ONLY) #define G_SCMD_MAC_ONLY(x) \ (((x) >> S_SCMD_MAC_ONLY) & M_SCMD_MAC_ONLY) #define F_SCMD_MAC_ONLY V_SCMD_MAC_ONLY(1U) /* AadIVDrop - Drop the AAD and IV fields. Useful in protocols * which have complex AAD and IV formations Eg:AES-CCM */ #define S_SCMD_AADIVDROP 14 #define M_SCMD_AADIVDROP 0x1 #define V_SCMD_AADIVDROP(x) ((x) << S_SCMD_AADIVDROP) #define G_SCMD_AADIVDROP(x) \ (((x) >> S_SCMD_AADIVDROP) & M_SCMD_AADIVDROP) #define F_SCMD_AADIVDROP V_SCMD_AADIVDROP(1U) /* HdrLength - Length of all headers excluding TLS header * present before start of crypto PDU/payload. */ #define S_SCMD_HDR_LEN 0 #define M_SCMD_HDR_LEN 0x3fff #define V_SCMD_HDR_LEN(x) ((x) << S_SCMD_HDR_LEN) #define G_SCMD_HDR_LEN(x) \ (((x) >> S_SCMD_HDR_LEN) & M_SCMD_HDR_LEN) struct cpl_tx_sec_pdu { __be32 op_ivinsrtofst; __be32 pldlen; __be32 aadstart_cipherstop_hi; __be32 cipherstop_lo_authinsert; __be32 seqno_numivs; __be32 ivgen_hdrlen; __be64 scmd1; }; #define S_CPL_TX_SEC_PDU_OPCODE 24 #define M_CPL_TX_SEC_PDU_OPCODE 0xff #define V_CPL_TX_SEC_PDU_OPCODE(x) ((x) << S_CPL_TX_SEC_PDU_OPCODE) #define G_CPL_TX_SEC_PDU_OPCODE(x) \ (((x) >> S_CPL_TX_SEC_PDU_OPCODE) & M_CPL_TX_SEC_PDU_OPCODE) /* RX Channel Id */ #define S_CPL_TX_SEC_PDU_RXCHID 22 #define M_CPL_TX_SEC_PDU_RXCHID 0x1 #define V_CPL_TX_SEC_PDU_RXCHID(x) ((x) << S_CPL_TX_SEC_PDU_RXCHID) #define G_CPL_TX_SEC_PDU_RXCHID(x) \ (((x) >> S_CPL_TX_SEC_PDU_RXCHID) & M_CPL_TX_SEC_PDU_RXCHID) #define F_CPL_TX_SEC_PDU_RXCHID V_CPL_TX_SEC_PDU_RXCHID(1U) /* Ack Follows */ #define S_CPL_TX_SEC_PDU_ACKFOLLOWS 21 #define M_CPL_TX_SEC_PDU_ACKFOLLOWS 0x1 #define V_CPL_TX_SEC_PDU_ACKFOLLOWS(x) ((x) << S_CPL_TX_SEC_PDU_ACKFOLLOWS) #define G_CPL_TX_SEC_PDU_ACKFOLLOWS(x) \ (((x) >> S_CPL_TX_SEC_PDU_ACKFOLLOWS) & M_CPL_TX_SEC_PDU_ACKFOLLOWS) #define F_CPL_TX_SEC_PDU_ACKFOLLOWS V_CPL_TX_SEC_PDU_ACKFOLLOWS(1U) /* Loopback bit in cpl_tx_sec_pdu */ #define S_CPL_TX_SEC_PDU_ULPTXLPBK 20 #define M_CPL_TX_SEC_PDU_ULPTXLPBK 0x1 #define V_CPL_TX_SEC_PDU_ULPTXLPBK(x) ((x) << S_CPL_TX_SEC_PDU_ULPTXLPBK) #define G_CPL_TX_SEC_PDU_ULPTXLPBK(x) \ (((x) >> S_CPL_TX_SEC_PDU_ULPTXLPBK) & M_CPL_TX_SEC_PDU_ULPTXLPBK) #define F_CPL_TX_SEC_PDU_ULPTXLPBK V_CPL_TX_SEC_PDU_ULPTXLPBK(1U) /* Length of cpl header encapsulated */ #define S_CPL_TX_SEC_PDU_CPLLEN 16 #define M_CPL_TX_SEC_PDU_CPLLEN 0xf #define V_CPL_TX_SEC_PDU_CPLLEN(x) ((x) << S_CPL_TX_SEC_PDU_CPLLEN) #define G_CPL_TX_SEC_PDU_CPLLEN(x) \ (((x) >> S_CPL_TX_SEC_PDU_CPLLEN) & M_CPL_TX_SEC_PDU_CPLLEN) /* PlaceHolder */ #define S_CPL_TX_SEC_PDU_PLACEHOLDER 10 #define M_CPL_TX_SEC_PDU_PLACEHOLDER 0x1 #define V_CPL_TX_SEC_PDU_PLACEHOLDER(x) ((x) << S_CPL_TX_SEC_PDU_PLACEHOLDER) #define G_CPL_TX_SEC_PDU_PLACEHOLDER(x) \ (((x) >> S_CPL_TX_SEC_PDU_PLACEHOLDER) & \ M_CPL_TX_SEC_PDU_PLACEHOLDER) /* IvInsrtOffset: Insertion location for IV */ #define S_CPL_TX_SEC_PDU_IVINSRTOFST 0 #define M_CPL_TX_SEC_PDU_IVINSRTOFST 0x3ff #define V_CPL_TX_SEC_PDU_IVINSRTOFST(x) ((x) << S_CPL_TX_SEC_PDU_IVINSRTOFST) #define G_CPL_TX_SEC_PDU_IVINSRTOFST(x) \ (((x) >> S_CPL_TX_SEC_PDU_IVINSRTOFST) & \ M_CPL_TX_SEC_PDU_IVINSRTOFST) /* AadStartOffset: Offset in bytes for AAD start from * the first byte following * the pkt headers (0-255 * bytes) */ #define S_CPL_TX_SEC_PDU_AADSTART 24 #define M_CPL_TX_SEC_PDU_AADSTART 0xff #define V_CPL_TX_SEC_PDU_AADSTART(x) ((x) << S_CPL_TX_SEC_PDU_AADSTART) #define G_CPL_TX_SEC_PDU_AADSTART(x) \ (((x) >> S_CPL_TX_SEC_PDU_AADSTART) & \ M_CPL_TX_SEC_PDU_AADSTART) /* AadStopOffset: offset in bytes for AAD stop/end from the first byte following * the pkt headers (0-511 bytes) */ #define S_CPL_TX_SEC_PDU_AADSTOP 15 #define M_CPL_TX_SEC_PDU_AADSTOP 0x1ff #define V_CPL_TX_SEC_PDU_AADSTOP(x) ((x) << S_CPL_TX_SEC_PDU_AADSTOP) #define G_CPL_TX_SEC_PDU_AADSTOP(x) \ (((x) >> S_CPL_TX_SEC_PDU_AADSTOP) & M_CPL_TX_SEC_PDU_AADSTOP) /* CipherStartOffset: offset in bytes for encryption/decryption start from the * first byte following the pkt headers (0-1023 * bytes) */ #define S_CPL_TX_SEC_PDU_CIPHERSTART 5 #define M_CPL_TX_SEC_PDU_CIPHERSTART 0x3ff #define V_CPL_TX_SEC_PDU_CIPHERSTART(x) ((x) << S_CPL_TX_SEC_PDU_CIPHERSTART) #define G_CPL_TX_SEC_PDU_CIPHERSTART(x) \ (((x) >> S_CPL_TX_SEC_PDU_CIPHERSTART) & \ M_CPL_TX_SEC_PDU_CIPHERSTART) /* CipherStopOffset: offset in bytes for encryption/decryption end * from end of the payload of this command (0-511 bytes) */ #define S_CPL_TX_SEC_PDU_CIPHERSTOP_HI 0 #define M_CPL_TX_SEC_PDU_CIPHERSTOP_HI 0x1f #define V_CPL_TX_SEC_PDU_CIPHERSTOP_HI(x) \ ((x) << S_CPL_TX_SEC_PDU_CIPHERSTOP_HI) #define G_CPL_TX_SEC_PDU_CIPHERSTOP_HI(x) \ (((x) >> S_CPL_TX_SEC_PDU_CIPHERSTOP_HI) & \ M_CPL_TX_SEC_PDU_CIPHERSTOP_HI) #define S_CPL_TX_SEC_PDU_CIPHERSTOP_LO 28 #define M_CPL_TX_SEC_PDU_CIPHERSTOP_LO 0xf #define V_CPL_TX_SEC_PDU_CIPHERSTOP_LO(x) \ ((x) << S_CPL_TX_SEC_PDU_CIPHERSTOP_LO) #define G_CPL_TX_SEC_PDU_CIPHERSTOP_LO(x) \ (((x) >> S_CPL_TX_SEC_PDU_CIPHERSTOP_LO) & \ M_CPL_TX_SEC_PDU_CIPHERSTOP_LO) /* AuthStartOffset: offset in bytes for authentication start from * the first byte following the pkt headers (0-1023) * */ #define S_CPL_TX_SEC_PDU_AUTHSTART 18 #define M_CPL_TX_SEC_PDU_AUTHSTART 0x3ff #define V_CPL_TX_SEC_PDU_AUTHSTART(x) ((x) << S_CPL_TX_SEC_PDU_AUTHSTART) #define G_CPL_TX_SEC_PDU_AUTHSTART(x) \ (((x) >> S_CPL_TX_SEC_PDU_AUTHSTART) & \ M_CPL_TX_SEC_PDU_AUTHSTART) /* AuthStopOffset: offset in bytes for authentication * end from end of the payload of this command (0-511 Bytes) */ #define S_CPL_TX_SEC_PDU_AUTHSTOP 9 #define M_CPL_TX_SEC_PDU_AUTHSTOP 0x1ff #define V_CPL_TX_SEC_PDU_AUTHSTOP(x) ((x) << S_CPL_TX_SEC_PDU_AUTHSTOP) #define G_CPL_TX_SEC_PDU_AUTHSTOP(x) \ (((x) >> S_CPL_TX_SEC_PDU_AUTHSTOP) & \ M_CPL_TX_SEC_PDU_AUTHSTOP) /* AuthInsrtOffset: offset in bytes for authentication insertion * from end of the payload of this command (0-511 bytes) */ #define S_CPL_TX_SEC_PDU_AUTHINSERT 0 #define M_CPL_TX_SEC_PDU_AUTHINSERT 0x1ff #define V_CPL_TX_SEC_PDU_AUTHINSERT(x) ((x) << S_CPL_TX_SEC_PDU_AUTHINSERT) #define G_CPL_TX_SEC_PDU_AUTHINSERT(x) \ (((x) >> S_CPL_TX_SEC_PDU_AUTHINSERT) & \ M_CPL_TX_SEC_PDU_AUTHINSERT) struct cpl_rx_phys_dsgl { __be32 op_to_tid; __be32 pcirlxorder_to_noofsgentr; struct rss_header rss_hdr_int; }; #define S_CPL_RX_PHYS_DSGL_OPCODE 24 #define M_CPL_RX_PHYS_DSGL_OPCODE 0xff #define V_CPL_RX_PHYS_DSGL_OPCODE(x) ((x) << S_CPL_RX_PHYS_DSGL_OPCODE) #define G_CPL_RX_PHYS_DSGL_OPCODE(x) \ (((x) >> S_CPL_RX_PHYS_DSGL_OPCODE) & M_CPL_RX_PHYS_DSGL_OPCODE) #define S_CPL_RX_PHYS_DSGL_ISRDMA 23 #define M_CPL_RX_PHYS_DSGL_ISRDMA 0x1 #define V_CPL_RX_PHYS_DSGL_ISRDMA(x) ((x) << S_CPL_RX_PHYS_DSGL_ISRDMA) #define G_CPL_RX_PHYS_DSGL_ISRDMA(x) \ (((x) >> S_CPL_RX_PHYS_DSGL_ISRDMA) & M_CPL_RX_PHYS_DSGL_ISRDMA) #define F_CPL_RX_PHYS_DSGL_ISRDMA V_CPL_RX_PHYS_DSGL_ISRDMA(1U) #define S_CPL_RX_PHYS_DSGL_RSVD1 20 #define M_CPL_RX_PHYS_DSGL_RSVD1 0x7 #define V_CPL_RX_PHYS_DSGL_RSVD1(x) ((x) << S_CPL_RX_PHYS_DSGL_RSVD1) #define G_CPL_RX_PHYS_DSGL_RSVD1(x) \ (((x) >> S_CPL_RX_PHYS_DSGL_RSVD1) & M_CPL_RX_PHYS_DSGL_RSVD1) #define S_CPL_RX_PHYS_DSGL_PCIRLXORDER 31 #define M_CPL_RX_PHYS_DSGL_PCIRLXORDER 0x1 #define V_CPL_RX_PHYS_DSGL_PCIRLXORDER(x) \ ((x) << S_CPL_RX_PHYS_DSGL_PCIRLXORDER) #define G_CPL_RX_PHYS_DSGL_PCIRLXORDER(x) \ (((x) >> S_CPL_RX_PHYS_DSGL_PCIRLXORDER) & \ M_CPL_RX_PHYS_DSGL_PCIRLXORDER) #define F_CPL_RX_PHYS_DSGL_PCIRLXORDER V_CPL_RX_PHYS_DSGL_PCIRLXORDER(1U) #define S_CPL_RX_PHYS_DSGL_PCINOSNOOP 30 #define M_CPL_RX_PHYS_DSGL_PCINOSNOOP 0x1 #define V_CPL_RX_PHYS_DSGL_PCINOSNOOP(x) \ ((x) << S_CPL_RX_PHYS_DSGL_PCINOSNOOP) #define G_CPL_RX_PHYS_DSGL_PCINOSNOOP(x) \ (((x) >> S_CPL_RX_PHYS_DSGL_PCINOSNOOP) & \ M_CPL_RX_PHYS_DSGL_PCINOSNOOP) #define F_CPL_RX_PHYS_DSGL_PCINOSNOOP V_CPL_RX_PHYS_DSGL_PCINOSNOOP(1U) #define S_CPL_RX_PHYS_DSGL_PCITPHNTENB 29 #define M_CPL_RX_PHYS_DSGL_PCITPHNTENB 0x1 #define V_CPL_RX_PHYS_DSGL_PCITPHNTENB(x) \ ((x) << S_CPL_RX_PHYS_DSGL_PCITPHNTENB) #define G_CPL_RX_PHYS_DSGL_PCITPHNTENB(x) \ (((x) >> S_CPL_RX_PHYS_DSGL_PCITPHNTENB) & \ M_CPL_RX_PHYS_DSGL_PCITPHNTENB) #define F_CPL_RX_PHYS_DSGL_PCITPHNTENB V_CPL_RX_PHYS_DSGL_PCITPHNTENB(1U) #define S_CPL_RX_PHYS_DSGL_PCITPHNT 27 #define M_CPL_RX_PHYS_DSGL_PCITPHNT 0x3 #define V_CPL_RX_PHYS_DSGL_PCITPHNT(x) ((x) << S_CPL_RX_PHYS_DSGL_PCITPHNT) #define G_CPL_RX_PHYS_DSGL_PCITPHNT(x) \ (((x) >> S_CPL_RX_PHYS_DSGL_PCITPHNT) & \ M_CPL_RX_PHYS_DSGL_PCITPHNT) #define S_CPL_RX_PHYS_DSGL_DCAID 16 #define M_CPL_RX_PHYS_DSGL_DCAID 0x7ff #define V_CPL_RX_PHYS_DSGL_DCAID(x) ((x) << S_CPL_RX_PHYS_DSGL_DCAID) #define G_CPL_RX_PHYS_DSGL_DCAID(x) \ (((x) >> S_CPL_RX_PHYS_DSGL_DCAID) & \ M_CPL_RX_PHYS_DSGL_DCAID) #define S_CPL_RX_PHYS_DSGL_NOOFSGENTR 0 #define M_CPL_RX_PHYS_DSGL_NOOFSGENTR 0xffff #define V_CPL_RX_PHYS_DSGL_NOOFSGENTR(x) \ ((x) << S_CPL_RX_PHYS_DSGL_NOOFSGENTR) #define G_CPL_RX_PHYS_DSGL_NOOFSGENTR(x) \ (((x) >> S_CPL_RX_PHYS_DSGL_NOOFSGENTR) & \ M_CPL_RX_PHYS_DSGL_NOOFSGENTR) /* CPL_TX_TLS_ACK */ struct cpl_tx_tls_ack { __be32 op_to_Rsvd2; __be32 PldLen; __be64 Rsvd3; }; #define S_CPL_TX_TLS_ACK_OPCODE 24 #define M_CPL_TX_TLS_ACK_OPCODE 0xff #define V_CPL_TX_TLS_ACK_OPCODE(x) ((x) << S_CPL_TX_TLS_ACK_OPCODE) #define G_CPL_TX_TLS_ACK_OPCODE(x) \ (((x) >> S_CPL_TX_TLS_ACK_OPCODE) & M_CPL_TX_TLS_ACK_OPCODE) #define S_CPL_TX_TLS_ACK_RSVD1 23 #define M_CPL_TX_TLS_ACK_RSVD1 0x1 #define V_CPL_TX_TLS_ACK_RSVD1(x) ((x) << S_CPL_TX_TLS_ACK_RSVD1) #define G_CPL_TX_TLS_ACK_RSVD1(x) \ (((x) >> S_CPL_TX_TLS_ACK_RSVD1) & M_CPL_TX_TLS_ACK_RSVD1) #define F_CPL_TX_TLS_ACK_RSVD1 V_CPL_TX_TLS_ACK_RSVD1(1U) #define S_CPL_TX_TLS_ACK_RXCHID 22 #define M_CPL_TX_TLS_ACK_RXCHID 0x1 #define V_CPL_TX_TLS_ACK_RXCHID(x) ((x) << S_CPL_TX_TLS_ACK_RXCHID) #define G_CPL_TX_TLS_ACK_RXCHID(x) \ (((x) >> S_CPL_TX_TLS_ACK_RXCHID) & M_CPL_TX_TLS_ACK_RXCHID) #define F_CPL_TX_TLS_ACK_RXCHID V_CPL_TX_TLS_ACK_RXCHID(1U) #define S_CPL_TX_TLS_ACK_FWMSG 21 #define M_CPL_TX_TLS_ACK_FWMSG 0x1 #define V_CPL_TX_TLS_ACK_FWMSG(x) ((x) << S_CPL_TX_TLS_ACK_FWMSG) #define G_CPL_TX_TLS_ACK_FWMSG(x) \ (((x) >> S_CPL_TX_TLS_ACK_FWMSG) & M_CPL_TX_TLS_ACK_FWMSG) #define F_CPL_TX_TLS_ACK_FWMSG V_CPL_TX_TLS_ACK_FWMSG(1U) #define S_CPL_TX_TLS_ACK_ULPTXLPBK 20 #define M_CPL_TX_TLS_ACK_ULPTXLPBK 0x1 #define V_CPL_TX_TLS_ACK_ULPTXLPBK(x) ((x) << S_CPL_TX_TLS_ACK_ULPTXLPBK) #define G_CPL_TX_TLS_ACK_ULPTXLPBK(x) \ (((x) >> S_CPL_TX_TLS_ACK_ULPTXLPBK) & M_CPL_TX_TLS_ACK_ULPTXLPBK) #define F_CPL_TX_TLS_ACK_ULPTXLPBK V_CPL_TX_TLS_ACK_ULPTXLPBK(1U) #define S_CPL_TX_TLS_ACK_CPLLEN 16 #define M_CPL_TX_TLS_ACK_CPLLEN 0xf #define V_CPL_TX_TLS_ACK_CPLLEN(x) ((x) << S_CPL_TX_TLS_ACK_CPLLEN) #define G_CPL_TX_TLS_ACK_CPLLEN(x) \ (((x) >> S_CPL_TX_TLS_ACK_CPLLEN) & M_CPL_TX_TLS_ACK_CPLLEN) #define S_CPL_TX_TLS_ACK_COMPLONERR 15 #define M_CPL_TX_TLS_ACK_COMPLONERR 0x1 #define V_CPL_TX_TLS_ACK_COMPLONERR(x) ((x) << S_CPL_TX_TLS_ACK_COMPLONERR) #define G_CPL_TX_TLS_ACK_COMPLONERR(x) \ (((x) >> S_CPL_TX_TLS_ACK_COMPLONERR) & M_CPL_TX_TLS_ACK_COMPLONERR) #define F_CPL_TX_TLS_ACK_COMPLONERR V_CPL_TX_TLS_ACK_COMPLONERR(1U) #define S_CPL_TX_TLS_ACK_LCB 14 #define M_CPL_TX_TLS_ACK_LCB 0x1 #define V_CPL_TX_TLS_ACK_LCB(x) ((x) << S_CPL_TX_TLS_ACK_LCB) #define G_CPL_TX_TLS_ACK_LCB(x) \ (((x) >> S_CPL_TX_TLS_ACK_LCB) & M_CPL_TX_TLS_ACK_LCB) #define F_CPL_TX_TLS_ACK_LCB V_CPL_TX_TLS_ACK_LCB(1U) #define S_CPL_TX_TLS_ACK_PHASH 13 #define M_CPL_TX_TLS_ACK_PHASH 0x1 #define V_CPL_TX_TLS_ACK_PHASH(x) ((x) << S_CPL_TX_TLS_ACK_PHASH) #define G_CPL_TX_TLS_ACK_PHASH(x) \ (((x) >> S_CPL_TX_TLS_ACK_PHASH) & M_CPL_TX_TLS_ACK_PHASH) #define F_CPL_TX_TLS_ACK_PHASH V_CPL_TX_TLS_ACK_PHASH(1U) #define S_CPL_TX_TLS_ACK_RSVD2 0 #define M_CPL_TX_TLS_ACK_RSVD2 0x1fff #define V_CPL_TX_TLS_ACK_RSVD2(x) ((x) << S_CPL_TX_TLS_ACK_RSVD2) #define G_CPL_TX_TLS_ACK_RSVD2(x) \ (((x) >> S_CPL_TX_TLS_ACK_RSVD2) & M_CPL_TX_TLS_ACK_RSVD2) #endif /* T4_MSG_H */ Index: head/sys/dev/cxgbe/firmware/t6fw_cfg_hashfilter.txt =================================================================== --- head/sys/dev/cxgbe/firmware/t6fw_cfg_hashfilter.txt (nonexistent) +++ head/sys/dev/cxgbe/firmware/t6fw_cfg_hashfilter.txt (revision 333394) @@ -0,0 +1,265 @@ +# Firmware configuration file. +# +# Global limits (some are hardware limits, others are due to the firmware). +# nvi = 128 virtual interfaces +# niqflint = 1023 ingress queues with freelists and/or interrupts +# nethctrl = 64K Ethernet or ctrl egress queues +# neq = 64K egress queues of all kinds, including freelists +# nexactf = 512 MPS TCAM entries, can oversubscribe. + +[global] + rss_glb_config_mode = basicvirtual + rss_glb_config_options = tnlmapen,hashtoeplitz,tnlalllkp + + # PL_TIMEOUT register + pl_timeout_value = 200 # the timeout value in units of us + + sge_timer_value = 1, 5, 10, 50, 100, 200 # SGE_TIMER_VALUE* in usecs + + reg[0x10c4] = 0x20000000/0x20000000 # GK_CONTROL, enable 5th thread + + reg[0x7dc0] = 0x0e2f8849 # TP_SHIFT_CNT + + #Tick granularities in kbps + tsch_ticks = 100000, 10000, 1000, 10 + + filterMode = fragmentation, mpshittype, protocol, vlan, port, fcoe + filterMask = port, protocol + + tp_pmrx = 20, 512 + tp_pmrx_pagesize = 16K + + # TP number of RX channels (0 = auto) + tp_nrxch = 0 + + tp_pmtx = 40, 512 + tp_pmtx_pagesize = 64K + + # TP number of TX channels (0 = auto) + tp_ntxch = 0 + + # TP OFLD MTUs + tp_mtus = 88, 256, 512, 576, 808, 1024, 1280, 1488, 1500, 2002, 2048, 4096, 4352, 8192, 9000, 9600 + + # enable TP_OUT_CONFIG.IPIDSPLITMODE and CRXPKTENC + reg[0x7d04] = 0x00010008/0x00010008 + + # TP_GLOBAL_CONFIG + reg[0x7d08] = 0x00000800/0x00000800 # set IssFromCplEnable + + # TP_PC_CONFIG + reg[0x7d48] = 0x00000000/0x00000400 # clear EnableFLMError + + # TP_PC_CONFIG2 + reg[0x7d4c] = 0x00010000/0x00010000 # set DisableNewPshFlag + + # TP_PARA_REG0 + reg[0x7d60] = 0x06000000/0x07000000 # set InitCWND to 6 + + # TP_PARA_REG3 + reg[0x7d6c] = 0x28000000/0x28000000 # set EnableTnlCngHdr + # set RxMacCheck (Note: + # Only for hash filter, + # no tcp offload) + + # LE_DB_CONFIG + reg[0x19c04] = 0x00000000/0x02040000 # LE IPv4 compression disabled + # EXTN_HASH_IPV4 Disable + + # LE_DB_RSP_CODE_0 + reg[0x19c74] = 0x00000004/0x0000000f # TCAM_ACTV_HIT = 4 + + # LE_DB_RSP_CODE_1 + reg[0x19c78] = 0x08000000/0x0e000000 # HASH_ACTV_HIT = 4 + + # LE_DB_HASH_CONFIG + reg[0x19c28] = 0x00800000/0x01f00000 # LE Hash bucket size 8, + + # MC configuration + mc_mode_brc[0] = 0 # mc0 - 1: enable BRC, 0: enable RBC + +# PFs 0-3. These get 8 MSI/8 MSI-X vectors each. VFs are supported by +# these 4 PFs only. +[function "0"] + nvf = 4 + wx_caps = all + r_caps = all + nvi = 2 + rssnvi = 2 + niqflint = 4 + nethctrl = 4 + neq = 8 + nexactf = 4 + cmask = all + pmask = 0x1 + +[function "1"] + nvf = 4 + wx_caps = all + r_caps = all + nvi = 2 + rssnvi = 2 + niqflint = 4 + nethctrl = 4 + neq = 8 + nexactf = 4 + cmask = all + pmask = 0x2 + +[function "2"] + nvf = 4 + wx_caps = all + r_caps = all + nvi = 2 + rssnvi = 2 + niqflint = 4 + nethctrl = 4 + neq = 8 + nexactf = 4 + cmask = all + pmask = 0x4 + +[function "3"] + nvf = 4 + wx_caps = all + r_caps = all + nvi = 2 + rssnvi = 2 + niqflint = 4 + nethctrl = 4 + neq = 8 + nexactf = 4 + cmask = all + pmask = 0x8 + +# PF4 is the resource-rich PF that the bus/nexus driver attaches to. +# It gets 32 MSI/128 MSI-X vectors. +[function "4"] + wx_caps = all + r_caps = all + nvi = 32 + rssnvi = 8 + niqflint = 512 + nethctrl = 1024 + neq = 2048 + nqpcq = 8192 + nexactf = 456 + cmask = all + pmask = all + nclip = 320 + + # TCAM has 6K cells; each region must start at a multiple of 128 cell. + # Each entry in these categories takes 2 cells each. nhash will use the + # TCAM iff there is room left (that is, the rest don't add up to 3072). + nfilter = 2032 + nserver = 512 + nhpfilter = 0 + nhash = 524288 + protocol = nic_hashfilter + tp_l2t = 4096 + +# PF5 is the SCSI Controller PF. It gets 32 MSI/40 MSI-X vectors. +# Not used right now. +[function "5"] + nvi = 1 + rssnvi = 0 + +# PF6 is the FCoE Controller PF. It gets 32 MSI/40 MSI-X vectors. +# Not used right now. +[function "6"] + nvi = 1 + rssnvi = 0 + +# The following function, 1023, is not an actual PCIE function but is used to +# configure and reserve firmware internal resources that come from the global +# resource pool. +# +[function "1023"] + wx_caps = all + r_caps = all + nvi = 4 + rssnvi = 0 + cmask = all + pmask = all + nexactf = 8 + nfilter = 16 + + +# For Virtual functions, we only allow NIC functionality and we only allow +# access to one port (1 << PF). Note that because of limitations in the +# Scatter Gather Engine (SGE) hardware which checks writes to VF KDOORBELL +# and GTS registers, the number of Ingress and Egress Queues must be a power +# of 2. +# +[function "0/*"] + wx_caps = 0x82 + r_caps = 0x86 + nvi = 1 + rssnvi = 1 + niqflint = 2 + nethctrl = 2 + neq = 4 + nexactf = 2 + cmask = all + pmask = 0x1 + +[function "1/*"] + wx_caps = 0x82 + r_caps = 0x86 + nvi = 1 + rssnvi = 1 + niqflint = 2 + nethctrl = 2 + neq = 4 + nexactf = 2 + cmask = all + pmask = 0x2 + +[function "2/*"] + wx_caps = 0x82 + r_caps = 0x86 + nvi = 1 + rssnvi = 1 + niqflint = 2 + nethctrl = 2 + neq = 4 + nexactf = 2 + cmask = all + pmask = 0x1 + +[function "3/*"] + wx_caps = 0x82 + r_caps = 0x86 + nvi = 1 + rssnvi = 1 + niqflint = 2 + nethctrl = 2 + neq = 4 + nexactf = 2 + cmask = all + pmask = 0x2 + +# MPS has 192K buffer space for ingress packets from the wire as well as +# loopback path of the L2 switch. +[port "0"] + dcb = none + #bg_mem = 25 + #lpbk_mem = 25 + hwm = 60 + lwm = 15 + dwm = 30 + +[port "1"] + dcb = none + #bg_mem = 25 + #lpbk_mem = 25 + hwm = 60 + lwm = 15 + dwm = 30 + +[fini] + version = 0x1 + checksum = 0xb577311e +# +# $FreeBSD$ +# Property changes on: head/sys/dev/cxgbe/firmware/t6fw_cfg_hashfilter.txt ___________________________________________________________________ Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Index: head/sys/dev/cxgbe/offload.h =================================================================== --- head/sys/dev/cxgbe/offload.h (revision 333393) +++ head/sys/dev/cxgbe/offload.h (revision 333394) @@ -1,173 +1,182 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2010 Chelsio Communications, Inc. * All rights reserved. * Written by: Navdeep Parhar * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ * */ #ifndef __T4_OFFLOAD_H__ #define __T4_OFFLOAD_H__ +#include +#include +#include #define INIT_ULPTX_WRH(w, wrlen, atomic, tid) do { \ (w)->wr_hi = htonl(V_FW_WR_OP(FW_ULPTX_WR) | V_FW_WR_ATOMIC(atomic)); \ (w)->wr_mid = htonl(V_FW_WR_LEN16(DIV_ROUND_UP(wrlen, 16)) | \ V_FW_WR_FLOWID(tid)); \ (w)->wr_lo = cpu_to_be64(0); \ } while (0) #define INIT_ULPTX_WR(w, wrlen, atomic, tid) \ INIT_ULPTX_WRH(&((w)->wr), wrlen, atomic, tid) #define INIT_TP_WR(w, tid) do { \ (w)->wr.wr_hi = htonl(V_FW_WR_OP(FW_TP_WR) | \ V_FW_WR_IMMDLEN(sizeof(*w) - sizeof(w->wr))); \ (w)->wr.wr_mid = htonl(V_FW_WR_LEN16(DIV_ROUND_UP(sizeof(*w), 16)) | \ V_FW_WR_FLOWID(tid)); \ (w)->wr.wr_lo = cpu_to_be64(0); \ } while (0) #define INIT_TP_WR_MIT_CPL(w, cpl, tid) do { \ INIT_TP_WR(w, tid); \ OPCODE_TID(w) = htonl(MK_OPCODE_TID(cpl, tid)); \ } while (0) TAILQ_HEAD(stid_head, stid_region); struct listen_ctx; struct stid_region { TAILQ_ENTRY(stid_region) link; u_int used; /* # of stids used by this region */ u_int free; /* # of contiguous stids free right after this region */ }; /* * Max # of ATIDs. The absolute HW max is 14b (enough for 16K) but we reserve * the upper 3b for use as a cookie to demux the reply. */ #define MAX_ATIDS 2048U union aopen_entry { void *data; union aopen_entry *next; }; /* * Holds the size, base address, free list start, etc of the TID, server TID, * and active-open TID tables. The tables themselves are allocated dynamically. */ struct tid_info { void **tid_tab; u_int ntids; u_int tids_in_use; struct mtx stid_lock __aligned(CACHE_LINE_SIZE); struct listen_ctx **stid_tab; u_int nstids; u_int stid_base; u_int stids_in_use; u_int nstids_free_head; /* # of available stids at the beginning */ struct stid_head stids; struct mtx atid_lock __aligned(CACHE_LINE_SIZE); union aopen_entry *atid_tab; u_int natids; union aopen_entry *afree; u_int atids_in_use; struct mtx ftid_lock __aligned(CACHE_LINE_SIZE); + struct cv ftid_cv; struct filter_entry *ftid_tab; u_int nftids; u_int ftid_base; u_int ftids_in_use; + + struct mtx hftid_lock __aligned(CACHE_LINE_SIZE); + struct cv hftid_cv; + void **hftid_tab; + /* ntids, tids_in_use */ struct mtx etid_lock __aligned(CACHE_LINE_SIZE); struct etid_entry *etid_tab; u_int netids; u_int etid_base; }; struct t4_range { u_int start; u_int size; }; struct t4_virt_res { /* virtualized HW resources */ struct t4_range ddp; struct t4_range iscsi; struct t4_range stag; struct t4_range rq; struct t4_range pbl; struct t4_range qp; struct t4_range cq; struct t4_range srq; struct t4_range ocq; struct t4_range l2t; struct t4_range key; }; enum { ULD_TOM = 0, ULD_IWARP, ULD_ISCSI, ULD_MAX = ULD_ISCSI }; struct adapter; struct port_info; struct uld_info { SLIST_ENTRY(uld_info) link; int refcount; int uld_id; int (*activate)(struct adapter *); int (*deactivate)(struct adapter *); }; struct tom_tunables { int cong_algorithm; int sndbuf; int ddp; int rx_coalesce; int tls; int *tls_rx_ports; int num_tls_rx_ports; int tx_align; int tx_zcopy; int cop_managed_offloading; }; /* iWARP driver tunables */ struct iw_tunables { int wc_en; }; #ifdef TCP_OFFLOAD int t4_register_uld(struct uld_info *); int t4_unregister_uld(struct uld_info *); int t4_activate_uld(struct adapter *, int); int t4_deactivate_uld(struct adapter *, int); int uld_active(struct adapter *, int); #endif #endif Index: head/sys/dev/cxgbe/t4_filter.c =================================================================== --- head/sys/dev/cxgbe/t4_filter.c (revision 333393) +++ head/sys/dev/cxgbe/t4_filter.c (revision 333394) @@ -1,686 +1,1427 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2018 Chelsio Communications, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include "common/common.h" #include "common/t4_msg.h" #include "common/t4_regs.h" +#include "common/t4_regs_values.h" +#include "common/t4_tcb.h" #include "t4_l2t.h" struct filter_entry { - uint32_t valid:1; /* filter allocated and valid */ - uint32_t locked:1; /* filter is administratively locked */ - uint32_t pending:1; /* filter action is pending firmware reply */ + uint32_t valid:1; /* filter allocated and valid */ + uint32_t locked:1; /* filter is administratively locked or busy */ + uint32_t pending:1; /* filter action is pending firmware reply */ uint32_t smtidx:8; /* Source MAC Table index for smac */ - struct l2t_entry *l2t; /* Layer Two Table entry for dmac */ + int tid; /* tid of the filter TCB */ + struct l2t_entry *l2te; /* L2 table entry for DMAC rewrite */ - struct t4_filter_specification fs; + struct t4_filter_specification fs; }; +static void free_filter_resources(struct filter_entry *); +static int get_hashfilter(struct adapter *, struct t4_filter *); +static int set_hashfilter(struct adapter *, struct t4_filter *, + struct l2t_entry *); +static int del_hashfilter(struct adapter *, struct t4_filter *); +static int configure_hashfilter_tcb(struct adapter *, struct filter_entry *); + +static void +insert_hftid(struct adapter *sc, int tid, void *ctx, int ntids) +{ + struct tid_info *t = &sc->tids; + + t->hftid_tab[tid] = ctx; + atomic_add_int(&t->tids_in_use, ntids); +} + +static void * +lookup_hftid(struct adapter *sc, int tid) +{ + struct tid_info *t = &sc->tids; + + return (t->hftid_tab[tid]); +} + +static void +remove_hftid(struct adapter *sc, int tid, int ntids) +{ + struct tid_info *t = &sc->tids; + + t->hftid_tab[tid] = NULL; + atomic_subtract_int(&t->tids_in_use, ntids); +} + static uint32_t fconf_iconf_to_mode(uint32_t fconf, uint32_t iconf) { uint32_t mode; mode = T4_FILTER_IPv4 | T4_FILTER_IPv6 | T4_FILTER_IP_SADDR | T4_FILTER_IP_DADDR | T4_FILTER_IP_SPORT | T4_FILTER_IP_DPORT; if (fconf & F_FRAGMENTATION) mode |= T4_FILTER_IP_FRAGMENT; if (fconf & F_MPSHITTYPE) mode |= T4_FILTER_MPS_HIT_TYPE; if (fconf & F_MACMATCH) mode |= T4_FILTER_MAC_IDX; if (fconf & F_ETHERTYPE) mode |= T4_FILTER_ETH_TYPE; if (fconf & F_PROTOCOL) mode |= T4_FILTER_IP_PROTO; if (fconf & F_TOS) mode |= T4_FILTER_IP_TOS; if (fconf & F_VLAN) mode |= T4_FILTER_VLAN; if (fconf & F_VNIC_ID) { mode |= T4_FILTER_VNIC; if (iconf & F_VNIC) mode |= T4_FILTER_IC_VNIC; } if (fconf & F_PORT) mode |= T4_FILTER_PORT; if (fconf & F_FCOE) mode |= T4_FILTER_FCoE; return (mode); } static uint32_t mode_to_fconf(uint32_t mode) { uint32_t fconf = 0; if (mode & T4_FILTER_IP_FRAGMENT) fconf |= F_FRAGMENTATION; if (mode & T4_FILTER_MPS_HIT_TYPE) fconf |= F_MPSHITTYPE; if (mode & T4_FILTER_MAC_IDX) fconf |= F_MACMATCH; if (mode & T4_FILTER_ETH_TYPE) fconf |= F_ETHERTYPE; if (mode & T4_FILTER_IP_PROTO) fconf |= F_PROTOCOL; if (mode & T4_FILTER_IP_TOS) fconf |= F_TOS; if (mode & T4_FILTER_VLAN) fconf |= F_VLAN; if (mode & T4_FILTER_VNIC) fconf |= F_VNIC_ID; if (mode & T4_FILTER_PORT) fconf |= F_PORT; if (mode & T4_FILTER_FCoE) fconf |= F_FCOE; return (fconf); } static uint32_t mode_to_iconf(uint32_t mode) { if (mode & T4_FILTER_IC_VNIC) return (F_VNIC); return (0); } static int check_fspec_against_fconf_iconf(struct adapter *sc, struct t4_filter_specification *fs) { struct tp_params *tpp = &sc->params.tp; uint32_t fconf = 0; if (fs->val.frag || fs->mask.frag) fconf |= F_FRAGMENTATION; if (fs->val.matchtype || fs->mask.matchtype) fconf |= F_MPSHITTYPE; if (fs->val.macidx || fs->mask.macidx) fconf |= F_MACMATCH; if (fs->val.ethtype || fs->mask.ethtype) fconf |= F_ETHERTYPE; if (fs->val.proto || fs->mask.proto) fconf |= F_PROTOCOL; if (fs->val.tos || fs->mask.tos) fconf |= F_TOS; if (fs->val.vlan_vld || fs->mask.vlan_vld) fconf |= F_VLAN; if (fs->val.ovlan_vld || fs->mask.ovlan_vld) { fconf |= F_VNIC_ID; if (tpp->ingress_config & F_VNIC) return (EINVAL); } if (fs->val.pfvf_vld || fs->mask.pfvf_vld) { fconf |= F_VNIC_ID; if ((tpp->ingress_config & F_VNIC) == 0) return (EINVAL); } if (fs->val.iport || fs->mask.iport) fconf |= F_PORT; if (fs->val.fcoe || fs->mask.fcoe) fconf |= F_FCOE; if ((tpp->vlan_pri_map | fconf) != tpp->vlan_pri_map) return (E2BIG); return (0); } int get_filter_mode(struct adapter *sc, uint32_t *mode) { struct tp_params *tpp = &sc->params.tp; /* * We trust the cached values of the relevant TP registers. This means * things work reliably only if writes to those registers are always via - * t4_set_filter_mode_. + * t4_set_filter_mode. */ *mode = fconf_iconf_to_mode(tpp->vlan_pri_map, tpp->ingress_config); return (0); } int set_filter_mode(struct adapter *sc, uint32_t mode) { struct tp_params *tpp = &sc->params.tp; uint32_t fconf, iconf; int rc; iconf = mode_to_iconf(mode); if ((iconf ^ tpp->ingress_config) & F_VNIC) { /* * For now we just complain if A_TP_INGRESS_CONFIG is not * already set to the correct value for the requested filter * mode. It's not clear if it's safe to write to this register * on the fly. (And we trust the cached value of the register). + * + * check_fspec_against_fconf_iconf and other code that looks at + * tp->vlan_pri_map and tp->ingress_config needs to be reviewed + * thorougly before allowing dynamic filter mode changes. */ return (EBUSY); } fconf = mode_to_fconf(mode); rc = begin_synchronized_op(sc, NULL, HOLD_LOCK | SLEEP_OK | INTR_OK, "t4setfm"); if (rc) return (rc); if (sc->tids.ftids_in_use > 0) { rc = EBUSY; goto done; } #ifdef TCP_OFFLOAD if (uld_active(sc, ULD_TOM)) { rc = EBUSY; goto done; } #endif rc = -t4_set_filter_mode(sc, fconf, true); done: end_synchronized_op(sc, LOCK_HELD); return (rc); } static inline uint64_t -get_filter_hits(struct adapter *sc, uint32_t fid) +get_filter_hits(struct adapter *sc, uint32_t tid) { uint32_t tcb_addr; - tcb_addr = t4_read_reg(sc, A_TP_CMM_TCB_BASE) + - (fid + sc->tids.ftid_base) * TCB_SIZE; + tcb_addr = t4_read_reg(sc, A_TP_CMM_TCB_BASE) + tid * TCB_SIZE; if (is_t4(sc)) { uint64_t hits; read_via_memwin(sc, 0, tcb_addr + 16, (uint32_t *)&hits, 8); return (be64toh(hits)); } else { uint32_t hits; read_via_memwin(sc, 0, tcb_addr + 24, &hits, 4); return (be32toh(hits)); } } int get_filter(struct adapter *sc, struct t4_filter *t) { - int i, rc, nfilters = sc->tids.nftids; + int i, nfilters = sc->tids.nftids; struct filter_entry *f; - rc = begin_synchronized_op(sc, NULL, HOLD_LOCK | SLEEP_OK | INTR_OK, - "t4getf"); - if (rc) - return (rc); + if (t->fs.hash) + return (get_hashfilter(sc, t)); if (sc->tids.ftids_in_use == 0 || sc->tids.ftid_tab == NULL || t->idx >= nfilters) { t->idx = 0xffffffff; - goto done; + return (0); } + mtx_lock(&sc->tids.ftid_lock); f = &sc->tids.ftid_tab[t->idx]; + MPASS(f->tid == sc->tids.ftid_base + t->idx); for (i = t->idx; i < nfilters; i++, f++) { if (f->valid) { t->idx = i; - t->l2tidx = f->l2t ? f->l2t->idx : 0; + t->l2tidx = f->l2te ? f->l2te->idx : 0; t->smtidx = f->smtidx; if (f->fs.hitcnts) - t->hits = get_filter_hits(sc, t->idx); + t->hits = get_filter_hits(sc, f->tid); else t->hits = UINT64_MAX; t->fs = f->fs; goto done; } } - t->idx = 0xffffffff; done: - end_synchronized_op(sc, LOCK_HELD); + mtx_unlock(&sc->tids.ftid_lock); return (0); } static int -set_filter_wr(struct adapter *sc, int fidx) +set_tcamfilter(struct adapter *sc, struct t4_filter *t, struct l2t_entry *l2te) { - struct filter_entry *f = &sc->tids.ftid_tab[fidx]; + struct filter_entry *f; struct fw_filter_wr *fwr; - unsigned int ftid, vnic_vld, vnic_vld_mask; + u_int vnic_vld, vnic_vld_mask; struct wrq_cookie cookie; + int i, rc, busy, locked; + const int ntids = t->fs.type ? 4 : 1; - ASSERT_SYNCHRONIZED_OP(sc); + MPASS(!t->fs.hash); + MPASS(t->idx < sc->tids.nftids); + /* Already validated against fconf, iconf */ + MPASS((t->fs.val.pfvf_vld & t->fs.val.ovlan_vld) == 0); + MPASS((t->fs.mask.pfvf_vld & t->fs.mask.ovlan_vld) == 0); - if (f->fs.newdmac || f->fs.newvlan) { - /* This filter needs an L2T entry; allocate one. */ - f->l2t = t4_l2t_alloc_switching(sc->l2t); - if (f->l2t == NULL) - return (EAGAIN); - if (t4_l2t_set_switching(sc, f->l2t, f->fs.vlan, f->fs.eport, - f->fs.dmac)) { - t4_l2t_release(f->l2t); - f->l2t = NULL; - return (ENOMEM); + f = &sc->tids.ftid_tab[t->idx]; + rc = busy = locked = 0; + mtx_lock(&sc->tids.ftid_lock); + for (i = 0; i < ntids; i++) { + busy += f[i].pending + f[i].valid; + locked += f[i].locked; + } + if (locked > 0) + rc = EPERM; + else if (busy > 0) + rc = EBUSY; + else { + fwr = start_wrq_wr(&sc->sge.mgmtq, howmany(sizeof(*fwr), 16), + &cookie); + if (__predict_false(fwr == NULL)) + rc = ENOMEM; + else { + f->pending = 1; + sc->tids.ftids_in_use++; } } + mtx_unlock(&sc->tids.ftid_lock); + if (rc != 0) { + if (l2te) + t4_l2t_release(l2te); + return (rc); + } - /* Already validated against fconf, iconf */ - MPASS((f->fs.val.pfvf_vld & f->fs.val.ovlan_vld) == 0); - MPASS((f->fs.mask.pfvf_vld & f->fs.mask.ovlan_vld) == 0); - if (f->fs.val.pfvf_vld || f->fs.val.ovlan_vld) + /* + * Can't fail now. A set-filter WR will definitely be sent. + */ + + f->tid = sc->tids.ftid_base + t->idx; + f->fs = t->fs; + f->l2te = l2te; + + if (t->fs.val.pfvf_vld || t->fs.val.ovlan_vld) vnic_vld = 1; else vnic_vld = 0; - if (f->fs.mask.pfvf_vld || f->fs.mask.ovlan_vld) + if (t->fs.mask.pfvf_vld || t->fs.mask.ovlan_vld) vnic_vld_mask = 1; else vnic_vld_mask = 0; - ftid = sc->tids.ftid_base + fidx; - - fwr = start_wrq_wr(&sc->sge.mgmtq, howmany(sizeof(*fwr), 16), &cookie); - if (fwr == NULL) - return (ENOMEM); bzero(fwr, sizeof(*fwr)); - fwr->op_pkd = htobe32(V_FW_WR_OP(FW_FILTER_WR)); fwr->len16_pkd = htobe32(FW_LEN16(*fwr)); fwr->tid_to_iq = - htobe32(V_FW_FILTER_WR_TID(ftid) | + htobe32(V_FW_FILTER_WR_TID(f->tid) | V_FW_FILTER_WR_RQTYPE(f->fs.type) | V_FW_FILTER_WR_NOREPLY(0) | V_FW_FILTER_WR_IQ(f->fs.iq)); fwr->del_filter_to_l2tix = htobe32(V_FW_FILTER_WR_RPTTID(f->fs.rpttid) | V_FW_FILTER_WR_DROP(f->fs.action == FILTER_DROP) | V_FW_FILTER_WR_DIRSTEER(f->fs.dirsteer) | V_FW_FILTER_WR_MASKHASH(f->fs.maskhash) | V_FW_FILTER_WR_DIRSTEERHASH(f->fs.dirsteerhash) | V_FW_FILTER_WR_LPBK(f->fs.action == FILTER_SWITCH) | V_FW_FILTER_WR_DMAC(f->fs.newdmac) | V_FW_FILTER_WR_SMAC(f->fs.newsmac) | V_FW_FILTER_WR_INSVLAN(f->fs.newvlan == VLAN_INSERT || f->fs.newvlan == VLAN_REWRITE) | V_FW_FILTER_WR_RMVLAN(f->fs.newvlan == VLAN_REMOVE || f->fs.newvlan == VLAN_REWRITE) | V_FW_FILTER_WR_HITCNTS(f->fs.hitcnts) | V_FW_FILTER_WR_TXCHAN(f->fs.eport) | V_FW_FILTER_WR_PRIO(f->fs.prio) | - V_FW_FILTER_WR_L2TIX(f->l2t ? f->l2t->idx : 0)); + V_FW_FILTER_WR_L2TIX(f->l2te ? f->l2te->idx : 0)); fwr->ethtype = htobe16(f->fs.val.ethtype); fwr->ethtypem = htobe16(f->fs.mask.ethtype); fwr->frag_to_ovlan_vldm = (V_FW_FILTER_WR_FRAG(f->fs.val.frag) | V_FW_FILTER_WR_FRAGM(f->fs.mask.frag) | V_FW_FILTER_WR_IVLAN_VLD(f->fs.val.vlan_vld) | V_FW_FILTER_WR_OVLAN_VLD(vnic_vld) | V_FW_FILTER_WR_IVLAN_VLDM(f->fs.mask.vlan_vld) | V_FW_FILTER_WR_OVLAN_VLDM(vnic_vld_mask)); fwr->smac_sel = 0; fwr->rx_chan_rx_rpl_iq = htobe16(V_FW_FILTER_WR_RX_CHAN(0) | V_FW_FILTER_WR_RX_RPL_IQ(sc->sge.fwq.abs_id)); fwr->maci_to_matchtypem = htobe32(V_FW_FILTER_WR_MACI(f->fs.val.macidx) | V_FW_FILTER_WR_MACIM(f->fs.mask.macidx) | V_FW_FILTER_WR_FCOE(f->fs.val.fcoe) | V_FW_FILTER_WR_FCOEM(f->fs.mask.fcoe) | V_FW_FILTER_WR_PORT(f->fs.val.iport) | V_FW_FILTER_WR_PORTM(f->fs.mask.iport) | V_FW_FILTER_WR_MATCHTYPE(f->fs.val.matchtype) | V_FW_FILTER_WR_MATCHTYPEM(f->fs.mask.matchtype)); fwr->ptcl = f->fs.val.proto; fwr->ptclm = f->fs.mask.proto; fwr->ttyp = f->fs.val.tos; fwr->ttypm = f->fs.mask.tos; fwr->ivlan = htobe16(f->fs.val.vlan); fwr->ivlanm = htobe16(f->fs.mask.vlan); fwr->ovlan = htobe16(f->fs.val.vnic); fwr->ovlanm = htobe16(f->fs.mask.vnic); bcopy(f->fs.val.dip, fwr->lip, sizeof (fwr->lip)); bcopy(f->fs.mask.dip, fwr->lipm, sizeof (fwr->lipm)); bcopy(f->fs.val.sip, fwr->fip, sizeof (fwr->fip)); bcopy(f->fs.mask.sip, fwr->fipm, sizeof (fwr->fipm)); fwr->lp = htobe16(f->fs.val.dport); fwr->lpm = htobe16(f->fs.mask.dport); fwr->fp = htobe16(f->fs.val.sport); fwr->fpm = htobe16(f->fs.mask.sport); - if (f->fs.newsmac) + if (f->fs.newsmac) { + /* XXX: need to use SMT idx instead */ bcopy(f->fs.smac, fwr->sma, sizeof (fwr->sma)); - - f->pending = 1; - sc->tids.ftids_in_use++; - + } commit_wrq_wr(&sc->sge.mgmtq, fwr, &cookie); - return (0); + + /* Wait for response. */ + mtx_lock(&sc->tids.ftid_lock); + for (;;) { + if (f->pending == 0) { + rc = f->valid ? 0 : EIO; + break; + } + if (cv_wait_sig(&sc->tids.ftid_cv, &sc->tids.ftid_lock) != 0) { + rc = EINPROGRESS; + break; + } + } + mtx_unlock(&sc->tids.ftid_lock); + return (rc); } int set_filter(struct adapter *sc, struct t4_filter *t) { - unsigned int nfilters, nports; - struct filter_entry *f; - int i, rc; + struct tid_info *ti = &sc->tids; + struct l2t_entry *l2te; + int rc; + /* + * Basic filter checks first. + */ + + if (t->fs.hash) { + if (!is_hashfilter(sc) || ti->ntids == 0) + return (ENOTSUP); + if (t->idx != (uint32_t)-1) + return (EINVAL); /* hw, not user picks the idx */ + } else { + if (ti->nftids == 0) + return (ENOTSUP); + if (t->idx >= ti->nftids) + return (EINVAL); + /* IPv6 filter idx must be 4 aligned */ + if (t->fs.type == 1 && + ((t->idx & 0x3) || t->idx + 4 >= ti->nftids)) + return (EINVAL); + } + + /* T4 doesn't support removing VLAN Tags for loop back filters. */ + if (is_t4(sc) && t->fs.action == FILTER_SWITCH && + (t->fs.newvlan == VLAN_REMOVE || t->fs.newvlan == VLAN_REWRITE)) + return (ENOTSUP); + + if (t->fs.action == FILTER_SWITCH && t->fs.eport >= sc->params.nports) + return (EINVAL); + if (t->fs.val.iport >= sc->params.nports) + return (EINVAL); + + /* Can't specify an iq if not steering to it */ + if (!t->fs.dirsteer && t->fs.iq) + return (EINVAL); + + /* Validate against the global filter mode and ingress config */ + rc = check_fspec_against_fconf_iconf(sc, &t->fs); + if (rc != 0) + return (rc); + + /* + * Basic checks passed. Make sure the queues and tid tables are setup. + */ + rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4setf"); if (rc) return (rc); + if (!(sc->flags & FULL_INIT_DONE) && + ((rc = adapter_full_init(sc)) != 0)) { + end_synchronized_op(sc, 0); + return (rc); + } + if (t->fs.hash) { + if (__predict_false(ti->hftid_tab == NULL)) { + ti->hftid_tab = malloc(sizeof(*ti->hftid_tab) * ti->ntids, + M_CXGBE, M_NOWAIT | M_ZERO); + if (ti->hftid_tab == NULL) { + rc = ENOMEM; + goto done; + } + mtx_init(&ti->hftid_lock, "T4 hashfilters", 0, MTX_DEF); + cv_init(&ti->hftid_cv, "t4hfcv"); + } + if (__predict_false(sc->tids.atid_tab == NULL)) { + rc = alloc_atid_tab(&sc->tids, M_NOWAIT); + if (rc != 0) + goto done; + } + } else if (__predict_false(ti->ftid_tab == NULL)) { + KASSERT(ti->ftids_in_use == 0, + ("%s: no memory allocated but ftids_in_use > 0", __func__)); + ti->ftid_tab = malloc(sizeof(struct filter_entry) * ti->nftids, + M_CXGBE, M_NOWAIT | M_ZERO); + if (ti->ftid_tab == NULL) { + rc = ENOMEM; + goto done; + } + mtx_init(&ti->ftid_lock, "T4 filters", 0, MTX_DEF); + cv_init(&ti->ftid_cv, "t4fcv"); + } +done: + end_synchronized_op(sc, 0); + if (rc != 0) + return (rc); - nfilters = sc->tids.nftids; - nports = sc->params.nports; + /* + * Allocate L2T entry, SMT entry, etc. + */ - if (nfilters == 0) { - rc = ENOTSUP; - goto done; + l2te = NULL; + if (t->fs.newdmac || t->fs.newvlan) { + /* This filter needs an L2T entry; allocate one. */ + l2te = t4_l2t_alloc_switching(sc->l2t); + if (__predict_false(l2te == NULL)) + return (EAGAIN); + if (t4_l2t_set_switching(sc, l2te, t->fs.vlan, t->fs.eport, + t->fs.dmac)) { + t4_l2t_release(l2te); + return (ENOMEM); + } } - if (t->idx >= nfilters) { - rc = EINVAL; - goto done; + if (t->fs.newsmac) { + /* XXX: alloc SMT */ + return (ENOTSUP); } - /* Validate against the global filter mode and ingress config */ - rc = check_fspec_against_fconf_iconf(sc, &t->fs); - if (rc != 0) - goto done; + if (t->fs.hash) + return (set_hashfilter(sc, t, l2te)); + else + return (set_tcamfilter(sc, t, l2te)); - if (t->fs.action == FILTER_SWITCH && t->fs.eport >= nports) { - rc = EINVAL; +} + +static int +del_tcamfilter(struct adapter *sc, struct t4_filter *t) +{ + struct filter_entry *f; + struct fw_filter_wr *fwr; + struct wrq_cookie cookie; + int rc; + + MPASS(sc->tids.ftid_tab != NULL); + MPASS(sc->tids.nftids > 0); + + if (t->idx >= sc->tids.nftids) + return (EINVAL); + + mtx_lock(&sc->tids.ftid_lock); + f = &sc->tids.ftid_tab[t->idx]; + if (f->locked) { + rc = EPERM; goto done; } - - if (t->fs.val.iport >= nports) { - rc = EINVAL; + if (f->pending) { + rc = EBUSY; goto done; } - - /* Can't specify an iq if not steering to it */ - if (!t->fs.dirsteer && t->fs.iq) { + if (f->valid == 0) { rc = EINVAL; goto done; } - - /* IPv6 filter idx must be 4 aligned */ - if (t->fs.type == 1 && - ((t->idx & 0x3) || t->idx + 4 >= nfilters)) { - rc = EINVAL; + MPASS(f->tid == sc->tids.ftid_base + t->idx); + fwr = start_wrq_wr(&sc->sge.mgmtq, howmany(sizeof(*fwr), 16), &cookie); + if (fwr == NULL) { + rc = ENOMEM; goto done; } - if (!(sc->flags & FULL_INIT_DONE) && - ((rc = adapter_full_init(sc)) != 0)) - goto done; + bzero(fwr, sizeof (*fwr)); + t4_mk_filtdelwr(f->tid, fwr, sc->sge.fwq.abs_id); + f->pending = 1; + commit_wrq_wr(&sc->sge.mgmtq, fwr, &cookie); + t->fs = f->fs; /* extra info for the caller */ - if (sc->tids.ftid_tab == NULL) { - KASSERT(sc->tids.ftids_in_use == 0, - ("%s: no memory allocated but filters_in_use > 0", - __func__)); - - sc->tids.ftid_tab = malloc(sizeof (struct filter_entry) * - nfilters, M_CXGBE, M_NOWAIT | M_ZERO); - if (sc->tids.ftid_tab == NULL) { - rc = ENOMEM; - goto done; + for (;;) { + if (f->pending == 0) { + rc = f->valid ? EIO : 0; + break; } - mtx_init(&sc->tids.ftid_lock, "T4 filters", 0, MTX_DEF); + if (cv_wait_sig(&sc->tids.ftid_cv, &sc->tids.ftid_lock) != 0) { + rc = EINPROGRESS; + break; + } } +done: + mtx_unlock(&sc->tids.ftid_lock); + return (rc); +} - for (i = 0; i < 4; i++) { - f = &sc->tids.ftid_tab[t->idx + i]; +int +del_filter(struct adapter *sc, struct t4_filter *t) +{ - if (f->pending || f->valid) { - rc = EBUSY; - goto done; - } - if (f->locked) { - rc = EPERM; - goto done; - } + /* No filters possible if not initialized yet. */ + if (!(sc->flags & FULL_INIT_DONE)) + return (EINVAL); - if (t->fs.type == 0) - break; + /* + * The checks for tid tables ensure that the locks that del_* will reach + * for are initialized. + */ + if (t->fs.hash) { + if (sc->tids.hftid_tab != NULL) + return (del_hashfilter(sc, t)); + } else { + if (sc->tids.ftid_tab != NULL) + return (del_tcamfilter(sc, t)); } - f = &sc->tids.ftid_tab[t->idx]; - f->fs = t->fs; + return (EINVAL); +} - rc = set_filter_wr(sc, t->idx); -done: - end_synchronized_op(sc, 0); +/* + * Release secondary resources associated with the filter. + */ +static void +free_filter_resources(struct filter_entry *f) +{ - if (rc == 0) { - mtx_lock(&sc->tids.ftid_lock); - for (;;) { - if (f->pending == 0) { - rc = f->valid ? 0 : EIO; - break; - } + if (f->l2te) { + t4_l2t_release(f->l2te); + f->l2te = NULL; + } +} - if (mtx_sleep(&sc->tids.ftid_tab, &sc->tids.ftid_lock, - PCATCH, "t4setfw", 0)) { - rc = EINPROGRESS; - break; - } - } - mtx_unlock(&sc->tids.ftid_lock); +int +t4_filter_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) +{ + struct adapter *sc = iq->adapter; + const struct cpl_set_tcb_rpl *rpl = (const void *)(rss + 1); + u_int tid = GET_TID(rpl); + u_int rc, cleanup, idx; + struct filter_entry *f; + + KASSERT(m == NULL, ("%s: payload with opcode %02x", __func__, + rss->opcode)); + MPASS(is_ftid(sc, tid)); + + cleanup = 0; + idx = tid - sc->tids.ftid_base; + f = &sc->tids.ftid_tab[idx]; + rc = G_COOKIE(rpl->cookie); + + mtx_lock(&sc->tids.ftid_lock); + KASSERT(f->pending, ("%s: reply %d for filter[%u] that isn't pending.", + __func__, rc, idx)); + switch(rc) { + case FW_FILTER_WR_FLT_ADDED: + /* set-filter succeeded */ + f->valid = 1; + f->smtidx = (be64toh(rpl->oldval) >> 24) & 0xff; + break; + case FW_FILTER_WR_FLT_DELETED: + /* del-filter succeeded */ + MPASS(f->valid == 1); + f->valid = 0; + /* Fall through */ + case FW_FILTER_WR_SMT_TBL_FULL: + /* set-filter failed due to lack of SMT space. */ + MPASS(f->valid == 0); + free_filter_resources(f); + sc->tids.ftids_in_use--; + break; + case FW_FILTER_WR_SUCCESS: + case FW_FILTER_WR_EINVAL: + default: + panic("%s: unexpected reply %d for filter[%d].", __func__, rc, + idx); } - return (rc); + f->pending = 0; + cv_broadcast(&sc->tids.ftid_cv); + mtx_unlock(&sc->tids.ftid_lock); + + return (0); } -static int -del_filter_wr(struct adapter *sc, int fidx) +/* + * This is the reply to the Active Open that created the filter. Additional TCB + * updates may be required to complete the filter configuration. + */ +int +t4_hashfilter_ao_rpl(struct sge_iq *iq, const struct rss_header *rss, + struct mbuf *m) { - struct filter_entry *f = &sc->tids.ftid_tab[fidx]; - struct fw_filter_wr *fwr; - unsigned int ftid; - struct wrq_cookie cookie; + struct adapter *sc = iq->adapter; + const struct cpl_act_open_rpl *cpl = (const void *)(rss + 1); + u_int atid = G_TID_TID(G_AOPEN_ATID(be32toh(cpl->atid_status))); + u_int status = G_AOPEN_STATUS(be32toh(cpl->atid_status)); + struct filter_entry *f = lookup_atid(sc, atid); - ftid = sc->tids.ftid_base + fidx; + KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); - fwr = start_wrq_wr(&sc->sge.mgmtq, howmany(sizeof(*fwr), 16), &cookie); - if (fwr == NULL) - return (ENOMEM); - bzero(fwr, sizeof (*fwr)); + mtx_lock(&sc->tids.hftid_lock); + KASSERT(f->pending, ("%s: hashfilter[%p] isn't pending.", __func__, f)); + KASSERT(f->tid == -1, ("%s: hashfilter[%p] has tid %d already.", + __func__, f, f->tid)); + if (status == CPL_ERR_NONE) { + struct filter_entry *f2; - t4_mk_filtdelwr(ftid, fwr, sc->sge.fwq.abs_id); + f->tid = GET_TID(cpl); + MPASS(f->tid < sc->tids.ntids); + if (__predict_false((f2 = lookup_hftid(sc, f->tid)) != NULL)) { + /* XXX: avoid hash collisions in the first place. */ + MPASS(f2->tid == f->tid); + remove_hftid(sc, f2->tid, f2->fs.type ? 2 : 1); + free_filter_resources(f2); + free(f2, M_CXGBE); + } + insert_hftid(sc, f->tid, f, f->fs.type ? 2 : 1); + /* + * Leave the filter pending until it is fully set up, which will + * be indicated by the reply to the last TCB update. No need to + * unblock the ioctl thread either. + */ + if (configure_hashfilter_tcb(sc, f) == EINPROGRESS) + goto done; + f->valid = 1; + f->pending = 0; + } else { + /* provide errno instead of tid to ioctl */ + f->tid = act_open_rpl_status_to_errno(status); + f->valid = 0; + if (act_open_has_tid(status)) + release_tid(sc, GET_TID(cpl), &sc->sge.mgmtq); + free_filter_resources(f); + if (f->locked == 0) + free(f, M_CXGBE); + } + cv_broadcast(&sc->tids.hftid_cv); +done: + mtx_unlock(&sc->tids.hftid_lock); - f->pending = 1; - commit_wrq_wr(&sc->sge.mgmtq, fwr, &cookie); + free_atid(sc, atid); return (0); } int -del_filter(struct adapter *sc, struct t4_filter *t) +t4_hashfilter_tcb_rpl(struct sge_iq *iq, const struct rss_header *rss, + struct mbuf *m) { - unsigned int nfilters; + struct adapter *sc = iq->adapter; + const struct cpl_set_tcb_rpl *rpl = (const void *)(rss + 1); + u_int tid = GET_TID(rpl); struct filter_entry *f; - int rc; - rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4delf"); - if (rc) - return (rc); + mtx_lock(&sc->tids.hftid_lock); + f = lookup_hftid(sc, tid); + KASSERT(f->tid == tid, ("%s: filter tid mismatch", __func__)); + KASSERT(f->pending, ("%s: hashfilter %p [%u] isn't pending.", __func__, + f, tid)); + KASSERT(f->valid == 0, ("%s: hashfilter %p [%u] is valid already.", + __func__, f, tid)); + f->pending = 0; + if (rpl->status == 0) { + f->valid = 1; + } else { + f->tid = EIO; + f->valid = 0; + free_filter_resources(f); + remove_hftid(sc, tid, f->fs.type ? 2 : 1); + release_tid(sc, tid, &sc->sge.mgmtq); + if (f->locked == 0) + free(f, M_CXGBE); + } + cv_broadcast(&sc->tids.hftid_cv); + mtx_unlock(&sc->tids.hftid_lock); - nfilters = sc->tids.nftids; + return (0); +} - if (nfilters == 0) { - rc = ENOTSUP; - goto done; +int +t4_del_hashfilter_rpl(struct sge_iq *iq, const struct rss_header *rss, + struct mbuf *m) +{ + struct adapter *sc = iq->adapter; + const struct cpl_abort_rpl_rss *cpl = (const void *)(rss + 1); + unsigned int tid = GET_TID(cpl); + struct filter_entry *f; + + mtx_lock(&sc->tids.hftid_lock); + f = lookup_hftid(sc, tid); + KASSERT(f->tid == tid, ("%s: filter tid mismatch", __func__)); + KASSERT(f->pending, ("%s: hashfilter %p [%u] isn't pending.", __func__, + f, tid)); + KASSERT(f->valid, ("%s: hashfilter %p [%u] isn't valid.", __func__, f, + tid)); + f->pending = 0; + if (cpl->status == 0) { + f->valid = 0; + free_filter_resources(f); + remove_hftid(sc, tid, f->fs.type ? 2 : 1); + release_tid(sc, tid, &sc->sge.mgmtq); + if (f->locked == 0) + free(f, M_CXGBE); } + cv_broadcast(&sc->tids.hftid_cv); + mtx_unlock(&sc->tids.hftid_lock); - if (sc->tids.ftid_tab == NULL || sc->tids.ftids_in_use == 0 || + return (0); +} + +static int +get_hashfilter(struct adapter *sc, struct t4_filter *t) +{ + int i, nfilters = sc->tids.ntids; + struct filter_entry *f; + + if (sc->tids.tids_in_use == 0 || sc->tids.hftid_tab == NULL || t->idx >= nfilters) { - rc = EINVAL; + t->idx = 0xffffffff; + return (0); + } + + mtx_lock(&sc->tids.hftid_lock); + for (i = t->idx; i < nfilters; i++) { + f = lookup_hftid(sc, i); + if (f != NULL && f->valid) { + t->idx = i; + t->l2tidx = f->l2te ? f->l2te->idx : 0; + t->smtidx = f->smtidx; + if (f->fs.hitcnts) + t->hits = get_filter_hits(sc, t->idx); + else + t->hits = UINT64_MAX; + t->fs = f->fs; + + goto done; + } + } + t->idx = 0xffffffff; +done: + mtx_unlock(&sc->tids.hftid_lock); + return (0); +} + +static uint64_t +hashfilter_ntuple(struct adapter *sc, const struct t4_filter_specification *fs) +{ + struct tp_params *tp = &sc->params.tp; + uint64_t ntuple = 0; + + /* + * Initialize each of the fields which we care about which are present + * in the Compressed Filter Tuple. + */ + if (tp->vlan_shift >= 0 && fs->mask.vlan) + ntuple |= (F_FT_VLAN_VLD | fs->val.vlan) << tp->vlan_shift; + + if (tp->port_shift >= 0 && fs->mask.iport) + ntuple |= (uint64_t)fs->val.iport << tp->port_shift; + + if (tp->protocol_shift >= 0) { + if (!fs->val.proto) + ntuple |= (uint64_t)IPPROTO_TCP << tp->protocol_shift; + else + ntuple |= (uint64_t)fs->val.proto << tp->protocol_shift; + } + + if (tp->tos_shift >= 0 && fs->mask.tos) + ntuple |= (uint64_t)(fs->val.tos) << tp->tos_shift; + + if (tp->vnic_shift >= 0) { +#ifdef notyet + if (tp->ingress_config & F_VNIC && fs->mask.pfvf_vld) + ntuple |= (uint64_t)((fs->val.pfvf_vld << 16) | + (fs->val.pf << 13) | + (fs->val.vf)) << tp->vnic_shift; + else +#endif + ntuple |= (uint64_t)((fs->val.ovlan_vld << 16) | + (fs->val.vnic)) << tp->vnic_shift; + } + + if (tp->macmatch_shift >= 0 && fs->mask.macidx) + ntuple |= (uint64_t)(fs->val.macidx) << tp->macmatch_shift; + + if (tp->ethertype_shift >= 0 && fs->mask.ethtype) + ntuple |= (uint64_t)(fs->val.ethtype) << tp->ethertype_shift; + + if (tp->matchtype_shift >= 0 && fs->mask.matchtype) + ntuple |= (uint64_t)(fs->val.matchtype) << tp->matchtype_shift; + + if (tp->frag_shift >= 0 && fs->mask.frag) + ntuple |= (uint64_t)(fs->val.frag) << tp->frag_shift; + + if (tp->fcoe_shift >= 0 && fs->mask.fcoe) + ntuple |= (uint64_t)(fs->val.fcoe) << tp->fcoe_shift; + + return (ntuple); +} + +static void +mk_act_open_req6(struct adapter *sc, struct filter_entry *f, int atid, + struct cpl_act_open_req6 *cpl) +{ + struct cpl_t5_act_open_req6 *cpl5 = (void *)cpl; + struct cpl_t6_act_open_req6 *cpl6 = (void *)cpl; + + /* Review changes to CPL after cpl_t6_act_open_req if this goes off. */ + MPASS(chip_id(sc) >= CHELSIO_T5 && chip_id(sc) <= CHELSIO_T6); + MPASS(atid >= 0); + + if (chip_id(sc) == CHELSIO_T5) { + INIT_TP_WR(cpl5, 0); + } else { + INIT_TP_WR(cpl6, 0); + cpl6->rsvd2 = 0; + cpl6->opt3 = 0; + } + + OPCODE_TID(cpl) = htobe32(MK_OPCODE_TID(CPL_ACT_OPEN_REQ6, + V_TID_QID(sc->sge.fwq.abs_id) | V_TID_TID(atid) | + V_TID_COOKIE(CPL_COOKIE_HASHFILTER))); + cpl->local_port = htobe16(f->fs.val.dport); + cpl->peer_port = htobe16(f->fs.val.sport); + cpl->local_ip_hi = *(uint64_t *)(&f->fs.val.dip); + cpl->local_ip_lo = *(((uint64_t *)&f->fs.val.dip) + 1); + cpl->peer_ip_hi = *(uint64_t *)(&f->fs.val.sip); + cpl->peer_ip_lo = *(((uint64_t *)&f->fs.val.sip) + 1); + cpl->opt0 = htobe64(V_NAGLE(f->fs.newvlan == VLAN_REMOVE || + f->fs.newvlan == VLAN_REWRITE) | V_DELACK(f->fs.hitcnts) | + V_L2T_IDX(f->l2te ? f->l2te->idx : 0) | V_TX_CHAN(f->fs.eport) | + V_NO_CONG(f->fs.rpttid) | F_TCAM_BYPASS | F_NON_OFFLOAD); + + cpl6->params = htobe64(V_FILTER_TUPLE(hashfilter_ntuple(sc, &f->fs))); + cpl6->opt2 = htobe32(F_RSS_QUEUE_VALID | V_RSS_QUEUE(f->fs.iq) | + F_T5_OPT_2_VALID | F_RX_CHANNEL | + V_CONG_CNTRL((f->fs.action == FILTER_DROP) | (f->fs.dirsteer << 1)) | + V_PACE(f->fs.maskhash | (f->fs.dirsteerhash << 1))); +} + +static void +mk_act_open_req(struct adapter *sc, struct filter_entry *f, int atid, + struct cpl_act_open_req *cpl) +{ + struct cpl_t5_act_open_req *cpl5 = (void *)cpl; + struct cpl_t6_act_open_req *cpl6 = (void *)cpl; + + /* Review changes to CPL after cpl_t6_act_open_req if this goes off. */ + MPASS(chip_id(sc) >= CHELSIO_T5 && chip_id(sc) <= CHELSIO_T6); + MPASS(atid >= 0); + + if (chip_id(sc) == CHELSIO_T5) { + INIT_TP_WR(cpl5, 0); + } else { + INIT_TP_WR(cpl6, 0); + cpl6->rsvd2 = 0; + cpl6->opt3 = 0; + } + + OPCODE_TID(cpl) = htobe32(MK_OPCODE_TID(CPL_ACT_OPEN_REQ, + V_TID_QID(sc->sge.fwq.abs_id) | V_TID_TID(atid) | + V_TID_COOKIE(CPL_COOKIE_HASHFILTER))); + cpl->local_port = htobe16(f->fs.val.dport); + cpl->peer_port = htobe16(f->fs.val.sport); + cpl->local_ip = f->fs.val.dip[0] | f->fs.val.dip[1] << 8 | + f->fs.val.dip[2] << 16 | f->fs.val.dip[3] << 24; + cpl->peer_ip = f->fs.val.sip[0] | f->fs.val.sip[1] << 8 | + f->fs.val.sip[2] << 16 | f->fs.val.sip[3] << 24; + cpl->opt0 = htobe64(V_NAGLE(f->fs.newvlan == VLAN_REMOVE || + f->fs.newvlan == VLAN_REWRITE) | V_DELACK(f->fs.hitcnts) | + V_L2T_IDX(f->l2te ? f->l2te->idx : 0) | V_TX_CHAN(f->fs.eport) | + V_NO_CONG(f->fs.rpttid) | F_TCAM_BYPASS | F_NON_OFFLOAD); + + cpl6->params = htobe64(V_FILTER_TUPLE(hashfilter_ntuple(sc, &f->fs))); + cpl6->opt2 = htobe32(F_RSS_QUEUE_VALID | V_RSS_QUEUE(f->fs.iq) | + F_T5_OPT_2_VALID | F_RX_CHANNEL | + V_CONG_CNTRL((f->fs.action == FILTER_DROP) | (f->fs.dirsteer << 1)) | + V_PACE(f->fs.maskhash | (f->fs.dirsteerhash << 1))); +} + +static int +act_open_cpl_len16(struct adapter *sc, int isipv6) +{ + int idx; + static const int sz_table[3][2] = { + { + howmany(sizeof (struct cpl_act_open_req), 16), + howmany(sizeof (struct cpl_act_open_req6), 16) + }, + { + howmany(sizeof (struct cpl_t5_act_open_req), 16), + howmany(sizeof (struct cpl_t5_act_open_req6), 16) + }, + { + howmany(sizeof (struct cpl_t6_act_open_req), 16), + howmany(sizeof (struct cpl_t6_act_open_req6), 16) + }, + }; + + MPASS(chip_id(sc) >= CHELSIO_T4); + idx = min(chip_id(sc) - CHELSIO_T4, 2); + + return (sz_table[idx][!!isipv6]); +} + +static int +set_hashfilter(struct adapter *sc, struct t4_filter *t, struct l2t_entry *l2te) +{ + void *wr; + struct wrq_cookie cookie; + struct filter_entry *f; + int rc, atid = -1; + + MPASS(t->fs.hash); + /* Already validated against fconf, iconf */ + MPASS((t->fs.val.pfvf_vld & t->fs.val.ovlan_vld) == 0); + MPASS((t->fs.mask.pfvf_vld & t->fs.mask.ovlan_vld) == 0); + + mtx_lock(&sc->tids.hftid_lock); + + /* + * XXX: Check for hash collisions and insert in the hash based lookup + * table so that in-flight hashfilters are also considered when checking + * for collisions. + */ + + f = malloc(sizeof(*f), M_CXGBE, M_ZERO | M_NOWAIT); + if (__predict_false(f == NULL)) { + if (l2te) + t4_l2t_release(l2te); + rc = ENOMEM; goto done; } + f->fs = t->fs; + f->l2te = l2te; - if (!(sc->flags & FULL_INIT_DONE)) { + atid = alloc_atid(sc, f); + if (__predict_false(atid) == -1) { + if (l2te) + t4_l2t_release(l2te); + free(f, M_CXGBE); rc = EAGAIN; goto done; } + MPASS(atid >= 0); - f = &sc->tids.ftid_tab[t->idx]; + wr = start_wrq_wr(&sc->sge.mgmtq, act_open_cpl_len16(sc, f->fs.type), + &cookie); + if (wr == NULL) { + free_atid(sc, atid); + if (l2te) + t4_l2t_release(l2te); + free(f, M_CXGBE); + rc = ENOMEM; + goto done; + } + if (f->fs.type) + mk_act_open_req6(sc, f, atid, wr); + else + mk_act_open_req(sc, f, atid, wr); - if (f->pending) { - rc = EBUSY; + f->locked = 1; /* ithread mustn't free f if ioctl is still around. */ + f->pending = 1; + f->tid = -1; + commit_wrq_wr(&sc->sge.mgmtq, wr, &cookie); + + for (;;) { + MPASS(f->locked); + if (f->pending == 0) { + if (f->valid) { + rc = 0; + f->locked = 0; + t->idx = f->tid; + } else { + rc = f->tid; + free(f, M_CXGBE); + } + break; + } + if (cv_wait_sig(&sc->tids.hftid_cv, &sc->tids.hftid_lock) != 0) { + f->locked = 0; + rc = EINPROGRESS; + break; + } + } +done: + mtx_unlock(&sc->tids.hftid_lock); + return (rc); +} + +/* SET_TCB_FIELD sent as a ULP command looks like this */ +#define LEN__SET_TCB_FIELD_ULP (sizeof(struct ulp_txpkt) + \ + sizeof(struct ulptx_idata) + sizeof(struct cpl_set_tcb_field_core)) + +static void * +mk_set_tcb_field_ulp(struct ulp_txpkt *ulpmc, uint64_t word, uint64_t mask, + uint64_t val, uint32_t tid, uint32_t qid) +{ + struct ulptx_idata *ulpsc; + struct cpl_set_tcb_field_core *req; + + ulpmc->cmd_dest = htonl(V_ULPTX_CMD(ULP_TX_PKT) | V_ULP_TXPKT_DEST(0)); + ulpmc->len = htobe32(howmany(LEN__SET_TCB_FIELD_ULP, 16)); + + ulpsc = (struct ulptx_idata *)(ulpmc + 1); + ulpsc->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_IMM)); + ulpsc->len = htobe32(sizeof(*req)); + + req = (struct cpl_set_tcb_field_core *)(ulpsc + 1); + OPCODE_TID(req) = htobe32(MK_OPCODE_TID(CPL_SET_TCB_FIELD, tid)); + req->reply_ctrl = htobe16(V_NO_REPLY(1) | V_QUEUENO(qid)); + req->word_cookie = htobe16(V_WORD(word) | V_COOKIE(0)); + req->mask = htobe64(mask); + req->val = htobe64(val); + + ulpsc = (struct ulptx_idata *)(req + 1); + if (LEN__SET_TCB_FIELD_ULP % 16) { + ulpsc->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_NOOP)); + ulpsc->len = htobe32(0); + return (ulpsc + 1); + } + return (ulpsc); +} + +/* ABORT_REQ sent as a ULP command looks like this */ +#define LEN__ABORT_REQ_ULP (sizeof(struct ulp_txpkt) + \ + sizeof(struct ulptx_idata) + sizeof(struct cpl_abort_req_core)) + +static void * +mk_abort_req_ulp(struct ulp_txpkt *ulpmc, uint32_t tid) +{ + struct ulptx_idata *ulpsc; + struct cpl_abort_req_core *req; + + ulpmc->cmd_dest = htonl(V_ULPTX_CMD(ULP_TX_PKT) | V_ULP_TXPKT_DEST(0)); + ulpmc->len = htobe32(howmany(LEN__ABORT_REQ_ULP, 16)); + + ulpsc = (struct ulptx_idata *)(ulpmc + 1); + ulpsc->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_IMM)); + ulpsc->len = htobe32(sizeof(*req)); + + req = (struct cpl_abort_req_core *)(ulpsc + 1); + OPCODE_TID(req) = htobe32(MK_OPCODE_TID(CPL_ABORT_REQ, tid)); + req->rsvd0 = htonl(0); + req->rsvd1 = 0; + req->cmd = CPL_ABORT_NO_RST; + + ulpsc = (struct ulptx_idata *)(req + 1); + if (LEN__ABORT_REQ_ULP % 16) { + ulpsc->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_NOOP)); + ulpsc->len = htobe32(0); + return (ulpsc + 1); + } + return (ulpsc); +} + +/* ABORT_RPL sent as a ULP command looks like this */ +#define LEN__ABORT_RPL_ULP (sizeof(struct ulp_txpkt) + \ + sizeof(struct ulptx_idata) + sizeof(struct cpl_abort_rpl_core)) + +static void * +mk_abort_rpl_ulp(struct ulp_txpkt *ulpmc, uint32_t tid) +{ + struct ulptx_idata *ulpsc; + struct cpl_abort_rpl_core *rpl; + + ulpmc->cmd_dest = htonl(V_ULPTX_CMD(ULP_TX_PKT) | V_ULP_TXPKT_DEST(0)); + ulpmc->len = htobe32(howmany(LEN__ABORT_RPL_ULP, 16)); + + ulpsc = (struct ulptx_idata *)(ulpmc + 1); + ulpsc->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_IMM)); + ulpsc->len = htobe32(sizeof(*rpl)); + + rpl = (struct cpl_abort_rpl_core *)(ulpsc + 1); + OPCODE_TID(rpl) = htobe32(MK_OPCODE_TID(CPL_ABORT_RPL, tid)); + rpl->rsvd0 = htonl(0); + rpl->rsvd1 = 0; + rpl->cmd = CPL_ABORT_NO_RST; + + ulpsc = (struct ulptx_idata *)(rpl + 1); + if (LEN__ABORT_RPL_ULP % 16) { + ulpsc->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_NOOP)); + ulpsc->len = htobe32(0); + return (ulpsc + 1); + } + return (ulpsc); +} + +static inline int +del_hashfilter_wrlen(void) +{ + + return (sizeof(struct work_request_hdr) + + roundup2(LEN__SET_TCB_FIELD_ULP, 16) + + roundup2(LEN__ABORT_REQ_ULP, 16) + + roundup2(LEN__ABORT_RPL_ULP, 16)); +} + +static void +mk_del_hashfilter_wr(int tid, struct work_request_hdr *wrh, int wrlen, int qid) +{ + struct ulp_txpkt *ulpmc; + + INIT_ULPTX_WRH(wrh, wrlen, 0, 0); + ulpmc = (struct ulp_txpkt *)(wrh + 1); + ulpmc = mk_set_tcb_field_ulp(ulpmc, W_TCB_RSS_INFO, + V_TCB_RSS_INFO(M_TCB_RSS_INFO), V_TCB_RSS_INFO(qid), tid, 0); + ulpmc = mk_abort_req_ulp(ulpmc, tid); + ulpmc = mk_abort_rpl_ulp(ulpmc, tid); +} + +static int +del_hashfilter(struct adapter *sc, struct t4_filter *t) +{ + void *wr; + struct filter_entry *f; + struct wrq_cookie cookie; + int rc; + const int wrlen = del_hashfilter_wrlen(); + + MPASS(sc->tids.hftid_tab != NULL); + MPASS(sc->tids.ntids > 0); + + if (t->idx >= sc->tids.ntids) + return (EINVAL); + + mtx_lock(&sc->tids.hftid_lock); + f = lookup_hftid(sc, t->idx); + if (f == NULL || f->valid == 0) { + rc = EINVAL; goto done; } + MPASS(f->tid == t->idx); if (f->locked) { rc = EPERM; goto done; } - - if (f->valid) { - t->fs = f->fs; /* extra info for the caller */ - rc = del_filter_wr(sc, t->idx); + if (f->pending) { + rc = EBUSY; + goto done; } + wr = start_wrq_wr(&sc->sge.mgmtq, howmany(wrlen, 16), &cookie); + if (wr == NULL) { + rc = ENOMEM; + goto done; + } -done: - end_synchronized_op(sc, 0); + mk_del_hashfilter_wr(t->idx, wr, wrlen, sc->sge.fwq.abs_id); + f->locked = 1; + f->pending = 1; + commit_wrq_wr(&sc->sge.mgmtq, wr, &cookie); + t->fs = f->fs; /* extra info for the caller */ - if (rc == 0) { - mtx_lock(&sc->tids.ftid_lock); - for (;;) { - if (f->pending == 0) { - rc = f->valid ? EIO : 0; - break; + for (;;) { + MPASS(f->locked); + if (f->pending == 0) { + if (f->valid) { + f->locked = 0; + rc = EIO; + } else { + rc = 0; + free(f, M_CXGBE); } - - if (mtx_sleep(&sc->tids.ftid_tab, &sc->tids.ftid_lock, - PCATCH, "t4delfw", 0)) { - rc = EINPROGRESS; - break; - } + break; } - mtx_unlock(&sc->tids.ftid_lock); + if (cv_wait_sig(&sc->tids.hftid_cv, &sc->tids.hftid_lock) != 0) { + f->locked = 0; + rc = EINPROGRESS; + break; + } } - +done: + mtx_unlock(&sc->tids.hftid_lock); return (rc); } -static void -clear_filter(struct filter_entry *f) +static int +set_tcb_field(struct adapter *sc, u_int tid, uint16_t word, uint64_t mask, + uint64_t val, int no_reply) { - if (f->l2t) - t4_l2t_release(f->l2t); + struct wrq_cookie cookie; + struct cpl_set_tcb_field *req; - bzero(f, sizeof (*f)); + req = start_wrq_wr(&sc->sge.mgmtq, howmany(sizeof(*req), 16), &cookie); + if (req == NULL) + return (ENOMEM); + bzero(req, sizeof(*req)); + INIT_TP_WR_MIT_CPL(req, CPL_SET_TCB_FIELD, tid); + if (no_reply == 0) { + req->reply_ctrl = htobe16(V_QUEUENO(sc->sge.fwq.abs_id) | + V_NO_REPLY(0)); + } else + req->reply_ctrl = htobe16(V_NO_REPLY(1)); + req->word_cookie = htobe16(V_WORD(word) | V_COOKIE(CPL_COOKIE_HASHFILTER)); + req->mask = htobe64(mask); + req->val = htobe64(val); + commit_wrq_wr(&sc->sge.mgmtq, req, &cookie); + + return (0); } -int -t4_filter_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) +/* Set one of the t_flags bits in the TCB. */ +static inline int +set_tcb_tflag(struct adapter *sc, int tid, u_int bit_pos, u_int val) { - struct adapter *sc = iq->adapter; - const struct cpl_set_tcb_rpl *rpl = (const void *)(rss + 1); - unsigned int idx = GET_TID(rpl); - unsigned int rc; - struct filter_entry *f; - KASSERT(m == NULL, ("%s: payload with opcode %02x", __func__, - rss->opcode)); - MPASS(iq == &sc->sge.fwq); - MPASS(is_ftid(sc, idx)); + return (set_tcb_field(sc, tid, W_TCB_T_FLAGS, 1ULL << bit_pos, + (uint64_t)val << bit_pos, 1)); +} - idx -= sc->tids.ftid_base; - f = &sc->tids.ftid_tab[idx]; - rc = G_COOKIE(rpl->cookie); +/* + * Returns EINPROGRESS to indicate that at least one TCB update was sent and the + * last of the series of updates requested a reply. The reply informs the + * driver that the filter is fully setup. + */ +static int +configure_hashfilter_tcb(struct adapter *sc, struct filter_entry *f) +{ + int updated = 0; - mtx_lock(&sc->tids.ftid_lock); - if (rc == FW_FILTER_WR_FLT_ADDED) { - KASSERT(f->pending, ("%s: filter[%u] isn't pending.", - __func__, idx)); - f->smtidx = (be64toh(rpl->oldval) >> 24) & 0xff; - f->pending = 0; /* asynchronous setup completed */ - f->valid = 1; - } else { - if (rc != FW_FILTER_WR_FLT_DELETED) { - /* Add or delete failed, display an error */ - log(LOG_ERR, - "filter %u setup failed with error %u\n", - idx, rc); - } + MPASS(f->tid < sc->tids.ntids); + MPASS(f->fs.hash); + MPASS(f->pending); + MPASS(f->valid == 0); - clear_filter(f); - sc->tids.ftids_in_use--; + if (f->fs.newdmac) { + set_tcb_tflag(sc, f->tid, S_TF_CCTRL_ECE, 1); + updated++; } - wakeup(&sc->tids.ftid_tab); - mtx_unlock(&sc->tids.ftid_lock); + + if (f->fs.newvlan == VLAN_INSERT || f->fs.newvlan == VLAN_REWRITE) { + set_tcb_tflag(sc, f->tid, S_TF_CCTRL_RFR, 1); + updated++; + } + + if (f->fs.hitcnts || updated > 0) { + set_tcb_field(sc, f->tid, W_TCB_TIMESTAMP, + V_TCB_TIMESTAMP(M_TCB_TIMESTAMP) | + V_TCB_T_RTT_TS_RECENT_AGE(M_TCB_T_RTT_TS_RECENT_AGE), + V_TCB_TIMESTAMP(0ULL) | V_TCB_T_RTT_TS_RECENT_AGE(0ULL), 0); + return (EINPROGRESS); + } return (0); } Index: head/sys/dev/cxgbe/t4_ioctl.h =================================================================== --- head/sys/dev/cxgbe/t4_ioctl.h (revision 333393) +++ head/sys/dev/cxgbe/t4_ioctl.h (revision 333394) @@ -1,411 +1,412 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2011 Chelsio Communications, Inc. * All rights reserved. * Written by: Navdeep Parhar * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ * */ #ifndef __T4_IOCTL_H__ #define __T4_IOCTL_H__ #include #include #include /* * Ioctl commands specific to this driver. */ enum { T4_GETREG = 0x40, /* read register */ T4_SETREG, /* write register */ T4_REGDUMP, /* dump of all registers */ T4_GET_FILTER_MODE, /* get global filter mode */ T4_SET_FILTER_MODE, /* set global filter mode */ T4_GET_FILTER, /* get information about a filter */ T4_SET_FILTER, /* program a filter */ T4_DEL_FILTER, /* delete a filter */ T4_GET_SGE_CONTEXT, /* get SGE context for a queue */ T4_LOAD_FW, /* flash firmware */ T4_GET_MEM, /* read memory */ T4_GET_I2C, /* read from i2c addressible device */ T4_CLEAR_STATS, /* clear a port's MAC statistics */ T4_SET_OFLD_POLICY, /* Set offload policy */ T4_SET_SCHED_CLASS, /* set sched class */ T4_SET_SCHED_QUEUE, /* set queue class */ T4_GET_TRACER, /* get information about a tracer */ T4_SET_TRACER, /* program a tracer */ T4_LOAD_CFG, /* copy a config file to card's flash */ T4_LOAD_BOOT, /* flash boot rom */ T4_LOAD_BOOTCFG, /* flash bootcfg */ T4_CUDBG_DUMP, /* debug dump of chip state */ }; struct t4_reg { uint32_t addr; uint32_t size; uint64_t val; }; #define T4_REGDUMP_SIZE (160 * 1024) #define T5_REGDUMP_SIZE (332 * 1024) struct t4_regdump { uint32_t version; uint32_t len; /* bytes */ uint32_t *data; }; struct t4_data { uint32_t len; uint8_t *data; }; struct t4_bootrom { uint32_t pf_offset; uint32_t pfidx_addr; uint32_t len; uint8_t *data; }; struct t4_i2c_data { uint8_t port_id; uint8_t dev_addr; uint8_t offset; uint8_t len; uint8_t data[8]; }; /* * A hardware filter is some valid combination of these. */ #define T4_FILTER_IPv4 0x1 /* IPv4 packet */ #define T4_FILTER_IPv6 0x2 /* IPv6 packet */ #define T4_FILTER_IP_SADDR 0x4 /* Source IP address or network */ #define T4_FILTER_IP_DADDR 0x8 /* Destination IP address or network */ #define T4_FILTER_IP_SPORT 0x10 /* Source IP port */ #define T4_FILTER_IP_DPORT 0x20 /* Destination IP port */ #define T4_FILTER_FCoE 0x40 /* Fibre Channel over Ethernet packet */ #define T4_FILTER_PORT 0x80 /* Physical ingress port */ #define T4_FILTER_VNIC 0x100 /* VNIC id or outer VLAN */ #define T4_FILTER_VLAN 0x200 /* VLAN ID */ #define T4_FILTER_IP_TOS 0x400 /* IPv4 TOS/IPv6 Traffic Class */ #define T4_FILTER_IP_PROTO 0x800 /* IP protocol */ #define T4_FILTER_ETH_TYPE 0x1000 /* Ethernet Type */ #define T4_FILTER_MAC_IDX 0x2000 /* MPS MAC address match index */ #define T4_FILTER_MPS_HIT_TYPE 0x4000 /* MPS match type */ #define T4_FILTER_IP_FRAGMENT 0x8000 /* IP fragment */ #define T4_FILTER_IC_VNIC 0x80000000 /* TP Ingress Config's F_VNIC bit. It indicates whether T4_FILTER_VNIC bit means VNIC id (PF/VF) or outer VLAN. 0 = oVLAN, 1 = VNIC */ /* Filter action */ enum { FILTER_PASS = 0, /* default */ FILTER_DROP, FILTER_SWITCH }; /* 802.1q manipulation on FILTER_SWITCH */ enum { VLAN_NOCHANGE = 0, /* default */ VLAN_REMOVE, VLAN_INSERT, VLAN_REWRITE }; /* MPS match type */ enum { UCAST_EXACT = 0, /* exact unicast match */ UCAST_HASH = 1, /* inexact (hashed) unicast match */ MCAST_EXACT = 2, /* exact multicast match */ MCAST_HASH = 3, /* inexact (hashed) multicast match */ PROMISC = 4, /* no match but port is promiscuous */ HYPPROMISC = 5, /* port is hypervisor-promisuous + not bcast */ BCAST = 6, /* broadcast packet */ }; /* Rx steering */ enum { DST_MODE_QUEUE, /* queue is directly specified by filter */ DST_MODE_RSS_QUEUE, /* filter specifies RSS entry containing queue */ DST_MODE_RSS, /* queue selected by default RSS hash lookup */ DST_MODE_FILT_RSS /* queue selected by hashing in filter-specified RSS subtable */ }; struct t4_filter_tuple { /* * These are always available. */ uint8_t sip[16]; /* source IP address (IPv4 in [3:0]) */ uint8_t dip[16]; /* destinatin IP address (IPv4 in [3:0]) */ uint16_t sport; /* source port */ uint16_t dport; /* destination port */ /* * A combination of these (up to 36 bits) is available. TP_VLAN_PRI_MAP * is used to select the global mode and all filters are limited to the * set of fields allowed by the global mode. */ uint16_t vnic; /* VNIC id (PF/VF) or outer VLAN tag */ uint16_t vlan; /* VLAN tag */ uint16_t ethtype; /* Ethernet type */ uint8_t tos; /* TOS/Traffic Type */ uint8_t proto; /* protocol type */ uint32_t fcoe:1; /* FCoE packet */ uint32_t iport:3; /* ingress port */ uint32_t matchtype:3; /* MPS match type */ uint32_t frag:1; /* fragmentation extension header */ uint32_t macidx:9; /* exact match MAC index */ uint32_t vlan_vld:1; /* VLAN valid */ uint32_t ovlan_vld:1; /* outer VLAN tag valid, value in "vnic" */ uint32_t pfvf_vld:1; /* VNIC id (PF/VF) valid, value in "vnic" */ }; struct t4_filter_specification { uint32_t hitcnts:1; /* count filter hits in TCB */ uint32_t prio:1; /* filter has priority over active/server */ uint32_t type:1; /* 0 => IPv4, 1 => IPv6 */ + uint32_t hash:1; /* 0 => LE TCAM, 1 => Hash */ uint32_t action:2; /* drop, pass, switch */ uint32_t rpttid:1; /* report TID in RSS hash field */ uint32_t dirsteer:1; /* 0 => RSS, 1 => steer to iq */ uint32_t iq:10; /* ingress queue */ uint32_t maskhash:1; /* dirsteer=0: store RSS hash in TCB */ uint32_t dirsteerhash:1;/* dirsteer=1: 0 => TCB contains RSS hash */ /* 1 => TCB contains IQ ID */ /* * Switch proxy/rewrite fields. An ingress packet which matches a * filter with "switch" set will be looped back out as an egress * packet -- potentially with some Ethernet header rewriting. */ uint32_t eport:2; /* egress port to switch packet out */ uint32_t newdmac:1; /* rewrite destination MAC address */ uint32_t newsmac:1; /* rewrite source MAC address */ uint32_t newvlan:2; /* rewrite VLAN Tag */ uint8_t dmac[ETHER_ADDR_LEN]; /* new destination MAC address */ uint8_t smac[ETHER_ADDR_LEN]; /* new source MAC address */ uint16_t vlan; /* VLAN Tag to insert */ /* * Filter rule value/mask pairs. */ struct t4_filter_tuple val; struct t4_filter_tuple mask; }; struct t4_filter { uint32_t idx; uint16_t l2tidx; uint16_t smtidx; uint64_t hits; struct t4_filter_specification fs; }; /* Tx Scheduling Class parameters */ struct t4_sched_class_params { int8_t level; /* scheduler hierarchy level */ int8_t mode; /* per-class or per-flow */ int8_t rateunit; /* bit or packet rate */ int8_t ratemode; /* %port relative or kbps absolute */ int8_t channel; /* scheduler channel [0..N] */ int8_t cl; /* scheduler class [0..N] */ int32_t minrate; /* minimum rate */ int32_t maxrate; /* maximum rate */ int16_t weight; /* percent weight */ int16_t pktsize; /* average packet size */ }; /* * Support for "sched-class" command to allow a TX Scheduling Class to be * programmed with various parameters. */ struct t4_sched_params { int8_t subcmd; /* sub-command */ int8_t type; /* packet or flow */ union { struct { /* sub-command SCHED_CLASS_CONFIG */ int8_t minmax; /* minmax enable */ } config; struct t4_sched_class_params params; uint8_t reserved[6 + 8 * 8]; } u; }; enum { SCHED_CLASS_SUBCMD_CONFIG, /* config sub-command */ SCHED_CLASS_SUBCMD_PARAMS, /* params sub-command */ }; enum { SCHED_CLASS_TYPE_PACKET, }; enum { SCHED_CLASS_LEVEL_CL_RL, /* class rate limiter */ SCHED_CLASS_LEVEL_CL_WRR, /* class weighted round robin */ SCHED_CLASS_LEVEL_CH_RL, /* channel rate limiter */ }; enum { SCHED_CLASS_MODE_CLASS, /* per-class scheduling */ SCHED_CLASS_MODE_FLOW, /* per-flow scheduling */ }; enum { SCHED_CLASS_RATEUNIT_BITS, /* bit rate scheduling */ SCHED_CLASS_RATEUNIT_PKTS, /* packet rate scheduling */ }; enum { SCHED_CLASS_RATEMODE_REL, /* percent of port bandwidth */ SCHED_CLASS_RATEMODE_ABS, /* Kb/s */ }; /* * Support for "sched_queue" command to allow one or more NIC TX Queues to be * bound to a TX Scheduling Class. */ struct t4_sched_queue { uint8_t port; int8_t queue; /* queue index; -1 => all queues */ int8_t cl; /* class index; -1 => unbind */ }; #define T4_SGE_CONTEXT_SIZE 24 enum { SGE_CONTEXT_EGRESS, SGE_CONTEXT_INGRESS, SGE_CONTEXT_FLM, SGE_CONTEXT_CNM }; struct t4_sge_context { uint32_t mem_id; uint32_t cid; uint32_t data[T4_SGE_CONTEXT_SIZE / 4]; }; struct t4_mem_range { uint32_t addr; uint32_t len; uint32_t *data; }; #define T4_TRACE_LEN 112 struct t4_trace_params { uint32_t data[T4_TRACE_LEN / 4]; uint32_t mask[T4_TRACE_LEN / 4]; uint16_t snap_len; uint16_t min_len; uint8_t skip_ofst; uint8_t skip_len; uint8_t invert; uint8_t port; }; struct t4_tracer { uint8_t idx; uint8_t enabled; uint8_t valid; struct t4_trace_params tp; }; struct t4_cudbg_dump { uint8_t wr_flash; uint8_t bitmap[16]; uint32_t len; uint8_t *data; }; enum { OPEN_TYPE_LISTEN = 'L', OPEN_TYPE_ACTIVE = 'A', OPEN_TYPE_PASSIVE = 'P', OPEN_TYPE_DONTCARE = 'D', }; struct offload_settings { int8_t offload; int8_t rx_coalesce; int8_t cong_algo; int8_t sched_class; int8_t tstamp; int8_t sack; int8_t nagle; int8_t ecn; int8_t ddp; int8_t tls; int16_t txq; int16_t rxq; int16_t mss; }; struct offload_rule { char open_type; struct offload_settings settings; struct bpf_program bpf_prog; /* compiled program/filter */ }; /* * An offload policy consists of a set of rules matched in sequence. The * settings of the first rule that matches are applied to that connection. */ struct t4_offload_policy { uint32_t nrules; struct offload_rule *rule; }; #define CHELSIO_T4_GETREG _IOWR('f', T4_GETREG, struct t4_reg) #define CHELSIO_T4_SETREG _IOW('f', T4_SETREG, struct t4_reg) #define CHELSIO_T4_REGDUMP _IOWR('f', T4_REGDUMP, struct t4_regdump) #define CHELSIO_T4_GET_FILTER_MODE _IOWR('f', T4_GET_FILTER_MODE, uint32_t) #define CHELSIO_T4_SET_FILTER_MODE _IOW('f', T4_SET_FILTER_MODE, uint32_t) #define CHELSIO_T4_GET_FILTER _IOWR('f', T4_GET_FILTER, struct t4_filter) -#define CHELSIO_T4_SET_FILTER _IOW('f', T4_SET_FILTER, struct t4_filter) +#define CHELSIO_T4_SET_FILTER _IOWR('f', T4_SET_FILTER, struct t4_filter) #define CHELSIO_T4_DEL_FILTER _IOW('f', T4_DEL_FILTER, struct t4_filter) #define CHELSIO_T4_GET_SGE_CONTEXT _IOWR('f', T4_GET_SGE_CONTEXT, \ struct t4_sge_context) #define CHELSIO_T4_LOAD_FW _IOW('f', T4_LOAD_FW, struct t4_data) #define CHELSIO_T4_GET_MEM _IOW('f', T4_GET_MEM, struct t4_mem_range) #define CHELSIO_T4_GET_I2C _IOWR('f', T4_GET_I2C, struct t4_i2c_data) #define CHELSIO_T4_CLEAR_STATS _IOW('f', T4_CLEAR_STATS, uint32_t) #define CHELSIO_T4_SCHED_CLASS _IOW('f', T4_SET_SCHED_CLASS, \ struct t4_sched_params) #define CHELSIO_T4_SCHED_QUEUE _IOW('f', T4_SET_SCHED_QUEUE, \ struct t4_sched_queue) #define CHELSIO_T4_GET_TRACER _IOWR('f', T4_GET_TRACER, struct t4_tracer) #define CHELSIO_T4_SET_TRACER _IOW('f', T4_SET_TRACER, struct t4_tracer) #define CHELSIO_T4_LOAD_CFG _IOW('f', T4_LOAD_CFG, struct t4_data) #define CHELSIO_T4_LOAD_BOOT _IOW('f', T4_LOAD_BOOT, struct t4_bootrom) #define CHELSIO_T4_LOAD_BOOTCFG _IOW('f', T4_LOAD_BOOTCFG, struct t4_data) #define CHELSIO_T4_CUDBG_DUMP _IOWR('f', T4_CUDBG_DUMP, struct t4_cudbg_dump) #define CHELSIO_T4_SET_OFLD_POLICY _IOW('f', T4_SET_OFLD_POLICY, struct t4_offload_policy) #endif Index: head/sys/dev/cxgbe/t4_main.c =================================================================== --- head/sys/dev/cxgbe/t4_main.c (revision 333393) +++ head/sys/dev/cxgbe/t4_main.c (revision 333394) @@ -1,10005 +1,10051 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2011 Chelsio Communications, Inc. * All rights reserved. * Written by: Navdeep Parhar * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include "opt_inet.h" #include "opt_inet6.h" #include "opt_rss.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef RSS #include #endif #if defined(__i386__) || defined(__amd64__) #include #include #include #include #endif #include #ifdef DDB #include #include #endif #include "common/common.h" #include "common/t4_msg.h" #include "common/t4_regs.h" #include "common/t4_regs_values.h" #include "cudbg/cudbg.h" #include "t4_ioctl.h" #include "t4_l2t.h" #include "t4_mp_ring.h" #include "t4_if.h" /* T4 bus driver interface */ static int t4_probe(device_t); static int t4_attach(device_t); static int t4_detach(device_t); static int t4_ready(device_t); static int t4_read_port_device(device_t, int, device_t *); static device_method_t t4_methods[] = { DEVMETHOD(device_probe, t4_probe), DEVMETHOD(device_attach, t4_attach), DEVMETHOD(device_detach, t4_detach), DEVMETHOD(t4_is_main_ready, t4_ready), DEVMETHOD(t4_read_port_device, t4_read_port_device), DEVMETHOD_END }; static driver_t t4_driver = { "t4nex", t4_methods, sizeof(struct adapter) }; /* T4 port (cxgbe) interface */ static int cxgbe_probe(device_t); static int cxgbe_attach(device_t); static int cxgbe_detach(device_t); device_method_t cxgbe_methods[] = { DEVMETHOD(device_probe, cxgbe_probe), DEVMETHOD(device_attach, cxgbe_attach), DEVMETHOD(device_detach, cxgbe_detach), { 0, 0 } }; static driver_t cxgbe_driver = { "cxgbe", cxgbe_methods, sizeof(struct port_info) }; /* T4 VI (vcxgbe) interface */ static int vcxgbe_probe(device_t); static int vcxgbe_attach(device_t); static int vcxgbe_detach(device_t); static device_method_t vcxgbe_methods[] = { DEVMETHOD(device_probe, vcxgbe_probe), DEVMETHOD(device_attach, vcxgbe_attach), DEVMETHOD(device_detach, vcxgbe_detach), { 0, 0 } }; static driver_t vcxgbe_driver = { "vcxgbe", vcxgbe_methods, sizeof(struct vi_info) }; static d_ioctl_t t4_ioctl; static struct cdevsw t4_cdevsw = { .d_version = D_VERSION, .d_ioctl = t4_ioctl, .d_name = "t4nex", }; /* T5 bus driver interface */ static int t5_probe(device_t); static device_method_t t5_methods[] = { DEVMETHOD(device_probe, t5_probe), DEVMETHOD(device_attach, t4_attach), DEVMETHOD(device_detach, t4_detach), DEVMETHOD(t4_is_main_ready, t4_ready), DEVMETHOD(t4_read_port_device, t4_read_port_device), DEVMETHOD_END }; static driver_t t5_driver = { "t5nex", t5_methods, sizeof(struct adapter) }; /* T5 port (cxl) interface */ static driver_t cxl_driver = { "cxl", cxgbe_methods, sizeof(struct port_info) }; /* T5 VI (vcxl) interface */ static driver_t vcxl_driver = { "vcxl", vcxgbe_methods, sizeof(struct vi_info) }; /* T6 bus driver interface */ static int t6_probe(device_t); static device_method_t t6_methods[] = { DEVMETHOD(device_probe, t6_probe), DEVMETHOD(device_attach, t4_attach), DEVMETHOD(device_detach, t4_detach), DEVMETHOD(t4_is_main_ready, t4_ready), DEVMETHOD(t4_read_port_device, t4_read_port_device), DEVMETHOD_END }; static driver_t t6_driver = { "t6nex", t6_methods, sizeof(struct adapter) }; /* T6 port (cc) interface */ static driver_t cc_driver = { "cc", cxgbe_methods, sizeof(struct port_info) }; /* T6 VI (vcc) interface */ static driver_t vcc_driver = { "vcc", vcxgbe_methods, sizeof(struct vi_info) }; /* ifnet + media interface */ static void cxgbe_init(void *); static int cxgbe_ioctl(struct ifnet *, unsigned long, caddr_t); static int cxgbe_transmit(struct ifnet *, struct mbuf *); static void cxgbe_qflush(struct ifnet *); static int cxgbe_media_change(struct ifnet *); static void cxgbe_media_status(struct ifnet *, struct ifmediareq *); MALLOC_DEFINE(M_CXGBE, "cxgbe", "Chelsio T4/T5 Ethernet driver and services"); /* * Correct lock order when you need to acquire multiple locks is t4_list_lock, * then ADAPTER_LOCK, then t4_uld_list_lock. */ static struct sx t4_list_lock; SLIST_HEAD(, adapter) t4_list; #ifdef TCP_OFFLOAD static struct sx t4_uld_list_lock; SLIST_HEAD(, uld_info) t4_uld_list; #endif /* * Tunables. See tweak_tunables() too. * * Each tunable is set to a default value here if it's known at compile-time. * Otherwise it is set to -n as an indication to tweak_tunables() that it should * provide a reasonable default (upto n) when the driver is loaded. * * Tunables applicable to both T4 and T5 are under hw.cxgbe. Those specific to * T5 are under hw.cxl. */ /* * Number of queues for tx and rx, NIC and offload. */ #define NTXQ 16 int t4_ntxq = -NTXQ; TUNABLE_INT("hw.cxgbe.ntxq", &t4_ntxq); TUNABLE_INT("hw.cxgbe.ntxq10g", &t4_ntxq); /* Old name, undocumented */ #define NRXQ 8 int t4_nrxq = -NRXQ; TUNABLE_INT("hw.cxgbe.nrxq", &t4_nrxq); TUNABLE_INT("hw.cxgbe.nrxq10g", &t4_nrxq); /* Old name, undocumented */ #define NTXQ_VI 1 static int t4_ntxq_vi = -NTXQ_VI; TUNABLE_INT("hw.cxgbe.ntxq_vi", &t4_ntxq_vi); #define NRXQ_VI 1 static int t4_nrxq_vi = -NRXQ_VI; TUNABLE_INT("hw.cxgbe.nrxq_vi", &t4_nrxq_vi); static int t4_rsrv_noflowq = 0; TUNABLE_INT("hw.cxgbe.rsrv_noflowq", &t4_rsrv_noflowq); #ifdef TCP_OFFLOAD #define NOFLDTXQ 8 static int t4_nofldtxq = -NOFLDTXQ; TUNABLE_INT("hw.cxgbe.nofldtxq", &t4_nofldtxq); #define NOFLDRXQ 2 static int t4_nofldrxq = -NOFLDRXQ; TUNABLE_INT("hw.cxgbe.nofldrxq", &t4_nofldrxq); #define NOFLDTXQ_VI 1 static int t4_nofldtxq_vi = -NOFLDTXQ_VI; TUNABLE_INT("hw.cxgbe.nofldtxq_vi", &t4_nofldtxq_vi); #define NOFLDRXQ_VI 1 static int t4_nofldrxq_vi = -NOFLDRXQ_VI; TUNABLE_INT("hw.cxgbe.nofldrxq_vi", &t4_nofldrxq_vi); #define TMR_IDX_OFLD 1 int t4_tmr_idx_ofld = TMR_IDX_OFLD; TUNABLE_INT("hw.cxgbe.holdoff_timer_idx_ofld", &t4_tmr_idx_ofld); #define PKTC_IDX_OFLD (-1) int t4_pktc_idx_ofld = PKTC_IDX_OFLD; TUNABLE_INT("hw.cxgbe.holdoff_pktc_idx_ofld", &t4_pktc_idx_ofld); /* 0 means chip/fw default, non-zero number is value in microseconds */ static u_long t4_toe_keepalive_idle = 0; TUNABLE_ULONG("hw.cxgbe.toe.keepalive_idle", &t4_toe_keepalive_idle); /* 0 means chip/fw default, non-zero number is value in microseconds */ static u_long t4_toe_keepalive_interval = 0; TUNABLE_ULONG("hw.cxgbe.toe.keepalive_interval", &t4_toe_keepalive_interval); /* 0 means chip/fw default, non-zero number is # of keepalives before abort */ static int t4_toe_keepalive_count = 0; TUNABLE_INT("hw.cxgbe.toe.keepalive_count", &t4_toe_keepalive_count); /* 0 means chip/fw default, non-zero number is value in microseconds */ static u_long t4_toe_rexmt_min = 0; TUNABLE_ULONG("hw.cxgbe.toe.rexmt_min", &t4_toe_rexmt_min); /* 0 means chip/fw default, non-zero number is value in microseconds */ static u_long t4_toe_rexmt_max = 0; TUNABLE_ULONG("hw.cxgbe.toe.rexmt_max", &t4_toe_rexmt_max); /* 0 means chip/fw default, non-zero number is # of rexmt before abort */ static int t4_toe_rexmt_count = 0; TUNABLE_INT("hw.cxgbe.toe.rexmt_count", &t4_toe_rexmt_count); /* -1 means chip/fw default, other values are raw backoff values to use */ static int t4_toe_rexmt_backoff[16] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }; TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.0", &t4_toe_rexmt_backoff[0]); TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.1", &t4_toe_rexmt_backoff[1]); TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.2", &t4_toe_rexmt_backoff[2]); TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.3", &t4_toe_rexmt_backoff[3]); TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.4", &t4_toe_rexmt_backoff[4]); TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.5", &t4_toe_rexmt_backoff[5]); TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.6", &t4_toe_rexmt_backoff[6]); TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.7", &t4_toe_rexmt_backoff[7]); TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.8", &t4_toe_rexmt_backoff[8]); TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.9", &t4_toe_rexmt_backoff[9]); TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.10", &t4_toe_rexmt_backoff[10]); TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.11", &t4_toe_rexmt_backoff[11]); TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.12", &t4_toe_rexmt_backoff[12]); TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.13", &t4_toe_rexmt_backoff[13]); TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.14", &t4_toe_rexmt_backoff[14]); TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.15", &t4_toe_rexmt_backoff[15]); #endif #ifdef DEV_NETMAP #define NNMTXQ_VI 2 static int t4_nnmtxq_vi = -NNMTXQ_VI; TUNABLE_INT("hw.cxgbe.nnmtxq_vi", &t4_nnmtxq_vi); #define NNMRXQ_VI 2 static int t4_nnmrxq_vi = -NNMRXQ_VI; TUNABLE_INT("hw.cxgbe.nnmrxq_vi", &t4_nnmrxq_vi); #endif /* * Holdoff parameters for ports. */ #define TMR_IDX 1 int t4_tmr_idx = TMR_IDX; TUNABLE_INT("hw.cxgbe.holdoff_timer_idx", &t4_tmr_idx); TUNABLE_INT("hw.cxgbe.holdoff_timer_idx_10G", &t4_tmr_idx); /* Old name */ #define PKTC_IDX (-1) int t4_pktc_idx = PKTC_IDX; TUNABLE_INT("hw.cxgbe.holdoff_pktc_idx", &t4_pktc_idx); TUNABLE_INT("hw.cxgbe.holdoff_pktc_idx_10G", &t4_pktc_idx); /* Old name */ /* * Size (# of entries) of each tx and rx queue. */ unsigned int t4_qsize_txq = TX_EQ_QSIZE; TUNABLE_INT("hw.cxgbe.qsize_txq", &t4_qsize_txq); unsigned int t4_qsize_rxq = RX_IQ_QSIZE; TUNABLE_INT("hw.cxgbe.qsize_rxq", &t4_qsize_rxq); /* * Interrupt types allowed (bits 0, 1, 2 = INTx, MSI, MSI-X respectively). */ int t4_intr_types = INTR_MSIX | INTR_MSI | INTR_INTX; TUNABLE_INT("hw.cxgbe.interrupt_types", &t4_intr_types); /* * Configuration file. */ #define DEFAULT_CF "default" #define FLASH_CF "flash" #define UWIRE_CF "uwire" #define FPGA_CF "fpga" static char t4_cfg_file[32] = DEFAULT_CF; TUNABLE_STR("hw.cxgbe.config_file", t4_cfg_file, sizeof(t4_cfg_file)); /* * PAUSE settings (bit 0, 1 = rx_pause, tx_pause respectively). * rx_pause = 1 to heed incoming PAUSE frames, 0 to ignore them. * tx_pause = 1 to emit PAUSE frames when the rx FIFO reaches its high water * mark or when signalled to do so, 0 to never emit PAUSE. */ static int t4_pause_settings = PAUSE_TX | PAUSE_RX; TUNABLE_INT("hw.cxgbe.pause_settings", &t4_pause_settings); /* * Forward Error Correction settings (bit 0, 1, 2 = FEC_RS, FEC_BASER_RS, * FEC_RESERVED respectively). * -1 to run with the firmware default. * 0 to disable FEC. */ static int t4_fec = -1; TUNABLE_INT("hw.cxgbe.fec", &t4_fec); /* * Link autonegotiation. * -1 to run with the firmware default. * 0 to disable. * 1 to enable. */ static int t4_autoneg = -1; TUNABLE_INT("hw.cxgbe.autoneg", &t4_autoneg); /* * Firmware auto-install by driver during attach (0, 1, 2 = prohibited, allowed, * encouraged respectively). */ static unsigned int t4_fw_install = 1; TUNABLE_INT("hw.cxgbe.fw_install", &t4_fw_install); /* * ASIC features that will be used. Disable the ones you don't want so that the * chip resources aren't wasted on features that will not be used. */ static int t4_nbmcaps_allowed = 0; TUNABLE_INT("hw.cxgbe.nbmcaps_allowed", &t4_nbmcaps_allowed); static int t4_linkcaps_allowed = 0; /* No DCBX, PPP, etc. by default */ TUNABLE_INT("hw.cxgbe.linkcaps_allowed", &t4_linkcaps_allowed); static int t4_switchcaps_allowed = FW_CAPS_CONFIG_SWITCH_INGRESS | FW_CAPS_CONFIG_SWITCH_EGRESS; TUNABLE_INT("hw.cxgbe.switchcaps_allowed", &t4_switchcaps_allowed); -static int t4_niccaps_allowed = FW_CAPS_CONFIG_NIC; +static int t4_niccaps_allowed = FW_CAPS_CONFIG_NIC | + FW_CAPS_CONFIG_NIC_HASHFILTER; TUNABLE_INT("hw.cxgbe.niccaps_allowed", &t4_niccaps_allowed); static int t4_toecaps_allowed = -1; TUNABLE_INT("hw.cxgbe.toecaps_allowed", &t4_toecaps_allowed); static int t4_rdmacaps_allowed = -1; TUNABLE_INT("hw.cxgbe.rdmacaps_allowed", &t4_rdmacaps_allowed); static int t4_cryptocaps_allowed = -1; TUNABLE_INT("hw.cxgbe.cryptocaps_allowed", &t4_cryptocaps_allowed); static int t4_iscsicaps_allowed = -1; TUNABLE_INT("hw.cxgbe.iscsicaps_allowed", &t4_iscsicaps_allowed); static int t4_fcoecaps_allowed = 0; TUNABLE_INT("hw.cxgbe.fcoecaps_allowed", &t4_fcoecaps_allowed); static int t5_write_combine = 1; TUNABLE_INT("hw.cxl.write_combine", &t5_write_combine); static int t4_num_vis = 1; TUNABLE_INT("hw.cxgbe.num_vis", &t4_num_vis); /* * PCIe Relaxed Ordering. * -1: driver should figure out a good value. * 0: disable RO. * 1: enable RO. * 2: leave RO alone. */ static int pcie_relaxed_ordering = -1; TUNABLE_INT("hw.cxgbe.pcie_relaxed_ordering", &pcie_relaxed_ordering); static int t4_panic_on_fatal_err = 0; TUNABLE_INT("hw.cxgbe.panic_on_fatal_err", &t4_panic_on_fatal_err); #ifdef TCP_OFFLOAD /* * TOE tunables. */ static int t4_cop_managed_offloading = 0; TUNABLE_INT("hw.cxgbe.cop_managed_offloading", &t4_cop_managed_offloading); #endif /* Functions used by VIs to obtain unique MAC addresses for each VI. */ static int vi_mac_funcs[] = { FW_VI_FUNC_ETH, FW_VI_FUNC_OFLD, FW_VI_FUNC_IWARP, FW_VI_FUNC_OPENISCSI, FW_VI_FUNC_OPENFCOE, FW_VI_FUNC_FOISCSI, FW_VI_FUNC_FOFCOE, }; struct intrs_and_queues { uint16_t intr_type; /* INTx, MSI, or MSI-X */ uint16_t num_vis; /* number of VIs for each port */ uint16_t nirq; /* Total # of vectors */ uint16_t ntxq; /* # of NIC txq's for each port */ uint16_t nrxq; /* # of NIC rxq's for each port */ uint16_t nofldtxq; /* # of TOE txq's for each port */ uint16_t nofldrxq; /* # of TOE rxq's for each port */ /* The vcxgbe/vcxl interfaces use these and not the ones above. */ uint16_t ntxq_vi; /* # of NIC txq's */ uint16_t nrxq_vi; /* # of NIC rxq's */ uint16_t nofldtxq_vi; /* # of TOE txq's */ uint16_t nofldrxq_vi; /* # of TOE rxq's */ uint16_t nnmtxq_vi; /* # of netmap txq's */ uint16_t nnmrxq_vi; /* # of netmap rxq's */ }; static void setup_memwin(struct adapter *); static void position_memwin(struct adapter *, int, uint32_t); static int validate_mem_range(struct adapter *, uint32_t, int); static int fwmtype_to_hwmtype(int); static int validate_mt_off_len(struct adapter *, int, uint32_t, int, uint32_t *); static int fixup_devlog_params(struct adapter *); static int cfg_itype_and_nqueues(struct adapter *, struct intrs_and_queues *); static int prep_firmware(struct adapter *); static int partition_resources(struct adapter *, const struct firmware *, const char *); static int get_params__pre_init(struct adapter *); static int get_params__post_init(struct adapter *); static int set_params__post_init(struct adapter *); static void t4_set_desc(struct adapter *); static void build_medialist(struct port_info *, struct ifmedia *); static void init_l1cfg(struct port_info *); static int cxgbe_init_synchronized(struct vi_info *); static int cxgbe_uninit_synchronized(struct vi_info *); static void quiesce_txq(struct adapter *, struct sge_txq *); static void quiesce_wrq(struct adapter *, struct sge_wrq *); static void quiesce_iq(struct adapter *, struct sge_iq *); static void quiesce_fl(struct adapter *, struct sge_fl *); static int t4_alloc_irq(struct adapter *, struct irq *, int rid, driver_intr_t *, void *, char *); static int t4_free_irq(struct adapter *, struct irq *); static void get_regs(struct adapter *, struct t4_regdump *, uint8_t *); static void vi_refresh_stats(struct adapter *, struct vi_info *); static void cxgbe_refresh_stats(struct adapter *, struct port_info *); static void cxgbe_tick(void *); static void cxgbe_vlan_config(void *, struct ifnet *, uint16_t); static void cxgbe_sysctls(struct port_info *); static int sysctl_int_array(SYSCTL_HANDLER_ARGS); static int sysctl_bitfield(SYSCTL_HANDLER_ARGS); static int sysctl_btphy(SYSCTL_HANDLER_ARGS); static int sysctl_noflowq(SYSCTL_HANDLER_ARGS); static int sysctl_holdoff_tmr_idx(SYSCTL_HANDLER_ARGS); static int sysctl_holdoff_pktc_idx(SYSCTL_HANDLER_ARGS); static int sysctl_qsize_rxq(SYSCTL_HANDLER_ARGS); static int sysctl_qsize_txq(SYSCTL_HANDLER_ARGS); static int sysctl_pause_settings(SYSCTL_HANDLER_ARGS); static int sysctl_fec(SYSCTL_HANDLER_ARGS); static int sysctl_autoneg(SYSCTL_HANDLER_ARGS); static int sysctl_handle_t4_reg64(SYSCTL_HANDLER_ARGS); static int sysctl_temperature(SYSCTL_HANDLER_ARGS); #ifdef SBUF_DRAIN static int sysctl_cctrl(SYSCTL_HANDLER_ARGS); static int sysctl_cim_ibq_obq(SYSCTL_HANDLER_ARGS); static int sysctl_cim_la(SYSCTL_HANDLER_ARGS); static int sysctl_cim_la_t6(SYSCTL_HANDLER_ARGS); static int sysctl_cim_ma_la(SYSCTL_HANDLER_ARGS); static int sysctl_cim_pif_la(SYSCTL_HANDLER_ARGS); static int sysctl_cim_qcfg(SYSCTL_HANDLER_ARGS); static int sysctl_cpl_stats(SYSCTL_HANDLER_ARGS); static int sysctl_ddp_stats(SYSCTL_HANDLER_ARGS); static int sysctl_devlog(SYSCTL_HANDLER_ARGS); static int sysctl_fcoe_stats(SYSCTL_HANDLER_ARGS); static int sysctl_hw_sched(SYSCTL_HANDLER_ARGS); static int sysctl_lb_stats(SYSCTL_HANDLER_ARGS); static int sysctl_linkdnrc(SYSCTL_HANDLER_ARGS); static int sysctl_meminfo(SYSCTL_HANDLER_ARGS); static int sysctl_mps_tcam(SYSCTL_HANDLER_ARGS); static int sysctl_mps_tcam_t6(SYSCTL_HANDLER_ARGS); static int sysctl_path_mtus(SYSCTL_HANDLER_ARGS); static int sysctl_pm_stats(SYSCTL_HANDLER_ARGS); static int sysctl_rdma_stats(SYSCTL_HANDLER_ARGS); static int sysctl_tcp_stats(SYSCTL_HANDLER_ARGS); static int sysctl_tids(SYSCTL_HANDLER_ARGS); static int sysctl_tp_err_stats(SYSCTL_HANDLER_ARGS); static int sysctl_tp_la_mask(SYSCTL_HANDLER_ARGS); static int sysctl_tp_la(SYSCTL_HANDLER_ARGS); static int sysctl_tx_rate(SYSCTL_HANDLER_ARGS); static int sysctl_ulprx_la(SYSCTL_HANDLER_ARGS); static int sysctl_wcwr_stats(SYSCTL_HANDLER_ARGS); static int sysctl_tc_params(SYSCTL_HANDLER_ARGS); #endif #ifdef TCP_OFFLOAD static int sysctl_tls_rx_ports(SYSCTL_HANDLER_ARGS); static int sysctl_tp_tick(SYSCTL_HANDLER_ARGS); static int sysctl_tp_dack_timer(SYSCTL_HANDLER_ARGS); static int sysctl_tp_timer(SYSCTL_HANDLER_ARGS); static int sysctl_tp_shift_cnt(SYSCTL_HANDLER_ARGS); static int sysctl_tp_backoff(SYSCTL_HANDLER_ARGS); static int sysctl_holdoff_tmr_idx_ofld(SYSCTL_HANDLER_ARGS); static int sysctl_holdoff_pktc_idx_ofld(SYSCTL_HANDLER_ARGS); #endif static int get_sge_context(struct adapter *, struct t4_sge_context *); static int load_fw(struct adapter *, struct t4_data *); static int load_cfg(struct adapter *, struct t4_data *); static int load_boot(struct adapter *, struct t4_bootrom *); static int load_bootcfg(struct adapter *, struct t4_data *); static int cudbg_dump(struct adapter *, struct t4_cudbg_dump *); static void free_offload_policy(struct t4_offload_policy *); static int set_offload_policy(struct adapter *, struct t4_offload_policy *); static int read_card_mem(struct adapter *, int, struct t4_mem_range *); static int read_i2c(struct adapter *, struct t4_i2c_data *); #ifdef TCP_OFFLOAD static int toe_capability(struct vi_info *, int); #endif static int mod_event(module_t, int, void *); static int notify_siblings(device_t, int); struct { uint16_t device; char *desc; } t4_pciids[] = { {0xa000, "Chelsio Terminator 4 FPGA"}, {0x4400, "Chelsio T440-dbg"}, {0x4401, "Chelsio T420-CR"}, {0x4402, "Chelsio T422-CR"}, {0x4403, "Chelsio T440-CR"}, {0x4404, "Chelsio T420-BCH"}, {0x4405, "Chelsio T440-BCH"}, {0x4406, "Chelsio T440-CH"}, {0x4407, "Chelsio T420-SO"}, {0x4408, "Chelsio T420-CX"}, {0x4409, "Chelsio T420-BT"}, {0x440a, "Chelsio T404-BT"}, {0x440e, "Chelsio T440-LP-CR"}, }, t5_pciids[] = { {0xb000, "Chelsio Terminator 5 FPGA"}, {0x5400, "Chelsio T580-dbg"}, {0x5401, "Chelsio T520-CR"}, /* 2 x 10G */ {0x5402, "Chelsio T522-CR"}, /* 2 x 10G, 2 X 1G */ {0x5403, "Chelsio T540-CR"}, /* 4 x 10G */ {0x5407, "Chelsio T520-SO"}, /* 2 x 10G, nomem */ {0x5409, "Chelsio T520-BT"}, /* 2 x 10GBaseT */ {0x540a, "Chelsio T504-BT"}, /* 4 x 1G */ {0x540d, "Chelsio T580-CR"}, /* 2 x 40G */ {0x540e, "Chelsio T540-LP-CR"}, /* 4 x 10G */ {0x5410, "Chelsio T580-LP-CR"}, /* 2 x 40G */ {0x5411, "Chelsio T520-LL-CR"}, /* 2 x 10G */ {0x5412, "Chelsio T560-CR"}, /* 1 x 40G, 2 x 10G */ {0x5414, "Chelsio T580-LP-SO-CR"}, /* 2 x 40G, nomem */ {0x5415, "Chelsio T502-BT"}, /* 2 x 1G */ #ifdef notyet {0x5404, "Chelsio T520-BCH"}, {0x5405, "Chelsio T540-BCH"}, {0x5406, "Chelsio T540-CH"}, {0x5408, "Chelsio T520-CX"}, {0x540b, "Chelsio B520-SR"}, {0x540c, "Chelsio B504-BT"}, {0x540f, "Chelsio Amsterdam"}, {0x5413, "Chelsio T580-CHR"}, #endif }, t6_pciids[] = { {0xc006, "Chelsio Terminator 6 FPGA"}, /* T6 PE10K6 FPGA (PF0) */ {0x6400, "Chelsio T6-DBG-25"}, /* 2 x 10/25G, debug */ {0x6401, "Chelsio T6225-CR"}, /* 2 x 10/25G */ {0x6402, "Chelsio T6225-SO-CR"}, /* 2 x 10/25G, nomem */ {0x6403, "Chelsio T6425-CR"}, /* 4 x 10/25G */ {0x6404, "Chelsio T6425-SO-CR"}, /* 4 x 10/25G, nomem */ {0x6405, "Chelsio T6225-OCP-SO"}, /* 2 x 10/25G, nomem */ {0x6406, "Chelsio T62100-OCP-SO"}, /* 2 x 40/50/100G, nomem */ {0x6407, "Chelsio T62100-LP-CR"}, /* 2 x 40/50/100G */ {0x6408, "Chelsio T62100-SO-CR"}, /* 2 x 40/50/100G, nomem */ {0x6409, "Chelsio T6210-BT"}, /* 2 x 10GBASE-T */ {0x640d, "Chelsio T62100-CR"}, /* 2 x 40/50/100G */ {0x6410, "Chelsio T6-DBG-100"}, /* 2 x 40/50/100G, debug */ {0x6411, "Chelsio T6225-LL-CR"}, /* 2 x 10/25G */ {0x6414, "Chelsio T61100-OCP-SO"}, /* 1 x 40/50/100G, nomem */ {0x6415, "Chelsio T6201-BT"}, /* 2 x 1000BASE-T */ /* Custom */ {0x6480, "Chelsio T6225 80"}, {0x6481, "Chelsio T62100 81"}, {0x6484, "Chelsio T62100 84"}, }; #ifdef TCP_OFFLOAD /* * service_iq() has an iq and needs the fl. Offset of fl from the iq should be * exactly the same for both rxq and ofld_rxq. */ CTASSERT(offsetof(struct sge_ofld_rxq, iq) == offsetof(struct sge_rxq, iq)); CTASSERT(offsetof(struct sge_ofld_rxq, fl) == offsetof(struct sge_rxq, fl)); #endif CTASSERT(sizeof(struct cluster_metadata) <= CL_METADATA_SIZE); static int t4_probe(device_t dev) { int i; uint16_t v = pci_get_vendor(dev); uint16_t d = pci_get_device(dev); uint8_t f = pci_get_function(dev); if (v != PCI_VENDOR_ID_CHELSIO) return (ENXIO); /* Attach only to PF0 of the FPGA */ if (d == 0xa000 && f != 0) return (ENXIO); for (i = 0; i < nitems(t4_pciids); i++) { if (d == t4_pciids[i].device) { device_set_desc(dev, t4_pciids[i].desc); return (BUS_PROBE_DEFAULT); } } return (ENXIO); } static int t5_probe(device_t dev) { int i; uint16_t v = pci_get_vendor(dev); uint16_t d = pci_get_device(dev); uint8_t f = pci_get_function(dev); if (v != PCI_VENDOR_ID_CHELSIO) return (ENXIO); /* Attach only to PF0 of the FPGA */ if (d == 0xb000 && f != 0) return (ENXIO); for (i = 0; i < nitems(t5_pciids); i++) { if (d == t5_pciids[i].device) { device_set_desc(dev, t5_pciids[i].desc); return (BUS_PROBE_DEFAULT); } } return (ENXIO); } static int t6_probe(device_t dev) { int i; uint16_t v = pci_get_vendor(dev); uint16_t d = pci_get_device(dev); if (v != PCI_VENDOR_ID_CHELSIO) return (ENXIO); for (i = 0; i < nitems(t6_pciids); i++) { if (d == t6_pciids[i].device) { device_set_desc(dev, t6_pciids[i].desc); return (BUS_PROBE_DEFAULT); } } return (ENXIO); } static void t5_attribute_workaround(device_t dev) { device_t root_port; uint32_t v; /* * The T5 chips do not properly echo the No Snoop and Relaxed * Ordering attributes when replying to a TLP from a Root * Port. As a workaround, find the parent Root Port and * disable No Snoop and Relaxed Ordering. Note that this * affects all devices under this root port. */ root_port = pci_find_pcie_root_port(dev); if (root_port == NULL) { device_printf(dev, "Unable to find parent root port\n"); return; } v = pcie_adjust_config(root_port, PCIER_DEVICE_CTL, PCIEM_CTL_RELAXED_ORD_ENABLE | PCIEM_CTL_NOSNOOP_ENABLE, 0, 2); if ((v & (PCIEM_CTL_RELAXED_ORD_ENABLE | PCIEM_CTL_NOSNOOP_ENABLE)) != 0) device_printf(dev, "Disabled No Snoop/Relaxed Ordering on %s\n", device_get_nameunit(root_port)); } static const struct devnames devnames[] = { { .nexus_name = "t4nex", .ifnet_name = "cxgbe", .vi_ifnet_name = "vcxgbe", .pf03_drv_name = "t4iov", .vf_nexus_name = "t4vf", .vf_ifnet_name = "cxgbev" }, { .nexus_name = "t5nex", .ifnet_name = "cxl", .vi_ifnet_name = "vcxl", .pf03_drv_name = "t5iov", .vf_nexus_name = "t5vf", .vf_ifnet_name = "cxlv" }, { .nexus_name = "t6nex", .ifnet_name = "cc", .vi_ifnet_name = "vcc", .pf03_drv_name = "t6iov", .vf_nexus_name = "t6vf", .vf_ifnet_name = "ccv" } }; void t4_init_devnames(struct adapter *sc) { int id; id = chip_id(sc); if (id >= CHELSIO_T4 && id - CHELSIO_T4 < nitems(devnames)) sc->names = &devnames[id - CHELSIO_T4]; else { device_printf(sc->dev, "chip id %d is not supported.\n", id); sc->names = NULL; } } static int t4_attach(device_t dev) { struct adapter *sc; int rc = 0, i, j, rqidx, tqidx, nports; struct make_dev_args mda; struct intrs_and_queues iaq; struct sge *s; uint32_t *buf; #ifdef TCP_OFFLOAD int ofld_rqidx, ofld_tqidx; #endif #ifdef DEV_NETMAP int nm_rqidx, nm_tqidx; #endif int num_vis; sc = device_get_softc(dev); sc->dev = dev; TUNABLE_INT_FETCH("hw.cxgbe.dflags", &sc->debug_flags); if ((pci_get_device(dev) & 0xff00) == 0x5400) t5_attribute_workaround(dev); pci_enable_busmaster(dev); if (pci_find_cap(dev, PCIY_EXPRESS, &i) == 0) { uint32_t v; pci_set_max_read_req(dev, 4096); v = pci_read_config(dev, i + PCIER_DEVICE_CTL, 2); sc->params.pci.mps = 128 << ((v & PCIEM_CTL_MAX_PAYLOAD) >> 5); if (pcie_relaxed_ordering == 0 && (v | PCIEM_CTL_RELAXED_ORD_ENABLE) != 0) { v &= ~PCIEM_CTL_RELAXED_ORD_ENABLE; pci_write_config(dev, i + PCIER_DEVICE_CTL, v, 2); } else if (pcie_relaxed_ordering == 1 && (v & PCIEM_CTL_RELAXED_ORD_ENABLE) == 0) { v |= PCIEM_CTL_RELAXED_ORD_ENABLE; pci_write_config(dev, i + PCIER_DEVICE_CTL, v, 2); } } sc->sge_gts_reg = MYPF_REG(A_SGE_PF_GTS); sc->sge_kdoorbell_reg = MYPF_REG(A_SGE_PF_KDOORBELL); sc->traceq = -1; mtx_init(&sc->ifp_lock, sc->ifp_lockname, 0, MTX_DEF); snprintf(sc->ifp_lockname, sizeof(sc->ifp_lockname), "%s tracer", device_get_nameunit(dev)); snprintf(sc->lockname, sizeof(sc->lockname), "%s", device_get_nameunit(dev)); mtx_init(&sc->sc_lock, sc->lockname, 0, MTX_DEF); t4_add_adapter(sc); mtx_init(&sc->sfl_lock, "starving freelists", 0, MTX_DEF); TAILQ_INIT(&sc->sfl); callout_init_mtx(&sc->sfl_callout, &sc->sfl_lock, 0); mtx_init(&sc->reg_lock, "indirect register access", 0, MTX_DEF); sc->policy = NULL; rw_init(&sc->policy_lock, "connection offload policy"); rc = t4_map_bars_0_and_4(sc); if (rc != 0) goto done; /* error message displayed already */ memset(sc->chan_map, 0xff, sizeof(sc->chan_map)); /* Prepare the adapter for operation. */ buf = malloc(PAGE_SIZE, M_CXGBE, M_ZERO | M_WAITOK); rc = -t4_prep_adapter(sc, buf); free(buf, M_CXGBE); if (rc != 0) { device_printf(dev, "failed to prepare adapter: %d.\n", rc); goto done; } /* * This is the real PF# to which we're attaching. Works from within PCI * passthrough environments too, where pci_get_function() could return a * different PF# depending on the passthrough configuration. We need to * use the real PF# in all our communication with the firmware. */ j = t4_read_reg(sc, A_PL_WHOAMI); sc->pf = chip_id(sc) <= CHELSIO_T5 ? G_SOURCEPF(j) : G_T6_SOURCEPF(j); sc->mbox = sc->pf; t4_init_devnames(sc); if (sc->names == NULL) { rc = ENOTSUP; goto done; /* error message displayed already */ } /* * Do this really early, with the memory windows set up even before the * character device. The userland tool's register i/o and mem read * will work even in "recovery mode". */ setup_memwin(sc); if (t4_init_devlog_params(sc, 0) == 0) fixup_devlog_params(sc); make_dev_args_init(&mda); mda.mda_devsw = &t4_cdevsw; mda.mda_uid = UID_ROOT; mda.mda_gid = GID_WHEEL; mda.mda_mode = 0600; mda.mda_si_drv1 = sc; rc = make_dev_s(&mda, &sc->cdev, "%s", device_get_nameunit(dev)); if (rc != 0) device_printf(dev, "failed to create nexus char device: %d.\n", rc); /* Go no further if recovery mode has been requested. */ if (TUNABLE_INT_FETCH("hw.cxgbe.sos", &i) && i != 0) { device_printf(dev, "recovery mode.\n"); goto done; } #if defined(__i386__) if ((cpu_feature & CPUID_CX8) == 0) { device_printf(dev, "64 bit atomics not available.\n"); rc = ENOTSUP; goto done; } #endif /* Prepare the firmware for operation */ rc = prep_firmware(sc); if (rc != 0) goto done; /* error message displayed already */ rc = get_params__post_init(sc); if (rc != 0) goto done; /* error message displayed already */ rc = set_params__post_init(sc); if (rc != 0) goto done; /* error message displayed already */ rc = t4_map_bar_2(sc); if (rc != 0) goto done; /* error message displayed already */ rc = t4_create_dma_tag(sc); if (rc != 0) goto done; /* error message displayed already */ /* * First pass over all the ports - allocate VIs and initialize some * basic parameters like mac address, port type, etc. */ for_each_port(sc, i) { struct port_info *pi; pi = malloc(sizeof(*pi), M_CXGBE, M_ZERO | M_WAITOK); sc->port[i] = pi; /* These must be set before t4_port_init */ pi->adapter = sc; pi->port_id = i; /* * XXX: vi[0] is special so we can't delay this allocation until * pi->nvi's final value is known. */ pi->vi = malloc(sizeof(struct vi_info) * t4_num_vis, M_CXGBE, M_ZERO | M_WAITOK); /* * Allocate the "main" VI and initialize parameters * like mac addr. */ rc = -t4_port_init(sc, sc->mbox, sc->pf, 0, i); if (rc != 0) { device_printf(dev, "unable to initialize port %d: %d\n", i, rc); free(pi->vi, M_CXGBE); free(pi, M_CXGBE); sc->port[i] = NULL; goto done; } snprintf(pi->lockname, sizeof(pi->lockname), "%sp%d", device_get_nameunit(dev), i); mtx_init(&pi->pi_lock, pi->lockname, 0, MTX_DEF); sc->chan_map[pi->tx_chan] = i; /* All VIs on this port share this media. */ ifmedia_init(&pi->media, IFM_IMASK, cxgbe_media_change, cxgbe_media_status); pi->dev = device_add_child(dev, sc->names->ifnet_name, -1); if (pi->dev == NULL) { device_printf(dev, "failed to add device for port %d.\n", i); rc = ENXIO; goto done; } pi->vi[0].dev = pi->dev; device_set_softc(pi->dev, pi); } /* * Interrupt type, # of interrupts, # of rx/tx queues, etc. */ nports = sc->params.nports; rc = cfg_itype_and_nqueues(sc, &iaq); if (rc != 0) goto done; /* error message displayed already */ num_vis = iaq.num_vis; sc->intr_type = iaq.intr_type; sc->intr_count = iaq.nirq; s = &sc->sge; s->nrxq = nports * iaq.nrxq; s->ntxq = nports * iaq.ntxq; if (num_vis > 1) { s->nrxq += nports * (num_vis - 1) * iaq.nrxq_vi; s->ntxq += nports * (num_vis - 1) * iaq.ntxq_vi; } s->neq = s->ntxq + s->nrxq; /* the free list in an rxq is an eq */ s->neq += nports + 1;/* ctrl queues: 1 per port + 1 mgmt */ s->niq = s->nrxq + 1; /* 1 extra for firmware event queue */ #ifdef TCP_OFFLOAD if (is_offload(sc)) { s->nofldrxq = nports * iaq.nofldrxq; s->nofldtxq = nports * iaq.nofldtxq; if (num_vis > 1) { s->nofldrxq += nports * (num_vis - 1) * iaq.nofldrxq_vi; s->nofldtxq += nports * (num_vis - 1) * iaq.nofldtxq_vi; } s->neq += s->nofldtxq + s->nofldrxq; s->niq += s->nofldrxq; s->ofld_rxq = malloc(s->nofldrxq * sizeof(struct sge_ofld_rxq), M_CXGBE, M_ZERO | M_WAITOK); s->ofld_txq = malloc(s->nofldtxq * sizeof(struct sge_wrq), M_CXGBE, M_ZERO | M_WAITOK); } #endif #ifdef DEV_NETMAP if (num_vis > 1) { s->nnmrxq = nports * (num_vis - 1) * iaq.nnmrxq_vi; s->nnmtxq = nports * (num_vis - 1) * iaq.nnmtxq_vi; } s->neq += s->nnmtxq + s->nnmrxq; s->niq += s->nnmrxq; s->nm_rxq = malloc(s->nnmrxq * sizeof(struct sge_nm_rxq), M_CXGBE, M_ZERO | M_WAITOK); s->nm_txq = malloc(s->nnmtxq * sizeof(struct sge_nm_txq), M_CXGBE, M_ZERO | M_WAITOK); #endif s->ctrlq = malloc(nports * sizeof(struct sge_wrq), M_CXGBE, M_ZERO | M_WAITOK); s->rxq = malloc(s->nrxq * sizeof(struct sge_rxq), M_CXGBE, M_ZERO | M_WAITOK); s->txq = malloc(s->ntxq * sizeof(struct sge_txq), M_CXGBE, M_ZERO | M_WAITOK); s->iqmap = malloc(s->niq * sizeof(struct sge_iq *), M_CXGBE, M_ZERO | M_WAITOK); s->eqmap = malloc(s->neq * sizeof(struct sge_eq *), M_CXGBE, M_ZERO | M_WAITOK); sc->irq = malloc(sc->intr_count * sizeof(struct irq), M_CXGBE, M_ZERO | M_WAITOK); t4_init_l2t(sc, M_WAITOK); t4_init_tx_sched(sc); /* * Second pass over the ports. This time we know the number of rx and * tx queues that each port should get. */ rqidx = tqidx = 0; #ifdef TCP_OFFLOAD ofld_rqidx = ofld_tqidx = 0; #endif #ifdef DEV_NETMAP nm_rqidx = nm_tqidx = 0; #endif for_each_port(sc, i) { struct port_info *pi = sc->port[i]; struct vi_info *vi; if (pi == NULL) continue; pi->nvi = num_vis; for_each_vi(pi, j, vi) { vi->pi = pi; vi->qsize_rxq = t4_qsize_rxq; vi->qsize_txq = t4_qsize_txq; vi->first_rxq = rqidx; vi->first_txq = tqidx; vi->tmr_idx = t4_tmr_idx; vi->pktc_idx = t4_pktc_idx; vi->nrxq = j == 0 ? iaq.nrxq : iaq.nrxq_vi; vi->ntxq = j == 0 ? iaq.ntxq : iaq.ntxq_vi; rqidx += vi->nrxq; tqidx += vi->ntxq; if (j == 0 && vi->ntxq > 1) vi->rsrv_noflowq = t4_rsrv_noflowq ? 1 : 0; else vi->rsrv_noflowq = 0; #ifdef TCP_OFFLOAD vi->ofld_tmr_idx = t4_tmr_idx_ofld; vi->ofld_pktc_idx = t4_pktc_idx_ofld; vi->first_ofld_rxq = ofld_rqidx; vi->first_ofld_txq = ofld_tqidx; vi->nofldrxq = j == 0 ? iaq.nofldrxq : iaq.nofldrxq_vi; vi->nofldtxq = j == 0 ? iaq.nofldtxq : iaq.nofldtxq_vi; ofld_rqidx += vi->nofldrxq; ofld_tqidx += vi->nofldtxq; #endif #ifdef DEV_NETMAP if (j > 0) { vi->first_nm_rxq = nm_rqidx; vi->first_nm_txq = nm_tqidx; vi->nnmrxq = iaq.nnmrxq_vi; vi->nnmtxq = iaq.nnmtxq_vi; nm_rqidx += vi->nnmrxq; nm_tqidx += vi->nnmtxq; } #endif } } rc = t4_setup_intr_handlers(sc); if (rc != 0) { device_printf(dev, "failed to setup interrupt handlers: %d\n", rc); goto done; } rc = bus_generic_probe(dev); if (rc != 0) { device_printf(dev, "failed to probe child drivers: %d\n", rc); goto done; } /* * Ensure thread-safe mailbox access (in debug builds). * * So far this was the only thread accessing the mailbox but various * ifnets and sysctls are about to be created and their handlers/ioctls * will access the mailbox from different threads. */ sc->flags |= CHK_MBOX_ACCESS; rc = bus_generic_attach(dev); if (rc != 0) { device_printf(dev, "failed to attach all child ports: %d\n", rc); goto done; } device_printf(dev, "PCIe gen%d x%d, %d ports, %d %s interrupt%s, %d eq, %d iq\n", sc->params.pci.speed, sc->params.pci.width, sc->params.nports, sc->intr_count, sc->intr_type == INTR_MSIX ? "MSI-X" : (sc->intr_type == INTR_MSI ? "MSI" : "INTx"), sc->intr_count > 1 ? "s" : "", sc->sge.neq, sc->sge.niq); t4_set_desc(sc); notify_siblings(dev, 0); done: if (rc != 0 && sc->cdev) { /* cdev was created and so cxgbetool works; recover that way. */ device_printf(dev, "error during attach, adapter is now in recovery mode.\n"); rc = 0; } if (rc != 0) t4_detach_common(dev); else t4_sysctls(sc); return (rc); } static int t4_ready(device_t dev) { struct adapter *sc; sc = device_get_softc(dev); if (sc->flags & FW_OK) return (0); return (ENXIO); } static int t4_read_port_device(device_t dev, int port, device_t *child) { struct adapter *sc; struct port_info *pi; sc = device_get_softc(dev); if (port < 0 || port >= MAX_NPORTS) return (EINVAL); pi = sc->port[port]; if (pi == NULL || pi->dev == NULL) return (ENXIO); *child = pi->dev; return (0); } static int notify_siblings(device_t dev, int detaching) { device_t sibling; int error, i; error = 0; for (i = 0; i < PCI_FUNCMAX; i++) { if (i == pci_get_function(dev)) continue; sibling = pci_find_dbsf(pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev), i); if (sibling == NULL || !device_is_attached(sibling)) continue; if (detaching) error = T4_DETACH_CHILD(sibling); else (void)T4_ATTACH_CHILD(sibling); if (error) break; } return (error); } /* * Idempotent */ static int t4_detach(device_t dev) { struct adapter *sc; int rc; sc = device_get_softc(dev); rc = notify_siblings(dev, 1); if (rc) { device_printf(dev, "failed to detach sibling devices: %d\n", rc); return (rc); } return (t4_detach_common(dev)); } int t4_detach_common(device_t dev) { struct adapter *sc; struct port_info *pi; int i, rc; sc = device_get_softc(dev); if (sc->cdev) { destroy_dev(sc->cdev); sc->cdev = NULL; } sc->flags &= ~CHK_MBOX_ACCESS; if (sc->flags & FULL_INIT_DONE) { if (!(sc->flags & IS_VF)) t4_intr_disable(sc); } if (device_is_attached(dev)) { rc = bus_generic_detach(dev); if (rc) { device_printf(dev, "failed to detach child devices: %d\n", rc); return (rc); } } for (i = 0; i < sc->intr_count; i++) t4_free_irq(sc, &sc->irq[i]); if ((sc->flags & (IS_VF | FW_OK)) == FW_OK) t4_free_tx_sched(sc); for (i = 0; i < MAX_NPORTS; i++) { pi = sc->port[i]; if (pi) { t4_free_vi(sc, sc->mbox, sc->pf, 0, pi->vi[0].viid); if (pi->dev) device_delete_child(dev, pi->dev); mtx_destroy(&pi->pi_lock); free(pi->vi, M_CXGBE); free(pi, M_CXGBE); } } device_delete_children(dev); if (sc->flags & FULL_INIT_DONE) adapter_full_uninit(sc); if ((sc->flags & (IS_VF | FW_OK)) == FW_OK) t4_fw_bye(sc, sc->mbox); if (sc->intr_type == INTR_MSI || sc->intr_type == INTR_MSIX) pci_release_msi(dev); if (sc->regs_res) bus_release_resource(dev, SYS_RES_MEMORY, sc->regs_rid, sc->regs_res); if (sc->udbs_res) bus_release_resource(dev, SYS_RES_MEMORY, sc->udbs_rid, sc->udbs_res); if (sc->msix_res) bus_release_resource(dev, SYS_RES_MEMORY, sc->msix_rid, sc->msix_res); if (sc->l2t) t4_free_l2t(sc->l2t); #ifdef TCP_OFFLOAD free(sc->sge.ofld_rxq, M_CXGBE); free(sc->sge.ofld_txq, M_CXGBE); #endif #ifdef DEV_NETMAP free(sc->sge.nm_rxq, M_CXGBE); free(sc->sge.nm_txq, M_CXGBE); #endif free(sc->irq, M_CXGBE); free(sc->sge.rxq, M_CXGBE); free(sc->sge.txq, M_CXGBE); free(sc->sge.ctrlq, M_CXGBE); free(sc->sge.iqmap, M_CXGBE); free(sc->sge.eqmap, M_CXGBE); free(sc->tids.ftid_tab, M_CXGBE); + free(sc->tids.hftid_tab, M_CXGBE); + free(sc->tids.atid_tab, M_CXGBE); + free(sc->tids.tid_tab, M_CXGBE); free(sc->tt.tls_rx_ports, M_CXGBE); t4_destroy_dma_tag(sc); if (mtx_initialized(&sc->sc_lock)) { sx_xlock(&t4_list_lock); SLIST_REMOVE(&t4_list, sc, adapter, link); sx_xunlock(&t4_list_lock); mtx_destroy(&sc->sc_lock); } callout_drain(&sc->sfl_callout); - if (mtx_initialized(&sc->tids.ftid_lock)) + if (mtx_initialized(&sc->tids.ftid_lock)) { mtx_destroy(&sc->tids.ftid_lock); + cv_destroy(&sc->tids.ftid_cv); + } + if (mtx_initialized(&sc->tids.hftid_lock)) { + mtx_destroy(&sc->tids.hftid_lock); + cv_destroy(&sc->tids.hftid_cv); + } + if (mtx_initialized(&sc->tids.atid_lock)) + mtx_destroy(&sc->tids.atid_lock); if (mtx_initialized(&sc->sfl_lock)) mtx_destroy(&sc->sfl_lock); if (mtx_initialized(&sc->ifp_lock)) mtx_destroy(&sc->ifp_lock); if (mtx_initialized(&sc->reg_lock)) mtx_destroy(&sc->reg_lock); if (rw_initialized(&sc->policy_lock)) { rw_destroy(&sc->policy_lock); #ifdef TCP_OFFLOAD if (sc->policy != NULL) free_offload_policy(sc->policy); #endif } for (i = 0; i < NUM_MEMWIN; i++) { struct memwin *mw = &sc->memwin[i]; if (rw_initialized(&mw->mw_lock)) rw_destroy(&mw->mw_lock); } bzero(sc, sizeof(*sc)); return (0); } static int cxgbe_probe(device_t dev) { char buf[128]; struct port_info *pi = device_get_softc(dev); snprintf(buf, sizeof(buf), "port %d", pi->port_id); device_set_desc_copy(dev, buf); return (BUS_PROBE_DEFAULT); } #define T4_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | \ IFCAP_VLAN_HWCSUM | IFCAP_TSO | IFCAP_JUMBO_MTU | IFCAP_LRO | \ IFCAP_VLAN_HWTSO | IFCAP_LINKSTATE | IFCAP_HWCSUM_IPV6 | IFCAP_HWSTATS) #define T4_CAP_ENABLE (T4_CAP) static int cxgbe_vi_attach(device_t dev, struct vi_info *vi) { struct ifnet *ifp; struct sbuf *sb; vi->xact_addr_filt = -1; callout_init(&vi->tick, 1); /* Allocate an ifnet and set it up */ ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { device_printf(dev, "Cannot allocate ifnet\n"); return (ENOMEM); } vi->ifp = ifp; ifp->if_softc = vi; if_initname(ifp, device_get_name(dev), device_get_unit(dev)); ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_init = cxgbe_init; ifp->if_ioctl = cxgbe_ioctl; ifp->if_transmit = cxgbe_transmit; ifp->if_qflush = cxgbe_qflush; ifp->if_get_counter = cxgbe_get_counter; ifp->if_capabilities = T4_CAP; #ifdef TCP_OFFLOAD if (vi->nofldrxq != 0) ifp->if_capabilities |= IFCAP_TOE; #endif #ifdef DEV_NETMAP if (vi->nnmrxq != 0) ifp->if_capabilities |= IFCAP_NETMAP; #endif ifp->if_capenable = T4_CAP_ENABLE; ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO | CSUM_UDP_IPV6 | CSUM_TCP_IPV6; ifp->if_hw_tsomax = 65536 - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN); ifp->if_hw_tsomaxsegcount = TX_SGL_SEGS; ifp->if_hw_tsomaxsegsize = 65536; vi->vlan_c = EVENTHANDLER_REGISTER(vlan_config, cxgbe_vlan_config, ifp, EVENTHANDLER_PRI_ANY); ether_ifattach(ifp, vi->hw_addr); #ifdef DEV_NETMAP if (ifp->if_capabilities & IFCAP_NETMAP) cxgbe_nm_attach(vi); #endif sb = sbuf_new_auto(); sbuf_printf(sb, "%d txq, %d rxq (NIC)", vi->ntxq, vi->nrxq); #ifdef TCP_OFFLOAD if (ifp->if_capabilities & IFCAP_TOE) sbuf_printf(sb, "; %d txq, %d rxq (TOE)", vi->nofldtxq, vi->nofldrxq); #endif #ifdef DEV_NETMAP if (ifp->if_capabilities & IFCAP_NETMAP) sbuf_printf(sb, "; %d txq, %d rxq (netmap)", vi->nnmtxq, vi->nnmrxq); #endif sbuf_finish(sb); device_printf(dev, "%s\n", sbuf_data(sb)); sbuf_delete(sb); vi_sysctls(vi); return (0); } static int cxgbe_attach(device_t dev) { struct port_info *pi = device_get_softc(dev); struct adapter *sc = pi->adapter; struct vi_info *vi; int i, rc; callout_init_mtx(&pi->tick, &pi->pi_lock, 0); rc = cxgbe_vi_attach(dev, &pi->vi[0]); if (rc) return (rc); for_each_vi(pi, i, vi) { if (i == 0) continue; vi->dev = device_add_child(dev, sc->names->vi_ifnet_name, -1); if (vi->dev == NULL) { device_printf(dev, "failed to add VI %d\n", i); continue; } device_set_softc(vi->dev, vi); } cxgbe_sysctls(pi); bus_generic_attach(dev); return (0); } static void cxgbe_vi_detach(struct vi_info *vi) { struct ifnet *ifp = vi->ifp; ether_ifdetach(ifp); if (vi->vlan_c) EVENTHANDLER_DEREGISTER(vlan_config, vi->vlan_c); /* Let detach proceed even if these fail. */ #ifdef DEV_NETMAP if (ifp->if_capabilities & IFCAP_NETMAP) cxgbe_nm_detach(vi); #endif cxgbe_uninit_synchronized(vi); callout_drain(&vi->tick); vi_full_uninit(vi); if_free(vi->ifp); vi->ifp = NULL; } static int cxgbe_detach(device_t dev) { struct port_info *pi = device_get_softc(dev); struct adapter *sc = pi->adapter; int rc; /* Detach the extra VIs first. */ rc = bus_generic_detach(dev); if (rc) return (rc); device_delete_children(dev); doom_vi(sc, &pi->vi[0]); if (pi->flags & HAS_TRACEQ) { sc->traceq = -1; /* cloner should not create ifnet */ t4_tracer_port_detach(sc); } cxgbe_vi_detach(&pi->vi[0]); callout_drain(&pi->tick); ifmedia_removeall(&pi->media); end_synchronized_op(sc, 0); return (0); } static void cxgbe_init(void *arg) { struct vi_info *vi = arg; struct adapter *sc = vi->pi->adapter; if (begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4init") != 0) return; cxgbe_init_synchronized(vi); end_synchronized_op(sc, 0); } static int cxgbe_ioctl(struct ifnet *ifp, unsigned long cmd, caddr_t data) { int rc = 0, mtu, flags, can_sleep; struct vi_info *vi = ifp->if_softc; struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; struct ifreq *ifr = (struct ifreq *)data; uint32_t mask; switch (cmd) { case SIOCSIFMTU: mtu = ifr->ifr_mtu; if (mtu < ETHERMIN || mtu > MAX_MTU) return (EINVAL); rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4mtu"); if (rc) return (rc); ifp->if_mtu = mtu; if (vi->flags & VI_INIT_DONE) { t4_update_fl_bufsize(ifp); if (ifp->if_drv_flags & IFF_DRV_RUNNING) rc = update_mac_settings(ifp, XGMAC_MTU); } end_synchronized_op(sc, 0); break; case SIOCSIFFLAGS: can_sleep = 0; redo_sifflags: rc = begin_synchronized_op(sc, vi, can_sleep ? (SLEEP_OK | INTR_OK) : HOLD_LOCK, "t4flg"); if (rc) { if_printf(ifp, "%ssleepable synch operation failed: %d." " if_flags 0x%08x, if_drv_flags 0x%08x\n", can_sleep ? "" : "non-", rc, ifp->if_flags, ifp->if_drv_flags); return (rc); } if (ifp->if_flags & IFF_UP) { if (ifp->if_drv_flags & IFF_DRV_RUNNING) { flags = vi->if_flags; if ((ifp->if_flags ^ flags) & (IFF_PROMISC | IFF_ALLMULTI)) { if (can_sleep == 1) { end_synchronized_op(sc, 0); can_sleep = 0; goto redo_sifflags; } rc = update_mac_settings(ifp, XGMAC_PROMISC | XGMAC_ALLMULTI); } } else { if (can_sleep == 0) { end_synchronized_op(sc, LOCK_HELD); can_sleep = 1; goto redo_sifflags; } rc = cxgbe_init_synchronized(vi); } vi->if_flags = ifp->if_flags; } else if (ifp->if_drv_flags & IFF_DRV_RUNNING) { if (can_sleep == 0) { end_synchronized_op(sc, LOCK_HELD); can_sleep = 1; goto redo_sifflags; } rc = cxgbe_uninit_synchronized(vi); } end_synchronized_op(sc, can_sleep ? 0 : LOCK_HELD); break; case SIOCADDMULTI: case SIOCDELMULTI: /* these two are called with a mutex held :-( */ rc = begin_synchronized_op(sc, vi, HOLD_LOCK, "t4multi"); if (rc) return (rc); if (ifp->if_drv_flags & IFF_DRV_RUNNING) rc = update_mac_settings(ifp, XGMAC_MCADDRS); end_synchronized_op(sc, LOCK_HELD); break; case SIOCSIFCAP: rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4cap"); if (rc) return (rc); mask = ifr->ifr_reqcap ^ ifp->if_capenable; if (mask & IFCAP_TXCSUM) { ifp->if_capenable ^= IFCAP_TXCSUM; ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP); if (IFCAP_TSO4 & ifp->if_capenable && !(IFCAP_TXCSUM & ifp->if_capenable)) { ifp->if_capenable &= ~IFCAP_TSO4; if_printf(ifp, "tso4 disabled due to -txcsum.\n"); } } if (mask & IFCAP_TXCSUM_IPV6) { ifp->if_capenable ^= IFCAP_TXCSUM_IPV6; ifp->if_hwassist ^= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6); if (IFCAP_TSO6 & ifp->if_capenable && !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) { ifp->if_capenable &= ~IFCAP_TSO6; if_printf(ifp, "tso6 disabled due to -txcsum6.\n"); } } if (mask & IFCAP_RXCSUM) ifp->if_capenable ^= IFCAP_RXCSUM; if (mask & IFCAP_RXCSUM_IPV6) ifp->if_capenable ^= IFCAP_RXCSUM_IPV6; /* * Note that we leave CSUM_TSO alone (it is always set). The * kernel takes both IFCAP_TSOx and CSUM_TSO into account before * sending a TSO request our way, so it's sufficient to toggle * IFCAP_TSOx only. */ if (mask & IFCAP_TSO4) { if (!(IFCAP_TSO4 & ifp->if_capenable) && !(IFCAP_TXCSUM & ifp->if_capenable)) { if_printf(ifp, "enable txcsum first.\n"); rc = EAGAIN; goto fail; } ifp->if_capenable ^= IFCAP_TSO4; } if (mask & IFCAP_TSO6) { if (!(IFCAP_TSO6 & ifp->if_capenable) && !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) { if_printf(ifp, "enable txcsum6 first.\n"); rc = EAGAIN; goto fail; } ifp->if_capenable ^= IFCAP_TSO6; } if (mask & IFCAP_LRO) { #if defined(INET) || defined(INET6) int i; struct sge_rxq *rxq; ifp->if_capenable ^= IFCAP_LRO; for_each_rxq(vi, i, rxq) { if (ifp->if_capenable & IFCAP_LRO) rxq->iq.flags |= IQ_LRO_ENABLED; else rxq->iq.flags &= ~IQ_LRO_ENABLED; } #endif } #ifdef TCP_OFFLOAD if (mask & IFCAP_TOE) { int enable = (ifp->if_capenable ^ mask) & IFCAP_TOE; rc = toe_capability(vi, enable); if (rc != 0) goto fail; ifp->if_capenable ^= mask; } #endif if (mask & IFCAP_VLAN_HWTAGGING) { ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; if (ifp->if_drv_flags & IFF_DRV_RUNNING) rc = update_mac_settings(ifp, XGMAC_VLANEX); } if (mask & IFCAP_VLAN_MTU) { ifp->if_capenable ^= IFCAP_VLAN_MTU; /* Need to find out how to disable auto-mtu-inflation */ } if (mask & IFCAP_VLAN_HWTSO) ifp->if_capenable ^= IFCAP_VLAN_HWTSO; if (mask & IFCAP_VLAN_HWCSUM) ifp->if_capenable ^= IFCAP_VLAN_HWCSUM; #ifdef VLAN_CAPABILITIES VLAN_CAPABILITIES(ifp); #endif fail: end_synchronized_op(sc, 0); break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: case SIOCGIFXMEDIA: ifmedia_ioctl(ifp, ifr, &pi->media, cmd); break; case SIOCGI2C: { struct ifi2creq i2c; rc = copyin(ifr_data_get_ptr(ifr), &i2c, sizeof(i2c)); if (rc != 0) break; if (i2c.dev_addr != 0xA0 && i2c.dev_addr != 0xA2) { rc = EPERM; break; } if (i2c.len > sizeof(i2c.data)) { rc = EINVAL; break; } rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4i2c"); if (rc) return (rc); rc = -t4_i2c_rd(sc, sc->mbox, pi->port_id, i2c.dev_addr, i2c.offset, i2c.len, &i2c.data[0]); end_synchronized_op(sc, 0); if (rc == 0) rc = copyout(&i2c, ifr_data_get_ptr(ifr), sizeof(i2c)); break; } default: rc = ether_ioctl(ifp, cmd, data); } return (rc); } static int cxgbe_transmit(struct ifnet *ifp, struct mbuf *m) { struct vi_info *vi = ifp->if_softc; struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; struct sge_txq *txq; void *items[1]; int rc; M_ASSERTPKTHDR(m); MPASS(m->m_nextpkt == NULL); /* not quite ready for this yet */ if (__predict_false(pi->link_cfg.link_ok == 0)) { m_freem(m); return (ENETDOWN); } rc = parse_pkt(sc, &m); if (__predict_false(rc != 0)) { MPASS(m == NULL); /* was freed already */ atomic_add_int(&pi->tx_parse_error, 1); /* rare, atomic is ok */ return (rc); } /* Select a txq. */ txq = &sc->sge.txq[vi->first_txq]; if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) txq += ((m->m_pkthdr.flowid % (vi->ntxq - vi->rsrv_noflowq)) + vi->rsrv_noflowq); items[0] = m; rc = mp_ring_enqueue(txq->r, items, 1, 4096); if (__predict_false(rc != 0)) m_freem(m); return (rc); } static void cxgbe_qflush(struct ifnet *ifp) { struct vi_info *vi = ifp->if_softc; struct sge_txq *txq; int i; /* queues do not exist if !VI_INIT_DONE. */ if (vi->flags & VI_INIT_DONE) { for_each_txq(vi, i, txq) { TXQ_LOCK(txq); txq->eq.flags |= EQ_QFLUSH; TXQ_UNLOCK(txq); while (!mp_ring_is_idle(txq->r)) { mp_ring_check_drainage(txq->r, 0); pause("qflush", 1); } TXQ_LOCK(txq); txq->eq.flags &= ~EQ_QFLUSH; TXQ_UNLOCK(txq); } } if_qflush(ifp); } static uint64_t vi_get_counter(struct ifnet *ifp, ift_counter c) { struct vi_info *vi = ifp->if_softc; struct fw_vi_stats_vf *s = &vi->stats; vi_refresh_stats(vi->pi->adapter, vi); switch (c) { case IFCOUNTER_IPACKETS: return (s->rx_bcast_frames + s->rx_mcast_frames + s->rx_ucast_frames); case IFCOUNTER_IERRORS: return (s->rx_err_frames); case IFCOUNTER_OPACKETS: return (s->tx_bcast_frames + s->tx_mcast_frames + s->tx_ucast_frames + s->tx_offload_frames); case IFCOUNTER_OERRORS: return (s->tx_drop_frames); case IFCOUNTER_IBYTES: return (s->rx_bcast_bytes + s->rx_mcast_bytes + s->rx_ucast_bytes); case IFCOUNTER_OBYTES: return (s->tx_bcast_bytes + s->tx_mcast_bytes + s->tx_ucast_bytes + s->tx_offload_bytes); case IFCOUNTER_IMCASTS: return (s->rx_mcast_frames); case IFCOUNTER_OMCASTS: return (s->tx_mcast_frames); case IFCOUNTER_OQDROPS: { uint64_t drops; drops = 0; if (vi->flags & VI_INIT_DONE) { int i; struct sge_txq *txq; for_each_txq(vi, i, txq) drops += counter_u64_fetch(txq->r->drops); } return (drops); } default: return (if_get_counter_default(ifp, c)); } } uint64_t cxgbe_get_counter(struct ifnet *ifp, ift_counter c) { struct vi_info *vi = ifp->if_softc; struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; struct port_stats *s = &pi->stats; if (pi->nvi > 1 || sc->flags & IS_VF) return (vi_get_counter(ifp, c)); cxgbe_refresh_stats(sc, pi); switch (c) { case IFCOUNTER_IPACKETS: return (s->rx_frames); case IFCOUNTER_IERRORS: return (s->rx_jabber + s->rx_runt + s->rx_too_long + s->rx_fcs_err + s->rx_len_err); case IFCOUNTER_OPACKETS: return (s->tx_frames); case IFCOUNTER_OERRORS: return (s->tx_error_frames); case IFCOUNTER_IBYTES: return (s->rx_octets); case IFCOUNTER_OBYTES: return (s->tx_octets); case IFCOUNTER_IMCASTS: return (s->rx_mcast_frames); case IFCOUNTER_OMCASTS: return (s->tx_mcast_frames); case IFCOUNTER_IQDROPS: return (s->rx_ovflow0 + s->rx_ovflow1 + s->rx_ovflow2 + s->rx_ovflow3 + s->rx_trunc0 + s->rx_trunc1 + s->rx_trunc2 + s->rx_trunc3 + pi->tnl_cong_drops); case IFCOUNTER_OQDROPS: { uint64_t drops; drops = s->tx_drop; if (vi->flags & VI_INIT_DONE) { int i; struct sge_txq *txq; for_each_txq(vi, i, txq) drops += counter_u64_fetch(txq->r->drops); } return (drops); } default: return (if_get_counter_default(ifp, c)); } } static int cxgbe_media_change(struct ifnet *ifp) { struct vi_info *vi = ifp->if_softc; device_printf(vi->dev, "%s unimplemented.\n", __func__); return (EOPNOTSUPP); } static void cxgbe_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) { struct vi_info *vi = ifp->if_softc; struct port_info *pi = vi->pi; struct ifmedia_entry *cur; struct link_config *lc = &pi->link_cfg; /* * If all the interfaces are administratively down the firmware does not * report transceiver changes. Refresh port info here so that ifconfig * displays accurate information at all times. */ if (begin_synchronized_op(pi->adapter, NULL, SLEEP_OK | INTR_OK, "t4med") == 0) { PORT_LOCK(pi); if (pi->up_vis == 0) { t4_update_port_info(pi); build_medialist(pi, &pi->media); } PORT_UNLOCK(pi); end_synchronized_op(pi->adapter, 0); } ifmr->ifm_status = IFM_AVALID; if (lc->link_ok == 0) return; ifmr->ifm_status |= IFM_ACTIVE; ifmr->ifm_active &= ~(IFM_ETH_TXPAUSE | IFM_ETH_RXPAUSE); if (lc->fc & PAUSE_RX) ifmr->ifm_active |= IFM_ETH_RXPAUSE; if (lc->fc & PAUSE_TX) ifmr->ifm_active |= IFM_ETH_TXPAUSE; /* active and current will differ iff current media is autoselect. */ cur = pi->media.ifm_cur; if (cur != NULL && IFM_SUBTYPE(cur->ifm_media) != IFM_AUTO) return; ifmr->ifm_active = IFM_ETHER | IFM_FDX; if (lc->fc & PAUSE_RX) ifmr->ifm_active |= IFM_ETH_RXPAUSE; if (lc->fc & PAUSE_TX) ifmr->ifm_active |= IFM_ETH_TXPAUSE; switch (lc->speed) { case 10000: ifmr->ifm_active |= IFM_10G_T; break; case 1000: ifmr->ifm_active |= IFM_1000_T; break; case 100: ifmr->ifm_active |= IFM_100_TX; break; case 10: ifmr->ifm_active |= IFM_10_T; break; default: device_printf(vi->dev, "link up but speed unknown (%u)\n", lc->speed); } } static int vcxgbe_probe(device_t dev) { char buf[128]; struct vi_info *vi = device_get_softc(dev); snprintf(buf, sizeof(buf), "port %d vi %td", vi->pi->port_id, vi - vi->pi->vi); device_set_desc_copy(dev, buf); return (BUS_PROBE_DEFAULT); } static int alloc_extra_vi(struct adapter *sc, struct port_info *pi, struct vi_info *vi) { int func, index, rc; uint32_t param, val; ASSERT_SYNCHRONIZED_OP(sc); index = vi - pi->vi; MPASS(index > 0); /* This function deals with _extra_ VIs only */ KASSERT(index < nitems(vi_mac_funcs), ("%s: VI %s doesn't have a MAC func", __func__, device_get_nameunit(vi->dev))); func = vi_mac_funcs[index]; rc = t4_alloc_vi_func(sc, sc->mbox, pi->tx_chan, sc->pf, 0, 1, vi->hw_addr, &vi->rss_size, func, 0); if (rc < 0) { device_printf(vi->dev, "failed to allocate virtual interface %d" "for port %d: %d\n", index, pi->port_id, -rc); return (-rc); } vi->viid = rc; if (chip_id(sc) <= CHELSIO_T5) vi->smt_idx = (rc & 0x7f) << 1; else vi->smt_idx = (rc & 0x7f); if (vi->rss_size == 1) { /* * This VI didn't get a slice of the RSS table. Reduce the * number of VIs being created (hw.cxgbe.num_vis) or modify the * configuration file (nvi, rssnvi for this PF) if this is a * problem. */ device_printf(vi->dev, "RSS table not available.\n"); vi->rss_base = 0xffff; return (0); } param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) | V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_RSSINFO) | V_FW_PARAMS_PARAM_YZ(vi->viid); rc = t4_query_params(sc, sc->mbox, sc->pf, 0, 1, ¶m, &val); if (rc) vi->rss_base = 0xffff; else { MPASS((val >> 16) == vi->rss_size); vi->rss_base = val & 0xffff; } return (0); } static int vcxgbe_attach(device_t dev) { struct vi_info *vi; struct port_info *pi; struct adapter *sc; int rc; vi = device_get_softc(dev); pi = vi->pi; sc = pi->adapter; rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4via"); if (rc) return (rc); rc = alloc_extra_vi(sc, pi, vi); end_synchronized_op(sc, 0); if (rc) return (rc); rc = cxgbe_vi_attach(dev, vi); if (rc) { t4_free_vi(sc, sc->mbox, sc->pf, 0, vi->viid); return (rc); } return (0); } static int vcxgbe_detach(device_t dev) { struct vi_info *vi; struct adapter *sc; vi = device_get_softc(dev); sc = vi->pi->adapter; doom_vi(sc, vi); cxgbe_vi_detach(vi); t4_free_vi(sc, sc->mbox, sc->pf, 0, vi->viid); end_synchronized_op(sc, 0); return (0); } void t4_fatal_err(struct adapter *sc) { t4_set_reg_field(sc, A_SGE_CONTROL, F_GLOBALENABLE, 0); t4_intr_disable(sc); log(LOG_EMERG, "%s: encountered fatal error, adapter stopped.\n", device_get_nameunit(sc->dev)); if (t4_panic_on_fatal_err) panic("panic requested on fatal error"); } void t4_add_adapter(struct adapter *sc) { sx_xlock(&t4_list_lock); SLIST_INSERT_HEAD(&t4_list, sc, link); sx_xunlock(&t4_list_lock); } int t4_map_bars_0_and_4(struct adapter *sc) { sc->regs_rid = PCIR_BAR(0); sc->regs_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY, &sc->regs_rid, RF_ACTIVE); if (sc->regs_res == NULL) { device_printf(sc->dev, "cannot map registers.\n"); return (ENXIO); } sc->bt = rman_get_bustag(sc->regs_res); sc->bh = rman_get_bushandle(sc->regs_res); sc->mmio_len = rman_get_size(sc->regs_res); setbit(&sc->doorbells, DOORBELL_KDB); sc->msix_rid = PCIR_BAR(4); sc->msix_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY, &sc->msix_rid, RF_ACTIVE); if (sc->msix_res == NULL) { device_printf(sc->dev, "cannot map MSI-X BAR.\n"); return (ENXIO); } return (0); } int t4_map_bar_2(struct adapter *sc) { /* * T4: only iWARP driver uses the userspace doorbells. There is no need * to map it if RDMA is disabled. */ if (is_t4(sc) && sc->rdmacaps == 0) return (0); sc->udbs_rid = PCIR_BAR(2); sc->udbs_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY, &sc->udbs_rid, RF_ACTIVE); if (sc->udbs_res == NULL) { device_printf(sc->dev, "cannot map doorbell BAR.\n"); return (ENXIO); } sc->udbs_base = rman_get_virtual(sc->udbs_res); if (chip_id(sc) >= CHELSIO_T5) { setbit(&sc->doorbells, DOORBELL_UDB); #if defined(__i386__) || defined(__amd64__) if (t5_write_combine) { int rc, mode; /* * Enable write combining on BAR2. This is the * userspace doorbell BAR and is split into 128B * (UDBS_SEG_SIZE) doorbell regions, each associated * with an egress queue. The first 64B has the doorbell * and the second 64B can be used to submit a tx work * request with an implicit doorbell. */ rc = pmap_change_attr((vm_offset_t)sc->udbs_base, rman_get_size(sc->udbs_res), PAT_WRITE_COMBINING); if (rc == 0) { clrbit(&sc->doorbells, DOORBELL_UDB); setbit(&sc->doorbells, DOORBELL_WCWR); setbit(&sc->doorbells, DOORBELL_UDBWC); } else { t5_write_combine = 0; device_printf(sc->dev, "couldn't enable write combining: %d\n", rc); } mode = is_t5(sc) ? V_STATMODE(0) : V_T6_STATMODE(0); t4_write_reg(sc, A_SGE_STAT_CFG, V_STATSOURCE_T5(7) | mode); } #else t5_write_combine = 0; #endif sc->iwt.wc_en = t5_write_combine; } return (0); } struct memwin_init { uint32_t base; uint32_t aperture; }; static const struct memwin_init t4_memwin[NUM_MEMWIN] = { { MEMWIN0_BASE, MEMWIN0_APERTURE }, { MEMWIN1_BASE, MEMWIN1_APERTURE }, { MEMWIN2_BASE_T4, MEMWIN2_APERTURE_T4 } }; static const struct memwin_init t5_memwin[NUM_MEMWIN] = { { MEMWIN0_BASE, MEMWIN0_APERTURE }, { MEMWIN1_BASE, MEMWIN1_APERTURE }, { MEMWIN2_BASE_T5, MEMWIN2_APERTURE_T5 }, }; static void setup_memwin(struct adapter *sc) { const struct memwin_init *mw_init; struct memwin *mw; int i; uint32_t bar0; if (is_t4(sc)) { /* * Read low 32b of bar0 indirectly via the hardware backdoor * mechanism. Works from within PCI passthrough environments * too, where rman_get_start() can return a different value. We * need to program the T4 memory window decoders with the actual * addresses that will be coming across the PCIe link. */ bar0 = t4_hw_pci_read_cfg4(sc, PCIR_BAR(0)); bar0 &= (uint32_t) PCIM_BAR_MEM_BASE; mw_init = &t4_memwin[0]; } else { /* T5+ use the relative offset inside the PCIe BAR */ bar0 = 0; mw_init = &t5_memwin[0]; } for (i = 0, mw = &sc->memwin[0]; i < NUM_MEMWIN; i++, mw_init++, mw++) { rw_init(&mw->mw_lock, "memory window access"); mw->mw_base = mw_init->base; mw->mw_aperture = mw_init->aperture; mw->mw_curpos = 0; t4_write_reg(sc, PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_BASE_WIN, i), (mw->mw_base + bar0) | V_BIR(0) | V_WINDOW(ilog2(mw->mw_aperture) - 10)); rw_wlock(&mw->mw_lock); position_memwin(sc, i, 0); rw_wunlock(&mw->mw_lock); } /* flush */ t4_read_reg(sc, PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_BASE_WIN, 2)); } /* * Positions the memory window at the given address in the card's address space. * There are some alignment requirements and the actual position may be at an * address prior to the requested address. mw->mw_curpos always has the actual * position of the window. */ static void position_memwin(struct adapter *sc, int idx, uint32_t addr) { struct memwin *mw; uint32_t pf; uint32_t reg; MPASS(idx >= 0 && idx < NUM_MEMWIN); mw = &sc->memwin[idx]; rw_assert(&mw->mw_lock, RA_WLOCKED); if (is_t4(sc)) { pf = 0; mw->mw_curpos = addr & ~0xf; /* start must be 16B aligned */ } else { pf = V_PFNUM(sc->pf); mw->mw_curpos = addr & ~0x7f; /* start must be 128B aligned */ } reg = PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_OFFSET, idx); t4_write_reg(sc, reg, mw->mw_curpos | pf); t4_read_reg(sc, reg); /* flush */ } int rw_via_memwin(struct adapter *sc, int idx, uint32_t addr, uint32_t *val, int len, int rw) { struct memwin *mw; uint32_t mw_end, v; MPASS(idx >= 0 && idx < NUM_MEMWIN); /* Memory can only be accessed in naturally aligned 4 byte units */ if (addr & 3 || len & 3 || len <= 0) return (EINVAL); mw = &sc->memwin[idx]; while (len > 0) { rw_rlock(&mw->mw_lock); mw_end = mw->mw_curpos + mw->mw_aperture; if (addr >= mw_end || addr < mw->mw_curpos) { /* Will need to reposition the window */ if (!rw_try_upgrade(&mw->mw_lock)) { rw_runlock(&mw->mw_lock); rw_wlock(&mw->mw_lock); } rw_assert(&mw->mw_lock, RA_WLOCKED); position_memwin(sc, idx, addr); rw_downgrade(&mw->mw_lock); mw_end = mw->mw_curpos + mw->mw_aperture; } rw_assert(&mw->mw_lock, RA_RLOCKED); while (addr < mw_end && len > 0) { if (rw == 0) { v = t4_read_reg(sc, mw->mw_base + addr - mw->mw_curpos); *val++ = le32toh(v); } else { v = *val++; t4_write_reg(sc, mw->mw_base + addr - mw->mw_curpos, htole32(v)); } addr += 4; len -= 4; } rw_runlock(&mw->mw_lock); } return (0); } int alloc_atid_tab(struct tid_info *t, int flags) { int i; MPASS(t->natids > 0); MPASS(t->atid_tab == NULL); t->atid_tab = malloc(t->natids * sizeof(*t->atid_tab), M_CXGBE, M_ZERO | flags); if (t->atid_tab == NULL) return (ENOMEM); mtx_init(&t->atid_lock, "atid lock", NULL, MTX_DEF); t->afree = t->atid_tab; t->atids_in_use = 0; for (i = 1; i < t->natids; i++) t->atid_tab[i - 1].next = &t->atid_tab[i]; t->atid_tab[t->natids - 1].next = NULL; return (0); } void free_atid_tab(struct tid_info *t) { KASSERT(t->atids_in_use == 0, ("%s: %d atids still in use.", __func__, t->atids_in_use)); if (mtx_initialized(&t->atid_lock)) mtx_destroy(&t->atid_lock); free(t->atid_tab, M_CXGBE); t->atid_tab = NULL; } int alloc_atid(struct adapter *sc, void *ctx) { struct tid_info *t = &sc->tids; int atid = -1; mtx_lock(&t->atid_lock); if (t->afree) { union aopen_entry *p = t->afree; atid = p - t->atid_tab; MPASS(atid <= M_TID_TID); t->afree = p->next; p->data = ctx; t->atids_in_use++; } mtx_unlock(&t->atid_lock); return (atid); } void * lookup_atid(struct adapter *sc, int atid) { struct tid_info *t = &sc->tids; return (t->atid_tab[atid].data); } void free_atid(struct adapter *sc, int atid) { struct tid_info *t = &sc->tids; union aopen_entry *p = &t->atid_tab[atid]; mtx_lock(&t->atid_lock); p->next = t->afree; t->afree = p; t->atids_in_use--; mtx_unlock(&t->atid_lock); } static void queue_tid_release(struct adapter *sc, int tid) { CXGBE_UNIMPLEMENTED("deferred tid release"); } void release_tid(struct adapter *sc, int tid, struct sge_wrq *ctrlq) { struct wrqe *wr; struct cpl_tid_release *req; wr = alloc_wrqe(sizeof(*req), ctrlq); if (wr == NULL) { queue_tid_release(sc, tid); /* defer */ return; } req = wrtod(wr); INIT_TP_WR_MIT_CPL(req, CPL_TID_RELEASE, tid); t4_wrq_tx(sc, wr); } static int t4_range_cmp(const void *a, const void *b) { return ((const struct t4_range *)a)->start - ((const struct t4_range *)b)->start; } /* * Verify that the memory range specified by the addr/len pair is valid within * the card's address space. */ static int validate_mem_range(struct adapter *sc, uint32_t addr, int len) { struct t4_range mem_ranges[4], *r, *next; uint32_t em, addr_len; int i, n, remaining; /* Memory can only be accessed in naturally aligned 4 byte units */ if (addr & 3 || len & 3 || len <= 0) return (EINVAL); /* Enabled memories */ em = t4_read_reg(sc, A_MA_TARGET_MEM_ENABLE); r = &mem_ranges[0]; n = 0; bzero(r, sizeof(mem_ranges)); if (em & F_EDRAM0_ENABLE) { addr_len = t4_read_reg(sc, A_MA_EDRAM0_BAR); r->size = G_EDRAM0_SIZE(addr_len) << 20; if (r->size > 0) { r->start = G_EDRAM0_BASE(addr_len) << 20; if (addr >= r->start && addr + len <= r->start + r->size) return (0); r++; n++; } } if (em & F_EDRAM1_ENABLE) { addr_len = t4_read_reg(sc, A_MA_EDRAM1_BAR); r->size = G_EDRAM1_SIZE(addr_len) << 20; if (r->size > 0) { r->start = G_EDRAM1_BASE(addr_len) << 20; if (addr >= r->start && addr + len <= r->start + r->size) return (0); r++; n++; } } if (em & F_EXT_MEM_ENABLE) { addr_len = t4_read_reg(sc, A_MA_EXT_MEMORY_BAR); r->size = G_EXT_MEM_SIZE(addr_len) << 20; if (r->size > 0) { r->start = G_EXT_MEM_BASE(addr_len) << 20; if (addr >= r->start && addr + len <= r->start + r->size) return (0); r++; n++; } } if (is_t5(sc) && em & F_EXT_MEM1_ENABLE) { addr_len = t4_read_reg(sc, A_MA_EXT_MEMORY1_BAR); r->size = G_EXT_MEM1_SIZE(addr_len) << 20; if (r->size > 0) { r->start = G_EXT_MEM1_BASE(addr_len) << 20; if (addr >= r->start && addr + len <= r->start + r->size) return (0); r++; n++; } } MPASS(n <= nitems(mem_ranges)); if (n > 1) { /* Sort and merge the ranges. */ qsort(mem_ranges, n, sizeof(struct t4_range), t4_range_cmp); /* Start from index 0 and examine the next n - 1 entries. */ r = &mem_ranges[0]; for (remaining = n - 1; remaining > 0; remaining--, r++) { MPASS(r->size > 0); /* r is a valid entry. */ next = r + 1; MPASS(next->size > 0); /* and so is the next one. */ while (r->start + r->size >= next->start) { /* Merge the next one into the current entry. */ r->size = max(r->start + r->size, next->start + next->size) - r->start; n--; /* One fewer entry in total. */ if (--remaining == 0) goto done; /* short circuit */ next++; } if (next != r + 1) { /* * Some entries were merged into r and next * points to the first valid entry that couldn't * be merged. */ MPASS(next->size > 0); /* must be valid */ memcpy(r + 1, next, remaining * sizeof(*r)); #ifdef INVARIANTS /* * This so that the foo->size assertion in the * next iteration of the loop do the right * thing for entries that were pulled up and are * no longer valid. */ MPASS(n < nitems(mem_ranges)); bzero(&mem_ranges[n], (nitems(mem_ranges) - n) * sizeof(struct t4_range)); #endif } } done: /* Done merging the ranges. */ MPASS(n > 0); r = &mem_ranges[0]; for (i = 0; i < n; i++, r++) { if (addr >= r->start && addr + len <= r->start + r->size) return (0); } } return (EFAULT); } static int fwmtype_to_hwmtype(int mtype) { switch (mtype) { case FW_MEMTYPE_EDC0: return (MEM_EDC0); case FW_MEMTYPE_EDC1: return (MEM_EDC1); case FW_MEMTYPE_EXTMEM: return (MEM_MC0); case FW_MEMTYPE_EXTMEM1: return (MEM_MC1); default: panic("%s: cannot translate fw mtype %d.", __func__, mtype); } } /* * Verify that the memory range specified by the memtype/offset/len pair is * valid and lies entirely within the memtype specified. The global address of * the start of the range is returned in addr. */ static int validate_mt_off_len(struct adapter *sc, int mtype, uint32_t off, int len, uint32_t *addr) { uint32_t em, addr_len, maddr; /* Memory can only be accessed in naturally aligned 4 byte units */ if (off & 3 || len & 3 || len == 0) return (EINVAL); em = t4_read_reg(sc, A_MA_TARGET_MEM_ENABLE); switch (fwmtype_to_hwmtype(mtype)) { case MEM_EDC0: if (!(em & F_EDRAM0_ENABLE)) return (EINVAL); addr_len = t4_read_reg(sc, A_MA_EDRAM0_BAR); maddr = G_EDRAM0_BASE(addr_len) << 20; break; case MEM_EDC1: if (!(em & F_EDRAM1_ENABLE)) return (EINVAL); addr_len = t4_read_reg(sc, A_MA_EDRAM1_BAR); maddr = G_EDRAM1_BASE(addr_len) << 20; break; case MEM_MC: if (!(em & F_EXT_MEM_ENABLE)) return (EINVAL); addr_len = t4_read_reg(sc, A_MA_EXT_MEMORY_BAR); maddr = G_EXT_MEM_BASE(addr_len) << 20; break; case MEM_MC1: if (!is_t5(sc) || !(em & F_EXT_MEM1_ENABLE)) return (EINVAL); addr_len = t4_read_reg(sc, A_MA_EXT_MEMORY1_BAR); maddr = G_EXT_MEM1_BASE(addr_len) << 20; break; default: return (EINVAL); } *addr = maddr + off; /* global address */ return (validate_mem_range(sc, *addr, len)); } static int fixup_devlog_params(struct adapter *sc) { struct devlog_params *dparams = &sc->params.devlog; int rc; rc = validate_mt_off_len(sc, dparams->memtype, dparams->start, dparams->size, &dparams->addr); return (rc); } static void update_nirq(struct intrs_and_queues *iaq, int nports) { int extra = T4_EXTRA_INTR; iaq->nirq = extra; iaq->nirq += nports * (iaq->nrxq + iaq->nofldrxq); iaq->nirq += nports * (iaq->num_vis - 1) * max(iaq->nrxq_vi, iaq->nnmrxq_vi); iaq->nirq += nports * (iaq->num_vis - 1) * iaq->nofldrxq_vi; } /* * Adjust requirements to fit the number of interrupts available. */ static void calculate_iaq(struct adapter *sc, struct intrs_and_queues *iaq, int itype, int navail) { int old_nirq; const int nports = sc->params.nports; MPASS(nports > 0); MPASS(navail > 0); bzero(iaq, sizeof(*iaq)); iaq->intr_type = itype; iaq->num_vis = t4_num_vis; iaq->ntxq = t4_ntxq; iaq->ntxq_vi = t4_ntxq_vi; iaq->nrxq = t4_nrxq; iaq->nrxq_vi = t4_nrxq_vi; #ifdef TCP_OFFLOAD if (is_offload(sc)) { iaq->nofldtxq = t4_nofldtxq; iaq->nofldtxq_vi = t4_nofldtxq_vi; iaq->nofldrxq = t4_nofldrxq; iaq->nofldrxq_vi = t4_nofldrxq_vi; } #endif #ifdef DEV_NETMAP iaq->nnmtxq_vi = t4_nnmtxq_vi; iaq->nnmrxq_vi = t4_nnmrxq_vi; #endif update_nirq(iaq, nports); if (iaq->nirq <= navail && (itype != INTR_MSI || powerof2(iaq->nirq))) { /* * This is the normal case -- there are enough interrupts for * everything. */ goto done; } /* * If extra VIs have been configured try reducing their count and see if * that works. */ while (iaq->num_vis > 1) { iaq->num_vis--; update_nirq(iaq, nports); if (iaq->nirq <= navail && (itype != INTR_MSI || powerof2(iaq->nirq))) { device_printf(sc->dev, "virtual interfaces per port " "reduced to %d from %d. nrxq=%u, nofldrxq=%u, " "nrxq_vi=%u nofldrxq_vi=%u, nnmrxq_vi=%u. " "itype %d, navail %u, nirq %d.\n", iaq->num_vis, t4_num_vis, iaq->nrxq, iaq->nofldrxq, iaq->nrxq_vi, iaq->nofldrxq_vi, iaq->nnmrxq_vi, itype, navail, iaq->nirq); goto done; } } /* * Extra VIs will not be created. Log a message if they were requested. */ MPASS(iaq->num_vis == 1); iaq->ntxq_vi = iaq->nrxq_vi = 0; iaq->nofldtxq_vi = iaq->nofldrxq_vi = 0; iaq->nnmtxq_vi = iaq->nnmrxq_vi = 0; if (iaq->num_vis != t4_num_vis) { device_printf(sc->dev, "extra virtual interfaces disabled. " "nrxq=%u, nofldrxq=%u, nrxq_vi=%u nofldrxq_vi=%u, " "nnmrxq_vi=%u. itype %d, navail %u, nirq %d.\n", iaq->nrxq, iaq->nofldrxq, iaq->nrxq_vi, iaq->nofldrxq_vi, iaq->nnmrxq_vi, itype, navail, iaq->nirq); } /* * Keep reducing the number of NIC rx queues to the next lower power of * 2 (for even RSS distribution) and halving the TOE rx queues and see * if that works. */ do { if (iaq->nrxq > 1) { do { iaq->nrxq--; } while (!powerof2(iaq->nrxq)); } if (iaq->nofldrxq > 1) iaq->nofldrxq >>= 1; old_nirq = iaq->nirq; update_nirq(iaq, nports); if (iaq->nirq <= navail && (itype != INTR_MSI || powerof2(iaq->nirq))) { device_printf(sc->dev, "running with reduced number of " "rx queues because of shortage of interrupts. " "nrxq=%u, nofldrxq=%u. " "itype %d, navail %u, nirq %d.\n", iaq->nrxq, iaq->nofldrxq, itype, navail, iaq->nirq); goto done; } } while (old_nirq != iaq->nirq); /* One interrupt for everything. Ugh. */ device_printf(sc->dev, "running with minimal number of queues. " "itype %d, navail %u.\n", itype, navail); iaq->nirq = 1; MPASS(iaq->nrxq == 1); iaq->ntxq = 1; if (iaq->nofldrxq > 1) iaq->nofldtxq = 1; done: MPASS(iaq->num_vis > 0); if (iaq->num_vis > 1) { MPASS(iaq->nrxq_vi > 0); MPASS(iaq->ntxq_vi > 0); } MPASS(iaq->nirq > 0); MPASS(iaq->nrxq > 0); MPASS(iaq->ntxq > 0); if (itype == INTR_MSI) { MPASS(powerof2(iaq->nirq)); } } static int cfg_itype_and_nqueues(struct adapter *sc, struct intrs_and_queues *iaq) { int rc, itype, navail, nalloc; for (itype = INTR_MSIX; itype; itype >>= 1) { if ((itype & t4_intr_types) == 0) continue; /* not allowed */ if (itype == INTR_MSIX) navail = pci_msix_count(sc->dev); else if (itype == INTR_MSI) navail = pci_msi_count(sc->dev); else navail = 1; restart: if (navail == 0) continue; calculate_iaq(sc, iaq, itype, navail); nalloc = iaq->nirq; rc = 0; if (itype == INTR_MSIX) rc = pci_alloc_msix(sc->dev, &nalloc); else if (itype == INTR_MSI) rc = pci_alloc_msi(sc->dev, &nalloc); if (rc == 0 && nalloc > 0) { if (nalloc == iaq->nirq) return (0); /* * Didn't get the number requested. Use whatever number * the kernel is willing to allocate. */ device_printf(sc->dev, "fewer vectors than requested, " "type=%d, req=%d, rcvd=%d; will downshift req.\n", itype, iaq->nirq, nalloc); pci_release_msi(sc->dev); navail = nalloc; goto restart; } device_printf(sc->dev, "failed to allocate vectors:%d, type=%d, req=%d, rcvd=%d\n", itype, rc, iaq->nirq, nalloc); } device_printf(sc->dev, "failed to find a usable interrupt type. " "allowed=%d, msi-x=%d, msi=%d, intx=1", t4_intr_types, pci_msix_count(sc->dev), pci_msi_count(sc->dev)); return (ENXIO); } #define FW_VERSION(chip) ( \ V_FW_HDR_FW_VER_MAJOR(chip##FW_VERSION_MAJOR) | \ V_FW_HDR_FW_VER_MINOR(chip##FW_VERSION_MINOR) | \ V_FW_HDR_FW_VER_MICRO(chip##FW_VERSION_MICRO) | \ V_FW_HDR_FW_VER_BUILD(chip##FW_VERSION_BUILD)) #define FW_INTFVER(chip, intf) (chip##FW_HDR_INTFVER_##intf) struct fw_info { uint8_t chip; char *kld_name; char *fw_mod_name; struct fw_hdr fw_hdr; /* XXX: waste of space, need a sparse struct */ } fw_info[] = { { .chip = CHELSIO_T4, .kld_name = "t4fw_cfg", .fw_mod_name = "t4fw", .fw_hdr = { .chip = FW_HDR_CHIP_T4, .fw_ver = htobe32_const(FW_VERSION(T4)), .intfver_nic = FW_INTFVER(T4, NIC), .intfver_vnic = FW_INTFVER(T4, VNIC), .intfver_ofld = FW_INTFVER(T4, OFLD), .intfver_ri = FW_INTFVER(T4, RI), .intfver_iscsipdu = FW_INTFVER(T4, ISCSIPDU), .intfver_iscsi = FW_INTFVER(T4, ISCSI), .intfver_fcoepdu = FW_INTFVER(T4, FCOEPDU), .intfver_fcoe = FW_INTFVER(T4, FCOE), }, }, { .chip = CHELSIO_T5, .kld_name = "t5fw_cfg", .fw_mod_name = "t5fw", .fw_hdr = { .chip = FW_HDR_CHIP_T5, .fw_ver = htobe32_const(FW_VERSION(T5)), .intfver_nic = FW_INTFVER(T5, NIC), .intfver_vnic = FW_INTFVER(T5, VNIC), .intfver_ofld = FW_INTFVER(T5, OFLD), .intfver_ri = FW_INTFVER(T5, RI), .intfver_iscsipdu = FW_INTFVER(T5, ISCSIPDU), .intfver_iscsi = FW_INTFVER(T5, ISCSI), .intfver_fcoepdu = FW_INTFVER(T5, FCOEPDU), .intfver_fcoe = FW_INTFVER(T5, FCOE), }, }, { .chip = CHELSIO_T6, .kld_name = "t6fw_cfg", .fw_mod_name = "t6fw", .fw_hdr = { .chip = FW_HDR_CHIP_T6, .fw_ver = htobe32_const(FW_VERSION(T6)), .intfver_nic = FW_INTFVER(T6, NIC), .intfver_vnic = FW_INTFVER(T6, VNIC), .intfver_ofld = FW_INTFVER(T6, OFLD), .intfver_ri = FW_INTFVER(T6, RI), .intfver_iscsipdu = FW_INTFVER(T6, ISCSIPDU), .intfver_iscsi = FW_INTFVER(T6, ISCSI), .intfver_fcoepdu = FW_INTFVER(T6, FCOEPDU), .intfver_fcoe = FW_INTFVER(T6, FCOE), }, } }; static struct fw_info * find_fw_info(int chip) { int i; for (i = 0; i < nitems(fw_info); i++) { if (fw_info[i].chip == chip) return (&fw_info[i]); } return (NULL); } /* * Is the given firmware API compatible with the one the driver was compiled * with? */ static int fw_compatible(const struct fw_hdr *hdr1, const struct fw_hdr *hdr2) { /* short circuit if it's the exact same firmware version */ if (hdr1->chip == hdr2->chip && hdr1->fw_ver == hdr2->fw_ver) return (1); /* * XXX: Is this too conservative? Perhaps I should limit this to the * features that are supported in the driver. */ #define SAME_INTF(x) (hdr1->intfver_##x == hdr2->intfver_##x) if (hdr1->chip == hdr2->chip && SAME_INTF(nic) && SAME_INTF(vnic) && SAME_INTF(ofld) && SAME_INTF(ri) && SAME_INTF(iscsipdu) && SAME_INTF(iscsi) && SAME_INTF(fcoepdu) && SAME_INTF(fcoe)) return (1); #undef SAME_INTF return (0); } /* * The firmware in the KLD is usable, but should it be installed? This routine * explains itself in detail if it indicates the KLD firmware should be * installed. */ static int should_install_kld_fw(struct adapter *sc, int card_fw_usable, int k, int c) { const char *reason; if (!card_fw_usable) { reason = "incompatible or unusable"; goto install; } if (k > c) { reason = "older than the version bundled with this driver"; goto install; } if (t4_fw_install == 2 && k != c) { reason = "different than the version bundled with this driver"; goto install; } return (0); install: if (t4_fw_install == 0) { device_printf(sc->dev, "firmware on card (%u.%u.%u.%u) is %s, " "but the driver is prohibited from installing a different " "firmware on the card.\n", G_FW_HDR_FW_VER_MAJOR(c), G_FW_HDR_FW_VER_MINOR(c), G_FW_HDR_FW_VER_MICRO(c), G_FW_HDR_FW_VER_BUILD(c), reason); return (0); } device_printf(sc->dev, "firmware on card (%u.%u.%u.%u) is %s, " "installing firmware %u.%u.%u.%u on card.\n", G_FW_HDR_FW_VER_MAJOR(c), G_FW_HDR_FW_VER_MINOR(c), G_FW_HDR_FW_VER_MICRO(c), G_FW_HDR_FW_VER_BUILD(c), reason, G_FW_HDR_FW_VER_MAJOR(k), G_FW_HDR_FW_VER_MINOR(k), G_FW_HDR_FW_VER_MICRO(k), G_FW_HDR_FW_VER_BUILD(k)); return (1); } /* * Establish contact with the firmware and determine if we are the master driver * or not, and whether we are responsible for chip initialization. */ static int prep_firmware(struct adapter *sc) { const struct firmware *fw = NULL, *default_cfg; int rc, pf, card_fw_usable, kld_fw_usable, need_fw_reset = 1; enum dev_state state; struct fw_info *fw_info; struct fw_hdr *card_fw; /* fw on the card */ const struct fw_hdr *kld_fw; /* fw in the KLD */ const struct fw_hdr *drv_fw; /* fw header the driver was compiled against */ /* This is the firmware whose headers the driver was compiled against */ fw_info = find_fw_info(chip_id(sc)); if (fw_info == NULL) { device_printf(sc->dev, "unable to look up firmware information for chip %d.\n", chip_id(sc)); return (EINVAL); } drv_fw = &fw_info->fw_hdr; /* * The firmware KLD contains many modules. The KLD name is also the * name of the module that contains the default config file. */ default_cfg = firmware_get(fw_info->kld_name); /* This is the firmware in the KLD */ fw = firmware_get(fw_info->fw_mod_name); if (fw != NULL) { kld_fw = (const void *)fw->data; kld_fw_usable = fw_compatible(drv_fw, kld_fw); } else { kld_fw = NULL; kld_fw_usable = 0; } /* Read the header of the firmware on the card */ card_fw = malloc(sizeof(*card_fw), M_CXGBE, M_ZERO | M_WAITOK); rc = -t4_read_flash(sc, FLASH_FW_START, sizeof (*card_fw) / sizeof (uint32_t), (uint32_t *)card_fw, 1); if (rc == 0) { card_fw_usable = fw_compatible(drv_fw, (const void*)card_fw); if (card_fw->fw_ver == be32toh(0xffffffff)) { uint32_t d = be32toh(kld_fw->fw_ver); if (!kld_fw_usable) { device_printf(sc->dev, "no firmware on the card and no usable " "firmware bundled with the driver.\n"); rc = EIO; goto done; } else if (t4_fw_install == 0) { device_printf(sc->dev, "no firmware on the card and the driver " "is prohibited from installing new " "firmware.\n"); rc = EIO; goto done; } device_printf(sc->dev, "no firmware on the card, " "installing firmware %d.%d.%d.%d\n", G_FW_HDR_FW_VER_MAJOR(d), G_FW_HDR_FW_VER_MINOR(d), G_FW_HDR_FW_VER_MICRO(d), G_FW_HDR_FW_VER_BUILD(d)); rc = t4_fw_forceinstall(sc, fw->data, fw->datasize); if (rc < 0) { rc = -rc; device_printf(sc->dev, "firmware install failed: %d.\n", rc); goto done; } memcpy(card_fw, kld_fw, sizeof(*card_fw)); card_fw_usable = 1; need_fw_reset = 0; } } else { device_printf(sc->dev, "Unable to read card's firmware header: %d\n", rc); card_fw_usable = 0; } /* Contact firmware. */ rc = t4_fw_hello(sc, sc->mbox, sc->mbox, MASTER_MAY, &state); if (rc < 0 || state == DEV_STATE_ERR) { rc = -rc; device_printf(sc->dev, "failed to connect to the firmware: %d, %d.\n", rc, state); goto done; } pf = rc; if (pf == sc->mbox) sc->flags |= MASTER_PF; else if (state == DEV_STATE_UNINIT) { /* * We didn't get to be the master so we definitely won't be * configuring the chip. It's a bug if someone else hasn't * configured it already. */ device_printf(sc->dev, "couldn't be master(%d), " "device not already initialized either(%d).\n", rc, state); rc = EPROTO; goto done; } if (card_fw_usable && card_fw->fw_ver == drv_fw->fw_ver && (!kld_fw_usable || kld_fw->fw_ver == drv_fw->fw_ver)) { /* * Common case: the firmware on the card is an exact match and * the KLD is an exact match too, or the KLD is * absent/incompatible. Note that t4_fw_install = 2 is ignored * here -- use cxgbetool loadfw if you want to reinstall the * same firmware as the one on the card. */ } else if (kld_fw_usable && state == DEV_STATE_UNINIT && should_install_kld_fw(sc, card_fw_usable, be32toh(kld_fw->fw_ver), be32toh(card_fw->fw_ver))) { rc = -t4_fw_upgrade(sc, sc->mbox, fw->data, fw->datasize, 0); if (rc != 0) { device_printf(sc->dev, "failed to install firmware: %d\n", rc); goto done; } /* Installed successfully, update the cached header too. */ memcpy(card_fw, kld_fw, sizeof(*card_fw)); card_fw_usable = 1; need_fw_reset = 0; /* already reset as part of load_fw */ } if (!card_fw_usable) { uint32_t d, c, k; d = ntohl(drv_fw->fw_ver); c = ntohl(card_fw->fw_ver); k = kld_fw ? ntohl(kld_fw->fw_ver) : 0; device_printf(sc->dev, "Cannot find a usable firmware: " "fw_install %d, chip state %d, " "driver compiled with %d.%d.%d.%d, " "card has %d.%d.%d.%d, KLD has %d.%d.%d.%d\n", t4_fw_install, state, G_FW_HDR_FW_VER_MAJOR(d), G_FW_HDR_FW_VER_MINOR(d), G_FW_HDR_FW_VER_MICRO(d), G_FW_HDR_FW_VER_BUILD(d), G_FW_HDR_FW_VER_MAJOR(c), G_FW_HDR_FW_VER_MINOR(c), G_FW_HDR_FW_VER_MICRO(c), G_FW_HDR_FW_VER_BUILD(c), G_FW_HDR_FW_VER_MAJOR(k), G_FW_HDR_FW_VER_MINOR(k), G_FW_HDR_FW_VER_MICRO(k), G_FW_HDR_FW_VER_BUILD(k)); rc = EINVAL; goto done; } /* Reset device */ if (need_fw_reset && (rc = -t4_fw_reset(sc, sc->mbox, F_PIORSTMODE | F_PIORST)) != 0) { device_printf(sc->dev, "firmware reset failed: %d.\n", rc); if (rc != ETIMEDOUT && rc != EIO) t4_fw_bye(sc, sc->mbox); goto done; } sc->flags |= FW_OK; rc = get_params__pre_init(sc); if (rc != 0) goto done; /* error message displayed already */ /* Partition adapter resources as specified in the config file. */ if (state == DEV_STATE_UNINIT) { KASSERT(sc->flags & MASTER_PF, ("%s: trying to change chip settings when not master.", __func__)); rc = partition_resources(sc, default_cfg, fw_info->kld_name); if (rc != 0) goto done; /* error message displayed already */ t4_tweak_chip_settings(sc); /* get basic stuff going */ rc = -t4_fw_initialize(sc, sc->mbox); if (rc != 0) { device_printf(sc->dev, "fw init failed: %d.\n", rc); goto done; } } else { snprintf(sc->cfg_file, sizeof(sc->cfg_file), "pf%d", pf); sc->cfcsum = 0; } done: free(card_fw, M_CXGBE); if (fw != NULL) firmware_put(fw, FIRMWARE_UNLOAD); if (default_cfg != NULL) firmware_put(default_cfg, FIRMWARE_UNLOAD); return (rc); } #define FW_PARAM_DEV(param) \ (V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) | \ V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_##param)) #define FW_PARAM_PFVF(param) \ (V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_PFVF) | \ V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_PFVF_##param)) /* * Partition chip resources for use between various PFs, VFs, etc. */ static int partition_resources(struct adapter *sc, const struct firmware *default_cfg, const char *name_prefix) { const struct firmware *cfg = NULL; int rc = 0; struct fw_caps_config_cmd caps; uint32_t mtype, moff, finicsum, cfcsum; /* * Figure out what configuration file to use. Pick the default config * file for the card if the user hasn't specified one explicitly. */ snprintf(sc->cfg_file, sizeof(sc->cfg_file), "%s", t4_cfg_file); if (strncmp(t4_cfg_file, DEFAULT_CF, sizeof(t4_cfg_file)) == 0) { /* Card specific overrides go here. */ if (pci_get_device(sc->dev) == 0x440a) snprintf(sc->cfg_file, sizeof(sc->cfg_file), UWIRE_CF); if (is_fpga(sc)) snprintf(sc->cfg_file, sizeof(sc->cfg_file), FPGA_CF); } /* * We need to load another module if the profile is anything except * "default" or "flash". */ if (strncmp(sc->cfg_file, DEFAULT_CF, sizeof(sc->cfg_file)) != 0 && strncmp(sc->cfg_file, FLASH_CF, sizeof(sc->cfg_file)) != 0) { char s[32]; snprintf(s, sizeof(s), "%s_%s", name_prefix, sc->cfg_file); cfg = firmware_get(s); if (cfg == NULL) { if (default_cfg != NULL) { device_printf(sc->dev, "unable to load module \"%s\" for " "configuration profile \"%s\", will use " "the default config file instead.\n", s, sc->cfg_file); snprintf(sc->cfg_file, sizeof(sc->cfg_file), "%s", DEFAULT_CF); } else { device_printf(sc->dev, "unable to load module \"%s\" for " "configuration profile \"%s\", will use " "the config file on the card's flash " "instead.\n", s, sc->cfg_file); snprintf(sc->cfg_file, sizeof(sc->cfg_file), "%s", FLASH_CF); } } } if (strncmp(sc->cfg_file, DEFAULT_CF, sizeof(sc->cfg_file)) == 0 && default_cfg == NULL) { device_printf(sc->dev, "default config file not available, will use the config " "file on the card's flash instead.\n"); snprintf(sc->cfg_file, sizeof(sc->cfg_file), "%s", FLASH_CF); } if (strncmp(sc->cfg_file, FLASH_CF, sizeof(sc->cfg_file)) != 0) { u_int cflen; const uint32_t *cfdata; uint32_t param, val, addr; KASSERT(cfg != NULL || default_cfg != NULL, ("%s: no config to upload", __func__)); /* * Ask the firmware where it wants us to upload the config file. */ param = FW_PARAM_DEV(CF); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, ¶m, &val); if (rc != 0) { /* No support for config file? Shouldn't happen. */ device_printf(sc->dev, "failed to query config file location: %d.\n", rc); goto done; } mtype = G_FW_PARAMS_PARAM_Y(val); moff = G_FW_PARAMS_PARAM_Z(val) << 16; /* * XXX: sheer laziness. We deliberately added 4 bytes of * useless stuffing/comments at the end of the config file so * it's ok to simply throw away the last remaining bytes when * the config file is not an exact multiple of 4. This also * helps with the validate_mt_off_len check. */ if (cfg != NULL) { cflen = cfg->datasize & ~3; cfdata = cfg->data; } else { cflen = default_cfg->datasize & ~3; cfdata = default_cfg->data; } if (cflen > FLASH_CFG_MAX_SIZE) { device_printf(sc->dev, "config file too long (%d, max allowed is %d). " "Will try to use the config on the card, if any.\n", cflen, FLASH_CFG_MAX_SIZE); goto use_config_on_flash; } rc = validate_mt_off_len(sc, mtype, moff, cflen, &addr); if (rc != 0) { device_printf(sc->dev, "%s: addr (%d/0x%x) or len %d is not valid: %d. " "Will try to use the config on the card, if any.\n", __func__, mtype, moff, cflen, rc); goto use_config_on_flash; } write_via_memwin(sc, 2, addr, cfdata, cflen); } else { use_config_on_flash: mtype = FW_MEMTYPE_FLASH; moff = t4_flash_cfg_addr(sc); } bzero(&caps, sizeof(caps)); caps.op_to_write = htobe32(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) | F_FW_CMD_REQUEST | F_FW_CMD_READ); caps.cfvalid_to_len16 = htobe32(F_FW_CAPS_CONFIG_CMD_CFVALID | V_FW_CAPS_CONFIG_CMD_MEMTYPE_CF(mtype) | V_FW_CAPS_CONFIG_CMD_MEMADDR64K_CF(moff >> 16) | FW_LEN16(caps)); rc = -t4_wr_mbox(sc, sc->mbox, &caps, sizeof(caps), &caps); if (rc != 0) { device_printf(sc->dev, "failed to pre-process config file: %d " "(mtype %d, moff 0x%x).\n", rc, mtype, moff); goto done; } finicsum = be32toh(caps.finicsum); cfcsum = be32toh(caps.cfcsum); if (finicsum != cfcsum) { device_printf(sc->dev, "WARNING: config file checksum mismatch: %08x %08x\n", finicsum, cfcsum); } sc->cfcsum = cfcsum; #define LIMIT_CAPS(x) do { \ caps.x &= htobe16(t4_##x##_allowed); \ } while (0) /* * Let the firmware know what features will (not) be used so it can tune * things accordingly. */ LIMIT_CAPS(nbmcaps); LIMIT_CAPS(linkcaps); LIMIT_CAPS(switchcaps); LIMIT_CAPS(niccaps); LIMIT_CAPS(toecaps); LIMIT_CAPS(rdmacaps); LIMIT_CAPS(cryptocaps); LIMIT_CAPS(iscsicaps); LIMIT_CAPS(fcoecaps); #undef LIMIT_CAPS + if (caps.niccaps & htobe16(FW_CAPS_CONFIG_NIC_HASHFILTER)) { + /* + * TOE and hashfilters are mutually exclusive. It is a config + * file or firmware bug if both are reported as available. Try + * to cope with the situation in non-debug builds by disabling + * TOE. + */ + MPASS(caps.toecaps == 0); + + caps.toecaps = 0; + caps.rdmacaps = 0; + caps.iscsicaps = 0; + } + caps.op_to_write = htobe32(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) | F_FW_CMD_REQUEST | F_FW_CMD_WRITE); caps.cfvalid_to_len16 = htobe32(FW_LEN16(caps)); rc = -t4_wr_mbox(sc, sc->mbox, &caps, sizeof(caps), NULL); if (rc != 0) { device_printf(sc->dev, "failed to process config file: %d.\n", rc); } done: if (cfg != NULL) firmware_put(cfg, FIRMWARE_UNLOAD); return (rc); } /* * Retrieve parameters that are needed (or nice to have) very early. */ static int get_params__pre_init(struct adapter *sc) { int rc; uint32_t param[2], val[2]; t4_get_version_info(sc); snprintf(sc->fw_version, sizeof(sc->fw_version), "%u.%u.%u.%u", G_FW_HDR_FW_VER_MAJOR(sc->params.fw_vers), G_FW_HDR_FW_VER_MINOR(sc->params.fw_vers), G_FW_HDR_FW_VER_MICRO(sc->params.fw_vers), G_FW_HDR_FW_VER_BUILD(sc->params.fw_vers)); snprintf(sc->bs_version, sizeof(sc->bs_version), "%u.%u.%u.%u", G_FW_HDR_FW_VER_MAJOR(sc->params.bs_vers), G_FW_HDR_FW_VER_MINOR(sc->params.bs_vers), G_FW_HDR_FW_VER_MICRO(sc->params.bs_vers), G_FW_HDR_FW_VER_BUILD(sc->params.bs_vers)); snprintf(sc->tp_version, sizeof(sc->tp_version), "%u.%u.%u.%u", G_FW_HDR_FW_VER_MAJOR(sc->params.tp_vers), G_FW_HDR_FW_VER_MINOR(sc->params.tp_vers), G_FW_HDR_FW_VER_MICRO(sc->params.tp_vers), G_FW_HDR_FW_VER_BUILD(sc->params.tp_vers)); snprintf(sc->er_version, sizeof(sc->er_version), "%u.%u.%u.%u", G_FW_HDR_FW_VER_MAJOR(sc->params.er_vers), G_FW_HDR_FW_VER_MINOR(sc->params.er_vers), G_FW_HDR_FW_VER_MICRO(sc->params.er_vers), G_FW_HDR_FW_VER_BUILD(sc->params.er_vers)); param[0] = FW_PARAM_DEV(PORTVEC); param[1] = FW_PARAM_DEV(CCLK); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 2, param, val); if (rc != 0) { device_printf(sc->dev, "failed to query parameters (pre_init): %d.\n", rc); return (rc); } sc->params.portvec = val[0]; sc->params.nports = bitcount32(val[0]); sc->params.vpd.cclk = val[1]; /* Read device log parameters. */ rc = -t4_init_devlog_params(sc, 1); if (rc == 0) fixup_devlog_params(sc); else { device_printf(sc->dev, "failed to get devlog parameters: %d.\n", rc); rc = 0; /* devlog isn't critical for device operation */ } return (rc); } /* * Retrieve various parameters that are of interest to the driver. The device * has been initialized by the firmware at this point. */ static int get_params__post_init(struct adapter *sc) { int rc; uint32_t param[7], val[7]; struct fw_caps_config_cmd caps; param[0] = FW_PARAM_PFVF(IQFLINT_START); param[1] = FW_PARAM_PFVF(EQ_START); param[2] = FW_PARAM_PFVF(FILTER_START); param[3] = FW_PARAM_PFVF(FILTER_END); param[4] = FW_PARAM_PFVF(L2T_START); param[5] = FW_PARAM_PFVF(L2T_END); param[6] = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) | V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_DIAG) | V_FW_PARAMS_PARAM_Y(FW_PARAM_DEV_DIAG_VDD); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 7, param, val); if (rc != 0) { device_printf(sc->dev, "failed to query parameters (post_init): %d.\n", rc); return (rc); } sc->sge.iq_start = val[0]; sc->sge.eq_start = val[1]; sc->tids.ftid_base = val[2]; sc->tids.nftids = val[3] - val[2] + 1; sc->params.ftid_min = val[2]; sc->params.ftid_max = val[3]; sc->vres.l2t.start = val[4]; sc->vres.l2t.size = val[5] - val[4] + 1; KASSERT(sc->vres.l2t.size <= L2T_SIZE, ("%s: L2 table size (%u) larger than expected (%u)", __func__, sc->vres.l2t.size, L2T_SIZE)); sc->params.core_vdd = val[6]; /* * MPSBGMAP is queried separately because only recent firmwares support * it as a parameter and we don't want the compound query above to fail * on older firmwares. */ param[0] = FW_PARAM_DEV(MPSBGMAP); val[0] = 0; rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, param, val); if (rc == 0) sc->params.mps_bg_map = val[0]; else sc->params.mps_bg_map = 0; /* get capabilites */ bzero(&caps, sizeof(caps)); caps.op_to_write = htobe32(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) | F_FW_CMD_REQUEST | F_FW_CMD_READ); caps.cfvalid_to_len16 = htobe32(FW_LEN16(caps)); rc = -t4_wr_mbox(sc, sc->mbox, &caps, sizeof(caps), &caps); if (rc != 0) { device_printf(sc->dev, "failed to get card capabilities: %d.\n", rc); return (rc); } #define READ_CAPS(x) do { \ sc->x = htobe16(caps.x); \ } while (0) READ_CAPS(nbmcaps); READ_CAPS(linkcaps); READ_CAPS(switchcaps); READ_CAPS(niccaps); READ_CAPS(toecaps); READ_CAPS(rdmacaps); READ_CAPS(cryptocaps); READ_CAPS(iscsicaps); READ_CAPS(fcoecaps); - /* - * The firmware attempts memfree TOE configuration for -SO cards and - * will report toecaps=0 if it runs out of resources (this depends on - * the config file). It may not report 0 for other capabilities - * dependent on the TOE in this case. Set them to 0 here so that the - * driver doesn't bother tracking resources that will never be used. - */ - if (sc->toecaps == 0) { - sc->iscsicaps = 0; - sc->rdmacaps = 0; - } + if (sc->niccaps & FW_CAPS_CONFIG_NIC_HASHFILTER) { + MPASS(chip_id(sc) > CHELSIO_T4); + MPASS(sc->toecaps == 0); + sc->toecaps = 0; + param[0] = FW_PARAM_DEV(NTID); + rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val); + if (rc != 0) { + device_printf(sc->dev, + "failed to query HASHFILTER parameters: %d.\n", rc); + return (rc); + } + sc->tids.ntids = val[0]; + sc->tids.natids = min(sc->tids.ntids / 2, MAX_ATIDS); + sc->params.hash_filter = 1; + } if (sc->niccaps & FW_CAPS_CONFIG_NIC_ETHOFLD) { param[0] = FW_PARAM_PFVF(ETHOFLD_START); param[1] = FW_PARAM_PFVF(ETHOFLD_END); param[2] = FW_PARAM_DEV(FLOWC_BUFFIFO_SZ); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 3, param, val); if (rc != 0) { device_printf(sc->dev, "failed to query NIC parameters: %d.\n", rc); return (rc); } sc->tids.etid_base = val[0]; sc->params.etid_min = val[0]; sc->tids.netids = val[1] - val[0] + 1; sc->params.netids = sc->tids.netids; sc->params.eo_wr_cred = val[2]; sc->params.ethoffload = 1; } - if (sc->toecaps) { /* query offload-related parameters */ param[0] = FW_PARAM_DEV(NTID); param[1] = FW_PARAM_PFVF(SERVER_START); param[2] = FW_PARAM_PFVF(SERVER_END); param[3] = FW_PARAM_PFVF(TDDP_START); param[4] = FW_PARAM_PFVF(TDDP_END); param[5] = FW_PARAM_DEV(FLOWC_BUFFIFO_SZ); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val); if (rc != 0) { device_printf(sc->dev, "failed to query TOE parameters: %d.\n", rc); return (rc); } sc->tids.ntids = val[0]; sc->tids.natids = min(sc->tids.ntids / 2, MAX_ATIDS); sc->tids.stid_base = val[1]; sc->tids.nstids = val[2] - val[1] + 1; sc->vres.ddp.start = val[3]; sc->vres.ddp.size = val[4] - val[3] + 1; sc->params.ofldq_wr_cred = val[5]; sc->params.offload = 1; + } else { + /* + * The firmware attempts memfree TOE configuration for -SO cards + * and will report toecaps=0 if it runs out of resources (this + * depends on the config file). It may not report 0 for other + * capabilities dependent on the TOE in this case. Set them to + * 0 here so that the driver doesn't bother tracking resources + * that will never be used. + */ + sc->iscsicaps = 0; + sc->rdmacaps = 0; } if (sc->rdmacaps) { param[0] = FW_PARAM_PFVF(STAG_START); param[1] = FW_PARAM_PFVF(STAG_END); param[2] = FW_PARAM_PFVF(RQ_START); param[3] = FW_PARAM_PFVF(RQ_END); param[4] = FW_PARAM_PFVF(PBL_START); param[5] = FW_PARAM_PFVF(PBL_END); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val); if (rc != 0) { device_printf(sc->dev, "failed to query RDMA parameters(1): %d.\n", rc); return (rc); } sc->vres.stag.start = val[0]; sc->vres.stag.size = val[1] - val[0] + 1; sc->vres.rq.start = val[2]; sc->vres.rq.size = val[3] - val[2] + 1; sc->vres.pbl.start = val[4]; sc->vres.pbl.size = val[5] - val[4] + 1; param[0] = FW_PARAM_PFVF(SQRQ_START); param[1] = FW_PARAM_PFVF(SQRQ_END); param[2] = FW_PARAM_PFVF(CQ_START); param[3] = FW_PARAM_PFVF(CQ_END); param[4] = FW_PARAM_PFVF(OCQ_START); param[5] = FW_PARAM_PFVF(OCQ_END); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val); if (rc != 0) { device_printf(sc->dev, "failed to query RDMA parameters(2): %d.\n", rc); return (rc); } sc->vres.qp.start = val[0]; sc->vres.qp.size = val[1] - val[0] + 1; sc->vres.cq.start = val[2]; sc->vres.cq.size = val[3] - val[2] + 1; sc->vres.ocq.start = val[4]; sc->vres.ocq.size = val[5] - val[4] + 1; param[0] = FW_PARAM_PFVF(SRQ_START); param[1] = FW_PARAM_PFVF(SRQ_END); param[2] = FW_PARAM_DEV(MAXORDIRD_QP); param[3] = FW_PARAM_DEV(MAXIRD_ADAPTER); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 4, param, val); if (rc != 0) { device_printf(sc->dev, "failed to query RDMA parameters(3): %d.\n", rc); return (rc); } sc->vres.srq.start = val[0]; sc->vres.srq.size = val[1] - val[0] + 1; sc->params.max_ordird_qp = val[2]; sc->params.max_ird_adapter = val[3]; } if (sc->iscsicaps) { param[0] = FW_PARAM_PFVF(ISCSI_START); param[1] = FW_PARAM_PFVF(ISCSI_END); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 2, param, val); if (rc != 0) { device_printf(sc->dev, "failed to query iSCSI parameters: %d.\n", rc); return (rc); } sc->vres.iscsi.start = val[0]; sc->vres.iscsi.size = val[1] - val[0] + 1; } if (sc->cryptocaps & FW_CAPS_CONFIG_TLSKEYS) { param[0] = FW_PARAM_PFVF(TLS_START); param[1] = FW_PARAM_PFVF(TLS_END); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 2, param, val); if (rc != 0) { device_printf(sc->dev, "failed to query TLS parameters: %d.\n", rc); return (rc); } sc->vres.key.start = val[0]; sc->vres.key.size = val[1] - val[0] + 1; } t4_init_sge_params(sc); /* * We've got the params we wanted to query via the firmware. Now grab * some others directly from the chip. */ rc = t4_read_chip_settings(sc); return (rc); } static int set_params__post_init(struct adapter *sc) { uint32_t param, val; #ifdef TCP_OFFLOAD int i, v, shift; #endif /* ask for encapsulated CPLs */ param = FW_PARAM_PFVF(CPLFW4MSG_ENCAP); val = 1; (void)t4_set_params(sc, sc->mbox, sc->pf, 0, 1, ¶m, &val); #ifdef TCP_OFFLOAD /* * Override the TOE timers with user provided tunables. This is not the * recommended way to change the timers (the firmware config file is) so * these tunables are not documented. * * All the timer tunables are in microseconds. */ if (t4_toe_keepalive_idle != 0) { v = us_to_tcp_ticks(sc, t4_toe_keepalive_idle); v &= M_KEEPALIVEIDLE; t4_set_reg_field(sc, A_TP_KEEP_IDLE, V_KEEPALIVEIDLE(M_KEEPALIVEIDLE), V_KEEPALIVEIDLE(v)); } if (t4_toe_keepalive_interval != 0) { v = us_to_tcp_ticks(sc, t4_toe_keepalive_interval); v &= M_KEEPALIVEINTVL; t4_set_reg_field(sc, A_TP_KEEP_INTVL, V_KEEPALIVEINTVL(M_KEEPALIVEINTVL), V_KEEPALIVEINTVL(v)); } if (t4_toe_keepalive_count != 0) { v = t4_toe_keepalive_count & M_KEEPALIVEMAXR2; t4_set_reg_field(sc, A_TP_SHIFT_CNT, V_KEEPALIVEMAXR1(M_KEEPALIVEMAXR1) | V_KEEPALIVEMAXR2(M_KEEPALIVEMAXR2), V_KEEPALIVEMAXR1(1) | V_KEEPALIVEMAXR2(v)); } if (t4_toe_rexmt_min != 0) { v = us_to_tcp_ticks(sc, t4_toe_rexmt_min); v &= M_RXTMIN; t4_set_reg_field(sc, A_TP_RXT_MIN, V_RXTMIN(M_RXTMIN), V_RXTMIN(v)); } if (t4_toe_rexmt_max != 0) { v = us_to_tcp_ticks(sc, t4_toe_rexmt_max); v &= M_RXTMAX; t4_set_reg_field(sc, A_TP_RXT_MAX, V_RXTMAX(M_RXTMAX), V_RXTMAX(v)); } if (t4_toe_rexmt_count != 0) { v = t4_toe_rexmt_count & M_RXTSHIFTMAXR2; t4_set_reg_field(sc, A_TP_SHIFT_CNT, V_RXTSHIFTMAXR1(M_RXTSHIFTMAXR1) | V_RXTSHIFTMAXR2(M_RXTSHIFTMAXR2), V_RXTSHIFTMAXR1(1) | V_RXTSHIFTMAXR2(v)); } for (i = 0; i < nitems(t4_toe_rexmt_backoff); i++) { if (t4_toe_rexmt_backoff[i] != -1) { v = t4_toe_rexmt_backoff[i] & M_TIMERBACKOFFINDEX0; shift = (i & 3) << 3; t4_set_reg_field(sc, A_TP_TCP_BACKOFF_REG0 + (i & ~3), M_TIMERBACKOFFINDEX0 << shift, v << shift); } } #endif return (0); } #undef FW_PARAM_PFVF #undef FW_PARAM_DEV static void t4_set_desc(struct adapter *sc) { char buf[128]; struct adapter_params *p = &sc->params; snprintf(buf, sizeof(buf), "Chelsio %s", p->vpd.id); device_set_desc_copy(sc->dev, buf); } static void build_medialist(struct port_info *pi, struct ifmedia *media) { int m; PORT_LOCK_ASSERT_OWNED(pi); ifmedia_removeall(media); /* * XXX: Would it be better to ifmedia_add all 4 combinations of pause * settings for every speed instead of just txpause|rxpause? ifconfig * media display looks much better if autoselect is the only case where * ifm_current is different from ifm_active. If the user picks anything * except txpause|rxpause the display is ugly. */ m = IFM_ETHER | IFM_FDX | IFM_ETH_TXPAUSE | IFM_ETH_RXPAUSE; switch(pi->port_type) { case FW_PORT_TYPE_BT_XFI: case FW_PORT_TYPE_BT_XAUI: ifmedia_add(media, m | IFM_10G_T, 0, NULL); /* fall through */ case FW_PORT_TYPE_BT_SGMII: ifmedia_add(media, m | IFM_1000_T, 0, NULL); ifmedia_add(media, m | IFM_100_TX, 0, NULL); ifmedia_add(media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(media, IFM_ETHER | IFM_AUTO); break; case FW_PORT_TYPE_CX4: ifmedia_add(media, m | IFM_10G_CX4, 0, NULL); ifmedia_set(media, m | IFM_10G_CX4); break; case FW_PORT_TYPE_QSFP_10G: case FW_PORT_TYPE_SFP: case FW_PORT_TYPE_FIBER_XFI: case FW_PORT_TYPE_FIBER_XAUI: switch (pi->mod_type) { case FW_PORT_MOD_TYPE_LR: ifmedia_add(media, m | IFM_10G_LR, 0, NULL); ifmedia_set(media, m | IFM_10G_LR); break; case FW_PORT_MOD_TYPE_SR: ifmedia_add(media, m | IFM_10G_SR, 0, NULL); ifmedia_set(media, m | IFM_10G_SR); break; case FW_PORT_MOD_TYPE_LRM: ifmedia_add(media, m | IFM_10G_LRM, 0, NULL); ifmedia_set(media, m | IFM_10G_LRM); break; case FW_PORT_MOD_TYPE_TWINAX_PASSIVE: case FW_PORT_MOD_TYPE_TWINAX_ACTIVE: ifmedia_add(media, m | IFM_10G_TWINAX, 0, NULL); ifmedia_set(media, m | IFM_10G_TWINAX); break; case FW_PORT_MOD_TYPE_NONE: m &= ~IFM_FDX; ifmedia_add(media, m | IFM_NONE, 0, NULL); ifmedia_set(media, m | IFM_NONE); break; case FW_PORT_MOD_TYPE_NA: case FW_PORT_MOD_TYPE_ER: default: device_printf(pi->dev, "unknown port_type (%d), mod_type (%d)\n", pi->port_type, pi->mod_type); ifmedia_add(media, m | IFM_UNKNOWN, 0, NULL); ifmedia_set(media, m | IFM_UNKNOWN); break; } break; case FW_PORT_TYPE_CR_QSFP: case FW_PORT_TYPE_SFP28: case FW_PORT_TYPE_KR_SFP28: switch (pi->mod_type) { case FW_PORT_MOD_TYPE_SR: ifmedia_add(media, m | IFM_25G_SR, 0, NULL); ifmedia_set(media, m | IFM_25G_SR); break; case FW_PORT_MOD_TYPE_TWINAX_PASSIVE: case FW_PORT_MOD_TYPE_TWINAX_ACTIVE: ifmedia_add(media, m | IFM_25G_CR, 0, NULL); ifmedia_set(media, m | IFM_25G_CR); break; case FW_PORT_MOD_TYPE_NONE: m &= ~IFM_FDX; ifmedia_add(media, m | IFM_NONE, 0, NULL); ifmedia_set(media, m | IFM_NONE); break; default: device_printf(pi->dev, "unknown port_type (%d), mod_type (%d)\n", pi->port_type, pi->mod_type); ifmedia_add(media, m | IFM_UNKNOWN, 0, NULL); ifmedia_set(media, m | IFM_UNKNOWN); break; } break; case FW_PORT_TYPE_QSFP: switch (pi->mod_type) { case FW_PORT_MOD_TYPE_LR: ifmedia_add(media, m | IFM_40G_LR4, 0, NULL); ifmedia_set(media, m | IFM_40G_LR4); break; case FW_PORT_MOD_TYPE_SR: ifmedia_add(media, m | IFM_40G_SR4, 0, NULL); ifmedia_set(media, m | IFM_40G_SR4); break; case FW_PORT_MOD_TYPE_TWINAX_PASSIVE: case FW_PORT_MOD_TYPE_TWINAX_ACTIVE: ifmedia_add(media, m | IFM_40G_CR4, 0, NULL); ifmedia_set(media, m | IFM_40G_CR4); break; case FW_PORT_MOD_TYPE_NONE: m &= ~IFM_FDX; ifmedia_add(media, m | IFM_NONE, 0, NULL); ifmedia_set(media, m | IFM_NONE); break; default: device_printf(pi->dev, "unknown port_type (%d), mod_type (%d)\n", pi->port_type, pi->mod_type); ifmedia_add(media, m | IFM_UNKNOWN, 0, NULL); ifmedia_set(media, m | IFM_UNKNOWN); break; } break; case FW_PORT_TYPE_KR4_100G: case FW_PORT_TYPE_CR4_QSFP: switch (pi->mod_type) { case FW_PORT_MOD_TYPE_LR: ifmedia_add(media, m | IFM_100G_LR4, 0, NULL); ifmedia_set(media, m | IFM_100G_LR4); break; case FW_PORT_MOD_TYPE_SR: ifmedia_add(media, m | IFM_100G_SR4, 0, NULL); ifmedia_set(media, m | IFM_100G_SR4); break; case FW_PORT_MOD_TYPE_TWINAX_PASSIVE: case FW_PORT_MOD_TYPE_TWINAX_ACTIVE: ifmedia_add(media, m | IFM_100G_CR4, 0, NULL); ifmedia_set(media, m | IFM_100G_CR4); break; case FW_PORT_MOD_TYPE_NONE: m &= ~IFM_FDX; ifmedia_add(media, m | IFM_NONE, 0, NULL); ifmedia_set(media, m | IFM_NONE); break; default: device_printf(pi->dev, "unknown port_type (%d), mod_type (%d)\n", pi->port_type, pi->mod_type); ifmedia_add(media, m | IFM_UNKNOWN, 0, NULL); ifmedia_set(media, m | IFM_UNKNOWN); break; } break; default: device_printf(pi->dev, "unknown port_type (%d), mod_type (%d)\n", pi->port_type, pi->mod_type); ifmedia_add(media, m | IFM_UNKNOWN, 0, NULL); ifmedia_set(media, m | IFM_UNKNOWN); break; } } /* * Update all the requested_* fields in the link config and then send a mailbox * command to apply the settings. */ static void init_l1cfg(struct port_info *pi) { struct adapter *sc = pi->adapter; struct link_config *lc = &pi->link_cfg; int rc; ASSERT_SYNCHRONIZED_OP(sc); lc->requested_speed = port_top_speed(pi); /* in Gbps */ if (t4_autoneg != 0 && lc->supported & FW_PORT_CAP_ANEG) { lc->requested_aneg = AUTONEG_ENABLE; } else { lc->requested_aneg = AUTONEG_DISABLE; } lc->requested_fc = t4_pause_settings & (PAUSE_TX | PAUSE_RX); if (t4_fec != -1) { lc->requested_fec = t4_fec & (FEC_RS | FEC_BASER_RS | FEC_RESERVED); } else { /* Use the suggested value provided by the firmware in acaps */ if (lc->advertising & FW_PORT_CAP_FEC_RS) lc->requested_fec = FEC_RS; else if (lc->advertising & FW_PORT_CAP_FEC_BASER_RS) lc->requested_fec = FEC_BASER_RS; else lc->requested_fec = 0; } rc = -t4_link_l1cfg(sc, sc->mbox, pi->tx_chan, lc); if (rc != 0) { device_printf(pi->dev, "l1cfg failed: %d\n", rc); } else { lc->fc = lc->requested_fc; lc->fec = lc->requested_fec; } } #define FW_MAC_EXACT_CHUNK 7 /* * Program the port's XGMAC based on parameters in ifnet. The caller also * indicates which parameters should be programmed (the rest are left alone). */ int update_mac_settings(struct ifnet *ifp, int flags) { int rc = 0; struct vi_info *vi = ifp->if_softc; struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; int mtu = -1, promisc = -1, allmulti = -1, vlanex = -1; ASSERT_SYNCHRONIZED_OP(sc); KASSERT(flags, ("%s: not told what to update.", __func__)); if (flags & XGMAC_MTU) mtu = ifp->if_mtu; if (flags & XGMAC_PROMISC) promisc = ifp->if_flags & IFF_PROMISC ? 1 : 0; if (flags & XGMAC_ALLMULTI) allmulti = ifp->if_flags & IFF_ALLMULTI ? 1 : 0; if (flags & XGMAC_VLANEX) vlanex = ifp->if_capenable & IFCAP_VLAN_HWTAGGING ? 1 : 0; if (flags & (XGMAC_MTU|XGMAC_PROMISC|XGMAC_ALLMULTI|XGMAC_VLANEX)) { rc = -t4_set_rxmode(sc, sc->mbox, vi->viid, mtu, promisc, allmulti, 1, vlanex, false); if (rc) { if_printf(ifp, "set_rxmode (%x) failed: %d\n", flags, rc); return (rc); } } if (flags & XGMAC_UCADDR) { uint8_t ucaddr[ETHER_ADDR_LEN]; bcopy(IF_LLADDR(ifp), ucaddr, sizeof(ucaddr)); rc = t4_change_mac(sc, sc->mbox, vi->viid, vi->xact_addr_filt, ucaddr, true, true); if (rc < 0) { rc = -rc; if_printf(ifp, "change_mac failed: %d\n", rc); return (rc); } else { vi->xact_addr_filt = rc; rc = 0; } } if (flags & XGMAC_MCADDRS) { const uint8_t *mcaddr[FW_MAC_EXACT_CHUNK]; int del = 1; uint64_t hash = 0; struct ifmultiaddr *ifma; int i = 0, j; if_maddr_rlock(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; mcaddr[i] = LLADDR((struct sockaddr_dl *)ifma->ifma_addr); MPASS(ETHER_IS_MULTICAST(mcaddr[i])); i++; if (i == FW_MAC_EXACT_CHUNK) { rc = t4_alloc_mac_filt(sc, sc->mbox, vi->viid, del, i, mcaddr, NULL, &hash, 0); if (rc < 0) { rc = -rc; for (j = 0; j < i; j++) { if_printf(ifp, "failed to add mc address" " %02x:%02x:%02x:" "%02x:%02x:%02x rc=%d\n", mcaddr[j][0], mcaddr[j][1], mcaddr[j][2], mcaddr[j][3], mcaddr[j][4], mcaddr[j][5], rc); } goto mcfail; } del = 0; i = 0; } } if (i > 0) { rc = t4_alloc_mac_filt(sc, sc->mbox, vi->viid, del, i, mcaddr, NULL, &hash, 0); if (rc < 0) { rc = -rc; for (j = 0; j < i; j++) { if_printf(ifp, "failed to add mc address" " %02x:%02x:%02x:" "%02x:%02x:%02x rc=%d\n", mcaddr[j][0], mcaddr[j][1], mcaddr[j][2], mcaddr[j][3], mcaddr[j][4], mcaddr[j][5], rc); } goto mcfail; } } rc = -t4_set_addr_hash(sc, sc->mbox, vi->viid, 0, hash, 0); if (rc != 0) if_printf(ifp, "failed to set mc address hash: %d", rc); mcfail: if_maddr_runlock(ifp); } return (rc); } /* * {begin|end}_synchronized_op must be called from the same thread. */ int begin_synchronized_op(struct adapter *sc, struct vi_info *vi, int flags, char *wmesg) { int rc, pri; #ifdef WITNESS /* the caller thinks it's ok to sleep, but is it really? */ if (flags & SLEEP_OK) WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "begin_synchronized_op"); #endif if (INTR_OK) pri = PCATCH; else pri = 0; ADAPTER_LOCK(sc); for (;;) { if (vi && IS_DOOMED(vi)) { rc = ENXIO; goto done; } if (!IS_BUSY(sc)) { rc = 0; break; } if (!(flags & SLEEP_OK)) { rc = EBUSY; goto done; } if (mtx_sleep(&sc->flags, &sc->sc_lock, pri, wmesg, 0)) { rc = EINTR; goto done; } } KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__)); SET_BUSY(sc); #ifdef INVARIANTS sc->last_op = wmesg; sc->last_op_thr = curthread; sc->last_op_flags = flags; #endif done: if (!(flags & HOLD_LOCK) || rc) ADAPTER_UNLOCK(sc); return (rc); } /* * Tell if_ioctl and if_init that the VI is going away. This is * special variant of begin_synchronized_op and must be paired with a * call to end_synchronized_op. */ void doom_vi(struct adapter *sc, struct vi_info *vi) { ADAPTER_LOCK(sc); SET_DOOMED(vi); wakeup(&sc->flags); while (IS_BUSY(sc)) mtx_sleep(&sc->flags, &sc->sc_lock, 0, "t4detach", 0); SET_BUSY(sc); #ifdef INVARIANTS sc->last_op = "t4detach"; sc->last_op_thr = curthread; sc->last_op_flags = 0; #endif ADAPTER_UNLOCK(sc); } /* * {begin|end}_synchronized_op must be called from the same thread. */ void end_synchronized_op(struct adapter *sc, int flags) { if (flags & LOCK_HELD) ADAPTER_LOCK_ASSERT_OWNED(sc); else ADAPTER_LOCK(sc); KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__)); CLR_BUSY(sc); wakeup(&sc->flags); ADAPTER_UNLOCK(sc); } static int cxgbe_init_synchronized(struct vi_info *vi) { struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; struct ifnet *ifp = vi->ifp; int rc = 0, i; struct sge_txq *txq; ASSERT_SYNCHRONIZED_OP(sc); if (ifp->if_drv_flags & IFF_DRV_RUNNING) return (0); /* already running */ if (!(sc->flags & FULL_INIT_DONE) && ((rc = adapter_full_init(sc)) != 0)) return (rc); /* error message displayed already */ if (!(vi->flags & VI_INIT_DONE) && ((rc = vi_full_init(vi)) != 0)) return (rc); /* error message displayed already */ rc = update_mac_settings(ifp, XGMAC_ALL); if (rc) goto done; /* error message displayed already */ rc = -t4_enable_vi(sc, sc->mbox, vi->viid, true, true); if (rc != 0) { if_printf(ifp, "enable_vi failed: %d\n", rc); goto done; } /* * Can't fail from this point onwards. Review cxgbe_uninit_synchronized * if this changes. */ for_each_txq(vi, i, txq) { TXQ_LOCK(txq); txq->eq.flags |= EQ_ENABLED; TXQ_UNLOCK(txq); } /* * The first iq of the first port to come up is used for tracing. */ if (sc->traceq < 0 && IS_MAIN_VI(vi)) { sc->traceq = sc->sge.rxq[vi->first_rxq].iq.abs_id; t4_write_reg(sc, is_t4(sc) ? A_MPS_TRC_RSS_CONTROL : A_MPS_T5_TRC_RSS_CONTROL, V_RSSCONTROL(pi->tx_chan) | V_QUEUENUMBER(sc->traceq)); pi->flags |= HAS_TRACEQ; } /* all ok */ PORT_LOCK(pi); if (pi->up_vis++ == 0) { t4_update_port_info(pi); build_medialist(pi, &pi->media); init_l1cfg(pi); } ifp->if_drv_flags |= IFF_DRV_RUNNING; if (pi->nvi > 1 || sc->flags & IS_VF) callout_reset(&vi->tick, hz, vi_tick, vi); else callout_reset(&pi->tick, hz, cxgbe_tick, pi); PORT_UNLOCK(pi); done: if (rc != 0) cxgbe_uninit_synchronized(vi); return (rc); } /* * Idempotent. */ static int cxgbe_uninit_synchronized(struct vi_info *vi) { struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; struct ifnet *ifp = vi->ifp; int rc, i; struct sge_txq *txq; ASSERT_SYNCHRONIZED_OP(sc); if (!(vi->flags & VI_INIT_DONE)) { if (__predict_false(ifp->if_drv_flags & IFF_DRV_RUNNING)) { KASSERT(0, ("uninited VI is running")); if_printf(ifp, "uninited VI with running ifnet. " "vi->flags 0x%016lx, if_flags 0x%08x, " "if_drv_flags 0x%08x\n", vi->flags, ifp->if_flags, ifp->if_drv_flags); } return (0); } /* * Disable the VI so that all its data in either direction is discarded * by the MPS. Leave everything else (the queues, interrupts, and 1Hz * tick) intact as the TP can deliver negative advice or data that it's * holding in its RAM (for an offloaded connection) even after the VI is * disabled. */ rc = -t4_enable_vi(sc, sc->mbox, vi->viid, false, false); if (rc) { if_printf(ifp, "disable_vi failed: %d\n", rc); return (rc); } for_each_txq(vi, i, txq) { TXQ_LOCK(txq); txq->eq.flags &= ~EQ_ENABLED; TXQ_UNLOCK(txq); } PORT_LOCK(pi); if (pi->nvi > 1 || sc->flags & IS_VF) callout_stop(&vi->tick); else callout_stop(&pi->tick); if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { PORT_UNLOCK(pi); return (0); } ifp->if_drv_flags &= ~IFF_DRV_RUNNING; pi->up_vis--; if (pi->up_vis > 0) { PORT_UNLOCK(pi); return (0); } PORT_UNLOCK(pi); pi->link_cfg.link_ok = 0; pi->link_cfg.speed = 0; pi->link_cfg.link_down_rc = 255; t4_os_link_changed(pi); pi->old_link_cfg = pi->link_cfg; return (0); } /* * It is ok for this function to fail midway and return right away. t4_detach * will walk the entire sc->irq list and clean up whatever is valid. */ int t4_setup_intr_handlers(struct adapter *sc) { int rc, rid, p, q, v; char s[8]; struct irq *irq; struct port_info *pi; struct vi_info *vi; struct sge *sge = &sc->sge; struct sge_rxq *rxq; #ifdef TCP_OFFLOAD struct sge_ofld_rxq *ofld_rxq; #endif #ifdef DEV_NETMAP struct sge_nm_rxq *nm_rxq; #endif #ifdef RSS int nbuckets = rss_getnumbuckets(); #endif /* * Setup interrupts. */ irq = &sc->irq[0]; rid = sc->intr_type == INTR_INTX ? 0 : 1; if (forwarding_intr_to_fwq(sc)) return (t4_alloc_irq(sc, irq, rid, t4_intr_all, sc, "all")); /* Multiple interrupts. */ if (sc->flags & IS_VF) KASSERT(sc->intr_count >= T4VF_EXTRA_INTR + sc->params.nports, ("%s: too few intr.", __func__)); else KASSERT(sc->intr_count >= T4_EXTRA_INTR + sc->params.nports, ("%s: too few intr.", __func__)); /* The first one is always error intr on PFs */ if (!(sc->flags & IS_VF)) { rc = t4_alloc_irq(sc, irq, rid, t4_intr_err, sc, "err"); if (rc != 0) return (rc); irq++; rid++; } /* The second one is always the firmware event queue (first on VFs) */ rc = t4_alloc_irq(sc, irq, rid, t4_intr_evt, &sge->fwq, "evt"); if (rc != 0) return (rc); irq++; rid++; for_each_port(sc, p) { pi = sc->port[p]; for_each_vi(pi, v, vi) { vi->first_intr = rid - 1; if (vi->nnmrxq > 0) { int n = max(vi->nrxq, vi->nnmrxq); rxq = &sge->rxq[vi->first_rxq]; #ifdef DEV_NETMAP nm_rxq = &sge->nm_rxq[vi->first_nm_rxq]; #endif for (q = 0; q < n; q++) { snprintf(s, sizeof(s), "%x%c%x", p, 'a' + v, q); if (q < vi->nrxq) irq->rxq = rxq++; #ifdef DEV_NETMAP if (q < vi->nnmrxq) irq->nm_rxq = nm_rxq++; #endif rc = t4_alloc_irq(sc, irq, rid, t4_vi_intr, irq, s); if (rc != 0) return (rc); #ifdef RSS if (q < vi->nrxq) { bus_bind_intr(sc->dev, irq->res, rss_getcpu(q % nbuckets)); } #endif irq++; rid++; vi->nintr++; } } else { for_each_rxq(vi, q, rxq) { snprintf(s, sizeof(s), "%x%c%x", p, 'a' + v, q); rc = t4_alloc_irq(sc, irq, rid, t4_intr, rxq, s); if (rc != 0) return (rc); #ifdef RSS bus_bind_intr(sc->dev, irq->res, rss_getcpu(q % nbuckets)); #endif irq++; rid++; vi->nintr++; } } #ifdef TCP_OFFLOAD for_each_ofld_rxq(vi, q, ofld_rxq) { snprintf(s, sizeof(s), "%x%c%x", p, 'A' + v, q); rc = t4_alloc_irq(sc, irq, rid, t4_intr, ofld_rxq, s); if (rc != 0) return (rc); irq++; rid++; vi->nintr++; } #endif } } MPASS(irq == &sc->irq[sc->intr_count]); return (0); } int adapter_full_init(struct adapter *sc) { int rc, i; #ifdef RSS uint32_t raw_rss_key[RSS_KEYSIZE / sizeof(uint32_t)]; uint32_t rss_key[RSS_KEYSIZE / sizeof(uint32_t)]; #endif ASSERT_SYNCHRONIZED_OP(sc); ADAPTER_LOCK_ASSERT_NOTOWNED(sc); KASSERT((sc->flags & FULL_INIT_DONE) == 0, ("%s: FULL_INIT_DONE already", __func__)); /* * queues that belong to the adapter (not any particular port). */ rc = t4_setup_adapter_queues(sc); if (rc != 0) goto done; for (i = 0; i < nitems(sc->tq); i++) { sc->tq[i] = taskqueue_create("t4 taskq", M_NOWAIT, taskqueue_thread_enqueue, &sc->tq[i]); if (sc->tq[i] == NULL) { device_printf(sc->dev, "failed to allocate task queue %d\n", i); rc = ENOMEM; goto done; } taskqueue_start_threads(&sc->tq[i], 1, PI_NET, "%s tq%d", device_get_nameunit(sc->dev), i); } #ifdef RSS MPASS(RSS_KEYSIZE == 40); rss_getkey((void *)&raw_rss_key[0]); for (i = 0; i < nitems(rss_key); i++) { rss_key[i] = htobe32(raw_rss_key[nitems(rss_key) - 1 - i]); } t4_write_rss_key(sc, &rss_key[0], -1, 1); #endif if (!(sc->flags & IS_VF)) t4_intr_enable(sc); sc->flags |= FULL_INIT_DONE; done: if (rc != 0) adapter_full_uninit(sc); return (rc); } int adapter_full_uninit(struct adapter *sc) { int i; ADAPTER_LOCK_ASSERT_NOTOWNED(sc); t4_teardown_adapter_queues(sc); for (i = 0; i < nitems(sc->tq) && sc->tq[i]; i++) { taskqueue_free(sc->tq[i]); sc->tq[i] = NULL; } sc->flags &= ~FULL_INIT_DONE; return (0); } #ifdef RSS #define SUPPORTED_RSS_HASHTYPES (RSS_HASHTYPE_RSS_IPV4 | \ RSS_HASHTYPE_RSS_TCP_IPV4 | RSS_HASHTYPE_RSS_IPV6 | \ RSS_HASHTYPE_RSS_TCP_IPV6 | RSS_HASHTYPE_RSS_UDP_IPV4 | \ RSS_HASHTYPE_RSS_UDP_IPV6) /* Translates kernel hash types to hardware. */ static int hashconfig_to_hashen(int hashconfig) { int hashen = 0; if (hashconfig & RSS_HASHTYPE_RSS_IPV4) hashen |= F_FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN; if (hashconfig & RSS_HASHTYPE_RSS_IPV6) hashen |= F_FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN; if (hashconfig & RSS_HASHTYPE_RSS_UDP_IPV4) { hashen |= F_FW_RSS_VI_CONFIG_CMD_UDPEN | F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN; } if (hashconfig & RSS_HASHTYPE_RSS_UDP_IPV6) { hashen |= F_FW_RSS_VI_CONFIG_CMD_UDPEN | F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN; } if (hashconfig & RSS_HASHTYPE_RSS_TCP_IPV4) hashen |= F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN; if (hashconfig & RSS_HASHTYPE_RSS_TCP_IPV6) hashen |= F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN; return (hashen); } /* Translates hardware hash types to kernel. */ static int hashen_to_hashconfig(int hashen) { int hashconfig = 0; if (hashen & F_FW_RSS_VI_CONFIG_CMD_UDPEN) { /* * If UDP hashing was enabled it must have been enabled for * either IPv4 or IPv6 (inclusive or). Enabling UDP without * enabling any 4-tuple hash is nonsense configuration. */ MPASS(hashen & (F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN | F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN)); if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN) hashconfig |= RSS_HASHTYPE_RSS_UDP_IPV4; if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN) hashconfig |= RSS_HASHTYPE_RSS_UDP_IPV6; } if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN) hashconfig |= RSS_HASHTYPE_RSS_TCP_IPV4; if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN) hashconfig |= RSS_HASHTYPE_RSS_TCP_IPV6; if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN) hashconfig |= RSS_HASHTYPE_RSS_IPV4; if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN) hashconfig |= RSS_HASHTYPE_RSS_IPV6; return (hashconfig); } #endif int vi_full_init(struct vi_info *vi) { struct adapter *sc = vi->pi->adapter; struct ifnet *ifp = vi->ifp; uint16_t *rss; struct sge_rxq *rxq; int rc, i, j, hashen; #ifdef RSS int nbuckets = rss_getnumbuckets(); int hashconfig = rss_gethashconfig(); int extra; #endif ASSERT_SYNCHRONIZED_OP(sc); KASSERT((vi->flags & VI_INIT_DONE) == 0, ("%s: VI_INIT_DONE already", __func__)); sysctl_ctx_init(&vi->ctx); vi->flags |= VI_SYSCTL_CTX; /* * Allocate tx/rx/fl queues for this VI. */ rc = t4_setup_vi_queues(vi); if (rc != 0) goto done; /* error message displayed already */ /* * Setup RSS for this VI. Save a copy of the RSS table for later use. */ if (vi->nrxq > vi->rss_size) { if_printf(ifp, "nrxq (%d) > hw RSS table size (%d); " "some queues will never receive traffic.\n", vi->nrxq, vi->rss_size); } else if (vi->rss_size % vi->nrxq) { if_printf(ifp, "nrxq (%d), hw RSS table size (%d); " "expect uneven traffic distribution.\n", vi->nrxq, vi->rss_size); } #ifdef RSS if (vi->nrxq != nbuckets) { if_printf(ifp, "nrxq (%d) != kernel RSS buckets (%d);" "performance will be impacted.\n", vi->nrxq, nbuckets); } #endif rss = malloc(vi->rss_size * sizeof (*rss), M_CXGBE, M_ZERO | M_WAITOK); for (i = 0; i < vi->rss_size;) { #ifdef RSS j = rss_get_indirection_to_bucket(i); j %= vi->nrxq; rxq = &sc->sge.rxq[vi->first_rxq + j]; rss[i++] = rxq->iq.abs_id; #else for_each_rxq(vi, j, rxq) { rss[i++] = rxq->iq.abs_id; if (i == vi->rss_size) break; } #endif } rc = -t4_config_rss_range(sc, sc->mbox, vi->viid, 0, vi->rss_size, rss, vi->rss_size); if (rc != 0) { if_printf(ifp, "rss_config failed: %d\n", rc); goto done; } #ifdef RSS hashen = hashconfig_to_hashen(hashconfig); /* * We may have had to enable some hashes even though the global config * wants them disabled. This is a potential problem that must be * reported to the user. */ extra = hashen_to_hashconfig(hashen) ^ hashconfig; /* * If we consider only the supported hash types, then the enabled hashes * are a superset of the requested hashes. In other words, there cannot * be any supported hash that was requested but not enabled, but there * can be hashes that were not requested but had to be enabled. */ extra &= SUPPORTED_RSS_HASHTYPES; MPASS((extra & hashconfig) == 0); if (extra) { if_printf(ifp, "global RSS config (0x%x) cannot be accommodated.\n", hashconfig); } if (extra & RSS_HASHTYPE_RSS_IPV4) if_printf(ifp, "IPv4 2-tuple hashing forced on.\n"); if (extra & RSS_HASHTYPE_RSS_TCP_IPV4) if_printf(ifp, "TCP/IPv4 4-tuple hashing forced on.\n"); if (extra & RSS_HASHTYPE_RSS_IPV6) if_printf(ifp, "IPv6 2-tuple hashing forced on.\n"); if (extra & RSS_HASHTYPE_RSS_TCP_IPV6) if_printf(ifp, "TCP/IPv6 4-tuple hashing forced on.\n"); if (extra & RSS_HASHTYPE_RSS_UDP_IPV4) if_printf(ifp, "UDP/IPv4 4-tuple hashing forced on.\n"); if (extra & RSS_HASHTYPE_RSS_UDP_IPV6) if_printf(ifp, "UDP/IPv6 4-tuple hashing forced on.\n"); #else hashen = F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN | F_FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN | F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN | F_FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN | F_FW_RSS_VI_CONFIG_CMD_UDPEN; #endif rc = -t4_config_vi_rss(sc, sc->mbox, vi->viid, hashen, rss[0], 0, 0); if (rc != 0) { if_printf(ifp, "rss hash/defaultq config failed: %d\n", rc); goto done; } vi->rss = rss; vi->flags |= VI_INIT_DONE; done: if (rc != 0) vi_full_uninit(vi); return (rc); } /* * Idempotent. */ int vi_full_uninit(struct vi_info *vi) { struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; int i; struct sge_rxq *rxq; struct sge_txq *txq; #ifdef TCP_OFFLOAD struct sge_ofld_rxq *ofld_rxq; struct sge_wrq *ofld_txq; #endif if (vi->flags & VI_INIT_DONE) { /* Need to quiesce queues. */ /* XXX: Only for the first VI? */ if (IS_MAIN_VI(vi) && !(sc->flags & IS_VF)) quiesce_wrq(sc, &sc->sge.ctrlq[pi->port_id]); for_each_txq(vi, i, txq) { quiesce_txq(sc, txq); } #ifdef TCP_OFFLOAD for_each_ofld_txq(vi, i, ofld_txq) { quiesce_wrq(sc, ofld_txq); } #endif for_each_rxq(vi, i, rxq) { quiesce_iq(sc, &rxq->iq); quiesce_fl(sc, &rxq->fl); } #ifdef TCP_OFFLOAD for_each_ofld_rxq(vi, i, ofld_rxq) { quiesce_iq(sc, &ofld_rxq->iq); quiesce_fl(sc, &ofld_rxq->fl); } #endif free(vi->rss, M_CXGBE); free(vi->nm_rss, M_CXGBE); } t4_teardown_vi_queues(vi); vi->flags &= ~VI_INIT_DONE; return (0); } static void quiesce_txq(struct adapter *sc, struct sge_txq *txq) { struct sge_eq *eq = &txq->eq; struct sge_qstat *spg = (void *)&eq->desc[eq->sidx]; (void) sc; /* unused */ #ifdef INVARIANTS TXQ_LOCK(txq); MPASS((eq->flags & EQ_ENABLED) == 0); TXQ_UNLOCK(txq); #endif /* Wait for the mp_ring to empty. */ while (!mp_ring_is_idle(txq->r)) { mp_ring_check_drainage(txq->r, 0); pause("rquiesce", 1); } /* Then wait for the hardware to finish. */ while (spg->cidx != htobe16(eq->pidx)) pause("equiesce", 1); /* Finally, wait for the driver to reclaim all descriptors. */ while (eq->cidx != eq->pidx) pause("dquiesce", 1); } static void quiesce_wrq(struct adapter *sc, struct sge_wrq *wrq) { /* XXXTX */ } static void quiesce_iq(struct adapter *sc, struct sge_iq *iq) { (void) sc; /* unused */ /* Synchronize with the interrupt handler */ while (!atomic_cmpset_int(&iq->state, IQS_IDLE, IQS_DISABLED)) pause("iqfree", 1); } static void quiesce_fl(struct adapter *sc, struct sge_fl *fl) { mtx_lock(&sc->sfl_lock); FL_LOCK(fl); fl->flags |= FL_DOOMED; FL_UNLOCK(fl); callout_stop(&sc->sfl_callout); mtx_unlock(&sc->sfl_lock); KASSERT((fl->flags & FL_STARVING) == 0, ("%s: still starving", __func__)); } static int t4_alloc_irq(struct adapter *sc, struct irq *irq, int rid, driver_intr_t *handler, void *arg, char *name) { int rc; irq->rid = rid; irq->res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &irq->rid, RF_SHAREABLE | RF_ACTIVE); if (irq->res == NULL) { device_printf(sc->dev, "failed to allocate IRQ for rid %d, name %s.\n", rid, name); return (ENOMEM); } rc = bus_setup_intr(sc->dev, irq->res, INTR_MPSAFE | INTR_TYPE_NET, NULL, handler, arg, &irq->tag); if (rc != 0) { device_printf(sc->dev, "failed to setup interrupt for rid %d, name %s: %d\n", rid, name, rc); } else if (name) bus_describe_intr(sc->dev, irq->res, irq->tag, "%s", name); return (rc); } static int t4_free_irq(struct adapter *sc, struct irq *irq) { if (irq->tag) bus_teardown_intr(sc->dev, irq->res, irq->tag); if (irq->res) bus_release_resource(sc->dev, SYS_RES_IRQ, irq->rid, irq->res); bzero(irq, sizeof(*irq)); return (0); } static void get_regs(struct adapter *sc, struct t4_regdump *regs, uint8_t *buf) { regs->version = chip_id(sc) | chip_rev(sc) << 10; t4_get_regs(sc, buf, regs->len); } #define A_PL_INDIR_CMD 0x1f8 #define S_PL_AUTOINC 31 #define M_PL_AUTOINC 0x1U #define V_PL_AUTOINC(x) ((x) << S_PL_AUTOINC) #define G_PL_AUTOINC(x) (((x) >> S_PL_AUTOINC) & M_PL_AUTOINC) #define S_PL_VFID 20 #define M_PL_VFID 0xffU #define V_PL_VFID(x) ((x) << S_PL_VFID) #define G_PL_VFID(x) (((x) >> S_PL_VFID) & M_PL_VFID) #define S_PL_ADDR 0 #define M_PL_ADDR 0xfffffU #define V_PL_ADDR(x) ((x) << S_PL_ADDR) #define G_PL_ADDR(x) (((x) >> S_PL_ADDR) & M_PL_ADDR) #define A_PL_INDIR_DATA 0x1fc static uint64_t read_vf_stat(struct adapter *sc, unsigned int viid, int reg) { u32 stats[2]; mtx_assert(&sc->reg_lock, MA_OWNED); if (sc->flags & IS_VF) { stats[0] = t4_read_reg(sc, VF_MPS_REG(reg)); stats[1] = t4_read_reg(sc, VF_MPS_REG(reg + 4)); } else { t4_write_reg(sc, A_PL_INDIR_CMD, V_PL_AUTOINC(1) | V_PL_VFID(G_FW_VIID_VIN(viid)) | V_PL_ADDR(VF_MPS_REG(reg))); stats[0] = t4_read_reg(sc, A_PL_INDIR_DATA); stats[1] = t4_read_reg(sc, A_PL_INDIR_DATA); } return (((uint64_t)stats[1]) << 32 | stats[0]); } static void t4_get_vi_stats(struct adapter *sc, unsigned int viid, struct fw_vi_stats_vf *stats) { #define GET_STAT(name) \ read_vf_stat(sc, viid, A_MPS_VF_STAT_##name##_L) stats->tx_bcast_bytes = GET_STAT(TX_VF_BCAST_BYTES); stats->tx_bcast_frames = GET_STAT(TX_VF_BCAST_FRAMES); stats->tx_mcast_bytes = GET_STAT(TX_VF_MCAST_BYTES); stats->tx_mcast_frames = GET_STAT(TX_VF_MCAST_FRAMES); stats->tx_ucast_bytes = GET_STAT(TX_VF_UCAST_BYTES); stats->tx_ucast_frames = GET_STAT(TX_VF_UCAST_FRAMES); stats->tx_drop_frames = GET_STAT(TX_VF_DROP_FRAMES); stats->tx_offload_bytes = GET_STAT(TX_VF_OFFLOAD_BYTES); stats->tx_offload_frames = GET_STAT(TX_VF_OFFLOAD_FRAMES); stats->rx_bcast_bytes = GET_STAT(RX_VF_BCAST_BYTES); stats->rx_bcast_frames = GET_STAT(RX_VF_BCAST_FRAMES); stats->rx_mcast_bytes = GET_STAT(RX_VF_MCAST_BYTES); stats->rx_mcast_frames = GET_STAT(RX_VF_MCAST_FRAMES); stats->rx_ucast_bytes = GET_STAT(RX_VF_UCAST_BYTES); stats->rx_ucast_frames = GET_STAT(RX_VF_UCAST_FRAMES); stats->rx_err_frames = GET_STAT(RX_VF_ERR_FRAMES); #undef GET_STAT } static void t4_clr_vi_stats(struct adapter *sc, unsigned int viid) { int reg; t4_write_reg(sc, A_PL_INDIR_CMD, V_PL_AUTOINC(1) | V_PL_VFID(G_FW_VIID_VIN(viid)) | V_PL_ADDR(VF_MPS_REG(A_MPS_VF_STAT_TX_VF_BCAST_BYTES_L))); for (reg = A_MPS_VF_STAT_TX_VF_BCAST_BYTES_L; reg <= A_MPS_VF_STAT_RX_VF_ERR_FRAMES_H; reg += 4) t4_write_reg(sc, A_PL_INDIR_DATA, 0); } static void vi_refresh_stats(struct adapter *sc, struct vi_info *vi) { struct timeval tv; const struct timeval interval = {0, 250000}; /* 250ms */ if (!(vi->flags & VI_INIT_DONE)) return; getmicrotime(&tv); timevalsub(&tv, &interval); if (timevalcmp(&tv, &vi->last_refreshed, <)) return; mtx_lock(&sc->reg_lock); t4_get_vi_stats(sc, vi->viid, &vi->stats); getmicrotime(&vi->last_refreshed); mtx_unlock(&sc->reg_lock); } static void cxgbe_refresh_stats(struct adapter *sc, struct port_info *pi) { u_int i, v, tnl_cong_drops, bg_map; struct timeval tv; const struct timeval interval = {0, 250000}; /* 250ms */ getmicrotime(&tv); timevalsub(&tv, &interval); if (timevalcmp(&tv, &pi->last_refreshed, <)) return; tnl_cong_drops = 0; t4_get_port_stats(sc, pi->tx_chan, &pi->stats); bg_map = pi->mps_bg_map; while (bg_map) { i = ffs(bg_map) - 1; mtx_lock(&sc->reg_lock); t4_read_indirect(sc, A_TP_MIB_INDEX, A_TP_MIB_DATA, &v, 1, A_TP_MIB_TNL_CNG_DROP_0 + i); mtx_unlock(&sc->reg_lock); tnl_cong_drops += v; bg_map &= ~(1 << i); } pi->tnl_cong_drops = tnl_cong_drops; getmicrotime(&pi->last_refreshed); } static void cxgbe_tick(void *arg) { struct port_info *pi = arg; struct adapter *sc = pi->adapter; PORT_LOCK_ASSERT_OWNED(pi); cxgbe_refresh_stats(sc, pi); callout_schedule(&pi->tick, hz); } void vi_tick(void *arg) { struct vi_info *vi = arg; struct adapter *sc = vi->pi->adapter; vi_refresh_stats(sc, vi); callout_schedule(&vi->tick, hz); } static void cxgbe_vlan_config(void *arg, struct ifnet *ifp, uint16_t vid) { struct ifnet *vlan; if (arg != ifp || ifp->if_type != IFT_ETHER) return; vlan = VLAN_DEVAT(ifp, vid); VLAN_SETCOOKIE(vlan, ifp); } /* * Should match fw_caps_config_ enums in t4fw_interface.h */ static char *caps_decoder[] = { "\20\001IPMI\002NCSI", /* 0: NBM */ "\20\001PPP\002QFC\003DCBX", /* 1: link */ "\20\001INGRESS\002EGRESS", /* 2: switch */ "\20\001NIC\002VM\003IDS\004UM\005UM_ISGL" /* 3: NIC */ "\006HASHFILTER\007ETHOFLD", "\20\001TOE", /* 4: TOE */ "\20\001RDDP\002RDMAC", /* 5: RDMA */ "\20\001INITIATOR_PDU\002TARGET_PDU" /* 6: iSCSI */ "\003INITIATOR_CNXOFLD\004TARGET_CNXOFLD" "\005INITIATOR_SSNOFLD\006TARGET_SSNOFLD" "\007T10DIF" "\010INITIATOR_CMDOFLD\011TARGET_CMDOFLD", "\20\001LOOKASIDE\002TLSKEYS", /* 7: Crypto */ "\20\001INITIATOR\002TARGET\003CTRL_OFLD" /* 8: FCoE */ "\004PO_INITIATOR\005PO_TARGET", }; void t4_sysctls(struct adapter *sc) { struct sysctl_ctx_list *ctx; struct sysctl_oid *oid; struct sysctl_oid_list *children, *c0; static char *doorbells = {"\20\1UDB\2WCWR\3UDBWC\4KDB"}; ctx = device_get_sysctl_ctx(sc->dev); /* * dev.t4nex.X. */ oid = device_get_sysctl_tree(sc->dev); c0 = children = SYSCTL_CHILDREN(oid); sc->sc_do_rxcopy = 1; SYSCTL_ADD_INT(ctx, children, OID_AUTO, "do_rx_copy", CTLFLAG_RW, &sc->sc_do_rxcopy, 1, "Do RX copy of small frames"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nports", CTLFLAG_RD, NULL, sc->params.nports, "# of ports"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "doorbells", CTLTYPE_STRING | CTLFLAG_RD, doorbells, sc->doorbells, sysctl_bitfield, "A", "available doorbells"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "core_clock", CTLFLAG_RD, NULL, sc->params.vpd.cclk, "core clock frequency (in KHz)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_timers", CTLTYPE_STRING | CTLFLAG_RD, sc->params.sge.timer_val, sizeof(sc->params.sge.timer_val), sysctl_int_array, "A", "interrupt holdoff timer values (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_pkt_counts", CTLTYPE_STRING | CTLFLAG_RD, sc->params.sge.counter_val, sizeof(sc->params.sge.counter_val), sysctl_int_array, "A", "interrupt holdoff packet counter values"); t4_sge_sysctls(sc, ctx, children); sc->lro_timeout = 100; SYSCTL_ADD_INT(ctx, children, OID_AUTO, "lro_timeout", CTLFLAG_RW, &sc->lro_timeout, 0, "lro inactive-flush timeout (in us)"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "dflags", CTLFLAG_RW, &sc->debug_flags, 0, "flags to enable runtime debugging"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "tp_version", CTLFLAG_RD, sc->tp_version, 0, "TP microcode version"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "firmware_version", CTLFLAG_RD, sc->fw_version, 0, "firmware version"); if (sc->flags & IS_VF) return; SYSCTL_ADD_INT(ctx, children, OID_AUTO, "hw_revision", CTLFLAG_RD, NULL, chip_rev(sc), "chip hardware revision"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "sn", CTLFLAG_RD, sc->params.vpd.sn, 0, "serial number"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "pn", CTLFLAG_RD, sc->params.vpd.pn, 0, "part number"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "ec", CTLFLAG_RD, sc->params.vpd.ec, 0, "engineering change"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "md_version", CTLFLAG_RD, sc->params.vpd.md, 0, "manufacturing diags version"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "na", CTLFLAG_RD, sc->params.vpd.na, 0, "network address"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "er_version", CTLFLAG_RD, sc->er_version, 0, "expansion ROM version"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "bs_version", CTLFLAG_RD, sc->bs_version, 0, "bootstrap firmware version"); SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "scfg_version", CTLFLAG_RD, NULL, sc->params.scfg_vers, "serial config version"); SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "vpd_version", CTLFLAG_RD, NULL, sc->params.vpd_vers, "VPD version"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "cf", CTLFLAG_RD, sc->cfg_file, 0, "configuration file"); SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "cfcsum", CTLFLAG_RD, NULL, sc->cfcsum, "config file checksum"); #define SYSCTL_CAP(name, n, text) \ SYSCTL_ADD_PROC(ctx, children, OID_AUTO, #name, \ CTLTYPE_STRING | CTLFLAG_RD, caps_decoder[n], sc->name, \ sysctl_bitfield, "A", "available " text " capabilities") SYSCTL_CAP(nbmcaps, 0, "NBM"); SYSCTL_CAP(linkcaps, 1, "link"); SYSCTL_CAP(switchcaps, 2, "switch"); SYSCTL_CAP(niccaps, 3, "NIC"); SYSCTL_CAP(toecaps, 4, "TCP offload"); SYSCTL_CAP(rdmacaps, 5, "RDMA"); SYSCTL_CAP(iscsicaps, 6, "iSCSI"); SYSCTL_CAP(cryptocaps, 7, "crypto"); SYSCTL_CAP(fcoecaps, 8, "FCoE"); #undef SYSCTL_CAP SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nfilters", CTLFLAG_RD, NULL, sc->tids.nftids, "number of filters"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "temperature", CTLTYPE_INT | CTLFLAG_RD, sc, 0, sysctl_temperature, "I", "chip temperature (in Celsius)"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "core_vdd", CTLFLAG_RD, &sc->params.core_vdd, 0, "core Vdd (in mV)"); #ifdef SBUF_DRAIN /* * dev.t4nex.X.misc. Marked CTLFLAG_SKIP to avoid information overload. */ oid = SYSCTL_ADD_NODE(ctx, c0, OID_AUTO, "misc", CTLFLAG_RD | CTLFLAG_SKIP, NULL, "logs and miscellaneous information"); children = SYSCTL_CHILDREN(oid); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cctrl", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_cctrl, "A", "congestion control"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_tp0", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_cim_ibq_obq, "A", "CIM IBQ 0 (TP0)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_tp1", CTLTYPE_STRING | CTLFLAG_RD, sc, 1, sysctl_cim_ibq_obq, "A", "CIM IBQ 1 (TP1)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_ulp", CTLTYPE_STRING | CTLFLAG_RD, sc, 2, sysctl_cim_ibq_obq, "A", "CIM IBQ 2 (ULP)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_sge0", CTLTYPE_STRING | CTLFLAG_RD, sc, 3, sysctl_cim_ibq_obq, "A", "CIM IBQ 3 (SGE0)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_sge1", CTLTYPE_STRING | CTLFLAG_RD, sc, 4, sysctl_cim_ibq_obq, "A", "CIM IBQ 4 (SGE1)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_ncsi", CTLTYPE_STRING | CTLFLAG_RD, sc, 5, sysctl_cim_ibq_obq, "A", "CIM IBQ 5 (NCSI)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_la", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, chip_id(sc) <= CHELSIO_T5 ? sysctl_cim_la : sysctl_cim_la_t6, "A", "CIM logic analyzer"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ma_la", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_cim_ma_la, "A", "CIM MA logic analyzer"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp0", CTLTYPE_STRING | CTLFLAG_RD, sc, 0 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 0 (ULP0)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp1", CTLTYPE_STRING | CTLFLAG_RD, sc, 1 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 1 (ULP1)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp2", CTLTYPE_STRING | CTLFLAG_RD, sc, 2 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 2 (ULP2)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp3", CTLTYPE_STRING | CTLFLAG_RD, sc, 3 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 3 (ULP3)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_sge", CTLTYPE_STRING | CTLFLAG_RD, sc, 4 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 4 (SGE)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ncsi", CTLTYPE_STRING | CTLFLAG_RD, sc, 5 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 5 (NCSI)"); if (chip_id(sc) > CHELSIO_T4) { SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_sge0_rx", CTLTYPE_STRING | CTLFLAG_RD, sc, 6 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 6 (SGE0-RX)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_sge1_rx", CTLTYPE_STRING | CTLFLAG_RD, sc, 7 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 7 (SGE1-RX)"); } SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_pif_la", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_cim_pif_la, "A", "CIM PIF logic analyzer"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_qcfg", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_cim_qcfg, "A", "CIM queue configuration"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cpl_stats", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_cpl_stats, "A", "CPL statistics"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "ddp_stats", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_ddp_stats, "A", "non-TCP DDP statistics"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "devlog", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_devlog, "A", "firmware's device log"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "fcoe_stats", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_fcoe_stats, "A", "FCoE statistics"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "hw_sched", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_hw_sched, "A", "hardware scheduler "); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "l2t", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_l2t, "A", "hardware L2 table"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "lb_stats", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_lb_stats, "A", "loopback statistics"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "meminfo", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_meminfo, "A", "memory regions"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "mps_tcam", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, chip_id(sc) <= CHELSIO_T5 ? sysctl_mps_tcam : sysctl_mps_tcam_t6, "A", "MPS TCAM entries"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "path_mtus", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_path_mtus, "A", "path MTUs"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "pm_stats", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_pm_stats, "A", "PM statistics"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rdma_stats", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_rdma_stats, "A", "RDMA statistics"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tcp_stats", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_tcp_stats, "A", "TCP statistics"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tids", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_tids, "A", "TID information"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tp_err_stats", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_tp_err_stats, "A", "TP error statistics"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tp_la_mask", CTLTYPE_INT | CTLFLAG_RW, sc, 0, sysctl_tp_la_mask, "I", "TP logic analyzer event capture mask"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tp_la", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_tp_la, "A", "TP logic analyzer"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tx_rate", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_tx_rate, "A", "Tx rate"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "ulprx_la", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_ulprx_la, "A", "ULPRX logic analyzer"); if (chip_id(sc) >= CHELSIO_T5) { SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "wcwr_stats", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_wcwr_stats, "A", "write combined work requests"); } #endif #ifdef TCP_OFFLOAD if (is_offload(sc)) { int i; char s[4]; /* * dev.t4nex.X.toe. */ oid = SYSCTL_ADD_NODE(ctx, c0, OID_AUTO, "toe", CTLFLAG_RD, NULL, "TOE parameters"); children = SYSCTL_CHILDREN(oid); sc->tt.cong_algorithm = -1; SYSCTL_ADD_INT(ctx, children, OID_AUTO, "cong_algorithm", CTLFLAG_RW, &sc->tt.cong_algorithm, 0, "congestion control " "(-1 = default, 0 = reno, 1 = tahoe, 2 = newreno, " "3 = highspeed)"); sc->tt.sndbuf = 256 * 1024; SYSCTL_ADD_INT(ctx, children, OID_AUTO, "sndbuf", CTLFLAG_RW, &sc->tt.sndbuf, 0, "max hardware send buffer size"); sc->tt.ddp = 0; SYSCTL_ADD_INT(ctx, children, OID_AUTO, "ddp", CTLFLAG_RW, &sc->tt.ddp, 0, "DDP allowed"); sc->tt.rx_coalesce = 1; SYSCTL_ADD_INT(ctx, children, OID_AUTO, "rx_coalesce", CTLFLAG_RW, &sc->tt.rx_coalesce, 0, "receive coalescing"); sc->tt.tls = 0; SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tls", CTLFLAG_RW, &sc->tt.tls, 0, "Inline TLS allowed"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tls_rx_ports", CTLTYPE_INT | CTLFLAG_RW, sc, 0, sysctl_tls_rx_ports, "I", "TCP ports that use inline TLS+TOE RX"); sc->tt.tx_align = 1; SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_align", CTLFLAG_RW, &sc->tt.tx_align, 0, "chop and align payload"); sc->tt.tx_zcopy = 0; SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_zcopy", CTLFLAG_RW, &sc->tt.tx_zcopy, 0, "Enable zero-copy aio_write(2)"); sc->tt.cop_managed_offloading = !!t4_cop_managed_offloading; SYSCTL_ADD_INT(ctx, children, OID_AUTO, "cop_managed_offloading", CTLFLAG_RW, &sc->tt.cop_managed_offloading, 0, "COP (Connection Offload Policy) controls all TOE offload"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "timer_tick", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_tp_tick, "A", "TP timer tick (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "timestamp_tick", CTLTYPE_STRING | CTLFLAG_RD, sc, 1, sysctl_tp_tick, "A", "TCP timestamp tick (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dack_tick", CTLTYPE_STRING | CTLFLAG_RD, sc, 2, sysctl_tp_tick, "A", "DACK tick (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dack_timer", CTLTYPE_UINT | CTLFLAG_RD, sc, 0, sysctl_tp_dack_timer, "IU", "DACK timer (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rexmt_min", CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_RXT_MIN, sysctl_tp_timer, "LU", "Minimum retransmit interval (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rexmt_max", CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_RXT_MAX, sysctl_tp_timer, "LU", "Maximum retransmit interval (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "persist_min", CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_PERS_MIN, sysctl_tp_timer, "LU", "Persist timer min (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "persist_max", CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_PERS_MAX, sysctl_tp_timer, "LU", "Persist timer max (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "keepalive_idle", CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_KEEP_IDLE, sysctl_tp_timer, "LU", "Keepalive idle timer (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "keepalive_interval", CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_KEEP_INTVL, sysctl_tp_timer, "LU", "Keepalive interval timer (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "initial_srtt", CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_INIT_SRTT, sysctl_tp_timer, "LU", "Initial SRTT (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "finwait2_timer", CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_FINWAIT2_TIMER, sysctl_tp_timer, "LU", "FINWAIT2 timer (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "syn_rexmt_count", CTLTYPE_UINT | CTLFLAG_RD, sc, S_SYNSHIFTMAX, sysctl_tp_shift_cnt, "IU", "Number of SYN retransmissions before abort"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rexmt_count", CTLTYPE_UINT | CTLFLAG_RD, sc, S_RXTSHIFTMAXR2, sysctl_tp_shift_cnt, "IU", "Number of retransmissions before abort"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "keepalive_count", CTLTYPE_UINT | CTLFLAG_RD, sc, S_KEEPALIVEMAXR2, sysctl_tp_shift_cnt, "IU", "Number of keepalive probes before abort"); oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "rexmt_backoff", CTLFLAG_RD, NULL, "TOE retransmit backoffs"); children = SYSCTL_CHILDREN(oid); for (i = 0; i < 16; i++) { snprintf(s, sizeof(s), "%u", i); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, s, CTLTYPE_UINT | CTLFLAG_RD, sc, i, sysctl_tp_backoff, "IU", "TOE retransmit backoff"); } } #endif } void vi_sysctls(struct vi_info *vi) { struct sysctl_ctx_list *ctx; struct sysctl_oid *oid; struct sysctl_oid_list *children; ctx = device_get_sysctl_ctx(vi->dev); /* * dev.v?(cxgbe|cxl).X. */ oid = device_get_sysctl_tree(vi->dev); children = SYSCTL_CHILDREN(oid); SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "viid", CTLFLAG_RD, NULL, vi->viid, "VI identifer"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nrxq", CTLFLAG_RD, &vi->nrxq, 0, "# of rx queues"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "ntxq", CTLFLAG_RD, &vi->ntxq, 0, "# of tx queues"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_rxq", CTLFLAG_RD, &vi->first_rxq, 0, "index of first rx queue"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_txq", CTLFLAG_RD, &vi->first_txq, 0, "index of first tx queue"); SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "rss_size", CTLFLAG_RD, NULL, vi->rss_size, "size of RSS indirection table"); if (IS_MAIN_VI(vi)) { SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rsrv_noflowq", CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_noflowq, "IU", "Reserve queue 0 for non-flowid packets"); } #ifdef TCP_OFFLOAD if (vi->nofldrxq != 0) { SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nofldrxq", CTLFLAG_RD, &vi->nofldrxq, 0, "# of rx queues for offloaded TCP connections"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nofldtxq", CTLFLAG_RD, &vi->nofldtxq, 0, "# of tx queues for offloaded TCP connections"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_ofld_rxq", CTLFLAG_RD, &vi->first_ofld_rxq, 0, "index of first TOE rx queue"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_ofld_txq", CTLFLAG_RD, &vi->first_ofld_txq, 0, "index of first TOE tx queue"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_tmr_idx_ofld", CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_holdoff_tmr_idx_ofld, "I", "holdoff timer index for TOE queues"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_pktc_idx_ofld", CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_holdoff_pktc_idx_ofld, "I", "holdoff packet counter index for TOE queues"); } #endif #ifdef DEV_NETMAP if (vi->nnmrxq != 0) { SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nnmrxq", CTLFLAG_RD, &vi->nnmrxq, 0, "# of netmap rx queues"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nnmtxq", CTLFLAG_RD, &vi->nnmtxq, 0, "# of netmap tx queues"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_nm_rxq", CTLFLAG_RD, &vi->first_nm_rxq, 0, "index of first netmap rx queue"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_nm_txq", CTLFLAG_RD, &vi->first_nm_txq, 0, "index of first netmap tx queue"); } #endif SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_tmr_idx", CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_holdoff_tmr_idx, "I", "holdoff timer index"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_pktc_idx", CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_holdoff_pktc_idx, "I", "holdoff packet counter index"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "qsize_rxq", CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_qsize_rxq, "I", "rx queue size"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "qsize_txq", CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_qsize_txq, "I", "tx queue size"); } static void cxgbe_sysctls(struct port_info *pi) { struct sysctl_ctx_list *ctx; struct sysctl_oid *oid; struct sysctl_oid_list *children, *children2; struct adapter *sc = pi->adapter; int i; char name[16]; ctx = device_get_sysctl_ctx(pi->dev); /* * dev.cxgbe.X. */ oid = device_get_sysctl_tree(pi->dev); children = SYSCTL_CHILDREN(oid); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "linkdnrc", CTLTYPE_STRING | CTLFLAG_RD, pi, 0, sysctl_linkdnrc, "A", "reason why link is down"); if (pi->port_type == FW_PORT_TYPE_BT_XAUI) { SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "temperature", CTLTYPE_INT | CTLFLAG_RD, pi, 0, sysctl_btphy, "I", "PHY temperature (in Celsius)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "fw_version", CTLTYPE_INT | CTLFLAG_RD, pi, 1, sysctl_btphy, "I", "PHY firmware version"); } SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "pause_settings", CTLTYPE_STRING | CTLFLAG_RW, pi, 0, sysctl_pause_settings, "A", "PAUSE settings (bit 0 = rx_pause, bit 1 = tx_pause)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "fec", CTLTYPE_STRING | CTLFLAG_RW, pi, 0, sysctl_fec, "A", "Forward Error Correction (bit 0 = RS, bit 1 = BASER_RS)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "autoneg", CTLTYPE_INT | CTLFLAG_RW, pi, 0, sysctl_autoneg, "I", "autonegotiation (-1 = not supported)"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "max_speed", CTLFLAG_RD, NULL, port_top_speed(pi), "max speed (in Gbps)"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "mps_bg_map", CTLFLAG_RD, NULL, pi->mps_bg_map, "MPS buffer group map"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "rx_e_chan_map", CTLFLAG_RD, NULL, pi->rx_e_chan_map, "TP rx e-channel map"); if (sc->flags & IS_VF) return; /* * dev.(cxgbe|cxl).X.tc. */ oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "tc", CTLFLAG_RD, NULL, "Tx scheduler traffic classes (cl_rl)"); for (i = 0; i < sc->chip_params->nsched_cls; i++) { struct tx_cl_rl_params *tc = &pi->sched_params->cl_rl[i]; snprintf(name, sizeof(name), "%d", i); children2 = SYSCTL_CHILDREN(SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(oid), OID_AUTO, name, CTLFLAG_RD, NULL, "traffic class")); SYSCTL_ADD_UINT(ctx, children2, OID_AUTO, "flags", CTLFLAG_RD, &tc->flags, 0, "flags"); SYSCTL_ADD_UINT(ctx, children2, OID_AUTO, "refcount", CTLFLAG_RD, &tc->refcount, 0, "references to this class"); #ifdef SBUF_DRAIN SYSCTL_ADD_PROC(ctx, children2, OID_AUTO, "params", CTLTYPE_STRING | CTLFLAG_RD, sc, (pi->port_id << 16) | i, sysctl_tc_params, "A", "traffic class parameters"); #endif } /* * dev.cxgbe.X.stats. */ oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "stats", CTLFLAG_RD, NULL, "port statistics"); children = SYSCTL_CHILDREN(oid); SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "tx_parse_error", CTLFLAG_RD, &pi->tx_parse_error, 0, "# of tx packets with invalid length or # of segments"); #define SYSCTL_ADD_T4_REG64(pi, name, desc, reg) \ SYSCTL_ADD_OID(ctx, children, OID_AUTO, name, \ CTLTYPE_U64 | CTLFLAG_RD, sc, reg, \ sysctl_handle_t4_reg64, "QU", desc) SYSCTL_ADD_T4_REG64(pi, "tx_octets", "# of octets in good frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_BYTES_L)); SYSCTL_ADD_T4_REG64(pi, "tx_frames", "total # of good frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_FRAMES_L)); SYSCTL_ADD_T4_REG64(pi, "tx_bcast_frames", "# of broadcast frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_BCAST_L)); SYSCTL_ADD_T4_REG64(pi, "tx_mcast_frames", "# of multicast frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_MCAST_L)); SYSCTL_ADD_T4_REG64(pi, "tx_ucast_frames", "# of unicast frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_UCAST_L)); SYSCTL_ADD_T4_REG64(pi, "tx_error_frames", "# of error frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_ERROR_L)); SYSCTL_ADD_T4_REG64(pi, "tx_frames_64", "# of tx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_64B_L)); SYSCTL_ADD_T4_REG64(pi, "tx_frames_65_127", "# of tx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_65B_127B_L)); SYSCTL_ADD_T4_REG64(pi, "tx_frames_128_255", "# of tx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_128B_255B_L)); SYSCTL_ADD_T4_REG64(pi, "tx_frames_256_511", "# of tx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_256B_511B_L)); SYSCTL_ADD_T4_REG64(pi, "tx_frames_512_1023", "# of tx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_512B_1023B_L)); SYSCTL_ADD_T4_REG64(pi, "tx_frames_1024_1518", "# of tx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_1024B_1518B_L)); SYSCTL_ADD_T4_REG64(pi, "tx_frames_1519_max", "# of tx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_1519B_MAX_L)); SYSCTL_ADD_T4_REG64(pi, "tx_drop", "# of dropped tx frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_DROP_L)); SYSCTL_ADD_T4_REG64(pi, "tx_pause", "# of pause frames transmitted", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PAUSE_L)); SYSCTL_ADD_T4_REG64(pi, "tx_ppp0", "# of PPP prio 0 frames transmitted", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP0_L)); SYSCTL_ADD_T4_REG64(pi, "tx_ppp1", "# of PPP prio 1 frames transmitted", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP1_L)); SYSCTL_ADD_T4_REG64(pi, "tx_ppp2", "# of PPP prio 2 frames transmitted", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP2_L)); SYSCTL_ADD_T4_REG64(pi, "tx_ppp3", "# of PPP prio 3 frames transmitted", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP3_L)); SYSCTL_ADD_T4_REG64(pi, "tx_ppp4", "# of PPP prio 4 frames transmitted", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP4_L)); SYSCTL_ADD_T4_REG64(pi, "tx_ppp5", "# of PPP prio 5 frames transmitted", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP5_L)); SYSCTL_ADD_T4_REG64(pi, "tx_ppp6", "# of PPP prio 6 frames transmitted", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP6_L)); SYSCTL_ADD_T4_REG64(pi, "tx_ppp7", "# of PPP prio 7 frames transmitted", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP7_L)); SYSCTL_ADD_T4_REG64(pi, "rx_octets", "# of octets in good frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_BYTES_L)); SYSCTL_ADD_T4_REG64(pi, "rx_frames", "total # of good frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_FRAMES_L)); SYSCTL_ADD_T4_REG64(pi, "rx_bcast_frames", "# of broadcast frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_BCAST_L)); SYSCTL_ADD_T4_REG64(pi, "rx_mcast_frames", "# of multicast frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_MCAST_L)); SYSCTL_ADD_T4_REG64(pi, "rx_ucast_frames", "# of unicast frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_UCAST_L)); SYSCTL_ADD_T4_REG64(pi, "rx_too_long", "# of frames exceeding MTU", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_MTU_ERROR_L)); SYSCTL_ADD_T4_REG64(pi, "rx_jabber", "# of jabber frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_MTU_CRC_ERROR_L)); SYSCTL_ADD_T4_REG64(pi, "rx_fcs_err", "# of frames received with bad FCS", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_CRC_ERROR_L)); SYSCTL_ADD_T4_REG64(pi, "rx_len_err", "# of frames received with length error", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_LEN_ERROR_L)); SYSCTL_ADD_T4_REG64(pi, "rx_symbol_err", "symbol errors", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_SYM_ERROR_L)); SYSCTL_ADD_T4_REG64(pi, "rx_runt", "# of short frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_LESS_64B_L)); SYSCTL_ADD_T4_REG64(pi, "rx_frames_64", "# of rx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_64B_L)); SYSCTL_ADD_T4_REG64(pi, "rx_frames_65_127", "# of rx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_65B_127B_L)); SYSCTL_ADD_T4_REG64(pi, "rx_frames_128_255", "# of rx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_128B_255B_L)); SYSCTL_ADD_T4_REG64(pi, "rx_frames_256_511", "# of rx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_256B_511B_L)); SYSCTL_ADD_T4_REG64(pi, "rx_frames_512_1023", "# of rx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_512B_1023B_L)); SYSCTL_ADD_T4_REG64(pi, "rx_frames_1024_1518", "# of rx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_1024B_1518B_L)); SYSCTL_ADD_T4_REG64(pi, "rx_frames_1519_max", "# of rx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_1519B_MAX_L)); SYSCTL_ADD_T4_REG64(pi, "rx_pause", "# of pause frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PAUSE_L)); SYSCTL_ADD_T4_REG64(pi, "rx_ppp0", "# of PPP prio 0 frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP0_L)); SYSCTL_ADD_T4_REG64(pi, "rx_ppp1", "# of PPP prio 1 frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP1_L)); SYSCTL_ADD_T4_REG64(pi, "rx_ppp2", "# of PPP prio 2 frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP2_L)); SYSCTL_ADD_T4_REG64(pi, "rx_ppp3", "# of PPP prio 3 frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP3_L)); SYSCTL_ADD_T4_REG64(pi, "rx_ppp4", "# of PPP prio 4 frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP4_L)); SYSCTL_ADD_T4_REG64(pi, "rx_ppp5", "# of PPP prio 5 frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP5_L)); SYSCTL_ADD_T4_REG64(pi, "rx_ppp6", "# of PPP prio 6 frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP6_L)); SYSCTL_ADD_T4_REG64(pi, "rx_ppp7", "# of PPP prio 7 frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP7_L)); #undef SYSCTL_ADD_T4_REG64 #define SYSCTL_ADD_T4_PORTSTAT(name, desc) \ SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, #name, CTLFLAG_RD, \ &pi->stats.name, desc) /* We get these from port_stats and they may be stale by up to 1s */ SYSCTL_ADD_T4_PORTSTAT(rx_ovflow0, "# drops due to buffer-group 0 overflows"); SYSCTL_ADD_T4_PORTSTAT(rx_ovflow1, "# drops due to buffer-group 1 overflows"); SYSCTL_ADD_T4_PORTSTAT(rx_ovflow2, "# drops due to buffer-group 2 overflows"); SYSCTL_ADD_T4_PORTSTAT(rx_ovflow3, "# drops due to buffer-group 3 overflows"); SYSCTL_ADD_T4_PORTSTAT(rx_trunc0, "# of buffer-group 0 truncated packets"); SYSCTL_ADD_T4_PORTSTAT(rx_trunc1, "# of buffer-group 1 truncated packets"); SYSCTL_ADD_T4_PORTSTAT(rx_trunc2, "# of buffer-group 2 truncated packets"); SYSCTL_ADD_T4_PORTSTAT(rx_trunc3, "# of buffer-group 3 truncated packets"); #undef SYSCTL_ADD_T4_PORTSTAT SYSCTL_ADD_ULONG(ctx, children, OID_AUTO, "tx_tls_records", CTLFLAG_RD, &pi->tx_tls_records, "# of TLS records transmitted"); SYSCTL_ADD_ULONG(ctx, children, OID_AUTO, "tx_tls_octets", CTLFLAG_RD, &pi->tx_tls_octets, "# of payload octets in transmitted TLS records"); SYSCTL_ADD_ULONG(ctx, children, OID_AUTO, "rx_tls_records", CTLFLAG_RD, &pi->rx_tls_records, "# of TLS records received"); SYSCTL_ADD_ULONG(ctx, children, OID_AUTO, "rx_tls_octets", CTLFLAG_RD, &pi->rx_tls_octets, "# of payload octets in received TLS records"); } static int sysctl_int_array(SYSCTL_HANDLER_ARGS) { int rc, *i, space = 0; struct sbuf sb; sbuf_new_for_sysctl(&sb, NULL, 64, req); for (i = arg1; arg2; arg2 -= sizeof(int), i++) { if (space) sbuf_printf(&sb, " "); sbuf_printf(&sb, "%d", *i); space = 1; } rc = sbuf_finish(&sb); sbuf_delete(&sb); return (rc); } static int sysctl_bitfield(SYSCTL_HANDLER_ARGS) { int rc; struct sbuf *sb; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return(rc); sb = sbuf_new_for_sysctl(NULL, NULL, 128, req); if (sb == NULL) return (ENOMEM); sbuf_printf(sb, "%b", (int)arg2, (char *)arg1); rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_btphy(SYSCTL_HANDLER_ARGS) { struct port_info *pi = arg1; int op = arg2; struct adapter *sc = pi->adapter; u_int v; int rc; rc = begin_synchronized_op(sc, &pi->vi[0], SLEEP_OK | INTR_OK, "t4btt"); if (rc) return (rc); /* XXX: magic numbers */ rc = -t4_mdio_rd(sc, sc->mbox, pi->mdio_addr, 0x1e, op ? 0x20 : 0xc820, &v); end_synchronized_op(sc, 0); if (rc) return (rc); if (op == 0) v /= 256; rc = sysctl_handle_int(oidp, &v, 0, req); return (rc); } static int sysctl_noflowq(SYSCTL_HANDLER_ARGS) { struct vi_info *vi = arg1; int rc, val; val = vi->rsrv_noflowq; rc = sysctl_handle_int(oidp, &val, 0, req); if (rc != 0 || req->newptr == NULL) return (rc); if ((val >= 1) && (vi->ntxq > 1)) vi->rsrv_noflowq = 1; else vi->rsrv_noflowq = 0; return (rc); } static int sysctl_holdoff_tmr_idx(SYSCTL_HANDLER_ARGS) { struct vi_info *vi = arg1; struct adapter *sc = vi->pi->adapter; int idx, rc, i; struct sge_rxq *rxq; uint8_t v; idx = vi->tmr_idx; rc = sysctl_handle_int(oidp, &idx, 0, req); if (rc != 0 || req->newptr == NULL) return (rc); if (idx < 0 || idx >= SGE_NTIMERS) return (EINVAL); rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK, "t4tmr"); if (rc) return (rc); v = V_QINTR_TIMER_IDX(idx) | V_QINTR_CNT_EN(vi->pktc_idx != -1); for_each_rxq(vi, i, rxq) { #ifdef atomic_store_rel_8 atomic_store_rel_8(&rxq->iq.intr_params, v); #else rxq->iq.intr_params = v; #endif } vi->tmr_idx = idx; end_synchronized_op(sc, LOCK_HELD); return (0); } static int sysctl_holdoff_pktc_idx(SYSCTL_HANDLER_ARGS) { struct vi_info *vi = arg1; struct adapter *sc = vi->pi->adapter; int idx, rc; idx = vi->pktc_idx; rc = sysctl_handle_int(oidp, &idx, 0, req); if (rc != 0 || req->newptr == NULL) return (rc); if (idx < -1 || idx >= SGE_NCOUNTERS) return (EINVAL); rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK, "t4pktc"); if (rc) return (rc); if (vi->flags & VI_INIT_DONE) rc = EBUSY; /* cannot be changed once the queues are created */ else vi->pktc_idx = idx; end_synchronized_op(sc, LOCK_HELD); return (rc); } static int sysctl_qsize_rxq(SYSCTL_HANDLER_ARGS) { struct vi_info *vi = arg1; struct adapter *sc = vi->pi->adapter; int qsize, rc; qsize = vi->qsize_rxq; rc = sysctl_handle_int(oidp, &qsize, 0, req); if (rc != 0 || req->newptr == NULL) return (rc); if (qsize < 128 || (qsize & 7)) return (EINVAL); rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK, "t4rxqs"); if (rc) return (rc); if (vi->flags & VI_INIT_DONE) rc = EBUSY; /* cannot be changed once the queues are created */ else vi->qsize_rxq = qsize; end_synchronized_op(sc, LOCK_HELD); return (rc); } static int sysctl_qsize_txq(SYSCTL_HANDLER_ARGS) { struct vi_info *vi = arg1; struct adapter *sc = vi->pi->adapter; int qsize, rc; qsize = vi->qsize_txq; rc = sysctl_handle_int(oidp, &qsize, 0, req); if (rc != 0 || req->newptr == NULL) return (rc); if (qsize < 128 || qsize > 65536) return (EINVAL); rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK, "t4txqs"); if (rc) return (rc); if (vi->flags & VI_INIT_DONE) rc = EBUSY; /* cannot be changed once the queues are created */ else vi->qsize_txq = qsize; end_synchronized_op(sc, LOCK_HELD); return (rc); } static int sysctl_pause_settings(SYSCTL_HANDLER_ARGS) { struct port_info *pi = arg1; struct adapter *sc = pi->adapter; struct link_config *lc = &pi->link_cfg; int rc; if (req->newptr == NULL) { struct sbuf *sb; static char *bits = "\20\1PAUSE_RX\2PAUSE_TX"; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return(rc); sb = sbuf_new_for_sysctl(NULL, NULL, 128, req); if (sb == NULL) return (ENOMEM); sbuf_printf(sb, "%b", lc->fc & (PAUSE_TX | PAUSE_RX), bits); rc = sbuf_finish(sb); sbuf_delete(sb); } else { char s[2]; int n; s[0] = '0' + (lc->requested_fc & (PAUSE_TX | PAUSE_RX)); s[1] = 0; rc = sysctl_handle_string(oidp, s, sizeof(s), req); if (rc != 0) return(rc); if (s[1] != 0) return (EINVAL); if (s[0] < '0' || s[0] > '9') return (EINVAL); /* not a number */ n = s[0] - '0'; if (n & ~(PAUSE_TX | PAUSE_RX)) return (EINVAL); /* some other bit is set too */ rc = begin_synchronized_op(sc, &pi->vi[0], SLEEP_OK | INTR_OK, "t4PAUSE"); if (rc) return (rc); if ((lc->requested_fc & (PAUSE_TX | PAUSE_RX)) != n) { lc->requested_fc &= ~(PAUSE_TX | PAUSE_RX); lc->requested_fc |= n; rc = -t4_link_l1cfg(sc, sc->mbox, pi->tx_chan, lc); if (rc == 0) { lc->fc = lc->requested_fc; } } end_synchronized_op(sc, 0); } return (rc); } static int sysctl_fec(SYSCTL_HANDLER_ARGS) { struct port_info *pi = arg1; struct adapter *sc = pi->adapter; struct link_config *lc = &pi->link_cfg; int rc; if (req->newptr == NULL) { struct sbuf *sb; static char *bits = "\20\1RS\2BASER_RS\3RESERVED"; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return(rc); sb = sbuf_new_for_sysctl(NULL, NULL, 128, req); if (sb == NULL) return (ENOMEM); sbuf_printf(sb, "%b", lc->fec & M_FW_PORT_CAP_FEC, bits); rc = sbuf_finish(sb); sbuf_delete(sb); } else { char s[2]; int n; s[0] = '0' + (lc->requested_fec & M_FW_PORT_CAP_FEC); s[1] = 0; rc = sysctl_handle_string(oidp, s, sizeof(s), req); if (rc != 0) return(rc); if (s[1] != 0) return (EINVAL); if (s[0] < '0' || s[0] > '9') return (EINVAL); /* not a number */ n = s[0] - '0'; if (n & ~M_FW_PORT_CAP_FEC) return (EINVAL); /* some other bit is set too */ rc = begin_synchronized_op(sc, &pi->vi[0], SLEEP_OK | INTR_OK, "t4fec"); if (rc) return (rc); if ((lc->requested_fec & M_FW_PORT_CAP_FEC) != n) { lc->requested_fec = n & G_FW_PORT_CAP_FEC(lc->supported); rc = -t4_link_l1cfg(sc, sc->mbox, pi->tx_chan, lc); if (rc == 0) { lc->fec = lc->requested_fec; } } end_synchronized_op(sc, 0); } return (rc); } static int sysctl_autoneg(SYSCTL_HANDLER_ARGS) { struct port_info *pi = arg1; struct adapter *sc = pi->adapter; struct link_config *lc = &pi->link_cfg; int rc, val, old; if (lc->supported & FW_PORT_CAP_ANEG) val = lc->requested_aneg == AUTONEG_ENABLE ? 1 : 0; else val = -1; rc = sysctl_handle_int(oidp, &val, 0, req); if (rc != 0 || req->newptr == NULL) return (rc); if ((lc->supported & FW_PORT_CAP_ANEG) == 0) return (ENOTSUP); if (val == 0) val = AUTONEG_DISABLE; else if (val == 1) val = AUTONEG_ENABLE; else return (EINVAL); if (lc->requested_aneg == val) return (0); /* no change */ rc = begin_synchronized_op(sc, &pi->vi[0], SLEEP_OK | INTR_OK, "t4aneg"); if (rc) return (rc); old = lc->requested_aneg; lc->requested_aneg = val; rc = -t4_link_l1cfg(sc, sc->mbox, pi->tx_chan, lc); if (rc != 0) lc->requested_aneg = old; end_synchronized_op(sc, 0); return (rc); } static int sysctl_handle_t4_reg64(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; int reg = arg2; uint64_t val; val = t4_read_reg64(sc, reg); return (sysctl_handle_64(oidp, &val, 0, req)); } static int sysctl_temperature(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; int rc, t; uint32_t param, val; rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4temp"); if (rc) return (rc); param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) | V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_DIAG) | V_FW_PARAMS_PARAM_Y(FW_PARAM_DEV_DIAG_TMP); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, ¶m, &val); end_synchronized_op(sc, 0); if (rc) return (rc); /* unknown is returned as 0 but we display -1 in that case */ t = val == 0 ? -1 : val; rc = sysctl_handle_int(oidp, &t, 0, req); return (rc); } #ifdef SBUF_DRAIN static int sysctl_cctrl(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc, i; uint16_t incr[NMTUS][NCCTRL_WIN]; static const char *dec_fac[] = { "0.5", "0.5625", "0.625", "0.6875", "0.75", "0.8125", "0.875", "0.9375" }; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); t4_read_cong_tbl(sc, incr); for (i = 0; i < NCCTRL_WIN; ++i) { sbuf_printf(sb, "%2d: %4u %4u %4u %4u %4u %4u %4u %4u\n", i, incr[0][i], incr[1][i], incr[2][i], incr[3][i], incr[4][i], incr[5][i], incr[6][i], incr[7][i]); sbuf_printf(sb, "%8u %4u %4u %4u %4u %4u %4u %4u %5u %s\n", incr[8][i], incr[9][i], incr[10][i], incr[11][i], incr[12][i], incr[13][i], incr[14][i], incr[15][i], sc->params.a_wnd[i], dec_fac[sc->params.b_wnd[i]]); } rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static const char *qname[CIM_NUM_IBQ + CIM_NUM_OBQ_T5] = { "TP0", "TP1", "ULP", "SGE0", "SGE1", "NC-SI", /* ibq's */ "ULP0", "ULP1", "ULP2", "ULP3", "SGE", "NC-SI", /* obq's */ "SGE0-RX", "SGE1-RX" /* additional obq's (T5 onwards) */ }; static int sysctl_cim_ibq_obq(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc, i, n, qid = arg2; uint32_t *buf, *p; char *qtype; u_int cim_num_obq = sc->chip_params->cim_num_obq; KASSERT(qid >= 0 && qid < CIM_NUM_IBQ + cim_num_obq, ("%s: bad qid %d\n", __func__, qid)); if (qid < CIM_NUM_IBQ) { /* inbound queue */ qtype = "IBQ"; n = 4 * CIM_IBQ_SIZE; buf = malloc(n * sizeof(uint32_t), M_CXGBE, M_ZERO | M_WAITOK); rc = t4_read_cim_ibq(sc, qid, buf, n); } else { /* outbound queue */ qtype = "OBQ"; qid -= CIM_NUM_IBQ; n = 4 * cim_num_obq * CIM_OBQ_SIZE; buf = malloc(n * sizeof(uint32_t), M_CXGBE, M_ZERO | M_WAITOK); rc = t4_read_cim_obq(sc, qid, buf, n); } if (rc < 0) { rc = -rc; goto done; } n = rc * sizeof(uint32_t); /* rc has # of words actually read */ rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) goto done; sb = sbuf_new_for_sysctl(NULL, NULL, PAGE_SIZE, req); if (sb == NULL) { rc = ENOMEM; goto done; } sbuf_printf(sb, "%s%d %s", qtype , qid, qname[arg2]); for (i = 0, p = buf; i < n; i += 16, p += 4) sbuf_printf(sb, "\n%#06x: %08x %08x %08x %08x", i, p[0], p[1], p[2], p[3]); rc = sbuf_finish(sb); sbuf_delete(sb); done: free(buf, M_CXGBE); return (rc); } static int sysctl_cim_la(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; u_int cfg; struct sbuf *sb; uint32_t *buf, *p; int rc; MPASS(chip_id(sc) <= CHELSIO_T5); rc = -t4_cim_read(sc, A_UP_UP_DBG_LA_CFG, 1, &cfg); if (rc != 0) return (rc); rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); buf = malloc(sc->params.cim_la_size * sizeof(uint32_t), M_CXGBE, M_ZERO | M_WAITOK); rc = -t4_cim_read_la(sc, buf, NULL); if (rc != 0) goto done; sbuf_printf(sb, "Status Data PC%s", cfg & F_UPDBGLACAPTPCONLY ? "" : " LS0Stat LS0Addr LS0Data"); for (p = buf; p <= &buf[sc->params.cim_la_size - 8]; p += 8) { if (cfg & F_UPDBGLACAPTPCONLY) { sbuf_printf(sb, "\n %02x %08x %08x", p[5] & 0xff, p[6], p[7]); sbuf_printf(sb, "\n %02x %02x%06x %02x%06x", (p[3] >> 8) & 0xff, p[3] & 0xff, p[4] >> 8, p[4] & 0xff, p[5] >> 8); sbuf_printf(sb, "\n %02x %x%07x %x%07x", (p[0] >> 4) & 0xff, p[0] & 0xf, p[1] >> 4, p[1] & 0xf, p[2] >> 4); } else { sbuf_printf(sb, "\n %02x %x%07x %x%07x %08x %08x " "%08x%08x%08x%08x", (p[0] >> 4) & 0xff, p[0] & 0xf, p[1] >> 4, p[1] & 0xf, p[2] >> 4, p[2] & 0xf, p[3], p[4], p[5], p[6], p[7]); } } rc = sbuf_finish(sb); sbuf_delete(sb); done: free(buf, M_CXGBE); return (rc); } static int sysctl_cim_la_t6(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; u_int cfg; struct sbuf *sb; uint32_t *buf, *p; int rc; MPASS(chip_id(sc) > CHELSIO_T5); rc = -t4_cim_read(sc, A_UP_UP_DBG_LA_CFG, 1, &cfg); if (rc != 0) return (rc); rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); buf = malloc(sc->params.cim_la_size * sizeof(uint32_t), M_CXGBE, M_ZERO | M_WAITOK); rc = -t4_cim_read_la(sc, buf, NULL); if (rc != 0) goto done; sbuf_printf(sb, "Status Inst Data PC%s", cfg & F_UPDBGLACAPTPCONLY ? "" : " LS0Stat LS0Addr LS0Data LS1Stat LS1Addr LS1Data"); for (p = buf; p <= &buf[sc->params.cim_la_size - 10]; p += 10) { if (cfg & F_UPDBGLACAPTPCONLY) { sbuf_printf(sb, "\n %02x %08x %08x %08x", p[3] & 0xff, p[2], p[1], p[0]); sbuf_printf(sb, "\n %02x %02x%06x %02x%06x %02x%06x", (p[6] >> 8) & 0xff, p[6] & 0xff, p[5] >> 8, p[5] & 0xff, p[4] >> 8, p[4] & 0xff, p[3] >> 8); sbuf_printf(sb, "\n %02x %04x%04x %04x%04x %04x%04x", (p[9] >> 16) & 0xff, p[9] & 0xffff, p[8] >> 16, p[8] & 0xffff, p[7] >> 16, p[7] & 0xffff, p[6] >> 16); } else { sbuf_printf(sb, "\n %02x %04x%04x %04x%04x %04x%04x " "%08x %08x %08x %08x %08x %08x", (p[9] >> 16) & 0xff, p[9] & 0xffff, p[8] >> 16, p[8] & 0xffff, p[7] >> 16, p[7] & 0xffff, p[6] >> 16, p[2], p[1], p[0], p[5], p[4], p[3]); } } rc = sbuf_finish(sb); sbuf_delete(sb); done: free(buf, M_CXGBE); return (rc); } static int sysctl_cim_ma_la(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; u_int i; struct sbuf *sb; uint32_t *buf, *p; int rc; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); buf = malloc(2 * CIM_MALA_SIZE * 5 * sizeof(uint32_t), M_CXGBE, M_ZERO | M_WAITOK); t4_cim_read_ma_la(sc, buf, buf + 5 * CIM_MALA_SIZE); p = buf; for (i = 0; i < CIM_MALA_SIZE; i++, p += 5) { sbuf_printf(sb, "\n%02x%08x%08x%08x%08x", p[4], p[3], p[2], p[1], p[0]); } sbuf_printf(sb, "\n\nCnt ID Tag UE Data RDY VLD"); for (i = 0; i < CIM_MALA_SIZE; i++, p += 5) { sbuf_printf(sb, "\n%3u %2u %x %u %08x%08x %u %u", (p[2] >> 10) & 0xff, (p[2] >> 7) & 7, (p[2] >> 3) & 0xf, (p[2] >> 2) & 1, (p[1] >> 2) | ((p[2] & 3) << 30), (p[0] >> 2) | ((p[1] & 3) << 30), (p[0] >> 1) & 1, p[0] & 1); } rc = sbuf_finish(sb); sbuf_delete(sb); free(buf, M_CXGBE); return (rc); } static int sysctl_cim_pif_la(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; u_int i; struct sbuf *sb; uint32_t *buf, *p; int rc; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); buf = malloc(2 * CIM_PIFLA_SIZE * 6 * sizeof(uint32_t), M_CXGBE, M_ZERO | M_WAITOK); t4_cim_read_pif_la(sc, buf, buf + 6 * CIM_PIFLA_SIZE, NULL, NULL); p = buf; sbuf_printf(sb, "Cntl ID DataBE Addr Data"); for (i = 0; i < CIM_PIFLA_SIZE; i++, p += 6) { sbuf_printf(sb, "\n %02x %02x %04x %08x %08x%08x%08x%08x", (p[5] >> 22) & 0xff, (p[5] >> 16) & 0x3f, p[5] & 0xffff, p[4], p[3], p[2], p[1], p[0]); } sbuf_printf(sb, "\n\nCntl ID Data"); for (i = 0; i < CIM_PIFLA_SIZE; i++, p += 6) { sbuf_printf(sb, "\n %02x %02x %08x%08x%08x%08x", (p[4] >> 6) & 0xff, p[4] & 0x3f, p[3], p[2], p[1], p[0]); } rc = sbuf_finish(sb); sbuf_delete(sb); free(buf, M_CXGBE); return (rc); } static int sysctl_cim_qcfg(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc, i; uint16_t base[CIM_NUM_IBQ + CIM_NUM_OBQ_T5]; uint16_t size[CIM_NUM_IBQ + CIM_NUM_OBQ_T5]; uint16_t thres[CIM_NUM_IBQ]; uint32_t obq_wr[2 * CIM_NUM_OBQ_T5], *wr = obq_wr; uint32_t stat[4 * (CIM_NUM_IBQ + CIM_NUM_OBQ_T5)], *p = stat; u_int cim_num_obq, ibq_rdaddr, obq_rdaddr, nq; cim_num_obq = sc->chip_params->cim_num_obq; if (is_t4(sc)) { ibq_rdaddr = A_UP_IBQ_0_RDADDR; obq_rdaddr = A_UP_OBQ_0_REALADDR; } else { ibq_rdaddr = A_UP_IBQ_0_SHADOW_RDADDR; obq_rdaddr = A_UP_OBQ_0_SHADOW_REALADDR; } nq = CIM_NUM_IBQ + cim_num_obq; rc = -t4_cim_read(sc, ibq_rdaddr, 4 * nq, stat); if (rc == 0) rc = -t4_cim_read(sc, obq_rdaddr, 2 * cim_num_obq, obq_wr); if (rc != 0) return (rc); t4_read_cimq_cfg(sc, base, size, thres); rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, PAGE_SIZE, req); if (sb == NULL) return (ENOMEM); sbuf_printf(sb, " Queue Base Size Thres RdPtr WrPtr SOP EOP Avail"); for (i = 0; i < CIM_NUM_IBQ; i++, p += 4) sbuf_printf(sb, "\n%7s %5x %5u %5u %6x %4x %4u %4u %5u", qname[i], base[i], size[i], thres[i], G_IBQRDADDR(p[0]), G_IBQWRADDR(p[1]), G_QUESOPCNT(p[3]), G_QUEEOPCNT(p[3]), G_QUEREMFLITS(p[2]) * 16); for ( ; i < nq; i++, p += 4, wr += 2) sbuf_printf(sb, "\n%7s %5x %5u %12x %4x %4u %4u %5u", qname[i], base[i], size[i], G_QUERDADDR(p[0]) & 0x3fff, wr[0] - base[i], G_QUESOPCNT(p[3]), G_QUEEOPCNT(p[3]), G_QUEREMFLITS(p[2]) * 16); rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_cpl_stats(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc; struct tp_cpl_stats stats; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); mtx_lock(&sc->reg_lock); t4_tp_get_cpl_stats(sc, &stats, 0); mtx_unlock(&sc->reg_lock); if (sc->chip_params->nchan > 2) { sbuf_printf(sb, " channel 0 channel 1" " channel 2 channel 3"); sbuf_printf(sb, "\nCPL requests: %10u %10u %10u %10u", stats.req[0], stats.req[1], stats.req[2], stats.req[3]); sbuf_printf(sb, "\nCPL responses: %10u %10u %10u %10u", stats.rsp[0], stats.rsp[1], stats.rsp[2], stats.rsp[3]); } else { sbuf_printf(sb, " channel 0 channel 1"); sbuf_printf(sb, "\nCPL requests: %10u %10u", stats.req[0], stats.req[1]); sbuf_printf(sb, "\nCPL responses: %10u %10u", stats.rsp[0], stats.rsp[1]); } rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_ddp_stats(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc; struct tp_usm_stats stats; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return(rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); t4_get_usm_stats(sc, &stats, 1); sbuf_printf(sb, "Frames: %u\n", stats.frames); sbuf_printf(sb, "Octets: %ju\n", stats.octets); sbuf_printf(sb, "Drops: %u", stats.drops); rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static const char * const devlog_level_strings[] = { [FW_DEVLOG_LEVEL_EMERG] = "EMERG", [FW_DEVLOG_LEVEL_CRIT] = "CRIT", [FW_DEVLOG_LEVEL_ERR] = "ERR", [FW_DEVLOG_LEVEL_NOTICE] = "NOTICE", [FW_DEVLOG_LEVEL_INFO] = "INFO", [FW_DEVLOG_LEVEL_DEBUG] = "DEBUG" }; static const char * const devlog_facility_strings[] = { [FW_DEVLOG_FACILITY_CORE] = "CORE", [FW_DEVLOG_FACILITY_CF] = "CF", [FW_DEVLOG_FACILITY_SCHED] = "SCHED", [FW_DEVLOG_FACILITY_TIMER] = "TIMER", [FW_DEVLOG_FACILITY_RES] = "RES", [FW_DEVLOG_FACILITY_HW] = "HW", [FW_DEVLOG_FACILITY_FLR] = "FLR", [FW_DEVLOG_FACILITY_DMAQ] = "DMAQ", [FW_DEVLOG_FACILITY_PHY] = "PHY", [FW_DEVLOG_FACILITY_MAC] = "MAC", [FW_DEVLOG_FACILITY_PORT] = "PORT", [FW_DEVLOG_FACILITY_VI] = "VI", [FW_DEVLOG_FACILITY_FILTER] = "FILTER", [FW_DEVLOG_FACILITY_ACL] = "ACL", [FW_DEVLOG_FACILITY_TM] = "TM", [FW_DEVLOG_FACILITY_QFC] = "QFC", [FW_DEVLOG_FACILITY_DCB] = "DCB", [FW_DEVLOG_FACILITY_ETH] = "ETH", [FW_DEVLOG_FACILITY_OFLD] = "OFLD", [FW_DEVLOG_FACILITY_RI] = "RI", [FW_DEVLOG_FACILITY_ISCSI] = "ISCSI", [FW_DEVLOG_FACILITY_FCOE] = "FCOE", [FW_DEVLOG_FACILITY_FOISCSI] = "FOISCSI", [FW_DEVLOG_FACILITY_FOFCOE] = "FOFCOE", [FW_DEVLOG_FACILITY_CHNET] = "CHNET", }; static int sysctl_devlog(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct devlog_params *dparams = &sc->params.devlog; struct fw_devlog_e *buf, *e; int i, j, rc, nentries, first = 0; struct sbuf *sb; uint64_t ftstamp = UINT64_MAX; if (dparams->addr == 0) return (ENXIO); buf = malloc(dparams->size, M_CXGBE, M_NOWAIT); if (buf == NULL) return (ENOMEM); rc = read_via_memwin(sc, 1, dparams->addr, (void *)buf, dparams->size); if (rc != 0) goto done; nentries = dparams->size / sizeof(struct fw_devlog_e); for (i = 0; i < nentries; i++) { e = &buf[i]; if (e->timestamp == 0) break; /* end */ e->timestamp = be64toh(e->timestamp); e->seqno = be32toh(e->seqno); for (j = 0; j < 8; j++) e->params[j] = be32toh(e->params[j]); if (e->timestamp < ftstamp) { ftstamp = e->timestamp; first = i; } } if (buf[first].timestamp == 0) goto done; /* nothing in the log */ rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) goto done; sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) { rc = ENOMEM; goto done; } sbuf_printf(sb, "%10s %15s %8s %8s %s\n", "Seq#", "Tstamp", "Level", "Facility", "Message"); i = first; do { e = &buf[i]; if (e->timestamp == 0) break; /* end */ sbuf_printf(sb, "%10d %15ju %8s %8s ", e->seqno, e->timestamp, (e->level < nitems(devlog_level_strings) ? devlog_level_strings[e->level] : "UNKNOWN"), (e->facility < nitems(devlog_facility_strings) ? devlog_facility_strings[e->facility] : "UNKNOWN")); sbuf_printf(sb, e->fmt, e->params[0], e->params[1], e->params[2], e->params[3], e->params[4], e->params[5], e->params[6], e->params[7]); if (++i == nentries) i = 0; } while (i != first); rc = sbuf_finish(sb); sbuf_delete(sb); done: free(buf, M_CXGBE); return (rc); } static int sysctl_fcoe_stats(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc; struct tp_fcoe_stats stats[MAX_NCHAN]; int i, nchan = sc->chip_params->nchan; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); for (i = 0; i < nchan; i++) t4_get_fcoe_stats(sc, i, &stats[i], 1); if (nchan > 2) { sbuf_printf(sb, " channel 0 channel 1" " channel 2 channel 3"); sbuf_printf(sb, "\noctetsDDP: %16ju %16ju %16ju %16ju", stats[0].octets_ddp, stats[1].octets_ddp, stats[2].octets_ddp, stats[3].octets_ddp); sbuf_printf(sb, "\nframesDDP: %16u %16u %16u %16u", stats[0].frames_ddp, stats[1].frames_ddp, stats[2].frames_ddp, stats[3].frames_ddp); sbuf_printf(sb, "\nframesDrop: %16u %16u %16u %16u", stats[0].frames_drop, stats[1].frames_drop, stats[2].frames_drop, stats[3].frames_drop); } else { sbuf_printf(sb, " channel 0 channel 1"); sbuf_printf(sb, "\noctetsDDP: %16ju %16ju", stats[0].octets_ddp, stats[1].octets_ddp); sbuf_printf(sb, "\nframesDDP: %16u %16u", stats[0].frames_ddp, stats[1].frames_ddp); sbuf_printf(sb, "\nframesDrop: %16u %16u", stats[0].frames_drop, stats[1].frames_drop); } rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_hw_sched(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc, i; unsigned int map, kbps, ipg, mode; unsigned int pace_tab[NTX_SCHED]; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); map = t4_read_reg(sc, A_TP_TX_MOD_QUEUE_REQ_MAP); mode = G_TIMERMODE(t4_read_reg(sc, A_TP_MOD_CONFIG)); t4_read_pace_tbl(sc, pace_tab); sbuf_printf(sb, "Scheduler Mode Channel Rate (Kbps) " "Class IPG (0.1 ns) Flow IPG (us)"); for (i = 0; i < NTX_SCHED; ++i, map >>= 2) { t4_get_tx_sched(sc, i, &kbps, &ipg, 1); sbuf_printf(sb, "\n %u %-5s %u ", i, (mode & (1 << i)) ? "flow" : "class", map & 3); if (kbps) sbuf_printf(sb, "%9u ", kbps); else sbuf_printf(sb, " disabled "); if (ipg) sbuf_printf(sb, "%13u ", ipg); else sbuf_printf(sb, " disabled "); if (pace_tab[i]) sbuf_printf(sb, "%10u", pace_tab[i]); else sbuf_printf(sb, " disabled"); } rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_lb_stats(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc, i, j; uint64_t *p0, *p1; struct lb_port_stats s[2]; static const char *stat_name[] = { "OctetsOK:", "FramesOK:", "BcastFrames:", "McastFrames:", "UcastFrames:", "ErrorFrames:", "Frames64:", "Frames65To127:", "Frames128To255:", "Frames256To511:", "Frames512To1023:", "Frames1024To1518:", "Frames1519ToMax:", "FramesDropped:", "BG0FramesDropped:", "BG1FramesDropped:", "BG2FramesDropped:", "BG3FramesDropped:", "BG0FramesTrunc:", "BG1FramesTrunc:", "BG2FramesTrunc:", "BG3FramesTrunc:" }; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); memset(s, 0, sizeof(s)); for (i = 0; i < sc->chip_params->nchan; i += 2) { t4_get_lb_stats(sc, i, &s[0]); t4_get_lb_stats(sc, i + 1, &s[1]); p0 = &s[0].octets; p1 = &s[1].octets; sbuf_printf(sb, "%s Loopback %u" " Loopback %u", i == 0 ? "" : "\n", i, i + 1); for (j = 0; j < nitems(stat_name); j++) sbuf_printf(sb, "\n%-17s %20ju %20ju", stat_name[j], *p0++, *p1++); } rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_linkdnrc(SYSCTL_HANDLER_ARGS) { int rc = 0; struct port_info *pi = arg1; struct link_config *lc = &pi->link_cfg; struct sbuf *sb; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return(rc); sb = sbuf_new_for_sysctl(NULL, NULL, 64, req); if (sb == NULL) return (ENOMEM); if (lc->link_ok || lc->link_down_rc == 255) sbuf_printf(sb, "n/a"); else sbuf_printf(sb, "%s", t4_link_down_rc_str(lc->link_down_rc)); rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } struct mem_desc { unsigned int base; unsigned int limit; unsigned int idx; }; static int mem_desc_cmp(const void *a, const void *b) { return ((const struct mem_desc *)a)->base - ((const struct mem_desc *)b)->base; } static void mem_region_show(struct sbuf *sb, const char *name, unsigned int from, unsigned int to) { unsigned int size; if (from == to) return; size = to - from + 1; if (size == 0) return; /* XXX: need humanize_number(3) in libkern for a more readable 'size' */ sbuf_printf(sb, "%-15s %#x-%#x [%u]\n", name, from, to, size); } static int sysctl_meminfo(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc, i, n; uint32_t lo, hi, used, alloc; static const char *memory[] = {"EDC0:", "EDC1:", "MC:", "MC0:", "MC1:"}; static const char *region[] = { "DBQ contexts:", "IMSG contexts:", "FLM cache:", "TCBs:", "Pstructs:", "Timers:", "Rx FL:", "Tx FL:", "Pstruct FL:", "Tx payload:", "Rx payload:", "LE hash:", "iSCSI region:", "TDDP region:", "TPT region:", "STAG region:", "RQ region:", "RQUDP region:", "PBL region:", "TXPBL region:", "DBVFIFO region:", "ULPRX state:", "ULPTX state:", "On-chip queues:", "TLS keys:", }; struct mem_desc avail[4]; struct mem_desc mem[nitems(region) + 3]; /* up to 3 holes */ struct mem_desc *md = mem; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); for (i = 0; i < nitems(mem); i++) { mem[i].limit = 0; mem[i].idx = i; } /* Find and sort the populated memory ranges */ i = 0; lo = t4_read_reg(sc, A_MA_TARGET_MEM_ENABLE); if (lo & F_EDRAM0_ENABLE) { hi = t4_read_reg(sc, A_MA_EDRAM0_BAR); avail[i].base = G_EDRAM0_BASE(hi) << 20; avail[i].limit = avail[i].base + (G_EDRAM0_SIZE(hi) << 20); avail[i].idx = 0; i++; } if (lo & F_EDRAM1_ENABLE) { hi = t4_read_reg(sc, A_MA_EDRAM1_BAR); avail[i].base = G_EDRAM1_BASE(hi) << 20; avail[i].limit = avail[i].base + (G_EDRAM1_SIZE(hi) << 20); avail[i].idx = 1; i++; } if (lo & F_EXT_MEM_ENABLE) { hi = t4_read_reg(sc, A_MA_EXT_MEMORY_BAR); avail[i].base = G_EXT_MEM_BASE(hi) << 20; avail[i].limit = avail[i].base + (G_EXT_MEM_SIZE(hi) << 20); avail[i].idx = is_t5(sc) ? 3 : 2; /* Call it MC0 for T5 */ i++; } if (is_t5(sc) && lo & F_EXT_MEM1_ENABLE) { hi = t4_read_reg(sc, A_MA_EXT_MEMORY1_BAR); avail[i].base = G_EXT_MEM1_BASE(hi) << 20; avail[i].limit = avail[i].base + (G_EXT_MEM1_SIZE(hi) << 20); avail[i].idx = 4; i++; } if (!i) /* no memory available */ return 0; qsort(avail, i, sizeof(struct mem_desc), mem_desc_cmp); (md++)->base = t4_read_reg(sc, A_SGE_DBQ_CTXT_BADDR); (md++)->base = t4_read_reg(sc, A_SGE_IMSG_CTXT_BADDR); (md++)->base = t4_read_reg(sc, A_SGE_FLM_CACHE_BADDR); (md++)->base = t4_read_reg(sc, A_TP_CMM_TCB_BASE); (md++)->base = t4_read_reg(sc, A_TP_CMM_MM_BASE); (md++)->base = t4_read_reg(sc, A_TP_CMM_TIMER_BASE); (md++)->base = t4_read_reg(sc, A_TP_CMM_MM_RX_FLST_BASE); (md++)->base = t4_read_reg(sc, A_TP_CMM_MM_TX_FLST_BASE); (md++)->base = t4_read_reg(sc, A_TP_CMM_MM_PS_FLST_BASE); /* the next few have explicit upper bounds */ md->base = t4_read_reg(sc, A_TP_PMM_TX_BASE); md->limit = md->base - 1 + t4_read_reg(sc, A_TP_PMM_TX_PAGE_SIZE) * G_PMTXMAXPAGE(t4_read_reg(sc, A_TP_PMM_TX_MAX_PAGE)); md++; md->base = t4_read_reg(sc, A_TP_PMM_RX_BASE); md->limit = md->base - 1 + t4_read_reg(sc, A_TP_PMM_RX_PAGE_SIZE) * G_PMRXMAXPAGE(t4_read_reg(sc, A_TP_PMM_RX_MAX_PAGE)); md++; if (t4_read_reg(sc, A_LE_DB_CONFIG) & F_HASHEN) { if (chip_id(sc) <= CHELSIO_T5) md->base = t4_read_reg(sc, A_LE_DB_HASH_TID_BASE); else md->base = t4_read_reg(sc, A_LE_DB_HASH_TBL_BASE_ADDR); md->limit = 0; } else { md->base = 0; md->idx = nitems(region); /* hide it */ } md++; #define ulp_region(reg) \ md->base = t4_read_reg(sc, A_ULP_ ## reg ## _LLIMIT);\ (md++)->limit = t4_read_reg(sc, A_ULP_ ## reg ## _ULIMIT) ulp_region(RX_ISCSI); ulp_region(RX_TDDP); ulp_region(TX_TPT); ulp_region(RX_STAG); ulp_region(RX_RQ); ulp_region(RX_RQUDP); ulp_region(RX_PBL); ulp_region(TX_PBL); #undef ulp_region md->base = 0; md->idx = nitems(region); if (!is_t4(sc)) { uint32_t size = 0; uint32_t sge_ctrl = t4_read_reg(sc, A_SGE_CONTROL2); uint32_t fifo_size = t4_read_reg(sc, A_SGE_DBVFIFO_SIZE); if (is_t5(sc)) { if (sge_ctrl & F_VFIFO_ENABLE) size = G_DBVFIFO_SIZE(fifo_size); } else size = G_T6_DBVFIFO_SIZE(fifo_size); if (size) { md->base = G_BASEADDR(t4_read_reg(sc, A_SGE_DBVFIFO_BADDR)); md->limit = md->base + (size << 2) - 1; } } md++; md->base = t4_read_reg(sc, A_ULP_RX_CTX_BASE); md->limit = 0; md++; md->base = t4_read_reg(sc, A_ULP_TX_ERR_TABLE_BASE); md->limit = 0; md++; md->base = sc->vres.ocq.start; if (sc->vres.ocq.size) md->limit = md->base + sc->vres.ocq.size - 1; else md->idx = nitems(region); /* hide it */ md++; md->base = sc->vres.key.start; if (sc->vres.key.size) md->limit = md->base + sc->vres.key.size - 1; else md->idx = nitems(region); /* hide it */ md++; /* add any address-space holes, there can be up to 3 */ for (n = 0; n < i - 1; n++) if (avail[n].limit < avail[n + 1].base) (md++)->base = avail[n].limit; if (avail[n].limit) (md++)->base = avail[n].limit; n = md - mem; qsort(mem, n, sizeof(struct mem_desc), mem_desc_cmp); for (lo = 0; lo < i; lo++) mem_region_show(sb, memory[avail[lo].idx], avail[lo].base, avail[lo].limit - 1); sbuf_printf(sb, "\n"); for (i = 0; i < n; i++) { if (mem[i].idx >= nitems(region)) continue; /* skip holes */ if (!mem[i].limit) mem[i].limit = i < n - 1 ? mem[i + 1].base - 1 : ~0; mem_region_show(sb, region[mem[i].idx], mem[i].base, mem[i].limit); } sbuf_printf(sb, "\n"); lo = t4_read_reg(sc, A_CIM_SDRAM_BASE_ADDR); hi = t4_read_reg(sc, A_CIM_SDRAM_ADDR_SIZE) + lo - 1; mem_region_show(sb, "uP RAM:", lo, hi); lo = t4_read_reg(sc, A_CIM_EXTMEM2_BASE_ADDR); hi = t4_read_reg(sc, A_CIM_EXTMEM2_ADDR_SIZE) + lo - 1; mem_region_show(sb, "uP Extmem2:", lo, hi); lo = t4_read_reg(sc, A_TP_PMM_RX_MAX_PAGE); sbuf_printf(sb, "\n%u Rx pages of size %uKiB for %u channels\n", G_PMRXMAXPAGE(lo), t4_read_reg(sc, A_TP_PMM_RX_PAGE_SIZE) >> 10, (lo & F_PMRXNUMCHN) ? 2 : 1); lo = t4_read_reg(sc, A_TP_PMM_TX_MAX_PAGE); hi = t4_read_reg(sc, A_TP_PMM_TX_PAGE_SIZE); sbuf_printf(sb, "%u Tx pages of size %u%ciB for %u channels\n", G_PMTXMAXPAGE(lo), hi >= (1 << 20) ? (hi >> 20) : (hi >> 10), hi >= (1 << 20) ? 'M' : 'K', 1 << G_PMTXNUMCHN(lo)); sbuf_printf(sb, "%u p-structs\n", t4_read_reg(sc, A_TP_CMM_MM_MAX_PSTRUCT)); for (i = 0; i < 4; i++) { if (chip_id(sc) > CHELSIO_T5) lo = t4_read_reg(sc, A_MPS_RX_MAC_BG_PG_CNT0 + i * 4); else lo = t4_read_reg(sc, A_MPS_RX_PG_RSV0 + i * 4); if (is_t5(sc)) { used = G_T5_USED(lo); alloc = G_T5_ALLOC(lo); } else { used = G_USED(lo); alloc = G_ALLOC(lo); } /* For T6 these are MAC buffer groups */ sbuf_printf(sb, "\nPort %d using %u pages out of %u allocated", i, used, alloc); } for (i = 0; i < sc->chip_params->nchan; i++) { if (chip_id(sc) > CHELSIO_T5) lo = t4_read_reg(sc, A_MPS_RX_LPBK_BG_PG_CNT0 + i * 4); else lo = t4_read_reg(sc, A_MPS_RX_PG_RSV4 + i * 4); if (is_t5(sc)) { used = G_T5_USED(lo); alloc = G_T5_ALLOC(lo); } else { used = G_USED(lo); alloc = G_ALLOC(lo); } /* For T6 these are MAC buffer groups */ sbuf_printf(sb, "\nLoopback %d using %u pages out of %u allocated", i, used, alloc); } rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static inline void tcamxy2valmask(uint64_t x, uint64_t y, uint8_t *addr, uint64_t *mask) { *mask = x | y; y = htobe64(y); memcpy(addr, (char *)&y + 2, ETHER_ADDR_LEN); } static int sysctl_mps_tcam(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc, i; MPASS(chip_id(sc) <= CHELSIO_T5); rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); sbuf_printf(sb, "Idx Ethernet address Mask Vld Ports PF" " VF Replication P0 P1 P2 P3 ML"); for (i = 0; i < sc->chip_params->mps_tcam_size; i++) { uint64_t tcamx, tcamy, mask; uint32_t cls_lo, cls_hi; uint8_t addr[ETHER_ADDR_LEN]; tcamy = t4_read_reg64(sc, MPS_CLS_TCAM_Y_L(i)); tcamx = t4_read_reg64(sc, MPS_CLS_TCAM_X_L(i)); if (tcamx & tcamy) continue; tcamxy2valmask(tcamx, tcamy, addr, &mask); cls_lo = t4_read_reg(sc, MPS_CLS_SRAM_L(i)); cls_hi = t4_read_reg(sc, MPS_CLS_SRAM_H(i)); sbuf_printf(sb, "\n%3u %02x:%02x:%02x:%02x:%02x:%02x %012jx" " %c %#x%4u%4d", i, addr[0], addr[1], addr[2], addr[3], addr[4], addr[5], (uintmax_t)mask, (cls_lo & F_SRAM_VLD) ? 'Y' : 'N', G_PORTMAP(cls_hi), G_PF(cls_lo), (cls_lo & F_VF_VALID) ? G_VF(cls_lo) : -1); if (cls_lo & F_REPLICATE) { struct fw_ldst_cmd ldst_cmd; memset(&ldst_cmd, 0, sizeof(ldst_cmd)); ldst_cmd.op_to_addrspace = htobe32(V_FW_CMD_OP(FW_LDST_CMD) | F_FW_CMD_REQUEST | F_FW_CMD_READ | V_FW_LDST_CMD_ADDRSPACE(FW_LDST_ADDRSPC_MPS)); ldst_cmd.cycles_to_len16 = htobe32(FW_LEN16(ldst_cmd)); ldst_cmd.u.mps.rplc.fid_idx = htobe16(V_FW_LDST_CMD_FID(FW_LDST_MPS_RPLC) | V_FW_LDST_CMD_IDX(i)); rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4mps"); if (rc) break; rc = -t4_wr_mbox(sc, sc->mbox, &ldst_cmd, sizeof(ldst_cmd), &ldst_cmd); end_synchronized_op(sc, 0); if (rc != 0) { sbuf_printf(sb, "%36d", rc); rc = 0; } else { sbuf_printf(sb, " %08x %08x %08x %08x", be32toh(ldst_cmd.u.mps.rplc.rplc127_96), be32toh(ldst_cmd.u.mps.rplc.rplc95_64), be32toh(ldst_cmd.u.mps.rplc.rplc63_32), be32toh(ldst_cmd.u.mps.rplc.rplc31_0)); } } else sbuf_printf(sb, "%36s", ""); sbuf_printf(sb, "%4u%3u%3u%3u %#3x", G_SRAM_PRIO0(cls_lo), G_SRAM_PRIO1(cls_lo), G_SRAM_PRIO2(cls_lo), G_SRAM_PRIO3(cls_lo), (cls_lo >> S_MULTILISTEN0) & 0xf); } if (rc) (void) sbuf_finish(sb); else rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_mps_tcam_t6(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc, i; MPASS(chip_id(sc) > CHELSIO_T5); rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); sbuf_printf(sb, "Idx Ethernet address Mask VNI Mask" " IVLAN Vld DIP_Hit Lookup Port Vld Ports PF VF" " Replication" " P0 P1 P2 P3 ML\n"); for (i = 0; i < sc->chip_params->mps_tcam_size; i++) { uint8_t dip_hit, vlan_vld, lookup_type, port_num; uint16_t ivlan; uint64_t tcamx, tcamy, val, mask; uint32_t cls_lo, cls_hi, ctl, data2, vnix, vniy; uint8_t addr[ETHER_ADDR_LEN]; ctl = V_CTLREQID(1) | V_CTLCMDTYPE(0) | V_CTLXYBITSEL(0); if (i < 256) ctl |= V_CTLTCAMINDEX(i) | V_CTLTCAMSEL(0); else ctl |= V_CTLTCAMINDEX(i - 256) | V_CTLTCAMSEL(1); t4_write_reg(sc, A_MPS_CLS_TCAM_DATA2_CTL, ctl); val = t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA1_REQ_ID1); tcamy = G_DMACH(val) << 32; tcamy |= t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA0_REQ_ID1); data2 = t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA2_REQ_ID1); lookup_type = G_DATALKPTYPE(data2); port_num = G_DATAPORTNUM(data2); if (lookup_type && lookup_type != M_DATALKPTYPE) { /* Inner header VNI */ vniy = ((data2 & F_DATAVIDH2) << 23) | (G_DATAVIDH1(data2) << 16) | G_VIDL(val); dip_hit = data2 & F_DATADIPHIT; vlan_vld = 0; } else { vniy = 0; dip_hit = 0; vlan_vld = data2 & F_DATAVIDH2; ivlan = G_VIDL(val); } ctl |= V_CTLXYBITSEL(1); t4_write_reg(sc, A_MPS_CLS_TCAM_DATA2_CTL, ctl); val = t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA1_REQ_ID1); tcamx = G_DMACH(val) << 32; tcamx |= t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA0_REQ_ID1); data2 = t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA2_REQ_ID1); if (lookup_type && lookup_type != M_DATALKPTYPE) { /* Inner header VNI mask */ vnix = ((data2 & F_DATAVIDH2) << 23) | (G_DATAVIDH1(data2) << 16) | G_VIDL(val); } else vnix = 0; if (tcamx & tcamy) continue; tcamxy2valmask(tcamx, tcamy, addr, &mask); cls_lo = t4_read_reg(sc, MPS_CLS_SRAM_L(i)); cls_hi = t4_read_reg(sc, MPS_CLS_SRAM_H(i)); if (lookup_type && lookup_type != M_DATALKPTYPE) { sbuf_printf(sb, "\n%3u %02x:%02x:%02x:%02x:%02x:%02x " "%012jx %06x %06x - - %3c" " 'I' %4x %3c %#x%4u%4d", i, addr[0], addr[1], addr[2], addr[3], addr[4], addr[5], (uintmax_t)mask, vniy, vnix, dip_hit ? 'Y' : 'N', port_num, cls_lo & F_T6_SRAM_VLD ? 'Y' : 'N', G_PORTMAP(cls_hi), G_T6_PF(cls_lo), cls_lo & F_T6_VF_VALID ? G_T6_VF(cls_lo) : -1); } else { sbuf_printf(sb, "\n%3u %02x:%02x:%02x:%02x:%02x:%02x " "%012jx - - ", i, addr[0], addr[1], addr[2], addr[3], addr[4], addr[5], (uintmax_t)mask); if (vlan_vld) sbuf_printf(sb, "%4u Y ", ivlan); else sbuf_printf(sb, " - N "); sbuf_printf(sb, "- %3c %4x %3c %#x%4u%4d", lookup_type ? 'I' : 'O', port_num, cls_lo & F_T6_SRAM_VLD ? 'Y' : 'N', G_PORTMAP(cls_hi), G_T6_PF(cls_lo), cls_lo & F_T6_VF_VALID ? G_T6_VF(cls_lo) : -1); } if (cls_lo & F_T6_REPLICATE) { struct fw_ldst_cmd ldst_cmd; memset(&ldst_cmd, 0, sizeof(ldst_cmd)); ldst_cmd.op_to_addrspace = htobe32(V_FW_CMD_OP(FW_LDST_CMD) | F_FW_CMD_REQUEST | F_FW_CMD_READ | V_FW_LDST_CMD_ADDRSPACE(FW_LDST_ADDRSPC_MPS)); ldst_cmd.cycles_to_len16 = htobe32(FW_LEN16(ldst_cmd)); ldst_cmd.u.mps.rplc.fid_idx = htobe16(V_FW_LDST_CMD_FID(FW_LDST_MPS_RPLC) | V_FW_LDST_CMD_IDX(i)); rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t6mps"); if (rc) break; rc = -t4_wr_mbox(sc, sc->mbox, &ldst_cmd, sizeof(ldst_cmd), &ldst_cmd); end_synchronized_op(sc, 0); if (rc != 0) { sbuf_printf(sb, "%72d", rc); rc = 0; } else { sbuf_printf(sb, " %08x %08x %08x %08x" " %08x %08x %08x %08x", be32toh(ldst_cmd.u.mps.rplc.rplc255_224), be32toh(ldst_cmd.u.mps.rplc.rplc223_192), be32toh(ldst_cmd.u.mps.rplc.rplc191_160), be32toh(ldst_cmd.u.mps.rplc.rplc159_128), be32toh(ldst_cmd.u.mps.rplc.rplc127_96), be32toh(ldst_cmd.u.mps.rplc.rplc95_64), be32toh(ldst_cmd.u.mps.rplc.rplc63_32), be32toh(ldst_cmd.u.mps.rplc.rplc31_0)); } } else sbuf_printf(sb, "%72s", ""); sbuf_printf(sb, "%4u%3u%3u%3u %#x", G_T6_SRAM_PRIO0(cls_lo), G_T6_SRAM_PRIO1(cls_lo), G_T6_SRAM_PRIO2(cls_lo), G_T6_SRAM_PRIO3(cls_lo), (cls_lo >> S_T6_MULTILISTEN0) & 0xf); } if (rc) (void) sbuf_finish(sb); else rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_path_mtus(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc; uint16_t mtus[NMTUS]; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); t4_read_mtu_tbl(sc, mtus, NULL); sbuf_printf(sb, "%u %u %u %u %u %u %u %u %u %u %u %u %u %u %u %u", mtus[0], mtus[1], mtus[2], mtus[3], mtus[4], mtus[5], mtus[6], mtus[7], mtus[8], mtus[9], mtus[10], mtus[11], mtus[12], mtus[13], mtus[14], mtus[15]); rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_pm_stats(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc, i; uint32_t tx_cnt[MAX_PM_NSTATS], rx_cnt[MAX_PM_NSTATS]; uint64_t tx_cyc[MAX_PM_NSTATS], rx_cyc[MAX_PM_NSTATS]; static const char *tx_stats[MAX_PM_NSTATS] = { "Read:", "Write bypass:", "Write mem:", "Bypass + mem:", "Tx FIFO wait", NULL, "Tx latency" }; static const char *rx_stats[MAX_PM_NSTATS] = { "Read:", "Write bypass:", "Write mem:", "Flush:", "Rx FIFO wait", NULL, "Rx latency" }; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); t4_pmtx_get_stats(sc, tx_cnt, tx_cyc); t4_pmrx_get_stats(sc, rx_cnt, rx_cyc); sbuf_printf(sb, " Tx pcmds Tx bytes"); for (i = 0; i < 4; i++) { sbuf_printf(sb, "\n%-13s %10u %20ju", tx_stats[i], tx_cnt[i], tx_cyc[i]); } sbuf_printf(sb, "\n Rx pcmds Rx bytes"); for (i = 0; i < 4; i++) { sbuf_printf(sb, "\n%-13s %10u %20ju", rx_stats[i], rx_cnt[i], rx_cyc[i]); } if (chip_id(sc) > CHELSIO_T5) { sbuf_printf(sb, "\n Total wait Total occupancy"); sbuf_printf(sb, "\n%-13s %10u %20ju", tx_stats[i], tx_cnt[i], tx_cyc[i]); sbuf_printf(sb, "\n%-13s %10u %20ju", rx_stats[i], rx_cnt[i], rx_cyc[i]); i += 2; MPASS(i < nitems(tx_stats)); sbuf_printf(sb, "\n Reads Total wait"); sbuf_printf(sb, "\n%-13s %10u %20ju", tx_stats[i], tx_cnt[i], tx_cyc[i]); sbuf_printf(sb, "\n%-13s %10u %20ju", rx_stats[i], rx_cnt[i], rx_cyc[i]); } rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_rdma_stats(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc; struct tp_rdma_stats stats; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); mtx_lock(&sc->reg_lock); t4_tp_get_rdma_stats(sc, &stats, 0); mtx_unlock(&sc->reg_lock); sbuf_printf(sb, "NoRQEModDefferals: %u\n", stats.rqe_dfr_mod); sbuf_printf(sb, "NoRQEPktDefferals: %u", stats.rqe_dfr_pkt); rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_tcp_stats(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc; struct tp_tcp_stats v4, v6; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); mtx_lock(&sc->reg_lock); t4_tp_get_tcp_stats(sc, &v4, &v6, 0); mtx_unlock(&sc->reg_lock); sbuf_printf(sb, " IP IPv6\n"); sbuf_printf(sb, "OutRsts: %20u %20u\n", v4.tcp_out_rsts, v6.tcp_out_rsts); sbuf_printf(sb, "InSegs: %20ju %20ju\n", v4.tcp_in_segs, v6.tcp_in_segs); sbuf_printf(sb, "OutSegs: %20ju %20ju\n", v4.tcp_out_segs, v6.tcp_out_segs); sbuf_printf(sb, "RetransSegs: %20ju %20ju", v4.tcp_retrans_segs, v6.tcp_retrans_segs); rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_tids(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc; struct tid_info *t = &sc->tids; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); if (t->natids) { sbuf_printf(sb, "ATID range: 0-%u, in use: %u\n", t->natids - 1, t->atids_in_use); } if (t->ntids) { sbuf_printf(sb, "TID range: "); if (t4_read_reg(sc, A_LE_DB_CONFIG) & F_HASHEN) { uint32_t b, hb; if (chip_id(sc) <= CHELSIO_T5) { b = t4_read_reg(sc, A_LE_DB_SERVER_INDEX) / 4; hb = t4_read_reg(sc, A_LE_DB_TID_HASHBASE) / 4; } else { b = t4_read_reg(sc, A_LE_DB_SRVR_START_INDEX); hb = t4_read_reg(sc, A_T6_LE_DB_HASH_TID_BASE); } if (b) sbuf_printf(sb, "0-%u, ", b - 1); sbuf_printf(sb, "%u-%u", hb, t->ntids - 1); } else sbuf_printf(sb, "0-%u", t->ntids - 1); sbuf_printf(sb, ", in use: %u\n", atomic_load_acq_int(&t->tids_in_use)); } if (t->nstids) { sbuf_printf(sb, "STID range: %u-%u, in use: %u\n", t->stid_base, t->stid_base + t->nstids - 1, t->stids_in_use); } if (t->nftids) { sbuf_printf(sb, "FTID range: %u-%u\n", t->ftid_base, t->ftid_base + t->nftids - 1); } if (t->netids) { sbuf_printf(sb, "ETID range: %u-%u\n", t->etid_base, t->etid_base + t->netids - 1); } sbuf_printf(sb, "HW TID usage: %u IP users, %u IPv6 users", t4_read_reg(sc, A_LE_DB_ACT_CNT_IPV4), t4_read_reg(sc, A_LE_DB_ACT_CNT_IPV6)); rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_tp_err_stats(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc; struct tp_err_stats stats; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); mtx_lock(&sc->reg_lock); t4_tp_get_err_stats(sc, &stats, 0); mtx_unlock(&sc->reg_lock); if (sc->chip_params->nchan > 2) { sbuf_printf(sb, " channel 0 channel 1" " channel 2 channel 3\n"); sbuf_printf(sb, "macInErrs: %10u %10u %10u %10u\n", stats.mac_in_errs[0], stats.mac_in_errs[1], stats.mac_in_errs[2], stats.mac_in_errs[3]); sbuf_printf(sb, "hdrInErrs: %10u %10u %10u %10u\n", stats.hdr_in_errs[0], stats.hdr_in_errs[1], stats.hdr_in_errs[2], stats.hdr_in_errs[3]); sbuf_printf(sb, "tcpInErrs: %10u %10u %10u %10u\n", stats.tcp_in_errs[0], stats.tcp_in_errs[1], stats.tcp_in_errs[2], stats.tcp_in_errs[3]); sbuf_printf(sb, "tcp6InErrs: %10u %10u %10u %10u\n", stats.tcp6_in_errs[0], stats.tcp6_in_errs[1], stats.tcp6_in_errs[2], stats.tcp6_in_errs[3]); sbuf_printf(sb, "tnlCongDrops: %10u %10u %10u %10u\n", stats.tnl_cong_drops[0], stats.tnl_cong_drops[1], stats.tnl_cong_drops[2], stats.tnl_cong_drops[3]); sbuf_printf(sb, "tnlTxDrops: %10u %10u %10u %10u\n", stats.tnl_tx_drops[0], stats.tnl_tx_drops[1], stats.tnl_tx_drops[2], stats.tnl_tx_drops[3]); sbuf_printf(sb, "ofldVlanDrops: %10u %10u %10u %10u\n", stats.ofld_vlan_drops[0], stats.ofld_vlan_drops[1], stats.ofld_vlan_drops[2], stats.ofld_vlan_drops[3]); sbuf_printf(sb, "ofldChanDrops: %10u %10u %10u %10u\n\n", stats.ofld_chan_drops[0], stats.ofld_chan_drops[1], stats.ofld_chan_drops[2], stats.ofld_chan_drops[3]); } else { sbuf_printf(sb, " channel 0 channel 1\n"); sbuf_printf(sb, "macInErrs: %10u %10u\n", stats.mac_in_errs[0], stats.mac_in_errs[1]); sbuf_printf(sb, "hdrInErrs: %10u %10u\n", stats.hdr_in_errs[0], stats.hdr_in_errs[1]); sbuf_printf(sb, "tcpInErrs: %10u %10u\n", stats.tcp_in_errs[0], stats.tcp_in_errs[1]); sbuf_printf(sb, "tcp6InErrs: %10u %10u\n", stats.tcp6_in_errs[0], stats.tcp6_in_errs[1]); sbuf_printf(sb, "tnlCongDrops: %10u %10u\n", stats.tnl_cong_drops[0], stats.tnl_cong_drops[1]); sbuf_printf(sb, "tnlTxDrops: %10u %10u\n", stats.tnl_tx_drops[0], stats.tnl_tx_drops[1]); sbuf_printf(sb, "ofldVlanDrops: %10u %10u\n", stats.ofld_vlan_drops[0], stats.ofld_vlan_drops[1]); sbuf_printf(sb, "ofldChanDrops: %10u %10u\n\n", stats.ofld_chan_drops[0], stats.ofld_chan_drops[1]); } sbuf_printf(sb, "ofldNoNeigh: %u\nofldCongDefer: %u", stats.ofld_no_neigh, stats.ofld_cong_defer); rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_tp_la_mask(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct tp_params *tpp = &sc->params.tp; u_int mask; int rc; mask = tpp->la_mask >> 16; rc = sysctl_handle_int(oidp, &mask, 0, req); if (rc != 0 || req->newptr == NULL) return (rc); if (mask > 0xffff) return (EINVAL); tpp->la_mask = mask << 16; t4_set_reg_field(sc, A_TP_DBG_LA_CONFIG, 0xffff0000U, tpp->la_mask); return (0); } struct field_desc { const char *name; u_int start; u_int width; }; static void field_desc_show(struct sbuf *sb, uint64_t v, const struct field_desc *f) { char buf[32]; int line_size = 0; while (f->name) { uint64_t mask = (1ULL << f->width) - 1; int len = snprintf(buf, sizeof(buf), "%s: %ju", f->name, ((uintmax_t)v >> f->start) & mask); if (line_size + len >= 79) { line_size = 8; sbuf_printf(sb, "\n "); } sbuf_printf(sb, "%s ", buf); line_size += len + 1; f++; } sbuf_printf(sb, "\n"); } static const struct field_desc tp_la0[] = { { "RcfOpCodeOut", 60, 4 }, { "State", 56, 4 }, { "WcfState", 52, 4 }, { "RcfOpcSrcOut", 50, 2 }, { "CRxError", 49, 1 }, { "ERxError", 48, 1 }, { "SanityFailed", 47, 1 }, { "SpuriousMsg", 46, 1 }, { "FlushInputMsg", 45, 1 }, { "FlushInputCpl", 44, 1 }, { "RssUpBit", 43, 1 }, { "RssFilterHit", 42, 1 }, { "Tid", 32, 10 }, { "InitTcb", 31, 1 }, { "LineNumber", 24, 7 }, { "Emsg", 23, 1 }, { "EdataOut", 22, 1 }, { "Cmsg", 21, 1 }, { "CdataOut", 20, 1 }, { "EreadPdu", 19, 1 }, { "CreadPdu", 18, 1 }, { "TunnelPkt", 17, 1 }, { "RcfPeerFin", 16, 1 }, { "RcfReasonOut", 12, 4 }, { "TxCchannel", 10, 2 }, { "RcfTxChannel", 8, 2 }, { "RxEchannel", 6, 2 }, { "RcfRxChannel", 5, 1 }, { "RcfDataOutSrdy", 4, 1 }, { "RxDvld", 3, 1 }, { "RxOoDvld", 2, 1 }, { "RxCongestion", 1, 1 }, { "TxCongestion", 0, 1 }, { NULL } }; static const struct field_desc tp_la1[] = { { "CplCmdIn", 56, 8 }, { "CplCmdOut", 48, 8 }, { "ESynOut", 47, 1 }, { "EAckOut", 46, 1 }, { "EFinOut", 45, 1 }, { "ERstOut", 44, 1 }, { "SynIn", 43, 1 }, { "AckIn", 42, 1 }, { "FinIn", 41, 1 }, { "RstIn", 40, 1 }, { "DataIn", 39, 1 }, { "DataInVld", 38, 1 }, { "PadIn", 37, 1 }, { "RxBufEmpty", 36, 1 }, { "RxDdp", 35, 1 }, { "RxFbCongestion", 34, 1 }, { "TxFbCongestion", 33, 1 }, { "TxPktSumSrdy", 32, 1 }, { "RcfUlpType", 28, 4 }, { "Eread", 27, 1 }, { "Ebypass", 26, 1 }, { "Esave", 25, 1 }, { "Static0", 24, 1 }, { "Cread", 23, 1 }, { "Cbypass", 22, 1 }, { "Csave", 21, 1 }, { "CPktOut", 20, 1 }, { "RxPagePoolFull", 18, 2 }, { "RxLpbkPkt", 17, 1 }, { "TxLpbkPkt", 16, 1 }, { "RxVfValid", 15, 1 }, { "SynLearned", 14, 1 }, { "SetDelEntry", 13, 1 }, { "SetInvEntry", 12, 1 }, { "CpcmdDvld", 11, 1 }, { "CpcmdSave", 10, 1 }, { "RxPstructsFull", 8, 2 }, { "EpcmdDvld", 7, 1 }, { "EpcmdFlush", 6, 1 }, { "EpcmdTrimPrefix", 5, 1 }, { "EpcmdTrimPostfix", 4, 1 }, { "ERssIp4Pkt", 3, 1 }, { "ERssIp6Pkt", 2, 1 }, { "ERssTcpUdpPkt", 1, 1 }, { "ERssFceFipPkt", 0, 1 }, { NULL } }; static const struct field_desc tp_la2[] = { { "CplCmdIn", 56, 8 }, { "MpsVfVld", 55, 1 }, { "MpsPf", 52, 3 }, { "MpsVf", 44, 8 }, { "SynIn", 43, 1 }, { "AckIn", 42, 1 }, { "FinIn", 41, 1 }, { "RstIn", 40, 1 }, { "DataIn", 39, 1 }, { "DataInVld", 38, 1 }, { "PadIn", 37, 1 }, { "RxBufEmpty", 36, 1 }, { "RxDdp", 35, 1 }, { "RxFbCongestion", 34, 1 }, { "TxFbCongestion", 33, 1 }, { "TxPktSumSrdy", 32, 1 }, { "RcfUlpType", 28, 4 }, { "Eread", 27, 1 }, { "Ebypass", 26, 1 }, { "Esave", 25, 1 }, { "Static0", 24, 1 }, { "Cread", 23, 1 }, { "Cbypass", 22, 1 }, { "Csave", 21, 1 }, { "CPktOut", 20, 1 }, { "RxPagePoolFull", 18, 2 }, { "RxLpbkPkt", 17, 1 }, { "TxLpbkPkt", 16, 1 }, { "RxVfValid", 15, 1 }, { "SynLearned", 14, 1 }, { "SetDelEntry", 13, 1 }, { "SetInvEntry", 12, 1 }, { "CpcmdDvld", 11, 1 }, { "CpcmdSave", 10, 1 }, { "RxPstructsFull", 8, 2 }, { "EpcmdDvld", 7, 1 }, { "EpcmdFlush", 6, 1 }, { "EpcmdTrimPrefix", 5, 1 }, { "EpcmdTrimPostfix", 4, 1 }, { "ERssIp4Pkt", 3, 1 }, { "ERssIp6Pkt", 2, 1 }, { "ERssTcpUdpPkt", 1, 1 }, { "ERssFceFipPkt", 0, 1 }, { NULL } }; static void tp_la_show(struct sbuf *sb, uint64_t *p, int idx) { field_desc_show(sb, *p, tp_la0); } static void tp_la_show2(struct sbuf *sb, uint64_t *p, int idx) { if (idx) sbuf_printf(sb, "\n"); field_desc_show(sb, p[0], tp_la0); if (idx < (TPLA_SIZE / 2 - 1) || p[1] != ~0ULL) field_desc_show(sb, p[1], tp_la0); } static void tp_la_show3(struct sbuf *sb, uint64_t *p, int idx) { if (idx) sbuf_printf(sb, "\n"); field_desc_show(sb, p[0], tp_la0); if (idx < (TPLA_SIZE / 2 - 1) || p[1] != ~0ULL) field_desc_show(sb, p[1], (p[0] & (1 << 17)) ? tp_la2 : tp_la1); } static int sysctl_tp_la(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; uint64_t *buf, *p; int rc; u_int i, inc; void (*show_func)(struct sbuf *, uint64_t *, int); rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); buf = malloc(TPLA_SIZE * sizeof(uint64_t), M_CXGBE, M_ZERO | M_WAITOK); t4_tp_read_la(sc, buf, NULL); p = buf; switch (G_DBGLAMODE(t4_read_reg(sc, A_TP_DBG_LA_CONFIG))) { case 2: inc = 2; show_func = tp_la_show2; break; case 3: inc = 2; show_func = tp_la_show3; break; default: inc = 1; show_func = tp_la_show; } for (i = 0; i < TPLA_SIZE / inc; i++, p += inc) (*show_func)(sb, p, i); rc = sbuf_finish(sb); sbuf_delete(sb); free(buf, M_CXGBE); return (rc); } static int sysctl_tx_rate(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc; u64 nrate[MAX_NCHAN], orate[MAX_NCHAN]; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); t4_get_chan_txrate(sc, nrate, orate); if (sc->chip_params->nchan > 2) { sbuf_printf(sb, " channel 0 channel 1" " channel 2 channel 3\n"); sbuf_printf(sb, "NIC B/s: %10ju %10ju %10ju %10ju\n", nrate[0], nrate[1], nrate[2], nrate[3]); sbuf_printf(sb, "Offload B/s: %10ju %10ju %10ju %10ju", orate[0], orate[1], orate[2], orate[3]); } else { sbuf_printf(sb, " channel 0 channel 1\n"); sbuf_printf(sb, "NIC B/s: %10ju %10ju\n", nrate[0], nrate[1]); sbuf_printf(sb, "Offload B/s: %10ju %10ju", orate[0], orate[1]); } rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_ulprx_la(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; uint32_t *buf, *p; int rc, i; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); buf = malloc(ULPRX_LA_SIZE * 8 * sizeof(uint32_t), M_CXGBE, M_ZERO | M_WAITOK); t4_ulprx_read_la(sc, buf); p = buf; sbuf_printf(sb, " Pcmd Type Message" " Data"); for (i = 0; i < ULPRX_LA_SIZE; i++, p += 8) { sbuf_printf(sb, "\n%08x%08x %4x %08x %08x%08x%08x%08x", p[1], p[0], p[2], p[3], p[7], p[6], p[5], p[4]); } rc = sbuf_finish(sb); sbuf_delete(sb); free(buf, M_CXGBE); return (rc); } static int sysctl_wcwr_stats(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc, v; MPASS(chip_id(sc) >= CHELSIO_T5); rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); v = t4_read_reg(sc, A_SGE_STAT_CFG); if (G_STATSOURCE_T5(v) == 7) { int mode; mode = is_t5(sc) ? G_STATMODE(v) : G_T6_STATMODE(v); if (mode == 0) { sbuf_printf(sb, "total %d, incomplete %d", t4_read_reg(sc, A_SGE_STAT_TOTAL), t4_read_reg(sc, A_SGE_STAT_MATCH)); } else if (mode == 1) { sbuf_printf(sb, "total %d, data overflow %d", t4_read_reg(sc, A_SGE_STAT_TOTAL), t4_read_reg(sc, A_SGE_STAT_MATCH)); } else { sbuf_printf(sb, "unknown mode %d", mode); } } rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_tc_params(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct tx_cl_rl_params tc; struct sbuf *sb; int i, rc, port_id, mbps, gbps; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); port_id = arg2 >> 16; MPASS(port_id < sc->params.nports); MPASS(sc->port[port_id] != NULL); i = arg2 & 0xffff; MPASS(i < sc->chip_params->nsched_cls); mtx_lock(&sc->tc_lock); tc = sc->port[port_id]->sched_params->cl_rl[i]; mtx_unlock(&sc->tc_lock); if (tc.flags & TX_CLRL_ERROR) { sbuf_printf(sb, "error"); goto done; } if (tc.ratemode == SCHED_CLASS_RATEMODE_REL) { /* XXX: top speed or actual link speed? */ gbps = port_top_speed(sc->port[port_id]); sbuf_printf(sb, " %u%% of %uGbps", tc.maxrate, gbps); } else if (tc.ratemode == SCHED_CLASS_RATEMODE_ABS) { switch (tc.rateunit) { case SCHED_CLASS_RATEUNIT_BITS: mbps = tc.maxrate / 1000; gbps = tc.maxrate / 1000000; if (tc.maxrate == gbps * 1000000) sbuf_printf(sb, " %uGbps", gbps); else if (tc.maxrate == mbps * 1000) sbuf_printf(sb, " %uMbps", mbps); else sbuf_printf(sb, " %uKbps", tc.maxrate); break; case SCHED_CLASS_RATEUNIT_PKTS: sbuf_printf(sb, " %upps", tc.maxrate); break; default: rc = ENXIO; goto done; } } switch (tc.mode) { case SCHED_CLASS_MODE_CLASS: sbuf_printf(sb, " aggregate"); break; case SCHED_CLASS_MODE_FLOW: sbuf_printf(sb, " per-flow"); break; default: rc = ENXIO; goto done; } done: if (rc == 0) rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } #endif #ifdef TCP_OFFLOAD static int sysctl_tls_rx_ports(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; int *old_ports, *new_ports; int i, new_count, rc; if (req->newptr == NULL && req->oldptr == NULL) return (SYSCTL_OUT(req, NULL, imax(sc->tt.num_tls_rx_ports, 1) * sizeof(sc->tt.tls_rx_ports[0]))); rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4tlsrx"); if (rc) return (rc); if (sc->tt.num_tls_rx_ports == 0) { i = -1; rc = SYSCTL_OUT(req, &i, sizeof(i)); } else rc = SYSCTL_OUT(req, sc->tt.tls_rx_ports, sc->tt.num_tls_rx_ports * sizeof(sc->tt.tls_rx_ports[0])); if (rc == 0 && req->newptr != NULL) { new_count = req->newlen / sizeof(new_ports[0]); new_ports = malloc(new_count * sizeof(new_ports[0]), M_CXGBE, M_WAITOK); rc = SYSCTL_IN(req, new_ports, new_count * sizeof(new_ports[0])); if (rc) goto err; /* Allow setting to a single '-1' to clear the list. */ if (new_count == 1 && new_ports[0] == -1) { ADAPTER_LOCK(sc); old_ports = sc->tt.tls_rx_ports; sc->tt.tls_rx_ports = NULL; sc->tt.num_tls_rx_ports = 0; ADAPTER_UNLOCK(sc); free(old_ports, M_CXGBE); } else { for (i = 0; i < new_count; i++) { if (new_ports[i] < 1 || new_ports[i] > IPPORT_MAX) { rc = EINVAL; goto err; } } ADAPTER_LOCK(sc); old_ports = sc->tt.tls_rx_ports; sc->tt.tls_rx_ports = new_ports; sc->tt.num_tls_rx_ports = new_count; ADAPTER_UNLOCK(sc); free(old_ports, M_CXGBE); new_ports = NULL; } err: free(new_ports, M_CXGBE); } end_synchronized_op(sc, 0); return (rc); } static void unit_conv(char *buf, size_t len, u_int val, u_int factor) { u_int rem = val % factor; if (rem == 0) snprintf(buf, len, "%u", val / factor); else { while (rem % 10 == 0) rem /= 10; snprintf(buf, len, "%u.%u", val / factor, rem); } } static int sysctl_tp_tick(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; char buf[16]; u_int res, re; u_int cclk_ps = 1000000000 / sc->params.vpd.cclk; res = t4_read_reg(sc, A_TP_TIMER_RESOLUTION); switch (arg2) { case 0: /* timer_tick */ re = G_TIMERRESOLUTION(res); break; case 1: /* TCP timestamp tick */ re = G_TIMESTAMPRESOLUTION(res); break; case 2: /* DACK tick */ re = G_DELAYEDACKRESOLUTION(res); break; default: return (EDOOFUS); } unit_conv(buf, sizeof(buf), (cclk_ps << re), 1000000); return (sysctl_handle_string(oidp, buf, sizeof(buf), req)); } static int sysctl_tp_dack_timer(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; u_int res, dack_re, v; u_int cclk_ps = 1000000000 / sc->params.vpd.cclk; res = t4_read_reg(sc, A_TP_TIMER_RESOLUTION); dack_re = G_DELAYEDACKRESOLUTION(res); v = ((cclk_ps << dack_re) / 1000000) * t4_read_reg(sc, A_TP_DACK_TIMER); return (sysctl_handle_int(oidp, &v, 0, req)); } static int sysctl_tp_timer(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; int reg = arg2; u_int tre; u_long tp_tick_us, v; u_int cclk_ps = 1000000000 / sc->params.vpd.cclk; MPASS(reg == A_TP_RXT_MIN || reg == A_TP_RXT_MAX || reg == A_TP_PERS_MIN || reg == A_TP_PERS_MAX || reg == A_TP_KEEP_IDLE || reg == A_TP_KEEP_INTVL || reg == A_TP_INIT_SRTT || reg == A_TP_FINWAIT2_TIMER); tre = G_TIMERRESOLUTION(t4_read_reg(sc, A_TP_TIMER_RESOLUTION)); tp_tick_us = (cclk_ps << tre) / 1000000; if (reg == A_TP_INIT_SRTT) v = tp_tick_us * G_INITSRTT(t4_read_reg(sc, reg)); else v = tp_tick_us * t4_read_reg(sc, reg); return (sysctl_handle_long(oidp, &v, 0, req)); } /* * All fields in TP_SHIFT_CNT are 4b and the starting location of the field is * passed to this function. */ static int sysctl_tp_shift_cnt(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; int idx = arg2; u_int v; MPASS(idx >= 0 && idx <= 24); v = (t4_read_reg(sc, A_TP_SHIFT_CNT) >> idx) & 0xf; return (sysctl_handle_int(oidp, &v, 0, req)); } static int sysctl_tp_backoff(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; int idx = arg2; u_int shift, v, r; MPASS(idx >= 0 && idx < 16); r = A_TP_TCP_BACKOFF_REG0 + (idx & ~3); shift = (idx & 3) << 3; v = (t4_read_reg(sc, r) >> shift) & M_TIMERBACKOFFINDEX0; return (sysctl_handle_int(oidp, &v, 0, req)); } static int sysctl_holdoff_tmr_idx_ofld(SYSCTL_HANDLER_ARGS) { struct vi_info *vi = arg1; struct adapter *sc = vi->pi->adapter; int idx, rc, i; struct sge_ofld_rxq *ofld_rxq; uint8_t v; idx = vi->ofld_tmr_idx; rc = sysctl_handle_int(oidp, &idx, 0, req); if (rc != 0 || req->newptr == NULL) return (rc); if (idx < 0 || idx >= SGE_NTIMERS) return (EINVAL); rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK, "t4otmr"); if (rc) return (rc); v = V_QINTR_TIMER_IDX(idx) | V_QINTR_CNT_EN(vi->ofld_pktc_idx != -1); for_each_ofld_rxq(vi, i, ofld_rxq) { #ifdef atomic_store_rel_8 atomic_store_rel_8(&ofld_rxq->iq.intr_params, v); #else ofld_rxq->iq.intr_params = v; #endif } vi->ofld_tmr_idx = idx; end_synchronized_op(sc, LOCK_HELD); return (0); } static int sysctl_holdoff_pktc_idx_ofld(SYSCTL_HANDLER_ARGS) { struct vi_info *vi = arg1; struct adapter *sc = vi->pi->adapter; int idx, rc; idx = vi->ofld_pktc_idx; rc = sysctl_handle_int(oidp, &idx, 0, req); if (rc != 0 || req->newptr == NULL) return (rc); if (idx < -1 || idx >= SGE_NCOUNTERS) return (EINVAL); rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK, "t4opktc"); if (rc) return (rc); if (vi->flags & VI_INIT_DONE) rc = EBUSY; /* cannot be changed once the queues are created */ else vi->ofld_pktc_idx = idx; end_synchronized_op(sc, LOCK_HELD); return (rc); } #endif static int get_sge_context(struct adapter *sc, struct t4_sge_context *cntxt) { int rc; if (cntxt->cid > M_CTXTQID) return (EINVAL); if (cntxt->mem_id != CTXT_EGRESS && cntxt->mem_id != CTXT_INGRESS && cntxt->mem_id != CTXT_FLM && cntxt->mem_id != CTXT_CNM) return (EINVAL); rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4ctxt"); if (rc) return (rc); if (sc->flags & FW_OK) { rc = -t4_sge_ctxt_rd(sc, sc->mbox, cntxt->cid, cntxt->mem_id, &cntxt->data[0]); if (rc == 0) goto done; } /* * Read via firmware failed or wasn't even attempted. Read directly via * the backdoor. */ rc = -t4_sge_ctxt_rd_bd(sc, cntxt->cid, cntxt->mem_id, &cntxt->data[0]); done: end_synchronized_op(sc, 0); return (rc); } static int load_fw(struct adapter *sc, struct t4_data *fw) { int rc; uint8_t *fw_data; rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4ldfw"); if (rc) return (rc); /* * The firmware, with the sole exception of the memory parity error * handler, runs from memory and not flash. It is almost always safe to * install a new firmware on a running system. Just set bit 1 in * hw.cxgbe.dflags or dev...dflags first. */ if (sc->flags & FULL_INIT_DONE && (sc->debug_flags & DF_LOAD_FW_ANYTIME) == 0) { rc = EBUSY; goto done; } fw_data = malloc(fw->len, M_CXGBE, M_WAITOK); if (fw_data == NULL) { rc = ENOMEM; goto done; } rc = copyin(fw->data, fw_data, fw->len); if (rc == 0) rc = -t4_load_fw(sc, fw_data, fw->len); free(fw_data, M_CXGBE); done: end_synchronized_op(sc, 0); return (rc); } static int load_cfg(struct adapter *sc, struct t4_data *cfg) { int rc; uint8_t *cfg_data = NULL; rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4ldcf"); if (rc) return (rc); if (cfg->len == 0) { /* clear */ rc = -t4_load_cfg(sc, NULL, 0); goto done; } cfg_data = malloc(cfg->len, M_CXGBE, M_WAITOK); if (cfg_data == NULL) { rc = ENOMEM; goto done; } rc = copyin(cfg->data, cfg_data, cfg->len); if (rc == 0) rc = -t4_load_cfg(sc, cfg_data, cfg->len); free(cfg_data, M_CXGBE); done: end_synchronized_op(sc, 0); return (rc); } static int load_boot(struct adapter *sc, struct t4_bootrom *br) { int rc; uint8_t *br_data = NULL; u_int offset; if (br->len > 1024 * 1024) return (EFBIG); if (br->pf_offset == 0) { /* pfidx */ if (br->pfidx_addr > 7) return (EINVAL); offset = G_OFFSET(t4_read_reg(sc, PF_REG(br->pfidx_addr, A_PCIE_PF_EXPROM_OFST))); } else if (br->pf_offset == 1) { /* offset */ offset = G_OFFSET(br->pfidx_addr); } else { return (EINVAL); } rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4ldbr"); if (rc) return (rc); if (br->len == 0) { /* clear */ rc = -t4_load_boot(sc, NULL, offset, 0); goto done; } br_data = malloc(br->len, M_CXGBE, M_WAITOK); if (br_data == NULL) { rc = ENOMEM; goto done; } rc = copyin(br->data, br_data, br->len); if (rc == 0) rc = -t4_load_boot(sc, br_data, offset, br->len); free(br_data, M_CXGBE); done: end_synchronized_op(sc, 0); return (rc); } static int load_bootcfg(struct adapter *sc, struct t4_data *bc) { int rc; uint8_t *bc_data = NULL; rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4ldcf"); if (rc) return (rc); if (bc->len == 0) { /* clear */ rc = -t4_load_bootcfg(sc, NULL, 0); goto done; } bc_data = malloc(bc->len, M_CXGBE, M_WAITOK); if (bc_data == NULL) { rc = ENOMEM; goto done; } rc = copyin(bc->data, bc_data, bc->len); if (rc == 0) rc = -t4_load_bootcfg(sc, bc_data, bc->len); free(bc_data, M_CXGBE); done: end_synchronized_op(sc, 0); return (rc); } static int cudbg_dump(struct adapter *sc, struct t4_cudbg_dump *dump) { int rc; struct cudbg_init *cudbg; void *handle, *buf; /* buf is large, don't block if no memory is available */ buf = malloc(dump->len, M_CXGBE, M_NOWAIT | M_ZERO); if (buf == NULL) return (ENOMEM); handle = cudbg_alloc_handle(); if (handle == NULL) { rc = ENOMEM; goto done; } cudbg = cudbg_get_init(handle); cudbg->adap = sc; cudbg->print = (cudbg_print_cb)printf; #ifndef notyet device_printf(sc->dev, "%s: wr_flash %u, len %u, data %p.\n", __func__, dump->wr_flash, dump->len, dump->data); #endif if (dump->wr_flash) cudbg->use_flash = 1; MPASS(sizeof(cudbg->dbg_bitmap) == sizeof(dump->bitmap)); memcpy(cudbg->dbg_bitmap, dump->bitmap, sizeof(cudbg->dbg_bitmap)); rc = cudbg_collect(handle, buf, &dump->len); if (rc != 0) goto done; rc = copyout(buf, dump->data, dump->len); done: cudbg_free_handle(handle); free(buf, M_CXGBE); return (rc); } static void free_offload_policy(struct t4_offload_policy *op) { struct offload_rule *r; int i; if (op == NULL) return; r = &op->rule[0]; for (i = 0; i < op->nrules; i++, r++) { free(r->bpf_prog.bf_insns, M_CXGBE); } free(op->rule, M_CXGBE); free(op, M_CXGBE); } static int set_offload_policy(struct adapter *sc, struct t4_offload_policy *uop) { int i, rc, len; struct t4_offload_policy *op, *old; struct bpf_program *bf; const struct offload_settings *s; struct offload_rule *r; void *u; if (!is_offload(sc)) return (ENODEV); if (uop->nrules == 0) { /* Delete installed policies. */ op = NULL; goto set_policy; } if (uop->nrules > 256) { /* arbitrary */ return (E2BIG); } /* Copy userspace offload policy to kernel */ op = malloc(sizeof(*op), M_CXGBE, M_ZERO | M_WAITOK); op->nrules = uop->nrules; len = op->nrules * sizeof(struct offload_rule); op->rule = malloc(len, M_CXGBE, M_ZERO | M_WAITOK); rc = copyin(uop->rule, op->rule, len); if (rc) { free(op->rule, M_CXGBE); free(op, M_CXGBE); return (rc); } r = &op->rule[0]; for (i = 0; i < op->nrules; i++, r++) { /* Validate open_type */ if (r->open_type != OPEN_TYPE_LISTEN && r->open_type != OPEN_TYPE_ACTIVE && r->open_type != OPEN_TYPE_PASSIVE && r->open_type != OPEN_TYPE_DONTCARE) { error: /* * Rules 0 to i have malloc'd filters that need to be * freed. Rules i+1 to nrules have userspace pointers * and should be left alone. */ op->nrules = i; free_offload_policy(op); return (rc); } /* Validate settings */ s = &r->settings; if ((s->offload != 0 && s->offload != 1) || s->cong_algo < -1 || s->cong_algo > CONG_ALG_HIGHSPEED || s->sched_class < -1 || s->sched_class >= sc->chip_params->nsched_cls) { rc = EINVAL; goto error; } bf = &r->bpf_prog; u = bf->bf_insns; /* userspace ptr */ bf->bf_insns = NULL; if (bf->bf_len == 0) { /* legal, matches everything */ continue; } len = bf->bf_len * sizeof(*bf->bf_insns); bf->bf_insns = malloc(len, M_CXGBE, M_ZERO | M_WAITOK); rc = copyin(u, bf->bf_insns, len); if (rc != 0) goto error; if (!bpf_validate(bf->bf_insns, bf->bf_len)) { rc = EINVAL; goto error; } } set_policy: rw_wlock(&sc->policy_lock); old = sc->policy; sc->policy = op; rw_wunlock(&sc->policy_lock); free_offload_policy(old); return (0); } #define MAX_READ_BUF_SIZE (128 * 1024) static int read_card_mem(struct adapter *sc, int win, struct t4_mem_range *mr) { uint32_t addr, remaining, n; uint32_t *buf; int rc; uint8_t *dst; rc = validate_mem_range(sc, mr->addr, mr->len); if (rc != 0) return (rc); buf = malloc(min(mr->len, MAX_READ_BUF_SIZE), M_CXGBE, M_WAITOK); addr = mr->addr; remaining = mr->len; dst = (void *)mr->data; while (remaining) { n = min(remaining, MAX_READ_BUF_SIZE); read_via_memwin(sc, 2, addr, buf, n); rc = copyout(buf, dst, n); if (rc != 0) break; dst += n; remaining -= n; addr += n; } free(buf, M_CXGBE); return (rc); } #undef MAX_READ_BUF_SIZE static int read_i2c(struct adapter *sc, struct t4_i2c_data *i2cd) { int rc; if (i2cd->len == 0 || i2cd->port_id >= sc->params.nports) return (EINVAL); if (i2cd->len > sizeof(i2cd->data)) return (EFBIG); rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4i2crd"); if (rc) return (rc); rc = -t4_i2c_rd(sc, sc->mbox, i2cd->port_id, i2cd->dev_addr, i2cd->offset, i2cd->len, &i2cd->data[0]); end_synchronized_op(sc, 0); return (rc); } int t4_os_find_pci_capability(struct adapter *sc, int cap) { int i; return (pci_find_cap(sc->dev, cap, &i) == 0 ? i : 0); } int t4_os_pci_save_state(struct adapter *sc) { device_t dev; struct pci_devinfo *dinfo; dev = sc->dev; dinfo = device_get_ivars(dev); pci_cfg_save(dev, dinfo, 0); return (0); } int t4_os_pci_restore_state(struct adapter *sc) { device_t dev; struct pci_devinfo *dinfo; dev = sc->dev; dinfo = device_get_ivars(dev); pci_cfg_restore(dev, dinfo); return (0); } void t4_os_portmod_changed(struct port_info *pi) { struct adapter *sc = pi->adapter; struct vi_info *vi; struct ifnet *ifp; static const char *mod_str[] = { NULL, "LR", "SR", "ER", "TWINAX", "active TWINAX", "LRM" }; PORT_LOCK(pi); build_medialist(pi, &pi->media); PORT_UNLOCK(pi); vi = &pi->vi[0]; if (begin_synchronized_op(sc, vi, HOLD_LOCK, "t4mod") == 0) { init_l1cfg(pi); end_synchronized_op(sc, LOCK_HELD); } ifp = vi->ifp; if (pi->mod_type == FW_PORT_MOD_TYPE_NONE) if_printf(ifp, "transceiver unplugged.\n"); else if (pi->mod_type == FW_PORT_MOD_TYPE_UNKNOWN) if_printf(ifp, "unknown transceiver inserted.\n"); else if (pi->mod_type == FW_PORT_MOD_TYPE_NOTSUPPORTED) if_printf(ifp, "unsupported transceiver inserted.\n"); else if (pi->mod_type > 0 && pi->mod_type < nitems(mod_str)) { if_printf(ifp, "%dGbps %s transceiver inserted.\n", port_top_speed(pi), mod_str[pi->mod_type]); } else { if_printf(ifp, "transceiver (type %d) inserted.\n", pi->mod_type); } } void t4_os_link_changed(struct port_info *pi) { struct vi_info *vi; struct ifnet *ifp; struct link_config *lc; int v; for_each_vi(pi, v, vi) { ifp = vi->ifp; if (ifp == NULL) continue; lc = &pi->link_cfg; if (lc->link_ok) { ifp->if_baudrate = IF_Mbps(lc->speed); if_link_state_change(ifp, LINK_STATE_UP); } else { if_link_state_change(ifp, LINK_STATE_DOWN); } } } void t4_iterate(void (*func)(struct adapter *, void *), void *arg) { struct adapter *sc; sx_slock(&t4_list_lock); SLIST_FOREACH(sc, &t4_list, link) { /* * func should not make any assumptions about what state sc is * in - the only guarantee is that sc->sc_lock is a valid lock. */ func(sc, arg); } sx_sunlock(&t4_list_lock); } static int t4_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data, int fflag, struct thread *td) { int rc; struct adapter *sc = dev->si_drv1; rc = priv_check(td, PRIV_DRIVER); if (rc != 0) return (rc); switch (cmd) { case CHELSIO_T4_GETREG: { struct t4_reg *edata = (struct t4_reg *)data; if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len) return (EFAULT); if (edata->size == 4) edata->val = t4_read_reg(sc, edata->addr); else if (edata->size == 8) edata->val = t4_read_reg64(sc, edata->addr); else return (EINVAL); break; } case CHELSIO_T4_SETREG: { struct t4_reg *edata = (struct t4_reg *)data; if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len) return (EFAULT); if (edata->size == 4) { if (edata->val & 0xffffffff00000000) return (EINVAL); t4_write_reg(sc, edata->addr, (uint32_t) edata->val); } else if (edata->size == 8) t4_write_reg64(sc, edata->addr, edata->val); else return (EINVAL); break; } case CHELSIO_T4_REGDUMP: { struct t4_regdump *regs = (struct t4_regdump *)data; int reglen = t4_get_regs_len(sc); uint8_t *buf; if (regs->len < reglen) { regs->len = reglen; /* hint to the caller */ return (ENOBUFS); } regs->len = reglen; buf = malloc(reglen, M_CXGBE, M_WAITOK | M_ZERO); get_regs(sc, regs, buf); rc = copyout(buf, regs->data, reglen); free(buf, M_CXGBE); break; } case CHELSIO_T4_GET_FILTER_MODE: rc = get_filter_mode(sc, (uint32_t *)data); break; case CHELSIO_T4_SET_FILTER_MODE: rc = set_filter_mode(sc, *(uint32_t *)data); break; case CHELSIO_T4_GET_FILTER: rc = get_filter(sc, (struct t4_filter *)data); break; case CHELSIO_T4_SET_FILTER: rc = set_filter(sc, (struct t4_filter *)data); break; case CHELSIO_T4_DEL_FILTER: rc = del_filter(sc, (struct t4_filter *)data); break; case CHELSIO_T4_GET_SGE_CONTEXT: rc = get_sge_context(sc, (struct t4_sge_context *)data); break; case CHELSIO_T4_LOAD_FW: rc = load_fw(sc, (struct t4_data *)data); break; case CHELSIO_T4_GET_MEM: rc = read_card_mem(sc, 2, (struct t4_mem_range *)data); break; case CHELSIO_T4_GET_I2C: rc = read_i2c(sc, (struct t4_i2c_data *)data); break; case CHELSIO_T4_CLEAR_STATS: { int i, v, bg_map; u_int port_id = *(uint32_t *)data; struct port_info *pi; struct vi_info *vi; if (port_id >= sc->params.nports) return (EINVAL); pi = sc->port[port_id]; if (pi == NULL) return (EIO); /* MAC stats */ t4_clr_port_stats(sc, pi->tx_chan); pi->tx_parse_error = 0; pi->tnl_cong_drops = 0; mtx_lock(&sc->reg_lock); for_each_vi(pi, v, vi) { if (vi->flags & VI_INIT_DONE) t4_clr_vi_stats(sc, vi->viid); } bg_map = pi->mps_bg_map; v = 0; /* reuse */ while (bg_map) { i = ffs(bg_map) - 1; t4_write_indirect(sc, A_TP_MIB_INDEX, A_TP_MIB_DATA, &v, 1, A_TP_MIB_TNL_CNG_DROP_0 + i); bg_map &= ~(1 << i); } mtx_unlock(&sc->reg_lock); /* * Since this command accepts a port, clear stats for * all VIs on this port. */ for_each_vi(pi, v, vi) { if (vi->flags & VI_INIT_DONE) { struct sge_rxq *rxq; struct sge_txq *txq; struct sge_wrq *wrq; for_each_rxq(vi, i, rxq) { #if defined(INET) || defined(INET6) rxq->lro.lro_queued = 0; rxq->lro.lro_flushed = 0; #endif rxq->rxcsum = 0; rxq->vlan_extraction = 0; } for_each_txq(vi, i, txq) { txq->txcsum = 0; txq->tso_wrs = 0; txq->vlan_insertion = 0; txq->imm_wrs = 0; txq->sgl_wrs = 0; txq->txpkt_wrs = 0; txq->txpkts0_wrs = 0; txq->txpkts1_wrs = 0; txq->txpkts0_pkts = 0; txq->txpkts1_pkts = 0; mp_ring_reset_stats(txq->r); } #ifdef TCP_OFFLOAD /* nothing to clear for each ofld_rxq */ for_each_ofld_txq(vi, i, wrq) { wrq->tx_wrs_direct = 0; wrq->tx_wrs_copied = 0; } #endif if (IS_MAIN_VI(vi)) { wrq = &sc->sge.ctrlq[pi->port_id]; wrq->tx_wrs_direct = 0; wrq->tx_wrs_copied = 0; } } } break; } case CHELSIO_T4_SCHED_CLASS: rc = t4_set_sched_class(sc, (struct t4_sched_params *)data); break; case CHELSIO_T4_SCHED_QUEUE: rc = t4_set_sched_queue(sc, (struct t4_sched_queue *)data); break; case CHELSIO_T4_GET_TRACER: rc = t4_get_tracer(sc, (struct t4_tracer *)data); break; case CHELSIO_T4_SET_TRACER: rc = t4_set_tracer(sc, (struct t4_tracer *)data); break; case CHELSIO_T4_LOAD_CFG: rc = load_cfg(sc, (struct t4_data *)data); break; case CHELSIO_T4_LOAD_BOOT: rc = load_boot(sc, (struct t4_bootrom *)data); break; case CHELSIO_T4_LOAD_BOOTCFG: rc = load_bootcfg(sc, (struct t4_data *)data); break; case CHELSIO_T4_CUDBG_DUMP: rc = cudbg_dump(sc, (struct t4_cudbg_dump *)data); break; case CHELSIO_T4_SET_OFLD_POLICY: rc = set_offload_policy(sc, (struct t4_offload_policy *)data); break; default: rc = ENOTTY; } return (rc); } void t4_db_full(struct adapter *sc) { CXGBE_UNIMPLEMENTED(__func__); } void t4_db_dropped(struct adapter *sc) { CXGBE_UNIMPLEMENTED(__func__); } #ifdef TCP_OFFLOAD static int toe_capability(struct vi_info *vi, int enable) { int rc; struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; ASSERT_SYNCHRONIZED_OP(sc); if (!is_offload(sc)) return (ENODEV); if (enable) { if ((vi->ifp->if_capenable & IFCAP_TOE) != 0) { /* TOE is already enabled. */ return (0); } /* * We need the port's queues around so that we're able to send * and receive CPLs to/from the TOE even if the ifnet for this * port has never been UP'd administratively. */ if (!(vi->flags & VI_INIT_DONE)) { rc = vi_full_init(vi); if (rc) return (rc); } if (!(pi->vi[0].flags & VI_INIT_DONE)) { rc = vi_full_init(&pi->vi[0]); if (rc) return (rc); } if (isset(&sc->offload_map, pi->port_id)) { /* TOE is enabled on another VI of this port. */ pi->uld_vis++; return (0); } if (!uld_active(sc, ULD_TOM)) { rc = t4_activate_uld(sc, ULD_TOM); if (rc == EAGAIN) { log(LOG_WARNING, "You must kldload t4_tom.ko before trying " "to enable TOE on a cxgbe interface.\n"); } if (rc != 0) return (rc); KASSERT(sc->tom_softc != NULL, ("%s: TOM activated but softc NULL", __func__)); KASSERT(uld_active(sc, ULD_TOM), ("%s: TOM activated but flag not set", __func__)); } /* Activate iWARP and iSCSI too, if the modules are loaded. */ if (!uld_active(sc, ULD_IWARP)) (void) t4_activate_uld(sc, ULD_IWARP); if (!uld_active(sc, ULD_ISCSI)) (void) t4_activate_uld(sc, ULD_ISCSI); pi->uld_vis++; setbit(&sc->offload_map, pi->port_id); } else { pi->uld_vis--; if (!isset(&sc->offload_map, pi->port_id) || pi->uld_vis > 0) return (0); KASSERT(uld_active(sc, ULD_TOM), ("%s: TOM never initialized?", __func__)); clrbit(&sc->offload_map, pi->port_id); } return (0); } /* * Add an upper layer driver to the global list. */ int t4_register_uld(struct uld_info *ui) { int rc = 0; struct uld_info *u; sx_xlock(&t4_uld_list_lock); SLIST_FOREACH(u, &t4_uld_list, link) { if (u->uld_id == ui->uld_id) { rc = EEXIST; goto done; } } SLIST_INSERT_HEAD(&t4_uld_list, ui, link); ui->refcount = 0; done: sx_xunlock(&t4_uld_list_lock); return (rc); } int t4_unregister_uld(struct uld_info *ui) { int rc = EINVAL; struct uld_info *u; sx_xlock(&t4_uld_list_lock); SLIST_FOREACH(u, &t4_uld_list, link) { if (u == ui) { if (ui->refcount > 0) { rc = EBUSY; goto done; } SLIST_REMOVE(&t4_uld_list, ui, uld_info, link); rc = 0; goto done; } } done: sx_xunlock(&t4_uld_list_lock); return (rc); } int t4_activate_uld(struct adapter *sc, int id) { int rc; struct uld_info *ui; ASSERT_SYNCHRONIZED_OP(sc); if (id < 0 || id > ULD_MAX) return (EINVAL); rc = EAGAIN; /* kldoad the module with this ULD and try again. */ sx_slock(&t4_uld_list_lock); SLIST_FOREACH(ui, &t4_uld_list, link) { if (ui->uld_id == id) { if (!(sc->flags & FULL_INIT_DONE)) { rc = adapter_full_init(sc); if (rc != 0) break; } rc = ui->activate(sc); if (rc == 0) { setbit(&sc->active_ulds, id); ui->refcount++; } break; } } sx_sunlock(&t4_uld_list_lock); return (rc); } int t4_deactivate_uld(struct adapter *sc, int id) { int rc; struct uld_info *ui; ASSERT_SYNCHRONIZED_OP(sc); if (id < 0 || id > ULD_MAX) return (EINVAL); rc = ENXIO; sx_slock(&t4_uld_list_lock); SLIST_FOREACH(ui, &t4_uld_list, link) { if (ui->uld_id == id) { rc = ui->deactivate(sc); if (rc == 0) { clrbit(&sc->active_ulds, id); ui->refcount--; } break; } } sx_sunlock(&t4_uld_list_lock); return (rc); } int uld_active(struct adapter *sc, int uld_id) { MPASS(uld_id >= 0 && uld_id <= ULD_MAX); return (isset(&sc->active_ulds, uld_id)); } #endif /* * t = ptr to tunable. * nc = number of CPUs. * c = compiled in default for that tunable. */ static void calculate_nqueues(int *t, int nc, const int c) { int nq; if (*t > 0) return; nq = *t < 0 ? -*t : c; *t = min(nc, nq); } /* * Come up with reasonable defaults for some of the tunables, provided they're * not set by the user (in which case we'll use the values as is). */ static void tweak_tunables(void) { int nc = mp_ncpus; /* our snapshot of the number of CPUs */ if (t4_ntxq < 1) { #ifdef RSS t4_ntxq = rss_getnumbuckets(); #else calculate_nqueues(&t4_ntxq, nc, NTXQ); #endif } calculate_nqueues(&t4_ntxq_vi, nc, NTXQ_VI); if (t4_nrxq < 1) { #ifdef RSS t4_nrxq = rss_getnumbuckets(); #else calculate_nqueues(&t4_nrxq, nc, NRXQ); #endif } calculate_nqueues(&t4_nrxq_vi, nc, NRXQ_VI); #ifdef TCP_OFFLOAD calculate_nqueues(&t4_nofldtxq, nc, NOFLDTXQ); calculate_nqueues(&t4_nofldtxq_vi, nc, NOFLDTXQ_VI); calculate_nqueues(&t4_nofldrxq, nc, NOFLDRXQ); calculate_nqueues(&t4_nofldrxq_vi, nc, NOFLDRXQ_VI); if (t4_toecaps_allowed == -1) t4_toecaps_allowed = FW_CAPS_CONFIG_TOE; if (t4_rdmacaps_allowed == -1) { t4_rdmacaps_allowed = FW_CAPS_CONFIG_RDMA_RDDP | FW_CAPS_CONFIG_RDMA_RDMAC; } if (t4_iscsicaps_allowed == -1) { t4_iscsicaps_allowed = FW_CAPS_CONFIG_ISCSI_INITIATOR_PDU | FW_CAPS_CONFIG_ISCSI_TARGET_PDU | FW_CAPS_CONFIG_ISCSI_T10DIF; } if (t4_tmr_idx_ofld < 0 || t4_tmr_idx_ofld >= SGE_NTIMERS) t4_tmr_idx_ofld = TMR_IDX_OFLD; if (t4_pktc_idx_ofld < -1 || t4_pktc_idx_ofld >= SGE_NCOUNTERS) t4_pktc_idx_ofld = PKTC_IDX_OFLD; #else if (t4_toecaps_allowed == -1) t4_toecaps_allowed = 0; if (t4_rdmacaps_allowed == -1) t4_rdmacaps_allowed = 0; if (t4_iscsicaps_allowed == -1) t4_iscsicaps_allowed = 0; #endif #ifdef DEV_NETMAP calculate_nqueues(&t4_nnmtxq_vi, nc, NNMTXQ_VI); calculate_nqueues(&t4_nnmrxq_vi, nc, NNMRXQ_VI); #endif if (t4_tmr_idx < 0 || t4_tmr_idx >= SGE_NTIMERS) t4_tmr_idx = TMR_IDX; if (t4_pktc_idx < -1 || t4_pktc_idx >= SGE_NCOUNTERS) t4_pktc_idx = PKTC_IDX; if (t4_qsize_txq < 128) t4_qsize_txq = 128; if (t4_qsize_rxq < 128) t4_qsize_rxq = 128; while (t4_qsize_rxq & 7) t4_qsize_rxq++; t4_intr_types &= INTR_MSIX | INTR_MSI | INTR_INTX; /* * Number of VIs to create per-port. The first VI is the "main" regular * VI for the port. The rest are additional virtual interfaces on the * same physical port. Note that the main VI does not have native * netmap support but the extra VIs do. * * Limit the number of VIs per port to the number of available * MAC addresses per port. */ if (t4_num_vis < 1) t4_num_vis = 1; if (t4_num_vis > nitems(vi_mac_funcs)) { t4_num_vis = nitems(vi_mac_funcs); printf("cxgbe: number of VIs limited to %d\n", t4_num_vis); } if (pcie_relaxed_ordering < 0 || pcie_relaxed_ordering > 2) { pcie_relaxed_ordering = 1; #if defined(__i386__) || defined(__amd64__) if (cpu_vendor_id == CPU_VENDOR_INTEL) pcie_relaxed_ordering = 0; #endif } } #ifdef DDB static void t4_dump_tcb(struct adapter *sc, int tid) { uint32_t base, i, j, off, pf, reg, save, tcb_addr, win_pos; reg = PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_OFFSET, 2); save = t4_read_reg(sc, reg); base = sc->memwin[2].mw_base; /* Dump TCB for the tid */ tcb_addr = t4_read_reg(sc, A_TP_CMM_TCB_BASE); tcb_addr += tid * TCB_SIZE; if (is_t4(sc)) { pf = 0; win_pos = tcb_addr & ~0xf; /* start must be 16B aligned */ } else { pf = V_PFNUM(sc->pf); win_pos = tcb_addr & ~0x7f; /* start must be 128B aligned */ } t4_write_reg(sc, reg, win_pos | pf); t4_read_reg(sc, reg); off = tcb_addr - win_pos; for (i = 0; i < 4; i++) { uint32_t buf[8]; for (j = 0; j < 8; j++, off += 4) buf[j] = htonl(t4_read_reg(sc, base + off)); db_printf("%08x %08x %08x %08x %08x %08x %08x %08x\n", buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7]); } t4_write_reg(sc, reg, save); t4_read_reg(sc, reg); } static void t4_dump_devlog(struct adapter *sc) { struct devlog_params *dparams = &sc->params.devlog; struct fw_devlog_e e; int i, first, j, m, nentries, rc; uint64_t ftstamp = UINT64_MAX; if (dparams->start == 0) { db_printf("devlog params not valid\n"); return; } nentries = dparams->size / sizeof(struct fw_devlog_e); m = fwmtype_to_hwmtype(dparams->memtype); /* Find the first entry. */ first = -1; for (i = 0; i < nentries && !db_pager_quit; i++) { rc = -t4_mem_read(sc, m, dparams->start + i * sizeof(e), sizeof(e), (void *)&e); if (rc != 0) break; if (e.timestamp == 0) break; e.timestamp = be64toh(e.timestamp); if (e.timestamp < ftstamp) { ftstamp = e.timestamp; first = i; } } if (first == -1) return; i = first; do { rc = -t4_mem_read(sc, m, dparams->start + i * sizeof(e), sizeof(e), (void *)&e); if (rc != 0) return; if (e.timestamp == 0) return; e.timestamp = be64toh(e.timestamp); e.seqno = be32toh(e.seqno); for (j = 0; j < 8; j++) e.params[j] = be32toh(e.params[j]); db_printf("%10d %15ju %8s %8s ", e.seqno, e.timestamp, (e.level < nitems(devlog_level_strings) ? devlog_level_strings[e.level] : "UNKNOWN"), (e.facility < nitems(devlog_facility_strings) ? devlog_facility_strings[e.facility] : "UNKNOWN")); db_printf(e.fmt, e.params[0], e.params[1], e.params[2], e.params[3], e.params[4], e.params[5], e.params[6], e.params[7]); if (++i == nentries) i = 0; } while (i != first && !db_pager_quit); } static struct command_table db_t4_table = LIST_HEAD_INITIALIZER(db_t4_table); _DB_SET(_show, t4, NULL, db_show_table, 0, &db_t4_table); DB_FUNC(devlog, db_show_devlog, db_t4_table, CS_OWN, NULL) { device_t dev; int t; bool valid; valid = false; t = db_read_token(); if (t == tIDENT) { dev = device_lookup_by_name(db_tok_string); valid = true; } db_skip_to_eol(); if (!valid) { db_printf("usage: show t4 devlog \n"); return; } if (dev == NULL) { db_printf("device not found\n"); return; } t4_dump_devlog(device_get_softc(dev)); } DB_FUNC(tcb, db_show_t4tcb, db_t4_table, CS_OWN, NULL) { device_t dev; int radix, tid, t; bool valid; valid = false; radix = db_radix; db_radix = 10; t = db_read_token(); if (t == tIDENT) { dev = device_lookup_by_name(db_tok_string); t = db_read_token(); if (t == tNUMBER) { tid = db_tok_number; valid = true; } } db_radix = radix; db_skip_to_eol(); if (!valid) { db_printf("usage: show t4 tcb \n"); return; } if (dev == NULL) { db_printf("device not found\n"); return; } if (tid < 0) { db_printf("invalid tid\n"); return; } t4_dump_tcb(device_get_softc(dev), tid); } #endif /* * Borrowed from cesa_prep_aes_key(). * * NB: The crypto engine wants the words in the decryption key in reverse * order. */ void t4_aes_getdeckey(void *dec_key, const void *enc_key, unsigned int kbits) { uint32_t ek[4 * (RIJNDAEL_MAXNR + 1)]; uint32_t *dkey; int i; rijndaelKeySetupEnc(ek, enc_key, kbits); dkey = dec_key; dkey += (kbits / 8) / 4; switch (kbits) { case 128: for (i = 0; i < 4; i++) *--dkey = htobe32(ek[4 * 10 + i]); break; case 192: for (i = 0; i < 2; i++) *--dkey = htobe32(ek[4 * 11 + 2 + i]); for (i = 0; i < 4; i++) *--dkey = htobe32(ek[4 * 12 + i]); break; case 256: for (i = 0; i < 4; i++) *--dkey = htobe32(ek[4 * 13 + i]); for (i = 0; i < 4; i++) *--dkey = htobe32(ek[4 * 14 + i]); break; } MPASS(dkey == dec_key); } static struct sx mlu; /* mod load unload */ SX_SYSINIT(cxgbe_mlu, &mlu, "cxgbe mod load/unload"); static int mod_event(module_t mod, int cmd, void *arg) { int rc = 0; static int loaded = 0; switch (cmd) { case MOD_LOAD: sx_xlock(&mlu); if (loaded++ == 0) { t4_sge_modload(); t4_register_shared_cpl_handler(CPL_SET_TCB_RPL, t4_filter_rpl, CPL_COOKIE_FILTER); t4_register_shared_cpl_handler(CPL_L2T_WRITE_RPL, do_l2t_write_rpl, CPL_COOKIE_FILTER); + t4_register_shared_cpl_handler(CPL_ACT_OPEN_RPL, + t4_hashfilter_ao_rpl, CPL_COOKIE_HASHFILTER); + t4_register_shared_cpl_handler(CPL_SET_TCB_RPL, + t4_hashfilter_tcb_rpl, CPL_COOKIE_HASHFILTER); + t4_register_shared_cpl_handler(CPL_ABORT_RPL_RSS, + t4_del_hashfilter_rpl, CPL_COOKIE_HASHFILTER); t4_register_cpl_handler(CPL_TRACE_PKT, t4_trace_pkt); t4_register_cpl_handler(CPL_T5_TRACE_PKT, t5_trace_pkt); sx_init(&t4_list_lock, "T4/T5 adapters"); SLIST_INIT(&t4_list); #ifdef TCP_OFFLOAD sx_init(&t4_uld_list_lock, "T4/T5 ULDs"); SLIST_INIT(&t4_uld_list); #endif t4_tracer_modload(); tweak_tunables(); } sx_xunlock(&mlu); break; case MOD_UNLOAD: sx_xlock(&mlu); if (--loaded == 0) { int tries; sx_slock(&t4_list_lock); if (!SLIST_EMPTY(&t4_list)) { rc = EBUSY; sx_sunlock(&t4_list_lock); goto done_unload; } #ifdef TCP_OFFLOAD sx_slock(&t4_uld_list_lock); if (!SLIST_EMPTY(&t4_uld_list)) { rc = EBUSY; sx_sunlock(&t4_uld_list_lock); sx_sunlock(&t4_list_lock); goto done_unload; } #endif tries = 0; while (tries++ < 5 && t4_sge_extfree_refs() != 0) { uprintf("%ju clusters with custom free routine " "still is use.\n", t4_sge_extfree_refs()); pause("t4unload", 2 * hz); } #ifdef TCP_OFFLOAD sx_sunlock(&t4_uld_list_lock); #endif sx_sunlock(&t4_list_lock); if (t4_sge_extfree_refs() == 0) { t4_tracer_modunload(); #ifdef TCP_OFFLOAD sx_destroy(&t4_uld_list_lock); #endif sx_destroy(&t4_list_lock); t4_sge_modunload(); loaded = 0; } else { rc = EBUSY; loaded++; /* undo earlier decrement */ } } done_unload: sx_xunlock(&mlu); break; } return (rc); } static devclass_t t4_devclass, t5_devclass, t6_devclass; static devclass_t cxgbe_devclass, cxl_devclass, cc_devclass; static devclass_t vcxgbe_devclass, vcxl_devclass, vcc_devclass; DRIVER_MODULE(t4nex, pci, t4_driver, t4_devclass, mod_event, 0); MODULE_VERSION(t4nex, 1); MODULE_DEPEND(t4nex, firmware, 1, 1, 1); #ifdef DEV_NETMAP MODULE_DEPEND(t4nex, netmap, 1, 1, 1); #endif /* DEV_NETMAP */ DRIVER_MODULE(t5nex, pci, t5_driver, t5_devclass, mod_event, 0); MODULE_VERSION(t5nex, 1); MODULE_DEPEND(t5nex, firmware, 1, 1, 1); #ifdef DEV_NETMAP MODULE_DEPEND(t5nex, netmap, 1, 1, 1); #endif /* DEV_NETMAP */ DRIVER_MODULE(t6nex, pci, t6_driver, t6_devclass, mod_event, 0); MODULE_VERSION(t6nex, 1); MODULE_DEPEND(t6nex, firmware, 1, 1, 1); #ifdef DEV_NETMAP MODULE_DEPEND(t6nex, netmap, 1, 1, 1); #endif /* DEV_NETMAP */ DRIVER_MODULE(cxgbe, t4nex, cxgbe_driver, cxgbe_devclass, 0, 0); MODULE_VERSION(cxgbe, 1); DRIVER_MODULE(cxl, t5nex, cxl_driver, cxl_devclass, 0, 0); MODULE_VERSION(cxl, 1); DRIVER_MODULE(cc, t6nex, cc_driver, cc_devclass, 0, 0); MODULE_VERSION(cc, 1); DRIVER_MODULE(vcxgbe, cxgbe, vcxgbe_driver, vcxgbe_devclass, 0, 0); MODULE_VERSION(vcxgbe, 1); DRIVER_MODULE(vcxl, cxl, vcxl_driver, vcxl_devclass, 0, 0); MODULE_VERSION(vcxl, 1); DRIVER_MODULE(vcc, cc, vcc_driver, vcc_devclass, 0, 0); MODULE_VERSION(vcc, 1); Index: head/sys/dev/cxgbe/t4_sge.c =================================================================== --- head/sys/dev/cxgbe/t4_sge.c (revision 333393) +++ head/sys/dev/cxgbe/t4_sge.c (revision 333394) @@ -1,5247 +1,5268 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2011 Chelsio Communications, Inc. * All rights reserved. * Written by: Navdeep Parhar * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DEV_NETMAP #include #include #include #include #include #endif #include "common/common.h" #include "common/t4_regs.h" #include "common/t4_regs_values.h" #include "common/t4_msg.h" #include "t4_l2t.h" #include "t4_mp_ring.h" #ifdef T4_PKT_TIMESTAMP #define RX_COPY_THRESHOLD (MINCLSIZE - 8) #else #define RX_COPY_THRESHOLD MINCLSIZE #endif /* * Ethernet frames are DMA'd at this byte offset into the freelist buffer. * 0-7 are valid values. */ static int fl_pktshift = 2; TUNABLE_INT("hw.cxgbe.fl_pktshift", &fl_pktshift); /* * Pad ethernet payload up to this boundary. * -1: driver should figure out a good value. * 0: disable padding. * Any power of 2 from 32 to 4096 (both inclusive) is also a valid value. */ int fl_pad = -1; TUNABLE_INT("hw.cxgbe.fl_pad", &fl_pad); /* * Status page length. * -1: driver should figure out a good value. * 64 or 128 are the only other valid values. */ static int spg_len = -1; TUNABLE_INT("hw.cxgbe.spg_len", &spg_len); /* * Congestion drops. * -1: no congestion feedback (not recommended). * 0: backpressure the channel instead of dropping packets right away. * 1: no backpressure, drop packets for the congested queue immediately. */ static int cong_drop = 0; TUNABLE_INT("hw.cxgbe.cong_drop", &cong_drop); /* * Deliver multiple frames in the same free list buffer if they fit. * -1: let the driver decide whether to enable buffer packing or not. * 0: disable buffer packing. * 1: enable buffer packing. */ static int buffer_packing = -1; TUNABLE_INT("hw.cxgbe.buffer_packing", &buffer_packing); /* * Start next frame in a packed buffer at this boundary. * -1: driver should figure out a good value. * T4: driver will ignore this and use the same value as fl_pad above. * T5: 16, or a power of 2 from 64 to 4096 (both inclusive) is a valid value. */ static int fl_pack = -1; TUNABLE_INT("hw.cxgbe.fl_pack", &fl_pack); /* * Allow the driver to create mbuf(s) in a cluster allocated for rx. * 0: never; always allocate mbufs from the zone_mbuf UMA zone. * 1: ok to create mbuf(s) within a cluster if there is room. */ static int allow_mbufs_in_cluster = 1; TUNABLE_INT("hw.cxgbe.allow_mbufs_in_cluster", &allow_mbufs_in_cluster); /* * Largest rx cluster size that the driver is allowed to allocate. */ static int largest_rx_cluster = MJUM16BYTES; TUNABLE_INT("hw.cxgbe.largest_rx_cluster", &largest_rx_cluster); /* * Size of cluster allocation that's most likely to succeed. The driver will * fall back to this size if it fails to allocate clusters larger than this. */ static int safest_rx_cluster = PAGE_SIZE; TUNABLE_INT("hw.cxgbe.safest_rx_cluster", &safest_rx_cluster); /* * The interrupt holdoff timers are multiplied by this value on T6+. * 1 and 3-17 (both inclusive) are legal values. */ static int tscale = 1; TUNABLE_INT("hw.cxgbe.tscale", &tscale); /* * Number of LRO entries in the lro_ctrl structure per rx queue. */ static int lro_entries = TCP_LRO_ENTRIES; TUNABLE_INT("hw.cxgbe.lro_entries", &lro_entries); /* * This enables presorting of frames before they're fed into tcp_lro_rx. */ static int lro_mbufs = 0; TUNABLE_INT("hw.cxgbe.lro_mbufs", &lro_mbufs); struct txpkts { u_int wr_type; /* type 0 or type 1 */ u_int npkt; /* # of packets in this work request */ u_int plen; /* total payload (sum of all packets) */ u_int len16; /* # of 16B pieces used by this work request */ }; /* A packet's SGL. This + m_pkthdr has all info needed for tx */ struct sgl { struct sglist sg; struct sglist_seg seg[TX_SGL_SEGS]; }; static int service_iq(struct sge_iq *, int); static struct mbuf *get_fl_payload(struct adapter *, struct sge_fl *, uint32_t); static int t4_eth_rx(struct sge_iq *, const struct rss_header *, struct mbuf *); static inline void init_iq(struct sge_iq *, struct adapter *, int, int, int); static inline void init_fl(struct adapter *, struct sge_fl *, int, int, char *); static inline void init_eq(struct adapter *, struct sge_eq *, int, int, uint8_t, uint16_t, char *); static int alloc_ring(struct adapter *, size_t, bus_dma_tag_t *, bus_dmamap_t *, bus_addr_t *, void **); static int free_ring(struct adapter *, bus_dma_tag_t, bus_dmamap_t, bus_addr_t, void *); static int alloc_iq_fl(struct vi_info *, struct sge_iq *, struct sge_fl *, int, int); static int free_iq_fl(struct vi_info *, struct sge_iq *, struct sge_fl *); static void add_iq_sysctls(struct sysctl_ctx_list *, struct sysctl_oid *, struct sge_iq *); static void add_fl_sysctls(struct adapter *, struct sysctl_ctx_list *, struct sysctl_oid *, struct sge_fl *); static int alloc_fwq(struct adapter *); static int free_fwq(struct adapter *); static int alloc_mgmtq(struct adapter *); static int free_mgmtq(struct adapter *); static int alloc_rxq(struct vi_info *, struct sge_rxq *, int, int, struct sysctl_oid *); static int free_rxq(struct vi_info *, struct sge_rxq *); #ifdef TCP_OFFLOAD static int alloc_ofld_rxq(struct vi_info *, struct sge_ofld_rxq *, int, int, struct sysctl_oid *); static int free_ofld_rxq(struct vi_info *, struct sge_ofld_rxq *); #endif #ifdef DEV_NETMAP static int alloc_nm_rxq(struct vi_info *, struct sge_nm_rxq *, int, int, struct sysctl_oid *); static int free_nm_rxq(struct vi_info *, struct sge_nm_rxq *); static int alloc_nm_txq(struct vi_info *, struct sge_nm_txq *, int, int, struct sysctl_oid *); static int free_nm_txq(struct vi_info *, struct sge_nm_txq *); #endif static int ctrl_eq_alloc(struct adapter *, struct sge_eq *); static int eth_eq_alloc(struct adapter *, struct vi_info *, struct sge_eq *); #ifdef TCP_OFFLOAD static int ofld_eq_alloc(struct adapter *, struct vi_info *, struct sge_eq *); #endif static int alloc_eq(struct adapter *, struct vi_info *, struct sge_eq *); static int free_eq(struct adapter *, struct sge_eq *); static int alloc_wrq(struct adapter *, struct vi_info *, struct sge_wrq *, struct sysctl_oid *); static int free_wrq(struct adapter *, struct sge_wrq *); static int alloc_txq(struct vi_info *, struct sge_txq *, int, struct sysctl_oid *); static int free_txq(struct vi_info *, struct sge_txq *); static void oneseg_dma_callback(void *, bus_dma_segment_t *, int, int); static inline void ring_fl_db(struct adapter *, struct sge_fl *); static int refill_fl(struct adapter *, struct sge_fl *, int); static void refill_sfl(void *); static int alloc_fl_sdesc(struct sge_fl *); static void free_fl_sdesc(struct adapter *, struct sge_fl *); static void find_best_refill_source(struct adapter *, struct sge_fl *, int); static void find_safe_refill_source(struct adapter *, struct sge_fl *); static void add_fl_to_sfl(struct adapter *, struct sge_fl *); static inline void get_pkt_gl(struct mbuf *, struct sglist *); static inline u_int txpkt_len16(u_int, u_int); static inline u_int txpkt_vm_len16(u_int, u_int); static inline u_int txpkts0_len16(u_int); static inline u_int txpkts1_len16(void); static u_int write_txpkt_wr(struct sge_txq *, struct fw_eth_tx_pkt_wr *, struct mbuf *, u_int); static u_int write_txpkt_vm_wr(struct adapter *, struct sge_txq *, struct fw_eth_tx_pkt_vm_wr *, struct mbuf *, u_int); static int try_txpkts(struct mbuf *, struct mbuf *, struct txpkts *, u_int); static int add_to_txpkts(struct mbuf *, struct txpkts *, u_int); static u_int write_txpkts_wr(struct sge_txq *, struct fw_eth_tx_pkts_wr *, struct mbuf *, const struct txpkts *, u_int); static void write_gl_to_txd(struct sge_txq *, struct mbuf *, caddr_t *, int); static inline void copy_to_txd(struct sge_eq *, caddr_t, caddr_t *, int); static inline void ring_eq_db(struct adapter *, struct sge_eq *, u_int); static inline uint16_t read_hw_cidx(struct sge_eq *); static inline u_int reclaimable_tx_desc(struct sge_eq *); static inline u_int total_available_tx_desc(struct sge_eq *); static u_int reclaim_tx_descs(struct sge_txq *, u_int); static void tx_reclaim(void *, int); static __be64 get_flit(struct sglist_seg *, int, int); static int handle_sge_egr_update(struct sge_iq *, const struct rss_header *, struct mbuf *); static int handle_fw_msg(struct sge_iq *, const struct rss_header *, struct mbuf *); static int t4_handle_wrerr_rpl(struct adapter *, const __be64 *); static void wrq_tx_drain(void *, int); static void drain_wrq_wr_list(struct adapter *, struct sge_wrq *); static int sysctl_uint16(SYSCTL_HANDLER_ARGS); static int sysctl_bufsizes(SYSCTL_HANDLER_ARGS); static int sysctl_tc(SYSCTL_HANDLER_ARGS); static counter_u64_t extfree_refs; static counter_u64_t extfree_rels; an_handler_t t4_an_handler; fw_msg_handler_t t4_fw_msg_handler[NUM_FW6_TYPES]; cpl_handler_t t4_cpl_handler[NUM_CPL_CMDS]; cpl_handler_t set_tcb_rpl_handlers[NUM_CPL_COOKIES]; cpl_handler_t l2t_write_rpl_handlers[NUM_CPL_COOKIES]; cpl_handler_t act_open_rpl_handlers[NUM_CPL_COOKIES]; +cpl_handler_t abort_rpl_rss_handlers[NUM_CPL_COOKIES]; void t4_register_an_handler(an_handler_t h) { uintptr_t *loc; MPASS(h == NULL || t4_an_handler == NULL); loc = (uintptr_t *)&t4_an_handler; atomic_store_rel_ptr(loc, (uintptr_t)h); } void t4_register_fw_msg_handler(int type, fw_msg_handler_t h) { uintptr_t *loc; MPASS(type < nitems(t4_fw_msg_handler)); MPASS(h == NULL || t4_fw_msg_handler[type] == NULL); /* * These are dispatched by the handler for FW{4|6}_CPL_MSG using the CPL * handler dispatch table. Reject any attempt to install a handler for * this subtype. */ MPASS(type != FW_TYPE_RSSCPL); MPASS(type != FW6_TYPE_RSSCPL); loc = (uintptr_t *)&t4_fw_msg_handler[type]; atomic_store_rel_ptr(loc, (uintptr_t)h); } void t4_register_cpl_handler(int opcode, cpl_handler_t h) { uintptr_t *loc; MPASS(opcode < nitems(t4_cpl_handler)); MPASS(h == NULL || t4_cpl_handler[opcode] == NULL); loc = (uintptr_t *)&t4_cpl_handler[opcode]; atomic_store_rel_ptr(loc, (uintptr_t)h); } static int set_tcb_rpl_handler(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { const struct cpl_set_tcb_rpl *cpl = (const void *)(rss + 1); u_int tid; int cookie; MPASS(m == NULL); tid = GET_TID(cpl); if (is_ftid(iq->adapter, tid)) { /* * The return code for filter-write is put in the CPL cookie so * we have to rely on the hardware tid (is_ftid) to determine * that this is a response to a filter. */ cookie = CPL_COOKIE_FILTER; } else { cookie = G_COOKIE(cpl->cookie); } MPASS(cookie > CPL_COOKIE_RESERVED); MPASS(cookie < nitems(set_tcb_rpl_handlers)); return (set_tcb_rpl_handlers[cookie](iq, rss, m)); } static int l2t_write_rpl_handler(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { const struct cpl_l2t_write_rpl *rpl = (const void *)(rss + 1); unsigned int cookie; MPASS(m == NULL); cookie = GET_TID(rpl) & F_SYNC_WR ? CPL_COOKIE_TOM : CPL_COOKIE_FILTER; return (l2t_write_rpl_handlers[cookie](iq, rss, m)); } static int act_open_rpl_handler(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { const struct cpl_act_open_rpl *cpl = (const void *)(rss + 1); u_int cookie = G_TID_COOKIE(G_AOPEN_ATID(be32toh(cpl->atid_status))); MPASS(m == NULL); MPASS(cookie != CPL_COOKIE_RESERVED); return (act_open_rpl_handlers[cookie](iq, rss, m)); } +static int +abort_rpl_rss_handler(struct sge_iq *iq, const struct rss_header *rss, + struct mbuf *m) +{ + struct adapter *sc = iq->adapter; + u_int cookie; + + MPASS(m == NULL); + if (is_hashfilter(sc)) + cookie = CPL_COOKIE_HASHFILTER; + else + cookie = CPL_COOKIE_TOM; + + return (abort_rpl_rss_handlers[cookie](iq, rss, m)); +} + static void t4_init_shared_cpl_handlers(void) { t4_register_cpl_handler(CPL_SET_TCB_RPL, set_tcb_rpl_handler); t4_register_cpl_handler(CPL_L2T_WRITE_RPL, l2t_write_rpl_handler); t4_register_cpl_handler(CPL_ACT_OPEN_RPL, act_open_rpl_handler); + t4_register_cpl_handler(CPL_ABORT_RPL_RSS, abort_rpl_rss_handler); } void t4_register_shared_cpl_handler(int opcode, cpl_handler_t h, int cookie) { uintptr_t *loc; MPASS(opcode < nitems(t4_cpl_handler)); MPASS(cookie > CPL_COOKIE_RESERVED); MPASS(cookie < NUM_CPL_COOKIES); MPASS(t4_cpl_handler[opcode] != NULL); switch (opcode) { case CPL_SET_TCB_RPL: loc = (uintptr_t *)&set_tcb_rpl_handlers[cookie]; break; case CPL_L2T_WRITE_RPL: loc = (uintptr_t *)&l2t_write_rpl_handlers[cookie]; break; case CPL_ACT_OPEN_RPL: loc = (uintptr_t *)&act_open_rpl_handlers[cookie]; + break; + case CPL_ABORT_RPL_RSS: + loc = (uintptr_t *)&abort_rpl_rss_handlers[cookie]; break; default: MPASS(0); return; } MPASS(h == NULL || *loc == (uintptr_t)NULL); atomic_store_rel_ptr(loc, (uintptr_t)h); } /* * Called on MOD_LOAD. Validates and calculates the SGE tunables. */ void t4_sge_modload(void) { if (fl_pktshift < 0 || fl_pktshift > 7) { printf("Invalid hw.cxgbe.fl_pktshift value (%d)," " using 2 instead.\n", fl_pktshift); fl_pktshift = 2; } if (spg_len != 64 && spg_len != 128) { int len; #if defined(__i386__) || defined(__amd64__) len = cpu_clflush_line_size > 64 ? 128 : 64; #else len = 64; #endif if (spg_len != -1) { printf("Invalid hw.cxgbe.spg_len value (%d)," " using %d instead.\n", spg_len, len); } spg_len = len; } if (cong_drop < -1 || cong_drop > 1) { printf("Invalid hw.cxgbe.cong_drop value (%d)," " using 0 instead.\n", cong_drop); cong_drop = 0; } if (tscale != 1 && (tscale < 3 || tscale > 17)) { printf("Invalid hw.cxgbe.tscale value (%d)," " using 1 instead.\n", tscale); tscale = 1; } extfree_refs = counter_u64_alloc(M_WAITOK); extfree_rels = counter_u64_alloc(M_WAITOK); counter_u64_zero(extfree_refs); counter_u64_zero(extfree_rels); t4_init_shared_cpl_handlers(); t4_register_cpl_handler(CPL_FW4_MSG, handle_fw_msg); t4_register_cpl_handler(CPL_FW6_MSG, handle_fw_msg); t4_register_cpl_handler(CPL_SGE_EGR_UPDATE, handle_sge_egr_update); t4_register_cpl_handler(CPL_RX_PKT, t4_eth_rx); t4_register_fw_msg_handler(FW6_TYPE_CMD_RPL, t4_handle_fw_rpl); t4_register_fw_msg_handler(FW6_TYPE_WRERR_RPL, t4_handle_wrerr_rpl); } void t4_sge_modunload(void) { counter_u64_free(extfree_refs); counter_u64_free(extfree_rels); } uint64_t t4_sge_extfree_refs(void) { uint64_t refs, rels; rels = counter_u64_fetch(extfree_rels); refs = counter_u64_fetch(extfree_refs); return (refs - rels); } static inline void setup_pad_and_pack_boundaries(struct adapter *sc) { uint32_t v, m; int pad, pack, pad_shift; pad_shift = chip_id(sc) > CHELSIO_T5 ? X_T6_INGPADBOUNDARY_SHIFT : X_INGPADBOUNDARY_SHIFT; pad = fl_pad; if (fl_pad < (1 << pad_shift) || fl_pad > (1 << (pad_shift + M_INGPADBOUNDARY)) || !powerof2(fl_pad)) { /* * If there is any chance that we might use buffer packing and * the chip is a T4, then pick 64 as the pad/pack boundary. Set * it to the minimum allowed in all other cases. */ pad = is_t4(sc) && buffer_packing ? 64 : 1 << pad_shift; /* * For fl_pad = 0 we'll still write a reasonable value to the * register but all the freelists will opt out of padding. * We'll complain here only if the user tried to set it to a * value greater than 0 that was invalid. */ if (fl_pad > 0) { device_printf(sc->dev, "Invalid hw.cxgbe.fl_pad value" " (%d), using %d instead.\n", fl_pad, pad); } } m = V_INGPADBOUNDARY(M_INGPADBOUNDARY); v = V_INGPADBOUNDARY(ilog2(pad) - pad_shift); t4_set_reg_field(sc, A_SGE_CONTROL, m, v); if (is_t4(sc)) { if (fl_pack != -1 && fl_pack != pad) { /* Complain but carry on. */ device_printf(sc->dev, "hw.cxgbe.fl_pack (%d) ignored," " using %d instead.\n", fl_pack, pad); } return; } pack = fl_pack; if (fl_pack < 16 || fl_pack == 32 || fl_pack > 4096 || !powerof2(fl_pack)) { pack = max(sc->params.pci.mps, CACHE_LINE_SIZE); MPASS(powerof2(pack)); if (pack < 16) pack = 16; if (pack == 32) pack = 64; if (pack > 4096) pack = 4096; if (fl_pack != -1) { device_printf(sc->dev, "Invalid hw.cxgbe.fl_pack value" " (%d), using %d instead.\n", fl_pack, pack); } } m = V_INGPACKBOUNDARY(M_INGPACKBOUNDARY); if (pack == 16) v = V_INGPACKBOUNDARY(0); else v = V_INGPACKBOUNDARY(ilog2(pack) - 5); MPASS(!is_t4(sc)); /* T4 doesn't have SGE_CONTROL2 */ t4_set_reg_field(sc, A_SGE_CONTROL2, m, v); } /* * adap->params.vpd.cclk must be set up before this is called. */ void t4_tweak_chip_settings(struct adapter *sc) { int i; uint32_t v, m; int intr_timer[SGE_NTIMERS] = {1, 5, 10, 50, 100, 200}; int timer_max = M_TIMERVALUE0 * 1000 / sc->params.vpd.cclk; int intr_pktcount[SGE_NCOUNTERS] = {1, 8, 16, 32}; /* 63 max */ uint16_t indsz = min(RX_COPY_THRESHOLD - 1, M_INDICATESIZE); static int sge_flbuf_sizes[] = { MCLBYTES, #if MJUMPAGESIZE != MCLBYTES MJUMPAGESIZE, MJUMPAGESIZE - CL_METADATA_SIZE, MJUMPAGESIZE - 2 * MSIZE - CL_METADATA_SIZE, #endif MJUM9BYTES, MJUM16BYTES, MCLBYTES - MSIZE - CL_METADATA_SIZE, MJUM9BYTES - CL_METADATA_SIZE, MJUM16BYTES - CL_METADATA_SIZE, }; KASSERT(sc->flags & MASTER_PF, ("%s: trying to change chip settings when not master.", __func__)); m = V_PKTSHIFT(M_PKTSHIFT) | F_RXPKTCPLMODE | F_EGRSTATUSPAGESIZE; v = V_PKTSHIFT(fl_pktshift) | F_RXPKTCPLMODE | V_EGRSTATUSPAGESIZE(spg_len == 128); t4_set_reg_field(sc, A_SGE_CONTROL, m, v); setup_pad_and_pack_boundaries(sc); v = V_HOSTPAGESIZEPF0(PAGE_SHIFT - 10) | V_HOSTPAGESIZEPF1(PAGE_SHIFT - 10) | V_HOSTPAGESIZEPF2(PAGE_SHIFT - 10) | V_HOSTPAGESIZEPF3(PAGE_SHIFT - 10) | V_HOSTPAGESIZEPF4(PAGE_SHIFT - 10) | V_HOSTPAGESIZEPF5(PAGE_SHIFT - 10) | V_HOSTPAGESIZEPF6(PAGE_SHIFT - 10) | V_HOSTPAGESIZEPF7(PAGE_SHIFT - 10); t4_write_reg(sc, A_SGE_HOST_PAGE_SIZE, v); KASSERT(nitems(sge_flbuf_sizes) <= SGE_FLBUF_SIZES, ("%s: hw buffer size table too big", __func__)); for (i = 0; i < min(nitems(sge_flbuf_sizes), SGE_FLBUF_SIZES); i++) { t4_write_reg(sc, A_SGE_FL_BUFFER_SIZE0 + (4 * i), sge_flbuf_sizes[i]); } v = V_THRESHOLD_0(intr_pktcount[0]) | V_THRESHOLD_1(intr_pktcount[1]) | V_THRESHOLD_2(intr_pktcount[2]) | V_THRESHOLD_3(intr_pktcount[3]); t4_write_reg(sc, A_SGE_INGRESS_RX_THRESHOLD, v); KASSERT(intr_timer[0] <= timer_max, ("%s: not a single usable timer (%d, %d)", __func__, intr_timer[0], timer_max)); for (i = 1; i < nitems(intr_timer); i++) { KASSERT(intr_timer[i] >= intr_timer[i - 1], ("%s: timers not listed in increasing order (%d)", __func__, i)); while (intr_timer[i] > timer_max) { if (i == nitems(intr_timer) - 1) { intr_timer[i] = timer_max; break; } intr_timer[i] += intr_timer[i - 1]; intr_timer[i] /= 2; } } v = V_TIMERVALUE0(us_to_core_ticks(sc, intr_timer[0])) | V_TIMERVALUE1(us_to_core_ticks(sc, intr_timer[1])); t4_write_reg(sc, A_SGE_TIMER_VALUE_0_AND_1, v); v = V_TIMERVALUE2(us_to_core_ticks(sc, intr_timer[2])) | V_TIMERVALUE3(us_to_core_ticks(sc, intr_timer[3])); t4_write_reg(sc, A_SGE_TIMER_VALUE_2_AND_3, v); v = V_TIMERVALUE4(us_to_core_ticks(sc, intr_timer[4])) | V_TIMERVALUE5(us_to_core_ticks(sc, intr_timer[5])); t4_write_reg(sc, A_SGE_TIMER_VALUE_4_AND_5, v); if (chip_id(sc) >= CHELSIO_T6) { m = V_TSCALE(M_TSCALE); if (tscale == 1) v = 0; else v = V_TSCALE(tscale - 2); t4_set_reg_field(sc, A_SGE_ITP_CONTROL, m, v); if (sc->debug_flags & DF_DISABLE_TCB_CACHE) { m = V_RDTHRESHOLD(M_RDTHRESHOLD) | F_WRTHRTHRESHEN | V_WRTHRTHRESH(M_WRTHRTHRESH); t4_tp_pio_read(sc, &v, 1, A_TP_CMM_CONFIG, 1); v &= ~m; v |= V_RDTHRESHOLD(1) | F_WRTHRTHRESHEN | V_WRTHRTHRESH(16); t4_tp_pio_write(sc, &v, 1, A_TP_CMM_CONFIG, 1); } } /* 4K, 16K, 64K, 256K DDP "page sizes" for TDDP */ v = V_HPZ0(0) | V_HPZ1(2) | V_HPZ2(4) | V_HPZ3(6); t4_write_reg(sc, A_ULP_RX_TDDP_PSZ, v); /* * 4K, 8K, 16K, 64K DDP "page sizes" for iSCSI DDP. These have been * chosen with MAXPHYS = 128K in mind. The largest DDP buffer that we * may have to deal with is MAXPHYS + 1 page. */ v = V_HPZ0(0) | V_HPZ1(1) | V_HPZ2(2) | V_HPZ3(4); t4_write_reg(sc, A_ULP_RX_ISCSI_PSZ, v); /* We use multiple DDP page sizes both in plain-TOE and ISCSI modes. */ m = v = F_TDDPTAGTCB | F_ISCSITAGTCB; t4_set_reg_field(sc, A_ULP_RX_CTL, m, v); m = V_INDICATESIZE(M_INDICATESIZE) | F_REARMDDPOFFSET | F_RESETDDPOFFSET; v = V_INDICATESIZE(indsz) | F_REARMDDPOFFSET | F_RESETDDPOFFSET; t4_set_reg_field(sc, A_TP_PARA_REG5, m, v); } /* * SGE wants the buffer to be at least 64B and then a multiple of 16. If * padding is in use, the buffer's start and end need to be aligned to the pad * boundary as well. We'll just make sure that the size is a multiple of the * boundary here, it is up to the buffer allocation code to make sure the start * of the buffer is aligned as well. */ static inline int hwsz_ok(struct adapter *sc, int hwsz) { int mask = fl_pad ? sc->params.sge.pad_boundary - 1 : 16 - 1; return (hwsz >= 64 && (hwsz & mask) == 0); } /* * XXX: driver really should be able to deal with unexpected settings. */ int t4_read_chip_settings(struct adapter *sc) { struct sge *s = &sc->sge; struct sge_params *sp = &sc->params.sge; int i, j, n, rc = 0; uint32_t m, v, r; uint16_t indsz = min(RX_COPY_THRESHOLD - 1, M_INDICATESIZE); static int sw_buf_sizes[] = { /* Sorted by size */ MCLBYTES, #if MJUMPAGESIZE != MCLBYTES MJUMPAGESIZE, #endif MJUM9BYTES, MJUM16BYTES }; struct sw_zone_info *swz, *safe_swz; struct hw_buf_info *hwb; m = F_RXPKTCPLMODE; v = F_RXPKTCPLMODE; r = sc->params.sge.sge_control; if ((r & m) != v) { device_printf(sc->dev, "invalid SGE_CONTROL(0x%x)\n", r); rc = EINVAL; } /* * If this changes then every single use of PAGE_SHIFT in the driver * needs to be carefully reviewed for PAGE_SHIFT vs sp->page_shift. */ if (sp->page_shift != PAGE_SHIFT) { device_printf(sc->dev, "invalid SGE_HOST_PAGE_SIZE(0x%x)\n", r); rc = EINVAL; } /* Filter out unusable hw buffer sizes entirely (mark with -2). */ hwb = &s->hw_buf_info[0]; for (i = 0; i < nitems(s->hw_buf_info); i++, hwb++) { r = sc->params.sge.sge_fl_buffer_size[i]; hwb->size = r; hwb->zidx = hwsz_ok(sc, r) ? -1 : -2; hwb->next = -1; } /* * Create a sorted list in decreasing order of hw buffer sizes (and so * increasing order of spare area) for each software zone. * * If padding is enabled then the start and end of the buffer must align * to the pad boundary; if packing is enabled then they must align with * the pack boundary as well. Allocations from the cluster zones are * aligned to min(size, 4K), so the buffer starts at that alignment and * ends at hwb->size alignment. If mbuf inlining is allowed the * starting alignment will be reduced to MSIZE and the driver will * exercise appropriate caution when deciding on the best buffer layout * to use. */ n = 0; /* no usable buffer size to begin with */ swz = &s->sw_zone_info[0]; safe_swz = NULL; for (i = 0; i < SW_ZONE_SIZES; i++, swz++) { int8_t head = -1, tail = -1; swz->size = sw_buf_sizes[i]; swz->zone = m_getzone(swz->size); swz->type = m_gettype(swz->size); if (swz->size < PAGE_SIZE) { MPASS(powerof2(swz->size)); if (fl_pad && (swz->size % sp->pad_boundary != 0)) continue; } if (swz->size == safest_rx_cluster) safe_swz = swz; hwb = &s->hw_buf_info[0]; for (j = 0; j < SGE_FLBUF_SIZES; j++, hwb++) { if (hwb->zidx != -1 || hwb->size > swz->size) continue; #ifdef INVARIANTS if (fl_pad) MPASS(hwb->size % sp->pad_boundary == 0); #endif hwb->zidx = i; if (head == -1) head = tail = j; else if (hwb->size < s->hw_buf_info[tail].size) { s->hw_buf_info[tail].next = j; tail = j; } else { int8_t *cur; struct hw_buf_info *t; for (cur = &head; *cur != -1; cur = &t->next) { t = &s->hw_buf_info[*cur]; if (hwb->size == t->size) { hwb->zidx = -2; break; } if (hwb->size > t->size) { hwb->next = *cur; *cur = j; break; } } } } swz->head_hwidx = head; swz->tail_hwidx = tail; if (tail != -1) { n++; if (swz->size - s->hw_buf_info[tail].size >= CL_METADATA_SIZE) sc->flags |= BUF_PACKING_OK; } } if (n == 0) { device_printf(sc->dev, "no usable SGE FL buffer size.\n"); rc = EINVAL; } s->safe_hwidx1 = -1; s->safe_hwidx2 = -1; if (safe_swz != NULL) { s->safe_hwidx1 = safe_swz->head_hwidx; for (i = safe_swz->head_hwidx; i != -1; i = hwb->next) { int spare; hwb = &s->hw_buf_info[i]; #ifdef INVARIANTS if (fl_pad) MPASS(hwb->size % sp->pad_boundary == 0); #endif spare = safe_swz->size - hwb->size; if (spare >= CL_METADATA_SIZE) { s->safe_hwidx2 = i; break; } } } if (sc->flags & IS_VF) return (0); v = V_HPZ0(0) | V_HPZ1(2) | V_HPZ2(4) | V_HPZ3(6); r = t4_read_reg(sc, A_ULP_RX_TDDP_PSZ); if (r != v) { device_printf(sc->dev, "invalid ULP_RX_TDDP_PSZ(0x%x)\n", r); rc = EINVAL; } m = v = F_TDDPTAGTCB; r = t4_read_reg(sc, A_ULP_RX_CTL); if ((r & m) != v) { device_printf(sc->dev, "invalid ULP_RX_CTL(0x%x)\n", r); rc = EINVAL; } m = V_INDICATESIZE(M_INDICATESIZE) | F_REARMDDPOFFSET | F_RESETDDPOFFSET; v = V_INDICATESIZE(indsz) | F_REARMDDPOFFSET | F_RESETDDPOFFSET; r = t4_read_reg(sc, A_TP_PARA_REG5); if ((r & m) != v) { device_printf(sc->dev, "invalid TP_PARA_REG5(0x%x)\n", r); rc = EINVAL; } t4_init_tp_params(sc, 1); t4_read_mtu_tbl(sc, sc->params.mtus, NULL); t4_load_mtus(sc, sc->params.mtus, sc->params.a_wnd, sc->params.b_wnd); return (rc); } int t4_create_dma_tag(struct adapter *sc) { int rc; rc = bus_dma_tag_create(bus_get_dma_tag(sc->dev), 1, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, BUS_SPACE_MAXSIZE, BUS_SPACE_UNRESTRICTED, BUS_SPACE_MAXSIZE, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->dmat); if (rc != 0) { device_printf(sc->dev, "failed to create main DMA tag: %d\n", rc); } return (rc); } void t4_sge_sysctls(struct adapter *sc, struct sysctl_ctx_list *ctx, struct sysctl_oid_list *children) { struct sge_params *sp = &sc->params.sge; SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "buffer_sizes", CTLTYPE_STRING | CTLFLAG_RD, &sc->sge, 0, sysctl_bufsizes, "A", "freelist buffer sizes"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "fl_pktshift", CTLFLAG_RD, NULL, sp->fl_pktshift, "payload DMA offset in rx buffer (bytes)"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "fl_pad", CTLFLAG_RD, NULL, sp->pad_boundary, "payload pad boundary (bytes)"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "spg_len", CTLFLAG_RD, NULL, sp->spg_len, "status page size (bytes)"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "cong_drop", CTLFLAG_RD, NULL, cong_drop, "congestion drop setting"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "fl_pack", CTLFLAG_RD, NULL, sp->pack_boundary, "payload pack boundary (bytes)"); } int t4_destroy_dma_tag(struct adapter *sc) { if (sc->dmat) bus_dma_tag_destroy(sc->dmat); return (0); } /* * Allocate and initialize the firmware event queue and the management queue. * * Returns errno on failure. Resources allocated up to that point may still be * allocated. Caller is responsible for cleanup in case this function fails. */ int t4_setup_adapter_queues(struct adapter *sc) { int rc; ADAPTER_LOCK_ASSERT_NOTOWNED(sc); sysctl_ctx_init(&sc->ctx); sc->flags |= ADAP_SYSCTL_CTX; /* * Firmware event queue */ rc = alloc_fwq(sc); if (rc != 0) return (rc); /* * Management queue. This is just a control queue that uses the fwq as * its associated iq. */ if (!(sc->flags & IS_VF)) rc = alloc_mgmtq(sc); return (rc); } /* * Idempotent */ int t4_teardown_adapter_queues(struct adapter *sc) { ADAPTER_LOCK_ASSERT_NOTOWNED(sc); /* Do this before freeing the queue */ if (sc->flags & ADAP_SYSCTL_CTX) { sysctl_ctx_free(&sc->ctx); sc->flags &= ~ADAP_SYSCTL_CTX; } free_mgmtq(sc); free_fwq(sc); return (0); } /* Maximum payload that can be delivered with a single iq descriptor */ static inline int mtu_to_max_payload(struct adapter *sc, int mtu, const int toe) { int payload; #ifdef TCP_OFFLOAD if (toe) { int rxcs = G_RXCOALESCESIZE(t4_read_reg(sc, A_TP_PARA_REG2)); /* Note that COP can set rx_coalesce on/off per connection. */ payload = max(mtu, rxcs); } else { #endif /* large enough even when hw VLAN extraction is disabled */ payload = sc->params.sge.fl_pktshift + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + mtu; #ifdef TCP_OFFLOAD } #endif return (payload); } int t4_setup_vi_queues(struct vi_info *vi) { int rc = 0, i, intr_idx, iqidx; struct sge_rxq *rxq; struct sge_txq *txq; struct sge_wrq *ctrlq; #ifdef TCP_OFFLOAD struct sge_ofld_rxq *ofld_rxq; struct sge_wrq *ofld_txq; #endif #ifdef DEV_NETMAP int saved_idx; struct sge_nm_rxq *nm_rxq; struct sge_nm_txq *nm_txq; #endif char name[16]; struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; struct ifnet *ifp = vi->ifp; struct sysctl_oid *oid = device_get_sysctl_tree(vi->dev); struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); int maxp, mtu = ifp->if_mtu; /* Interrupt vector to start from (when using multiple vectors) */ intr_idx = vi->first_intr; #ifdef DEV_NETMAP saved_idx = intr_idx; if (ifp->if_capabilities & IFCAP_NETMAP) { /* netmap is supported with direct interrupts only. */ MPASS(!forwarding_intr_to_fwq(sc)); /* * We don't have buffers to back the netmap rx queues * right now so we create the queues in a way that * doesn't set off any congestion signal in the chip. */ oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "nm_rxq", CTLFLAG_RD, NULL, "rx queues"); for_each_nm_rxq(vi, i, nm_rxq) { rc = alloc_nm_rxq(vi, nm_rxq, intr_idx, i, oid); if (rc != 0) goto done; intr_idx++; } oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "nm_txq", CTLFLAG_RD, NULL, "tx queues"); for_each_nm_txq(vi, i, nm_txq) { iqidx = vi->first_nm_rxq + (i % vi->nnmrxq); rc = alloc_nm_txq(vi, nm_txq, iqidx, i, oid); if (rc != 0) goto done; } } /* Normal rx queues and netmap rx queues share the same interrupts. */ intr_idx = saved_idx; #endif /* * Allocate rx queues first because a default iqid is required when * creating a tx queue. */ maxp = mtu_to_max_payload(sc, mtu, 0); oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "rxq", CTLFLAG_RD, NULL, "rx queues"); for_each_rxq(vi, i, rxq) { init_iq(&rxq->iq, sc, vi->tmr_idx, vi->pktc_idx, vi->qsize_rxq); snprintf(name, sizeof(name), "%s rxq%d-fl", device_get_nameunit(vi->dev), i); init_fl(sc, &rxq->fl, vi->qsize_rxq / 8, maxp, name); rc = alloc_rxq(vi, rxq, forwarding_intr_to_fwq(sc) ? -1 : intr_idx, i, oid); if (rc != 0) goto done; intr_idx++; } #ifdef DEV_NETMAP if (ifp->if_capabilities & IFCAP_NETMAP) intr_idx = saved_idx + max(vi->nrxq, vi->nnmrxq); #endif #ifdef TCP_OFFLOAD maxp = mtu_to_max_payload(sc, mtu, 1); oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "ofld_rxq", CTLFLAG_RD, NULL, "rx queues for offloaded TCP connections"); for_each_ofld_rxq(vi, i, ofld_rxq) { init_iq(&ofld_rxq->iq, sc, vi->ofld_tmr_idx, vi->ofld_pktc_idx, vi->qsize_rxq); snprintf(name, sizeof(name), "%s ofld_rxq%d-fl", device_get_nameunit(vi->dev), i); init_fl(sc, &ofld_rxq->fl, vi->qsize_rxq / 8, maxp, name); rc = alloc_ofld_rxq(vi, ofld_rxq, forwarding_intr_to_fwq(sc) ? -1 : intr_idx, i, oid); if (rc != 0) goto done; intr_idx++; } #endif /* * Now the tx queues. */ oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "txq", CTLFLAG_RD, NULL, "tx queues"); for_each_txq(vi, i, txq) { iqidx = vi->first_rxq + (i % vi->nrxq); snprintf(name, sizeof(name), "%s txq%d", device_get_nameunit(vi->dev), i); init_eq(sc, &txq->eq, EQ_ETH, vi->qsize_txq, pi->tx_chan, sc->sge.rxq[iqidx].iq.cntxt_id, name); rc = alloc_txq(vi, txq, i, oid); if (rc != 0) goto done; } #ifdef TCP_OFFLOAD oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "ofld_txq", CTLFLAG_RD, NULL, "tx queues for offloaded TCP connections"); for_each_ofld_txq(vi, i, ofld_txq) { struct sysctl_oid *oid2; iqidx = vi->first_ofld_rxq + (i % vi->nofldrxq); snprintf(name, sizeof(name), "%s ofld_txq%d", device_get_nameunit(vi->dev), i); init_eq(sc, &ofld_txq->eq, EQ_OFLD, vi->qsize_txq, pi->tx_chan, sc->sge.ofld_rxq[iqidx].iq.cntxt_id, name); snprintf(name, sizeof(name), "%d", i); oid2 = SYSCTL_ADD_NODE(&vi->ctx, SYSCTL_CHILDREN(oid), OID_AUTO, name, CTLFLAG_RD, NULL, "offload tx queue"); rc = alloc_wrq(sc, vi, ofld_txq, oid2); if (rc != 0) goto done; } #endif /* * Finally, the control queue. */ if (!IS_MAIN_VI(vi) || sc->flags & IS_VF) goto done; oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "ctrlq", CTLFLAG_RD, NULL, "ctrl queue"); ctrlq = &sc->sge.ctrlq[pi->port_id]; snprintf(name, sizeof(name), "%s ctrlq", device_get_nameunit(vi->dev)); init_eq(sc, &ctrlq->eq, EQ_CTRL, CTRL_EQ_QSIZE, pi->tx_chan, sc->sge.rxq[vi->first_rxq].iq.cntxt_id, name); rc = alloc_wrq(sc, vi, ctrlq, oid); done: if (rc) t4_teardown_vi_queues(vi); return (rc); } /* * Idempotent */ int t4_teardown_vi_queues(struct vi_info *vi) { int i; struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; struct sge_rxq *rxq; struct sge_txq *txq; #ifdef TCP_OFFLOAD struct sge_ofld_rxq *ofld_rxq; struct sge_wrq *ofld_txq; #endif #ifdef DEV_NETMAP struct sge_nm_rxq *nm_rxq; struct sge_nm_txq *nm_txq; #endif /* Do this before freeing the queues */ if (vi->flags & VI_SYSCTL_CTX) { sysctl_ctx_free(&vi->ctx); vi->flags &= ~VI_SYSCTL_CTX; } #ifdef DEV_NETMAP if (vi->ifp->if_capabilities & IFCAP_NETMAP) { for_each_nm_txq(vi, i, nm_txq) { free_nm_txq(vi, nm_txq); } for_each_nm_rxq(vi, i, nm_rxq) { free_nm_rxq(vi, nm_rxq); } } #endif /* * Take down all the tx queues first, as they reference the rx queues * (for egress updates, etc.). */ if (IS_MAIN_VI(vi) && !(sc->flags & IS_VF)) free_wrq(sc, &sc->sge.ctrlq[pi->port_id]); for_each_txq(vi, i, txq) { free_txq(vi, txq); } #ifdef TCP_OFFLOAD for_each_ofld_txq(vi, i, ofld_txq) { free_wrq(sc, ofld_txq); } #endif /* * Then take down the rx queues. */ for_each_rxq(vi, i, rxq) { free_rxq(vi, rxq); } #ifdef TCP_OFFLOAD for_each_ofld_rxq(vi, i, ofld_rxq) { free_ofld_rxq(vi, ofld_rxq); } #endif return (0); } /* * Deals with errors and the firmware event queue. All data rx queues forward * their interrupt to the firmware event queue. */ void t4_intr_all(void *arg) { struct adapter *sc = arg; struct sge_iq *fwq = &sc->sge.fwq; t4_intr_err(arg); if (atomic_cmpset_int(&fwq->state, IQS_IDLE, IQS_BUSY)) { service_iq(fwq, 0); atomic_cmpset_int(&fwq->state, IQS_BUSY, IQS_IDLE); } } /* Deals with error interrupts */ void t4_intr_err(void *arg) { struct adapter *sc = arg; t4_write_reg(sc, MYPF_REG(A_PCIE_PF_CLI), 0); t4_slow_intr_handler(sc); } void t4_intr_evt(void *arg) { struct sge_iq *iq = arg; if (atomic_cmpset_int(&iq->state, IQS_IDLE, IQS_BUSY)) { service_iq(iq, 0); atomic_cmpset_int(&iq->state, IQS_BUSY, IQS_IDLE); } } void t4_intr(void *arg) { struct sge_iq *iq = arg; if (atomic_cmpset_int(&iq->state, IQS_IDLE, IQS_BUSY)) { service_iq(iq, 0); atomic_cmpset_int(&iq->state, IQS_BUSY, IQS_IDLE); } } void t4_vi_intr(void *arg) { struct irq *irq = arg; #ifdef DEV_NETMAP if (atomic_cmpset_int(&irq->nm_state, NM_ON, NM_BUSY)) { t4_nm_intr(irq->nm_rxq); atomic_cmpset_int(&irq->nm_state, NM_BUSY, NM_ON); } #endif if (irq->rxq != NULL) t4_intr(irq->rxq); } static inline int sort_before_lro(struct lro_ctrl *lro) { return (lro->lro_mbuf_max != 0); } /* * Deals with anything and everything on the given ingress queue. */ static int service_iq(struct sge_iq *iq, int budget) { struct sge_iq *q; struct sge_rxq *rxq = iq_to_rxq(iq); /* Use iff iq is part of rxq */ struct sge_fl *fl; /* Use iff IQ_HAS_FL */ struct adapter *sc = iq->adapter; struct iq_desc *d = &iq->desc[iq->cidx]; int ndescs = 0, limit; int rsp_type, refill; uint32_t lq; uint16_t fl_hw_cidx; struct mbuf *m0; STAILQ_HEAD(, sge_iq) iql = STAILQ_HEAD_INITIALIZER(iql); #if defined(INET) || defined(INET6) const struct timeval lro_timeout = {0, sc->lro_timeout}; struct lro_ctrl *lro = &rxq->lro; #endif KASSERT(iq->state == IQS_BUSY, ("%s: iq %p not BUSY", __func__, iq)); limit = budget ? budget : iq->qsize / 16; if (iq->flags & IQ_HAS_FL) { fl = &rxq->fl; fl_hw_cidx = fl->hw_cidx; /* stable snapshot */ } else { fl = NULL; fl_hw_cidx = 0; /* to silence gcc warning */ } #if defined(INET) || defined(INET6) if (iq->flags & IQ_ADJ_CREDIT) { MPASS(sort_before_lro(lro)); iq->flags &= ~IQ_ADJ_CREDIT; if ((d->rsp.u.type_gen & F_RSPD_GEN) != iq->gen) { tcp_lro_flush_all(lro); t4_write_reg(sc, sc->sge_gts_reg, V_CIDXINC(1) | V_INGRESSQID((u32)iq->cntxt_id) | V_SEINTARM(iq->intr_params)); return (0); } ndescs = 1; } #else MPASS((iq->flags & IQ_ADJ_CREDIT) == 0); #endif /* * We always come back and check the descriptor ring for new indirect * interrupts and other responses after running a single handler. */ for (;;) { while ((d->rsp.u.type_gen & F_RSPD_GEN) == iq->gen) { rmb(); refill = 0; m0 = NULL; rsp_type = G_RSPD_TYPE(d->rsp.u.type_gen); lq = be32toh(d->rsp.pldbuflen_qid); switch (rsp_type) { case X_RSPD_TYPE_FLBUF: KASSERT(iq->flags & IQ_HAS_FL, ("%s: data for an iq (%p) with no freelist", __func__, iq)); m0 = get_fl_payload(sc, fl, lq); if (__predict_false(m0 == NULL)) goto process_iql; refill = IDXDIFF(fl->hw_cidx, fl_hw_cidx, fl->sidx) > 2; #ifdef T4_PKT_TIMESTAMP /* * 60 bit timestamp for the payload is * *(uint64_t *)m0->m_pktdat. Note that it is * in the leading free-space in the mbuf. The * kernel can clobber it during a pullup, * m_copymdata, etc. You need to make sure that * the mbuf reaches you unmolested if you care * about the timestamp. */ *(uint64_t *)m0->m_pktdat = be64toh(ctrl->u.last_flit) & 0xfffffffffffffff; #endif /* fall through */ case X_RSPD_TYPE_CPL: KASSERT(d->rss.opcode < NUM_CPL_CMDS, ("%s: bad opcode %02x.", __func__, d->rss.opcode)); t4_cpl_handler[d->rss.opcode](iq, &d->rss, m0); break; case X_RSPD_TYPE_INTR: /* * Interrupts should be forwarded only to queues * that are not forwarding their interrupts. * This means service_iq can recurse but only 1 * level deep. */ KASSERT(budget == 0, ("%s: budget %u, rsp_type %u", __func__, budget, rsp_type)); /* * There are 1K interrupt-capable queues (qids 0 * through 1023). A response type indicating a * forwarded interrupt with a qid >= 1K is an * iWARP async notification. */ if (lq >= 1024) { t4_an_handler(iq, &d->rsp); break; } q = sc->sge.iqmap[lq - sc->sge.iq_start - sc->sge.iq_base]; if (atomic_cmpset_int(&q->state, IQS_IDLE, IQS_BUSY)) { if (service_iq(q, q->qsize / 16) == 0) { atomic_cmpset_int(&q->state, IQS_BUSY, IQS_IDLE); } else { STAILQ_INSERT_TAIL(&iql, q, link); } } break; default: KASSERT(0, ("%s: illegal response type %d on iq %p", __func__, rsp_type, iq)); log(LOG_ERR, "%s: illegal response type %d on iq %p", device_get_nameunit(sc->dev), rsp_type, iq); break; } d++; if (__predict_false(++iq->cidx == iq->sidx)) { iq->cidx = 0; iq->gen ^= F_RSPD_GEN; d = &iq->desc[0]; } if (__predict_false(++ndescs == limit)) { t4_write_reg(sc, sc->sge_gts_reg, V_CIDXINC(ndescs) | V_INGRESSQID(iq->cntxt_id) | V_SEINTARM(V_QINTR_TIMER_IDX(X_TIMERREG_UPDATE_CIDX))); ndescs = 0; #if defined(INET) || defined(INET6) if (iq->flags & IQ_LRO_ENABLED && !sort_before_lro(lro) && sc->lro_timeout != 0) { tcp_lro_flush_inactive(lro, &lro_timeout); } #endif if (budget) { if (iq->flags & IQ_HAS_FL) { FL_LOCK(fl); refill_fl(sc, fl, 32); FL_UNLOCK(fl); } return (EINPROGRESS); } } if (refill) { FL_LOCK(fl); refill_fl(sc, fl, 32); FL_UNLOCK(fl); fl_hw_cidx = fl->hw_cidx; } } process_iql: if (STAILQ_EMPTY(&iql)) break; /* * Process the head only, and send it to the back of the list if * it's still not done. */ q = STAILQ_FIRST(&iql); STAILQ_REMOVE_HEAD(&iql, link); if (service_iq(q, q->qsize / 8) == 0) atomic_cmpset_int(&q->state, IQS_BUSY, IQS_IDLE); else STAILQ_INSERT_TAIL(&iql, q, link); } #if defined(INET) || defined(INET6) if (iq->flags & IQ_LRO_ENABLED) { if (ndescs > 0 && lro->lro_mbuf_count > 8) { MPASS(sort_before_lro(lro)); /* hold back one credit and don't flush LRO state */ iq->flags |= IQ_ADJ_CREDIT; ndescs--; } else { tcp_lro_flush_all(lro); } } #endif t4_write_reg(sc, sc->sge_gts_reg, V_CIDXINC(ndescs) | V_INGRESSQID((u32)iq->cntxt_id) | V_SEINTARM(iq->intr_params)); if (iq->flags & IQ_HAS_FL) { int starved; FL_LOCK(fl); starved = refill_fl(sc, fl, 64); FL_UNLOCK(fl); if (__predict_false(starved != 0)) add_fl_to_sfl(sc, fl); } return (0); } static inline int cl_has_metadata(struct sge_fl *fl, struct cluster_layout *cll) { int rc = fl->flags & FL_BUF_PACKING || cll->region1 > 0; if (rc) MPASS(cll->region3 >= CL_METADATA_SIZE); return (rc); } static inline struct cluster_metadata * cl_metadata(struct adapter *sc, struct sge_fl *fl, struct cluster_layout *cll, caddr_t cl) { if (cl_has_metadata(fl, cll)) { struct sw_zone_info *swz = &sc->sge.sw_zone_info[cll->zidx]; return ((struct cluster_metadata *)(cl + swz->size) - 1); } return (NULL); } static void rxb_free(struct mbuf *m) { uma_zone_t zone = m->m_ext.ext_arg1; void *cl = m->m_ext.ext_arg2; uma_zfree(zone, cl); counter_u64_add(extfree_rels, 1); } /* * The mbuf returned by this function could be allocated from zone_mbuf or * constructed in spare room in the cluster. * * The mbuf carries the payload in one of these ways * a) frame inside the mbuf (mbuf from zone_mbuf) * b) m_cljset (for clusters without metadata) zone_mbuf * c) m_extaddref (cluster with metadata) inline mbuf * d) m_extaddref (cluster with metadata) zone_mbuf */ static struct mbuf * get_scatter_segment(struct adapter *sc, struct sge_fl *fl, int fr_offset, int remaining) { struct mbuf *m; struct fl_sdesc *sd = &fl->sdesc[fl->cidx]; struct cluster_layout *cll = &sd->cll; struct sw_zone_info *swz = &sc->sge.sw_zone_info[cll->zidx]; struct hw_buf_info *hwb = &sc->sge.hw_buf_info[cll->hwidx]; struct cluster_metadata *clm = cl_metadata(sc, fl, cll, sd->cl); int len, blen; caddr_t payload; blen = hwb->size - fl->rx_offset; /* max possible in this buf */ len = min(remaining, blen); payload = sd->cl + cll->region1 + fl->rx_offset; if (fl->flags & FL_BUF_PACKING) { const u_int l = fr_offset + len; const u_int pad = roundup2(l, fl->buf_boundary) - l; if (fl->rx_offset + len + pad < hwb->size) blen = len + pad; MPASS(fl->rx_offset + blen <= hwb->size); } else { MPASS(fl->rx_offset == 0); /* not packing */ } if (sc->sc_do_rxcopy && len < RX_COPY_THRESHOLD) { /* * Copy payload into a freshly allocated mbuf. */ m = fr_offset == 0 ? m_gethdr(M_NOWAIT, MT_DATA) : m_get(M_NOWAIT, MT_DATA); if (m == NULL) return (NULL); fl->mbuf_allocated++; #ifdef T4_PKT_TIMESTAMP /* Leave room for a timestamp */ m->m_data += 8; #endif /* copy data to mbuf */ bcopy(payload, mtod(m, caddr_t), len); } else if (sd->nmbuf * MSIZE < cll->region1) { /* * There's spare room in the cluster for an mbuf. Create one * and associate it with the payload that's in the cluster. */ MPASS(clm != NULL); m = (struct mbuf *)(sd->cl + sd->nmbuf * MSIZE); /* No bzero required */ if (m_init(m, M_NOWAIT, MT_DATA, fr_offset == 0 ? M_PKTHDR | M_NOFREE : M_NOFREE)) return (NULL); fl->mbuf_inlined++; m_extaddref(m, payload, blen, &clm->refcount, rxb_free, swz->zone, sd->cl); if (sd->nmbuf++ == 0) counter_u64_add(extfree_refs, 1); } else { /* * Grab an mbuf from zone_mbuf and associate it with the * payload in the cluster. */ m = fr_offset == 0 ? m_gethdr(M_NOWAIT, MT_DATA) : m_get(M_NOWAIT, MT_DATA); if (m == NULL) return (NULL); fl->mbuf_allocated++; if (clm != NULL) { m_extaddref(m, payload, blen, &clm->refcount, rxb_free, swz->zone, sd->cl); if (sd->nmbuf++ == 0) counter_u64_add(extfree_refs, 1); } else { m_cljset(m, sd->cl, swz->type); sd->cl = NULL; /* consumed, not a recycle candidate */ } } if (fr_offset == 0) m->m_pkthdr.len = remaining; m->m_len = len; if (fl->flags & FL_BUF_PACKING) { fl->rx_offset += blen; MPASS(fl->rx_offset <= hwb->size); if (fl->rx_offset < hwb->size) return (m); /* without advancing the cidx */ } if (__predict_false(++fl->cidx % 8 == 0)) { uint16_t cidx = fl->cidx / 8; if (__predict_false(cidx == fl->sidx)) fl->cidx = cidx = 0; fl->hw_cidx = cidx; } fl->rx_offset = 0; return (m); } static struct mbuf * get_fl_payload(struct adapter *sc, struct sge_fl *fl, uint32_t len_newbuf) { struct mbuf *m0, *m, **pnext; u_int remaining; const u_int total = G_RSPD_LEN(len_newbuf); if (__predict_false(fl->flags & FL_BUF_RESUME)) { M_ASSERTPKTHDR(fl->m0); MPASS(fl->m0->m_pkthdr.len == total); MPASS(fl->remaining < total); m0 = fl->m0; pnext = fl->pnext; remaining = fl->remaining; fl->flags &= ~FL_BUF_RESUME; goto get_segment; } if (fl->rx_offset > 0 && len_newbuf & F_RSPD_NEWBUF) { fl->rx_offset = 0; if (__predict_false(++fl->cidx % 8 == 0)) { uint16_t cidx = fl->cidx / 8; if (__predict_false(cidx == fl->sidx)) fl->cidx = cidx = 0; fl->hw_cidx = cidx; } } /* * Payload starts at rx_offset in the current hw buffer. Its length is * 'len' and it may span multiple hw buffers. */ m0 = get_scatter_segment(sc, fl, 0, total); if (m0 == NULL) return (NULL); remaining = total - m0->m_len; pnext = &m0->m_next; while (remaining > 0) { get_segment: MPASS(fl->rx_offset == 0); m = get_scatter_segment(sc, fl, total - remaining, remaining); if (__predict_false(m == NULL)) { fl->m0 = m0; fl->pnext = pnext; fl->remaining = remaining; fl->flags |= FL_BUF_RESUME; return (NULL); } *pnext = m; pnext = &m->m_next; remaining -= m->m_len; } *pnext = NULL; M_ASSERTPKTHDR(m0); return (m0); } static int t4_eth_rx(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m0) { struct sge_rxq *rxq = iq_to_rxq(iq); struct ifnet *ifp = rxq->ifp; struct adapter *sc = iq->adapter; const struct cpl_rx_pkt *cpl = (const void *)(rss + 1); #if defined(INET) || defined(INET6) struct lro_ctrl *lro = &rxq->lro; #endif static const int sw_hashtype[4][2] = { {M_HASHTYPE_NONE, M_HASHTYPE_NONE}, {M_HASHTYPE_RSS_IPV4, M_HASHTYPE_RSS_IPV6}, {M_HASHTYPE_RSS_TCP_IPV4, M_HASHTYPE_RSS_TCP_IPV6}, {M_HASHTYPE_RSS_UDP_IPV4, M_HASHTYPE_RSS_UDP_IPV6}, }; KASSERT(m0 != NULL, ("%s: no payload with opcode %02x", __func__, rss->opcode)); m0->m_pkthdr.len -= sc->params.sge.fl_pktshift; m0->m_len -= sc->params.sge.fl_pktshift; m0->m_data += sc->params.sge.fl_pktshift; m0->m_pkthdr.rcvif = ifp; M_HASHTYPE_SET(m0, sw_hashtype[rss->hash_type][rss->ipv6]); m0->m_pkthdr.flowid = be32toh(rss->hash_val); if (cpl->csum_calc && !(cpl->err_vec & sc->params.tp.err_vec_mask)) { if (ifp->if_capenable & IFCAP_RXCSUM && cpl->l2info & htobe32(F_RXF_IP)) { m0->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR); rxq->rxcsum++; } else if (ifp->if_capenable & IFCAP_RXCSUM_IPV6 && cpl->l2info & htobe32(F_RXF_IP6)) { m0->m_pkthdr.csum_flags = (CSUM_DATA_VALID_IPV6 | CSUM_PSEUDO_HDR); rxq->rxcsum++; } if (__predict_false(cpl->ip_frag)) m0->m_pkthdr.csum_data = be16toh(cpl->csum); else m0->m_pkthdr.csum_data = 0xffff; } if (cpl->vlan_ex) { m0->m_pkthdr.ether_vtag = be16toh(cpl->vlan); m0->m_flags |= M_VLANTAG; rxq->vlan_extraction++; } #if defined(INET) || defined(INET6) if (iq->flags & IQ_LRO_ENABLED) { if (sort_before_lro(lro)) { tcp_lro_queue_mbuf(lro, m0); return (0); /* queued for sort, then LRO */ } if (tcp_lro_rx(lro, m0, 0) == 0) return (0); /* queued for LRO */ } #endif ifp->if_input(ifp, m0); return (0); } /* * Must drain the wrq or make sure that someone else will. */ static void wrq_tx_drain(void *arg, int n) { struct sge_wrq *wrq = arg; struct sge_eq *eq = &wrq->eq; EQ_LOCK(eq); if (TAILQ_EMPTY(&wrq->incomplete_wrs) && !STAILQ_EMPTY(&wrq->wr_list)) drain_wrq_wr_list(wrq->adapter, wrq); EQ_UNLOCK(eq); } static void drain_wrq_wr_list(struct adapter *sc, struct sge_wrq *wrq) { struct sge_eq *eq = &wrq->eq; u_int available, dbdiff; /* # of hardware descriptors */ u_int n; struct wrqe *wr; struct fw_eth_tx_pkt_wr *dst; /* any fw WR struct will do */ EQ_LOCK_ASSERT_OWNED(eq); MPASS(TAILQ_EMPTY(&wrq->incomplete_wrs)); wr = STAILQ_FIRST(&wrq->wr_list); MPASS(wr != NULL); /* Must be called with something useful to do */ MPASS(eq->pidx == eq->dbidx); dbdiff = 0; do { eq->cidx = read_hw_cidx(eq); if (eq->pidx == eq->cidx) available = eq->sidx - 1; else available = IDXDIFF(eq->cidx, eq->pidx, eq->sidx) - 1; MPASS(wr->wrq == wrq); n = howmany(wr->wr_len, EQ_ESIZE); if (available < n) break; dst = (void *)&eq->desc[eq->pidx]; if (__predict_true(eq->sidx - eq->pidx > n)) { /* Won't wrap, won't end exactly at the status page. */ bcopy(&wr->wr[0], dst, wr->wr_len); eq->pidx += n; } else { int first_portion = (eq->sidx - eq->pidx) * EQ_ESIZE; bcopy(&wr->wr[0], dst, first_portion); if (wr->wr_len > first_portion) { bcopy(&wr->wr[first_portion], &eq->desc[0], wr->wr_len - first_portion); } eq->pidx = n - (eq->sidx - eq->pidx); } wrq->tx_wrs_copied++; if (available < eq->sidx / 4 && atomic_cmpset_int(&eq->equiq, 0, 1)) { dst->equiq_to_len16 |= htobe32(F_FW_WR_EQUIQ | F_FW_WR_EQUEQ); eq->equeqidx = eq->pidx; } else if (IDXDIFF(eq->pidx, eq->equeqidx, eq->sidx) >= 32) { dst->equiq_to_len16 |= htobe32(F_FW_WR_EQUEQ); eq->equeqidx = eq->pidx; } dbdiff += n; if (dbdiff >= 16) { ring_eq_db(sc, eq, dbdiff); dbdiff = 0; } STAILQ_REMOVE_HEAD(&wrq->wr_list, link); free_wrqe(wr); MPASS(wrq->nwr_pending > 0); wrq->nwr_pending--; MPASS(wrq->ndesc_needed >= n); wrq->ndesc_needed -= n; } while ((wr = STAILQ_FIRST(&wrq->wr_list)) != NULL); if (dbdiff) ring_eq_db(sc, eq, dbdiff); } /* * Doesn't fail. Holds on to work requests it can't send right away. */ void t4_wrq_tx_locked(struct adapter *sc, struct sge_wrq *wrq, struct wrqe *wr) { #ifdef INVARIANTS struct sge_eq *eq = &wrq->eq; #endif EQ_LOCK_ASSERT_OWNED(eq); MPASS(wr != NULL); MPASS(wr->wr_len > 0 && wr->wr_len <= SGE_MAX_WR_LEN); MPASS((wr->wr_len & 0x7) == 0); STAILQ_INSERT_TAIL(&wrq->wr_list, wr, link); wrq->nwr_pending++; wrq->ndesc_needed += howmany(wr->wr_len, EQ_ESIZE); if (!TAILQ_EMPTY(&wrq->incomplete_wrs)) return; /* commit_wrq_wr will drain wr_list as well. */ drain_wrq_wr_list(sc, wrq); /* Doorbell must have caught up to the pidx. */ MPASS(eq->pidx == eq->dbidx); } void t4_update_fl_bufsize(struct ifnet *ifp) { struct vi_info *vi = ifp->if_softc; struct adapter *sc = vi->pi->adapter; struct sge_rxq *rxq; #ifdef TCP_OFFLOAD struct sge_ofld_rxq *ofld_rxq; #endif struct sge_fl *fl; int i, maxp, mtu = ifp->if_mtu; maxp = mtu_to_max_payload(sc, mtu, 0); for_each_rxq(vi, i, rxq) { fl = &rxq->fl; FL_LOCK(fl); find_best_refill_source(sc, fl, maxp); FL_UNLOCK(fl); } #ifdef TCP_OFFLOAD maxp = mtu_to_max_payload(sc, mtu, 1); for_each_ofld_rxq(vi, i, ofld_rxq) { fl = &ofld_rxq->fl; FL_LOCK(fl); find_best_refill_source(sc, fl, maxp); FL_UNLOCK(fl); } #endif } static inline int mbuf_nsegs(struct mbuf *m) { M_ASSERTPKTHDR(m); KASSERT(m->m_pkthdr.l5hlen > 0, ("%s: mbuf %p missing information on # of segments.", __func__, m)); return (m->m_pkthdr.l5hlen); } static inline void set_mbuf_nsegs(struct mbuf *m, uint8_t nsegs) { M_ASSERTPKTHDR(m); m->m_pkthdr.l5hlen = nsegs; } static inline int mbuf_len16(struct mbuf *m) { int n; M_ASSERTPKTHDR(m); n = m->m_pkthdr.PH_loc.eight[0]; MPASS(n > 0 && n <= SGE_MAX_WR_LEN / 16); return (n); } static inline void set_mbuf_len16(struct mbuf *m, uint8_t len16) { M_ASSERTPKTHDR(m); m->m_pkthdr.PH_loc.eight[0] = len16; } static inline int needs_tso(struct mbuf *m) { M_ASSERTPKTHDR(m); if (m->m_pkthdr.csum_flags & CSUM_TSO) { KASSERT(m->m_pkthdr.tso_segsz > 0, ("%s: TSO requested in mbuf %p but MSS not provided", __func__, m)); return (1); } return (0); } static inline int needs_l3_csum(struct mbuf *m) { M_ASSERTPKTHDR(m); if (m->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TSO)) return (1); return (0); } static inline int needs_l4_csum(struct mbuf *m) { M_ASSERTPKTHDR(m); if (m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP | CSUM_UDP_IPV6 | CSUM_TCP_IPV6 | CSUM_TSO)) return (1); return (0); } static inline int needs_vlan_insertion(struct mbuf *m) { M_ASSERTPKTHDR(m); if (m->m_flags & M_VLANTAG) { KASSERT(m->m_pkthdr.ether_vtag != 0, ("%s: HWVLAN requested in mbuf %p but tag not provided", __func__, m)); return (1); } return (0); } static void * m_advance(struct mbuf **pm, int *poffset, int len) { struct mbuf *m = *pm; int offset = *poffset; uintptr_t p = 0; MPASS(len > 0); for (;;) { if (offset + len < m->m_len) { offset += len; p = mtod(m, uintptr_t) + offset; break; } len -= m->m_len - offset; m = m->m_next; offset = 0; MPASS(m != NULL); } *poffset = offset; *pm = m; return ((void *)p); } /* * Can deal with empty mbufs in the chain that have m_len = 0, but the chain * must have at least one mbuf that's not empty. */ static inline int count_mbuf_nsegs(struct mbuf *m) { vm_paddr_t lastb, next; vm_offset_t va; int len, nsegs; MPASS(m != NULL); nsegs = 0; lastb = 0; for (; m; m = m->m_next) { len = m->m_len; if (__predict_false(len == 0)) continue; va = mtod(m, vm_offset_t); next = pmap_kextract(va); nsegs += sglist_count(m->m_data, len); if (lastb + 1 == next) nsegs--; lastb = pmap_kextract(va + len - 1); } MPASS(nsegs > 0); return (nsegs); } /* * Analyze the mbuf to determine its tx needs. The mbuf passed in may change: * a) caller can assume it's been freed if this function returns with an error. * b) it may get defragged up if the gather list is too long for the hardware. */ int parse_pkt(struct adapter *sc, struct mbuf **mp) { struct mbuf *m0 = *mp, *m; int rc, nsegs, defragged = 0, offset; struct ether_header *eh; void *l3hdr; #if defined(INET) || defined(INET6) struct tcphdr *tcp; #endif uint16_t eh_type; M_ASSERTPKTHDR(m0); if (__predict_false(m0->m_pkthdr.len < ETHER_HDR_LEN)) { rc = EINVAL; fail: m_freem(m0); *mp = NULL; return (rc); } restart: /* * First count the number of gather list segments in the payload. * Defrag the mbuf if nsegs exceeds the hardware limit. */ M_ASSERTPKTHDR(m0); MPASS(m0->m_pkthdr.len > 0); nsegs = count_mbuf_nsegs(m0); if (nsegs > (needs_tso(m0) ? TX_SGL_SEGS_TSO : TX_SGL_SEGS)) { if (defragged++ > 0 || (m = m_defrag(m0, M_NOWAIT)) == NULL) { rc = EFBIG; goto fail; } *mp = m0 = m; /* update caller's copy after defrag */ goto restart; } if (__predict_false(nsegs > 2 && m0->m_pkthdr.len <= MHLEN)) { m0 = m_pullup(m0, m0->m_pkthdr.len); if (m0 == NULL) { /* Should have left well enough alone. */ rc = EFBIG; goto fail; } *mp = m0; /* update caller's copy after pullup */ goto restart; } set_mbuf_nsegs(m0, nsegs); if (sc->flags & IS_VF) set_mbuf_len16(m0, txpkt_vm_len16(nsegs, needs_tso(m0))); else set_mbuf_len16(m0, txpkt_len16(nsegs, needs_tso(m0))); if (!needs_tso(m0) && !(sc->flags & IS_VF && (needs_l3_csum(m0) || needs_l4_csum(m0)))) return (0); m = m0; eh = mtod(m, struct ether_header *); eh_type = ntohs(eh->ether_type); if (eh_type == ETHERTYPE_VLAN) { struct ether_vlan_header *evh = (void *)eh; eh_type = ntohs(evh->evl_proto); m0->m_pkthdr.l2hlen = sizeof(*evh); } else m0->m_pkthdr.l2hlen = sizeof(*eh); offset = 0; l3hdr = m_advance(&m, &offset, m0->m_pkthdr.l2hlen); switch (eh_type) { #ifdef INET6 case ETHERTYPE_IPV6: { struct ip6_hdr *ip6 = l3hdr; MPASS(!needs_tso(m0) || ip6->ip6_nxt == IPPROTO_TCP); m0->m_pkthdr.l3hlen = sizeof(*ip6); break; } #endif #ifdef INET case ETHERTYPE_IP: { struct ip *ip = l3hdr; m0->m_pkthdr.l3hlen = ip->ip_hl * 4; break; } #endif default: panic("%s: ethertype 0x%04x unknown. if_cxgbe must be compiled" " with the same INET/INET6 options as the kernel.", __func__, eh_type); } #if defined(INET) || defined(INET6) if (needs_tso(m0)) { tcp = m_advance(&m, &offset, m0->m_pkthdr.l3hlen); m0->m_pkthdr.l4hlen = tcp->th_off * 4; } #endif MPASS(m0 == *mp); return (0); } void * start_wrq_wr(struct sge_wrq *wrq, int len16, struct wrq_cookie *cookie) { struct sge_eq *eq = &wrq->eq; struct adapter *sc = wrq->adapter; int ndesc, available; struct wrqe *wr; void *w; MPASS(len16 > 0); ndesc = howmany(len16, EQ_ESIZE / 16); MPASS(ndesc > 0 && ndesc <= SGE_MAX_WR_NDESC); EQ_LOCK(eq); if (TAILQ_EMPTY(&wrq->incomplete_wrs) && !STAILQ_EMPTY(&wrq->wr_list)) drain_wrq_wr_list(sc, wrq); if (!STAILQ_EMPTY(&wrq->wr_list)) { slowpath: EQ_UNLOCK(eq); wr = alloc_wrqe(len16 * 16, wrq); if (__predict_false(wr == NULL)) return (NULL); cookie->pidx = -1; cookie->ndesc = ndesc; return (&wr->wr); } eq->cidx = read_hw_cidx(eq); if (eq->pidx == eq->cidx) available = eq->sidx - 1; else available = IDXDIFF(eq->cidx, eq->pidx, eq->sidx) - 1; if (available < ndesc) goto slowpath; cookie->pidx = eq->pidx; cookie->ndesc = ndesc; TAILQ_INSERT_TAIL(&wrq->incomplete_wrs, cookie, link); w = &eq->desc[eq->pidx]; IDXINCR(eq->pidx, ndesc, eq->sidx); if (__predict_false(cookie->pidx + ndesc > eq->sidx)) { w = &wrq->ss[0]; wrq->ss_pidx = cookie->pidx; wrq->ss_len = len16 * 16; } EQ_UNLOCK(eq); return (w); } void commit_wrq_wr(struct sge_wrq *wrq, void *w, struct wrq_cookie *cookie) { struct sge_eq *eq = &wrq->eq; struct adapter *sc = wrq->adapter; int ndesc, pidx; struct wrq_cookie *prev, *next; if (cookie->pidx == -1) { struct wrqe *wr = __containerof(w, struct wrqe, wr); t4_wrq_tx(sc, wr); return; } if (__predict_false(w == &wrq->ss[0])) { int n = (eq->sidx - wrq->ss_pidx) * EQ_ESIZE; MPASS(wrq->ss_len > n); /* WR had better wrap around. */ bcopy(&wrq->ss[0], &eq->desc[wrq->ss_pidx], n); bcopy(&wrq->ss[n], &eq->desc[0], wrq->ss_len - n); wrq->tx_wrs_ss++; } else wrq->tx_wrs_direct++; EQ_LOCK(eq); ndesc = cookie->ndesc; /* Can be more than SGE_MAX_WR_NDESC here. */ pidx = cookie->pidx; MPASS(pidx >= 0 && pidx < eq->sidx); prev = TAILQ_PREV(cookie, wrq_incomplete_wrs, link); next = TAILQ_NEXT(cookie, link); if (prev == NULL) { MPASS(pidx == eq->dbidx); if (next == NULL || ndesc >= 16) ring_eq_db(wrq->adapter, eq, ndesc); else { MPASS(IDXDIFF(next->pidx, pidx, eq->sidx) == ndesc); next->pidx = pidx; next->ndesc += ndesc; } } else { MPASS(IDXDIFF(pidx, prev->pidx, eq->sidx) == prev->ndesc); prev->ndesc += ndesc; } TAILQ_REMOVE(&wrq->incomplete_wrs, cookie, link); if (TAILQ_EMPTY(&wrq->incomplete_wrs) && !STAILQ_EMPTY(&wrq->wr_list)) drain_wrq_wr_list(sc, wrq); #ifdef INVARIANTS if (TAILQ_EMPTY(&wrq->incomplete_wrs)) { /* Doorbell must have caught up to the pidx. */ MPASS(wrq->eq.pidx == wrq->eq.dbidx); } #endif EQ_UNLOCK(eq); } static u_int can_resume_eth_tx(struct mp_ring *r) { struct sge_eq *eq = r->cookie; return (total_available_tx_desc(eq) > eq->sidx / 8); } static inline int cannot_use_txpkts(struct mbuf *m) { /* maybe put a GL limit too, to avoid silliness? */ return (needs_tso(m)); } static inline int discard_tx(struct sge_eq *eq) { return ((eq->flags & (EQ_ENABLED | EQ_QFLUSH)) != EQ_ENABLED); } /* * r->items[cidx] to r->items[pidx], with a wraparound at r->size, are ready to * be consumed. Return the actual number consumed. 0 indicates a stall. */ static u_int eth_tx(struct mp_ring *r, u_int cidx, u_int pidx) { struct sge_txq *txq = r->cookie; struct sge_eq *eq = &txq->eq; struct ifnet *ifp = txq->ifp; struct vi_info *vi = ifp->if_softc; struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; u_int total, remaining; /* # of packets */ u_int available, dbdiff; /* # of hardware descriptors */ u_int n, next_cidx; struct mbuf *m0, *tail; struct txpkts txp; struct fw_eth_tx_pkts_wr *wr; /* any fw WR struct will do */ remaining = IDXDIFF(pidx, cidx, r->size); MPASS(remaining > 0); /* Must not be called without work to do. */ total = 0; TXQ_LOCK(txq); if (__predict_false(discard_tx(eq))) { while (cidx != pidx) { m0 = r->items[cidx]; m_freem(m0); if (++cidx == r->size) cidx = 0; } reclaim_tx_descs(txq, 2048); total = remaining; goto done; } /* How many hardware descriptors do we have readily available. */ if (eq->pidx == eq->cidx) available = eq->sidx - 1; else available = IDXDIFF(eq->cidx, eq->pidx, eq->sidx) - 1; dbdiff = IDXDIFF(eq->pidx, eq->dbidx, eq->sidx); while (remaining > 0) { m0 = r->items[cidx]; M_ASSERTPKTHDR(m0); MPASS(m0->m_nextpkt == NULL); if (available < SGE_MAX_WR_NDESC) { available += reclaim_tx_descs(txq, 64); if (available < howmany(mbuf_len16(m0), EQ_ESIZE / 16)) break; /* out of descriptors */ } next_cidx = cidx + 1; if (__predict_false(next_cidx == r->size)) next_cidx = 0; wr = (void *)&eq->desc[eq->pidx]; if (sc->flags & IS_VF) { total++; remaining--; ETHER_BPF_MTAP(ifp, m0); n = write_txpkt_vm_wr(sc, txq, (void *)wr, m0, available); } else if (remaining > 1 && try_txpkts(m0, r->items[next_cidx], &txp, available) == 0) { /* pkts at cidx, next_cidx should both be in txp. */ MPASS(txp.npkt == 2); tail = r->items[next_cidx]; MPASS(tail->m_nextpkt == NULL); ETHER_BPF_MTAP(ifp, m0); ETHER_BPF_MTAP(ifp, tail); m0->m_nextpkt = tail; if (__predict_false(++next_cidx == r->size)) next_cidx = 0; while (next_cidx != pidx) { if (add_to_txpkts(r->items[next_cidx], &txp, available) != 0) break; tail->m_nextpkt = r->items[next_cidx]; tail = tail->m_nextpkt; ETHER_BPF_MTAP(ifp, tail); if (__predict_false(++next_cidx == r->size)) next_cidx = 0; } n = write_txpkts_wr(txq, wr, m0, &txp, available); total += txp.npkt; remaining -= txp.npkt; } else { total++; remaining--; ETHER_BPF_MTAP(ifp, m0); n = write_txpkt_wr(txq, (void *)wr, m0, available); } MPASS(n >= 1 && n <= available && n <= SGE_MAX_WR_NDESC); available -= n; dbdiff += n; IDXINCR(eq->pidx, n, eq->sidx); if (total_available_tx_desc(eq) < eq->sidx / 4 && atomic_cmpset_int(&eq->equiq, 0, 1)) { wr->equiq_to_len16 |= htobe32(F_FW_WR_EQUIQ | F_FW_WR_EQUEQ); eq->equeqidx = eq->pidx; } else if (IDXDIFF(eq->pidx, eq->equeqidx, eq->sidx) >= 32) { wr->equiq_to_len16 |= htobe32(F_FW_WR_EQUEQ); eq->equeqidx = eq->pidx; } if (dbdiff >= 16 && remaining >= 4) { ring_eq_db(sc, eq, dbdiff); available += reclaim_tx_descs(txq, 4 * dbdiff); dbdiff = 0; } cidx = next_cidx; } if (dbdiff != 0) { ring_eq_db(sc, eq, dbdiff); reclaim_tx_descs(txq, 32); } done: TXQ_UNLOCK(txq); return (total); } static inline void init_iq(struct sge_iq *iq, struct adapter *sc, int tmr_idx, int pktc_idx, int qsize) { KASSERT(tmr_idx >= 0 && tmr_idx < SGE_NTIMERS, ("%s: bad tmr_idx %d", __func__, tmr_idx)); KASSERT(pktc_idx < SGE_NCOUNTERS, /* -ve is ok, means don't use */ ("%s: bad pktc_idx %d", __func__, pktc_idx)); iq->flags = 0; iq->adapter = sc; iq->intr_params = V_QINTR_TIMER_IDX(tmr_idx); iq->intr_pktc_idx = SGE_NCOUNTERS - 1; if (pktc_idx >= 0) { iq->intr_params |= F_QINTR_CNT_EN; iq->intr_pktc_idx = pktc_idx; } iq->qsize = roundup2(qsize, 16); /* See FW_IQ_CMD/iqsize */ iq->sidx = iq->qsize - sc->params.sge.spg_len / IQ_ESIZE; } static inline void init_fl(struct adapter *sc, struct sge_fl *fl, int qsize, int maxp, char *name) { fl->qsize = qsize; fl->sidx = qsize - sc->params.sge.spg_len / EQ_ESIZE; strlcpy(fl->lockname, name, sizeof(fl->lockname)); if (sc->flags & BUF_PACKING_OK && ((!is_t4(sc) && buffer_packing) || /* T5+: enabled unless 0 */ (is_t4(sc) && buffer_packing == 1)))/* T4: disabled unless 1 */ fl->flags |= FL_BUF_PACKING; find_best_refill_source(sc, fl, maxp); find_safe_refill_source(sc, fl); } static inline void init_eq(struct adapter *sc, struct sge_eq *eq, int eqtype, int qsize, uint8_t tx_chan, uint16_t iqid, char *name) { KASSERT(eqtype <= EQ_TYPEMASK, ("%s: bad qtype %d", __func__, eqtype)); eq->flags = eqtype & EQ_TYPEMASK; eq->tx_chan = tx_chan; eq->iqid = iqid; eq->sidx = qsize - sc->params.sge.spg_len / EQ_ESIZE; strlcpy(eq->lockname, name, sizeof(eq->lockname)); } static int alloc_ring(struct adapter *sc, size_t len, bus_dma_tag_t *tag, bus_dmamap_t *map, bus_addr_t *pa, void **va) { int rc; rc = bus_dma_tag_create(sc->dmat, 512, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, len, 1, len, 0, NULL, NULL, tag); if (rc != 0) { device_printf(sc->dev, "cannot allocate DMA tag: %d\n", rc); goto done; } rc = bus_dmamem_alloc(*tag, va, BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO, map); if (rc != 0) { device_printf(sc->dev, "cannot allocate DMA memory: %d\n", rc); goto done; } rc = bus_dmamap_load(*tag, *map, *va, len, oneseg_dma_callback, pa, 0); if (rc != 0) { device_printf(sc->dev, "cannot load DMA map: %d\n", rc); goto done; } done: if (rc) free_ring(sc, *tag, *map, *pa, *va); return (rc); } static int free_ring(struct adapter *sc, bus_dma_tag_t tag, bus_dmamap_t map, bus_addr_t pa, void *va) { if (pa) bus_dmamap_unload(tag, map); if (va) bus_dmamem_free(tag, va, map); if (tag) bus_dma_tag_destroy(tag); return (0); } /* * Allocates the ring for an ingress queue and an optional freelist. If the * freelist is specified it will be allocated and then associated with the * ingress queue. * * Returns errno on failure. Resources allocated up to that point may still be * allocated. Caller is responsible for cleanup in case this function fails. * * If the ingress queue will take interrupts directly then the intr_idx * specifies the vector, starting from 0. -1 means the interrupts for this * queue should be forwarded to the fwq. */ static int alloc_iq_fl(struct vi_info *vi, struct sge_iq *iq, struct sge_fl *fl, int intr_idx, int cong) { int rc, i, cntxt_id; size_t len; struct fw_iq_cmd c; struct port_info *pi = vi->pi; struct adapter *sc = iq->adapter; struct sge_params *sp = &sc->params.sge; __be32 v = 0; len = iq->qsize * IQ_ESIZE; rc = alloc_ring(sc, len, &iq->desc_tag, &iq->desc_map, &iq->ba, (void **)&iq->desc); if (rc != 0) return (rc); bzero(&c, sizeof(c)); c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_IQ_CMD) | F_FW_CMD_REQUEST | F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_IQ_CMD_PFN(sc->pf) | V_FW_IQ_CMD_VFN(0)); c.alloc_to_len16 = htobe32(F_FW_IQ_CMD_ALLOC | F_FW_IQ_CMD_IQSTART | FW_LEN16(c)); /* Special handling for firmware event queue */ if (iq == &sc->sge.fwq) v |= F_FW_IQ_CMD_IQASYNCH; if (intr_idx < 0) { /* Forwarded interrupts, all headed to fwq */ v |= F_FW_IQ_CMD_IQANDST; v |= V_FW_IQ_CMD_IQANDSTINDEX(sc->sge.fwq.cntxt_id); } else { KASSERT(intr_idx < sc->intr_count, ("%s: invalid direct intr_idx %d", __func__, intr_idx)); v |= V_FW_IQ_CMD_IQANDSTINDEX(intr_idx); } c.type_to_iqandstindex = htobe32(v | V_FW_IQ_CMD_TYPE(FW_IQ_TYPE_FL_INT_CAP) | V_FW_IQ_CMD_VIID(vi->viid) | V_FW_IQ_CMD_IQANUD(X_UPDATEDELIVERY_INTERRUPT)); c.iqdroprss_to_iqesize = htobe16(V_FW_IQ_CMD_IQPCIECH(pi->tx_chan) | F_FW_IQ_CMD_IQGTSMODE | V_FW_IQ_CMD_IQINTCNTTHRESH(iq->intr_pktc_idx) | V_FW_IQ_CMD_IQESIZE(ilog2(IQ_ESIZE) - 4)); c.iqsize = htobe16(iq->qsize); c.iqaddr = htobe64(iq->ba); if (cong >= 0) c.iqns_to_fl0congen = htobe32(F_FW_IQ_CMD_IQFLINTCONGEN); if (fl) { mtx_init(&fl->fl_lock, fl->lockname, NULL, MTX_DEF); len = fl->qsize * EQ_ESIZE; rc = alloc_ring(sc, len, &fl->desc_tag, &fl->desc_map, &fl->ba, (void **)&fl->desc); if (rc) return (rc); /* Allocate space for one software descriptor per buffer. */ rc = alloc_fl_sdesc(fl); if (rc != 0) { device_printf(sc->dev, "failed to setup fl software descriptors: %d\n", rc); return (rc); } if (fl->flags & FL_BUF_PACKING) { fl->lowat = roundup2(sp->fl_starve_threshold2, 8); fl->buf_boundary = sp->pack_boundary; } else { fl->lowat = roundup2(sp->fl_starve_threshold, 8); fl->buf_boundary = 16; } if (fl_pad && fl->buf_boundary < sp->pad_boundary) fl->buf_boundary = sp->pad_boundary; c.iqns_to_fl0congen |= htobe32(V_FW_IQ_CMD_FL0HOSTFCMODE(X_HOSTFCMODE_NONE) | F_FW_IQ_CMD_FL0FETCHRO | F_FW_IQ_CMD_FL0DATARO | (fl_pad ? F_FW_IQ_CMD_FL0PADEN : 0) | (fl->flags & FL_BUF_PACKING ? F_FW_IQ_CMD_FL0PACKEN : 0)); if (cong >= 0) { c.iqns_to_fl0congen |= htobe32(V_FW_IQ_CMD_FL0CNGCHMAP(cong) | F_FW_IQ_CMD_FL0CONGCIF | F_FW_IQ_CMD_FL0CONGEN); } c.fl0dcaen_to_fl0cidxfthresh = htobe16(V_FW_IQ_CMD_FL0FBMIN(chip_id(sc) <= CHELSIO_T5 ? X_FETCHBURSTMIN_128B : X_FETCHBURSTMIN_64B) | V_FW_IQ_CMD_FL0FBMAX(chip_id(sc) <= CHELSIO_T5 ? X_FETCHBURSTMAX_512B : X_FETCHBURSTMAX_256B)); c.fl0size = htobe16(fl->qsize); c.fl0addr = htobe64(fl->ba); } rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c); if (rc != 0) { device_printf(sc->dev, "failed to create ingress queue: %d\n", rc); return (rc); } iq->cidx = 0; iq->gen = F_RSPD_GEN; iq->intr_next = iq->intr_params; iq->cntxt_id = be16toh(c.iqid); iq->abs_id = be16toh(c.physiqid); iq->flags |= IQ_ALLOCATED; cntxt_id = iq->cntxt_id - sc->sge.iq_start; if (cntxt_id >= sc->sge.niq) { panic ("%s: iq->cntxt_id (%d) more than the max (%d)", __func__, cntxt_id, sc->sge.niq - 1); } sc->sge.iqmap[cntxt_id] = iq; if (fl) { u_int qid; iq->flags |= IQ_HAS_FL; fl->cntxt_id = be16toh(c.fl0id); fl->pidx = fl->cidx = 0; cntxt_id = fl->cntxt_id - sc->sge.eq_start; if (cntxt_id >= sc->sge.neq) { panic("%s: fl->cntxt_id (%d) more than the max (%d)", __func__, cntxt_id, sc->sge.neq - 1); } sc->sge.eqmap[cntxt_id] = (void *)fl; qid = fl->cntxt_id; if (isset(&sc->doorbells, DOORBELL_UDB)) { uint32_t s_qpp = sc->params.sge.eq_s_qpp; uint32_t mask = (1 << s_qpp) - 1; volatile uint8_t *udb; udb = sc->udbs_base + UDBS_DB_OFFSET; udb += (qid >> s_qpp) << PAGE_SHIFT; qid &= mask; if (qid < PAGE_SIZE / UDBS_SEG_SIZE) { udb += qid << UDBS_SEG_SHIFT; qid = 0; } fl->udb = (volatile void *)udb; } fl->dbval = V_QID(qid) | sc->chip_params->sge_fl_db; FL_LOCK(fl); /* Enough to make sure the SGE doesn't think it's starved */ refill_fl(sc, fl, fl->lowat); FL_UNLOCK(fl); } if (chip_id(sc) >= CHELSIO_T5 && !(sc->flags & IS_VF) && cong >= 0) { uint32_t param, val; param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DMAQ) | V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DMAQ_CONM_CTXT) | V_FW_PARAMS_PARAM_YZ(iq->cntxt_id); if (cong == 0) val = 1 << 19; else { val = 2 << 19; for (i = 0; i < 4; i++) { if (cong & (1 << i)) val |= 1 << (i << 2); } } rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, ¶m, &val); if (rc != 0) { /* report error but carry on */ device_printf(sc->dev, "failed to set congestion manager context for " "ingress queue %d: %d\n", iq->cntxt_id, rc); } } /* Enable IQ interrupts */ atomic_store_rel_int(&iq->state, IQS_IDLE); t4_write_reg(sc, sc->sge_gts_reg, V_SEINTARM(iq->intr_params) | V_INGRESSQID(iq->cntxt_id)); return (0); } static int free_iq_fl(struct vi_info *vi, struct sge_iq *iq, struct sge_fl *fl) { int rc; struct adapter *sc = iq->adapter; device_t dev; if (sc == NULL) return (0); /* nothing to do */ dev = vi ? vi->dev : sc->dev; if (iq->flags & IQ_ALLOCATED) { rc = -t4_iq_free(sc, sc->mbox, sc->pf, 0, FW_IQ_TYPE_FL_INT_CAP, iq->cntxt_id, fl ? fl->cntxt_id : 0xffff, 0xffff); if (rc != 0) { device_printf(dev, "failed to free queue %p: %d\n", iq, rc); return (rc); } iq->flags &= ~IQ_ALLOCATED; } free_ring(sc, iq->desc_tag, iq->desc_map, iq->ba, iq->desc); bzero(iq, sizeof(*iq)); if (fl) { free_ring(sc, fl->desc_tag, fl->desc_map, fl->ba, fl->desc); if (fl->sdesc) free_fl_sdesc(sc, fl); if (mtx_initialized(&fl->fl_lock)) mtx_destroy(&fl->fl_lock); bzero(fl, sizeof(*fl)); } return (0); } static void add_iq_sysctls(struct sysctl_ctx_list *ctx, struct sysctl_oid *oid, struct sge_iq *iq) { struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); SYSCTL_ADD_UAUTO(ctx, children, OID_AUTO, "ba", CTLFLAG_RD, &iq->ba, "bus address of descriptor ring"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "dmalen", CTLFLAG_RD, NULL, iq->qsize * IQ_ESIZE, "descriptor ring size in bytes"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "abs_id", CTLTYPE_INT | CTLFLAG_RD, &iq->abs_id, 0, sysctl_uint16, "I", "absolute id of the queue"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cntxt_id", CTLTYPE_INT | CTLFLAG_RD, &iq->cntxt_id, 0, sysctl_uint16, "I", "SGE context id of the queue"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cidx", CTLTYPE_INT | CTLFLAG_RD, &iq->cidx, 0, sysctl_uint16, "I", "consumer index"); } static void add_fl_sysctls(struct adapter *sc, struct sysctl_ctx_list *ctx, struct sysctl_oid *oid, struct sge_fl *fl) { struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "fl", CTLFLAG_RD, NULL, "freelist"); children = SYSCTL_CHILDREN(oid); SYSCTL_ADD_UAUTO(ctx, children, OID_AUTO, "ba", CTLFLAG_RD, &fl->ba, "bus address of descriptor ring"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "dmalen", CTLFLAG_RD, NULL, fl->sidx * EQ_ESIZE + sc->params.sge.spg_len, "desc ring size in bytes"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cntxt_id", CTLTYPE_INT | CTLFLAG_RD, &fl->cntxt_id, 0, sysctl_uint16, "I", "SGE context id of the freelist"); SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "padding", CTLFLAG_RD, NULL, fl_pad ? 1 : 0, "padding enabled"); SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "packing", CTLFLAG_RD, NULL, fl->flags & FL_BUF_PACKING ? 1 : 0, "packing enabled"); SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "cidx", CTLFLAG_RD, &fl->cidx, 0, "consumer index"); if (fl->flags & FL_BUF_PACKING) { SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "rx_offset", CTLFLAG_RD, &fl->rx_offset, 0, "packing rx offset"); } SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "pidx", CTLFLAG_RD, &fl->pidx, 0, "producer index"); SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "mbuf_allocated", CTLFLAG_RD, &fl->mbuf_allocated, "# of mbuf allocated"); SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "mbuf_inlined", CTLFLAG_RD, &fl->mbuf_inlined, "# of mbuf inlined in clusters"); SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "cluster_allocated", CTLFLAG_RD, &fl->cl_allocated, "# of clusters allocated"); SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "cluster_recycled", CTLFLAG_RD, &fl->cl_recycled, "# of clusters recycled"); SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "cluster_fast_recycled", CTLFLAG_RD, &fl->cl_fast_recycled, "# of clusters recycled (fast)"); } static int alloc_fwq(struct adapter *sc) { int rc, intr_idx; struct sge_iq *fwq = &sc->sge.fwq; struct sysctl_oid *oid = device_get_sysctl_tree(sc->dev); struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); init_iq(fwq, sc, 0, 0, FW_IQ_QSIZE); if (sc->flags & IS_VF) intr_idx = 0; else intr_idx = sc->intr_count > 1 ? 1 : 0; rc = alloc_iq_fl(&sc->port[0]->vi[0], fwq, NULL, intr_idx, -1); if (rc != 0) { device_printf(sc->dev, "failed to create firmware event queue: %d\n", rc); return (rc); } oid = SYSCTL_ADD_NODE(&sc->ctx, children, OID_AUTO, "fwq", CTLFLAG_RD, NULL, "firmware event queue"); add_iq_sysctls(&sc->ctx, oid, fwq); return (0); } static int free_fwq(struct adapter *sc) { return free_iq_fl(NULL, &sc->sge.fwq, NULL); } static int alloc_mgmtq(struct adapter *sc) { int rc; struct sge_wrq *mgmtq = &sc->sge.mgmtq; char name[16]; struct sysctl_oid *oid = device_get_sysctl_tree(sc->dev); struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); oid = SYSCTL_ADD_NODE(&sc->ctx, children, OID_AUTO, "mgmtq", CTLFLAG_RD, NULL, "management queue"); snprintf(name, sizeof(name), "%s mgmtq", device_get_nameunit(sc->dev)); init_eq(sc, &mgmtq->eq, EQ_CTRL, CTRL_EQ_QSIZE, sc->port[0]->tx_chan, sc->sge.fwq.cntxt_id, name); rc = alloc_wrq(sc, NULL, mgmtq, oid); if (rc != 0) { device_printf(sc->dev, "failed to create management queue: %d\n", rc); return (rc); } return (0); } static int free_mgmtq(struct adapter *sc) { return free_wrq(sc, &sc->sge.mgmtq); } int tnl_cong(struct port_info *pi, int drop) { if (drop == -1) return (-1); else if (drop == 1) return (0); else return (pi->rx_e_chan_map); } static int alloc_rxq(struct vi_info *vi, struct sge_rxq *rxq, int intr_idx, int idx, struct sysctl_oid *oid) { int rc; struct adapter *sc = vi->pi->adapter; struct sysctl_oid_list *children; char name[16]; rc = alloc_iq_fl(vi, &rxq->iq, &rxq->fl, intr_idx, tnl_cong(vi->pi, cong_drop)); if (rc != 0) return (rc); if (idx == 0) sc->sge.iq_base = rxq->iq.abs_id - rxq->iq.cntxt_id; else KASSERT(rxq->iq.cntxt_id + sc->sge.iq_base == rxq->iq.abs_id, ("iq_base mismatch")); KASSERT(sc->sge.iq_base == 0 || sc->flags & IS_VF, ("PF with non-zero iq_base")); /* * The freelist is just barely above the starvation threshold right now, * fill it up a bit more. */ FL_LOCK(&rxq->fl); refill_fl(sc, &rxq->fl, 128); FL_UNLOCK(&rxq->fl); #if defined(INET) || defined(INET6) rc = tcp_lro_init_args(&rxq->lro, vi->ifp, lro_entries, lro_mbufs); if (rc != 0) return (rc); MPASS(rxq->lro.ifp == vi->ifp); /* also indicates LRO init'ed */ if (vi->ifp->if_capenable & IFCAP_LRO) rxq->iq.flags |= IQ_LRO_ENABLED; #endif rxq->ifp = vi->ifp; children = SYSCTL_CHILDREN(oid); snprintf(name, sizeof(name), "%d", idx); oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, name, CTLFLAG_RD, NULL, "rx queue"); children = SYSCTL_CHILDREN(oid); add_iq_sysctls(&vi->ctx, oid, &rxq->iq); #if defined(INET) || defined(INET6) SYSCTL_ADD_U64(&vi->ctx, children, OID_AUTO, "lro_queued", CTLFLAG_RD, &rxq->lro.lro_queued, 0, NULL); SYSCTL_ADD_U64(&vi->ctx, children, OID_AUTO, "lro_flushed", CTLFLAG_RD, &rxq->lro.lro_flushed, 0, NULL); #endif SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "rxcsum", CTLFLAG_RD, &rxq->rxcsum, "# of times hardware assisted with checksum"); SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "vlan_extraction", CTLFLAG_RD, &rxq->vlan_extraction, "# of times hardware extracted 802.1Q tag"); add_fl_sysctls(sc, &vi->ctx, oid, &rxq->fl); return (rc); } static int free_rxq(struct vi_info *vi, struct sge_rxq *rxq) { int rc; #if defined(INET) || defined(INET6) if (rxq->lro.ifp) { tcp_lro_free(&rxq->lro); rxq->lro.ifp = NULL; } #endif rc = free_iq_fl(vi, &rxq->iq, &rxq->fl); if (rc == 0) bzero(rxq, sizeof(*rxq)); return (rc); } #ifdef TCP_OFFLOAD static int alloc_ofld_rxq(struct vi_info *vi, struct sge_ofld_rxq *ofld_rxq, int intr_idx, int idx, struct sysctl_oid *oid) { struct port_info *pi = vi->pi; int rc; struct sysctl_oid_list *children; char name[16]; rc = alloc_iq_fl(vi, &ofld_rxq->iq, &ofld_rxq->fl, intr_idx, 0); if (rc != 0) return (rc); children = SYSCTL_CHILDREN(oid); snprintf(name, sizeof(name), "%d", idx); oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, name, CTLFLAG_RD, NULL, "rx queue"); add_iq_sysctls(&vi->ctx, oid, &ofld_rxq->iq); add_fl_sysctls(pi->adapter, &vi->ctx, oid, &ofld_rxq->fl); return (rc); } static int free_ofld_rxq(struct vi_info *vi, struct sge_ofld_rxq *ofld_rxq) { int rc; rc = free_iq_fl(vi, &ofld_rxq->iq, &ofld_rxq->fl); if (rc == 0) bzero(ofld_rxq, sizeof(*ofld_rxq)); return (rc); } #endif #ifdef DEV_NETMAP static int alloc_nm_rxq(struct vi_info *vi, struct sge_nm_rxq *nm_rxq, int intr_idx, int idx, struct sysctl_oid *oid) { int rc; struct sysctl_oid_list *children; struct sysctl_ctx_list *ctx; char name[16]; size_t len; struct adapter *sc = vi->pi->adapter; struct netmap_adapter *na = NA(vi->ifp); MPASS(na != NULL); len = vi->qsize_rxq * IQ_ESIZE; rc = alloc_ring(sc, len, &nm_rxq->iq_desc_tag, &nm_rxq->iq_desc_map, &nm_rxq->iq_ba, (void **)&nm_rxq->iq_desc); if (rc != 0) return (rc); len = na->num_rx_desc * EQ_ESIZE + sc->params.sge.spg_len; rc = alloc_ring(sc, len, &nm_rxq->fl_desc_tag, &nm_rxq->fl_desc_map, &nm_rxq->fl_ba, (void **)&nm_rxq->fl_desc); if (rc != 0) return (rc); nm_rxq->vi = vi; nm_rxq->nid = idx; nm_rxq->iq_cidx = 0; nm_rxq->iq_sidx = vi->qsize_rxq - sc->params.sge.spg_len / IQ_ESIZE; nm_rxq->iq_gen = F_RSPD_GEN; nm_rxq->fl_pidx = nm_rxq->fl_cidx = 0; nm_rxq->fl_sidx = na->num_rx_desc; nm_rxq->intr_idx = intr_idx; nm_rxq->iq_cntxt_id = INVALID_NM_RXQ_CNTXT_ID; ctx = &vi->ctx; children = SYSCTL_CHILDREN(oid); snprintf(name, sizeof(name), "%d", idx); oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, name, CTLFLAG_RD, NULL, "rx queue"); children = SYSCTL_CHILDREN(oid); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "abs_id", CTLTYPE_INT | CTLFLAG_RD, &nm_rxq->iq_abs_id, 0, sysctl_uint16, "I", "absolute id of the queue"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cntxt_id", CTLTYPE_INT | CTLFLAG_RD, &nm_rxq->iq_cntxt_id, 0, sysctl_uint16, "I", "SGE context id of the queue"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cidx", CTLTYPE_INT | CTLFLAG_RD, &nm_rxq->iq_cidx, 0, sysctl_uint16, "I", "consumer index"); children = SYSCTL_CHILDREN(oid); oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "fl", CTLFLAG_RD, NULL, "freelist"); children = SYSCTL_CHILDREN(oid); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cntxt_id", CTLTYPE_INT | CTLFLAG_RD, &nm_rxq->fl_cntxt_id, 0, sysctl_uint16, "I", "SGE context id of the freelist"); SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "cidx", CTLFLAG_RD, &nm_rxq->fl_cidx, 0, "consumer index"); SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "pidx", CTLFLAG_RD, &nm_rxq->fl_pidx, 0, "producer index"); return (rc); } static int free_nm_rxq(struct vi_info *vi, struct sge_nm_rxq *nm_rxq) { struct adapter *sc = vi->pi->adapter; if (vi->flags & VI_INIT_DONE) MPASS(nm_rxq->iq_cntxt_id == INVALID_NM_RXQ_CNTXT_ID); else MPASS(nm_rxq->iq_cntxt_id == 0); free_ring(sc, nm_rxq->iq_desc_tag, nm_rxq->iq_desc_map, nm_rxq->iq_ba, nm_rxq->iq_desc); free_ring(sc, nm_rxq->fl_desc_tag, nm_rxq->fl_desc_map, nm_rxq->fl_ba, nm_rxq->fl_desc); return (0); } static int alloc_nm_txq(struct vi_info *vi, struct sge_nm_txq *nm_txq, int iqidx, int idx, struct sysctl_oid *oid) { int rc; size_t len; struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; struct netmap_adapter *na = NA(vi->ifp); char name[16]; struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); len = na->num_tx_desc * EQ_ESIZE + sc->params.sge.spg_len; rc = alloc_ring(sc, len, &nm_txq->desc_tag, &nm_txq->desc_map, &nm_txq->ba, (void **)&nm_txq->desc); if (rc) return (rc); nm_txq->pidx = nm_txq->cidx = 0; nm_txq->sidx = na->num_tx_desc; nm_txq->nid = idx; nm_txq->iqidx = iqidx; nm_txq->cpl_ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT) | V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_PF(G_FW_VIID_PFN(vi->viid)) | V_TXPKT_VF(G_FW_VIID_VIN(vi->viid)) | V_TXPKT_VF_VLD(G_FW_VIID_VIVLD(vi->viid))); nm_txq->cntxt_id = INVALID_NM_TXQ_CNTXT_ID; snprintf(name, sizeof(name), "%d", idx); oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, name, CTLFLAG_RD, NULL, "netmap tx queue"); children = SYSCTL_CHILDREN(oid); SYSCTL_ADD_UINT(&vi->ctx, children, OID_AUTO, "cntxt_id", CTLFLAG_RD, &nm_txq->cntxt_id, 0, "SGE context id of the queue"); SYSCTL_ADD_PROC(&vi->ctx, children, OID_AUTO, "cidx", CTLTYPE_INT | CTLFLAG_RD, &nm_txq->cidx, 0, sysctl_uint16, "I", "consumer index"); SYSCTL_ADD_PROC(&vi->ctx, children, OID_AUTO, "pidx", CTLTYPE_INT | CTLFLAG_RD, &nm_txq->pidx, 0, sysctl_uint16, "I", "producer index"); return (rc); } static int free_nm_txq(struct vi_info *vi, struct sge_nm_txq *nm_txq) { struct adapter *sc = vi->pi->adapter; if (vi->flags & VI_INIT_DONE) MPASS(nm_txq->cntxt_id == INVALID_NM_TXQ_CNTXT_ID); else MPASS(nm_txq->cntxt_id == 0); free_ring(sc, nm_txq->desc_tag, nm_txq->desc_map, nm_txq->ba, nm_txq->desc); return (0); } #endif static int ctrl_eq_alloc(struct adapter *sc, struct sge_eq *eq) { int rc, cntxt_id; struct fw_eq_ctrl_cmd c; int qsize = eq->sidx + sc->params.sge.spg_len / EQ_ESIZE; bzero(&c, sizeof(c)); c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_EQ_CTRL_CMD) | F_FW_CMD_REQUEST | F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_CTRL_CMD_PFN(sc->pf) | V_FW_EQ_CTRL_CMD_VFN(0)); c.alloc_to_len16 = htobe32(F_FW_EQ_CTRL_CMD_ALLOC | F_FW_EQ_CTRL_CMD_EQSTART | FW_LEN16(c)); c.cmpliqid_eqid = htonl(V_FW_EQ_CTRL_CMD_CMPLIQID(eq->iqid)); c.physeqid_pkd = htobe32(0); c.fetchszm_to_iqid = htobe32(V_FW_EQ_CTRL_CMD_HOSTFCMODE(X_HOSTFCMODE_STATUS_PAGE) | V_FW_EQ_CTRL_CMD_PCIECHN(eq->tx_chan) | F_FW_EQ_CTRL_CMD_FETCHRO | V_FW_EQ_CTRL_CMD_IQID(eq->iqid)); c.dcaen_to_eqsize = htobe32(V_FW_EQ_CTRL_CMD_FBMIN(X_FETCHBURSTMIN_64B) | V_FW_EQ_CTRL_CMD_FBMAX(X_FETCHBURSTMAX_512B) | V_FW_EQ_CTRL_CMD_CIDXFTHRESH(X_CIDXFLUSHTHRESH_32) | V_FW_EQ_CTRL_CMD_EQSIZE(qsize)); c.eqaddr = htobe64(eq->ba); rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c); if (rc != 0) { device_printf(sc->dev, "failed to create control queue %d: %d\n", eq->tx_chan, rc); return (rc); } eq->flags |= EQ_ALLOCATED; eq->cntxt_id = G_FW_EQ_CTRL_CMD_EQID(be32toh(c.cmpliqid_eqid)); cntxt_id = eq->cntxt_id - sc->sge.eq_start; if (cntxt_id >= sc->sge.neq) panic("%s: eq->cntxt_id (%d) more than the max (%d)", __func__, cntxt_id, sc->sge.neq - 1); sc->sge.eqmap[cntxt_id] = eq; return (rc); } static int eth_eq_alloc(struct adapter *sc, struct vi_info *vi, struct sge_eq *eq) { int rc, cntxt_id; struct fw_eq_eth_cmd c; int qsize = eq->sidx + sc->params.sge.spg_len / EQ_ESIZE; bzero(&c, sizeof(c)); c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_EQ_ETH_CMD) | F_FW_CMD_REQUEST | F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_ETH_CMD_PFN(sc->pf) | V_FW_EQ_ETH_CMD_VFN(0)); c.alloc_to_len16 = htobe32(F_FW_EQ_ETH_CMD_ALLOC | F_FW_EQ_ETH_CMD_EQSTART | FW_LEN16(c)); c.autoequiqe_to_viid = htobe32(F_FW_EQ_ETH_CMD_AUTOEQUIQE | F_FW_EQ_ETH_CMD_AUTOEQUEQE | V_FW_EQ_ETH_CMD_VIID(vi->viid)); c.fetchszm_to_iqid = htobe32(V_FW_EQ_ETH_CMD_HOSTFCMODE(X_HOSTFCMODE_NONE) | V_FW_EQ_ETH_CMD_PCIECHN(eq->tx_chan) | F_FW_EQ_ETH_CMD_FETCHRO | V_FW_EQ_ETH_CMD_IQID(eq->iqid)); c.dcaen_to_eqsize = htobe32(V_FW_EQ_ETH_CMD_FBMIN(X_FETCHBURSTMIN_64B) | V_FW_EQ_ETH_CMD_FBMAX(X_FETCHBURSTMAX_512B) | V_FW_EQ_ETH_CMD_EQSIZE(qsize)); c.eqaddr = htobe64(eq->ba); rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c); if (rc != 0) { device_printf(vi->dev, "failed to create Ethernet egress queue: %d\n", rc); return (rc); } eq->flags |= EQ_ALLOCATED; eq->cntxt_id = G_FW_EQ_ETH_CMD_EQID(be32toh(c.eqid_pkd)); eq->abs_id = G_FW_EQ_ETH_CMD_PHYSEQID(be32toh(c.physeqid_pkd)); cntxt_id = eq->cntxt_id - sc->sge.eq_start; if (cntxt_id >= sc->sge.neq) panic("%s: eq->cntxt_id (%d) more than the max (%d)", __func__, cntxt_id, sc->sge.neq - 1); sc->sge.eqmap[cntxt_id] = eq; return (rc); } #ifdef TCP_OFFLOAD static int ofld_eq_alloc(struct adapter *sc, struct vi_info *vi, struct sge_eq *eq) { int rc, cntxt_id; struct fw_eq_ofld_cmd c; int qsize = eq->sidx + sc->params.sge.spg_len / EQ_ESIZE; bzero(&c, sizeof(c)); c.op_to_vfn = htonl(V_FW_CMD_OP(FW_EQ_OFLD_CMD) | F_FW_CMD_REQUEST | F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_OFLD_CMD_PFN(sc->pf) | V_FW_EQ_OFLD_CMD_VFN(0)); c.alloc_to_len16 = htonl(F_FW_EQ_OFLD_CMD_ALLOC | F_FW_EQ_OFLD_CMD_EQSTART | FW_LEN16(c)); c.fetchszm_to_iqid = htonl(V_FW_EQ_OFLD_CMD_HOSTFCMODE(X_HOSTFCMODE_NONE) | V_FW_EQ_OFLD_CMD_PCIECHN(eq->tx_chan) | F_FW_EQ_OFLD_CMD_FETCHRO | V_FW_EQ_OFLD_CMD_IQID(eq->iqid)); c.dcaen_to_eqsize = htobe32(V_FW_EQ_OFLD_CMD_FBMIN(X_FETCHBURSTMIN_64B) | V_FW_EQ_OFLD_CMD_FBMAX(X_FETCHBURSTMAX_512B) | V_FW_EQ_OFLD_CMD_EQSIZE(qsize)); c.eqaddr = htobe64(eq->ba); rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c); if (rc != 0) { device_printf(vi->dev, "failed to create egress queue for TCP offload: %d\n", rc); return (rc); } eq->flags |= EQ_ALLOCATED; eq->cntxt_id = G_FW_EQ_OFLD_CMD_EQID(be32toh(c.eqid_pkd)); cntxt_id = eq->cntxt_id - sc->sge.eq_start; if (cntxt_id >= sc->sge.neq) panic("%s: eq->cntxt_id (%d) more than the max (%d)", __func__, cntxt_id, sc->sge.neq - 1); sc->sge.eqmap[cntxt_id] = eq; return (rc); } #endif static int alloc_eq(struct adapter *sc, struct vi_info *vi, struct sge_eq *eq) { int rc, qsize; size_t len; mtx_init(&eq->eq_lock, eq->lockname, NULL, MTX_DEF); qsize = eq->sidx + sc->params.sge.spg_len / EQ_ESIZE; len = qsize * EQ_ESIZE; rc = alloc_ring(sc, len, &eq->desc_tag, &eq->desc_map, &eq->ba, (void **)&eq->desc); if (rc) return (rc); eq->pidx = eq->cidx = 0; eq->equeqidx = eq->dbidx = 0; eq->doorbells = sc->doorbells; switch (eq->flags & EQ_TYPEMASK) { case EQ_CTRL: rc = ctrl_eq_alloc(sc, eq); break; case EQ_ETH: rc = eth_eq_alloc(sc, vi, eq); break; #ifdef TCP_OFFLOAD case EQ_OFLD: rc = ofld_eq_alloc(sc, vi, eq); break; #endif default: panic("%s: invalid eq type %d.", __func__, eq->flags & EQ_TYPEMASK); } if (rc != 0) { device_printf(sc->dev, "failed to allocate egress queue(%d): %d\n", eq->flags & EQ_TYPEMASK, rc); } if (isset(&eq->doorbells, DOORBELL_UDB) || isset(&eq->doorbells, DOORBELL_UDBWC) || isset(&eq->doorbells, DOORBELL_WCWR)) { uint32_t s_qpp = sc->params.sge.eq_s_qpp; uint32_t mask = (1 << s_qpp) - 1; volatile uint8_t *udb; udb = sc->udbs_base + UDBS_DB_OFFSET; udb += (eq->cntxt_id >> s_qpp) << PAGE_SHIFT; /* pg offset */ eq->udb_qid = eq->cntxt_id & mask; /* id in page */ if (eq->udb_qid >= PAGE_SIZE / UDBS_SEG_SIZE) clrbit(&eq->doorbells, DOORBELL_WCWR); else { udb += eq->udb_qid << UDBS_SEG_SHIFT; /* seg offset */ eq->udb_qid = 0; } eq->udb = (volatile void *)udb; } return (rc); } static int free_eq(struct adapter *sc, struct sge_eq *eq) { int rc; if (eq->flags & EQ_ALLOCATED) { switch (eq->flags & EQ_TYPEMASK) { case EQ_CTRL: rc = -t4_ctrl_eq_free(sc, sc->mbox, sc->pf, 0, eq->cntxt_id); break; case EQ_ETH: rc = -t4_eth_eq_free(sc, sc->mbox, sc->pf, 0, eq->cntxt_id); break; #ifdef TCP_OFFLOAD case EQ_OFLD: rc = -t4_ofld_eq_free(sc, sc->mbox, sc->pf, 0, eq->cntxt_id); break; #endif default: panic("%s: invalid eq type %d.", __func__, eq->flags & EQ_TYPEMASK); } if (rc != 0) { device_printf(sc->dev, "failed to free egress queue (%d): %d\n", eq->flags & EQ_TYPEMASK, rc); return (rc); } eq->flags &= ~EQ_ALLOCATED; } free_ring(sc, eq->desc_tag, eq->desc_map, eq->ba, eq->desc); if (mtx_initialized(&eq->eq_lock)) mtx_destroy(&eq->eq_lock); bzero(eq, sizeof(*eq)); return (0); } static int alloc_wrq(struct adapter *sc, struct vi_info *vi, struct sge_wrq *wrq, struct sysctl_oid *oid) { int rc; struct sysctl_ctx_list *ctx = vi ? &vi->ctx : &sc->ctx; struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); rc = alloc_eq(sc, vi, &wrq->eq); if (rc) return (rc); wrq->adapter = sc; TASK_INIT(&wrq->wrq_tx_task, 0, wrq_tx_drain, wrq); TAILQ_INIT(&wrq->incomplete_wrs); STAILQ_INIT(&wrq->wr_list); wrq->nwr_pending = 0; wrq->ndesc_needed = 0; SYSCTL_ADD_UAUTO(ctx, children, OID_AUTO, "ba", CTLFLAG_RD, &wrq->eq.ba, "bus address of descriptor ring"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "dmalen", CTLFLAG_RD, NULL, wrq->eq.sidx * EQ_ESIZE + sc->params.sge.spg_len, "desc ring size in bytes"); SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "cntxt_id", CTLFLAG_RD, &wrq->eq.cntxt_id, 0, "SGE context id of the queue"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cidx", CTLTYPE_INT | CTLFLAG_RD, &wrq->eq.cidx, 0, sysctl_uint16, "I", "consumer index"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "pidx", CTLTYPE_INT | CTLFLAG_RD, &wrq->eq.pidx, 0, sysctl_uint16, "I", "producer index"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "sidx", CTLFLAG_RD, NULL, wrq->eq.sidx, "status page index"); SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "tx_wrs_direct", CTLFLAG_RD, &wrq->tx_wrs_direct, "# of work requests (direct)"); SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "tx_wrs_copied", CTLFLAG_RD, &wrq->tx_wrs_copied, "# of work requests (copied)"); SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "tx_wrs_sspace", CTLFLAG_RD, &wrq->tx_wrs_ss, "# of work requests (copied from scratch space)"); return (rc); } static int free_wrq(struct adapter *sc, struct sge_wrq *wrq) { int rc; rc = free_eq(sc, &wrq->eq); if (rc) return (rc); bzero(wrq, sizeof(*wrq)); return (0); } static int alloc_txq(struct vi_info *vi, struct sge_txq *txq, int idx, struct sysctl_oid *oid) { int rc; struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; struct sge_eq *eq = &txq->eq; char name[16]; struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); rc = mp_ring_alloc(&txq->r, eq->sidx, txq, eth_tx, can_resume_eth_tx, M_CXGBE, M_WAITOK); if (rc != 0) { device_printf(sc->dev, "failed to allocate mp_ring: %d\n", rc); return (rc); } rc = alloc_eq(sc, vi, eq); if (rc != 0) { mp_ring_free(txq->r); txq->r = NULL; return (rc); } /* Can't fail after this point. */ if (idx == 0) sc->sge.eq_base = eq->abs_id - eq->cntxt_id; else KASSERT(eq->cntxt_id + sc->sge.eq_base == eq->abs_id, ("eq_base mismatch")); KASSERT(sc->sge.eq_base == 0 || sc->flags & IS_VF, ("PF with non-zero eq_base")); TASK_INIT(&txq->tx_reclaim_task, 0, tx_reclaim, eq); txq->ifp = vi->ifp; txq->gl = sglist_alloc(TX_SGL_SEGS, M_WAITOK); if (sc->flags & IS_VF) txq->cpl_ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT_XT) | V_TXPKT_INTF(pi->tx_chan)); else txq->cpl_ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT) | V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_PF(G_FW_VIID_PFN(vi->viid)) | V_TXPKT_VF(G_FW_VIID_VIN(vi->viid)) | V_TXPKT_VF_VLD(G_FW_VIID_VIVLD(vi->viid))); txq->tc_idx = -1; txq->sdesc = malloc(eq->sidx * sizeof(struct tx_sdesc), M_CXGBE, M_ZERO | M_WAITOK); snprintf(name, sizeof(name), "%d", idx); oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, name, CTLFLAG_RD, NULL, "tx queue"); children = SYSCTL_CHILDREN(oid); SYSCTL_ADD_UAUTO(&vi->ctx, children, OID_AUTO, "ba", CTLFLAG_RD, &eq->ba, "bus address of descriptor ring"); SYSCTL_ADD_INT(&vi->ctx, children, OID_AUTO, "dmalen", CTLFLAG_RD, NULL, eq->sidx * EQ_ESIZE + sc->params.sge.spg_len, "desc ring size in bytes"); SYSCTL_ADD_UINT(&vi->ctx, children, OID_AUTO, "abs_id", CTLFLAG_RD, &eq->abs_id, 0, "absolute id of the queue"); SYSCTL_ADD_UINT(&vi->ctx, children, OID_AUTO, "cntxt_id", CTLFLAG_RD, &eq->cntxt_id, 0, "SGE context id of the queue"); SYSCTL_ADD_PROC(&vi->ctx, children, OID_AUTO, "cidx", CTLTYPE_INT | CTLFLAG_RD, &eq->cidx, 0, sysctl_uint16, "I", "consumer index"); SYSCTL_ADD_PROC(&vi->ctx, children, OID_AUTO, "pidx", CTLTYPE_INT | CTLFLAG_RD, &eq->pidx, 0, sysctl_uint16, "I", "producer index"); SYSCTL_ADD_INT(&vi->ctx, children, OID_AUTO, "sidx", CTLFLAG_RD, NULL, eq->sidx, "status page index"); SYSCTL_ADD_PROC(&vi->ctx, children, OID_AUTO, "tc", CTLTYPE_INT | CTLFLAG_RW, vi, idx, sysctl_tc, "I", "traffic class (-1 means none)"); SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "txcsum", CTLFLAG_RD, &txq->txcsum, "# of times hardware assisted with checksum"); SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "vlan_insertion", CTLFLAG_RD, &txq->vlan_insertion, "# of times hardware inserted 802.1Q tag"); SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "tso_wrs", CTLFLAG_RD, &txq->tso_wrs, "# of TSO work requests"); SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "imm_wrs", CTLFLAG_RD, &txq->imm_wrs, "# of work requests with immediate data"); SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "sgl_wrs", CTLFLAG_RD, &txq->sgl_wrs, "# of work requests with direct SGL"); SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "txpkt_wrs", CTLFLAG_RD, &txq->txpkt_wrs, "# of txpkt work requests (one pkt/WR)"); SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "txpkts0_wrs", CTLFLAG_RD, &txq->txpkts0_wrs, "# of txpkts (type 0) work requests"); SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "txpkts1_wrs", CTLFLAG_RD, &txq->txpkts1_wrs, "# of txpkts (type 1) work requests"); SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "txpkts0_pkts", CTLFLAG_RD, &txq->txpkts0_pkts, "# of frames tx'd using type0 txpkts work requests"); SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "txpkts1_pkts", CTLFLAG_RD, &txq->txpkts1_pkts, "# of frames tx'd using type1 txpkts work requests"); SYSCTL_ADD_COUNTER_U64(&vi->ctx, children, OID_AUTO, "r_enqueues", CTLFLAG_RD, &txq->r->enqueues, "# of enqueues to the mp_ring for this queue"); SYSCTL_ADD_COUNTER_U64(&vi->ctx, children, OID_AUTO, "r_drops", CTLFLAG_RD, &txq->r->drops, "# of drops in the mp_ring for this queue"); SYSCTL_ADD_COUNTER_U64(&vi->ctx, children, OID_AUTO, "r_starts", CTLFLAG_RD, &txq->r->starts, "# of normal consumer starts in the mp_ring for this queue"); SYSCTL_ADD_COUNTER_U64(&vi->ctx, children, OID_AUTO, "r_stalls", CTLFLAG_RD, &txq->r->stalls, "# of consumer stalls in the mp_ring for this queue"); SYSCTL_ADD_COUNTER_U64(&vi->ctx, children, OID_AUTO, "r_restarts", CTLFLAG_RD, &txq->r->restarts, "# of consumer restarts in the mp_ring for this queue"); SYSCTL_ADD_COUNTER_U64(&vi->ctx, children, OID_AUTO, "r_abdications", CTLFLAG_RD, &txq->r->abdications, "# of consumer abdications in the mp_ring for this queue"); return (0); } static int free_txq(struct vi_info *vi, struct sge_txq *txq) { int rc; struct adapter *sc = vi->pi->adapter; struct sge_eq *eq = &txq->eq; rc = free_eq(sc, eq); if (rc) return (rc); sglist_free(txq->gl); free(txq->sdesc, M_CXGBE); mp_ring_free(txq->r); bzero(txq, sizeof(*txq)); return (0); } static void oneseg_dma_callback(void *arg, bus_dma_segment_t *segs, int nseg, int error) { bus_addr_t *ba = arg; KASSERT(nseg == 1, ("%s meant for single segment mappings only.", __func__)); *ba = error ? 0 : segs->ds_addr; } static inline void ring_fl_db(struct adapter *sc, struct sge_fl *fl) { uint32_t n, v; n = IDXDIFF(fl->pidx / 8, fl->dbidx, fl->sidx); MPASS(n > 0); wmb(); v = fl->dbval | V_PIDX(n); if (fl->udb) *fl->udb = htole32(v); else t4_write_reg(sc, sc->sge_kdoorbell_reg, v); IDXINCR(fl->dbidx, n, fl->sidx); } /* * Fills up the freelist by allocating up to 'n' buffers. Buffers that are * recycled do not count towards this allocation budget. * * Returns non-zero to indicate that this freelist should be added to the list * of starving freelists. */ static int refill_fl(struct adapter *sc, struct sge_fl *fl, int n) { __be64 *d; struct fl_sdesc *sd; uintptr_t pa; caddr_t cl; struct cluster_layout *cll; struct sw_zone_info *swz; struct cluster_metadata *clm; uint16_t max_pidx; uint16_t hw_cidx = fl->hw_cidx; /* stable snapshot */ FL_LOCK_ASSERT_OWNED(fl); /* * We always stop at the beginning of the hardware descriptor that's just * before the one with the hw cidx. This is to avoid hw pidx = hw cidx, * which would mean an empty freelist to the chip. */ max_pidx = __predict_false(hw_cidx == 0) ? fl->sidx - 1 : hw_cidx - 1; if (fl->pidx == max_pidx * 8) return (0); d = &fl->desc[fl->pidx]; sd = &fl->sdesc[fl->pidx]; cll = &fl->cll_def; /* default layout */ swz = &sc->sge.sw_zone_info[cll->zidx]; while (n > 0) { if (sd->cl != NULL) { if (sd->nmbuf == 0) { /* * Fast recycle without involving any atomics on * the cluster's metadata (if the cluster has * metadata). This happens when all frames * received in the cluster were small enough to * fit within a single mbuf each. */ fl->cl_fast_recycled++; #ifdef INVARIANTS clm = cl_metadata(sc, fl, &sd->cll, sd->cl); if (clm != NULL) MPASS(clm->refcount == 1); #endif goto recycled_fast; } /* * Cluster is guaranteed to have metadata. Clusters * without metadata always take the fast recycle path * when they're recycled. */ clm = cl_metadata(sc, fl, &sd->cll, sd->cl); MPASS(clm != NULL); if (atomic_fetchadd_int(&clm->refcount, -1) == 1) { fl->cl_recycled++; counter_u64_add(extfree_rels, 1); goto recycled; } sd->cl = NULL; /* gave up my reference */ } MPASS(sd->cl == NULL); alloc: cl = uma_zalloc(swz->zone, M_NOWAIT); if (__predict_false(cl == NULL)) { if (cll == &fl->cll_alt || fl->cll_alt.zidx == -1 || fl->cll_def.zidx == fl->cll_alt.zidx) break; /* fall back to the safe zone */ cll = &fl->cll_alt; swz = &sc->sge.sw_zone_info[cll->zidx]; goto alloc; } fl->cl_allocated++; n--; pa = pmap_kextract((vm_offset_t)cl); pa += cll->region1; sd->cl = cl; sd->cll = *cll; *d = htobe64(pa | cll->hwidx); clm = cl_metadata(sc, fl, cll, cl); if (clm != NULL) { recycled: #ifdef INVARIANTS clm->sd = sd; #endif clm->refcount = 1; } sd->nmbuf = 0; recycled_fast: d++; sd++; if (__predict_false(++fl->pidx % 8 == 0)) { uint16_t pidx = fl->pidx / 8; if (__predict_false(pidx == fl->sidx)) { fl->pidx = 0; pidx = 0; sd = fl->sdesc; d = fl->desc; } if (pidx == max_pidx) break; if (IDXDIFF(pidx, fl->dbidx, fl->sidx) >= 4) ring_fl_db(sc, fl); } } if (fl->pidx / 8 != fl->dbidx) ring_fl_db(sc, fl); return (FL_RUNNING_LOW(fl) && !(fl->flags & FL_STARVING)); } /* * Attempt to refill all starving freelists. */ static void refill_sfl(void *arg) { struct adapter *sc = arg; struct sge_fl *fl, *fl_temp; mtx_assert(&sc->sfl_lock, MA_OWNED); TAILQ_FOREACH_SAFE(fl, &sc->sfl, link, fl_temp) { FL_LOCK(fl); refill_fl(sc, fl, 64); if (FL_NOT_RUNNING_LOW(fl) || fl->flags & FL_DOOMED) { TAILQ_REMOVE(&sc->sfl, fl, link); fl->flags &= ~FL_STARVING; } FL_UNLOCK(fl); } if (!TAILQ_EMPTY(&sc->sfl)) callout_schedule(&sc->sfl_callout, hz / 5); } static int alloc_fl_sdesc(struct sge_fl *fl) { fl->sdesc = malloc(fl->sidx * 8 * sizeof(struct fl_sdesc), M_CXGBE, M_ZERO | M_WAITOK); return (0); } static void free_fl_sdesc(struct adapter *sc, struct sge_fl *fl) { struct fl_sdesc *sd; struct cluster_metadata *clm; struct cluster_layout *cll; int i; sd = fl->sdesc; for (i = 0; i < fl->sidx * 8; i++, sd++) { if (sd->cl == NULL) continue; cll = &sd->cll; clm = cl_metadata(sc, fl, cll, sd->cl); if (sd->nmbuf == 0) uma_zfree(sc->sge.sw_zone_info[cll->zidx].zone, sd->cl); else if (clm && atomic_fetchadd_int(&clm->refcount, -1) == 1) { uma_zfree(sc->sge.sw_zone_info[cll->zidx].zone, sd->cl); counter_u64_add(extfree_rels, 1); } sd->cl = NULL; } free(fl->sdesc, M_CXGBE); fl->sdesc = NULL; } static inline void get_pkt_gl(struct mbuf *m, struct sglist *gl) { int rc; M_ASSERTPKTHDR(m); sglist_reset(gl); rc = sglist_append_mbuf(gl, m); if (__predict_false(rc != 0)) { panic("%s: mbuf %p (%d segs) was vetted earlier but now fails " "with %d.", __func__, m, mbuf_nsegs(m), rc); } KASSERT(gl->sg_nseg == mbuf_nsegs(m), ("%s: nsegs changed for mbuf %p from %d to %d", __func__, m, mbuf_nsegs(m), gl->sg_nseg)); KASSERT(gl->sg_nseg > 0 && gl->sg_nseg <= (needs_tso(m) ? TX_SGL_SEGS_TSO : TX_SGL_SEGS), ("%s: %d segments, should have been 1 <= nsegs <= %d", __func__, gl->sg_nseg, needs_tso(m) ? TX_SGL_SEGS_TSO : TX_SGL_SEGS)); } /* * len16 for a txpkt WR with a GL. Includes the firmware work request header. */ static inline u_int txpkt_len16(u_int nsegs, u_int tso) { u_int n; MPASS(nsegs > 0); nsegs--; /* first segment is part of ulptx_sgl */ n = sizeof(struct fw_eth_tx_pkt_wr) + sizeof(struct cpl_tx_pkt_core) + sizeof(struct ulptx_sgl) + 8 * ((3 * nsegs) / 2 + (nsegs & 1)); if (tso) n += sizeof(struct cpl_tx_pkt_lso_core); return (howmany(n, 16)); } /* * len16 for a txpkt_vm WR with a GL. Includes the firmware work * request header. */ static inline u_int txpkt_vm_len16(u_int nsegs, u_int tso) { u_int n; MPASS(nsegs > 0); nsegs--; /* first segment is part of ulptx_sgl */ n = sizeof(struct fw_eth_tx_pkt_vm_wr) + sizeof(struct cpl_tx_pkt_core) + sizeof(struct ulptx_sgl) + 8 * ((3 * nsegs) / 2 + (nsegs & 1)); if (tso) n += sizeof(struct cpl_tx_pkt_lso_core); return (howmany(n, 16)); } /* * len16 for a txpkts type 0 WR with a GL. Does not include the firmware work * request header. */ static inline u_int txpkts0_len16(u_int nsegs) { u_int n; MPASS(nsegs > 0); nsegs--; /* first segment is part of ulptx_sgl */ n = sizeof(struct ulp_txpkt) + sizeof(struct ulptx_idata) + sizeof(struct cpl_tx_pkt_core) + sizeof(struct ulptx_sgl) + 8 * ((3 * nsegs) / 2 + (nsegs & 1)); return (howmany(n, 16)); } /* * len16 for a txpkts type 1 WR with a GL. Does not include the firmware work * request header. */ static inline u_int txpkts1_len16(void) { u_int n; n = sizeof(struct cpl_tx_pkt_core) + sizeof(struct ulptx_sgl); return (howmany(n, 16)); } static inline u_int imm_payload(u_int ndesc) { u_int n; n = ndesc * EQ_ESIZE - sizeof(struct fw_eth_tx_pkt_wr) - sizeof(struct cpl_tx_pkt_core); return (n); } /* * Write a VM txpkt WR for this packet to the hardware descriptors, update the * software descriptor, and advance the pidx. It is guaranteed that enough * descriptors are available. * * The return value is the # of hardware descriptors used. */ static u_int write_txpkt_vm_wr(struct adapter *sc, struct sge_txq *txq, struct fw_eth_tx_pkt_vm_wr *wr, struct mbuf *m0, u_int available) { struct sge_eq *eq = &txq->eq; struct tx_sdesc *txsd; struct cpl_tx_pkt_core *cpl; uint32_t ctrl; /* used in many unrelated places */ uint64_t ctrl1; int csum_type, len16, ndesc, pktlen, nsegs; caddr_t dst; TXQ_LOCK_ASSERT_OWNED(txq); M_ASSERTPKTHDR(m0); MPASS(available > 0 && available < eq->sidx); len16 = mbuf_len16(m0); nsegs = mbuf_nsegs(m0); pktlen = m0->m_pkthdr.len; ctrl = sizeof(struct cpl_tx_pkt_core); if (needs_tso(m0)) ctrl += sizeof(struct cpl_tx_pkt_lso_core); ndesc = howmany(len16, EQ_ESIZE / 16); MPASS(ndesc <= available); /* Firmware work request header */ MPASS(wr == (void *)&eq->desc[eq->pidx]); wr->op_immdlen = htobe32(V_FW_WR_OP(FW_ETH_TX_PKT_VM_WR) | V_FW_ETH_TX_PKT_WR_IMMDLEN(ctrl)); ctrl = V_FW_WR_LEN16(len16); wr->equiq_to_len16 = htobe32(ctrl); wr->r3[0] = 0; wr->r3[1] = 0; /* * Copy over ethmacdst, ethmacsrc, ethtype, and vlantci. * vlantci is ignored unless the ethtype is 0x8100, so it's * simpler to always copy it rather than making it * conditional. Also, it seems that we do not have to set * vlantci or fake the ethtype when doing VLAN tag insertion. */ m_copydata(m0, 0, sizeof(struct ether_header) + 2, wr->ethmacdst); csum_type = -1; if (needs_tso(m0)) { struct cpl_tx_pkt_lso_core *lso = (void *)(wr + 1); KASSERT(m0->m_pkthdr.l2hlen > 0 && m0->m_pkthdr.l3hlen > 0 && m0->m_pkthdr.l4hlen > 0, ("%s: mbuf %p needs TSO but missing header lengths", __func__, m0)); ctrl = V_LSO_OPCODE(CPL_TX_PKT_LSO) | F_LSO_FIRST_SLICE | F_LSO_LAST_SLICE | V_LSO_IPHDR_LEN(m0->m_pkthdr.l3hlen >> 2) | V_LSO_TCPHDR_LEN(m0->m_pkthdr.l4hlen >> 2); if (m0->m_pkthdr.l2hlen == sizeof(struct ether_vlan_header)) ctrl |= V_LSO_ETHHDR_LEN(1); if (m0->m_pkthdr.l3hlen == sizeof(struct ip6_hdr)) ctrl |= F_LSO_IPV6; lso->lso_ctrl = htobe32(ctrl); lso->ipid_ofst = htobe16(0); lso->mss = htobe16(m0->m_pkthdr.tso_segsz); lso->seqno_offset = htobe32(0); lso->len = htobe32(pktlen); if (m0->m_pkthdr.l3hlen == sizeof(struct ip6_hdr)) csum_type = TX_CSUM_TCPIP6; else csum_type = TX_CSUM_TCPIP; cpl = (void *)(lso + 1); txq->tso_wrs++; } else { if (m0->m_pkthdr.csum_flags & CSUM_IP_TCP) csum_type = TX_CSUM_TCPIP; else if (m0->m_pkthdr.csum_flags & CSUM_IP_UDP) csum_type = TX_CSUM_UDPIP; else if (m0->m_pkthdr.csum_flags & CSUM_IP6_TCP) csum_type = TX_CSUM_TCPIP6; else if (m0->m_pkthdr.csum_flags & CSUM_IP6_UDP) csum_type = TX_CSUM_UDPIP6; #if defined(INET) else if (m0->m_pkthdr.csum_flags & CSUM_IP) { /* * XXX: The firmware appears to stomp on the * fragment/flags field of the IP header when * using TX_CSUM_IP. Fall back to doing * software checksums. */ u_short *sump; struct mbuf *m; int offset; m = m0; offset = 0; sump = m_advance(&m, &offset, m0->m_pkthdr.l2hlen + offsetof(struct ip, ip_sum)); *sump = in_cksum_skip(m0, m0->m_pkthdr.l2hlen + m0->m_pkthdr.l3hlen, m0->m_pkthdr.l2hlen); m0->m_pkthdr.csum_flags &= ~CSUM_IP; } #endif cpl = (void *)(wr + 1); } /* Checksum offload */ ctrl1 = 0; if (needs_l3_csum(m0) == 0) ctrl1 |= F_TXPKT_IPCSUM_DIS; if (csum_type >= 0) { KASSERT(m0->m_pkthdr.l2hlen > 0 && m0->m_pkthdr.l3hlen > 0, ("%s: mbuf %p needs checksum offload but missing header lengths", __func__, m0)); if (chip_id(sc) <= CHELSIO_T5) { ctrl1 |= V_TXPKT_ETHHDR_LEN(m0->m_pkthdr.l2hlen - ETHER_HDR_LEN); } else { ctrl1 |= V_T6_TXPKT_ETHHDR_LEN(m0->m_pkthdr.l2hlen - ETHER_HDR_LEN); } ctrl1 |= V_TXPKT_IPHDR_LEN(m0->m_pkthdr.l3hlen); ctrl1 |= V_TXPKT_CSUM_TYPE(csum_type); } else ctrl1 |= F_TXPKT_L4CSUM_DIS; if (m0->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TCP | CSUM_UDP | CSUM_UDP_IPV6 | CSUM_TCP_IPV6 | CSUM_TSO)) txq->txcsum++; /* some hardware assistance provided */ /* VLAN tag insertion */ if (needs_vlan_insertion(m0)) { ctrl1 |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(m0->m_pkthdr.ether_vtag); txq->vlan_insertion++; } /* CPL header */ cpl->ctrl0 = txq->cpl_ctrl0; cpl->pack = 0; cpl->len = htobe16(pktlen); cpl->ctrl1 = htobe64(ctrl1); /* SGL */ dst = (void *)(cpl + 1); /* * A packet using TSO will use up an entire descriptor for the * firmware work request header, LSO CPL, and TX_PKT_XT CPL. * If this descriptor is the last descriptor in the ring, wrap * around to the front of the ring explicitly for the start of * the sgl. */ if (dst == (void *)&eq->desc[eq->sidx]) { dst = (void *)&eq->desc[0]; write_gl_to_txd(txq, m0, &dst, 0); } else write_gl_to_txd(txq, m0, &dst, eq->sidx - ndesc < eq->pidx); txq->sgl_wrs++; txq->txpkt_wrs++; txsd = &txq->sdesc[eq->pidx]; txsd->m = m0; txsd->desc_used = ndesc; return (ndesc); } /* * Write a txpkt WR for this packet to the hardware descriptors, update the * software descriptor, and advance the pidx. It is guaranteed that enough * descriptors are available. * * The return value is the # of hardware descriptors used. */ static u_int write_txpkt_wr(struct sge_txq *txq, struct fw_eth_tx_pkt_wr *wr, struct mbuf *m0, u_int available) { struct sge_eq *eq = &txq->eq; struct tx_sdesc *txsd; struct cpl_tx_pkt_core *cpl; uint32_t ctrl; /* used in many unrelated places */ uint64_t ctrl1; int len16, ndesc, pktlen, nsegs; caddr_t dst; TXQ_LOCK_ASSERT_OWNED(txq); M_ASSERTPKTHDR(m0); MPASS(available > 0 && available < eq->sidx); len16 = mbuf_len16(m0); nsegs = mbuf_nsegs(m0); pktlen = m0->m_pkthdr.len; ctrl = sizeof(struct cpl_tx_pkt_core); if (needs_tso(m0)) ctrl += sizeof(struct cpl_tx_pkt_lso_core); else if (pktlen <= imm_payload(2) && available >= 2) { /* Immediate data. Recalculate len16 and set nsegs to 0. */ ctrl += pktlen; len16 = howmany(sizeof(struct fw_eth_tx_pkt_wr) + sizeof(struct cpl_tx_pkt_core) + pktlen, 16); nsegs = 0; } ndesc = howmany(len16, EQ_ESIZE / 16); MPASS(ndesc <= available); /* Firmware work request header */ MPASS(wr == (void *)&eq->desc[eq->pidx]); wr->op_immdlen = htobe32(V_FW_WR_OP(FW_ETH_TX_PKT_WR) | V_FW_ETH_TX_PKT_WR_IMMDLEN(ctrl)); ctrl = V_FW_WR_LEN16(len16); wr->equiq_to_len16 = htobe32(ctrl); wr->r3 = 0; if (needs_tso(m0)) { struct cpl_tx_pkt_lso_core *lso = (void *)(wr + 1); KASSERT(m0->m_pkthdr.l2hlen > 0 && m0->m_pkthdr.l3hlen > 0 && m0->m_pkthdr.l4hlen > 0, ("%s: mbuf %p needs TSO but missing header lengths", __func__, m0)); ctrl = V_LSO_OPCODE(CPL_TX_PKT_LSO) | F_LSO_FIRST_SLICE | F_LSO_LAST_SLICE | V_LSO_IPHDR_LEN(m0->m_pkthdr.l3hlen >> 2) | V_LSO_TCPHDR_LEN(m0->m_pkthdr.l4hlen >> 2); if (m0->m_pkthdr.l2hlen == sizeof(struct ether_vlan_header)) ctrl |= V_LSO_ETHHDR_LEN(1); if (m0->m_pkthdr.l3hlen == sizeof(struct ip6_hdr)) ctrl |= F_LSO_IPV6; lso->lso_ctrl = htobe32(ctrl); lso->ipid_ofst = htobe16(0); lso->mss = htobe16(m0->m_pkthdr.tso_segsz); lso->seqno_offset = htobe32(0); lso->len = htobe32(pktlen); cpl = (void *)(lso + 1); txq->tso_wrs++; } else cpl = (void *)(wr + 1); /* Checksum offload */ ctrl1 = 0; if (needs_l3_csum(m0) == 0) ctrl1 |= F_TXPKT_IPCSUM_DIS; if (needs_l4_csum(m0) == 0) ctrl1 |= F_TXPKT_L4CSUM_DIS; if (m0->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TCP | CSUM_UDP | CSUM_UDP_IPV6 | CSUM_TCP_IPV6 | CSUM_TSO)) txq->txcsum++; /* some hardware assistance provided */ /* VLAN tag insertion */ if (needs_vlan_insertion(m0)) { ctrl1 |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(m0->m_pkthdr.ether_vtag); txq->vlan_insertion++; } /* CPL header */ cpl->ctrl0 = txq->cpl_ctrl0; cpl->pack = 0; cpl->len = htobe16(pktlen); cpl->ctrl1 = htobe64(ctrl1); /* SGL */ dst = (void *)(cpl + 1); if (nsegs > 0) { write_gl_to_txd(txq, m0, &dst, eq->sidx - ndesc < eq->pidx); txq->sgl_wrs++; } else { struct mbuf *m; for (m = m0; m != NULL; m = m->m_next) { copy_to_txd(eq, mtod(m, caddr_t), &dst, m->m_len); #ifdef INVARIANTS pktlen -= m->m_len; #endif } #ifdef INVARIANTS KASSERT(pktlen == 0, ("%s: %d bytes left.", __func__, pktlen)); #endif txq->imm_wrs++; } txq->txpkt_wrs++; txsd = &txq->sdesc[eq->pidx]; txsd->m = m0; txsd->desc_used = ndesc; return (ndesc); } static int try_txpkts(struct mbuf *m, struct mbuf *n, struct txpkts *txp, u_int available) { u_int needed, nsegs1, nsegs2, l1, l2; if (cannot_use_txpkts(m) || cannot_use_txpkts(n)) return (1); nsegs1 = mbuf_nsegs(m); nsegs2 = mbuf_nsegs(n); if (nsegs1 + nsegs2 == 2) { txp->wr_type = 1; l1 = l2 = txpkts1_len16(); } else { txp->wr_type = 0; l1 = txpkts0_len16(nsegs1); l2 = txpkts0_len16(nsegs2); } txp->len16 = howmany(sizeof(struct fw_eth_tx_pkts_wr), 16) + l1 + l2; needed = howmany(txp->len16, EQ_ESIZE / 16); if (needed > SGE_MAX_WR_NDESC || needed > available) return (1); txp->plen = m->m_pkthdr.len + n->m_pkthdr.len; if (txp->plen > 65535) return (1); txp->npkt = 2; set_mbuf_len16(m, l1); set_mbuf_len16(n, l2); return (0); } static int add_to_txpkts(struct mbuf *m, struct txpkts *txp, u_int available) { u_int plen, len16, needed, nsegs; MPASS(txp->wr_type == 0 || txp->wr_type == 1); nsegs = mbuf_nsegs(m); if (needs_tso(m) || (txp->wr_type == 1 && nsegs != 1)) return (1); plen = txp->plen + m->m_pkthdr.len; if (plen > 65535) return (1); if (txp->wr_type == 0) len16 = txpkts0_len16(nsegs); else len16 = txpkts1_len16(); needed = howmany(txp->len16 + len16, EQ_ESIZE / 16); if (needed > SGE_MAX_WR_NDESC || needed > available) return (1); txp->npkt++; txp->plen = plen; txp->len16 += len16; set_mbuf_len16(m, len16); return (0); } /* * Write a txpkts WR for the packets in txp to the hardware descriptors, update * the software descriptor, and advance the pidx. It is guaranteed that enough * descriptors are available. * * The return value is the # of hardware descriptors used. */ static u_int write_txpkts_wr(struct sge_txq *txq, struct fw_eth_tx_pkts_wr *wr, struct mbuf *m0, const struct txpkts *txp, u_int available) { struct sge_eq *eq = &txq->eq; struct tx_sdesc *txsd; struct cpl_tx_pkt_core *cpl; uint32_t ctrl; uint64_t ctrl1; int ndesc, checkwrap; struct mbuf *m; void *flitp; TXQ_LOCK_ASSERT_OWNED(txq); MPASS(txp->npkt > 0); MPASS(txp->plen < 65536); MPASS(m0 != NULL); MPASS(m0->m_nextpkt != NULL); MPASS(txp->len16 <= howmany(SGE_MAX_WR_LEN, 16)); MPASS(available > 0 && available < eq->sidx); ndesc = howmany(txp->len16, EQ_ESIZE / 16); MPASS(ndesc <= available); MPASS(wr == (void *)&eq->desc[eq->pidx]); wr->op_pkd = htobe32(V_FW_WR_OP(FW_ETH_TX_PKTS_WR)); ctrl = V_FW_WR_LEN16(txp->len16); wr->equiq_to_len16 = htobe32(ctrl); wr->plen = htobe16(txp->plen); wr->npkt = txp->npkt; wr->r3 = 0; wr->type = txp->wr_type; flitp = wr + 1; /* * At this point we are 16B into a hardware descriptor. If checkwrap is * set then we know the WR is going to wrap around somewhere. We'll * check for that at appropriate points. */ checkwrap = eq->sidx - ndesc < eq->pidx; for (m = m0; m != NULL; m = m->m_nextpkt) { if (txp->wr_type == 0) { struct ulp_txpkt *ulpmc; struct ulptx_idata *ulpsc; /* ULP master command */ ulpmc = flitp; ulpmc->cmd_dest = htobe32(V_ULPTX_CMD(ULP_TX_PKT) | V_ULP_TXPKT_DEST(0) | V_ULP_TXPKT_FID(eq->iqid)); ulpmc->len = htobe32(mbuf_len16(m)); /* ULP subcommand */ ulpsc = (void *)(ulpmc + 1); ulpsc->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_IMM) | F_ULP_TX_SC_MORE); ulpsc->len = htobe32(sizeof(struct cpl_tx_pkt_core)); cpl = (void *)(ulpsc + 1); if (checkwrap && (uintptr_t)cpl == (uintptr_t)&eq->desc[eq->sidx]) cpl = (void *)&eq->desc[0]; } else { cpl = flitp; } /* Checksum offload */ ctrl1 = 0; if (needs_l3_csum(m) == 0) ctrl1 |= F_TXPKT_IPCSUM_DIS; if (needs_l4_csum(m) == 0) ctrl1 |= F_TXPKT_L4CSUM_DIS; if (m->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TCP | CSUM_UDP | CSUM_UDP_IPV6 | CSUM_TCP_IPV6 | CSUM_TSO)) txq->txcsum++; /* some hardware assistance provided */ /* VLAN tag insertion */ if (needs_vlan_insertion(m)) { ctrl1 |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(m->m_pkthdr.ether_vtag); txq->vlan_insertion++; } /* CPL header */ cpl->ctrl0 = txq->cpl_ctrl0; cpl->pack = 0; cpl->len = htobe16(m->m_pkthdr.len); cpl->ctrl1 = htobe64(ctrl1); flitp = cpl + 1; if (checkwrap && (uintptr_t)flitp == (uintptr_t)&eq->desc[eq->sidx]) flitp = (void *)&eq->desc[0]; write_gl_to_txd(txq, m, (caddr_t *)(&flitp), checkwrap); } if (txp->wr_type == 0) { txq->txpkts0_pkts += txp->npkt; txq->txpkts0_wrs++; } else { txq->txpkts1_pkts += txp->npkt; txq->txpkts1_wrs++; } txsd = &txq->sdesc[eq->pidx]; txsd->m = m0; txsd->desc_used = ndesc; return (ndesc); } /* * If the SGL ends on an address that is not 16 byte aligned, this function will * add a 0 filled flit at the end. */ static void write_gl_to_txd(struct sge_txq *txq, struct mbuf *m, caddr_t *to, int checkwrap) { struct sge_eq *eq = &txq->eq; struct sglist *gl = txq->gl; struct sglist_seg *seg; __be64 *flitp, *wrap; struct ulptx_sgl *usgl; int i, nflits, nsegs; KASSERT(((uintptr_t)(*to) & 0xf) == 0, ("%s: SGL must start at a 16 byte boundary: %p", __func__, *to)); MPASS((uintptr_t)(*to) >= (uintptr_t)&eq->desc[0]); MPASS((uintptr_t)(*to) < (uintptr_t)&eq->desc[eq->sidx]); get_pkt_gl(m, gl); nsegs = gl->sg_nseg; MPASS(nsegs > 0); nflits = (3 * (nsegs - 1)) / 2 + ((nsegs - 1) & 1) + 2; flitp = (__be64 *)(*to); wrap = (__be64 *)(&eq->desc[eq->sidx]); seg = &gl->sg_segs[0]; usgl = (void *)flitp; /* * We start at a 16 byte boundary somewhere inside the tx descriptor * ring, so we're at least 16 bytes away from the status page. There is * no chance of a wrap around in the middle of usgl (which is 16 bytes). */ usgl->cmd_nsge = htobe32(V_ULPTX_CMD(ULP_TX_SC_DSGL) | V_ULPTX_NSGE(nsegs)); usgl->len0 = htobe32(seg->ss_len); usgl->addr0 = htobe64(seg->ss_paddr); seg++; if (checkwrap == 0 || (uintptr_t)(flitp + nflits) <= (uintptr_t)wrap) { /* Won't wrap around at all */ for (i = 0; i < nsegs - 1; i++, seg++) { usgl->sge[i / 2].len[i & 1] = htobe32(seg->ss_len); usgl->sge[i / 2].addr[i & 1] = htobe64(seg->ss_paddr); } if (i & 1) usgl->sge[i / 2].len[1] = htobe32(0); flitp += nflits; } else { /* Will wrap somewhere in the rest of the SGL */ /* 2 flits already written, write the rest flit by flit */ flitp = (void *)(usgl + 1); for (i = 0; i < nflits - 2; i++) { if (flitp == wrap) flitp = (void *)eq->desc; *flitp++ = get_flit(seg, nsegs - 1, i); } } if (nflits & 1) { MPASS(((uintptr_t)flitp) & 0xf); *flitp++ = 0; } MPASS((((uintptr_t)flitp) & 0xf) == 0); if (__predict_false(flitp == wrap)) *to = (void *)eq->desc; else *to = (void *)flitp; } static inline void copy_to_txd(struct sge_eq *eq, caddr_t from, caddr_t *to, int len) { MPASS((uintptr_t)(*to) >= (uintptr_t)&eq->desc[0]); MPASS((uintptr_t)(*to) < (uintptr_t)&eq->desc[eq->sidx]); if (__predict_true((uintptr_t)(*to) + len <= (uintptr_t)&eq->desc[eq->sidx])) { bcopy(from, *to, len); (*to) += len; } else { int portion = (uintptr_t)&eq->desc[eq->sidx] - (uintptr_t)(*to); bcopy(from, *to, portion); from += portion; portion = len - portion; /* remaining */ bcopy(from, (void *)eq->desc, portion); (*to) = (caddr_t)eq->desc + portion; } } static inline void ring_eq_db(struct adapter *sc, struct sge_eq *eq, u_int n) { u_int db; MPASS(n > 0); db = eq->doorbells; if (n > 1) clrbit(&db, DOORBELL_WCWR); wmb(); switch (ffs(db) - 1) { case DOORBELL_UDB: *eq->udb = htole32(V_QID(eq->udb_qid) | V_PIDX(n)); break; case DOORBELL_WCWR: { volatile uint64_t *dst, *src; int i; /* * Queues whose 128B doorbell segment fits in the page do not * use relative qid (udb_qid is always 0). Only queues with * doorbell segments can do WCWR. */ KASSERT(eq->udb_qid == 0 && n == 1, ("%s: inappropriate doorbell (0x%x, %d, %d) for eq %p", __func__, eq->doorbells, n, eq->dbidx, eq)); dst = (volatile void *)((uintptr_t)eq->udb + UDBS_WR_OFFSET - UDBS_DB_OFFSET); i = eq->dbidx; src = (void *)&eq->desc[i]; while (src != (void *)&eq->desc[i + 1]) *dst++ = *src++; wmb(); break; } case DOORBELL_UDBWC: *eq->udb = htole32(V_QID(eq->udb_qid) | V_PIDX(n)); wmb(); break; case DOORBELL_KDB: t4_write_reg(sc, sc->sge_kdoorbell_reg, V_QID(eq->cntxt_id) | V_PIDX(n)); break; } IDXINCR(eq->dbidx, n, eq->sidx); } static inline u_int reclaimable_tx_desc(struct sge_eq *eq) { uint16_t hw_cidx; hw_cidx = read_hw_cidx(eq); return (IDXDIFF(hw_cidx, eq->cidx, eq->sidx)); } static inline u_int total_available_tx_desc(struct sge_eq *eq) { uint16_t hw_cidx, pidx; hw_cidx = read_hw_cidx(eq); pidx = eq->pidx; if (pidx == hw_cidx) return (eq->sidx - 1); else return (IDXDIFF(hw_cidx, pidx, eq->sidx) - 1); } static inline uint16_t read_hw_cidx(struct sge_eq *eq) { struct sge_qstat *spg = (void *)&eq->desc[eq->sidx]; uint16_t cidx = spg->cidx; /* stable snapshot */ return (be16toh(cidx)); } /* * Reclaim 'n' descriptors approximately. */ static u_int reclaim_tx_descs(struct sge_txq *txq, u_int n) { struct tx_sdesc *txsd; struct sge_eq *eq = &txq->eq; u_int can_reclaim, reclaimed; TXQ_LOCK_ASSERT_OWNED(txq); MPASS(n > 0); reclaimed = 0; can_reclaim = reclaimable_tx_desc(eq); while (can_reclaim && reclaimed < n) { int ndesc; struct mbuf *m, *nextpkt; txsd = &txq->sdesc[eq->cidx]; ndesc = txsd->desc_used; /* Firmware doesn't return "partial" credits. */ KASSERT(can_reclaim >= ndesc, ("%s: unexpected number of credits: %d, %d", __func__, can_reclaim, ndesc)); for (m = txsd->m; m != NULL; m = nextpkt) { nextpkt = m->m_nextpkt; m->m_nextpkt = NULL; m_freem(m); } reclaimed += ndesc; can_reclaim -= ndesc; IDXINCR(eq->cidx, ndesc, eq->sidx); } return (reclaimed); } static void tx_reclaim(void *arg, int n) { struct sge_txq *txq = arg; struct sge_eq *eq = &txq->eq; do { if (TXQ_TRYLOCK(txq) == 0) break; n = reclaim_tx_descs(txq, 32); if (eq->cidx == eq->pidx) eq->equeqidx = eq->pidx; TXQ_UNLOCK(txq); } while (n > 0); } static __be64 get_flit(struct sglist_seg *segs, int nsegs, int idx) { int i = (idx / 3) * 2; switch (idx % 3) { case 0: { uint64_t rc; rc = (uint64_t)segs[i].ss_len << 32; if (i + 1 < nsegs) rc |= (uint64_t)(segs[i + 1].ss_len); return (htobe64(rc)); } case 1: return (htobe64(segs[i].ss_paddr)); case 2: return (htobe64(segs[i + 1].ss_paddr)); } return (0); } static void find_best_refill_source(struct adapter *sc, struct sge_fl *fl, int maxp) { int8_t zidx, hwidx, idx; uint16_t region1, region3; int spare, spare_needed, n; struct sw_zone_info *swz; struct hw_buf_info *hwb, *hwb_list = &sc->sge.hw_buf_info[0]; /* * Buffer Packing: Look for PAGE_SIZE or larger zone which has a bufsize * large enough for the max payload and cluster metadata. Otherwise * settle for the largest bufsize that leaves enough room in the cluster * for metadata. * * Without buffer packing: Look for the smallest zone which has a * bufsize large enough for the max payload. Settle for the largest * bufsize available if there's nothing big enough for max payload. */ spare_needed = fl->flags & FL_BUF_PACKING ? CL_METADATA_SIZE : 0; swz = &sc->sge.sw_zone_info[0]; hwidx = -1; for (zidx = 0; zidx < SW_ZONE_SIZES; zidx++, swz++) { if (swz->size > largest_rx_cluster) { if (__predict_true(hwidx != -1)) break; /* * This is a misconfiguration. largest_rx_cluster is * preventing us from finding a refill source. See * dev.t5nex..buffer_sizes to figure out why. */ device_printf(sc->dev, "largest_rx_cluster=%u leaves no" " refill source for fl %p (dma %u). Ignored.\n", largest_rx_cluster, fl, maxp); } for (idx = swz->head_hwidx; idx != -1; idx = hwb->next) { hwb = &hwb_list[idx]; spare = swz->size - hwb->size; if (spare < spare_needed) continue; hwidx = idx; /* best option so far */ if (hwb->size >= maxp) { if ((fl->flags & FL_BUF_PACKING) == 0) goto done; /* stop looking (not packing) */ if (swz->size >= safest_rx_cluster) goto done; /* stop looking (packing) */ } break; /* keep looking, next zone */ } } done: /* A usable hwidx has been located. */ MPASS(hwidx != -1); hwb = &hwb_list[hwidx]; zidx = hwb->zidx; swz = &sc->sge.sw_zone_info[zidx]; region1 = 0; region3 = swz->size - hwb->size; /* * Stay within this zone and see if there is a better match when mbuf * inlining is allowed. Remember that the hwidx's are sorted in * decreasing order of size (so in increasing order of spare area). */ for (idx = hwidx; idx != -1; idx = hwb->next) { hwb = &hwb_list[idx]; spare = swz->size - hwb->size; if (allow_mbufs_in_cluster == 0 || hwb->size < maxp) break; /* * Do not inline mbufs if doing so would violate the pad/pack * boundary alignment requirement. */ if (fl_pad && (MSIZE % sc->params.sge.pad_boundary) != 0) continue; if (fl->flags & FL_BUF_PACKING && (MSIZE % sc->params.sge.pack_boundary) != 0) continue; if (spare < CL_METADATA_SIZE + MSIZE) continue; n = (spare - CL_METADATA_SIZE) / MSIZE; if (n > howmany(hwb->size, maxp)) break; hwidx = idx; if (fl->flags & FL_BUF_PACKING) { region1 = n * MSIZE; region3 = spare - region1; } else { region1 = MSIZE; region3 = spare - region1; break; } } KASSERT(zidx >= 0 && zidx < SW_ZONE_SIZES, ("%s: bad zone %d for fl %p, maxp %d", __func__, zidx, fl, maxp)); KASSERT(hwidx >= 0 && hwidx <= SGE_FLBUF_SIZES, ("%s: bad hwidx %d for fl %p, maxp %d", __func__, hwidx, fl, maxp)); KASSERT(region1 + sc->sge.hw_buf_info[hwidx].size + region3 == sc->sge.sw_zone_info[zidx].size, ("%s: bad buffer layout for fl %p, maxp %d. " "cl %d; r1 %d, payload %d, r3 %d", __func__, fl, maxp, sc->sge.sw_zone_info[zidx].size, region1, sc->sge.hw_buf_info[hwidx].size, region3)); if (fl->flags & FL_BUF_PACKING || region1 > 0) { KASSERT(region3 >= CL_METADATA_SIZE, ("%s: no room for metadata. fl %p, maxp %d; " "cl %d; r1 %d, payload %d, r3 %d", __func__, fl, maxp, sc->sge.sw_zone_info[zidx].size, region1, sc->sge.hw_buf_info[hwidx].size, region3)); KASSERT(region1 % MSIZE == 0, ("%s: bad mbuf region for fl %p, maxp %d. " "cl %d; r1 %d, payload %d, r3 %d", __func__, fl, maxp, sc->sge.sw_zone_info[zidx].size, region1, sc->sge.hw_buf_info[hwidx].size, region3)); } fl->cll_def.zidx = zidx; fl->cll_def.hwidx = hwidx; fl->cll_def.region1 = region1; fl->cll_def.region3 = region3; } static void find_safe_refill_source(struct adapter *sc, struct sge_fl *fl) { struct sge *s = &sc->sge; struct hw_buf_info *hwb; struct sw_zone_info *swz; int spare; int8_t hwidx; if (fl->flags & FL_BUF_PACKING) hwidx = s->safe_hwidx2; /* with room for metadata */ else if (allow_mbufs_in_cluster && s->safe_hwidx2 != -1) { hwidx = s->safe_hwidx2; hwb = &s->hw_buf_info[hwidx]; swz = &s->sw_zone_info[hwb->zidx]; spare = swz->size - hwb->size; /* no good if there isn't room for an mbuf as well */ if (spare < CL_METADATA_SIZE + MSIZE) hwidx = s->safe_hwidx1; } else hwidx = s->safe_hwidx1; if (hwidx == -1) { /* No fallback source */ fl->cll_alt.hwidx = -1; fl->cll_alt.zidx = -1; return; } hwb = &s->hw_buf_info[hwidx]; swz = &s->sw_zone_info[hwb->zidx]; spare = swz->size - hwb->size; fl->cll_alt.hwidx = hwidx; fl->cll_alt.zidx = hwb->zidx; if (allow_mbufs_in_cluster && (fl_pad == 0 || (MSIZE % sc->params.sge.pad_boundary) == 0)) fl->cll_alt.region1 = ((spare - CL_METADATA_SIZE) / MSIZE) * MSIZE; else fl->cll_alt.region1 = 0; fl->cll_alt.region3 = spare - fl->cll_alt.region1; } static void add_fl_to_sfl(struct adapter *sc, struct sge_fl *fl) { mtx_lock(&sc->sfl_lock); FL_LOCK(fl); if ((fl->flags & FL_DOOMED) == 0) { fl->flags |= FL_STARVING; TAILQ_INSERT_TAIL(&sc->sfl, fl, link); callout_reset(&sc->sfl_callout, hz / 5, refill_sfl, sc); } FL_UNLOCK(fl); mtx_unlock(&sc->sfl_lock); } static void handle_wrq_egr_update(struct adapter *sc, struct sge_eq *eq) { struct sge_wrq *wrq = (void *)eq; atomic_readandclear_int(&eq->equiq); taskqueue_enqueue(sc->tq[eq->tx_chan], &wrq->wrq_tx_task); } static void handle_eth_egr_update(struct adapter *sc, struct sge_eq *eq) { struct sge_txq *txq = (void *)eq; MPASS((eq->flags & EQ_TYPEMASK) == EQ_ETH); atomic_readandclear_int(&eq->equiq); mp_ring_check_drainage(txq->r, 0); taskqueue_enqueue(sc->tq[eq->tx_chan], &txq->tx_reclaim_task); } static int handle_sge_egr_update(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { const struct cpl_sge_egr_update *cpl = (const void *)(rss + 1); unsigned int qid = G_EGR_QID(ntohl(cpl->opcode_qid)); struct adapter *sc = iq->adapter; struct sge *s = &sc->sge; struct sge_eq *eq; static void (*h[])(struct adapter *, struct sge_eq *) = {NULL, &handle_wrq_egr_update, &handle_eth_egr_update, &handle_wrq_egr_update}; KASSERT(m == NULL, ("%s: payload with opcode %02x", __func__, rss->opcode)); eq = s->eqmap[qid - s->eq_start - s->eq_base]; (*h[eq->flags & EQ_TYPEMASK])(sc, eq); return (0); } /* handle_fw_msg works for both fw4_msg and fw6_msg because this is valid */ CTASSERT(offsetof(struct cpl_fw4_msg, data) == \ offsetof(struct cpl_fw6_msg, data)); static int handle_fw_msg(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; const struct cpl_fw6_msg *cpl = (const void *)(rss + 1); KASSERT(m == NULL, ("%s: payload with opcode %02x", __func__, rss->opcode)); if (cpl->type == FW_TYPE_RSSCPL || cpl->type == FW6_TYPE_RSSCPL) { const struct rss_header *rss2; rss2 = (const struct rss_header *)&cpl->data[0]; return (t4_cpl_handler[rss2->opcode](iq, rss2, m)); } return (t4_fw_msg_handler[cpl->type](sc, &cpl->data[0])); } /** * t4_handle_wrerr_rpl - process a FW work request error message * @adap: the adapter * @rpl: start of the FW message */ static int t4_handle_wrerr_rpl(struct adapter *adap, const __be64 *rpl) { u8 opcode = *(const u8 *)rpl; const struct fw_error_cmd *e = (const void *)rpl; unsigned int i; if (opcode != FW_ERROR_CMD) { log(LOG_ERR, "%s: Received WRERR_RPL message with opcode %#x\n", device_get_nameunit(adap->dev), opcode); return (EINVAL); } log(LOG_ERR, "%s: FW_ERROR (%s) ", device_get_nameunit(adap->dev), G_FW_ERROR_CMD_FATAL(be32toh(e->op_to_type)) ? "fatal" : "non-fatal"); switch (G_FW_ERROR_CMD_TYPE(be32toh(e->op_to_type))) { case FW_ERROR_TYPE_EXCEPTION: log(LOG_ERR, "exception info:\n"); for (i = 0; i < nitems(e->u.exception.info); i++) log(LOG_ERR, "%s%08x", i == 0 ? "\t" : " ", be32toh(e->u.exception.info[i])); log(LOG_ERR, "\n"); break; case FW_ERROR_TYPE_HWMODULE: log(LOG_ERR, "HW module regaddr %08x regval %08x\n", be32toh(e->u.hwmodule.regaddr), be32toh(e->u.hwmodule.regval)); break; case FW_ERROR_TYPE_WR: log(LOG_ERR, "WR cidx %d PF %d VF %d eqid %d hdr:\n", be16toh(e->u.wr.cidx), G_FW_ERROR_CMD_PFN(be16toh(e->u.wr.pfn_vfn)), G_FW_ERROR_CMD_VFN(be16toh(e->u.wr.pfn_vfn)), be32toh(e->u.wr.eqid)); for (i = 0; i < nitems(e->u.wr.wrhdr); i++) log(LOG_ERR, "%s%02x", i == 0 ? "\t" : " ", e->u.wr.wrhdr[i]); log(LOG_ERR, "\n"); break; case FW_ERROR_TYPE_ACL: log(LOG_ERR, "ACL cidx %d PF %d VF %d eqid %d %s", be16toh(e->u.acl.cidx), G_FW_ERROR_CMD_PFN(be16toh(e->u.acl.pfn_vfn)), G_FW_ERROR_CMD_VFN(be16toh(e->u.acl.pfn_vfn)), be32toh(e->u.acl.eqid), G_FW_ERROR_CMD_MV(be16toh(e->u.acl.mv_pkd)) ? "vlanid" : "MAC"); for (i = 0; i < nitems(e->u.acl.val); i++) log(LOG_ERR, " %02x", e->u.acl.val[i]); log(LOG_ERR, "\n"); break; default: log(LOG_ERR, "type %#x\n", G_FW_ERROR_CMD_TYPE(be32toh(e->op_to_type))); return (EINVAL); } return (0); } static int sysctl_uint16(SYSCTL_HANDLER_ARGS) { uint16_t *id = arg1; int i = *id; return sysctl_handle_int(oidp, &i, 0, req); } static int sysctl_bufsizes(SYSCTL_HANDLER_ARGS) { struct sge *s = arg1; struct hw_buf_info *hwb = &s->hw_buf_info[0]; struct sw_zone_info *swz = &s->sw_zone_info[0]; int i, rc; struct sbuf sb; char c; sbuf_new(&sb, NULL, 32, SBUF_AUTOEXTEND); for (i = 0; i < SGE_FLBUF_SIZES; i++, hwb++) { if (hwb->zidx >= 0 && swz[hwb->zidx].size <= largest_rx_cluster) c = '*'; else c = '\0'; sbuf_printf(&sb, "%u%c ", hwb->size, c); } sbuf_trim(&sb); sbuf_finish(&sb); rc = sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req); sbuf_delete(&sb); return (rc); } static int sysctl_tc(SYSCTL_HANDLER_ARGS) { struct vi_info *vi = arg1; struct port_info *pi; struct adapter *sc; struct sge_txq *txq; struct tx_cl_rl_params *tc; int qidx = arg2, rc, tc_idx; uint32_t fw_queue, fw_class; MPASS(qidx >= 0 && qidx < vi->ntxq); pi = vi->pi; sc = pi->adapter; txq = &sc->sge.txq[vi->first_txq + qidx]; tc_idx = txq->tc_idx; rc = sysctl_handle_int(oidp, &tc_idx, 0, req); if (rc != 0 || req->newptr == NULL) return (rc); if (sc->flags & IS_VF) return (EPERM); /* Note that -1 is legitimate input (it means unbind). */ if (tc_idx < -1 || tc_idx >= sc->chip_params->nsched_cls) return (EINVAL); mtx_lock(&sc->tc_lock); if (tc_idx == txq->tc_idx) { rc = 0; /* No change, nothing to do. */ goto done; } fw_queue = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DMAQ) | V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DMAQ_EQ_SCHEDCLASS_ETH) | V_FW_PARAMS_PARAM_YZ(txq->eq.cntxt_id); if (tc_idx == -1) fw_class = 0xffffffff; /* Unbind. */ else { /* * Bind to a different class. */ tc = &pi->sched_params->cl_rl[tc_idx]; if (tc->flags & TX_CLRL_ERROR) { /* Previous attempt to set the cl-rl params failed. */ rc = EIO; goto done; } else { /* * Ok to proceed. Place a reference on the new class * while still holding on to the reference on the * previous class, if any. */ fw_class = tc_idx; tc->refcount++; } } mtx_unlock(&sc->tc_lock); rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4stc"); if (rc) return (rc); rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &fw_queue, &fw_class); end_synchronized_op(sc, 0); mtx_lock(&sc->tc_lock); if (rc == 0) { if (txq->tc_idx != -1) { tc = &pi->sched_params->cl_rl[txq->tc_idx]; MPASS(tc->refcount > 0); tc->refcount--; } txq->tc_idx = tc_idx; } else if (tc_idx != -1) { tc = &pi->sched_params->cl_rl[tc_idx]; MPASS(tc->refcount > 0); tc->refcount--; } done: mtx_unlock(&sc->tc_lock); return (rc); } Index: head/sys/dev/cxgbe/tom/t4_connect.c =================================================================== --- head/sys/dev/cxgbe/tom/t4_connect.c (revision 333393) +++ head/sys/dev/cxgbe/tom/t4_connect.c (revision 333394) @@ -1,526 +1,502 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2012 Chelsio Communications, Inc. * All rights reserved. * Written by: Navdeep Parhar * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #ifdef TCP_OFFLOAD #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define TCPSTATES #include #include #include #include #include "common/common.h" #include "common/t4_msg.h" #include "common/t4_regs.h" #include "common/t4_regs_values.h" #include "tom/t4_tom_l2t.h" #include "tom/t4_tom.h" /* * Active open succeeded. */ static int do_act_establish(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; const struct cpl_act_establish *cpl = (const void *)(rss + 1); u_int tid = GET_TID(cpl); u_int atid = G_TID_TID(ntohl(cpl->tos_atid)); struct toepcb *toep = lookup_atid(sc, atid); struct inpcb *inp = toep->inp; KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); KASSERT(toep->tid == atid, ("%s: toep tid/atid mismatch", __func__)); CTR3(KTR_CXGBE, "%s: atid %u, tid %u", __func__, atid, tid); free_atid(sc, atid); CURVNET_SET(toep->vnet); INP_WLOCK(inp); toep->tid = tid; insert_tid(sc, tid, toep, inp->inp_vflag & INP_IPV6 ? 2 : 1); if (inp->inp_flags & INP_DROPPED) { /* socket closed by the kernel before hw told us it connected */ send_flowc_wr(toep, NULL); send_reset(sc, toep, be32toh(cpl->snd_isn)); goto done; } make_established(toep, cpl->snd_isn, cpl->rcv_isn, cpl->tcp_opt); if (toep->ulp_mode == ULP_MODE_TLS) tls_establish(toep); done: INP_WUNLOCK(inp); CURVNET_RESTORE(); return (0); } -/* - * Convert an ACT_OPEN_RPL status to an errno. - */ -static inline int -act_open_rpl_status_to_errno(int status) -{ - - switch (status) { - case CPL_ERR_CONN_RESET: - return (ECONNREFUSED); - case CPL_ERR_ARP_MISS: - return (EHOSTUNREACH); - case CPL_ERR_CONN_TIMEDOUT: - return (ETIMEDOUT); - case CPL_ERR_TCAM_FULL: - return (EAGAIN); - case CPL_ERR_CONN_EXIST: - log(LOG_ERR, "ACTIVE_OPEN_RPL: 4-tuple in use\n"); - return (EAGAIN); - default: - return (EIO); - } -} - void act_open_failure_cleanup(struct adapter *sc, u_int atid, u_int status) { struct toepcb *toep = lookup_atid(sc, atid); struct inpcb *inp = toep->inp; struct toedev *tod = &toep->td->tod; free_atid(sc, atid); toep->tid = -1; CURVNET_SET(toep->vnet); if (status != EAGAIN) INP_INFO_RLOCK(&V_tcbinfo); INP_WLOCK(inp); toe_connect_failed(tod, inp, status); final_cpl_received(toep); /* unlocks inp */ if (status != EAGAIN) INP_INFO_RUNLOCK(&V_tcbinfo); CURVNET_RESTORE(); } /* * Active open failed. */ static int do_act_open_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; const struct cpl_act_open_rpl *cpl = (const void *)(rss + 1); u_int atid = G_TID_TID(G_AOPEN_ATID(be32toh(cpl->atid_status))); u_int status = G_AOPEN_STATUS(be32toh(cpl->atid_status)); struct toepcb *toep = lookup_atid(sc, atid); int rc; KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); KASSERT(toep->tid == atid, ("%s: toep tid/atid mismatch", __func__)); CTR3(KTR_CXGBE, "%s: atid %u, status %u ", __func__, atid, status); /* Ignore negative advice */ if (negative_advice(status)) return (0); if (status && act_open_has_tid(status)) release_tid(sc, GET_TID(cpl), toep->ctrlq); rc = act_open_rpl_status_to_errno(status); act_open_failure_cleanup(sc, atid, rc); return (0); } /* * Options2 for active open. */ static uint32_t calc_opt2a(struct socket *so, struct toepcb *toep, const struct offload_settings *s) { struct tcpcb *tp = so_sototcpcb(so); struct port_info *pi = toep->vi->pi; struct adapter *sc = pi->adapter; uint32_t opt2 = 0; /* * rx flow control, rx coalesce, congestion control, and tx pace are all * explicitly set by the driver. On T5+ the ISS is also set by the * driver to the value picked by the kernel. */ if (is_t4(sc)) { opt2 |= F_RX_FC_VALID | F_RX_COALESCE_VALID; opt2 |= F_CONG_CNTRL_VALID | F_PACE_VALID; } else { opt2 |= F_T5_OPT_2_VALID; /* all 4 valid */ opt2 |= F_T5_ISS; /* ISS provided in CPL */ } if (s->sack > 0 || (s->sack < 0 && (tp->t_flags & TF_SACK_PERMIT))) opt2 |= F_SACK_EN; if (s->tstamp > 0 || (s->tstamp < 0 && (tp->t_flags & TF_REQ_TSTMP))) opt2 |= F_TSTAMPS_EN; if (tp->t_flags & TF_REQ_SCALE) opt2 |= F_WND_SCALE_EN; if (s->ecn > 0 || (s->ecn < 0 && V_tcp_do_ecn == 1)) opt2 |= F_CCTRL_ECN; /* XXX: F_RX_CHANNEL for multiple rx c-chan support goes here. */ opt2 |= V_TX_QUEUE(sc->params.tp.tx_modq[pi->tx_chan]); /* These defaults are subject to ULP specific fixups later. */ opt2 |= V_RX_FC_DDP(0) | V_RX_FC_DISABLE(0); opt2 |= V_PACE(0); if (s->cong_algo >= 0) opt2 |= V_CONG_CNTRL(s->cong_algo); else if (sc->tt.cong_algorithm >= 0) opt2 |= V_CONG_CNTRL(sc->tt.cong_algorithm & M_CONG_CNTRL); else { struct cc_algo *cc = CC_ALGO(tp); if (strcasecmp(cc->name, "reno") == 0) opt2 |= V_CONG_CNTRL(CONG_ALG_RENO); else if (strcasecmp(cc->name, "tahoe") == 0) opt2 |= V_CONG_CNTRL(CONG_ALG_TAHOE); if (strcasecmp(cc->name, "newreno") == 0) opt2 |= V_CONG_CNTRL(CONG_ALG_NEWRENO); if (strcasecmp(cc->name, "highspeed") == 0) opt2 |= V_CONG_CNTRL(CONG_ALG_HIGHSPEED); else { /* * Use newreno in case the algorithm selected by the * host stack is not supported by the hardware. */ opt2 |= V_CONG_CNTRL(CONG_ALG_NEWRENO); } } if (s->rx_coalesce > 0 || (s->rx_coalesce < 0 && sc->tt.rx_coalesce)) opt2 |= V_RX_COALESCE(M_RX_COALESCE); /* Note that ofld_rxq is already set according to s->rxq. */ opt2 |= F_RSS_QUEUE_VALID; opt2 |= V_RSS_QUEUE(toep->ofld_rxq->iq.abs_id); #ifdef USE_DDP_RX_FLOW_CONTROL if (toep->ulp_mode == ULP_MODE_TCPDDP) opt2 |= F_RX_FC_DDP; #endif if (toep->ulp_mode == ULP_MODE_TLS) { opt2 &= ~V_RX_COALESCE(M_RX_COALESCE); opt2 |= F_RX_FC_DISABLE; } return (htobe32(opt2)); } void t4_init_connect_cpl_handlers(void) { t4_register_cpl_handler(CPL_ACT_ESTABLISH, do_act_establish); t4_register_shared_cpl_handler(CPL_ACT_OPEN_RPL, do_act_open_rpl, CPL_COOKIE_TOM); } void t4_uninit_connect_cpl_handlers(void) { t4_register_cpl_handler(CPL_ACT_ESTABLISH, NULL); t4_register_shared_cpl_handler(CPL_ACT_OPEN_RPL, NULL, CPL_COOKIE_TOM); } #define DONT_OFFLOAD_ACTIVE_OPEN(x) do { \ reason = __LINE__; \ rc = (x); \ goto failed; \ } while (0) static inline int act_open_cpl_size(struct adapter *sc, int isipv6) { int idx; static const int sz_table[3][2] = { { sizeof (struct cpl_act_open_req), sizeof (struct cpl_act_open_req6) }, { sizeof (struct cpl_t5_act_open_req), sizeof (struct cpl_t5_act_open_req6) }, { sizeof (struct cpl_t6_act_open_req), sizeof (struct cpl_t6_act_open_req6) }, }; MPASS(chip_id(sc) >= CHELSIO_T4); idx = min(chip_id(sc) - CHELSIO_T4, 2); return (sz_table[idx][!!isipv6]); } /* * active open (soconnect). * * State of affairs on entry: * soisconnecting (so_state |= SS_ISCONNECTING) * tcbinfo not locked (This has changed - used to be WLOCKed) * inp WLOCKed * tp->t_state = TCPS_SYN_SENT * rtalloc1, RT_UNLOCK on rt. */ int t4_connect(struct toedev *tod, struct socket *so, struct rtentry *rt, struct sockaddr *nam) { struct adapter *sc = tod->tod_softc; struct tom_data *td = tod_td(tod); struct toepcb *toep = NULL; struct wrqe *wr = NULL; struct ifnet *rt_ifp = rt->rt_ifp; struct vi_info *vi; int mtu_idx, rscale, qid_atid, rc, isipv6, txqid, rxqid; struct inpcb *inp = sotoinpcb(so); struct tcpcb *tp = intotcpcb(inp); int reason; struct offload_settings settings; uint16_t vid = 0xffff; INP_WLOCK_ASSERT(inp); KASSERT(nam->sa_family == AF_INET || nam->sa_family == AF_INET6, ("%s: dest addr %p has family %u", __func__, nam, nam->sa_family)); if (rt_ifp->if_type == IFT_ETHER) vi = rt_ifp->if_softc; else if (rt_ifp->if_type == IFT_L2VLAN) { struct ifnet *ifp = VLAN_COOKIE(rt_ifp); vi = ifp->if_softc; VLAN_TAG(rt_ifp, &vid); } else if (rt_ifp->if_type == IFT_IEEE8023ADLAG) DONT_OFFLOAD_ACTIVE_OPEN(ENOSYS); /* XXX: implement lagg+TOE */ else DONT_OFFLOAD_ACTIVE_OPEN(ENOTSUP); rw_rlock(&sc->policy_lock); settings = *lookup_offload_policy(sc, OPEN_TYPE_ACTIVE, NULL, vid, inp); rw_runlock(&sc->policy_lock); if (!settings.offload) DONT_OFFLOAD_ACTIVE_OPEN(EPERM); if (settings.txq >= 0 && settings.txq < vi->nofldtxq) txqid = settings.txq; else txqid = arc4random() % vi->nofldtxq; txqid += vi->first_ofld_txq; if (settings.rxq >= 0 && settings.rxq < vi->nofldrxq) rxqid = settings.rxq; else rxqid = arc4random() % vi->nofldrxq; rxqid += vi->first_ofld_rxq; toep = alloc_toepcb(vi, txqid, rxqid, M_NOWAIT | M_ZERO); if (toep == NULL) DONT_OFFLOAD_ACTIVE_OPEN(ENOMEM); toep->tid = alloc_atid(sc, toep); if (toep->tid < 0) DONT_OFFLOAD_ACTIVE_OPEN(ENOMEM); toep->l2te = t4_l2t_get(vi->pi, rt_ifp, rt->rt_flags & RTF_GATEWAY ? rt->rt_gateway : nam); if (toep->l2te == NULL) DONT_OFFLOAD_ACTIVE_OPEN(ENOMEM); isipv6 = nam->sa_family == AF_INET6; wr = alloc_wrqe(act_open_cpl_size(sc, isipv6), toep->ctrlq); if (wr == NULL) DONT_OFFLOAD_ACTIVE_OPEN(ENOMEM); toep->vnet = so->so_vnet; set_ulp_mode(toep, select_ulp_mode(so, sc, &settings)); SOCKBUF_LOCK(&so->so_rcv); /* opt0 rcv_bufsiz initially, assumes its normal meaning later */ toep->rx_credits = min(select_rcv_wnd(so) >> 10, M_RCV_BUFSIZ); SOCKBUF_UNLOCK(&so->so_rcv); /* * The kernel sets request_r_scale based on sb_max whereas we need to * take hardware's MAX_RCV_WND into account too. This is normally a * no-op as MAX_RCV_WND is much larger than the default sb_max. */ if (tp->t_flags & TF_REQ_SCALE) rscale = tp->request_r_scale = select_rcv_wscale(); else rscale = 0; mtu_idx = find_best_mtu_idx(sc, &inp->inp_inc, &settings); qid_atid = V_TID_QID(toep->ofld_rxq->iq.abs_id) | V_TID_TID(toep->tid) | V_TID_COOKIE(CPL_COOKIE_TOM); if (isipv6) { struct cpl_act_open_req6 *cpl = wrtod(wr); struct cpl_t5_act_open_req6 *cpl5 = (void *)cpl; struct cpl_t6_act_open_req6 *cpl6 = (void *)cpl; if ((inp->inp_vflag & INP_IPV6) == 0) DONT_OFFLOAD_ACTIVE_OPEN(ENOTSUP); toep->ce = hold_lip(td, &inp->in6p_laddr, NULL); if (toep->ce == NULL) DONT_OFFLOAD_ACTIVE_OPEN(ENOENT); switch (chip_id(sc)) { case CHELSIO_T4: INIT_TP_WR(cpl, 0); cpl->params = select_ntuple(vi, toep->l2te); break; case CHELSIO_T5: INIT_TP_WR(cpl5, 0); cpl5->iss = htobe32(tp->iss); cpl5->params = select_ntuple(vi, toep->l2te); break; case CHELSIO_T6: default: INIT_TP_WR(cpl6, 0); cpl6->iss = htobe32(tp->iss); cpl6->params = select_ntuple(vi, toep->l2te); break; } OPCODE_TID(cpl) = htobe32(MK_OPCODE_TID(CPL_ACT_OPEN_REQ6, qid_atid)); cpl->local_port = inp->inp_lport; cpl->local_ip_hi = *(uint64_t *)&inp->in6p_laddr.s6_addr[0]; cpl->local_ip_lo = *(uint64_t *)&inp->in6p_laddr.s6_addr[8]; cpl->peer_port = inp->inp_fport; cpl->peer_ip_hi = *(uint64_t *)&inp->in6p_faddr.s6_addr[0]; cpl->peer_ip_lo = *(uint64_t *)&inp->in6p_faddr.s6_addr[8]; cpl->opt0 = calc_opt0(so, vi, toep->l2te, mtu_idx, rscale, toep->rx_credits, toep->ulp_mode, &settings); cpl->opt2 = calc_opt2a(so, toep, &settings); } else { struct cpl_act_open_req *cpl = wrtod(wr); struct cpl_t5_act_open_req *cpl5 = (void *)cpl; struct cpl_t6_act_open_req *cpl6 = (void *)cpl; switch (chip_id(sc)) { case CHELSIO_T4: INIT_TP_WR(cpl, 0); cpl->params = select_ntuple(vi, toep->l2te); break; case CHELSIO_T5: INIT_TP_WR(cpl5, 0); cpl5->iss = htobe32(tp->iss); cpl5->params = select_ntuple(vi, toep->l2te); break; case CHELSIO_T6: default: INIT_TP_WR(cpl6, 0); cpl6->iss = htobe32(tp->iss); cpl6->params = select_ntuple(vi, toep->l2te); break; } OPCODE_TID(cpl) = htobe32(MK_OPCODE_TID(CPL_ACT_OPEN_REQ, qid_atid)); inp_4tuple_get(inp, &cpl->local_ip, &cpl->local_port, &cpl->peer_ip, &cpl->peer_port); cpl->opt0 = calc_opt0(so, vi, toep->l2te, mtu_idx, rscale, toep->rx_credits, toep->ulp_mode, &settings); cpl->opt2 = calc_opt2a(so, toep, &settings); } CTR5(KTR_CXGBE, "%s: atid %u (%s), toep %p, inp %p", __func__, toep->tid, tcpstates[tp->t_state], toep, inp); offload_socket(so, toep); rc = t4_l2t_send(sc, wr, toep->l2te); if (rc == 0) { toep->flags |= TPF_CPL_PENDING; return (0); } undo_offload_socket(so); reason = __LINE__; failed: CTR3(KTR_CXGBE, "%s: not offloading (%d), rc %d", __func__, reason, rc); if (wr) free_wrqe(wr); if (toep) { if (toep->tid >= 0) free_atid(sc, toep->tid); if (toep->l2te) t4_l2t_release(toep->l2te); if (toep->ce) release_lip(td, toep->ce); free_toepcb(toep); } return (rc); } #endif Index: head/sys/dev/cxgbe/tom/t4_cpl_io.c =================================================================== --- head/sys/dev/cxgbe/tom/t4_cpl_io.c (revision 333393) +++ head/sys/dev/cxgbe/tom/t4_cpl_io.c (revision 333394) @@ -1,2368 +1,2369 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2012, 2015 Chelsio Communications, Inc. * All rights reserved. * Written by: Navdeep Parhar * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ratelimit.h" #ifdef TCP_OFFLOAD #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define TCPSTATES #include #include #include #include #include #include #include #include #include #include #include "common/common.h" #include "common/t4_msg.h" #include "common/t4_regs.h" #include "common/t4_tcb.h" #include "tom/t4_tom_l2t.h" #include "tom/t4_tom.h" static void t4_aiotx_cancel(struct kaiocb *job); static void t4_aiotx_queue_toep(struct toepcb *toep); static size_t aiotx_mbuf_pgoff(struct mbuf *m) { struct aiotx_buffer *ab; MPASS(IS_AIOTX_MBUF(m)); ab = m->m_ext.ext_arg1; return ((ab->ps.offset + (uintptr_t)m->m_ext.ext_arg2) % PAGE_SIZE); } static vm_page_t * aiotx_mbuf_pages(struct mbuf *m) { struct aiotx_buffer *ab; int npages; MPASS(IS_AIOTX_MBUF(m)); ab = m->m_ext.ext_arg1; npages = (ab->ps.offset + (uintptr_t)m->m_ext.ext_arg2) / PAGE_SIZE; return (ab->ps.pages + npages); } void send_flowc_wr(struct toepcb *toep, struct flowc_tx_params *ftxp) { struct wrqe *wr; struct fw_flowc_wr *flowc; unsigned int nparams, flowclen, paramidx; struct vi_info *vi = toep->vi; struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; unsigned int pfvf = G_FW_VIID_PFN(vi->viid) << S_FW_VIID_PFN; struct ofld_tx_sdesc *txsd = &toep->txsd[toep->txsd_pidx]; KASSERT(!(toep->flags & TPF_FLOWC_WR_SENT), ("%s: flowc for tid %u sent already", __func__, toep->tid)); if (ftxp != NULL) nparams = 8; else nparams = 6; if (toep->ulp_mode == ULP_MODE_TLS) nparams++; if (toep->tls.fcplenmax != 0) nparams++; if (toep->tc_idx != -1) { MPASS(toep->tc_idx >= 0 && toep->tc_idx < sc->chip_params->nsched_cls); nparams++; } flowclen = sizeof(*flowc) + nparams * sizeof(struct fw_flowc_mnemval); wr = alloc_wrqe(roundup2(flowclen, 16), toep->ofld_txq); if (wr == NULL) { /* XXX */ panic("%s: allocation failure.", __func__); } flowc = wrtod(wr); memset(flowc, 0, wr->wr_len); flowc->op_to_nparams = htobe32(V_FW_WR_OP(FW_FLOWC_WR) | V_FW_FLOWC_WR_NPARAMS(nparams)); flowc->flowid_len16 = htonl(V_FW_WR_LEN16(howmany(flowclen, 16)) | V_FW_WR_FLOWID(toep->tid)); #define FLOWC_PARAM(__m, __v) \ do { \ flowc->mnemval[paramidx].mnemonic = FW_FLOWC_MNEM_##__m; \ flowc->mnemval[paramidx].val = htobe32(__v); \ paramidx++; \ } while (0) paramidx = 0; FLOWC_PARAM(PFNVFN, pfvf); FLOWC_PARAM(CH, pi->tx_chan); FLOWC_PARAM(PORT, pi->tx_chan); FLOWC_PARAM(IQID, toep->ofld_rxq->iq.abs_id); if (ftxp) { uint32_t sndbuf = min(ftxp->snd_space, sc->tt.sndbuf); FLOWC_PARAM(SNDNXT, ftxp->snd_nxt); FLOWC_PARAM(RCVNXT, ftxp->rcv_nxt); FLOWC_PARAM(SNDBUF, sndbuf); FLOWC_PARAM(MSS, ftxp->mss); CTR6(KTR_CXGBE, "%s: tid %u, mss %u, sndbuf %u, snd_nxt 0x%x, rcv_nxt 0x%x", __func__, toep->tid, ftxp->mss, sndbuf, ftxp->snd_nxt, ftxp->rcv_nxt); } else { FLOWC_PARAM(SNDBUF, 512); FLOWC_PARAM(MSS, 512); CTR2(KTR_CXGBE, "%s: tid %u", __func__, toep->tid); } if (toep->ulp_mode == ULP_MODE_TLS) FLOWC_PARAM(ULP_MODE, toep->ulp_mode); if (toep->tls.fcplenmax != 0) FLOWC_PARAM(TXDATAPLEN_MAX, toep->tls.fcplenmax); if (toep->tc_idx != -1) FLOWC_PARAM(SCHEDCLASS, toep->tc_idx); #undef FLOWC_PARAM KASSERT(paramidx == nparams, ("nparams mismatch")); txsd->tx_credits = howmany(flowclen, 16); txsd->plen = 0; KASSERT(toep->tx_credits >= txsd->tx_credits && toep->txsd_avail > 0, ("%s: not enough credits (%d)", __func__, toep->tx_credits)); toep->tx_credits -= txsd->tx_credits; if (__predict_false(++toep->txsd_pidx == toep->txsd_total)) toep->txsd_pidx = 0; toep->txsd_avail--; toep->flags |= TPF_FLOWC_WR_SENT; t4_wrq_tx(sc, wr); } #ifdef RATELIMIT /* * Input is Bytes/second (so_max_pacing-rate), chip counts in Kilobits/second. */ static int update_tx_rate_limit(struct adapter *sc, struct toepcb *toep, u_int Bps) { int tc_idx, rc; const u_int kbps = (u_int) (uint64_t)Bps * 8ULL / 1000; const int port_id = toep->vi->pi->port_id; CTR3(KTR_CXGBE, "%s: tid %u, rate %uKbps", __func__, toep->tid, kbps); if (kbps == 0) { /* unbind */ tc_idx = -1; } else { rc = t4_reserve_cl_rl_kbps(sc, port_id, kbps, &tc_idx); if (rc != 0) return (rc); MPASS(tc_idx >= 0 && tc_idx < sc->chip_params->nsched_cls); } if (toep->tc_idx != tc_idx) { struct wrqe *wr; struct fw_flowc_wr *flowc; int nparams = 1, flowclen, flowclen16; struct ofld_tx_sdesc *txsd = &toep->txsd[toep->txsd_pidx]; flowclen = sizeof(*flowc) + nparams * sizeof(struct fw_flowc_mnemval); flowclen16 = howmany(flowclen, 16); if (toep->tx_credits < flowclen16 || toep->txsd_avail == 0 || (wr = alloc_wrqe(roundup2(flowclen, 16), toep->ofld_txq)) == NULL) { if (tc_idx >= 0) t4_release_cl_rl_kbps(sc, port_id, tc_idx); return (ENOMEM); } flowc = wrtod(wr); memset(flowc, 0, wr->wr_len); flowc->op_to_nparams = htobe32(V_FW_WR_OP(FW_FLOWC_WR) | V_FW_FLOWC_WR_NPARAMS(nparams)); flowc->flowid_len16 = htonl(V_FW_WR_LEN16(flowclen16) | V_FW_WR_FLOWID(toep->tid)); flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_SCHEDCLASS; if (tc_idx == -1) flowc->mnemval[0].val = htobe32(0xff); else flowc->mnemval[0].val = htobe32(tc_idx); txsd->tx_credits = flowclen16; txsd->plen = 0; toep->tx_credits -= txsd->tx_credits; if (__predict_false(++toep->txsd_pidx == toep->txsd_total)) toep->txsd_pidx = 0; toep->txsd_avail--; t4_wrq_tx(sc, wr); } if (toep->tc_idx >= 0) t4_release_cl_rl_kbps(sc, port_id, toep->tc_idx); toep->tc_idx = tc_idx; return (0); } #endif void send_reset(struct adapter *sc, struct toepcb *toep, uint32_t snd_nxt) { struct wrqe *wr; struct cpl_abort_req *req; int tid = toep->tid; struct inpcb *inp = toep->inp; struct tcpcb *tp = intotcpcb(inp); /* don't use if INP_DROPPED */ INP_WLOCK_ASSERT(inp); CTR6(KTR_CXGBE, "%s: tid %d (%s), toep_flags 0x%x, inp_flags 0x%x%s", __func__, toep->tid, inp->inp_flags & INP_DROPPED ? "inp dropped" : tcpstates[tp->t_state], toep->flags, inp->inp_flags, toep->flags & TPF_ABORT_SHUTDOWN ? " (abort already in progress)" : ""); if (toep->flags & TPF_ABORT_SHUTDOWN) return; /* abort already in progress */ toep->flags |= TPF_ABORT_SHUTDOWN; KASSERT(toep->flags & TPF_FLOWC_WR_SENT, ("%s: flowc_wr not sent for tid %d.", __func__, tid)); wr = alloc_wrqe(sizeof(*req), toep->ofld_txq); if (wr == NULL) { /* XXX */ panic("%s: allocation failure.", __func__); } req = wrtod(wr); INIT_TP_WR_MIT_CPL(req, CPL_ABORT_REQ, tid); if (inp->inp_flags & INP_DROPPED) req->rsvd0 = htobe32(snd_nxt); else req->rsvd0 = htobe32(tp->snd_nxt); req->rsvd1 = !(toep->flags & TPF_TX_DATA_SENT); req->cmd = CPL_ABORT_SEND_RST; /* * XXX: What's the correct way to tell that the inp hasn't been detached * from its socket? Should I even be flushing the snd buffer here? */ if ((inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) == 0) { struct socket *so = inp->inp_socket; if (so != NULL) /* because I'm not sure. See comment above */ sbflush(&so->so_snd); } t4_l2t_send(sc, wr, toep->l2te); } /* * Called when a connection is established to translate the TCP options * reported by HW to FreeBSD's native format. */ static void assign_rxopt(struct tcpcb *tp, unsigned int opt) { struct toepcb *toep = tp->t_toe; struct inpcb *inp = tp->t_inpcb; struct adapter *sc = td_adapter(toep->td); int n; INP_LOCK_ASSERT(inp); if (inp->inp_inc.inc_flags & INC_ISIPV6) n = sizeof(struct ip6_hdr) + sizeof(struct tcphdr); else n = sizeof(struct ip) + sizeof(struct tcphdr); tp->t_maxseg = sc->params.mtus[G_TCPOPT_MSS(opt)] - n; if (G_TCPOPT_TSTAMP(opt)) { tp->t_flags |= TF_RCVD_TSTMP; /* timestamps ok */ tp->ts_recent = 0; /* hmmm */ tp->ts_recent_age = tcp_ts_getticks(); tp->t_maxseg -= TCPOLEN_TSTAMP_APPA; } CTR5(KTR_CXGBE, "%s: tid %d, mtu_idx %u (%u), mss %u", __func__, toep->tid, G_TCPOPT_MSS(opt), sc->params.mtus[G_TCPOPT_MSS(opt)], tp->t_maxseg); if (G_TCPOPT_SACK(opt)) tp->t_flags |= TF_SACK_PERMIT; /* should already be set */ else tp->t_flags &= ~TF_SACK_PERMIT; /* sack disallowed by peer */ if (G_TCPOPT_WSCALE_OK(opt)) tp->t_flags |= TF_RCVD_SCALE; /* Doing window scaling? */ if ((tp->t_flags & (TF_RCVD_SCALE | TF_REQ_SCALE)) == (TF_RCVD_SCALE | TF_REQ_SCALE)) { tp->rcv_scale = tp->request_r_scale; tp->snd_scale = G_TCPOPT_SND_WSCALE(opt); } } /* * Completes some final bits of initialization for just established connections * and changes their state to TCPS_ESTABLISHED. * * The ISNs are from after the exchange of SYNs. i.e., the true ISN + 1. */ void make_established(struct toepcb *toep, uint32_t snd_isn, uint32_t rcv_isn, uint16_t opt) { struct inpcb *inp = toep->inp; struct socket *so = inp->inp_socket; struct tcpcb *tp = intotcpcb(inp); long bufsize; uint32_t iss = be32toh(snd_isn) - 1; /* true ISS */ uint32_t irs = be32toh(rcv_isn) - 1; /* true IRS */ uint16_t tcpopt = be16toh(opt); struct flowc_tx_params ftxp; INP_WLOCK_ASSERT(inp); KASSERT(tp->t_state == TCPS_SYN_SENT || tp->t_state == TCPS_SYN_RECEIVED, ("%s: TCP state %s", __func__, tcpstates[tp->t_state])); CTR6(KTR_CXGBE, "%s: tid %d, so %p, inp %p, tp %p, toep %p", __func__, toep->tid, so, inp, tp, toep); tp->t_state = TCPS_ESTABLISHED; tp->t_starttime = ticks; TCPSTAT_INC(tcps_connects); tp->irs = irs; tcp_rcvseqinit(tp); tp->rcv_wnd = toep->rx_credits << 10; tp->rcv_adv += tp->rcv_wnd; tp->last_ack_sent = tp->rcv_nxt; /* * If we were unable to send all rx credits via opt0, save the remainder * in rx_credits so that they can be handed over with the next credit * update. */ SOCKBUF_LOCK(&so->so_rcv); bufsize = select_rcv_wnd(so); SOCKBUF_UNLOCK(&so->so_rcv); toep->rx_credits = bufsize - tp->rcv_wnd; tp->iss = iss; tcp_sendseqinit(tp); tp->snd_una = iss + 1; tp->snd_nxt = iss + 1; tp->snd_max = iss + 1; assign_rxopt(tp, tcpopt); SOCKBUF_LOCK(&so->so_snd); if (so->so_snd.sb_flags & SB_AUTOSIZE && V_tcp_do_autosndbuf) bufsize = V_tcp_autosndbuf_max; else bufsize = sbspace(&so->so_snd); SOCKBUF_UNLOCK(&so->so_snd); ftxp.snd_nxt = tp->snd_nxt; ftxp.rcv_nxt = tp->rcv_nxt; ftxp.snd_space = bufsize; ftxp.mss = tp->t_maxseg; send_flowc_wr(toep, &ftxp); soisconnected(so); } int send_rx_credits(struct adapter *sc, struct toepcb *toep, int credits) { struct wrqe *wr; struct cpl_rx_data_ack *req; uint32_t dack = F_RX_DACK_CHANGE | V_RX_DACK_MODE(1); KASSERT(credits >= 0, ("%s: %d credits", __func__, credits)); wr = alloc_wrqe(sizeof(*req), toep->ctrlq); if (wr == NULL) return (0); req = wrtod(wr); INIT_TP_WR_MIT_CPL(req, CPL_RX_DATA_ACK, toep->tid); req->credit_dack = htobe32(dack | V_RX_CREDITS(credits)); t4_wrq_tx(sc, wr); return (credits); } void send_rx_modulate(struct adapter *sc, struct toepcb *toep) { struct wrqe *wr; struct cpl_rx_data_ack *req; wr = alloc_wrqe(sizeof(*req), toep->ctrlq); if (wr == NULL) return; req = wrtod(wr); INIT_TP_WR_MIT_CPL(req, CPL_RX_DATA_ACK, toep->tid); req->credit_dack = htobe32(F_RX_MODULATE_RX); t4_wrq_tx(sc, wr); } void t4_rcvd_locked(struct toedev *tod, struct tcpcb *tp) { struct adapter *sc = tod->tod_softc; struct inpcb *inp = tp->t_inpcb; struct socket *so = inp->inp_socket; struct sockbuf *sb = &so->so_rcv; struct toepcb *toep = tp->t_toe; int credits; INP_WLOCK_ASSERT(inp); SOCKBUF_LOCK_ASSERT(sb); KASSERT(toep->sb_cc >= sbused(sb), ("%s: sb %p has more data (%d) than last time (%d).", __func__, sb, sbused(sb), toep->sb_cc)); credits = toep->sb_cc - sbused(sb); toep->sb_cc = sbused(sb); if (toep->ulp_mode == ULP_MODE_TLS) { if (toep->tls.rcv_over >= credits) { toep->tls.rcv_over -= credits; credits = 0; } else { credits -= toep->tls.rcv_over; toep->tls.rcv_over = 0; } } toep->rx_credits += credits; if (toep->rx_credits > 0 && (tp->rcv_wnd <= 32 * 1024 || toep->rx_credits >= 64 * 1024 || (toep->rx_credits >= 16 * 1024 && tp->rcv_wnd <= 128 * 1024) || toep->sb_cc + tp->rcv_wnd < sb->sb_lowat)) { credits = send_rx_credits(sc, toep, toep->rx_credits); toep->rx_credits -= credits; tp->rcv_wnd += credits; tp->rcv_adv += credits; } else if (toep->flags & TPF_FORCE_CREDITS) send_rx_modulate(sc, toep); } void t4_rcvd(struct toedev *tod, struct tcpcb *tp) { struct inpcb *inp = tp->t_inpcb; struct socket *so = inp->inp_socket; struct sockbuf *sb = &so->so_rcv; SOCKBUF_LOCK(sb); t4_rcvd_locked(tod, tp); SOCKBUF_UNLOCK(sb); } /* * Close a connection by sending a CPL_CLOSE_CON_REQ message. */ int t4_close_conn(struct adapter *sc, struct toepcb *toep) { struct wrqe *wr; struct cpl_close_con_req *req; unsigned int tid = toep->tid; CTR3(KTR_CXGBE, "%s: tid %u%s", __func__, toep->tid, toep->flags & TPF_FIN_SENT ? ", IGNORED" : ""); if (toep->flags & TPF_FIN_SENT) return (0); KASSERT(toep->flags & TPF_FLOWC_WR_SENT, ("%s: flowc_wr not sent for tid %u.", __func__, tid)); wr = alloc_wrqe(sizeof(*req), toep->ofld_txq); if (wr == NULL) { /* XXX */ panic("%s: allocation failure.", __func__); } req = wrtod(wr); req->wr.wr_hi = htonl(V_FW_WR_OP(FW_TP_WR) | V_FW_WR_IMMDLEN(sizeof(*req) - sizeof(req->wr))); req->wr.wr_mid = htonl(V_FW_WR_LEN16(howmany(sizeof(*req), 16)) | V_FW_WR_FLOWID(tid)); req->wr.wr_lo = cpu_to_be64(0); OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, tid)); req->rsvd = 0; toep->flags |= TPF_FIN_SENT; toep->flags &= ~TPF_SEND_FIN; t4_l2t_send(sc, wr, toep->l2te); return (0); } #define MAX_OFLD_TX_CREDITS (SGE_MAX_WR_LEN / 16) #define MIN_OFLD_TX_CREDITS (howmany(sizeof(struct fw_ofld_tx_data_wr) + 1, 16)) /* Maximum amount of immediate data we could stuff in a WR */ static inline int max_imm_payload(int tx_credits) { const int n = 2; /* Use only up to 2 desc for imm. data WR */ KASSERT(tx_credits >= 0 && tx_credits <= MAX_OFLD_TX_CREDITS, ("%s: %d credits", __func__, tx_credits)); if (tx_credits < MIN_OFLD_TX_CREDITS) return (0); if (tx_credits >= (n * EQ_ESIZE) / 16) return ((n * EQ_ESIZE) - sizeof(struct fw_ofld_tx_data_wr)); else return (tx_credits * 16 - sizeof(struct fw_ofld_tx_data_wr)); } /* Maximum number of SGL entries we could stuff in a WR */ static inline int max_dsgl_nsegs(int tx_credits) { int nseg = 1; /* ulptx_sgl has room for 1, rest ulp_tx_sge_pair */ int sge_pair_credits = tx_credits - MIN_OFLD_TX_CREDITS; KASSERT(tx_credits >= 0 && tx_credits <= MAX_OFLD_TX_CREDITS, ("%s: %d credits", __func__, tx_credits)); if (tx_credits < MIN_OFLD_TX_CREDITS) return (0); nseg += 2 * (sge_pair_credits * 16 / 24); if ((sge_pair_credits * 16) % 24 == 16) nseg++; return (nseg); } static inline void write_tx_wr(void *dst, struct toepcb *toep, unsigned int immdlen, unsigned int plen, uint8_t credits, int shove, int ulp_submode, int txalign) { struct fw_ofld_tx_data_wr *txwr = dst; txwr->op_to_immdlen = htobe32(V_WR_OP(FW_OFLD_TX_DATA_WR) | V_FW_WR_IMMDLEN(immdlen)); txwr->flowid_len16 = htobe32(V_FW_WR_FLOWID(toep->tid) | V_FW_WR_LEN16(credits)); txwr->lsodisable_to_flags = htobe32(V_TX_ULP_MODE(toep->ulp_mode) | V_TX_ULP_SUBMODE(ulp_submode) | V_TX_URG(0) | V_TX_SHOVE(shove)); txwr->plen = htobe32(plen); if (txalign > 0) { struct tcpcb *tp = intotcpcb(toep->inp); if (plen < 2 * tp->t_maxseg || is_10G_port(toep->vi->pi)) txwr->lsodisable_to_flags |= htobe32(F_FW_OFLD_TX_DATA_WR_LSODISABLE); else txwr->lsodisable_to_flags |= htobe32(F_FW_OFLD_TX_DATA_WR_ALIGNPLD | (tp->t_flags & TF_NODELAY ? 0 : F_FW_OFLD_TX_DATA_WR_ALIGNPLDSHOVE)); } } /* * Generate a DSGL from a starting mbuf. The total number of segments and the * maximum segments in any one mbuf are provided. */ static void write_tx_sgl(void *dst, struct mbuf *start, struct mbuf *stop, int nsegs, int n) { struct mbuf *m; struct ulptx_sgl *usgl = dst; int i, j, rc; struct sglist sg; struct sglist_seg segs[n]; KASSERT(nsegs > 0, ("%s: nsegs 0", __func__)); sglist_init(&sg, n, segs); usgl->cmd_nsge = htobe32(V_ULPTX_CMD(ULP_TX_SC_DSGL) | V_ULPTX_NSGE(nsegs)); i = -1; for (m = start; m != stop; m = m->m_next) { if (IS_AIOTX_MBUF(m)) rc = sglist_append_vmpages(&sg, aiotx_mbuf_pages(m), aiotx_mbuf_pgoff(m), m->m_len); else rc = sglist_append(&sg, mtod(m, void *), m->m_len); if (__predict_false(rc != 0)) panic("%s: sglist_append %d", __func__, rc); for (j = 0; j < sg.sg_nseg; i++, j++) { if (i < 0) { usgl->len0 = htobe32(segs[j].ss_len); usgl->addr0 = htobe64(segs[j].ss_paddr); } else { usgl->sge[i / 2].len[i & 1] = htobe32(segs[j].ss_len); usgl->sge[i / 2].addr[i & 1] = htobe64(segs[j].ss_paddr); } #ifdef INVARIANTS nsegs--; #endif } sglist_reset(&sg); } if (i & 1) usgl->sge[i / 2].len[1] = htobe32(0); KASSERT(nsegs == 0, ("%s: nsegs %d, start %p, stop %p", __func__, nsegs, start, stop)); } /* * Max number of SGL entries an offload tx work request can have. This is 41 * (1 + 40) for a full 512B work request. * fw_ofld_tx_data_wr(16B) + ulptx_sgl(16B, 1) + ulptx_sge_pair(480B, 40) */ #define OFLD_SGL_LEN (41) /* * Send data and/or a FIN to the peer. * * The socket's so_snd buffer consists of a stream of data starting with sb_mb * and linked together with m_next. sb_sndptr, if set, is the last mbuf that * was transmitted. * * drop indicates the number of bytes that should be dropped from the head of * the send buffer. It is an optimization that lets do_fw4_ack avoid creating * contention on the send buffer lock (before this change it used to do * sowwakeup and then t4_push_frames right after that when recovering from tx * stalls). When drop is set this function MUST drop the bytes and wake up any * writers. */ void t4_push_frames(struct adapter *sc, struct toepcb *toep, int drop) { struct mbuf *sndptr, *m, *sb_sndptr; struct fw_ofld_tx_data_wr *txwr; struct wrqe *wr; u_int plen, nsegs, credits, max_imm, max_nsegs, max_nsegs_1mbuf; struct inpcb *inp = toep->inp; struct tcpcb *tp = intotcpcb(inp); struct socket *so = inp->inp_socket; struct sockbuf *sb = &so->so_snd; int tx_credits, shove, compl, sowwakeup; struct ofld_tx_sdesc *txsd; bool aiotx_mbuf_seen; INP_WLOCK_ASSERT(inp); KASSERT(toep->flags & TPF_FLOWC_WR_SENT, ("%s: flowc_wr not sent for tid %u.", __func__, toep->tid)); KASSERT(toep->ulp_mode == ULP_MODE_NONE || toep->ulp_mode == ULP_MODE_TCPDDP || toep->ulp_mode == ULP_MODE_TLS || toep->ulp_mode == ULP_MODE_RDMA, ("%s: ulp_mode %u for toep %p", __func__, toep->ulp_mode, toep)); #ifdef VERBOSE_TRACES CTR4(KTR_CXGBE, "%s: tid %d toep flags %#x tp flags %#x drop %d", __func__, toep->tid, toep->flags, tp->t_flags); #endif if (__predict_false(toep->flags & TPF_ABORT_SHUTDOWN)) return; #ifdef RATELIMIT if (__predict_false(inp->inp_flags2 & INP_RATE_LIMIT_CHANGED) && (update_tx_rate_limit(sc, toep, so->so_max_pacing_rate) == 0)) { inp->inp_flags2 &= ~INP_RATE_LIMIT_CHANGED; } #endif /* * This function doesn't resume by itself. Someone else must clear the * flag and call this function. */ if (__predict_false(toep->flags & TPF_TX_SUSPENDED)) { KASSERT(drop == 0, ("%s: drop (%d) != 0 but tx is suspended", __func__, drop)); return; } txsd = &toep->txsd[toep->txsd_pidx]; do { tx_credits = min(toep->tx_credits, MAX_OFLD_TX_CREDITS); max_imm = max_imm_payload(tx_credits); max_nsegs = max_dsgl_nsegs(tx_credits); SOCKBUF_LOCK(sb); sowwakeup = drop; if (drop) { sbdrop_locked(sb, drop); drop = 0; } sb_sndptr = sb->sb_sndptr; sndptr = sb_sndptr ? sb_sndptr->m_next : sb->sb_mb; plen = 0; nsegs = 0; max_nsegs_1mbuf = 0; /* max # of SGL segments in any one mbuf */ aiotx_mbuf_seen = false; for (m = sndptr; m != NULL; m = m->m_next) { int n; if (IS_AIOTX_MBUF(m)) n = sglist_count_vmpages(aiotx_mbuf_pages(m), aiotx_mbuf_pgoff(m), m->m_len); else n = sglist_count(mtod(m, void *), m->m_len); nsegs += n; plen += m->m_len; /* This mbuf sent us _over_ the nsegs limit, back out */ if (plen > max_imm && nsegs > max_nsegs) { nsegs -= n; plen -= m->m_len; if (plen == 0) { /* Too few credits */ toep->flags |= TPF_TX_SUSPENDED; if (sowwakeup) { if (!TAILQ_EMPTY( &toep->aiotx_jobq)) t4_aiotx_queue_toep( toep); sowwakeup_locked(so); } else SOCKBUF_UNLOCK(sb); SOCKBUF_UNLOCK_ASSERT(sb); return; } break; } if (IS_AIOTX_MBUF(m)) aiotx_mbuf_seen = true; if (max_nsegs_1mbuf < n) max_nsegs_1mbuf = n; sb_sndptr = m; /* new sb->sb_sndptr if all goes well */ /* This mbuf put us right at the max_nsegs limit */ if (plen > max_imm && nsegs == max_nsegs) { m = m->m_next; break; } } if (sbused(sb) > sb->sb_hiwat * 5 / 8 && toep->plen_nocompl + plen >= sb->sb_hiwat / 4) compl = 1; else compl = 0; if (sb->sb_flags & SB_AUTOSIZE && V_tcp_do_autosndbuf && sb->sb_hiwat < V_tcp_autosndbuf_max && sbused(sb) >= sb->sb_hiwat * 7 / 8) { int newsize = min(sb->sb_hiwat + V_tcp_autosndbuf_inc, V_tcp_autosndbuf_max); if (!sbreserve_locked(sb, newsize, so, NULL)) sb->sb_flags &= ~SB_AUTOSIZE; else sowwakeup = 1; /* room available */ } if (sowwakeup) { if (!TAILQ_EMPTY(&toep->aiotx_jobq)) t4_aiotx_queue_toep(toep); sowwakeup_locked(so); } else SOCKBUF_UNLOCK(sb); SOCKBUF_UNLOCK_ASSERT(sb); /* nothing to send */ if (plen == 0) { KASSERT(m == NULL, ("%s: nothing to send, but m != NULL", __func__)); break; } if (__predict_false(toep->flags & TPF_FIN_SENT)) panic("%s: excess tx.", __func__); shove = m == NULL && !(tp->t_flags & TF_MORETOCOME); if (plen <= max_imm && !aiotx_mbuf_seen) { /* Immediate data tx */ wr = alloc_wrqe(roundup2(sizeof(*txwr) + plen, 16), toep->ofld_txq); if (wr == NULL) { /* XXX: how will we recover from this? */ toep->flags |= TPF_TX_SUSPENDED; return; } txwr = wrtod(wr); credits = howmany(wr->wr_len, 16); write_tx_wr(txwr, toep, plen, plen, credits, shove, 0, sc->tt.tx_align); m_copydata(sndptr, 0, plen, (void *)(txwr + 1)); nsegs = 0; } else { int wr_len; /* DSGL tx */ wr_len = sizeof(*txwr) + sizeof(struct ulptx_sgl) + ((3 * (nsegs - 1)) / 2 + ((nsegs - 1) & 1)) * 8; wr = alloc_wrqe(roundup2(wr_len, 16), toep->ofld_txq); if (wr == NULL) { /* XXX: how will we recover from this? */ toep->flags |= TPF_TX_SUSPENDED; return; } txwr = wrtod(wr); credits = howmany(wr_len, 16); write_tx_wr(txwr, toep, 0, plen, credits, shove, 0, sc->tt.tx_align); write_tx_sgl(txwr + 1, sndptr, m, nsegs, max_nsegs_1mbuf); if (wr_len & 0xf) { uint64_t *pad = (uint64_t *) ((uintptr_t)txwr + wr_len); *pad = 0; } } KASSERT(toep->tx_credits >= credits, ("%s: not enough credits", __func__)); toep->tx_credits -= credits; toep->tx_nocompl += credits; toep->plen_nocompl += plen; if (toep->tx_credits <= toep->tx_total * 3 / 8 && toep->tx_nocompl >= toep->tx_total / 4) compl = 1; if (compl || toep->ulp_mode == ULP_MODE_RDMA) { txwr->op_to_immdlen |= htobe32(F_FW_WR_COMPL); toep->tx_nocompl = 0; toep->plen_nocompl = 0; } tp->snd_nxt += plen; tp->snd_max += plen; SOCKBUF_LOCK(sb); KASSERT(sb_sndptr, ("%s: sb_sndptr is NULL", __func__)); sb->sb_sndptr = sb_sndptr; SOCKBUF_UNLOCK(sb); toep->flags |= TPF_TX_DATA_SENT; if (toep->tx_credits < MIN_OFLD_TX_CREDITS) toep->flags |= TPF_TX_SUSPENDED; KASSERT(toep->txsd_avail > 0, ("%s: no txsd", __func__)); txsd->plen = plen; txsd->tx_credits = credits; txsd++; if (__predict_false(++toep->txsd_pidx == toep->txsd_total)) { toep->txsd_pidx = 0; txsd = &toep->txsd[0]; } toep->txsd_avail--; t4_l2t_send(sc, wr, toep->l2te); } while (m != NULL); /* Send a FIN if requested, but only if there's no more data to send */ if (m == NULL && toep->flags & TPF_SEND_FIN) t4_close_conn(sc, toep); } static inline void rqdrop_locked(struct mbufq *q, int plen) { struct mbuf *m; while (plen > 0) { m = mbufq_dequeue(q); /* Too many credits. */ MPASS(m != NULL); M_ASSERTPKTHDR(m); /* Partial credits. */ MPASS(plen >= m->m_pkthdr.len); plen -= m->m_pkthdr.len; m_freem(m); } } void t4_push_pdus(struct adapter *sc, struct toepcb *toep, int drop) { struct mbuf *sndptr, *m; struct fw_ofld_tx_data_wr *txwr; struct wrqe *wr; u_int plen, nsegs, credits, max_imm, max_nsegs, max_nsegs_1mbuf; u_int adjusted_plen, ulp_submode; struct inpcb *inp = toep->inp; struct tcpcb *tp = intotcpcb(inp); int tx_credits, shove; struct ofld_tx_sdesc *txsd = &toep->txsd[toep->txsd_pidx]; struct mbufq *pduq = &toep->ulp_pduq; static const u_int ulp_extra_len[] = {0, 4, 4, 8}; INP_WLOCK_ASSERT(inp); KASSERT(toep->flags & TPF_FLOWC_WR_SENT, ("%s: flowc_wr not sent for tid %u.", __func__, toep->tid)); KASSERT(toep->ulp_mode == ULP_MODE_ISCSI, ("%s: ulp_mode %u for toep %p", __func__, toep->ulp_mode, toep)); if (__predict_false(toep->flags & TPF_ABORT_SHUTDOWN)) return; /* * This function doesn't resume by itself. Someone else must clear the * flag and call this function. */ if (__predict_false(toep->flags & TPF_TX_SUSPENDED)) { KASSERT(drop == 0, ("%s: drop (%d) != 0 but tx is suspended", __func__, drop)); return; } if (drop) rqdrop_locked(&toep->ulp_pdu_reclaimq, drop); while ((sndptr = mbufq_first(pduq)) != NULL) { M_ASSERTPKTHDR(sndptr); tx_credits = min(toep->tx_credits, MAX_OFLD_TX_CREDITS); max_imm = max_imm_payload(tx_credits); max_nsegs = max_dsgl_nsegs(tx_credits); plen = 0; nsegs = 0; max_nsegs_1mbuf = 0; /* max # of SGL segments in any one mbuf */ for (m = sndptr; m != NULL; m = m->m_next) { int n = sglist_count(mtod(m, void *), m->m_len); nsegs += n; plen += m->m_len; /* * This mbuf would send us _over_ the nsegs limit. * Suspend tx because the PDU can't be sent out. */ if (plen > max_imm && nsegs > max_nsegs) { toep->flags |= TPF_TX_SUSPENDED; return; } if (max_nsegs_1mbuf < n) max_nsegs_1mbuf = n; } if (__predict_false(toep->flags & TPF_FIN_SENT)) panic("%s: excess tx.", __func__); /* * We have a PDU to send. All of it goes out in one WR so 'm' * is NULL. A PDU's length is always a multiple of 4. */ MPASS(m == NULL); MPASS((plen & 3) == 0); MPASS(sndptr->m_pkthdr.len == plen); shove = !(tp->t_flags & TF_MORETOCOME); ulp_submode = mbuf_ulp_submode(sndptr); MPASS(ulp_submode < nitems(ulp_extra_len)); /* * plen doesn't include header and data digests, which are * generated and inserted in the right places by the TOE, but * they do occupy TCP sequence space and need to be accounted * for. */ adjusted_plen = plen + ulp_extra_len[ulp_submode]; if (plen <= max_imm) { /* Immediate data tx */ wr = alloc_wrqe(roundup2(sizeof(*txwr) + plen, 16), toep->ofld_txq); if (wr == NULL) { /* XXX: how will we recover from this? */ toep->flags |= TPF_TX_SUSPENDED; return; } txwr = wrtod(wr); credits = howmany(wr->wr_len, 16); write_tx_wr(txwr, toep, plen, adjusted_plen, credits, shove, ulp_submode, sc->tt.tx_align); m_copydata(sndptr, 0, plen, (void *)(txwr + 1)); nsegs = 0; } else { int wr_len; /* DSGL tx */ wr_len = sizeof(*txwr) + sizeof(struct ulptx_sgl) + ((3 * (nsegs - 1)) / 2 + ((nsegs - 1) & 1)) * 8; wr = alloc_wrqe(roundup2(wr_len, 16), toep->ofld_txq); if (wr == NULL) { /* XXX: how will we recover from this? */ toep->flags |= TPF_TX_SUSPENDED; return; } txwr = wrtod(wr); credits = howmany(wr_len, 16); write_tx_wr(txwr, toep, 0, adjusted_plen, credits, shove, ulp_submode, sc->tt.tx_align); write_tx_sgl(txwr + 1, sndptr, m, nsegs, max_nsegs_1mbuf); if (wr_len & 0xf) { uint64_t *pad = (uint64_t *) ((uintptr_t)txwr + wr_len); *pad = 0; } } KASSERT(toep->tx_credits >= credits, ("%s: not enough credits", __func__)); m = mbufq_dequeue(pduq); MPASS(m == sndptr); mbufq_enqueue(&toep->ulp_pdu_reclaimq, m); toep->tx_credits -= credits; toep->tx_nocompl += credits; toep->plen_nocompl += plen; if (toep->tx_credits <= toep->tx_total * 3 / 8 && toep->tx_nocompl >= toep->tx_total / 4) { txwr->op_to_immdlen |= htobe32(F_FW_WR_COMPL); toep->tx_nocompl = 0; toep->plen_nocompl = 0; } tp->snd_nxt += adjusted_plen; tp->snd_max += adjusted_plen; toep->flags |= TPF_TX_DATA_SENT; if (toep->tx_credits < MIN_OFLD_TX_CREDITS) toep->flags |= TPF_TX_SUSPENDED; KASSERT(toep->txsd_avail > 0, ("%s: no txsd", __func__)); txsd->plen = plen; txsd->tx_credits = credits; txsd++; if (__predict_false(++toep->txsd_pidx == toep->txsd_total)) { toep->txsd_pidx = 0; txsd = &toep->txsd[0]; } toep->txsd_avail--; t4_l2t_send(sc, wr, toep->l2te); } /* Send a FIN if requested, but only if there are no more PDUs to send */ if (mbufq_first(pduq) == NULL && toep->flags & TPF_SEND_FIN) t4_close_conn(sc, toep); } int t4_tod_output(struct toedev *tod, struct tcpcb *tp) { struct adapter *sc = tod->tod_softc; #ifdef INVARIANTS struct inpcb *inp = tp->t_inpcb; #endif struct toepcb *toep = tp->t_toe; INP_WLOCK_ASSERT(inp); KASSERT((inp->inp_flags & INP_DROPPED) == 0, ("%s: inp %p dropped.", __func__, inp)); KASSERT(toep != NULL, ("%s: toep is NULL", __func__)); if (toep->ulp_mode == ULP_MODE_ISCSI) t4_push_pdus(sc, toep, 0); else if (tls_tx_key(toep)) t4_push_tls_records(sc, toep, 0); else t4_push_frames(sc, toep, 0); return (0); } int t4_send_fin(struct toedev *tod, struct tcpcb *tp) { struct adapter *sc = tod->tod_softc; #ifdef INVARIANTS struct inpcb *inp = tp->t_inpcb; #endif struct toepcb *toep = tp->t_toe; INP_WLOCK_ASSERT(inp); KASSERT((inp->inp_flags & INP_DROPPED) == 0, ("%s: inp %p dropped.", __func__, inp)); KASSERT(toep != NULL, ("%s: toep is NULL", __func__)); toep->flags |= TPF_SEND_FIN; if (tp->t_state >= TCPS_ESTABLISHED) { if (toep->ulp_mode == ULP_MODE_ISCSI) t4_push_pdus(sc, toep, 0); else if (tls_tx_key(toep)) t4_push_tls_records(sc, toep, 0); else t4_push_frames(sc, toep, 0); } return (0); } int t4_send_rst(struct toedev *tod, struct tcpcb *tp) { struct adapter *sc = tod->tod_softc; #if defined(INVARIANTS) struct inpcb *inp = tp->t_inpcb; #endif struct toepcb *toep = tp->t_toe; INP_WLOCK_ASSERT(inp); KASSERT((inp->inp_flags & INP_DROPPED) == 0, ("%s: inp %p dropped.", __func__, inp)); KASSERT(toep != NULL, ("%s: toep is NULL", __func__)); /* hmmmm */ KASSERT(toep->flags & TPF_FLOWC_WR_SENT, ("%s: flowc for tid %u [%s] not sent already", __func__, toep->tid, tcpstates[tp->t_state])); send_reset(sc, toep, 0); return (0); } /* * Peer has sent us a FIN. */ static int do_peer_close(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; const struct cpl_peer_close *cpl = (const void *)(rss + 1); unsigned int tid = GET_TID(cpl); struct toepcb *toep = lookup_tid(sc, tid); struct inpcb *inp = toep->inp; struct tcpcb *tp = NULL; struct socket *so; #ifdef INVARIANTS unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl))); #endif KASSERT(opcode == CPL_PEER_CLOSE, ("%s: unexpected opcode 0x%x", __func__, opcode)); KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); if (__predict_false(toep->flags & TPF_SYNQE)) { #ifdef INVARIANTS struct synq_entry *synqe = (void *)toep; INP_WLOCK(synqe->lctx->inp); if (synqe->flags & TPF_SYNQE_HAS_L2TE) { KASSERT(synqe->flags & TPF_ABORT_SHUTDOWN, ("%s: listen socket closed but tid %u not aborted.", __func__, tid)); } else { /* * do_pass_accept_req is still running and will * eventually take care of this tid. */ } INP_WUNLOCK(synqe->lctx->inp); #endif CTR4(KTR_CXGBE, "%s: tid %u, synqe %p (0x%x)", __func__, tid, toep, toep->flags); return (0); } KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__)); CURVNET_SET(toep->vnet); INP_INFO_RLOCK(&V_tcbinfo); INP_WLOCK(inp); tp = intotcpcb(inp); CTR5(KTR_CXGBE, "%s: tid %u (%s), toep_flags 0x%x, inp %p", __func__, tid, tp ? tcpstates[tp->t_state] : "no tp", toep->flags, inp); if (toep->flags & TPF_ABORT_SHUTDOWN) goto done; tp->rcv_nxt++; /* FIN */ so = inp->inp_socket; if (toep->ulp_mode == ULP_MODE_TCPDDP) { DDP_LOCK(toep); if (__predict_false(toep->ddp.flags & (DDP_BUF0_ACTIVE | DDP_BUF1_ACTIVE))) handle_ddp_close(toep, tp, cpl->rcv_nxt); DDP_UNLOCK(toep); } socantrcvmore(so); if (toep->ulp_mode != ULP_MODE_RDMA) { KASSERT(tp->rcv_nxt == be32toh(cpl->rcv_nxt), ("%s: rcv_nxt mismatch: %u %u", __func__, tp->rcv_nxt, be32toh(cpl->rcv_nxt))); } switch (tp->t_state) { case TCPS_SYN_RECEIVED: tp->t_starttime = ticks; /* FALLTHROUGH */ case TCPS_ESTABLISHED: tp->t_state = TCPS_CLOSE_WAIT; break; case TCPS_FIN_WAIT_1: tp->t_state = TCPS_CLOSING; break; case TCPS_FIN_WAIT_2: tcp_twstart(tp); INP_UNLOCK_ASSERT(inp); /* safe, we have a ref on the inp */ INP_INFO_RUNLOCK(&V_tcbinfo); CURVNET_RESTORE(); INP_WLOCK(inp); final_cpl_received(toep); return (0); default: log(LOG_ERR, "%s: TID %u received CPL_PEER_CLOSE in state %d\n", __func__, tid, tp->t_state); } done: INP_WUNLOCK(inp); INP_INFO_RUNLOCK(&V_tcbinfo); CURVNET_RESTORE(); return (0); } /* * Peer has ACK'd our FIN. */ static int do_close_con_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; const struct cpl_close_con_rpl *cpl = (const void *)(rss + 1); unsigned int tid = GET_TID(cpl); struct toepcb *toep = lookup_tid(sc, tid); struct inpcb *inp = toep->inp; struct tcpcb *tp = NULL; struct socket *so = NULL; #ifdef INVARIANTS unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl))); #endif KASSERT(opcode == CPL_CLOSE_CON_RPL, ("%s: unexpected opcode 0x%x", __func__, opcode)); KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__)); CURVNET_SET(toep->vnet); INP_INFO_RLOCK(&V_tcbinfo); INP_WLOCK(inp); tp = intotcpcb(inp); CTR4(KTR_CXGBE, "%s: tid %u (%s), toep_flags 0x%x", __func__, tid, tp ? tcpstates[tp->t_state] : "no tp", toep->flags); if (toep->flags & TPF_ABORT_SHUTDOWN) goto done; so = inp->inp_socket; tp->snd_una = be32toh(cpl->snd_nxt) - 1; /* exclude FIN */ switch (tp->t_state) { case TCPS_CLOSING: /* see TCPS_FIN_WAIT_2 in do_peer_close too */ tcp_twstart(tp); release: INP_UNLOCK_ASSERT(inp); /* safe, we have a ref on the inp */ INP_INFO_RUNLOCK(&V_tcbinfo); CURVNET_RESTORE(); INP_WLOCK(inp); final_cpl_received(toep); /* no more CPLs expected */ return (0); case TCPS_LAST_ACK: if (tcp_close(tp)) INP_WUNLOCK(inp); goto release; case TCPS_FIN_WAIT_1: if (so->so_rcv.sb_state & SBS_CANTRCVMORE) soisdisconnected(so); tp->t_state = TCPS_FIN_WAIT_2; break; default: log(LOG_ERR, "%s: TID %u received CPL_CLOSE_CON_RPL in state %s\n", __func__, tid, tcpstates[tp->t_state]); } done: INP_WUNLOCK(inp); INP_INFO_RUNLOCK(&V_tcbinfo); CURVNET_RESTORE(); return (0); } void send_abort_rpl(struct adapter *sc, struct sge_wrq *ofld_txq, int tid, int rst_status) { struct wrqe *wr; struct cpl_abort_rpl *cpl; wr = alloc_wrqe(sizeof(*cpl), ofld_txq); if (wr == NULL) { /* XXX */ panic("%s: allocation failure.", __func__); } cpl = wrtod(wr); INIT_TP_WR_MIT_CPL(cpl, CPL_ABORT_RPL, tid); cpl->cmd = rst_status; t4_wrq_tx(sc, wr); } static int abort_status_to_errno(struct tcpcb *tp, unsigned int abort_reason) { switch (abort_reason) { case CPL_ERR_BAD_SYN: case CPL_ERR_CONN_RESET: return (tp->t_state == TCPS_CLOSE_WAIT ? EPIPE : ECONNRESET); case CPL_ERR_XMIT_TIMEDOUT: case CPL_ERR_PERSIST_TIMEDOUT: case CPL_ERR_FINWAIT2_TIMEDOUT: case CPL_ERR_KEEPALIVE_TIMEDOUT: return (ETIMEDOUT); default: return (EIO); } } /* * TCP RST from the peer, timeout, or some other such critical error. */ static int do_abort_req(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; const struct cpl_abort_req_rss *cpl = (const void *)(rss + 1); unsigned int tid = GET_TID(cpl); struct toepcb *toep = lookup_tid(sc, tid); struct sge_wrq *ofld_txq = toep->ofld_txq; struct inpcb *inp; struct tcpcb *tp; #ifdef INVARIANTS unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl))); #endif KASSERT(opcode == CPL_ABORT_REQ_RSS, ("%s: unexpected opcode 0x%x", __func__, opcode)); KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); if (toep->flags & TPF_SYNQE) return (do_abort_req_synqe(iq, rss, m)); KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__)); if (negative_advice(cpl->status)) { CTR4(KTR_CXGBE, "%s: negative advice %d for tid %d (0x%x)", __func__, cpl->status, tid, toep->flags); return (0); /* Ignore negative advice */ } inp = toep->inp; CURVNET_SET(toep->vnet); INP_INFO_RLOCK(&V_tcbinfo); /* for tcp_close */ INP_WLOCK(inp); tp = intotcpcb(inp); CTR6(KTR_CXGBE, "%s: tid %d (%s), toep_flags 0x%x, inp_flags 0x%x, status %d", __func__, tid, tp ? tcpstates[tp->t_state] : "no tp", toep->flags, inp->inp_flags, cpl->status); /* * If we'd initiated an abort earlier the reply to it is responsible for * cleaning up resources. Otherwise we tear everything down right here * right now. We owe the T4 a CPL_ABORT_RPL no matter what. */ if (toep->flags & TPF_ABORT_SHUTDOWN) { INP_WUNLOCK(inp); goto done; } toep->flags |= TPF_ABORT_SHUTDOWN; if ((inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) == 0) { struct socket *so = inp->inp_socket; if (so != NULL) so_error_set(so, abort_status_to_errno(tp, cpl->status)); tp = tcp_close(tp); if (tp == NULL) INP_WLOCK(inp); /* re-acquire */ } final_cpl_received(toep); done: INP_INFO_RUNLOCK(&V_tcbinfo); CURVNET_RESTORE(); send_abort_rpl(sc, ofld_txq, tid, CPL_ABORT_NO_RST); return (0); } /* * Reply to the CPL_ABORT_REQ (send_reset) */ static int do_abort_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; const struct cpl_abort_rpl_rss *cpl = (const void *)(rss + 1); unsigned int tid = GET_TID(cpl); struct toepcb *toep = lookup_tid(sc, tid); struct inpcb *inp = toep->inp; #ifdef INVARIANTS unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl))); #endif KASSERT(opcode == CPL_ABORT_RPL_RSS, ("%s: unexpected opcode 0x%x", __func__, opcode)); KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); if (toep->flags & TPF_SYNQE) return (do_abort_rpl_synqe(iq, rss, m)); KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__)); CTR5(KTR_CXGBE, "%s: tid %u, toep %p, inp %p, status %d", __func__, tid, toep, inp, cpl->status); KASSERT(toep->flags & TPF_ABORT_SHUTDOWN, ("%s: wasn't expecting abort reply", __func__)); INP_WLOCK(inp); final_cpl_received(toep); return (0); } static int do_rx_data(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; const struct cpl_rx_data *cpl = mtod(m, const void *); unsigned int tid = GET_TID(cpl); struct toepcb *toep = lookup_tid(sc, tid); struct inpcb *inp = toep->inp; struct tcpcb *tp; struct socket *so; struct sockbuf *sb; int len; uint32_t ddp_placed = 0; if (__predict_false(toep->flags & TPF_SYNQE)) { #ifdef INVARIANTS struct synq_entry *synqe = (void *)toep; INP_WLOCK(synqe->lctx->inp); if (synqe->flags & TPF_SYNQE_HAS_L2TE) { KASSERT(synqe->flags & TPF_ABORT_SHUTDOWN, ("%s: listen socket closed but tid %u not aborted.", __func__, tid)); } else { /* * do_pass_accept_req is still running and will * eventually take care of this tid. */ } INP_WUNLOCK(synqe->lctx->inp); #endif CTR4(KTR_CXGBE, "%s: tid %u, synqe %p (0x%x)", __func__, tid, toep, toep->flags); m_freem(m); return (0); } KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__)); /* strip off CPL header */ m_adj(m, sizeof(*cpl)); len = m->m_pkthdr.len; INP_WLOCK(inp); if (inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) { CTR4(KTR_CXGBE, "%s: tid %u, rx (%d bytes), inp_flags 0x%x", __func__, tid, len, inp->inp_flags); INP_WUNLOCK(inp); m_freem(m); return (0); } tp = intotcpcb(inp); if (__predict_false(tp->rcv_nxt != be32toh(cpl->seq))) ddp_placed = be32toh(cpl->seq) - tp->rcv_nxt; tp->rcv_nxt += len; if (tp->rcv_wnd < len) { KASSERT(toep->ulp_mode == ULP_MODE_RDMA, ("%s: negative window size", __func__)); } tp->rcv_wnd -= len; tp->t_rcvtime = ticks; if (toep->ulp_mode == ULP_MODE_TCPDDP) DDP_LOCK(toep); so = inp_inpcbtosocket(inp); sb = &so->so_rcv; SOCKBUF_LOCK(sb); if (__predict_false(sb->sb_state & SBS_CANTRCVMORE)) { CTR3(KTR_CXGBE, "%s: tid %u, excess rx (%d bytes)", __func__, tid, len); m_freem(m); SOCKBUF_UNLOCK(sb); if (toep->ulp_mode == ULP_MODE_TCPDDP) DDP_UNLOCK(toep); INP_WUNLOCK(inp); CURVNET_SET(toep->vnet); INP_INFO_RLOCK(&V_tcbinfo); INP_WLOCK(inp); tp = tcp_drop(tp, ECONNRESET); if (tp) INP_WUNLOCK(inp); INP_INFO_RUNLOCK(&V_tcbinfo); CURVNET_RESTORE(); return (0); } /* receive buffer autosize */ MPASS(toep->vnet == so->so_vnet); CURVNET_SET(toep->vnet); if (sb->sb_flags & SB_AUTOSIZE && V_tcp_do_autorcvbuf && sb->sb_hiwat < V_tcp_autorcvbuf_max && len > (sbspace(sb) / 8 * 7)) { unsigned int hiwat = sb->sb_hiwat; unsigned int newsize = min(hiwat + V_tcp_autorcvbuf_inc, V_tcp_autorcvbuf_max); if (!sbreserve_locked(sb, newsize, so, NULL)) sb->sb_flags &= ~SB_AUTOSIZE; else toep->rx_credits += newsize - hiwat; } if (toep->ulp_mode == ULP_MODE_TCPDDP) { int changed = !(toep->ddp.flags & DDP_ON) ^ cpl->ddp_off; if (toep->ddp.waiting_count != 0 || toep->ddp.active_count != 0) CTR3(KTR_CXGBE, "%s: tid %u, non-ddp rx (%d bytes)", __func__, tid, len); if (changed) { if (toep->ddp.flags & DDP_SC_REQ) toep->ddp.flags ^= DDP_ON | DDP_SC_REQ; else { KASSERT(cpl->ddp_off == 1, ("%s: DDP switched on by itself.", __func__)); /* Fell out of DDP mode */ toep->ddp.flags &= ~DDP_ON; CTR1(KTR_CXGBE, "%s: fell out of DDP mode", __func__); insert_ddp_data(toep, ddp_placed); } } if (toep->ddp.flags & DDP_ON) { /* * CPL_RX_DATA with DDP on can only be an indicate. * Start posting queued AIO requests via DDP. The * payload that arrived in this indicate is appended * to the socket buffer as usual. */ handle_ddp_indicate(toep); } } KASSERT(toep->sb_cc >= sbused(sb), ("%s: sb %p has more data (%d) than last time (%d).", __func__, sb, sbused(sb), toep->sb_cc)); toep->rx_credits += toep->sb_cc - sbused(sb); sbappendstream_locked(sb, m, 0); toep->sb_cc = sbused(sb); if (toep->rx_credits > 0 && toep->sb_cc + tp->rcv_wnd < sb->sb_lowat) { int credits; credits = send_rx_credits(sc, toep, toep->rx_credits); toep->rx_credits -= credits; tp->rcv_wnd += credits; tp->rcv_adv += credits; } if (toep->ulp_mode == ULP_MODE_TCPDDP && toep->ddp.waiting_count > 0 && sbavail(sb) != 0) { CTR2(KTR_CXGBE, "%s: tid %u queueing AIO task", __func__, tid); ddp_queue_toep(toep); } sorwakeup_locked(so); SOCKBUF_UNLOCK_ASSERT(sb); if (toep->ulp_mode == ULP_MODE_TCPDDP) DDP_UNLOCK(toep); INP_WUNLOCK(inp); CURVNET_RESTORE(); return (0); } #define S_CPL_FW4_ACK_OPCODE 24 #define M_CPL_FW4_ACK_OPCODE 0xff #define V_CPL_FW4_ACK_OPCODE(x) ((x) << S_CPL_FW4_ACK_OPCODE) #define G_CPL_FW4_ACK_OPCODE(x) \ (((x) >> S_CPL_FW4_ACK_OPCODE) & M_CPL_FW4_ACK_OPCODE) #define S_CPL_FW4_ACK_FLOWID 0 #define M_CPL_FW4_ACK_FLOWID 0xffffff #define V_CPL_FW4_ACK_FLOWID(x) ((x) << S_CPL_FW4_ACK_FLOWID) #define G_CPL_FW4_ACK_FLOWID(x) \ (((x) >> S_CPL_FW4_ACK_FLOWID) & M_CPL_FW4_ACK_FLOWID) #define S_CPL_FW4_ACK_CR 24 #define M_CPL_FW4_ACK_CR 0xff #define V_CPL_FW4_ACK_CR(x) ((x) << S_CPL_FW4_ACK_CR) #define G_CPL_FW4_ACK_CR(x) (((x) >> S_CPL_FW4_ACK_CR) & M_CPL_FW4_ACK_CR) #define S_CPL_FW4_ACK_SEQVAL 0 #define M_CPL_FW4_ACK_SEQVAL 0x1 #define V_CPL_FW4_ACK_SEQVAL(x) ((x) << S_CPL_FW4_ACK_SEQVAL) #define G_CPL_FW4_ACK_SEQVAL(x) \ (((x) >> S_CPL_FW4_ACK_SEQVAL) & M_CPL_FW4_ACK_SEQVAL) #define F_CPL_FW4_ACK_SEQVAL V_CPL_FW4_ACK_SEQVAL(1U) static int do_fw4_ack(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; const struct cpl_fw4_ack *cpl = (const void *)(rss + 1); unsigned int tid = G_CPL_FW4_ACK_FLOWID(be32toh(OPCODE_TID(cpl))); struct toepcb *toep = lookup_tid(sc, tid); struct inpcb *inp; struct tcpcb *tp; struct socket *so; uint8_t credits = cpl->credits; struct ofld_tx_sdesc *txsd; int plen; #ifdef INVARIANTS unsigned int opcode = G_CPL_FW4_ACK_OPCODE(be32toh(OPCODE_TID(cpl))); #endif /* * Very unusual case: we'd sent a flowc + abort_req for a synq entry and * now this comes back carrying the credits for the flowc. */ if (__predict_false(toep->flags & TPF_SYNQE)) { KASSERT(toep->flags & TPF_ABORT_SHUTDOWN, ("%s: credits for a synq entry %p", __func__, toep)); return (0); } inp = toep->inp; KASSERT(opcode == CPL_FW4_ACK, ("%s: unexpected opcode 0x%x", __func__, opcode)); KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__)); INP_WLOCK(inp); if (__predict_false(toep->flags & TPF_ABORT_SHUTDOWN)) { INP_WUNLOCK(inp); return (0); } KASSERT((inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) == 0, ("%s: inp_flags 0x%x", __func__, inp->inp_flags)); tp = intotcpcb(inp); if (cpl->flags & CPL_FW4_ACK_FLAGS_SEQVAL) { tcp_seq snd_una = be32toh(cpl->snd_una); #ifdef INVARIANTS if (__predict_false(SEQ_LT(snd_una, tp->snd_una))) { log(LOG_ERR, "%s: unexpected seq# %x for TID %u, snd_una %x\n", __func__, snd_una, toep->tid, tp->snd_una); } #endif if (tp->snd_una != snd_una) { tp->snd_una = snd_una; tp->ts_recent_age = tcp_ts_getticks(); } } #ifdef VERBOSE_TRACES CTR3(KTR_CXGBE, "%s: tid %d credits %u", __func__, tid, credits); #endif so = inp->inp_socket; txsd = &toep->txsd[toep->txsd_cidx]; plen = 0; while (credits) { KASSERT(credits >= txsd->tx_credits, ("%s: too many (or partial) credits", __func__)); credits -= txsd->tx_credits; toep->tx_credits += txsd->tx_credits; plen += txsd->plen; if (txsd->iv_buffer) { free(txsd->iv_buffer, M_CXGBE); txsd->iv_buffer = NULL; } txsd++; toep->txsd_avail++; KASSERT(toep->txsd_avail <= toep->txsd_total, ("%s: txsd avail > total", __func__)); if (__predict_false(++toep->txsd_cidx == toep->txsd_total)) { txsd = &toep->txsd[0]; toep->txsd_cidx = 0; } } if (toep->tx_credits == toep->tx_total) { toep->tx_nocompl = 0; toep->plen_nocompl = 0; } if (toep->flags & TPF_TX_SUSPENDED && toep->tx_credits >= toep->tx_total / 4) { #ifdef VERBOSE_TRACES CTR2(KTR_CXGBE, "%s: tid %d calling t4_push_frames", __func__, tid); #endif toep->flags &= ~TPF_TX_SUSPENDED; CURVNET_SET(toep->vnet); if (toep->ulp_mode == ULP_MODE_ISCSI) t4_push_pdus(sc, toep, plen); else if (tls_tx_key(toep)) t4_push_tls_records(sc, toep, plen); else t4_push_frames(sc, toep, plen); CURVNET_RESTORE(); } else if (plen > 0) { struct sockbuf *sb = &so->so_snd; int sbu; SOCKBUF_LOCK(sb); sbu = sbused(sb); if (toep->ulp_mode == ULP_MODE_ISCSI) { if (__predict_false(sbu > 0)) { /* * The data trasmitted before the tid's ULP mode * changed to ISCSI is still in so_snd. * Incoming credits should account for so_snd * first. */ sbdrop_locked(sb, min(sbu, plen)); plen -= min(sbu, plen); } sowwakeup_locked(so); /* unlocks so_snd */ rqdrop_locked(&toep->ulp_pdu_reclaimq, plen); } else { #ifdef VERBOSE_TRACES CTR3(KTR_CXGBE, "%s: tid %d dropped %d bytes", __func__, tid, plen); #endif sbdrop_locked(sb, plen); if (tls_tx_key(toep)) { struct tls_ofld_info *tls_ofld = &toep->tls; MPASS(tls_ofld->sb_off >= plen); tls_ofld->sb_off -= plen; } if (!TAILQ_EMPTY(&toep->aiotx_jobq)) t4_aiotx_queue_toep(toep); sowwakeup_locked(so); /* unlocks so_snd */ } SOCKBUF_UNLOCK_ASSERT(sb); } INP_WUNLOCK(inp); return (0); } void t4_set_tcb_field(struct adapter *sc, struct sge_wrq *wrq, struct toepcb *toep, uint16_t word, uint64_t mask, uint64_t val, int reply, int cookie) { struct wrqe *wr; struct cpl_set_tcb_field *req; struct ofld_tx_sdesc *txsd; MPASS((cookie & ~M_COOKIE) == 0); if (reply) { MPASS(cookie != CPL_COOKIE_RESERVED); } wr = alloc_wrqe(sizeof(*req), wrq); if (wr == NULL) { /* XXX */ panic("%s: allocation failure.", __func__); } req = wrtod(wr); INIT_TP_WR_MIT_CPL(req, CPL_SET_TCB_FIELD, toep->tid); req->reply_ctrl = htobe16(V_QUEUENO(toep->ofld_rxq->iq.abs_id)); if (reply == 0) req->reply_ctrl |= htobe16(F_NO_REPLY); req->word_cookie = htobe16(V_WORD(word) | V_COOKIE(cookie)); req->mask = htobe64(mask); req->val = htobe64(val); if ((wrq->eq.flags & EQ_TYPEMASK) == EQ_OFLD) { txsd = &toep->txsd[toep->txsd_pidx]; txsd->tx_credits = howmany(sizeof(*req), 16); txsd->plen = 0; KASSERT(toep->tx_credits >= txsd->tx_credits && toep->txsd_avail > 0, ("%s: not enough credits (%d)", __func__, toep->tx_credits)); toep->tx_credits -= txsd->tx_credits; if (__predict_false(++toep->txsd_pidx == toep->txsd_total)) toep->txsd_pidx = 0; toep->txsd_avail--; } t4_wrq_tx(sc, wr); } void t4_init_cpl_io_handlers(void) { t4_register_cpl_handler(CPL_PEER_CLOSE, do_peer_close); t4_register_cpl_handler(CPL_CLOSE_CON_RPL, do_close_con_rpl); t4_register_cpl_handler(CPL_ABORT_REQ_RSS, do_abort_req); - t4_register_cpl_handler(CPL_ABORT_RPL_RSS, do_abort_rpl); + t4_register_shared_cpl_handler(CPL_ABORT_RPL_RSS, do_abort_rpl, + CPL_COOKIE_TOM); t4_register_cpl_handler(CPL_RX_DATA, do_rx_data); t4_register_cpl_handler(CPL_FW4_ACK, do_fw4_ack); } void t4_uninit_cpl_io_handlers(void) { t4_register_cpl_handler(CPL_PEER_CLOSE, NULL); t4_register_cpl_handler(CPL_CLOSE_CON_RPL, NULL); t4_register_cpl_handler(CPL_ABORT_REQ_RSS, NULL); t4_register_cpl_handler(CPL_ABORT_RPL_RSS, NULL); t4_register_cpl_handler(CPL_RX_DATA, NULL); t4_register_cpl_handler(CPL_FW4_ACK, NULL); } /* * Use the 'backend3' field in AIO jobs to store the amount of data * sent by the AIO job so far and the 'backend4' field to hold an * error that should be reported when the job is completed. */ #define aio_sent backend3 #define aio_error backend4 #define jobtotid(job) \ (((struct toepcb *)(so_sototcpcb((job)->fd_file->f_data)->t_toe))->tid) static void free_aiotx_buffer(struct aiotx_buffer *ab) { struct kaiocb *job; long status; int error; if (refcount_release(&ab->refcount) == 0) return; job = ab->job; error = job->aio_error; status = job->aio_sent; vm_page_unhold_pages(ab->ps.pages, ab->ps.npages); free(ab, M_CXGBE); #ifdef VERBOSE_TRACES CTR5(KTR_CXGBE, "%s: tid %d completed %p len %ld, error %d", __func__, jobtotid(job), job, status, error); #endif if (error == ECANCELED && status != 0) error = 0; if (error == ECANCELED) aio_cancel(job); else if (error) aio_complete(job, -1, error); else aio_complete(job, status, 0); } static void t4_aiotx_mbuf_free(struct mbuf *m) { struct aiotx_buffer *ab = m->m_ext.ext_arg1; #ifdef VERBOSE_TRACES CTR3(KTR_CXGBE, "%s: completed %d bytes for tid %d", __func__, m->m_len, jobtotid(ab->job)); #endif free_aiotx_buffer(ab); } /* * Hold the buffer backing an AIO request and return an AIO transmit * buffer. */ static int hold_aio(struct kaiocb *job) { struct aiotx_buffer *ab; struct vmspace *vm; vm_map_t map; vm_offset_t start, end, pgoff; int n; MPASS(job->backend1 == NULL); /* * The AIO subsystem will cancel and drain all requests before * permitting a process to exit or exec, so p_vmspace should * be stable here. */ vm = job->userproc->p_vmspace; map = &vm->vm_map; start = (uintptr_t)job->uaiocb.aio_buf; pgoff = start & PAGE_MASK; end = round_page(start + job->uaiocb.aio_nbytes); start = trunc_page(start); n = atop(end - start); ab = malloc(sizeof(*ab) + n * sizeof(vm_page_t), M_CXGBE, M_WAITOK | M_ZERO); refcount_init(&ab->refcount, 1); ab->ps.pages = (vm_page_t *)(ab + 1); ab->ps.npages = vm_fault_quick_hold_pages(map, start, end - start, VM_PROT_WRITE, ab->ps.pages, n); if (ab->ps.npages < 0) { free(ab, M_CXGBE); return (EFAULT); } KASSERT(ab->ps.npages == n, ("hold_aio: page count mismatch: %d vs %d", ab->ps.npages, n)); ab->ps.offset = pgoff; ab->ps.len = job->uaiocb.aio_nbytes; ab->job = job; job->backend1 = ab; #ifdef VERBOSE_TRACES CTR5(KTR_CXGBE, "%s: tid %d, new pageset %p for job %p, npages %d", __func__, jobtotid(job), &ab->ps, job, ab->ps.npages); #endif return (0); } static void t4_aiotx_process_job(struct toepcb *toep, struct socket *so, struct kaiocb *job) { struct adapter *sc; struct sockbuf *sb; struct file *fp; struct aiotx_buffer *ab; struct inpcb *inp; struct tcpcb *tp; struct mbuf *m; int error; bool moretocome, sendmore; sc = td_adapter(toep->td); sb = &so->so_snd; SOCKBUF_UNLOCK(sb); fp = job->fd_file; ab = job->backend1; m = NULL; #ifdef MAC error = mac_socket_check_send(fp->f_cred, so); if (error != 0) goto out; #endif if (ab == NULL) { error = hold_aio(job); if (error != 0) goto out; ab = job->backend1; } /* Inline sosend_generic(). */ job->msgsnd = 1; error = sblock(sb, SBL_WAIT); MPASS(error == 0); sendanother: m = m_get(M_WAITOK, MT_DATA); SOCKBUF_LOCK(sb); if (so->so_snd.sb_state & SBS_CANTSENDMORE) { SOCKBUF_UNLOCK(sb); sbunlock(sb); if ((so->so_options & SO_NOSIGPIPE) == 0) { PROC_LOCK(job->userproc); kern_psignal(job->userproc, SIGPIPE); PROC_UNLOCK(job->userproc); } error = EPIPE; goto out; } if (so->so_error) { error = so->so_error; so->so_error = 0; SOCKBUF_UNLOCK(sb); sbunlock(sb); goto out; } if ((so->so_state & SS_ISCONNECTED) == 0) { SOCKBUF_UNLOCK(sb); sbunlock(sb); error = ENOTCONN; goto out; } if (sbspace(sb) < sb->sb_lowat) { MPASS(job->aio_sent == 0 || !(so->so_state & SS_NBIO)); /* * Don't block if there is too little room in the socket * buffer. Instead, requeue the request. */ if (!aio_set_cancel_function(job, t4_aiotx_cancel)) { SOCKBUF_UNLOCK(sb); sbunlock(sb); error = ECANCELED; goto out; } TAILQ_INSERT_HEAD(&toep->aiotx_jobq, job, list); SOCKBUF_UNLOCK(sb); sbunlock(sb); goto out; } /* * Write as much data as the socket permits, but no more than a * a single sndbuf at a time. */ m->m_len = sbspace(sb); if (m->m_len > ab->ps.len - job->aio_sent) { m->m_len = ab->ps.len - job->aio_sent; moretocome = false; } else moretocome = true; if (m->m_len > sc->tt.sndbuf) { m->m_len = sc->tt.sndbuf; sendmore = true; } else sendmore = false; if (!TAILQ_EMPTY(&toep->aiotx_jobq)) moretocome = true; SOCKBUF_UNLOCK(sb); MPASS(m->m_len != 0); /* Inlined tcp_usr_send(). */ inp = toep->inp; INP_WLOCK(inp); if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { INP_WUNLOCK(inp); sbunlock(sb); error = ECONNRESET; goto out; } refcount_acquire(&ab->refcount); m_extadd(m, NULL, ab->ps.len, t4_aiotx_mbuf_free, ab, (void *)(uintptr_t)job->aio_sent, 0, EXT_NET_DRV); m->m_ext.ext_flags |= EXT_FLAG_AIOTX; job->aio_sent += m->m_len; sbappendstream(sb, m, 0); m = NULL; if (!(inp->inp_flags & INP_DROPPED)) { tp = intotcpcb(inp); if (moretocome) tp->t_flags |= TF_MORETOCOME; error = tp->t_fb->tfb_tcp_output(tp); if (moretocome) tp->t_flags &= ~TF_MORETOCOME; } INP_WUNLOCK(inp); if (sendmore) goto sendanother; sbunlock(sb); if (error) goto out; /* * If this is a non-blocking socket and the request has not * been fully completed, requeue it until the socket is ready * again. */ if (job->aio_sent < job->uaiocb.aio_nbytes && !(so->so_state & SS_NBIO)) { SOCKBUF_LOCK(sb); if (!aio_set_cancel_function(job, t4_aiotx_cancel)) { SOCKBUF_UNLOCK(sb); error = ECANCELED; goto out; } TAILQ_INSERT_HEAD(&toep->aiotx_jobq, job, list); return; } /* * If the request will not be requeued, drop a reference on * the aiotx buffer. Any mbufs in flight should still * contain a reference, but this drops the reference that the * job owns while it is waiting to queue mbufs to the socket. */ free_aiotx_buffer(ab); out: if (error) { if (ab != NULL) { job->aio_error = error; free_aiotx_buffer(ab); } else { MPASS(job->aio_sent == 0); aio_complete(job, -1, error); } } if (m != NULL) m_free(m); SOCKBUF_LOCK(sb); } static void t4_aiotx_task(void *context, int pending) { struct toepcb *toep = context; struct inpcb *inp = toep->inp; struct socket *so = inp->inp_socket; struct kaiocb *job; CURVNET_SET(toep->vnet); SOCKBUF_LOCK(&so->so_snd); while (!TAILQ_EMPTY(&toep->aiotx_jobq) && sowriteable(so)) { job = TAILQ_FIRST(&toep->aiotx_jobq); TAILQ_REMOVE(&toep->aiotx_jobq, job, list); if (!aio_clear_cancel_function(job)) continue; t4_aiotx_process_job(toep, so, job); } toep->aiotx_task_active = false; SOCKBUF_UNLOCK(&so->so_snd); CURVNET_RESTORE(); free_toepcb(toep); } static void t4_aiotx_queue_toep(struct toepcb *toep) { SOCKBUF_LOCK_ASSERT(&toep->inp->inp_socket->so_snd); #ifdef VERBOSE_TRACES CTR3(KTR_CXGBE, "%s: queueing aiotx task for tid %d, active = %s", __func__, toep->tid, toep->aiotx_task_active ? "true" : "false"); #endif if (toep->aiotx_task_active) return; toep->aiotx_task_active = true; hold_toepcb(toep); soaio_enqueue(&toep->aiotx_task); } static void t4_aiotx_cancel(struct kaiocb *job) { struct aiotx_buffer *ab; struct socket *so; struct sockbuf *sb; struct tcpcb *tp; struct toepcb *toep; so = job->fd_file->f_data; tp = so_sototcpcb(so); toep = tp->t_toe; MPASS(job->uaiocb.aio_lio_opcode == LIO_WRITE); sb = &so->so_snd; SOCKBUF_LOCK(sb); if (!aio_cancel_cleared(job)) TAILQ_REMOVE(&toep->aiotx_jobq, job, list); SOCKBUF_UNLOCK(sb); ab = job->backend1; if (ab != NULL) free_aiotx_buffer(ab); else aio_cancel(job); } int t4_aio_queue_aiotx(struct socket *so, struct kaiocb *job) { struct tcpcb *tp = so_sototcpcb(so); struct toepcb *toep = tp->t_toe; struct adapter *sc = td_adapter(toep->td); /* This only handles writes. */ if (job->uaiocb.aio_lio_opcode != LIO_WRITE) return (EOPNOTSUPP); if (!sc->tt.tx_zcopy) return (EOPNOTSUPP); if (tls_tx_key(toep)) return (EOPNOTSUPP); SOCKBUF_LOCK(&so->so_snd); #ifdef VERBOSE_TRACES CTR2(KTR_CXGBE, "%s: queueing %p", __func__, job); #endif if (!aio_set_cancel_function(job, t4_aiotx_cancel)) panic("new job was cancelled"); TAILQ_INSERT_TAIL(&toep->aiotx_jobq, job, list); if (sowriteable(so)) t4_aiotx_queue_toep(toep); SOCKBUF_UNLOCK(&so->so_snd); return (0); } void aiotx_init_toep(struct toepcb *toep) { TAILQ_INIT(&toep->aiotx_jobq); TASK_INIT(&toep->aiotx_task, 0, t4_aiotx_task, toep); } #endif