Index: projects/cxl_iscsi/sys/dev/cxgbe/adapter.h =================================================================== --- projects/cxl_iscsi/sys/dev/cxgbe/adapter.h (revision 279896) +++ projects/cxl_iscsi/sys/dev/cxgbe/adapter.h (revision 279897) @@ -1,1081 +1,1081 @@ /*- * Copyright (c) 2011 Chelsio Communications, Inc. * All rights reserved. * Written by: Navdeep Parhar * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ * */ #ifndef __T4_ADAPTER_H__ #define __T4_ADAPTER_H__ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "offload.h" #include "common/t4_msg.h" #include "firmware/t4fw_interface.h" #define KTR_CXGBE KTR_SPARE3 MALLOC_DECLARE(M_CXGBE); #define CXGBE_UNIMPLEMENTED(s) \ panic("%s (%s, line %d) not implemented yet.", s, __FILE__, __LINE__) #if defined(__i386__) || defined(__amd64__) static __inline void prefetch(void *x) { __asm volatile("prefetcht0 %0" :: "m" (*(unsigned long *)x)); } #else #define prefetch(x) #endif #ifndef SYSCTL_ADD_UQUAD #define SYSCTL_ADD_UQUAD SYSCTL_ADD_QUAD #define sysctl_handle_64 sysctl_handle_quad #define CTLTYPE_U64 CTLTYPE_QUAD #endif #if (__FreeBSD_version >= 900030) || \ ((__FreeBSD_version >= 802507) && (__FreeBSD_version < 900000)) #define SBUF_DRAIN 1 #endif #ifdef __amd64__ /* XXX: need systemwide bus_space_read_8/bus_space_write_8 */ static __inline uint64_t t4_bus_space_read_8(bus_space_tag_t tag, bus_space_handle_t handle, bus_size_t offset) { KASSERT(tag == X86_BUS_SPACE_MEM, ("%s: can only handle mem space", __func__)); return (*(volatile uint64_t *)(handle + offset)); } static __inline void t4_bus_space_write_8(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, uint64_t value) { KASSERT(tag == X86_BUS_SPACE_MEM, ("%s: can only handle mem space", __func__)); *(volatile uint64_t *)(bsh + offset) = value; } #else static __inline uint64_t t4_bus_space_read_8(bus_space_tag_t tag, bus_space_handle_t handle, bus_size_t offset) { return (uint64_t)bus_space_read_4(tag, handle, offset) + ((uint64_t)bus_space_read_4(tag, handle, offset + 4) << 32); } static __inline void t4_bus_space_write_8(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, uint64_t value) { bus_space_write_4(tag, bsh, 
offset, value); bus_space_write_4(tag, bsh, offset + 4, value >> 32); } #endif struct adapter; typedef struct adapter adapter_t; enum { /* * All ingress queues use this entry size. Note that the firmware event * queue and any iq expecting CPL_RX_PKT in the descriptor needs this to * be at least 64. */ IQ_ESIZE = 64, /* Default queue sizes for all kinds of ingress queues */ FW_IQ_QSIZE = 256, RX_IQ_QSIZE = 1024, /* All egress queues use this entry size */ EQ_ESIZE = 64, /* Default queue sizes for all kinds of egress queues */ CTRL_EQ_QSIZE = 128, TX_EQ_QSIZE = 1024, #if MJUMPAGESIZE != MCLBYTES SW_ZONE_SIZES = 4, /* cluster, jumbop, jumbo9k, jumbo16k */ #else SW_ZONE_SIZES = 3, /* cluster, jumbo9k, jumbo16k */ #endif CL_METADATA_SIZE = CACHE_LINE_SIZE, SGE_MAX_WR_NDESC = SGE_MAX_WR_LEN / EQ_ESIZE, /* max WR size in desc */ TX_SGL_SEGS = 39, TX_SGL_SEGS_TSO = 38, TX_WR_FLITS = SGE_MAX_WR_LEN / 8 }; enum { /* adapter intr_type */ INTR_INTX = (1 << 0), INTR_MSI = (1 << 1), INTR_MSIX = (1 << 2) }; enum { XGMAC_MTU = (1 << 0), XGMAC_PROMISC = (1 << 1), XGMAC_ALLMULTI = (1 << 2), XGMAC_VLANEX = (1 << 3), XGMAC_UCADDR = (1 << 4), XGMAC_MCADDRS = (1 << 5), XGMAC_ALL = 0xffff }; enum { /* flags understood by begin_synchronized_op */ HOLD_LOCK = (1 << 0), SLEEP_OK = (1 << 1), INTR_OK = (1 << 2), /* flags understood by end_synchronized_op */ LOCK_HELD = HOLD_LOCK, }; enum { /* adapter flags */ FULL_INIT_DONE = (1 << 0), FW_OK = (1 << 1), /* INTR_DIRECT = (1 << 2), No longer used. */ MASTER_PF = (1 << 3), ADAP_SYSCTL_CTX = (1 << 4), /* TOM_INIT_DONE= (1 << 5), No longer used */ BUF_PACKING_OK = (1 << 6), CXGBE_BUSY = (1 << 9), /* port flags */ DOOMED = (1 << 0), PORT_INIT_DONE = (1 << 1), PORT_SYSCTL_CTX = (1 << 2), HAS_TRACEQ = (1 << 3), INTR_RXQ = (1 << 4), /* All NIC rxq's take interrupts */ INTR_OFLD_RXQ = (1 << 5), /* All TOE rxq's take interrupts */ INTR_NM_RXQ = (1 << 6), /* All netmap rxq's take interrupts */ INTR_ALL = (INTR_RXQ | INTR_OFLD_RXQ | INTR_NM_RXQ), }; #define IS_DOOMED(pi) ((pi)->flags & DOOMED) #define SET_DOOMED(pi) do {(pi)->flags |= DOOMED;} while (0) #define IS_BUSY(sc) ((sc)->flags & CXGBE_BUSY) #define SET_BUSY(sc) do {(sc)->flags |= CXGBE_BUSY;} while (0) #define CLR_BUSY(sc) do {(sc)->flags &= ~CXGBE_BUSY;} while (0) struct port_info { device_t dev; struct adapter *adapter; struct ifnet *ifp; struct ifmedia media; struct mtx pi_lock; char lockname[16]; unsigned long flags; int if_flags; uint16_t *rss; uint16_t viid; int16_t xact_addr_filt;/* index of exact MAC address filter */ uint16_t rss_size; /* size of VI's RSS table slice */ uint8_t lport; /* associated offload logical port */ int8_t mdio_addr; uint8_t port_type; uint8_t mod_type; uint8_t port_id; uint8_t tx_chan; uint8_t rx_chan_map; /* rx MPS channel bitmap */ /* These need to be int as they are used in sysctl */ int ntxq; /* # of tx queues */ int first_txq; /* index of first tx queue */ int rsrv_noflowq; /* Reserve queue 0 for non-flowid packets */ int nrxq; /* # of rx queues */ int first_rxq; /* index of first rx queue */ #ifdef TCP_OFFLOAD int nofldtxq; /* # of offload tx queues */ int first_ofld_txq; /* index of first offload tx queue */ int nofldrxq; /* # of offload rx queues */ int first_ofld_rxq; /* index of first offload rx queue */ #endif #ifdef DEV_NETMAP int nnmtxq; /* # of netmap tx queues */ int first_nm_txq; /* index of first netmap tx queue */ int nnmrxq; /* # of netmap rx queues */ int first_nm_rxq; /* index of first netmap rx queue */ struct ifnet *nm_ifp; struct ifmedia nm_media; int nmif_flags; 
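/*
 * Illustrative sketch (editor's addition, not part of the driver): the
 * ntxq/first_txq, nrxq/first_rxq, ... pairs in struct port_info above
 * describe a contiguous slice of the adapter-wide queue arrays kept in
 * struct sge (sc->sge.txq[], sc->sge.rxq[], ...).  The for_each_txq()/
 * for_each_rxq() macros defined further down walk exactly that slice.
 * A minimal stand-alone analog of the indexing, with made-up sizes:
 */
#include <assert.h>

struct fake_q { int id; };

static void
walk_port_slice(struct fake_q *all, int first, int count)
{
        struct fake_q *q;
        int i;

        /* same shape as for_each_rxq(pi, iter, q) */
        for (q = &all[first], i = 0; i < count; i++, q++)
                assert(q->id == first + i);
}

int
main(void)
{
        struct fake_q rxq[8] = {{0}, {1}, {2}, {3}, {4}, {5}, {6}, {7}};

        walk_port_slice(rxq, 2, 4);     /* this port owns rxq[2..5] */
        return (0);
}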
uint16_t nm_viid; int16_t nm_xact_addr_filt; uint16_t nm_rss_size; /* size of netmap VI's RSS table slice */ #endif int tmr_idx; int pktc_idx; int qsize_rxq; int qsize_txq; int linkdnrc; struct link_config link_cfg; struct timeval last_refreshed; struct port_stats stats; u_int tnl_cong_drops; u_int tx_parse_error; eventhandler_tag vlan_c; struct callout tick; struct sysctl_ctx_list ctx; /* from ifconfig up to driver detach */ uint8_t hw_addr[ETHER_ADDR_LEN]; /* factory MAC address, won't change */ }; /* Where the cluster came from, how it has been carved up. */ struct cluster_layout { int8_t zidx; int8_t hwidx; uint16_t region1; /* mbufs laid out within this region */ /* region2 is the DMA region */ uint16_t region3; /* cluster_metadata within this region */ }; struct cluster_metadata { u_int refcount; #ifdef INVARIANTS struct fl_sdesc *sd; /* For debug only. Could easily be stale */ #endif }; struct fl_sdesc { caddr_t cl; uint16_t nmbuf; /* # of driver originated mbufs with ref on cluster */ struct cluster_layout cll; }; struct tx_desc { __be64 flit[8]; }; struct tx_sdesc { struct mbuf *m; /* m_nextpkt linked chain of frames */ uint8_t desc_used; /* # of hardware descriptors used by the WR */ }; #define IQ_PAD (IQ_ESIZE - sizeof(struct rsp_ctrl) - sizeof(struct rss_header)) struct iq_desc { struct rss_header rss; uint8_t cpl[IQ_PAD]; struct rsp_ctrl rsp; }; #undef IQ_PAD CTASSERT(sizeof(struct iq_desc) == IQ_ESIZE); enum { /* iq flags */ IQ_ALLOCATED = (1 << 0), /* firmware resources allocated */ IQ_HAS_FL = (1 << 1), /* iq associated with a freelist */ IQ_INTR = (1 << 2), /* iq takes direct interrupt */ IQ_LRO_ENABLED = (1 << 3), /* iq is an eth rxq with LRO enabled */ /* iq state */ IQS_DISABLED = 0, IQS_BUSY = 1, IQS_IDLE = 2, }; /* * Ingress Queue: T4 is producer, driver is consumer. */ struct sge_iq { uint32_t flags; volatile int state; struct adapter *adapter; struct iq_desc *desc; /* KVA of descriptor ring */ int8_t intr_pktc_idx; /* packet count threshold index */ uint8_t gen; /* generation bit */ uint8_t intr_params; /* interrupt holdoff parameters */ uint8_t intr_next; /* XXX: holdoff for next interrupt */ uint16_t qsize; /* size (# of entries) of the queue */ uint16_t sidx; /* index of the entry with the status page */ uint16_t cidx; /* consumer index */ uint16_t cntxt_id; /* SGE context id for the iq */ uint16_t abs_id; /* absolute SGE id for the iq */ STAILQ_ENTRY(sge_iq) link; bus_dma_tag_t desc_tag; bus_dmamap_t desc_map; bus_addr_t ba; /* bus address of descriptor ring */ }; enum { EQ_CTRL = 1, EQ_ETH = 2, EQ_OFLD = 3, /* eq flags */ EQ_TYPEMASK = 0x3, /* 2 lsbits hold the type (see above) */ EQ_ALLOCATED = (1 << 2), /* firmware resources allocated */ EQ_ENABLED = (1 << 3), /* open for business */ }; /* Listed in order of preference. Update t4_sysctls too if you change these */ enum {DOORBELL_UDB, DOORBELL_WCWR, DOORBELL_UDBWC, DOORBELL_KDB}; /* * Egress Queue: driver is producer, T4 is consumer. * * Note: A free list is an egress queue (driver produces the buffers and T4 * consumes them) but it's special enough to have its own struct (see sge_fl). 
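/*
 * Illustrative sketch (editor's addition, not part of the driver): struct
 * iq_desc above carves every 64-byte ingress descriptor into a leading
 * rss_header, the CPL message, and a trailing rsp_ctrl, with IQ_PAD sized so
 * the whole descriptor is exactly IQ_ESIZE.  The stand-alone analog below
 * assumes 8-byte header/control structs purely for illustration; the real
 * definitions come from the shared firmware/CPL headers.
 */
#include <stdint.h>

#define FAKE_IQ_ESIZE   64
struct fake_rss_header { uint8_t b[8]; };       /* assumed size */
struct fake_rsp_ctrl   { uint8_t b[8]; };       /* assumed size */

#define FAKE_IQ_PAD     (FAKE_IQ_ESIZE - sizeof(struct fake_rsp_ctrl) - \
                         sizeof(struct fake_rss_header))
struct fake_iq_desc {
        struct fake_rss_header rss;     /* RSS/routing info from the chip */
        uint8_t cpl[FAKE_IQ_PAD];       /* CPL message payload */
        struct fake_rsp_ctrl rsp;       /* generation bit, type, length */
};

/* same check as the CTASSERT in the header */
_Static_assert(sizeof(struct fake_iq_desc) == FAKE_IQ_ESIZE, "bad iq_desc");

int main(void) { return (0); }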
*/ struct sge_eq { unsigned int flags; /* MUST be first */ unsigned int cntxt_id; /* SGE context id for the eq */ struct mtx eq_lock; struct tx_desc *desc; /* KVA of descriptor ring */ uint16_t doorbells; volatile uint32_t *udb; /* KVA of doorbell (lies within BAR2) */ u_int udb_qid; /* relative qid within the doorbell page */ uint16_t sidx; /* index of the entry with the status page */ uint16_t cidx; /* consumer idx (desc idx) */ uint16_t pidx; /* producer idx (desc idx) */ uint16_t equeqidx; /* EQUEQ last requested at this pidx */ uint16_t dbidx; /* pidx of the most recent doorbell */ uint16_t iqid; /* iq that gets egr_update for the eq */ uint8_t tx_chan; /* tx channel used by the eq */ volatile u_int equiq; /* EQUIQ outstanding */ bus_dma_tag_t desc_tag; bus_dmamap_t desc_map; bus_addr_t ba; /* bus address of descriptor ring */ char lockname[16]; }; struct sw_zone_info { uma_zone_t zone; /* zone that this cluster comes from */ int size; /* size of cluster: 2K, 4K, 9K, 16K, etc. */ int type; /* EXT_xxx type of the cluster */ int8_t head_hwidx; int8_t tail_hwidx; }; struct hw_buf_info { int8_t zidx; /* backpointer to zone; -ve means unused */ int8_t next; /* next hwidx for this zone; -1 means no more */ int size; }; enum { FL_STARVING = (1 << 0), /* on the adapter's list of starving fl's */ FL_DOOMED = (1 << 1), /* about to be destroyed */ FL_BUF_PACKING = (1 << 2), /* buffer packing enabled */ FL_BUF_RESUME = (1 << 3), /* resume from the middle of the frame */ }; #define FL_RUNNING_LOW(fl) \ (IDXDIFF(fl->dbidx * 8, fl->cidx, fl->sidx * 8) <= fl->lowat) #define FL_NOT_RUNNING_LOW(fl) \ (IDXDIFF(fl->dbidx * 8, fl->cidx, fl->sidx * 8) >= 2 * fl->lowat) struct sge_fl { struct mtx fl_lock; __be64 *desc; /* KVA of descriptor ring, ptr to addresses */ struct fl_sdesc *sdesc; /* KVA of software descriptor ring */ struct cluster_layout cll_def; /* default refill zone, layout */ uint16_t lowat; /* # of buffers <= this means fl needs help */ int flags; uint16_t buf_boundary; /* The 16b idx all deal with hw descriptors */ uint16_t dbidx; /* hw pidx after last doorbell */ uint16_t sidx; /* index of status page */ volatile uint16_t hw_cidx; /* The 32b idx are all buffer idx, not hardware descriptor idx */ uint32_t cidx; /* consumer index */ uint32_t pidx; /* producer index */ uint32_t dbval; u_int rx_offset; /* offset in fl buf (when buffer packing) */ volatile uint32_t *udb; uint64_t mbuf_allocated;/* # of mbuf allocated from zone_mbuf */ uint64_t mbuf_inlined; /* # of mbuf created within clusters */ uint64_t cl_allocated; /* # of clusters allocated */ uint64_t cl_recycled; /* # of clusters recycled */ uint64_t cl_fast_recycled; /* # of clusters recycled (fast) */ /* These 3 are valid when FL_BUF_RESUME is set, stale otherwise. 
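/*
 * Illustrative sketch (editor's addition, not part of the driver): in
 * FL_RUNNING_LOW above, dbidx and sidx count hardware freelist descriptors
 * while cidx counts individual buffers; each 64-byte hardware descriptor
 * covers 8 buffer addresses, hence the "* 8" before the ring-distance
 * comparison against lowat.  A worked stand-alone version of that test,
 * with the IDXDIFF arithmetic reproduced locally and made-up values:
 */
#include <assert.h>

/* ring distance from tail to head on a ring of size wrap */
#define RING_DIFF(head, tail, wrap) \
        ((head) >= (tail) ? (head) - (tail) : (wrap) - (tail) + (head))

int
main(void)
{
        unsigned dbidx = 100, sidx = 128;       /* hw descriptor indices */
        unsigned cidx = 790;                    /* buffer index */
        unsigned lowat = 32;

        /* buffers handed to hw but not yet consumed: 800 - 790 = 10 */
        assert(RING_DIFF(dbidx * 8, cidx, sidx * 8) == 10);
        /* 10 <= lowat, so this freelist would be flagged as running low */
        assert(RING_DIFF(dbidx * 8, cidx, sidx * 8) <= lowat);
        return (0);
}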
*/ struct mbuf *m0; struct mbuf **pnext; u_int remaining; uint16_t qsize; /* # of hw descriptors (status page included) */ uint16_t cntxt_id; /* SGE context id for the freelist */ TAILQ_ENTRY(sge_fl) link; /* All starving freelists */ bus_dma_tag_t desc_tag; bus_dmamap_t desc_map; char lockname[16]; bus_addr_t ba; /* bus address of descriptor ring */ struct cluster_layout cll_alt; /* alternate refill zone, layout */ }; struct mp_ring; /* txq: SGE egress queue + what's needed for Ethernet NIC */ struct sge_txq { struct sge_eq eq; /* MUST be first */ struct ifnet *ifp; /* the interface this txq belongs to */ struct mp_ring *r; /* tx software ring */ struct tx_sdesc *sdesc; /* KVA of software descriptor ring */ struct sglist *gl; __be32 cpl_ctrl0; /* for convenience */ struct task tx_reclaim_task; /* stats for common events first */ uint64_t txcsum; /* # of times hardware assisted with checksum */ uint64_t tso_wrs; /* # of TSO work requests */ uint64_t vlan_insertion;/* # of times VLAN tag was inserted */ uint64_t imm_wrs; /* # of work requests with immediate data */ uint64_t sgl_wrs; /* # of work requests with direct SGL */ uint64_t txpkt_wrs; /* # of txpkt work requests (not coalesced) */ uint64_t txpkts0_wrs; /* # of type0 coalesced tx work requests */ uint64_t txpkts1_wrs; /* # of type1 coalesced tx work requests */ uint64_t txpkts0_pkts; /* # of frames in type0 coalesced tx WRs */ uint64_t txpkts1_pkts; /* # of frames in type1 coalesced tx WRs */ /* stats for not-that-common events */ } __aligned(CACHE_LINE_SIZE); /* rxq: SGE ingress queue + SGE free list + miscellaneous items */ struct sge_rxq { struct sge_iq iq; /* MUST be first */ struct sge_fl fl; /* MUST follow iq */ struct ifnet *ifp; /* the interface this rxq belongs to */ #if defined(INET) || defined(INET6) struct lro_ctrl lro; /* LRO state */ #endif /* stats for common events first */ uint64_t rxcsum; /* # of times hardware assisted with checksum */ uint64_t vlan_extraction;/* # of times VLAN tag was extracted */ /* stats for not-that-common events */ } __aligned(CACHE_LINE_SIZE); static inline struct sge_rxq * iq_to_rxq(struct sge_iq *iq) { return (__containerof(iq, struct sge_rxq, iq)); } #ifdef TCP_OFFLOAD /* ofld_rxq: SGE ingress queue + SGE free list + miscellaneous items */ struct sge_ofld_rxq { struct sge_iq iq; /* MUST be first */ struct sge_fl fl; /* MUST follow iq */ } __aligned(CACHE_LINE_SIZE); static inline struct sge_ofld_rxq * iq_to_ofld_rxq(struct sge_iq *iq) { return (__containerof(iq, struct sge_ofld_rxq, iq)); } #endif struct wrqe { STAILQ_ENTRY(wrqe) link; struct sge_wrq *wrq; int wr_len; char wr[] __aligned(16); }; struct wrq_cookie { TAILQ_ENTRY(wrq_cookie) link; int ndesc; int pidx; }; /* * wrq: SGE egress queue that is given prebuilt work requests. Both the control * and offload tx queues are of this type. */ struct sge_wrq { struct sge_eq eq; /* MUST be first */ struct adapter *adapter; struct task wrq_tx_task; /* Tx desc reserved but WR not "committed" yet. */ TAILQ_HEAD(wrq_incomplete_wrs , wrq_cookie) incomplete_wrs; /* List of WRs ready to go out as soon as descriptors are available. */ STAILQ_HEAD(, wrqe) wr_list; u_int nwr_pending; u_int ndesc_needed; /* stats for common events first */ uint64_t tx_wrs_direct; /* # of WRs written directly to desc ring. */ uint64_t tx_wrs_ss; /* # of WRs copied from scratch space. */ uint64_t tx_wrs_copied; /* # of WRs queued and copied to desc ring. 
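/*
 * Illustrative sketch (editor's addition, not part of the driver):
 * iq_to_rxq() above relies on sge_iq being the first member of sge_rxq, so
 * __containerof() can recover the enclosing rxq from the iq pointer that the
 * interrupt/dispatch code carries around.  A minimal stand-alone analog of
 * the same pattern using offsetof():
 */
#include <assert.h>
#include <stddef.h>

struct inner { int x; };
struct outer {
        struct inner iq;        /* MUST be first, like sge_rxq.iq */
        int extra;
};

#define containerof(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

int
main(void)
{
        struct outer o = { .iq = { 1 }, .extra = 2 };
        struct inner *ip = &o.iq;

        assert(containerof(ip, struct outer, iq) == &o);
        return (0);
}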
*/ /* stats for not-that-common events */ /* * Scratch space for work requests that wrap around after reaching the * status page, and some infomation about the last WR that used it. */ uint16_t ss_pidx; uint16_t ss_len; uint8_t ss[SGE_MAX_WR_LEN]; } __aligned(CACHE_LINE_SIZE); #ifdef DEV_NETMAP struct sge_nm_rxq { struct port_info *pi; struct iq_desc *iq_desc; uint16_t iq_abs_id; uint16_t iq_cntxt_id; uint16_t iq_cidx; uint16_t iq_sidx; uint8_t iq_gen; __be64 *fl_desc; uint16_t fl_cntxt_id; uint32_t fl_cidx; uint32_t fl_pidx; uint32_t fl_sidx; uint32_t fl_db_val; u_int fl_hwidx:4; u_int nid; /* netmap ring # for this queue */ /* infrequently used items after this */ bus_dma_tag_t iq_desc_tag; bus_dmamap_t iq_desc_map; bus_addr_t iq_ba; int intr_idx; bus_dma_tag_t fl_desc_tag; bus_dmamap_t fl_desc_map; bus_addr_t fl_ba; } __aligned(CACHE_LINE_SIZE); struct sge_nm_txq { struct tx_desc *desc; uint16_t cidx; uint16_t pidx; uint16_t sidx; uint16_t equiqidx; /* EQUIQ last requested at this pidx */ uint16_t equeqidx; /* EQUEQ last requested at this pidx */ uint16_t dbidx; /* pidx of the most recent doorbell */ uint16_t doorbells; volatile uint32_t *udb; u_int udb_qid; u_int cntxt_id; __be32 cpl_ctrl0; /* for convenience */ u_int nid; /* netmap ring # for this queue */ /* infrequently used items after this */ bus_dma_tag_t desc_tag; bus_dmamap_t desc_map; bus_addr_t ba; int iqidx; } __aligned(CACHE_LINE_SIZE); #endif struct sge { int timer_val[SGE_NTIMERS]; int counter_val[SGE_NCOUNTERS]; int fl_starve_threshold; int fl_starve_threshold2; int eq_s_qpp; int iq_s_qpp; int nrxq; /* total # of Ethernet rx queues */ int ntxq; /* total # of Ethernet tx tx queues */ #ifdef TCP_OFFLOAD int nofldrxq; /* total # of TOE rx queues */ int nofldtxq; /* total # of TOE tx queues */ #endif #ifdef DEV_NETMAP int nnmrxq; /* total # of netmap rx queues */ int nnmtxq; /* total # of netmap tx queues */ #endif int niq; /* total # of ingress queues */ int neq; /* total # of egress queues */ struct sge_iq fwq; /* Firmware event queue */ struct sge_wrq mgmtq; /* Management queue (control queue) */ struct sge_wrq *ctrlq; /* Control queues */ struct sge_txq *txq; /* NIC tx queues */ struct sge_rxq *rxq; /* NIC rx queues */ #ifdef TCP_OFFLOAD struct sge_wrq *ofld_txq; /* TOE tx queues */ struct sge_ofld_rxq *ofld_rxq; /* TOE rx queues */ #endif #ifdef DEV_NETMAP struct sge_nm_txq *nm_txq; /* netmap tx queues */ struct sge_nm_rxq *nm_rxq; /* netmap rx queues */ #endif uint16_t iq_start; int eq_start; struct sge_iq **iqmap; /* iq->cntxt_id to iq mapping */ struct sge_eq **eqmap; /* eq->cntxt_id to eq mapping */ int pad_boundary; int pack_boundary; int8_t safe_hwidx1; /* may not have room for metadata */ int8_t safe_hwidx2; /* with room for metadata and maybe more */ struct sw_zone_info sw_zone_info[SW_ZONE_SIZES]; struct hw_buf_info hw_buf_info[SGE_FLBUF_SIZES]; }; struct rss_header; typedef int (*cpl_handler_t)(struct sge_iq *, const struct rss_header *, struct mbuf *); typedef int (*an_handler_t)(struct sge_iq *, const struct rsp_ctrl *); typedef int (*fw_msg_handler_t)(struct adapter *, const __be64 *); struct adapter { SLIST_ENTRY(adapter) link; device_t dev; struct cdev *cdev; /* PCIe register resources */ int regs_rid; struct resource *regs_res; int msix_rid; struct resource *msix_res; bus_space_handle_t bh; bus_space_tag_t bt; bus_size_t mmio_len; int udbs_rid; struct resource *udbs_res; volatile uint8_t *udbs_base; unsigned int pf; unsigned int mbox; /* Interrupt information */ int intr_type; int intr_count; struct irq 
{ struct resource *res; int rid; void *tag; } *irq; bus_dma_tag_t dmat; /* Parent DMA tag */ struct sge sge; int lro_timeout; struct taskqueue *tq[NCHAN]; /* General purpose taskqueues */ struct port_info *port[MAX_NPORTS]; uint8_t chan_map[NCHAN]; #ifdef TCP_OFFLOAD void *tom_softc; /* (struct tom_data *) */ struct tom_tunables tt; void *iwarp_softc; /* (struct c4iw_dev *) */ - void *iscsi_softc; + void *iscsi_softc; /* (struct cxgbei_data *) */ #endif struct l2t_data *l2t; /* L2 table */ struct tid_info tids; uint16_t doorbells; int open_device_map; #ifdef TCP_OFFLOAD int offload_map; /* ports with IFCAP_TOE enabled */ int active_ulds; /* ULDs activated on this adapter */ #endif int flags; char ifp_lockname[16]; struct mtx ifp_lock; struct ifnet *ifp; /* tracer ifp */ struct ifmedia media; int traceq; /* iq used by all tracers, -1 if none */ int tracer_valid; /* bitmap of valid tracers */ int tracer_enabled; /* bitmap of enabled tracers */ char fw_version[32]; char cfg_file[32]; u_int cfcsum; struct adapter_params params; struct t4_virt_res vres; uint16_t linkcaps; uint16_t niccaps; uint16_t toecaps; uint16_t rdmacaps; uint16_t iscsicaps; uint16_t fcoecaps; struct sysctl_ctx_list ctx; /* from adapter_full_init to full_uninit */ struct mtx sc_lock; char lockname[16]; /* Starving free lists */ struct mtx sfl_lock; /* same cache-line as sc_lock? but that's ok */ TAILQ_HEAD(, sge_fl) sfl; struct callout sfl_callout; struct mtx regwin_lock; /* for indirect reads and memory windows */ an_handler_t an_handler __aligned(CACHE_LINE_SIZE); fw_msg_handler_t fw_msg_handler[5]; /* NUM_FW6_TYPES */ cpl_handler_t cpl_handler[0xef]; /* NUM_CPL_CMDS */ #ifdef INVARIANTS const char *last_op; const void *last_op_thr; #endif int sc_do_rxcopy; }; #define ADAPTER_LOCK(sc) mtx_lock(&(sc)->sc_lock) #define ADAPTER_UNLOCK(sc) mtx_unlock(&(sc)->sc_lock) #define ADAPTER_LOCK_ASSERT_OWNED(sc) mtx_assert(&(sc)->sc_lock, MA_OWNED) #define ADAPTER_LOCK_ASSERT_NOTOWNED(sc) mtx_assert(&(sc)->sc_lock, MA_NOTOWNED) #define ASSERT_SYNCHRONIZED_OP(sc) \ KASSERT(IS_BUSY(sc) && \ (mtx_owned(&(sc)->sc_lock) || sc->last_op_thr == curthread), \ ("%s: operation not synchronized.", __func__)) #define PORT_LOCK(pi) mtx_lock(&(pi)->pi_lock) #define PORT_UNLOCK(pi) mtx_unlock(&(pi)->pi_lock) #define PORT_LOCK_ASSERT_OWNED(pi) mtx_assert(&(pi)->pi_lock, MA_OWNED) #define PORT_LOCK_ASSERT_NOTOWNED(pi) mtx_assert(&(pi)->pi_lock, MA_NOTOWNED) #define FL_LOCK(fl) mtx_lock(&(fl)->fl_lock) #define FL_TRYLOCK(fl) mtx_trylock(&(fl)->fl_lock) #define FL_UNLOCK(fl) mtx_unlock(&(fl)->fl_lock) #define FL_LOCK_ASSERT_OWNED(fl) mtx_assert(&(fl)->fl_lock, MA_OWNED) #define FL_LOCK_ASSERT_NOTOWNED(fl) mtx_assert(&(fl)->fl_lock, MA_NOTOWNED) #define RXQ_FL_LOCK(rxq) FL_LOCK(&(rxq)->fl) #define RXQ_FL_UNLOCK(rxq) FL_UNLOCK(&(rxq)->fl) #define RXQ_FL_LOCK_ASSERT_OWNED(rxq) FL_LOCK_ASSERT_OWNED(&(rxq)->fl) #define RXQ_FL_LOCK_ASSERT_NOTOWNED(rxq) FL_LOCK_ASSERT_NOTOWNED(&(rxq)->fl) #define EQ_LOCK(eq) mtx_lock(&(eq)->eq_lock) #define EQ_TRYLOCK(eq) mtx_trylock(&(eq)->eq_lock) #define EQ_UNLOCK(eq) mtx_unlock(&(eq)->eq_lock) #define EQ_LOCK_ASSERT_OWNED(eq) mtx_assert(&(eq)->eq_lock, MA_OWNED) #define EQ_LOCK_ASSERT_NOTOWNED(eq) mtx_assert(&(eq)->eq_lock, MA_NOTOWNED) #define TXQ_LOCK(txq) EQ_LOCK(&(txq)->eq) #define TXQ_TRYLOCK(txq) EQ_TRYLOCK(&(txq)->eq) #define TXQ_UNLOCK(txq) EQ_UNLOCK(&(txq)->eq) #define TXQ_LOCK_ASSERT_OWNED(txq) EQ_LOCK_ASSERT_OWNED(&(txq)->eq) #define TXQ_LOCK_ASSERT_NOTOWNED(txq) EQ_LOCK_ASSERT_NOTOWNED(&(txq)->eq) #define 
for_each_txq(pi, iter, q) \ for (q = &pi->adapter->sge.txq[pi->first_txq], iter = 0; \ iter < pi->ntxq; ++iter, ++q) #define for_each_rxq(pi, iter, q) \ for (q = &pi->adapter->sge.rxq[pi->first_rxq], iter = 0; \ iter < pi->nrxq; ++iter, ++q) #define for_each_ofld_txq(pi, iter, q) \ for (q = &pi->adapter->sge.ofld_txq[pi->first_ofld_txq], iter = 0; \ iter < pi->nofldtxq; ++iter, ++q) #define for_each_ofld_rxq(pi, iter, q) \ for (q = &pi->adapter->sge.ofld_rxq[pi->first_ofld_rxq], iter = 0; \ iter < pi->nofldrxq; ++iter, ++q) #define for_each_nm_txq(pi, iter, q) \ for (q = &pi->adapter->sge.nm_txq[pi->first_nm_txq], iter = 0; \ iter < pi->nnmtxq; ++iter, ++q) #define for_each_nm_rxq(pi, iter, q) \ for (q = &pi->adapter->sge.nm_rxq[pi->first_nm_rxq], iter = 0; \ iter < pi->nnmrxq; ++iter, ++q) #define IDXINCR(idx, incr, wrap) do { \ idx = wrap - idx > incr ? idx + incr : incr - (wrap - idx); \ } while (0) #define IDXDIFF(head, tail, wrap) \ ((head) >= (tail) ? (head) - (tail) : (wrap) - (tail) + (head)) /* One for errors, one for firmware events */ #define T4_EXTRA_INTR 2 static inline uint32_t t4_read_reg(struct adapter *sc, uint32_t reg) { return bus_space_read_4(sc->bt, sc->bh, reg); } static inline void t4_write_reg(struct adapter *sc, uint32_t reg, uint32_t val) { bus_space_write_4(sc->bt, sc->bh, reg, val); } static inline uint64_t t4_read_reg64(struct adapter *sc, uint32_t reg) { return t4_bus_space_read_8(sc->bt, sc->bh, reg); } static inline void t4_write_reg64(struct adapter *sc, uint32_t reg, uint64_t val) { t4_bus_space_write_8(sc->bt, sc->bh, reg, val); } static inline void t4_os_pci_read_cfg1(struct adapter *sc, int reg, uint8_t *val) { *val = pci_read_config(sc->dev, reg, 1); } static inline void t4_os_pci_write_cfg1(struct adapter *sc, int reg, uint8_t val) { pci_write_config(sc->dev, reg, val, 1); } static inline void t4_os_pci_read_cfg2(struct adapter *sc, int reg, uint16_t *val) { *val = pci_read_config(sc->dev, reg, 2); } static inline void t4_os_pci_write_cfg2(struct adapter *sc, int reg, uint16_t val) { pci_write_config(sc->dev, reg, val, 2); } static inline void t4_os_pci_read_cfg4(struct adapter *sc, int reg, uint32_t *val) { *val = pci_read_config(sc->dev, reg, 4); } static inline void t4_os_pci_write_cfg4(struct adapter *sc, int reg, uint32_t val) { pci_write_config(sc->dev, reg, val, 4); } static inline struct port_info * adap2pinfo(struct adapter *sc, int idx) { return (sc->port[idx]); } static inline void t4_os_set_hw_addr(struct adapter *sc, int idx, uint8_t hw_addr[]) { bcopy(hw_addr, sc->port[idx]->hw_addr, ETHER_ADDR_LEN); } static inline bool is_10G_port(const struct port_info *pi) { return ((pi->link_cfg.supported & FW_PORT_CAP_SPEED_10G) != 0); } static inline bool is_40G_port(const struct port_info *pi) { return ((pi->link_cfg.supported & FW_PORT_CAP_SPEED_40G) != 0); } static inline int tx_resume_threshold(struct sge_eq *eq) { /* not quite the same as qsize / 4, but this will do. 
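/*
 * Illustrative sketch (editor's addition, not part of the driver): IDXINCR
 * and IDXDIFF above implement modular index arithmetic for the descriptor
 * rings without a divide.  The stand-alone snippet below just exercises the
 * wrap-around cases on a toy ring of 16 entries.
 */
#include <assert.h>

#define RING_INCR(idx, incr, wrap) do { \
        idx = wrap - idx > incr ? idx + incr : incr - (wrap - idx); \
} while (0)
#define RING_DIFF(head, tail, wrap) \
        ((head) >= (tail) ? (head) - (tail) : (wrap) - (tail) + (head))

int
main(void)
{
        unsigned pidx = 14, cidx = 10, wrap = 16;

        RING_INCR(pidx, 3, wrap);               /* 14 + 3 wraps to 1 */
        assert(pidx == 1);
        assert(RING_DIFF(pidx, cidx, wrap) == 7); /* 1 is 7 slots past 10 */
        return (0);
}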
*/ return (eq->sidx / 4); } /* t4_main.c */ int t4_os_find_pci_capability(struct adapter *, int); int t4_os_pci_save_state(struct adapter *); int t4_os_pci_restore_state(struct adapter *); void t4_os_portmod_changed(const struct adapter *, int); void t4_os_link_changed(struct adapter *, int, int, int); void t4_iterate(void (*)(struct adapter *, void *), void *); int t4_register_cpl_handler(struct adapter *, int, cpl_handler_t); int t4_register_an_handler(struct adapter *, an_handler_t); int t4_register_fw_msg_handler(struct adapter *, int, fw_msg_handler_t); int t4_filter_rpl(struct sge_iq *, const struct rss_header *, struct mbuf *); int begin_synchronized_op(struct adapter *, struct port_info *, int, char *); void end_synchronized_op(struct adapter *, int); int update_mac_settings(struct ifnet *, int); int adapter_full_init(struct adapter *); int adapter_full_uninit(struct adapter *); int port_full_init(struct port_info *); int port_full_uninit(struct port_info *); #ifdef DEV_NETMAP /* t4_netmap.c */ int create_netmap_ifnet(struct port_info *); int destroy_netmap_ifnet(struct port_info *); void t4_nm_intr(void *); #endif /* t4_sge.c */ void t4_sge_modload(void); void t4_sge_modunload(void); uint64_t t4_sge_extfree_refs(void); void t4_init_sge_cpl_handlers(struct adapter *); void t4_tweak_chip_settings(struct adapter *); int t4_read_chip_settings(struct adapter *); int t4_create_dma_tag(struct adapter *); void t4_sge_sysctls(struct adapter *, struct sysctl_ctx_list *, struct sysctl_oid_list *); int t4_destroy_dma_tag(struct adapter *); int t4_setup_adapter_queues(struct adapter *); int t4_teardown_adapter_queues(struct adapter *); int t4_setup_port_queues(struct port_info *); int t4_teardown_port_queues(struct port_info *); void t4_intr_all(void *); void t4_intr(void *); void t4_intr_err(void *); void t4_intr_evt(void *); void t4_wrq_tx_locked(struct adapter *, struct sge_wrq *, struct wrqe *); void t4_update_fl_bufsize(struct ifnet *); int parse_pkt(struct mbuf **); void *start_wrq_wr(struct sge_wrq *, int, struct wrq_cookie *); void commit_wrq_wr(struct sge_wrq *, void *, struct wrq_cookie *); int tnl_cong(struct port_info *); /* t4_tracer.c */ struct t4_tracer; void t4_tracer_modload(void); void t4_tracer_modunload(void); void t4_tracer_port_detach(struct adapter *); int t4_get_tracer(struct adapter *, struct t4_tracer *); int t4_set_tracer(struct adapter *, struct t4_tracer *); int t4_trace_pkt(struct sge_iq *, const struct rss_header *, struct mbuf *); int t5_trace_pkt(struct sge_iq *, const struct rss_header *, struct mbuf *); static inline struct wrqe * alloc_wrqe(int wr_len, struct sge_wrq *wrq) { int len = offsetof(struct wrqe, wr) + wr_len; struct wrqe *wr; wr = malloc(len, M_CXGBE, M_NOWAIT); if (__predict_false(wr == NULL)) return (NULL); wr->wr_len = wr_len; wr->wrq = wrq; return (wr); } static inline void * wrtod(struct wrqe *wr) { return (&wr->wr[0]); } static inline void free_wrqe(struct wrqe *wr) { free(wr, M_CXGBE); } static inline void t4_wrq_tx(struct adapter *sc, struct wrqe *wr) { struct sge_wrq *wrq = wr->wrq; TXQ_LOCK(wrq); t4_wrq_tx_locked(sc, wrq, wr); TXQ_UNLOCK(wrq); } #endif Index: projects/cxl_iscsi/sys/dev/cxgbe/cxgbei/mbufq.h =================================================================== --- projects/cxl_iscsi/sys/dev/cxgbe/cxgbei/mbufq.h (revision 279896) +++ projects/cxl_iscsi/sys/dev/cxgbe/cxgbei/mbufq.h (nonexistent) @@ -1,121 +0,0 @@ -/************************************************************************** - -Copyright (c) 2007-2008, Chelsio 
Inc. -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - 1. Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - - 2. Neither the name of the Chelsio Corporation nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. - -$FreeBSD$ - -***************************************************************************/ - -#ifndef CXGB_MBUFQ_H_ -#define CXGB_MBUFQ_H_ - -struct mbuf_head { - struct mbuf *head; - struct mbuf *tail; - uint32_t qlen; - uint32_t qsize; - struct mtx lock; -}; - -static __inline void -mbufq_init(struct mbuf_head *l) -{ - l->head = l->tail = NULL; - l->qlen = l->qsize = 0; -} - -static __inline int -mbufq_empty(struct mbuf_head *l) -{ - return (l->head == NULL); -} - -static __inline int -mbufq_len(struct mbuf_head *l) -{ - return (l->qlen); -} - -static __inline int -mbufq_size(struct mbuf_head *l) -{ - return (l->qsize); -} - -static __inline int -mbufq_head_size(struct mbuf_head *l) -{ - return (l->head ? l->head->m_pkthdr.len : 0); -} - -static __inline void -mbufq_tail(struct mbuf_head *l, struct mbuf *m) -{ - l->qlen++; - if (l->head == NULL) - l->head = m; - else - l->tail->m_nextpkt = m; - l->tail = m; - l->qsize += m->m_pkthdr.len; -} - -static __inline struct mbuf * -mbufq_dequeue(struct mbuf_head *l) -{ - struct mbuf *m; - - m = l->head; - if (m) { - if (m == l->tail) - l->head = l->tail = NULL; - else - l->head = m->m_nextpkt; - m->m_nextpkt = NULL; - l->qlen--; - l->qsize -= m->m_pkthdr.len; - } - - return (m); -} - -static __inline struct mbuf * -mbufq_peek(const struct mbuf_head *l) -{ - return (l->head); -} - -static __inline void -mbufq_append(struct mbuf_head *a, struct mbuf_head *b) -{ - if (a->tail) - a->tail->m_nextpkt = b->head; - if (b->tail) - a->tail = b->tail; - a->qlen += b->qlen; - a->qsize += b->qsize; -} -#endif /* CXGB_MBUFQ_H_ */ Index: projects/cxl_iscsi/sys/dev/cxgbe/cxgbei/cxgbei.c =================================================================== --- projects/cxl_iscsi/sys/dev/cxgbe/cxgbei/cxgbei.c (revision 279896) +++ projects/cxl_iscsi/sys/dev/cxgbe/cxgbei/cxgbei.c (revision 279897) @@ -1,1495 +1,1275 @@ /*- * Copyright (c) 2012 Chelsio Communications, Inc. * All rights reserved. * * Chelsio T5xx iSCSI driver * * Written by: Sreenivasa Honnur * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* for PCIE_MEM_ACCESS */ #include #include "cxgbei.h" #include "cxgbei_ulp2_ddp.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* forward declarations */ struct icl_pdu * icl_pdu_new_empty(struct icl_conn *, int ); void icl_pdu_free(struct icl_pdu *); /* mbuf_tag management functions */ struct ulp_mbuf_cb * get_ulp_mbuf_cb(struct mbuf *m) { struct m_tag *mtag = NULL; mtag = m_tag_get(CXGBE_ISCSI_MBUF_TAG, sizeof(struct ulp_mbuf_cb), M_NOWAIT); if (mtag == NULL) { printf("%s: mtag alloc failed\n", __func__); return NULL; } bzero(mtag + 1, sizeof(struct ulp_mbuf_cb)); m_tag_prepend(m, mtag); return ((struct ulp_mbuf_cb *)(mtag + 1)); } static struct ulp_mbuf_cb * find_ulp_mbuf_cb(struct mbuf *m) { struct m_tag *mtag = NULL; if ((mtag = m_tag_find(m, CXGBE_ISCSI_MBUF_TAG, NULL)) == NULL) return (NULL); return ((struct ulp_mbuf_cb *)(mtag + 1)); } /* * Direct Data Placement - * Directly place the iSCSI Data-In or Data-Out PDU's payload into pre-posted * final destination host-memory buffers based on the Initiator Task Tag (ITT) * in Data-In or Target Task Tag (TTT) in Data-Out PDUs. * The host memory address is programmed into h/w in the format of pagepod * entries. * The location of the pagepod entry is encoded into ddp tag which is used as * the base for ITT/TTT. */ -#define T4_DDP -#ifdef T4_DDP + /* * functions to program the pagepod in h/w */ -static void * -t4_tdev2ddp(void *tdev) -{ - struct adapter *sc = ((struct toedev *)tdev)->tod_softc; - return (sc->iscsi_softc); -} static void inline ppod_set(struct pagepod *ppod, struct cxgbei_ulp2_pagepod_hdr *hdr, struct cxgbei_ulp2_gather_list *gl, unsigned int pidx) { int i; memcpy(ppod, hdr, sizeof(*hdr)); for (i = 0; i < (PPOD_PAGES + 1); i++, pidx++) { ppod->addr[i] = pidx < gl->nelem ? 
cpu_to_be64(gl->dma_sg[pidx].phys_addr) : 0ULL; } } static void inline ppod_clear(struct pagepod *ppod) { memset(ppod, 0, sizeof(*ppod)); } static inline void ulp_mem_io_set_hdr(struct adapter *sc, int tid, struct ulp_mem_io *req, unsigned int wr_len, unsigned int dlen, unsigned int pm_addr) { struct ulptx_idata *idata = (struct ulptx_idata *)(req + 1); INIT_ULPTX_WR(req, wr_len, 0, 0); req->cmd = cpu_to_be32(V_ULPTX_CMD(ULP_TX_MEM_WRITE) | V_ULP_MEMIO_ORDER(is_t4(sc)) | V_T5_ULP_MEMIO_IMM(is_t5(sc))); req->dlen = htonl(V_ULP_MEMIO_DATA_LEN(dlen >> 5)); req->len16 = htonl(DIV_ROUND_UP(wr_len - sizeof(req->wr), 16) | V_FW_WR_FLOWID(tid)); req->lock_addr = htonl(V_ULP_MEMIO_ADDR(pm_addr >> 5)); idata->cmd_more = htonl(V_ULPTX_CMD(ULP_TX_SC_IMM)); idata->len = htonl(dlen); } #define PPOD_SIZE sizeof(struct pagepod) #define ULPMEM_IDATA_MAX_NPPODS 1 /* 256/PPOD_SIZE */ #define PCIE_MEMWIN_MAX_NPPODS 16 /* 1024/PPOD_SIZE */ static int -ppod_write_idata(struct cxgbei_ulp2_ddp_info *ddp, +ppod_write_idata(struct cxgbei_data *ci, struct cxgbei_ulp2_pagepod_hdr *hdr, unsigned int idx, unsigned int npods, struct cxgbei_ulp2_gather_list *gl, unsigned int gl_pidx, struct toepcb *toep) { - unsigned int dlen = PPOD_SIZE * npods; - unsigned int pm_addr = idx * PPOD_SIZE + ddp->llimit; - unsigned int wr_len = roundup(sizeof(struct ulp_mem_io) + - sizeof(struct ulptx_idata) + dlen, 16); + u_int dlen = PPOD_SIZE * npods; + u_int pm_addr = idx * PPOD_SIZE + ci->llimit; + u_int wr_len = roundup(sizeof(struct ulp_mem_io) + + sizeof(struct ulptx_idata) + dlen, 16); struct ulp_mem_io *req; struct ulptx_idata *idata; struct pagepod *ppod; - unsigned int i; + u_int i; struct wrqe *wr; struct adapter *sc = toep->port->adapter; wr = alloc_wrqe(wr_len, toep->ctrlq); if (wr == NULL) { - printf("%s: alloc wrqe failed\n", __func__); - return ENOMEM; + CXGBE_UNIMPLEMENTED("ppod_write_idata: alloc_wrqe failure"); + return (ENOMEM); } req = wrtod(wr); memset(req, 0, wr_len); ulp_mem_io_set_hdr(sc, toep->tid, req, wr_len, dlen, pm_addr); idata = (struct ulptx_idata *)(req + 1); ppod = (struct pagepod *)(idata + 1); for (i = 0; i < npods; i++, ppod++, gl_pidx += PPOD_PAGES) { if (!hdr) /* clear the pagepod */ ppod_clear(ppod); else /* set the pagepod */ ppod_set(ppod, hdr, gl, gl_pidx); } t4_wrq_tx(sc, wr); return 0; } -static int -t4_ddp_set_map(struct cxgbei_ulp2_ddp_info *ddp, - void *isockp, struct cxgbei_ulp2_pagepod_hdr *hdr, - unsigned int idx, unsigned int npods, - struct cxgbei_ulp2_gather_list *gl, int reply) +int +t4_ddp_set_map(struct cxgbei_data *ci, void *isockp, + struct cxgbei_ulp2_pagepod_hdr *hdr, u_int idx, u_int npods, + struct cxgbei_ulp2_gather_list *gl, int reply) { - iscsi_socket *isock = (iscsi_socket *)isockp; - struct socket *sk; - struct toepcb *toep; - struct tcpcb *tp; + struct iscsi_socket *isock = (struct iscsi_socket *)isockp; + struct toepcb *toep = isock->toep; int err; unsigned int pidx = 0, w_npods = 0, cnt; - if (isock == NULL) - return EINVAL; - sk = isock->sock; - tp = so_sototcpcb(sk); - toep = tp->t_toe; - /* * on T4, if we use a mix of IMMD and DSGL with ULP_MEM_WRITE, * the order would not be garanteed, so we will stick with IMMD */ gl->tid = toep->tid; gl->port_id = toep->port->port_id; gl->egress_dev = (void *)toep->port->ifp; /* send via immediate data */ for (; w_npods < npods; idx += cnt, w_npods += cnt, pidx += PPOD_PAGES) { cnt = npods - w_npods; if (cnt > ULPMEM_IDATA_MAX_NPPODS) cnt = ULPMEM_IDATA_MAX_NPPODS; - err = ppod_write_idata(ddp, hdr, idx, cnt, gl, + err = 
ppod_write_idata(ci, hdr, idx, cnt, gl, pidx, toep); if (err) { printf("%s: ppod_write_idata failed\n", __func__); break; } } return err; } -static void -t4_ddp_clear_map(struct cxgbei_ulp2_ddp_info *ddp, - struct cxgbei_ulp2_gather_list *gl, - unsigned int tag, unsigned int idx, unsigned int npods, - iscsi_socket *isock) +void +t4_ddp_clear_map(struct cxgbei_data *ci, struct cxgbei_ulp2_gather_list *gl, + u_int tag, u_int idx, u_int npods, struct iscsi_socket *isock) { - struct socket *sk; - struct toepcb *toep; - struct tcpcb *tp; + struct toepcb *toep = isock->toep; int err = -1; + u_int pidx = 0; + u_int w_npods = 0; + u_int cnt; - sk = isock->sock; - tp = so_sototcpcb(sk); - toep = tp->t_toe; - - /* send via immediate data */ - unsigned int pidx = 0; - unsigned int w_npods = 0; - unsigned int cnt; - for (; w_npods < npods; idx += cnt, w_npods += cnt, pidx += PPOD_PAGES) { cnt = npods - w_npods; if (cnt > ULPMEM_IDATA_MAX_NPPODS) cnt = ULPMEM_IDATA_MAX_NPPODS; - err = ppod_write_idata(ddp, NULL, idx, cnt, gl, 0, toep); + err = ppod_write_idata(ci, NULL, idx, cnt, gl, 0, toep); if (err) break; } } -#endif -/* - * cxgbei device management - * maintains a list of the cxgbei devices - */ -typedef struct offload_device { - SLIST_ENTRY(offload_device) link; - unsigned char d_version; - unsigned char d_tx_hdrlen; /* CPL_TX_DATA, < 256 */ - unsigned char d_ulp_rx_datagap; /* for coalesced iscsi msg */ - unsigned char filler; - - unsigned int d_flag; - unsigned int d_payload_tmax; - unsigned int d_payload_rmax; - - struct cxgbei_ulp2_tag_format d_tag_format; - void *d_tdev; - void *d_pdev; - void* (*tdev2ddp)(void *tdev); -}offload_device; - -SLIST_HEAD(, offload_device) odev_list; - -static void t4_unregister_cpl_handler_with_tom(struct adapter *sc); -static offload_device * -offload_device_new(void *tdev) -{ - offload_device *odev = NULL; - odev = malloc(sizeof(struct offload_device), - M_CXGBE, M_NOWAIT | M_ZERO); - if (odev) { - odev->d_tdev = tdev; - SLIST_INSERT_HEAD(&odev_list, odev, link); - } - - return odev; -} - -static offload_device * -offload_device_find(struct toedev *tdev) -{ - offload_device *odev = NULL; - - if (!SLIST_EMPTY(&odev_list)) { - SLIST_FOREACH(odev, &odev_list, link) { - if (odev->d_tdev == tdev) - break; - } - } - return odev; -} - -static void -cxgbei_odev_cleanup(offload_device *odev) -{ - struct toedev *tdev = odev->d_tdev; - struct adapter *sc = (struct adapter *)tdev->tod_softc; - - /* de-register ULP CPL handlers with TOM */ - t4_unregister_cpl_handler_with_tom(sc); - if (odev->d_flag & ODEV_FLAG_ULP_DDP_ENABLED) { - if (sc->iscsi_softc) - cxgbei_ulp2_ddp_cleanup( - (struct cxgbei_ulp2_ddp_info **)&sc->iscsi_softc); - } - return; -} - -static void -offload_device_remove() -{ - offload_device *odev = NULL, *next = NULL; - - if (SLIST_EMPTY(&odev_list)) - return; - - for (odev = SLIST_FIRST(&odev_list); odev != NULL; odev = next) { - SLIST_REMOVE(&odev_list, odev, offload_device, link); - next = SLIST_NEXT(odev, link); - cxgbei_odev_cleanup(odev); - free(odev, M_CXGBE); - } - - return; -} - static int -cxgbei_map_sg(cxgbei_sgl *sgl, struct ccb_scsiio *csio) +cxgbei_map_sg(struct cxgbei_sgl *sgl, struct ccb_scsiio *csio) { unsigned int data_len = csio->dxfer_len; unsigned int sgoffset = (uint64_t)csio->data_ptr & PAGE_MASK; unsigned int nsge; unsigned char *sgaddr = csio->data_ptr; unsigned int len = 0; nsge = (csio->dxfer_len + sgoffset + PAGE_SIZE - 1) >> PAGE_SHIFT; sgl->sg_addr = sgaddr; sgl->sg_offset = sgoffset; if (data_len < (PAGE_SIZE - sgoffset)) len 
= data_len; else len = PAGE_SIZE - sgoffset; sgl->sg_length = len; data_len -= len; sgaddr += len; sgl = sgl+1; while (data_len > 0) { sgl->sg_addr = sgaddr; len = (data_len < PAGE_SIZE)? data_len: PAGE_SIZE; sgl->sg_length = len; sgaddr += len; data_len -= len; sgl = sgl + 1; } return nsge; } static int -cxgbei_map_sg_tgt(cxgbei_sgl *sgl, union ctl_io *io) +cxgbei_map_sg_tgt(struct cxgbei_sgl *sgl, union ctl_io *io) { unsigned int data_len, sgoffset, nsge; unsigned char *sgaddr; unsigned int len = 0, index = 0, ctl_sg_count, i; struct ctl_sg_entry ctl_sg_entry, *ctl_sglist; if (io->scsiio.kern_sg_entries > 0) { ctl_sglist = (struct ctl_sg_entry *)io->scsiio.kern_data_ptr; ctl_sg_count = io->scsiio.kern_sg_entries; } else { ctl_sglist = &ctl_sg_entry; ctl_sglist->addr = io->scsiio.kern_data_ptr; ctl_sglist->len = io->scsiio.kern_data_len; ctl_sg_count = 1; } sgaddr = sgl->sg_addr = ctl_sglist[index].addr; sgoffset = sgl->sg_offset = (uint64_t)sgl->sg_addr & PAGE_MASK; data_len = ctl_sglist[index].len; if (data_len < (PAGE_SIZE - sgoffset)) len = data_len; else len = PAGE_SIZE - sgoffset; sgl->sg_length = len; data_len -= len; sgaddr += len; sgl = sgl+1; len = 0; for (i = 0; i< ctl_sg_count; i++) len += ctl_sglist[i].len; nsge = (len + sgoffset + PAGE_SIZE -1) >> PAGE_SHIFT; while (data_len > 0) { sgl->sg_addr = sgaddr; len = (data_len < PAGE_SIZE)? data_len: PAGE_SIZE; sgl->sg_length = len; sgaddr += len; data_len -= len; sgl = sgl + 1; if (data_len == 0) { if (index == ctl_sg_count - 1) break; index++; sgaddr = ctl_sglist[index].addr; data_len = ctl_sglist[index].len; } } return nsge; } static int -t4_sk_ddp_tag_reserve(iscsi_socket *isock, unsigned int xferlen, - cxgbei_sgl *sgl, unsigned int sgcnt, - unsigned int *ddp_tag) +t4_sk_ddp_tag_reserve(struct cxgbei_data *ci, struct iscsi_socket *isock, + u_int xferlen, struct cxgbei_sgl *sgl, u_int sgcnt, u_int *ddp_tag) { - offload_device *odev = isock->s_odev; - struct toedev *tdev = odev->d_tdev; struct cxgbei_ulp2_gather_list *gl; int err = -EINVAL; - struct adapter *sc = tdev->tod_softc; - struct cxgbei_ulp2_ddp_info *ddp; + struct toepcb *toep = isock->toep; - ddp = (struct cxgbei_ulp2_ddp_info *)sc->iscsi_softc; - if (ddp == NULL) - return ENOMEM; - - gl = cxgbei_ulp2_ddp_make_gl_from_iscsi_sgvec(xferlen, sgl, sgcnt, - odev->d_tdev, 0); + gl = cxgbei_ulp2_ddp_make_gl_from_iscsi_sgvec(xferlen, sgl, sgcnt, ci, 0); if (gl) { - err = cxgbei_ulp2_ddp_tag_reserve(odev->tdev2ddp(tdev), - isock, - isock->s_tid, - &odev->d_tag_format, - ddp_tag, gl, - 0, 0); + err = cxgbei_ulp2_ddp_tag_reserve(ci, isock, toep->tid, + &ci->tag_format, ddp_tag, gl, 0, 0); if (err) { - CTR1(KTR_CXGBE, - "%s: ddp_tag_reserve failed\n", __func__); - cxgbei_ulp2_ddp_release_gl(gl, odev->d_tdev); + cxgbei_ulp2_ddp_release_gl(ci, gl); } } return err; } static unsigned int cxgbei_task_reserve_itt(struct icl_conn *ic, void **prv, struct ccb_scsiio *scmd, unsigned int *itt) { int xferlen = scmd->dxfer_len; - cxgbei_task_data *tdata = NULL; - cxgbei_sgl *sge = NULL; - struct socket *so = ic->ic_socket; - iscsi_socket *isock = (iscsi_socket *)(so)->so_emuldata; + struct cxgbei_task_data *tdata = NULL; + struct cxgbei_sgl *sge = NULL; + struct iscsi_socket *isock = ic->ic_ofld_prv0; + struct toepcb *toep = isock->toep; + struct adapter *sc = td_adapter(toep->td); + struct cxgbei_data *ci = sc->iscsi_softc; int err = -1; - offload_device *odev = isock->s_odev; - tdata = (cxgbei_task_data *)*prv; - if ((xferlen == 0) || (tdata == NULL)) { + tdata = (struct cxgbei_task_data 
*)*prv; + if (xferlen == 0 || tdata == NULL) goto out; - } if (xferlen < DDP_THRESHOLD) goto out; if ((scmd->ccb_h.flags & CAM_DIR_MASK) == CAM_DIR_IN) { tdata->nsge = cxgbei_map_sg(tdata->sgl, scmd); if (tdata->nsge == 0) { CTR1(KTR_CXGBE, "%s: map_sg failed\n", __func__); return 0; } sge = tdata->sgl; tdata->sc_ddp_tag = *itt; CTR3(KTR_CXGBE, "%s: *itt:0x%x sc_ddp_tag:0x%x\n", __func__, *itt, tdata->sc_ddp_tag); - if (cxgbei_ulp2_sw_tag_usable(&odev->d_tag_format, + if (cxgbei_ulp2_sw_tag_usable(&ci->tag_format, tdata->sc_ddp_tag)) { - err = t4_sk_ddp_tag_reserve(isock, scmd->dxfer_len, sge, - tdata->nsge, &tdata->sc_ddp_tag); + err = t4_sk_ddp_tag_reserve(ci, isock, scmd->dxfer_len, + sge, tdata->nsge, &tdata->sc_ddp_tag); } else { CTR3(KTR_CXGBE, "%s: itt:0x%x sc_ddp_tag:0x%x not usable\n", __func__, *itt, tdata->sc_ddp_tag); } } out: if (err < 0) tdata->sc_ddp_tag = - cxgbei_ulp2_set_non_ddp_tag(&odev->d_tag_format, *itt); + cxgbei_ulp2_set_non_ddp_tag(&ci->tag_format, *itt); return tdata->sc_ddp_tag; } static unsigned int cxgbei_task_reserve_ttt(struct icl_conn *ic, void **prv, union ctl_io *io, unsigned int *ttt) { - struct socket *so = ic->ic_socket; - iscsi_socket *isock = (iscsi_socket *)(so)->so_emuldata; - cxgbei_task_data *tdata = NULL; - offload_device *odev = isock->s_odev; + struct iscsi_socket *isock = ic->ic_ofld_prv0; + struct toepcb *toep = isock->toep; + struct adapter *sc = td_adapter(toep->td); + struct cxgbei_data *ci = sc->iscsi_softc; + struct cxgbei_task_data *tdata = NULL; int xferlen, err = -1; - cxgbei_sgl *sge = NULL; + struct cxgbei_sgl *sge = NULL; xferlen = (io->scsiio.kern_data_len - io->scsiio.ext_data_filled); - tdata = (cxgbei_task_data *)*prv; + tdata = (struct cxgbei_task_data *)*prv; if ((xferlen == 0) || (tdata == NULL)) goto out; if (xferlen < DDP_THRESHOLD) goto out; tdata->nsge = cxgbei_map_sg_tgt(tdata->sgl, io); if (tdata->nsge == 0) { CTR1(KTR_CXGBE, "%s: map_sg failed\n", __func__); return 0; } sge = tdata->sgl; tdata->sc_ddp_tag = *ttt; - if (cxgbei_ulp2_sw_tag_usable(&odev->d_tag_format, tdata->sc_ddp_tag)) { - err = t4_sk_ddp_tag_reserve(isock, xferlen, sge, tdata->nsge, - &tdata->sc_ddp_tag); + if (cxgbei_ulp2_sw_tag_usable(&ci->tag_format, tdata->sc_ddp_tag)) { + err = t4_sk_ddp_tag_reserve(ci, isock, xferlen, sge, + tdata->nsge, &tdata->sc_ddp_tag); } else { CTR2(KTR_CXGBE, "%s: sc_ddp_tag:0x%x not usable\n", __func__, tdata->sc_ddp_tag); } out: if (err < 0) tdata->sc_ddp_tag = - cxgbei_ulp2_set_non_ddp_tag(&odev->d_tag_format, *ttt); + cxgbei_ulp2_set_non_ddp_tag(&ci->tag_format, *ttt); return tdata->sc_ddp_tag; } static int -t4_sk_ddp_tag_release(iscsi_socket *isock, unsigned int ddp_tag) +t4_sk_ddp_tag_release(struct iscsi_socket *isock, unsigned int ddp_tag) { - offload_device *odev = isock->s_odev; - struct toedev *tdev = odev->d_tdev; + struct toepcb *toep = isock->toep; + struct adapter *sc = td_adapter(toep->td); + struct cxgbei_data *ci = sc->iscsi_softc; - cxgbei_ulp2_ddp_tag_release(odev->tdev2ddp(tdev), ddp_tag, isock); - return 0; + cxgbei_ulp2_ddp_tag_release(ci, ddp_tag, isock); + + return (0); } -#ifdef T4_DDP -static struct cxgbei_ulp2_ddp_info * -t4_ddp_init(struct ifnet *dev, struct toedev *tdev) + +static int +cxgbei_ddp_init(struct adapter *sc, struct cxgbei_data *ci) { - struct cxgbei_ulp2_ddp_info *ddp; - struct adapter *sc = tdev->tod_softc; - struct ulp_iscsi_info uinfo; + int nppods, bits, max_sz, rc; + static const u_int pgsz_order[] = {0, 1, 2, 3}; - memset(&uinfo, 0, sizeof(struct ulp_iscsi_info)); - 
uinfo.llimit = sc->vres.iscsi.start; - uinfo.ulimit = sc->vres.iscsi.start + sc->vres.iscsi.size - 1; - uinfo.max_rxsz = uinfo.max_txsz = - G_MAXRXDATA(t4_read_reg(sc, A_TP_PARA_REG2)); + MPASS(sc->vres.iscsi.size > 0); - if (sc->vres.iscsi.size == 0) { - printf("%s: iSCSI capabilities not enabled.\n", __func__); - return NULL; - } - printf("T4, ddp 0x%x ~ 0x%x, size %u, iolen %u, ulpddp:0x%p\n", - uinfo.llimit, uinfo.ulimit, sc->vres.iscsi.size, - uinfo.max_rxsz, sc->iscsi_softc); + ci->llimit = sc->vres.iscsi.start; + ci->ulimit = sc->vres.iscsi.start + sc->vres.iscsi.size - 1; + max_sz = G_MAXRXDATA(t4_read_reg(sc, A_TP_PARA_REG2)); - cxgbei_ulp2_ddp_init((void *)tdev, - (struct cxgbei_ulp2_ddp_info **)&sc->iscsi_softc, - &uinfo); - ddp = (struct cxgbei_ulp2_ddp_info *)sc->iscsi_softc; - if (ddp) { - unsigned int pgsz_order[4]; - int i; + nppods = sc->vres.iscsi.size >> IPPOD_SIZE_SHIFT; + if (nppods <= 1024) + return (ENXIO); - for (i = 0; i < 4; i++) - pgsz_order[i] = uinfo.pgsz_factor[i]; + bits = fls(nppods); + if (bits > IPPOD_IDX_MAX_SIZE) + bits = IPPOD_IDX_MAX_SIZE; + nppods = (1 << (bits - 1)) - 1; - t4_iscsi_init(dev, uinfo.tagmask, pgsz_order); + rc = bus_dma_tag_create(NULL, 1, 0, BUS_SPACE_MAXADDR, + BUS_SPACE_MAXADDR, NULL, NULL, UINT32_MAX , 8, BUS_SPACE_MAXSIZE, + BUS_DMA_ALLOCNOW, NULL, NULL, &ci->ulp_ddp_tag); + if (rc != 0) { + device_printf(sc->dev, "%s: failed to create DMA tag: %u.\n", + __func__, rc); + return (rc); + } - ddp->ddp_set_map = t4_ddp_set_map; - ddp->ddp_clear_map = t4_ddp_clear_map; + ci->colors = malloc(nppods * sizeof(char), M_CXGBE, M_NOWAIT | M_ZERO); + ci->gl_map = malloc(nppods * sizeof(struct cxgbei_ulp2_gather_list *), + M_CXGBE, M_NOWAIT | M_ZERO); + if (ci->colors == NULL || ci->gl_map == NULL) { + bus_dma_tag_destroy(ci->ulp_ddp_tag); + free(ci->colors, M_CXGBE); + free(ci->gl_map, M_CXGBE); + return (ENOMEM); } - return ddp; -} -#endif -static struct socket * -cpl_find_sock(struct adapter *sc, unsigned int hwtid) -{ - struct socket *sk; - struct toepcb *toep = lookup_tid(sc, hwtid); - struct inpcb *inp = toep->inp; + mtx_init(&ci->map_lock, "ddp lock", NULL, MTX_DEF | MTX_DUPOK); + ci->max_txsz = ci->max_rxsz = min(max_sz, ULP2_MAX_PKT_SIZE); + ci->nppods = nppods; + ci->idx_last = nppods; + ci->idx_bits = bits; + ci->idx_mask = (1 << bits) - 1; + ci->rsvd_tag_mask = (1 << (bits + IPPOD_IDX_SHIFT)) - 1; - INP_WLOCK(inp); - sk = inp->inp_socket; - INP_WUNLOCK(inp); - if (sk == NULL) - CTR2(KTR_CXGBE, - "%s: T4 CPL tid 0x%x, sk NULL.\n", __func__, hwtid); - return sk; + ci->tag_format.sw_bits = bits; + ci->tag_format.rsvd_bits = bits; + ci->tag_format.rsvd_shift = IPPOD_IDX_SHIFT; + ci->tag_format.rsvd_mask = ci->idx_mask; + + t4_iscsi_init(sc, ci->idx_mask << IPPOD_IDX_SHIFT, pgsz_order); + + return (rc); } static void -process_rx_iscsi_hdr(struct socket *sk, struct mbuf *m) +process_rx_iscsi_hdr(struct toepcb *toep, struct mbuf *m) { - struct tcpcb *tp = so_sototcpcb(sk); - struct toepcb *toep = tp->t_toe; - struct cpl_iscsi_hdr *cpl = mtod(m, struct cpl_iscsi_hdr *); struct ulp_mbuf_cb *cb, *lcb; struct mbuf *lmbuf; - unsigned char *byte; - iscsi_socket *isock = (iscsi_socket *)(sk)->so_emuldata; - unsigned int hlen, dlen, plen; + u_char *byte; + struct iscsi_socket *isock = toep->ulpcb; + struct tcpcb *tp = intotcpcb(toep->inp); + u_int hlen, dlen, plen; - if (isock == NULL) - goto err_out; + MPASS(isock != NULL); + M_ASSERTPKTHDR(m); - if (toep == NULL) - goto err_out; - if ((m->m_flags & M_PKTHDR) == 0) { - printf("%s: m:%p no 
M_PKTHDR can't allocate m_tag\n", - __func__, m); - goto err_out; - } + mtx_lock(&isock->iscsi_rcvq_lock); - mtx_lock(&isock->iscsi_rcv_mbufq.lock); - /* allocate m_tag to hold ulp info */ cb = get_ulp_mbuf_cb(m); if (cb == NULL) { printf("%s: Error allocation m_tag\n", __func__); goto err_out1; } cb->seq = ntohl(cpl->seq); /* strip off CPL header */ m_adj(m, sizeof(*cpl)); /* figure out if this is the pdu header or data */ cb->ulp_mode = ULP_MODE_ISCSI; if (isock->mbuf_ulp_lhdr == NULL) { - iscsi_socket *isock = (iscsi_socket *)(sk)->so_emuldata; isock->mbuf_ulp_lhdr = lmbuf = m; lcb = cb; cb->flags = SBUF_ULP_FLAG_HDR_RCVD | SBUF_ULP_FLAG_COALESCE_OFF; /* we only update tp->rcv_nxt once per pdu */ if (cb->seq != tp->rcv_nxt) { CTR3(KTR_CXGBE, "tid 0x%x, CPL_ISCSI_HDR, BAD seq got 0x%x exp 0x%x.\n", toep->tid, cb->seq, tp->rcv_nxt); goto err_out1; } byte = m->m_data; hlen = ntohs(cpl->len); dlen = ntohl(*(unsigned int *)(byte + 4)) & 0xFFFFFF; plen = ntohs(cpl->pdu_len_ddp); lcb->ulp.iscsi.pdulen = (hlen + dlen + 3) & (~0x3); /* workaround for cpl->pdu_len_ddp since it does not include the data digest count */ if (dlen) lcb->ulp.iscsi.pdulen += isock->s_dcrc_len; tp->rcv_nxt += lcb->ulp.iscsi.pdulen; if (tp->rcv_wnd <= lcb->ulp.iscsi.pdulen) CTR3(KTR_CXGBE, "%s: Neg rcv_wnd:0x%lx pdulen:0x%x\n", __func__, tp->rcv_wnd, lcb->ulp.iscsi.pdulen); tp->rcv_wnd -= lcb->ulp.iscsi.pdulen; tp->t_rcvtime = ticks; } else { lmbuf = isock->mbuf_ulp_lhdr; lcb = find_ulp_mbuf_cb(lmbuf); if (lcb == NULL) { printf("%s: lmbuf:%p lcb is NULL\n", __func__, lmbuf); goto err_out1; } lcb->flags |= SBUF_ULP_FLAG_DATA_RCVD | SBUF_ULP_FLAG_COALESCE_OFF; cb->flags = SBUF_ULP_FLAG_DATA_RCVD; /* padding */ if ((m->m_len % 4) != 0) { m->m_len += 4 - (m->m_len % 4); } } - mbufq_tail(&isock->iscsi_rcv_mbufq, m); - mtx_unlock(&isock->iscsi_rcv_mbufq.lock); + mbufq_enqueue(&isock->iscsi_rcvq, m); + mtx_unlock(&isock->iscsi_rcvq_lock); return; err_out1: - mtx_unlock(&isock->iscsi_rcv_mbufq.lock); -err_out: + mtx_unlock(&isock->iscsi_rcvq_lock); m_freem(m); - return; } /* hand over received PDU to iscsi_initiator */ static void iscsi_conn_receive_pdu(struct iscsi_socket *isock) { struct icl_pdu *response = NULL; struct icl_conn *ic = (struct icl_conn*)isock->s_conn; struct mbuf *m; struct ulp_mbuf_cb *cb = NULL; int data_len; response = icl_pdu_new_empty(isock->s_conn, M_NOWAIT); if (response == NULL) { panic("%s: failed to alloc icl_pdu\n", __func__); return; } - m = mbufq_peek(&isock->iscsi_rcv_mbufq); + m = mbufq_first(&isock->iscsi_rcvq); if (m) { cb = find_ulp_mbuf_cb(m); if (cb == NULL) { panic("%s: m:%p cb is NULL\n", __func__, m); goto err_out; } if (!(cb->flags & SBUF_ULP_FLAG_STATUS_RCVD)) goto err_out; } /* BHS */ - mbufq_dequeue(&isock->iscsi_rcv_mbufq); + mbufq_dequeue(&isock->iscsi_rcvq); data_len = cb->ulp.iscsi.pdulen; - CTR5(KTR_CXGBE, "%s: response:%p m:%p m_len:%d data_len:%d\n", + CTR5(KTR_CXGBE, "%s: response:%p m:%p m_len:%d data_len:%d", __func__, response, m, m->m_len, data_len); response->ip_bhs_mbuf = m; response->ip_bhs = mtod(response->ip_bhs_mbuf, struct iscsi_bhs *); /* data */ if (cb->flags & SBUF_ULP_FLAG_DATA_RCVD) { - m = mbufq_peek(&isock->iscsi_rcv_mbufq); + m = mbufq_first(&isock->iscsi_rcvq); if (m == NULL) { CTR1(KTR_CXGBE, "%s:No Data\n", __func__); goto err_out; } - mbufq_dequeue(&isock->iscsi_rcv_mbufq); + mbufq_dequeue(&isock->iscsi_rcvq); response->ip_data_mbuf = m; response->ip_data_len += response->ip_data_mbuf->m_len; } else { /* Data is DDP'ed */ response->ip_ofld_prv0 = 1; } 
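/*
 * Illustrative sketch (editor's addition, not part of the driver):
 * process_rx_iscsi_hdr() above sizes the PDU as header plus data rounded up
 * to a 4-byte boundary, then adds the data-digest length when a data segment
 * is present (the workaround for cpl->pdu_len_ddp not covering the digest).
 * The same arithmetic stand-alone, assuming a 4-byte CRC32C data digest:
 */
#include <assert.h>

static unsigned
iscsi_pdu_len(unsigned hlen, unsigned dlen, unsigned dcrc_len)
{
        unsigned pdulen = (hlen + dlen + 3) & ~0x3u;    /* pad to 4 bytes */

        if (dlen != 0)
                pdulen += dcrc_len;     /* digest follows the padded data */
        return (pdulen);
}

int
main(void)
{
        /* 48-byte BHS+AHS, 1421-byte data segment, 4-byte data digest */
        assert(iscsi_pdu_len(48, 1421, 4) == 1476);
        /* no data segment -> no data digest */
        assert(iscsi_pdu_len(48, 0, 4) == 48);
        return (0);
}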
(ic->ic_receive)(response); return; err_out: icl_pdu_free(response); return; } static void -process_rx_data_ddp(struct socket *sk, void *m) +process_rx_data_ddp(struct toepcb *toep, const struct cpl_rx_data_ddp *cpl) { - struct cpl_rx_data_ddp *cpl = (struct cpl_rx_data_ddp *)m; - struct tcpcb *tp = so_sototcpcb(sk); - struct toepcb *toep = tp->t_toe; - struct inpcb *inp = toep->inp; struct mbuf *lmbuf; struct ulp_mbuf_cb *lcb, *lcb1; unsigned int val, pdulen; - iscsi_socket *isock = (iscsi_socket *)(sk)->so_emuldata; + struct iscsi_socket *isock = toep->ulpcb; + struct inpcb *inp = toep->inp; - if (isock == NULL) - return; + MPASS(isock != NULL); if (isock->mbuf_ulp_lhdr == NULL) { panic("%s: tid 0x%x, rcv RX_DATA_DDP w/o pdu header.\n", - __func__, toep->tid); + __func__, toep->tid); return; } - mtx_lock(&isock->iscsi_rcv_mbufq.lock); + mtx_lock(&isock->iscsi_rcvq_lock); lmbuf = isock->mbuf_ulp_lhdr; if (lmbuf->m_nextpkt) { lcb1 = find_ulp_mbuf_cb(lmbuf->m_nextpkt); lcb1->flags |= SBUF_ULP_FLAG_STATUS_RCVD; } lcb = find_ulp_mbuf_cb(isock->mbuf_ulp_lhdr); if (lcb == NULL) { CTR2(KTR_CXGBE, "%s: mtag NULL lmbuf :%p\n", __func__, lmbuf); - mtx_unlock(&isock->iscsi_rcv_mbufq.lock); + mtx_unlock(&isock->iscsi_rcvq_lock); return; } lcb->flags |= SBUF_ULP_FLAG_STATUS_RCVD; isock->mbuf_ulp_lhdr = NULL; if (ntohs(cpl->len) != lcb->ulp.iscsi.pdulen) { CTR3(KTR_CXGBE, "tid 0x%x, RX_DATA_DDP pdulen %u != %u.\n", toep->tid, ntohs(cpl->len), lcb->ulp.iscsi.pdulen); CTR4(KTR_CXGBE, "%s: lmbuf:%p lcb:%p lcb->flags:0x%x\n", __func__, lmbuf, lcb, lcb->flags); } lcb->ulp.iscsi.ddigest = ntohl(cpl->ulp_crc); pdulen = lcb->ulp.iscsi.pdulen; val = ntohl(cpl->ddpvld); if (val & F_DDP_PADDING_ERR) lcb->flags |= SBUF_ULP_FLAG_PAD_ERROR; if (val & F_DDP_HDRCRC_ERR) lcb->flags |= SBUF_ULP_FLAG_HCRC_ERROR; if (val & F_DDP_DATACRC_ERR) lcb->flags |= SBUF_ULP_FLAG_DCRC_ERROR; if (!(lcb->flags & SBUF_ULP_FLAG_DATA_RCVD)) { lcb->flags |= SBUF_ULP_FLAG_DATA_DDPED; } #ifdef __T4_DBG_DDP_FAILURE__ // else { unsigned char *bhs = lmbuf->m_data; unsigned char opcode = bhs[0]; unsigned int dlen = ntohl(*(unsigned int *)(bhs + 4)) & 0xFFFFFF; unsigned int ttt = ntohl(*(unsigned int *)(bhs + 20)); unsigned int offset = ntohl(*(unsigned int *)(bhs + 40)); if (dlen >= 2096) { /* data_out and should be ddp'ed */ if ((opcode & 0x3F) == 0x05 && ttt != 0xFFFFFFFF) { printf("CPL_RX_DATA_DDP: tid 0x%x, data-out %s ddp'ed\ (%u+%u), ttt 0x%x, seq 0x%x, ddpvld 0x%x.\n", toep->tid, (lcb->flags & SBUF_ULP_FLAG_DATA_DDPED) ? "IS" : "NOT", offset, dlen, ttt, ntohl(cpl->seq), ntohl(cpl->ddpvld)); } if ((opcode & 0x3F) == 0x25) { //if (!(lcb->flags & SBUF_ULP_FLAG_DATA_DDPED)) printf("CPL_RX_DATA_DDP: tid 0x%x, data-in %s ddp'ed\ (%u+%u), seq 0x%x, ddpvld 0x%x.\n", toep->tid, (lcb->flags & SBUF_ULP_FLAG_DATA_DDPED) ? "IS" : "NOT", offset, dlen, ntohl(cpl->seq), ntohl(cpl->ddpvld)); } } } #endif iscsi_conn_receive_pdu(isock); - mtx_unlock(&isock->iscsi_rcv_mbufq.lock); + mtx_unlock(&isock->iscsi_rcvq_lock); /* update rx credits */ INP_WLOCK(inp); - SOCK_LOCK(sk); + /* XXXNP: does this want the so_rcv lock? 
(happens to be the same) */ + SOCK_LOCK(inp->inp_socket); toep->sb_cc += pdulen; - SOCK_UNLOCK(sk); - CTR4(KTR_CXGBE, "sk:%p sb_cc 0x%x, rcv_nxt 0x%x rcv_wnd:0x%lx.\n", - sk, toep->sb_cc, tp->rcv_nxt, tp->rcv_wnd); - t4_rcvd(&toep->td->tod, tp); + SOCK_UNLOCK(inp->inp_socket); + t4_rcvd(&toep->td->tod, intotcpcb(inp)); INP_WUNLOCK(inp); return; } static void -drop_fw_acked_ulp_data(struct socket *sk, struct toepcb *toep, int len) +drop_fw_acked_ulp_data(struct toepcb *toep, int len) { struct mbuf *m, *next; struct ulp_mbuf_cb *cb; - iscsi_socket *isock = (iscsi_socket *)(sk)->so_emuldata; struct icl_pdu *req; + struct iscsi_socket *isock = toep->ulpcb; - if (len == 0 || (isock == NULL)) - return; + MPASS(len > 0); - mtx_lock(&isock->ulp2_wrq.lock); + mtx_lock(&isock->ulp2_wrq_lock); while (len > 0) { m = mbufq_dequeue(&isock->ulp2_wrq); - if(m == NULL) break; + MPASS(m != NULL); /* excess credits */ - for(next = m; next !=NULL; next = next->m_next) + for (next = m; next != NULL; next = next->m_next) { + MPASS(len >= next->m_len); /* excess credits */ len -= next->m_len; + } cb = find_ulp_mbuf_cb(m); - - if (cb && isock && cb->pdu) { + if (cb && cb->pdu) { req = (struct icl_pdu *)cb->pdu; req->ip_bhs_mbuf = NULL; icl_pdu_free(req); } m_freem(m); } - mtx_unlock(&isock->ulp2_wrq.lock); - return; + mtx_unlock(&isock->ulp2_wrq_lock); } -static void -process_fw4_ack(struct socket *sk, int *plen) -{ - struct tcpcb *tp = so_sototcpcb(sk); - struct toepcb *toep = tp->t_toe; - - drop_fw_acked_ulp_data(sk, toep, *plen); - - return; -} - static int -do_set_tcb_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) -{ - return 0; -} - -static int do_rx_iscsi_hdr(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { - struct socket *sk; struct adapter *sc = iq->adapter; - struct cpl_iscsi_hdr *cpl = mtod(m, struct cpl_iscsi_hdr *); - sk = cpl_find_sock(sc, GET_TID(cpl)); + struct cpl_iscsi_hdr *cpl = mtod(m, struct cpl_iscsi_hdr *); /* XXXNP */ + u_int tid = GET_TID(cpl); + struct toepcb *toep = lookup_tid(sc, tid); - if (sk == NULL) - return CPL_RET_UNKNOWN_TID | CPL_RET_BUF_DONE; + process_rx_iscsi_hdr(toep, m); - process_rx_iscsi_hdr(sk, m); - return 0; + return (0); } static int -do_rx_data_ddp(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) -{ - return 0; -} - -static int do_rx_iscsi_ddp(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { - struct socket *sk; - struct adapter *sc; - const struct cpl_rx_iscsi_ddp *cpl = (const void *)(rss + 1); + struct adapter *sc = iq->adapter; + const struct cpl_rx_data_ddp *cpl = (const void *)(rss + 1); + u_int tid = GET_TID(cpl); + struct toepcb *toep = lookup_tid(sc, tid); - if (iq == NULL) - return 0; - sc = iq->adapter; - if (sc == NULL) - return 0; + process_rx_data_ddp(toep, cpl); - sk = cpl_find_sock(sc, GET_TID(cpl)); - if (sk == NULL) - return CPL_RET_UNKNOWN_TID | CPL_RET_BUF_DONE; - - process_rx_data_ddp(sk, (void *)cpl); - return 0; + return (0); } + static int -t4_ulp_mbuf_push(struct socket *so, struct mbuf *m) +t4_ulp_mbuf_push(struct iscsi_socket *isock, struct mbuf *m) { - struct tcpcb *tp = so_sototcpcb(so); - struct toepcb *toep = tp->t_toe; - struct inpcb *inp = so_sotoinpcb(so); - iscsi_socket *isock = (iscsi_socket *)(so)->so_emuldata;; + struct toepcb *toep = isock->toep; - if (isock == NULL) { - m_freem(m); - return EINVAL; - } - /* append mbuf to ULP queue */ - mtx_lock(&isock->ulp2_writeq.lock); - mbufq_tail(&isock->ulp2_writeq, m); - mtx_unlock(&isock->ulp2_writeq.lock); + 
mtx_lock(&isock->ulp2_writeq_lock); + mbufq_enqueue(&isock->ulp2_writeq, m); + mtx_unlock(&isock->ulp2_writeq_lock); - INP_WLOCK(inp); + INP_WLOCK(toep->inp); t4_ulp_push_frames(toep->td->tod.tod_softc, toep, 0); - INP_WUNLOCK(inp); - return 0; + INP_WUNLOCK(toep->inp); + + return (0); } static struct mbuf * -iscsi_queue_handler_callback(struct socket *sk, unsigned int cmd, int *qlen) +get_writeq_len(struct toepcb *toep, int *qlen) { - iscsi_socket *isock; - struct mbuf *m0 = NULL; + struct iscsi_socket *isock = toep->ulpcb; - if (sk == NULL) - return NULL; - isock = (iscsi_socket *)(sk)->so_emuldata; - if (isock == NULL) - return NULL; - - switch (cmd) { - case 0:/* PEEK */ - m0 = mbufq_peek(&isock->ulp2_writeq); - break; - case 1:/* QUEUE_LEN */ - *qlen = mbufq_len(&isock->ulp2_writeq); - m0 = mbufq_peek(&isock->ulp2_writeq); - break; - case 2:/* DEQUEUE */ - mtx_lock(&isock->ulp2_writeq.lock); - m0 = mbufq_dequeue(&isock->ulp2_writeq); - mtx_unlock(&isock->ulp2_writeq.lock); - - mtx_lock(&isock->ulp2_wrq.lock); - mbufq_tail(&isock->ulp2_wrq, m0); - mtx_unlock(&isock->ulp2_wrq.lock); - - m0 = mbufq_peek(&isock->ulp2_writeq); - break; - } - return m0; + *qlen = mbufq_len(&isock->ulp2_writeq); + return (mbufq_first(&isock->ulp2_writeq)); } -static void -iscsi_cpl_handler_callback(struct tom_data *td, struct socket *sk, - void *m, unsigned int op) +static struct mbuf * +do_writeq_next(struct toepcb *toep) { - if ((sk == NULL) || (sk->so_emuldata == NULL)) - return; + struct iscsi_socket *isock = toep->ulpcb; + struct mbuf *m; - switch (op) { - case CPL_ISCSI_HDR: - process_rx_iscsi_hdr(sk, m); - break; - case CPL_RX_DATA_DDP: - process_rx_data_ddp(sk, m); - break; - case CPL_SET_TCB_RPL: - break; - case CPL_FW4_ACK: - process_fw4_ack(sk, m); - break; - default: - CTR2(KTR_CXGBE, "sk 0x%p, op 0x%x from TOM, NOT supported.\n", - sk, op); - break; - } + mtx_lock(&isock->ulp2_writeq_lock); + m = mbufq_dequeue(&isock->ulp2_writeq); + mtx_unlock(&isock->ulp2_writeq_lock); + + mtx_lock(&isock->ulp2_wrq_lock); + mbufq_enqueue(&isock->ulp2_wrq, m); + mtx_unlock(&isock->ulp2_wrq_lock); + + return (mbufq_first(&isock->ulp2_writeq)); } static void t4_register_cpl_handler_with_tom(struct adapter *sc) { - t4tom_register_cpl_iscsi_callback(iscsi_cpl_handler_callback); - t4tom_register_queue_iscsi_callback(iscsi_queue_handler_callback); + t4_register_cpl_handler(sc, CPL_ISCSI_HDR, do_rx_iscsi_hdr); t4_register_cpl_handler(sc, CPL_ISCSI_DATA, do_rx_iscsi_hdr); - t4tom_cpl_handler_register_flag |= - 1 << TOM_CPL_ISCSI_HDR_REGISTERED_BIT; - - if (!t4tom_cpl_handler_registered(sc, CPL_SET_TCB_RPL)) { - t4_register_cpl_handler(sc, CPL_SET_TCB_RPL, do_set_tcb_rpl); - t4tom_cpl_handler_register_flag |= - 1 << TOM_CPL_SET_TCB_RPL_REGISTERED_BIT; - CTR0(KTR_CXGBE, "register t4 cpl handler CPL_SET_TCB_RPL.\n"); - } - t4_register_cpl_handler(sc, CPL_RX_ISCSI_DDP, do_rx_iscsi_ddp); - - if (!t4tom_cpl_handler_registered(sc, CPL_RX_DATA_DDP)) { - t4_register_cpl_handler(sc, CPL_RX_DATA_DDP, do_rx_data_ddp); - t4tom_cpl_handler_register_flag |= - 1 << TOM_CPL_RX_DATA_DDP_REGISTERED_BIT; - CTR0(KTR_CXGBE, "register t4 cpl handler CPL_RX_DATA_DDP.\n"); - } } static void t4_unregister_cpl_handler_with_tom(struct adapter *sc) { - /* de-register CPL handles */ - t4tom_register_cpl_iscsi_callback(NULL); - t4tom_register_queue_iscsi_callback(NULL); - if (t4tom_cpl_handler_register_flag & - (1 << TOM_CPL_ISCSI_HDR_REGISTERED_BIT)) { - t4_register_cpl_handler(sc, CPL_ISCSI_HDR, NULL); - t4_register_cpl_handler(sc, CPL_ISCSI_DATA, 
NULL); - } - if (t4tom_cpl_handler_register_flag & - (1 << TOM_CPL_SET_TCB_RPL_REGISTERED_BIT)) - t4_register_cpl_handler(sc, CPL_SET_TCB_RPL, NULL); + + t4_register_cpl_handler(sc, CPL_ISCSI_HDR, NULL); + t4_register_cpl_handler(sc, CPL_ISCSI_DATA, NULL); t4_register_cpl_handler(sc, CPL_RX_ISCSI_DDP, NULL); - if (t4tom_cpl_handler_register_flag & - (1 << TOM_CPL_RX_DATA_DDP_REGISTERED_BIT)) - t4_register_cpl_handler(sc, CPL_RX_DATA_DDP, NULL); } static int -send_set_tcb_field(struct socket *sk, u16 word, u64 mask, u64 val, - int no_reply) +send_set_tcb_field(struct toepcb * toep, uint16_t word, uint64_t mask, + uint64_t val, int no_reply) { struct wrqe *wr; struct cpl_set_tcb_field *req; - struct inpcb *inp = sotoinpcb(sk); - struct tcpcb *tp = intotcpcb(inp); - struct toepcb *toep = tp->t_toe; wr = alloc_wrqe(sizeof(*req), toep->ctrlq); if (wr == NULL) return EINVAL; req = wrtod(wr); INIT_TP_WR_MIT_CPL(req, CPL_SET_TCB_FIELD, toep->tid); req->reply_ctrl = htobe16(V_NO_REPLY(no_reply) | V_QUEUENO(toep->ofld_rxq->iq.abs_id)); req->word_cookie = htobe16(V_WORD(word) | V_COOKIE(0)); req->mask = htobe64(mask); req->val = htobe64(val); t4_wrq_tx(toep->td->tod.tod_softc, wr); - return 0; + + return (0); } static int -cxgbei_set_ulp_mode(struct socket *so, struct toepcb *toep, - unsigned char hcrc, unsigned char dcrc) +cxgbei_set_ulp_mode(struct toepcb *toep, u_char hcrc, u_char dcrc) { - int rv = 0, val = 0; + int val = 0; - toep->ulp_mode = ULP_MODE_ISCSI; if (hcrc) val |= ULP_CRC_HEADER; if (dcrc) val |= ULP_CRC_DATA; val <<= 4; val |= ULP_MODE_ISCSI; - rv = send_set_tcb_field(so, 0, 0xfff, val, 0); - return rv; -} -static offload_device * -add_cxgbei_dev(struct ifnet *dev, struct toedev *tdev) -{ -#ifdef T4_DDP - struct cxgbei_ulp2_ddp_info *ddp; -#endif - offload_device *odev = NULL; - odev = offload_device_new(tdev); - if (odev == NULL) { - printf("%s: odev is NULL\n", __func__); - return odev; - } - printf("%s:New T4 %s, tdev 0x%p, odev 0x%p.\n", - __func__, dev->if_xname, tdev, odev); - odev->d_tdev = tdev; - odev->d_ulp_rx_datagap = sizeof(struct cpl_iscsi_hdr_no_rss); - odev->d_flag = ODEV_FLAG_ULP_CRC_ENABLED; - -#ifdef T4_DDP - odev->tdev2ddp = t4_tdev2ddp; - ddp = t4_ddp_init(dev, tdev); - if (ddp) { - printf("T4 %s, odev 0x%p, ddp 0x%p initialized.\n", - dev->if_xname, odev, ddp); - - odev->d_flag |= ODEV_FLAG_ULP_DDP_ENABLED; - cxgbei_ulp2_adapter_ddp_info(ddp, - (struct cxgbei_ulp2_tag_format *)&odev->d_tag_format, - &odev->d_payload_tmax, &odev->d_payload_rmax); - } -#endif - return odev; + return (send_set_tcb_field(toep, 0, 0xfff, val, 1)); } /* initiator */ void cxgbei_conn_task_reserve_itt(void *conn, void **prv, void *scmd, unsigned int *itt) { unsigned int tag; tag = cxgbei_task_reserve_itt(conn, prv, scmd, itt); if (tag) *itt = htonl(tag); return; } /* target */ void cxgbei_conn_transfer_reserve_ttt(void *conn, void **prv, void *scmd, unsigned int *ttt) { unsigned int tag; tag = cxgbei_task_reserve_ttt(conn, prv, scmd, ttt); if (tag) *ttt = htonl(tag); return; } void cxgbei_cleanup_task(void *conn, void *ofld_priv) { struct icl_conn *ic = (struct icl_conn *)conn; - cxgbei_task_data *tdata = NULL; - struct socket *so = NULL; - iscsi_socket *isock = NULL; - offload_device *odev = NULL; + struct cxgbei_task_data *tdata = ofld_priv; + struct iscsi_socket *isock = ic->ic_ofld_prv0; + struct toepcb *toep = isock->toep; + struct adapter *sc = td_adapter(toep->td); + struct cxgbei_data *ci = sc->iscsi_softc; - if (ic->ic_socket == NULL) return; + MPASS(isock != NULL); + 
MPASS(tdata != NULL); - so = ic->ic_socket; - - isock = (iscsi_socket *)(so)->so_emuldata; - if (isock == NULL) return; - odev = isock->s_odev; - - tdata = (cxgbei_task_data *)(ofld_priv); - if (tdata == NULL) return; - - if (cxgbei_ulp2_is_ddp_tag(&odev->d_tag_format, tdata->sc_ddp_tag)) + if (cxgbei_ulp2_is_ddp_tag(&ci->tag_format, tdata->sc_ddp_tag)) t4_sk_ddp_tag_release(isock, tdata->sc_ddp_tag); memset(tdata, 0, sizeof(*tdata)); - return; } static void t4_sk_tx_mbuf_setmode(struct icl_pdu *req, void *toep, void *mbuf, unsigned char mode, unsigned char hcrc, unsigned char dcrc) { struct mbuf *m = (struct mbuf *)mbuf; struct ulp_mbuf_cb *cb; cb = get_ulp_mbuf_cb(m); if (cb == NULL) return; cb->ulp_mode = ULP_MODE_ISCSI << 4; if (hcrc) cb->ulp_mode |= 1; if (dcrc) cb->ulp_mode |= 2; cb->pdu = req; return; } int -cxgbei_conn_xmit_pdu(void *conn, void *ioreq) +cxgbei_conn_xmit_pdu(struct icl_conn *ic, struct icl_pdu *req) { - struct icl_conn *ic = (struct icl_conn *)conn; - struct icl_pdu *req = (struct icl_pdu *)ioreq; struct mbuf *m = req->ip_bhs_mbuf; - struct socket *so = ic->ic_socket; - struct tcpcb *tp = so_sototcpcb(so); + struct iscsi_socket *isock = ic->ic_ofld_prv0; + struct toepcb *toep = isock->toep; - t4_sk_tx_mbuf_setmode(req, tp->t_toe, m, 2, - ic->ic_header_crc32c ? ISCSI_HEADER_DIGEST_SIZE : 0, - (req->ip_data_len && ic->ic_data_crc32c) ? ISCSI_DATA_DIGEST_SIZE : 0); + t4_sk_tx_mbuf_setmode(req, toep, m, 2, + ic->ic_header_crc32c ? ISCSI_HEADER_DIGEST_SIZE : 0, + (req->ip_data_len && ic->ic_data_crc32c) ? ISCSI_DATA_DIGEST_SIZE : 0); - t4_ulp_mbuf_push(ic->ic_socket, m); - return 0; + t4_ulp_mbuf_push(isock, m); + return (0); } -/* called from host iscsi, socket is passed as argument */ int -cxgbei_conn_set_ulp_mode(struct socket *so, void *conn) +cxgbei_conn_handoff(struct icl_conn *ic) { - struct tcpcb *tp = so_sototcpcb(so); - struct toepcb *toep = tp->t_toe; - struct adapter *sc = NULL; - struct toedev *tdev = NULL; - iscsi_socket *isock = NULL; - struct ifnet *ifp = NULL; - unsigned int tid = toep->tid; - offload_device *odev = NULL; - struct icl_conn *ic = (struct icl_conn*)conn; + struct tcpcb *tp = so_sototcpcb(ic->ic_socket); + struct toepcb *toep; + struct iscsi_socket *isock; - if (toep == NULL) return EINVAL; + if (!(tp->t_flags & TF_TOE)) + return (ENOTSUP); /* Connection is not offloaded. 
*/ + MPASS(tp->tod != NULL); + MPASS(tp->t_toe != NULL); - ifp = toep->port->ifp; - if (ifp == NULL) return EINVAL; - - if (!(sototcpcb(so)->t_flags & TF_TOE) || - !(ifp->if_capenable & IFCAP_TOE)) { - printf("%s: TOE not enabled on:%s\n", __func__, ifp->if_xname); - return EINVAL; - } - - /* if ULP_MODE is set by TOE driver, treat it as non-offloaded */ - if (toep->ulp_mode) { - CTR3(KTR_CXGBE, "%s: T4 sk 0x%p, ulp mode already set 0x%x.\n", - __func__, so, toep->ulp_mode); - return EINVAL; - } - sc = toep->port->adapter; - tdev = &toep->td->tod; - /* if toe dev is not set, treat it as non-offloaded */ - if (tdev == NULL) { - CTR2(KTR_CXGBE, "%s: T4 sk 0x%p, tdev NULL.\n", __func__, so); - return EINVAL; - } - - isock = (iscsi_socket *)malloc(sizeof(iscsi_socket), M_CXGBE, - M_NOWAIT | M_ZERO); - if (isock == NULL) { - printf("%s: T4 sk 0x%p, isock alloc failed.\n", __func__, so); - return EINVAL; - } - isock->mbuf_ulp_lhdr = NULL; - isock->sock = so; - isock->s_conn = conn; - so->so_emuldata = isock; - - mtx_init(&isock->iscsi_rcv_mbufq.lock,"isock_lock" , NULL, MTX_DEF); - mtx_init(&isock->ulp2_wrq.lock,"ulp2_wrq lock" , NULL, MTX_DEF); - mtx_init(&isock->ulp2_writeq.lock,"ulp2_writeq lock" , NULL, MTX_DEF); - - CTR6(KTR_CXGBE, - "%s: sc:%p toep:%p iscsi_start:0x%x iscsi_size:0x%x caps:%d.\n", - __func__, sc, toep, sc->vres.iscsi.start, - sc->vres.iscsi.size, sc->iscsicaps); /* - * Register ULP CPL handlers with TOM - * Register CPL_RX_ISCSI_HDR, CPL_RX_DATA_DDP callbacks with TOM + * XXXNP: Seems broken. How can we assume that the tod/toep is what we + * think it is? */ - t4_register_cpl_handler_with_tom(sc); - /* - * DDP initialization. Once for each tdev - * check if DDP is already configured for this tdev - */ - odev = offload_device_find(tdev); - if (odev == NULL) /* for each tdev we have a corresponding odev */ - { - if ((odev = add_cxgbei_dev(ifp, tdev)) == NULL) { - CTR3(KTR_CXGBE, - "T4 sk 0x%p, tdev %s, 0x%p, odev NULL.\n", - so, ifp->if_xname, tdev); - return EINVAL; - } - } + toep = tp->t_toe; + if (toep->ulp_mode) + return (EBUSY); /* Stay away if ulp_mode is already set. */ - CTR3(KTR_CXGBE, "tdev:%p sc->iscsi_softc:%p odev:%p\n", - tdev, sc->iscsi_softc, odev); - isock->s_odev = odev; - isock->s_tid = tid; + isock = malloc(sizeof(struct iscsi_socket), M_CXGBE, M_NOWAIT | M_ZERO); + if (isock == NULL) + return (ENOMEM); + isock->s_conn = ic; + isock->toep = toep; + isock->s_dcrc_len = ic->ic_data_crc32c ? 4 : 0; - isock->s_rmax = odev->d_payload_rmax; - isock->s_tmax = odev->d_payload_tmax; + mbufq_init(&isock->iscsi_rcvq, INT_MAX); + mtx_init(&isock->iscsi_rcvq_lock,"isock_lock" , NULL, MTX_DEF); - /* XXX cap the xmit pdu size to be 12K for now until f/w is ready */ - if (isock->s_tmax > (12288 + ISCSI_PDU_NONPAYLOAD_LEN)) - isock->s_tmax = 12288 + ISCSI_PDU_NONPAYLOAD_LEN; + mbufq_init(&isock->ulp2_wrq, INT_MAX); + mtx_init(&isock->ulp2_wrq_lock,"ulp2_wrq lock" , NULL, MTX_DEF); - /* set toe DDP off */ - so->so_options |= SO_NO_DDP; + mbufq_init(&isock->ulp2_writeq, INT_MAX); + mtx_init(&isock->ulp2_writeq_lock,"ulp2_writeq lock" , NULL, MTX_DEF); - /* Move connection to ULP mode, SET_TCB_FIELD */ - cxgbei_set_ulp_mode(so, toep, - ic->ic_header_crc32c, ic->ic_data_crc32c); + /* Move connection to ULP mode. */ + ic->ic_socket->so_options |= SO_NO_DDP; + toep->ulp_mode = ULP_MODE_ISCSI; + toep->ulpcb = isock; + ic->ic_ofld_prv0 = isock; - isock->s_hcrc_len = (ic->ic_header_crc32c ? 4 : 0); - isock->s_dcrc_len = (ic->ic_data_crc32c ? 
4 : 0); - return 0; + return (cxgbei_set_ulp_mode(toep, ic->ic_header_crc32c, ic->ic_data_crc32c)); } int -cxgbei_conn_close(struct socket *so) +cxgbei_conn_close(struct icl_conn *ic) { - iscsi_socket *isock = NULL; - isock = (iscsi_socket *)(so)->so_emuldata; + struct iscsi_socket *isock = ic->ic_ofld_prv0; + struct toepcb *toep = isock->toep; struct mbuf *m; struct ulp_mbuf_cb *cb; struct icl_pdu *req; - so->so_emuldata = NULL; + MPASS(isock != NULL); /* free isock Qs */ - while ((m = mbufq_dequeue(&isock->iscsi_rcv_mbufq)) != NULL) - m_freem(m); + /* + * XXXNP: some drained with lock held, some without. And the test for + * whether the lock has even been initialized is after it has been + * grabbed and released already. + * + * An even larger issue is whether the TCP connection is going down + * gracefully or not. Can't simply throw away stuff in send/rcv buffers + * if the TCP shutdown is supposed to be graceful. + */ + mbufq_drain(&isock->iscsi_rcvq); + mbufq_drain(&isock->ulp2_writeq); - while ((m = mbufq_dequeue(&isock->ulp2_writeq)) != NULL) - m_freem(m); - - mtx_lock(&isock->ulp2_wrq.lock); + mtx_lock(&isock->ulp2_wrq_lock); while ((m = mbufq_dequeue(&isock->ulp2_wrq)) != NULL) { cb = find_ulp_mbuf_cb(m); - if (cb && isock && cb->pdu) { + if (cb && cb->pdu) { req = (struct icl_pdu *)cb->pdu; req->ip_bhs_mbuf = NULL; icl_pdu_free(req); } m_freem(m); } - mtx_unlock(&isock->ulp2_wrq.lock); + mtx_unlock(&isock->ulp2_wrq_lock); - if (mtx_initialized(&isock->iscsi_rcv_mbufq.lock)) - mtx_destroy(&isock->iscsi_rcv_mbufq.lock); + if (mtx_initialized(&isock->iscsi_rcvq_lock)) + mtx_destroy(&isock->iscsi_rcvq_lock); - if (mtx_initialized(&isock->ulp2_wrq.lock)) - mtx_destroy(&isock->ulp2_wrq.lock); + if (mtx_initialized(&isock->ulp2_wrq_lock)) + mtx_destroy(&isock->ulp2_wrq_lock); - if (mtx_initialized(&isock->ulp2_writeq.lock)) - mtx_destroy(&isock->ulp2_writeq.lock); + if (mtx_initialized(&isock->ulp2_writeq_lock)) + mtx_destroy(&isock->ulp2_writeq_lock); + /* XXXNP: Should the ulpcb and ulp_mode be cleared here? */ + toep->ulp_mode = ULP_MODE_NONE; /* dubious without inp lock */ + free(isock, M_CXGBE); - return 0; + return (0); } static int -cxgbei_loader(struct module *mod, int cmd, void *arg) +cxgbei_activate(struct adapter *sc) { - int err = 0; + struct cxgbei_data *ci; + int rc; + ASSERT_SYNCHRONIZED_OP(sc); + + if (uld_active(sc, ULD_ISCSI)) { + KASSERT(0, ("%s: iSCSI offload already enabled on adapter %p", + __func__, sc)); + return (0); + } + + if (sc->iscsicaps == 0 || sc->vres.iscsi.size == 0) { + device_printf(sc->dev, + "not iSCSI offload capable, or capability disabled.\n"); + return (ENOSYS); + } + + /* per-adapter softc for iSCSI */ + ci = malloc(sizeof(*ci), M_CXGBE, M_ZERO | M_NOWAIT); + if (ci == NULL) + return (ENOMEM); + + rc = cxgbei_ddp_init(sc, ci); + if (rc != 0) { + free(ci, M_CXGBE); + return (rc); + } + + t4_register_cpl_handler_with_tom(sc); + sc->iscsi_softc = ci; + + return (0); +} + +static int +cxgbei_deactivate(struct adapter *sc) +{ + + ASSERT_SYNCHRONIZED_OP(sc); + + if (sc->iscsi_softc != NULL) { + cxgbei_ddp_cleanup(sc->iscsi_softc); + t4_unregister_cpl_handler_with_tom(sc); + free(sc->iscsi_softc, M_CXGBE); + sc->iscsi_softc = NULL; + } + + return (0); +} + +static void +cxgbei_activate_all(struct adapter *sc, void *arg __unused) +{ + + if (begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4isact") != 0) + return; + + /* Activate iSCSI if any port on this adapter has IFCAP_TOE enabled. 
*/ + if (sc->offload_map && !uld_active(sc, ULD_ISCSI)) + (void) t4_activate_uld(sc, ULD_ISCSI); + + end_synchronized_op(sc, 0); +} + +static void +cxgbei_deactivate_all(struct adapter *sc, void *arg __unused) +{ + + if (begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4isdea") != 0) + return; + + if (uld_active(sc, ULD_ISCSI)) + (void) t4_deactivate_uld(sc, ULD_ISCSI); + + end_synchronized_op(sc, 0); +} + +static struct uld_info cxgbei_uld_info = { + .uld_id = ULD_ISCSI, + .activate = cxgbei_activate, + .deactivate = cxgbei_deactivate, +}; + +extern void (*cxgbei_fw4_ack)(struct toepcb *, int); +extern void (*cxgbei_rx_data_ddp)(struct toepcb *, + const struct cpl_rx_data_ddp *); +extern struct mbuf *(*cxgbei_writeq_len)(struct toepcb *, int *); +extern struct mbuf *(*cxgbei_writeq_next)(struct toepcb *); + +static int +cxgbei_mod_load(void) +{ + int rc; + + cxgbei_fw4_ack = drop_fw_acked_ulp_data; + cxgbei_rx_data_ddp = process_rx_data_ddp; + cxgbei_writeq_len = get_writeq_len; + cxgbei_writeq_next = do_writeq_next; + + rc = t4_register_uld(&cxgbei_uld_info); + if (rc != 0) + return (rc); + + t4_iterate(cxgbei_activate_all, NULL); + + return (rc); +} + +static int +cxgbei_mod_unload(void) +{ + + t4_iterate(cxgbei_deactivate_all, NULL); + + if (t4_unregister_uld(&cxgbei_uld_info) == EBUSY) + return (EBUSY); + + return (0); +} + +static int +cxgbei_modevent(module_t mod, int cmd, void *arg) +{ + int rc = 0; + switch (cmd) { case MOD_LOAD: - SLIST_INIT(&odev_list); - printf("cxgbei module loaded Sucessfully.\n"); + rc = cxgbei_mod_load(); break; + case MOD_UNLOAD: - offload_device_remove(); - printf("cxgbei cleanup completed sucessfully.\n"); + rc = cxgbei_mod_unload(); break; + default: - err = (EINVAL); - break; + rc = EINVAL; } - return (err); + return (rc); } static moduledata_t cxgbei_mod = { "cxgbei", - cxgbei_loader, + cxgbei_modevent, NULL, }; MODULE_VERSION(cxgbei, 1); DECLARE_MODULE(cxgbei, cxgbei_mod, SI_SUB_EXEC, SI_ORDER_ANY); MODULE_DEPEND(cxgbei, t4_tom, 1, 1, 1); MODULE_DEPEND(cxgbei, cxgbe, 1, 1, 1); MODULE_DEPEND(cxgbei, icl, 1, 1, 1); Index: projects/cxl_iscsi/sys/dev/cxgbe/cxgbei/cxgbei.h =================================================================== --- projects/cxl_iscsi/sys/dev/cxgbe/cxgbei/cxgbei.h (revision 279896) +++ projects/cxl_iscsi/sys/dev/cxgbe/cxgbei/cxgbei.h (revision 279897) @@ -1,158 +1,154 @@ /*- * Copyright (c) 2012 Chelsio Communications, Inc. * All rights reserved. * * Chelsio T5xx iSCSI driver * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef __CXGBEI_OFLD_H__ #define __CXGBEI_OFLD_H__ -#include "mbufq.h" -typedef struct iscsi_socket { - /* iscsi private */ - unsigned char s_flag; - unsigned char s_cpuno; /* bind to cpuno */ - unsigned char s_mode; /* offload mode */ - unsigned char s_txhold; +struct iscsi_socket { + u_char s_dcrc_len; + void *s_conn; /* ic_conn pointer */ + struct toepcb *toep; - unsigned char s_ddp_pgidx; /* ddp page selection */ - unsigned char s_hcrc_len; - unsigned char s_dcrc_len; - unsigned char filler[1]; + /* + * XXXNP: locks on the same line. + * XXXNP: are the locks even needed? Why not use so_snd/so_rcv mtx to + * guard the write and rcv queues? + */ + struct mbufq iscsi_rcvq; /* rx - ULP mbufs */ + struct mtx iscsi_rcvq_lock; - unsigned int s_tid; /* for debug only */ - unsigned int s_tmax; - unsigned int s_rmax; - unsigned int s_mss; - void *s_odev; /* offload device, if any */ - void *s_appdata; /* upperlayer data pointer */ - void *s_private; /* underlying socket related info. */ - void *s_conn; /* ic_conn pointer */ - struct socket *sock; - struct mbuf_head iscsi_rcv_mbufq;/* rx - ULP mbufs */ - struct mbuf_head ulp2_writeq; /* tx - ULP mbufs */ - struct mbuf_head ulp2_wrq; /* tx wr- ULP mbufs */ + struct mbufq ulp2_writeq; /* tx - ULP mbufs */ + struct mtx ulp2_writeq_lock; + struct mbufq ulp2_wrq; /* tx wr- ULP mbufs */ + struct mtx ulp2_wrq_lock; + struct mbuf *mbuf_ulp_lhdr; struct mbuf *mbuf_ulp_ldata; -}iscsi_socket; +}; -#define ISCSI_SG_SBUF_DMABLE 0x1 -#define ISCSI_SG_SBUF_DMA_ONLY 0x2 /*private*/ -#define ISCSI_SG_BUF_ALLOC 0x10 -#define ISCSI_SG_PAGE_ALLOC 0x20 -#define ISCSI_SG_SBUF_MAP_NEEDED 0x40 -#define ISCSI_SG_SBUF_MAPPED 0x80 - -#define ISCSI_SG_SBUF_LISTHEAD 0x100 -#define ISCSI_SG_SBUF_LISTTAIL 0x200 -#define ISCSI_SG_SBUF_XFER_DONE 0x400 - -typedef struct cxgbei_sgl { +struct cxgbei_sgl { int sg_flag; void *sg_addr; void *sg_dma_addr; size_t sg_offset; size_t sg_length; -} cxgbei_sgl; +}; #define cxgbei_scsi_for_each_sg(_sgl, _sgel, _n, _i) \ for (_i = 0, _sgel = (cxgbei_sgl*) (_sgl); _i < _n; _i++, \ _sgel++) #define sg_dma_addr(_sgel) _sgel->sg_dma_addr #define sg_virt(_sgel) _sgel->sg_addr #define sg_len(_sgel) _sgel->sg_length #define sg_off(_sgel) _sgel->sg_offset #define sg_next(_sgel) _sgel + 1 #define SBUF_ULP_FLAG_HDR_RCVD 0x1 #define SBUF_ULP_FLAG_DATA_RCVD 0x2 #define SBUF_ULP_FLAG_STATUS_RCVD 0x4 #define SBUF_ULP_FLAG_COALESCE_OFF 0x8 #define SBUF_ULP_FLAG_HCRC_ERROR 0x10 #define SBUF_ULP_FLAG_DCRC_ERROR 0x20 #define SBUF_ULP_FLAG_PAD_ERROR 0x40 #define SBUF_ULP_FLAG_DATA_DDPED 0x80 -/* Flags for return value of CPL message handlers */ -enum { - CPL_RET_BUF_DONE = 1, /* buffer processing done buffer may be freed */ - CPL_RET_BAD_MSG = 2, /* bad CPL message (e.g., unknown opcode) */ - CPL_RET_UNKNOWN_TID = 4 /* unexpected unknown TID */ -}; - - /* * Similar to tcp_skb_cb but with ULP elements added to support DDP, iSCSI, * etc. 
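 *
 * The cb lives in an m_tag attached to the mbuf (see get_ulp_mbuf_cb())
 * and records the CPL sequence number, the ULP mode and, for iSCSI, the
 * pdu length and data digest reported by CPL_RX_DATA_DDP, which is what
 * lets the receive path stitch header and payload mbufs back into a
 * single PDU.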
*/ struct ulp_mbuf_cb { uint8_t ulp_mode; /* ULP mode/submode of sk_buff */ uint8_t flags; /* TCP-like flags */ uint32_t seq; /* TCP sequence number */ union { /* ULP-specific fields */ struct { uint32_t ddigest; /* ULP rx_data_ddp selected field*/ uint32_t pdulen; /* ULP rx_data_ddp selected field*/ } iscsi; struct { uint32_t offset; /* ULP DDP offset notification */ uint8_t flags; /* ULP DDP flags ... */ } ddp; } ulp; uint8_t ulp_data[16]; /* scratch area for ULP */ void *pdu; /* pdu pointer */ }; -/* private data for eack scsi task */ -typedef struct cxgbei_task_data { - cxgbei_sgl sgl[256]; - unsigned int nsge; - unsigned int sc_ddp_tag; -}cxgbei_task_data; +/* private data for each scsi task */ +struct cxgbei_task_data { + struct cxgbei_sgl sgl[256]; + u_int nsge; + u_int sc_ddp_tag; +}; -static unsigned char t4tom_cpl_handler_register_flag; -enum { - TOM_CPL_ISCSI_HDR_REGISTERED_BIT, - TOM_CPL_SET_TCB_RPL_REGISTERED_BIT, - TOM_CPL_RX_DATA_DDP_REGISTERED_BIT +struct cxgbei_ulp2_tag_format { + u_char sw_bits; + u_char rsvd_bits; + u_char rsvd_shift; + u_char filler[1]; + uint32_t rsvd_mask; }; -#define ODEV_FLAG_ULP_CRC_ENABLED 0x1 -#define ODEV_FLAG_ULP_DDP_ENABLED 0x2 -#define ODEV_FLAG_ULP_TX_ALLOC_DIGEST 0x4 -#define ODEV_FLAG_ULP_RX_PAD_INCLUDED 0x8 +struct cxgbei_data { + u_int max_txsz; + u_int max_rxsz; + u_int llimit; + u_int ulimit; + u_int nppods; + u_int idx_last; + u_char idx_bits; + uint32_t idx_mask; + uint32_t rsvd_tag_mask; -#define ODEV_FLAG_ULP_ENABLED \ - (ODEV_FLAG_ULP_CRC_ENABLED | ODEV_FLAG_ULP_DDP_ENABLED) + struct mtx map_lock; + bus_dma_tag_t ulp_ddp_tag; + unsigned char *colors; + struct cxgbei_ulp2_gather_list **gl_map; -struct ulp_mbuf_cb * get_ulp_mbuf_cb(struct mbuf *); -int cxgbei_conn_set_ulp_mode(struct socket *, void *); -int cxgbei_conn_close(struct socket *); + struct cxgbei_ulp2_tag_format tag_format; +}; + +struct icl_conn; +struct icl_pdu; + +struct ulp_mbuf_cb *get_ulp_mbuf_cb(struct mbuf *); +int cxgbei_conn_handoff(struct icl_conn *); +int cxgbei_conn_close(struct icl_conn *); void cxgbei_conn_task_reserve_itt(void *, void **, void *, unsigned int *); void cxgbei_conn_transfer_reserve_ttt(void *, void **, void *, unsigned int *); void cxgbei_cleanup_task(void *, void *); -int cxgbei_conn_xmit_pdu(void *, void *); +int cxgbei_conn_xmit_pdu(struct icl_conn *, struct icl_pdu *); + +struct cxgbei_ulp2_pagepod_hdr; +int t4_ddp_set_map(struct cxgbei_data *, void *, + struct cxgbei_ulp2_pagepod_hdr *, u_int, u_int, + struct cxgbei_ulp2_gather_list *, int); +void t4_ddp_clear_map(struct cxgbei_data *, struct cxgbei_ulp2_gather_list *, + u_int, u_int, u_int, struct iscsi_socket *); #endif Index: projects/cxl_iscsi/sys/dev/cxgbe/cxgbei/cxgbei_ulp2_ddp.c =================================================================== --- projects/cxl_iscsi/sys/dev/cxgbe/cxgbei/cxgbei_ulp2_ddp.c (revision 279896) +++ projects/cxl_iscsi/sys/dev/cxgbe/cxgbei/cxgbei_ulp2_ddp.c (revision 279897) @@ -1,703 +1,413 @@ /*- * Copyright (c) 2012 Chelsio Communications, Inc. * All rights reserved. * * Chelsio T5xx iSCSI driver * cxgbei_ulp2_ddp.c: Chelsio iSCSI DDP Manager. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* for PCIE_MEM_ACCESS */ #include #include "cxgbei.h" #include "cxgbei_ulp2_ddp.h" -static inline int -cxgbei_counter_dec_and_read(volatile int *p) -{ - atomic_subtract_acq_int(p, 1); - return atomic_load_acq_int(p); -} - -static inline int -get_order(unsigned long size) -{ - int order; - - size = (size - 1) >> PAGE_SHIFT; - order = 0; - while (size) { - order++; - size >>= 1; - } - return (order); -} - /* * Map a single buffer address. */ static void ulp2_dma_map_addr(void *arg, bus_dma_segment_t *segs, int nseg, int error) { bus_addr_t *ba = arg; if (error) return; KASSERT(nseg == 1, ("%s: %d segments returned!", __func__, nseg)); *ba = segs->ds_addr; } -static int -ulp2_dma_tag_create(struct cxgbei_ulp2_ddp_info *ddp) -{ - int rc; - - rc = bus_dma_tag_create(NULL, 1, 0, BUS_SPACE_MAXADDR, - BUS_SPACE_MAXADDR, NULL, NULL, UINT32_MAX , 8, - BUS_SPACE_MAXSIZE, BUS_DMA_ALLOCNOW, NULL, NULL, - &ddp->ulp_ddp_tag); - - if (rc != 0) { - printf("%s(%d): bus_dma_tag_create() " - "failed (rc = %d)!\n", - __FILE__, __LINE__, rc); - return rc; - } - return 0; -} - /* * iSCSI Direct Data Placement * * T4/5 ulp2 h/w can directly place the iSCSI Data-In or Data-Out PDU's * payload into pre-posted final destination host-memory buffers based on the * Initiator Task Tag (ITT) in Data-In or Target Task Tag (TTT) in Data-Out * PDUs. * * The host memory address is programmed into h/w in the format of pagepod * entries. * The location of the pagepod entry is encoded into ddp tag which is used or * is the base for ITT/TTT. 
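 *
 * The reserved (h/w) portion of a ddp tag holds the pagepod index and a
 * small "color" in its low bits, with the initiator's s/w tag in the bits
 * above them.  Purely as an illustration (the widths depend on the size
 * of the adapter's pagepod region), with 12 index bits the layout is
 * tag[5:0] = color, tag[17:6] = pagepod index, tag[31:18] = s/w tag; see
 * cxgbei_ulp2_ddp_tag_base().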
*/ -unsigned char ddp_page_order[DDP_PGIDX_MAX] = {0, 1, 2, 4}; -unsigned char ddp_page_shift[DDP_PGIDX_MAX] = {12, 13, 14, 16}; -unsigned char page_idx = DDP_PGIDX_MAX; static inline int -ddp_find_unused_entries(struct cxgbei_ulp2_ddp_info *ddp, - unsigned int start, unsigned int max, - unsigned int count, unsigned int *idx, - struct cxgbei_ulp2_gather_list *gl) +ddp_find_unused_entries(struct cxgbei_data *ci, u_int start, u_int max, + u_int count, u_int *idx, struct cxgbei_ulp2_gather_list *gl) { unsigned int i, j, k; /* not enough entries */ - if ((max - start) < count) - return EBUSY; + if (max - start < count) + return (EBUSY); max -= count; - mtx_lock(&ddp->map_lock); + mtx_lock(&ci->map_lock); for (i = start; i < max;) { for (j = 0, k = i; j < count; j++, k++) { - if (ddp->gl_map[k]) + if (ci->gl_map[k]) break; } if (j == count) { for (j = 0, k = i; j < count; j++, k++) - ddp->gl_map[k] = gl; - mtx_unlock(&ddp->map_lock); + ci->gl_map[k] = gl; + mtx_unlock(&ci->map_lock); *idx = i; - return 0; + return (0); } i += j + 1; } - mtx_unlock(&ddp->map_lock); - return EBUSY; + mtx_unlock(&ci->map_lock); + return (EBUSY); } static inline void -ddp_unmark_entries(struct cxgbei_ulp2_ddp_info *ddp, - int start, int count) +ddp_unmark_entries(struct cxgbei_data *ci, u_int start, u_int count) { - mtx_lock(&ddp->map_lock); - memset(&ddp->gl_map[start], 0, + + mtx_lock(&ci->map_lock); + memset(&ci->gl_map[start], 0, count * sizeof(struct cxgbei_ulp2_gather_list *)); - mtx_unlock(&ddp->map_lock); + mtx_unlock(&ci->map_lock); } -/** - * cxgbei_ulp2_ddp_find_page_index - return ddp page index for a given page size - * @pgsz: page size - * return the ddp page index, if no match is found return DDP_PGIDX_MAX. - */ -int -cxgbei_ulp2_ddp_find_page_index(unsigned long pgsz) -{ - int i; - - for (i = 0; i < DDP_PGIDX_MAX; i++) { - if (pgsz == (1UL << ddp_page_shift[i])) - return i; - } - CTR1(KTR_CXGBE, "ddp page size 0x%lx not supported.\n", pgsz); - return DDP_PGIDX_MAX; -} - -static int -cxgbei_ulp2_ddp_adjust_page_table(void) -{ - int i; - unsigned int base_order, order; - - if (PAGE_SIZE < (1UL << ddp_page_shift[0])) { - CTR2(KTR_CXGBE, "PAGE_SIZE %u too small, min. 
%lu.\n", - PAGE_SIZE, 1UL << ddp_page_shift[0]); - return EINVAL; - } - - base_order = get_order(1UL << ddp_page_shift[0]); - order = get_order(1 << PAGE_SHIFT); - for (i = 0; i < DDP_PGIDX_MAX; i++) { - /* first is the kernel page size, then just doubling the size */ - ddp_page_order[i] = order - base_order + i; - ddp_page_shift[i] = PAGE_SHIFT + i; - } - return 0; -} - - static inline void -ddp_gl_unmap(struct toedev *tdev, - struct cxgbei_ulp2_gather_list *gl) +ddp_gl_unmap(struct cxgbei_data *ci, struct cxgbei_ulp2_gather_list *gl) { int i; - struct adapter *sc = tdev->tod_softc; - struct cxgbei_ulp2_ddp_info *ddp = sc->iscsi_softc; if (!gl->pages[0]) return; for (i = 0; i < gl->nelem; i++) { - bus_dmamap_unload(ddp->ulp_ddp_tag, gl->dma_sg[i].bus_map); - bus_dmamap_destroy(ddp->ulp_ddp_tag, gl->dma_sg[i].bus_map); + bus_dmamap_unload(ci->ulp_ddp_tag, gl->dma_sg[i].bus_map); + bus_dmamap_destroy(ci->ulp_ddp_tag, gl->dma_sg[i].bus_map); } } static inline int -ddp_gl_map(struct toedev *tdev, - struct cxgbei_ulp2_gather_list *gl) +ddp_gl_map(struct cxgbei_data *ci, struct cxgbei_ulp2_gather_list *gl) { int i, rc; bus_addr_t pa; - struct cxgbei_ulp2_ddp_info *ddp; - struct adapter *sc = tdev->tod_softc; - ddp = (struct cxgbei_ulp2_ddp_info *)sc->iscsi_softc; - if (ddp == NULL) { - printf("%s: DDP is NULL tdev:%p sc:%p ddp:%p\n", - __func__, tdev, sc, ddp); - return ENOMEM; - } - mtx_lock(&ddp->map_lock); + MPASS(ci != NULL); + + mtx_lock(&ci->map_lock); for (i = 0; i < gl->nelem; i++) { - rc = bus_dmamap_create(ddp->ulp_ddp_tag, 0, - &gl->dma_sg[i].bus_map); - if (rc != 0) { - printf("%s: unable to map page 0x%p.\n", - __func__, gl->pages[i]); + rc = bus_dmamap_create(ci->ulp_ddp_tag, 0, + &gl->dma_sg[i].bus_map); + if (rc != 0) goto unmap; - } - rc = bus_dmamap_load(ddp->ulp_ddp_tag, gl->dma_sg[i].bus_map, + rc = bus_dmamap_load(ci->ulp_ddp_tag, gl->dma_sg[i].bus_map, gl->pages[i], PAGE_SIZE, ulp2_dma_map_addr, &pa, BUS_DMA_NOWAIT); - if (rc != 0) { - printf("%s:unable to load page 0x%p.\n", - __func__, gl->pages[i]); + if (rc != 0) goto unmap; - } gl->dma_sg[i].phys_addr = pa; } - mtx_unlock(&ddp->map_lock); + mtx_unlock(&ci->map_lock); - return 0; + return (0); unmap: if (i) { - unsigned int nelem = gl->nelem; + u_int nelem = gl->nelem; gl->nelem = i; - ddp_gl_unmap(tdev, gl); + ddp_gl_unmap(ci, gl); gl->nelem = nelem; } - return ENOMEM; + return (ENOMEM); } /** * cxgbei_ulp2_ddp_make_gl_from_iscsi_sgvec - build ddp page buffer list * @xferlen: total buffer length * @sgl: page buffer scatter-gather list (struct cxgbei_sgl) * @sgcnt: # of page buffers * @gfp: allocation mode * * construct a ddp page buffer list from the scsi scattergather list. * coalesce buffers as much as possible, and obtain dma addresses for * each page. * * Return the cxgbei_ulp2_gather_list constructed from the page buffers if the * memory can be used for ddp. Return NULL otherwise. 
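 *
 * Transfers of DDP_THRESHOLD (2048) bytes or less are rejected up front,
 * as are buffer lists in which a page other than the first starts at a
 * non-zero offset or a page other than the last is not used through to
 * the end of the page.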
*/ struct cxgbei_ulp2_gather_list * -cxgbei_ulp2_ddp_make_gl_from_iscsi_sgvec - (unsigned int xferlen, cxgbei_sgl *sgl, - unsigned int sgcnt, void *tdev, - int gfp) +cxgbei_ulp2_ddp_make_gl_from_iscsi_sgvec(u_int xferlen, struct cxgbei_sgl *sgl, + u_int sgcnt, struct cxgbei_data *ci, int gfp) { struct cxgbei_ulp2_gather_list *gl; - cxgbei_sgl *sg = sgl; + struct cxgbei_sgl *sg = sgl; void *sgpage = (void *)((u64)sg->sg_addr & (~PAGE_MASK)); unsigned int sglen = sg->sg_length; unsigned int sgoffset = (u64)sg->sg_addr & PAGE_MASK; unsigned int npages = (xferlen + sgoffset + PAGE_SIZE - 1) >> PAGE_SHIFT; int i = 1, j = 0; if (xferlen <= DDP_THRESHOLD) { CTR2(KTR_CXGBE, "xfer %u < threshold %u, no ddp.\n", xferlen, DDP_THRESHOLD); return NULL; } gl = malloc(sizeof(struct cxgbei_ulp2_gather_list) + npages * (sizeof(struct dma_segments) + sizeof(void *)), M_DEVBUF, M_NOWAIT | M_ZERO); - if (gl == NULL) { - printf("%s: gl alloc failed\n", __func__); - return NULL; - } + if (gl == NULL) + return (NULL); gl->pages = (void **)&gl->dma_sg[npages]; gl->length = xferlen; gl->offset = sgoffset; gl->pages[0] = sgpage; CTR6(KTR_CXGBE, "%s: xferlen:0x%x len:0x%x off:0x%x sg_addr:%p npages:%d\n", __func__, xferlen, gl->length, gl->offset, sg->sg_addr, npages); for (i = 1, sg = sg_next(sg); i < sgcnt; i++, sg = sg_next(sg)) { void *page = sg->sg_addr; if (sgpage == page && sg->sg_offset == sgoffset + sglen) sglen += sg->sg_length; else { /* make sure the sgl is fit for ddp: * each has the same page size, and * all of the middle pages are used completely */ if ((j && sgoffset) || ((i != sgcnt - 1) && ((sglen + sgoffset) & ~CXGBEI_PAGE_MASK))){ goto error_out; } j++; if (j == gl->nelem || sg->sg_offset) { goto error_out; } gl->pages[j] = page; sglen = sg->sg_length; sgoffset = sg->sg_offset; sgpage = page; } } gl->nelem = ++j; - if (ddp_gl_map(tdev, gl) < 0) + if (ddp_gl_map(ci, gl) < 0) goto error_out; return gl; error_out: free(gl, M_DEVBUF); return NULL; } /** * cxgbei_ulp2_ddp_release_gl - release a page buffer list * @gl: a ddp page buffer list * @pdev: pci_dev used for pci_unmap * free a ddp page buffer list resulted from cxgbei_ulp2_ddp_make_gl(). */ void -cxgbei_ulp2_ddp_release_gl(struct cxgbei_ulp2_gather_list *gl, void *tdev) +cxgbei_ulp2_ddp_release_gl(struct cxgbei_data *ci, + struct cxgbei_ulp2_gather_list *gl) { - ddp_gl_unmap(tdev, gl); + + ddp_gl_unmap(ci, gl); free(gl, M_DEVBUF); } /** * cxgbei_ulp2_ddp_tag_reserve - set up ddp for a data transfer - * @ddp: adapter's ddp info + * @ci: adapter's ddp info * @tid: connection id * @tformat: tag format * @tagp: contains s/w tag initially, will be updated with ddp/hw tag * @gl: the page momory list * @gfp: allocation mode * * ddp setup for a given page buffer list and construct the ddp tag. * return 0 if success, < 0 otherwise. 
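 *
 * Each pagepod maps up to IPPOD_PAGES_MAX (4) pages, so the routine
 * reserves (nelem + 3) / 4 consecutive gl_map entries, searching forward
 * from idx_last and wrapping around once, builds the ddp tag from the
 * entry index, its color and the caller's s/w tag, and programs the
 * pagepods through t4_ddp_set_map().  On failure the entries are
 * unmarked again and *tagp is left unchanged.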
*/ int -cxgbei_ulp2_ddp_tag_reserve(struct cxgbei_ulp2_ddp_info *ddp, - void *isock, unsigned int tid, - struct cxgbei_ulp2_tag_format *tformat, - u32 *tagp, struct cxgbei_ulp2_gather_list *gl, - int gfp, int reply) +cxgbei_ulp2_ddp_tag_reserve(struct cxgbei_data *ci, void *isock, u_int tid, + struct cxgbei_ulp2_tag_format *tformat, u32 *tagp, + struct cxgbei_ulp2_gather_list *gl, int gfp, int reply) { struct cxgbei_ulp2_pagepod_hdr hdr; - unsigned int npods, idx; - int rv; + u_int npods, idx; + int rc; u32 sw_tag = *tagp; u32 tag; - if (page_idx >= DDP_PGIDX_MAX || !ddp || !gl || !gl->nelem || - gl->length < DDP_THRESHOLD) { - CTR3(KTR_CXGBE, "pgidx %u, xfer %u/%u, NO ddp.\n", - page_idx, gl->length, DDP_THRESHOLD); - return EINVAL; - } + MPASS(ci != NULL); + if (!gl || !gl->nelem || gl->length < DDP_THRESHOLD) + return (EINVAL); + npods = (gl->nelem + IPPOD_PAGES_MAX - 1) >> IPPOD_PAGES_SHIFT; - if (ddp->idx_last == ddp->nppods) - rv = ddp_find_unused_entries(ddp, 0, ddp->nppods, - npods, &idx, gl); + if (ci->idx_last == ci->nppods) + rc = ddp_find_unused_entries(ci, 0, ci->nppods, npods, &idx, + gl); else { - rv = ddp_find_unused_entries(ddp, ddp->idx_last + 1, - ddp->nppods, npods, &idx, gl); - if (rv && ddp->idx_last >= npods) { - rv = ddp_find_unused_entries(ddp, 0, - min(ddp->idx_last + npods, ddp->nppods), + rc = ddp_find_unused_entries(ci, ci->idx_last + 1, + ci->nppods, npods, &idx, gl); + if (rc && ci->idx_last >= npods) { + rc = ddp_find_unused_entries(ci, 0, + min(ci->idx_last + npods, ci->nppods), npods, &idx, gl); } } - if (rv) { + if (rc) { CTR3(KTR_CXGBE, "xferlen %u, gl %u, npods %u NO DDP.\n", gl->length, gl->nelem, npods); - return rv; + return (rc); } - tag = cxgbei_ulp2_ddp_tag_base(idx, ddp, tformat, sw_tag); + tag = cxgbei_ulp2_ddp_tag_base(idx, ci->colors, tformat, sw_tag); CTR4(KTR_CXGBE, "%s: sw_tag:0x%x idx:0x%x tag:0x%x\n", __func__, sw_tag, idx, tag); hdr.rsvd = 0; hdr.vld_tid = htonl(F_IPPOD_VALID | V_IPPOD_TID(tid)); - hdr.pgsz_tag_clr = htonl(tag & ddp->rsvd_tag_mask); + hdr.pgsz_tag_clr = htonl(tag & ci->rsvd_tag_mask); hdr.maxoffset = htonl(gl->length); hdr.pgoffset = htonl(gl->offset); - rv = ddp->ddp_set_map(ddp, isock, &hdr, idx, npods, gl, reply); - if (rv < 0) + rc = t4_ddp_set_map(ci, isock, &hdr, idx, npods, gl, reply); + if (rc < 0) goto unmark_entries; - ddp->idx_last = idx; + ci->idx_last = idx; *tagp = tag; - return 0; + return (0); unmark_entries: - ddp_unmark_entries(ddp, idx, npods); - return rv; + ddp_unmark_entries(ci, idx, npods); + return (rc); } /** * cxgbei_ulp2_ddp_tag_release - release a ddp tag - * @ddp: adapter's ddp info + * @ci: adapter's ddp info * @tag: ddp tag * ddp cleanup for a given ddp tag and release all the resources held */ void -cxgbei_ulp2_ddp_tag_release(struct cxgbei_ulp2_ddp_info *ddp, u32 tag, - iscsi_socket *isock) +cxgbei_ulp2_ddp_tag_release(struct cxgbei_data *ci, uint32_t tag, + struct iscsi_socket *isock) { - u32 idx; + uint32_t idx; - if (ddp == NULL) { - CTR2(KTR_CXGBE, "%s:release ddp tag 0x%x, ddp NULL.\n", - __func__, tag); - return; - } - if (isock == NULL) - return; + MPASS(ci != NULL); + MPASS(isock != NULL); - idx = (tag >> IPPOD_IDX_SHIFT) & ddp->idx_mask; + idx = (tag >> IPPOD_IDX_SHIFT) & ci->idx_mask; CTR3(KTR_CXGBE, "tag:0x%x idx:0x%x nppods:0x%x\n", - tag, idx, ddp->nppods); - if (idx < ddp->nppods) { - struct cxgbei_ulp2_gather_list *gl = ddp->gl_map[idx]; + tag, idx, ci->nppods); + if (idx < ci->nppods) { + struct cxgbei_ulp2_gather_list *gl = ci->gl_map[idx]; unsigned int npods; if (!gl || 
!gl->nelem) { CTR4(KTR_CXGBE, "release 0x%x, idx 0x%x, gl 0x%p, %u.\n", tag, idx, gl, gl ? gl->nelem : 0); return; } npods = (gl->nelem + IPPOD_PAGES_MAX - 1) >> IPPOD_PAGES_SHIFT; CTR3(KTR_CXGBE, "ddp tag 0x%x, release idx 0x%x, npods %u.\n", tag, idx, npods); - ddp->ddp_clear_map(ddp, gl, tag, idx, npods, isock); - ddp_unmark_entries(ddp, idx, npods); - cxgbei_ulp2_ddp_release_gl(gl, ddp->tdev); + t4_ddp_clear_map(ci, gl, tag, idx, npods, isock); + ddp_unmark_entries(ci, idx, npods); + cxgbei_ulp2_ddp_release_gl(ci, gl); } else CTR3(KTR_CXGBE, "ddp tag 0x%x, idx 0x%x > max 0x%x.\n", - tag, idx, ddp->nppods); + tag, idx, ci->nppods); } /** - * cxgbei_ulp2_adapter_ddp_info - read the adapter's ddp information - * @ddp: adapter's ddp info - * @tformat: tag format - * @txsz: max tx pdu payload size, filled in by this func. - * @rxsz: max rx pdu payload size, filled in by this func. - * setup the tag format for a given iscsi entity + * cxgbei_ddp_cleanup - release the adapter's ddp resources */ -int -cxgbei_ulp2_adapter_ddp_info(struct cxgbei_ulp2_ddp_info *ddp, - struct cxgbei_ulp2_tag_format *tformat, - unsigned int *txsz, unsigned int *rxsz) -{ - unsigned char idx_bits; - - if (tformat == NULL) - return EINVAL; - - if (ddp == NULL) - return EINVAL; - - idx_bits = 32 - tformat->sw_bits; - tformat->sw_bits = ddp->idx_bits; - tformat->rsvd_bits = ddp->idx_bits; - tformat->rsvd_shift = IPPOD_IDX_SHIFT; - tformat->rsvd_mask = (1 << tformat->rsvd_bits) - 1; - - CTR4(KTR_CXGBE, "tag format: sw %u, rsvd %u,%u, mask 0x%x.\n", - tformat->sw_bits, tformat->rsvd_bits, - tformat->rsvd_shift, tformat->rsvd_mask); - - *txsz = min(ULP2_MAX_PDU_PAYLOAD, - ddp->max_txsz - ISCSI_PDU_NONPAYLOAD_LEN); - *rxsz = min(ULP2_MAX_PDU_PAYLOAD, - ddp->max_rxsz - ISCSI_PDU_NONPAYLOAD_LEN); - CTR4(KTR_CXGBE, "max payload size: %u/%u, %u/%u.\n", - *txsz, ddp->max_txsz, *rxsz, ddp->max_rxsz); - return 0; -} - -/** - * cxgbei_ulp2_ddp_cleanup - release the cxgbX adapter's ddp resource - * @tdev: t4cdev adapter - * release all the resource held by the ddp pagepod manager for a given - * adapter if needed - */ void -cxgbei_ulp2_ddp_cleanup(struct cxgbei_ulp2_ddp_info **ddp_pp) +cxgbei_ddp_cleanup(struct cxgbei_data *ci) { int i = 0; - struct cxgbei_ulp2_ddp_info *ddp = *ddp_pp; - if (ddp == NULL) - return; - - CTR2(KTR_CXGBE, "tdev, release ddp 0x%p, ref %d.\n", - ddp, atomic_load_acq_int(&ddp->refcnt)); - - if (ddp && (cxgbei_counter_dec_and_read(&ddp->refcnt) == 0)) { - *ddp_pp = NULL; - while (i < ddp->nppods) { - struct cxgbei_ulp2_gather_list *gl = ddp->gl_map[i]; - if (gl) { - int npods = (gl->nelem + IPPOD_PAGES_MAX - 1) - >> IPPOD_PAGES_SHIFT; - CTR2(KTR_CXGBE, - "tdev, ddp %d + %d.\n", i, npods); - free(gl, M_DEVBUF); - i += npods; - } else - i++; - } - bus_dmamap_unload(ddp->ulp_ddp_tag, ddp->ulp_ddp_map); - cxgbei_ulp2_free_big_mem(ddp); + while (i < ci->nppods) { + struct cxgbei_ulp2_gather_list *gl = ci->gl_map[i]; + if (gl) { + int npods = (gl->nelem + IPPOD_PAGES_MAX - 1) + >> IPPOD_PAGES_SHIFT; + free(gl, M_DEVBUF); + i += npods; + } else + i++; } -} - -/** - * ddp_init - initialize the cxgb3/4 adapter's ddp resource - * @tdev_name: device name - * @tdev: device - * @ddp: adapter's ddp info - * @uinfo: adapter's iscsi info - * initialize the ddp pagepod manager for a given adapter - */ -static void -ddp_init(void *tdev, - struct cxgbei_ulp2_ddp_info **ddp_pp, - struct ulp_iscsi_info *uinfo) -{ - struct cxgbei_ulp2_ddp_info *ddp = *ddp_pp; - unsigned int ppmax, bits; - int i, rc; - - if (uinfo->ulimit <= 
uinfo->llimit) { - printf("%s: tdev, ddp 0x%x >= 0x%x.\n", - __func__, uinfo->llimit, uinfo->ulimit); - return; - } - if (ddp) { - atomic_add_acq_int(&ddp->refcnt, 1); - CTR2(KTR_CXGBE, "tdev, ddp 0x%p already set up, %d.\n", - ddp, atomic_load_acq_int(&ddp->refcnt)); - return; - } - - ppmax = (uinfo->ulimit - uinfo->llimit + 1) >> IPPOD_SIZE_SHIFT; - if (ppmax <= 1024) { - CTR3(KTR_CXGBE, "tdev, ddp 0x%x ~ 0x%x, nppod %u < 1K.\n", - uinfo->llimit, uinfo->ulimit, ppmax); - return; - } - bits = (fls(ppmax) - 1) + 1; - - if (bits > IPPOD_IDX_MAX_SIZE) - bits = IPPOD_IDX_MAX_SIZE; - ppmax = (1 << (bits - 1)) - 1; - - ddp = cxgbei_ulp2_alloc_big_mem(sizeof(struct cxgbei_ulp2_ddp_info) + - ppmax * (sizeof(struct cxgbei_ulp2_gather_list *) + - sizeof(unsigned char))); - if (ddp == NULL) { - CTR1(KTR_CXGBE, "unable to alloc ddp 0x%d, ddp disabled.\n", - ppmax); - return; - } - ddp->colors = (unsigned char *)(ddp + 1); - ddp->gl_map = (struct cxgbei_ulp2_gather_list **)(ddp->colors + - ppmax * sizeof(unsigned char)); - *ddp_pp = ddp; - - mtx_init(&ddp->map_lock, "ddp lock", NULL, - MTX_DEF | MTX_DUPOK| MTX_RECURSE); - - atomic_set_acq_int(&ddp->refcnt, 1); - - /* dma_tag create */ - rc = ulp2_dma_tag_create(ddp); - if (rc) { - printf("%s: unable to alloc ddp 0x%d, ddp disabled.\n", - __func__, ppmax); - return; - } - - ddp->tdev = tdev; - ddp->max_txsz = min(uinfo->max_txsz, ULP2_MAX_PKT_SIZE); - ddp->max_rxsz = min(uinfo->max_rxsz, ULP2_MAX_PKT_SIZE); - ddp->llimit = uinfo->llimit; - ddp->ulimit = uinfo->ulimit; - ddp->nppods = ppmax; - ddp->idx_last = ppmax; - ddp->idx_bits = bits; - ddp->idx_mask = (1 << bits) - 1; - ddp->rsvd_tag_mask = (1 << (bits + IPPOD_IDX_SHIFT)) - 1; - - CTR2(KTR_CXGBE, - "gl map 0x%p, idx_last %u.\n", ddp->gl_map, ddp->idx_last); - uinfo->tagmask = ddp->idx_mask << IPPOD_IDX_SHIFT; - for (i = 0; i < DDP_PGIDX_MAX; i++) - uinfo->pgsz_factor[i] = ddp_page_order[i]; - uinfo->ulimit = uinfo->llimit + (ppmax << IPPOD_SIZE_SHIFT); - - printf("nppods %u, bits %u, mask 0x%x,0x%x pkt %u/%u," - " %u/%u.\n", - ppmax, ddp->idx_bits, ddp->idx_mask, - ddp->rsvd_tag_mask, ddp->max_txsz, uinfo->max_txsz, - ddp->max_rxsz, uinfo->max_rxsz); - - rc = bus_dmamap_create(ddp->ulp_ddp_tag, 0, &ddp->ulp_ddp_map); - if (rc != 0) { - printf("%s: bus_dmamap_Create failed\n", __func__); - return; - } -} - -/** - * cxgbei_ulp2_ddp_init - initialize ddp functions - */ -void -cxgbei_ulp2_ddp_init(void *tdev, - struct cxgbei_ulp2_ddp_info **ddp_pp, - struct ulp_iscsi_info *uinfo) -{ - if (page_idx == DDP_PGIDX_MAX) { - page_idx = cxgbei_ulp2_ddp_find_page_index(PAGE_SIZE); - - if (page_idx == DDP_PGIDX_MAX) { - if (cxgbei_ulp2_ddp_adjust_page_table()) { - CTR1(KTR_CXGBE, "PAGE_SIZE %x, ddp disabled.\n", - PAGE_SIZE); - return; - } - } - page_idx = cxgbei_ulp2_ddp_find_page_index(PAGE_SIZE); - } - - ddp_init(tdev, ddp_pp, uinfo); + free(ci->colors, M_CXGBE); + free(ci->gl_map, M_CXGBE); } Index: projects/cxl_iscsi/sys/dev/cxgbe/cxgbei/cxgbei_ulp2_ddp.h =================================================================== --- projects/cxl_iscsi/sys/dev/cxgbe/cxgbei/cxgbei_ulp2_ddp.h (revision 279896) +++ projects/cxl_iscsi/sys/dev/cxgbe/cxgbei/cxgbei_ulp2_ddp.h (revision 279897) @@ -1,348 +1,214 @@ /*- * Copyright (c) 2012 Chelsio Communications, Inc. * All rights reserved. * * Chelsio T5xx iSCSI driver * cxgbei_ulp2_ddp.c: Chelsio iSCSI DDP Manager. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef __CXGBEI_ULP2_DDP_H__ #define __CXGBEI_ULP2_DDP_H__ -#include -#include -#include - -#include -#include -#include - -/* - * Structure used to return information to the iscsi layer. - */ -struct ulp_iscsi_info { - unsigned int offset; - unsigned int llimit; - unsigned int ulimit; - unsigned int tagmask; - unsigned char pgsz_factor[4]; - unsigned int max_rxsz; - unsigned int max_txsz; -}; - -/* - * struct cxgbei_ulp2_tag_format - cxgbei ulp tag format for an iscsi entity - * - * @sw_bits: # of bits used by iscsi software layer - * @rsvd_bits: # of bits used by h/w - * @rsvd_shift: h/w bits shift left - * @rsvd_mask: reserved bit mask - */ -typedef struct cxgbei_ulp2_tag_format { - unsigned char sw_bits; - unsigned char rsvd_bits; - unsigned char rsvd_shift; - unsigned char filler[1]; - uint32_t rsvd_mask; -}cxgbei_ulp2_tag_format; - #define CXGBEI_PAGE_MASK (~(PAGE_SIZE-1)) #define DDP_THRESHOLD 2048 /* * cxgbei ddp tag are 32 bits, it consists of reserved bits used by h/w and * non-reserved bits that can be used by the iscsi s/w. * The reserved bits are identified by the rsvd_bits and rsvd_shift fields * in struct cxgbei_ulp2_tag_format. * * The upper most reserved bit can be used to check if a tag is ddp tag or not: * if the bit is 0, the tag is a valid ddp tag */ /* * cxgbei_ulp2_is_ddp_tag - check if a given tag is a hw/ddp tag * @tformat: tag format information * @tag: tag to be checked * * return true if the tag is a ddp tag, false otherwise. */ static inline int cxgbei_ulp2_is_ddp_tag(struct cxgbei_ulp2_tag_format *tformat, uint32_t tag) { - return !(tag & (1 << (tformat->rsvd_bits + tformat->rsvd_shift - 1))); + + return (!(tag & (1 << (tformat->rsvd_bits + tformat->rsvd_shift - 1)))); } /* * cxgbei_ulp2_sw_tag_usable - check if s/w tag has enough bits left for hw bits * @tformat: tag format information * @sw_tag: s/w tag to be checked * * return true if the tag can be used for hw ddp tag, false otherwise. */ static inline int cxgbei_ulp2_sw_tag_usable(struct cxgbei_ulp2_tag_format *tformat, - uint32_t sw_tag) + uint32_t sw_tag) { - return 1; + return (1); /* XXXNP: huh? */ + sw_tag >>= (32 - tformat->rsvd_bits + tformat->rsvd_shift); return !sw_tag; } /* * cxgbei_ulp2_set_non_ddp_tag - mark a given s/w tag as an invalid ddp tag * @tformat: tag format information * @sw_tag: s/w tag to be checked * * insert 1 at the upper most reserved bit to mark it as an invalid ddp tag. 
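 *
 * The s/w tag bits at and above that position are shifted up by one so
 * that no information is lost; cxgbei_ulp2_is_ddp_tag() then sees the
 * marker bit set and treats the tag as a plain s/w tag.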
*/ static inline uint32_t cxgbei_ulp2_set_non_ddp_tag(struct cxgbei_ulp2_tag_format *tformat, uint32_t sw_tag) { uint32_t rsvd_bits = tformat->rsvd_bits + tformat->rsvd_shift; if (sw_tag) { u32 v1 = sw_tag & ((1 << (rsvd_bits - 1)) - 1); u32 v2 = (sw_tag >> (rsvd_bits - 1)) << rsvd_bits; return v2 | (1 << (rsvd_bits - 1)) | v1; } return sw_tag | (1 << (rsvd_bits - 1)) ; } struct dma_segments { bus_dmamap_t bus_map; bus_addr_t phys_addr; }; /* * struct cxgbei_ulp2_gather_list - cxgbei direct data placement memory * * @tag: ddp tag * @length: total data buffer length * @offset: initial offset to the 1st page * @nelem: # of pages * @pages: page pointers * @phys_addr: physical address */ struct cxgbei_ulp2_gather_list { uint32_t tag; uint32_t tid; uint32_t port_id; void *egress_dev; unsigned int length; unsigned int offset; unsigned int nelem; bus_size_t mapsize; bus_dmamap_t bus_map; bus_dma_segment_t *segments; void **pages; struct dma_segments dma_sg[0]; }; -struct cxgbei_ulp2_pagepod_hdr; -/* - * struct cxgbei_ulp2_ddp_info - direct data placement for pdu payload - * - * @list: list head to link elements - * @refcnt: count of iscsi entities using it - * @tdev: pointer to tXcdev used by cxgbX driver - * @max_txsz: max tx packet size for ddp - * @max_rxsz: max rx packet size for ddp - * @llimit: lower bound of the page pod memory - * @ulimit: upper bound of the page pod memory - * @nppods: # of page pod entries - * @idx_last: page pod entry last used - * @idx_bits: # of bits the pagepod index would take - * @idx_mask: pagepod index mask - * @rsvd_tag_mask: tag mask - * @map_lock: lock to synchonize access to the page pod map - * @gl_map: ddp memory gather list - */ -struct cxgbei_ulp2_ddp_info { - SLIST_ENTRY(cxgbei_ulp2_ddp_info) cxgbei_ulp2_ddp_list; - volatile int refcnt; - void *tdev; /* t5odev */ - unsigned int max_txsz; - unsigned int max_rxsz; - unsigned int llimit; - unsigned int ulimit; - unsigned int nppods; - unsigned int idx_last; - unsigned char idx_bits; - unsigned char filler[3]; - uint32_t idx_mask; - uint32_t rsvd_tag_mask; - bus_addr_t rsvd_page_phys_addr; - - int (*ddp_set_map)(struct cxgbei_ulp2_ddp_info *ddp, - void *isock, - struct cxgbei_ulp2_pagepod_hdr *hdr, - unsigned int idx, unsigned int npods, - struct cxgbei_ulp2_gather_list *gl, int reply); - void (*ddp_clear_map)(struct cxgbei_ulp2_ddp_info *ddp, - struct cxgbei_ulp2_gather_list *gl, - unsigned int tag, unsigned int idx, - unsigned int npods, - iscsi_socket *isock); - - struct mtx map_lock; - bus_dma_tag_t ulp_ddp_tag; - bus_dmamap_t ulp_ddp_map; - unsigned char *colors; - struct cxgbei_ulp2_gather_list **gl_map; -}; - #define IPPOD_SIZE sizeof(struct cxgbei_ulp2_pagepod) /* 64 */ #define IPPOD_SIZE_SHIFT 6 #define IPPOD_COLOR_SHIFT 0 #define IPPOD_COLOR_SIZE 6 #define IPPOD_COLOR_MASK ((1 << IPPOD_COLOR_SIZE) - 1) #define IPPOD_IDX_SHIFT IPPOD_COLOR_SIZE #define IPPOD_IDX_MAX_SIZE 24 #define S_IPPOD_TID 0 #define M_IPPOD_TID 0xFFFFFF #define V_IPPOD_TID(x) ((x) << S_IPPOD_TID) #define S_IPPOD_VALID 24 #define V_IPPOD_VALID(x) ((x) << S_IPPOD_VALID) #define F_IPPOD_VALID V_IPPOD_VALID(1U) #define S_IPPOD_COLOR 0 #define M_IPPOD_COLOR 0x3F #define V_IPPOD_COLOR(x) ((x) << S_IPPOD_COLOR) #define S_IPPOD_TAG 6 #define M_IPPOD_TAG 0xFFFFFF #define V_IPPOD_TAG(x) ((x) << S_IPPOD_TAG) #define S_IPPOD_PGSZ 30 #define M_IPPOD_PGSZ 0x3 #define V_IPPOD_PGSZ(x) ((x) << S_IPPOD_PGSZ) static inline uint32_t -cxgbei_ulp2_ddp_tag_base(unsigned int idx, struct cxgbei_ulp2_ddp_info *ddp, - struct cxgbei_ulp2_tag_format 
*tformat, uint32_t sw_tag) +cxgbei_ulp2_ddp_tag_base(u_int idx, u_char *colors, + struct cxgbei_ulp2_tag_format *tformat, uint32_t sw_tag) { - ddp->colors[idx]++; - if (ddp->colors[idx] == (1 << IPPOD_IDX_SHIFT)) - ddp->colors[idx] = 0; + if (__predict_false(++colors[idx] == 1 << IPPOD_IDX_SHIFT)) + colors[idx] = 0; - sw_tag <<= (tformat->rsvd_bits + tformat->rsvd_shift); + sw_tag <<= tformat->rsvd_bits + tformat->rsvd_shift; - return sw_tag | (idx << 6) | ddp->colors[idx]; + return (sw_tag | idx << IPPOD_IDX_SHIFT | colors[idx]); } #define ISCSI_PDU_NONPAYLOAD_LEN 312 /* bhs(48) + ahs(256) + digest(8) */ /* * align pdu size to multiple of 512 for better performance */ #define cxgbei_align_pdu_size(n) do { n = (n) & (~511); } while (0) #define ULP2_MAX_PKT_SIZE 16224 #define ULP2_MAX_PDU_PAYLOAD (ULP2_MAX_PKT_SIZE - ISCSI_PDU_NONPAYLOAD_LEN) #define IPPOD_PAGES_MAX 4 #define IPPOD_PAGES_SHIFT 2 /* 4 pages per pod */ /* * struct pagepod_hdr, pagepod - pagepod format */ struct cxgbei_ulp2_pagepod_hdr { uint32_t vld_tid; uint32_t pgsz_tag_clr; uint32_t maxoffset; uint32_t pgoffset; uint64_t rsvd; }; struct cxgbei_ulp2_pagepod { struct cxgbei_ulp2_pagepod_hdr hdr; uint64_t addr[IPPOD_PAGES_MAX + 1]; }; -/* - * ddp page size array - */ -#define DDP_PGIDX_MAX 4 -extern unsigned char ddp_page_order[DDP_PGIDX_MAX]; -extern unsigned char page_idx; +int cxgbei_ulp2_ddp_tag_reserve(struct cxgbei_data *, void *, unsigned int, + struct cxgbei_ulp2_tag_format *, uint32_t *, + struct cxgbei_ulp2_gather_list *, int , int ); +void cxgbei_ulp2_ddp_tag_release(struct cxgbei_data *, uint32_t, + struct iscsi_socket *); +struct cxgbei_ulp2_gather_list *cxgbei_ulp2_ddp_make_gl_from_iscsi_sgvec(u_int, + struct cxgbei_sgl *, u_int, struct cxgbei_data *, int); +void cxgbei_ulp2_ddp_release_gl(struct cxgbei_data *, + struct cxgbei_ulp2_gather_list *); -/* - * large memory chunk allocation/release - * use vmalloc() if kmalloc() fails - */ -static inline void * -cxgbei_ulp2_alloc_big_mem(unsigned int size) -{ - void *p = NULL; +int cxgbei_ulp2_ddp_find_page_index(u_long); +int cxgbei_ulp2_adapter_ddp_info(struct cxgbei_data *, + struct cxgbei_ulp2_tag_format *); - p = malloc(size, M_TEMP, M_NOWAIT | M_ZERO); - - return p; -} - -static inline void -cxgbei_ulp2_free_big_mem(void *addr) -{ - free(addr, M_TEMP); -} - -int cxgbei_ulp2_ddp_tag_reserve(struct cxgbei_ulp2_ddp_info *, - void *, unsigned int , - struct cxgbei_ulp2_tag_format *, uint32_t *, - struct cxgbei_ulp2_gather_list *, int , int ); -void cxgbei_ulp2_ddp_tag_release(struct cxgbei_ulp2_ddp_info *, - uint32_t, iscsi_socket *); - -struct cxgbei_ulp2_gather_list *cxgbei_ulp2_ddp_make_gl(unsigned int , - struct sglist *, - unsigned int , - struct pci_conf *, - int); - -struct cxgbei_ulp2_gather_list *cxgbei_ulp2_ddp_make_gl_from_iscsi_sgvec( - unsigned int, - cxgbei_sgl *, - unsigned int, - void *, - int); - -void cxgbei_ulp2_ddp_release_gl(struct cxgbei_ulp2_gather_list *, void *); - -int cxgbei_ulp2_ddp_find_page_index(unsigned long); -int cxgbei_ulp2_adapter_ddp_info(struct cxgbei_ulp2_ddp_info *, - struct cxgbei_ulp2_tag_format *, - unsigned int *, unsigned int *); - -void cxgbei_ulp2_ddp_cleanup(struct cxgbei_ulp2_ddp_info **); -void cxgbei_ulp2_ddp_init(void *, - struct cxgbei_ulp2_ddp_info **, - struct ulp_iscsi_info *); -int cxgbei_ulp2_init(void); -void cxgbei_ulp2_exit(void); +void cxgbei_ddp_cleanup(struct cxgbei_data *); #endif Index: projects/cxl_iscsi/sys/dev/cxgbe/cxgbei/icl_cxgbei.c 
=================================================================== --- projects/cxl_iscsi/sys/dev/cxgbe/cxgbei/icl_cxgbei.c (revision 279896) +++ projects/cxl_iscsi/sys/dev/cxgbe/cxgbei/icl_cxgbei.c (revision 279897) @@ -1,819 +1,821 @@ /*- * Copyright (c) 2012 The FreeBSD Foundation * All rights reserved. * * This software was developed by Edward Tomasz Napierala under sponsorship * from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ /* * cxgbei implementation of iSCSI Common Layer kobj(9) interface. */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include #include #include #include #include #include #include "cxgbei.h" SYSCTL_NODE(_kern_icl, OID_AUTO, cxgbei, CTLFLAG_RD, 0, "Chelsio iSCSI offload"); static int coalesce = 1; SYSCTL_INT(_kern_icl_cxgbei, OID_AUTO, coalesce, CTLFLAG_RWTUN, &coalesce, 0, "Try to coalesce PDUs before sending"); static int partial_receive_len = 128 * 1024; SYSCTL_INT(_kern_icl_cxgbei, OID_AUTO, partial_receive_len, CTLFLAG_RWTUN, &partial_receive_len, 0, "Minimum read size for partially received " "data segment"); static int sendspace = 1048576; SYSCTL_INT(_kern_icl_cxgbei, OID_AUTO, sendspace, CTLFLAG_RWTUN, &sendspace, 0, "Default send socket buffer size"); static int recvspace = 1048576; SYSCTL_INT(_kern_icl_cxgbei, OID_AUTO, recvspace, CTLFLAG_RWTUN, &recvspace, 0, "Default receive socket buffer size"); static MALLOC_DEFINE(M_ICL_CXGBEI, "icl_cxgbei", "iSCSI software backend"); static uma_zone_t icl_pdu_zone; static uma_zone_t icl_transfer_zone; static volatile u_int icl_ncons; #define ICL_CONN_LOCK(X) mtx_lock(X->ic_lock) #define ICL_CONN_UNLOCK(X) mtx_unlock(X->ic_lock) #define ICL_CONN_LOCK_ASSERT(X) mtx_assert(X->ic_lock, MA_OWNED) #define ICL_CONN_LOCK_ASSERT_NOT(X) mtx_assert(X->ic_lock, MA_NOTOWNED) STAILQ_HEAD(icl_pdu_stailq, icl_pdu); static icl_conn_new_pdu_t icl_cxgbei_conn_new_pdu; static icl_conn_pdu_free_t icl_cxgbei_conn_pdu_free; static icl_conn_pdu_data_segment_length_t icl_cxgbei_conn_pdu_data_segment_length; static icl_conn_pdu_append_data_t icl_cxgbei_conn_pdu_append_data; static icl_conn_pdu_get_data_t 
icl_cxgbei_conn_pdu_get_data; static icl_conn_pdu_queue_t icl_cxgbei_conn_pdu_queue; static icl_conn_handoff_t icl_cxgbei_conn_handoff; static icl_conn_free_t icl_cxgbei_conn_free; static icl_conn_close_t icl_cxgbei_conn_close; static icl_conn_connected_t icl_cxgbei_conn_connected; static icl_conn_task_setup_t icl_cxgbei_conn_task_setup; static icl_conn_task_done_t icl_cxgbei_conn_task_done; static icl_conn_transfer_setup_t icl_cxgbei_conn_transfer_setup; static icl_conn_transfer_done_t icl_cxgbei_conn_transfer_done; static kobj_method_t icl_cxgbei_methods[] = { KOBJMETHOD(icl_conn_new_pdu, icl_cxgbei_conn_new_pdu), KOBJMETHOD(icl_conn_pdu_free, icl_cxgbei_conn_pdu_free), KOBJMETHOD(icl_conn_pdu_data_segment_length, icl_cxgbei_conn_pdu_data_segment_length), KOBJMETHOD(icl_conn_pdu_append_data, icl_cxgbei_conn_pdu_append_data), KOBJMETHOD(icl_conn_pdu_get_data, icl_cxgbei_conn_pdu_get_data), KOBJMETHOD(icl_conn_pdu_queue, icl_cxgbei_conn_pdu_queue), KOBJMETHOD(icl_conn_handoff, icl_cxgbei_conn_handoff), KOBJMETHOD(icl_conn_free, icl_cxgbei_conn_free), KOBJMETHOD(icl_conn_close, icl_cxgbei_conn_close), KOBJMETHOD(icl_conn_connected, icl_cxgbei_conn_connected), KOBJMETHOD(icl_conn_task_setup, icl_cxgbei_conn_task_setup), KOBJMETHOD(icl_conn_task_done, icl_cxgbei_conn_task_done), KOBJMETHOD(icl_conn_transfer_setup, icl_cxgbei_conn_transfer_setup), KOBJMETHOD(icl_conn_transfer_done, icl_cxgbei_conn_transfer_done), { 0, 0 } }; DEFINE_CLASS(icl_cxgbei, icl_cxgbei_methods, sizeof(struct icl_conn)); struct icl_pdu * icl_pdu_new_empty(struct icl_conn *ic, int flags); void icl_pdu_free(struct icl_pdu *ip); struct icl_pdu * icl_pdu_new_empty(struct icl_conn *ic, int flags) { struct icl_pdu *ip; #ifdef DIAGNOSTIC refcount_acquire(&ic->ic_outstanding_pdus); #endif ip = uma_zalloc(icl_pdu_zone, flags | M_ZERO); if (ip == NULL) { ICL_WARN("failed to allocate %zd bytes", sizeof(*ip)); #ifdef DIAGNOSTIC refcount_release(&ic->ic_outstanding_pdus); #endif return (NULL); } ip->ip_conn = ic; return (ip); } void icl_pdu_free(struct icl_pdu *ip) { struct icl_conn *ic; ic = ip->ip_conn; m_freem(ip->ip_bhs_mbuf); m_freem(ip->ip_ahs_mbuf); m_freem(ip->ip_data_mbuf); uma_zfree(icl_pdu_zone, ip); #ifdef DIAGNOSTIC refcount_release(&ic->ic_outstanding_pdus); #endif } void icl_cxgbei_conn_pdu_free(struct icl_conn *ic, struct icl_pdu *ip) { icl_pdu_free(ip); } /* * Allocate icl_pdu with empty BHS to fill up by the caller. 
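 *
 * Rough usage sketch (callers normally reach these methods through the icl
 * kobj(9) interface rather than calling them directly; shown only for
 * illustration):
 *
 *	ip = icl_cxgbei_conn_new_pdu(ic, M_NOWAIT);
 *	if (ip == NULL)
 *		return (ENOMEM);
 *	ip->ip_bhs->bhs_opcode = ...;	(caller fills in the BHS)
 *	icl_cxgbei_conn_pdu_append_data(ic, ip, buf, len, M_NOWAIT);
 *	icl_cxgbei_conn_pdu_queue(ic, ip);	(with ic_lock held; finalizes and transmits)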
*/ struct icl_pdu * icl_cxgbei_conn_new_pdu(struct icl_conn *ic, int flags) { struct icl_pdu *ip; ip = icl_pdu_new_empty(ic, flags); if (ip == NULL) return (NULL); ip->ip_bhs_mbuf = m_getm2(NULL, sizeof(struct iscsi_bhs), flags, MT_DATA, M_PKTHDR); if (ip->ip_bhs_mbuf == NULL) { ICL_WARN("failed to allocate %zd bytes", sizeof(*ip)); icl_pdu_free(ip); return (NULL); } ip->ip_bhs = mtod(ip->ip_bhs_mbuf, struct iscsi_bhs *); memset(ip->ip_bhs, 0, sizeof(struct iscsi_bhs)); ip->ip_bhs_mbuf->m_len = sizeof(struct iscsi_bhs); return (ip); } static size_t icl_pdu_data_segment_length(const struct icl_pdu *request) { uint32_t len = 0; len += request->ip_bhs->bhs_data_segment_len[0]; len <<= 8; len += request->ip_bhs->bhs_data_segment_len[1]; len <<= 8; len += request->ip_bhs->bhs_data_segment_len[2]; return (len); } size_t icl_cxgbei_conn_pdu_data_segment_length(struct icl_conn *ic, const struct icl_pdu *request) { return (icl_pdu_data_segment_length(request)); } static void icl_pdu_set_data_segment_length(struct icl_pdu *response, uint32_t len) { response->ip_bhs->bhs_data_segment_len[2] = len; response->ip_bhs->bhs_data_segment_len[1] = len >> 8; response->ip_bhs->bhs_data_segment_len[0] = len >> 16; } static size_t icl_pdu_padding(const struct icl_pdu *ip) { if ((ip->ip_data_len % 4) != 0) return (4 - (ip->ip_data_len % 4)); return (0); } static size_t icl_pdu_size(const struct icl_pdu *response) { size_t len; KASSERT(response->ip_ahs_len == 0, ("responding with AHS")); len = sizeof(struct iscsi_bhs) + response->ip_data_len + icl_pdu_padding(response); return (len); } static uint32_t icl_conn_build_tasktag(struct icl_conn *ic, uint32_t tag) { return tag; } static int icl_soupcall_receive(struct socket *so, void *arg, int waitflag) { struct icl_conn *ic; if (!soreadable(so)) return (SU_OK); ic = arg; cv_signal(&ic->ic_receive_cv); return (SU_OK); } static int icl_pdu_finalize(struct icl_pdu *request) { size_t padding, pdu_len; uint32_t zero = 0; int ok; struct icl_conn *ic; ic = request->ip_conn; icl_pdu_set_data_segment_length(request, request->ip_data_len); pdu_len = icl_pdu_size(request); if (request->ip_data_len != 0) { padding = icl_pdu_padding(request); if (padding > 0) { ok = m_append(request->ip_data_mbuf, padding, (void *)&zero); if (ok != 1) { ICL_WARN("failed to append padding"); return (1); } } m_cat(request->ip_bhs_mbuf, request->ip_data_mbuf); request->ip_data_mbuf = NULL; } request->ip_bhs_mbuf->m_pkthdr.len = pdu_len; return (0); } static int icl_soupcall_send(struct socket *so, void *arg, int waitflag) { struct icl_conn *ic; if (!sowriteable(so)) return (SU_OK); ic = arg; ICL_CONN_LOCK(ic); ic->ic_check_send_space = true; ICL_CONN_UNLOCK(ic); cv_signal(&ic->ic_send_cv); return (SU_OK); } static int icl_pdu_append_data(struct icl_pdu *request, const void *addr, size_t len, int flags) { struct mbuf *mb, *newmb; size_t copylen, off = 0; KASSERT(len > 0, ("len == 0")); newmb = m_getm2(NULL, len, flags, MT_DATA, M_PKTHDR); if (newmb == NULL) { ICL_WARN("failed to allocate mbuf for %zd bytes", len); return (ENOMEM); } for (mb = newmb; mb != NULL; mb = mb->m_next) { copylen = min(M_TRAILINGSPACE(mb), len - off); memcpy(mtod(mb, char *), (const char *)addr + off, copylen); mb->m_len = copylen; off += copylen; } KASSERT(off == len, ("%s: off != len", __func__)); if (request->ip_data_mbuf == NULL) { request->ip_data_mbuf = newmb; request->ip_data_len = len; } else { m_cat(request->ip_data_mbuf, newmb); request->ip_data_len += len; } return (0); } int icl_cxgbei_conn_pdu_append_data(struct 
icl_conn *ic, struct icl_pdu *request, const void *addr, size_t len, int flags) { return (icl_pdu_append_data(request, addr, len, flags)); } static void icl_pdu_get_data(struct icl_pdu *ip, size_t off, void *addr, size_t len) { /* data is DDP'ed, no need to copy */ if (ip->ip_ofld_prv0) return; m_copydata(ip->ip_data_mbuf, off, len, addr); } void icl_cxgbei_conn_pdu_get_data(struct icl_conn *ic, struct icl_pdu *ip, size_t off, void *addr, size_t len) { return (icl_pdu_get_data(ip, off, addr, len)); } static void icl_pdu_queue(struct icl_pdu *ip) { struct icl_conn *ic; ic = ip->ip_conn; ICL_CONN_LOCK_ASSERT(ic); if (ic->ic_disconnecting || ic->ic_socket == NULL) { ICL_DEBUG("icl_pdu_queue on closed connection"); icl_pdu_free(ip); return; } icl_pdu_finalize(ip); cxgbei_conn_xmit_pdu(ic, ip); } void icl_cxgbei_conn_pdu_queue(struct icl_conn *ic, struct icl_pdu *ip) { icl_pdu_queue(ip); } static struct icl_conn * icl_cxgbei_new_conn(const char *name, struct mtx *lock) { struct icl_conn *ic; refcount_acquire(&icl_ncons); ic = (struct icl_conn *)kobj_create(&icl_cxgbei_class, M_ICL_CXGBEI, M_WAITOK | M_ZERO); STAILQ_INIT(&ic->ic_to_send); ic->ic_lock = lock; cv_init(&ic->ic_send_cv, "icl_tx"); cv_init(&ic->ic_receive_cv, "icl_rx"); #ifdef DIAGNOSTIC refcount_init(&ic->ic_outstanding_pdus, 0); #endif ic->ic_max_data_segment_length = ICL_MAX_DATA_SEGMENT_LENGTH; ic->ic_name = name; ic->ic_offload = strdup("cxgbei", M_TEMP);; return (ic); } void icl_cxgbei_conn_free(struct icl_conn *ic) { cv_destroy(&ic->ic_send_cv); cv_destroy(&ic->ic_receive_cv); kobj_delete((struct kobj *)ic, M_ICL_CXGBEI); refcount_release(&icl_ncons); } +/* XXXNP: what is this for? There's no conn_start method. */ static int icl_conn_start(struct icl_conn *ic) { size_t minspace; struct sockopt opt; int error, one = 1; ICL_CONN_LOCK(ic); /* * XXX: Ugly hack. */ if (ic->ic_socket == NULL) { ICL_CONN_UNLOCK(ic); return (EINVAL); } ic->ic_receive_state = ICL_CONN_STATE_BHS; ic->ic_receive_len = sizeof(struct iscsi_bhs); ic->ic_disconnecting = false; ICL_CONN_UNLOCK(ic); /* * For sendspace, this is required because the current code cannot * send a PDU in pieces; thus, the minimum buffer size is equal * to the maximum PDU size. "+4" is to account for possible padding. * * What we should actually do here is to use autoscaling, but set * some minimal buffer size to "minspace". I don't know a way to do * that, though. */ minspace = sizeof(struct iscsi_bhs) + ic->ic_max_data_segment_length + ISCSI_HEADER_DIGEST_SIZE + ISCSI_DATA_DIGEST_SIZE + 4; if (sendspace < minspace) { ICL_WARN("kern.icl.sendspace too low; must be at least %zd", minspace); sendspace = minspace; } if (recvspace < minspace) { ICL_WARN("kern.icl.recvspace too low; must be at least %zd", minspace); recvspace = minspace; } error = soreserve(ic->ic_socket, sendspace, recvspace); if (error != 0) { ICL_WARN("soreserve failed with error %d", error); icl_cxgbei_conn_close(ic); return (error); } ic->ic_socket->so_snd.sb_flags |= SB_AUTOSIZE; ic->ic_socket->so_rcv.sb_flags |= SB_AUTOSIZE; /* * Disable Nagle. */ bzero(&opt, sizeof(opt)); opt.sopt_dir = SOPT_SET; opt.sopt_level = IPPROTO_TCP; opt.sopt_name = TCP_NODELAY; opt.sopt_val = &one; opt.sopt_valsize = sizeof(one); error = sosetopt(ic->ic_socket, &opt); if (error != 0) { ICL_WARN("disabling TCP_NODELAY failed with error %d", error); icl_cxgbei_conn_close(ic); return (error); } /* * Register socket upcall, to get notified about incoming PDUs * and free space to send outgoing ones. 
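 * (The upcalls are icl_soupcall_receive() and icl_soupcall_send() defined
 * earlier in this file; they only cv_signal() the connection's receive/send
 * condition variables and the work is done by the connection's threads.)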
*/ SOCKBUF_LOCK(&ic->ic_socket->so_snd); soupcall_set(ic->ic_socket, SO_SND, icl_soupcall_send, ic); SOCKBUF_UNLOCK(&ic->ic_socket->so_snd); SOCKBUF_LOCK(&ic->ic_socket->so_rcv); soupcall_set(ic->ic_socket, SO_RCV, icl_soupcall_receive, ic); SOCKBUF_UNLOCK(&ic->ic_socket->so_rcv); return (0); } int icl_cxgbei_conn_handoff(struct icl_conn *ic, int fd) { struct file *fp; struct socket *so; cap_rights_t rights; int error; ICL_CONN_LOCK_ASSERT_NOT(ic); /* * Steal the socket from userland. */ error = fget(curthread, fd, cap_rights_init(&rights, CAP_SOCK_CLIENT), &fp); if (error != 0) return (error); if (fp->f_type != DTYPE_SOCKET) { fdrop(fp, curthread); return (EINVAL); } so = fp->f_data; if (so->so_type != SOCK_STREAM) { fdrop(fp, curthread); return (EINVAL); } ICL_CONN_LOCK(ic); if (ic->ic_socket != NULL) { ICL_CONN_UNLOCK(ic); fdrop(fp, curthread); return (EBUSY); } ic->ic_socket = fp->f_data; fp->f_ops = &badfileops; fp->f_data = NULL; fdrop(fp, curthread); ICL_CONN_UNLOCK(ic); error = icl_conn_start(ic); - if(!error) { - cxgbei_conn_set_ulp_mode(ic->ic_socket, ic); - } + if (!error) + cxgbei_conn_handoff(ic); return (error); } void icl_cxgbei_conn_close(struct icl_conn *ic) { struct icl_pdu *pdu; ICL_CONN_LOCK_ASSERT_NOT(ic); ICL_CONN_LOCK(ic); if (ic->ic_socket == NULL) { ICL_CONN_UNLOCK(ic); return; } /* * Deregister socket upcalls. */ ICL_CONN_UNLOCK(ic); SOCKBUF_LOCK(&ic->ic_socket->so_snd); if (ic->ic_socket->so_snd.sb_upcall != NULL) soupcall_clear(ic->ic_socket, SO_SND); SOCKBUF_UNLOCK(&ic->ic_socket->so_snd); SOCKBUF_LOCK(&ic->ic_socket->so_rcv); if (ic->ic_socket->so_rcv.sb_upcall != NULL) soupcall_clear(ic->ic_socket, SO_RCV); SOCKBUF_UNLOCK(&ic->ic_socket->so_rcv); ICL_CONN_LOCK(ic); ic->ic_disconnecting = true; /* * Wake up the threads, so they can properly terminate. */ while (ic->ic_receive_running || ic->ic_send_running) { //ICL_DEBUG("waiting for send/receive threads to terminate"); cv_signal(&ic->ic_receive_cv); cv_signal(&ic->ic_send_cv); cv_wait(&ic->ic_send_cv, ic->ic_lock); } //ICL_DEBUG("send/receive threads terminated"); ICL_CONN_UNLOCK(ic); - cxgbei_conn_close(ic->ic_socket); + cxgbei_conn_close(ic); soclose(ic->ic_socket); ICL_CONN_LOCK(ic); ic->ic_socket = NULL; if (ic->ic_receive_pdu != NULL) { //ICL_DEBUG("freeing partially received PDU"); icl_pdu_free(ic->ic_receive_pdu); ic->ic_receive_pdu = NULL; } /* * Remove any outstanding PDUs from the send queue. 
*/ while (!STAILQ_EMPTY(&ic->ic_to_send)) { pdu = STAILQ_FIRST(&ic->ic_to_send); STAILQ_REMOVE_HEAD(&ic->ic_to_send, ip_next); icl_pdu_free(pdu); } KASSERT(STAILQ_EMPTY(&ic->ic_to_send), ("destroying session with non-empty send queue")); #ifdef DIAGNOSTIC KASSERT(ic->ic_outstanding_pdus == 0, ("destroying session with %d outstanding PDUs", ic->ic_outstanding_pdus)); #endif ICL_CONN_UNLOCK(ic); } bool icl_cxgbei_conn_connected(struct icl_conn *ic) { ICL_CONN_LOCK_ASSERT_NOT(ic); ICL_CONN_LOCK(ic); if (ic->ic_socket == NULL) { ICL_CONN_UNLOCK(ic); return (false); } if (ic->ic_socket->so_error != 0) { ICL_CONN_UNLOCK(ic); return (false); } ICL_CONN_UNLOCK(ic); return (true); } int icl_cxgbei_conn_task_setup(struct icl_conn *ic, struct ccb_scsiio *csio, uint32_t *task_tagp, void **prvp) { void *prv; *task_tagp = icl_conn_build_tasktag(ic, *task_tagp); prv = uma_zalloc(icl_transfer_zone, M_NOWAIT | M_ZERO); if (prv == NULL) return (ENOMEM); *prvp = prv; cxgbei_conn_task_reserve_itt(ic, prvp, csio, task_tagp); return (0); } void icl_cxgbei_conn_task_done(struct icl_conn *ic, void *prv) { + cxgbei_cleanup_task(ic, prv); uma_zfree(icl_transfer_zone, prv); } int icl_cxgbei_conn_transfer_setup(struct icl_conn *ic, union ctl_io *io, uint32_t *transfer_tag, void **prvp) { void *prv; *transfer_tag = icl_conn_build_tasktag(ic, *transfer_tag); prv = uma_zalloc(icl_transfer_zone, M_NOWAIT | M_ZERO); if (prv == NULL) return (ENOMEM); *prvp = prv; cxgbei_conn_transfer_reserve_ttt(ic, prvp, io, transfer_tag); return (0); } void icl_cxgbei_conn_transfer_done(struct icl_conn *ic, void *prv) { cxgbei_cleanup_task(ic, prv); uma_zfree(icl_transfer_zone, prv); } static int icl_cxgbei_limits(size_t *limitp) { *limitp = 8 * 1024; return (0); } #ifdef ICL_KERNEL_PROXY int icl_conn_handoff_sock(struct icl_conn *ic, struct socket *so) { int error; ICL_CONN_LOCK_ASSERT_NOT(ic); if (so->so_type != SOCK_STREAM) return (EINVAL); ICL_CONN_LOCK(ic); if (ic->ic_socket != NULL) { ICL_CONN_UNLOCK(ic); return (EBUSY); } ic->ic_socket = so; ICL_CONN_UNLOCK(ic); error = icl_conn_start(ic); return (error); } #endif /* ICL_KERNEL_PROXY */ static int icl_cxgbei_load(void) { int error; icl_pdu_zone = uma_zcreate("icl_pdu", sizeof(struct icl_pdu), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); icl_transfer_zone = uma_zcreate("icl_transfer", 16 * 1024, NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); refcount_init(&icl_ncons, 0); /* * The reason we call this "none" is that to the user, * it's known as "offload driver"; "offload driver: soft" * doesn't make much sense. 
*/ error = icl_register("cxgbei", 100, icl_cxgbei_limits, icl_cxgbei_new_conn); KASSERT(error == 0, ("failed to register")); return (error); } static int icl_cxgbei_unload(void) { if (icl_ncons != 0) return (EBUSY); icl_unregister("cxgbei"); uma_zdestroy(icl_pdu_zone); uma_zdestroy(icl_transfer_zone); return (0); } static int icl_cxgbei_modevent(module_t mod, int what, void *arg) { switch (what) { case MOD_LOAD: return (icl_cxgbei_load()); case MOD_UNLOAD: return (icl_cxgbei_unload()); default: return (EINVAL); } } moduledata_t icl_cxgbei_data = { "icl_cxgbei", icl_cxgbei_modevent, 0 }; DECLARE_MODULE(icl_cxgbei, icl_cxgbei_data, SI_SUB_DRIVERS, SI_ORDER_MIDDLE); MODULE_DEPEND(icl_cxgbei, icl, 1, 1, 1); MODULE_VERSION(icl_cxgbei, 1); Index: projects/cxl_iscsi/sys/dev/cxgbe/tom/t4_cpl_io.c =================================================================== --- projects/cxl_iscsi/sys/dev/cxgbe/tom/t4_cpl_io.c (revision 279896) +++ projects/cxl_iscsi/sys/dev/cxgbe/tom/t4_cpl_io.c (revision 279897) @@ -1,1837 +1,1778 @@ /*- * Copyright (c) 2012 Chelsio Communications, Inc. * All rights reserved. * Written by: Navdeep Parhar * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #ifdef TCP_OFFLOAD #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define TCPSTATES #include #include #include #include "common/common.h" #include "common/t4_msg.h" #include "common/t4_regs.h" #include "common/t4_tcb.h" #include "tom/t4_tom_l2t.h" #include "tom/t4_tom.h" VNET_DECLARE(int, tcp_do_autosndbuf); #define V_tcp_do_autosndbuf VNET(tcp_do_autosndbuf) VNET_DECLARE(int, tcp_autosndbuf_inc); #define V_tcp_autosndbuf_inc VNET(tcp_autosndbuf_inc) VNET_DECLARE(int, tcp_autosndbuf_max); #define V_tcp_autosndbuf_max VNET(tcp_autosndbuf_max) VNET_DECLARE(int, tcp_do_autorcvbuf); #define V_tcp_do_autorcvbuf VNET(tcp_do_autorcvbuf) VNET_DECLARE(int, tcp_autorcvbuf_inc); #define V_tcp_autorcvbuf_inc VNET(tcp_autorcvbuf_inc) VNET_DECLARE(int, tcp_autorcvbuf_max); #define V_tcp_autorcvbuf_max VNET(tcp_autorcvbuf_max) /* * For ULP connections HW may add headers, e.g., for digests, that aren't part * of the messages sent by the host but that are part of the TCP payload and * therefore consume TCP sequence space. Tx connection parameters that * operate in TCP sequence space are affected by the HW additions and need to * compensate for them to accurately track TCP sequence numbers. This array * contains the compensating extra lengths for ULP packets. It is indexed by * a packet's ULP submode. */ const unsigned int t4_ulp_extra_len[] = {0, 4, 4, 8}; /* * Return the length of any HW additions that will be made to a Tx packet. * Such additions can happen for some types of ULP packets. */ static inline unsigned int ulp_extra_len(struct mbuf *m, int *ulp_mode) { struct m_tag *mtag; if ((mtag = m_tag_find(m, CXGBE_ISCSI_MBUF_TAG, NULL)) == NULL) return (0); *ulp_mode = *((int *)(mtag + 1)); return (t4_ulp_extra_len[*ulp_mode & 3]); } void send_flowc_wr(struct toepcb *toep, struct flowc_tx_params *ftxp) { struct wrqe *wr; struct fw_flowc_wr *flowc; unsigned int nparams = ftxp ? 
8 : 6, flowclen; struct port_info *pi = toep->port; struct adapter *sc = pi->adapter; unsigned int pfvf = G_FW_VIID_PFN(pi->viid) << S_FW_VIID_PFN; struct ofld_tx_sdesc *txsd = &toep->txsd[toep->txsd_pidx]; KASSERT(!(toep->flags & TPF_FLOWC_WR_SENT), ("%s: flowc for tid %u sent already", __func__, toep->tid)); flowclen = sizeof(*flowc) + nparams * sizeof(struct fw_flowc_mnemval); wr = alloc_wrqe(roundup2(flowclen, 16), toep->ofld_txq); if (wr == NULL) { /* XXX */ panic("%s: allocation failure.", __func__); } flowc = wrtod(wr); memset(flowc, 0, wr->wr_len); flowc->op_to_nparams = htobe32(V_FW_WR_OP(FW_FLOWC_WR) | V_FW_FLOWC_WR_NPARAMS(nparams)); flowc->flowid_len16 = htonl(V_FW_WR_LEN16(howmany(flowclen, 16)) | V_FW_WR_FLOWID(toep->tid)); flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_PFNVFN; flowc->mnemval[0].val = htobe32(pfvf); flowc->mnemval[1].mnemonic = FW_FLOWC_MNEM_CH; flowc->mnemval[1].val = htobe32(pi->tx_chan); flowc->mnemval[2].mnemonic = FW_FLOWC_MNEM_PORT; flowc->mnemval[2].val = htobe32(pi->tx_chan); flowc->mnemval[3].mnemonic = FW_FLOWC_MNEM_IQID; flowc->mnemval[3].val = htobe32(toep->ofld_rxq->iq.abs_id); if (ftxp) { uint32_t sndbuf = min(ftxp->snd_space, sc->tt.sndbuf); flowc->mnemval[4].mnemonic = FW_FLOWC_MNEM_SNDNXT; flowc->mnemval[4].val = htobe32(ftxp->snd_nxt); flowc->mnemval[5].mnemonic = FW_FLOWC_MNEM_RCVNXT; flowc->mnemval[5].val = htobe32(ftxp->rcv_nxt); flowc->mnemval[6].mnemonic = FW_FLOWC_MNEM_SNDBUF; flowc->mnemval[6].val = htobe32(sndbuf); flowc->mnemval[7].mnemonic = FW_FLOWC_MNEM_MSS; flowc->mnemval[7].val = htobe32(ftxp->mss); CTR6(KTR_CXGBE, "%s: tid %u, mss %u, sndbuf %u, snd_nxt 0x%x, rcv_nxt 0x%x", __func__, toep->tid, ftxp->mss, sndbuf, ftxp->snd_nxt, ftxp->rcv_nxt); } else { flowc->mnemval[4].mnemonic = FW_FLOWC_MNEM_SNDBUF; flowc->mnemval[4].val = htobe32(512); flowc->mnemval[5].mnemonic = FW_FLOWC_MNEM_MSS; flowc->mnemval[5].val = htobe32(512); CTR2(KTR_CXGBE, "%s: tid %u", __func__, toep->tid); } txsd->tx_credits = howmany(flowclen, 16); txsd->plen = 0; KASSERT(toep->tx_credits >= txsd->tx_credits && toep->txsd_avail > 0, ("%s: not enough credits (%d)", __func__, toep->tx_credits)); toep->tx_credits -= txsd->tx_credits; if (__predict_false(++toep->txsd_pidx == toep->txsd_total)) toep->txsd_pidx = 0; toep->txsd_avail--; toep->flags |= TPF_FLOWC_WR_SENT; t4_wrq_tx(sc, wr); } void send_reset(struct adapter *sc, struct toepcb *toep, uint32_t snd_nxt) { struct wrqe *wr; struct cpl_abort_req *req; int tid = toep->tid; struct inpcb *inp = toep->inp; struct tcpcb *tp = intotcpcb(inp); /* don't use if INP_DROPPED */ INP_WLOCK_ASSERT(inp); CTR6(KTR_CXGBE, "%s: tid %d (%s), toep_flags 0x%x, inp_flags 0x%x%s", __func__, toep->tid, inp->inp_flags & INP_DROPPED ? "inp dropped" : tcpstates[tp->t_state], toep->flags, inp->inp_flags, toep->flags & TPF_ABORT_SHUTDOWN ? " (abort already in progress)" : ""); if (toep->flags & TPF_ABORT_SHUTDOWN) return; /* abort already in progress */ toep->flags |= TPF_ABORT_SHUTDOWN; KASSERT(toep->flags & TPF_FLOWC_WR_SENT, ("%s: flowc_wr not sent for tid %d.", __func__, tid)); wr = alloc_wrqe(sizeof(*req), toep->ofld_txq); if (wr == NULL) { /* XXX */ panic("%s: allocation failure.", __func__); } req = wrtod(wr); INIT_TP_WR_MIT_CPL(req, CPL_ABORT_REQ, tid); if (inp->inp_flags & INP_DROPPED) req->rsvd0 = htobe32(snd_nxt); else req->rsvd0 = htobe32(tp->snd_nxt); req->rsvd1 = !(toep->flags & TPF_TX_DATA_SENT); req->cmd = CPL_ABORT_SEND_RST; /* * XXX: What's the correct way to tell that the inp hasn't been detached * from its socket? 
Should I even be flushing the snd buffer here? */ if ((inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) == 0) { struct socket *so = inp->inp_socket; if (so != NULL) /* because I'm not sure. See comment above */ sbflush(&so->so_snd); } t4_l2t_send(sc, wr, toep->l2te); } /* * Called when a connection is established to translate the TCP options * reported by HW to FreeBSD's native format. */ static void assign_rxopt(struct tcpcb *tp, unsigned int opt) { struct toepcb *toep = tp->t_toe; struct inpcb *inp = tp->t_inpcb; struct adapter *sc = td_adapter(toep->td); int n; INP_LOCK_ASSERT(inp); if (inp->inp_inc.inc_flags & INC_ISIPV6) n = sizeof(struct ip6_hdr) + sizeof(struct tcphdr); else n = sizeof(struct ip) + sizeof(struct tcphdr); tp->t_maxseg = tp->t_maxopd = sc->params.mtus[G_TCPOPT_MSS(opt)] - n; CTR4(KTR_CXGBE, "%s: tid %d, mtu_idx %u (%u)", __func__, toep->tid, G_TCPOPT_MSS(opt), sc->params.mtus[G_TCPOPT_MSS(opt)]); if (G_TCPOPT_TSTAMP(opt)) { tp->t_flags |= TF_RCVD_TSTMP; /* timestamps ok */ tp->ts_recent = 0; /* hmmm */ tp->ts_recent_age = tcp_ts_getticks(); tp->t_maxseg -= TCPOLEN_TSTAMP_APPA; } if (G_TCPOPT_SACK(opt)) tp->t_flags |= TF_SACK_PERMIT; /* should already be set */ else tp->t_flags &= ~TF_SACK_PERMIT; /* sack disallowed by peer */ if (G_TCPOPT_WSCALE_OK(opt)) tp->t_flags |= TF_RCVD_SCALE; /* Doing window scaling? */ if ((tp->t_flags & (TF_RCVD_SCALE | TF_REQ_SCALE)) == (TF_RCVD_SCALE | TF_REQ_SCALE)) { tp->rcv_scale = tp->request_r_scale; tp->snd_scale = G_TCPOPT_SND_WSCALE(opt); } } /* * Completes some final bits of initialization for just established connections * and changes their state to TCPS_ESTABLISHED. * * The ISNs are from after the exchange of SYNs. i.e., the true ISN + 1. */ void make_established(struct toepcb *toep, uint32_t snd_isn, uint32_t rcv_isn, uint16_t opt) { struct inpcb *inp = toep->inp; struct socket *so = inp->inp_socket; struct tcpcb *tp = intotcpcb(inp); long bufsize; uint32_t iss = be32toh(snd_isn) - 1; /* true ISS */ uint32_t irs = be32toh(rcv_isn) - 1; /* true IRS */ uint16_t tcpopt = be16toh(opt); struct flowc_tx_params ftxp; INP_WLOCK_ASSERT(inp); KASSERT(tp->t_state == TCPS_SYN_SENT || tp->t_state == TCPS_SYN_RECEIVED, ("%s: TCP state %s", __func__, tcpstates[tp->t_state])); CTR4(KTR_CXGBE, "%s: tid %d, toep %p, inp %p", __func__, toep->tid, toep, inp); tp->t_state = TCPS_ESTABLISHED; tp->t_starttime = ticks; TCPSTAT_INC(tcps_connects); tp->irs = irs; tcp_rcvseqinit(tp); tp->rcv_wnd = toep->rx_credits << 10; tp->rcv_adv += tp->rcv_wnd; tp->last_ack_sent = tp->rcv_nxt; /* * If we were unable to send all rx credits via opt0, save the remainder * in rx_credits so that they can be handed over with the next credit * update. 
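 *
 * For example (numbers purely illustrative): if select_rcv_wnd() reports a
 * 1MB socket buffer but opt0 could only advertise a 256KB window, rx_credits
 * ends up holding the 768KB difference, which t4_rcvd()/send_rx_credits()
 * trickle back to the chip via CPL_RX_DATA_ACK as data is consumed.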
*/ SOCKBUF_LOCK(&so->so_rcv); bufsize = select_rcv_wnd(so); SOCKBUF_UNLOCK(&so->so_rcv); toep->rx_credits = bufsize - tp->rcv_wnd; tp->iss = iss; tcp_sendseqinit(tp); tp->snd_una = iss + 1; tp->snd_nxt = iss + 1; tp->snd_max = iss + 1; assign_rxopt(tp, tcpopt); SOCKBUF_LOCK(&so->so_snd); if (so->so_snd.sb_flags & SB_AUTOSIZE && V_tcp_do_autosndbuf) bufsize = V_tcp_autosndbuf_max; else bufsize = sbspace(&so->so_snd); SOCKBUF_UNLOCK(&so->so_snd); ftxp.snd_nxt = tp->snd_nxt; ftxp.rcv_nxt = tp->rcv_nxt; ftxp.snd_space = bufsize; ftxp.mss = tp->t_maxseg; send_flowc_wr(toep, &ftxp); soisconnected(so); } static int send_rx_credits(struct adapter *sc, struct toepcb *toep, int credits) { struct wrqe *wr; struct cpl_rx_data_ack *req; uint32_t dack = F_RX_DACK_CHANGE | V_RX_DACK_MODE(1); KASSERT(credits >= 0, ("%s: %d credits", __func__, credits)); wr = alloc_wrqe(sizeof(*req), toep->ctrlq); if (wr == NULL) return (0); req = wrtod(wr); INIT_TP_WR_MIT_CPL(req, CPL_RX_DATA_ACK, toep->tid); req->credit_dack = htobe32(dack | V_RX_CREDITS(credits)); t4_wrq_tx(sc, wr); return (credits); } void t4_rcvd(struct toedev *tod, struct tcpcb *tp) { struct adapter *sc = tod->tod_softc; struct inpcb *inp = tp->t_inpcb; struct socket *so = inp->inp_socket; struct sockbuf *sb = &so->so_rcv; struct toepcb *toep = tp->t_toe; int credits; INP_WLOCK_ASSERT(inp); SOCKBUF_LOCK(sb); KASSERT(toep->sb_cc >= sbused(sb), ("%s: sb %p has more data (%d) than last time (%d).", __func__, sb, sbused(sb), toep->sb_cc)); if (toep->ulp_mode == ULP_MODE_ISCSI) { toep->rx_credits += toep->sb_cc; toep->sb_cc = 0; } else { toep->rx_credits += toep->sb_cc - sbused(sb); toep->sb_cc = sbused(sb); } credits = toep->rx_credits; SOCKBUF_UNLOCK(sb); if (credits > 0 && (credits + 16384 >= tp->rcv_wnd || credits >= 15 * 1024)) { credits = send_rx_credits(sc, toep, credits); SOCKBUF_LOCK(sb); toep->rx_credits -= credits; SOCKBUF_UNLOCK(sb); tp->rcv_wnd += credits; tp->rcv_adv += credits; } } /* * Close a connection by sending a CPL_CLOSE_CON_REQ message. */ static int close_conn(struct adapter *sc, struct toepcb *toep) { struct wrqe *wr; struct cpl_close_con_req *req; unsigned int tid = toep->tid; CTR3(KTR_CXGBE, "%s: tid %u%s", __func__, toep->tid, toep->flags & TPF_FIN_SENT ? ", IGNORED" : ""); if (toep->flags & TPF_FIN_SENT) return (0); KASSERT(toep->flags & TPF_FLOWC_WR_SENT, ("%s: flowc_wr not sent for tid %u.", __func__, tid)); wr = alloc_wrqe(sizeof(*req), toep->ofld_txq); if (wr == NULL) { /* XXX */ panic("%s: allocation failure.", __func__); } req = wrtod(wr); req->wr.wr_hi = htonl(V_FW_WR_OP(FW_TP_WR) | V_FW_WR_IMMDLEN(sizeof(*req) - sizeof(req->wr))); req->wr.wr_mid = htonl(V_FW_WR_LEN16(howmany(sizeof(*req), 16)) | V_FW_WR_FLOWID(tid)); req->wr.wr_lo = cpu_to_be64(0); OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, tid)); req->rsvd = 0; toep->flags |= TPF_FIN_SENT; toep->flags &= ~TPF_SEND_FIN; t4_l2t_send(sc, wr, toep->l2te); return (0); } #define MAX_OFLD_TX_CREDITS (SGE_MAX_WR_LEN / 16) #define MIN_OFLD_TX_CREDITS (howmany(sizeof(struct fw_ofld_tx_data_wr) + 1, 16)) /* Maximum amount of immediate data we could stuff in a WR */ static inline int max_imm_payload(int tx_credits) { const int n = 2; /* Use only up to 2 desc for imm. 
data WR */ KASSERT(tx_credits >= 0 && tx_credits <= MAX_OFLD_TX_CREDITS, ("%s: %d credits", __func__, tx_credits)); if (tx_credits < MIN_OFLD_TX_CREDITS) return (0); if (tx_credits >= (n * EQ_ESIZE) / 16) return ((n * EQ_ESIZE) - sizeof(struct fw_ofld_tx_data_wr)); else return (tx_credits * 16 - sizeof(struct fw_ofld_tx_data_wr)); } /* Maximum number of SGL entries we could stuff in a WR */ static inline int max_dsgl_nsegs(int tx_credits) { int nseg = 1; /* ulptx_sgl has room for 1, rest ulp_tx_sge_pair */ int sge_pair_credits = tx_credits - MIN_OFLD_TX_CREDITS; KASSERT(tx_credits >= 0 && tx_credits <= MAX_OFLD_TX_CREDITS, ("%s: %d credits", __func__, tx_credits)); if (tx_credits < MIN_OFLD_TX_CREDITS) return (0); nseg += 2 * (sge_pair_credits * 16 / 24); if ((sge_pair_credits * 16) % 24 == 16) nseg++; return (nseg); } static inline void write_tx_wr(void *dst, struct toepcb *toep, unsigned int immdlen, unsigned int plen, uint8_t credits, int shove, int ulp_mode, int txalign) { struct fw_ofld_tx_data_wr *txwr = dst; unsigned int wr_ulp_mode; txwr->op_to_immdlen = htobe32(V_WR_OP(FW_OFLD_TX_DATA_WR) | V_FW_WR_IMMDLEN(immdlen)); txwr->flowid_len16 = htobe32(V_FW_WR_FLOWID(toep->tid) | V_FW_WR_LEN16(credits)); /* for iscsi, the mode & submode setting is per-packet */ if (toep->ulp_mode == ULP_MODE_ISCSI) wr_ulp_mode = V_FW_OFLD_TX_DATA_WR_ULPMODE(ulp_mode >> 4) | V_FW_OFLD_TX_DATA_WR_ULPSUBMODE(ulp_mode & 3); else wr_ulp_mode = V_FW_OFLD_TX_DATA_WR_ULPMODE(toep->ulp_mode); txwr->lsodisable_to_proxy = htobe32(wr_ulp_mode | V_FW_OFLD_TX_DATA_WR_URGENT(0) | /* XXX */ V_FW_OFLD_TX_DATA_WR_SHOVE(shove)); txwr->plen = htobe32(plen); if (txalign > 0) { struct tcpcb *tp = intotcpcb(toep->inp); if (plen < 2 * tp->t_maxseg || is_10G_port(toep->port)) txwr->lsodisable_to_proxy |= htobe32(F_FW_OFLD_TX_DATA_WR_LSODISABLE); else txwr->lsodisable_to_proxy |= htobe32(F_FW_OFLD_TX_DATA_WR_ALIGNPLD | (tp->t_flags & TF_NODELAY ? 0 : F_FW_OFLD_TX_DATA_WR_ALIGNPLDSHOVE)); } } /* * Generate a DSGL from a starting mbuf. The total number of segments and the * maximum segments in any one mbuf are provided. */ static void write_tx_sgl(void *dst, struct mbuf *start, struct mbuf *stop, int nsegs, int n) { struct mbuf *m; struct ulptx_sgl *usgl = dst; int i, j, rc; struct sglist sg; struct sglist_seg segs[n]; KASSERT(nsegs > 0, ("%s: nsegs 0", __func__)); sglist_init(&sg, n, segs); usgl->cmd_nsge = htobe32(V_ULPTX_CMD(ULP_TX_SC_DSGL) | V_ULPTX_NSGE(nsegs)); i = -1; for (m = start; m != stop; m = m->m_next) { rc = sglist_append(&sg, mtod(m, void *), m->m_len); if (__predict_false(rc != 0)) panic("%s: sglist_append %d", __func__, rc); for (j = 0; j < sg.sg_nseg; i++, j++) { if (i < 0) { usgl->len0 = htobe32(segs[j].ss_len); usgl->addr0 = htobe64(segs[j].ss_paddr); } else { usgl->sge[i / 2].len[i & 1] = htobe32(segs[j].ss_len); usgl->sge[i / 2].addr[i & 1] = htobe64(segs[j].ss_paddr); } #ifdef INVARIANTS nsegs--; #endif } sglist_reset(&sg); } if (i & 1) usgl->sge[i / 2].len[1] = htobe32(0); KASSERT(nsegs == 0, ("%s: nsegs %d, start %p, stop %p", __func__, nsegs, start, stop)); } /* * Max number of SGL entries an offload tx work request can have. This is 41 * (1 + 40) for a full 512B work request. * fw_ofld_tx_data_wr(16B) + ulptx_sgl(16B, 1) + ulptx_sge_pair(480B, 40) */ #define OFLD_SGL_LEN (41) /* * Send data and/or a FIN to the peer. * * The socket's so_snd buffer consists of a stream of data starting with sb_mb * and linked together with m_next. sb_sndptr, if set, is the last mbuf that * was transmitted. 
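 *
 * Illustrative layout (not part of this change):
 *
 *	sb_mb -> m0 -> m1 -> m2 -> m3 -> NULL
 *
 *	sb_sndptr == m1: m0 and m1 were already transmitted, so transmission
 *	resumes at m2 (sb_sndptr->m_next); with sb_sndptr unset it starts at
 *	sb_mb.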
* * drop indicates the number of bytes that should be dropped from the head of * the send buffer. It is an optimization that lets do_fw4_ack avoid creating * contention on the send buffer lock (before this change it used to do * sowwakeup and then t4_push_frames right after that when recovering from tx * stalls). When drop is set this function MUST drop the bytes and wake up any * writers. */ void t4_push_frames(struct adapter *sc, struct toepcb *toep, int drop) { struct mbuf *sndptr, *m, *sb_sndptr; struct fw_ofld_tx_data_wr *txwr; struct wrqe *wr; u_int plen, nsegs, credits, max_imm, max_nsegs, max_nsegs_1mbuf; struct inpcb *inp = toep->inp; struct tcpcb *tp = intotcpcb(inp); struct socket *so = inp->inp_socket; struct sockbuf *sb = &so->so_snd; int tx_credits, shove, compl, space, sowwakeup; struct ofld_tx_sdesc *txsd = &toep->txsd[toep->txsd_pidx]; INP_WLOCK_ASSERT(inp); KASSERT(toep->flags & TPF_FLOWC_WR_SENT, ("%s: flowc_wr not sent for tid %u.", __func__, toep->tid)); KASSERT(toep->ulp_mode == ULP_MODE_NONE || toep->ulp_mode == ULP_MODE_TCPDDP || toep->ulp_mode == ULP_MODE_RDMA, ("%s: ulp_mode %u for toep %p", __func__, toep->ulp_mode, toep)); /* * This function doesn't resume by itself. Someone else must clear the * flag and call this function. */ if (__predict_false(toep->flags & TPF_TX_SUSPENDED)) { KASSERT(drop == 0, ("%s: drop (%d) != 0 but tx is suspended", __func__, drop)); return; } do { tx_credits = min(toep->tx_credits, MAX_OFLD_TX_CREDITS); max_imm = max_imm_payload(tx_credits); max_nsegs = max_dsgl_nsegs(tx_credits); SOCKBUF_LOCK(sb); sowwakeup = drop; if (drop) { sbdrop_locked(sb, drop); drop = 0; } sb_sndptr = sb->sb_sndptr; sndptr = sb_sndptr ? sb_sndptr->m_next : sb->sb_mb; plen = 0; nsegs = 0; max_nsegs_1mbuf = 0; /* max # of SGL segments in any one mbuf */ for (m = sndptr; m != NULL; m = m->m_next) { int n = sglist_count(mtod(m, void *), m->m_len); nsegs += n; plen += m->m_len; /* This mbuf sent us _over_ the nsegs limit, back out */ if (plen > max_imm && nsegs > max_nsegs) { nsegs -= n; plen -= m->m_len; if (plen == 0) { /* Too few credits */ toep->flags |= TPF_TX_SUSPENDED; if (sowwakeup) sowwakeup_locked(so); else SOCKBUF_UNLOCK(sb); SOCKBUF_UNLOCK_ASSERT(sb); return; } break; } if (max_nsegs_1mbuf < n) max_nsegs_1mbuf = n; sb_sndptr = m; /* new sb->sb_sndptr if all goes well */ /* This mbuf put us right at the max_nsegs limit */ if (plen > max_imm && nsegs == max_nsegs) { m = m->m_next; break; } } shove = m == NULL && !(tp->t_flags & TF_MORETOCOME); space = sbspace(sb); if (space <= sb->sb_hiwat * 3 / 8 && toep->plen_nocompl + plen >= sb->sb_hiwat / 4) compl = 1; else compl = 0; if (sb->sb_flags & SB_AUTOSIZE && V_tcp_do_autosndbuf && sb->sb_hiwat < V_tcp_autosndbuf_max && space < sb->sb_hiwat / 8) { int newsize = min(sb->sb_hiwat + V_tcp_autosndbuf_inc, V_tcp_autosndbuf_max); if (!sbreserve_locked(sb, newsize, so, NULL)) sb->sb_flags &= ~SB_AUTOSIZE; else sowwakeup = 1; /* room available */ } if (sowwakeup) sowwakeup_locked(so); else SOCKBUF_UNLOCK(sb); SOCKBUF_UNLOCK_ASSERT(sb); /* nothing to send */ if (plen == 0) { KASSERT(m == NULL, ("%s: nothing to send, but m != NULL", __func__)); break; } if (__predict_false(toep->flags & TPF_FIN_SENT)) panic("%s: excess tx.", __func__); if (plen <= max_imm) { /* Immediate data tx */ wr = alloc_wrqe(roundup2(sizeof(*txwr) + plen, 16), toep->ofld_txq); if (wr == NULL) { /* XXX: how will we recover from this? 
*/ toep->flags |= TPF_TX_SUSPENDED; return; } txwr = wrtod(wr); credits = howmany(wr->wr_len, 16); write_tx_wr(txwr, toep, plen, plen, credits, shove, 0, sc->tt.tx_align); m_copydata(sndptr, 0, plen, (void *)(txwr + 1)); nsegs = 0; } else { int wr_len; /* DSGL tx */ wr_len = sizeof(*txwr) + sizeof(struct ulptx_sgl) + ((3 * (nsegs - 1)) / 2 + ((nsegs - 1) & 1)) * 8; wr = alloc_wrqe(roundup2(wr_len, 16), toep->ofld_txq); if (wr == NULL) { /* XXX: how will we recover from this? */ toep->flags |= TPF_TX_SUSPENDED; return; } txwr = wrtod(wr); credits = howmany(wr_len, 16); write_tx_wr(txwr, toep, 0, plen, credits, shove, 0, sc->tt.tx_align); write_tx_sgl(txwr + 1, sndptr, m, nsegs, max_nsegs_1mbuf); if (wr_len & 0xf) { uint64_t *pad = (uint64_t *) ((uintptr_t)txwr + wr_len); *pad = 0; } } KASSERT(toep->tx_credits >= credits, ("%s: not enough credits", __func__)); toep->tx_credits -= credits; toep->tx_nocompl += credits; toep->plen_nocompl += plen; if (toep->tx_credits <= toep->tx_total * 3 / 8 && toep->tx_nocompl >= toep->tx_total / 4) compl = 1; if (compl || toep->ulp_mode == ULP_MODE_RDMA) { txwr->op_to_immdlen |= htobe32(F_FW_WR_COMPL); toep->tx_nocompl = 0; toep->plen_nocompl = 0; } tp->snd_nxt += plen; tp->snd_max += plen; SOCKBUF_LOCK(sb); KASSERT(sb_sndptr, ("%s: sb_sndptr is NULL", __func__)); sb->sb_sndptr = sb_sndptr; SOCKBUF_UNLOCK(sb); toep->flags |= TPF_TX_DATA_SENT; if (toep->tx_credits < MIN_OFLD_TX_CREDITS) toep->flags |= TPF_TX_SUSPENDED; KASSERT(toep->txsd_avail > 0, ("%s: no txsd", __func__)); txsd->plen = plen; txsd->tx_credits = credits; txsd++; if (__predict_false(++toep->txsd_pidx == toep->txsd_total)) { toep->txsd_pidx = 0; txsd = &toep->txsd[0]; } toep->txsd_avail--; t4_l2t_send(sc, wr, toep->l2te); } while (m != NULL); /* Send a FIN if requested, but only if there's no more data to send */ if (m == NULL && toep->flags & TPF_SEND_FIN) close_conn(sc, toep); } +void (*cxgbei_fw4_ack)(struct toepcb *, int); +struct mbuf *(*cxgbei_writeq_len)(struct toepcb *, int *); +struct mbuf *(*cxgbei_writeq_next)(struct toepcb *); + /* Send ULP data over TOE using TX_DATA_WR. We send whole mbuf at once */ void t4_ulp_push_frames(struct adapter *sc, struct toepcb *toep, int drop) { struct mbuf *sndptr, *m = NULL; struct fw_ofld_tx_data_wr *txwr; struct wrqe *wr; unsigned int plen, nsegs, credits, max_imm, max_nsegs, max_nsegs_1mbuf; struct inpcb *inp = toep->inp; struct tcpcb *tp; struct socket *so; struct sockbuf *sb; int tx_credits, ulp_len = 0, ulp_mode = 0, qlen = 0; int shove, compl; struct ofld_tx_sdesc *txsd; INP_WLOCK_ASSERT(inp); if (toep->flags & TPF_ABORT_SHUTDOWN) return; tp = intotcpcb(inp); so = inp->inp_socket; sb = &so->so_snd; txsd = &toep->txsd[toep->txsd_pidx]; KASSERT(toep->flags & TPF_FLOWC_WR_SENT, ("%s: flowc_wr not sent for tid %u.", __func__, toep->tid)); /* * This function doesn't resume by itself. Someone else must clear the * flag and call this function. 
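 * (The flag in question is TPF_TX_SUSPENDED, set below when tx credits run
 * out; the credit-return path is expected to clear it and call this function
 * again once enough credits have been returned.)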
*/ if (__predict_false(toep->flags & TPF_TX_SUSPENDED)) return; - sndptr = t4_queue_iscsi_callback(so, toep, 1, &qlen); + sndptr = cxgbei_writeq_len(toep, &qlen); if (!qlen) return; do { tx_credits = min(toep->tx_credits, MAX_OFLD_TX_CREDITS); max_imm = max_imm_payload(tx_credits); max_nsegs = max_dsgl_nsegs(tx_credits); if (drop) { - t4_cpl_iscsi_callback(toep->td, toep, &drop, - CPL_FW4_ACK); + cxgbei_fw4_ack(toep, drop); drop = 0; } plen = 0; nsegs = 0; max_nsegs_1mbuf = 0; /* max # of SGL segments in any one mbuf */ for (m = sndptr; m != NULL; m = m->m_next) { int n = sglist_count(mtod(m, void *), m->m_len); nsegs += n; plen += m->m_len; /* This mbuf sent us _over_ the nsegs limit, return */ if (plen > max_imm && nsegs > max_nsegs) { toep->flags |= TPF_TX_SUSPENDED; return; } if (max_nsegs_1mbuf < n) max_nsegs_1mbuf = n; /* This mbuf put us right at the max_nsegs limit */ if (plen > max_imm && nsegs == max_nsegs) { toep->flags |= TPF_TX_SUSPENDED; return; } } shove = m == NULL && !(tp->t_flags & TF_MORETOCOME); /* nothing to send */ if (plen == 0) { KASSERT(m == NULL, ("%s: nothing to send, but m != NULL", __func__)); break; } if (__predict_false(toep->flags & TPF_FIN_SENT)) panic("%s: excess tx.", __func__); ulp_len = plen + ulp_extra_len(sndptr, &ulp_mode); if (plen <= max_imm) { /* Immediate data tx */ wr = alloc_wrqe(roundup(sizeof(*txwr) + plen, 16), toep->ofld_txq); if (wr == NULL) { /* XXX: how will we recover from this? */ toep->flags |= TPF_TX_SUSPENDED; return; } txwr = wrtod(wr); credits = howmany(wr->wr_len, 16); write_tx_wr(txwr, toep, plen, ulp_len, credits, shove, ulp_mode, 0); m_copydata(sndptr, 0, plen, (void *)(txwr + 1)); } else { int wr_len; /* DSGL tx */ wr_len = sizeof(*txwr) + sizeof(struct ulptx_sgl) + ((3 * (nsegs - 1)) / 2 + ((nsegs - 1) & 1)) * 8; wr = alloc_wrqe(roundup(wr_len, 16), toep->ofld_txq); if (wr == NULL) { /* XXX: how will we recover from this? 
*/ toep->flags |= TPF_TX_SUSPENDED; return; } txwr = wrtod(wr); credits = howmany(wr_len, 16); write_tx_wr(txwr, toep, 0, ulp_len, credits, shove, ulp_mode, 0); write_tx_sgl(txwr + 1, sndptr, m, nsegs, max_nsegs_1mbuf); if (wr_len & 0xf) { uint64_t *pad = (uint64_t *) ((uintptr_t)txwr + wr_len); *pad = 0; } } KASSERT(toep->tx_credits >= credits, ("%s: not enough credits", __func__)); toep->tx_credits -= credits; toep->tx_nocompl += credits; toep->plen_nocompl += plen; if (toep->tx_credits <= toep->tx_total * 3 / 8 && toep->tx_nocompl >= toep->tx_total / 4) compl = 1; if (compl) { txwr->op_to_immdlen |= htobe32(F_FW_WR_COMPL); toep->tx_nocompl = 0; toep->plen_nocompl = 0; } tp->snd_nxt += ulp_len; tp->snd_max += ulp_len; /* goto next mbuf */ - sndptr = m = t4_queue_iscsi_callback(so, toep, 2, &qlen); + sndptr = m = cxgbei_writeq_next(toep); toep->flags |= TPF_TX_DATA_SENT; if (toep->tx_credits < MIN_OFLD_TX_CREDITS) { toep->flags |= TPF_TX_SUSPENDED; } KASSERT(toep->txsd_avail > 0, ("%s: no txsd", __func__)); txsd->plen = plen; txsd->tx_credits = credits; txsd++; if (__predict_false(++toep->txsd_pidx == toep->txsd_total)) { toep->txsd_pidx = 0; txsd = &toep->txsd[0]; } toep->txsd_avail--; t4_l2t_send(sc, wr, toep->l2te); } while (m != NULL); /* Send a FIN if requested, but only if there's no more data to send */ if (m == NULL && toep->flags & TPF_SEND_FIN) close_conn(sc, toep); } int t4_tod_output(struct toedev *tod, struct tcpcb *tp) { struct adapter *sc = tod->tod_softc; #ifdef INVARIANTS struct inpcb *inp = tp->t_inpcb; #endif struct toepcb *toep = tp->t_toe; INP_WLOCK_ASSERT(inp); KASSERT((inp->inp_flags & INP_DROPPED) == 0, ("%s: inp %p dropped.", __func__, inp)); KASSERT(toep != NULL, ("%s: toep is NULL", __func__)); if (toep->ulp_mode == ULP_MODE_ISCSI) t4_ulp_push_frames(sc, toep, 0); else t4_push_frames(sc, toep, 0); return (0); } int t4_send_fin(struct toedev *tod, struct tcpcb *tp) { struct adapter *sc = tod->tod_softc; #ifdef INVARIANTS struct inpcb *inp = tp->t_inpcb; #endif struct toepcb *toep = tp->t_toe; INP_WLOCK_ASSERT(inp); KASSERT((inp->inp_flags & INP_DROPPED) == 0, ("%s: inp %p dropped.", __func__, inp)); KASSERT(toep != NULL, ("%s: toep is NULL", __func__)); toep->flags |= TPF_SEND_FIN; if (tp->t_state >= TCPS_ESTABLISHED) { if (toep->ulp_mode == ULP_MODE_ISCSI) t4_ulp_push_frames(sc, toep, 0); else t4_push_frames(sc, toep, 0); } return (0); } int t4_send_rst(struct toedev *tod, struct tcpcb *tp) { struct adapter *sc = tod->tod_softc; #if defined(INVARIANTS) struct inpcb *inp = tp->t_inpcb; #endif struct toepcb *toep = tp->t_toe; INP_WLOCK_ASSERT(inp); KASSERT((inp->inp_flags & INP_DROPPED) == 0, ("%s: inp %p dropped.", __func__, inp)); KASSERT(toep != NULL, ("%s: toep is NULL", __func__)); /* hmmmm */ KASSERT(toep->flags & TPF_FLOWC_WR_SENT, ("%s: flowc for tid %u [%s] not sent already", __func__, toep->tid, tcpstates[tp->t_state])); send_reset(sc, toep, 0); return (0); } /* * Peer has sent us a FIN. 
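 *
 * Summary of the state transitions performed below: SYN_RECEIVED and
 * ESTABLISHED move to CLOSE_WAIT, FIN_WAIT_1 moves to CLOSING, and
 * FIN_WAIT_2 goes straight to TIME_WAIT via tcp_twstart().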
*/ static int do_peer_close(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; const struct cpl_peer_close *cpl = (const void *)(rss + 1); unsigned int tid = GET_TID(cpl); struct toepcb *toep = lookup_tid(sc, tid); struct inpcb *inp = toep->inp; struct tcpcb *tp = NULL; struct socket *so; struct sockbuf *sb; #ifdef INVARIANTS unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl))); #endif KASSERT(opcode == CPL_PEER_CLOSE, ("%s: unexpected opcode 0x%x", __func__, opcode)); KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); if (__predict_false(toep->flags & TPF_SYNQE)) { #ifdef INVARIANTS struct synq_entry *synqe = (void *)toep; INP_WLOCK(synqe->lctx->inp); if (synqe->flags & TPF_SYNQE_HAS_L2TE) { KASSERT(synqe->flags & TPF_ABORT_SHUTDOWN, ("%s: listen socket closed but tid %u not aborted.", __func__, tid)); } else { /* * do_pass_accept_req is still running and will * eventually take care of this tid. */ } INP_WUNLOCK(synqe->lctx->inp); #endif CTR4(KTR_CXGBE, "%s: tid %u, synqe %p (0x%x)", __func__, tid, toep, toep->flags); return (0); } KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__)); INP_INFO_WLOCK(&V_tcbinfo); INP_WLOCK(inp); tp = intotcpcb(inp); CTR5(KTR_CXGBE, "%s: tid %u (%s), toep_flags 0x%x, inp %p", __func__, tid, tp ? tcpstates[tp->t_state] : "no tp", toep->flags, inp); if (toep->flags & TPF_ABORT_SHUTDOWN) goto done; tp->rcv_nxt++; /* FIN */ so = inp->inp_socket; sb = &so->so_rcv; SOCKBUF_LOCK(sb); if (__predict_false(toep->ddp_flags & (DDP_BUF0_ACTIVE | DDP_BUF1_ACTIVE))) { m = get_ddp_mbuf(be32toh(cpl->rcv_nxt) - tp->rcv_nxt); tp->rcv_nxt = be32toh(cpl->rcv_nxt); toep->ddp_flags &= ~(DDP_BUF0_ACTIVE | DDP_BUF1_ACTIVE); KASSERT(toep->sb_cc >= sbused(sb), ("%s: sb %p has more data (%d) than last time (%d).", __func__, sb, sbused(sb), toep->sb_cc)); toep->rx_credits += toep->sb_cc - sbused(sb); #ifdef USE_DDP_RX_FLOW_CONTROL toep->rx_credits -= m->m_len; /* adjust for F_RX_FC_DDP */ #endif sbappendstream_locked(sb, m, 0); toep->sb_cc = sbused(sb); } socantrcvmore_locked(so); /* unlocks the sockbuf */ if (toep->ulp_mode != ULP_MODE_RDMA) { KASSERT(tp->rcv_nxt == be32toh(cpl->rcv_nxt), ("%s: rcv_nxt mismatch: %u %u", __func__, tp->rcv_nxt, be32toh(cpl->rcv_nxt))); } switch (tp->t_state) { case TCPS_SYN_RECEIVED: tp->t_starttime = ticks; /* FALLTHROUGH */ case TCPS_ESTABLISHED: tp->t_state = TCPS_CLOSE_WAIT; break; case TCPS_FIN_WAIT_1: tp->t_state = TCPS_CLOSING; break; case TCPS_FIN_WAIT_2: tcp_twstart(tp); INP_UNLOCK_ASSERT(inp); /* safe, we have a ref on the inp */ INP_INFO_WUNLOCK(&V_tcbinfo); INP_WLOCK(inp); final_cpl_received(toep); return (0); default: log(LOG_ERR, "%s: TID %u received CPL_PEER_CLOSE in state %d\n", __func__, tid, tp->t_state); } done: INP_WUNLOCK(inp); INP_INFO_WUNLOCK(&V_tcbinfo); return (0); } /* * Peer has ACK'd our FIN. 
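 *
 * Summary of the state transitions performed below: CLOSING moves to
 * TIME_WAIT via tcp_twstart(), LAST_ACK closes the connection with
 * tcp_close(), and FIN_WAIT_1 moves to FIN_WAIT_2.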
*/ static int do_close_con_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; const struct cpl_close_con_rpl *cpl = (const void *)(rss + 1); unsigned int tid = GET_TID(cpl); struct toepcb *toep = lookup_tid(sc, tid); struct inpcb *inp = toep->inp; struct tcpcb *tp = NULL; struct socket *so = NULL; #ifdef INVARIANTS unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl))); #endif KASSERT(opcode == CPL_CLOSE_CON_RPL, ("%s: unexpected opcode 0x%x", __func__, opcode)); KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__)); INP_INFO_WLOCK(&V_tcbinfo); INP_WLOCK(inp); tp = intotcpcb(inp); CTR4(KTR_CXGBE, "%s: tid %u (%s), toep_flags 0x%x", __func__, tid, tp ? tcpstates[tp->t_state] : "no tp", toep->flags); if (toep->flags & TPF_ABORT_SHUTDOWN) goto done; so = inp->inp_socket; tp->snd_una = be32toh(cpl->snd_nxt) - 1; /* exclude FIN */ switch (tp->t_state) { case TCPS_CLOSING: /* see TCPS_FIN_WAIT_2 in do_peer_close too */ tcp_twstart(tp); release: INP_UNLOCK_ASSERT(inp); /* safe, we have a ref on the inp */ INP_INFO_WUNLOCK(&V_tcbinfo); INP_WLOCK(inp); final_cpl_received(toep); /* no more CPLs expected */ return (0); case TCPS_LAST_ACK: if (tcp_close(tp)) INP_WUNLOCK(inp); goto release; case TCPS_FIN_WAIT_1: if (so->so_rcv.sb_state & SBS_CANTRCVMORE) soisdisconnected(so); tp->t_state = TCPS_FIN_WAIT_2; break; default: log(LOG_ERR, "%s: TID %u received CPL_CLOSE_CON_RPL in state %s\n", __func__, tid, tcpstates[tp->t_state]); } done: INP_WUNLOCK(inp); INP_INFO_WUNLOCK(&V_tcbinfo); return (0); } void send_abort_rpl(struct adapter *sc, struct sge_wrq *ofld_txq, int tid, int rst_status) { struct wrqe *wr; struct cpl_abort_rpl *cpl; wr = alloc_wrqe(sizeof(*cpl), ofld_txq); if (wr == NULL) { /* XXX */ panic("%s: allocation failure.", __func__); } cpl = wrtod(wr); INIT_TP_WR_MIT_CPL(cpl, CPL_ABORT_RPL, tid); cpl->cmd = rst_status; t4_wrq_tx(sc, wr); } static int abort_status_to_errno(struct tcpcb *tp, unsigned int abort_reason) { switch (abort_reason) { case CPL_ERR_BAD_SYN: case CPL_ERR_CONN_RESET: return (tp->t_state == TCPS_CLOSE_WAIT ? EPIPE : ECONNRESET); case CPL_ERR_XMIT_TIMEDOUT: case CPL_ERR_PERSIST_TIMEDOUT: case CPL_ERR_FINWAIT2_TIMEDOUT: case CPL_ERR_KEEPALIVE_TIMEDOUT: return (ETIMEDOUT); default: return (EIO); } } -int -cpl_not_handled(struct sge_iq *, const struct rss_header *, struct mbuf *); /* - * tom_cpl_iscsi_callback - - * iscsi and tom would share the following cpl messages, so when any of these - * message is received, after tom is done with processing it, the messages - * needs to be forwarded to iscsi for further processing: - * - CPL_SET_TCB_RPL - * - CPL_RX_DATA_DDP - */ -void (*tom_cpl_iscsi_callback)(struct tom_data *, struct socket *, void *, - unsigned int); - -struct mbuf *(*tom_queue_iscsi_callback)(struct socket *, unsigned int, int *); -/* - * Check if the handler function is set for a given CPL - * return 0 if the function is NULL or cpl_not_handled, 1 otherwise. - */ -int -t4tom_cpl_handler_registered(struct adapter *sc, unsigned int opcode) -{ - - MPASS(opcode < nitems(sc->cpl_handler)); - - return (sc->cpl_handler[opcode] && - sc->cpl_handler[opcode] != cpl_not_handled); -} - -/* - * set the tom_cpl_iscsi_callback function, this function should be used - * whenever both toe and iscsi need to process the same cpl msg. 
- */ -void -t4tom_register_cpl_iscsi_callback(void (*fp)(struct tom_data *, struct socket *, - void *, unsigned int)) -{ - - tom_cpl_iscsi_callback = fp; -} - -void -t4tom_register_queue_iscsi_callback(struct mbuf *(*fp)(struct socket *, - unsigned int, int *qlen)) -{ - - tom_queue_iscsi_callback = fp; -} - -int -t4_cpl_iscsi_callback(struct tom_data *td, struct toepcb *toep, void *m, - unsigned int opcode) -{ - struct socket *so; - - if (opcode == CPL_FW4_ACK) - so = toep->inp->inp_socket; - else { - INP_WLOCK(toep->inp); - so = toep->inp->inp_socket; - INP_WUNLOCK(toep->inp); - } - - if (tom_cpl_iscsi_callback && so) { - if (toep->ulp_mode == ULP_MODE_ISCSI) { - tom_cpl_iscsi_callback(td, so, m, opcode); - return (0); - } - } - - return (1); -} - -struct mbuf * -t4_queue_iscsi_callback(struct socket *so, struct toepcb *toep, - unsigned int cmd, int *qlen) -{ - - if (tom_queue_iscsi_callback && so) { - if (toep->ulp_mode == ULP_MODE_ISCSI) - return (tom_queue_iscsi_callback(so, cmd, qlen)); - } - - return (NULL); -} - -/* * TCP RST from the peer, timeout, or some other such critical error. */ static int do_abort_req(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; const struct cpl_abort_req_rss *cpl = (const void *)(rss + 1); unsigned int tid = GET_TID(cpl); struct toepcb *toep = lookup_tid(sc, tid); struct sge_wrq *ofld_txq = toep->ofld_txq; struct inpcb *inp; struct tcpcb *tp; #ifdef INVARIANTS unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl))); #endif KASSERT(opcode == CPL_ABORT_REQ_RSS, ("%s: unexpected opcode 0x%x", __func__, opcode)); KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); if (toep->flags & TPF_SYNQE) return (do_abort_req_synqe(iq, rss, m)); KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__)); if (negative_advice(cpl->status)) { CTR4(KTR_CXGBE, "%s: negative advice %d for tid %d (0x%x)", __func__, cpl->status, tid, toep->flags); return (0); /* Ignore negative advice */ } inp = toep->inp; INP_INFO_WLOCK(&V_tcbinfo); /* for tcp_close */ INP_WLOCK(inp); tp = intotcpcb(inp); CTR6(KTR_CXGBE, "%s: tid %d (%s), toep_flags 0x%x, inp_flags 0x%x, status %d", __func__, tid, tp ? tcpstates[tp->t_state] : "no tp", toep->flags, inp->inp_flags, cpl->status); /* * If we'd initiated an abort earlier the reply to it is responsible for * cleaning up resources. Otherwise we tear everything down right here * right now. We owe the T4 a CPL_ABORT_RPL no matter what. 
*/ if (toep->flags & TPF_ABORT_SHUTDOWN) { INP_WUNLOCK(inp); goto done; } toep->flags |= TPF_ABORT_SHUTDOWN; if ((inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) == 0) { struct socket *so = inp->inp_socket; if (so != NULL) so_error_set(so, abort_status_to_errno(tp, cpl->status)); tp = tcp_close(tp); if (tp == NULL) INP_WLOCK(inp); /* re-acquire */ } final_cpl_received(toep); done: INP_INFO_WUNLOCK(&V_tcbinfo); send_abort_rpl(sc, ofld_txq, tid, CPL_ABORT_NO_RST); return (0); } /* * Reply to the CPL_ABORT_REQ (send_reset) */ static int do_abort_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; const struct cpl_abort_rpl_rss *cpl = (const void *)(rss + 1); unsigned int tid = GET_TID(cpl); struct toepcb *toep = lookup_tid(sc, tid); struct inpcb *inp = toep->inp; #ifdef INVARIANTS unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl))); #endif KASSERT(opcode == CPL_ABORT_RPL_RSS, ("%s: unexpected opcode 0x%x", __func__, opcode)); KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); if (toep->flags & TPF_SYNQE) return (do_abort_rpl_synqe(iq, rss, m)); KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__)); CTR5(KTR_CXGBE, "%s: tid %u, toep %p, inp %p, status %d", __func__, tid, toep, inp, cpl->status); KASSERT(toep->flags & TPF_ABORT_SHUTDOWN, ("%s: wasn't expecting abort reply", __func__)); INP_WLOCK(inp); final_cpl_received(toep); return (0); } static int do_rx_data(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; const struct cpl_rx_data *cpl = mtod(m, const void *); unsigned int tid = GET_TID(cpl); struct toepcb *toep = lookup_tid(sc, tid); struct inpcb *inp = toep->inp; struct tcpcb *tp; struct socket *so; struct sockbuf *sb; int len; uint32_t ddp_placed = 0; if (__predict_false(toep->flags & TPF_SYNQE)) { #ifdef INVARIANTS struct synq_entry *synqe = (void *)toep; INP_WLOCK(synqe->lctx->inp); if (synqe->flags & TPF_SYNQE_HAS_L2TE) { KASSERT(synqe->flags & TPF_ABORT_SHUTDOWN, ("%s: listen socket closed but tid %u not aborted.", __func__, tid)); } else { /* * do_pass_accept_req is still running and will * eventually take care of this tid. 
*/ } INP_WUNLOCK(synqe->lctx->inp); #endif CTR4(KTR_CXGBE, "%s: tid %u, synqe %p (0x%x)", __func__, tid, toep, toep->flags); m_freem(m); return (0); } KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__)); /* strip off CPL header */ m_adj(m, sizeof(*cpl)); len = m->m_pkthdr.len; INP_WLOCK(inp); if (inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) { CTR4(KTR_CXGBE, "%s: tid %u, rx (%d bytes), inp_flags 0x%x", __func__, tid, len, inp->inp_flags); INP_WUNLOCK(inp); m_freem(m); return (0); } tp = intotcpcb(inp); if (__predict_false(tp->rcv_nxt != be32toh(cpl->seq))) ddp_placed = be32toh(cpl->seq) - tp->rcv_nxt; tp->rcv_nxt += len; KASSERT(tp->rcv_wnd >= len, ("%s: negative window size", __func__)); tp->rcv_wnd -= len; tp->t_rcvtime = ticks; so = inp_inpcbtosocket(inp); sb = &so->so_rcv; SOCKBUF_LOCK(sb); if (__predict_false(sb->sb_state & SBS_CANTRCVMORE)) { CTR3(KTR_CXGBE, "%s: tid %u, excess rx (%d bytes)", __func__, tid, len); m_freem(m); SOCKBUF_UNLOCK(sb); INP_WUNLOCK(inp); INP_INFO_WLOCK(&V_tcbinfo); INP_WLOCK(inp); tp = tcp_drop(tp, ECONNRESET); if (tp) INP_WUNLOCK(inp); INP_INFO_WUNLOCK(&V_tcbinfo); return (0); } /* receive buffer autosize */ if (sb->sb_flags & SB_AUTOSIZE && V_tcp_do_autorcvbuf && sb->sb_hiwat < V_tcp_autorcvbuf_max && len > (sbspace(sb) / 8 * 7)) { unsigned int hiwat = sb->sb_hiwat; unsigned int newsize = min(hiwat + V_tcp_autorcvbuf_inc, V_tcp_autorcvbuf_max); if (!sbreserve_locked(sb, newsize, so, NULL)) sb->sb_flags &= ~SB_AUTOSIZE; else toep->rx_credits += newsize - hiwat; } if (toep->ulp_mode == ULP_MODE_TCPDDP) { int changed = !(toep->ddp_flags & DDP_ON) ^ cpl->ddp_off; if (changed) { if (toep->ddp_flags & DDP_SC_REQ) toep->ddp_flags ^= DDP_ON | DDP_SC_REQ; else { KASSERT(cpl->ddp_off == 1, ("%s: DDP switched on by itself.", __func__)); /* Fell out of DDP mode */ toep->ddp_flags &= ~(DDP_ON | DDP_BUF0_ACTIVE | DDP_BUF1_ACTIVE); if (ddp_placed) insert_ddp_data(toep, ddp_placed); } } if ((toep->ddp_flags & DDP_OK) == 0 && time_uptime >= toep->ddp_disabled + DDP_RETRY_WAIT) { toep->ddp_score = DDP_LOW_SCORE; toep->ddp_flags |= DDP_OK; CTR3(KTR_CXGBE, "%s: tid %u DDP_OK @ %u", __func__, tid, time_uptime); } if (toep->ddp_flags & DDP_ON) { /* * CPL_RX_DATA with DDP on can only be an indicate. Ask * soreceive to post a buffer or disable DDP. The * payload that arrived in this indicate is appended to * the socket buffer as usual. */ #if 0 CTR5(KTR_CXGBE, "%s: tid %u (0x%x) DDP indicate (seq 0x%x, len %d)", __func__, tid, toep->flags, be32toh(cpl->seq), len); #endif sb->sb_flags |= SB_DDP_INDICATE; } else if ((toep->ddp_flags & (DDP_OK|DDP_SC_REQ)) == DDP_OK && tp->rcv_wnd > DDP_RSVD_WIN && len >= sc->tt.ddp_thres) { /* * DDP allowed but isn't on (and a request to switch it * on isn't pending either), and conditions are ripe for * it to work. Switch it on. 
*/ enable_ddp(sc, toep); } } KASSERT(toep->sb_cc >= sbused(sb), ("%s: sb %p has more data (%d) than last time (%d).", __func__, sb, sbused(sb), toep->sb_cc)); toep->rx_credits += toep->sb_cc - sbused(sb); sbappendstream_locked(sb, m, 0); toep->sb_cc = sbused(sb); sorwakeup_locked(so); SOCKBUF_UNLOCK_ASSERT(sb); INP_WUNLOCK(inp); return (0); } #define S_CPL_FW4_ACK_OPCODE 24 #define M_CPL_FW4_ACK_OPCODE 0xff #define V_CPL_FW4_ACK_OPCODE(x) ((x) << S_CPL_FW4_ACK_OPCODE) #define G_CPL_FW4_ACK_OPCODE(x) \ (((x) >> S_CPL_FW4_ACK_OPCODE) & M_CPL_FW4_ACK_OPCODE) #define S_CPL_FW4_ACK_FLOWID 0 #define M_CPL_FW4_ACK_FLOWID 0xffffff #define V_CPL_FW4_ACK_FLOWID(x) ((x) << S_CPL_FW4_ACK_FLOWID) #define G_CPL_FW4_ACK_FLOWID(x) \ (((x) >> S_CPL_FW4_ACK_FLOWID) & M_CPL_FW4_ACK_FLOWID) #define S_CPL_FW4_ACK_CR 24 #define M_CPL_FW4_ACK_CR 0xff #define V_CPL_FW4_ACK_CR(x) ((x) << S_CPL_FW4_ACK_CR) #define G_CPL_FW4_ACK_CR(x) (((x) >> S_CPL_FW4_ACK_CR) & M_CPL_FW4_ACK_CR) #define S_CPL_FW4_ACK_SEQVAL 0 #define M_CPL_FW4_ACK_SEQVAL 0x1 #define V_CPL_FW4_ACK_SEQVAL(x) ((x) << S_CPL_FW4_ACK_SEQVAL) #define G_CPL_FW4_ACK_SEQVAL(x) \ (((x) >> S_CPL_FW4_ACK_SEQVAL) & M_CPL_FW4_ACK_SEQVAL) #define F_CPL_FW4_ACK_SEQVAL V_CPL_FW4_ACK_SEQVAL(1U) static int do_fw4_ack(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; const struct cpl_fw4_ack *cpl = (const void *)(rss + 1); unsigned int tid = G_CPL_FW4_ACK_FLOWID(be32toh(OPCODE_TID(cpl))); struct toepcb *toep = lookup_tid(sc, tid); struct inpcb *inp; struct tcpcb *tp; struct socket *so; uint8_t credits = cpl->credits; struct ofld_tx_sdesc *txsd; int plen; #ifdef INVARIANTS unsigned int opcode = G_CPL_FW4_ACK_OPCODE(be32toh(OPCODE_TID(cpl))); #endif /* * Very unusual case: we'd sent a flowc + abort_req for a synq entry and * now this comes back carrying the credits for the flowc. 
*/ if (__predict_false(toep->flags & TPF_SYNQE)) { KASSERT(toep->flags & TPF_ABORT_SHUTDOWN, ("%s: credits for a synq entry %p", __func__, toep)); return (0); } inp = toep->inp; KASSERT(opcode == CPL_FW4_ACK, ("%s: unexpected opcode 0x%x", __func__, opcode)); KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__)); INP_WLOCK(inp); if (__predict_false(toep->flags & TPF_ABORT_SHUTDOWN)) { INP_WUNLOCK(inp); return (0); } KASSERT((inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) == 0, ("%s: inp_flags 0x%x", __func__, inp->inp_flags)); tp = intotcpcb(inp); if (cpl->flags & CPL_FW4_ACK_FLAGS_SEQVAL) { tcp_seq snd_una = be32toh(cpl->snd_una); #ifdef INVARIANTS if (__predict_false(SEQ_LT(snd_una, tp->snd_una))) { log(LOG_ERR, "%s: unexpected seq# %x for TID %u, snd_una %x\n", __func__, snd_una, toep->tid, tp->snd_una); } #endif if (tp->snd_una != snd_una) { tp->snd_una = snd_una; tp->ts_recent_age = tcp_ts_getticks(); } } so = inp->inp_socket; txsd = &toep->txsd[toep->txsd_cidx]; plen = 0; while (credits) { KASSERT(credits >= txsd->tx_credits, ("%s: too many (or partial) credits", __func__)); credits -= txsd->tx_credits; toep->tx_credits += txsd->tx_credits; plen += txsd->plen; txsd++; toep->txsd_avail++; KASSERT(toep->txsd_avail <= toep->txsd_total, ("%s: txsd avail > total", __func__)); if (__predict_false(++toep->txsd_cidx == toep->txsd_total)) { txsd = &toep->txsd[0]; toep->txsd_cidx = 0; } } if (toep->tx_credits == toep->tx_total) { toep->tx_nocompl = 0; toep->plen_nocompl = 0; } if (toep->flags & TPF_TX_SUSPENDED && toep->tx_credits >= toep->tx_total / 4) { toep->flags &= ~TPF_TX_SUSPENDED; if (toep->ulp_mode == ULP_MODE_ISCSI) t4_ulp_push_frames(sc, toep, plen); else t4_push_frames(sc, toep, plen); } else if (plen > 0) { struct sockbuf *sb = &so->so_snd; + int sbu; - if (toep->ulp_mode == ULP_MODE_ISCSI) - t4_cpl_iscsi_callback(toep->td, toep, &plen, - CPL_FW4_ACK); - else { - SOCKBUF_LOCK(sb); + SOCKBUF_LOCK(sb); + sbu = sbused(sb); + if (toep->ulp_mode == ULP_MODE_ISCSI) { + + if (__predict_false(sbu > 0)) { + /* + * The data transmitted before the tid's ULP mode + * changed to ISCSI is still in so_snd. + * Incoming credits should account for so_snd + * first. + */ + sbdrop_locked(sb, min(sbu, plen)); + plen -= min(sbu, plen); + } + /* XXXNP: sowwakeup_locked causes a LOR. */ + SOCKBUF_UNLOCK(sb); + + if (__predict_true(plen > 0)) + cxgbei_fw4_ack(toep, plen); + } else { sbdrop_locked(sb, plen); - sowwakeup_locked(so); - SOCKBUF_UNLOCK_ASSERT(sb); + sowwakeup_locked(so); /* unlocks so_snd */ } + SOCKBUF_UNLOCK_ASSERT(sb); } INP_WUNLOCK(inp); return (0); } static int do_set_tcb_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; const struct cpl_set_tcb_rpl *cpl = (const void *)(rss + 1); unsigned int tid = GET_TID(cpl); #ifdef INVARIANTS unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl))); #endif KASSERT(opcode == CPL_SET_TCB_RPL, ("%s: unexpected opcode 0x%x", __func__, opcode)); KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); if (is_ftid(sc, tid)) return (t4_filter_rpl(iq, rss, m)); /* TCB is a filter */ - else { - struct toepcb *toep = lookup_tid(sc, tid); - t4_cpl_iscsi_callback(toep->td, toep, m, CPL_SET_TCB_RPL); - return (0); - } + /* + * TOM and/or other ULPs don't request replies for CPL_SET_TCB or + * CPL_SET_TCB_FIELD requests. This can easily change and when it does + * the dispatch code will go here. 
+ */ +#ifdef INVARIANTS + panic("%s: Unexpected CPL_SET_TCB_RPL for tid %u on iq %p", __func__, + tid, iq); +#else + log(LOG_ERR, "%s: Unexpected CPL_SET_TCB_RPL for tid %u on iq %p\n", + __func__, tid, iq); +#endif - CXGBE_UNIMPLEMENTED(__func__); + return (0); } void t4_set_tcb_field(struct adapter *sc, struct toepcb *toep, int ctrl, uint16_t word, uint64_t mask, uint64_t val) { struct wrqe *wr; struct cpl_set_tcb_field *req; wr = alloc_wrqe(sizeof(*req), ctrl ? toep->ctrlq : toep->ofld_txq); if (wr == NULL) { /* XXX */ panic("%s: allocation failure.", __func__); } req = wrtod(wr); INIT_TP_WR_MIT_CPL(req, CPL_SET_TCB_FIELD, toep->tid); req->reply_ctrl = htobe16(V_NO_REPLY(1) | V_QUEUENO(toep->ofld_rxq->iq.abs_id)); req->word_cookie = htobe16(V_WORD(word) | V_COOKIE(0)); req->mask = htobe64(mask); req->val = htobe64(val); t4_wrq_tx(sc, wr); } void t4_init_cpl_io_handlers(struct adapter *sc) { t4_register_cpl_handler(sc, CPL_PEER_CLOSE, do_peer_close); t4_register_cpl_handler(sc, CPL_CLOSE_CON_RPL, do_close_con_rpl); t4_register_cpl_handler(sc, CPL_ABORT_REQ_RSS, do_abort_req); t4_register_cpl_handler(sc, CPL_ABORT_RPL_RSS, do_abort_rpl); t4_register_cpl_handler(sc, CPL_RX_DATA, do_rx_data); t4_register_cpl_handler(sc, CPL_FW4_ACK, do_fw4_ack); t4_register_cpl_handler(sc, CPL_SET_TCB_RPL, do_set_tcb_rpl); } void t4_uninit_cpl_io_handlers(struct adapter *sc) { t4_register_cpl_handler(sc, CPL_SET_TCB_RPL, t4_filter_rpl); } #endif Index: projects/cxl_iscsi/sys/dev/cxgbe/tom/t4_ddp.c =================================================================== --- projects/cxl_iscsi/sys/dev/cxgbe/tom/t4_ddp.c (revision 279896) +++ projects/cxl_iscsi/sys/dev/cxgbe/tom/t4_ddp.c (revision 279897) @@ -1,1238 +1,1239 @@ /*- * Copyright (c) 2012 Chelsio Communications, Inc. * All rights reserved. * Written by: Navdeep Parhar * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define TCPSTATES #include #include #include #include #include #include #include #include #include #ifdef TCP_OFFLOAD #include "common/common.h" #include "common/t4_msg.h" #include "common/t4_regs.h" #include "common/t4_tcb.h" #include "tom/t4_tom.h" #define PPOD_SZ(n) ((n) * sizeof(struct pagepod)) #define PPOD_SIZE (PPOD_SZ(1)) /* XXX: must match A_ULP_RX_TDDP_PSZ */ static int t4_ddp_pgsz[] = {4096, 4096 << 2, 4096 << 4, 4096 << 6}; #if 0 static void t4_dump_tcb(struct adapter *sc, int tid) { uint32_t tcb_base, off, i, j; /* Dump TCB for the tid */ tcb_base = t4_read_reg(sc, A_TP_CMM_TCB_BASE); t4_write_reg(sc, PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_OFFSET, 2), tcb_base + tid * TCB_SIZE); t4_read_reg(sc, PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_OFFSET, 2)); off = 0; printf("\n"); for (i = 0; i < 4; i++) { uint32_t buf[8]; for (j = 0; j < 8; j++, off += 4) buf[j] = htonl(t4_read_reg(sc, MEMWIN2_BASE + off)); printf("%08x %08x %08x %08x %08x %08x %08x %08x\n", buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7]); } } #endif #define MAX_DDP_BUFFER_SIZE (M_TCB_RX_DDP_BUF0_LEN) static int alloc_ppods(struct tom_data *td, int n, u_int *ppod_addr) { vmem_addr_t v; int rc; MPASS(n > 0); rc = vmem_alloc(td->ppod_arena, PPOD_SZ(n), M_NOWAIT | M_FIRSTFIT, &v); *ppod_addr = (u_int)v; return (rc); } static void free_ppods(struct tom_data *td, u_int ppod_addr, int n) { MPASS(n > 0); vmem_free(td->ppod_arena, (vmem_addr_t)ppod_addr, PPOD_SZ(n)); } static inline int pages_to_nppods(int npages, int ddp_pgsz) { int nsegs = npages * PAGE_SIZE / ddp_pgsz; return (howmany(nsegs, PPOD_PAGES)); } static void free_ddp_buffer(struct tom_data *td, struct ddp_buffer *db) { if (db == NULL) return; if (db->pages) free(db->pages, M_CXGBE); if (db->nppods > 0) free_ppods(td, db->ppod_addr, db->nppods); free(db, M_CXGBE); } void release_ddp_resources(struct toepcb *toep) { int i; for (i = 0; i < nitems(toep->db); i++) { if (toep->db[i] != NULL) { free_ddp_buffer(toep->td, toep->db[i]); toep->db[i] = NULL; } } } /* XXX: handle_ddp_data code duplication */ void insert_ddp_data(struct toepcb *toep, uint32_t n) { struct inpcb *inp = toep->inp; struct tcpcb *tp = intotcpcb(inp); struct sockbuf *sb = &inp->inp_socket->so_rcv; struct mbuf *m; INP_WLOCK_ASSERT(inp); SOCKBUF_LOCK_ASSERT(sb); m = get_ddp_mbuf(n); tp->rcv_nxt += n; #ifndef USE_DDP_RX_FLOW_CONTROL KASSERT(tp->rcv_wnd >= n, ("%s: negative window size", __func__)); tp->rcv_wnd -= n; #endif KASSERT(toep->sb_cc >= sbused(sb), ("%s: sb %p has more data (%d) than last time (%d).", __func__, sb, sbused(sb), toep->sb_cc)); toep->rx_credits += toep->sb_cc - sbused(sb); #ifdef USE_DDP_RX_FLOW_CONTROL toep->rx_credits -= n; /* adjust for F_RX_FC_DDP */ #endif sbappendstream_locked(sb, m, 0); toep->sb_cc = sbused(sb); } /* SET_TCB_FIELD sent as a ULP command looks like this */ #define LEN__SET_TCB_FIELD_ULP (sizeof(struct ulp_txpkt) + \ sizeof(struct ulptx_idata) + sizeof(struct cpl_set_tcb_field_core)) /* RX_DATA_ACK sent as a ULP command looks like this */ #define LEN__RX_DATA_ACK_ULP (sizeof(struct ulp_txpkt) + \ sizeof(struct ulptx_idata) + sizeof(struct cpl_rx_data_ack_core)) static inline void * mk_set_tcb_field_ulp(struct ulp_txpkt *ulpmc, struct toepcb *toep, uint64_t word, uint64_t mask, uint64_t val) { struct ulptx_idata *ulpsc; struct cpl_set_tcb_field_core 
*req; ulpmc->cmd_dest = htonl(V_ULPTX_CMD(ULP_TX_PKT) | V_ULP_TXPKT_DEST(0)); ulpmc->len = htobe32(howmany(LEN__SET_TCB_FIELD_ULP, 16)); ulpsc = (struct ulptx_idata *)(ulpmc + 1); ulpsc->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_IMM)); ulpsc->len = htobe32(sizeof(*req)); req = (struct cpl_set_tcb_field_core *)(ulpsc + 1); OPCODE_TID(req) = htobe32(MK_OPCODE_TID(CPL_SET_TCB_FIELD, toep->tid)); req->reply_ctrl = htobe16(V_NO_REPLY(1) | V_QUEUENO(toep->ofld_rxq->iq.abs_id)); req->word_cookie = htobe16(V_WORD(word) | V_COOKIE(0)); req->mask = htobe64(mask); req->val = htobe64(val); ulpsc = (struct ulptx_idata *)(req + 1); if (LEN__SET_TCB_FIELD_ULP % 16) { ulpsc->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_NOOP)); ulpsc->len = htobe32(0); return (ulpsc + 1); } return (ulpsc); } static inline void * mk_rx_data_ack_ulp(struct ulp_txpkt *ulpmc, struct toepcb *toep) { struct ulptx_idata *ulpsc; struct cpl_rx_data_ack_core *req; ulpmc->cmd_dest = htonl(V_ULPTX_CMD(ULP_TX_PKT) | V_ULP_TXPKT_DEST(0)); ulpmc->len = htobe32(howmany(LEN__RX_DATA_ACK_ULP, 16)); ulpsc = (struct ulptx_idata *)(ulpmc + 1); ulpsc->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_IMM)); ulpsc->len = htobe32(sizeof(*req)); req = (struct cpl_rx_data_ack_core *)(ulpsc + 1); OPCODE_TID(req) = htobe32(MK_OPCODE_TID(CPL_RX_DATA_ACK, toep->tid)); req->credit_dack = htobe32(F_RX_MODULATE_RX); ulpsc = (struct ulptx_idata *)(req + 1); if (LEN__RX_DATA_ACK_ULP % 16) { ulpsc->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_NOOP)); ulpsc->len = htobe32(0); return (ulpsc + 1); } return (ulpsc); } static inline uint64_t select_ddp_flags(struct socket *so, int flags, int db_idx) { uint64_t ddp_flags = V_TF_DDP_INDICATE_OUT(0); int waitall = flags & MSG_WAITALL; int nb = so->so_state & SS_NBIO || flags & (MSG_DONTWAIT | MSG_NBIO); KASSERT(db_idx == 0 || db_idx == 1, ("%s: bad DDP buffer index %d", __func__, db_idx)); if (db_idx == 0) { ddp_flags |= V_TF_DDP_BUF0_VALID(1) | V_TF_DDP_ACTIVE_BUF(0); if (waitall) ddp_flags |= V_TF_DDP_PUSH_DISABLE_0(1); else if (nb) ddp_flags |= V_TF_DDP_BUF0_FLUSH(1); else ddp_flags |= V_TF_DDP_BUF0_FLUSH(0); } else { ddp_flags |= V_TF_DDP_BUF1_VALID(1) | V_TF_DDP_ACTIVE_BUF(1); if (waitall) ddp_flags |= V_TF_DDP_PUSH_DISABLE_1(1); else if (nb) ddp_flags |= V_TF_DDP_BUF1_FLUSH(1); else ddp_flags |= V_TF_DDP_BUF1_FLUSH(0); } return (ddp_flags); } static struct wrqe * mk_update_tcb_for_ddp(struct adapter *sc, struct toepcb *toep, int db_idx, int offset, uint64_t ddp_flags) { struct ddp_buffer *db = toep->db[db_idx]; struct wrqe *wr; struct work_request_hdr *wrh; struct ulp_txpkt *ulpmc; int len; KASSERT(db_idx == 0 || db_idx == 1, ("%s: bad DDP buffer index %d", __func__, db_idx)); /* * We'll send a compound work request that has 3 SET_TCB_FIELDs and an * RX_DATA_ACK (with RX_MODULATE to speed up delivery). * * The work request header is 16B and always ends at a 16B boundary. * The ULPTX master commands that follow must all end at 16B boundaries * too so we round up the size to 16. 
*/ len = sizeof(*wrh) + 3 * roundup2(LEN__SET_TCB_FIELD_ULP, 16) + roundup2(LEN__RX_DATA_ACK_ULP, 16); wr = alloc_wrqe(len, toep->ctrlq); if (wr == NULL) return (NULL); wrh = wrtod(wr); INIT_ULPTX_WRH(wrh, len, 1, 0); /* atomic */ ulpmc = (struct ulp_txpkt *)(wrh + 1); /* Write the buffer's tag */ ulpmc = mk_set_tcb_field_ulp(ulpmc, toep, W_TCB_RX_DDP_BUF0_TAG + db_idx, V_TCB_RX_DDP_BUF0_TAG(M_TCB_RX_DDP_BUF0_TAG), V_TCB_RX_DDP_BUF0_TAG(db->tag)); /* Update the current offset in the DDP buffer and its total length */ if (db_idx == 0) ulpmc = mk_set_tcb_field_ulp(ulpmc, toep, W_TCB_RX_DDP_BUF0_OFFSET, V_TCB_RX_DDP_BUF0_OFFSET(M_TCB_RX_DDP_BUF0_OFFSET) | V_TCB_RX_DDP_BUF0_LEN(M_TCB_RX_DDP_BUF0_LEN), V_TCB_RX_DDP_BUF0_OFFSET(offset) | V_TCB_RX_DDP_BUF0_LEN(db->len)); else ulpmc = mk_set_tcb_field_ulp(ulpmc, toep, W_TCB_RX_DDP_BUF1_OFFSET, V_TCB_RX_DDP_BUF1_OFFSET(M_TCB_RX_DDP_BUF1_OFFSET) | V_TCB_RX_DDP_BUF1_LEN((u64)M_TCB_RX_DDP_BUF1_LEN << 32), V_TCB_RX_DDP_BUF1_OFFSET(offset) | V_TCB_RX_DDP_BUF1_LEN((u64)db->len << 32)); /* Update DDP flags */ ulpmc = mk_set_tcb_field_ulp(ulpmc, toep, W_TCB_RX_DDP_FLAGS, V_TF_DDP_BUF0_FLUSH(1) | V_TF_DDP_BUF1_FLUSH(1) | V_TF_DDP_PUSH_DISABLE_0(1) | V_TF_DDP_PUSH_DISABLE_1(1) | V_TF_DDP_BUF0_VALID(1) | V_TF_DDP_BUF1_VALID(1) | V_TF_DDP_ACTIVE_BUF(1) | V_TF_DDP_INDICATE_OUT(1), ddp_flags); /* Gratuitous RX_DATA_ACK with RX_MODULATE set to speed up delivery. */ ulpmc = mk_rx_data_ack_ulp(ulpmc, toep); return (wr); } static void discourage_ddp(struct toepcb *toep) { if (toep->ddp_score && --toep->ddp_score == 0) { toep->ddp_flags &= ~DDP_OK; toep->ddp_disabled = time_uptime; CTR3(KTR_CXGBE, "%s: tid %u !DDP_OK @ %u", __func__, toep->tid, time_uptime); } } static int handle_ddp_data(struct toepcb *toep, __be32 ddp_report, __be32 rcv_nxt, int len) { uint32_t report = be32toh(ddp_report); unsigned int db_flag; struct inpcb *inp = toep->inp; struct tcpcb *tp; struct socket *so; struct sockbuf *sb; struct mbuf *m; db_flag = report & F_DDP_BUF_IDX ? DDP_BUF1_ACTIVE : DDP_BUF0_ACTIVE; if (__predict_false(!(report & F_DDP_INV))) CXGBE_UNIMPLEMENTED("DDP buffer still valid"); INP_WLOCK(inp); so = inp_inpcbtosocket(inp); sb = &so->so_rcv; if (__predict_false(inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT))) { /* * XXX: think a bit more. * tcpcb probably gone, but socket should still be around * because we always wait for DDP completion in soreceive no * matter what. Just wake it up and let it clean up. */ CTR5(KTR_CXGBE, "%s: tid %u, seq 0x%x, len %d, inp_flags 0x%x", __func__, toep->tid, be32toh(rcv_nxt), len, inp->inp_flags); SOCKBUF_LOCK(sb); goto wakeup; } tp = intotcpcb(inp); len += be32toh(rcv_nxt) - tp->rcv_nxt; tp->rcv_nxt += len; tp->t_rcvtime = ticks; #ifndef USE_DDP_RX_FLOW_CONTROL KASSERT(tp->rcv_wnd >= len, ("%s: negative window size", __func__)); tp->rcv_wnd -= len; #endif m = get_ddp_mbuf(len); SOCKBUF_LOCK(sb); if (report & F_DDP_BUF_COMPLETE) toep->ddp_score = DDP_HIGH_SCORE; else discourage_ddp(toep); KASSERT(toep->sb_cc >= sbused(sb), ("%s: sb %p has more data (%d) than last time (%d).", __func__, sb, sbused(sb), toep->sb_cc)); toep->rx_credits += toep->sb_cc - sbused(sb); #ifdef USE_DDP_RX_FLOW_CONTROL toep->rx_credits -= len; /* adjust for F_RX_FC_DDP */ #endif sbappendstream_locked(sb, m, 0); toep->sb_cc = sbused(sb); wakeup: KASSERT(toep->ddp_flags & db_flag, ("%s: DDP buffer not active. 
toep %p, ddp_flags 0x%x, report 0x%x", __func__, toep, toep->ddp_flags, report)); toep->ddp_flags &= ~db_flag; sorwakeup_locked(so); SOCKBUF_UNLOCK_ASSERT(sb); INP_WUNLOCK(inp); return (0); } #define DDP_ERR (F_DDP_PPOD_MISMATCH | F_DDP_LLIMIT_ERR | F_DDP_ULIMIT_ERR |\ F_DDP_PPOD_PARITY_ERR | F_DDP_PADDING_ERR | F_DDP_OFFSET_ERR |\ F_DDP_INVALID_TAG | F_DDP_COLOR_ERR | F_DDP_TID_MISMATCH |\ F_DDP_INVALID_PPOD | F_DDP_HDRCRC_ERR | F_DDP_DATACRC_ERR) +void (*cxgbei_rx_data_ddp)(struct toepcb *, const struct cpl_rx_data_ddp *); + static int do_rx_data_ddp(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; const struct cpl_rx_data_ddp *cpl = (const void *)(rss + 1); unsigned int tid = GET_TID(cpl); uint32_t vld; struct toepcb *toep = lookup_tid(sc, tid); - struct tom_data *td = toep->td; KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); KASSERT(toep->tid == tid, ("%s: toep tid/atid mismatch", __func__)); KASSERT(!(toep->flags & TPF_SYNQE), ("%s: toep %p claims to be a synq entry", __func__, toep)); vld = be32toh(cpl->ddpvld); if (__predict_false(vld & DDP_ERR)) { panic("%s: DDP error 0x%x (tid %d, toep %p)", __func__, vld, tid, toep); } + if (toep->ulp_mode == ULP_MODE_ISCSI) { - if (!t4_cpl_iscsi_callback(td, toep, (void *)cpl, - CPL_RX_DATA_DDP)) - return (0); + cxgbei_rx_data_ddp(toep, cpl); + return (0); } handle_ddp_data(toep, cpl->u.ddp_report, cpl->seq, be16toh(cpl->len)); return (0); } static int do_rx_ddp_complete(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; const struct cpl_rx_ddp_complete *cpl = (const void *)(rss + 1); unsigned int tid = GET_TID(cpl); struct toepcb *toep = lookup_tid(sc, tid); KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); KASSERT(toep->tid == tid, ("%s: toep tid/atid mismatch", __func__)); KASSERT(!(toep->flags & TPF_SYNQE), ("%s: toep %p claims to be a synq entry", __func__, toep)); handle_ddp_data(toep, cpl->ddp_report, cpl->rcv_nxt, 0); return (0); } void enable_ddp(struct adapter *sc, struct toepcb *toep) { KASSERT((toep->ddp_flags & (DDP_ON | DDP_OK | DDP_SC_REQ)) == DDP_OK, ("%s: toep %p has bad ddp_flags 0x%x", __func__, toep, toep->ddp_flags)); CTR3(KTR_CXGBE, "%s: tid %u (time %u)", __func__, toep->tid, time_uptime); toep->ddp_flags |= DDP_SC_REQ; t4_set_tcb_field(sc, toep, 1, W_TCB_RX_DDP_FLAGS, V_TF_DDP_OFF(1) | V_TF_DDP_INDICATE_OUT(1) | V_TF_DDP_BUF0_INDICATE(1) | V_TF_DDP_BUF1_INDICATE(1) | V_TF_DDP_BUF0_VALID(1) | V_TF_DDP_BUF1_VALID(1), V_TF_DDP_BUF0_INDICATE(1) | V_TF_DDP_BUF1_INDICATE(1)); t4_set_tcb_field(sc, toep, 1, W_TCB_T_FLAGS, V_TF_RCV_COALESCE_ENABLE(1), 0); } static inline void disable_ddp(struct adapter *sc, struct toepcb *toep) { KASSERT((toep->ddp_flags & (DDP_ON | DDP_SC_REQ)) == DDP_ON, ("%s: toep %p has bad ddp_flags 0x%x", __func__, toep, toep->ddp_flags)); CTR3(KTR_CXGBE, "%s: tid %u (time %u)", __func__, toep->tid, time_uptime); toep->ddp_flags |= DDP_SC_REQ; t4_set_tcb_field(sc, toep, 1, W_TCB_T_FLAGS, V_TF_RCV_COALESCE_ENABLE(1), V_TF_RCV_COALESCE_ENABLE(1)); t4_set_tcb_field(sc, toep, 1, W_TCB_RX_DDP_FLAGS, V_TF_DDP_OFF(1), V_TF_DDP_OFF(1)); } static int hold_uio(struct uio *uio, vm_page_t **ppages, int *pnpages) { struct vm_map *map; struct iovec *iov; vm_offset_t start, end; vm_page_t *pp; int n; KASSERT(uio->uio_iovcnt == 1, ("%s: uio_iovcnt %d", __func__, uio->uio_iovcnt)); KASSERT(uio->uio_td->td_proc == curproc, ("%s: uio proc (%p) is not curproc (%p)", __func__, uio->uio_td->td_proc, 
curproc)); map = &curproc->p_vmspace->vm_map; iov = &uio->uio_iov[0]; start = trunc_page((uintptr_t)iov->iov_base); end = round_page((vm_offset_t)iov->iov_base + iov->iov_len); n = howmany(end - start, PAGE_SIZE); if (end - start > MAX_DDP_BUFFER_SIZE) return (E2BIG); pp = malloc(n * sizeof(vm_page_t), M_CXGBE, M_NOWAIT); if (pp == NULL) return (ENOMEM); if (vm_fault_quick_hold_pages(map, (vm_offset_t)iov->iov_base, iov->iov_len, VM_PROT_WRITE, pp, n) < 0) { free(pp, M_CXGBE); return (EFAULT); } *ppages = pp; *pnpages = n; return (0); } static int bufcmp(struct ddp_buffer *db, vm_page_t *pages, int npages, int offset, int len) { int i; if (db == NULL || db->npages != npages || db->offset != offset || db->len != len) return (1); for (i = 0; i < npages; i++) { if (pages[i]->phys_addr != db->pages[i]->phys_addr) return (1); } return (0); } static int calculate_hcf(int n1, int n2) { int a, b, t; if (n1 <= n2) { a = n1; b = n2; } else { a = n2; b = n1; } while (a != 0) { t = a; a = b % a; b = t; } return (b); } static struct ddp_buffer * alloc_ddp_buffer(struct tom_data *td, vm_page_t *pages, int npages, int offset, int len) { int i, hcf, seglen, idx, ppod, nppods; struct ddp_buffer *db; /* * The DDP page size is unrelated to the VM page size. We combine * contiguous physical pages into larger segments to get the best DDP * page size possible. This is the largest of the four sizes in * A_ULP_RX_TDDP_PSZ that evenly divides the HCF of the segment sizes in * the page list. */ hcf = 0; for (i = 0; i < npages; i++) { seglen = PAGE_SIZE; while (i < npages - 1 && pages[i]->phys_addr + PAGE_SIZE == pages[i + 1]->phys_addr) { seglen += PAGE_SIZE; i++; } hcf = calculate_hcf(hcf, seglen); if (hcf < t4_ddp_pgsz[1]) { idx = 0; goto have_pgsz; /* give up, short circuit */ } } if (hcf % t4_ddp_pgsz[0] != 0) { /* hmmm. This could only happen when PAGE_SIZE < 4K */ KASSERT(PAGE_SIZE < 4096, ("%s: PAGE_SIZE %d, hcf %d", __func__, PAGE_SIZE, hcf)); CTR3(KTR_CXGBE, "%s: PAGE_SIZE %d, hcf %d", __func__, PAGE_SIZE, hcf); return (NULL); } for (idx = nitems(t4_ddp_pgsz) - 1; idx > 0; idx--) { if (hcf % t4_ddp_pgsz[idx] == 0) break; } have_pgsz: MPASS(idx <= M_PPOD_PGSZ); db = malloc(sizeof(*db), M_CXGBE, M_NOWAIT); if (db == NULL) { CTR1(KTR_CXGBE, "%s: malloc failed.", __func__); return (NULL); } nppods = pages_to_nppods(npages, t4_ddp_pgsz[idx]); if (alloc_ppods(td, nppods, &db->ppod_addr) != 0) { free(db, M_CXGBE); CTR4(KTR_CXGBE, "%s: no pods, nppods %d, resid %d, pgsz %d", __func__, nppods, len, t4_ddp_pgsz[idx]); return (NULL); } ppod = (db->ppod_addr - td->ppod_start) / PPOD_SIZE; db->tag = V_PPOD_PGSZ(idx) | V_PPOD_TAG(ppod); db->nppods = nppods; db->npages = npages; db->pages = pages; db->offset = offset; db->len = len; CTR6(KTR_CXGBE, "New DDP buffer. 
" "ddp_pgsz %d, ppod 0x%x, npages %d, nppods %d, offset %d, len %d", t4_ddp_pgsz[idx], ppod, db->npages, db->nppods, db->offset, db->len); return (db); } #define NUM_ULP_TX_SC_IMM_PPODS (256 / PPOD_SIZE) static int write_page_pods(struct adapter *sc, struct toepcb *toep, struct ddp_buffer *db) { struct wrqe *wr; struct ulp_mem_io *ulpmc; struct ulptx_idata *ulpsc; struct pagepod *ppod; int i, j, k, n, chunk, len, ddp_pgsz, idx; u_int ppod_addr; uint32_t cmd; cmd = htobe32(V_ULPTX_CMD(ULP_TX_MEM_WRITE)); if (is_t4(sc)) cmd |= htobe32(F_ULP_MEMIO_ORDER); else cmd |= htobe32(F_T5_ULP_MEMIO_IMM); ddp_pgsz = t4_ddp_pgsz[G_PPOD_PGSZ(db->tag)]; ppod_addr = db->ppod_addr; for (i = 0; i < db->nppods; ppod_addr += chunk) { /* How many page pods are we writing in this cycle */ n = min(db->nppods - i, NUM_ULP_TX_SC_IMM_PPODS); chunk = PPOD_SZ(n); len = roundup2(sizeof(*ulpmc) + sizeof(*ulpsc) + chunk, 16); wr = alloc_wrqe(len, toep->ctrlq); if (wr == NULL) return (ENOMEM); /* ok to just bail out */ ulpmc = wrtod(wr); INIT_ULPTX_WR(ulpmc, len, 0, 0); ulpmc->cmd = cmd; ulpmc->dlen = htobe32(V_ULP_MEMIO_DATA_LEN(chunk / 32)); ulpmc->len16 = htobe32(howmany(len - sizeof(ulpmc->wr), 16)); ulpmc->lock_addr = htobe32(V_ULP_MEMIO_ADDR(ppod_addr >> 5)); ulpsc = (struct ulptx_idata *)(ulpmc + 1); ulpsc->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_IMM)); ulpsc->len = htobe32(chunk); ppod = (struct pagepod *)(ulpsc + 1); for (j = 0; j < n; i++, j++, ppod++) { ppod->vld_tid_pgsz_tag_color = htobe64(F_PPOD_VALID | V_PPOD_TID(toep->tid) | db->tag); ppod->len_offset = htobe64(V_PPOD_LEN(db->len) | V_PPOD_OFST(db->offset)); ppod->rsvd = 0; idx = i * PPOD_PAGES * (ddp_pgsz / PAGE_SIZE); for (k = 0; k < nitems(ppod->addr); k++) { if (idx < db->npages) { ppod->addr[k] = htobe64(db->pages[idx]->phys_addr); idx += ddp_pgsz / PAGE_SIZE; } else ppod->addr[k] = 0; #if 0 CTR5(KTR_CXGBE, "%s: tid %d ppod[%d]->addr[%d] = %p", __func__, toep->tid, i, k, htobe64(ppod->addr[k])); #endif } } t4_wrq_tx(sc, wr); } return (0); } /* * Reuse, or allocate (and program the page pods for) a new DDP buffer. The * "pages" array is handed over to this function and should not be used in any * way by the caller after that. */ static int select_ddp_buffer(struct adapter *sc, struct toepcb *toep, vm_page_t *pages, int npages, int db_off, int db_len) { struct ddp_buffer *db; struct tom_data *td = sc->tom_softc; int i, empty_slot = -1; /* Try to reuse */ for (i = 0; i < nitems(toep->db); i++) { if (bufcmp(toep->db[i], pages, npages, db_off, db_len) == 0) { free(pages, M_CXGBE); return (i); /* pages still held */ } else if (toep->db[i] == NULL && empty_slot < 0) empty_slot = i; } /* Allocate new buffer, write its page pods. 
*/ db = alloc_ddp_buffer(td, pages, npages, db_off, db_len); if (db == NULL) { vm_page_unhold_pages(pages, npages); free(pages, M_CXGBE); return (-1); } if (write_page_pods(sc, toep, db) != 0) { vm_page_unhold_pages(pages, npages); free_ddp_buffer(td, db); return (-1); } i = empty_slot; if (i < 0) { i = arc4random() % nitems(toep->db); free_ddp_buffer(td, toep->db[i]); } toep->db[i] = db; CTR5(KTR_CXGBE, "%s: tid %d, DDP buffer[%d] = %p (tag 0x%x)", __func__, toep->tid, i, db, db->tag); return (i); } static void wire_ddp_buffer(struct ddp_buffer *db) { int i; vm_page_t p; for (i = 0; i < db->npages; i++) { p = db->pages[i]; vm_page_lock(p); vm_page_wire(p); vm_page_unhold(p); vm_page_unlock(p); } } static void unwire_ddp_buffer(struct ddp_buffer *db) { int i; vm_page_t p; for (i = 0; i < db->npages; i++) { p = db->pages[i]; vm_page_lock(p); vm_page_unwire(p, PQ_INACTIVE); vm_page_unlock(p); } } static int handle_ddp(struct socket *so, struct uio *uio, int flags, int error) { struct sockbuf *sb = &so->so_rcv; struct tcpcb *tp = so_sototcpcb(so); struct toepcb *toep = tp->t_toe; struct adapter *sc = td_adapter(toep->td); vm_page_t *pages; int npages, db_idx, rc, buf_flag; struct ddp_buffer *db; struct wrqe *wr; uint64_t ddp_flags; SOCKBUF_LOCK_ASSERT(sb); #if 0 if (sbused(sb) + sc->tt.ddp_thres > uio->uio_resid) { CTR4(KTR_CXGBE, "%s: sb_cc %d, threshold %d, resid %d", __func__, sbused(sb), sc->tt.ddp_thres, uio->uio_resid); } #endif /* XXX: too eager to disable DDP, could handle NBIO better than this. */ if (sbused(sb) >= uio->uio_resid || uio->uio_resid < sc->tt.ddp_thres || uio->uio_resid > MAX_DDP_BUFFER_SIZE || uio->uio_iovcnt > 1 || so->so_state & SS_NBIO || flags & (MSG_DONTWAIT | MSG_NBIO) || error || so->so_error || sb->sb_state & SBS_CANTRCVMORE) goto no_ddp; /* * Fault in and then hold the pages of the uio buffers. We'll wire them * a bit later if everything else works out. */ SOCKBUF_UNLOCK(sb); if (hold_uio(uio, &pages, &npages) != 0) { SOCKBUF_LOCK(sb); goto no_ddp; } SOCKBUF_LOCK(sb); if (__predict_false(so->so_error || sb->sb_state & SBS_CANTRCVMORE)) { vm_page_unhold_pages(pages, npages); free(pages, M_CXGBE); goto no_ddp; } /* * Figure out which one of the two DDP buffers to use this time. */ db_idx = select_ddp_buffer(sc, toep, pages, npages, (uintptr_t)uio->uio_iov->iov_base & PAGE_MASK, uio->uio_resid); pages = NULL; /* handed off to select_ddp_buffer */ if (db_idx < 0) goto no_ddp; db = toep->db[db_idx]; buf_flag = db_idx == 0 ? DDP_BUF0_ACTIVE : DDP_BUF1_ACTIVE; /* * Build the compound work request that tells the chip where to DMA the * payload. */ ddp_flags = select_ddp_flags(so, flags, db_idx); wr = mk_update_tcb_for_ddp(sc, toep, db_idx, sbused(sb), ddp_flags); if (wr == NULL) { /* * Just unhold the pages. The DDP buffer's software state is * left as-is in the toep. The page pods were written * successfully and we may have an opportunity to use it in the * future. */ vm_page_unhold_pages(db->pages, db->npages); goto no_ddp; } /* Wire (and then unhold) the pages, and give the chip the go-ahead. */ wire_ddp_buffer(db); t4_wrq_tx(sc, wr); sb->sb_flags &= ~SB_DDP_INDICATE; toep->ddp_flags |= buf_flag; /* * Wait for the DDP operation to complete and then unwire the pages. * The return code from the sbwait will be the final return code of this * function. But we do need to wait for DDP no matter what. */ rc = sbwait(sb); while (toep->ddp_flags & buf_flag) { /* XXXGL: shouldn't here be sbwait() call? 
*/ sb->sb_flags |= SB_WAIT; msleep(&sb->sb_acc, &sb->sb_mtx, PSOCK , "sbwait", 0); } unwire_ddp_buffer(db); return (rc); no_ddp: disable_ddp(sc, toep); discourage_ddp(toep); sb->sb_flags &= ~SB_DDP_INDICATE; return (0); } void t4_init_ddp(struct adapter *sc, struct tom_data *td) { td->ppod_start = sc->vres.ddp.start; td->ppod_arena = vmem_create("DDP page pods", sc->vres.ddp.start, sc->vres.ddp.size, 1, 32, M_FIRSTFIT | M_NOWAIT); t4_register_cpl_handler(sc, CPL_RX_DATA_DDP, do_rx_data_ddp); t4_register_cpl_handler(sc, CPL_RX_DDP_COMPLETE, do_rx_ddp_complete); } void t4_uninit_ddp(struct adapter *sc __unused, struct tom_data *td) { if (td->ppod_arena != NULL) { vmem_destroy(td->ppod_arena); td->ppod_arena = NULL; } } #define VNET_SO_ASSERT(so) \ VNET_ASSERT(curvnet != NULL, \ ("%s:%d curvnet is NULL, so=%p", __func__, __LINE__, (so))); #define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? 0 : SBL_WAIT) static int soreceive_rcvoob(struct socket *so, struct uio *uio, int flags) { CXGBE_UNIMPLEMENTED(__func__); } static char ddp_magic_str[] = "nothing to see here"; struct mbuf * get_ddp_mbuf(int len) { struct mbuf *m; m = m_get(M_NOWAIT, MT_DATA); if (m == NULL) CXGBE_UNIMPLEMENTED("mbuf alloc failure"); m->m_len = len; m->m_data = &ddp_magic_str[0]; return (m); } static inline int is_ddp_mbuf(struct mbuf *m) { return (m->m_data == &ddp_magic_str[0]); } /* * Copy an mbuf chain into a uio limited by len if set. */ static int m_mbuftouio_ddp(struct uio *uio, struct mbuf *m, int len) { int error, length, total; int progress = 0; if (len > 0) total = min(uio->uio_resid, len); else total = uio->uio_resid; /* Fill the uio with data from the mbufs. */ for (; m != NULL; m = m->m_next) { length = min(m->m_len, total - progress); if (is_ddp_mbuf(m)) { enum uio_seg segflag = uio->uio_segflg; uio->uio_segflg = UIO_NOCOPY; error = uiomove(mtod(m, void *), length, uio); uio->uio_segflg = segflag; } else error = uiomove(mtod(m, void *), length, uio); if (error) return (error); progress += length; } return (0); } /* * Based on soreceive_stream() in uipc_socket.c */ int t4_soreceive_ddp(struct socket *so, struct sockaddr **psa, struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp) { int len = 0, error = 0, flags, oresid, ddp_handled = 0; struct sockbuf *sb; struct mbuf *m, *n = NULL; /* We only do stream sockets. */ if (so->so_type != SOCK_STREAM) return (EINVAL); if (psa != NULL) *psa = NULL; if (controlp != NULL) return (EINVAL); if (flagsp != NULL) flags = *flagsp &~ MSG_EOR; else flags = 0; if (flags & MSG_OOB) return (soreceive_rcvoob(so, uio, flags)); if (mp0 != NULL) *mp0 = NULL; sb = &so->so_rcv; /* Prevent other readers from entering the socket. */ error = sblock(sb, SBLOCKWAIT(flags)); SOCKBUF_LOCK(sb); if (error) goto out; /* Easy one, no space to copyout anything. */ if (uio->uio_resid == 0) { error = EINVAL; goto out; } oresid = uio->uio_resid; /* We will never ever get anything unless we are or were connected. */ if (!(so->so_state & (SS_ISCONNECTED|SS_ISDISCONNECTED))) { error = ENOTCONN; goto out; } restart: SOCKBUF_LOCK_ASSERT(&so->so_rcv); if (sb->sb_flags & SB_DDP_INDICATE && !ddp_handled) { /* uio should be just as it was at entry */ KASSERT(oresid == uio->uio_resid, ("%s: oresid = %d, uio_resid = %zd, sbavail = %d", __func__, oresid, uio->uio_resid, sbavail(sb))); error = handle_ddp(so, uio, flags, 0); ddp_handled = 1; if (error) goto out; } /* Abort if socket has reported problems. 
*/ if (so->so_error) { if (sbavail(sb)) goto deliver; if (oresid > uio->uio_resid) goto out; error = so->so_error; if (!(flags & MSG_PEEK)) so->so_error = 0; goto out; } /* Door is closed. Deliver what is left, if any. */ if (sb->sb_state & SBS_CANTRCVMORE) { if (sbavail(sb)) goto deliver; else goto out; } /* Socket buffer is empty and we shall not block. */ if (sbavail(sb) == 0 && ((so->so_state & SS_NBIO) || (flags & (MSG_DONTWAIT|MSG_NBIO)))) { error = EAGAIN; goto out; } /* Socket buffer got some data that we shall deliver now. */ if (sbavail(sb) > 0 && !(flags & MSG_WAITALL) && ((so->so_state & SS_NBIO) || (flags & (MSG_DONTWAIT|MSG_NBIO)) || sbavail(sb) >= sb->sb_lowat || sbavail(sb) >= uio->uio_resid || sbavail(sb) >= sb->sb_hiwat) ) { goto deliver; } /* On MSG_WAITALL we must wait until all data or error arrives. */ if ((flags & MSG_WAITALL) && (sbavail(sb) >= uio->uio_resid || sbavail(sb) >= sb->sb_lowat)) goto deliver; /* * Wait and block until (more) data comes in. * NB: Drops the sockbuf lock during wait. */ error = sbwait(sb); if (error) { if (sb->sb_flags & SB_DDP_INDICATE && !ddp_handled) { (void) handle_ddp(so, uio, flags, 1); ddp_handled = 1; } goto out; } goto restart; deliver: SOCKBUF_LOCK_ASSERT(&so->so_rcv); KASSERT(sbavail(sb) > 0, ("%s: sockbuf empty", __func__)); KASSERT(sb->sb_mb != NULL, ("%s: sb_mb == NULL", __func__)); if (sb->sb_flags & SB_DDP_INDICATE && !ddp_handled) goto restart; /* Statistics. */ if (uio->uio_td) uio->uio_td->td_ru.ru_msgrcv++; /* Fill uio until full or current end of socket buffer is reached. */ len = min(uio->uio_resid, sbavail(sb)); if (mp0 != NULL) { /* Dequeue as many mbufs as possible. */ if (!(flags & MSG_PEEK) && len >= sb->sb_mb->m_len) { for (*mp0 = m = sb->sb_mb; m != NULL && m->m_len <= len; m = m->m_next) { len -= m->m_len; uio->uio_resid -= m->m_len; sbfree(sb, m); n = m; } sb->sb_mb = m; if (sb->sb_mb == NULL) SB_EMPTY_FIXUP(sb); n->m_next = NULL; } /* Copy the remainder. */ if (len > 0) { KASSERT(sb->sb_mb != NULL, ("%s: len > 0 && sb->sb_mb empty", __func__)); m = m_copym(sb->sb_mb, 0, len, M_NOWAIT); if (m == NULL) len = 0; /* Don't flush data from sockbuf. */ else uio->uio_resid -= m->m_len; if (*mp0 != NULL) n->m_next = m; else *mp0 = m; if (*mp0 == NULL) { error = ENOBUFS; goto out; } } } else { /* NB: Must unlock socket buffer as uiomove may sleep. */ SOCKBUF_UNLOCK(sb); error = m_mbuftouio_ddp(uio, sb->sb_mb, len); SOCKBUF_LOCK(sb); if (error) goto out; } SBLASTRECORDCHK(sb); SBLASTMBUFCHK(sb); /* * Remove the delivered data from the socket buffer unless we * were only peeking. */ if (!(flags & MSG_PEEK)) { if (len > 0) sbdrop_locked(sb, len); /* Notify protocol that we drained some data. */ if ((so->so_proto->pr_flags & PR_WANTRCVD) && (((flags & MSG_WAITALL) && uio->uio_resid > 0) || !(flags & MSG_SOCALLBCK))) { SOCKBUF_UNLOCK(sb); VNET_SO_ASSERT(so); (*so->so_proto->pr_usrreqs->pru_rcvd)(so, flags); SOCKBUF_LOCK(sb); } } /* * For MSG_WAITALL we may have to loop again and wait for * more data to come in. 
*/ if ((flags & MSG_WAITALL) && uio->uio_resid > 0) goto restart; out: SOCKBUF_LOCK_ASSERT(sb); SBLASTRECORDCHK(sb); SBLASTMBUFCHK(sb); SOCKBUF_UNLOCK(sb); sbunlock(sb); return (error); } #endif Index: projects/cxl_iscsi/sys/dev/cxgbe/tom/t4_tom.h =================================================================== --- projects/cxl_iscsi/sys/dev/cxgbe/tom/t4_tom.h (revision 279896) +++ projects/cxl_iscsi/sys/dev/cxgbe/tom/t4_tom.h (revision 279897) @@ -1,304 +1,293 @@ /*- * Copyright (c) 2012 Chelsio Communications, Inc. * All rights reserved. * Written by: Navdeep Parhar * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ * */ #ifndef __T4_TOM_H__ #define __T4_TOM_H__ #include #define LISTEN_HASH_SIZE 32 /* * Min receive window. We want it to be large enough to accommodate receive * coalescing, handle jumbo frames, and not trigger sender SWS avoidance. */ #define MIN_RCV_WND (24 * 1024U) /* * Max receive window supported by HW in bytes. Only a small part of it can * be set through option0, the rest needs to be set through RX_DATA_ACK. 
*/ #define MAX_RCV_WND ((1U << 27) - 1) #define DDP_RSVD_WIN (16 * 1024U) #define SB_DDP_INDICATE SB_IN_TOE /* soreceive must respond to indicate */ #define USE_DDP_RX_FLOW_CONTROL /* TOE PCB flags */ enum { TPF_ATTACHED = (1 << 0), /* a tcpcb refers to this toepcb */ TPF_FLOWC_WR_SENT = (1 << 1), /* firmware flow context WR sent */ TPF_TX_DATA_SENT = (1 << 2), /* some data sent */ TPF_TX_SUSPENDED = (1 << 3), /* tx suspended for lack of resources */ TPF_SEND_FIN = (1 << 4), /* send FIN after all pending data */ TPF_FIN_SENT = (1 << 5), /* FIN has been sent */ TPF_ABORT_SHUTDOWN = (1 << 6), /* connection abort is in progress */ TPF_CPL_PENDING = (1 << 7), /* haven't received the last CPL */ TPF_SYNQE = (1 << 8), /* synq_entry, not really a toepcb */ TPF_SYNQE_NEEDFREE = (1 << 9), /* synq_entry was malloc'd separately */ TPF_SYNQE_TCPDDP = (1 << 10), /* ulp_mode TCPDDP in toepcb */ TPF_SYNQE_EXPANDED = (1 << 11), /* toepcb ready, tid context updated */ TPF_SYNQE_HAS_L2TE = (1 << 12), /* we've replied to PASS_ACCEPT_REQ */ }; enum { DDP_OK = (1 << 0), /* OK to turn on DDP */ DDP_SC_REQ = (1 << 1), /* state change (on/off) requested */ DDP_ON = (1 << 2), /* DDP is turned on */ DDP_BUF0_ACTIVE = (1 << 3), /* buffer 0 in use (not invalidated) */ DDP_BUF1_ACTIVE = (1 << 4), /* buffer 1 in use (not invalidated) */ }; struct ofld_tx_sdesc { uint32_t plen; /* payload length */ uint8_t tx_credits; /* firmware tx credits (unit is 16B) */ }; struct ddp_buffer { uint32_t tag; /* includes color, page pod addr, and DDP page size */ u_int ppod_addr; int nppods; int offset; int len; int npages; vm_page_t *pages; }; struct toepcb { TAILQ_ENTRY(toepcb) link; /* toep_list */ u_int flags; /* miscellaneous flags */ struct tom_data *td; struct inpcb *inp; /* backpointer to host stack's PCB */ struct port_info *port; /* physical port */ struct sge_wrq *ofld_txq; struct sge_ofld_rxq *ofld_rxq; struct sge_wrq *ctrlq; struct l2t_entry *l2te; /* L2 table entry used by this connection */ struct clip_entry *ce; /* CLIP table entry used by this tid */ int tid; /* Connection identifier */ /* tx credit handling */ u_int tx_total; /* total tx WR credits (in 16B units) */ u_int tx_credits; /* tx WR credits (in 16B units) available */ u_int tx_nocompl; /* tx WR credits since last compl request */ u_int plen_nocompl; /* payload since last compl request */ /* rx credit handling */ u_int sb_cc; /* last noted value of so_rcv->sb_cc */ int rx_credits; /* rx credits (in bytes) to be returned to hw */ u_int ulp_mode; /* ULP mode */ + void *ulpcb; u_int ddp_flags; struct ddp_buffer *db[2]; time_t ddp_disabled; uint8_t ddp_score; /* Tx software descriptor */ uint8_t txsd_total; uint8_t txsd_pidx; uint8_t txsd_cidx; uint8_t txsd_avail; struct ofld_tx_sdesc txsd[]; }; struct flowc_tx_params { uint32_t snd_nxt; uint32_t rcv_nxt; unsigned int snd_space; unsigned int mss; }; #define DDP_RETRY_WAIT 5 /* seconds to wait before re-enabling DDP */ #define DDP_LOW_SCORE 1 #define DDP_HIGH_SCORE 3 /* * Compressed state for embryonic connections for a listener. Barely fits in * 64B, try not to grow it further. 
*/ struct synq_entry { TAILQ_ENTRY(synq_entry) link; /* listen_ctx's synq link */ int flags; /* same as toepcb's tp_flags */ int tid; struct listen_ctx *lctx; /* backpointer to listen ctx */ struct mbuf *syn; uint32_t iss; uint32_t ts; volatile uintptr_t wr; volatile u_int refcnt; uint16_t l2e_idx; uint16_t rcv_bufsize; }; /* listen_ctx flags */ #define LCTX_RPL_PENDING 1 /* waiting for a CPL_PASS_OPEN_RPL */ struct listen_ctx { LIST_ENTRY(listen_ctx) link; /* listen hash linkage */ volatile int refcount; int stid; struct stid_region stid_region; int flags; struct inpcb *inp; /* listening socket's inp */ struct sge_wrq *ctrlq; struct sge_ofld_rxq *ofld_rxq; struct clip_entry *ce; TAILQ_HEAD(, synq_entry) synq; }; struct clip_entry { TAILQ_ENTRY(clip_entry) link; struct in6_addr lip; /* local IPv6 address */ u_int refcount; }; TAILQ_HEAD(clip_head, clip_entry); struct tom_data { struct toedev tod; /* toepcb's associated with this TOE device */ struct mtx toep_list_lock; TAILQ_HEAD(, toepcb) toep_list; struct mtx lctx_hash_lock; LIST_HEAD(, listen_ctx) *listen_hash; u_long listen_mask; int lctx_count; /* # of lctx in the hash table */ u_int ppod_start; vmem_t *ppod_arena; struct mtx clip_table_lock; struct clip_head clip_table; int clip_gen; /* WRs that will not be sent to the chip because L2 resolution failed */ struct mtx unsent_wr_lock; STAILQ_HEAD(, wrqe) unsent_wr_list; struct task reclaim_wr_resources; }; static inline struct tom_data * tod_td(struct toedev *tod) { return (__containerof(tod, struct tom_data, tod)); } static inline struct adapter * td_adapter(struct tom_data *td) { return (td->tod.tod_softc); } /* t4_tom.c */ struct toepcb *alloc_toepcb(struct port_info *, int, int, int); void free_toepcb(struct toepcb *); void offload_socket(struct socket *, struct toepcb *); void undo_offload_socket(struct socket *); void final_cpl_received(struct toepcb *); void insert_tid(struct adapter *, int, void *); void *lookup_tid(struct adapter *, int); void update_tid(struct adapter *, int, void *); void remove_tid(struct adapter *, int); void release_tid(struct adapter *, int, struct sge_wrq *); int find_best_mtu_idx(struct adapter *, struct in_conninfo *, int); u_long select_rcv_wnd(struct socket *); int select_rcv_wscale(void); uint64_t calc_opt0(struct socket *, struct port_info *, struct l2t_entry *, int, int, int, int); uint64_t select_ntuple(struct port_info *, struct l2t_entry *); void set_tcpddp_ulp_mode(struct toepcb *); int negative_advice(int); struct clip_entry *hold_lip(struct tom_data *, struct in6_addr *); void release_lip(struct tom_data *, struct clip_entry *); /* t4_connect.c */ void t4_init_connect_cpl_handlers(struct adapter *); int t4_connect(struct toedev *, struct socket *, struct rtentry *, struct sockaddr *); void act_open_failure_cleanup(struct adapter *, u_int, u_int); /* t4_listen.c */ void t4_init_listen_cpl_handlers(struct adapter *); int t4_listen_start(struct toedev *, struct tcpcb *); int t4_listen_stop(struct toedev *, struct tcpcb *); void t4_syncache_added(struct toedev *, void *); void t4_syncache_removed(struct toedev *, void *); int t4_syncache_respond(struct toedev *, void *, struct mbuf *); int do_abort_req_synqe(struct sge_iq *, const struct rss_header *, struct mbuf *); int do_abort_rpl_synqe(struct sge_iq *, const struct rss_header *, struct mbuf *); void t4_offload_socket(struct toedev *, void *, struct socket *); /* t4_cpl_io.c */ void t4_init_cpl_io_handlers(struct adapter *); void t4_uninit_cpl_io_handlers(struct adapter *); void 
send_abort_rpl(struct adapter *, struct sge_wrq *, int , int); void send_flowc_wr(struct toepcb *, struct flowc_tx_params *); void send_reset(struct adapter *, struct toepcb *, uint32_t); void make_established(struct toepcb *, uint32_t, uint32_t, uint16_t); void t4_rcvd(struct toedev *, struct tcpcb *); int t4_tod_output(struct toedev *, struct tcpcb *); int t4_send_fin(struct toedev *, struct tcpcb *); int t4_send_rst(struct toedev *, struct tcpcb *); void t4_set_tcb_field(struct adapter *, struct toepcb *, int, uint16_t, uint64_t, uint64_t); void t4_push_frames(struct adapter *sc, struct toepcb *toep, int drop); /* t4_ddp.c */ void t4_init_ddp(struct adapter *, struct tom_data *); void t4_uninit_ddp(struct adapter *, struct tom_data *); int t4_soreceive_ddp(struct socket *, struct sockaddr **, struct uio *, struct mbuf **, struct mbuf **, int *); struct mbuf *get_ddp_mbuf(int); void enable_ddp(struct adapter *, struct toepcb *toep); void release_ddp_resources(struct toepcb *toep); void insert_ddp_data(struct toepcb *, uint32_t); /* ULP related */ #define CXGBE_ISCSI_MBUF_TAG 50 -int t4tom_cpl_handler_registered(struct adapter *, unsigned int); -void t4tom_register_cpl_iscsi_callback(void (*fp)(struct tom_data *, - struct socket *, void *, unsigned int)); -void t4tom_register_queue_iscsi_callback(struct mbuf *(*fp)(struct socket *, - unsigned int, int *)); void t4_ulp_push_frames(struct adapter *sc, struct toepcb *toep, int); -int t4_cpl_iscsi_callback(struct tom_data *, struct toepcb *, void *, uint32_t); -struct mbuf *t4_queue_iscsi_callback(struct socket *, struct toepcb *, uint32_t, - int *); -extern void (*tom_cpl_iscsi_callback)(struct tom_data *, struct socket *, - void *, unsigned int); -extern struct mbuf *(*tom_queue_iscsi_callback)(struct socket*, unsigned int, - int *); #endif
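A minimal usage sketch of the hooks this change introduces: do_fw4_ack() and do_rx_data_ddp() now call cxgbei_fw4_ack() and cxgbei_rx_data_ddp() directly for tids with ulp_mode == ULP_MODE_ISCSI, and the new toep->ulpcb field gives the ULP a place to keep per-connection state. cxgbei_rx_data_ddp is the function pointer declared in t4_ddp.c above; it is assumed here that cxgbei_fw4_ack is declared the same way in t4_cpl_io.c (its declaration falls outside these hunks). The handler names and the per-connection structure below are hypothetical.

/*
 * Sketch only: how an iSCSI ULP module might wire itself up to the new
 * direct hooks.  Assumes the usual TOM headers (common/t4_msg.h,
 * tom/t4_tom.h) and extern declarations for the two hook pointers.
 */
struct iscsi_ulp_conn {			/* hypothetical; stored in toep->ulpcb */
	struct toepcb *toep;
	uint32_t tx_unacked;		/* payload bytes not yet acked by CPL_FW4_ACK */
};

static void
iscsi_fw4_ack(struct toepcb *toep, int plen)
{
	struct iscsi_ulp_conn *ic = toep->ulpcb;

	/* do_fw4_ack() has already drained any pre-iSCSI so_snd data. */
	MPASS(plen > 0);
	ic->tx_unacked -= plen;
}

static void
iscsi_rx_data_ddp(struct toepcb *toep, const struct cpl_rx_data_ddp *cpl)
{
	struct iscsi_ulp_conn *ic = toep->ulpcb;
	int len = be16toh(cpl->len);	/* length of the DDP'd PDU */

	/* Hand the completed PDU to the iSCSI layer (omitted). */
	(void)ic;
	(void)len;
}

static void
iscsi_ulp_attach(void)
{
	/* Install the hooks TOM calls for ULP_MODE_ISCSI connections. */
	cxgbei_fw4_ack = iscsi_fw4_ack;
	cxgbei_rx_data_ddp = iscsi_rx_data_ddp;
}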