/*
 * Reviewer summary of this patch: an SVN unified diff from revision 239343
 * to revision 239344 touching t4_hw.h and t4_msg.h under
 * head/sys/dev/cxgbe/common/.  This note is placed ahead of the first file
 * header, outside every hunk.
 *
 * Naming pattern followed by the S_/M_/V_/G_/F_ field macros in these
 * headers:
 *   S_<FIELD>     bit position (shift count) of the field within its word
 *   M_<FIELD>     mask of the field once it has been shifted down to bit 0
 *   V_<FIELD>(x)  x shifted up into the field's position
 *   G_<FIELD>(x)  the field extracted from word x,
 *                 i.e. ((x) >> S_<FIELD>) & M_<FIELD>
 *   F_<FIELD>     V_<FIELD>(1U) or V_<FIELD>(1ULL), defined for one-bit
 *                 fields
 *
 * t4_hw.h (hunk -1,263 +1,265): the two added lines are G_PPOD_TAG() and
 * G_PPOD_PGSZ(), the extractors that pair with the existing V_PPOD_TAG()
 * and V_PPOD_PGSZ().
 *
 * t4_msg.h (hunk -1,2404 +1,2417): the additions include
 * struct cpl_set_tcb_field_core, which declares the same members as
 * struct cpl_set_tcb_field except for the leading WR_HDR.
 */
Index: head/sys/dev/cxgbe/common/t4_hw.h =================================================================== --- head/sys/dev/cxgbe/common/t4_hw.h (revision 239343) +++ head/sys/dev/cxgbe/common/t4_hw.h (revision 239344) @@ -1,263 +1,265 @@ /*- * Copyright (c) 2011 Chelsio Communications, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ * */ #ifndef __T4_HW_H #define __T4_HW_H #include "osdep.h" enum { NCHAN = 4, /* # of HW channels */ MAX_MTU = 9600, /* max MAC MTU, excluding header + FCS */ EEPROMSIZE = 17408, /* Serial EEPROM physical size */ EEPROMVSIZE = 32768, /* Serial EEPROM virtual address space size */ EEPROMPFSIZE = 1024, /* EEPROM writable area size for PFn, n>0 */ RSS_NENTRIES = 2048, /* # of entries in RSS mapping table */ TCB_SIZE = 128, /* TCB size */ NMTUS = 16, /* size of MTU table */ NCCTRL_WIN = 32, /* # of congestion control windows */ NTX_SCHED = 8, /* # of HW Tx scheduling queues */ PM_NSTATS = 5, /* # of PM stats */ MBOX_LEN = 64, /* mailbox size in bytes */ TRACE_LEN = 112, /* length of trace data and mask */ FILTER_OPT_LEN = 36, /* filter tuple width for optional components */ NWOL_PAT = 8, /* # of WoL patterns */ WOL_PAT_LEN = 128, /* length of WoL patterns */ }; enum { CIM_NUM_IBQ = 6, /* # of CIM IBQs */ CIM_NUM_OBQ = 6, /* # of CIM OBQs */ CIMLA_SIZE = 2048, /* # of 32-bit words in CIM LA */ CIM_PIFLA_SIZE = 64, /* # of 192-bit words in CIM PIF LA */ CIM_MALA_SIZE = 64, /* # of 160-bit words in CIM MA LA */ CIM_IBQ_SIZE = 128, /* # of 128-bit words in a CIM IBQ */ TPLA_SIZE = 128, /* # of 64-bit words in TP LA */ ULPRX_LA_SIZE = 512, /* # of 256-bit words in ULP_RX LA */ }; enum { SF_PAGE_SIZE = 256, /* serial flash page size */ SF_SEC_SIZE = 64 * 1024, /* serial flash sector size */ }; /* SGE context types */ enum ctxt_type { CTXT_EGRESS, CTXT_INGRESS, CTXT_FLM, CTXT_CNM }; enum { RSP_TYPE_FLBUF, RSP_TYPE_CPL, RSP_TYPE_INTR }; /* response entry types */ enum { MBOX_OWNER_NONE, MBOX_OWNER_FW, MBOX_OWNER_DRV }; /* mailbox owners */ enum { SGE_MAX_WR_LEN = 512, /* max WR size in bytes */ SGE_CTXT_SIZE = 24, /* size of SGE context */ SGE_NTIMERS = 6, /* # of interrupt holdoff timer values */ SGE_NCOUNTERS = 4, /* # of interrupt packet counter values */ }; struct sge_qstat { /* data written to SGE queue status entries */ volatile __be32 qid; volatile __be16 
cidx; volatile __be16 pidx; }; #define S_QSTAT_PIDX 0 #define M_QSTAT_PIDX 0xffff #define G_QSTAT_PIDX(x) (((x) >> S_QSTAT_PIDX) & M_QSTAT_PIDX) #define S_QSTAT_CIDX 16 #define M_QSTAT_CIDX 0xffff #define G_QSTAT_CIDX(x) (((x) >> S_QSTAT_CIDX) & M_QSTAT_CIDX) /* * Structure for last 128 bits of response descriptors */ struct rsp_ctrl { __be32 hdrbuflen_pidx; __be32 pldbuflen_qid; union { u8 type_gen; __be64 last_flit; } u; }; #define S_RSPD_NEWBUF 31 #define V_RSPD_NEWBUF(x) ((x) << S_RSPD_NEWBUF) #define F_RSPD_NEWBUF V_RSPD_NEWBUF(1U) #define S_RSPD_LEN 0 #define M_RSPD_LEN 0x7fffffff #define V_RSPD_LEN(x) ((x) << S_RSPD_LEN) #define G_RSPD_LEN(x) (((x) >> S_RSPD_LEN) & M_RSPD_LEN) #define S_RSPD_QID S_RSPD_LEN #define M_RSPD_QID M_RSPD_LEN #define V_RSPD_QID(x) V_RSPD_LEN(x) #define G_RSPD_QID(x) G_RSPD_LEN(x) #define S_RSPD_GEN 7 #define V_RSPD_GEN(x) ((x) << S_RSPD_GEN) #define F_RSPD_GEN V_RSPD_GEN(1U) #define S_RSPD_QOVFL 6 #define V_RSPD_QOVFL(x) ((x) << S_RSPD_QOVFL) #define F_RSPD_QOVFL V_RSPD_QOVFL(1U) #define S_RSPD_TYPE 4 #define M_RSPD_TYPE 0x3 #define V_RSPD_TYPE(x) ((x) << S_RSPD_TYPE) #define G_RSPD_TYPE(x) (((x) >> S_RSPD_TYPE) & M_RSPD_TYPE) /* Rx queue interrupt deferral fields: counter enable and timer index */ #define S_QINTR_CNT_EN 0 #define V_QINTR_CNT_EN(x) ((x) << S_QINTR_CNT_EN) #define F_QINTR_CNT_EN V_QINTR_CNT_EN(1U) #define S_QINTR_TIMER_IDX 1 #define M_QINTR_TIMER_IDX 0x7 #define V_QINTR_TIMER_IDX(x) ((x) << S_QINTR_TIMER_IDX) #define G_QINTR_TIMER_IDX(x) (((x) >> S_QINTR_TIMER_IDX) & M_QINTR_TIMER_IDX) /* # of pages a pagepod can hold without needing another pagepod */ #define PPOD_PAGES 4U struct pagepod { __be64 vld_tid_pgsz_tag_color; __be64 len_offset; __be64 rsvd; __be64 addr[PPOD_PAGES + 1]; }; #define S_PPOD_COLOR 0 #define M_PPOD_COLOR 0x3F #define V_PPOD_COLOR(x) ((x) << S_PPOD_COLOR) #define S_PPOD_TAG 6 #define M_PPOD_TAG 0xFFFFFF #define V_PPOD_TAG(x) ((x) << S_PPOD_TAG) +#define G_PPOD_TAG(x) (((x) >> S_PPOD_TAG) & 
M_PPOD_TAG) #define S_PPOD_PGSZ 30 #define M_PPOD_PGSZ 0x3 #define V_PPOD_PGSZ(x) ((x) << S_PPOD_PGSZ) +#define G_PPOD_PGSZ(x) (((x) >> S_PPOD_PGSZ) & M_PPOD_PGSZ) #define S_PPOD_TID 32 #define M_PPOD_TID 0xFFFFFF #define V_PPOD_TID(x) ((__u64)(x) << S_PPOD_TID) #define S_PPOD_VALID 56 #define V_PPOD_VALID(x) ((__u64)(x) << S_PPOD_VALID) #define F_PPOD_VALID V_PPOD_VALID(1ULL) #define S_PPOD_LEN 32 #define M_PPOD_LEN 0xFFFFFFFF #define V_PPOD_LEN(x) ((__u64)(x) << S_PPOD_LEN) #define S_PPOD_OFST 0 #define M_PPOD_OFST 0xFFFFFFFF #define V_PPOD_OFST(x) ((x) << S_PPOD_OFST) /* * Flash layout. */ #define FLASH_START(start) ((start) * SF_SEC_SIZE) #define FLASH_MAX_SIZE(nsecs) ((nsecs) * SF_SEC_SIZE) enum { /* * Various Expansion-ROM boot images, etc. */ FLASH_EXP_ROM_START_SEC = 0, FLASH_EXP_ROM_NSECS = 6, FLASH_EXP_ROM_START = FLASH_START(FLASH_EXP_ROM_START_SEC), FLASH_EXP_ROM_MAX_SIZE = FLASH_MAX_SIZE(FLASH_EXP_ROM_NSECS), /* * iSCSI Boot Firmware Table (iBFT) and other driver-related * parameters ... */ FLASH_IBFT_START_SEC = 6, FLASH_IBFT_NSECS = 1, FLASH_IBFT_START = FLASH_START(FLASH_IBFT_START_SEC), FLASH_IBFT_MAX_SIZE = FLASH_MAX_SIZE(FLASH_IBFT_NSECS), /* * Boot configuration data. */ FLASH_BOOTCFG_START_SEC = 7, FLASH_BOOTCFG_NSECS = 1, FLASH_BOOTCFG_START = FLASH_START(FLASH_BOOTCFG_START_SEC), FLASH_BOOTCFG_MAX_SIZE = FLASH_MAX_SIZE(FLASH_BOOTCFG_NSECS), /* * Location of firmware image in FLASH. */ FLASH_FW_START_SEC = 8, FLASH_FW_NSECS = 8, FLASH_FW_START = FLASH_START(FLASH_FW_START_SEC), FLASH_FW_MAX_SIZE = FLASH_MAX_SIZE(FLASH_FW_NSECS), /* * iSCSI persistent/crash information. */ FLASH_ISCSI_CRASH_START_SEC = 29, FLASH_ISCSI_CRASH_NSECS = 1, FLASH_ISCSI_CRASH_START = FLASH_START(FLASH_ISCSI_CRASH_START_SEC), FLASH_ISCSI_CRASH_MAX_SIZE = FLASH_MAX_SIZE(FLASH_ISCSI_CRASH_NSECS), /* * FCoE persistent/crash information. 
*/ FLASH_FCOE_CRASH_START_SEC = 30, FLASH_FCOE_CRASH_NSECS = 1, FLASH_FCOE_CRASH_START = FLASH_START(FLASH_FCOE_CRASH_START_SEC), FLASH_FCOE_CRASH_MAX_SIZE = FLASH_MAX_SIZE(FLASH_FCOE_CRASH_NSECS), /* * Location of Firmware Configuration File in FLASH. Since the FPGA * "FLASH" is smaller we need to store the Configuration File in a * different location -- which will overlap the end of the firmware * image if firmware ever gets that large ... */ FLASH_CFG_START_SEC = 31, FLASH_CFG_NSECS = 1, FLASH_CFG_START = FLASH_START(FLASH_CFG_START_SEC), FLASH_CFG_MAX_SIZE = FLASH_MAX_SIZE(FLASH_CFG_NSECS), FLASH_FPGA_CFG_START_SEC = 15, FLASH_FPGA_CFG_START = FLASH_START(FLASH_FPGA_CFG_START_SEC), /* * Sectors 32-63 are reserved for FLASH failover. */ }; #undef FLASH_START #undef FLASH_MAX_SIZE #endif /* __T4_HW_H */ Index: head/sys/dev/cxgbe/common/t4_msg.h =================================================================== --- head/sys/dev/cxgbe/common/t4_msg.h (revision 239343) +++ head/sys/dev/cxgbe/common/t4_msg.h (revision 239344) @@ -1,2404 +1,2417 @@ /*- * Copyright (c) 2011 Chelsio Communications, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ * */ #ifndef T4_MSG_H #define T4_MSG_H enum { CPL_PASS_OPEN_REQ = 0x1, CPL_PASS_ACCEPT_RPL = 0x2, CPL_ACT_OPEN_REQ = 0x3, CPL_SET_TCB = 0x4, CPL_SET_TCB_FIELD = 0x5, CPL_GET_TCB = 0x6, CPL_CLOSE_CON_REQ = 0x8, CPL_CLOSE_LISTSRV_REQ = 0x9, CPL_ABORT_REQ = 0xA, CPL_ABORT_RPL = 0xB, CPL_TX_DATA = 0xC, CPL_RX_DATA_ACK = 0xD, CPL_TX_PKT = 0xE, CPL_RTE_DELETE_REQ = 0xF, CPL_RTE_WRITE_REQ = 0x10, CPL_RTE_READ_REQ = 0x11, CPL_L2T_WRITE_REQ = 0x12, CPL_L2T_READ_REQ = 0x13, CPL_SMT_WRITE_REQ = 0x14, CPL_SMT_READ_REQ = 0x15, CPL_TAG_WRITE_REQ = 0x16, CPL_BARRIER = 0x18, CPL_TID_RELEASE = 0x1A, CPL_TAG_READ_REQ = 0x1B, CPL_TX_PKT_FSO = 0x1E, CPL_TX_PKT_ISO = 0x1F, CPL_CLOSE_LISTSRV_RPL = 0x20, CPL_ERROR = 0x21, CPL_GET_TCB_RPL = 0x22, CPL_L2T_WRITE_RPL = 0x23, CPL_PASS_OPEN_RPL = 0x24, CPL_ACT_OPEN_RPL = 0x25, CPL_PEER_CLOSE = 0x26, CPL_RTE_DELETE_RPL = 0x27, CPL_RTE_WRITE_RPL = 0x28, CPL_RX_URG_PKT = 0x29, CPL_TAG_WRITE_RPL = 0x2A, CPL_ABORT_REQ_RSS = 0x2B, CPL_RX_URG_NOTIFY = 0x2C, CPL_ABORT_RPL_RSS = 0x2D, CPL_SMT_WRITE_RPL = 0x2E, CPL_TX_DATA_ACK = 0x2F, CPL_RX_PHYS_ADDR = 0x30, CPL_PCMD_READ_RPL = 0x31, CPL_CLOSE_CON_RPL = 0x32, CPL_ISCSI_HDR = 0x33, CPL_L2T_READ_RPL = 0x34, CPL_RDMA_CQE = 0x35, CPL_RDMA_CQE_READ_RSP = 0x36, CPL_RDMA_CQE_ERR = 0x37, CPL_RTE_READ_RPL = 0x38, CPL_RX_DATA = 0x39, CPL_SET_TCB_RPL = 0x3A, CPL_RX_PKT = 0x3B, CPL_TAG_READ_RPL = 0x3C, CPL_HIT_NOTIFY = 0x3D, CPL_PKT_NOTIFY = 0x3E, CPL_RX_DDP_COMPLETE = 0x3F, 
CPL_ACT_ESTABLISH = 0x40, CPL_PASS_ESTABLISH = 0x41, CPL_RX_DATA_DDP = 0x42, CPL_SMT_READ_RPL = 0x43, CPL_PASS_ACCEPT_REQ = 0x44, CPL_RX2TX_PKT = 0x45, CPL_RX_FCOE_DDP = 0x46, CPL_FCOE_HDR = 0x47, CPL_T5_TRACE_PKT = 0x48, CPL_RX_ISCSI_DDP = 0x49, CPL_RX_FCOE_DIF = 0x4A, CPL_RX_DATA_DIF = 0x4B, CPL_RDMA_READ_REQ = 0x60, CPL_RX_ISCSI_DIF = 0x60, CPL_SET_LE_REQ = 0x80, CPL_PASS_OPEN_REQ6 = 0x81, CPL_ACT_OPEN_REQ6 = 0x83, CPL_RDMA_TERMINATE = 0xA2, CPL_RDMA_WRITE = 0xA4, CPL_SGE_EGR_UPDATE = 0xA5, CPL_SET_LE_RPL = 0xA6, CPL_FW2_MSG = 0xA7, CPL_FW2_PLD = 0xA8, CPL_T5_RDMA_READ_REQ = 0xA9, CPL_RDMA_ATOMIC_REQ = 0xAA, CPL_RDMA_ATOMIC_RPL = 0xAB, CPL_RDMA_IMM_DATA = 0xAC, CPL_RDMA_IMM_DATA_SE = 0xAD, CPL_TRACE_PKT = 0xB0, CPL_RX2TX_DATA = 0xB1, CPL_ISCSI_DATA = 0xB2, CPL_FCOE_DATA = 0xB3, CPL_FW4_MSG = 0xC0, CPL_FW4_PLD = 0xC1, CPL_FW4_ACK = 0xC3, CPL_FW6_MSG = 0xE0, CPL_FW6_PLD = 0xE1, CPL_TX_PKT_LSO = 0xED, CPL_TX_PKT_XT = 0xEE, NUM_CPL_CMDS /* must be last and previous entries must be sorted */ }; enum CPL_error { CPL_ERR_NONE = 0, CPL_ERR_TCAM_PARITY = 1, CPL_ERR_TCAM_FULL = 3, CPL_ERR_BAD_LENGTH = 15, CPL_ERR_BAD_ROUTE = 18, CPL_ERR_CONN_RESET = 20, CPL_ERR_CONN_EXIST_SYNRECV = 21, CPL_ERR_CONN_EXIST = 22, CPL_ERR_ARP_MISS = 23, CPL_ERR_BAD_SYN = 24, CPL_ERR_CONN_TIMEDOUT = 30, CPL_ERR_XMIT_TIMEDOUT = 31, CPL_ERR_PERSIST_TIMEDOUT = 32, CPL_ERR_FINWAIT2_TIMEDOUT = 33, CPL_ERR_KEEPALIVE_TIMEDOUT = 34, CPL_ERR_RTX_NEG_ADVICE = 35, CPL_ERR_PERSIST_NEG_ADVICE = 36, CPL_ERR_ABORT_FAILED = 42, CPL_ERR_IWARP_FLM = 50, }; enum { CPL_CONN_POLICY_AUTO = 0, CPL_CONN_POLICY_ASK = 1, CPL_CONN_POLICY_FILTER = 2, CPL_CONN_POLICY_DENY = 3 }; enum { ULP_MODE_NONE = 0, ULP_MODE_ISCSI = 2, ULP_MODE_RDMA = 4, ULP_MODE_TCPDDP = 5, ULP_MODE_FCOE = 6, }; enum { ULP_CRC_HEADER = 1 << 0, ULP_CRC_DATA = 1 << 1 }; enum { CPL_PASS_OPEN_ACCEPT, CPL_PASS_OPEN_REJECT, CPL_PASS_OPEN_ACCEPT_TNL }; enum { CPL_ABORT_SEND_RST = 0, CPL_ABORT_NO_RST, }; enum { /* TX_PKT_XT checksum types */ TX_CSUM_TCP = 
0, TX_CSUM_UDP = 1, TX_CSUM_CRC16 = 4, TX_CSUM_CRC32 = 5, TX_CSUM_CRC32C = 6, TX_CSUM_FCOE = 7, TX_CSUM_TCPIP = 8, TX_CSUM_UDPIP = 9, TX_CSUM_TCPIP6 = 10, TX_CSUM_UDPIP6 = 11, TX_CSUM_IP = 12, }; enum { /* packet type in CPL_RX_PKT */ PKTYPE_XACT_UCAST = 0, PKTYPE_HASH_UCAST = 1, PKTYPE_XACT_MCAST = 2, PKTYPE_HASH_MCAST = 3, PKTYPE_PROMISC = 4, PKTYPE_HPROMISC = 5, PKTYPE_BCAST = 6 }; enum { /* DMAC type in CPL_RX_PKT */ DATYPE_UCAST, DATYPE_MCAST, DATYPE_BCAST }; enum { /* TCP congestion control algorithms */ CONG_ALG_RENO, CONG_ALG_TAHOE, CONG_ALG_NEWRENO, CONG_ALG_HIGHSPEED }; enum { /* RSS hash type */ RSS_HASH_NONE = 0, /* no hash computed */ RSS_HASH_IP = 1, /* IP or IPv6 2-tuple hash */ RSS_HASH_TCP = 2, /* TCP 4-tuple hash */ RSS_HASH_UDP = 3 /* UDP 4-tuple hash */ }; enum { /* LE commands */ LE_CMD_READ = 0x4, LE_CMD_WRITE = 0xb }; enum { /* LE request size */ LE_SZ_NONE = 0, LE_SZ_33 = 1, LE_SZ_66 = 2, LE_SZ_132 = 3, LE_SZ_264 = 4, LE_SZ_528 = 5 }; union opcode_tid { __be32 opcode_tid; __u8 opcode; }; #define S_CPL_OPCODE 24 #define V_CPL_OPCODE(x) ((x) << S_CPL_OPCODE) #define G_CPL_OPCODE(x) (((x) >> S_CPL_OPCODE) & 0xFF) #define G_TID(x) ((x) & 0xFFFFFF) /* tid is assumed to be 24-bits */ #define MK_OPCODE_TID(opcode, tid) (V_CPL_OPCODE(opcode) | (tid)) #define OPCODE_TID(cmd) ((cmd)->ot.opcode_tid) /* extract the TID from a CPL command */ #define GET_TID(cmd) (G_TID(ntohl(OPCODE_TID(cmd)))) /* partitioning of TID fields that also carry a queue id */ #define S_TID_TID 0 #define M_TID_TID 0x3fff #define V_TID_TID(x) ((x) << S_TID_TID) #define G_TID_TID(x) (((x) >> S_TID_TID) & M_TID_TID) #define S_TID_QID 14 #define M_TID_QID 0x3ff #define V_TID_QID(x) ((x) << S_TID_QID) #define G_TID_QID(x) (((x) >> S_TID_QID) & M_TID_QID) union opcode_info { __be64 opcode_info; __u8 opcode; }; struct tcp_options { __be16 mss; __u8 wsf; #if defined(__LITTLE_ENDIAN_BITFIELD) __u8 :4; __u8 unknown:1; __u8 ecn:1; __u8 sack:1; __u8 tstamp:1; #else __u8 tstamp:1; __u8 
sack:1; __u8 ecn:1; __u8 unknown:1; __u8 :4; #endif }; struct rss_header { __u8 opcode; #if defined(__LITTLE_ENDIAN_BITFIELD) __u8 channel:2; __u8 filter_hit:1; __u8 filter_tid:1; __u8 hash_type:2; __u8 ipv6:1; __u8 send2fw:1; #else __u8 send2fw:1; __u8 ipv6:1; __u8 hash_type:2; __u8 filter_tid:1; __u8 filter_hit:1; __u8 channel:2; #endif __be16 qid; __be32 hash_val; }; #define S_HASHTYPE 20 #define M_HASHTYPE 0x3 #define G_HASHTYPE(x) (((x) >> S_HASHTYPE) & M_HASHTYPE) #define S_QNUM 0 #define M_QNUM 0xFFFF #define G_QNUM(x) (((x) >> S_QNUM) & M_QNUM) #ifndef CHELSIO_FW struct work_request_hdr { __be32 wr_hi; __be32 wr_mid; __be64 wr_lo; }; /* wr_mid fields */ #define S_WR_LEN16 0 #define M_WR_LEN16 0xFF #define V_WR_LEN16(x) ((x) << S_WR_LEN16) #define G_WR_LEN16(x) (((x) >> S_WR_LEN16) & M_WR_LEN16) /* wr_hi fields */ #define S_WR_OP 24 #define M_WR_OP 0xFF #define V_WR_OP(x) ((__u64)(x) << S_WR_OP) #define G_WR_OP(x) (((x) >> S_WR_OP) & M_WR_OP) # define WR_HDR struct work_request_hdr wr # define WR_HDR_SIZE sizeof(struct work_request_hdr) # define RSS_HDR #else # define WR_HDR # define WR_HDR_SIZE 0 # define RSS_HDR struct rss_header rss_hdr; #endif /* option 0 fields */ #define S_ACCEPT_MODE 0 #define M_ACCEPT_MODE 0x3 #define V_ACCEPT_MODE(x) ((x) << S_ACCEPT_MODE) #define G_ACCEPT_MODE(x) (((x) >> S_ACCEPT_MODE) & M_ACCEPT_MODE) #define S_TX_CHAN 2 #define M_TX_CHAN 0x3 #define V_TX_CHAN(x) ((x) << S_TX_CHAN) #define G_TX_CHAN(x) (((x) >> S_TX_CHAN) & M_TX_CHAN) #define S_NO_CONG 4 #define V_NO_CONG(x) ((x) << S_NO_CONG) #define F_NO_CONG V_NO_CONG(1U) #define S_DELACK 5 #define V_DELACK(x) ((x) << S_DELACK) #define F_DELACK V_DELACK(1U) #define S_INJECT_TIMER 6 #define V_INJECT_TIMER(x) ((x) << S_INJECT_TIMER) #define F_INJECT_TIMER V_INJECT_TIMER(1U) #define S_NON_OFFLOAD 7 #define V_NON_OFFLOAD(x) ((x) << S_NON_OFFLOAD) #define F_NON_OFFLOAD V_NON_OFFLOAD(1U) #define S_ULP_MODE 8 #define M_ULP_MODE 0xF #define V_ULP_MODE(x) ((x) << S_ULP_MODE) #define 
G_ULP_MODE(x) (((x) >> S_ULP_MODE) & M_ULP_MODE) #define S_RCV_BUFSIZ 12 #define M_RCV_BUFSIZ 0x3FFU #define V_RCV_BUFSIZ(x) ((x) << S_RCV_BUFSIZ) #define G_RCV_BUFSIZ(x) (((x) >> S_RCV_BUFSIZ) & M_RCV_BUFSIZ) #define S_DSCP 22 #define M_DSCP 0x3F #define V_DSCP(x) ((x) << S_DSCP) #define G_DSCP(x) (((x) >> S_DSCP) & M_DSCP) #define S_SMAC_SEL 28 #define M_SMAC_SEL 0xFF #define V_SMAC_SEL(x) ((__u64)(x) << S_SMAC_SEL) #define G_SMAC_SEL(x) (((x) >> S_SMAC_SEL) & M_SMAC_SEL) #define S_L2T_IDX 36 #define M_L2T_IDX 0xFFF #define V_L2T_IDX(x) ((__u64)(x) << S_L2T_IDX) #define G_L2T_IDX(x) (((x) >> S_L2T_IDX) & M_L2T_IDX) #define S_TCAM_BYPASS 48 #define V_TCAM_BYPASS(x) ((__u64)(x) << S_TCAM_BYPASS) #define F_TCAM_BYPASS V_TCAM_BYPASS(1ULL) #define S_NAGLE 49 #define V_NAGLE(x) ((__u64)(x) << S_NAGLE) #define F_NAGLE V_NAGLE(1ULL) #define S_WND_SCALE 50 #define M_WND_SCALE 0xF #define V_WND_SCALE(x) ((__u64)(x) << S_WND_SCALE) #define G_WND_SCALE(x) (((x) >> S_WND_SCALE) & M_WND_SCALE) #define S_KEEP_ALIVE 54 #define V_KEEP_ALIVE(x) ((__u64)(x) << S_KEEP_ALIVE) #define F_KEEP_ALIVE V_KEEP_ALIVE(1ULL) #define S_MAX_RT 55 #define M_MAX_RT 0xF #define V_MAX_RT(x) ((__u64)(x) << S_MAX_RT) #define G_MAX_RT(x) (((x) >> S_MAX_RT) & M_MAX_RT) #define S_MAX_RT_OVERRIDE 59 #define V_MAX_RT_OVERRIDE(x) ((__u64)(x) << S_MAX_RT_OVERRIDE) #define F_MAX_RT_OVERRIDE V_MAX_RT_OVERRIDE(1ULL) #define S_MSS_IDX 60 #define M_MSS_IDX 0xF #define V_MSS_IDX(x) ((__u64)(x) << S_MSS_IDX) #define G_MSS_IDX(x) (((x) >> S_MSS_IDX) & M_MSS_IDX) /* option 1 fields */ #define S_SYN_RSS_ENABLE 0 #define V_SYN_RSS_ENABLE(x) ((x) << S_SYN_RSS_ENABLE) #define F_SYN_RSS_ENABLE V_SYN_RSS_ENABLE(1U) #define S_SYN_RSS_USE_HASH 1 #define V_SYN_RSS_USE_HASH(x) ((x) << S_SYN_RSS_USE_HASH) #define F_SYN_RSS_USE_HASH V_SYN_RSS_USE_HASH(1U) #define S_SYN_RSS_QUEUE 2 #define M_SYN_RSS_QUEUE 0x3FF #define V_SYN_RSS_QUEUE(x) ((x) << S_SYN_RSS_QUEUE) #define G_SYN_RSS_QUEUE(x) (((x) >> S_SYN_RSS_QUEUE) & 
M_SYN_RSS_QUEUE) #define S_LISTEN_INTF 12 #define M_LISTEN_INTF 0xFF #define V_LISTEN_INTF(x) ((x) << S_LISTEN_INTF) #define G_LISTEN_INTF(x) (((x) >> S_LISTEN_INTF) & M_LISTEN_INTF) #define S_LISTEN_FILTER 20 #define V_LISTEN_FILTER(x) ((x) << S_LISTEN_FILTER) #define F_LISTEN_FILTER V_LISTEN_FILTER(1U) #define S_SYN_DEFENSE 21 #define V_SYN_DEFENSE(x) ((x) << S_SYN_DEFENSE) #define F_SYN_DEFENSE V_SYN_DEFENSE(1U) #define S_CONN_POLICY 22 #define M_CONN_POLICY 0x3 #define V_CONN_POLICY(x) ((x) << S_CONN_POLICY) #define G_CONN_POLICY(x) (((x) >> S_CONN_POLICY) & M_CONN_POLICY) /* option 2 fields */ #define S_RSS_QUEUE 0 #define M_RSS_QUEUE 0x3FF #define V_RSS_QUEUE(x) ((x) << S_RSS_QUEUE) #define G_RSS_QUEUE(x) (((x) >> S_RSS_QUEUE) & M_RSS_QUEUE) #define S_RSS_QUEUE_VALID 10 #define V_RSS_QUEUE_VALID(x) ((x) << S_RSS_QUEUE_VALID) #define F_RSS_QUEUE_VALID V_RSS_QUEUE_VALID(1U) #define S_RX_COALESCE_VALID 11 #define V_RX_COALESCE_VALID(x) ((x) << S_RX_COALESCE_VALID) #define F_RX_COALESCE_VALID V_RX_COALESCE_VALID(1U) #define S_RX_COALESCE 12 #define M_RX_COALESCE 0x3 #define V_RX_COALESCE(x) ((x) << S_RX_COALESCE) #define G_RX_COALESCE(x) (((x) >> S_RX_COALESCE) & M_RX_COALESCE) #define S_CONG_CNTRL 14 #define M_CONG_CNTRL 0x3 #define V_CONG_CNTRL(x) ((x) << S_CONG_CNTRL) #define G_CONG_CNTRL(x) (((x) >> S_CONG_CNTRL) & M_CONG_CNTRL) #define S_PACE 16 #define M_PACE 0x3 #define V_PACE(x) ((x) << S_PACE) #define G_PACE(x) (((x) >> S_PACE) & M_PACE) #define S_CONG_CNTRL_VALID 18 #define V_CONG_CNTRL_VALID(x) ((x) << S_CONG_CNTRL_VALID) #define F_CONG_CNTRL_VALID V_CONG_CNTRL_VALID(1U) #define S_PACE_VALID 19 #define V_PACE_VALID(x) ((x) << S_PACE_VALID) #define F_PACE_VALID V_PACE_VALID(1U) #define S_RX_FC_DISABLE 20 #define V_RX_FC_DISABLE(x) ((x) << S_RX_FC_DISABLE) #define F_RX_FC_DISABLE V_RX_FC_DISABLE(1U) #define S_RX_FC_DDP 21 #define V_RX_FC_DDP(x) ((x) << S_RX_FC_DDP) #define F_RX_FC_DDP V_RX_FC_DDP(1U) #define S_RX_FC_VALID 22 #define V_RX_FC_VALID(x) ((x) 
<< S_RX_FC_VALID) #define F_RX_FC_VALID V_RX_FC_VALID(1U) #define S_TX_QUEUE 23 #define M_TX_QUEUE 0x7 #define V_TX_QUEUE(x) ((x) << S_TX_QUEUE) #define G_TX_QUEUE(x) (((x) >> S_TX_QUEUE) & M_TX_QUEUE) #define S_RX_CHANNEL 26 #define V_RX_CHANNEL(x) ((x) << S_RX_CHANNEL) #define F_RX_CHANNEL V_RX_CHANNEL(1U) #define S_CCTRL_ECN 27 #define V_CCTRL_ECN(x) ((x) << S_CCTRL_ECN) #define F_CCTRL_ECN V_CCTRL_ECN(1U) #define S_WND_SCALE_EN 28 #define V_WND_SCALE_EN(x) ((x) << S_WND_SCALE_EN) #define F_WND_SCALE_EN V_WND_SCALE_EN(1U) #define S_TSTAMPS_EN 29 #define V_TSTAMPS_EN(x) ((x) << S_TSTAMPS_EN) #define F_TSTAMPS_EN V_TSTAMPS_EN(1U) #define S_SACK_EN 30 #define V_SACK_EN(x) ((x) << S_SACK_EN) #define F_SACK_EN V_SACK_EN(1U) struct cpl_pass_open_req { WR_HDR; union opcode_tid ot; __be16 local_port; __be16 peer_port; __be32 local_ip; __be32 peer_ip; __be64 opt0; __be64 opt1; }; struct cpl_pass_open_req6 { WR_HDR; union opcode_tid ot; __be16 local_port; __be16 peer_port; __be64 local_ip_hi; __be64 local_ip_lo; __be64 peer_ip_hi; __be64 peer_ip_lo; __be64 opt0; __be64 opt1; }; struct cpl_pass_open_rpl { RSS_HDR union opcode_tid ot; __u8 rsvd[3]; __u8 status; }; struct cpl_pass_establish { RSS_HDR union opcode_tid ot; __be32 rsvd; __be32 tos_stid; __be16 mac_idx; __be16 tcp_opt; __be32 snd_isn; __be32 rcv_isn; }; /* cpl_pass_establish.tos_stid fields */ #define S_PASS_OPEN_TID 0 #define M_PASS_OPEN_TID 0xFFFFFF #define V_PASS_OPEN_TID(x) ((x) << S_PASS_OPEN_TID) #define G_PASS_OPEN_TID(x) (((x) >> S_PASS_OPEN_TID) & M_PASS_OPEN_TID) #define S_PASS_OPEN_TOS 24 #define M_PASS_OPEN_TOS 0xFF #define V_PASS_OPEN_TOS(x) ((x) << S_PASS_OPEN_TOS) #define G_PASS_OPEN_TOS(x) (((x) >> S_PASS_OPEN_TOS) & M_PASS_OPEN_TOS) /* cpl_pass_establish.tcp_opt fields (also applies to act_open_establish) */ #define G_TCPOPT_WSCALE_OK(x) (((x) >> 5) & 1) #define G_TCPOPT_SACK(x) (((x) >> 6) & 1) #define G_TCPOPT_TSTAMP(x) (((x) >> 7) & 1) #define G_TCPOPT_SND_WSCALE(x) (((x) >> 8) & 0xf) #define 
G_TCPOPT_MSS(x) (((x) >> 12) & 0xf) struct cpl_pass_accept_req { RSS_HDR union opcode_tid ot; __be16 rsvd; __be16 len; __be32 hdr_len; __be16 vlan; __be16 l2info; __be32 tos_stid; struct tcp_options tcpopt; }; /* cpl_pass_accept_req.hdr_len fields */ #define S_SYN_RX_CHAN 0 #define M_SYN_RX_CHAN 0xF #define V_SYN_RX_CHAN(x) ((x) << S_SYN_RX_CHAN) #define G_SYN_RX_CHAN(x) (((x) >> S_SYN_RX_CHAN) & M_SYN_RX_CHAN) #define S_TCP_HDR_LEN 10 #define M_TCP_HDR_LEN 0x3F #define V_TCP_HDR_LEN(x) ((x) << S_TCP_HDR_LEN) #define G_TCP_HDR_LEN(x) (((x) >> S_TCP_HDR_LEN) & M_TCP_HDR_LEN) #define S_IP_HDR_LEN 16 #define M_IP_HDR_LEN 0x3FF #define V_IP_HDR_LEN(x) ((x) << S_IP_HDR_LEN) #define G_IP_HDR_LEN(x) (((x) >> S_IP_HDR_LEN) & M_IP_HDR_LEN) #define S_ETH_HDR_LEN 26 #define M_ETH_HDR_LEN 0x3F #define V_ETH_HDR_LEN(x) ((x) << S_ETH_HDR_LEN) #define G_ETH_HDR_LEN(x) (((x) >> S_ETH_HDR_LEN) & M_ETH_HDR_LEN) /* cpl_pass_accept_req.l2info fields */ #define S_SYN_MAC_IDX 0 #define M_SYN_MAC_IDX 0x1FF #define V_SYN_MAC_IDX(x) ((x) << S_SYN_MAC_IDX) #define G_SYN_MAC_IDX(x) (((x) >> S_SYN_MAC_IDX) & M_SYN_MAC_IDX) #define S_SYN_XACT_MATCH 9 #define V_SYN_XACT_MATCH(x) ((x) << S_SYN_XACT_MATCH) #define F_SYN_XACT_MATCH V_SYN_XACT_MATCH(1U) #define S_SYN_INTF 12 #define M_SYN_INTF 0xF #define V_SYN_INTF(x) ((x) << S_SYN_INTF) #define G_SYN_INTF(x) (((x) >> S_SYN_INTF) & M_SYN_INTF) struct cpl_pass_accept_rpl { WR_HDR; union opcode_tid ot; __be32 opt2; __be64 opt0; }; struct cpl_act_open_req { WR_HDR; union opcode_tid ot; __be16 local_port; __be16 peer_port; __be32 local_ip; __be32 peer_ip; __be64 opt0; __be32 params; __be32 opt2; }; struct cpl_t5_act_open_req { WR_HDR; union opcode_tid ot; __be16 local_port; __be16 peer_port; __be32 local_ip; __be32 peer_ip; __be64 opt0; __be32 rsvd; __be32 opt2; __be64 params; }; struct cpl_act_open_req6 { WR_HDR; union opcode_tid ot; __be16 local_port; __be16 peer_port; __be64 local_ip_hi; __be64 local_ip_lo; __be64 peer_ip_hi; __be64 peer_ip_lo; 
__be64 opt0; __be32 params; __be32 opt2; }; struct cpl_t5_act_open_req6 { WR_HDR; union opcode_tid ot; __be16 local_port; __be16 peer_port; __be64 local_ip_hi; __be64 local_ip_lo; __be64 peer_ip_hi; __be64 peer_ip_lo; __be64 opt0; __be32 rsvd; __be32 opt2; __be64 params; }; struct cpl_act_open_rpl { RSS_HDR union opcode_tid ot; __be32 atid_status; }; /* cpl_act_open_rpl.atid_status fields */ #define S_AOPEN_STATUS 0 #define M_AOPEN_STATUS 0xFF #define V_AOPEN_STATUS(x) ((x) << S_AOPEN_STATUS) #define G_AOPEN_STATUS(x) (((x) >> S_AOPEN_STATUS) & M_AOPEN_STATUS) #define S_AOPEN_ATID 8 #define M_AOPEN_ATID 0xFFFFFF #define V_AOPEN_ATID(x) ((x) << S_AOPEN_ATID) #define G_AOPEN_ATID(x) (((x) >> S_AOPEN_ATID) & M_AOPEN_ATID) struct cpl_act_establish { RSS_HDR union opcode_tid ot; __be32 rsvd; __be32 tos_atid; __be16 mac_idx; __be16 tcp_opt; __be32 snd_isn; __be32 rcv_isn; }; struct cpl_get_tcb { WR_HDR; union opcode_tid ot; __be16 reply_ctrl; __be16 cookie; }; /* cpl_get_tcb.reply_ctrl fields */ #define S_QUEUENO 0 #define M_QUEUENO 0x3FF #define V_QUEUENO(x) ((x) << S_QUEUENO) #define G_QUEUENO(x) (((x) >> S_QUEUENO) & M_QUEUENO) #define S_REPLY_CHAN 14 #define V_REPLY_CHAN(x) ((x) << S_REPLY_CHAN) #define F_REPLY_CHAN V_REPLY_CHAN(1U) #define S_NO_REPLY 15 #define V_NO_REPLY(x) ((x) << S_NO_REPLY) #define F_NO_REPLY V_NO_REPLY(1U) struct cpl_get_tcb_rpl { RSS_HDR union opcode_tid ot; __u8 cookie; __u8 status; __be16 len; }; struct cpl_set_tcb { WR_HDR; union opcode_tid ot; __be16 reply_ctrl; __be16 cookie; }; struct cpl_set_tcb_field { WR_HDR; union opcode_tid ot; __be16 reply_ctrl; __be16 word_cookie; __be64 mask; __be64 val; }; +struct cpl_set_tcb_field_core { + union opcode_tid ot; + __be16 reply_ctrl; + __be16 word_cookie; + __be64 mask; + __be64 val; +}; + /* cpl_set_tcb_field.word_cookie fields */ #define S_WORD 0 #define M_WORD 0x1F #define V_WORD(x) ((x) << S_WORD) #define G_WORD(x) (((x) >> S_WORD) & M_WORD) #define S_COOKIE 5 #define M_COOKIE 0x7 #define 
V_COOKIE(x) ((x) << S_COOKIE) #define G_COOKIE(x) (((x) >> S_COOKIE) & M_COOKIE) struct cpl_set_tcb_rpl { RSS_HDR union opcode_tid ot; __be16 rsvd; __u8 cookie; __u8 status; __be64 oldval; }; struct cpl_close_con_req { WR_HDR; union opcode_tid ot; __be32 rsvd; }; struct cpl_close_con_rpl { RSS_HDR union opcode_tid ot; __u8 rsvd[3]; __u8 status; __be32 snd_nxt; __be32 rcv_nxt; }; struct cpl_close_listsvr_req { WR_HDR; union opcode_tid ot; __be16 reply_ctrl; __be16 rsvd; }; /* additional cpl_close_listsvr_req.reply_ctrl field */ #define S_LISTSVR_IPV6 14 #define V_LISTSVR_IPV6(x) ((x) << S_LISTSVR_IPV6) #define F_LISTSVR_IPV6 V_LISTSVR_IPV6(1U) struct cpl_close_listsvr_rpl { RSS_HDR union opcode_tid ot; __u8 rsvd[3]; __u8 status; }; struct cpl_abort_req_rss { RSS_HDR union opcode_tid ot; __u8 rsvd[3]; __u8 status; }; struct cpl_abort_req { WR_HDR; union opcode_tid ot; __be32 rsvd0; __u8 rsvd1; __u8 cmd; __u8 rsvd2[6]; }; struct cpl_abort_rpl_rss { RSS_HDR union opcode_tid ot; __u8 rsvd[3]; __u8 status; }; struct cpl_abort_rpl { WR_HDR; union opcode_tid ot; __be32 rsvd0; __u8 rsvd1; __u8 cmd; __u8 rsvd2[6]; }; struct cpl_peer_close { RSS_HDR union opcode_tid ot; __be32 rcv_nxt; }; struct cpl_tid_release { WR_HDR; union opcode_tid ot; __be32 rsvd; }; struct tx_data_wr { __be32 wr_hi; __be32 wr_lo; __be32 len; __be32 flags; __be32 sndseq; __be32 param; }; /* tx_data_wr.flags fields */ #define S_TX_ACK_PAGES 21 #define M_TX_ACK_PAGES 0x7 #define V_TX_ACK_PAGES(x) ((x) << S_TX_ACK_PAGES) #define G_TX_ACK_PAGES(x) (((x) >> S_TX_ACK_PAGES) & M_TX_ACK_PAGES) /* tx_data_wr.param fields */ #define S_TX_PORT 0 #define M_TX_PORT 0x7 #define V_TX_PORT(x) ((x) << S_TX_PORT) #define G_TX_PORT(x) (((x) >> S_TX_PORT) & M_TX_PORT) #define S_TX_MSS 4 #define M_TX_MSS 0xF #define V_TX_MSS(x) ((x) << S_TX_MSS) #define G_TX_MSS(x) (((x) >> S_TX_MSS) & M_TX_MSS) #define S_TX_QOS 8 #define M_TX_QOS 0xFF #define V_TX_QOS(x) ((x) << S_TX_QOS) #define G_TX_QOS(x) (((x) >> S_TX_QOS) & M_TX_QOS) 
#define S_TX_SNDBUF 16 #define M_TX_SNDBUF 0xFFFF #define V_TX_SNDBUF(x) ((x) << S_TX_SNDBUF) #define G_TX_SNDBUF(x) (((x) >> S_TX_SNDBUF) & M_TX_SNDBUF) struct cpl_tx_data { union opcode_tid ot; __be32 len; __be32 rsvd; __be32 flags; }; /* cpl_tx_data.flags fields */ #define S_TX_PROXY 5 #define V_TX_PROXY(x) ((x) << S_TX_PROXY) #define F_TX_PROXY V_TX_PROXY(1U) #define S_TX_ULP_SUBMODE 6 #define M_TX_ULP_SUBMODE 0xF #define V_TX_ULP_SUBMODE(x) ((x) << S_TX_ULP_SUBMODE) #define G_TX_ULP_SUBMODE(x) (((x) >> S_TX_ULP_SUBMODE) & M_TX_ULP_SUBMODE) #define S_TX_ULP_MODE 10 #define M_TX_ULP_MODE 0xF #define V_TX_ULP_MODE(x) ((x) << S_TX_ULP_MODE) #define G_TX_ULP_MODE(x) (((x) >> S_TX_ULP_MODE) & M_TX_ULP_MODE) #define S_TX_SHOVE 14 #define V_TX_SHOVE(x) ((x) << S_TX_SHOVE) #define F_TX_SHOVE V_TX_SHOVE(1U) #define S_TX_MORE 15 #define V_TX_MORE(x) ((x) << S_TX_MORE) #define F_TX_MORE V_TX_MORE(1U) #define S_TX_URG 16 #define V_TX_URG(x) ((x) << S_TX_URG) #define F_TX_URG V_TX_URG(1U) #define S_TX_FLUSH 17 #define V_TX_FLUSH(x) ((x) << S_TX_FLUSH) #define F_TX_FLUSH V_TX_FLUSH(1U) #define S_TX_SAVE 18 #define V_TX_SAVE(x) ((x) << S_TX_SAVE) #define F_TX_SAVE V_TX_SAVE(1U) #define S_TX_TNL 19 #define V_TX_TNL(x) ((x) << S_TX_TNL) #define F_TX_TNL V_TX_TNL(1U) /* additional tx_data_wr.flags fields */ #define S_TX_CPU_IDX 0 #define M_TX_CPU_IDX 0x3F #define V_TX_CPU_IDX(x) ((x) << S_TX_CPU_IDX) #define G_TX_CPU_IDX(x) (((x) >> S_TX_CPU_IDX) & M_TX_CPU_IDX) #define S_TX_CLOSE 17 #define V_TX_CLOSE(x) ((x) << S_TX_CLOSE) #define F_TX_CLOSE V_TX_CLOSE(1U) #define S_TX_INIT 18 #define V_TX_INIT(x) ((x) << S_TX_INIT) #define F_TX_INIT V_TX_INIT(1U) #define S_TX_IMM_ACK 19 #define V_TX_IMM_ACK(x) ((x) << S_TX_IMM_ACK) #define F_TX_IMM_ACK V_TX_IMM_ACK(1U) #define S_TX_IMM_DMA 20 #define V_TX_IMM_DMA(x) ((x) << S_TX_IMM_DMA) #define F_TX_IMM_DMA V_TX_IMM_DMA(1U) struct cpl_tx_data_ack { RSS_HDR union opcode_tid ot; __be32 snd_una; }; struct cpl_wr_ack { /* XXX */ RSS_HDR union 
opcode_tid ot; __be16 credits; __be16 rsvd; __be32 snd_nxt; __be32 snd_una; }; struct cpl_tx_pkt_core { __be32 ctrl0; __be16 pack; __be16 len; __be64 ctrl1; }; struct cpl_tx_pkt { WR_HDR; struct cpl_tx_pkt_core c; }; #define cpl_tx_pkt_xt cpl_tx_pkt /* cpl_tx_pkt_core.ctrl0 fields */ #define S_TXPKT_VF 0 #define M_TXPKT_VF 0xFF #define V_TXPKT_VF(x) ((x) << S_TXPKT_VF) #define G_TXPKT_VF(x) (((x) >> S_TXPKT_VF) & M_TXPKT_VF) #define S_TXPKT_PF 8 #define M_TXPKT_PF 0x7 #define V_TXPKT_PF(x) ((x) << S_TXPKT_PF) #define G_TXPKT_PF(x) (((x) >> S_TXPKT_PF) & M_TXPKT_PF) #define S_TXPKT_VF_VLD 11 #define V_TXPKT_VF_VLD(x) ((x) << S_TXPKT_VF_VLD) #define F_TXPKT_VF_VLD V_TXPKT_VF_VLD(1U) #define S_TXPKT_OVLAN_IDX 12 #define M_TXPKT_OVLAN_IDX 0xF #define V_TXPKT_OVLAN_IDX(x) ((x) << S_TXPKT_OVLAN_IDX) #define G_TXPKT_OVLAN_IDX(x) (((x) >> S_TXPKT_OVLAN_IDX) & M_TXPKT_OVLAN_IDX) #define S_TXPKT_INTF 16 #define M_TXPKT_INTF 0xF #define V_TXPKT_INTF(x) ((x) << S_TXPKT_INTF) #define G_TXPKT_INTF(x) (((x) >> S_TXPKT_INTF) & M_TXPKT_INTF) #define S_TXPKT_SPECIAL_STAT 20 #define V_TXPKT_SPECIAL_STAT(x) ((x) << S_TXPKT_SPECIAL_STAT) #define F_TXPKT_SPECIAL_STAT V_TXPKT_SPECIAL_STAT(1U) #define S_TXPKT_INS_OVLAN 21 #define V_TXPKT_INS_OVLAN(x) ((x) << S_TXPKT_INS_OVLAN) #define F_TXPKT_INS_OVLAN V_TXPKT_INS_OVLAN(1U) #define S_TXPKT_STAT_DIS 22 #define V_TXPKT_STAT_DIS(x) ((x) << S_TXPKT_STAT_DIS) #define F_TXPKT_STAT_DIS V_TXPKT_STAT_DIS(1U) #define S_TXPKT_LOOPBACK 23 #define V_TXPKT_LOOPBACK(x) ((x) << S_TXPKT_LOOPBACK) #define F_TXPKT_LOOPBACK V_TXPKT_LOOPBACK(1U) #define S_TXPKT_TSTAMP 23 #define V_TXPKT_TSTAMP(x) ((x) << S_TXPKT_TSTAMP) #define F_TXPKT_TSTAMP V_TXPKT_TSTAMP(1U) #define S_TXPKT_OPCODE 24 #define M_TXPKT_OPCODE 0xFF #define V_TXPKT_OPCODE(x) ((x) << S_TXPKT_OPCODE) #define G_TXPKT_OPCODE(x) (((x) >> S_TXPKT_OPCODE) & M_TXPKT_OPCODE) /* cpl_tx_pkt_core.ctrl1 fields */ #define S_TXPKT_SA_IDX 0 #define M_TXPKT_SA_IDX 0xFFF #define V_TXPKT_SA_IDX(x) ((x) << 
S_TXPKT_SA_IDX) #define G_TXPKT_SA_IDX(x) (((x) >> S_TXPKT_SA_IDX) & M_TXPKT_SA_IDX) #define S_TXPKT_CSUM_END 12 #define M_TXPKT_CSUM_END 0xFF #define V_TXPKT_CSUM_END(x) ((x) << S_TXPKT_CSUM_END) #define G_TXPKT_CSUM_END(x) (((x) >> S_TXPKT_CSUM_END) & M_TXPKT_CSUM_END) #define S_TXPKT_CSUM_START 20 #define M_TXPKT_CSUM_START 0x3FF #define V_TXPKT_CSUM_START(x) ((x) << S_TXPKT_CSUM_START) #define G_TXPKT_CSUM_START(x) (((x) >> S_TXPKT_CSUM_START) & M_TXPKT_CSUM_START) #define S_TXPKT_IPHDR_LEN 20 #define M_TXPKT_IPHDR_LEN 0x3FFF #define V_TXPKT_IPHDR_LEN(x) ((__u64)(x) << S_TXPKT_IPHDR_LEN) #define G_TXPKT_IPHDR_LEN(x) (((x) >> S_TXPKT_IPHDR_LEN) & M_TXPKT_IPHDR_LEN) #define S_TXPKT_CSUM_LOC 30 #define M_TXPKT_CSUM_LOC 0x3FF #define V_TXPKT_CSUM_LOC(x) ((__u64)(x) << S_TXPKT_CSUM_LOC) #define G_TXPKT_CSUM_LOC(x) (((x) >> S_TXPKT_CSUM_LOC) & M_TXPKT_CSUM_LOC) #define S_TXPKT_ETHHDR_LEN 34 #define M_TXPKT_ETHHDR_LEN 0x3F #define V_TXPKT_ETHHDR_LEN(x) ((__u64)(x) << S_TXPKT_ETHHDR_LEN) #define G_TXPKT_ETHHDR_LEN(x) (((x) >> S_TXPKT_ETHHDR_LEN) & M_TXPKT_ETHHDR_LEN) #define S_TXPKT_CSUM_TYPE 40 #define M_TXPKT_CSUM_TYPE 0xF #define V_TXPKT_CSUM_TYPE(x) ((__u64)(x) << S_TXPKT_CSUM_TYPE) #define G_TXPKT_CSUM_TYPE(x) (((x) >> S_TXPKT_CSUM_TYPE) & M_TXPKT_CSUM_TYPE) #define S_TXPKT_VLAN 44 #define M_TXPKT_VLAN 0xFFFF #define V_TXPKT_VLAN(x) ((__u64)(x) << S_TXPKT_VLAN) #define G_TXPKT_VLAN(x) (((x) >> S_TXPKT_VLAN) & M_TXPKT_VLAN) #define S_TXPKT_VLAN_VLD 60 #define V_TXPKT_VLAN_VLD(x) ((__u64)(x) << S_TXPKT_VLAN_VLD) #define F_TXPKT_VLAN_VLD V_TXPKT_VLAN_VLD(1ULL) #define S_TXPKT_IPSEC 61 #define V_TXPKT_IPSEC(x) ((__u64)(x) << S_TXPKT_IPSEC) #define F_TXPKT_IPSEC V_TXPKT_IPSEC(1ULL) #define S_TXPKT_IPCSUM_DIS 62 #define V_TXPKT_IPCSUM_DIS(x) ((__u64)(x) << S_TXPKT_IPCSUM_DIS) #define F_TXPKT_IPCSUM_DIS V_TXPKT_IPCSUM_DIS(1ULL) #define S_TXPKT_L4CSUM_DIS 63 #define V_TXPKT_L4CSUM_DIS(x) ((__u64)(x) << S_TXPKT_L4CSUM_DIS) #define F_TXPKT_L4CSUM_DIS 
V_TXPKT_L4CSUM_DIS(1ULL) struct cpl_tx_pkt_lso_core { __be32 lso_ctrl; __be16 ipid_ofst; __be16 mss; __be32 seqno_offset; __be32 len; /* encapsulated CPL (TX_PKT, TX_PKT_XT or TX_DATA) follows here */ }; struct cpl_tx_pkt_lso { WR_HDR; struct cpl_tx_pkt_lso_core c; /* encapsulated CPL (TX_PKT, TX_PKT_XT or TX_DATA) follows here */ }; struct cpl_tx_pkt_ufo_core { __be16 ethlen; __be16 iplen; __be16 udplen; __be16 mss; __be32 len; __be32 r1; /* encapsulated CPL (TX_PKT, TX_PKT_XT or TX_DATA) follows here */ }; struct cpl_tx_pkt_ufo { WR_HDR; struct cpl_tx_pkt_ufo_core c; /* encapsulated CPL (TX_PKT, TX_PKT_XT or TX_DATA) follows here */ }; /* cpl_tx_pkt_lso_core.lso_ctrl fields */ #define S_LSO_TCPHDR_LEN 0 #define M_LSO_TCPHDR_LEN 0xF #define V_LSO_TCPHDR_LEN(x) ((x) << S_LSO_TCPHDR_LEN) #define G_LSO_TCPHDR_LEN(x) (((x) >> S_LSO_TCPHDR_LEN) & M_LSO_TCPHDR_LEN) #define S_LSO_IPHDR_LEN 4 #define M_LSO_IPHDR_LEN 0xFFF #define V_LSO_IPHDR_LEN(x) ((x) << S_LSO_IPHDR_LEN) #define G_LSO_IPHDR_LEN(x) (((x) >> S_LSO_IPHDR_LEN) & M_LSO_IPHDR_LEN) #define S_LSO_ETHHDR_LEN 16 #define M_LSO_ETHHDR_LEN 0xF #define V_LSO_ETHHDR_LEN(x) ((x) << S_LSO_ETHHDR_LEN) #define G_LSO_ETHHDR_LEN(x) (((x) >> S_LSO_ETHHDR_LEN) & M_LSO_ETHHDR_LEN) #define S_LSO_IPV6 20 #define V_LSO_IPV6(x) ((x) << S_LSO_IPV6) #define F_LSO_IPV6 V_LSO_IPV6(1U) #define S_LSO_OFLD_ENCAP 21 #define V_LSO_OFLD_ENCAP(x) ((x) << S_LSO_OFLD_ENCAP) #define F_LSO_OFLD_ENCAP V_LSO_OFLD_ENCAP(1U) #define S_LSO_LAST_SLICE 22 #define V_LSO_LAST_SLICE(x) ((x) << S_LSO_LAST_SLICE) #define F_LSO_LAST_SLICE V_LSO_LAST_SLICE(1U) #define S_LSO_FIRST_SLICE 23 #define V_LSO_FIRST_SLICE(x) ((x) << S_LSO_FIRST_SLICE) #define F_LSO_FIRST_SLICE V_LSO_FIRST_SLICE(1U) #define S_LSO_OPCODE 24 #define M_LSO_OPCODE 0xFF #define V_LSO_OPCODE(x) ((x) << S_LSO_OPCODE) #define G_LSO_OPCODE(x) (((x) >> S_LSO_OPCODE) & M_LSO_OPCODE) /* cpl_tx_pkt_lso_core.mss fields */ #define S_LSO_MSS 0 #define M_LSO_MSS 0x3FFF #define V_LSO_MSS(x) ((x) << 
S_LSO_MSS) #define G_LSO_MSS(x) (((x) >> S_LSO_MSS) & M_LSO_MSS) #define S_LSO_IPID_SPLIT 15 #define V_LSO_IPID_SPLIT(x) ((x) << S_LSO_IPID_SPLIT) #define F_LSO_IPID_SPLIT V_LSO_IPID_SPLIT(1U) struct cpl_tx_pkt_fso { WR_HDR; __be32 fso_ctrl; __be16 seqcnt_ofst; __be16 mtu; __be32 param_offset; __be32 len; /* encapsulated CPL (TX_PKT or TX_PKT_XT) follows here */ }; /* cpl_tx_pkt_fso.fso_ctrl fields different from cpl_tx_pkt_lso.lso_ctrl */ #define S_FSO_XCHG_CLASS 21 #define V_FSO_XCHG_CLASS(x) ((x) << S_FSO_XCHG_CLASS) #define F_FSO_XCHG_CLASS V_FSO_XCHG_CLASS(1U) #define S_FSO_INITIATOR 20 #define V_FSO_INITIATOR(x) ((x) << S_FSO_INITIATOR) #define F_FSO_INITIATOR V_FSO_INITIATOR(1U) #define S_FSO_FCHDR_LEN 12 #define M_FSO_FCHDR_LEN 0xF #define V_FSO_FCHDR_LEN(x) ((x) << S_FSO_FCHDR_LEN) #define G_FSO_FCHDR_LEN(x) (((x) >> S_FSO_FCHDR_LEN) & M_FSO_FCHDR_LEN) struct cpl_iscsi_hdr_no_rss { union opcode_tid ot; __be16 pdu_len_ddp; __be16 len; __be32 seq; __be16 urg; __u8 rsvd; __u8 status; }; struct cpl_tx_data_iso { WR_HDR; __be32 iso_ctrl; __u8 rsvd; __u8 ahs_len; __be16 mss; __be32 burst_size; __be32 len; /* encapsulated CPL_TX_DATA follows here */ }; /* cpl_tx_data_iso.iso_ctrl fields different from cpl_tx_pkt_lso.lso_ctrl */ #define S_ISO_CPLHDR_LEN 18 #define M_ISO_CPLHDR_LEN 0xF #define V_ISO_CPLHDR_LEN(x) ((x) << S_ISO_CPLHDR_LEN) #define G_ISO_CPLHDR_LEN(x) (((x) >> S_ISO_CPLHDR_LEN) & M_ISO_CPLHDR_LEN) #define S_ISO_HDR_CRC 17 #define V_ISO_HDR_CRC(x) ((x) << S_ISO_HDR_CRC) #define F_ISO_HDR_CRC V_ISO_HDR_CRC(1U) #define S_ISO_DATA_CRC 16 #define V_ISO_DATA_CRC(x) ((x) << S_ISO_DATA_CRC) #define F_ISO_DATA_CRC V_ISO_DATA_CRC(1U) #define S_ISO_IMD_DATA_EN 15 #define V_ISO_IMD_DATA_EN(x) ((x) << S_ISO_IMD_DATA_EN) #define F_ISO_IMD_DATA_EN V_ISO_IMD_DATA_EN(1U) #define S_ISO_PDU_TYPE 13 #define M_ISO_PDU_TYPE 0x3 #define V_ISO_PDU_TYPE(x) ((x) << S_ISO_PDU_TYPE) #define G_ISO_PDU_TYPE(x) (((x) >> S_ISO_PDU_TYPE) & M_ISO_PDU_TYPE) struct cpl_iscsi_hdr { 
RSS_HDR union opcode_tid ot; __be16 pdu_len_ddp; __be16 len; __be32 seq; __be16 urg; __u8 rsvd; __u8 status; }; /* cpl_iscsi_hdr.pdu_len_ddp fields */ #define S_ISCSI_PDU_LEN 0 #define M_ISCSI_PDU_LEN 0x7FFF #define V_ISCSI_PDU_LEN(x) ((x) << S_ISCSI_PDU_LEN) #define G_ISCSI_PDU_LEN(x) (((x) >> S_ISCSI_PDU_LEN) & M_ISCSI_PDU_LEN) #define S_ISCSI_DDP 15 #define V_ISCSI_DDP(x) ((x) << S_ISCSI_DDP) #define F_ISCSI_DDP V_ISCSI_DDP(1U) struct cpl_iscsi_data { RSS_HDR union opcode_tid ot; __u8 rsvd0[2]; __be16 len; __be32 seq; __be16 urg; __u8 rsvd1; __u8 status; }; struct cpl_rx_data { RSS_HDR union opcode_tid ot; __be16 rsvd; __be16 len; __be32 seq; __be16 urg; #if defined(__LITTLE_ENDIAN_BITFIELD) __u8 dack_mode:2; __u8 psh:1; __u8 heartbeat:1; __u8 ddp_off:1; __u8 :3; #else __u8 :3; __u8 ddp_off:1; __u8 heartbeat:1; __u8 psh:1; __u8 dack_mode:2; #endif __u8 status; }; struct cpl_fcoe_hdr { RSS_HDR union opcode_tid ot; __be16 oxid; __be16 len; __be32 rctl_fctl; __u8 cs_ctl; __u8 df_ctl; __u8 sof; __u8 eof; __be16 seq_cnt; __u8 seq_id; __u8 type; __be32 param; }; struct cpl_fcoe_data { RSS_HDR union opcode_tid ot; __u8 rsvd0[2]; __be16 len; __be32 seq; __u8 rsvd1[3]; __u8 status; }; struct cpl_rx_urg_notify { RSS_HDR union opcode_tid ot; __be32 seq; }; struct cpl_rx_urg_pkt { RSS_HDR union opcode_tid ot; __be16 rsvd; __be16 len; }; struct cpl_rx_data_ack { WR_HDR; + union opcode_tid ot; + __be32 credit_dack; +}; + +struct cpl_rx_data_ack_core { union opcode_tid ot; __be32 credit_dack; }; /* cpl_rx_data_ack.credit_dack fields */ #define S_RX_CREDITS 0 #define M_RX_CREDITS 0x3FFFFFF #define V_RX_CREDITS(x) ((x) << S_RX_CREDITS) #define G_RX_CREDITS(x) (((x) >> S_RX_CREDITS) & M_RX_CREDITS) #define S_RX_MODULATE_TX 26 #define V_RX_MODULATE_TX(x) ((x) << S_RX_MODULATE_TX) #define F_RX_MODULATE_TX V_RX_MODULATE_TX(1U) #define S_RX_MODULATE_RX 27 #define V_RX_MODULATE_RX(x) ((x) << S_RX_MODULATE_RX) #define F_RX_MODULATE_RX V_RX_MODULATE_RX(1U) #define S_RX_FORCE_ACK 28 #define 
V_RX_FORCE_ACK(x) ((x) << S_RX_FORCE_ACK) #define F_RX_FORCE_ACK V_RX_FORCE_ACK(1U) #define S_RX_DACK_MODE 29 #define M_RX_DACK_MODE 0x3 #define V_RX_DACK_MODE(x) ((x) << S_RX_DACK_MODE) #define G_RX_DACK_MODE(x) (((x) >> S_RX_DACK_MODE) & M_RX_DACK_MODE) #define S_RX_DACK_CHANGE 31 #define V_RX_DACK_CHANGE(x) ((x) << S_RX_DACK_CHANGE) #define F_RX_DACK_CHANGE V_RX_DACK_CHANGE(1U) struct cpl_rx_ddp_complete { RSS_HDR union opcode_tid ot; __be32 ddp_report; __be32 rcv_nxt; __be32 rsvd; }; struct cpl_rx_data_ddp { RSS_HDR union opcode_tid ot; __be16 urg; __be16 len; __be32 seq; union { __be32 nxt_seq; __be32 ddp_report; } u; __be32 ulp_crc; __be32 ddpvld; }; #define cpl_rx_iscsi_ddp cpl_rx_data_ddp struct cpl_rx_fcoe_ddp { RSS_HDR union opcode_tid ot; __be16 rsvd; __be16 len; __be32 seq; __be32 ddp_report; __be32 ulp_crc; __be32 ddpvld; }; struct cpl_rx_data_dif { RSS_HDR union opcode_tid ot; __be16 ddp_len; __be16 msg_len; __be32 seq; union { __be32 nxt_seq; __be32 ddp_report; } u; __be32 err_vec; __be32 ddpvld; }; struct cpl_rx_iscsi_dif { RSS_HDR union opcode_tid ot; __be16 ddp_len; __be16 msg_len; __be32 seq; union { __be32 nxt_seq; __be32 ddp_report; } u; __be32 ulp_crc; __be32 ddpvld; __u8 rsvd0[8]; __be32 err_vec; __u8 rsvd1[4]; }; struct cpl_rx_fcoe_dif { RSS_HDR union opcode_tid ot; __be16 ddp_len; __be16 msg_len; __be32 seq; __be32 ddp_report; __be32 err_vec; __be32 ddpvld; }; /* cpl_rx_{data,iscsi,fcoe}_{ddp,dif}.ddpvld fields */ #define S_DDP_VALID 15 #define M_DDP_VALID 0x1FFFF #define V_DDP_VALID(x) ((x) << S_DDP_VALID) #define G_DDP_VALID(x) (((x) >> S_DDP_VALID) & M_DDP_VALID) #define S_DDP_PPOD_MISMATCH 15 #define V_DDP_PPOD_MISMATCH(x) ((x) << S_DDP_PPOD_MISMATCH) #define F_DDP_PPOD_MISMATCH V_DDP_PPOD_MISMATCH(1U) #define S_DDP_PDU 16 #define V_DDP_PDU(x) ((x) << S_DDP_PDU) #define F_DDP_PDU V_DDP_PDU(1U) #define S_DDP_LLIMIT_ERR 17 #define V_DDP_LLIMIT_ERR(x) ((x) << S_DDP_LLIMIT_ERR) #define F_DDP_LLIMIT_ERR V_DDP_LLIMIT_ERR(1U) #define 
S_DDP_PPOD_PARITY_ERR 18 #define V_DDP_PPOD_PARITY_ERR(x) ((x) << S_DDP_PPOD_PARITY_ERR) #define F_DDP_PPOD_PARITY_ERR V_DDP_PPOD_PARITY_ERR(1U) #define S_DDP_PADDING_ERR 19 #define V_DDP_PADDING_ERR(x) ((x) << S_DDP_PADDING_ERR) #define F_DDP_PADDING_ERR V_DDP_PADDING_ERR(1U) #define S_DDP_HDRCRC_ERR 20 #define V_DDP_HDRCRC_ERR(x) ((x) << S_DDP_HDRCRC_ERR) #define F_DDP_HDRCRC_ERR V_DDP_HDRCRC_ERR(1U) #define S_DDP_DATACRC_ERR 21 #define V_DDP_DATACRC_ERR(x) ((x) << S_DDP_DATACRC_ERR) #define F_DDP_DATACRC_ERR V_DDP_DATACRC_ERR(1U) #define S_DDP_INVALID_TAG 22 #define V_DDP_INVALID_TAG(x) ((x) << S_DDP_INVALID_TAG) #define F_DDP_INVALID_TAG V_DDP_INVALID_TAG(1U) #define S_DDP_ULIMIT_ERR 23 #define V_DDP_ULIMIT_ERR(x) ((x) << S_DDP_ULIMIT_ERR) #define F_DDP_ULIMIT_ERR V_DDP_ULIMIT_ERR(1U) #define S_DDP_OFFSET_ERR 24 #define V_DDP_OFFSET_ERR(x) ((x) << S_DDP_OFFSET_ERR) #define F_DDP_OFFSET_ERR V_DDP_OFFSET_ERR(1U) #define S_DDP_COLOR_ERR 25 #define V_DDP_COLOR_ERR(x) ((x) << S_DDP_COLOR_ERR) #define F_DDP_COLOR_ERR V_DDP_COLOR_ERR(1U) #define S_DDP_TID_MISMATCH 26 #define V_DDP_TID_MISMATCH(x) ((x) << S_DDP_TID_MISMATCH) #define F_DDP_TID_MISMATCH V_DDP_TID_MISMATCH(1U) #define S_DDP_INVALID_PPOD 27 #define V_DDP_INVALID_PPOD(x) ((x) << S_DDP_INVALID_PPOD) #define F_DDP_INVALID_PPOD V_DDP_INVALID_PPOD(1U) #define S_DDP_ULP_MODE 28 #define M_DDP_ULP_MODE 0xF #define V_DDP_ULP_MODE(x) ((x) << S_DDP_ULP_MODE) #define G_DDP_ULP_MODE(x) (((x) >> S_DDP_ULP_MODE) & M_DDP_ULP_MODE) /* cpl_rx_{data,iscsi,fcoe}_{ddp,dif}.ddp_report fields */ #define S_DDP_OFFSET 0 #define M_DDP_OFFSET 0xFFFFFF #define V_DDP_OFFSET(x) ((x) << S_DDP_OFFSET) #define G_DDP_OFFSET(x) (((x) >> S_DDP_OFFSET) & M_DDP_OFFSET) #define S_DDP_DACK_MODE 24 #define M_DDP_DACK_MODE 0x3 #define V_DDP_DACK_MODE(x) ((x) << S_DDP_DACK_MODE) #define G_DDP_DACK_MODE(x) (((x) >> S_DDP_DACK_MODE) & M_DDP_DACK_MODE) #define S_DDP_BUF_IDX 26 #define V_DDP_BUF_IDX(x) ((x) << S_DDP_BUF_IDX) #define F_DDP_BUF_IDX 
V_DDP_BUF_IDX(1U) #define S_DDP_URG 27 #define V_DDP_URG(x) ((x) << S_DDP_URG) #define F_DDP_URG V_DDP_URG(1U) #define S_DDP_PSH 28 #define V_DDP_PSH(x) ((x) << S_DDP_PSH) #define F_DDP_PSH V_DDP_PSH(1U) #define S_DDP_BUF_COMPLETE 29 #define V_DDP_BUF_COMPLETE(x) ((x) << S_DDP_BUF_COMPLETE) #define F_DDP_BUF_COMPLETE V_DDP_BUF_COMPLETE(1U) #define S_DDP_BUF_TIMED_OUT 30 #define V_DDP_BUF_TIMED_OUT(x) ((x) << S_DDP_BUF_TIMED_OUT) #define F_DDP_BUF_TIMED_OUT V_DDP_BUF_TIMED_OUT(1U) #define S_DDP_INV 31 #define V_DDP_INV(x) ((x) << S_DDP_INV) #define F_DDP_INV V_DDP_INV(1U) struct cpl_rx_pkt { RSS_HDR __u8 opcode; #if defined(__LITTLE_ENDIAN_BITFIELD) __u8 iff:4; __u8 csum_calc:1; __u8 ipmi_pkt:1; __u8 vlan_ex:1; __u8 ip_frag:1; #else __u8 ip_frag:1; __u8 vlan_ex:1; __u8 ipmi_pkt:1; __u8 csum_calc:1; __u8 iff:4; #endif __be16 csum; __be16 vlan; __be16 len; __be32 l2info; __be16 hdr_len; __be16 err_vec; }; /* cpl_rx_pkt.l2info fields */ #define S_RX_ETHHDR_LEN 0 #define M_RX_ETHHDR_LEN 0x1F #define V_RX_ETHHDR_LEN(x) ((x) << S_RX_ETHHDR_LEN) #define G_RX_ETHHDR_LEN(x) (((x) >> S_RX_ETHHDR_LEN) & M_RX_ETHHDR_LEN) #define S_RX_T5_ETHHDR_LEN 0 #define M_RX_T5_ETHHDR_LEN 0x3F #define V_RX_T5_ETHHDR_LEN(x) ((x) << S_RX_T5_ETHHDR_LEN) #define G_RX_T5_ETHHDR_LEN(x) (((x) >> S_RX_T5_ETHHDR_LEN) & M_RX_T5_ETHHDR_LEN) #define S_RX_PKTYPE 5 #define M_RX_PKTYPE 0x7 #define V_RX_PKTYPE(x) ((x) << S_RX_PKTYPE) #define G_RX_PKTYPE(x) (((x) >> S_RX_PKTYPE) & M_RX_PKTYPE) #define S_RX_T5_DATYPE 6 #define M_RX_T5_DATYPE 0x3 #define V_RX_T5_DATYPE(x) ((x) << S_RX_T5_DATYPE) #define G_RX_T5_DATYPE(x) (((x) >> S_RX_T5_DATYPE) & M_RX_T5_DATYPE) #define S_RX_MACIDX 8 #define M_RX_MACIDX 0x1FF #define V_RX_MACIDX(x) ((x) << S_RX_MACIDX) #define G_RX_MACIDX(x) (((x) >> S_RX_MACIDX) & M_RX_MACIDX) #define S_RX_T5_PKTYPE 17 #define M_RX_T5_PKTYPE 0x7 #define V_RX_T5_PKTYPE(x) ((x) << S_RX_T5_PKTYPE) #define G_RX_T5_PKTYPE(x) (((x) >> S_RX_T5_PKTYPE) & M_RX_T5_PKTYPE) #define S_RX_DATYPE 18 #define 
M_RX_DATYPE 0x3 #define V_RX_DATYPE(x) ((x) << S_RX_DATYPE) #define G_RX_DATYPE(x) (((x) >> S_RX_DATYPE) & M_RX_DATYPE) #define S_RXF_PSH 20 #define V_RXF_PSH(x) ((x) << S_RXF_PSH) #define F_RXF_PSH V_RXF_PSH(1U) #define S_RXF_SYN 21 #define V_RXF_SYN(x) ((x) << S_RXF_SYN) #define F_RXF_SYN V_RXF_SYN(1U) #define S_RXF_UDP 22 #define V_RXF_UDP(x) ((x) << S_RXF_UDP) #define F_RXF_UDP V_RXF_UDP(1U) #define S_RXF_TCP 23 #define V_RXF_TCP(x) ((x) << S_RXF_TCP) #define F_RXF_TCP V_RXF_TCP(1U) #define S_RXF_IP 24 #define V_RXF_IP(x) ((x) << S_RXF_IP) #define F_RXF_IP V_RXF_IP(1U) #define S_RXF_IP6 25 #define V_RXF_IP6(x) ((x) << S_RXF_IP6) #define F_RXF_IP6 V_RXF_IP6(1U) #define S_RXF_SYN_COOKIE 26 #define V_RXF_SYN_COOKIE(x) ((x) << S_RXF_SYN_COOKIE) #define F_RXF_SYN_COOKIE V_RXF_SYN_COOKIE(1U) #define S_RXF_FCOE 26 #define V_RXF_FCOE(x) ((x) << S_RXF_FCOE) #define F_RXF_FCOE V_RXF_FCOE(1U) #define S_RXF_LRO 27 #define V_RXF_LRO(x) ((x) << S_RXF_LRO) #define F_RXF_LRO V_RXF_LRO(1U) #define S_RX_CHAN 28 #define M_RX_CHAN 0xF #define V_RX_CHAN(x) ((x) << S_RX_CHAN) #define G_RX_CHAN(x) (((x) >> S_RX_CHAN) & M_RX_CHAN) /* cpl_rx_pkt.hdr_len fields */ #define S_RX_TCPHDR_LEN 0 #define M_RX_TCPHDR_LEN 0x3F #define V_RX_TCPHDR_LEN(x) ((x) << S_RX_TCPHDR_LEN) #define G_RX_TCPHDR_LEN(x) (((x) >> S_RX_TCPHDR_LEN) & M_RX_TCPHDR_LEN) #define S_RX_IPHDR_LEN 6 #define M_RX_IPHDR_LEN 0x3FF #define V_RX_IPHDR_LEN(x) ((x) << S_RX_IPHDR_LEN) #define G_RX_IPHDR_LEN(x) (((x) >> S_RX_IPHDR_LEN) & M_RX_IPHDR_LEN) /* cpl_rx_pkt.err_vec fields */ #define S_RXERR_OR 0 #define V_RXERR_OR(x) ((x) << S_RXERR_OR) #define F_RXERR_OR V_RXERR_OR(1U) #define S_RXERR_MAC 1 #define V_RXERR_MAC(x) ((x) << S_RXERR_MAC) #define F_RXERR_MAC V_RXERR_MAC(1U) #define S_RXERR_IPVERS 2 #define V_RXERR_IPVERS(x) ((x) << S_RXERR_IPVERS) #define F_RXERR_IPVERS V_RXERR_IPVERS(1U) #define S_RXERR_FRAG 3 #define V_RXERR_FRAG(x) ((x) << S_RXERR_FRAG) #define F_RXERR_FRAG V_RXERR_FRAG(1U) #define S_RXERR_ATTACK 4 #define 
V_RXERR_ATTACK(x) ((x) << S_RXERR_ATTACK) #define F_RXERR_ATTACK V_RXERR_ATTACK(1U) #define S_RXERR_ETHHDR_LEN 5 #define V_RXERR_ETHHDR_LEN(x) ((x) << S_RXERR_ETHHDR_LEN) #define F_RXERR_ETHHDR_LEN V_RXERR_ETHHDR_LEN(1U) #define S_RXERR_IPHDR_LEN 6 #define V_RXERR_IPHDR_LEN(x) ((x) << S_RXERR_IPHDR_LEN) #define F_RXERR_IPHDR_LEN V_RXERR_IPHDR_LEN(1U) #define S_RXERR_TCPHDR_LEN 7 #define V_RXERR_TCPHDR_LEN(x) ((x) << S_RXERR_TCPHDR_LEN) #define F_RXERR_TCPHDR_LEN V_RXERR_TCPHDR_LEN(1U) #define S_RXERR_PKT_LEN 8 #define V_RXERR_PKT_LEN(x) ((x) << S_RXERR_PKT_LEN) #define F_RXERR_PKT_LEN V_RXERR_PKT_LEN(1U) #define S_RXERR_TCP_OPT 9 #define V_RXERR_TCP_OPT(x) ((x) << S_RXERR_TCP_OPT) #define F_RXERR_TCP_OPT V_RXERR_TCP_OPT(1U) #define S_RXERR_IPCSUM 12 #define V_RXERR_IPCSUM(x) ((x) << S_RXERR_IPCSUM) #define F_RXERR_IPCSUM V_RXERR_IPCSUM(1U) #define S_RXERR_CSUM 13 #define V_RXERR_CSUM(x) ((x) << S_RXERR_CSUM) #define F_RXERR_CSUM V_RXERR_CSUM(1U) #define S_RXERR_PING 14 #define V_RXERR_PING(x) ((x) << S_RXERR_PING) #define F_RXERR_PING V_RXERR_PING(1U) struct cpl_trace_pkt { RSS_HDR __u8 opcode; __u8 intf; #if defined(__LITTLE_ENDIAN_BITFIELD) __u8 runt:4; __u8 filter_hit:4; __u8 :6; __u8 err:1; __u8 trunc:1; #else __u8 filter_hit:4; __u8 runt:4; __u8 trunc:1; __u8 err:1; __u8 :6; #endif __be16 rsvd; __be16 len; __be64 tstamp; }; struct cpl_t5_trace_pkt { RSS_HDR __u8 opcode; __u8 intf; #if defined(__LITTLE_ENDIAN_BITFIELD) __u8 runt:4; __u8 filter_hit:4; __u8 :6; __u8 err:1; __u8 trunc:1; #else __u8 filter_hit:4; __u8 runt:4; __u8 trunc:1; __u8 err:1; __u8 :6; #endif __be16 rsvd; __be16 len; __be64 tstamp; __be64 rsvd1; }; struct cpl_rte_delete_req { WR_HDR; union opcode_tid ot; __be32 params; }; /* {cpl_rte_delete_req, cpl_rte_read_req}.params fields */ #define S_RTE_REQ_LUT_IX 8 #define M_RTE_REQ_LUT_IX 0x7FF #define V_RTE_REQ_LUT_IX(x) ((x) << S_RTE_REQ_LUT_IX) #define G_RTE_REQ_LUT_IX(x) (((x) >> S_RTE_REQ_LUT_IX) & M_RTE_REQ_LUT_IX) #define S_RTE_REQ_LUT_BASE 
19 #define M_RTE_REQ_LUT_BASE 0x7FF #define V_RTE_REQ_LUT_BASE(x) ((x) << S_RTE_REQ_LUT_BASE) #define G_RTE_REQ_LUT_BASE(x) (((x) >> S_RTE_REQ_LUT_BASE) & M_RTE_REQ_LUT_BASE) #define S_RTE_READ_REQ_SELECT 31 #define V_RTE_READ_REQ_SELECT(x) ((x) << S_RTE_READ_REQ_SELECT) #define F_RTE_READ_REQ_SELECT V_RTE_READ_REQ_SELECT(1U) struct cpl_rte_delete_rpl { RSS_HDR union opcode_tid ot; __u8 status; __u8 rsvd[3]; }; struct cpl_rte_write_req { WR_HDR; union opcode_tid ot; __u32 write_sel; __be32 lut_params; __be32 l2t_idx; __be32 netmask; __be32 faddr; }; /* cpl_rte_write_req.write_sel fields */ #define S_RTE_WR_L2TIDX 31 #define V_RTE_WR_L2TIDX(x) ((x) << S_RTE_WR_L2TIDX) #define F_RTE_WR_L2TIDX V_RTE_WR_L2TIDX(1U) #define S_RTE_WR_FADDR 30 #define V_RTE_WR_FADDR(x) ((x) << S_RTE_WR_FADDR) #define F_RTE_WR_FADDR V_RTE_WR_FADDR(1U) /* cpl_rte_write_req.lut_params fields */ #define S_RTE_WR_LUT_IX 10 #define M_RTE_WR_LUT_IX 0x7FF #define V_RTE_WR_LUT_IX(x) ((x) << S_RTE_WR_LUT_IX) #define G_RTE_WR_LUT_IX(x) (((x) >> S_RTE_WR_LUT_IX) & M_RTE_WR_LUT_IX) #define S_RTE_WR_LUT_BASE 21 #define M_RTE_WR_LUT_BASE 0x7FF #define V_RTE_WR_LUT_BASE(x) ((x) << S_RTE_WR_LUT_BASE) #define G_RTE_WR_LUT_BASE(x) (((x) >> S_RTE_WR_LUT_BASE) & M_RTE_WR_LUT_BASE) struct cpl_rte_write_rpl { RSS_HDR union opcode_tid ot; __u8 status; __u8 rsvd[3]; }; struct cpl_rte_read_req { WR_HDR; union opcode_tid ot; __be32 params; }; struct cpl_rte_read_rpl { RSS_HDR union opcode_tid ot; __u8 status; __u8 rsvd; __be16 l2t_idx; #if defined(__LITTLE_ENDIAN_BITFIELD) __u32 :30; __u32 select:1; #else __u32 select:1; __u32 :30; #endif __be32 addr; }; struct cpl_l2t_write_req { WR_HDR; union opcode_tid ot; __be16 params; __be16 l2t_idx; __be16 vlan; __u8 dst_mac[6]; }; /* cpl_l2t_write_req.params fields */ #define S_L2T_W_INFO 2 #define M_L2T_W_INFO 0x3F #define V_L2T_W_INFO(x) ((x) << S_L2T_W_INFO) #define G_L2T_W_INFO(x) (((x) >> S_L2T_W_INFO) & M_L2T_W_INFO) #define S_L2T_W_PORT 8 #define M_L2T_W_PORT 0xF 
#define V_L2T_W_PORT(x) ((x) << S_L2T_W_PORT) #define G_L2T_W_PORT(x) (((x) >> S_L2T_W_PORT) & M_L2T_W_PORT) #define S_L2T_W_NOREPLY 15 #define V_L2T_W_NOREPLY(x) ((x) << S_L2T_W_NOREPLY) #define F_L2T_W_NOREPLY V_L2T_W_NOREPLY(1U) struct cpl_l2t_write_rpl { RSS_HDR union opcode_tid ot; __u8 status; __u8 rsvd[3]; }; struct cpl_l2t_read_req { WR_HDR; union opcode_tid ot; __be32 l2t_idx; }; struct cpl_l2t_read_rpl { RSS_HDR union opcode_tid ot; __u8 status; #if defined(__LITTLE_ENDIAN_BITFIELD) __u8 :4; __u8 iff:4; #else __u8 iff:4; __u8 :4; #endif __be16 vlan; __be16 info; __u8 dst_mac[6]; }; struct cpl_smt_write_req { WR_HDR; union opcode_tid ot; __be32 params; __be16 pfvf1; __u8 src_mac1[6]; __be16 pfvf0; __u8 src_mac0[6]; }; struct cpl_smt_write_rpl { RSS_HDR union opcode_tid ot; __u8 status; __u8 rsvd[3]; }; struct cpl_smt_read_req { WR_HDR; union opcode_tid ot; __be32 params; }; struct cpl_smt_read_rpl { RSS_HDR union opcode_tid ot; __u8 status; __u8 ovlan_idx; __be16 rsvd; __be16 pfvf1; __u8 src_mac1[6]; __be16 pfvf0; __u8 src_mac0[6]; }; /* cpl_smt_{read,write}_req.params fields */ #define S_SMTW_OVLAN_IDX 16 #define M_SMTW_OVLAN_IDX 0xF #define V_SMTW_OVLAN_IDX(x) ((x) << S_SMTW_OVLAN_IDX) #define G_SMTW_OVLAN_IDX(x) (((x) >> S_SMTW_OVLAN_IDX) & M_SMTW_OVLAN_IDX) #define S_SMTW_IDX 20 #define M_SMTW_IDX 0x7F #define V_SMTW_IDX(x) ((x) << S_SMTW_IDX) #define G_SMTW_IDX(x) (((x) >> S_SMTW_IDX) & M_SMTW_IDX) #define S_SMTW_NORPL 31 #define V_SMTW_NORPL(x) ((x) << S_SMTW_NORPL) #define F_SMTW_NORPL V_SMTW_NORPL(1U) /* cpl_smt_{read,write}_req.pfvf? 
fields */ #define S_SMTW_VF 0 #define M_SMTW_VF 0xFF #define V_SMTW_VF(x) ((x) << S_SMTW_VF) #define G_SMTW_VF(x) (((x) >> S_SMTW_VF) & M_SMTW_VF) #define S_SMTW_PF 8 #define M_SMTW_PF 0x7 #define V_SMTW_PF(x) ((x) << S_SMTW_PF) #define G_SMTW_PF(x) (((x) >> S_SMTW_PF) & M_SMTW_PF) #define S_SMTW_VF_VLD 11 #define V_SMTW_VF_VLD(x) ((x) << S_SMTW_VF_VLD) #define F_SMTW_VF_VLD V_SMTW_VF_VLD(1U) struct cpl_tag_write_req { WR_HDR; union opcode_tid ot; __be32 params; __be64 tag_val; }; struct cpl_tag_write_rpl { RSS_HDR union opcode_tid ot; __u8 status; __u8 rsvd[2]; __u8 idx; }; struct cpl_tag_read_req { WR_HDR; union opcode_tid ot; __be32 params; }; struct cpl_tag_read_rpl { RSS_HDR union opcode_tid ot; __u8 status; #if defined(__LITTLE_ENDIAN_BITFIELD) __u8 :4; __u8 tag_len:1; __u8 :2; __u8 ins_enable:1; #else __u8 ins_enable:1; __u8 :2; __u8 tag_len:1; __u8 :4; #endif __u8 rsvd; __u8 tag_idx; __be64 tag_val; }; /* cpl_tag_{read,write}_req.params fields */ #define S_TAGW_IDX 0 #define M_TAGW_IDX 0x7F #define V_TAGW_IDX(x) ((x) << S_TAGW_IDX) #define G_TAGW_IDX(x) (((x) >> S_TAGW_IDX) & M_TAGW_IDX) #define S_TAGW_LEN 20 #define V_TAGW_LEN(x) ((x) << S_TAGW_LEN) #define F_TAGW_LEN V_TAGW_LEN(1U) #define S_TAGW_INS_ENABLE 23 #define V_TAGW_INS_ENABLE(x) ((x) << S_TAGW_INS_ENABLE) #define F_TAGW_INS_ENABLE V_TAGW_INS_ENABLE(1U) #define S_TAGW_NORPL 31 #define V_TAGW_NORPL(x) ((x) << S_TAGW_NORPL) #define F_TAGW_NORPL V_TAGW_NORPL(1U) struct cpl_barrier { WR_HDR; __u8 opcode; __u8 chan_map; __be16 rsvd0; __be32 rsvd1; }; /* cpl_barrier.chan_map fields */ #define S_CHAN_MAP 4 #define M_CHAN_MAP 0xF #define V_CHAN_MAP(x) ((x) << S_CHAN_MAP) #define G_CHAN_MAP(x) (((x) >> S_CHAN_MAP) & M_CHAN_MAP) struct cpl_error { RSS_HDR union opcode_tid ot; __be32 error; }; struct cpl_hit_notify { RSS_HDR union opcode_tid ot; __be32 rsvd; __be32 info; __be32 reason; }; struct cpl_pkt_notify { RSS_HDR union opcode_tid ot; __be16 rsvd; __be16 len; __be32 info; __be32 reason; }; /* 
cpl_{hit,pkt}_notify.info fields */ #define S_NTFY_MAC_IDX 0 #define M_NTFY_MAC_IDX 0x1FF #define V_NTFY_MAC_IDX(x) ((x) << S_NTFY_MAC_IDX) #define G_NTFY_MAC_IDX(x) (((x) >> S_NTFY_MAC_IDX) & M_NTFY_MAC_IDX) #define S_NTFY_INTF 10 #define M_NTFY_INTF 0xF #define V_NTFY_INTF(x) ((x) << S_NTFY_INTF) #define G_NTFY_INTF(x) (((x) >> S_NTFY_INTF) & M_NTFY_INTF) #define S_NTFY_TCPHDR_LEN 14 #define M_NTFY_TCPHDR_LEN 0xF #define V_NTFY_TCPHDR_LEN(x) ((x) << S_NTFY_TCPHDR_LEN) #define G_NTFY_TCPHDR_LEN(x) (((x) >> S_NTFY_TCPHDR_LEN) & M_NTFY_TCPHDR_LEN) #define S_NTFY_IPHDR_LEN 18 #define M_NTFY_IPHDR_LEN 0x1FF #define V_NTFY_IPHDR_LEN(x) ((x) << S_NTFY_IPHDR_LEN) #define G_NTFY_IPHDR_LEN(x) (((x) >> S_NTFY_IPHDR_LEN) & M_NTFY_IPHDR_LEN) #define S_NTFY_ETHHDR_LEN 27 #define M_NTFY_ETHHDR_LEN 0x1F #define V_NTFY_ETHHDR_LEN(x) ((x) << S_NTFY_ETHHDR_LEN) #define G_NTFY_ETHHDR_LEN(x) (((x) >> S_NTFY_ETHHDR_LEN) & M_NTFY_ETHHDR_LEN) #define S_NTFY_T5_IPHDR_LEN 18 #define M_NTFY_T5_IPHDR_LEN 0xFF #define V_NTFY_T5_IPHDR_LEN(x) ((x) << S_NTFY_T5_IPHDR_LEN) #define G_NTFY_T5_IPHDR_LEN(x) (((x) >> S_NTFY_T5_IPHDR_LEN) & M_NTFY_T5_IPHDR_LEN) #define S_NTFY_T5_ETHHDR_LEN 26 #define M_NTFY_T5_ETHHDR_LEN 0x3F #define V_NTFY_T5_ETHHDR_LEN(x) ((x) << S_NTFY_T5_ETHHDR_LEN) #define G_NTFY_T5_ETHHDR_LEN(x) (((x) >> S_NTFY_T5_ETHHDR_LEN) & M_NTFY_T5_ETHHDR_LEN) struct cpl_rdma_terminate { RSS_HDR union opcode_tid ot; __be16 rsvd; __be16 len; }; struct cpl_set_le_req { WR_HDR; union opcode_tid ot; __be16 reply_ctrl; __be16 params; __be64 mask_hi; __be64 mask_lo; __be64 val_hi; __be64 val_lo; }; /* cpl_set_le_req.reply_ctrl additional fields */ #define S_LE_REQ_IP6 13 #define V_LE_REQ_IP6(x) ((x) << S_LE_REQ_IP6) #define F_LE_REQ_IP6 V_LE_REQ_IP6(1U) /* cpl_set_le_req.params fields */ #define S_LE_CHAN 0 #define M_LE_CHAN 0x3 #define V_LE_CHAN(x) ((x) << S_LE_CHAN) #define G_LE_CHAN(x) (((x) >> S_LE_CHAN) & M_LE_CHAN) #define S_LE_OFFSET 5 #define M_LE_OFFSET 0x7 #define V_LE_OFFSET(x) ((x) 
<< S_LE_OFFSET) #define G_LE_OFFSET(x) (((x) >> S_LE_OFFSET) & M_LE_OFFSET) #define S_LE_MORE 8 #define V_LE_MORE(x) ((x) << S_LE_MORE) #define F_LE_MORE V_LE_MORE(1U) #define S_LE_REQSIZE 9 #define M_LE_REQSIZE 0x7 #define V_LE_REQSIZE(x) ((x) << S_LE_REQSIZE) #define G_LE_REQSIZE(x) (((x) >> S_LE_REQSIZE) & M_LE_REQSIZE) #define S_LE_REQCMD 12 #define M_LE_REQCMD 0xF #define V_LE_REQCMD(x) ((x) << S_LE_REQCMD) #define G_LE_REQCMD(x) (((x) >> S_LE_REQCMD) & M_LE_REQCMD) struct cpl_set_le_rpl { RSS_HDR union opcode_tid ot; __u8 chan; __u8 info; __be16 len; }; /* cpl_set_le_rpl.info fields */ #define S_LE_RSPCMD 0 #define M_LE_RSPCMD 0xF #define V_LE_RSPCMD(x) ((x) << S_LE_RSPCMD) #define G_LE_RSPCMD(x) (((x) >> S_LE_RSPCMD) & M_LE_RSPCMD) #define S_LE_RSPSIZE 4 #define M_LE_RSPSIZE 0x7 #define V_LE_RSPSIZE(x) ((x) << S_LE_RSPSIZE) #define G_LE_RSPSIZE(x) (((x) >> S_LE_RSPSIZE) & M_LE_RSPSIZE) #define S_LE_RSPTYPE 7 #define V_LE_RSPTYPE(x) ((x) << S_LE_RSPTYPE) #define F_LE_RSPTYPE V_LE_RSPTYPE(1U) struct cpl_sge_egr_update { RSS_HDR __be32 opcode_qid; __be16 cidx; __be16 pidx; }; /* cpl_sge_egr_update.opcode_qid fields */ #define S_EGR_QID 0 #define M_EGR_QID 0x1FFFF #define V_EGR_QID(x) ((x) << S_EGR_QID) #define G_EGR_QID(x) (((x) >> S_EGR_QID) & M_EGR_QID) struct cpl_fw2_pld { RSS_HDR u8 opcode; u8 rsvd[5]; __be16 len; }; struct cpl_fw4_pld { RSS_HDR u8 opcode; u8 rsvd0[3]; u8 type; u8 rsvd1; __be16 len; __be64 data; __be64 rsvd2; }; struct cpl_fw6_pld { RSS_HDR u8 opcode; u8 rsvd[5]; __be16 len; __be64 data[4]; }; struct cpl_fw2_msg { RSS_HDR union opcode_info oi; }; struct cpl_fw4_msg { RSS_HDR u8 opcode; u8 type; __be16 rsvd0; __be32 rsvd1; __be64 data[2]; }; struct cpl_fw4_ack { RSS_HDR union opcode_tid ot; u8 credits; u8 rsvd0[2]; u8 flags; __be32 snd_nxt; __be32 snd_una; __be64 rsvd1; }; enum { CPL_FW4_ACK_FLAGS_SEQVAL = 0x1, /* seqn valid */ CPL_FW4_ACK_FLAGS_CH = 0x2, /* channel change complete */ CPL_FW4_ACK_FLAGS_FLOWC = 0x4, /* fw_flowc_wr complete */ }; 
struct cpl_fw6_msg { RSS_HDR u8 opcode; u8 type; __be16 rsvd0; __be32 rsvd1; __be64 data[4]; }; /* cpl_fw6_msg.type values */ enum { FW6_TYPE_CMD_RPL = 0, FW6_TYPE_WR_RPL = 1, FW6_TYPE_CQE = 2, FW6_TYPE_OFLD_CONNECTION_WR_RPL = 3, NUM_FW6_TYPES }; struct cpl_fw6_msg_ofld_connection_wr_rpl { __u64 cookie; __be32 tid; /* or atid in case of active failure */ __u8 t_state; __u8 retval; __u8 rsvd[2]; }; /* ULP_TX opcodes */ enum { ULP_TX_MEM_READ = 2, ULP_TX_MEM_WRITE = 3, ULP_TX_PKT = 4 }; enum { ULP_TX_SC_NOOP = 0x80, ULP_TX_SC_IMM = 0x81, ULP_TX_SC_DSGL = 0x82, ULP_TX_SC_ISGL = 0x83 }; #define S_ULPTX_CMD 24 #define M_ULPTX_CMD 0xFF #define V_ULPTX_CMD(x) ((x) << S_ULPTX_CMD) #define S_ULPTX_LEN16 0 #define M_ULPTX_LEN16 0xFF #define V_ULPTX_LEN16(x) ((x) << S_ULPTX_LEN16) #define S_ULP_TX_SC_MORE 23 #define V_ULP_TX_SC_MORE(x) ((x) << S_ULP_TX_SC_MORE) #define F_ULP_TX_SC_MORE V_ULP_TX_SC_MORE(1U) struct ulptx_sge_pair { __be32 len[2]; __be64 addr[2]; }; struct ulptx_sgl { __be32 cmd_nsge; __be32 len0; __be64 addr0; #if !(defined C99_NOT_SUPPORTED) struct ulptx_sge_pair sge[0]; #endif }; struct ulptx_isge { __be32 stag; __be32 len; __be64 target_ofst; }; struct ulptx_isgl { __be32 cmd_nisge; __be32 rsvd; #if !(defined C99_NOT_SUPPORTED) struct ulptx_isge sge[0]; #endif }; struct ulptx_idata { __be32 cmd_more; __be32 len; }; #define S_ULPTX_NSGE 0 #define M_ULPTX_NSGE 0xFFFF #define V_ULPTX_NSGE(x) ((x) << S_ULPTX_NSGE) struct ulp_mem_io { WR_HDR; __be32 cmd; __be32 len16; /* command length */ __be32 dlen; /* data length in 32-byte units */ __be32 lock_addr; }; /* additional ulp_mem_io.cmd fields */ #define S_ULP_MEMIO_ORDER 23 #define V_ULP_MEMIO_ORDER(x) ((x) << S_ULP_MEMIO_ORDER) #define F_ULP_MEMIO_ORDER V_ULP_MEMIO_ORDER(1U) /* ulp_mem_io.lock_addr fields */ #define S_ULP_MEMIO_ADDR 0 #define M_ULP_MEMIO_ADDR 0x7FFFFFF #define V_ULP_MEMIO_ADDR(x) ((x) << S_ULP_MEMIO_ADDR) #define S_ULP_MEMIO_LOCK 31 #define V_ULP_MEMIO_LOCK(x) ((x) << S_ULP_MEMIO_LOCK) #define 
F_ULP_MEMIO_LOCK V_ULP_MEMIO_LOCK(1U) /* ulp_mem_io.dlen fields */ #define S_ULP_MEMIO_DATA_LEN 0 #define M_ULP_MEMIO_DATA_LEN 0x1F #define V_ULP_MEMIO_DATA_LEN(x) ((x) << S_ULP_MEMIO_DATA_LEN) struct ulp_txpkt { __be32 cmd_dest; __be32 len; }; /* ulp_txpkt.cmd_dest fields */ #define S_ULP_TXPKT_DEST 16 #define M_ULP_TXPKT_DEST 0x3 #define V_ULP_TXPKT_DEST(x) ((x) << S_ULP_TXPKT_DEST) #define S_ULP_TXPKT_FID 4 #define M_ULP_TXPKT_FID 0x7ff #define V_ULP_TXPKT_FID(x) ((x) << S_ULP_TXPKT_FID) #define S_ULP_TXPKT_RO 3 #define V_ULP_TXPKT_RO(x) ((x) << S_ULP_TXPKT_RO) #define F_ULP_TXPKT_RO V_ULP_TXPKT_RO(1U) #endif /* T4_MSG_H */ Index: head/sys/dev/cxgbe/offload.h =================================================================== --- head/sys/dev/cxgbe/offload.h (revision 239343) +++ head/sys/dev/cxgbe/offload.h (revision 239344) @@ -1,144 +1,147 @@ /*- * Copyright (c) 2010 Chelsio Communications, Inc. * All rights reserved. * Written by: Navdeep Parhar * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ * */ #ifndef __T4_OFFLOAD_H__ #define __T4_OFFLOAD_H__ -#define INIT_ULPTX_WR(w, wrlen, atomic, tid) do { \ - (w)->wr.wr_hi = htonl(V_FW_WR_OP(FW_ULPTX_WR) | V_FW_WR_ATOMIC(atomic)); \ - (w)->wr.wr_mid = htonl(V_FW_WR_LEN16(DIV_ROUND_UP(wrlen, 16)) | \ +#define INIT_ULPTX_WRH(w, wrlen, atomic, tid) do { \ + (w)->wr_hi = htonl(V_FW_WR_OP(FW_ULPTX_WR) | V_FW_WR_ATOMIC(atomic)); \ + (w)->wr_mid = htonl(V_FW_WR_LEN16(DIV_ROUND_UP(wrlen, 16)) | \ V_FW_WR_FLOWID(tid)); \ - (w)->wr.wr_lo = cpu_to_be64(0); \ + (w)->wr_lo = cpu_to_be64(0); \ } while (0) + +#define INIT_ULPTX_WR(w, wrlen, atomic, tid) \ + INIT_ULPTX_WRH(&((w)->wr), wrlen, atomic, tid) #define INIT_TP_WR(w, tid) do { \ (w)->wr.wr_hi = htonl(V_FW_WR_OP(FW_TP_WR) | \ V_FW_WR_IMMDLEN(sizeof(*w) - sizeof(w->wr))); \ (w)->wr.wr_mid = htonl(V_FW_WR_LEN16(DIV_ROUND_UP(sizeof(*w), 16)) | \ V_FW_WR_FLOWID(tid)); \ (w)->wr.wr_lo = cpu_to_be64(0); \ } while (0) #define INIT_TP_WR_MIT_CPL(w, cpl, tid) do { \ INIT_TP_WR(w, tid); \ OPCODE_TID(w) = htonl(MK_OPCODE_TID(cpl, tid)); \ } while (0) /* * Max # of ATIDs. The absolute HW max is 16K but we keep it lower. */ #define MAX_ATIDS 8192U union serv_entry { void *data; union serv_entry *next; }; union aopen_entry { void *data; union aopen_entry *next; }; /* * Holds the size, base address, free list start, etc of the TID, server TID, * and active-open TID tables. The tables themselves are allocated dynamically. 
*/ struct tid_info { void **tid_tab; unsigned int ntids; union serv_entry *stid_tab; unsigned int nstids; unsigned int stid_base; union aopen_entry *atid_tab; unsigned int natids; struct filter_entry *ftid_tab; unsigned int nftids; unsigned int ftid_base; unsigned int ftids_in_use; struct mtx atid_lock; union aopen_entry *afree; unsigned int atids_in_use; struct mtx stid_lock; union serv_entry *sfree; unsigned int stids_in_use; unsigned int tids_in_use; }; struct t4_range { unsigned int start; unsigned int size; }; struct t4_virt_res { /* virtualized HW resources */ struct t4_range ddp; struct t4_range iscsi; struct t4_range stag; struct t4_range rq; struct t4_range pbl; struct t4_range qp; struct t4_range cq; struct t4_range ocq; }; #ifdef TCP_OFFLOAD enum { ULD_TOM = 1, }; struct adapter; struct port_info; struct uld_info { SLIST_ENTRY(uld_info) link; int refcount; int uld_id; int (*activate)(struct adapter *); int (*deactivate)(struct adapter *); }; struct tom_tunables { int sndbuf; int ddp; int indsz; int ddp_thres; }; int t4_register_uld(struct uld_info *); int t4_unregister_uld(struct uld_info *); int t4_activate_uld(struct adapter *, int); int t4_deactivate_uld(struct adapter *, int); #endif #endif Index: head/sys/dev/cxgbe/tom/t4_connect.c =================================================================== --- head/sys/dev/cxgbe/tom/t4_connect.c (revision 239343) +++ head/sys/dev/cxgbe/tom/t4_connect.c (revision 239344) @@ -1,377 +1,384 @@ /*- * Copyright (c) 2012 Chelsio Communications, Inc. * All rights reserved. * Written by: Navdeep Parhar * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #ifdef TCP_OFFLOAD #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define TCPSTATES #include #include #include "common/common.h" #include "common/t4_msg.h" #include "common/t4_regs.h" #include "tom/t4_tom_l2t.h" #include "tom/t4_tom.h" /* atid services */ static int alloc_atid(struct adapter *, void *); static void *lookup_atid(struct adapter *, int); static void free_atid(struct adapter *, int); static int alloc_atid(struct adapter *sc, void *ctx) { struct tid_info *t = &sc->tids; int atid = -1; mtx_lock(&t->atid_lock); if (t->afree) { union aopen_entry *p = t->afree; atid = p - t->atid_tab; t->afree = p->next; p->data = ctx; t->atids_in_use++; } mtx_unlock(&t->atid_lock); return (atid); } static void * lookup_atid(struct adapter *sc, int atid) { struct tid_info *t = &sc->tids; return (t->atid_tab[atid].data); } static void free_atid(struct 
adapter *sc, int atid) { struct tid_info *t = &sc->tids; union aopen_entry *p = &t->atid_tab[atid]; mtx_lock(&t->atid_lock); p->next = t->afree; t->afree = p; t->atids_in_use--; mtx_unlock(&t->atid_lock); } /* * Active open succeeded. Handler for CPL_ACT_ESTABLISH: frees the atid, * stores the tid carried in the CPL in the toepcb and registers it with * insert_tid(), then calls make_established(). If the inp already has * INP_DROPPED set, a flowc WR followed by a reset is sent instead. * Always returns 0. */ static int do_act_establish(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; const struct cpl_act_establish *cpl = (const void *)(rss + 1); unsigned int tid = GET_TID(cpl); unsigned int atid = G_TID_TID(ntohl(cpl->tos_atid)); struct toepcb *toep = lookup_atid(sc, atid); struct inpcb *inp = toep->inp; KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); KASSERT(toep->tid == atid, ("%s: toep tid/atid mismatch", __func__)); CTR3(KTR_CXGBE, "%s: atid %u, tid %u", __func__, atid, tid); free_atid(sc, atid); INP_WLOCK(inp); toep->tid = tid; insert_tid(sc, tid, toep); if (inp->inp_flags & INP_DROPPED) { /* socket closed by the kernel before hw told us it connected */ send_flowc_wr(toep, NULL); send_reset(sc, toep, be32toh(cpl->snd_isn)); goto done; } make_established(toep, cpl->snd_isn, cpl->rcv_isn, cpl->tcp_opt); done: INP_WUNLOCK(inp); return (0); } /* * Returns non-zero unless status is CPL_ERR_TCAM_FULL, CPL_ERR_TCAM_PARITY, * CPL_ERR_CONN_EXIST, or CPL_ERR_ARP_MISS. */ static inline int act_open_has_tid(unsigned int status) { return (status != CPL_ERR_TCAM_FULL && status != CPL_ERR_TCAM_PARITY && status != CPL_ERR_CONN_EXIST && status != CPL_ERR_ARP_MISS); } /* * Convert an ACT_OPEN_RPL status to an errno. 
*/ static inline int act_open_rpl_status_to_errno(int status) { switch (status) { case CPL_ERR_CONN_RESET: return (ECONNREFUSED); case CPL_ERR_ARP_MISS: return (EHOSTUNREACH); case CPL_ERR_CONN_TIMEDOUT: return (ETIMEDOUT); case CPL_ERR_TCAM_FULL: return (ENOMEM); case CPL_ERR_CONN_EXIST: log(LOG_ERR, "ACTIVE_OPEN_RPL: 4-tuple in use\n"); return (EADDRINUSE); default: return (EIO); } } /* * Handler for CPL_ACT_OPEN_RPL. A status of CPL_ERR_RTX_NEG_ADVICE is * ignored and leaves the atid and toepcb untouched. For any other status * the atid is freed, toep->tid is reset to -1, and release_tid() is called * when the status is non-zero and act_open_has_tid() is true. The failure * is then reported through toe_connect_failed(): CPL_ERR_TCAM_FULL is * reported as EAGAIN (so the ENOMEM mapping in * act_open_rpl_status_to_errno() is not used on this path), every other * status goes through act_open_rpl_status_to_errno() with the tcbinfo * write lock held. Always returns 0. */ static int do_act_open_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; const struct cpl_act_open_rpl *cpl = (const void *)(rss + 1); unsigned int atid = G_TID_TID(G_AOPEN_ATID(be32toh(cpl->atid_status))); unsigned int status = G_AOPEN_STATUS(be32toh(cpl->atid_status)); struct toepcb *toep = lookup_atid(sc, atid); struct inpcb *inp = toep->inp; struct tcpcb *tp = intotcpcb(inp); struct toedev *tod = &toep->td->tod; KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); KASSERT(toep->tid == atid, ("%s: toep tid/atid mismatch", __func__)); CTR3(KTR_CXGBE, "%s: atid %u, status %u ", __func__, atid, status); /* Ignore negative advice */ if (status == CPL_ERR_RTX_NEG_ADVICE) return (0); free_atid(sc, atid); toep->tid = -1; if (status && act_open_has_tid(status)) release_tid(sc, GET_TID(cpl), toep->ctrlq); if (status == CPL_ERR_TCAM_FULL) { INP_WLOCK(inp); toe_connect_failed(tod, tp, EAGAIN); final_cpl_received(toep); /* unlocks inp */ } else { INP_INFO_WLOCK(&V_tcbinfo); INP_WLOCK(inp); toe_connect_failed(tod, tp, act_open_rpl_status_to_errno(status)); final_cpl_received(toep); /* unlocks inp */ INP_INFO_WUNLOCK(&V_tcbinfo); } return (0); } /* * Options2 for active open. 
*/ static uint32_t calc_opt2a(struct socket *so) { struct tcpcb *tp = so_sototcpcb(so); struct toepcb *toep = tp->t_toe; struct port_info *pi = toep->port; struct adapter *sc = pi->adapter; uint32_t opt2 = 0; if (tp->t_flags & TF_SACK_PERMIT) opt2 |= F_SACK_EN; if (tp->t_flags & TF_REQ_TSTMP) opt2 |= F_TSTAMPS_EN; if (tp->t_flags & TF_REQ_SCALE) opt2 |= F_WND_SCALE_EN; if (V_tcp_do_ecn) opt2 |= F_CCTRL_ECN; opt2 |= V_TX_QUEUE(sc->params.tp.tx_modq[pi->tx_chan]); opt2 |= F_RX_COALESCE_VALID | V_RX_COALESCE(M_RX_COALESCE); opt2 |= F_RSS_QUEUE_VALID | V_RSS_QUEUE(toep->ofld_rxq->iq.abs_id); +#ifdef USE_DDP_RX_FLOW_CONTROL + if (toep->ulp_mode == ULP_MODE_TCPDDP) + opt2 |= F_RX_FC_VALID | F_RX_FC_DDP; +#endif + return (htobe32(opt2)); } - void t4_init_connect_cpl_handlers(struct adapter *sc) { t4_register_cpl_handler(sc, CPL_ACT_ESTABLISH, do_act_establish); t4_register_cpl_handler(sc, CPL_ACT_OPEN_RPL, do_act_open_rpl); } /* * active open (soconnect). * * State of affairs on entry: * soisconnecting (so_state |= SS_ISCONNECTING) * tcbinfo not locked (This has changed - used to be WLOCKed) * inp WLOCKed * tp->t_state = TCPS_SYN_SENT * rtalloc1, RT_UNLOCK on rt. 
*/ int t4_connect(struct toedev *tod, struct socket *so, struct rtentry *rt, struct sockaddr *nam) { struct adapter *sc = tod->tod_softc; struct toepcb *toep = NULL; struct wrqe *wr = NULL; struct cpl_act_open_req *cpl; struct l2t_entry *e = NULL; struct ifnet *rt_ifp = rt->rt_ifp; struct port_info *pi; int atid = -1, mtu_idx, rscale, qid_atid, rc = ENOMEM; struct inpcb *inp = sotoinpcb(so); struct tcpcb *tp = intotcpcb(inp); INP_WLOCK_ASSERT(inp); if (nam->sa_family != AF_INET) CXGBE_UNIMPLEMENTED("IPv6 connect"); if (rt_ifp->if_type == IFT_ETHER) pi = rt_ifp->if_softc; else if (rt_ifp->if_type == IFT_L2VLAN) { struct ifnet *ifp = VLAN_COOKIE(rt_ifp); pi = ifp->if_softc; } else if (rt_ifp->if_type == IFT_IEEE8023ADLAG) return (ENOSYS); /* XXX: implement lagg support */ else return (ENOTSUP); toep = alloc_toepcb(pi, -1, -1, M_NOWAIT); if (toep == NULL) goto failed; atid = alloc_atid(sc, toep); if (atid < 0) goto failed; e = t4_l2t_get(pi, rt_ifp, rt->rt_flags & RTF_GATEWAY ? rt->rt_gateway : nam); if (e == NULL) goto failed; wr = alloc_wrqe(sizeof(*cpl), toep->ctrlq); if (wr == NULL) goto failed; cpl = wrtod(wr); toep->tid = atid; toep->l2te = e; - toep->ulp_mode = ULP_MODE_NONE; + if (sc->tt.ddp && (so->so_options & SO_NO_DDP) == 0) + set_tcpddp_ulp_mode(toep); + else + toep->ulp_mode = ULP_MODE_NONE; SOCKBUF_LOCK(&so->so_rcv); /* opt0 rcv_bufsiz initially, assumes its normal meaning later */ toep->rx_credits = min(select_rcv_wnd(so) >> 10, M_RCV_BUFSIZ); SOCKBUF_UNLOCK(&so->so_rcv); offload_socket(so, toep); /* * The kernel sets request_r_scale based on sb_max whereas we need to * take hardware's MAX_RCV_WND into account too. This is normally a * no-op as MAX_RCV_WND is much larger than the default sb_max. 
*/ if (tp->t_flags & TF_REQ_SCALE) rscale = tp->request_r_scale = select_rcv_wscale(); else rscale = 0; mtu_idx = find_best_mtu_idx(sc, &inp->inp_inc, 0); qid_atid = (toep->ofld_rxq->iq.abs_id << 14) | atid; INIT_TP_WR(cpl, 0); OPCODE_TID(cpl) = htobe32(MK_OPCODE_TID(CPL_ACT_OPEN_REQ, qid_atid)); inp_4tuple_get(inp, &cpl->local_ip, &cpl->local_port, &cpl->peer_ip, &cpl->peer_port); cpl->opt0 = calc_opt0(so, pi, e, mtu_idx, rscale, toep->rx_credits, toep->ulp_mode); cpl->params = select_ntuple(pi, e, sc->filter_mode); cpl->opt2 = calc_opt2a(so); CTR5(KTR_CXGBE, "%s: atid %u (%s), toep %p, inp %p", __func__, toep->tid, tcpstates[tp->t_state], toep, inp); rc = t4_l2t_send(sc, wr, e); if (rc == 0) { toepcb_set_flag(toep, TPF_CPL_PENDING); return (0); } undo_offload_socket(so); failed: CTR5(KTR_CXGBE, "%s: FAILED, atid %d, toep %p, l2te %p, wr %p", __func__, atid, toep, e, wr); if (e) t4_l2t_release(e); if (wr) free_wrqe(wr); if (atid >= 0) free_atid(sc, atid); if (toep) free_toepcb(toep); return (rc); } #endif Index: head/sys/dev/cxgbe/tom/t4_cpl_io.c =================================================================== --- head/sys/dev/cxgbe/tom/t4_cpl_io.c (revision 239343) +++ head/sys/dev/cxgbe/tom/t4_cpl_io.c (revision 239344) @@ -1,1329 +1,1416 @@ /*- * Copyright (c) 2012 Chelsio Communications, Inc. * All rights reserved. * Written by: Navdeep Parhar * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #ifdef TCP_OFFLOAD #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define TCPSTATES #include #include #include #include "common/common.h" #include "common/t4_msg.h" #include "common/t4_regs.h" +#include "common/t4_tcb.h" #include "tom/t4_tom_l2t.h" #include "tom/t4_tom.h" VNET_DECLARE(int, tcp_do_autosndbuf); #define V_tcp_do_autosndbuf VNET(tcp_do_autosndbuf) VNET_DECLARE(int, tcp_autosndbuf_inc); #define V_tcp_autosndbuf_inc VNET(tcp_autosndbuf_inc) VNET_DECLARE(int, tcp_autosndbuf_max); #define V_tcp_autosndbuf_max VNET(tcp_autosndbuf_max) VNET_DECLARE(int, tcp_do_autorcvbuf); #define V_tcp_do_autorcvbuf VNET(tcp_do_autorcvbuf) VNET_DECLARE(int, tcp_autorcvbuf_inc); #define V_tcp_autorcvbuf_inc VNET(tcp_autorcvbuf_inc) VNET_DECLARE(int, tcp_autorcvbuf_max); #define V_tcp_autorcvbuf_max VNET(tcp_autorcvbuf_max) void send_flowc_wr(struct toepcb *toep, struct flowc_tx_params *ftxp) { struct wrqe *wr; struct fw_flowc_wr *flowc; unsigned int nparams = ftxp ? 
8 : 4, flowclen; struct port_info *pi = toep->port; struct adapter *sc = pi->adapter; unsigned int pfvf = G_FW_VIID_PFN(pi->viid) << S_FW_VIID_PFN; struct ofld_tx_sdesc *txsd = &toep->txsd[toep->txsd_pidx]; KASSERT(!toepcb_flag(toep, TPF_FLOWC_WR_SENT), ("%s: flowc for tid %u sent already", __func__, toep->tid)); CTR2(KTR_CXGBE, "%s: tid %u", __func__, toep->tid); flowclen = sizeof(*flowc) + nparams * sizeof(struct fw_flowc_mnemval); wr = alloc_wrqe(roundup(flowclen, 16), toep->ofld_txq); if (wr == NULL) { /* XXX */ panic("%s: allocation failure.", __func__); } flowc = wrtod(wr); memset(flowc, 0, wr->wr_len); flowc->op_to_nparams = htobe32(V_FW_WR_OP(FW_FLOWC_WR) | V_FW_FLOWC_WR_NPARAMS(nparams)); flowc->flowid_len16 = htonl(V_FW_WR_LEN16(howmany(flowclen, 16)) | V_FW_WR_FLOWID(toep->tid)); flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_PFNVFN; flowc->mnemval[0].val = htobe32(pfvf); flowc->mnemval[1].mnemonic = FW_FLOWC_MNEM_CH; flowc->mnemval[1].val = htobe32(pi->tx_chan); flowc->mnemval[2].mnemonic = FW_FLOWC_MNEM_PORT; flowc->mnemval[2].val = htobe32(pi->tx_chan); flowc->mnemval[3].mnemonic = FW_FLOWC_MNEM_IQID; flowc->mnemval[3].val = htobe32(toep->ofld_rxq->iq.abs_id); if (ftxp) { uint32_t sndbuf = min(ftxp->snd_space, sc->tt.sndbuf); flowc->mnemval[4].mnemonic = FW_FLOWC_MNEM_SNDNXT; flowc->mnemval[4].val = htobe32(ftxp->snd_nxt); flowc->mnemval[5].mnemonic = FW_FLOWC_MNEM_RCVNXT; flowc->mnemval[5].val = htobe32(ftxp->rcv_nxt); flowc->mnemval[6].mnemonic = FW_FLOWC_MNEM_SNDBUF; flowc->mnemval[6].val = htobe32(sndbuf); flowc->mnemval[7].mnemonic = FW_FLOWC_MNEM_MSS; flowc->mnemval[7].val = htobe32(ftxp->mss); } txsd->tx_credits = howmany(flowclen, 16); txsd->plen = 0; KASSERT(toep->tx_credits >= txsd->tx_credits && toep->txsd_avail > 0, ("%s: not enough credits (%d)", __func__, toep->tx_credits)); toep->tx_credits -= txsd->tx_credits; if (__predict_false(++toep->txsd_pidx == toep->txsd_total)) toep->txsd_pidx = 0; toep->txsd_avail--; toepcb_set_flag(toep, 
TPF_FLOWC_WR_SENT); t4_wrq_tx(sc, wr); } void send_reset(struct adapter *sc, struct toepcb *toep, uint32_t snd_nxt) { struct wrqe *wr; struct cpl_abort_req *req; int tid = toep->tid; struct inpcb *inp = toep->inp; struct tcpcb *tp = intotcpcb(inp); /* don't use if INP_DROPPED */ INP_WLOCK_ASSERT(inp); CTR6(KTR_CXGBE, "%s: tid %d (%s), toep_flags 0x%x, inp_flags 0x%x%s", __func__, toep->tid, inp->inp_flags & INP_DROPPED ? "inp dropped" : tcpstates[tp->t_state], toep->flags, inp->inp_flags, toepcb_flag(toep, TPF_ABORT_SHUTDOWN) ? " (abort already in progress)" : ""); if (toepcb_flag(toep, TPF_ABORT_SHUTDOWN)) return; /* abort already in progress */ toepcb_set_flag(toep, TPF_ABORT_SHUTDOWN); KASSERT(toepcb_flag(toep, TPF_FLOWC_WR_SENT), ("%s: flowc_wr not sent for tid %d.", __func__, tid)); wr = alloc_wrqe(sizeof(*req), toep->ofld_txq); if (wr == NULL) { /* XXX */ panic("%s: allocation failure.", __func__); } req = wrtod(wr); INIT_TP_WR_MIT_CPL(req, CPL_ABORT_REQ, tid); if (inp->inp_flags & INP_DROPPED) req->rsvd0 = htobe32(snd_nxt); else req->rsvd0 = htobe32(tp->snd_nxt); req->rsvd1 = !toepcb_flag(toep, TPF_TX_DATA_SENT); req->cmd = CPL_ABORT_SEND_RST; /* * XXX: What's the correct way to tell that the inp hasn't been detached * from its socket? Should I even be flushing the snd buffer here? */ if ((inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) == 0) { struct socket *so = inp->inp_socket; if (so != NULL) /* because I'm not sure. See comment above */ sbflush(&so->so_snd); } t4_l2t_send(sc, wr, toep->l2te); } /* * Called when a connection is established to translate the TCP options * reported by HW to FreeBSD's native format. 
*/ static void assign_rxopt(struct tcpcb *tp, unsigned int opt) { struct toepcb *toep = tp->t_toe; struct adapter *sc = td_adapter(toep->td); INP_LOCK_ASSERT(tp->t_inpcb); tp->t_maxseg = tp->t_maxopd = sc->params.mtus[G_TCPOPT_MSS(opt)] - 40; if (G_TCPOPT_TSTAMP(opt)) { tp->t_flags |= TF_RCVD_TSTMP; /* timestamps ok */ tp->ts_recent = 0; /* hmmm */ tp->ts_recent_age = tcp_ts_getticks(); tp->t_maxseg -= TCPOLEN_TSTAMP_APPA; } if (G_TCPOPT_SACK(opt)) tp->t_flags |= TF_SACK_PERMIT; /* should already be set */ else tp->t_flags &= ~TF_SACK_PERMIT; /* sack disallowed by peer */ if (G_TCPOPT_WSCALE_OK(opt)) tp->t_flags |= TF_RCVD_SCALE; /* Doing window scaling? */ if ((tp->t_flags & (TF_RCVD_SCALE | TF_REQ_SCALE)) == (TF_RCVD_SCALE | TF_REQ_SCALE)) { tp->rcv_scale = tp->request_r_scale; tp->snd_scale = G_TCPOPT_SND_WSCALE(opt); } } /* * Completes some final bits of initialization for just established connections * and changes their state to TCPS_ESTABLISHED. * * The ISNs are from after the exchange of SYNs. i.e., the true ISN + 1. 
*/ void make_established(struct toepcb *toep, uint32_t snd_isn, uint32_t rcv_isn, uint16_t opt) { struct inpcb *inp = toep->inp; struct socket *so = inp->inp_socket; struct tcpcb *tp = intotcpcb(inp); long bufsize; uint32_t iss = be32toh(snd_isn) - 1; /* true ISS */ uint32_t irs = be32toh(rcv_isn) - 1; /* true IRS */ uint16_t tcpopt = be16toh(opt); struct flowc_tx_params ftxp; INP_WLOCK_ASSERT(inp); KASSERT(tp->t_state == TCPS_SYN_SENT || tp->t_state == TCPS_SYN_RECEIVED, ("%s: TCP state %s", __func__, tcpstates[tp->t_state])); CTR4(KTR_CXGBE, "%s: tid %d, toep %p, inp %p", __func__, toep->tid, toep, inp); tp->t_state = TCPS_ESTABLISHED; tp->t_starttime = ticks; TCPSTAT_INC(tcps_connects); tp->irs = irs; tcp_rcvseqinit(tp); tp->rcv_wnd = toep->rx_credits << 10; tp->rcv_adv += tp->rcv_wnd; tp->last_ack_sent = tp->rcv_nxt; /* * If we were unable to send all rx credits via opt0, save the remainder * in rx_credits so that they can be handed over with the next credit * update. */ SOCKBUF_LOCK(&so->so_rcv); bufsize = select_rcv_wnd(so); SOCKBUF_UNLOCK(&so->so_rcv); toep->rx_credits = bufsize - tp->rcv_wnd; tp->iss = iss; tcp_sendseqinit(tp); tp->snd_una = iss + 1; tp->snd_nxt = iss + 1; tp->snd_max = iss + 1; assign_rxopt(tp, tcpopt); SOCKBUF_LOCK(&so->so_snd); if (so->so_snd.sb_flags & SB_AUTOSIZE && V_tcp_do_autosndbuf) bufsize = V_tcp_autosndbuf_max; else bufsize = sbspace(&so->so_snd); SOCKBUF_UNLOCK(&so->so_snd); ftxp.snd_nxt = tp->snd_nxt; ftxp.rcv_nxt = tp->rcv_nxt; ftxp.snd_space = bufsize; ftxp.mss = tp->t_maxseg; send_flowc_wr(toep, &ftxp); soisconnected(so); } static int -send_rx_credits(struct adapter *sc, struct toepcb *toep, uint32_t credits) +send_rx_credits(struct adapter *sc, struct toepcb *toep, int credits) { struct wrqe *wr; struct cpl_rx_data_ack *req; uint32_t dack = F_RX_DACK_CHANGE | V_RX_DACK_MODE(1); + KASSERT(credits >= 0, ("%s: %d credits", __func__, credits)); + wr = alloc_wrqe(sizeof(*req), toep->ctrlq); if (wr == NULL) return (0); req = 
wrtod(wr); INIT_TP_WR_MIT_CPL(req, CPL_RX_DATA_ACK, toep->tid); req->credit_dack = htobe32(dack | V_RX_CREDITS(credits)); t4_wrq_tx(sc, wr); return (credits); } void t4_rcvd(struct toedev *tod, struct tcpcb *tp) { struct adapter *sc = tod->tod_softc; struct inpcb *inp = tp->t_inpcb; struct socket *so = inp->inp_socket; - struct sockbuf *so_rcv = &so->so_rcv; + struct sockbuf *sb = &so->so_rcv; struct toepcb *toep = tp->t_toe; - int must_send; + int credits; INP_WLOCK_ASSERT(inp); - SOCKBUF_LOCK(so_rcv); - KASSERT(toep->enqueued >= so_rcv->sb_cc, - ("%s: so_rcv->sb_cc > enqueued", __func__)); - toep->rx_credits += toep->enqueued - so_rcv->sb_cc; - toep->enqueued = so_rcv->sb_cc; - SOCKBUF_UNLOCK(so_rcv); + SOCKBUF_LOCK(sb); + KASSERT(toep->sb_cc >= sb->sb_cc, + ("%s: sb %p has more data (%d) than last time (%d).", + __func__, sb, sb->sb_cc, toep->sb_cc)); + toep->rx_credits += toep->sb_cc - sb->sb_cc; + toep->sb_cc = sb->sb_cc; + credits = toep->rx_credits; + SOCKBUF_UNLOCK(sb); - must_send = toep->rx_credits + 16384 >= tp->rcv_wnd; - if (must_send || toep->rx_credits >= 15 * 1024) { - int credits; + if (credits > 0 && + (credits + 16384 >= tp->rcv_wnd || credits >= 15 * 1024)) { - credits = send_rx_credits(sc, toep, toep->rx_credits); + credits = send_rx_credits(sc, toep, credits); + SOCKBUF_LOCK(sb); toep->rx_credits -= credits; + SOCKBUF_UNLOCK(sb); tp->rcv_wnd += credits; tp->rcv_adv += credits; } } /* * Close a connection by sending a CPL_CLOSE_CON_REQ message. */ static int close_conn(struct adapter *sc, struct toepcb *toep) { struct wrqe *wr; struct cpl_close_con_req *req; unsigned int tid = toep->tid; CTR3(KTR_CXGBE, "%s: tid %u%s", __func__, toep->tid, toepcb_flag(toep, TPF_FIN_SENT) ? 
", IGNORED" : ""); if (toepcb_flag(toep, TPF_FIN_SENT)) return (0); KASSERT(toepcb_flag(toep, TPF_FLOWC_WR_SENT), ("%s: flowc_wr not sent for tid %u.", __func__, tid)); wr = alloc_wrqe(sizeof(*req), toep->ofld_txq); if (wr == NULL) { /* XXX */ panic("%s: allocation failure.", __func__); } req = wrtod(wr); req->wr.wr_hi = htonl(V_FW_WR_OP(FW_TP_WR) | V_FW_WR_IMMDLEN(sizeof(*req) - sizeof(req->wr))); req->wr.wr_mid = htonl(V_FW_WR_LEN16(howmany(sizeof(*req), 16)) | V_FW_WR_FLOWID(tid)); req->wr.wr_lo = cpu_to_be64(0); OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, tid)); req->rsvd = 0; toepcb_set_flag(toep, TPF_FIN_SENT); toepcb_clr_flag(toep, TPF_SEND_FIN); t4_l2t_send(sc, wr, toep->l2te); return (0); } #define MAX_OFLD_TX_CREDITS (SGE_MAX_WR_LEN / 16) #define MIN_OFLD_TX_CREDITS (howmany(sizeof(struct fw_ofld_tx_data_wr) + 1, 16)) /* Maximum amount of immediate data we could stuff in a WR */ static inline int max_imm_payload(int tx_credits) { const int n = 2; /* Use only up to 2 desc for imm. 
data WR */ KASSERT(tx_credits >= 0 && tx_credits <= MAX_OFLD_TX_CREDITS, ("%s: %d credits", __func__, tx_credits)); if (tx_credits < MIN_OFLD_TX_CREDITS) return (0); if (tx_credits >= (n * EQ_ESIZE) / 16) return ((n * EQ_ESIZE) - sizeof(struct fw_ofld_tx_data_wr)); else return (tx_credits * 16 - sizeof(struct fw_ofld_tx_data_wr)); } /* * Maximum number of SGL entries we could stuff in a WR. Returns 0 when * tx_credits is below MIN_OFLD_TX_CREDITS. Credits are 16 bytes each; the * credits left after MIN_OFLD_TX_CREDITS are filled with 24-byte * ulptx_sge_pairs (2 entries each), and a 16-byte remainder counts as one * more entry. */ static inline int max_dsgl_nsegs(int tx_credits) { int nseg = 1; /* ulptx_sgl has room for 1, rest go in ulptx_sge_pairs */ int sge_pair_credits = tx_credits - MIN_OFLD_TX_CREDITS; KASSERT(tx_credits >= 0 && tx_credits <= MAX_OFLD_TX_CREDITS, ("%s: %d credits", __func__, tx_credits)); if (tx_credits < MIN_OFLD_TX_CREDITS) return (0); nseg += 2 * (sge_pair_credits * 16 / 24); if ((sge_pair_credits * 16) % 24 == 16) nseg++; return (nseg); } /* * Fill in the fw_ofld_tx_data_wr header at dst. immdlen and plen are * written to the IMMDLEN field and the plen member respectively, credits * is written to the LEN16 field, and the SHOVE bit is set when * more_to_come is zero. */ static inline void write_tx_wr(void *dst, struct toepcb *toep, unsigned int immdlen, unsigned int plen, uint8_t credits, int more_to_come) { struct fw_ofld_tx_data_wr *txwr = dst; int shove = !more_to_come; int compl = 1; /* * We always request completion notifications from the firmware. The * only exception is when we know we'll get more data to send shortly * and that we'll have some tx credits remaining to transmit that data. */ if (more_to_come && toep->tx_credits - credits >= MIN_OFLD_TX_CREDITS) compl = 0; txwr->op_to_immdlen = htobe32(V_WR_OP(FW_OFLD_TX_DATA_WR) | V_FW_WR_COMPL(compl) | V_FW_WR_IMMDLEN(immdlen)); txwr->flowid_len16 = htobe32(V_FW_WR_FLOWID(toep->tid) | V_FW_WR_LEN16(credits)); txwr->tunnel_to_proxy = htobe32(V_FW_OFLD_TX_DATA_WR_ULPMODE(toep->ulp_mode) | V_FW_OFLD_TX_DATA_WR_URGENT(0) | /* XXX */ V_FW_OFLD_TX_DATA_WR_SHOVE(shove)); txwr->plen = htobe32(plen); } /* * Generate a DSGL from a starting mbuf. The total number of segments and the * maximum segments in any one mbuf are provided. 
*/ static void write_tx_sgl(void *dst, struct mbuf *start, struct mbuf *stop, int nsegs, int n) { struct mbuf *m; struct ulptx_sgl *usgl = dst; int i, j, rc; struct sglist sg; struct sglist_seg segs[n]; KASSERT(nsegs > 0, ("%s: nsegs 0", __func__)); sglist_init(&sg, n, segs); usgl->cmd_nsge = htobe32(V_ULPTX_CMD(ULP_TX_SC_DSGL) | V_ULPTX_NSGE(nsegs)); i = -1; for (m = start; m != stop; m = m->m_next) { rc = sglist_append(&sg, mtod(m, void *), m->m_len); if (__predict_false(rc != 0)) panic("%s: sglist_append %d", __func__, rc); for (j = 0; j < sg.sg_nseg; i++, j++) { if (i < 0) { usgl->len0 = htobe32(segs[j].ss_len); usgl->addr0 = htobe64(segs[j].ss_paddr); } else { usgl->sge[i / 2].len[i & 1] = htobe32(segs[j].ss_len); usgl->sge[i / 2].addr[i & 1] = htobe64(segs[j].ss_paddr); } #ifdef INVARIANTS nsegs--; #endif } sglist_reset(&sg); } if (i & 1) usgl->sge[i / 2].len[1] = htobe32(0); KASSERT(nsegs == 0, ("%s: nsegs %d, start %p, stop %p", __func__, nsegs, start, stop)); } /* * Max number of SGL entries an offload tx work request can have. This is 41 * (1 + 40) for a full 512B work request. * fw_ofld_tx_data_wr(16B) + ulptx_sgl(16B, 1) + ulptx_sge_pair(480B, 40) */ #define OFLD_SGL_LEN (41) /* * Send data and/or a FIN to the peer. * * The socket's so_snd buffer consists of a stream of data starting with sb_mb * and linked together with m_next. sb_sndptr, if set, is the last mbuf that * was transmitted. 
*/ static void t4_push_frames(struct adapter *sc, struct toepcb *toep) { struct mbuf *sndptr, *m, *sb_sndptr; struct fw_ofld_tx_data_wr *txwr; struct wrqe *wr; unsigned int plen, nsegs, credits, max_imm, max_nsegs, max_nsegs_1mbuf; struct inpcb *inp = toep->inp; struct tcpcb *tp = intotcpcb(inp); struct socket *so = inp->inp_socket; struct sockbuf *sb = &so->so_snd; int tx_credits; struct ofld_tx_sdesc *txsd = &toep->txsd[toep->txsd_pidx]; INP_WLOCK_ASSERT(inp); KASSERT(toepcb_flag(toep, TPF_FLOWC_WR_SENT), ("%s: flowc_wr not sent for tid %u.", __func__, toep->tid)); - if (toep->ulp_mode != ULP_MODE_NONE) + if (__predict_false(toep->ulp_mode != ULP_MODE_NONE && + toep->ulp_mode != ULP_MODE_TCPDDP)) CXGBE_UNIMPLEMENTED("ulp_mode"); /* * This function doesn't resume by itself. Someone else must clear the * flag and call this function. */ if (__predict_false(toepcb_flag(toep, TPF_TX_SUSPENDED))) return; do { tx_credits = min(toep->tx_credits, MAX_OFLD_TX_CREDITS); max_imm = max_imm_payload(tx_credits); max_nsegs = max_dsgl_nsegs(tx_credits); SOCKBUF_LOCK(sb); sb_sndptr = sb->sb_sndptr; sndptr = sb_sndptr ? 
sb_sndptr->m_next : sb->sb_mb; plen = 0; nsegs = 0; max_nsegs_1mbuf = 0; /* max # of SGL segments in any one mbuf */ for (m = sndptr; m != NULL; m = m->m_next) { int n = sglist_count(mtod(m, void *), m->m_len); nsegs += n; plen += m->m_len; /* This mbuf sent us _over_ the nsegs limit, back out */ if (plen > max_imm && nsegs > max_nsegs) { nsegs -= n; plen -= m->m_len; if (plen == 0) { /* Too few credits */ toepcb_set_flag(toep, TPF_TX_SUSPENDED); SOCKBUF_UNLOCK(sb); return; } break; } if (max_nsegs_1mbuf < n) max_nsegs_1mbuf = n; sb_sndptr = m; /* new sb->sb_sndptr if all goes well */ /* This mbuf put us right at the max_nsegs limit */ if (plen > max_imm && nsegs == max_nsegs) { m = m->m_next; break; } } if (sb->sb_flags & SB_AUTOSIZE && V_tcp_do_autosndbuf && sb->sb_hiwat < V_tcp_autosndbuf_max && sbspace(sb) < sb->sb_hiwat / 8 * 7) { int newsize = min(sb->sb_hiwat + V_tcp_autosndbuf_inc, V_tcp_autosndbuf_max); if (!sbreserve_locked(sb, newsize, so, NULL)) sb->sb_flags &= ~SB_AUTOSIZE; else { sowwakeup_locked(so); /* room available */ SOCKBUF_UNLOCK_ASSERT(sb); goto unlocked; } } SOCKBUF_UNLOCK(sb); unlocked: /* nothing to send */ if (plen == 0) { KASSERT(m == NULL, ("%s: nothing to send, but m != NULL", __func__)); break; } if (__predict_false(toepcb_flag(toep, TPF_FIN_SENT))) panic("%s: excess tx.", __func__); if (plen <= max_imm) { /* Immediate data tx */ wr = alloc_wrqe(roundup(sizeof(*txwr) + plen, 16), toep->ofld_txq); if (wr == NULL) { /* XXX: how will we recover from this? */ toepcb_set_flag(toep, TPF_TX_SUSPENDED); return; } txwr = wrtod(wr); credits = howmany(wr->wr_len, 16); write_tx_wr(txwr, toep, plen, plen, credits, tp->t_flags & TF_MORETOCOME); m_copydata(sndptr, 0, plen, (void *)(txwr + 1)); } else { int wr_len; /* DSGL tx */ wr_len = sizeof(*txwr) + sizeof(struct ulptx_sgl) + ((3 * (nsegs - 1)) / 2 + ((nsegs - 1) & 1)) * 8; wr = alloc_wrqe(roundup(wr_len, 16), toep->ofld_txq); if (wr == NULL) { /* XXX: how will we recover from this? 
*/ toepcb_set_flag(toep, TPF_TX_SUSPENDED); return; } txwr = wrtod(wr); credits = howmany(wr_len, 16); write_tx_wr(txwr, toep, 0, plen, credits, tp->t_flags & TF_MORETOCOME); write_tx_sgl(txwr + 1, sndptr, m, nsegs, max_nsegs_1mbuf); if (wr_len & 0xf) { uint64_t *pad = (uint64_t *) ((uintptr_t)txwr + wr_len); *pad = 0; } } KASSERT(toep->tx_credits >= credits, ("%s: not enough credits", __func__)); toep->tx_credits -= credits; tp->snd_nxt += plen; tp->snd_max += plen; SOCKBUF_LOCK(sb); KASSERT(sb_sndptr, ("%s: sb_sndptr is NULL", __func__)); sb->sb_sndptr = sb_sndptr; SOCKBUF_UNLOCK(sb); toepcb_set_flag(toep, TPF_TX_DATA_SENT); KASSERT(toep->txsd_avail > 0, ("%s: no txsd", __func__)); txsd->plen = plen; txsd->tx_credits = credits; txsd++; if (__predict_false(++toep->txsd_pidx == toep->txsd_total)) { toep->txsd_pidx = 0; txsd = &toep->txsd[0]; } toep->txsd_avail--; t4_l2t_send(sc, wr, toep->l2te); } while (m != NULL); /* Send a FIN if requested, but only if there's no more data to send */ if (m == NULL && toepcb_flag(toep, TPF_SEND_FIN)) close_conn(sc, toep); } int t4_tod_output(struct toedev *tod, struct tcpcb *tp) { struct adapter *sc = tod->tod_softc; #ifdef INVARIANTS struct inpcb *inp = tp->t_inpcb; #endif struct toepcb *toep = tp->t_toe; INP_WLOCK_ASSERT(inp); KASSERT((inp->inp_flags & INP_DROPPED) == 0, ("%s: inp %p dropped.", __func__, inp)); KASSERT(toep != NULL, ("%s: toep is NULL", __func__)); t4_push_frames(sc, toep); return (0); } int t4_send_fin(struct toedev *tod, struct tcpcb *tp) { struct adapter *sc = tod->tod_softc; #ifdef INVARIANTS struct inpcb *inp = tp->t_inpcb; #endif struct toepcb *toep = tp->t_toe; INP_WLOCK_ASSERT(inp); KASSERT((inp->inp_flags & INP_DROPPED) == 0, ("%s: inp %p dropped.", __func__, inp)); KASSERT(toep != NULL, ("%s: toep is NULL", __func__)); toepcb_set_flag(toep, TPF_SEND_FIN); t4_push_frames(sc, toep); return (0); } int t4_send_rst(struct toedev *tod, struct tcpcb *tp) { struct adapter *sc = tod->tod_softc; #if 
defined(INVARIANTS) struct inpcb *inp = tp->t_inpcb; #endif struct toepcb *toep = tp->t_toe; INP_WLOCK_ASSERT(inp); KASSERT((inp->inp_flags & INP_DROPPED) == 0, ("%s: inp %p dropped.", __func__, inp)); KASSERT(toep != NULL, ("%s: toep is NULL", __func__)); /* hmmmm */ KASSERT(toepcb_flag(toep, TPF_FLOWC_WR_SENT), ("%s: flowc for tid %u [%s] not sent already", __func__, toep->tid, tcpstates[tp->t_state])); send_reset(sc, toep, 0); return (0); } /* * Peer has sent us a FIN. */ static int do_peer_close(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; const struct cpl_peer_close *cpl = (const void *)(rss + 1); unsigned int tid = GET_TID(cpl); struct toepcb *toep = lookup_tid(sc, tid); struct inpcb *inp = toep->inp; struct tcpcb *tp = NULL; - struct socket *so = NULL; + struct socket *so; + struct sockbuf *sb; #ifdef INVARIANTS unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl))); #endif KASSERT(opcode == CPL_PEER_CLOSE, ("%s: unexpected opcode 0x%x", __func__, opcode)); KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__)); INP_INFO_WLOCK(&V_tcbinfo); INP_WLOCK(inp); tp = intotcpcb(inp); CTR5(KTR_CXGBE, "%s: tid %u (%s), toep_flags 0x%x, inp %p", __func__, tid, tp ? 
tcpstates[tp->t_state] : "no tp", toep->flags, inp); if (toepcb_flag(toep, TPF_ABORT_SHUTDOWN)) goto done; + tp->rcv_nxt++; /* FIN */ + so = inp->inp_socket; + sb = &so->so_rcv; + SOCKBUF_LOCK(sb); + if (__predict_false(toep->ddp_flags & (DDP_BUF0_ACTIVE | DDP_BUF1_ACTIVE))) { + m = m_get(M_NOWAIT, MT_DATA); + if (m == NULL) + CXGBE_UNIMPLEMENTED("mbuf alloc failure"); - socantrcvmore(so); - tp->rcv_nxt++; /* FIN */ + m->m_len = be32toh(cpl->rcv_nxt) - tp->rcv_nxt; + m->m_flags |= M_DDP; /* Data is already where it should be */ + m->m_data = "nothing to see here"; + tp->rcv_nxt = be32toh(cpl->rcv_nxt); + + toep->ddp_flags &= ~(DDP_BUF0_ACTIVE | DDP_BUF1_ACTIVE); + + KASSERT(toep->sb_cc >= sb->sb_cc, + ("%s: sb %p has more data (%d) than last time (%d).", + __func__, sb, sb->sb_cc, toep->sb_cc)); + toep->rx_credits += toep->sb_cc - sb->sb_cc; +#ifdef USE_DDP_RX_FLOW_CONTROL + toep->rx_credits -= m->m_len; /* adjust for F_RX_FC_DDP */ +#endif + sbappendstream_locked(sb, m); + toep->sb_cc = sb->sb_cc; + } + socantrcvmore_locked(so); /* unlocks the sockbuf */ + KASSERT(tp->rcv_nxt == be32toh(cpl->rcv_nxt), ("%s: rcv_nxt mismatch: %u %u", __func__, tp->rcv_nxt, be32toh(cpl->rcv_nxt))); switch (tp->t_state) { case TCPS_SYN_RECEIVED: tp->t_starttime = ticks; /* FALLTHROUGH */ case TCPS_ESTABLISHED: tp->t_state = TCPS_CLOSE_WAIT; break; case TCPS_FIN_WAIT_1: tp->t_state = TCPS_CLOSING; break; case TCPS_FIN_WAIT_2: tcp_twstart(tp); INP_UNLOCK_ASSERT(inp); /* safe, we have a ref on the inp */ INP_INFO_WUNLOCK(&V_tcbinfo); INP_WLOCK(inp); final_cpl_received(toep); return (0); default: log(LOG_ERR, "%s: TID %u received CPL_PEER_CLOSE in state %d\n", __func__, tid, tp->t_state); } done: INP_WUNLOCK(inp); INP_INFO_WUNLOCK(&V_tcbinfo); return (0); } /* * Peer has ACK'd our FIN. 
*/ static int do_close_con_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; const struct cpl_close_con_rpl *cpl = (const void *)(rss + 1); unsigned int tid = GET_TID(cpl); struct toepcb *toep = lookup_tid(sc, tid); struct inpcb *inp = toep->inp; struct tcpcb *tp = NULL; struct socket *so = NULL; #ifdef INVARIANTS unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl))); #endif KASSERT(opcode == CPL_CLOSE_CON_RPL, ("%s: unexpected opcode 0x%x", __func__, opcode)); KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__)); INP_INFO_WLOCK(&V_tcbinfo); INP_WLOCK(inp); tp = intotcpcb(inp); CTR4(KTR_CXGBE, "%s: tid %u (%s), toep_flags 0x%x", __func__, tid, tp ? tcpstates[tp->t_state] : "no tp", toep->flags); if (toepcb_flag(toep, TPF_ABORT_SHUTDOWN)) goto done; so = inp->inp_socket; tp->snd_una = be32toh(cpl->snd_nxt) - 1; /* exclude FIN */ switch (tp->t_state) { case TCPS_CLOSING: /* see TCPS_FIN_WAIT_2 in do_peer_close too */ tcp_twstart(tp); release: INP_UNLOCK_ASSERT(inp); /* safe, we have a ref on the inp */ INP_INFO_WUNLOCK(&V_tcbinfo); INP_WLOCK(inp); final_cpl_received(toep); /* no more CPLs expected */ return (0); case TCPS_LAST_ACK: if (tcp_close(tp)) INP_WUNLOCK(inp); goto release; case TCPS_FIN_WAIT_1: if (so->so_rcv.sb_state & SBS_CANTRCVMORE) soisdisconnected(so); tp->t_state = TCPS_FIN_WAIT_2; break; default: log(LOG_ERR, "%s: TID %u received CPL_CLOSE_CON_RPL in state %s\n", __func__, tid, tcpstates[tp->t_state]); } done: INP_WUNLOCK(inp); INP_INFO_WUNLOCK(&V_tcbinfo); return (0); } void send_abort_rpl(struct adapter *sc, struct sge_wrq *ofld_txq, int tid, int rst_status) { struct wrqe *wr; struct cpl_abort_rpl *cpl; wr = alloc_wrqe(sizeof(*cpl), ofld_txq); if (wr == NULL) { /* XXX */ panic("%s: allocation failure.", __func__); } cpl = wrtod(wr); INIT_TP_WR_MIT_CPL(cpl, CPL_ABORT_RPL, tid); cpl->cmd = rst_status; t4_wrq_tx(sc, 
wr); } static int abort_status_to_errno(struct tcpcb *tp, unsigned int abort_reason) { switch (abort_reason) { case CPL_ERR_BAD_SYN: case CPL_ERR_CONN_RESET: return (tp->t_state == TCPS_CLOSE_WAIT ? EPIPE : ECONNRESET); case CPL_ERR_XMIT_TIMEDOUT: case CPL_ERR_PERSIST_TIMEDOUT: case CPL_ERR_FINWAIT2_TIMEDOUT: case CPL_ERR_KEEPALIVE_TIMEDOUT: return (ETIMEDOUT); default: return (EIO); } } /* * TCP RST from the peer, timeout, or some other such critical error. */ static int do_abort_req(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; const struct cpl_abort_req_rss *cpl = (const void *)(rss + 1); unsigned int tid = GET_TID(cpl); struct toepcb *toep = lookup_tid(sc, tid); struct sge_wrq *ofld_txq = toep->ofld_txq; struct inpcb *inp; struct tcpcb *tp; struct socket *so; #ifdef INVARIANTS unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl))); #endif KASSERT(opcode == CPL_ABORT_REQ_RSS, ("%s: unexpected opcode 0x%x", __func__, opcode)); KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); if (toepcb_flag(toep, TPF_SYNQE)) return (do_abort_req_synqe(iq, rss, m)); KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__)); if (cpl->status == CPL_ERR_RTX_NEG_ADVICE || cpl->status == CPL_ERR_PERSIST_NEG_ADVICE) { CTR4(KTR_CXGBE, "%s: negative advice %d for tid %d (0x%x)", __func__, cpl->status, tid, toep->flags); return (0); /* Ignore negative advice */ } inp = toep->inp; INP_INFO_WLOCK(&V_tcbinfo); /* for tcp_close */ INP_WLOCK(inp); tp = intotcpcb(inp); so = inp->inp_socket; CTR6(KTR_CXGBE, "%s: tid %d (%s), toep_flags 0x%x, inp_flags 0x%x, status %d", __func__, tid, tcpstates[tp->t_state], toep->flags, inp->inp_flags, cpl->status); /* * If we'd initiated an abort earlier the reply to it is responsible for * cleaning up resources. Otherwise we tear everything down right here * right now. We owe the T4 a CPL_ABORT_RPL no matter what. 
*/ if (toepcb_flag(toep, TPF_ABORT_SHUTDOWN)) { INP_WUNLOCK(inp); goto done; } toepcb_set_flag(toep, TPF_ABORT_SHUTDOWN); so_error_set(so, abort_status_to_errno(tp, cpl->status)); tp = tcp_close(tp); if (tp == NULL) INP_WLOCK(inp); /* re-acquire */ final_cpl_received(toep); done: INP_INFO_WUNLOCK(&V_tcbinfo); send_abort_rpl(sc, ofld_txq, tid, CPL_ABORT_NO_RST); return (0); } /* * Reply to the CPL_ABORT_REQ (send_reset) */ static int do_abort_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; const struct cpl_abort_rpl_rss *cpl = (const void *)(rss + 1); unsigned int tid = GET_TID(cpl); struct toepcb *toep = lookup_tid(sc, tid); struct inpcb *inp = toep->inp; #ifdef INVARIANTS unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl))); #endif KASSERT(opcode == CPL_ABORT_RPL_RSS, ("%s: unexpected opcode 0x%x", __func__, opcode)); KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); if (toepcb_flag(toep, TPF_SYNQE)) return (do_abort_rpl_synqe(iq, rss, m)); KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__)); CTR5(KTR_CXGBE, "%s: tid %u, toep %p, inp %p, status %d", __func__, tid, toep, inp, cpl->status); KASSERT(toepcb_flag(toep, TPF_ABORT_SHUTDOWN), ("%s: wasn't expecting abort reply", __func__)); INP_WLOCK(inp); final_cpl_received(toep); return (0); } static int do_rx_data(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; const struct cpl_rx_data *cpl = mtod(m, const void *); unsigned int tid = GET_TID(cpl); struct toepcb *toep = lookup_tid(sc, tid); struct inpcb *inp = toep->inp; struct tcpcb *tp; struct socket *so; - struct sockbuf *so_rcv; + struct sockbuf *sb; + int len; if (__predict_false(toepcb_flag(toep, TPF_SYNQE))) { /* * do_pass_establish failed and must be attempting to abort the * synqe's tid. Meanwhile, the T4 has sent us data for such a * connection. 
*/ KASSERT(toepcb_flag(toep, TPF_ABORT_SHUTDOWN), ("%s: synqe and tid isn't being aborted.", __func__)); m_freem(m); return (0); } KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__)); /* strip off CPL header */ m_adj(m, sizeof(*cpl)); + len = m->m_pkthdr.len; INP_WLOCK(inp); if (inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) { CTR4(KTR_CXGBE, "%s: tid %u, rx (%d bytes), inp_flags 0x%x", - __func__, tid, m->m_pkthdr.len, inp->inp_flags); + __func__, tid, len, inp->inp_flags); INP_WUNLOCK(inp); m_freem(m); return (0); } tp = intotcpcb(inp); #ifdef INVARIANTS if (__predict_false(tp->rcv_nxt != be32toh(cpl->seq))) { log(LOG_ERR, "%s: unexpected seq# %x for TID %u, rcv_nxt %x\n", __func__, be32toh(cpl->seq), toep->tid, tp->rcv_nxt); } #endif - tp->rcv_nxt += m->m_pkthdr.len; - KASSERT(tp->rcv_wnd >= m->m_pkthdr.len, - ("%s: negative window size", __func__)); - tp->rcv_wnd -= m->m_pkthdr.len; + tp->rcv_nxt += len; + KASSERT(tp->rcv_wnd >= len, ("%s: negative window size", __func__)); + tp->rcv_wnd -= len; tp->t_rcvtime = ticks; so = inp_inpcbtosocket(inp); - so_rcv = &so->so_rcv; - SOCKBUF_LOCK(so_rcv); + sb = &so->so_rcv; + SOCKBUF_LOCK(sb); - if (__predict_false(so_rcv->sb_state & SBS_CANTRCVMORE)) { + if (__predict_false(sb->sb_state & SBS_CANTRCVMORE)) { CTR3(KTR_CXGBE, "%s: tid %u, excess rx (%d bytes)", - __func__, tid, m->m_pkthdr.len); + __func__, tid, len); m_freem(m); - SOCKBUF_UNLOCK(so_rcv); + SOCKBUF_UNLOCK(sb); INP_WUNLOCK(inp); INP_INFO_WLOCK(&V_tcbinfo); INP_WLOCK(inp); tp = tcp_drop(tp, ECONNRESET); if (tp) INP_WUNLOCK(inp); INP_INFO_WUNLOCK(&V_tcbinfo); return (0); } /* receive buffer autosize */ - if (so_rcv->sb_flags & SB_AUTOSIZE && + if (sb->sb_flags & SB_AUTOSIZE && V_tcp_do_autorcvbuf && - so_rcv->sb_hiwat < V_tcp_autorcvbuf_max && - m->m_pkthdr.len > (sbspace(so_rcv) / 8 * 7)) { - unsigned int hiwat = so_rcv->sb_hiwat; + sb->sb_hiwat < V_tcp_autorcvbuf_max && + len > (sbspace(sb) / 8 * 7)) { + unsigned int hiwat = sb->sb_hiwat; 
unsigned int newsize = min(hiwat + V_tcp_autorcvbuf_inc, V_tcp_autorcvbuf_max); - if (!sbreserve_locked(so_rcv, newsize, so, NULL)) - so_rcv->sb_flags &= ~SB_AUTOSIZE; + if (!sbreserve_locked(sb, newsize, so, NULL)) + sb->sb_flags &= ~SB_AUTOSIZE; else toep->rx_credits += newsize - hiwat; } - toep->enqueued += m->m_pkthdr.len; - sbappendstream_locked(so_rcv, m); + + if (toep->ulp_mode == ULP_MODE_TCPDDP) { + int changed = !(toep->ddp_flags & DDP_ON) ^ cpl->ddp_off; + + if (changed) { + if (__predict_false(!(toep->ddp_flags & DDP_SC_REQ))) { + /* XXX: handle this if legitimate */ + panic("%s: unexpected DDP state change %d", + __func__, cpl->ddp_off); + } + toep->ddp_flags ^= DDP_ON | DDP_SC_REQ; + } + + if ((toep->ddp_flags & DDP_OK) == 0 && + time_uptime >= toep->ddp_disabled + DDP_RETRY_WAIT) { + toep->ddp_score = DDP_LOW_SCORE; + toep->ddp_flags |= DDP_OK; + CTR3(KTR_CXGBE, "%s: tid %u DDP_OK @ %u", + __func__, tid, time_uptime); + } + + if (toep->ddp_flags & DDP_ON) { + + /* + * CPL_RX_DATA with DDP on can only be an indicate. Ask + * soreceive to post a buffer or disable DDP. The + * payload that arrived in this indicate is appended to + * the socket buffer as usual. + */ + +#if 0 + CTR5(KTR_CXGBE, + "%s: tid %u (0x%x) DDP indicate (seq 0x%x, len %d)", + __func__, tid, toep->flags, be32toh(cpl->seq), len); +#endif + sb->sb_flags |= SB_DDP_INDICATE; + } else if ((toep->ddp_flags & (DDP_OK|DDP_SC_REQ)) == DDP_OK && + tp->rcv_wnd > DDP_RSVD_WIN && len >= sc->tt.ddp_thres) { + + /* + * DDP allowed but isn't on (and a request to switch it + * on isn't pending either), and conditions are ripe for + * it to work. Switch it on. 
+ */ + + enable_ddp(sc, toep); + } + } + + KASSERT(toep->sb_cc >= sb->sb_cc, + ("%s: sb %p has more data (%d) than last time (%d).", + __func__, sb, sb->sb_cc, toep->sb_cc)); + toep->rx_credits += toep->sb_cc - sb->sb_cc; + sbappendstream_locked(sb, m); + toep->sb_cc = sb->sb_cc; sorwakeup_locked(so); - SOCKBUF_UNLOCK_ASSERT(so_rcv); + SOCKBUF_UNLOCK_ASSERT(sb); INP_WUNLOCK(inp); return (0); } #define S_CPL_FW4_ACK_OPCODE 24 #define M_CPL_FW4_ACK_OPCODE 0xff #define V_CPL_FW4_ACK_OPCODE(x) ((x) << S_CPL_FW4_ACK_OPCODE) #define G_CPL_FW4_ACK_OPCODE(x) \ (((x) >> S_CPL_FW4_ACK_OPCODE) & M_CPL_FW4_ACK_OPCODE) #define S_CPL_FW4_ACK_FLOWID 0 #define M_CPL_FW4_ACK_FLOWID 0xffffff #define V_CPL_FW4_ACK_FLOWID(x) ((x) << S_CPL_FW4_ACK_FLOWID) #define G_CPL_FW4_ACK_FLOWID(x) \ (((x) >> S_CPL_FW4_ACK_FLOWID) & M_CPL_FW4_ACK_FLOWID) #define S_CPL_FW4_ACK_CR 24 #define M_CPL_FW4_ACK_CR 0xff #define V_CPL_FW4_ACK_CR(x) ((x) << S_CPL_FW4_ACK_CR) #define G_CPL_FW4_ACK_CR(x) (((x) >> S_CPL_FW4_ACK_CR) & M_CPL_FW4_ACK_CR) #define S_CPL_FW4_ACK_SEQVAL 0 #define M_CPL_FW4_ACK_SEQVAL 0x1 #define V_CPL_FW4_ACK_SEQVAL(x) ((x) << S_CPL_FW4_ACK_SEQVAL) #define G_CPL_FW4_ACK_SEQVAL(x) \ (((x) >> S_CPL_FW4_ACK_SEQVAL) & M_CPL_FW4_ACK_SEQVAL) #define F_CPL_FW4_ACK_SEQVAL V_CPL_FW4_ACK_SEQVAL(1U) static int do_fw4_ack(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; const struct cpl_fw4_ack *cpl = (const void *)(rss + 1); unsigned int tid = G_CPL_FW4_ACK_FLOWID(be32toh(OPCODE_TID(cpl))); struct toepcb *toep = lookup_tid(sc, tid); struct inpcb *inp; struct tcpcb *tp; struct socket *so; uint8_t credits = cpl->credits; struct ofld_tx_sdesc *txsd; int plen; #ifdef INVARIANTS unsigned int opcode = G_CPL_FW4_ACK_OPCODE(be32toh(OPCODE_TID(cpl))); #endif /* * Very unusual case: we'd sent a flowc + abort_req for a synq entry and * now this comes back carrying the credits for the flowc. 
*/ if (__predict_false(toepcb_flag(toep, TPF_SYNQE))) { KASSERT(toepcb_flag(toep, TPF_ABORT_SHUTDOWN), ("%s: credits for a synq entry %p", __func__, toep)); return (0); } inp = toep->inp; KASSERT(opcode == CPL_FW4_ACK, ("%s: unexpected opcode 0x%x", __func__, opcode)); KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__)); INP_WLOCK(inp); if (__predict_false(toepcb_flag(toep, TPF_ABORT_SHUTDOWN))) { INP_WUNLOCK(inp); return (0); } KASSERT((inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) == 0, ("%s: inp_flags 0x%x", __func__, inp->inp_flags)); tp = intotcpcb(inp); if (cpl->flags & CPL_FW4_ACK_FLAGS_SEQVAL) { tcp_seq snd_una = be32toh(cpl->snd_una); #ifdef INVARIANTS if (__predict_false(SEQ_LT(snd_una, tp->snd_una))) { log(LOG_ERR, "%s: unexpected seq# %x for TID %u, snd_una %x\n", __func__, snd_una, toep->tid, tp->snd_una); } #endif if (tp->snd_una != snd_una) { tp->snd_una = snd_una; tp->ts_recent_age = tcp_ts_getticks(); } } so = inp->inp_socket; txsd = &toep->txsd[toep->txsd_cidx]; plen = 0; while (credits) { KASSERT(credits >= txsd->tx_credits, ("%s: too many (or partial) credits", __func__)); credits -= txsd->tx_credits; toep->tx_credits += txsd->tx_credits; plen += txsd->plen; txsd++; toep->txsd_avail++; KASSERT(toep->txsd_avail <= toep->txsd_total, ("%s: txsd avail > total", __func__)); if (__predict_false(++toep->txsd_cidx == toep->txsd_total)) { txsd = &toep->txsd[0]; toep->txsd_cidx = 0; } } if (plen > 0) { struct sockbuf *sb = &so->so_snd; SOCKBUF_LOCK(sb); sbdrop_locked(sb, plen); sowwakeup_locked(so); SOCKBUF_UNLOCK_ASSERT(sb); } /* XXX */ if ((toepcb_flag(toep, TPF_TX_SUSPENDED) && toep->tx_credits >= MIN_OFLD_TX_CREDITS) || toep->tx_credits == toep->txsd_total * howmany((sizeof(struct fw_ofld_tx_data_wr) + 1), 16)) { toepcb_clr_flag(toep, TPF_TX_SUSPENDED); t4_push_frames(sc, toep); } INP_WUNLOCK(inp); return (0); } static int do_set_tcb_rpl(struct sge_iq *iq, const struct 
rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; const struct cpl_set_tcb_rpl *cpl = (const void *)(rss + 1); unsigned int tid = GET_TID(cpl); #ifdef INVARIANTS unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl))); #endif KASSERT(opcode == CPL_SET_TCB_RPL, ("%s: unexpected opcode 0x%x", __func__, opcode)); KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); if (tid >= sc->tids.ftid_base && tid < sc->tids.ftid_base + sc->tids.nftids) return (t4_filter_rpl(iq, rss, m)); /* TCB is a filter */ CXGBE_UNIMPLEMENTED(__func__); } void t4_set_tcb_field(struct adapter *sc, struct toepcb *toep, uint16_t word, uint64_t mask, uint64_t val) { struct wrqe *wr; struct cpl_set_tcb_field *req; wr = alloc_wrqe(sizeof(*req), toep->ctrlq); if (wr == NULL) { /* XXX */ panic("%s: allocation failure.", __func__); } req = wrtod(wr); INIT_TP_WR_MIT_CPL(req, CPL_SET_TCB_FIELD, toep->tid); req->reply_ctrl = htobe16(V_NO_REPLY(1) | V_QUEUENO(toep->ofld_rxq->iq.abs_id)); req->word_cookie = htobe16(V_WORD(word) | V_COOKIE(0)); req->mask = htobe64(mask); req->val = htobe64(val); t4_wrq_tx(sc, wr); } void t4_init_cpl_io_handlers(struct adapter *sc) { t4_register_cpl_handler(sc, CPL_PEER_CLOSE, do_peer_close); t4_register_cpl_handler(sc, CPL_CLOSE_CON_RPL, do_close_con_rpl); t4_register_cpl_handler(sc, CPL_ABORT_REQ_RSS, do_abort_req); t4_register_cpl_handler(sc, CPL_ABORT_RPL_RSS, do_abort_rpl); t4_register_cpl_handler(sc, CPL_RX_DATA, do_rx_data); t4_register_cpl_handler(sc, CPL_FW4_ACK, do_fw4_ack); t4_register_cpl_handler(sc, CPL_SET_TCB_RPL, do_set_tcb_rpl); } void t4_uninit_cpl_io_handlers(struct adapter *sc) { t4_register_cpl_handler(sc, CPL_SET_TCB_RPL, t4_filter_rpl); } #endif Index: head/sys/dev/cxgbe/tom/t4_ddp.c =================================================================== --- head/sys/dev/cxgbe/tom/t4_ddp.c (nonexistent) +++ head/sys/dev/cxgbe/tom/t4_ddp.c (revision 239344) @@ -0,0 +1,1223 @@ +/*- + * Copyright (c) 2012 Chelsio 
Communications, Inc. + * All rights reserved. + * Written by: Navdeep Parhar + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include +__FBSDID("$FreeBSD$"); + +#include "opt_inet.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#define TCPSTATES +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#ifdef TCP_OFFLOAD +#include "common/common.h" +#include "common/t4_msg.h" +#include "common/t4_regs.h" +#include "common/t4_tcb.h" +#include "tom/t4_tom.h" + +#define PPOD_SZ(n) ((n) * sizeof(struct pagepod)) +#define PPOD_SIZE (PPOD_SZ(1)) + +/* XXX: must match A_ULP_RX_TDDP_PSZ */ +static int t4_ddp_pgsz[] = {4096, 4096 << 2, 4096 << 4, 4096 << 6}; + +#if 0 +static void +t4_dump_tcb(struct adapter *sc, int tid) +{ + uint32_t tcb_base, off, i, j; + + /* Dump TCB for the tid */ + tcb_base = t4_read_reg(sc, A_TP_CMM_TCB_BASE); + t4_write_reg(sc, PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_OFFSET, 2), + tcb_base + tid * TCB_SIZE); + t4_read_reg(sc, PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_OFFSET, 2)); + off = 0; + printf("\n"); + for (i = 0; i < 4; i++) { + uint32_t buf[8]; + for (j = 0; j < 8; j++, off += 4) + buf[j] = htonl(t4_read_reg(sc, MEMWIN2_BASE + off)); + + printf("%08x %08x %08x %08x %08x %08x %08x %08x\n", + buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], + buf[7]); + } +} +#endif + +#define MAX_DDP_BUFFER_SIZE (M_TCB_RX_DDP_BUF0_LEN) +static int +alloc_ppods(struct tom_data *td, int n, struct ppod_region *pr) +{ + int ppod; + + KASSERT(n > 0, ("%s: nonsense allocation (%d)", __func__, n)); + + mtx_lock(&td->ppod_lock); + if (n > td->nppods_free) { + mtx_unlock(&td->ppod_lock); + return (-1); + } + + if (td->nppods_free_head >= n) { + td->nppods_free_head -= n; + ppod = td->nppods_free_head; + TAILQ_INSERT_HEAD(&td->ppods, pr, link); + } else { + struct ppod_region *p; + + ppod = td->nppods_free_head; + TAILQ_FOREACH(p, &td->ppods, link) { + ppod += p->used + p->free; + if (n <= p->free) { + ppod -= n; + p->free -= n; + 
TAILQ_INSERT_AFTER(&td->ppods, p, pr, link); + goto allocated; + } + } + + if (__predict_false(ppod != td->nppods)) { + panic("%s: ppods TAILQ (%p) corrupt." + " At %d instead of %d at the end of the queue.", + __func__, &td->ppods, ppod, td->nppods); + } + + mtx_unlock(&td->ppod_lock); + return (-1); + } + +allocated: + pr->used = n; + pr->free = 0; + td->nppods_free -= n; + mtx_unlock(&td->ppod_lock); + + return (ppod); +} + +static void +free_ppods(struct tom_data *td, struct ppod_region *pr) +{ + struct ppod_region *p; + + KASSERT(pr->used > 0, ("%s: nonsense free (%d)", __func__, pr->used)); + + mtx_lock(&td->ppod_lock); + p = TAILQ_PREV(pr, ppod_head, link); + if (p != NULL) + p->free += pr->used + pr->free; + else + td->nppods_free_head += pr->used + pr->free; + td->nppods_free += pr->used; + KASSERT(td->nppods_free <= td->nppods, + ("%s: nppods_free (%d) > nppods (%d). %d freed this time.", + __func__, td->nppods_free, td->nppods, pr->used)); + TAILQ_REMOVE(&td->ppods, pr, link); + mtx_unlock(&td->ppod_lock); +} + +static inline int +pages_to_nppods(int npages, int ddp_pgsz) +{ + int nsegs = npages * PAGE_SIZE / ddp_pgsz; + + return (howmany(nsegs, PPOD_PAGES)); +} + +static void +free_ddp_buffer(struct tom_data *td, struct ddp_buffer *db) +{ + + if (db == NULL) + return; + + if (db->pages) + free(db->pages, M_CXGBE); + + if (db->nppods > 0) + free_ppods(td, &db->ppod_region); + + free(db, M_CXGBE); +} + +void +release_ddp_resources(struct toepcb *toep) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(toep->db); i++) { + if (toep->db[i] != NULL) { + free_ddp_buffer(toep->td, toep->db[i]); + toep->db[i] = NULL; + } + } +} + +/* SET_TCB_FIELD sent as a ULP command looks like this */ +#define LEN__SET_TCB_FIELD_ULP (sizeof(struct ulp_txpkt) + \ + sizeof(struct ulptx_idata) + sizeof(struct cpl_set_tcb_field_core)) + +/* RX_DATA_ACK sent as a ULP command looks like this */ +#define LEN__RX_DATA_ACK_ULP (sizeof(struct ulp_txpkt) + \ + sizeof(struct ulptx_idata) + 
sizeof(struct cpl_rx_data_ack_core)) + +static inline void * +mk_set_tcb_field_ulp(struct ulp_txpkt *ulpmc, struct toepcb *toep, + uint64_t word, uint64_t mask, uint64_t val) +{ + struct ulptx_idata *ulpsc; + struct cpl_set_tcb_field_core *req; + + ulpmc->cmd_dest = htonl(V_ULPTX_CMD(ULP_TX_PKT) | V_ULP_TXPKT_DEST(0)); + ulpmc->len = htobe32(howmany(LEN__SET_TCB_FIELD_ULP, 16)); + + ulpsc = (struct ulptx_idata *)(ulpmc + 1); + ulpsc->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_IMM)); + ulpsc->len = htobe32(sizeof(*req)); + + req = (struct cpl_set_tcb_field_core *)(ulpsc + 1); + OPCODE_TID(req) = htobe32(MK_OPCODE_TID(CPL_SET_TCB_FIELD, toep->tid)); + req->reply_ctrl = htobe16(V_NO_REPLY(1) | + V_QUEUENO(toep->ofld_rxq->iq.abs_id)); + req->word_cookie = htobe16(V_WORD(word) | V_COOKIE(0)); + req->mask = htobe64(mask); + req->val = htobe64(val); + + ulpsc = (struct ulptx_idata *)(req + 1); + if (LEN__SET_TCB_FIELD_ULP % 16) { + ulpsc->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_NOOP)); + ulpsc->len = htobe32(0); + return (ulpsc + 1); + } + return (ulpsc); +} + +static inline void * +mk_rx_data_ack_ulp(struct ulp_txpkt *ulpmc, struct toepcb *toep) +{ + struct ulptx_idata *ulpsc; + struct cpl_rx_data_ack_core *req; + + ulpmc->cmd_dest = htonl(V_ULPTX_CMD(ULP_TX_PKT) | V_ULP_TXPKT_DEST(0)); + ulpmc->len = htobe32(howmany(LEN__RX_DATA_ACK_ULP, 16)); + + ulpsc = (struct ulptx_idata *)(ulpmc + 1); + ulpsc->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_IMM)); + ulpsc->len = htobe32(sizeof(*req)); + + req = (struct cpl_rx_data_ack_core *)(ulpsc + 1); + OPCODE_TID(req) = htobe32(MK_OPCODE_TID(CPL_RX_DATA_ACK, toep->tid)); + req->credit_dack = htobe32(F_RX_MODULATE_RX); + + ulpsc = (struct ulptx_idata *)(req + 1); + if (LEN__RX_DATA_ACK_ULP % 16) { + ulpsc->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_NOOP)); + ulpsc->len = htobe32(0); + return (ulpsc + 1); + } + return (ulpsc); +} + +static inline uint64_t +select_ddp_flags(struct socket *so, int flags, int db_idx) +{ + uint64_t ddp_flags = 
V_TF_DDP_INDICATE_OUT(0); + int waitall = flags & MSG_WAITALL; + int nb = so->so_state & SS_NBIO || flags & (MSG_DONTWAIT | MSG_NBIO); + + KASSERT(db_idx == 0 || db_idx == 1, + ("%s: bad DDP buffer index %d", __func__, db_idx)); + + if (db_idx == 0) { + ddp_flags |= V_TF_DDP_BUF0_VALID(1) | V_TF_DDP_ACTIVE_BUF(0); + if (waitall) + ddp_flags |= V_TF_DDP_PUSH_DISABLE_0(1); + else if (nb) + ddp_flags |= V_TF_DDP_BUF0_FLUSH(1); + else + ddp_flags |= V_TF_DDP_BUF0_FLUSH(0); + } else { + ddp_flags |= V_TF_DDP_BUF1_VALID(1) | V_TF_DDP_ACTIVE_BUF(1); + if (waitall) + ddp_flags |= V_TF_DDP_PUSH_DISABLE_1(1); + else if (nb) + ddp_flags |= V_TF_DDP_BUF1_FLUSH(1); + else + ddp_flags |= V_TF_DDP_BUF1_FLUSH(0); + } + + return (ddp_flags); +} + +static struct wrqe * +mk_update_tcb_for_ddp(struct adapter *sc, struct toepcb *toep, int db_idx, + int offset, uint64_t ddp_flags) +{ + struct ddp_buffer *db = toep->db[db_idx]; + struct wrqe *wr; + struct work_request_hdr *wrh; + struct ulp_txpkt *ulpmc; + int len; + + KASSERT(db_idx == 0 || db_idx == 1, + ("%s: bad DDP buffer index %d", __func__, db_idx)); + + /* + * We'll send a compound work request that has 3 SET_TCB_FIELDs and an + * RX_DATA_ACK (with RX_MODULATE to speed up delivery). + * + * The work request header is 16B and always ends at a 16B boundary. + * The ULPTX master commands that follow must all end at 16B boundaries + * too so we round up the size to 16. 
+ */ + len = sizeof(*wrh) + 3 * roundup(LEN__SET_TCB_FIELD_ULP, 16) + + roundup(LEN__RX_DATA_ACK_ULP, 16); + + wr = alloc_wrqe(len, toep->ctrlq); + if (wr == NULL) + return (NULL); + wrh = wrtod(wr); + INIT_ULPTX_WRH(wrh, len, 1, 0); /* atomic */ + ulpmc = (struct ulp_txpkt *)(wrh + 1); + + /* Write the buffer's tag */ + ulpmc = mk_set_tcb_field_ulp(ulpmc, toep, + W_TCB_RX_DDP_BUF0_TAG + db_idx, + V_TCB_RX_DDP_BUF0_TAG(M_TCB_RX_DDP_BUF0_TAG), + V_TCB_RX_DDP_BUF0_TAG(db->tag)); + + /* Update the current offset in the DDP buffer and its total length */ + if (db_idx == 0) + ulpmc = mk_set_tcb_field_ulp(ulpmc, toep, + W_TCB_RX_DDP_BUF0_OFFSET, + V_TCB_RX_DDP_BUF0_OFFSET(M_TCB_RX_DDP_BUF0_OFFSET) | + V_TCB_RX_DDP_BUF0_LEN(M_TCB_RX_DDP_BUF0_LEN), + V_TCB_RX_DDP_BUF0_OFFSET(offset) | + V_TCB_RX_DDP_BUF0_LEN(db->len)); + else + ulpmc = mk_set_tcb_field_ulp(ulpmc, toep, + W_TCB_RX_DDP_BUF1_OFFSET, + V_TCB_RX_DDP_BUF1_OFFSET(M_TCB_RX_DDP_BUF1_OFFSET) | + V_TCB_RX_DDP_BUF1_LEN((u64)M_TCB_RX_DDP_BUF1_LEN << 32), + V_TCB_RX_DDP_BUF1_OFFSET(offset) | + V_TCB_RX_DDP_BUF1_LEN((u64)db->len << 32)); + + /* Update DDP flags */ + ulpmc = mk_set_tcb_field_ulp(ulpmc, toep, W_TCB_RX_DDP_FLAGS, + V_TF_DDP_BUF0_FLUSH(1) | V_TF_DDP_BUF1_FLUSH(1) | + V_TF_DDP_PUSH_DISABLE_0(1) | V_TF_DDP_PUSH_DISABLE_1(1) | + V_TF_DDP_BUF0_VALID(1) | V_TF_DDP_BUF1_VALID(1) | + V_TF_DDP_ACTIVE_BUF(1) | V_TF_DDP_INDICATE_OUT(1), ddp_flags); + + /* Gratuitous RX_DATA_ACK with RX_MODULATE set to speed up delivery. 
*/ + ulpmc = mk_rx_data_ack_ulp(ulpmc, toep); + + return (wr); +} + +static void +discourage_ddp(struct toepcb *toep) +{ + + if (toep->ddp_score && --toep->ddp_score == 0) { + toep->ddp_flags &= ~DDP_OK; + toep->ddp_disabled = time_uptime; + CTR3(KTR_CXGBE, "%s: tid %u !DDP_OK @ %u", + __func__, toep->tid, time_uptime); + } +} + +static int +handle_ddp_data(struct toepcb *toep, __be32 ddp_report, __be32 rcv_nxt, int len) +{ + uint32_t report = be32toh(ddp_report); + unsigned int db_flag; + struct inpcb *inp = toep->inp; + struct tcpcb *tp; + struct socket *so; + struct sockbuf *sb; + struct mbuf *m; + + db_flag = report & F_DDP_BUF_IDX ? DDP_BUF1_ACTIVE : DDP_BUF0_ACTIVE; + + if (__predict_false(!(report & F_DDP_INV))) + CXGBE_UNIMPLEMENTED("DDP buffer still valid"); + + INP_WLOCK(inp); + so = inp_inpcbtosocket(inp); + sb = &so->so_rcv; + if (__predict_false(inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT))) { + + /* + * XXX: think a bit more. + * tcpcb probably gone, but socket should still be around + * because we always wait for DDP completion in soreceive no + * matter what. Just wake it up and let it clean up. 
+ */ + + CTR5(KTR_CXGBE, "%s: tid %u, seq 0x%x, len %d, inp_flags 0x%x", + __func__, toep->tid, be32toh(rcv_nxt), len, inp->inp_flags); + SOCKBUF_LOCK(sb); + goto wakeup; + } + + tp = intotcpcb(inp); + len += be32toh(rcv_nxt) - tp->rcv_nxt; + tp->rcv_nxt += len; + tp->t_rcvtime = ticks; +#ifndef USE_DDP_RX_FLOW_CONTROL + KASSERT(tp->rcv_wnd >= len, ("%s: negative window size", __func__)); + tp->rcv_wnd -= len; +#endif + + m = m_get(M_NOWAIT, MT_DATA); + if (m == NULL) + CXGBE_UNIMPLEMENTED("mbuf alloc failure"); + m->m_len = len; + m->m_flags |= M_DDP; /* Data is already where it should be */ + m->m_data = "nothing to see here"; + + SOCKBUF_LOCK(sb); + if (report & F_DDP_BUF_COMPLETE) + toep->ddp_score = DDP_HIGH_SCORE; + else + discourage_ddp(toep); + + KASSERT(toep->sb_cc >= sb->sb_cc, + ("%s: sb %p has more data (%d) than last time (%d).", + __func__, sb, sb->sb_cc, toep->sb_cc)); + toep->rx_credits += toep->sb_cc - sb->sb_cc; +#ifdef USE_DDP_RX_FLOW_CONTROL + toep->rx_credits -= len; /* adjust for F_RX_FC_DDP */ +#endif + sbappendstream_locked(sb, m); + toep->sb_cc = sb->sb_cc; +wakeup: + KASSERT(toep->ddp_flags & db_flag, + ("%s: DDP buffer not active. 
toep %p, ddp_flags 0x%x, report 0x%x", + __func__, toep, toep->ddp_flags, report)); + toep->ddp_flags &= ~db_flag; + sorwakeup_locked(so); + SOCKBUF_UNLOCK_ASSERT(sb); + + INP_WUNLOCK(inp); + return (0); +} + +#define DDP_ERR (F_DDP_PPOD_MISMATCH | F_DDP_LLIMIT_ERR | F_DDP_ULIMIT_ERR |\ + F_DDP_PPOD_PARITY_ERR | F_DDP_PADDING_ERR | F_DDP_OFFSET_ERR |\ + F_DDP_INVALID_TAG | F_DDP_COLOR_ERR | F_DDP_TID_MISMATCH |\ + F_DDP_INVALID_PPOD | F_DDP_HDRCRC_ERR | F_DDP_DATACRC_ERR) + +static int +do_rx_data_ddp(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) +{ + struct adapter *sc = iq->adapter; + const struct cpl_rx_data_ddp *cpl = (const void *)(rss + 1); + unsigned int tid = GET_TID(cpl); + uint32_t vld; + struct toepcb *toep = lookup_tid(sc, tid); + + KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); + KASSERT(toep->tid == tid, ("%s: toep tid/atid mismatch", __func__)); + KASSERT(!toepcb_flag(toep, TPF_SYNQE), + ("%s: toep %p claims to be a synq entry", __func__, toep)); + + vld = be32toh(cpl->ddpvld); + if (__predict_false(vld & DDP_ERR)) { + panic("%s: DDP error 0x%x (tid %d, toep %p)", + __func__, vld, tid, toep); + } + + handle_ddp_data(toep, cpl->u.ddp_report, cpl->seq, be16toh(cpl->len)); + + return (0); +} + +static int +do_rx_ddp_complete(struct sge_iq *iq, const struct rss_header *rss, + struct mbuf *m) +{ + struct adapter *sc = iq->adapter; + const struct cpl_rx_ddp_complete *cpl = (const void *)(rss + 1); + unsigned int tid = GET_TID(cpl); + struct toepcb *toep = lookup_tid(sc, tid); + + KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); + KASSERT(toep->tid == tid, ("%s: toep tid/atid mismatch", __func__)); + KASSERT(!toepcb_flag(toep, TPF_SYNQE), + ("%s: toep %p claims to be a synq entry", __func__, toep)); + + handle_ddp_data(toep, cpl->ddp_report, cpl->rcv_nxt, 0); + + return (0); +} + +void +enable_ddp(struct adapter *sc, struct toepcb *toep) +{ + + KASSERT((toep->ddp_flags & (DDP_ON | DDP_OK | DDP_SC_REQ)) == 
DDP_OK, + ("%s: toep %p has bad ddp_flags 0x%x", + __func__, toep, toep->ddp_flags)); + + CTR3(KTR_CXGBE, "%s: tid %u (time %u)", + __func__, toep->tid, time_uptime); + + toep->ddp_flags |= DDP_SC_REQ; + t4_set_tcb_field(sc, toep, W_TCB_RX_DDP_FLAGS, + V_TF_DDP_OFF(1) | V_TF_DDP_INDICATE_OUT(1) | + V_TF_DDP_BUF0_INDICATE(1) | V_TF_DDP_BUF1_INDICATE(1) | + V_TF_DDP_BUF0_VALID(1) | V_TF_DDP_BUF1_VALID(1), + V_TF_DDP_BUF0_INDICATE(1) | V_TF_DDP_BUF1_INDICATE(1)); + t4_set_tcb_field(sc, toep, W_TCB_T_FLAGS, + V_TF_RCV_COALESCE_ENABLE(1), 0); +} + +static inline void +disable_ddp(struct adapter *sc, struct toepcb *toep) +{ + + KASSERT((toep->ddp_flags & (DDP_ON | DDP_SC_REQ)) == DDP_ON, + ("%s: toep %p has bad ddp_flags 0x%x", + __func__, toep, toep->ddp_flags)); + + CTR3(KTR_CXGBE, "%s: tid %u (time %u)", + __func__, toep->tid, time_uptime); + + toep->ddp_flags |= DDP_SC_REQ; + t4_set_tcb_field(sc, toep, W_TCB_T_FLAGS, + V_TF_RCV_COALESCE_ENABLE(1), V_TF_RCV_COALESCE_ENABLE(1)); + t4_set_tcb_field(sc, toep, W_TCB_RX_DDP_FLAGS, V_TF_DDP_OFF(1), + V_TF_DDP_OFF(1)); +} + +static int +hold_uio(struct uio *uio, vm_page_t **ppages, int *pnpages) +{ + struct vm_map *map; + struct iovec *iov; + vm_offset_t start, end; + vm_page_t *pp; + int n; + + KASSERT(uio->uio_iovcnt == 1, + ("%s: uio_iovcnt %d", __func__, uio->uio_iovcnt)); + KASSERT(uio->uio_td->td_proc == curproc, + ("%s: uio proc (%p) is not curproc (%p)", + __func__, uio->uio_td->td_proc, curproc)); + + map = &curproc->p_vmspace->vm_map; + iov = &uio->uio_iov[0]; + start = trunc_page((uintptr_t)iov->iov_base); + end = round_page((vm_offset_t)iov->iov_base + iov->iov_len); + n = howmany(end - start, PAGE_SIZE); + + if (end - start > MAX_DDP_BUFFER_SIZE) + return (E2BIG); + + pp = malloc(n * sizeof(vm_page_t), M_CXGBE, M_NOWAIT); + if (pp == NULL) + return (ENOMEM); + + if (vm_fault_quick_hold_pages(map, (vm_offset_t)iov->iov_base, + iov->iov_len, VM_PROT_WRITE, pp, n) < 0) { + free(pp, M_CXGBE); + return (EFAULT); + } 
+ + *ppages = pp; + *pnpages = n; + + return (0); +} + +static int +bufcmp(struct ddp_buffer *db, vm_page_t *pages, int npages, int offset, int len) +{ + int i; + + if (db == NULL || db->npages != npages || db->offset != offset || + db->len != len) + return (1); + + for (i = 0; i < npages; i++) { + if (pages[i]->phys_addr != db->pages[i]->phys_addr) + return (1); + } + + return (0); +} + +static int +calculate_hcf(int n1, int n2) +{ + int a, b, t; + + if (n1 <= n2) { + a = n1; + b = n2; + } else { + a = n2; + b = n1; + } + + while (a != 0) { + t = a; + a = b % a; + b = t; + } + + return (b); +} + +static struct ddp_buffer * +alloc_ddp_buffer(struct tom_data *td, vm_page_t *pages, int npages, int offset, + int len) +{ + int i, hcf, seglen, idx, ppod, nppods; + struct ddp_buffer *db; + + /* + * The DDP page size is unrelated to the VM page size. We combine + * contiguous physical pages into larger segments to get the best DDP + * page size possible. This is the largest of the four sizes in + * A_ULP_RX_TDDP_PSZ that evenly divides the HCF of the segment sizes in + * the page list. + */ + hcf = 0; + for (i = 0; i < npages; i++) { + seglen = PAGE_SIZE; + while (i < npages - 1 && + pages[i]->phys_addr + PAGE_SIZE == pages[i + 1]->phys_addr) { + seglen += PAGE_SIZE; + i++; + } + + hcf = calculate_hcf(hcf, seglen); + if (hcf < t4_ddp_pgsz[1]) { + idx = 0; + goto have_pgsz; /* give up, short circuit */ + } + } + + if (hcf % t4_ddp_pgsz[0] != 0) { + /* hmmm. 
This could only happen when PAGE_SIZE < 4K */ + KASSERT(PAGE_SIZE < 4096, + ("%s: PAGE_SIZE %d, hcf %d", __func__, PAGE_SIZE, hcf)); + CTR3(KTR_CXGBE, "%s: PAGE_SIZE %d, hcf %d", + __func__, PAGE_SIZE, hcf); + return (NULL); + } + + for (idx = ARRAY_SIZE(t4_ddp_pgsz) - 1; idx > 0; idx--) { + if (hcf % t4_ddp_pgsz[idx] == 0) + break; + } +have_pgsz: + + db = malloc(sizeof(*db), M_CXGBE, M_NOWAIT); + if (db == NULL) { + CTR1(KTR_CXGBE, "%s: malloc failed.", __func__); + return (NULL); + } + + nppods = pages_to_nppods(npages, t4_ddp_pgsz[idx]); + ppod = alloc_ppods(td, nppods, &db->ppod_region); + if (ppod < 0) { + free(db, M_CXGBE); + CTR4(KTR_CXGBE, "%s: no pods, nppods %d, resid %d, pgsz %d", + __func__, nppods, len, t4_ddp_pgsz[idx]); + return (NULL); + } + + KASSERT(idx <= M_PPOD_PGSZ && ppod <= M_PPOD_TAG, + ("%s: DDP pgsz_idx = %d, ppod = %d", __func__, idx, ppod)); + + db->tag = V_PPOD_PGSZ(idx) | V_PPOD_TAG(ppod); + db->nppods = nppods; + db->npages = npages; + db->pages = pages; + db->offset = offset; + db->len = len; + + CTR6(KTR_CXGBE, "New DDP buffer. 
" + "ddp_pgsz %d, ppod 0x%x, npages %d, nppods %d, offset %d, len %d", + t4_ddp_pgsz[idx], ppod, db->npages, db->nppods, db->offset, + db->len); + + return (db); +} + +#define NUM_ULP_TX_SC_IMM_PPODS (256 / PPOD_SIZE) + +static int +write_page_pods(struct adapter *sc, struct toepcb *toep, struct ddp_buffer *db) +{ + struct wrqe *wr; + struct ulp_mem_io *ulpmc; + struct ulptx_idata *ulpsc; + struct pagepod *ppod; + int i, j, k, n, chunk, len, ddp_pgsz, idx, ppod_addr; + + ddp_pgsz = t4_ddp_pgsz[G_PPOD_PGSZ(db->tag)]; + ppod_addr = sc->vres.ddp.start + G_PPOD_TAG(db->tag) * PPOD_SIZE; + for (i = 0; i < db->nppods; ppod_addr += chunk) { + + /* How many page pods are we writing in this cycle */ + n = min(db->nppods - i, NUM_ULP_TX_SC_IMM_PPODS); + chunk = PPOD_SZ(n); + len = roundup(sizeof(*ulpmc) + sizeof(*ulpsc) + chunk, 16); + + wr = alloc_wrqe(len, toep->ctrlq); + if (wr == NULL) + return (ENOMEM); /* ok to just bail out */ + ulpmc = wrtod(wr); + + INIT_ULPTX_WR(ulpmc, len, 0, 0); + ulpmc->cmd = htobe32(V_ULPTX_CMD(ULP_TX_MEM_WRITE) | + F_ULP_MEMIO_ORDER); + ulpmc->dlen = htobe32(V_ULP_MEMIO_DATA_LEN(chunk / 32)); + ulpmc->len16 = htobe32(howmany(len - sizeof(ulpmc->wr), 16)); + ulpmc->lock_addr = htobe32(V_ULP_MEMIO_ADDR(ppod_addr >> 5)); + + ulpsc = (struct ulptx_idata *)(ulpmc + 1); + ulpsc->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_IMM)); + ulpsc->len = htobe32(chunk); + + ppod = (struct pagepod *)(ulpsc + 1); + for (j = 0; j < n; i++, j++, ppod++) { + ppod->vld_tid_pgsz_tag_color = htobe64(F_PPOD_VALID | + V_PPOD_TID(toep->tid) | db->tag); + ppod->len_offset = htobe64(V_PPOD_LEN(db->len) | + V_PPOD_OFST(db->offset)); + ppod->rsvd = 0; + idx = i * PPOD_PAGES * (ddp_pgsz / PAGE_SIZE); + for (k = 0; k < ARRAY_SIZE(ppod->addr); k++) { + if (idx < db->npages) { + ppod->addr[k] = + htobe64(db->pages[idx]->phys_addr); + idx += ddp_pgsz / PAGE_SIZE; + } else + ppod->addr[k] = 0; +#if 0 + CTR5(KTR_CXGBE, + "%s: tid %d ppod[%d]->addr[%d] = %p", + __func__, toep->tid, i, 
k, + htobe64(ppod->addr[k])); +#endif + } + + } + + t4_wrq_tx(sc, wr); + } + + return (0); +} + +/* + * Reuse, or allocate (and program the page pods for) a new DDP buffer. + */ +static int +select_ddp_buffer(struct adapter *sc, struct toepcb *toep, vm_page_t *pages, + int npages, int db_off, int db_len) +{ + struct ddp_buffer *db; + struct tom_data *td = sc->tom_softc; + int i, empty_slot = -1; + + /* Try to reuse */ + for (i = 0; i < ARRAY_SIZE(toep->db); i++) { + if (bufcmp(toep->db[i], pages, npages, db_off, db_len) == 0) { + free(pages, M_CXGBE); + return (i); /* pages still held */ + } else if (toep->db[i] == NULL && empty_slot < 0) + empty_slot = i; + } + + /* Allocate new buffer, write its page pods. */ + db = alloc_ddp_buffer(td, pages, npages, db_off, db_len); + if (db == NULL) { + vm_page_unhold_pages(pages, npages); + free(pages, M_CXGBE); + return (-1); + } + if (write_page_pods(sc, toep, db) != 0) { + vm_page_unhold_pages(pages, npages); + free_ddp_buffer(td, db); + return (-1); + } + + i = empty_slot; + if (i < 0) { + i = arc4random() % ARRAY_SIZE(toep->db); + free_ddp_buffer(td, toep->db[i]); + } + toep->db[i] = db; + + CTR5(KTR_CXGBE, "%s: tid %d, DDP buffer[%d] = %p (tag 0x%x)", + __func__, toep->tid, i, db, db->tag); + + return (i); +} + +static void +wire_ddp_buffer(struct ddp_buffer *db) +{ + int i; + vm_page_t p; + + for (i = 0; i < db->npages; i++) { + p = db->pages[i]; + vm_page_lock(p); + vm_page_wire(p); + vm_page_unhold(p); + vm_page_unlock(p); + } +} + +static void +unwire_ddp_buffer(struct ddp_buffer *db) +{ + int i; + vm_page_t p; + + for (i = 0; i < db->npages; i++) { + p = db->pages[i]; + vm_page_lock(p); + vm_page_unwire(p, 0); + vm_page_unlock(p); + } +} + +static inline void +unhold_ddp_buffer(struct ddp_buffer *db) +{ + + vm_page_unhold_pages(db->pages, db->npages); +} + +static int +handle_ddp(struct socket *so, struct uio *uio, int flags, int error) +{ + struct sockbuf *sb = &so->so_rcv; + struct tcpcb *tp = so_sototcpcb(so); + 
struct toepcb *toep = tp->t_toe; + struct adapter *sc = td_adapter(toep->td); + vm_page_t *pages; + int npages, db_idx, rc, buf_flag; + struct ddp_buffer *db; + struct wrqe *wr; + uint64_t ddp_flags; + + SOCKBUF_LOCK_ASSERT(sb); + +#if 0 + if (sb->sb_cc + sc->tt.ddp_thres > uio->uio_resid) { + CTR4(KTR_CXGBE, "%s: sb_cc %d, threshold %d, resid %d", + __func__, sb->sb_cc, sc->tt.ddp_thres, uio->uio_resid); + } +#endif + + /* XXX: too eager to disable DDP, could handle NBIO better than this. */ + if (sb->sb_cc >= uio->uio_resid || uio->uio_resid < sc->tt.ddp_thres || + uio->uio_resid > MAX_DDP_BUFFER_SIZE || uio->uio_iovcnt > 1 || + so->so_state & SS_NBIO || flags & (MSG_DONTWAIT | MSG_NBIO) || + error || so->so_error || sb->sb_state & SBS_CANTRCVMORE) + goto no_ddp; + + /* + * Fault in and then hold the pages of the uio buffers. We'll wire them + * a bit later if everything else works out. + */ + if (hold_uio(uio, &pages, &npages) != 0) + goto no_ddp; + + /* + * Figure out which one of the two DDP buffers to use this time. + */ + db_idx = select_ddp_buffer(sc, toep, pages, npages, + (uintptr_t)uio->uio_iov->iov_base & PAGE_MASK, uio->uio_resid); + pages = NULL; /* pages either in use elsewhere or unheld + freed */ + if (db_idx < 0) + goto no_ddp; + db = toep->db[db_idx]; + buf_flag = db_idx == 0 ? DDP_BUF0_ACTIVE : DDP_BUF1_ACTIVE; + + /* + * Build the compound work request that tells the chip where to DMA the + * payload. + */ + ddp_flags = select_ddp_flags(so, flags, db_idx); + wr = mk_update_tcb_for_ddp(sc, toep, db_idx, sb->sb_cc, ddp_flags); + if (wr == NULL) { + unhold_ddp_buffer(db); + goto no_ddp; + } + + /* Wire the pages and give the chip the go-ahead. */ + wire_ddp_buffer(db); + t4_wrq_tx(sc, wr); + sb->sb_flags &= ~SB_DDP_INDICATE; + toep->ddp_flags |= buf_flag; + + /* + * Wait for the DDP operation to complete and then unwire the pages. + * The return code from the sbwait will be the final return code of this + * function. 
But we do need to wait for DDP no matter what. + */ + rc = sbwait(sb); + while (toep->ddp_flags & buf_flag) { + sb->sb_flags |= SB_WAIT; + msleep(&sb->sb_cc, &sb->sb_mtx, PSOCK , "sbwait", 0); + } + unwire_ddp_buffer(db); + return (rc); +no_ddp: + disable_ddp(sc, toep); + discourage_ddp(toep); + sb->sb_flags &= ~SB_DDP_INDICATE; + return (0); +} + +void +t4_init_ddp(struct adapter *sc, struct tom_data *td) +{ + int nppods = sc->vres.ddp.size / PPOD_SIZE; + + td->nppods = nppods; + td->nppods_free = nppods; + td->nppods_free_head = nppods; + TAILQ_INIT(&td->ppods); + mtx_init(&td->ppod_lock, "page pods", NULL, MTX_DEF); + + t4_register_cpl_handler(sc, CPL_RX_DATA_DDP, do_rx_data_ddp); + t4_register_cpl_handler(sc, CPL_RX_DDP_COMPLETE, do_rx_ddp_complete); +} + +void +t4_uninit_ddp(struct adapter *sc __unused, struct tom_data *td) +{ + + KASSERT(td->nppods == td->nppods_free, + ("%s: page pods still in use, nppods = %d, free = %d", + __func__, td->nppods, td->nppods_free)); + + if (mtx_initialized(&td->ppod_lock)) + mtx_destroy(&td->ppod_lock); +} + +#define VNET_SO_ASSERT(so) \ + VNET_ASSERT(curvnet != NULL, \ + ("%s:%d curvnet is NULL, so=%p", __func__, __LINE__, (so))); +#define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? 0 : SBL_WAIT) +static int +soreceive_rcvoob(struct socket *so, struct uio *uio, int flags) +{ + + CXGBE_UNIMPLEMENTED(__func__); +} + +/* + * Copy an mbuf chain into a uio limited by len if set. + */ +static int +m_mbuftouio_ddp(struct uio *uio, struct mbuf *m, int len) +{ + int error, length, total; + int progress = 0; + + if (len > 0) + total = min(uio->uio_resid, len); + else + total = uio->uio_resid; + + /* Fill the uio with data from the mbufs. 
*/ + for (; m != NULL; m = m->m_next) { + length = min(m->m_len, total - progress); + + if (m->m_flags & M_DDP) { + enum uio_seg segflag = uio->uio_segflg; + + uio->uio_segflg = UIO_NOCOPY; + error = uiomove(mtod(m, void *), length, uio); + uio->uio_segflg = segflag; + } else + error = uiomove(mtod(m, void *), length, uio); + if (error) + return (error); + + progress += length; + } + + return (0); +} + +/* + * Based on soreceive_stream() in uipc_socket.c + */ +int +t4_soreceive_ddp(struct socket *so, struct sockaddr **psa, struct uio *uio, + struct mbuf **mp0, struct mbuf **controlp, int *flagsp) +{ + int len = 0, error = 0, flags, oresid, ddp_handled = 0; + struct sockbuf *sb; + struct mbuf *m, *n = NULL; + + /* We only do stream sockets. */ + if (so->so_type != SOCK_STREAM) + return (EINVAL); + if (psa != NULL) + *psa = NULL; + if (controlp != NULL) + return (EINVAL); + if (flagsp != NULL) + flags = *flagsp &~ MSG_EOR; + else + flags = 0; + if (flags & MSG_OOB) + return (soreceive_rcvoob(so, uio, flags)); + if (mp0 != NULL) + *mp0 = NULL; + + sb = &so->so_rcv; + + /* Prevent other readers from entering the socket. */ + error = sblock(sb, SBLOCKWAIT(flags)); + if (error) + goto out; + SOCKBUF_LOCK(sb); + + /* Easy one, no space to copyout anything. */ + if (uio->uio_resid == 0) { + error = EINVAL; + goto out; + } + oresid = uio->uio_resid; + + /* We will never ever get anything unless we are or were connected. */ + if (!(so->so_state & (SS_ISCONNECTED|SS_ISDISCONNECTED))) { + error = ENOTCONN; + goto out; + } + +restart: + SOCKBUF_LOCK_ASSERT(&so->so_rcv); + + if (sb->sb_flags & SB_DDP_INDICATE && !ddp_handled) { + + /* uio should be just as it was at entry */ + KASSERT(oresid == uio->uio_resid, + ("%s: oresid = %d, uio_resid = %zd, sb_cc = %d", + __func__, oresid, uio->uio_resid, sb->sb_cc)); + + error = handle_ddp(so, uio, flags, 0); + ddp_handled = 1; + if (error) + goto out; + } + + /* Abort if socket has reported problems. 
*/ + if (so->so_error) { + if (sb->sb_cc > 0) + goto deliver; + if (oresid > uio->uio_resid) + goto out; + error = so->so_error; + if (!(flags & MSG_PEEK)) + so->so_error = 0; + goto out; + } + + /* Door is closed. Deliver what is left, if any. */ + if (sb->sb_state & SBS_CANTRCVMORE) { + if (sb->sb_cc > 0) + goto deliver; + else + goto out; + } + + /* Socket buffer is empty and we shall not block. */ + if (sb->sb_cc == 0 && + ((so->so_state & SS_NBIO) || (flags & (MSG_DONTWAIT|MSG_NBIO)))) { + error = EAGAIN; + goto out; + } + + /* Socket buffer got some data that we shall deliver now. */ + if (sb->sb_cc > 0 && !(flags & MSG_WAITALL) && + ((sb->sb_flags & SS_NBIO) || + (flags & (MSG_DONTWAIT|MSG_NBIO)) || + sb->sb_cc >= sb->sb_lowat || + sb->sb_cc >= uio->uio_resid || + sb->sb_cc >= sb->sb_hiwat) ) { + goto deliver; + } + + /* On MSG_WAITALL we must wait until all data or error arrives. */ + if ((flags & MSG_WAITALL) && + (sb->sb_cc >= uio->uio_resid || sb->sb_cc >= sb->sb_lowat)) + goto deliver; + + /* + * Wait and block until (more) data comes in. + * NB: Drops the sockbuf lock during wait. + */ + error = sbwait(sb); + if (error) { + if (sb->sb_flags & SB_DDP_INDICATE && !ddp_handled) { + (void) handle_ddp(so, uio, flags, 1); + ddp_handled = 1; + } + goto out; + } + goto restart; + +deliver: + SOCKBUF_LOCK_ASSERT(&so->so_rcv); + KASSERT(sb->sb_cc > 0, ("%s: sockbuf empty", __func__)); + KASSERT(sb->sb_mb != NULL, ("%s: sb_mb == NULL", __func__)); + + if (sb->sb_flags & SB_DDP_INDICATE && !ddp_handled) + goto restart; + + /* Statistics. */ + if (uio->uio_td) + uio->uio_td->td_ru.ru_msgrcv++; + + /* Fill uio until full or current end of socket buffer is reached. */ + len = min(uio->uio_resid, sb->sb_cc); + if (mp0 != NULL) { + /* Dequeue as many mbufs as possible. 
*/ + if (!(flags & MSG_PEEK) && len >= sb->sb_mb->m_len) { + for (*mp0 = m = sb->sb_mb; + m != NULL && m->m_len <= len; + m = m->m_next) { + len -= m->m_len; + uio->uio_resid -= m->m_len; + sbfree(sb, m); + n = m; + } + sb->sb_mb = m; + if (sb->sb_mb == NULL) + SB_EMPTY_FIXUP(sb); + n->m_next = NULL; + } + /* Copy the remainder. */ + if (len > 0) { + KASSERT(sb->sb_mb != NULL, + ("%s: len > 0 && sb->sb_mb empty", __func__)); + + m = m_copym(sb->sb_mb, 0, len, M_DONTWAIT); + if (m == NULL) + len = 0; /* Don't flush data from sockbuf. */ + else + uio->uio_resid -= m->m_len; + if (*mp0 != NULL) + n->m_next = m; + else + *mp0 = m; + if (*mp0 == NULL) { + error = ENOBUFS; + goto out; + } + } + } else { + /* NB: Must unlock socket buffer as uiomove may sleep. */ + SOCKBUF_UNLOCK(sb); + error = m_mbuftouio_ddp(uio, sb->sb_mb, len); + SOCKBUF_LOCK(sb); + if (error) + goto out; + } + SBLASTRECORDCHK(sb); + SBLASTMBUFCHK(sb); + + /* + * Remove the delivered data from the socket buffer unless we + * were only peeking. + */ + if (!(flags & MSG_PEEK)) { + if (len > 0) + sbdrop_locked(sb, len); + + /* Notify protocol that we drained some data. */ + if ((so->so_proto->pr_flags & PR_WANTRCVD) && + (((flags & MSG_WAITALL) && uio->uio_resid > 0) || + !(flags & MSG_SOCALLBCK))) { + SOCKBUF_UNLOCK(sb); + VNET_SO_ASSERT(so); + (*so->so_proto->pr_usrreqs->pru_rcvd)(so, flags); + SOCKBUF_LOCK(sb); + } + } + + /* + * For MSG_WAITALL we may have to loop again and wait for + * more data to come in. 
+ */ + if ((flags & MSG_WAITALL) && uio->uio_resid > 0) + goto restart; +out: + SOCKBUF_LOCK_ASSERT(sb); + SBLASTRECORDCHK(sb); + SBLASTMBUFCHK(sb); + SOCKBUF_UNLOCK(sb); + sbunlock(sb); + return (error); +} + +#endif Property changes on: head/sys/dev/cxgbe/tom/t4_ddp.c ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: head/sys/dev/cxgbe/tom/t4_listen.c =================================================================== --- head/sys/dev/cxgbe/tom/t4_listen.c (revision 239343) +++ head/sys/dev/cxgbe/tom/t4_listen.c (revision 239344) @@ -1,1362 +1,1374 @@ /*- * Copyright (c) 2012 Chelsio Communications, Inc. * All rights reserved. * Written by: Navdeep Parhar * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #ifdef TCP_OFFLOAD #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define TCPSTATES #include #include #include "common/common.h" #include "common/t4_msg.h" #include "common/t4_regs.h" #include "tom/t4_tom_l2t.h" #include "tom/t4_tom.h" /* stid services */ static int alloc_stid(struct adapter *, void *); static void *lookup_stid(struct adapter *, int); static void free_stid(struct adapter *, int); /* lctx services */ static struct listen_ctx *alloc_lctx(struct adapter *, struct inpcb *, struct port_info *); static int free_lctx(struct adapter *, struct listen_ctx *); static void hold_lctx(struct listen_ctx *); static void listen_hash_add(struct adapter *, struct listen_ctx *); static struct listen_ctx *listen_hash_find(struct adapter *, struct inpcb *); static struct listen_ctx *listen_hash_del(struct adapter *, struct inpcb *); static struct inpcb *release_lctx(struct adapter *, struct listen_ctx *); static inline void save_qids_in_mbuf(struct mbuf *, struct port_info *); static inline void get_qids_from_mbuf(struct mbuf *m, int *, int *); static void send_reset_synqe(struct toedev *, struct synq_entry *); /* XXX: won't work for IPv6 */ static int alloc_stid(struct adapter *sc, void *ctx) { struct tid_info *t = &sc->tids; int stid = -1; mtx_lock(&t->stid_lock); if 
(t->sfree) { union serv_entry *p = t->sfree; stid = p - t->stid_tab; stid += t->stid_base; t->sfree = p->next; p->data = ctx; t->stids_in_use++; } mtx_unlock(&t->stid_lock); return (stid); } static void * lookup_stid(struct adapter *sc, int stid) { struct tid_info *t = &sc->tids; return (t->stid_tab[stid - t->stid_base].data); } static void free_stid(struct adapter *sc, int stid) { struct tid_info *t = &sc->tids; union serv_entry *p = &t->stid_tab[stid - t->stid_base]; mtx_lock(&t->stid_lock); p->next = t->sfree; t->sfree = p; t->stids_in_use--; mtx_unlock(&t->stid_lock); } static struct listen_ctx * alloc_lctx(struct adapter *sc, struct inpcb *inp, struct port_info *pi) { struct listen_ctx *lctx; INP_WLOCK_ASSERT(inp); lctx = malloc(sizeof(struct listen_ctx), M_CXGBE, M_NOWAIT | M_ZERO); if (lctx == NULL) return (NULL); lctx->stid = alloc_stid(sc, lctx); if (lctx->stid < 0) { free(lctx, M_CXGBE); return (NULL); } lctx->ctrlq = &sc->sge.ctrlq[pi->port_id]; lctx->ofld_rxq = &sc->sge.ofld_rxq[pi->first_ofld_rxq]; refcount_init(&lctx->refcount, 1); TAILQ_INIT(&lctx->synq); lctx->inp = inp; in_pcbref(inp); return (lctx); } /* Don't call this directly, use release_lctx instead */ static int free_lctx(struct adapter *sc, struct listen_ctx *lctx) { struct inpcb *inp = lctx->inp; INP_WLOCK_ASSERT(inp); KASSERT(lctx->refcount == 0, ("%s: refcount %d", __func__, lctx->refcount)); KASSERT(TAILQ_EMPTY(&lctx->synq), ("%s: synq not empty.", __func__)); KASSERT(lctx->stid >= 0, ("%s: bad stid %d.", __func__, lctx->stid)); CTR4(KTR_CXGBE, "%s: stid %u, lctx %p, inp %p", __func__, lctx->stid, lctx, lctx->inp); free_stid(sc, lctx->stid); free(lctx, M_CXGBE); return (in_pcbrele_wlocked(inp)); } static void hold_lctx(struct listen_ctx *lctx) { refcount_acquire(&lctx->refcount); } static inline uint32_t listen_hashfn(void *key, u_long mask) { return (fnv_32_buf(&key, sizeof(key), FNV1_32_INIT) & mask); } /* * Add a listen_ctx entry to the listen hash table. 
*/ static void listen_hash_add(struct adapter *sc, struct listen_ctx *lctx) { struct tom_data *td = sc->tom_softc; int bucket = listen_hashfn(lctx->inp, td->listen_mask); mtx_lock(&td->lctx_hash_lock); LIST_INSERT_HEAD(&td->listen_hash[bucket], lctx, link); td->lctx_count++; mtx_unlock(&td->lctx_hash_lock); } /* * Look for the listening socket's context entry in the hash and return it. */ static struct listen_ctx * listen_hash_find(struct adapter *sc, struct inpcb *inp) { struct tom_data *td = sc->tom_softc; int bucket = listen_hashfn(inp, td->listen_mask); struct listen_ctx *lctx; mtx_lock(&td->lctx_hash_lock); LIST_FOREACH(lctx, &td->listen_hash[bucket], link) { if (lctx->inp == inp) break; } mtx_unlock(&td->lctx_hash_lock); return (lctx); } /* * Removes the listen_ctx structure for inp from the hash and returns it. */ static struct listen_ctx * listen_hash_del(struct adapter *sc, struct inpcb *inp) { struct tom_data *td = sc->tom_softc; int bucket = listen_hashfn(inp, td->listen_mask); struct listen_ctx *lctx, *l; mtx_lock(&td->lctx_hash_lock); LIST_FOREACH_SAFE(lctx, &td->listen_hash[bucket], link, l) { if (lctx->inp == inp) { LIST_REMOVE(lctx, link); td->lctx_count--; break; } } mtx_unlock(&td->lctx_hash_lock); return (lctx); } /* * Releases a hold on the lctx. Must be called with the listening socket's inp * locked. The inp may be freed by this function and it returns NULL to * indicate this. */ static struct inpcb * release_lctx(struct adapter *sc, struct listen_ctx *lctx) { struct inpcb *inp = lctx->inp; int inp_freed = 0; INP_WLOCK_ASSERT(inp); if (refcount_release(&lctx->refcount)) inp_freed = free_lctx(sc, lctx); return (inp_freed ? 
NULL : inp); } static void send_reset_synqe(struct toedev *tod, struct synq_entry *synqe) { struct adapter *sc = tod->tod_softc; struct mbuf *m = synqe->syn; struct ifnet *ifp = m->m_pkthdr.rcvif; struct port_info *pi = ifp->if_softc; struct l2t_entry *e = &sc->l2t->l2tab[synqe->l2e_idx]; struct wrqe *wr; struct fw_flowc_wr *flowc; struct cpl_abort_req *req; int txqid, rxqid, flowclen; struct sge_wrq *ofld_txq; struct sge_ofld_rxq *ofld_rxq; const int nparams = 4; unsigned int pfvf = G_FW_VIID_PFN(pi->viid) << S_FW_VIID_PFN; INP_WLOCK_ASSERT(synqe->lctx->inp); CTR4(KTR_CXGBE, "%s: synqe %p, tid %d%s", __func__, synqe, synqe->tid, synqe_flag(synqe, TPF_ABORT_SHUTDOWN) ? " (abort already in progress)" : ""); if (synqe_flag(synqe, TPF_ABORT_SHUTDOWN)) return; /* abort already in progress */ synqe_set_flag(synqe, TPF_ABORT_SHUTDOWN); get_qids_from_mbuf(m, &txqid, &rxqid); ofld_txq = &sc->sge.ofld_txq[txqid]; ofld_rxq = &sc->sge.ofld_rxq[rxqid]; /* The wrqe will have two WRs - a flowc followed by an abort_req */ flowclen = sizeof(*flowc) + nparams * sizeof(struct fw_flowc_mnemval); wr = alloc_wrqe(roundup(flowclen, EQ_ESIZE) + sizeof(*req), ofld_txq); if (wr == NULL) { /* XXX */ panic("%s: allocation failure.", __func__); } flowc = wrtod(wr); req = (void *)((caddr_t)flowc + roundup(flowclen, EQ_ESIZE)); /* First the flowc ... */ memset(flowc, 0, wr->wr_len); flowc->op_to_nparams = htobe32(V_FW_WR_OP(FW_FLOWC_WR) | V_FW_FLOWC_WR_NPARAMS(nparams)); flowc->flowid_len16 = htonl(V_FW_WR_LEN16(howmany(flowclen, 16)) | V_FW_WR_FLOWID(synqe->tid)); flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_PFNVFN; flowc->mnemval[0].val = htobe32(pfvf); flowc->mnemval[1].mnemonic = FW_FLOWC_MNEM_CH; flowc->mnemval[1].val = htobe32(pi->tx_chan); flowc->mnemval[2].mnemonic = FW_FLOWC_MNEM_PORT; flowc->mnemval[2].val = htobe32(pi->tx_chan); flowc->mnemval[3].mnemonic = FW_FLOWC_MNEM_IQID; flowc->mnemval[3].val = htobe32(ofld_rxq->iq.abs_id); synqe_set_flag(synqe, TPF_FLOWC_WR_SENT); /* ... 
then ABORT request */ INIT_TP_WR_MIT_CPL(req, CPL_ABORT_REQ, synqe->tid); req->rsvd0 = 0; /* don't have a snd_nxt */ req->rsvd1 = 1; /* no data sent yet */ req->cmd = CPL_ABORT_SEND_RST; t4_l2t_send(sc, wr, e); } static int create_server(struct adapter *sc, struct listen_ctx *lctx) { struct wrqe *wr; struct cpl_pass_open_req *req; struct in_conninfo *inc = &lctx->inp->inp_inc; wr = alloc_wrqe(sizeof(*req), lctx->ctrlq); if (wr == NULL) { log(LOG_ERR, "%s: allocation failure", __func__); return (ENOMEM); } req = wrtod(wr); INIT_TP_WR(req, 0); OPCODE_TID(req) = htobe32(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, lctx->stid)); req->local_port = inc->inc_lport; req->peer_port = 0; req->local_ip = inc->inc_laddr.s_addr; req->peer_ip = 0; req->opt0 = htobe64(V_TX_CHAN(lctx->ctrlq->eq.tx_chan)); req->opt1 = htobe64(V_CONN_POLICY(CPL_CONN_POLICY_ASK) | F_SYN_RSS_ENABLE | V_SYN_RSS_QUEUE(lctx->ofld_rxq->iq.abs_id)); t4_wrq_tx(sc, wr); return (0); } static int destroy_server(struct adapter *sc, struct listen_ctx *lctx) { struct wrqe *wr; struct cpl_close_listsvr_req *req; wr = alloc_wrqe(sizeof(*req), lctx->ctrlq); if (wr == NULL) { /* XXX */ panic("%s: allocation failure.", __func__); } req = wrtod(wr); INIT_TP_WR(req, 0); OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_LISTSRV_REQ, lctx->stid)); req->reply_ctrl = htobe16(lctx->ofld_rxq->iq.abs_id); req->rsvd = htobe16(0); t4_wrq_tx(sc, wr); return (0); } /* * Start a listening server by sending a passive open request to HW. * * Can't take adapter lock here and access to sc->flags, sc->open_device_map, * sc->offload_map, if_capenable are all race prone. 
*/
/*
 * t4_listen_start returns 0 on every path, including the ones where no
 * hardware listener ends up being created (non-IPv4 inp, no suitable port,
 * already set up, lctx allocation failure, create_server failure).  The inp
 * must be write-locked by the caller (asserted).
 */
int
t4_listen_start(struct toedev *tod, struct tcpcb *tp)
{
	struct adapter *sc = tod->tod_softc;
	struct port_info *pi;
	struct inpcb *inp = tp->t_inpcb;
	struct listen_ctx *lctx;
	int i;

	INP_WLOCK_ASSERT(inp);

	/* Only IPv4 listeners are handled here. */
	if ((inp->inp_vflag & INP_IPV4) == 0)
		return (0);

#if 0
	ADAPTER_LOCK(sc);
	if (IS_BUSY(sc)) {
		log(LOG_ERR, "%s: listen request ignored, %s is busy",
		    __func__, device_get_nameunit(sc->dev));
		goto done;
	}

	KASSERT(sc->flags & TOM_INIT_DONE,
	    ("%s: TOM not initialized", __func__));
#endif

	if ((sc->open_device_map & sc->offload_map) == 0)
		goto done;	/* no port that's UP with IFCAP_TOE enabled */

	/*
	 * Find a running port with IFCAP_TOE4.  We'll use the first such port's
	 * queues to send the passive open and receive the reply to it.
	 *
	 * XXX: need a way to mark a port in use by offload.  if_cxgbe should
	 * then reject any attempt to bring down such a port (and maybe reject
	 * attempts to disable IFCAP_TOE on that port too?).
	 */
	for_each_port(sc, i) {
		if (isset(&sc->open_device_map, i) &&
		    sc->port[i]->ifp->if_capenable & IFCAP_TOE4)
			break;
	}
	KASSERT(i < sc->params.nports,
	    ("%s: no running port with TOE capability enabled.", __func__));
	pi = sc->port[i];

	if (listen_hash_find(sc, inp) != NULL)
		goto done;	/* already setup */

	lctx = alloc_lctx(sc, inp, pi);
	if (lctx == NULL) {
		log(LOG_ERR,
		    "%s: listen request ignored, %s couldn't allocate lctx\n",
		    __func__, device_get_nameunit(sc->dev));
		goto done;
	}
	listen_hash_add(sc, lctx);

	CTR5(KTR_CXGBE, "%s: stid %u (%s), lctx %p, inp %p", __func__,
	    lctx->stid, tcpstates[tp->t_state], lctx, inp);

	if (create_server(sc, lctx) != 0) {
		/* Undo the hash insertion and drop the lctx again. */
		log(LOG_ERR, "%s: %s failed to create hw listener.\n", __func__,
		    device_get_nameunit(sc->dev));
		(void) listen_hash_del(sc, inp);
		inp = release_lctx(sc, lctx);
		/* can't be freed, host stack has a reference */
		KASSERT(inp != NULL, ("%s: inp freed", __func__));
		goto done;
	}
	/* Cleared again by do_pass_open_rpl when the chip's reply arrives. */
	lctx->flags |= LCTX_RPL_PENDING;
done:
#if 0
	ADAPTER_UNLOCK(sc);
#endif
	return (0);
}

/*
 * Stop the hardware listener associated with tp's inp.  The inp must be
 * write-locked by the caller (asserted).
 *
 * Returns ENOENT if the inp is not in the listen hash, EINPROGRESS if the reply
 * to the PASS_OPEN is still outstanding, and 0 after every entry on the lctx's
 * synq has been reset and the close request has been sent to the chip.
 */
int
t4_listen_stop(struct toedev *tod, struct tcpcb *tp)
{
	struct listen_ctx *lctx;
	struct adapter *sc = tod->tod_softc;
	struct inpcb *inp = tp->t_inpcb;
	struct synq_entry *synqe;

	INP_WLOCK_ASSERT(inp);

	lctx = listen_hash_del(sc, inp);
	if (lctx == NULL)
		return (ENOENT);	/* no hardware listener for this inp */

	CTR4(KTR_CXGBE, "%s: stid %u, lctx %p, flags %x", __func__, lctx->stid,
	    lctx, lctx->flags);

	/*
	 * If the reply to the PASS_OPEN is still pending we'll wait for it to
	 * arrive and clean up when it does.
	 */
	if (lctx->flags & LCTX_RPL_PENDING) {
		KASSERT(TAILQ_EMPTY(&lctx->synq),
		    ("%s: synq not empty.", __func__));
		return (EINPROGRESS);
	}

	/*
	 * The host stack will abort all the connections on the listening
	 * socket's so_comp.  It doesn't know about the connections on the synq
	 * so we need to take care of those.
	 */
	TAILQ_FOREACH(synqe, &lctx->synq, link)
		send_reset_synqe(tod, synqe);

	destroy_server(sc, lctx);
	return (0);
}

/* Take an additional reference on synqe. */
static inline void
hold_synqe(struct synq_entry *synqe)
{

	refcount_acquire(&synqe->refcnt);
}

/*
 * Drop a reference on synqe.  When refcount_release reports the last reference
 * is gone the SYN mbuf is freed, and the synqe itself is freed as well if it
 * carries TPF_SYNQE_NEEDFREE.
 */
static inline void
release_synqe(struct synq_entry *synqe)
{

	if (refcount_release(&synqe->refcnt)) {
		/*
		 * Read the flag before freeing the mbuf: mbuf_to_synqe may have
		 * placed the synqe in the trailing space of that very mbuf.
		 */
		int needfree = synqe_flag(synqe, TPF_SYNQE_NEEDFREE);

		m_freem(synqe->syn);
		if (needfree)
			free(synqe, M_CXGBE);
	}
}

/* The synqe passed as arg gains a reference; tod is unused. */
void
t4_syncache_added(struct toedev *tod __unused, void *arg)
{
	struct synq_entry *synqe = arg;

	hold_synqe(synqe);
}

/* The synqe passed as arg loses a reference; tod is unused. */
void
t4_syncache_removed(struct toedev *tod __unused, void *arg)
{
	struct synq_entry *synqe = arg;

	release_synqe(synqe);
}

/* XXX */
extern void tcp_dooptions(struct tcpopt *, u_char *, int, int);

/*
 * Send the work request stashed in synqe->wr, at most once per synqe.  The
 * pointer is atomically read and cleared; if it was already NULL, EALREADY is
 * returned and nothing is sent.  Otherwise the sequence number and timestamp
 * value are taken from the TCP header in m and saved in the synqe, the work
 * request is sent via the synqe's L2 table entry, m is freed and 0 is returned.
 *
 * m is read as an IP header immediately followed by a TCP header (no IP
 * options are accounted for).
 */
int
t4_syncache_respond(struct toedev *tod, void *arg, struct mbuf *m)
{
	struct adapter *sc = tod->tod_softc;
	struct synq_entry *synqe = arg;
	struct wrqe *wr;
	struct l2t_entry *e;
	struct tcpopt to;
	struct ip *ip = mtod(m, struct ip *);
	struct tcphdr *th = (void *)(ip + 1);

	wr = (struct wrqe *)atomic_readandclear_ptr(&synqe->wr);
	/*
	 * NOTE(review): m is not freed on this early return, unlike the
	 * success path below -- confirm that the caller frees it.
	 */
	if (wr == NULL)
		return (EALREADY);

	bzero(&to, sizeof(to));
	tcp_dooptions(&to, (void *)(th + 1), (th->th_off << 2) - sizeof(*th),
	    TO_SYN);

	/* save these for later */
	synqe->iss = be32toh(th->th_seq);
	synqe->ts = to.to_tsval;

	e = &sc->l2t->l2tab[synqe->l2e_idx];
	t4_l2t_send(sc, wr, e);

	m_freem(m);	/* don't need this any more */
	return (0);
}

/*
 * CPL_PASS_OPEN_RPL handler (registered in t4_init_listen_cpl_handlers).
 * Clears LCTX_RPL_PENDING on the lctx for the stid, logs a failure status, and
 * then either keeps the listener, closes it again (inp already dropped), or
 * releases the lctx (open failed).  Returns the status from the CPL.
 *
 * release_lctx's return value is treated as the inp that is still valid (NULL
 * if it is gone), so the inp is only unlocked when it is non-NULL.
 */
static int
do_pass_open_rpl(struct sge_iq *iq, const struct rss_header *rss,
    struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	const struct cpl_pass_open_rpl *cpl = (const void *)(rss + 1);
	int stid = GET_TID(cpl);
	unsigned int status = cpl->status;
	struct listen_ctx *lctx = lookup_stid(sc, stid);
	struct inpcb *inp = lctx->inp;
#ifdef INVARIANTS
	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
#endif

	KASSERT(opcode == CPL_PASS_OPEN_RPL,
	    ("%s: unexpected opcode 0x%x", __func__, opcode));
	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
	KASSERT(lctx->stid == stid, ("%s: lctx stid mismatch", __func__));

	INP_WLOCK(inp);

	CTR4(KTR_CXGBE, "%s: stid %d, status %u, flags 0x%x",
	    __func__, stid, status, lctx->flags);

	lctx->flags &= ~LCTX_RPL_PENDING;

	if (status != CPL_ERR_NONE)
		log(LOG_ERR, "listener with stid %u failed: %d", stid, status);

#ifdef INVARIANTS
	/*
	 * If the inp has been dropped (listening socket closed) then
	 * listen_stop must have run and taken the inp out of the hash.
	 */
	if (inp->inp_flags & INP_DROPPED) {
		KASSERT(listen_hash_del(sc, inp) == NULL,
		    ("%s: inp %p still in listen hash", __func__, inp));
	}
#endif

	/* Socket closed and the open failed: just drop the lctx. */
	if (inp->inp_flags & INP_DROPPED && status != CPL_ERR_NONE) {
		if (release_lctx(sc, lctx) != NULL)
			INP_WUNLOCK(inp);
		return (status);
	}

	/*
	 * Listening socket stopped listening earlier and now the chip tells us
	 * it has started the hardware listener.  Stop it; the lctx will be
	 * released in do_close_server_rpl.
	 */
	if (inp->inp_flags & INP_DROPPED) {
		destroy_server(sc, lctx);
		INP_WUNLOCK(inp);
		return (status);
	}

	/*
	 * Failed to start hardware listener.  Take inp out of the hash and
	 * release our reference on it.  An error message has been logged
	 * already.
	 */
	if (status != CPL_ERR_NONE) {
		listen_hash_del(sc, inp);
		if (release_lctx(sc, lctx) != NULL)
			INP_WUNLOCK(inp);
		return (status);
	}

	/* hardware listener open for business */

	INP_WUNLOCK(inp);
	return (status);
}

/*
 * CPL_CLOSE_LISTSRV_RPL handler (registered in t4_init_listen_cpl_handlers).
 * On a failure status the error is logged and nothing is released; otherwise
 * the lctx for the stid is released under the inp write lock.  Returns the
 * status from the CPL.
 */
static int
do_close_server_rpl(struct sge_iq *iq, const struct rss_header *rss,
    struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	const struct cpl_close_listsvr_rpl *cpl = (const void *)(rss + 1);
	int stid = GET_TID(cpl);
	unsigned int status = cpl->status;
	struct listen_ctx *lctx = lookup_stid(sc, stid);
	struct inpcb *inp = lctx->inp;
#ifdef INVARIANTS
	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
#endif

	KASSERT(opcode == CPL_CLOSE_LISTSRV_RPL,
	    ("%s: unexpected opcode 0x%x", __func__, opcode));
	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
	KASSERT(lctx->stid == stid, ("%s: lctx stid mismatch", __func__));

	CTR3(KTR_CXGBE, "%s: stid %u, status %u", __func__, stid, status);

	if (status != CPL_ERR_NONE) {
		log(LOG_ERR, "%s: failed (%u) to close listener for stid %u",
		    __func__, status, stid);
		return (status);
	}

	INP_WLOCK(inp);
	inp = release_lctx(sc, lctx);
	if (inp != NULL)
		INP_WUNLOCK(inp);

	return (status);
}

/*
 * Final teardown of a synqe: take it off its lctx's synq, drop the lctx
 * reference it held, give back its tid and L2 table entry, and drop the synq
 * list's reference on the synqe.
 *
 * Called with the listener's inp write-locked (asserted); the lock is released
 * here (if release_lctx hands the inp back).
 */
static void
done_with_synqe(struct adapter *sc, struct synq_entry *synqe)
{
	struct listen_ctx *lctx = synqe->lctx;
	struct inpcb *inp = lctx->inp;
	struct port_info *pi = synqe->syn->m_pkthdr.rcvif->if_softc;
	struct l2t_entry *e = &sc->l2t->l2tab[synqe->l2e_idx];

	INP_WLOCK_ASSERT(inp);

	TAILQ_REMOVE(&lctx->synq, synqe, link);
	inp = release_lctx(sc, lctx);
	if (inp)
		INP_WUNLOCK(inp);

	remove_tid(sc, synqe->tid);
	release_tid(sc, synqe->tid, &sc->sge.ctrlq[pi->port_id]);
	t4_l2t_release(e);
	release_synqe(synqe);	/* removed from synq list */
}

/*
 * Handle a CPL_ABORT_REQ_RSS whose tid refers to a synqe.  Negative advice is
 * ignored.  Otherwise a CPL_ABORT_RPL (CPL_ABORT_NO_RST) is always sent, and
 * the synqe is torn down here unless an abort initiated by the driver is
 * already in flight.  Always returns 0.
 */
int
do_abort_req_synqe(struct sge_iq *iq, const struct rss_header *rss,
    struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	const struct cpl_abort_req_rss *cpl = (const void *)(rss + 1);
	unsigned int tid = GET_TID(cpl);
	struct synq_entry *synqe = lookup_tid(sc, tid);
	struct listen_ctx *lctx = synqe->lctx;
	struct inpcb *inp = lctx->inp;
	int txqid;
	struct sge_wrq *ofld_txq;
#ifdef INVARIANTS
	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
#endif

	KASSERT(opcode == CPL_ABORT_REQ_RSS,
	    ("%s: unexpected opcode 0x%x", __func__, opcode));
	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
	KASSERT(synqe->tid == tid, ("%s: toep tid mismatch", __func__));

	CTR6(KTR_CXGBE, "%s: tid %u, synqe %p (0x%x), lctx %p, status %d",
	    __func__, tid, synqe, synqe->flags, synqe->lctx, cpl->status);

	if (cpl->status == CPL_ERR_RTX_NEG_ADVICE ||
	    cpl->status == CPL_ERR_PERSIST_NEG_ADVICE)
		return (0);	/* Ignore negative advice */

	INP_WLOCK(inp);

	/*
	 * Look up the tx queue now: done_with_synqe below may drop the last
	 * reference on the synqe.
	 */
	get_qids_from_mbuf(synqe->syn, &txqid, NULL);
	ofld_txq = &sc->sge.ofld_txq[txqid];

	/*
	 * If we'd initiated an abort earlier the reply to it is responsible for
	 * cleaning up resources.  Otherwise we tear everything down right here
	 * right now.  We owe the T4 a CPL_ABORT_RPL no matter what.
	 */
	if (synqe_flag(synqe, TPF_ABORT_SHUTDOWN)) {
		INP_WUNLOCK(inp);
		goto done;
	}

	done_with_synqe(sc, synqe);
	/* inp lock released by done_with_synqe */
done:
	send_abort_rpl(sc, ofld_txq, tid, CPL_ABORT_NO_RST);
	return (0);
}

/*
 * Handle a CPL_ABORT_RPL_RSS whose tid refers to a synqe: the reply to an
 * abort that the driver itself requested (TPF_ABORT_SHUTDOWN is asserted).
 * Performs the final teardown of the synqe and always returns 0.
 */
int
do_abort_rpl_synqe(struct sge_iq *iq, const struct rss_header *rss,
    struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	const struct cpl_abort_rpl_rss *cpl = (const void *)(rss + 1);
	unsigned int tid = GET_TID(cpl);
	struct synq_entry *synqe = lookup_tid(sc, tid);
	struct listen_ctx *lctx = synqe->lctx;
	struct inpcb *inp = lctx->inp;
#ifdef INVARIANTS
	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
#endif

	KASSERT(opcode == CPL_ABORT_RPL_RSS,
	    ("%s: unexpected opcode 0x%x", __func__, opcode));
	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
	KASSERT(synqe->tid == tid, ("%s: toep tid mismatch", __func__));

	CTR6(KTR_CXGBE, "%s: tid %u, synqe %p (0x%x), lctx %p, status %d",
	    __func__, tid, synqe, synqe->flags, synqe->lctx, cpl->status);

	INP_WLOCK(inp);
	KASSERT(synqe_flag(synqe, TPF_ABORT_SHUTDOWN),
	    ("%s: wasn't expecting abort reply for synqe %p (0x%x)",
	    __func__, synqe, synqe->flags));

	done_with_synqe(sc, synqe);
	/* inp lock released by done_with_synqe */

	return (0);
}

/*
 * Attach the offload state of a newly established passive connection to its
 * socket.  arg is the synqe; the SYN mbuf it points to is expected to hold the
 * CPL_PASS_ESTABLISH followed by the toepcb pointer, as stored there by
 * do_pass_establish.  The tid entry is switched from the synqe to the toepcb.
 */
void
t4_offload_socket(struct toedev *tod, void *arg, struct socket *so)
{
	struct adapter *sc = tod->tod_softc;
	struct synq_entry *synqe = arg;
#ifdef INVARIANTS
	struct inpcb *inp = sotoinpcb(so);
#endif
	struct cpl_pass_establish *cpl = mtod(synqe->syn, void *);
	struct toepcb *toep = *(struct toepcb **)(cpl + 1);

	INP_INFO_LOCK_ASSERT(&V_tcbinfo); /* prevents bad race with accept() */
	INP_WLOCK_ASSERT(inp);
	KASSERT(synqe_flag(synqe, TPF_SYNQE),
	    ("%s: %p not a synq_entry?", __func__, arg));

	offload_socket(so, toep);
	make_established(toep, cpl->snd_isn, cpl->rcv_isn, cpl->tcp_opt);
	toepcb_set_flag(toep, TPF_CPL_PENDING);
	update_tid(sc, synqe->tid, toep);
}

/*
 * Pick a random offload tx queue and a random offload rx queue from the ones
 * belonging to port pi and record both indices in the mbuf's flowid: tx queue
 * in the upper 16 bits, rx queue in the lower 16 bits.
 */
static inline void
save_qids_in_mbuf(struct mbuf *m, struct port_info *pi)
{
	uint32_t txqid, rxqid;

	txqid = (arc4random() % pi->nofldtxq) + pi->first_ofld_txq;
	rxqid = (arc4random() % pi->nofldrxq) + pi->first_ofld_rxq;

	m->m_pkthdr.flowid = (txqid << 16) | (rxqid & 0xffff);
}

/*
 * Inverse of save_qids_in_mbuf.  Either output pointer may be NULL if the
 * caller is not interested in that queue index.
 */
static inline void
get_qids_from_mbuf(struct mbuf *m, int *txqid, int *rxqid)
{

	if (txqid)
		*txqid = m->m_pkthdr.flowid >> 16;
	if (rxqid)
		*rxqid = m->m_pkthdr.flowid & 0xffff;
}

/*
 * Use the trailing space in the mbuf in which the PASS_ACCEPT_REQ arrived to
 * store some state temporarily.
*/
/*
 * Returns a synq_entry with flags initialised to TPF_SYNQE, or NULL if one had
 * to be allocated and the M_NOWAIT allocation failed.  When the mbuf's trailing
 * space is too small the entry comes from malloc and is additionally marked
 * TPF_SYNQE_NEEDFREE (release_synqe frees it on the last release); otherwise it
 * lives at the very end of the mbuf's trailing space.  Only the flags field is
 * initialised here.
 */
static struct synq_entry *
mbuf_to_synqe(struct mbuf *m)
{
	int len = roundup(sizeof (struct synq_entry), 8);
	int tspace = M_TRAILINGSPACE(m);
	struct synq_entry *synqe = NULL;

	if (tspace < len) {
		synqe = malloc(sizeof(*synqe), M_CXGBE, M_NOWAIT);
		if (synqe == NULL)
			return (NULL);
	} else
		/*
		 * NOTE(review): the space check uses len (rounded up to 8) but
		 * the offset uses sizeof(*synqe) -- confirm which is intended.
		 */
		synqe = (void *)(m->m_data + m->m_len + tspace - sizeof(*synqe));

	synqe->flags = 0;
	synqe_set_flag(synqe, TPF_SYNQE);
	if (tspace < len)
		synqe_set_flag(synqe, TPF_SYNQE_NEEDFREE);

	return (synqe);
}

/*
 * Translate the TCP options reported by the chip (t4opt) into the host stack's
 * struct tcpopt.  *to is zeroed first.  Only flags are set for timestamp and
 * SACK; the MSS is converted from big-endian and the window scale factor is
 * copied as is.
 */
static void
t4opt_to_tcpopt(const struct tcp_options *t4opt, struct tcpopt *to)
{
	bzero(to, sizeof(*to));

	if (t4opt->mss) {
		to->to_flags |= TOF_MSS;
		to->to_mss = be16toh(t4opt->mss);
	}

	if (t4opt->wsf) {
		to->to_flags |= TOF_SCALE;
		to->to_wscale = t4opt->wsf;
	}

	if (t4opt->tstamp)
		to->to_flags |= TOF_TS;

	if (t4opt->sack)
		to->to_flags |= TOF_SACKPERM;
}

/*
 * Options2 for passive open.
 *
 * Builds the opt2 word from the peer's TCP options (only honoured when
 * V_tcp_do_rfc1323 is set), ECN (when V_tcp_do_ecn is set and the SYN carried
 * ECE or CWR), the tx modulation queue for the port's channel, rx coalescing
 * and the RSS queue for rxqid.  The result is returned in big-endian order.
 *
 * This revision adds the ulp_mode parameter: when USE_DDP_RX_FLOW_CONTROL is
 * defined and ulp_mode is ULP_MODE_TCPDDP, F_RX_FC_VALID | F_RX_FC_DDP are set
 * too.
 */
static uint32_t
calc_opt2p(struct adapter *sc, struct port_info *pi, int rxqid,
-	const struct tcp_options *tcpopt, struct tcphdr *th)
+	const struct tcp_options *tcpopt, struct tcphdr *th, int ulp_mode)
{
	uint32_t opt2 = 0;
	struct sge_ofld_rxq *ofld_rxq = &sc->sge.ofld_rxq[rxqid];

	if (V_tcp_do_rfc1323) {
		if (tcpopt->tstamp)
			opt2 |= F_TSTAMPS_EN;
		if (tcpopt->sack)
			opt2 |= F_SACK_EN;
		if (tcpopt->wsf > 0)
			opt2 |= F_WND_SCALE_EN;
	}

	if (V_tcp_do_ecn && th->th_flags & (TH_ECE | TH_CWR))
		opt2 |= F_CCTRL_ECN;

	opt2 |= V_TX_QUEUE(sc->params.tp.tx_modq[pi->tx_chan]);
	opt2 |= F_RX_COALESCE_VALID | V_RX_COALESCE(M_RX_COALESCE);
	opt2 |= F_RSS_QUEUE_VALID | V_RSS_QUEUE(ofld_rxq->iq.abs_id);

+#ifdef USE_DDP_RX_FLOW_CONTROL
+	if (ulp_mode == ULP_MODE_TCPDDP)
+		opt2 |= F_RX_FC_VALID | F_RX_FC_DDP;
+#endif
+
	return htobe32(opt2);
}

/* XXX: duplication.
*/
/*
 * Convert th_seq, th_ack, th_win and th_urp of *th from network to host byte
 * order, in place.
 */
static inline void
tcp_fields_to_host(struct tcphdr *th)
{

	th->th_seq = ntohl(th->th_seq);
	th->th_ack = ntohl(th->th_ack);
	th->th_win = ntohs(th->th_win);
	th->th_urp = ntohs(th->th_urp);
}

/*
 * Extract connection information from the mbuf that carried a
 * CPL_PASS_ACCEPT_REQ.  The Ethernet, IP and TCP headers are located right
 * after the CPL using the header lengths encoded in cpl->hdr_len.
 *
 * inc (optional): zeroed, then filled with the 4-tuple from the SYN; foreign is
 *	the SYN's source and local is its destination.  INC_ISIPV6 is set if
 *	the IP version field is 6.
 * th (optional): receives a copy of the TCP header with the fields handled by
 *	tcp_fields_to_host converted to host byte order.
 */
static void
pass_accept_req_to_protohdrs(const struct mbuf *m, struct in_conninfo *inc,
    struct tcphdr *th)
{
	const struct cpl_pass_accept_req *cpl = mtod(m, const void *);
	const struct ether_header *eh;
	unsigned int hlen = be32toh(cpl->hdr_len);
	const struct ip *ip;
	const struct tcphdr *tcp;

	eh = (const void *)(cpl + 1);
	ip = (const void *)((uintptr_t)eh + G_ETH_HDR_LEN(hlen));
	tcp = (const void *)((uintptr_t)ip + G_IP_HDR_LEN(hlen));

	if (inc) {
		bzero(inc, sizeof(*inc));
		inc->inc_faddr = ip->ip_src;
		inc->inc_laddr = ip->ip_dst;
		inc->inc_fport = tcp->th_sport;
		inc->inc_lport = tcp->th_dport;
		if (ip->ip_v == 6)
			inc->inc_flags |= INC_ISIPV6;
	}

	if (th) {
		bcopy(tcp, th, sizeof(*th));
		tcp_fields_to_host(th);		/* just like tcp_input */
	}
}

/*
 * Record the current source line in the local variable reject_reason and jump
 * to the local label "reject".  The enclosing function must provide both.
 */
#define REJECT_PASS_ACCEPT()	do { \
	reject_reason = __LINE__; \
	goto reject; \
} while (0)

/*
 * The context associated with a tid entry via insert_tid could be a synq_entry
 * or a toepcb.  The only way CPL handlers can tell is via a bit in these flags.
 */
CTASSERT(offsetof(struct toepcb, flags) == offsetof(struct synq_entry, flags));

/*
 * Incoming SYN on a listening socket.
 *
 * XXX: Every use of ifp in this routine has a bad race with up/down, toe/-toe,
 * etc.
*/
/*
 * do_pass_accept_req returns 0 when the SYN was taken up for offload and the
 * SYN|ACK work request was consumed during syncache_add.  On every reject path
 * it returns the source line number of the check that failed (see
 * REJECT_PASS_ACCEPT), after releasing the tid and, if an mbuf is still
 * available, stripping the CPL and passing the SYN to ifp->if_input.
 *
 * This revision selects ULP_MODE_TCPDDP when sc->tt.ddp is set and the
 * listening socket does not have SO_NO_DDP, remembers that choice in the synqe
 * (TPF_SYNQE_TCPDDP), and passes the ulp mode on to calc_opt0 and calc_opt2p.
 */
static int
do_pass_accept_req(struct sge_iq *iq, const struct rss_header *rss,
    struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	struct toedev *tod;
	const struct cpl_pass_accept_req *cpl = mtod(m, const void *);
	struct cpl_pass_accept_rpl *rpl;
	struct wrqe *wr;
	unsigned int stid = G_PASS_OPEN_TID(be32toh(cpl->tos_stid));
	unsigned int tid = GET_TID(cpl);
	struct listen_ctx *lctx = lookup_stid(sc, stid);
	struct inpcb *inp;
	struct socket *so;
	struct in_conninfo inc;
	struct tcphdr th;
	struct tcpopt to;
	struct port_info *pi;
	struct ifnet *ifp, *ifp_vlan = NULL;
	struct l2t_entry *e = NULL;
	struct rtentry *rt;
	struct sockaddr_in nam;
-	int rscale, mtu_idx, rx_credits, rxqid;
+	int rscale, mtu_idx, rx_credits, rxqid, ulp_mode;
	struct synq_entry *synqe = NULL;
	int reject_reason;
	uint16_t vid;
#ifdef INVARIANTS
	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
#endif

	KASSERT(opcode == CPL_PASS_ACCEPT_REQ,
	    ("%s: unexpected opcode 0x%x", __func__, opcode));
	KASSERT(lctx->stid == stid, ("%s: lctx stid mismatch", __func__));

	CTR4(KTR_CXGBE, "%s: stid %u, tid %u, lctx %p", __func__, stid, tid,
	    lctx);

	pass_accept_req_to_protohdrs(m, &inc, &th);
	t4opt_to_tcpopt(&cpl->tcpopt, &to);

	pi = sc->port[G_SYN_INTF(be16toh(cpl->l2info))];
	ifp = pi->ifp;
	m->m_pkthdr.rcvif = ifp;
	tod = TOEDEV(ifp);

	/*
	 * Don't offload if the interface that received the SYN doesn't have
	 * IFCAP_TOE enabled.
	 */
	if ((ifp->if_capenable & IFCAP_TOE4) == 0)
		REJECT_PASS_ACCEPT();

	/* Don't offload IPv6 connections. XXX: add IPv6 support */
	if (inc.inc_flags & INC_ISIPV6)
		REJECT_PASS_ACCEPT();

	/*
	 * Don't offload if the SYN had a VLAN tag and the vid doesn't match
	 * anything on this interface.
	 */
	vid = EVL_VLANOFTAG(be16toh(cpl->vlan));
	if (vid != 0xfff) {
		ifp_vlan = VLAN_DEVAT(ifp, vid);
		if (ifp_vlan == NULL)
			REJECT_PASS_ACCEPT();
	}

	/*
	 * Don't offload if the peer requested a TCP option that's not known to
	 * the silicon.
	 */
	if (cpl->tcpopt.unknown)
		REJECT_PASS_ACCEPT();

	/*
	 * Don't offload if the outgoing interface for the route back to the
	 * peer is not the same as the interface that received the SYN.
	 * XXX: too restrictive.
	 */
	nam.sin_len = sizeof(nam);
	nam.sin_family = AF_INET;
	nam.sin_addr = inc.inc_faddr;
	rt = rtalloc1((struct sockaddr *)&nam, 0, 0);
	if (rt == NULL)
		REJECT_PASS_ACCEPT();
	else {
		struct sockaddr *nexthop;

		RT_UNLOCK(rt);
		nexthop = rt->rt_flags & RTF_GATEWAY ? rt->rt_gateway :
		    (struct sockaddr *)&nam;
		if (rt->rt_ifp == ifp ||
		    (ifp_vlan != NULL && rt->rt_ifp == ifp_vlan))
			e = t4_l2t_get(pi, rt->rt_ifp, nexthop);
		RTFREE(rt);
		if (e == NULL)
			REJECT_PASS_ACCEPT();	/* no l2te, or ifp mismatch */
	}

	/*
	 * NOTE(review): mbuf_to_synqe may return a malloc'ed entry
	 * (TPF_SYNQE_NEEDFREE).  The reject paths taken between here and
	 * refcount_init below do not appear to free it -- confirm.
	 */
	synqe = mbuf_to_synqe(m);
	if (synqe == NULL)
		REJECT_PASS_ACCEPT();

	wr = alloc_wrqe(sizeof(*rpl), &sc->sge.ctrlq[pi->port_id]);
	if (wr == NULL)
		REJECT_PASS_ACCEPT();
	rpl = wrtod(wr);

	INP_INFO_WLOCK(&V_tcbinfo);	/* for 4-tuple check, syncache_add */

	/* Don't offload if the 4-tuple is already in use */
	if (toe_4tuple_check(&inc, &th, ifp) != 0) {
		INP_INFO_WUNLOCK(&V_tcbinfo);
		free(wr, M_CXGBE);
		REJECT_PASS_ACCEPT();
	}

	inp = lctx->inp;		/* listening socket, not owned by TOE */
	INP_WLOCK(inp);

	/* Don't offload if the listening socket has closed */
	if (__predict_false(inp->inp_flags & INP_DROPPED)) {
		/*
		 * The listening socket has closed.  The reply from the TOE to
		 * our CPL_CLOSE_LISTSRV_REQ will ultimately release all
		 * resources tied to this listen context.
		 */
		INP_WUNLOCK(inp);
		INP_INFO_WUNLOCK(&V_tcbinfo);
		free(wr, M_CXGBE);
		REJECT_PASS_ACCEPT();
	}
	so = inp->inp_socket;

	mtu_idx = find_best_mtu_idx(sc, &inc, be16toh(cpl->tcpopt.mss));
	rscale = cpl->tcpopt.wsf && V_tcp_do_rfc1323 ? select_rcv_wscale() : 0;
	SOCKBUF_LOCK(&so->so_rcv);
	/* opt0 rcv_bufsiz initially, assumes its normal meaning later */
	rx_credits = min(select_rcv_wnd(so) >> 10, M_RCV_BUFSIZ);
	SOCKBUF_UNLOCK(&so->so_rcv);

	save_qids_in_mbuf(m, pi);
	get_qids_from_mbuf(m, NULL, &rxqid);

	INIT_TP_WR_MIT_CPL(rpl, CPL_PASS_ACCEPT_RPL, tid);
-	rpl->opt0 = calc_opt0(so, pi, e, mtu_idx, rscale, rx_credits,
-	    ULP_MODE_NONE);
-	rpl->opt2 = calc_opt2p(sc, pi, rxqid, &cpl->tcpopt, &th);
+	if (sc->tt.ddp && (so->so_options & SO_NO_DDP) == 0) {
+		ulp_mode = ULP_MODE_TCPDDP;
+		synqe_set_flag(synqe, TPF_SYNQE_TCPDDP);
+	} else
+		ulp_mode = ULP_MODE_NONE;
+	rpl->opt0 = calc_opt0(so, pi, e, mtu_idx, rscale, rx_credits, ulp_mode);
+	rpl->opt2 = calc_opt2p(sc, pi, rxqid, &cpl->tcpopt, &th, ulp_mode);

	synqe->tid = tid;
	synqe->lctx = lctx;
	/* The synqe owns the SYN mbuf from here on; m is cleared on purpose. */
	synqe->syn = m;
	m = NULL;
	refcount_init(&synqe->refcnt, 1); /* 1 so that it is held for the
					     duration of this function */
	synqe->l2e_idx = e->idx;
	synqe->rcv_bufsize = rx_credits;
	atomic_store_rel_ptr(&synqe->wr, (uintptr_t)wr);

	insert_tid(sc, tid, synqe);
	TAILQ_INSERT_TAIL(&lctx->synq, synqe, link);
	hold_synqe(synqe);	/* hold for the duration it's in the synq */
	hold_lctx(lctx);	/* A synqe on the list has a ref on its lctx */

	/*
	 * If all goes well t4_syncache_respond will get called during
	 * syncache_add.  Also note that syncache_add releases both pcbinfo and
	 * pcb locks.
	 */
	toe_syncache_add(&inc, &to, &th, inp, tod, synqe);
	INP_UNLOCK_ASSERT(inp);	/* ok to assert, we have a ref on the inp */
	INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);

	/*
	 * If we replied during syncache_add (synqe->wr has been consumed),
	 * good.  Otherwise, set it to 0 so that further syncache_respond
	 * attempts by the kernel will be ignored.
	 *
	 * The extra hold on the synqe makes sure that it is still around, even
	 * if the listener has been dropped and the synqe was aborted and the
	 * reply to the abort has removed and released the synqe from the synq
	 * list.
	 */
	if (atomic_cmpset_ptr(&synqe->wr, (uintptr_t)wr, 0)) {

		INP_WLOCK(inp);
		if (__predict_false(inp->inp_flags & INP_DROPPED)) {
			/* listener closed.  synqe must have been aborted. */
			KASSERT(synqe_flag(synqe, TPF_ABORT_SHUTDOWN),
			    ("%s: listener %p closed but synqe %p not aborted",
			    __func__, inp, synqe));

			CTR5(KTR_CXGBE,
			    "%s: stid %u, tid %u, lctx %p, synqe %p, ABORTED",
			    __func__, stid, tid, lctx, synqe);
			INP_WUNLOCK(inp);
			free(wr, M_CXGBE);
			release_synqe(synqe);	/* about to exit function */
			return (__LINE__);
		}

		/*
		 * synqe aborted before TOM replied to PASS_ACCEPT_REQ.  But
		 * that can only happen if the listener was closed and we just
		 * checked for that.
		 */
		KASSERT(!synqe_flag(synqe, TPF_ABORT_SHUTDOWN),
		    ("%s: synqe %p aborted, but listener %p not dropped.",
		    __func__, synqe, inp));

		/* Yank the synqe out of the lctx synq. */
		TAILQ_REMOVE(&lctx->synq, synqe, link);
		release_synqe(synqe);	/* removed from synq list */
		inp = release_lctx(sc, lctx);
		if (inp)
			INP_WUNLOCK(inp);

		/*
		 * syncache may or may not have a hold on the synqe, which may
		 * or may not be stashed in the original SYN mbuf passed to us.
		 * Just copy it over instead of dealing with all possibilities.
		 */
		m = m_dup(synqe->syn, M_DONTWAIT);
		if (m)
			m->m_pkthdr.rcvif = ifp;

		release_synqe(synqe);	/* about to exit function */
		free(wr, M_CXGBE);
		REJECT_PASS_ACCEPT();
	}
	release_synqe(synqe);	/* about to exit function */
	CTR5(KTR_CXGBE, "%s: stid %u, tid %u, lctx %p, synqe %p, SYNACK",
	    __func__, stid, tid, lctx, synqe);
	return (0);
reject:
	CTR4(KTR_CXGBE, "%s: stid %u, tid %u, REJECT (%d)", __func__, stid, tid,
	    reject_reason);

	if (e)
		t4_l2t_release(e);
	release_tid(sc, tid, lctx->ctrlq);

	/*
	 * Hand the SYN to the interface's input routine with the CPL stripped
	 * and the checksum flags set.  m can be NULL here if the m_dup above
	 * failed.
	 */
	if (__predict_true(m != NULL)) {
		m_adj(m, sizeof(*cpl));
		m->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID |
		    CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
		m->m_pkthdr.csum_data = 0xffff;
		ifp->if_input(ifp, m);
	}

	return (reject_reason);
}

/*
 * Build the headers handed to toe_syncache_expand by do_pass_establish.  inc
 * and th start out as parsed from the original SYN; th is then rewritten to
 * look like the ACK of our SYN|ACK (flags TH_ACK, ack = saved iss + 1, seq =
 * rcv_isn from the CPL).  *to is zeroed and only carries the saved timestamp
 * as to_tsecr, and only if the CPL's tcp_opt has the timestamp bit set.
 */
static void
synqe_to_protohdrs(struct synq_entry *synqe,
    const struct cpl_pass_establish *cpl, struct in_conninfo *inc,
    struct tcphdr *th, struct tcpopt *to)
{
	uint16_t tcp_opt = be16toh(cpl->tcp_opt);

	/* start off with the original SYN */
	pass_accept_req_to_protohdrs(synqe->syn, inc, th);

	/* modify parts to make it look like the ACK to our SYN|ACK */
	th->th_flags = TH_ACK;
	th->th_ack = synqe->iss + 1;
	th->th_seq = be32toh(cpl->rcv_isn);
	bzero(to, sizeof(*to));
	if (G_TCPOPT_TSTAMP(tcp_opt)) {
		to->to_flags |= TOF_TS;
		to->to_tsecr = synqe->ts;
	}
}

/*
 * CPL_PASS_ESTABLISH handler (registered in t4_init_listen_cpl_handlers).
 * Allocates a toepcb for the tid, stores a copy of the CPL followed by the
 * toepcb pointer in the SYN mbuf, and calls toe_syncache_expand.  If the
 * toepcb cannot be allocated or the expansion fails, the connection is reset
 * with send_reset_synqe.  If the listening socket has been dropped nothing is
 * done here.  Always returns 0.
 *
 * This revision puts the toepcb in DDP mode (set_tcpddp_ulp_mode) when the
 * synqe carries TPF_SYNQE_TCPDDP.
 */
static int
do_pass_establish(struct sge_iq *iq, const struct rss_header *rss,
    struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	struct port_info *pi;
	struct ifnet *ifp;
	const struct cpl_pass_establish *cpl = (const void *)(rss + 1);
#if defined(KTR) || defined(INVARIANTS)
	unsigned int stid = G_PASS_OPEN_TID(be32toh(cpl->tos_stid));
#endif
	unsigned int tid = GET_TID(cpl);
	struct synq_entry *synqe = lookup_tid(sc, tid);
	struct listen_ctx *lctx = synqe->lctx;
	struct inpcb *inp = lctx->inp;
	struct socket *so;
	struct tcphdr th;
	struct tcpopt to;
	struct in_conninfo inc;
	struct toepcb *toep;
	u_int txqid, rxqid;
#ifdef INVARIANTS
	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
#endif

	KASSERT(opcode == CPL_PASS_ESTABLISH,
	    ("%s: unexpected opcode 0x%x", __func__, opcode));
	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
	KASSERT(lctx->stid == stid, ("%s: lctx stid mismatch", __func__));
	KASSERT(synqe_flag(synqe, TPF_SYNQE),
	    ("%s: tid %u (ctx %p) not a synqe", __func__, tid, synqe));

	INP_INFO_WLOCK(&V_tcbinfo);	/* for syncache_expand */
	INP_WLOCK(inp);

	CTR6(KTR_CXGBE,
	    "%s: stid %u, tid %u, synqe %p (0x%x), inp_flags 0x%x",
	    __func__, stid, tid, synqe, synqe->flags, inp->inp_flags);

	if (__predict_false(inp->inp_flags & INP_DROPPED)) {
		/*
		 * The listening socket has closed.  The TOM must have aborted
		 * all the embryonic connections (including this one) that were
		 * on the lctx's synq.  do_abort_rpl for the tid is responsible
		 * for cleaning up.
		 */
		KASSERT(synqe_flag(synqe, TPF_ABORT_SHUTDOWN),
		    ("%s: listen socket dropped but tid %u not aborted.",
		    __func__, tid));

		INP_WUNLOCK(inp);
		INP_INFO_WUNLOCK(&V_tcbinfo);
		return (0);
	}

	ifp = synqe->syn->m_pkthdr.rcvif;
	pi = ifp->if_softc;
	KASSERT(pi->adapter == sc,
	    ("%s: pi %p, sc %p mismatch", __func__, pi, sc));

	get_qids_from_mbuf(synqe->syn, &txqid, &rxqid);
	KASSERT(rxqid == iq_to_ofld_rxq(iq) - &sc->sge.ofld_rxq[0],
	    ("%s: CPL arrived on unexpected rxq.  %d %d", __func__, rxqid,
	    (int)(iq_to_ofld_rxq(iq) - &sc->sge.ofld_rxq[0])));

	toep = alloc_toepcb(pi, txqid, rxqid, M_NOWAIT);
	if (toep == NULL) {
reset:
		/* The reply to this abort will perform final cleanup */
		send_reset_synqe(TOEDEV(ifp), synqe);
		INP_WUNLOCK(inp);
		INP_INFO_WUNLOCK(&V_tcbinfo);
		return (0);
	}
	toep->tid = tid;
	toep->l2te = &sc->l2t->l2tab[synqe->l2e_idx];
-	toep->ulp_mode = ULP_MODE_NONE;
+	if (synqe_flag(synqe, TPF_SYNQE_TCPDDP))
+		set_tcpddp_ulp_mode(toep);
+	else
+		toep->ulp_mode = ULP_MODE_NONE;
	/* opt0 rcv_bufsiz initially, assumes its normal meaning later */
	toep->rx_credits = synqe->rcv_bufsize;

	so = inp->inp_socket;
	KASSERT(so != NULL, ("%s: socket is NULL", __func__));

	/* Come up with something that syncache_expand should be ok with. */
	synqe_to_protohdrs(synqe, cpl, &inc, &th, &to);

	/*
	 * No more need for anything in the mbuf that carried the
	 * CPL_PASS_ACCEPT_REQ.  Drop the CPL_PASS_ESTABLISH and toep pointer
	 * there.  XXX: bad form but I don't want to increase the size of synqe.
	 */
	m = synqe->syn;
	KASSERT(sizeof(*cpl) + sizeof(toep) <= m->m_len,
	    ("%s: no room in mbuf %p (m_len %d)", __func__, m, m->m_len));
	bcopy(cpl, mtod(m, void *), sizeof(*cpl));
	*(struct toepcb **)(mtod(m, struct cpl_pass_establish *) + 1) = toep;

	if (!toe_syncache_expand(&inc, &to, &th, &so) || so == NULL) {
		free_toepcb(toep);
		goto reset;
	}

	/* Done with the synqe */
	TAILQ_REMOVE(&lctx->synq, synqe, link);
	inp = release_lctx(sc, lctx);
	if (inp != NULL)
		INP_WUNLOCK(inp);
	INP_INFO_WUNLOCK(&V_tcbinfo);
	release_synqe(synqe);

	return (0);
}

/* Register the four listen-related CPL handlers defined in this file. */
void
t4_init_listen_cpl_handlers(struct adapter *sc)
{

	t4_register_cpl_handler(sc, CPL_PASS_OPEN_RPL, do_pass_open_rpl);
	t4_register_cpl_handler(sc, CPL_CLOSE_LISTSRV_RPL, do_close_server_rpl);
	t4_register_cpl_handler(sc, CPL_PASS_ACCEPT_REQ, do_pass_accept_req);
	t4_register_cpl_handler(sc, CPL_PASS_ESTABLISH, do_pass_establish);
}
#endif
Index: head/sys/dev/cxgbe/tom/t4_tom.c
===================================================================
--- head/sys/dev/cxgbe/tom/t4_tom.c (revision 239343)
+++ head/sys/dev/cxgbe/tom/t4_tom.c (revision 239344)
@@ -1,755 +1,777 @@
/*-
 * Copyright (c) 2012 Chelsio Communications, Inc.
 * All rights reserved.
 * Written by: Navdeep Parhar
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
*
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * NOTE(review): the header names of the "#include" directives below that have
 * no quoted file name are missing from this text (they appear to have been
 * lost when the diff was extracted).  Restore them from the original t4_tom.c
 * before using this as source.
 */
#include
__FBSDID("$FreeBSD$");

#include "opt_inet.h"

#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#define TCPSTATES
#include
#include

#ifdef TCP_OFFLOAD
#include "common/common.h"
#include "common/t4_msg.h"
#include "common/t4_regs.h"
#include "tom/t4_tom_l2t.h"
#include "tom/t4_tom.h"

+static struct protosw ddp_protosw;
+static struct pr_usrreqs ddp_usrreqs;
+
/* Module ops */
static int t4_tom_mod_load(void);
static int t4_tom_mod_unload(void);
static int t4_tom_modevent(module_t, int, void *);

/* ULD ops and helpers */
static int t4_tom_activate(struct adapter *);
static int t4_tom_deactivate(struct adapter *);

static struct uld_info tom_uld_info = {
	.uld_id = ULD_TOM,
	.activate = t4_tom_activate,
	.deactivate = t4_tom_deactivate,
};

static void queue_tid_release(struct adapter *, int);
static void release_offload_resources(struct toepcb *);
static int alloc_tid_tabs(struct tid_info *);
static void free_tid_tabs(struct tid_info *);
static void free_tom_data(struct adapter *, struct tom_data *);

/*
 * Allocate and initialise a toepcb for port pi.
 *
 * txqid, rxqid: indices of the offload tx/rx queues to use.  A negative value
 *	means "pick one of the port's offload queues at random".
 * flags: malloc(9) flags; M_ZERO is always added.
 *
 * The toepcb is allocated together with its array of tx descriptors
 * (txsd_total entries, all initially available).  Returns NULL if the
 * allocation fails.
 */
struct toepcb *
alloc_toepcb(struct port_info *pi, int txqid, int rxqid, int flags)
{
	struct adapter *sc = pi->adapter;
	struct toepcb *toep;
	int tx_credits, txsd_total, len;

	/*
	 * The firmware counts tx work request credits in units of 16 bytes
	 * each.  Reserve room for an ABORT_REQ so the driver never has to worry
	 * about tx credits if it wants to abort a connection.
	 */
	tx_credits = sc->params.ofldq_wr_cred;
	tx_credits -= howmany(sizeof(struct cpl_abort_req), 16);

	/*
	 * Shortest possible tx work request is a fw_ofld_tx_data_wr + 1 byte
	 * immediate payload, and firmware counts tx work request credits in
	 * units of 16 bytes.  Calculate the maximum work requests possible.
	 */
	txsd_total = tx_credits /
	    howmany((sizeof(struct fw_ofld_tx_data_wr) + 1), 16);

	if (txqid < 0)
		txqid = (arc4random() % pi->nofldtxq) + pi->first_ofld_txq;
	KASSERT(txqid >= pi->first_ofld_txq &&
	    txqid < pi->first_ofld_txq + pi->nofldtxq,
	    ("%s: txqid %d for port %p (first %d, n %d)", __func__, txqid, pi,
		pi->first_ofld_txq, pi->nofldtxq));

	if (rxqid < 0)
		rxqid = (arc4random() % pi->nofldrxq) + pi->first_ofld_rxq;
	KASSERT(rxqid >= pi->first_ofld_rxq &&
	    rxqid < pi->first_ofld_rxq + pi->nofldrxq,
	    ("%s: rxqid %d for port %p (first %d, n %d)", __func__, rxqid, pi,
		pi->first_ofld_rxq, pi->nofldrxq));

	/* The tx descriptors follow the fixed part of the toepcb. */
	len = offsetof(struct toepcb, txsd) +
	    txsd_total * sizeof(struct ofld_tx_sdesc);

	toep = malloc(len, M_CXGBE, M_ZERO | flags);
	if (toep == NULL)
		return (NULL);

	toep->td = sc->tom_softc;
	toep->port = pi;
	toep->tx_credits = tx_credits;
	toep->ofld_txq = &sc->sge.ofld_txq[txqid];
	toep->ofld_rxq = &sc->sge.ofld_rxq[rxqid];
	toep->ctrlq = &sc->sge.ctrlq[pi->port_id];
	toep->txsd_total = txsd_total;
	toep->txsd_avail = txsd_total;
	toep->txsd_pidx = 0;
	toep->txsd_cidx = 0;

	return (toep);
}

/*
 * Free a toepcb.  It must no longer be attached to an inpcb and must not have
 * a CPL pending (both asserted).
 */
void
free_toepcb(struct toepcb *toep)
{

	KASSERT(toepcb_flag(toep, TPF_ATTACHED) == 0,
	    ("%s: attached to an inpcb", __func__));
	KASSERT(toepcb_flag(toep, TPF_CPL_PENDING) == 0,
	    ("%s: CPL pending", __func__));

	free(toep, M_CXGBE);
}

/*
 * Set up the socket for TCP offload.
*/ void offload_socket(struct socket *so, struct toepcb *toep) { struct tom_data *td = toep->td; struct inpcb *inp = sotoinpcb(so); struct tcpcb *tp = intotcpcb(inp); struct sockbuf *sb; INP_WLOCK_ASSERT(inp); /* Update socket */ sb = &so->so_snd; SOCKBUF_LOCK(sb); sb->sb_flags |= SB_NOCOALESCE; SOCKBUF_UNLOCK(sb); sb = &so->so_rcv; SOCKBUF_LOCK(sb); sb->sb_flags |= SB_NOCOALESCE; + if (toep->ulp_mode == ULP_MODE_TCPDDP) + so->so_proto = &ddp_protosw; SOCKBUF_UNLOCK(sb); /* Update TCP PCB */ tp->tod = &td->tod; tp->t_toe = toep; tp->t_flags |= TF_TOE; /* Install an extra hold on inp */ toep->inp = inp; toepcb_set_flag(toep, TPF_ATTACHED); in_pcbref(inp); /* Add the TOE PCB to the active list */ mtx_lock(&td->toep_list_lock); TAILQ_INSERT_HEAD(&td->toep_list, toep, link); mtx_unlock(&td->toep_list_lock); } /* This is _not_ the normal way to "unoffload" a socket. */ void undo_offload_socket(struct socket *so) { struct inpcb *inp = sotoinpcb(so); struct tcpcb *tp = intotcpcb(inp); struct toepcb *toep = tp->t_toe; struct tom_data *td = toep->td; struct sockbuf *sb; INP_WLOCK_ASSERT(inp); sb = &so->so_snd; SOCKBUF_LOCK(sb); sb->sb_flags &= ~SB_NOCOALESCE; SOCKBUF_UNLOCK(sb); sb = &so->so_rcv; SOCKBUF_LOCK(sb); sb->sb_flags &= ~SB_NOCOALESCE; SOCKBUF_UNLOCK(sb); tp->tod = NULL; tp->t_toe = NULL; tp->t_flags &= ~TF_TOE; toep->inp = NULL; toepcb_clr_flag(toep, TPF_ATTACHED); if (in_pcbrele_wlocked(inp)) panic("%s: inp freed.", __func__); mtx_lock(&td->toep_list_lock); TAILQ_REMOVE(&td->toep_list, toep, link); mtx_unlock(&td->toep_list_lock); } static void release_offload_resources(struct toepcb *toep) { struct tom_data *td = toep->td; struct adapter *sc = td_adapter(td); int tid = toep->tid; KASSERT(toepcb_flag(toep, TPF_CPL_PENDING) == 0, ("%s: %p has CPL pending.", __func__, toep)); KASSERT(toepcb_flag(toep, TPF_ATTACHED) == 0, ("%s: %p is still attached.", __func__, toep)); CTR4(KTR_CXGBE, "%s: toep %p (tid %d, l2te %p)", __func__, toep, tid, toep->l2te); + if 
(toep->ulp_mode == ULP_MODE_TCPDDP) + release_ddp_resources(toep); + if (toep->l2te) t4_l2t_release(toep->l2te); if (tid >= 0) { remove_tid(sc, tid); release_tid(sc, tid, toep->ctrlq); } mtx_lock(&td->toep_list_lock); TAILQ_REMOVE(&td->toep_list, toep, link); mtx_unlock(&td->toep_list_lock); free_toepcb(toep); } /* * The kernel is done with the TCP PCB and this is our opportunity to unhook the * toepcb hanging off of it. If the TOE driver is also done with the toepcb (no * pending CPL) then it is time to release all resources tied to the toepcb. * * Also gets called when an offloaded active open fails and the TOM wants the * kernel to take the TCP PCB back. */ static void t4_pcb_detach(struct toedev *tod __unused, struct tcpcb *tp) { #if defined(KTR) || defined(INVARIANTS) struct inpcb *inp = tp->t_inpcb; #endif struct toepcb *toep = tp->t_toe; INP_WLOCK_ASSERT(inp); KASSERT(toep != NULL, ("%s: toep is NULL", __func__)); KASSERT(toepcb_flag(toep, TPF_ATTACHED), ("%s: not attached", __func__)); #ifdef KTR if (tp->t_state == TCPS_SYN_SENT) { CTR6(KTR_CXGBE, "%s: atid %d, toep %p (0x%x), inp %p (0x%x)", __func__, toep->tid, toep, toep->flags, inp, inp->inp_flags); } else { CTR6(KTR_CXGBE, "t4_pcb_detach: tid %d (%s), toep %p (0x%x), inp %p (0x%x)", toep->tid, tcpstates[tp->t_state], toep, toep->flags, inp, inp->inp_flags); } #endif tp->t_toe = NULL; tp->t_flags &= ~TF_TOE; toepcb_clr_flag(toep, TPF_ATTACHED); if (toepcb_flag(toep, TPF_CPL_PENDING) == 0) release_offload_resources(toep); } /* * The TOE driver will not receive any more CPLs for the tid associated with the * toepcb; release the hold on the inpcb. 
*/ void final_cpl_received(struct toepcb *toep) { struct inpcb *inp = toep->inp; KASSERT(inp != NULL, ("%s: inp is NULL", __func__)); INP_WLOCK_ASSERT(inp); KASSERT(toepcb_flag(toep, TPF_CPL_PENDING), ("%s: CPL not pending already?", __func__)); CTR6(KTR_CXGBE, "%s: tid %d, toep %p (0x%x), inp %p (0x%x)", __func__, toep->tid, toep, toep->flags, inp, inp->inp_flags); toep->inp = NULL; toepcb_clr_flag(toep, TPF_CPL_PENDING); if (toepcb_flag(toep, TPF_ATTACHED) == 0) release_offload_resources(toep); if (!in_pcbrele_wlocked(inp)) INP_WUNLOCK(inp); } void insert_tid(struct adapter *sc, int tid, void *ctx) { struct tid_info *t = &sc->tids; t->tid_tab[tid] = ctx; atomic_add_int(&t->tids_in_use, 1); } void * lookup_tid(struct adapter *sc, int tid) { struct tid_info *t = &sc->tids; return (t->tid_tab[tid]); } void update_tid(struct adapter *sc, int tid, void *ctx) { struct tid_info *t = &sc->tids; t->tid_tab[tid] = ctx; } void remove_tid(struct adapter *sc, int tid) { struct tid_info *t = &sc->tids; t->tid_tab[tid] = NULL; atomic_subtract_int(&t->tids_in_use, 1); } void release_tid(struct adapter *sc, int tid, struct sge_wrq *ctrlq) { struct wrqe *wr; struct cpl_tid_release *req; wr = alloc_wrqe(sizeof(*req), ctrlq); if (wr == NULL) { queue_tid_release(sc, tid); /* defer */ return; } req = wrtod(wr); INIT_TP_WR_MIT_CPL(req, CPL_TID_RELEASE, tid); t4_wrq_tx(sc, wr); } static void queue_tid_release(struct adapter *sc, int tid) { CXGBE_UNIMPLEMENTED("deferred tid release"); } /* * What mtu_idx to use, given a 4-tuple and/or an MSS cap */ int find_best_mtu_idx(struct adapter *sc, struct in_conninfo *inc, int pmss) { unsigned short *mtus = &sc->params.mtus[0]; int i = 0, mss; KASSERT(inc != NULL || pmss > 0, ("%s: at least one of inc/pmss must be specified", __func__)); mss = inc ? tcp_mssopt(inc) : pmss; if (pmss > 0 && mss > pmss) mss = pmss; while (i < NMTUS - 1 && mtus[i + 1] <= mss + 40) ++i; return (i); } /* * Determine the receive window size for a socket. 
*/ u_long select_rcv_wnd(struct socket *so) { unsigned long wnd; SOCKBUF_LOCK_ASSERT(&so->so_rcv); wnd = sbspace(&so->so_rcv); if (wnd < MIN_RCV_WND) wnd = MIN_RCV_WND; return min(wnd, MAX_RCV_WND); } int select_rcv_wscale(void) { int wscale = 0; unsigned long space = sb_max; if (space > MAX_RCV_WND) space = MAX_RCV_WND; while (wscale < TCP_MAX_WINSHIFT && (TCP_MAXWIN << wscale) < space) wscale++; return (wscale); } extern int always_keepalive; #define VIID_SMACIDX(v) (((unsigned int)(v) & 0x7f) << 1) /* * socket so could be a listening socket too. */ uint64_t calc_opt0(struct socket *so, struct port_info *pi, struct l2t_entry *e, int mtu_idx, int rscale, int rx_credits, int ulp_mode) { uint64_t opt0; KASSERT(rx_credits <= M_RCV_BUFSIZ, ("%s: rcv_bufsiz too high", __func__)); opt0 = F_TCAM_BYPASS | V_WND_SCALE(rscale) | V_MSS_IDX(mtu_idx) | V_ULP_MODE(ulp_mode) | V_RCV_BUFSIZ(rx_credits); if (so != NULL) { struct inpcb *inp = sotoinpcb(so); struct tcpcb *tp = intotcpcb(inp); int keepalive = always_keepalive || so_options_get(so) & SO_KEEPALIVE; opt0 |= V_NAGLE((tp->t_flags & TF_NODELAY) == 0); opt0 |= V_KEEP_ALIVE(keepalive != 0); } if (e != NULL) opt0 |= V_L2T_IDX(e->idx); if (pi != NULL) { opt0 |= V_SMAC_SEL(VIID_SMACIDX(pi->viid)); opt0 |= V_TX_CHAN(pi->tx_chan); } return htobe64(opt0); } #define FILTER_SEL_WIDTH_P_FC (3 + 1) #define FILTER_SEL_WIDTH_VIN_P_FC (6 + 7 + FILTER_SEL_WIDTH_P_FC) #define FILTER_SEL_WIDTH_TAG_P_FC (3 + FILTER_SEL_WIDTH_VIN_P_FC) #define FILTER_SEL_WIDTH_VLD_TAG_P_FC (1 + FILTER_SEL_WIDTH_TAG_P_FC) #define VLAN_NONE 0xfff #define FILTER_SEL_VLAN_NONE 0xffff uint32_t select_ntuple(struct port_info *pi, struct l2t_entry *e, uint32_t filter_mode) { uint16_t viid = pi->viid; uint32_t ntuple = 0; if (filter_mode == HW_TPL_FR_MT_PR_IV_P_FC) { if (e->vlan == VLAN_NONE) ntuple |= FILTER_SEL_VLAN_NONE << FILTER_SEL_WIDTH_P_FC; else { ntuple |= e->vlan << FILTER_SEL_WIDTH_P_FC; ntuple |= 1 << FILTER_SEL_WIDTH_VLD_TAG_P_FC; } ntuple |= e->lport << 
S_PORT; ntuple |= IPPROTO_TCP << FILTER_SEL_WIDTH_VLD_TAG_P_FC; } else if (filter_mode == HW_TPL_FR_MT_PR_OV_P_FC) { ntuple |= G_FW_VIID_VIN(viid) << FILTER_SEL_WIDTH_P_FC; ntuple |= G_FW_VIID_PFN(viid) << FILTER_SEL_WIDTH_VIN_P_FC; ntuple |= G_FW_VIID_VIVLD(viid) << FILTER_SEL_WIDTH_TAG_P_FC; ntuple |= e->lport << S_PORT; ntuple |= IPPROTO_TCP << FILTER_SEL_WIDTH_VLD_TAG_P_FC; } return (htobe32(ntuple)); } static int alloc_tid_tabs(struct tid_info *t) { size_t size; unsigned int i; size = t->ntids * sizeof(*t->tid_tab) + t->natids * sizeof(*t->atid_tab) + t->nstids * sizeof(*t->stid_tab); t->tid_tab = malloc(size, M_CXGBE, M_ZERO | M_NOWAIT); if (t->tid_tab == NULL) return (ENOMEM); mtx_init(&t->atid_lock, "atid lock", NULL, MTX_DEF); t->atid_tab = (union aopen_entry *)&t->tid_tab[t->ntids]; t->afree = t->atid_tab; t->atids_in_use = 0; for (i = 1; i < t->natids; i++) t->atid_tab[i - 1].next = &t->atid_tab[i]; t->atid_tab[t->natids - 1].next = NULL; mtx_init(&t->stid_lock, "stid lock", NULL, MTX_DEF); t->stid_tab = (union serv_entry *)&t->atid_tab[t->natids]; t->sfree = t->stid_tab; t->stids_in_use = 0; for (i = 1; i < t->nstids; i++) t->stid_tab[i - 1].next = &t->stid_tab[i]; t->stid_tab[t->nstids - 1].next = NULL; atomic_store_rel_int(&t->tids_in_use, 0); return (0); } static void free_tid_tabs(struct tid_info *t) { KASSERT(t->tids_in_use == 0, ("%s: %d tids still in use.", __func__, t->tids_in_use)); KASSERT(t->atids_in_use == 0, ("%s: %d atids still in use.", __func__, t->atids_in_use)); KASSERT(t->stids_in_use == 0, ("%s: %d tids still in use.", __func__, t->stids_in_use)); free(t->tid_tab, M_CXGBE); t->tid_tab = NULL; if (mtx_initialized(&t->atid_lock)) mtx_destroy(&t->atid_lock); if (mtx_initialized(&t->stid_lock)) mtx_destroy(&t->stid_lock); } static void free_tom_data(struct adapter *sc, struct tom_data *td) { KASSERT(TAILQ_EMPTY(&td->toep_list), ("%s: TOE PCB list is not empty.", __func__)); KASSERT(td->lctx_count == 0, ("%s: lctx hash table is not 
empty.", __func__)); t4_uninit_l2t_cpl_handlers(sc); + t4_uninit_cpl_io_handlers(sc); + t4_uninit_ddp(sc, td); if (td->listen_mask != 0) hashdestroy(td->listen_hash, M_CXGBE, td->listen_mask); if (mtx_initialized(&td->lctx_hash_lock)) mtx_destroy(&td->lctx_hash_lock); if (mtx_initialized(&td->toep_list_lock)) mtx_destroy(&td->toep_list_lock); free_tid_tabs(&sc->tids); free(td, M_CXGBE); } /* * Ground control to Major TOM * Commencing countdown, engines on */ static int t4_tom_activate(struct adapter *sc) { struct tom_data *td; struct toedev *tod; int i, rc; ADAPTER_LOCK_ASSERT_OWNED(sc); /* for sc->flags */ /* per-adapter softc for TOM */ td = malloc(sizeof(*td), M_CXGBE, M_ZERO | M_NOWAIT); if (td == NULL) return (ENOMEM); /* List of TOE PCBs and associated lock */ mtx_init(&td->toep_list_lock, "PCB list lock", NULL, MTX_DEF); TAILQ_INIT(&td->toep_list); /* Listen context */ mtx_init(&td->lctx_hash_lock, "lctx hash lock", NULL, MTX_DEF); td->listen_hash = hashinit_flags(LISTEN_HASH_SIZE, M_CXGBE, &td->listen_mask, HASH_NOWAIT); /* TID tables */ rc = alloc_tid_tabs(&sc->tids); if (rc != 0) goto done; + t4_init_ddp(sc, td); + /* CPL handlers */ t4_init_connect_cpl_handlers(sc); t4_init_l2t_cpl_handlers(sc); t4_init_listen_cpl_handlers(sc); t4_init_cpl_io_handlers(sc); /* toedev ops */ tod = &td->tod; init_toedev(tod); tod->tod_softc = sc; tod->tod_connect = t4_connect; tod->tod_listen_start = t4_listen_start; tod->tod_listen_stop = t4_listen_stop; tod->tod_rcvd = t4_rcvd; tod->tod_output = t4_tod_output; tod->tod_send_rst = t4_send_rst; tod->tod_send_fin = t4_send_fin; tod->tod_pcb_detach = t4_pcb_detach; tod->tod_l2_update = t4_l2_update; tod->tod_syncache_added = t4_syncache_added; tod->tod_syncache_removed = t4_syncache_removed; tod->tod_syncache_respond = t4_syncache_respond; tod->tod_offload_socket = t4_offload_socket; for_each_port(sc, i) TOEDEV(sc->port[i]->ifp) = &td->tod; sc->tom_softc = td; sc->flags |= TOM_INIT_DONE; register_toedev(sc->tom_softc); done: 
if (rc != 0) free_tom_data(sc, td); return (rc); } static int t4_tom_deactivate(struct adapter *sc) { int rc = 0; struct tom_data *td = sc->tom_softc; ADAPTER_LOCK_ASSERT_OWNED(sc); /* for sc->flags */ if (td == NULL) return (0); /* XXX. KASSERT? */ if (sc->offload_map != 0) return (EBUSY); /* at least one port has IFCAP_TOE enabled */ mtx_lock(&td->toep_list_lock); if (!TAILQ_EMPTY(&td->toep_list)) rc = EBUSY; mtx_unlock(&td->toep_list_lock); mtx_lock(&td->lctx_hash_lock); if (td->lctx_count > 0) rc = EBUSY; mtx_unlock(&td->lctx_hash_lock); if (rc == 0) { unregister_toedev(sc->tom_softc); free_tom_data(sc, td); sc->tom_softc = NULL; sc->flags &= ~TOM_INIT_DONE; } return (rc); } static int t4_tom_mod_load(void) { int rc; + struct protosw *tcp_protosw; + + tcp_protosw = pffindproto(PF_INET, IPPROTO_TCP, SOCK_STREAM); + if (tcp_protosw == NULL) + return (ENOPROTOOPT); + + bcopy(tcp_protosw, &ddp_protosw, sizeof(ddp_protosw)); + bcopy(tcp_protosw->pr_usrreqs, &ddp_usrreqs, sizeof(ddp_usrreqs)); + ddp_usrreqs.pru_soreceive = t4_soreceive_ddp; + ddp_protosw.pr_usrreqs = &ddp_usrreqs; rc = t4_register_uld(&tom_uld_info); if (rc != 0) t4_tom_mod_unload(); return (rc); } static void tom_uninit(struct adapter *sc, void *arg __unused) { /* Try to free resources (works only if no port has IFCAP_TOE) */ ADAPTER_LOCK(sc); if (sc->flags & TOM_INIT_DONE) t4_deactivate_uld(sc, ULD_TOM); ADAPTER_UNLOCK(sc); } static int t4_tom_mod_unload(void) { t4_iterate(tom_uninit, NULL); if (t4_unregister_uld(&tom_uld_info) == EBUSY) return (EBUSY); return (0); } #endif /* TCP_OFFLOAD */ static int t4_tom_modevent(module_t mod, int cmd, void *arg) { int rc = 0; #ifdef TCP_OFFLOAD switch (cmd) { case MOD_LOAD: rc = t4_tom_mod_load(); break; case MOD_UNLOAD: rc = t4_tom_mod_unload(); break; default: rc = EINVAL; } #else printf("t4_tom: compiled without TCP_OFFLOAD support.\n"); rc = EOPNOTSUPP; #endif return (rc); } static moduledata_t t4_tom_moddata= { "t4_tom", t4_tom_modevent, 0 }; 
MODULE_VERSION(t4_tom, 1); MODULE_DEPEND(t4_tom, toecore, 1, 1, 1); MODULE_DEPEND(t4_tom, t4nex, 1, 1, 1); DECLARE_MODULE(t4_tom, t4_tom_moddata, SI_SUB_EXEC, SI_ORDER_ANY); Index: head/sys/dev/cxgbe/tom/t4_tom.h =================================================================== --- head/sys/dev/cxgbe/tom/t4_tom.h (revision 239343) +++ head/sys/dev/cxgbe/tom/t4_tom.h (revision 239344) @@ -1,251 +1,316 @@ /*- * Copyright (c) 2012 Chelsio Communications, Inc. * All rights reserved. * Written by: Navdeep Parhar * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ * */ #ifndef __T4_TOM_H__ #define __T4_TOM_H__ #define KTR_CXGBE KTR_SPARE3 #define LISTEN_HASH_SIZE 32 /* * Min receive window. 
We want it to be large enough to accommodate receive * coalescing, handle jumbo frames, and not trigger sender SWS avoidance. */ #define MIN_RCV_WND (24 * 1024U) /* * Max receive window supported by HW in bytes. Only a small part of it can * be set through option0, the rest needs to be set through RX_DATA_ACK. */ #define MAX_RCV_WND ((1U << 27) - 1) +#define DDP_RSVD_WIN (16 * 1024U) +#define SB_DDP_INDICATE SB_IN_TOE /* soreceive must respond to indicate */ + +#define M_DDP M_PROTO1 + +#define USE_DDP_RX_FLOW_CONTROL + /* TOE PCB flags (bit numbers for toepcb_flag/toepcb_set_flag/toepcb_clr_flag) */ enum { TPF_ATTACHED, /* a tcpcb refers to this toepcb */ TPF_FLOWC_WR_SENT, /* firmware flow context WR sent */ TPF_TX_DATA_SENT, /* some data sent */ TPF_TX_SUSPENDED, /* tx suspended for lack of resources */ TPF_SEND_FIN, /* send FIN after sending all pending data */ TPF_FIN_SENT, /* FIN has been sent */ TPF_ABORT_SHUTDOWN, /* connection abort is in progress */ TPF_CPL_PENDING, /* haven't received the last CPL */ TPF_SYNQE, /* synq_entry, not really a toepcb */ TPF_SYNQE_NEEDFREE, /* synq_entry was allocated externally */ + TPF_SYNQE_TCPDDP, /* ulp_mode TCPDDP when toepcb is allocated */ }; +enum { /* bit masks kept in toepcb's ddp_flags */ + DDP_OK = (1 << 0), /* OK to turn on DDP */ + DDP_SC_REQ = (1 << 1), /* state change (on/off) requested */ + DDP_ON = (1 << 2), /* DDP is turned on */ + DDP_BUF0_ACTIVE = (1 << 3), /* buffer 0 in use (not invalidated) */ + DDP_BUF1_ACTIVE = (1 << 4), /* buffer 1 in use (not invalidated) */ +}; + /* Tx software descriptor; element type of toepcb's txsd[] */ struct ofld_tx_sdesc { uint32_t plen; /* payload length */ uint8_t tx_credits; /* firmware tx credits (unit is 16B) */ }; +struct ppod_region { + TAILQ_ENTRY(ppod_region) link; + int used; /* # of pods used by this region */ + int free; /* # of contiguous pods free right after this region */ +}; + +struct ddp_buffer { + uint32_t tag; /* includes color, page pod addr, and DDP page size */ + int nppods; + int offset; + int len; + struct ppod_region ppod_region; + int npages; + vm_page_t *pages; +}; + struct toepcb { TAILQ_ENTRY(toepcb) link;
/* toep_list */ unsigned int flags; /* miscellaneous flags (TPF_* bit numbers) */ struct tom_data *td; struct inpcb *inp; /* backpointer to host stack's PCB */ struct port_info *port; /* physical port */ struct sge_wrq *ofld_txq; struct sge_ofld_rxq *ofld_rxq; struct sge_wrq *ctrlq; struct l2t_entry *l2te; /* L2 table entry used by this connection */ int tid; /* Connection identifier */ unsigned int tx_credits;/* tx WR credits (in 16 byte units) remaining */ - unsigned int enqueued; /* # of bytes added to so_rcv (not yet read) */ + unsigned int sb_cc; /* last noted value of so_rcv->sb_cc */ int rx_credits; /* rx credits (in bytes) to be returned to hw */ unsigned int ulp_mode; /* ULP mode */ + unsigned int ddp_flags; /* DDP_* bit masks */ + struct ddp_buffer *db[2]; /* presumably DDP buffers 0 and 1 (cf. DDP_BUFn_ACTIVE) -- TODO confirm */ + time_t ddp_disabled; + uint8_t ddp_score; /* set to DDP_LOW_SCORE by set_tcpddp_ulp_mode */ + /* Tx software descriptor */ uint8_t txsd_total; uint8_t txsd_pidx; uint8_t txsd_cidx; uint8_t txsd_avail; struct ofld_tx_sdesc txsd[]; }; struct flowc_tx_params { uint32_t snd_nxt; uint32_t rcv_nxt; unsigned int snd_space; unsigned int mss; }; /* Test, set, and clear bit number 'flag' (a TPF_* value) in toep->flags. */ static inline int toepcb_flag(struct toepcb *toep, int flag) { return isset(&toep->flags, flag); } static inline void toepcb_set_flag(struct toepcb *toep, int flag) { setbit(&toep->flags, flag); } static inline void toepcb_clr_flag(struct toepcb *toep, int flag) { clrbit(&toep->flags, flag); } +#define DDP_RETRY_WAIT 5 /* seconds to wait before re-enabling DDP */ +#define DDP_LOW_SCORE 1 +#define DDP_HIGH_SCORE 3 + /* Put the toepcb in TCPDDP ULP mode with its initial DDP flags and score. */ +static inline void +set_tcpddp_ulp_mode(struct toepcb *toep) +{ + + toep->ulp_mode = ULP_MODE_TCPDDP; + toep->ddp_flags = DDP_OK; + toep->ddp_score = DDP_LOW_SCORE; +} + /* * Compressed state for embryonic connections for a listener. Barely fits in * 64B, try not to grow it further.
*/ struct synq_entry { TAILQ_ENTRY(synq_entry) link; /* listen_ctx's synq link */ int flags; /* TPF_* bit numbers, same as toepcb's flags */ int tid; struct listen_ctx *lctx; /* backpointer to listen ctx */ struct mbuf *syn; uint32_t iss; uint32_t ts; volatile uintptr_t wr; volatile u_int refcnt; uint16_t l2e_idx; uint16_t rcv_bufsize; }; /* Test, set, and clear bit number 'flag' in synqe->flags. */ static inline int synqe_flag(struct synq_entry *synqe, int flag) { return isset(&synqe->flags, flag); } static inline void synqe_set_flag(struct synq_entry *synqe, int flag) { setbit(&synqe->flags, flag); } static inline void synqe_clr_flag(struct synq_entry *synqe, int flag) { clrbit(&synqe->flags, flag); } /* listen_ctx flags */ #define LCTX_RPL_PENDING 1 /* waiting for a CPL_PASS_OPEN_RPL */ struct listen_ctx { LIST_ENTRY(listen_ctx) link; /* listen hash linkage */ volatile int refcount; int stid; int flags; struct inpcb *inp; /* listening socket's inp */ struct sge_wrq *ctrlq; struct sge_ofld_rxq *ofld_rxq; TAILQ_HEAD(, synq_entry) synq; }; +TAILQ_HEAD(ppod_head, ppod_region); + struct tom_data { struct toedev tod; /* toepcb's associated with this TOE device */ struct mtx toep_list_lock; TAILQ_HEAD(, toepcb) toep_list; + struct mtx lctx_hash_lock; LIST_HEAD(, listen_ctx) *listen_hash; u_long listen_mask; int lctx_count; /* # of lctx in the hash table */ - struct mtx lctx_hash_lock; + + struct mtx ppod_lock; + int nppods; + int nppods_free; /* # of available ppods */ + int nppods_free_head; /* # of available ppods at the beginning */ + struct ppod_head ppods; }; /* Map a toedev back to the tom_data that embeds it as member 'tod'. */ static inline struct tom_data * tod_td(struct toedev *tod) { return (member2struct(tom_data, tod, tod)); } /* The adapter is stored as the embedded toedev's tod_softc. */ static inline struct adapter * td_adapter(struct tom_data *td) { return (td->tod.tod_softc); } /* t4_tom.c */ struct toepcb *alloc_toepcb(struct port_info *, int, int, int); void free_toepcb(struct toepcb *); void offload_socket(struct socket *, struct toepcb *); void undo_offload_socket(struct socket *); void final_cpl_received(struct toepcb *); void insert_tid(struct adapter *, 
int, void *); void *lookup_tid(struct adapter *, int); void update_tid(struct adapter *, int, void *); void remove_tid(struct adapter *, int); void release_tid(struct adapter *, int, struct sge_wrq *); int find_best_mtu_idx(struct adapter *, struct in_conninfo *, int); u_long select_rcv_wnd(struct socket *); int select_rcv_wscale(void); uint64_t calc_opt0(struct socket *, struct port_info *, struct l2t_entry *, int, int, int, int); uint32_t select_ntuple(struct port_info *, struct l2t_entry *, uint32_t); /* t4_connect.c */ void t4_init_connect_cpl_handlers(struct adapter *); int t4_connect(struct toedev *, struct socket *, struct rtentry *, struct sockaddr *); /* t4_listen.c */ void t4_init_listen_cpl_handlers(struct adapter *); int t4_listen_start(struct toedev *, struct tcpcb *); int t4_listen_stop(struct toedev *, struct tcpcb *); void t4_syncache_added(struct toedev *, void *); void t4_syncache_removed(struct toedev *, void *); int t4_syncache_respond(struct toedev *, void *, struct mbuf *); int do_abort_req_synqe(struct sge_iq *, const struct rss_header *, struct mbuf *); int do_abort_rpl_synqe(struct sge_iq *, const struct rss_header *, struct mbuf *); void t4_offload_socket(struct toedev *, void *, struct socket *); /* t4_cpl_io.c */ void t4_init_cpl_io_handlers(struct adapter *); void t4_uninit_cpl_io_handlers(struct adapter *); void send_abort_rpl(struct adapter *, struct sge_wrq *, int , int); void send_flowc_wr(struct toepcb *, struct flowc_tx_params *); void send_reset(struct adapter *, struct toepcb *, uint32_t); void make_established(struct toepcb *, uint32_t, uint32_t, uint16_t); void t4_rcvd(struct toedev *, struct tcpcb *); int t4_tod_output(struct toedev *, struct tcpcb *); int t4_send_fin(struct toedev *, struct tcpcb *); int t4_send_rst(struct toedev *, struct tcpcb *); void t4_set_tcb_field(struct adapter *, struct toepcb *, uint16_t, uint64_t, uint64_t); +/* t4_ddp.c */ +void t4_init_ddp(struct adapter *, struct tom_data *); +void 
t4_uninit_ddp(struct adapter *, struct tom_data *); +int t4_soreceive_ddp(struct socket *, struct sockaddr **, struct uio *, + struct mbuf **, struct mbuf **, int *); +void enable_ddp(struct adapter *, struct toepcb *toep); +void release_ddp_resources(struct toepcb *toep); #endif Index: head/sys/modules/cxgbe/tom/Makefile =================================================================== --- head/sys/modules/cxgbe/tom/Makefile (revision 239343) +++ head/sys/modules/cxgbe/tom/Makefile (revision 239344) @@ -1,25 +1,25 @@ # # $FreeBSD$ # .include CXGBE = ${.CURDIR}/../../../dev/cxgbe .PATH: ${CXGBE}/tom KMOD = t4_tom -SRCS = t4_tom.c t4_connect.c t4_listen.c t4_cpl_io.c t4_tom_l2t.c +SRCS = t4_tom.c t4_connect.c t4_listen.c t4_cpl_io.c t4_tom_l2t.c t4_ddp.c SRCS+= device_if.h bus_if.h pci_if.h SRCS+= opt_inet.h CFLAGS+= -I${CXGBE} .if !defined(KERNBUILDDIR) .if ${MK_INET_SUPPORT} != "no" opt_inet.h: echo "#define INET 1" > ${.TARGET} echo "#define TCP_OFFLOAD 1" >> ${.TARGET} .endif .endif .include