Index: sys/dev/sfxge/common/ef10_ev.c =================================================================== --- sys/dev/sfxge/common/ef10_ev.c +++ sys/dev/sfxge/common/ef10_ev.c @@ -776,7 +776,7 @@ } #endif /* EFSYS_OPT_QSTATS */ -#if EFSYS_OPT_RX_PACKED_STREAM +#if EFSYS_OPT_RX_PACKED_STREAM || EFSYS_OPT_RX_ES_SUPER_BUFFER static __checkReturn boolean_t ef10_ev_rx_packed_stream( @@ -815,7 +815,18 @@ if (new_buffer) { flags |= EFX_PKT_PACKED_STREAM_NEW_BUFFER; +#if EFSYS_OPT_RX_PACKED_STREAM + /* + * If both packed stream and equal stride super-buffer + * modes are compiled in, in theory credits should be + * be maintained for packed stream only, but right now + * these modes are not distinguished in the event queue + * Rx queue state and it is OK to increment the counter + * regardless (it might be event cheaper than branching + * since neighbour structure member are updated as well). + */ eersp->eers_rx_packed_stream_credits++; +#endif eersp->eers_rx_read_ptr++; } current_id = eersp->eers_rx_read_ptr & eersp->eers_rx_mask; @@ -857,7 +868,7 @@ return (should_abort); } -#endif /* EFSYS_OPT_RX_PACKED_STREAM */ +#endif /* EFSYS_OPT_RX_PACKED_STREAM || EFSYS_OPT_RX_ES_SUPER_BUFFER */ static __checkReturn boolean_t ef10_ev_rx( @@ -891,7 +902,7 @@ label = EFX_QWORD_FIELD(*eqp, ESF_DZ_RX_QLABEL); eersp = &eep->ee_rxq_state[label]; -#if EFSYS_OPT_RX_PACKED_STREAM +#if EFSYS_OPT_RX_PACKED_STREAM || EFSYS_OPT_RX_ES_SUPER_BUFFER /* * Packed stream events are very different, * so handle them separately @@ -1391,8 +1402,9 @@ __in efx_rxq_type_t type) { efx_evq_rxq_state_t *eersp; -#if EFSYS_OPT_RX_PACKED_STREAM +#if EFSYS_OPT_RX_PACKED_STREAM || EFSYS_OPT_RX_ES_SUPER_BUFFER boolean_t packed_stream = (type == EFX_RXQ_TYPE_PACKED_STREAM); + boolean_t es_super_buffer = (type == EFX_RXQ_TYPE_ES_SUPER_BUFFER); #endif _NOTE(ARGUNUSED(type)) @@ -1414,9 +1426,11 @@ eersp->eers_rx_read_ptr = 0; #endif eersp->eers_rx_mask = erp->er_mask; -#if EFSYS_OPT_RX_PACKED_STREAM +#if EFSYS_OPT_RX_PACKED_STREAM || EFSYS_OPT_RX_ES_SUPER_BUFFER eersp->eers_rx_stream_npackets = 0; - eersp->eers_rx_packed_stream = packed_stream; + eersp->eers_rx_packed_stream = packed_stream || es_super_buffer; +#endif +#if EFSYS_OPT_RX_PACKED_STREAM if (packed_stream) { eersp->eers_rx_packed_stream_credits = (eep->ee_mask + 1) / EFX_DIV_ROUND_UP(EFX_RX_PACKED_STREAM_MEM_PER_CREDIT, @@ -1450,9 +1464,11 @@ eersp->eers_rx_read_ptr = 0; eersp->eers_rx_mask = 0; -#if EFSYS_OPT_RX_PACKED_STREAM +#if EFSYS_OPT_RX_PACKED_STREAM || EFSYS_OPT_RX_ES_SUPER_BUFFER eersp->eers_rx_stream_npackets = 0; eersp->eers_rx_packed_stream = B_FALSE; +#endif +#if EFSYS_OPT_RX_PACKED_STREAM eersp->eers_rx_packed_stream_credits = 0; #endif } Index: sys/dev/sfxge/common/ef10_impl.h =================================================================== --- sys/dev/sfxge/common/ef10_impl.h +++ sys/dev/sfxge/common/ef10_impl.h @@ -1242,6 +1242,16 @@ #endif /* EFSYS_OPT_RX_PACKED_STREAM */ +#if EFSYS_OPT_RX_ES_SUPER_BUFFER + +/* + * Maximum DMA length and buffer stride alignment. + * (see SF-119419-TC, 3.2) + */ +#define EFX_RX_ES_SUPER_BUFFER_BUF_ALIGNMENT 64 + +#endif + #ifdef __cplusplus } #endif Index: sys/dev/sfxge/common/ef10_rx.c =================================================================== --- sys/dev/sfxge/common/ef10_rx.c +++ sys/dev/sfxge/common/ef10_rx.c @@ -48,12 +48,16 @@ __in efsys_mem_t *esmp, __in boolean_t disable_scatter, __in boolean_t want_inner_classes, - __in uint32_t ps_bufsize) + __in uint32_t ps_bufsize, + __in uint32_t es_bufs_per_desc, + __in uint32_t es_max_dma_len, + __in uint32_t es_buf_stride, + __in uint32_t hol_block_timeout) { efx_nic_cfg_t *encp = &(enp->en_nic_cfg); efx_mcdi_req_t req; - uint8_t payload[MAX(MC_CMD_INIT_RXQ_EXT_IN_LEN, - MC_CMD_INIT_RXQ_EXT_OUT_LEN)]; + uint8_t payload[MAX(MC_CMD_INIT_RXQ_V3_IN_LEN, + MC_CMD_INIT_RXQ_V3_OUT_LEN)]; int npages = EFX_RXQ_NBUFS(ndescs); int i; efx_qword_t *dma_addr; @@ -71,6 +75,8 @@ if (ps_bufsize > 0) dma_mode = MC_CMD_INIT_RXQ_EXT_IN_PACKED_STREAM; + else if (es_bufs_per_desc > 0) + dma_mode = MC_CMD_INIT_RXQ_V3_IN_EQUAL_STRIDE_SUPER_BUFFER; else dma_mode = MC_CMD_INIT_RXQ_EXT_IN_SINGLE_PACKET; @@ -97,9 +103,9 @@ (void) memset(payload, 0, sizeof (payload)); req.emr_cmd = MC_CMD_INIT_RXQ; req.emr_in_buf = payload; - req.emr_in_length = MC_CMD_INIT_RXQ_EXT_IN_LEN; + req.emr_in_length = MC_CMD_INIT_RXQ_V3_IN_LEN; req.emr_out_buf = payload; - req.emr_out_length = MC_CMD_INIT_RXQ_EXT_OUT_LEN; + req.emr_out_length = MC_CMD_INIT_RXQ_V3_OUT_LEN; MCDI_IN_SET_DWORD(req, INIT_RXQ_EXT_IN_SIZE, ndescs); MCDI_IN_SET_DWORD(req, INIT_RXQ_EXT_IN_TARGET_EVQ, target_evq); @@ -119,6 +125,19 @@ MCDI_IN_SET_DWORD(req, INIT_RXQ_EXT_IN_OWNER_ID, 0); MCDI_IN_SET_DWORD(req, INIT_RXQ_EXT_IN_PORT_ID, EVB_PORT_ID_ASSIGNED); + if (es_bufs_per_desc > 0) { + MCDI_IN_SET_DWORD(req, + INIT_RXQ_V3_IN_ES_PACKET_BUFFERS_PER_BUCKET, + es_bufs_per_desc); + MCDI_IN_SET_DWORD(req, + INIT_RXQ_V3_IN_ES_MAX_DMA_LEN, es_max_dma_len); + MCDI_IN_SET_DWORD(req, + INIT_RXQ_V3_IN_ES_PACKET_STRIDE, es_buf_stride); + MCDI_IN_SET_DWORD(req, + INIT_RXQ_V3_IN_ES_HEAD_OF_LINE_BLOCK_TIMEOUT, + hol_block_timeout); + } + dma_addr = MCDI_IN2(req, efx_qword_t, INIT_RXQ_IN_DMA_ADDR); addr = EFSYS_MEM_ADDR(esmp); @@ -1040,6 +1059,10 @@ boolean_t disable_scatter; boolean_t want_inner_classes; unsigned int ps_buf_size; + uint32_t es_bufs_per_desc = 0; + uint32_t es_max_dma_len = 0; + uint32_t es_buf_stride = 0; + uint32_t hol_block_timeout = 0; _NOTE(ARGUNUSED(id, erp, type_data)) @@ -1088,6 +1111,19 @@ } break; #endif /* EFSYS_OPT_RX_PACKED_STREAM */ +#if EFSYS_OPT_RX_ES_SUPER_BUFFER + case EFX_RXQ_TYPE_ES_SUPER_BUFFER: + ps_buf_size = 0; + es_bufs_per_desc = + type_data->ertd_es_super_buffer.eessb_bufs_per_desc; + es_max_dma_len = + type_data->ertd_es_super_buffer.eessb_max_dma_len; + es_buf_stride = + type_data->ertd_es_super_buffer.eessb_buf_stride; + hol_block_timeout = + type_data->ertd_es_super_buffer.eessb_hol_block_timeout; + break; +#endif /* EFSYS_OPT_RX_ES_SUPER_BUFFER */ default: rc = ENOTSUP; goto fail4; @@ -1111,6 +1147,27 @@ EFSYS_ASSERT(ps_buf_size == 0); #endif /* EFSYS_OPT_RX_PACKED_STREAM */ +#if EFSYS_OPT_RX_ES_SUPER_BUFFER + if (es_bufs_per_desc > 0) { + if (encp->enc_rx_es_super_buffer_supported == B_FALSE) { + rc = ENOTSUP; + goto fail7; + } + if (!IS_P2ALIGNED(es_max_dma_len, + EFX_RX_ES_SUPER_BUFFER_BUF_ALIGNMENT)) { + rc = EINVAL; + goto fail8; + } + if (!IS_P2ALIGNED(es_buf_stride, + EFX_RX_ES_SUPER_BUFFER_BUF_ALIGNMENT)) { + rc = EINVAL; + goto fail9; + } + } +#else /* EFSYS_OPT_RX_ES_SUPER_BUFFER */ + EFSYS_ASSERT(es_bufs_per_desc == 0); +#endif /* EFSYS_OPT_RX_ES_SUPER_BUFFER */ + /* Scatter can only be disabled if the firmware supports doing so */ if (flags & EFX_RXQ_FLAG_SCATTER) disable_scatter = B_FALSE; @@ -1124,8 +1181,9 @@ if ((rc = efx_mcdi_init_rxq(enp, ndescs, eep->ee_index, label, index, esmp, disable_scatter, want_inner_classes, - ps_buf_size)) != 0) - goto fail7; + ps_buf_size, es_bufs_per_desc, es_max_dma_len, + es_buf_stride, hol_block_timeout)) != 0) + goto fail10; erp->er_eep = eep; erp->er_label = label; @@ -1136,8 +1194,16 @@ return (0); +fail10: + EFSYS_PROBE(fail10); +#if EFSYS_OPT_RX_ES_SUPER_BUFFER +fail9: + EFSYS_PROBE(fail9); +fail8: + EFSYS_PROBE(fail8); fail7: EFSYS_PROBE(fail7); +#endif /* EFSYS_OPT_RX_ES_SUPER_BUFFER */ #if EFSYS_OPT_RX_PACKED_STREAM fail6: EFSYS_PROBE(fail6); Index: sys/dev/sfxge/common/efsys.h =================================================================== --- sys/dev/sfxge/common/efsys.h +++ sys/dev/sfxge/common/efsys.h @@ -285,6 +285,8 @@ #define EFSYS_OPT_RX_PACKED_STREAM 0 +#define EFSYS_OPT_RX_ES_SUPER_BUFFER 0 + #define EFSYS_OPT_TUNNEL 0 #define EFSYS_OPT_FW_SUBVARIANT_AWARE 0 Index: sys/dev/sfxge/common/efx.h =================================================================== --- sys/dev/sfxge/common/efx.h +++ sys/dev/sfxge/common/efx.h @@ -1893,7 +1893,7 @@ __in uint32_t size, __in uint16_t flags); -#if EFSYS_OPT_RX_PACKED_STREAM +#if EFSYS_OPT_RX_PACKED_STREAM || EFSYS_OPT_RX_ES_SUPER_BUFFER /* * Packed stream mode is documented in SF-112241-TC. @@ -1903,6 +1903,13 @@ * packets are put there in a continuous stream. * The main advantage of such an approach is that RX queue refilling * happens much less frequently. + * + * Equal stride packed stream mode is documented in SF-119419-TC. + * The general idea is to utilize advantages of the packed stream, + * but avoid indirection in packets representation. + * The main advantage of such an approach is that RX queue refilling + * happens much less frequently and packets buffers are independent + * from upper layers point of view. */ typedef __checkReturn boolean_t @@ -2003,7 +2010,7 @@ typedef struct efx_ev_callbacks_s { efx_initialized_ev_t eec_initialized; efx_rx_ev_t eec_rx; -#if EFSYS_OPT_RX_PACKED_STREAM +#if EFSYS_OPT_RX_PACKED_STREAM || EFSYS_OPT_RX_ES_SUPER_BUFFER efx_rx_ps_ev_t eec_rx_ps; #endif efx_tx_ev_t eec_tx; @@ -2310,6 +2317,7 @@ typedef enum efx_rxq_type_e { EFX_RXQ_TYPE_DEFAULT, EFX_RXQ_TYPE_PACKED_STREAM, + EFX_RXQ_TYPE_ES_SUPER_BUFFER, EFX_RXQ_NTYPES } efx_rxq_type_t; @@ -2363,6 +2371,28 @@ #endif +#if EFSYS_OPT_RX_ES_SUPER_BUFFER + +/* Maximum head-of-line block timeout in nanoseconds */ +#define EFX_RXQ_ES_SUPER_BUFFER_HOL_BLOCK_MAX (400U * 1000 * 1000) + +extern __checkReturn efx_rc_t +efx_rx_qcreate_es_super_buffer( + __in efx_nic_t *enp, + __in unsigned int index, + __in unsigned int label, + __in uint32_t n_bufs_per_desc, + __in uint32_t max_dma_len, + __in uint32_t buf_stride, + __in uint32_t hol_block_timeout, + __in efsys_mem_t *esmp, + __in size_t ndescs, + __in unsigned int flags, + __in efx_evq_t *eep, + __deref_out efx_rxq_t **erpp); + +#endif + typedef struct efx_buffer_s { efsys_dma_addr_t eb_addr; size_t eb_size; Index: sys/dev/sfxge/common/efx_check.h =================================================================== --- sys/dev/sfxge/common/efx_check.h +++ sys/dev/sfxge/common/efx_check.h @@ -370,6 +370,13 @@ # endif #endif +#if EFSYS_OPT_RX_ES_SUPER_BUFFER +/* Support equal stride super-buffer mode */ +# if !(EFSYS_OPT_MEDFORD2) +# error "ES_SUPER_BUFFER requires MEDFORD2" +# endif +#endif + /* Support hardware assistance for tunnels */ #if EFSYS_OPT_TUNNEL # if !(EFSYS_OPT_MEDFORD || EFSYS_OPT_MEDFORD2) Index: sys/dev/sfxge/common/efx_impl.h =================================================================== --- sys/dev/sfxge/common/efx_impl.h +++ sys/dev/sfxge/common/efx_impl.h @@ -165,6 +165,14 @@ uint32_t eps_buf_size; } ertd_packed_stream; #endif +#if EFSYS_OPT_RX_ES_SUPER_BUFFER + struct { + uint32_t eessb_bufs_per_desc; + uint32_t eessb_max_dma_len; + uint32_t eessb_buf_stride; + uint32_t eessb_hol_block_timeout; + } ertd_es_super_buffer; +#endif } efx_rxq_type_data_t; typedef struct efx_rx_ops_s { @@ -763,9 +771,11 @@ typedef struct efx_evq_rxq_state_s { unsigned int eers_rx_read_ptr; unsigned int eers_rx_mask; -#if EFSYS_OPT_RX_PACKED_STREAM +#if EFSYS_OPT_RX_PACKED_STREAM || EFSYS_OPT_RX_ES_SUPER_BUFFER unsigned int eers_rx_stream_npackets; boolean_t eers_rx_packed_stream; +#endif +#if EFSYS_OPT_RX_PACKED_STREAM unsigned int eers_rx_packed_stream_credits; #endif } efx_evq_rxq_state_t; Index: sys/dev/sfxge/common/efx_rx.c =================================================================== --- sys/dev/sfxge/common/efx_rx.c +++ sys/dev/sfxge/common/efx_rx.c @@ -871,6 +871,58 @@ #endif +#if EFSYS_OPT_RX_ES_SUPER_BUFFER + + __checkReturn efx_rc_t +efx_rx_qcreate_es_super_buffer( + __in efx_nic_t *enp, + __in unsigned int index, + __in unsigned int label, + __in uint32_t n_bufs_per_desc, + __in uint32_t max_dma_len, + __in uint32_t buf_stride, + __in uint32_t hol_block_timeout, + __in efsys_mem_t *esmp, + __in size_t ndescs, + __in unsigned int flags, + __in efx_evq_t *eep, + __deref_out efx_rxq_t **erpp) +{ + efx_rc_t rc; + efx_rxq_type_data_t type_data; + + if (hol_block_timeout > EFX_RXQ_ES_SUPER_BUFFER_HOL_BLOCK_MAX) { + rc = EINVAL; + goto fail1; + } + + memset(&type_data, 0, sizeof (type_data)); + + type_data.ertd_es_super_buffer.eessb_bufs_per_desc = n_bufs_per_desc; + type_data.ertd_es_super_buffer.eessb_max_dma_len = max_dma_len; + type_data.ertd_es_super_buffer.eessb_buf_stride = buf_stride; + type_data.ertd_es_super_buffer.eessb_hol_block_timeout = + hol_block_timeout; + + rc = efx_rx_qcreate_internal(enp, index, label, + EFX_RXQ_TYPE_ES_SUPER_BUFFER, &type_data, esmp, ndescs, + 0 /* id unused on EF10 */, flags, eep, erpp); + if (rc != 0) + goto fail2; + + return (0); + +fail2: + EFSYS_PROBE(fail2); +fail1: + EFSYS_PROBE1(fail1, efx_rc_t, rc); + + return (rc); +} + +#endif + + void efx_rx_qdestroy( __in efx_rxq_t *erp)