diff --git a/sys/dev/ena/ena.h b/sys/dev/ena/ena.h index a6aa39829f12..85f353e7ab54 100644 --- a/sys/dev/ena/ena.h +++ b/sys/dev/ena/ena.h @@ -1,546 +1,554 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2015-2020 Amazon.com, Inc. or its affiliates. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ * */ #ifndef ENA_H #define ENA_H #include "opt_rss.h" #include "ena-com/ena_com.h" #include "ena-com/ena_eth_com.h" #define DRV_MODULE_VER_MAJOR 2 #define DRV_MODULE_VER_MINOR 5 #define DRV_MODULE_VER_SUBMINOR 0 #define DRV_MODULE_NAME "ena" #ifndef DRV_MODULE_VERSION #define DRV_MODULE_VERSION \ __XSTRING(DRV_MODULE_VER_MAJOR) "." \ __XSTRING(DRV_MODULE_VER_MINOR) "." \ __XSTRING(DRV_MODULE_VER_SUBMINOR) #endif #define DEVICE_NAME "Elastic Network Adapter (ENA)" #define DEVICE_DESC "ENA adapter" /* Calculate DMA mask - width for ena cannot exceed 48, so it is safe */ #define ENA_DMA_BIT_MASK(x) ((1ULL << (x)) - 1ULL) /* 1 for AENQ + ADMIN */ #define ENA_ADMIN_MSIX_VEC 1 #define ENA_MAX_MSIX_VEC(io_queues) (ENA_ADMIN_MSIX_VEC + (io_queues)) #define ENA_REG_BAR 0 #define ENA_MEM_BAR 2 #define ENA_BUS_DMA_SEGS 32 #define ENA_DEFAULT_BUF_RING_SIZE 4096 #define ENA_DEFAULT_RING_SIZE 1024 #define ENA_MIN_RING_SIZE 256 /* * Refill Rx queue when number of required descriptors is above * QUEUE_SIZE / ENA_RX_REFILL_THRESH_DIVIDER or ENA_RX_REFILL_THRESH_PACKET */ #define ENA_RX_REFILL_THRESH_DIVIDER 8 #define ENA_RX_REFILL_THRESH_PACKET 256 #define ENA_IRQNAME_SIZE 40 #define ENA_PKT_MAX_BUFS 19 #define ENA_RX_RSS_TABLE_LOG_SIZE 7 #define ENA_RX_RSS_TABLE_SIZE (1 << ENA_RX_RSS_TABLE_LOG_SIZE) #define ENA_HASH_KEY_SIZE 40 #define ENA_MAX_FRAME_LEN 10000 #define ENA_MIN_FRAME_LEN 60 #define ENA_TX_RESUME_THRESH (ENA_PKT_MAX_BUFS + 2) #define DB_THRESHOLD 64 #define TX_COMMIT 32 /* * TX budget for cleaning. It should be half of the RX budget to reduce amount * of TCP retransmissions. */ #define TX_BUDGET 128 /* RX cleanup budget. -1 stands for infinity. */ #define RX_BUDGET 256 /* * How many times we can repeat cleanup in the io irq handling routine if the * RX or TX budget was depleted. 
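 * For example, with the budget values defined here (illustrative arithmetic
 * only), a single invocation of the cleanup routine may process at most
 * CLEAN_BUDGET * RX_BUDGET = 8 * 256 Rx packets and
 * CLEAN_BUDGET * TX_BUDGET = 8 * 128 Tx completions before the interrupt
 * is unmasked again.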
*/ #define CLEAN_BUDGET 8 #define RX_IRQ_INTERVAL 20 #define TX_IRQ_INTERVAL 50 #define ENA_MIN_MTU 128 #define ENA_TSO_MAXSIZE 65536 #define ENA_MMIO_DISABLE_REG_READ BIT(0) #define ENA_TX_RING_IDX_NEXT(idx, ring_size) (((idx) + 1) & ((ring_size) - 1)) #define ENA_RX_RING_IDX_NEXT(idx, ring_size) (((idx) + 1) & ((ring_size) - 1)) #define ENA_IO_TXQ_IDX(q) (2 * (q)) #define ENA_IO_RXQ_IDX(q) (2 * (q) + 1) #define ENA_IO_TXQ_IDX_TO_COMBINED_IDX(q) ((q) / 2) #define ENA_IO_RXQ_IDX_TO_COMBINED_IDX(q) (((q) - 1) / 2) #define ENA_MGMNT_IRQ_IDX 0 #define ENA_IO_IRQ_FIRST_IDX 1 #define ENA_IO_IRQ_IDX(q) (ENA_IO_IRQ_FIRST_IDX + (q)) #define ENA_MAX_NO_INTERRUPT_ITERATIONS 3 /* * ENA device should send keep alive msg every 1 sec. * We wait for 6 sec just to be on the safe side. */ #define DEFAULT_KEEP_ALIVE_TO (SBT_1S * 6) /* Time in jiffies before concluding the transmitter is hung. */ #define DEFAULT_TX_CMP_TO (SBT_1S * 5) /* Number of queues to check for missing queues per timer tick */ #define DEFAULT_TX_MONITORED_QUEUES (4) /* Max number of timeouted packets before device reset */ #define DEFAULT_TX_CMP_THRESHOLD (128) /* * Supported PCI vendor and devices IDs */ #define PCI_VENDOR_ID_AMAZON 0x1d0f #define PCI_DEV_ID_ENA_PF 0x0ec2 #define PCI_DEV_ID_ENA_PF_RSERV0 0x1ec2 #define PCI_DEV_ID_ENA_VF 0xec20 #define PCI_DEV_ID_ENA_VF_RSERV0 0xec21 /* * Flags indicating current ENA driver state */ enum ena_flags_t { ENA_FLAG_DEVICE_RUNNING, ENA_FLAG_DEV_UP, ENA_FLAG_LINK_UP, ENA_FLAG_MSIX_ENABLED, ENA_FLAG_TRIGGER_RESET, ENA_FLAG_ONGOING_RESET, ENA_FLAG_DEV_UP_BEFORE_RESET, ENA_FLAG_RSS_ACTIVE, ENA_FLAGS_NUMBER = ENA_FLAG_RSS_ACTIVE }; BITSET_DEFINE(_ena_state, ENA_FLAGS_NUMBER); typedef struct _ena_state ena_state_t; #define ENA_FLAG_ZERO(adapter) \ BIT_ZERO(ENA_FLAGS_NUMBER, &(adapter)->flags) #define ENA_FLAG_ISSET(bit, adapter) \ BIT_ISSET(ENA_FLAGS_NUMBER, (bit), &(adapter)->flags) #define ENA_FLAG_SET_ATOMIC(bit, adapter) \ BIT_SET_ATOMIC(ENA_FLAGS_NUMBER, (bit), &(adapter)->flags) #define ENA_FLAG_CLEAR_ATOMIC(bit, adapter) \ BIT_CLR_ATOMIC(ENA_FLAGS_NUMBER, (bit), &(adapter)->flags) struct msix_entry { int entry; int vector; }; typedef struct _ena_vendor_info_t { uint16_t vendor_id; uint16_t device_id; unsigned int index; } ena_vendor_info_t; struct ena_irq { /* Interrupt resources */ struct resource *res; driver_filter_t *handler; void *data; void *cookie; unsigned int vector; bool requested; #ifdef RSS int cpu; #endif char name[ENA_IRQNAME_SIZE]; }; struct ena_que { struct ena_adapter *adapter; struct ena_ring *tx_ring; struct ena_ring *rx_ring; struct task cleanup_task; struct taskqueue *cleanup_tq; uint32_t id; #ifdef RSS int cpu; cpuset_t cpu_mask; #endif int domain; struct sysctl_oid *oid; }; struct ena_calc_queue_size_ctx { struct ena_com_dev_get_features_ctx *get_feat_ctx; struct ena_com_dev *ena_dev; device_t pdev; uint32_t tx_queue_size; uint32_t rx_queue_size; uint32_t max_tx_queue_size; uint32_t max_rx_queue_size; uint16_t max_tx_sgl_size; uint16_t max_rx_sgl_size; }; #ifdef DEV_NETMAP struct ena_netmap_tx_info { uint32_t socket_buf_idx[ENA_PKT_MAX_BUFS]; bus_dmamap_t map_seg[ENA_PKT_MAX_BUFS]; unsigned int sockets_used; }; #endif struct ena_tx_buffer { struct mbuf *mbuf; /* # of ena desc for this specific mbuf * (includes data desc and metadata desc) */ unsigned int tx_descs; /* # of buffers used by this mbuf */ unsigned int num_of_bufs; bus_dmamap_t dmamap; /* Used to detect missing tx packets */ struct bintime timestamp; bool print_once; #ifdef DEV_NETMAP struct 
ena_netmap_tx_info nm_info; #endif /* DEV_NETMAP */ struct ena_com_buf bufs[ENA_PKT_MAX_BUFS]; } __aligned(CACHE_LINE_SIZE); struct ena_rx_buffer { struct mbuf *mbuf; bus_dmamap_t map; struct ena_com_buf ena_buf; #ifdef DEV_NETMAP uint32_t netmap_buf_idx; #endif /* DEV_NETMAP */ } __aligned(CACHE_LINE_SIZE); struct ena_stats_tx { counter_u64_t cnt; counter_u64_t bytes; counter_u64_t prepare_ctx_err; counter_u64_t dma_mapping_err; counter_u64_t doorbells; counter_u64_t missing_tx_comp; counter_u64_t bad_req_id; counter_u64_t collapse; counter_u64_t collapse_err; counter_u64_t queue_wakeup; counter_u64_t queue_stop; counter_u64_t llq_buffer_copy; counter_u64_t unmask_interrupt_num; }; struct ena_stats_rx { counter_u64_t cnt; counter_u64_t bytes; counter_u64_t refil_partial; counter_u64_t csum_bad; counter_u64_t mjum_alloc_fail; counter_u64_t mbuf_alloc_fail; counter_u64_t dma_mapping_err; counter_u64_t bad_desc_num; counter_u64_t bad_req_id; counter_u64_t empty_rx_ring; counter_u64_t csum_good; }; struct ena_ring { /* Holds the empty requests for TX/RX out of order completions */ union { uint16_t *free_tx_ids; uint16_t *free_rx_ids; }; struct ena_com_dev *ena_dev; struct ena_adapter *adapter; struct ena_com_io_cq *ena_com_io_cq; struct ena_com_io_sq *ena_com_io_sq; uint16_t qid; /* Determines if device will use LLQ or normal mode for TX */ enum ena_admin_placement_policy_type tx_mem_queue_type; union { /* The maximum length the driver can push to the device (For LLQ) */ uint8_t tx_max_header_size; /* The maximum (and default) mbuf size for the Rx descriptor. */ uint16_t rx_mbuf_sz; }; bool first_interrupt; uint16_t no_interrupt_event_cnt; struct ena_com_rx_buf_info ena_bufs[ENA_PKT_MAX_BUFS]; struct ena_que *que; struct lro_ctrl lro; uint16_t next_to_use; uint16_t next_to_clean; union { struct ena_tx_buffer *tx_buffer_info; /* contex of tx packet */ struct ena_rx_buffer *rx_buffer_info; /* contex of rx packet */ }; int ring_size; /* number of tx/rx_buffer_info's entries */ struct buf_ring *br; /* only for TX */ uint32_t buf_ring_size; struct mtx ring_mtx; char mtx_name[16]; struct { struct task enqueue_task; struct taskqueue *enqueue_tq; }; union { struct ena_stats_tx tx_stats; struct ena_stats_rx rx_stats; }; union { int empty_rx_queue; /* For Tx ring to indicate if it's running or not */ bool running; }; /* How many packets are sent in one Tx loop, used for doorbells */ uint32_t acum_pkts; /* Used for LLQ */ uint8_t *push_buf_intermediate_buf; #ifdef DEV_NETMAP bool initialized; #endif /* DEV_NETMAP */ } __aligned(CACHE_LINE_SIZE); struct ena_stats_dev { counter_u64_t wd_expired; counter_u64_t interface_up; counter_u64_t interface_down; counter_u64_t admin_q_pause; }; struct ena_hw_stats { counter_u64_t rx_packets; counter_u64_t tx_packets; counter_u64_t rx_bytes; counter_u64_t tx_bytes; counter_u64_t rx_drops; counter_u64_t tx_drops; }; /* Board specific private data structure */ struct ena_adapter { struct ena_com_dev *ena_dev; /* OS defined structs */ if_t ifp; device_t pdev; struct ifmedia media; /* OS resources */ struct resource *memory; struct resource *registers; struct resource *msix; int msix_rid; /* MSI-X */ struct msix_entry *msix_entries; int msix_vecs; /* DMA tags used throughout the driver adapter for Tx and Rx */ bus_dma_tag_t tx_buf_tag; bus_dma_tag_t rx_buf_tag; int dma_width; uint32_t max_mtu; uint32_t num_io_queues; uint32_t max_num_io_queues; uint32_t requested_tx_ring_size; uint32_t requested_rx_ring_size; uint32_t max_tx_ring_size; uint32_t max_rx_ring_size; uint16_t 
max_tx_sgl_size; uint16_t max_rx_sgl_size; uint32_t tx_offload_cap; uint32_t buf_ring_size; /* RSS*/ int first_bind; struct ena_indir *rss_indir; uint8_t mac_addr[ETHER_ADDR_LEN]; /* mdio and phy*/ ena_state_t flags; /* Queue will represent one TX and one RX ring */ struct ena_que que[ENA_MAX_NUM_IO_QUEUES] __aligned(CACHE_LINE_SIZE); /* TX */ struct ena_ring tx_ring[ENA_MAX_NUM_IO_QUEUES] __aligned(CACHE_LINE_SIZE); /* RX */ struct ena_ring rx_ring[ENA_MAX_NUM_IO_QUEUES] __aligned(CACHE_LINE_SIZE); struct ena_irq irq_tbl[ENA_MAX_MSIX_VEC(ENA_MAX_NUM_IO_QUEUES)]; /* Timer service */ struct callout timer_service; sbintime_t keep_alive_timestamp; uint32_t next_monitored_tx_qid; struct task reset_task; struct taskqueue *reset_tq; int wd_active; sbintime_t keep_alive_timeout; sbintime_t missing_tx_timeout; uint32_t missing_tx_max_queues; uint32_t missing_tx_threshold; bool disable_meta_caching; uint16_t eni_metrics_sample_interval; uint16_t eni_metrics_sample_interval_cnt; /* Statistics */ struct ena_stats_dev dev_stats; struct ena_hw_stats hw_stats; struct ena_admin_eni_stats eni_metrics; enum ena_regs_reset_reason_types reset_reason; }; #define ENA_RING_MTX_LOCK(_ring) mtx_lock(&(_ring)->ring_mtx) #define ENA_RING_MTX_TRYLOCK(_ring) mtx_trylock(&(_ring)->ring_mtx) #define ENA_RING_MTX_UNLOCK(_ring) mtx_unlock(&(_ring)->ring_mtx) #define ENA_RING_MTX_ASSERT(_ring) \ mtx_assert(&(_ring)->ring_mtx, MA_OWNED) #define ENA_LOCK_INIT() \ sx_init(&ena_global_lock, "ENA global lock") #define ENA_LOCK_DESTROY() sx_destroy(&ena_global_lock) #define ENA_LOCK_LOCK() sx_xlock(&ena_global_lock) #define ENA_LOCK_UNLOCK() sx_unlock(&ena_global_lock) #define ENA_LOCK_ASSERT() sx_assert(&ena_global_lock, SA_XLOCKED) #define ENA_TIMER_INIT(_adapter) \ callout_init(&(_adapter)->timer_service, true) #define ENA_TIMER_DRAIN(_adapter) \ callout_drain(&(_adapter)->timer_service) #define ENA_TIMER_RESET(_adapter) \ callout_reset_sbt(&(_adapter)->timer_service, SBT_1S, SBT_1S, \ ena_timer_service, (void*)(_adapter), 0) #define clamp_t(type, _x, min, max) min_t(type, max_t(type, _x, min), max) #define clamp_val(val, lo, hi) clamp_t(__typeof(val), val, lo, hi) extern struct sx ena_global_lock; static inline int ena_mbuf_count(struct mbuf *mbuf) { int count = 1; while ((mbuf = mbuf->m_next) != NULL) ++count; return count; } int ena_up(struct ena_adapter *adapter); void ena_down(struct ena_adapter *adapter); int ena_restore_device(struct ena_adapter *adapter); void ena_destroy_device(struct ena_adapter *adapter, bool graceful); int ena_refill_rx_bufs(struct ena_ring *rx_ring, uint32_t num); int ena_update_buf_ring_size(struct ena_adapter *adapter, uint32_t new_buf_ring_size); int ena_update_queue_size(struct ena_adapter *adapter, uint32_t new_tx_size, uint32_t new_rx_size); int ena_update_io_queue_nb(struct ena_adapter *adapter, uint32_t new_num); static inline void ena_trigger_reset(struct ena_adapter *adapter, enum ena_regs_reset_reason_types reset_reason) { if (likely(!ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter))) { adapter->reset_reason = reset_reason; ENA_FLAG_SET_ATOMIC(ENA_FLAG_TRIGGER_RESET, adapter); } } +static inline void +ena_ring_tx_doorbell(struct ena_ring *tx_ring) +{ + ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq); + counter_u64_add(tx_ring->tx_stats.doorbells, 1); + tx_ring->acum_pkts = 0; +} + #endif /* !(ENA_H) */ diff --git a/sys/dev/ena/ena_datapath.c b/sys/dev/ena/ena_datapath.c index c6798b3f7eb3..8d4621947e03 100644 --- a/sys/dev/ena/ena_datapath.c +++ b/sys/dev/ena/ena_datapath.c @@ 
-1,1160 +1,1154 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2015-2020 Amazon.com, Inc. or its affiliates. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_rss.h" #include "ena.h" #include "ena_datapath.h" #ifdef DEV_NETMAP #include "ena_netmap.h" #endif /* DEV_NETMAP */ #ifdef RSS #include #endif /* RSS */ #include /********************************************************************* * Static functions prototypes *********************************************************************/ static int ena_tx_cleanup(struct ena_ring *); static int ena_rx_cleanup(struct ena_ring *); static inline int ena_get_tx_req_id(struct ena_ring *tx_ring, struct ena_com_io_cq *io_cq, uint16_t *req_id); static void ena_rx_hash_mbuf(struct ena_ring *, struct ena_com_rx_ctx *, struct mbuf *); static struct mbuf* ena_rx_mbuf(struct ena_ring *, struct ena_com_rx_buf_info *, struct ena_com_rx_ctx *, uint16_t *); static inline void ena_rx_checksum(struct ena_ring *, struct ena_com_rx_ctx *, struct mbuf *); static void ena_tx_csum(struct ena_com_tx_ctx *, struct mbuf *, bool); static int ena_check_and_collapse_mbuf(struct ena_ring *tx_ring, struct mbuf **mbuf); static int ena_xmit_mbuf(struct ena_ring *, struct mbuf **); static void ena_start_xmit(struct ena_ring *); /********************************************************************* * Global functions *********************************************************************/ void ena_cleanup(void *arg, int pending) { struct ena_que *que = arg; struct ena_adapter *adapter = que->adapter; if_t ifp = adapter->ifp; struct ena_ring *tx_ring; struct ena_ring *rx_ring; struct ena_com_io_cq* io_cq; struct ena_eth_io_intr_reg intr_reg; int qid, ena_qid; int txc, rxc, i; if (unlikely((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)) return; ena_log_io(adapter->pdev, DBG, "MSI-X TX/RX routine\n"); tx_ring = que->tx_ring; rx_ring = que->rx_ring; qid = que->id; ena_qid = ENA_IO_TXQ_IDX(qid); io_cq = &adapter->ena_dev->io_cq_queues[ena_qid]; tx_ring->first_interrupt = true; rx_ring->first_interrupt = true; for (i = 0; i < CLEAN_BUDGET; ++i) { rxc = ena_rx_cleanup(rx_ring); txc = ena_tx_cleanup(tx_ring); if (unlikely((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)) return; if ((txc != 
TX_BUDGET) && (rxc != RX_BUDGET)) break; } /* Signal that work is done and unmask interrupt */ ena_com_update_intr_reg(&intr_reg, RX_IRQ_INTERVAL, TX_IRQ_INTERVAL, true); counter_u64_add(tx_ring->tx_stats.unmask_interrupt_num, 1); ena_com_unmask_intr(io_cq, &intr_reg); } void ena_deferred_mq_start(void *arg, int pending) { struct ena_ring *tx_ring = (struct ena_ring *)arg; struct ifnet *ifp = tx_ring->adapter->ifp; while (!drbr_empty(ifp, tx_ring->br) && tx_ring->running && (if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0) { ENA_RING_MTX_LOCK(tx_ring); ena_start_xmit(tx_ring); ENA_RING_MTX_UNLOCK(tx_ring); } } int ena_mq_start(if_t ifp, struct mbuf *m) { struct ena_adapter *adapter = ifp->if_softc; struct ena_ring *tx_ring; int ret, is_drbr_empty; uint32_t i; #ifdef RSS uint32_t bucket_id; #endif if (unlikely((if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING) == 0)) return (ENODEV); /* Which queue to use */ /* * If everything is setup correctly, it should be the * same bucket that the current CPU we're on is. * It should improve performance. */ if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) { #ifdef RSS if (rss_hash2bucket(m->m_pkthdr.flowid, M_HASHTYPE_GET(m), &bucket_id) == 0) i = bucket_id % adapter->num_io_queues; else #endif i = m->m_pkthdr.flowid % adapter->num_io_queues; } else { i = curcpu % adapter->num_io_queues; } tx_ring = &adapter->tx_ring[i]; /* Check if drbr is empty before putting packet */ is_drbr_empty = drbr_empty(ifp, tx_ring->br); ret = drbr_enqueue(ifp, tx_ring->br, m); if (unlikely(ret != 0)) { taskqueue_enqueue(tx_ring->enqueue_tq, &tx_ring->enqueue_task); return (ret); } if (is_drbr_empty && (ENA_RING_MTX_TRYLOCK(tx_ring) != 0)) { ena_start_xmit(tx_ring); ENA_RING_MTX_UNLOCK(tx_ring); } else { taskqueue_enqueue(tx_ring->enqueue_tq, &tx_ring->enqueue_task); } return (0); } void ena_qflush(if_t ifp) { struct ena_adapter *adapter = ifp->if_softc; struct ena_ring *tx_ring = adapter->tx_ring; int i; for(i = 0; i < adapter->num_io_queues; ++i, ++tx_ring) if (!drbr_empty(ifp, tx_ring->br)) { ENA_RING_MTX_LOCK(tx_ring); drbr_flush(ifp, tx_ring->br); ENA_RING_MTX_UNLOCK(tx_ring); } if_qflush(ifp); } /********************************************************************* * Static functions *********************************************************************/ static inline int ena_get_tx_req_id(struct ena_ring *tx_ring, struct ena_com_io_cq *io_cq, uint16_t *req_id) { struct ena_adapter *adapter = tx_ring->adapter; int rc; rc = ena_com_tx_comp_req_id_get(io_cq, req_id); if (rc == ENA_COM_TRY_AGAIN) return (EAGAIN); if (unlikely(rc != 0)) { ena_log(adapter->pdev, ERR, "Invalid req_id: %hu\n", *req_id); counter_u64_add(tx_ring->tx_stats.bad_req_id, 1); goto err; } if (tx_ring->tx_buffer_info[*req_id].mbuf != NULL) return (0); ena_log(adapter->pdev, ERR, "tx_info doesn't have valid mbuf\n"); err: ena_trigger_reset(adapter, ENA_REGS_RESET_INV_TX_REQ_ID); return (EFAULT); } /** * ena_tx_cleanup - clear sent packets and corresponding descriptors * @tx_ring: ring for which we want to clean packets * * Once packets are sent, we ask the device in a loop for no longer used * descriptors. We find the related mbuf chain in a map (index in an array) * and free it, then update ring state. * This is performed in "endless" loop, updating ring pointers every * TX_COMMIT. The first check of free descriptor is performed before the actual * loop, then repeated at the loop end. 
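 * With the defaults from ena.h this means the ring state and the completion
 * acknowledgement are pushed to the device every TX_COMMIT (32) completed
 * requests, plus once more after the loop for any remainder (illustrative
 * summary of the loop body below).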
**/ static int ena_tx_cleanup(struct ena_ring *tx_ring) { struct ena_adapter *adapter; struct ena_com_io_cq* io_cq; uint16_t next_to_clean; uint16_t req_id; uint16_t ena_qid; unsigned int total_done = 0; int rc; int commit = TX_COMMIT; int budget = TX_BUDGET; int work_done; bool above_thresh; adapter = tx_ring->que->adapter; ena_qid = ENA_IO_TXQ_IDX(tx_ring->que->id); io_cq = &adapter->ena_dev->io_cq_queues[ena_qid]; next_to_clean = tx_ring->next_to_clean; #ifdef DEV_NETMAP if (netmap_tx_irq(adapter->ifp, tx_ring->qid) != NM_IRQ_PASS) return (0); #endif /* DEV_NETMAP */ do { struct ena_tx_buffer *tx_info; struct mbuf *mbuf; rc = ena_get_tx_req_id(tx_ring, io_cq, &req_id); if (unlikely(rc != 0)) break; tx_info = &tx_ring->tx_buffer_info[req_id]; mbuf = tx_info->mbuf; tx_info->mbuf = NULL; bintime_clear(&tx_info->timestamp); bus_dmamap_sync(adapter->tx_buf_tag, tx_info->dmamap, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(adapter->tx_buf_tag, tx_info->dmamap); ena_log_io(adapter->pdev, DBG, "tx: q %d mbuf %p completed\n", tx_ring->qid, mbuf); m_freem(mbuf); total_done += tx_info->tx_descs; tx_ring->free_tx_ids[next_to_clean] = req_id; next_to_clean = ENA_TX_RING_IDX_NEXT(next_to_clean, tx_ring->ring_size); if (unlikely(--commit == 0)) { commit = TX_COMMIT; /* update ring state every TX_COMMIT descriptor */ tx_ring->next_to_clean = next_to_clean; ena_com_comp_ack( &adapter->ena_dev->io_sq_queues[ena_qid], total_done); ena_com_update_dev_comp_head(io_cq); total_done = 0; } } while (likely(--budget)); work_done = TX_BUDGET - budget; ena_log_io(adapter->pdev, DBG, "tx: q %d done. total pkts: %d\n", tx_ring->qid, work_done); /* If there is still something to commit update ring state */ if (likely(commit != TX_COMMIT)) { tx_ring->next_to_clean = next_to_clean; ena_com_comp_ack(&adapter->ena_dev->io_sq_queues[ena_qid], total_done); ena_com_update_dev_comp_head(io_cq); } /* * Need to make the rings circular update visible to * ena_xmit_mbuf() before checking for tx_ring->running. */ mb(); above_thresh = ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq, ENA_TX_RESUME_THRESH); if (unlikely(!tx_ring->running && above_thresh)) { ENA_RING_MTX_LOCK(tx_ring); above_thresh = ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq, ENA_TX_RESUME_THRESH); if (!tx_ring->running && above_thresh) { tx_ring->running = true; counter_u64_add(tx_ring->tx_stats.queue_wakeup, 1); taskqueue_enqueue(tx_ring->enqueue_tq, &tx_ring->enqueue_task); } ENA_RING_MTX_UNLOCK(tx_ring); } return (work_done); } static void ena_rx_hash_mbuf(struct ena_ring *rx_ring, struct ena_com_rx_ctx *ena_rx_ctx, struct mbuf *mbuf) { struct ena_adapter *adapter = rx_ring->adapter; if (likely(ENA_FLAG_ISSET(ENA_FLAG_RSS_ACTIVE, adapter))) { mbuf->m_pkthdr.flowid = ena_rx_ctx->hash; #ifdef RSS /* * Hardware and software RSS are in agreement only when both are * configured to Toeplitz algorithm. This driver configures * that algorithm only when software RSS is enabled and uses it. 
*/ if (adapter->ena_dev->rss.hash_func != ENA_ADMIN_TOEPLITZ && ena_rx_ctx->l3_proto != ENA_ETH_IO_L3_PROTO_UNKNOWN) { M_HASHTYPE_SET(mbuf, M_HASHTYPE_OPAQUE_HASH); return; } #endif if (ena_rx_ctx->frag && (ena_rx_ctx->l3_proto != ENA_ETH_IO_L3_PROTO_UNKNOWN)) { M_HASHTYPE_SET(mbuf, M_HASHTYPE_OPAQUE_HASH); return; } switch (ena_rx_ctx->l3_proto) { case ENA_ETH_IO_L3_PROTO_IPV4: switch (ena_rx_ctx->l4_proto) { case ENA_ETH_IO_L4_PROTO_TCP: M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_TCP_IPV4); break; case ENA_ETH_IO_L4_PROTO_UDP: M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_UDP_IPV4); break; default: M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_IPV4); } break; case ENA_ETH_IO_L3_PROTO_IPV6: switch (ena_rx_ctx->l4_proto) { case ENA_ETH_IO_L4_PROTO_TCP: M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_TCP_IPV6); break; case ENA_ETH_IO_L4_PROTO_UDP: M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_UDP_IPV6); break; default: M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_IPV6); } break; case ENA_ETH_IO_L3_PROTO_UNKNOWN: M_HASHTYPE_SET(mbuf, M_HASHTYPE_NONE); break; default: M_HASHTYPE_SET(mbuf, M_HASHTYPE_OPAQUE_HASH); } } else { mbuf->m_pkthdr.flowid = rx_ring->qid; M_HASHTYPE_SET(mbuf, M_HASHTYPE_NONE); } } /** * ena_rx_mbuf - assemble mbuf from descriptors * @rx_ring: ring for which we want to clean packets * @ena_bufs: buffer info * @ena_rx_ctx: metadata for this packet(s) * @next_to_clean: ring pointer, will be updated only upon success * **/ static struct mbuf* ena_rx_mbuf(struct ena_ring *rx_ring, struct ena_com_rx_buf_info *ena_bufs, struct ena_com_rx_ctx *ena_rx_ctx, uint16_t *next_to_clean) { struct mbuf *mbuf; struct ena_rx_buffer *rx_info; struct ena_adapter *adapter; device_t pdev; unsigned int descs = ena_rx_ctx->descs; uint16_t ntc, len, req_id, buf = 0; ntc = *next_to_clean; adapter = rx_ring->adapter; pdev = adapter->pdev; len = ena_bufs[buf].len; req_id = ena_bufs[buf].req_id; rx_info = &rx_ring->rx_buffer_info[req_id]; if (unlikely(rx_info->mbuf == NULL)) { ena_log(pdev, ERR, "NULL mbuf in rx_info"); return (NULL); } ena_log_io(pdev, DBG, "rx_info %p, mbuf %p, paddr %jx\n", rx_info, rx_info->mbuf, (uintmax_t)rx_info->ena_buf.paddr); bus_dmamap_sync(adapter->rx_buf_tag, rx_info->map, BUS_DMASYNC_POSTREAD); mbuf = rx_info->mbuf; mbuf->m_flags |= M_PKTHDR; mbuf->m_pkthdr.len = len; mbuf->m_len = len; /* Only for the first segment the data starts at specific offset */ mbuf->m_data = mtodo(mbuf, ena_rx_ctx->pkt_offset); ena_log_io(pdev, DBG, "Mbuf data offset=%u\n", ena_rx_ctx->pkt_offset); mbuf->m_pkthdr.rcvif = rx_ring->que->adapter->ifp; /* Fill mbuf with hash key and it's interpretation for optimization */ ena_rx_hash_mbuf(rx_ring, ena_rx_ctx, mbuf); ena_log_io(pdev, DBG, "rx mbuf 0x%p, flags=0x%x, len: %d\n", mbuf, mbuf->m_flags, mbuf->m_pkthdr.len); /* DMA address is not needed anymore, unmap it */ bus_dmamap_unload(rx_ring->adapter->rx_buf_tag, rx_info->map); rx_info->mbuf = NULL; rx_ring->free_rx_ids[ntc] = req_id; ntc = ENA_RX_RING_IDX_NEXT(ntc, rx_ring->ring_size); /* * While we have more than 1 descriptors for one rcvd packet, append * other mbufs to the main one */ while (--descs) { ++buf; len = ena_bufs[buf].len; req_id = ena_bufs[buf].req_id; rx_info = &rx_ring->rx_buffer_info[req_id]; if (unlikely(rx_info->mbuf == NULL)) { ena_log(pdev, ERR, "NULL mbuf in rx_info"); /* * If one of the required mbufs was not allocated yet, * we can break there. * All earlier used descriptors will be reallocated * later and not used mbufs can be reused. 
* The next_to_clean pointer will not be updated in case * of an error, so caller should advance it manually * in error handling routine to keep it up to date * with hw ring. */ m_freem(mbuf); return (NULL); } bus_dmamap_sync(adapter->rx_buf_tag, rx_info->map, BUS_DMASYNC_POSTREAD); if (unlikely(m_append(mbuf, len, rx_info->mbuf->m_data) == 0)) { counter_u64_add(rx_ring->rx_stats.mbuf_alloc_fail, 1); ena_log_io(pdev, WARN, "Failed to append Rx mbuf %p\n", mbuf); } ena_log_io(pdev, DBG, "rx mbuf updated. len %d\n", mbuf->m_pkthdr.len); /* Free already appended mbuf, it won't be useful anymore */ bus_dmamap_unload(rx_ring->adapter->rx_buf_tag, rx_info->map); m_freem(rx_info->mbuf); rx_info->mbuf = NULL; rx_ring->free_rx_ids[ntc] = req_id; ntc = ENA_RX_RING_IDX_NEXT(ntc, rx_ring->ring_size); } *next_to_clean = ntc; return (mbuf); } /** * ena_rx_checksum - indicate in mbuf if hw indicated a good cksum **/ static inline void ena_rx_checksum(struct ena_ring *rx_ring, struct ena_com_rx_ctx *ena_rx_ctx, struct mbuf *mbuf) { device_t pdev = rx_ring->adapter->pdev; /* if IP and error */ if (unlikely((ena_rx_ctx->l3_proto == ENA_ETH_IO_L3_PROTO_IPV4) && ena_rx_ctx->l3_csum_err)) { /* ipv4 checksum error */ mbuf->m_pkthdr.csum_flags = 0; counter_u64_add(rx_ring->rx_stats.csum_bad, 1); ena_log_io(pdev, DBG, "RX IPv4 header checksum error\n"); return; } /* if TCP/UDP */ if ((ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_TCP) || (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_UDP)) { if (ena_rx_ctx->l4_csum_err) { /* TCP/UDP checksum error */ mbuf->m_pkthdr.csum_flags = 0; counter_u64_add(rx_ring->rx_stats.csum_bad, 1); ena_log_io(pdev, DBG, "RX L4 checksum error\n"); } else { mbuf->m_pkthdr.csum_flags = CSUM_IP_CHECKED; mbuf->m_pkthdr.csum_flags |= CSUM_IP_VALID; counter_u64_add(rx_ring->rx_stats.csum_good, 1); } } } /** * ena_rx_cleanup - handle rx irq * @arg: ring for which irq is being handled **/ static int ena_rx_cleanup(struct ena_ring *rx_ring) { struct ena_adapter *adapter; device_t pdev; struct mbuf *mbuf; struct ena_com_rx_ctx ena_rx_ctx; struct ena_com_io_cq* io_cq; struct ena_com_io_sq* io_sq; enum ena_regs_reset_reason_types reset_reason; if_t ifp; uint16_t ena_qid; uint16_t next_to_clean; uint32_t refill_required; uint32_t refill_threshold; uint32_t do_if_input = 0; unsigned int qid; int rc, i; int budget = RX_BUDGET; #ifdef DEV_NETMAP int done; #endif /* DEV_NETMAP */ adapter = rx_ring->que->adapter; pdev = adapter->pdev; ifp = adapter->ifp; qid = rx_ring->que->id; ena_qid = ENA_IO_RXQ_IDX(qid); io_cq = &adapter->ena_dev->io_cq_queues[ena_qid]; io_sq = &adapter->ena_dev->io_sq_queues[ena_qid]; next_to_clean = rx_ring->next_to_clean; #ifdef DEV_NETMAP if (netmap_rx_irq(adapter->ifp, rx_ring->qid, &done) != NM_IRQ_PASS) return (0); #endif /* DEV_NETMAP */ ena_log_io(pdev, DBG, "rx: qid %d\n", qid); do { ena_rx_ctx.ena_bufs = rx_ring->ena_bufs; ena_rx_ctx.max_bufs = adapter->max_rx_sgl_size; ena_rx_ctx.descs = 0; ena_rx_ctx.pkt_offset = 0; bus_dmamap_sync(io_cq->cdesc_addr.mem_handle.tag, io_cq->cdesc_addr.mem_handle.map, BUS_DMASYNC_POSTREAD); rc = ena_com_rx_pkt(io_cq, io_sq, &ena_rx_ctx); if (unlikely(rc != 0)) { if (rc == ENA_COM_NO_SPACE) { counter_u64_add(rx_ring->rx_stats.bad_desc_num, 1); reset_reason = ENA_REGS_RESET_TOO_MANY_RX_DESCS; } else { counter_u64_add(rx_ring->rx_stats.bad_req_id, 1); reset_reason = ENA_REGS_RESET_INV_RX_REQ_ID; } ena_trigger_reset(adapter, reset_reason); return (0); } if (unlikely(ena_rx_ctx.descs == 0)) break; ena_log_io(pdev, DBG, "rx: q %d got packet from 
ena. " "descs #: %d l3 proto %d l4 proto %d hash: %x\n", rx_ring->qid, ena_rx_ctx.descs, ena_rx_ctx.l3_proto, ena_rx_ctx.l4_proto, ena_rx_ctx.hash); /* Receive mbuf from the ring */ mbuf = ena_rx_mbuf(rx_ring, rx_ring->ena_bufs, &ena_rx_ctx, &next_to_clean); bus_dmamap_sync(io_cq->cdesc_addr.mem_handle.tag, io_cq->cdesc_addr.mem_handle.map, BUS_DMASYNC_PREREAD); /* Exit if we failed to retrieve a buffer */ if (unlikely(mbuf == NULL)) { for (i = 0; i < ena_rx_ctx.descs; ++i) { rx_ring->free_rx_ids[next_to_clean] = rx_ring->ena_bufs[i].req_id; next_to_clean = ENA_RX_RING_IDX_NEXT(next_to_clean, rx_ring->ring_size); } break; } if (((ifp->if_capenable & IFCAP_RXCSUM) != 0) || ((ifp->if_capenable & IFCAP_RXCSUM_IPV6) != 0)) { ena_rx_checksum(rx_ring, &ena_rx_ctx, mbuf); } counter_enter(); counter_u64_add_protected(rx_ring->rx_stats.bytes, mbuf->m_pkthdr.len); counter_u64_add_protected(adapter->hw_stats.rx_bytes, mbuf->m_pkthdr.len); counter_exit(); /* * LRO is only for IP/TCP packets and TCP checksum of the packet * should be computed by hardware. */ do_if_input = 1; if (((ifp->if_capenable & IFCAP_LRO) != 0) && ((mbuf->m_pkthdr.csum_flags & CSUM_IP_VALID) != 0) && (ena_rx_ctx.l4_proto == ENA_ETH_IO_L4_PROTO_TCP)) { /* * Send to the stack if: * - LRO not enabled, or * - no LRO resources, or * - lro enqueue fails */ if ((rx_ring->lro.lro_cnt != 0) && (tcp_lro_rx(&rx_ring->lro, mbuf, 0) == 0)) do_if_input = 0; } if (do_if_input != 0) { ena_log_io(pdev, DBG, "calling if_input() with mbuf %p\n", mbuf); (*ifp->if_input)(ifp, mbuf); } counter_enter(); counter_u64_add_protected(rx_ring->rx_stats.cnt, 1); counter_u64_add_protected(adapter->hw_stats.rx_packets, 1); counter_exit(); } while (--budget); rx_ring->next_to_clean = next_to_clean; refill_required = ena_com_free_q_entries(io_sq); refill_threshold = min_t(int, rx_ring->ring_size / ENA_RX_REFILL_THRESH_DIVIDER, ENA_RX_REFILL_THRESH_PACKET); if (refill_required > refill_threshold) { ena_com_update_dev_comp_head(rx_ring->ena_com_io_cq); ena_refill_rx_bufs(rx_ring, refill_required); } tcp_lro_flush_all(&rx_ring->lro); return (RX_BUDGET - budget); } static void ena_tx_csum(struct ena_com_tx_ctx *ena_tx_ctx, struct mbuf *mbuf, bool disable_meta_caching) { struct ena_com_tx_meta *ena_meta; struct ether_vlan_header *eh; struct mbuf *mbuf_next; u32 mss; bool offload; uint16_t etype; int ehdrlen; struct ip *ip; int ipproto; int iphlen; struct tcphdr *th; int offset; offload = false; ena_meta = &ena_tx_ctx->ena_meta; mss = mbuf->m_pkthdr.tso_segsz; if (mss != 0) offload = true; if ((mbuf->m_pkthdr.csum_flags & CSUM_TSO) != 0) offload = true; if ((mbuf->m_pkthdr.csum_flags & CSUM_OFFLOAD) != 0) offload = true; if ((mbuf->m_pkthdr.csum_flags & CSUM6_OFFLOAD) != 0) offload = true; if (!offload) { if (disable_meta_caching) { memset(ena_meta, 0, sizeof(*ena_meta)); ena_tx_ctx->meta_valid = 1; } else { ena_tx_ctx->meta_valid = 0; } return; } /* Determine where frame payload starts. 
*/ eh = mtod(mbuf, struct ether_vlan_header *); if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { etype = ntohs(eh->evl_proto); ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; } else { etype = ntohs(eh->evl_encap_proto); ehdrlen = ETHER_HDR_LEN; } mbuf_next = m_getptr(mbuf, ehdrlen, &offset); switch (etype) { case ETHERTYPE_IP: ip = (struct ip *)(mtodo(mbuf_next, offset)); iphlen = ip->ip_hl << 2; ipproto = ip->ip_p; ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV4; if ((ip->ip_off & htons(IP_DF)) != 0) ena_tx_ctx->df = 1; break; case ETHERTYPE_IPV6: ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV6; iphlen = ip6_lasthdr(mbuf, ehdrlen, IPPROTO_IPV6, &ipproto); iphlen -= ehdrlen; ena_tx_ctx->df = 1; break; default: iphlen = 0; ipproto = 0; break; } mbuf_next = m_getptr(mbuf, iphlen + ehdrlen, &offset); th = (struct tcphdr *)(mtodo(mbuf_next, offset)); if ((mbuf->m_pkthdr.csum_flags & CSUM_IP) != 0) { ena_tx_ctx->l3_csum_enable = 1; } if ((mbuf->m_pkthdr.csum_flags & CSUM_TSO) != 0) { ena_tx_ctx->tso_enable = 1; ena_meta->l4_hdr_len = (th->th_off); } if (ipproto == IPPROTO_TCP) { ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_TCP; if ((mbuf->m_pkthdr.csum_flags & (CSUM_IP_TCP | CSUM_IP6_TCP)) != 0) ena_tx_ctx->l4_csum_enable = 1; else ena_tx_ctx->l4_csum_enable = 0; } else if (ipproto == IPPROTO_UDP) { ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UDP; if ((mbuf->m_pkthdr.csum_flags & (CSUM_IP_UDP | CSUM_IP6_UDP)) != 0) ena_tx_ctx->l4_csum_enable = 1; else ena_tx_ctx->l4_csum_enable = 0; } else { ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UNKNOWN; ena_tx_ctx->l4_csum_enable = 0; } ena_meta->mss = mss; ena_meta->l3_hdr_len = iphlen; ena_meta->l3_hdr_offset = ehdrlen; ena_tx_ctx->meta_valid = 1; } static int ena_check_and_collapse_mbuf(struct ena_ring *tx_ring, struct mbuf **mbuf) { struct ena_adapter *adapter; struct mbuf *collapsed_mbuf; int num_frags; adapter = tx_ring->adapter; num_frags = ena_mbuf_count(*mbuf); /* One segment must be reserved for configuration descriptor. */ if (num_frags < adapter->max_tx_sgl_size) return (0); if ((num_frags == adapter->max_tx_sgl_size) && ((*mbuf)->m_pkthdr.len < tx_ring->tx_max_header_size)) return (0); counter_u64_add(tx_ring->tx_stats.collapse, 1); collapsed_mbuf = m_collapse(*mbuf, M_NOWAIT, adapter->max_tx_sgl_size - 1); if (unlikely(collapsed_mbuf == NULL)) { counter_u64_add(tx_ring->tx_stats.collapse_err, 1); return (ENOMEM); } /* If mbuf was collapsed succesfully, original mbuf is released. */ *mbuf = collapsed_mbuf; return (0); } static int ena_tx_map_mbuf(struct ena_ring *tx_ring, struct ena_tx_buffer *tx_info, struct mbuf *mbuf, void **push_hdr, u16 *header_len) { struct ena_adapter *adapter = tx_ring->adapter; struct ena_com_buf *ena_buf; bus_dma_segment_t segs[ENA_BUS_DMA_SEGS]; size_t iseg = 0; uint32_t mbuf_head_len; uint16_t offset; int rc, nsegs; mbuf_head_len = mbuf->m_len; tx_info->mbuf = mbuf; ena_buf = tx_info->bufs; /* * For easier maintaining of the DMA map, map the whole mbuf even if * the LLQ is used. The descriptors will be filled using the segments. */ rc = bus_dmamap_load_mbuf_sg(adapter->tx_buf_tag, tx_info->dmamap, mbuf, segs, &nsegs, BUS_DMA_NOWAIT); if (unlikely((rc != 0) || (nsegs == 0))) { ena_log_io(adapter->pdev, WARN, "dmamap load failed! err: %d nsegs: %d\n", rc, nsegs); goto dma_error; } if (tx_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) { /* * When the device is LLQ mode, the driver will copy * the header into the device memory space. * the ena_com layer assumes the header is in a linear * memory space. 
* This assumption might be wrong since part of the header * can be in the fragmented buffers. * First check if header fits in the mbuf. If not, copy it to * separate buffer that will be holding linearized data. */ *header_len = min_t(uint32_t, mbuf->m_pkthdr.len, tx_ring->tx_max_header_size); /* If header is in linear space, just point into mbuf's data. */ if (likely(*header_len <= mbuf_head_len)) { *push_hdr = mbuf->m_data; /* * Otherwise, copy whole portion of header from multiple mbufs * to intermediate buffer. */ } else { m_copydata(mbuf, 0, *header_len, tx_ring->push_buf_intermediate_buf); *push_hdr = tx_ring->push_buf_intermediate_buf; counter_u64_add(tx_ring->tx_stats.llq_buffer_copy, 1); } ena_log_io(adapter->pdev, DBG, "mbuf: %p ""header_buf->vaddr: %p " "push_len: %d\n", mbuf, *push_hdr, *header_len); /* If packet is fitted in LLQ header, no need for DMA segments. */ if (mbuf->m_pkthdr.len <= tx_ring->tx_max_header_size) { return (0); } else { offset = tx_ring->tx_max_header_size; /* * As Header part is mapped to LLQ header, we can skip it and just * map the residuum of the mbuf to DMA Segments. */ while (offset > 0) { if (offset >= segs[iseg].ds_len) { offset -= segs[iseg].ds_len; } else { ena_buf->paddr = segs[iseg].ds_addr + offset; ena_buf->len = segs[iseg].ds_len - offset; ena_buf++; tx_info->num_of_bufs++; offset = 0; } iseg++; } } } else { *push_hdr = NULL; /* * header_len is just a hint for the device. Because FreeBSD is not * giving us information about packet header length and it is not * guaranteed that all packet headers will be in the 1st mbuf, setting * header_len to 0 is making the device ignore this value and resolve * header on it's own. */ *header_len = 0; } /* Map rest of the mbuf */ while (iseg < nsegs) { ena_buf->paddr = segs[iseg].ds_addr; ena_buf->len = segs[iseg].ds_len; ena_buf++; iseg++; tx_info->num_of_bufs++; } return (0); dma_error: counter_u64_add(tx_ring->tx_stats.dma_mapping_err, 1); tx_info->mbuf = NULL; return (rc); } static int ena_xmit_mbuf(struct ena_ring *tx_ring, struct mbuf **mbuf) { struct ena_adapter *adapter; device_t pdev; struct ena_tx_buffer *tx_info; struct ena_com_tx_ctx ena_tx_ctx; struct ena_com_dev *ena_dev; struct ena_com_io_sq* io_sq; void *push_hdr; uint16_t next_to_use; uint16_t req_id; uint16_t ena_qid; uint16_t header_len; int rc; int nb_hw_desc; ena_qid = ENA_IO_TXQ_IDX(tx_ring->que->id); adapter = tx_ring->que->adapter; pdev = adapter->pdev; ena_dev = adapter->ena_dev; io_sq = &ena_dev->io_sq_queues[ena_qid]; rc = ena_check_and_collapse_mbuf(tx_ring, mbuf); if (unlikely(rc != 0)) { ena_log_io(pdev, WARN, "Failed to collapse mbuf! 
err: %d\n", rc); return (rc); } ena_log_io(pdev, DBG, "Tx: %d bytes\n", (*mbuf)->m_pkthdr.len); next_to_use = tx_ring->next_to_use; req_id = tx_ring->free_tx_ids[next_to_use]; tx_info = &tx_ring->tx_buffer_info[req_id]; tx_info->num_of_bufs = 0; ENA_WARN(tx_info->mbuf != NULL, adapter->ena_dev, "mbuf isn't NULL for req_id %d\n", req_id); rc = ena_tx_map_mbuf(tx_ring, tx_info, *mbuf, &push_hdr, &header_len); if (unlikely(rc != 0)) { ena_log_io(pdev, WARN, "Failed to map TX mbuf\n"); return (rc); } memset(&ena_tx_ctx, 0x0, sizeof(struct ena_com_tx_ctx)); ena_tx_ctx.ena_bufs = tx_info->bufs; ena_tx_ctx.push_header = push_hdr; ena_tx_ctx.num_bufs = tx_info->num_of_bufs; ena_tx_ctx.req_id = req_id; ena_tx_ctx.header_len = header_len; /* Set flags and meta data */ ena_tx_csum(&ena_tx_ctx, *mbuf, adapter->disable_meta_caching); if (tx_ring->acum_pkts == DB_THRESHOLD || ena_com_is_doorbell_needed(tx_ring->ena_com_io_sq, &ena_tx_ctx)) { ena_log_io(pdev, DBG, "llq tx max burst size of queue %d achieved, writing doorbell to send burst\n", tx_ring->que->id); - ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq); - counter_u64_add(tx_ring->tx_stats.doorbells, 1); - tx_ring->acum_pkts = 0; + ena_ring_tx_doorbell(tx_ring); } /* Prepare the packet's descriptors and send them to device */ rc = ena_com_prepare_tx(io_sq, &ena_tx_ctx, &nb_hw_desc); if (unlikely(rc != 0)) { if (likely(rc == ENA_COM_NO_MEM)) { ena_log_io(pdev, DBG, "tx ring[%d] is out of space\n", tx_ring->que->id); } else { ena_log(pdev, ERR, "failed to prepare tx bufs\n"); ena_trigger_reset(adapter, ENA_REGS_RESET_DRIVER_INVALID_STATE); } counter_u64_add(tx_ring->tx_stats.prepare_ctx_err, 1); goto dma_error; } counter_enter(); counter_u64_add_protected(tx_ring->tx_stats.cnt, 1); counter_u64_add_protected(tx_ring->tx_stats.bytes, (*mbuf)->m_pkthdr.len); counter_u64_add_protected(adapter->hw_stats.tx_packets, 1); counter_u64_add_protected(adapter->hw_stats.tx_bytes, (*mbuf)->m_pkthdr.len); counter_exit(); tx_info->tx_descs = nb_hw_desc; getbinuptime(&tx_info->timestamp); tx_info->print_once = true; tx_ring->next_to_use = ENA_TX_RING_IDX_NEXT(next_to_use, tx_ring->ring_size); /* stop the queue when no more space available, the packet can have up * to sgl_size + 2. one for the meta descriptor and one for header * (if the header is larger than tx_max_header_size). */ if (unlikely(!ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq, adapter->max_tx_sgl_size + 2))) { ena_log_io(pdev, DBG, "Stop queue %d\n", tx_ring->que->id); tx_ring->running = false; counter_u64_add(tx_ring->tx_stats.queue_stop, 1); /* There is a rare condition where this function decides to * stop the queue but meanwhile tx_cleanup() updates * next_to_completion and terminates. * The queue will remain stopped forever. * To solve this issue this function performs mb(), checks * the wakeup condition and wakes up the queue if needed. 
*/ mb(); if (ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq, ENA_TX_RESUME_THRESH)) { tx_ring->running = true; counter_u64_add(tx_ring->tx_stats.queue_wakeup, 1); } } bus_dmamap_sync(adapter->tx_buf_tag, tx_info->dmamap, BUS_DMASYNC_PREWRITE); return (0); dma_error: tx_info->mbuf = NULL; bus_dmamap_unload(adapter->tx_buf_tag, tx_info->dmamap); return (rc); } static void ena_start_xmit(struct ena_ring *tx_ring) { struct mbuf *mbuf; struct ena_adapter *adapter = tx_ring->adapter; - struct ena_com_io_sq* io_sq; int ena_qid; int ret = 0; ENA_RING_MTX_ASSERT(tx_ring); if (unlikely((if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING) == 0)) return; if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_LINK_UP, adapter))) return; ena_qid = ENA_IO_TXQ_IDX(tx_ring->que->id); - io_sq = &adapter->ena_dev->io_sq_queues[ena_qid]; while ((mbuf = drbr_peek(adapter->ifp, tx_ring->br)) != NULL) { ena_log_io(adapter->pdev, DBG, "\ndequeued mbuf %p with flags %#x and header csum flags %#jx\n", mbuf, mbuf->m_flags, (uint64_t)mbuf->m_pkthdr.csum_flags); if (unlikely(!tx_ring->running)) { drbr_putback(adapter->ifp, tx_ring->br, mbuf); break; } if (unlikely((ret = ena_xmit_mbuf(tx_ring, &mbuf)) != 0)) { if (ret == ENA_COM_NO_MEM) { drbr_putback(adapter->ifp, tx_ring->br, mbuf); } else if (ret == ENA_COM_NO_SPACE) { drbr_putback(adapter->ifp, tx_ring->br, mbuf); } else { m_freem(mbuf); drbr_advance(adapter->ifp, tx_ring->br); } break; } drbr_advance(adapter->ifp, tx_ring->br); if (unlikely((if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING) == 0)) return; tx_ring->acum_pkts++; BPF_MTAP(adapter->ifp, mbuf); } if (likely(tx_ring->acum_pkts != 0)) { /* Trigger the dma engine */ - ena_com_write_sq_doorbell(io_sq); - counter_u64_add(tx_ring->tx_stats.doorbells, 1); - tx_ring->acum_pkts = 0; + ena_ring_tx_doorbell(tx_ring); } if (unlikely(!tx_ring->running)) taskqueue_enqueue(tx_ring->que->cleanup_tq, &tx_ring->que->cleanup_task); } diff --git a/sys/dev/ena/ena_netmap.c b/sys/dev/ena/ena_netmap.c index 1525b1efd954..e9511b033fd3 100644 --- a/sys/dev/ena/ena_netmap.c +++ b/sys/dev/ena/ena_netmap.c @@ -1,1103 +1,1098 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2015-2020 Amazon.com, Inc. or its affiliates. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #ifdef DEV_NETMAP #include "ena.h" #include "ena_netmap.h" #define ENA_NETMAP_MORE_FRAMES 1 #define ENA_NETMAP_NO_MORE_FRAMES 0 #define ENA_MAX_FRAMES 16384 struct ena_netmap_ctx { struct netmap_kring *kring; struct ena_adapter *adapter; struct netmap_adapter *na; struct netmap_slot *slots; struct ena_ring *ring; struct ena_com_io_cq *io_cq; struct ena_com_io_sq *io_sq; u_int nm_i; uint16_t nt; uint16_t lim; }; /* Netmap callbacks */ static int ena_netmap_reg(struct netmap_adapter *, int); static int ena_netmap_txsync(struct netmap_kring *, int); static int ena_netmap_rxsync(struct netmap_kring *, int); /* Helper functions */ static int ena_netmap_tx_frames(struct ena_netmap_ctx *); static int ena_netmap_tx_frame(struct ena_netmap_ctx *); static inline uint16_t ena_netmap_count_slots(struct ena_netmap_ctx *); static inline uint16_t ena_netmap_packet_len(struct netmap_slot *, u_int, uint16_t); static int ena_netmap_copy_data(struct netmap_adapter *, struct netmap_slot *, u_int, uint16_t, uint16_t, void *); static int ena_netmap_map_single_slot(struct netmap_adapter *, struct netmap_slot *, bus_dma_tag_t, bus_dmamap_t, void **, uint64_t *); static int ena_netmap_tx_map_slots(struct ena_netmap_ctx *, struct ena_tx_buffer *, void **, uint16_t *, uint16_t *); static void ena_netmap_unmap_last_socket_chain(struct ena_netmap_ctx *, struct ena_tx_buffer *); static void ena_netmap_tx_cleanup(struct ena_netmap_ctx *); static uint16_t ena_netmap_tx_clean_one(struct ena_netmap_ctx *, uint16_t); static inline int validate_tx_req_id(struct ena_ring *, uint16_t); static int ena_netmap_rx_frames(struct ena_netmap_ctx *); static int ena_netmap_rx_frame(struct ena_netmap_ctx *); static int ena_netmap_rx_load_desc(struct ena_netmap_ctx *, uint16_t, int *); static void ena_netmap_rx_cleanup(struct ena_netmap_ctx *); static void ena_netmap_fill_ctx(struct netmap_kring *, struct ena_netmap_ctx *, uint16_t); int ena_netmap_attach(struct ena_adapter *adapter) { struct netmap_adapter na; ena_log_nm(adapter->pdev, INFO, "netmap attach\n"); bzero(&na, sizeof(na)); na.na_flags = NAF_MOREFRAG; na.ifp = adapter->ifp; na.num_tx_desc = adapter->requested_tx_ring_size; na.num_rx_desc = adapter->requested_rx_ring_size; na.num_tx_rings = adapter->num_io_queues; na.num_rx_rings = adapter->num_io_queues; na.rx_buf_maxsize = adapter->buf_ring_size; na.nm_txsync = ena_netmap_txsync; na.nm_rxsync = ena_netmap_rxsync; na.nm_register = ena_netmap_reg; return (netmap_attach(&na)); } int ena_netmap_alloc_rx_slot(struct ena_adapter *adapter, struct ena_ring *rx_ring, struct ena_rx_buffer *rx_info) { struct netmap_adapter *na = NA(adapter->ifp); struct netmap_kring *kring; struct netmap_ring *ring; struct netmap_slot *slot; void *addr; uint64_t paddr; int nm_i, qid, head, lim, rc; /* if previously allocated frag is not used */ if (unlikely(rx_info->netmap_buf_idx != 0)) return (0); qid = rx_ring->qid; kring = na->rx_rings[qid]; nm_i = kring->nr_hwcur; head = kring->rhead; ena_log_nm(adapter->pdev, DBG, "nr_hwcur: %d, nr_hwtail: %d, " "rhead: %d, rcur: %d, rtail: %d\n", kring->nr_hwcur, kring->nr_hwtail, kring->rhead, kring->rcur, kring->rtail); if ((nm_i == head) && rx_ring->initialized) { ena_log_nm(adapter->pdev, ERR, "No free slots in netmap ring\n"); return (ENOMEM); } ring = kring->ring; if (ring == NULL) { ena_log_nm(adapter->pdev, ERR, "Rx ring %d is NULL\n", qid); return (EFAULT); } slot = &ring->slot[nm_i]; addr = PNMB(na, slot, &paddr); if (addr == NETMAP_BUF_BASE(na)) { 
ena_log_nm(adapter->pdev, ERR, "Bad buff in slot\n"); return (EFAULT); } rc = netmap_load_map(na, adapter->rx_buf_tag, rx_info->map, addr); if (rc != 0) { ena_log_nm(adapter->pdev, WARN, "DMA mapping error\n"); return (rc); } bus_dmamap_sync(adapter->rx_buf_tag, rx_info->map, BUS_DMASYNC_PREREAD); rx_info->ena_buf.paddr = paddr; rx_info->ena_buf.len = ring->nr_buf_size; rx_info->mbuf = NULL; rx_info->netmap_buf_idx = slot->buf_idx; slot->buf_idx = 0; lim = kring->nkr_num_slots - 1; kring->nr_hwcur = nm_next(nm_i, lim); return (0); } void ena_netmap_free_rx_slot(struct ena_adapter *adapter, struct ena_ring *rx_ring, struct ena_rx_buffer *rx_info) { struct netmap_adapter *na; struct netmap_kring *kring; struct netmap_slot *slot; int nm_i, qid, lim; na = NA(adapter->ifp); if (na == NULL) { ena_log_nm(adapter->pdev, ERR, "netmap adapter is NULL\n"); return; } if (na->rx_rings == NULL) { ena_log_nm(adapter->pdev, ERR, "netmap rings are NULL\n"); return; } qid = rx_ring->qid; kring = na->rx_rings[qid]; if (kring == NULL) { ena_log_nm(adapter->pdev, ERR, "netmap kernel ring %d is NULL\n", qid); return; } lim = kring->nkr_num_slots - 1; nm_i = nm_prev(kring->nr_hwcur, lim); if (kring->nr_mode != NKR_NETMAP_ON) return; bus_dmamap_sync(adapter->rx_buf_tag, rx_info->map, BUS_DMASYNC_POSTREAD); netmap_unload_map(na, adapter->rx_buf_tag, rx_info->map); KASSERT(kring->ring == NULL, ("Netmap Rx ring is NULL\n")); slot = &kring->ring->slot[nm_i]; ENA_WARN(slot->buf_idx != 0, adapter->ena_dev, "Overwrite slot buf\n"); slot->buf_idx = rx_info->netmap_buf_idx; slot->flags = NS_BUF_CHANGED; rx_info->netmap_buf_idx = 0; kring->nr_hwcur = nm_i; } static bool ena_ring_in_netmap(struct ena_adapter *adapter, int qid, enum txrx x) { struct netmap_adapter *na; struct netmap_kring *kring; if (adapter->ifp->if_capenable & IFCAP_NETMAP) { na = NA(adapter->ifp); kring = (x == NR_RX) ? na->rx_rings[qid] : na->tx_rings[qid]; if (kring->nr_mode == NKR_NETMAP_ON) return true; } return false; } bool ena_tx_ring_in_netmap(struct ena_adapter *adapter, int qid) { return ena_ring_in_netmap(adapter, qid, NR_TX); } bool ena_rx_ring_in_netmap(struct ena_adapter *adapter, int qid) { return ena_ring_in_netmap(adapter, qid, NR_RX); } static void ena_netmap_reset_ring(struct ena_adapter *adapter, int qid, enum txrx x) { if (!ena_ring_in_netmap(adapter, qid, x)) return; netmap_reset(NA(adapter->ifp), x, qid, 0); ena_log_nm(adapter->pdev, INFO, "%s ring %d is in netmap mode\n", (x == NR_TX) ? 
"Tx" : "Rx", qid); } void ena_netmap_reset_rx_ring(struct ena_adapter *adapter, int qid) { ena_netmap_reset_ring(adapter, qid, NR_RX); } void ena_netmap_reset_tx_ring(struct ena_adapter *adapter, int qid) { ena_netmap_reset_ring(adapter, qid, NR_TX); } static int ena_netmap_reg(struct netmap_adapter *na, int onoff) { struct ifnet *ifp = na->ifp; struct ena_adapter* adapter = ifp->if_softc; device_t pdev = adapter->pdev; struct netmap_kring *kring; enum txrx t; int rc, i; ENA_LOCK_LOCK(); ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_TRIGGER_RESET, adapter); ena_down(adapter); if (onoff) { ena_log_nm(pdev, INFO, "netmap on\n"); for_rx_tx(t) { for (i = 0; i <= nma_get_nrings(na, t); i++) { kring = NMR(na, t)[i]; if (nm_kring_pending_on(kring)) { kring->nr_mode = NKR_NETMAP_ON; } } } nm_set_native_flags(na); } else { ena_log_nm(pdev, INFO, "netmap off\n"); nm_clear_native_flags(na); for_rx_tx(t) { for (i = 0; i <= nma_get_nrings(na, t); i++) { kring = NMR(na, t)[i]; if (nm_kring_pending_off(kring)) { kring->nr_mode = NKR_NETMAP_OFF; } } } } rc = ena_up(adapter); if (rc != 0) { ena_log_nm(pdev, WARN, "ena_up failed with rc=%d\n", rc); adapter->reset_reason = ENA_REGS_RESET_DRIVER_INVALID_STATE; nm_clear_native_flags(na); ena_destroy_device(adapter, false); ENA_FLAG_SET_ATOMIC(ENA_FLAG_DEV_UP_BEFORE_RESET, adapter); rc = ena_restore_device(adapter); } ENA_LOCK_UNLOCK(); return (rc); } static int ena_netmap_txsync(struct netmap_kring *kring, int flags) { struct ena_netmap_ctx ctx; int rc = 0; ena_netmap_fill_ctx(kring, &ctx, ENA_IO_TXQ_IDX(kring->ring_id)); ctx.ring = &ctx.adapter->tx_ring[kring->ring_id]; ENA_RING_MTX_LOCK(ctx.ring); if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, ctx.adapter))) goto txsync_end; if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_LINK_UP, ctx.adapter))) goto txsync_end; rc = ena_netmap_tx_frames(&ctx); ena_netmap_tx_cleanup(&ctx); txsync_end: ENA_RING_MTX_UNLOCK(ctx.ring); return (rc); } static int ena_netmap_tx_frames(struct ena_netmap_ctx *ctx) { struct ena_ring *tx_ring = ctx->ring; int rc = 0; ctx->nm_i = ctx->kring->nr_hwcur; ctx->nt = ctx->ring->next_to_use; __builtin_prefetch(&ctx->slots[ctx->nm_i]); while (ctx->nm_i != ctx->kring->rhead) { if ((rc = ena_netmap_tx_frame(ctx)) != 0) { /* * When there is no empty space in Tx ring, error is * still being returned. It should not be passed to the * netmap, as application knows current ring state from * netmap ring pointers. Returning error there could * cause application to exit, but the Tx ring is commonly * being full. */ if (rc == ENA_COM_NO_MEM) rc = 0; break; } tx_ring->acum_pkts++; } /* If any packet was sent... */ if (likely(ctx->nm_i != ctx->kring->nr_hwcur)) { /* ...send the doorbell to the device. 
*/ - ena_com_write_sq_doorbell(ctx->io_sq); - counter_u64_add(ctx->ring->tx_stats.doorbells, 1); - tx_ring->acum_pkts = 0; + ena_ring_tx_doorbell(tx_ring); ctx->ring->next_to_use = ctx->nt; ctx->kring->nr_hwcur = ctx->nm_i; } return (rc); } static int ena_netmap_tx_frame(struct ena_netmap_ctx *ctx) { struct ena_com_tx_ctx ena_tx_ctx; struct ena_adapter *adapter; struct ena_ring *tx_ring; struct ena_tx_buffer *tx_info; uint16_t req_id; uint16_t header_len; uint16_t packet_len; int nb_hw_desc; int rc; void *push_hdr; adapter = ctx->adapter; if (ena_netmap_count_slots(ctx) > adapter->max_tx_sgl_size) { ena_log_nm(adapter->pdev, WARN, "Too many slots per packet\n"); return (EINVAL); } tx_ring = ctx->ring; req_id = tx_ring->free_tx_ids[ctx->nt]; tx_info = &tx_ring->tx_buffer_info[req_id]; tx_info->num_of_bufs = 0; tx_info->nm_info.sockets_used = 0; rc = ena_netmap_tx_map_slots(ctx, tx_info, &push_hdr, &header_len, &packet_len); if (unlikely(rc != 0)) { ena_log_nm(adapter->pdev, ERR, "Failed to map Tx slot\n"); return (rc); } bzero(&ena_tx_ctx, sizeof(struct ena_com_tx_ctx)); ena_tx_ctx.ena_bufs = tx_info->bufs; ena_tx_ctx.push_header = push_hdr; ena_tx_ctx.num_bufs = tx_info->num_of_bufs; ena_tx_ctx.req_id = req_id; ena_tx_ctx.header_len = header_len; ena_tx_ctx.meta_valid = adapter->disable_meta_caching; /* There are no any offloads, as the netmap doesn't support them */ if (tx_ring->acum_pkts == DB_THRESHOLD || - ena_com_is_doorbell_needed(ctx->io_sq, &ena_tx_ctx)) { - ena_com_write_sq_doorbell(ctx->io_sq); - counter_u64_add(tx_ring->tx_stats.doorbells, 1); - tx_ring->acum_pkts = 0; - } + ena_com_is_doorbell_needed(ctx->io_sq, &ena_tx_ctx)) + ena_ring_tx_doorbell(tx_ring); rc = ena_com_prepare_tx(ctx->io_sq, &ena_tx_ctx, &nb_hw_desc); if (unlikely(rc != 0)) { if (likely(rc == ENA_COM_NO_MEM)) { ena_log_nm(adapter->pdev, DBG, "Tx ring[%d] is out of space\n", tx_ring->que->id); } else { ena_log_nm(adapter->pdev, ERR, "Failed to prepare Tx bufs\n"); ena_trigger_reset(adapter, ENA_REGS_RESET_DRIVER_INVALID_STATE); } counter_u64_add(tx_ring->tx_stats.prepare_ctx_err, 1); ena_netmap_unmap_last_socket_chain(ctx, tx_info); return (rc); } counter_enter(); counter_u64_add_protected(tx_ring->tx_stats.cnt, 1); counter_u64_add_protected(tx_ring->tx_stats.bytes, packet_len); counter_u64_add_protected(adapter->hw_stats.tx_packets, 1); counter_u64_add_protected(adapter->hw_stats.tx_bytes, packet_len); counter_exit(); tx_info->tx_descs = nb_hw_desc; ctx->nt = ENA_TX_RING_IDX_NEXT(ctx->nt, ctx->ring->ring_size); for (unsigned int i = 0; i < tx_info->num_of_bufs; i++) bus_dmamap_sync(adapter->tx_buf_tag, tx_info->nm_info.map_seg[i], BUS_DMASYNC_PREWRITE); return (0); } static inline uint16_t ena_netmap_count_slots(struct ena_netmap_ctx *ctx) { uint16_t slots = 1; uint16_t nm = ctx->nm_i; while ((ctx->slots[nm].flags & NS_MOREFRAG) != 0) { slots++; nm = nm_next(nm, ctx->lim); } return slots; } static inline uint16_t ena_netmap_packet_len(struct netmap_slot *slots, u_int slot_index, uint16_t limit) { struct netmap_slot *nm_slot; uint16_t packet_size = 0; do { nm_slot = &slots[slot_index]; packet_size += nm_slot->len; slot_index = nm_next(slot_index, limit); } while ((nm_slot->flags & NS_MOREFRAG) != 0); return packet_size; } static int ena_netmap_copy_data(struct netmap_adapter *na, struct netmap_slot *slots, u_int slot_index, uint16_t limit, uint16_t bytes_to_copy, void *destination) { struct netmap_slot *nm_slot; void *slot_vaddr; uint16_t packet_size; uint16_t data_amount; packet_size = 0; do { nm_slot = 
&slots[slot_index]; slot_vaddr = NMB(na, nm_slot); if (unlikely(slot_vaddr == NULL)) return (EINVAL); data_amount = min_t(uint16_t, bytes_to_copy, nm_slot->len); memcpy(destination, slot_vaddr, data_amount); bytes_to_copy -= data_amount; slot_index = nm_next(slot_index, limit); } while ((nm_slot->flags & NS_MOREFRAG) != 0 && bytes_to_copy > 0); return (0); } static int ena_netmap_map_single_slot(struct netmap_adapter *na, struct netmap_slot *slot, bus_dma_tag_t dmatag, bus_dmamap_t dmamap, void **vaddr, uint64_t *paddr) { device_t pdev; int rc; pdev = ((struct ena_adapter *)na->ifp->if_softc)->pdev; *vaddr = PNMB(na, slot, paddr); if (unlikely(vaddr == NULL)) { ena_log_nm(pdev, ERR, "Slot address is NULL\n"); return (EINVAL); } rc = netmap_load_map(na, dmatag, dmamap, *vaddr); if (unlikely(rc != 0)) { ena_log_nm(pdev, ERR, "Failed to map slot %d for DMA\n", slot->buf_idx); return (EINVAL); } return (0); } static int ena_netmap_tx_map_slots(struct ena_netmap_ctx *ctx, struct ena_tx_buffer *tx_info, void **push_hdr, uint16_t *header_len, uint16_t *packet_len) { struct netmap_slot *slot; struct ena_com_buf *ena_buf; struct ena_adapter *adapter; struct ena_ring *tx_ring; struct ena_netmap_tx_info *nm_info; bus_dmamap_t *nm_maps; void *vaddr; uint64_t paddr; uint32_t *nm_buf_idx; uint32_t slot_head_len; uint32_t frag_len; uint32_t remaining_len; uint16_t push_len; uint16_t delta; int rc; adapter = ctx->adapter; tx_ring = ctx->ring; ena_buf = tx_info->bufs; nm_info = &tx_info->nm_info; nm_maps = nm_info->map_seg; nm_buf_idx = nm_info->socket_buf_idx; slot = &ctx->slots[ctx->nm_i]; slot_head_len = slot->len; *packet_len = ena_netmap_packet_len(ctx->slots, ctx->nm_i, ctx->lim); remaining_len = *packet_len; delta = 0; __builtin_prefetch(&ctx->slots[ctx->nm_i + 1]); if (tx_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) { /* * When the device is in LLQ mode, the driver will copy * the header into the device memory space. * The ena_com layer assumes that the header is in a linear * memory space. * This assumption might be wrong since part of the header * can be in the fragmented buffers. * First, check if header fits in the first slot. If not, copy * it to separate buffer that will be holding linearized data. */ push_len = min_t(uint32_t, *packet_len, tx_ring->tx_max_header_size); *header_len = push_len; /* If header is in linear space, just point to socket's data. */ if (likely(push_len <= slot_head_len)) { *push_hdr = NMB(ctx->na, slot); if (unlikely(push_hdr == NULL)) { ena_log_nm(adapter->pdev, ERR, "Slot vaddress is NULL\n"); return (EINVAL); } /* * Otherwise, copy whole portion of header from multiple slots * to intermediate buffer. */ } else { rc = ena_netmap_copy_data(ctx->na, ctx->slots, ctx->nm_i, ctx->lim, push_len, tx_ring->push_buf_intermediate_buf); if (unlikely(rc)) { ena_log_nm(adapter->pdev, ERR, "Failed to copy data from slots to push_buf\n"); return (EINVAL); } *push_hdr = tx_ring->push_buf_intermediate_buf; counter_u64_add(tx_ring->tx_stats.llq_buffer_copy, 1); delta = push_len - slot_head_len; } ena_log_nm(adapter->pdev, DBG, "slot: %d header_buf->vaddr: %p push_len: %d\n", slot->buf_idx, *push_hdr, push_len); /* * If header was in linear memory space, map for the dma rest of the data * in the first mbuf of the mbuf chain. 
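 * ("mbuf" here really means the first netmap slot: whatever part of the
 * first slot was not consumed as the pushed header still has to be
 * DMA-mapped as the first ena_buf of the chain.)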
*/ if (slot_head_len > push_len) { rc = ena_netmap_map_single_slot(ctx->na, slot, adapter->tx_buf_tag, *nm_maps, &vaddr, &paddr); if (unlikely(rc != 0)) { ena_log_nm(adapter->pdev, ERR, "DMA mapping error\n"); return (rc); } nm_maps++; ena_buf->paddr = paddr + push_len; ena_buf->len = slot->len - push_len; ena_buf++; tx_info->num_of_bufs++; } remaining_len -= slot->len; /* Save buf idx before advancing */ *nm_buf_idx = slot->buf_idx; nm_buf_idx++; slot->buf_idx = 0; /* Advance to the next socket */ ctx->nm_i = nm_next(ctx->nm_i, ctx->lim); slot = &ctx->slots[ctx->nm_i]; nm_info->sockets_used++; /* * If header is in non linear space (delta > 0), then skip mbufs * containing header and map the last one containing both header * and the packet data. * The first segment is already counted in. */ while (delta > 0) { __builtin_prefetch(&ctx->slots[ctx->nm_i + 1]); frag_len = slot->len; /* * If whole segment contains header just move to the * next one and reduce delta. */ if (unlikely(delta >= frag_len)) { delta -= frag_len; } else { /* * Map the data and then assign it with the * offsets */ rc = ena_netmap_map_single_slot(ctx->na, slot, adapter->tx_buf_tag, *nm_maps, &vaddr, &paddr); if (unlikely(rc != 0)) { ena_log_nm(adapter->pdev, ERR, "DMA mapping error\n"); goto error_map; } nm_maps++; ena_buf->paddr = paddr + delta; ena_buf->len = slot->len - delta; ena_buf++; tx_info->num_of_bufs++; delta = 0; } remaining_len -= slot->len; /* Save buf idx before advancing */ *nm_buf_idx = slot->buf_idx; nm_buf_idx++; slot->buf_idx = 0; /* Advance to the next socket */ ctx->nm_i = nm_next(ctx->nm_i, ctx->lim); slot = &ctx->slots[ctx->nm_i]; nm_info->sockets_used++; } } else { *push_hdr = NULL; /* * header_len is just a hint for the device. Because netmap is * not giving us any information about packet header length and * it is not guaranteed that all packet headers will be in the * 1st slot, setting header_len to 0 is making the device ignore * this value and resolve header on it's own. */ *header_len = 0; } /* Map all remaining data (regular routine for non-LLQ mode) */ while (remaining_len > 0) { __builtin_prefetch(&ctx->slots[ctx->nm_i + 1]); rc = ena_netmap_map_single_slot(ctx->na, slot, adapter->tx_buf_tag, *nm_maps, &vaddr, &paddr); if (unlikely(rc != 0)) { ena_log_nm(adapter->pdev, ERR, "DMA mapping error\n"); goto error_map; } nm_maps++; ena_buf->paddr = paddr; ena_buf->len = slot->len; ena_buf++; tx_info->num_of_bufs++; remaining_len -= slot->len; /* Save buf idx before advancing */ *nm_buf_idx = slot->buf_idx; nm_buf_idx++; slot->buf_idx = 0; /* Advance to the next socket */ ctx->nm_i = nm_next(ctx->nm_i, ctx->lim); slot = &ctx->slots[ctx->nm_i]; nm_info->sockets_used++; } return (0); error_map: ena_netmap_unmap_last_socket_chain(ctx, tx_info); return (rc); } static void ena_netmap_unmap_last_socket_chain(struct ena_netmap_ctx *ctx, struct ena_tx_buffer *tx_info) { struct ena_netmap_tx_info *nm_info; int n; nm_info = &tx_info->nm_info; /** * As the used sockets must not be equal to the buffers used in the LLQ * mode, they must be treated separately. * First, unmap the DMA maps. 
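 * Then each slot's original buffer index is handed back to userspace,
 * walking the ring backwards with nm_prev() so the partially built
 * chain is undone in the reverse order it was mapped.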
*/ n = tx_info->num_of_bufs; while (n--) { netmap_unload_map(ctx->na, ctx->adapter->tx_buf_tag, nm_info->map_seg[n]); } tx_info->num_of_bufs = 0; /* Next, retain the sockets back to the userspace */ n = nm_info->sockets_used; while (n--) { ctx->slots[ctx->nm_i].buf_idx = nm_info->socket_buf_idx[n]; ctx->slots[ctx->nm_i].flags = NS_BUF_CHANGED; nm_info->socket_buf_idx[n] = 0; ctx->nm_i = nm_prev(ctx->nm_i, ctx->lim); } nm_info->sockets_used = 0; } static void ena_netmap_tx_cleanup(struct ena_netmap_ctx *ctx) { uint16_t req_id; uint16_t total_tx_descs = 0; ctx->nm_i = ctx->kring->nr_hwtail; ctx->nt = ctx->ring->next_to_clean; /* Reclaim buffers for completed transmissions */ while (ena_com_tx_comp_req_id_get(ctx->io_cq, &req_id) >= 0) { if (validate_tx_req_id(ctx->ring, req_id) != 0) break; total_tx_descs += ena_netmap_tx_clean_one(ctx, req_id); } ctx->kring->nr_hwtail = ctx->nm_i; if (total_tx_descs > 0) { /* acknowledge completion of sent packets */ ctx->ring->next_to_clean = ctx->nt; ena_com_comp_ack(ctx->ring->ena_com_io_sq, total_tx_descs); ena_com_update_dev_comp_head(ctx->ring->ena_com_io_cq); } } static uint16_t ena_netmap_tx_clean_one(struct ena_netmap_ctx *ctx, uint16_t req_id) { struct ena_tx_buffer *tx_info; struct ena_netmap_tx_info *nm_info; int n; tx_info = &ctx->ring->tx_buffer_info[req_id]; nm_info = &tx_info->nm_info; /** * As the used sockets must not be equal to the buffers used in the LLQ * mode, they must be treated separately. * First, unmap the DMA maps. */ n = tx_info->num_of_bufs; for (n = 0; n < tx_info->num_of_bufs; n++) { netmap_unload_map(ctx->na, ctx->adapter->tx_buf_tag, nm_info->map_seg[n]); } tx_info->num_of_bufs = 0; /* Next, retain the sockets back to the userspace */ for (n = 0; n < nm_info->sockets_used; n++) { ctx->nm_i = nm_next(ctx->nm_i, ctx->lim); ENA_WARN(ctx->slots[ctx->nm_i].buf_idx != 0, ctx->adapter->ena_dev, "Tx idx is not 0.\n"); ctx->slots[ctx->nm_i].buf_idx = nm_info->socket_buf_idx[n]; ctx->slots[ctx->nm_i].flags = NS_BUF_CHANGED; nm_info->socket_buf_idx[n] = 0; } nm_info->sockets_used = 0; ctx->ring->free_tx_ids[ctx->nt] = req_id; ctx->nt = ENA_TX_RING_IDX_NEXT(ctx->nt, ctx->lim); return tx_info->tx_descs; } static inline int validate_tx_req_id(struct ena_ring *tx_ring, uint16_t req_id) { struct ena_adapter *adapter = tx_ring->adapter; if (likely(req_id < tx_ring->ring_size)) return (0); ena_log_nm(adapter->pdev, WARN, "Invalid req_id: %hu\n", req_id); counter_u64_add(tx_ring->tx_stats.bad_req_id, 1); ena_trigger_reset(adapter, ENA_REGS_RESET_INV_TX_REQ_ID); return (EFAULT); } static int ena_netmap_rxsync(struct netmap_kring *kring, int flags) { struct ena_netmap_ctx ctx; int rc; ena_netmap_fill_ctx(kring, &ctx, ENA_IO_RXQ_IDX(kring->ring_id)); ctx.ring = &ctx.adapter->rx_ring[kring->ring_id]; if (ctx.kring->rhead > ctx.lim) { /* Probably not needed to release slots from RX ring. */ return (netmap_ring_reinit(ctx.kring)); } if (unlikely((if_getdrvflags(ctx.na->ifp) & IFF_DRV_RUNNING) == 0)) return (0); if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_LINK_UP, ctx.adapter))) return (0); if ((rc = ena_netmap_rx_frames(&ctx)) != 0) return (rc); ena_netmap_rx_cleanup(&ctx); return (0); } static inline int ena_netmap_rx_frames(struct ena_netmap_ctx *ctx) { int rc = 0; int frames_counter = 0; ctx->nt = ctx->ring->next_to_clean; ctx->nm_i = ctx->kring->nr_hwtail; while((rc = ena_netmap_rx_frame(ctx)) == ENA_NETMAP_MORE_FRAMES) { frames_counter++; /* In case of multiple frames, it is not an error. 
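 * ENA_NETMAP_MORE_FRAMES only signals that another frame may follow, so
 * rc is reset to 0 and the loop continues until the device reports no
 * more completed descriptors or ENA_MAX_FRAMES is hit.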
*/ rc = 0; if (frames_counter > ENA_MAX_FRAMES) { ena_log_nm(ctx->adapter->pdev, ERR, "Driver is stuck in the Rx loop\n"); break; } }; ctx->kring->nr_hwtail = ctx->nm_i; ctx->kring->nr_kflags &= ~NKR_PENDINTR; ctx->ring->next_to_clean = ctx->nt; return (rc); } static inline int ena_netmap_rx_frame(struct ena_netmap_ctx *ctx) { struct ena_com_rx_ctx ena_rx_ctx; enum ena_regs_reset_reason_types reset_reason; int rc, len = 0; uint16_t buf, nm; ena_rx_ctx.ena_bufs = ctx->ring->ena_bufs; ena_rx_ctx.max_bufs = ctx->adapter->max_rx_sgl_size; bus_dmamap_sync(ctx->io_cq->cdesc_addr.mem_handle.tag, ctx->io_cq->cdesc_addr.mem_handle.map, BUS_DMASYNC_POSTREAD); rc = ena_com_rx_pkt(ctx->io_cq, ctx->io_sq, &ena_rx_ctx); if (unlikely(rc != 0)) { ena_log_nm(ctx->adapter->pdev, ERR, "Failed to read pkt from the device with error: %d\n", rc); if (rc == ENA_COM_NO_SPACE) { counter_u64_add(ctx->ring->rx_stats.bad_desc_num, 1); reset_reason = ENA_REGS_RESET_TOO_MANY_RX_DESCS; } else { counter_u64_add(ctx->ring->rx_stats.bad_req_id, 1); reset_reason = ENA_REGS_RESET_INV_RX_REQ_ID; } ena_trigger_reset(ctx->adapter, reset_reason); return (rc); } if (unlikely(ena_rx_ctx.descs == 0)) return (ENA_NETMAP_NO_MORE_FRAMES); ena_log_nm(ctx->adapter->pdev, DBG, "Rx: q %d got packet from ena. descs #:" " %d l3 proto %d l4 proto %d hash: %x\n", ctx->ring->qid, ena_rx_ctx.descs, ena_rx_ctx.l3_proto, ena_rx_ctx.l4_proto, ena_rx_ctx.hash); for (buf = 0; buf < ena_rx_ctx.descs; buf++) if ((rc = ena_netmap_rx_load_desc(ctx, buf, &len)) != 0) break; /* * ena_netmap_rx_load_desc doesn't know the number of descriptors. * It just set flag NS_MOREFRAG to all slots, then here flag of * last slot is cleared. */ ctx->slots[nm_prev(ctx->nm_i, ctx->lim)].flags = NS_BUF_CHANGED; if (rc != 0) { goto rx_clear_desc; } bus_dmamap_sync(ctx->io_cq->cdesc_addr.mem_handle.tag, ctx->io_cq->cdesc_addr.mem_handle.map, BUS_DMASYNC_PREREAD); counter_enter(); counter_u64_add_protected(ctx->ring->rx_stats.bytes, len); counter_u64_add_protected(ctx->adapter->hw_stats.rx_bytes, len); counter_u64_add_protected(ctx->ring->rx_stats.cnt, 1); counter_u64_add_protected(ctx->adapter->hw_stats.rx_packets, 1); counter_exit(); return (ENA_NETMAP_MORE_FRAMES); rx_clear_desc: nm = ctx->nm_i; /* Remove failed packet from ring */ while(buf--) { ctx->slots[nm].flags = 0; ctx->slots[nm].len = 0; nm = nm_prev(nm, ctx->lim); } return (rc); } static inline int ena_netmap_rx_load_desc(struct ena_netmap_ctx *ctx, uint16_t buf, int *len) { struct ena_rx_buffer *rx_info; uint16_t req_id; req_id = ctx->ring->ena_bufs[buf].req_id; rx_info = &ctx->ring->rx_buffer_info[req_id]; bus_dmamap_sync(ctx->adapter->rx_buf_tag, rx_info->map, BUS_DMASYNC_POSTREAD); netmap_unload_map(ctx->na, ctx->adapter->rx_buf_tag, rx_info->map); ENA_WARN(ctx->slots[ctx->nm_i].buf_idx != 0, ctx->adapter->ena_dev, "Rx idx is not 0.\n"); ctx->slots[ctx->nm_i].buf_idx = rx_info->netmap_buf_idx; rx_info->netmap_buf_idx = 0; /* * Set NS_MOREFRAG to all slots. * Then ena_netmap_rx_frame clears it from last one. 
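 * (The caller does this by overwriting the flags of the previous slot
 * with NS_BUF_CHANGED once the whole descriptor chain has been loaded.)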
*/ ctx->slots[ctx->nm_i].flags |= NS_MOREFRAG | NS_BUF_CHANGED; ctx->slots[ctx->nm_i].len = ctx->ring->ena_bufs[buf].len; *len += ctx->slots[ctx->nm_i].len; ctx->ring->free_rx_ids[ctx->nt] = req_id; ena_log_nm(ctx->adapter->pdev, DBG, "rx_info %p, buf_idx %d, paddr %jx, nm: %d\n", rx_info, ctx->slots[ctx->nm_i].buf_idx, (uintmax_t)rx_info->ena_buf.paddr, ctx->nm_i); ctx->nm_i = nm_next(ctx->nm_i, ctx->lim); ctx->nt = ENA_RX_RING_IDX_NEXT(ctx->nt, ctx->ring->ring_size); return (0); } static inline void ena_netmap_rx_cleanup(struct ena_netmap_ctx *ctx) { int refill_required; refill_required = ctx->kring->rhead - ctx->kring->nr_hwcur; if (ctx->kring->nr_hwcur != ctx->kring->nr_hwtail) refill_required -= 1; if (refill_required == 0) return; else if (refill_required < 0) refill_required += ctx->kring->nkr_num_slots; ena_refill_rx_bufs(ctx->ring, refill_required); } static inline void ena_netmap_fill_ctx(struct netmap_kring *kring, struct ena_netmap_ctx *ctx, uint16_t ena_qid) { ctx->kring = kring; ctx->na = kring->na; ctx->adapter = ctx->na->ifp->if_softc; ctx->lim = kring->nkr_num_slots - 1; ctx->io_cq = &ctx->adapter->ena_dev->io_cq_queues[ena_qid]; ctx->io_sq = &ctx->adapter->ena_dev->io_sq_queues[ena_qid]; ctx->slots = kring->ring->slot; } void ena_netmap_unload(struct ena_adapter *adapter, bus_dmamap_t map) { struct netmap_adapter *na = NA(adapter->ifp); netmap_unload_map(na, adapter->tx_buf_tag, map); } #endif /* DEV_NETMAP */
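For context on the Tx doorbell change above: the removed open-coded sequence (ena_com_write_sq_doorbell(), the doorbells counter bump, and the acum_pkts reset) is collapsed into a single ena_ring_tx_doorbell() call. The helper itself is not part of this hunk; judging from the removed lines it presumably wraps exactly those three statements. A minimal sketch, assuming it takes the Tx ring and uses the ring's ena_com_io_sq member (which this file already dereferences in ena_netmap_tx_cleanup()):

static inline void
ena_ring_tx_doorbell(struct ena_ring *tx_ring)
{
	/* Ring the device doorbell for this Tx submission queue. */
	ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq);

	/* Account for the write and restart the DB_THRESHOLD batching counter. */
	counter_u64_add(tx_ring->tx_stats.doorbells, 1);
	tx_ring->acum_pkts = 0;
}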