diff --git a/share/man/man4/gve.4 b/share/man/man4/gve.4 index 297f1071b4cd..32f5d2819e0d 100644 --- a/share/man/man4/gve.4 +++ b/share/man/man4/gve.4 @@ -1,279 +1,296 @@ .\" SPDX-License-Identifier: BSD-3-Clause .\" .\" Copyright (c) 2023-2024 Google LLC .\" .\" Redistribution and use in source and binary forms, with or without modification, .\" are permitted provided that the following conditions are met: .\" .\" 1. Redistributions of source code must retain the above copyright notice, this .\" list of conditions and the following disclaimer. .\" .\" 2. Redistributions in binary form must reproduce the above copyright notice, .\" this list of conditions and the following disclaimer in the documentation .\" and/or other materials provided with the distribution. .\" .\" 3. Neither the name of the copyright holder nor the names of its contributors .\" may be used to endorse or promote products derived from this software without .\" specific prior written permission. .\" .\" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED .\" WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE .\" DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR .\" ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES .\" (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; .\" LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON .\" ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT .\" (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS .\" SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .Dd October 14, 2024 .Dt GVE 4 .Os .Sh NAME .Nm gve .Nd "Ethernet driver for Google Virtual NIC (gVNIC)" .Sh SYNOPSIS To compile this driver into the kernel, place the following lines in your kernel configuration file: .Bd -ragged -offset indent .Cd "device gve" .Ed .Pp Alternatively, to load the driver as a module at boot time, place the following line in .Xr loader.conf 5 : .Bd -literal -offset indent if_gve_load="YES" .Ed .Sh DESCRIPTION gVNIC is a virtual network interface designed specifically for Google Compute Engine (GCE). It is required to support per-VM Tier-1 networking performance, and for using certain VM shapes on GCE. .Pp .Nm is the driver for gVNIC. It supports the following features: .Pp .Bl -bullet -compact .It RX checksum offload .It TX checksum offload .It TCP Segmentation Offload (TSO) .It Large Receive Offload (LRO) in software .It Jumbo frames .It Receive Side Scaling (RSS) .El .Pp For more information on configuring this device, see .Xr ifconfig 8 .
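The tunables shown in the EXAMPLES and SYSCTL VARIABLES sections below can also be queried from a program. A minimal userspace sketch using sysctlbyname(3) to read the negotiated queue format; this is illustrative only and assumes the driver is loaded:

#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
	char fmt[16];
	size_t len = sizeof(fmt) - 1;

	/* hw.gve.queue_format is the read-only sysctl documented below. */
	if (sysctlbyname("hw.gve.queue_format", fmt, &len, NULL, 0) == -1) {
		perror("sysctlbyname");
		return (1);
	}
	fmt[len] = '\0';
	printf("gve queue format: %s\n", fmt);
	return (0);
}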
.Sh HARDWARE .Nm binds to a single PCI device ID presented by gVNIC: .Pp .Bl -bullet -compact .It 0x1AE0:0x0042 .El .Sh EXAMPLES .Pp Change the TX queue count to 4 for the gve0 interface: .D1 sysctl dev.gve.0.num_tx_queues=4 .Pp Change the RX queue count to 4 for the gve0 interface: .D1 sysctl dev.gve.0.num_rx_queues=4 +.Pp +Change the TX ring size to 512 for the gve0 interface: +.D1 sysctl dev.gve.0.tx_ring_size=512 +.Pp +Change the RX ring size to 512 for the gve0 interface: +.D1 sysctl dev.gve.0.rx_ring_size=512 .Sh DIAGNOSTICS The following messages are recorded during driver initialization: .Bl -diag .It "Enabled MSIX with %d vectors" .It "Configured device resources" .It "Successfully attached %s" .It "Deconfigured device resources" .El .Pp These messages are seen if driver initialization fails. Global (across-queues) allocation failures: .Bl -diag .It "Failed to configure device resources: err=%d" .It "No compatible queue formats" .It "Failed to allocate ifnet struct" .It "Failed to allocate admin queue mem" .It "Failed to alloc DMA mem for DescribeDevice" .It "Failed to allocate QPL page" .El .Pp irq and BAR allocation failures: .Bl -diag .It "Failed to acquire any msix vectors" .It "Tried to acquire %d msix vectors, got only %d" .It "Failed to setup irq %d for Tx queue %d " .It "Failed to setup irq %d for Rx queue %d " .It "Failed to allocate irq %d for mgmnt queue" .It "Failed to setup irq %d for mgmnt queue, err: %d" .It "Failed to allocate BAR0" .It "Failed to allocate BAR2" .It "Failed to allocate msix table" .El .Pp Rx queue-specific allocation failures: .Bl -diag .It "No QPL left for rx ring %d" .It "Failed to alloc queue resources for rx ring %d" .It "Failed to alloc desc ring for rx ring %d" .It "Failed to alloc data ring for rx ring %d" .El .Pp Tx queue-specific allocation failures: .Bl -diag .It "No QPL left for tx ring %d" .It "Failed to alloc queue resources for tx ring %d" .It "Failed to alloc desc ring for tx ring %d" .It "Failed to vmap fifo, qpl_id = %d" .El .Pp The following messages are recorded when the interface detach fails: .Bl -diag .It "Failed to deconfigure device resources: err=%d" .El .Pp If bootverbose is on, the following messages are recorded when the interface is being brought up: .Bl -diag .It "Created %d rx queues" .It "Created %d tx queues" .It "MTU set to %d" .El .Pp The following messages are recorded when the interface is being brought down: .Bl -diag .It "Destroyed %d rx queues" .It "Destroyed %d tx queues" .El .Pp These messages are seen if errors are encountered when bringing the interface up or down: .Bl -diag .It "Failed to destroy rxq %d, err: %d" .It "Failed to destroy txq %d, err: %d" .It "Failed to create rxq %d, err: %d" .It "Failed to create txq %d, err: %d" .It "Failed to set MTU to %d" .It "Invalid new MTU setting. new mtu: %d max mtu: %d min mtu: %d" .It "Cannot bring the iface up when detached" .It "Reached max number of registered pages %lu > %lu" .It "Failed to init lro for rx ring %d" .El .Pp These messages are seen if any admin queue command fails: .Bl -diag .It "AQ command(%u): failed with status %d" .It "AQ command(%u): unknown status code %d" .It "AQ commands timed out, need to reset AQ" .It "Unknown AQ command opcode %d" .El .Pp These messages are recorded when the device is being reset due to an error: .Bl -diag .It "Scheduling reset task!" .It "Waiting until admin queue is released." 
.It "Admin queue released" .El .Pp If it was the NIC that requested the reset, this message is recorded: .Bl -diag .It "Device requested reset" .El .Pp If the reset fails during the reinitialization phase, this message is recorded: .Bl -diag .It "Restore failed!" .El .Pp These two messages correspoond to the NIC alerting the driver to link state changes: .Bl -diag .It "Device link is up." .It "Device link is down." .El .Pp Apart from these messages, the driver exposes per-queue packet and error counters as sysctl nodes. Global (across queues) counters can be read using .Xr netstat 1 . .Sh SYSCTL VARIABLES .Nm exposes the following .Xr sysctl 8 variables: .Bl -tag -width indent .It Va hw.gve.driver_version The driver version. This is read-only. .It Va hw.gve.queue_format The queue format in use. This is read-only. .It Va hw.gve.disable_hw_lro Setting this boot-time tunable to 1 disables Large Receive Offload (LRO) in the NIC. The default value is 0, which means hardware LRO is enabled by default. The software LRO stack in the kernel is always used. This sysctl variable needs to be set before loading the driver, using .Xr loader.conf 5 . .It Va dev.gve.X.num_rx_queues and dev.gve.X.num_tx_queues Run-time tunables that represent the number of currently used RX/TX queues. The default value is the max number of RX/TX queues the device can support. .Pp This call turns down the interface while setting up the new queues, which may potentially cause any new packets to be dropped. This call can fail if the system is not able to provide the driver with enough resources. In that situation, the driver will revert to the previous number of RX/TX queues. If this also fails, a device reset will be triggered. .Pp Note: sysctl nodes for queue stats remain available even if a queue is removed. .Pp +.It Va dev.gve.X.rx_ring_size and dev.gve.X.tx_ring_size +Run-time tunables that represent the current ring size for RX/TX queues. +The default value is set to device defaults for ring size. +.Pp +This call turns down the interface while setting up the queues with the new ring size, +which may potentially cause any new packets to be dropped. +This call can fail if the system is not able to provide the driver with enough resources. +In that situation, the driver will try to revert to the previous ring size for RX/TX queues. +If this also fails, the device will be in an unhealthy state and will need to be reloaded. +This value must be a power of 2 and within the defined range. +.Pp .El .Sh LIMITATIONS .Nm does not support the transmission of VLAN-tagged packets. All VLAN-tagged traffic is dropped. .Sh QUEUE FORMATS .Nm features different datapath modes called queue formats: .Pp .Bl -bullet -compact .It GQI_QPL: "QPL" stands for "Queue Page List" and refers to the fact that hardware expects a fixed bounce buffer and cannot access arbitrary memory. GQI is the older descriptor format. The G in "GQI" refers to an older generation of hardware, and the "QI" stands for "Queue In-order" referring to the fact that the NIC sends Tx and Rx completions in the same order as the one in which the corresponding descriptors were posted by the driver. .It DQO_RDA: DQO is the descriptor format required to take full advantage of next generation VM shapes. "RDA" stands for "Raw DMA Addressing" and refers to the fact that hardware can work with DMA-ed packets and does not expect them to be copied into or out of a fixed bounce buffer. 
The D in "DQO" refers to a newer generation of hardware, and the "QO" stands for "Queue Out-of-order" referring to the fact that the NIC might send Tx and Rx completions in an order different from the one in which the corresponding descriptors were posted by the driver. .It DQO_QPL: The next generation descriptor format in the "QPL" mode. .El .Sh SUPPORT Please email gvnic-drivers@google.com with the specifics of the issue encountered. .Sh SEE ALSO .Xr netstat 1 , .Xr loader.conf 5 , .Xr ifconfig 8 , .Xr sysctl 8 .Sh HISTORY The .Nm device driver first appeared in .Fx 14.0 . .Sh AUTHORS The .Nm driver was written by Google. diff --git a/sys/dev/gve/gve.h b/sys/dev/gve/gve.h index 2b49ee5ad45a..5b298b889ed6 100644 --- a/sys/dev/gve/gve.h +++ b/sys/dev/gve/gve.h @@ -1,703 +1,713 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2023-2024 Google LLC * * Redistribution and use in source and binary forms, with or without modification, * are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software without * specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _GVE_FBSD_H #define _GVE_FBSD_H #include "gve_desc.h" #include "gve_plat.h" #include "gve_register.h" #ifndef PCI_VENDOR_ID_GOOGLE #define PCI_VENDOR_ID_GOOGLE 0x1ae0 #endif #define PCI_DEV_ID_GVNIC 0x0042 #define GVE_REGISTER_BAR 0 #define GVE_DOORBELL_BAR 2 /* Driver can alloc up to 2 segments for the header and 2 for the payload. */ #define GVE_TX_MAX_DESCS 4 #define GVE_TX_BUFRING_ENTRIES 4096 #define ADMINQ_SIZE PAGE_SIZE #define GVE_DEFAULT_RX_BUFFER_SIZE 2048 /* Each RX bounce buffer page can fit two packet buffers. */ #define GVE_DEFAULT_RX_BUFFER_OFFSET (PAGE_SIZE / 2) /* PTYPEs are always 10 bits. */ #define GVE_NUM_PTYPES 1024 /* * Number of descriptors per queue page list. * Page count AKA QPL size can be derived by dividing the number of elements in * a page by the number of descriptors available. 
*/ #define GVE_QPL_DIVISOR 16 +/* Ring Size Limits */ +#define GVE_DEFAULT_MIN_RX_RING_SIZE 512 +#define GVE_DEFAULT_MIN_TX_RING_SIZE 256 + static MALLOC_DEFINE(M_GVE, "gve", "gve allocations"); struct gve_dma_handle { bus_addr_t bus_addr; void *cpu_addr; bus_dma_tag_t tag; bus_dmamap_t map; }; union gve_tx_desc { struct gve_tx_pkt_desc pkt; /* first desc for a packet */ struct gve_tx_mtd_desc mtd; /* optional metadata descriptor */ struct gve_tx_seg_desc seg; /* subsequent descs for a packet */ }; /* Tracks the memory in the fifo occupied by a segment of a packet */ struct gve_tx_iovec { uint32_t iov_offset; /* offset into this segment */ uint32_t iov_len; /* length */ uint32_t iov_padding; /* padding associated with this segment */ }; /* Tracks allowed and current queue settings */ struct gve_queue_config { uint16_t max_queues; uint16_t num_queues; /* current */ }; struct gve_irq_db { __be32 index; } __aligned(CACHE_LINE_SIZE); /* * GVE_QUEUE_FORMAT_UNSPECIFIED must be zero since 0 is the default value * when the entire configure_device_resources command is zeroed out and the * queue_format is not specified. */ enum gve_queue_format { GVE_QUEUE_FORMAT_UNSPECIFIED = 0x0, GVE_GQI_RDA_FORMAT = 0x1, GVE_GQI_QPL_FORMAT = 0x2, GVE_DQO_RDA_FORMAT = 0x3, GVE_DQO_QPL_FORMAT = 0x4, }; enum gve_state_flags_bit { GVE_STATE_FLAG_ADMINQ_OK, GVE_STATE_FLAG_RESOURCES_OK, GVE_STATE_FLAG_QPLREG_OK, GVE_STATE_FLAG_RX_RINGS_OK, GVE_STATE_FLAG_TX_RINGS_OK, GVE_STATE_FLAG_QUEUES_UP, GVE_STATE_FLAG_LINK_UP, GVE_STATE_FLAG_DO_RESET, GVE_STATE_FLAG_IN_RESET, GVE_NUM_STATE_FLAGS /* Not part of the enum space */ }; BITSET_DEFINE(gve_state_flags, GVE_NUM_STATE_FLAGS); #define GVE_DEVICE_STATUS_RESET (0x1 << 1) #define GVE_DEVICE_STATUS_LINK_STATUS (0x1 << 2) #define GVE_RING_LOCK(ring) mtx_lock(&(ring)->ring_mtx) #define GVE_RING_TRYLOCK(ring) mtx_trylock(&(ring)->ring_mtx) #define GVE_RING_UNLOCK(ring) mtx_unlock(&(ring)->ring_mtx) #define GVE_RING_ASSERT(ring) mtx_assert(&(ring)->ring_mtx, MA_OWNED) #define GVE_IFACE_LOCK_INIT(lock) sx_init(&lock, "gve interface lock") #define GVE_IFACE_LOCK_DESTROY(lock) sx_destroy(&lock) #define GVE_IFACE_LOCK_LOCK(lock) sx_xlock(&lock) #define GVE_IFACE_LOCK_UNLOCK(lock) sx_unlock(&lock) #define GVE_IFACE_LOCK_ASSERT(lock) sx_assert(&lock, SA_XLOCKED) struct gve_queue_page_list { uint32_t id; uint32_t num_dmas; uint32_t num_pages; vm_offset_t kva; vm_page_t *pages; struct gve_dma_handle *dmas; }; struct gve_irq { struct resource *res; void *cookie; }; struct gve_rx_slot_page_info { void *page_address; vm_page_t page; uint32_t page_offset; uint16_t pad; }; /* * A single received packet split across multiple buffers may be * reconstructed using the information in this structure. */ struct gve_rx_ctx { /* head and tail of mbuf chain for the current packet */ struct mbuf *mbuf_head; struct mbuf *mbuf_tail; uint32_t total_size; uint8_t frag_cnt; bool is_tcp; bool drop_pkt; }; struct gve_ring_com { struct gve_priv *priv; uint32_t id; /* * BAR2 offset for this ring's doorbell and the * counter-array offset for this ring's counter. * Acquired from the device individually for each * queue in the queue_create adminq command. */ struct gve_queue_resources *q_resources; struct gve_dma_handle q_resources_mem; /* Byte offset into BAR2 where this ring's 4-byte irq doorbell lies. */ uint32_t irq_db_offset; /* Byte offset into BAR2 where this ring's 4-byte doorbell lies. */ uint32_t db_offset; /* * Index, not byte-offset, into the counter array where this ring's * 4-byte counter lies. 
*/ uint32_t counter_idx; /* * The index of the MSIX vector that was assigned to * this ring in `gve_alloc_irqs`. * * It is passed to the device in the queue_create adminq * command. * * Additionally, this also serves as the index into * `priv->irq_db_indices` where this ring's irq doorbell's * BAR2 offset, `irq_db_idx`, can be found. */ int ntfy_id; /* * The fixed bounce buffer for this ring. * Once allocated, has to be offered to the device * over the register-page-list adminq command. */ struct gve_queue_page_list *qpl; struct task cleanup_task; struct taskqueue *cleanup_tq; } __aligned(CACHE_LINE_SIZE); struct gve_rxq_stats { counter_u64_t rbytes; counter_u64_t rpackets; counter_u64_t rx_dropped_pkt; counter_u64_t rx_copybreak_cnt; counter_u64_t rx_frag_flip_cnt; counter_u64_t rx_frag_copy_cnt; counter_u64_t rx_dropped_pkt_desc_err; counter_u64_t rx_dropped_pkt_buf_post_fail; counter_u64_t rx_dropped_pkt_mbuf_alloc_fail; counter_u64_t rx_mbuf_dmamap_err; counter_u64_t rx_mbuf_mclget_null; }; #define NUM_RX_STATS (sizeof(struct gve_rxq_stats) / sizeof(counter_u64_t)) union gve_rx_qpl_buf_id_dqo { struct { uint16_t buf_id:11; /* Index into rx->dqo.bufs */ uint8_t frag_num:5; /* Which frag in the QPL page */ }; uint16_t all; } __packed; _Static_assert(sizeof(union gve_rx_qpl_buf_id_dqo) == 2, "gve: bad dqo qpl rx buf id length"); struct gve_rx_buf_dqo { union { /* RDA */ struct { struct mbuf *mbuf; bus_dmamap_t dmamap; uint64_t addr; bool mapped; }; /* QPL */ struct { uint8_t num_nic_frags; /* number of pending completions */ uint8_t next_idx; /* index of the next frag to post */ /* for chaining rx->dqo.used_bufs */ STAILQ_ENTRY(gve_rx_buf_dqo) stailq_entry; }; }; /* for chaining rx->dqo.free_bufs */ SLIST_ENTRY(gve_rx_buf_dqo) slist_entry; }; /* power-of-2 sized receive ring */ struct gve_rx_ring { struct gve_ring_com com; struct gve_dma_handle desc_ring_mem; uint32_t cnt; /* free-running total number of completed packets */ uint32_t fill_cnt; /* free-running total number of descs and buffs posted */ union { /* GQI-only fields */ struct { struct gve_dma_handle data_ring_mem; /* accessed in the GQ receive hot path */ struct gve_rx_desc *desc_ring; union gve_rx_data_slot *data_ring; struct gve_rx_slot_page_info *page_info; uint32_t mask; /* masks the cnt and fill_cnt to the size of the ring */ uint8_t seq_no; /* helps traverse the descriptor ring */ }; /* DQO-only fields */ struct { struct gve_dma_handle compl_ring_mem; struct gve_rx_compl_desc_dqo *compl_ring; struct gve_rx_desc_dqo *desc_ring; struct gve_rx_buf_dqo *bufs; /* Parking place for posted buffers */ bus_dma_tag_t buf_dmatag; /* To dmamap posted mbufs with */ uint32_t buf_cnt; /* Size of the bufs array */ uint32_t mask; /* One less than the sizes of the desc and compl rings */ uint32_t head; /* The index at which to post the next buffer */ uint32_t tail; /* The index at which to receive the next compl */ uint8_t cur_gen_bit; /* Gets flipped on every cycle of the compl ring */ SLIST_HEAD(, gve_rx_buf_dqo) free_bufs; /* * Only used in QPL mode. Pages referred to by if_input-ed mbufs * stay parked here till their wire count comes back to 1. * Pages are moved here once there are no pending completions. */ STAILQ_HEAD(, gve_rx_buf_dqo) used_bufs; } dqo; }; struct lro_ctrl lro; struct gve_rx_ctx ctx; struct gve_rxq_stats stats; } __aligned(CACHE_LINE_SIZE); /* * A contiguous representation of the pages composing the Tx bounce buffer. * The xmit taskqueue and the completion taskqueue both simultaneously use it.
* Both operate on `available`: the xmit tq lowers it and the completion tq * raises it. `head` is the last location written at and so only the xmit tq * uses it. */ struct gve_tx_fifo { vm_offset_t base; /* address of base of FIFO */ uint32_t size; /* total size */ volatile int available; /* how much space is still available */ uint32_t head; /* offset to write at */ }; struct gve_tx_buffer_state { struct mbuf *mbuf; struct gve_tx_iovec iov[GVE_TX_MAX_DESCS]; }; struct gve_txq_stats { counter_u64_t tbytes; counter_u64_t tpackets; counter_u64_t tso_packet_cnt; counter_u64_t tx_dropped_pkt; counter_u64_t tx_delayed_pkt_nospace_device; counter_u64_t tx_dropped_pkt_nospace_bufring; counter_u64_t tx_delayed_pkt_nospace_descring; counter_u64_t tx_delayed_pkt_nospace_compring; counter_u64_t tx_delayed_pkt_nospace_qpl_bufs; counter_u64_t tx_delayed_pkt_tsoerr; counter_u64_t tx_dropped_pkt_vlan; counter_u64_t tx_mbuf_collapse; counter_u64_t tx_mbuf_defrag; counter_u64_t tx_mbuf_defrag_err; counter_u64_t tx_mbuf_dmamap_enomem_err; counter_u64_t tx_mbuf_dmamap_err; }; #define NUM_TX_STATS (sizeof(struct gve_txq_stats) / sizeof(counter_u64_t)) struct gve_tx_pending_pkt_dqo { struct mbuf *mbuf; union { /* RDA */ bus_dmamap_t dmamap; /* QPL */ struct { /* * A linked list of entries from qpl_bufs that served * as the bounce buffer for this packet. */ int32_t qpl_buf_head; uint32_t num_qpl_bufs; }; }; uint8_t state; /* the gve_packet_state enum */ int next; /* To chain the free_pending_pkts lists */ }; /* power-of-2 sized transmit ring */ struct gve_tx_ring { struct gve_ring_com com; struct gve_dma_handle desc_ring_mem; struct task xmit_task; struct taskqueue *xmit_tq; bool stopped; /* Accessed when writing descriptors */ struct buf_ring *br; struct mtx ring_mtx; uint32_t req; /* free-running total number of packets written to the nic */ uint32_t done; /* free-running total number of completed packets */ union { /* GQI specific stuff */ struct { union gve_tx_desc *desc_ring; struct gve_tx_buffer_state *info; struct gve_tx_fifo fifo; uint32_t mask; /* masks the req and done to the size of the ring */ }; /* DQO specific stuff */ struct { struct gve_dma_handle compl_ring_mem; /* Accessed when writing descriptors */ struct { union gve_tx_desc_dqo *desc_ring; uint32_t desc_mask; /* masks head and tail to the size of desc_ring */ uint32_t desc_head; /* last desc read by NIC, cached value of hw_tx_head */ uint32_t desc_tail; /* last desc written by driver */ uint32_t last_re_idx; /* desc which last had "report event" set */ /* * The head index of a singly linked list containing pending packet objects * to park mbufs till the NIC sends completions. Once this list is depleted, * the "_prd" suffixed producer list, grown by the completion taskqueue, * is stolen. */ int32_t free_pending_pkts_csm; /* * The head index of a singly linked list representing QPL page fragments * to copy mbuf payload into for the NIC to see. Once this list is depleted, * the "_prd" suffixed producer list, grown by the completion taskqueue, * is stolen. * * Only used in QPL mode. int32_t because atomic_swap_16 doesn't exist. */ int32_t free_qpl_bufs_csm; uint32_t qpl_bufs_consumed; /* Allows quickly checking for buf availability */ uint32_t qpl_bufs_produced_cached; /* Cached value of qpl_bufs_produced */ /* DMA params for mapping Tx mbufs. Only used in RDA mode. 
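* In QPL mode the payload is instead copied into the fixed bounce buffer, * so no per-mbuf dmamaps are needed.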
*/ bus_dma_tag_t buf_dmatag; } __aligned(CACHE_LINE_SIZE); /* Accessed when processing completions */ struct { struct gve_tx_compl_desc_dqo *compl_ring; uint32_t compl_mask; /* masks head to the size of compl_ring */ uint32_t compl_head; /* last completion read by driver */ uint8_t cur_gen_bit; /* NIC flips a bit on every pass */ uint32_t hw_tx_head; /* last desc read by NIC */ /* * The completion taskqueue moves pending-packet objects to this * list after freeing the mbuf. The "_prd" denotes that this is * a producer list. The transmit taskqueue steals this list once * its consumer list, with the "_csm" suffix, is depleted. */ int32_t free_pending_pkts_prd; /* * The completion taskqueue moves the QPL pages corresponding to a * completed packet into this list. It is only used in QPL mode. * The "_prd" denotes that this is a producer list. The transmit * taskqueue steals this list once its consumer list, with the "_csm" * suffix, is depleted. * * Only used in QPL mode. int32_t because atomic_swap_16 doesn't exist. */ int32_t free_qpl_bufs_prd; uint32_t qpl_bufs_produced; } __aligned(CACHE_LINE_SIZE); /* Accessed by both the completion and xmit loops */ struct { /* completion tags index into this array */ struct gve_tx_pending_pkt_dqo *pending_pkts; uint16_t num_pending_pkts; /* * Represents QPL page fragments. An index into this array * always represents the same QPL page fragment. The value * is also an index into this array and serves as a means * to chain buffers into linked lists whose heads are * either free_qpl_bufs_prd or free_qpl_bufs_csm or * qpl_bufs_head. */ int32_t *qpl_bufs; } __aligned(CACHE_LINE_SIZE); } dqo; }; struct gve_txq_stats stats; } __aligned(CACHE_LINE_SIZE); enum gve_packet_state { /* * Packet does not yet have a dmamap created. * This should always be zero since state is not explicitly initialized. */ GVE_PACKET_STATE_UNALLOCATED, /* Packet has a dmamap and is in free list, available to be allocated.
*/ GVE_PACKET_STATE_FREE, /* Packet is expecting a regular data completion */ GVE_PACKET_STATE_PENDING_DATA_COMPL, }; struct gve_ptype { uint8_t l3_type; /* `gve_l3_type` in gve_adminq.h */ uint8_t l4_type; /* `gve_l4_type` in gve_adminq.h */ }; struct gve_ptype_lut { struct gve_ptype ptypes[GVE_NUM_PTYPES]; }; struct gve_priv { if_t ifp; device_t dev; struct ifmedia media; uint8_t mac[ETHER_ADDR_LEN]; struct gve_dma_handle aq_mem; struct resource *reg_bar; /* BAR0 */ struct resource *db_bar; /* BAR2 */ struct resource *msix_table; uint32_t mgmt_msix_idx; uint32_t rx_copybreak; uint16_t num_event_counters; uint16_t default_num_queues; uint16_t tx_desc_cnt; + uint16_t max_tx_desc_cnt; + uint16_t min_tx_desc_cnt; uint16_t rx_desc_cnt; + uint16_t max_rx_desc_cnt; + uint16_t min_rx_desc_cnt; uint16_t rx_pages_per_qpl; uint64_t max_registered_pages; uint64_t num_registered_pages; uint32_t supported_features; uint16_t max_mtu; + bool modify_ringsize_enabled; struct gve_dma_handle counter_array_mem; __be32 *counters; struct gve_dma_handle irqs_db_mem; struct gve_irq_db *irq_db_indices; enum gve_queue_format queue_format; struct gve_queue_config tx_cfg; struct gve_queue_config rx_cfg; uint32_t num_queues; struct gve_irq *irq_tbl; struct gve_tx_ring *tx; struct gve_rx_ring *rx; struct gve_ptype_lut *ptype_lut_dqo; /* * Admin queue - see gve_adminq.h * Since AQ cmds do not run in steady state, 32 bit counters suffice */ struct gve_adminq_command *adminq; vm_paddr_t adminq_bus_addr; uint32_t adminq_mask; /* masks prod_cnt to adminq size */ uint32_t adminq_prod_cnt; /* free-running count of AQ cmds executed */ uint32_t adminq_cmd_fail; /* free-running count of AQ cmds failed */ uint32_t adminq_timeouts; /* free-running count of AQ cmds timeouts */ /* free-running count of each distinct AQ cmd executed */ uint32_t adminq_describe_device_cnt; uint32_t adminq_cfg_device_resources_cnt; uint32_t adminq_register_page_list_cnt; uint32_t adminq_unregister_page_list_cnt; uint32_t adminq_create_tx_queue_cnt; uint32_t adminq_create_rx_queue_cnt; uint32_t adminq_destroy_tx_queue_cnt; uint32_t adminq_destroy_rx_queue_cnt; uint32_t adminq_dcfg_device_resources_cnt; uint32_t adminq_set_driver_parameter_cnt; uint32_t adminq_verify_driver_compatibility_cnt; uint32_t adminq_get_ptype_map_cnt; uint32_t interface_up_cnt; uint32_t interface_down_cnt; uint32_t reset_cnt; struct task service_task; struct taskqueue *service_tq; struct gve_state_flags state_flags; struct sx gve_iface_lock; }; static inline bool gve_get_state_flag(struct gve_priv *priv, int pos) { return (BIT_ISSET(GVE_NUM_STATE_FLAGS, pos, &priv->state_flags)); } static inline void gve_set_state_flag(struct gve_priv *priv, int pos) { BIT_SET_ATOMIC(GVE_NUM_STATE_FLAGS, pos, &priv->state_flags); } static inline void gve_clear_state_flag(struct gve_priv *priv, int pos) { BIT_CLR_ATOMIC(GVE_NUM_STATE_FLAGS, pos, &priv->state_flags); } static inline bool gve_is_gqi(struct gve_priv *priv) { return (priv->queue_format == GVE_GQI_QPL_FORMAT); } static inline bool gve_is_qpl(struct gve_priv *priv) { return (priv->queue_format == GVE_GQI_QPL_FORMAT || priv->queue_format == GVE_DQO_QPL_FORMAT); } /* Defined in gve_main.c */ void gve_schedule_reset(struct gve_priv *priv); int gve_adjust_tx_queues(struct gve_priv *priv, uint16_t new_queue_cnt); int gve_adjust_rx_queues(struct gve_priv *priv, uint16_t new_queue_cnt); +int gve_adjust_ring_sizes(struct gve_priv *priv, uint16_t new_desc_cnt, bool is_rx); /* Register access functions defined in gve_utils.c */ uint32_t 
gve_reg_bar_read_4(struct gve_priv *priv, bus_size_t offset); void gve_reg_bar_write_4(struct gve_priv *priv, bus_size_t offset, uint32_t val); void gve_db_bar_write_4(struct gve_priv *priv, bus_size_t offset, uint32_t val); void gve_db_bar_dqo_write_4(struct gve_priv *priv, bus_size_t offset, uint32_t val); /* QPL (Queue Page List) functions defined in gve_qpl.c */ struct gve_queue_page_list *gve_alloc_qpl(struct gve_priv *priv, uint32_t id, int npages, bool single_kva); void gve_free_qpl(struct gve_priv *priv, struct gve_queue_page_list *qpl); int gve_register_qpls(struct gve_priv *priv); int gve_unregister_qpls(struct gve_priv *priv); void gve_mextadd_free(struct mbuf *mbuf); /* TX functions defined in gve_tx.c */ int gve_alloc_tx_rings(struct gve_priv *priv, uint16_t start_idx, uint16_t stop_idx); void gve_free_tx_rings(struct gve_priv *priv, uint16_t start_idx, uint16_t stop_idx); int gve_create_tx_rings(struct gve_priv *priv); int gve_destroy_tx_rings(struct gve_priv *priv); int gve_tx_intr(void *arg); int gve_xmit_ifp(if_t ifp, struct mbuf *mbuf); void gve_qflush(if_t ifp); void gve_xmit_tq(void *arg, int pending); void gve_tx_cleanup_tq(void *arg, int pending); /* TX functions defined in gve_tx_dqo.c */ int gve_tx_alloc_ring_dqo(struct gve_priv *priv, int i); void gve_tx_free_ring_dqo(struct gve_priv *priv, int i); void gve_clear_tx_ring_dqo(struct gve_priv *priv, int i); int gve_tx_intr_dqo(void *arg); int gve_xmit_dqo(struct gve_tx_ring *tx, struct mbuf **mbuf_ptr); int gve_xmit_dqo_qpl(struct gve_tx_ring *tx, struct mbuf *mbuf); void gve_tx_cleanup_tq_dqo(void *arg, int pending); /* RX functions defined in gve_rx.c */ int gve_alloc_rx_rings(struct gve_priv *priv, uint16_t start_idx, uint16_t stop_idx); void gve_free_rx_rings(struct gve_priv *priv, uint16_t start_idx, uint16_t stop_idx); int gve_create_rx_rings(struct gve_priv *priv); int gve_destroy_rx_rings(struct gve_priv *priv); int gve_rx_intr(void *arg); void gve_rx_cleanup_tq(void *arg, int pending); /* RX functions defined in gve_rx_dqo.c */ int gve_rx_alloc_ring_dqo(struct gve_priv *priv, int i); void gve_rx_free_ring_dqo(struct gve_priv *priv, int i); void gve_rx_prefill_buffers_dqo(struct gve_rx_ring *rx); void gve_clear_rx_ring_dqo(struct gve_priv *priv, int i); int gve_rx_intr_dqo(void *arg); void gve_rx_cleanup_tq_dqo(void *arg, int pending); /* DMA functions defined in gve_utils.c */ int gve_dma_alloc_coherent(struct gve_priv *priv, int size, int align, struct gve_dma_handle *dma); void gve_dma_free_coherent(struct gve_dma_handle *dma); int gve_dmamap_create(struct gve_priv *priv, int size, int align, struct gve_dma_handle *dma); void gve_dmamap_destroy(struct gve_dma_handle *dma); /* IRQ functions defined in gve_utils.c */ void gve_free_irqs(struct gve_priv *priv); int gve_alloc_irqs(struct gve_priv *priv); void gve_unmask_all_queue_irqs(struct gve_priv *priv); void gve_mask_all_queue_irqs(struct gve_priv *priv); /* Sysctl functions defined in gve_sysctl.c */ extern bool gve_disable_hw_lro; extern char gve_queue_format[8]; extern char gve_version[8]; void gve_setup_sysctl(struct gve_priv *priv); void gve_accum_stats(struct gve_priv *priv, uint64_t *rpackets, uint64_t *rbytes, uint64_t *rx_dropped_pkt, uint64_t *tpackets, uint64_t *tbytes, uint64_t *tx_dropped_pkt); /* Stats functions defined in gve_utils.c */ void gve_alloc_counters(counter_u64_t *stat, int num_stats); void gve_free_counters(counter_u64_t *stat, int num_stats); #endif /* _GVE_FBSD_H */ diff --git a/sys/dev/gve/gve_adminq.c
b/sys/dev/gve/gve_adminq.c index dd03f817f45a..3415d2fa4b60 100644 --- a/sys/dev/gve/gve_adminq.c +++ b/sys/dev/gve/gve_adminq.c @@ -1,938 +1,989 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2023-2024 Google LLC * * Redistribution and use in source and binary forms, with or without modification, * are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software without * specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include "gve.h" #include "gve_adminq.h" #define GVE_ADMINQ_SLEEP_LEN_MS 20 #define GVE_MAX_ADMINQ_EVENT_COUNTER_CHECK 10 #define GVE_ADMINQ_DEVICE_DESCRIPTOR_VERSION 1 #define GVE_REG_ADMINQ_ADDR 16 #define ADMINQ_SLOTS (ADMINQ_SIZE / sizeof(struct gve_adminq_command)) #define GVE_DEVICE_OPTION_ERROR_FMT "%s option error:\n" \ "Expected: length=%d, feature_mask=%x.\n" \ "Actual: length=%d, feature_mask=%x.\n" #define GVE_DEVICE_OPTION_TOO_BIG_FMT "Length of %s option larger than expected." \ " Possible older version of guest driver.\n" static void gve_parse_device_option(struct gve_priv *priv, struct gve_device_descriptor *device_descriptor, struct gve_device_option *option, struct gve_device_option_gqi_qpl **dev_op_gqi_qpl, struct gve_device_option_dqo_rda **dev_op_dqo_rda, struct gve_device_option_dqo_qpl **dev_op_dqo_qpl, + struct gve_device_option_modify_ring **dev_op_modify_ring, struct gve_device_option_jumbo_frames **dev_op_jumbo_frames) { uint32_t req_feat_mask = be32toh(option->required_features_mask); uint16_t option_length = be16toh(option->option_length); uint16_t option_id = be16toh(option->option_id); /* * If the length or feature mask doesn't match, continue without * enabling the feature. 
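* An option longer than this driver expects is still enabled; the extra * bytes are taken to come from a newer device and are ignored.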
*/ switch (option_id) { case GVE_DEV_OPT_ID_GQI_QPL: if (option_length < sizeof(**dev_op_gqi_qpl) || req_feat_mask != GVE_DEV_OPT_REQ_FEAT_MASK_GQI_QPL) { device_printf(priv->dev, GVE_DEVICE_OPTION_ERROR_FMT, "GQI QPL", (int)sizeof(**dev_op_gqi_qpl), GVE_DEV_OPT_REQ_FEAT_MASK_GQI_QPL, option_length, req_feat_mask); break; } if (option_length > sizeof(**dev_op_gqi_qpl)) { device_printf(priv->dev, GVE_DEVICE_OPTION_TOO_BIG_FMT, "GQI QPL"); } *dev_op_gqi_qpl = (void *)(option + 1); break; case GVE_DEV_OPT_ID_DQO_RDA: if (option_length < sizeof(**dev_op_dqo_rda) || req_feat_mask != GVE_DEV_OPT_REQ_FEAT_MASK_DQO_RDA) { device_printf(priv->dev, GVE_DEVICE_OPTION_ERROR_FMT, "DQO RDA", (int)sizeof(**dev_op_dqo_rda), GVE_DEV_OPT_REQ_FEAT_MASK_DQO_RDA, option_length, req_feat_mask); break; } if (option_length > sizeof(**dev_op_dqo_rda)) { device_printf(priv->dev, GVE_DEVICE_OPTION_TOO_BIG_FMT, "DQO RDA"); } *dev_op_dqo_rda = (void *)(option + 1); break; case GVE_DEV_OPT_ID_DQO_QPL: if (option_length < sizeof(**dev_op_dqo_qpl) || req_feat_mask != GVE_DEV_OPT_REQ_FEAT_MASK_DQO_QPL) { device_printf(priv->dev, GVE_DEVICE_OPTION_ERROR_FMT, "DQO QPL", (int)sizeof(**dev_op_dqo_qpl), GVE_DEV_OPT_REQ_FEAT_MASK_DQO_QPL, option_length, req_feat_mask); break; } if (option_length > sizeof(**dev_op_dqo_qpl)) { device_printf(priv->dev, GVE_DEVICE_OPTION_TOO_BIG_FMT, "DQO QPL"); } *dev_op_dqo_qpl = (void *)(option + 1); break; + case GVE_DEV_OPT_ID_MODIFY_RING: + if (option_length < (sizeof(**dev_op_modify_ring) - + sizeof(struct gve_ring_size_bound)) || + req_feat_mask != GVE_DEV_OPT_REQ_FEAT_MASK_MODIFY_RING) { + device_printf(priv->dev, GVE_DEVICE_OPTION_ERROR_FMT, + "Modify Ring", (int)sizeof(**dev_op_modify_ring), + GVE_DEV_OPT_REQ_FEAT_MASK_MODIFY_RING, + option_length, req_feat_mask); + break; + } + + if (option_length > sizeof(**dev_op_modify_ring)) { + device_printf(priv->dev, GVE_DEVICE_OPTION_TOO_BIG_FMT, + "Modify Ring"); + } + *dev_op_modify_ring = (void *)(option + 1); + + /* Min ring size included; set the minimum ring size. */ + if (option_length == sizeof(**dev_op_modify_ring)) { + priv->min_rx_desc_cnt = max( + be16toh((*dev_op_modify_ring)->min_ring_size.rx), + GVE_DEFAULT_MIN_RX_RING_SIZE); + priv->min_tx_desc_cnt = max( + be16toh((*dev_op_modify_ring)->min_ring_size.tx), + GVE_DEFAULT_MIN_TX_RING_SIZE); + } + break; + case GVE_DEV_OPT_ID_JUMBO_FRAMES: if (option_length < sizeof(**dev_op_jumbo_frames) || req_feat_mask != GVE_DEV_OPT_REQ_FEAT_MASK_JUMBO_FRAMES) { device_printf(priv->dev, GVE_DEVICE_OPTION_ERROR_FMT, "Jumbo Frames", (int)sizeof(**dev_op_jumbo_frames), GVE_DEV_OPT_REQ_FEAT_MASK_JUMBO_FRAMES, option_length, req_feat_mask); break; } if (option_length > sizeof(**dev_op_jumbo_frames)) { device_printf(priv->dev, GVE_DEVICE_OPTION_TOO_BIG_FMT, "Jumbo Frames"); } *dev_op_jumbo_frames = (void *)(option + 1); break; default: /* * If we don't recognize the option just continue * without doing anything. */ device_printf(priv->dev, "Unrecognized device option 0x%hx not enabled.\n", option_id); } } /* Process all device options for a given describe device call. 
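* Returns EINVAL if an option's stated length would run past the * descriptor's total_length; unrecognized options are skipped with a warning.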
*/ static int gve_process_device_options(struct gve_priv *priv, struct gve_device_descriptor *descriptor, struct gve_device_option_gqi_qpl **dev_op_gqi_qpl, struct gve_device_option_dqo_rda **dev_op_dqo_rda, struct gve_device_option_dqo_qpl **dev_op_dqo_qpl, + struct gve_device_option_modify_ring **dev_op_modify_ring, struct gve_device_option_jumbo_frames **dev_op_jumbo_frames) { char *desc_end = (char *)descriptor + be16toh(descriptor->total_length); const int num_options = be16toh(descriptor->num_device_options); struct gve_device_option *dev_opt; int i; /* The options struct directly follows the device descriptor. */ dev_opt = (void *)(descriptor + 1); for (i = 0; i < num_options; i++) { if ((char *)(dev_opt + 1) > desc_end || (char *)(dev_opt + 1) + be16toh(dev_opt->option_length) > desc_end) { device_printf(priv->dev, "options exceed device descriptor's total length.\n"); return (EINVAL); } gve_parse_device_option(priv, descriptor, dev_opt, dev_op_gqi_qpl, dev_op_dqo_rda, dev_op_dqo_qpl, + dev_op_modify_ring, dev_op_jumbo_frames); dev_opt = (void *)((char *)(dev_opt + 1) + be16toh(dev_opt->option_length)); } return (0); } static int gve_adminq_execute_cmd(struct gve_priv *priv, struct gve_adminq_command *cmd); static int gve_adminq_destroy_tx_queue(struct gve_priv *priv, uint32_t id) { struct gve_adminq_command cmd = (struct gve_adminq_command){}; cmd.opcode = htobe32(GVE_ADMINQ_DESTROY_TX_QUEUE); cmd.destroy_tx_queue.queue_id = htobe32(id); return (gve_adminq_execute_cmd(priv, &cmd)); } static int gve_adminq_destroy_rx_queue(struct gve_priv *priv, uint32_t id) { struct gve_adminq_command cmd = (struct gve_adminq_command){}; cmd.opcode = htobe32(GVE_ADMINQ_DESTROY_RX_QUEUE); cmd.destroy_rx_queue.queue_id = htobe32(id); return (gve_adminq_execute_cmd(priv, &cmd)); } int gve_adminq_destroy_rx_queues(struct gve_priv *priv, uint32_t num_queues) { int err; int i; for (i = 0; i < num_queues; i++) { err = gve_adminq_destroy_rx_queue(priv, i); if (err != 0) { device_printf(priv->dev, "Failed to destroy rxq %d, err: %d\n", i, err); } } if (err != 0) return (err); device_printf(priv->dev, "Destroyed %d rx queues\n", num_queues); return (0); } int gve_adminq_destroy_tx_queues(struct gve_priv *priv, uint32_t num_queues) { int err; int i; for (i = 0; i < num_queues; i++) { err = gve_adminq_destroy_tx_queue(priv, i); if (err != 0) { device_printf(priv->dev, "Failed to destroy txq %d, err: %d\n", i, err); } } if (err != 0) return (err); device_printf(priv->dev, "Destroyed %d tx queues\n", num_queues); return (0); } static int gve_adminq_create_rx_queue(struct gve_priv *priv, uint32_t queue_index) { struct gve_adminq_command cmd = (struct gve_adminq_command){}; struct gve_rx_ring *rx = &priv->rx[queue_index]; struct gve_dma_handle *qres_dma = &rx->com.q_resources_mem; bus_dmamap_sync(qres_dma->tag, qres_dma->map, BUS_DMASYNC_PREREAD); cmd.opcode = htobe32(GVE_ADMINQ_CREATE_RX_QUEUE); cmd.create_rx_queue = (struct gve_adminq_create_rx_queue) { .queue_id = htobe32(queue_index), .ntfy_id = htobe32(rx->com.ntfy_id), .queue_resources_addr = htobe64(qres_dma->bus_addr), .rx_ring_size = htobe16(priv->rx_desc_cnt), .packet_buffer_size = htobe16(GVE_DEFAULT_RX_BUFFER_SIZE), }; if (gve_is_gqi(priv)) { cmd.create_rx_queue.rx_desc_ring_addr = htobe64(rx->desc_ring_mem.bus_addr); cmd.create_rx_queue.rx_data_ring_addr = htobe64(rx->data_ring_mem.bus_addr); cmd.create_rx_queue.index = htobe32(queue_index); cmd.create_rx_queue.queue_page_list_id = htobe32((rx->com.qpl)->id); } else { 
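/* * DQO: the command's rx_desc_ring_addr field carries the completion * ring and its rx_data_ring_addr field carries the descriptor ring. */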
cmd.create_rx_queue.queue_page_list_id = htobe32(GVE_RAW_ADDRESSING_QPL_ID); cmd.create_rx_queue.rx_desc_ring_addr = htobe64(rx->dqo.compl_ring_mem.bus_addr); cmd.create_rx_queue.rx_data_ring_addr = htobe64(rx->desc_ring_mem.bus_addr); cmd.create_rx_queue.rx_buff_ring_size = htobe16(priv->rx_desc_cnt); cmd.create_rx_queue.enable_rsc = !!((if_getcapenable(priv->ifp) & IFCAP_LRO) && !gve_disable_hw_lro); } return (gve_adminq_execute_cmd(priv, &cmd)); } int gve_adminq_create_rx_queues(struct gve_priv *priv, uint32_t num_queues) { int err; int i; for (i = 0; i < num_queues; i++) { err = gve_adminq_create_rx_queue(priv, i); if (err != 0) { device_printf(priv->dev, "Failed to create rxq %d, err: %d\n", i, err); goto abort; } } if (bootverbose) device_printf(priv->dev, "Created %d rx queues\n", num_queues); return (0); abort: gve_adminq_destroy_rx_queues(priv, i); return (err); } static int gve_adminq_create_tx_queue(struct gve_priv *priv, uint32_t queue_index) { struct gve_adminq_command cmd = (struct gve_adminq_command){}; struct gve_tx_ring *tx = &priv->tx[queue_index]; struct gve_dma_handle *qres_dma = &tx->com.q_resources_mem; bus_dmamap_sync(qres_dma->tag, qres_dma->map, BUS_DMASYNC_PREREAD); cmd.opcode = htobe32(GVE_ADMINQ_CREATE_TX_QUEUE); cmd.create_tx_queue = (struct gve_adminq_create_tx_queue) { .queue_id = htobe32(queue_index), .queue_resources_addr = htobe64(qres_dma->bus_addr), .tx_ring_addr = htobe64(tx->desc_ring_mem.bus_addr), .ntfy_id = htobe32(tx->com.ntfy_id), .tx_ring_size = htobe16(priv->tx_desc_cnt), }; if (gve_is_gqi(priv)) { cmd.create_tx_queue.queue_page_list_id = htobe32((tx->com.qpl)->id); } else { cmd.create_tx_queue.queue_page_list_id = htobe32(GVE_RAW_ADDRESSING_QPL_ID); cmd.create_tx_queue.tx_comp_ring_addr = htobe64(tx->dqo.compl_ring_mem.bus_addr); cmd.create_tx_queue.tx_comp_ring_size = htobe16(priv->tx_desc_cnt); } return (gve_adminq_execute_cmd(priv, &cmd)); } int gve_adminq_create_tx_queues(struct gve_priv *priv, uint32_t num_queues) { int err; int i; for (i = 0; i < num_queues; i++) { err = gve_adminq_create_tx_queue(priv, i); if (err != 0) { device_printf(priv->dev, "Failed to create txq %d, err: %d\n", i, err); goto abort; } } if (bootverbose) device_printf(priv->dev, "Created %d tx queues\n", num_queues); return (0); abort: gve_adminq_destroy_tx_queues(priv, i); return (err); } int gve_adminq_set_mtu(struct gve_priv *priv, uint32_t mtu) { struct gve_adminq_command cmd = (struct gve_adminq_command){}; cmd.opcode = htobe32(GVE_ADMINQ_SET_DRIVER_PARAMETER); cmd.set_driver_param = (struct gve_adminq_set_driver_parameter) { .parameter_type = htobe32(GVE_SET_PARAM_MTU), .parameter_value = htobe64(mtu), }; return (gve_adminq_execute_cmd(priv, &cmd)); } static void gve_enable_supported_features(struct gve_priv *priv, uint32_t supported_features_mask, + const struct gve_device_option_modify_ring *dev_op_modify_ring, const struct gve_device_option_jumbo_frames *dev_op_jumbo_frames) { + if (dev_op_modify_ring && + (supported_features_mask & GVE_SUP_MODIFY_RING_MASK)) { + if (bootverbose) + device_printf(priv->dev, "MODIFY RING device option enabled.\n"); + priv->modify_ringsize_enabled = true; + priv->max_rx_desc_cnt = be16toh(dev_op_modify_ring->max_ring_size.rx); + priv->max_tx_desc_cnt = be16toh(dev_op_modify_ring->max_ring_size.tx); + } + if (dev_op_jumbo_frames && (supported_features_mask & GVE_SUP_JUMBO_FRAMES_MASK)) { if (bootverbose) device_printf(priv->dev, "JUMBO FRAMES device option enabled: %u.\n", be16toh(dev_op_jumbo_frames->max_mtu)); priv->max_mtu = 
be16toh(dev_op_jumbo_frames->max_mtu); } } int gve_adminq_describe_device(struct gve_priv *priv) { struct gve_adminq_command aq_cmd = (struct gve_adminq_command){}; struct gve_device_descriptor *desc; struct gve_dma_handle desc_mem; struct gve_device_option_gqi_qpl *dev_op_gqi_qpl = NULL; struct gve_device_option_dqo_rda *dev_op_dqo_rda = NULL; struct gve_device_option_dqo_qpl *dev_op_dqo_qpl = NULL; + struct gve_device_option_modify_ring *dev_op_modify_ring = NULL; struct gve_device_option_jumbo_frames *dev_op_jumbo_frames = NULL; uint32_t supported_features_mask = 0; int rc; int i; rc = gve_dma_alloc_coherent(priv, ADMINQ_SIZE, ADMINQ_SIZE, &desc_mem); if (rc != 0) { device_printf(priv->dev, "Failed to alloc DMA mem for DescribeDevice.\n"); return (rc); } desc = desc_mem.cpu_addr; aq_cmd.opcode = htobe32(GVE_ADMINQ_DESCRIBE_DEVICE); aq_cmd.describe_device.device_descriptor_addr = htobe64( desc_mem.bus_addr); aq_cmd.describe_device.device_descriptor_version = htobe32( GVE_ADMINQ_DEVICE_DESCRIPTOR_VERSION); aq_cmd.describe_device.available_length = htobe32(ADMINQ_SIZE); bus_dmamap_sync(desc_mem.tag, desc_mem.map, BUS_DMASYNC_PREWRITE); rc = gve_adminq_execute_cmd(priv, &aq_cmd); if (rc != 0) goto free_device_descriptor; bus_dmamap_sync(desc_mem.tag, desc_mem.map, BUS_DMASYNC_POSTREAD); + /* Default min in case device options don't have min values */ + priv->min_rx_desc_cnt = GVE_DEFAULT_MIN_RX_RING_SIZE; + priv->min_tx_desc_cnt = GVE_DEFAULT_MIN_TX_RING_SIZE; + rc = gve_process_device_options(priv, desc, &dev_op_gqi_qpl, &dev_op_dqo_rda, &dev_op_dqo_qpl, + &dev_op_modify_ring, &dev_op_jumbo_frames); if (rc != 0) goto free_device_descriptor; if (dev_op_dqo_rda != NULL) { snprintf(gve_queue_format, sizeof(gve_queue_format), "%s", "DQO RDA"); priv->queue_format = GVE_DQO_RDA_FORMAT; supported_features_mask = be32toh( dev_op_dqo_rda->supported_features_mask); if (bootverbose) device_printf(priv->dev, "Driver is running with DQO RDA queue format.\n"); } else if (dev_op_dqo_qpl != NULL) { snprintf(gve_queue_format, sizeof(gve_queue_format), "%s", "DQO QPL"); priv->queue_format = GVE_DQO_QPL_FORMAT; supported_features_mask = be32toh( dev_op_dqo_qpl->supported_features_mask); if (bootverbose) device_printf(priv->dev, "Driver is running with DQO QPL queue format.\n"); } else if (dev_op_gqi_qpl != NULL) { snprintf(gve_queue_format, sizeof(gve_queue_format), "%s", "GQI QPL"); priv->queue_format = GVE_GQI_QPL_FORMAT; supported_features_mask = be32toh( dev_op_gqi_qpl->supported_features_mask); if (bootverbose) device_printf(priv->dev, "Driver is running with GQI QPL queue format.\n"); } else { device_printf(priv->dev, "No compatible queue formats\n"); rc = EINVAL; goto free_device_descriptor; } priv->num_event_counters = be16toh(desc->counters); priv->default_num_queues = be16toh(desc->default_num_queues); priv->tx_desc_cnt = be16toh(desc->tx_queue_entries); priv->rx_desc_cnt = be16toh(desc->rx_queue_entries); priv->rx_pages_per_qpl = be16toh(desc->rx_pages_per_qpl); priv->max_registered_pages = be64toh(desc->max_registered_pages); priv->max_mtu = be16toh(desc->mtu); priv->default_num_queues = be16toh(desc->default_num_queues); priv->supported_features = supported_features_mask; + /* Default max to current in case modify ring size option is disabled */ + priv->max_rx_desc_cnt = priv->rx_desc_cnt; + priv->max_tx_desc_cnt = priv->tx_desc_cnt; + gve_enable_supported_features(priv, supported_features_mask, - dev_op_jumbo_frames); + dev_op_modify_ring, dev_op_jumbo_frames); for (i = 0; i < ETHER_ADDR_LEN; 
i++) priv->mac[i] = desc->mac[i]; free_device_descriptor: gve_dma_free_coherent(&desc_mem); return (rc); } int gve_adminq_register_page_list(struct gve_priv *priv, struct gve_queue_page_list *qpl) { struct gve_adminq_command cmd = (struct gve_adminq_command){}; uint32_t num_entries = qpl->num_pages; uint32_t size = num_entries * sizeof(qpl->dmas[0].bus_addr); __be64 *page_list; struct gve_dma_handle dma; int err; int i; err = gve_dma_alloc_coherent(priv, size, PAGE_SIZE, &dma); if (err != 0) return (ENOMEM); page_list = dma.cpu_addr; for (i = 0; i < num_entries; i++) page_list[i] = htobe64(qpl->dmas[i].bus_addr); bus_dmamap_sync(dma.tag, dma.map, BUS_DMASYNC_PREWRITE); cmd.opcode = htobe32(GVE_ADMINQ_REGISTER_PAGE_LIST); cmd.reg_page_list = (struct gve_adminq_register_page_list) { .page_list_id = htobe32(qpl->id), .num_pages = htobe32(num_entries), .page_address_list_addr = htobe64(dma.bus_addr), .page_size = htobe64(PAGE_SIZE), }; err = gve_adminq_execute_cmd(priv, &cmd); gve_dma_free_coherent(&dma); return (err); } int gve_adminq_unregister_page_list(struct gve_priv *priv, uint32_t page_list_id) { struct gve_adminq_command cmd = (struct gve_adminq_command){}; cmd.opcode = htobe32(GVE_ADMINQ_UNREGISTER_PAGE_LIST); cmd.unreg_page_list = (struct gve_adminq_unregister_page_list) { .page_list_id = htobe32(page_list_id), }; return (gve_adminq_execute_cmd(priv, &cmd)); } #define GVE_NTFY_BLK_BASE_MSIX_IDX 0 int gve_adminq_configure_device_resources(struct gve_priv *priv) { struct gve_adminq_command aq_cmd = (struct gve_adminq_command){}; bus_dmamap_sync(priv->irqs_db_mem.tag, priv->irqs_db_mem.map, BUS_DMASYNC_PREREAD); bus_dmamap_sync(priv->counter_array_mem.tag, priv->counter_array_mem.map, BUS_DMASYNC_PREREAD); aq_cmd.opcode = htobe32(GVE_ADMINQ_CONFIGURE_DEVICE_RESOURCES); aq_cmd.configure_device_resources = (struct gve_adminq_configure_device_resources) { .counter_array = htobe64(priv->counter_array_mem.bus_addr), .irq_db_addr = htobe64(priv->irqs_db_mem.bus_addr), .num_counters = htobe32(priv->num_event_counters), .num_irq_dbs = htobe32(priv->num_queues), .irq_db_stride = htobe32(sizeof(struct gve_irq_db)), .ntfy_blk_msix_base_idx = htobe32(GVE_NTFY_BLK_BASE_MSIX_IDX), .queue_format = priv->queue_format, }; return (gve_adminq_execute_cmd(priv, &aq_cmd)); } int gve_adminq_deconfigure_device_resources(struct gve_priv *priv) { struct gve_adminq_command aq_cmd = (struct gve_adminq_command){}; aq_cmd.opcode = htobe32(GVE_ADMINQ_DECONFIGURE_DEVICE_RESOURCES); return (gve_adminq_execute_cmd(priv, &aq_cmd)); } int gve_adminq_verify_driver_compatibility(struct gve_priv *priv, uint64_t driver_info_len, vm_paddr_t driver_info_addr) { struct gve_adminq_command aq_cmd = (struct gve_adminq_command){}; aq_cmd.opcode = htobe32(GVE_ADMINQ_VERIFY_DRIVER_COMPATIBILITY); aq_cmd.verify_driver_compatibility = (struct gve_adminq_verify_driver_compatibility) { .driver_info_len = htobe64(driver_info_len), .driver_info_addr = htobe64(driver_info_addr), }; return (gve_adminq_execute_cmd(priv, &aq_cmd)); } int gve_adminq_get_ptype_map_dqo(struct gve_priv *priv, struct gve_ptype_lut *ptype_lut_dqo) { struct gve_adminq_command aq_cmd = (struct gve_adminq_command){}; struct gve_ptype_map *ptype_map; struct gve_dma_handle dma; int err = 0; int i; err = gve_dma_alloc_coherent(priv, sizeof(*ptype_map), PAGE_SIZE, &dma); if (err) return (err); ptype_map = dma.cpu_addr; aq_cmd.opcode = htobe32(GVE_ADMINQ_GET_PTYPE_MAP); aq_cmd.get_ptype_map = (struct gve_adminq_get_ptype_map) { .ptype_map_len = htobe64(sizeof(*ptype_map)), 
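/* DMA address at which the device writes the 1024-entry ptype map. */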
.ptype_map_addr = htobe64(dma.bus_addr), }; err = gve_adminq_execute_cmd(priv, &aq_cmd); if (err) goto err; /* Populate ptype_lut_dqo. */ for (i = 0; i < GVE_NUM_PTYPES; i++) { ptype_lut_dqo->ptypes[i].l3_type = ptype_map->ptypes[i].l3_type; ptype_lut_dqo->ptypes[i].l4_type = ptype_map->ptypes[i].l4_type; } err: gve_dma_free_coherent(&dma); return (err); } int gve_adminq_alloc(struct gve_priv *priv) { int rc; if (gve_get_state_flag(priv, GVE_STATE_FLAG_ADMINQ_OK)) return (0); if (priv->aq_mem.cpu_addr == NULL) { rc = gve_dma_alloc_coherent(priv, ADMINQ_SIZE, ADMINQ_SIZE, &priv->aq_mem); if (rc != 0) { device_printf(priv->dev, "Failed to allocate admin queue mem\n"); return (rc); } } priv->adminq = priv->aq_mem.cpu_addr; priv->adminq_bus_addr = priv->aq_mem.bus_addr; if (priv->adminq == NULL) return (ENOMEM); priv->adminq_mask = ADMINQ_SLOTS - 1; priv->adminq_prod_cnt = 0; priv->adminq_cmd_fail = 0; priv->adminq_timeouts = 0; priv->adminq_describe_device_cnt = 0; priv->adminq_cfg_device_resources_cnt = 0; priv->adminq_register_page_list_cnt = 0; priv->adminq_unregister_page_list_cnt = 0; priv->adminq_create_tx_queue_cnt = 0; priv->adminq_create_rx_queue_cnt = 0; priv->adminq_destroy_tx_queue_cnt = 0; priv->adminq_destroy_rx_queue_cnt = 0; priv->adminq_dcfg_device_resources_cnt = 0; priv->adminq_set_driver_parameter_cnt = 0; priv->adminq_get_ptype_map_cnt = 0; gve_reg_bar_write_4(priv, GVE_REG_ADMINQ_ADDR, priv->adminq_bus_addr / ADMINQ_SIZE); gve_set_state_flag(priv, GVE_STATE_FLAG_ADMINQ_OK); return (0); } void gve_release_adminq(struct gve_priv *priv) { if (!gve_get_state_flag(priv, GVE_STATE_FLAG_ADMINQ_OK)) return; gve_reg_bar_write_4(priv, GVE_REG_ADMINQ_ADDR, 0); while (gve_reg_bar_read_4(priv, GVE_REG_ADMINQ_ADDR)) { device_printf(priv->dev, "Waiting until admin queue is released.\n"); pause("gve release adminq", GVE_ADMINQ_SLEEP_LEN_MS); } gve_dma_free_coherent(&priv->aq_mem); priv->aq_mem = (struct gve_dma_handle){}; priv->adminq = 0; priv->adminq_bus_addr = 0; gve_clear_state_flag(priv, GVE_STATE_FLAG_ADMINQ_OK); if (bootverbose) device_printf(priv->dev, "Admin queue released\n"); } static int gve_adminq_parse_err(struct gve_priv *priv, uint32_t opcode, uint32_t status) { if (status != GVE_ADMINQ_COMMAND_PASSED && status != GVE_ADMINQ_COMMAND_UNSET) { device_printf(priv->dev, "AQ command(%u): failed with status %d\n", opcode, status); priv->adminq_cmd_fail++; } switch (status) { case GVE_ADMINQ_COMMAND_PASSED: return (0); case GVE_ADMINQ_COMMAND_UNSET: device_printf(priv->dev, "AQ command(%u): err and status both unset, this should not be possible.\n", opcode); return (EINVAL); case GVE_ADMINQ_COMMAND_ERROR_ABORTED: case GVE_ADMINQ_COMMAND_ERROR_CANCELLED: case GVE_ADMINQ_COMMAND_ERROR_DATALOSS: case GVE_ADMINQ_COMMAND_ERROR_FAILED_PRECONDITION: case GVE_ADMINQ_COMMAND_ERROR_UNAVAILABLE: return (EAGAIN); case GVE_ADMINQ_COMMAND_ERROR_ALREADY_EXISTS: case GVE_ADMINQ_COMMAND_ERROR_INTERNAL_ERROR: case GVE_ADMINQ_COMMAND_ERROR_INVALID_ARGUMENT: case GVE_ADMINQ_COMMAND_ERROR_NOT_FOUND: case GVE_ADMINQ_COMMAND_ERROR_OUT_OF_RANGE: case GVE_ADMINQ_COMMAND_ERROR_UNKNOWN_ERROR: return (EINVAL); case GVE_ADMINQ_COMMAND_ERROR_DEADLINE_EXCEEDED: return (ETIMEDOUT); case GVE_ADMINQ_COMMAND_ERROR_PERMISSION_DENIED: case GVE_ADMINQ_COMMAND_ERROR_UNAUTHENTICATED: return (EACCES); case GVE_ADMINQ_COMMAND_ERROR_RESOURCE_EXHAUSTED: return (ENOMEM); case GVE_ADMINQ_COMMAND_ERROR_UNIMPLEMENTED: return (EOPNOTSUPP); default: device_printf(priv->dev, "AQ command(%u): unknown status code %d\n", opcode, 
status); return (EINVAL); } } static void gve_adminq_kick_cmd(struct gve_priv *priv, uint32_t prod_cnt) { gve_reg_bar_write_4(priv, ADMINQ_DOORBELL, prod_cnt); } static bool gve_adminq_wait_for_cmd(struct gve_priv *priv, uint32_t prod_cnt) { int i; for (i = 0; i < GVE_MAX_ADMINQ_EVENT_COUNTER_CHECK; i++) { if (gve_reg_bar_read_4(priv, ADMINQ_EVENT_COUNTER) == prod_cnt) return (true); pause("gve adminq cmd", GVE_ADMINQ_SLEEP_LEN_MS); } return (false); } /* * Flushes all AQ commands currently queued and waits for them to complete. * If there are failures, it will return the first error. */ static int gve_adminq_kick_and_wait(struct gve_priv *priv) { struct gve_adminq_command *cmd; uint32_t status, err; uint32_t tail, head; uint32_t opcode; int i; tail = gve_reg_bar_read_4(priv, ADMINQ_EVENT_COUNTER); head = priv->adminq_prod_cnt; gve_adminq_kick_cmd(priv, head); if (!gve_adminq_wait_for_cmd(priv, head)) { device_printf(priv->dev, "AQ commands timed out, need to reset AQ\n"); priv->adminq_timeouts++; return (ENOTRECOVERABLE); } bus_dmamap_sync( priv->aq_mem.tag, priv->aq_mem.map, BUS_DMASYNC_POSTREAD); for (i = tail; i < head; i++) { cmd = &priv->adminq[i & priv->adminq_mask]; status = be32toh(cmd->status); opcode = be32toh(cmd->opcode); err = gve_adminq_parse_err(priv, opcode, status); if (err != 0) return (err); } return (0); } /* * This function is not threadsafe - the caller is responsible for any * necessary locks. */ static int gve_adminq_issue_cmd(struct gve_priv *priv, struct gve_adminq_command *cmd_orig) { struct gve_adminq_command *cmd; uint32_t opcode; uint32_t tail; int err; tail = gve_reg_bar_read_4(priv, ADMINQ_EVENT_COUNTER); /* Check if next command will overflow the buffer. */ if ((priv->adminq_prod_cnt - tail) > priv->adminq_mask) { /* Flush existing commands to make room. */ err = gve_adminq_kick_and_wait(priv); if (err != 0) return (err); /* Retry. */ tail = gve_reg_bar_read_4(priv, ADMINQ_EVENT_COUNTER); if ((priv->adminq_prod_cnt - tail) > priv->adminq_mask) { /* * This should never happen. We just flushed the * command queue so there should be enough space. 
*/ return (ENOMEM); } } cmd = &priv->adminq[priv->adminq_prod_cnt & priv->adminq_mask]; priv->adminq_prod_cnt++; memcpy(cmd, cmd_orig, sizeof(*cmd_orig)); bus_dmamap_sync( priv->aq_mem.tag, priv->aq_mem.map, BUS_DMASYNC_PREWRITE); opcode = be32toh(cmd->opcode); switch (opcode) { case GVE_ADMINQ_DESCRIBE_DEVICE: priv->adminq_describe_device_cnt++; break; case GVE_ADMINQ_CONFIGURE_DEVICE_RESOURCES: priv->adminq_cfg_device_resources_cnt++; break; case GVE_ADMINQ_REGISTER_PAGE_LIST: priv->adminq_register_page_list_cnt++; break; case GVE_ADMINQ_UNREGISTER_PAGE_LIST: priv->adminq_unregister_page_list_cnt++; break; case GVE_ADMINQ_CREATE_TX_QUEUE: priv->adminq_create_tx_queue_cnt++; break; case GVE_ADMINQ_CREATE_RX_QUEUE: priv->adminq_create_rx_queue_cnt++; break; case GVE_ADMINQ_DESTROY_TX_QUEUE: priv->adminq_destroy_tx_queue_cnt++; break; case GVE_ADMINQ_DESTROY_RX_QUEUE: priv->adminq_destroy_rx_queue_cnt++; break; case GVE_ADMINQ_DECONFIGURE_DEVICE_RESOURCES: priv->adminq_dcfg_device_resources_cnt++; break; case GVE_ADMINQ_SET_DRIVER_PARAMETER: priv->adminq_set_driver_parameter_cnt++; break; case GVE_ADMINQ_VERIFY_DRIVER_COMPATIBILITY: priv->adminq_verify_driver_compatibility_cnt++; break; case GVE_ADMINQ_GET_PTYPE_MAP: priv->adminq_get_ptype_map_cnt++; break; default: device_printf(priv->dev, "Unknown AQ command opcode %d\n", opcode); } return (0); } /* * This function is not threadsafe - the caller is responsible for any * necessary locks. * The caller is also responsible for making sure there are no commands * waiting to be executed. */ static int gve_adminq_execute_cmd(struct gve_priv *priv, struct gve_adminq_command *cmd_orig) { uint32_t tail, head; int err; tail = gve_reg_bar_read_4(priv, ADMINQ_EVENT_COUNTER); head = priv->adminq_prod_cnt; if (tail != head) return (EINVAL); err = gve_adminq_issue_cmd(priv, cmd_orig); if (err != 0) return (err); return (gve_adminq_kick_and_wait(priv)); } diff --git a/sys/dev/gve/gve_adminq.h b/sys/dev/gve/gve_adminq.h index 37a7cb3ecbb8..bc51046a3037 100644 --- a/sys/dev/gve/gve_adminq.h +++ b/sys/dev/gve/gve_adminq.h @@ -1,449 +1,457 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2023-2024 Google LLC * * Redistribution and use in source and binary forms, with or without modification, * are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software without * specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _GVE_AQ_H_ #define _GVE_AQ_H_ 1 #include #include #include #include #include /* Admin queue opcodes */ enum gve_adminq_opcodes { GVE_ADMINQ_DESCRIBE_DEVICE = 0x1, GVE_ADMINQ_CONFIGURE_DEVICE_RESOURCES = 0x2, GVE_ADMINQ_REGISTER_PAGE_LIST = 0x3, GVE_ADMINQ_UNREGISTER_PAGE_LIST = 0x4, GVE_ADMINQ_CREATE_TX_QUEUE = 0x5, GVE_ADMINQ_CREATE_RX_QUEUE = 0x6, GVE_ADMINQ_DESTROY_TX_QUEUE = 0x7, GVE_ADMINQ_DESTROY_RX_QUEUE = 0x8, GVE_ADMINQ_DECONFIGURE_DEVICE_RESOURCES = 0x9, GVE_ADMINQ_SET_DRIVER_PARAMETER = 0xB, GVE_ADMINQ_REPORT_STATS = 0xC, GVE_ADMINQ_REPORT_LINK_SPEED = 0xD, GVE_ADMINQ_GET_PTYPE_MAP = 0xE, GVE_ADMINQ_VERIFY_DRIVER_COMPATIBILITY = 0xF, }; /* Admin queue status codes */ enum gve_adminq_statuses { GVE_ADMINQ_COMMAND_UNSET = 0x0, GVE_ADMINQ_COMMAND_PASSED = 0x1, GVE_ADMINQ_COMMAND_ERROR_ABORTED = 0xFFFFFFF0, GVE_ADMINQ_COMMAND_ERROR_ALREADY_EXISTS = 0xFFFFFFF1, GVE_ADMINQ_COMMAND_ERROR_CANCELLED = 0xFFFFFFF2, GVE_ADMINQ_COMMAND_ERROR_DATALOSS = 0xFFFFFFF3, GVE_ADMINQ_COMMAND_ERROR_DEADLINE_EXCEEDED = 0xFFFFFFF4, GVE_ADMINQ_COMMAND_ERROR_FAILED_PRECONDITION = 0xFFFFFFF5, GVE_ADMINQ_COMMAND_ERROR_INTERNAL_ERROR = 0xFFFFFFF6, GVE_ADMINQ_COMMAND_ERROR_INVALID_ARGUMENT = 0xFFFFFFF7, GVE_ADMINQ_COMMAND_ERROR_NOT_FOUND = 0xFFFFFFF8, GVE_ADMINQ_COMMAND_ERROR_OUT_OF_RANGE = 0xFFFFFFF9, GVE_ADMINQ_COMMAND_ERROR_PERMISSION_DENIED = 0xFFFFFFFA, GVE_ADMINQ_COMMAND_ERROR_UNAUTHENTICATED = 0xFFFFFFFB, GVE_ADMINQ_COMMAND_ERROR_RESOURCE_EXHAUSTED = 0xFFFFFFFC, GVE_ADMINQ_COMMAND_ERROR_UNAVAILABLE = 0xFFFFFFFD, GVE_ADMINQ_COMMAND_ERROR_UNIMPLEMENTED = 0xFFFFFFFE, GVE_ADMINQ_COMMAND_ERROR_UNKNOWN_ERROR = 0xFFFFFFFF, }; #define GVE_ADMINQ_DEVICE_DESCRIPTOR_VERSION 1 /* * All AdminQ command structs should be naturally packed. The static_assert * calls make sure this is the case at compile time. 
 */
struct gve_adminq_describe_device {
	__be64 device_descriptor_addr;
	__be32 device_descriptor_version;
	__be32 available_length;
};

_Static_assert(sizeof(struct gve_adminq_describe_device) == 16,
    "gve: bad admin queue struct length");

struct gve_device_descriptor {
	__be64 max_registered_pages;
	__be16 reserved1;
	__be16 tx_queue_entries;
	__be16 rx_queue_entries;
	__be16 default_num_queues;
	__be16 mtu;
	__be16 counters;
	__be16 reserved2;
	__be16 rx_pages_per_qpl;
	uint8_t mac[ETHER_ADDR_LEN];
	__be16 num_device_options;
	__be16 total_length;
	uint8_t reserved3[6];
};

_Static_assert(sizeof(struct gve_device_descriptor) == 40,
    "gve: bad admin queue struct length");

struct gve_device_option {
	__be16 option_id;
	__be16 option_length;
	__be32 required_features_mask;
};

_Static_assert(sizeof(struct gve_device_option) == 8,
    "gve: bad admin queue struct length");

struct gve_device_option_gqi_rda {
	__be32 supported_features_mask;
};

_Static_assert(sizeof(struct gve_device_option_gqi_rda) == 4,
    "gve: bad admin queue struct length");

struct gve_device_option_gqi_qpl {
	__be32 supported_features_mask;
};

_Static_assert(sizeof(struct gve_device_option_gqi_qpl) == 4,
    "gve: bad admin queue struct length");

struct gve_device_option_dqo_rda {
	__be32 supported_features_mask;
	__be16 tx_comp_ring_entries;
	__be16 rx_buff_ring_entries;
};

_Static_assert(sizeof(struct gve_device_option_dqo_rda) == 8,
    "gve: bad admin queue struct length");

struct gve_device_option_dqo_qpl {
	__be32 supported_features_mask;
	__be16 tx_comp_ring_entries;
	__be16 rx_buff_ring_entries;
};

_Static_assert(sizeof(struct gve_device_option_dqo_qpl) == 8,
    "gve: bad admin queue struct length");

+struct gve_ring_size_bound {
+	__be16 rx;
+	__be16 tx;
+};
+
+_Static_assert(sizeof(struct gve_ring_size_bound) == 4,
+    "gve: bad admin queue struct length");
+
struct gve_device_option_modify_ring {
	__be32 supported_features_mask;
-	__be16 max_rx_ring_size;
-	__be16 max_tx_ring_size;
+	struct gve_ring_size_bound max_ring_size;
+	struct gve_ring_size_bound min_ring_size;
};

-_Static_assert(sizeof(struct gve_device_option_modify_ring) == 8,
+_Static_assert(sizeof(struct gve_device_option_modify_ring) == 12,
    "gve: bad admin queue struct length");

struct gve_device_option_jumbo_frames {
	__be32 supported_features_mask;
	__be16 max_mtu;
	uint8_t padding[2];
};

_Static_assert(sizeof(struct gve_device_option_jumbo_frames) == 8,
    "gve: bad admin queue struct length");

enum gve_dev_opt_id {
	GVE_DEV_OPT_ID_GQI_RAW_ADDRESSING = 0x1,
	GVE_DEV_OPT_ID_GQI_RDA = 0x2,
	GVE_DEV_OPT_ID_GQI_QPL = 0x3,
	GVE_DEV_OPT_ID_DQO_RDA = 0x4,
	GVE_DEV_OPT_ID_MODIFY_RING = 0x6,
	GVE_DEV_OPT_ID_DQO_QPL = 0x7,
	GVE_DEV_OPT_ID_JUMBO_FRAMES = 0x8,
};

/*
 * These masks are a way to predicate the use of a particular option on the
 * driver having particular bug fixes represented by each bit position in the
 * mask. Currently they are all zero because there are no known bugs
 * preventing the use of any option.
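 *
 * Illustrative sketch only (not part of the device ABI): a driver missing
 * the fix for some required bit would be expected to skip the option,
 * along the lines of:
 *
 *	req = be32toh(option->required_features_mask);
 *	if ((req & GVE_DRIVER_KNOWN_FIXES) != req)
 *		continue;	(skip this device option)
 *
 * where GVE_DRIVER_KNOWN_FIXES is a hypothetical mask of the bug fixes
 * this driver carries.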
*/ enum gve_dev_opt_req_feat_mask { GVE_DEV_OPT_REQ_FEAT_MASK_GQI_RAW_ADDRESSING = 0x0, GVE_DEV_OPT_REQ_FEAT_MASK_GQI_RDA = 0x0, GVE_DEV_OPT_REQ_FEAT_MASK_GQI_QPL = 0x0, GVE_DEV_OPT_REQ_FEAT_MASK_DQO_RDA = 0x0, GVE_DEV_OPT_REQ_FEAT_MASK_DQO_QPL = 0x0, GVE_DEV_OPT_REQ_FEAT_MASK_MODIFY_RING = 0x0, GVE_DEV_OPT_REQ_FEAT_MASK_JUMBO_FRAMES = 0x0, }; enum gve_sup_feature_mask { GVE_SUP_MODIFY_RING_MASK = 1 << 0, GVE_SUP_JUMBO_FRAMES_MASK = 1 << 2, }; #define GVE_VERSION_STR_LEN 128 enum gve_driver_capability { gve_driver_capability_gqi_qpl = 0, gve_driver_capability_gqi_rda = 1, gve_driver_capability_dqo_qpl = 2, gve_driver_capability_dqo_rda = 3, }; #define GVE_CAP1(a) BIT((int) a) #define GVE_CAP2(a) BIT(((int) a) - 64) #define GVE_CAP3(a) BIT(((int) a) - 128) #define GVE_CAP4(a) BIT(((int) a) - 192) /* * The following four defines describe 256 compatibility bits. * Only a few bits (as shown in `gve_driver_compatibility`) are currently * defined. The rest are reserved for future use. */ #define GVE_DRIVER_CAPABILITY_FLAGS1 \ (GVE_CAP1(gve_driver_capability_gqi_qpl) | \ GVE_CAP1(gve_driver_capability_dqo_qpl) | \ GVE_CAP1(gve_driver_capability_dqo_rda)) #define GVE_DRIVER_CAPABILITY_FLAGS2 0x0 #define GVE_DRIVER_CAPABILITY_FLAGS3 0x0 #define GVE_DRIVER_CAPABILITY_FLAGS4 0x0 struct gve_driver_info { uint8_t os_type; uint8_t driver_major; uint8_t driver_minor; uint8_t driver_sub; __be32 os_version_major; __be32 os_version_minor; __be32 os_version_sub; __be64 driver_capability_flags[4]; uint8_t os_version_str1[GVE_VERSION_STR_LEN]; uint8_t os_version_str2[GVE_VERSION_STR_LEN]; }; struct gve_adminq_verify_driver_compatibility { __be64 driver_info_len; __be64 driver_info_addr; }; _Static_assert(sizeof(struct gve_adminq_verify_driver_compatibility) == 16, "gve: bad admin queue struct length"); struct gve_adminq_configure_device_resources { __be64 counter_array; __be64 irq_db_addr; __be32 num_counters; __be32 num_irq_dbs; __be32 irq_db_stride; __be32 ntfy_blk_msix_base_idx; uint8_t queue_format; uint8_t padding[7]; }; _Static_assert(sizeof(struct gve_adminq_configure_device_resources) == 40, "gve: bad admin queue struct length"); struct gve_adminq_register_page_list { __be32 page_list_id; __be32 num_pages; __be64 page_address_list_addr; __be64 page_size; }; _Static_assert(sizeof(struct gve_adminq_register_page_list) == 24, "gve: bad admin queue struct length"); struct gve_adminq_unregister_page_list { __be32 page_list_id; }; _Static_assert(sizeof(struct gve_adminq_unregister_page_list) == 4, "gve: bad admin queue struct length"); struct gve_adminq_create_tx_queue { __be32 queue_id; __be32 reserved; __be64 queue_resources_addr; __be64 tx_ring_addr; __be32 queue_page_list_id; __be32 ntfy_id; __be64 tx_comp_ring_addr; __be16 tx_ring_size; __be16 tx_comp_ring_size; uint8_t padding[4]; }; _Static_assert(sizeof(struct gve_adminq_create_tx_queue) == 48, "gve: bad admin queue struct length"); #define GVE_RAW_ADDRESSING_QPL_ID 0xFFFFFFFF struct gve_adminq_create_rx_queue { __be32 queue_id; __be32 index; __be32 reserved; __be32 ntfy_id; __be64 queue_resources_addr; __be64 rx_desc_ring_addr; __be64 rx_data_ring_addr; __be32 queue_page_list_id; __be16 rx_ring_size; __be16 packet_buffer_size; __be16 rx_buff_ring_size; uint8_t enable_rsc; uint8_t padding[5]; }; _Static_assert(sizeof(struct gve_adminq_create_rx_queue) == 56, "gve: bad admin queue struct length"); /* Queue resources that are shared with the device */ struct gve_queue_resources { union { struct { __be32 db_index; /* Device -> Guest */ __be32 
counter_index; /* Device -> Guest */ }; uint8_t reserved[64]; }; }; _Static_assert(sizeof(struct gve_queue_resources) == 64, "gve: bad admin queue struct length"); struct gve_adminq_destroy_tx_queue { __be32 queue_id; }; _Static_assert(sizeof(struct gve_adminq_destroy_tx_queue) == 4, "gve: bad admin queue struct length"); struct gve_adminq_destroy_rx_queue { __be32 queue_id; }; _Static_assert(sizeof(struct gve_adminq_destroy_rx_queue) == 4, "gve: bad admin queue struct length"); /* GVE Set Driver Parameter Types */ enum gve_set_driver_param_types { GVE_SET_PARAM_MTU = 0x1, }; struct gve_adminq_set_driver_parameter { __be32 parameter_type; uint8_t reserved[4]; __be64 parameter_value; }; _Static_assert(sizeof(struct gve_adminq_set_driver_parameter) == 16, "gve: bad admin queue struct length"); struct stats { __be32 stat_name; __be32 queue_id; __be64 value; }; _Static_assert(sizeof(struct stats) == 16, "gve: bad admin queue struct length"); /* These are control path types for PTYPE which are the same as the data path * types. */ struct gve_ptype_entry { uint8_t l3_type; uint8_t l4_type; }; struct gve_ptype_map { struct gve_ptype_entry ptypes[1 << 10]; /* PTYPES are always 10 bits. */ }; struct gve_adminq_get_ptype_map { __be64 ptype_map_len; __be64 ptype_map_addr; }; struct gve_adminq_command { __be32 opcode; __be32 status; union { struct gve_adminq_configure_device_resources configure_device_resources; struct gve_adminq_create_tx_queue create_tx_queue; struct gve_adminq_create_rx_queue create_rx_queue; struct gve_adminq_destroy_tx_queue destroy_tx_queue; struct gve_adminq_destroy_rx_queue destroy_rx_queue; struct gve_adminq_describe_device describe_device; struct gve_adminq_register_page_list reg_page_list; struct gve_adminq_unregister_page_list unreg_page_list; struct gve_adminq_set_driver_parameter set_driver_param; struct gve_adminq_verify_driver_compatibility verify_driver_compatibility; struct gve_adminq_get_ptype_map get_ptype_map; uint8_t reserved[56]; }; }; _Static_assert(sizeof(struct gve_adminq_command) == 64, "gve: bad admin queue struct length"); enum gve_l3_type { /* Must be zero so zero initialized LUT is unknown. */ GVE_L3_TYPE_UNKNOWN = 0, GVE_L3_TYPE_OTHER, GVE_L3_TYPE_IPV4, GVE_L3_TYPE_IPV6, }; enum gve_l4_type { /* Must be zero so zero initialized LUT is unknown. 
*/ GVE_L4_TYPE_UNKNOWN = 0, GVE_L4_TYPE_OTHER, GVE_L4_TYPE_TCP, GVE_L4_TYPE_UDP, GVE_L4_TYPE_ICMP, GVE_L4_TYPE_SCTP, }; int gve_adminq_create_rx_queues(struct gve_priv *priv, uint32_t num_queues); int gve_adminq_create_tx_queues(struct gve_priv *priv, uint32_t num_queues); int gve_adminq_destroy_tx_queues(struct gve_priv *priv, uint32_t num_queues); int gve_adminq_destroy_rx_queues(struct gve_priv *priv, uint32_t num_queues); int gve_adminq_set_mtu(struct gve_priv *priv, uint32_t mtu); int gve_adminq_alloc(struct gve_priv *priv); void gve_reset_adminq(struct gve_priv *priv); int gve_adminq_describe_device(struct gve_priv *priv); int gve_adminq_configure_device_resources(struct gve_priv *priv); int gve_adminq_deconfigure_device_resources(struct gve_priv *priv); void gve_release_adminq(struct gve_priv *priv); int gve_adminq_register_page_list(struct gve_priv *priv, struct gve_queue_page_list *qpl); int gve_adminq_unregister_page_list(struct gve_priv *priv, uint32_t page_list_id); int gve_adminq_verify_driver_compatibility(struct gve_priv *priv, uint64_t driver_info_len, vm_paddr_t driver_info_addr); int gve_adminq_get_ptype_map_dqo(struct gve_priv *priv, struct gve_ptype_lut *ptype_lut); #endif /* _GVE_AQ_H_ */ diff --git a/sys/dev/gve/gve_main.c b/sys/dev/gve/gve_main.c index 39556b85f493..8a00deedef36 100644 --- a/sys/dev/gve/gve_main.c +++ b/sys/dev/gve/gve_main.c @@ -1,1027 +1,1075 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2023-2024 Google LLC * * Redistribution and use in source and binary forms, with or without modification, * are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software without * specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "gve.h" #include "gve_adminq.h" #include "gve_dqo.h" -#define GVE_DRIVER_VERSION "GVE-FBSD-1.3.2\n" +#define GVE_DRIVER_VERSION "GVE-FBSD-1.3.3\n" #define GVE_VERSION_MAJOR 1 #define GVE_VERSION_MINOR 3 -#define GVE_VERSION_SUB 2 +#define GVE_VERSION_SUB 3 #define GVE_DEFAULT_RX_COPYBREAK 256 /* Devices supported by this driver. 
*/ static struct gve_dev { uint16_t vendor_id; uint16_t device_id; const char *name; } gve_devs[] = { { PCI_VENDOR_ID_GOOGLE, PCI_DEV_ID_GVNIC, "gVNIC" } }; struct sx gve_global_lock; static int gve_verify_driver_compatibility(struct gve_priv *priv) { int err; struct gve_driver_info *driver_info; struct gve_dma_handle driver_info_mem; err = gve_dma_alloc_coherent(priv, sizeof(struct gve_driver_info), PAGE_SIZE, &driver_info_mem); if (err != 0) return (ENOMEM); driver_info = driver_info_mem.cpu_addr; *driver_info = (struct gve_driver_info) { .os_type = 3, /* Freebsd */ .driver_major = GVE_VERSION_MAJOR, .driver_minor = GVE_VERSION_MINOR, .driver_sub = GVE_VERSION_SUB, .os_version_major = htobe32(FBSD_VERSION_MAJOR), .os_version_minor = htobe32(FBSD_VERSION_MINOR), .os_version_sub = htobe32(FBSD_VERSION_PATCH), .driver_capability_flags = { htobe64(GVE_DRIVER_CAPABILITY_FLAGS1), htobe64(GVE_DRIVER_CAPABILITY_FLAGS2), htobe64(GVE_DRIVER_CAPABILITY_FLAGS3), htobe64(GVE_DRIVER_CAPABILITY_FLAGS4), }, }; snprintf(driver_info->os_version_str1, sizeof(driver_info->os_version_str1), "FreeBSD %u", __FreeBSD_version); bus_dmamap_sync(driver_info_mem.tag, driver_info_mem.map, BUS_DMASYNC_PREREAD); err = gve_adminq_verify_driver_compatibility(priv, sizeof(struct gve_driver_info), driver_info_mem.bus_addr); /* It's ok if the device doesn't support this */ if (err == EOPNOTSUPP) err = 0; gve_dma_free_coherent(&driver_info_mem); return (err); } static int gve_up(struct gve_priv *priv) { if_t ifp = priv->ifp; int err; GVE_IFACE_LOCK_ASSERT(priv->gve_iface_lock); if (device_is_attached(priv->dev) == 0) { device_printf(priv->dev, "Cannot bring the iface up when detached\n"); return (ENXIO); } if (gve_get_state_flag(priv, GVE_STATE_FLAG_QUEUES_UP)) return (0); if_clearhwassist(ifp); if (if_getcapenable(ifp) & IFCAP_TXCSUM) if_sethwassistbits(ifp, CSUM_TCP | CSUM_UDP, 0); if (if_getcapenable(ifp) & IFCAP_TXCSUM_IPV6) if_sethwassistbits(ifp, CSUM_IP6_TCP | CSUM_IP6_UDP, 0); if (if_getcapenable(ifp) & IFCAP_TSO4) if_sethwassistbits(ifp, CSUM_IP_TSO, 0); if (if_getcapenable(ifp) & IFCAP_TSO6) if_sethwassistbits(ifp, CSUM_IP6_TSO, 0); if (gve_is_qpl(priv)) { err = gve_register_qpls(priv); if (err != 0) goto reset; } err = gve_create_rx_rings(priv); if (err != 0) goto reset; err = gve_create_tx_rings(priv); if (err != 0) goto reset; if_setdrvflagbits(ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE); if (!gve_get_state_flag(priv, GVE_STATE_FLAG_LINK_UP)) { if_link_state_change(ifp, LINK_STATE_UP); gve_set_state_flag(priv, GVE_STATE_FLAG_LINK_UP); } gve_unmask_all_queue_irqs(priv); gve_set_state_flag(priv, GVE_STATE_FLAG_QUEUES_UP); priv->interface_up_cnt++; return (0); reset: gve_schedule_reset(priv); return (err); } static void gve_down(struct gve_priv *priv) { GVE_IFACE_LOCK_ASSERT(priv->gve_iface_lock); if (!gve_get_state_flag(priv, GVE_STATE_FLAG_QUEUES_UP)) return; if (gve_get_state_flag(priv, GVE_STATE_FLAG_LINK_UP)) { if_link_state_change(priv->ifp, LINK_STATE_DOWN); gve_clear_state_flag(priv, GVE_STATE_FLAG_LINK_UP); } if_setdrvflagbits(priv->ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING); if (gve_destroy_rx_rings(priv) != 0) goto reset; if (gve_destroy_tx_rings(priv) != 0) goto reset; if (gve_is_qpl(priv)) { if (gve_unregister_qpls(priv) != 0) goto reset; } if (gve_is_gqi(priv)) gve_mask_all_queue_irqs(priv); gve_clear_state_flag(priv, GVE_STATE_FLAG_QUEUES_UP); priv->interface_down_cnt++; return; reset: gve_schedule_reset(priv); } int gve_adjust_rx_queues(struct gve_priv *priv, uint16_t new_queue_cnt) { int err; 
	GVE_IFACE_LOCK_ASSERT(priv->gve_iface_lock);

	gve_down(priv);

	if (new_queue_cnt < priv->rx_cfg.num_queues) {
		/*
		 * Freeing a ring still preserves its ntfy_id,
		 * which is needed if we create the ring again.
		 */
		gve_free_rx_rings(priv, new_queue_cnt, priv->rx_cfg.num_queues);
	} else {
		err = gve_alloc_rx_rings(priv, priv->rx_cfg.num_queues, new_queue_cnt);
		if (err != 0) {
			device_printf(priv->dev, "Failed to allocate new queues\n");
			/* Failed to allocate rings, start back up with old ones */
			gve_up(priv);
			return (err);
		}
	}
	priv->rx_cfg.num_queues = new_queue_cnt;

	err = gve_up(priv);
	if (err != 0)
		gve_schedule_reset(priv);

	return (err);
}

int
gve_adjust_tx_queues(struct gve_priv *priv, uint16_t new_queue_cnt)
{
	int err;

	GVE_IFACE_LOCK_ASSERT(priv->gve_iface_lock);

	gve_down(priv);

	if (new_queue_cnt < priv->tx_cfg.num_queues) {
		/*
		 * Freeing a ring still preserves its ntfy_id,
		 * which is needed if we create the ring again.
		 */
		gve_free_tx_rings(priv, new_queue_cnt, priv->tx_cfg.num_queues);
	} else {
		err = gve_alloc_tx_rings(priv, priv->tx_cfg.num_queues, new_queue_cnt);
		if (err != 0) {
			device_printf(priv->dev, "Failed to allocate new queues\n");
			/* Failed to allocate rings, start back up with old ones */
			gve_up(priv);
			return (err);
		}
	}
	priv->tx_cfg.num_queues = new_queue_cnt;

	err = gve_up(priv);
	if (err != 0)
		gve_schedule_reset(priv);

	return (err);
}

+int
+gve_adjust_ring_sizes(struct gve_priv *priv, uint16_t new_desc_cnt, bool is_rx)
+{
+	int err;
+	uint16_t prev_desc_cnt;
+
+	GVE_IFACE_LOCK_ASSERT(priv->gve_iface_lock);
+
+	gve_down(priv);
+
+	if (is_rx) {
+		gve_free_rx_rings(priv, 0, priv->rx_cfg.num_queues);
+		prev_desc_cnt = priv->rx_desc_cnt;
+		priv->rx_desc_cnt = new_desc_cnt;
+		err = gve_alloc_rx_rings(priv, 0, priv->rx_cfg.num_queues);
+		if (err != 0) {
+			device_printf(priv->dev,
+			    "Failed to allocate rings. Trying to start back up with previous ring size.\n");
+			priv->rx_desc_cnt = prev_desc_cnt;
+			err = gve_alloc_rx_rings(priv, 0, priv->rx_cfg.num_queues);
+		}
+	} else {
+		gve_free_tx_rings(priv, 0, priv->tx_cfg.num_queues);
+		prev_desc_cnt = priv->tx_desc_cnt;
+		priv->tx_desc_cnt = new_desc_cnt;
+		err = gve_alloc_tx_rings(priv, 0, priv->tx_cfg.num_queues);
+		if (err != 0) {
+			device_printf(priv->dev,
+			    "Failed to allocate rings. Trying to start back up with previous ring size.\n");
+			priv->tx_desc_cnt = prev_desc_cnt;
+			err = gve_alloc_tx_rings(priv, 0, priv->tx_cfg.num_queues);
+		}
+	}
+
+	if (err != 0) {
+		device_printf(priv->dev,
+		    "Failed to allocate rings! Cannot start device back up!\n");
+		return (err);
+	}
+
+	err = gve_up(priv);
+	if (err != 0) {
+		gve_schedule_reset(priv);
+		return (err);
+	}
+
+	return (0);
+}
+
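For illustration only (not part of the patch): the tx_ring_size/rx_ring_size sysctl handlers added later in this change call gve_adjust_ring_sizes() under the interface lock, so an equivalent in-kernel caller shrinking the RX descriptor count to 512 would look roughly like this, assuming 512 is a power of 2 within the device-reported bounds:

	GVE_IFACE_LOCK_LOCK(priv->gve_iface_lock);
	err = gve_adjust_ring_sizes(priv, 512, /*is_rx=*/true);
	GVE_IFACE_LOCK_UNLOCK(priv->gve_iface_lock);
	if (err != 0)
		device_printf(priv->dev, "rx ring resize to 512 failed: %d\n", err);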
static int
gve_set_mtu(if_t ifp, uint32_t new_mtu)
{
	struct gve_priv *priv = if_getsoftc(ifp);
	const uint32_t max_problem_range = 8227;
	const uint32_t min_problem_range = 7822;
	int err;

	if ((new_mtu > priv->max_mtu) || (new_mtu < ETHERMIN)) {
		device_printf(priv->dev,
		    "Invalid new MTU setting. new mtu: %d max mtu: %d min mtu: %d\n",
		    new_mtu, priv->max_mtu, ETHERMIN);
		return (EINVAL);
	}

	/*
	 * When hardware LRO is enabled in DQ mode, MTUs within the range
	 * [7822, 8227] trigger hardware issues which cause a drastic drop
	 * in throughput.
	 */
	if (!gve_is_gqi(priv) && !gve_disable_hw_lro &&
	    new_mtu >= min_problem_range && new_mtu <= max_problem_range) {
		device_printf(priv->dev,
		    "Cannot set MTU to %d within the range [%d, %d] while hardware LRO is enabled\n",
		    new_mtu, min_problem_range, max_problem_range);
		return (EINVAL);
	}

	err = gve_adminq_set_mtu(priv, new_mtu);
	if (err == 0) {
		if (bootverbose)
			device_printf(priv->dev, "MTU set to %d\n", new_mtu);
		if_setmtu(ifp, new_mtu);
	} else {
		device_printf(priv->dev, "Failed to set MTU to %d\n", new_mtu);
	}

	return (err);
}

static void
gve_init(void *arg)
{
	struct gve_priv *priv = (struct gve_priv *)arg;

	if (!gve_get_state_flag(priv, GVE_STATE_FLAG_QUEUES_UP)) {
		GVE_IFACE_LOCK_LOCK(priv->gve_iface_lock);
		gve_up(priv);
		GVE_IFACE_LOCK_UNLOCK(priv->gve_iface_lock);
	}
}

static int
gve_ioctl(if_t ifp, u_long command, caddr_t data)
{
	struct gve_priv *priv;
	struct ifreq *ifr;
	int rc = 0;

	priv = if_getsoftc(ifp);
	ifr = (struct ifreq *)data;

	switch (command) {
	case SIOCSIFMTU:
		if (if_getmtu(ifp) == ifr->ifr_mtu)
			break;
		GVE_IFACE_LOCK_LOCK(priv->gve_iface_lock);
		gve_down(priv);
		gve_set_mtu(ifp, ifr->ifr_mtu);
		rc = gve_up(priv);
		GVE_IFACE_LOCK_UNLOCK(priv->gve_iface_lock);
		break;

	case SIOCSIFFLAGS:
		if ((if_getflags(ifp) & IFF_UP) != 0) {
			if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) {
				GVE_IFACE_LOCK_LOCK(priv->gve_iface_lock);
				rc = gve_up(priv);
				GVE_IFACE_LOCK_UNLOCK(priv->gve_iface_lock);
			}
		} else {
			if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0) {
				GVE_IFACE_LOCK_LOCK(priv->gve_iface_lock);
				gve_down(priv);
				GVE_IFACE_LOCK_UNLOCK(priv->gve_iface_lock);
			}
		}
		break;

	case SIOCSIFCAP:
		if (ifr->ifr_reqcap == if_getcapenable(ifp))
			break;
		GVE_IFACE_LOCK_LOCK(priv->gve_iface_lock);
		gve_down(priv);
		if_setcapenable(ifp, ifr->ifr_reqcap);
		rc = gve_up(priv);
		GVE_IFACE_LOCK_UNLOCK(priv->gve_iface_lock);
		break;

	case SIOCSIFMEDIA:
		/* FALLTHROUGH */
	case SIOCGIFMEDIA:
		rc = ifmedia_ioctl(ifp, ifr, &priv->media, command);
		break;

	default:
		rc = ether_ioctl(ifp, command, data);
		break;
	}

	return (rc);
}

static int
gve_media_change(if_t ifp)
{
	struct gve_priv *priv = if_getsoftc(ifp);

	device_printf(priv->dev, "Media change not supported\n");
	return (0);
}

static void
gve_media_status(if_t ifp, struct ifmediareq *ifmr)
{
	struct gve_priv *priv = if_getsoftc(ifp);

	GVE_IFACE_LOCK_LOCK(priv->gve_iface_lock);

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if (gve_get_state_flag(priv, GVE_STATE_FLAG_LINK_UP)) {
		ifmr->ifm_status |= IFM_ACTIVE;
		ifmr->ifm_active |= IFM_AUTO;
	} else {
		ifmr->ifm_active |= IFM_NONE;
	}

	GVE_IFACE_LOCK_UNLOCK(priv->gve_iface_lock);
}

static uint64_t
gve_get_counter(if_t ifp, ift_counter cnt)
{
	struct gve_priv *priv;
	uint64_t rpackets = 0;
	uint64_t tpackets = 0;
	uint64_t rbytes = 0;
	uint64_t tbytes = 0;
	uint64_t rx_dropped_pkt = 0;
	uint64_t tx_dropped_pkt = 0;

	priv = if_getsoftc(ifp);
	gve_accum_stats(priv, &rpackets, &rbytes, &rx_dropped_pkt, &tpackets,
	    &tbytes, &tx_dropped_pkt);

	switch (cnt) {
	case IFCOUNTER_IPACKETS:
		return (rpackets);
	case IFCOUNTER_OPACKETS:
		return (tpackets);
	case IFCOUNTER_IBYTES:
		return (rbytes);
	case IFCOUNTER_OBYTES:
		return (tbytes);
	case IFCOUNTER_IQDROPS:
		return (rx_dropped_pkt);
	case IFCOUNTER_OQDROPS:
		return (tx_dropped_pkt);
	default:
		return (if_get_counter_default(ifp, cnt));
	}
}

static void
gve_setup_ifnet(device_t dev, struct gve_priv *priv)
{
	int caps = 0;
	if_t ifp;

	ifp = priv->ifp = if_alloc(IFT_ETHER);
	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
	if_setsoftc(ifp, priv);
	if_setdev(ifp, dev);
	if_setinitfn(ifp, gve_init);
if_setioctlfn(ifp, gve_ioctl); if_settransmitfn(ifp, gve_xmit_ifp); if_setqflushfn(ifp, gve_qflush); /* * Set TSO limits, must match the arguments to bus_dma_tag_create * when creating tx->dqo.buf_dmatag. Only applies to the RDA mode * because in QPL we copy the entire packet into the bounce buffer * and thus it does not matter how fragmented the mbuf is. */ if (!gve_is_gqi(priv) && !gve_is_qpl(priv)) { if_sethwtsomaxsegcount(ifp, GVE_TX_MAX_DATA_DESCS_DQO); if_sethwtsomaxsegsize(ifp, GVE_TX_MAX_BUF_SIZE_DQO); } if_sethwtsomax(ifp, GVE_TSO_MAXSIZE_DQO); #if __FreeBSD_version >= 1400086 if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST); #else if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST | IFF_KNOWSEPOCH); #endif ifmedia_init(&priv->media, IFM_IMASK, gve_media_change, gve_media_status); if_setgetcounterfn(ifp, gve_get_counter); caps = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6 | IFCAP_TSO | IFCAP_LRO; if ((priv->supported_features & GVE_SUP_JUMBO_FRAMES_MASK) != 0) caps |= IFCAP_JUMBO_MTU; if_setcapabilities(ifp, caps); if_setcapenable(ifp, caps); if (bootverbose) device_printf(priv->dev, "Setting initial MTU to %d\n", priv->max_mtu); if_setmtu(ifp, priv->max_mtu); ether_ifattach(ifp, priv->mac); ifmedia_add(&priv->media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(&priv->media, IFM_ETHER | IFM_AUTO); } static int gve_alloc_counter_array(struct gve_priv *priv) { int err; err = gve_dma_alloc_coherent(priv, sizeof(uint32_t) * priv->num_event_counters, PAGE_SIZE, &priv->counter_array_mem); if (err != 0) return (err); priv->counters = priv->counter_array_mem.cpu_addr; return (0); } static void gve_free_counter_array(struct gve_priv *priv) { if (priv->counters != NULL) gve_dma_free_coherent(&priv->counter_array_mem); priv->counter_array_mem = (struct gve_dma_handle){}; } static int gve_alloc_irq_db_array(struct gve_priv *priv) { int err; err = gve_dma_alloc_coherent(priv, sizeof(struct gve_irq_db) * (priv->num_queues), PAGE_SIZE, &priv->irqs_db_mem); if (err != 0) return (err); priv->irq_db_indices = priv->irqs_db_mem.cpu_addr; return (0); } static void gve_free_irq_db_array(struct gve_priv *priv) { if (priv->irq_db_indices != NULL) gve_dma_free_coherent(&priv->irqs_db_mem); priv->irqs_db_mem = (struct gve_dma_handle){}; } static void gve_free_rings(struct gve_priv *priv) { gve_free_irqs(priv); gve_free_tx_rings(priv, 0, priv->tx_cfg.num_queues); free(priv->tx, M_GVE); priv->tx = NULL; gve_free_rx_rings(priv, 0, priv->rx_cfg.num_queues); free(priv->rx, M_GVE); priv->rx = NULL; } static int gve_alloc_rings(struct gve_priv *priv) { int err; priv->rx = malloc(sizeof(struct gve_rx_ring) * priv->rx_cfg.max_queues, M_GVE, M_WAITOK | M_ZERO); err = gve_alloc_rx_rings(priv, 0, priv->rx_cfg.num_queues); if (err != 0) goto abort; priv->tx = malloc(sizeof(struct gve_tx_ring) * priv->tx_cfg.max_queues, M_GVE, M_WAITOK | M_ZERO); err = gve_alloc_tx_rings(priv, 0, priv->tx_cfg.num_queues); if (err != 0) goto abort; err = gve_alloc_irqs(priv); if (err != 0) goto abort; return (0); abort: gve_free_rings(priv); return (err); } static void gve_deconfigure_and_free_device_resources(struct gve_priv *priv) { int err; if (gve_get_state_flag(priv, GVE_STATE_FLAG_RESOURCES_OK)) { err = gve_adminq_deconfigure_device_resources(priv); if (err != 0) { device_printf(priv->dev, "Failed to deconfigure device resources: err=%d\n", err); return; } if (bootverbose) device_printf(priv->dev, "Deconfigured device resources\n"); gve_clear_state_flag(priv, GVE_STATE_FLAG_RESOURCES_OK); } 
gve_free_irq_db_array(priv); gve_free_counter_array(priv); if (priv->ptype_lut_dqo) { free(priv->ptype_lut_dqo, M_GVE); priv->ptype_lut_dqo = NULL; } } static int gve_alloc_and_configure_device_resources(struct gve_priv *priv) { int err; if (gve_get_state_flag(priv, GVE_STATE_FLAG_RESOURCES_OK)) return (0); err = gve_alloc_counter_array(priv); if (err != 0) return (err); err = gve_alloc_irq_db_array(priv); if (err != 0) goto abort; err = gve_adminq_configure_device_resources(priv); if (err != 0) { device_printf(priv->dev, "Failed to configure device resources: err=%d\n", err); err = (ENXIO); goto abort; } if (!gve_is_gqi(priv)) { priv->ptype_lut_dqo = malloc(sizeof(*priv->ptype_lut_dqo), M_GVE, M_WAITOK | M_ZERO); err = gve_adminq_get_ptype_map_dqo(priv, priv->ptype_lut_dqo); if (err != 0) { device_printf(priv->dev, "Failed to configure ptype lut: err=%d\n", err); goto abort; } } gve_set_state_flag(priv, GVE_STATE_FLAG_RESOURCES_OK); if (bootverbose) device_printf(priv->dev, "Configured device resources\n"); return (0); abort: gve_deconfigure_and_free_device_resources(priv); return (err); } static void gve_set_queue_cnts(struct gve_priv *priv) { priv->tx_cfg.max_queues = gve_reg_bar_read_4(priv, MAX_TX_QUEUES); priv->rx_cfg.max_queues = gve_reg_bar_read_4(priv, MAX_RX_QUEUES); priv->tx_cfg.num_queues = priv->tx_cfg.max_queues; priv->rx_cfg.num_queues = priv->rx_cfg.max_queues; if (priv->default_num_queues > 0) { priv->tx_cfg.num_queues = MIN(priv->default_num_queues, priv->tx_cfg.num_queues); priv->rx_cfg.num_queues = MIN(priv->default_num_queues, priv->rx_cfg.num_queues); } priv->num_queues = priv->tx_cfg.max_queues + priv->rx_cfg.max_queues; priv->mgmt_msix_idx = priv->num_queues; } static int gve_alloc_adminq_and_describe_device(struct gve_priv *priv) { int err; if ((err = gve_adminq_alloc(priv)) != 0) return (err); if ((err = gve_verify_driver_compatibility(priv)) != 0) { device_printf(priv->dev, "Failed to verify driver compatibility: err=%d\n", err); goto abort; } if ((err = gve_adminq_describe_device(priv)) != 0) goto abort; gve_set_queue_cnts(priv); priv->num_registered_pages = 0; return (0); abort: gve_release_adminq(priv); return (err); } void gve_schedule_reset(struct gve_priv *priv) { if (gve_get_state_flag(priv, GVE_STATE_FLAG_IN_RESET)) return; device_printf(priv->dev, "Scheduling reset task!\n"); gve_set_state_flag(priv, GVE_STATE_FLAG_DO_RESET); taskqueue_enqueue(priv->service_tq, &priv->service_task); } static void gve_destroy(struct gve_priv *priv) { gve_down(priv); gve_deconfigure_and_free_device_resources(priv); gve_release_adminq(priv); } static void gve_restore(struct gve_priv *priv) { int err; err = gve_adminq_alloc(priv); if (err != 0) goto abort; err = gve_adminq_configure_device_resources(priv); if (err != 0) { device_printf(priv->dev, "Failed to configure device resources: err=%d\n", err); err = (ENXIO); goto abort; } if (!gve_is_gqi(priv)) { err = gve_adminq_get_ptype_map_dqo(priv, priv->ptype_lut_dqo); if (err != 0) { device_printf(priv->dev, "Failed to configure ptype lut: err=%d\n", err); goto abort; } } err = gve_up(priv); if (err != 0) goto abort; return; abort: device_printf(priv->dev, "Restore failed!\n"); return; } static void gve_clear_device_resources(struct gve_priv *priv) { int i; for (i = 0; i < priv->num_event_counters; i++) priv->counters[i] = 0; bus_dmamap_sync(priv->counter_array_mem.tag, priv->counter_array_mem.map, BUS_DMASYNC_PREWRITE); for (i = 0; i < priv->num_queues; i++) priv->irq_db_indices[i] = (struct gve_irq_db){}; 
bus_dmamap_sync(priv->irqs_db_mem.tag, priv->irqs_db_mem.map, BUS_DMASYNC_PREWRITE); if (priv->ptype_lut_dqo) *priv->ptype_lut_dqo = (struct gve_ptype_lut){0}; } static void gve_handle_reset(struct gve_priv *priv) { if (!gve_get_state_flag(priv, GVE_STATE_FLAG_DO_RESET)) return; gve_clear_state_flag(priv, GVE_STATE_FLAG_DO_RESET); gve_set_state_flag(priv, GVE_STATE_FLAG_IN_RESET); GVE_IFACE_LOCK_LOCK(priv->gve_iface_lock); if_setdrvflagbits(priv->ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING); if_link_state_change(priv->ifp, LINK_STATE_DOWN); gve_clear_state_flag(priv, GVE_STATE_FLAG_LINK_UP); /* * Releasing the adminq causes the NIC to destroy all resources * registered with it, so by clearing the flags beneath we cause * the subsequent gve_down call below to not attempt to tell the * NIC to destroy these resources again. * * The call to gve_down is needed in the first place to refresh * the state and the DMA-able memory within each driver ring. */ gve_release_adminq(priv); gve_clear_state_flag(priv, GVE_STATE_FLAG_RESOURCES_OK); gve_clear_state_flag(priv, GVE_STATE_FLAG_QPLREG_OK); gve_clear_state_flag(priv, GVE_STATE_FLAG_RX_RINGS_OK); gve_clear_state_flag(priv, GVE_STATE_FLAG_TX_RINGS_OK); gve_down(priv); gve_clear_device_resources(priv); gve_restore(priv); GVE_IFACE_LOCK_UNLOCK(priv->gve_iface_lock); priv->reset_cnt++; gve_clear_state_flag(priv, GVE_STATE_FLAG_IN_RESET); } static void gve_handle_link_status(struct gve_priv *priv) { uint32_t status = gve_reg_bar_read_4(priv, DEVICE_STATUS); bool link_up = status & GVE_DEVICE_STATUS_LINK_STATUS; if (link_up == gve_get_state_flag(priv, GVE_STATE_FLAG_LINK_UP)) return; if (link_up) { if (bootverbose) device_printf(priv->dev, "Device link is up.\n"); if_link_state_change(priv->ifp, LINK_STATE_UP); gve_set_state_flag(priv, GVE_STATE_FLAG_LINK_UP); } else { device_printf(priv->dev, "Device link is down.\n"); if_link_state_change(priv->ifp, LINK_STATE_DOWN); gve_clear_state_flag(priv, GVE_STATE_FLAG_LINK_UP); } } static void gve_service_task(void *arg, int pending) { struct gve_priv *priv = (struct gve_priv *)arg; uint32_t status = gve_reg_bar_read_4(priv, DEVICE_STATUS); if (((GVE_DEVICE_STATUS_RESET_MASK & status) != 0) && !gve_get_state_flag(priv, GVE_STATE_FLAG_IN_RESET)) { device_printf(priv->dev, "Device requested reset\n"); gve_set_state_flag(priv, GVE_STATE_FLAG_DO_RESET); } gve_handle_reset(priv); gve_handle_link_status(priv); } static int gve_probe(device_t dev) { uint16_t deviceid, vendorid; int i; vendorid = pci_get_vendor(dev); deviceid = pci_get_device(dev); for (i = 0; i < nitems(gve_devs); i++) { if (vendorid == gve_devs[i].vendor_id && deviceid == gve_devs[i].device_id) { device_set_desc(dev, gve_devs[i].name); return (BUS_PROBE_DEFAULT); } } return (ENXIO); } static void gve_free_sys_res_mem(struct gve_priv *priv) { if (priv->msix_table != NULL) bus_release_resource(priv->dev, SYS_RES_MEMORY, rman_get_rid(priv->msix_table), priv->msix_table); if (priv->db_bar != NULL) bus_release_resource(priv->dev, SYS_RES_MEMORY, rman_get_rid(priv->db_bar), priv->db_bar); if (priv->reg_bar != NULL) bus_release_resource(priv->dev, SYS_RES_MEMORY, rman_get_rid(priv->reg_bar), priv->reg_bar); } static int gve_attach(device_t dev) { struct gve_priv *priv; int rid; int err; snprintf(gve_version, sizeof(gve_version), "%d.%d.%d", GVE_VERSION_MAJOR, GVE_VERSION_MINOR, GVE_VERSION_SUB); priv = device_get_softc(dev); priv->dev = dev; GVE_IFACE_LOCK_INIT(priv->gve_iface_lock); pci_enable_busmaster(dev); rid = PCIR_BAR(GVE_REGISTER_BAR); priv->reg_bar = 
bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (priv->reg_bar == NULL) { device_printf(dev, "Failed to allocate BAR0\n"); err = ENXIO; goto abort; } rid = PCIR_BAR(GVE_DOORBELL_BAR); priv->db_bar = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (priv->db_bar == NULL) { device_printf(dev, "Failed to allocate BAR2\n"); err = ENXIO; goto abort; } rid = pci_msix_table_bar(priv->dev); priv->msix_table = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (priv->msix_table == NULL) { device_printf(dev, "Failed to allocate msix table\n"); err = ENXIO; goto abort; } err = gve_alloc_adminq_and_describe_device(priv); if (err != 0) goto abort; err = gve_alloc_and_configure_device_resources(priv); if (err != 0) goto abort; err = gve_alloc_rings(priv); if (err != 0) goto abort; gve_setup_ifnet(dev, priv); priv->rx_copybreak = GVE_DEFAULT_RX_COPYBREAK; bus_write_multi_1(priv->reg_bar, DRIVER_VERSION, GVE_DRIVER_VERSION, sizeof(GVE_DRIVER_VERSION) - 1); TASK_INIT(&priv->service_task, 0, gve_service_task, priv); priv->service_tq = taskqueue_create("gve service", M_WAITOK | M_ZERO, taskqueue_thread_enqueue, &priv->service_tq); taskqueue_start_threads(&priv->service_tq, 1, PI_NET, "%s service tq", device_get_nameunit(priv->dev)); gve_setup_sysctl(priv); if (bootverbose) device_printf(priv->dev, "Successfully attached %s", GVE_DRIVER_VERSION); return (0); abort: gve_free_rings(priv); gve_deconfigure_and_free_device_resources(priv); gve_release_adminq(priv); gve_free_sys_res_mem(priv); GVE_IFACE_LOCK_DESTROY(priv->gve_iface_lock); return (err); } static int gve_detach(device_t dev) { struct gve_priv *priv = device_get_softc(dev); if_t ifp = priv->ifp; int error; error = bus_generic_detach(dev); if (error != 0) return (error); ether_ifdetach(ifp); GVE_IFACE_LOCK_LOCK(priv->gve_iface_lock); gve_destroy(priv); GVE_IFACE_LOCK_UNLOCK(priv->gve_iface_lock); gve_free_rings(priv); gve_free_sys_res_mem(priv); GVE_IFACE_LOCK_DESTROY(priv->gve_iface_lock); while (taskqueue_cancel(priv->service_tq, &priv->service_task, NULL)) taskqueue_drain(priv->service_tq, &priv->service_task); taskqueue_free(priv->service_tq); if_free(ifp); return (0); } static device_method_t gve_methods[] = { DEVMETHOD(device_probe, gve_probe), DEVMETHOD(device_attach, gve_attach), DEVMETHOD(device_detach, gve_detach), DEVMETHOD_END }; static driver_t gve_driver = { "gve", gve_methods, sizeof(struct gve_priv) }; #if __FreeBSD_version < 1301503 static devclass_t gve_devclass; DRIVER_MODULE(gve, pci, gve_driver, gve_devclass, 0, 0); #else DRIVER_MODULE(gve, pci, gve_driver, 0, 0); #endif MODULE_PNP_INFO("U16:vendor;U16:device;D:#", pci, gve, gve_devs, nitems(gve_devs)); diff --git a/sys/dev/gve/gve_sysctl.c b/sys/dev/gve/gve_sysctl.c index 8f52ffad6f3e..f7c7b5803865 100644 --- a/sys/dev/gve/gve_sysctl.c +++ b/sys/dev/gve/gve_sysctl.c @@ -1,410 +1,497 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2023-2024 Google LLC * * Redistribution and use in source and binary forms, with or without modification, * are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * 3. 
Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software without * specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "gve.h" static SYSCTL_NODE(_hw, OID_AUTO, gve, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "GVE driver parameters"); bool gve_disable_hw_lro = false; SYSCTL_BOOL(_hw_gve, OID_AUTO, disable_hw_lro, CTLFLAG_RDTUN, &gve_disable_hw_lro, 0, "Controls if hardware LRO is used"); char gve_queue_format[8]; SYSCTL_STRING(_hw_gve, OID_AUTO, queue_format, CTLFLAG_RD, &gve_queue_format, 0, "Queue format being used by the iface"); char gve_version[8]; SYSCTL_STRING(_hw_gve, OID_AUTO, driver_version, CTLFLAG_RD, &gve_version, 0, "Driver version"); static void gve_setup_rxq_sysctl(struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child, struct gve_rx_ring *rxq) { struct sysctl_oid *node; struct sysctl_oid_list *list; struct gve_rxq_stats *stats; char namebuf[16]; snprintf(namebuf, sizeof(namebuf), "rxq%d", rxq->com.id); node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Receive Queue"); list = SYSCTL_CHILDREN(node); stats = &rxq->stats; SYSCTL_ADD_COUNTER_U64(ctx, list, OID_AUTO, "rx_bytes", CTLFLAG_RD, &stats->rbytes, "Bytes received"); SYSCTL_ADD_COUNTER_U64(ctx, list, OID_AUTO, "rx_packets", CTLFLAG_RD, &stats->rpackets, "Packets received"); SYSCTL_ADD_COUNTER_U64(ctx, list, OID_AUTO, "rx_copybreak_cnt", CTLFLAG_RD, &stats->rx_copybreak_cnt, "Total frags with mbufs allocated for copybreak"); SYSCTL_ADD_COUNTER_U64(ctx, list, OID_AUTO, "rx_frag_flip_cnt", CTLFLAG_RD, &stats->rx_frag_flip_cnt, "Total frags that allocated mbuf with page flip"); SYSCTL_ADD_COUNTER_U64(ctx, list, OID_AUTO, "rx_frag_copy_cnt", CTLFLAG_RD, &stats->rx_frag_copy_cnt, "Total frags with mbuf that copied payload into mbuf"); SYSCTL_ADD_COUNTER_U64(ctx, list, OID_AUTO, "rx_dropped_pkt", CTLFLAG_RD, &stats->rx_dropped_pkt, "Total rx packets dropped"); SYSCTL_ADD_COUNTER_U64(ctx, list, OID_AUTO, "rx_dropped_pkt_desc_err", CTLFLAG_RD, &stats->rx_dropped_pkt_desc_err, "Packets dropped due to descriptor error"); SYSCTL_ADD_COUNTER_U64(ctx, list, OID_AUTO, "rx_dropped_pkt_buf_post_fail", CTLFLAG_RD, &stats->rx_dropped_pkt_buf_post_fail, "Packets dropped due to failure to post enough buffers"); SYSCTL_ADD_COUNTER_U64(ctx, list, OID_AUTO, "rx_dropped_pkt_mbuf_alloc_fail", CTLFLAG_RD, &stats->rx_dropped_pkt_mbuf_alloc_fail, "Packets dropped due to failed mbuf allocation"); SYSCTL_ADD_COUNTER_U64(ctx, list, OID_AUTO, "rx_mbuf_dmamap_err", CTLFLAG_RD, &stats->rx_mbuf_dmamap_err, "Number of rx mbufs which could not be dma mapped"); SYSCTL_ADD_COUNTER_U64(ctx, list, OID_AUTO, "rx_mbuf_mclget_null", CTLFLAG_RD, &stats->rx_mbuf_mclget_null, "Number of 
times when there were no cluster mbufs");
	SYSCTL_ADD_U32(ctx, list, OID_AUTO,
	    "rx_completed_desc", CTLFLAG_RD,
	    &rxq->cnt, 0, "Number of descriptors completed");
	SYSCTL_ADD_U32(ctx, list, OID_AUTO,
	    "num_desc_posted", CTLFLAG_RD,
	    &rxq->fill_cnt, 0,
	    "Total number of descriptors posted");
}

static void
gve_setup_txq_sysctl(struct sysctl_ctx_list *ctx,
    struct sysctl_oid_list *child, struct gve_tx_ring *txq)
{
	struct sysctl_oid *node;
	struct sysctl_oid_list *tx_list;
	struct gve_txq_stats *stats;
	char namebuf[16];

	snprintf(namebuf, sizeof(namebuf), "txq%d", txq->com.id);
	node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Transmit Queue");
	tx_list = SYSCTL_CHILDREN(node);

	stats = &txq->stats;

	SYSCTL_ADD_U32(ctx, tx_list, OID_AUTO,
	    "tx_posted_desc", CTLFLAG_RD,
	    &txq->req, 0, "Number of descriptors posted by NIC");
	SYSCTL_ADD_U32(ctx, tx_list, OID_AUTO,
	    "tx_completed_desc", CTLFLAG_RD,
	    &txq->done, 0, "Number of descriptors completed");
	SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
	    "tx_packets", CTLFLAG_RD,
	    &stats->tpackets, "Packets transmitted");
	SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
	    "tx_tso_packets", CTLFLAG_RD,
	    &stats->tso_packet_cnt, "TSO Packets transmitted");
	SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
	    "tx_bytes", CTLFLAG_RD,
	    &stats->tbytes, "Bytes transmitted");
	SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
	    "tx_delayed_pkt_nospace_device", CTLFLAG_RD,
	    &stats->tx_delayed_pkt_nospace_device,
	    "Packets delayed due to no space in device");
	SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
	    "tx_dropped_pkt_nospace_bufring", CTLFLAG_RD,
	    &stats->tx_dropped_pkt_nospace_bufring,
	    "Packets dropped due to no space in br ring");
	SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
	    "tx_dropped_pkt_vlan", CTLFLAG_RD,
	    &stats->tx_dropped_pkt_vlan,
	    "Dropped VLAN packets");
	SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
	    "tx_delayed_pkt_nospace_descring", CTLFLAG_RD,
	    &stats->tx_delayed_pkt_nospace_descring,
	    "Packets delayed due to no space in desc ring");
	SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
	    "tx_delayed_pkt_nospace_compring", CTLFLAG_RD,
	    &stats->tx_delayed_pkt_nospace_compring,
	    "Packets delayed due to no space in comp ring");
	SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
	    "tx_delayed_pkt_nospace_qpl_bufs", CTLFLAG_RD,
	    &stats->tx_delayed_pkt_nospace_qpl_bufs,
	    "Packets delayed due to not enough qpl bufs");
	SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
	    "tx_delayed_pkt_tsoerr", CTLFLAG_RD,
	    &stats->tx_delayed_pkt_tsoerr,
	    "TSO packets delayed due to errors in TSO prep");
	SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
	    "tx_mbuf_collapse", CTLFLAG_RD,
	    &stats->tx_mbuf_collapse,
	    "tx mbufs that had to be collapsed");
	SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
	    "tx_mbuf_defrag", CTLFLAG_RD,
	    &stats->tx_mbuf_defrag,
	    "tx mbufs that had to be defragged");
	SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
	    "tx_mbuf_defrag_err", CTLFLAG_RD,
	    &stats->tx_mbuf_defrag_err,
	    "tx mbufs that failed defrag");
	SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
	    "tx_mbuf_dmamap_enomem_err", CTLFLAG_RD,
	    &stats->tx_mbuf_dmamap_enomem_err,
	    "tx mbufs that could not be dma-mapped due to low mem");
	SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
	    "tx_mbuf_dmamap_err", CTLFLAG_RD,
	    &stats->tx_mbuf_dmamap_err,
	    "tx mbufs that could not be dma-mapped");
}
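As a hedged usage sketch (assuming unit 0, so the tree registered via device_get_sysctl_tree() is dev.gve.0, and a hypothetical userland reader): the per-queue counters registered above can be fetched with sysctlbyname(3), e.g.:

	#include <sys/types.h>
	#include <sys/sysctl.h>
	#include <stdint.h>
	#include <stdio.h>

	int
	main(void)
	{
		uint64_t pkts;
		size_t len = sizeof(pkts);

		/* rx_packets counter created by gve_setup_rxq_sysctl() for rxq0. */
		if (sysctlbyname("dev.gve.0.rxq0.rx_packets", &pkts, &len,
		    NULL, 0) == -1) {
			perror("sysctlbyname");
			return (1);
		}
		printf("rxq0 packets: %ju\n", (uintmax_t)pkts);
		return (0);
	}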
static void
gve_setup_queue_stat_sysctl(struct sysctl_ctx_list *ctx,
    struct sysctl_oid_list *child, struct gve_priv *priv)
{
	int i;

	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
		gve_setup_rxq_sysctl(ctx, child, &priv->rx[i]);
	}
	for (i = 0; i < priv->tx_cfg.num_queues; i++) {
		gve_setup_txq_sysctl(ctx, child, &priv->tx[i]);
	}
}

static void
gve_setup_adminq_stat_sysctl(struct sysctl_ctx_list *ctx,
    struct sysctl_oid_list *child, struct gve_priv *priv)
{
	struct sysctl_oid *admin_node;
	struct sysctl_oid_list *admin_list;

	/* Admin queue stats */
	admin_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "adminq_stats",
	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Admin Queue statistics");
	admin_list = SYSCTL_CHILDREN(admin_node);

	SYSCTL_ADD_U32(ctx, admin_list, OID_AUTO, "adminq_prod_cnt",
	    CTLFLAG_RD, &priv->adminq_prod_cnt, 0, "Adminq Commands issued");
	SYSCTL_ADD_U32(ctx, admin_list, OID_AUTO, "adminq_cmd_fail",
	    CTLFLAG_RD, &priv->adminq_cmd_fail, 0, "Adminq Failed commands");
	SYSCTL_ADD_U32(ctx, admin_list, OID_AUTO, "adminq_timeouts",
	    CTLFLAG_RD, &priv->adminq_timeouts, 0, "Adminq Timed-out commands");
	SYSCTL_ADD_U32(ctx, admin_list, OID_AUTO, "adminq_describe_device_cnt",
	    CTLFLAG_RD, &priv->adminq_describe_device_cnt, 0,
	    "adminq_describe_device_cnt");
	SYSCTL_ADD_U32(ctx, admin_list, OID_AUTO,
	    "adminq_cfg_device_resources_cnt", CTLFLAG_RD,
	    &priv->adminq_cfg_device_resources_cnt, 0,
	    "adminq_cfg_device_resources_cnt");
	SYSCTL_ADD_U32(ctx, admin_list, OID_AUTO,
	    "adminq_register_page_list_cnt", CTLFLAG_RD,
	    &priv->adminq_register_page_list_cnt, 0,
	    "adminq_register_page_list_cnt");
	SYSCTL_ADD_U32(ctx, admin_list, OID_AUTO,
	    "adminq_unregister_page_list_cnt", CTLFLAG_RD,
	    &priv->adminq_unregister_page_list_cnt, 0,
	    "adminq_unregister_page_list_cnt");
	SYSCTL_ADD_U32(ctx, admin_list, OID_AUTO, "adminq_create_tx_queue_cnt",
	    CTLFLAG_RD, &priv->adminq_create_tx_queue_cnt, 0,
	    "adminq_create_tx_queue_cnt");
	SYSCTL_ADD_U32(ctx, admin_list, OID_AUTO, "adminq_create_rx_queue_cnt",
	    CTLFLAG_RD, &priv->adminq_create_rx_queue_cnt, 0,
	    "adminq_create_rx_queue_cnt");
	SYSCTL_ADD_U32(ctx, admin_list, OID_AUTO, "adminq_destroy_tx_queue_cnt",
	    CTLFLAG_RD, &priv->adminq_destroy_tx_queue_cnt, 0,
	    "adminq_destroy_tx_queue_cnt");
	SYSCTL_ADD_U32(ctx, admin_list, OID_AUTO, "adminq_destroy_rx_queue_cnt",
	    CTLFLAG_RD, &priv->adminq_destroy_rx_queue_cnt, 0,
	    "adminq_destroy_rx_queue_cnt");
	SYSCTL_ADD_U32(ctx, admin_list, OID_AUTO, "adminq_get_ptype_map_cnt",
	    CTLFLAG_RD, &priv->adminq_get_ptype_map_cnt, 0,
	    "adminq_get_ptype_map_cnt");
	SYSCTL_ADD_U32(ctx, admin_list, OID_AUTO,
	    "adminq_dcfg_device_resources_cnt", CTLFLAG_RD,
	    &priv->adminq_dcfg_device_resources_cnt, 0,
	    "adminq_dcfg_device_resources_cnt");
	SYSCTL_ADD_U32(ctx, admin_list, OID_AUTO,
	    "adminq_set_driver_parameter_cnt", CTLFLAG_RD,
	    &priv->adminq_set_driver_parameter_cnt, 0,
	    "adminq_set_driver_parameter_cnt");
	SYSCTL_ADD_U32(ctx, admin_list, OID_AUTO,
	    "adminq_verify_driver_compatibility_cnt", CTLFLAG_RD,
	    &priv->adminq_verify_driver_compatibility_cnt, 0,
	    "adminq_verify_driver_compatibility_cnt");
}

static void
gve_setup_main_stat_sysctl(struct sysctl_ctx_list *ctx,
    struct sysctl_oid_list *child, struct gve_priv *priv)
{
	struct sysctl_oid *main_node;
	struct sysctl_oid_list *main_list;

	/* Main stats */
	main_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "main_stats",
	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Main statistics");
	main_list = SYSCTL_CHILDREN(main_node);

	SYSCTL_ADD_U32(ctx, main_list, OID_AUTO, "interface_up_cnt",
	    CTLFLAG_RD, &priv->interface_up_cnt, 0,
	    "Times interface was set to up");
	SYSCTL_ADD_U32(ctx, main_list, OID_AUTO, "interface_down_cnt",
	    CTLFLAG_RD, &priv->interface_down_cnt, 0,
	    "Times interface was set to down");
	SYSCTL_ADD_U32(ctx, main_list, OID_AUTO, "reset_cnt", CTLFLAG_RD,
	    &priv->reset_cnt, 0, "Times reset");
}

static
int gve_check_num_queues(struct gve_priv *priv, int val, bool is_rx) { if (val < 1) { device_printf(priv->dev, "Requested num queues (%u) must be a positive integer\n", val); return (EINVAL); } if (val > (is_rx ? priv->rx_cfg.max_queues : priv->tx_cfg.max_queues)) { device_printf(priv->dev, "Requested num queues (%u) is too large\n", val); return (EINVAL); } return (0); } static int gve_sysctl_num_tx_queues(SYSCTL_HANDLER_ARGS) { struct gve_priv *priv = arg1; int val; int err; val = priv->tx_cfg.num_queues; err = sysctl_handle_int(oidp, &val, 0, req); if (err != 0 || req->newptr == NULL) return (err); err = gve_check_num_queues(priv, val, /*is_rx=*/false); if (err != 0) return (err); if (val != priv->tx_cfg.num_queues) { GVE_IFACE_LOCK_LOCK(priv->gve_iface_lock); err = gve_adjust_tx_queues(priv, val); GVE_IFACE_LOCK_UNLOCK(priv->gve_iface_lock); } return (err); } static int gve_sysctl_num_rx_queues(SYSCTL_HANDLER_ARGS) { struct gve_priv *priv = arg1; int val; int err; val = priv->rx_cfg.num_queues; err = sysctl_handle_int(oidp, &val, 0, req); if (err != 0 || req->newptr == NULL) return (err); err = gve_check_num_queues(priv, val, /*is_rx=*/true); if (err != 0) return (err); if (val != priv->rx_cfg.num_queues) { GVE_IFACE_LOCK_LOCK(priv->gve_iface_lock); err = gve_adjust_rx_queues(priv, val); GVE_IFACE_LOCK_UNLOCK(priv->gve_iface_lock); } return (err); } +static int +gve_check_ring_size(struct gve_priv *priv, int val, bool is_rx) +{ + if (!powerof2(val) || val == 0) { + device_printf(priv->dev, + "Requested ring size (%u) must be a power of 2\n", val); + return (EINVAL); + } + + if (val < (is_rx ? priv->min_rx_desc_cnt : priv->min_tx_desc_cnt)) { + device_printf(priv->dev, + "Requested ring size (%u) cannot be less than %d\n", val, + (is_rx ? priv->min_rx_desc_cnt : priv->min_tx_desc_cnt)); + return (EINVAL); + } + + + if (val > (is_rx ? priv->max_rx_desc_cnt : priv->max_tx_desc_cnt)) { + device_printf(priv->dev, + "Requested ring size (%u) cannot be greater than %d\n", val, + (is_rx ? 
priv->max_rx_desc_cnt : priv->max_tx_desc_cnt)); + return (EINVAL); + } + + return (0); +} + +static int +gve_sysctl_tx_ring_size(SYSCTL_HANDLER_ARGS) +{ + struct gve_priv *priv = arg1; + int val; + int err; + + val = priv->tx_desc_cnt; + err = sysctl_handle_int(oidp, &val, 0, req); + if (err != 0 || req->newptr == NULL) + return (err); + + err = gve_check_ring_size(priv, val, /*is_rx=*/false); + if (err != 0) + return (err); + + if (val != priv->tx_desc_cnt) { + GVE_IFACE_LOCK_LOCK(priv->gve_iface_lock); + err = gve_adjust_ring_sizes(priv, val, /*is_rx=*/false); + GVE_IFACE_LOCK_UNLOCK(priv->gve_iface_lock); + } + + return (err); +} + +static int +gve_sysctl_rx_ring_size(SYSCTL_HANDLER_ARGS) +{ + struct gve_priv *priv = arg1; + int val; + int err; + + val = priv->rx_desc_cnt; + err = sysctl_handle_int(oidp, &val, 0, req); + if (err != 0 || req->newptr == NULL) + return (err); + + err = gve_check_ring_size(priv, val, /*is_rx=*/true); + if (err != 0) + return (err); + + if (val != priv->rx_desc_cnt) { + GVE_IFACE_LOCK_LOCK(priv->gve_iface_lock); + err = gve_adjust_ring_sizes(priv, val, /*is_rx=*/true); + GVE_IFACE_LOCK_UNLOCK(priv->gve_iface_lock); + } + + return (err); +} + static void gve_setup_sysctl_writables(struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child, struct gve_priv *priv) { SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "num_tx_queues", CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, priv, 0, gve_sysctl_num_tx_queues, "I", "Number of TX queues"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "num_rx_queues", CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, priv, 0, gve_sysctl_num_rx_queues, "I", "Number of RX queues"); + + if (priv->modify_ringsize_enabled) { + SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_ring_size", + CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, priv, 0, + gve_sysctl_tx_ring_size, "I", "TX ring size"); + + SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_ring_size", + CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, priv, 0, + gve_sysctl_rx_ring_size, "I", "RX ring size"); + } } void gve_setup_sysctl(struct gve_priv *priv) { device_t dev; struct sysctl_ctx_list *ctx; struct sysctl_oid *tree; struct sysctl_oid_list *child; dev = priv->dev; ctx = device_get_sysctl_ctx(dev); tree = device_get_sysctl_tree(dev); child = SYSCTL_CHILDREN(tree); gve_setup_queue_stat_sysctl(ctx, child, priv); gve_setup_adminq_stat_sysctl(ctx, child, priv); gve_setup_main_stat_sysctl(ctx, child, priv); gve_setup_sysctl_writables(ctx, child, priv); } void gve_accum_stats(struct gve_priv *priv, uint64_t *rpackets, uint64_t *rbytes, uint64_t *rx_dropped_pkt, uint64_t *tpackets, uint64_t *tbytes, uint64_t *tx_dropped_pkt) { struct gve_rxq_stats *rxqstats; struct gve_txq_stats *txqstats; int i; for (i = 0; i < priv->rx_cfg.num_queues; i++) { rxqstats = &priv->rx[i].stats; *rpackets += counter_u64_fetch(rxqstats->rpackets); *rbytes += counter_u64_fetch(rxqstats->rbytes); *rx_dropped_pkt += counter_u64_fetch(rxqstats->rx_dropped_pkt); } for (i = 0; i < priv->tx_cfg.num_queues; i++) { txqstats = &priv->tx[i].stats; *tpackets += counter_u64_fetch(txqstats->tpackets); *tbytes += counter_u64_fetch(txqstats->tbytes); *tx_dropped_pkt += counter_u64_fetch(txqstats->tx_dropped_pkt); } }
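To make the new ring-size validation concrete, here is an illustrative set of calls (assumed bounds: the device reported min_rx_desc_cnt = 256 and max_rx_desc_cnt = 4096 through the modify_ring device option; actual bounds vary by device):

	/* Assuming priv->min_rx_desc_cnt == 256, priv->max_rx_desc_cnt == 4096: */
	gve_check_ring_size(priv, 1024, true);	/* 0: power of 2, within bounds */
	gve_check_ring_size(priv, 768, true);	/* EINVAL: not a power of 2 */
	gve_check_ring_size(priv, 128, true);	/* EINVAL: below the 256 minimum */
	gve_check_ring_size(priv, 8192, true);	/* EINVAL: above the 4096 maximum */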