diff --git a/sys/dev/gve/gve_adminq.h b/sys/dev/gve/gve_adminq.h index bc51046a3037..531a844f7d90 100644 --- a/sys/dev/gve/gve_adminq.h +++ b/sys/dev/gve/gve_adminq.h @@ -1,457 +1,458 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2023-2024 Google LLC * * Redistribution and use in source and binary forms, with or without modification, * are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software without * specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _GVE_AQ_H_ #define _GVE_AQ_H_ 1 #include #include #include #include #include /* Admin queue opcodes */ enum gve_adminq_opcodes { GVE_ADMINQ_DESCRIBE_DEVICE = 0x1, GVE_ADMINQ_CONFIGURE_DEVICE_RESOURCES = 0x2, GVE_ADMINQ_REGISTER_PAGE_LIST = 0x3, GVE_ADMINQ_UNREGISTER_PAGE_LIST = 0x4, GVE_ADMINQ_CREATE_TX_QUEUE = 0x5, GVE_ADMINQ_CREATE_RX_QUEUE = 0x6, GVE_ADMINQ_DESTROY_TX_QUEUE = 0x7, GVE_ADMINQ_DESTROY_RX_QUEUE = 0x8, GVE_ADMINQ_DECONFIGURE_DEVICE_RESOURCES = 0x9, GVE_ADMINQ_SET_DRIVER_PARAMETER = 0xB, GVE_ADMINQ_REPORT_STATS = 0xC, GVE_ADMINQ_REPORT_LINK_SPEED = 0xD, GVE_ADMINQ_GET_PTYPE_MAP = 0xE, GVE_ADMINQ_VERIFY_DRIVER_COMPATIBILITY = 0xF, }; /* Admin queue status codes */ enum gve_adminq_statuses { GVE_ADMINQ_COMMAND_UNSET = 0x0, GVE_ADMINQ_COMMAND_PASSED = 0x1, GVE_ADMINQ_COMMAND_ERROR_ABORTED = 0xFFFFFFF0, GVE_ADMINQ_COMMAND_ERROR_ALREADY_EXISTS = 0xFFFFFFF1, GVE_ADMINQ_COMMAND_ERROR_CANCELLED = 0xFFFFFFF2, GVE_ADMINQ_COMMAND_ERROR_DATALOSS = 0xFFFFFFF3, GVE_ADMINQ_COMMAND_ERROR_DEADLINE_EXCEEDED = 0xFFFFFFF4, GVE_ADMINQ_COMMAND_ERROR_FAILED_PRECONDITION = 0xFFFFFFF5, GVE_ADMINQ_COMMAND_ERROR_INTERNAL_ERROR = 0xFFFFFFF6, GVE_ADMINQ_COMMAND_ERROR_INVALID_ARGUMENT = 0xFFFFFFF7, GVE_ADMINQ_COMMAND_ERROR_NOT_FOUND = 0xFFFFFFF8, GVE_ADMINQ_COMMAND_ERROR_OUT_OF_RANGE = 0xFFFFFFF9, GVE_ADMINQ_COMMAND_ERROR_PERMISSION_DENIED = 0xFFFFFFFA, GVE_ADMINQ_COMMAND_ERROR_UNAUTHENTICATED = 0xFFFFFFFB, GVE_ADMINQ_COMMAND_ERROR_RESOURCE_EXHAUSTED = 0xFFFFFFFC, GVE_ADMINQ_COMMAND_ERROR_UNAVAILABLE = 0xFFFFFFFD, GVE_ADMINQ_COMMAND_ERROR_UNIMPLEMENTED = 0xFFFFFFFE, GVE_ADMINQ_COMMAND_ERROR_UNKNOWN_ERROR = 0xFFFFFFFF, }; #define GVE_ADMINQ_DEVICE_DESCRIPTOR_VERSION 1 /* * All AdminQ command structs should be naturally packed. 
The static_assert * calls make sure this is the case at compile time. */ struct gve_adminq_describe_device { __be64 device_descriptor_addr; __be32 device_descriptor_version; __be32 available_length; }; _Static_assert(sizeof(struct gve_adminq_describe_device) == 16, "gve: bad admin queue struct length"); struct gve_device_descriptor { __be64 max_registered_pages; __be16 reserved1; __be16 tx_queue_entries; __be16 rx_queue_entries; __be16 default_num_queues; __be16 mtu; __be16 counters; __be16 reserved2; __be16 rx_pages_per_qpl; uint8_t mac[ETHER_ADDR_LEN]; __be16 num_device_options; __be16 total_length; uint8_t reserved3[6]; }; _Static_assert(sizeof(struct gve_device_descriptor) == 40, "gve: bad admin queue struct length"); struct gve_device_option { __be16 option_id; __be16 option_length; __be32 required_features_mask; }; _Static_assert(sizeof(struct gve_device_option) == 8, "gve: bad admin queue struct length"); struct gve_device_option_gqi_rda { __be32 supported_features_mask; }; _Static_assert(sizeof(struct gve_device_option_gqi_rda) == 4, "gve: bad admin queue struct length"); struct gve_device_option_gqi_qpl { __be32 supported_features_mask; }; _Static_assert(sizeof(struct gve_device_option_gqi_qpl) == 4, "gve: bad admin queue struct length"); struct gve_device_option_dqo_rda { __be32 supported_features_mask; __be16 tx_comp_ring_entries; __be16 rx_buff_ring_entries; }; _Static_assert(sizeof(struct gve_device_option_dqo_rda) == 8, "gve: bad admin queue struct length"); struct gve_device_option_dqo_qpl { __be32 supported_features_mask; __be16 tx_comp_ring_entries; __be16 rx_buff_ring_entries; }; _Static_assert(sizeof(struct gve_device_option_dqo_qpl) == 8, "gve: bad admin queue struct length"); struct gve_ring_size_bound { __be16 rx; __be16 tx; }; _Static_assert(sizeof(struct gve_ring_size_bound) == 4, "gve: bad admin queue struct length"); struct gve_device_option_modify_ring { __be32 supported_features_mask; struct gve_ring_size_bound max_ring_size; struct gve_ring_size_bound min_ring_size; }; _Static_assert(sizeof(struct gve_device_option_modify_ring) == 12, "gve: bad admin queue struct length"); struct gve_device_option_jumbo_frames { __be32 supported_features_mask; __be16 max_mtu; uint8_t padding[2]; }; _Static_assert(sizeof(struct gve_device_option_jumbo_frames) == 8, "gve: bad admin queue struct length"); enum gve_dev_opt_id { GVE_DEV_OPT_ID_GQI_RAW_ADDRESSING = 0x1, GVE_DEV_OPT_ID_GQI_RDA = 0x2, GVE_DEV_OPT_ID_GQI_QPL = 0x3, GVE_DEV_OPT_ID_DQO_RDA = 0x4, GVE_DEV_OPT_ID_MODIFY_RING = 0x6, GVE_DEV_OPT_ID_DQO_QPL = 0x7, GVE_DEV_OPT_ID_JUMBO_FRAMES = 0x8, }; /* * These masks are way to predicate the use of a particular option on the driver * having particular bug fixes represented by each bit position in the mask. * Currently they are all zero because there are no known bugs preventing the * use of any option. 
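 *
 * Illustrative sketch (editor's addition, not part of the driver): before
 * enabling an option from the device descriptor, a driver would confirm that
 * none of the bits the option requires are missing from its own mask,
 * roughly:
 *
 *	bool
 *	driver_has_req_feats(const struct gve_device_option *opt,
 *	    uint32_t driver_feat_mask)
 *	{
 *		return ((be32toh(opt->required_features_mask) &
 *		    ~driver_feat_mask) == 0);
 *	}
 *
 * driver_has_req_feats() is a hypothetical name used only for this example.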
*/ enum gve_dev_opt_req_feat_mask { GVE_DEV_OPT_REQ_FEAT_MASK_GQI_RAW_ADDRESSING = 0x0, GVE_DEV_OPT_REQ_FEAT_MASK_GQI_RDA = 0x0, GVE_DEV_OPT_REQ_FEAT_MASK_GQI_QPL = 0x0, GVE_DEV_OPT_REQ_FEAT_MASK_DQO_RDA = 0x0, GVE_DEV_OPT_REQ_FEAT_MASK_DQO_QPL = 0x0, GVE_DEV_OPT_REQ_FEAT_MASK_MODIFY_RING = 0x0, GVE_DEV_OPT_REQ_FEAT_MASK_JUMBO_FRAMES = 0x0, }; enum gve_sup_feature_mask { GVE_SUP_MODIFY_RING_MASK = 1 << 0, GVE_SUP_JUMBO_FRAMES_MASK = 1 << 2, }; #define GVE_VERSION_STR_LEN 128 enum gve_driver_capability { gve_driver_capability_gqi_qpl = 0, gve_driver_capability_gqi_rda = 1, gve_driver_capability_dqo_qpl = 2, gve_driver_capability_dqo_rda = 3, }; #define GVE_CAP1(a) BIT((int) a) #define GVE_CAP2(a) BIT(((int) a) - 64) #define GVE_CAP3(a) BIT(((int) a) - 128) #define GVE_CAP4(a) BIT(((int) a) - 192) /* * The following four defines describe 256 compatibility bits. * Only a few bits (as shown in `gve_driver_compatibility`) are currently * defined. The rest are reserved for future use. */ #define GVE_DRIVER_CAPABILITY_FLAGS1 \ (GVE_CAP1(gve_driver_capability_gqi_qpl) | \ GVE_CAP1(gve_driver_capability_dqo_qpl) | \ GVE_CAP1(gve_driver_capability_dqo_rda)) #define GVE_DRIVER_CAPABILITY_FLAGS2 0x0 #define GVE_DRIVER_CAPABILITY_FLAGS3 0x0 #define GVE_DRIVER_CAPABILITY_FLAGS4 0x0 struct gve_driver_info { uint8_t os_type; uint8_t driver_major; uint8_t driver_minor; uint8_t driver_sub; __be32 os_version_major; __be32 os_version_minor; __be32 os_version_sub; __be64 driver_capability_flags[4]; uint8_t os_version_str1[GVE_VERSION_STR_LEN]; uint8_t os_version_str2[GVE_VERSION_STR_LEN]; }; struct gve_adminq_verify_driver_compatibility { __be64 driver_info_len; __be64 driver_info_addr; }; _Static_assert(sizeof(struct gve_adminq_verify_driver_compatibility) == 16, "gve: bad admin queue struct length"); struct gve_adminq_configure_device_resources { __be64 counter_array; __be64 irq_db_addr; __be32 num_counters; __be32 num_irq_dbs; __be32 irq_db_stride; __be32 ntfy_blk_msix_base_idx; uint8_t queue_format; uint8_t padding[7]; }; _Static_assert(sizeof(struct gve_adminq_configure_device_resources) == 40, "gve: bad admin queue struct length"); struct gve_adminq_register_page_list { __be32 page_list_id; __be32 num_pages; __be64 page_address_list_addr; __be64 page_size; }; _Static_assert(sizeof(struct gve_adminq_register_page_list) == 24, "gve: bad admin queue struct length"); struct gve_adminq_unregister_page_list { __be32 page_list_id; }; _Static_assert(sizeof(struct gve_adminq_unregister_page_list) == 4, "gve: bad admin queue struct length"); struct gve_adminq_create_tx_queue { __be32 queue_id; __be32 reserved; __be64 queue_resources_addr; __be64 tx_ring_addr; __be32 queue_page_list_id; __be32 ntfy_id; __be64 tx_comp_ring_addr; __be16 tx_ring_size; __be16 tx_comp_ring_size; uint8_t padding[4]; }; _Static_assert(sizeof(struct gve_adminq_create_tx_queue) == 48, "gve: bad admin queue struct length"); #define GVE_RAW_ADDRESSING_QPL_ID 0xFFFFFFFF struct gve_adminq_create_rx_queue { __be32 queue_id; __be32 index; __be32 reserved; __be32 ntfy_id; __be64 queue_resources_addr; __be64 rx_desc_ring_addr; __be64 rx_data_ring_addr; __be32 queue_page_list_id; __be16 rx_ring_size; __be16 packet_buffer_size; __be16 rx_buff_ring_size; uint8_t enable_rsc; uint8_t padding[5]; }; _Static_assert(sizeof(struct gve_adminq_create_rx_queue) == 56, "gve: bad admin queue struct length"); /* Queue resources that are shared with the device */ struct gve_queue_resources { union { struct { __be32 db_index; /* Device -> Guest */ __be32 
counter_index; /* Device -> Guest */ }; uint8_t reserved[64]; }; }; _Static_assert(sizeof(struct gve_queue_resources) == 64, "gve: bad admin queue struct length"); struct gve_adminq_destroy_tx_queue { __be32 queue_id; }; _Static_assert(sizeof(struct gve_adminq_destroy_tx_queue) == 4, "gve: bad admin queue struct length"); struct gve_adminq_destroy_rx_queue { __be32 queue_id; }; _Static_assert(sizeof(struct gve_adminq_destroy_rx_queue) == 4, "gve: bad admin queue struct length"); /* GVE Set Driver Parameter Types */ enum gve_set_driver_param_types { GVE_SET_PARAM_MTU = 0x1, }; struct gve_adminq_set_driver_parameter { __be32 parameter_type; uint8_t reserved[4]; __be64 parameter_value; }; _Static_assert(sizeof(struct gve_adminq_set_driver_parameter) == 16, "gve: bad admin queue struct length"); struct stats { __be32 stat_name; __be32 queue_id; __be64 value; }; _Static_assert(sizeof(struct stats) == 16, "gve: bad admin queue struct length"); -/* These are control path types for PTYPE which are the same as the data path +/* + * These are control path types for PTYPE which are the same as the data path * types. */ struct gve_ptype_entry { uint8_t l3_type; uint8_t l4_type; }; struct gve_ptype_map { struct gve_ptype_entry ptypes[1 << 10]; /* PTYPES are always 10 bits. */ }; struct gve_adminq_get_ptype_map { __be64 ptype_map_len; __be64 ptype_map_addr; }; struct gve_adminq_command { __be32 opcode; __be32 status; union { struct gve_adminq_configure_device_resources configure_device_resources; struct gve_adminq_create_tx_queue create_tx_queue; struct gve_adminq_create_rx_queue create_rx_queue; struct gve_adminq_destroy_tx_queue destroy_tx_queue; struct gve_adminq_destroy_rx_queue destroy_rx_queue; struct gve_adminq_describe_device describe_device; struct gve_adminq_register_page_list reg_page_list; struct gve_adminq_unregister_page_list unreg_page_list; struct gve_adminq_set_driver_parameter set_driver_param; struct gve_adminq_verify_driver_compatibility verify_driver_compatibility; struct gve_adminq_get_ptype_map get_ptype_map; uint8_t reserved[56]; }; }; _Static_assert(sizeof(struct gve_adminq_command) == 64, "gve: bad admin queue struct length"); enum gve_l3_type { /* Must be zero so zero initialized LUT is unknown. */ GVE_L3_TYPE_UNKNOWN = 0, GVE_L3_TYPE_OTHER, GVE_L3_TYPE_IPV4, GVE_L3_TYPE_IPV6, }; enum gve_l4_type { /* Must be zero so zero initialized LUT is unknown. 
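 * (Editor's illustrative note: a lookup is a plain array index on the
 * 10-bit packet_type reported in the RX completion descriptor, e.g.
 * map->ptypes[packet_type & ((1 << 10) - 1)], so any entry the device
 * never fills in reads back as the UNKNOWN types.)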
*/ GVE_L4_TYPE_UNKNOWN = 0, GVE_L4_TYPE_OTHER, GVE_L4_TYPE_TCP, GVE_L4_TYPE_UDP, GVE_L4_TYPE_ICMP, GVE_L4_TYPE_SCTP, }; int gve_adminq_create_rx_queues(struct gve_priv *priv, uint32_t num_queues); int gve_adminq_create_tx_queues(struct gve_priv *priv, uint32_t num_queues); int gve_adminq_destroy_tx_queues(struct gve_priv *priv, uint32_t num_queues); int gve_adminq_destroy_rx_queues(struct gve_priv *priv, uint32_t num_queues); int gve_adminq_set_mtu(struct gve_priv *priv, uint32_t mtu); int gve_adminq_alloc(struct gve_priv *priv); void gve_reset_adminq(struct gve_priv *priv); int gve_adminq_describe_device(struct gve_priv *priv); int gve_adminq_configure_device_resources(struct gve_priv *priv); int gve_adminq_deconfigure_device_resources(struct gve_priv *priv); void gve_release_adminq(struct gve_priv *priv); int gve_adminq_register_page_list(struct gve_priv *priv, struct gve_queue_page_list *qpl); int gve_adminq_unregister_page_list(struct gve_priv *priv, uint32_t page_list_id); int gve_adminq_verify_driver_compatibility(struct gve_priv *priv, uint64_t driver_info_len, vm_paddr_t driver_info_addr); int gve_adminq_get_ptype_map_dqo(struct gve_priv *priv, struct gve_ptype_lut *ptype_lut); #endif /* _GVE_AQ_H_ */ diff --git a/sys/dev/gve/gve_dqo.h b/sys/dev/gve/gve_dqo.h index 214138303a77..212bfa1a6ad3 100644 --- a/sys/dev/gve/gve_dqo.h +++ b/sys/dev/gve/gve_dqo.h @@ -1,321 +1,333 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2024 Google LLC * * Redistribution and use in source and binary forms, with or without modification, * are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software without * specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* GVE DQO Descriptor formats */ #ifndef _GVE_DESC_DQO_H_ #define _GVE_DESC_DQO_H_ #include "gve_plat.h" #define GVE_ITR_ENABLE_BIT_DQO BIT(0) #define GVE_ITR_NO_UPDATE_DQO (3 << 3) #define GVE_ITR_INTERVAL_DQO_SHIFT 5 #define GVE_ITR_INTERVAL_DQO_MASK ((1 << 12) - 1) #define GVE_TX_IRQ_RATELIMIT_US_DQO 50 #define GVE_RX_IRQ_RATELIMIT_US_DQO 20 #define GVE_TX_MAX_HDR_SIZE_DQO 255 #define GVE_TX_MIN_TSO_MSS_DQO 88 /* * Ringing the doorbell too often can hurt performance. * * HW requires this value to be at least 8. 
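 *
 * Concretely, the RX path only rings the doorbell when the ring head is a
 * multiple of this value (see gve_rx_advance_head_dqo()), roughly:
 *
 *	if ((head & (GVE_RX_BUF_THRESH_DQO - 1)) == 0)
 *		write head to the RX doorbell;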
*/ #define GVE_RX_BUF_THRESH_DQO 32 /* * Start dropping RX fragments if at least these many * buffers cannot be posted to the NIC. */ #define GVE_RX_DQO_MIN_PENDING_BUFS 128 #define GVE_DQ_NUM_FRAGS_IN_PAGE (PAGE_SIZE / GVE_DEFAULT_RX_BUFFER_SIZE) /* * gve_rx_qpl_buf_id_dqo's 11 bit wide buf_id field limits the total * number of pages per QPL to 2048. */ #define GVE_RX_NUM_QPL_PAGES_DQO 2048 /* 2K TX buffers for DQO-QPL */ #define GVE_TX_BUF_SHIFT_DQO 11 #define GVE_TX_BUF_SIZE_DQO BIT(GVE_TX_BUF_SHIFT_DQO) #define GVE_TX_BUFS_PER_PAGE_DQO (PAGE_SIZE >> GVE_TX_BUF_SHIFT_DQO) #define GVE_TX_NUM_QPL_PAGES_DQO 512 /* Basic TX descriptor (DTYPE 0x0C) */ struct gve_tx_pkt_desc_dqo { __le64 buf_addr; /* Must be GVE_TX_PKT_DESC_DTYPE_DQO (0xc) */ uint8_t dtype:5; /* Denotes the last descriptor of a packet. */ uint8_t end_of_packet:1; uint8_t checksum_offload_enable:1; /* If set, will generate a descriptor completion for this descriptor. */ uint8_t report_event:1; uint8_t reserved0; __le16 reserved1; /* The TX completion for this packet will contain this tag. */ __le16 compl_tag; uint16_t buf_size:14; uint16_t reserved2:2; } __packed; _Static_assert(sizeof(struct gve_tx_pkt_desc_dqo) == 16, "gve: bad dqo desc struct length"); #define GVE_TX_PKT_DESC_DTYPE_DQO 0xc /* * Maximum number of data descriptors allowed per packet, or per-TSO segment. */ #define GVE_TX_MAX_DATA_DESCS_DQO 10 #define GVE_TX_MAX_BUF_SIZE_DQO ((16 * 1024) - 1) #define GVE_TSO_MAXSIZE_DQO IP_MAXPACKET _Static_assert(GVE_TX_MAX_BUF_SIZE_DQO * GVE_TX_MAX_DATA_DESCS_DQO >= GVE_TSO_MAXSIZE_DQO, "gve: bad tso parameters"); /* * "report_event" on TX packet descriptors may only be reported on the last * descriptor of a TX packet, and they must be spaced apart with at least this * value. */ #define GVE_TX_MIN_RE_INTERVAL 32 struct gve_tx_context_cmd_dtype { uint8_t dtype:5; uint8_t tso:1; uint8_t reserved1:2; uint8_t reserved2; }; _Static_assert(sizeof(struct gve_tx_context_cmd_dtype) == 2, "gve: bad dqo desc struct length"); /* * TX Native TSO Context DTYPE (0x05) * * "flex" fields allow the driver to send additional packet context to HW. */ struct gve_tx_tso_context_desc_dqo { /* The L4 payload bytes that should be segmented. */ uint32_t tso_total_len:24; uint32_t flex10:8; /* Max segment size in TSO excluding headers. */ uint16_t mss:14; uint16_t reserved:2; uint8_t header_len; /* Header length to use for TSO offload */ uint8_t flex11; struct gve_tx_context_cmd_dtype cmd_dtype; uint8_t flex0; uint8_t flex5; uint8_t flex6; uint8_t flex7; uint8_t flex8; uint8_t flex9; } __packed; _Static_assert(sizeof(struct gve_tx_tso_context_desc_dqo) == 16, "gve: bad dqo desc struct length"); #define GVE_TX_TSO_CTX_DESC_DTYPE_DQO 0x5 /* General context descriptor for sending metadata. */ struct gve_tx_general_context_desc_dqo { uint8_t flex4; uint8_t flex5; uint8_t flex6; uint8_t flex7; uint8_t flex8; uint8_t flex9; uint8_t flex10; uint8_t flex11; struct gve_tx_context_cmd_dtype cmd_dtype; uint16_t reserved; uint8_t flex0; uint8_t flex1; uint8_t flex2; uint8_t flex3; } __packed; _Static_assert(sizeof(struct gve_tx_general_context_desc_dqo) == 16, "gve: bad dqo desc struct length"); #define GVE_TX_GENERAL_CTX_DESC_DTYPE_DQO 0x4 /* * Logical structure of metadata which is packed into context descriptor flex * fields. */ struct gve_tx_metadata_dqo { union { struct { uint8_t version; /* * A zero value means no l4_hash was associated with the * mbuf. 
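 *
 * (Editor's note: the driver derives this from the 32-bit flowid by
 * folding and truncating it, roughly (hash ^ (hash >> 16)) & 0x7fff,
 * substituting a non-zero value if the fold happens to be zero; see
 * gve_extract_tx_metadata_dqo().)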
*/ uint16_t path_hash:15; /* * Should be set to 1 if the flow associated with the * mbuf had a rehash from the TCP stack. */ uint16_t rehash_event:1; } __packed; uint8_t bytes[12]; }; } __packed; _Static_assert(sizeof(struct gve_tx_metadata_dqo) == 12, "gve: bad dqo desc struct length"); #define GVE_TX_METADATA_VERSION_DQO 0 +/* Used to access the generation bit within a TX completion descriptor. */ +#define GVE_TX_DESC_DQO_GEN_BYTE_OFFSET 1 +#define GVE_TX_DESC_DQO_GEN_BIT_MASK 0x80 + /* TX completion descriptor */ struct gve_tx_compl_desc_dqo { - /* For types 0-4 this is the TX queue ID associated with this + /* + * For types 0-4 this is the TX queue ID associated with this * completion. */ uint16_t id:11; /* See: GVE_COMPL_TYPE_DQO* */ uint16_t type:3; uint16_t reserved0:1; /* Flipped by HW to notify the descriptor is populated. */ uint16_t generation:1; union { - /* For descriptor completions, this is the last index fetched + /* + * For descriptor completions, this is the last index fetched * by HW + 1. */ __le16 tx_head; - /* For packet completions, this is the completion tag set on the + /* + * For packet completions, this is the completion tag set on the * TX packet descriptors. */ __le16 completion_tag; }; __le32 reserved1; } __packed; _Static_assert(sizeof(struct gve_tx_compl_desc_dqo) == 8, "gve: bad dqo desc struct length"); union gve_tx_desc_dqo { struct gve_tx_pkt_desc_dqo pkt; struct gve_tx_tso_context_desc_dqo tso_ctx; struct gve_tx_general_context_desc_dqo general_ctx; }; #define GVE_COMPL_TYPE_DQO_PKT 0x2 /* Packet completion */ #define GVE_COMPL_TYPE_DQO_DESC 0x4 /* Descriptor completion */ /* Descriptor to post buffers to HW on buffer queue. */ struct gve_rx_desc_dqo { __le16 buf_id; /* ID returned in Rx completion descriptor */ __le16 reserved0; __le32 reserved1; __le64 buf_addr; /* DMA address of the buffer */ __le64 header_buf_addr; __le64 reserved2; } __packed; _Static_assert(sizeof(struct gve_rx_desc_dqo) == 32, "gve: bad dqo desc struct length"); +/* Used to access the generation bit within an RX completion descriptor. */ +#define GVE_RX_DESC_DQO_GEN_BYTE_OFFSET 5 +#define GVE_RX_DESC_DQO_GEN_BIT_MASK 0x40 + /* Descriptor for HW to notify SW of new packets received on RX queue. */ struct gve_rx_compl_desc_dqo { /* Must be 1 */ uint8_t rxdid:4; uint8_t reserved0:4; /* Packet originated from this system rather than the network. */ uint8_t loopback:1; - /* Set when IPv6 packet contains a destination options header or routing + /* + * Set when IPv6 packet contains a destination options header or routing * header. */ uint8_t ipv6_ex_add:1; /* Invalid packet was received. */ uint8_t rx_error:1; uint8_t reserved1:5; uint16_t packet_type:10; uint16_t ip_hdr_err:1; uint16_t udp_len_err:1; uint16_t raw_cs_invalid:1; uint16_t reserved2:3; uint16_t packet_len:14; /* Flipped by HW to notify the descriptor is populated. */ uint16_t generation:1; /* Should be zero. */ uint16_t buffer_queue_id:1; uint16_t header_len:10; uint16_t rsc:1; uint16_t split_header:1; uint16_t reserved3:4; uint8_t descriptor_done:1; uint8_t end_of_packet:1; uint8_t header_buffer_overflow:1; uint8_t l3_l4_processed:1; uint8_t csum_ip_err:1; uint8_t csum_l4_err:1; uint8_t csum_external_ip_err:1; uint8_t csum_external_udp_err:1; uint8_t status_error1; __le16 reserved5; __le16 buf_id; /* Buffer ID which was sent on the buffer queue. */ union { /* Packet checksum. */ __le16 raw_cs; /* Segment length for RSC packets. 
*/ __le16 rsc_seg_len; }; __le32 hash; __le32 reserved6; __le64 reserved7; } __packed; _Static_assert(sizeof(struct gve_rx_compl_desc_dqo) == 32, "gve: bad dqo desc struct length"); #endif /* _GVE_DESC_DQO_H_ */ diff --git a/sys/dev/gve/gve_rx_dqo.c b/sys/dev/gve/gve_rx_dqo.c index a499ac9d3c6a..11b2c7ea0c55 100644 --- a/sys/dev/gve/gve_rx_dqo.c +++ b/sys/dev/gve/gve_rx_dqo.c @@ -1,1021 +1,1031 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2024 Google LLC * * Redistribution and use in source and binary forms, with or without modification, * are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software without * specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "gve.h" #include "gve_adminq.h" #include "gve_dqo.h" static void gve_free_rx_mbufs_dqo(struct gve_rx_ring *rx) { struct gve_rx_buf_dqo *buf; int i; if (gve_is_qpl(rx->com.priv)) return; for (i = 0; i < rx->dqo.buf_cnt; i++) { buf = &rx->dqo.bufs[i]; if (!buf->mbuf) continue; bus_dmamap_sync(rx->dqo.buf_dmatag, buf->dmamap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(rx->dqo.buf_dmatag, buf->dmamap); m_freem(buf->mbuf); buf->mbuf = NULL; } } void gve_rx_free_ring_dqo(struct gve_priv *priv, int i) { struct gve_rx_ring *rx = &priv->rx[i]; struct gve_ring_com *com = &rx->com; int j; if (rx->dqo.compl_ring != NULL) { gve_dma_free_coherent(&rx->dqo.compl_ring_mem); rx->dqo.compl_ring = NULL; } if (rx->dqo.desc_ring != NULL) { gve_dma_free_coherent(&rx->desc_ring_mem); rx->dqo.desc_ring = NULL; } if (rx->dqo.bufs != NULL) { gve_free_rx_mbufs_dqo(rx); if (!gve_is_qpl(priv) && rx->dqo.buf_dmatag) { for (j = 0; j < rx->dqo.buf_cnt; j++) if (rx->dqo.bufs[j].mapped) bus_dmamap_destroy(rx->dqo.buf_dmatag, rx->dqo.bufs[j].dmamap); } free(rx->dqo.bufs, M_GVE); rx->dqo.bufs = NULL; } if (!gve_is_qpl(priv) && rx->dqo.buf_dmatag) bus_dma_tag_destroy(rx->dqo.buf_dmatag); if (com->qpl != NULL) { gve_free_qpl(priv, com->qpl); com->qpl = NULL; } } int gve_rx_alloc_ring_dqo(struct gve_priv *priv, int i) { struct gve_rx_ring *rx = &priv->rx[i]; int err; int j; err = gve_dma_alloc_coherent(priv, sizeof(struct gve_rx_desc_dqo) * priv->rx_desc_cnt, CACHE_LINE_SIZE, &rx->desc_ring_mem); if (err != 0) { device_printf(priv->dev, "Failed to alloc desc ring for rx ring %d", i); goto abort; } rx->dqo.desc_ring = rx->desc_ring_mem.cpu_addr; rx->dqo.mask = priv->rx_desc_cnt - 1; err = gve_dma_alloc_coherent(priv, sizeof(struct gve_rx_compl_desc_dqo) * priv->rx_desc_cnt, CACHE_LINE_SIZE, &rx->dqo.compl_ring_mem); if (err != 0) { device_printf(priv->dev, "Failed to alloc compl ring for rx ring %d", i); goto abort; } rx->dqo.compl_ring = rx->dqo.compl_ring_mem.cpu_addr; rx->dqo.mask = priv->rx_desc_cnt - 1; rx->dqo.buf_cnt = gve_is_qpl(priv) ? 
GVE_RX_NUM_QPL_PAGES_DQO : priv->rx_desc_cnt; rx->dqo.bufs = malloc(rx->dqo.buf_cnt * sizeof(struct gve_rx_buf_dqo), M_GVE, M_WAITOK | M_ZERO); if (gve_is_qpl(priv)) { rx->com.qpl = gve_alloc_qpl(priv, i + priv->tx_cfg.max_queues, GVE_RX_NUM_QPL_PAGES_DQO, /*single_kva=*/false); if (rx->com.qpl == NULL) { device_printf(priv->dev, "Failed to alloc QPL for rx ring %d", i); err = ENOMEM; goto abort; } return (0); } err = bus_dma_tag_create( bus_get_dma_tag(priv->dev), /* parent */ 1, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ MCLBYTES, /* maxsize */ 1, /* nsegments */ MCLBYTES, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockarg */ &rx->dqo.buf_dmatag); if (err != 0) { device_printf(priv->dev, "%s: bus_dma_tag_create failed: %d\n", __func__, err); goto abort; } for (j = 0; j < rx->dqo.buf_cnt; j++) { err = bus_dmamap_create(rx->dqo.buf_dmatag, 0, &rx->dqo.bufs[j].dmamap); if (err != 0) { device_printf(priv->dev, "err in creating rx buf dmamap %d: %d", j, err); goto abort; } rx->dqo.bufs[j].mapped = true; } return (0); abort: gve_rx_free_ring_dqo(priv, i); return (err); } static void gve_rx_clear_desc_ring_dqo(struct gve_rx_ring *rx) { struct gve_ring_com *com = &rx->com; int entries; int i; entries = com->priv->rx_desc_cnt; for (i = 0; i < entries; i++) rx->dqo.desc_ring[i] = (struct gve_rx_desc_dqo){}; bus_dmamap_sync(rx->desc_ring_mem.tag, rx->desc_ring_mem.map, BUS_DMASYNC_PREWRITE); } static void gve_rx_clear_compl_ring_dqo(struct gve_rx_ring *rx) { struct gve_ring_com *com = &rx->com; int i; for (i = 0; i < com->priv->rx_desc_cnt; i++) rx->dqo.compl_ring[i] = (struct gve_rx_compl_desc_dqo){}; bus_dmamap_sync(rx->dqo.compl_ring_mem.tag, rx->dqo.compl_ring_mem.map, BUS_DMASYNC_PREWRITE); } void gve_clear_rx_ring_dqo(struct gve_priv *priv, int i) { struct gve_rx_ring *rx = &priv->rx[i]; int j; rx->fill_cnt = 0; rx->cnt = 0; rx->dqo.mask = priv->rx_desc_cnt - 1; rx->dqo.head = 0; rx->dqo.tail = 0; rx->dqo.cur_gen_bit = 0; gve_rx_clear_desc_ring_dqo(rx); gve_rx_clear_compl_ring_dqo(rx); gve_free_rx_mbufs_dqo(rx); if (gve_is_qpl(priv)) { SLIST_INIT(&rx->dqo.free_bufs); STAILQ_INIT(&rx->dqo.used_bufs); for (j = 0; j < rx->dqo.buf_cnt; j++) { struct gve_rx_buf_dqo *buf = &rx->dqo.bufs[j]; vm_page_t page = rx->com.qpl->pages[buf - rx->dqo.bufs]; u_int ref_count = atomic_load_int(&page->ref_count); /* * An ifconfig down+up might see pages still in flight * from the previous innings. 
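 * A wire count of exactly 1 means only the driver's own wiring of the
 * QPL page remains; anything higher means an external mbuf built over
 * this page (see gve_rx_add_extmbuf_to_ctx()) is still sitting in some
 * socket buffer, so the buffer must wait on the used list until the
 * mbuf free routine (gve_mextadd_free()) releases that extra wiring.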
*/ if (VPRC_WIRE_COUNT(ref_count) == 1) SLIST_INSERT_HEAD(&rx->dqo.free_bufs, buf, slist_entry); else STAILQ_INSERT_TAIL(&rx->dqo.used_bufs, buf, stailq_entry); buf->num_nic_frags = 0; buf->next_idx = 0; } } else { SLIST_INIT(&rx->dqo.free_bufs); for (j = 0; j < rx->dqo.buf_cnt; j++) SLIST_INSERT_HEAD(&rx->dqo.free_bufs, &rx->dqo.bufs[j], slist_entry); } } int gve_rx_intr_dqo(void *arg) { struct gve_rx_ring *rx = arg; struct gve_priv *priv = rx->com.priv; struct gve_ring_com *com = &rx->com; if (__predict_false((if_getdrvflags(priv->ifp) & IFF_DRV_RUNNING) == 0)) return (FILTER_STRAY); /* Interrupts are automatically masked */ taskqueue_enqueue(com->cleanup_tq, &com->cleanup_task); return (FILTER_HANDLED); } static void gve_rx_advance_head_dqo(struct gve_rx_ring *rx) { rx->dqo.head = (rx->dqo.head + 1) & rx->dqo.mask; rx->fill_cnt++; /* rx->fill_cnt is just a sysctl counter */ if ((rx->dqo.head & (GVE_RX_BUF_THRESH_DQO - 1)) == 0) { bus_dmamap_sync(rx->desc_ring_mem.tag, rx->desc_ring_mem.map, BUS_DMASYNC_PREWRITE); gve_db_bar_dqo_write_4(rx->com.priv, rx->com.db_offset, rx->dqo.head); } } static void gve_rx_post_buf_dqo(struct gve_rx_ring *rx, struct gve_rx_buf_dqo *buf) { struct gve_rx_desc_dqo *desc; bus_dmamap_sync(rx->dqo.buf_dmatag, buf->dmamap, BUS_DMASYNC_PREREAD); desc = &rx->dqo.desc_ring[rx->dqo.head]; desc->buf_id = htole16(buf - rx->dqo.bufs); desc->buf_addr = htole64(buf->addr); gve_rx_advance_head_dqo(rx); } static int gve_rx_post_new_mbuf_dqo(struct gve_rx_ring *rx, int how) { struct gve_rx_buf_dqo *buf; bus_dma_segment_t segs[1]; int nsegs; int err; buf = SLIST_FIRST(&rx->dqo.free_bufs); if (__predict_false(!buf)) { device_printf(rx->com.priv->dev, "Unexpected empty free bufs list\n"); return (ENOBUFS); } SLIST_REMOVE_HEAD(&rx->dqo.free_bufs, slist_entry); buf->mbuf = m_getcl(how, MT_DATA, M_PKTHDR); if (__predict_false(!buf->mbuf)) { err = ENOMEM; counter_enter(); counter_u64_add_protected(rx->stats.rx_mbuf_mclget_null, 1); counter_exit(); goto abort_with_buf; } buf->mbuf->m_len = MCLBYTES; err = bus_dmamap_load_mbuf_sg(rx->dqo.buf_dmatag, buf->dmamap, buf->mbuf, segs, &nsegs, BUS_DMA_NOWAIT); KASSERT(nsegs == 1, ("dma segs for a cluster mbuf is not 1")); if (__predict_false(err != 0)) { counter_enter(); counter_u64_add_protected(rx->stats.rx_mbuf_dmamap_err, 1); counter_exit(); goto abort_with_mbuf; } buf->addr = segs[0].ds_addr; gve_rx_post_buf_dqo(rx, buf); return (0); abort_with_mbuf: m_freem(buf->mbuf); buf->mbuf = NULL; abort_with_buf: SLIST_INSERT_HEAD(&rx->dqo.free_bufs, buf, slist_entry); return (err); } static struct gve_dma_handle * gve_get_page_dma_handle(struct gve_rx_ring *rx, struct gve_rx_buf_dqo *buf) { return (&(rx->com.qpl->dmas[buf - rx->dqo.bufs])); } static void gve_rx_post_qpl_buf_dqo(struct gve_rx_ring *rx, struct gve_rx_buf_dqo *buf, uint8_t frag_num) { struct gve_rx_desc_dqo *desc = &rx->dqo.desc_ring[rx->dqo.head]; union gve_rx_qpl_buf_id_dqo composed_id; struct gve_dma_handle *page_dma_handle; composed_id.buf_id = buf - rx->dqo.bufs; composed_id.frag_num = frag_num; desc->buf_id = htole16(composed_id.all); page_dma_handle = gve_get_page_dma_handle(rx, buf); bus_dmamap_sync(page_dma_handle->tag, page_dma_handle->map, BUS_DMASYNC_PREREAD); desc->buf_addr = htole64(page_dma_handle->bus_addr + frag_num * GVE_DEFAULT_RX_BUFFER_SIZE); buf->num_nic_frags++; gve_rx_advance_head_dqo(rx); } static void gve_rx_maybe_extract_from_used_bufs(struct gve_rx_ring *rx, bool just_one) { struct gve_rx_buf_dqo *hol_blocker = NULL; struct gve_rx_buf_dqo *buf; u_int 
ref_count; vm_page_t page; while (true) { buf = STAILQ_FIRST(&rx->dqo.used_bufs); if (__predict_false(buf == NULL)) break; page = rx->com.qpl->pages[buf - rx->dqo.bufs]; ref_count = atomic_load_int(&page->ref_count); if (VPRC_WIRE_COUNT(ref_count) != 1) { /* Account for one head-of-line blocker */ if (hol_blocker != NULL) break; hol_blocker = buf; STAILQ_REMOVE_HEAD(&rx->dqo.used_bufs, stailq_entry); continue; } STAILQ_REMOVE_HEAD(&rx->dqo.used_bufs, stailq_entry); SLIST_INSERT_HEAD(&rx->dqo.free_bufs, buf, slist_entry); if (just_one) break; } if (hol_blocker != NULL) STAILQ_INSERT_HEAD(&rx->dqo.used_bufs, hol_blocker, stailq_entry); } static int gve_rx_post_new_dqo_qpl_buf(struct gve_rx_ring *rx) { struct gve_rx_buf_dqo *buf; buf = SLIST_FIRST(&rx->dqo.free_bufs); if (__predict_false(buf == NULL)) { gve_rx_maybe_extract_from_used_bufs(rx, /*just_one=*/true); buf = SLIST_FIRST(&rx->dqo.free_bufs); if (__predict_false(buf == NULL)) return (ENOBUFS); } gve_rx_post_qpl_buf_dqo(rx, buf, buf->next_idx); if (buf->next_idx == GVE_DQ_NUM_FRAGS_IN_PAGE - 1) buf->next_idx = 0; else buf->next_idx++; /* * We have posted all the frags in this buf to the NIC. * - buf will enter used_bufs once the last completion arrives. * - It will renter free_bufs in gve_rx_maybe_extract_from_used_bufs * when its wire count drops back to 1. */ if (buf->next_idx == 0) SLIST_REMOVE_HEAD(&rx->dqo.free_bufs, slist_entry); return (0); } static void gve_rx_post_buffers_dqo(struct gve_rx_ring *rx, int how) { uint32_t num_pending_bufs; uint32_t num_to_post; uint32_t i; int err; num_pending_bufs = (rx->dqo.head - rx->dqo.tail) & rx->dqo.mask; num_to_post = rx->dqo.mask - num_pending_bufs; for (i = 0; i < num_to_post; i++) { if (gve_is_qpl(rx->com.priv)) err = gve_rx_post_new_dqo_qpl_buf(rx); else err = gve_rx_post_new_mbuf_dqo(rx, how); if (err) break; } } void gve_rx_prefill_buffers_dqo(struct gve_rx_ring *rx) { gve_rx_post_buffers_dqo(rx, M_WAITOK); } static void gve_rx_set_hashtype_dqo(struct mbuf *mbuf, struct gve_ptype *ptype, bool *is_tcp) { switch (ptype->l3_type) { case GVE_L3_TYPE_IPV4: switch (ptype->l4_type) { case GVE_L4_TYPE_TCP: *is_tcp = true; M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_TCP_IPV4); break; case GVE_L4_TYPE_UDP: M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_UDP_IPV4); break; default: M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_IPV4); } break; case GVE_L3_TYPE_IPV6: switch (ptype->l4_type) { case GVE_L4_TYPE_TCP: *is_tcp = true; M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_TCP_IPV6); break; case GVE_L4_TYPE_UDP: M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_UDP_IPV6); break; default: M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_IPV6); } break; default: M_HASHTYPE_SET(mbuf, M_HASHTYPE_OPAQUE_HASH); } } static void gve_rx_set_csum_flags_dqo(struct mbuf *mbuf, struct gve_rx_compl_desc_dqo *desc, struct gve_ptype *ptype) { /* HW did not identify and process L3 and L4 headers. */ if (__predict_false(!desc->l3_l4_processed)) return; if (ptype->l3_type == GVE_L3_TYPE_IPV4) { if (__predict_false(desc->csum_ip_err || desc->csum_external_ip_err)) return; } else if (ptype->l3_type == GVE_L3_TYPE_IPV6) { /* Checksum should be skipped if this flag is set. 
*/ if (__predict_false(desc->ipv6_ex_add)) return; } if (__predict_false(desc->csum_l4_err)) return; switch (ptype->l4_type) { case GVE_L4_TYPE_TCP: case GVE_L4_TYPE_UDP: case GVE_L4_TYPE_ICMP: case GVE_L4_TYPE_SCTP: mbuf->m_pkthdr.csum_flags = CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR; mbuf->m_pkthdr.csum_data = 0xffff; break; default: break; } } static void gve_rx_input_mbuf_dqo(struct gve_rx_ring *rx, struct gve_rx_compl_desc_dqo *compl_desc) { struct mbuf *mbuf = rx->ctx.mbuf_head; if_t ifp = rx->com.priv->ifp; struct gve_ptype *ptype; bool do_if_input = true; bool is_tcp = false; ptype = &rx->com.priv->ptype_lut_dqo->ptypes[compl_desc->packet_type]; gve_rx_set_hashtype_dqo(mbuf, ptype, &is_tcp); mbuf->m_pkthdr.flowid = le32toh(compl_desc->hash); gve_rx_set_csum_flags_dqo(mbuf, compl_desc, ptype); mbuf->m_pkthdr.rcvif = ifp; mbuf->m_pkthdr.len = rx->ctx.total_size; if (((if_getcapenable(rx->com.priv->ifp) & IFCAP_LRO) != 0) && is_tcp && (rx->lro.lro_cnt != 0) && (tcp_lro_rx(&rx->lro, mbuf, 0) == 0)) do_if_input = false; if (do_if_input) if_input(ifp, mbuf); counter_enter(); counter_u64_add_protected(rx->stats.rbytes, rx->ctx.total_size); counter_u64_add_protected(rx->stats.rpackets, 1); counter_exit(); rx->ctx = (struct gve_rx_ctx){}; } static int gve_rx_copybreak_dqo(struct gve_rx_ring *rx, void *va, struct gve_rx_compl_desc_dqo *compl_desc, uint16_t frag_len) { struct mbuf *mbuf; mbuf = m_get2(frag_len, M_NOWAIT, MT_DATA, M_PKTHDR); if (__predict_false(mbuf == NULL)) return (ENOMEM); counter_enter(); counter_u64_add_protected(rx->stats.rx_copybreak_cnt, 1); counter_exit(); m_copyback(mbuf, 0, frag_len, va); mbuf->m_len = frag_len; rx->ctx.mbuf_head = mbuf; rx->ctx.mbuf_tail = mbuf; rx->ctx.total_size += frag_len; gve_rx_input_mbuf_dqo(rx, compl_desc); return (0); } static void gve_rx_dqo(struct gve_priv *priv, struct gve_rx_ring *rx, struct gve_rx_compl_desc_dqo *compl_desc, int *work_done) { bool is_last_frag = compl_desc->end_of_packet != 0; struct gve_rx_ctx *ctx = &rx->ctx; struct gve_rx_buf_dqo *buf; uint32_t num_pending_bufs; uint16_t frag_len; uint16_t buf_id; int err; buf_id = le16toh(compl_desc->buf_id); if (__predict_false(buf_id >= rx->dqo.buf_cnt)) { device_printf(priv->dev, "Invalid rx buf id %d on rxq %d, issuing reset\n", buf_id, rx->com.id); gve_schedule_reset(priv); goto drop_frag_clear_ctx; } buf = &rx->dqo.bufs[buf_id]; if (__predict_false(buf->mbuf == NULL)) { device_printf(priv->dev, "Spurious completion for buf id %d on rxq %d, issuing reset\n", buf_id, rx->com.id); gve_schedule_reset(priv); goto drop_frag_clear_ctx; } if (__predict_false(ctx->drop_pkt)) goto drop_frag; if (__predict_false(compl_desc->rx_error)) { counter_enter(); counter_u64_add_protected(rx->stats.rx_dropped_pkt_desc_err, 1); counter_exit(); goto drop_frag; } bus_dmamap_sync(rx->dqo.buf_dmatag, buf->dmamap, BUS_DMASYNC_POSTREAD); frag_len = compl_desc->packet_len; if (frag_len <= priv->rx_copybreak && !ctx->mbuf_head && is_last_frag) { err = gve_rx_copybreak_dqo(rx, mtod(buf->mbuf, char*), compl_desc, frag_len); if (__predict_false(err != 0)) goto drop_frag; (*work_done)++; gve_rx_post_buf_dqo(rx, buf); return; } /* * Although buffer completions may arrive out of order, buffer * descriptors are consumed by the NIC in order. That is, the * buffer at desc_ring[tail] might not be the buffer we got the * completion compl_ring[tail] for: but we know that desc_ring[tail] * has already been read by the NIC. 
*/ num_pending_bufs = (rx->dqo.head - rx->dqo.tail) & rx->dqo.mask; /* * For every fragment received, try to post a new buffer. * * Failures are okay but only so long as the number of outstanding * buffers is above a threshold. * * Beyond that we drop new packets to reuse their buffers. * Without ensuring a minimum number of buffers for the NIC to * put packets in, we run the risk of getting the queue stuck * for good. */ err = gve_rx_post_new_mbuf_dqo(rx, M_NOWAIT); if (__predict_false(err != 0 && num_pending_bufs <= GVE_RX_DQO_MIN_PENDING_BUFS)) { counter_enter(); counter_u64_add_protected( rx->stats.rx_dropped_pkt_mbuf_alloc_fail, 1); counter_exit(); goto drop_frag; } buf->mbuf->m_len = frag_len; ctx->total_size += frag_len; if (ctx->mbuf_tail == NULL) { ctx->mbuf_head = buf->mbuf; ctx->mbuf_tail = buf->mbuf; } else { buf->mbuf->m_flags &= ~M_PKTHDR; ctx->mbuf_tail->m_next = buf->mbuf; ctx->mbuf_tail = buf->mbuf; } /* * Disassociate the mbuf from buf and surrender buf to the free list to * be used by a future mbuf. */ bus_dmamap_unload(rx->dqo.buf_dmatag, buf->dmamap); buf->mbuf = NULL; buf->addr = 0; SLIST_INSERT_HEAD(&rx->dqo.free_bufs, buf, slist_entry); if (is_last_frag) { gve_rx_input_mbuf_dqo(rx, compl_desc); (*work_done)++; } return; drop_frag: /* Clear the earlier frags if there were any */ m_freem(ctx->mbuf_head); rx->ctx = (struct gve_rx_ctx){}; /* Drop the rest of the pkt if there are more frags */ ctx->drop_pkt = true; /* Reuse the dropped frag's buffer */ gve_rx_post_buf_dqo(rx, buf); if (is_last_frag) goto drop_frag_clear_ctx; return; drop_frag_clear_ctx: counter_enter(); counter_u64_add_protected(rx->stats.rx_dropped_pkt, 1); counter_exit(); m_freem(ctx->mbuf_head); rx->ctx = (struct gve_rx_ctx){}; } static void * gve_get_cpu_addr_for_qpl_buf(struct gve_rx_ring *rx, struct gve_rx_buf_dqo *buf, uint8_t buf_frag_num) { int page_idx = buf - rx->dqo.bufs; void *va = rx->com.qpl->dmas[page_idx].cpu_addr; va = (char *)va + (buf_frag_num * GVE_DEFAULT_RX_BUFFER_SIZE); return (va); } static int gve_rx_add_clmbuf_to_ctx(struct gve_rx_ring *rx, struct gve_rx_ctx *ctx, struct gve_rx_buf_dqo *buf, uint8_t buf_frag_num, uint16_t frag_len) { void *va = gve_get_cpu_addr_for_qpl_buf(rx, buf, buf_frag_num); struct mbuf *mbuf; if (ctx->mbuf_tail == NULL) { mbuf = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); if (mbuf == NULL) return (ENOMEM); ctx->mbuf_head = mbuf; ctx->mbuf_tail = mbuf; } else { mbuf = m_getcl(M_NOWAIT, MT_DATA, 0); if (mbuf == NULL) return (ENOMEM); ctx->mbuf_tail->m_next = mbuf; ctx->mbuf_tail = mbuf; } mbuf->m_len = frag_len; ctx->total_size += frag_len; m_copyback(mbuf, 0, frag_len, va); counter_enter(); counter_u64_add_protected(rx->stats.rx_frag_copy_cnt, 1); counter_exit(); return (0); } static int gve_rx_add_extmbuf_to_ctx(struct gve_rx_ring *rx, struct gve_rx_ctx *ctx, struct gve_rx_buf_dqo *buf, uint8_t buf_frag_num, uint16_t frag_len) { struct mbuf *mbuf; void *page_addr; vm_page_t page; int page_idx; void *va; if (ctx->mbuf_tail == NULL) { mbuf = m_gethdr(M_NOWAIT, MT_DATA); if (mbuf == NULL) return (ENOMEM); ctx->mbuf_head = mbuf; ctx->mbuf_tail = mbuf; } else { mbuf = m_get(M_NOWAIT, MT_DATA); if (mbuf == NULL) return (ENOMEM); ctx->mbuf_tail->m_next = mbuf; ctx->mbuf_tail = mbuf; } mbuf->m_len = frag_len; ctx->total_size += frag_len; page_idx = buf - rx->dqo.bufs; page = rx->com.qpl->pages[page_idx]; page_addr = rx->com.qpl->dmas[page_idx].cpu_addr; va = (char *)page_addr + (buf_frag_num * GVE_DEFAULT_RX_BUFFER_SIZE); /* * Grab an extra ref to the page so that 
gve_mextadd_free * does not end up freeing the page while the interface exists. */ vm_page_wire(page); counter_enter(); counter_u64_add_protected(rx->stats.rx_frag_flip_cnt, 1); counter_exit(); MEXTADD(mbuf, va, frag_len, gve_mextadd_free, page, page_addr, 0, EXT_NET_DRV); return (0); } static void gve_rx_dqo_qpl(struct gve_priv *priv, struct gve_rx_ring *rx, struct gve_rx_compl_desc_dqo *compl_desc, int *work_done) { bool is_last_frag = compl_desc->end_of_packet != 0; union gve_rx_qpl_buf_id_dqo composed_id; struct gve_dma_handle *page_dma_handle; struct gve_rx_ctx *ctx = &rx->ctx; struct gve_rx_buf_dqo *buf; uint32_t num_pending_bufs; uint8_t buf_frag_num; uint16_t frag_len; uint16_t buf_id; int err; composed_id.all = le16toh(compl_desc->buf_id); buf_id = composed_id.buf_id; buf_frag_num = composed_id.frag_num; if (__predict_false(buf_id >= rx->dqo.buf_cnt)) { device_printf(priv->dev, "Invalid rx buf id %d on rxq %d, issuing reset\n", buf_id, rx->com.id); gve_schedule_reset(priv); goto drop_frag_clear_ctx; } buf = &rx->dqo.bufs[buf_id]; if (__predict_false(buf->num_nic_frags == 0 || buf_frag_num > GVE_DQ_NUM_FRAGS_IN_PAGE - 1)) { device_printf(priv->dev, "Spurious compl for buf id %d on rxq %d " "with buf_frag_num %d and num_nic_frags %d, issuing reset\n", buf_id, rx->com.id, buf_frag_num, buf->num_nic_frags); gve_schedule_reset(priv); goto drop_frag_clear_ctx; } buf->num_nic_frags--; if (__predict_false(ctx->drop_pkt)) goto drop_frag; if (__predict_false(compl_desc->rx_error)) { counter_enter(); counter_u64_add_protected(rx->stats.rx_dropped_pkt_desc_err, 1); counter_exit(); goto drop_frag; } page_dma_handle = gve_get_page_dma_handle(rx, buf); bus_dmamap_sync(page_dma_handle->tag, page_dma_handle->map, BUS_DMASYNC_POSTREAD); frag_len = compl_desc->packet_len; if (frag_len <= priv->rx_copybreak && !ctx->mbuf_head && is_last_frag) { void *va = gve_get_cpu_addr_for_qpl_buf(rx, buf, buf_frag_num); err = gve_rx_copybreak_dqo(rx, va, compl_desc, frag_len); if (__predict_false(err != 0)) goto drop_frag; (*work_done)++; gve_rx_post_qpl_buf_dqo(rx, buf, buf_frag_num); return; } num_pending_bufs = (rx->dqo.head - rx->dqo.tail) & rx->dqo.mask; err = gve_rx_post_new_dqo_qpl_buf(rx); if (__predict_false(err != 0 && num_pending_bufs <= GVE_RX_DQO_MIN_PENDING_BUFS)) { /* * Resort to copying this fragment into a cluster mbuf * when the above threshold is breached and repost the * incoming buffer. If we cannot find cluster mbufs, * just drop the packet (to repost its buffer). */ err = gve_rx_add_clmbuf_to_ctx(rx, ctx, buf, buf_frag_num, frag_len); if (err != 0) { counter_enter(); counter_u64_add_protected( rx->stats.rx_dropped_pkt_buf_post_fail, 1); counter_exit(); goto drop_frag; } gve_rx_post_qpl_buf_dqo(rx, buf, buf_frag_num); } else { err = gve_rx_add_extmbuf_to_ctx(rx, ctx, buf, buf_frag_num, frag_len); if (__predict_false(err != 0)) { counter_enter(); counter_u64_add_protected( rx->stats.rx_dropped_pkt_mbuf_alloc_fail, 1); counter_exit(); goto drop_frag; } } /* * Both the counts need to be checked. * * num_nic_frags == 0 implies no pending completions * but not all frags may have yet been posted. * * next_idx == 0 implies all frags have been posted * but there might be pending completions. 
*/ if (buf->num_nic_frags == 0 && buf->next_idx == 0) STAILQ_INSERT_TAIL(&rx->dqo.used_bufs, buf, stailq_entry); if (is_last_frag) { gve_rx_input_mbuf_dqo(rx, compl_desc); (*work_done)++; } return; drop_frag: /* Clear the earlier frags if there were any */ m_freem(ctx->mbuf_head); rx->ctx = (struct gve_rx_ctx){}; /* Drop the rest of the pkt if there are more frags */ ctx->drop_pkt = true; /* Reuse the dropped frag's buffer */ gve_rx_post_qpl_buf_dqo(rx, buf, buf_frag_num); if (is_last_frag) goto drop_frag_clear_ctx; return; drop_frag_clear_ctx: counter_enter(); counter_u64_add_protected(rx->stats.rx_dropped_pkt, 1); counter_exit(); m_freem(ctx->mbuf_head); rx->ctx = (struct gve_rx_ctx){}; } +static uint8_t +gve_rx_get_gen_bit(uint8_t *desc) +{ + uint8_t byte; + + /* + * Prevent generation bit from being read after the rest of the + * descriptor. + */ + byte = atomic_load_acq_8(desc + GVE_RX_DESC_DQO_GEN_BYTE_OFFSET); + return ((byte & GVE_RX_DESC_DQO_GEN_BIT_MASK) != 0); +} + static bool gve_rx_cleanup_dqo(struct gve_priv *priv, struct gve_rx_ring *rx, int budget) { struct gve_rx_compl_desc_dqo *compl_desc; uint32_t work_done = 0; NET_EPOCH_ASSERT(); while (work_done < budget) { - bus_dmamap_sync(rx->dqo.compl_ring_mem.tag, rx->dqo.compl_ring_mem.map, + bus_dmamap_sync(rx->dqo.compl_ring_mem.tag, + rx->dqo.compl_ring_mem.map, BUS_DMASYNC_POSTREAD); compl_desc = &rx->dqo.compl_ring[rx->dqo.tail]; - if (compl_desc->generation == rx->dqo.cur_gen_bit) + if (gve_rx_get_gen_bit((uint8_t *)compl_desc) == + rx->dqo.cur_gen_bit) break; - /* - * Prevent generation bit from being read after the rest of the - * descriptor. - */ - atomic_thread_fence_acq(); rx->cnt++; rx->dqo.tail = (rx->dqo.tail + 1) & rx->dqo.mask; rx->dqo.cur_gen_bit ^= (rx->dqo.tail == 0); if (gve_is_qpl(priv)) gve_rx_dqo_qpl(priv, rx, compl_desc, &work_done); else gve_rx_dqo(priv, rx, compl_desc, &work_done); } if (work_done != 0) tcp_lro_flush_all(&rx->lro); gve_rx_post_buffers_dqo(rx, M_NOWAIT); if (gve_is_qpl(priv)) gve_rx_maybe_extract_from_used_bufs(rx, /*just_one=*/false); return (work_done == budget); } void gve_rx_cleanup_tq_dqo(void *arg, int pending) { struct gve_rx_ring *rx = arg; struct gve_priv *priv = rx->com.priv; if (__predict_false((if_getdrvflags(priv->ifp) & IFF_DRV_RUNNING) == 0)) return; if (gve_rx_cleanup_dqo(priv, rx, /*budget=*/64)) { taskqueue_enqueue(rx->com.cleanup_tq, &rx->com.cleanup_task); return; } gve_db_bar_dqo_write_4(priv, rx->com.irq_db_offset, GVE_ITR_NO_UPDATE_DQO | GVE_ITR_ENABLE_BIT_DQO); } diff --git a/sys/dev/gve/gve_tx_dqo.c b/sys/dev/gve/gve_tx_dqo.c index 7361d47b8ce6..8a1993c3e712 100644 --- a/sys/dev/gve/gve_tx_dqo.c +++ b/sys/dev/gve/gve_tx_dqo.c @@ -1,1111 +1,1120 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2024 Google LLC * * Redistribution and use in source and binary forms, with or without modification, * are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software without * specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "opt_inet6.h" #include "gve.h" #include "gve_dqo.h" static void gve_unmap_packet(struct gve_tx_ring *tx, struct gve_tx_pending_pkt_dqo *pending_pkt) { bus_dmamap_sync(tx->dqo.buf_dmatag, pending_pkt->dmamap, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(tx->dqo.buf_dmatag, pending_pkt->dmamap); } static void gve_clear_qpl_pending_pkt(struct gve_tx_pending_pkt_dqo *pending_pkt) { pending_pkt->qpl_buf_head = -1; pending_pkt->num_qpl_bufs = 0; } static void gve_free_tx_mbufs_dqo(struct gve_tx_ring *tx) { struct gve_tx_pending_pkt_dqo *pending_pkt; int i; for (i = 0; i < tx->dqo.num_pending_pkts; i++) { pending_pkt = &tx->dqo.pending_pkts[i]; if (!pending_pkt->mbuf) continue; if (gve_is_qpl(tx->com.priv)) gve_clear_qpl_pending_pkt(pending_pkt); else gve_unmap_packet(tx, pending_pkt); m_freem(pending_pkt->mbuf); pending_pkt->mbuf = NULL; } } void gve_tx_free_ring_dqo(struct gve_priv *priv, int i) { struct gve_tx_ring *tx = &priv->tx[i]; struct gve_ring_com *com = &tx->com; int j; if (tx->dqo.desc_ring != NULL) { gve_dma_free_coherent(&tx->desc_ring_mem); tx->dqo.desc_ring = NULL; } if (tx->dqo.compl_ring != NULL) { gve_dma_free_coherent(&tx->dqo.compl_ring_mem); tx->dqo.compl_ring = NULL; } if (tx->dqo.pending_pkts != NULL) { gve_free_tx_mbufs_dqo(tx); if (!gve_is_qpl(priv) && tx->dqo.buf_dmatag) { for (j = 0; j < tx->dqo.num_pending_pkts; j++) if (tx->dqo.pending_pkts[j].state != GVE_PACKET_STATE_UNALLOCATED) bus_dmamap_destroy(tx->dqo.buf_dmatag, tx->dqo.pending_pkts[j].dmamap); } free(tx->dqo.pending_pkts, M_GVE); tx->dqo.pending_pkts = NULL; } if (!gve_is_qpl(priv) && tx->dqo.buf_dmatag) bus_dma_tag_destroy(tx->dqo.buf_dmatag); if (gve_is_qpl(priv) && tx->dqo.qpl_bufs != NULL) { free(tx->dqo.qpl_bufs, M_GVE); tx->dqo.qpl_bufs = NULL; } if (com->qpl != NULL) { gve_free_qpl(priv, com->qpl); com->qpl = NULL; } } static int gve_tx_alloc_rda_fields_dqo(struct gve_tx_ring *tx) { struct gve_priv *priv = tx->com.priv; int err; int j; /* * DMA tag for mapping Tx mbufs * The maxsize, nsegments, and maxsegsize params should match * the if_sethwtso* arguments in gve_setup_ifnet in gve_main.c. 
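 *
 * For reference, that ifnet setup is expected to look roughly like the
 * following (hypothetical sketch; the authoritative calls live in
 * gve_setup_ifnet()):
 *
 *	if_sethwtsomax(ifp, GVE_TSO_MAXSIZE_DQO);
 *	if_sethwtsomaxsegcount(ifp, GVE_TX_MAX_DATA_DESCS_DQO);
 *	if_sethwtsomaxsegsize(ifp, GVE_TX_MAX_BUF_SIZE_DQO);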
*/ err = bus_dma_tag_create( bus_get_dma_tag(priv->dev), /* parent */ 1, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ GVE_TSO_MAXSIZE_DQO, /* maxsize */ GVE_TX_MAX_DATA_DESCS_DQO, /* nsegments */ GVE_TX_MAX_BUF_SIZE_DQO, /* maxsegsize */ BUS_DMA_ALLOCNOW, /* flags */ NULL, /* lockfunc */ NULL, /* lockarg */ &tx->dqo.buf_dmatag); if (err != 0) { device_printf(priv->dev, "%s: bus_dma_tag_create failed: %d\n", __func__, err); return (err); } for (j = 0; j < tx->dqo.num_pending_pkts; j++) { err = bus_dmamap_create(tx->dqo.buf_dmatag, 0, &tx->dqo.pending_pkts[j].dmamap); if (err != 0) { device_printf(priv->dev, "err in creating pending pkt dmamap %d: %d", j, err); return (err); } tx->dqo.pending_pkts[j].state = GVE_PACKET_STATE_FREE; } return (0); } int gve_tx_alloc_ring_dqo(struct gve_priv *priv, int i) { struct gve_tx_ring *tx = &priv->tx[i]; uint16_t num_pending_pkts; int err; /* Descriptor ring */ err = gve_dma_alloc_coherent(priv, sizeof(union gve_tx_desc_dqo) * priv->tx_desc_cnt, CACHE_LINE_SIZE, &tx->desc_ring_mem); if (err != 0) { device_printf(priv->dev, "Failed to alloc desc ring for tx ring %d", i); goto abort; } tx->dqo.desc_ring = tx->desc_ring_mem.cpu_addr; /* Completion ring */ err = gve_dma_alloc_coherent(priv, sizeof(struct gve_tx_compl_desc_dqo) * priv->tx_desc_cnt, CACHE_LINE_SIZE, &tx->dqo.compl_ring_mem); if (err != 0) { device_printf(priv->dev, "Failed to alloc compl ring for tx ring %d", i); goto abort; } tx->dqo.compl_ring = tx->dqo.compl_ring_mem.cpu_addr; /* * pending_pkts array * * The max number of pending packets determines the maximum number of * descriptors which maybe written to the completion queue. * * We must set the number small enough to make sure we never overrun the * completion queue. */ num_pending_pkts = priv->tx_desc_cnt; /* * Reserve space for descriptor completions, which will be reported at * most every GVE_TX_MIN_RE_INTERVAL packets. 
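 *
 * For example, with a (hypothetical) 512-entry descriptor ring and
 * GVE_TX_MIN_RE_INTERVAL of 32, this leaves 512 - 512/32 = 496 pending
 * packets, keeping the sum of packet and descriptor completions within
 * the 512-entry completion ring.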
*/ num_pending_pkts -= num_pending_pkts / GVE_TX_MIN_RE_INTERVAL; tx->dqo.num_pending_pkts = num_pending_pkts; tx->dqo.pending_pkts = malloc( sizeof(struct gve_tx_pending_pkt_dqo) * num_pending_pkts, M_GVE, M_WAITOK | M_ZERO); if (gve_is_qpl(priv)) { int qpl_buf_cnt; tx->com.qpl = gve_alloc_qpl(priv, i, GVE_TX_NUM_QPL_PAGES_DQO, /*single_kva*/false); if (tx->com.qpl == NULL) { device_printf(priv->dev, "Failed to alloc QPL for tx ring %d", i); err = ENOMEM; goto abort; } qpl_buf_cnt = GVE_TX_BUFS_PER_PAGE_DQO * tx->com.qpl->num_pages; tx->dqo.qpl_bufs = malloc( sizeof(*tx->dqo.qpl_bufs) * qpl_buf_cnt, M_GVE, M_WAITOK | M_ZERO); } else gve_tx_alloc_rda_fields_dqo(tx); return (0); abort: gve_tx_free_ring_dqo(priv, i); return (err); } static void gve_extract_tx_metadata_dqo(const struct mbuf *mbuf, struct gve_tx_metadata_dqo *metadata) { uint32_t hash = mbuf->m_pkthdr.flowid; uint16_t path_hash; metadata->version = GVE_TX_METADATA_VERSION_DQO; if (hash) { path_hash = hash ^ (hash >> 16); path_hash &= (1 << 15) - 1; if (__predict_false(path_hash == 0)) path_hash = ~path_hash; metadata->path_hash = path_hash; } } static void gve_tx_fill_pkt_desc_dqo(struct gve_tx_ring *tx, uint32_t *desc_idx, uint32_t len, uint64_t addr, int16_t compl_tag, bool eop, bool csum_enabled) { while (len > 0) { struct gve_tx_pkt_desc_dqo *desc = &tx->dqo.desc_ring[*desc_idx].pkt; uint32_t cur_len = MIN(len, GVE_TX_MAX_BUF_SIZE_DQO); bool cur_eop = eop && cur_len == len; *desc = (struct gve_tx_pkt_desc_dqo){ .buf_addr = htole64(addr), .dtype = GVE_TX_PKT_DESC_DTYPE_DQO, .end_of_packet = cur_eop, .checksum_offload_enable = csum_enabled, .compl_tag = htole16(compl_tag), .buf_size = cur_len, }; addr += cur_len; len -= cur_len; *desc_idx = (*desc_idx + 1) & tx->dqo.desc_mask; } } static void gve_tx_fill_tso_ctx_desc(struct gve_tx_tso_context_desc_dqo *desc, const struct mbuf *mbuf, const struct gve_tx_metadata_dqo *metadata, int header_len) { *desc = (struct gve_tx_tso_context_desc_dqo){ .header_len = header_len, .cmd_dtype = { .dtype = GVE_TX_TSO_CTX_DESC_DTYPE_DQO, .tso = 1, }, .flex0 = metadata->bytes[0], .flex5 = metadata->bytes[5], .flex6 = metadata->bytes[6], .flex7 = metadata->bytes[7], .flex8 = metadata->bytes[8], .flex9 = metadata->bytes[9], .flex10 = metadata->bytes[10], .flex11 = metadata->bytes[11], }; desc->tso_total_len = mbuf->m_pkthdr.len - header_len; desc->mss = mbuf->m_pkthdr.tso_segsz; } static void gve_tx_fill_general_ctx_desc(struct gve_tx_general_context_desc_dqo *desc, const struct gve_tx_metadata_dqo *metadata) { *desc = (struct gve_tx_general_context_desc_dqo){ .flex0 = metadata->bytes[0], .flex1 = metadata->bytes[1], .flex2 = metadata->bytes[2], .flex3 = metadata->bytes[3], .flex4 = metadata->bytes[4], .flex5 = metadata->bytes[5], .flex6 = metadata->bytes[6], .flex7 = metadata->bytes[7], .flex8 = metadata->bytes[8], .flex9 = metadata->bytes[9], .flex10 = metadata->bytes[10], .flex11 = metadata->bytes[11], .cmd_dtype = {.dtype = GVE_TX_GENERAL_CTX_DESC_DTYPE_DQO}, }; } #define PULLUP_HDR(m, len) \ do { \ if (__predict_false((m)->m_len < (len))) { \ (m) = m_pullup((m), (len)); \ if ((m) == NULL) \ return (EINVAL); \ } \ } while (0) static int gve_prep_tso(struct mbuf *mbuf, int *header_len) { uint8_t l3_off, l4_off = 0; struct ether_header *eh; struct tcphdr *th; u_short csum; PULLUP_HDR(mbuf, sizeof(*eh)); eh = mtod(mbuf, struct ether_header *); KASSERT(eh->ether_type != ETHERTYPE_VLAN, ("VLAN-tagged packets not supported")); l3_off = ETHER_HDR_LEN; #ifdef INET6 if (ntohs(eh->ether_type) == 
ETHERTYPE_IPV6) { struct ip6_hdr *ip6; PULLUP_HDR(mbuf, l3_off + sizeof(*ip6)); ip6 = (struct ip6_hdr *)(mtodo(mbuf, l3_off)); l4_off = l3_off + sizeof(struct ip6_hdr); csum = in6_cksum_pseudo(ip6, /*len=*/0, IPPROTO_TCP, /*csum=*/0); } else #endif if (ntohs(eh->ether_type) == ETHERTYPE_IP) { struct ip *ip; PULLUP_HDR(mbuf, l3_off + sizeof(*ip)); ip = (struct ip *)(mtodo(mbuf, l3_off)); l4_off = l3_off + (ip->ip_hl << 2); csum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htons(IPPROTO_TCP)); } PULLUP_HDR(mbuf, l4_off + sizeof(struct tcphdr)); th = (struct tcphdr *)(mtodo(mbuf, l4_off)); *header_len = l4_off + (th->th_off << 2); /* * Hardware requires th->th_sum to not include the TCP payload, so we * overwrite it with the pseudo-header checksum computed above. */ th->th_sum = csum; return (0); } static int gve_tx_fill_ctx_descs(struct gve_tx_ring *tx, struct mbuf *mbuf, bool is_tso, uint32_t *desc_idx) { struct gve_tx_general_context_desc_dqo *gen_desc; struct gve_tx_tso_context_desc_dqo *tso_desc; struct gve_tx_metadata_dqo metadata; int header_len; int err; metadata = (struct gve_tx_metadata_dqo){0}; gve_extract_tx_metadata_dqo(mbuf, &metadata); if (is_tso) { err = gve_prep_tso(mbuf, &header_len); if (__predict_false(err)) { counter_enter(); counter_u64_add_protected( tx->stats.tx_delayed_pkt_tsoerr, 1); counter_exit(); return (err); } tso_desc = &tx->dqo.desc_ring[*desc_idx].tso_ctx; gve_tx_fill_tso_ctx_desc(tso_desc, mbuf, &metadata, header_len); *desc_idx = (*desc_idx + 1) & tx->dqo.desc_mask; counter_enter(); counter_u64_add_protected(tx->stats.tso_packet_cnt, 1); counter_exit(); } gen_desc = &tx->dqo.desc_ring[*desc_idx].general_ctx; gve_tx_fill_general_ctx_desc(gen_desc, &metadata); *desc_idx = (*desc_idx + 1) & tx->dqo.desc_mask; return (0); } static int gve_map_mbuf_dqo(struct gve_tx_ring *tx, struct mbuf **mbuf, bus_dmamap_t dmamap, bus_dma_segment_t *segs, int *nsegs, int attempt) { struct mbuf *m_new = NULL; int err; err = bus_dmamap_load_mbuf_sg(tx->dqo.buf_dmatag, dmamap, *mbuf, segs, nsegs, BUS_DMA_NOWAIT); switch (err) { case __predict_true(0): break; case EFBIG: if (__predict_false(attempt > 0)) goto abort; counter_enter(); counter_u64_add_protected( tx->stats.tx_mbuf_collapse, 1); counter_exit(); /* Try m_collapse before m_defrag */ m_new = m_collapse(*mbuf, M_NOWAIT, GVE_TX_MAX_DATA_DESCS_DQO); if (m_new == NULL) { counter_enter(); counter_u64_add_protected( tx->stats.tx_mbuf_defrag, 1); counter_exit(); m_new = m_defrag(*mbuf, M_NOWAIT); } if (__predict_false(m_new == NULL)) { counter_enter(); counter_u64_add_protected( tx->stats.tx_mbuf_defrag_err, 1); counter_exit(); m_freem(*mbuf); *mbuf = NULL; err = ENOMEM; goto abort; } else { *mbuf = m_new; return (gve_map_mbuf_dqo(tx, mbuf, dmamap, segs, nsegs, ++attempt)); } case ENOMEM: counter_enter(); counter_u64_add_protected( tx->stats.tx_mbuf_dmamap_enomem_err, 1); counter_exit(); goto abort; default: goto abort; } return (0); abort: counter_enter(); counter_u64_add_protected(tx->stats.tx_mbuf_dmamap_err, 1); counter_exit(); return (err); } static uint32_t num_avail_desc_ring_slots(const struct gve_tx_ring *tx) { uint32_t num_used = (tx->dqo.desc_tail - tx->dqo.desc_head) & tx->dqo.desc_mask; return (tx->dqo.desc_mask - num_used); } static struct gve_tx_pending_pkt_dqo * gve_alloc_pending_packet(struct gve_tx_ring *tx) { int32_t index = tx->dqo.free_pending_pkts_csm; struct gve_tx_pending_pkt_dqo *pending_pkt; /* * No pending packets available in the consumer list, * try to steal the producer list.
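 * (The free list is split in two: a consumer list that only this allocating * path walks, and a producer list that the completion path pushes onto with * atomics. When the consumer side runs dry, the entire producer list is * claimed with a single atomic_swap_32, so neither side needs a lock.)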
*/ if (__predict_false(index == -1)) { tx->dqo.free_pending_pkts_csm = atomic_swap_32( &tx->dqo.free_pending_pkts_prd, -1); index = tx->dqo.free_pending_pkts_csm; if (__predict_false(index == -1)) return (NULL); } pending_pkt = &tx->dqo.pending_pkts[index]; /* Remove pending_pkt from the consumer list */ tx->dqo.free_pending_pkts_csm = pending_pkt->next; pending_pkt->state = GVE_PACKET_STATE_PENDING_DATA_COMPL; return (pending_pkt); } static void gve_free_pending_packet(struct gve_tx_ring *tx, struct gve_tx_pending_pkt_dqo *pending_pkt) { int index = pending_pkt - tx->dqo.pending_pkts; int32_t old_head; pending_pkt->state = GVE_PACKET_STATE_FREE; /* Add pending_pkt to the producer list */ while (true) { old_head = atomic_load_acq_32(&tx->dqo.free_pending_pkts_prd); pending_pkt->next = old_head; if (atomic_cmpset_32(&tx->dqo.free_pending_pkts_prd, old_head, index)) break; } } /* * Has the side-effect of retrieving the value of the last desc index * processed by the NIC. hw_tx_head is written to by the completions-processing * taskqueue upon receiving descriptor-completions. */ static bool gve_tx_has_desc_room_dqo(struct gve_tx_ring *tx, int needed_descs) { if (needed_descs <= num_avail_desc_ring_slots(tx)) return (true); tx->dqo.desc_head = atomic_load_acq_32(&tx->dqo.hw_tx_head); if (needed_descs > num_avail_desc_ring_slots(tx)) { counter_enter(); counter_u64_add_protected( tx->stats.tx_delayed_pkt_nospace_descring, 1); counter_exit(); return (false); } return (true); } static void gve_tx_request_desc_compl(struct gve_tx_ring *tx, uint32_t desc_idx) { uint32_t last_report_event_interval; uint32_t last_desc_idx; last_desc_idx = (desc_idx - 1) & tx->dqo.desc_mask; last_report_event_interval = (last_desc_idx - tx->dqo.last_re_idx) & tx->dqo.desc_mask; if (__predict_false(last_report_event_interval >= GVE_TX_MIN_RE_INTERVAL)) { tx->dqo.desc_ring[last_desc_idx].pkt.report_event = true; tx->dqo.last_re_idx = last_desc_idx; } } static bool gve_tx_have_enough_qpl_bufs(struct gve_tx_ring *tx, int num_bufs) { uint32_t available = tx->dqo.qpl_bufs_produced_cached - tx->dqo.qpl_bufs_consumed; if (__predict_true(available >= num_bufs)) return (true); tx->dqo.qpl_bufs_produced_cached = atomic_load_acq_32( &tx->dqo.qpl_bufs_produced); available = tx->dqo.qpl_bufs_produced_cached - tx->dqo.qpl_bufs_consumed; if (__predict_true(available >= num_bufs)) return (true); return (false); } static int32_t gve_tx_alloc_qpl_buf(struct gve_tx_ring *tx) { int32_t buf = tx->dqo.free_qpl_bufs_csm; if (__predict_false(buf == -1)) { tx->dqo.free_qpl_bufs_csm = atomic_swap_32( &tx->dqo.free_qpl_bufs_prd, -1); buf = tx->dqo.free_qpl_bufs_csm; if (__predict_false(buf == -1)) return (-1); } tx->dqo.free_qpl_bufs_csm = tx->dqo.qpl_bufs[buf]; tx->dqo.qpl_bufs_consumed++; return (buf); } /* * Tx buffer i corresponds to * qpl_page_id = i / GVE_TX_BUFS_PER_PAGE_DQO * qpl_page_offset = (i % GVE_TX_BUFS_PER_PAGE_DQO) * GVE_TX_BUF_SIZE_DQO */ static void gve_tx_buf_get_addr_dqo(struct gve_tx_ring *tx, int32_t index, void **va, bus_addr_t *dma_addr) { int page_id = index >> (PAGE_SHIFT - GVE_TX_BUF_SHIFT_DQO); int offset = (index & (GVE_TX_BUFS_PER_PAGE_DQO - 1)) << GVE_TX_BUF_SHIFT_DQO; *va = (char *)tx->com.qpl->dmas[page_id].cpu_addr + offset; *dma_addr = tx->com.qpl->dmas[page_id].bus_addr + offset; } static struct gve_dma_handle * gve_get_page_dma_handle(struct gve_tx_ring *tx, int32_t index) { int page_id = index >> (PAGE_SHIFT - GVE_TX_BUF_SHIFT_DQO); return (&tx->com.qpl->dmas[page_id]); } static void
gve_tx_copy_mbuf_and_write_pkt_descs(struct gve_tx_ring *tx, struct mbuf *mbuf, struct gve_tx_pending_pkt_dqo *pkt, bool csum_enabled, int16_t completion_tag, uint32_t *desc_idx) { int32_t pkt_len = mbuf->m_pkthdr.len; struct gve_dma_handle *dma; uint32_t copy_offset = 0; int32_t prev_buf = -1; uint32_t copy_len; bus_addr_t addr; int32_t buf; void *va; MPASS(pkt->num_qpl_bufs == 0); MPASS(pkt->qpl_buf_head == -1); while (copy_offset < pkt_len) { buf = gve_tx_alloc_qpl_buf(tx); /* We already checked for availability */ MPASS(buf != -1); gve_tx_buf_get_addr_dqo(tx, buf, &va, &addr); copy_len = MIN(GVE_TX_BUF_SIZE_DQO, pkt_len - copy_offset); m_copydata(mbuf, copy_offset, copy_len, va); copy_offset += copy_len; dma = gve_get_page_dma_handle(tx, buf); bus_dmamap_sync(dma->tag, dma->map, BUS_DMASYNC_PREWRITE); gve_tx_fill_pkt_desc_dqo(tx, desc_idx, copy_len, addr, completion_tag, /*eop=*/copy_offset == pkt_len, csum_enabled); /* Link all the qpl bufs for a packet */ if (prev_buf == -1) pkt->qpl_buf_head = buf; else tx->dqo.qpl_bufs[prev_buf] = buf; prev_buf = buf; pkt->num_qpl_bufs++; } tx->dqo.qpl_bufs[buf] = -1; } int gve_xmit_dqo_qpl(struct gve_tx_ring *tx, struct mbuf *mbuf) { uint32_t desc_idx = tx->dqo.desc_tail; struct gve_tx_pending_pkt_dqo *pkt; int total_descs_needed; int16_t completion_tag; bool has_csum_flag; int csum_flags; bool is_tso; int nsegs; int err; csum_flags = mbuf->m_pkthdr.csum_flags; has_csum_flag = csum_flags & (CSUM_TCP | CSUM_UDP | CSUM_IP6_TCP | CSUM_IP6_UDP | CSUM_TSO); is_tso = csum_flags & CSUM_TSO; nsegs = howmany(mbuf->m_pkthdr.len, GVE_TX_BUF_SIZE_DQO); /* Check if we have enough room in the desc ring */ total_descs_needed = 1 + /* general_ctx_desc */ nsegs + /* pkt_desc */ (is_tso ? 1 : 0); /* tso_ctx_desc */ if (__predict_false(!gve_tx_has_desc_room_dqo(tx, total_descs_needed))) return (ENOBUFS); if (!gve_tx_have_enough_qpl_bufs(tx, nsegs)) { counter_enter(); counter_u64_add_protected( tx->stats.tx_delayed_pkt_nospace_qpl_bufs, 1); counter_exit(); return (ENOBUFS); } pkt = gve_alloc_pending_packet(tx); if (pkt == NULL) { counter_enter(); counter_u64_add_protected( tx->stats.tx_delayed_pkt_nospace_compring, 1); counter_exit(); return (ENOBUFS); } completion_tag = pkt - tx->dqo.pending_pkts; pkt->mbuf = mbuf; err = gve_tx_fill_ctx_descs(tx, mbuf, is_tso, &desc_idx); if (err) goto abort; gve_tx_copy_mbuf_and_write_pkt_descs(tx, mbuf, pkt, has_csum_flag, completion_tag, &desc_idx); /* Remember the index of the last desc written */ tx->dqo.desc_tail = desc_idx; /* * Request a descriptor completion on the last descriptor of the * packet if we are allowed to by the HW enforced interval. */ gve_tx_request_desc_compl(tx, desc_idx); tx->req += total_descs_needed; /* tx->req is just a sysctl counter */ return (0); abort: pkt->mbuf = NULL; gve_free_pending_packet(tx, pkt); return (err); } int gve_xmit_dqo(struct gve_tx_ring *tx, struct mbuf **mbuf_ptr) { bus_dma_segment_t segs[GVE_TX_MAX_DATA_DESCS_DQO]; uint32_t desc_idx = tx->dqo.desc_tail; struct gve_tx_pending_pkt_dqo *pkt; struct mbuf *mbuf = *mbuf_ptr; int total_descs_needed; int16_t completion_tag; bool has_csum_flag; int csum_flags; bool is_tso; int nsegs; int err; int i; csum_flags = mbuf->m_pkthdr.csum_flags; has_csum_flag = csum_flags & (CSUM_TCP | CSUM_UDP | CSUM_IP6_TCP | CSUM_IP6_UDP | CSUM_TSO); is_tso = csum_flags & CSUM_TSO; /* * This mbuf might end up needing more than 1 pkt desc. * The actual number, `nsegs` is known only after the * expensive gve_map_mbuf_dqo call. 
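 * Once the mapping succeeds, the room check is repeated further down with the * actual nsegs before any descriptors are written.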
The check below * exists to fail early when the desc ring is really full. */ total_descs_needed = 1 + /* general_ctx_desc */ 1 + /* pkt_desc */ (is_tso ? 1 : 0); /* tso_ctx_desc */ if (__predict_false(!gve_tx_has_desc_room_dqo(tx, total_descs_needed))) return (ENOBUFS); pkt = gve_alloc_pending_packet(tx); if (pkt == NULL) { counter_enter(); counter_u64_add_protected( tx->stats.tx_delayed_pkt_nospace_compring, 1); counter_exit(); return (ENOBUFS); } completion_tag = pkt - tx->dqo.pending_pkts; err = gve_map_mbuf_dqo(tx, mbuf_ptr, pkt->dmamap, segs, &nsegs, /*attempt=*/0); if (err) goto abort; mbuf = *mbuf_ptr; /* gve_map_mbuf_dqo might replace the mbuf chain */ pkt->mbuf = mbuf; total_descs_needed = 1 + /* general_ctx_desc */ nsegs + /* pkt_desc */ (is_tso ? 1 : 0); /* tso_ctx_desc */ if (__predict_false( !gve_tx_has_desc_room_dqo(tx, total_descs_needed))) { err = ENOBUFS; goto abort_with_dma; } err = gve_tx_fill_ctx_descs(tx, mbuf, is_tso, &desc_idx); if (err) goto abort_with_dma; bus_dmamap_sync(tx->dqo.buf_dmatag, pkt->dmamap, BUS_DMASYNC_PREWRITE); for (i = 0; i < nsegs; i++) { gve_tx_fill_pkt_desc_dqo(tx, &desc_idx, segs[i].ds_len, segs[i].ds_addr, completion_tag, /*eop=*/i == (nsegs - 1), has_csum_flag); } /* Remember the index of the last desc written */ tx->dqo.desc_tail = desc_idx; /* * Request a descriptor completion on the last descriptor of the * packet if we are allowed to by the HW enforced interval. */ gve_tx_request_desc_compl(tx, desc_idx); tx->req += total_descs_needed; /* tx->req is just a sysctl counter */ return (0); abort_with_dma: gve_unmap_packet(tx, pkt); abort: pkt->mbuf = NULL; gve_free_pending_packet(tx, pkt); return (err); } static void gve_reap_qpl_bufs_dqo(struct gve_tx_ring *tx, struct gve_tx_pending_pkt_dqo *pkt) { int32_t buf = pkt->qpl_buf_head; struct gve_dma_handle *dma; int32_t qpl_buf_tail; int32_t old_head; int i; for (i = 0; i < pkt->num_qpl_bufs; i++) { dma = gve_get_page_dma_handle(tx, buf); bus_dmamap_sync(dma->tag, dma->map, BUS_DMASYNC_POSTWRITE); qpl_buf_tail = buf; buf = tx->dqo.qpl_bufs[buf]; } MPASS(buf == -1); buf = qpl_buf_tail; while (true) { old_head = atomic_load_32(&tx->dqo.free_qpl_bufs_prd); tx->dqo.qpl_bufs[buf] = old_head; /* * The "rel" ensures that the update to dqo.free_qpl_bufs_prd * is visible only after the linked list from this pkt is * attached above to old_head. */ if (atomic_cmpset_rel_32(&tx->dqo.free_qpl_bufs_prd, old_head, pkt->qpl_buf_head)) break; } /* * The "rel" ensures that the update to dqo.qpl_bufs_produced is * visible only after the update to dqo.free_qpl_bufs_prd above. */ atomic_add_rel_32(&tx->dqo.qpl_bufs_produced, pkt->num_qpl_bufs); gve_clear_qpl_pending_pkt(pkt); } static uint64_t gve_handle_packet_completion(struct gve_priv *priv, struct gve_tx_ring *tx, uint16_t compl_tag) { struct gve_tx_pending_pkt_dqo *pending_pkt; int32_t pkt_len; if (__predict_false(compl_tag >= tx->dqo.num_pending_pkts)) { device_printf(priv->dev, "Invalid TX completion tag: %d\n", compl_tag); return (0); } pending_pkt = &tx->dqo.pending_pkts[compl_tag]; /* Packet is allocated but not pending data completion.
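 * Such a tag (e.g. one that is stale or duplicated) is logged and otherwise * ignored so that driver state is not corrupted.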
*/ if (__predict_false(pending_pkt->state != GVE_PACKET_STATE_PENDING_DATA_COMPL)) { device_printf(priv->dev, "No pending data completion: %d\n", compl_tag); return (0); } pkt_len = pending_pkt->mbuf->m_pkthdr.len; if (gve_is_qpl(priv)) gve_reap_qpl_bufs_dqo(tx, pending_pkt); else gve_unmap_packet(tx, pending_pkt); m_freem(pending_pkt->mbuf); pending_pkt->mbuf = NULL; gve_free_pending_packet(tx, pending_pkt); return (pkt_len); } int gve_tx_intr_dqo(void *arg) { struct gve_tx_ring *tx = arg; struct gve_priv *priv = tx->com.priv; struct gve_ring_com *com = &tx->com; if (__predict_false((if_getdrvflags(priv->ifp) & IFF_DRV_RUNNING) == 0)) return (FILTER_STRAY); /* Interrupts are automatically masked */ taskqueue_enqueue(com->cleanup_tq, &com->cleanup_task); return (FILTER_HANDLED); } static void gve_tx_clear_desc_ring_dqo(struct gve_tx_ring *tx) { struct gve_ring_com *com = &tx->com; int i; for (i = 0; i < com->priv->tx_desc_cnt; i++) tx->dqo.desc_ring[i] = (union gve_tx_desc_dqo){}; bus_dmamap_sync(tx->desc_ring_mem.tag, tx->desc_ring_mem.map, BUS_DMASYNC_PREWRITE); } static void gve_tx_clear_compl_ring_dqo(struct gve_tx_ring *tx) { struct gve_ring_com *com = &tx->com; int entries; int i; entries = com->priv->tx_desc_cnt; for (i = 0; i < entries; i++) tx->dqo.compl_ring[i] = (struct gve_tx_compl_desc_dqo){}; bus_dmamap_sync(tx->dqo.compl_ring_mem.tag, tx->dqo.compl_ring_mem.map, BUS_DMASYNC_PREWRITE); } void gve_clear_tx_ring_dqo(struct gve_priv *priv, int i) { struct gve_tx_ring *tx = &priv->tx[i]; int j; tx->dqo.desc_head = 0; tx->dqo.desc_tail = 0; tx->dqo.desc_mask = priv->tx_desc_cnt - 1; tx->dqo.last_re_idx = 0; tx->dqo.compl_head = 0; tx->dqo.compl_mask = priv->tx_desc_cnt - 1; atomic_store_32(&tx->dqo.hw_tx_head, 0); tx->dqo.cur_gen_bit = 0; gve_free_tx_mbufs_dqo(tx); for (j = 0; j < tx->dqo.num_pending_pkts; j++) { if (gve_is_qpl(tx->com.priv)) gve_clear_qpl_pending_pkt(&tx->dqo.pending_pkts[j]); tx->dqo.pending_pkts[j].next = (j == tx->dqo.num_pending_pkts - 1) ? -1 : j + 1; tx->dqo.pending_pkts[j].state = GVE_PACKET_STATE_FREE; } tx->dqo.free_pending_pkts_csm = 0; atomic_store_rel_32(&tx->dqo.free_pending_pkts_prd, -1); if (gve_is_qpl(priv)) { int qpl_buf_cnt = GVE_TX_BUFS_PER_PAGE_DQO * tx->com.qpl->num_pages; for (j = 0; j < qpl_buf_cnt - 1; j++) tx->dqo.qpl_bufs[j] = j + 1; tx->dqo.qpl_bufs[j] = -1; tx->dqo.free_qpl_bufs_csm = 0; atomic_store_32(&tx->dqo.free_qpl_bufs_prd, -1); atomic_store_32(&tx->dqo.qpl_bufs_produced, qpl_buf_cnt); tx->dqo.qpl_bufs_produced_cached = qpl_buf_cnt; tx->dqo.qpl_bufs_consumed = 0; } gve_tx_clear_desc_ring_dqo(tx); gve_tx_clear_compl_ring_dqo(tx); } +static uint8_t +gve_tx_get_gen_bit(uint8_t *desc) +{ + uint8_t byte; + + /* + * Prevent generation bit from being read after the rest of the + * descriptor. 
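+ * The acquire load of the generation byte below stands in for the explicit + * atomic_thread_fence_acq() the old code issued after its plain read: loads + * that follow it in program order cannot be reordered ahead of it.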
+ */ + byte = atomic_load_acq_8(desc + GVE_TX_DESC_DQO_GEN_BYTE_OFFSET); + return ((byte & GVE_TX_DESC_DQO_GEN_BIT_MASK) != 0); +} + static bool gve_tx_cleanup_dqo(struct gve_priv *priv, struct gve_tx_ring *tx, int budget) { struct gve_tx_compl_desc_dqo *compl_desc; uint64_t bytes_done = 0; uint64_t pkts_done = 0; uint16_t compl_tag; int work_done = 0; uint16_t tx_head; uint16_t type; while (work_done < budget) { - bus_dmamap_sync(tx->dqo.compl_ring_mem.tag, tx->dqo.compl_ring_mem.map, + bus_dmamap_sync(tx->dqo.compl_ring_mem.tag, + tx->dqo.compl_ring_mem.map, BUS_DMASYNC_POSTREAD); compl_desc = &tx->dqo.compl_ring[tx->dqo.compl_head]; - if (compl_desc->generation == tx->dqo.cur_gen_bit) + if (gve_tx_get_gen_bit((uint8_t *)compl_desc) == + tx->dqo.cur_gen_bit) break; - /* - * Prevent generation bit from being read after the rest of the - * descriptor. - */ - atomic_thread_fence_acq(); type = compl_desc->type; - if (type == GVE_COMPL_TYPE_DQO_DESC) { /* This is the last descriptor fetched by HW plus one */ tx_head = le16toh(compl_desc->tx_head); atomic_store_rel_32(&tx->dqo.hw_tx_head, tx_head); } else if (type == GVE_COMPL_TYPE_DQO_PKT) { compl_tag = le16toh(compl_desc->completion_tag); bytes_done += gve_handle_packet_completion(priv, tx, compl_tag); pkts_done++; } tx->dqo.compl_head = (tx->dqo.compl_head + 1) & tx->dqo.compl_mask; /* Flip the generation bit when we wrap around */ tx->dqo.cur_gen_bit ^= tx->dqo.compl_head == 0; work_done++; } /* * Waking the xmit taskqueue has to occur after room has been made in * the queue. */ atomic_thread_fence_seq_cst(); if (atomic_load_bool(&tx->stopped) && work_done) { atomic_store_bool(&tx->stopped, false); taskqueue_enqueue(tx->xmit_tq, &tx->xmit_task); } tx->done += work_done; /* tx->done is just a sysctl counter */ counter_enter(); counter_u64_add_protected(tx->stats.tbytes, bytes_done); counter_u64_add_protected(tx->stats.tpackets, pkts_done); counter_exit(); return (work_done == budget); } void gve_tx_cleanup_tq_dqo(void *arg, int pending) { struct gve_tx_ring *tx = arg; struct gve_priv *priv = tx->com.priv; if (__predict_false((if_getdrvflags(priv->ifp) & IFF_DRV_RUNNING) == 0)) return; if (gve_tx_cleanup_dqo(priv, tx, /*budget=*/1024)) { taskqueue_enqueue(tx->com.cleanup_tq, &tx->com.cleanup_task); return; } gve_db_bar_dqo_write_4(priv, tx->com.irq_db_offset, GVE_ITR_NO_UPDATE_DQO | GVE_ITR_ENABLE_BIT_DQO); }