diff --git a/share/man/man4/gve.4 b/share/man/man4/gve.4
--- a/share/man/man4/gve.4
+++ b/share/man/man4/gve.4
@@ -86,6 +86,12 @@
 .Pp
 Change the RX queue count to 4 for the gve0 interface:
 .D1 sysctl dev.gve.0.num_rx_queues=4
+.Pp
+Change the TX ring size to 512 for the gve0 interface:
+.D1 sysctl dev.gve.0.tx_ring_size=512
+.Pp
+Change the RX ring size to 512 for the gve0 interface:
+.D1 sysctl dev.gve.0.rx_ring_size=512
 .Sh DIAGNOSTICS
 The following messages are recorded during driver initialization:
 .Bl -diag
@@ -230,6 +236,17 @@
 .Pp
 Note: sysctl nodes for queue stats remain available even if a queue is removed.
 .Pp
+.It Va dev.gve.X.rx_ring_size and dev.gve.X.tx_ring_size
+Run-time tunables that report the current RX/TX ring sizes.
+The default values are the ring sizes advertised by the device.
+.Pp
+Changing either value brings the interface down while the queues are set up
+with the new ring size, so packets arriving during the change may be dropped.
+The change can fail if the system cannot provide the driver with enough
+resources; in that case the driver tries to revert to the previous ring size.
+If that also fails, the device is left in an unhealthy state and must be reloaded.
+The requested value must be a power of 2 within the range advertised by the device.
+.Pp
 .El
 .Sh LIMITATIONS
 .Nm
diff --git a/sys/dev/gve/gve.h b/sys/dev/gve/gve.h
--- a/sys/dev/gve/gve.h
+++ b/sys/dev/gve/gve.h
@@ -63,6 +63,10 @@
  */
 #define GVE_QPL_DIVISOR	16
 
+/* Ring Size Limits */
+#define GVE_DEFAULT_MIN_RX_RING_SIZE 512
+#define GVE_DEFAULT_MIN_TX_RING_SIZE 256
+
 static MALLOC_DEFINE(M_GVE, "gve", "gve allocations");
 
 struct gve_dma_handle {
@@ -529,12 +533,17 @@
 	uint16_t num_event_counters;
 	uint16_t default_num_queues;
 	uint16_t tx_desc_cnt;
+	uint16_t max_tx_desc_cnt;
+	uint16_t min_tx_desc_cnt;
 	uint16_t rx_desc_cnt;
+	uint16_t max_rx_desc_cnt;
+	uint16_t min_rx_desc_cnt;
 	uint16_t rx_pages_per_qpl;
 	uint64_t max_registered_pages;
 	uint64_t num_registered_pages;
 	uint32_t supported_features;
 	uint16_t max_mtu;
+	bool modify_ringsize_enabled;
 
 	struct gve_dma_handle counter_array_mem;
 	__be32 *counters;
@@ -622,6 +631,7 @@
 void gve_schedule_reset(struct gve_priv *priv);
 int gve_adjust_tx_queues(struct gve_priv *priv, uint16_t new_queue_cnt);
 int gve_adjust_rx_queues(struct gve_priv *priv, uint16_t new_queue_cnt);
+int gve_adjust_ring_sizes(struct gve_priv *priv, uint16_t new_desc_cnt, bool is_rx);
 
 /* Register access functions defined in gve_utils.c */
 uint32_t gve_reg_bar_read_4(struct gve_priv *priv, bus_size_t offset);
diff --git a/sys/dev/gve/gve_adminq.h b/sys/dev/gve/gve_adminq.h
--- a/sys/dev/gve/gve_adminq.h
+++ b/sys/dev/gve/gve_adminq.h
@@ -153,13 +153,21 @@
 _Static_assert(sizeof(struct gve_device_option_dqo_qpl) == 8,
     "gve: bad admin queue struct length");
 
+struct gve_ring_size_bound {
+	__be16 rx;
+	__be16 tx;
+};
+
+_Static_assert(sizeof(struct gve_ring_size_bound) == 4,
+    "gve: bad admin queue struct length");
+
 struct gve_device_option_modify_ring {
 	__be32 supported_features_mask;
-	__be16 max_rx_ring_size;
-	__be16 max_tx_ring_size;
+	struct gve_ring_size_bound max_ring_size;
+	struct gve_ring_size_bound min_ring_size;
 };
 
-_Static_assert(sizeof(struct gve_device_option_modify_ring) == 8,
+_Static_assert(sizeof(struct gve_device_option_modify_ring) == 12,
     "gve: bad admin queue struct length");
 
 struct gve_device_option_jumbo_frames {
diff --git a/sys/dev/gve/gve_adminq.c b/sys/dev/gve/gve_adminq.c
--- a/sys/dev/gve/gve_adminq.c
+++ b/sys/dev/gve/gve_adminq.c
@@ -59,6 +59,7 @@
     struct gve_device_option_gqi_qpl **dev_op_gqi_qpl,
     struct gve_device_option_dqo_rda **dev_op_dqo_rda,
     struct gve_device_option_dqo_qpl **dev_op_dqo_qpl,
+    struct gve_device_option_modify_ring **dev_op_modify_ring,
     struct gve_device_option_jumbo_frames **dev_op_jumbo_frames)
 {
 	uint32_t req_feat_mask = be32toh(option->required_features_mask);
@@ -121,6 +122,34 @@
 		*dev_op_dqo_qpl = (void *)(option + 1);
 		break;
 
+	case GVE_DEV_OPT_ID_MODIFY_RING:
+		if (option_length < (sizeof(**dev_op_modify_ring) -
+		    sizeof(struct gve_ring_size_bound)) ||
+		    req_feat_mask != GVE_DEV_OPT_REQ_FEAT_MASK_MODIFY_RING) {
+			device_printf(priv->dev, GVE_DEVICE_OPTION_ERROR_FMT,
+			    "Modify Ring", (int)sizeof(**dev_op_modify_ring),
+			    GVE_DEV_OPT_REQ_FEAT_MASK_MODIFY_RING,
+			    option_length, req_feat_mask);
+			break;
+		}
+
+		if (option_length > sizeof(**dev_op_modify_ring)) {
+			device_printf(priv->dev, GVE_DEVICE_OPTION_TOO_BIG_FMT,
+			    "Modify Ring");
+		}
+		*dev_op_modify_ring = (void *)(option + 1);
+
+		/* Min ring size included; set the minimum ring size. */
+		if (option_length == sizeof(**dev_op_modify_ring)) {
+			priv->min_rx_desc_cnt = max(
+			    be16toh((*dev_op_modify_ring)->min_ring_size.rx),
+			    GVE_DEFAULT_MIN_RX_RING_SIZE);
+			priv->min_tx_desc_cnt = max(
+			    be16toh((*dev_op_modify_ring)->min_ring_size.tx),
+			    GVE_DEFAULT_MIN_TX_RING_SIZE);
+		}
+		break;
+
 	case GVE_DEV_OPT_ID_JUMBO_FRAMES:
 		if (option_length < sizeof(**dev_op_jumbo_frames) ||
 		    req_feat_mask != GVE_DEV_OPT_REQ_FEAT_MASK_JUMBO_FRAMES) {
@@ -155,6 +184,7 @@
     struct gve_device_option_gqi_qpl **dev_op_gqi_qpl,
     struct gve_device_option_dqo_rda **dev_op_dqo_rda,
     struct gve_device_option_dqo_qpl **dev_op_dqo_qpl,
+    struct gve_device_option_modify_ring **dev_op_modify_ring,
     struct gve_device_option_jumbo_frames **dev_op_jumbo_frames)
 {
 	char *desc_end = (char *)descriptor + be16toh(descriptor->total_length);
@@ -176,6 +206,7 @@
 		    dev_op_gqi_qpl,
 		    dev_op_dqo_rda,
 		    dev_op_dqo_qpl,
+		    dev_op_modify_ring,
 		    dev_op_jumbo_frames);
 		dev_opt = (void *)((char *)(dev_opt + 1) + be16toh(dev_opt->option_length));
 	}
@@ -390,8 +421,18 @@
 static void
 gve_enable_supported_features(struct gve_priv *priv,
     uint32_t supported_features_mask,
+    const struct gve_device_option_modify_ring *dev_op_modify_ring,
     const struct gve_device_option_jumbo_frames *dev_op_jumbo_frames)
 {
+	if (dev_op_modify_ring &&
+	    (supported_features_mask & GVE_SUP_MODIFY_RING_MASK)) {
+		if (bootverbose)
+			device_printf(priv->dev, "MODIFY RING device option enabled.\n");
+		priv->modify_ringsize_enabled = true;
+		priv->max_rx_desc_cnt = be16toh(dev_op_modify_ring->max_ring_size.rx);
+		priv->max_tx_desc_cnt = be16toh(dev_op_modify_ring->max_ring_size.tx);
+	}
+
 	if (dev_op_jumbo_frames &&
 	    (supported_features_mask & GVE_SUP_JUMBO_FRAMES_MASK)) {
 		if (bootverbose)
@@ -410,6 +451,7 @@
 	struct gve_device_option_gqi_qpl *dev_op_gqi_qpl = NULL;
 	struct gve_device_option_dqo_rda *dev_op_dqo_rda = NULL;
 	struct gve_device_option_dqo_qpl *dev_op_dqo_qpl = NULL;
+	struct gve_device_option_modify_ring *dev_op_modify_ring = NULL;
 	struct gve_device_option_jumbo_frames *dev_op_jumbo_frames = NULL;
 	uint32_t supported_features_mask = 0;
 	int rc;
@@ -438,10 +480,15 @@
 
 	bus_dmamap_sync(desc_mem.tag, desc_mem.map, BUS_DMASYNC_POSTREAD);
 
+	/* Default min in case device options don't have min values */
+	priv->min_rx_desc_cnt = GVE_DEFAULT_MIN_RX_RING_SIZE;
+	priv->min_tx_desc_cnt = GVE_DEFAULT_MIN_TX_RING_SIZE;
+
 	rc = gve_process_device_options(priv, desc,
 	    &dev_op_gqi_qpl,
 	    &dev_op_dqo_rda,
 	    &dev_op_dqo_qpl,
+	    &dev_op_modify_ring,
 	    &dev_op_jumbo_frames);
 	if (rc != 0)
 		goto free_device_descriptor;
@@ -489,8 +536,12 @@
 	priv->default_num_queues = be16toh(desc->default_num_queues);
 	priv->supported_features = supported_features_mask;
 
+	/* Default max to current in case modify ring size option is disabled */
+	priv->max_rx_desc_cnt = priv->rx_desc_cnt;
+	priv->max_tx_desc_cnt = priv->tx_desc_cnt;
+
 	gve_enable_supported_features(priv, supported_features_mask,
-	    dev_op_jumbo_frames);
+	    dev_op_modify_ring, dev_op_jumbo_frames);
 
 	for (i = 0; i < ETHER_ADDR_LEN; i++)
 		priv->mac[i] = desc->mac[i];
diff --git a/sys/dev/gve/gve_main.c b/sys/dev/gve/gve_main.c
--- a/sys/dev/gve/gve_main.c
+++ b/sys/dev/gve/gve_main.c
@@ -32,10 +32,10 @@
 #include "gve_adminq.h"
 #include "gve_dqo.h"
 
-#define GVE_DRIVER_VERSION "GVE-FBSD-1.3.2\n"
+#define GVE_DRIVER_VERSION "GVE-FBSD-1.3.3\n"
 #define GVE_VERSION_MAJOR 1
 #define GVE_VERSION_MINOR 3
-#define GVE_VERSION_SUB 2
+#define GVE_VERSION_SUB 3
 
 #define GVE_DEFAULT_RX_COPYBREAK 256
 
@@ -260,6 +260,54 @@
 	return (err);
 }
 
+int
+gve_adjust_ring_sizes(struct gve_priv *priv, uint16_t new_desc_cnt, bool is_rx)
+{
+	int err;
+	uint16_t prev_desc_cnt;
+
+	GVE_IFACE_LOCK_ASSERT(priv->gve_iface_lock);
+
+	gve_down(priv);
+
+	if (is_rx) {
+		gve_free_rx_rings(priv, 0, priv->rx_cfg.num_queues);
+		prev_desc_cnt = priv->rx_desc_cnt;
+		priv->rx_desc_cnt = new_desc_cnt;
+		err = gve_alloc_rx_rings(priv, 0, priv->rx_cfg.num_queues);
+		if (err != 0) {
+			device_printf(priv->dev,
+			    "Failed to allocate rings. Trying to start back up with previous ring size.\n");
+			priv->rx_desc_cnt = prev_desc_cnt;
+			err = gve_alloc_rx_rings(priv, 0, priv->rx_cfg.num_queues);
+		}
+	} else {
+		gve_free_tx_rings(priv, 0, priv->tx_cfg.num_queues);
+		prev_desc_cnt = priv->tx_desc_cnt;
+		priv->tx_desc_cnt = new_desc_cnt;
+		err = gve_alloc_tx_rings(priv, 0, priv->tx_cfg.num_queues);
+		if (err != 0) {
+			device_printf(priv->dev,
+			    "Failed to allocate rings. Trying to start back up with previous ring size.\n");
+			priv->tx_desc_cnt = prev_desc_cnt;
+			err = gve_alloc_tx_rings(priv, 0, priv->tx_cfg.num_queues);
+		}
+	}
+
+	if (err != 0) {
+		device_printf(priv->dev, "Failed to allocate rings! Cannot start device back up!\n");
+		return (err);
+	}
+
+	err = gve_up(priv);
+	if (err != 0) {
+		gve_schedule_reset(priv);
+		return (err);
+	}
+
+	return (0);
+}
+
 static int
 gve_set_mtu(if_t ifp, uint32_t new_mtu)
 {
diff --git a/sys/dev/gve/gve_sysctl.c b/sys/dev/gve/gve_sysctl.c
--- a/sys/dev/gve/gve_sysctl.c
+++ b/sys/dev/gve/gve_sysctl.c
@@ -362,6 +362,82 @@
 	return (0);
 }
 
+static int
+gve_check_ring_size(struct gve_priv *priv, int val, bool is_rx)
+{
+	if (!powerof2(val) || val == 0) {
+		device_printf(priv->dev,
+		    "Requested ring size (%u) must be a power of 2\n", val);
+		return (EINVAL);
+	}
+
+	if (val < (is_rx ? priv->min_rx_desc_cnt : priv->min_tx_desc_cnt)) {
+		device_printf(priv->dev,
+		    "Requested ring size (%u) cannot be less than %d\n", val,
+		    (is_rx ? priv->min_rx_desc_cnt : priv->min_tx_desc_cnt));
+		return (EINVAL);
+	}
+
+	if (val > (is_rx ? priv->max_rx_desc_cnt : priv->max_tx_desc_cnt)) {
+		device_printf(priv->dev,
+		    "Requested ring size (%u) cannot be greater than %d\n", val,
+		    (is_rx ? priv->max_rx_desc_cnt : priv->max_tx_desc_cnt));
+		return (EINVAL);
+	}
+
+	return (0);
+}
+
+static int
+gve_sysctl_tx_ring_size(SYSCTL_HANDLER_ARGS)
+{
+	struct gve_priv *priv = arg1;
+	int val;
+	int err;
+
+	val = priv->tx_desc_cnt;
+	err = sysctl_handle_int(oidp, &val, 0, req);
+	if (err != 0 || req->newptr == NULL)
+		return (err);
+
+	err = gve_check_ring_size(priv, val, /*is_rx=*/false);
+	if (err != 0)
+		return (err);
+
+	if (val != priv->tx_desc_cnt) {
+		GVE_IFACE_LOCK_LOCK(priv->gve_iface_lock);
+		err = gve_adjust_ring_sizes(priv, val, /*is_rx=*/false);
+		GVE_IFACE_LOCK_UNLOCK(priv->gve_iface_lock);
+	}
+
+	return (err);
+}
+
+static int
+gve_sysctl_rx_ring_size(SYSCTL_HANDLER_ARGS)
+{
+	struct gve_priv *priv = arg1;
+	int val;
+	int err;
+
+	val = priv->rx_desc_cnt;
+	err = sysctl_handle_int(oidp, &val, 0, req);
+	if (err != 0 || req->newptr == NULL)
+		return (err);
+
+	err = gve_check_ring_size(priv, val, /*is_rx=*/true);
+	if (err != 0)
+		return (err);
+
+	if (val != priv->rx_desc_cnt) {
+		GVE_IFACE_LOCK_LOCK(priv->gve_iface_lock);
+		err = gve_adjust_ring_sizes(priv, val, /*is_rx=*/true);
+		GVE_IFACE_LOCK_UNLOCK(priv->gve_iface_lock);
+	}
+
+	return (err);
+}
+
 static void
 gve_setup_sysctl_writables(struct sysctl_ctx_list *ctx,
     struct sysctl_oid_list *child, struct gve_priv *priv)
@@ -373,6 +450,16 @@
 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "num_rx_queues",
 	    CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, priv, 0,
 	    gve_sysctl_num_rx_queues, "I", "Number of RX queues");
+
+	if (priv->modify_ringsize_enabled) {
+		SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_ring_size",
+		    CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, priv, 0,
+		    gve_sysctl_tx_ring_size, "I", "TX ring size");
+
+		SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_ring_size",
+		    CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, priv, 0,
+		    gve_sysctl_rx_ring_size, "I", "RX ring size");
+	}
 }
 
 void gve_setup_sysctl(struct gve_priv *priv)
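
Test sketch (not part of the patch): one possible way to exercise the new
nodes from userland is through sysctlbyname(3), as below. The unit number
(gve 0) and the presence of the node, which is created only when the device
advertises the modify-ring feature, are assumptions for illustration.

#include <sys/types.h>
#include <sys/sysctl.h>

#include <err.h>
#include <stdio.h>
#include <stdlib.h>

int
main(int argc, char **argv)
{
	const char *node = "dev.gve.0.tx_ring_size";	/* assumed unit 0 */
	unsigned int cur, req_size;
	size_t len = sizeof(cur);

	/* Read the current TX ring size. */
	if (sysctlbyname(node, &cur, &len, NULL, 0) != 0)
		err(1, "sysctlbyname(%s)", node);
	printf("current TX ring size: %u\n", cur);

	if (argc < 2)
		return (0);

	/*
	 * Request a new size. The driver rejects values that are not a
	 * power of 2 or that fall outside the device-advertised bounds
	 * with EINVAL, which err(3) reports.
	 */
	req_size = (unsigned int)strtoul(argv[1], NULL, 10);
	if (sysctlbyname(node, NULL, NULL, &req_size, sizeof(req_size)) != 0)
		err(1, "sysctlbyname(%s=%u)", node, req_size);
	printf("TX ring size set to: %u\n", req_size);
	return (0);
}

Setting the value with sysctl(8), e.g. "sysctl dev.gve.0.tx_ring_size=512",
takes the same path: both go through gve_sysctl_tx_ring_size() and therefore
through the same validation and the gve_adjust_ring_sizes() teardown and
bring-up sequence.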