diff --git a/share/man/man4/gve.4 b/share/man/man4/gve.4
--- a/share/man/man4/gve.4
+++ b/share/man/man4/gve.4
@@ -217,7 +217,7 @@
 .Pp
 This call turns down the interface while setting up the new queues,
 which may potentially cause any new packets to be dropped.
-This call can fail if the system isn't able to provide the driver with enough resources.
+This call can fail if the system is not able to provide the driver with enough resources.
 In that situation, the driver will revert to the previous number of RX queues.
 If this also fails, a device reset will be triggered.
 .Pp
@@ -231,7 +231,7 @@
 .Pp
 This call turns down the interface while setting up the new queues,
 which may potentially cause any new packets to be dropped.
-This call can fail if the system isn't able to provide the driver with enough resources.
+This call can fail if the system is not able to provide the driver with enough resources.
 In that situation, the driver will revert to the previous number of TX queues.
 If this also fails, a device reset will be triggered.
 .Pp
@@ -239,6 +239,32 @@
 .Pp
 Example: To set the TX queue count to 4 for the gve0 interface, use the following command:
 .D1 sysctl dev.gve.0.num_tx_queues=4
+.It Va dev.gve.X.rx_ring_size
+A run-time tunable that represents the current ring size for RX queues.
+The default value is set to device defaults for ring size.
+.Pp
+This call turns down the interface while setting up the queues with the new ring size,
+which may potentially cause any new packets to be dropped.
+This call can fail if the system is not able to provide the driver with enough resources.
+In that situation, the driver will try to revert to the previous ring size for RX queues.
+If this also fails, the device will be in an unhealthy state and will need to be reloaded.
+This value must be a power of 2 and within the defined range.
+.Pp
+Example: To set the RX ring size to 512 for the gve0 interface, use the following command:
+.D1 sysctl dev.gve.0.rx_ring_size=512
+.It Va dev.gve.X.tx_ring_size
+A run-time tunable that represents the current ring size for TX queues.
+The default value is set to device defaults for ring size.
+.Pp
+This call turns down the interface while setting up the queues with the new ring size,
+which may potentially cause any new packets to be dropped.
+This call can fail if the system is not able to provide the driver with enough resources.
+In that situation, the driver will try to revert to the previous ring size for TX queues.
+If this also fails, the device will be in an unhealthy state and will need to be reloaded.
+This value must be a power of 2 and within the defined range.
+.Pp
+Example: To set the TX ring size to 1024 for the gve0 interface, use the following command:
+.D1 sysctl dev.gve.0.tx_ring_size=1024
 .El
 .Sh LIMITATIONS
 .Nm
diff --git a/sys/dev/gve/gve.h b/sys/dev/gve/gve.h
--- a/sys/dev/gve/gve.h
+++ b/sys/dev/gve/gve.h
@@ -63,6 +63,10 @@
  */
 #define GVE_QPL_DIVISOR	16
 
+/* Ring Size Limits */
+#define GVE_DEFAULT_MIN_RX_RING_SIZE	512
+#define GVE_DEFAULT_MIN_TX_RING_SIZE	256
+
 static MALLOC_DEFINE(M_GVE, "gve", "gve allocations");
 
 struct gve_dma_handle {
@@ -529,12 +533,17 @@
 	uint16_t num_event_counters;
 	uint16_t default_num_queues;
 	uint16_t tx_desc_cnt;
+	uint16_t max_tx_desc_cnt;
+	uint16_t min_tx_desc_cnt;
 	uint16_t rx_desc_cnt;
+	uint16_t max_rx_desc_cnt;
+	uint16_t min_rx_desc_cnt;
 	uint16_t rx_pages_per_qpl;
 	uint64_t max_registered_pages;
 	uint64_t num_registered_pages;
 	uint32_t supported_features;
 	uint16_t max_mtu;
+	bool modify_ringsize_enabled;
 
 	struct gve_dma_handle counter_array_mem;
 	__be32 *counters;
@@ -622,7 +631,7 @@
 void gve_schedule_reset(struct gve_priv *priv);
 int gve_adjust_tx_queues(struct gve_priv *priv, uint16_t new_queue_cnt);
 int gve_adjust_rx_queues(struct gve_priv *priv, uint16_t new_queue_cnt);
-int gve_adjust_ring_sizes(struct gve_priv *priv, bool is_rx);
+int gve_adjust_ring_sizes(struct gve_priv *priv, uint16_t new_desc_cnt, bool is_rx);
 
 /* Register access functions defined in gve_utils.c */
 uint32_t gve_reg_bar_read_4(struct gve_priv *priv, bus_size_t offset);
diff --git a/sys/dev/gve/gve_adminq.h b/sys/dev/gve/gve_adminq.h
--- a/sys/dev/gve/gve_adminq.h
+++ b/sys/dev/gve/gve_adminq.h
@@ -153,13 +153,22 @@
 _Static_assert(sizeof(struct gve_device_option_dqo_qpl) == 8,
     "gve: bad admin queue struct length");
 
+/* RX/TX ring size pair, big-endian, embedded in the modify-ring option. */
+struct gve_ring_size_bound {
+	__be16 rx;
+	__be16 tx;
+};
+
+_Static_assert(sizeof(struct gve_ring_size_bound) == 4,
+    "gve: bad admin queue struct length");
+
 struct gve_device_option_modify_ring {
 	__be32 supported_features_mask;
-	__be16 max_rx_ring_size;
-	__be16 max_tx_ring_size;
+	struct gve_ring_size_bound max_ring_size;
+	struct gve_ring_size_bound min_ring_size;
 };
 
-_Static_assert(sizeof(struct gve_device_option_modify_ring) == 8,
+_Static_assert(sizeof(struct gve_device_option_modify_ring) == 12,
     "gve: bad admin queue struct length");
 
 struct gve_device_option_jumbo_frames {
diff --git a/sys/dev/gve/gve_adminq.c b/sys/dev/gve/gve_adminq.c
--- a/sys/dev/gve/gve_adminq.c
+++ b/sys/dev/gve/gve_adminq.c
@@ -59,6 +59,7 @@
     struct gve_device_option_gqi_qpl **dev_op_gqi_qpl,
     struct gve_device_option_dqo_rda **dev_op_dqo_rda,
     struct gve_device_option_dqo_qpl **dev_op_dqo_qpl,
+    struct gve_device_option_modify_ring **dev_op_modify_ring,
     struct gve_device_option_jumbo_frames **dev_op_jumbo_frames)
 {
 	uint32_t req_feat_mask = be32toh(option->required_features_mask);
@@ -121,6 +122,34 @@
 		*dev_op_dqo_qpl = (void *)(option + 1);
 		break;
 
+	case GVE_DEV_OPT_ID_MODIFY_RING:
+		if (option_length < (sizeof(**dev_op_modify_ring) -
+		    sizeof(struct gve_ring_size_bound)) ||
+		    req_feat_mask != GVE_DEV_OPT_REQ_FEAT_MASK_MODIFY_RING) {
+			device_printf(priv->dev, GVE_DEVICE_OPTION_ERROR_FMT,
+			    "Modify Ring", (int)sizeof(**dev_op_modify_ring),
+			    GVE_DEV_OPT_REQ_FEAT_MASK_MODIFY_RING,
+			    option_length, req_feat_mask);
+			break;
+		}
+
+		if (option_length > sizeof(**dev_op_modify_ring)) {
+			device_printf(priv->dev, GVE_DEVICE_OPTION_TOO_BIG_FMT,
+			    "Modify Ring");
+		}
+		*dev_op_modify_ring = (void *)(option + 1);
+
+		/* Min ring sizes are present whenever the full struct fits. */
+		if (option_length >= sizeof(**dev_op_modify_ring)) {
+			priv->min_rx_desc_cnt = max(
+			    be16toh((*dev_op_modify_ring)->min_ring_size.rx),
+			    GVE_DEFAULT_MIN_RX_RING_SIZE);
+			priv->min_tx_desc_cnt = max(
+			    be16toh((*dev_op_modify_ring)->min_ring_size.tx),
+			    GVE_DEFAULT_MIN_TX_RING_SIZE);
+		}
+		break;
+
 	case GVE_DEV_OPT_ID_JUMBO_FRAMES:
 		if (option_length < sizeof(**dev_op_jumbo_frames) ||
 		    req_feat_mask != GVE_DEV_OPT_REQ_FEAT_MASK_JUMBO_FRAMES) {
@@ -155,6 +184,7 @@
     struct gve_device_option_gqi_qpl **dev_op_gqi_qpl,
     struct gve_device_option_dqo_rda **dev_op_dqo_rda,
     struct gve_device_option_dqo_qpl **dev_op_dqo_qpl,
+    struct gve_device_option_modify_ring **dev_op_modify_ring,
     struct gve_device_option_jumbo_frames **dev_op_jumbo_frames)
 {
 	char *desc_end = (char *)descriptor + be16toh(descriptor->total_length);
@@ -176,6 +206,7 @@
 		    dev_op_gqi_qpl,
 		    dev_op_dqo_rda,
 		    dev_op_dqo_qpl,
+		    dev_op_modify_ring,
 		    dev_op_jumbo_frames);
 		dev_opt = (void *)((char *)(dev_opt + 1) + be16toh(dev_opt->option_length));
 	}
@@ -390,8 +421,19 @@
 static void
 gve_enable_supported_features(struct gve_priv *priv,
     uint32_t supported_features_mask,
+    const struct gve_device_option_modify_ring *dev_op_modify_ring,
     const struct gve_device_option_jumbo_frames *dev_op_jumbo_frames)
 {
+	if (dev_op_modify_ring &&
+	    (supported_features_mask & GVE_SUP_MODIFY_RING_MASK)) {
+		priv->modify_ringsize_enabled = true;
+		if (bootverbose)
+			device_printf(priv->dev,
+			    "MODIFY RING device option enabled.\n");
+		priv->max_rx_desc_cnt = be16toh(dev_op_modify_ring->max_ring_size.rx);
+		priv->max_tx_desc_cnt = be16toh(dev_op_modify_ring->max_ring_size.tx);
+	}
+
 	if (dev_op_jumbo_frames &&
 	    (supported_features_mask & GVE_SUP_JUMBO_FRAMES_MASK)) {
 		if (bootverbose)
@@ -410,6 +452,7 @@
 	struct gve_device_option_gqi_qpl *dev_op_gqi_qpl = NULL;
 	struct gve_device_option_dqo_rda *dev_op_dqo_rda = NULL;
 	struct gve_device_option_dqo_qpl *dev_op_dqo_qpl = NULL;
+	struct gve_device_option_modify_ring *dev_op_modify_ring = NULL;
 	struct gve_device_option_jumbo_frames *dev_op_jumbo_frames = NULL;
 	uint32_t supported_features_mask = 0;
 	int rc;
@@ -438,10 +481,15 @@
 
 	bus_dmamap_sync(desc_mem.tag, desc_mem.map, BUS_DMASYNC_POSTREAD);
 
+	/* Default min in case device options don't have min values */
+	priv->min_rx_desc_cnt = GVE_DEFAULT_MIN_RX_RING_SIZE;
+	priv->min_tx_desc_cnt = GVE_DEFAULT_MIN_TX_RING_SIZE;
+
 	rc = gve_process_device_options(priv, desc,
 	    &dev_op_gqi_qpl,
 	    &dev_op_dqo_rda,
 	    &dev_op_dqo_qpl,
+	    &dev_op_modify_ring,
 	    &dev_op_jumbo_frames);
 	if (rc != 0)
 		goto free_device_descriptor;
@@ -489,8 +537,12 @@
 	priv->default_num_queues = be16toh(desc->default_num_queues);
 	priv->supported_features = supported_features_mask;
 
+	/* Default max to current in case modify ring size option is disabled */
+	priv->max_rx_desc_cnt = priv->rx_desc_cnt;
+	priv->max_tx_desc_cnt = priv->tx_desc_cnt;
+
 	gve_enable_supported_features(priv, supported_features_mask,
-	    dev_op_jumbo_frames);
+	    dev_op_modify_ring, dev_op_jumbo_frames);
 
 	for (i = 0; i < ETHER_ADDR_LEN; i++)
 		priv->mac[i] = desc->mac[i];
diff --git a/sys/dev/gve/gve_main.c b/sys/dev/gve/gve_main.c
--- a/sys/dev/gve/gve_main.c
+++ b/sys/dev/gve/gve_main.c
@@ -32,10 +32,10 @@
 #include "gve_adminq.h"
 #include "gve_dqo.h"
 
-#define GVE_DRIVER_VERSION "GVE-FBSD-1.3.2\n"
+#define GVE_DRIVER_VERSION "GVE-FBSD-1.3.3\n"
 #define GVE_VERSION_MAJOR 1
 #define GVE_VERSION_MINOR 3
-#define GVE_VERSION_SUB 2
+#define GVE_VERSION_SUB 3
 
 #define GVE_DEFAULT_RX_COPYBREAK 256
 
@@ -260,6 +260,65 @@
 	return (err);
 }
 
+/*
+ * Recreate all RX (is_rx) or TX rings with new_desc_cnt descriptors each.
+ *
+ * The interface is taken down first. If allocation at the new size fails,
+ * the previous descriptor count is restored and allocation is retried; if
+ * that fails too, the error is returned without bringing the interface
+ * back up. If gve_up() fails, a device reset is scheduled.
+ * The caller must hold the interface lock.
+ */
+int
+gve_adjust_ring_sizes(struct gve_priv *priv, uint16_t new_desc_cnt, bool is_rx)
+{
+	int err;
+	uint16_t prev_desc_cnt;
+
+	GVE_IFACE_LOCK_ASSERT(priv->gve_iface_lock);
+
+	gve_down(priv);
+
+	if (is_rx) {
+		gve_free_rx_rings(priv, 0, priv->rx_cfg.num_queues);
+		prev_desc_cnt = priv->rx_desc_cnt;
+		priv->rx_desc_cnt = new_desc_cnt;
+		err = gve_alloc_rx_rings(priv, 0, priv->rx_cfg.num_queues);
+		if (err != 0) {
+			device_printf(priv->dev,
+			    "Failed to allocate rings. Trying to start back up with previous ring size.\n");
+			priv->rx_desc_cnt = prev_desc_cnt;
+			err = gve_alloc_rx_rings(priv, 0, priv->rx_cfg.num_queues);
+		}
+	} else {
+		gve_free_tx_rings(priv, 0, priv->tx_cfg.num_queues);
+		prev_desc_cnt = priv->tx_desc_cnt;
+		priv->tx_desc_cnt = new_desc_cnt;
+		err = gve_alloc_tx_rings(priv, 0, priv->tx_cfg.num_queues);
+		if (err != 0) {
+			device_printf(priv->dev,
+			    "Failed to allocate rings. Trying to start back up with previous ring size.\n");
+			priv->tx_desc_cnt = prev_desc_cnt;
+			err = gve_alloc_tx_rings(priv, 0, priv->tx_cfg.num_queues);
+		}
+	}
+
+	if (err != 0) {
+		device_printf(priv->dev,
+		    "Failed to allocate rings! Cannot start device back up!\n");
+		gve_down(priv);
+		return (err);
+	}
+
+	err = gve_up(priv);
+	if (err != 0) {
+		gve_schedule_reset(priv);
+		return (err);
+	}
+
+	return (0);
+}
+
 static int
 gve_set_mtu(if_t ifp, uint32_t new_mtu)
 {
diff --git a/sys/dev/gve/gve_sysctl.c b/sys/dev/gve/gve_sysctl.c
--- a/sys/dev/gve/gve_sysctl.c
+++ b/sys/dev/gve/gve_sysctl.c
@@ -361,6 +361,95 @@
 	return (0);
 }
 
+/*
+ * Validate a requested ring size: it must be a non-zero power of 2 and lie
+ * within the min/max descriptor counts stored in priv for that direction.
+ * Returns 0 if acceptable, EINVAL (after logging the reason) otherwise.
+ */
+static int
+gve_check_ring_size(struct gve_priv *priv, uint16_t val, bool is_rx)
+{
+	if (!powerof2(val) || val == 0) {
+		device_printf(priv->dev,
+		    "Requested ring size (%u) must be a power of 2\n", val);
+		return (EINVAL);
+	}
+
+	if (val < (is_rx ? priv->min_rx_desc_cnt : priv->min_tx_desc_cnt)) {
+		device_printf(priv->dev,
+		    "Requested ring size (%u) cannot be less than %d\n", val,
+		    (is_rx ? priv->min_rx_desc_cnt : priv->min_tx_desc_cnt));
+		return (EINVAL);
+	}
+
+	if (val > (is_rx ? priv->max_rx_desc_cnt : priv->max_tx_desc_cnt)) {
+		device_printf(priv->dev,
+		    "Requested ring size (%u) cannot be greater than %d\n", val,
+		    (is_rx ? priv->max_rx_desc_cnt : priv->max_tx_desc_cnt));
+		return (EINVAL);
+	}
+
+	return (0);
+}
+
+/*
+ * Sysctl handler for tx_ring_size: reports the current TX descriptor count
+ * and, on write, validates the new value and resizes the TX rings.
+ */
+static int
+gve_sysctl_tx_ring_size(SYSCTL_HANDLER_ARGS)
+{
+	struct gve_priv *priv = arg1;
+	uint16_t val;
+	int err;
+
+	val = priv->tx_desc_cnt;
+	err = sysctl_handle_16(oidp, &val, 0, req);
+	if (err != 0 || req->newptr == NULL)
+		return (err);
+
+	err = gve_check_ring_size(priv, val, /*is_rx=*/false);
+	if (err != 0)
+		return (err);
+
+	if (val != priv->tx_desc_cnt) {
+		GVE_IFACE_LOCK_LOCK(priv->gve_iface_lock);
+		err = gve_adjust_ring_sizes(priv, val, /*is_rx=*/false);
+		GVE_IFACE_LOCK_UNLOCK(priv->gve_iface_lock);
+	}
+
+	return (err);
+}
+
+/*
+ * Sysctl handler for rx_ring_size: reports the current RX descriptor count
+ * and, on write, validates the new value and resizes the RX rings.
+ */
+static int
+gve_sysctl_rx_ring_size(SYSCTL_HANDLER_ARGS)
+{
+	struct gve_priv *priv = arg1;
+	uint16_t val;
+	int err;
+
+	val = priv->rx_desc_cnt;
+	err = sysctl_handle_16(oidp, &val, 0, req);
+	if (err != 0 || req->newptr == NULL)
+		return (err);
+
+	err = gve_check_ring_size(priv, val, /*is_rx=*/true);
+	if (err != 0)
+		return (err);
+
+	if (val != priv->rx_desc_cnt) {
+		GVE_IFACE_LOCK_LOCK(priv->gve_iface_lock);
+		err = gve_adjust_ring_sizes(priv, val, /*is_rx=*/true);
+		GVE_IFACE_LOCK_UNLOCK(priv->gve_iface_lock);
+	}
+
+	return (err);
+}
+
 static void
 gve_setup_sysctl_writables(struct sysctl_ctx_list *ctx,
     struct sysctl_oid_list *child, struct gve_priv *priv)
@@ -372,6 +461,16 @@
 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "num_rx_queues",
 	    CTLTYPE_U16 | CTLFLAG_RW | CTLFLAG_MPSAFE, priv, 0,
 	    gve_sysctl_num_rx_queues, "I", "Number of RX Queues.");
+
+	if (priv->modify_ringsize_enabled) {
+		SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_ring_size",
+		    CTLTYPE_U16 | CTLFLAG_RW | CTLFLAG_MPSAFE, priv, 0,
+		    gve_sysctl_tx_ring_size, "I", "TX Ring Size.");
+
+		SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_ring_size",
+		    CTLTYPE_U16 | CTLFLAG_RW | CTLFLAG_MPSAFE, priv, 0,
+		    gve_sysctl_rx_ring_size, "I", "RX Ring Size.");
+	}
 }
 
 void gve_setup_sysctl(struct gve_priv *priv)