Index: head/sys/dev/ena/ena.h =================================================================== --- head/sys/dev/ena/ena.h +++ head/sys/dev/ena/ena.h @@ -463,9 +463,13 @@ uint32_t missing_tx_threshold; bool disable_meta_caching; + uint16_t eni_metrics_sample_interval; + uint16_t eni_metrics_sample_interval_cnt; + /* Statistics */ struct ena_stats_dev dev_stats; struct ena_hw_stats hw_stats; + struct ena_admin_eni_stats eni_metrics; enum ena_regs_reset_reason_types reset_reason; }; Index: head/sys/dev/ena/ena.c =================================================================== --- head/sys/dev/ena/ena.c +++ head/sys/dev/ena/ena.c @@ -172,6 +172,7 @@ static void ena_update_on_link_change(void *, struct ena_admin_aenq_entry *); static void unimplemented_aenq_handler(void *, struct ena_admin_aenq_entry *); +static int ena_copy_eni_metrics(struct ena_adapter *); static void ena_timer_service(void *); static char ena_version[] = DEVICE_NAME DRV_MODULE_NAME " v" DRV_MODULE_VERSION; @@ -3215,6 +3216,44 @@ } } +/** + * ena_copy_eni_metrics - Get and copy ENI metrics from the HW. + * @adapter: ENA device adapter + * + * Returns 0 on success, EOPNOTSUPP if current HW doesn't support those metrics + * and other error codes on failure. + * + * This function can possibly cause a race with other calls to the admin queue. + * Because of that, the caller should either lock this function or make sure + * that there is no race in the current context. + */ +static int +ena_copy_eni_metrics(struct ena_adapter *adapter) +{ + static bool print_once = true; + int rc; + + rc = ena_com_get_eni_stats(adapter->ena_dev, &adapter->eni_metrics); + + if (rc != 0) { + if (rc == ENA_COM_UNSUPPORTED) { + if (print_once) { + device_printf(adapter->pdev, + "Retrieving ENI metrics is not supported.\n"); + print_once = false; + } else { + ena_trace(NULL, ENA_DBG, + "Retrieving ENI metrics is not supported.\n"); + } + } else { + device_printf(adapter->pdev, + "Failed to get ENI metrics: %d\n", rc); + } + } + + return (rc); +} + static void ena_timer_service(void *data) { @@ -3229,6 +3268,38 @@ check_for_missing_completions(adapter); check_for_empty_rx_ring(adapter); + + /* + * User controller update of the ENI metrics. + * If the delay was set to 0, then the stats shouldn't be updated at + * all. + * Otherwise, wait 'eni_metrics_sample_interval' seconds, before + * updating stats. + * As timer service is executed every second, it's enough to increment + * appropriate counter each time the timer service is executed. + */ + if ((adapter->eni_metrics_sample_interval != 0) && + (++adapter->eni_metrics_sample_interval_cnt >= + adapter->eni_metrics_sample_interval)) { + /* + * There is no race with other admin queue calls, as: + * - Timer service runs after interface is up, so all + * configuration calls to the admin queue are finished. + * - After interface is up, the driver doesn't use (at least + * for now) other functions writing to the admin queue. + * + * It may change in the future, so in that situation, the lock + * will be needed. ENA_LOCK_*() cannot be used for that purpose, + * as callout ena_timer_service is protected by them. It could + * lead to the deadlock if callout_drain() would hold the lock + * before ena_copy_eni_metrics() was executed. It's advised to + * use separate lock in that situation which will be used only + * for the admin queue. + */ + (void)ena_copy_eni_metrics(adapter); + adapter->eni_metrics_sample_interval_cnt = 0; + } + if (host_info != NULL) ena_update_host_info(host_info, adapter->ifp); Index: head/sys/dev/ena/ena_sysctl.c =================================================================== --- head/sys/dev/ena/ena_sysctl.c +++ head/sys/dev/ena/ena_sysctl.c @@ -34,11 +34,16 @@ static void ena_sysctl_add_wd(struct ena_adapter *); static void ena_sysctl_add_stats(struct ena_adapter *); +static void ena_sysctl_add_eni_metrics(struct ena_adapter *); static void ena_sysctl_add_tuneables(struct ena_adapter *); static int ena_sysctl_buf_ring_size(SYSCTL_HANDLER_ARGS); static int ena_sysctl_rx_queue_size(SYSCTL_HANDLER_ARGS); static int ena_sysctl_io_queues_nb(SYSCTL_HANDLER_ARGS); +static int ena_sysctl_eni_metrics_interval(SYSCTL_HANDLER_ARGS); +/* Limit max ENI sample rate to be an hour. */ +#define ENI_METRICS_MAX_SAMPLE_INTERVAL 3600 + static SYSCTL_NODE(_hw, OID_AUTO, ena, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "ENA driver parameters"); @@ -69,6 +74,7 @@ { ena_sysctl_add_wd(adapter); ena_sysctl_add_stats(adapter); + ena_sysctl_add_eni_metrics(adapter); ena_sysctl_add_tuneables(adapter); } @@ -292,6 +298,58 @@ } static void +ena_sysctl_add_eni_metrics(struct ena_adapter *adapter) +{ + device_t dev; + struct ena_admin_eni_stats *eni_metrics; + + struct sysctl_ctx_list *ctx; + struct sysctl_oid *tree; + struct sysctl_oid_list *child; + + struct sysctl_oid *eni_node; + struct sysctl_oid_list *eni_list; + + dev = adapter->pdev; + + ctx = device_get_sysctl_ctx(dev); + tree = device_get_sysctl_tree(dev); + child = SYSCTL_CHILDREN(tree); + + eni_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "eni_metrics", + CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "ENA's ENI metrics"); + eni_list = SYSCTL_CHILDREN(eni_node); + + eni_metrics = &adapter->eni_metrics; + + SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "bw_in_allowance_exceeded", + CTLFLAG_RD, &eni_metrics->bw_in_allowance_exceeded, 0, + "Inbound BW allowance exceeded"); + SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "bw_out_allowance_exceeded", + CTLFLAG_RD, &eni_metrics->bw_out_allowance_exceeded, 0, + "Outbound BW allowance exceeded"); + SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "pps_allowance_exceeded", + CTLFLAG_RD, &eni_metrics->pps_allowance_exceeded, 0, + "PPS allowance exceeded"); + SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "conntrack_allowance_exceeded", + CTLFLAG_RD, &eni_metrics->conntrack_allowance_exceeded, 0, + "Connection tracking allowance exceeded"); + SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "linklocal_allowance_exceeded", + CTLFLAG_RD, &eni_metrics->linklocal_allowance_exceeded, 0, + "Linklocal packet rate allowance exceeded"); + + /* + * Tuneable, which determines how often ENI metrics will be read. + * 0 means it's turned off. Maximum allowed value is limited by: + * ENI_METRICS_MAX_SAMPLE_INTERVAL. + */ + SYSCTL_ADD_PROC(ctx, eni_list, OID_AUTO, "sample_interval", + CTLTYPE_U16 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0, + ena_sysctl_eni_metrics_interval, "SU", + "Interval in seconds for updating ENI emetrics. 0 turns off the update."); +} + +static void ena_sysctl_add_tuneables(struct ena_adapter *adapter) { device_t dev; @@ -336,7 +394,7 @@ error = sysctl_wire_old_buffer(req, sizeof(val)); if (error == 0) { val = adapter->buf_ring_size; - error = sysctl_handle_int(oidp, &val, 0, req); + error = sysctl_handle_32(oidp, &val, 0, req); } if (error != 0 || req->newptr == NULL) return (error); @@ -460,4 +518,41 @@ } return (error); +} + +static int +ena_sysctl_eni_metrics_interval(SYSCTL_HANDLER_ARGS) +{ + struct ena_adapter *adapter = arg1; + uint16_t interval; + int error; + + error = sysctl_wire_old_buffer(req, sizeof(interval)); + if (error == 0) { + interval = adapter->eni_metrics_sample_interval; + error = sysctl_handle_16(oidp, &interval, 0, req); + } + if (error != 0 || req->newptr == NULL) + return (error); + + if (interval > ENI_METRICS_MAX_SAMPLE_INTERVAL) { + device_printf(adapter->pdev, + "ENI metrics update interval is out of range - maximum allowed value: %d seconds\n", + ENI_METRICS_MAX_SAMPLE_INTERVAL); + return (EINVAL); + } + + if (interval == 0) { + device_printf(adapter->pdev, + "ENI metrics update is now turned off\n"); + bzero(&adapter->eni_metrics, sizeof(adapter->eni_metrics)); + } else { + device_printf(adapter->pdev, + "ENI metrics update interval is set to: %"PRIu16" seconds\n", + interval); + } + + adapter->eni_metrics_sample_interval = interval; + + return (0); }