diff --git a/sys/dev/ice/ice_controlq.c b/sys/dev/ice/ice_controlq.c index e96c7e230310..e1a6b0fb5662 100644 --- a/sys/dev/ice/ice_controlq.c +++ b/sys/dev/ice/ice_controlq.c @@ -1,1312 +1,1331 @@ /* SPDX-License-Identifier: BSD-3-Clause */ /* Copyright (c) 2024, Intel Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. Neither the name of the Intel Corporation nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/ #include "ice_common.h" #define ICE_CQ_INIT_REGS(qinfo, prefix) \ do { \ (qinfo)->sq.head = prefix##_ATQH; \ (qinfo)->sq.tail = prefix##_ATQT; \ (qinfo)->sq.len = prefix##_ATQLEN; \ (qinfo)->sq.bah = prefix##_ATQBAH; \ (qinfo)->sq.bal = prefix##_ATQBAL; \ (qinfo)->sq.len_mask = prefix##_ATQLEN_ATQLEN_M; \ (qinfo)->sq.len_ena_mask = prefix##_ATQLEN_ATQENABLE_M; \ (qinfo)->sq.len_crit_mask = prefix##_ATQLEN_ATQCRIT_M; \ (qinfo)->sq.head_mask = prefix##_ATQH_ATQH_M; \ (qinfo)->rq.head = prefix##_ARQH; \ (qinfo)->rq.tail = prefix##_ARQT; \ (qinfo)->rq.len = prefix##_ARQLEN; \ (qinfo)->rq.bah = prefix##_ARQBAH; \ (qinfo)->rq.bal = prefix##_ARQBAL; \ (qinfo)->rq.len_mask = prefix##_ARQLEN_ARQLEN_M; \ (qinfo)->rq.len_ena_mask = prefix##_ARQLEN_ARQENABLE_M; \ (qinfo)->rq.len_crit_mask = prefix##_ARQLEN_ARQCRIT_M; \ (qinfo)->rq.head_mask = prefix##_ARQH_ARQH_M; \ } while (0) /** * ice_adminq_init_regs - Initialize AdminQ registers * @hw: pointer to the hardware structure * * This assumes the alloc_sq and alloc_rq functions have already been called */ static void ice_adminq_init_regs(struct ice_hw *hw) { struct ice_ctl_q_info *cq = &hw->adminq; ice_debug(hw, ICE_DBG_TRACE, "%s\n", __func__); ICE_CQ_INIT_REGS(cq, PF_FW); } /** * ice_mailbox_init_regs - Initialize Mailbox registers * @hw: pointer to the hardware structure * * This assumes the alloc_sq and alloc_rq functions have already been called */ static void ice_mailbox_init_regs(struct ice_hw *hw) { struct ice_ctl_q_info *cq = &hw->mailboxq; ICE_CQ_INIT_REGS(cq, PF_MBX); } /** * ice_sb_init_regs - Initialize Sideband registers * @hw: pointer to the hardware structure * * This assumes the alloc_sq and alloc_rq functions have already been called */ static void ice_sb_init_regs(struct ice_hw *hw) { struct ice_ctl_q_info *cq = &hw->sbq; ice_debug(hw, ICE_DBG_TRACE, "%s\n", __func__); ICE_CQ_INIT_REGS(cq, PF_SB); } /** * ice_check_sq_alive * @hw: pointer to the HW struct * @cq: pointer to the specific Control queue * * Returns true if Queue is enabled else false. 
*/ bool ice_check_sq_alive(struct ice_hw *hw, struct ice_ctl_q_info *cq) { /* check both queue-length and queue-enable fields */ if (cq->sq.len && cq->sq.len_mask && cq->sq.len_ena_mask) return (rd32(hw, cq->sq.len) & (cq->sq.len_mask | cq->sq.len_ena_mask)) == (cq->num_sq_entries | cq->sq.len_ena_mask); return false; } /** * ice_alloc_ctrlq_sq_ring - Allocate Control Transmit Queue (ATQ) rings * @hw: pointer to the hardware structure * @cq: pointer to the specific Control queue */ static int ice_alloc_ctrlq_sq_ring(struct ice_hw *hw, struct ice_ctl_q_info *cq) { size_t size = cq->num_sq_entries * sizeof(struct ice_aq_desc); cq->sq.desc_buf.va = ice_alloc_dma_mem(hw, &cq->sq.desc_buf, size); if (!cq->sq.desc_buf.va) return ICE_ERR_NO_MEMORY; return 0; } /** * ice_alloc_ctrlq_rq_ring - Allocate Control Receive Queue (ARQ) rings * @hw: pointer to the hardware structure * @cq: pointer to the specific Control queue */ static int ice_alloc_ctrlq_rq_ring(struct ice_hw *hw, struct ice_ctl_q_info *cq) { size_t size = cq->num_rq_entries * sizeof(struct ice_aq_desc); cq->rq.desc_buf.va = ice_alloc_dma_mem(hw, &cq->rq.desc_buf, size); if (!cq->rq.desc_buf.va) return ICE_ERR_NO_MEMORY; return 0; } /** * ice_free_cq_ring - Free control queue ring * @hw: pointer to the hardware structure * @ring: pointer to the specific control queue ring * * This assumes the posted buffers have already been cleaned * and de-allocated */ static void ice_free_cq_ring(struct ice_hw *hw, struct ice_ctl_q_ring *ring) { ice_free_dma_mem(hw, &ring->desc_buf); } /** * ice_alloc_rq_bufs - Allocate pre-posted buffers for the ARQ * @hw: pointer to the hardware structure * @cq: pointer to the specific Control queue */ static int ice_alloc_rq_bufs(struct ice_hw *hw, struct ice_ctl_q_info *cq) { int i; /* We'll be allocating the buffer info memory first, then we can * allocate the mapped buffers for the event processing */ cq->rq.dma_head = ice_calloc(hw, cq->num_rq_entries, sizeof(cq->rq.desc_buf)); if (!cq->rq.dma_head) return ICE_ERR_NO_MEMORY; cq->rq.r.rq_bi = (struct ice_dma_mem *)cq->rq.dma_head; /* allocate the mapped buffers */ for (i = 0; i < cq->num_rq_entries; i++) { struct ice_aq_desc *desc; struct ice_dma_mem *bi; bi = &cq->rq.r.rq_bi[i]; bi->va = ice_alloc_dma_mem(hw, bi, cq->rq_buf_size); if (!bi->va) goto unwind_alloc_rq_bufs; /* now configure the descriptors for use */ desc = ICE_CTL_Q_DESC(cq->rq, i); desc->flags = CPU_TO_LE16(ICE_AQ_FLAG_BUF); if (cq->rq_buf_size > ICE_AQ_LG_BUF) desc->flags |= CPU_TO_LE16(ICE_AQ_FLAG_LB); desc->opcode = 0; /* This is in accordance with control queue design, there is no * register for buffer size configuration */ desc->datalen = CPU_TO_LE16(bi->size); desc->retval = 0; desc->cookie_high = 0; desc->cookie_low = 0; desc->params.generic.addr_high = CPU_TO_LE32(ICE_HI_DWORD(bi->pa)); desc->params.generic.addr_low = CPU_TO_LE32(ICE_LO_DWORD(bi->pa)); desc->params.generic.param0 = 0; desc->params.generic.param1 = 0; } return 0; unwind_alloc_rq_bufs: /* don't try to free the one that failed... 
*/ i--; for (; i >= 0; i--) ice_free_dma_mem(hw, &cq->rq.r.rq_bi[i]); cq->rq.r.rq_bi = NULL; ice_free(hw, cq->rq.dma_head); cq->rq.dma_head = NULL; return ICE_ERR_NO_MEMORY; } /** * ice_alloc_sq_bufs - Allocate empty buffer structs for the ATQ * @hw: pointer to the hardware structure * @cq: pointer to the specific Control queue */ static int ice_alloc_sq_bufs(struct ice_hw *hw, struct ice_ctl_q_info *cq) { int i; /* No mapped memory needed yet, just the buffer info structures */ cq->sq.dma_head = ice_calloc(hw, cq->num_sq_entries, sizeof(cq->sq.desc_buf)); if (!cq->sq.dma_head) return ICE_ERR_NO_MEMORY; cq->sq.r.sq_bi = (struct ice_dma_mem *)cq->sq.dma_head; /* allocate the mapped buffers */ for (i = 0; i < cq->num_sq_entries; i++) { struct ice_dma_mem *bi; bi = &cq->sq.r.sq_bi[i]; bi->va = ice_alloc_dma_mem(hw, bi, cq->sq_buf_size); if (!bi->va) goto unwind_alloc_sq_bufs; } return 0; unwind_alloc_sq_bufs: /* don't try to free the one that failed... */ i--; for (; i >= 0; i--) ice_free_dma_mem(hw, &cq->sq.r.sq_bi[i]); cq->sq.r.sq_bi = NULL; ice_free(hw, cq->sq.dma_head); cq->sq.dma_head = NULL; return ICE_ERR_NO_MEMORY; } static int ice_cfg_cq_regs(struct ice_hw *hw, struct ice_ctl_q_ring *ring, u16 num_entries) { /* Clear Head and Tail */ wr32(hw, ring->head, 0); wr32(hw, ring->tail, 0); /* set starting point */ wr32(hw, ring->len, (num_entries | ring->len_ena_mask)); wr32(hw, ring->bal, ICE_LO_DWORD(ring->desc_buf.pa)); wr32(hw, ring->bah, ICE_HI_DWORD(ring->desc_buf.pa)); /* Check one register to verify that config was applied */ if (rd32(hw, ring->bal) != ICE_LO_DWORD(ring->desc_buf.pa)) return ICE_ERR_AQ_ERROR; return 0; } /** * ice_cfg_sq_regs - configure Control ATQ registers * @hw: pointer to the hardware structure * @cq: pointer to the specific Control queue * * Configure base address and length registers for the transmit queue */ static int ice_cfg_sq_regs(struct ice_hw *hw, struct ice_ctl_q_info *cq) { return ice_cfg_cq_regs(hw, &cq->sq, cq->num_sq_entries); } /** * ice_cfg_rq_regs - configure Control ARQ register * @hw: pointer to the hardware structure * @cq: pointer to the specific Control queue * * Configure base address and length registers for the receive (event queue) */ static int ice_cfg_rq_regs(struct ice_hw *hw, struct ice_ctl_q_info *cq) { int status; status = ice_cfg_cq_regs(hw, &cq->rq, cq->num_rq_entries); if (status) return status; /* Update tail in the HW to post pre-allocated buffers */ wr32(hw, cq->rq.tail, (u32)(cq->num_rq_entries - 1)); return 0; } #define ICE_FREE_CQ_BUFS(hw, qi, ring) \ do { \ /* free descriptors */ \ if ((qi)->ring.r.ring##_bi) { \ int i; \ \ for (i = 0; i < (qi)->num_##ring##_entries; i++) \ if ((qi)->ring.r.ring##_bi[i].pa) \ ice_free_dma_mem((hw), \ &(qi)->ring.r.ring##_bi[i]); \ } \ /* free DMA head */ \ ice_free(hw, (qi)->ring.dma_head); \ } while (0) /** * ice_init_sq - main initialization routine for Control ATQ * @hw: pointer to the hardware structure * @cq: pointer to the specific Control queue * * This is the main initialization routine for the Control Send Queue * Prior to calling this function, the driver *MUST* set the following fields * in the cq->structure: * - cq->num_sq_entries * - cq->sq_buf_size * * Do *NOT* hold the lock when calling this as the memory allocation routines * called are not going to be atomic context safe */ static int ice_init_sq(struct ice_hw *hw, struct ice_ctl_q_info *cq) { int ret_code; ice_debug(hw, ICE_DBG_TRACE, "%s\n", __func__); if (cq->sq.count > 0) { /* queue already initialized */ ret_code = 
ICE_ERR_NOT_READY; goto init_ctrlq_exit; } /* verify input for valid configuration */ if (!cq->num_sq_entries || !cq->sq_buf_size) { ret_code = ICE_ERR_CFG; goto init_ctrlq_exit; } cq->sq.next_to_use = 0; cq->sq.next_to_clean = 0; /* allocate the ring memory */ ret_code = ice_alloc_ctrlq_sq_ring(hw, cq); if (ret_code) goto init_ctrlq_exit; /* allocate buffers in the rings */ ret_code = ice_alloc_sq_bufs(hw, cq); if (ret_code) goto init_ctrlq_free_rings; /* initialize base registers */ ret_code = ice_cfg_sq_regs(hw, cq); if (ret_code) goto init_ctrlq_free_rings; /* success! */ cq->sq.count = cq->num_sq_entries; goto init_ctrlq_exit; init_ctrlq_free_rings: ICE_FREE_CQ_BUFS(hw, cq, sq); ice_free_cq_ring(hw, &cq->sq); init_ctrlq_exit: return ret_code; } /** * ice_init_rq - initialize receive side of a control queue * @hw: pointer to the hardware structure * @cq: pointer to the specific Control queue * * The main initialization routine for Receive side of a control queue. * Prior to calling this function, the driver *MUST* set the following fields * in the cq->structure: * - cq->num_rq_entries * - cq->rq_buf_size * * Do *NOT* hold the lock when calling this as the memory allocation routines * called are not going to be atomic context safe */ static int ice_init_rq(struct ice_hw *hw, struct ice_ctl_q_info *cq) { int ret_code; ice_debug(hw, ICE_DBG_TRACE, "%s\n", __func__); if (cq->rq.count > 0) { /* queue already initialized */ ret_code = ICE_ERR_NOT_READY; goto init_ctrlq_exit; } /* verify input for valid configuration */ if (!cq->num_rq_entries || !cq->rq_buf_size) { ret_code = ICE_ERR_CFG; goto init_ctrlq_exit; } cq->rq.next_to_use = 0; cq->rq.next_to_clean = 0; /* allocate the ring memory */ ret_code = ice_alloc_ctrlq_rq_ring(hw, cq); if (ret_code) goto init_ctrlq_exit; /* allocate buffers in the rings */ ret_code = ice_alloc_rq_bufs(hw, cq); if (ret_code) goto init_ctrlq_free_rings; /* initialize base registers */ ret_code = ice_cfg_rq_regs(hw, cq); if (ret_code) goto init_ctrlq_free_rings; /* success! */ cq->rq.count = cq->num_rq_entries; goto init_ctrlq_exit; init_ctrlq_free_rings: ICE_FREE_CQ_BUFS(hw, cq, rq); ice_free_cq_ring(hw, &cq->rq); init_ctrlq_exit: return ret_code; } /** * ice_shutdown_sq - shutdown the transmit side of a control queue * @hw: pointer to the hardware structure * @cq: pointer to the specific Control queue * * The main shutdown routine for the Control Transmit Queue */ static int ice_shutdown_sq(struct ice_hw *hw, struct ice_ctl_q_info *cq) { int ret_code = 0; ice_debug(hw, ICE_DBG_TRACE, "%s\n", __func__); ice_acquire_lock(&cq->sq_lock); if (!cq->sq.count) { ret_code = ICE_ERR_NOT_READY; goto shutdown_sq_out; } /* Stop processing of the control queue */ wr32(hw, cq->sq.head, 0); wr32(hw, cq->sq.tail, 0); wr32(hw, cq->sq.len, 0); wr32(hw, cq->sq.bal, 0); wr32(hw, cq->sq.bah, 0); cq->sq.count = 0; /* to indicate uninitialized queue */ /* free ring buffers and the ring itself */ ICE_FREE_CQ_BUFS(hw, cq, sq); ice_free_cq_ring(hw, &cq->sq); shutdown_sq_out: ice_release_lock(&cq->sq_lock); return ret_code; } /** * ice_aq_ver_check - Check the reported AQ API version * @hw: pointer to the hardware structure * * Checks if the driver should load on a given AQ API version. * * Return: 'true' iff the driver should attempt to load. 'false' otherwise. 
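ice_aq_ver_check, whose body follows, implements a tolerance policy rather than an exact match: refuse to load only on a newer major API, and merely log when the minor version drifts by more than two in either direction. A condensed userspace model of that policy; the enum and constants are illustrative, not driver code:

#include <stdio.h>

enum ver_check { VER_OK, VER_REFUSE, VER_WARN_NEWER, VER_WARN_OLDER };

/* Refuse only a newer major API; warn when the minor version skews
 * by more than 2 either way; always load on an older major. */
static enum ver_check
check_api_ver(unsigned maj, unsigned min, unsigned exp_maj, unsigned exp_min)
{
	if (maj > exp_maj)
		return VER_REFUSE;
	if (maj == exp_maj) {
		if (min > exp_min + 2)
			return VER_WARN_NEWER;
		if (min + 2 < exp_min)
			return VER_WARN_OLDER;
		return VER_OK;
	}
	return VER_WARN_OLDER;
}

int main(void)
{
	printf("%d %d %d\n",
	    check_api_ver(1, 7, 1, 5),	/* within skew: VER_OK */
	    check_api_ver(1, 8, 1, 5),	/* minor too new: warn */
	    check_api_ver(2, 0, 1, 5));	/* major too new: refuse */
	return 0;
}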
*/ static bool ice_aq_ver_check(struct ice_hw *hw) { u8 exp_fw_api_ver_major = EXP_FW_API_VER_MAJOR_BY_MAC(hw); u8 exp_fw_api_ver_minor = EXP_FW_API_VER_MINOR_BY_MAC(hw); if (hw->api_maj_ver > exp_fw_api_ver_major) { /* Major API version is newer than expected, don't load */ ice_warn(hw, "The driver for the device stopped because the NVM image is newer than expected. You must install the most recent version of the network driver.\n"); return false; } else if (hw->api_maj_ver == exp_fw_api_ver_major) { if (hw->api_min_ver > (exp_fw_api_ver_minor + 2)) ice_info(hw, "The driver for the device detected a newer version (%u.%u) of the NVM image than expected (%u.%u). Please install the most recent version of the network driver.\n", hw->api_maj_ver, hw->api_min_ver, exp_fw_api_ver_major, exp_fw_api_ver_minor); else if ((hw->api_min_ver + 2) < exp_fw_api_ver_minor) ice_info(hw, "The driver for the device detected an older version (%u.%u) of the NVM image than expected (%u.%u). Please update the NVM image.\n", hw->api_maj_ver, hw->api_min_ver, exp_fw_api_ver_major, exp_fw_api_ver_minor); } else { /* Major API version is older than expected, log a warning */ ice_info(hw, "The driver for the device detected an older version (%u.%u) of the NVM image than expected (%u.%u). Please update the NVM image.\n", hw->api_maj_ver, hw->api_min_ver, exp_fw_api_ver_major, exp_fw_api_ver_minor); } return true; } /** * ice_shutdown_rq - shutdown Control ARQ * @hw: pointer to the hardware structure * @cq: pointer to the specific Control queue * * The main shutdown routine for the Control Receive Queue */ static int ice_shutdown_rq(struct ice_hw *hw, struct ice_ctl_q_info *cq) { int ret_code = 0; ice_debug(hw, ICE_DBG_TRACE, "%s\n", __func__); ice_acquire_lock(&cq->rq_lock); if (!cq->rq.count) { ret_code = ICE_ERR_NOT_READY; goto shutdown_rq_out; } /* Stop Control Queue processing */ wr32(hw, cq->rq.head, 0); wr32(hw, cq->rq.tail, 0); wr32(hw, cq->rq.len, 0); wr32(hw, cq->rq.bal, 0); wr32(hw, cq->rq.bah, 0); /* set rq.count to 0 to indicate uninitialized queue */ cq->rq.count = 0; /* free ring buffers and the ring itself */ ICE_FREE_CQ_BUFS(hw, cq, rq); ice_free_cq_ring(hw, &cq->rq); shutdown_rq_out: ice_release_lock(&cq->rq_lock); return ret_code; } /** * ice_idle_aq - stop ARQ/ATQ processing momentarily * @hw: pointer to the hardware structure * @cq: pointer to the specific Control queue */ void ice_idle_aq(struct ice_hw *hw, struct ice_ctl_q_info *cq) { wr32(hw, cq->sq.len, 0); wr32(hw, cq->rq.len, 0); ice_msec_delay(2, false); } /** * ice_init_check_adminq - Check version for Admin Queue to know if its alive * @hw: pointer to the hardware structure */ static int ice_init_check_adminq(struct ice_hw *hw) { struct ice_ctl_q_info *cq = &hw->adminq; int status; ice_debug(hw, ICE_DBG_TRACE, "%s\n", __func__); status = ice_aq_get_fw_ver(hw, NULL); if (status) goto init_ctrlq_free_rq; if (!ice_aq_ver_check(hw)) { status = ICE_ERR_FW_API_VER; goto init_ctrlq_free_rq; } return 0; init_ctrlq_free_rq: ice_shutdown_rq(hw, cq); ice_shutdown_sq(hw, cq); return status; } /** * ice_init_ctrlq - main initialization routine for any control Queue * @hw: pointer to the hardware structure * @q_type: specific Control queue type * * Prior to calling this function, the driver *MUST* set the following fields * in the cq->structure: * - cq->num_sq_entries * - cq->num_rq_entries * - cq->rq_buf_size * - cq->sq_buf_size * * NOTE: this function does not initialize the controlq locks */ static int ice_init_ctrlq(struct ice_hw *hw, enum ice_ctl_q 
q_type) { struct ice_ctl_q_info *cq; int ret_code; ice_debug(hw, ICE_DBG_TRACE, "%s\n", __func__); switch (q_type) { case ICE_CTL_Q_ADMIN: ice_adminq_init_regs(hw); cq = &hw->adminq; break; case ICE_CTL_Q_SB: ice_sb_init_regs(hw); cq = &hw->sbq; break; case ICE_CTL_Q_MAILBOX: ice_mailbox_init_regs(hw); cq = &hw->mailboxq; break; default: return ICE_ERR_PARAM; } cq->qtype = q_type; /* verify input for valid configuration */ if (!cq->num_rq_entries || !cq->num_sq_entries || !cq->rq_buf_size || !cq->sq_buf_size) { return ICE_ERR_CFG; } /* setup SQ command write back timeout */ cq->sq_cmd_timeout = ICE_CTL_Q_SQ_CMD_TIMEOUT; /* allocate the ATQ */ ret_code = ice_init_sq(hw, cq); if (ret_code) return ret_code; /* allocate the ARQ */ ret_code = ice_init_rq(hw, cq); if (ret_code) goto init_ctrlq_free_sq; /* success! */ return 0; init_ctrlq_free_sq: ice_shutdown_sq(hw, cq); return ret_code; } /** * ice_is_sbq_supported - is the sideband queue supported * @hw: pointer to the hardware structure * * Returns true if the sideband control queue interface is * supported for the device, false otherwise */ static bool ice_is_sbq_supported(struct ice_hw *hw) { return ice_is_generic_mac(hw); } /** * ice_shutdown_ctrlq - shutdown routine for any control queue * @hw: pointer to the hardware structure * @q_type: specific Control queue type * @unloading: is the driver unloading itself * * NOTE: this function does not destroy the control queue locks. */ static void ice_shutdown_ctrlq(struct ice_hw *hw, enum ice_ctl_q q_type, bool unloading) { struct ice_ctl_q_info *cq; ice_debug(hw, ICE_DBG_TRACE, "%s\n", __func__); switch (q_type) { case ICE_CTL_Q_ADMIN: cq = &hw->adminq; if (ice_check_sq_alive(hw, cq)) ice_aq_q_shutdown(hw, unloading); break; case ICE_CTL_Q_SB: cq = &hw->sbq; break; case ICE_CTL_Q_MAILBOX: cq = &hw->mailboxq; break; default: return; } ice_shutdown_sq(hw, cq); ice_shutdown_rq(hw, cq); } /** * ice_shutdown_all_ctrlq - shutdown routine for all control queues * @hw: pointer to the hardware structure * @unloading: is the driver unloading itself * * NOTE: this function does not destroy the control queue locks. The driver * may call this at runtime to shutdown and later restart control queues, such * as in response to a reset event. */ void ice_shutdown_all_ctrlq(struct ice_hw *hw, bool unloading) { ice_debug(hw, ICE_DBG_TRACE, "%s\n", __func__); /* Shutdown FW admin queue */ ice_shutdown_ctrlq(hw, ICE_CTL_Q_ADMIN, unloading); /* Shutdown PHY Sideband */ if (ice_is_sbq_supported(hw)) ice_shutdown_ctrlq(hw, ICE_CTL_Q_SB, unloading); /* Shutdown PF-VF Mailbox */ ice_shutdown_ctrlq(hw, ICE_CTL_Q_MAILBOX, unloading); } /** * ice_init_all_ctrlq - main initialization routine for all control queues * @hw: pointer to the hardware structure * * Prior to calling this function, the driver MUST* set the following fields * in the cq->structure for all control queues: * - cq->num_sq_entries * - cq->num_rq_entries * - cq->rq_buf_size * - cq->sq_buf_size * * NOTE: this function does not initialize the controlq locks. 
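ice_init_sq, ice_init_rq, and ice_init_ctrlq above all follow the kernel goto-unwind idiom: each failing step jumps to a label that releases only what earlier steps acquired. A self-contained sketch of that control flow, with hypothetical alloc_ring/alloc_bufs/cfg_regs helpers standing in for the driver's allocation routines:

#include <stdio.h>
#include <stdlib.h>

struct cq {
	void *ring;	/* descriptor ring memory */
	void *bufs;	/* per-descriptor buffers */
	int count;	/* nonzero once the queue is live */
};

static int alloc_ring(struct cq *q) { q->ring = malloc(64); return q->ring ? 0 : -1; }
static int alloc_bufs(struct cq *q) { q->bufs = malloc(64); return q->bufs ? 0 : -1; }
static int cfg_regs(const struct cq *q) { (void)q; return 0; } /* would program BAL/BAH/LEN */

static int
init_queue(struct cq *q, int entries)
{
	int err;

	if (q->count > 0)
		return -1;	/* already initialized */
	if ((err = alloc_ring(q)) != 0)
		goto out;	/* nothing to undo yet */
	if ((err = alloc_bufs(q)) != 0)
		goto free_ring;	/* undo step 1 only */
	if ((err = cfg_regs(q)) != 0)
		goto free_bufs;	/* undo steps 1 and 2 */
	q->count = entries;	/* success: mark the queue live */
	return 0;

free_bufs:
	free(q->bufs);
free_ring:
	free(q->ring);
out:
	return err;
}

int main(void)
{
	struct cq q = { 0 };

	printf("init: %d, count: %d\n", init_queue(&q, 32), q.count);
	return 0;
}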
*/ int ice_init_all_ctrlq(struct ice_hw *hw) { u32 retry = 0; int status; ice_debug(hw, ICE_DBG_TRACE, "%s\n", __func__); /* Init FW admin queue */ do { status = ice_init_ctrlq(hw, ICE_CTL_Q_ADMIN); if (status) return status; status = ice_init_check_adminq(hw); if (status != ICE_ERR_AQ_FW_CRITICAL) break; ice_debug(hw, ICE_DBG_AQ_MSG, "Retry Admin Queue init due to FW critical error\n"); ice_shutdown_ctrlq(hw, ICE_CTL_Q_ADMIN, true); ice_msec_delay(ICE_CTL_Q_ADMIN_INIT_MSEC, true); } while (retry++ < ICE_CTL_Q_ADMIN_INIT_TIMEOUT); if (status) return status; /* sideband control queue (SBQ) interface is not supported on some * devices. Initialize if supported, else fallback to the admin queue * interface */ if (ice_is_sbq_supported(hw)) { status = ice_init_ctrlq(hw, ICE_CTL_Q_SB); if (status) return status; } /* Init Mailbox queue */ return ice_init_ctrlq(hw, ICE_CTL_Q_MAILBOX); } /** * ice_init_ctrlq_locks - Initialize locks for a control queue * @cq: pointer to the control queue * * Initializes the send and receive queue locks for a given control queue. */ static void ice_init_ctrlq_locks(struct ice_ctl_q_info *cq) { ice_init_lock(&cq->sq_lock); ice_init_lock(&cq->rq_lock); } /** * ice_create_all_ctrlq - main initialization routine for all control queues * @hw: pointer to the hardware structure * * Prior to calling this function, the driver *MUST* set the following fields * in the cq->structure for all control queues: * - cq->num_sq_entries * - cq->num_rq_entries * - cq->rq_buf_size * - cq->sq_buf_size * * This function creates all the control queue locks and then calls * ice_init_all_ctrlq. It should be called once during driver load. If the * driver needs to re-initialize control queues at run time it should call * ice_init_all_ctrlq instead. */ int ice_create_all_ctrlq(struct ice_hw *hw) { ice_init_ctrlq_locks(&hw->adminq); if (ice_is_sbq_supported(hw)) ice_init_ctrlq_locks(&hw->sbq); ice_init_ctrlq_locks(&hw->mailboxq); return ice_init_all_ctrlq(hw); } /** * ice_destroy_ctrlq_locks - Destroy locks for a control queue * @cq: pointer to the control queue * * Destroys the send and receive queue locks for a given control queue. */ static void ice_destroy_ctrlq_locks(struct ice_ctl_q_info *cq) { ice_destroy_lock(&cq->sq_lock); ice_destroy_lock(&cq->rq_lock); } /** * ice_destroy_all_ctrlq - exit routine for all control queues * @hw: pointer to the hardware structure * * This function shuts down all the control queues and then destroys the * control queue locks. It should be called once during driver unload. The * driver should call ice_shutdown_all_ctrlq if it needs to shut down and * reinitialize control queues, such as in response to a reset event. 
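The retry loop in ice_init_all_ctrlq above re-attempts admin queue bring-up only for the firmware-critical status, shutting the queue down and delaying between attempts; any other result, success included, ends the loop. The shape of that loop modeled standalone; the error code, retry bound, and fake firmware are assumptions for illustration:

#include <stdio.h>

#define ERR_FW_CRITICAL	(-1)	/* stands in for ICE_ERR_AQ_FW_CRITICAL */
#define MAX_RETRIES	10	/* stands in for ICE_CTL_Q_ADMIN_INIT_TIMEOUT */

static int failures_left = 3;	/* fake firmware: critical-fails 3 times */

static int  init_and_check_adminq(void) { return failures_left-- > 0 ? ERR_FW_CRITICAL : 0; }
static void shutdown_adminq(void) { }
static void delay_ms(int ms) { (void)ms; }

/* Retry only the one error that says the firmware may recover;
 * any other status (including success) ends the loop immediately. */
static int
init_adminq_with_retry(void)
{
	unsigned int retry = 0;
	int status;

	do {
		status = init_and_check_adminq();
		if (status != ERR_FW_CRITICAL)
			break;
		shutdown_adminq();	/* tear down before the next attempt */
		delay_ms(100);
	} while (retry++ < MAX_RETRIES);

	return status;
}

int main(void)
{
	printf("adminq init status: %d\n", init_adminq_with_retry());
	return 0;
}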
*/ void ice_destroy_all_ctrlq(struct ice_hw *hw) { /* shut down all the control queues first */ ice_shutdown_all_ctrlq(hw, true); ice_destroy_ctrlq_locks(&hw->adminq); if (ice_is_sbq_supported(hw)) ice_destroy_ctrlq_locks(&hw->sbq); ice_destroy_ctrlq_locks(&hw->mailboxq); } /** * ice_clean_sq - cleans send side of a control queue * @hw: pointer to the hardware structure * @cq: pointer to the specific Control queue * * returns the number of free desc */ static u16 ice_clean_sq(struct ice_hw *hw, struct ice_ctl_q_info *cq) { struct ice_ctl_q_ring *sq = &cq->sq; u16 ntc = sq->next_to_clean; struct ice_aq_desc *desc; + u32 head; desc = ICE_CTL_Q_DESC(*sq, ntc); - while (rd32(hw, cq->sq.head) != ntc) { - ice_debug(hw, ICE_DBG_AQ_MSG, "ntc %d head %d.\n", ntc, rd32(hw, cq->sq.head)); + head = rd32(hw, sq->head); + if (head >= sq->count) { + ice_debug(hw, ICE_DBG_AQ_MSG, + "Read head value (%d) exceeds allowed range.\n", + head); + return 0; + } + + while (head != ntc) { + ice_debug(hw, ICE_DBG_AQ_MSG, + "ntc %d head %d.\n", + ntc, head); ice_memset(desc, 0, sizeof(*desc), ICE_DMA_MEM); ntc++; if (ntc == sq->count) ntc = 0; desc = ICE_CTL_Q_DESC(*sq, ntc); + + head = rd32(hw, sq->head); + if (head >= sq->count) { + ice_debug(hw, ICE_DBG_AQ_MSG, + "Read head value (%d) exceeds allowed range.\n", + head); + return 0; + } } sq->next_to_clean = ntc; return ICE_CTL_Q_DESC_UNUSED(sq); } /** * ice_ctl_q_str - Convert control queue type to string * @qtype: the control queue type * * Returns: A string name for the given control queue type. */ static const char *ice_ctl_q_str(enum ice_ctl_q qtype) { switch (qtype) { case ICE_CTL_Q_UNKNOWN: return "Unknown CQ"; case ICE_CTL_Q_ADMIN: return "AQ"; case ICE_CTL_Q_MAILBOX: return "MBXQ"; case ICE_CTL_Q_SB: return "SBQ"; default: return "Unrecognized CQ"; } } /** * ice_debug_cq * @hw: pointer to the hardware structure * @cq: pointer to the specific Control queue * @desc: pointer to control queue descriptor * @buf: pointer to command buffer * @buf_len: max length of buf * @response: true if this is the writeback response * * Dumps debug log about control command with descriptor contents. */ static void ice_debug_cq(struct ice_hw *hw, struct ice_ctl_q_info *cq, void *desc, void *buf, u16 buf_len, bool response) { struct ice_aq_desc *cq_desc = (struct ice_aq_desc *)desc; u16 datalen, flags; if (!((ICE_DBG_AQ_DESC | ICE_DBG_AQ_DESC_BUF) & hw->debug_mask)) return; if (!desc) return; datalen = LE16_TO_CPU(cq_desc->datalen); flags = LE16_TO_CPU(cq_desc->flags); ice_debug(hw, ICE_DBG_AQ_DESC, "%s %s: opcode 0x%04X, flags 0x%04X, datalen 0x%04X, retval 0x%04X\n", ice_ctl_q_str(cq->qtype), response ? "Response" : "Command", LE16_TO_CPU(cq_desc->opcode), flags, datalen, LE16_TO_CPU(cq_desc->retval)); ice_debug(hw, ICE_DBG_AQ_DESC, "\tcookie (h,l) 0x%08X 0x%08X\n", LE32_TO_CPU(cq_desc->cookie_high), LE32_TO_CPU(cq_desc->cookie_low)); ice_debug(hw, ICE_DBG_AQ_DESC, "\tparam (0,1) 0x%08X 0x%08X\n", LE32_TO_CPU(cq_desc->params.generic.param0), LE32_TO_CPU(cq_desc->params.generic.param1)); ice_debug(hw, ICE_DBG_AQ_DESC, "\taddr (h,l) 0x%08X 0x%08X\n", LE32_TO_CPU(cq_desc->params.generic.addr_high), LE32_TO_CPU(cq_desc->params.generic.addr_low)); /* Dump buffer iff 1) one exists and 2) is either a response indicated * by the DD and/or CMP flag set or a command with the RD flag set. 
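The hunk above is the substance of this patch: ice_clean_sq now validates the head register against the ring size both before and during its cleanup walk, so a corrupted register read can no longer index past the descriptor array. A standalone model of the same guard, with read_head() standing in for rd32():

#include <stdio.h>

#define RING_COUNT 8U

static unsigned int fake_head;	/* stands in for the ATQH register */
static unsigned int read_head(void) { return fake_head; }

/* Walk next_to_clean toward the hardware head, treating any
 * out-of-range head value as "clean nothing", the same guard the
 * hunk above adds to ice_clean_sq. Returns entries reclaimed. */
static int
clean_ring(unsigned int *ntc)
{
	unsigned int head = read_head();
	int cleaned = 0;

	if (head >= RING_COUNT)
		return 0;	/* bogus register read: bail out */
	while (head != *ntc) {
		/* the real driver zeroes the descriptor here */
		*ntc = (*ntc + 1) % RING_COUNT;
		cleaned++;
		head = read_head();
		if (head >= RING_COUNT)
			break;	/* head went bad mid-walk */
	}
	return cleaned;
}

int main(void)
{
	unsigned int ntc = 2;

	fake_head = 5;
	printf("reclaimed %d, ntc now %u\n", clean_ring(&ntc), ntc);
	fake_head = RING_COUNT;	/* out of range: nothing reclaimed */
	printf("reclaimed %d\n", clean_ring(&ntc));
	return 0;
}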
*/ if (buf && cq_desc->datalen != 0 && (flags & (ICE_AQ_FLAG_DD | ICE_AQ_FLAG_CMP) || flags & ICE_AQ_FLAG_RD)) { ice_debug(hw, ICE_DBG_AQ_DESC_BUF, "Buffer:\n"); ice_debug_array(hw, ICE_DBG_AQ_DESC_BUF, 16, 1, (u8 *)buf, MIN_T(u16, buf_len, datalen)); } } /** * ice_sq_done - check if the last send on a control queue has completed * @hw: pointer to the HW struct * @cq: pointer to the specific Control queue * * Returns: true if all the descriptors on the send side of a control queue * are finished processing, false otherwise. */ bool ice_sq_done(struct ice_hw *hw, struct ice_ctl_q_info *cq) { /* control queue designers suggest use of head for better * timing reliability than DD bit */ return rd32(hw, cq->sq.head) == cq->sq.next_to_use; } /** * ice_sq_send_cmd_nolock - send command to a control queue * @hw: pointer to the HW struct * @cq: pointer to the specific Control queue * @desc: prefilled descriptor describing the command (non DMA mem) * @buf: buffer to use for indirect commands (or NULL for direct commands) * @buf_size: size of buffer for indirect commands (or 0 for direct commands) * @cd: pointer to command details structure * * This is the main send command routine for a control queue. It prepares the * command into a descriptor, bumps the send queue tail, waits for the command * to complete, captures status and data for the command, etc. */ int ice_sq_send_cmd_nolock(struct ice_hw *hw, struct ice_ctl_q_info *cq, struct ice_aq_desc *desc, void *buf, u16 buf_size, struct ice_sq_cd *cd) { struct ice_dma_mem *dma_buf = NULL; struct ice_aq_desc *desc_on_ring; bool cmd_completed = false; u32 total_delay = 0; int status = 0; u16 retval = 0; u32 val = 0; /* if reset is in progress return a soft error */ if (hw->reset_ongoing) return ICE_ERR_RESET_ONGOING; cq->sq_last_status = ICE_AQ_RC_OK; if (!cq->sq.count) { ice_debug(hw, ICE_DBG_AQ_MSG, "Control Send queue not initialized.\n"); status = ICE_ERR_AQ_EMPTY; goto sq_send_command_error; } if ((buf && !buf_size) || (!buf && buf_size)) { status = ICE_ERR_PARAM; goto sq_send_command_error; } if (buf) { if (buf_size > cq->sq_buf_size) { ice_debug(hw, ICE_DBG_AQ_MSG, "Invalid buffer size for Control Send queue: %d.\n", buf_size); status = ICE_ERR_INVAL_SIZE; goto sq_send_command_error; } desc->flags |= CPU_TO_LE16(ICE_AQ_FLAG_BUF); if (buf_size > ICE_AQ_LG_BUF) desc->flags |= CPU_TO_LE16(ICE_AQ_FLAG_LB); } val = rd32(hw, cq->sq.head); if (val >= cq->num_sq_entries) { ice_debug(hw, ICE_DBG_AQ_MSG, "head overrun at %d in the Control Send Queue ring\n", val); status = ICE_ERR_AQ_EMPTY; goto sq_send_command_error; } /* Call clean and check queue available function to reclaim the * descriptors that were processed by FW/MBX; the function returns the * number of desc available. The clean function called here could be * called in a separate thread in case of asynchronous completions. 
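The free-descriptor count that ice_clean_sq hands back comes from the ICE_CTL_Q_DESC_UNUSED macro. A plausible formulation of that macro's arithmetic (the driver's actual definition lives in a header not shown here) is the usual one-slot-reserved circular-buffer formula, checked against a few cases:

#include <assert.h>
#include <stdio.h>

/* Free entries in a ring that keeps one slot empty so that
 * head == tail unambiguously means "empty" rather than "full". */
static unsigned int
ring_unused(unsigned int count, unsigned int ntc, unsigned int ntu)
{
	return (ntc > ntu ? 0 : count) + ntc - ntu - 1;
}

int main(void)
{
	assert(ring_unused(4, 0, 0) == 3);	/* empty: count - 1 usable */
	assert(ring_unused(4, 2, 1) == 0);	/* full: writer just behind cleaner */
	assert(ring_unused(4, 3, 1) == 1);	/* one slot left */
	printf("ok\n");
	return 0;
}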
*/ if (ice_clean_sq(hw, cq) == 0) { ice_debug(hw, ICE_DBG_AQ_MSG, "Error: Control Send Queue is full.\n"); status = ICE_ERR_AQ_FULL; goto sq_send_command_error; } /* initialize the temp desc pointer with the right desc */ desc_on_ring = ICE_CTL_Q_DESC(cq->sq, cq->sq.next_to_use); /* if the desc is available copy the temp desc to the right place */ ice_memcpy(desc_on_ring, desc, sizeof(*desc_on_ring), ICE_NONDMA_TO_DMA); /* if buf is not NULL assume indirect command */ if (buf) { dma_buf = &cq->sq.r.sq_bi[cq->sq.next_to_use]; /* copy the user buf into the respective DMA buf */ ice_memcpy(dma_buf->va, buf, buf_size, ICE_NONDMA_TO_DMA); desc_on_ring->datalen = CPU_TO_LE16(buf_size); /* Update the address values in the desc with the pa value * for respective buffer */ desc_on_ring->params.generic.addr_high = CPU_TO_LE32(ICE_HI_DWORD(dma_buf->pa)); desc_on_ring->params.generic.addr_low = CPU_TO_LE32(ICE_LO_DWORD(dma_buf->pa)); } /* Debug desc and buffer */ ice_debug(hw, ICE_DBG_AQ_DESC, "ATQ: Control Send queue desc and buffer:\n"); ice_debug_cq(hw, cq, (void *)desc_on_ring, buf, buf_size, false); (cq->sq.next_to_use)++; if (cq->sq.next_to_use == cq->sq.count) cq->sq.next_to_use = 0; wr32(hw, cq->sq.tail, cq->sq.next_to_use); ice_flush(hw); /* Wait a short time before initial ice_sq_done() check, to allow * hardware time for completion. */ ice_usec_delay(5, false); do { if (ice_sq_done(hw, cq)) break; ice_usec_delay(10, false); total_delay++; } while (total_delay < cq->sq_cmd_timeout); /* if ready, copy the desc back to temp */ if (ice_sq_done(hw, cq)) { ice_memcpy(desc, desc_on_ring, sizeof(*desc), ICE_DMA_TO_NONDMA); if (buf) { /* get returned length to copy */ u16 copy_size = LE16_TO_CPU(desc->datalen); if (copy_size > buf_size) { ice_debug(hw, ICE_DBG_AQ_MSG, "Return len %d > than buf len %d\n", copy_size, buf_size); status = ICE_ERR_AQ_ERROR; } else { ice_memcpy(buf, dma_buf->va, copy_size, ICE_DMA_TO_NONDMA); } } retval = LE16_TO_CPU(desc->retval); if (retval) { ice_debug(hw, ICE_DBG_AQ_MSG, "Control Send Queue command 0x%04X completed with error 0x%X\n", LE16_TO_CPU(desc->opcode), retval); /* strip off FW internal code */ retval &= 0xff; } cmd_completed = true; if (!status && retval != ICE_AQ_RC_OK) status = ICE_ERR_AQ_ERROR; cq->sq_last_status = (enum ice_aq_err)retval; } ice_debug(hw, ICE_DBG_AQ_MSG, "ATQ: desc and buffer writeback:\n"); ice_debug_cq(hw, cq, (void *)desc, buf, buf_size, true); /* save writeback AQ if requested */ if (cd && cd->wb_desc) ice_memcpy(cd->wb_desc, desc_on_ring, sizeof(*cd->wb_desc), ICE_DMA_TO_NONDMA); /* update the error if time out occurred */ if (!cmd_completed) { if (rd32(hw, cq->rq.len) & cq->rq.len_crit_mask || rd32(hw, cq->sq.len) & cq->sq.len_crit_mask) { ice_debug(hw, ICE_DBG_AQ_MSG, "Critical FW error.\n"); status = ICE_ERR_AQ_FW_CRITICAL; } else { ice_debug(hw, ICE_DBG_AQ_MSG, "Control Send Queue Writeback timeout.\n"); status = ICE_ERR_AQ_TIMEOUT; } } sq_send_command_error: return status; } /** * ice_sq_send_cmd - send command to a control queue * @hw: pointer to the HW struct * @cq: pointer to the specific Control queue * @desc: prefilled descriptor describing the command * @buf: buffer to use for indirect commands (or NULL for direct commands) * @buf_size: size of buffer for indirect commands (or 0 for direct commands) * @cd: pointer to command details structure * * Main command for the transmit side of a control queue. 
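Farther up in ice_sq_send_cmd_nolock, the send path waits a short settle time after bumping the tail and then polls ice_sq_done for up to sq_cmd_timeout iterations; on timeout it classifies the failure as a critical FW error or a plain writeback timeout. A condensed model of that wait, with a fake completion source in place of real hardware:

#include <stdbool.h>
#include <stdio.h>

#define CMD_TIMEOUT 250		/* poll iterations, not wall-clock time */

static int polls_until_done = 42;	/* fake: completes after 42 polls */

static bool sq_done(void) { return --polls_until_done <= 0; }
static void delay_us(int us) { (void)us; }

/* Short settle delay, then bounded polling: returns 0 on
 * completion, -1 when the command never finished. */
static int
wait_sq_done(void)
{
	unsigned int total_delay = 0;

	delay_us(5);	/* give hardware a head start, as the driver does */
	do {
		if (sq_done())
			return 0;
		delay_us(10);
	} while (++total_delay < CMD_TIMEOUT);
	return -1;	/* caller decides: critical FW error or plain timeout */
}

int main(void)
{
	printf("%s\n", wait_sq_done() == 0 ? "completed" : "timed out");
	return 0;
}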
It puts the command * on the queue, bumps the tail, waits for processing of the command, captures * command status and results, etc. */ int ice_sq_send_cmd(struct ice_hw *hw, struct ice_ctl_q_info *cq, struct ice_aq_desc *desc, void *buf, u16 buf_size, struct ice_sq_cd *cd) { int status = 0; /* if reset is in progress return a soft error */ if (hw->reset_ongoing) return ICE_ERR_RESET_ONGOING; ice_acquire_lock(&cq->sq_lock); status = ice_sq_send_cmd_nolock(hw, cq, desc, buf, buf_size, cd); ice_release_lock(&cq->sq_lock); return status; } /** * ice_fill_dflt_direct_cmd_desc - AQ descriptor helper function * @desc: pointer to the temp descriptor (non DMA mem) * @opcode: the opcode can be used to decide which flags to turn off or on * * Fill the desc with default values */ void ice_fill_dflt_direct_cmd_desc(struct ice_aq_desc *desc, u16 opcode) { /* zero out the desc */ ice_memset(desc, 0, sizeof(*desc), ICE_NONDMA_MEM); desc->opcode = CPU_TO_LE16(opcode); desc->flags = CPU_TO_LE16(ICE_AQ_FLAG_SI); } /** * ice_clean_rq_elem * @hw: pointer to the HW struct * @cq: pointer to the specific Control queue * @e: event info from the receive descriptor, includes any buffers * @pending: number of events that could be left to process * * Clean one element from the receive side of a control queue. On return 'e' * contains contents of the message, and 'pending' contains the number of * events left to process. */ int ice_clean_rq_elem(struct ice_hw *hw, struct ice_ctl_q_info *cq, struct ice_rq_event_info *e, u16 *pending) { u16 ntc = cq->rq.next_to_clean; enum ice_aq_err rq_last_status; struct ice_aq_desc *desc; struct ice_dma_mem *bi; int ret_code = 0; u16 desc_idx; u16 datalen; u16 flags; u16 ntu; /* pre-clean the event info */ ice_memset(&e->desc, 0, sizeof(e->desc), ICE_NONDMA_MEM); /* take the lock before we start messing with the ring */ ice_acquire_lock(&cq->rq_lock); if (!cq->rq.count) { ice_debug(hw, ICE_DBG_AQ_MSG, "Control Receive queue not initialized.\n"); ret_code = ICE_ERR_AQ_EMPTY; goto clean_rq_elem_err; } /* set next_to_use to head */ ntu = (u16)(rd32(hw, cq->rq.head) & cq->rq.head_mask); if (ntu == ntc) { /* nothing to do - shouldn't need to update ring's values */ ret_code = ICE_ERR_AQ_NO_WORK; goto clean_rq_elem_out; } /* now clean the next descriptor */ desc = ICE_CTL_Q_DESC(cq->rq, ntc); desc_idx = ntc; rq_last_status = (enum ice_aq_err)LE16_TO_CPU(desc->retval); flags = LE16_TO_CPU(desc->flags); if (flags & ICE_AQ_FLAG_ERR) { ret_code = ICE_ERR_AQ_ERROR; ice_debug(hw, ICE_DBG_AQ_MSG, "Control Receive Queue Event 0x%04X received with error 0x%X\n", LE16_TO_CPU(desc->opcode), rq_last_status); } ice_memcpy(&e->desc, desc, sizeof(e->desc), ICE_DMA_TO_NONDMA); datalen = LE16_TO_CPU(desc->datalen); e->msg_len = MIN_T(u16, datalen, e->buf_len); if (e->msg_buf && e->msg_len) ice_memcpy(e->msg_buf, cq->rq.r.rq_bi[desc_idx].va, e->msg_len, ICE_DMA_TO_NONDMA); ice_debug(hw, ICE_DBG_AQ_DESC, "ARQ: desc and buffer:\n"); ice_debug_cq(hw, cq, (void *)desc, e->msg_buf, cq->rq_buf_size, true); /* Restore the original datalen and buffer address in the desc, * FW updates datalen to indicate the event message size */ bi = &cq->rq.r.rq_bi[ntc]; ice_memset(desc, 0, sizeof(*desc), ICE_DMA_MEM); desc->flags = CPU_TO_LE16(ICE_AQ_FLAG_BUF); if (cq->rq_buf_size > ICE_AQ_LG_BUF) desc->flags |= CPU_TO_LE16(ICE_AQ_FLAG_LB); desc->datalen = CPU_TO_LE16(bi->size); desc->params.generic.addr_high = CPU_TO_LE32(ICE_HI_DWORD(bi->pa)); desc->params.generic.addr_low = CPU_TO_LE32(ICE_LO_DWORD(bi->pa)); /* set tail = the 
last cleaned desc index. */ wr32(hw, cq->rq.tail, ntc); /* ntc is updated to tail + 1 */ ntc++; if (ntc == cq->num_rq_entries) ntc = 0; cq->rq.next_to_clean = ntc; cq->rq.next_to_use = ntu; clean_rq_elem_out: /* Set pending if needed, unlock and return */ if (pending) { /* re-read HW head to calculate actual pending messages */ ntu = (u16)(rd32(hw, cq->rq.head) & cq->rq.head_mask); *pending = (u16)((ntc > ntu ? cq->rq.count : 0) + (ntu - ntc)); } clean_rq_elem_err: ice_release_lock(&cq->rq_lock); return ret_code; } diff --git a/sys/dev/ice/ice_drv_info.h b/sys/dev/ice/ice_drv_info.h index 22e23ee53491..80b517bb5b08 100644 --- a/sys/dev/ice/ice_drv_info.h +++ b/sys/dev/ice/ice_drv_info.h @@ -1,234 +1,234 @@ /* SPDX-License-Identifier: BSD-3-Clause */ /* Copyright (c) 2024, Intel Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. Neither the name of the Intel Corporation nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /** * @file ice_drv_info.h * @brief device IDs and driver version * * Contains the device IDs tables and the driver version string. * * This file contains static or constant definitions intended to be included * exactly once in the main driver interface file. It implicitly depends on * the main driver header file. * * These definitions could be placed directly in the interface file, but are * kept separate for organizational purposes. */ /** * @var ice_driver_version * @brief driver version string * * Driver version information, used for display as part of an informational * sysctl, and as part of the driver information sent to the firmware at load. 
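An aside on the ice_drv_info.h hunk above: the version bump has to touch the same "1.43.2-k" literal in the version string and in both PVID macros, alongside the three numeric constants. One way to keep such copies in sync, sketched here as a suggestion rather than taken from the driver, is to stringify the numeric fields with the standard two-level preprocessor trick:

#include <stdio.h>

#define DRV_MAJOR 1
#define DRV_MINOR 43
#define DRV_PATCH 2

#define STR_(x)	#x
#define STR(x)	STR_(x)

/* The string is derived from the numeric fields, so a version bump
 * touches three macros instead of several string literals. */
#define DRV_VERSION STR(DRV_MAJOR) "." STR(DRV_MINOR) "." STR(DRV_PATCH) "-k"

int main(void)
{
	printf("%s\n", DRV_VERSION);	/* prints 1.43.2-k */
	return 0;
}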
* * @var ice_major_version * @brief driver major version number * * @var ice_minor_version * @brief driver minor version number * * @var ice_patch_version * @brief driver patch version number * * @var ice_rc_version * @brief driver release candidate version number */ -const char ice_driver_version[] = "1.42.5-k"; +const char ice_driver_version[] = "1.43.2-k"; const uint8_t ice_major_version = 1; -const uint8_t ice_minor_version = 42; -const uint8_t ice_patch_version = 5; +const uint8_t ice_minor_version = 43; +const uint8_t ice_patch_version = 2; const uint8_t ice_rc_version = 0; #define PVIDV(vendor, devid, name) \ - PVID(vendor, devid, name " - 1.42.5-k") + PVID(vendor, devid, name " - 1.43.2-k") #define PVIDV_OEM(vendor, devid, svid, sdevid, revid, name) \ - PVID_OEM(vendor, devid, svid, sdevid, revid, name " - 1.42.5-k") + PVID_OEM(vendor, devid, svid, sdevid, revid, name " - 1.43.2-k") /** * @var ice_vendor_info_array * @brief array of PCI devices supported by this driver * * Array of PCI devices which are supported by this driver. Used to determine * whether a given device should be loaded by this driver. This information is * also exported as part of the module information for other tools to analyze. * * @remark Each type of device ID needs to be listed from most-specific entry * to most-generic entry; e.g. PVIDV_OEM()s for a device ID must come before * the PVIDV() for it. */ static const pci_vendor_info_t ice_vendor_info_array[] = { PVIDV(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E810C_BACKPLANE, "Intel(R) Ethernet Controller E810-C for backplane"), PVIDV_OEM(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E810C_QSFP, ICE_INTEL_VENDOR_ID, 0x0001, 0, "Intel(R) Ethernet Network Adapter E810-C-Q1"), PVIDV_OEM(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E810C_QSFP, ICE_INTEL_VENDOR_ID, 0x0002, 0, "Intel(R) Ethernet Network Adapter E810-C-Q2"), PVIDV_OEM(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E810C_QSFP, ICE_INTEL_VENDOR_ID, 0x0003, 0, "Intel(R) Ethernet Network Adapter E810-C-Q1"), PVIDV_OEM(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E810C_QSFP, ICE_INTEL_VENDOR_ID, 0x0004, 0, "Intel(R) Ethernet Network Adapter E810-C-Q2"), PVIDV_OEM(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E810C_QSFP, ICE_INTEL_VENDOR_ID, 0x0005, 0, "Intel(R) Ethernet Network Adapter E810-C-Q1 for OCP3.0"), PVIDV_OEM(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E810C_QSFP, ICE_INTEL_VENDOR_ID, 0x0006, 0, "Intel(R) Ethernet Network Adapter E810-C-Q2 for OCP3.0"), PVIDV_OEM(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E810C_QSFP, ICE_INTEL_VENDOR_ID, 0x0007, 0, "Intel(R) Ethernet Network Adapter E810-C-Q1 for OCP3.0"), PVIDV_OEM(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E810C_QSFP, ICE_INTEL_VENDOR_ID, 0x0008, 0, "Intel(R) Ethernet Network Adapter E810-C-Q2 for OCP3.0"), PVIDV_OEM(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E810C_QSFP, ICE_INTEL_VENDOR_ID, 0x000D, 0, "Intel(R) Ethernet Network Adapter E810-L-Q2 for OCP3.0"), PVIDV_OEM(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E810C_QSFP, ICE_INTEL_VENDOR_ID, 0x000E, 0, "Intel(R) Ethernet Network Adapter E810-2C-Q2"), PVIDV(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E810C_QSFP, "Intel(R) Ethernet Controller E810-C for QSFP"), PVIDV_OEM(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E810C_SFP, ICE_INTEL_VENDOR_ID, 0x0005, 0, "Intel(R) Ethernet Network Adapter E810-XXV-4"), PVIDV_OEM(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E810C_SFP, ICE_INTEL_VENDOR_ID, 0x0006, 0, "Intel(R) Ethernet Network Adapter E810-XXV-4"), PVIDV_OEM(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E810C_SFP, ICE_INTEL_VENDOR_ID, 0x0007, 0, "Intel(R) Ethernet Network Adapter E810-XXV-4"), PVIDV_OEM(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E810C_SFP, ICE_INTEL_VENDOR_ID, 0x000C, 
0, "Intel(R) Ethernet Network Adapter E810-XXV-4 for OCP 3.0"), PVIDV(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E810C_SFP, "Intel(R) Ethernet Controller E810-C for SFP"), PVIDV(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E822C_BACKPLANE, "Intel(R) Ethernet Connection E822-C for backplane"), PVIDV(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E822C_QSFP, "Intel(R) Ethernet Connection E822-C for QSFP"), PVIDV(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E822C_SFP, "Intel(R) Ethernet Connection E822-C for SFP"), PVIDV(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E822C_10G_BASE_T, "Intel(R) Ethernet Connection E822-C/X557-AT 10GBASE-T"), PVIDV(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E822C_SGMII, "Intel(R) Ethernet Connection E822-C 1GbE"), PVIDV(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E822L_BACKPLANE, "Intel(R) Ethernet Connection E822-L for backplane"), PVIDV(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E822L_SFP, "Intel(R) Ethernet Connection E822-L for SFP"), PVIDV(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E822L_10G_BASE_T, "Intel(R) Ethernet Connection E822-L/X557-AT 10GBASE-T"), PVIDV(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E822L_SGMII, "Intel(R) Ethernet Connection E822-L 1GbE"), PVIDV(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E823L_BACKPLANE, "Intel(R) Ethernet Connection E823-L for backplane"), PVIDV(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E823L_SFP, "Intel(R) Ethernet Connection E823-L for SFP"), PVIDV(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E823L_QSFP, "Intel(R) Ethernet Connection E823-L for QSFP"), PVIDV(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E823L_10G_BASE_T, "Intel(R) Ethernet Connection E823-L/X557-AT 10GBASE-T"), PVIDV(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E823L_1GBE, "Intel(R) Ethernet Connection E823-L 1GbE"), PVIDV(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E823C_BACKPLANE, "Intel(R) Ethernet Connection E823-C for backplane"), PVIDV(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E823C_QSFP, "Intel(R) Ethernet Connection E823-C for QSFP"), PVIDV(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E823C_SFP, "Intel(R) Ethernet Connection E823-C for SFP"), PVIDV(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E823C_10G_BASE_T, "Intel(R) Ethernet Connection E823-C/X557-AT 10GBASE-T"), PVIDV(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E823C_SGMII, "Intel(R) Ethernet Connection E823-C 1GbE"), PVIDV(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E810_XXV_BACKPLANE, "Intel(R) Ethernet Controller E810-XXV for backplane"), PVIDV(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E810_XXV_QSFP, "Intel(R) Ethernet Controller E810-XXV for QSFP"), PVIDV_OEM(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E810_XXV_SFP, ICE_INTEL_VENDOR_ID, 0x0003, 0, "Intel(R) Ethernet Network Adapter E810-XXV-2"), PVIDV_OEM(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E810_XXV_SFP, ICE_INTEL_VENDOR_ID, 0x0004, 0, "Intel(R) Ethernet Network Adapter E810-XXV-2"), PVIDV_OEM(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E810_XXV_SFP, ICE_INTEL_VENDOR_ID, 0x0005, 0, "Intel(R) Ethernet Network Adapter E810-XXV-2 for OCP 3.0"), PVIDV_OEM(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E810_XXV_SFP, ICE_INTEL_VENDOR_ID, 0x0006, 0, "Intel(R) Ethernet Network Adapter E810-XXV-2 for OCP 3.0"), PVIDV(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E810_XXV_SFP, "Intel(R) Ethernet Controller E810-XXV for SFP"), PVIDV(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E830_BACKPLANE, "Intel(R) Ethernet Connection E830-CC for backplane"), PVIDV_OEM(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E830_QSFP56, ICE_INTEL_VENDOR_ID, 0x0002, 0, "Intel(R) Ethernet Network Adapter E830-C-Q2 for OCP 3.0"), PVIDV_OEM(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E830_QSFP56, ICE_INTEL_VENDOR_ID, 0x0004, 0, "Intel(R) Ethernet Network Adapter E830-CC-Q1 for OCP 3.0"), PVIDV(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E830_QSFP56, "Intel(R) Ethernet Connection E830-CC for QSFP56"), PVIDV_OEM(ICE_INTEL_VENDOR_ID, 
ICE_DEV_ID_E830_SFP, ICE_INTEL_VENDOR_ID, 0x0001, 0, "Intel(R) Ethernet Network Adapter E830-XXV-2 for OCP 3.0"), PVIDV_OEM(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E830_SFP, ICE_INTEL_VENDOR_ID, 0x0003, 0, "Intel(R) Ethernet Network Adapter E830-XXV-2"), PVIDV_OEM(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E830_SFP, ICE_INTEL_VENDOR_ID, 0x0004, 0, "Intel(R) Ethernet Network Adapter E830-XXV-4 for OCP 3.0"), PVIDV(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E830_SFP, "Intel(R) Ethernet Connection E830-CC for SFP"), PVIDV(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E830C_BACKPLANE, "Intel(R) Ethernet Connection E830-C for backplane"), PVIDV(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E830C_QSFP, "Intel(R) Ethernet Connection E830-C for QSFP"), PVIDV(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E830C_SFP, "Intel(R) Ethernet Connection E830-C for SFP"), PVIDV(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E830_L_BACKPLANE, "Intel(R) Ethernet Connection E830-L for backplane"), PVIDV(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E830_L_QSFP, "Intel(R) Ethernet Connection E830-L for QSFP"), PVIDV(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E830_L_SFP, "Intel(R) Ethernet Connection E830-L for SFP"), PVIDV(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E825C_BACKPLANE, "Intel(R) Ethernet Connection E825-C for backplane"), PVIDV(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E825C_QSFP, "Intel(R) Ethernet Connection E825-C for QSFP"), PVIDV(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E825C_SFP, "Intel(R) Ethernet Connection E825-C for SFP"), PVID_END }; diff --git a/sys/dev/ice/ice_lib.c b/sys/dev/ice/ice_lib.c index d2d13cd2db60..d44ae5f37750 100644 --- a/sys/dev/ice/ice_lib.c +++ b/sys/dev/ice/ice_lib.c @@ -1,12064 +1,12079 @@ /* SPDX-License-Identifier: BSD-3-Clause */ /* Copyright (c) 2024, Intel Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. Neither the name of the Intel Corporation nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /** * @file ice_lib.c * @brief Generic device setup and sysctl functions * * Library of generic device functions not specific to the networking stack. * * This includes hardware initialization functions, as well as handlers for * many of the device sysctls used to probe driver status or tune specific * behaviors. 
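The file comment above notes that much of ice_lib.c is sysctl handlers. For context, the canonical FreeBSD handler shape they follow is a read-modify-write through sysctl_handle_int. This skeleton is schematic: SYSCTL_HANDLER_ARGS and sysctl_handle_int are real kernel interfaces, but struct my_softc and some_tunable are placeholders, and it only compiles inside a kernel build:

static int
my_sysctl_handler(SYSCTL_HANDLER_ARGS)
{
	struct my_softc *sc = (struct my_softc *)arg1; /* hypothetical softc */
	int val, error;

	val = sc->some_tunable;
	error = sysctl_handle_int(oidp, &val, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);		/* read-only access, or copyin failed */
	if (val < 0)
		return (EINVAL);	/* validate before committing */
	sc->some_tunable = val;
	return (0);
}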
*/ #include "ice_lib.h" #include "ice_iflib.h" #include #include #include #include #include #include #include /** * @var M_ICE * @brief main ice driver allocation type * * malloc(9) allocation type used by the majority of memory allocations in the * ice driver. */ MALLOC_DEFINE(M_ICE, "ice", "Intel(R) 100Gb Network Driver lib allocations"); /* * Helper function prototypes */ static int ice_get_next_vsi(struct ice_vsi **all_vsi, int size); static void ice_set_default_vsi_ctx(struct ice_vsi_ctx *ctx); static void ice_set_rss_vsi_ctx(struct ice_vsi_ctx *ctx, enum ice_vsi_type type); static int ice_setup_vsi_qmap(struct ice_vsi *vsi, struct ice_vsi_ctx *ctx); static int ice_setup_tx_ctx(struct ice_tx_queue *txq, struct ice_tlan_ctx *tlan_ctx, u16 pf_q); static int ice_setup_rx_ctx(struct ice_rx_queue *rxq); static int ice_is_rxq_ready(struct ice_hw *hw, int pf_q, u32 *reg); static void ice_free_fltr_list(struct ice_list_head *list); static int ice_add_mac_to_list(struct ice_vsi *vsi, struct ice_list_head *list, const u8 *addr, enum ice_sw_fwd_act_type action); static void ice_check_ctrlq_errors(struct ice_softc *sc, const char *qname, struct ice_ctl_q_info *cq); static void ice_process_link_event(struct ice_softc *sc, struct ice_rq_event_info *e); static void ice_process_ctrlq_event(struct ice_softc *sc, const char *qname, struct ice_rq_event_info *event); static void ice_nvm_version_str(struct ice_hw *hw, struct sbuf *buf); static void ice_update_port_oversize(struct ice_softc *sc, u64 rx_errors); static void ice_active_pkg_version_str(struct ice_hw *hw, struct sbuf *buf); static void ice_os_pkg_version_str(struct ice_hw *hw, struct sbuf *buf); static bool ice_filter_is_mcast(struct ice_vsi *vsi, struct ice_fltr_info *info); static u_int ice_sync_one_mcast_filter(void *p, struct sockaddr_dl *sdl, u_int errors); static void ice_add_debug_tunables(struct ice_softc *sc); static void ice_add_debug_sysctls(struct ice_softc *sc); static void ice_vsi_set_rss_params(struct ice_vsi *vsi); static void ice_get_default_rss_key(u8 *seed); static int ice_set_rss_key(struct ice_vsi *vsi); static int ice_set_rss_lut(struct ice_vsi *vsi); static void ice_set_rss_flow_flds(struct ice_vsi *vsi); static void ice_clean_vsi_rss_cfg(struct ice_vsi *vsi); static const char *ice_aq_speed_to_str(struct ice_port_info *pi); static const char *ice_requested_fec_mode(struct ice_port_info *pi); static const char *ice_negotiated_fec_mode(struct ice_port_info *pi); static const char *ice_autoneg_mode(struct ice_port_info *pi); static const char *ice_flowcontrol_mode(struct ice_port_info *pi); static void ice_print_bus_link_data(device_t dev, struct ice_hw *hw); static void ice_set_pci_link_status_data(struct ice_hw *hw, u16 link_status); static uint8_t ice_pcie_bandwidth_check(struct ice_softc *sc); static uint64_t ice_pcie_bus_speed_to_rate(enum ice_pcie_bus_speed speed); static int ice_pcie_lnk_width_to_int(enum ice_pcie_link_width width); static uint64_t ice_phy_types_to_max_rate(struct ice_port_info *pi); static void ice_add_sysctls_sw_stats(struct ice_vsi *vsi, struct sysctl_ctx_list *ctx, struct sysctl_oid *parent); static void ice_add_sysctls_mac_pfc_one_stat(struct sysctl_ctx_list *ctx, struct sysctl_oid_list *parent_list, u64* pfc_stat_location, const char *node_name, const char *descr); static void ice_add_sysctls_mac_pfc_stats(struct sysctl_ctx_list *ctx, struct sysctl_oid *parent, struct ice_hw_port_stats *stats); static void ice_setup_vsi_common(struct ice_softc *sc, struct ice_vsi *vsi, enum ice_vsi_type type, 
int idx, bool dynamic); static void ice_handle_mib_change_event(struct ice_softc *sc, struct ice_rq_event_info *event); static void ice_handle_lan_overflow_event(struct ice_softc *sc, struct ice_rq_event_info *event); static int ice_add_ethertype_to_list(struct ice_vsi *vsi, struct ice_list_head *list, u16 ethertype, u16 direction, enum ice_sw_fwd_act_type action); static void ice_del_rx_lldp_filter(struct ice_softc *sc); static u16 ice_aq_phy_types_to_link_speeds(u64 phy_type_low, u64 phy_type_high); struct ice_phy_data; static int ice_intersect_phy_types_and_speeds(struct ice_softc *sc, struct ice_phy_data *phy_data); static int ice_apply_saved_phy_req_to_cfg(struct ice_softc *sc, struct ice_aqc_set_phy_cfg_data *cfg); static int ice_apply_saved_fec_req_to_cfg(struct ice_softc *sc, struct ice_aqc_set_phy_cfg_data *cfg); static void ice_apply_saved_fc_req_to_cfg(struct ice_port_info *pi, struct ice_aqc_set_phy_cfg_data *cfg); static void ice_print_ldo_tlv(struct ice_softc *sc, struct ice_link_default_override_tlv *tlv); static void ice_sysctl_speeds_to_aq_phy_types(u16 sysctl_speeds, u64 *phy_type_low, u64 *phy_type_high); static u16 ice_apply_supported_speed_filter(u16 report_speeds, u8 mod_type); static void ice_handle_health_status_event(struct ice_softc *sc, struct ice_rq_event_info *event); static void ice_print_health_status_string(device_t dev, struct ice_aqc_health_status_elem *elem); static void ice_debug_print_mib_change_event(struct ice_softc *sc, struct ice_rq_event_info *event); static bool ice_check_ets_bw(u8 *table); static u8 ice_dcb_get_num_tc(struct ice_dcbx_cfg *dcbcfg); static bool ice_dcb_needs_reconfig(struct ice_softc *sc, struct ice_dcbx_cfg *old_cfg, struct ice_dcbx_cfg *new_cfg); static void ice_dcb_recfg(struct ice_softc *sc); static u8 ice_dcb_tc_contig(u8 tc_map); static int ice_ets_str_to_tbl(const char *str, u8 *table, u8 limit); static int ice_pf_vsi_cfg_tc(struct ice_softc *sc, u8 tc_map); static void ice_sbuf_print_ets_cfg(struct sbuf *sbuf, const char *name, struct ice_dcb_ets_cfg *ets); static void ice_stop_pf_vsi(struct ice_softc *sc); static void ice_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt); static int ice_config_pfc(struct ice_softc *sc, u8 new_mode); void ice_add_dscp2tc_map_sysctls(struct ice_softc *sc, struct sysctl_ctx_list *ctx, struct sysctl_oid_list *ctx_list); static void ice_set_default_local_mib_settings(struct ice_softc *sc); static bool ice_dscp_is_mapped(struct ice_dcbx_cfg *dcbcfg); static void ice_start_dcbx_agent(struct ice_softc *sc); static u16 ice_fw_debug_dump_print_cluster(struct ice_softc *sc, struct sbuf *sbuf, u16 cluster_id); static void ice_fw_debug_dump_print_clusters(struct ice_softc *sc, struct sbuf *sbuf); static void ice_remove_vsi_mirroring(struct ice_vsi *vsi); static int ice_get_tx_rx_equalizations(struct ice_hw *hw, u8 serdes_num, struct ice_serdes_equalization *ptr); static int ice_fec_counter_read(struct ice_hw *hw, u32 receiver_id, u32 reg_offset, u16 *output); static int ice_get_port_fec_stats(struct ice_hw *hw, u16 pcs_quad, u16 pcs_port, struct ice_fec_stats_to_sysctl *fec_stats); static bool ice_is_serdes_muxed(struct ice_hw *hw); static int ice_get_maxspeed(struct ice_hw *hw, u8 lport, u8 *max_speed); static int ice_update_port_topology(u8 lport, struct ice_port_topology *port_topology, bool is_muxed); static int ice_get_port_topology(struct ice_hw *hw, u8 lport, struct ice_port_topology *port_topology); static int ice_module_init(void); static int ice_module_exit(void); /* * package 
version comparison functions */ static bool pkg_ver_empty(struct ice_pkg_ver *pkg_ver, u8 *pkg_name); static int pkg_ver_compatible(struct ice_pkg_ver *pkg_ver); /* * dynamic sysctl handlers */ static int ice_sysctl_show_fw(SYSCTL_HANDLER_ARGS); static int ice_sysctl_pkg_version(SYSCTL_HANDLER_ARGS); static int ice_sysctl_os_pkg_version(SYSCTL_HANDLER_ARGS); static int ice_sysctl_dump_mac_filters(SYSCTL_HANDLER_ARGS); static int ice_sysctl_dump_vlan_filters(SYSCTL_HANDLER_ARGS); static int ice_sysctl_dump_ethertype_filters(SYSCTL_HANDLER_ARGS); static int ice_sysctl_dump_ethertype_mac_filters(SYSCTL_HANDLER_ARGS); static int ice_sysctl_current_speed(SYSCTL_HANDLER_ARGS); static int ice_sysctl_request_reset(SYSCTL_HANDLER_ARGS); static int ice_sysctl_dump_state_flags(SYSCTL_HANDLER_ARGS); static int ice_sysctl_fec_config(SYSCTL_HANDLER_ARGS); static int ice_sysctl_fc_config(SYSCTL_HANDLER_ARGS); static int ice_sysctl_negotiated_fc(SYSCTL_HANDLER_ARGS); static int ice_sysctl_negotiated_fec(SYSCTL_HANDLER_ARGS); static int ice_sysctl_phy_type_low(SYSCTL_HANDLER_ARGS); static int ice_sysctl_phy_type_high(SYSCTL_HANDLER_ARGS); static int __ice_sysctl_phy_type_handler(SYSCTL_HANDLER_ARGS, bool is_phy_type_high); static int ice_sysctl_advertise_speed(SYSCTL_HANDLER_ARGS); static int ice_sysctl_rx_itr(SYSCTL_HANDLER_ARGS); static int ice_sysctl_tx_itr(SYSCTL_HANDLER_ARGS); static int ice_sysctl_fw_lldp_agent(SYSCTL_HANDLER_ARGS); static int ice_sysctl_fw_cur_lldp_persist_status(SYSCTL_HANDLER_ARGS); static int ice_sysctl_fw_dflt_lldp_persist_status(SYSCTL_HANDLER_ARGS); static int ice_sysctl_phy_caps(SYSCTL_HANDLER_ARGS, u8 report_mode); static int ice_sysctl_phy_sw_caps(SYSCTL_HANDLER_ARGS); static int ice_sysctl_phy_nvm_caps(SYSCTL_HANDLER_ARGS); static int ice_sysctl_phy_topo_caps(SYSCTL_HANDLER_ARGS); static int ice_sysctl_phy_link_status(SYSCTL_HANDLER_ARGS); static int ice_sysctl_read_i2c_diag_data(SYSCTL_HANDLER_ARGS); static int ice_sysctl_tx_cso_stat(SYSCTL_HANDLER_ARGS); static int ice_sysctl_rx_cso_stat(SYSCTL_HANDLER_ARGS); static int ice_sysctl_pba_number(SYSCTL_HANDLER_ARGS); static int ice_sysctl_rx_errors_stat(SYSCTL_HANDLER_ARGS); static int ice_sysctl_dump_dcbx_cfg(SYSCTL_HANDLER_ARGS); static int ice_sysctl_dump_vsi_cfg(SYSCTL_HANDLER_ARGS); static int ice_sysctl_dump_phy_stats(SYSCTL_HANDLER_ARGS); static int ice_sysctl_ets_min_rate(SYSCTL_HANDLER_ARGS); static int ice_sysctl_up2tc_map(SYSCTL_HANDLER_ARGS); static int ice_sysctl_pfc_config(SYSCTL_HANDLER_ARGS); static int ice_sysctl_query_port_ets(SYSCTL_HANDLER_ARGS); static int ice_sysctl_dscp2tc_map(SYSCTL_HANDLER_ARGS); static int ice_sysctl_pfc_mode(SYSCTL_HANDLER_ARGS); static int ice_sysctl_fw_debug_dump_cluster_setting(SYSCTL_HANDLER_ARGS); static int ice_sysctl_fw_debug_dump_do_dump(SYSCTL_HANDLER_ARGS); static int ice_sysctl_allow_no_fec_mod_in_auto(SYSCTL_HANDLER_ARGS); static int ice_sysctl_set_link_active(SYSCTL_HANDLER_ARGS); static int ice_sysctl_debug_set_link(SYSCTL_HANDLER_ARGS); static int ice_sysctl_temperature(SYSCTL_HANDLER_ARGS); static int ice_sysctl_create_mirror_interface(SYSCTL_HANDLER_ARGS); static int ice_sysctl_destroy_mirror_interface(SYSCTL_HANDLER_ARGS); /** * ice_map_bar - Map PCIe BAR memory * @dev: the PCIe device * @bar: the BAR info structure * @bar_num: PCIe BAR number * * Maps the specified PCIe BAR. Stores the mapping data in struct * ice_bar_info. 
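 *
 * Illustrative usage only; the attach-time context and abbreviated error
 * handling below are a sketch, not driver API:
 *
 *	struct ice_bar_info bar = { .res = NULL };
 *
 *	if (ice_map_bar(dev, &bar, 0) != 0)
 *		return (ENXIO);
 *	... access registers through bar.tag and bar.handle ...
 *	ice_free_bar(dev, &bar);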
*/ int ice_map_bar(device_t dev, struct ice_bar_info *bar, int bar_num) { if (bar->res != NULL) { device_printf(dev, "PCI BAR%d already mapped\n", bar_num); return (EDOOFUS); } bar->rid = PCIR_BAR(bar_num); bar->res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &bar->rid, RF_ACTIVE); if (!bar->res) { device_printf(dev, "PCI BAR%d mapping failed\n", bar_num); return (ENXIO); } bar->tag = rman_get_bustag(bar->res); bar->handle = rman_get_bushandle(bar->res); bar->size = rman_get_size(bar->res); return (0); } /** * ice_free_bar - Free PCIe BAR memory * @dev: the PCIe device * @bar: the BAR info structure * * Frees the specified PCIe BAR, releasing its resources. */ void ice_free_bar(device_t dev, struct ice_bar_info *bar) { if (bar->res != NULL) bus_release_resource(dev, SYS_RES_MEMORY, bar->rid, bar->res); bar->res = NULL; } /** * ice_set_ctrlq_len - Configure ctrlq lengths for a device * @hw: the device hardware structure * * Configures the control queues for the given device, setting up the * specified lengths, prior to initializing hardware. */ void ice_set_ctrlq_len(struct ice_hw *hw) { hw->adminq.num_rq_entries = ICE_AQ_LEN; hw->adminq.num_sq_entries = ICE_AQ_LEN; hw->adminq.rq_buf_size = ICE_AQ_MAX_BUF_LEN; hw->adminq.sq_buf_size = ICE_AQ_MAX_BUF_LEN; hw->mailboxq.num_rq_entries = ICE_MBXQ_LEN; hw->mailboxq.num_sq_entries = ICE_MBXQ_LEN; hw->mailboxq.rq_buf_size = ICE_MBXQ_MAX_BUF_LEN; hw->mailboxq.sq_buf_size = ICE_MBXQ_MAX_BUF_LEN; hw->sbq.num_rq_entries = ICE_SBQ_LEN; hw->sbq.num_sq_entries = ICE_SBQ_LEN; hw->sbq.rq_buf_size = ICE_SBQ_MAX_BUF_LEN; hw->sbq.sq_buf_size = ICE_SBQ_MAX_BUF_LEN; } /** * ice_get_next_vsi - Get the next available VSI slot * @all_vsi: the VSI list * @size: the size of the VSI list * * Returns the index to the first available VSI slot. Will return size (one * past the last index) if there are no slots available. */ static int ice_get_next_vsi(struct ice_vsi **all_vsi, int size) { int i; for (i = 0; i < size; i++) { if (all_vsi[i] == NULL) return i; } return size; } /** * ice_setup_vsi_common - Common VSI setup for both dynamic and static VSIs * @sc: the device private softc structure * @vsi: the VSI to setup * @type: the VSI type of the new VSI * @idx: the index in the all_vsi array to use * @dynamic: whether this VSI memory was dynamically allocated * * Perform setup for a VSI that is common to both dynamically allocated VSIs * and the static PF VSI which is embedded in the softc structure. */ static void ice_setup_vsi_common(struct ice_softc *sc, struct ice_vsi *vsi, enum ice_vsi_type type, int idx, bool dynamic) { /* Store important values in VSI struct */ vsi->type = type; vsi->sc = sc; vsi->idx = idx; sc->all_vsi[idx] = vsi; vsi->dynamic = dynamic; /* Set default mirroring rule information */ vsi->rule_mir_ingress = ICE_INVAL_MIRROR_RULE_ID; vsi->rule_mir_egress = ICE_INVAL_MIRROR_RULE_ID; /* Setup the VSI tunables now */ ice_add_vsi_tunables(vsi, sc->vsi_sysctls); } /** * ice_alloc_vsi - Allocate a dynamic VSI * @sc: device softc structure * @type: VSI type * * Allocates a new dynamic VSI structure and inserts it into the VSI list. */ struct ice_vsi * ice_alloc_vsi(struct ice_softc *sc, enum ice_vsi_type type) { struct ice_vsi *vsi; int idx; /* Find an open index for a new VSI to be allocated. If the returned * index is >= the num_available_vsi then it means no slot is * available. 
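	 *
	 * For example, with num_available_vsi == 4 and all_vsi containing
	 * { pf_vsi, NULL, NULL, NULL }, ice_get_next_vsi() returns 1 and the
	 * new VSI is stored in all_vsi[1]; with no NULL slots it returns 4,
	 * which fails the check below.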
*/ idx = ice_get_next_vsi(sc->all_vsi, sc->num_available_vsi); if (idx >= sc->num_available_vsi) { device_printf(sc->dev, "No available VSI slots\n"); return NULL; } vsi = (struct ice_vsi *)malloc(sizeof(*vsi), M_ICE, M_NOWAIT | M_ZERO); if (!vsi) { device_printf(sc->dev, "Unable to allocate VSI memory\n"); return NULL; } ice_setup_vsi_common(sc, vsi, type, idx, true); return vsi; } /** * ice_setup_pf_vsi - Setup the PF VSI * @sc: the device private softc * * Setup the PF VSI structure which is embedded as sc->pf_vsi in the device * private softc. Unlike other VSIs, the PF VSI memory is allocated as part of * the softc memory, instead of being dynamically allocated at creation. */ void ice_setup_pf_vsi(struct ice_softc *sc) { ice_setup_vsi_common(sc, &sc->pf_vsi, ICE_VSI_PF, 0, false); } /** * ice_alloc_vsi_qmap * @vsi: VSI structure * @max_tx_queues: Number of transmit queues to identify * @max_rx_queues: Number of receive queues to identify * * Allocates a max_[t|r]x_queues array of words for the VSI where each * word contains the index of the queue it represents. In here, all * words are initialized to an index of ICE_INVALID_RES_IDX, indicating * all queues for this VSI are not yet assigned an index and thus, * not ready for use. * */ void ice_alloc_vsi_qmap(struct ice_vsi *vsi, const int max_tx_queues, const int max_rx_queues) { int i; MPASS(max_tx_queues > 0); MPASS(max_rx_queues > 0); /* Allocate Tx queue mapping memory */ vsi->tx_qmap = malloc(sizeof(u16) * max_tx_queues, M_ICE, M_WAITOK); /* Allocate Rx queue mapping memory */ vsi->rx_qmap = malloc(sizeof(u16) * max_rx_queues, M_ICE, M_WAITOK); /* Mark every queue map as invalid to start with */ for (i = 0; i < max_tx_queues; i++) { vsi->tx_qmap[i] = ICE_INVALID_RES_IDX; } for (i = 0; i < max_rx_queues; i++) { vsi->rx_qmap[i] = ICE_INVALID_RES_IDX; } } /** * ice_free_vsi_qmaps - Free the PF qmaps associated with a VSI * @vsi: the VSI private structure * * Frees the PF qmaps associated with the given VSI. Generally this will be * called by ice_release_vsi, but may need to be called during attach cleanup, * depending on when the qmaps were allocated. */ void ice_free_vsi_qmaps(struct ice_vsi *vsi) { struct ice_softc *sc = vsi->sc; if (vsi->tx_qmap) { ice_resmgr_release_map(&sc->tx_qmgr, vsi->tx_qmap, vsi->num_tx_queues); free(vsi->tx_qmap, M_ICE); vsi->tx_qmap = NULL; } if (vsi->rx_qmap) { ice_resmgr_release_map(&sc->rx_qmgr, vsi->rx_qmap, vsi->num_rx_queues); free(vsi->rx_qmap, M_ICE); vsi->rx_qmap = NULL; } } /** * ice_set_default_vsi_ctx - Setup default VSI context parameters * @ctx: the VSI context to initialize * * Initialize and prepare a default VSI context for configuring a new VSI. 
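 *
 * For reference, the call flow in ice_initialize_vsi() below layers the
 * type-specific settings on top of these defaults:
 *
 *	ice_set_default_vsi_ctx(&ctx);
 *	ice_set_rss_vsi_ctx(&ctx, vsi->type);
 *	ctx.info.sw_id = hw->port_info->sw_id;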
*/ static void ice_set_default_vsi_ctx(struct ice_vsi_ctx *ctx) { u32 table = 0; memset(&ctx->info, 0, sizeof(ctx->info)); /* VSI will be allocated from shared pool */ ctx->alloc_from_pool = true; /* Enable source pruning by default */ ctx->info.sw_flags = ICE_AQ_VSI_SW_FLAG_SRC_PRUNE; /* Traffic from VSI can be sent to LAN */ ctx->info.sw_flags2 = ICE_AQ_VSI_SW_FLAG_LAN_ENA; /* Allow all packets untagged/tagged */ ctx->info.inner_vlan_flags = ((ICE_AQ_VSI_INNER_VLAN_TX_MODE_ALL & ICE_AQ_VSI_INNER_VLAN_TX_MODE_M) >> ICE_AQ_VSI_INNER_VLAN_TX_MODE_S); /* Show VLAN/UP from packets in Rx descriptors */ ctx->info.inner_vlan_flags |= ((ICE_AQ_VSI_INNER_VLAN_EMODE_STR_BOTH & ICE_AQ_VSI_INNER_VLAN_EMODE_M) >> ICE_AQ_VSI_INNER_VLAN_EMODE_S); /* Have 1:1 UP mapping for both ingress/egress tables */ table |= ICE_UP_TABLE_TRANSLATE(0, 0); table |= ICE_UP_TABLE_TRANSLATE(1, 1); table |= ICE_UP_TABLE_TRANSLATE(2, 2); table |= ICE_UP_TABLE_TRANSLATE(3, 3); table |= ICE_UP_TABLE_TRANSLATE(4, 4); table |= ICE_UP_TABLE_TRANSLATE(5, 5); table |= ICE_UP_TABLE_TRANSLATE(6, 6); table |= ICE_UP_TABLE_TRANSLATE(7, 7); ctx->info.ingress_table = CPU_TO_LE32(table); ctx->info.egress_table = CPU_TO_LE32(table); /* Have 1:1 UP mapping for outer to inner UP table */ ctx->info.outer_up_table = CPU_TO_LE32(table); /* No Outer tag support, so outer_vlan_flags remains zero */ } /** * ice_set_rss_vsi_ctx - Setup VSI context parameters for RSS * @ctx: the VSI context to configure * @type: the VSI type * * Configures the VSI context for RSS, based on the VSI type. */ static void ice_set_rss_vsi_ctx(struct ice_vsi_ctx *ctx, enum ice_vsi_type type) { u8 lut_type, hash_type; switch (type) { case ICE_VSI_PF: lut_type = ICE_AQ_VSI_Q_OPT_RSS_LUT_PF; hash_type = ICE_AQ_VSI_Q_OPT_RSS_TPLZ; break; case ICE_VSI_VF: case ICE_VSI_VMDQ2: lut_type = ICE_AQ_VSI_Q_OPT_RSS_LUT_VSI; hash_type = ICE_AQ_VSI_Q_OPT_RSS_TPLZ; break; default: /* Other VSI types do not support RSS */ return; } ctx->info.q_opt_rss = (((lut_type << ICE_AQ_VSI_Q_OPT_RSS_LUT_S) & ICE_AQ_VSI_Q_OPT_RSS_LUT_M) | ((hash_type << ICE_AQ_VSI_Q_OPT_RSS_HASH_S) & ICE_AQ_VSI_Q_OPT_RSS_HASH_M)); } /** * ice_setup_vsi_qmap - Setup the queue mapping for a VSI * @vsi: the VSI to configure * @ctx: the VSI context to configure * * Configures the context for the given VSI, setting up how the firmware * should map the queues for this VSI. 
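 *
 * Worked example for the TC 0 sizing below: a VSI with 6 Rx queues computes
 * pow = flsl(6 - 1) = 3, so tc_mapping advertises 2^3 = 8 queue slots, the
 * smallest power of two that covers all 6 queues.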
* * @pre vsi->qmap_type is set to a valid type */ static int ice_setup_vsi_qmap(struct ice_vsi *vsi, struct ice_vsi_ctx *ctx) { int pow = 0; u16 qmap; MPASS(vsi->rx_qmap != NULL); switch (vsi->qmap_type) { case ICE_RESMGR_ALLOC_CONTIGUOUS: ctx->info.mapping_flags |= CPU_TO_LE16(ICE_AQ_VSI_Q_MAP_CONTIG); ctx->info.q_mapping[0] = CPU_TO_LE16(vsi->rx_qmap[0]); ctx->info.q_mapping[1] = CPU_TO_LE16(vsi->num_rx_queues); break; case ICE_RESMGR_ALLOC_SCATTERED: ctx->info.mapping_flags |= CPU_TO_LE16(ICE_AQ_VSI_Q_MAP_NONCONTIG); for (int i = 0; i < vsi->num_rx_queues; i++) ctx->info.q_mapping[i] = CPU_TO_LE16(vsi->rx_qmap[i]); break; default: return (EOPNOTSUPP); } /* Calculate the next power-of-2 of number of queues */ if (vsi->num_rx_queues) pow = flsl(vsi->num_rx_queues - 1); /* Assign all the queues to traffic class zero */ qmap = (pow << ICE_AQ_VSI_TC_Q_NUM_S) & ICE_AQ_VSI_TC_Q_NUM_M; ctx->info.tc_mapping[0] = CPU_TO_LE16(qmap); /* Fill out default driver TC queue info for VSI */ vsi->tc_info[0].qoffset = 0; vsi->tc_info[0].qcount_rx = vsi->num_rx_queues; vsi->tc_info[0].qcount_tx = vsi->num_tx_queues; for (int i = 1; i < ICE_MAX_TRAFFIC_CLASS; i++) { vsi->tc_info[i].qoffset = 0; vsi->tc_info[i].qcount_rx = 1; vsi->tc_info[i].qcount_tx = 1; } vsi->tc_map = 0x1; return 0; } /** * ice_setup_vsi_mirroring -- Setup a VSI for mirroring PF VSI traffic * @vsi: VSI to setup * * @pre vsi->mirror_src_vsi is set to the SW VSI num that traffic is to be * mirrored from * * Returns 0 on success, EINVAL on failure. */ int ice_setup_vsi_mirroring(struct ice_vsi *vsi) { struct ice_mir_rule_buf rule = { }; struct ice_softc *sc = vsi->sc; struct ice_hw *hw = &sc->hw; device_t dev = sc->dev; int status; u16 rule_id, dest_vsi; u16 count = 1; rule.vsi_idx = ice_get_hw_vsi_num(hw, vsi->mirror_src_vsi); rule.add = true; dest_vsi = ice_get_hw_vsi_num(hw, vsi->idx); rule_id = ICE_INVAL_MIRROR_RULE_ID; status = ice_aq_add_update_mir_rule(hw, ICE_AQC_RULE_TYPE_VPORT_INGRESS, dest_vsi, count, &rule, NULL, &rule_id); if (status) { device_printf(dev, "Could not add INGRESS rule for mirror vsi %d to vsi %d, err %s aq_err %s\n", rule.vsi_idx, dest_vsi, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); return (EINVAL); } vsi->rule_mir_ingress = rule_id; rule_id = ICE_INVAL_MIRROR_RULE_ID; status = ice_aq_add_update_mir_rule(hw, ICE_AQC_RULE_TYPE_VPORT_EGRESS, dest_vsi, count, &rule, NULL, &rule_id); if (status) { device_printf(dev, "Could not add EGRESS rule for mirror vsi %d to vsi %d, err %s aq_err %s\n", rule.vsi_idx, dest_vsi, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); return (EINVAL); } vsi->rule_mir_egress = rule_id; return (0); } /** * ice_remove_vsi_mirroring -- Teardown any VSI mirroring rules * @vsi: VSI to remove mirror rules from */ static void ice_remove_vsi_mirroring(struct ice_vsi *vsi) { struct ice_hw *hw = &vsi->sc->hw; int status = 0; bool keep_alloc = false; if (vsi->rule_mir_ingress != ICE_INVAL_MIRROR_RULE_ID) status = ice_aq_delete_mir_rule(hw, vsi->rule_mir_ingress, keep_alloc, NULL); if (status) device_printf(vsi->sc->dev, "Could not remove mirror VSI ingress rule, err %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); status = 0; if (vsi->rule_mir_egress != ICE_INVAL_MIRROR_RULE_ID) status = ice_aq_delete_mir_rule(hw, vsi->rule_mir_egress, keep_alloc, NULL); if (status) device_printf(vsi->sc->dev, "Could not remove mirror VSI egress rule, err %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); } /** * 
ice_initialize_vsi - Initialize a VSI for use * @vsi: the vsi to initialize * * Initialize a VSI over the adminq and prepare it for operation. * * @pre vsi->num_tx_queues is set * @pre vsi->num_rx_queues is set */ int ice_initialize_vsi(struct ice_vsi *vsi) { struct ice_vsi_ctx ctx = { 0 }; struct ice_hw *hw = &vsi->sc->hw; u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 }; int status; int err; /* For now, we only have code supporting PF VSIs */ switch (vsi->type) { case ICE_VSI_PF: ctx.flags = ICE_AQ_VSI_TYPE_PF; break; case ICE_VSI_VMDQ2: ctx.flags = ICE_AQ_VSI_TYPE_VMDQ2; break; default: return (ENODEV); } ice_set_default_vsi_ctx(&ctx); ice_set_rss_vsi_ctx(&ctx, vsi->type); /* XXX: VSIs of other types may need different port info? */ ctx.info.sw_id = hw->port_info->sw_id; /* Set some RSS parameters based on the VSI type */ ice_vsi_set_rss_params(vsi); /* Initialize the Rx queue mapping for this VSI */ err = ice_setup_vsi_qmap(vsi, &ctx); if (err) { return err; } /* (Re-)add VSI to HW VSI handle list */ status = ice_add_vsi(hw, vsi->idx, &ctx, NULL); if (status != 0) { device_printf(vsi->sc->dev, "Add VSI AQ call failed, err %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); return (EIO); } vsi->info = ctx.info; /* Initialize VSI with just 1 TC to start */ max_txqs[0] = vsi->num_tx_queues; status = ice_cfg_vsi_lan(hw->port_info, vsi->idx, ICE_DFLT_TRAFFIC_CLASS, max_txqs); if (status) { device_printf(vsi->sc->dev, "Failed VSI lan queue config, err %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); ice_deinit_vsi(vsi); return (ENODEV); } /* Reset VSI stats */ ice_reset_vsi_stats(vsi); return 0; } /** * ice_deinit_vsi - Tell firmware to release resources for a VSI * @vsi: the VSI to release * * Helper function which requests the firmware to release the hardware * resources associated with a given VSI. */ void ice_deinit_vsi(struct ice_vsi *vsi) { struct ice_vsi_ctx ctx = { 0 }; struct ice_softc *sc = vsi->sc; struct ice_hw *hw = &sc->hw; int status; /* Assert that the VSI pointer matches in the list */ MPASS(vsi == sc->all_vsi[vsi->idx]); ctx.info = vsi->info; status = ice_rm_vsi_lan_cfg(hw->port_info, vsi->idx); if (status) { /* * This should only fail if the VSI handle is invalid, or if * any of the nodes have leaf nodes which are still in use. */ device_printf(sc->dev, "Unable to remove scheduler nodes for VSI %d, err %s\n", vsi->idx, ice_status_str(status)); } /* Tell firmware to release the VSI resources */ status = ice_free_vsi(hw, vsi->idx, &ctx, false, NULL); if (status != 0) { device_printf(sc->dev, "Free VSI %u AQ call failed, err %s aq_err %s\n", vsi->idx, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); } } /** * ice_release_vsi - Release resources associated with a VSI * @vsi: the VSI to release * * Release software and firmware resources associated with a VSI. Release the * queue managers associated with this VSI. Also free the VSI structure memory * if the VSI was allocated dynamically using ice_alloc_vsi(). */ void ice_release_vsi(struct ice_vsi *vsi) { struct ice_softc *sc = vsi->sc; int idx = vsi->idx; /* Assert that the VSI pointer matches in the list */ MPASS(vsi == sc->all_vsi[idx]); /* Cleanup RSS configuration */ if (ice_is_bit_set(sc->feat_en, ICE_FEATURE_RSS)) ice_clean_vsi_rss_cfg(vsi); ice_del_vsi_sysctl_ctx(vsi); /* Remove the configured mirror rule, if it exists */ ice_remove_vsi_mirroring(vsi); /* * If we unload the driver after a reset fails, we do not need to do * this step. 
*/
	if (!ice_test_state(&sc->state, ICE_STATE_RESET_FAILED))
		ice_deinit_vsi(vsi);

	ice_free_vsi_qmaps(vsi);

	if (vsi->dynamic) {
		free(sc->all_vsi[idx], M_ICE);
	}

	sc->all_vsi[idx] = NULL;
}

/**
 * ice_aq_speed_to_rate - Convert AdminQ speed enum to baudrate
 * @pi: port info data
 *
 * Returns the baudrate value for the current link speed of a given port.
 */
uint64_t
ice_aq_speed_to_rate(struct ice_port_info *pi)
{
	switch (pi->phy.link_info.link_speed) {
	case ICE_AQ_LINK_SPEED_200GB:
		return IF_Gbps(200);
	case ICE_AQ_LINK_SPEED_100GB:
		return IF_Gbps(100);
	case ICE_AQ_LINK_SPEED_50GB:
		return IF_Gbps(50);
	case ICE_AQ_LINK_SPEED_40GB:
		return IF_Gbps(40);
	case ICE_AQ_LINK_SPEED_25GB:
		return IF_Gbps(25);
	case ICE_AQ_LINK_SPEED_20GB:
		return IF_Gbps(20);
	case ICE_AQ_LINK_SPEED_10GB:
		return IF_Gbps(10);
	case ICE_AQ_LINK_SPEED_5GB:
		return IF_Gbps(5);
	case ICE_AQ_LINK_SPEED_2500MB:
		return IF_Mbps(2500);
	case ICE_AQ_LINK_SPEED_1000MB:
		return IF_Mbps(1000);
	case ICE_AQ_LINK_SPEED_100MB:
		return IF_Mbps(100);
	case ICE_AQ_LINK_SPEED_10MB:
		return IF_Mbps(10);
	case ICE_AQ_LINK_SPEED_UNKNOWN:
	default:
		/* return 0 if we don't know the link speed */
		return 0;
	}
}

/**
 * ice_aq_speed_to_str - Convert AdminQ speed enum to string representation
 * @pi: port info data
 *
 * Returns the string representation of the current link speed for a given
 * port.
 */
static const char *
ice_aq_speed_to_str(struct ice_port_info *pi)
{
	switch (pi->phy.link_info.link_speed) {
	case ICE_AQ_LINK_SPEED_200GB:
		return "200 Gbps";
	case ICE_AQ_LINK_SPEED_100GB:
		return "100 Gbps";
	case ICE_AQ_LINK_SPEED_50GB:
		return "50 Gbps";
	case ICE_AQ_LINK_SPEED_40GB:
		return "40 Gbps";
	case ICE_AQ_LINK_SPEED_25GB:
		return "25 Gbps";
	case ICE_AQ_LINK_SPEED_20GB:
		return "20 Gbps";
	case ICE_AQ_LINK_SPEED_10GB:
		return "10 Gbps";
	case ICE_AQ_LINK_SPEED_5GB:
		return "5 Gbps";
	case ICE_AQ_LINK_SPEED_2500MB:
		return "2.5 Gbps";
	case ICE_AQ_LINK_SPEED_1000MB:
		return "1 Gbps";
	case ICE_AQ_LINK_SPEED_100MB:
		return "100 Mbps";
	case ICE_AQ_LINK_SPEED_10MB:
		return "10 Mbps";
	case ICE_AQ_LINK_SPEED_UNKNOWN:
	default:
		return "Unknown speed";
	}
}

/**
 * ice_get_phy_type_low - Get media associated with phy_type_low
 * @phy_type_low: the low 64bits of phy_type from the AdminQ
 *
 * Given the lower 64bits of the phy_type from the hardware, return the
 * ifm_active bit associated. Return IFM_UNKNOWN when phy_type_low is unknown.
 * Note that only one of ice_get_phy_type_low or ice_get_phy_type_high should
 * be called. If phy_type_low is zero, call ice_get_phy_type_high.
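 *
 * A sketch of the intended selection logic (variable names here are
 * illustrative, not driver API):
 *
 *	if (phy_type_low != 0)
 *		ifm = ice_get_phy_type_low(phy_type_low);
 *	else
 *		ifm = ice_get_phy_type_high(phy_type_high);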
*/ int ice_get_phy_type_low(uint64_t phy_type_low) { switch (phy_type_low) { case ICE_PHY_TYPE_LOW_100BASE_TX: return IFM_100_TX; case ICE_PHY_TYPE_LOW_100M_SGMII: return IFM_100_SGMII; case ICE_PHY_TYPE_LOW_1000BASE_T: return IFM_1000_T; case ICE_PHY_TYPE_LOW_1000BASE_SX: return IFM_1000_SX; case ICE_PHY_TYPE_LOW_1000BASE_LX: return IFM_1000_LX; case ICE_PHY_TYPE_LOW_1000BASE_KX: return IFM_1000_KX; case ICE_PHY_TYPE_LOW_1G_SGMII: return IFM_1000_SGMII; case ICE_PHY_TYPE_LOW_2500BASE_T: return IFM_2500_T; case ICE_PHY_TYPE_LOW_2500BASE_X: return IFM_2500_X; case ICE_PHY_TYPE_LOW_2500BASE_KX: return IFM_2500_KX; case ICE_PHY_TYPE_LOW_5GBASE_T: return IFM_5000_T; case ICE_PHY_TYPE_LOW_5GBASE_KR: return IFM_5000_KR; case ICE_PHY_TYPE_LOW_10GBASE_T: return IFM_10G_T; case ICE_PHY_TYPE_LOW_10G_SFI_DA: return IFM_10G_TWINAX; case ICE_PHY_TYPE_LOW_10GBASE_SR: return IFM_10G_SR; case ICE_PHY_TYPE_LOW_10GBASE_LR: return IFM_10G_LR; case ICE_PHY_TYPE_LOW_10GBASE_KR_CR1: return IFM_10G_KR; case ICE_PHY_TYPE_LOW_10G_SFI_AOC_ACC: return IFM_10G_AOC; case ICE_PHY_TYPE_LOW_10G_SFI_C2C: return IFM_10G_SFI; case ICE_PHY_TYPE_LOW_25GBASE_T: return IFM_25G_T; case ICE_PHY_TYPE_LOW_25GBASE_CR: return IFM_25G_CR; case ICE_PHY_TYPE_LOW_25GBASE_CR_S: return IFM_25G_CR_S; case ICE_PHY_TYPE_LOW_25GBASE_CR1: return IFM_25G_CR1; case ICE_PHY_TYPE_LOW_25GBASE_SR: return IFM_25G_SR; case ICE_PHY_TYPE_LOW_25GBASE_LR: return IFM_25G_LR; case ICE_PHY_TYPE_LOW_25GBASE_KR: return IFM_25G_KR; case ICE_PHY_TYPE_LOW_25GBASE_KR_S: return IFM_25G_KR_S; case ICE_PHY_TYPE_LOW_25GBASE_KR1: return IFM_25G_KR1; case ICE_PHY_TYPE_LOW_25G_AUI_AOC_ACC: return IFM_25G_AOC; case ICE_PHY_TYPE_LOW_25G_AUI_C2C: return IFM_25G_AUI; case ICE_PHY_TYPE_LOW_40GBASE_CR4: return IFM_40G_CR4; case ICE_PHY_TYPE_LOW_40GBASE_SR4: return IFM_40G_SR4; case ICE_PHY_TYPE_LOW_40GBASE_LR4: return IFM_40G_LR4; case ICE_PHY_TYPE_LOW_40GBASE_KR4: return IFM_40G_KR4; case ICE_PHY_TYPE_LOW_40G_XLAUI_AOC_ACC: return IFM_40G_XLAUI_AC; case ICE_PHY_TYPE_LOW_40G_XLAUI: return IFM_40G_XLAUI; case ICE_PHY_TYPE_LOW_50GBASE_CR2: return IFM_50G_CR2; case ICE_PHY_TYPE_LOW_50GBASE_SR2: return IFM_50G_SR2; case ICE_PHY_TYPE_LOW_50GBASE_LR2: return IFM_50G_LR2; case ICE_PHY_TYPE_LOW_50GBASE_KR2: return IFM_50G_KR2; case ICE_PHY_TYPE_LOW_50G_LAUI2_AOC_ACC: return IFM_50G_LAUI2_AC; case ICE_PHY_TYPE_LOW_50G_LAUI2: return IFM_50G_LAUI2; case ICE_PHY_TYPE_LOW_50G_AUI2_AOC_ACC: return IFM_50G_AUI2_AC; case ICE_PHY_TYPE_LOW_50G_AUI2: return IFM_50G_AUI2; case ICE_PHY_TYPE_LOW_50GBASE_CP: return IFM_50G_CP; case ICE_PHY_TYPE_LOW_50GBASE_SR: return IFM_50G_SR; case ICE_PHY_TYPE_LOW_50GBASE_FR: return IFM_50G_FR; case ICE_PHY_TYPE_LOW_50GBASE_LR: return IFM_50G_LR; case ICE_PHY_TYPE_LOW_50GBASE_KR_PAM4: return IFM_50G_KR_PAM4; case ICE_PHY_TYPE_LOW_50G_AUI1_AOC_ACC: return IFM_50G_AUI1_AC; case ICE_PHY_TYPE_LOW_50G_AUI1: return IFM_50G_AUI1; case ICE_PHY_TYPE_LOW_100GBASE_CR4: return IFM_100G_CR4; case ICE_PHY_TYPE_LOW_100GBASE_SR4: return IFM_100G_SR4; case ICE_PHY_TYPE_LOW_100GBASE_LR4: return IFM_100G_LR4; case ICE_PHY_TYPE_LOW_100GBASE_KR4: return IFM_100G_KR4; case ICE_PHY_TYPE_LOW_100G_CAUI4_AOC_ACC: return IFM_100G_CAUI4_AC; case ICE_PHY_TYPE_LOW_100G_CAUI4: return IFM_100G_CAUI4; case ICE_PHY_TYPE_LOW_100G_AUI4_AOC_ACC: return IFM_100G_AUI4_AC; case ICE_PHY_TYPE_LOW_100G_AUI4: return IFM_100G_AUI4; case ICE_PHY_TYPE_LOW_100GBASE_CR_PAM4: return IFM_100G_CR_PAM4; case ICE_PHY_TYPE_LOW_100GBASE_KR_PAM4: return IFM_100G_KR_PAM4; case ICE_PHY_TYPE_LOW_100GBASE_CP2: return 
IFM_100G_CP2; case ICE_PHY_TYPE_LOW_100GBASE_SR2: return IFM_100G_SR2; case ICE_PHY_TYPE_LOW_100GBASE_DR: return IFM_100G_DR; default: return IFM_UNKNOWN; } } /** * ice_get_phy_type_high - Get media associated with phy_type_high * @phy_type_high: the upper 64bits of phy_type from the AdminQ * * Given the upper 64bits of the phy_type from the hardware, return the * ifm_active bit associated. Return IFM_UNKNOWN on an unknown value. Note * that only one of ice_get_phy_type_low or ice_get_phy_type_high should be * called. If phy_type_high is zero, call ice_get_phy_type_low. */ int ice_get_phy_type_high(uint64_t phy_type_high) { switch (phy_type_high) { case ICE_PHY_TYPE_HIGH_100GBASE_KR2_PAM4: return IFM_100G_KR2_PAM4; case ICE_PHY_TYPE_HIGH_100G_CAUI2_AOC_ACC: return IFM_100G_CAUI2_AC; case ICE_PHY_TYPE_HIGH_100G_CAUI2: return IFM_100G_CAUI2; case ICE_PHY_TYPE_HIGH_100G_AUI2_AOC_ACC: return IFM_100G_AUI2_AC; case ICE_PHY_TYPE_HIGH_100G_AUI2: return IFM_100G_AUI2; case ICE_PHY_TYPE_HIGH_200G_CR4_PAM4: return IFM_200G_CR4_PAM4; case ICE_PHY_TYPE_HIGH_200G_SR4: return IFM_200G_SR4; case ICE_PHY_TYPE_HIGH_200G_FR4: return IFM_200G_FR4; case ICE_PHY_TYPE_HIGH_200G_LR4: return IFM_200G_LR4; case ICE_PHY_TYPE_HIGH_200G_DR4: return IFM_200G_DR4; case ICE_PHY_TYPE_HIGH_200G_KR4_PAM4: return IFM_200G_KR4_PAM4; case ICE_PHY_TYPE_HIGH_200G_AUI4_AOC_ACC: return IFM_200G_AUI4_AC; case ICE_PHY_TYPE_HIGH_200G_AUI4: return IFM_200G_AUI4; case ICE_PHY_TYPE_HIGH_200G_AUI8_AOC_ACC: return IFM_200G_AUI8_AC; case ICE_PHY_TYPE_HIGH_200G_AUI8: return IFM_200G_AUI8; default: return IFM_UNKNOWN; } } /** * ice_phy_types_to_max_rate - Returns port's max supported baudrate * @pi: port info struct * * ice_aq_get_phy_caps() w/ ICE_AQC_REPORT_TOPO_CAP_MEDIA parameter needs * to have been called before this function for it to work. 
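 *
 * Illustrative call order for that precondition (a sketch; pcaps is a local
 * assumed here, and the copy into pi->phy is elided):
 *
 *	ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP_MEDIA,
 *	                    &pcaps, NULL);
 *	... pi->phy.phy_type_low/high updated from the response ...
 *	max_rate = ice_phy_types_to_max_rate(pi);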
*/ static uint64_t ice_phy_types_to_max_rate(struct ice_port_info *pi) { uint64_t phy_low = pi->phy.phy_type_low; uint64_t phy_high = pi->phy.phy_type_high; uint64_t max_rate = 0; int bit; /* * These are based on the indices used in the BIT() macros for * ICE_PHY_TYPE_LOW_* */ static const uint64_t phy_rates[] = { IF_Mbps(100), IF_Mbps(100), IF_Gbps(1ULL), IF_Gbps(1ULL), IF_Gbps(1ULL), IF_Gbps(1ULL), IF_Gbps(1ULL), IF_Mbps(2500ULL), IF_Mbps(2500ULL), IF_Mbps(2500ULL), IF_Gbps(5ULL), IF_Gbps(5ULL), IF_Gbps(10ULL), IF_Gbps(10ULL), IF_Gbps(10ULL), IF_Gbps(10ULL), IF_Gbps(10ULL), IF_Gbps(10ULL), IF_Gbps(10ULL), IF_Gbps(25ULL), IF_Gbps(25ULL), IF_Gbps(25ULL), IF_Gbps(25ULL), IF_Gbps(25ULL), IF_Gbps(25ULL), IF_Gbps(25ULL), IF_Gbps(25ULL), IF_Gbps(25ULL), IF_Gbps(25ULL), IF_Gbps(25ULL), IF_Gbps(40ULL), IF_Gbps(40ULL), IF_Gbps(40ULL), IF_Gbps(40ULL), IF_Gbps(40ULL), IF_Gbps(40ULL), IF_Gbps(50ULL), IF_Gbps(50ULL), IF_Gbps(50ULL), IF_Gbps(50ULL), IF_Gbps(50ULL), IF_Gbps(50ULL), IF_Gbps(50ULL), IF_Gbps(50ULL), IF_Gbps(50ULL), IF_Gbps(50ULL), IF_Gbps(50ULL), IF_Gbps(50ULL), IF_Gbps(50ULL), IF_Gbps(50ULL), IF_Gbps(50ULL), IF_Gbps(100ULL), IF_Gbps(100ULL), IF_Gbps(100ULL), IF_Gbps(100ULL), IF_Gbps(100ULL), IF_Gbps(100ULL), IF_Gbps(100ULL), IF_Gbps(100ULL), IF_Gbps(100ULL), IF_Gbps(100ULL), IF_Gbps(100ULL), IF_Gbps(100ULL), IF_Gbps(100ULL), /* These rates are for ICE_PHY_TYPE_HIGH_* */ IF_Gbps(100ULL), IF_Gbps(100ULL), IF_Gbps(100ULL), IF_Gbps(100ULL), IF_Gbps(100ULL), IF_Gbps(200ULL), IF_Gbps(200ULL), IF_Gbps(200ULL), IF_Gbps(200ULL), IF_Gbps(200ULL), IF_Gbps(200ULL), IF_Gbps(200ULL), IF_Gbps(200ULL), IF_Gbps(200ULL), IF_Gbps(200ULL), }; /* coverity[address_of] */ for_each_set_bit(bit, &phy_high, 64) if ((bit + 64) < (int)ARRAY_SIZE(phy_rates)) max_rate = uqmax(max_rate, phy_rates[(bit + 64)]); /* coverity[address_of] */ for_each_set_bit(bit, &phy_low, 64) max_rate = uqmax(max_rate, phy_rates[bit]); return (max_rate); } /* The if_media type is split over the original 5 bit media variant field, * along with extended types using up extra bits in the options section. * We want to convert this split number into a bitmap index, so we reverse the * calculation of IFM_X here. */ #define IFM_IDX(x) (((x) & IFM_TMASK) | \ (((x) & IFM_ETH_XTYPE) >> IFM_ETH_XSHIFT)) /** * ice_add_media_types - Add supported media types to the media structure * @sc: ice private softc structure * @media: ifmedia structure to setup * * Looks up the supported phy types, and initializes the various media types * available. * * @pre this function must be protected from being called while another thread * is accessing the ifmedia types. */ int ice_add_media_types(struct ice_softc *sc, struct ifmedia *media) { struct ice_aqc_get_phy_caps_data pcaps = { 0 }; struct ice_port_info *pi = sc->hw.port_info; int status; uint64_t phy_low, phy_high; int bit; ASSERT_CFG_LOCKED(sc); /* the maximum possible media type index is 511. We probably don't * need most of this space, but this ensures future compatibility when * additional media types are used. 
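 *
 * (IFM_IDX() above folds the 5-bit subtype field and the extended-type bits
 * into a single index, which is where the 511 upper bound comes from.)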
*/ ice_declare_bitmap(already_added, 511); /* Remove all previous media types */ ifmedia_removeall(media); status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG, &pcaps, NULL); if (status) { device_printf(sc->dev, "%s: ice_aq_get_phy_caps (ACTIVE) failed; status %s, aq_err %s\n", __func__, ice_status_str(status), ice_aq_str(sc->hw.adminq.sq_last_status)); return (status); } phy_low = le64toh(pcaps.phy_type_low); phy_high = le64toh(pcaps.phy_type_high); /* make sure the added bitmap is zero'd */ memset(already_added, 0, sizeof(already_added)); /* coverity[address_of] */ for_each_set_bit(bit, &phy_low, 64) { uint64_t type = BIT_ULL(bit); int ostype; /* get the OS media type */ ostype = ice_get_phy_type_low(type); /* don't bother adding the unknown type */ if (ostype == IFM_UNKNOWN) continue; /* only add each media type to the list once */ if (ice_is_bit_set(already_added, IFM_IDX(ostype))) continue; ifmedia_add(media, IFM_ETHER | ostype, 0, NULL); ice_set_bit(IFM_IDX(ostype), already_added); } /* coverity[address_of] */ for_each_set_bit(bit, &phy_high, 64) { uint64_t type = BIT_ULL(bit); int ostype; /* get the OS media type */ ostype = ice_get_phy_type_high(type); /* don't bother adding the unknown type */ if (ostype == IFM_UNKNOWN) continue; /* only add each media type to the list once */ if (ice_is_bit_set(already_added, IFM_IDX(ostype))) continue; ifmedia_add(media, IFM_ETHER | ostype, 0, NULL); ice_set_bit(IFM_IDX(ostype), already_added); } /* Use autoselect media by default */ ifmedia_add(media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(media, IFM_ETHER | IFM_AUTO); return (0); } /** * ice_configure_rxq_interrupt - Configure HW Rx queue for an MSI-X interrupt * @hw: ice hw structure * @rxqid: Rx queue index in PF space * @vector: MSI-X vector index in PF/VF space * @itr_idx: ITR index to use for interrupt * * @remark ice_flush() may need to be called after this */ void ice_configure_rxq_interrupt(struct ice_hw *hw, u16 rxqid, u16 vector, u8 itr_idx) { u32 val; MPASS(itr_idx <= ICE_ITR_NONE); val = (QINT_RQCTL_CAUSE_ENA_M | (itr_idx << QINT_RQCTL_ITR_INDX_S) | (vector << QINT_RQCTL_MSIX_INDX_S)); wr32(hw, QINT_RQCTL(rxqid), val); } /** * ice_configure_all_rxq_interrupts - Configure HW Rx queues for MSI-X interrupts * @vsi: the VSI to configure * * Called when setting up MSI-X interrupts to configure the Rx hardware queues. 
*/
void
ice_configure_all_rxq_interrupts(struct ice_vsi *vsi)
{
	struct ice_hw *hw = &vsi->sc->hw;
	int i;

	for (i = 0; i < vsi->num_rx_queues; i++) {
		struct ice_rx_queue *rxq = &vsi->rx_queues[i];

		ice_configure_rxq_interrupt(hw, vsi->rx_qmap[rxq->me],
					    rxq->irqv->me, ICE_RX_ITR);

		ice_debug(hw, ICE_DBG_INIT,
			  "RXQ(%d) intr enable: me %d rxqid %d vector %d\n",
			  i, rxq->me, vsi->rx_qmap[rxq->me], rxq->irqv->me);
	}

	ice_flush(hw);
}

/**
 * ice_configure_txq_interrupt - Configure HW Tx queue for an MSI-X interrupt
 * @hw: ice hw structure
 * @txqid: Tx queue index in PF space
 * @vector: MSI-X vector index in PF/VF space
 * @itr_idx: ITR index to use for interrupt
 *
 * @remark ice_flush() may need to be called after this
 */
void
ice_configure_txq_interrupt(struct ice_hw *hw, u16 txqid, u16 vector,
			    u8 itr_idx)
{
	u32 val;

	MPASS(itr_idx <= ICE_ITR_NONE);

	val = (QINT_TQCTL_CAUSE_ENA_M |
	       (itr_idx << QINT_TQCTL_ITR_INDX_S) |
	       (vector << QINT_TQCTL_MSIX_INDX_S));
	wr32(hw, QINT_TQCTL(txqid), val);
}

/**
 * ice_configure_all_txq_interrupts - Configure HW Tx queues for MSI-X interrupts
 * @vsi: the VSI to configure
 *
 * Called when setting up MSI-X interrupts to configure the Tx hardware queues.
 */
void
ice_configure_all_txq_interrupts(struct ice_vsi *vsi)
{
	struct ice_hw *hw = &vsi->sc->hw;
	int i;

	for (i = 0; i < vsi->num_tx_queues; i++) {
		struct ice_tx_queue *txq = &vsi->tx_queues[i];

		ice_configure_txq_interrupt(hw, vsi->tx_qmap[txq->me],
					    txq->irqv->me, ICE_TX_ITR);
	}

	ice_flush(hw);
}

/**
 * ice_flush_rxq_interrupts - Unconfigure Hw Rx queues MSI-X interrupt cause
 * @vsi: the VSI to configure
 *
 * Unset the CAUSE_ENA flag of the RQCTL register for each queue, then trigger
 * a software interrupt on that cause. This is required as part of the Rx
 * queue disable logic to dissociate the Rx queue from the interrupt.
 *
 * Note: this function must be called prior to disabling Rx queues with
 * ice_control_all_rx_queues, otherwise the Rx queue may not be disabled properly.
 */
void
ice_flush_rxq_interrupts(struct ice_vsi *vsi)
{
	struct ice_hw *hw = &vsi->sc->hw;
	int i;

	for (i = 0; i < vsi->num_rx_queues; i++) {
		struct ice_rx_queue *rxq = &vsi->rx_queues[i];
		u32 reg, val;

		/* Clear the CAUSE_ENA flag */
		reg = vsi->rx_qmap[rxq->me];
		val = rd32(hw, QINT_RQCTL(reg));
		val &= ~QINT_RQCTL_CAUSE_ENA_M;
		wr32(hw, QINT_RQCTL(reg), val);

		ice_flush(hw);

		/* Trigger a software interrupt to complete interrupt
		 * dissociation.
		 */
		wr32(hw, GLINT_DYN_CTL(rxq->irqv->me),
		     GLINT_DYN_CTL_SWINT_TRIG_M | GLINT_DYN_CTL_INTENA_MSK_M);
	}
}

/**
 * ice_flush_txq_interrupts - Unconfigure Hw Tx queues MSI-X interrupt cause
 * @vsi: the VSI to configure
 *
 * Unset the CAUSE_ENA flag of the TQCTL register for each queue, then trigger
 * a software interrupt on that cause. This is required as part of the Tx
 * queue disable logic to dissociate the Tx queue from the interrupt.
 *
 * Note: this function must be called prior to ice_vsi_disable_tx, otherwise
 * the Tx queue disable may not complete properly.
 */
void
ice_flush_txq_interrupts(struct ice_vsi *vsi)
{
	struct ice_hw *hw = &vsi->sc->hw;
	int i;

	for (i = 0; i < vsi->num_tx_queues; i++) {
		struct ice_tx_queue *txq = &vsi->tx_queues[i];
		u32 reg, val;

		/* Clear the CAUSE_ENA flag */
		reg = vsi->tx_qmap[txq->me];
		val = rd32(hw, QINT_TQCTL(reg));
		val &= ~QINT_TQCTL_CAUSE_ENA_M;
		wr32(hw, QINT_TQCTL(reg), val);

		ice_flush(hw);

		/* Trigger a software interrupt to complete interrupt
		 * dissociation.
*/ wr32(hw, GLINT_DYN_CTL(txq->irqv->me), GLINT_DYN_CTL_SWINT_TRIG_M | GLINT_DYN_CTL_INTENA_MSK_M); } } /** * ice_configure_rx_itr - Configure the Rx ITR settings for this VSI * @vsi: the VSI to configure * * Program the hardware ITR registers with the settings for this VSI. */ void ice_configure_rx_itr(struct ice_vsi *vsi) { struct ice_hw *hw = &vsi->sc->hw; int i; /* TODO: Handle per-queue/per-vector ITR? */ for (i = 0; i < vsi->num_rx_queues; i++) { struct ice_rx_queue *rxq = &vsi->rx_queues[i]; wr32(hw, GLINT_ITR(ICE_RX_ITR, rxq->irqv->me), ice_itr_to_reg(hw, vsi->rx_itr)); } ice_flush(hw); } /** * ice_configure_tx_itr - Configure the Tx ITR settings for this VSI * @vsi: the VSI to configure * * Program the hardware ITR registers with the settings for this VSI. */ void ice_configure_tx_itr(struct ice_vsi *vsi) { struct ice_hw *hw = &vsi->sc->hw; int i; /* TODO: Handle per-queue/per-vector ITR? */ for (i = 0; i < vsi->num_tx_queues; i++) { struct ice_tx_queue *txq = &vsi->tx_queues[i]; wr32(hw, GLINT_ITR(ICE_TX_ITR, txq->irqv->me), ice_itr_to_reg(hw, vsi->tx_itr)); } ice_flush(hw); } /** * ice_setup_tx_ctx - Setup an ice_tlan_ctx structure for a queue * @txq: the Tx queue to configure * @tlan_ctx: the Tx LAN queue context structure to initialize * @pf_q: real queue number */ static int ice_setup_tx_ctx(struct ice_tx_queue *txq, struct ice_tlan_ctx *tlan_ctx, u16 pf_q) { struct ice_vsi *vsi = txq->vsi; struct ice_softc *sc = vsi->sc; struct ice_hw *hw = &sc->hw; tlan_ctx->port_num = hw->port_info->lport; /* number of descriptors in the queue */ tlan_ctx->qlen = txq->desc_count; /* set the transmit queue base address, defined in 128 byte units */ tlan_ctx->base = txq->tx_paddr >> 7; tlan_ctx->pf_num = hw->pf_id; switch (vsi->type) { case ICE_VSI_PF: tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_PF; break; case ICE_VSI_VMDQ2: tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_VMQ; break; default: return (ENODEV); } tlan_ctx->src_vsi = ice_get_hw_vsi_num(hw, vsi->idx); /* Enable TSO */ tlan_ctx->tso_ena = 1; tlan_ctx->internal_usage_flag = 1; tlan_ctx->tso_qnum = pf_q; /* * Stick with the older legacy Tx queue interface, instead of the new * advanced queue interface. */ tlan_ctx->legacy_int = 1; /* Descriptor WB mode */ tlan_ctx->wb_mode = 0; return (0); } /** * ice_cfg_vsi_for_tx - Configure the hardware for Tx * @vsi: the VSI to configure * * Configure the device Tx queues through firmware AdminQ commands. After * this, Tx queues will be ready for transmit. 
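 *
 * A sketch of the typical Tx bring-up order around this helper (the exact
 * caller sequence lives in the iflib glue and may differ):
 *
 *	ice_cfg_vsi_for_tx(vsi);
 *	ice_configure_tx_itr(vsi);
 *	ice_configure_all_txq_interrupts(vsi);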
*/ int ice_cfg_vsi_for_tx(struct ice_vsi *vsi) { struct ice_aqc_add_tx_qgrp *qg; struct ice_hw *hw = &vsi->sc->hw; device_t dev = vsi->sc->dev; int status; int i; int err = 0; u16 qg_size, pf_q; qg_size = ice_struct_size(qg, txqs, 1); qg = (struct ice_aqc_add_tx_qgrp *)malloc(qg_size, M_ICE, M_NOWAIT|M_ZERO); if (!qg) return (ENOMEM); qg->num_txqs = 1; for (i = 0; i < vsi->num_tx_queues; i++) { struct ice_tlan_ctx tlan_ctx = { 0 }; struct ice_tx_queue *txq = &vsi->tx_queues[i]; pf_q = vsi->tx_qmap[txq->me]; qg->txqs[0].txq_id = htole16(pf_q); err = ice_setup_tx_ctx(txq, &tlan_ctx, pf_q); if (err) goto free_txqg; ice_set_ctx(hw, (u8 *)&tlan_ctx, qg->txqs[0].txq_ctx, ice_tlan_ctx_info); status = ice_ena_vsi_txq(hw->port_info, vsi->idx, txq->tc, txq->q_handle, 1, qg, qg_size, NULL); if (status) { device_printf(dev, "Failed to set LAN Tx queue %d (TC %d, handle %d) context, err %s aq_err %s\n", i, txq->tc, txq->q_handle, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); err = ENODEV; goto free_txqg; } /* Keep track of the Tx queue TEID */ if (pf_q == le16toh(qg->txqs[0].txq_id)) txq->q_teid = le32toh(qg->txqs[0].q_teid); } free_txqg: free(qg, M_ICE); return (err); } /** * ice_setup_rx_ctx - Setup an Rx context structure for a receive queue * @rxq: the receive queue to program * * Setup an Rx queue context structure and program it into the hardware * registers. This is a necessary step for enabling the Rx queue. * * @pre the VSI associated with this queue must have initialized mbuf_sz */ static int ice_setup_rx_ctx(struct ice_rx_queue *rxq) { struct ice_rlan_ctx rlan_ctx = {0}; struct ice_vsi *vsi = rxq->vsi; struct ice_softc *sc = vsi->sc; struct ice_hw *hw = &sc->hw; int status; u32 rxdid = ICE_RXDID_FLEX_NIC; u32 regval; u16 pf_q; pf_q = vsi->rx_qmap[rxq->me]; /* set the receive queue base address, defined in 128 byte units */ rlan_ctx.base = rxq->rx_paddr >> 7; rlan_ctx.qlen = rxq->desc_count; rlan_ctx.dbuf = vsi->mbuf_sz >> ICE_RLAN_CTX_DBUF_S; /* use 32 byte descriptors */ rlan_ctx.dsize = 1; /* Strip the Ethernet CRC bytes before the packet is posted to the * host memory. */ rlan_ctx.crcstrip = 1; rlan_ctx.l2tsel = 1; /* don't do header splitting */ rlan_ctx.dtype = ICE_RX_DTYPE_NO_SPLIT; rlan_ctx.hsplit_0 = ICE_RLAN_RX_HSPLIT_0_NO_SPLIT; rlan_ctx.hsplit_1 = ICE_RLAN_RX_HSPLIT_1_NO_SPLIT; /* strip VLAN from inner headers */ rlan_ctx.showiv = 1; rlan_ctx.rxmax = min(vsi->max_frame_size, ICE_MAX_RX_SEGS * vsi->mbuf_sz); rlan_ctx.lrxqthresh = 1; if (vsi->type != ICE_VSI_VF) { regval = rd32(hw, QRXFLXP_CNTXT(pf_q)); regval &= ~QRXFLXP_CNTXT_RXDID_IDX_M; regval |= (rxdid << QRXFLXP_CNTXT_RXDID_IDX_S) & QRXFLXP_CNTXT_RXDID_IDX_M; regval &= ~QRXFLXP_CNTXT_RXDID_PRIO_M; regval |= (0x03 << QRXFLXP_CNTXT_RXDID_PRIO_S) & QRXFLXP_CNTXT_RXDID_PRIO_M; wr32(hw, QRXFLXP_CNTXT(pf_q), regval); } status = ice_write_rxq_ctx(hw, &rlan_ctx, pf_q); if (status) { device_printf(sc->dev, "Failed to set LAN Rx queue context, err %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); return (EIO); } wr32(hw, rxq->tail, 0); return 0; } /** * ice_cfg_vsi_for_rx - Configure the hardware for Rx * @vsi: the VSI to configure * * Prepare an Rx context descriptor and configure the device to receive * traffic. 
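 *
 * For example, with mbuf_sz = 2048 and max_frame_size = 9018, the
 * rlan_ctx.rxmax programmed by ice_setup_rx_ctx() is
 * min(9018, ICE_MAX_RX_SEGS * 2048), so an accepted frame can never span
 * more than ICE_MAX_RX_SEGS receive buffers.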
* * @pre the VSI must have initialized mbuf_sz */ int ice_cfg_vsi_for_rx(struct ice_vsi *vsi) { int i, err; for (i = 0; i < vsi->num_rx_queues; i++) { MPASS(vsi->mbuf_sz > 0); err = ice_setup_rx_ctx(&vsi->rx_queues[i]); if (err) return err; } return (0); } /** * ice_is_rxq_ready - Check if an Rx queue is ready * @hw: ice hw structure * @pf_q: absolute PF queue index to check * @reg: on successful return, contains qrx_ctrl contents * * Reads the QRX_CTRL register and verifies if the queue is in a consistent * state. That is, QENA_REQ matches QENA_STAT. Used to check before making * a request to change the queue, as well as to verify the request has * finished. The queue should change status within a few microseconds, so we * use a small delay while polling the register. * * Returns an error code if the queue does not update after a few retries. */ static int ice_is_rxq_ready(struct ice_hw *hw, int pf_q, u32 *reg) { u32 qrx_ctrl, qena_req, qena_stat; int i; for (i = 0; i < ICE_Q_WAIT_RETRY_LIMIT; i++) { qrx_ctrl = rd32(hw, QRX_CTRL(pf_q)); qena_req = (qrx_ctrl >> QRX_CTRL_QENA_REQ_S) & 1; qena_stat = (qrx_ctrl >> QRX_CTRL_QENA_STAT_S) & 1; /* if the request and status bits equal, then the queue is * fully disabled or enabled. */ if (qena_req == qena_stat) { *reg = qrx_ctrl; return (0); } /* wait a few microseconds before we check again */ DELAY(10); } return (ETIMEDOUT); } /** * ice_control_rx_queue - Configure hardware to start or stop an Rx queue * @vsi: VSI containing queue to enable/disable * @qidx: Queue index in VSI space * @enable: true to enable queue, false to disable * * Control the Rx queue through the QRX_CTRL register, enabling or disabling * it. Wait for the appropriate time to ensure that the queue has actually * reached the expected state. */ int ice_control_rx_queue(struct ice_vsi *vsi, u16 qidx, bool enable) { struct ice_hw *hw = &vsi->sc->hw; device_t dev = vsi->sc->dev; u32 qrx_ctrl = 0; int err; struct ice_rx_queue *rxq = &vsi->rx_queues[qidx]; int pf_q = vsi->rx_qmap[rxq->me]; err = ice_is_rxq_ready(hw, pf_q, &qrx_ctrl); if (err) { device_printf(dev, "Rx queue %d is not ready\n", pf_q); return err; } /* Skip if the queue is already in correct state */ if (enable == !!(qrx_ctrl & QRX_CTRL_QENA_STAT_M)) return (0); if (enable) qrx_ctrl |= QRX_CTRL_QENA_REQ_M; else qrx_ctrl &= ~QRX_CTRL_QENA_REQ_M; wr32(hw, QRX_CTRL(pf_q), qrx_ctrl); /* wait for the queue to finalize the request */ err = ice_is_rxq_ready(hw, pf_q, &qrx_ctrl); if (err) { device_printf(dev, "Rx queue %d %sable timeout\n", pf_q, (enable ? "en" : "dis")); return err; } /* this should never happen */ if (enable != !!(qrx_ctrl & QRX_CTRL_QENA_STAT_M)) { device_printf(dev, "Rx queue %d invalid state\n", pf_q); return (EDOOFUS); } return (0); } /** * ice_control_all_rx_queues - Configure hardware to start or stop the Rx queues * @vsi: VSI to enable/disable queues * @enable: true to enable queues, false to disable * * Control the Rx queues through the QRX_CTRL register, enabling or disabling * them. Wait for the appropriate time to ensure that the queues have actually * reached the expected state. */ int ice_control_all_rx_queues(struct ice_vsi *vsi, bool enable) { int i, err; /* TODO: amortize waits by changing all queues up front and then * checking their status afterwards. This will become more necessary * when we have a large number of queues. 
*/
	for (i = 0; i < vsi->num_rx_queues; i++) {
		err = ice_control_rx_queue(vsi, i, enable);
		if (err)
			return (err);
	}

	return (0);
}

/**
 * ice_add_mac_to_list - Add MAC filter to a MAC filter list
 * @vsi: the VSI to forward to
 * @list: list which contains MAC filter entries
 * @addr: the MAC address to be added
 * @action: filter action to perform on match
 *
 * Adds a MAC address filter to the list which will be forwarded to firmware
 * to add a series of MAC address filters.
 *
 * Returns 0 on success, and an error code on failure.
 */
static int
ice_add_mac_to_list(struct ice_vsi *vsi, struct ice_list_head *list,
		    const u8 *addr, enum ice_sw_fwd_act_type action)
{
	struct ice_fltr_list_entry *entry;

	entry = (__typeof(entry))malloc(sizeof(*entry), M_ICE, M_NOWAIT|M_ZERO);
	if (!entry)
		return (ENOMEM);

	entry->fltr_info.flag = ICE_FLTR_TX;
	entry->fltr_info.src_id = ICE_SRC_ID_VSI;
	entry->fltr_info.lkup_type = ICE_SW_LKUP_MAC;
	entry->fltr_info.fltr_act = action;
	entry->fltr_info.vsi_handle = vsi->idx;
	bcopy(addr, entry->fltr_info.l_data.mac.mac_addr, ETHER_ADDR_LEN);

	LIST_ADD(&entry->list_entry, list);

	return 0;
}

/**
 * ice_free_fltr_list - Free memory associated with a MAC address list
 * @list: the list to free
 *
 * Free the memory of each entry associated with the list.
 */
static void
ice_free_fltr_list(struct ice_list_head *list)
{
	struct ice_fltr_list_entry *e, *tmp;

	LIST_FOR_EACH_ENTRY_SAFE(e, tmp, list, ice_fltr_list_entry, list_entry) {
		LIST_DEL(&e->list_entry);
		free(e, M_ICE);
	}
}

/**
 * ice_add_vsi_mac_filter - Add a MAC address filter for a VSI
 * @vsi: the VSI to add the filter for
 * @addr: MAC address to add a filter for
 *
 * Add a MAC address filter for a given VSI. This is a wrapper around
 * ice_add_mac to simplify the interface. First, it only accepts a single
 * address, so we don't have to mess around with the list setup in other
 * functions. Second, it ignores the ICE_ERR_ALREADY_EXISTS error, so that
 * callers don't need to worry about attempting to add the same filter twice.
 */
int
ice_add_vsi_mac_filter(struct ice_vsi *vsi, const u8 *addr)
{
	struct ice_list_head mac_addr_list;
	struct ice_hw *hw = &vsi->sc->hw;
	device_t dev = vsi->sc->dev;
	int status;
	int err = 0;

	INIT_LIST_HEAD(&mac_addr_list);

	err = ice_add_mac_to_list(vsi, &mac_addr_list, addr, ICE_FWD_TO_VSI);
	if (err)
		goto free_mac_list;

	status = ice_add_mac(hw, &mac_addr_list);
	if (status == ICE_ERR_ALREADY_EXISTS) {
		; /* Don't complain if we try to add a filter that already exists */
	} else if (status) {
		device_printf(dev,
			      "Failed to add a filter for MAC %6D, err %s aq_err %s\n",
			      addr, ":",
			      ice_status_str(status),
			      ice_aq_str(hw->adminq.sq_last_status));
		err = (EIO);
	}

free_mac_list:
	ice_free_fltr_list(&mac_addr_list);
	return err;
}

/**
 * ice_cfg_pf_default_mac_filters - Setup default unicast and broadcast addrs
 * @sc: device softc structure
 *
 * Program the default unicast and broadcast filters for the PF VSI.
 */
int
ice_cfg_pf_default_mac_filters(struct ice_softc *sc)
{
	struct ice_vsi *vsi = &sc->pf_vsi;
	struct ice_hw *hw = &sc->hw;
	int err;

	/* Add the LAN MAC address */
	err = ice_add_vsi_mac_filter(vsi, hw->port_info->mac.lan_addr);
	if (err)
		return err;

	/* Add the broadcast address */
	err = ice_add_vsi_mac_filter(vsi, broadcastaddr);
	if (err)
		return err;

	return (0);
}

/**
 * ice_remove_vsi_mac_filter - Remove a MAC address filter for a VSI
 * @vsi: the VSI to remove the filter for
 * @addr: MAC address to remove a filter for
 *
 * Remove a MAC address filter from a given VSI. This is a wrapper around
 * ice_remove_mac to simplify the interface. First, it only accepts a single
 * address, so we don't have to mess around with the list setup in other
 * functions. Second, it ignores the ICE_ERR_DOES_NOT_EXIST error, so that
 * callers don't need to worry about attempting to remove filters which
 * haven't yet been added.
 */
int
ice_remove_vsi_mac_filter(struct ice_vsi *vsi, const u8 *addr)
{
	struct ice_list_head mac_addr_list;
	struct ice_hw *hw = &vsi->sc->hw;
	device_t dev = vsi->sc->dev;
	int status;
	int err = 0;

	INIT_LIST_HEAD(&mac_addr_list);

	err = ice_add_mac_to_list(vsi, &mac_addr_list, addr, ICE_FWD_TO_VSI);
	if (err)
		goto free_mac_list;

	status = ice_remove_mac(hw, &mac_addr_list);
	if (status == ICE_ERR_DOES_NOT_EXIST) {
		; /* Don't complain if we try to remove a filter that doesn't exist */
	} else if (status) {
		device_printf(dev,
			      "Failed to remove a filter for MAC %6D, err %s aq_err %s\n",
			      addr, ":",
			      ice_status_str(status),
			      ice_aq_str(hw->adminq.sq_last_status));
		err = (EIO);
	}

free_mac_list:
	ice_free_fltr_list(&mac_addr_list);
	return err;
}

/**
 * ice_rm_pf_default_mac_filters - Remove default unicast and broadcast addrs
 * @sc: device softc structure
 *
 * Remove the default unicast and broadcast filters from the PF VSI.
 */
int
ice_rm_pf_default_mac_filters(struct ice_softc *sc)
{
	struct ice_vsi *vsi = &sc->pf_vsi;
	struct ice_hw *hw = &sc->hw;
	int err;

	/* Remove the LAN MAC address */
	err = ice_remove_vsi_mac_filter(vsi, hw->port_info->mac.lan_addr);
	if (err)
		return err;

	/* Remove the broadcast address */
	err = ice_remove_vsi_mac_filter(vsi, broadcastaddr);
	if (err)
		return (EIO);

	return (0);
}

/**
 * ice_check_ctrlq_errors - Check for and report controlq errors
 * @sc: device private structure
 * @qname: name of the controlq
 * @cq: the controlq to check
 *
 * Check and report controlq errors. Currently all we do is report them to the
 * kernel message log, but we might want to improve this in the future, such
 * as to keep track of statistics.
 */
static void
ice_check_ctrlq_errors(struct ice_softc *sc, const char *qname,
		       struct ice_ctl_q_info *cq)
{
	struct ice_hw *hw = &sc->hw;
	u32 val;

	/* Check for error indications. Note that all the controlqs use the
	 * same register layout, so we use the PF_FW_AxQLEN defines only.
	 */
	val = rd32(hw, cq->rq.len);
	if (val & (PF_FW_ARQLEN_ARQVFE_M | PF_FW_ARQLEN_ARQOVFL_M |
		   PF_FW_ARQLEN_ARQCRIT_M)) {
		if (val & PF_FW_ARQLEN_ARQVFE_M)
			device_printf(sc->dev,
				"%s Receive Queue VF Error detected\n", qname);
		if (val & PF_FW_ARQLEN_ARQOVFL_M)
			device_printf(sc->dev,
				"%s Receive Queue Overflow Error detected\n",
				qname);
		if (val & PF_FW_ARQLEN_ARQCRIT_M)
			device_printf(sc->dev,
				"%s Receive Queue Critical Error detected\n",
				qname);
		val &= ~(PF_FW_ARQLEN_ARQVFE_M | PF_FW_ARQLEN_ARQOVFL_M |
			 PF_FW_ARQLEN_ARQCRIT_M);
		wr32(hw, cq->rq.len, val);
	}

	val = rd32(hw, cq->sq.len);
	if (val & (PF_FW_ATQLEN_ATQVFE_M | PF_FW_ATQLEN_ATQOVFL_M |
		   PF_FW_ATQLEN_ATQCRIT_M)) {
		if (val & PF_FW_ATQLEN_ATQVFE_M)
			device_printf(sc->dev,
				"%s Send Queue VF Error detected\n", qname);
		if (val & PF_FW_ATQLEN_ATQOVFL_M)
			device_printf(sc->dev,
				"%s Send Queue Overflow Error detected\n",
				qname);
		if (val & PF_FW_ATQLEN_ATQCRIT_M)
			device_printf(sc->dev,
				"%s Send Queue Critical Error detected\n",
				qname);
		val &= ~(PF_FW_ATQLEN_ATQVFE_M | PF_FW_ATQLEN_ATQOVFL_M |
			 PF_FW_ATQLEN_ATQCRIT_M);
		wr32(hw, cq->sq.len, val);
	}
}

/**
 * ice_process_link_event - Process a link event indication from firmware
 * @sc: device softc structure
 * @e: the received event data
 *
 * Gets the current link status from hardware, and may print a message if an
 * unqualified module is detected.
*/ static void ice_process_link_event(struct ice_softc *sc, struct ice_rq_event_info __invariant_only *e) { struct ice_port_info *pi = sc->hw.port_info; struct ice_hw *hw = &sc->hw; device_t dev = sc->dev; int status; /* Sanity check that the data length isn't too small */ MPASS(le16toh(e->desc.datalen) >= ICE_GET_LINK_STATUS_DATALEN_V1); /* * Even though the adapter gets link status information inside the * event, it needs to send a Get Link Status AQ command in order * to re-enable link events. */ pi->phy.get_link_info = true; ice_get_link_status(pi, &sc->link_up); if (pi->phy.link_info.topo_media_conflict & (ICE_AQ_LINK_TOPO_CONFLICT | ICE_AQ_LINK_MEDIA_CONFLICT | ICE_AQ_LINK_TOPO_CORRUPT)) device_printf(dev, "Possible mis-configuration of the Ethernet port detected; please use the Intel (R) Ethernet Port Configuration Tool utility to address the issue.\n"); if ((pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) && !(pi->phy.link_info.link_info & ICE_AQ_LINK_UP)) { if (!(pi->phy.link_info.an_info & ICE_AQ_QUALIFIED_MODULE)) device_printf(dev, "Link is disabled on this device because an unsupported module type was detected! Refer to the Intel (R) Ethernet Adapters and Devices User Guide for a list of supported modules.\n"); if (pi->phy.link_info.link_cfg_err & ICE_AQ_LINK_MODULE_POWER_UNSUPPORTED) device_printf(dev, "The module's power requirements exceed the device's power supply. Cannot start link.\n"); if (pi->phy.link_info.link_cfg_err & ICE_AQ_LINK_INVAL_MAX_POWER_LIMIT) device_printf(dev, "The installed module is incompatible with the device's NVM image. Cannot start link.\n"); } if (!(pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE)) { if (!ice_testandset_state(&sc->state, ICE_STATE_NO_MEDIA)) { status = ice_aq_set_link_restart_an(pi, false, NULL); if (status && hw->adminq.sq_last_status != ICE_AQ_RC_EMODE) device_printf(dev, "%s: ice_aq_set_link_restart_an: status %s, aq_err %s\n", __func__, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); } } /* ICE_STATE_NO_MEDIA is cleared when polling task detects media */ /* Indicate that link status must be reported again */ ice_clear_state(&sc->state, ICE_STATE_LINK_STATUS_REPORTED); /* OS link info is updated elsewhere */ } /** * ice_process_ctrlq_event - Respond to a controlq event * @sc: device private structure * @qname: the name for this controlq * @event: the event to process * * Perform actions in response to various controlq event notifications. */ static void ice_process_ctrlq_event(struct ice_softc *sc, const char *qname, struct ice_rq_event_info *event) { u16 opcode; opcode = le16toh(event->desc.opcode); switch (opcode) { case ice_aqc_opc_get_link_status: ice_process_link_event(sc, event); break; case ice_aqc_opc_fw_logs_event: ice_handle_fw_log_event(sc, &event->desc, event->msg_buf); break; case ice_aqc_opc_lldp_set_mib_change: ice_handle_mib_change_event(sc, event); break; case ice_aqc_opc_event_lan_overflow: ice_handle_lan_overflow_event(sc, event); break; case ice_aqc_opc_get_health_status: ice_handle_health_status_event(sc, event); break; default: device_printf(sc->dev, "%s Receive Queue unhandled event 0x%04x ignored\n", qname, opcode); } } /** * ice_process_ctrlq - helper function to process controlq rings * @sc: device private structure * @q_type: specific control queue type * @pending: return parameter to track remaining events * * Process controlq events for a given control queue type. Returns zero on * success, and an error code on failure. 
If successful, pending is the number
 * of remaining events left in the queue.
 */
int
ice_process_ctrlq(struct ice_softc *sc, enum ice_ctl_q q_type, u16 *pending)
{
	struct ice_rq_event_info event = { { 0 } };
	struct ice_hw *hw = &sc->hw;
	struct ice_ctl_q_info *cq;
	int status;
	const char *qname;
	int loop = 0;

	switch (q_type) {
	case ICE_CTL_Q_ADMIN:
		cq = &hw->adminq;
		qname = "Admin";
		break;
	case ICE_CTL_Q_SB:
		cq = &hw->sbq;
		qname = "Sideband";
		break;
	case ICE_CTL_Q_MAILBOX:
		cq = &hw->mailboxq;
		qname = "Mailbox";
		break;
	default:
		device_printf(sc->dev,
			      "Unknown control queue type 0x%x\n",
			      q_type);
		return 0;
	}

	ice_check_ctrlq_errors(sc, qname, cq);

	/*
	 * Control queue processing happens during the admin task which may be
	 * holding a non-sleepable lock, so we *must* use M_NOWAIT here.
	 */
	event.buf_len = cq->rq_buf_size;
	event.msg_buf = (u8 *)malloc(event.buf_len, M_ICE, M_ZERO | M_NOWAIT);
	if (!event.msg_buf) {
		device_printf(sc->dev,
			      "Unable to allocate memory for %s Receive Queue event\n",
			      qname);
		return (ENOMEM);
	}

	do {
		status = ice_clean_rq_elem(hw, cq, &event, pending);
		if (status == ICE_ERR_AQ_NO_WORK)
			break;
		if (status) {
			device_printf(sc->dev,
				      "%s Receive Queue event error %s\n",
				      qname, ice_status_str(status));
			free(event.msg_buf, M_ICE);
			return (EIO);
		}
		/* XXX should we separate this handler by controlq type? */
		ice_process_ctrlq_event(sc, qname, &event);
	} while (*pending && (++loop < ICE_CTRLQ_WORK_LIMIT));

	free(event.msg_buf, M_ICE);

	return 0;
}

/**
 * pkg_ver_empty - Check if a package version is empty
 * @pkg_ver: the package version to check
 * @pkg_name: the package name to check
 *
 * Checks if the package version structure is empty. We consider a package
 * version as empty if none of the versions are non-zero and the name string
 * is null as well.
 *
 * This is used to check if the package version was initialized by the driver,
 * as we do not expect an actual DDP package file to have a zero'd version and
 * name.
 *
 * @returns true if the package version and name are empty, or false
 * otherwise.
 */
static bool
pkg_ver_empty(struct ice_pkg_ver *pkg_ver, u8 *pkg_name)
{
	return (pkg_name[0] == '\0' &&
		pkg_ver->major == 0 &&
		pkg_ver->minor == 0 &&
		pkg_ver->update == 0 &&
		pkg_ver->draft == 0);
}

/**
 * pkg_ver_compatible - Check if the package version is compatible
 * @pkg_ver: the package version to check
 *
 * Compares the package version number to the driver's expected major/minor
 * version. Returns an integer indicating whether the version is older, newer,
 * or compatible with the driver.
 *
 * @returns 0 if the package version is compatible, -1 if the package version
 * is older, and 1 if the package version is newer than the driver version.
 */
static int
pkg_ver_compatible(struct ice_pkg_ver *pkg_ver)
{
	if (pkg_ver->major > ICE_PKG_SUPP_VER_MAJ)
		return (1); /* newer */
	else if ((pkg_ver->major == ICE_PKG_SUPP_VER_MAJ) &&
		 (pkg_ver->minor > ICE_PKG_SUPP_VER_MNR))
		return (1); /* newer */
	else if ((pkg_ver->major == ICE_PKG_SUPP_VER_MAJ) &&
		 (pkg_ver->minor == ICE_PKG_SUPP_VER_MNR))
		return (0); /* compatible */
	else
		return (-1); /* older */
}

/**
 * ice_os_pkg_version_str - Format OS package version info into a sbuf
 * @hw: device hw structure
 * @buf: string buffer to store name/version string
 *
 * Formats the name and version of the OS DDP package as found in the ice_ddp
 * module into a string.
 *
 * @remark This will almost always be the same as the active package, but
 * could be different in some cases. Use ice_active_pkg_version_str to get the
 * version of the active DDP package.
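 *
 * Example of the resulting string (package name and version digits are
 * illustrative): "ICE OS Default Package version 1.3.30.0".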
*/ static void ice_os_pkg_version_str(struct ice_hw *hw, struct sbuf *buf) { char name_buf[ICE_PKG_NAME_SIZE]; /* If the OS DDP package info is empty, use "None" */ if (pkg_ver_empty(&hw->pkg_ver, hw->pkg_name)) { sbuf_printf(buf, "None"); return; } /* * This should already be null-terminated, but since this is a raw * value from an external source, strlcpy() into a new buffer to * make sure. */ bzero(name_buf, sizeof(name_buf)); strlcpy(name_buf, (char *)hw->pkg_name, ICE_PKG_NAME_SIZE); sbuf_printf(buf, "%s version %u.%u.%u.%u", name_buf, hw->pkg_ver.major, hw->pkg_ver.minor, hw->pkg_ver.update, hw->pkg_ver.draft); } /** * ice_active_pkg_version_str - Format active package version info into a sbuf * @hw: device hw structure * @buf: string buffer to store name/version string * * Formats the name and version of the active DDP package info into a string * buffer for use. */ static void ice_active_pkg_version_str(struct ice_hw *hw, struct sbuf *buf) { char name_buf[ICE_PKG_NAME_SIZE]; /* If the active DDP package info is empty, use "None" */ if (pkg_ver_empty(&hw->active_pkg_ver, hw->active_pkg_name)) { sbuf_printf(buf, "None"); return; } /* * This should already be null-terminated, but since this is a raw * value from an external source, strlcpy() into a new buffer to * make sure. */ bzero(name_buf, sizeof(name_buf)); strlcpy(name_buf, (char *)hw->active_pkg_name, ICE_PKG_NAME_SIZE); sbuf_printf(buf, "%s version %u.%u.%u.%u", name_buf, hw->active_pkg_ver.major, hw->active_pkg_ver.minor, hw->active_pkg_ver.update, hw->active_pkg_ver.draft); if (hw->active_track_id != 0) sbuf_printf(buf, ", track id 0x%08x", hw->active_track_id); } /** * ice_nvm_version_str - Format the NVM version information into a sbuf * @hw: device hw structure * @buf: string buffer to store version string * * Formats the NVM information including firmware version, API version, NVM * version, the EETRACK id, and OEM specific version information into a string * buffer. */ static void ice_nvm_version_str(struct ice_hw *hw, struct sbuf *buf) { struct ice_nvm_info *nvm = &hw->flash.nvm; struct ice_orom_info *orom = &hw->flash.orom; struct ice_netlist_info *netlist = &hw->flash.netlist; /* Note that the netlist versions are stored in packed Binary Coded * Decimal format. The use of '%x' will correctly display these as * decimal numbers. This works because every 4 bits will be displayed * as a hexadecimal digit, and the BCD format will only use the values * 0-9. */ sbuf_printf(buf, "fw %u.%u.%u api %u.%u nvm %x.%02x etid %08x netlist %x.%x.%x-%x.%x.%x.%04x oem %u.%u.%u", hw->fw_maj_ver, hw->fw_min_ver, hw->fw_patch, hw->api_maj_ver, hw->api_min_ver, nvm->major, nvm->minor, nvm->eetrack, netlist->major, netlist->minor, netlist->type >> 16, netlist->type & 0xFFFF, netlist->rev, netlist->cust_ver, netlist->hash, orom->major, orom->build, orom->patch); } /** * ice_print_nvm_version - Print the NVM info to the kernel message log * @sc: the device softc structure * * Format and print an NVM version string using ice_nvm_version_str(). 
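* * Example output (illustrative values only): "fw 7.3.2 api 1.7 nvm 4.30 etid 8001aa2b netlist 3.10.2000-1.0.0.2837 oem 1.3290.0". Note that the BCD-encoded netlist fields print as decimal-looking digits via the %x conversions in ice_nvm_version_str().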
*/ void ice_print_nvm_version(struct ice_softc *sc) { struct ice_hw *hw = &sc->hw; device_t dev = sc->dev; struct sbuf *sbuf; sbuf = sbuf_new_auto(); ice_nvm_version_str(hw, sbuf); sbuf_finish(sbuf); device_printf(dev, "%s\n", sbuf_data(sbuf)); sbuf_delete(sbuf); } /** * ice_update_port_oversize - Update port oversize stats * @sc: device private structure * @rx_errors: VSI error drops * * Add ERROR_CNT from GLV_REPC VSI register and rx_oversize stats counter */ static void ice_update_port_oversize(struct ice_softc *sc, u64 rx_errors) { struct ice_hw_port_stats *cur_ps; cur_ps = &sc->stats.cur; sc->soft_stats.rx_roc_error = rx_errors + cur_ps->rx_oversize; } /** * ice_update_vsi_hw_stats - Update VSI-specific ethernet statistics counters * @vsi: the VSI to be updated * * Reads hardware stats and updates the ice_vsi_hw_stats tracking structure with * the updated values. */ void ice_update_vsi_hw_stats(struct ice_vsi *vsi) { struct ice_eth_stats *prev_es, *cur_es; struct ice_hw *hw = &vsi->sc->hw; u16 vsi_num; if (!ice_is_vsi_valid(hw, vsi->idx)) return; vsi_num = ice_get_hw_vsi_num(hw, vsi->idx); /* HW absolute index of a VSI */ prev_es = &vsi->hw_stats.prev; cur_es = &vsi->hw_stats.cur; #define ICE_VSI_STAT40(name, location) \ ice_stat_update40(hw, name ## L(vsi_num), \ vsi->hw_stats.offsets_loaded, \ &prev_es->location, &cur_es->location) #define ICE_VSI_STAT32(name, location) \ ice_stat_update32(hw, name(vsi_num), \ vsi->hw_stats.offsets_loaded, \ &prev_es->location, &cur_es->location) ICE_VSI_STAT40(GLV_GORC, rx_bytes); ICE_VSI_STAT40(GLV_UPRC, rx_unicast); ICE_VSI_STAT40(GLV_MPRC, rx_multicast); ICE_VSI_STAT40(GLV_BPRC, rx_broadcast); ICE_VSI_STAT32(GLV_RDPC, rx_discards); ICE_VSI_STAT40(GLV_GOTC, tx_bytes); ICE_VSI_STAT40(GLV_UPTC, tx_unicast); ICE_VSI_STAT40(GLV_MPTC, tx_multicast); ICE_VSI_STAT40(GLV_BPTC, tx_broadcast); ICE_VSI_STAT32(GLV_TEPC, tx_errors); ice_stat_update_repc(hw, vsi->idx, vsi->hw_stats.offsets_loaded, cur_es); ice_update_port_oversize(vsi->sc, cur_es->rx_errors); #undef ICE_VSI_STAT40 #undef ICE_VSI_STAT32 vsi->hw_stats.offsets_loaded = true; } /** * ice_reset_vsi_stats - Reset VSI statistics counters * @vsi: VSI structure * * Resets the software tracking counters for the VSI statistics, and indicate * that the offsets haven't been loaded. This is intended to be called * post-reset so that VSI statistics count from zero again. */ void ice_reset_vsi_stats(struct ice_vsi *vsi) { /* Reset HW stats */ memset(&vsi->hw_stats.prev, 0, sizeof(vsi->hw_stats.prev)); memset(&vsi->hw_stats.cur, 0, sizeof(vsi->hw_stats.cur)); vsi->hw_stats.offsets_loaded = false; } /** * ice_update_pf_stats - Update port stats counters * @sc: device private softc structure * * Reads hardware statistics registers and updates the software tracking * structure with new values. 
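* * @remark The ICE_PF_STAT40/32 wrappers below follow the driver's usual stats offset scheme: the first read after a reset (offsets_loaded == false) caches a baseline register value, and later reads report deltas from that baseline so counters appear to restart from zero (see ice_reset_pf_stats()).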
*/ void ice_update_pf_stats(struct ice_softc *sc) { struct ice_hw_port_stats *prev_ps, *cur_ps; struct ice_hw *hw = &sc->hw; u8 lport; MPASS(hw->port_info); prev_ps = &sc->stats.prev; cur_ps = &sc->stats.cur; lport = hw->port_info->lport; #define ICE_PF_STAT_PFC(name, location, index) \ ice_stat_update40(hw, name(lport, index), \ sc->stats.offsets_loaded, \ &prev_ps->location[index], &cur_ps->location[index]) #define ICE_PF_STAT40(name, location) \ ice_stat_update40(hw, name ## L(lport), \ sc->stats.offsets_loaded, \ &prev_ps->location, &cur_ps->location) #define ICE_PF_STAT32(name, location) \ ice_stat_update32(hw, name(lport), \ sc->stats.offsets_loaded, \ &prev_ps->location, &cur_ps->location) ICE_PF_STAT40(GLPRT_GORC, eth.rx_bytes); ICE_PF_STAT40(GLPRT_UPRC, eth.rx_unicast); ICE_PF_STAT40(GLPRT_MPRC, eth.rx_multicast); ICE_PF_STAT40(GLPRT_BPRC, eth.rx_broadcast); ICE_PF_STAT40(GLPRT_GOTC, eth.tx_bytes); ICE_PF_STAT40(GLPRT_UPTC, eth.tx_unicast); ICE_PF_STAT40(GLPRT_MPTC, eth.tx_multicast); ICE_PF_STAT40(GLPRT_BPTC, eth.tx_broadcast); /* This stat register doesn't have an lport */ ice_stat_update32(hw, PRTRPB_RDPC, sc->stats.offsets_loaded, &prev_ps->eth.rx_discards, &cur_ps->eth.rx_discards); ICE_PF_STAT32(GLPRT_TDOLD, tx_dropped_link_down); ICE_PF_STAT40(GLPRT_PRC64, rx_size_64); ICE_PF_STAT40(GLPRT_PRC127, rx_size_127); ICE_PF_STAT40(GLPRT_PRC255, rx_size_255); ICE_PF_STAT40(GLPRT_PRC511, rx_size_511); ICE_PF_STAT40(GLPRT_PRC1023, rx_size_1023); ICE_PF_STAT40(GLPRT_PRC1522, rx_size_1522); ICE_PF_STAT40(GLPRT_PRC9522, rx_size_big); ICE_PF_STAT40(GLPRT_PTC64, tx_size_64); ICE_PF_STAT40(GLPRT_PTC127, tx_size_127); ICE_PF_STAT40(GLPRT_PTC255, tx_size_255); ICE_PF_STAT40(GLPRT_PTC511, tx_size_511); ICE_PF_STAT40(GLPRT_PTC1023, tx_size_1023); ICE_PF_STAT40(GLPRT_PTC1522, tx_size_1522); ICE_PF_STAT40(GLPRT_PTC9522, tx_size_big); /* Update Priority Flow Control Stats */ for (int i = 0; i <= GLPRT_PXOFFRXC_MAX_INDEX; i++) { ICE_PF_STAT_PFC(GLPRT_PXONRXC, priority_xon_rx, i); ICE_PF_STAT_PFC(GLPRT_PXOFFRXC, priority_xoff_rx, i); ICE_PF_STAT_PFC(GLPRT_PXONTXC, priority_xon_tx, i); ICE_PF_STAT_PFC(GLPRT_PXOFFTXC, priority_xoff_tx, i); ICE_PF_STAT_PFC(GLPRT_RXON2OFFCNT, priority_xon_2_xoff, i); } ICE_PF_STAT32(GLPRT_LXONRXC, link_xon_rx); ICE_PF_STAT32(GLPRT_LXOFFRXC, link_xoff_rx); ICE_PF_STAT32(GLPRT_LXONTXC, link_xon_tx); ICE_PF_STAT32(GLPRT_LXOFFTXC, link_xoff_tx); ICE_PF_STAT32(GLPRT_CRCERRS, crc_errors); ICE_PF_STAT32(GLPRT_ILLERRC, illegal_bytes); ICE_PF_STAT32(GLPRT_MLFC, mac_local_faults); ICE_PF_STAT32(GLPRT_MRFC, mac_remote_faults); ICE_PF_STAT32(GLPRT_RLEC, rx_len_errors); ICE_PF_STAT32(GLPRT_RUC, rx_undersize); ICE_PF_STAT32(GLPRT_RFC, rx_fragments); ICE_PF_STAT32(GLPRT_ROC, rx_oversize); ICE_PF_STAT32(GLPRT_RJC, rx_jabber); #undef ICE_PF_STAT40 #undef ICE_PF_STAT32 #undef ICE_PF_STAT_PFC sc->stats.offsets_loaded = true; } /** * ice_reset_pf_stats - Reset port stats counters * @sc: Device private softc structure * * Reset software tracking values for statistics to zero, and indicate that * offsets haven't been loaded. Intended to be called after a device reset so * that statistics count from zero again. 
*/ void ice_reset_pf_stats(struct ice_softc *sc) { memset(&sc->stats.prev, 0, sizeof(sc->stats.prev)); memset(&sc->stats.cur, 0, sizeof(sc->stats.cur)); sc->stats.offsets_loaded = false; } /** * ice_sysctl_show_fw - sysctl callback to show firmware information * @oidp: sysctl oid structure * @arg1: pointer to private data structure * @arg2: unused * @req: sysctl request pointer * * Callback for the fw_version sysctl, to display the current firmware * information found at hardware init time. */ static int ice_sysctl_show_fw(SYSCTL_HANDLER_ARGS) { struct ice_softc *sc = (struct ice_softc *)arg1; struct ice_hw *hw = &sc->hw; struct sbuf *sbuf; UNREFERENCED_PARAMETER(oidp); UNREFERENCED_PARAMETER(arg2); if (ice_driver_is_detaching(sc)) return (ESHUTDOWN); sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req); ice_nvm_version_str(hw, sbuf); sbuf_finish(sbuf); sbuf_delete(sbuf); return (0); } /** * ice_sysctl_pba_number - sysctl callback to show PBA number * @oidp: sysctl oid structure * @arg1: pointer to private data structure * @arg2: unused * @req: sysctl request pointer * * Callback for the pba_number sysctl, used to read the Product Board Assembly * number for this device. */ static int ice_sysctl_pba_number(SYSCTL_HANDLER_ARGS) { struct ice_softc *sc = (struct ice_softc *)arg1; struct ice_hw *hw = &sc->hw; device_t dev = sc->dev; u8 pba_string[32] = ""; int status; UNREFERENCED_PARAMETER(arg2); if (ice_driver_is_detaching(sc)) return (ESHUTDOWN); status = ice_read_pba_string(hw, pba_string, sizeof(pba_string)); if (status) { device_printf(dev, "%s: failed to read PBA string from NVM; status %s, aq_err %s\n", __func__, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); return (EIO); } return sysctl_handle_string(oidp, pba_string, sizeof(pba_string), req); } /** * ice_sysctl_pkg_version - sysctl to show the active package version info * @oidp: sysctl oid structure * @arg1: pointer to private data structure * @arg2: unused * @req: sysctl request pointer * * Callback for the pkg_version sysctl, to display the active DDP package name * and version information. */ static int ice_sysctl_pkg_version(SYSCTL_HANDLER_ARGS) { struct ice_softc *sc = (struct ice_softc *)arg1; struct ice_hw *hw = &sc->hw; struct sbuf *sbuf; UNREFERENCED_PARAMETER(oidp); UNREFERENCED_PARAMETER(arg2); if (ice_driver_is_detaching(sc)) return (ESHUTDOWN); sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req); ice_active_pkg_version_str(hw, sbuf); sbuf_finish(sbuf); sbuf_delete(sbuf); return (0); } /** * ice_sysctl_os_pkg_version - sysctl to show the OS package version info * @oidp: sysctl oid structure * @arg1: pointer to private data structure * @arg2: unused * @req: sysctl request pointer * * Callback for the pkg_version sysctl, to display the OS DDP package name and * version info found in the ice_ddp module. */ static int ice_sysctl_os_pkg_version(SYSCTL_HANDLER_ARGS) { struct ice_softc *sc = (struct ice_softc *)arg1; struct ice_hw *hw = &sc->hw; struct sbuf *sbuf; UNREFERENCED_PARAMETER(oidp); UNREFERENCED_PARAMETER(arg2); if (ice_driver_is_detaching(sc)) return (ESHUTDOWN); sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req); ice_os_pkg_version_str(hw, sbuf); sbuf_finish(sbuf); sbuf_delete(sbuf); return (0); } /** * ice_sysctl_current_speed - sysctl callback to show current link speed * @oidp: sysctl oid structure * @arg1: pointer to private data structure * @arg2: unused * @req: sysctl request pointer * * Callback for the current_speed sysctl, to display the string representing * the current link speed. 
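* * Example (illustrative): "sysctl dev.ice.0.current_speed" might report "25 Gbps" while link is up at 25G; the exact string comes from ice_aq_speed_to_str().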
*/ static int ice_sysctl_current_speed(SYSCTL_HANDLER_ARGS) { struct ice_softc *sc = (struct ice_softc *)arg1; struct ice_hw *hw = &sc->hw; struct sbuf *sbuf; UNREFERENCED_PARAMETER(oidp); UNREFERENCED_PARAMETER(arg2); if (ice_driver_is_detaching(sc)) return (ESHUTDOWN); sbuf = sbuf_new_for_sysctl(NULL, NULL, 10, req); sbuf_printf(sbuf, "%s", ice_aq_speed_to_str(hw->port_info)); sbuf_finish(sbuf); sbuf_delete(sbuf); return (0); } /** * @var phy_link_speeds * @brief PHY link speed conversion array * * Array of link speeds to convert ICE_PHY_TYPE_LOW and ICE_PHY_TYPE_HIGH into * link speeds used by the link speed sysctls. * * @remark these are based on the indices used in the BIT() macros for the * ICE_PHY_TYPE_LOW_* and ICE_PHY_TYPE_HIGH_* definitions. */ static const uint16_t phy_link_speeds[] = { ICE_AQ_LINK_SPEED_100MB, ICE_AQ_LINK_SPEED_100MB, ICE_AQ_LINK_SPEED_1000MB, ICE_AQ_LINK_SPEED_1000MB, ICE_AQ_LINK_SPEED_1000MB, ICE_AQ_LINK_SPEED_1000MB, ICE_AQ_LINK_SPEED_1000MB, ICE_AQ_LINK_SPEED_2500MB, ICE_AQ_LINK_SPEED_2500MB, ICE_AQ_LINK_SPEED_2500MB, ICE_AQ_LINK_SPEED_5GB, ICE_AQ_LINK_SPEED_5GB, ICE_AQ_LINK_SPEED_10GB, ICE_AQ_LINK_SPEED_10GB, ICE_AQ_LINK_SPEED_10GB, ICE_AQ_LINK_SPEED_10GB, ICE_AQ_LINK_SPEED_10GB, ICE_AQ_LINK_SPEED_10GB, ICE_AQ_LINK_SPEED_10GB, ICE_AQ_LINK_SPEED_25GB, ICE_AQ_LINK_SPEED_25GB, ICE_AQ_LINK_SPEED_25GB, ICE_AQ_LINK_SPEED_25GB, ICE_AQ_LINK_SPEED_25GB, ICE_AQ_LINK_SPEED_25GB, ICE_AQ_LINK_SPEED_25GB, ICE_AQ_LINK_SPEED_25GB, ICE_AQ_LINK_SPEED_25GB, ICE_AQ_LINK_SPEED_25GB, ICE_AQ_LINK_SPEED_25GB, ICE_AQ_LINK_SPEED_40GB, ICE_AQ_LINK_SPEED_40GB, ICE_AQ_LINK_SPEED_40GB, ICE_AQ_LINK_SPEED_40GB, ICE_AQ_LINK_SPEED_40GB, ICE_AQ_LINK_SPEED_40GB, ICE_AQ_LINK_SPEED_50GB, ICE_AQ_LINK_SPEED_50GB, ICE_AQ_LINK_SPEED_50GB, ICE_AQ_LINK_SPEED_50GB, ICE_AQ_LINK_SPEED_50GB, ICE_AQ_LINK_SPEED_50GB, ICE_AQ_LINK_SPEED_50GB, ICE_AQ_LINK_SPEED_50GB, ICE_AQ_LINK_SPEED_50GB, ICE_AQ_LINK_SPEED_50GB, ICE_AQ_LINK_SPEED_50GB, ICE_AQ_LINK_SPEED_50GB, ICE_AQ_LINK_SPEED_50GB, ICE_AQ_LINK_SPEED_50GB, ICE_AQ_LINK_SPEED_50GB, ICE_AQ_LINK_SPEED_100GB, ICE_AQ_LINK_SPEED_100GB, ICE_AQ_LINK_SPEED_100GB, ICE_AQ_LINK_SPEED_100GB, ICE_AQ_LINK_SPEED_100GB, ICE_AQ_LINK_SPEED_100GB, ICE_AQ_LINK_SPEED_100GB, ICE_AQ_LINK_SPEED_100GB, ICE_AQ_LINK_SPEED_100GB, ICE_AQ_LINK_SPEED_100GB, ICE_AQ_LINK_SPEED_100GB, ICE_AQ_LINK_SPEED_100GB, ICE_AQ_LINK_SPEED_100GB, /* These rates are for ICE_PHY_TYPE_HIGH_* */ ICE_AQ_LINK_SPEED_100GB, ICE_AQ_LINK_SPEED_100GB, ICE_AQ_LINK_SPEED_100GB, ICE_AQ_LINK_SPEED_100GB, ICE_AQ_LINK_SPEED_100GB, ICE_AQ_LINK_SPEED_200GB, ICE_AQ_LINK_SPEED_200GB, ICE_AQ_LINK_SPEED_200GB, ICE_AQ_LINK_SPEED_200GB, ICE_AQ_LINK_SPEED_200GB, ICE_AQ_LINK_SPEED_200GB, ICE_AQ_LINK_SPEED_200GB, ICE_AQ_LINK_SPEED_200GB, ICE_AQ_LINK_SPEED_200GB, ICE_AQ_LINK_SPEED_200GB, }; #define ICE_SYSCTL_HELP_ADVERTISE_SPEED \ "\nControl advertised link speed." \ "\nFlags:" \ "\n\t 0x0 - Auto" \ "\n\t 0x1 - 10 Mb" \ "\n\t 0x2 - 100 Mb" \ "\n\t 0x4 - 1G" \ "\n\t 0x8 - 2.5G" \ "\n\t 0x10 - 5G" \ "\n\t 0x20 - 10G" \ "\n\t 0x40 - 20G" \ "\n\t 0x80 - 25G" \ "\n\t 0x100 - 40G" \ "\n\t 0x200 - 50G" \ "\n\t 0x400 - 100G" \ "\n\t 0x800 - 200G" \ "\n\t0x8000 - Unknown" \ "\n\t" \ "\nUse \"sysctl -x\" to view flags properly." 
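/*
 * Illustrative example (not compiled; flag values taken from the help text
 * above, sysctl name from ice_add_device_sysctls() below): advertising only
 * 10G and 25G corresponds to the flag set 0x20 | 0x80 == 0xa0, i.e.
 *
 *	# sysctl dev.ice.0.advertise_speed=0xa0
 */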
#define ICE_PHYS_100MB \ (ICE_PHY_TYPE_LOW_100BASE_TX | \ ICE_PHY_TYPE_LOW_100M_SGMII) #define ICE_PHYS_1000MB \ (ICE_PHY_TYPE_LOW_1000BASE_T | \ ICE_PHY_TYPE_LOW_1000BASE_SX | \ ICE_PHY_TYPE_LOW_1000BASE_LX | \ ICE_PHY_TYPE_LOW_1000BASE_KX | \ ICE_PHY_TYPE_LOW_1G_SGMII) #define ICE_PHYS_2500MB \ (ICE_PHY_TYPE_LOW_2500BASE_T | \ ICE_PHY_TYPE_LOW_2500BASE_X | \ ICE_PHY_TYPE_LOW_2500BASE_KX) #define ICE_PHYS_5GB \ (ICE_PHY_TYPE_LOW_5GBASE_T | \ ICE_PHY_TYPE_LOW_5GBASE_KR) #define ICE_PHYS_10GB \ (ICE_PHY_TYPE_LOW_10GBASE_T | \ ICE_PHY_TYPE_LOW_10G_SFI_DA | \ ICE_PHY_TYPE_LOW_10GBASE_SR | \ ICE_PHY_TYPE_LOW_10GBASE_LR | \ ICE_PHY_TYPE_LOW_10GBASE_KR_CR1 | \ ICE_PHY_TYPE_LOW_10G_SFI_AOC_ACC | \ ICE_PHY_TYPE_LOW_10G_SFI_C2C) #define ICE_PHYS_25GB \ (ICE_PHY_TYPE_LOW_25GBASE_T | \ ICE_PHY_TYPE_LOW_25GBASE_CR | \ ICE_PHY_TYPE_LOW_25GBASE_CR_S | \ ICE_PHY_TYPE_LOW_25GBASE_CR1 | \ ICE_PHY_TYPE_LOW_25GBASE_SR | \ ICE_PHY_TYPE_LOW_25GBASE_LR | \ ICE_PHY_TYPE_LOW_25GBASE_KR | \ ICE_PHY_TYPE_LOW_25GBASE_KR_S | \ ICE_PHY_TYPE_LOW_25GBASE_KR1 | \ ICE_PHY_TYPE_LOW_25G_AUI_AOC_ACC | \ ICE_PHY_TYPE_LOW_25G_AUI_C2C) #define ICE_PHYS_40GB \ (ICE_PHY_TYPE_LOW_40GBASE_CR4 | \ ICE_PHY_TYPE_LOW_40GBASE_SR4 | \ ICE_PHY_TYPE_LOW_40GBASE_LR4 | \ ICE_PHY_TYPE_LOW_40GBASE_KR4 | \ ICE_PHY_TYPE_LOW_40G_XLAUI_AOC_ACC | \ ICE_PHY_TYPE_LOW_40G_XLAUI) #define ICE_PHYS_50GB \ (ICE_PHY_TYPE_LOW_50GBASE_CR2 | \ ICE_PHY_TYPE_LOW_50GBASE_SR2 | \ ICE_PHY_TYPE_LOW_50GBASE_LR2 | \ ICE_PHY_TYPE_LOW_50GBASE_KR2 | \ ICE_PHY_TYPE_LOW_50G_LAUI2_AOC_ACC | \ ICE_PHY_TYPE_LOW_50G_LAUI2 | \ ICE_PHY_TYPE_LOW_50G_AUI2_AOC_ACC | \ ICE_PHY_TYPE_LOW_50G_AUI2 | \ ICE_PHY_TYPE_LOW_50GBASE_CP | \ ICE_PHY_TYPE_LOW_50GBASE_SR | \ ICE_PHY_TYPE_LOW_50GBASE_FR | \ ICE_PHY_TYPE_LOW_50GBASE_LR | \ ICE_PHY_TYPE_LOW_50GBASE_KR_PAM4 | \ ICE_PHY_TYPE_LOW_50G_AUI1_AOC_ACC | \ ICE_PHY_TYPE_LOW_50G_AUI1) #define ICE_PHYS_100GB_LOW \ (ICE_PHY_TYPE_LOW_100GBASE_CR4 | \ ICE_PHY_TYPE_LOW_100GBASE_SR4 | \ ICE_PHY_TYPE_LOW_100GBASE_LR4 | \ ICE_PHY_TYPE_LOW_100GBASE_KR4 | \ ICE_PHY_TYPE_LOW_100G_CAUI4_AOC_ACC | \ ICE_PHY_TYPE_LOW_100G_CAUI4 | \ ICE_PHY_TYPE_LOW_100G_AUI4_AOC_ACC | \ ICE_PHY_TYPE_LOW_100G_AUI4 | \ ICE_PHY_TYPE_LOW_100GBASE_CR_PAM4 | \ ICE_PHY_TYPE_LOW_100GBASE_KR_PAM4 | \ ICE_PHY_TYPE_LOW_100GBASE_CP2 | \ ICE_PHY_TYPE_LOW_100GBASE_SR2 | \ ICE_PHY_TYPE_LOW_100GBASE_DR) #define ICE_PHYS_100GB_HIGH \ (ICE_PHY_TYPE_HIGH_100GBASE_KR2_PAM4 | \ ICE_PHY_TYPE_HIGH_100G_CAUI2_AOC_ACC | \ ICE_PHY_TYPE_HIGH_100G_CAUI2 | \ ICE_PHY_TYPE_HIGH_100G_AUI2_AOC_ACC | \ ICE_PHY_TYPE_HIGH_100G_AUI2) #define ICE_PHYS_200GB \ (ICE_PHY_TYPE_HIGH_200G_CR4_PAM4 | \ ICE_PHY_TYPE_HIGH_200G_SR4 | \ ICE_PHY_TYPE_HIGH_200G_FR4 | \ ICE_PHY_TYPE_HIGH_200G_LR4 | \ ICE_PHY_TYPE_HIGH_200G_DR4 | \ ICE_PHY_TYPE_HIGH_200G_KR4_PAM4 | \ ICE_PHY_TYPE_HIGH_200G_AUI4_AOC_ACC | \ ICE_PHY_TYPE_HIGH_200G_AUI4 | \ ICE_PHY_TYPE_HIGH_200G_AUI8_AOC_ACC | \ ICE_PHY_TYPE_HIGH_200G_AUI8) /** * ice_aq_phy_types_to_link_speeds - Convert the PHY Types to speeds * @phy_type_low: lower 64-bit PHY Type bitmask * @phy_type_high: upper 64-bit PHY Type bitmask * * Convert the PHY Type fields from Get PHY Abilities and Set PHY Config into * link speed flags. If phy_type_high has an unknown PHY type, then the return * value will include the "ICE_AQ_LINK_SPEED_UNKNOWN" flag as well. 
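* * For example (bit positions assumed from the ICE_PHY_TYPE_LOW_* BIT() definitions): a phy_type_low with only the 25GBASE-T bit set indexes one of the 25G entries of phy_link_speeds[], yielding ICE_AQ_LINK_SPEED_25GB.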
*/ static u16 ice_aq_phy_types_to_link_speeds(u64 phy_type_low, u64 phy_type_high) { u16 sysctl_speeds = 0; int bit; /* coverity[address_of] */ for_each_set_bit(bit, &phy_type_low, 64) sysctl_speeds |= phy_link_speeds[bit]; /* coverity[address_of] */ for_each_set_bit(bit, &phy_type_high, 64) { if ((bit + 64) < (int)ARRAY_SIZE(phy_link_speeds)) sysctl_speeds |= phy_link_speeds[bit + 64]; else sysctl_speeds |= ICE_AQ_LINK_SPEED_UNKNOWN; } return (sysctl_speeds); } /** * ice_sysctl_speeds_to_aq_phy_types - Convert sysctl speed flags to AQ PHY flags * @sysctl_speeds: 16-bit sysctl speeds or AQ_LINK_SPEED flags * @phy_type_low: output parameter for lower AQ PHY flags * @phy_type_high: output parameter for higher AQ PHY flags * * Converts the given link speed flags into AQ PHY type flag sets appropriate * for use in a Set PHY Config command. */ static void ice_sysctl_speeds_to_aq_phy_types(u16 sysctl_speeds, u64 *phy_type_low, u64 *phy_type_high) { *phy_type_low = 0, *phy_type_high = 0; if (sysctl_speeds & ICE_AQ_LINK_SPEED_100MB) *phy_type_low |= ICE_PHYS_100MB; if (sysctl_speeds & ICE_AQ_LINK_SPEED_1000MB) *phy_type_low |= ICE_PHYS_1000MB; if (sysctl_speeds & ICE_AQ_LINK_SPEED_2500MB) *phy_type_low |= ICE_PHYS_2500MB; if (sysctl_speeds & ICE_AQ_LINK_SPEED_5GB) *phy_type_low |= ICE_PHYS_5GB; if (sysctl_speeds & ICE_AQ_LINK_SPEED_10GB) *phy_type_low |= ICE_PHYS_10GB; if (sysctl_speeds & ICE_AQ_LINK_SPEED_25GB) *phy_type_low |= ICE_PHYS_25GB; if (sysctl_speeds & ICE_AQ_LINK_SPEED_40GB) *phy_type_low |= ICE_PHYS_40GB; if (sysctl_speeds & ICE_AQ_LINK_SPEED_50GB) *phy_type_low |= ICE_PHYS_50GB; if (sysctl_speeds & ICE_AQ_LINK_SPEED_100GB) { *phy_type_low |= ICE_PHYS_100GB_LOW; *phy_type_high |= ICE_PHYS_100GB_HIGH; } if (sysctl_speeds & ICE_AQ_LINK_SPEED_200GB) *phy_type_high |= ICE_PHYS_200GB; } /** * @struct ice_phy_data * @brief PHY caps and link speeds * * Buffer providing report mode and user speeds; * returning intersection of PHY types and speeds. 
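* * Typical flow (see ice_intersect_phy_types_and_speeds() below): the caller fills report_mode and user_speeds_orig; the function then fills the *_orig fields from Get PHY Abilities and the *_intr fields with the intersection of those capabilities and the requested speeds.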
*/ struct ice_phy_data { u64 phy_low_orig; /* PHY low quad from report */ u64 phy_high_orig; /* PHY high quad from report */ u64 phy_low_intr; /* PHY low quad intersection with user speeds */ u64 phy_high_intr; /* PHY high quad intersection with user speeds */ u16 user_speeds_orig; /* Input from caller - See ICE_AQ_LINK_SPEED_* */ u16 user_speeds_intr; /* Intersect with report speeds */ u8 report_mode; /* See ICE_AQC_REPORT_* */ }; /** * ice_intersect_phy_types_and_speeds - Return intersection of link speeds * @sc: device private structure * @phy_data: device PHY data * * On read: Displays the currently supported speeds * On write: Sets the device's supported speeds * Valid input flags: see ICE_SYSCTL_HELP_ADVERTISE_SPEED */ static int ice_intersect_phy_types_and_speeds(struct ice_softc *sc, struct ice_phy_data *phy_data) { struct ice_aqc_get_phy_caps_data pcaps = { 0 }; const char *report_types[5] = { "w/o MEDIA", "w/MEDIA", "ACTIVE", "EDOOFUS", /* Not used */ "DFLT" }; struct ice_hw *hw = &sc->hw; struct ice_port_info *pi = hw->port_info; int status; u16 report_speeds, temp_speeds; u8 report_type; bool apply_speed_filter = false; switch (phy_data->report_mode) { case ICE_AQC_REPORT_TOPO_CAP_NO_MEDIA: case ICE_AQC_REPORT_TOPO_CAP_MEDIA: case ICE_AQC_REPORT_ACTIVE_CFG: case ICE_AQC_REPORT_DFLT_CFG: report_type = phy_data->report_mode >> 1; break; default: device_printf(sc->dev, "%s: phy_data.report_mode \"%u\" doesn't exist\n", __func__, phy_data->report_mode); return (EINVAL); } /* 0 is treated as "Auto"; the driver will handle selecting the * correct speeds. Including, in some cases, applying an override * if provided. */ if (phy_data->user_speeds_orig == 0) phy_data->user_speeds_orig = USHRT_MAX; else if (ice_is_bit_set(sc->feat_en, ICE_FEATURE_LENIENT_LINK_MODE)) apply_speed_filter = true; status = ice_aq_get_phy_caps(pi, false, phy_data->report_mode, &pcaps, NULL); if (status) { device_printf(sc->dev, "%s: ice_aq_get_phy_caps (%s) failed; status %s, aq_err %s\n", __func__, report_types[report_type], ice_status_str(status), ice_aq_str(sc->hw.adminq.sq_last_status)); return (EIO); } phy_data->phy_low_orig = le64toh(pcaps.phy_type_low); phy_data->phy_high_orig = le64toh(pcaps.phy_type_high); report_speeds = ice_aq_phy_types_to_link_speeds(phy_data->phy_low_orig, phy_data->phy_high_orig); if (apply_speed_filter) { temp_speeds = ice_apply_supported_speed_filter(report_speeds, pcaps.module_type[0]); if ((phy_data->user_speeds_orig & temp_speeds) == 0) { device_printf(sc->dev, "User-specified speeds (\"0x%04X\") not supported\n", phy_data->user_speeds_orig); return (EINVAL); } report_speeds = temp_speeds; } ice_sysctl_speeds_to_aq_phy_types(phy_data->user_speeds_orig, &phy_data->phy_low_intr, &phy_data->phy_high_intr); phy_data->user_speeds_intr = phy_data->user_speeds_orig & report_speeds; phy_data->phy_low_intr &= phy_data->phy_low_orig; phy_data->phy_high_intr &= phy_data->phy_high_orig; return (0); } /** * ice_sysctl_advertise_speed - Display/change link speeds supported by port * @oidp: sysctl oid structure * @arg1: pointer to private data structure * @arg2: unused * @req: sysctl request pointer * * On read: Displays the currently supported speeds * On write: Sets the device's supported speeds * Valid input flags: see ICE_SYSCTL_HELP_ADVERTISE_SPEED */ static int ice_sysctl_advertise_speed(SYSCTL_HANDLER_ARGS) { struct ice_softc *sc = (struct ice_softc *)arg1; struct ice_port_info *pi = sc->hw.port_info; struct ice_phy_data phy_data = { 0 }; device_t dev = sc->dev; u16 sysctl_speeds; 
int ret; UNREFERENCED_PARAMETER(arg2); if (ice_driver_is_detaching(sc)) return (ESHUTDOWN); /* Get the current speeds from the adapter's "active" configuration. */ phy_data.report_mode = ICE_AQC_REPORT_ACTIVE_CFG; ret = ice_intersect_phy_types_and_speeds(sc, &phy_data); if (ret) { /* Error message already printed within function */ return (ret); } sysctl_speeds = phy_data.user_speeds_intr; ret = sysctl_handle_16(oidp, &sysctl_speeds, 0, req); if ((ret) || (req->newptr == NULL)) return (ret); if (sysctl_speeds > ICE_SYSCTL_SPEEDS_VALID_RANGE) { device_printf(dev, "%s: \"%u\" is outside of the range of acceptable values.\n", __func__, sysctl_speeds); return (EINVAL); } pi->phy.curr_user_speed_req = sysctl_speeds; - if (!ice_test_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN) && !sc->link_up) + if (!ice_test_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN) && + !sc->link_up && !(if_getflags(sc->ifp) & IFF_UP)) return 0; /* Apply settings requested by user */ return ice_apply_saved_phy_cfg(sc, ICE_APPLY_LS); } #define ICE_SYSCTL_HELP_FEC_CONFIG \ "\nDisplay or set the port's requested FEC mode." \ "\n\tauto - " ICE_FEC_STRING_AUTO \ "\n\tfc - " ICE_FEC_STRING_BASER \ "\n\trs - " ICE_FEC_STRING_RS \ "\n\tnone - " ICE_FEC_STRING_NONE \ "\nEither of the left or right strings above can be used to set the requested mode." /** * ice_sysctl_fec_config - Display/change the configured FEC mode * @oidp: sysctl oid structure * @arg1: pointer to private data structure * @arg2: unused * @req: sysctl request pointer * * On read: Displays the configured FEC mode * On write: Sets the device's FEC mode to the input string, if it's valid. * Valid input strings: see ICE_SYSCTL_HELP_FEC_CONFIG */ static int ice_sysctl_fec_config(SYSCTL_HANDLER_ARGS) { struct ice_softc *sc = (struct ice_softc *)arg1; struct ice_port_info *pi = sc->hw.port_info; enum ice_fec_mode new_mode; device_t dev = sc->dev; char req_fec[32]; int ret; UNREFERENCED_PARAMETER(arg2); if (ice_driver_is_detaching(sc)) return (ESHUTDOWN); bzero(req_fec, sizeof(req_fec)); strlcpy(req_fec, ice_requested_fec_mode(pi), sizeof(req_fec)); ret = sysctl_handle_string(oidp, req_fec, sizeof(req_fec), req); if ((ret) || (req->newptr == NULL)) return (ret); if (strcmp(req_fec, "auto") == 0 || strcmp(req_fec, ice_fec_str(ICE_FEC_AUTO)) == 0) { if (sc->allow_no_fec_mod_in_auto) new_mode = ICE_FEC_DIS_AUTO; else new_mode = ICE_FEC_AUTO; } else if (strcmp(req_fec, "fc") == 0 || strcmp(req_fec, ice_fec_str(ICE_FEC_BASER)) == 0) { new_mode = ICE_FEC_BASER; } else if (strcmp(req_fec, "rs") == 0 || strcmp(req_fec, ice_fec_str(ICE_FEC_RS)) == 0) { new_mode = ICE_FEC_RS; } else if (strcmp(req_fec, "none") == 0 || strcmp(req_fec, ice_fec_str(ICE_FEC_NONE)) == 0) { new_mode = ICE_FEC_NONE; } else { device_printf(dev, "%s: \"%s\" is not a valid FEC mode\n", __func__, req_fec); return (EINVAL); } /* Cache user FEC mode for later link ups */ pi->phy.curr_user_fec_req = new_mode; if (!ice_test_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN) && !sc->link_up) return 0; /* Apply settings requested by user */ return ice_apply_saved_phy_cfg(sc, ICE_APPLY_FEC); } /** * ice_sysctl_negotiated_fec - Display the negotiated FEC mode on the link * @oidp: sysctl oid structure * @arg1: pointer to private data structure * @arg2: unused * @req: sysctl request pointer * * On read: Displays the negotiated FEC mode, in a string */ static int ice_sysctl_negotiated_fec(SYSCTL_HANDLER_ARGS) { struct ice_softc *sc = (struct ice_softc *)arg1; struct ice_hw *hw = &sc->hw; char neg_fec[32]; int ret; 
UNREFERENCED_PARAMETER(arg2); if (ice_driver_is_detaching(sc)) return (ESHUTDOWN); /* Copy const string into a buffer to drop const qualifier */ bzero(neg_fec, sizeof(neg_fec)); strlcpy(neg_fec, ice_negotiated_fec_mode(hw->port_info), sizeof(neg_fec)); ret = sysctl_handle_string(oidp, neg_fec, 0, req); if (req->newptr != NULL) return (EPERM); return (ret); } #define ICE_SYSCTL_HELP_FC_CONFIG \ "\nDisplay or set the port's advertised flow control mode.\n" \ "\t0 - " ICE_FC_STRING_NONE \ "\n\t1 - " ICE_FC_STRING_RX \ "\n\t2 - " ICE_FC_STRING_TX \ "\n\t3 - " ICE_FC_STRING_FULL \ "\nEither the numbers or the strings above can be used to set the advertised mode." /** * ice_sysctl_fc_config - Display/change the advertised flow control mode * @oidp: sysctl oid structure * @arg1: pointer to private data structure * @arg2: unused * @req: sysctl request pointer * * On read: Displays the configured flow control mode * On write: Sets the device's flow control mode to the input, if it's valid. * Valid input strings: see ICE_SYSCTL_HELP_FC_CONFIG */ static int ice_sysctl_fc_config(SYSCTL_HANDLER_ARGS) { struct ice_softc *sc = (struct ice_softc *)arg1; struct ice_port_info *pi = sc->hw.port_info; struct ice_aqc_get_phy_caps_data pcaps = { 0 }; enum ice_fc_mode old_mode, new_mode; struct ice_hw *hw = &sc->hw; device_t dev = sc->dev; int status; int ret, fc_num; bool mode_set = false; struct sbuf buf; char *fc_str_end; char fc_str[32]; UNREFERENCED_PARAMETER(arg2); if (ice_driver_is_detaching(sc)) return (ESHUTDOWN); status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG, &pcaps, NULL); if (status) { device_printf(dev, "%s: ice_aq_get_phy_caps failed; status %s, aq_err %s\n", __func__, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); return (EIO); } /* Convert HW response format to SW enum value */ if ((pcaps.caps & ICE_AQC_PHY_EN_TX_LINK_PAUSE) && (pcaps.caps & ICE_AQC_PHY_EN_RX_LINK_PAUSE)) old_mode = ICE_FC_FULL; else if (pcaps.caps & ICE_AQC_PHY_EN_TX_LINK_PAUSE) old_mode = ICE_FC_TX_PAUSE; else if (pcaps.caps & ICE_AQC_PHY_EN_RX_LINK_PAUSE) old_mode = ICE_FC_RX_PAUSE; else old_mode = ICE_FC_NONE; /* Create "old" string for output */ bzero(fc_str, sizeof(fc_str)); sbuf_new_for_sysctl(&buf, fc_str, sizeof(fc_str), req); sbuf_printf(&buf, "%d<%s>", old_mode, ice_fc_str(old_mode)); sbuf_finish(&buf); sbuf_delete(&buf); ret = sysctl_handle_string(oidp, fc_str, sizeof(fc_str), req); if ((ret) || (req->newptr == NULL)) return (ret); /* Try to parse input as a string, first */ if (strcasecmp(ice_fc_str(ICE_FC_FULL), fc_str) == 0) { new_mode = ICE_FC_FULL; mode_set = true; } else if (strcasecmp(ice_fc_str(ICE_FC_TX_PAUSE), fc_str) == 0) { new_mode = ICE_FC_TX_PAUSE; mode_set = true; } else if (strcasecmp(ice_fc_str(ICE_FC_RX_PAUSE), fc_str) == 0) { new_mode = ICE_FC_RX_PAUSE; mode_set = true; } else if (strcasecmp(ice_fc_str(ICE_FC_NONE), fc_str) == 0) { new_mode = ICE_FC_NONE; mode_set = true; } /* * Then check if it's an integer, for compatibility with the method * used in older drivers. 
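* For example, "3" selects the same mode (ICE_FC_FULL) as the full flow-control string accepted by the strcasecmp() checks above.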
*/ if (!mode_set) { fc_num = strtol(fc_str, &fc_str_end, 0); if (fc_str_end == fc_str) fc_num = -1; switch (fc_num) { case 3: new_mode = ICE_FC_FULL; break; case 2: new_mode = ICE_FC_TX_PAUSE; break; case 1: new_mode = ICE_FC_RX_PAUSE; break; case 0: new_mode = ICE_FC_NONE; break; default: device_printf(dev, "%s: \"%s\" is not a valid flow control mode\n", __func__, fc_str); return (EINVAL); } } /* Save flow control mode from user */ pi->phy.curr_user_fc_req = new_mode; /* Turn off Priority Flow Control when Link Flow Control is enabled */ if ((hw->port_info->qos_cfg.is_sw_lldp) && (hw->port_info->qos_cfg.local_dcbx_cfg.pfc.pfcena != 0) && (new_mode != ICE_FC_NONE)) { ret = ice_config_pfc(sc, 0x0); if (ret) return (ret); } if (!ice_test_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN) && !sc->link_up) return 0; /* Apply settings requested by user */ return ice_apply_saved_phy_cfg(sc, ICE_APPLY_FC); } /** * ice_sysctl_negotiated_fc - Display currently negotiated FC mode * @oidp: sysctl oid structure * @arg1: pointer to private data structure * @arg2: unused * @req: sysctl request pointer * * On read: Displays the currently negotiated flow control settings. * * If link is not established, this will report ICE_FC_NONE, as no flow * control is negotiated while link is down. */ static int ice_sysctl_negotiated_fc(SYSCTL_HANDLER_ARGS) { struct ice_softc *sc = (struct ice_softc *)arg1; struct ice_port_info *pi = sc->hw.port_info; const char *negotiated_fc; UNREFERENCED_PARAMETER(arg2); if (ice_driver_is_detaching(sc)) return (ESHUTDOWN); negotiated_fc = ice_flowcontrol_mode(pi); return sysctl_handle_string(oidp, __DECONST(char *, negotiated_fc), 0, req); } /** * __ice_sysctl_phy_type_handler - Display/change supported PHY types/speeds * @oidp: sysctl oid structure * @arg1: pointer to private data structure * @arg2: unused * @req: sysctl request pointer * @is_phy_type_high: if true, handle the high PHY type instead of the low PHY type * * Private handler for phy_type_high and phy_type_low sysctls. 
*/ static int __ice_sysctl_phy_type_handler(SYSCTL_HANDLER_ARGS, bool is_phy_type_high) { struct ice_softc *sc = (struct ice_softc *)arg1; struct ice_aqc_get_phy_caps_data pcaps = { 0 }; struct ice_aqc_set_phy_cfg_data cfg = { 0 }; struct ice_hw *hw = &sc->hw; device_t dev = sc->dev; int status; uint64_t types; int ret; UNREFERENCED_PARAMETER(arg2); if (ice_driver_is_detaching(sc)) return (ESHUTDOWN); status = ice_aq_get_phy_caps(hw->port_info, false, ICE_AQC_REPORT_ACTIVE_CFG, &pcaps, NULL); if (status) { device_printf(dev, "%s: ice_aq_get_phy_caps failed; status %s, aq_err %s\n", __func__, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); return (EIO); } if (is_phy_type_high) types = pcaps.phy_type_high; else types = pcaps.phy_type_low; ret = sysctl_handle_64(oidp, &types, sizeof(types), req); if ((ret) || (req->newptr == NULL)) return (ret); ice_copy_phy_caps_to_cfg(hw->port_info, &pcaps, &cfg); if (is_phy_type_high) cfg.phy_type_high = types & hw->port_info->phy.phy_type_high; else cfg.phy_type_low = types & hw->port_info->phy.phy_type_low; cfg.caps |= ICE_AQ_PHY_ENA_AUTO_LINK_UPDT; status = ice_aq_set_phy_cfg(hw, hw->port_info, &cfg, NULL); if (status) { device_printf(dev, "%s: ice_aq_set_phy_cfg failed; status %s, aq_err %s\n", __func__, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); return (EIO); } return (0); } /** * ice_sysctl_phy_type_low - Display/change supported lower PHY types/speeds * @oidp: sysctl oid structure * @arg1: pointer to private data structure * @arg2: unused * @req: sysctl request pointer * * On read: Displays the currently supported lower PHY types * On write: Sets the device's supported low PHY types */ static int ice_sysctl_phy_type_low(SYSCTL_HANDLER_ARGS) { return __ice_sysctl_phy_type_handler(oidp, arg1, arg2, req, false); } /** * ice_sysctl_phy_type_high - Display/change supported higher PHY types/speeds * @oidp: sysctl oid structure * @arg1: pointer to private data structure * @arg2: unused * @req: sysctl request pointer * * On read: Displays the currently supported higher PHY types * On write: Sets the device's supported high PHY types */ static int ice_sysctl_phy_type_high(SYSCTL_HANDLER_ARGS) { return __ice_sysctl_phy_type_handler(oidp, arg1, arg2, req, true); } /** * ice_sysctl_phy_caps - Display response from Get PHY abilities * @oidp: sysctl oid structure * @arg1: pointer to private data structure * @arg2: unused * @req: sysctl request pointer * @report_mode: the mode to report * * On read: Display the response from Get PHY abilities with the given report * mode.
*/ static int ice_sysctl_phy_caps(SYSCTL_HANDLER_ARGS, u8 report_mode) { struct ice_softc *sc = (struct ice_softc *)arg1; struct ice_aqc_get_phy_caps_data pcaps = { 0 }; struct ice_hw *hw = &sc->hw; struct ice_port_info *pi = hw->port_info; device_t dev = sc->dev; int status; int ret; UNREFERENCED_PARAMETER(arg2); ret = priv_check(curthread, PRIV_DRIVER); if (ret) return (ret); if (ice_driver_is_detaching(sc)) return (ESHUTDOWN); status = ice_aq_get_phy_caps(pi, true, report_mode, &pcaps, NULL); if (status) { device_printf(dev, "%s: ice_aq_get_phy_caps failed; status %s, aq_err %s\n", __func__, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); return (EIO); } ret = sysctl_handle_opaque(oidp, &pcaps, sizeof(pcaps), req); if (req->newptr != NULL) return (EPERM); return (ret); } /** * ice_sysctl_phy_sw_caps - Display response from Get PHY abilities * @oidp: sysctl oid structure * @arg1: pointer to private data structure * @arg2: unused * @req: sysctl request pointer * * On read: Display the response from Get PHY abilities reporting the last * software configuration. */ static int ice_sysctl_phy_sw_caps(SYSCTL_HANDLER_ARGS) { return ice_sysctl_phy_caps(oidp, arg1, arg2, req, ICE_AQC_REPORT_ACTIVE_CFG); } /** * ice_sysctl_phy_nvm_caps - Display response from Get PHY abilities * @oidp: sysctl oid structure * @arg1: pointer to private data structure * @arg2: unused * @req: sysctl request pointer * * On read: Display the response from Get PHY abilities reporting the NVM * configuration. */ static int ice_sysctl_phy_nvm_caps(SYSCTL_HANDLER_ARGS) { return ice_sysctl_phy_caps(oidp, arg1, arg2, req, ICE_AQC_REPORT_TOPO_CAP_NO_MEDIA); } /** * ice_sysctl_phy_topo_caps - Display response from Get PHY abilities * @oidp: sysctl oid structure * @arg1: pointer to private data structure * @arg2: unused * @req: sysctl request pointer * * On read: Display the response from Get PHY abilities reporting the * topology configuration. */ static int ice_sysctl_phy_topo_caps(SYSCTL_HANDLER_ARGS) { return ice_sysctl_phy_caps(oidp, arg1, arg2, req, ICE_AQC_REPORT_TOPO_CAP_MEDIA); } /** * ice_sysctl_phy_link_status - Display response from Get Link Status * @oidp: sysctl oid structure * @arg1: pointer to private data structure * @arg2: unused * @req: sysctl request pointer * * On read: Display the response from firmware for the Get Link Status * request.
*/ static int ice_sysctl_phy_link_status(SYSCTL_HANDLER_ARGS) { struct ice_aqc_get_link_status_data link_data = { 0 }; struct ice_softc *sc = (struct ice_softc *)arg1; struct ice_hw *hw = &sc->hw; struct ice_port_info *pi = hw->port_info; struct ice_aqc_get_link_status *resp; struct ice_aq_desc desc; device_t dev = sc->dev; int status; int ret; UNREFERENCED_PARAMETER(arg2); /* * Ensure that only contexts with driver privilege are allowed to * access this information */ ret = priv_check(curthread, PRIV_DRIVER); if (ret) return (ret); if (ice_driver_is_detaching(sc)) return (ESHUTDOWN); ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_link_status); resp = &desc.params.get_link_status; resp->lport_num = pi->lport; status = ice_aq_send_cmd(hw, &desc, &link_data, sizeof(link_data), NULL); if (status) { device_printf(dev, "%s: ice_aq_send_cmd failed; status %s, aq_err %s\n", __func__, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); return (EIO); } ret = sysctl_handle_opaque(oidp, &link_data, sizeof(link_data), req); if (req->newptr != NULL) return (EPERM); return (ret); } /** * ice_sysctl_fw_cur_lldp_persist_status - Display current FW LLDP status * @oidp: sysctl oid structure * @arg1: pointer to private softc structure * @arg2: unused * @req: sysctl request pointer * * On read: Displays current persistent LLDP status. */ static int ice_sysctl_fw_cur_lldp_persist_status(SYSCTL_HANDLER_ARGS) { struct ice_softc *sc = (struct ice_softc *)arg1; struct ice_hw *hw = &sc->hw; device_t dev = sc->dev; int status; struct sbuf *sbuf; u32 lldp_state; UNREFERENCED_PARAMETER(arg2); UNREFERENCED_PARAMETER(oidp); if (ice_driver_is_detaching(sc)) return (ESHUTDOWN); status = ice_get_cur_lldp_persist_status(hw, &lldp_state); if (status) { device_printf(dev, "Could not acquire current LLDP persistence status, err %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); return (EIO); } sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req); sbuf_printf(sbuf, "%s", ice_fw_lldp_status(lldp_state)); sbuf_finish(sbuf); sbuf_delete(sbuf); return (0); } /** * ice_sysctl_fw_dflt_lldp_persist_status - Display default FW LLDP status * @oidp: sysctl oid structure * @arg1: pointer to private softc structure * @arg2: unused * @req: sysctl request pointer * * On read: Displays default persistent LLDP status. */ static int ice_sysctl_fw_dflt_lldp_persist_status(SYSCTL_HANDLER_ARGS) { struct ice_softc *sc = (struct ice_softc *)arg1; struct ice_hw *hw = &sc->hw; device_t dev = sc->dev; int status; struct sbuf *sbuf; u32 lldp_state; UNREFERENCED_PARAMETER(arg2); UNREFERENCED_PARAMETER(oidp); if (ice_driver_is_detaching(sc)) return (ESHUTDOWN); status = ice_get_dflt_lldp_persist_status(hw, &lldp_state); if (status) { device_printf(dev, "Could not acquire default LLDP persistence status, err %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); return (EIO); } sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req); sbuf_printf(sbuf, "%s", ice_fw_lldp_status(lldp_state)); sbuf_finish(sbuf); sbuf_delete(sbuf); return (0); } /** * ice_dscp_is_mapped - Check for non-zero DSCP to TC mappings * @dcbcfg: Configuration struct to check for mappings in * * @return true if there exists a non-zero DSCP to TC mapping * inside the input DCB configuration struct. 
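* * For example (illustrative), dscp_map[46] == 5 would map DSCP 46 (EF) to TC 5 and make this function return true.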
*/ static bool ice_dscp_is_mapped(struct ice_dcbx_cfg *dcbcfg) { for (int i = 0; i < ICE_DSCP_NUM_VAL; i++) if (dcbcfg->dscp_map[i] != 0) return (true); return (false); } #define ICE_SYSCTL_HELP_FW_LLDP_AGENT \ "\nDisplay or change FW LLDP agent state:" \ "\n\t0 - disabled" \ "\n\t1 - enabled" /** * ice_sysctl_fw_lldp_agent - Display or change the FW LLDP agent status * @oidp: sysctl oid structure * @arg1: pointer to private softc structure * @arg2: unused * @req: sysctl request pointer * * On read: Displays whether the FW LLDP agent is running * On write: Persistently enables or disables the FW LLDP agent */ static int ice_sysctl_fw_lldp_agent(SYSCTL_HANDLER_ARGS) { struct ice_softc *sc = (struct ice_softc *)arg1; struct ice_dcbx_cfg *local_dcbx_cfg; struct ice_hw *hw = &sc->hw; device_t dev = sc->dev; int status; int ret; u32 old_state; u8 fw_lldp_enabled; bool retried_start_lldp = false; UNREFERENCED_PARAMETER(arg2); if (ice_driver_is_detaching(sc)) return (ESHUTDOWN); status = ice_get_cur_lldp_persist_status(hw, &old_state); if (status) { device_printf(dev, "Could not acquire current LLDP persistence status, err %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); return (EIO); } if (old_state > ICE_LLDP_ADMINSTATUS_ENA_RXTX) { status = ice_get_dflt_lldp_persist_status(hw, &old_state); if (status) { device_printf(dev, "Could not acquire default LLDP persistence status, err %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); return (EIO); } } if (old_state == 0) fw_lldp_enabled = false; else fw_lldp_enabled = true; ret = sysctl_handle_bool(oidp, &fw_lldp_enabled, 0, req); if ((ret) || (req->newptr == NULL)) return (ret); if (old_state == 0 && fw_lldp_enabled == false) return (0); if (old_state != 0 && fw_lldp_enabled == true) return (0); /* Block transition to FW LLDP if DSCP mode is enabled */ local_dcbx_cfg = &hw->port_info->qos_cfg.local_dcbx_cfg; if ((local_dcbx_cfg->pfc_mode == ICE_QOS_MODE_DSCP) || ice_dscp_is_mapped(local_dcbx_cfg)) { device_printf(dev, "Cannot enable FW-LLDP agent while DSCP QoS is active.\n"); return (EOPNOTSUPP); } if (fw_lldp_enabled == false) { status = ice_aq_stop_lldp(hw, true, true, NULL); /* EPERM is returned if the LLDP agent is already shutdown */ if (status && hw->adminq.sq_last_status != ICE_AQ_RC_EPERM) { device_printf(dev, "%s: ice_aq_stop_lldp failed; status %s, aq_err %s\n", __func__, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); return (EIO); } ice_aq_set_dcb_parameters(hw, true, NULL); hw->port_info->qos_cfg.is_sw_lldp = true; ice_add_rx_lldp_filter(sc); } else { ice_del_rx_lldp_filter(sc); retry_start_lldp: status = ice_aq_start_lldp(hw, true, NULL); if (status) { switch (hw->adminq.sq_last_status) { /* EEXIST is returned if the LLDP agent is already started */ case ICE_AQ_RC_EEXIST: break; case ICE_AQ_RC_EAGAIN: /* Retry command after a 2 second wait */ if (retried_start_lldp == false) { retried_start_lldp = true; pause("slldp", ICE_START_LLDP_RETRY_WAIT); goto retry_start_lldp; } /* Fallthrough */ default: device_printf(dev, "%s: ice_aq_start_lldp failed; status %s, aq_err %s\n", __func__, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); return (EIO); } } ice_start_dcbx_agent(sc); /* Init DCB needs to be done during enabling LLDP to properly * propagate the configuration. 
*/ status = ice_init_dcb(hw, true); if (status) { device_printf(dev, "%s: ice_init_dcb failed; status %s, aq_err %s\n", __func__, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); hw->port_info->qos_cfg.dcbx_status = ICE_DCBX_STATUS_NOT_STARTED; } } return (ret); } #define ICE_SYSCTL_HELP_ETS_MIN_RATE \ "\nIn FW DCB mode (fw_lldp_agent=1), displays the current ETS bandwidth table." \ "\nIn SW DCB mode, displays and allows setting the table." \ "\nInput must be in the format e.g. 30,10,10,10,10,10,10,10" \ "\nWhere the bandwidth total must add up to 100" /** * ice_sysctl_ets_min_rate - Report/configure ETS bandwidth * @oidp: sysctl oid structure * @arg1: pointer to private data structure * @arg2: unused * @req: sysctl request pointer * * Returns the current ETS TC bandwidth table * cached by the driver. * * In SW DCB mode this sysctl also accepts a value that will * be sent to the firmware for configuration. */ static int ice_sysctl_ets_min_rate(SYSCTL_HANDLER_ARGS) { struct ice_softc *sc = (struct ice_softc *)arg1; struct ice_dcbx_cfg *local_dcbx_cfg; struct ice_port_info *pi; struct ice_hw *hw = &sc->hw; device_t dev = sc->dev; int status; struct sbuf *sbuf; int ret; /* Store input rates from user */ char ets_user_buf[128] = ""; u8 new_ets_table[ICE_MAX_TRAFFIC_CLASS] = {}; UNREFERENCED_PARAMETER(arg2); if (ice_driver_is_detaching(sc)) return (ESHUTDOWN); if (req->oldptr == NULL && req->newptr == NULL) { ret = SYSCTL_OUT(req, 0, 128); return (ret); } pi = hw->port_info; local_dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg; sbuf = sbuf_new(NULL, ets_user_buf, 128, SBUF_FIXEDLEN | SBUF_INCLUDENUL); /* Format ETS BW data for output */ for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) { sbuf_printf(sbuf, "%d", local_dcbx_cfg->etscfg.tcbwtable[i]); if (i != ICE_MAX_TRAFFIC_CLASS - 1) sbuf_printf(sbuf, ","); } sbuf_finish(sbuf); sbuf_delete(sbuf); /* Read in the new ETS values */ ret = sysctl_handle_string(oidp, ets_user_buf, sizeof(ets_user_buf), req); if ((ret) || (req->newptr == NULL)) return (ret); /* Don't allow setting changes in FW DCB mode */ if (!hw->port_info->qos_cfg.is_sw_lldp) return (EPERM); ret = ice_ets_str_to_tbl(ets_user_buf, new_ets_table, 100); if (ret) { device_printf(dev, "%s: Could not parse input BW table: %s\n", __func__, ets_user_buf); return (ret); } if (!ice_check_ets_bw(new_ets_table)) { device_printf(dev, "%s: Bandwidth sum does not equal 100: %s\n", __func__, ets_user_buf); return (EINVAL); } memcpy(local_dcbx_cfg->etscfg.tcbwtable, new_ets_table, sizeof(new_ets_table)); /* If BW > 0, then set TSA entry to 2 */ for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) { if (new_ets_table[i] > 0) local_dcbx_cfg->etscfg.tsatable[i] = 2; else local_dcbx_cfg->etscfg.tsatable[i] = 0; } local_dcbx_cfg->etscfg.willing = 0; local_dcbx_cfg->etsrec = local_dcbx_cfg->etscfg; local_dcbx_cfg->app_mode = ICE_DCBX_APPS_NON_WILLING; status = ice_set_dcb_cfg(pi); if (status) { device_printf(dev, "%s: Failed to set DCB config; status %s, aq_err %s\n", __func__, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); return (EIO); } ice_do_dcb_reconfig(sc, false); return (0); } #define ICE_SYSCTL_HELP_UP2TC_MAP \ "\nIn FW DCB mode (fw_lldp_agent=1), displays the current ETS priority assignment table." \ "\nIn SW DCB mode, displays and allows setting the table." 
\ "\nInput must be in this format: 0,1,2,3,4,5,6,7" \ "\nWhere the 1st number is the TC for UP0, 2nd number is the TC for UP1, etc" /** * ice_sysctl_up2tc_map - Report or configure UP2TC mapping * @oidp: sysctl oid structure * @arg1: pointer to private data structure * @arg2: unused * @req: sysctl request pointer * * In FW DCB mode, returns the current ETS prio table / * UP2TC mapping from the local MIB. * * In SW DCB mode this sysctl also accepts a value that will * be sent to the firmware for configuration. */ static int ice_sysctl_up2tc_map(SYSCTL_HANDLER_ARGS) { struct ice_softc *sc = (struct ice_softc *)arg1; struct ice_dcbx_cfg *local_dcbx_cfg; struct ice_port_info *pi; struct ice_hw *hw = &sc->hw; device_t dev = sc->dev; int status; struct sbuf *sbuf; int ret; /* Store input rates from user */ char up2tc_user_buf[128] = ""; /* This array is indexed by UP, not TC */ u8 new_up2tc[ICE_MAX_TRAFFIC_CLASS] = {}; UNREFERENCED_PARAMETER(arg2); if (ice_driver_is_detaching(sc)) return (ESHUTDOWN); if (req->oldptr == NULL && req->newptr == NULL) { ret = SYSCTL_OUT(req, 0, 128); return (ret); } pi = hw->port_info; local_dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg; sbuf = sbuf_new(NULL, up2tc_user_buf, 128, SBUF_FIXEDLEN | SBUF_INCLUDENUL); /* Format ETS Priority Mapping Table for output */ for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) { sbuf_printf(sbuf, "%d", local_dcbx_cfg->etscfg.prio_table[i]); if (i != ICE_MAX_TRAFFIC_CLASS - 1) sbuf_printf(sbuf, ","); } sbuf_finish(sbuf); sbuf_delete(sbuf); /* Read in the new ETS priority mapping */ ret = sysctl_handle_string(oidp, up2tc_user_buf, sizeof(up2tc_user_buf), req); if ((ret) || (req->newptr == NULL)) return (ret); /* Don't allow setting changes in FW DCB mode */ if (!hw->port_info->qos_cfg.is_sw_lldp) return (EPERM); ret = ice_ets_str_to_tbl(up2tc_user_buf, new_up2tc, ICE_MAX_TRAFFIC_CLASS - 1); if (ret) { device_printf(dev, "%s: Could not parse input priority assignment table: %s\n", __func__, up2tc_user_buf); return (ret); } /* Prepare updated ETS CFG/REC TLVs */ memcpy(local_dcbx_cfg->etscfg.prio_table, new_up2tc, sizeof(new_up2tc)); memcpy(local_dcbx_cfg->etsrec.prio_table, new_up2tc, sizeof(new_up2tc)); status = ice_set_dcb_cfg(pi); if (status) { device_printf(dev, "%s: Failed to set DCB config; status %s, aq_err %s\n", __func__, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); return (EIO); } ice_do_dcb_reconfig(sc, false); return (0); } /** * ice_config_pfc - helper function to set PFC config in FW * @sc: device private structure * @new_mode: bit flags indicating PFC status for TCs * * @pre must be in SW DCB mode * * Configures the driver's local PFC TLV and sends it to the * FW for configuration, then reconfigures the driver/VSI * for DCB if needed. 
*/ static int ice_config_pfc(struct ice_softc *sc, u8 new_mode) { struct ice_dcbx_cfg *local_dcbx_cfg; struct ice_hw *hw = &sc->hw; struct ice_port_info *pi; device_t dev = sc->dev; int status; pi = hw->port_info; local_dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg; /* Prepare updated PFC TLV */ local_dcbx_cfg->pfc.pfcena = new_mode; local_dcbx_cfg->pfc.pfccap = ICE_MAX_TRAFFIC_CLASS; local_dcbx_cfg->pfc.willing = 0; local_dcbx_cfg->pfc.mbc = 0; /* Warn if PFC is being disabled with RoCE v2 in use */ if (new_mode == 0 && sc->rdma_entry.attached) device_printf(dev, "WARNING: Recommended that Priority Flow Control is enabled when RoCEv2 is in use\n"); status = ice_set_dcb_cfg(pi); if (status) { device_printf(dev, "%s: Failed to set DCB config; status %s, aq_err %s\n", __func__, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); return (EIO); } ice_do_dcb_reconfig(sc, false); return (0); } #define ICE_SYSCTL_HELP_PFC_CONFIG \ "\nIn FW DCB mode (fw_lldp_agent=1), displays the current Priority Flow Control configuration" \ "\nIn SW DCB mode, displays and allows setting the configuration" \ "\nInput/Output is in this format: 0xff" \ "\nWhere bit position # enables/disables PFC for that Traffic Class #" /** * ice_sysctl_pfc_config - Report or configure enabled PFC TCs * @oidp: sysctl oid structure * @arg1: pointer to private data structure * @arg2: unused * @req: sysctl request pointer * * In FW DCB mode, returns a bitmap containing the current TCs * that have PFC enabled on them. * * In SW DCB mode this sysctl also accepts a value that will * be sent to the firmware for configuration. */ static int ice_sysctl_pfc_config(SYSCTL_HANDLER_ARGS) { struct ice_softc *sc = (struct ice_softc *)arg1; struct ice_dcbx_cfg *local_dcbx_cfg; struct ice_port_info *pi; struct ice_hw *hw = &sc->hw; int ret; /* Store input flags from user */ u8 user_pfc; UNREFERENCED_PARAMETER(arg2); if (ice_driver_is_detaching(sc)) return (ESHUTDOWN); if (req->oldptr == NULL && req->newptr == NULL) { ret = SYSCTL_OUT(req, 0, sizeof(u8)); return (ret); } pi = hw->port_info; local_dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg; /* Format current PFC enable setting for output */ user_pfc = local_dcbx_cfg->pfc.pfcena; /* Read in the new PFC config */ ret = sysctl_handle_8(oidp, &user_pfc, 0, req); if ((ret) || (req->newptr == NULL)) return (ret); /* Don't allow setting changes in FW DCB mode */ if (!hw->port_info->qos_cfg.is_sw_lldp) return (EPERM); /* If LFC is active and PFC is going to be turned on, turn LFC off */ if (user_pfc != 0 && pi->phy.curr_user_fc_req != ICE_FC_NONE) { pi->phy.curr_user_fc_req = ICE_FC_NONE; if (ice_test_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN) || sc->link_up) { ret = ice_apply_saved_phy_cfg(sc, ICE_APPLY_FC); if (ret) return (ret); } } return ice_config_pfc(sc, user_pfc); } #define ICE_SYSCTL_HELP_PFC_MODE \ "\nDisplay and set the current QoS mode for the firmware" \ "\n\t0: VLAN UP mode" \ "\n\t1: DSCP mode" /** * ice_sysctl_pfc_mode * @oidp: sysctl oid structure * @arg1: pointer to private data structure * @arg2: unused * @req: sysctl request pointer * * Gets and sets whether the port is in DSCP or VLAN PCP-based * PFC mode. This is also used to set whether DSCP or VLAN PCP * -based settings are configured for DCB. 
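* * Example (illustrative): writing 1 via "sysctl dev.ice.0.pfc_mode=1" selects DSCP mode and 0 selects VLAN UP mode, per ICE_SYSCTL_HELP_PFC_MODE above.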
*/ static int ice_sysctl_pfc_mode(SYSCTL_HANDLER_ARGS) { struct ice_softc *sc = (struct ice_softc *)arg1; struct ice_dcbx_cfg *local_dcbx_cfg; struct ice_port_info *pi; struct ice_hw *hw = &sc->hw; device_t dev = sc->dev; int status; u8 user_pfc_mode, aq_pfc_mode; int ret; UNREFERENCED_PARAMETER(arg2); if (ice_driver_is_detaching(sc)) return (ESHUTDOWN); if (req->oldptr == NULL && req->newptr == NULL) { ret = SYSCTL_OUT(req, 0, sizeof(u8)); return (ret); } pi = hw->port_info; local_dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg; user_pfc_mode = local_dcbx_cfg->pfc_mode; /* Read in the new mode */ ret = sysctl_handle_8(oidp, &user_pfc_mode, 0, req); if ((ret) || (req->newptr == NULL)) return (ret); /* Don't allow setting changes in FW DCB mode */ if (!hw->port_info->qos_cfg.is_sw_lldp) return (EPERM); /* Currently, there are only two modes */ switch (user_pfc_mode) { case 0: aq_pfc_mode = ICE_AQC_PFC_VLAN_BASED_PFC; break; case 1: aq_pfc_mode = ICE_AQC_PFC_DSCP_BASED_PFC; break; default: device_printf(dev, "%s: Valid input range is 0-1 (input %d)\n", __func__, user_pfc_mode); return (EINVAL); } status = ice_aq_set_pfc_mode(hw, aq_pfc_mode, NULL); if (status == ICE_ERR_NOT_SUPPORTED) { device_printf(dev, "%s: Failed to set PFC mode; DCB not supported\n", __func__); return (ENODEV); } if (status) { device_printf(dev, "%s: Failed to set PFC mode; status %s, aq_err %s\n", __func__, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); return (EIO); } /* Reset settings to default when mode is changed */ ice_set_default_local_mib_settings(sc); /* Cache current settings and reconfigure */ local_dcbx_cfg->pfc_mode = user_pfc_mode; ice_do_dcb_reconfig(sc, false); return (0); } #define ICE_SYSCTL_HELP_SET_LINK_ACTIVE \ "\nKeep link active after setting interface down:" \ "\n\t0 - disable" \ "\n\t1 - enable" /** * ice_sysctl_set_link_active * @oidp: sysctl oid structure * @arg1: pointer to private data structure * @arg2: unused * @req: sysctl request pointer * * Set the link_active_on_if_down sysctl flag. */ static int ice_sysctl_set_link_active(SYSCTL_HANDLER_ARGS) { struct ice_softc *sc = (struct ice_softc *)arg1; bool mode; int ret; UNREFERENCED_PARAMETER(arg2); mode = ice_test_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN); ret = sysctl_handle_bool(oidp, &mode, 0, req); if ((ret) || (req->newptr == NULL)) return (ret); if (mode) ice_set_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN); else ice_clear_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN); return (0); } /** * ice_sysctl_debug_set_link * @oidp: sysctl oid structure * @arg1: pointer to private data structure * @arg2: unused * @req: sysctl request pointer * * Set link up/down in debug session. */ static int ice_sysctl_debug_set_link(SYSCTL_HANDLER_ARGS) { struct ice_softc *sc = (struct ice_softc *)arg1; bool mode; int ret; UNREFERENCED_PARAMETER(arg2); ret = sysctl_handle_bool(oidp, &mode, 0, req); if ((ret) || (req->newptr == NULL)) return (ret); ice_set_link(sc, mode != 0); return (0); } /** * ice_add_device_sysctls - add device specific dynamic sysctls * @sc: device private structure * * Add per-device dynamic sysctls which show device configuration or enable * configuring device functionality. For tunable values which can be set prior * to load, see ice_add_device_tunables. * * This function depends on the sysctl layout setup by ice_add_device_tunables, * and likely should be called near the end of the attach process. 
 */
void
ice_add_device_sysctls(struct ice_softc *sc)
{
    struct sysctl_oid *hw_node;
    device_t dev = sc->dev;

    struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
    struct sysctl_oid_list *ctx_list =
        SYSCTL_CHILDREN(device_get_sysctl_tree(dev));

    SYSCTL_ADD_PROC(ctx, ctx_list, OID_AUTO, "fw_version",
        CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
        ice_sysctl_show_fw, "A", "Firmware version");

    if (ice_is_bit_set(sc->feat_en, ICE_FEATURE_HAS_PBA)) {
        SYSCTL_ADD_PROC(ctx, ctx_list, OID_AUTO, "pba_number",
            CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
            ice_sysctl_pba_number, "A", "Product Board Assembly Number");
    }

    if (ice_is_bit_set(sc->feat_en, ICE_FEATURE_TEMP_SENSOR)) {
        SYSCTL_ADD_PROC(ctx, ctx_list, OID_AUTO, "temp",
            CTLTYPE_S8 | CTLFLAG_RD, sc, 0,
            ice_sysctl_temperature, "CU",
            "Device temperature in degrees Celsius (C)");
    }

    SYSCTL_ADD_PROC(ctx, ctx_list, OID_AUTO, "ddp_version",
        CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
        ice_sysctl_pkg_version, "A", "Active DDP package name and version");

    SYSCTL_ADD_PROC(ctx, ctx_list, OID_AUTO, "current_speed",
        CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
        ice_sysctl_current_speed, "A", "Current Port Link Speed");

    SYSCTL_ADD_PROC(ctx, ctx_list, OID_AUTO, "requested_fec",
        CTLTYPE_STRING | CTLFLAG_RW, sc, 0,
        ice_sysctl_fec_config, "A", ICE_SYSCTL_HELP_FEC_CONFIG);

    SYSCTL_ADD_PROC(ctx, ctx_list, OID_AUTO, "negotiated_fec",
        CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
        ice_sysctl_negotiated_fec, "A", "Current Negotiated FEC mode");

    SYSCTL_ADD_PROC(ctx, ctx_list, OID_AUTO, "fc",
        CTLTYPE_STRING | CTLFLAG_RW, sc, 0,
        ice_sysctl_fc_config, "A", ICE_SYSCTL_HELP_FC_CONFIG);

    SYSCTL_ADD_PROC(ctx, ctx_list, OID_AUTO, "advertise_speed",
        CTLTYPE_U16 | CTLFLAG_RW, sc, 0,
        ice_sysctl_advertise_speed, "SU", ICE_SYSCTL_HELP_ADVERTISE_SPEED);

    SYSCTL_ADD_PROC(ctx, ctx_list, OID_AUTO, "fw_lldp_agent",
        CTLTYPE_U8 | CTLFLAG_RWTUN, sc, 0,
        ice_sysctl_fw_lldp_agent, "CU", ICE_SYSCTL_HELP_FW_LLDP_AGENT);

    SYSCTL_ADD_PROC(ctx, ctx_list, OID_AUTO, "ets_min_rate",
        CTLTYPE_STRING | CTLFLAG_RW, sc, 0,
        ice_sysctl_ets_min_rate, "A", ICE_SYSCTL_HELP_ETS_MIN_RATE);

    SYSCTL_ADD_PROC(ctx, ctx_list, OID_AUTO, "up2tc_map",
        CTLTYPE_STRING | CTLFLAG_RW, sc, 0,
        ice_sysctl_up2tc_map, "A", ICE_SYSCTL_HELP_UP2TC_MAP);

    SYSCTL_ADD_PROC(ctx, ctx_list, OID_AUTO, "pfc",
        CTLTYPE_U8 | CTLFLAG_RW, sc, 0,
        ice_sysctl_pfc_config, "CU", ICE_SYSCTL_HELP_PFC_CONFIG);

    SYSCTL_ADD_PROC(ctx, ctx_list, OID_AUTO, "pfc_mode",
        CTLTYPE_U8 | CTLFLAG_RWTUN, sc, 0,
        ice_sysctl_pfc_mode, "CU", ICE_SYSCTL_HELP_PFC_MODE);

    SYSCTL_ADD_PROC(ctx, ctx_list, OID_AUTO, "allow_no_fec_modules_in_auto",
        CTLTYPE_U8 | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, sc, 0,
        ice_sysctl_allow_no_fec_mod_in_auto, "CU",
        "Allow \"No FEC\" mode in FEC auto-negotiation");

    SYSCTL_ADD_PROC(ctx, ctx_list, OID_AUTO, "link_active_on_if_down",
        CTLTYPE_U8 | CTLFLAG_RWTUN, sc, 0,
        ice_sysctl_set_link_active, "CU", ICE_SYSCTL_HELP_SET_LINK_ACTIVE);

    SYSCTL_ADD_PROC(ctx, ctx_list, OID_AUTO, "create_mirror_interface",
        CTLTYPE_STRING | CTLFLAG_RW, sc, 0,
        ice_sysctl_create_mirror_interface, "A", "");

    SYSCTL_ADD_PROC(ctx, ctx_list, OID_AUTO, "destroy_mirror_interface",
        CTLTYPE_STRING | CTLFLAG_RW, sc, 0,
        ice_sysctl_destroy_mirror_interface, "A", "");

    ice_add_dscp2tc_map_sysctls(sc, ctx, ctx_list);

    /* Differentiate software and hardware statistics by keeping hw stats
     * in their own node. This isn't in ice_add_device_tunables, because
     * we won't have any CTLFLAG_TUN sysctls under this node.
*/ hw_node = SYSCTL_ADD_NODE(ctx, ctx_list, OID_AUTO, "hw", CTLFLAG_RD, NULL, "Port Hardware Statistics"); ice_add_sysctls_mac_stats(ctx, hw_node, sc); /* Add the main PF VSI stats now. Other VSIs will add their own stats * during creation */ ice_add_vsi_sysctls(&sc->pf_vsi); /* Add sysctls related to debugging the device driver. This includes * sysctls which display additional internal driver state for use in * understanding what is happening within the driver. */ ice_add_debug_sysctls(sc); } /** * @enum hmc_error_type * @brief enumeration of HMC errors * * Enumeration defining the possible HMC errors that might occur. */ enum hmc_error_type { HMC_ERR_PMF_INVALID = 0, HMC_ERR_VF_IDX_INVALID = 1, HMC_ERR_VF_PARENT_PF_INVALID = 2, /* 3 is reserved */ HMC_ERR_INDEX_TOO_BIG = 4, HMC_ERR_ADDRESS_TOO_LARGE = 5, HMC_ERR_SEGMENT_DESC_INVALID = 6, HMC_ERR_SEGMENT_DESC_TOO_SMALL = 7, HMC_ERR_PAGE_DESC_INVALID = 8, HMC_ERR_UNSUPPORTED_REQUEST_COMPLETION = 9, /* 10 is reserved */ HMC_ERR_INVALID_OBJECT_TYPE = 11, /* 12 is reserved */ }; /** * ice_log_hmc_error - Log an HMC error message * @hw: device hw structure * @dev: the device to pass to device_printf() * * Log a message when an HMC error interrupt is triggered. */ void ice_log_hmc_error(struct ice_hw *hw, device_t dev) { u32 info, data; u8 index, errtype, objtype; bool isvf; info = rd32(hw, PFHMC_ERRORINFO); data = rd32(hw, PFHMC_ERRORDATA); index = (u8)(info & PFHMC_ERRORINFO_PMF_INDEX_M); errtype = (u8)((info & PFHMC_ERRORINFO_HMC_ERROR_TYPE_M) >> PFHMC_ERRORINFO_HMC_ERROR_TYPE_S); objtype = (u8)((info & PFHMC_ERRORINFO_HMC_OBJECT_TYPE_M) >> PFHMC_ERRORINFO_HMC_OBJECT_TYPE_S); isvf = info & PFHMC_ERRORINFO_PMF_ISVF_M; device_printf(dev, "%s HMC Error detected on PMF index %d:\n", isvf ? "VF" : "PF", index); device_printf(dev, "error type %d, object type %d, data 0x%08x\n", errtype, objtype, data); switch (errtype) { case HMC_ERR_PMF_INVALID: device_printf(dev, "Private Memory Function is not valid\n"); break; case HMC_ERR_VF_IDX_INVALID: device_printf(dev, "Invalid Private Memory Function index for PE enabled VF\n"); break; case HMC_ERR_VF_PARENT_PF_INVALID: device_printf(dev, "Invalid parent PF for PE enabled VF\n"); break; case HMC_ERR_INDEX_TOO_BIG: device_printf(dev, "Object index too big\n"); break; case HMC_ERR_ADDRESS_TOO_LARGE: device_printf(dev, "Address extends beyond segment descriptor limit\n"); break; case HMC_ERR_SEGMENT_DESC_INVALID: device_printf(dev, "Segment descriptor is invalid\n"); break; case HMC_ERR_SEGMENT_DESC_TOO_SMALL: device_printf(dev, "Segment descriptor is too small\n"); break; case HMC_ERR_PAGE_DESC_INVALID: device_printf(dev, "Page descriptor is invalid\n"); break; case HMC_ERR_UNSUPPORTED_REQUEST_COMPLETION: device_printf(dev, "Unsupported Request completion received from PCIe\n"); break; case HMC_ERR_INVALID_OBJECT_TYPE: device_printf(dev, "Invalid object type\n"); break; default: device_printf(dev, "Unknown HMC error\n"); } /* Clear the error indication */ wr32(hw, PFHMC_ERRORINFO, 0); } /** * @struct ice_sysctl_info * @brief sysctl information * * Structure used to simplify the process of defining the many similar * statistics sysctls. */ struct ice_sysctl_info { u64 *stat; const char *name; const char *description; }; /** * ice_add_sysctls_eth_stats - Add sysctls for ethernet statistics * @ctx: sysctl ctx to use * @parent: the parent node to add sysctls under * @stats: the ethernet stats structure to source values from * * Adds statistics sysctls for the ethernet statistics of the MAC or a VSI. 
* Will add them under the parent node specified. * * Note that tx_errors is only meaningful for VSIs and not the global MAC/PF * statistics, so it is not included here. Similarly, rx_discards has different * descriptions for VSIs and MAC/PF stats, so it is also not included here. */ void ice_add_sysctls_eth_stats(struct sysctl_ctx_list *ctx, struct sysctl_oid *parent, struct ice_eth_stats *stats) { const struct ice_sysctl_info ctls[] = { /* Rx Stats */ { &stats->rx_bytes, "good_octets_rcvd", "Good Octets Received" }, { &stats->rx_unicast, "ucast_pkts_rcvd", "Unicast Packets Received" }, { &stats->rx_multicast, "mcast_pkts_rcvd", "Multicast Packets Received" }, { &stats->rx_broadcast, "bcast_pkts_rcvd", "Broadcast Packets Received" }, /* Tx Stats */ { &stats->tx_bytes, "good_octets_txd", "Good Octets Transmitted" }, { &stats->tx_unicast, "ucast_pkts_txd", "Unicast Packets Transmitted" }, { &stats->tx_multicast, "mcast_pkts_txd", "Multicast Packets Transmitted" }, { &stats->tx_broadcast, "bcast_pkts_txd", "Broadcast Packets Transmitted" }, /* End */ { 0, 0, 0 } }; struct sysctl_oid_list *parent_list = SYSCTL_CHILDREN(parent); const struct ice_sysctl_info *entry = ctls; while (entry->stat != 0) { SYSCTL_ADD_U64(ctx, parent_list, OID_AUTO, entry->name, CTLFLAG_RD | CTLFLAG_STATS, entry->stat, 0, entry->description); entry++; } } /** * ice_sysctl_tx_cso_stat - Display Tx checksum offload statistic * @oidp: sysctl oid structure * @arg1: pointer to private data structure * @arg2: Tx CSO stat to read * @req: sysctl request pointer * * On read: Sums the per-queue Tx CSO stat and displays it. */ static int ice_sysctl_tx_cso_stat(SYSCTL_HANDLER_ARGS) { struct ice_vsi *vsi = (struct ice_vsi *)arg1; enum ice_tx_cso_stat type = (enum ice_tx_cso_stat)arg2; u64 stat = 0; int i; if (ice_driver_is_detaching(vsi->sc)) return (ESHUTDOWN); /* Check that the type is valid */ if (type >= ICE_CSO_STAT_TX_COUNT) return (EDOOFUS); /* Sum the stat for each of the Tx queues */ for (i = 0; i < vsi->num_tx_queues; i++) stat += vsi->tx_queues[i].stats.cso[type]; return sysctl_handle_64(oidp, NULL, stat, req); } /** * ice_sysctl_rx_cso_stat - Display Rx checksum offload statistic * @oidp: sysctl oid structure * @arg1: pointer to private data structure * @arg2: Rx CSO stat to read * @req: sysctl request pointer * * On read: Sums the per-queue Rx CSO stat and displays it. */ static int ice_sysctl_rx_cso_stat(SYSCTL_HANDLER_ARGS) { struct ice_vsi *vsi = (struct ice_vsi *)arg1; enum ice_rx_cso_stat type = (enum ice_rx_cso_stat)arg2; u64 stat = 0; int i; if (ice_driver_is_detaching(vsi->sc)) return (ESHUTDOWN); /* Check that the type is valid */ if (type >= ICE_CSO_STAT_RX_COUNT) return (EDOOFUS); /* Sum the stat for each of the Rx queues */ for (i = 0; i < vsi->num_rx_queues; i++) stat += vsi->rx_queues[i].stats.cso[type]; return sysctl_handle_64(oidp, NULL, stat, req); } /** * ice_sysctl_rx_errors_stat - Display aggregate of Rx errors * @oidp: sysctl oid structure * @arg1: pointer to private data structure * @arg2: unused * @req: sysctl request pointer * * On read: Sums current values of Rx error statistics and * displays it. 
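 *
 * As a summary of the handler body below, the value reported is:
 * rx_undersize + rx_fragments + rx_oversize + rx_jabber + crc_errors +
 * illegal_bytes, plus every per-queue Rx checksum error counter.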
*/ static int ice_sysctl_rx_errors_stat(SYSCTL_HANDLER_ARGS) { struct ice_vsi *vsi = (struct ice_vsi *)arg1; struct ice_hw_port_stats *hs = &vsi->sc->stats.cur; u64 stat = 0; int i, type; UNREFERENCED_PARAMETER(arg2); if (ice_driver_is_detaching(vsi->sc)) return (ESHUTDOWN); stat += hs->rx_undersize; stat += hs->rx_fragments; stat += hs->rx_oversize; stat += hs->rx_jabber; stat += hs->crc_errors; stat += hs->illegal_bytes; /* Checksum error stats */ for (i = 0; i < vsi->num_rx_queues; i++) for (type = ICE_CSO_STAT_RX_IP4_ERR; type < ICE_CSO_STAT_RX_COUNT; type++) stat += vsi->rx_queues[i].stats.cso[type]; return sysctl_handle_64(oidp, NULL, stat, req); } /** * @struct ice_rx_cso_stat_info * @brief sysctl information for an Rx checksum offload statistic * * Structure used to simplify the process of defining the checksum offload * statistics. */ struct ice_rx_cso_stat_info { enum ice_rx_cso_stat type; const char *name; const char *description; }; /** * @struct ice_tx_cso_stat_info * @brief sysctl information for a Tx checksum offload statistic * * Structure used to simplify the process of defining the checksum offload * statistics. */ struct ice_tx_cso_stat_info { enum ice_tx_cso_stat type; const char *name; const char *description; }; /** * ice_add_sysctls_sw_stats - Add sysctls for software statistics * @vsi: pointer to the VSI to add sysctls for * @ctx: sysctl ctx to use * @parent: the parent node to add sysctls under * * Add statistics sysctls for software tracked statistics of a VSI. * * Currently this only adds checksum offload statistics, but more counters may * be added in the future. */ static void ice_add_sysctls_sw_stats(struct ice_vsi *vsi, struct sysctl_ctx_list *ctx, struct sysctl_oid *parent) { struct sysctl_oid *cso_node; struct sysctl_oid_list *cso_list; /* Tx CSO Stats */ const struct ice_tx_cso_stat_info tx_ctls[] = { { ICE_CSO_STAT_TX_TCP, "tx_tcp", "Transmit TCP Packets marked for HW checksum" }, { ICE_CSO_STAT_TX_UDP, "tx_udp", "Transmit UDP Packets marked for HW checksum" }, { ICE_CSO_STAT_TX_SCTP, "tx_sctp", "Transmit SCTP Packets marked for HW checksum" }, { ICE_CSO_STAT_TX_IP4, "tx_ip4", "Transmit IPv4 Packets marked for HW checksum" }, { ICE_CSO_STAT_TX_IP6, "tx_ip6", "Transmit IPv6 Packets marked for HW checksum" }, { ICE_CSO_STAT_TX_L3_ERR, "tx_l3_err", "Transmit packets that driver failed to set L3 HW CSO bits for" }, { ICE_CSO_STAT_TX_L4_ERR, "tx_l4_err", "Transmit packets that driver failed to set L4 HW CSO bits for" }, /* End */ { ICE_CSO_STAT_TX_COUNT, 0, 0 } }; /* Rx CSO Stats */ const struct ice_rx_cso_stat_info rx_ctls[] = { { ICE_CSO_STAT_RX_IP4_ERR, "rx_ip4_err", "Received packets with invalid IPv4 checksum indicated by HW" }, { ICE_CSO_STAT_RX_IP6_ERR, "rx_ip6_err", "Received IPv6 packets with extension headers" }, { ICE_CSO_STAT_RX_L3_ERR, "rx_l3_err", "Received packets with an unexpected invalid L3 checksum indicated by HW" }, { ICE_CSO_STAT_RX_TCP_ERR, "rx_tcp_err", "Received packets with invalid TCP checksum indicated by HW" }, { ICE_CSO_STAT_RX_UDP_ERR, "rx_udp_err", "Received packets with invalid UDP checksum indicated by HW" }, { ICE_CSO_STAT_RX_SCTP_ERR, "rx_sctp_err", "Received packets with invalid SCTP checksum indicated by HW" }, { ICE_CSO_STAT_RX_L4_ERR, "rx_l4_err", "Received packets with an unexpected invalid L4 checksum indicated by HW" }, /* End */ { ICE_CSO_STAT_RX_COUNT, 0, 0 } }; struct sysctl_oid_list *parent_list = SYSCTL_CHILDREN(parent); /* Add a node for statistics tracked by software. 
*/ cso_node = SYSCTL_ADD_NODE(ctx, parent_list, OID_AUTO, "cso", CTLFLAG_RD, NULL, "Checksum offload Statistics"); cso_list = SYSCTL_CHILDREN(cso_node); const struct ice_tx_cso_stat_info *tx_entry = tx_ctls; while (tx_entry->name && tx_entry->description) { SYSCTL_ADD_PROC(ctx, cso_list, OID_AUTO, tx_entry->name, CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_STATS, vsi, tx_entry->type, ice_sysctl_tx_cso_stat, "QU", tx_entry->description); tx_entry++; } const struct ice_rx_cso_stat_info *rx_entry = rx_ctls; while (rx_entry->name && rx_entry->description) { SYSCTL_ADD_PROC(ctx, cso_list, OID_AUTO, rx_entry->name, CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_STATS, vsi, rx_entry->type, ice_sysctl_rx_cso_stat, "QU", rx_entry->description); rx_entry++; } } /** * ice_add_vsi_sysctls - Add sysctls for a VSI * @vsi: pointer to VSI structure * * Add various sysctls for a given VSI. */ void ice_add_vsi_sysctls(struct ice_vsi *vsi) { struct sysctl_ctx_list *ctx = &vsi->ctx; struct sysctl_oid *hw_node, *sw_node; struct sysctl_oid_list *vsi_list, *hw_list; vsi_list = SYSCTL_CHILDREN(vsi->vsi_node); /* Keep hw stats in their own node. */ hw_node = SYSCTL_ADD_NODE(ctx, vsi_list, OID_AUTO, "hw", CTLFLAG_RD, NULL, "VSI Hardware Statistics"); hw_list = SYSCTL_CHILDREN(hw_node); /* Add the ethernet statistics for this VSI */ ice_add_sysctls_eth_stats(ctx, hw_node, &vsi->hw_stats.cur); SYSCTL_ADD_U64(ctx, hw_list, OID_AUTO, "rx_discards", CTLFLAG_RD | CTLFLAG_STATS, &vsi->hw_stats.cur.rx_discards, 0, "Discarded Rx Packets (see rx_errors or rx_no_desc)"); SYSCTL_ADD_PROC(ctx, hw_list, OID_AUTO, "rx_errors", CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_STATS, vsi, 0, ice_sysctl_rx_errors_stat, "QU", "Aggregate of all Rx errors"); SYSCTL_ADD_U64(ctx, hw_list, OID_AUTO, "rx_no_desc", CTLFLAG_RD | CTLFLAG_STATS, &vsi->hw_stats.cur.rx_no_desc, 0, "Rx Packets Discarded Due To Lack Of Descriptors"); SYSCTL_ADD_U64(ctx, hw_list, OID_AUTO, "tx_errors", CTLFLAG_RD | CTLFLAG_STATS, &vsi->hw_stats.cur.tx_errors, 0, "Tx Packets Discarded Due To Error"); /* Add a node for statistics tracked by software. */ sw_node = SYSCTL_ADD_NODE(ctx, vsi_list, OID_AUTO, "sw", CTLFLAG_RD, NULL, "VSI Software Statistics"); ice_add_sysctls_sw_stats(vsi, ctx, sw_node); } /** * ice_add_sysctls_mac_pfc_one_stat - Add sysctl node for a PFC statistic * @ctx: sysctl ctx to use * @parent_list: parent sysctl list to add sysctls under * @pfc_stat_location: address of statistic for sysctl to display * @node_name: Name for statistic node * @descr: Description used for nodes added in this function * * A helper function for ice_add_sysctls_mac_pfc_stats that adds a node * for a stat and leaves for each traffic class for that stat. 
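 *
 * For example (full path illustrative), a node_name of "p_xon_recvd"
 * produces eight leaves named "0" through "7", one per traffic class:
 *
 *   dev.ice.0.hw.mac.p_xon_recvd.0
 *   ...
 *   dev.ice.0.hw.mac.p_xon_recvd.7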
 */
static void
ice_add_sysctls_mac_pfc_one_stat(struct sysctl_ctx_list *ctx,
                 struct sysctl_oid_list *parent_list,
                 u64 *pfc_stat_location,
                 const char *node_name,
                 const char *descr)
{
    struct sysctl_oid_list *node_list;
    struct sysctl_oid *node;
    struct sbuf *namebuf, *descbuf;

    node = SYSCTL_ADD_NODE(ctx, parent_list, OID_AUTO, node_name,
        CTLFLAG_RD, NULL, descr);
    node_list = SYSCTL_CHILDREN(node);

    namebuf = sbuf_new_auto();
    descbuf = sbuf_new_auto();
    for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
        sbuf_clear(namebuf);
        sbuf_clear(descbuf);

        sbuf_printf(namebuf, "%d", i);
        sbuf_printf(descbuf, "%s for TC %d", descr, i);

        sbuf_finish(namebuf);
        sbuf_finish(descbuf);

        SYSCTL_ADD_U64(ctx, node_list, OID_AUTO,
            sbuf_data(namebuf), CTLFLAG_RD | CTLFLAG_STATS,
            &pfc_stat_location[i], 0, sbuf_data(descbuf));
    }

    sbuf_delete(namebuf);
    sbuf_delete(descbuf);
}

/**
 * ice_add_sysctls_mac_pfc_stats - Add sysctls for MAC PFC statistics
 * @ctx: the sysctl ctx to use
 * @parent: parent node to add the sysctls under
 * @stats: the hw port stats structure to pull values from
 *
 * Add global Priority Flow Control MAC statistics sysctls. These are
 * structured as a node per PFC statistic, each with eight leaves, one for
 * each traffic class.
 */
static void
ice_add_sysctls_mac_pfc_stats(struct sysctl_ctx_list *ctx,
                  struct sysctl_oid *parent,
                  struct ice_hw_port_stats *stats)
{
    struct sysctl_oid_list *parent_list;

    parent_list = SYSCTL_CHILDREN(parent);

    ice_add_sysctls_mac_pfc_one_stat(ctx, parent_list,
        stats->priority_xon_rx, "p_xon_recvd", "PFC XON received");
    ice_add_sysctls_mac_pfc_one_stat(ctx, parent_list,
        stats->priority_xoff_rx, "p_xoff_recvd", "PFC XOFF received");
    ice_add_sysctls_mac_pfc_one_stat(ctx, parent_list,
        stats->priority_xon_tx, "p_xon_txd", "PFC XON transmitted");
    ice_add_sysctls_mac_pfc_one_stat(ctx, parent_list,
        stats->priority_xoff_tx, "p_xoff_txd", "PFC XOFF transmitted");
    ice_add_sysctls_mac_pfc_one_stat(ctx, parent_list,
        stats->priority_xon_2_xoff, "p_xon2xoff",
        "PFC XON to XOFF transitions");
}

/**
 * ice_add_sysctls_mac_stats - Add sysctls for global MAC statistics
 * @ctx: the sysctl ctx to use
 * @parent: parent node to add the sysctls under
 * @sc: device private structure
 *
 * Add global MAC statistics sysctls.
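 *
 * The node is named "mac" and sits under the caller's parent node, so the
 * counters land at, e.g., dev.ice.0.hw.mac.rx_frames_64 (unit number
 * hypothetical).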
*/ void ice_add_sysctls_mac_stats(struct sysctl_ctx_list *ctx, struct sysctl_oid *parent, struct ice_softc *sc) { struct sysctl_oid *mac_node; struct sysctl_oid_list *parent_list, *mac_list; struct ice_hw_port_stats *stats = &sc->stats.cur; parent_list = SYSCTL_CHILDREN(parent); mac_node = SYSCTL_ADD_NODE(ctx, parent_list, OID_AUTO, "mac", CTLFLAG_RD, NULL, "Mac Hardware Statistics"); mac_list = SYSCTL_CHILDREN(mac_node); /* Add the ethernet statistics common to VSI and MAC */ ice_add_sysctls_eth_stats(ctx, mac_node, &stats->eth); /* Add PFC stats that add per-TC counters */ ice_add_sysctls_mac_pfc_stats(ctx, mac_node, stats); const struct ice_sysctl_info ctls[] = { /* Packet Reception Stats */ {&stats->rx_size_64, "rx_frames_64", "64 byte frames received"}, {&stats->rx_size_127, "rx_frames_65_127", "65-127 byte frames received"}, {&stats->rx_size_255, "rx_frames_128_255", "128-255 byte frames received"}, {&stats->rx_size_511, "rx_frames_256_511", "256-511 byte frames received"}, {&stats->rx_size_1023, "rx_frames_512_1023", "512-1023 byte frames received"}, {&stats->rx_size_1522, "rx_frames_1024_1522", "1024-1522 byte frames received"}, {&stats->rx_size_big, "rx_frames_big", "1523-9522 byte frames received"}, {&stats->rx_undersize, "rx_undersize", "Undersized packets received"}, {&stats->rx_fragments, "rx_fragmented", "Fragmented packets received"}, {&stats->rx_jabber, "rx_jabber", "Received Jabber"}, {&stats->eth.rx_discards, "rx_discards", "Discarded Rx Packets by Port (shortage of storage space)"}, /* Packet Transmission Stats */ {&stats->tx_size_64, "tx_frames_64", "64 byte frames transmitted"}, {&stats->tx_size_127, "tx_frames_65_127", "65-127 byte frames transmitted"}, {&stats->tx_size_255, "tx_frames_128_255", "128-255 byte frames transmitted"}, {&stats->tx_size_511, "tx_frames_256_511", "256-511 byte frames transmitted"}, {&stats->tx_size_1023, "tx_frames_512_1023", "512-1023 byte frames transmitted"}, {&stats->tx_size_1522, "tx_frames_1024_1522", "1024-1522 byte frames transmitted"}, {&stats->tx_size_big, "tx_frames_big", "1523-9522 byte frames transmitted"}, {&stats->tx_dropped_link_down, "tx_dropped", "Tx Dropped Due To Link Down"}, /* Flow control */ {&stats->link_xon_tx, "xon_txd", "Link XON transmitted"}, {&stats->link_xon_rx, "xon_recvd", "Link XON received"}, {&stats->link_xoff_tx, "xoff_txd", "Link XOFF transmitted"}, {&stats->link_xoff_rx, "xoff_recvd", "Link XOFF received"}, /* Other */ {&stats->crc_errors, "crc_errors", "CRC Errors"}, {&stats->illegal_bytes, "illegal_bytes", "Illegal Byte Errors"}, {&stats->mac_local_faults, "local_faults", "MAC Local Faults"}, {&stats->mac_remote_faults, "remote_faults", "MAC Remote Faults"}, /* End */ { 0, 0, 0 } }; const struct ice_sysctl_info *entry = ctls; while (entry->stat != 0) { SYSCTL_ADD_U64(ctx, mac_list, OID_AUTO, entry->name, CTLFLAG_RD | CTLFLAG_STATS, entry->stat, 0, entry->description); entry++; } /* Port oversize packet stats */ SYSCTL_ADD_U64(ctx, mac_list, OID_AUTO, "rx_oversized", CTLFLAG_RD | CTLFLAG_STATS, &sc->soft_stats.rx_roc_error, 0, "Oversized packets received"); } /** * ice_configure_misc_interrupts - enable 'other' interrupt causes * @sc: pointer to device private softc * * Enable various "other" interrupt causes, and associate them to interrupt 0, * which is our administrative interrupt. 
*/ void ice_configure_misc_interrupts(struct ice_softc *sc) { struct ice_hw *hw = &sc->hw; u32 val; /* Read the OICR register to clear it */ rd32(hw, PFINT_OICR); /* Enable useful "other" interrupt causes */ val = (PFINT_OICR_ECC_ERR_M | PFINT_OICR_MAL_DETECT_M | PFINT_OICR_GRST_M | PFINT_OICR_PCI_EXCEPTION_M | PFINT_OICR_VFLR_M | PFINT_OICR_HMC_ERR_M | PFINT_OICR_PE_CRITERR_M); wr32(hw, PFINT_OICR_ENA, val); /* Note that since we're using MSI-X index 0, and ITR index 0, we do * not explicitly program them when writing to the PFINT_*_CTL * registers. Nevertheless, these writes are associating the * interrupts with the ITR 0 vector */ /* Associate the OICR interrupt with ITR 0, and enable it */ wr32(hw, PFINT_OICR_CTL, PFINT_OICR_CTL_CAUSE_ENA_M); /* Associate the Mailbox interrupt with ITR 0, and enable it */ wr32(hw, PFINT_MBX_CTL, PFINT_MBX_CTL_CAUSE_ENA_M); /* Associate the SB Queue interrupt with ITR 0, and enable it */ wr32(hw, PFINT_SB_CTL, PFINT_SB_CTL_CAUSE_ENA_M); /* Associate the AdminQ interrupt with ITR 0, and enable it */ wr32(hw, PFINT_FW_CTL, PFINT_FW_CTL_CAUSE_ENA_M); } /** * ice_filter_is_mcast - Check if info is a multicast filter * @vsi: vsi structure addresses are targeted towards * @info: filter info * * @returns true if the provided info is a multicast filter, and false * otherwise. */ static bool ice_filter_is_mcast(struct ice_vsi *vsi, struct ice_fltr_info *info) { const u8 *addr = info->l_data.mac.mac_addr; /* * Check if this info matches a multicast filter added by * ice_add_mac_to_list */ if ((info->flag == ICE_FLTR_TX) && (info->src_id == ICE_SRC_ID_VSI) && (info->lkup_type == ICE_SW_LKUP_MAC) && (info->vsi_handle == vsi->idx) && ETHER_IS_MULTICAST(addr) && !ETHER_IS_BROADCAST(addr)) return true; return false; } /** * @struct ice_mcast_sync_data * @brief data used by ice_sync_one_mcast_filter function * * Structure used to store data needed for processing by the * ice_sync_one_mcast_filter. This structure contains a linked list of filters * to be added, an error indication, and a pointer to the device softc. */ struct ice_mcast_sync_data { struct ice_list_head add_list; struct ice_softc *sc; int err; }; /** * ice_sync_one_mcast_filter - Check if we need to program the filter * @p: void pointer to algorithm data * @sdl: link level socket address * @count: unused count value * * Called by if_foreach_llmaddr to operate on each filter in the ifp filter * list. For the given address, search our internal list to see if we have * found the filter. If not, add it to our list of filters that need to be * programmed. * * @returns (1) if we've actually setup the filter to be added */ static u_int ice_sync_one_mcast_filter(void *p, struct sockaddr_dl *sdl, u_int __unused count) { struct ice_mcast_sync_data *data = (struct ice_mcast_sync_data *)p; struct ice_softc *sc = data->sc; struct ice_hw *hw = &sc->hw; struct ice_switch_info *sw = hw->switch_info; const u8 *sdl_addr = (const u8 *)LLADDR(sdl); struct ice_fltr_mgmt_list_entry *itr; struct ice_list_head *rules; int err; rules = &sw->recp_list[ICE_SW_LKUP_MAC].filt_rules; /* * If a previous filter already indicated an error, there is no need * for us to finish processing the rest of the filters. 
 */
    if (data->err)
        return (0);

    /* See if this filter has already been programmed */
    LIST_FOR_EACH_ENTRY(itr, rules, ice_fltr_mgmt_list_entry, list_entry) {
        struct ice_fltr_info *info = &itr->fltr_info;
        const u8 *addr = info->l_data.mac.mac_addr;

        /* Only check multicast filters */
        if (!ice_filter_is_mcast(&sc->pf_vsi, info))
            continue;

        /*
         * If this filter matches, mark the internal filter as
         * "found", and exit.
         */
        if (bcmp(addr, sdl_addr, ETHER_ADDR_LEN) == 0) {
            itr->marker = ICE_FLTR_FOUND;
            return (1);
        }
    }

    /*
     * If we failed to locate the filter in our internal list, we need to
     * place it into our add list.
     */
    err = ice_add_mac_to_list(&sc->pf_vsi, &data->add_list, sdl_addr,
        ICE_FWD_TO_VSI);
    if (err) {
        device_printf(sc->dev,
            "Failed to place MAC %6D onto add list, err %s\n",
            sdl_addr, ":", ice_err_str(err));
        data->err = err;

        return (0);
    }

    return (1);
}

/**
 * ice_sync_multicast_filters - Synchronize OS and internal filter list
 * @sc: device private structure
 *
 * Called in response to SIOCDELMULTI to synchronize the operating system
 * multicast address list with the internal list of filters programmed to
 * firmware.
 *
 * Finds added and deleted filters using a marker bit on the internal list.
 *
 * First, a loop over the internal list clears the marker bit. Second, each
 * filter in the ifp list is checked. If we find it in the internal list,
 * the marker bit is set. Otherwise, the filter is added to the add list.
 * Third, a loop over the internal list determines if any filters have not
 * been found. Each of these is added to the delete list. Finally, the add
 * and delete lists are programmed to firmware to update the filters.
 *
 * @returns zero on success or an integer error code on failure.
 */
int
ice_sync_multicast_filters(struct ice_softc *sc)
{
    struct ice_hw *hw = &sc->hw;
    struct ice_switch_info *sw = hw->switch_info;
    struct ice_fltr_mgmt_list_entry *itr;
    struct ice_mcast_sync_data data = {};
    struct ice_list_head *rules, remove_list;
    int status;
    int err = 0;

    INIT_LIST_HEAD(&data.add_list);
    INIT_LIST_HEAD(&remove_list);
    data.sc = sc;
    data.err = 0;

    rules = &sw->recp_list[ICE_SW_LKUP_MAC].filt_rules;

    /* Acquire the lock for the entire duration */
    ice_acquire_lock(&sw->recp_list[ICE_SW_LKUP_MAC].filt_rule_lock);

    /* (1) Reset the marker state for all filters */
    LIST_FOR_EACH_ENTRY(itr, rules, ice_fltr_mgmt_list_entry, list_entry)
        itr->marker = ICE_FLTR_NOT_FOUND;

    /* (2) Determine which filters need to be added and removed */
    if_foreach_llmaddr(sc->ifp, ice_sync_one_mcast_filter, (void *)&data);
    if (data.err) {
        /* ice_sync_one_mcast_filter already prints an error */
        err = data.err;
        ice_release_lock(&sw->recp_list[ICE_SW_LKUP_MAC].filt_rule_lock);
        goto free_filter_lists;
    }

    LIST_FOR_EACH_ENTRY(itr, rules, ice_fltr_mgmt_list_entry, list_entry) {
        struct ice_fltr_info *info = &itr->fltr_info;
        const u8 *addr = info->l_data.mac.mac_addr;

        /* Only check multicast filters */
        if (!ice_filter_is_mcast(&sc->pf_vsi, info))
            continue;

        /*
         * If the filter is not marked as found, then it must no
         * longer be in the ifp address list, so we need to remove it.
*/ if (itr->marker == ICE_FLTR_NOT_FOUND) { err = ice_add_mac_to_list(&sc->pf_vsi, &remove_list, addr, ICE_FWD_TO_VSI); if (err) { device_printf(sc->dev, "Failed to place MAC %6D onto remove list, err %s\n", addr, ":", ice_err_str(err)); ice_release_lock(&sw->recp_list[ICE_SW_LKUP_MAC].filt_rule_lock); goto free_filter_lists; } } } ice_release_lock(&sw->recp_list[ICE_SW_LKUP_MAC].filt_rule_lock); status = ice_add_mac(hw, &data.add_list); if (status) { device_printf(sc->dev, "Could not add new MAC filters, err %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); err = (EIO); goto free_filter_lists; } status = ice_remove_mac(hw, &remove_list); if (status) { device_printf(sc->dev, "Could not remove old MAC filters, err %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); err = (EIO); goto free_filter_lists; } free_filter_lists: ice_free_fltr_list(&data.add_list); ice_free_fltr_list(&remove_list); return (err); } /** * ice_add_vlan_hw_filters - Add multiple VLAN filters for a given VSI * @vsi: The VSI to add the filter for * @vid: array of VLAN ids to add * @length: length of vid array * * Programs HW filters so that the given VSI will receive the specified VLANs. */ int ice_add_vlan_hw_filters(struct ice_vsi *vsi, u16 *vid, u16 length) { struct ice_hw *hw = &vsi->sc->hw; struct ice_list_head vlan_list; struct ice_fltr_list_entry *vlan_entries; int status; MPASS(length > 0); INIT_LIST_HEAD(&vlan_list); vlan_entries = (struct ice_fltr_list_entry *) malloc(sizeof(*vlan_entries) * length, M_ICE, M_NOWAIT | M_ZERO); if (!vlan_entries) return (ICE_ERR_NO_MEMORY); for (u16 i = 0; i < length; i++) { vlan_entries[i].fltr_info.lkup_type = ICE_SW_LKUP_VLAN; vlan_entries[i].fltr_info.fltr_act = ICE_FWD_TO_VSI; vlan_entries[i].fltr_info.flag = ICE_FLTR_TX; vlan_entries[i].fltr_info.src_id = ICE_SRC_ID_VSI; vlan_entries[i].fltr_info.vsi_handle = vsi->idx; vlan_entries[i].fltr_info.l_data.vlan.vlan_id = vid[i]; LIST_ADD(&vlan_entries[i].list_entry, &vlan_list); } status = ice_add_vlan(hw, &vlan_list); if (!status) goto done; device_printf(vsi->sc->dev, "Failed to add VLAN filters:\n"); for (u16 i = 0; i < length; i++) { device_printf(vsi->sc->dev, "- vlan %d, status %d\n", vlan_entries[i].fltr_info.l_data.vlan.vlan_id, vlan_entries[i].status); } done: free(vlan_entries, M_ICE); return (status); } /** * ice_add_vlan_hw_filter - Add a VLAN filter for a given VSI * @vsi: The VSI to add the filter for * @vid: VLAN to add * * Programs a HW filter so that the given VSI will receive the specified VLAN. */ int ice_add_vlan_hw_filter(struct ice_vsi *vsi, u16 vid) { return ice_add_vlan_hw_filters(vsi, &vid, 1); } /** * ice_remove_vlan_hw_filters - Remove multiple VLAN filters for a given VSI * @vsi: The VSI to remove the filters from * @vid: array of VLAN ids to remove * @length: length of vid array * * Removes previously programmed HW filters for the specified VSI. 
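 *
 * A minimal usage sketch (VLAN IDs illustrative), mirroring
 * ice_add_vlan_hw_filters() above:
 *
 *   u16 vids[] = { 100, 200 };
 *   int err = ice_remove_vlan_hw_filters(vsi, vids, nitems(vids));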
*/ int ice_remove_vlan_hw_filters(struct ice_vsi *vsi, u16 *vid, u16 length) { struct ice_hw *hw = &vsi->sc->hw; struct ice_list_head vlan_list; struct ice_fltr_list_entry *vlan_entries; int status; MPASS(length > 0); INIT_LIST_HEAD(&vlan_list); vlan_entries = (struct ice_fltr_list_entry *) malloc(sizeof(*vlan_entries) * length, M_ICE, M_NOWAIT | M_ZERO); if (!vlan_entries) return (ICE_ERR_NO_MEMORY); for (u16 i = 0; i < length; i++) { vlan_entries[i].fltr_info.lkup_type = ICE_SW_LKUP_VLAN; vlan_entries[i].fltr_info.fltr_act = ICE_FWD_TO_VSI; vlan_entries[i].fltr_info.flag = ICE_FLTR_TX; vlan_entries[i].fltr_info.src_id = ICE_SRC_ID_VSI; vlan_entries[i].fltr_info.vsi_handle = vsi->idx; vlan_entries[i].fltr_info.l_data.vlan.vlan_id = vid[i]; LIST_ADD(&vlan_entries[i].list_entry, &vlan_list); } status = ice_remove_vlan(hw, &vlan_list); if (!status) goto done; device_printf(vsi->sc->dev, "Failed to remove VLAN filters:\n"); for (u16 i = 0; i < length; i++) { device_printf(vsi->sc->dev, "- vlan %d, status %d\n", vlan_entries[i].fltr_info.l_data.vlan.vlan_id, vlan_entries[i].status); } done: free(vlan_entries, M_ICE); return (status); } /** * ice_remove_vlan_hw_filter - Remove a VLAN filter for a given VSI * @vsi: The VSI to remove the filter from * @vid: VLAN to remove * * Removes a previously programmed HW filter for the specified VSI. */ int ice_remove_vlan_hw_filter(struct ice_vsi *vsi, u16 vid) { return ice_remove_vlan_hw_filters(vsi, &vid, 1); } #define ICE_SYSCTL_HELP_RX_ITR \ "\nControl Rx interrupt throttle rate." \ "\n\t0-8160 - sets interrupt rate in usecs" \ "\n\t -1 - reset the Rx itr to default" /** * ice_sysctl_rx_itr - Display or change the Rx ITR for a VSI * @oidp: sysctl oid structure * @arg1: pointer to private data structure * @arg2: unused * @req: sysctl request pointer * * On read: Displays the current Rx ITR value * on write: Sets the Rx ITR value, reconfiguring device if it is up */ static int ice_sysctl_rx_itr(SYSCTL_HANDLER_ARGS) { struct ice_vsi *vsi = (struct ice_vsi *)arg1; struct ice_softc *sc = vsi->sc; int increment, ret; UNREFERENCED_PARAMETER(arg2); if (ice_driver_is_detaching(sc)) return (ESHUTDOWN); ret = sysctl_handle_16(oidp, &vsi->rx_itr, 0, req); if ((ret) || (req->newptr == NULL)) return (ret); if (vsi->rx_itr < 0) vsi->rx_itr = ICE_DFLT_RX_ITR; if (vsi->rx_itr > ICE_ITR_MAX) vsi->rx_itr = ICE_ITR_MAX; /* Assume 2usec increment if it hasn't been loaded yet */ increment = sc->hw.itr_gran ? : 2; /* We need to round the value to the hardware's ITR granularity */ vsi->rx_itr = (vsi->rx_itr / increment ) * increment; /* If the driver has finished initializing, then we need to reprogram * the ITR registers now. Otherwise, they will be programmed during * driver initialization. */ if (ice_test_state(&sc->state, ICE_STATE_DRIVER_INITIALIZED)) ice_configure_rx_itr(vsi); return (0); } #define ICE_SYSCTL_HELP_TX_ITR \ "\nControl Tx interrupt throttle rate." 
\ "\n\t0-8160 - sets interrupt rate in usecs" \
"\n\t -1 - reset the Tx itr to default"

/**
 * ice_sysctl_tx_itr - Display or change the Tx ITR for a VSI
 * @oidp: sysctl oid structure
 * @arg1: pointer to private data structure
 * @arg2: unused
 * @req: sysctl request pointer
 *
 * On read: Displays the current Tx ITR value
 * On write: Sets the Tx ITR value, reconfiguring device if it is up
 */
static int
ice_sysctl_tx_itr(SYSCTL_HANDLER_ARGS)
{
    struct ice_vsi *vsi = (struct ice_vsi *)arg1;
    struct ice_softc *sc = vsi->sc;
    int increment, ret;

    UNREFERENCED_PARAMETER(arg2);

    if (ice_driver_is_detaching(sc))
        return (ESHUTDOWN);

    ret = sysctl_handle_16(oidp, &vsi->tx_itr, 0, req);
    if ((ret) || (req->newptr == NULL))
        return (ret);

    /* Allow configuring a negative value to reset to the default */
    if (vsi->tx_itr < 0)
        vsi->tx_itr = ICE_DFLT_TX_ITR;
    if (vsi->tx_itr > ICE_ITR_MAX)
        vsi->tx_itr = ICE_ITR_MAX;

    /* Assume 2usec increment if it hasn't been loaded yet */
    increment = sc->hw.itr_gran ? : 2;

    /* We need to round the value to the hardware's ITR granularity */
    vsi->tx_itr = (vsi->tx_itr / increment) * increment;

    /* If the driver has finished initializing, then we need to reprogram
     * the ITR registers now. Otherwise, they will be programmed during
     * driver initialization.
     */
    if (ice_test_state(&sc->state, ICE_STATE_DRIVER_INITIALIZED))
        ice_configure_tx_itr(vsi);

    return (0);
}

/**
 * ice_add_vsi_tunables - Add tunables and nodes for a VSI
 * @vsi: pointer to VSI structure
 * @parent: parent node to add the tunables under
 *
 * Create a sysctl context for the VSI, so that sysctls for the VSI can be
 * dynamically removed upon VSI removal.
 *
 * Add various tunables and set up the basic node structure for the VSI. Must
 * be called *prior* to ice_add_vsi_sysctls. It should be called as soon as
 * possible after the VSI memory is initialized.
 *
 * VSI specific sysctls with CTLFLAG_TUN should be initialized here so that
 * their values can be read from loader.conf prior to their first use in the
 * driver.
 */
void
ice_add_vsi_tunables(struct ice_vsi *vsi, struct sysctl_oid *parent)
{
    struct sysctl_oid_list *vsi_list;
    char vsi_name[32], vsi_desc[32];

    struct sysctl_oid_list *parent_list = SYSCTL_CHILDREN(parent);

    /* Initialize the sysctl context for this VSI */
    sysctl_ctx_init(&vsi->ctx);

    /* Add a node to collect this VSI's statistics together */
    snprintf(vsi_name, sizeof(vsi_name), "%u", vsi->idx);
    snprintf(vsi_desc, sizeof(vsi_desc), "VSI %u", vsi->idx);
    vsi->vsi_node = SYSCTL_ADD_NODE(&vsi->ctx, parent_list, OID_AUTO,
        vsi_name, CTLFLAG_RD, NULL, vsi_desc);
    vsi_list = SYSCTL_CHILDREN(vsi->vsi_node);

    vsi->rx_itr = ICE_DFLT_RX_ITR;
    SYSCTL_ADD_PROC(&vsi->ctx, vsi_list, OID_AUTO, "rx_itr",
        CTLTYPE_S16 | CTLFLAG_RWTUN, vsi, 0, ice_sysctl_rx_itr, "S",
        ICE_SYSCTL_HELP_RX_ITR);

    vsi->tx_itr = ICE_DFLT_TX_ITR;
    SYSCTL_ADD_PROC(&vsi->ctx, vsi_list, OID_AUTO, "tx_itr",
        CTLTYPE_S16 | CTLFLAG_RWTUN, vsi, 0, ice_sysctl_tx_itr, "S",
        ICE_SYSCTL_HELP_TX_ITR);
}

/**
 * ice_del_vsi_sysctl_ctx - Delete the sysctl context(s) of a VSI
 * @vsi: the VSI to remove contexts for
 *
 * Free the context for the VSI sysctls. This includes the main context, as
 * well as the per-queue sysctls.
*/ void ice_del_vsi_sysctl_ctx(struct ice_vsi *vsi) { device_t dev = vsi->sc->dev; int err; if (vsi->vsi_node) { err = sysctl_ctx_free(&vsi->ctx); if (err) device_printf(dev, "failed to free VSI %d sysctl context, err %s\n", vsi->idx, ice_err_str(err)); vsi->vsi_node = NULL; } } /** * ice_add_dscp2tc_map_sysctls - Add sysctl tree for DSCP to TC mapping * @sc: pointer to device private softc * @ctx: the sysctl ctx to use * @ctx_list: list of sysctl children for device (to add sysctl tree to) * * Add a sysctl tree for individual dscp2tc_map sysctls. Each child of this * node can map 8 DSCPs to TC values; there are 8 of these in turn for a total * of 64 DSCP to TC map values that the user can configure. */ void ice_add_dscp2tc_map_sysctls(struct ice_softc *sc, struct sysctl_ctx_list *ctx, struct sysctl_oid_list *ctx_list) { struct sysctl_oid_list *node_list; struct sysctl_oid *node; struct sbuf *namebuf, *descbuf; int first_dscp_val, last_dscp_val; node = SYSCTL_ADD_NODE(ctx, ctx_list, OID_AUTO, "dscp2tc_map", CTLFLAG_RD, NULL, "Map of DSCP values to DCB TCs"); node_list = SYSCTL_CHILDREN(node); namebuf = sbuf_new_auto(); descbuf = sbuf_new_auto(); for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) { sbuf_clear(namebuf); sbuf_clear(descbuf); first_dscp_val = i * 8; last_dscp_val = first_dscp_val + 7; sbuf_printf(namebuf, "%d-%d", first_dscp_val, last_dscp_val); sbuf_printf(descbuf, "Map DSCP values %d to %d to TCs", first_dscp_val, last_dscp_val); sbuf_finish(namebuf); sbuf_finish(descbuf); SYSCTL_ADD_PROC(ctx, node_list, OID_AUTO, sbuf_data(namebuf), CTLTYPE_STRING | CTLFLAG_RW, sc, i, ice_sysctl_dscp2tc_map, "A", sbuf_data(descbuf)); } sbuf_delete(namebuf); sbuf_delete(descbuf); } /** * ice_add_device_tunables - Add early tunable sysctls and sysctl nodes * @sc: device private structure * * Add per-device dynamic tunable sysctls, and setup the general sysctl trees * for re-use by ice_add_device_sysctls. * * In order for the sysctl fields to be initialized before use, this function * should be called as early as possible during attach activities. * * Any non-global sysctl marked as CTLFLAG_TUN should likely be initialized * here in this function, rather than later in ice_add_device_sysctls. * * To make things easier, this function is also expected to setup the various * sysctl nodes in addition to tunables so that other sysctls which can't be * initialized early can hook into the same nodes. */ void ice_add_device_tunables(struct ice_softc *sc) { device_t dev = sc->dev; struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev); struct sysctl_oid_list *ctx_list = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); sc->enable_health_events = ice_enable_health_events; SYSCTL_ADD_BOOL(ctx, ctx_list, OID_AUTO, "enable_health_events", CTLFLAG_RDTUN, &sc->enable_health_events, 0, "Enable FW health event reporting for this PF"); /* Add a node to track VSI sysctls. Keep track of the node in the * softc so that we can hook other sysctls into it later. This * includes both the VSI statistics, as well as potentially dynamic * VSIs in the future. */ sc->vsi_sysctls = SYSCTL_ADD_NODE(ctx, ctx_list, OID_AUTO, "vsi", CTLFLAG_RD, NULL, "VSI Configuration and Statistics"); /* Add debug tunables */ ice_add_debug_tunables(sc); } /** * ice_sysctl_dump_mac_filters - Dump a list of all HW MAC Filters * @oidp: sysctl oid structure * @arg1: pointer to private data structure * @arg2: unused * @req: sysctl request pointer * * Callback for "mac_filters" sysctl to dump the programmed MAC filters. 
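 *
 * Usage sketch (OID path assumed; the node is created by
 * ice_add_debug_sysctls, so it presumably lives under the "debug" node):
 *
 *   sysctl dev.ice.0.debug.mac_filters
 *
 * The vlan_filters, ethertype_filters, and ethertype_mac_filters sysctls
 * below follow the same pattern.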
*/ static int ice_sysctl_dump_mac_filters(SYSCTL_HANDLER_ARGS) { struct ice_softc *sc = (struct ice_softc *)arg1; struct ice_hw *hw = &sc->hw; struct ice_switch_info *sw = hw->switch_info; struct ice_fltr_mgmt_list_entry *fm_entry; struct ice_list_head *rule_head; struct ice_lock *rule_lock; struct ice_fltr_info *fi; struct sbuf *sbuf; int ret; UNREFERENCED_PARAMETER(oidp); UNREFERENCED_PARAMETER(arg2); if (ice_driver_is_detaching(sc)) return (ESHUTDOWN); /* Wire the old buffer so we can take a non-sleepable lock */ ret = sysctl_wire_old_buffer(req, 0); if (ret) return (ret); sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req); rule_lock = &sw->recp_list[ICE_SW_LKUP_MAC].filt_rule_lock; rule_head = &sw->recp_list[ICE_SW_LKUP_MAC].filt_rules; sbuf_printf(sbuf, "MAC Filter List"); ice_acquire_lock(rule_lock); LIST_FOR_EACH_ENTRY(fm_entry, rule_head, ice_fltr_mgmt_list_entry, list_entry) { fi = &fm_entry->fltr_info; sbuf_printf(sbuf, "\nmac = %6D, vsi_handle = %3d, fw_act_flag = %5s, lb_en = %1d, lan_en = %1d, fltr_act = %15s, fltr_rule_id = %d", fi->l_data.mac.mac_addr, ":", fi->vsi_handle, ice_fltr_flag_str(fi->flag), fi->lb_en, fi->lan_en, ice_fwd_act_str(fi->fltr_act), fi->fltr_rule_id); /* if we have a vsi_list_info, print some information about that */ if (fm_entry->vsi_list_info) { sbuf_printf(sbuf, ", vsi_count = %3d, vsi_list_id = %3d, ref_cnt = %3d", fm_entry->vsi_count, fm_entry->vsi_list_info->vsi_list_id, fm_entry->vsi_list_info->ref_cnt); } } ice_release_lock(rule_lock); sbuf_finish(sbuf); sbuf_delete(sbuf); return (0); } /** * ice_sysctl_dump_vlan_filters - Dump a list of all HW VLAN Filters * @oidp: sysctl oid structure * @arg1: pointer to private data structure * @arg2: unused * @req: sysctl request pointer * * Callback for "vlan_filters" sysctl to dump the programmed VLAN filters. 
*/ static int ice_sysctl_dump_vlan_filters(SYSCTL_HANDLER_ARGS) { struct ice_softc *sc = (struct ice_softc *)arg1; struct ice_hw *hw = &sc->hw; struct ice_switch_info *sw = hw->switch_info; struct ice_fltr_mgmt_list_entry *fm_entry; struct ice_list_head *rule_head; struct ice_lock *rule_lock; struct ice_fltr_info *fi; struct sbuf *sbuf; int ret; UNREFERENCED_PARAMETER(oidp); UNREFERENCED_PARAMETER(arg2); if (ice_driver_is_detaching(sc)) return (ESHUTDOWN); /* Wire the old buffer so we can take a non-sleepable lock */ ret = sysctl_wire_old_buffer(req, 0); if (ret) return (ret); sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req); rule_lock = &sw->recp_list[ICE_SW_LKUP_VLAN].filt_rule_lock; rule_head = &sw->recp_list[ICE_SW_LKUP_VLAN].filt_rules; sbuf_printf(sbuf, "VLAN Filter List"); ice_acquire_lock(rule_lock); LIST_FOR_EACH_ENTRY(fm_entry, rule_head, ice_fltr_mgmt_list_entry, list_entry) { fi = &fm_entry->fltr_info; sbuf_printf(sbuf, "\nvlan_id = %4d, vsi_handle = %3d, fw_act_flag = %5s, lb_en = %1d, lan_en = %1d, fltr_act = %15s, fltr_rule_id = %4d", fi->l_data.vlan.vlan_id, fi->vsi_handle, ice_fltr_flag_str(fi->flag), fi->lb_en, fi->lan_en, ice_fwd_act_str(fi->fltr_act), fi->fltr_rule_id); /* if we have a vsi_list_info, print some information about that */ if (fm_entry->vsi_list_info) { sbuf_printf(sbuf, ", vsi_count = %3d, vsi_list_id = %3d, ref_cnt = %3d", fm_entry->vsi_count, fm_entry->vsi_list_info->vsi_list_id, fm_entry->vsi_list_info->ref_cnt); } } ice_release_lock(rule_lock); sbuf_finish(sbuf); sbuf_delete(sbuf); return (0); } /** * ice_sysctl_dump_ethertype_filters - Dump a list of all HW Ethertype filters * @oidp: sysctl oid structure * @arg1: pointer to private data structure * @arg2: unused * @req: sysctl request pointer * * Callback for "ethertype_filters" sysctl to dump the programmed Ethertype * filters. 
*/ static int ice_sysctl_dump_ethertype_filters(SYSCTL_HANDLER_ARGS) { struct ice_softc *sc = (struct ice_softc *)arg1; struct ice_hw *hw = &sc->hw; struct ice_switch_info *sw = hw->switch_info; struct ice_fltr_mgmt_list_entry *fm_entry; struct ice_list_head *rule_head; struct ice_lock *rule_lock; struct ice_fltr_info *fi; struct sbuf *sbuf; int ret; UNREFERENCED_PARAMETER(oidp); UNREFERENCED_PARAMETER(arg2); if (ice_driver_is_detaching(sc)) return (ESHUTDOWN); /* Wire the old buffer so we can take a non-sleepable lock */ ret = sysctl_wire_old_buffer(req, 0); if (ret) return (ret); sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req); rule_lock = &sw->recp_list[ICE_SW_LKUP_ETHERTYPE].filt_rule_lock; rule_head = &sw->recp_list[ICE_SW_LKUP_ETHERTYPE].filt_rules; sbuf_printf(sbuf, "Ethertype Filter List"); ice_acquire_lock(rule_lock); LIST_FOR_EACH_ENTRY(fm_entry, rule_head, ice_fltr_mgmt_list_entry, list_entry) { fi = &fm_entry->fltr_info; sbuf_printf(sbuf, "\nethertype = 0x%04x, vsi_handle = %3d, fw_act_flag = %5s, lb_en = %1d, lan_en = %1d, fltr_act = %15s, fltr_rule_id = %4d", fi->l_data.ethertype_mac.ethertype, fi->vsi_handle, ice_fltr_flag_str(fi->flag), fi->lb_en, fi->lan_en, ice_fwd_act_str(fi->fltr_act), fi->fltr_rule_id); /* if we have a vsi_list_info, print some information about that */ if (fm_entry->vsi_list_info) { sbuf_printf(sbuf, ", vsi_count = %3d, vsi_list_id = %3d, ref_cnt = %3d", fm_entry->vsi_count, fm_entry->vsi_list_info->vsi_list_id, fm_entry->vsi_list_info->ref_cnt); } } ice_release_lock(rule_lock); sbuf_finish(sbuf); sbuf_delete(sbuf); return (0); } /** * ice_sysctl_dump_ethertype_mac_filters - Dump a list of all HW Ethertype/MAC filters * @oidp: sysctl oid structure * @arg1: pointer to private data structure * @arg2: unused * @req: sysctl request pointer * * Callback for "ethertype_mac_filters" sysctl to dump the programmed * Ethertype/MAC filters. 
*/ static int ice_sysctl_dump_ethertype_mac_filters(SYSCTL_HANDLER_ARGS) { struct ice_softc *sc = (struct ice_softc *)arg1; struct ice_hw *hw = &sc->hw; struct ice_switch_info *sw = hw->switch_info; struct ice_fltr_mgmt_list_entry *fm_entry; struct ice_list_head *rule_head; struct ice_lock *rule_lock; struct ice_fltr_info *fi; struct sbuf *sbuf; int ret; UNREFERENCED_PARAMETER(oidp); UNREFERENCED_PARAMETER(arg2); if (ice_driver_is_detaching(sc)) return (ESHUTDOWN); /* Wire the old buffer so we can take a non-sleepable lock */ ret = sysctl_wire_old_buffer(req, 0); if (ret) return (ret); sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req); rule_lock = &sw->recp_list[ICE_SW_LKUP_ETHERTYPE_MAC].filt_rule_lock; rule_head = &sw->recp_list[ICE_SW_LKUP_ETHERTYPE_MAC].filt_rules; sbuf_printf(sbuf, "Ethertype/MAC Filter List"); ice_acquire_lock(rule_lock); LIST_FOR_EACH_ENTRY(fm_entry, rule_head, ice_fltr_mgmt_list_entry, list_entry) { fi = &fm_entry->fltr_info; sbuf_printf(sbuf, "\nethertype = 0x%04x, mac = %6D, vsi_handle = %3d, fw_act_flag = %5s, lb_en = %1d, lan_en = %1d, fltr_act = %15s, fltr_rule_id = %4d", fi->l_data.ethertype_mac.ethertype, fi->l_data.ethertype_mac.mac_addr, ":", fi->vsi_handle, ice_fltr_flag_str(fi->flag), fi->lb_en, fi->lan_en, ice_fwd_act_str(fi->fltr_act), fi->fltr_rule_id); /* if we have a vsi_list_info, print some information about that */ if (fm_entry->vsi_list_info) { sbuf_printf(sbuf, ", vsi_count = %3d, vsi_list_id = %3d, ref_cnt = %3d", fm_entry->vsi_count, fm_entry->vsi_list_info->vsi_list_id, fm_entry->vsi_list_info->ref_cnt); } } ice_release_lock(rule_lock); sbuf_finish(sbuf); sbuf_delete(sbuf); return (0); } /** * ice_sysctl_dump_state_flags - Dump device driver state flags * @oidp: sysctl oid structure * @arg1: pointer to private data structure * @arg2: unused * @req: sysctl request pointer * * Callback for "state" sysctl to display currently set driver state flags. 
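 *
 * Output is one flag name per line, as translated by ice_state_to_str();
 * bits without a known name print as "BIT(n)", and "Nothing set" is
 * printed when no flags are set.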
*/ static int ice_sysctl_dump_state_flags(SYSCTL_HANDLER_ARGS) { struct ice_softc *sc = (struct ice_softc *)arg1; struct sbuf *sbuf; u32 copied_state; unsigned int i; bool at_least_one = false; UNREFERENCED_PARAMETER(oidp); UNREFERENCED_PARAMETER(arg2); if (ice_driver_is_detaching(sc)) return (ESHUTDOWN); /* Make a copy of the state to ensure we display coherent values */ copied_state = atomic_load_acq_32(&sc->state); sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req); /* Add the string for each set state to the sbuf */ for (i = 0; i < 32; i++) { if (copied_state & BIT(i)) { const char *str = ice_state_to_str((enum ice_state)i); at_least_one = true; if (str) sbuf_printf(sbuf, "\n%s", str); else sbuf_printf(sbuf, "\nBIT(%u)", i); } } if (!at_least_one) sbuf_printf(sbuf, "Nothing set"); sbuf_finish(sbuf); sbuf_delete(sbuf); return (0); } #define ICE_SYSCTL_DEBUG_MASK_HELP \ "\nSelect debug statements to print to kernel message log" \ "\nFlags:" \ "\n\t 0x1 - Function Tracing" \ "\n\t 0x2 - Driver Initialization" \ "\n\t 0x4 - Release" \ "\n\t 0x8 - FW Logging" \ "\n\t 0x10 - Link" \ "\n\t 0x20 - PHY" \ "\n\t 0x40 - Queue Context" \ "\n\t 0x80 - NVM" \ "\n\t 0x100 - LAN" \ "\n\t 0x200 - Flow" \ "\n\t 0x400 - DCB" \ "\n\t 0x800 - Diagnostics" \ "\n\t 0x1000 - Flow Director" \ "\n\t 0x2000 - Switch" \ "\n\t 0x4000 - Scheduler" \ "\n\t 0x8000 - RDMA" \ "\n\t 0x10000 - DDP Package" \ "\n\t 0x20000 - Resources" \ "\n\t 0x40000 - ACL" \ "\n\t 0x80000 - PTP" \ -"\n\t 0x100000 - Admin Queue messages" \ -"\n\t 0x200000 - Admin Queue descriptors" \ -"\n\t 0x400000 - Admin Queue descriptor buffers" \ -"\n\t 0x800000 - Admin Queue commands" \ -"\n\t 0x1000000 - Parser" \ +"\n\t ..." \ +"\n\t 0x1000000 - Admin Queue messages" \ +"\n\t 0x2000000 - Admin Queue descriptors" \ +"\n\t 0x4000000 - Admin Queue descriptor buffers" \ +"\n\t 0x8000000 - Admin Queue commands" \ +"\n\t 0x10000000 - Parser" \ "\n\t ..." \ "\n\t 0x80000000 - (Reserved for user)" \ "\n\t" \ "\nUse \"sysctl -x\" to view flags properly." /** * ice_add_debug_tunables - Add tunables helpful for debugging the device driver * @sc: device private structure * * Add sysctl tunable values related to debugging the device driver. For now, * this means a tunable to set the debug mask early during driver load. * * The debug node will be marked CTLFLAG_SKIP unless INVARIANTS is defined, so * that in normal kernel builds, these will all be hidden, but on a debug * kernel they will be more easily visible. 
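 *
 * For example (OID path assumed from the code below), the debug mask can
 * be seeded from loader.conf(5) before the driver attaches:
 *
 *   dev.ice.0.debug.debug_mask="0x10"    # enable Link debug messages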
*/ static void ice_add_debug_tunables(struct ice_softc *sc) { struct sysctl_oid_list *debug_list; device_t dev = sc->dev; struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev); struct sysctl_oid_list *ctx_list = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); sc->debug_sysctls = SYSCTL_ADD_NODE(ctx, ctx_list, OID_AUTO, "debug", ICE_CTLFLAG_DEBUG | CTLFLAG_RD, NULL, "Debug Sysctls"); debug_list = SYSCTL_CHILDREN(sc->debug_sysctls); SYSCTL_ADD_U64(ctx, debug_list, OID_AUTO, "debug_mask", ICE_CTLFLAG_DEBUG | CTLFLAG_RWTUN, &sc->hw.debug_mask, 0, ICE_SYSCTL_DEBUG_MASK_HELP); /* Load the default value from the global sysctl first */ sc->enable_tx_fc_filter = ice_enable_tx_fc_filter; SYSCTL_ADD_BOOL(ctx, debug_list, OID_AUTO, "enable_tx_fc_filter", ICE_CTLFLAG_DEBUG | CTLFLAG_RDTUN, &sc->enable_tx_fc_filter, 0, "Drop Ethertype 0x8808 control frames originating from software on this PF"); sc->tx_balance_en = ice_tx_balance_en; SYSCTL_ADD_BOOL(ctx, debug_list, OID_AUTO, "tx_balance", ICE_CTLFLAG_DEBUG | CTLFLAG_RWTUN, &sc->tx_balance_en, 0, "Enable 5-layer scheduler topology"); /* Load the default value from the global sysctl first */ sc->enable_tx_lldp_filter = ice_enable_tx_lldp_filter; SYSCTL_ADD_BOOL(ctx, debug_list, OID_AUTO, "enable_tx_lldp_filter", ICE_CTLFLAG_DEBUG | CTLFLAG_RDTUN, &sc->enable_tx_lldp_filter, 0, "Drop Ethertype 0x88cc LLDP frames originating from software on this PF"); ice_add_fw_logging_tunables(sc, sc->debug_sysctls); } #define ICE_SYSCTL_HELP_REQUEST_RESET \ "\nRequest the driver to initiate a reset." \ "\n\tpfr - Initiate a PF reset" \ "\n\tcorer - Initiate a CORE reset" \ "\n\tglobr - Initiate a GLOBAL reset" /** * @var rl_sysctl_ticks * @brief timestamp for latest reset request sysctl call * * Helps rate-limit the call to the sysctl which resets the device */ int rl_sysctl_ticks = 0; /** * ice_sysctl_request_reset - Request that the driver initiate a reset * @oidp: sysctl oid structure * @arg1: pointer to private data structure * @arg2: unused * @req: sysctl request pointer * * Callback for "request_reset" sysctl to request that the driver initiate * a reset. Expects to be passed one of the following strings * * "pfr" - Initiate a PF reset * "corer" - Initiate a CORE reset * "globr" - Initiate a Global reset */ static int ice_sysctl_request_reset(SYSCTL_HANDLER_ARGS) { struct ice_softc *sc = (struct ice_softc *)arg1; struct ice_hw *hw = &sc->hw; int status; enum ice_reset_req reset_type = ICE_RESET_INVAL; const char *reset_message; int ret; /* Buffer to store the requested reset string. Must contain enough * space to store the largest expected reset string, which currently * means 6 bytes of space. */ char reset[6] = ""; UNREFERENCED_PARAMETER(arg2); ret = priv_check(curthread, PRIV_DRIVER); if (ret) return (ret); if (ice_driver_is_detaching(sc)) return (ESHUTDOWN); /* Read in the requested reset type. 
*/ ret = sysctl_handle_string(oidp, reset, sizeof(reset), req); if ((ret) || (req->newptr == NULL)) return (ret); if (strcmp(reset, "pfr") == 0) { reset_message = "Requesting a PF reset"; reset_type = ICE_RESET_PFR; } else if (strcmp(reset, "corer") == 0) { reset_message = "Initiating a CORE reset"; reset_type = ICE_RESET_CORER; } else if (strcmp(reset, "globr") == 0) { reset_message = "Initiating a GLOBAL reset"; reset_type = ICE_RESET_GLOBR; } else if (strcmp(reset, "empr") == 0) { device_printf(sc->dev, "Triggering an EMP reset via software is not currently supported\n"); return (EOPNOTSUPP); } if (reset_type == ICE_RESET_INVAL) { device_printf(sc->dev, "%s is not a valid reset request\n", reset); return (EINVAL); } /* * Rate-limit the frequency at which this function is called. * Assuming this is called successfully once, typically, * everything should be handled within the allotted time frame. * However, in the odd setup situations, we've also put in * guards for when the reset has finished, but we're in the * process of rebuilding. And instead of queueing an intent, * simply error out and let the caller retry, if so desired. */ if (TICKS_2_MSEC(ticks - rl_sysctl_ticks) < 500) { device_printf(sc->dev, "Call frequency too high. Operation aborted.\n"); return (EBUSY); } rl_sysctl_ticks = ticks; if (TICKS_2_MSEC(ticks - sc->rebuild_ticks) < 100) { device_printf(sc->dev, "Device rebuilding. Operation aborted.\n"); return (EBUSY); } if (rd32(hw, GLGEN_RSTAT) & GLGEN_RSTAT_DEVSTATE_M) { device_printf(sc->dev, "Device in reset. Operation aborted.\n"); return (EBUSY); } device_printf(sc->dev, "%s\n", reset_message); /* Initiate the PF reset during the admin status task */ if (reset_type == ICE_RESET_PFR) { ice_set_state(&sc->state, ICE_STATE_RESET_PFR_REQ); return (0); } /* * Other types of resets including CORE and GLOBAL resets trigger an * interrupt on all PFs. Initiate the reset now. Preparation and * rebuild logic will be handled by the admin status task. */ status = ice_reset(hw, reset_type); /* * Resets can take a long time and we still don't want another call * to this function before we settle down. */ rl_sysctl_ticks = ticks; if (status) { device_printf(sc->dev, "failed to initiate device reset, err %s\n", ice_status_str(status)); ice_set_state(&sc->state, ICE_STATE_RESET_FAILED); return (EFAULT); } return (0); } #define ICE_AQC_DBG_DUMP_CLUSTER_ID_INVALID (0xFFFFFF) #define ICE_SYSCTL_HELP_FW_DEBUG_DUMP_CLUSTER_SETTING \ "\nSelect clusters to dump with \"dump\" sysctl" \ "\nFlags:" \ "\n\t 0 - All clusters (default)" \ "\n\t 0x1 - Switch" \ "\n\t 0x2 - ACL" \ "\n\t 0x4 - Tx Scheduler" \ "\n\t 0x8 - Profile Configuration" \ "\n\t 0x20 - Link" \ "\n\t 0x80 - DCB" \ "\n\t 0x100 - L2P" \ "\n\t 0x400000 - Manageability Transactions (excluding E830)" \ "\n" \ "\nUse \"sysctl -x\" to view flags properly." 
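/*
 * Usage sketch for the cluster-selection sysctl handled below (OID path
 * assumed; flag values from the help text above):
 *
 *   sysctl dev.ice.0.debug.dump.clusters=0x5    # Switch + Tx Scheduler
 */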
/** * ice_sysctl_fw_debug_dump_cluster_setting - Set which clusters to dump * from FW when FW debug dump occurs * @oidp: sysctl oid structure * @arg1: pointer to private data structure * @arg2: unused * @req: sysctl request pointer */ static int ice_sysctl_fw_debug_dump_cluster_setting(SYSCTL_HANDLER_ARGS) { struct ice_softc *sc = (struct ice_softc *)arg1; device_t dev = sc->dev; u32 clusters; int ret; UNREFERENCED_PARAMETER(arg2); ret = priv_check(curthread, PRIV_DRIVER); if (ret) return (ret); if (ice_driver_is_detaching(sc)) return (ESHUTDOWN); clusters = sc->fw_debug_dump_cluster_mask; ret = sysctl_handle_32(oidp, &clusters, 0, req); if ((ret) || (req->newptr == NULL)) return (ret); u32 valid_cluster_mask; if (ice_is_e830(&sc->hw)) valid_cluster_mask = ICE_FW_DEBUG_DUMP_VALID_CLUSTER_MASK_E830; else valid_cluster_mask = ICE_FW_DEBUG_DUMP_VALID_CLUSTER_MASK_E810; if (clusters & ~(valid_cluster_mask)) { device_printf(dev, "%s: ERROR: Incorrect settings requested\n", __func__); sc->fw_debug_dump_cluster_mask = ICE_AQC_DBG_DUMP_CLUSTER_ID_INVALID; return (EINVAL); } sc->fw_debug_dump_cluster_mask = clusters; return (0); } #define ICE_FW_DUMP_AQ_COUNT_LIMIT (10000) /** * ice_fw_debug_dump_print_cluster - Print formatted cluster data from FW * @sc: the device softc * @sbuf: initialized sbuf to print data to * @cluster_id: FW cluster ID to print data from * * Reads debug data from the specified cluster id in the FW and prints it to * the input sbuf. This function issues multiple AQ commands to the FW in * order to get all of the data in the cluster. * * @remark Only intended to be used by the sysctl handler * ice_sysctl_fw_debug_dump_do_dump */ static u16 ice_fw_debug_dump_print_cluster(struct ice_softc *sc, struct sbuf *sbuf, u16 cluster_id) { struct ice_hw *hw = &sc->hw; device_t dev = sc->dev; u16 data_buf_size = ICE_AQ_MAX_BUF_LEN; const u8 reserved_buf[8] = {}; int status; int counter = 0; u8 *data_buf; /* Input parameters / loop variables */ u16 table_id = 0; u32 offset = 0; /* Output from the Get Internal Data AQ command */ u16 ret_buf_size = 0; u16 ret_next_cluster = 0; u16 ret_next_table = 0; u32 ret_next_index = 0; /* Other setup */ data_buf = (u8 *)malloc(data_buf_size, M_ICE, M_NOWAIT | M_ZERO); if (!data_buf) return ret_next_cluster; ice_debug(hw, ICE_DBG_DIAG, "%s: dumping cluster id %d\n", __func__, cluster_id); for (;;) { /* Do not trust the FW behavior to be completely correct */ if (counter++ >= ICE_FW_DUMP_AQ_COUNT_LIMIT) { device_printf(dev, "%s: Exceeded counter limit for cluster %d\n", __func__, cluster_id); break; } ice_debug(hw, ICE_DBG_DIAG, "---\n"); ice_debug(hw, ICE_DBG_DIAG, "table_id 0x%04x offset 0x%08x buf_size %d\n", table_id, offset, data_buf_size); status = ice_aq_get_internal_data(hw, cluster_id, table_id, offset, data_buf, data_buf_size, &ret_buf_size, &ret_next_cluster, &ret_next_table, &ret_next_index, NULL); if (status) { device_printf(dev, "%s: ice_aq_get_internal_data in cluster %d: err %s aq_err %s\n", __func__, cluster_id, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); break; } ice_debug(hw, ICE_DBG_DIAG, "ret_table_id 0x%04x ret_offset 0x%08x ret_buf_size %d\n", ret_next_table, ret_next_index, ret_buf_size); /* Print cluster id */ u32 print_cluster_id = (u32)cluster_id; sbuf_bcat(sbuf, &print_cluster_id, sizeof(print_cluster_id)); /* Print table id */ u32 print_table_id = (u32)table_id; sbuf_bcat(sbuf, &print_table_id, sizeof(print_table_id)); /* Print table length */ u32 print_table_length = (u32)ret_buf_size; sbuf_bcat(sbuf, 
&print_table_length, sizeof(print_table_length)); /* Print current offset */ u32 print_curr_offset = offset; sbuf_bcat(sbuf, &print_curr_offset, sizeof(print_curr_offset)); /* Print reserved bytes */ sbuf_bcat(sbuf, reserved_buf, sizeof(reserved_buf)); /* Print data */ sbuf_bcat(sbuf, data_buf, ret_buf_size); /* Adjust loop variables */ memset(data_buf, 0, data_buf_size); bool same_table_next = (table_id == ret_next_table); bool last_table_next; if (ice_is_bit_set(sc->feat_en, ICE_FEATURE_NEXT_CLUSTER_ID)) last_table_next = (ret_next_table == 0xffff); else last_table_next = (ret_next_table == 0xff || ret_next_table == 0xffff); bool last_offset_next = (ret_next_index == 0xffffffff || ret_next_index == 0); if ((!same_table_next && !last_offset_next) || (same_table_next && last_table_next)) { device_printf(dev, "%s: Unexpected conditions for same_table_next(%d) last_table_next(%d) last_offset_next(%d), ending cluster (%d)\n", __func__, same_table_next, last_table_next, last_offset_next, cluster_id); break; } if (!same_table_next && !last_table_next && last_offset_next) { /* We've hit the end of the table */ table_id = ret_next_table; offset = 0; } else if (!same_table_next && last_table_next && last_offset_next) { /* We've hit the end of the cluster */ break; } else if (same_table_next && !last_table_next && last_offset_next) { if (cluster_id == 0x1 && table_id < 39) table_id += 1; else break; } else { /* if (same_table_next && !last_table_next && !last_offset_next) */ /* More data left in the table */ offset = ret_next_index; } } free(data_buf, M_ICE); return ret_next_cluster; } /** * ice_fw_debug_dump_print_clusters - Print data from FW clusters to sbuf * @sc: the device softc * @sbuf: initialized sbuf to print data to * * Dumps all of the requested clusters to the indicated sbuf. The * clusters to dump are determined by the value in the * fw_debug_dump_cluster_mask field in the sc argument. * * @remark Only intended to be used by the sysctl handler * ice_sysctl_fw_debug_dump_do_dump */ static void ice_fw_debug_dump_print_clusters(struct ice_softc *sc, struct sbuf *sbuf) { u16 next_cluster_id, max_cluster_id, start_cluster_id; u32 cluster_mask = sc->fw_debug_dump_cluster_mask; struct ice_hw *hw = &sc->hw; int bit; ice_debug(hw, ICE_DBG_DIAG, "%s: Debug Dump running...\n", __func__); if (ice_is_e830(hw)) { max_cluster_id = ICE_AQC_DBG_DUMP_CLUSTER_ID_QUEUE_MNG_E830; start_cluster_id = ICE_AQC_DBG_DUMP_CLUSTER_ID_SW_E830; } else { max_cluster_id = ICE_AQC_DBG_DUMP_CLUSTER_ID_QUEUE_MNG_E810; start_cluster_id = ICE_AQC_DBG_DUMP_CLUSTER_ID_SW_E810; } if (cluster_mask != 0) { for_each_set_bit(bit, &cluster_mask, sizeof(cluster_mask) * BITS_PER_BYTE) { ice_fw_debug_dump_print_cluster(sc, sbuf, bit + start_cluster_id); } } else { next_cluster_id = start_cluster_id; /* We don't support QUEUE_MNG and FULL_CSR_SPACE */ do { next_cluster_id = ice_fw_debug_dump_print_cluster(sc, sbuf, next_cluster_id); } while ((next_cluster_id != 0) && (next_cluster_id < max_cluster_id)); } } #define ICE_SYSCTL_HELP_FW_DEBUG_DUMP_DO_DUMP \ "\nWrite 1 to output a FW debug dump containing the clusters specified by the" \ "\n\"clusters\" sysctl." \ "\n" \ "\nThe \"-b\" flag must be used in order to dump this data as binary data because" \ "\nthis data is opaque and not a string."
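/*
 * Illustrative sketch of triggering and collecting a dump from userland,
 * per the help text above (the "dev.ice.0" OID prefix is an assumption
 * for device unit 0):
 *
 *	sysctl dev.ice.0.debug.dump.dump=1
 *	sysctl -b dev.ice.0.debug.dump.dump > dump.bin
 */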
#define ICE_FW_DUMP_BASE_TEXT_SIZE (1024 * 1024) #define ICE_FW_DUMP_ALL_TEXT_SIZE (10 * 1024 * 1024) #define ICE_FW_DUMP_CLUST0_TEXT_SIZE (2 * 1024 * 1024) #define ICE_FW_DUMP_CLUST1_TEXT_SIZE (128 * 1024) #define ICE_FW_DUMP_CLUST2_TEXT_SIZE (2 * 1024 * 1024) /** * ice_sysctl_fw_debug_dump_do_dump - Dump data from FW to sysctl output * @oidp: sysctl oid structure * @arg1: pointer to private data structure * @arg2: unused * @req: sysctl request pointer * * Sysctl handler for the debug.dump.dump sysctl. Prints out a specially- * formatted dump of some debug FW data intended to be processed by a special * Intel tool. Prints out the cluster data specified by the "clusters" * sysctl. * * @remark The actual AQ calls and printing are handled by a helper * function above. */ static int ice_sysctl_fw_debug_dump_do_dump(SYSCTL_HANDLER_ARGS) { struct ice_softc *sc = (struct ice_softc *)arg1; device_t dev = sc->dev; struct sbuf *sbuf; int ret; UNREFERENCED_PARAMETER(arg2); ret = priv_check(curthread, PRIV_DRIVER); if (ret) return (ret); if (ice_driver_is_detaching(sc)) return (ESHUTDOWN); /* If the user hasn't written "1" to this sysctl yet: */ if (!ice_test_state(&sc->state, ICE_STATE_DO_FW_DEBUG_DUMP)) { /* Avoid output on the first set of reads to this sysctl in * order to prevent a null byte from being written to the * end result when called via sysctl(8). */ if (req->oldptr == NULL && req->newptr == NULL) { ret = SYSCTL_OUT(req, 0, 0); return (ret); } char input_buf[2] = ""; ret = sysctl_handle_string(oidp, input_buf, sizeof(input_buf), req); if ((ret) || (req->newptr == NULL)) return (ret); /* If we get '1', then indicate we'll do a dump in the next * sysctl read call. */ if (input_buf[0] == '1') { if (sc->fw_debug_dump_cluster_mask == ICE_AQC_DBG_DUMP_CLUSTER_ID_INVALID) { device_printf(dev, "%s: Debug Dump failed because an invalid cluster was specified.\n", __func__); return (EINVAL); } ice_set_state(&sc->state, ICE_STATE_DO_FW_DEBUG_DUMP); return (0); } return (EINVAL); } /* --- FW debug dump state is set --- */ /* Caller just wants the upper bound for size */ if (req->oldptr == NULL && req->newptr == NULL) { size_t est_output_len = ICE_FW_DUMP_BASE_TEXT_SIZE; if (sc->fw_debug_dump_cluster_mask == 0) est_output_len += ICE_FW_DUMP_ALL_TEXT_SIZE; else { if (sc->fw_debug_dump_cluster_mask & 0x1) est_output_len += ICE_FW_DUMP_CLUST0_TEXT_SIZE; if (sc->fw_debug_dump_cluster_mask & 0x2) est_output_len += ICE_FW_DUMP_CLUST1_TEXT_SIZE; if (sc->fw_debug_dump_cluster_mask & 0x4) est_output_len += ICE_FW_DUMP_CLUST2_TEXT_SIZE; } ret = SYSCTL_OUT(req, 0, est_output_len); return (ret); } sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req); sbuf_clear_flags(sbuf, SBUF_INCLUDENUL); ice_fw_debug_dump_print_clusters(sc, sbuf); sbuf_finish(sbuf); sbuf_delete(sbuf); ice_clear_state(&sc->state, ICE_STATE_DO_FW_DEBUG_DUMP); return (ret); } /** * ice_add_debug_sysctls - Add sysctls helpful for debugging the device driver * @sc: device private structure * * Add sysctls related to debugging the device driver. Generally these should * simply be sysctls which dump internal driver state, to aid in understanding * what the driver is doing. 
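 *
 * For example, the "state" sysctl added below could be read with a command
 * along the lines of "sysctl dev.ice.0.debug.state"; the "dev.ice.0" OID
 * prefix is an assumption for unit 0 and depends on where the debug node
 * is rooted.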
*/ static void ice_add_debug_sysctls(struct ice_softc *sc) { struct sysctl_oid *sw_node, *dump_node; struct sysctl_oid_list *debug_list, *sw_list, *dump_list; device_t dev = sc->dev; struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev); debug_list = SYSCTL_CHILDREN(sc->debug_sysctls); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "request_reset", ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_WR, sc, 0, ice_sysctl_request_reset, "A", ICE_SYSCTL_HELP_REQUEST_RESET); SYSCTL_ADD_U32(ctx, debug_list, OID_AUTO, "pfr_count", ICE_CTLFLAG_DEBUG | CTLFLAG_RD, &sc->soft_stats.pfr_count, 0, "# of PF resets handled"); SYSCTL_ADD_U32(ctx, debug_list, OID_AUTO, "corer_count", ICE_CTLFLAG_DEBUG | CTLFLAG_RD, &sc->soft_stats.corer_count, 0, "# of CORE resets handled"); SYSCTL_ADD_U32(ctx, debug_list, OID_AUTO, "globr_count", ICE_CTLFLAG_DEBUG | CTLFLAG_RD, &sc->soft_stats.globr_count, 0, "# of Global resets handled"); SYSCTL_ADD_U32(ctx, debug_list, OID_AUTO, "empr_count", ICE_CTLFLAG_DEBUG | CTLFLAG_RD, &sc->soft_stats.empr_count, 0, "# of EMP resets handled"); SYSCTL_ADD_U32(ctx, debug_list, OID_AUTO, "tx_mdd_count", ICE_CTLFLAG_DEBUG | CTLFLAG_RD, &sc->soft_stats.tx_mdd_count, 0, "# of Tx MDD events detected"); SYSCTL_ADD_U32(ctx, debug_list, OID_AUTO, "rx_mdd_count", ICE_CTLFLAG_DEBUG | CTLFLAG_RD, &sc->soft_stats.rx_mdd_count, 0, "# of Rx MDD events detected"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "state", ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RD, sc, 0, ice_sysctl_dump_state_flags, "A", "Driver State Flags"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "set_link", ICE_CTLFLAG_DEBUG | CTLTYPE_U8 | CTLFLAG_RW, sc, 0, ice_sysctl_debug_set_link, "CU", "Set link"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "phy_type_low", ICE_CTLFLAG_DEBUG | CTLTYPE_U64 | CTLFLAG_RW, sc, 0, ice_sysctl_phy_type_low, "QU", "PHY type Low from Get PHY Caps/Set PHY Cfg"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "phy_type_high", ICE_CTLFLAG_DEBUG | CTLTYPE_U64 | CTLFLAG_RW, sc, 0, ice_sysctl_phy_type_high, "QU", "PHY type High from Get PHY Caps/Set PHY Cfg"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "phy_sw_caps", ICE_CTLFLAG_DEBUG | CTLTYPE_STRUCT | CTLFLAG_RD, sc, 0, ice_sysctl_phy_sw_caps, "", "Get PHY Capabilities (Software configuration)"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "phy_nvm_caps", ICE_CTLFLAG_DEBUG | CTLTYPE_STRUCT | CTLFLAG_RD, sc, 0, ice_sysctl_phy_nvm_caps, "", "Get PHY Capabilities (NVM configuration)"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "phy_topo_caps", ICE_CTLFLAG_DEBUG | CTLTYPE_STRUCT | CTLFLAG_RD, sc, 0, ice_sysctl_phy_topo_caps, "", "Get PHY Capabilities (Topology configuration)"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "phy_link_status", ICE_CTLFLAG_DEBUG | CTLTYPE_STRUCT | CTLFLAG_RD, sc, 0, ice_sysctl_phy_link_status, "", "Get PHY Link Status"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "read_i2c_diag_data", ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RD, sc, 0, ice_sysctl_read_i2c_diag_data, "A", "Dump selected diagnostic data from FW"); SYSCTL_ADD_U32(ctx, debug_list, OID_AUTO, "fw_build", ICE_CTLFLAG_DEBUG | CTLFLAG_RD, &sc->hw.fw_build, 0, "FW Build ID"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "os_ddp_version", ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RD, sc, 0, ice_sysctl_os_pkg_version, "A", "DDP package name and version found in ice_ddp"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "cur_lldp_persist_status", ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RD, sc, 0, ice_sysctl_fw_cur_lldp_persist_status, "A", "Current LLDP persistent status"); 
SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "dflt_lldp_persist_status", ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RD, sc, 0, ice_sysctl_fw_dflt_lldp_persist_status, "A", "Default LLDP persistent status"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "negotiated_fc", ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RD, sc, 0, ice_sysctl_negotiated_fc, "A", "Current Negotiated Flow Control mode"); if (ice_is_bit_set(sc->feat_en, ICE_FEATURE_PHY_STATISTICS)) { SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "phy_statistics", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, ice_sysctl_dump_phy_stats, "A", "Dumps PHY statistics from firmware"); } SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "local_dcbx_cfg", CTLTYPE_STRING | CTLFLAG_RD, sc, ICE_AQ_LLDP_MIB_LOCAL, ice_sysctl_dump_dcbx_cfg, "A", "Dumps Local MIB information from firmware"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "remote_dcbx_cfg", CTLTYPE_STRING | CTLFLAG_RD, sc, ICE_AQ_LLDP_MIB_REMOTE, ice_sysctl_dump_dcbx_cfg, "A", "Dumps Remote MIB information from firmware"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "pf_vsi_cfg", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, ice_sysctl_dump_vsi_cfg, "A", "Dumps Selected PF VSI parameters from firmware"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "query_port_ets", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, ice_sysctl_query_port_ets, "A", "Prints selected output from Query Port ETS AQ command"); SYSCTL_ADD_U64(ctx, debug_list, OID_AUTO, "rx_length_errors", CTLFLAG_RD | CTLFLAG_STATS, &sc->stats.cur.rx_len_errors, 0, "Receive Length Errors (SNAP packets)"); sw_node = SYSCTL_ADD_NODE(ctx, debug_list, OID_AUTO, "switch", ICE_CTLFLAG_DEBUG | CTLFLAG_RD, NULL, "Switch Configuration"); sw_list = SYSCTL_CHILDREN(sw_node); SYSCTL_ADD_PROC(ctx, sw_list, OID_AUTO, "mac_filters", ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RD, sc, 0, ice_sysctl_dump_mac_filters, "A", "MAC Filters"); SYSCTL_ADD_PROC(ctx, sw_list, OID_AUTO, "vlan_filters", ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RD, sc, 0, ice_sysctl_dump_vlan_filters, "A", "VLAN Filters"); SYSCTL_ADD_PROC(ctx, sw_list, OID_AUTO, "ethertype_filters", ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RD, sc, 0, ice_sysctl_dump_ethertype_filters, "A", "Ethertype Filters"); SYSCTL_ADD_PROC(ctx, sw_list, OID_AUTO, "ethertype_mac_filters", ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RD, sc, 0, ice_sysctl_dump_ethertype_mac_filters, "A", "Ethertype/MAC Filters"); dump_node = SYSCTL_ADD_NODE(ctx, debug_list, OID_AUTO, "dump", ICE_CTLFLAG_DEBUG | CTLFLAG_RD, NULL, "Internal FW Dump"); dump_list = SYSCTL_CHILDREN(dump_node); SYSCTL_ADD_PROC(ctx, dump_list, OID_AUTO, "clusters", ICE_CTLFLAG_DEBUG | CTLTYPE_U32 | CTLFLAG_RW, sc, 0, ice_sysctl_fw_debug_dump_cluster_setting, "SU", ICE_SYSCTL_HELP_FW_DEBUG_DUMP_CLUSTER_SETTING); SYSCTL_ADD_PROC(ctx, dump_list, OID_AUTO, "dump", ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, ice_sysctl_fw_debug_dump_do_dump, "", ICE_SYSCTL_HELP_FW_DEBUG_DUMP_DO_DUMP); } /** * ice_vsi_disable_tx - Disable (unconfigure) Tx queues for a VSI * @vsi: the VSI to disable * * Disables the Tx queues associated with this VSI. Essentially the opposite * of ice_cfg_vsi_for_tx. 
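 *
 * Minimal call sketch, mirroring its use by ice_stop_pf_vsi() later in
 * this file (which ignores the return value; checking it is optional):
 *
 *	err = ice_vsi_disable_tx(&sc->pf_vsi);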
*/ int ice_vsi_disable_tx(struct ice_vsi *vsi) { struct ice_softc *sc = vsi->sc; struct ice_hw *hw = &sc->hw; int status; u32 *q_teids; u16 *q_ids, *q_handles; size_t q_teids_size, q_ids_size, q_handles_size; int tc, j, buf_idx, err = 0; if (vsi->num_tx_queues > 255) return (ENOSYS); q_teids_size = sizeof(*q_teids) * vsi->num_tx_queues; q_teids = (u32 *)malloc(q_teids_size, M_ICE, M_NOWAIT|M_ZERO); if (!q_teids) return (ENOMEM); q_ids_size = sizeof(*q_ids) * vsi->num_tx_queues; q_ids = (u16 *)malloc(q_ids_size, M_ICE, M_NOWAIT|M_ZERO); if (!q_ids) { err = (ENOMEM); goto free_q_teids; } q_handles_size = sizeof(*q_handles) * vsi->num_tx_queues; q_handles = (u16 *)malloc(q_handles_size, M_ICE, M_NOWAIT|M_ZERO); if (!q_handles) { err = (ENOMEM); goto free_q_ids; } ice_for_each_traffic_class(tc) { struct ice_tc_info *tc_info = &vsi->tc_info[tc]; u16 start_idx, end_idx; /* Skip rest of disabled TCs once the first * disabled TC is found */ if (!(vsi->tc_map & BIT(tc))) break; /* Fill out TX queue information for this TC */ start_idx = tc_info->qoffset; end_idx = start_idx + tc_info->qcount_tx; buf_idx = 0; for (j = start_idx; j < end_idx; j++) { struct ice_tx_queue *txq = &vsi->tx_queues[j]; q_ids[buf_idx] = vsi->tx_qmap[j]; q_handles[buf_idx] = txq->q_handle; q_teids[buf_idx] = txq->q_teid; buf_idx++; } status = ice_dis_vsi_txq(hw->port_info, vsi->idx, tc, buf_idx, q_handles, q_ids, q_teids, ICE_NO_RESET, 0, NULL); if (status == ICE_ERR_DOES_NOT_EXIST) { ; /* Queues have already been disabled, no need to report this as an error */ } else if (status == ICE_ERR_RESET_ONGOING) { device_printf(sc->dev, "Reset in progress. LAN Tx queues already disabled\n"); break; } else if (status) { device_printf(sc->dev, "Failed to disable LAN Tx queues: err %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); err = (ENODEV); break; } /* Clear buffers */ memset(q_teids, 0, q_teids_size); memset(q_ids, 0, q_ids_size); memset(q_handles, 0, q_handles_size); } /* free_q_handles: */ free(q_handles, M_ICE); free_q_ids: free(q_ids, M_ICE); free_q_teids: free(q_teids, M_ICE); return err; } /** * ice_vsi_set_rss_params - Set the RSS parameters for the VSI * @vsi: the VSI to configure * * Sets the RSS table size and lookup table type for the VSI based on its * VSI type. */ static void ice_vsi_set_rss_params(struct ice_vsi *vsi) { struct ice_softc *sc = vsi->sc; struct ice_hw_common_caps *cap; cap = &sc->hw.func_caps.common_cap; switch (vsi->type) { case ICE_VSI_PF: /* The PF VSI inherits RSS instance of the PF */ vsi->rss_table_size = cap->rss_table_size; vsi->rss_lut_type = ICE_LUT_PF; break; case ICE_VSI_VF: case ICE_VSI_VMDQ2: vsi->rss_table_size = ICE_VSIQF_HLUT_ARRAY_SIZE; vsi->rss_lut_type = ICE_LUT_VSI; break; default: device_printf(sc->dev, "VSI %d: RSS not supported for VSI type %d\n", vsi->idx, vsi->type); break; } } /** * ice_vsi_add_txqs_ctx - Create a sysctl context and node to store txq sysctls * @vsi: The VSI to add the context for * * Creates a sysctl context for storing txq sysctls. Additionally creates * a node rooted at the given VSI's main sysctl node. This context will be * used to store per-txq sysctls which may need to be released during the * driver's lifetime. 
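 *
 * Illustrative lifecycle sketch (all of these functions are defined in
 * this file):
 *
 *	ice_vsi_add_txqs_ctx(vsi);	(once per VSI, after vsi_node exists)
 *	ice_add_txq_sysctls(txq);	(per Tx queue, once queues are set up)
 *	ice_vsi_del_txqs_ctx(vsi);	(before freeing the Tx queue memory)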
*/ void ice_vsi_add_txqs_ctx(struct ice_vsi *vsi) { struct sysctl_oid_list *vsi_list; sysctl_ctx_init(&vsi->txqs_ctx); vsi_list = SYSCTL_CHILDREN(vsi->vsi_node); vsi->txqs_node = SYSCTL_ADD_NODE(&vsi->txqs_ctx, vsi_list, OID_AUTO, "txqs", CTLFLAG_RD, NULL, "Tx Queues"); } /** * ice_vsi_add_rxqs_ctx - Create a sysctl context and node to store rxq sysctls * @vsi: The VSI to add the context for * * Creates a sysctl context for storing rxq sysctls. Additionally creates * a node rooted at the given VSI's main sysctl node. This context will be * used to store per-rxq sysctls which may need to be released during the * driver's lifetime. */ void ice_vsi_add_rxqs_ctx(struct ice_vsi *vsi) { struct sysctl_oid_list *vsi_list; sysctl_ctx_init(&vsi->rxqs_ctx); vsi_list = SYSCTL_CHILDREN(vsi->vsi_node); vsi->rxqs_node = SYSCTL_ADD_NODE(&vsi->rxqs_ctx, vsi_list, OID_AUTO, "rxqs", CTLFLAG_RD, NULL, "Rx Queues"); } /** * ice_vsi_del_txqs_ctx - Delete the Tx queue sysctl context for this VSI * @vsi: The VSI to delete from * * Frees the txq sysctl context created for storing the per-queue Tx sysctls. * Must be called prior to freeing the Tx queue memory, in order to avoid * having sysctls point at stale memory. */ void ice_vsi_del_txqs_ctx(struct ice_vsi *vsi) { device_t dev = vsi->sc->dev; int err; if (vsi->txqs_node) { err = sysctl_ctx_free(&vsi->txqs_ctx); if (err) device_printf(dev, "failed to free VSI %d txqs_ctx, err %s\n", vsi->idx, ice_err_str(err)); vsi->txqs_node = NULL; } } /** * ice_vsi_del_rxqs_ctx - Delete the Rx queue sysctl context for this VSI * @vsi: The VSI to delete from * * Frees the rxq sysctl context created for storing the per-queue Rx sysctls. * Must be called prior to freeing the Rx queue memory, in order to avoid * having sysctls point at stale memory. */ void ice_vsi_del_rxqs_ctx(struct ice_vsi *vsi) { device_t dev = vsi->sc->dev; int err; if (vsi->rxqs_node) { err = sysctl_ctx_free(&vsi->rxqs_ctx); if (err) device_printf(dev, "failed to free VSI %d rxqs_ctx, err %s\n", vsi->idx, ice_err_str(err)); vsi->rxqs_node = NULL; } } /** * ice_add_txq_sysctls - Add per-queue sysctls for a Tx queue * @txq: pointer to the Tx queue * * Add per-queue sysctls for a given Tx queue. Can't be called during * ice_add_vsi_sysctls, since the queue memory has not yet been setup. 
*/ void ice_add_txq_sysctls(struct ice_tx_queue *txq) { struct ice_vsi *vsi = txq->vsi; struct sysctl_ctx_list *ctx = &vsi->txqs_ctx; struct sysctl_oid_list *txqs_list, *this_txq_list; struct sysctl_oid *txq_node; char txq_name[32], txq_desc[32]; const struct ice_sysctl_info ctls[] = { { &txq->stats.tx_packets, "tx_packets", "Queue Packets Transmitted" }, { &txq->stats.tx_bytes, "tx_bytes", "Queue Bytes Transmitted" }, { &txq->stats.mss_too_small, "mss_too_small", "TSO sends with an MSS less than 64" }, { &txq->stats.tso, "tso", "TSO packets" }, { 0, 0, 0 } }; const struct ice_sysctl_info *entry = ctls; txqs_list = SYSCTL_CHILDREN(vsi->txqs_node); snprintf(txq_name, sizeof(txq_name), "%u", txq->me); snprintf(txq_desc, sizeof(txq_desc), "Tx Queue %u", txq->me); txq_node = SYSCTL_ADD_NODE(ctx, txqs_list, OID_AUTO, txq_name, CTLFLAG_RD, NULL, txq_desc); this_txq_list = SYSCTL_CHILDREN(txq_node); /* Add the Tx queue statistics */ while (entry->stat != 0) { SYSCTL_ADD_U64(ctx, this_txq_list, OID_AUTO, entry->name, CTLFLAG_RD | CTLFLAG_STATS, entry->stat, 0, entry->description); entry++; } SYSCTL_ADD_U8(ctx, this_txq_list, OID_AUTO, "tc", CTLFLAG_RD, &txq->tc, 0, "Traffic Class that Queue belongs to"); } /** * ice_add_rxq_sysctls - Add per-queue sysctls for an Rx queue * @rxq: pointer to the Rx queue * * Add per-queue sysctls for a given Rx queue. Can't be called during * ice_add_vsi_sysctls, since the queue memory has not yet been setup. */ void ice_add_rxq_sysctls(struct ice_rx_queue *rxq) { struct ice_vsi *vsi = rxq->vsi; struct sysctl_ctx_list *ctx = &vsi->rxqs_ctx; struct sysctl_oid_list *rxqs_list, *this_rxq_list; struct sysctl_oid *rxq_node; char rxq_name[32], rxq_desc[32]; const struct ice_sysctl_info ctls[] = { { &rxq->stats.rx_packets, "rx_packets", "Queue Packets Received" }, { &rxq->stats.rx_bytes, "rx_bytes", "Queue Bytes Received" }, { &rxq->stats.desc_errs, "rx_desc_errs", "Queue Rx Descriptor Errors" }, { 0, 0, 0 } }; const struct ice_sysctl_info *entry = ctls; rxqs_list = SYSCTL_CHILDREN(vsi->rxqs_node); snprintf(rxq_name, sizeof(rxq_name), "%u", rxq->me); snprintf(rxq_desc, sizeof(rxq_desc), "Rx Queue %u", rxq->me); rxq_node = SYSCTL_ADD_NODE(ctx, rxqs_list, OID_AUTO, rxq_name, CTLFLAG_RD, NULL, rxq_desc); this_rxq_list = SYSCTL_CHILDREN(rxq_node); /* Add the Rx queue statistics */ while (entry->stat != 0) { SYSCTL_ADD_U64(ctx, this_rxq_list, OID_AUTO, entry->name, CTLFLAG_RD | CTLFLAG_STATS, entry->stat, 0, entry->description); entry++; } SYSCTL_ADD_U8(ctx, this_rxq_list, OID_AUTO, "tc", CTLFLAG_RD, &rxq->tc, 0, "Traffic Class that Queue belongs to"); } /** * ice_get_default_rss_key - Obtain a default RSS key * @seed: storage for the RSS key data * * Copies a pre-generated RSS key into the seed memory. The seed pointer must * point to a block of memory that is at least 40 bytes in size. * * The key isn't randomly generated each time this function is called because * that makes the RSS key change every time we reconfigure RSS. This does mean * that we're hard coding a possibly 'well known' key. We might want to * investigate randomly generating this key once during the first call. 
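 *
 * Expected call pattern (sketch; the caller provides the storage):
 *
 *	u8 seed[ICE_AQC_GET_SET_RSS_KEY_DATA_RSS_KEY_SIZE];
 *
 *	ice_get_default_rss_key(seed);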
*/ static void ice_get_default_rss_key(u8 *seed) { const u8 default_seed[ICE_AQC_GET_SET_RSS_KEY_DATA_RSS_KEY_SIZE] = { 0x39, 0xed, 0xff, 0x4d, 0x43, 0x58, 0x42, 0xc3, 0x5f, 0xb8, 0xa5, 0x32, 0x95, 0x65, 0x81, 0xcd, 0x36, 0x79, 0x71, 0x97, 0xde, 0xa4, 0x41, 0x40, 0x6f, 0x27, 0xe9, 0x81, 0x13, 0xa0, 0x95, 0x93, 0x5b, 0x1e, 0x9d, 0x27, 0x9d, 0x24, 0x84, 0xb5, }; bcopy(default_seed, seed, ICE_AQC_GET_SET_RSS_KEY_DATA_RSS_KEY_SIZE); } /** * ice_set_rss_key - Configure a given VSI with the default RSS key * @vsi: the VSI to configure * * Program the hardware RSS key. We use rss_getkey to grab the kernel RSS key. * If the kernel RSS interface is not available, this will fall back to our * pre-generated hash seed from ice_get_default_rss_key(). */ static int ice_set_rss_key(struct ice_vsi *vsi) { struct ice_aqc_get_set_rss_keys keydata = { .standard_rss_key = {0} }; struct ice_softc *sc = vsi->sc; struct ice_hw *hw = &sc->hw; int status; /* * If the RSS kernel interface is disabled, this will return the * default RSS key above. */ rss_getkey(keydata.standard_rss_key); status = ice_aq_set_rss_key(hw, vsi->idx, &keydata); if (status) { device_printf(sc->dev, "ice_aq_set_rss_key status %s, error %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); return (EIO); } return (0); } /** * ice_set_rss_flow_flds - Program the RSS hash flows after package init * @vsi: the VSI to configure * * If the package file is initialized, the default RSS flows are reset. We * need to reprogram the expected hash configuration. We'll use * rss_gethashconfig() to determine which flows to enable. If RSS kernel * support is not enabled, this macro will fall back to suitable defaults. */ static void ice_set_rss_flow_flds(struct ice_vsi *vsi) { struct ice_softc *sc = vsi->sc; struct ice_hw *hw = &sc->hw; struct ice_rss_hash_cfg rss_cfg = { 0, 0, ICE_RSS_ANY_HEADERS, false }; device_t dev = sc->dev; int status; u_int rss_hash_config; rss_hash_config = rss_gethashconfig(); if (rss_hash_config & RSS_HASHTYPE_RSS_IPV4) { rss_cfg.addl_hdrs = ICE_FLOW_SEG_HDR_IPV4; rss_cfg.hash_flds = ICE_FLOW_HASH_IPV4; status = ice_add_rss_cfg(hw, vsi->idx, &rss_cfg); if (status) device_printf(dev, "ice_add_rss_cfg on VSI %d failed for ipv4 flow, err %s aq_err %s\n", vsi->idx, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); } if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV4) { rss_cfg.addl_hdrs = ICE_FLOW_SEG_HDR_IPV4 | ICE_FLOW_SEG_HDR_TCP; rss_cfg.hash_flds = ICE_HASH_TCP_IPV4; status = ice_add_rss_cfg(hw, vsi->idx, &rss_cfg); if (status) device_printf(dev, "ice_add_rss_cfg on VSI %d failed for tcp4 flow, err %s aq_err %s\n", vsi->idx, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); } if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV4) { rss_cfg.addl_hdrs = ICE_FLOW_SEG_HDR_IPV4 | ICE_FLOW_SEG_HDR_UDP; rss_cfg.hash_flds = ICE_HASH_UDP_IPV4; status = ice_add_rss_cfg(hw, vsi->idx, &rss_cfg); if (status) device_printf(dev, "ice_add_rss_cfg on VSI %d failed for udp4 flow, err %s aq_err %s\n", vsi->idx, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); } if (rss_hash_config & (RSS_HASHTYPE_RSS_IPV6 | RSS_HASHTYPE_RSS_IPV6_EX)) { rss_cfg.addl_hdrs = ICE_FLOW_SEG_HDR_IPV6; rss_cfg.hash_flds = ICE_FLOW_HASH_IPV6; status = ice_add_rss_cfg(hw, vsi->idx, &rss_cfg); if (status) device_printf(dev, "ice_add_rss_cfg on VSI %d failed for ipv6 flow, err %s aq_err %s\n", vsi->idx, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); } if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV6) { 
rss_cfg.addl_hdrs = ICE_FLOW_SEG_HDR_IPV6 | ICE_FLOW_SEG_HDR_TCP; rss_cfg.hash_flds = ICE_HASH_TCP_IPV6; status = ice_add_rss_cfg(hw, vsi->idx, &rss_cfg); if (status) device_printf(dev, "ice_add_rss_cfg on VSI %d failed for tcp6 flow, err %s aq_err %s\n", vsi->idx, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); } if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV6) { rss_cfg.addl_hdrs = ICE_FLOW_SEG_HDR_IPV6 | ICE_FLOW_SEG_HDR_UDP; rss_cfg.hash_flds = ICE_HASH_UDP_IPV6; status = ice_add_rss_cfg(hw, vsi->idx, &rss_cfg); if (status) device_printf(dev, "ice_add_rss_cfg on VSI %d failed for udp6 flow, err %s aq_err %s\n", vsi->idx, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); } /* Warn about RSS hash types which are not supported */ /* coverity[dead_error_condition] */ if (rss_hash_config & ~ICE_DEFAULT_RSS_HASH_CONFIG) { device_printf(dev, "ice_add_rss_cfg on VSI %d could not configure every requested hash type\n", vsi->idx); } } /** * ice_set_rss_lut - Program the RSS lookup table for a VSI * @vsi: the VSI to configure * * Programs the RSS lookup table for a given VSI. We use * rss_get_indirection_to_bucket which will use the indirection table provided * by the kernel RSS interface when available. If the kernel RSS interface is * not available, we will fall back to a simple round-robin fashion queue * assignment. */ static int ice_set_rss_lut(struct ice_vsi *vsi) { struct ice_softc *sc = vsi->sc; struct ice_hw *hw = &sc->hw; device_t dev = sc->dev; struct ice_aq_get_set_rss_lut_params lut_params; int status; int i, err = 0; u8 *lut; lut = (u8 *)malloc(vsi->rss_table_size, M_ICE, M_NOWAIT|M_ZERO); if (!lut) { device_printf(dev, "Failed to allocate RSS lut memory\n"); return (ENOMEM); } /* Populate the LUT with max no. of queues. If the RSS kernel * interface is disabled, this will assign the lookup table in * a simple round robin fashion */ for (i = 0; i < vsi->rss_table_size; i++) { /* XXX: this needs to be changed if num_rx_queues ever counts * more than just the RSS queues */ lut[i] = rss_get_indirection_to_bucket(i) % vsi->num_rx_queues; } lut_params.vsi_handle = vsi->idx; lut_params.lut_size = vsi->rss_table_size; lut_params.lut_type = vsi->rss_lut_type; lut_params.lut = lut; lut_params.global_lut_id = 0; status = ice_aq_set_rss_lut(hw, &lut_params); if (status) { device_printf(dev, "Cannot set RSS lut, err %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); err = (EIO); } free(lut, M_ICE); return err; } /** * ice_config_rss - Configure RSS for a VSI * @vsi: the VSI to configure * * If FEATURE_RSS is enabled, configures the RSS lookup table and hash key for * a given VSI. */ int ice_config_rss(struct ice_vsi *vsi) { int err; /* Nothing to do, if RSS is not enabled */ if (!ice_is_bit_set(vsi->sc->feat_en, ICE_FEATURE_RSS)) return 0; err = ice_set_rss_key(vsi); if (err) return err; ice_set_rss_flow_flds(vsi); return ice_set_rss_lut(vsi); } /** * ice_log_pkg_init - Log a message about status of DDP initialization * @sc: the device softc pointer * @pkg_status: the status result of ice_copy_and_init_pkg * * Called by ice_load_pkg after an attempt to download the DDP package * contents to the device to log an appropriate message for the system * administrator about download status. * * @post ice_is_init_pkg_successful function is used to determine * whether the download was successful and DDP package is compatible * with this driver. Otherwise driver will transition to Safe Mode. 
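 *
 * Typical call pattern, as in ice_load_pkg_file() below (sketch only;
 * "buf" and "len" stand in for the package image and its size):
 *
 *	state = ice_copy_and_init_pkg(hw, buf, len);
 *	ice_log_pkg_init(sc, state);
 *	if (!ice_is_init_pkg_successful(state))
 *		(driver transitions to Safe Mode)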
*/ void ice_log_pkg_init(struct ice_softc *sc, enum ice_ddp_state pkg_status) { struct ice_hw *hw = &sc->hw; device_t dev = sc->dev; struct sbuf *active_pkg, *os_pkg; active_pkg = sbuf_new_auto(); ice_active_pkg_version_str(hw, active_pkg); sbuf_finish(active_pkg); os_pkg = sbuf_new_auto(); ice_os_pkg_version_str(hw, os_pkg); sbuf_finish(os_pkg); switch (pkg_status) { case ICE_DDP_PKG_SUCCESS: device_printf(dev, "The DDP package was successfully loaded: %s.\n", sbuf_data(active_pkg)); break; case ICE_DDP_PKG_SAME_VERSION_ALREADY_LOADED: case ICE_DDP_PKG_ALREADY_LOADED: device_printf(dev, "DDP package already present on device: %s.\n", sbuf_data(active_pkg)); break; case ICE_DDP_PKG_COMPATIBLE_ALREADY_LOADED: device_printf(dev, "The driver could not load the DDP package file because a compatible DDP package is already present on the device. The device has package %s. The ice_ddp module has package: %s.\n", sbuf_data(active_pkg), sbuf_data(os_pkg)); break; case ICE_DDP_PKG_FILE_VERSION_TOO_HIGH: device_printf(dev, "The device has a DDP package that is higher than the driver supports. The device has package %s. The driver requires version %d.%d.x.x. Entering Safe Mode.\n", sbuf_data(active_pkg), ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR); break; case ICE_DDP_PKG_FILE_VERSION_TOO_LOW: device_printf(dev, "The device has a DDP package that is lower than the driver supports. The device has package %s. The driver requires version %d.%d.x.x. Entering Safe Mode.\n", sbuf_data(active_pkg), ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR); break; case ICE_DDP_PKG_ALREADY_LOADED_NOT_SUPPORTED: /* * This assumes that the active_pkg_ver will not be * initialized if the ice_ddp package version is not * supported. */ if (pkg_ver_empty(&hw->active_pkg_ver, hw->active_pkg_name)) { /* The ice_ddp version is not supported */ if (pkg_ver_compatible(&hw->pkg_ver) > 0) { device_printf(dev, "The DDP package in the ice_ddp module is higher than the driver supports. The ice_ddp module has package %s. The driver requires version %d.%d.x.x. Please use an updated driver. Entering Safe Mode.\n", sbuf_data(os_pkg), ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR); } else if (pkg_ver_compatible(&hw->pkg_ver) < 0) { device_printf(dev, "The DDP package in the ice_ddp module is lower than the driver supports. The ice_ddp module has package %s. The driver requires version %d.%d.x.x. Please use an updated ice_ddp module. Entering Safe Mode.\n", sbuf_data(os_pkg), ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR); } else { device_printf(dev, "An unknown error occurred when loading the DDP package. The ice_ddp module has package %s. The device has package %s. The driver requires version %d.%d.x.x. Entering Safe Mode.\n", sbuf_data(os_pkg), sbuf_data(active_pkg), ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR); } } else { if (pkg_ver_compatible(&hw->active_pkg_ver) > 0) { device_printf(dev, "The device has a DDP package that is higher than the driver supports. The device has package %s. The driver requires version %d.%d.x.x. Entering Safe Mode.\n", sbuf_data(active_pkg), ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR); } else if (pkg_ver_compatible(&hw->active_pkg_ver) < 0) { device_printf(dev, "The device has a DDP package that is lower than the driver supports. The device has package %s. The driver requires version %d.%d.x.x. Entering Safe Mode.\n", sbuf_data(active_pkg), ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR); } else { device_printf(dev, "An unknown error occurred when loading the DDP package. The ice_ddp module has package %s. 
The device has package %s. The driver requires version %d.%d.x.x. Entering Safe Mode.\n", sbuf_data(os_pkg), sbuf_data(active_pkg), ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR); } } break; case ICE_DDP_PKG_INVALID_FILE: device_printf(dev, "The DDP package in the ice_ddp module is invalid. Entering Safe Mode.\n"); break; case ICE_DDP_PKG_FW_MISMATCH: device_printf(dev, "The firmware loaded on the device is not compatible with the DDP package. Please update the device's NVM. Entering Safe Mode.\n"); break; case ICE_DDP_PKG_NO_SEC_MANIFEST: case ICE_DDP_PKG_FILE_SIGNATURE_INVALID: device_printf(dev, "The DDP package in the ice_ddp module cannot be loaded because its signature is not valid. Please use a valid ice_ddp module. Entering Safe Mode.\n"); break; case ICE_DDP_PKG_SECURE_VERSION_NBR_TOO_LOW: device_printf(dev, "The DDP package in the ice_ddp module could not be loaded because its security revision is too low. Please use an updated ice_ddp module. Entering Safe Mode.\n"); break; case ICE_DDP_PKG_MANIFEST_INVALID: case ICE_DDP_PKG_BUFFER_INVALID: device_printf(dev, "An error occurred on the device while loading the DDP package. Entering Safe Mode.\n"); break; default: device_printf(dev, "An unknown error occurred when loading the DDP package. Entering Safe Mode.\n"); break; } sbuf_delete(active_pkg); sbuf_delete(os_pkg); } /** * ice_load_pkg_file - Load the DDP package file using firmware_get * @sc: device private softc * * Use firmware_get to load the DDP package memory and then request that * firmware download the package contents and program the relevant hardware * bits. * * This function makes a copy of the DDP package memory which is tracked in * the ice_hw structure. The copy will be managed and released by * ice_deinit_hw(). This allows the firmware reference to be immediately * released using firmware_put. */ int ice_load_pkg_file(struct ice_softc *sc) { struct ice_hw *hw = &sc->hw; device_t dev = sc->dev; enum ice_ddp_state state; const struct firmware *pkg; int status = 0; u8 cached_layer_count; u8 *buf_copy; pkg = firmware_get("ice_ddp"); if (!pkg) { device_printf(dev, "The DDP package module (ice_ddp) failed to load or could not be found. Entering Safe Mode.\n"); if (cold) device_printf(dev, "The DDP package module cannot be automatically loaded while booting. 
You may want to specify ice_ddp_load=\"YES\" in your loader.conf\n"); status = ICE_ERR_CFG; goto err_load_pkg; } /* Check for topology change */ if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_TX_BALANCE)) { cached_layer_count = hw->num_tx_sched_layers; buf_copy = (u8 *)malloc(pkg->datasize, M_ICE, M_NOWAIT); if (buf_copy == NULL) return (ICE_ERR_NO_MEMORY); memcpy(buf_copy, pkg->data, pkg->datasize); status = ice_cfg_tx_topo(&sc->hw, buf_copy, pkg->datasize); free(buf_copy, M_ICE); /* Success indicates a change was made */ if (!status) { /* 9 -> 5 */ if (cached_layer_count == 9) device_printf(dev, "Transmit balancing feature enabled\n"); else device_printf(dev, "Transmit balancing feature disabled\n"); ice_set_bit(ICE_FEATURE_TX_BALANCE, sc->feat_en); return (status); } else if (status == ICE_ERR_CFG) { /* Status is ICE_ERR_CFG when DDP does not support transmit balancing */ device_printf(dev, "DDP package does not support transmit balancing feature - please update to the latest DDP package and try again\n"); } else if (status == ICE_ERR_ALREADY_EXISTS) { /* Requested config already loaded */ } else if (status == ICE_ERR_AQ_ERROR) { device_printf(dev, "Error configuring transmit balancing: %s\n", ice_status_str(status)); } } /* Copy and download the pkg contents */ state = ice_copy_and_init_pkg(hw, (const u8 *)pkg->data, pkg->datasize); /* Release the firmware reference */ firmware_put(pkg, FIRMWARE_UNLOAD); /* Check the active DDP package version and log a message */ ice_log_pkg_init(sc, state); /* On success, return early; otherwise fall through and place the driver into Safe Mode */ if (ice_is_init_pkg_successful(state)) return (ICE_ERR_ALREADY_EXISTS); err_load_pkg: ice_zero_bitmap(sc->feat_cap, ICE_FEATURE_COUNT); ice_zero_bitmap(sc->feat_en, ICE_FEATURE_COUNT); ice_set_bit(ICE_FEATURE_SAFE_MODE, sc->feat_cap); ice_set_bit(ICE_FEATURE_SAFE_MODE, sc->feat_en); return (status); } /** * ice_get_ifnet_counter - Retrieve counter value for a given ifnet counter * @vsi: the vsi to retrieve the value for * @counter: the counter type to retrieve * * Returns the value for a given ifnet counter. To do so, we calculate the * value based on the matching hardware statistics. */ uint64_t ice_get_ifnet_counter(struct ice_vsi *vsi, ift_counter counter) { struct ice_hw_port_stats *hs = &vsi->sc->stats.cur; struct ice_eth_stats *es = &vsi->hw_stats.cur; /* For some statistics, especially those related to error flows, we do * not have per-VSI counters. In this case, we just report the global * counters. 
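 * For example, IFCOUNTER_IERRORS below is summed from port-wide counters
 * (hs), while IFCOUNTER_IPACKETS is computed from the VSI's own
 * statistics (es).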
*/ switch (counter) { case IFCOUNTER_IPACKETS: return (es->rx_unicast + es->rx_multicast + es->rx_broadcast); case IFCOUNTER_IERRORS: return (hs->crc_errors + hs->illegal_bytes + hs->mac_local_faults + hs->mac_remote_faults + hs->rx_undersize + hs->rx_oversize + hs->rx_fragments + hs->rx_jabber); case IFCOUNTER_OPACKETS: return (es->tx_unicast + es->tx_multicast + es->tx_broadcast); case IFCOUNTER_OERRORS: return (es->tx_errors); case IFCOUNTER_COLLISIONS: return (0); case IFCOUNTER_IBYTES: return (es->rx_bytes); case IFCOUNTER_OBYTES: return (es->tx_bytes); case IFCOUNTER_IMCASTS: return (es->rx_multicast); case IFCOUNTER_OMCASTS: return (es->tx_multicast); case IFCOUNTER_IQDROPS: return (es->rx_discards); case IFCOUNTER_OQDROPS: return (hs->tx_dropped_link_down); case IFCOUNTER_NOPROTO: return (es->rx_unknown_protocol); default: return if_get_counter_default(vsi->sc->ifp, counter); } } /** * ice_save_pci_info - Save PCI configuration fields in HW struct * @hw: the ice_hw struct to save the PCI information in * @dev: the device to get the PCI information from * * This should only be called once, early in the device attach * process. */ void ice_save_pci_info(struct ice_hw *hw, device_t dev) { hw->vendor_id = pci_get_vendor(dev); hw->device_id = pci_get_device(dev); hw->subsystem_vendor_id = pci_get_subvendor(dev); hw->subsystem_device_id = pci_get_subdevice(dev); hw->revision_id = pci_get_revid(dev); hw->bus.device = pci_get_slot(dev); hw->bus.func = pci_get_function(dev); } /** * ice_replay_all_vsi_cfg - Replay configuration for all VSIs after reset * @sc: the device softc * * Replay the configuration for each VSI, and then clean up replay * information. Called after a hardware reset in order to reconfigure the * active VSIs. */ int ice_replay_all_vsi_cfg(struct ice_softc *sc) { struct ice_hw *hw = &sc->hw; int status; int i; for (i = 0; i < sc->num_available_vsi; i++) { struct ice_vsi *vsi = sc->all_vsi[i]; if (!vsi) continue; status = ice_replay_vsi(hw, vsi->idx); if (status) { device_printf(sc->dev, "Failed to replay VSI %d, err %s aq_err %s\n", vsi->idx, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); return (EIO); } } /* Cleanup replay filters after successful reconfiguration */ ice_replay_post(hw); return (0); } /** * ice_clean_vsi_rss_cfg - Cleanup RSS configuration for a given VSI * @vsi: pointer to the VSI structure * * Cleanup the advanced RSS configuration for a given VSI. This is necessary * during driver removal to ensure that all RSS resources are properly * released. * * @remark this function doesn't report an error as it is expected to be * called during driver reset and unload, and there isn't much the driver can * do if freeing RSS resources fails. */ static void ice_clean_vsi_rss_cfg(struct ice_vsi *vsi) { struct ice_softc *sc = vsi->sc; struct ice_hw *hw = &sc->hw; device_t dev = sc->dev; int status; status = ice_rem_vsi_rss_cfg(hw, vsi->idx); if (status) device_printf(dev, "Failed to remove RSS configuration for VSI %d, err %s\n", vsi->idx, ice_status_str(status)); /* Remove this VSI from the RSS list */ ice_rem_vsi_rss_list(hw, vsi->idx); } /** * ice_clean_all_vsi_rss_cfg - Cleanup RSS configuration for all VSIs * @sc: the device softc pointer * * Cleanup the advanced RSS configuration for all VSIs on a given PF * interface. * * @remark This should be called while preparing for a reset, to cleanup stale * RSS configuration for all VSIs. 
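 *
 * A reset path might therefore be sequenced as follows (sketch; see also
 * ice_replay_all_vsi_cfg() above):
 *
 *	ice_clean_all_vsi_rss_cfg(sc);
 *	(perform the reset)
 *	ice_replay_all_vsi_cfg(sc);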
*/ void ice_clean_all_vsi_rss_cfg(struct ice_softc *sc) { int i; /* No need to cleanup if RSS is not enabled */ if (!ice_is_bit_set(sc->feat_en, ICE_FEATURE_RSS)) return; for (i = 0; i < sc->num_available_vsi; i++) { struct ice_vsi *vsi = sc->all_vsi[i]; if (vsi) ice_clean_vsi_rss_cfg(vsi); } } /** * ice_requested_fec_mode - Return the requested FEC mode as a string * @pi: The port info structure * * Return a string representing the requested FEC mode. */ static const char * ice_requested_fec_mode(struct ice_port_info *pi) { struct ice_aqc_get_phy_caps_data pcaps = { 0 }; int status; status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG, &pcaps, NULL); if (status) /* Just report unknown if we can't get capabilities */ return "Unknown"; /* Check if RS-FEC has been requested first */ if (pcaps.link_fec_options & (ICE_AQC_PHY_FEC_25G_RS_528_REQ | ICE_AQC_PHY_FEC_25G_RS_544_REQ)) return ice_fec_str(ICE_FEC_RS); /* If RS FEC has not been requested, then check BASE-R */ if (pcaps.link_fec_options & (ICE_AQC_PHY_FEC_10G_KR_40G_KR4_REQ | ICE_AQC_PHY_FEC_25G_KR_REQ)) return ice_fec_str(ICE_FEC_BASER); return ice_fec_str(ICE_FEC_NONE); } /** * ice_negotiated_fec_mode - Return the negotiated FEC mode as a string * @pi: The port info structure * * Return a string representing the current FEC mode. */ static const char * ice_negotiated_fec_mode(struct ice_port_info *pi) { /* Check whether RS FEC has been negotiated first */ if (pi->phy.link_info.fec_info & (ICE_AQ_LINK_25G_RS_528_FEC_EN | ICE_AQ_LINK_25G_RS_544_FEC_EN)) return ice_fec_str(ICE_FEC_RS); /* If RS FEC has not been negotiated, then check BASE-R */ if (pi->phy.link_info.fec_info & ICE_AQ_LINK_25G_KR_FEC_EN) return ice_fec_str(ICE_FEC_BASER); return ice_fec_str(ICE_FEC_NONE); } /** * ice_autoneg_mode - Return string indicating whether autoneg completed * @pi: The port info structure * * Return "True" if autonegotiation is completed, "False" otherwise. */ static const char * ice_autoneg_mode(struct ice_port_info *pi) { if (pi->phy.link_info.an_info & ICE_AQ_AN_COMPLETED) return "True"; else return "False"; } /** * ice_flowcontrol_mode - Return string indicating the Flow Control mode * @pi: The port info structure * * Returns the current Flow Control mode as a string. */ static const char * ice_flowcontrol_mode(struct ice_port_info *pi) { return ice_fc_str(pi->fc.current_mode); } /** * ice_link_up_msg - Log a link up message with associated info * @sc: the device private softc * * Log a link up message with LOG_NOTICE message level. Include information * about the duplex, FEC mode, autonegotiation and flow control. */ void ice_link_up_msg(struct ice_softc *sc) { struct ice_hw *hw = &sc->hw; struct ifnet *ifp = sc->ifp; const char *speed, *req_fec, *neg_fec, *autoneg, *flowcontrol; speed = ice_aq_speed_to_str(hw->port_info); req_fec = ice_requested_fec_mode(hw->port_info); neg_fec = ice_negotiated_fec_mode(hw->port_info); autoneg = ice_autoneg_mode(hw->port_info); flowcontrol = ice_flowcontrol_mode(hw->port_info); log(LOG_NOTICE, "%s: Link is up, %s Full Duplex, Requested FEC: %s, Negotiated FEC: %s, Autoneg: %s, Flow Control: %s\n", if_name(ifp), speed, req_fec, neg_fec, autoneg, flowcontrol); } /** * ice_update_laa_mac - Update MAC address if Locally Administered * @sc: the device softc * * Update the device MAC address when a Locally Administered Address is * assigned. * * This function does *not* update the MAC filter list itself. 
Instead, it * should be called after ice_rm_pf_default_mac_filters, so that the previous * address filter will be removed, and before ice_cfg_pf_default_mac_filters, * so that the new address filter will be assigned. */ int ice_update_laa_mac(struct ice_softc *sc) { const u8 *lladdr = (const u8 *)if_getlladdr(sc->ifp); struct ice_hw *hw = &sc->hw; int status; /* If the address is the same, then there is nothing to update */ if (!memcmp(lladdr, hw->port_info->mac.lan_addr, ETHER_ADDR_LEN)) return (0); /* Reject Multicast addresses */ if (ETHER_IS_MULTICAST(lladdr)) return (EINVAL); status = ice_aq_manage_mac_write(hw, lladdr, ICE_AQC_MAN_MAC_UPDATE_LAA_WOL, NULL); if (status) { device_printf(sc->dev, "Failed to write mac %6D to firmware, err %s aq_err %s\n", lladdr, ":", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); return (EFAULT); } /* Copy the address into place of the LAN address. */ bcopy(lladdr, hw->port_info->mac.lan_addr, ETHER_ADDR_LEN); return (0); } /** * ice_get_and_print_bus_info - Save (PCI) bus info and print messages * @sc: device softc * * This will potentially print out a warning message if bus bandwidth * is insufficient for full-speed operation. This will not print out anything * for E82x devices since those are in SoCs, do not report valid PCIe info, * and cannot be moved to a different slot. * * This should only be called once, during the attach process, after * hw->port_info has been filled out with port link topology information * (from the Get PHY Capabilities Admin Queue command). */ void ice_get_and_print_bus_info(struct ice_softc *sc) { struct ice_hw *hw = &sc->hw; device_t dev = sc->dev; u16 pci_link_status; int offset; if (!ice_is_e810(hw) && !ice_is_e830(hw)) return; pci_find_cap(dev, PCIY_EXPRESS, &offset); pci_link_status = pci_read_config(dev, offset + PCIER_LINK_STA, 2); /* Fill out hw struct with PCIE link status info */ ice_set_pci_link_status_data(hw, pci_link_status); /* Use info to print out bandwidth messages */ ice_print_bus_link_data(dev, hw); if (ice_pcie_bandwidth_check(sc)) { device_printf(dev, "PCI-Express bandwidth available for this device may be insufficient for optimal performance.\n"); device_printf(dev, "Please move the device to a different PCI-e link with more lanes and/or higher transfer rate.\n"); } } /** * ice_pcie_bus_speed_to_rate - Convert driver bus speed enum value to * a 64-bit baudrate. * @speed: enum value to convert * * This only goes up to PCIE Gen 5. */ static uint64_t ice_pcie_bus_speed_to_rate(enum ice_pcie_bus_speed speed) { /* If the PCI-E speed is Gen1 or Gen2, then report * only 80% of bus speed to account for encoding overhead. */ switch (speed) { case ice_pcie_speed_2_5GT: return IF_Gbps(2); case ice_pcie_speed_5_0GT: return IF_Gbps(4); case ice_pcie_speed_8_0GT: return IF_Gbps(8); case ice_pcie_speed_16_0GT: return IF_Gbps(16); case ice_pcie_speed_32_0GT: return IF_Gbps(32); case ice_pcie_speed_unknown: default: return 0; } } /** * ice_pcie_lnk_width_to_int - Convert driver pci-e width enum value to * a 32-bit number. 
* @width: enum value to convert */ static int ice_pcie_lnk_width_to_int(enum ice_pcie_link_width width) { switch (width) { case ice_pcie_lnk_x1: return (1); case ice_pcie_lnk_x2: return (2); case ice_pcie_lnk_x4: return (4); case ice_pcie_lnk_x8: return (8); case ice_pcie_lnk_x12: return (12); case ice_pcie_lnk_x16: return (16); case ice_pcie_lnk_x32: return (32); case ice_pcie_lnk_width_resrv: case ice_pcie_lnk_width_unknown: default: return (0); } } /** * ice_pcie_bandwidth_check - Check if PCI-E bandwidth is sufficient for * full-speed device operation. * @sc: adapter softc * * Returns 0 if sufficient; 1 if not. */ static uint8_t ice_pcie_bandwidth_check(struct ice_softc *sc) { struct ice_hw *hw = &sc->hw; int num_ports, pcie_width; u64 pcie_speed, port_speed; MPASS(hw->port_info); num_ports = bitcount32(hw->func_caps.common_cap.valid_functions); port_speed = ice_phy_types_to_max_rate(hw->port_info); pcie_speed = ice_pcie_bus_speed_to_rate(hw->bus.speed); pcie_width = ice_pcie_lnk_width_to_int(hw->bus.width); /* * If 2x100 on E810 or 2x200 on E830, clamp ports to 1 -- 2nd port is * intended for failover. */ if ((port_speed >= IF_Gbps(100)) && ((port_speed == IF_Gbps(100) && ice_is_e810(hw)) || (port_speed == IF_Gbps(200) && ice_is_e830(hw)))) num_ports = 1; return !!((num_ports * port_speed) > pcie_speed * pcie_width); } /** * ice_print_bus_link_data - Print PCI-E bandwidth information * @dev: device to print string for * @hw: hw struct with PCI-e link information */ static void ice_print_bus_link_data(device_t dev, struct ice_hw *hw) { device_printf(dev, "PCI Express Bus: Speed %s Width %s\n", ((hw->bus.speed == ice_pcie_speed_32_0GT) ? "32.0GT/s" : (hw->bus.speed == ice_pcie_speed_16_0GT) ? "16.0GT/s" : (hw->bus.speed == ice_pcie_speed_8_0GT) ? "8.0GT/s" : (hw->bus.speed == ice_pcie_speed_5_0GT) ? "5.0GT/s" : (hw->bus.speed == ice_pcie_speed_2_5GT) ? "2.5GT/s" : "Unknown"), (hw->bus.width == ice_pcie_lnk_x32) ? "x32" : (hw->bus.width == ice_pcie_lnk_x16) ? "x16" : (hw->bus.width == ice_pcie_lnk_x12) ? "x12" : (hw->bus.width == ice_pcie_lnk_x8) ? "x8" : (hw->bus.width == ice_pcie_lnk_x4) ? "x4" : (hw->bus.width == ice_pcie_lnk_x2) ? "x2" : (hw->bus.width == ice_pcie_lnk_x1) ? "x1" : "Unknown"); } /** * ice_set_pci_link_status_data - store PCI bus info * @hw: pointer to hardware structure * @link_status: the link status word from PCI config space * * Stores the PCI bus info (speed, width, type) within the ice_hw structure **/ static void ice_set_pci_link_status_data(struct ice_hw *hw, u16 link_status) { u16 reg; hw->bus.type = ice_bus_pci_express; reg = (link_status & PCIEM_LINK_STA_WIDTH) >> 4; switch (reg) { case ice_pcie_lnk_x1: case ice_pcie_lnk_x2: case ice_pcie_lnk_x4: case ice_pcie_lnk_x8: case ice_pcie_lnk_x12: case ice_pcie_lnk_x16: case ice_pcie_lnk_x32: hw->bus.width = (enum ice_pcie_link_width)reg; break; default: hw->bus.width = ice_pcie_lnk_width_unknown; break; } reg = (link_status & PCIEM_LINK_STA_SPEED) + 0x13; switch (reg) { case ice_pcie_speed_2_5GT: case ice_pcie_speed_5_0GT: case ice_pcie_speed_8_0GT: case ice_pcie_speed_16_0GT: case ice_pcie_speed_32_0GT: hw->bus.speed = (enum ice_pcie_bus_speed)reg; break; default: hw->bus.speed = ice_pcie_speed_unknown; break; } } /** * ice_init_link_events - Initialize Link Status Events mask * @sc: the device softc * * Initialize the Link Status Events mask to disable notification of link * events we don't care about in software. Also request that link status * events be enabled. 
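 *
 * For example, given the wanted events set below, the mask passed to
 * ice_aq_set_event_mask() is the bitwise complement:
 *
 *	~(ICE_AQ_LINK_EVENT_UPDOWN | ICE_AQ_LINK_EVENT_MEDIA_NA |
 *	  ICE_AQ_LINK_EVENT_MODULE_QUAL_FAIL)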
*/ int ice_init_link_events(struct ice_softc *sc) { struct ice_hw *hw = &sc->hw; int status; u16 wanted_events; /* Set the bits for the events that we want to be notified by */ wanted_events = (ICE_AQ_LINK_EVENT_UPDOWN | ICE_AQ_LINK_EVENT_MEDIA_NA | ICE_AQ_LINK_EVENT_MODULE_QUAL_FAIL); /* request that every event except the wanted events be masked */ status = ice_aq_set_event_mask(hw, hw->port_info->lport, ~wanted_events, NULL); if (status) { device_printf(sc->dev, "Failed to set link status event mask, err %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); return (EIO); } /* Request link info with the LSE bit set to enable link status events */ status = ice_aq_get_link_info(hw->port_info, true, NULL, NULL); if (status) { device_printf(sc->dev, "Failed to enable link status events, err %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); return (EIO); } return (0); } #ifndef GL_MDET_TX_TCLAN /* Temporarily use this redefinition until the definition is fixed */ #define GL_MDET_TX_TCLAN E800_GL_MDET_TX_TCLAN #define PF_MDET_TX_TCLAN E800_PF_MDET_TX_TCLAN #endif /* !defined(GL_MDET_TX_TCLAN) */ /** * ice_handle_mdd_event - Handle possibly malicious events * @sc: the device softc * * Called by the admin task if an MDD detection interrupt is triggered. * Identifies possibly malicious events coming from VFs. Also triggers for * similar incorrect behavior from the PF as well. */ void ice_handle_mdd_event(struct ice_softc *sc) { struct ice_hw *hw = &sc->hw; bool mdd_detected = false, request_reinit = false; device_t dev = sc->dev; u32 reg; if (!ice_testandclear_state(&sc->state, ICE_STATE_MDD_PENDING)) return; reg = rd32(hw, GL_MDET_TX_TCLAN); if (reg & GL_MDET_TX_TCLAN_VALID_M) { u8 pf_num = (reg & GL_MDET_TX_TCLAN_PF_NUM_M) >> GL_MDET_TX_TCLAN_PF_NUM_S; u16 vf_num = (reg & GL_MDET_TX_TCLAN_VF_NUM_M) >> GL_MDET_TX_TCLAN_VF_NUM_S; u8 event = (reg & GL_MDET_TX_TCLAN_MAL_TYPE_M) >> GL_MDET_TX_TCLAN_MAL_TYPE_S; u16 queue = (reg & GL_MDET_TX_TCLAN_QNUM_M) >> GL_MDET_TX_TCLAN_QNUM_S; device_printf(dev, "Malicious Driver Detection Tx Descriptor check event '%s' on Tx queue %u PF# %u VF# %u\n", ice_mdd_tx_tclan_str(event), queue, pf_num, vf_num); /* Only clear this event if it matches this PF, that way other * PFs can read the event and determine VF and queue number. */ if (pf_num == hw->pf_id) wr32(hw, GL_MDET_TX_TCLAN, 0xffffffff); mdd_detected = true; } /* Determine what triggered the MDD event */ reg = rd32(hw, GL_MDET_TX_PQM); if (reg & GL_MDET_TX_PQM_VALID_M) { u8 pf_num = (reg & GL_MDET_TX_PQM_PF_NUM_M) >> GL_MDET_TX_PQM_PF_NUM_S; u16 vf_num = (reg & GL_MDET_TX_PQM_VF_NUM_M) >> GL_MDET_TX_PQM_VF_NUM_S; u8 event = (reg & GL_MDET_TX_PQM_MAL_TYPE_M) >> GL_MDET_TX_PQM_MAL_TYPE_S; u16 queue = (reg & GL_MDET_TX_PQM_QNUM_M) >> GL_MDET_TX_PQM_QNUM_S; device_printf(dev, "Malicious Driver Detection Tx Quanta check event '%s' on Tx queue %u PF# %u VF# %u\n", ice_mdd_tx_pqm_str(event), queue, pf_num, vf_num); /* Only clear this event if it matches this PF, that way other * PFs can read the event and determine VF and queue number. 
*/ if (pf_num == hw->pf_id) wr32(hw, GL_MDET_TX_PQM, 0xffffffff); mdd_detected = true; } reg = rd32(hw, GL_MDET_RX); if (reg & GL_MDET_RX_VALID_M) { u8 pf_num = (reg & GL_MDET_RX_PF_NUM_M) >> GL_MDET_RX_PF_NUM_S; u16 vf_num = (reg & GL_MDET_RX_VF_NUM_M) >> GL_MDET_RX_VF_NUM_S; u8 event = (reg & GL_MDET_RX_MAL_TYPE_M) >> GL_MDET_RX_MAL_TYPE_S; u16 queue = (reg & GL_MDET_RX_QNUM_M) >> GL_MDET_RX_QNUM_S; device_printf(dev, "Malicious Driver Detection Rx event '%s' on Rx queue %u PF# %u VF# %u\n", ice_mdd_rx_str(event), queue, pf_num, vf_num); /* Only clear this event if it matches this PF, that way other * PFs can read the event and determine VF and queue number. */ if (pf_num == hw->pf_id) wr32(hw, GL_MDET_RX, 0xffffffff); mdd_detected = true; } /* Now, confirm that this event actually affects this PF, by checking * the PF registers. */ if (mdd_detected) { reg = rd32(hw, PF_MDET_TX_TCLAN); if (reg & PF_MDET_TX_TCLAN_VALID_M) { wr32(hw, PF_MDET_TX_TCLAN, 0xffff); sc->soft_stats.tx_mdd_count++; request_reinit = true; } reg = rd32(hw, PF_MDET_TX_PQM); if (reg & PF_MDET_TX_PQM_VALID_M) { wr32(hw, PF_MDET_TX_PQM, 0xffff); sc->soft_stats.tx_mdd_count++; request_reinit = true; } reg = rd32(hw, PF_MDET_RX); if (reg & PF_MDET_RX_VALID_M) { wr32(hw, PF_MDET_RX, 0xffff); sc->soft_stats.rx_mdd_count++; request_reinit = true; } } /* TODO: Implement logic to detect and handle events caused by VFs. */ /* request that the upper stack re-initialize the Tx/Rx queues */ if (request_reinit) ice_request_stack_reinit(sc); ice_flush(hw); } /** * ice_start_dcbx_agent - Start DCBX agent in FW via AQ command * @sc: the device softc * * @pre device is DCB capable and the FW LLDP agent has started * * Checks DCBX status and starts the DCBX agent if it is not in * a valid state via an AQ command. */ static void ice_start_dcbx_agent(struct ice_softc *sc) { struct ice_hw *hw = &sc->hw; device_t dev = sc->dev; bool dcbx_agent_status; int status; hw->port_info->qos_cfg.dcbx_status = ice_get_dcbx_status(hw); if (hw->port_info->qos_cfg.dcbx_status != ICE_DCBX_STATUS_DONE && hw->port_info->qos_cfg.dcbx_status != ICE_DCBX_STATUS_IN_PROGRESS) { /* * Start DCBX agent, but not LLDP. The return value isn't * checked here because a more detailed dcbx agent status is * retrieved and checked in ice_init_dcb() and elsewhere. */ status = ice_aq_start_stop_dcbx(hw, true, &dcbx_agent_status, NULL); if (status && hw->adminq.sq_last_status != ICE_AQ_RC_EPERM) device_printf(dev, "start_stop_dcbx failed, err %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); } } /** * ice_init_dcb_setup - Initialize DCB settings for HW * @sc: the device softc * * This needs to be called after the fw_lldp_agent sysctl is added, since that * can update the device's LLDP agent status if a tunable value is set. * * Get and store the initial state of DCB settings on driver load. Print out * informational messages as well. 
*/ void ice_init_dcb_setup(struct ice_softc *sc) { struct ice_dcbx_cfg *local_dcbx_cfg; struct ice_hw *hw = &sc->hw; device_t dev = sc->dev; int status; u8 pfcmode_ret; /* Don't do anything if DCB isn't supported */ if (!ice_is_bit_set(sc->feat_cap, ICE_FEATURE_DCB)) { device_printf(dev, "%s: No DCB support\n", __func__); return; } /* Starts DCBX agent if it needs starting */ ice_start_dcbx_agent(sc); /* This sets hw->port_info->qos_cfg.is_sw_lldp */ status = ice_init_dcb(hw, true); /* If there is an error, then FW LLDP is not in a usable state */ if (status != 0 && status != ICE_ERR_NOT_READY) { /* Don't print an error message if the return code from the AQ * cmd performed in ice_init_dcb() is EPERM; that means the * FW LLDP engine is disabled, and that is a valid state. */ if (!(status == ICE_ERR_AQ_ERROR && hw->adminq.sq_last_status == ICE_AQ_RC_EPERM)) { device_printf(dev, "DCB init failed, err %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); } hw->port_info->qos_cfg.dcbx_status = ICE_DCBX_STATUS_NOT_STARTED; } switch (hw->port_info->qos_cfg.dcbx_status) { case ICE_DCBX_STATUS_DIS: ice_debug(hw, ICE_DBG_DCB, "DCBX disabled\n"); break; case ICE_DCBX_STATUS_NOT_STARTED: ice_debug(hw, ICE_DBG_DCB, "DCBX not started\n"); break; case ICE_DCBX_STATUS_MULTIPLE_PEERS: ice_debug(hw, ICE_DBG_DCB, "DCBX detected multiple peers\n"); break; default: break; } /* LLDP disabled in FW */ if (hw->port_info->qos_cfg.is_sw_lldp) { ice_add_rx_lldp_filter(sc); device_printf(dev, "Firmware LLDP agent disabled\n"); } /* Query and cache PFC mode */ status = ice_aq_query_pfc_mode(hw, &pfcmode_ret, NULL); if (status) { device_printf(dev, "PFC mode query failed, err %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); } local_dcbx_cfg = &hw->port_info->qos_cfg.local_dcbx_cfg; switch (pfcmode_ret) { case ICE_AQC_PFC_VLAN_BASED_PFC: local_dcbx_cfg->pfc_mode = ICE_QOS_MODE_VLAN; break; case ICE_AQC_PFC_DSCP_BASED_PFC: local_dcbx_cfg->pfc_mode = ICE_QOS_MODE_DSCP; break; default: /* DCB is disabled, but we shouldn't get here */ break; } /* Set default SW MIB for init */ ice_set_default_local_mib_settings(sc); ice_set_bit(ICE_FEATURE_DCB, sc->feat_en); } /** * ice_dcb_get_tc_map - Scans config to get bitmap of enabled TCs * @dcbcfg: DCB configuration to examine * * Scans a TC mapping table inside dcbcfg to find traffic classes * enabled and @returns a bitmask of enabled TCs */ u8 ice_dcb_get_tc_map(const struct ice_dcbx_cfg *dcbcfg) { u8 tc_map = 0; int i = 0; switch (dcbcfg->pfc_mode) { case ICE_QOS_MODE_VLAN: /* XXX: "i" is actually "User Priority" here, not * Traffic Class, but the max for both is 8, so it works * out here. */ for (i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) tc_map |= BIT(dcbcfg->etscfg.prio_table[i]); break; case ICE_QOS_MODE_DSCP: for (i = 0; i < ICE_DSCP_NUM_VAL; i++) tc_map |= BIT(dcbcfg->dscp_map[i]); break; default: /* Invalid Mode */ tc_map = ICE_DFLT_TRAFFIC_CLASS; break; } return (tc_map); } /** * ice_dcb_get_num_tc - Get the number of TCs from DCBX config * @dcbcfg: config to retrieve number of TCs from * * @return number of contiguous TCs found in dcbcfg's ETS Configuration * Priority Assignment Table, a value from 1 to 8. If there are * non-contiguous TCs used (e.g. assigning 1 and 3 without using 2), * then returns 0. 
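 *
 * Worked examples of the behavior described above:
 *
 *	tc_map 0x0F (TCs 0-3, contiguous)	returns 4
 *	tc_map 0x0B (TCs 0, 1 and 3)		returns 0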
*/ static u8 ice_dcb_get_num_tc(struct ice_dcbx_cfg *dcbcfg) { u8 tc_map; tc_map = ice_dcb_get_tc_map(dcbcfg); return (ice_dcb_tc_contig(tc_map)); } /** * ice_debug_print_mib_change_event - helper function to log LLDP MIB change events * @sc: the device private softc * @event: event received on a control queue * * Prints out the type and contents of an LLDP MIB change event in a DCB debug message. */ static void ice_debug_print_mib_change_event(struct ice_softc *sc, struct ice_rq_event_info *event) { struct ice_aqc_lldp_get_mib *params = (struct ice_aqc_lldp_get_mib *)&event->desc.params.lldp_get_mib; u8 mib_type, bridge_type, tx_status; static const char* mib_type_strings[] = { "Local MIB", "Remote MIB", "Reserved", "Reserved" }; static const char* bridge_type_strings[] = { "Nearest Bridge", "Non-TPMR Bridge", "Reserved", "Reserved" }; static const char* tx_status_strings[] = { "Port's TX active", "Port's TX suspended and drained", "Reserved", "Port's TX suspended and drained; blocked TC pipe flushed" }; mib_type = (params->type & ICE_AQ_LLDP_MIB_TYPE_M) >> ICE_AQ_LLDP_MIB_TYPE_S; bridge_type = (params->type & ICE_AQ_LLDP_BRID_TYPE_M) >> ICE_AQ_LLDP_BRID_TYPE_S; tx_status = (params->type & ICE_AQ_LLDP_TX_M) >> ICE_AQ_LLDP_TX_S; ice_debug(&sc->hw, ICE_DBG_DCB, "LLDP MIB Change Event (%s, %s, %s)\n", mib_type_strings[mib_type], bridge_type_strings[bridge_type], tx_status_strings[tx_status]); /* Nothing else to report */ if (!event->msg_buf) return; ice_debug(&sc->hw, ICE_DBG_DCB, "- %s contents:\n", mib_type_strings[mib_type]); ice_debug_array(&sc->hw, ICE_DBG_DCB, 16, 1, event->msg_buf, event->msg_len); } /** * ice_dcb_needs_reconfig - Returns true if driver needs to reconfigure * @sc: the device private softc * @old_cfg: Old DCBX configuration to compare against * @new_cfg: New DCBX configuration to check * * @return true if something changed in new_cfg that requires the driver * to do some reconfiguration. 
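 *
 * Changes to the ETS priority table, ETS bandwidth table, ETS TSA table, or
 * the PFC configuration all require a reconfig; APP table changes are only
 * logged.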
*/
static bool
ice_dcb_needs_reconfig(struct ice_softc *sc, struct ice_dcbx_cfg *old_cfg,
		       struct ice_dcbx_cfg *new_cfg)
{
	struct ice_hw *hw = &sc->hw;
	bool needs_reconfig = false;

	/* No change detected in DCBX config */
	if (!memcmp(old_cfg, new_cfg, sizeof(*old_cfg))) {
		ice_debug(hw, ICE_DBG_DCB,
		    "No change detected in local DCBX configuration\n");
		return (false);
	}

	/* Check if ETS config has changed */
	if (memcmp(&new_cfg->etscfg, &old_cfg->etscfg,
		   sizeof(new_cfg->etscfg))) {
		/* If Priority Table has changed, then driver reconfig is needed */
		if (memcmp(&new_cfg->etscfg.prio_table,
			   &old_cfg->etscfg.prio_table,
			   sizeof(new_cfg->etscfg.prio_table))) {
			ice_debug(hw, ICE_DBG_DCB, "ETS UP2TC changed\n");
			needs_reconfig = true;
		}

		/* Bandwidth and TSA table changes also require a reconfig */
		if (memcmp(&new_cfg->etscfg.tcbwtable,
			   &old_cfg->etscfg.tcbwtable,
			   sizeof(new_cfg->etscfg.tcbwtable))) {
			ice_debug(hw, ICE_DBG_DCB, "ETS TCBW table changed\n");
			needs_reconfig = true;
		}

		if (memcmp(&new_cfg->etscfg.tsatable,
			   &old_cfg->etscfg.tsatable,
			   sizeof(new_cfg->etscfg.tsatable))) {
			ice_debug(hw, ICE_DBG_DCB, "ETS TSA table changed\n");
			needs_reconfig = true;
		}
	}

	/* Check if PFC config has changed */
	if (memcmp(&new_cfg->pfc, &old_cfg->pfc, sizeof(new_cfg->pfc))) {
		ice_debug(hw, ICE_DBG_DCB, "PFC config changed\n");
		needs_reconfig = true;
	}

	/* Check if APP table has changed */
	if (memcmp(&new_cfg->app, &old_cfg->app, sizeof(new_cfg->app)))
		ice_debug(hw, ICE_DBG_DCB, "APP Table changed\n");

	ice_debug(hw, ICE_DBG_DCB, "%s result: %d\n", __func__,
	    needs_reconfig);

	return (needs_reconfig);
}

/**
 * ice_stop_pf_vsi - Stop queues for PF LAN VSI
 * @sc: the device private softc
 *
 * Flushes interrupts and stops the queues associated with the PF LAN VSI.
 */
static void
ice_stop_pf_vsi(struct ice_softc *sc)
{
	/* Dissociate the Tx and Rx queues from the interrupts */
	ice_flush_txq_interrupts(&sc->pf_vsi);
	ice_flush_rxq_interrupts(&sc->pf_vsi);

	if (!ice_testandclear_state(&sc->state, ICE_STATE_DRIVER_INITIALIZED))
		return;

	/* Disable the Tx and Rx queues */
	ice_vsi_disable_tx(&sc->pf_vsi);
	ice_control_all_rx_queues(&sc->pf_vsi, false);
}

/**
 * ice_vsi_setup_q_map - Setup a VSI queue map
 * @vsi: the VSI being configured
 * @ctxt: VSI context structure
 */
static void
ice_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt)
{
	u16 qcounts[ICE_MAX_TRAFFIC_CLASS] = {};
	u16 offset = 0, qmap = 0, pow = 0;
	u16 num_q_per_tc, qcount_rx, rem_queues;
	int i, j, k;

	if (vsi->num_tcs == 0) {
		/* at least TC0 should be enabled by default */
		vsi->num_tcs = 1;
		vsi->tc_map = 0x1;
	}

	qcount_rx = vsi->num_rx_queues;
	num_q_per_tc = min(qcount_rx / vsi->num_tcs, ICE_MAX_RXQS_PER_TC);

	if (!num_q_per_tc)
		num_q_per_tc = 1;

	/* Set initial values for # of queues to use for each active TC */
	ice_for_each_traffic_class(i)
		if (i < vsi->num_tcs)
			qcounts[i] = num_q_per_tc;

	/* If any queues are unassigned, add them to TC 0 */
	rem_queues = qcount_rx % vsi->num_tcs;
	if (rem_queues > 0)
		qcounts[0] += rem_queues;

	/* TC mapping is a function of the number of Rx queues assigned to the
	 * VSI for each traffic class and the offset of these queues.
	 * The low bits (ICE_AQ_VSI_TC_Q_OFFSET_M) hold the queue offset for
	 * the TC; the next four bits (ICE_AQ_VSI_TC_Q_NUM_M) hold the number
	 * of queues allocated to the TC, encoded as a power of 2.
	 *
	 * If a TC is not enabled, the queue offset is set to 0 and one queue
	 * is allocated; this way, traffic for the given TC will be sent to
	 * the default queue.
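	 *
	 * For example (illustrative values): a TC with queue offset 0 and 4
	 * queues gives pow = fls(3) = 2, so its qmap entry encodes offset 0
	 * and a queue count of 2^2 = 4.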
	 *
	 * Setup number and offset of Rx queues for all TCs for the VSI
	 */
	ice_for_each_traffic_class(i) {
		if (!(vsi->tc_map & BIT(i))) {
			/* TC is not enabled */
			vsi->tc_info[i].qoffset = 0;
			vsi->tc_info[i].qcount_rx = 1;
			vsi->tc_info[i].qcount_tx = 1;

			ctxt->info.tc_mapping[i] = 0;
			continue;
		}

		/* TC is enabled */
		vsi->tc_info[i].qoffset = offset;
		vsi->tc_info[i].qcount_rx = qcounts[i];
		vsi->tc_info[i].qcount_tx = qcounts[i];

		/* find the (rounded up) log-2 of queue count for current TC */
		pow = fls(qcounts[i] - 1);

		qmap = ((offset << ICE_AQ_VSI_TC_Q_OFFSET_S) &
			ICE_AQ_VSI_TC_Q_OFFSET_M) |
			((pow << ICE_AQ_VSI_TC_Q_NUM_S) &
			ICE_AQ_VSI_TC_Q_NUM_M);
		ctxt->info.tc_mapping[i] = CPU_TO_LE16(qmap);

		/* Store traffic class and handle data in queue structures */
		for (j = offset, k = 0; j < offset + qcounts[i]; j++, k++) {
			vsi->tx_queues[j].q_handle = k;
			vsi->tx_queues[j].tc = i;

			vsi->rx_queues[j].tc = i;
		}

		offset += qcounts[i];
	}

	/* Rx queue mapping */
	ctxt->info.mapping_flags |= CPU_TO_LE16(ICE_AQ_VSI_Q_MAP_CONTIG);
	ctxt->info.q_mapping[0] = CPU_TO_LE16(vsi->rx_qmap[0]);
	ctxt->info.q_mapping[1] = CPU_TO_LE16(vsi->num_rx_queues);
}

/**
 * ice_pf_vsi_cfg_tc - Configure PF VSI for a given TC map
 * @sc: the device private softc
 * @tc_map: traffic class bitmap
 *
 * @pre VSI queues are stopped
 *
 * @return 0 if configuration is successful
 * @return EIO if Update VSI AQ cmd fails
 * @return ENODEV if updating Tx Scheduler fails
 */
static int
ice_pf_vsi_cfg_tc(struct ice_softc *sc, u8 tc_map)
{
	u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 };
	struct ice_vsi *vsi = &sc->pf_vsi;
	struct ice_hw *hw = &sc->hw;
	struct ice_vsi_ctx ctx = { 0 };
	device_t dev = sc->dev;
	int status;
	u8 num_tcs = 0;
	int i = 0;

	/* Count the number of enabled Traffic Classes */
	ice_for_each_traffic_class(i)
		if (tc_map & BIT(i))
			num_tcs++;

	vsi->tc_map = tc_map;
	vsi->num_tcs = num_tcs;

	/* Set default parameters for context */
	ctx.vf_num = 0;
	ctx.info = vsi->info;

	/* Setup queue map */
	ice_vsi_setup_q_map(vsi, &ctx);

	/* Update VSI configuration in firmware (RX queues) */
	ctx.info.valid_sections = CPU_TO_LE16(ICE_AQ_VSI_PROP_RXQ_MAP_VALID);
	status = ice_update_vsi(hw, vsi->idx, &ctx, NULL);
	if (status) {
		device_printf(dev,
		    "%s: Update VSI AQ call failed, err %s aq_err %s\n",
		    __func__, ice_status_str(status),
		    ice_aq_str(hw->adminq.sq_last_status));
		return (EIO);
	}
	vsi->info = ctx.info;

	/* Use values derived in ice_vsi_setup_q_map() */
	for (i = 0; i < num_tcs; i++)
		max_txqs[i] = vsi->tc_info[i].qcount_tx;

	if (hw->debug_mask & ICE_DBG_DCB) {
		device_printf(dev, "%s: max_txqs:", __func__);
		ice_for_each_traffic_class(i)
			printf(" %d", max_txqs[i]);
		printf("\n");
	}

	/* Update LAN Tx queue info in firmware */
	status = ice_cfg_vsi_lan(hw->port_info, vsi->idx, vsi->tc_map,
	    max_txqs);
	if (status) {
		device_printf(dev,
		    "%s: Failed VSI lan queue config, err %s aq_err %s\n",
		    __func__, ice_status_str(status),
		    ice_aq_str(hw->adminq.sq_last_status));
		return (ENODEV);
	}

	vsi->info.valid_sections = 0;

	return (0);
}

/**
 * ice_dcb_tc_contig - Count TCs if they're contiguous
 * @tc_map: bitmap of enabled traffic classes
 *
 * @return The number of traffic classes in
 * an 8-bit TC bitmap, or if there is a gap, then returns 0.
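 *
 * For example, a tc_map of 0x07 (TC0-TC2) returns 3, while 0x05 (TC0 and
 * TC2 with TC1 unused) returns 0.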
*/ static u8 ice_dcb_tc_contig(u8 tc_map) { bool tc_unused = false; u8 ret = 0; /* Scan bitmask for contiguous TCs starting with TC0 */ for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) { if (tc_map & BIT(i)) { if (!tc_unused) { ret++; } else { /* Non-contiguous TCs detected */ return (0); } } else tc_unused = true; } return (ret); } /** * ice_dcb_recfg - Reconfigure VSI with new DCB settings * @sc: the device private softc * * @pre All VSIs have been disabled/stopped * * Reconfigures VSI settings based on local_dcbx_cfg. */ static void ice_dcb_recfg(struct ice_softc *sc) { struct ice_dcbx_cfg *dcbcfg = &sc->hw.port_info->qos_cfg.local_dcbx_cfg; device_t dev = sc->dev; u8 tc_map = 0; int ret; tc_map = ice_dcb_get_tc_map(dcbcfg); /* If non-contiguous TCs are used, then configure * the default TC instead. There's no support for * non-contiguous TCs being used. */ if (ice_dcb_tc_contig(tc_map) == 0) { tc_map = ICE_DFLT_TRAFFIC_CLASS; ice_set_default_local_lldp_mib(sc); } /* Reconfigure VSI queues to add/remove traffic classes */ ret = ice_pf_vsi_cfg_tc(sc, tc_map); if (ret) device_printf(dev, "Failed to configure TCs for PF VSI, err %s\n", ice_err_str(ret)); } /** * ice_set_default_local_mib_settings - Set Local LLDP MIB to default settings * @sc: device softc structure * * Overwrites the driver's SW local LLDP MIB with default settings. This * ensures the driver has a valid MIB when it next uses the Set Local LLDP MIB * admin queue command. */ static void ice_set_default_local_mib_settings(struct ice_softc *sc) { struct ice_dcbx_cfg *dcbcfg; struct ice_hw *hw = &sc->hw; struct ice_port_info *pi; u8 maxtcs, maxtcs_ets, old_pfc_mode; pi = hw->port_info; dcbcfg = &pi->qos_cfg.local_dcbx_cfg; maxtcs = hw->func_caps.common_cap.maxtc; /* This value is only 3 bits; 8 TCs maps to 0 */ maxtcs_ets = maxtcs & ICE_IEEE_ETS_MAXTC_M; /* VLAN vs DSCP mode needs to be preserved */ old_pfc_mode = dcbcfg->pfc_mode; /** * Setup the default settings used by the driver for the Set Local * LLDP MIB Admin Queue command (0x0A08). (1TC w/ 100% BW, ETS, no * PFC, TSA=2). */ memset(dcbcfg, 0, sizeof(*dcbcfg)); dcbcfg->etscfg.willing = 1; dcbcfg->etscfg.tcbwtable[0] = 100; dcbcfg->etscfg.maxtcs = maxtcs_ets; dcbcfg->etscfg.tsatable[0] = 2; dcbcfg->etsrec = dcbcfg->etscfg; dcbcfg->etsrec.willing = 0; dcbcfg->pfc.willing = 1; dcbcfg->pfc.pfccap = maxtcs; dcbcfg->pfc_mode = old_pfc_mode; } /** * ice_do_dcb_reconfig - notify RDMA and reconfigure PF LAN VSI * @sc: the device private softc * @pending_mib: FW has a pending MIB change to execute * * @pre Determined that the DCB configuration requires a change * * Reconfigures the PF LAN VSI based on updated DCB configuration * found in the hw struct's/port_info's/ local dcbx configuration. */ void ice_do_dcb_reconfig(struct ice_softc *sc, bool pending_mib) { struct ice_aqc_port_ets_elem port_ets = { 0 }; struct ice_dcbx_cfg *local_dcbx_cfg; struct ice_hw *hw = &sc->hw; struct ice_port_info *pi; device_t dev = sc->dev; int status; pi = sc->hw.port_info; local_dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg; ice_rdma_notify_dcb_qos_change(sc); /* If there's a pending MIB, tell the FW to execute the MIB change * now. 
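	 * An ENOENT error from the firmware here just means that no MIB
	 * change was actually pending; that case is reported separately
	 * below.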
*/ if (pending_mib) { status = ice_lldp_execute_pending_mib(hw); if ((status == ICE_ERR_AQ_ERROR) && (hw->adminq.sq_last_status == ICE_AQ_RC_ENOENT)) { device_printf(dev, "Execute Pending LLDP MIB AQ call failed, no pending MIB\n"); } else if (status) { device_printf(dev, "Execute Pending LLDP MIB AQ call failed, err %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); /* This won't break traffic, but QoS will not work as expected */ } } /* Set state when there's more than one TC */ if (ice_dcb_get_num_tc(local_dcbx_cfg) > 1) { device_printf(dev, "Multiple traffic classes enabled\n"); ice_set_state(&sc->state, ICE_STATE_MULTIPLE_TCS); } else { device_printf(dev, "Multiple traffic classes disabled\n"); ice_clear_state(&sc->state, ICE_STATE_MULTIPLE_TCS); } /* Disable PF VSI since it's going to be reconfigured */ ice_stop_pf_vsi(sc); /* Query ETS configuration and update SW Tx scheduler info */ status = ice_query_port_ets(pi, &port_ets, sizeof(port_ets), NULL); if (status) { device_printf(dev, "Query Port ETS AQ call failed, err %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); /* This won't break traffic, but QoS will not work as expected */ } /* Change PF VSI configuration */ ice_dcb_recfg(sc); /* Send new configuration to RDMA client driver */ ice_rdma_dcb_qos_update(sc, pi); ice_request_stack_reinit(sc); } /** * ice_handle_mib_change_event - helper function to handle LLDP MIB change events * @sc: the device private softc * @event: event received on a control queue * * Checks the updated MIB it receives and possibly reconfigures the PF LAN * VSI depending on what has changed. This will also print out some debug * information about the MIB event if ICE_DBG_DCB is enabled in the debug_mask. */ static void ice_handle_mib_change_event(struct ice_softc *sc, struct ice_rq_event_info *event) { struct ice_aqc_lldp_get_mib *params = (struct ice_aqc_lldp_get_mib *)&event->desc.params.lldp_get_mib; struct ice_dcbx_cfg tmp_dcbx_cfg, *local_dcbx_cfg; struct ice_port_info *pi; device_t dev = sc->dev; struct ice_hw *hw = &sc->hw; bool needs_reconfig, mib_is_pending; int status; u8 mib_type, bridge_type; ASSERT_CFG_LOCKED(sc); ice_debug_print_mib_change_event(sc, event); pi = sc->hw.port_info; mib_type = (params->type & ICE_AQ_LLDP_MIB_TYPE_M) >> ICE_AQ_LLDP_MIB_TYPE_S; bridge_type = (params->type & ICE_AQ_LLDP_BRID_TYPE_M) >> ICE_AQ_LLDP_BRID_TYPE_S; mib_is_pending = (params->state & ICE_AQ_LLDP_MIB_CHANGE_STATE_M) >> ICE_AQ_LLDP_MIB_CHANGE_STATE_S; /* Ignore if event is not for Nearest Bridge */ if (bridge_type != ICE_AQ_LLDP_BRID_TYPE_NEAREST_BRID) return; /* Check MIB Type and return if event for Remote MIB update */ if (mib_type == ICE_AQ_LLDP_MIB_REMOTE) { /* Update the cached remote MIB and return */ status = ice_aq_get_dcb_cfg(pi->hw, ICE_AQ_LLDP_MIB_REMOTE, ICE_AQ_LLDP_BRID_TYPE_NEAREST_BRID, &pi->qos_cfg.remote_dcbx_cfg); if (status) device_printf(dev, "%s: Failed to get Remote DCB config; status %s, aq_err %s\n", __func__, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); /* Not fatal if this fails */ return; } /* Save line length by aliasing the local dcbx cfg */ local_dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg; /* Save off the old configuration and clear current config */ tmp_dcbx_cfg = *local_dcbx_cfg; memset(local_dcbx_cfg, 0, sizeof(*local_dcbx_cfg)); /* Update the current local_dcbx_cfg with new data */ if (mib_is_pending) { ice_get_dcb_cfg_from_mib_change(pi, event); } else { /* Get updated DCBX data from firmware */ 
status = ice_get_dcb_cfg(pi); if (status) { device_printf(dev, "%s: Failed to get Local DCB config; status %s, aq_err %s\n", __func__, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); return; } } /* Check to see if DCB needs reconfiguring */ needs_reconfig = ice_dcb_needs_reconfig(sc, &tmp_dcbx_cfg, local_dcbx_cfg); if (!needs_reconfig && !mib_is_pending) return; /* Reconfigure -- this will also notify FW that configuration is done, * if the FW MIB change is only pending instead of executed. */ ice_do_dcb_reconfig(sc, mib_is_pending); } /** * ice_send_version - Send driver version to firmware * @sc: the device private softc * * Send the driver version to the firmware. This must be called as early as * possible after ice_init_hw(). */ int ice_send_version(struct ice_softc *sc) { struct ice_driver_ver driver_version = {0}; struct ice_hw *hw = &sc->hw; device_t dev = sc->dev; int status; driver_version.major_ver = ice_major_version; driver_version.minor_ver = ice_minor_version; driver_version.build_ver = ice_patch_version; driver_version.subbuild_ver = ice_rc_version; strlcpy((char *)driver_version.driver_string, ice_driver_version, sizeof(driver_version.driver_string)); status = ice_aq_send_driver_ver(hw, &driver_version, NULL); if (status) { device_printf(dev, "Unable to send driver version to firmware, err %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); return (EIO); } return (0); } /** * ice_handle_lan_overflow_event - helper function to log LAN overflow events * @sc: device softc * @event: event received on a control queue * * Prints out a message when a LAN overflow event is detected on a receive * queue. */ static void ice_handle_lan_overflow_event(struct ice_softc *sc, struct ice_rq_event_info *event) { struct ice_aqc_event_lan_overflow *params = (struct ice_aqc_event_lan_overflow *)&event->desc.params.lan_overflow; struct ice_hw *hw = &sc->hw; ice_debug(hw, ICE_DBG_DCB, "LAN overflow event detected, prtdcb_ruptq=0x%08x, qtx_ctl=0x%08x\n", LE32_TO_CPU(params->prtdcb_ruptq), LE32_TO_CPU(params->qtx_ctl)); } /** * ice_add_ethertype_to_list - Add an Ethertype filter to a filter list * @vsi: the VSI to target packets to * @list: the list to add the filter to * @ethertype: the Ethertype to filter on * @direction: The direction of the filter (Tx or Rx) * @action: the action to take * * Add an Ethertype filter to a filter list. Used to forward a series of * filters to the firmware for configuring the switch. * * Returns 0 on success, and an error code on failure. */ static int ice_add_ethertype_to_list(struct ice_vsi *vsi, struct ice_list_head *list, u16 ethertype, u16 direction, enum ice_sw_fwd_act_type action) { struct ice_fltr_list_entry *entry; MPASS((direction == ICE_FLTR_TX) || (direction == ICE_FLTR_RX)); entry = (__typeof(entry))malloc(sizeof(*entry), M_ICE, M_NOWAIT|M_ZERO); if (!entry) return (ENOMEM); entry->fltr_info.flag = direction; entry->fltr_info.src_id = ICE_SRC_ID_VSI; entry->fltr_info.lkup_type = ICE_SW_LKUP_ETHERTYPE; entry->fltr_info.fltr_act = action; entry->fltr_info.vsi_handle = vsi->idx; entry->fltr_info.l_data.ethertype_mac.ethertype = ethertype; LIST_ADD(&entry->list_entry, list); return 0; } #define ETHERTYPE_PAUSE_FRAMES 0x8808 #define ETHERTYPE_LLDP_FRAMES 0x88cc /** * ice_cfg_pf_ethertype_filters - Configure switch to drop ethertypes * @sc: the device private softc * * Configure the switch to drop PAUSE frames and LLDP frames transmitted from * the host. 
This prevents malicious VFs from sending these frames and being * able to control or configure the network. */ int ice_cfg_pf_ethertype_filters(struct ice_softc *sc) { struct ice_list_head ethertype_list; struct ice_vsi *vsi = &sc->pf_vsi; struct ice_hw *hw = &sc->hw; device_t dev = sc->dev; int status; int err = 0; INIT_LIST_HEAD(ðertype_list); /* * Note that the switch filters will ignore the VSI index for the drop * action, so we only need to program drop filters once for the main * VSI. */ /* Configure switch to drop all Tx pause frames coming from any VSI. */ if (sc->enable_tx_fc_filter) { err = ice_add_ethertype_to_list(vsi, ðertype_list, ETHERTYPE_PAUSE_FRAMES, ICE_FLTR_TX, ICE_DROP_PACKET); if (err) goto free_ethertype_list; } /* Configure switch to drop LLDP frames coming from any VSI */ if (sc->enable_tx_lldp_filter) { err = ice_add_ethertype_to_list(vsi, ðertype_list, ETHERTYPE_LLDP_FRAMES, ICE_FLTR_TX, ICE_DROP_PACKET); if (err) goto free_ethertype_list; } status = ice_add_eth_mac(hw, ðertype_list); if (status) { device_printf(dev, "Failed to add Tx Ethertype filters, err %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); err = (EIO); } free_ethertype_list: ice_free_fltr_list(ðertype_list); return err; } /** * ice_add_rx_lldp_filter - add ethertype filter for Rx LLDP frames * @sc: the device private structure * * Add a switch ethertype filter which forwards the LLDP frames to the main PF * VSI. Called when the fw_lldp_agent is disabled, to allow the LLDP frames to * be forwarded to the stack. */ void ice_add_rx_lldp_filter(struct ice_softc *sc) { struct ice_list_head ethertype_list; struct ice_vsi *vsi = &sc->pf_vsi; struct ice_hw *hw = &sc->hw; device_t dev = sc->dev; int status; int err; u16 vsi_num; /* * If FW is new enough, use a direct AQ command to perform the filter * addition. */ if (ice_fw_supports_lldp_fltr_ctrl(hw)) { vsi_num = ice_get_hw_vsi_num(hw, vsi->idx); status = ice_lldp_fltr_add_remove(hw, vsi_num, true); if (status) { device_printf(dev, "Failed to add Rx LLDP filter, err %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); } else ice_set_state(&sc->state, ICE_STATE_LLDP_RX_FLTR_FROM_DRIVER); return; } INIT_LIST_HEAD(ðertype_list); /* Forward Rx LLDP frames to the stack */ err = ice_add_ethertype_to_list(vsi, ðertype_list, ETHERTYPE_LLDP_FRAMES, ICE_FLTR_RX, ICE_FWD_TO_VSI); if (err) { device_printf(dev, "Failed to add Rx LLDP filter, err %s\n", ice_err_str(err)); goto free_ethertype_list; } status = ice_add_eth_mac(hw, ðertype_list); if (status && status != ICE_ERR_ALREADY_EXISTS) { device_printf(dev, "Failed to add Rx LLDP filter, err %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); } else { /* * If status == ICE_ERR_ALREADY_EXISTS, we won't treat an * already existing filter as an error case. */ ice_set_state(&sc->state, ICE_STATE_LLDP_RX_FLTR_FROM_DRIVER); } free_ethertype_list: ice_free_fltr_list(ðertype_list); } /** * ice_del_rx_lldp_filter - Remove ethertype filter for Rx LLDP frames * @sc: the device private structure * * Remove the switch filter forwarding LLDP frames to the main PF VSI, called * when the firmware LLDP agent is enabled, to stop routing LLDP frames to the * stack. 
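 *
 * This mirrors ice_add_rx_lldp_filter(): on firmware new enough for direct
 * LLDP filter control, the removal is done with an AQ command; otherwise
 * the switch ethertype filter is removed. Nothing is done unless this
 * driver instance added the filter (ICE_STATE_LLDP_RX_FLTR_FROM_DRIVER).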
*/ static void ice_del_rx_lldp_filter(struct ice_softc *sc) { struct ice_list_head ethertype_list; struct ice_vsi *vsi = &sc->pf_vsi; struct ice_hw *hw = &sc->hw; device_t dev = sc->dev; int status; int err; u16 vsi_num; /* * Only in the scenario where the driver added the filter during * this session (while the driver was loaded) would we be able to * delete this filter. */ if (!ice_test_state(&sc->state, ICE_STATE_LLDP_RX_FLTR_FROM_DRIVER)) return; /* * If FW is new enough, use a direct AQ command to perform the filter * removal. */ if (ice_fw_supports_lldp_fltr_ctrl(hw)) { vsi_num = ice_get_hw_vsi_num(hw, vsi->idx); status = ice_lldp_fltr_add_remove(hw, vsi_num, false); if (status) { device_printf(dev, "Failed to remove Rx LLDP filter, err %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); } return; } INIT_LIST_HEAD(ðertype_list); /* Remove filter forwarding Rx LLDP frames to the stack */ err = ice_add_ethertype_to_list(vsi, ðertype_list, ETHERTYPE_LLDP_FRAMES, ICE_FLTR_RX, ICE_FWD_TO_VSI); if (err) { device_printf(dev, "Failed to remove Rx LLDP filter, err %s\n", ice_err_str(err)); goto free_ethertype_list; } status = ice_remove_eth_mac(hw, ðertype_list); if (status == ICE_ERR_DOES_NOT_EXIST) { ; /* Don't complain if we try to remove a filter that doesn't exist */ } else if (status) { device_printf(dev, "Failed to remove Rx LLDP filter, err %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); } free_ethertype_list: ice_free_fltr_list(ðertype_list); } /** * ice_init_link_configuration -- Setup link in different ways depending * on whether media is available or not. * @sc: device private structure * * Called at the end of the attach process to either set default link * parameters if there is media available, or force HW link down and * set a state bit if there is no media. */ void ice_init_link_configuration(struct ice_softc *sc) { struct ice_port_info *pi = sc->hw.port_info; struct ice_hw *hw = &sc->hw; device_t dev = sc->dev; - int status; + int status, retry_count = 0; +retry: pi->phy.get_link_info = true; status = ice_get_link_status(pi, &sc->link_up); + if (status) { - device_printf(dev, - "%s: ice_get_link_status failed; status %s, aq_err %s\n", - __func__, ice_status_str(status), - ice_aq_str(hw->adminq.sq_last_status)); + if (hw->adminq.sq_last_status == ICE_AQ_RC_EAGAIN) { + retry_count++; + ice_debug(hw, ICE_DBG_LINK, + "%s: ice_get_link_status failed with EAGAIN, attempt %d\n", + __func__, retry_count); + if (retry_count < ICE_LINK_AQ_MAX_RETRIES) { + ice_msec_pause(ICE_LINK_RETRY_DELAY); + goto retry; + } + } else { + device_printf(dev, + "%s: ice_get_link_status failed; status %s, aq_err %s\n", + __func__, ice_status_str(status), + ice_aq_str(hw->adminq.sq_last_status)); + } return; } if (pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) { ice_clear_state(&sc->state, ICE_STATE_NO_MEDIA); /* Apply default link settings */ if (!ice_test_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN)) { ice_set_link(sc, false); ice_set_state(&sc->state, ICE_STATE_LINK_STATUS_REPORTED); } else ice_apply_saved_phy_cfg(sc, ICE_APPLY_LS_FEC_FC); } else { /* Set link down, and poll for media available in timer. This prevents the * driver from receiving spurious link-related events. 
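		 * An EMODE error from the restart-AN command is ignored
		 * below, since it only indicates that link control is not
		 * permitted in the current device mode.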
*/ ice_set_state(&sc->state, ICE_STATE_NO_MEDIA); status = ice_aq_set_link_restart_an(pi, false, NULL); if (status && hw->adminq.sq_last_status != ICE_AQ_RC_EMODE) device_printf(dev, "%s: ice_aq_set_link_restart_an: status %s, aq_err %s\n", __func__, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); } } /** * ice_apply_saved_phy_req_to_cfg -- Write saved user PHY settings to cfg data * @sc: device private structure * @cfg: new PHY config data to be modified * * Applies user settings for advertised speeds to the PHY type fields in the * supplied PHY config struct. It uses the data from pcaps to check if the * saved settings are invalid and uses the pcaps data instead if they are * invalid. */ static int ice_apply_saved_phy_req_to_cfg(struct ice_softc *sc, struct ice_aqc_set_phy_cfg_data *cfg) { struct ice_phy_data phy_data = { 0 }; struct ice_port_info *pi = sc->hw.port_info; u64 phy_low = 0, phy_high = 0; u16 link_speeds; int ret; link_speeds = pi->phy.curr_user_speed_req; if (ice_is_bit_set(sc->feat_en, ICE_FEATURE_LINK_MGMT_VER_2)) { memset(&phy_data, 0, sizeof(phy_data)); phy_data.report_mode = ICE_AQC_REPORT_DFLT_CFG; phy_data.user_speeds_orig = link_speeds; ret = ice_intersect_phy_types_and_speeds(sc, &phy_data); if (ret != 0) { /* Error message already printed within function */ return (ret); } phy_low = phy_data.phy_low_intr; phy_high = phy_data.phy_high_intr; if (link_speeds == 0 || phy_data.user_speeds_intr) goto finalize_link_speed; if (ice_is_bit_set(sc->feat_en, ICE_FEATURE_LENIENT_LINK_MODE)) { memset(&phy_data, 0, sizeof(phy_data)); phy_data.report_mode = ICE_AQC_REPORT_TOPO_CAP_NO_MEDIA; phy_data.user_speeds_orig = link_speeds; ret = ice_intersect_phy_types_and_speeds(sc, &phy_data); if (ret != 0) { /* Error message already printed within function */ return (ret); } phy_low = phy_data.phy_low_intr; phy_high = phy_data.phy_high_intr; if (!phy_data.user_speeds_intr) { phy_low = phy_data.phy_low_orig; phy_high = phy_data.phy_high_orig; } goto finalize_link_speed; } /* If we're here, then it means the benefits of Version 2 * link management aren't utilized. We fall through to * handling Strict Link Mode the same as Version 1 link * management. */ } memset(&phy_data, 0, sizeof(phy_data)); if ((link_speeds == 0) && (sc->ldo_tlv.phy_type_low || sc->ldo_tlv.phy_type_high)) phy_data.report_mode = ICE_AQC_REPORT_TOPO_CAP_NO_MEDIA; else phy_data.report_mode = ICE_AQC_REPORT_TOPO_CAP_MEDIA; phy_data.user_speeds_orig = link_speeds; ret = ice_intersect_phy_types_and_speeds(sc, &phy_data); if (ret != 0) { /* Error message already printed within function */ return (ret); } phy_low = phy_data.phy_low_intr; phy_high = phy_data.phy_high_intr; if (!ice_is_bit_set(sc->feat_en, ICE_FEATURE_LENIENT_LINK_MODE)) { if (phy_low == 0 && phy_high == 0) { device_printf(sc->dev, "The selected speed is not supported by the current media. 
Please select a link speed that is supported by the current media.\n"); return (EINVAL); } } else { if (link_speeds == 0) { if (sc->ldo_tlv.phy_type_low & phy_low || sc->ldo_tlv.phy_type_high & phy_high) { phy_low &= sc->ldo_tlv.phy_type_low; phy_high &= sc->ldo_tlv.phy_type_high; } } else if (phy_low == 0 && phy_high == 0) { memset(&phy_data, 0, sizeof(phy_data)); phy_data.report_mode = ICE_AQC_REPORT_TOPO_CAP_NO_MEDIA; phy_data.user_speeds_orig = link_speeds; ret = ice_intersect_phy_types_and_speeds(sc, &phy_data); if (ret != 0) { /* Error message already printed within function */ return (ret); } phy_low = phy_data.phy_low_intr; phy_high = phy_data.phy_high_intr; if (!phy_data.user_speeds_intr) { phy_low = phy_data.phy_low_orig; phy_high = phy_data.phy_high_orig; } } } finalize_link_speed: /* Cache new user settings for speeds */ pi->phy.curr_user_speed_req = phy_data.user_speeds_intr; cfg->phy_type_low = htole64(phy_low); cfg->phy_type_high = htole64(phy_high); return (ret); } /** * ice_apply_saved_fec_req_to_cfg -- Write saved user FEC mode to cfg data * @sc: device private structure * @cfg: new PHY config data to be modified * * Applies user setting for FEC mode to PHY config struct. It uses the data * from pcaps to check if the saved settings are invalid and uses the pcaps * data instead if they are invalid. */ static int ice_apply_saved_fec_req_to_cfg(struct ice_softc *sc, struct ice_aqc_set_phy_cfg_data *cfg) { struct ice_port_info *pi = sc->hw.port_info; int status; cfg->caps &= ~ICE_AQC_PHY_EN_AUTO_FEC; status = ice_cfg_phy_fec(pi, cfg, pi->phy.curr_user_fec_req); if (status) return (EIO); return (0); } /** * ice_apply_saved_fc_req_to_cfg -- Write saved user flow control mode to cfg data * @pi: port info struct * @cfg: new PHY config data to be modified * * Applies user setting for flow control mode to PHY config struct. There are * no invalid flow control mode settings; if there are, then this function * treats them like "ICE_FC_NONE". */ static void ice_apply_saved_fc_req_to_cfg(struct ice_port_info *pi, struct ice_aqc_set_phy_cfg_data *cfg) { cfg->caps &= ~(ICE_AQ_PHY_ENA_TX_PAUSE_ABILITY | ICE_AQ_PHY_ENA_RX_PAUSE_ABILITY); switch (pi->phy.curr_user_fc_req) { case ICE_FC_FULL: cfg->caps |= ICE_AQ_PHY_ENA_TX_PAUSE_ABILITY | ICE_AQ_PHY_ENA_RX_PAUSE_ABILITY; break; case ICE_FC_RX_PAUSE: cfg->caps |= ICE_AQ_PHY_ENA_RX_PAUSE_ABILITY; break; case ICE_FC_TX_PAUSE: cfg->caps |= ICE_AQ_PHY_ENA_TX_PAUSE_ABILITY; break; default: /* ICE_FC_NONE */ break; } } /** * ice_apply_saved_phy_cfg -- Re-apply user PHY config settings * @sc: device private structure * @settings: which settings to apply * * Applies user settings for advertised speeds, FEC mode, and flow * control mode to a PHY config struct; it uses the data from pcaps * to check if the saved settings are invalid and uses the pcaps * data instead if they are invalid. * * For things like sysctls where only one setting needs to be * updated, the bitmap allows the caller to specify which setting * to update. 
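 *
 * The valid bits are ICE_APPLY_LS (link speeds), ICE_APPLY_FEC, and
 * ICE_APPLY_FC; ICE_APPLY_LS_FEC_FC applies all three at once.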
*/ int ice_apply_saved_phy_cfg(struct ice_softc *sc, u8 settings) { struct ice_aqc_set_phy_cfg_data cfg = { 0 }; struct ice_port_info *pi = sc->hw.port_info; struct ice_aqc_get_phy_caps_data pcaps = { 0 }; struct ice_hw *hw = &sc->hw; device_t dev = sc->dev; u64 phy_low, phy_high; int status; enum ice_fec_mode dflt_fec_mode; u16 dflt_user_speed; if (!settings || settings > ICE_APPLY_LS_FEC_FC) { ice_debug(hw, ICE_DBG_LINK, "Settings out-of-bounds: %u\n", settings); } status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG, &pcaps, NULL); if (status) { device_printf(dev, "%s: ice_aq_get_phy_caps (ACTIVE) failed; status %s, aq_err %s\n", __func__, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); return (EIO); } phy_low = le64toh(pcaps.phy_type_low); phy_high = le64toh(pcaps.phy_type_high); /* Save off initial config parameters */ dflt_user_speed = ice_aq_phy_types_to_link_speeds(phy_low, phy_high); dflt_fec_mode = ice_caps_to_fec_mode(pcaps.caps, pcaps.link_fec_options); /* Setup new PHY config */ ice_copy_phy_caps_to_cfg(pi, &pcaps, &cfg); /* On error, restore active configuration values */ if ((settings & ICE_APPLY_LS) && ice_apply_saved_phy_req_to_cfg(sc, &cfg)) { pi->phy.curr_user_speed_req = dflt_user_speed; cfg.phy_type_low = pcaps.phy_type_low; cfg.phy_type_high = pcaps.phy_type_high; } if ((settings & ICE_APPLY_FEC) && ice_apply_saved_fec_req_to_cfg(sc, &cfg)) { pi->phy.curr_user_fec_req = dflt_fec_mode; } if (settings & ICE_APPLY_FC) { /* No real error indicators for this process, * so we'll just have to assume it works. */ ice_apply_saved_fc_req_to_cfg(pi, &cfg); } /* Enable link and re-negotiate it */ cfg.caps |= ICE_AQ_PHY_ENA_AUTO_LINK_UPDT | ICE_AQ_PHY_ENA_LINK; status = ice_aq_set_phy_cfg(hw, pi, &cfg, NULL); if (status) { /* Don't indicate failure if there's no media in the port. * The settings have been saved and will apply when media * is inserted. */ if ((status == ICE_ERR_AQ_ERROR) && (hw->adminq.sq_last_status == ICE_AQ_RC_EBUSY)) { device_printf(dev, "%s: Setting will be applied when media is inserted\n", __func__); return (0); } else { device_printf(dev, "%s: ice_aq_set_phy_cfg failed; status %s, aq_err %s\n", __func__, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); return (EIO); } } return (0); } /** * ice_print_ldo_tlv - Print out LDO TLV information * @sc: device private structure * @tlv: LDO TLV information from the adapter NVM * * Dump out the information in tlv to the kernel message buffer; intended for * debugging purposes. */ static void ice_print_ldo_tlv(struct ice_softc *sc, struct ice_link_default_override_tlv *tlv) { device_t dev = sc->dev; device_printf(dev, "TLV: -options 0x%02x\n", tlv->options); device_printf(dev, " -phy_config 0x%02x\n", tlv->phy_config); device_printf(dev, " -fec_options 0x%02x\n", tlv->fec_options); device_printf(dev, " -phy_high 0x%016llx\n", (unsigned long long)tlv->phy_type_high); device_printf(dev, " -phy_low 0x%016llx\n", (unsigned long long)tlv->phy_type_low); } /** * ice_set_link_management_mode -- Strict or lenient link management * @sc: device private structure * * Some NVMs give the adapter the option to advertise a superset of link * configurations. This checks to see if that option is enabled. * Further, the NVM could also provide a specific set of configurations * to try; these are cached in the driver's private structure if they * are available. 
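 *
 * Lenient link mode and, on new enough firmware, version 2 link management
 * are recorded as feature bits, and the link default override (LDO) TLV is
 * cached in the softc since it cannot change while the driver is loaded.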
*/ void ice_set_link_management_mode(struct ice_softc *sc) { struct ice_port_info *pi = sc->hw.port_info; device_t dev = sc->dev; struct ice_link_default_override_tlv tlv = { 0 }; int status; /* Port must be in strict mode if FW version is below a certain * version. (i.e. Don't set lenient mode features) */ if (!(ice_fw_supports_link_override(&sc->hw))) return; status = ice_get_link_default_override(&tlv, pi); if (status) { device_printf(dev, "%s: ice_get_link_default_override failed; status %s, aq_err %s\n", __func__, ice_status_str(status), ice_aq_str(sc->hw.adminq.sq_last_status)); return; } if (sc->hw.debug_mask & ICE_DBG_LINK) ice_print_ldo_tlv(sc, &tlv); /* Set lenient link mode */ if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_LENIENT_LINK_MODE) && (!(tlv.options & ICE_LINK_OVERRIDE_STRICT_MODE))) ice_set_bit(ICE_FEATURE_LENIENT_LINK_MODE, sc->feat_en); /* FW supports reporting a default configuration */ if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_LINK_MGMT_VER_2) && ice_fw_supports_report_dflt_cfg(&sc->hw)) { ice_set_bit(ICE_FEATURE_LINK_MGMT_VER_2, sc->feat_en); /* Knowing we're at a high enough firmware revision to * support this link management configuration, we don't * need to check/support earlier versions. */ return; } /* Default overrides only work if in lenient link mode */ if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_LINK_MGMT_VER_1) && ice_is_bit_set(sc->feat_en, ICE_FEATURE_LENIENT_LINK_MODE) && (tlv.options & ICE_LINK_OVERRIDE_EN)) ice_set_bit(ICE_FEATURE_LINK_MGMT_VER_1, sc->feat_en); /* Cache the LDO TLV structure in the driver, since it * won't change during the driver's lifetime. */ sc->ldo_tlv = tlv; } /** * ice_set_link -- Set up/down link on phy * @sc: device private structure * @enabled: link status to set up * * This should be called when change of link status is needed. */ void ice_set_link(struct ice_softc *sc, bool enabled) { struct ice_hw *hw = &sc->hw; device_t dev = sc->dev; int status; if (ice_driver_is_detaching(sc)) return; if (ice_test_state(&sc->state, ICE_STATE_NO_MEDIA)) return; if (enabled) ice_apply_saved_phy_cfg(sc, ICE_APPLY_LS_FEC_FC); else { status = ice_aq_set_link_restart_an(hw->port_info, false, NULL); if (status) { if (hw->adminq.sq_last_status == ICE_AQ_RC_EMODE) device_printf(dev, "%s: Link control not enabled in current device mode\n", __func__); else device_printf(dev, "%s: ice_aq_set_link_restart_an: status %s, aq_err %s\n", __func__, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); } else sc->link_up = false; } } /** * ice_init_saved_phy_cfg -- Set cached user PHY cfg settings with NVM defaults * @sc: device private structure * * This should be called before the tunables for these link settings * (e.g. advertise_speed) are added -- so that these defaults don't overwrite * the cached values that the sysctl handlers will write. * * This also needs to be called before ice_init_link_configuration, to ensure * that there are sane values that can be written if there is media available * in the port. 
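 *
 * The cached values are the user-requested link speeds, FEC mode, and flow
 * control mode, derived from a Get PHY Capabilities query; the default
 * configuration report is used when version 2 link management is enabled.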
*/ void ice_init_saved_phy_cfg(struct ice_softc *sc) { struct ice_port_info *pi = sc->hw.port_info; struct ice_aqc_get_phy_caps_data pcaps = { 0 }; struct ice_hw *hw = &sc->hw; device_t dev = sc->dev; int status; u64 phy_low, phy_high; u8 report_mode = ICE_AQC_REPORT_TOPO_CAP_MEDIA; if (ice_is_bit_set(sc->feat_en, ICE_FEATURE_LINK_MGMT_VER_2)) report_mode = ICE_AQC_REPORT_DFLT_CFG; status = ice_aq_get_phy_caps(pi, false, report_mode, &pcaps, NULL); if (status) { device_printf(dev, "%s: ice_aq_get_phy_caps (%s) failed; status %s, aq_err %s\n", __func__, report_mode == ICE_AQC_REPORT_DFLT_CFG ? "DFLT" : "w/MEDIA", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); return; } phy_low = le64toh(pcaps.phy_type_low); phy_high = le64toh(pcaps.phy_type_high); /* Save off initial config parameters */ pi->phy.curr_user_speed_req = ice_aq_phy_types_to_link_speeds(phy_low, phy_high); pi->phy.curr_user_fec_req = ice_caps_to_fec_mode(pcaps.caps, pcaps.link_fec_options); pi->phy.curr_user_fc_req = ice_caps_to_fc_mode(pcaps.caps); } /** * ice_module_init - Driver callback to handle module load * * Callback for handling module load events. This function should initialize * any data structures that are used for the life of the device driver. */ static int ice_module_init(void) { ice_rdma_init(); return (0); } /** * ice_module_exit - Driver callback to handle module exit * * Callback for handling module unload events. This function should release * any resources initialized during ice_module_init. * * If this function returns non-zero, the module will not be unloaded. It * should only return such a value if the module cannot be unloaded at all, * such as due to outstanding memory references that cannot be revoked. */ static int ice_module_exit(void) { ice_rdma_exit(); return (0); } /** * ice_module_event_handler - Callback for module events * @mod: unused module_t parameter * @what: the event requested * @arg: unused event argument * * Callback used to handle module events from the stack. Used to allow the * driver to define custom behavior that should happen at module load and * unload. */ int ice_module_event_handler(module_t __unused mod, int what, void __unused *arg) { switch (what) { case MOD_LOAD: return ice_module_init(); case MOD_UNLOAD: return ice_module_exit(); default: /* TODO: do we need to handle MOD_QUIESCE and MOD_SHUTDOWN? */ return (EOPNOTSUPP); } } /** * ice_handle_nvm_access_ioctl - Handle an NVM access ioctl request * @sc: the device private softc * @ifd: ifdrv ioctl request pointer */ int ice_handle_nvm_access_ioctl(struct ice_softc *sc, struct ifdrv *ifd) { union ice_nvm_access_data *data; struct ice_nvm_access_cmd *cmd; size_t ifd_len = ifd->ifd_len, malloc_len; struct ice_hw *hw = &sc->hw; device_t dev = sc->dev; int status; u8 *nvm_buffer; int err; /* * ifioctl forwards SIOCxDRVSPEC to iflib without performing * a privilege check. In turn, iflib forwards the ioctl to the driver * without performing a privilege check. Perform one here to ensure * that non-privileged threads cannot access this interface. */ err = priv_check(curthread, PRIV_DRIVER); if (err) return (err); if (ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET)) { device_printf(dev, "%s: Driver must rebuild data structures after a reset. Operation aborted.\n", __func__); return (EBUSY); } if (ifd_len < sizeof(struct ice_nvm_access_cmd)) { device_printf(dev, "%s: ifdrv length is too small. 
Got %zu, but expected %zu\n",
		    __func__, ifd_len, sizeof(struct ice_nvm_access_cmd));
		return (EINVAL);
	}

	if (ifd->ifd_data == NULL) {
		device_printf(dev, "%s: ifd data buffer not present.\n",
		    __func__);
		return (EINVAL);
	}

	/*
	 * If everything works correctly, ice_handle_nvm_access should not
	 * modify data past the size of the ioctl length. However, it could
	 * lead to memory corruption if it did. Make sure to allocate at least
	 * enough space for the command and data regardless. This
	 * ensures that any access to the data union will not access invalid
	 * memory.
	 */
	malloc_len = max(ifd_len, sizeof(*data) + sizeof(*cmd));

	nvm_buffer = (u8 *)malloc(malloc_len, M_ICE, M_ZERO | M_WAITOK);
	if (!nvm_buffer)
		return (ENOMEM);

	/* Copy the NVM access command and data in from user space */
	/* coverity[tainted_data_argument] */
	err = copyin(ifd->ifd_data, nvm_buffer, ifd_len);
	if (err) {
		device_printf(dev,
		    "%s: Copying request from user space failed, err %s\n",
		    __func__, ice_err_str(err));
		goto cleanup_free_nvm_buffer;
	}

	/*
	 * The NVM command structure is immediately followed by data which
	 * varies in size based on the command.
	 */
	cmd = (struct ice_nvm_access_cmd *)nvm_buffer;
	data = (union ice_nvm_access_data *)
	    (nvm_buffer + sizeof(struct ice_nvm_access_cmd));

	/* Handle the NVM access request */
	status = ice_handle_nvm_access(hw, cmd, data);
	if (status)
		ice_debug(hw, ICE_DBG_NVM,
		    "NVM access request failed, err %s\n",
		    ice_status_str(status));

	/* Copy the possibly modified contents of the handled request out */
	err = copyout(nvm_buffer, ifd->ifd_data, ifd_len);
	if (err) {
		device_printf(dev,
		    "%s: Copying response back to user space failed, err %s\n",
		    __func__, ice_err_str(err));
		goto cleanup_free_nvm_buffer;
	}

	/* Convert private status to an error code for proper ioctl response */
	switch (status) {
	case 0:
		err = (0);
		break;
	case ICE_ERR_NO_MEMORY:
		err = (ENOMEM);
		break;
	case ICE_ERR_OUT_OF_RANGE:
		err = (ENOTTY);
		break;
	case ICE_ERR_PARAM:
	default:
		err = (EINVAL);
		break;
	}

cleanup_free_nvm_buffer:
	free(nvm_buffer, M_ICE);

	return err;
}

/**
 * ice_read_sff_eeprom - Read data from SFF eeprom
 * @sc: device softc
 * @dev_addr: I2C device address (typically 0xA0 or 0xA2)
 * @offset: offset into the eeprom
 * @data: pointer to data buffer to store read data in
 * @length: length to read; max length is 16
 *
 * Read from the SFF eeprom in the module for this PF's port. For more details
 * on the contents of an SFF eeprom, refer to SFF-8472 (SFP), SFF-8636 (QSFP),
 * and SFF-8024 (both).
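 *
 * Returns 0 on success. EBUSY results from firmware are retried up to
 * ICE_I2C_MAX_RETRIES before being reported, EACCES means firmware reports
 * I2C access as unsupported, and EPERM means the module pointer location in
 * the command does not permit the requested access.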
*/ int ice_read_sff_eeprom(struct ice_softc *sc, u16 dev_addr, u16 offset, u8* data, u16 length) { struct ice_hw *hw = &sc->hw; int ret = 0, retries = 0; int status; if (length > 16) return (EINVAL); if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) return (ENOSYS); if (ice_test_state(&sc->state, ICE_STATE_NO_MEDIA)) return (ENXIO); do { status = ice_aq_sff_eeprom(hw, 0, dev_addr, offset, 0, 0, data, length, false, NULL); if (!status) { ret = 0; break; } if (status == ICE_ERR_AQ_ERROR && hw->adminq.sq_last_status == ICE_AQ_RC_EBUSY) { ret = EBUSY; continue; } if (status == ICE_ERR_AQ_ERROR && hw->adminq.sq_last_status == ICE_AQ_RC_EACCES) { /* FW says I2C access isn't supported */ ret = EACCES; break; } if (status == ICE_ERR_AQ_ERROR && hw->adminq.sq_last_status == ICE_AQ_RC_EPERM) { device_printf(sc->dev, "%s: Module pointer location specified in command does not permit the required operation.\n", __func__); ret = EPERM; break; } else { device_printf(sc->dev, "%s: Error reading I2C data: err %s aq_err %s\n", __func__, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); ret = EIO; break; } } while (retries++ < ICE_I2C_MAX_RETRIES); if (ret == EBUSY) device_printf(sc->dev, "%s: Error reading I2C data after %d retries\n", __func__, ICE_I2C_MAX_RETRIES); return (ret); } /** * ice_handle_i2c_req - Driver independent I2C request handler * @sc: device softc * @req: The I2C parameters to use * * Read from the port's I2C eeprom using the parameters from the ioctl. */ int ice_handle_i2c_req(struct ice_softc *sc, struct ifi2creq *req) { return ice_read_sff_eeprom(sc, req->dev_addr, req->offset, req->data, req->len); } /** * ice_sysctl_read_i2c_diag_data - Read some module diagnostic data via i2c * @oidp: sysctl oid structure * @arg1: pointer to private data structure * @arg2: unused * @req: sysctl request pointer * * Read 8 bytes of diagnostic data from the SFF eeprom in the (Q)SFP module * inserted into the port. 
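 *
 * The byte offsets used below follow the SFF-8472 (SFP A2 page) and
 * SFF-8636 (QSFP lower page) memory maps: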
 *
 *             | SFP A2  | QSFP Lower Page
 * ------------|---------|----------------
 * Temperature | 96-97   | 22-23
 * Vcc         | 98-99   | 26-27
 * TX power    | 102-103 | 50-51..56-57
 * RX power    | 104-105 | 34-35..40-41
 */
static int
ice_sysctl_read_i2c_diag_data(SYSCTL_HANDLER_ARGS)
{
	struct ice_softc *sc = (struct ice_softc *)arg1;
	device_t dev = sc->dev;
	struct sbuf *sbuf;
	int ret;
	u8 data[16];

	UNREFERENCED_PARAMETER(arg2);
	UNREFERENCED_PARAMETER(oidp);

	if (ice_driver_is_detaching(sc))
		return (ESHUTDOWN);

	if (req->oldptr == NULL) {
		ret = SYSCTL_OUT(req, 0, 128);
		return (ret);
	}

	ret = ice_read_sff_eeprom(sc, 0xA0, 0, data, 1);
	if (ret)
		return (ret);

	/* 0x3 for SFP; 0xD/0x11 for QSFP+/QSFP28 */
	if (data[0] == 0x3) {
		/*
		 * Check for:
		 * - Internally calibrated data
		 * - Diagnostic monitoring is implemented
		 */
		ice_read_sff_eeprom(sc, 0xA0, 92, data, 1);
		if (!(data[0] & 0x60)) {
			device_printf(dev,
			    "Module doesn't support diagnostics: 0xA0[92] = %02X\n",
			    data[0]);
			return (ENODEV);
		}

		sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);

		ice_read_sff_eeprom(sc, 0xA2, 96, data, 4);
		for (int i = 0; i < 4; i++)
			sbuf_printf(sbuf, "%02X ", data[i]);

		ice_read_sff_eeprom(sc, 0xA2, 102, data, 4);
		for (int i = 0; i < 4; i++)
			sbuf_printf(sbuf, "%02X ", data[i]);
	} else if (data[0] == 0xD || data[0] == 0x11) {
		/*
		 * QSFP+ modules are always internally calibrated, and must
		 * indicate what types of diagnostic monitoring are
		 * implemented
		 */
		sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);

		ice_read_sff_eeprom(sc, 0xA0, 22, data, 2);
		for (int i = 0; i < 2; i++)
			sbuf_printf(sbuf, "%02X ", data[i]);

		ice_read_sff_eeprom(sc, 0xA0, 26, data, 2);
		for (int i = 0; i < 2; i++)
			sbuf_printf(sbuf, "%02X ", data[i]);

		ice_read_sff_eeprom(sc, 0xA0, 34, data, 2);
		for (int i = 0; i < 2; i++)
			sbuf_printf(sbuf, "%02X ", data[i]);

		ice_read_sff_eeprom(sc, 0xA0, 50, data, 2);
		for (int i = 0; i < 2; i++)
			sbuf_printf(sbuf, "%02X ", data[i]);
	} else {
		device_printf(dev,
		    "Module is not SFP/SFP+/SFP28/QSFP+ (%02X)\n", data[0]);
		return (ENODEV);
	}

	sbuf_finish(sbuf);
	sbuf_delete(sbuf);

	return (0);
}

/**
 * ice_alloc_intr_tracking - Setup interrupt tracking structures
 * @sc: device softc structure
 *
 * Sets up the resource manager for keeping track of interrupt allocations,
 * and initializes the tracking maps for the PF's interrupt allocations.
 *
 * Unlike the scheme for queues, this is done in one step since both the
 * manager and the maps have the same lifetime.
 *
 * @returns 0 on success, or an error code on failure.
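 *
 * Concretely, a contiguous-only resource manager is initialized for
 * num_msix_vectors interrupts, and the PF and RDMA interrupt maps are
 * allocated with every entry set to ICE_INVALID_RES_IDX.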
*/ int ice_alloc_intr_tracking(struct ice_softc *sc) { struct ice_hw *hw = &sc->hw; device_t dev = sc->dev; int err; if (hw->func_caps.common_cap.num_msix_vectors > ICE_MAX_MSIX_VECTORS) { device_printf(dev, "%s: Invalid num_msix_vectors value (%u) received from FW.\n", __func__, hw->func_caps.common_cap.num_msix_vectors); return (EINVAL); } /* Initialize the interrupt allocation manager */ err = ice_resmgr_init_contig_only(&sc->dev_imgr, hw->func_caps.common_cap.num_msix_vectors); if (err) { device_printf(dev, "Unable to initialize PF interrupt manager: %s\n", ice_err_str(err)); return (err); } /* Allocate PF interrupt mapping storage */ if (!(sc->pf_imap = (u16 *)malloc(sizeof(u16) * hw->func_caps.common_cap.num_msix_vectors, M_ICE, M_NOWAIT))) { device_printf(dev, "Unable to allocate PF imap memory\n"); err = ENOMEM; goto free_imgr; } if (!(sc->rdma_imap = (u16 *)malloc(sizeof(u16) * hw->func_caps.common_cap.num_msix_vectors, M_ICE, M_NOWAIT))) { device_printf(dev, "Unable to allocate RDMA imap memory\n"); err = ENOMEM; free(sc->pf_imap, M_ICE); goto free_imgr; } for (u32 i = 0; i < hw->func_caps.common_cap.num_msix_vectors; i++) { sc->pf_imap[i] = ICE_INVALID_RES_IDX; sc->rdma_imap[i] = ICE_INVALID_RES_IDX; } return (0); free_imgr: ice_resmgr_destroy(&sc->dev_imgr); return (err); } /** * ice_free_intr_tracking - Free PF interrupt tracking structures * @sc: device softc structure * * Frees the interrupt resource allocation manager and the PF's owned maps. * * VF maps are released when the owning VF's are destroyed, which should always * happen before this function is called. */ void ice_free_intr_tracking(struct ice_softc *sc) { if (sc->pf_imap) { ice_resmgr_release_map(&sc->dev_imgr, sc->pf_imap, sc->lan_vectors); free(sc->pf_imap, M_ICE); sc->pf_imap = NULL; } if (sc->rdma_imap) { ice_resmgr_release_map(&sc->dev_imgr, sc->rdma_imap, sc->lan_vectors); free(sc->rdma_imap, M_ICE); sc->rdma_imap = NULL; } ice_resmgr_destroy(&sc->dev_imgr); ice_resmgr_destroy(&sc->os_imgr); } /** * ice_apply_supported_speed_filter - Mask off unsupported speeds * @report_speeds: bit-field for the desired link speeds * @mod_type: type of module/sgmii connection we have * * Given a bitmap of the desired lenient mode link speeds, * this function will mask off the speeds that are not currently * supported by the device. */ static u16 ice_apply_supported_speed_filter(u16 report_speeds, u8 mod_type) { u16 speed_mask; enum { IS_SGMII, IS_SFP, IS_QSFP } module; /* * The SFF specification says 0 is unknown, so we'll * treat it like we're connected through SGMII for now. * This may need revisiting if a new type is supported * in the future. */ switch (mod_type) { case 0: module = IS_SGMII; break; case 3: module = IS_SFP; break; default: module = IS_QSFP; break; } /* We won't offer anything lower than 100M for any part, * but we'll need to mask off other speeds based on the * device and module type. 
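	 * For example, if the module reports 25G, everything below 1G is
	 * masked off; 50G on a QSFP module masks everything below 10G; and
	 * 100G or 200G masks everything below 25G.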
*/ speed_mask = ~((u16)ICE_AQ_LINK_SPEED_100MB - 1); if ((report_speeds & ICE_AQ_LINK_SPEED_10GB) && (module == IS_SFP)) speed_mask = ~((u16)ICE_AQ_LINK_SPEED_1000MB - 1); if (report_speeds & ICE_AQ_LINK_SPEED_25GB) speed_mask = ~((u16)ICE_AQ_LINK_SPEED_1000MB - 1); if (report_speeds & ICE_AQ_LINK_SPEED_50GB) { speed_mask = ~((u16)ICE_AQ_LINK_SPEED_1000MB - 1); if (module == IS_QSFP) speed_mask = ~((u16)ICE_AQ_LINK_SPEED_10GB - 1); } if ((report_speeds & ICE_AQ_LINK_SPEED_100GB) || (report_speeds & ICE_AQ_LINK_SPEED_200GB)) speed_mask = ~((u16)ICE_AQ_LINK_SPEED_25GB - 1); return (report_speeds & speed_mask); } /** * ice_init_health_events - Enable FW health event reporting * @sc: device softc * * Will try to enable firmware health event reporting, but shouldn't * cause any grief (to the caller) if this fails. */ void ice_init_health_events(struct ice_softc *sc) { int status; u8 health_mask; if ((!ice_is_bit_set(sc->feat_cap, ICE_FEATURE_HEALTH_STATUS)) || (!sc->enable_health_events)) return; health_mask = ICE_AQC_HEALTH_STATUS_SET_PF_SPECIFIC_MASK | ICE_AQC_HEALTH_STATUS_SET_GLOBAL_MASK; status = ice_aq_set_health_status_config(&sc->hw, health_mask, NULL); if (status) device_printf(sc->dev, "Failed to enable firmware health events, err %s aq_err %s\n", ice_status_str(status), ice_aq_str(sc->hw.adminq.sq_last_status)); else ice_set_bit(ICE_FEATURE_HEALTH_STATUS, sc->feat_en); } /** * ice_print_health_status_string - Print message for given FW health event * @dev: the PCIe device * @elem: health status element containing status code * * A rather large list of possible health status codes and their associated * messages. */ static void ice_print_health_status_string(device_t dev, struct ice_aqc_health_status_elem *elem) { u16 status_code = le16toh(elem->health_status_code); switch (status_code) { case ICE_AQC_HEALTH_STATUS_INFO_RECOVERY: device_printf(dev, "The device is in firmware recovery mode.\n"); device_printf(dev, "Possible Solution: Update to the latest NVM image.\n"); break; case ICE_AQC_HEALTH_STATUS_ERR_FLASH_ACCESS: device_printf(dev, "The flash chip cannot be accessed.\n"); device_printf(dev, "Possible Solution: If issue persists, call customer support.\n"); break; case ICE_AQC_HEALTH_STATUS_ERR_NVM_AUTH: device_printf(dev, "NVM authentication failed.\n"); device_printf(dev, "Possible Solution: Update to the latest NVM image.\n"); break; case ICE_AQC_HEALTH_STATUS_ERR_OROM_AUTH: device_printf(dev, "Option ROM authentication failed.\n"); device_printf(dev, "Possible Solution: Update to the latest NVM image.\n"); break; case ICE_AQC_HEALTH_STATUS_ERR_DDP_AUTH: device_printf(dev, "DDP package failed.\n"); device_printf(dev, "Possible Solution: Update to latest base driver and DDP package.\n"); break; case ICE_AQC_HEALTH_STATUS_ERR_NVM_COMPAT: device_printf(dev, "NVM image is incompatible.\n"); device_printf(dev, "Possible Solution: Update to the latest NVM image.\n"); break; case ICE_AQC_HEALTH_STATUS_ERR_OROM_COMPAT: device_printf(dev, "Option ROM is incompatible.\n"); device_printf(dev, "Possible Solution: Update to the latest NVM image.\n"); break; case ICE_AQC_HEALTH_STATUS_ERR_DCB_MIB: device_printf(dev, "Supplied MIB file is invalid. 
DCB reverted to default configuration.\n"); device_printf(dev, "Possible Solution: Disable FW-LLDP and check DCBx system configuration.\n"); break; case ICE_AQC_HEALTH_STATUS_ERR_UNKNOWN_MOD_STRICT: device_printf(dev, "An unsupported module was detected.\n"); device_printf(dev, "Possible Solution 1: Check your cable connection.\n"); device_printf(dev, "Possible Solution 2: Change or replace the module or cable.\n"); break; case ICE_AQC_HEALTH_STATUS_ERR_MOD_TYPE: device_printf(dev, "Module type is not supported.\n"); device_printf(dev, "Possible Solution: Change or replace the module or cable.\n"); break; case ICE_AQC_HEALTH_STATUS_ERR_MOD_QUAL: device_printf(dev, "Module is not qualified.\n"); device_printf(dev, "Possible Solution 1: Check your cable connection.\n"); device_printf(dev, "Possible Solution 2: Change or replace the module or cable.\n"); device_printf(dev, "Possible Solution 3: Manually set speed and duplex.\n"); break; case ICE_AQC_HEALTH_STATUS_ERR_MOD_COMM: device_printf(dev, "Device cannot communicate with the module.\n"); device_printf(dev, "Possible Solution 1: Check your cable connection.\n"); device_printf(dev, "Possible Solution 2: Change or replace the module or cable.\n"); device_printf(dev, "Possible Solution 3: Manually set speed and duplex.\n"); break; case ICE_AQC_HEALTH_STATUS_ERR_MOD_CONFLICT: device_printf(dev, "Unresolved module conflict.\n"); device_printf(dev, "Possible Solution 1: Manually set speed/duplex or use Intel(R) Ethernet Port Configuration Tool to change the port option.\n"); device_printf(dev, "Possible Solution 2: If the problem persists, use a cable/module that is found in the supported modules and cables list for this device.\n"); break; case ICE_AQC_HEALTH_STATUS_ERR_MOD_NOT_PRESENT: device_printf(dev, "Module is not present.\n"); device_printf(dev, "Possible Solution 1: Check that the module is inserted correctly.\n"); device_printf(dev, "Possible Solution 2: If the problem persists, use a cable/module that is found in the supported modules and cables list for this device.\n"); break; case ICE_AQC_HEALTH_STATUS_INFO_MOD_UNDERUTILIZED: device_printf(dev, "Underutilized module.\n"); device_printf(dev, "Possible Solution 1: Change or replace the module or cable.\n"); device_printf(dev, "Possible Solution 2: Use Intel(R) Ethernet Port Configuration Tool to change the port option.\n"); break; case ICE_AQC_HEALTH_STATUS_ERR_UNKNOWN_MOD_LENIENT: device_printf(dev, "An unsupported module was detected.\n"); device_printf(dev, "Possible Solution 1: Check your cable connection.\n"); device_printf(dev, "Possible Solution 2: Change or replace the module or cable.\n"); device_printf(dev, "Possible Solution 3: Manually set speed and duplex.\n"); break; case ICE_AQC_HEALTH_STATUS_ERR_INVALID_LINK_CFG: device_printf(dev, "Invalid link configuration.\n"); break; case ICE_AQC_HEALTH_STATUS_ERR_PORT_ACCESS: device_printf(dev, "Port hardware access error.\n"); device_printf(dev, "Possible Solution: Update to the latest NVM image.\n"); break; case ICE_AQC_HEALTH_STATUS_ERR_PORT_UNREACHABLE: device_printf(dev, "A port is unreachable.\n"); device_printf(dev, "Possible Solution 1: Use Intel(R) Ethernet Port Configuration Tool to change the port option.\n"); device_printf(dev, "Possible Solution 2: Update to the latest NVM image.\n"); break; case ICE_AQC_HEALTH_STATUS_INFO_PORT_SPEED_MOD_LIMITED: device_printf(dev, "Port speed is limited due to module.\n"); device_printf(dev, "Possible Solution: Change the module or use Intel(R) Ethernet Port Configuration Tool to 
configure the port option to match the current module speed.\n"); break; case ICE_AQC_HEALTH_STATUS_ERR_PARALLEL_FAULT: device_printf(dev, "All configured link modes were attempted but failed to establish link.\n"); device_printf(dev, "The device will restart the process to establish link.\n"); device_printf(dev, "Possible Solution: Check link partner connection and configuration.\n"); break; case ICE_AQC_HEALTH_STATUS_INFO_PORT_SPEED_PHY_LIMITED: device_printf(dev, "Port speed is limited by PHY capabilities.\n"); device_printf(dev, "Possible Solution 1: Change the module to align to port option.\n"); device_printf(dev, "Possible Solution 2: Use Intel(R) Ethernet Port Configuration Tool to change the port option.\n"); break; case ICE_AQC_HEALTH_STATUS_ERR_NETLIST_TOPO: device_printf(dev, "LOM topology netlist is corrupted.\n"); device_printf(dev, "Possible Solution: Update to the latest NVM image.\n"); break; case ICE_AQC_HEALTH_STATUS_ERR_NETLIST: device_printf(dev, "Unrecoverable netlist error.\n"); device_printf(dev, "Possible Solution: Update to the latest NVM image.\n"); break; case ICE_AQC_HEALTH_STATUS_ERR_TOPO_CONFLICT: device_printf(dev, "Port topology conflict.\n"); device_printf(dev, "Possible Solution 1: Use Intel(R) Ethernet Port Configuration Tool to change the port option.\n"); device_printf(dev, "Possible Solution 2: Update to the latest NVM image.\n"); break; case ICE_AQC_HEALTH_STATUS_ERR_LINK_HW_ACCESS: device_printf(dev, "Unrecoverable hardware access error.\n"); device_printf(dev, "Possible Solution: Update to the latest NVM image.\n"); break; case ICE_AQC_HEALTH_STATUS_ERR_LINK_RUNTIME: device_printf(dev, "Unrecoverable runtime error.\n"); device_printf(dev, "Possible Solution: Update to the latest NVM image.\n"); break; case ICE_AQC_HEALTH_STATUS_ERR_DNL_INIT: device_printf(dev, "Link management engine failed to initialize.\n"); device_printf(dev, "Possible Solution: Update to the latest NVM image.\n"); break; default: break; } } /** * ice_handle_health_status_event - helper function to output health status * @sc: device softc structure * @event: event received on a control queue * * Prints out the appropriate string based on the given Health Status Event * code. */ static void ice_handle_health_status_event(struct ice_softc *sc, struct ice_rq_event_info *event) { struct ice_aqc_health_status_elem *health_info; u16 status_count; int i; if (!ice_is_bit_set(sc->feat_en, ICE_FEATURE_HEALTH_STATUS)) return; health_info = (struct ice_aqc_health_status_elem *)event->msg_buf; status_count = le16toh(event->desc.params.get_health_status.health_status_count); if (status_count > (event->buf_len / sizeof(*health_info))) { device_printf(sc->dev, "Received a health status event with invalid event count\n"); return; } for (i = 0; i < status_count; i++) { ice_print_health_status_string(sc->dev, health_info); health_info++; } } /** * ice_set_default_local_lldp_mib - Possibly apply local LLDP MIB to FW * @sc: device softc structure * * This function needs to be called after link up; it makes sure the FW has * certain PFC/DCB settings. In certain configurations this will re-apply a * default local LLDP MIB configuration; this is intended to work around a FW * behavior where these settings seem to be cleared on link up. */ void ice_set_default_local_lldp_mib(struct ice_softc *sc) { struct ice_hw *hw = &sc->hw; struct ice_port_info *pi; device_t dev = sc->dev; int status; /* Set Local MIB can disrupt flow control settings for * non-DCB-supported devices.
*/ if (!ice_is_bit_set(sc->feat_en, ICE_FEATURE_DCB)) return; pi = hw->port_info; /* Don't overwrite a custom SW configuration */ if (!pi->qos_cfg.is_sw_lldp && !ice_test_state(&sc->state, ICE_STATE_MULTIPLE_TCS)) ice_set_default_local_mib_settings(sc); status = ice_set_dcb_cfg(pi); if (status) device_printf(dev, "Error setting Local LLDP MIB: %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); } /** * ice_sbuf_print_ets_cfg - Helper function to print ETS cfg * @sbuf: string buffer to print to * @name: prefix string to use * @ets: structure to pull values from * * A helper function for ice_sysctl_dump_dcbx_cfg(), this * formats the ETS rec and cfg TLVs into text. */ static void ice_sbuf_print_ets_cfg(struct sbuf *sbuf, const char *name, struct ice_dcb_ets_cfg *ets) { sbuf_printf(sbuf, "%s.willing: %u\n", name, ets->willing); sbuf_printf(sbuf, "%s.cbs: %u\n", name, ets->cbs); sbuf_printf(sbuf, "%s.maxtcs: %u\n", name, ets->maxtcs); sbuf_printf(sbuf, "%s.prio_table:", name); for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) sbuf_printf(sbuf, " %d", ets->prio_table[i]); sbuf_printf(sbuf, "\n"); sbuf_printf(sbuf, "%s.tcbwtable:", name); for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) sbuf_printf(sbuf, " %d", ets->tcbwtable[i]); sbuf_printf(sbuf, "\n"); sbuf_printf(sbuf, "%s.tsatable:", name); for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) sbuf_printf(sbuf, " %d", ets->tsatable[i]); sbuf_printf(sbuf, "\n"); } /** * ice_sysctl_dump_dcbx_cfg - Print out DCBX/DCB config info * @oidp: sysctl oid structure * @arg1: pointer to private data structure * @arg2: AQ define for either Local or Remote MIB * @req: sysctl request pointer * * Prints out DCB/DCBX configuration, including the contents * of either the local or remote MIB, depending on the value * used in arg2. 
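 * * As an example, reading this through sysctl(8) might look like the * following (node name is illustrative; the actual path depends on where * the driver registers this handler): * * # sysctl dev.ice.0.local_dcbx_cfg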
*/ static int ice_sysctl_dump_dcbx_cfg(SYSCTL_HANDLER_ARGS) { struct ice_softc *sc = (struct ice_softc *)arg1; struct ice_aqc_get_cee_dcb_cfg_resp cee_cfg = {}; struct ice_dcbx_cfg dcb_buf = {}; struct ice_dcbx_cfg *dcbcfg; struct ice_hw *hw = &sc->hw; device_t dev = sc->dev; struct sbuf *sbuf; int status; u8 maxtcs, dcbx_status, is_sw_lldp; UNREFERENCED_PARAMETER(oidp); if (ice_driver_is_detaching(sc)) return (ESHUTDOWN); is_sw_lldp = hw->port_info->qos_cfg.is_sw_lldp; /* The driver doesn't receive a Remote MIB via SW */ if (is_sw_lldp && arg2 == ICE_AQ_LLDP_MIB_REMOTE) return (ENOENT); dcbcfg = &hw->port_info->qos_cfg.local_dcbx_cfg; if (!is_sw_lldp) { /* Collect information from the FW in FW LLDP mode */ dcbcfg = &dcb_buf; status = ice_aq_get_dcb_cfg(hw, (u8)arg2, ICE_AQ_LLDP_BRID_TYPE_NEAREST_BRID, dcbcfg); if (status && arg2 == ICE_AQ_LLDP_MIB_REMOTE && hw->adminq.sq_last_status == ICE_AQ_RC_ENOENT) { device_printf(dev, "Unable to query Remote MIB; port has not received one yet\n"); return (ENOENT); } if (status) { device_printf(dev, "Unable to query LLDP MIB, err %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); return (EIO); } } status = ice_aq_get_cee_dcb_cfg(hw, &cee_cfg, NULL); if (!status) dcbcfg->dcbx_mode = ICE_DCBX_MODE_CEE; else if (hw->adminq.sq_last_status == ICE_AQ_RC_ENOENT) dcbcfg->dcbx_mode = ICE_DCBX_MODE_IEEE; else device_printf(dev, "Get CEE DCB Cfg AQ cmd err %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); maxtcs = hw->func_caps.common_cap.maxtc; dcbx_status = ice_get_dcbx_status(hw); sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req); /* Do the actual printing */ sbuf_printf(sbuf, "\n"); sbuf_printf(sbuf, "SW LLDP mode: %d\n", is_sw_lldp); sbuf_printf(sbuf, "Function caps maxtcs: %d\n", maxtcs); sbuf_printf(sbuf, "dcbx_status: %d\n", dcbx_status); sbuf_printf(sbuf, "numapps: %u\n", dcbcfg->numapps); sbuf_printf(sbuf, "CEE TLV status: %u\n", dcbcfg->tlv_status); sbuf_printf(sbuf, "pfc_mode: %s\n", (dcbcfg->pfc_mode == ICE_QOS_MODE_DSCP) ? "DSCP" : "VLAN"); sbuf_printf(sbuf, "dcbx_mode: %s\n", (dcbcfg->dcbx_mode == ICE_DCBX_MODE_IEEE) ? "IEEE" : (dcbcfg->dcbx_mode == ICE_DCBX_MODE_CEE) ? "CEE" : "Unknown"); ice_sbuf_print_ets_cfg(sbuf, "etscfg", &dcbcfg->etscfg); ice_sbuf_print_ets_cfg(sbuf, "etsrec", &dcbcfg->etsrec); sbuf_printf(sbuf, "pfc.willing: %u\n", dcbcfg->pfc.willing); sbuf_printf(sbuf, "pfc.mbc: %u\n", dcbcfg->pfc.mbc); sbuf_printf(sbuf, "pfc.pfccap: 0x%0x\n", dcbcfg->pfc.pfccap); sbuf_printf(sbuf, "pfc.pfcena: 0x%0x\n", dcbcfg->pfc.pfcena); if (arg2 == ICE_AQ_LLDP_MIB_LOCAL) { sbuf_printf(sbuf, "dscp_map:\n"); for (int i = 0; i < 8; i++) { for (int j = 0; j < 8; j++) sbuf_printf(sbuf, " %d", dcbcfg->dscp_map[i * 8 + j]); sbuf_printf(sbuf, "\n"); } sbuf_printf(sbuf, "\nLocal registers:\n"); sbuf_printf(sbuf, "PRTDCB_GENC.NUMTC: %d\n", (rd32(hw, PRTDCB_GENC) & PRTDCB_GENC_NUMTC_M) >> PRTDCB_GENC_NUMTC_S); sbuf_printf(sbuf, "PRTDCB_TUP2TC: 0x%0x\n", (rd32(hw, PRTDCB_TUP2TC))); sbuf_printf(sbuf, "PRTDCB_RUP2TC: 0x%0x\n", (rd32(hw, PRTDCB_RUP2TC))); sbuf_printf(sbuf, "GLDCB_TC2PFC: 0x%0x\n", (rd32(hw, GLDCB_TC2PFC))); } /* Finish */ sbuf_finish(sbuf); sbuf_delete(sbuf); return (0); } /** * ice_sysctl_dump_vsi_cfg - print PF LAN VSI configuration * @oidp: sysctl oid structure * @arg1: pointer to private data structure * @arg2: unused * @req: sysctl request pointer * * XXX: This could be extended to apply to arbitrary PF-owned VSIs, * but for simplicity, this only works on the PF's LAN VSI. 
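 * * Note: hardware stores the per-TC Rx queue counts in tc_mapping as a * power-of-two exponent; the handler below prints them decoded as * 1 << qcount.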
*/ static int ice_sysctl_dump_vsi_cfg(SYSCTL_HANDLER_ARGS) { struct ice_softc *sc = (struct ice_softc *)arg1; struct ice_vsi_ctx ctx = { 0 }; struct ice_hw *hw = &sc->hw; device_t dev = sc->dev; struct sbuf *sbuf; int status; UNREFERENCED_PARAMETER(oidp); UNREFERENCED_PARAMETER(arg2); if (ice_driver_is_detaching(sc)) return (ESHUTDOWN); /* Get HW absolute index of a VSI */ ctx.vsi_num = ice_get_hw_vsi_num(hw, sc->pf_vsi.idx); status = ice_aq_get_vsi_params(hw, &ctx, NULL); if (status) { device_printf(dev, "Get VSI AQ call failed, err %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); return (EIO); } sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req); /* Do the actual printing */ sbuf_printf(sbuf, "\n"); sbuf_printf(sbuf, "VSI NUM: %d\n", ctx.vsi_num); sbuf_printf(sbuf, "VF NUM: %d\n", ctx.vf_num); sbuf_printf(sbuf, "VSIs allocated: %d\n", ctx.vsis_allocd); sbuf_printf(sbuf, "VSIs unallocated: %d\n", ctx.vsis_unallocated); sbuf_printf(sbuf, "Rx Queue Map method: %d\n", LE16_TO_CPU(ctx.info.mapping_flags)); /* The PF VSI is always contiguous, so there's no if-statement here */ sbuf_printf(sbuf, "Rx Queue base: %d\n", LE16_TO_CPU(ctx.info.q_mapping[0])); sbuf_printf(sbuf, "Rx Queue count: %d\n", LE16_TO_CPU(ctx.info.q_mapping[1])); sbuf_printf(sbuf, "TC qbases :"); for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) { sbuf_printf(sbuf, " %4d", ctx.info.tc_mapping[i] & ICE_AQ_VSI_TC_Q_OFFSET_M); } sbuf_printf(sbuf, "\n"); sbuf_printf(sbuf, "TC qcounts :"); for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) { sbuf_printf(sbuf, " %4d", 1 << (ctx.info.tc_mapping[i] >> ICE_AQ_VSI_TC_Q_NUM_S)); } /* Finish */ sbuf_finish(sbuf); sbuf_delete(sbuf); return (0); } /** * ice_get_tx_rx_equalizations -- read serdes Tx/Rx equalization params * @hw: pointer to the HW struct * @serdes_num: represents the serdes number * @ptr: structure to hold all serdes parameters for the given serdes * * Returns all serdes equalization parameters supported for the given serdes number */ static int ice_get_tx_rx_equalizations(struct ice_hw *hw, u8 serdes_num, struct ice_serdes_equalization *ptr) { int err = 0; if (!ptr) return (EOPNOTSUPP); #define ICE_GET_PHY_EQUALIZATION(equ, dir, value) \ ice_aq_get_phy_equalization(hw, equ, dir, serdes_num, &(ptr->value)) err = ICE_GET_PHY_EQUALIZATION(ICE_AQC_RX_EQU_PRE1, ICE_AQC_OP_CODE_RX_EQU, rx_equalization_pre1); if (err) return err; err = ICE_GET_PHY_EQUALIZATION(ICE_AQC_RX_EQU_PRE2, ICE_AQC_OP_CODE_RX_EQU, rx_equalization_pre2); if (err) return err; err = ICE_GET_PHY_EQUALIZATION(ICE_AQC_RX_EQU_POST1, ICE_AQC_OP_CODE_RX_EQU, rx_equalization_post1); if (err) return err; err = ICE_GET_PHY_EQUALIZATION(ICE_AQC_RX_EQU_BFLF, ICE_AQC_OP_CODE_RX_EQU, rx_equalization_bflf); if (err) return err; err = ICE_GET_PHY_EQUALIZATION(ICE_AQC_RX_EQU_BFHF, ICE_AQC_OP_CODE_RX_EQU, rx_equalization_bfhf); if (err) return err; err = ICE_GET_PHY_EQUALIZATION(ICE_AQC_RX_EQU_DRATE, ICE_AQC_OP_CODE_RX_EQU, rx_equalization_drate); if (err) return err; err = ICE_GET_PHY_EQUALIZATION(ICE_AQC_TX_EQU_PRE1, ICE_AQC_OP_CODE_TX_EQU, tx_equalization_pre1); if (err) return err; err = ICE_GET_PHY_EQUALIZATION(ICE_AQC_TX_EQU_PRE2, ICE_AQC_OP_CODE_TX_EQU, tx_equalization_pre2); if (err) return err; err = ICE_GET_PHY_EQUALIZATION(ICE_AQC_TX_EQU_PRE3, ICE_AQC_OP_CODE_TX_EQU, tx_equalization_pre3); if (err) return err; err = ICE_GET_PHY_EQUALIZATION(ICE_AQC_TX_EQU_ATTEN, ICE_AQC_OP_CODE_TX_EQU, tx_equalization_atten); if (err) return err; err = ICE_GET_PHY_EQUALIZATION(ICE_AQC_TX_EQU_POST1, ICE_AQC_OP_CODE_TX_EQU,
tx_equalization_post1); if (err) return err; return (0); } /** * ice_fec_counter_read - reads FEC stats from PHY * @hw: pointer to the HW struct * @receiver_id: PCS quad, at register level * @reg_offset: register for the current request * @output: pointer to the caller-supplied buffer to return requested FEC stats * * Returns FEC stats from the PHY */ static int ice_fec_counter_read(struct ice_hw *hw, u32 receiver_id, u32 reg_offset, u16 *output) { u16 flag = (ICE_AQ_FLAG_RD | ICE_AQ_FLAG_BUF | ICE_AQ_FLAG_SI); struct ice_sbq_msg_input msg = {}; int err = 0; memset(&msg, 0, sizeof(msg)); msg.msg_addr_low = ICE_LO_WORD(reg_offset); msg.msg_addr_high = ICE_LO_DWORD(receiver_id); msg.opcode = ice_sbq_msg_rd; msg.dest_dev = rmn_0; err = ice_sbq_rw_reg(hw, &msg, flag); if (err) { return err; } *output = ICE_LO_WORD(msg.data); return (0); } /** * ice_get_port_fec_stats - returns FEC correctable and uncorrectable stats per PCS quad and PCS port * @hw: pointer to the HW struct * @pcs_quad: PCS quad for input port * @pcs_port: PCS port for input port * @fec_stats: buffer to hold FEC statistics for the given port * * Returns FEC stats */ static int ice_get_port_fec_stats(struct ice_hw *hw, u16 pcs_quad, u16 pcs_port, struct ice_fec_stats_to_sysctl *fec_stats) { u32 uncorr_low_reg = 0, uncorr_high_reg = 0; u16 uncorr_low_val = 0, uncorr_high_val = 0; u32 corr_low_reg = 0, corr_high_reg = 0; u16 corr_low_val = 0, corr_high_val = 0; u32 receiver_id = 0; int err; switch (pcs_port) { case 0: corr_low_reg = ICE_RS_FEC_CORR_LOW_REG_PORT0; corr_high_reg = ICE_RS_FEC_CORR_HIGH_REG_PORT0; uncorr_low_reg = ICE_RS_FEC_UNCORR_LOW_REG_PORT0; uncorr_high_reg = ICE_RS_FEC_UNCORR_HIGH_REG_PORT0; break; case 1: corr_low_reg = ICE_RS_FEC_CORR_LOW_REG_PORT1; corr_high_reg = ICE_RS_FEC_CORR_HIGH_REG_PORT1; uncorr_low_reg = ICE_RS_FEC_UNCORR_LOW_REG_PORT1; uncorr_high_reg = ICE_RS_FEC_UNCORR_HIGH_REG_PORT1; break; case 2: corr_low_reg = ICE_RS_FEC_CORR_LOW_REG_PORT2; corr_high_reg = ICE_RS_FEC_CORR_HIGH_REG_PORT2; uncorr_low_reg = ICE_RS_FEC_UNCORR_LOW_REG_PORT2; uncorr_high_reg = ICE_RS_FEC_UNCORR_HIGH_REG_PORT2; break; case 3: corr_low_reg = ICE_RS_FEC_CORR_LOW_REG_PORT3; corr_high_reg = ICE_RS_FEC_CORR_HIGH_REG_PORT3; uncorr_low_reg = ICE_RS_FEC_UNCORR_LOW_REG_PORT3; uncorr_high_reg = ICE_RS_FEC_UNCORR_HIGH_REG_PORT3; break; default: return (EINVAL); } if (pcs_quad == 0) receiver_id = ICE_RS_FEC_RECEIVER_ID_PCS0; /* MTIP PCS Quad 0 - FEC */ else if (pcs_quad == 1) receiver_id = ICE_RS_FEC_RECEIVER_ID_PCS1; /* MTIP PCS Quad 1 - FEC */ else return (EINVAL); err = ice_fec_counter_read(hw, receiver_id, corr_low_reg, &corr_low_val); if (err) return err; err = ice_fec_counter_read(hw, receiver_id, corr_high_reg, &corr_high_val); if (err) return err; err = ice_fec_counter_read(hw, receiver_id, uncorr_low_reg, &uncorr_low_val); if (err) return err; err = ice_fec_counter_read(hw, receiver_id, uncorr_high_reg, &uncorr_high_val); if (err) return err; fec_stats->fec_corr_cnt_low = corr_low_val; fec_stats->fec_corr_cnt_high = corr_high_val; fec_stats->fec_uncorr_cnt_low = uncorr_low_val; fec_stats->fec_uncorr_cnt_high = uncorr_high_val; return (0); } /** * ice_is_serdes_muxed - returns whether serdes is muxed in hardware * @hw: pointer to the HW struct * * Returns true when the serdes is muxed, * false when the serdes is not muxed */ static bool ice_is_serdes_muxed(struct ice_hw *hw) { return (rd32(hw, 0xB81E0) & 0x4); } /** * ice_get_maxspeed - Get the max speed for a given lport * @hw: pointer to the HW struct * @lport: logical port for which max speed is
requested * @max_speed: return max speed for input lport */ static int ice_get_maxspeed(struct ice_hw *hw, u8 lport, u8 *max_speed) { struct ice_aqc_get_port_options_elem options[ICE_AQC_PORT_OPT_MAX] = {}; u8 option_count = ICE_AQC_PORT_OPT_MAX; bool active_valid, pending_valid; u8 active_idx, pending_idx; int status; status = ice_aq_get_port_options(hw, options, &option_count, lport, true, &active_idx, &active_valid, &pending_idx, &pending_valid); if (status || active_idx >= ICE_AQC_PORT_OPT_MAX) { ice_debug(hw, ICE_DBG_PHY, "Port split read err: %d\n", status); return (EIO); } if (active_valid) { ice_debug(hw, ICE_DBG_PHY, "Active idx: %d\n", active_idx); } else { ice_debug(hw, ICE_DBG_PHY, "No valid Active option\n"); return (EINVAL); } *max_speed = options[active_idx].max_lane_speed; return (0); } /** * ice_update_port_topology - update port topology * @lport: logical port for which physical info is requested * @port_topology: buffer to hold port topology * @is_muxed: serdes is muxed in hardware */ static int ice_update_port_topology(u8 lport, struct ice_port_topology *port_topology, bool is_muxed) { switch (lport) { case 0: port_topology->pcs_quad_select = 0; port_topology->pcs_port = 0; port_topology->primary_serdes_lane = 0; break; case 1: port_topology->pcs_quad_select = 1; port_topology->pcs_port = 0; if (is_muxed) port_topology->primary_serdes_lane = 2; else port_topology->primary_serdes_lane = 4; break; case 2: port_topology->pcs_quad_select = 0; port_topology->pcs_port = 1; port_topology->primary_serdes_lane = 1; break; case 3: port_topology->pcs_quad_select = 1; port_topology->pcs_port = 1; if (is_muxed) port_topology->primary_serdes_lane = 3; else port_topology->primary_serdes_lane = 5; break; case 4: port_topology->pcs_quad_select = 0; port_topology->pcs_port = 2; port_topology->primary_serdes_lane = 2; break; case 5: port_topology->pcs_quad_select = 1; port_topology->pcs_port = 2; port_topology->primary_serdes_lane = 6; break; case 6: port_topology->pcs_quad_select = 0; port_topology->pcs_port = 3; port_topology->primary_serdes_lane = 3; break; case 7: port_topology->pcs_quad_select = 1; port_topology->pcs_port = 3; port_topology->primary_serdes_lane = 7; break; default: return (EINVAL); } return (0); } /** * ice_get_port_topology - returns physical topology * @hw: pointer to the HW struct * @lport: logical port for which physical info is requested * @port_topology: buffer to hold port topology * * Returns the physical components associated with the port: PCS quad, PCS port, and serdes number */ static int ice_get_port_topology(struct ice_hw *hw, u8 lport, struct ice_port_topology *port_topology) { struct ice_aqc_get_link_topo cmd; bool is_muxed = false; u8 cage_type = 0; u16 node_handle; u8 ctx = 0; int err; if (!hw || !port_topology) return (EINVAL); if (hw->device_id >= ICE_DEV_ID_E810_XXV_BACKPLANE) { port_topology->serdes_lane_count = 1; if (lport == 0) { port_topology->pcs_quad_select = 0; port_topology->pcs_port = 0; port_topology->primary_serdes_lane = 0; } else if (lport == 1) { port_topology->pcs_quad_select = 1; port_topology->pcs_port = 0; port_topology->primary_serdes_lane = 1; } else { return (EINVAL); } return (0); } memset(&cmd, 0, sizeof(cmd)); ctx = ICE_AQC_LINK_TOPO_NODE_TYPE_CAGE << ICE_AQC_LINK_TOPO_NODE_TYPE_S; ctx |= ICE_AQC_LINK_TOPO_NODE_CTX_PORT << ICE_AQC_LINK_TOPO_NODE_CTX_S; cmd.addr.topo_params.node_type_ctx = ctx; cmd.addr.topo_params.index = 0; cmd.addr.topo_params.lport_num = 0; cmd.addr.topo_params.lport_num_valid = 0; err =
ice_aq_get_netlist_node(hw, &cmd, &cage_type, &node_handle); if (err) return (EINVAL); is_muxed = ice_is_serdes_muxed(hw); err = ice_update_port_topology(lport, port_topology, is_muxed); if (err) return err; if (cage_type == 0x11 || /* SFP */ cage_type == 0x12) { /* SFP28 */ port_topology->serdes_lane_count = 1; } else if (cage_type == 0x13 || /* QSFP */ cage_type == 0x14) { /* QSFP28 */ u8 max_speed = 0; err = ice_get_maxspeed(hw, port_topology->primary_serdes_lane, &max_speed); if (err) return err; if (max_speed == ICE_AQC_PORT_OPT_MAX_LANE_M) device_printf(ice_hw_to_dev(hw), "%s: WARNING: reported max_lane_speed is N/A\n", __func__); if (max_speed == ICE_AQC_PORT_OPT_MAX_LANE_100G) port_topology->serdes_lane_count = 4; else if (max_speed == ICE_AQC_PORT_OPT_MAX_LANE_50G) port_topology->serdes_lane_count = 2; else port_topology->serdes_lane_count = 1; } else return (EINVAL); ice_debug(hw, ICE_DBG_PHY, "%s: Port Topology (lport %d):\n", __func__, lport); ice_debug(hw, ICE_DBG_PHY, "serdes lane count %d\n", port_topology->serdes_lane_count); ice_debug(hw, ICE_DBG_PHY, "pcs quad select %d\n", port_topology->pcs_quad_select); ice_debug(hw, ICE_DBG_PHY, "pcs port %d\n", port_topology->pcs_port); ice_debug(hw, ICE_DBG_PHY, "primary serdes lane %d\n", port_topology->primary_serdes_lane); return (0); } /** * ice_sysctl_dump_phy_stats - print PHY stats * @oidp: sysctl oid structure * @arg1: pointer to private data structure * @arg2: unused * @req: sysctl request pointer */ static int ice_sysctl_dump_phy_stats(SYSCTL_HANDLER_ARGS) { struct ice_regdump_to_sysctl ice_prv_regs_buf = {}; struct ice_softc *sc = (struct ice_softc *)arg1; struct ice_port_topology port_topology; struct ice_hw *hw = &sc->hw; struct ice_port_info *pi; device_t dev = sc->dev; u8 serdes_num = 0; unsigned int i; int err = 0; struct sbuf *sbuf; pi = hw->port_info; if (!pi) { device_printf(dev, "Port info structure is null\n"); return (EINVAL); } UNREFERENCED_PARAMETER(oidp); UNREFERENCED_PARAMETER(arg2); if (ice_driver_is_detaching(sc)) return (ESHUTDOWN); if (ice_get_port_topology(hw, pi->lport, &port_topology) != 0) { device_printf(dev, "Extended register dump failed for Lport %d\n", pi->lport); return (EIO); } if (port_topology.serdes_lane_count > ICE_MAX_SERDES_LANE_COUNT) { device_printf(dev, "Extended register dump failed: Lport %d Serdes count %d\n", pi->lport, port_topology.serdes_lane_count); return (EINVAL); } sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req); /* Get serdes equalization parameters for each available serdes lane */ for (i = 0; i < port_topology.serdes_lane_count; i++) { serdes_num = port_topology.primary_serdes_lane + i; err = ice_get_tx_rx_equalizations(hw, serdes_num, &(ice_prv_regs_buf.equalization[i])); if (err) { device_printf(dev, "Serdes equalization get failed Lport %d Serdes %d Err %d\n", pi->lport, serdes_num, err); sbuf_finish(sbuf); sbuf_delete(sbuf); return (EIO); } sbuf_printf(sbuf, "\nSerdes lane: %d\n", i); sbuf_printf(sbuf, "RX PRE1 = %d\n", ice_prv_regs_buf.equalization[i].rx_equalization_pre1); sbuf_printf(sbuf, "RX PRE2 = %d\n", (s16)ice_prv_regs_buf.equalization[i].rx_equalization_pre2); sbuf_printf(sbuf, "RX POST1 = %d\n", ice_prv_regs_buf.equalization[i].rx_equalization_post1); sbuf_printf(sbuf, "RX BFLF = %d\n", ice_prv_regs_buf.equalization[i].rx_equalization_bflf); sbuf_printf(sbuf, "RX BFHF = %d\n", ice_prv_regs_buf.equalization[i].rx_equalization_bfhf); sbuf_printf(sbuf, "RX DRATE = %d\n", (s16)ice_prv_regs_buf.equalization[i].rx_equalization_drate);
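/* Tx-side equalization values for the same serdes lane */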
sbuf_printf(sbuf, "TX PRE1 = %d\n", ice_prv_regs_buf.equalization[i].tx_equalization_pre1); sbuf_printf(sbuf, "TX PRE2 = %d\n", ice_prv_regs_buf.equalization[i].tx_equalization_pre2); sbuf_printf(sbuf, "TX PRE3 = %d\n", ice_prv_regs_buf.equalization[i].tx_equalization_pre3); sbuf_printf(sbuf, "TX POST1 = %d\n", ice_prv_regs_buf.equalization[i].tx_equalization_post1); sbuf_printf(sbuf, "TX ATTEN = %d\n", ice_prv_regs_buf.equalization[i].tx_equalization_atten); } /* Get FEC correctable and uncorrectable counters */ err = ice_get_port_fec_stats(hw, port_topology.pcs_quad_select, port_topology.pcs_port, &(ice_prv_regs_buf.stats)); if (err) { device_printf(dev, "Failed to get FEC stats Lport %d Err %d\n", pi->lport, err); sbuf_finish(sbuf); sbuf_delete(sbuf); return (EIO); } sbuf_printf(sbuf, "\nRS FEC Corrected codeword count = %u\n", ((u32)ice_prv_regs_buf.stats.fec_corr_cnt_high << 16) | ice_prv_regs_buf.stats.fec_corr_cnt_low); sbuf_printf(sbuf, "RS FEC Uncorrected codeword count = %u\n", ((u32)ice_prv_regs_buf.stats.fec_uncorr_cnt_high << 16) | ice_prv_regs_buf.stats.fec_uncorr_cnt_low); /* Finish */ sbuf_finish(sbuf); sbuf_delete(sbuf); return (0); } /** * ice_ets_str_to_tbl - Parse string into ETS table * @str: input string to parse * @table: output eight values used for ETS values * @limit: max valid value to accept for ETS values * * Parses a string and converts the eight values within * into a table that can be used in setting ETS settings * in a MIB. * * @return 0 on success, EINVAL if a parsed value is * not between 0 and limit. */ static int ice_ets_str_to_tbl(const char *str, u8 *table, u8 limit) { const char *str_start = str; char *str_end; long token; for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) { token = strtol(str_start, &str_end, 0); if (token < 0 || token > limit) return (EINVAL); table[i] = (u8)token; str_start = (str_end + 1); } return (0); } /** * ice_check_ets_bw - Check if ETS bw vals are valid * @table: eight values used for ETS bandwidth * * @return true if the sum of all 8 values in table * equals 100. */ static bool ice_check_ets_bw(u8 *table) { int sum = 0; for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) sum += (int)table[i]; return (sum == 100); } /** * ice_cfg_pba_num - Determine if PBA Number is retrievable * @sc: the device private softc structure * * Sets the feature flag for the existence of a PBA number * based on the success of the read command. This does not * cache the result.
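 * * Note: the PBA (Printed Board Assembly) number, when present, is an * identifier of the form "K91258-001" (illustrative value only).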
*/ void ice_cfg_pba_num(struct ice_softc *sc) { u8 pba_string[32] = ""; if ((ice_is_bit_set(sc->feat_cap, ICE_FEATURE_HAS_PBA)) && (ice_read_pba_string(&sc->hw, pba_string, sizeof(pba_string)) == 0)) ice_set_bit(ICE_FEATURE_HAS_PBA, sc->feat_en); } /** * ice_sysctl_query_port_ets - print Port ETS Config from AQ * @oidp: sysctl oid structure * @arg1: pointer to private data structure * @arg2: unused * @req: sysctl request pointer */ static int ice_sysctl_query_port_ets(SYSCTL_HANDLER_ARGS) { struct ice_softc *sc = (struct ice_softc *)arg1; struct ice_aqc_port_ets_elem port_ets = { 0 }; struct ice_hw *hw = &sc->hw; struct ice_port_info *pi; device_t dev = sc->dev; struct sbuf *sbuf; int status; int i = 0; UNREFERENCED_PARAMETER(oidp); UNREFERENCED_PARAMETER(arg2); if (ice_driver_is_detaching(sc)) return (ESHUTDOWN); pi = hw->port_info; status = ice_aq_query_port_ets(pi, &port_ets, sizeof(port_ets), NULL); if (status) { device_printf(dev, "Query Port ETS AQ call failed, err %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); return (EIO); } sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req); /* Do the actual printing */ sbuf_printf(sbuf, "\n"); sbuf_printf(sbuf, "Valid TC map: 0x%x\n", port_ets.tc_valid_bits); sbuf_printf(sbuf, "TC BW %%:"); ice_for_each_traffic_class(i) { sbuf_printf(sbuf, " %3d", port_ets.tc_bw_share[i]); } sbuf_printf(sbuf, "\n"); sbuf_printf(sbuf, "EIR profile ID: %d\n", port_ets.port_eir_prof_id); sbuf_printf(sbuf, "CIR profile ID: %d\n", port_ets.port_cir_prof_id); sbuf_printf(sbuf, "TC Node prio: 0x%x\n", port_ets.tc_node_prio); sbuf_printf(sbuf, "TC Node TEIDs:\n"); ice_for_each_traffic_class(i) { sbuf_printf(sbuf, "%d: %d\n", i, port_ets.tc_node_teid[i]); } /* Finish */ sbuf_finish(sbuf); sbuf_delete(sbuf); return (0); } /** * ice_sysctl_dscp2tc_map - Map DSCP to hardware TCs * @oidp: sysctl oid structure * @arg1: pointer to private data structure * @arg2: which eight DSCP to UP mappings to configure (0 - 7) * @req: sysctl request pointer * * Gets or sets the current DSCP to UP table cached by the driver. Since there * are 64 possible DSCP values to configure, this sysctl only configures * chunks of 8 in that space at a time. * * This sysctl is only relevant in DSCP mode, and will only function in SW DCB * mode. 
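 * * Example (illustrative sysctl path): map DSCP values 8-15 (arg2 == 1) * to UP 1: * * # sysctl dev.ice.0.dscp2tc_map.8-15="1,1,1,1,1,1,1,1"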
*/ static int ice_sysctl_dscp2tc_map(SYSCTL_HANDLER_ARGS) { struct ice_softc *sc = (struct ice_softc *)arg1; struct ice_dcbx_cfg *local_dcbx_cfg; struct ice_port_info *pi; struct ice_hw *hw = &sc->hw; device_t dev = sc->dev; int status; struct sbuf *sbuf; int ret; /* Store DSCP mapping input from the user */ char dscp_user_buf[128] = ""; u8 new_dscp_table_seg[ICE_MAX_TRAFFIC_CLASS] = {}; if (ice_driver_is_detaching(sc)) return (ESHUTDOWN); if (req->oldptr == NULL && req->newptr == NULL) { ret = SYSCTL_OUT(req, 0, 128); return (ret); } pi = hw->port_info; local_dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg; sbuf = sbuf_new(NULL, dscp_user_buf, 128, SBUF_FIXEDLEN | SBUF_INCLUDENUL); /* Format DSCP-to-UP data for output */ for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) { sbuf_printf(sbuf, "%d", local_dcbx_cfg->dscp_map[arg2 * 8 + i]); if (i != ICE_MAX_TRAFFIC_CLASS - 1) sbuf_printf(sbuf, ","); } sbuf_finish(sbuf); sbuf_delete(sbuf); /* Read in the new DSCP mapping values */ ret = sysctl_handle_string(oidp, dscp_user_buf, sizeof(dscp_user_buf), req); if ((ret) || (req->newptr == NULL)) return (ret); /* Don't allow setting changes in FW DCB mode */ if (!hw->port_info->qos_cfg.is_sw_lldp) { device_printf(dev, "%s: DSCP mapping is not allowed in FW DCBX mode\n", __func__); return (EINVAL); } /* Convert 8 values in a string to a table; this is similar to what * needs to be done for ETS settings, so this function can be re-used * for that purpose. */ ret = ice_ets_str_to_tbl(dscp_user_buf, new_dscp_table_seg, ICE_MAX_TRAFFIC_CLASS - 1); if (ret) { device_printf(dev, "%s: Could not parse input DSCP2TC table: %s\n", __func__, dscp_user_buf); return (ret); } memcpy(&local_dcbx_cfg->dscp_map[arg2 * 8], new_dscp_table_seg, sizeof(new_dscp_table_seg)); local_dcbx_cfg->app_mode = ICE_DCBX_APPS_NON_WILLING; status = ice_set_dcb_cfg(pi); if (status) { device_printf(dev, "%s: Failed to set DCB config; status %s, aq_err %s\n", __func__, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); return (EIO); } ice_do_dcb_reconfig(sc, false); return (0); } /** * ice_handle_debug_dump_ioctl - Handle a debug dump ioctl request * @sc: the device private softc * @ifd: ifdrv ioctl request pointer */ int ice_handle_debug_dump_ioctl(struct ice_softc *sc, struct ifdrv *ifd) { size_t ifd_len = ifd->ifd_len; struct ice_hw *hw = &sc->hw; device_t dev = sc->dev; struct ice_debug_dump_cmd *ddc; int status; int err = 0; /* Returned arguments from the Admin Queue */ u16 ret_buf_size = 0; u16 ret_next_cluster = 0; u16 ret_next_table = 0; u32 ret_next_index = 0; /* * ifioctl forwards SIOCxDRVSPEC to iflib without performing * a privilege check. In turn, iflib forwards the ioctl to the driver * without performing a privilege check. Perform one here to ensure * that non-privileged threads cannot access this interface. */ err = priv_check(curthread, PRIV_DRIVER); if (err) return (err); if (ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET)) { device_printf(dev, "%s: Driver must rebuild data structures after a reset. Operation aborted.\n", __func__); return (EBUSY); } if (ifd_len < sizeof(*ddc)) { device_printf(dev, "%s: ifdrv length is too small. 
Got %zu, but expected %zu\n", __func__, ifd_len, sizeof(*ddc)); return (EINVAL); } if (ifd->ifd_data == NULL) { device_printf(dev, "%s: ifd data buffer not present.\n", __func__); return (EINVAL); } ddc = (struct ice_debug_dump_cmd *)malloc(ifd_len, M_ICE, M_ZERO | M_NOWAIT); if (!ddc) return (ENOMEM); /* Copy the debug dump command and data in from user space */ /* coverity[tainted_data_argument] */ err = copyin(ifd->ifd_data, ddc, ifd_len); if (err) { device_printf(dev, "%s: Copying request from user space failed, err %s\n", __func__, ice_err_str(err)); goto out; } /* The data_size arg must be at least 1 for the AQ cmd to work */ if (ddc->data_size == 0) { device_printf(dev, "%s: data_size must be greater than 0\n", __func__); err = EINVAL; goto out; } /* ...and it can't be too long */ if (ddc->data_size > (ifd_len - sizeof(*ddc))) { device_printf(dev, "%s: data_size (%d) is larger than ifd_len space (%zu)?\n", __func__, ddc->data_size, ifd_len - sizeof(*ddc)); err = EINVAL; goto out; } /* Make sure any possible data buffer space is zeroed */ memset(ddc->data, 0, ifd_len - sizeof(*ddc)); status = ice_aq_get_internal_data(hw, ddc->cluster_id, ddc->table_id, ddc->offset, (u8 *)ddc->data, ddc->data_size, &ret_buf_size, &ret_next_cluster, &ret_next_table, &ret_next_index, NULL); ice_debug(hw, ICE_DBG_DIAG, "%s: ret_buf_size %d, ret_next_table %d, ret_next_index %d\n", __func__, ret_buf_size, ret_next_table, ret_next_index); if (status) { device_printf(dev, "%s: Get Internal Data AQ command failed, err %s aq_err %s\n", __func__, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); goto aq_error; } ddc->table_id = ret_next_table; ddc->offset = ret_next_index; ddc->data_size = ret_buf_size; ddc->cluster_id = ret_next_cluster; /* Copy the possibly modified contents of the handled request out */ err = copyout(ddc, ifd->ifd_data, ifd->ifd_len); if (err) { device_printf(dev, "%s: Copying response back to user space failed, err %s\n", __func__, ice_err_str(err)); goto out; } aq_error: /* Convert private status to an error code for proper ioctl response */ switch (status) { case 0: err = (0); break; case ICE_ERR_NO_MEMORY: err = (ENOMEM); break; case ICE_ERR_OUT_OF_RANGE: err = (ENOTTY); break; case ICE_ERR_AQ_ERROR: err = (EIO); break; case ICE_ERR_PARAM: default: err = (EINVAL); break; } out: free(ddc, M_ICE); return (err); } /** * ice_sysctl_allow_no_fec_mod_in_auto - Change Auto FEC behavior * @oidp: sysctl oid structure * @arg1: pointer to private data structure * @arg2: unused * @req: sysctl request pointer * * Allows the user to permit "No FEC" mode to be used in "Auto" * FEC mode during FEC negotiation. This is only supported * on newer firmware versions.
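 * * Example (illustrative sysctl path): * * # sysctl dev.ice.0.allow_no_fec_modules_in_auto=1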
*/ static int ice_sysctl_allow_no_fec_mod_in_auto(SYSCTL_HANDLER_ARGS) { struct ice_softc *sc = (struct ice_softc *)arg1; struct ice_hw *hw = &sc->hw; device_t dev = sc->dev; u8 user_flag; int ret; UNREFERENCED_PARAMETER(arg2); ret = priv_check(curthread, PRIV_DRIVER); if (ret) return (ret); if (ice_driver_is_detaching(sc)) return (ESHUTDOWN); user_flag = (u8)sc->allow_no_fec_mod_in_auto; ret = sysctl_handle_bool(oidp, &user_flag, 0, req); if ((ret) || (req->newptr == NULL)) return (ret); if (!ice_fw_supports_fec_dis_auto(hw)) { log(LOG_INFO, "%s: Enabling or disabling of auto configuration of modules that don't support FEC is unsupported by the current firmware\n", device_get_nameunit(dev)); return (ENODEV); } if (user_flag == (bool)sc->allow_no_fec_mod_in_auto) return (0); sc->allow_no_fec_mod_in_auto = (u8)user_flag; if (sc->allow_no_fec_mod_in_auto) log(LOG_INFO, "%s: Enabled auto configuration of No FEC modules\n", device_get_nameunit(dev)); else log(LOG_INFO, "%s: Auto configuration of No FEC modules reset to NVM defaults\n", device_get_nameunit(dev)); return (0); } /** * ice_sysctl_temperature - Retrieve NIC temp via AQ command * @oidp: sysctl oid structure * @arg1: pointer to private data structure * @arg2: unused * @req: sysctl request pointer * * If ICE_DBG_DIAG is set in the debug.debug_mask sysctl, then this will print * temperature threshold information in the kernel message log, too. */ static int ice_sysctl_temperature(SYSCTL_HANDLER_ARGS) { struct ice_aqc_get_sensor_reading_resp resp; struct ice_softc *sc = (struct ice_softc *)arg1; struct ice_hw *hw = &sc->hw; device_t dev = sc->dev; int status; UNREFERENCED_PARAMETER(oidp); UNREFERENCED_PARAMETER(arg2); if (ice_driver_is_detaching(sc)) return (ESHUTDOWN); status = ice_aq_get_sensor_reading(hw, ICE_AQC_INT_TEMP_SENSOR, ICE_AQC_INT_TEMP_FORMAT, &resp, NULL); if (status) { device_printf(dev, "Get Sensor Reading AQ call failed, err %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); return (EIO); } ice_debug(hw, ICE_DBG_DIAG, "%s: Warning Temp Threshold: %d\n", __func__, resp.data.s0f0.temp_warning_threshold); ice_debug(hw, ICE_DBG_DIAG, "%s: Critical Temp Threshold: %d\n", __func__, resp.data.s0f0.temp_critical_threshold); ice_debug(hw, ICE_DBG_DIAG, "%s: Fatal Temp Threshold: %d\n", __func__, resp.data.s0f0.temp_fatal_threshold); return sysctl_handle_8(oidp, &resp.data.s0f0.temp, 0, req); } /** * ice_sysctl_create_mirror_interface - Create a new ifnet that monitors * traffic from the main PF VSI */ static int ice_sysctl_create_mirror_interface(SYSCTL_HANDLER_ARGS) { struct ice_softc *sc = (struct ice_softc *)arg1; device_t dev = sc->dev; int ret; UNREFERENCED_PARAMETER(arg2); ret = priv_check(curthread, PRIV_DRIVER); if (ret) return (ret); if (ice_driver_is_detaching(sc)) return (ESHUTDOWN); /* If the user hasn't written "1" to this sysctl yet: */ if (!ice_test_state(&sc->state, ICE_STATE_DO_CREATE_MIRR_INTFC)) { /* Avoid output on the first set of reads to this sysctl in * order to prevent a null byte from being written to the * end result when called via sysctl(8). */ if (req->oldptr == NULL && req->newptr == NULL) { ret = SYSCTL_OUT(req, 0, 0); return (ret); } char input_buf[2] = ""; ret = sysctl_handle_string(oidp, input_buf, sizeof(input_buf), req); if ((ret) || (req->newptr == NULL)) return (ret); /* If we get '1', then indicate we'll create the interface in * the next sysctl read call. 
*/ if (input_buf[0] == '1') { if (sc->mirr_if) { device_printf(dev, "Mirror interface %s already exists!\n", if_name(sc->mirr_if->ifp)); return (EEXIST); } ice_set_state(&sc->state, ICE_STATE_DO_CREATE_MIRR_INTFC); return (0); } return (EINVAL); } /* --- "Do Create Mirror Interface" is set --- */ /* Caller just wants the upper bound for size */ if (req->oldptr == NULL && req->newptr == NULL) { ret = SYSCTL_OUT(req, 0, 128); return (ret); } device_printf(dev, "Creating new mirroring interface...\n"); ret = ice_create_mirror_interface(sc); if (ret) return (ret); ice_clear_state(&sc->state, ICE_STATE_DO_CREATE_MIRR_INTFC); ret = sysctl_handle_string(oidp, __DECONST(char *, "Interface attached"), 0, req); return (ret); } /** * ice_sysctl_destroy_mirror_interface - Destroy network interface that monitors * traffic from the main PF VSI */ static int ice_sysctl_destroy_mirror_interface(SYSCTL_HANDLER_ARGS) { struct ice_softc *sc = (struct ice_softc *)arg1; device_t dev = sc->dev; int ret; UNREFERENCED_PARAMETER(arg2); ret = priv_check(curthread, PRIV_DRIVER); if (ret) return (ret); if (ice_driver_is_detaching(sc)) return (ESHUTDOWN); /* If the user hasn't written "1" to this sysctl yet: */ if (!ice_test_state(&sc->state, ICE_STATE_DO_DESTROY_MIRR_INTFC)) { /* Avoid output on the first set of reads to this sysctl in * order to prevent a null byte from being written to the * end result when called via sysctl(8). */ if (req->oldptr == NULL && req->newptr == NULL) { ret = SYSCTL_OUT(req, 0, 0); return (ret); } char input_buf[2] = ""; ret = sysctl_handle_string(oidp, input_buf, sizeof(input_buf), req); if ((ret) || (req->newptr == NULL)) return (ret); /* If we get '1', then indicate we'll destroy the interface in * the next sysctl read call. */ if (input_buf[0] == '1') { if (!sc->mirr_if) { device_printf(dev, "No mirror interface exists!\n"); return (EINVAL); } ice_set_state(&sc->state, ICE_STATE_DO_DESTROY_MIRR_INTFC); return (0); } return (EINVAL); } /* --- "Do Destroy Mirror Interface" is set --- */ /* Caller just wants the upper bound for size */ if (req->oldptr == NULL && req->newptr == NULL) { ret = SYSCTL_OUT(req, 0, 128); return (ret); } device_printf(dev, "Destroying mirroring interface...\n"); ice_destroy_mirror_interface(sc); ice_clear_state(&sc->state, ICE_STATE_DO_DESTROY_MIRR_INTFC); ret = sysctl_handle_string(oidp, __DECONST(char *, "Interface destroyed"), 0, req); return (ret); } diff --git a/sys/dev/ice/ice_lib.h b/sys/dev/ice/ice_lib.h index afc03ebd3b51..466cb8701b79 100644 --- a/sys/dev/ice/ice_lib.h +++ b/sys/dev/ice/ice_lib.h @@ -1,1016 +1,1027 @@ /* SPDX-License-Identifier: BSD-3-Clause */ /* Copyright (c) 2024, Intel Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. Neither the name of the Intel Corporation nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission.
* * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /** * @file ice_lib.h * @brief header for generic device and sysctl functions * * Contains definitions and function declarations for the ice_lib.c file. It * does not depend on the iflib networking stack. */ #ifndef _ICE_LIB_H_ #define _ICE_LIB_H_ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "ice_dcb.h" #include "ice_type.h" #include "ice_common.h" #include "ice_flow.h" #include "ice_sched.h" #include "ice_resmgr.h" #include "ice_rdma_internal.h" #include "ice_rss.h" /* Hide debug sysctls unless INVARIANTS is enabled */ #ifdef INVARIANTS #define ICE_CTLFLAG_DEBUG 0 #else #define ICE_CTLFLAG_DEBUG CTLFLAG_SKIP #endif /** * for_each_set_bit - For loop over each set bit in a bit string * @bit: storage for the bit index * @data: address of data block to loop over * @nbits: maximum number of bits to loop over * * macro to create a for loop over a bit string, which runs the body once for * each bit that is set in the string. The bit variable will be set to the * index of each set bit in the string, with zero representing the first bit. */ #define for_each_set_bit(bit, data, nbits) \ for (bit_ffs((bitstr_t *)(data), (nbits), &(bit)); \ (bit) != -1; \ bit_ffs_at((bitstr_t *)(data), (bit) + 1, (nbits), &(bit))) /** * @var broadcastaddr * @brief broadcast MAC address * * constant defining the broadcast MAC address, used for programming the * broadcast address as a MAC filter for the PF VSI. */ static const u8 broadcastaddr[ETHER_ADDR_LEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; MALLOC_DECLARE(M_ICE); extern const char ice_driver_version[]; extern const uint8_t ice_major_version; extern const uint8_t ice_minor_version; extern const uint8_t ice_patch_version; extern const uint8_t ice_rc_version; /* global sysctl indicating whether the Tx FC filter should be enabled */ extern bool ice_enable_tx_fc_filter; /* global sysctl indicating whether the Tx LLDP filter should be enabled */ extern bool ice_enable_tx_lldp_filter; /* global sysctl indicating whether FW health status events should be enabled */ extern bool ice_enable_health_events; /* global sysctl indicating whether to enable 5-layer scheduler topology */ extern bool ice_tx_balance_en; /** * @struct ice_bar_info * @brief PCI BAR mapping information * * Contains data about a PCI BAR that the driver has mapped for use. 
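 * * The tag/handle pair is what register accessors such as rd32() are * expected to hand to the bus_space read/write routines.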
*/ struct ice_bar_info { struct resource *res; bus_space_tag_t tag; bus_space_handle_t handle; bus_size_t size; int rid; }; /* Alignment for queues */ #define DBA_ALIGN 128 /* Maximum TSO size is (256K)-1 */ #define ICE_TSO_SIZE ((256*1024) - 1) /* Minimum size for TSO MSS */ #define ICE_MIN_TSO_MSS 64 #define ICE_MAX_TX_SEGS 8 #define ICE_MAX_TSO_SEGS 128 #define ICE_MAX_DMA_SEG_SIZE ((16*1024) - 1) #define ICE_MAX_RX_SEGS 5 #define ICE_MAX_TSO_HDR_SEGS 3 #define ICE_MSIX_BAR 3 #define ICE_MAX_MSIX_VECTORS (GLINT_DYN_CTL_MAX_INDEX + 1) #define ICE_DEFAULT_DESC_COUNT 1024 #define ICE_MAX_DESC_COUNT 8160 #define ICE_MIN_DESC_COUNT 64 #define ICE_DESC_COUNT_INCR 32 /* List of hardware offloads we support */ #define ICE_CSUM_OFFLOAD (CSUM_IP | CSUM_IP_TCP | CSUM_IP_UDP | CSUM_IP_SCTP | \ CSUM_IP6_TCP | CSUM_IP6_UDP | CSUM_IP6_SCTP | \ CSUM_IP_TSO | CSUM_IP6_TSO) /* Macros to decide what kind of hardware offload to enable */ #define ICE_CSUM_TCP (CSUM_IP_TCP|CSUM_IP_TSO|CSUM_IP6_TSO|CSUM_IP6_TCP) #define ICE_CSUM_UDP (CSUM_IP_UDP|CSUM_IP6_UDP) #define ICE_CSUM_SCTP (CSUM_IP_SCTP|CSUM_IP6_SCTP) #define ICE_CSUM_IP (CSUM_IP|CSUM_IP_TSO) /* List of known RX CSUM offload flags */ #define ICE_RX_CSUM_FLAGS (CSUM_L3_CALC | CSUM_L3_VALID | CSUM_L4_CALC | \ CSUM_L4_VALID | CSUM_L5_CALC | CSUM_L5_VALID | \ CSUM_COALESCED) /* List of interface capabilities supported by ice hardware */ #define ICE_FULL_CAPS \ (IFCAP_TSO4 | IFCAP_TSO6 | \ IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6 | \ IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | \ IFCAP_VLAN_HWFILTER | IFCAP_VLAN_HWTSO | \ IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM | \ IFCAP_VLAN_MTU | IFCAP_JUMBO_MTU | IFCAP_LRO) /* Safe mode disables support for hardware checksums and TSO */ #define ICE_SAFE_CAPS \ (ICE_FULL_CAPS & ~(IFCAP_HWCSUM | IFCAP_TSO | \ IFCAP_VLAN_HWTSO | IFCAP_VLAN_HWCSUM)) #define ICE_CAPS(sc) \ (ice_is_bit_set(sc->feat_en, ICE_FEATURE_SAFE_MODE) ? ICE_SAFE_CAPS : ICE_FULL_CAPS) /** * ICE_NVM_ACCESS * @brief Private ioctl command number for NVM access ioctls * * The ioctl command number used by NVM update for accessing the driver for * NVM access commands. */ #define ICE_NVM_ACCESS \ (((((((('E' << 4) + '1') << 4) + 'K') << 4) + 'G') << 4) | 5) /** * ICE_DEBUG_DUMP * @brief Private ioctl command number for retrieving debug dump data * * The ioctl command number used by a userspace tool for accessing the driver for * getting debug dump data from the firmware. */ #define ICE_DEBUG_DUMP \ (((((((('E' << 4) + '1') << 4) + 'K') << 4) + 'G') << 4) | 6) #define ICE_AQ_LEN 1023 #define ICE_MBXQ_LEN 512 #define ICE_SBQ_LEN 512 #define ICE_CTRLQ_WORK_LIMIT 256 #define ICE_DFLT_TRAFFIC_CLASS BIT(0) /* wait up to 50 microseconds for queue state change */ #define ICE_Q_WAIT_RETRY_LIMIT 5 #define ICE_UP_TABLE_TRANSLATE(val, i) \ (((val) << ICE_AQ_VSI_UP_TABLE_UP##i##_S) & \ ICE_AQ_VSI_UP_TABLE_UP##i##_M) /* * For now, set this to the hardware maximum. Each function gets a smaller * number assigned to it in hw->func_caps.guar_num_vsi, though there * appears to be no guarantee that is the maximum number that a function * can use. */ #define ICE_MAX_VSI_AVAILABLE 768 /* Maximum size of a single frame (for Tx and Rx) */ #define ICE_MAX_FRAME_SIZE ICE_AQ_SET_MAC_FRAME_SIZE_MAX /* Maximum MTU size */ #define ICE_MAX_MTU (ICE_MAX_FRAME_SIZE - \ ETHER_HDR_LEN - ETHER_CRC_LEN - ETHER_VLAN_ENCAP_LEN) /* * Hardware requires that TSO packets have a segment size of at least 64 * bytes.
To avoid sending bad frames to the hardware, the driver forces the * MSS for all TSO packets to have a segment size of at least 64 bytes. * * However, if the MTU is reduced below a certain size, then the forced * minimum MSS can result in transmitting segmented frames with a packet size * larger than the MTU. * * Avoid this by preventing the MTU from being lowered below this limit. * Alternative solutions require changing the TCP stack to disable offloading * the segmentation when the requested segment size goes below 64 bytes. */ #define ICE_MIN_MTU 112 /* * The default number of queues reserved for a VF is 4, according to the * AVF Base Mode specification. */ #define ICE_DEFAULT_VF_QUEUES 4 /* * An invalid VSI number to indicate that mirroring should be disabled. */ #define ICE_INVALID_MIRROR_VSI ((u16)-1) /* * The maximum number of RX queues allowed per TC in a VSI. */ #define ICE_MAX_RXQS_PER_TC 256 /* * There are three settings that can be updated independently or * altogether: Link speed, FEC, and Flow Control. These macros allow * the caller to specify which setting(s) to update. */ #define ICE_APPLY_LS BIT(0) #define ICE_APPLY_FEC BIT(1) #define ICE_APPLY_FC BIT(2) #define ICE_APPLY_LS_FEC (ICE_APPLY_LS | ICE_APPLY_FEC) #define ICE_APPLY_LS_FC (ICE_APPLY_LS | ICE_APPLY_FC) #define ICE_APPLY_FEC_FC (ICE_APPLY_FEC | ICE_APPLY_FC) #define ICE_APPLY_LS_FEC_FC (ICE_APPLY_LS_FEC | ICE_APPLY_FC) /* * Mask of valid flags that can be used as an input for the * advertise_speed sysctl. */ #define ICE_SYSCTL_SPEEDS_VALID_RANGE 0xFFF /** * @enum ice_dyn_idx_t * @brief Dynamic Control ITR indexes * * This enum matches hardware bits and is meant to be used by DYN_CTLN * registers and QINT registers or more generally anywhere in the manual * mentioning ITR_INDX. ITR_NONE cannot be used as an index 'n' into any * register but instead is a special value meaning "don't update" ITR0/1/2.
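 * * For example, ice_enable_intr() programs GLINT_DYN_CTL with ICE_ITR_NONE * so that enabling an interrupt does not also modify an ITR setting.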
*/ enum ice_dyn_idx_t { ICE_IDX_ITR0 = 0, ICE_IDX_ITR1 = 1, ICE_IDX_ITR2 = 2, ICE_ITR_NONE = 3 /* ITR_NONE must not be used as an index */ }; /* By convention ITR0 is used for RX, and ITR1 is used for TX */ #define ICE_RX_ITR ICE_IDX_ITR0 #define ICE_TX_ITR ICE_IDX_ITR1 #define ICE_ITR_MAX 8160 /* Define the default Tx and Rx ITR as 50us (translates to ~20k int/sec max) */ #define ICE_DFLT_TX_ITR 50 #define ICE_DFLT_RX_ITR 50 /* RS FEC register values */ #define ICE_RS_FEC_REG_SHIFT 2 #define ICE_RS_FEC_RECV_ID_SHIFT 4 #define ICE_RS_FEC_CORR_LOW_REG_PORT0 (0x02 << ICE_RS_FEC_REG_SHIFT) #define ICE_RS_FEC_CORR_HIGH_REG_PORT0 (0x03 << ICE_RS_FEC_REG_SHIFT) #define ICE_RS_FEC_UNCORR_LOW_REG_PORT0 (0x04 << ICE_RS_FEC_REG_SHIFT) #define ICE_RS_FEC_UNCORR_HIGH_REG_PORT0 (0x05 << ICE_RS_FEC_REG_SHIFT) #define ICE_RS_FEC_CORR_LOW_REG_PORT1 (0x42 << ICE_RS_FEC_REG_SHIFT) #define ICE_RS_FEC_CORR_HIGH_REG_PORT1 (0x43 << ICE_RS_FEC_REG_SHIFT) #define ICE_RS_FEC_UNCORR_LOW_REG_PORT1 (0x44 << ICE_RS_FEC_REG_SHIFT) #define ICE_RS_FEC_UNCORR_HIGH_REG_PORT1 (0x45 << ICE_RS_FEC_REG_SHIFT) #define ICE_RS_FEC_CORR_LOW_REG_PORT2 (0x4A << ICE_RS_FEC_REG_SHIFT) #define ICE_RS_FEC_CORR_HIGH_REG_PORT2 (0x4B << ICE_RS_FEC_REG_SHIFT) #define ICE_RS_FEC_UNCORR_LOW_REG_PORT2 (0x4C << ICE_RS_FEC_REG_SHIFT) #define ICE_RS_FEC_UNCORR_HIGH_REG_PORT2 (0x4D << ICE_RS_FEC_REG_SHIFT) #define ICE_RS_FEC_CORR_LOW_REG_PORT3 (0x52 << ICE_RS_FEC_REG_SHIFT) #define ICE_RS_FEC_CORR_HIGH_REG_PORT3 (0x53 << ICE_RS_FEC_REG_SHIFT) #define ICE_RS_FEC_UNCORR_LOW_REG_PORT3 (0x54 << ICE_RS_FEC_REG_SHIFT) #define ICE_RS_FEC_UNCORR_HIGH_REG_PORT3 (0x55 << ICE_RS_FEC_REG_SHIFT) #define ICE_RS_FEC_RECEIVER_ID_PCS0 (0x33 << ICE_RS_FEC_RECV_ID_SHIFT) #define ICE_RS_FEC_RECEIVER_ID_PCS1 (0x34 << ICE_RS_FEC_RECV_ID_SHIFT) /** * ice_itr_to_reg - Convert an ITR setting into its register equivalent * @hw: The device HW structure * @itr_setting: the ITR setting to convert * * Based on the hardware ITR granularity, convert an ITR setting into the * correct value to prepare programming to the HW. */ static inline u16 ice_itr_to_reg(struct ice_hw *hw, u16 itr_setting) { return itr_setting / hw->itr_gran; } /** * @enum ice_rx_dtype * @brief DTYPE header split options * * This enum matches the Rx context bits to define whether header split is * enabled or not. */ enum ice_rx_dtype { ICE_RX_DTYPE_NO_SPLIT = 0, ICE_RX_DTYPE_HEADER_SPLIT = 1, ICE_RX_DTYPE_SPLIT_ALWAYS = 2, }; /* Strings used for displaying FEC mode * * Use ice_fec_str() to get these unless these need to be embedded in a * string constant. */ #define ICE_FEC_STRING_AUTO "Auto" #define ICE_FEC_STRING_RS "RS-FEC" #define ICE_FEC_STRING_BASER "FC-FEC/BASE-R" #define ICE_FEC_STRING_NONE "None" #define ICE_FEC_STRING_DIS_AUTO "Auto (w/ No-FEC)" /* Strings used for displaying Flow Control mode * * Use ice_fc_str() to get these unless these need to be embedded in a * string constant. */ #define ICE_FC_STRING_FULL "Full" #define ICE_FC_STRING_TX "Tx" #define ICE_FC_STRING_RX "Rx" #define ICE_FC_STRING_NONE "None" /* * The number of times the ice_handle_i2c_req function will retry reading * I2C data via the Admin Queue before returning EBUSY. */ #define ICE_I2C_MAX_RETRIES 10 +/* + * The Get Link Status AQ command and other link commands can return + * EAGAIN, indicating that the FW Link Management engine is busy. + * Define the number of times that the driver should retry sending these + * commands and the amount of time it should wait between those retries + * (in milliseconds) here.
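+ * + * A caller would retry along these lines (sketch only; the exact command + * and status handling varies): + * + *	for (i = 0; i < ICE_LINK_AQ_MAX_RETRIES; i++) { + *		status = ice_aq_get_link_info(pi, false, NULL, NULL); + *		if (hw->adminq.sq_last_status != ICE_AQ_RC_EAGAIN) + *			break; + *		ice_msec_delay(ICE_LINK_RETRY_DELAY, true); + *	}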
+ */ +#define ICE_LINK_AQ_MAX_RETRIES 10 +#define ICE_LINK_RETRY_DELAY 17 + /* * The Start LLDP Agent AQ command will fail if it's sent too soon after * the LLDP agent is stopped. The period between the stop and start * commands must currently be at least 2 seconds. */ #define ICE_START_LLDP_RETRY_WAIT (2 * hz) /* * Only certain clusters are valid for certain devices for the FW debug dump * functionality, so define masks of those here. */ #define ICE_FW_DEBUG_DUMP_VALID_CLUSTER_MASK_E810 0x4001AF #define ICE_FW_DEBUG_DUMP_VALID_CLUSTER_MASK_E830 0x1AF struct ice_softc; /** * @enum ice_rx_cso_stat * @brief software checksum offload statistics * * Enumeration of possible checksum offload statistics captured by software * during the Rx path. */ enum ice_rx_cso_stat { ICE_CSO_STAT_RX_IP4_ERR, ICE_CSO_STAT_RX_IP6_ERR, ICE_CSO_STAT_RX_L3_ERR, ICE_CSO_STAT_RX_TCP_ERR, ICE_CSO_STAT_RX_UDP_ERR, ICE_CSO_STAT_RX_SCTP_ERR, ICE_CSO_STAT_RX_L4_ERR, ICE_CSO_STAT_RX_COUNT }; /** * @enum ice_tx_cso_stat * @brief software checksum offload statistics * * Enumeration of possible checksum offload statistics captured by software * during the Tx path. */ enum ice_tx_cso_stat { ICE_CSO_STAT_TX_TCP, ICE_CSO_STAT_TX_UDP, ICE_CSO_STAT_TX_SCTP, ICE_CSO_STAT_TX_IP4, ICE_CSO_STAT_TX_IP6, ICE_CSO_STAT_TX_L3_ERR, ICE_CSO_STAT_TX_L4_ERR, ICE_CSO_STAT_TX_COUNT }; /** * @struct tx_stats * @brief software Tx statistics * * Contains software counted Tx statistics for a single queue */ struct tx_stats { /* Soft Stats */ u64 tx_bytes; u64 tx_packets; u64 mss_too_small; u64 tso; u64 cso[ICE_CSO_STAT_TX_COUNT]; }; /** * @struct rx_stats * @brief software Rx statistics * * Contains software counted Rx statistics for a single queue */ struct rx_stats { /* Soft Stats */ u64 rx_packets; u64 rx_bytes; u64 desc_errs; u64 cso[ICE_CSO_STAT_RX_COUNT]; }; /** * @struct ice_vsi_hw_stats * @brief hardware statistics for a VSI * * Stores statistics that are generated by hardware for a VSI. */ struct ice_vsi_hw_stats { struct ice_eth_stats prev; struct ice_eth_stats cur; bool offsets_loaded; }; /** * @struct ice_pf_hw_stats * @brief hardware statistics for a PF * * Stores statistics that are generated by hardware for each PF. */ struct ice_pf_hw_stats { struct ice_hw_port_stats prev; struct ice_hw_port_stats cur; bool offsets_loaded; }; /** * @struct ice_pf_sw_stats * @brief software statistics for a PF * * Contains software generated statistics relevant to a PF. */ struct ice_pf_sw_stats { /* # of reset events handled, by type */ u32 corer_count; u32 globr_count; u32 empr_count; u32 pfr_count; /* # of detected MDD events for Tx and Rx */ u32 tx_mdd_count; u32 rx_mdd_count; u64 rx_roc_error; /* port oversize packet stats, error_cnt \ from GLV_REPC VSI register + RxOversize */ }; /** * @struct ice_tc_info * @brief Traffic class information for a VSI * * Stores traffic class information used in configuring * a VSI. 
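 * * For example, qoffset 8 with qcount_rx 4 means the TC owns Rx queues * 8-11 in the VSI's queue space.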
*/ struct ice_tc_info { u16 qoffset; /* Offset in VSI queue space */ u16 qcount_tx; /* TX queues for this Traffic Class */ u16 qcount_rx; /* RX queues */ }; /** * @struct ice_vsi * @brief VSI structure * * Contains data relevant to a single VSI */ struct ice_vsi { /* back pointer to the softc */ struct ice_softc *sc; bool dynamic; /* if true, dynamically allocated */ enum ice_vsi_type type; /* type of this VSI */ u16 idx; /* software index to sc->all_vsi[] */ u16 *tx_qmap; /* Tx VSI to PF queue mapping */ u16 *rx_qmap; /* Rx VSI to PF queue mapping */ enum ice_resmgr_alloc_type qmap_type; struct ice_tx_queue *tx_queues; /* Tx queue array */ struct ice_rx_queue *rx_queues; /* Rx queue array */ int num_tx_queues; int num_rx_queues; int num_vectors; int16_t rx_itr; int16_t tx_itr; /* RSS configuration */ u16 rss_table_size; /* HW RSS table size */ u8 rss_lut_type; /* Used to configure Get/Set RSS LUT AQ call */ int max_frame_size; u16 mbuf_sz; struct ice_aqc_vsi_props info; /* DCB configuration */ u8 num_tcs; /* Total number of enabled TCs */ u16 tc_map; /* bitmap of enabled Traffic Classes */ /* Information for each traffic class */ struct ice_tc_info tc_info[ICE_MAX_TRAFFIC_CLASS]; /* context for per-VSI sysctls */ struct sysctl_ctx_list ctx; struct sysctl_oid *vsi_node; /* context for per-txq sysctls */ struct sysctl_ctx_list txqs_ctx; struct sysctl_oid *txqs_node; /* context for per-rxq sysctls */ struct sysctl_ctx_list rxqs_ctx; struct sysctl_oid *rxqs_node; /* VSI-level stats */ struct ice_vsi_hw_stats hw_stats; /* VSI mirroring details */ u16 mirror_src_vsi; u16 rule_mir_ingress; u16 rule_mir_egress; }; /** * @struct ice_debug_dump_cmd * @brief arguments/return value for debug dump ioctl */ struct ice_debug_dump_cmd { u32 offset; /* offset to read/write from table, in bytes */ u16 cluster_id; /* also used to get next cluster id */ u16 table_id; u16 data_size; /* size of data field, in bytes */ u16 reserved1; u32 reserved2; u8 data[]; }; /** * @struct ice_serdes_equalization * @brief serdes equalization info */ struct ice_serdes_equalization { int rx_equalization_pre1; int rx_equalization_pre2; int rx_equalization_post1; int rx_equalization_bflf; int rx_equalization_bfhf; int rx_equalization_drate; int tx_equalization_pre1; int tx_equalization_pre2; int tx_equalization_pre3; int tx_equalization_atten; int tx_equalization_post1; }; /** * @struct ice_fec_stats_to_sysctl * @brief FEC stats register value of port */ struct ice_fec_stats_to_sysctl { u16 fec_corr_cnt_low; u16 fec_corr_cnt_high; u16 fec_uncorr_cnt_low; u16 fec_uncorr_cnt_high; }; #define ICE_MAX_SERDES_LANE_COUNT 4 /** * @struct ice_regdump_to_sysctl * @brief PHY stats of port */ struct ice_regdump_to_sysctl { /* A multilane port can have max 4 serdes */ struct ice_serdes_equalization equalization[ICE_MAX_SERDES_LANE_COUNT]; struct ice_fec_stats_to_sysctl stats; }; /** * @struct ice_port_topology * @brief Port topology from lport i.e. serdes mapping, pcsquad, macport, cage */ struct ice_port_topology { u16 pcs_port; u16 primary_serdes_lane; u16 serdes_lane_count; u16 pcs_quad_select; }; /** * @enum ice_state * @brief Driver state flags * * Used to indicate the status of various driver events. Intended to be * modified only using atomic operations, so that we can use it even in places * which aren't locked. 
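 * * e.g. ice_set_state(&sc->state, ICE_STATE_DETACHING);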
 */ enum ice_state { ICE_STATE_CONTROLQ_EVENT_PENDING, ICE_STATE_VFLR_PENDING, ICE_STATE_MDD_PENDING, ICE_STATE_RESET_OICR_RECV, ICE_STATE_RESET_PFR_REQ, ICE_STATE_PREPARED_FOR_RESET, ICE_STATE_SUBIF_NEEDS_REINIT, ICE_STATE_RESET_FAILED, ICE_STATE_DRIVER_INITIALIZED, ICE_STATE_NO_MEDIA, ICE_STATE_RECOVERY_MODE, ICE_STATE_ROLLBACK_MODE, ICE_STATE_LINK_STATUS_REPORTED, ICE_STATE_ATTACHING, ICE_STATE_DETACHING, ICE_STATE_LINK_DEFAULT_OVERRIDE_PENDING, ICE_STATE_LLDP_RX_FLTR_FROM_DRIVER, ICE_STATE_MULTIPLE_TCS, ICE_STATE_DO_FW_DEBUG_DUMP, ICE_STATE_LINK_ACTIVE_ON_DOWN, ICE_STATE_FIRST_INIT_LINK, ICE_STATE_DO_CREATE_MIRR_INTFC, ICE_STATE_DO_DESTROY_MIRR_INTFC, + ICE_STATE_PHY_FW_INIT_PENDING, /* This entry must be last */ ICE_STATE_LAST, }; /* Functions for setting and checking driver state. Note the functions take * bit positions, not bitmasks. The atomic_testandset_32 and * atomic_testandclear_32 operations require bit positions, while the * atomic_set_32 and atomic_clear_32 operations require bitmasks. This can * easily lead to programming errors, so we provide wrapper functions to * avoid them. */ /** * ice_set_state - Set the specified state * @s: the state bitmap * @bit: the state to set * * Atomically update the state bitmap with the specified bit set. */ static inline void ice_set_state(volatile u32 *s, enum ice_state bit) { /* atomic_set_32 expects a bitmask */ atomic_set_32(s, BIT(bit)); } /** * ice_clear_state - Clear the specified state * @s: the state bitmap * @bit: the state to clear * * Atomically update the state bitmap with the specified bit cleared. */ static inline void ice_clear_state(volatile u32 *s, enum ice_state bit) { /* atomic_clear_32 expects a bitmask */ atomic_clear_32(s, BIT(bit)); } /** * ice_testandset_state - Test and set the specified state * @s: the state bitmap * @bit: the bit to test * * Atomically update the state bitmap, setting the specified bit. Returns the * previous value of the bit. */ static inline u32 ice_testandset_state(volatile u32 *s, enum ice_state bit) { /* atomic_testandset_32 expects a bit position */ return atomic_testandset_32(s, bit); } /** * ice_testandclear_state - Test and clear the specified state * @s: the state bitmap * @bit: the bit to test * * Atomically update the state bitmap, clearing the specified bit. Returns the * previous value of the bit. */ static inline u32 ice_testandclear_state(volatile u32 *s, enum ice_state bit) { /* atomic_testandclear_32 expects a bit position */ return atomic_testandclear_32(s, bit); } /** * ice_test_state - Test the specified state * @s: the state bitmap * @bit: the bit to test * * Return true if the state is set, false otherwise. Use this only if the flow * does not need to update the state. If you must update the state as well, * prefer ice_testandset_state or ice_testandclear_state. */ static inline u32 ice_test_state(volatile u32 *s, enum ice_state bit) { return (*s & BIT(bit)) ? true : false; } /** * @struct ice_str_buf * @brief fixed-length buffer for returning strings * * Structure containing a fixed size string buffer, used to implement * numeric->string conversion functions that may want to return non-constant * strings. * * This allows returning a fixed size string that is generated by a conversion * function, and then copied to the used location without needing to use an * explicit local variable passed by reference.
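 *
 * A minimal usage sketch (hypothetical call site; the ICE_DBG_INIT flag is
 * assumed here, any debug mask would do):
 *
 *	ice_debug(hw, ICE_DBG_INIT, "command failed: %s\n", ice_aq_str(err));
 *
 * The temporary struct returned by _ice_aq_str() lives until the end of the
 * full expression, which is long enough for the printf-style consumer above.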
*/ struct ice_str_buf { char str[ICE_STR_BUF_LEN]; }; struct ice_str_buf _ice_aq_str(enum ice_aq_err aq_err); struct ice_str_buf _ice_status_str(int status); struct ice_str_buf _ice_err_str(int err); struct ice_str_buf _ice_fltr_flag_str(u16 flag); struct ice_str_buf _ice_log_sev_str(u8 log_level); struct ice_str_buf _ice_mdd_tx_tclan_str(u8 event); struct ice_str_buf _ice_mdd_tx_pqm_str(u8 event); struct ice_str_buf _ice_mdd_rx_str(u8 event); struct ice_str_buf _ice_fw_lldp_status(u32 lldp_status); #define ice_aq_str(err) _ice_aq_str(err).str #define ice_status_str(err) _ice_status_str(err).str #define ice_err_str(err) _ice_err_str(err).str #define ice_fltr_flag_str(flag) _ice_fltr_flag_str(flag).str #define ice_mdd_tx_tclan_str(event) _ice_mdd_tx_tclan_str(event).str #define ice_mdd_tx_pqm_str(event) _ice_mdd_tx_pqm_str(event).str #define ice_mdd_rx_str(event) _ice_mdd_rx_str(event).str #define ice_log_sev_str(log_level) _ice_log_sev_str(log_level).str #define ice_fw_lldp_status(lldp_status) _ice_fw_lldp_status(lldp_status).str /** * ice_enable_intr - Enable interrupts for given vector * @hw: the device private HW structure * @vector: the interrupt index in PF space * * In MSI or Legacy interrupt mode, interrupt 0 is the only valid index. */ static inline void ice_enable_intr(struct ice_hw *hw, int vector) { u32 dyn_ctl; /* Use ITR_NONE so that ITR configuration is not changed. */ dyn_ctl = GLINT_DYN_CTL_INTENA_M | GLINT_DYN_CTL_CLEARPBA_M | (ICE_ITR_NONE << GLINT_DYN_CTL_ITR_INDX_S); wr32(hw, GLINT_DYN_CTL(vector), dyn_ctl); } /** * ice_disable_intr - Disable interrupts for given vector * @hw: the device private HW structure * @vector: the interrupt index in PF space * * In MSI or Legacy interrupt mode, interrupt 0 is the only valid index. */ static inline void ice_disable_intr(struct ice_hw *hw, int vector) { u32 dyn_ctl; /* Use ITR_NONE so that ITR configuration is not changed. */ dyn_ctl = ICE_ITR_NONE << GLINT_DYN_CTL_ITR_INDX_S; wr32(hw, GLINT_DYN_CTL(vector), dyn_ctl); } /** * ice_is_tx_desc_done - determine if a Tx descriptor is done * @txd: the Tx descriptor to check * * Returns true if hardware is done with a Tx descriptor and software is * capable of re-using it. */ static inline bool ice_is_tx_desc_done(struct ice_tx_desc *txd) { return (((txd->cmd_type_offset_bsz & ICE_TXD_QW1_DTYPE_M) >> ICE_TXD_QW1_DTYPE_S) == ICE_TX_DESC_DTYPE_DESC_DONE); } /** * ice_get_pf_id - Get the PF id from the hardware registers * @hw: the ice hardware structure * * Reads the PF_FUNC_RID register and extracts the function number from it. * Intended to be used in cases where hw->pf_id hasn't yet been assigned by * ice_init_hw. * * @pre this function should be called only after PCI register access has been * setup, and prior to ice_init_hw. After hardware has been initialized, the * cached hw->pf_id value can be used. 
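 *
 * e.g., an early-attach sketch, before ice_init_hw() has populated
 * hw->pf_id:
 *
 *	u8 pf_id = ice_get_pf_id(hw);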
*/ static inline u8 ice_get_pf_id(struct ice_hw *hw) { return (u8)((rd32(hw, PF_FUNC_RID) & PF_FUNC_RID_FUNCTION_NUMBER_M) >> PF_FUNC_RID_FUNCTION_NUMBER_S); } /* Details of how to re-initialize depend on the networking stack */ void ice_request_stack_reinit(struct ice_softc *sc); /* Details of how to check if the network stack is detaching us */ bool ice_driver_is_detaching(struct ice_softc *sc); /* Details of how to setup/teardown a mirror interface */ /** * @brief Create an interface for mirroring */ int ice_create_mirror_interface(struct ice_softc *sc); /** * @brief Destroy created mirroring interface */ void ice_destroy_mirror_interface(struct ice_softc *sc); const char * ice_fw_module_str(enum ice_aqc_fw_logging_mod module); void ice_add_fw_logging_tunables(struct ice_softc *sc, struct sysctl_oid *parent); void ice_handle_fw_log_event(struct ice_softc *sc, struct ice_aq_desc *desc, void *buf); int ice_process_ctrlq(struct ice_softc *sc, enum ice_ctl_q q_type, u16 *pending); int ice_map_bar(device_t dev, struct ice_bar_info *bar, int bar_num); void ice_free_bar(device_t dev, struct ice_bar_info *bar); void ice_set_ctrlq_len(struct ice_hw *hw); void ice_release_vsi(struct ice_vsi *vsi); struct ice_vsi *ice_alloc_vsi(struct ice_softc *sc, enum ice_vsi_type type); void ice_alloc_vsi_qmap(struct ice_vsi *vsi, const int max_tx_queues, const int max_rx_queues); void ice_free_vsi_qmaps(struct ice_vsi *vsi); int ice_initialize_vsi(struct ice_vsi *vsi); void ice_deinit_vsi(struct ice_vsi *vsi); uint64_t ice_aq_speed_to_rate(struct ice_port_info *pi); int ice_get_phy_type_low(uint64_t phy_type_low); int ice_get_phy_type_high(uint64_t phy_type_high); int ice_add_media_types(struct ice_softc *sc, struct ifmedia *media); void ice_configure_rxq_interrupt(struct ice_hw *hw, u16 rxqid, u16 vector, u8 itr_idx); void ice_configure_all_rxq_interrupts(struct ice_vsi *vsi); void ice_configure_txq_interrupt(struct ice_hw *hw, u16 txqid, u16 vector, u8 itr_idx); void ice_configure_all_txq_interrupts(struct ice_vsi *vsi); void ice_flush_rxq_interrupts(struct ice_vsi *vsi); void ice_flush_txq_interrupts(struct ice_vsi *vsi); int ice_cfg_vsi_for_tx(struct ice_vsi *vsi); int ice_cfg_vsi_for_rx(struct ice_vsi *vsi); int ice_control_rx_queue(struct ice_vsi *vsi, u16 qidx, bool enable); int ice_control_all_rx_queues(struct ice_vsi *vsi, bool enable); int ice_cfg_pf_default_mac_filters(struct ice_softc *sc); int ice_rm_pf_default_mac_filters(struct ice_softc *sc); void ice_print_nvm_version(struct ice_softc *sc); void ice_update_vsi_hw_stats(struct ice_vsi *vsi); void ice_reset_vsi_stats(struct ice_vsi *vsi); void ice_update_pf_stats(struct ice_softc *sc); void ice_reset_pf_stats(struct ice_softc *sc); void ice_add_device_sysctls(struct ice_softc *sc); void ice_log_hmc_error(struct ice_hw *hw, device_t dev); void ice_add_sysctls_eth_stats(struct sysctl_ctx_list *ctx, struct sysctl_oid *parent, struct ice_eth_stats *stats); void ice_add_vsi_sysctls(struct ice_vsi *vsi); void ice_add_sysctls_mac_stats(struct sysctl_ctx_list *ctx, struct sysctl_oid *parent, struct ice_softc *sc); void ice_configure_misc_interrupts(struct ice_softc *sc); int ice_sync_multicast_filters(struct ice_softc *sc); int ice_add_vlan_hw_filters(struct ice_vsi *vsi, u16 *vid, u16 length); int ice_add_vlan_hw_filter(struct ice_vsi *vsi, u16 vid); int ice_remove_vlan_hw_filters(struct ice_vsi *vsi, u16 *vid, u16 length); int ice_remove_vlan_hw_filter(struct ice_vsi *vsi, u16 vid); void ice_add_vsi_tunables(struct ice_vsi *vsi, struct sysctl_oid 
*parent); void ice_del_vsi_sysctl_ctx(struct ice_vsi *vsi); void ice_add_device_tunables(struct ice_softc *sc); int ice_add_vsi_mac_filter(struct ice_vsi *vsi, const u8 *addr); int ice_remove_vsi_mac_filter(struct ice_vsi *vsi, const u8 *addr); int ice_vsi_disable_tx(struct ice_vsi *vsi); void ice_vsi_add_txqs_ctx(struct ice_vsi *vsi); void ice_vsi_add_rxqs_ctx(struct ice_vsi *vsi); void ice_vsi_del_txqs_ctx(struct ice_vsi *vsi); void ice_vsi_del_rxqs_ctx(struct ice_vsi *vsi); void ice_add_txq_sysctls(struct ice_tx_queue *txq); void ice_add_rxq_sysctls(struct ice_rx_queue *rxq); int ice_config_rss(struct ice_vsi *vsi); void ice_clean_all_vsi_rss_cfg(struct ice_softc *sc); int ice_load_pkg_file(struct ice_softc *sc); void ice_log_pkg_init(struct ice_softc *sc, enum ice_ddp_state pkg_status); uint64_t ice_get_ifnet_counter(struct ice_vsi *vsi, ift_counter counter); void ice_save_pci_info(struct ice_hw *hw, device_t dev); int ice_replay_all_vsi_cfg(struct ice_softc *sc); void ice_link_up_msg(struct ice_softc *sc); int ice_update_laa_mac(struct ice_softc *sc); void ice_get_and_print_bus_info(struct ice_softc *sc); const char *ice_fec_str(enum ice_fec_mode mode); const char *ice_fc_str(enum ice_fc_mode mode); const char *ice_fwd_act_str(enum ice_sw_fwd_act_type action); const char *ice_state_to_str(enum ice_state state); int ice_init_link_events(struct ice_softc *sc); void ice_configure_rx_itr(struct ice_vsi *vsi); void ice_configure_tx_itr(struct ice_vsi *vsi); void ice_setup_pf_vsi(struct ice_softc *sc); void ice_handle_mdd_event(struct ice_softc *sc); void ice_init_dcb_setup(struct ice_softc *sc); int ice_send_version(struct ice_softc *sc); int ice_cfg_pf_ethertype_filters(struct ice_softc *sc); void ice_init_link_configuration(struct ice_softc *sc); void ice_init_saved_phy_cfg(struct ice_softc *sc); int ice_apply_saved_phy_cfg(struct ice_softc *sc, u8 settings); void ice_set_link_management_mode(struct ice_softc *sc); int ice_module_event_handler(module_t mod, int what, void *arg); int ice_handle_nvm_access_ioctl(struct ice_softc *sc, struct ifdrv *ifd); int ice_handle_i2c_req(struct ice_softc *sc, struct ifi2creq *req); int ice_read_sff_eeprom(struct ice_softc *sc, u16 dev_addr, u16 offset, u8* data, u16 length); int ice_alloc_intr_tracking(struct ice_softc *sc); void ice_free_intr_tracking(struct ice_softc *sc); void ice_set_default_local_lldp_mib(struct ice_softc *sc); void ice_set_link(struct ice_softc *sc, bool enabled); void ice_add_rx_lldp_filter(struct ice_softc *sc); void ice_init_health_events(struct ice_softc *sc); void ice_cfg_pba_num(struct ice_softc *sc); int ice_handle_debug_dump_ioctl(struct ice_softc *sc, struct ifdrv *ifd); u8 ice_dcb_get_tc_map(const struct ice_dcbx_cfg *dcbcfg); void ice_do_dcb_reconfig(struct ice_softc *sc, bool pending_mib); int ice_setup_vsi_mirroring(struct ice_vsi *vsi); #endif /* _ICE_LIB_H_ */ diff --git a/sys/dev/ice/ice_strings.c b/sys/dev/ice/ice_strings.c index 5b5da737cadb..1b377a1bf518 100644 --- a/sys/dev/ice/ice_strings.c +++ b/sys/dev/ice/ice_strings.c @@ -1,1184 +1,1186 @@ /* SPDX-License-Identifier: BSD-3-Clause */ /* Copyright (c) 2024, Intel Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. Neither the name of the Intel Corporation nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /** * @file ice_strings.c * @brief functions to convert enumerated values to human readable strings * * Contains various functions which convert enumerated values into human * readable strings. Primarily this is used for error values, such as the * ice_status enum, the ice_aq_err values, or standard sys/errno.h values. * * Additionally, various other driver enumerations which are displayed via * sysctl have converter functions. * * Some of the functions return struct ice_str_buf, instead of a character * string pointer. This is a trick to allow the function to create a struct * with space to convert unknown numeric values into a string, and return the * contents via copying the struct memory back. The functions then have an * associated macro to access the string value immediately. This allows the * functions to return static strings for known values, and convert unknown * values into a numeric representation. It also does not require * pre-allocating storage at each callsite, or using a local static value * which wouldn't be re-entrant, and could collide if multiple threads call * the function. The extra copies are somewhat annoying, but generally the * error functions aren't expected to be in a hot path so this is an * acceptable trade off. */ #include "ice_lib.h" /** * ice_aq_str - Convert an AdminQ error into a string * @aq_err: the AQ error code to convert * * Convert the AdminQ status into its string name, if known. Otherwise, format * the error as an integer. 
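 *
 * For example, ICE_AQ_RC_ENOMEM is rendered as "AQ_RC_ENOMEM", while an
 * unrecognized code such as 999 falls through the switch below and is
 * rendered as the decimal string "999".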
*/ struct ice_str_buf _ice_aq_str(enum ice_aq_err aq_err) { struct ice_str_buf buf = { .str = "" }; const char *str = NULL; switch (aq_err) { case ICE_AQ_RC_OK: str = "OK"; break; case ICE_AQ_RC_EPERM: str = "AQ_RC_EPERM"; break; case ICE_AQ_RC_ENOENT: str = "AQ_RC_ENOENT"; break; case ICE_AQ_RC_ESRCH: str = "AQ_RC_ESRCH"; break; case ICE_AQ_RC_EINTR: str = "AQ_RC_EINTR"; break; case ICE_AQ_RC_EIO: str = "AQ_RC_EIO"; break; case ICE_AQ_RC_ENXIO: str = "AQ_RC_ENXIO"; break; case ICE_AQ_RC_E2BIG: str = "AQ_RC_E2BIG"; break; case ICE_AQ_RC_EAGAIN: str = "AQ_RC_EAGAIN"; break; case ICE_AQ_RC_ENOMEM: str = "AQ_RC_ENOMEM"; break; case ICE_AQ_RC_EACCES: str = "AQ_RC_EACCES"; break; case ICE_AQ_RC_EFAULT: str = "AQ_RC_EFAULT"; break; case ICE_AQ_RC_EBUSY: str = "AQ_RC_EBUSY"; break; case ICE_AQ_RC_EEXIST: str = "AQ_RC_EEXIST"; break; case ICE_AQ_RC_EINVAL: str = "AQ_RC_EINVAL"; break; case ICE_AQ_RC_ENOTTY: str = "AQ_RC_ENOTTY"; break; case ICE_AQ_RC_ENOSPC: str = "AQ_RC_ENOSPC"; break; case ICE_AQ_RC_ENOSYS: str = "AQ_RC_ENOSYS"; break; case ICE_AQ_RC_ERANGE: str = "AQ_RC_ERANGE"; break; case ICE_AQ_RC_EFLUSHED: str = "AQ_RC_EFLUSHED"; break; case ICE_AQ_RC_BAD_ADDR: str = "AQ_RC_BAD_ADDR"; break; case ICE_AQ_RC_EMODE: str = "AQ_RC_EMODE"; break; case ICE_AQ_RC_EFBIG: str = "AQ_RC_EFBIG"; break; case ICE_AQ_RC_ESBCOMP: str = "AQ_RC_ESBCOMP"; break; case ICE_AQ_RC_ENOSEC: str = "AQ_RC_ENOSEC"; break; case ICE_AQ_RC_EBADSIG: str = "AQ_RC_EBADSIG"; break; case ICE_AQ_RC_ESVN: str = "AQ_RC_ESVN"; break; case ICE_AQ_RC_EBADMAN: str = "AQ_RC_EBADMAN"; break; case ICE_AQ_RC_EBADBUF: str = "AQ_RC_EBADBUF"; break; case ICE_AQ_RC_EACCES_BMCU: str = "AQ_RC_EACCES_BMCU"; break; } if (str) snprintf(buf.str, ICE_STR_BUF_LEN, "%s", str); else snprintf(buf.str, ICE_STR_BUF_LEN, "%d", aq_err); return buf; } /** * ice_status_str - convert status err code to a string * @status: the status error code to convert * * Convert the status code into its string name if known. * * Otherwise, use the scratch space to format the status code into a number. 
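 *
 * A typical (hypothetical) log site pairs this with the AQ error string:
 *
 *	device_printf(dev, "add VSI failed, err %s aq_err %s\n",
 *		      ice_status_str(status), ice_aq_str(aq_err));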
*/ struct ice_str_buf _ice_status_str(int status) { struct ice_str_buf buf = { .str = "" }; const char *str = NULL; switch (status) { case 0: str = "OK"; break; case ICE_ERR_PARAM: str = "ICE_ERR_PARAM"; break; case ICE_ERR_NOT_IMPL: str = "ICE_ERR_NOT_IMPL"; break; case ICE_ERR_NOT_READY: str = "ICE_ERR_NOT_READY"; break; case ICE_ERR_NOT_SUPPORTED: str = "ICE_ERR_NOT_SUPPORTED"; break; case ICE_ERR_BAD_PTR: str = "ICE_ERR_BAD_PTR"; break; case ICE_ERR_INVAL_SIZE: str = "ICE_ERR_INVAL_SIZE"; break; case ICE_ERR_DEVICE_NOT_SUPPORTED: str = "ICE_ERR_DEVICE_NOT_SUPPORTED"; break; case ICE_ERR_RESET_FAILED: str = "ICE_ERR_RESET_FAILED"; break; case ICE_ERR_FW_API_VER: str = "ICE_ERR_FW_API_VER"; break; case ICE_ERR_NO_MEMORY: str = "ICE_ERR_NO_MEMORY"; break; case ICE_ERR_CFG: str = "ICE_ERR_CFG"; break; case ICE_ERR_OUT_OF_RANGE: str = "ICE_ERR_OUT_OF_RANGE"; break; case ICE_ERR_ALREADY_EXISTS: str = "ICE_ERR_ALREADY_EXISTS"; break; case ICE_ERR_NVM: str = "ICE_ERR_NVM"; break; case ICE_ERR_NVM_CHECKSUM: str = "ICE_ERR_NVM_CHECKSUM"; break; case ICE_ERR_BUF_TOO_SHORT: str = "ICE_ERR_BUF_TOO_SHORT"; break; case ICE_ERR_NVM_BLANK_MODE: str = "ICE_ERR_NVM_BLANK_MODE"; break; case ICE_ERR_IN_USE: str = "ICE_ERR_IN_USE"; break; case ICE_ERR_MAX_LIMIT: str = "ICE_ERR_MAX_LIMIT"; break; case ICE_ERR_RESET_ONGOING: str = "ICE_ERR_RESET_ONGOING"; break; case ICE_ERR_HW_TABLE: str = "ICE_ERR_HW_TABLE"; break; case ICE_ERR_FW_DDP_MISMATCH: str = "ICE_ERR_FW_DDP_MISMATCH"; break; case ICE_ERR_DOES_NOT_EXIST: str = "ICE_ERR_DOES_NOT_EXIST"; break; case ICE_ERR_AQ_ERROR: str = "ICE_ERR_AQ_ERROR"; break; case ICE_ERR_AQ_TIMEOUT: str = "ICE_ERR_AQ_TIMEOUT"; break; case ICE_ERR_AQ_FULL: str = "ICE_ERR_AQ_FULL"; break; case ICE_ERR_AQ_NO_WORK: str = "ICE_ERR_AQ_NO_WORK"; break; case ICE_ERR_AQ_EMPTY: str = "ICE_ERR_AQ_EMPTY"; break; case ICE_ERR_AQ_FW_CRITICAL: str = "ICE_ERR_AQ_FW_CRITICAL"; break; } if (str) snprintf(buf.str, ICE_STR_BUF_LEN, "%s", str); else snprintf(buf.str, ICE_STR_BUF_LEN, "%d", status); return buf; } /** * ice_err_str - convert error code to a string * @err: the error code to convert * * Convert an error code into its string/macro name if known. Note, it doesn't * handle negated errors. * * Otherwise, use the scratch space to format the error into a number. 
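 *
 * e.g., ice_err_str(EPERM) yields "EPERM", but a negated value such as
 * (-EPERM) matches no case below and is simply formatted as "-1".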
 */ struct ice_str_buf _ice_err_str(int err) { struct ice_str_buf buf = { .str = "" }; const char *str = NULL; switch (err) { case 0: str = "OK"; break; case EPERM: str = "EPERM"; break; case ENOENT: str = "ENOENT"; break; case ESRCH: str = "ESRCH"; break; case EINTR: str = "EINTR"; break; case EIO: str = "EIO"; break; case ENXIO: str = "ENXIO"; break; case E2BIG: str = "E2BIG"; break; case ENOEXEC: str = "ENOEXEC"; break; case EBADF: str = "EBADF"; break; case ECHILD: str = "ECHILD"; break; case EDEADLK: str = "EDEADLK"; break; case ENOMEM: str = "ENOMEM"; break; case EACCES: str = "EACCES"; break; case EFAULT: str = "EFAULT"; break; case ENOTBLK: str = "ENOTBLK"; break; case EBUSY: str = "EBUSY"; break; case EEXIST: str = "EEXIST"; break; case EXDEV: str = "EXDEV"; break; case ENODEV: str = "ENODEV"; break; case ENOTDIR: str = "ENOTDIR"; break; case EISDIR: str = "EISDIR"; break; case EINVAL: str = "EINVAL"; break; case ENFILE: str = "ENFILE"; break; case EMFILE: str = "EMFILE"; break; case ENOTTY: str = "ENOTTY"; break; case ETXTBSY: str = "ETXTBSY"; break; case EFBIG: str = "EFBIG"; break; case ENOSPC: str = "ENOSPC"; break; case ESPIPE: str = "ESPIPE"; break; case EROFS: str = "EROFS"; break; case EMLINK: str = "EMLINK"; break; case EPIPE: str = "EPIPE"; break; case EDOM: str = "EDOM"; break; case ERANGE: str = "ERANGE"; break; case EAGAIN: /* EWOULDBLOCK */ str = "EAGAIN"; break; case EINPROGRESS: str = "EINPROGRESS"; break; case EALREADY: str = "EALREADY"; break; case ENOTSOCK: str = "ENOTSOCK"; break; case EDESTADDRREQ: str = "EDESTADDRREQ"; break; case EMSGSIZE: str = "EMSGSIZE"; break; case EPROTOTYPE: str = "EPROTOTYPE"; break; case ENOPROTOOPT: str = "ENOPROTOOPT"; break; case EPROTONOSUPPORT: str = "EPROTONOSUPPORT"; break; case ESOCKTNOSUPPORT: str = "ESOCKTNOSUPPORT"; break; case EOPNOTSUPP: /* ENOTSUP */ str = "EOPNOTSUPP"; break; case EPFNOSUPPORT: str = "EPFNOSUPPORT"; break; case EAFNOSUPPORT: str = "EAFNOSUPPORT"; break; case EADDRINUSE: str = "EADDRINUSE"; break; case EADDRNOTAVAIL: str = "EADDRNOTAVAIL"; break; case ENETDOWN: str = "ENETDOWN"; break; case ENETUNREACH: str = "ENETUNREACH"; break; case ENETRESET: str = "ENETRESET"; break; case ECONNABORTED: str = "ECONNABORTED"; break; case ECONNRESET: str = "ECONNRESET"; break; case ENOBUFS: str = "ENOBUFS"; break; case EISCONN: str = "EISCONN"; break; case ENOTCONN: str = "ENOTCONN"; break; case ESHUTDOWN: str = "ESHUTDOWN"; break; case ETOOMANYREFS: str = "ETOOMANYREFS"; break; case ETIMEDOUT: str = "ETIMEDOUT"; break; case ECONNREFUSED: str = "ECONNREFUSED"; break; case ELOOP: str = "ELOOP"; break; case ENAMETOOLONG: str = "ENAMETOOLONG"; break; case EHOSTDOWN: str = "EHOSTDOWN"; break; case EHOSTUNREACH: str = "EHOSTUNREACH"; break; case ENOTEMPTY: str = "ENOTEMPTY"; break; case EPROCLIM: str = "EPROCLIM"; break; case EUSERS: str = "EUSERS"; break; case EDQUOT: str = "EDQUOT"; break; case ESTALE: str = "ESTALE"; break; case EREMOTE: str = "EREMOTE"; break; case EBADRPC: str = "EBADRPC"; break; case ERPCMISMATCH: str = "ERPCMISMATCH"; break; case EPROGUNAVAIL: str = "EPROGUNAVAIL"; break; case EPROGMISMATCH: str = "EPROGMISMATCH"; break; case EPROCUNAVAIL: str = "EPROCUNAVAIL"; break; case ENOLCK: str = "ENOLCK"; break; case ENOSYS: str = "ENOSYS"; break; case EFTYPE: str = "EFTYPE"; break; case EAUTH: str = "EAUTH"; break; case ENEEDAUTH: str = "ENEEDAUTH"; break; case EIDRM: str = "EIDRM"; break; case ENOMSG: str = "ENOMSG"; break; case EOVERFLOW: str = "EOVERFLOW"; break; case ECANCELED: str = "ECANCELED"; break; case EILSEQ: str = "EILSEQ"; break; case ENOATTR: str = "ENOATTR"; break; case EDOOFUS: str = "EDOOFUS"; break; case EBADMSG: str = "EBADMSG"; break; case EMULTIHOP: str = "EMULTIHOP"; break; case ENOLINK: str = "ENOLINK"; break; case EPROTO: str = "EPROTO"; break; case ENOTCAPABLE: str = "ENOTCAPABLE"; break; case ECAPMODE: str = "ECAPMODE"; break; case ENOTRECOVERABLE: str = "ENOTRECOVERABLE"; break; case EOWNERDEAD: str = "EOWNERDEAD"; break; } if (str) snprintf(buf.str, ICE_STR_BUF_LEN, "%s", str); else snprintf(buf.str, ICE_STR_BUF_LEN, "%d", err); return buf; } /** * ice_fec_str - convert fec mode enum to a string * @mode: the enum value to convert * * Convert an FEC mode enum to a string for display in a sysctl or log message. * Returns "Unknown" if the mode is not one of the currently known FEC modes. */ const char * ice_fec_str(enum ice_fec_mode mode) { switch (mode) { case ICE_FEC_AUTO: return ICE_FEC_STRING_AUTO; case ICE_FEC_RS: return ICE_FEC_STRING_RS; case ICE_FEC_BASER: return ICE_FEC_STRING_BASER; case ICE_FEC_NONE: return ICE_FEC_STRING_NONE; case ICE_FEC_DIS_AUTO: return ICE_FEC_STRING_DIS_AUTO; } /* The compiler generates errors on unhandled enum values if we omit * the default case. */ return "Unknown"; } /** * ice_fc_str - convert flow control mode enum to a string * @mode: the enum value to convert * * Convert a flow control mode enum to a string for display in a sysctl or log * message. Returns "Unknown" if the mode is not one of the currently supported * or known flow control modes. */ const char * ice_fc_str(enum ice_fc_mode mode) { switch (mode) { case ICE_FC_FULL: return ICE_FC_STRING_FULL; case ICE_FC_TX_PAUSE: return ICE_FC_STRING_TX; case ICE_FC_RX_PAUSE: return ICE_FC_STRING_RX; case ICE_FC_NONE: return ICE_FC_STRING_NONE; case ICE_FC_AUTO: case ICE_FC_PFC: case ICE_FC_DFLT: break; } /* The compiler generates errors on unhandled enum values if we omit * the default case. */ return "Unknown"; } /** * ice_fltr_flag_str - Convert filter flags to a string * @flag: the filter flags to convert * * Convert the u16 flag value of a filter into a readable string for * outputting in a sysctl. */ struct ice_str_buf _ice_fltr_flag_str(u16 flag) { struct ice_str_buf buf = { .str = "" }; const char *str = NULL; switch (flag) { case ICE_FLTR_RX: str = "RX"; break; case ICE_FLTR_TX: str = "TX"; break; case ICE_FLTR_TX_RX: str = "TX_RX"; break; default: break; } if (str) snprintf(buf.str, ICE_STR_BUF_LEN, "%s", str); else snprintf(buf.str, ICE_STR_BUF_LEN, "%u", flag); return buf; } /** * ice_log_sev_str - Convert log level to a string * @log_level: the log level to convert * * Convert the u8 log level of a FW logging module into a readable * string for outputting in a sysctl. */ struct ice_str_buf _ice_log_sev_str(u8 log_level) { struct ice_str_buf buf = { .str = "" }; const char *str = NULL; switch (log_level) { case ICE_FWLOG_LEVEL_NONE: str = "none"; break; case ICE_FWLOG_LEVEL_ERROR: str = "error"; break; case ICE_FWLOG_LEVEL_WARNING: str = "warning"; break; case ICE_FWLOG_LEVEL_NORMAL: str = "normal"; break; case ICE_FWLOG_LEVEL_VERBOSE: str = "verbose"; break; default: break; } if (str) snprintf(buf.str, ICE_STR_BUF_LEN, "%s", str); else snprintf(buf.str, ICE_STR_BUF_LEN, "%u", log_level); return buf; } /** * ice_fwd_act_str - convert filter action enum to a string * @action: the filter action to convert * * Convert an enum value of type enum ice_sw_fwd_act_type into a string, for * display in a sysctl filter list. Returns "Unknown" for actions outside the * enumeration type.
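 *
 * e.g., ICE_FWD_TO_Q is displayed as "FWD_TO_Q" in the filter list.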
*/ const char * ice_fwd_act_str(enum ice_sw_fwd_act_type action) { switch (action) { case ICE_FWD_TO_VSI: return "FWD_TO_VSI"; case ICE_FWD_TO_VSI_LIST: return "FWD_TO_VSI_LIST"; case ICE_FWD_TO_Q: return "FWD_TO_Q"; case ICE_FWD_TO_QGRP: return "FWD_TO_QGRP"; case ICE_DROP_PACKET: return "DROP_PACKET"; case ICE_LG_ACTION: return "LG_ACTION"; case ICE_INVAL_ACT: return "INVAL_ACT"; } /* The compiler generates errors on unhandled enum values if we omit * the default case. */ return "Unknown"; } /** * ice_mdd_tx_tclan_str - Convert MDD Tx TCLAN event to a string * @event: the MDD event number to convert * * Convert the Tx TCLAN event value from the GL_MDET_TX_TCLAN register into * a human readable string for logging of MDD events. */ struct ice_str_buf _ice_mdd_tx_tclan_str(u8 event) { struct ice_str_buf buf = { .str = "" }; const char *str = NULL; switch (event) { case 0: str = "Wrong descriptor format/order"; break; case 1: str = "Descriptor fetch failed"; break; case 2: str = "Tail descriptor not EOP/NOP"; break; case 3: str = "False scheduling error"; break; case 4: str = "Tail value larger than ring len"; break; case 5: str = "Too many data commands"; break; case 6: str = "Zero packets sent in quanta"; break; case 7: str = "Packet too small or too big"; break; case 8: str = "TSO length doesn't match sum"; break; case 9: str = "TSO tail reached before TLEN"; break; case 10: str = "TSO max 3 descs for headers"; break; case 11: str = "EOP on header descriptor"; break; case 12: str = "MSS is 0 or TLEN is 0"; break; case 13: str = "CTX desc invalid IPSec fields"; break; case 14: str = "Quanta invalid # of SSO packets"; break; case 15: str = "Quanta bytes exceeds pkt_len*64"; break; case 16: str = "Quanta exceeds max_cmds_in_sq"; break; case 17: str = "incoherent last_lso_quanta"; break; case 18: str = "incoherent TSO TLEN"; break; case 19: str = "Quanta: too many descriptors"; break; case 20: str = "Quanta: # of packets mismatch"; break; default: break; } if (str) snprintf(buf.str, ICE_STR_BUF_LEN, "%s", str); else snprintf(buf.str, ICE_STR_BUF_LEN, "Unknown Tx TCLAN event %u", event); return buf; } /** * ice_mdd_tx_pqm_str - Convert MDD Tx PQM event to a string * @event: the MDD event number to convert * * Convert the Tx PQM event value from the GL_MDET_TX_PQM register into * a human readable string for logging of MDD events. 
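 *
 * e.g., event 8 decodes to "LSO_MSS_BELOW_MIN"; values outside the table
 * (including the unused index 2) are reported as "Unknown Tx PQM event %u".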
*/ struct ice_str_buf _ice_mdd_tx_pqm_str(u8 event) { struct ice_str_buf buf = { .str = "" }; const char *str = NULL; switch (event) { case 0: str = "PCI_DUMMY_COMP"; break; case 1: str = "PCI_UR_COMP"; break; /* Index 2 is unused */ case 3: str = "RCV_SH_BE_LSO"; break; case 4: str = "Q_FL_MNG_EPY_CH"; break; case 5: str = "Q_EPY_MNG_FL_CH"; break; case 6: str = "LSO_NUMDESCS_ZERO"; break; case 7: str = "LSO_LENGTH_ZERO"; break; case 8: str = "LSO_MSS_BELOW_MIN"; break; case 9: str = "LSO_MSS_ABOVE_MAX"; break; case 10: str = "LSO_HDR_SIZE_ZERO"; break; case 11: str = "RCV_CNT_BE_LSO"; break; case 12: str = "SKIP_ONE_QT_ONLY"; break; case 13: str = "LSO_PKTCNT_ZERO"; break; case 14: str = "SSO_LENGTH_ZERO"; break; case 15: str = "SSO_LENGTH_EXCEED"; break; case 16: str = "SSO_PKTCNT_ZERO"; break; case 17: str = "SSO_PKTCNT_EXCEED"; break; case 18: str = "SSO_NUMDESCS_ZERO"; break; case 19: str = "SSO_NUMDESCS_EXCEED"; break; case 20: str = "TAIL_GT_RING_LENGTH"; break; case 21: str = "RESERVED_DBL_TYPE"; break; case 22: str = "ILLEGAL_HEAD_DROP_DBL"; break; case 23: str = "LSO_OVER_COMMS_Q"; break; case 24: str = "ILLEGAL_VF_QNUM"; break; case 25: str = "QTAIL_GT_RING_LENGTH"; break; default: break; } if (str) snprintf(buf.str, ICE_STR_BUF_LEN, "%s", str); else snprintf(buf.str, ICE_STR_BUF_LEN, "Unknown Tx PQM event %u", event); return buf; } /** * ice_mdd_rx_str - Convert MDD Rx queue event to a string * @event: the MDD event number to convert * * Convert the Rx queue event value from the GL_MDET_RX register into a human * readable string for logging of MDD events. */ struct ice_str_buf _ice_mdd_rx_str(u8 event) { struct ice_str_buf buf = { .str = "" }; const char *str = NULL; switch (event) { case 1: str = "Descriptor fetch failed"; break; default: break; } if (str) snprintf(buf.str, ICE_STR_BUF_LEN, "%s", str); else snprintf(buf.str, ICE_STR_BUF_LEN, "Unknown Rx event %u", event); return buf; } /** * ice_state_to_str - Convert the state enum to a string value * @state: the state bit to convert * * Converts a given state bit to its human readable string name. 
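 * e.g., ICE_STATE_RESET_FAILED converts to "RESET_FAILED".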
If the enum * value is unknown, returns NULL. */ const char * ice_state_to_str(enum ice_state state) { switch (state) { case ICE_STATE_CONTROLQ_EVENT_PENDING: return "CONTROLQ_EVENT_PENDING"; case ICE_STATE_VFLR_PENDING: return "VFLR_PENDING"; case ICE_STATE_MDD_PENDING: return "MDD_PENDING"; case ICE_STATE_RESET_OICR_RECV: return "RESET_OICR_RECV"; case ICE_STATE_RESET_PFR_REQ: return "RESET_PFR_REQ"; case ICE_STATE_PREPARED_FOR_RESET: return "PREPARED_FOR_RESET"; case ICE_STATE_SUBIF_NEEDS_REINIT: return "SUBIF_NEEDS_REINIT"; case ICE_STATE_RESET_FAILED: return "RESET_FAILED"; case ICE_STATE_DRIVER_INITIALIZED: return "DRIVER_INITIALIZED"; case ICE_STATE_NO_MEDIA: return "NO_MEDIA"; case ICE_STATE_RECOVERY_MODE: return "RECOVERY_MODE"; case ICE_STATE_ROLLBACK_MODE: return "ROLLBACK_MODE"; case ICE_STATE_LINK_STATUS_REPORTED: return "LINK_STATUS_REPORTED"; case ICE_STATE_ATTACHING: return "ATTACHING"; case ICE_STATE_DETACHING: return "DETACHING"; case ICE_STATE_LINK_DEFAULT_OVERRIDE_PENDING: return "LINK_DEFAULT_OVERRIDE_PENDING"; case ICE_STATE_LLDP_RX_FLTR_FROM_DRIVER: return "LLDP_RX_FLTR_FROM_DRIVER"; case ICE_STATE_MULTIPLE_TCS: return "MULTIPLE_TCS"; case ICE_STATE_DO_FW_DEBUG_DUMP: return "DO_FW_DEBUG_DUMP"; case ICE_STATE_LINK_ACTIVE_ON_DOWN: return "LINK_ACTIVE_ON_DOWN"; case ICE_STATE_FIRST_INIT_LINK: return "FIRST_INIT_LINK"; case ICE_STATE_DO_CREATE_MIRR_INTFC: return "DO_CREATE_MIRR_INTFC"; case ICE_STATE_DO_DESTROY_MIRR_INTFC: return "DO_DESTROY_MIRR_INTFC"; + case ICE_STATE_PHY_FW_INIT_PENDING: + return "PHY_FW_INIT_PENDING"; case ICE_STATE_LAST: return NULL; } return NULL; } /** * ice_fw_module_str - Convert a FW logging module to a string name * @module: the module to convert * * Given a FW logging module id, convert it to a shorthand human readable * name, for generating sysctl tunables. */ const char * ice_fw_module_str(enum ice_aqc_fw_logging_mod module) { switch (module) { case ICE_AQC_FW_LOG_ID_GENERAL: return "general"; case ICE_AQC_FW_LOG_ID_CTRL: return "ctrl"; case ICE_AQC_FW_LOG_ID_LINK: return "link"; case ICE_AQC_FW_LOG_ID_LINK_TOPO: return "link_topo"; case ICE_AQC_FW_LOG_ID_DNL: return "dnl"; case ICE_AQC_FW_LOG_ID_I2C: return "i2c"; case ICE_AQC_FW_LOG_ID_SDP: return "sdp"; case ICE_AQC_FW_LOG_ID_MDIO: return "mdio"; case ICE_AQC_FW_LOG_ID_ADMINQ: return "adminq"; case ICE_AQC_FW_LOG_ID_HDMA: return "hdma"; case ICE_AQC_FW_LOG_ID_LLDP: return "lldp"; case ICE_AQC_FW_LOG_ID_DCBX: return "dcbx"; case ICE_AQC_FW_LOG_ID_DCB: return "dcb"; case ICE_AQC_FW_LOG_ID_XLR: return "xlr"; case ICE_AQC_FW_LOG_ID_NVM: return "nvm"; case ICE_AQC_FW_LOG_ID_AUTH: return "auth"; case ICE_AQC_FW_LOG_ID_VPD: return "vpd"; case ICE_AQC_FW_LOG_ID_IOSF: return "iosf"; case ICE_AQC_FW_LOG_ID_PARSER: return "parser"; case ICE_AQC_FW_LOG_ID_SW: return "sw"; case ICE_AQC_FW_LOG_ID_SCHEDULER: return "scheduler"; case ICE_AQC_FW_LOG_ID_TXQ: return "txq"; case ICE_AQC_FW_LOG_ID_RSVD: return "acl"; case ICE_AQC_FW_LOG_ID_POST: return "post"; case ICE_AQC_FW_LOG_ID_WATCHDOG: return "watchdog"; case ICE_AQC_FW_LOG_ID_TASK_DISPATCH: return "task_dispatch"; case ICE_AQC_FW_LOG_ID_MNG: return "mng"; case ICE_AQC_FW_LOG_ID_SYNCE: return "synce"; case ICE_AQC_FW_LOG_ID_HEALTH: return "health"; case ICE_AQC_FW_LOG_ID_TSDRV: return "tsdrv"; case ICE_AQC_FW_LOG_ID_PFREG: return "pfreg"; case ICE_AQC_FW_LOG_ID_MDLVER: return "mdlver"; case ICE_AQC_FW_LOG_ID_MAX: return "unknown"; } /* The compiler generates errors on unhandled enum values if we omit * the default case.
*/ return "unknown"; } /** * ice_fw_lldp_status - Convert FW LLDP status to a string * @lldp_status: firmware LLDP status value to convert * * Given the FW LLDP status, convert it to a human readable string. */ struct ice_str_buf _ice_fw_lldp_status(u32 lldp_status) { struct ice_str_buf buf = { .str = "" }; const char *str = NULL; switch (lldp_status) { case ICE_LLDP_ADMINSTATUS_DIS: str = "DISABLED"; break; case ICE_LLDP_ADMINSTATUS_ENA_RX: str = "ENA_RX"; break; case ICE_LLDP_ADMINSTATUS_ENA_TX: str = "ENA_TX"; break; case ICE_LLDP_ADMINSTATUS_ENA_RXTX: str = "ENA_RXTX"; break; case 0xF: str = "NVM_DEFAULT"; break; } if (str) snprintf(buf.str, ICE_STR_BUF_LEN, "%s", str); else snprintf(buf.str, ICE_STR_BUF_LEN, "Unknown LLDP status %u", lldp_status); return buf; } diff --git a/sys/dev/ice/ice_switch.c b/sys/dev/ice/ice_switch.c index 1edd39497ab5..1880d6abdd26 100644 --- a/sys/dev/ice/ice_switch.c +++ b/sys/dev/ice/ice_switch.c @@ -1,4383 +1,4381 @@ /* SPDX-License-Identifier: BSD-3-Clause */ /* Copyright (c) 2024, Intel Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. Neither the name of the Intel Corporation nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include "ice_common.h" #include "ice_switch.h" #include "ice_flex_type.h" #include "ice_flow.h" #define ICE_ETH_DA_OFFSET 0 #define ICE_ETH_ETHTYPE_OFFSET 12 #define ICE_ETH_VLAN_TCI_OFFSET 14 #define ICE_MAX_VLAN_ID 0xFFF #define ICE_IPV6_ETHER_ID 0x86DD #define ICE_PPP_IPV6_PROTO_ID 0x0057 #define ICE_ETH_P_8021Q 0x8100 /* Dummy ethernet header needed in the ice_sw_rule_* * struct to configure any switch filter rules. 
* {DA (6 bytes), SA(6 bytes), * Ether type (2 bytes for header without VLAN tag) OR * VLAN tag (4 bytes for header with VLAN tag) } * * A word on the hardcoded values: * byte 0 = 0x2: to identify it as locally administered DA MAC * byte 6 = 0x2: to identify it as locally administered SA MAC * byte 12 = 0x81 & byte 13 = 0x00: * In case of a VLAN filter, the first two bytes define the ether type (0x8100) * and the remaining two bytes are a placeholder for programming a given VLAN ID * In case of an Ether type filter, it is treated as a header without a VLAN tag * and bytes 12 and 13 are used to program a given Ether type instead */ static const u8 dummy_eth_header[DUMMY_ETH_HDR_LEN] = { 0x2, 0, 0, 0, 0, 0, 0x2, 0, 0, 0, 0, 0, 0x81, 0, 0, 0}; static bool ice_vsi_uses_fltr(struct ice_fltr_mgmt_list_entry *fm_entry, u16 vsi_handle); /** * ice_init_def_sw_recp - initialize the recipe bookkeeping tables * @hw: pointer to the HW struct * @recp_list: pointer to sw recipe list * * Allocate memory for the entire recipe table and initialize the structures/ * entries corresponding to basic recipes. */ int ice_init_def_sw_recp(struct ice_hw *hw, struct ice_sw_recipe **recp_list) { struct ice_sw_recipe *recps; u8 i; recps = (struct ice_sw_recipe *) ice_calloc(hw, ICE_MAX_NUM_RECIPES, sizeof(*recps)); if (!recps) return ICE_ERR_NO_MEMORY; for (i = 0; i < ICE_MAX_NUM_RECIPES; i++) { recps[i].root_rid = i; INIT_LIST_HEAD(&recps[i].filt_rules); INIT_LIST_HEAD(&recps[i].filt_replay_rules); INIT_LIST_HEAD(&recps[i].rg_list); ice_init_lock(&recps[i].filt_rule_lock); } *recp_list = recps; return 0; } /** * ice_aq_get_sw_cfg - get switch configuration * @hw: pointer to the hardware structure * @buf: pointer to the result buffer * @buf_size: length of the buffer available for response * @req_desc: pointer to requested descriptor * @num_elems: pointer to number of elements * @cd: pointer to command details structure or NULL * * Get switch configuration (0x0200) to be placed in buf. * This admin command returns information such as initial VSI/port number * and switch ID it belongs to. * * NOTE: *req_desc is both an input/output parameter. * The caller first calls this function with *req_desc set * to 0. If the response from f/w has *req_desc set to 0, all the switch * configuration information has been returned; if non-zero (meaning not all * the information was returned), the caller should call this function again * with *req_desc set to the previous value returned by f/w to get the * next block of switch configuration information. * * *num_elems is an output-only parameter. It reflects the number of elements * in the response buffer. The caller should use *num_elems while * parsing the response buffer.
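 *
 * A sketch of the paging loop described above (caller-side; buffer setup
 * omitted and names hypothetical):
 *
 *	u16 req_desc = 0, num_elems;
 *	int status;
 *
 *	do {
 *		status = ice_aq_get_sw_cfg(hw, buf, buf_size, &req_desc,
 *					   &num_elems, NULL);
 *		if (status)
 *			break;
 *		(parse num_elems elements from buf here)
 *	} while (req_desc);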
*/ static int ice_aq_get_sw_cfg(struct ice_hw *hw, struct ice_aqc_get_sw_cfg_resp_elem *buf, u16 buf_size, u16 *req_desc, u16 *num_elems, struct ice_sq_cd *cd) { struct ice_aqc_get_sw_cfg *cmd; struct ice_aq_desc desc; int status; ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_sw_cfg); cmd = &desc.params.get_sw_conf; cmd->element = CPU_TO_LE16(*req_desc); status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd); if (!status) { *req_desc = LE16_TO_CPU(cmd->element); *num_elems = LE16_TO_CPU(cmd->num_elems); } return status; } /** * ice_alloc_rss_global_lut - allocate a RSS global LUT * @hw: pointer to the HW struct * @shared_res: true to allocate as a shared resource and false to allocate as a dedicated resource * @global_lut_id: output parameter for the RSS global LUT's ID */ int ice_alloc_rss_global_lut(struct ice_hw *hw, bool shared_res, u16 *global_lut_id) { struct ice_aqc_alloc_free_res_elem *sw_buf; int status; u16 buf_len; buf_len = ice_struct_size(sw_buf, elem, 1); sw_buf = (struct ice_aqc_alloc_free_res_elem *)ice_malloc(hw, buf_len); if (!sw_buf) return ICE_ERR_NO_MEMORY; sw_buf->num_elems = CPU_TO_LE16(1); sw_buf->res_type = CPU_TO_LE16(ICE_AQC_RES_TYPE_GLOBAL_RSS_HASH | (shared_res ? ICE_AQC_RES_TYPE_FLAG_SHARED : ICE_AQC_RES_TYPE_FLAG_DEDICATED)); status = ice_aq_alloc_free_res(hw, 1, sw_buf, buf_len, ice_aqc_opc_alloc_res, NULL); if (status) { ice_debug(hw, ICE_DBG_RES, "Failed to allocate %s RSS global LUT, status %d\n", shared_res ? "shared" : "dedicated", status); goto ice_alloc_global_lut_exit; } *global_lut_id = LE16_TO_CPU(sw_buf->elem[0].e.sw_resp); ice_alloc_global_lut_exit: ice_free(hw, sw_buf); return status; } /** * ice_free_rss_global_lut - free a RSS global LUT * @hw: pointer to the HW struct * @global_lut_id: ID of the RSS global LUT to free */ int ice_free_rss_global_lut(struct ice_hw *hw, u16 global_lut_id) { struct ice_aqc_alloc_free_res_elem *sw_buf; u16 buf_len, num_elems = 1; int status; buf_len = ice_struct_size(sw_buf, elem, num_elems); sw_buf = (struct ice_aqc_alloc_free_res_elem *)ice_malloc(hw, buf_len); if (!sw_buf) return ICE_ERR_NO_MEMORY; sw_buf->num_elems = CPU_TO_LE16(num_elems); sw_buf->res_type = CPU_TO_LE16(ICE_AQC_RES_TYPE_GLOBAL_RSS_HASH); sw_buf->elem[0].e.sw_resp = CPU_TO_LE16(global_lut_id); status = ice_aq_alloc_free_res(hw, num_elems, sw_buf, buf_len, ice_aqc_opc_free_res, NULL); if (status) ice_debug(hw, ICE_DBG_RES, "Failed to free RSS global LUT %d, status %d\n", global_lut_id, status); ice_free(hw, sw_buf); return status; } /** * ice_alloc_sw - allocate resources specific to switch * @hw: pointer to the HW struct * @ena_stats: true to turn on VEB stats * @shared_res: true for shared resource, false for dedicated resource * @sw_id: switch ID returned * @counter_id: VEB counter ID returned * * allocates switch resources (SWID and VEB counter) (0x0208) */ int ice_alloc_sw(struct ice_hw *hw, bool ena_stats, bool shared_res, u16 *sw_id, u16 *counter_id) { struct ice_aqc_alloc_free_res_elem *sw_buf; struct ice_aqc_res_elem *sw_ele; u16 buf_len; int status; buf_len = ice_struct_size(sw_buf, elem, 1); sw_buf = (struct ice_aqc_alloc_free_res_elem *)ice_malloc(hw, buf_len); if (!sw_buf) return ICE_ERR_NO_MEMORY; /* Prepare buffer for switch ID. * The number of resource entries in buffer is passed as 1 since only a * single switch/VEB instance is allocated, and hence a single sw_id * is requested. */ sw_buf->num_elems = CPU_TO_LE16(1); sw_buf->res_type = CPU_TO_LE16(ICE_AQC_RES_TYPE_SWID | (shared_res ? 
ICE_AQC_RES_TYPE_FLAG_SHARED : ICE_AQC_RES_TYPE_FLAG_DEDICATED)); status = ice_aq_alloc_free_res(hw, 1, sw_buf, buf_len, ice_aqc_opc_alloc_res, NULL); if (status) goto ice_alloc_sw_exit; sw_ele = &sw_buf->elem[0]; *sw_id = LE16_TO_CPU(sw_ele->e.sw_resp); if (ena_stats) { /* Prepare buffer for VEB Counter */ enum ice_adminq_opc opc = ice_aqc_opc_alloc_res; struct ice_aqc_alloc_free_res_elem *counter_buf; struct ice_aqc_res_elem *counter_ele; counter_buf = (struct ice_aqc_alloc_free_res_elem *) ice_malloc(hw, buf_len); if (!counter_buf) { status = ICE_ERR_NO_MEMORY; goto ice_alloc_sw_exit; } /* The number of resource entries in buffer is passed as 1 since * only a single switch/VEB instance is allocated, and hence a * single VEB counter is requested. */ counter_buf->num_elems = CPU_TO_LE16(1); counter_buf->res_type = CPU_TO_LE16(ICE_AQC_RES_TYPE_VEB_COUNTER | ICE_AQC_RES_TYPE_FLAG_DEDICATED); status = ice_aq_alloc_free_res(hw, 1, counter_buf, buf_len, opc, NULL); if (status) { ice_free(hw, counter_buf); goto ice_alloc_sw_exit; } counter_ele = &counter_buf->elem[0]; *counter_id = LE16_TO_CPU(counter_ele->e.sw_resp); ice_free(hw, counter_buf); } ice_alloc_sw_exit: ice_free(hw, sw_buf); return status; } /** * ice_free_sw - free resources specific to switch * @hw: pointer to the HW struct * @sw_id: switch ID to be freed * @counter_id: VEB counter ID to be freed * * free switch resources (SWID and VEB counter) (0x0209) * * NOTE: This function frees multiple resources. It continues * releasing other resources even after it encounters an error. * The error code returned is the last error it encountered. */ int ice_free_sw(struct ice_hw *hw, u16 sw_id, u16 counter_id) { struct ice_aqc_alloc_free_res_elem *sw_buf, *counter_buf; int status, ret_status; u16 buf_len; buf_len = ice_struct_size(sw_buf, elem, 1); sw_buf = (struct ice_aqc_alloc_free_res_elem *)ice_malloc(hw, buf_len); if (!sw_buf) return ICE_ERR_NO_MEMORY; /* Prepare buffer to free for switch ID res. * The number of resource entries in buffer is passed as 1 since only a * single switch/VEB instance is freed, and hence a single sw_id * is released.
*/ sw_buf->num_elems = CPU_TO_LE16(1); sw_buf->res_type = CPU_TO_LE16(ICE_AQC_RES_TYPE_SWID); sw_buf->elem[0].e.sw_resp = CPU_TO_LE16(sw_id); ret_status = ice_aq_alloc_free_res(hw, 1, sw_buf, buf_len, ice_aqc_opc_free_res, NULL); if (ret_status) ice_debug(hw, ICE_DBG_SW, "CQ CMD Buffer:\n"); /* Prepare buffer to free for VEB Counter resource */ counter_buf = (struct ice_aqc_alloc_free_res_elem *) ice_malloc(hw, buf_len); if (!counter_buf) { ice_free(hw, sw_buf); return ICE_ERR_NO_MEMORY; } /* The number of resource entries in buffer is passed as 1 since only a * single switch/VEB instance is freed, and hence a single VEB counter * is released */ counter_buf->num_elems = CPU_TO_LE16(1); counter_buf->res_type = CPU_TO_LE16(ICE_AQC_RES_TYPE_VEB_COUNTER); counter_buf->elem[0].e.sw_resp = CPU_TO_LE16(counter_id); status = ice_aq_alloc_free_res(hw, 1, counter_buf, buf_len, ice_aqc_opc_free_res, NULL); if (status) { ice_debug(hw, ICE_DBG_SW, "VEB counter resource could not be freed\n"); ret_status = status; } ice_free(hw, counter_buf); ice_free(hw, sw_buf); return ret_status; } /** * ice_aq_add_vsi * @hw: pointer to the HW struct * @vsi_ctx: pointer to a VSI context struct * @cd: pointer to command details structure or NULL * * Add a VSI context to the hardware (0x0210) */ int ice_aq_add_vsi(struct ice_hw *hw, struct ice_vsi_ctx *vsi_ctx, struct ice_sq_cd *cd) { struct ice_aqc_add_update_free_vsi_resp *res; struct ice_aqc_add_get_update_free_vsi *cmd; struct ice_aq_desc desc; int status; cmd = &desc.params.vsi_cmd; res = &desc.params.add_update_free_vsi_res; ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_add_vsi); if (!vsi_ctx->alloc_from_pool) cmd->vsi_num = CPU_TO_LE16(vsi_ctx->vsi_num | ICE_AQ_VSI_IS_VALID); cmd->vf_id = vsi_ctx->vf_num; cmd->vsi_flags = CPU_TO_LE16(vsi_ctx->flags); desc.flags |= CPU_TO_LE16(ICE_AQ_FLAG_RD); status = ice_aq_send_cmd(hw, &desc, &vsi_ctx->info, sizeof(vsi_ctx->info), cd); if (!status) { vsi_ctx->vsi_num = LE16_TO_CPU(res->vsi_num) & ICE_AQ_VSI_NUM_M; vsi_ctx->vsis_allocd = LE16_TO_CPU(res->vsi_used); vsi_ctx->vsis_unallocated = LE16_TO_CPU(res->vsi_free); } return status; } /** * ice_aq_free_vsi * @hw: pointer to the HW struct * @vsi_ctx: pointer to a VSI context struct * @keep_vsi_alloc: keep VSI allocation as part of this PF's resources * @cd: pointer to command details structure or NULL * * Free VSI context info from hardware (0x0213) */ int ice_aq_free_vsi(struct ice_hw *hw, struct ice_vsi_ctx *vsi_ctx, bool keep_vsi_alloc, struct ice_sq_cd *cd) { struct ice_aqc_add_update_free_vsi_resp *resp; struct ice_aqc_add_get_update_free_vsi *cmd; struct ice_aq_desc desc; int status; cmd = &desc.params.vsi_cmd; resp = &desc.params.add_update_free_vsi_res; ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_free_vsi); cmd->vsi_num = CPU_TO_LE16(vsi_ctx->vsi_num | ICE_AQ_VSI_IS_VALID); if (keep_vsi_alloc) cmd->cmd_flags = CPU_TO_LE16(ICE_AQ_VSI_KEEP_ALLOC); status = ice_aq_send_cmd(hw, &desc, NULL, 0, cd); if (!status) { vsi_ctx->vsis_allocd = LE16_TO_CPU(resp->vsi_used); vsi_ctx->vsis_unallocated = LE16_TO_CPU(resp->vsi_free); } return status; } /** * ice_aq_update_vsi * @hw: pointer to the HW struct * @vsi_ctx: pointer to a VSI context struct * @cd: pointer to command details structure or NULL * * Update VSI context in the hardware (0x0211) */ int ice_aq_update_vsi(struct ice_hw *hw, struct ice_vsi_ctx *vsi_ctx, struct ice_sq_cd *cd) { struct ice_aqc_add_update_free_vsi_resp *resp; struct ice_aqc_add_get_update_free_vsi *cmd; struct ice_aq_desc desc; int status; cmd = 
&desc.params.vsi_cmd; resp = &desc.params.add_update_free_vsi_res; ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_update_vsi); cmd->vsi_num = CPU_TO_LE16(vsi_ctx->vsi_num | ICE_AQ_VSI_IS_VALID); desc.flags |= CPU_TO_LE16(ICE_AQ_FLAG_RD); status = ice_aq_send_cmd(hw, &desc, &vsi_ctx->info, sizeof(vsi_ctx->info), cd); if (!status) { vsi_ctx->vsis_allocd = LE16_TO_CPU(resp->vsi_used); vsi_ctx->vsis_unallocated = LE16_TO_CPU(resp->vsi_free); } return status; } /** * ice_is_vsi_valid - check whether the VSI is valid or not * @hw: pointer to the HW struct * @vsi_handle: VSI handle * * check whether the VSI is valid or not */ bool ice_is_vsi_valid(struct ice_hw *hw, u16 vsi_handle) { return vsi_handle < ICE_MAX_VSI && hw->vsi_ctx[vsi_handle]; } /** * ice_get_hw_vsi_num - return the HW VSI number * @hw: pointer to the HW struct * @vsi_handle: VSI handle * * return the HW VSI number * Caution: call this function only if VSI is valid (ice_is_vsi_valid) */ u16 ice_get_hw_vsi_num(struct ice_hw *hw, u16 vsi_handle) { return hw->vsi_ctx[vsi_handle]->vsi_num; } /** * ice_get_vsi_ctx - return the VSI context entry for a given VSI handle * @hw: pointer to the HW struct * @vsi_handle: VSI handle * * return the VSI context entry for a given VSI handle */ struct ice_vsi_ctx *ice_get_vsi_ctx(struct ice_hw *hw, u16 vsi_handle) { return (vsi_handle >= ICE_MAX_VSI) ? NULL : hw->vsi_ctx[vsi_handle]; } /** * ice_save_vsi_ctx - save the VSI context for a given VSI handle * @hw: pointer to the HW struct * @vsi_handle: VSI handle * @vsi: VSI context pointer * * save the VSI context entry for a given VSI handle */ static void ice_save_vsi_ctx(struct ice_hw *hw, u16 vsi_handle, struct ice_vsi_ctx *vsi) { hw->vsi_ctx[vsi_handle] = vsi; } /** * ice_clear_vsi_q_ctx - clear VSI queue contexts for all TCs * @hw: pointer to the HW struct * @vsi_handle: VSI handle */ void ice_clear_vsi_q_ctx(struct ice_hw *hw, u16 vsi_handle) { struct ice_vsi_ctx *vsi; u8 i; vsi = ice_get_vsi_ctx(hw, vsi_handle); if (!vsi) return; ice_for_each_traffic_class(i) { if (vsi->lan_q_ctx[i]) { ice_free(hw, vsi->lan_q_ctx[i]); vsi->lan_q_ctx[i] = NULL; } if (vsi->rdma_q_ctx[i]) { ice_free(hw, vsi->rdma_q_ctx[i]); vsi->rdma_q_ctx[i] = NULL; } } } /** * ice_clear_vsi_ctx - clear the VSI context entry * @hw: pointer to the HW struct * @vsi_handle: VSI handle * * clear the VSI context entry */ static void ice_clear_vsi_ctx(struct ice_hw *hw, u16 vsi_handle) { struct ice_vsi_ctx *vsi; vsi = ice_get_vsi_ctx(hw, vsi_handle); if (vsi) { ice_clear_vsi_q_ctx(hw, vsi_handle); ice_free(hw, vsi); hw->vsi_ctx[vsi_handle] = NULL; } } /** * ice_clear_all_vsi_ctx - clear all the VSI context entries * @hw: pointer to the HW struct */ void ice_clear_all_vsi_ctx(struct ice_hw *hw) { u16 i; for (i = 0; i < ICE_MAX_VSI; i++) ice_clear_vsi_ctx(hw, i); } /** * ice_add_vsi - add VSI context to the hardware and VSI handle list * @hw: pointer to the HW struct * @vsi_handle: unique VSI handle provided by drivers * @vsi_ctx: pointer to a VSI context struct * @cd: pointer to command details structure or NULL * * Add a VSI context to the hardware also add it into the VSI handle list. * If this function gets called after reset for existing VSIs then update * with the new HW VSI number in the corresponding VSI handle list entry. 
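 *
 * i.e., a post-reset replay (sketch, reusing the original handle and a
 * hypothetical ctx) such as:
 *
 *	status = ice_add_vsi(hw, vsi_handle, &ctx, NULL);
 *
 * keeps the previously saved context entry and only refreshes its vsi_num
 * from the firmware response.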
 */ int ice_add_vsi(struct ice_hw *hw, u16 vsi_handle, struct ice_vsi_ctx *vsi_ctx, struct ice_sq_cd *cd) { struct ice_vsi_ctx *tmp_vsi_ctx; int status; if (vsi_handle >= ICE_MAX_VSI) return ICE_ERR_PARAM; status = ice_aq_add_vsi(hw, vsi_ctx, cd); if (status) return status; tmp_vsi_ctx = ice_get_vsi_ctx(hw, vsi_handle); if (!tmp_vsi_ctx) { /* Create a new VSI context */ tmp_vsi_ctx = (struct ice_vsi_ctx *) ice_malloc(hw, sizeof(*tmp_vsi_ctx)); if (!tmp_vsi_ctx) { ice_aq_free_vsi(hw, vsi_ctx, false, cd); return ICE_ERR_NO_MEMORY; } *tmp_vsi_ctx = *vsi_ctx; ice_save_vsi_ctx(hw, vsi_handle, tmp_vsi_ctx); } else { /* update with new HW VSI num */ tmp_vsi_ctx->vsi_num = vsi_ctx->vsi_num; } return 0; } /** * ice_free_vsi - free VSI context from hardware and VSI handle list * @hw: pointer to the HW struct * @vsi_handle: unique VSI handle * @vsi_ctx: pointer to a VSI context struct * @keep_vsi_alloc: keep VSI allocation as part of this PF's resources * @cd: pointer to command details structure or NULL * * Free VSI context info from hardware as well as from VSI handle list */ int ice_free_vsi(struct ice_hw *hw, u16 vsi_handle, struct ice_vsi_ctx *vsi_ctx, bool keep_vsi_alloc, struct ice_sq_cd *cd) { int status; if (!ice_is_vsi_valid(hw, vsi_handle)) return ICE_ERR_PARAM; vsi_ctx->vsi_num = ice_get_hw_vsi_num(hw, vsi_handle); status = ice_aq_free_vsi(hw, vsi_ctx, keep_vsi_alloc, cd); if (!status) ice_clear_vsi_ctx(hw, vsi_handle); return status; } /** * ice_update_vsi * @hw: pointer to the HW struct * @vsi_handle: unique VSI handle * @vsi_ctx: pointer to a VSI context struct * @cd: pointer to command details structure or NULL * * Update VSI context in the hardware */ int ice_update_vsi(struct ice_hw *hw, u16 vsi_handle, struct ice_vsi_ctx *vsi_ctx, struct ice_sq_cd *cd) { if (!ice_is_vsi_valid(hw, vsi_handle)) return ICE_ERR_PARAM; vsi_ctx->vsi_num = ice_get_hw_vsi_num(hw, vsi_handle); return ice_aq_update_vsi(hw, vsi_ctx, cd); } /** * ice_cfg_iwarp_fltr - enable/disable iWARP filtering on VSI * @hw: pointer to HW struct * @vsi_handle: VSI SW index * @enable: boolean for enable/disable */ int ice_cfg_iwarp_fltr(struct ice_hw *hw, u16 vsi_handle, bool enable) { struct ice_vsi_ctx *ctx, *cached_ctx; int status; cached_ctx = ice_get_vsi_ctx(hw, vsi_handle); if (!cached_ctx) return ICE_ERR_DOES_NOT_EXIST; ctx = (struct ice_vsi_ctx *)ice_calloc(hw, 1, sizeof(*ctx)); if (!ctx) return ICE_ERR_NO_MEMORY; ctx->info.q_opt_rss = cached_ctx->info.q_opt_rss; ctx->info.q_opt_tc = cached_ctx->info.q_opt_tc; ctx->info.q_opt_flags = cached_ctx->info.q_opt_flags; ctx->info.valid_sections = CPU_TO_LE16(ICE_AQ_VSI_PROP_Q_OPT_VALID); if (enable) ctx->info.q_opt_flags |= ICE_AQ_VSI_Q_OPT_PE_FLTR_EN; else ctx->info.q_opt_flags &= ~ICE_AQ_VSI_Q_OPT_PE_FLTR_EN; status = ice_update_vsi(hw, vsi_handle, ctx, NULL); if (!status) { cached_ctx->info.q_opt_flags = ctx->info.q_opt_flags; cached_ctx->info.valid_sections |= ctx->info.valid_sections; } ice_free(hw, ctx); return status; } /** * ice_aq_get_vsi_params * @hw: pointer to the HW struct * @vsi_ctx: pointer to a VSI context struct * @cd: pointer to command details structure or NULL * * Get VSI context info from hardware (0x0212) */ int ice_aq_get_vsi_params(struct ice_hw *hw, struct ice_vsi_ctx *vsi_ctx, struct ice_sq_cd *cd) { struct ice_aqc_add_get_update_free_vsi *cmd; struct ice_aqc_get_vsi_resp *resp; struct ice_aq_desc desc; int status; cmd = &desc.params.vsi_cmd; resp = &desc.params.get_vsi_resp; ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_vsi_params);
cmd->vsi_num = CPU_TO_LE16(vsi_ctx->vsi_num | ICE_AQ_VSI_IS_VALID); status = ice_aq_send_cmd(hw, &desc, &vsi_ctx->info, sizeof(vsi_ctx->info), cd); if (!status) { vsi_ctx->vsi_num = LE16_TO_CPU(resp->vsi_num) & ICE_AQ_VSI_NUM_M; vsi_ctx->vf_num = resp->vf_id; vsi_ctx->vsis_allocd = LE16_TO_CPU(resp->vsi_used); vsi_ctx->vsis_unallocated = LE16_TO_CPU(resp->vsi_free); } return status; } /** * ice_aq_add_update_mir_rule - add/update a mirror rule * @hw: pointer to the HW struct * @rule_type: Rule Type * @dest_vsi: VSI number to which packets will be mirrored * @count: length of the list * @mr_buf: buffer for list of mirrored VSI numbers * @cd: pointer to command details structure or NULL * @rule_id: Rule ID * * Add/Update Mirror Rule (0x260). */ int ice_aq_add_update_mir_rule(struct ice_hw *hw, u16 rule_type, u16 dest_vsi, u16 count, struct ice_mir_rule_buf *mr_buf, struct ice_sq_cd *cd, u16 *rule_id) { struct ice_aqc_add_update_mir_rule *cmd; struct ice_aq_desc desc; __le16 *mr_list = NULL; u16 buf_size = 0; int status; switch (rule_type) { case ICE_AQC_RULE_TYPE_VPORT_INGRESS: case ICE_AQC_RULE_TYPE_VPORT_EGRESS: /* Make sure count and mr_buf are set for these rule_types */ if (!(count && mr_buf)) return ICE_ERR_PARAM; buf_size = count * sizeof(__le16); mr_list = (_FORCE_ __le16 *)ice_malloc(hw, buf_size); if (!mr_list) return ICE_ERR_NO_MEMORY; break; case ICE_AQC_RULE_TYPE_PPORT_INGRESS: case ICE_AQC_RULE_TYPE_PPORT_EGRESS: /* Make sure count and mr_buf are not set for these * rule_types */ if (count || mr_buf) return ICE_ERR_PARAM; break; default: ice_debug(hw, ICE_DBG_SW, "Error due to unsupported rule_type %u\n", rule_type); return ICE_ERR_OUT_OF_RANGE; } ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_add_update_mir_rule); /* Pre-process 'mr_buf' items for add/update of virtual port * ingress/egress mirroring (but not physical port ingress/egress * mirroring) */ if (mr_buf) { int i; for (i = 0; i < count; i++) { u16 id; id = mr_buf[i].vsi_idx & ICE_AQC_RULE_MIRRORED_VSI_M; /* Validate specified VSI number, make sure it is less * than ICE_MAX_VSI, if not return with error. */ if (id >= ICE_MAX_VSI) { ice_debug(hw, ICE_DBG_SW, "Error VSI index (%u) out-of-range\n", id); ice_free(hw, mr_list); return ICE_ERR_OUT_OF_RANGE; } /* add VSI to mirror rule */ if (mr_buf[i].add) mr_list[i] = CPU_TO_LE16(id | ICE_AQC_RULE_ACT_M); else /* remove VSI from mirror rule */ mr_list[i] = CPU_TO_LE16(id); } desc.flags |= CPU_TO_LE16(ICE_AQ_FLAG_RD); } cmd = &desc.params.add_update_rule; if ((*rule_id) != ICE_INVAL_MIRROR_RULE_ID) cmd->rule_id = CPU_TO_LE16(((*rule_id) & ICE_AQC_RULE_ID_M) | ICE_AQC_RULE_ID_VALID_M); cmd->rule_type = CPU_TO_LE16(rule_type & ICE_AQC_RULE_TYPE_M); cmd->num_entries = CPU_TO_LE16(count); cmd->dest = CPU_TO_LE16(dest_vsi); status = ice_aq_send_cmd(hw, &desc, mr_list, buf_size, cd); if (!status) *rule_id = LE16_TO_CPU(cmd->rule_id) & ICE_AQC_RULE_ID_M; ice_free(hw, mr_list); return status; } /** * ice_aq_delete_mir_rule - delete a mirror rule * @hw: pointer to the HW struct * @rule_id: Mirror rule ID (to be deleted) * @keep_allocd: if set, the VSI stays part of the PF allocated res, * otherwise it is returned to the shared pool * @cd: pointer to command details structure or NULL * * Delete Mirror Rule (0x261). 
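 *
 * Illustrative pairing with ice_aq_add_update_mir_rule() above (a
 * sketch; error handling elided and dest_vsi is a hypothetical value):
 *
 *	u16 rule_id = ICE_INVAL_MIRROR_RULE_ID;
 *
 *	ice_aq_add_update_mir_rule(hw, ICE_AQC_RULE_TYPE_PPORT_INGRESS,
 *				   dest_vsi, 0, NULL, NULL, &rule_id);
 *	...
 *	ice_aq_delete_mir_rule(hw, rule_id, false, NULL);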
*/ int ice_aq_delete_mir_rule(struct ice_hw *hw, u16 rule_id, bool keep_allocd, struct ice_sq_cd *cd) { struct ice_aqc_delete_mir_rule *cmd; struct ice_aq_desc desc; /* rule_id should be in the range 0...63 */ if (rule_id >= ICE_MAX_NUM_MIRROR_RULES) return ICE_ERR_OUT_OF_RANGE; ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_del_mir_rule); cmd = &desc.params.del_rule; rule_id |= ICE_AQC_RULE_ID_VALID_M; cmd->rule_id = CPU_TO_LE16(rule_id); if (keep_allocd) cmd->flags = CPU_TO_LE16(ICE_AQC_FLAG_KEEP_ALLOCD_M); return ice_aq_send_cmd(hw, &desc, NULL, 0, cd); } /** * ice_aq_alloc_free_vsi_list * @hw: pointer to the HW struct * @vsi_list_id: VSI list ID returned or used for lookup * @lkup_type: switch rule filter lookup type * @opc: switch rules population command type - pass in the command opcode * * allocates or free a VSI list resource */ static int ice_aq_alloc_free_vsi_list(struct ice_hw *hw, u16 *vsi_list_id, enum ice_sw_lkup_type lkup_type, enum ice_adminq_opc opc) { struct ice_aqc_alloc_free_res_elem *sw_buf; struct ice_aqc_res_elem *vsi_ele; u16 buf_len; int status; buf_len = ice_struct_size(sw_buf, elem, 1); sw_buf = (struct ice_aqc_alloc_free_res_elem *)ice_malloc(hw, buf_len); if (!sw_buf) return ICE_ERR_NO_MEMORY; sw_buf->num_elems = CPU_TO_LE16(1); if (lkup_type == ICE_SW_LKUP_MAC || lkup_type == ICE_SW_LKUP_MAC_VLAN || lkup_type == ICE_SW_LKUP_ETHERTYPE || lkup_type == ICE_SW_LKUP_ETHERTYPE_MAC || lkup_type == ICE_SW_LKUP_PROMISC || lkup_type == ICE_SW_LKUP_PROMISC_VLAN || lkup_type == ICE_SW_LKUP_DFLT || lkup_type == ICE_SW_LKUP_LAST) { sw_buf->res_type = CPU_TO_LE16(ICE_AQC_RES_TYPE_VSI_LIST_REP); } else if (lkup_type == ICE_SW_LKUP_VLAN) { sw_buf->res_type = CPU_TO_LE16(ICE_AQC_RES_TYPE_VSI_LIST_PRUNE); } else { status = ICE_ERR_PARAM; goto ice_aq_alloc_free_vsi_list_exit; } if (opc == ice_aqc_opc_free_res) sw_buf->elem[0].e.sw_resp = CPU_TO_LE16(*vsi_list_id); status = ice_aq_alloc_free_res(hw, 1, sw_buf, buf_len, opc, NULL); if (status) goto ice_aq_alloc_free_vsi_list_exit; if (opc == ice_aqc_opc_alloc_res) { vsi_ele = &sw_buf->elem[0]; *vsi_list_id = LE16_TO_CPU(vsi_ele->e.sw_resp); } ice_aq_alloc_free_vsi_list_exit: ice_free(hw, sw_buf); return status; } /** * ice_aq_set_storm_ctrl - Sets storm control configuration * @hw: pointer to the HW struct * @bcast_thresh: represents the upper threshold for broadcast storm control * @mcast_thresh: represents the upper threshold for multicast storm control * @ctl_bitmask: storm control knobs * * Sets the storm control configuration (0x0280) */ int ice_aq_set_storm_ctrl(struct ice_hw *hw, u32 bcast_thresh, u32 mcast_thresh, u32 ctl_bitmask) { struct ice_aqc_storm_cfg *cmd; struct ice_aq_desc desc; cmd = &desc.params.storm_conf; ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_storm_cfg); cmd->bcast_thresh_size = CPU_TO_LE32(bcast_thresh & ICE_AQ_THRESHOLD_M); cmd->mcast_thresh_size = CPU_TO_LE32(mcast_thresh & ICE_AQ_THRESHOLD_M); cmd->storm_ctrl_ctrl = CPU_TO_LE32(ctl_bitmask); return ice_aq_send_cmd(hw, &desc, NULL, 0, NULL); } /** * ice_aq_get_storm_ctrl - gets storm control configuration * @hw: pointer to the HW struct * @bcast_thresh: represents the upper threshold for broadcast storm control * @mcast_thresh: represents the upper threshold for multicast storm control * @ctl_bitmask: storm control knobs * * Gets the storm control configuration (0x0281) */ int ice_aq_get_storm_ctrl(struct ice_hw *hw, u32 *bcast_thresh, u32 *mcast_thresh, u32 *ctl_bitmask) { struct ice_aq_desc desc; int status; 
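	/* Direct command: the thresholds and control bitmask come back in the
	 * descriptor parameters, so no separate buffer is attached.
	 */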
ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_storm_cfg); status = ice_aq_send_cmd(hw, &desc, NULL, 0, NULL); if (!status) { struct ice_aqc_storm_cfg *resp = &desc.params.storm_conf; if (bcast_thresh) *bcast_thresh = LE32_TO_CPU(resp->bcast_thresh_size) & ICE_AQ_THRESHOLD_M; if (mcast_thresh) *mcast_thresh = LE32_TO_CPU(resp->mcast_thresh_size) & ICE_AQ_THRESHOLD_M; if (ctl_bitmask) *ctl_bitmask = LE32_TO_CPU(resp->storm_ctrl_ctrl); } return status; } /** * ice_aq_sw_rules - add/update/remove switch rules * @hw: pointer to the HW struct * @rule_list: pointer to switch rule population list * @rule_list_sz: total size of the rule list in bytes * @num_rules: number of switch rules in the rule_list * @opc: switch rules population command type - pass in the command opcode * @cd: pointer to command details structure or NULL * * Add(0x02a0)/Update(0x02a1)/Remove(0x02a2) switch rules commands to firmware */ int ice_aq_sw_rules(struct ice_hw *hw, void *rule_list, u16 rule_list_sz, u8 num_rules, enum ice_adminq_opc opc, struct ice_sq_cd *cd) { struct ice_aq_desc desc; int status; ice_debug(hw, ICE_DBG_TRACE, "%s\n", __func__); if (opc != ice_aqc_opc_add_sw_rules && opc != ice_aqc_opc_update_sw_rules && opc != ice_aqc_opc_remove_sw_rules) return ICE_ERR_PARAM; ice_fill_dflt_direct_cmd_desc(&desc, opc); desc.flags |= CPU_TO_LE16(ICE_AQ_FLAG_RD); desc.params.sw_rules.num_rules_fltr_entry_index = CPU_TO_LE16(num_rules); status = ice_aq_send_cmd(hw, &desc, rule_list, rule_list_sz, cd); if (opc != ice_aqc_opc_add_sw_rules && hw->adminq.sq_last_status == ICE_AQ_RC_ENOENT) status = ICE_ERR_DOES_NOT_EXIST; return status; } /** * ice_init_port_info - Initialize port_info with switch configuration data * @pi: pointer to port_info * @vsi_port_num: VSI number or port number * @type: Type of switch element (port or VSI) * @swid: switch ID of the switch the element is attached to * @pf_vf_num: PF or VF number * @is_vf: true if the element is a VF, false otherwise */ static void ice_init_port_info(struct ice_port_info *pi, u16 vsi_port_num, u8 type, u16 swid, u16 pf_vf_num, bool is_vf) { switch (type) { case ICE_AQC_GET_SW_CONF_RESP_PHYS_PORT: pi->lport = (u8)(vsi_port_num & ICE_LPORT_MASK); pi->sw_id = swid; pi->pf_vf_num = pf_vf_num; pi->is_vf = is_vf; break; default: ice_debug(pi->hw, ICE_DBG_SW, "incorrect VSI/port type received\n"); break; } } /** * ice_get_initial_sw_cfg - Get initial port and default VSI data * @hw: pointer to the hardware structure */ int ice_get_initial_sw_cfg(struct ice_hw *hw) { struct ice_aqc_get_sw_cfg_resp_elem *rbuf; u8 num_total_ports; u16 req_desc = 0; u16 num_elems; int status; u8 j = 0; u16 i; num_total_ports = 1; rbuf = (struct ice_aqc_get_sw_cfg_resp_elem *) ice_malloc(hw, ICE_SW_CFG_MAX_BUF_LEN); if (!rbuf) return ICE_ERR_NO_MEMORY; /* Multiple calls to ice_aq_get_sw_cfg may be required * to get all the switch configuration information.
The need * for additional calls is indicated by ice_aq_get_sw_cfg * writing a non-zero value in req_desc */ do { struct ice_aqc_get_sw_cfg_resp_elem *ele; status = ice_aq_get_sw_cfg(hw, rbuf, ICE_SW_CFG_MAX_BUF_LEN, &req_desc, &num_elems, NULL); if (status) break; for (i = 0, ele = rbuf; i < num_elems; i++, ele++) { u16 pf_vf_num, swid, vsi_port_num; bool is_vf = false; u8 res_type; vsi_port_num = LE16_TO_CPU(ele->vsi_port_num) & ICE_AQC_GET_SW_CONF_RESP_VSI_PORT_NUM_M; pf_vf_num = LE16_TO_CPU(ele->pf_vf_num) & ICE_AQC_GET_SW_CONF_RESP_FUNC_NUM_M; swid = LE16_TO_CPU(ele->swid); if (LE16_TO_CPU(ele->pf_vf_num) & ICE_AQC_GET_SW_CONF_RESP_IS_VF) is_vf = true; res_type = (u8)(LE16_TO_CPU(ele->vsi_port_num) >> ICE_AQC_GET_SW_CONF_RESP_TYPE_S); switch (res_type) { case ICE_AQC_GET_SW_CONF_RESP_VSI: if (hw->fw_vsi_num != ICE_DFLT_VSI_INVAL) ice_debug(hw, ICE_DBG_SW, "fw_vsi_num %d -> %d\n", hw->fw_vsi_num, vsi_port_num); hw->fw_vsi_num = vsi_port_num; break; case ICE_AQC_GET_SW_CONF_RESP_PHYS_PORT: case ICE_AQC_GET_SW_CONF_RESP_VIRT_PORT: if (j == num_total_ports) { ice_debug(hw, ICE_DBG_SW, "more ports than expected\n"); status = ICE_ERR_CFG; goto out; } ice_init_port_info(hw->port_info, vsi_port_num, res_type, swid, pf_vf_num, is_vf); j++; break; default: break; } } } while (req_desc && !status); out: ice_free(hw, rbuf); return status; } /** * ice_fill_sw_info - Helper function to populate lb_en and lan_en * @hw: pointer to the hardware structure * @fi: filter info structure to fill/update * * This helper function populates the lb_en and lan_en elements of the provided * ice_fltr_info struct using the switch's type and characteristics of the * switch rule being configured. */ static void ice_fill_sw_info(struct ice_hw *hw, struct ice_fltr_info *fi) { fi->lb_en = false; fi->lan_en = false; if ((fi->flag & ICE_FLTR_TX) && (fi->fltr_act == ICE_FWD_TO_VSI || fi->fltr_act == ICE_FWD_TO_VSI_LIST || fi->fltr_act == ICE_FWD_TO_Q || fi->fltr_act == ICE_FWD_TO_QGRP)) { /* Setting LB for prune actions will result in replicated * packets to the internal switch that will be dropped. */ if (fi->lkup_type != ICE_SW_LKUP_VLAN) fi->lb_en = true; /* Set lan_en to TRUE if * 1. The switch is a VEB AND * 2. Any of the following is true: * 2.1 The lookup is a directional lookup like ethertype, * promiscuous, ethertype-MAC, promiscuous-VLAN * and default-port OR * 2.2 The lookup is VLAN, OR * 2.3 The lookup is MAC with mcast or bcast addr for MAC, OR * 2.4 The lookup is MAC_VLAN with mcast or bcast addr for MAC. * * OR * * The switch is a VEPA. * * In all other cases, the LAN enable has to be set to false.
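		 *
		 * Worked example (a reading of the rules above, not extra
		 * logic): a Tx unicast-MAC forwarding rule on a VEB matches
		 * none of 2.1-2.4, so lan_en stays false; the same rule on a
		 * VEPA gets lan_en = true.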
*/ if (hw->evb_veb) { if (fi->lkup_type == ICE_SW_LKUP_ETHERTYPE || fi->lkup_type == ICE_SW_LKUP_PROMISC || fi->lkup_type == ICE_SW_LKUP_ETHERTYPE_MAC || fi->lkup_type == ICE_SW_LKUP_PROMISC_VLAN || fi->lkup_type == ICE_SW_LKUP_DFLT || fi->lkup_type == ICE_SW_LKUP_VLAN || (fi->lkup_type == ICE_SW_LKUP_MAC && !IS_UNICAST_ETHER_ADDR(fi->l_data.mac.mac_addr)) || (fi->lkup_type == ICE_SW_LKUP_MAC_VLAN && !IS_UNICAST_ETHER_ADDR(fi->l_data.mac.mac_addr))) { if (!fi->fltVeb_en) fi->lan_en = true; } } else { fi->lan_en = true; } } /* To be able to receive packets coming from the VF on the same PF, * unicast filter needs to be added without LB_EN bit */ if (fi->flag & ICE_FLTR_RX_LB) { fi->lb_en = false; fi->lan_en = true; } } /** * ice_fill_sw_rule - Helper function to fill switch rule structure * @hw: pointer to the hardware structure * @f_info: entry containing packet forwarding information * @s_rule: switch rule structure to be filled in based on mac_entry * @opc: switch rules population command type - pass in the command opcode */ static void ice_fill_sw_rule(struct ice_hw *hw, struct ice_fltr_info *f_info, struct ice_sw_rule_lkup_rx_tx *s_rule, enum ice_adminq_opc opc) { u16 vlan_id = ICE_MAX_VLAN_ID + 1; u16 vlan_tpid = ICE_ETH_P_8021Q; void *daddr = NULL; u16 eth_hdr_sz; u8 *eth_hdr; u32 act = 0; __be16 *off; u8 q_rgn; if (opc == ice_aqc_opc_remove_sw_rules) { s_rule->act = 0; s_rule->index = CPU_TO_LE16(f_info->fltr_rule_id); s_rule->hdr_len = 0; return; } eth_hdr_sz = sizeof(dummy_eth_header); eth_hdr = s_rule->hdr_data; /* initialize the ether header with a dummy header */ ice_memcpy(eth_hdr, dummy_eth_header, eth_hdr_sz, ICE_NONDMA_TO_NONDMA); ice_fill_sw_info(hw, f_info); switch (f_info->fltr_act) { case ICE_FWD_TO_VSI: act |= (f_info->fwd_id.hw_vsi_id << ICE_SINGLE_ACT_VSI_ID_S) & ICE_SINGLE_ACT_VSI_ID_M; if (f_info->lkup_type != ICE_SW_LKUP_VLAN) act |= ICE_SINGLE_ACT_VSI_FORWARDING | ICE_SINGLE_ACT_VALID_BIT; break; case ICE_FWD_TO_VSI_LIST: act |= ICE_SINGLE_ACT_VSI_LIST; act |= (f_info->fwd_id.vsi_list_id << ICE_SINGLE_ACT_VSI_LIST_ID_S) & ICE_SINGLE_ACT_VSI_LIST_ID_M; if (f_info->lkup_type != ICE_SW_LKUP_VLAN) act |= ICE_SINGLE_ACT_VSI_FORWARDING | ICE_SINGLE_ACT_VALID_BIT; break; case ICE_FWD_TO_Q: act |= ICE_SINGLE_ACT_TO_Q; act |= (f_info->fwd_id.q_id << ICE_SINGLE_ACT_Q_INDEX_S) & ICE_SINGLE_ACT_Q_INDEX_M; break; case ICE_DROP_PACKET: act |= ICE_SINGLE_ACT_VSI_FORWARDING | ICE_SINGLE_ACT_DROP | ICE_SINGLE_ACT_VALID_BIT; break; case ICE_FWD_TO_QGRP: q_rgn = f_info->qgrp_size > 0 ? 
(u8)ice_ilog2(f_info->qgrp_size) : 0; act |= ICE_SINGLE_ACT_TO_Q; act |= (f_info->fwd_id.q_id << ICE_SINGLE_ACT_Q_INDEX_S) & ICE_SINGLE_ACT_Q_INDEX_M; act |= (q_rgn << ICE_SINGLE_ACT_Q_REGION_S) & ICE_SINGLE_ACT_Q_REGION_M; break; default: return; } if (f_info->lb_en) act |= ICE_SINGLE_ACT_LB_ENABLE; if (f_info->lan_en) act |= ICE_SINGLE_ACT_LAN_ENABLE; switch (f_info->lkup_type) { case ICE_SW_LKUP_MAC: daddr = f_info->l_data.mac.mac_addr; break; case ICE_SW_LKUP_VLAN: vlan_id = f_info->l_data.vlan.vlan_id; if (f_info->l_data.vlan.tpid_valid) vlan_tpid = f_info->l_data.vlan.tpid; if (f_info->fltr_act == ICE_FWD_TO_VSI || f_info->fltr_act == ICE_FWD_TO_VSI_LIST) { act |= ICE_SINGLE_ACT_PRUNE; act |= ICE_SINGLE_ACT_EGRESS | ICE_SINGLE_ACT_INGRESS; } break; case ICE_SW_LKUP_ETHERTYPE_MAC: daddr = f_info->l_data.ethertype_mac.mac_addr; /* fall-through */ case ICE_SW_LKUP_ETHERTYPE: off = (_FORCE_ __be16 *)(eth_hdr + ICE_ETH_ETHTYPE_OFFSET); *off = CPU_TO_BE16(f_info->l_data.ethertype_mac.ethertype); break; case ICE_SW_LKUP_MAC_VLAN: daddr = f_info->l_data.mac_vlan.mac_addr; vlan_id = f_info->l_data.mac_vlan.vlan_id; break; case ICE_SW_LKUP_PROMISC_VLAN: vlan_id = f_info->l_data.mac_vlan.vlan_id; /* fall-through */ case ICE_SW_LKUP_PROMISC: daddr = f_info->l_data.mac_vlan.mac_addr; break; default: break; } s_rule->hdr.type = (f_info->flag & ICE_FLTR_RX) ? CPU_TO_LE16(ICE_AQC_SW_RULES_T_LKUP_RX) : CPU_TO_LE16(ICE_AQC_SW_RULES_T_LKUP_TX); /* Recipe set depending on lookup type */ s_rule->recipe_id = CPU_TO_LE16(f_info->lkup_type); s_rule->src = CPU_TO_LE16(f_info->src); s_rule->act = CPU_TO_LE32(act); if (daddr) ice_memcpy(eth_hdr + ICE_ETH_DA_OFFSET, daddr, ETH_ALEN, ICE_NONDMA_TO_NONDMA); if (!(vlan_id > ICE_MAX_VLAN_ID)) { off = (_FORCE_ __be16 *)(eth_hdr + ICE_ETH_VLAN_TCI_OFFSET); *off = CPU_TO_BE16(vlan_id); off = (_FORCE_ __be16 *)(eth_hdr + ICE_ETH_ETHTYPE_OFFSET); *off = CPU_TO_BE16(vlan_tpid); } /* Create the switch rule with the final dummy Ethernet header */ if (opc != ice_aqc_opc_update_sw_rules) s_rule->hdr_len = CPU_TO_LE16(eth_hdr_sz); } /** * ice_add_marker_act * @hw: pointer to the hardware structure * @m_ent: the management entry for which sw marker needs to be added * @sw_marker: sw marker to tag the Rx descriptor with * @l_id: large action resource ID * * Create a large action to hold software marker and update the switch rule * entry pointed by m_ent with newly created large action */ static int ice_add_marker_act(struct ice_hw *hw, struct ice_fltr_mgmt_list_entry *m_ent, u16 sw_marker, u16 l_id) { struct ice_sw_rule_lkup_rx_tx *rx_tx; struct ice_sw_rule_lg_act *lg_act; /* For software marker we need 3 large actions * 1. FWD action: FWD TO VSI or VSI LIST * 2. GENERIC VALUE action to hold the profile ID * 3. GENERIC VALUE action to hold the software marker ID */ const u16 num_lg_acts = 3; u16 lg_act_size; u16 rules_size; int status; u32 act; u16 id; if (m_ent->fltr_info.lkup_type != ICE_SW_LKUP_MAC) return ICE_ERR_PARAM; /* Create two back-to-back switch rules and submit them to the HW using * one memory buffer: * 1. Large Action * 2. Look up Tx Rx */ lg_act_size = (u16)ice_struct_size(lg_act, act, num_lg_acts); rules_size = lg_act_size + ice_struct_size(rx_tx, hdr_data, DUMMY_ETH_HDR_LEN); lg_act = (struct ice_sw_rule_lg_act *)ice_malloc(hw, rules_size); if (!lg_act) return ICE_ERR_NO_MEMORY; rx_tx = (struct ice_sw_rule_lkup_rx_tx *)((u8 *)lg_act + lg_act_size); /* Fill in the first switch rule i.e. 
large action */ lg_act->hdr.type = CPU_TO_LE16(ICE_AQC_SW_RULES_T_LG_ACT); lg_act->index = CPU_TO_LE16(l_id); lg_act->size = CPU_TO_LE16(num_lg_acts); /* First action VSI forwarding or VSI list forwarding depending on how * many VSIs */ id = (m_ent->vsi_count > 1) ? m_ent->fltr_info.fwd_id.vsi_list_id : m_ent->fltr_info.fwd_id.hw_vsi_id; act = ICE_LG_ACT_VSI_FORWARDING | ICE_LG_ACT_VALID_BIT; act |= (id << ICE_LG_ACT_VSI_LIST_ID_S) & ICE_LG_ACT_VSI_LIST_ID_M; if (m_ent->vsi_count > 1) act |= ICE_LG_ACT_VSI_LIST; lg_act->act[0] = CPU_TO_LE32(act); /* Second action descriptor type */ act = ICE_LG_ACT_GENERIC; act |= (1 << ICE_LG_ACT_GENERIC_VALUE_S) & ICE_LG_ACT_GENERIC_VALUE_M; lg_act->act[1] = CPU_TO_LE32(act); act = (ICE_LG_ACT_GENERIC_OFF_RX_DESC_PROF_IDX << ICE_LG_ACT_GENERIC_OFFSET_S) & ICE_LG_ACT_GENERIC_OFFSET_M; /* Third action Marker value */ act |= ICE_LG_ACT_GENERIC; act |= (sw_marker << ICE_LG_ACT_GENERIC_VALUE_S) & ICE_LG_ACT_GENERIC_VALUE_M; lg_act->act[2] = CPU_TO_LE32(act); /* call the fill switch rule to fill the lookup Tx Rx structure */ ice_fill_sw_rule(hw, &m_ent->fltr_info, rx_tx, ice_aqc_opc_update_sw_rules); /* Update the action to point to the large action ID */ rx_tx->act = CPU_TO_LE32(ICE_SINGLE_ACT_PTR | ((l_id << ICE_SINGLE_ACT_PTR_VAL_S) & ICE_SINGLE_ACT_PTR_VAL_M)); /* Use the filter rule ID of the previously created rule with single * act. Once the update happens, hardware will treat this as large * action */ rx_tx->index = CPU_TO_LE16(m_ent->fltr_info.fltr_rule_id); status = ice_aq_sw_rules(hw, lg_act, rules_size, 2, ice_aqc_opc_update_sw_rules, NULL); if (!status) { m_ent->lg_act_idx = l_id; m_ent->sw_marker_id = sw_marker; } ice_free(hw, lg_act); return status; } /** * ice_add_counter_act - add/update filter rule with counter action * @hw: pointer to the hardware structure * @m_ent: the management entry for which counter needs to be added * @counter_id: VLAN counter ID returned as part of allocate resource * @l_id: large action resource ID */ static int ice_add_counter_act(struct ice_hw *hw, struct ice_fltr_mgmt_list_entry *m_ent, u16 counter_id, u16 l_id) { struct ice_sw_rule_lkup_rx_tx *rx_tx; struct ice_sw_rule_lg_act *lg_act; /* 2 actions will be added while adding a large action counter */ const int num_acts = 2; u16 lg_act_size; u16 rules_size; u16 f_rule_id; u32 act; int status; u16 id; if (m_ent->fltr_info.lkup_type != ICE_SW_LKUP_MAC) return ICE_ERR_PARAM; /* Create two back-to-back switch rules and submit them to the HW using * one memory buffer: * 1. Large Action * 2. Look up Tx Rx */ lg_act_size = (u16)ice_struct_size(lg_act, act, num_acts); rules_size = lg_act_size + ice_struct_size(rx_tx, hdr_data, DUMMY_ETH_HDR_LEN); lg_act = (struct ice_sw_rule_lg_act *)ice_malloc(hw, rules_size); if (!lg_act) return ICE_ERR_NO_MEMORY; rx_tx = (struct ice_sw_rule_lkup_rx_tx *)((u8 *)lg_act + lg_act_size); /* Fill in the first switch rule i.e. large action */ lg_act->hdr.type = CPU_TO_LE16(ICE_AQC_SW_RULES_T_LG_ACT); lg_act->index = CPU_TO_LE16(l_id); lg_act->size = CPU_TO_LE16(num_acts); /* First action VSI forwarding or VSI list forwarding depending on how * many VSIs */ id = (m_ent->vsi_count > 1) ? 
m_ent->fltr_info.fwd_id.vsi_list_id : m_ent->fltr_info.fwd_id.hw_vsi_id; act = ICE_LG_ACT_VSI_FORWARDING | ICE_LG_ACT_VALID_BIT; act |= (id << ICE_LG_ACT_VSI_LIST_ID_S) & ICE_LG_ACT_VSI_LIST_ID_M; if (m_ent->vsi_count > 1) act |= ICE_LG_ACT_VSI_LIST; lg_act->act[0] = CPU_TO_LE32(act); /* Second action counter ID */ act = ICE_LG_ACT_STAT_COUNT; act |= (counter_id << ICE_LG_ACT_STAT_COUNT_S) & ICE_LG_ACT_STAT_COUNT_M; lg_act->act[1] = CPU_TO_LE32(act); /* call the fill switch rule to fill the lookup Tx Rx structure */ ice_fill_sw_rule(hw, &m_ent->fltr_info, rx_tx, ice_aqc_opc_update_sw_rules); act = ICE_SINGLE_ACT_PTR; act |= (l_id << ICE_SINGLE_ACT_PTR_VAL_S) & ICE_SINGLE_ACT_PTR_VAL_M; rx_tx->act = CPU_TO_LE32(act); /* Use the filter rule ID of the previously created rule with single * act. Once the update happens, hardware will treat this as large * action */ f_rule_id = m_ent->fltr_info.fltr_rule_id; rx_tx->index = CPU_TO_LE16(f_rule_id); status = ice_aq_sw_rules(hw, lg_act, rules_size, 2, ice_aqc_opc_update_sw_rules, NULL); if (!status) { m_ent->lg_act_idx = l_id; m_ent->counter_index = (u8)counter_id; } ice_free(hw, lg_act); return status; } /** * ice_create_vsi_list_map * @hw: pointer to the hardware structure * @vsi_handle_arr: array of VSI handles to set in the VSI mapping * @num_vsi: number of VSI handles in the array * @vsi_list_id: VSI list ID generated as part of allocate resource * * Helper function to create a new entry of VSI list ID to VSI mapping * using the given VSI list ID */ static struct ice_vsi_list_map_info * ice_create_vsi_list_map(struct ice_hw *hw, u16 *vsi_handle_arr, u16 num_vsi, u16 vsi_list_id) { struct ice_switch_info *sw = hw->switch_info; struct ice_vsi_list_map_info *v_map; int i; v_map = (struct ice_vsi_list_map_info *)ice_malloc(hw, sizeof(*v_map)); if (!v_map) return NULL; v_map->vsi_list_id = vsi_list_id; v_map->ref_cnt = 1; for (i = 0; i < num_vsi; i++) ice_set_bit(vsi_handle_arr[i], v_map->vsi_map); LIST_ADD(&v_map->list_entry, &sw->vsi_list_map_head); return v_map; } /** * ice_update_vsi_list_rule * @hw: pointer to the hardware structure * @vsi_handle_arr: array of VSI handles to form a VSI list * @num_vsi: number of VSI handles in the array * @vsi_list_id: VSI list ID generated as part of allocate resource * @remove: Boolean value to indicate if this is a remove action * @opc: switch rules population command type - pass in the command opcode * @lkup_type: lookup type of the filter * * Call AQ command to add a new switch rule or update existing switch rule * using the given VSI list ID */ static int ice_update_vsi_list_rule(struct ice_hw *hw, u16 *vsi_handle_arr, u16 num_vsi, u16 vsi_list_id, bool remove, enum ice_adminq_opc opc, enum ice_sw_lkup_type lkup_type) { struct ice_sw_rule_vsi_list *s_rule; u16 s_rule_size; u16 rule_type; int status; int i; if (!num_vsi) return ICE_ERR_PARAM; if (lkup_type == ICE_SW_LKUP_MAC || lkup_type == ICE_SW_LKUP_MAC_VLAN || lkup_type == ICE_SW_LKUP_ETHERTYPE || lkup_type == ICE_SW_LKUP_ETHERTYPE_MAC || lkup_type == ICE_SW_LKUP_PROMISC || lkup_type == ICE_SW_LKUP_PROMISC_VLAN || lkup_type == ICE_SW_LKUP_DFLT || lkup_type == ICE_SW_LKUP_LAST) rule_type = remove ? ICE_AQC_SW_RULES_T_VSI_LIST_CLEAR : ICE_AQC_SW_RULES_T_VSI_LIST_SET; else if (lkup_type == ICE_SW_LKUP_VLAN) rule_type = remove ? 
ICE_AQC_SW_RULES_T_PRUNE_LIST_CLEAR : ICE_AQC_SW_RULES_T_PRUNE_LIST_SET; else return ICE_ERR_PARAM; s_rule_size = (u16)ice_struct_size(s_rule, vsi, num_vsi); s_rule = (struct ice_sw_rule_vsi_list *)ice_malloc(hw, s_rule_size); if (!s_rule) return ICE_ERR_NO_MEMORY; for (i = 0; i < num_vsi; i++) { if (!ice_is_vsi_valid(hw, vsi_handle_arr[i])) { status = ICE_ERR_PARAM; goto exit; } /* AQ call requires hw_vsi_id(s) */ s_rule->vsi[i] = CPU_TO_LE16(ice_get_hw_vsi_num(hw, vsi_handle_arr[i])); } s_rule->hdr.type = CPU_TO_LE16(rule_type); s_rule->number_vsi = CPU_TO_LE16(num_vsi); s_rule->index = CPU_TO_LE16(vsi_list_id); status = ice_aq_sw_rules(hw, s_rule, s_rule_size, 1, opc, NULL); exit: ice_free(hw, s_rule); return status; } /** * ice_create_vsi_list_rule - Creates and populates a VSI list rule * @hw: pointer to the HW struct * @vsi_handle_arr: array of VSI handles to form a VSI list * @num_vsi: number of VSI handles in the array * @vsi_list_id: stores the ID of the VSI list to be created * @lkup_type: switch rule filter's lookup type */ static int ice_create_vsi_list_rule(struct ice_hw *hw, u16 *vsi_handle_arr, u16 num_vsi, u16 *vsi_list_id, enum ice_sw_lkup_type lkup_type) { int status; status = ice_aq_alloc_free_vsi_list(hw, vsi_list_id, lkup_type, ice_aqc_opc_alloc_res); if (status) return status; /* Update the newly created VSI list to include the specified VSIs */ return ice_update_vsi_list_rule(hw, vsi_handle_arr, num_vsi, *vsi_list_id, false, ice_aqc_opc_add_sw_rules, lkup_type); } /** * ice_create_pkt_fwd_rule * @hw: pointer to the hardware structure * @recp_list: corresponding filter management list * @f_entry: entry containing packet forwarding information * * Create switch rule with given filter information and add an entry * to the corresponding filter management list to track this switch rule * and VSI mapping */ static int ice_create_pkt_fwd_rule(struct ice_hw *hw, struct ice_sw_recipe *recp_list, struct ice_fltr_list_entry *f_entry) { struct ice_fltr_mgmt_list_entry *fm_entry; struct ice_sw_rule_lkup_rx_tx *s_rule; int status; s_rule = (struct ice_sw_rule_lkup_rx_tx *) ice_malloc(hw, ice_struct_size(s_rule, hdr_data, DUMMY_ETH_HDR_LEN)); if (!s_rule) return ICE_ERR_NO_MEMORY; fm_entry = (struct ice_fltr_mgmt_list_entry *) ice_malloc(hw, sizeof(*fm_entry)); if (!fm_entry) { status = ICE_ERR_NO_MEMORY; goto ice_create_pkt_fwd_rule_exit; } fm_entry->fltr_info = f_entry->fltr_info; /* Initialize all the fields for the management entry */ fm_entry->vsi_count = 1; fm_entry->lg_act_idx = ICE_INVAL_LG_ACT_INDEX; fm_entry->sw_marker_id = ICE_INVAL_SW_MARKER_ID; fm_entry->counter_index = ICE_INVAL_COUNTER_ID; ice_fill_sw_rule(hw, &fm_entry->fltr_info, s_rule, ice_aqc_opc_add_sw_rules); status = ice_aq_sw_rules(hw, s_rule, ice_struct_size(s_rule, hdr_data, DUMMY_ETH_HDR_LEN), 1, ice_aqc_opc_add_sw_rules, NULL); if (status) { ice_free(hw, fm_entry); goto ice_create_pkt_fwd_rule_exit; } f_entry->fltr_info.fltr_rule_id = LE16_TO_CPU(s_rule->index); fm_entry->fltr_info.fltr_rule_id = LE16_TO_CPU(s_rule->index); /* The book keeping entries will get removed when base driver * calls remove filter AQ command */ LIST_ADD(&fm_entry->list_entry, &recp_list->filt_rules); ice_create_pkt_fwd_rule_exit: ice_free(hw, s_rule); return status; } /** * ice_update_pkt_fwd_rule * @hw: pointer to the hardware structure * @f_info: filter information for switch rule * * Call AQ command to update a previously created switch rule with a * VSI list ID */ static int ice_update_pkt_fwd_rule(struct ice_hw *hw, struct 
ice_fltr_info *f_info) { struct ice_sw_rule_lkup_rx_tx *s_rule; int status; s_rule = (struct ice_sw_rule_lkup_rx_tx *) ice_malloc(hw, ice_struct_size(s_rule, hdr_data, DUMMY_ETH_HDR_LEN)); if (!s_rule) return ICE_ERR_NO_MEMORY; ice_fill_sw_rule(hw, f_info, s_rule, ice_aqc_opc_update_sw_rules); s_rule->index = CPU_TO_LE16(f_info->fltr_rule_id); /* Update switch rule with new rule set to forward VSI list */ status = ice_aq_sw_rules(hw, s_rule, ice_struct_size(s_rule, hdr_data, DUMMY_ETH_HDR_LEN), 1, ice_aqc_opc_update_sw_rules, NULL); ice_free(hw, s_rule); return status; } /** * ice_update_sw_rule_bridge_mode * @hw: pointer to the HW struct * * Updates unicast switch filter rules based on VEB/VEPA mode */ int ice_update_sw_rule_bridge_mode(struct ice_hw *hw) { struct ice_fltr_mgmt_list_entry *fm_entry; struct LIST_HEAD_TYPE *rule_head; struct ice_lock *rule_lock; /* Lock to protect filter rule list */ struct ice_switch_info *sw; int status = 0; sw = hw->switch_info; rule_lock = &sw->recp_list[ICE_SW_LKUP_MAC].filt_rule_lock; rule_head = &sw->recp_list[ICE_SW_LKUP_MAC].filt_rules; ice_acquire_lock(rule_lock); LIST_FOR_EACH_ENTRY(fm_entry, rule_head, ice_fltr_mgmt_list_entry, list_entry) { struct ice_fltr_info *fi = &fm_entry->fltr_info; u8 *addr = fi->l_data.mac.mac_addr; /* Update unicast Tx rules to reflect the selected * VEB/VEPA mode */ if ((fi->flag & ICE_FLTR_TX) && IS_UNICAST_ETHER_ADDR(addr) && (fi->fltr_act == ICE_FWD_TO_VSI || fi->fltr_act == ICE_FWD_TO_VSI_LIST || fi->fltr_act == ICE_FWD_TO_Q || fi->fltr_act == ICE_FWD_TO_QGRP)) { status = ice_update_pkt_fwd_rule(hw, fi); if (status) break; } } ice_release_lock(rule_lock); return status; } /** * ice_add_update_vsi_list * @hw: pointer to the hardware structure * @m_entry: pointer to current filter management list entry * @cur_fltr: filter information from the book keeping entry * @new_fltr: filter information with the new VSI to be added * * Call AQ command to add or update previously created VSI list with new VSI. * * Helper function to do book keeping associated with adding filter information * The algorithm to do the book keeping is described below : * When a VSI needs to subscribe to a given filter (MAC/VLAN/Ethtype etc.) * if only one VSI has been added till now * Allocate a new VSI list and add two VSIs * to this list using switch rule command * Update the previously created switch rule with the * newly created VSI list ID * if a VSI list was previously created * Add the new VSI to the previously created VSI list set * using the update switch rule command */ static int ice_add_update_vsi_list(struct ice_hw *hw, struct ice_fltr_mgmt_list_entry *m_entry, struct ice_fltr_info *cur_fltr, struct ice_fltr_info *new_fltr) { u16 vsi_list_id = 0; int status = 0; if ((cur_fltr->fltr_act == ICE_FWD_TO_Q || cur_fltr->fltr_act == ICE_FWD_TO_QGRP)) return ICE_ERR_NOT_IMPL; if ((new_fltr->fltr_act == ICE_FWD_TO_Q || new_fltr->fltr_act == ICE_FWD_TO_QGRP) && (cur_fltr->fltr_act == ICE_FWD_TO_VSI || cur_fltr->fltr_act == ICE_FWD_TO_VSI_LIST)) return ICE_ERR_NOT_IMPL; if (m_entry->vsi_count < 2 && !m_entry->vsi_list_info) { /* Only one entry existed in the mapping and it was not already * a part of a VSI list. So, create a VSI list with the old and * new VSIs. 
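		 * The original "forward to VSI" rule is then rewritten below
		 * into a "forward to VSI list" rule via
		 * ice_update_pkt_fwd_rule().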
*/ struct ice_fltr_info tmp_fltr; u16 vsi_handle_arr[2]; /* A rule already exists with the new VSI being added */ if (cur_fltr->vsi_handle == new_fltr->vsi_handle) return ICE_ERR_ALREADY_EXISTS; vsi_handle_arr[0] = cur_fltr->vsi_handle; vsi_handle_arr[1] = new_fltr->vsi_handle; status = ice_create_vsi_list_rule(hw, &vsi_handle_arr[0], 2, &vsi_list_id, new_fltr->lkup_type); if (status) return status; tmp_fltr = *new_fltr; tmp_fltr.fltr_rule_id = cur_fltr->fltr_rule_id; tmp_fltr.fltr_act = ICE_FWD_TO_VSI_LIST; tmp_fltr.fwd_id.vsi_list_id = vsi_list_id; /* Update the previous switch rule of "MAC forward to VSI" to * "MAC fwd to VSI list" */ status = ice_update_pkt_fwd_rule(hw, &tmp_fltr); if (status) return status; cur_fltr->fwd_id.vsi_list_id = vsi_list_id; cur_fltr->fltr_act = ICE_FWD_TO_VSI_LIST; m_entry->vsi_list_info = ice_create_vsi_list_map(hw, &vsi_handle_arr[0], 2, vsi_list_id); if (!m_entry->vsi_list_info) return ICE_ERR_NO_MEMORY; /* If this entry was large action then the large action needs * to be updated to point to FWD to VSI list */ if (m_entry->sw_marker_id != ICE_INVAL_SW_MARKER_ID) status = ice_add_marker_act(hw, m_entry, m_entry->sw_marker_id, m_entry->lg_act_idx); } else { u16 vsi_handle = new_fltr->vsi_handle; enum ice_adminq_opc opcode; if (!m_entry->vsi_list_info) return ICE_ERR_CFG; /* A rule already exists with the new VSI being added */ if (ice_is_bit_set(m_entry->vsi_list_info->vsi_map, vsi_handle)) return ICE_ERR_ALREADY_EXISTS; /* Update the previously created VSI list set with * the new VSI ID passed in */ vsi_list_id = cur_fltr->fwd_id.vsi_list_id; opcode = ice_aqc_opc_update_sw_rules; status = ice_update_vsi_list_rule(hw, &vsi_handle, 1, vsi_list_id, false, opcode, new_fltr->lkup_type); /* update VSI list mapping info with new VSI ID */ if (!status) ice_set_bit(vsi_handle, m_entry->vsi_list_info->vsi_map); } if (!status) m_entry->vsi_count++; return status; } /** * ice_find_rule_entry - Search a rule entry * @list_head: head of rule list * @f_info: rule information * * Helper function to search for a given rule entry * Returns pointer to entry storing the rule if found */ static struct ice_fltr_mgmt_list_entry * ice_find_rule_entry(struct LIST_HEAD_TYPE *list_head, struct ice_fltr_info *f_info) { struct ice_fltr_mgmt_list_entry *list_itr, *ret = NULL; LIST_FOR_EACH_ENTRY(list_itr, list_head, ice_fltr_mgmt_list_entry, list_entry) { if (!memcmp(&f_info->l_data, &list_itr->fltr_info.l_data, sizeof(f_info->l_data)) && f_info->flag == list_itr->fltr_info.flag) { ret = list_itr; break; } } return ret; } /** * ice_find_vsi_list_entry - Search VSI list map with VSI count 1 * @recp_list: VSI lists needs to be searched * @vsi_handle: VSI handle to be found in VSI list * @vsi_list_id: VSI list ID found containing vsi_handle * * Helper function to search a VSI list with single entry containing given VSI * handle element. This can be extended further to search VSI list with more * than 1 vsi_count. Returns pointer to VSI list entry if found. 
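 * Note that *vsi_list_id is written only when a matching map is found.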
*/ struct ice_vsi_list_map_info * ice_find_vsi_list_entry(struct ice_sw_recipe *recp_list, u16 vsi_handle, u16 *vsi_list_id) { struct ice_vsi_list_map_info *map_info = NULL; struct LIST_HEAD_TYPE *list_head; list_head = &recp_list->filt_rules; if (recp_list->adv_rule) { struct ice_adv_fltr_mgmt_list_entry *list_itr; LIST_FOR_EACH_ENTRY(list_itr, list_head, ice_adv_fltr_mgmt_list_entry, list_entry) { if (list_itr->vsi_list_info) { map_info = list_itr->vsi_list_info; if (ice_is_bit_set(map_info->vsi_map, vsi_handle)) { *vsi_list_id = map_info->vsi_list_id; return map_info; } } } } else { struct ice_fltr_mgmt_list_entry *list_itr; LIST_FOR_EACH_ENTRY(list_itr, list_head, ice_fltr_mgmt_list_entry, list_entry) { if (list_itr->vsi_count == 1 && list_itr->vsi_list_info) { map_info = list_itr->vsi_list_info; if (ice_is_bit_set(map_info->vsi_map, vsi_handle)) { *vsi_list_id = map_info->vsi_list_id; return map_info; } } } } return NULL; } /** * ice_add_rule_internal - add rule for a given lookup type * @hw: pointer to the hardware structure * @recp_list: recipe list for which rule has to be added * @lport: logic port number on which function add rule * @f_entry: structure containing MAC forwarding information * * Adds or updates the rule lists for a given recipe */ static int ice_add_rule_internal(struct ice_hw *hw, struct ice_sw_recipe *recp_list, u8 lport, struct ice_fltr_list_entry *f_entry) { struct ice_fltr_info *new_fltr, *cur_fltr; struct ice_fltr_mgmt_list_entry *m_entry; struct ice_lock *rule_lock; /* Lock to protect filter rule list */ int status = 0; if (!ice_is_vsi_valid(hw, f_entry->fltr_info.vsi_handle)) return ICE_ERR_PARAM; /* Load the hw_vsi_id only if the fwd action is fwd to VSI */ if (f_entry->fltr_info.fltr_act == ICE_FWD_TO_VSI) f_entry->fltr_info.fwd_id.hw_vsi_id = ice_get_hw_vsi_num(hw, f_entry->fltr_info.vsi_handle); rule_lock = &recp_list->filt_rule_lock; ice_acquire_lock(rule_lock); new_fltr = &f_entry->fltr_info; if (new_fltr->flag & ICE_FLTR_RX) new_fltr->src = lport; else if (new_fltr->flag & (ICE_FLTR_TX | ICE_FLTR_RX_LB)) new_fltr->src = ice_get_hw_vsi_num(hw, f_entry->fltr_info.vsi_handle); m_entry = ice_find_rule_entry(&recp_list->filt_rules, new_fltr); if (!m_entry) { status = ice_create_pkt_fwd_rule(hw, recp_list, f_entry); goto exit_add_rule_internal; } cur_fltr = &m_entry->fltr_info; status = ice_add_update_vsi_list(hw, m_entry, cur_fltr, new_fltr); exit_add_rule_internal: ice_release_lock(rule_lock); return status; } /** * ice_remove_vsi_list_rule * @hw: pointer to the hardware structure * @vsi_list_id: VSI list ID generated as part of allocate resource * @lkup_type: switch rule filter lookup type * * The VSI list should be emptied before this function is called to remove the * VSI list. */ static int ice_remove_vsi_list_rule(struct ice_hw *hw, u16 vsi_list_id, enum ice_sw_lkup_type lkup_type) { /* Free the vsi_list resource that we allocated. It is assumed that the * list is empty at this point. 
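	 * The caller (see ice_rem_update_vsi_list() below) drains the list
	 * with update-switch-rule commands before invoking this function.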
*/ return ice_aq_alloc_free_vsi_list(hw, &vsi_list_id, lkup_type, ice_aqc_opc_free_res); } /** * ice_rem_update_vsi_list * @hw: pointer to the hardware structure * @vsi_handle: VSI handle of the VSI to remove * @fm_list: filter management entry for which the VSI list management needs to * be done */ static int ice_rem_update_vsi_list(struct ice_hw *hw, u16 vsi_handle, struct ice_fltr_mgmt_list_entry *fm_list) { enum ice_sw_lkup_type lkup_type; u16 vsi_list_id; int status = 0; if (fm_list->fltr_info.fltr_act != ICE_FWD_TO_VSI_LIST || fm_list->vsi_count == 0) return ICE_ERR_PARAM; /* A rule with the VSI being removed does not exist */ if (!ice_is_bit_set(fm_list->vsi_list_info->vsi_map, vsi_handle)) return ICE_ERR_DOES_NOT_EXIST; lkup_type = fm_list->fltr_info.lkup_type; vsi_list_id = fm_list->fltr_info.fwd_id.vsi_list_id; status = ice_update_vsi_list_rule(hw, &vsi_handle, 1, vsi_list_id, true, ice_aqc_opc_update_sw_rules, lkup_type); if (status) return status; fm_list->vsi_count--; ice_clear_bit(vsi_handle, fm_list->vsi_list_info->vsi_map); if (fm_list->vsi_count == 1 && lkup_type != ICE_SW_LKUP_VLAN) { struct ice_fltr_info tmp_fltr_info = fm_list->fltr_info; struct ice_vsi_list_map_info *vsi_list_info = fm_list->vsi_list_info; u16 rem_vsi_handle; rem_vsi_handle = ice_find_first_bit(vsi_list_info->vsi_map, ICE_MAX_VSI); if (!ice_is_vsi_valid(hw, rem_vsi_handle)) return ICE_ERR_OUT_OF_RANGE; /* Make sure VSI list is empty before removing it below */ status = ice_update_vsi_list_rule(hw, &rem_vsi_handle, 1, vsi_list_id, true, ice_aqc_opc_update_sw_rules, lkup_type); if (status) return status; tmp_fltr_info.fltr_act = ICE_FWD_TO_VSI; tmp_fltr_info.fwd_id.hw_vsi_id = ice_get_hw_vsi_num(hw, rem_vsi_handle); tmp_fltr_info.vsi_handle = rem_vsi_handle; status = ice_update_pkt_fwd_rule(hw, &tmp_fltr_info); if (status) { ice_debug(hw, ICE_DBG_SW, "Failed to update pkt fwd rule to FWD_TO_VSI on HW VSI %d, error %d\n", tmp_fltr_info.fwd_id.hw_vsi_id, status); return status; } fm_list->fltr_info = tmp_fltr_info; } if ((fm_list->vsi_count == 1 && lkup_type != ICE_SW_LKUP_VLAN) || (fm_list->vsi_count == 0 && lkup_type == ICE_SW_LKUP_VLAN)) { struct ice_vsi_list_map_info *vsi_list_info = fm_list->vsi_list_info; /* Remove the VSI list since it is no longer used */ status = ice_remove_vsi_list_rule(hw, vsi_list_id, lkup_type); if (status) { ice_debug(hw, ICE_DBG_SW, "Failed to remove VSI list %d, error %d\n", vsi_list_id, status); return status; } LIST_DEL(&vsi_list_info->list_entry); ice_free(hw, vsi_list_info); fm_list->vsi_list_info = NULL; } return status; } /** * ice_remove_rule_internal - Remove a filter rule of a given type * @hw: pointer to the hardware structure * @recp_list: recipe list for which the rule needs to removed * @f_entry: rule entry containing filter information */ static int ice_remove_rule_internal(struct ice_hw *hw, struct ice_sw_recipe *recp_list, struct ice_fltr_list_entry *f_entry) { struct ice_fltr_mgmt_list_entry *list_elem; struct ice_lock *rule_lock; /* Lock to protect filter rule list */ bool remove_rule = false; int status = 0; u16 vsi_handle; if (!ice_is_vsi_valid(hw, f_entry->fltr_info.vsi_handle)) return ICE_ERR_PARAM; f_entry->fltr_info.fwd_id.hw_vsi_id = ice_get_hw_vsi_num(hw, f_entry->fltr_info.vsi_handle); rule_lock = &recp_list->filt_rule_lock; ice_acquire_lock(rule_lock); list_elem = ice_find_rule_entry(&recp_list->filt_rules, &f_entry->fltr_info); if (!list_elem) { status = ICE_ERR_DOES_NOT_EXIST; goto exit; } if (list_elem->fltr_info.fltr_act != 
ICE_FWD_TO_VSI_LIST) { remove_rule = true; } else if (!list_elem->vsi_list_info) { status = ICE_ERR_DOES_NOT_EXIST; goto exit; } else if (list_elem->vsi_list_info->ref_cnt > 1) { /* a ref_cnt > 1 indicates that the vsi_list is being * shared by multiple rules. Decrement the ref_cnt and * remove this rule, but do not modify the list, as it * is in-use by other rules. */ list_elem->vsi_list_info->ref_cnt--; remove_rule = true; } else { /* a ref_cnt of 1 indicates the vsi_list is only used * by one rule. However, the original removal request is only * for a single VSI. Update the vsi_list first, and only * remove the rule if there are no further VSIs in this list. */ vsi_handle = f_entry->fltr_info.vsi_handle; status = ice_rem_update_vsi_list(hw, vsi_handle, list_elem); if (status) goto exit; /* if VSI count goes to zero after updating the VSI list */ if (list_elem->vsi_count == 0) remove_rule = true; } if (remove_rule) { /* Remove the lookup rule */ struct ice_sw_rule_lkup_rx_tx *s_rule; s_rule = (struct ice_sw_rule_lkup_rx_tx *) ice_malloc(hw, ice_struct_size(s_rule, hdr_data, 0)); if (!s_rule) { status = ICE_ERR_NO_MEMORY; goto exit; } ice_fill_sw_rule(hw, &list_elem->fltr_info, s_rule, ice_aqc_opc_remove_sw_rules); status = ice_aq_sw_rules(hw, s_rule, ice_struct_size(s_rule, hdr_data, 0), 1, ice_aqc_opc_remove_sw_rules, NULL); /* Remove a book keeping from the list */ ice_free(hw, s_rule); if (status) goto exit; LIST_DEL(&list_elem->list_entry); ice_free(hw, list_elem); } exit: ice_release_lock(rule_lock); return status; } /** * ice_aq_get_res_alloc - get allocated resources * @hw: pointer to the HW struct * @num_entries: pointer to u16 to store the number of resource entries returned * @buf: pointer to buffer * @buf_size: size of buf * @cd: pointer to command details structure or NULL * * The caller-supplied buffer must be large enough to store the resource * information for all resource types. Each resource type is an * ice_aqc_get_res_resp_elem structure. 
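 * Concretely, buf_size must be at least ICE_AQ_GET_RES_ALLOC_BUF_LEN
 * bytes; smaller buffers are rejected with ICE_ERR_INVAL_SIZE.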
*/ int ice_aq_get_res_alloc(struct ice_hw *hw, u16 *num_entries, struct ice_aqc_get_res_resp_elem *buf, u16 buf_size, struct ice_sq_cd *cd) { struct ice_aqc_get_res_alloc *resp; struct ice_aq_desc desc; int status; if (!buf) return ICE_ERR_BAD_PTR; if (buf_size < ICE_AQ_GET_RES_ALLOC_BUF_LEN) return ICE_ERR_INVAL_SIZE; resp = &desc.params.get_res; ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_res_alloc); status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd); if (!status && num_entries) *num_entries = LE16_TO_CPU(resp->resp_elem_num); return status; } /** * ice_aq_get_res_descs - get allocated resource descriptors * @hw: pointer to the hardware structure * @num_entries: number of resource entries in buffer * @buf: structure to hold response data buffer * @buf_size: size of buffer * @res_type: resource type * @res_shared: is resource shared * @desc_id: input - first desc ID to start; output - next desc ID * @cd: pointer to command details structure or NULL */ int ice_aq_get_res_descs(struct ice_hw *hw, u16 num_entries, struct ice_aqc_res_elem *buf, u16 buf_size, u16 res_type, bool res_shared, u16 *desc_id, struct ice_sq_cd *cd) { struct ice_aqc_get_allocd_res_desc *cmd; struct ice_aq_desc desc; int status; ice_debug(hw, ICE_DBG_TRACE, "%s\n", __func__); cmd = &desc.params.get_res_desc; if (!buf) return ICE_ERR_PARAM; if (buf_size != (num_entries * sizeof(*buf))) return ICE_ERR_PARAM; ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_allocd_res_desc); cmd->ops.cmd.res = CPU_TO_LE16(((res_type << ICE_AQC_RES_TYPE_S) & ICE_AQC_RES_TYPE_M) | (res_shared ? ICE_AQC_RES_TYPE_FLAG_SHARED : 0)); cmd->ops.cmd.first_desc = CPU_TO_LE16(*desc_id); status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd); if (!status) *desc_id = LE16_TO_CPU(cmd->ops.resp.next_desc); return status; } /** * ice_add_mac_rule - Add a MAC address based filter rule * @hw: pointer to the hardware structure * @m_list: list of MAC addresses and forwarding information * @sw: pointer to switch info struct for which function add rule * @lport: logic port number on which function add rule * * IMPORTANT: When the umac_shared flag is set to false and m_list has * multiple unicast addresses, the function assumes that all the * addresses are unique in a given add_mac call. It doesn't * check for duplicates in this case, removing duplicates from a given * list should be taken care of in the caller of this function. 
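 *
 * Sketch of a single caller-side list entry (hypothetical names; real
 * callers live in the base driver and own the list memory):
 *
 *	struct ice_fltr_list_entry e = { 0 };
 *
 *	e.fltr_info.lkup_type = ICE_SW_LKUP_MAC;
 *	e.fltr_info.fltr_act = ICE_FWD_TO_VSI;
 *	e.fltr_info.src_id = ICE_SRC_ID_VSI;
 *	e.fltr_info.vsi_handle = vsi_handle;
 *	ice_memcpy(e.fltr_info.l_data.mac.mac_addr, addr, ETH_ALEN,
 *		   ICE_NONDMA_TO_NONDMA);
 *	LIST_ADD(&e.list_entry, m_list);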
*/ static int ice_add_mac_rule(struct ice_hw *hw, struct LIST_HEAD_TYPE *m_list, struct ice_switch_info *sw, u8 lport) { struct ice_sw_recipe *recp_list = &sw->recp_list[ICE_SW_LKUP_MAC]; struct ice_sw_rule_lkup_rx_tx *s_rule, *r_iter; struct ice_fltr_list_entry *m_list_itr; struct LIST_HEAD_TYPE *rule_head; u16 total_elem_left, s_rule_size; struct ice_lock *rule_lock; /* Lock to protect filter rule list */ u16 num_unicast = 0; int status = 0; u8 elem_sent; s_rule = NULL; rule_lock = &recp_list->filt_rule_lock; rule_head = &recp_list->filt_rules; LIST_FOR_EACH_ENTRY(m_list_itr, m_list, ice_fltr_list_entry, list_entry) { u8 *add = &m_list_itr->fltr_info.l_data.mac.mac_addr[0]; u16 vsi_handle; u16 hw_vsi_id; m_list_itr->fltr_info.flag = ICE_FLTR_TX; vsi_handle = m_list_itr->fltr_info.vsi_handle; if (!ice_is_vsi_valid(hw, vsi_handle)) return ICE_ERR_PARAM; hw_vsi_id = ice_get_hw_vsi_num(hw, vsi_handle); if (m_list_itr->fltr_info.fltr_act == ICE_FWD_TO_VSI) m_list_itr->fltr_info.fwd_id.hw_vsi_id = hw_vsi_id; /* update the src in case it is VSI num */ if (m_list_itr->fltr_info.src_id != ICE_SRC_ID_VSI) return ICE_ERR_PARAM; m_list_itr->fltr_info.src = hw_vsi_id; if (m_list_itr->fltr_info.lkup_type != ICE_SW_LKUP_MAC || IS_ZERO_ETHER_ADDR(add)) return ICE_ERR_PARAM; if (IS_UNICAST_ETHER_ADDR(add) && !hw->umac_shared) { /* Don't overwrite the unicast address */ ice_acquire_lock(rule_lock); if (ice_find_rule_entry(rule_head, &m_list_itr->fltr_info)) { ice_release_lock(rule_lock); continue; } ice_release_lock(rule_lock); num_unicast++; } else if (IS_MULTICAST_ETHER_ADDR(add) || (IS_UNICAST_ETHER_ADDR(add) && hw->umac_shared)) { m_list_itr->status = ice_add_rule_internal(hw, recp_list, lport, m_list_itr); if (m_list_itr->status) return m_list_itr->status; } } ice_acquire_lock(rule_lock); /* Exit if no suitable entries were found for adding bulk switch rule */ if (!num_unicast) { status = 0; goto ice_add_mac_exit; } /* Allocate switch rule buffer for the bulk update for unicast */ s_rule_size = ice_struct_size(s_rule, hdr_data, DUMMY_ETH_HDR_LEN); s_rule = (struct ice_sw_rule_lkup_rx_tx *) ice_calloc(hw, num_unicast, s_rule_size); if (!s_rule) { status = ICE_ERR_NO_MEMORY; goto ice_add_mac_exit; } r_iter = s_rule; LIST_FOR_EACH_ENTRY(m_list_itr, m_list, ice_fltr_list_entry, list_entry) { struct ice_fltr_info *f_info = &m_list_itr->fltr_info; u8 *mac_addr = &f_info->l_data.mac.mac_addr[0]; if (IS_UNICAST_ETHER_ADDR(mac_addr)) { ice_fill_sw_rule(hw, &m_list_itr->fltr_info, r_iter, ice_aqc_opc_add_sw_rules); r_iter = (struct ice_sw_rule_lkup_rx_tx *) ((u8 *)r_iter + s_rule_size); } } /* Call AQ bulk switch rule update for all unicast addresses */ r_iter = s_rule; /* Call AQ switch rule in AQ_MAX chunk */ for (total_elem_left = num_unicast; total_elem_left > 0; total_elem_left -= elem_sent) { struct ice_sw_rule_lkup_rx_tx *entry = r_iter; elem_sent = MIN_T(u8, total_elem_left, (ICE_AQ_MAX_BUF_LEN / s_rule_size)); status = ice_aq_sw_rules(hw, entry, elem_sent * s_rule_size, elem_sent, ice_aqc_opc_add_sw_rules, NULL); if (status) goto ice_add_mac_exit; r_iter = (struct ice_sw_rule_lkup_rx_tx *) ((u8 *)r_iter + (elem_sent * s_rule_size)); } /* Fill up rule ID based on the value returned from FW */ r_iter = s_rule; LIST_FOR_EACH_ENTRY(m_list_itr, m_list, ice_fltr_list_entry, list_entry) { struct ice_fltr_info *f_info = &m_list_itr->fltr_info; u8 *mac_addr = &f_info->l_data.mac.mac_addr[0]; struct ice_fltr_mgmt_list_entry *fm_entry; if (IS_UNICAST_ETHER_ADDR(mac_addr)) { f_info->fltr_rule_id = 
LE16_TO_CPU(r_iter->index); f_info->fltr_act = ICE_FWD_TO_VSI; /* Create an entry to track this MAC address */ fm_entry = (struct ice_fltr_mgmt_list_entry *) ice_malloc(hw, sizeof(*fm_entry)); if (!fm_entry) { status = ICE_ERR_NO_MEMORY; goto ice_add_mac_exit; } fm_entry->fltr_info = *f_info; fm_entry->vsi_count = 1; /* The book keeping entries will get removed when * base driver calls remove filter AQ command */ LIST_ADD(&fm_entry->list_entry, rule_head); r_iter = (struct ice_sw_rule_lkup_rx_tx *) ((u8 *)r_iter + s_rule_size); } } ice_add_mac_exit: ice_release_lock(rule_lock); if (s_rule) ice_free(hw, s_rule); return status; } /** * ice_add_mac - Add a MAC address based filter rule * @hw: pointer to the hardware structure * @m_list: list of MAC addresses and forwarding information * * Function add MAC rule for logical port from HW struct */ int ice_add_mac(struct ice_hw *hw, struct LIST_HEAD_TYPE *m_list) { if (!m_list || !hw) return ICE_ERR_PARAM; return ice_add_mac_rule(hw, m_list, hw->switch_info, hw->port_info->lport); } /** * ice_add_vlan_internal - Add one VLAN based filter rule * @hw: pointer to the hardware structure * @recp_list: recipe list for which rule has to be added * @f_entry: filter entry containing one VLAN information */ static int ice_add_vlan_internal(struct ice_hw *hw, struct ice_sw_recipe *recp_list, struct ice_fltr_list_entry *f_entry) { struct ice_fltr_mgmt_list_entry *v_list_itr; struct ice_fltr_info *new_fltr, *cur_fltr; enum ice_sw_lkup_type lkup_type; u16 vsi_list_id = 0, vsi_handle; struct ice_lock *rule_lock; /* Lock to protect filter rule list */ int status = 0; if (!ice_is_vsi_valid(hw, f_entry->fltr_info.vsi_handle)) return ICE_ERR_PARAM; f_entry->fltr_info.fwd_id.hw_vsi_id = ice_get_hw_vsi_num(hw, f_entry->fltr_info.vsi_handle); new_fltr = &f_entry->fltr_info; /* VLAN ID should only be 12 bits */ if (new_fltr->l_data.vlan.vlan_id > ICE_MAX_VLAN_ID) return ICE_ERR_PARAM; if (new_fltr->src_id != ICE_SRC_ID_VSI) return ICE_ERR_PARAM; new_fltr->src = new_fltr->fwd_id.hw_vsi_id; lkup_type = new_fltr->lkup_type; vsi_handle = new_fltr->vsi_handle; rule_lock = &recp_list->filt_rule_lock; ice_acquire_lock(rule_lock); v_list_itr = ice_find_rule_entry(&recp_list->filt_rules, new_fltr); if (!v_list_itr) { struct ice_vsi_list_map_info *map_info = NULL; if (new_fltr->fltr_act == ICE_FWD_TO_VSI) { /* All VLAN pruning rules use a VSI list. Check if * there is already a VSI list containing VSI that we * want to add. If found, use the same vsi_list_id for * this new VLAN rule or else create a new list. */ map_info = ice_find_vsi_list_entry(recp_list, vsi_handle, &vsi_list_id); if (!map_info) { status = ice_create_vsi_list_rule(hw, &vsi_handle, 1, &vsi_list_id, lkup_type); if (status) goto exit; } /* Convert the action to forwarding to a VSI list. */ new_fltr->fltr_act = ICE_FWD_TO_VSI_LIST; new_fltr->fwd_id.vsi_list_id = vsi_list_id; } status = ice_create_pkt_fwd_rule(hw, recp_list, f_entry); if (!status) { v_list_itr = ice_find_rule_entry(&recp_list->filt_rules, new_fltr); if (!v_list_itr) { status = ICE_ERR_DOES_NOT_EXIST; goto exit; } /* reuse VSI list for new rule and increment ref_cnt */ if (map_info) { v_list_itr->vsi_list_info = map_info; map_info->ref_cnt++; } else { v_list_itr->vsi_list_info = ice_create_vsi_list_map(hw, &vsi_handle, 1, vsi_list_id); } } } else if (v_list_itr->vsi_list_info->ref_cnt == 1) { /* Update existing VSI list to add new VSI ID only if it used * by one VLAN rule. 
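		 * A list shared by several rules (ref_cnt > 1) is instead
		 * handled in the else branch below by spinning off a new list.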
*/ cur_fltr = &v_list_itr->fltr_info; status = ice_add_update_vsi_list(hw, v_list_itr, cur_fltr, new_fltr); } else { /* If VLAN rule exists and VSI list being used by this rule is * referenced by more than 1 VLAN rule. Then create a new VSI * list appending previous VSI with new VSI and update existing * VLAN rule to point to new VSI list ID */ struct ice_fltr_info tmp_fltr; u16 vsi_handle_arr[2]; u16 cur_handle; /* Current implementation only supports reusing VSI list with * one VSI count. We should never hit below condition */ if (v_list_itr->vsi_count > 1 && v_list_itr->vsi_list_info->ref_cnt > 1) { ice_debug(hw, ICE_DBG_SW, "Invalid configuration: Optimization to reuse VSI list with more than one VSI is not being done yet\n"); status = ICE_ERR_CFG; goto exit; } cur_handle = ice_find_first_bit(v_list_itr->vsi_list_info->vsi_map, ICE_MAX_VSI); /* A rule already exists with the new VSI being added */ if (cur_handle == vsi_handle) { status = ICE_ERR_ALREADY_EXISTS; goto exit; } vsi_handle_arr[0] = cur_handle; vsi_handle_arr[1] = vsi_handle; status = ice_create_vsi_list_rule(hw, &vsi_handle_arr[0], 2, &vsi_list_id, lkup_type); if (status) goto exit; tmp_fltr = v_list_itr->fltr_info; tmp_fltr.fltr_rule_id = v_list_itr->fltr_info.fltr_rule_id; tmp_fltr.fwd_id.vsi_list_id = vsi_list_id; tmp_fltr.fltr_act = ICE_FWD_TO_VSI_LIST; /* Update the previous switch rule to a new VSI list which * includes current VSI that is requested */ status = ice_update_pkt_fwd_rule(hw, &tmp_fltr); if (status) goto exit; /* before overriding VSI list map info. decrement ref_cnt of * previous VSI list */ v_list_itr->vsi_list_info->ref_cnt--; /* now update to newly created list */ v_list_itr->fltr_info.fwd_id.vsi_list_id = vsi_list_id; v_list_itr->vsi_list_info = ice_create_vsi_list_map(hw, &vsi_handle_arr[0], 2, vsi_list_id); v_list_itr->vsi_count++; } exit: ice_release_lock(rule_lock); return status; } /** * ice_add_vlan_rule - Add VLAN based filter rule * @hw: pointer to the hardware structure * @v_list: list of VLAN entries and forwarding information * @sw: pointer to switch info struct for which function add rule */ static int ice_add_vlan_rule(struct ice_hw *hw, struct LIST_HEAD_TYPE *v_list, struct ice_switch_info *sw) { struct ice_fltr_list_entry *v_list_itr; struct ice_sw_recipe *recp_list; recp_list = &sw->recp_list[ICE_SW_LKUP_VLAN]; LIST_FOR_EACH_ENTRY(v_list_itr, v_list, ice_fltr_list_entry, list_entry) { if (v_list_itr->fltr_info.lkup_type != ICE_SW_LKUP_VLAN) return ICE_ERR_PARAM; v_list_itr->fltr_info.flag = ICE_FLTR_TX; v_list_itr->status = ice_add_vlan_internal(hw, recp_list, v_list_itr); if (v_list_itr->status) return v_list_itr->status; } return 0; } /** * ice_add_vlan - Add a VLAN based filter rule * @hw: pointer to the hardware structure * @v_list: list of VLAN and forwarding information * * Function add VLAN rule for logical port from HW struct */ int ice_add_vlan(struct ice_hw *hw, struct LIST_HEAD_TYPE *v_list) { if (!v_list || !hw) return ICE_ERR_PARAM; return ice_add_vlan_rule(hw, v_list, hw->switch_info); } /** * ice_add_eth_mac_rule - Add ethertype and MAC based filter rule * @hw: pointer to the hardware structure * @em_list: list of ether type MAC filter, MAC is optional * @sw: pointer to switch info struct for which function add rule * @lport: logic port number on which function add rule * * This function requires the caller to populate the entries in * the filter list with the necessary fields (including flags to * indicate Tx or Rx rules). 
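 *
 * Sketch of one such entry (hypothetical caller-side values):
 *
 *	entry.fltr_info.lkup_type = ICE_SW_LKUP_ETHERTYPE;
 *	entry.fltr_info.flag = ICE_FLTR_TX;
 *	entry.fltr_info.fltr_act = ICE_FWD_TO_VSI;
 *	entry.fltr_info.vsi_handle = vsi_handle;
 *	entry.fltr_info.l_data.ethertype_mac.ethertype = ethertype;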
*/ static int ice_add_eth_mac_rule(struct ice_hw *hw, struct LIST_HEAD_TYPE *em_list, struct ice_switch_info *sw, u8 lport) { struct ice_fltr_list_entry *em_list_itr; LIST_FOR_EACH_ENTRY(em_list_itr, em_list, ice_fltr_list_entry, list_entry) { struct ice_sw_recipe *recp_list; enum ice_sw_lkup_type l_type; l_type = em_list_itr->fltr_info.lkup_type; recp_list = &sw->recp_list[l_type]; if (l_type != ICE_SW_LKUP_ETHERTYPE_MAC && l_type != ICE_SW_LKUP_ETHERTYPE) return ICE_ERR_PARAM; em_list_itr->status = ice_add_rule_internal(hw, recp_list, lport, em_list_itr); if (em_list_itr->status) return em_list_itr->status; } return 0; } /** * ice_add_eth_mac - Add an ethertype based filter rule * @hw: pointer to the hardware structure * @em_list: list of ethertype and forwarding information * * Function add ethertype rule for logical port from HW struct */ int ice_add_eth_mac(struct ice_hw *hw, struct LIST_HEAD_TYPE *em_list) { if (!em_list || !hw) return ICE_ERR_PARAM; return ice_add_eth_mac_rule(hw, em_list, hw->switch_info, hw->port_info->lport); } /** * ice_remove_eth_mac_rule - Remove an ethertype (or MAC) based filter rule * @hw: pointer to the hardware structure * @em_list: list of ethertype or ethertype MAC entries * @sw: pointer to switch info struct for which function add rule */ static int ice_remove_eth_mac_rule(struct ice_hw *hw, struct LIST_HEAD_TYPE *em_list, struct ice_switch_info *sw) { struct ice_fltr_list_entry *em_list_itr, *tmp; LIST_FOR_EACH_ENTRY_SAFE(em_list_itr, tmp, em_list, ice_fltr_list_entry, list_entry) { struct ice_sw_recipe *recp_list; enum ice_sw_lkup_type l_type; l_type = em_list_itr->fltr_info.lkup_type; if (l_type != ICE_SW_LKUP_ETHERTYPE_MAC && l_type != ICE_SW_LKUP_ETHERTYPE) return ICE_ERR_PARAM; recp_list = &sw->recp_list[l_type]; em_list_itr->status = ice_remove_rule_internal(hw, recp_list, em_list_itr); if (em_list_itr->status) return em_list_itr->status; } return 0; } /** * ice_remove_eth_mac - remove an ethertype based filter rule * @hw: pointer to the hardware structure * @em_list: list of ethertype and forwarding information */ int ice_remove_eth_mac(struct ice_hw *hw, struct LIST_HEAD_TYPE *em_list) { if (!em_list || !hw) return ICE_ERR_PARAM; return ice_remove_eth_mac_rule(hw, em_list, hw->switch_info); } /** * ice_get_lg_act_aqc_res_type - get resource type for a large action * @res_type: resource type to be filled in case of function success * @num_acts: number of actions to hold with a large action entry * * Get resource type for a large action depending on the number * of single actions that it contains. */ static int ice_get_lg_act_aqc_res_type(u16 *res_type, int num_acts) { if (!res_type) return ICE_ERR_BAD_PTR; /* If num_acts is 1, use ICE_AQC_RES_TYPE_WIDE_TABLE_1. * If num_acts is 2, use ICE_AQC_RES_TYPE_WIDE_TABLE_2. * If num_acts is greater than 2, then use * ICE_AQC_RES_TYPE_WIDE_TABLE_4. * The num_acts cannot be equal to 0 or greater than 4.
*/ switch (num_acts) { case 1: *res_type = ICE_AQC_RES_TYPE_WIDE_TABLE_1; break; case 2: *res_type = ICE_AQC_RES_TYPE_WIDE_TABLE_2; break; case 3: case 4: *res_type = ICE_AQC_RES_TYPE_WIDE_TABLE_4; break; default: return ICE_ERR_PARAM; } return 0; } /** * ice_alloc_res_lg_act - add large action resource * @hw: pointer to the hardware structure * @l_id: large action ID to fill it in * @num_acts: number of actions to hold with a large action entry */ static int ice_alloc_res_lg_act(struct ice_hw *hw, u16 *l_id, u16 num_acts) { struct ice_aqc_alloc_free_res_elem *sw_buf; u16 buf_len, res_type; int status; if (!l_id) return ICE_ERR_BAD_PTR; status = ice_get_lg_act_aqc_res_type(&res_type, num_acts); if (status) return status; /* Allocate resource for large action */ buf_len = ice_struct_size(sw_buf, elem, 1); sw_buf = (struct ice_aqc_alloc_free_res_elem *)ice_malloc(hw, buf_len); if (!sw_buf) return ICE_ERR_NO_MEMORY; sw_buf->res_type = CPU_TO_LE16(res_type); sw_buf->num_elems = CPU_TO_LE16(1); status = ice_aq_alloc_free_res(hw, 1, sw_buf, buf_len, ice_aqc_opc_alloc_res, NULL); if (!status) *l_id = LE16_TO_CPU(sw_buf->elem[0].e.sw_resp); ice_free(hw, sw_buf); return status; } /** * ice_rem_sw_rule_info * @hw: pointer to the hardware structure * @rule_head: pointer to the switch list structure that we want to delete */ static void ice_rem_sw_rule_info(struct ice_hw *hw, struct LIST_HEAD_TYPE *rule_head) { if (!LIST_EMPTY(rule_head)) { struct ice_fltr_mgmt_list_entry *entry; struct ice_fltr_mgmt_list_entry *tmp; LIST_FOR_EACH_ENTRY_SAFE(entry, tmp, rule_head, ice_fltr_mgmt_list_entry, list_entry) { LIST_DEL(&entry->list_entry); ice_free(hw, entry); } } } /** * ice_rem_all_sw_rules_info * @hw: pointer to the hardware structure */ void ice_rem_all_sw_rules_info(struct ice_hw *hw) { struct ice_switch_info *sw = hw->switch_info; u8 i; for (i = 0; i < ICE_MAX_NUM_RECIPES; i++) { struct LIST_HEAD_TYPE *rule_head; rule_head = &sw->recp_list[i].filt_rules; if (!sw->recp_list[i].adv_rule) ice_rem_sw_rule_info(hw, rule_head); } } /** * ice_cfg_dflt_vsi - change state of VSI to set/clear default * @pi: pointer to the port_info structure * @vsi_handle: VSI handle to set as default * @set: true to add the above mentioned switch rule, false to remove it * @direction: ICE_FLTR_RX or ICE_FLTR_TX * * add filter rule to set/unset given VSI as default VSI for the switch * (represented by swid) */ int ice_cfg_dflt_vsi(struct ice_port_info *pi, u16 vsi_handle, bool set, u8 direction) { struct ice_fltr_list_entry f_list_entry; struct ice_sw_recipe *recp_list = NULL; struct ice_fltr_info f_info; struct ice_hw *hw = pi->hw; u8 lport = pi->lport; u16 hw_vsi_id; int status; recp_list = &pi->hw->switch_info->recp_list[ICE_SW_LKUP_DFLT]; if (!ice_is_vsi_valid(hw, vsi_handle)) return ICE_ERR_PARAM; hw_vsi_id = ice_get_hw_vsi_num(hw, vsi_handle); ice_memset(&f_info, 0, sizeof(f_info), ICE_NONDMA_MEM); f_info.lkup_type = ICE_SW_LKUP_DFLT; f_info.flag = direction; f_info.fltr_act = ICE_FWD_TO_VSI; f_info.fwd_id.hw_vsi_id = hw_vsi_id; f_info.vsi_handle = vsi_handle; if (f_info.flag & ICE_FLTR_RX) { f_info.src = pi->lport; f_info.src_id = ICE_SRC_ID_LPORT; } else if (f_info.flag & ICE_FLTR_TX) { f_info.src_id = ICE_SRC_ID_VSI; f_info.src = hw_vsi_id; } f_list_entry.fltr_info = f_info; if (set) status = ice_add_rule_internal(hw, recp_list, lport, &f_list_entry); else status = ice_remove_rule_internal(hw, recp_list, &f_list_entry); return status; } /** * ice_check_if_dflt_vsi - check if VSI is default VSI * @pi: pointer to the 
port_info structure * @vsi_handle: VSI handle to check for in the filter list * @rule_exists: indicates if there are any VSIs in the rule list * * checks if the VSI is in a default VSI list, and also indicates * if the default VSI list is empty */ bool ice_check_if_dflt_vsi(struct ice_port_info *pi, u16 vsi_handle, bool *rule_exists) { struct ice_fltr_mgmt_list_entry *fm_entry; struct LIST_HEAD_TYPE *rule_head; struct ice_sw_recipe *recp_list; struct ice_lock *rule_lock; bool ret = false; recp_list = &pi->hw->switch_info->recp_list[ICE_SW_LKUP_DFLT]; rule_lock = &recp_list->filt_rule_lock; rule_head = &recp_list->filt_rules; ice_acquire_lock(rule_lock); if (rule_exists && !LIST_EMPTY(rule_head)) *rule_exists = true; LIST_FOR_EACH_ENTRY(fm_entry, rule_head, ice_fltr_mgmt_list_entry, list_entry) { if (ice_vsi_uses_fltr(fm_entry, vsi_handle)) { ret = true; break; } } ice_release_lock(rule_lock); return ret; } /** * ice_find_ucast_rule_entry - Search for a unicast MAC filter rule entry * @list_head: head of rule list * @f_info: rule information * * Helper function to search for a unicast rule entry - this is to be used * to remove a unicast MAC filter that is not shared with other VSIs on the * PF switch. * * Returns a pointer to the entry storing the rule, if found */ static struct ice_fltr_mgmt_list_entry * ice_find_ucast_rule_entry(struct LIST_HEAD_TYPE *list_head, struct ice_fltr_info *f_info) { struct ice_fltr_mgmt_list_entry *list_itr; LIST_FOR_EACH_ENTRY(list_itr, list_head, ice_fltr_mgmt_list_entry, list_entry) { if (!memcmp(&f_info->l_data, &list_itr->fltr_info.l_data, sizeof(f_info->l_data)) && f_info->fwd_id.hw_vsi_id == list_itr->fltr_info.fwd_id.hw_vsi_id && f_info->flag == list_itr->fltr_info.flag) return list_itr; } return NULL; } /** * ice_remove_mac_rule - remove a MAC based filter rule * @hw: pointer to the hardware structure * @m_list: list of MAC addresses and forwarding information * @recp_list: list from which the function removes the MAC address * * This function removes either a MAC filter rule or a specific VSI from a * VSI list for a multicast MAC address. * * Returns ICE_ERR_DOES_NOT_EXIST if a given entry was not added by * ice_add_mac. The caller should be aware that this call will only work if all * the entries passed into m_list were added previously. It will not attempt to * do a partial remove of entries that were found. */ static int ice_remove_mac_rule(struct ice_hw *hw, struct LIST_HEAD_TYPE *m_list, struct ice_sw_recipe *recp_list) { struct ice_fltr_list_entry *list_itr, *tmp; struct ice_lock *rule_lock; /* Lock to protect filter rule list */ if (!m_list) return ICE_ERR_PARAM; rule_lock = &recp_list->filt_rule_lock; LIST_FOR_EACH_ENTRY_SAFE(list_itr, tmp, m_list, ice_fltr_list_entry, list_entry) { enum ice_sw_lkup_type l_type = list_itr->fltr_info.lkup_type; u8 *add = &list_itr->fltr_info.l_data.mac.mac_addr[0]; u16 vsi_handle; if (l_type != ICE_SW_LKUP_MAC) return ICE_ERR_PARAM; vsi_handle = list_itr->fltr_info.vsi_handle; if (!ice_is_vsi_valid(hw, vsi_handle)) return ICE_ERR_PARAM; list_itr->fltr_info.fwd_id.hw_vsi_id = ice_get_hw_vsi_num(hw, vsi_handle); if (IS_UNICAST_ETHER_ADDR(add) && !hw->umac_shared) { /* Don't remove the unicast address that belongs to * another VSI on the switch, since it is not being * shared...
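		 * (in other words, when umac_shared is not set, the unicast
		 * filter is only removed if ice_find_ucast_rule_entry()
		 * below confirms that this PF installed it)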
*/ ice_acquire_lock(rule_lock); if (!ice_find_ucast_rule_entry(&recp_list->filt_rules, &list_itr->fltr_info)) { ice_release_lock(rule_lock); return ICE_ERR_DOES_NOT_EXIST; } ice_release_lock(rule_lock); } list_itr->status = ice_remove_rule_internal(hw, recp_list, list_itr); if (list_itr->status) return list_itr->status; } return 0; } /** * ice_remove_mac - remove a MAC address based filter rule * @hw: pointer to the hardware structure * @m_list: list of MAC addresses and forwarding information * */ int ice_remove_mac(struct ice_hw *hw, struct LIST_HEAD_TYPE *m_list) { struct ice_sw_recipe *recp_list; recp_list = &hw->switch_info->recp_list[ICE_SW_LKUP_MAC]; return ice_remove_mac_rule(hw, m_list, recp_list); } /** * ice_remove_vlan_rule - Remove VLAN based filter rule * @hw: pointer to the hardware structure * @v_list: list of VLAN entries and forwarding information * @recp_list: list from which function remove VLAN */ static int ice_remove_vlan_rule(struct ice_hw *hw, struct LIST_HEAD_TYPE *v_list, struct ice_sw_recipe *recp_list) { struct ice_fltr_list_entry *v_list_itr, *tmp; LIST_FOR_EACH_ENTRY_SAFE(v_list_itr, tmp, v_list, ice_fltr_list_entry, list_entry) { enum ice_sw_lkup_type l_type = v_list_itr->fltr_info.lkup_type; if (l_type != ICE_SW_LKUP_VLAN) return ICE_ERR_PARAM; v_list_itr->status = ice_remove_rule_internal(hw, recp_list, v_list_itr); if (v_list_itr->status) return v_list_itr->status; } return 0; } /** * ice_remove_vlan - remove a VLAN address based filter rule * @hw: pointer to the hardware structure * @v_list: list of VLAN and forwarding information * */ int ice_remove_vlan(struct ice_hw *hw, struct LIST_HEAD_TYPE *v_list) { struct ice_sw_recipe *recp_list; if (!v_list || !hw) return ICE_ERR_PARAM; recp_list = &hw->switch_info->recp_list[ICE_SW_LKUP_VLAN]; return ice_remove_vlan_rule(hw, v_list, recp_list); } /** * ice_vsi_uses_fltr - Determine if given VSI uses specified filter * @fm_entry: filter entry to inspect * @vsi_handle: VSI handle to compare with filter info */ static bool ice_vsi_uses_fltr(struct ice_fltr_mgmt_list_entry *fm_entry, u16 vsi_handle) { return ((fm_entry->fltr_info.fltr_act == ICE_FWD_TO_VSI && fm_entry->fltr_info.vsi_handle == vsi_handle) || (fm_entry->fltr_info.fltr_act == ICE_FWD_TO_VSI_LIST && fm_entry->vsi_list_info && (ice_is_bit_set(fm_entry->vsi_list_info->vsi_map, vsi_handle)))); } /** * ice_add_entry_to_vsi_fltr_list - Add copy of fltr_list_entry to remove list * @hw: pointer to the hardware structure * @vsi_handle: VSI handle to remove filters from * @vsi_list_head: pointer to the list to add entry to * @fi: pointer to fltr_info of filter entry to copy & add * * Helper function, used when creating a list of filters to remove from * a specific VSI. The entry added to vsi_list_head is a COPY of the * original filter entry, with the exception of fltr_info.fltr_act and * fltr_info.fwd_id fields. These are set such that later logic can * extract which VSI to remove the fltr from, and pass on that information. 
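 *
 * A typical consumer frees the copies once done; as an illustrative sketch,
 * the cleanup mirrors the pattern used elsewhere in this file:
 *
 *	LIST_FOR_EACH_ENTRY_SAFE(fm_entry, tmp, &remove_list_head,
 *				 ice_fltr_list_entry, list_entry) {
 *		LIST_DEL(&fm_entry->list_entry);
 *		ice_free(hw, fm_entry);
 *	}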
*/ static int ice_add_entry_to_vsi_fltr_list(struct ice_hw *hw, u16 vsi_handle, struct LIST_HEAD_TYPE *vsi_list_head, struct ice_fltr_info *fi) { struct ice_fltr_list_entry *tmp; /* this memory is freed up in the caller function * once filters for this VSI are removed */ tmp = (struct ice_fltr_list_entry *)ice_malloc(hw, sizeof(*tmp)); if (!tmp) return ICE_ERR_NO_MEMORY; tmp->fltr_info = *fi; /* Overwrite these fields to indicate which VSI to remove the filter * from, so the find and remove logic can extract the information from * the list entries. Note that original entries will still have proper * values. */ tmp->fltr_info.fltr_act = ICE_FWD_TO_VSI; tmp->fltr_info.vsi_handle = vsi_handle; tmp->fltr_info.fwd_id.hw_vsi_id = ice_get_hw_vsi_num(hw, vsi_handle); LIST_ADD(&tmp->list_entry, vsi_list_head); return 0; } /** * ice_add_to_vsi_fltr_list - Add VSI filters to the list * @hw: pointer to the hardware structure * @vsi_handle: VSI handle to remove filters from * @lkup_list_head: pointer to the list that has certain lookup type filters * @vsi_list_head: pointer to the list pertaining to VSI with vsi_handle * * Locates all filters in lkup_list_head that are used by the given VSI, * and adds COPIES of those entries to vsi_list_head (intended to be used * to remove the listed filters). * Note that this means all entries in vsi_list_head must be explicitly * deallocated by the caller when done with the list. */ static int ice_add_to_vsi_fltr_list(struct ice_hw *hw, u16 vsi_handle, struct LIST_HEAD_TYPE *lkup_list_head, struct LIST_HEAD_TYPE *vsi_list_head) { struct ice_fltr_mgmt_list_entry *fm_entry; int status = 0; /* check to make sure VSI ID is valid and within boundary */ if (!ice_is_vsi_valid(hw, vsi_handle)) return ICE_ERR_PARAM; LIST_FOR_EACH_ENTRY(fm_entry, lkup_list_head, ice_fltr_mgmt_list_entry, list_entry) { if (!ice_vsi_uses_fltr(fm_entry, vsi_handle)) continue; status = ice_add_entry_to_vsi_fltr_list(hw, vsi_handle, vsi_list_head, &fm_entry->fltr_info); if (status) return status; } return status; } /** * ice_determine_promisc_mask * @fi: filter info to parse * @promisc_mask: pointer to mask to be filled in * * Helper function to determine which ICE_PROMISC_ mask corresponds * to a given filter info. */ static void ice_determine_promisc_mask(struct ice_fltr_info *fi, ice_bitmap_t *promisc_mask) { u16 vid = fi->l_data.mac_vlan.vlan_id; u8 *macaddr = fi->l_data.mac.mac_addr; bool is_rx_lb_fltr = false; bool is_tx_fltr = false; ice_zero_bitmap(promisc_mask, ICE_PROMISC_MAX); if (fi->flag == ICE_FLTR_TX) is_tx_fltr = true; if (fi->flag == ICE_FLTR_RX_LB) is_rx_lb_fltr = true; if (IS_BROADCAST_ETHER_ADDR(macaddr)) { ice_set_bit(is_tx_fltr ? ICE_PROMISC_BCAST_TX : ICE_PROMISC_BCAST_RX, promisc_mask); } else if (IS_MULTICAST_ETHER_ADDR(macaddr)) { ice_set_bit(is_tx_fltr ? ICE_PROMISC_MCAST_TX : ICE_PROMISC_MCAST_RX, promisc_mask); } else if (IS_UNICAST_ETHER_ADDR(macaddr)) { if (is_tx_fltr) ice_set_bit(ICE_PROMISC_UCAST_TX, promisc_mask); else if (is_rx_lb_fltr) ice_set_bit(ICE_PROMISC_UCAST_RX_LB, promisc_mask); else ice_set_bit(ICE_PROMISC_UCAST_RX, promisc_mask); } if (vid) { ice_set_bit(is_tx_fltr ?
ICE_PROMISC_VLAN_TX : ICE_PROMISC_VLAN_RX, promisc_mask); } } /** * _ice_get_vsi_promisc - get promiscuous mode of given VSI * @hw: pointer to the hardware structure * @vsi_handle: VSI handle to retrieve info from * @promisc_mask: pointer to mask to be filled in * @vid: VLAN ID of promisc VLAN VSI * @sw: pointer to switch info struct from which the function reads the rules * @lkup: switch rule filter lookup type */ static int _ice_get_vsi_promisc(struct ice_hw *hw, u16 vsi_handle, ice_bitmap_t *promisc_mask, u16 *vid, struct ice_switch_info *sw, enum ice_sw_lkup_type lkup) { ice_declare_bitmap(fltr_promisc_mask, ICE_PROMISC_MAX); struct ice_fltr_mgmt_list_entry *itr; struct LIST_HEAD_TYPE *rule_head; struct ice_lock *rule_lock; /* Lock to protect filter rule list */ if (!ice_is_vsi_valid(hw, vsi_handle) || (lkup != ICE_SW_LKUP_PROMISC && lkup != ICE_SW_LKUP_PROMISC_VLAN)) return ICE_ERR_PARAM; *vid = 0; rule_head = &sw->recp_list[lkup].filt_rules; rule_lock = &sw->recp_list[lkup].filt_rule_lock; ice_zero_bitmap(promisc_mask, ICE_PROMISC_MAX); ice_acquire_lock(rule_lock); LIST_FOR_EACH_ENTRY(itr, rule_head, ice_fltr_mgmt_list_entry, list_entry) { /* Continue if this filter doesn't apply to this VSI or the * VSI ID is not in the VSI map for this filter */ if (!ice_vsi_uses_fltr(itr, vsi_handle)) continue; ice_determine_promisc_mask(&itr->fltr_info, fltr_promisc_mask); ice_or_bitmap(promisc_mask, promisc_mask, fltr_promisc_mask, ICE_PROMISC_MAX); } ice_release_lock(rule_lock); return 0; } /** * ice_get_vsi_promisc - get promiscuous mode of given VSI * @hw: pointer to the hardware structure * @vsi_handle: VSI handle to retrieve info from * @promisc_mask: pointer to mask to be filled in * @vid: VLAN ID of promisc VLAN VSI */ int ice_get_vsi_promisc(struct ice_hw *hw, u16 vsi_handle, ice_bitmap_t *promisc_mask, u16 *vid) { if (!vid || !promisc_mask || !hw) return ICE_ERR_PARAM; return _ice_get_vsi_promisc(hw, vsi_handle, promisc_mask, vid, hw->switch_info, ICE_SW_LKUP_PROMISC); } /** * ice_get_vsi_vlan_promisc - get VLAN promiscuous mode of given VSI * @hw: pointer to the hardware structure * @vsi_handle: VSI handle to retrieve info from * @promisc_mask: pointer to mask to be filled in * @vid: VLAN ID of promisc VLAN VSI */ int ice_get_vsi_vlan_promisc(struct ice_hw *hw, u16 vsi_handle, ice_bitmap_t *promisc_mask, u16 *vid) { if (!hw || !promisc_mask || !vid) return ICE_ERR_PARAM; return _ice_get_vsi_promisc(hw, vsi_handle, promisc_mask, vid, hw->switch_info, ICE_SW_LKUP_PROMISC_VLAN); } /** * ice_remove_promisc - Remove promisc based filter rules * @hw: pointer to the hardware structure * @recp_id: recipe ID for which the rule needs to be removed * @v_list: list of promisc entries */ static int ice_remove_promisc(struct ice_hw *hw, u8 recp_id, struct LIST_HEAD_TYPE *v_list) { struct ice_fltr_list_entry *v_list_itr, *tmp; struct ice_sw_recipe *recp_list; recp_list = &hw->switch_info->recp_list[recp_id]; LIST_FOR_EACH_ENTRY_SAFE(v_list_itr, tmp, v_list, ice_fltr_list_entry, list_entry) { v_list_itr->status = ice_remove_rule_internal(hw, recp_list, v_list_itr); if (v_list_itr->status) return v_list_itr->status; } return 0; } /** * _ice_clear_vsi_promisc - clear specified promiscuous mode(s) * @hw: pointer to the hardware structure * @vsi_handle: VSI handle to clear mode * @promisc_mask: pointer to mask of promiscuous config bits to clear * @vid: VLAN ID to clear VLAN promiscuous * @sw: pointer to switch info struct from which the function removes the rules */ static int _ice_clear_vsi_promisc(struct ice_hw *hw, u16
vsi_handle, ice_bitmap_t *promisc_mask, u16 vid, struct ice_switch_info *sw) { ice_declare_bitmap(compl_promisc_mask, ICE_PROMISC_MAX); ice_declare_bitmap(fltr_promisc_mask, ICE_PROMISC_MAX); struct ice_fltr_list_entry *fm_entry, *tmp; struct LIST_HEAD_TYPE remove_list_head; struct ice_fltr_mgmt_list_entry *itr; struct LIST_HEAD_TYPE *rule_head; struct ice_lock *rule_lock; /* Lock to protect filter rule list */ int status = 0; u8 recipe_id; if (!ice_is_vsi_valid(hw, vsi_handle)) return ICE_ERR_PARAM; if (ice_is_bit_set(promisc_mask, ICE_PROMISC_VLAN_RX) && ice_is_bit_set(promisc_mask, ICE_PROMISC_VLAN_TX)) recipe_id = ICE_SW_LKUP_PROMISC_VLAN; else recipe_id = ICE_SW_LKUP_PROMISC; rule_head = &sw->recp_list[recipe_id].filt_rules; rule_lock = &sw->recp_list[recipe_id].filt_rule_lock; INIT_LIST_HEAD(&remove_list_head); ice_acquire_lock(rule_lock); LIST_FOR_EACH_ENTRY(itr, rule_head, ice_fltr_mgmt_list_entry, list_entry) { struct ice_fltr_info *fltr_info; ice_zero_bitmap(compl_promisc_mask, ICE_PROMISC_MAX); if (!ice_vsi_uses_fltr(itr, vsi_handle)) continue; fltr_info = &itr->fltr_info; if (recipe_id == ICE_SW_LKUP_PROMISC_VLAN && vid != fltr_info->l_data.mac_vlan.vlan_id) continue; ice_determine_promisc_mask(fltr_info, fltr_promisc_mask); ice_andnot_bitmap(compl_promisc_mask, fltr_promisc_mask, promisc_mask, ICE_PROMISC_MAX); /* Skip if filter is not completely specified by given mask */ if (ice_is_any_bit_set(compl_promisc_mask, ICE_PROMISC_MAX)) continue; status = ice_add_entry_to_vsi_fltr_list(hw, vsi_handle, &remove_list_head, fltr_info); if (status) { ice_release_lock(rule_lock); goto free_fltr_list; } } ice_release_lock(rule_lock); status = ice_remove_promisc(hw, recipe_id, &remove_list_head); free_fltr_list: LIST_FOR_EACH_ENTRY_SAFE(fm_entry, tmp, &remove_list_head, ice_fltr_list_entry, list_entry) { LIST_DEL(&fm_entry->list_entry); ice_free(hw, fm_entry); } return status; } /** * ice_clear_vsi_promisc - clear specified promiscuous mode(s) for given VSI * @hw: pointer to the hardware structure * @vsi_handle: VSI handle to clear mode * @promisc_mask: pointer to mask of promiscuous config bits to clear * @vid: VLAN ID to clear VLAN promiscuous */ int ice_clear_vsi_promisc(struct ice_hw *hw, u16 vsi_handle, ice_bitmap_t *promisc_mask, u16 vid) { if (!hw || !promisc_mask) return ICE_ERR_PARAM; return _ice_clear_vsi_promisc(hw, vsi_handle, promisc_mask, vid, hw->switch_info); } /** * _ice_set_vsi_promisc - set given VSI to given promiscuous mode(s) * @hw: pointer to the hardware structure * @vsi_handle: VSI handle to configure * @promisc_mask: pointer to mask of promiscuous config bits * @vid: VLAN ID to set VLAN promiscuous * @lport: logical port number to configure promisc mode * @sw: pointer to switch info struct for which function add rule */ static int _ice_set_vsi_promisc(struct ice_hw *hw, u16 vsi_handle, ice_bitmap_t *promisc_mask, u16 vid, u8 lport, struct ice_switch_info *sw) { enum { UCAST_FLTR = 1, MCAST_FLTR, BCAST_FLTR }; ice_declare_bitmap(p_mask, ICE_PROMISC_MAX); struct ice_fltr_list_entry f_list_entry; bool is_tx_fltr, is_rx_lb_fltr; struct ice_fltr_info new_fltr; int status = 0; u16 hw_vsi_id; int pkt_type; u8 recipe_id; ice_debug(hw, ICE_DBG_TRACE, "%s\n", __func__); if (!ice_is_vsi_valid(hw, vsi_handle)) return ICE_ERR_PARAM; hw_vsi_id = ice_get_hw_vsi_num(hw, vsi_handle); ice_memset(&new_fltr, 0, sizeof(new_fltr), ICE_NONDMA_MEM); /* Do not modify original bitmap */ ice_cp_bitmap(p_mask, promisc_mask, ICE_PROMISC_MAX); if (ice_is_bit_set(p_mask, ICE_PROMISC_VLAN_RX) 
&& ice_is_bit_set(p_mask, ICE_PROMISC_VLAN_TX)) { new_fltr.lkup_type = ICE_SW_LKUP_PROMISC_VLAN; new_fltr.l_data.mac_vlan.vlan_id = vid; recipe_id = ICE_SW_LKUP_PROMISC_VLAN; } else { new_fltr.lkup_type = ICE_SW_LKUP_PROMISC; recipe_id = ICE_SW_LKUP_PROMISC; } /* Separate filters must be set for each direction/packet type * combination, so we will loop over the mask value, store the * individual type, and clear it out in the input mask as it * is found. */ while (ice_is_any_bit_set(p_mask, ICE_PROMISC_MAX)) { struct ice_sw_recipe *recp_list; u8 *mac_addr; pkt_type = 0; is_tx_fltr = false; is_rx_lb_fltr = false; if (ice_test_and_clear_bit(ICE_PROMISC_UCAST_RX, p_mask)) { pkt_type = UCAST_FLTR; } else if (ice_test_and_clear_bit(ICE_PROMISC_UCAST_TX, p_mask)) { pkt_type = UCAST_FLTR; is_tx_fltr = true; } else if (ice_test_and_clear_bit(ICE_PROMISC_MCAST_RX, p_mask)) { pkt_type = MCAST_FLTR; } else if (ice_test_and_clear_bit(ICE_PROMISC_MCAST_TX, p_mask)) { pkt_type = MCAST_FLTR; is_tx_fltr = true; } else if (ice_test_and_clear_bit(ICE_PROMISC_BCAST_RX, p_mask)) { pkt_type = BCAST_FLTR; } else if (ice_test_and_clear_bit(ICE_PROMISC_BCAST_TX, p_mask)) { pkt_type = BCAST_FLTR; is_tx_fltr = true; } else if (ice_test_and_clear_bit(ICE_PROMISC_UCAST_RX_LB, p_mask)) { pkt_type = UCAST_FLTR; is_rx_lb_fltr = true; } /* Check for VLAN promiscuous flag */ if (ice_is_bit_set(p_mask, ICE_PROMISC_VLAN_RX)) { ice_clear_bit(ICE_PROMISC_VLAN_RX, p_mask); } else if (ice_test_and_clear_bit(ICE_PROMISC_VLAN_TX, p_mask)) { is_tx_fltr = true; } /* Set filter DA based on packet type */ mac_addr = new_fltr.l_data.mac.mac_addr; if (pkt_type == BCAST_FLTR) { ice_memset(mac_addr, 0xff, ETH_ALEN, ICE_NONDMA_MEM); } else if (pkt_type == MCAST_FLTR || pkt_type == UCAST_FLTR) { /* Use the dummy ether header DA */ ice_memcpy(mac_addr, dummy_eth_header, ETH_ALEN, ICE_NONDMA_TO_NONDMA); if (pkt_type == MCAST_FLTR) mac_addr[0] |= 0x1; /* Set multicast bit */ } /* Need to reset this to zero for all iterations */ new_fltr.flag = 0; if (is_tx_fltr) { new_fltr.flag |= ICE_FLTR_TX; new_fltr.src = hw_vsi_id; } else if (is_rx_lb_fltr) { new_fltr.flag |= ICE_FLTR_RX_LB; new_fltr.src = hw_vsi_id; } else { new_fltr.flag |= ICE_FLTR_RX; new_fltr.src = lport; } new_fltr.fltr_act = ICE_FWD_TO_VSI; new_fltr.vsi_handle = vsi_handle; new_fltr.fwd_id.hw_vsi_id = hw_vsi_id; f_list_entry.fltr_info = new_fltr; recp_list = &sw->recp_list[recipe_id]; status = ice_add_rule_internal(hw, recp_list, lport, &f_list_entry); if (status) goto set_promisc_exit; } set_promisc_exit: return status; } /** * ice_set_vsi_promisc - set given VSI to given promiscuous mode(s) * @hw: pointer to the hardware structure * @vsi_handle: VSI handle to configure * @promisc_mask: pointer to mask of promiscuous config bits * @vid: VLAN ID to set VLAN promiscuous */ int ice_set_vsi_promisc(struct ice_hw *hw, u16 vsi_handle, ice_bitmap_t *promisc_mask, u16 vid) { if (!hw || !promisc_mask) return ICE_ERR_PARAM; return _ice_set_vsi_promisc(hw, vsi_handle, promisc_mask, vid, hw->port_info->lport, hw->switch_info); } /** * _ice_set_vlan_vsi_promisc * @hw: pointer to the hardware structure * @vsi_handle: VSI handle to configure * @promisc_mask: pointer to mask of promiscuous config bits * @rm_vlan_promisc: Clear VLANs VSI promisc mode * @lport: logical port number to configure promisc mode * @sw: pointer to switch info struct for which function add rule * * Configure VSI with all associated VLANs to given promiscuous mode(s) */ static int _ice_set_vlan_vsi_promisc(struct ice_hw *hw, 
u16 vsi_handle, ice_bitmap_t *promisc_mask, bool rm_vlan_promisc, u8 lport, struct ice_switch_info *sw) { struct ice_fltr_list_entry *list_itr, *tmp; struct LIST_HEAD_TYPE vsi_list_head; struct LIST_HEAD_TYPE *vlan_head; struct ice_lock *vlan_lock; /* Lock to protect filter rule list */ int status; u16 vlan_id; INIT_LIST_HEAD(&vsi_list_head); vlan_lock = &sw->recp_list[ICE_SW_LKUP_VLAN].filt_rule_lock; vlan_head = &sw->recp_list[ICE_SW_LKUP_VLAN].filt_rules; ice_acquire_lock(vlan_lock); status = ice_add_to_vsi_fltr_list(hw, vsi_handle, vlan_head, &vsi_list_head); ice_release_lock(vlan_lock); if (status) goto free_fltr_list; LIST_FOR_EACH_ENTRY(list_itr, &vsi_list_head, ice_fltr_list_entry, list_entry) { /* Avoid enabling or disabling vlan zero twice when in double * vlan mode */ if (ice_is_dvm_ena(hw) && list_itr->fltr_info.l_data.vlan.tpid == 0) continue; vlan_id = list_itr->fltr_info.l_data.vlan.vlan_id; if (rm_vlan_promisc) status = _ice_clear_vsi_promisc(hw, vsi_handle, promisc_mask, vlan_id, sw); else status = _ice_set_vsi_promisc(hw, vsi_handle, promisc_mask, vlan_id, lport, sw); if (status && status != ICE_ERR_ALREADY_EXISTS) break; } free_fltr_list: LIST_FOR_EACH_ENTRY_SAFE(list_itr, tmp, &vsi_list_head, ice_fltr_list_entry, list_entry) { LIST_DEL(&list_itr->list_entry); ice_free(hw, list_itr); } return status; } /** * ice_set_vlan_vsi_promisc * @hw: pointer to the hardware structure * @vsi_handle: VSI handle to configure * @promisc_mask: mask of promiscuous config bits * @rm_vlan_promisc: Clear VLANs VSI promisc mode * * Configure VSI with all associated VLANs to given promiscuous mode(s) */ int ice_set_vlan_vsi_promisc(struct ice_hw *hw, u16 vsi_handle, ice_bitmap_t *promisc_mask, bool rm_vlan_promisc) { if (!hw || !promisc_mask) return ICE_ERR_PARAM; return _ice_set_vlan_vsi_promisc(hw, vsi_handle, promisc_mask, rm_vlan_promisc, hw->port_info->lport, hw->switch_info); } /** * ice_remove_vsi_lkup_fltr - Remove lookup type filters for a VSI * @hw: pointer to the hardware structure * @vsi_handle: VSI handle to remove filters from * @recp_list: recipe list from which function remove fltr * @lkup: switch rule filter lookup type */ static void ice_remove_vsi_lkup_fltr(struct ice_hw *hw, u16 vsi_handle, struct ice_sw_recipe *recp_list, enum ice_sw_lkup_type lkup) { struct ice_fltr_list_entry *fm_entry; struct LIST_HEAD_TYPE remove_list_head; struct LIST_HEAD_TYPE *rule_head; struct ice_fltr_list_entry *tmp; struct ice_lock *rule_lock; /* Lock to protect filter rule list */ int status; INIT_LIST_HEAD(&remove_list_head); rule_lock = &recp_list[lkup].filt_rule_lock; rule_head = &recp_list[lkup].filt_rules; ice_acquire_lock(rule_lock); status = ice_add_to_vsi_fltr_list(hw, vsi_handle, rule_head, &remove_list_head); ice_release_lock(rule_lock); if (status) goto free_fltr_list; switch (lkup) { case ICE_SW_LKUP_MAC: ice_remove_mac_rule(hw, &remove_list_head, &recp_list[lkup]); break; case ICE_SW_LKUP_VLAN: ice_remove_vlan_rule(hw, &remove_list_head, &recp_list[lkup]); break; case ICE_SW_LKUP_PROMISC: case ICE_SW_LKUP_PROMISC_VLAN: ice_remove_promisc(hw, (u8)lkup, &remove_list_head); break; case ICE_SW_LKUP_MAC_VLAN: ice_debug(hw, ICE_DBG_SW, "MAC VLAN look up is not supported yet\n"); break; case ICE_SW_LKUP_ETHERTYPE: case ICE_SW_LKUP_ETHERTYPE_MAC: ice_remove_eth_mac(hw, &remove_list_head); break; case ICE_SW_LKUP_DFLT: ice_debug(hw, ICE_DBG_SW, "Remove filters for this lookup type hasn't been implemented yet\n"); break; case ICE_SW_LKUP_LAST: ice_debug(hw, ICE_DBG_SW, "Unsupported lookup 
type\n"); break; } free_fltr_list: LIST_FOR_EACH_ENTRY_SAFE(fm_entry, tmp, &remove_list_head, ice_fltr_list_entry, list_entry) { LIST_DEL(&fm_entry->list_entry); ice_free(hw, fm_entry); } } /** * ice_remove_vsi_fltr_rule - Remove all filters for a VSI * @hw: pointer to the hardware structure * @vsi_handle: VSI handle to remove filters from * @sw: pointer to switch info struct */ static void ice_remove_vsi_fltr_rule(struct ice_hw *hw, u16 vsi_handle, struct ice_switch_info *sw) { ice_debug(hw, ICE_DBG_TRACE, "%s\n", __func__); ice_remove_vsi_lkup_fltr(hw, vsi_handle, sw->recp_list, ICE_SW_LKUP_MAC); ice_remove_vsi_lkup_fltr(hw, vsi_handle, sw->recp_list, ICE_SW_LKUP_MAC_VLAN); ice_remove_vsi_lkup_fltr(hw, vsi_handle, sw->recp_list, ICE_SW_LKUP_PROMISC); ice_remove_vsi_lkup_fltr(hw, vsi_handle, sw->recp_list, ICE_SW_LKUP_VLAN); ice_remove_vsi_lkup_fltr(hw, vsi_handle, sw->recp_list, ICE_SW_LKUP_DFLT); ice_remove_vsi_lkup_fltr(hw, vsi_handle, sw->recp_list, ICE_SW_LKUP_ETHERTYPE); ice_remove_vsi_lkup_fltr(hw, vsi_handle, sw->recp_list, ICE_SW_LKUP_ETHERTYPE_MAC); ice_remove_vsi_lkup_fltr(hw, vsi_handle, sw->recp_list, ICE_SW_LKUP_PROMISC_VLAN); } /** * ice_remove_vsi_fltr - Remove all filters for a VSI * @hw: pointer to the hardware structure * @vsi_handle: VSI handle to remove filters from */ void ice_remove_vsi_fltr(struct ice_hw *hw, u16 vsi_handle) { ice_remove_vsi_fltr_rule(hw, vsi_handle, hw->switch_info); } /** * ice_alloc_res_cntr - allocating resource counter * @hw: pointer to the hardware structure * @type: type of resource * @alloc_shared: if set it is shared else dedicated * @num_items: number of entries requested for FD resource type * @counter_id: counter index returned by AQ call */ static int ice_alloc_res_cntr(struct ice_hw *hw, u8 type, u8 alloc_shared, u16 num_items, u16 *counter_id) { struct ice_aqc_alloc_free_res_elem *buf; u16 buf_len; int status; /* Allocate resource */ buf_len = ice_struct_size(buf, elem, 1); buf = (struct ice_aqc_alloc_free_res_elem *)ice_malloc(hw, buf_len); if (!buf) return ICE_ERR_NO_MEMORY; buf->num_elems = CPU_TO_LE16(num_items); buf->res_type = CPU_TO_LE16(((type << ICE_AQC_RES_TYPE_S) & ICE_AQC_RES_TYPE_M) | alloc_shared); status = ice_aq_alloc_free_res(hw, 1, buf, buf_len, ice_aqc_opc_alloc_res, NULL); if (status) goto exit; *counter_id = LE16_TO_CPU(buf->elem[0].e.sw_resp); exit: ice_free(hw, buf); return status; } /** * ice_free_res_cntr - free resource counter * @hw: pointer to the hardware structure * @type: type of resource * @alloc_shared: if set it is shared else dedicated * @num_items: number of entries to be freed for FD resource type * @counter_id: counter ID resource which needs to be freed */ static int ice_free_res_cntr(struct ice_hw *hw, u8 type, u8 alloc_shared, u16 num_items, u16 counter_id) { struct ice_aqc_alloc_free_res_elem *buf; u16 buf_len; int status; /* Free resource */ buf_len = ice_struct_size(buf, elem, 1); buf = (struct ice_aqc_alloc_free_res_elem *)ice_malloc(hw, buf_len); if (!buf) return ICE_ERR_NO_MEMORY; buf->num_elems = CPU_TO_LE16(num_items); buf->res_type = CPU_TO_LE16(((type << ICE_AQC_RES_TYPE_S) & ICE_AQC_RES_TYPE_M) | alloc_shared); buf->elem[0].e.sw_resp = CPU_TO_LE16(counter_id); status = ice_aq_alloc_free_res(hw, 1, buf, buf_len, ice_aqc_opc_free_res, NULL); if (status) ice_debug(hw, ICE_DBG_SW, "counter resource could not be freed\n"); ice_free(hw, buf); return status; } /** * ice_alloc_vlan_res_counter - obtain counter resource for VLAN type * @hw: pointer to the hardware structure * @counter_id: 
returns counter index */ int ice_alloc_vlan_res_counter(struct ice_hw *hw, u16 *counter_id) { return ice_alloc_res_cntr(hw, ICE_AQC_RES_TYPE_VLAN_COUNTER, ICE_AQC_RES_TYPE_FLAG_DEDICATED, 1, counter_id); } /** * ice_free_vlan_res_counter - Free counter resource for VLAN type * @hw: pointer to the hardware structure * @counter_id: counter index to be freed */ int ice_free_vlan_res_counter(struct ice_hw *hw, u16 counter_id) { return ice_free_res_cntr(hw, ICE_AQC_RES_TYPE_VLAN_COUNTER, ICE_AQC_RES_TYPE_FLAG_DEDICATED, 1, counter_id); } /** * ice_add_mac_with_sw_marker - add filter with sw marker * @hw: pointer to the hardware structure * @f_info: filter info structure containing the MAC filter information * @sw_marker: sw marker to tag the Rx descriptor with */ int ice_add_mac_with_sw_marker(struct ice_hw *hw, struct ice_fltr_info *f_info, u16 sw_marker) { struct ice_fltr_mgmt_list_entry *m_entry; struct ice_fltr_list_entry fl_info; struct ice_sw_recipe *recp_list; struct LIST_HEAD_TYPE l_head; struct ice_lock *rule_lock; /* Lock to protect filter rule list */ bool entry_exists; u16 lg_act_id; int ret; if (f_info->fltr_act != ICE_FWD_TO_VSI) return ICE_ERR_PARAM; if (f_info->lkup_type != ICE_SW_LKUP_MAC) return ICE_ERR_PARAM; if (sw_marker == ICE_INVAL_SW_MARKER_ID) return ICE_ERR_PARAM; if (!ice_is_vsi_valid(hw, f_info->vsi_handle)) return ICE_ERR_PARAM; f_info->fwd_id.hw_vsi_id = ice_get_hw_vsi_num(hw, f_info->vsi_handle); /* Add the filter if it doesn't exist so that adding the large * action always results in an update */ INIT_LIST_HEAD(&l_head); fl_info.fltr_info = *f_info; LIST_ADD(&fl_info.list_entry, &l_head); entry_exists = false; ret = ice_add_mac_rule(hw, &l_head, hw->switch_info, hw->port_info->lport); if (ret == ICE_ERR_ALREADY_EXISTS) entry_exists = true; else if (ret) return ret; recp_list = &hw->switch_info->recp_list[ICE_SW_LKUP_MAC]; rule_lock = &recp_list->filt_rule_lock; ice_acquire_lock(rule_lock); /* Get the bookkeeping entry for the filter */ m_entry = ice_find_rule_entry(&recp_list->filt_rules, f_info); if (!m_entry) goto exit_error; /* If the counter action was enabled for this rule then don't enable * the sw marker large action */ if (m_entry->counter_index != ICE_INVAL_COUNTER_ID) { ret = ICE_ERR_PARAM; goto exit_error; } /* if the same marker was added before */ if (m_entry->sw_marker_id == sw_marker) { ret = ICE_ERR_ALREADY_EXISTS; goto exit_error; } /* Allocate a hardware table entry to hold large act.
Three actions * for marker based large action */ ret = ice_alloc_res_lg_act(hw, &lg_act_id, 3); if (ret) goto exit_error; if (lg_act_id == ICE_INVAL_LG_ACT_INDEX) goto exit_error; /* Update the switch rule to add the marker action */ ret = ice_add_marker_act(hw, m_entry, sw_marker, lg_act_id); if (!ret) { ice_release_lock(rule_lock); return ret; } exit_error: ice_release_lock(rule_lock); /* only remove entry if it did not exist previously */ if (!entry_exists) ret = ice_remove_mac(hw, &l_head); return ret; } /** * ice_add_mac_with_counter - add filter with counter enabled * @hw: pointer to the hardware structure * @f_info: pointer to filter info structure containing the MAC filter * information */ int ice_add_mac_with_counter(struct ice_hw *hw, struct ice_fltr_info *f_info) { struct ice_fltr_mgmt_list_entry *m_entry; struct ice_fltr_list_entry fl_info; struct ice_sw_recipe *recp_list; struct LIST_HEAD_TYPE l_head; struct ice_lock *rule_lock; /* Lock to protect filter rule list */ bool entry_exist; u16 counter_id; u16 lg_act_id; int ret; if (f_info->fltr_act != ICE_FWD_TO_VSI) return ICE_ERR_PARAM; if (f_info->lkup_type != ICE_SW_LKUP_MAC) return ICE_ERR_PARAM; if (!ice_is_vsi_valid(hw, f_info->vsi_handle)) return ICE_ERR_PARAM; f_info->fwd_id.hw_vsi_id = ice_get_hw_vsi_num(hw, f_info->vsi_handle); recp_list = &hw->switch_info->recp_list[ICE_SW_LKUP_MAC]; entry_exist = false; rule_lock = &recp_list->filt_rule_lock; /* Add the filter if it doesn't exist so that adding the large * action always results in an update */ INIT_LIST_HEAD(&l_head); fl_info.fltr_info = *f_info; LIST_ADD(&fl_info.list_entry, &l_head); ret = ice_add_mac_rule(hw, &l_head, hw->switch_info, hw->port_info->lport); if (ret == ICE_ERR_ALREADY_EXISTS) entry_exist = true; else if (ret) return ret; ice_acquire_lock(rule_lock); m_entry = ice_find_rule_entry(&recp_list->filt_rules, f_info); if (!m_entry) { ret = ICE_ERR_BAD_PTR; goto exit_error; } /* Don't enable a counter for a filter for which the sw marker was enabled */ if (m_entry->sw_marker_id != ICE_INVAL_SW_MARKER_ID) { ret = ICE_ERR_PARAM; goto exit_error; } /* If a counter was already enabled then there is no need to add it again */ if (m_entry->counter_index != ICE_INVAL_COUNTER_ID) { ret = ICE_ERR_ALREADY_EXISTS; goto exit_error; } /* Allocate a hardware table entry for the VLAN counter */ ret = ice_alloc_vlan_res_counter(hw, &counter_id); if (ret) goto exit_error; /* Allocate a hardware table entry to hold large act.
Two actions for * counter based large action */ ret = ice_alloc_res_lg_act(hw, &lg_act_id, 2); if (ret) goto exit_error; if (lg_act_id == ICE_INVAL_LG_ACT_INDEX) goto exit_error; /* Update the switch rule to add the counter action */ ret = ice_add_counter_act(hw, m_entry, counter_id, lg_act_id); if (!ret) { ice_release_lock(rule_lock); return ret; } exit_error: ice_release_lock(rule_lock); /* only remove entry if it did not exist previously */ if (!entry_exist) ret = ice_remove_mac(hw, &l_head); return ret; } /** * ice_replay_fltr - Replay all the filters stored by a specific list head * @hw: pointer to the hardware structure * @list_head: list for which filters need to be replayed * @recp_id: Recipe ID for which rules need to be replayed */ static int ice_replay_fltr(struct ice_hw *hw, u8 recp_id, struct LIST_HEAD_TYPE *list_head) { struct ice_fltr_mgmt_list_entry *itr; struct ice_sw_recipe *recp_list; u8 lport = hw->port_info->lport; struct LIST_HEAD_TYPE l_head; int status = 0; if (LIST_EMPTY(list_head)) return status; recp_list = &hw->switch_info->recp_list[recp_id]; /* Move entries from the given list_head to a temporary l_head so that * they can be replayed. Otherwise, when trying to re-add the same * filter, the function would report that it already exists */ LIST_REPLACE_INIT(list_head, &l_head); /* Mark the given list_head empty by reinitializing it so filters * can be added again by the handler */ LIST_FOR_EACH_ENTRY(itr, &l_head, ice_fltr_mgmt_list_entry, list_entry) { struct ice_fltr_list_entry f_entry; u16 vsi_handle; f_entry.fltr_info = itr->fltr_info; if (itr->vsi_count < 2 && recp_id != ICE_SW_LKUP_VLAN) { status = ice_add_rule_internal(hw, recp_list, lport, &f_entry); if (status) goto end; continue; } /* Add a filter per VSI separately */ ice_for_each_set_bit(vsi_handle, itr->vsi_list_info->vsi_map, ICE_MAX_VSI) { if (!ice_is_vsi_valid(hw, vsi_handle)) break; ice_clear_bit(vsi_handle, itr->vsi_list_info->vsi_map); f_entry.fltr_info.vsi_handle = vsi_handle; f_entry.fltr_info.fwd_id.hw_vsi_id = ice_get_hw_vsi_num(hw, vsi_handle); f_entry.fltr_info.fltr_act = ICE_FWD_TO_VSI; if (recp_id == ICE_SW_LKUP_VLAN) status = ice_add_vlan_internal(hw, recp_list, &f_entry); else status = ice_add_rule_internal(hw, recp_list, lport, &f_entry); if (status) goto end; } } end: /* Clear the filter management list */ ice_rem_sw_rule_info(hw, &l_head); return status; } /** * ice_replay_all_fltr - replay all filters stored in bookkeeping lists * @hw: pointer to the hardware structure * * NOTE: This function does not clean up partially added filters on error. * It is up to the caller of the function to issue a reset or fail early. */ int ice_replay_all_fltr(struct ice_hw *hw) { struct ice_switch_info *sw = hw->switch_info; int status = ICE_SUCCESS; u8 i; for (i = 0; i < ICE_MAX_NUM_RECIPES; i++) { struct LIST_HEAD_TYPE *head = &sw->recp_list[i].filt_rules; status = ice_replay_fltr(hw, i, head); if (status != ICE_SUCCESS) return status; } return status; } /** * ice_replay_vsi_fltr - Replay filters for requested VSI * @hw: pointer to the hardware structure * @pi: pointer to port information structure * @sw: pointer to switch info struct for which the function replays filters * @vsi_handle: driver VSI handle * @recp_id: Recipe ID for which rules need to be replayed * @list_head: list for which filters need to be replayed * * Replays the filters of recipe recp_id for a VSI represented via vsi_handle. * It is required to pass a valid VSI handle.
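 *
 * Summarizing the logic below: a single-VSI rule is re-added as-is (with its
 * source updated if it referenced a VSI number), while a rule forwarding to a
 * VSI list that contains vsi_handle is replayed as an individual
 * ICE_FWD_TO_VSI rule for that VSI.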
*/ static int ice_replay_vsi_fltr(struct ice_hw *hw, struct ice_port_info *pi, struct ice_switch_info *sw, u16 vsi_handle, u8 recp_id, struct LIST_HEAD_TYPE *list_head) { struct ice_fltr_mgmt_list_entry *itr; struct ice_sw_recipe *recp_list; int status = 0; u16 hw_vsi_id; if (LIST_EMPTY(list_head)) return status; recp_list = &sw->recp_list[recp_id]; hw_vsi_id = ice_get_hw_vsi_num(hw, vsi_handle); LIST_FOR_EACH_ENTRY(itr, list_head, ice_fltr_mgmt_list_entry, list_entry) { struct ice_fltr_list_entry f_entry; f_entry.fltr_info = itr->fltr_info; if (itr->vsi_count < 2 && recp_id != ICE_SW_LKUP_VLAN && itr->fltr_info.vsi_handle == vsi_handle) { /* update the src in case it is VSI num */ if (f_entry.fltr_info.src_id == ICE_SRC_ID_VSI) f_entry.fltr_info.src = hw_vsi_id; status = ice_add_rule_internal(hw, recp_list, pi->lport, &f_entry); if (status) goto end; continue; } if (!itr->vsi_list_info || !ice_is_bit_set(itr->vsi_list_info->vsi_map, vsi_handle)) continue; - /* Clearing it so that the logic can add it back */ - ice_clear_bit(vsi_handle, itr->vsi_list_info->vsi_map); f_entry.fltr_info.vsi_handle = vsi_handle; f_entry.fltr_info.fltr_act = ICE_FWD_TO_VSI; /* update the src in case it is VSI num */ if (f_entry.fltr_info.src_id == ICE_SRC_ID_VSI) f_entry.fltr_info.src = hw_vsi_id; if (recp_id == ICE_SW_LKUP_VLAN) status = ice_add_vlan_internal(hw, recp_list, &f_entry); else status = ice_add_rule_internal(hw, recp_list, pi->lport, &f_entry); if (status) goto end; } end: return status; } /** * ice_replay_vsi_all_fltr - replay all filters stored in bookkeeping lists * @hw: pointer to the hardware structure * @pi: pointer to port information structure * @vsi_handle: driver VSI handle * * Replays filters for requested VSI via vsi_handle. */ int ice_replay_vsi_all_fltr(struct ice_hw *hw, struct ice_port_info *pi, u16 vsi_handle) { struct ice_switch_info *sw = NULL; int status = 0; u8 i; sw = hw->switch_info; /* Update the recipes that were created */ for (i = 0; i < ICE_MAX_NUM_RECIPES; i++) { struct LIST_HEAD_TYPE *head; head = &sw->recp_list[i].filt_replay_rules; if (!sw->recp_list[i].adv_rule) status = ice_replay_vsi_fltr(hw, pi, sw, vsi_handle, i, head); if (status) return status; } return 0; } /** * ice_rm_sw_replay_rule_info - helper function to delete filter replay rules * @hw: pointer to the HW struct * @sw: pointer to switch info struct for which function removes filters * * Deletes the filter replay rules for given switch */ void ice_rm_sw_replay_rule_info(struct ice_hw *hw, struct ice_switch_info *sw) { u8 i; if (!sw) return; for (i = 0; i < ICE_MAX_NUM_RECIPES; i++) { if (!LIST_EMPTY(&sw->recp_list[i].filt_replay_rules)) { struct LIST_HEAD_TYPE *l_head; l_head = &sw->recp_list[i].filt_replay_rules; if (!sw->recp_list[i].adv_rule) ice_rem_sw_rule_info(hw, l_head); } } } /** * ice_rm_all_sw_replay_rule_info - deletes filter replay rules * @hw: pointer to the HW struct * * Deletes the filter replay rules. */ void ice_rm_all_sw_replay_rule_info(struct ice_hw *hw) { ice_rm_sw_replay_rule_info(hw, hw->switch_info); } diff --git a/sys/dev/ice/if_ice_iflib.c b/sys/dev/ice/if_ice_iflib.c index 0fb7faecb2d7..9b3f38f885b9 100644 --- a/sys/dev/ice/if_ice_iflib.c +++ b/sys/dev/ice/if_ice_iflib.c @@ -1,4433 +1,4480 @@ /* SPDX-License-Identifier: BSD-3-Clause */ /* Copyright (c) 2024, Intel Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. 
Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. Neither the name of the Intel Corporation nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /** * @file if_ice_iflib.c * @brief iflib driver implementation * * Contains the main entry point for the iflib driver implementation. It * implements the various ifdi driver methods, and sets up the module and * driver values to load an iflib driver. */ #include "ice_iflib.h" #include "ice_drv_info.h" #include "ice_switch.h" #include "ice_sched.h" #include <sys/module.h> #include <sys/sockio.h> #include <sys/smp.h> #include <dev/pci/pcivar.h> #include <dev/pci/pcireg.h> /* * Device method prototypes */ static void *ice_register(device_t); static int ice_if_attach_pre(if_ctx_t); static int ice_attach_pre_recovery_mode(struct ice_softc *sc); static int ice_if_attach_post(if_ctx_t); static void ice_attach_post_recovery_mode(struct ice_softc *sc); static int ice_if_detach(if_ctx_t); static int ice_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int ntxqs, int ntxqsets); static int ice_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int nqs, int nqsets); static int ice_if_msix_intr_assign(if_ctx_t ctx, int msix); static void ice_if_queues_free(if_ctx_t ctx); static int ice_if_mtu_set(if_ctx_t ctx, uint32_t mtu); static void ice_if_intr_enable(if_ctx_t ctx); static void ice_if_intr_disable(if_ctx_t ctx); static int ice_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid); static int ice_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid); static int ice_if_promisc_set(if_ctx_t ctx, int flags); static void ice_if_media_status(if_ctx_t ctx, struct ifmediareq *ifmr); static int ice_if_media_change(if_ctx_t ctx); static void ice_if_init(if_ctx_t ctx); static void ice_if_timer(if_ctx_t ctx, uint16_t qid); static void ice_if_update_admin_status(if_ctx_t ctx); static void ice_if_multi_set(if_ctx_t ctx); static void ice_if_vlan_register(if_ctx_t ctx, u16 vtag); static void ice_if_vlan_unregister(if_ctx_t ctx, u16 vtag); static void ice_if_stop(if_ctx_t ctx); static uint64_t ice_if_get_counter(if_ctx_t ctx, ift_counter counter); static int ice_if_priv_ioctl(if_ctx_t ctx, u_long command, caddr_t data); static int ice_if_i2c_req(if_ctx_t ctx, struct ifi2creq *req); static int ice_if_suspend(if_ctx_t ctx); static int ice_if_resume(if_ctx_t ctx); static bool ice_if_needs_restart(if_ctx_t ctx, enum iflib_restart_event event); +static void 
ice_init_link(struct ice_softc *sc); static int ice_setup_mirror_vsi(struct ice_mirr_if *mif); static int ice_wire_mirror_intrs(struct ice_mirr_if *mif); static void ice_free_irqvs_subif(struct ice_mirr_if *mif); static void *ice_subif_register(device_t); static void ice_subif_setup_scctx(struct ice_mirr_if *mif); static int ice_subif_rebuild(struct ice_softc *sc); static int ice_subif_rebuild_vsi_qmap(struct ice_softc *sc); /* Iflib API */ static int ice_subif_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int ntxqs, int ntxqsets); static int ice_subif_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int nrxqs, int nrxqsets); static int ice_subif_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid); static int ice_subif_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid); static void ice_subif_if_intr_enable(if_ctx_t ctx); static int ice_subif_if_msix_intr_assign(if_ctx_t ctx, int msix); static void ice_subif_if_init(if_ctx_t ctx); static void ice_subif_if_stop(if_ctx_t ctx); static void ice_subif_if_queues_free(if_ctx_t ctx); static int ice_subif_if_attach_pre(if_ctx_t); static int ice_subif_if_attach_post(if_ctx_t); static void ice_subif_if_media_status(if_ctx_t ctx, struct ifmediareq *ifmr); static int ice_subif_if_promisc_set(if_ctx_t ctx, int flags); static int ice_msix_que(void *arg); static int ice_msix_admin(void *arg); /* * Helper function prototypes */ static int ice_pci_mapping(struct ice_softc *sc); static void ice_free_pci_mapping(struct ice_softc *sc); static void ice_update_link_status(struct ice_softc *sc, bool update_media); static void ice_init_device_features(struct ice_softc *sc); static void ice_init_tx_tracking(struct ice_vsi *vsi); static void ice_handle_reset_event(struct ice_softc *sc); static void ice_handle_pf_reset_request(struct ice_softc *sc); static void ice_prepare_for_reset(struct ice_softc *sc); static int ice_rebuild_pf_vsi_qmap(struct ice_softc *sc); static void ice_rebuild(struct ice_softc *sc); static void ice_rebuild_recovery_mode(struct ice_softc *sc); static void ice_free_irqvs(struct ice_softc *sc); static void ice_update_rx_mbuf_sz(struct ice_softc *sc); static void ice_poll_for_media_avail(struct ice_softc *sc); static void ice_setup_scctx(struct ice_softc *sc); static int ice_allocate_msix(struct ice_softc *sc); static void ice_admin_timer(void *arg); static void ice_transition_recovery_mode(struct ice_softc *sc); static void ice_transition_safe_mode(struct ice_softc *sc); static void ice_set_default_promisc_mask(ice_bitmap_t *promisc_mask); /* * Device Interface Declaration */ /** * @var ice_methods * @brief ice driver method entry points * * List of device methods implementing the generic device interface used by * the device stack to interact with the ice driver. Since this is an iflib * driver, most of the methods point to the generic iflib implementation. */ static device_method_t ice_methods[] = { /* Device interface */ DEVMETHOD(device_register, ice_register), DEVMETHOD(device_probe, iflib_device_probe_vendor), DEVMETHOD(device_attach, iflib_device_attach), DEVMETHOD(device_detach, iflib_device_detach), DEVMETHOD(device_shutdown, iflib_device_shutdown), DEVMETHOD(device_suspend, iflib_device_suspend), DEVMETHOD(device_resume, iflib_device_resume), DEVMETHOD_END }; /** * @var ice_iflib_methods * @brief iflib method entry points * * List of device methods used by the iflib stack to interact with this * driver. These are the real main entry points used to interact with this * driver. 
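 *
 * Each DEVMETHOD() entry below pairs an iflib ifdi_* entry point with its
 * ice_if_* implementation; iflib invokes these callbacks (for example,
 * ifdi_init when the interface is brought up and ifdi_stop when it is
 * brought down) rather than calling into the driver through the kernel
 * device interface directly.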
*/ static device_method_t ice_iflib_methods[] = { DEVMETHOD(ifdi_attach_pre, ice_if_attach_pre), DEVMETHOD(ifdi_attach_post, ice_if_attach_post), DEVMETHOD(ifdi_detach, ice_if_detach), DEVMETHOD(ifdi_tx_queues_alloc, ice_if_tx_queues_alloc), DEVMETHOD(ifdi_rx_queues_alloc, ice_if_rx_queues_alloc), DEVMETHOD(ifdi_msix_intr_assign, ice_if_msix_intr_assign), DEVMETHOD(ifdi_queues_free, ice_if_queues_free), DEVMETHOD(ifdi_mtu_set, ice_if_mtu_set), DEVMETHOD(ifdi_intr_enable, ice_if_intr_enable), DEVMETHOD(ifdi_intr_disable, ice_if_intr_disable), DEVMETHOD(ifdi_rx_queue_intr_enable, ice_if_rx_queue_intr_enable), DEVMETHOD(ifdi_tx_queue_intr_enable, ice_if_tx_queue_intr_enable), DEVMETHOD(ifdi_promisc_set, ice_if_promisc_set), DEVMETHOD(ifdi_media_status, ice_if_media_status), DEVMETHOD(ifdi_media_change, ice_if_media_change), DEVMETHOD(ifdi_init, ice_if_init), DEVMETHOD(ifdi_stop, ice_if_stop), DEVMETHOD(ifdi_timer, ice_if_timer), DEVMETHOD(ifdi_update_admin_status, ice_if_update_admin_status), DEVMETHOD(ifdi_multi_set, ice_if_multi_set), DEVMETHOD(ifdi_vlan_register, ice_if_vlan_register), DEVMETHOD(ifdi_vlan_unregister, ice_if_vlan_unregister), DEVMETHOD(ifdi_get_counter, ice_if_get_counter), DEVMETHOD(ifdi_priv_ioctl, ice_if_priv_ioctl), DEVMETHOD(ifdi_i2c_req, ice_if_i2c_req), DEVMETHOD(ifdi_suspend, ice_if_suspend), DEVMETHOD(ifdi_resume, ice_if_resume), DEVMETHOD(ifdi_needs_restart, ice_if_needs_restart), DEVMETHOD_END }; /** * @var ice_driver * @brief driver structure for the generic device stack * * driver_t definition used to setup the generic device methods. */ static driver_t ice_driver = { .name = "ice", .methods = ice_methods, .size = sizeof(struct ice_softc), }; /** * @var ice_iflib_driver * @brief driver structure for the iflib stack * * driver_t definition used to setup the iflib device methods. */ static driver_t ice_iflib_driver = { .name = "ice", .methods = ice_iflib_methods, .size = sizeof(struct ice_softc), }; extern struct if_txrx ice_txrx; extern struct if_txrx ice_recovery_txrx; /** * @var ice_sctx * @brief ice driver shared context * * Structure defining shared values (context) that is used by all instances of * the device. Primarily used to setup details about how the iflib stack * should treat this driver. Also defines the default, minimum, and maximum * number of descriptors in each ring. */ static struct if_shared_ctx ice_sctx = { .isc_magic = IFLIB_MAGIC, .isc_q_align = PAGE_SIZE, .isc_tx_maxsize = ICE_MAX_FRAME_SIZE, /* We could technically set this as high as ICE_MAX_DMA_SEG_SIZE, but * that doesn't make sense since that would be larger than the maximum * size of a single packet. */ .isc_tx_maxsegsize = ICE_MAX_FRAME_SIZE, /* XXX: This is only used by iflib to ensure that * scctx->isc_tx_tso_size_max + the VLAN header is a valid size. */ .isc_tso_maxsize = ICE_TSO_SIZE + sizeof(struct ether_vlan_header), /* XXX: This is used by iflib to set the number of segments in the TSO * DMA tag. However, scctx->isc_tx_tso_segsize_max is used to set the * related ifnet parameter. 
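 * (That is, isc_tso_maxsegsize here only bounds the segment size of the TSO
 * DMA tag; the TSO segment size advertised to the network stack comes from
 * the isc_tx_tso_segsize_max value assigned in ice_setup_scctx().)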
*/ .isc_tso_maxsegsize = ICE_MAX_DMA_SEG_SIZE, .isc_rx_maxsize = ICE_MAX_FRAME_SIZE, .isc_rx_nsegments = ICE_MAX_RX_SEGS, .isc_rx_maxsegsize = ICE_MAX_FRAME_SIZE, .isc_nfl = 1, .isc_ntxqs = 1, .isc_nrxqs = 1, .isc_admin_intrcnt = 1, .isc_vendor_info = ice_vendor_info_array, .isc_driver_version = __DECONST(char *, ice_driver_version), .isc_driver = &ice_iflib_driver, /* * IFLIB_NEED_SCRATCH ensures that mbufs have scratch space available * for hardware checksum offload * * IFLIB_TSO_INIT_IP ensures that the TSO packets have zeroed out the * IP sum field, required by our hardware to calculate valid TSO * checksums. * * IFLIB_ADMIN_ALWAYS_RUN ensures that the administrative task runs * even when the interface is down. * * IFLIB_SKIP_MSIX allows the driver to handle allocating MSI-X * vectors manually instead of relying on iflib code to do this. */ .isc_flags = IFLIB_NEED_SCRATCH | IFLIB_TSO_INIT_IP | IFLIB_ADMIN_ALWAYS_RUN | IFLIB_SKIP_MSIX, .isc_nrxd_min = {ICE_MIN_DESC_COUNT}, .isc_ntxd_min = {ICE_MIN_DESC_COUNT}, .isc_nrxd_max = {ICE_IFLIB_MAX_DESC_COUNT}, .isc_ntxd_max = {ICE_IFLIB_MAX_DESC_COUNT}, .isc_nrxd_default = {ICE_DEFAULT_DESC_COUNT}, .isc_ntxd_default = {ICE_DEFAULT_DESC_COUNT}, }; DRIVER_MODULE(ice, pci, ice_driver, ice_module_event_handler, NULL); MODULE_VERSION(ice, 1); MODULE_DEPEND(ice, pci, 1, 1, 1); MODULE_DEPEND(ice, ether, 1, 1, 1); MODULE_DEPEND(ice, iflib, 1, 1, 1); IFLIB_PNP_INFO(pci, ice, ice_vendor_info_array); /* Static driver-wide sysctls */ #include "ice_iflib_sysctls.h" /** * ice_pci_mapping - Map PCI BAR memory * @sc: device private softc * * Map PCI BAR 0 for device operation. */ static int ice_pci_mapping(struct ice_softc *sc) { int rc; /* Map BAR0 */ rc = ice_map_bar(sc->dev, &sc->bar0, 0); if (rc) return rc; return 0; } /** * ice_free_pci_mapping - Release PCI BAR memory * @sc: device private softc * * Release PCI BARs which were previously mapped by ice_pci_mapping(). */ static void ice_free_pci_mapping(struct ice_softc *sc) { /* Free BAR0 */ ice_free_bar(sc->dev, &sc->bar0); } /* * Device methods */ /** * ice_register - register device method callback * @dev: the device being registered * * Returns a pointer to the shared context structure, which is used by iflib. */ static void * ice_register(device_t dev __unused) { return &ice_sctx; } /* ice_register */ /** * ice_setup_scctx - Setup the iflib softc context structure * @sc: the device private structure * * Setup the parameters in if_softc_ctx_t structure used by the iflib stack * when loading. */ static void ice_setup_scctx(struct ice_softc *sc) { if_softc_ctx_t scctx = sc->scctx; struct ice_hw *hw = &sc->hw; device_t dev = sc->dev; bool safe_mode, recovery_mode; safe_mode = ice_is_bit_set(sc->feat_en, ICE_FEATURE_SAFE_MODE); recovery_mode = ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE); /* * If the driver loads in Safe mode or Recovery mode, limit iflib to * a single queue pair. */ if (safe_mode || recovery_mode) { scctx->isc_ntxqsets = scctx->isc_nrxqsets = 1; scctx->isc_ntxqsets_max = 1; scctx->isc_nrxqsets_max = 1; } else { /* * iflib initially sets the isc_ntxqsets and isc_nrxqsets to * the values of the override sysctls. Cache these initial * values so that the driver can be aware of what the iflib * sysctl value is when setting up MSI-X vectors. 
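 * (These are iflib's override_ntxqs/override_nrxqs sysctls; a value of
 * zero means no override was requested, in which case the defaults below,
 * derived from the RSS table size, are used.)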
*/ sc->ifc_sysctl_ntxqs = scctx->isc_ntxqsets; sc->ifc_sysctl_nrxqs = scctx->isc_nrxqsets; if (scctx->isc_ntxqsets == 0) scctx->isc_ntxqsets = hw->func_caps.common_cap.rss_table_size; if (scctx->isc_nrxqsets == 0) scctx->isc_nrxqsets = hw->func_caps.common_cap.rss_table_size; scctx->isc_ntxqsets_max = hw->func_caps.common_cap.num_txq; scctx->isc_nrxqsets_max = hw->func_caps.common_cap.num_rxq; /* * Sanity check that the iflib sysctl values are within the * maximum supported range. */ if (sc->ifc_sysctl_ntxqs > scctx->isc_ntxqsets_max) sc->ifc_sysctl_ntxqs = scctx->isc_ntxqsets_max; if (sc->ifc_sysctl_nrxqs > scctx->isc_nrxqsets_max) sc->ifc_sysctl_nrxqs = scctx->isc_nrxqsets_max; } scctx->isc_txqsizes[0] = roundup2(scctx->isc_ntxd[0] * sizeof(struct ice_tx_desc), DBA_ALIGN); scctx->isc_rxqsizes[0] = roundup2(scctx->isc_nrxd[0] * sizeof(union ice_32b_rx_flex_desc), DBA_ALIGN); scctx->isc_tx_nsegments = ICE_MAX_TX_SEGS; scctx->isc_tx_tso_segments_max = ICE_MAX_TSO_SEGS; scctx->isc_tx_tso_size_max = ICE_TSO_SIZE; scctx->isc_tx_tso_segsize_max = ICE_MAX_DMA_SEG_SIZE; scctx->isc_msix_bar = pci_msix_table_bar(dev); scctx->isc_rss_table_size = hw->func_caps.common_cap.rss_table_size; /* * If the driver loads in recovery mode, disable Tx/Rx functionality */ if (recovery_mode) scctx->isc_txrx = &ice_recovery_txrx; else scctx->isc_txrx = &ice_txrx; /* * If the driver loads in Safe mode or Recovery mode, disable * advanced features including hardware offloads. */ if (safe_mode || recovery_mode) { scctx->isc_capenable = ICE_SAFE_CAPS; scctx->isc_tx_csum_flags = 0; } else { scctx->isc_capenable = ICE_FULL_CAPS; scctx->isc_tx_csum_flags = ICE_CSUM_OFFLOAD; } scctx->isc_capabilities = scctx->isc_capenable; } /* ice_setup_scctx */ /** * ice_if_attach_pre - Early device attach logic * @ctx: the iflib context structure * * Called by iflib during the attach process. Earliest main driver entry * point which performs necessary hardware and driver initialization. Called * before the Tx and Rx queues are allocated. */ static int ice_if_attach_pre(if_ctx_t ctx) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); enum ice_fw_modes fw_mode; int status; if_softc_ctx_t scctx; struct ice_hw *hw; device_t dev; int err; device_printf(iflib_get_dev(ctx), "Loading the iflib ice driver\n"); ice_set_state(&sc->state, ICE_STATE_ATTACHING); sc->ctx = ctx; sc->media = iflib_get_media(ctx); sc->sctx = iflib_get_sctx(ctx); sc->iflib_ctx_lock = iflib_ctx_lock_get(ctx); sc->ifp = iflib_get_ifp(ctx); dev = sc->dev = iflib_get_dev(ctx); scctx = sc->scctx = iflib_get_softc_ctx(ctx); hw = &sc->hw; hw->back = sc; snprintf(sc->admin_mtx_name, sizeof(sc->admin_mtx_name), "%s:admin", device_get_nameunit(dev)); mtx_init(&sc->admin_mtx, sc->admin_mtx_name, NULL, MTX_DEF); callout_init_mtx(&sc->admin_timer, &sc->admin_mtx, 0); ASSERT_CTX_LOCKED(sc); if (ice_pci_mapping(sc)) { err = (ENXIO); goto destroy_admin_timer; } /* Save off the PCI information */ ice_save_pci_info(hw, dev); /* create tunables as early as possible */ ice_add_device_tunables(sc); /* Setup ControlQ lengths */ ice_set_ctrlq_len(hw); reinit_hw: fw_mode = ice_get_fw_mode(hw); if (fw_mode == ICE_FW_MODE_REC) { device_printf(dev, "Firmware recovery mode detected. Limiting functionality. 
Refer to Intel(R) Ethernet Adapters and Devices User Guide for details on firmware recovery mode.\n"); err = ice_attach_pre_recovery_mode(sc); if (err) goto free_pci_mapping; return (0); } /* Initialize the hw data structure */ status = ice_init_hw(hw); if (status) { if (status == ICE_ERR_FW_API_VER) { /* Enter recovery mode, so that the driver remains * loaded. This way, if the system administrator * cannot update the driver, they may still attempt to * downgrade the NVM. */ err = ice_attach_pre_recovery_mode(sc); if (err) goto free_pci_mapping; return (0); } else { err = EIO; device_printf(dev, "Unable to initialize hw, err %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); } goto free_pci_mapping; } ice_init_device_features(sc); /* Keep flag set by default */ ice_set_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN); /* Notify firmware of the device driver version */ err = ice_send_version(sc); if (err) goto deinit_hw; /* * Success indicates a change was made that requires a reinitialization * of the hardware */ err = ice_load_pkg_file(sc); if (!err) { ice_deinit_hw(hw); goto reinit_hw; } err = ice_init_link_events(sc); if (err) { device_printf(dev, "ice_init_link_events failed: %s\n", ice_err_str(err)); goto deinit_hw; } /* Initialize VLAN mode in FW; if dual VLAN mode is supported by the package * and firmware, this will force them to use single VLAN mode. */ status = ice_set_vlan_mode(hw); if (status) { err = EIO; device_printf(dev, "Unable to initialize VLAN mode, err %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); goto deinit_hw; } ice_print_nvm_version(sc); /* Setup the MAC address */ iflib_set_mac(ctx, hw->port_info->mac.lan_addr); /* Setup the iflib softc context structure */ ice_setup_scctx(sc); /* Initialize the Tx queue manager */ err = ice_resmgr_init(&sc->tx_qmgr, hw->func_caps.common_cap.num_txq); if (err) { device_printf(dev, "Unable to initialize Tx queue manager: %s\n", ice_err_str(err)); goto deinit_hw; } /* Initialize the Rx queue manager */ err = ice_resmgr_init(&sc->rx_qmgr, hw->func_caps.common_cap.num_rxq); if (err) { device_printf(dev, "Unable to initialize Rx queue manager: %s\n", ice_err_str(err)); goto free_tx_qmgr; } /* Initialize the PF device interrupt resource manager */ err = ice_alloc_intr_tracking(sc); if (err) /* Errors are already printed */ goto free_rx_qmgr; /* Determine maximum number of VSIs we'll prepare for */ sc->num_available_vsi = min(ICE_MAX_VSI_AVAILABLE, hw->func_caps.guar_num_vsi); if (!sc->num_available_vsi) { err = EIO; device_printf(dev, "No VSIs allocated to host\n"); goto free_intr_tracking; } /* Allocate storage for the VSI pointers */ sc->all_vsi = (struct ice_vsi **) malloc(sizeof(struct ice_vsi *) * sc->num_available_vsi, M_ICE, M_WAITOK | M_ZERO); if (!sc->all_vsi) { err = ENOMEM; device_printf(dev, "Unable to allocate VSI array\n"); goto free_intr_tracking; } /* * Prepare the statically allocated primary PF VSI in the softc * structure. Other VSIs will be dynamically allocated as needed. 
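 *
 * The primary PF VSI is embedded in the softc rather than allocated
 * here because iflib may invoke ice_if_queues_free() and
 * ice_if_detach() in either order; a static member outlives both
 * paths (see the remarks above ice_if_queues_free()).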
 */
    ice_setup_pf_vsi(sc);

    ice_alloc_vsi_qmap(&sc->pf_vsi, scctx->isc_ntxqsets_max,
        scctx->isc_nrxqsets_max);

    /* Allocate MSI-X vectors (due to isc_flags IFLIB_SKIP_MSIX) */
    err = ice_allocate_msix(sc);
    if (err)
        goto free_main_vsi;

    return 0;

free_main_vsi:
    /* ice_release_vsi will free the queue maps if they were allocated */
    ice_release_vsi(&sc->pf_vsi);
    free(sc->all_vsi, M_ICE);
    sc->all_vsi = NULL;
free_intr_tracking:
    ice_free_intr_tracking(sc);
free_rx_qmgr:
    ice_resmgr_destroy(&sc->rx_qmgr);
free_tx_qmgr:
    ice_resmgr_destroy(&sc->tx_qmgr);
deinit_hw:
    ice_deinit_hw(hw);
free_pci_mapping:
    ice_free_pci_mapping(sc);
destroy_admin_timer:
    mtx_lock(&sc->admin_mtx);
    callout_stop(&sc->admin_timer);
    mtx_unlock(&sc->admin_mtx);
    mtx_destroy(&sc->admin_mtx);
    return err;
} /* ice_if_attach_pre */

/**
 * ice_attach_pre_recovery_mode - Limited driver attach_pre for FW recovery
 * @sc: the device private softc
 *
 * Loads the device driver in limited Firmware Recovery mode, intended to
 * allow users to update the firmware to attempt to recover the device.
 *
 * @remark We may enter recovery mode in case either (a) the firmware is
 * detected to be in an invalid state and must be re-programmed, or (b) the
 * driver detects that the loaded firmware has a non-compatible API version
 * that the driver cannot operate with.
 */
static int
ice_attach_pre_recovery_mode(struct ice_softc *sc)
{
    ice_set_state(&sc->state, ICE_STATE_RECOVERY_MODE);

    /* Setup the iflib softc context */
    ice_setup_scctx(sc);

    /* Setup the PF VSI back pointer */
    sc->pf_vsi.sc = sc;

    /*
     * We still need to allocate MSI-X vectors, since we need one vector
     * to service the administrative interrupt.
     */
    return ice_allocate_msix(sc);
}

/**
 * ice_update_link_status - notify OS of link state change
 * @sc: device private softc structure
 * @update_media: true if we should update media even if link didn't change
 *
 * Called to notify iflib core of link status changes. Should be called once
 * during attach_post, and whenever link status changes during runtime.
 *
 * This call only updates the currently supported media types if the link
 * status changed, or if update_media is set to true.
 */
static void
ice_update_link_status(struct ice_softc *sc, bool update_media)
{
    struct ice_hw *hw = &sc->hw;
    int status;

    /* Never report link up when in recovery mode */
    if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
        return;

    /* Report link status to iflib only once each time it changes */
    if (!ice_testandset_state(&sc->state, ICE_STATE_LINK_STATUS_REPORTED)) {
        if (sc->link_up) { /* link is up */
            uint64_t baudrate = ice_aq_speed_to_rate(sc->hw.port_info);

            if (!(hw->port_info->phy.link_info_old.link_info & ICE_AQ_LINK_UP))
                ice_set_default_local_lldp_mib(sc);

            iflib_link_state_change(sc->ctx, LINK_STATE_UP, baudrate);
            ice_rdma_link_change(sc, LINK_STATE_UP, baudrate);

            ice_link_up_msg(sc);
        } else { /* link is down */
            iflib_link_state_change(sc->ctx, LINK_STATE_DOWN, 0);
            ice_rdma_link_change(sc, LINK_STATE_DOWN, 0);
        }
        update_media = true;
    }

    /* Update the supported media types */
    if (update_media &&
        !ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET)) {
        status = ice_add_media_types(sc, sc->media);
        if (status)
            device_printf(sc->dev,
                "Error adding device media types: %s aq_err %s\n",
                ice_status_str(status),
                ice_aq_str(hw->adminq.sq_last_status));
    }
}

/**
 * ice_if_attach_post - Late device attach logic
 * @ctx: the iflib context structure
 *
 * Called by iflib to finish up attaching the device.
Performs any attach * logic which must wait until after the Tx and Rx queues have been * allocated. */ static int ice_if_attach_post(if_ctx_t ctx) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); if_t ifp = iflib_get_ifp(ctx); int status; int err; ASSERT_CTX_LOCKED(sc); /* We don't yet support loading if MSI-X is not supported */ if (sc->scctx->isc_intr != IFLIB_INTR_MSIX) { device_printf(sc->dev, "The ice driver does not support loading without MSI-X\n"); return (ENOTSUP); } /* The ifnet structure hasn't yet been initialized when the attach_pre * handler is called, so wait until attach_post to setup the * isc_max_frame_size. */ sc->scctx->isc_max_frame_size = if_getmtu(ifp) + ETHER_HDR_LEN + ETHER_CRC_LEN + ETHER_VLAN_ENCAP_LEN; /* * If we are in recovery mode, only perform a limited subset of * initialization to support NVM recovery. */ if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) { ice_attach_post_recovery_mode(sc); return (0); } sc->pf_vsi.max_frame_size = sc->scctx->isc_max_frame_size; err = ice_initialize_vsi(&sc->pf_vsi); if (err) { device_printf(sc->dev, "Unable to initialize Main VSI: %s\n", ice_err_str(err)); return err; } /* Enable FW health event reporting */ ice_init_health_events(sc); /* Configure the main PF VSI for RSS */ err = ice_config_rss(&sc->pf_vsi); if (err) { device_printf(sc->dev, "Unable to configure RSS for the main VSI, err %s\n", ice_err_str(err)); return err; } /* Configure switch to drop transmitted LLDP and PAUSE frames */ err = ice_cfg_pf_ethertype_filters(sc); if (err) return err; ice_get_and_print_bus_info(sc); ice_set_link_management_mode(sc); ice_init_saved_phy_cfg(sc); ice_cfg_pba_num(sc); /* Set a default value for PFC mode on attach since the FW state is unknown * before sysctl tunables are executed and it can't be queried. This fixes an * issue when loading the driver with the FW LLDP agent enabled but the FW * was previously in DSCP PFC mode. */ status = ice_aq_set_pfc_mode(&sc->hw, ICE_AQC_PFC_VLAN_BASED_PFC, NULL); if (status) device_printf(sc->dev, "Setting pfc mode failed, status %s\n", ice_status_str(status)); ice_add_device_sysctls(sc); /* Get DCBX/LLDP state and start DCBX agent */ ice_init_dcb_setup(sc); - /* Setup link configuration parameters */ - ice_init_link_configuration(sc); - ice_update_link_status(sc, true); + /* Setup link, if PHY FW is ready */ + ice_init_link(sc); /* Configure interrupt causes for the administrative interrupt */ ice_configure_misc_interrupts(sc); /* Enable ITR 0 right away, so that we can handle admin interrupts */ ice_enable_intr(&sc->hw, sc->irqvs[0].me); err = ice_rdma_pf_attach(sc); if (err) return (err); /* Start the admin timer */ mtx_lock(&sc->admin_mtx); callout_reset(&sc->admin_timer, hz/2, ice_admin_timer, sc); mtx_unlock(&sc->admin_mtx); if (ice_test_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN) && !ice_test_state(&sc->state, ICE_STATE_NO_MEDIA)) ice_set_state(&sc->state, ICE_STATE_FIRST_INIT_LINK); ice_clear_state(&sc->state, ICE_STATE_ATTACHING); return 0; } /* ice_if_attach_post */ /** * ice_attach_post_recovery_mode - Limited driver attach_post for FW recovery * @sc: the device private softc * * Performs minimal work to prepare the driver to recover an NVM in case the * firmware is in recovery mode. 
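 * Only the administrative interrupt and the admin timer are brought up
 * in this mode; no Tx/Rx queue or link configuration is performed.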
*/ static void ice_attach_post_recovery_mode(struct ice_softc *sc) { /* Configure interrupt causes for the administrative interrupt */ ice_configure_misc_interrupts(sc); /* Enable ITR 0 right away, so that we can handle admin interrupts */ ice_enable_intr(&sc->hw, sc->irqvs[0].me); /* Start the admin timer */ mtx_lock(&sc->admin_mtx); callout_reset(&sc->admin_timer, hz/2, ice_admin_timer, sc); mtx_unlock(&sc->admin_mtx); ice_clear_state(&sc->state, ICE_STATE_ATTACHING); } /** * ice_free_irqvs - Free IRQ vector memory * @sc: the device private softc structure * * Free IRQ vector memory allocated during ice_if_msix_intr_assign. */ static void ice_free_irqvs(struct ice_softc *sc) { struct ice_vsi *vsi = &sc->pf_vsi; if_ctx_t ctx = sc->ctx; int i; /* If the irqvs array is NULL, then there are no vectors to free */ if (sc->irqvs == NULL) return; /* Free the IRQ vectors */ for (i = 0; i < sc->num_irq_vectors; i++) iflib_irq_free(ctx, &sc->irqvs[i].irq); /* Clear the irqv pointers */ for (i = 0; i < vsi->num_rx_queues; i++) vsi->rx_queues[i].irqv = NULL; for (i = 0; i < vsi->num_tx_queues; i++) vsi->tx_queues[i].irqv = NULL; /* Release the vector array memory */ free(sc->irqvs, M_ICE); sc->irqvs = NULL; sc->num_irq_vectors = 0; } /** * ice_if_detach - Device driver detach logic * @ctx: iflib context structure * * Perform device shutdown logic to detach the device driver. * * Note that there is no guarantee of the ordering of ice_if_queues_free() and * ice_if_detach(). It is possible for the functions to be called in either * order, and they must not assume to have a strict ordering. */ static int ice_if_detach(if_ctx_t ctx) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); struct ice_vsi *vsi = &sc->pf_vsi; int status; int i; ASSERT_CTX_LOCKED(sc); /* Indicate that we're detaching */ ice_set_state(&sc->state, ICE_STATE_DETACHING); /* Stop the admin timer */ mtx_lock(&sc->admin_mtx); callout_stop(&sc->admin_timer); mtx_unlock(&sc->admin_mtx); mtx_destroy(&sc->admin_mtx); /* Remove additional interfaces if they exist */ if (sc->mirr_if) ice_destroy_mirror_interface(sc); ice_rdma_pf_detach(sc); /* Free allocated media types */ ifmedia_removeall(sc->media); /* Free the Tx and Rx sysctl contexts, and assign NULL to the node * pointers. 
Note, the calls here and those in ice_if_queues_free() * are *BOTH* necessary, as we cannot guarantee which path will be * run first */ ice_vsi_del_txqs_ctx(vsi); ice_vsi_del_rxqs_ctx(vsi); /* Release MSI-X resources */ ice_free_irqvs(sc); for (i = 0; i < sc->num_available_vsi; i++) { if (sc->all_vsi[i]) ice_release_vsi(sc->all_vsi[i]); } if (sc->all_vsi) { free(sc->all_vsi, M_ICE); sc->all_vsi = NULL; } /* Release MSI-X memory */ pci_release_msi(sc->dev); if (sc->msix_table != NULL) { bus_release_resource(sc->dev, SYS_RES_MEMORY, rman_get_rid(sc->msix_table), sc->msix_table); sc->msix_table = NULL; } ice_free_intr_tracking(sc); /* Destroy the queue managers */ ice_resmgr_destroy(&sc->tx_qmgr); ice_resmgr_destroy(&sc->rx_qmgr); if (!ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) ice_deinit_hw(&sc->hw); IFLIB_CTX_UNLOCK(sc); status = ice_reset(&sc->hw, ICE_RESET_PFR); IFLIB_CTX_LOCK(sc); if (status) { device_printf(sc->dev, "device PF reset failed, err %s\n", ice_status_str(status)); } ice_free_pci_mapping(sc); return 0; } /* ice_if_detach */ /** * ice_if_tx_queues_alloc - Allocate Tx queue memory * @ctx: iflib context structure * @vaddrs: virtual addresses for the queue memory * @paddrs: physical addresses for the queue memory * @ntxqs: the number of Tx queues per set (should always be 1) * @ntxqsets: the number of Tx queue sets to allocate * * Called by iflib to allocate Tx queues for the device. Allocates driver * memory to track each queue, the status arrays used for descriptor * status reporting, and Tx queue sysctls. */ static int ice_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int __invariant_only ntxqs, int ntxqsets) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); struct ice_vsi *vsi = &sc->pf_vsi; struct ice_tx_queue *txq; int err, i, j; MPASS(ntxqs == 1); MPASS(sc->scctx->isc_ntxd[0] <= ICE_MAX_DESC_COUNT); ASSERT_CTX_LOCKED(sc); /* Do not bother allocating queues if we're in recovery mode */ if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) return (0); /* Allocate queue structure memory */ if (!(vsi->tx_queues = (struct ice_tx_queue *) malloc(sizeof(struct ice_tx_queue) * ntxqsets, M_ICE, M_NOWAIT | M_ZERO))) { device_printf(sc->dev, "Unable to allocate Tx queue memory\n"); return (ENOMEM); } /* Allocate report status arrays */ for (i = 0, txq = vsi->tx_queues; i < ntxqsets; i++, txq++) { if (!(txq->tx_rsq = (uint16_t *) malloc(sizeof(uint16_t) * sc->scctx->isc_ntxd[0], M_ICE, M_NOWAIT))) { device_printf(sc->dev, "Unable to allocate tx_rsq memory\n"); err = ENOMEM; goto free_tx_queues; } /* Initialize report status array */ for (j = 0; j < sc->scctx->isc_ntxd[0]; j++) txq->tx_rsq[j] = QIDX_INVALID; } /* Assign queues from PF space to the main VSI */ err = ice_resmgr_assign_contiguous(&sc->tx_qmgr, vsi->tx_qmap, ntxqsets); if (err) { device_printf(sc->dev, "Unable to assign PF queues: %s\n", ice_err_str(err)); goto free_tx_queues; } vsi->qmap_type = ICE_RESMGR_ALLOC_CONTIGUOUS; /* Add Tx queue sysctls context */ ice_vsi_add_txqs_ctx(vsi); for (i = 0, txq = vsi->tx_queues; i < ntxqsets; i++, txq++) { /* q_handle == me when only one TC */ txq->me = txq->q_handle = i; txq->vsi = vsi; /* store the queue size for easier access */ txq->desc_count = sc->scctx->isc_ntxd[0]; /* get the virtual and physical address of the hardware queues */ txq->tail = QTX_COMM_DBELL(vsi->tx_qmap[i]); txq->tx_base = (struct ice_tx_desc *)vaddrs[i]; txq->tx_paddr = paddrs[i]; ice_add_txq_sysctls(txq); } vsi->num_tx_queues = ntxqsets; return (0); 
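/* Error unwind: release any tx_rsq report-status arrays that were
 * allocated above before freeing the queue array itself.
 */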
free_tx_queues: for (i = 0, txq = vsi->tx_queues; i < ntxqsets; i++, txq++) { if (txq->tx_rsq != NULL) { free(txq->tx_rsq, M_ICE); txq->tx_rsq = NULL; } } free(vsi->tx_queues, M_ICE); vsi->tx_queues = NULL; return err; } /** * ice_if_rx_queues_alloc - Allocate Rx queue memory * @ctx: iflib context structure * @vaddrs: virtual addresses for the queue memory * @paddrs: physical addresses for the queue memory * @nrxqs: number of Rx queues per set (should always be 1) * @nrxqsets: number of Rx queue sets to allocate * * Called by iflib to allocate Rx queues for the device. Allocates driver * memory to track each queue, as well as sets up the Rx queue sysctls. */ static int ice_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int __invariant_only nrxqs, int nrxqsets) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); struct ice_vsi *vsi = &sc->pf_vsi; struct ice_rx_queue *rxq; int err, i; MPASS(nrxqs == 1); MPASS(sc->scctx->isc_nrxd[0] <= ICE_MAX_DESC_COUNT); ASSERT_CTX_LOCKED(sc); /* Do not bother allocating queues if we're in recovery mode */ if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) return (0); /* Allocate queue structure memory */ if (!(vsi->rx_queues = (struct ice_rx_queue *) malloc(sizeof(struct ice_rx_queue) * nrxqsets, M_ICE, M_NOWAIT | M_ZERO))) { device_printf(sc->dev, "Unable to allocate Rx queue memory\n"); return (ENOMEM); } /* Assign queues from PF space to the main VSI */ err = ice_resmgr_assign_contiguous(&sc->rx_qmgr, vsi->rx_qmap, nrxqsets); if (err) { device_printf(sc->dev, "Unable to assign PF queues: %s\n", ice_err_str(err)); goto free_rx_queues; } vsi->qmap_type = ICE_RESMGR_ALLOC_CONTIGUOUS; /* Add Rx queue sysctls context */ ice_vsi_add_rxqs_ctx(vsi); for (i = 0, rxq = vsi->rx_queues; i < nrxqsets; i++, rxq++) { rxq->me = i; rxq->vsi = vsi; /* store the queue size for easier access */ rxq->desc_count = sc->scctx->isc_nrxd[0]; /* get the virtual and physical address of the hardware queues */ rxq->tail = QRX_TAIL(vsi->rx_qmap[i]); rxq->rx_base = (union ice_32b_rx_flex_desc *)vaddrs[i]; rxq->rx_paddr = paddrs[i]; ice_add_rxq_sysctls(rxq); } vsi->num_rx_queues = nrxqsets; return (0); free_rx_queues: free(vsi->rx_queues, M_ICE); vsi->rx_queues = NULL; return err; } /** * ice_if_queues_free - Free queue memory * @ctx: the iflib context structure * * Free queue memory allocated by ice_if_tx_queues_alloc() and * ice_if_rx_queues_alloc(). * * There is no guarantee that ice_if_queues_free() and ice_if_detach() will be * called in the same order. It's possible for ice_if_queues_free() to be * called prior to ice_if_detach(), and vice versa. * * For this reason, the main VSI is a static member of the ice_softc, which is * not free'd until after iflib finishes calling both of these functions. * * Thus, care must be taken in how we manage the memory being freed by this * function, and in what tasks it can and must perform. */ static void ice_if_queues_free(if_ctx_t ctx) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); struct ice_vsi *vsi = &sc->pf_vsi; struct ice_tx_queue *txq; int i; /* Free the Tx and Rx sysctl contexts, and assign NULL to the node * pointers. 
Note, the calls here and those in ice_if_detach() * are *BOTH* necessary, as we cannot guarantee which path will be * run first */ ice_vsi_del_txqs_ctx(vsi); ice_vsi_del_rxqs_ctx(vsi); /* Release MSI-X IRQ vectors, if not yet released in ice_if_detach */ ice_free_irqvs(sc); if (vsi->tx_queues != NULL) { /* free the tx_rsq arrays */ for (i = 0, txq = vsi->tx_queues; i < vsi->num_tx_queues; i++, txq++) { if (txq->tx_rsq != NULL) { free(txq->tx_rsq, M_ICE); txq->tx_rsq = NULL; } } free(vsi->tx_queues, M_ICE); vsi->tx_queues = NULL; vsi->num_tx_queues = 0; } if (vsi->rx_queues != NULL) { free(vsi->rx_queues, M_ICE); vsi->rx_queues = NULL; vsi->num_rx_queues = 0; } } /** * ice_msix_que - Fast interrupt handler for MSI-X receive queues * @arg: The Rx queue memory * * Interrupt filter function for iflib MSI-X interrupts. Called by iflib when * an MSI-X interrupt for a given queue is triggered. Currently this just asks * iflib to schedule the main Rx thread. */ static int ice_msix_que(void *arg) { struct ice_rx_queue __unused *rxq = (struct ice_rx_queue *)arg; /* TODO: dynamic ITR algorithm?? */ return (FILTER_SCHEDULE_THREAD); } /** * ice_msix_admin - Fast interrupt handler for MSI-X admin interrupt * @arg: pointer to device softc memory * * Called by iflib when an administrative interrupt occurs. Should perform any * fast logic for handling the interrupt cause, and then indicate whether the * admin task needs to be queued. */ static int ice_msix_admin(void *arg) { struct ice_softc *sc = (struct ice_softc *)arg; struct ice_hw *hw = &sc->hw; device_t dev = sc->dev; u32 oicr; /* There is no safe way to modify the enabled miscellaneous causes of * the OICR vector at runtime, as doing so would be prone to race * conditions. Reading PFINT_OICR will unmask the associated interrupt * causes and allow future interrupts to occur. The admin interrupt * vector will not be re-enabled until after we exit this function, * but any delayed tasks must be resilient against possible "late * arrival" interrupts that occur while we're already handling the * task. This is done by using state bits and serializing these * delayed tasks via the admin status task function. */ oicr = rd32(hw, PFINT_OICR); /* Processing multiple controlq interrupts on a single vector does not * provide an indication of which controlq triggered the interrupt. * We might try reading the INTEVENT bit of the respective PFINT_*_CTL * registers. However, the INTEVENT bit is not guaranteed to be set as * it gets automatically cleared when the hardware acknowledges the * interrupt. * * This means we don't really have a good indication of whether or * which controlq triggered this interrupt. We'll just notify the * admin task that it should check all the controlqs. */ ice_set_state(&sc->state, ICE_STATE_CONTROLQ_EVENT_PENDING); if (oicr & PFINT_OICR_VFLR_M) { ice_set_state(&sc->state, ICE_STATE_VFLR_PENDING); } if (oicr & PFINT_OICR_MAL_DETECT_M) { ice_set_state(&sc->state, ICE_STATE_MDD_PENDING); } if (oicr & PFINT_OICR_GRST_M) { u32 reset; reset = (rd32(hw, GLGEN_RSTAT) & GLGEN_RSTAT_RESET_TYPE_M) >> GLGEN_RSTAT_RESET_TYPE_S; if (reset == ICE_RESET_CORER) sc->soft_stats.corer_count++; else if (reset == ICE_RESET_GLOBR) sc->soft_stats.globr_count++; else sc->soft_stats.empr_count++; /* There are a couple of bits at play for handling resets. * First, the ICE_STATE_RESET_OICR_RECV bit is used to * indicate that the driver has received an OICR with a reset * bit active, indicating that a CORER/GLOBR/EMPR is about to * happen. 
Second, we set hw->reset_ongoing to indicate that
         * the hardware is in reset. We will set this back to false as
         * soon as the driver has determined that the hardware is out
         * of reset.
         *
         * If the driver wishes to trigger a request, it can set one of
         * the ICE_STATE_RESET_*_REQ bits, which will trigger the
         * correct type of reset.
         */
        if (!ice_testandset_state(&sc->state, ICE_STATE_RESET_OICR_RECV)) {
            hw->reset_ongoing = true;
            /*
             * During the NVM update process, there is a driver reset and link
             * goes down and then up. The below if-statement prevents a second
             * link flap from occurring in ice_if_init().
             */
            if (if_getflags(sc->ifp) & IFF_UP)
                ice_set_state(&sc->state, ICE_STATE_FIRST_INIT_LINK);
        }
    }

    if (oicr & PFINT_OICR_ECC_ERR_M) {
        device_printf(dev, "ECC Error detected!\n");
        ice_set_state(&sc->state, ICE_STATE_RESET_PFR_REQ);
    }

    if (oicr & (PFINT_OICR_PE_CRITERR_M | PFINT_OICR_HMC_ERR_M)) {
        if (oicr & PFINT_OICR_HMC_ERR_M)
            /* Log the HMC errors */
            ice_log_hmc_error(hw, dev);
        ice_rdma_notify_pe_intr(sc, oicr);
    }

    if (oicr & PFINT_OICR_PCI_EXCEPTION_M) {
        device_printf(dev, "PCI Exception detected!\n");
        ice_set_state(&sc->state, ICE_STATE_RESET_PFR_REQ);
    }

    return (FILTER_SCHEDULE_THREAD);
}

/**
 * ice_allocate_msix - Allocate MSI-X vectors for the interface
 * @sc: the device private softc
 *
 * Map the MSI-X bar, and then request MSI-X vectors in a two-stage process.
 *
 * First, determine a suitable total number of vectors based on the number
 * of CPUs, RSS buckets, the administrative vector, and other demands such as
 * RDMA.
 *
 * Request the desired number of vectors, and see how many we obtain. If we
 * don't obtain as many as desired, reduce the demands by lowering the number
 * of requested queues or reducing the demand from other features such as
 * RDMA.
 *
 * @remark This function is required because the driver sets the
 * IFLIB_SKIP_MSIX flag indicating that the driver will manage MSI-X vectors
 * manually.
 *
 * @remark This driver will only use MSI-X vectors. If this is not possible,
 * neither MSI nor legacy interrupts will be tried.
 *
 * @remark If it exists, os_imgr is initialized here to keep track of the
 * assignments of extra MSI-X vectors.
 *
 * @post on success this function must set the following scctx parameters:
 * isc_vectors, isc_nrxqsets, isc_ntxqsets, and isc_intr.
 *
 * @returns zero on success or an error code on failure.
 */
static int
ice_allocate_msix(struct ice_softc *sc)
{
    bool iflib_override_queue_count = false;
    if_softc_ctx_t scctx = sc->scctx;
    device_t dev = sc->dev;
    cpuset_t cpus;
    int bar, queues, vectors, requested;
    int err = 0;
    int rdma;

    /* Allocate the MSI-X bar */
    bar = scctx->isc_msix_bar;
    sc->msix_table = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &bar,
        RF_ACTIVE);
    if (!sc->msix_table) {
        device_printf(dev, "Unable to map MSI-X table\n");
        return (ENOMEM);
    }

    /* Check if the iflib queue count sysctls have been set */
    if (sc->ifc_sysctl_ntxqs || sc->ifc_sysctl_nrxqs)
        iflib_override_queue_count = true;

    err = bus_get_cpus(dev, INTR_CPUS, sizeof(cpus), &cpus);
    if (err) {
        device_printf(dev, "%s: Unable to fetch the CPU list: %s\n",
            __func__, ice_err_str(err));
        CPU_COPY(&all_cpus, &cpus);
    }

    /* Attempt to mimic behavior of iflib_msix_init */
    if (iflib_override_queue_count) {
        /*
         * If the override sysctls have been set, limit the queues to
         * the number of logical CPUs.
         */
        queues = mp_ncpus;
    } else {
        /*
         * Otherwise, limit the queue count to the CPUs associated
         * with the NUMA node the device is attached to.
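         *
         * As a worked example (numbers illustrative only): on a 16-CPU
         * NUMA node with 8 RSS buckets and no overrides, the clamping
         * below yields queues = min(16, 8) = 8, and with rdma capped at
         * ICE_RDMA_MAX_MSIX the initial request becomes
         * rdma + 8 + 1 vectors, the final 1 being the admin vector.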
*/ queues = CPU_COUNT(&cpus); } /* Clamp to the number of RSS buckets */ queues = imin(queues, rss_getnumbuckets()); /* * Clamp the number of queue pairs to the minimum of the requested Tx * and Rx queues. */ queues = imin(queues, sc->ifc_sysctl_ntxqs ?: scctx->isc_ntxqsets); queues = imin(queues, sc->ifc_sysctl_nrxqs ?: scctx->isc_nrxqsets); if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_RDMA)) { /* * Choose a number of RDMA vectors based on the number of CPUs * up to a maximum */ rdma = min(CPU_COUNT(&cpus), ICE_RDMA_MAX_MSIX); /* Further limit by the user configurable tunable */ rdma = min(rdma, ice_rdma_max_msix); } else { rdma = 0; } /* * Determine the number of vectors to request. Note that we also need * to allocate one vector for administrative tasks. */ requested = rdma + queues + 1; /* Add extra vectors requested by the user for later subinterface * creation. */ if_ctx_t ctx = sc->ctx; u32 extra_vectors = iflib_get_extra_msix_vectors_sysctl(ctx); requested += extra_vectors; vectors = requested; err = pci_alloc_msix(dev, &vectors); if (err) { device_printf(dev, "Failed to allocate %d MSI-X vectors, err %s\n", vectors, ice_err_str(err)); goto err_free_msix_table; } /* If we don't receive enough vectors, reduce demands */ if (vectors < requested) { int diff = requested - vectors; device_printf(dev, "Requested %d MSI-X vectors, but got only %d\n", requested, vectors); diff += extra_vectors; extra_vectors = 0; /* * The OS didn't grant us the requested number of vectors. * Check to see if we can reduce demands by limiting the * number of vectors allocated to certain features. */ if (rdma >= diff) { /* Reduce the number of RDMA vectors we reserve */ rdma -= diff; diff = 0; } else { /* Disable RDMA and reduce the difference */ ice_clear_bit(ICE_FEATURE_RDMA, sc->feat_cap); diff -= rdma; rdma = 0; } /* * If we still have a difference, we need to reduce the number * of queue pairs. * * However, we still need at least one vector for the admin * interrupt and one queue pair. */ if (queues <= diff) { device_printf(dev, "Unable to allocate sufficient MSI-X vectors\n"); err = (ERANGE); goto err_pci_release_msi; } queues -= diff; } device_printf(dev, "Using %d Tx and Rx queues\n", queues); if (rdma) device_printf(dev, "Reserving %d MSI-X interrupts for iRDMA\n", rdma); device_printf(dev, "Using MSI-X interrupts with %d vectors\n", vectors); /* Split resulting vectors back into requested splits */ scctx->isc_vectors = vectors; scctx->isc_nrxqsets = queues; scctx->isc_ntxqsets = queues; scctx->isc_intr = IFLIB_INTR_MSIX; sc->irdma_vectors = rdma; /* Interrupt allocation tracking isn't required in recovery mode, * since neither RDMA nor VFs are enabled. */ if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) return (0); /* Keep track of which interrupt indices are being used for what */ sc->lan_vectors = vectors - rdma; sc->lan_vectors -= extra_vectors; err = ice_resmgr_assign_contiguous(&sc->dev_imgr, sc->pf_imap, sc->lan_vectors); if (err) { device_printf(dev, "Unable to assign PF interrupt mapping: %s\n", ice_err_str(err)); goto err_pci_release_msi; } err = ice_resmgr_assign_contiguous(&sc->dev_imgr, sc->rdma_imap, rdma); if (err) { device_printf(dev, "Unable to assign PF RDMA interrupt mapping: %s\n", ice_err_str(err)); goto err_release_pf_imap; } sc->extra_vectors = extra_vectors; /* Setup another resource manager to track the assignments of extra OS * vectors. These OS interrupt allocations don't need to be contiguous, * unlike the ones that come from the device. 
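 * (dev_imgr hands out device-relative interrupt indices, which are
 * assigned contiguously for the LAN and RDMA blocks above; os_imgr
 * only tracks which of the extra OS vectors are currently in use by
 * subinterfaces.)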
*/ err = ice_resmgr_init(&sc->os_imgr, sc->extra_vectors); if (err) { device_printf(dev, "Unable to initialize OS extra interrupt manager: %s\n", ice_err_str(err)); ice_resmgr_release_map(&sc->dev_imgr, sc->rdma_imap, rdma); goto err_release_pf_imap; } return (0); err_release_pf_imap: ice_resmgr_release_map(&sc->dev_imgr, sc->pf_imap, sc->lan_vectors); err_pci_release_msi: pci_release_msi(dev); err_free_msix_table: if (sc->msix_table != NULL) { bus_release_resource(sc->dev, SYS_RES_MEMORY, rman_get_rid(sc->msix_table), sc->msix_table); sc->msix_table = NULL; } return (err); } /** * ice_if_msix_intr_assign - Assign MSI-X interrupt vectors to queues * @ctx: the iflib context structure * @msix: the number of vectors we were assigned * * Called by iflib to assign MSI-X vectors to queues. Currently requires that * we get at least the same number of vectors as we have queues, and that we * always have the same number of Tx and Rx queues. * * Tx queues use a softirq instead of using their own hardware interrupt. */ static int ice_if_msix_intr_assign(if_ctx_t ctx, int msix) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); struct ice_vsi *vsi = &sc->pf_vsi; int err, i, vector; ASSERT_CTX_LOCKED(sc); if (vsi->num_rx_queues != vsi->num_tx_queues) { device_printf(sc->dev, "iflib requested %d Tx queues, and %d Rx queues, but the driver isn't able to support a differing number of Tx and Rx queues\n", vsi->num_tx_queues, vsi->num_rx_queues); return (EOPNOTSUPP); } if (msix < (vsi->num_rx_queues + 1)) { device_printf(sc->dev, "Not enough MSI-X vectors to assign one vector to each queue pair\n"); return (EOPNOTSUPP); } /* Save the number of vectors for future use */ sc->num_irq_vectors = vsi->num_rx_queues + 1; /* Allocate space to store the IRQ vector data */ if (!(sc->irqvs = (struct ice_irq_vector *) malloc(sizeof(struct ice_irq_vector) * (sc->num_irq_vectors), M_ICE, M_NOWAIT))) { device_printf(sc->dev, "Unable to allocate irqv memory\n"); return (ENOMEM); } /* Administrative interrupt events will use vector 0 */ err = iflib_irq_alloc_generic(ctx, &sc->irqvs[0].irq, 1, IFLIB_INTR_ADMIN, ice_msix_admin, sc, 0, "admin"); if (err) { device_printf(sc->dev, "Failed to register Admin queue handler: %s\n", ice_err_str(err)); goto free_irqvs; } sc->irqvs[0].me = 0; /* Do not allocate queue interrupts when in recovery mode */ if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) return (0); int rid; for (i = 0, vector = 1; i < vsi->num_rx_queues; i++, vector++) { struct ice_rx_queue *rxq = &vsi->rx_queues[i]; struct ice_tx_queue *txq = &vsi->tx_queues[i]; char irq_name[16]; rid = vector + 1; snprintf(irq_name, sizeof(irq_name), "rxq%d", i); err = iflib_irq_alloc_generic(ctx, &sc->irqvs[vector].irq, rid, IFLIB_INTR_RXTX, ice_msix_que, rxq, rxq->me, irq_name); if (err) { device_printf(sc->dev, "Failed to allocate q int %d err: %s\n", i, ice_err_str(err)); vector--; i--; goto fail; } sc->irqvs[vector].me = vector; rxq->irqv = &sc->irqvs[vector]; bzero(irq_name, sizeof(irq_name)); snprintf(irq_name, sizeof(irq_name), "txq%d", i); iflib_softirq_alloc_generic(ctx, &sc->irqvs[vector].irq, IFLIB_INTR_TX, txq, txq->me, irq_name); txq->irqv = &sc->irqvs[vector]; } /* For future interrupt assignments */ sc->last_rid = rid + sc->irdma_vectors; return (0); fail: for (; i >= 0; i--, vector--) iflib_irq_free(ctx, &sc->irqvs[vector].irq); iflib_irq_free(ctx, &sc->irqvs[0].irq); free_irqvs: free(sc->irqvs, M_ICE); sc->irqvs = NULL; return err; } /** * ice_if_mtu_set - Set the device MTU * @ctx: iflib 
context structure * @mtu: the MTU requested * * Called by iflib to configure the device's Maximum Transmission Unit (MTU). * * @pre assumes the caller holds the iflib CTX lock */ static int ice_if_mtu_set(if_ctx_t ctx, uint32_t mtu) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); ASSERT_CTX_LOCKED(sc); /* Do not support configuration when in recovery mode */ if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) return (ENOSYS); if (mtu < ICE_MIN_MTU || mtu > ICE_MAX_MTU) return (EINVAL); sc->scctx->isc_max_frame_size = mtu + ETHER_HDR_LEN + ETHER_CRC_LEN + ETHER_VLAN_ENCAP_LEN; sc->pf_vsi.max_frame_size = sc->scctx->isc_max_frame_size; return (0); } /** * ice_if_intr_enable - Enable device interrupts * @ctx: iflib context structure * * Called by iflib to request enabling device interrupts. */ static void ice_if_intr_enable(if_ctx_t ctx) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); struct ice_vsi *vsi = &sc->pf_vsi; struct ice_hw *hw = &sc->hw; ASSERT_CTX_LOCKED(sc); /* Enable ITR 0 */ ice_enable_intr(hw, sc->irqvs[0].me); /* Do not enable queue interrupts in recovery mode */ if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) return; /* Enable all queue interrupts */ for (int i = 0; i < vsi->num_rx_queues; i++) ice_enable_intr(hw, vsi->rx_queues[i].irqv->me); } /** * ice_if_intr_disable - Disable device interrupts * @ctx: iflib context structure * * Called by iflib to request disabling device interrupts. */ static void ice_if_intr_disable(if_ctx_t ctx) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); struct ice_hw *hw = &sc->hw; unsigned int i; ASSERT_CTX_LOCKED(sc); /* IFDI_INTR_DISABLE may be called prior to interrupts actually being * assigned to queues. Instead of assuming that the interrupt * assignment in the rx_queues structure is valid, just disable all * possible interrupts * * Note that we choose not to disable ITR 0 because this handles the * AdminQ interrupts, and we want to keep processing these even when * the interface is offline. */ for (i = 1; i < hw->func_caps.common_cap.num_msix_vectors; i++) ice_disable_intr(hw, i); } /** * ice_if_rx_queue_intr_enable - Enable a specific Rx queue interrupt * @ctx: iflib context structure * @rxqid: the Rx queue to enable * * Enable a specific Rx queue interrupt. * * This function is not protected by the iflib CTX lock. */ static int ice_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); struct ice_vsi *vsi = &sc->pf_vsi; struct ice_hw *hw = &sc->hw; /* Do not enable queue interrupts in recovery mode */ if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) return (ENOSYS); ice_enable_intr(hw, vsi->rx_queues[rxqid].irqv->me); return (0); } /** * ice_if_tx_queue_intr_enable - Enable a specific Tx queue interrupt * @ctx: iflib context structure * @txqid: the Tx queue to enable * * Enable a specific Tx queue interrupt. * * This function is not protected by the iflib CTX lock. 
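 * It is limited to an atomic state-bit test and a single per-vector
 * interrupt enable register write, so it does not touch any state that
 * would require that lock.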
*/ static int ice_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); struct ice_vsi *vsi = &sc->pf_vsi; struct ice_hw *hw = &sc->hw; /* Do not enable queue interrupts in recovery mode */ if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) return (ENOSYS); ice_enable_intr(hw, vsi->tx_queues[txqid].irqv->me); return (0); } /** * ice_set_default_promisc_mask - Set default config for promisc settings * @promisc_mask: bitmask to setup * * The ice_(set|clear)_vsi_promisc() function expects a mask of promiscuous * modes to operate on. The mask used in here is the default one for the * driver, where promiscuous is enabled/disabled for all types of * non-VLAN-tagged/VLAN 0 traffic. */ static void ice_set_default_promisc_mask(ice_bitmap_t *promisc_mask) { ice_zero_bitmap(promisc_mask, ICE_PROMISC_MAX); ice_set_bit(ICE_PROMISC_UCAST_TX, promisc_mask); ice_set_bit(ICE_PROMISC_UCAST_RX, promisc_mask); ice_set_bit(ICE_PROMISC_MCAST_TX, promisc_mask); ice_set_bit(ICE_PROMISC_MCAST_RX, promisc_mask); } /** * ice_if_promisc_set - Set device promiscuous mode * @ctx: iflib context structure * @flags: promiscuous flags to configure * * Called by iflib to configure device promiscuous mode. * * @remark Calls to this function will always overwrite the previous setting */ static int ice_if_promisc_set(if_ctx_t ctx, int flags) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); struct ice_hw *hw = &sc->hw; device_t dev = sc->dev; int status; bool promisc_enable = flags & IFF_PROMISC; bool multi_enable = flags & IFF_ALLMULTI; ice_declare_bitmap(promisc_mask, ICE_PROMISC_MAX); /* Do not support configuration when in recovery mode */ if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) return (ENOSYS); ice_set_default_promisc_mask(promisc_mask); if (multi_enable) return (EOPNOTSUPP); if (promisc_enable) { status = ice_set_vsi_promisc(hw, sc->pf_vsi.idx, promisc_mask, 0); if (status && status != ICE_ERR_ALREADY_EXISTS) { device_printf(dev, "Failed to enable promiscuous mode for PF VSI, err %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); return (EIO); } } else { status = ice_clear_vsi_promisc(hw, sc->pf_vsi.idx, promisc_mask, 0); if (status) { device_printf(dev, "Failed to disable promiscuous mode for PF VSI, err %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); return (EIO); } } return (0); } /** * ice_if_media_change - Change device media * @ctx: device ctx structure * * Called by iflib when a media change is requested. This operation is not * supported by the hardware, so we just return an error code. */ static int ice_if_media_change(if_ctx_t ctx) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); device_printf(sc->dev, "Media change is not supported.\n"); return (ENODEV); } /** * ice_if_media_status - Report current device media * @ctx: iflib context structure * @ifmr: ifmedia request structure to update * * Updates the provided ifmr with current device media status, including link * status and media type. 
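 * The active media is derived from the PHY types reported by firmware
 * via ice_get_phy_type_low()/ice_get_phy_type_high(), and pause frame
 * status is taken from the autonegotiation info.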
*/ static void ice_if_media_status(if_ctx_t ctx, struct ifmediareq *ifmr) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); struct ice_link_status *li = &sc->hw.port_info->phy.link_info; ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; /* Never report link up or media types when in recovery mode */ if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) return; if (!sc->link_up) return; ifmr->ifm_status |= IFM_ACTIVE; ifmr->ifm_active |= IFM_FDX; if (li->phy_type_low) ifmr->ifm_active |= ice_get_phy_type_low(li->phy_type_low); else if (li->phy_type_high) ifmr->ifm_active |= ice_get_phy_type_high(li->phy_type_high); else ifmr->ifm_active |= IFM_UNKNOWN; /* Report flow control status as well */ if (li->an_info & ICE_AQ_LINK_PAUSE_TX) ifmr->ifm_active |= IFM_ETH_TXPAUSE; if (li->an_info & ICE_AQ_LINK_PAUSE_RX) ifmr->ifm_active |= IFM_ETH_RXPAUSE; } /** * ice_init_tx_tracking - Initialize Tx queue software tracking values * @vsi: the VSI to initialize * * Initialize Tx queue software tracking values, including the Report Status * queue, and related software tracking values. */ static void ice_init_tx_tracking(struct ice_vsi *vsi) { struct ice_tx_queue *txq; size_t j; int i; for (i = 0, txq = vsi->tx_queues; i < vsi->num_tx_queues; i++, txq++) { txq->tx_rs_cidx = txq->tx_rs_pidx = 0; /* Initialize the last processed descriptor to be the end of * the ring, rather than the start, so that we avoid an * off-by-one error in ice_ift_txd_credits_update for the * first packet. */ txq->tx_cidx_processed = txq->desc_count - 1; for (j = 0; j < txq->desc_count; j++) txq->tx_rsq[j] = QIDX_INVALID; } } /** * ice_update_rx_mbuf_sz - Update the Rx buffer size for all queues * @sc: the device softc * * Called to update the Rx queue mbuf_sz parameter for configuring the receive * buffer sizes when programming hardware. */ static void ice_update_rx_mbuf_sz(struct ice_softc *sc) { uint32_t mbuf_sz = iflib_get_rx_mbuf_sz(sc->ctx); struct ice_vsi *vsi = &sc->pf_vsi; MPASS(mbuf_sz <= UINT16_MAX); vsi->mbuf_sz = mbuf_sz; } /** * ice_if_init - Initialize the device * @ctx: iflib ctx structure * * Called by iflib to bring the device up, i.e. ifconfig ice0 up. Initializes * device filters and prepares the Tx and Rx engines. * * @pre assumes the caller holds the iflib CTX lock */ static void ice_if_init(if_ctx_t ctx) { struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx); struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); device_t dev = sc->dev; int err; ASSERT_CTX_LOCKED(sc); /* * We've seen an issue with 11.3/12.1 where sideband routines are * called after detach is called. This would call routines after * if_stop, causing issues with the teardown process. This has * seemingly been fixed in STABLE snapshots, but it seems like a * good idea to have this guard here regardless. */ if (ice_driver_is_detaching(sc)) return; if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) return; if (ice_test_state(&sc->state, ICE_STATE_RESET_FAILED)) { device_printf(sc->dev, "request to start interface cannot be completed as the device failed to reset\n"); return; } if (ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET)) { device_printf(sc->dev, "request to start interface while device is prepared for impending reset\n"); return; } ice_update_rx_mbuf_sz(sc); /* Update the MAC address... 
User might use a LAA */
    err = ice_update_laa_mac(sc);
    if (err) {
        device_printf(dev, "LAA address change failed, err %s\n",
            ice_err_str(err));
        return;
    }

    /* Initialize software Tx tracking values */
    ice_init_tx_tracking(&sc->pf_vsi);

    err = ice_cfg_vsi_for_tx(&sc->pf_vsi);
    if (err) {
        device_printf(dev, "Unable to configure the main VSI for Tx: %s\n",
            ice_err_str(err));
        return;
    }

    err = ice_cfg_vsi_for_rx(&sc->pf_vsi);
    if (err) {
        device_printf(dev, "Unable to configure the main VSI for Rx: %s\n",
            ice_err_str(err));
        goto err_cleanup_tx;
    }

    err = ice_control_all_rx_queues(&sc->pf_vsi, true);
    if (err) {
        device_printf(dev, "Unable to enable Rx rings for receive: %s\n",
            ice_err_str(err));
        goto err_cleanup_tx;
    }

    err = ice_cfg_pf_default_mac_filters(sc);
    if (err) {
        device_printf(dev, "Unable to configure default MAC filters: %s\n",
            ice_err_str(err));
        goto err_stop_rx;
    }

    /* We use software interrupts for Tx, so we only program the hardware
     * interrupts for Rx.
     */
    ice_configure_all_rxq_interrupts(&sc->pf_vsi);
    ice_configure_rx_itr(&sc->pf_vsi);

    /* Configure promiscuous mode */
    ice_if_promisc_set(ctx, if_getflags(sc->ifp));

    if (!ice_testandclear_state(&sc->state, ICE_STATE_FIRST_INIT_LINK))
        if (!sc->link_up && ((if_getflags(sc->ifp) & IFF_UP) ||
            ice_test_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN)))
            ice_set_link(sc, true);

    ice_rdma_pf_init(sc);

    ice_set_state(&sc->state, ICE_STATE_DRIVER_INITIALIZED);

    if (sc->mirr_if && ice_testandclear_state(&mif->state, ICE_STATE_SUBIF_NEEDS_REINIT)) {
        ice_clear_state(&mif->state, ICE_STATE_DRIVER_INITIALIZED);
        iflib_request_reset(sc->mirr_if->subctx);
        iflib_admin_intr_deferred(sc->mirr_if->subctx);
    }

    return;

err_stop_rx:
    ice_control_all_rx_queues(&sc->pf_vsi, false);
err_cleanup_tx:
    ice_vsi_disable_tx(&sc->pf_vsi);
}

/**
 * ice_poll_for_media_avail - Re-enable link if media is detected
 * @sc: device private structure
 *
 * Intended to be called from the driver's timer function, this function
 * sends the Get Link Status AQ command and re-enables HW link if the
 * command says that media is available.
 *
 * If the driver doesn't have the "NO_MEDIA" state set, then this does nothing,
 * since media removal events are supposed to be sent to the driver through
 * a link status event.
 */
static void
ice_poll_for_media_avail(struct ice_softc *sc)
{
    struct ice_hw *hw = &sc->hw;
    struct ice_port_info *pi = hw->port_info;

+    /* E830 only: There's no interrupt for when the PHY FW has finished loading,
+     * so poll for the status in the media task here if it's previously
+     * been detected that it's still loading.
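+     * If the load has not completed yet, skip the media poll entirely on
+     * this pass; the admin timer will re-run this check on a later tick.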
+ */ + if (ice_is_e830(hw) && + ice_test_state(&sc->state, ICE_STATE_PHY_FW_INIT_PENDING)) { + if (rd32(hw, GL_MNG_FWSM) & GL_MNG_FWSM_FW_LOADING_M) + ice_clear_state(&sc->state, ICE_STATE_PHY_FW_INIT_PENDING); + else + return; + } + if (ice_test_state(&sc->state, ICE_STATE_NO_MEDIA)) { pi->phy.get_link_info = true; ice_get_link_status(pi, &sc->link_up); if (pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) { int status; /* Re-enable link and re-apply user link settings */ if (ice_test_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN) || (if_getflags(sc->ifp) & IFF_UP)) { ice_apply_saved_phy_cfg(sc, ICE_APPLY_LS_FEC_FC); /* Update the OS about changes in media capability */ status = ice_add_media_types(sc, sc->media); if (status) device_printf(sc->dev, "Error adding device media types: %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); } ice_clear_state(&sc->state, ICE_STATE_NO_MEDIA); } } } /** * ice_if_timer - called by iflib periodically * @ctx: iflib ctx structure * @qid: the queue this timer was called for * * This callback is triggered by iflib periodically. We use it to update the * hw statistics. * * @remark this function is not protected by the iflib CTX lock. */ static void ice_if_timer(if_ctx_t ctx, uint16_t qid) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); uint64_t prev_link_xoff_rx = sc->stats.cur.link_xoff_rx; if (qid != 0) return; /* Do not attempt to update stats when in recovery mode */ if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) return; /* Update device statistics */ ice_update_pf_stats(sc); /* * For proper watchdog management, the iflib stack needs to know if * we've been paused during the last interval. Check if the * link_xoff_rx stat changed, and set the isc_pause_frames, if so. */ if (sc->stats.cur.link_xoff_rx != prev_link_xoff_rx) sc->scctx->isc_pause_frames = 1; /* Update the primary VSI stats */ ice_update_vsi_hw_stats(&sc->pf_vsi); /* Update mirror VSI stats */ if (sc->mirr_if && sc->mirr_if->if_attached) ice_update_vsi_hw_stats(sc->mirr_if->vsi); } /** * ice_admin_timer - called periodically to trigger the admin task * @arg: callout(9) argument pointing to the device private softc structure * * Timer function used as part of a callout(9) timer that will periodically * trigger the admin task, even when the interface is down. * * @remark this function is not called by iflib and is not protected by the * iflib CTX lock. * * @remark because this is a callout function, it cannot sleep and should not * attempt taking the iflib CTX lock. */ static void ice_admin_timer(void *arg) { struct ice_softc *sc = (struct ice_softc *)arg; /* * There is a point where callout routines are no longer * cancelable. So there exists a window of time where the * driver enters detach() and tries to cancel the callout, but the * callout routine has passed the cancellation point. The detach() * routine is unaware of this and tries to free resources that the * callout routine needs. So we check for the detach state flag to * at least shrink the window of opportunity. */ if (ice_driver_is_detaching(sc)) return; /* Fire off the admin task */ iflib_admin_intr_deferred(sc->ctx); /* Reschedule the admin timer */ callout_schedule(&sc->admin_timer, hz/2); } /** * ice_transition_recovery_mode - Transition to recovery mode * @sc: the device private softc * * Called when the driver detects that the firmware has entered recovery mode * at run time. 
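 * This tears down the VSIs, the interrupt and queue managers, and the
 * HW structure, leaving only the limited functionality needed to
 * perform an NVM update.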
*/ static void ice_transition_recovery_mode(struct ice_softc *sc) { struct ice_vsi *vsi = &sc->pf_vsi; int i; device_printf(sc->dev, "Firmware recovery mode detected. Limiting functionality. Refer to Intel(R) Ethernet Adapters and Devices User Guide for details on firmware recovery mode.\n"); /* Tell the stack that the link has gone down */ iflib_link_state_change(sc->ctx, LINK_STATE_DOWN, 0); /* Request that the device be re-initialized */ ice_request_stack_reinit(sc); ice_rdma_pf_detach(sc); ice_clear_bit(ICE_FEATURE_RDMA, sc->feat_cap); ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_en); ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_cap); ice_vsi_del_txqs_ctx(vsi); ice_vsi_del_rxqs_ctx(vsi); for (i = 0; i < sc->num_available_vsi; i++) { if (sc->all_vsi[i]) ice_release_vsi(sc->all_vsi[i]); } sc->num_available_vsi = 0; if (sc->all_vsi) { free(sc->all_vsi, M_ICE); sc->all_vsi = NULL; } /* Destroy the interrupt manager */ ice_resmgr_destroy(&sc->dev_imgr); /* Destroy the queue managers */ ice_resmgr_destroy(&sc->tx_qmgr); ice_resmgr_destroy(&sc->rx_qmgr); ice_deinit_hw(&sc->hw); } /** * ice_transition_safe_mode - Transition to safe mode * @sc: the device private softc * * Called when the driver attempts to reload the DDP package during a device * reset, and the new download fails. If so, we must transition to safe mode * at run time. * * @remark although safe mode normally allocates only a single queue, we can't * change the number of queues dynamically when using iflib. Due to this, we * do not attempt to reduce the number of queues. */ static void ice_transition_safe_mode(struct ice_softc *sc) { /* Indicate that we are in Safe mode */ ice_set_bit(ICE_FEATURE_SAFE_MODE, sc->feat_cap); ice_set_bit(ICE_FEATURE_SAFE_MODE, sc->feat_en); ice_rdma_pf_detach(sc); ice_clear_bit(ICE_FEATURE_RDMA, sc->feat_cap); ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_en); ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_cap); ice_clear_bit(ICE_FEATURE_RSS, sc->feat_cap); ice_clear_bit(ICE_FEATURE_RSS, sc->feat_en); } /** * ice_if_update_admin_status - update admin status * @ctx: iflib ctx structure * * Called by iflib to update the admin status. For our purposes, this means * check the adminq, and update the link status. It's ultimately triggered by * our admin interrupt, or by the ice_if_timer periodically. * * @pre assumes the caller holds the iflib CTX lock */ static void ice_if_update_admin_status(if_ctx_t ctx) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); enum ice_fw_modes fw_mode; bool reschedule = false; u16 pending = 0; ASSERT_CTX_LOCKED(sc); /* Check if the firmware entered recovery mode at run time */ fw_mode = ice_get_fw_mode(&sc->hw); if (fw_mode == ICE_FW_MODE_REC) { if (!ice_testandset_state(&sc->state, ICE_STATE_RECOVERY_MODE)) { /* If we just entered recovery mode, log a warning to * the system administrator and deinit driver state * that is no longer functional. */ ice_transition_recovery_mode(sc); } } else if (fw_mode == ICE_FW_MODE_ROLLBACK) { if (!ice_testandset_state(&sc->state, ICE_STATE_ROLLBACK_MODE)) { /* Rollback mode isn't fatal, but we don't want to * repeatedly post a message about it. 
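 * (The ICE_STATE_ROLLBACK_MODE bit set above makes this a one-shot
 * message.)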
*/ ice_print_rollback_msg(&sc->hw); } } /* Handle global reset events */ ice_handle_reset_event(sc); /* Handle PF reset requests */ ice_handle_pf_reset_request(sc); /* Handle MDD events */ ice_handle_mdd_event(sc); if (ice_test_state(&sc->state, ICE_STATE_RESET_FAILED) || ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET) || ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) { /* * If we know the control queues are disabled, skip processing * the control queues entirely. */ ; } else if (ice_testandclear_state(&sc->state, ICE_STATE_CONTROLQ_EVENT_PENDING)) { ice_process_ctrlq(sc, ICE_CTL_Q_ADMIN, &pending); if (pending > 0) reschedule = true; if (ice_is_generic_mac(&sc->hw)) { ice_process_ctrlq(sc, ICE_CTL_Q_SB, &pending); if (pending > 0) reschedule = true; } ice_process_ctrlq(sc, ICE_CTL_Q_MAILBOX, &pending); if (pending > 0) reschedule = true; } /* Poll for link up */ ice_poll_for_media_avail(sc); /* Check and update link status */ ice_update_link_status(sc, false); /* * If there are still messages to process, we need to reschedule * ourselves. Otherwise, we can just re-enable the interrupt. We'll be * woken up at the next interrupt or timer event. */ if (reschedule) { ice_set_state(&sc->state, ICE_STATE_CONTROLQ_EVENT_PENDING); iflib_admin_intr_deferred(ctx); } else { ice_enable_intr(&sc->hw, sc->irqvs[0].me); } } /** * ice_prepare_for_reset - Prepare device for an impending reset * @sc: The device private softc * * Prepare the driver for an impending reset, shutting down VSIs, clearing the * scheduler setup, and shutting down controlqs. Uses the * ICE_STATE_PREPARED_FOR_RESET to indicate whether we've already prepared the * driver for reset or not. */ static void ice_prepare_for_reset(struct ice_softc *sc) { struct ice_hw *hw = &sc->hw; /* If we're already prepared, there's nothing to do */ if (ice_testandset_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET)) return; log(LOG_INFO, "%s: preparing to reset device logic\n", if_name(sc->ifp)); /* In recovery mode, hardware is not initialized */ if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) return; /* inform the RDMA client */ ice_rdma_notify_reset(sc); /* stop the RDMA client */ ice_rdma_pf_stop(sc); /* Release the main PF VSI queue mappings */ ice_resmgr_release_map(&sc->tx_qmgr, sc->pf_vsi.tx_qmap, sc->pf_vsi.num_tx_queues); ice_resmgr_release_map(&sc->rx_qmgr, sc->pf_vsi.rx_qmap, sc->pf_vsi.num_rx_queues); if (sc->mirr_if) { ice_resmgr_release_map(&sc->tx_qmgr, sc->mirr_if->vsi->tx_qmap, sc->mirr_if->num_irq_vectors); ice_resmgr_release_map(&sc->rx_qmgr, sc->mirr_if->vsi->rx_qmap, sc->mirr_if->num_irq_vectors); } ice_clear_hw_tbls(hw); if (hw->port_info) ice_sched_cleanup_all(hw); ice_shutdown_all_ctrlq(hw, false); } /** * ice_rebuild_pf_vsi_qmap - Rebuild the main PF VSI queue mapping * @sc: the device softc pointer * * Loops over the Tx and Rx queues for the main PF VSI and reassigns the queue * mapping after a reset occurred. 
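 * The queue tail register offsets are recomputed as well, since the
 * reassignment may land the VSI on different absolute PF queue indices
 * than it had before the reset.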
 */
static int
ice_rebuild_pf_vsi_qmap(struct ice_softc *sc)
{
    struct ice_vsi *vsi = &sc->pf_vsi;
    struct ice_tx_queue *txq;
    struct ice_rx_queue *rxq;
    int err, i;

    /* Re-assign Tx queues from PF space to the main VSI */
    err = ice_resmgr_assign_contiguous(&sc->tx_qmgr, vsi->tx_qmap,
        vsi->num_tx_queues);
    if (err) {
        device_printf(sc->dev, "Unable to re-assign PF Tx queues: %s\n",
            ice_err_str(err));
        return (err);
    }

    /* Re-assign Rx queues from PF space to this VSI */
    err = ice_resmgr_assign_contiguous(&sc->rx_qmgr, vsi->rx_qmap,
        vsi->num_rx_queues);
    if (err) {
        device_printf(sc->dev, "Unable to re-assign PF Rx queues: %s\n",
            ice_err_str(err));
        goto err_release_tx_queues;
    }

    vsi->qmap_type = ICE_RESMGR_ALLOC_CONTIGUOUS;

    /* Re-assign Tx queue tail pointers */
    for (i = 0, txq = vsi->tx_queues; i < vsi->num_tx_queues; i++, txq++)
        txq->tail = QTX_COMM_DBELL(vsi->tx_qmap[i]);

    /* Re-assign Rx queue tail pointers */
    for (i = 0, rxq = vsi->rx_queues; i < vsi->num_rx_queues; i++, rxq++)
        rxq->tail = QRX_TAIL(vsi->rx_qmap[i]);

    return (0);

err_release_tx_queues:
    ice_resmgr_release_map(&sc->tx_qmgr, sc->pf_vsi.tx_qmap,
        sc->pf_vsi.num_tx_queues);

    return (err);
}

/* determine if the iflib context is active */
#define CTX_ACTIVE(ctx) ((if_getdrvflags(iflib_get_ifp(ctx)) & IFF_DRV_RUNNING))

/**
 * ice_rebuild_recovery_mode - Rebuild driver state while in recovery mode
 * @sc: The device private softc
 *
 * Handle a driver rebuild while in recovery mode. This will only rebuild the
 * limited functionality supported while in recovery mode.
 */
static void
ice_rebuild_recovery_mode(struct ice_softc *sc)
{
    device_t dev = sc->dev;

    /* enable PCIe bus master */
    pci_enable_busmaster(dev);

    /* Configure interrupt causes for the administrative interrupt */
    ice_configure_misc_interrupts(sc);

    /* Enable ITR 0 right away, so that we can handle admin interrupts */
    ice_enable_intr(&sc->hw, sc->irqvs[0].me);

    /* Now that the rebuild is finished, we're no longer prepared to reset */
    ice_clear_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET);

    log(LOG_INFO, "%s: device rebuild successful\n", if_name(sc->ifp));

    /* In order to completely restore device functionality, the iflib core
     * needs to be reset. We need to request an iflib reset. Additionally,
     * because the state of IFC_DO_RESET is cached within task_fn_admin in
     * the iflib core, we also want to re-run the admin task so that iflib
     * resets immediately instead of waiting for the next interrupt.
     */
    ice_request_stack_reinit(sc);

    return;
}

/**
 * ice_rebuild - Rebuild driver state post reset
 * @sc: The device private softc
 *
 * Restore driver state after a reset occurred. Restart the controlqs, setup
 * the hardware port, and re-enable the VSIs.
 */
static void
ice_rebuild(struct ice_softc *sc)
{
    struct ice_hw *hw = &sc->hw;
    device_t dev = sc->dev;
    enum ice_ddp_state pkg_state;
    int status;
    int err;

    sc->rebuild_ticks = ticks;

    /* If we're rebuilding, then a reset has succeeded. */
    ice_clear_state(&sc->state, ICE_STATE_RESET_FAILED);

    /*
     * If the firmware is in recovery mode, only restore the limited
     * functionality supported by recovery mode.
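     * This mirrors ice_rebuild_recovery_mode() above, which only brings
     * the administrative interrupt path back up.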
*/ if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) { ice_rebuild_recovery_mode(sc); return; } /* enable PCIe bus master */ pci_enable_busmaster(dev); status = ice_init_all_ctrlq(hw); if (status) { device_printf(dev, "failed to re-init controlqs, err %s\n", ice_status_str(status)); goto err_shutdown_ctrlq; } /* Query the allocated resources for Tx scheduler */ status = ice_sched_query_res_alloc(hw); if (status) { device_printf(dev, "Failed to query scheduler resources, err %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); goto err_shutdown_ctrlq; } /* Re-enable FW logging. Keep going even if this fails */ status = ICE_SUCCESS; if (hw->pf_id == 0) status = ice_fwlog_set(hw, &hw->fwlog_cfg); if (!status) { /* * We should have the most updated cached copy of the * configuration, regardless of whether we're rebuilding * or not. So we'll simply check to see if logging was * enabled pre-rebuild. */ if (hw->fwlog_cfg.options & ICE_FWLOG_OPTION_IS_REGISTERED) { status = ice_fwlog_register(hw); if (status) device_printf(dev, "failed to re-register fw logging, err %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); } } else device_printf(dev, "failed to rebuild fw logging configuration, err %s aq_err %s\n", ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status)); err = ice_send_version(sc); if (err) goto err_shutdown_ctrlq; err = ice_init_link_events(sc); if (err) { device_printf(dev, "ice_init_link_events failed: %s\n", ice_err_str(err)); goto err_shutdown_ctrlq; } status = ice_clear_pf_cfg(hw); if (status) { device_printf(dev, "failed to clear PF configuration, err %s\n", ice_status_str(status)); goto err_shutdown_ctrlq; } ice_clean_all_vsi_rss_cfg(sc); ice_clear_pxe_mode(hw); status = ice_get_caps(hw); if (status) { device_printf(dev, "failed to get capabilities, err %s\n", ice_status_str(status)); goto err_shutdown_ctrlq; } status = ice_sched_init_port(hw->port_info); if (status) { device_printf(dev, "failed to initialize port, err %s\n", ice_status_str(status)); goto err_sched_cleanup; } /* If we previously loaded the package, it needs to be reloaded now */ if (!ice_is_bit_set(sc->feat_en, ICE_FEATURE_SAFE_MODE)) { pkg_state = ice_init_pkg(hw, hw->pkg_copy, hw->pkg_size); if (!ice_is_init_pkg_successful(pkg_state)) { ice_log_pkg_init(sc, pkg_state); ice_transition_safe_mode(sc); } } ice_reset_pf_stats(sc); err = ice_rebuild_pf_vsi_qmap(sc); if (err) { device_printf(sc->dev, "Unable to re-assign main VSI queues, err %s\n", ice_err_str(err)); goto err_sched_cleanup; } err = ice_initialize_vsi(&sc->pf_vsi); if (err) { device_printf(sc->dev, "Unable to re-initialize Main VSI, err %s\n", ice_err_str(err)); goto err_release_queue_allocations; } /* Replay all VSI configuration */ err = ice_replay_all_vsi_cfg(sc); if (err) goto err_deinit_pf_vsi; /* Re-enable FW health event reporting */ ice_init_health_events(sc); /* Reconfigure the main PF VSI for RSS */ err = ice_config_rss(&sc->pf_vsi); if (err) { device_printf(sc->dev, "Unable to reconfigure RSS for the main VSI, err %s\n", ice_err_str(err)); goto err_deinit_pf_vsi; } if (hw->port_info->qos_cfg.is_sw_lldp) ice_add_rx_lldp_filter(sc); - /* Refresh link status */ + /* Apply previous link settings and refresh link status, if PHY + * FW is ready. 
+ */ ice_clear_state(&sc->state, ICE_STATE_LINK_STATUS_REPORTED); - sc->hw.port_info->phy.get_link_info = true; - ice_get_link_status(sc->hw.port_info, &sc->link_up); - ice_update_link_status(sc, true); + ice_init_link(sc); /* RDMA interface will be restarted by the stack re-init */ /* Configure interrupt causes for the administrative interrupt */ ice_configure_misc_interrupts(sc); /* Enable ITR 0 right away, so that we can handle admin interrupts */ ice_enable_intr(&sc->hw, sc->irqvs[0].me); /* Now that the rebuild is finished, we're no longer prepared to reset */ ice_clear_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET); /* Reconfigure the subinterface */ if (sc->mirr_if) { err = ice_subif_rebuild(sc); if (err) goto err_deinit_pf_vsi; } log(LOG_INFO, "%s: device rebuild successful\n", if_name(sc->ifp)); /* In order to completely restore device functionality, the iflib core * needs to be reset. We need to request an iflib reset. Additionally, * because the state of IFC_DO_RESET is cached within task_fn_admin in * the iflib core, we also want to re-run the admin task so that iflib * resets immediately instead of waiting for the next interrupt. * If FW LLDP is enabled, we instead need to reconfig DCB to properly * reinit all TC queues, not only 0; ice_do_dcb_reconfig() calls * ice_request_stack_reinit() itself. */ if (hw->port_info->qos_cfg.is_sw_lldp) ice_request_stack_reinit(sc); else ice_do_dcb_reconfig(sc, false); return; err_deinit_pf_vsi: ice_deinit_vsi(&sc->pf_vsi); err_release_queue_allocations: ice_resmgr_release_map(&sc->tx_qmgr, sc->pf_vsi.tx_qmap, sc->pf_vsi.num_tx_queues); ice_resmgr_release_map(&sc->rx_qmgr, sc->pf_vsi.rx_qmap, sc->pf_vsi.num_rx_queues); err_sched_cleanup: ice_sched_cleanup_all(hw); err_shutdown_ctrlq: ice_shutdown_all_ctrlq(hw, false); ice_clear_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET); ice_set_state(&sc->state, ICE_STATE_RESET_FAILED); device_printf(dev, "Driver rebuild failed, please reload the device driver\n"); } /** * ice_handle_reset_event - Handle reset events triggered by OICR * @sc: The device private softc * * Handle reset events triggered by an OICR notification. This includes CORER, * GLOBR, and EMPR resets triggered by software on this or any other PF or by * firmware. * * @pre assumes the iflib context lock is held, and will unlock it while * waiting for the hardware to finish reset. */ static void ice_handle_reset_event(struct ice_softc *sc) { struct ice_hw *hw = &sc->hw; int status; device_t dev = sc->dev; /* When a CORER, GLOBR, or EMPR is about to happen, the hardware will * trigger an OICR interrupt. Our OICR handler will determine when * this occurs and set the ICE_STATE_RESET_OICR_RECV bit as * appropriate. */ if (!ice_testandclear_state(&sc->state, ICE_STATE_RESET_OICR_RECV)) return; ice_prepare_for_reset(sc); /* * Release the iflib context lock and wait for the device to finish * resetting.
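* Holding the context lock across this wait would stall every other * iflib callback for the full duration of the hardware reset.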
*/ IFLIB_CTX_UNLOCK(sc); + +#define ICE_EMPR_ADDL_WAIT_MSEC_SLOW 20000 + if ((ice_is_e830(hw) || ice_is_e825c(hw)) && + (((rd32(hw, GLGEN_RSTAT) & GLGEN_RSTAT_RESET_TYPE_M) >> + GLGEN_RSTAT_RESET_TYPE_S) == ICE_RESET_EMPR)) + ice_msec_pause(ICE_EMPR_ADDL_WAIT_MSEC_SLOW); + status = ice_check_reset(hw); IFLIB_CTX_LOCK(sc); if (status) { device_printf(dev, "Device never came out of reset, err %s\n", ice_status_str(status)); + ice_set_state(&sc->state, ICE_STATE_RESET_FAILED); + ice_clear_state(&sc->state, ICE_STATE_RESET_PFR_REQ); + ice_clear_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET); + device_printf(dev, "Reset failed; please reload the device driver\n"); return; } /* We're done with the reset, so we can rebuild driver state */ sc->hw.reset_ongoing = false; ice_rebuild(sc); /* In the unlikely event that a PF reset request occurs at the same * time as a global reset, clear the request now. This avoids * resetting a second time right after we reset due to a global event. */ if (ice_testandclear_state(&sc->state, ICE_STATE_RESET_PFR_REQ)) device_printf(dev, "Ignoring PFR request that occurred while a reset was ongoing\n"); } /** * ice_handle_pf_reset_request - Initiate PF reset requested by software * @sc: The device private softc * * Initiate a PF reset requested by software. We handle this in the admin task * so that only one thread actually handles driver preparation and cleanup, * rather than having multiple threads possibly attempt to run this code * simultaneously. * * @pre assumes the iflib context lock is held and will unlock it while * waiting for the PF reset to complete. */ static void ice_handle_pf_reset_request(struct ice_softc *sc) { struct ice_hw *hw = &sc->hw; int status; /* Check for PF reset requests */ if (!ice_testandclear_state(&sc->state, ICE_STATE_RESET_PFR_REQ)) return; /* Make sure we're prepared for reset */ ice_prepare_for_reset(sc); /* * Release the iflib context lock and wait for the device to finish * resetting. */ IFLIB_CTX_UNLOCK(sc); status = ice_reset(hw, ICE_RESET_PFR); IFLIB_CTX_LOCK(sc); if (status) { device_printf(sc->dev, "device PF reset failed, err %s\n", ice_status_str(status)); ice_set_state(&sc->state, ICE_STATE_RESET_FAILED); return; } sc->soft_stats.pfr_count++; ice_rebuild(sc); } /** * ice_init_device_features - Init device driver features * @sc: driver softc structure * * @pre assumes that the function capabilities bits have been set up by * ice_init_hw(). 
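* * @remark feature bits live in two bitmaps: sc->feat_cap tracks what the * hardware, firmware, and OS could support, while sc->feat_en tracks what * the driver actually enables; a bit is expected to be set in feat_en only * when it is also present in feat_cap.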
*/ static void ice_init_device_features(struct ice_softc *sc) { struct ice_hw *hw = &sc->hw; /* Set capabilities that all devices support */ ice_set_bit(ICE_FEATURE_SRIOV, sc->feat_cap); ice_set_bit(ICE_FEATURE_RSS, sc->feat_cap); ice_set_bit(ICE_FEATURE_RDMA, sc->feat_cap); ice_set_bit(ICE_FEATURE_LENIENT_LINK_MODE, sc->feat_cap); ice_set_bit(ICE_FEATURE_LINK_MGMT_VER_1, sc->feat_cap); ice_set_bit(ICE_FEATURE_LINK_MGMT_VER_2, sc->feat_cap); ice_set_bit(ICE_FEATURE_HEALTH_STATUS, sc->feat_cap); ice_set_bit(ICE_FEATURE_FW_LOGGING, sc->feat_cap); ice_set_bit(ICE_FEATURE_HAS_PBA, sc->feat_cap); ice_set_bit(ICE_FEATURE_DCB, sc->feat_cap); ice_set_bit(ICE_FEATURE_TX_BALANCE, sc->feat_cap); ice_set_bit(ICE_FEATURE_PHY_STATISTICS, sc->feat_cap); if (ice_is_e810(hw)) ice_set_bit(ICE_FEATURE_PHY_STATISTICS, sc->feat_en); - /* Set capabilities based on device */ - switch (hw->device_id) { - case ICE_DEV_ID_E825C_BACKPLANE: - case ICE_DEV_ID_E825C_QSFP: - case ICE_DEV_ID_E825C_SFP: + if (ice_is_e825c(hw)) ice_set_bit(ICE_FEATURE_DUAL_NAC, sc->feat_cap); - break; - default: - break; - } /* Disable features due to hardware limitations... */ if (!hw->func_caps.common_cap.rss_table_size) ice_clear_bit(ICE_FEATURE_RSS, sc->feat_cap); if (!hw->func_caps.common_cap.iwarp || !ice_enable_irdma || ice_is_e830(hw)) ice_clear_bit(ICE_FEATURE_RDMA, sc->feat_cap); if (!hw->func_caps.common_cap.dcb) ice_clear_bit(ICE_FEATURE_DCB, sc->feat_cap); /* Disable features due to firmware limitations... */ if (!ice_is_fw_health_report_supported(hw)) ice_clear_bit(ICE_FEATURE_HEALTH_STATUS, sc->feat_cap); if (!ice_fwlog_supported(hw)) ice_clear_bit(ICE_FEATURE_FW_LOGGING, sc->feat_cap); if (hw->fwlog_cfg.options & ICE_FWLOG_OPTION_IS_REGISTERED) { if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_FW_LOGGING)) ice_set_bit(ICE_FEATURE_FW_LOGGING, sc->feat_en); else ice_fwlog_unregister(hw); } /* Disable capabilities not supported by the OS */ ice_disable_unsupported_features(sc->feat_cap); /* RSS is always enabled for iflib */ if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_RSS)) ice_set_bit(ICE_FEATURE_RSS, sc->feat_en); /* Disable features based on sysctl settings */ if (!ice_tx_balance_en) ice_clear_bit(ICE_FEATURE_TX_BALANCE, sc->feat_cap); if (hw->dev_caps.supported_sensors & ICE_SENSOR_SUPPORT_E810_INT_TEMP) { ice_set_bit(ICE_FEATURE_TEMP_SENSOR, sc->feat_cap); ice_set_bit(ICE_FEATURE_TEMP_SENSOR, sc->feat_en); } if (hw->func_caps.common_cap.next_cluster_id_support || hw->dev_caps.common_cap.next_cluster_id_support) { ice_set_bit(ICE_FEATURE_NEXT_CLUSTER_ID, sc->feat_cap); ice_set_bit(ICE_FEATURE_NEXT_CLUSTER_ID, sc->feat_en); } } /** * ice_if_multi_set - Callback to update Multicast filters in HW * @ctx: iflib ctx structure * * Called by iflib in response to SIOCDELMULTI and SIOCADDMULTI. Must search * the if_multiaddrs list and determine which filters have been added or * removed from the list, and update HW programming to reflect the new list. 
* * @pre assumes the caller holds the iflib CTX lock */ static void ice_if_multi_set(if_ctx_t ctx) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); int err; ASSERT_CTX_LOCKED(sc); /* Do not handle multicast configuration in recovery mode */ if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) return; err = ice_sync_multicast_filters(sc); if (err) { device_printf(sc->dev, "Failed to synchronize multicast filter list: %s\n", ice_err_str(err)); return; } } /** * ice_if_vlan_register - Register a VLAN with the hardware * @ctx: iflib ctx pointer * @vtag: VLAN to add * * Programs the main PF VSI with a hardware filter for the given VLAN. * * @pre assumes the caller holds the iflib CTX lock */ static void ice_if_vlan_register(if_ctx_t ctx, u16 vtag) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); int status; ASSERT_CTX_LOCKED(sc); /* Do not handle VLAN configuration in recovery mode */ if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) return; status = ice_add_vlan_hw_filter(&sc->pf_vsi, vtag); if (status) { device_printf(sc->dev, "Failure adding VLAN %d to main VSI, err %s aq_err %s\n", vtag, ice_status_str(status), ice_aq_str(sc->hw.adminq.sq_last_status)); } } /** * ice_if_vlan_unregister - Remove a VLAN filter from the hardware * @ctx: iflib ctx pointer * @vtag: VLAN to remove * * Removes the previously programmed VLAN filter from the main PF VSI. * * @pre assumes the caller holds the iflib CTX lock */ static void ice_if_vlan_unregister(if_ctx_t ctx, u16 vtag) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); int status; ASSERT_CTX_LOCKED(sc); /* Do not handle VLAN configuration in recovery mode */ if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) return; status = ice_remove_vlan_hw_filter(&sc->pf_vsi, vtag); if (status) { device_printf(sc->dev, "Failure removing VLAN %d from main VSI, err %s aq_err %s\n", vtag, ice_status_str(status), ice_aq_str(sc->hw.adminq.sq_last_status)); } } /** * ice_if_stop - Stop the device * @ctx: iflib context structure * * Called by iflib to stop the device and bring it down. (i.e. ifconfig ice0 * down) * * @pre assumes the caller holds the iflib CTX lock */ static void ice_if_stop(if_ctx_t ctx) { struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx); struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); ASSERT_CTX_LOCKED(sc); /* * The iflib core may call IFDI_STOP prior to the first call to * IFDI_INIT. This will cause us to attempt to remove MAC filters we * don't have, and disable Tx queues which aren't yet configured. * Although it is likely these extra operations are harmless, they do * cause spurious warning messages to be displayed, which may confuse * users. * * To avoid these messages, we use a state bit indicating if we've * been initialized. It will be set when ice_if_init is called, and * cleared here in ice_if_stop. */ if (!ice_testandclear_state(&sc->state, ICE_STATE_DRIVER_INITIALIZED)) return; if (ice_test_state(&sc->state, ICE_STATE_RESET_FAILED)) { device_printf(sc->dev, "request to stop interface cannot be completed as the device failed to reset\n"); return; } if (ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET)) { device_printf(sc->dev, "request to stop interface while device is prepared for impending reset\n"); return; } ice_rdma_pf_stop(sc); /* Remove the MAC filters, stop Tx, and stop Rx. We don't check the * return of these functions because there's nothing we can really do * if they fail, and the functions already print error messages.
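* The teardown order below is: MAC filters first, then the * queue/interrupt associations, then the Tx and Rx queues themselves.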
* Just try to shut down as much as we can. */ ice_rm_pf_default_mac_filters(sc); /* Dissociate the Tx and Rx queues from the interrupts */ ice_flush_txq_interrupts(&sc->pf_vsi); ice_flush_rxq_interrupts(&sc->pf_vsi); /* Disable the Tx and Rx queues */ ice_vsi_disable_tx(&sc->pf_vsi); ice_control_all_rx_queues(&sc->pf_vsi, false); if (!ice_test_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN) && !(if_getflags(sc->ifp) & IFF_UP) && sc->link_up) ice_set_link(sc, false); if (sc->mirr_if && ice_test_state(&mif->state, ICE_STATE_SUBIF_NEEDS_REINIT)) { ice_subif_if_stop(sc->mirr_if->subctx); device_printf(sc->dev, "The subinterface also comes down and up after reset\n"); } } /** * ice_if_get_counter - Get current value of an ifnet statistic * @ctx: iflib context pointer * @counter: ifnet counter to read * * Reads the current value of an ifnet counter for the device. * * This function is not protected by the iflib CTX lock. */ static uint64_t ice_if_get_counter(if_ctx_t ctx, ift_counter counter) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); /* Return the counter for the main PF VSI */ return ice_get_ifnet_counter(&sc->pf_vsi, counter); } /** * ice_request_stack_reinit - Request that iflib re-initialize * @sc: the device private softc * * Request that the device be brought down and up, to re-initialize. For * example, this may be called when a device reset occurs, or when Tx and Rx * queues need to be re-initialized. * * This is required because the iflib state is outside the driver, and must be * re-initialized if we need to restart Tx and Rx queues. */ void ice_request_stack_reinit(struct ice_softc *sc) { if (CTX_ACTIVE(sc->ctx)) { iflib_request_reset(sc->ctx); iflib_admin_intr_deferred(sc->ctx); } } /** * ice_driver_is_detaching - Check if the driver is detaching/unloading * @sc: device private softc * * Returns true if the driver is detaching, false otherwise. * * @remark on newer kernels, take advantage of iflib_in_detach in order to * report detachment correctly as early as possible. * * @remark this function is used by various code paths that want to avoid * running if the driver is about to be removed. This includes sysctls and * other driver access points. Note that it does not fully resolve * detach-based race conditions as it is possible for a thread to race with * iflib_in_detach. */ bool ice_driver_is_detaching(struct ice_softc *sc) { return (ice_test_state(&sc->state, ICE_STATE_DETACHING) || iflib_in_detach(sc->ctx)); } /** * ice_if_priv_ioctl - Device private ioctl handler * @ctx: iflib context pointer * @command: The ioctl command issued * @data: ioctl specific data * * iflib callback for handling custom driver specific ioctls. * * @pre Assumes that the iflib context lock is held. */ static int ice_if_priv_ioctl(if_ctx_t ctx, u_long command, caddr_t data) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); struct ifdrv *ifd; device_t dev = sc->dev; if (data == NULL) return (EINVAL); ASSERT_CTX_LOCKED(sc); /* Make sure the command type is valid */ switch (command) { case SIOCSDRVSPEC: case SIOCGDRVSPEC: /* Accepted commands */ break; case SIOCGPRIVATE_0: /* * Although we do not support this ioctl command, it's * expected that iflib will forward it to the IFDI_PRIV_IOCTL * handler. Do not print a message in this case */ return (ENOTSUP); default: /* * If we get a different command for this function, it's * definitely unexpected, so log a message indicating what * command we got for debugging purposes.
*/ device_printf(dev, "%s: unexpected ioctl command %08lx\n", __func__, command); return (EINVAL); } ifd = (struct ifdrv *)data; switch (ifd->ifd_cmd) { case ICE_NVM_ACCESS: return ice_handle_nvm_access_ioctl(sc, ifd); case ICE_DEBUG_DUMP: return ice_handle_debug_dump_ioctl(sc, ifd); default: return EINVAL; } } /** * ice_if_i2c_req - I2C request handler for iflib * @ctx: iflib context pointer * @req: The I2C parameters to use * * Read from the port's I2C eeprom using the parameters from the ioctl. * * @remark The iflib-only part is pretty simple. */ static int ice_if_i2c_req(if_ctx_t ctx, struct ifi2creq *req) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); return ice_handle_i2c_req(sc, req); } /** * ice_if_suspend - PCI device suspend handler for iflib * @ctx: iflib context pointer * * Deinitializes the driver and clears HW resources in preparation for * suspend or an FLR. * * @returns 0; this return value is ignored */ static int ice_if_suspend(if_ctx_t ctx) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); /* At least a PFR is always going to happen after this; * either via FLR or during the D3->D0 transition. */ ice_clear_state(&sc->state, ICE_STATE_RESET_PFR_REQ); ice_prepare_for_reset(sc); return (0); } /** * ice_if_resume - PCI device resume handler for iflib * @ctx: iflib context pointer * * Reinitializes the driver and the HW after PCI resume or after * an FLR. An init is performed by iflib after this function is finished. * * @returns 0; this return value is ignored */ static int ice_if_resume(if_ctx_t ctx) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); ice_rebuild(sc); return (0); } /** * ice_if_needs_restart - Tell iflib when the driver needs to be reinitialized * @ctx: iflib context pointer * @event: event code to check * * Defaults to returning true for unknown events. * * @returns true if iflib needs to reinit the interface */ static bool ice_if_needs_restart(if_ctx_t ctx, enum iflib_restart_event event) { struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx); switch (event) { case IFLIB_RESTART_VLAN_CONFIG: if (!ice_test_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN) && !(if_getflags(sc->ifp) & IFF_UP)) return false; default: return true; } } +/** + * ice_init_link - Do link configuration and link status reporting + * @sc: driver private structure + * + * Contains an extra check that defers link config when an E830 device + * still has the "FW_LOADING"/"PHYBUSY" bit set in GL_MNG_FWSM. + */ +static void +ice_init_link(struct ice_softc *sc) +{ + struct ice_hw *hw = &sc->hw; + device_t dev = sc->dev; + + /* Check if FW is ready before setting up link; defer setup to the + * admin task if it isn't.
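+ * The ICE_STATE_PHY_FW_INIT_PENDING bit set below is presumably polled + * again from the admin task, so that link setup can resume once + * GL_MNG_FWSM no longer reports the PHY FW busy.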
+ */ + if (ice_is_e830(hw) && + (rd32(hw, GL_MNG_FWSM) & GL_MNG_FWSM_FW_LOADING_M)) { + ice_set_state(&sc->state, ICE_STATE_PHY_FW_INIT_PENDING); + device_printf(dev, + "Link initialization is blocked by PHY FW initialization.\n"); + device_printf(dev, + "Link initialization will continue after PHY FW initialization completes.\n"); + /* Do not access PHY config while PHY FW is busy initializing */ + } else { + ice_clear_state(&sc->state, ICE_STATE_PHY_FW_INIT_PENDING); + ice_init_link_configuration(sc); + ice_update_link_status(sc, true); + } + +} + extern struct if_txrx ice_subif_txrx; /** * @var ice_subif_methods * @brief ice driver method entry points */ static device_method_t ice_subif_methods[] = { /* Device interface */ DEVMETHOD(device_register, ice_subif_register), DEVMETHOD_END }; /** * @var ice_subif_driver * @brief driver structure for the device API */ static driver_t ice_subif_driver = { .name = "ice_subif", .methods = ice_subif_methods, .size = sizeof(struct ice_mirr_if), }; static device_method_t ice_iflib_subif_methods[] = { DEVMETHOD(ifdi_attach_pre, ice_subif_if_attach_pre), DEVMETHOD(ifdi_attach_post, ice_subif_if_attach_post), DEVMETHOD(ifdi_tx_queues_alloc, ice_subif_if_tx_queues_alloc), DEVMETHOD(ifdi_rx_queues_alloc, ice_subif_if_rx_queues_alloc), DEVMETHOD(ifdi_msix_intr_assign, ice_subif_if_msix_intr_assign), DEVMETHOD(ifdi_intr_enable, ice_subif_if_intr_enable), DEVMETHOD(ifdi_rx_queue_intr_enable, ice_subif_if_rx_queue_intr_enable), DEVMETHOD(ifdi_tx_queue_intr_enable, ice_subif_if_tx_queue_intr_enable), DEVMETHOD(ifdi_init, ice_subif_if_init), DEVMETHOD(ifdi_stop, ice_subif_if_stop), DEVMETHOD(ifdi_queues_free, ice_subif_if_queues_free), DEVMETHOD(ifdi_media_status, ice_subif_if_media_status), DEVMETHOD(ifdi_promisc_set, ice_subif_if_promisc_set), }; /** * @var ice_iflib_subif_driver * @brief driver structure for the iflib stack * * driver_t definition used to setup the iflib device methods. 
*/ static driver_t ice_iflib_subif_driver = { .name = "ice_subif", .methods = ice_iflib_subif_methods, .size = sizeof(struct ice_mirr_if), }; /** * @var ice_subif_sctx * @brief ice driver shared context * * Similar to the existing ice_sctx, this structure has these differences: * - isc_admin_intrcnt is set to 0 * - Uses subif iflib driver methods * - Flagged as a VF for iflib */ static struct if_shared_ctx ice_subif_sctx = { .isc_magic = IFLIB_MAGIC, .isc_q_align = PAGE_SIZE, .isc_tx_maxsize = ICE_MAX_FRAME_SIZE, .isc_tx_maxsegsize = ICE_MAX_FRAME_SIZE, .isc_tso_maxsize = ICE_TSO_SIZE + sizeof(struct ether_vlan_header), .isc_tso_maxsegsize = ICE_MAX_DMA_SEG_SIZE, .isc_rx_maxsize = ICE_MAX_FRAME_SIZE, .isc_rx_nsegments = ICE_MAX_RX_SEGS, .isc_rx_maxsegsize = ICE_MAX_FRAME_SIZE, .isc_nfl = 1, .isc_ntxqs = 1, .isc_nrxqs = 1, .isc_admin_intrcnt = 0, .isc_vendor_info = ice_vendor_info_array, .isc_driver_version = __DECONST(char *, ice_driver_version), .isc_driver = &ice_iflib_subif_driver, .isc_flags = IFLIB_NEED_SCRATCH | IFLIB_TSO_INIT_IP | IFLIB_ADMIN_ALWAYS_RUN | IFLIB_SKIP_MSIX | IFLIB_IS_VF, .isc_nrxd_min = {ICE_MIN_DESC_COUNT}, .isc_ntxd_min = {ICE_MIN_DESC_COUNT}, .isc_nrxd_max = {ICE_IFLIB_MAX_DESC_COUNT}, .isc_ntxd_max = {ICE_IFLIB_MAX_DESC_COUNT}, .isc_nrxd_default = {ICE_DEFAULT_DESC_COUNT}, .isc_ntxd_default = {ICE_DEFAULT_DESC_COUNT}, }; static void * ice_subif_register(device_t dev __unused) { return (&ice_subif_sctx); } static void ice_subif_setup_scctx(struct ice_mirr_if *mif) { if_softc_ctx_t scctx = mif->subscctx; scctx->isc_txrx = &ice_subif_txrx; scctx->isc_capenable = ICE_FULL_CAPS; scctx->isc_tx_csum_flags = ICE_CSUM_OFFLOAD; scctx->isc_ntxqsets = 4; scctx->isc_nrxqsets = 4; scctx->isc_vectors = scctx->isc_nrxqsets; scctx->isc_ntxqsets_max = 256; scctx->isc_nrxqsets_max = 256; scctx->isc_txqsizes[0] = roundup2(scctx->isc_ntxd[0] * sizeof(struct ice_tx_desc), DBA_ALIGN); scctx->isc_rxqsizes[0] = roundup2(scctx->isc_nrxd[0] * sizeof(union ice_32b_rx_flex_desc), DBA_ALIGN); scctx->isc_tx_nsegments = ICE_MAX_TX_SEGS; scctx->isc_tx_tso_segments_max = ICE_MAX_TSO_SEGS; scctx->isc_tx_tso_size_max = ICE_TSO_SIZE; scctx->isc_tx_tso_segsize_max = ICE_MAX_DMA_SEG_SIZE; } static int ice_subif_if_attach_pre(if_ctx_t ctx) { struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx); device_t dev = iflib_get_dev(ctx); mif->subctx = ctx; mif->subdev = dev; mif->subscctx = iflib_get_softc_ctx(ctx); /* Setup the iflib softc context structure */ ice_subif_setup_scctx(mif); return (0); } static int ice_subif_if_attach_post(if_ctx_t ctx __unused) { return (0); } /** * ice_destroy_mirror_interface - destroy mirror interface * @sc: driver private data * * Destroys all resources associated with the mirroring interface. * Will not exit early on failure. * * @pre: Mirror interface already exists and is initialized. 
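* * @remark the iflib context lock is dropped (if held) around the teardown * below, presumably so that iflib_device_deregister() can run the * subinterface's own iflib callbacks without deadlocking.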
*/ void ice_destroy_mirror_interface(struct ice_softc *sc) { struct ice_mirr_if *mif = sc->mirr_if; struct ice_vsi *vsi = mif->vsi; bool is_locked = false; int ret; is_locked = sx_xlocked(sc->iflib_ctx_lock); if (is_locked) IFLIB_CTX_UNLOCK(sc); if (mif->ifp) { ret = iflib_device_deregister(mif->subctx); if (ret) { device_printf(sc->dev, "iflib_device_deregister for mirror interface failed: %d\n", ret); } } bus_topo_lock(); ret = device_delete_child(sc->dev, mif->subdev); bus_topo_unlock(); if (ret) { device_printf(sc->dev, "device_delete_child for mirror interface failed: %d\n", ret); } if (is_locked) IFLIB_CTX_LOCK(sc); if (mif->if_imap) { free(mif->if_imap, M_ICE); mif->if_imap = NULL; } if (mif->os_imap) { free(mif->os_imap, M_ICE); mif->os_imap = NULL; } /* These are freed via ice_subif_if_queues_free() * vsi: * - rx_irqvs * - tx_queues * - rx_queues */ ice_release_vsi(vsi); free(mif, M_ICE); sc->mirr_if = NULL; } /** * ice_setup_mirror_vsi - Initialize mirror VSI * @mif: driver private data for mirror interface * * Allocates a VSI for a mirror interface, and sets that VSI up for use as a * mirror for the main PF VSI. * * Returns 0 on success, or a standard error code on failure. */ static int ice_setup_mirror_vsi(struct ice_mirr_if *mif) { struct ice_softc *sc = mif->back; device_t dev = sc->dev; struct ice_vsi *vsi; int ret = 0; /* vsi is for the new mirror vsi, not the PF's main VSI */ vsi = ice_alloc_vsi(sc, ICE_VSI_VMDQ2); if (!vsi) { /* Already prints an error message */ return (ENOMEM); } mif->vsi = vsi; /* Reserve VSI queue allocation from PF queues */ ice_alloc_vsi_qmap(vsi, ICE_DEFAULT_VF_QUEUES, ICE_DEFAULT_VF_QUEUES); vsi->num_tx_queues = vsi->num_rx_queues = ICE_DEFAULT_VF_QUEUES; /* Assign Tx queues from PF space */ ret = ice_resmgr_assign_scattered(&sc->tx_qmgr, vsi->tx_qmap, vsi->num_tx_queues); if (ret) { device_printf(dev, "Unable to assign mirror VSI Tx queues: %s\n", ice_err_str(ret)); goto release_vsi; } /* Assign Rx queues from PF space */ ret = ice_resmgr_assign_scattered(&sc->rx_qmgr, vsi->rx_qmap, vsi->num_rx_queues); if (ret) { device_printf(dev, "Unable to assign mirror VSI Rx queues: %s\n", ice_err_str(ret)); goto release_vsi; } vsi->qmap_type = ICE_RESMGR_ALLOC_SCATTERED; vsi->max_frame_size = ICE_MAX_FRAME_SIZE; ret = ice_initialize_vsi(vsi); if (ret) { device_printf(dev, "%s: Error in ice_initialize_vsi for mirror VSI: %s\n", __func__, ice_err_str(ret)); goto release_vsi; } /* Setup this VSI for receiving traffic */ ret = ice_config_rss(vsi); if (ret) { device_printf(dev, "Unable to configure RSS for mirror VSI: %s\n", ice_err_str(ret)); goto release_vsi; } /* Set HW rules for mirroring traffic */ vsi->mirror_src_vsi = sc->pf_vsi.idx; ice_debug(&sc->hw, ICE_DBG_INIT, "Configuring mirroring from VSI %d to %d\n", vsi->mirror_src_vsi, vsi->idx); ice_debug(&sc->hw, ICE_DBG_INIT, "(HW num: VSI %d to %d)\n", ice_get_hw_vsi_num(&sc->hw, vsi->mirror_src_vsi), ice_get_hw_vsi_num(&sc->hw, vsi->idx)); ret = ice_setup_vsi_mirroring(vsi); if (ret) { device_printf(dev, "Unable to configure mirroring for VSI: %s\n", ice_err_str(ret)); goto release_vsi; } return (0); release_vsi: ice_release_vsi(vsi); mif->vsi = NULL; return (ret); } /** * ice_create_mirror_interface - Initialize mirror interface * @sc: driver private data * * Creates and sets up a mirror interface that will mirror traffic from * the main PF interface. Includes a call to iflib_device_register() in order * to setup necessary iflib structures for this new interface as well.
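* * For example, on a parent device named ice0, the first mirror interface * is expected to appear as ice0m0.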
* * If it returns successfully, a new interface will be created and will show * up in the ifconfig interface list. * * Returns 0 on success, or a standard error code on failure. */ int ice_create_mirror_interface(struct ice_softc *sc) { device_t dev = sc->dev; struct ice_mirr_if *mif; struct ifmedia *media; struct sbuf *sb; int ret = 0; mif = (struct ice_mirr_if *)malloc(sizeof(*mif), M_ICE, M_ZERO | M_NOWAIT); if (!mif) { device_printf(dev, "malloc() error allocating mirror interface\n"); return (ENOMEM); } /* Set pointers */ sc->mirr_if = mif; mif->back = sc; /* Do early setup because these will be called during iflib_device_register(): * - ice_subif_if_tx_queues_alloc * - ice_subif_if_rx_queues_alloc */ ret = ice_setup_mirror_vsi(mif); if (ret) goto out; /* Determine name for new interface: * (base interface name)(modifier name)(modifier unit number) * e.g. for ice0 with a new mirror interface (modifier m) * of index 0, this equals "ice0m0" */ sb = sbuf_new_auto(); MPASS(sb != NULL); sbuf_printf(sb, "%sm", device_get_nameunit(dev)); sbuf_finish(sb); bus_topo_lock(); mif->subdev = device_add_child(dev, sbuf_data(sb), 0); bus_topo_unlock(); if (!mif->subdev) { device_printf(dev, "device_add_child failed for %s0\n", sbuf_data(sb)); sbuf_delete(sb); free(mif, M_ICE); sc->mirr_if = NULL; return (ENOMEM); } sbuf_delete(sb); device_set_driver(mif->subdev, &ice_subif_driver); /* Use iflib_device_register() directly because the driver already * has an initialized softc to pass to iflib */ ret = iflib_device_register(mif->subdev, mif, &ice_subif_sctx, &mif->subctx); if (ret) goto out; /* Indicate that created interface will be just for monitoring */ mif->ifp = iflib_get_ifp(mif->subctx); if_setflagbits(mif->ifp, IFF_MONITOR, 0); /* Use autoselect media by default */ media = iflib_get_media(mif->subctx); ifmedia_add(media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(media, IFM_ETHER | IFM_AUTO); device_printf(dev, "Created dev %s and ifnet %s for mirroring\n", device_get_nameunit(mif->subdev), if_name(mif->ifp)); ice_add_vsi_sysctls(mif->vsi); ret = ice_wire_mirror_intrs(mif); if (ret) goto out; mif->if_attached = true; return (0); out: ice_destroy_mirror_interface(sc); return (ret); } /** * ice_wire_mirror_intrs * @mif: driver private subinterface structure * * Helper function that sets up driver interrupt data and calls * into iflib in order to setup interrupts in its data structures as well. * * Like ice_if_msix_intr_assign, currently requires that we get at least the same * number of vectors as we have queues, and that we always have the same number * of Tx and Rx queues. Unlike that function, this calls a special * iflib_irq_alloc_generic_subctx() function for RX interrupts because the * driver needs to get MSI-X resources from the parent device. * * Tx queues use a softirq instead of their own hardware interrupt, so that * behavior remains unchanged from the main interface. * * Returns 0 on success or an error code from iflib_irq_alloc_generic_subctx() * on failure.
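* * @remark MSI-X resource IDs (rids) continue from the parent's last * allocated rid (sc->last_rid), since the subinterface's vectors come out * of the parent device's MSI-X table.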
*/ static int ice_wire_mirror_intrs(struct ice_mirr_if *mif) { struct ice_softc *sc = mif->back; struct ice_hw *hw = &sc->hw; struct ice_vsi *vsi = mif->vsi; device_t dev = mif->subdev; int err, i, rid; if_ctx_t ctx = mif->subctx; ice_debug(hw, ICE_DBG_INIT, "%s: Last rid: %d\n", __func__, sc->last_rid); rid = sc->last_rid + 1; for (i = 0; i < vsi->num_rx_queues; i++, rid++) { struct ice_rx_queue *rxq = &vsi->rx_queues[i]; struct ice_tx_queue *txq = &vsi->tx_queues[i]; char irq_name[16]; // TODO: Change to use dynamic interface number snprintf(irq_name, sizeof(irq_name), "m0rxq%d", i); /* First arg is parent device (physical port's) iflib ctx */ err = iflib_irq_alloc_generic_subctx(sc->ctx, ctx, &mif->rx_irqvs[i].irq, rid, IFLIB_INTR_RXTX, ice_msix_que, rxq, rxq->me, irq_name); if (err) { device_printf(dev, "Failed to allocate q int %d err: %s\n", i, ice_err_str(err)); i--; goto fail; } MPASS(rid - 1 > 0); /* Set vector number used in interrupt enable/disable functions */ mif->rx_irqvs[i].me = rid - 1; rxq->irqv = &mif->rx_irqvs[i]; bzero(irq_name, sizeof(irq_name)); snprintf(irq_name, sizeof(irq_name), "m0txq%d", i); iflib_softirq_alloc_generic(ctx, &mif->rx_irqvs[i].irq, IFLIB_INTR_TX, txq, txq->me, irq_name); txq->irqv = &mif->rx_irqvs[i]; } sc->last_rid = rid - 1; ice_debug(hw, ICE_DBG_INIT, "%s: New last rid: %d\n", __func__, sc->last_rid); return (0); fail: for (; i >= 0; i--) iflib_irq_free(ctx, &mif->rx_irqvs[i].irq); return (err); } /** * ice_subif_rebuild - Rebuild subinterface post reset * @sc: The device private softc * * Restore subinterface state after a reset occurred. * Restart the VSI and enable the mirroring. */ static int ice_subif_rebuild(struct ice_softc *sc) { struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(sc->ctx); struct ice_vsi *vsi = sc->mirr_if->vsi; int err; err = ice_subif_rebuild_vsi_qmap(sc); if (err) { device_printf(sc->dev, "Unable to re-assign mirror VSI queues, err %s\n", ice_err_str(err)); return (err); } err = ice_initialize_vsi(vsi); if (err) { device_printf(sc->dev, "Unable to re-initialize mirror VSI, err %s\n", ice_err_str(err)); goto err_release_queue_allocations_subif; } err = ice_config_rss(vsi); if (err) { device_printf(sc->dev, "Unable to reconfigure RSS for the mirror VSI, err %s\n", ice_err_str(err)); goto err_deinit_subif_vsi; } vsi->mirror_src_vsi = sc->pf_vsi.idx; err = ice_setup_vsi_mirroring(vsi); if (err) { device_printf(sc->dev, "Unable to configure mirroring for VSI: %s\n", ice_err_str(err)); goto err_deinit_subif_vsi; } ice_set_state(&mif->state, ICE_STATE_SUBIF_NEEDS_REINIT); return (0); err_deinit_subif_vsi: ice_deinit_vsi(vsi); err_release_queue_allocations_subif: ice_resmgr_release_map(&sc->tx_qmgr, vsi->tx_qmap, sc->mirr_if->num_irq_vectors); ice_resmgr_release_map(&sc->rx_qmgr, vsi->rx_qmap, sc->mirr_if->num_irq_vectors); return (err); } /** * ice_subif_rebuild_vsi_qmap - Rebuild the mirror VSI queue mapping * @sc: the device softc pointer * * Loops over the Tx and Rx queues for the mirror VSI and reassigns the queue * mapping after a reset occurred. 
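* * @remark unlike the main PF VSI, which is re-assigned contiguously, the * mirror VSI keeps the scattered assignment (ICE_RESMGR_ALLOC_SCATTERED) * it was first given in ice_setup_mirror_vsi().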
*/ static int ice_subif_rebuild_vsi_qmap(struct ice_softc *sc) { struct ice_vsi *vsi = sc->mirr_if->vsi; struct ice_tx_queue *txq; struct ice_rx_queue *rxq; int err, i; err = ice_resmgr_assign_scattered(&sc->tx_qmgr, vsi->tx_qmap, sc->mirr_if->num_irq_vectors); if (err) { device_printf(sc->dev, "Unable to assign mirror VSI Tx queues: %s\n", ice_err_str(err)); return (err); } err = ice_resmgr_assign_scattered(&sc->rx_qmgr, vsi->rx_qmap, sc->mirr_if->num_irq_vectors); if (err) { device_printf(sc->dev, "Unable to assign mirror VSI Rx queues: %s\n", ice_err_str(err)); goto err_release_tx_queues; } vsi->qmap_type = ICE_RESMGR_ALLOC_SCATTERED; /* Re-assign Tx queue tail pointers */ for (i = 0, txq = vsi->tx_queues; i < vsi->num_tx_queues; i++, txq++) txq->tail = QTX_COMM_DBELL(vsi->tx_qmap[i]); /* Re-assign Rx queue tail pointers */ for (i = 0, rxq = vsi->rx_queues; i < vsi->num_rx_queues; i++, rxq++) rxq->tail = QRX_TAIL(vsi->rx_qmap[i]); return (0); err_release_tx_queues: ice_resmgr_release_map(&sc->tx_qmgr, vsi->tx_qmap, vsi->num_tx_queues); return (err); } /** * ice_subif_if_tx_queues_alloc - Allocate Tx queue memory for subinterfaces * @ctx: iflib context structure * @vaddrs: virtual addresses for the queue memory * @paddrs: physical addresses for the queue memory * @ntxqs: the number of Tx queues per set (should always be 1) * @ntxqsets: the number of Tx queue sets to allocate * * See ice_if_tx_queues_alloc() description. Similar to that function, but * for subinterfaces instead. */ static int ice_subif_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int __invariant_only ntxqs, int ntxqsets) { struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx); struct ice_tx_queue *txq; device_t dev = mif->subdev; struct ice_vsi *vsi; int err, i, j; MPASS(mif != NULL); MPASS(ntxqs == 1); MPASS(mif->subscctx->isc_ntxd[0] <= ICE_MAX_DESC_COUNT); vsi = mif->vsi; MPASS(vsi->num_tx_queues == ntxqsets); /* Allocate queue structure memory */ if (!(vsi->tx_queues = (struct ice_tx_queue *)malloc(sizeof(struct ice_tx_queue) * ntxqsets, M_ICE, M_NOWAIT | M_ZERO))) { device_printf(dev, "%s: Unable to allocate Tx queue memory for subfunction\n", __func__); return (ENOMEM); } /* Allocate report status arrays */ for (i = 0, txq = vsi->tx_queues; i < ntxqsets; i++, txq++) { if (!(txq->tx_rsq = (uint16_t *)malloc(sizeof(uint16_t) * mif->subscctx->isc_ntxd[0], M_ICE, M_NOWAIT))) { device_printf(dev, "%s: Unable to allocate tx_rsq memory for subfunction\n", __func__); err = ENOMEM; goto free_tx_queues; } /* Initialize report status array */ for (j = 0; j < mif->subscctx->isc_ntxd[0]; j++) txq->tx_rsq[j] = QIDX_INVALID; } /* Add Tx queue sysctls context */ ice_vsi_add_txqs_ctx(vsi); for (i = 0, txq = vsi->tx_queues; i < ntxqsets; i++, txq++) { /* q_handle == me when only one TC */ txq->me = txq->q_handle = i; txq->vsi = vsi; /* store the queue size for easier access */ txq->desc_count = mif->subscctx->isc_ntxd[0]; /* get the virtual and physical address of the hardware queues */ txq->tail = QTX_COMM_DBELL(vsi->tx_qmap[i]); txq->tx_base = (struct ice_tx_desc *)vaddrs[i]; txq->tx_paddr = paddrs[i]; ice_add_txq_sysctls(txq); } return (0); free_tx_queues: for (i = 0, txq = vsi->tx_queues; i < ntxqsets; i++, txq++) { if (txq->tx_rsq != NULL) { free(txq->tx_rsq, M_ICE); txq->tx_rsq = NULL; } } free(vsi->tx_queues, M_ICE); vsi->tx_queues = NULL; return (err); } /** * ice_subif_if_rx_queues_alloc - Allocate Rx queue memory for subinterfaces * @ctx: iflib context structure * @vaddrs: virtual 
addresses for the queue memory * @paddrs: physical addresses for the queue memory * @nrxqs: number of Rx queues per set (should always be 1) * @nrxqsets: number of Rx queue sets to allocate * * See ice_if_rx_queues_alloc() for general summary; this is similar to that * but implemented for subinterfaces. */ static int ice_subif_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int __invariant_only nrxqs, int nrxqsets) { struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx); struct ice_rx_queue *rxq; device_t dev = mif->subdev; struct ice_vsi *vsi; int i; MPASS(mif != NULL); MPASS(nrxqs == 1); MPASS(mif->subscctx->isc_nrxd[0] <= ICE_MAX_DESC_COUNT); vsi = mif->vsi; MPASS(vsi->num_rx_queues == nrxqsets); /* Allocate queue structure memory */ if (!(vsi->rx_queues = (struct ice_rx_queue *) malloc(sizeof(struct ice_rx_queue) * nrxqsets, M_ICE, M_NOWAIT | M_ZERO))) { device_printf(dev, "%s: Unable to allocate Rx queue memory for subfunction\n", __func__); return (ENOMEM); } /* Add Rx queue sysctls context */ ice_vsi_add_rxqs_ctx(vsi); for (i = 0, rxq = vsi->rx_queues; i < nrxqsets; i++, rxq++) { rxq->me = i; rxq->vsi = vsi; /* store the queue size for easier access */ rxq->desc_count = mif->subscctx->isc_nrxd[0]; /* get the virtual and physical address of the hardware queues */ rxq->tail = QRX_TAIL(vsi->rx_qmap[i]); rxq->rx_base = (union ice_32b_rx_flex_desc *)vaddrs[i]; rxq->rx_paddr = paddrs[i]; ice_add_rxq_sysctls(rxq); } return (0); } /** * ice_subif_if_msix_intr_assign - Assign MSI-X interrupts to new sub interface * @ctx: the iflib context structure * @msix: the number of vectors we were assigned * * Allocates and assigns driver private resources for MSI-X interrupt tracking. * * @pre OS MSI-X resources have been pre-allocated by parent interface. 
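* * @remark a request for more vectors than the parent reserved as spares * (sc->extra_vectors) is rejected with ENOSPC.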
*/ static int ice_subif_if_msix_intr_assign(if_ctx_t ctx, int msix) { struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx); struct ice_softc *sc = mif->back; struct ice_vsi *vsi = mif->vsi; device_t dev = mif->subdev; int ret; if (vsi->num_rx_queues != vsi->num_tx_queues) { device_printf(dev, "iflib requested %d Tx queues, and %d Rx queues, but the driver isn't able to support a differing number of Tx and Rx queues\n", vsi->num_tx_queues, vsi->num_rx_queues); return (EOPNOTSUPP); } if (msix > sc->extra_vectors) { device_printf(dev, "%s: Not enough spare (%d) msix vectors for new sub-interface requested (%d)\n", __func__, sc->extra_vectors, msix); return (ENOSPC); } device_printf(dev, "%s: Using %d vectors for sub-interface\n", __func__, msix); /* Allocate space to store the IRQ vector data */ mif->num_irq_vectors = vsi->num_rx_queues; mif->rx_irqvs = (struct ice_irq_vector *) malloc(sizeof(struct ice_irq_vector) * (mif->num_irq_vectors), M_ICE, M_NOWAIT); if (!mif->rx_irqvs) { device_printf(dev, "Unable to allocate RX irqv memory for mirror's %d vectors\n", mif->num_irq_vectors); return (ENOMEM); } /* Assign mirror interface interrupts from PF device space */ if (!(mif->if_imap = (u16 *)malloc(sizeof(u16) * mif->num_irq_vectors, M_ICE, M_NOWAIT))) { device_printf(dev, "Unable to allocate mirror intfc if_imap memory\n"); ret = ENOMEM; goto free_irqvs; } ret = ice_resmgr_assign_contiguous(&sc->dev_imgr, mif->if_imap, mif->num_irq_vectors); if (ret) { device_printf(dev, "Unable to assign mirror intfc PF device interrupt mapping: %s\n", ice_err_str(ret)); goto free_if_imap; } /* Assign mirror interface interrupts from OS interrupt allocation space */ if (!(mif->os_imap = (u16 *)malloc(sizeof(u16) * mif->num_irq_vectors, M_ICE, M_NOWAIT))) { device_printf(dev, "Unable to allocate mirror intfc os_imap memory\n"); ret = ENOMEM; goto free_if_imap; } ret = ice_resmgr_assign_contiguous(&sc->os_imgr, mif->os_imap, mif->num_irq_vectors); if (ret) { device_printf(dev, "Unable to assign mirror intfc OS interrupt mapping: %s\n", ice_err_str(ret)); goto free_if_imap; } return (0); free_if_imap: free(mif->if_imap, M_ICE); mif->if_imap = NULL; free_irqvs: free(mif->rx_irqvs, M_ICE); mif->rx_irqvs = NULL; return (ret); } /** * ice_subif_if_intr_enable - Enable device interrupts for a subinterface * @ctx: iflib context structure * * Called by iflib to request enabling all interrupts that belong to a * subinterface. */ static void ice_subif_if_intr_enable(if_ctx_t ctx) { struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx); struct ice_softc *sc = mif->back; struct ice_vsi *vsi = mif->vsi; struct ice_hw *hw = &sc->hw; /* Do not enable queue interrupts in recovery mode */ if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) return; /* Enable all queue interrupts */ for (int i = 0; i < vsi->num_rx_queues; i++) ice_enable_intr(hw, vsi->rx_queues[i].irqv->me); } /** * ice_subif_if_rx_queue_intr_enable - Enable a specific Rx queue interrupt * @ctx: iflib context structure * @rxqid: the Rx queue to enable * * Enable a specific Rx queue interrupt. * * This function is not protected by the iflib CTX lock. 
*/ static int ice_subif_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid) { struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx); struct ice_softc *sc = mif->back; struct ice_vsi *vsi = mif->vsi; struct ice_hw *hw = &sc->hw; /* Do not enable queue interrupts in recovery mode */ if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) return (ENOSYS); ice_enable_intr(hw, vsi->rx_queues[rxqid].irqv->me); return (0); } /** * ice_subif_if_tx_queue_intr_enable - Enable a specific Tx queue interrupt * @ctx: iflib context structure * @txqid: the Tx queue to enable * * Enable a specific Tx queue interrupt. * * This function is not protected by the iflib CTX lock. */ static int ice_subif_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid) { struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx); struct ice_softc *sc = mif->back; struct ice_vsi *vsi = mif->vsi; struct ice_hw *hw = &sc->hw; /* Do not enable queue interrupts in recovery mode */ if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) return (ENOSYS); ice_enable_intr(hw, vsi->tx_queues[txqid].irqv->me); return (0); } /** * ice_subif_if_init - Initialize the subinterface * @ctx: iflib ctx structure * * Called by iflib to bring the device up, i.e. ifconfig ice0m0 up. * Prepares the Tx and Rx engines and enables interrupts. * * @pre assumes the caller holds the iflib CTX lock */ static void ice_subif_if_init(if_ctx_t ctx) { struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx); struct ice_softc *sc = mif->back; struct ice_vsi *vsi = mif->vsi; device_t dev = mif->subdev; int err; if (ice_driver_is_detaching(sc)) return; if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) return; if (ice_test_state(&sc->state, ICE_STATE_RESET_FAILED)) { device_printf(dev, "request to start interface cannot be completed as the parent device %s failed to reset\n", device_get_nameunit(sc->dev)); return; } if (ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET)) { device_printf(dev, "request to start interface cannot be completed while parent device %s is prepared for impending reset\n", device_get_nameunit(sc->dev)); return; } /* XXX: Equiv to ice_update_rx_mbuf_sz */ vsi->mbuf_sz = iflib_get_rx_mbuf_sz(ctx); /* Initialize software Tx tracking values */ ice_init_tx_tracking(vsi); err = ice_cfg_vsi_for_tx(vsi); if (err) { device_printf(dev, "Unable to configure subif VSI for Tx: %s\n", ice_err_str(err)); return; } err = ice_cfg_vsi_for_rx(vsi); if (err) { device_printf(dev, "Unable to configure subif VSI for Rx: %s\n", ice_err_str(err)); goto err_cleanup_tx; } err = ice_control_all_rx_queues(vsi, true); if (err) { device_printf(dev, "Unable to enable subif Rx rings for receive: %s\n", ice_err_str(err)); goto err_cleanup_tx; } ice_configure_all_rxq_interrupts(vsi); ice_configure_rx_itr(vsi); ice_set_state(&mif->state, ICE_STATE_DRIVER_INITIALIZED); return; err_cleanup_tx: ice_vsi_disable_tx(vsi); } /** * ice_subif_if_stop - Stop the subinterface * @ctx: iflib context structure * * Called by iflib to stop the subinterface and bring it down. * (e.g.
ifconfig ice0m0 down) * * @pre assumes the caller holds the iflib CTX lock */ static void ice_subif_if_stop(if_ctx_t ctx) { struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx); struct ice_softc *sc = mif->back; struct ice_vsi *vsi = mif->vsi; device_t dev = mif->subdev; if (!ice_testandclear_state(&mif->state, ICE_STATE_DRIVER_INITIALIZED)) return; if (ice_test_state(&sc->state, ICE_STATE_RESET_FAILED)) { device_printf(dev, "request to stop interface cannot be completed as the parent device %s failed to reset\n", device_get_nameunit(sc->dev)); return; } if (ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET)) { device_printf(dev, "request to stop interface cannot be completed while parent device %s is prepared for impending reset\n", device_get_nameunit(sc->dev)); return; } /* Dissociate the Tx and Rx queues from the interrupts */ ice_flush_txq_interrupts(vsi); ice_flush_rxq_interrupts(vsi); /* Disable the Tx and Rx queues */ ice_vsi_disable_tx(vsi); ice_control_all_rx_queues(vsi, false); } /** * ice_free_irqvs_subif - Free IRQ vector memory for subinterfaces * @mif: Mirror interface private structure * * Free IRQ vector memory allocated during ice_subif_if_msix_intr_assign. */ static void ice_free_irqvs_subif(struct ice_mirr_if *mif) { struct ice_softc *sc = mif->back; struct ice_vsi *vsi = mif->vsi; if_ctx_t ctx = sc->ctx; int i; /* If the irqvs array is NULL, then there are no vectors to free */ if (mif->rx_irqvs == NULL) return; /* Free the IRQ vectors -- currently subinterfaces have number * of vectors equal to number of RX queues * * XXX: ctx is parent device's ctx, not the subinterface ctx */ for (i = 0; i < vsi->num_rx_queues; i++) iflib_irq_free(ctx, &mif->rx_irqvs[i].irq); ice_resmgr_release_map(&sc->os_imgr, mif->os_imap, mif->num_irq_vectors); ice_resmgr_release_map(&sc->dev_imgr, mif->if_imap, mif->num_irq_vectors); sc->last_rid -= vsi->num_rx_queues; /* Clear the irqv pointers */ for (i = 0; i < vsi->num_rx_queues; i++) vsi->rx_queues[i].irqv = NULL; for (i = 0; i < vsi->num_tx_queues; i++) vsi->tx_queues[i].irqv = NULL; /* Release the vector array memory */ free(mif->rx_irqvs, M_ICE); mif->rx_irqvs = NULL; } /** * ice_subif_if_queues_free - Free queue memory for subinterfaces * @ctx: the iflib context structure * * Free queue memory allocated by ice_subif_if_tx_queues_alloc() and * ice_subif_if_rx_queues_alloc(). */ static void ice_subif_if_queues_free(if_ctx_t ctx) { struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx); struct ice_vsi *vsi = mif->vsi; struct ice_tx_queue *txq; int i; /* Free the Tx and Rx sysctl contexts, and assign NULL to the node * pointers. */ ice_vsi_del_txqs_ctx(vsi); ice_vsi_del_rxqs_ctx(vsi); /* Release MSI-X IRQ vectors */ ice_free_irqvs_subif(mif); if (vsi->tx_queues != NULL) { /* free the tx_rsq arrays */ for (i = 0, txq = vsi->tx_queues; i < vsi->num_tx_queues; i++, txq++) { if (txq->tx_rsq != NULL) { free(txq->tx_rsq, M_ICE); txq->tx_rsq = NULL; } } free(vsi->tx_queues, M_ICE); vsi->tx_queues = NULL; } if (vsi->rx_queues != NULL) { free(vsi->rx_queues, M_ICE); vsi->rx_queues = NULL; } } /** * ice_subif_if_media_status - Report subinterface media * @ctx: iflib context structure * @ifmr: ifmedia request structure to update * * Updates the provided ifmr with a fixed active/autoselect media status, in * order to prevent a "no media types?" message from ifconfig. * * Mirror interfaces are always up.
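* * @remark the mirror interface carries no physical media of its own, so * reporting a fixed IFM_ETHER | IFM_AUTO status is sufficient here.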
*/ static void ice_subif_if_media_status(if_ctx_t ctx __unused, struct ifmediareq *ifmr) { ifmr->ifm_status = IFM_AVALID | IFM_ACTIVE; ifmr->ifm_active = IFM_ETHER | IFM_AUTO; } /** * ice_subif_if_promisc_set - Set subinterface promiscuous mode * @ctx: iflib context structure * @flags: promiscuous flags to configure * * Called by iflib to configure device promiscuous mode. * * @remark This does not need to be implemented for now. */ static int ice_subif_if_promisc_set(if_ctx_t ctx __unused, int flags __unused) { return (0); }