Index: head/sys/dev/e1000/if_em.c =================================================================== --- head/sys/dev/e1000/if_em.c (revision 327827) +++ head/sys/dev/e1000/if_em.c (revision 327828) @@ -1,4559 +1,4560 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2016 Matthew Macy * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* $FreeBSD$ */ #include "if_em.h" #include #include #define em_mac_min e1000_82547 #define igb_mac_min e1000_82575 /********************************************************************* * Driver version: *********************************************************************/ char em_driver_version[] = "7.6.1-k"; /********************************************************************* * PCI Device ID Table * * Used by probe to select devices to load on * Last field stores an index into e1000_strings * Last entry must be all 0s * * { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index } *********************************************************************/ static pci_vendor_info_t em_vendor_info_array[] = { /* Intel(R) PRO/1000 Network Connection - Legacy em*/ PVID(0x8086, E1000_DEV_ID_82540EM, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82540EM_LOM, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82540EP, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82540EP_LOM, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82540EP_LP, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82541EI, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82541ER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82541ER_LOM, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82541EI_MOBILE, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82541GI, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82541GI_LF, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82541GI_MOBILE, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82542, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82543GC_FIBER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82543GC_COPPER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82544EI_COPPER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82544EI_FIBER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82544GC_COPPER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82544GC_LOM, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82545EM_COPPER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82545EM_FIBER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82545GM_COPPER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82545GM_FIBER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82545GM_SERDES, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82546EB_COPPER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82546EB_FIBER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82546EB_QUAD_COPPER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82546GB_COPPER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82546GB_FIBER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82546GB_SERDES, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82546GB_PCIE, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82546GB_QUAD_COPPER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82547EI, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82547EI_MOBILE, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82547GI, "Intel(R) PRO/1000 Network Connection"), /* Intel(R) PRO/1000 Network Connection - em */ PVID(0x8086, E1000_DEV_ID_82571EB_COPPER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82571EB_FIBER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82571EB_SERDES, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82572EI, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82572EI_COPPER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82572EI_FIBER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82572EI_SERDES, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82573E, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82573E_IAMT, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82573L, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82583V, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH8_IGP_AMT, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH8_IGP_C, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH8_IFE, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH8_IFE_GT, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH8_IFE_G, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH8_IGP_M, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH8_82567V_3, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH9_IGP_AMT, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH9_IGP_C, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH9_IGP_M, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH9_IGP_M_V, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH9_IFE, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH9_IFE_GT, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH9_IFE_G, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH9_BM, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82574L, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82574LA, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH10_R_BM_LM, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH10_R_BM_LF, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH10_R_BM_V, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH10_D_BM_LM, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH10_D_BM_LF, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH10_D_BM_V, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_M_HV_LM, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_M_HV_LC, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_D_HV_DM, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_D_HV_DC, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH2_LV_LM, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH2_LV_V, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_LPT_I217_LM, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_LPT_I217_V, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_LPTLP_I218_LM, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_LPTLP_I218_V, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_I218_LM2, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_I218_V2, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_I218_LM3, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_I218_V3, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_SPT_I219_LM, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_SPT_I219_V, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_SPT_I219_LM2, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_SPT_I219_V2, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_LBG_I219_LM3, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_SPT_I219_LM4, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_SPT_I219_V4, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_SPT_I219_LM5, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_SPT_I219_V5, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_CNP_I219_LM6, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_CNP_I219_V6, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_CNP_I219_LM7, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_CNP_I219_V7, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_ICP_I219_LM8, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_ICP_I219_V8, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_ICP_I219_LM9, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_ICP_I219_V9, "Intel(R) PRO/1000 Network Connection"), /* required last entry */ PVID_END }; static pci_vendor_info_t igb_vendor_info_array[] = { /* Intel(R) PRO/1000 Network Connection - igb */ PVID(0x8086, E1000_DEV_ID_82575EB_COPPER, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_82576, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_82576_NS, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_82576_NS_SERDES, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_82576_FIBER, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_82576_SERDES, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_82576_SERDES_QUAD, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_82576_QUAD_COPPER, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_82576_QUAD_COPPER_ET2, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_82576_VF, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_82580_COPPER, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_82580_FIBER, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_82580_SERDES, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_82580_SGMII, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_82580_COPPER_DUAL, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_82580_QUAD_FIBER, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_DH89XXCC_SERDES, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_DH89XXCC_SGMII, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_DH89XXCC_SFP, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_DH89XXCC_BACKPLANE, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_I350_COPPER, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_I350_FIBER, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_I350_SERDES, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_I350_SGMII, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_I350_VF, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_I210_COPPER, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_I210_COPPER_IT, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_I210_COPPER_OEM1, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_I210_COPPER_FLASHLESS, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_I210_SERDES_FLASHLESS, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_I210_FIBER, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_I210_SERDES, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_I210_SGMII, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_I211_COPPER, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_I354_BACKPLANE_1GBPS, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_I354_BACKPLANE_2_5GBPS, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_I354_SGMII, "Intel(R) PRO/1000 PCI-Express Network Driver"), /* required last entry */ PVID_END }; /********************************************************************* * Function prototypes *********************************************************************/ static void *em_register(device_t dev); static void *igb_register(device_t dev); static int em_if_attach_pre(if_ctx_t ctx); static int em_if_attach_post(if_ctx_t ctx); static int em_if_detach(if_ctx_t ctx); static int em_if_shutdown(if_ctx_t ctx); static int em_if_suspend(if_ctx_t ctx); static int em_if_resume(if_ctx_t ctx); static int em_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int ntxqs, int ntxqsets); static int em_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int nrxqs, int nrxqsets); static void em_if_queues_free(if_ctx_t ctx); static uint64_t em_if_get_counter(if_ctx_t, ift_counter); static void em_if_init(if_ctx_t ctx); static void em_if_stop(if_ctx_t ctx); static void em_if_media_status(if_ctx_t, struct ifmediareq *); static int em_if_media_change(if_ctx_t ctx); static int em_if_mtu_set(if_ctx_t ctx, uint32_t mtu); static void em_if_timer(if_ctx_t ctx, uint16_t qid); static void em_if_vlan_register(if_ctx_t ctx, u16 vtag); static void em_if_vlan_unregister(if_ctx_t ctx, u16 vtag); static void em_identify_hardware(if_ctx_t ctx); static int em_allocate_pci_resources(if_ctx_t ctx); static void em_free_pci_resources(if_ctx_t ctx); static void em_reset(if_ctx_t ctx); static int em_setup_interface(if_ctx_t ctx); static int em_setup_msix(if_ctx_t ctx); static void em_initialize_transmit_unit(if_ctx_t ctx); static void em_initialize_receive_unit(if_ctx_t ctx); static void em_if_enable_intr(if_ctx_t ctx); static void em_if_disable_intr(if_ctx_t ctx); static int em_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid); static int em_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid); static void em_if_multi_set(if_ctx_t ctx); static void em_if_update_admin_status(if_ctx_t ctx); static void em_if_debug(if_ctx_t ctx); static void em_update_stats_counters(struct adapter *); static void em_add_hw_stats(struct adapter *adapter); static int em_if_set_promisc(if_ctx_t ctx, int flags); static void em_setup_vlan_hw_support(struct adapter *); static int em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS); static void em_print_nvm_info(struct adapter *); static int em_sysctl_debug_info(SYSCTL_HANDLER_ARGS); static int em_get_rs(SYSCTL_HANDLER_ARGS); static void em_print_debug_info(struct adapter *); static int em_is_valid_ether_addr(u8 *); static int em_sysctl_int_delay(SYSCTL_HANDLER_ARGS); static void em_add_int_delay_sysctl(struct adapter *, const char *, const char *, struct em_int_delay_info *, int, int); /* Management and WOL Support */ static void em_init_manageability(struct adapter *); static void em_release_manageability(struct adapter *); static void em_get_hw_control(struct adapter *); static void em_release_hw_control(struct adapter *); static void em_get_wakeup(if_ctx_t ctx); static void em_enable_wakeup(if_ctx_t ctx); static int em_enable_phy_wakeup(struct adapter *); static void em_disable_aspm(struct adapter *); int em_intr(void *arg); static void em_disable_promisc(if_ctx_t ctx); /* MSIX handlers */ static int em_if_msix_intr_assign(if_ctx_t, int); static int em_msix_link(void *); static void em_handle_link(void *context); static void em_enable_vectors_82574(if_ctx_t); static int em_set_flowcntl(SYSCTL_HANDLER_ARGS); static int em_sysctl_eee(SYSCTL_HANDLER_ARGS); static void em_if_led_func(if_ctx_t ctx, int onoff); static int em_get_regs(SYSCTL_HANDLER_ARGS); static void lem_smartspeed(struct adapter *adapter); static void igb_configure_queues(struct adapter *adapter); /********************************************************************* * FreeBSD Device Interface Entry Points *********************************************************************/ static device_method_t em_methods[] = { /* Device interface */ DEVMETHOD(device_register, em_register), DEVMETHOD(device_probe, iflib_device_probe), DEVMETHOD(device_attach, iflib_device_attach), DEVMETHOD(device_detach, iflib_device_detach), DEVMETHOD(device_shutdown, iflib_device_shutdown), DEVMETHOD(device_suspend, iflib_device_suspend), DEVMETHOD(device_resume, iflib_device_resume), DEVMETHOD_END }; static device_method_t igb_methods[] = { /* Device interface */ DEVMETHOD(device_register, igb_register), DEVMETHOD(device_probe, iflib_device_probe), DEVMETHOD(device_attach, iflib_device_attach), DEVMETHOD(device_detach, iflib_device_detach), DEVMETHOD(device_shutdown, iflib_device_shutdown), DEVMETHOD(device_suspend, iflib_device_suspend), DEVMETHOD(device_resume, iflib_device_resume), DEVMETHOD_END }; static driver_t em_driver = { "em", em_methods, sizeof(struct adapter), }; static devclass_t em_devclass; DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0); MODULE_DEPEND(em, pci, 1, 1, 1); MODULE_DEPEND(em, ether, 1, 1, 1); MODULE_DEPEND(em, iflib, 1, 1, 1); IFLIB_PNP_INFO(pci, em, em_vendor_info_array); static driver_t igb_driver = { "igb", igb_methods, sizeof(struct adapter), }; static devclass_t igb_devclass; DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0); MODULE_DEPEND(igb, pci, 1, 1, 1); MODULE_DEPEND(igb, ether, 1, 1, 1); MODULE_DEPEND(igb, iflib, 1, 1, 1); IFLIB_PNP_INFO(pci, igb, igb_vendor_info_array); static device_method_t em_if_methods[] = { DEVMETHOD(ifdi_attach_pre, em_if_attach_pre), DEVMETHOD(ifdi_attach_post, em_if_attach_post), DEVMETHOD(ifdi_detach, em_if_detach), DEVMETHOD(ifdi_shutdown, em_if_shutdown), DEVMETHOD(ifdi_suspend, em_if_suspend), DEVMETHOD(ifdi_resume, em_if_resume), DEVMETHOD(ifdi_init, em_if_init), DEVMETHOD(ifdi_stop, em_if_stop), DEVMETHOD(ifdi_msix_intr_assign, em_if_msix_intr_assign), DEVMETHOD(ifdi_intr_enable, em_if_enable_intr), DEVMETHOD(ifdi_intr_disable, em_if_disable_intr), DEVMETHOD(ifdi_tx_queues_alloc, em_if_tx_queues_alloc), DEVMETHOD(ifdi_rx_queues_alloc, em_if_rx_queues_alloc), DEVMETHOD(ifdi_queues_free, em_if_queues_free), DEVMETHOD(ifdi_update_admin_status, em_if_update_admin_status), DEVMETHOD(ifdi_multi_set, em_if_multi_set), DEVMETHOD(ifdi_media_status, em_if_media_status), DEVMETHOD(ifdi_media_change, em_if_media_change), DEVMETHOD(ifdi_mtu_set, em_if_mtu_set), DEVMETHOD(ifdi_promisc_set, em_if_set_promisc), DEVMETHOD(ifdi_timer, em_if_timer), DEVMETHOD(ifdi_vlan_register, em_if_vlan_register), DEVMETHOD(ifdi_vlan_unregister, em_if_vlan_unregister), DEVMETHOD(ifdi_get_counter, em_if_get_counter), DEVMETHOD(ifdi_led_func, em_if_led_func), DEVMETHOD(ifdi_rx_queue_intr_enable, em_if_rx_queue_intr_enable), DEVMETHOD(ifdi_tx_queue_intr_enable, em_if_tx_queue_intr_enable), DEVMETHOD(ifdi_debug, em_if_debug), DEVMETHOD_END }; /* * note that if (adapter->msix_mem) is replaced by: * if (adapter->intr_type == IFLIB_INTR_MSIX) */ static driver_t em_if_driver = { "em_if", em_if_methods, sizeof(struct adapter) }; /********************************************************************* * Tunable default values. *********************************************************************/ #define EM_TICKS_TO_USECS(ticks) ((1024 * (ticks) + 500) / 1000) #define EM_USECS_TO_TICKS(usecs) ((1000 * (usecs) + 512) / 1024) #define M_TSO_LEN 66 #define MAX_INTS_PER_SEC 8000 #define DEFAULT_ITR (1000000000/(MAX_INTS_PER_SEC * 256)) /* Allow common code without TSO */ #ifndef CSUM_TSO #define CSUM_TSO 0 #endif #define TSO_WORKAROUND 4 static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters"); static int em_disable_crc_stripping = 0; SYSCTL_INT(_hw_em, OID_AUTO, disable_crc_stripping, CTLFLAG_RDTUN, &em_disable_crc_stripping, 0, "Disable CRC Stripping"); static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV); static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR); SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt, 0, "Default transmit interrupt delay in usecs"); SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt, 0, "Default receive interrupt delay in usecs"); static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV); static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV); SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN, &em_tx_abs_int_delay_dflt, 0, "Default transmit interrupt delay limit in usecs"); SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN, &em_rx_abs_int_delay_dflt, 0, "Default receive interrupt delay limit in usecs"); static int em_smart_pwr_down = FALSE; SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down, 0, "Set to true to leave smart power down enabled on newer adapters"); /* Controls whether promiscuous also shows bad packets */ static int em_debug_sbp = TRUE; SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0, "Show bad packets in promiscuous mode"); /* How many packets rxeof tries to clean at a time */ static int em_rx_process_limit = 100; SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN, &em_rx_process_limit, 0, "Maximum number of received packets to process " "at a time, -1 means unlimited"); /* Energy efficient ethernet - default to OFF */ static int eee_setting = 1; SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0, "Enable Energy Efficient Ethernet"); /* ** Tuneable Interrupt rate */ static int em_max_interrupt_rate = 8000; SYSCTL_INT(_hw_em, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN, &em_max_interrupt_rate, 0, "Maximum interrupts per second"); /* Global used in WOL setup with multiport cards */ static int global_quad_port_a = 0; extern struct if_txrx igb_txrx; extern struct if_txrx em_txrx; extern struct if_txrx lem_txrx; static struct if_shared_ctx em_sctx_init = { .isc_magic = IFLIB_MAGIC, .isc_q_align = PAGE_SIZE, .isc_tx_maxsize = EM_TSO_SIZE, .isc_tx_maxsegsize = PAGE_SIZE, .isc_rx_maxsize = MJUM9BYTES, .isc_rx_nsegments = 1, .isc_rx_maxsegsize = MJUM9BYTES, .isc_nfl = 1, .isc_nrxqs = 1, .isc_ntxqs = 1, .isc_admin_intrcnt = 1, .isc_vendor_info = em_vendor_info_array, .isc_driver_version = em_driver_version, .isc_driver = &em_if_driver, .isc_flags = IFLIB_NEED_SCRATCH | IFLIB_TSO_INIT_IP | IFLIB_NEED_ZERO_CSUM, .isc_nrxd_min = {EM_MIN_RXD}, .isc_ntxd_min = {EM_MIN_TXD}, .isc_nrxd_max = {EM_MAX_RXD}, .isc_ntxd_max = {EM_MAX_TXD}, .isc_nrxd_default = {EM_DEFAULT_RXD}, .isc_ntxd_default = {EM_DEFAULT_TXD}, }; if_shared_ctx_t em_sctx = &em_sctx_init; static struct if_shared_ctx igb_sctx_init = { .isc_magic = IFLIB_MAGIC, .isc_q_align = PAGE_SIZE, .isc_tx_maxsize = EM_TSO_SIZE, .isc_tx_maxsegsize = PAGE_SIZE, .isc_rx_maxsize = MJUM9BYTES, .isc_rx_nsegments = 1, .isc_rx_maxsegsize = MJUM9BYTES, .isc_nfl = 1, .isc_nrxqs = 1, .isc_ntxqs = 1, .isc_admin_intrcnt = 1, .isc_vendor_info = igb_vendor_info_array, .isc_driver_version = em_driver_version, .isc_driver = &em_if_driver, .isc_flags = IFLIB_NEED_SCRATCH | IFLIB_TSO_INIT_IP | IFLIB_NEED_ZERO_CSUM, .isc_nrxd_min = {EM_MIN_RXD}, .isc_ntxd_min = {EM_MIN_TXD}, .isc_nrxd_max = {IGB_MAX_RXD}, .isc_ntxd_max = {IGB_MAX_TXD}, .isc_nrxd_default = {EM_DEFAULT_RXD}, .isc_ntxd_default = {EM_DEFAULT_TXD}, }; if_shared_ctx_t igb_sctx = &igb_sctx_init; /***************************************************************** * * Dump Registers * ****************************************************************/ #define IGB_REGS_LEN 739 static int em_get_regs(SYSCTL_HANDLER_ARGS) { struct adapter *adapter = (struct adapter *)arg1; struct e1000_hw *hw = &adapter->hw; struct sbuf *sb; u32 *regs_buff; int rc; regs_buff = malloc(sizeof(u32) * IGB_REGS_LEN, M_DEVBUF, M_WAITOK); memset(regs_buff, 0, IGB_REGS_LEN * sizeof(u32)); rc = sysctl_wire_old_buffer(req, 0); MPASS(rc == 0); if (rc != 0) { free(regs_buff, M_DEVBUF); return (rc); } sb = sbuf_new_for_sysctl(NULL, NULL, 32*400, req); MPASS(sb != NULL); if (sb == NULL) { free(regs_buff, M_DEVBUF); return (ENOMEM); } /* General Registers */ regs_buff[0] = E1000_READ_REG(hw, E1000_CTRL); regs_buff[1] = E1000_READ_REG(hw, E1000_STATUS); regs_buff[2] = E1000_READ_REG(hw, E1000_CTRL_EXT); regs_buff[3] = E1000_READ_REG(hw, E1000_ICR); regs_buff[4] = E1000_READ_REG(hw, E1000_RCTL); regs_buff[5] = E1000_READ_REG(hw, E1000_RDLEN(0)); regs_buff[6] = E1000_READ_REG(hw, E1000_RDH(0)); regs_buff[7] = E1000_READ_REG(hw, E1000_RDT(0)); regs_buff[8] = E1000_READ_REG(hw, E1000_RXDCTL(0)); regs_buff[9] = E1000_READ_REG(hw, E1000_RDBAL(0)); regs_buff[10] = E1000_READ_REG(hw, E1000_RDBAH(0)); regs_buff[11] = E1000_READ_REG(hw, E1000_TCTL); regs_buff[12] = E1000_READ_REG(hw, E1000_TDBAL(0)); regs_buff[13] = E1000_READ_REG(hw, E1000_TDBAH(0)); regs_buff[14] = E1000_READ_REG(hw, E1000_TDLEN(0)); regs_buff[15] = E1000_READ_REG(hw, E1000_TDH(0)); regs_buff[16] = E1000_READ_REG(hw, E1000_TDT(0)); regs_buff[17] = E1000_READ_REG(hw, E1000_TXDCTL(0)); regs_buff[18] = E1000_READ_REG(hw, E1000_TDFH); regs_buff[19] = E1000_READ_REG(hw, E1000_TDFT); regs_buff[20] = E1000_READ_REG(hw, E1000_TDFHS); regs_buff[21] = E1000_READ_REG(hw, E1000_TDFPC); sbuf_printf(sb, "General Registers\n"); sbuf_printf(sb, "\tCTRL\t %08x\n", regs_buff[0]); sbuf_printf(sb, "\tSTATUS\t %08x\n", regs_buff[1]); sbuf_printf(sb, "\tCTRL_EXIT\t %08x\n\n", regs_buff[2]); sbuf_printf(sb, "Interrupt Registers\n"); sbuf_printf(sb, "\tICR\t %08x\n\n", regs_buff[3]); sbuf_printf(sb, "RX Registers\n"); sbuf_printf(sb, "\tRCTL\t %08x\n", regs_buff[4]); sbuf_printf(sb, "\tRDLEN\t %08x\n", regs_buff[5]); sbuf_printf(sb, "\tRDH\t %08x\n", regs_buff[6]); sbuf_printf(sb, "\tRDT\t %08x\n", regs_buff[7]); sbuf_printf(sb, "\tRXDCTL\t %08x\n", regs_buff[8]); sbuf_printf(sb, "\tRDBAL\t %08x\n", regs_buff[9]); sbuf_printf(sb, "\tRDBAH\t %08x\n\n", regs_buff[10]); sbuf_printf(sb, "TX Registers\n"); sbuf_printf(sb, "\tTCTL\t %08x\n", regs_buff[11]); sbuf_printf(sb, "\tTDBAL\t %08x\n", regs_buff[12]); sbuf_printf(sb, "\tTDBAH\t %08x\n", regs_buff[13]); sbuf_printf(sb, "\tTDLEN\t %08x\n", regs_buff[14]); sbuf_printf(sb, "\tTDH\t %08x\n", regs_buff[15]); sbuf_printf(sb, "\tTDT\t %08x\n", regs_buff[16]); sbuf_printf(sb, "\tTXDCTL\t %08x\n", regs_buff[17]); sbuf_printf(sb, "\tTDFH\t %08x\n", regs_buff[18]); sbuf_printf(sb, "\tTDFT\t %08x\n", regs_buff[19]); sbuf_printf(sb, "\tTDFHS\t %08x\n", regs_buff[20]); sbuf_printf(sb, "\tTDFPC\t %08x\n\n", regs_buff[21]); free(regs_buff, M_DEVBUF); #ifdef DUMP_DESCS { if_softc_ctx_t scctx = adapter->shared; struct rx_ring *rxr = &rx_que->rxr; struct tx_ring *txr = &tx_que->txr; int ntxd = scctx->isc_ntxd[0]; int nrxd = scctx->isc_nrxd[0]; int j; for (j = 0; j < nrxd; j++) { u32 staterr = le32toh(rxr->rx_base[j].wb.upper.status_error); u32 length = le32toh(rxr->rx_base[j].wb.upper.length); sbuf_printf(sb, "\tReceive Descriptor Address %d: %08" PRIx64 " Error:%d Length:%d\n", j, rxr->rx_base[j].read.buffer_addr, staterr, length); } for (j = 0; j < min(ntxd, 256); j++) { unsigned int *ptr = (unsigned int *)&txr->tx_base[j]; sbuf_printf(sb, "\tTXD[%03d] [0]: %08x [1]: %08x [2]: %08x [3]: %08x eop: %d DD=%d\n", j, ptr[0], ptr[1], ptr[2], ptr[3], buf->eop, buf->eop != -1 ? txr->tx_base[buf->eop].upper.fields.status & E1000_TXD_STAT_DD : 0); } } #endif rc = sbuf_finish(sb); sbuf_delete(sb); return(rc); } static void * em_register(device_t dev) { return (em_sctx); } static void * igb_register(device_t dev) { return (igb_sctx); } static int em_set_num_queues(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); int maxqueues; /* Sanity check based on HW */ switch (adapter->hw.mac.type) { case e1000_82576: case e1000_82580: case e1000_i350: case e1000_i354: maxqueues = 8; break; case e1000_i210: case e1000_82575: maxqueues = 4; break; case e1000_i211: case e1000_82574: maxqueues = 2; break; default: maxqueues = 1; break; } return (maxqueues); } #define EM_CAPS \ IFCAP_TSO4 | IFCAP_TXCSUM | IFCAP_LRO | IFCAP_RXCSUM | IFCAP_VLAN_HWFILTER | IFCAP_WOL_MAGIC | \ IFCAP_WOL_MCAST | IFCAP_WOL | IFCAP_VLAN_HWTSO | IFCAP_HWCSUM | IFCAP_VLAN_HWTAGGING | \ IFCAP_VLAN_HWCSUM | IFCAP_VLAN_HWTSO | IFCAP_VLAN_MTU; #define IGB_CAPS \ IFCAP_TSO4 | IFCAP_TXCSUM | IFCAP_LRO | IFCAP_RXCSUM | IFCAP_VLAN_HWFILTER | IFCAP_WOL_MAGIC | \ IFCAP_WOL_MCAST | IFCAP_WOL | IFCAP_VLAN_HWTSO | IFCAP_HWCSUM | IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM | \ IFCAP_VLAN_HWTSO | IFCAP_VLAN_MTU | IFCAP_TXCSUM_IPV6 | IFCAP_HWCSUM_IPV6 | IFCAP_JUMBO_MTU; /********************************************************************* * Device initialization routine * * The attach entry point is called when the driver is being loaded. * This routine identifies the type of hardware, allocates all resources * and initializes the hardware. * * return 0 on success, positive on failure *********************************************************************/ static int em_if_attach_pre(if_ctx_t ctx) { struct adapter *adapter; if_softc_ctx_t scctx; device_t dev; struct e1000_hw *hw; int error = 0; INIT_DEBUGOUT("em_if_attach_pre begin"); dev = iflib_get_dev(ctx); adapter = iflib_get_softc(ctx); if (resource_disabled("em", device_get_unit(dev))) { device_printf(dev, "Disabled by device hint\n"); return (ENXIO); } adapter->ctx = ctx; adapter->dev = adapter->osdep.dev = dev; scctx = adapter->shared = iflib_get_softc_ctx(ctx); adapter->media = iflib_get_media(ctx); hw = &adapter->hw; adapter->tx_process_limit = scctx->isc_ntxd[0]; /* SYSCTL stuff */ SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0, em_sysctl_nvm_info, "I", "NVM Information"); SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0, em_sysctl_debug_info, "I", "Debug Information"); SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0, em_set_flowcntl, "I", "Flow Control"); SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "reg_dump", CTLTYPE_STRING | CTLFLAG_RD, adapter, 0, em_get_regs, "A", "Dump Registers"); SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "rs_dump", CTLTYPE_INT | CTLFLAG_RW, adapter, 0, em_get_rs, "I", "Dump RS indexes"); /* Determine hardware and mac info */ em_identify_hardware(ctx); /* Set isc_msix_bar */ scctx->isc_msix_bar = PCIR_BAR(EM_MSIX_BAR); scctx->isc_tx_nsegments = EM_MAX_SCATTER; scctx->isc_tx_tso_segments_max = scctx->isc_tx_nsegments; scctx->isc_tx_tso_size_max = EM_TSO_SIZE; scctx->isc_tx_tso_segsize_max = EM_TSO_SEG_SIZE; scctx->isc_nrxqsets_max = scctx->isc_ntxqsets_max = em_set_num_queues(ctx); device_printf(dev, "attach_pre capping queues at %d\n", scctx->isc_ntxqsets_max); scctx->isc_tx_csum_flags = CSUM_TCP | CSUM_UDP | CSUM_IP_TSO; if (adapter->hw.mac.type >= igb_mac_min) { int try_second_bar; scctx->isc_txqsizes[0] = roundup2(scctx->isc_ntxd[0] * sizeof(union e1000_adv_tx_desc), EM_DBA_ALIGN); scctx->isc_rxqsizes[0] = roundup2(scctx->isc_nrxd[0] * sizeof(union e1000_adv_rx_desc), EM_DBA_ALIGN); scctx->isc_txd_size[0] = sizeof(union e1000_adv_tx_desc); scctx->isc_rxd_size[0] = sizeof(union e1000_adv_rx_desc); scctx->isc_txrx = &igb_txrx; scctx->isc_capenable = IGB_CAPS; scctx->isc_tx_csum_flags = CSUM_TCP | CSUM_UDP | CSUM_TSO | CSUM_IP6_TCP \ | CSUM_IP6_UDP | CSUM_IP6_TCP; if (adapter->hw.mac.type != e1000_82575) scctx->isc_tx_csum_flags |= CSUM_SCTP | CSUM_IP6_SCTP; /* ** Some new devices, as with ixgbe, now may ** use a different BAR, so we need to keep ** track of which is used. */ try_second_bar = pci_read_config(dev, scctx->isc_msix_bar, 4); if (try_second_bar == 0) scctx->isc_msix_bar += 4; } else if (adapter->hw.mac.type >= em_mac_min) { scctx->isc_txqsizes[0] = roundup2(scctx->isc_ntxd[0]* sizeof(struct e1000_tx_desc), EM_DBA_ALIGN); scctx->isc_rxqsizes[0] = roundup2(scctx->isc_nrxd[0] * sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN); scctx->isc_txd_size[0] = sizeof(struct e1000_tx_desc); scctx->isc_rxd_size[0] = sizeof(union e1000_rx_desc_extended); scctx->isc_txrx = &em_txrx; scctx->isc_capenable = EM_CAPS; scctx->isc_tx_csum_flags = CSUM_TCP | CSUM_UDP | CSUM_IP_TSO; } else { scctx->isc_txqsizes[0] = roundup2((scctx->isc_ntxd[0] + 1) * sizeof(struct e1000_tx_desc), EM_DBA_ALIGN); scctx->isc_rxqsizes[0] = roundup2((scctx->isc_nrxd[0] + 1) * sizeof(struct e1000_rx_desc), EM_DBA_ALIGN); scctx->isc_txd_size[0] = sizeof(struct e1000_tx_desc); scctx->isc_rxd_size[0] = sizeof(struct e1000_rx_desc); scctx->isc_tx_csum_flags = CSUM_TCP | CSUM_UDP | CSUM_IP_TSO; scctx->isc_txrx = &lem_txrx; scctx->isc_capenable = EM_CAPS; if (adapter->hw.mac.type < e1000_82543) scctx->isc_capenable &= ~(IFCAP_HWCSUM|IFCAP_VLAN_HWCSUM); scctx->isc_tx_csum_flags = CSUM_TCP | CSUM_UDP | CSUM_IP_TSO; scctx->isc_msix_bar = 0; } /* Setup PCI resources */ if (em_allocate_pci_resources(ctx)) { device_printf(dev, "Allocation of PCI resources failed\n"); error = ENXIO; goto err_pci; } /* ** For ICH8 and family we need to ** map the flash memory, and this ** must happen after the MAC is ** identified */ if ((hw->mac.type == e1000_ich8lan) || (hw->mac.type == e1000_ich9lan) || (hw->mac.type == e1000_ich10lan) || (hw->mac.type == e1000_pchlan) || (hw->mac.type == e1000_pch2lan) || (hw->mac.type == e1000_pch_lpt)) { int rid = EM_BAR_TYPE_FLASH; adapter->flash = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (adapter->flash == NULL) { device_printf(dev, "Mapping of Flash failed\n"); error = ENXIO; goto err_pci; } /* This is used in the shared code */ hw->flash_address = (u8 *)adapter->flash; adapter->osdep.flash_bus_space_tag = rman_get_bustag(adapter->flash); adapter->osdep.flash_bus_space_handle = rman_get_bushandle(adapter->flash); } /* ** In the new SPT device flash is not a ** separate BAR, rather it is also in BAR0, ** so use the same tag and an offset handle for the ** FLASH read/write macros in the shared code. */ else if (hw->mac.type >= e1000_pch_spt) { adapter->osdep.flash_bus_space_tag = adapter->osdep.mem_bus_space_tag; adapter->osdep.flash_bus_space_handle = adapter->osdep.mem_bus_space_handle + E1000_FLASH_BASE_ADDR; } /* Do Shared Code initialization */ error = e1000_setup_init_funcs(hw, TRUE); if (error) { device_printf(dev, "Setup of Shared code failed, error %d\n", error); error = ENXIO; goto err_pci; } em_setup_msix(ctx); e1000_get_bus_info(hw); /* Set up some sysctls for the tunable interrupt delays */ em_add_int_delay_sysctl(adapter, "rx_int_delay", "receive interrupt delay in usecs", &adapter->rx_int_delay, E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt); em_add_int_delay_sysctl(adapter, "tx_int_delay", "transmit interrupt delay in usecs", &adapter->tx_int_delay, E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt); em_add_int_delay_sysctl(adapter, "rx_abs_int_delay", "receive interrupt delay limit in usecs", &adapter->rx_abs_int_delay, E1000_REGISTER(hw, E1000_RADV), em_rx_abs_int_delay_dflt); em_add_int_delay_sysctl(adapter, "tx_abs_int_delay", "transmit interrupt delay limit in usecs", &adapter->tx_abs_int_delay, E1000_REGISTER(hw, E1000_TADV), em_tx_abs_int_delay_dflt); em_add_int_delay_sysctl(adapter, "itr", "interrupt delay limit in usecs/4", &adapter->tx_itr, E1000_REGISTER(hw, E1000_ITR), DEFAULT_ITR); hw->mac.autoneg = DO_AUTO_NEG; hw->phy.autoneg_wait_to_complete = FALSE; hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT; if (adapter->hw.mac.type < em_mac_min) { e1000_init_script_state_82541(&adapter->hw, TRUE); e1000_set_tbi_compatibility_82543(&adapter->hw, TRUE); } /* Copper options */ if (hw->phy.media_type == e1000_media_type_copper) { hw->phy.mdix = AUTO_ALL_MODES; hw->phy.disable_polarity_correction = FALSE; hw->phy.ms_type = EM_MASTER_SLAVE; } /* * Set the frame limits assuming * standard ethernet sized frames. */ scctx->isc_max_frame_size = adapter->hw.mac.max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE; /* * This controls when hardware reports transmit completion * status. */ hw->mac.report_tx_early = 1; /* Allocate multicast array memory. */ adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT); if (adapter->mta == NULL) { device_printf(dev, "Can not allocate multicast setup array\n"); error = ENOMEM; goto err_late; } /* Check SOL/IDER usage */ if (e1000_check_reset_block(hw)) device_printf(dev, "PHY reset is blocked" " due to SOL/IDER session.\n"); /* Sysctl for setting Energy Efficient Ethernet */ hw->dev_spec.ich8lan.eee_disable = eee_setting; SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "eee_control", CTLTYPE_INT|CTLFLAG_RW, adapter, 0, em_sysctl_eee, "I", "Disable Energy Efficient Ethernet"); /* ** Start from a known state, this is ** important in reading the nvm and ** mac from that. */ e1000_reset_hw(hw); /* Make sure we have a good EEPROM before we read from it */ if (e1000_validate_nvm_checksum(hw) < 0) { /* ** Some PCI-E parts fail the first check due to ** the link being in sleep state, call it again, ** if it fails a second time its a real issue. */ if (e1000_validate_nvm_checksum(hw) < 0) { device_printf(dev, "The EEPROM Checksum Is Not Valid\n"); error = EIO; goto err_late; } } /* Copy the permanent MAC address out of the EEPROM */ if (e1000_read_mac_addr(hw) < 0) { device_printf(dev, "EEPROM read error while reading MAC" " address\n"); error = EIO; goto err_late; } if (!em_is_valid_ether_addr(hw->mac.addr)) { device_printf(dev, "Invalid MAC address\n"); error = EIO; goto err_late; } /* Disable ULP support */ e1000_disable_ulp_lpt_lp(hw, TRUE); /* * Get Wake-on-Lan and Management info for later use */ em_get_wakeup(ctx); iflib_set_mac(ctx, hw->mac.addr); return (0); err_late: em_release_hw_control(adapter); err_pci: em_free_pci_resources(ctx); free(adapter->mta, M_DEVBUF); return (error); } static int em_if_attach_post(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct e1000_hw *hw = &adapter->hw; int error = 0; /* Setup OS specific network interface */ error = em_setup_interface(ctx); if (error != 0) { goto err_late; } em_reset(ctx); /* Initialize statistics */ em_update_stats_counters(adapter); hw->mac.get_link_status = 1; em_if_update_admin_status(ctx); em_add_hw_stats(adapter); /* Non-AMT based hardware can now take control from firmware */ if (adapter->has_manage && !adapter->has_amt) em_get_hw_control(adapter); INIT_DEBUGOUT("em_if_attach_post: end"); return (error); err_late: em_release_hw_control(adapter); em_free_pci_resources(ctx); em_if_queues_free(ctx); free(adapter->mta, M_DEVBUF); return (error); } /********************************************************************* * Device removal routine * * The detach entry point is called when the driver is being removed. * This routine stops the adapter and deallocates all the resources * that were allocated for driver operation. * * return 0 on success, positive on failure *********************************************************************/ static int em_if_detach(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); INIT_DEBUGOUT("em_detach: begin"); e1000_phy_hw_reset(&adapter->hw); em_release_manageability(adapter); em_release_hw_control(adapter); em_free_pci_resources(ctx); return (0); } /********************************************************************* * * Shutdown entry point * **********************************************************************/ static int em_if_shutdown(if_ctx_t ctx) { return em_if_suspend(ctx); } /* * Suspend/resume device methods. */ static int em_if_suspend(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); em_release_manageability(adapter); em_release_hw_control(adapter); em_enable_wakeup(ctx); return (0); } static int em_if_resume(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); if (adapter->hw.mac.type == e1000_pch2lan) e1000_resume_workarounds_pchlan(&adapter->hw); em_if_init(ctx); em_init_manageability(adapter); return(0); } static int em_if_mtu_set(if_ctx_t ctx, uint32_t mtu) { int max_frame_size; struct adapter *adapter = iflib_get_softc(ctx); if_softc_ctx_t scctx = iflib_get_softc_ctx(ctx); IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)"); switch (adapter->hw.mac.type) { case e1000_82571: case e1000_82572: case e1000_ich9lan: case e1000_ich10lan: case e1000_pch2lan: case e1000_pch_lpt: case e1000_pch_spt: case e1000_pch_cnp: case e1000_82574: case e1000_82583: case e1000_80003es2lan: /* 9K Jumbo Frame size */ max_frame_size = 9234; break; case e1000_pchlan: max_frame_size = 4096; break; case e1000_82542: case e1000_ich8lan: /* Adapters that do not support jumbo frames */ max_frame_size = ETHER_MAX_LEN; break; default: if (adapter->hw.mac.type >= igb_mac_min) max_frame_size = 9234; else /* lem */ max_frame_size = MAX_JUMBO_FRAME_SIZE; } if (mtu > max_frame_size - ETHER_HDR_LEN - ETHER_CRC_LEN) { return (EINVAL); } scctx->isc_max_frame_size = adapter->hw.mac.max_frame_size = mtu + ETHER_HDR_LEN + ETHER_CRC_LEN; return (0); } /********************************************************************* * Init entry point * * This routine is used in two ways. It is used by the stack as * init entry point in network interface structure. It is also used * by the driver as a hw/sw initialization routine to get to a * consistent state. * * return 0 on success, positive on failure **********************************************************************/ static void em_if_init(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct ifnet *ifp = iflib_get_ifp(ctx); struct em_tx_queue *tx_que; int i; INIT_DEBUGOUT("em_if_init: begin"); /* Get the latest mac address, User can use a LAA */ bcopy(if_getlladdr(ifp), adapter->hw.mac.addr, ETHER_ADDR_LEN); /* Put the address into the Receive Address Array */ e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0); /* * With the 82571 adapter, RAR[0] may be overwritten * when the other port is reset, we make a duplicate * in RAR[14] for that eventuality, this assures * the interface continues to function. */ if (adapter->hw.mac.type == e1000_82571) { e1000_set_laa_state_82571(&adapter->hw, TRUE); e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, E1000_RAR_ENTRIES - 1); } /* Initialize the hardware */ em_reset(ctx); em_if_update_admin_status(ctx); for (i = 0, tx_que = adapter->tx_queues; i < adapter->tx_num_queues; i++, tx_que++) { struct tx_ring *txr = &tx_que->txr; txr->tx_rs_cidx = txr->tx_rs_pidx = txr->tx_cidx_processed = 0; } /* Setup VLAN support, basic and offload if available */ E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN); /* Clear bad data from Rx FIFOs */ if (adapter->hw.mac.type >= igb_mac_min) e1000_rx_fifo_flush_82575(&adapter->hw); /* Configure for OS presence */ em_init_manageability(adapter); /* Prepare transmit descriptors and buffers */ em_initialize_transmit_unit(ctx); /* Setup Multicast table */ em_if_multi_set(ctx); /* * Figure out the desired mbuf * pool for doing jumbos */ if (adapter->hw.mac.max_frame_size <= 2048) adapter->rx_mbuf_sz = MCLBYTES; #ifndef CONTIGMALLOC_WORKS else adapter->rx_mbuf_sz = MJUMPAGESIZE; #else else if (adapter->hw.mac.max_frame_size <= 4096) adapter->rx_mbuf_sz = MJUMPAGESIZE; else adapter->rx_mbuf_sz = MJUM9BYTES; #endif em_initialize_receive_unit(ctx); /* Use real VLAN Filter support? */ if (if_getcapenable(ifp) & IFCAP_VLAN_HWTAGGING) { if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER) /* Use real VLAN Filter support */ em_setup_vlan_hw_support(adapter); else { u32 ctrl; ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL); ctrl |= E1000_CTRL_VME; E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl); } } /* Don't lose promiscuous settings */ em_if_set_promisc(ctx, IFF_PROMISC); e1000_clear_hw_cntrs_base_generic(&adapter->hw); /* MSI/X configuration for 82574 */ if (adapter->hw.mac.type == e1000_82574) { int tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); tmp |= E1000_CTRL_EXT_PBA_CLR; E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp); /* Set the IVAR - interrupt vector routing. */ E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars); } else if (adapter->intr_type == IFLIB_INTR_MSIX) /* Set up queue routing */ igb_configure_queues(adapter); /* this clears any pending interrupts */ E1000_READ_REG(&adapter->hw, E1000_ICR); E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC); /* AMT based hardware can now take control from firmware */ if (adapter->has_manage && adapter->has_amt) em_get_hw_control(adapter); /* Set Energy Efficient Ethernet */ if (adapter->hw.mac.type >= igb_mac_min && adapter->hw.phy.media_type == e1000_media_type_copper) { if (adapter->hw.mac.type == e1000_i354) e1000_set_eee_i354(&adapter->hw, TRUE, TRUE); else e1000_set_eee_i350(&adapter->hw, TRUE, TRUE); } } /********************************************************************* * * Fast Legacy/MSI Combined Interrupt Service routine * *********************************************************************/ int em_intr(void *arg) { struct adapter *adapter = arg; if_ctx_t ctx = adapter->ctx; u32 reg_icr; reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR); if (adapter->intr_type != IFLIB_INTR_LEGACY) goto skip_stray; /* Hot eject? */ if (reg_icr == 0xffffffff) return FILTER_STRAY; /* Definitely not our interrupt. */ if (reg_icr == 0x0) return FILTER_STRAY; /* * Starting with the 82571 chip, bit 31 should be used to * determine whether the interrupt belongs to us. */ if (adapter->hw.mac.type >= e1000_82571 && (reg_icr & E1000_ICR_INT_ASSERTED) == 0) return FILTER_STRAY; skip_stray: /* Link status change */ if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { adapter->hw.mac.get_link_status = 1; iflib_admin_intr_deferred(ctx); } if (reg_icr & E1000_ICR_RXO) adapter->rx_overruns++; return (FILTER_SCHEDULE_THREAD); } static void igb_rx_enable_queue(struct adapter *adapter, struct em_rx_queue *rxq) { E1000_WRITE_REG(&adapter->hw, E1000_EIMS, rxq->eims); } static void em_rx_enable_queue(struct adapter *adapter, struct em_rx_queue *rxq) { E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxq->eims); } static void igb_tx_enable_queue(struct adapter *adapter, struct em_tx_queue *txq) { E1000_WRITE_REG(&adapter->hw, E1000_EIMS, txq->eims); } static void em_tx_enable_queue(struct adapter *adapter, struct em_tx_queue *txq) { E1000_WRITE_REG(&adapter->hw, E1000_IMS, txq->eims); } static int em_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid) { struct adapter *adapter = iflib_get_softc(ctx); struct em_rx_queue *rxq = &adapter->rx_queues[rxqid]; if (adapter->hw.mac.type >= igb_mac_min) igb_rx_enable_queue(adapter, rxq); else em_rx_enable_queue(adapter, rxq); return (0); } static int em_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid) { struct adapter *adapter = iflib_get_softc(ctx); struct em_tx_queue *txq = &adapter->tx_queues[txqid]; if (adapter->hw.mac.type >= igb_mac_min) igb_tx_enable_queue(adapter, txq); else em_tx_enable_queue(adapter, txq); return (0); } /********************************************************************* * * MSIX RX Interrupt Service routine * **********************************************************************/ static int em_msix_que(void *arg) { struct em_rx_queue *que = arg; ++que->irqs; return (FILTER_SCHEDULE_THREAD); } /********************************************************************* * * MSIX Link Fast Interrupt Service routine * **********************************************************************/ static int em_msix_link(void *arg) { struct adapter *adapter = arg; u32 reg_icr; ++adapter->link_irq; MPASS(adapter->hw.back != NULL); reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR); if (reg_icr & E1000_ICR_RXO) adapter->rx_overruns++; if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { em_handle_link(adapter->ctx); } else { E1000_WRITE_REG(&adapter->hw, E1000_IMS, EM_MSIX_LINK | E1000_IMS_LSC); if (adapter->hw.mac.type >= igb_mac_min) E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask); } /* * Because we must read the ICR for this interrupt * it may clear other causes using autoclear, for * this reason we simply create a soft interrupt * for all these vectors. */ if (reg_icr && adapter->hw.mac.type < igb_mac_min) { E1000_WRITE_REG(&adapter->hw, E1000_ICS, adapter->ims); } return (FILTER_HANDLED); } static void em_handle_link(void *context) { if_ctx_t ctx = context; struct adapter *adapter = iflib_get_softc(ctx); adapter->hw.mac.get_link_status = 1; iflib_admin_intr_deferred(ctx); } /********************************************************************* * * Media Ioctl callback * * This routine is called whenever the user queries the status of * the interface using ifconfig. * **********************************************************************/ static void em_if_media_status(if_ctx_t ctx, struct ifmediareq *ifmr) { struct adapter *adapter = iflib_get_softc(ctx); u_char fiber_type = IFM_1000_SX; INIT_DEBUGOUT("em_if_media_status: begin"); iflib_admin_intr_deferred(ctx); ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; if (!adapter->link_active) { return; } ifmr->ifm_status |= IFM_ACTIVE; if ((adapter->hw.phy.media_type == e1000_media_type_fiber) || (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) { if (adapter->hw.mac.type == e1000_82545) fiber_type = IFM_1000_LX; ifmr->ifm_active |= fiber_type | IFM_FDX; } else { switch (adapter->link_speed) { case 10: ifmr->ifm_active |= IFM_10_T; break; case 100: ifmr->ifm_active |= IFM_100_TX; break; case 1000: ifmr->ifm_active |= IFM_1000_T; break; } if (adapter->link_duplex == FULL_DUPLEX) ifmr->ifm_active |= IFM_FDX; else ifmr->ifm_active |= IFM_HDX; } } /********************************************************************* * * Media Ioctl callback * * This routine is called when the user changes speed/duplex using * media/mediopt option with ifconfig. * **********************************************************************/ static int em_if_media_change(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct ifmedia *ifm = iflib_get_media(ctx); INIT_DEBUGOUT("em_if_media_change: begin"); if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) return (EINVAL); switch (IFM_SUBTYPE(ifm->ifm_media)) { case IFM_AUTO: adapter->hw.mac.autoneg = DO_AUTO_NEG; adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT; break; case IFM_1000_LX: case IFM_1000_SX: case IFM_1000_T: adapter->hw.mac.autoneg = DO_AUTO_NEG; adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL; break; case IFM_100_TX: adapter->hw.mac.autoneg = FALSE; adapter->hw.phy.autoneg_advertised = 0; if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX) adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL; else adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF; break; case IFM_10_T: adapter->hw.mac.autoneg = FALSE; adapter->hw.phy.autoneg_advertised = 0; if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX) adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL; else adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF; break; default: device_printf(adapter->dev, "Unsupported media type\n"); } em_if_init(ctx); return (0); } static int em_if_set_promisc(if_ctx_t ctx, int flags) { struct adapter *adapter = iflib_get_softc(ctx); u32 reg_rctl; em_disable_promisc(ctx); reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); if (flags & IFF_PROMISC) { reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE); /* Turn this on if you want to see bad packets */ if (em_debug_sbp) reg_rctl |= E1000_RCTL_SBP; E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); } else if (flags & IFF_ALLMULTI) { reg_rctl |= E1000_RCTL_MPE; reg_rctl &= ~E1000_RCTL_UPE; E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); } return (0); } static void em_disable_promisc(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct ifnet *ifp = iflib_get_ifp(ctx); u32 reg_rctl; int mcnt = 0; reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); reg_rctl &= (~E1000_RCTL_UPE); if (if_getflags(ifp) & IFF_ALLMULTI) mcnt = MAX_NUM_MULTICAST_ADDRESSES; else mcnt = if_multiaddr_count(ifp, MAX_NUM_MULTICAST_ADDRESSES); /* Don't disable if in MAX groups */ if (mcnt < MAX_NUM_MULTICAST_ADDRESSES) reg_rctl &= (~E1000_RCTL_MPE); reg_rctl &= (~E1000_RCTL_SBP); E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); } /********************************************************************* * Multicast Update * * This routine is called whenever multicast address list is updated. * **********************************************************************/ static void em_if_multi_set(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct ifnet *ifp = iflib_get_ifp(ctx); u32 reg_rctl = 0; u8 *mta; /* Multicast array memory */ int mcnt = 0; IOCTL_DEBUGOUT("em_set_multi: begin"); mta = adapter->mta; bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES); if (adapter->hw.mac.type == e1000_82542 && adapter->hw.revision_id == E1000_REVISION_2) { reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE) e1000_pci_clear_mwi(&adapter->hw); reg_rctl |= E1000_RCTL_RST; E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); msec_delay(5); } if_multiaddr_array(ifp, mta, &mcnt, MAX_NUM_MULTICAST_ADDRESSES); if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) { reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); reg_rctl |= E1000_RCTL_MPE; E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); } else e1000_update_mc_addr_list(&adapter->hw, mta, mcnt); if (adapter->hw.mac.type == e1000_82542 && adapter->hw.revision_id == E1000_REVISION_2) { reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); reg_rctl &= ~E1000_RCTL_RST; E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); msec_delay(5); if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE) e1000_pci_set_mwi(&adapter->hw); } } /********************************************************************* * Timer routine * * This routine checks for link status and updates statistics. * **********************************************************************/ static void em_if_timer(if_ctx_t ctx, uint16_t qid) { struct adapter *adapter = iflib_get_softc(ctx); struct em_rx_queue *que; int i; int trigger = 0; if (qid != 0) return; iflib_admin_intr_deferred(ctx); /* Reset LAA into RAR[0] on 82571 */ if ((adapter->hw.mac.type == e1000_82571) && e1000_get_laa_state_82571(&adapter->hw)) e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0); if (adapter->hw.mac.type < em_mac_min) lem_smartspeed(adapter); /* Mask to use in the irq trigger */ if (adapter->intr_type == IFLIB_INTR_MSIX) { for (i = 0, que = adapter->rx_queues; i < adapter->rx_num_queues; i++, que++) trigger |= que->eims; } else { trigger = E1000_ICS_RXDMT0; } } static void em_if_update_admin_status(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct e1000_hw *hw = &adapter->hw; struct ifnet *ifp = iflib_get_ifp(ctx); device_t dev = iflib_get_dev(ctx); u32 link_check, thstat, ctrl; link_check = thstat = ctrl = 0; /* Get the cached link value or read phy for real */ switch (hw->phy.media_type) { case e1000_media_type_copper: if (hw->mac.get_link_status) { if (hw->mac.type == e1000_pch_spt) msec_delay(50); /* Do the work to read phy */ e1000_check_for_link(hw); link_check = !hw->mac.get_link_status; if (link_check) /* ESB2 fix */ e1000_cfg_on_link_up(hw); } else { link_check = TRUE; } break; case e1000_media_type_fiber: e1000_check_for_link(hw); link_check = (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU); break; case e1000_media_type_internal_serdes: e1000_check_for_link(hw); link_check = adapter->hw.mac.serdes_has_link; break; /* VF device is type_unknown */ case e1000_media_type_unknown: e1000_check_for_link(hw); link_check = !hw->mac.get_link_status; /* FALLTHROUGH */ default: break; } /* Check for thermal downshift or shutdown */ if (hw->mac.type == e1000_i350) { thstat = E1000_READ_REG(hw, E1000_THSTAT); ctrl = E1000_READ_REG(hw, E1000_CTRL_EXT); } /* Now check for a transition */ if (link_check && (adapter->link_active == 0)) { e1000_get_speed_and_duplex(hw, &adapter->link_speed, &adapter->link_duplex); /* Check if we must disable SPEED_MODE bit on PCI-E */ if ((adapter->link_speed != SPEED_1000) && ((hw->mac.type == e1000_82571) || (hw->mac.type == e1000_82572))) { int tarc0; tarc0 = E1000_READ_REG(hw, E1000_TARC(0)); tarc0 &= ~TARC_SPEED_MODE_BIT; E1000_WRITE_REG(hw, E1000_TARC(0), tarc0); } if (bootverbose) device_printf(dev, "Link is up %d Mbps %s\n", adapter->link_speed, ((adapter->link_duplex == FULL_DUPLEX) ? "Full Duplex" : "Half Duplex")); adapter->link_active = 1; adapter->smartspeed = 0; if_setbaudrate(ifp, adapter->link_speed * 1000000); if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) && (thstat & E1000_THSTAT_LINK_THROTTLE)) device_printf(dev, "Link: thermal downshift\n"); /* Delay Link Up for Phy update */ if (((hw->mac.type == e1000_i210) || (hw->mac.type == e1000_i211)) && (hw->phy.id == I210_I_PHY_ID)) msec_delay(I210_LINK_DELAY); /* Reset if the media type changed. */ if ((hw->dev_spec._82575.media_changed) && (adapter->hw.mac.type >= igb_mac_min)) { hw->dev_spec._82575.media_changed = false; adapter->flags |= IGB_MEDIA_RESET; em_reset(ctx); } iflib_link_state_change(ctx, LINK_STATE_UP, ifp->if_baudrate); printf("Link state changed to up\n"); } else if (!link_check && (adapter->link_active == 1)) { if_setbaudrate(ifp, 0); adapter->link_speed = 0; adapter->link_duplex = 0; if (bootverbose) device_printf(dev, "Link is Down\n"); adapter->link_active = 0; iflib_link_state_change(ctx, LINK_STATE_DOWN, ifp->if_baudrate); printf("link state changed to down\n"); } em_update_stats_counters(adapter); E1000_WRITE_REG(&adapter->hw, E1000_IMS, EM_MSIX_LINK | E1000_IMS_LSC); } /********************************************************************* * * This routine disables all traffic on the adapter by issuing a * global reset on the MAC and deallocates TX/RX buffers. * * This routine should always be called with BOTH the CORE * and TX locks. **********************************************************************/ static void em_if_stop(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); INIT_DEBUGOUT("em_stop: begin"); e1000_reset_hw(&adapter->hw); if (adapter->hw.mac.type >= e1000_82544) E1000_WRITE_REG(&adapter->hw, E1000_WUFC, 0); e1000_led_off(&adapter->hw); e1000_cleanup_led(&adapter->hw); } /********************************************************************* * * Determine hardware revision. * **********************************************************************/ static void em_identify_hardware(if_ctx_t ctx) { device_t dev = iflib_get_dev(ctx); struct adapter *adapter = iflib_get_softc(ctx); /* Make sure our PCI config space has the necessary stuff set */ adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2); /* Save off the information about this board */ adapter->hw.vendor_id = pci_get_vendor(dev); adapter->hw.device_id = pci_get_device(dev); adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1); adapter->hw.subsystem_vendor_id = pci_read_config(dev, PCIR_SUBVEND_0, 2); adapter->hw.subsystem_device_id = pci_read_config(dev, PCIR_SUBDEV_0, 2); /* Do Shared Code Init and Setup */ if (e1000_set_mac_type(&adapter->hw)) { device_printf(dev, "Setup init failure\n"); return; } } static int em_allocate_pci_resources(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); device_t dev = iflib_get_dev(ctx); int rid, val; rid = PCIR_BAR(0); adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (adapter->memory == NULL) { device_printf(dev, "Unable to allocate bus resource: memory\n"); return (ENXIO); } adapter->osdep.mem_bus_space_tag = rman_get_bustag(adapter->memory); adapter->osdep.mem_bus_space_handle = rman_get_bushandle(adapter->memory); adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle; /* Only older adapters use IO mapping */ if (adapter->hw.mac.type < em_mac_min && adapter->hw.mac.type > e1000_82543) { /* Figure our where our IO BAR is ? */ for (rid = PCIR_BAR(0); rid < PCIR_CIS;) { val = pci_read_config(dev, rid, 4); if (EM_BAR_TYPE(val) == EM_BAR_TYPE_IO) { adapter->io_rid = rid; break; } rid += 4; /* check for 64bit BAR */ if (EM_BAR_MEM_TYPE(val) == EM_BAR_MEM_TYPE_64BIT) rid += 4; } if (rid >= PCIR_CIS) { device_printf(dev, "Unable to locate IO BAR\n"); return (ENXIO); } adapter->ioport = bus_alloc_resource_any(dev, SYS_RES_IOPORT, &adapter->io_rid, RF_ACTIVE); if (adapter->ioport == NULL) { device_printf(dev, "Unable to allocate bus resource: " "ioport\n"); return (ENXIO); } adapter->hw.io_base = 0; adapter->osdep.io_bus_space_tag = rman_get_bustag(adapter->ioport); adapter->osdep.io_bus_space_handle = rman_get_bushandle(adapter->ioport); } adapter->hw.back = &adapter->osdep; return (0); } /********************************************************************* * * Setup the MSIX Interrupt handlers * **********************************************************************/ static int em_if_msix_intr_assign(if_ctx_t ctx, int msix) { struct adapter *adapter = iflib_get_softc(ctx); struct em_rx_queue *rx_que = adapter->rx_queues; struct em_tx_queue *tx_que = adapter->tx_queues; int error, rid, i, vector = 0, rx_vectors; char buf[16]; /* First set up ring resources */ for (i = 0; i < adapter->rx_num_queues; i++, rx_que++, vector++) { rid = vector + 1; snprintf(buf, sizeof(buf), "rxq%d", i); error = iflib_irq_alloc_generic(ctx, &rx_que->que_irq, rid, IFLIB_INTR_RXTX, em_msix_que, rx_que, rx_que->me, buf); if (error) { device_printf(iflib_get_dev(ctx), "Failed to allocate que int %d err: %d", i, error); adapter->rx_num_queues = i + 1; goto fail; } rx_que->msix = vector; /* * Set the bit to enable interrupt * in E1000_IMS -- bits 20 and 21 * are for RX0 and RX1, note this has * NOTHING to do with the MSIX vector */ if (adapter->hw.mac.type == e1000_82574) { rx_que->eims = 1 << (20 + i); adapter->ims |= rx_que->eims; adapter->ivars |= (8 | rx_que->msix) << (i * 4); } else if (adapter->hw.mac.type == e1000_82575) rx_que->eims = E1000_EICR_TX_QUEUE0 << vector; else rx_que->eims = 1 << vector; } rx_vectors = vector; vector = 0; for (i = 0; i < adapter->tx_num_queues; i++, tx_que++, vector++) { rid = vector + 1; snprintf(buf, sizeof(buf), "txq%d", i); tx_que = &adapter->tx_queues[i]; iflib_softirq_alloc_generic(ctx, &adapter->rx_queues[i % adapter->rx_num_queues].que_irq, IFLIB_INTR_TX, tx_que, tx_que->me, buf); tx_que->msix = (vector % adapter->tx_num_queues); /* * Set the bit to enable interrupt * in E1000_IMS -- bits 22 and 23 * are for TX0 and TX1, note this has * NOTHING to do with the MSIX vector */ if (adapter->hw.mac.type == e1000_82574) { tx_que->eims = 1 << (22 + i); adapter->ims |= tx_que->eims; adapter->ivars |= (8 | tx_que->msix) << (8 + (i * 4)); } else if (adapter->hw.mac.type == e1000_82575) { tx_que->eims = E1000_EICR_TX_QUEUE0 << (i % adapter->tx_num_queues); } else { tx_que->eims = 1 << (i % adapter->tx_num_queues); } } /* Link interrupt */ rid = rx_vectors + 1; error = iflib_irq_alloc_generic(ctx, &adapter->irq, rid, IFLIB_INTR_ADMIN, em_msix_link, adapter, 0, "aq"); if (error) { device_printf(iflib_get_dev(ctx), "Failed to register admin handler"); goto fail; } adapter->linkvec = rx_vectors; if (adapter->hw.mac.type < igb_mac_min) { adapter->ivars |= (8 | rx_vectors) << 16; adapter->ivars |= 0x80000000; } return (0); fail: iflib_irq_free(ctx, &adapter->irq); rx_que = adapter->rx_queues; for (int i = 0; i < adapter->rx_num_queues; i++, rx_que++) iflib_irq_free(ctx, &rx_que->que_irq); return (error); } static void igb_configure_queues(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; struct em_rx_queue *rx_que; struct em_tx_queue *tx_que; u32 tmp, ivar = 0, newitr = 0; /* First turn on RSS capability */ if (adapter->hw.mac.type != e1000_82575) E1000_WRITE_REG(hw, E1000_GPIE, E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME | E1000_GPIE_PBA | E1000_GPIE_NSICR); /* Turn on MSIX */ switch (adapter->hw.mac.type) { case e1000_82580: case e1000_i350: case e1000_i354: case e1000_i210: case e1000_i211: case e1000_vfadapt: case e1000_vfadapt_i350: /* RX entries */ for (int i = 0; i < adapter->rx_num_queues; i++) { u32 index = i >> 1; ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); rx_que = &adapter->rx_queues[i]; if (i & 1) { ivar &= 0xFF00FFFF; ivar |= (rx_que->msix | E1000_IVAR_VALID) << 16; } else { ivar &= 0xFFFFFF00; ivar |= rx_que->msix | E1000_IVAR_VALID; } E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); } /* TX entries */ for (int i = 0; i < adapter->tx_num_queues; i++) { u32 index = i >> 1; ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); tx_que = &adapter->tx_queues[i]; if (i & 1) { ivar &= 0x00FFFFFF; ivar |= (tx_que->msix | E1000_IVAR_VALID) << 24; } else { ivar &= 0xFFFF00FF; ivar |= (tx_que->msix | E1000_IVAR_VALID) << 8; } E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); adapter->que_mask |= tx_que->eims; } /* And for the link interrupt */ ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8; adapter->link_mask = 1 << adapter->linkvec; E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar); break; case e1000_82576: /* RX entries */ for (int i = 0; i < adapter->rx_num_queues; i++) { u32 index = i & 0x7; /* Each IVAR has two entries */ ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); rx_que = &adapter->rx_queues[i]; if (i < 8) { ivar &= 0xFFFFFF00; ivar |= rx_que->msix | E1000_IVAR_VALID; } else { ivar &= 0xFF00FFFF; ivar |= (rx_que->msix | E1000_IVAR_VALID) << 16; } E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); adapter->que_mask |= rx_que->eims; } /* TX entries */ for (int i = 0; i < adapter->tx_num_queues; i++) { u32 index = i & 0x7; /* Each IVAR has two entries */ ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); tx_que = &adapter->tx_queues[i]; if (i < 8) { ivar &= 0xFFFF00FF; ivar |= (tx_que->msix | E1000_IVAR_VALID) << 8; } else { ivar &= 0x00FFFFFF; ivar |= (tx_que->msix | E1000_IVAR_VALID) << 24; } E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); adapter->que_mask |= tx_que->eims; } /* And for the link interrupt */ ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8; adapter->link_mask = 1 << adapter->linkvec; E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar); break; case e1000_82575: /* enable MSI-X support*/ tmp = E1000_READ_REG(hw, E1000_CTRL_EXT); tmp |= E1000_CTRL_EXT_PBA_CLR; /* Auto-Mask interrupts upon ICR read. */ tmp |= E1000_CTRL_EXT_EIAME; tmp |= E1000_CTRL_EXT_IRCA; E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp); /* Queues */ for (int i = 0; i < adapter->rx_num_queues; i++) { rx_que = &adapter->rx_queues[i]; tmp = E1000_EICR_RX_QUEUE0 << i; tmp |= E1000_EICR_TX_QUEUE0 << i; rx_que->eims = tmp; E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0), i, rx_que->eims); adapter->que_mask |= rx_que->eims; } /* Link */ E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec), E1000_EIMS_OTHER); adapter->link_mask |= E1000_EIMS_OTHER; default: break; } /* Set the starting interrupt rate */ if (em_max_interrupt_rate > 0) newitr = (4000000 / em_max_interrupt_rate) & 0x7FFC; if (hw->mac.type == e1000_82575) newitr |= newitr << 16; else newitr |= E1000_EITR_CNT_IGNR; for (int i = 0; i < adapter->rx_num_queues; i++) { rx_que = &adapter->rx_queues[i]; E1000_WRITE_REG(hw, E1000_EITR(rx_que->msix), newitr); } return; } static void em_free_pci_resources(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct em_rx_queue *que = adapter->rx_queues; device_t dev = iflib_get_dev(ctx); /* Release all msix queue resources */ if (adapter->intr_type == IFLIB_INTR_MSIX) iflib_irq_free(ctx, &adapter->irq); for (int i = 0; i < adapter->rx_num_queues; i++, que++) { iflib_irq_free(ctx, &que->que_irq); } /* First release all the interrupt resources */ if (adapter->memory != NULL) { bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(0), adapter->memory); adapter->memory = NULL; } if (adapter->flash != NULL) { bus_release_resource(dev, SYS_RES_MEMORY, EM_FLASH, adapter->flash); adapter->flash = NULL; } if (adapter->ioport != NULL) bus_release_resource(dev, SYS_RES_IOPORT, adapter->io_rid, adapter->ioport); } /* Setup MSI or MSI/X */ static int em_setup_msix(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); if (adapter->hw.mac.type == e1000_82574) { em_enable_vectors_82574(ctx); } return (0); } /********************************************************************* * * Initialize the hardware to a configuration * as specified by the adapter structure. * **********************************************************************/ static void lem_smartspeed(struct adapter *adapter) { u16 phy_tmp; if (adapter->link_active || (adapter->hw.phy.type != e1000_phy_igp) || adapter->hw.mac.autoneg == 0 || (adapter->hw.phy.autoneg_advertised & ADVERTISE_1000_FULL) == 0) return; if (adapter->smartspeed == 0) { /* If Master/Slave config fault is asserted twice, * we assume back-to-back */ e1000_read_phy_reg(&adapter->hw, PHY_1000T_STATUS, &phy_tmp); if (!(phy_tmp & SR_1000T_MS_CONFIG_FAULT)) return; e1000_read_phy_reg(&adapter->hw, PHY_1000T_STATUS, &phy_tmp); if (phy_tmp & SR_1000T_MS_CONFIG_FAULT) { e1000_read_phy_reg(&adapter->hw, PHY_1000T_CTRL, &phy_tmp); if(phy_tmp & CR_1000T_MS_ENABLE) { phy_tmp &= ~CR_1000T_MS_ENABLE; e1000_write_phy_reg(&adapter->hw, PHY_1000T_CTRL, phy_tmp); adapter->smartspeed++; if(adapter->hw.mac.autoneg && !e1000_copper_link_autoneg(&adapter->hw) && !e1000_read_phy_reg(&adapter->hw, PHY_CONTROL, &phy_tmp)) { phy_tmp |= (MII_CR_AUTO_NEG_EN | MII_CR_RESTART_AUTO_NEG); e1000_write_phy_reg(&adapter->hw, PHY_CONTROL, phy_tmp); } } } return; } else if(adapter->smartspeed == EM_SMARTSPEED_DOWNSHIFT) { /* If still no link, perhaps using 2/3 pair cable */ e1000_read_phy_reg(&adapter->hw, PHY_1000T_CTRL, &phy_tmp); phy_tmp |= CR_1000T_MS_ENABLE; e1000_write_phy_reg(&adapter->hw, PHY_1000T_CTRL, phy_tmp); if(adapter->hw.mac.autoneg && !e1000_copper_link_autoneg(&adapter->hw) && !e1000_read_phy_reg(&adapter->hw, PHY_CONTROL, &phy_tmp)) { phy_tmp |= (MII_CR_AUTO_NEG_EN | MII_CR_RESTART_AUTO_NEG); e1000_write_phy_reg(&adapter->hw, PHY_CONTROL, phy_tmp); } } /* Restart process after EM_SMARTSPEED_MAX iterations */ if(adapter->smartspeed++ == EM_SMARTSPEED_MAX) adapter->smartspeed = 0; } /********************************************************************* * * Initialize the DMA Coalescing feature * **********************************************************************/ static void igb_init_dmac(struct adapter *adapter, u32 pba) { device_t dev = adapter->dev; struct e1000_hw *hw = &adapter->hw; u32 dmac, reg = ~E1000_DMACR_DMAC_EN; u16 hwm; u16 max_frame_size; if (hw->mac.type == e1000_i211) return; max_frame_size = adapter->shared->isc_max_frame_size; if (hw->mac.type > e1000_82580) { if (adapter->dmac == 0) { /* Disabling it */ E1000_WRITE_REG(hw, E1000_DMACR, reg); return; } else device_printf(dev, "DMA Coalescing enabled\n"); /* Set starting threshold */ E1000_WRITE_REG(hw, E1000_DMCTXTH, 0); hwm = 64 * pba - max_frame_size / 16; if (hwm < 64 * (pba - 6)) hwm = 64 * (pba - 6); reg = E1000_READ_REG(hw, E1000_FCRTC); reg &= ~E1000_FCRTC_RTH_COAL_MASK; reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT) & E1000_FCRTC_RTH_COAL_MASK); E1000_WRITE_REG(hw, E1000_FCRTC, reg); dmac = pba - max_frame_size / 512; if (dmac < pba - 10) dmac = pba - 10; reg = E1000_READ_REG(hw, E1000_DMACR); reg &= ~E1000_DMACR_DMACTHR_MASK; reg = ((dmac << E1000_DMACR_DMACTHR_SHIFT) & E1000_DMACR_DMACTHR_MASK); /* transition to L0x or L1 if available..*/ reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK); /* Check if status is 2.5Gb backplane connection * before configuration of watchdog timer, which is * in msec values in 12.8usec intervals * watchdog timer= msec values in 32usec intervals * for non 2.5Gb connection */ if (hw->mac.type == e1000_i354) { int status = E1000_READ_REG(hw, E1000_STATUS); if ((status & E1000_STATUS_2P5_SKU) && (!(status & E1000_STATUS_2P5_SKU_OVER))) reg |= ((adapter->dmac * 5) >> 6); else reg |= (adapter->dmac >> 5); } else { reg |= (adapter->dmac >> 5); } E1000_WRITE_REG(hw, E1000_DMACR, reg); E1000_WRITE_REG(hw, E1000_DMCRTRH, 0); /* Set the interval before transition */ reg = E1000_READ_REG(hw, E1000_DMCTLX); if (hw->mac.type == e1000_i350) reg |= IGB_DMCTLX_DCFLUSH_DIS; /* ** in 2.5Gb connection, TTLX unit is 0.4 usec ** which is 0x4*2 = 0xA. But delay is still 4 usec */ if (hw->mac.type == e1000_i354) { int status = E1000_READ_REG(hw, E1000_STATUS); if ((status & E1000_STATUS_2P5_SKU) && (!(status & E1000_STATUS_2P5_SKU_OVER))) reg |= 0xA; else reg |= 0x4; } else { reg |= 0x4; } E1000_WRITE_REG(hw, E1000_DMCTLX, reg); /* free space in tx packet buffer to wake from DMA coal */ E1000_WRITE_REG(hw, E1000_DMCTXTH, (IGB_TXPBSIZE - (2 * max_frame_size)) >> 6); /* make low power state decision controlled by DMA coal */ reg = E1000_READ_REG(hw, E1000_PCIEMISC); reg &= ~E1000_PCIEMISC_LX_DECISION; E1000_WRITE_REG(hw, E1000_PCIEMISC, reg); } else if (hw->mac.type == e1000_82580) { u32 reg = E1000_READ_REG(hw, E1000_PCIEMISC); E1000_WRITE_REG(hw, E1000_PCIEMISC, reg & ~E1000_PCIEMISC_LX_DECISION); E1000_WRITE_REG(hw, E1000_DMACR, 0); } } static void em_reset(if_ctx_t ctx) { device_t dev = iflib_get_dev(ctx); struct adapter *adapter = iflib_get_softc(ctx); struct ifnet *ifp = iflib_get_ifp(ctx); struct e1000_hw *hw = &adapter->hw; u16 rx_buffer_size; u32 pba; INIT_DEBUGOUT("em_reset: begin"); /* Let the firmware know the OS is in control */ em_get_hw_control(adapter); /* Set up smart power down as default off on newer adapters. */ if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 || hw->mac.type == e1000_82572)) { u16 phy_tmp = 0; /* Speed up time to link by disabling smart power down. */ e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp); phy_tmp &= ~IGP02E1000_PM_SPD; e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp); } /* * Packet Buffer Allocation (PBA) * Writing PBA sets the receive portion of the buffer * the remainder is used for the transmit buffer. */ switch (hw->mac.type) { /* Total Packet Buffer on these is 48K */ case e1000_82571: case e1000_82572: case e1000_80003es2lan: pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */ break; case e1000_82573: /* 82573: Total Packet Buffer is 32K */ pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */ break; case e1000_82574: case e1000_82583: pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */ break; case e1000_ich8lan: pba = E1000_PBA_8K; break; case e1000_ich9lan: case e1000_ich10lan: /* Boost Receive side for jumbo frames */ if (adapter->hw.mac.max_frame_size > 4096) pba = E1000_PBA_14K; else pba = E1000_PBA_10K; break; case e1000_pchlan: case e1000_pch2lan: case e1000_pch_lpt: case e1000_pch_spt: case e1000_pch_cnp: pba = E1000_PBA_26K; break; case e1000_82575: pba = E1000_PBA_32K; break; case e1000_82576: case e1000_vfadapt: pba = E1000_READ_REG(hw, E1000_RXPBS); pba &= E1000_RXPBS_SIZE_MASK_82576; break; case e1000_82580: case e1000_i350: case e1000_i354: case e1000_vfadapt_i350: pba = E1000_READ_REG(hw, E1000_RXPBS); pba = e1000_rxpbs_adjust_82580(pba); break; case e1000_i210: case e1000_i211: pba = E1000_PBA_34K; break; default: if (adapter->hw.mac.max_frame_size > 8192) pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */ else pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */ } /* Special needs in case of Jumbo frames */ if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) { u32 tx_space, min_tx, min_rx; pba = E1000_READ_REG(hw, E1000_PBA); tx_space = pba >> 16; pba &= 0xffff; min_tx = (adapter->hw.mac.max_frame_size + sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2; min_tx = roundup2(min_tx, 1024); min_tx >>= 10; min_rx = adapter->hw.mac.max_frame_size; min_rx = roundup2(min_rx, 1024); min_rx >>= 10; if (tx_space < min_tx && ((min_tx - tx_space) < pba)) { pba = pba - (min_tx - tx_space); /* * if short on rx space, rx wins * and must trump tx adjustment */ if (pba < min_rx) pba = min_rx; } E1000_WRITE_REG(hw, E1000_PBA, pba); } if (hw->mac.type < igb_mac_min) E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba); INIT_DEBUGOUT1("em_reset: pba=%dK",pba); /* * These parameters control the automatic generation (Tx) and * response (Rx) to Ethernet PAUSE frames. * - High water mark should allow for at least two frames to be * received after sending an XOFF. * - Low water mark works best when it is very near the high water mark. * This allows the receiver to restart by sending XON when it has * drained a bit. Here we use an arbitrary value of 1500 which will * restart after one full frame is pulled from the buffer. There * could be several smaller frames in the buffer and if so they will * not trigger the XON until their total number reduces the buffer * by 1500. * - The pause time is fairly large at 1000 x 512ns = 512 usec. */ rx_buffer_size = (pba & 0xffff) << 10; hw->fc.high_water = rx_buffer_size - roundup2(adapter->hw.mac.max_frame_size, 1024); hw->fc.low_water = hw->fc.high_water - 1500; if (adapter->fc) /* locally set flow control value? */ hw->fc.requested_mode = adapter->fc; else hw->fc.requested_mode = e1000_fc_full; if (hw->mac.type == e1000_80003es2lan) hw->fc.pause_time = 0xFFFF; else hw->fc.pause_time = EM_FC_PAUSE_TIME; hw->fc.send_xon = TRUE; /* Device specific overrides/settings */ switch (hw->mac.type) { case e1000_pchlan: /* Workaround: no TX flow ctrl for PCH */ hw->fc.requested_mode = e1000_fc_rx_pause; hw->fc.pause_time = 0xFFFF; /* override */ if (if_getmtu(ifp) > ETHERMTU) { hw->fc.high_water = 0x3500; hw->fc.low_water = 0x1500; } else { hw->fc.high_water = 0x5000; hw->fc.low_water = 0x3000; } hw->fc.refresh_time = 0x1000; break; case e1000_pch2lan: case e1000_pch_lpt: case e1000_pch_spt: case e1000_pch_cnp: hw->fc.high_water = 0x5C20; hw->fc.low_water = 0x5048; hw->fc.pause_time = 0x0650; hw->fc.refresh_time = 0x0400; /* Jumbos need adjusted PBA */ if (if_getmtu(ifp) > ETHERMTU) E1000_WRITE_REG(hw, E1000_PBA, 12); else E1000_WRITE_REG(hw, E1000_PBA, 26); break; case e1000_82575: case e1000_82576: /* 8-byte granularity */ hw->fc.low_water = hw->fc.high_water - 8; break; case e1000_82580: case e1000_i350: case e1000_i354: case e1000_i210: case e1000_i211: case e1000_vfadapt: case e1000_vfadapt_i350: /* 16-byte granularity */ hw->fc.low_water = hw->fc.high_water - 16; break; case e1000_ich9lan: case e1000_ich10lan: if (if_getmtu(ifp) > ETHERMTU) { hw->fc.high_water = 0x2800; hw->fc.low_water = hw->fc.high_water - 8; break; } /* FALLTHROUGH */ default: if (hw->mac.type == e1000_80003es2lan) hw->fc.pause_time = 0xFFFF; break; } /* Issue a global reset */ e1000_reset_hw(hw); if (adapter->hw.mac.type >= igb_mac_min) { E1000_WRITE_REG(hw, E1000_WUC, 0); } else { E1000_WRITE_REG(hw, E1000_WUFC, 0); em_disable_aspm(adapter); } if (adapter->flags & IGB_MEDIA_RESET) { e1000_setup_init_funcs(hw, TRUE); e1000_get_bus_info(hw); adapter->flags &= ~IGB_MEDIA_RESET; } /* and a re-init */ if (e1000_init_hw(hw) < 0) { device_printf(dev, "Hardware Initialization Failed\n"); return; } if (adapter->hw.mac.type >= igb_mac_min) igb_init_dmac(adapter, pba); E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN); e1000_get_phy_info(hw); e1000_check_for_link(hw); } #define RSSKEYLEN 10 static void em_initialize_rss_mapping(struct adapter *adapter) { uint8_t rss_key[4 * RSSKEYLEN]; uint32_t reta = 0; struct e1000_hw *hw = &adapter->hw; int i; /* * Configure RSS key */ arc4rand(rss_key, sizeof(rss_key), 0); for (i = 0; i < RSSKEYLEN; ++i) { uint32_t rssrk = 0; rssrk = EM_RSSRK_VAL(rss_key, i); E1000_WRITE_REG(hw,E1000_RSSRK(i), rssrk); } /* * Configure RSS redirect table in following fashion: * (hash & ring_cnt_mask) == rdr_table[(hash & rdr_table_mask)] */ for (i = 0; i < sizeof(reta); ++i) { uint32_t q; q = (i % adapter->rx_num_queues) << 7; reta |= q << (8 * i); } for (i = 0; i < 32; ++i) E1000_WRITE_REG(hw, E1000_RETA(i), reta); E1000_WRITE_REG(hw, E1000_MRQC, E1000_MRQC_RSS_ENABLE_2Q | E1000_MRQC_RSS_FIELD_IPV4_TCP | E1000_MRQC_RSS_FIELD_IPV4 | E1000_MRQC_RSS_FIELD_IPV6_TCP_EX | E1000_MRQC_RSS_FIELD_IPV6_EX | E1000_MRQC_RSS_FIELD_IPV6); } static void igb_initialize_rss_mapping(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; int i; int queue_id; u32 reta; u32 rss_key[10], mrqc, shift = 0; /* XXX? */ if (adapter->hw.mac.type == e1000_82575) shift = 6; /* * The redirection table controls which destination * queue each bucket redirects traffic to. * Each DWORD represents four queues, with the LSB * being the first queue in the DWORD. * * This just allocates buckets to queues using round-robin * allocation. * * NOTE: It Just Happens to line up with the default * RSS allocation method. */ /* Warning FM follows */ reta = 0; for (i = 0; i < 128; i++) { #ifdef RSS queue_id = rss_get_indirection_to_bucket(i); /* * If we have more queues than buckets, we'll * end up mapping buckets to a subset of the * queues. * * If we have more buckets than queues, we'll * end up instead assigning multiple buckets * to queues. * * Both are suboptimal, but we need to handle * the case so we don't go out of bounds * indexing arrays and such. */ queue_id = queue_id % adapter->rx_num_queues; #else queue_id = (i % adapter->rx_num_queues); #endif /* Adjust if required */ queue_id = queue_id << shift; /* * The low 8 bits are for hash value (n+0); * The next 8 bits are for hash value (n+1), etc. */ reta = reta >> 8; reta = reta | ( ((uint32_t) queue_id) << 24); if ((i & 3) == 3) { E1000_WRITE_REG(hw, E1000_RETA(i >> 2), reta); reta = 0; } } /* Now fill in hash table */ /* * MRQC: Multiple Receive Queues Command * Set queuing to RSS control, number depends on the device. */ mrqc = E1000_MRQC_ENABLE_RSS_8Q; #ifdef RSS /* XXX ew typecasting */ rss_getkey((uint8_t *) &rss_key); #else arc4rand(&rss_key, sizeof(rss_key), 0); #endif for (i = 0; i < 10; i++) E1000_WRITE_REG_ARRAY(hw, E1000_RSSRK(0), i, rss_key[i]); /* * Configure the RSS fields to hash upon. */ mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 | E1000_MRQC_RSS_FIELD_IPV4_TCP); mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 | E1000_MRQC_RSS_FIELD_IPV6_TCP); mrqc |=( E1000_MRQC_RSS_FIELD_IPV4_UDP | E1000_MRQC_RSS_FIELD_IPV6_UDP); mrqc |=( E1000_MRQC_RSS_FIELD_IPV6_UDP_EX | E1000_MRQC_RSS_FIELD_IPV6_TCP_EX); E1000_WRITE_REG(hw, E1000_MRQC, mrqc); } /********************************************************************* * * Setup networking device structure and register an interface. * **********************************************************************/ static int em_setup_interface(if_ctx_t ctx) { struct ifnet *ifp = iflib_get_ifp(ctx); struct adapter *adapter = iflib_get_softc(ctx); if_softc_ctx_t scctx = adapter->shared; uint64_t cap = 0; INIT_DEBUGOUT("em_setup_interface: begin"); /* TSO parameters */ if_sethwtsomax(ifp, IP_MAXPACKET); /* Take m_pullup(9)'s in em_xmit() w/ TSO into acount. */ if_sethwtsomaxsegcount(ifp, EM_MAX_SCATTER - 5); if_sethwtsomaxsegsize(ifp, EM_TSO_SEG_SIZE); /* Single Queue */ if (adapter->tx_num_queues == 1) { if_setsendqlen(ifp, scctx->isc_ntxd[0] - 1); if_setsendqready(ifp); } cap = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM | IFCAP_TSO4; cap |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWTSO | IFCAP_VLAN_MTU; /* * Tell the upper layer(s) we * support full VLAN capability */ if_setifheaderlen(ifp, sizeof(struct ether_vlan_header)); if_setcapabilitiesbit(ifp, cap, 0); /* * Don't turn this on by default, if vlans are * created on another pseudo device (eg. lagg) * then vlan events are not passed thru, breaking * operation, but with HW FILTER off it works. If * using vlans directly on the em driver you can * enable this and get full hardware tag filtering. */ if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWFILTER,0); /* Enable only WOL MAGIC by default */ if (adapter->wol) { if_setcapenablebit(ifp, IFCAP_WOL_MAGIC, IFCAP_WOL_MCAST| IFCAP_WOL_UCAST); } else { if_setcapenablebit(ifp, 0, IFCAP_WOL_MAGIC | IFCAP_WOL_MCAST| IFCAP_WOL_UCAST); } /* * Specify the media types supported by this adapter and register * callbacks to update media and link information */ if ((adapter->hw.phy.media_type == e1000_media_type_fiber) || (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) { u_char fiber_type = IFM_1000_SX; /* default type */ if (adapter->hw.mac.type == e1000_82545) fiber_type = IFM_1000_LX; ifmedia_add(adapter->media, IFM_ETHER | fiber_type | IFM_FDX, 0, NULL); ifmedia_add(adapter->media, IFM_ETHER | fiber_type, 0, NULL); } else { ifmedia_add(adapter->media, IFM_ETHER | IFM_10_T, 0, NULL); ifmedia_add(adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX, 0, NULL); ifmedia_add(adapter->media, IFM_ETHER | IFM_100_TX, 0, NULL); ifmedia_add(adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX, 0, NULL); if (adapter->hw.phy.type != e1000_phy_ife) { ifmedia_add(adapter->media, IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL); ifmedia_add(adapter->media, IFM_ETHER | IFM_1000_T, 0, NULL); } } ifmedia_add(adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(adapter->media, IFM_ETHER | IFM_AUTO); return (0); } static int em_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int ntxqs, int ntxqsets) { struct adapter *adapter = iflib_get_softc(ctx); if_softc_ctx_t scctx = adapter->shared; int error = E1000_SUCCESS; struct em_tx_queue *que; int i, j; MPASS(adapter->tx_num_queues > 0); MPASS(adapter->tx_num_queues == ntxqsets); /* First allocate the top level queue structs */ if (!(adapter->tx_queues = - (struct em_tx_queue *) malloc(sizeof(struct em_tx_queue) * - adapter->tx_num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { - device_printf(iflib_get_dev(ctx), "Unable to allocate queue memory\n"); + (struct em_tx_queue *) mallocarray(adapter->tx_num_queues, + sizeof(struct em_tx_queue), M_DEVBUF, M_NOWAIT | M_ZERO))) { + device_printf(iflib_get_dev(ctx), "Unable to allocate queue memory\n"); return(ENOMEM); } for (i = 0, que = adapter->tx_queues; i < adapter->tx_num_queues; i++, que++) { /* Set up some basics */ struct tx_ring *txr = &que->txr; txr->adapter = que->adapter = adapter; que->me = txr->me = i; /* Allocate report status array */ - if (!(txr->tx_rsq = (qidx_t *) malloc(sizeof(qidx_t) * scctx->isc_ntxd[0], M_DEVBUF, M_NOWAIT | M_ZERO))) { + if (!(txr->tx_rsq = (qidx_t *) mallocarray(scctx->isc_ntxd[0], + sizeof(qidx_t), M_DEVBUF, M_NOWAIT | M_ZERO))) { device_printf(iflib_get_dev(ctx), "failed to allocate rs_idxs memory\n"); error = ENOMEM; goto fail; } for (j = 0; j < scctx->isc_ntxd[0]; j++) txr->tx_rsq[j] = QIDX_INVALID; /* get the virtual and physical address of the hardware queues */ txr->tx_base = (struct e1000_tx_desc *)vaddrs[i*ntxqs]; txr->tx_paddr = paddrs[i*ntxqs]; } device_printf(iflib_get_dev(ctx), "allocated for %d tx_queues\n", adapter->tx_num_queues); return (0); fail: em_if_queues_free(ctx); return (error); } static int em_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int nrxqs, int nrxqsets) { struct adapter *adapter = iflib_get_softc(ctx); int error = E1000_SUCCESS; struct em_rx_queue *que; int i; MPASS(adapter->rx_num_queues > 0); MPASS(adapter->rx_num_queues == nrxqsets); /* First allocate the top level queue structs */ if (!(adapter->rx_queues = - (struct em_rx_queue *) malloc(sizeof(struct em_rx_queue) * - adapter->rx_num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { + (struct em_rx_queue *) mallocarray(adapter->rx_num_queues, + sizeof(struct em_rx_queue), M_DEVBUF, M_NOWAIT | M_ZERO))) { device_printf(iflib_get_dev(ctx), "Unable to allocate queue memory\n"); error = ENOMEM; goto fail; } for (i = 0, que = adapter->rx_queues; i < nrxqsets; i++, que++) { /* Set up some basics */ struct rx_ring *rxr = &que->rxr; rxr->adapter = que->adapter = adapter; rxr->que = que; que->me = rxr->me = i; /* get the virtual and physical address of the hardware queues */ rxr->rx_base = (union e1000_rx_desc_extended *)vaddrs[i*nrxqs]; rxr->rx_paddr = paddrs[i*nrxqs]; } device_printf(iflib_get_dev(ctx), "allocated for %d rx_queues\n", adapter->rx_num_queues); return (0); fail: em_if_queues_free(ctx); return (error); } static void em_if_queues_free(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct em_tx_queue *tx_que = adapter->tx_queues; struct em_rx_queue *rx_que = adapter->rx_queues; if (tx_que != NULL) { for (int i = 0; i < adapter->tx_num_queues; i++, tx_que++) { struct tx_ring *txr = &tx_que->txr; if (txr->tx_rsq == NULL) break; free(txr->tx_rsq, M_DEVBUF); txr->tx_rsq = NULL; } free(adapter->tx_queues, M_DEVBUF); adapter->tx_queues = NULL; } if (rx_que != NULL) { free(adapter->rx_queues, M_DEVBUF); adapter->rx_queues = NULL; } em_release_hw_control(adapter); if (adapter->mta != NULL) { free(adapter->mta, M_DEVBUF); } } /********************************************************************* * * Enable transmit unit. * **********************************************************************/ static void em_initialize_transmit_unit(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); if_softc_ctx_t scctx = adapter->shared; struct em_tx_queue *que; struct tx_ring *txr; struct e1000_hw *hw = &adapter->hw; u32 tctl, txdctl = 0, tarc, tipg = 0; INIT_DEBUGOUT("em_initialize_transmit_unit: begin"); for (int i = 0; i < adapter->tx_num_queues; i++, txr++) { u64 bus_addr; caddr_t offp, endp; que = &adapter->tx_queues[i]; txr = &que->txr; bus_addr = txr->tx_paddr; /* Clear checksum offload context. */ offp = (caddr_t)&txr->csum_flags; endp = (caddr_t)(txr + 1); bzero(offp, endp - offp); /* Base and Len of TX Ring */ E1000_WRITE_REG(hw, E1000_TDLEN(i), scctx->isc_ntxd[0] * sizeof(struct e1000_tx_desc)); E1000_WRITE_REG(hw, E1000_TDBAH(i), (u32)(bus_addr >> 32)); E1000_WRITE_REG(hw, E1000_TDBAL(i), (u32)bus_addr); /* Init the HEAD/TAIL indices */ E1000_WRITE_REG(hw, E1000_TDT(i), 0); E1000_WRITE_REG(hw, E1000_TDH(i), 0); HW_DEBUGOUT2("Base = %x, Length = %x\n", E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)), E1000_READ_REG(&adapter->hw, E1000_TDLEN(i))); txdctl = 0; /* clear txdctl */ txdctl |= 0x1f; /* PTHRESH */ txdctl |= 1 << 8; /* HTHRESH */ txdctl |= 1 << 16;/* WTHRESH */ txdctl |= 1 << 22; /* Reserved bit 22 must always be 1 */ txdctl |= E1000_TXDCTL_GRAN; txdctl |= 1 << 25; /* LWTHRESH */ E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl); } /* Set the default values for the Tx Inter Packet Gap timer */ switch (adapter->hw.mac.type) { case e1000_80003es2lan: tipg = DEFAULT_82543_TIPG_IPGR1; tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT; break; case e1000_82542: tipg = DEFAULT_82542_TIPG_IPGT; tipg |= DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT; tipg |= DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT; break; default: if ((adapter->hw.phy.media_type == e1000_media_type_fiber) || (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) tipg = DEFAULT_82543_TIPG_IPGT_FIBER; else tipg = DEFAULT_82543_TIPG_IPGT_COPPER; tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT; tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT; } E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg); E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value); if(adapter->hw.mac.type >= e1000_82540) E1000_WRITE_REG(&adapter->hw, E1000_TADV, adapter->tx_abs_int_delay.value); if ((adapter->hw.mac.type == e1000_82571) || (adapter->hw.mac.type == e1000_82572)) { tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0)); tarc |= TARC_SPEED_MODE_BIT; E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc); } else if (adapter->hw.mac.type == e1000_80003es2lan) { /* errata: program both queues to unweighted RR */ tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0)); tarc |= 1; E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc); tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1)); tarc |= 1; E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc); } else if (adapter->hw.mac.type == e1000_82574) { tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0)); tarc |= TARC_ERRATA_BIT; if ( adapter->tx_num_queues > 1) { tarc |= (TARC_COMPENSATION_MODE | TARC_MQ_FIX); E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc); E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc); } else E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc); } if (adapter->tx_int_delay.value > 0) adapter->txd_cmd |= E1000_TXD_CMD_IDE; /* Program the Transmit Control Register */ tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL); tctl &= ~E1000_TCTL_CT; tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN | (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT)); if (adapter->hw.mac.type >= e1000_82571) tctl |= E1000_TCTL_MULR; /* This write will effectively turn on the transmit unit. */ E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl); /* SPT and KBL errata workarounds */ if (hw->mac.type == e1000_pch_spt) { u32 reg; reg = E1000_READ_REG(hw, E1000_IOSFPC); reg |= E1000_RCTL_RDMTS_HEX; E1000_WRITE_REG(hw, E1000_IOSFPC, reg); /* i218-i219 Specification Update 1.5.4.4 */ reg = E1000_READ_REG(hw, E1000_TARC(0)); reg &= E1000_TARC0_CB_MULTIQ_3_REQ; reg |= E1000_TARC0_CB_MULTIQ_2_REQ; E1000_WRITE_REG(hw, E1000_TARC(0), reg); } } /********************************************************************* * * Enable receive unit. * **********************************************************************/ static void em_initialize_receive_unit(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); if_softc_ctx_t scctx = adapter->shared; struct ifnet *ifp = iflib_get_ifp(ctx); struct e1000_hw *hw = &adapter->hw; struct em_rx_queue *que; int i; u32 rctl, rxcsum, rfctl; INIT_DEBUGOUT("em_initialize_receive_units: begin"); /* * Make sure receives are disabled while setting * up the descriptor ring */ rctl = E1000_READ_REG(hw, E1000_RCTL); /* Do not disable if ever enabled on this hardware */ if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583)) E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN); /* Setup the Receive Control Register */ rctl &= ~(3 << E1000_RCTL_MO_SHIFT); rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF | (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT); /* Do not store bad packets */ rctl &= ~E1000_RCTL_SBP; /* Enable Long Packet receive */ if (if_getmtu(ifp) > ETHERMTU) rctl |= E1000_RCTL_LPE; else rctl &= ~E1000_RCTL_LPE; /* Strip the CRC */ if (!em_disable_crc_stripping) rctl |= E1000_RCTL_SECRC; if (adapter->hw.mac.type >= e1000_82540) { E1000_WRITE_REG(&adapter->hw, E1000_RADV, adapter->rx_abs_int_delay.value); /* * Set the interrupt throttling rate. Value is calculated * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns) */ E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR); } E1000_WRITE_REG(&adapter->hw, E1000_RDTR, adapter->rx_int_delay.value); /* Use extended rx descriptor formats */ rfctl = E1000_READ_REG(hw, E1000_RFCTL); rfctl |= E1000_RFCTL_EXTEN; /* * When using MSIX interrupts we need to throttle * using the EITR register (82574 only) */ if (hw->mac.type == e1000_82574) { for (int i = 0; i < 4; i++) E1000_WRITE_REG(hw, E1000_EITR_82574(i), DEFAULT_ITR); /* Disable accelerated acknowledge */ rfctl |= E1000_RFCTL_ACK_DIS; } E1000_WRITE_REG(hw, E1000_RFCTL, rfctl); rxcsum = E1000_READ_REG(hw, E1000_RXCSUM); if (if_getcapenable(ifp) & IFCAP_RXCSUM && adapter->hw.mac.type >= e1000_82543) { if (adapter->tx_num_queues > 1) { if (adapter->hw.mac.type >= igb_mac_min) { rxcsum |= E1000_RXCSUM_PCSD; if (hw->mac.type != e1000_82575) rxcsum |= E1000_RXCSUM_CRCOFL; } else rxcsum |= E1000_RXCSUM_TUOFL | E1000_RXCSUM_IPOFL | E1000_RXCSUM_PCSD; } else { if (adapter->hw.mac.type >= igb_mac_min) rxcsum |= E1000_RXCSUM_IPPCSE; else rxcsum |= E1000_RXCSUM_TUOFL | E1000_RXCSUM_IPOFL; if (adapter->hw.mac.type > e1000_82575) rxcsum |= E1000_RXCSUM_CRCOFL; } } else rxcsum &= ~E1000_RXCSUM_TUOFL; E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum); if (adapter->rx_num_queues > 1) { if (adapter->hw.mac.type >= igb_mac_min) igb_initialize_rss_mapping(adapter); else em_initialize_rss_mapping(adapter); } /* * XXX TEMPORARY WORKAROUND: on some systems with 82573 * long latencies are observed, like Lenovo X60. This * change eliminates the problem, but since having positive * values in RDTR is a known source of problems on other * platforms another solution is being sought. */ if (hw->mac.type == e1000_82573) E1000_WRITE_REG(hw, E1000_RDTR, 0x20); for (i = 0, que = adapter->rx_queues; i < adapter->rx_num_queues; i++, que++) { struct rx_ring *rxr = &que->rxr; /* Setup the Base and Length of the Rx Descriptor Ring */ u64 bus_addr = rxr->rx_paddr; #if 0 u32 rdt = adapter->rx_num_queues -1; /* default */ #endif E1000_WRITE_REG(hw, E1000_RDLEN(i), scctx->isc_nrxd[0] * sizeof(union e1000_rx_desc_extended)); E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32)); E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr); /* Setup the Head and Tail Descriptor Pointers */ E1000_WRITE_REG(hw, E1000_RDH(i), 0); E1000_WRITE_REG(hw, E1000_RDT(i), 0); } /* * Set PTHRESH for improved jumbo performance * According to 10.2.5.11 of Intel 82574 Datasheet, * RXDCTL(1) is written whenever RXDCTL(0) is written. * Only write to RXDCTL(1) if there is a need for different * settings. */ if (((adapter->hw.mac.type == e1000_ich9lan) || (adapter->hw.mac.type == e1000_pch2lan) || (adapter->hw.mac.type == e1000_ich10lan)) && (if_getmtu(ifp) > ETHERMTU)) { u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0)); E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3); } else if (adapter->hw.mac.type == e1000_82574) { for (int i = 0; i < adapter->rx_num_queues; i++) { u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i)); rxdctl |= 0x20; /* PTHRESH */ rxdctl |= 4 << 8; /* HTHRESH */ rxdctl |= 4 << 16;/* WTHRESH */ rxdctl |= 1 << 24; /* Switch to granularity */ E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl); } } else if (adapter->hw.mac.type >= igb_mac_min) { u32 psize, srrctl = 0; if (if_getmtu(ifp) > ETHERMTU) { /* Set maximum packet len */ if (adapter->rx_mbuf_sz <= 4096) { srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT; rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX; } else if (adapter->rx_mbuf_sz > 4096) { srrctl |= 8192 >> E1000_SRRCTL_BSIZEPKT_SHIFT; rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX; } psize = scctx->isc_max_frame_size; /* are we on a vlan? */ if (ifp->if_vlantrunk != NULL) psize += VLAN_TAG_SIZE; E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize); } else { srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT; rctl |= E1000_RCTL_SZ_2048; } /* * If TX flow control is disabled and there's >1 queue defined, * enable DROP. * * This drops frames rather than hanging the RX MAC for all queues. */ if ((adapter->rx_num_queues > 1) && (adapter->fc == e1000_fc_none || adapter->fc == e1000_fc_rx_pause)) { srrctl |= E1000_SRRCTL_DROP_EN; } /* Setup the Base and Length of the Rx Descriptor Rings */ for (i = 0, que = adapter->rx_queues; i < adapter->rx_num_queues; i++, que++) { struct rx_ring *rxr = &que->rxr; u64 bus_addr = rxr->rx_paddr; u32 rxdctl; #ifdef notyet /* Configure for header split? -- ignore for now */ rxr->hdr_split = igb_header_split; #else srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF; #endif E1000_WRITE_REG(hw, E1000_RDLEN(i), scctx->isc_nrxd[0] * sizeof(struct e1000_rx_desc)); E1000_WRITE_REG(hw, E1000_RDBAH(i), (uint32_t)(bus_addr >> 32)); E1000_WRITE_REG(hw, E1000_RDBAL(i), (uint32_t)bus_addr); E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl); /* Enable this Queue */ rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i)); rxdctl |= E1000_RXDCTL_QUEUE_ENABLE; rxdctl &= 0xFFF00000; rxdctl |= IGB_RX_PTHRESH; rxdctl |= IGB_RX_HTHRESH << 8; rxdctl |= IGB_RX_WTHRESH << 16; E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl); } } else if (adapter->hw.mac.type >= e1000_pch2lan) { if (if_getmtu(ifp) > ETHERMTU) e1000_lv_jumbo_workaround_ich8lan(hw, TRUE); else e1000_lv_jumbo_workaround_ich8lan(hw, FALSE); } /* Make sure VLAN Filters are off */ rctl &= ~E1000_RCTL_VFE; if (adapter->hw.mac.type < igb_mac_min) { if (adapter->rx_mbuf_sz == MCLBYTES) rctl |= E1000_RCTL_SZ_2048; else if (adapter->rx_mbuf_sz == MJUMPAGESIZE) rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX; else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX; /* ensure we clear use DTYPE of 00 here */ rctl &= ~0x00000C00; } /* Write out the settings */ E1000_WRITE_REG(hw, E1000_RCTL, rctl); return; } static void em_if_vlan_register(if_ctx_t ctx, u16 vtag) { struct adapter *adapter = iflib_get_softc(ctx); u32 index, bit; index = (vtag >> 5) & 0x7F; bit = vtag & 0x1F; adapter->shadow_vfta[index] |= (1 << bit); ++adapter->num_vlans; } static void em_if_vlan_unregister(if_ctx_t ctx, u16 vtag) { struct adapter *adapter = iflib_get_softc(ctx); u32 index, bit; index = (vtag >> 5) & 0x7F; bit = vtag & 0x1F; adapter->shadow_vfta[index] &= ~(1 << bit); --adapter->num_vlans; } static void em_setup_vlan_hw_support(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; u32 reg; /* * We get here thru init_locked, meaning * a soft reset, this has already cleared * the VFTA and other state, so if there * have been no vlan's registered do nothing. */ if (adapter->num_vlans == 0) return; /* * A soft reset zero's out the VFTA, so * we need to repopulate it now. */ for (int i = 0; i < EM_VFTA_SIZE; i++) if (adapter->shadow_vfta[i] != 0) E1000_WRITE_REG_ARRAY(hw, E1000_VFTA, i, adapter->shadow_vfta[i]); reg = E1000_READ_REG(hw, E1000_CTRL); reg |= E1000_CTRL_VME; E1000_WRITE_REG(hw, E1000_CTRL, reg); /* Enable the Filter Table */ reg = E1000_READ_REG(hw, E1000_RCTL); reg &= ~E1000_RCTL_CFIEN; reg |= E1000_RCTL_VFE; E1000_WRITE_REG(hw, E1000_RCTL, reg); } static void em_if_enable_intr(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct e1000_hw *hw = &adapter->hw; u32 ims_mask = IMS_ENABLE_MASK; if (hw->mac.type == e1000_82574) { E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK); ims_mask |= adapter->ims; } else if (adapter->intr_type == IFLIB_INTR_MSIX && hw->mac.type >= igb_mac_min) { u32 mask = (adapter->que_mask | adapter->link_mask); E1000_WRITE_REG(&adapter->hw, E1000_EIAC, mask); E1000_WRITE_REG(&adapter->hw, E1000_EIAM, mask); E1000_WRITE_REG(&adapter->hw, E1000_EIMS, mask); ims_mask = E1000_IMS_LSC; } E1000_WRITE_REG(hw, E1000_IMS, ims_mask); } static void em_if_disable_intr(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct e1000_hw *hw = &adapter->hw; if (adapter->intr_type == IFLIB_INTR_MSIX) { if (hw->mac.type >= igb_mac_min) E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0); E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0); } E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff); } /* * Bit of a misnomer, what this really means is * to enable OS management of the system... aka * to disable special hardware management features */ static void em_init_manageability(struct adapter *adapter) { /* A shared code workaround */ #define E1000_82542_MANC2H E1000_MANC2H if (adapter->has_manage) { int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H); int manc = E1000_READ_REG(&adapter->hw, E1000_MANC); /* disable hardware interception of ARP */ manc &= ~(E1000_MANC_ARP_EN); /* enable receiving management packets to the host */ manc |= E1000_MANC_EN_MNG2HOST; #define E1000_MNG2HOST_PORT_623 (1 << 5) #define E1000_MNG2HOST_PORT_664 (1 << 6) manc2h |= E1000_MNG2HOST_PORT_623; manc2h |= E1000_MNG2HOST_PORT_664; E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h); E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc); } } /* * Give control back to hardware management * controller if there is one. */ static void em_release_manageability(struct adapter *adapter) { if (adapter->has_manage) { int manc = E1000_READ_REG(&adapter->hw, E1000_MANC); /* re-enable hardware interception of ARP */ manc |= E1000_MANC_ARP_EN; manc &= ~E1000_MANC_EN_MNG2HOST; E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc); } } /* * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit. * For ASF and Pass Through versions of f/w this means * that the driver is loaded. For AMT version type f/w * this means that the network i/f is open. */ static void em_get_hw_control(struct adapter *adapter) { u32 ctrl_ext, swsm; if (adapter->vf_ifp) return; if (adapter->hw.mac.type == e1000_82573) { swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM); E1000_WRITE_REG(&adapter->hw, E1000_SWSM, swsm | E1000_SWSM_DRV_LOAD); return; } /* else */ ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext | E1000_CTRL_EXT_DRV_LOAD); } /* * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit. * For ASF and Pass Through versions of f/w this means that * the driver is no longer loaded. For AMT versions of the * f/w this means that the network i/f is closed. */ static void em_release_hw_control(struct adapter *adapter) { u32 ctrl_ext, swsm; if (!adapter->has_manage) return; if (adapter->hw.mac.type == e1000_82573) { swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM); E1000_WRITE_REG(&adapter->hw, E1000_SWSM, swsm & ~E1000_SWSM_DRV_LOAD); return; } /* else */ ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD); return; } static int em_is_valid_ether_addr(u8 *addr) { char zero_addr[6] = { 0, 0, 0, 0, 0, 0 }; if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) { return (FALSE); } return (TRUE); } /* ** Parse the interface capabilities with regard ** to both system management and wake-on-lan for ** later use. */ static void em_get_wakeup(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); device_t dev = iflib_get_dev(ctx); u16 eeprom_data = 0, device_id, apme_mask; adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw); apme_mask = EM_EEPROM_APME; switch (adapter->hw.mac.type) { case e1000_82542: case e1000_82543: break; case e1000_82544: e1000_read_nvm(&adapter->hw, NVM_INIT_CONTROL2_REG, 1, &eeprom_data); apme_mask = EM_82544_APME; break; case e1000_82546: case e1000_82546_rev_3: if (adapter->hw.bus.func == 1) { e1000_read_nvm(&adapter->hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data); break; } else e1000_read_nvm(&adapter->hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data); break; case e1000_82573: case e1000_82583: adapter->has_amt = TRUE; /* FALLTHROUGH */ case e1000_82571: case e1000_82572: case e1000_80003es2lan: if (adapter->hw.bus.func == 1) { e1000_read_nvm(&adapter->hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data); break; } else e1000_read_nvm(&adapter->hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data); break; case e1000_ich8lan: case e1000_ich9lan: case e1000_ich10lan: case e1000_pchlan: case e1000_pch2lan: case e1000_pch_lpt: case e1000_pch_spt: case e1000_82575: /* listing all igb devices */ case e1000_82576: case e1000_82580: case e1000_i350: case e1000_i354: case e1000_i210: case e1000_i211: case e1000_vfadapt: case e1000_vfadapt_i350: apme_mask = E1000_WUC_APME; adapter->has_amt = TRUE; eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC); break; default: e1000_read_nvm(&adapter->hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data); break; } if (eeprom_data & apme_mask) adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC); /* * We have the eeprom settings, now apply the special cases * where the eeprom may be wrong or the board won't support * wake on lan on a particular port */ device_id = pci_get_device(dev); switch (device_id) { case E1000_DEV_ID_82546GB_PCIE: adapter->wol = 0; break; case E1000_DEV_ID_82546EB_FIBER: case E1000_DEV_ID_82546GB_FIBER: /* Wake events only supported on port A for dual fiber * regardless of eeprom setting */ if (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_FUNC_1) adapter->wol = 0; break; case E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3: /* if quad port adapter, disable WoL on all but port A */ if (global_quad_port_a != 0) adapter->wol = 0; /* Reset for multiple quad port adapters */ if (++global_quad_port_a == 4) global_quad_port_a = 0; break; case E1000_DEV_ID_82571EB_FIBER: /* Wake events only supported on port A for dual fiber * regardless of eeprom setting */ if (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_FUNC_1) adapter->wol = 0; break; case E1000_DEV_ID_82571EB_QUAD_COPPER: case E1000_DEV_ID_82571EB_QUAD_FIBER: case E1000_DEV_ID_82571EB_QUAD_COPPER_LP: /* if quad port adapter, disable WoL on all but port A */ if (global_quad_port_a != 0) adapter->wol = 0; /* Reset for multiple quad port adapters */ if (++global_quad_port_a == 4) global_quad_port_a = 0; break; } return; } /* * Enable PCI Wake On Lan capability */ static void em_enable_wakeup(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); device_t dev = iflib_get_dev(ctx); if_t ifp = iflib_get_ifp(ctx); int error = 0; u32 pmc, ctrl, ctrl_ext, rctl; u16 status; if (pci_find_cap(dev, PCIY_PMG, &pmc) != 0) return; /* * Determine type of Wakeup: note that wol * is set with all bits on by default. */ if ((if_getcapenable(ifp) & IFCAP_WOL_MAGIC) == 0) adapter->wol &= ~E1000_WUFC_MAG; if ((if_getcapenable(ifp) & IFCAP_WOL_UCAST) == 0) adapter->wol &= ~E1000_WUFC_EX; if ((if_getcapenable(ifp) & IFCAP_WOL_MCAST) == 0) adapter->wol &= ~E1000_WUFC_MC; else { rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); rctl |= E1000_RCTL_MPE; E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl); } if (!(adapter->wol & (E1000_WUFC_EX | E1000_WUFC_MAG | E1000_WUFC_MC))) goto pme; /* Advertise the wakeup capability */ ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL); ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3); E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl); /* Keep the laser running on Fiber adapters */ if (adapter->hw.phy.media_type == e1000_media_type_fiber || adapter->hw.phy.media_type == e1000_media_type_internal_serdes) { ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA; E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext); } if ((adapter->hw.mac.type == e1000_ich8lan) || (adapter->hw.mac.type == e1000_pchlan) || (adapter->hw.mac.type == e1000_ich9lan) || (adapter->hw.mac.type == e1000_ich10lan)) e1000_suspend_workarounds_ich8lan(&adapter->hw); if ( adapter->hw.mac.type >= e1000_pchlan) { error = em_enable_phy_wakeup(adapter); if (error) goto pme; } else { /* Enable wakeup by the MAC */ E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN); E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol); } if (adapter->hw.phy.type == e1000_phy_igp_3) e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw); pme: status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2); status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE); if (!error && (if_getcapenable(ifp) & IFCAP_WOL)) status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE; pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2); return; } /* * WOL in the newer chipset interfaces (pchlan) * require thing to be copied into the phy */ static int em_enable_phy_wakeup(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; u32 mreg, ret = 0; u16 preg; /* copy MAC RARs to PHY RARs */ e1000_copy_rx_addrs_to_phy_ich8lan(hw); /* copy MAC MTA to PHY MTA */ for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) { mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i); e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF)); e1000_write_phy_reg(hw, BM_MTA(i) + 1, (u16)((mreg >> 16) & 0xFFFF)); } /* configure PHY Rx Control register */ e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg); mreg = E1000_READ_REG(hw, E1000_RCTL); if (mreg & E1000_RCTL_UPE) preg |= BM_RCTL_UPE; if (mreg & E1000_RCTL_MPE) preg |= BM_RCTL_MPE; preg &= ~(BM_RCTL_MO_MASK); if (mreg & E1000_RCTL_MO_3) preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT) << BM_RCTL_MO_SHIFT); if (mreg & E1000_RCTL_BAM) preg |= BM_RCTL_BAM; if (mreg & E1000_RCTL_PMCF) preg |= BM_RCTL_PMCF; mreg = E1000_READ_REG(hw, E1000_CTRL); if (mreg & E1000_CTRL_RFCE) preg |= BM_RCTL_RFCE; e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg); /* enable PHY wakeup in MAC register */ E1000_WRITE_REG(hw, E1000_WUC, E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN | E1000_WUC_APME); E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol); /* configure and enable PHY wakeup in PHY registers */ e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol); e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN); /* activate PHY wakeup */ ret = hw->phy.ops.acquire(hw); if (ret) { printf("Could not acquire PHY\n"); return ret; } e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT, (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT)); ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg); if (ret) { printf("Could not read PHY page 769\n"); goto out; } preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT; ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg); if (ret) printf("Could not set PHY Host Wakeup bit\n"); out: hw->phy.ops.release(hw); return ret; } static void em_if_led_func(if_ctx_t ctx, int onoff) { struct adapter *adapter = iflib_get_softc(ctx); if (onoff) { e1000_setup_led(&adapter->hw); e1000_led_on(&adapter->hw); } else { e1000_led_off(&adapter->hw); e1000_cleanup_led(&adapter->hw); } } /* * Disable the L0S and L1 LINK states */ static void em_disable_aspm(struct adapter *adapter) { int base, reg; u16 link_cap,link_ctrl; device_t dev = adapter->dev; switch (adapter->hw.mac.type) { case e1000_82573: case e1000_82574: case e1000_82583: break; default: return; } if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0) return; reg = base + PCIER_LINK_CAP; link_cap = pci_read_config(dev, reg, 2); if ((link_cap & PCIEM_LINK_CAP_ASPM) == 0) return; reg = base + PCIER_LINK_CTL; link_ctrl = pci_read_config(dev, reg, 2); link_ctrl &= ~PCIEM_LINK_CTL_ASPMC; pci_write_config(dev, reg, link_ctrl, 2); return; } /********************************************************************** * * Update the board statistics counters. * **********************************************************************/ static void em_update_stats_counters(struct adapter *adapter) { if(adapter->hw.phy.media_type == e1000_media_type_copper || (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) { adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS); adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC); } adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS); adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC); adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC); adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL); adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC); adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL); adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC); adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC); adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC); adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC); adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC); adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC); /* ** For watchdog management we need to know if we have been ** paused during the last interval, so capture that here. */ adapter->shared->isc_pause_frames = adapter->stats.xoffrxc; adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC); adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC); adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64); adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127); adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255); adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511); adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023); adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522); adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC); adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC); adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC); adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC); /* For the 64-bit byte counters the low dword must be read first. */ /* Both registers clear on the read of the high dword */ adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) + ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32); adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) + ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32); adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC); adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC); adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC); adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC); adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC); adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH); adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH); adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR); adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT); adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64); adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127); adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255); adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511); adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023); adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522); adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC); adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC); /* Interrupt Counts */ adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC); adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC); adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC); adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC); adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC); adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC); adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC); adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC); adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC); if (adapter->hw.mac.type >= e1000_82543) { adapter->stats.algnerrc += E1000_READ_REG(&adapter->hw, E1000_ALGNERRC); adapter->stats.rxerrc += E1000_READ_REG(&adapter->hw, E1000_RXERRC); adapter->stats.tncrs += E1000_READ_REG(&adapter->hw, E1000_TNCRS); adapter->stats.cexterr += E1000_READ_REG(&adapter->hw, E1000_CEXTERR); adapter->stats.tsctc += E1000_READ_REG(&adapter->hw, E1000_TSCTC); adapter->stats.tsctfc += E1000_READ_REG(&adapter->hw, E1000_TSCTFC); } } static uint64_t em_if_get_counter(if_ctx_t ctx, ift_counter cnt) { struct adapter *adapter = iflib_get_softc(ctx); struct ifnet *ifp = iflib_get_ifp(ctx); switch (cnt) { case IFCOUNTER_COLLISIONS: return (adapter->stats.colc); case IFCOUNTER_IERRORS: return (adapter->dropped_pkts + adapter->stats.rxerrc + adapter->stats.crcerrs + adapter->stats.algnerrc + adapter->stats.ruc + adapter->stats.roc + adapter->stats.mpc + adapter->stats.cexterr); case IFCOUNTER_OERRORS: return (adapter->stats.ecol + adapter->stats.latecol + adapter->watchdog_events); default: return (if_get_counter_default(ifp, cnt)); } } /* Export a single 32-bit register via a read-only sysctl. */ static int em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS) { struct adapter *adapter; u_int val; adapter = oidp->oid_arg1; val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2); return (sysctl_handle_int(oidp, &val, 0, req)); } /* * Add sysctl variables, one per statistic, to the system. */ static void em_add_hw_stats(struct adapter *adapter) { device_t dev = iflib_get_dev(adapter->ctx); struct em_tx_queue *tx_que = adapter->tx_queues; struct em_rx_queue *rx_que = adapter->rx_queues; struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev); struct sysctl_oid *tree = device_get_sysctl_tree(dev); struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree); struct e1000_hw_stats *stats = &adapter->stats; struct sysctl_oid *stat_node, *queue_node, *int_node; struct sysctl_oid_list *stat_list, *queue_list, *int_list; #define QUEUE_NAME_LEN 32 char namebuf[QUEUE_NAME_LEN]; /* Driver Statistics */ SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", CTLFLAG_RD, &adapter->dropped_pkts, "Driver dropped packets"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq", CTLFLAG_RD, &adapter->link_irq, "Link MSIX IRQ Handled"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_fail", CTLFLAG_RD, &adapter->mbuf_defrag_failed, "Defragmenting mbuf chain failed"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail", CTLFLAG_RD, &adapter->no_tx_dma_setup, "Driver tx dma failure in xmit"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns", CTLFLAG_RD, &adapter->rx_overruns, "RX overruns"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts", CTLFLAG_RD, &adapter->watchdog_events, "Watchdog timeouts"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control", CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL, em_sysctl_reg_handler, "IU", "Device Control Register"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control", CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL, em_sysctl_reg_handler, "IU", "Receiver Control Register"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water", CTLFLAG_RD, &adapter->hw.fc.high_water, 0, "Flow Control High Watermark"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water", CTLFLAG_RD, &adapter->hw.fc.low_water, 0, "Flow Control Low Watermark"); for (int i = 0; i < adapter->tx_num_queues; i++, tx_que++) { struct tx_ring *txr = &tx_que->txr; snprintf(namebuf, QUEUE_NAME_LEN, "queue_tx_%d", i); queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "TX Queue Name"); queue_list = SYSCTL_CHILDREN(queue_node); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDH(txr->me), em_sysctl_reg_handler, "IU", "Transmit Descriptor Head"); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDT(txr->me), em_sysctl_reg_handler, "IU", "Transmit Descriptor Tail"); SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq", CTLFLAG_RD, &txr->tx_irq, "Queue MSI-X Transmit Interrupts"); } for (int j = 0; j < adapter->rx_num_queues; j++, rx_que++) { struct rx_ring *rxr = &rx_que->rxr; snprintf(namebuf, QUEUE_NAME_LEN, "queue_rx_%d", j); queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "RX Queue Name"); queue_list = SYSCTL_CHILDREN(queue_node); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDH(rxr->me), em_sysctl_reg_handler, "IU", "Receive Descriptor Head"); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDT(rxr->me), em_sysctl_reg_handler, "IU", "Receive Descriptor Tail"); SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq", CTLFLAG_RD, &rxr->rx_irq, "Queue MSI-X Receive Interrupts"); } /* MAC stats get their own sub node */ stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", CTLFLAG_RD, NULL, "Statistics"); stat_list = SYSCTL_CHILDREN(stat_node); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll", CTLFLAG_RD, &stats->ecol, "Excessive collisions"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll", CTLFLAG_RD, &stats->scc, "Single collisions"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll", CTLFLAG_RD, &stats->mcc, "Multiple collisions"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll", CTLFLAG_RD, &stats->latecol, "Late collisions"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count", CTLFLAG_RD, &stats->colc, "Collision Count"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors", CTLFLAG_RD, &adapter->stats.symerrs, "Symbol Errors"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors", CTLFLAG_RD, &adapter->stats.sec, "Sequence Errors"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count", CTLFLAG_RD, &adapter->stats.dc, "Defer Count"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets", CTLFLAG_RD, &adapter->stats.mpc, "Missed Packets"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff", CTLFLAG_RD, &adapter->stats.rnbc, "Receive No Buffers"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize", CTLFLAG_RD, &adapter->stats.ruc, "Receive Undersize"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented", CTLFLAG_RD, &adapter->stats.rfc, "Fragmented Packets Received "); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize", CTLFLAG_RD, &adapter->stats.roc, "Oversized Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber", CTLFLAG_RD, &adapter->stats.rjc, "Recevied Jabber"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs", CTLFLAG_RD, &adapter->stats.rxerrc, "Receive Errors"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs", CTLFLAG_RD, &adapter->stats.crcerrs, "CRC errors"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs", CTLFLAG_RD, &adapter->stats.algnerrc, "Alignment Errors"); /* On 82575 these are collision counts */ SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs", CTLFLAG_RD, &adapter->stats.cexterr, "Collision/Carrier extension errors"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd", CTLFLAG_RD, &adapter->stats.xonrxc, "XON Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd", CTLFLAG_RD, &adapter->stats.xontxc, "XON Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd", CTLFLAG_RD, &adapter->stats.xoffrxc, "XOFF Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd", CTLFLAG_RD, &adapter->stats.xofftxc, "XOFF Transmitted"); /* Packet Reception Stats */ SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd", CTLFLAG_RD, &adapter->stats.tpr, "Total Packets Received "); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd", CTLFLAG_RD, &adapter->stats.gprc, "Good Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd", CTLFLAG_RD, &adapter->stats.bprc, "Broadcast Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd", CTLFLAG_RD, &adapter->stats.mprc, "Multicast Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64", CTLFLAG_RD, &adapter->stats.prc64, "64 byte frames received "); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127", CTLFLAG_RD, &adapter->stats.prc127, "65-127 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255", CTLFLAG_RD, &adapter->stats.prc255, "128-255 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511", CTLFLAG_RD, &adapter->stats.prc511, "256-511 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023", CTLFLAG_RD, &adapter->stats.prc1023, "512-1023 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522", CTLFLAG_RD, &adapter->stats.prc1522, "1023-1522 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd", CTLFLAG_RD, &adapter->stats.gorc, "Good Octets Received"); /* Packet Transmission Stats */ SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd", CTLFLAG_RD, &adapter->stats.gotc, "Good Octets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd", CTLFLAG_RD, &adapter->stats.tpt, "Total Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd", CTLFLAG_RD, &adapter->stats.gptc, "Good Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd", CTLFLAG_RD, &adapter->stats.bptc, "Broadcast Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd", CTLFLAG_RD, &adapter->stats.mptc, "Multicast Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64", CTLFLAG_RD, &adapter->stats.ptc64, "64 byte frames transmitted "); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127", CTLFLAG_RD, &adapter->stats.ptc127, "65-127 byte frames transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255", CTLFLAG_RD, &adapter->stats.ptc255, "128-255 byte frames transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511", CTLFLAG_RD, &adapter->stats.ptc511, "256-511 byte frames transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023", CTLFLAG_RD, &adapter->stats.ptc1023, "512-1023 byte frames transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522", CTLFLAG_RD, &adapter->stats.ptc1522, "1024-1522 byte frames transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd", CTLFLAG_RD, &adapter->stats.tsctc, "TSO Contexts Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail", CTLFLAG_RD, &adapter->stats.tsctfc, "TSO Contexts Failed"); /* Interrupt Stats */ int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts", CTLFLAG_RD, NULL, "Interrupt Statistics"); int_list = SYSCTL_CHILDREN(int_node); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts", CTLFLAG_RD, &adapter->stats.iac, "Interrupt Assertion Count"); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer", CTLFLAG_RD, &adapter->stats.icrxptc, "Interrupt Cause Rx Pkt Timer Expire Count"); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer", CTLFLAG_RD, &adapter->stats.icrxatc, "Interrupt Cause Rx Abs Timer Expire Count"); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer", CTLFLAG_RD, &adapter->stats.ictxptc, "Interrupt Cause Tx Pkt Timer Expire Count"); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer", CTLFLAG_RD, &adapter->stats.ictxatc, "Interrupt Cause Tx Abs Timer Expire Count"); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty", CTLFLAG_RD, &adapter->stats.ictxqec, "Interrupt Cause Tx Queue Empty Count"); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh", CTLFLAG_RD, &adapter->stats.ictxqmtc, "Interrupt Cause Tx Queue Min Thresh Count"); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh", CTLFLAG_RD, &adapter->stats.icrxdmtc, "Interrupt Cause Rx Desc Min Thresh Count"); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun", CTLFLAG_RD, &adapter->stats.icrxoc, "Interrupt Cause Receiver Overrun Count"); } /********************************************************************** * * This routine provides a way to dump out the adapter eeprom, * often a useful debug/service tool. This only dumps the first * 32 words, stuff that matters is in that extent. * **********************************************************************/ static int em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS) { struct adapter *adapter = (struct adapter *)arg1; int error; int result; result = -1; error = sysctl_handle_int(oidp, &result, 0, req); if (error || !req->newptr) return (error); /* * This value will cause a hex dump of the * first 32 16-bit words of the EEPROM to * the screen. */ if (result == 1) em_print_nvm_info(adapter); return (error); } static void em_print_nvm_info(struct adapter *adapter) { u16 eeprom_data; int i, j, row = 0; /* Its a bit crude, but it gets the job done */ printf("\nInterface EEPROM Dump:\n"); printf("Offset\n0x0000 "); for (i = 0, j = 0; i < 32; i++, j++) { if (j == 8) { /* Make the offset block */ j = 0; ++row; printf("\n0x00%x0 ",row); } e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data); printf("%04x ", eeprom_data); } printf("\n"); } static int em_sysctl_int_delay(SYSCTL_HANDLER_ARGS) { struct em_int_delay_info *info; struct adapter *adapter; u32 regval; int error, usecs, ticks; info = (struct em_int_delay_info *) arg1; usecs = info->value; error = sysctl_handle_int(oidp, &usecs, 0, req); if (error != 0 || req->newptr == NULL) return (error); if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535)) return (EINVAL); info->value = usecs; ticks = EM_USECS_TO_TICKS(usecs); if (info->offset == E1000_ITR) /* units are 256ns here */ ticks *= 4; adapter = info->adapter; regval = E1000_READ_OFFSET(&adapter->hw, info->offset); regval = (regval & ~0xffff) | (ticks & 0xffff); /* Handle a few special cases. */ switch (info->offset) { case E1000_RDTR: break; case E1000_TIDV: if (ticks == 0) { adapter->txd_cmd &= ~E1000_TXD_CMD_IDE; /* Don't write 0 into the TIDV register. */ regval++; } else adapter->txd_cmd |= E1000_TXD_CMD_IDE; break; } E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval); return (0); } static void em_add_int_delay_sysctl(struct adapter *adapter, const char *name, const char *description, struct em_int_delay_info *info, int offset, int value) { info->adapter = adapter; info->offset = offset; info->value = value; SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev), SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)), OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, info, 0, em_sysctl_int_delay, "I", description); } /* * Set flow control using sysctl: * Flow control values: * 0 - off * 1 - rx pause * 2 - tx pause * 3 - full */ static int em_set_flowcntl(SYSCTL_HANDLER_ARGS) { int error; static int input = 3; /* default is full */ struct adapter *adapter = (struct adapter *) arg1; error = sysctl_handle_int(oidp, &input, 0, req); if ((error) || (req->newptr == NULL)) return (error); if (input == adapter->fc) /* no change? */ return (error); switch (input) { case e1000_fc_rx_pause: case e1000_fc_tx_pause: case e1000_fc_full: case e1000_fc_none: adapter->hw.fc.requested_mode = input; adapter->fc = input; break; default: /* Do nothing */ return (error); } adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode; e1000_force_mac_fc(&adapter->hw); return (error); } /* * Manage Energy Efficient Ethernet: * Control values: * 0/1 - enabled/disabled */ static int em_sysctl_eee(SYSCTL_HANDLER_ARGS) { struct adapter *adapter = (struct adapter *) arg1; int error, value; value = adapter->hw.dev_spec.ich8lan.eee_disable; error = sysctl_handle_int(oidp, &value, 0, req); if (error || req->newptr == NULL) return (error); adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0); em_if_init(adapter->ctx); return (0); } static int em_sysctl_debug_info(SYSCTL_HANDLER_ARGS) { struct adapter *adapter; int error; int result; result = -1; error = sysctl_handle_int(oidp, &result, 0, req); if (error || !req->newptr) return (error); if (result == 1) { adapter = (struct adapter *) arg1; em_print_debug_info(adapter); } return (error); } static int em_get_rs(SYSCTL_HANDLER_ARGS) { struct adapter *adapter = (struct adapter *) arg1; int error; int result; result = 0; error = sysctl_handle_int(oidp, &result, 0, req); if (error || !req->newptr || result != 1) return (error); em_dump_rs(adapter); return (error); } static void em_if_debug(if_ctx_t ctx) { em_dump_rs(iflib_get_softc(ctx)); } /* * This routine is meant to be fluid, add whatever is * needed for debugging a problem. -jfv */ static void em_print_debug_info(struct adapter *adapter) { device_t dev = iflib_get_dev(adapter->ctx); struct ifnet *ifp = iflib_get_ifp(adapter->ctx); struct tx_ring *txr = &adapter->tx_queues->txr; struct rx_ring *rxr = &adapter->rx_queues->rxr; if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) printf("Interface is RUNNING "); else printf("Interface is NOT RUNNING\n"); if (if_getdrvflags(ifp) & IFF_DRV_OACTIVE) printf("and INACTIVE\n"); else printf("and ACTIVE\n"); for (int i = 0; i < adapter->tx_num_queues; i++, txr++) { device_printf(dev, "TX Queue %d ------\n", i); device_printf(dev, "hw tdh = %d, hw tdt = %d\n", E1000_READ_REG(&adapter->hw, E1000_TDH(i)), E1000_READ_REG(&adapter->hw, E1000_TDT(i))); } for (int j=0; j < adapter->rx_num_queues; j++, rxr++) { device_printf(dev, "RX Queue %d ------\n", j); device_printf(dev, "hw rdh = %d, hw rdt = %d\n", E1000_READ_REG(&adapter->hw, E1000_RDH(j)), E1000_READ_REG(&adapter->hw, E1000_RDT(j))); } } /* * 82574 only: * Write a new value to the EEPROM increasing the number of MSIX * vectors from 3 to 5, for proper multiqueue support. */ static void em_enable_vectors_82574(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct e1000_hw *hw = &adapter->hw; device_t dev = iflib_get_dev(ctx); u16 edata; e1000_read_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata); printf("Current cap: %#06x\n", edata); if (((edata & EM_NVM_MSIX_N_MASK) >> EM_NVM_MSIX_N_SHIFT) != 4) { device_printf(dev, "Writing to eeprom: increasing " "reported MSIX vectors from 3 to 5...\n"); edata &= ~(EM_NVM_MSIX_N_MASK); edata |= 4 << EM_NVM_MSIX_N_SHIFT; e1000_write_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata); e1000_update_nvm_checksum(hw); device_printf(dev, "Writing to eeprom: done\n"); } } Index: head/sys/dev/ixl/if_ixlv.c =================================================================== --- head/sys/dev/ixl/if_ixlv.c (revision 327827) +++ head/sys/dev/ixl/if_ixlv.c (revision 327828) @@ -1,3104 +1,3104 @@ /****************************************************************************** Copyright (c) 2013-2015, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************/ /*$FreeBSD$*/ #include "ixl.h" #include "ixlv.h" /********************************************************************* * Driver version *********************************************************************/ char ixlv_driver_version[] = "1.4.12-k"; /********************************************************************* * PCI Device ID Table * * Used by probe to select devices to load on * Last field stores an index into ixlv_strings * Last entry must be all 0s * * { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index } *********************************************************************/ static ixl_vendor_info_t ixlv_vendor_info_array[] = { {I40E_INTEL_VENDOR_ID, I40E_DEV_ID_VF, 0, 0, 0}, {I40E_INTEL_VENDOR_ID, I40E_DEV_ID_X722_VF, 0, 0, 0}, {I40E_INTEL_VENDOR_ID, I40E_DEV_ID_X722_A0_VF, 0, 0, 0}, /* required last entry */ {0, 0, 0, 0, 0} }; /********************************************************************* * Table of branding strings *********************************************************************/ static char *ixlv_strings[] = { "Intel(R) Ethernet Connection XL710/X722 VF Driver" }; /********************************************************************* * Function prototypes *********************************************************************/ static int ixlv_probe(device_t); static int ixlv_attach(device_t); static int ixlv_detach(device_t); static int ixlv_shutdown(device_t); static void ixlv_init_locked(struct ixlv_sc *); static int ixlv_allocate_pci_resources(struct ixlv_sc *); static void ixlv_free_pci_resources(struct ixlv_sc *); static int ixlv_assign_msix(struct ixlv_sc *); static int ixlv_init_msix(struct ixlv_sc *); static int ixlv_init_taskqueue(struct ixlv_sc *); static int ixlv_setup_queues(struct ixlv_sc *); static void ixlv_config_rss(struct ixlv_sc *); static void ixlv_stop(struct ixlv_sc *); static void ixlv_add_multi(struct ixl_vsi *); static void ixlv_del_multi(struct ixl_vsi *); static void ixlv_free_queues(struct ixl_vsi *); static int ixlv_setup_interface(device_t, struct ixlv_sc *); static int ixlv_teardown_adminq_msix(struct ixlv_sc *); static int ixlv_media_change(struct ifnet *); static void ixlv_media_status(struct ifnet *, struct ifmediareq *); static void ixlv_local_timer(void *); static int ixlv_add_mac_filter(struct ixlv_sc *, u8 *, u16); static int ixlv_del_mac_filter(struct ixlv_sc *sc, u8 *macaddr); static void ixlv_init_filters(struct ixlv_sc *); static void ixlv_free_filters(struct ixlv_sc *); static void ixlv_msix_que(void *); static void ixlv_msix_adminq(void *); static void ixlv_do_adminq(void *, int); static void ixlv_do_adminq_locked(struct ixlv_sc *sc); static void ixlv_handle_que(void *, int); static int ixlv_reset(struct ixlv_sc *); static int ixlv_reset_complete(struct i40e_hw *); static void ixlv_set_queue_rx_itr(struct ixl_queue *); static void ixlv_set_queue_tx_itr(struct ixl_queue *); static void ixl_init_cmd_complete(struct ixl_vc_cmd *, void *, enum i40e_status_code); static void ixlv_configure_itr(struct ixlv_sc *); static void ixlv_enable_adminq_irq(struct i40e_hw *); static void ixlv_disable_adminq_irq(struct i40e_hw *); static void ixlv_enable_queue_irq(struct i40e_hw *, int); static void ixlv_disable_queue_irq(struct i40e_hw *, int); static void ixlv_setup_vlan_filters(struct ixlv_sc *); static void ixlv_register_vlan(void *, struct ifnet *, u16); static void ixlv_unregister_vlan(void *, struct ifnet *, u16); static void ixlv_init_hw(struct ixlv_sc *); static int ixlv_setup_vc(struct ixlv_sc *); static int ixlv_vf_config(struct ixlv_sc *); static void ixlv_cap_txcsum_tso(struct ixl_vsi *, struct ifnet *, int); static void ixlv_add_sysctls(struct ixlv_sc *); #ifdef IXL_DEBUG static int ixlv_sysctl_qtx_tail_handler(SYSCTL_HANDLER_ARGS); static int ixlv_sysctl_qrx_tail_handler(SYSCTL_HANDLER_ARGS); #endif /********************************************************************* * FreeBSD Device Interface Entry Points *********************************************************************/ static device_method_t ixlv_methods[] = { /* Device interface */ DEVMETHOD(device_probe, ixlv_probe), DEVMETHOD(device_attach, ixlv_attach), DEVMETHOD(device_detach, ixlv_detach), DEVMETHOD(device_shutdown, ixlv_shutdown), {0, 0} }; static driver_t ixlv_driver = { "ixlv", ixlv_methods, sizeof(struct ixlv_sc), }; devclass_t ixlv_devclass; DRIVER_MODULE(ixlv, pci, ixlv_driver, ixlv_devclass, 0, 0); MODULE_DEPEND(ixlv, pci, 1, 1, 1); MODULE_DEPEND(ixlv, ether, 1, 1, 1); /* ** TUNEABLE PARAMETERS: */ static SYSCTL_NODE(_hw, OID_AUTO, ixlv, CTLFLAG_RD, 0, "IXLV driver parameters"); /* ** Number of descriptors per ring: ** - TX and RX are the same size */ static int ixlv_ringsz = IXL_DEFAULT_RING; TUNABLE_INT("hw.ixlv.ringsz", &ixlv_ringsz); SYSCTL_INT(_hw_ixlv, OID_AUTO, ring_size, CTLFLAG_RDTUN, &ixlv_ringsz, 0, "Descriptor Ring Size"); /* Set to zero to auto calculate */ int ixlv_max_queues = 0; TUNABLE_INT("hw.ixlv.max_queues", &ixlv_max_queues); SYSCTL_INT(_hw_ixlv, OID_AUTO, max_queues, CTLFLAG_RDTUN, &ixlv_max_queues, 0, "Number of Queues"); /* ** Number of entries in Tx queue buf_ring. ** Increasing this will reduce the number of ** errors when transmitting fragmented UDP ** packets. */ static int ixlv_txbrsz = DEFAULT_TXBRSZ; TUNABLE_INT("hw.ixlv.txbrsz", &ixlv_txbrsz); SYSCTL_INT(_hw_ixlv, OID_AUTO, txbr_size, CTLFLAG_RDTUN, &ixlv_txbrsz, 0, "TX Buf Ring Size"); /* ** Controls for Interrupt Throttling ** - true/false for dynamic adjustment ** - default values for static ITR */ int ixlv_dynamic_rx_itr = 0; TUNABLE_INT("hw.ixlv.dynamic_rx_itr", &ixlv_dynamic_rx_itr); SYSCTL_INT(_hw_ixlv, OID_AUTO, dynamic_rx_itr, CTLFLAG_RDTUN, &ixlv_dynamic_rx_itr, 0, "Dynamic RX Interrupt Rate"); int ixlv_dynamic_tx_itr = 0; TUNABLE_INT("hw.ixlv.dynamic_tx_itr", &ixlv_dynamic_tx_itr); SYSCTL_INT(_hw_ixlv, OID_AUTO, dynamic_tx_itr, CTLFLAG_RDTUN, &ixlv_dynamic_tx_itr, 0, "Dynamic TX Interrupt Rate"); int ixlv_rx_itr = IXL_ITR_8K; TUNABLE_INT("hw.ixlv.rx_itr", &ixlv_rx_itr); SYSCTL_INT(_hw_ixlv, OID_AUTO, rx_itr, CTLFLAG_RDTUN, &ixlv_rx_itr, 0, "RX Interrupt Rate"); int ixlv_tx_itr = IXL_ITR_4K; TUNABLE_INT("hw.ixlv.tx_itr", &ixlv_tx_itr); SYSCTL_INT(_hw_ixlv, OID_AUTO, tx_itr, CTLFLAG_RDTUN, &ixlv_tx_itr, 0, "TX Interrupt Rate"); /********************************************************************* * Device identification routine * * ixlv_probe determines if the driver should be loaded on * the hardware based on PCI vendor/device id of the device. * * return BUS_PROBE_DEFAULT on success, positive on failure *********************************************************************/ static int ixlv_probe(device_t dev) { ixl_vendor_info_t *ent; u16 pci_vendor_id, pci_device_id; u16 pci_subvendor_id, pci_subdevice_id; char device_name[256]; #if 0 INIT_DEBUGOUT("ixlv_probe: begin"); #endif pci_vendor_id = pci_get_vendor(dev); if (pci_vendor_id != I40E_INTEL_VENDOR_ID) return (ENXIO); pci_device_id = pci_get_device(dev); pci_subvendor_id = pci_get_subvendor(dev); pci_subdevice_id = pci_get_subdevice(dev); ent = ixlv_vendor_info_array; while (ent->vendor_id != 0) { if ((pci_vendor_id == ent->vendor_id) && (pci_device_id == ent->device_id) && ((pci_subvendor_id == ent->subvendor_id) || (ent->subvendor_id == 0)) && ((pci_subdevice_id == ent->subdevice_id) || (ent->subdevice_id == 0))) { sprintf(device_name, "%s, Version - %s", ixlv_strings[ent->index], ixlv_driver_version); device_set_desc_copy(dev, device_name); return (BUS_PROBE_DEFAULT); } ent++; } return (ENXIO); } /********************************************************************* * Device initialization routine * * The attach entry point is called when the driver is being loaded. * This routine identifies the type of hardware, allocates all resources * and initializes the hardware. * * return 0 on success, positive on failure *********************************************************************/ static int ixlv_attach(device_t dev) { struct ixlv_sc *sc; struct i40e_hw *hw; struct ixl_vsi *vsi; int error = 0; INIT_DBG_DEV(dev, "begin"); /* Allocate, clear, and link in our primary soft structure */ sc = device_get_softc(dev); sc->dev = sc->osdep.dev = dev; hw = &sc->hw; vsi = &sc->vsi; vsi->dev = dev; /* Initialize hw struct */ ixlv_init_hw(sc); /* Allocate filter lists */ ixlv_init_filters(sc); /* Core Lock Init */ mtx_init(&sc->mtx, device_get_nameunit(dev), "IXL SC Lock", MTX_DEF); /* Set up the timer callout */ callout_init_mtx(&sc->timer, &sc->mtx, 0); /* Do PCI setup - map BAR0, etc */ if (ixlv_allocate_pci_resources(sc)) { device_printf(dev, "%s: Allocation of PCI resources failed\n", __func__); error = ENXIO; goto err_early; } INIT_DBG_DEV(dev, "Allocated PCI resources and MSIX vectors"); error = i40e_set_mac_type(hw); if (error) { device_printf(dev, "%s: set_mac_type failed: %d\n", __func__, error); goto err_pci_res; } error = ixlv_reset_complete(hw); if (error) { device_printf(dev, "%s: Device is still being reset\n", __func__); goto err_pci_res; } INIT_DBG_DEV(dev, "VF Device is ready for configuration"); error = ixlv_setup_vc(sc); if (error) { device_printf(dev, "%s: Error setting up PF comms, %d\n", __func__, error); goto err_pci_res; } INIT_DBG_DEV(dev, "PF API version verified"); /* Need API version before sending reset message */ error = ixlv_reset(sc); if (error) { device_printf(dev, "VF reset failed; reload the driver\n"); goto err_aq; } INIT_DBG_DEV(dev, "VF reset complete"); /* Ask for VF config from PF */ error = ixlv_vf_config(sc); if (error) { device_printf(dev, "Error getting configuration from PF: %d\n", error); goto err_aq; } device_printf(dev, "VSIs %d, QPs %d, MSIX %d, RSS sizes: key %d lut %d\n", sc->vf_res->num_vsis, sc->vf_res->num_queue_pairs, sc->vf_res->max_vectors, sc->vf_res->rss_key_size, sc->vf_res->rss_lut_size); #ifdef IXL_DEBUG device_printf(dev, "Offload flags: 0x%b\n", sc->vf_res->vf_offload_flags, IXLV_PRINTF_VF_OFFLOAD_FLAGS); #endif /* got VF config message back from PF, now we can parse it */ for (int i = 0; i < sc->vf_res->num_vsis; i++) { if (sc->vf_res->vsi_res[i].vsi_type == I40E_VSI_SRIOV) sc->vsi_res = &sc->vf_res->vsi_res[i]; } if (!sc->vsi_res) { device_printf(dev, "%s: no LAN VSI found\n", __func__); error = EIO; goto err_res_buf; } INIT_DBG_DEV(dev, "Resource Acquisition complete"); /* If no mac address was assigned just make a random one */ if (!ixlv_check_ether_addr(hw->mac.addr)) { u8 addr[ETHER_ADDR_LEN]; arc4rand(&addr, sizeof(addr), 0); addr[0] &= 0xFE; addr[0] |= 0x02; bcopy(addr, hw->mac.addr, sizeof(addr)); } /* Now that the number of queues for this VF is known, set up interrupts */ sc->msix = ixlv_init_msix(sc); /* We fail without MSIX support */ if (sc->msix == 0) { error = ENXIO; goto err_res_buf; } vsi->id = sc->vsi_res->vsi_id; vsi->back = (void *)sc; sc->link_up = TRUE; /* This allocates the memory and early settings */ if (ixlv_setup_queues(sc) != 0) { device_printf(dev, "%s: setup queues failed!\n", __func__); error = EIO; goto out; } /* Setup the stack interface */ if (ixlv_setup_interface(dev, sc) != 0) { device_printf(dev, "%s: setup interface failed!\n", __func__); error = EIO; goto out; } INIT_DBG_DEV(dev, "Queue memory and interface setup"); /* Do queue interrupt setup */ if (ixlv_assign_msix(sc) != 0) { device_printf(dev, "%s: allocating queue interrupts failed!\n", __func__); error = ENXIO; goto out; } /* Start AdminQ taskqueue */ ixlv_init_taskqueue(sc); /* Initialize stats */ bzero(&sc->vsi.eth_stats, sizeof(struct i40e_eth_stats)); ixlv_add_sysctls(sc); /* Register for VLAN events */ vsi->vlan_attach = EVENTHANDLER_REGISTER(vlan_config, ixlv_register_vlan, vsi, EVENTHANDLER_PRI_FIRST); vsi->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig, ixlv_unregister_vlan, vsi, EVENTHANDLER_PRI_FIRST); /* We want AQ enabled early */ ixlv_enable_adminq_irq(hw); /* Set things up to run init */ sc->init_state = IXLV_INIT_READY; ixl_vc_init_mgr(sc, &sc->vc_mgr); INIT_DBG_DEV(dev, "end"); return (error); out: ixlv_free_queues(vsi); err_res_buf: free(sc->vf_res, M_DEVBUF); err_aq: i40e_shutdown_adminq(hw); err_pci_res: ixlv_free_pci_resources(sc); err_early: mtx_destroy(&sc->mtx); ixlv_free_filters(sc); INIT_DBG_DEV(dev, "end: error %d", error); return (error); } /********************************************************************* * Device removal routine * * The detach entry point is called when the driver is being removed. * This routine stops the adapter and deallocates all the resources * that were allocated for driver operation. * * return 0 on success, positive on failure *********************************************************************/ static int ixlv_detach(device_t dev) { struct ixlv_sc *sc = device_get_softc(dev); struct ixl_vsi *vsi = &sc->vsi; struct i40e_hw *hw = &sc->hw; enum i40e_status_code status; INIT_DBG_DEV(dev, "begin"); /* Make sure VLANS are not using driver */ if (vsi->ifp->if_vlantrunk != NULL) { if_printf(vsi->ifp, "Vlan in use, detach first\n"); return (EBUSY); } /* Stop driver */ ether_ifdetach(vsi->ifp); if (vsi->ifp->if_drv_flags & IFF_DRV_RUNNING) { mtx_lock(&sc->mtx); ixlv_stop(sc); mtx_unlock(&sc->mtx); } /* Unregister VLAN events */ if (vsi->vlan_attach != NULL) EVENTHANDLER_DEREGISTER(vlan_config, vsi->vlan_attach); if (vsi->vlan_detach != NULL) EVENTHANDLER_DEREGISTER(vlan_unconfig, vsi->vlan_detach); /* Drain VC mgr */ callout_drain(&sc->vc_mgr.callout); ixlv_disable_adminq_irq(hw); ixlv_teardown_adminq_msix(sc); /* Drain admin queue taskqueue */ taskqueue_free(sc->tq); status = i40e_shutdown_adminq(&sc->hw); if (status != I40E_SUCCESS) { device_printf(dev, "i40e_shutdown_adminq() failed with status %s\n", i40e_stat_str(hw, status)); } if_free(vsi->ifp); free(sc->vf_res, M_DEVBUF); ixlv_free_pci_resources(sc); ixlv_free_queues(vsi); ixlv_free_filters(sc); bus_generic_detach(dev); mtx_destroy(&sc->mtx); INIT_DBG_DEV(dev, "end"); return (0); } /********************************************************************* * * Shutdown entry point * **********************************************************************/ static int ixlv_shutdown(device_t dev) { struct ixlv_sc *sc = device_get_softc(dev); INIT_DBG_DEV(dev, "begin"); mtx_lock(&sc->mtx); ixlv_stop(sc); mtx_unlock(&sc->mtx); INIT_DBG_DEV(dev, "end"); return (0); } /* * Configure TXCSUM(IPV6) and TSO(4/6) * - the hardware handles these together so we * need to tweak them */ static void ixlv_cap_txcsum_tso(struct ixl_vsi *vsi, struct ifnet *ifp, int mask) { /* Enable/disable TXCSUM/TSO4 */ if (!(ifp->if_capenable & IFCAP_TXCSUM) && !(ifp->if_capenable & IFCAP_TSO4)) { if (mask & IFCAP_TXCSUM) { ifp->if_capenable |= IFCAP_TXCSUM; /* enable TXCSUM, restore TSO if previously enabled */ if (vsi->flags & IXL_FLAGS_KEEP_TSO4) { vsi->flags &= ~IXL_FLAGS_KEEP_TSO4; ifp->if_capenable |= IFCAP_TSO4; } } else if (mask & IFCAP_TSO4) { ifp->if_capenable |= (IFCAP_TXCSUM | IFCAP_TSO4); vsi->flags &= ~IXL_FLAGS_KEEP_TSO4; if_printf(ifp, "TSO4 requires txcsum, enabling both...\n"); } } else if((ifp->if_capenable & IFCAP_TXCSUM) && !(ifp->if_capenable & IFCAP_TSO4)) { if (mask & IFCAP_TXCSUM) ifp->if_capenable &= ~IFCAP_TXCSUM; else if (mask & IFCAP_TSO4) ifp->if_capenable |= IFCAP_TSO4; } else if((ifp->if_capenable & IFCAP_TXCSUM) && (ifp->if_capenable & IFCAP_TSO4)) { if (mask & IFCAP_TXCSUM) { vsi->flags |= IXL_FLAGS_KEEP_TSO4; ifp->if_capenable &= ~(IFCAP_TXCSUM | IFCAP_TSO4); if_printf(ifp, "TSO4 requires txcsum, disabling both...\n"); } else if (mask & IFCAP_TSO4) ifp->if_capenable &= ~IFCAP_TSO4; } /* Enable/disable TXCSUM_IPV6/TSO6 */ if (!(ifp->if_capenable & IFCAP_TXCSUM_IPV6) && !(ifp->if_capenable & IFCAP_TSO6)) { if (mask & IFCAP_TXCSUM_IPV6) { ifp->if_capenable |= IFCAP_TXCSUM_IPV6; if (vsi->flags & IXL_FLAGS_KEEP_TSO6) { vsi->flags &= ~IXL_FLAGS_KEEP_TSO6; ifp->if_capenable |= IFCAP_TSO6; } } else if (mask & IFCAP_TSO6) { ifp->if_capenable |= (IFCAP_TXCSUM_IPV6 | IFCAP_TSO6); vsi->flags &= ~IXL_FLAGS_KEEP_TSO6; if_printf(ifp, "TSO6 requires txcsum6, enabling both...\n"); } } else if((ifp->if_capenable & IFCAP_TXCSUM_IPV6) && !(ifp->if_capenable & IFCAP_TSO6)) { if (mask & IFCAP_TXCSUM_IPV6) ifp->if_capenable &= ~IFCAP_TXCSUM_IPV6; else if (mask & IFCAP_TSO6) ifp->if_capenable |= IFCAP_TSO6; } else if ((ifp->if_capenable & IFCAP_TXCSUM_IPV6) && (ifp->if_capenable & IFCAP_TSO6)) { if (mask & IFCAP_TXCSUM_IPV6) { vsi->flags |= IXL_FLAGS_KEEP_TSO6; ifp->if_capenable &= ~(IFCAP_TXCSUM_IPV6 | IFCAP_TSO6); if_printf(ifp, "TSO6 requires txcsum6, disabling both...\n"); } else if (mask & IFCAP_TSO6) ifp->if_capenable &= ~IFCAP_TSO6; } } /********************************************************************* * Ioctl entry point * * ixlv_ioctl is called when the user wants to configure the * interface. * * return 0 on success, positive on failure **********************************************************************/ static int ixlv_ioctl(struct ifnet *ifp, u_long command, caddr_t data) { struct ixl_vsi *vsi = ifp->if_softc; struct ixlv_sc *sc = vsi->back; struct ifreq *ifr = (struct ifreq *)data; #if defined(INET) || defined(INET6) struct ifaddr *ifa = (struct ifaddr *)data; bool avoid_reset = FALSE; #endif int error = 0; switch (command) { case SIOCSIFADDR: #ifdef INET if (ifa->ifa_addr->sa_family == AF_INET) avoid_reset = TRUE; #endif #ifdef INET6 if (ifa->ifa_addr->sa_family == AF_INET6) avoid_reset = TRUE; #endif #if defined(INET) || defined(INET6) /* ** Calling init results in link renegotiation, ** so we avoid doing it when possible. */ if (avoid_reset) { ifp->if_flags |= IFF_UP; if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) ixlv_init(vsi); #ifdef INET if (!(ifp->if_flags & IFF_NOARP)) arp_ifinit(ifp, ifa); #endif } else error = ether_ioctl(ifp, command, data); break; #endif case SIOCSIFMTU: IOCTL_DBG_IF2(ifp, "SIOCSIFMTU (Set Interface MTU)"); mtx_lock(&sc->mtx); if (ifr->ifr_mtu > IXL_MAX_FRAME - ETHER_HDR_LEN - ETHER_CRC_LEN - ETHER_VLAN_ENCAP_LEN) { error = EINVAL; IOCTL_DBG_IF(ifp, "mtu too large"); } else { IOCTL_DBG_IF2(ifp, "mtu: %lu -> %d", (u_long)ifp->if_mtu, ifr->ifr_mtu); // ERJ: Interestingly enough, these types don't match ifp->if_mtu = (u_long)ifr->ifr_mtu; vsi->max_frame_size = ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN + ETHER_VLAN_ENCAP_LEN; if (ifp->if_drv_flags & IFF_DRV_RUNNING) ixlv_init_locked(sc); } mtx_unlock(&sc->mtx); break; case SIOCSIFFLAGS: IOCTL_DBG_IF2(ifp, "SIOCSIFFLAGS (Set Interface Flags)"); mtx_lock(&sc->mtx); if (ifp->if_flags & IFF_UP) { if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ixlv_init_locked(sc); } else if (ifp->if_drv_flags & IFF_DRV_RUNNING) ixlv_stop(sc); sc->if_flags = ifp->if_flags; mtx_unlock(&sc->mtx); break; case SIOCADDMULTI: IOCTL_DBG_IF2(ifp, "SIOCADDMULTI"); if (ifp->if_drv_flags & IFF_DRV_RUNNING) { mtx_lock(&sc->mtx); ixlv_disable_intr(vsi); ixlv_add_multi(vsi); ixlv_enable_intr(vsi); mtx_unlock(&sc->mtx); } break; case SIOCDELMULTI: IOCTL_DBG_IF2(ifp, "SIOCDELMULTI"); if (sc->init_state == IXLV_RUNNING) { mtx_lock(&sc->mtx); ixlv_disable_intr(vsi); ixlv_del_multi(vsi); ixlv_enable_intr(vsi); mtx_unlock(&sc->mtx); } break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: IOCTL_DBG_IF2(ifp, "SIOCxIFMEDIA (Get/Set Interface Media)"); error = ifmedia_ioctl(ifp, ifr, &sc->media, command); break; case SIOCSIFCAP: { int mask = ifr->ifr_reqcap ^ ifp->if_capenable; IOCTL_DBG_IF2(ifp, "SIOCSIFCAP (Set Capabilities)"); ixlv_cap_txcsum_tso(vsi, ifp, mask); if (mask & IFCAP_RXCSUM) ifp->if_capenable ^= IFCAP_RXCSUM; if (mask & IFCAP_RXCSUM_IPV6) ifp->if_capenable ^= IFCAP_RXCSUM_IPV6; if (mask & IFCAP_LRO) ifp->if_capenable ^= IFCAP_LRO; if (mask & IFCAP_VLAN_HWTAGGING) ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; if (mask & IFCAP_VLAN_HWFILTER) ifp->if_capenable ^= IFCAP_VLAN_HWFILTER; if (mask & IFCAP_VLAN_HWTSO) ifp->if_capenable ^= IFCAP_VLAN_HWTSO; if (ifp->if_drv_flags & IFF_DRV_RUNNING) { ixlv_init(vsi); } VLAN_CAPABILITIES(ifp); break; } default: IOCTL_DBG_IF2(ifp, "UNKNOWN (0x%X)", (int)command); error = ether_ioctl(ifp, command, data); break; } return (error); } /* ** To do a reinit on the VF is unfortunately more complicated ** than a physical device, we must have the PF more or less ** completely recreate our memory, so many things that were ** done only once at attach in traditional drivers now must be ** redone at each reinitialization. This function does that ** 'prelude' so we can then call the normal locked init code. */ int ixlv_reinit_locked(struct ixlv_sc *sc) { struct i40e_hw *hw = &sc->hw; struct ixl_vsi *vsi = &sc->vsi; struct ifnet *ifp = vsi->ifp; struct ixlv_mac_filter *mf, *mf_temp; struct ixlv_vlan_filter *vf; int error = 0; INIT_DBG_IF(ifp, "begin"); if (ifp->if_drv_flags & IFF_DRV_RUNNING) ixlv_stop(sc); error = ixlv_reset(sc); INIT_DBG_IF(ifp, "VF was reset"); /* set the state in case we went thru RESET */ sc->init_state = IXLV_RUNNING; /* ** Resetting the VF drops all filters from hardware; ** we need to mark them to be re-added in init. */ SLIST_FOREACH_SAFE(mf, sc->mac_filters, next, mf_temp) { if (mf->flags & IXL_FILTER_DEL) { SLIST_REMOVE(sc->mac_filters, mf, ixlv_mac_filter, next); free(mf, M_DEVBUF); } else mf->flags |= IXL_FILTER_ADD; } if (vsi->num_vlans != 0) SLIST_FOREACH(vf, sc->vlan_filters, next) vf->flags = IXL_FILTER_ADD; else { /* clean any stale filters */ while (!SLIST_EMPTY(sc->vlan_filters)) { vf = SLIST_FIRST(sc->vlan_filters); SLIST_REMOVE_HEAD(sc->vlan_filters, next); free(vf, M_DEVBUF); } } ixlv_enable_adminq_irq(hw); ixl_vc_flush(&sc->vc_mgr); INIT_DBG_IF(ifp, "end"); return (error); } static void ixl_init_cmd_complete(struct ixl_vc_cmd *cmd, void *arg, enum i40e_status_code code) { struct ixlv_sc *sc; sc = arg; /* * Ignore "Adapter Stopped" message as that happens if an ifconfig down * happens while a command is in progress, so we don't print an error * in that case. */ if (code != I40E_SUCCESS && code != I40E_ERR_ADAPTER_STOPPED) { if_printf(sc->vsi.ifp, "Error %s waiting for PF to complete operation %d\n", i40e_stat_str(&sc->hw, code), cmd->request); } } static void ixlv_init_locked(struct ixlv_sc *sc) { struct i40e_hw *hw = &sc->hw; struct ixl_vsi *vsi = &sc->vsi; struct ixl_queue *que = vsi->queues; struct ifnet *ifp = vsi->ifp; int error = 0; INIT_DBG_IF(ifp, "begin"); IXLV_CORE_LOCK_ASSERT(sc); /* Do a reinit first if an init has already been done */ if ((sc->init_state == IXLV_RUNNING) || (sc->init_state == IXLV_RESET_REQUIRED) || (sc->init_state == IXLV_RESET_PENDING)) error = ixlv_reinit_locked(sc); /* Don't bother with init if we failed reinit */ if (error) goto init_done; /* Remove existing MAC filter if new MAC addr is set */ if (bcmp(IF_LLADDR(ifp), hw->mac.addr, ETHER_ADDR_LEN) != 0) { error = ixlv_del_mac_filter(sc, hw->mac.addr); if (error == 0) ixl_vc_enqueue(&sc->vc_mgr, &sc->del_mac_cmd, IXLV_FLAG_AQ_DEL_MAC_FILTER, ixl_init_cmd_complete, sc); } /* Check for an LAA mac address... */ bcopy(IF_LLADDR(ifp), hw->mac.addr, ETHER_ADDR_LEN); ifp->if_hwassist = 0; if (ifp->if_capenable & IFCAP_TSO) ifp->if_hwassist |= CSUM_TSO; if (ifp->if_capenable & IFCAP_TXCSUM) ifp->if_hwassist |= (CSUM_OFFLOAD_IPV4 & ~CSUM_IP); if (ifp->if_capenable & IFCAP_TXCSUM_IPV6) ifp->if_hwassist |= CSUM_OFFLOAD_IPV6; /* Add mac filter for this VF to PF */ if (i40e_validate_mac_addr(hw->mac.addr) == I40E_SUCCESS) { error = ixlv_add_mac_filter(sc, hw->mac.addr, 0); if (!error || error == EEXIST) ixl_vc_enqueue(&sc->vc_mgr, &sc->add_mac_cmd, IXLV_FLAG_AQ_ADD_MAC_FILTER, ixl_init_cmd_complete, sc); } /* Setup vlan's if needed */ ixlv_setup_vlan_filters(sc); /* Prepare the queues for operation */ for (int i = 0; i < vsi->num_queues; i++, que++) { struct rx_ring *rxr = &que->rxr; ixl_init_tx_ring(que); if (vsi->max_frame_size <= MCLBYTES) rxr->mbuf_sz = MCLBYTES; else rxr->mbuf_sz = MJUMPAGESIZE; ixl_init_rx_ring(que); } /* Set initial ITR values */ ixlv_configure_itr(sc); /* Configure queues */ ixl_vc_enqueue(&sc->vc_mgr, &sc->config_queues_cmd, IXLV_FLAG_AQ_CONFIGURE_QUEUES, ixl_init_cmd_complete, sc); /* Set up RSS */ ixlv_config_rss(sc); /* Map vectors */ ixl_vc_enqueue(&sc->vc_mgr, &sc->map_vectors_cmd, IXLV_FLAG_AQ_MAP_VECTORS, ixl_init_cmd_complete, sc); /* Enable queues */ ixl_vc_enqueue(&sc->vc_mgr, &sc->enable_queues_cmd, IXLV_FLAG_AQ_ENABLE_QUEUES, ixl_init_cmd_complete, sc); /* Start the local timer */ callout_reset(&sc->timer, hz, ixlv_local_timer, sc); sc->init_state = IXLV_RUNNING; init_done: INIT_DBG_IF(ifp, "end"); return; } /* ** Init entry point for the stack */ void ixlv_init(void *arg) { struct ixl_vsi *vsi = (struct ixl_vsi *)arg; struct ixlv_sc *sc = vsi->back; int retries = 0; /* Prevent init from running again while waiting for AQ calls * made in init_locked() to complete. */ mtx_lock(&sc->mtx); if (sc->init_in_progress) { mtx_unlock(&sc->mtx); return; } else sc->init_in_progress = true; ixlv_init_locked(sc); mtx_unlock(&sc->mtx); /* Wait for init_locked to finish */ while (!(vsi->ifp->if_drv_flags & IFF_DRV_RUNNING) && ++retries < IXLV_MAX_INIT_WAIT) { i40e_msec_pause(25); } if (retries >= IXLV_MAX_INIT_WAIT) { if_printf(vsi->ifp, "Init failed to complete in allotted time!\n"); } mtx_lock(&sc->mtx); sc->init_in_progress = false; mtx_unlock(&sc->mtx); } /* * ixlv_attach() helper function; gathers information about * the (virtual) hardware for use elsewhere in the driver. */ static void ixlv_init_hw(struct ixlv_sc *sc) { struct i40e_hw *hw = &sc->hw; device_t dev = sc->dev; /* Save off the information about this board */ hw->vendor_id = pci_get_vendor(dev); hw->device_id = pci_get_device(dev); hw->revision_id = pci_read_config(dev, PCIR_REVID, 1); hw->subsystem_vendor_id = pci_read_config(dev, PCIR_SUBVEND_0, 2); hw->subsystem_device_id = pci_read_config(dev, PCIR_SUBDEV_0, 2); hw->bus.device = pci_get_slot(dev); hw->bus.func = pci_get_function(dev); } /* * ixlv_attach() helper function; initalizes the admin queue * and attempts to establish contact with the PF by * retrying the initial "API version" message several times * or until the PF responds. */ static int ixlv_setup_vc(struct ixlv_sc *sc) { struct i40e_hw *hw = &sc->hw; device_t dev = sc->dev; int error = 0, ret_error = 0, asq_retries = 0; bool send_api_ver_retried = 0; /* Need to set these AQ paramters before initializing AQ */ hw->aq.num_arq_entries = IXL_AQ_LEN; hw->aq.num_asq_entries = IXL_AQ_LEN; hw->aq.arq_buf_size = IXL_AQ_BUF_SZ; hw->aq.asq_buf_size = IXL_AQ_BUF_SZ; for (int i = 0; i < IXLV_AQ_MAX_ERR; i++) { /* Initialize admin queue */ error = i40e_init_adminq(hw); if (error) { device_printf(dev, "%s: init_adminq failed: %d\n", __func__, error); ret_error = 1; continue; } INIT_DBG_DEV(dev, "Initialized Admin Queue; starting" " send_api_ver attempt %d", i+1); retry_send: /* Send VF's API version */ error = ixlv_send_api_ver(sc); if (error) { i40e_shutdown_adminq(hw); ret_error = 2; device_printf(dev, "%s: unable to send api" " version to PF on attempt %d, error %d\n", __func__, i+1, error); } asq_retries = 0; while (!i40e_asq_done(hw)) { if (++asq_retries > IXLV_AQ_MAX_ERR) { i40e_shutdown_adminq(hw); device_printf(dev, "Admin Queue timeout " "(waiting for send_api_ver), %d more tries...\n", IXLV_AQ_MAX_ERR - (i + 1)); ret_error = 3; break; } i40e_msec_pause(10); } if (asq_retries > IXLV_AQ_MAX_ERR) continue; INIT_DBG_DEV(dev, "Sent API version message to PF"); /* Verify that the VF accepts the PF's API version */ error = ixlv_verify_api_ver(sc); if (error == ETIMEDOUT) { if (!send_api_ver_retried) { /* Resend message, one more time */ send_api_ver_retried = true; device_printf(dev, "%s: Timeout while verifying API version on first" " try!\n", __func__); goto retry_send; } else { device_printf(dev, "%s: Timeout while verifying API version on second" " try!\n", __func__); ret_error = 4; break; } } if (error) { device_printf(dev, "%s: Unable to verify API version," " error %s\n", __func__, i40e_stat_str(hw, error)); ret_error = 5; } break; } if (ret_error >= 4) i40e_shutdown_adminq(hw); return (ret_error); } /* * ixlv_attach() helper function; asks the PF for this VF's * configuration, and saves the information if it receives it. */ static int ixlv_vf_config(struct ixlv_sc *sc) { struct i40e_hw *hw = &sc->hw; device_t dev = sc->dev; int bufsz, error = 0, ret_error = 0; int asq_retries, retried = 0; retry_config: error = ixlv_send_vf_config_msg(sc); if (error) { device_printf(dev, "%s: Unable to send VF config request, attempt %d," " error %d\n", __func__, retried + 1, error); ret_error = 2; } asq_retries = 0; while (!i40e_asq_done(hw)) { if (++asq_retries > IXLV_AQ_MAX_ERR) { device_printf(dev, "%s: Admin Queue timeout " "(waiting for send_vf_config_msg), attempt %d\n", __func__, retried + 1); ret_error = 3; goto fail; } i40e_msec_pause(10); } INIT_DBG_DEV(dev, "Sent VF config message to PF, attempt %d", retried + 1); if (!sc->vf_res) { bufsz = sizeof(struct i40e_virtchnl_vf_resource) + (I40E_MAX_VF_VSI * sizeof(struct i40e_virtchnl_vsi_resource)); sc->vf_res = malloc(bufsz, M_DEVBUF, M_NOWAIT); if (!sc->vf_res) { device_printf(dev, "%s: Unable to allocate memory for VF configuration" " message from PF on attempt %d\n", __func__, retried + 1); ret_error = 1; goto fail; } } /* Check for VF config response */ error = ixlv_get_vf_config(sc); if (error == ETIMEDOUT) { /* The 1st time we timeout, send the configuration message again */ if (!retried) { retried++; goto retry_config; } device_printf(dev, "%s: ixlv_get_vf_config() timed out waiting for a response\n", __func__); } if (error) { device_printf(dev, "%s: Unable to get VF configuration from PF after %d tries!\n", __func__, retried + 1); ret_error = 4; } goto done; fail: free(sc->vf_res, M_DEVBUF); done: return (ret_error); } /* * Allocate MSI/X vectors, setup the AQ vector early */ static int ixlv_init_msix(struct ixlv_sc *sc) { device_t dev = sc->dev; int rid, want, vectors, queues, available; int auto_max_queues; rid = PCIR_BAR(IXL_MSIX_BAR); sc->msix_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (!sc->msix_mem) { /* May not be enabled */ device_printf(sc->dev, "Unable to map MSIX table\n"); goto fail; } available = pci_msix_count(dev); if (available == 0) { /* system has msix disabled */ bus_release_resource(dev, SYS_RES_MEMORY, rid, sc->msix_mem); sc->msix_mem = NULL; goto fail; } /* Clamp queues to number of CPUs and # of MSI-X vectors available */ auto_max_queues = min(mp_ncpus, available - 1); /* Clamp queues to # assigned to VF by PF */ auto_max_queues = min(auto_max_queues, sc->vf_res->num_queue_pairs); /* Override with tunable value if tunable is less than autoconfig count */ if ((ixlv_max_queues != 0) && (ixlv_max_queues <= auto_max_queues)) queues = ixlv_max_queues; /* Use autoconfig amount if that's lower */ else if ((ixlv_max_queues != 0) && (ixlv_max_queues > auto_max_queues)) { device_printf(dev, "ixlv_max_queues (%d) is too large, using " "autoconfig amount (%d)...\n", ixlv_max_queues, auto_max_queues); queues = auto_max_queues; } /* Limit maximum auto-configured queues to 8 if no user value is set */ else queues = min(auto_max_queues, 8); #ifdef RSS /* If we're doing RSS, clamp at the number of RSS buckets */ if (queues > rss_getnumbuckets()) queues = rss_getnumbuckets(); #endif /* ** Want one vector (RX/TX pair) per queue ** plus an additional for the admin queue. */ want = queues + 1; if (want <= available) /* Have enough */ vectors = want; else { device_printf(sc->dev, "MSIX Configuration Problem, " "%d vectors available but %d wanted!\n", available, want); goto fail; } #ifdef RSS /* * If we're doing RSS, the number of queues needs to * match the number of RSS buckets that are configured. * * + If there's more queues than RSS buckets, we'll end * up with queues that get no traffic. * * + If there's more RSS buckets than queues, we'll end * up having multiple RSS buckets map to the same queue, * so there'll be some contention. */ if (queues != rss_getnumbuckets()) { device_printf(dev, "%s: queues (%d) != RSS buckets (%d)" "; performance will be impacted.\n", __func__, queues, rss_getnumbuckets()); } #endif if (pci_alloc_msix(dev, &vectors) == 0) { device_printf(sc->dev, "Using MSIX interrupts with %d vectors\n", vectors); sc->msix = vectors; sc->vsi.num_queues = queues; } /* Next we need to setup the vector for the Admin Queue */ rid = 1; /* zero vector + 1 */ sc->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); if (sc->res == NULL) { device_printf(dev, "Unable to allocate" " bus resource: AQ interrupt \n"); goto fail; } if (bus_setup_intr(dev, sc->res, INTR_TYPE_NET | INTR_MPSAFE, NULL, ixlv_msix_adminq, sc, &sc->tag)) { sc->res = NULL; device_printf(dev, "Failed to register AQ handler"); goto fail; } bus_describe_intr(dev, sc->res, sc->tag, "adminq"); return (vectors); fail: /* The VF driver MUST use MSIX */ return (0); } static int ixlv_allocate_pci_resources(struct ixlv_sc *sc) { int rid; device_t dev = sc->dev; rid = PCIR_BAR(0); sc->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (!(sc->pci_mem)) { device_printf(dev, "Unable to allocate bus resource: memory\n"); return (ENXIO); } sc->osdep.mem_bus_space_tag = rman_get_bustag(sc->pci_mem); sc->osdep.mem_bus_space_handle = rman_get_bushandle(sc->pci_mem); sc->osdep.mem_bus_space_size = rman_get_size(sc->pci_mem); sc->osdep.flush_reg = I40E_VFGEN_RSTAT; sc->hw.hw_addr = (u8 *) &sc->osdep.mem_bus_space_handle; sc->hw.back = &sc->osdep; /* ** Explicitly set the guest PCI BUSMASTER capability ** and we must rewrite the ENABLE in the MSIX control ** register again at this point to cause the host to ** successfully initialize us. ** ** This must be set before accessing any registers. */ { u16 pci_cmd_word; int msix_ctrl; pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2); pci_cmd_word |= PCIM_CMD_BUSMASTEREN; pci_write_config(dev, PCIR_COMMAND, pci_cmd_word, 2); pci_find_cap(dev, PCIY_MSIX, &rid); rid += PCIR_MSIX_CTRL; msix_ctrl = pci_read_config(dev, rid, 2); msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE; pci_write_config(dev, rid, msix_ctrl, 2); } /* Disable adminq interrupts (just in case) */ ixlv_disable_adminq_irq(&sc->hw); return (0); } static void ixlv_free_pci_resources(struct ixlv_sc *sc) { struct ixl_vsi *vsi = &sc->vsi; struct ixl_queue *que = vsi->queues; device_t dev = sc->dev; /* We may get here before stations are setup */ if (que == NULL) goto early; /* ** Release all msix queue resources: */ for (int i = 0; i < vsi->num_queues; i++, que++) { int rid = que->msix + 1; if (que->tag != NULL) { bus_teardown_intr(dev, que->res, que->tag); que->tag = NULL; } if (que->res != NULL) { bus_release_resource(dev, SYS_RES_IRQ, rid, que->res); que->res = NULL; } } early: pci_release_msi(dev); if (sc->msix_mem != NULL) bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(IXL_MSIX_BAR), sc->msix_mem); if (sc->pci_mem != NULL) bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(0), sc->pci_mem); } /* * Create taskqueue and tasklet for Admin Queue interrupts. */ static int ixlv_init_taskqueue(struct ixlv_sc *sc) { int error = 0; TASK_INIT(&sc->aq_irq, 0, ixlv_do_adminq, sc); sc->tq = taskqueue_create_fast("ixl_adm", M_NOWAIT, taskqueue_thread_enqueue, &sc->tq); taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s sc->tq", device_get_nameunit(sc->dev)); return (error); } /********************************************************************* * * Setup MSIX Interrupt resources and handlers for the VSI queues * **********************************************************************/ static int ixlv_assign_msix(struct ixlv_sc *sc) { device_t dev = sc->dev; struct ixl_vsi *vsi = &sc->vsi; struct ixl_queue *que = vsi->queues; struct tx_ring *txr; int error, rid, vector = 1; #ifdef RSS cpuset_t cpu_mask; #endif for (int i = 0; i < vsi->num_queues; i++, vector++, que++) { int cpu_id = i; rid = vector + 1; txr = &que->txr; que->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); if (que->res == NULL) { device_printf(dev,"Unable to allocate" " bus resource: que interrupt [%d]\n", vector); return (ENXIO); } /* Set the handler function */ error = bus_setup_intr(dev, que->res, INTR_TYPE_NET | INTR_MPSAFE, NULL, ixlv_msix_que, que, &que->tag); if (error) { que->res = NULL; device_printf(dev, "Failed to register que handler"); return (error); } bus_describe_intr(dev, que->res, que->tag, "que %d", i); /* Bind the vector to a CPU */ #ifdef RSS cpu_id = rss_getcpu(i % rss_getnumbuckets()); #endif bus_bind_intr(dev, que->res, cpu_id); que->msix = vector; TASK_INIT(&que->tx_task, 0, ixl_deferred_mq_start, que); TASK_INIT(&que->task, 0, ixlv_handle_que, que); que->tq = taskqueue_create_fast("ixlv_que", M_NOWAIT, taskqueue_thread_enqueue, &que->tq); #ifdef RSS CPU_SETOF(cpu_id, &cpu_mask); taskqueue_start_threads_cpuset(&que->tq, 1, PI_NET, &cpu_mask, "%s (bucket %d)", device_get_nameunit(dev), cpu_id); #else taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que", device_get_nameunit(dev)); #endif } return (0); } /* ** Requests a VF reset from the PF. ** ** Requires the VF's Admin Queue to be initialized. */ static int ixlv_reset(struct ixlv_sc *sc) { struct i40e_hw *hw = &sc->hw; device_t dev = sc->dev; int error = 0; /* Ask the PF to reset us if we are initiating */ if (sc->init_state != IXLV_RESET_PENDING) ixlv_request_reset(sc); i40e_msec_pause(100); error = ixlv_reset_complete(hw); if (error) { device_printf(dev, "%s: VF reset failed\n", __func__); return (error); } error = i40e_shutdown_adminq(hw); if (error) { device_printf(dev, "%s: shutdown_adminq failed: %d\n", __func__, error); return (error); } error = i40e_init_adminq(hw); if (error) { device_printf(dev, "%s: init_adminq failed: %d\n", __func__, error); return(error); } return (0); } static int ixlv_reset_complete(struct i40e_hw *hw) { u32 reg; /* Wait up to ~10 seconds */ for (int i = 0; i < 100; i++) { reg = rd32(hw, I40E_VFGEN_RSTAT) & I40E_VFGEN_RSTAT_VFR_STATE_MASK; if ((reg == I40E_VFR_VFACTIVE) || (reg == I40E_VFR_COMPLETED)) return (0); i40e_msec_pause(100); } return (EBUSY); } /********************************************************************* * * Setup networking device structure and register an interface. * **********************************************************************/ static int ixlv_setup_interface(device_t dev, struct ixlv_sc *sc) { struct ifnet *ifp; struct ixl_vsi *vsi = &sc->vsi; struct ixl_queue *que = vsi->queues; INIT_DBG_DEV(dev, "begin"); ifp = vsi->ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { device_printf(dev, "%s: could not allocate ifnet" " structure!\n", __func__); return (-1); } if_initname(ifp, device_get_name(dev), device_get_unit(dev)); ifp->if_mtu = ETHERMTU; ifp->if_baudrate = IF_Gbps(40); ifp->if_init = ixlv_init; ifp->if_softc = vsi; ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = ixlv_ioctl; #if __FreeBSD_version >= 1100000 if_setgetcounterfn(ifp, ixl_get_counter); #endif ifp->if_transmit = ixl_mq_start; ifp->if_qflush = ixl_qflush; ifp->if_snd.ifq_maxlen = que->num_desc - 2; ether_ifattach(ifp, sc->hw.mac.addr); vsi->max_frame_size = ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN + ETHER_VLAN_ENCAP_LEN; /* * Tell the upper layer(s) we support long frames. */ ifp->if_hdrlen = sizeof(struct ether_vlan_header); ifp->if_capabilities |= IFCAP_HWCSUM; ifp->if_capabilities |= IFCAP_HWCSUM_IPV6; ifp->if_capabilities |= IFCAP_TSO; ifp->if_capabilities |= IFCAP_JUMBO_MTU; ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWTSO | IFCAP_VLAN_MTU | IFCAP_VLAN_HWCSUM | IFCAP_LRO; ifp->if_capenable = ifp->if_capabilities; /* ** Don't turn this on by default, if vlans are ** created on another pseudo device (eg. lagg) ** then vlan events are not passed thru, breaking ** operation, but with HW FILTER off it works. If ** using vlans directly on the ixl driver you can ** enable this and get full hardware tag filtering. */ ifp->if_capabilities |= IFCAP_VLAN_HWFILTER; /* * Specify the media types supported by this adapter and register * callbacks to update media and link information */ ifmedia_init(&sc->media, IFM_IMASK, ixlv_media_change, ixlv_media_status); // JFV Add media types later? ifmedia_add(&sc->media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(&sc->media, IFM_ETHER | IFM_AUTO); INIT_DBG_DEV(dev, "end"); return (0); } /* ** Allocate and setup the interface queues */ static int ixlv_setup_queues(struct ixlv_sc *sc) { device_t dev = sc->dev; struct ixl_vsi *vsi; struct ixl_queue *que; struct tx_ring *txr; struct rx_ring *rxr; int rsize, tsize; int error = I40E_SUCCESS; vsi = &sc->vsi; vsi->back = (void *)sc; vsi->hw = &sc->hw; vsi->num_vlans = 0; /* Get memory for the station queues */ if (!(vsi->queues = - (struct ixl_queue *) malloc(sizeof(struct ixl_queue) * - vsi->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { + (struct ixl_queue *) mallocarray(vsi->num_queues, + sizeof(struct ixl_queue), M_DEVBUF, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate queue memory\n"); error = ENOMEM; goto early; } for (int i = 0; i < vsi->num_queues; i++) { que = &vsi->queues[i]; que->num_desc = ixlv_ringsz; que->me = i; que->vsi = vsi; txr = &que->txr; txr->que = que; txr->tail = I40E_QTX_TAIL1(que->me); /* Initialize the TX lock */ snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)", device_get_nameunit(dev), que->me); mtx_init(&txr->mtx, txr->mtx_name, NULL, MTX_DEF); /* ** Create the TX descriptor ring, the extra int is ** added as the location for HEAD WB. */ tsize = roundup2((que->num_desc * sizeof(struct i40e_tx_desc)) + sizeof(u32), DBA_ALIGN); if (i40e_allocate_dma_mem(&sc->hw, &txr->dma, i40e_mem_reserved, tsize, DBA_ALIGN)) { device_printf(dev, "Unable to allocate TX Descriptor memory\n"); error = ENOMEM; goto fail; } txr->base = (struct i40e_tx_desc *)txr->dma.va; bzero((void *)txr->base, tsize); /* Now allocate transmit soft structs for the ring */ if (ixl_allocate_tx_data(que)) { device_printf(dev, "Critical Failure setting up TX structures\n"); error = ENOMEM; goto fail; } /* Allocate a buf ring */ txr->br = buf_ring_alloc(ixlv_txbrsz, M_DEVBUF, M_WAITOK, &txr->mtx); if (txr->br == NULL) { device_printf(dev, "Critical Failure setting up TX buf ring\n"); error = ENOMEM; goto fail; } /* * Next the RX queues... */ rsize = roundup2(que->num_desc * sizeof(union i40e_rx_desc), DBA_ALIGN); rxr = &que->rxr; rxr->que = que; rxr->tail = I40E_QRX_TAIL1(que->me); /* Initialize the RX side lock */ snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)", device_get_nameunit(dev), que->me); mtx_init(&rxr->mtx, rxr->mtx_name, NULL, MTX_DEF); if (i40e_allocate_dma_mem(&sc->hw, &rxr->dma, i40e_mem_reserved, rsize, 4096)) { //JFV - should this be DBA? device_printf(dev, "Unable to allocate RX Descriptor memory\n"); error = ENOMEM; goto fail; } rxr->base = (union i40e_rx_desc *)rxr->dma.va; bzero((void *)rxr->base, rsize); /* Allocate receive soft structs for the ring */ if (ixl_allocate_rx_data(que)) { device_printf(dev, "Critical Failure setting up receive structs\n"); error = ENOMEM; goto fail; } } return (0); fail: for (int i = 0; i < vsi->num_queues; i++) { que = &vsi->queues[i]; rxr = &que->rxr; txr = &que->txr; if (rxr->base) i40e_free_dma_mem(&sc->hw, &rxr->dma); if (txr->base) i40e_free_dma_mem(&sc->hw, &txr->dma); } free(vsi->queues, M_DEVBUF); early: return (error); } /* ** This routine is run via an vlan config EVENT, ** it enables us to use the HW Filter table since ** we can get the vlan id. This just creates the ** entry in the soft version of the VFTA, init will ** repopulate the real table. */ static void ixlv_register_vlan(void *arg, struct ifnet *ifp, u16 vtag) { struct ixl_vsi *vsi = arg; struct ixlv_sc *sc = vsi->back; struct ixlv_vlan_filter *v; if (ifp->if_softc != arg) /* Not our event */ return; if ((vtag == 0) || (vtag > 4095)) /* Invalid */ return; /* Sanity check - make sure it doesn't already exist */ SLIST_FOREACH(v, sc->vlan_filters, next) { if (v->vlan == vtag) return; } mtx_lock(&sc->mtx); ++vsi->num_vlans; v = malloc(sizeof(struct ixlv_vlan_filter), M_DEVBUF, M_NOWAIT | M_ZERO); SLIST_INSERT_HEAD(sc->vlan_filters, v, next); v->vlan = vtag; v->flags = IXL_FILTER_ADD; ixl_vc_enqueue(&sc->vc_mgr, &sc->add_vlan_cmd, IXLV_FLAG_AQ_ADD_VLAN_FILTER, ixl_init_cmd_complete, sc); mtx_unlock(&sc->mtx); return; } /* ** This routine is run via an vlan ** unconfig EVENT, remove our entry ** in the soft vfta. */ static void ixlv_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag) { struct ixl_vsi *vsi = arg; struct ixlv_sc *sc = vsi->back; struct ixlv_vlan_filter *v; int i = 0; if (ifp->if_softc != arg) return; if ((vtag == 0) || (vtag > 4095)) /* Invalid */ return; mtx_lock(&sc->mtx); SLIST_FOREACH(v, sc->vlan_filters, next) { if (v->vlan == vtag) { v->flags = IXL_FILTER_DEL; ++i; --vsi->num_vlans; } } if (i) ixl_vc_enqueue(&sc->vc_mgr, &sc->del_vlan_cmd, IXLV_FLAG_AQ_DEL_VLAN_FILTER, ixl_init_cmd_complete, sc); mtx_unlock(&sc->mtx); return; } /* ** Get a new filter and add it to the mac filter list. */ static struct ixlv_mac_filter * ixlv_get_mac_filter(struct ixlv_sc *sc) { struct ixlv_mac_filter *f; f = malloc(sizeof(struct ixlv_mac_filter), M_DEVBUF, M_NOWAIT | M_ZERO); if (f) SLIST_INSERT_HEAD(sc->mac_filters, f, next); return (f); } /* ** Find the filter with matching MAC address */ static struct ixlv_mac_filter * ixlv_find_mac_filter(struct ixlv_sc *sc, u8 *macaddr) { struct ixlv_mac_filter *f; bool match = FALSE; SLIST_FOREACH(f, sc->mac_filters, next) { if (cmp_etheraddr(f->macaddr, macaddr)) { match = TRUE; break; } } if (!match) f = NULL; return (f); } static int ixlv_teardown_adminq_msix(struct ixlv_sc *sc) { device_t dev = sc->dev; int error = 0; if (sc->tag != NULL) { bus_teardown_intr(dev, sc->res, sc->tag); if (error) { device_printf(dev, "bus_teardown_intr() for" " interrupt 0 failed\n"); // return (ENXIO); } sc->tag = NULL; } if (sc->res != NULL) { bus_release_resource(dev, SYS_RES_IRQ, 1, sc->res); if (error) { device_printf(dev, "bus_release_resource() for" " interrupt 0 failed\n"); // return (ENXIO); } sc->res = NULL; } return (0); } /* ** Admin Queue interrupt handler */ static void ixlv_msix_adminq(void *arg) { struct ixlv_sc *sc = arg; struct i40e_hw *hw = &sc->hw; u32 reg, mask; reg = rd32(hw, I40E_VFINT_ICR01); mask = rd32(hw, I40E_VFINT_ICR0_ENA1); reg = rd32(hw, I40E_VFINT_DYN_CTL01); reg |= I40E_VFINT_DYN_CTL01_CLEARPBA_MASK; wr32(hw, I40E_VFINT_DYN_CTL01, reg); /* schedule task */ taskqueue_enqueue(sc->tq, &sc->aq_irq); return; } void ixlv_enable_intr(struct ixl_vsi *vsi) { struct i40e_hw *hw = vsi->hw; struct ixl_queue *que = vsi->queues; ixlv_enable_adminq_irq(hw); for (int i = 0; i < vsi->num_queues; i++, que++) ixlv_enable_queue_irq(hw, que->me); } void ixlv_disable_intr(struct ixl_vsi *vsi) { struct i40e_hw *hw = vsi->hw; struct ixl_queue *que = vsi->queues; ixlv_disable_adminq_irq(hw); for (int i = 0; i < vsi->num_queues; i++, que++) ixlv_disable_queue_irq(hw, que->me); } static void ixlv_disable_adminq_irq(struct i40e_hw *hw) { wr32(hw, I40E_VFINT_DYN_CTL01, 0); wr32(hw, I40E_VFINT_ICR0_ENA1, 0); /* flush */ rd32(hw, I40E_VFGEN_RSTAT); return; } static void ixlv_enable_adminq_irq(struct i40e_hw *hw) { wr32(hw, I40E_VFINT_DYN_CTL01, I40E_VFINT_DYN_CTL01_INTENA_MASK | I40E_VFINT_DYN_CTL01_ITR_INDX_MASK); wr32(hw, I40E_VFINT_ICR0_ENA1, I40E_VFINT_ICR0_ENA1_ADMINQ_MASK); /* flush */ rd32(hw, I40E_VFGEN_RSTAT); return; } static void ixlv_enable_queue_irq(struct i40e_hw *hw, int id) { u32 reg; reg = I40E_VFINT_DYN_CTLN1_INTENA_MASK | I40E_VFINT_DYN_CTLN1_CLEARPBA_MASK | I40E_VFINT_DYN_CTLN1_ITR_INDX_MASK; wr32(hw, I40E_VFINT_DYN_CTLN1(id), reg); } static void ixlv_disable_queue_irq(struct i40e_hw *hw, int id) { wr32(hw, I40E_VFINT_DYN_CTLN1(id), I40E_VFINT_DYN_CTLN1_ITR_INDX_MASK); rd32(hw, I40E_VFGEN_RSTAT); return; } /* * Get initial ITR values from tunable values. */ static void ixlv_configure_itr(struct ixlv_sc *sc) { struct i40e_hw *hw = &sc->hw; struct ixl_vsi *vsi = &sc->vsi; struct ixl_queue *que = vsi->queues; vsi->rx_itr_setting = ixlv_rx_itr; vsi->tx_itr_setting = ixlv_tx_itr; for (int i = 0; i < vsi->num_queues; i++, que++) { struct tx_ring *txr = &que->txr; struct rx_ring *rxr = &que->rxr; wr32(hw, I40E_VFINT_ITRN1(IXL_RX_ITR, i), vsi->rx_itr_setting); rxr->itr = vsi->rx_itr_setting; rxr->latency = IXL_AVE_LATENCY; wr32(hw, I40E_VFINT_ITRN1(IXL_TX_ITR, i), vsi->tx_itr_setting); txr->itr = vsi->tx_itr_setting; txr->latency = IXL_AVE_LATENCY; } } /* ** Provide a update to the queue RX ** interrupt moderation value. */ static void ixlv_set_queue_rx_itr(struct ixl_queue *que) { struct ixl_vsi *vsi = que->vsi; struct i40e_hw *hw = vsi->hw; struct rx_ring *rxr = &que->rxr; u16 rx_itr; u16 rx_latency = 0; int rx_bytes; /* Idle, do nothing */ if (rxr->bytes == 0) return; if (ixlv_dynamic_rx_itr) { rx_bytes = rxr->bytes/rxr->itr; rx_itr = rxr->itr; /* Adjust latency range */ switch (rxr->latency) { case IXL_LOW_LATENCY: if (rx_bytes > 10) { rx_latency = IXL_AVE_LATENCY; rx_itr = IXL_ITR_20K; } break; case IXL_AVE_LATENCY: if (rx_bytes > 20) { rx_latency = IXL_BULK_LATENCY; rx_itr = IXL_ITR_8K; } else if (rx_bytes <= 10) { rx_latency = IXL_LOW_LATENCY; rx_itr = IXL_ITR_100K; } break; case IXL_BULK_LATENCY: if (rx_bytes <= 20) { rx_latency = IXL_AVE_LATENCY; rx_itr = IXL_ITR_20K; } break; } rxr->latency = rx_latency; if (rx_itr != rxr->itr) { /* do an exponential smoothing */ rx_itr = (10 * rx_itr * rxr->itr) / ((9 * rx_itr) + rxr->itr); rxr->itr = min(rx_itr, IXL_MAX_ITR); wr32(hw, I40E_VFINT_ITRN1(IXL_RX_ITR, que->me), rxr->itr); } } else { /* We may have have toggled to non-dynamic */ if (vsi->rx_itr_setting & IXL_ITR_DYNAMIC) vsi->rx_itr_setting = ixlv_rx_itr; /* Update the hardware if needed */ if (rxr->itr != vsi->rx_itr_setting) { rxr->itr = vsi->rx_itr_setting; wr32(hw, I40E_VFINT_ITRN1(IXL_RX_ITR, que->me), rxr->itr); } } rxr->bytes = 0; rxr->packets = 0; return; } /* ** Provide a update to the queue TX ** interrupt moderation value. */ static void ixlv_set_queue_tx_itr(struct ixl_queue *que) { struct ixl_vsi *vsi = que->vsi; struct i40e_hw *hw = vsi->hw; struct tx_ring *txr = &que->txr; u16 tx_itr; u16 tx_latency = 0; int tx_bytes; /* Idle, do nothing */ if (txr->bytes == 0) return; if (ixlv_dynamic_tx_itr) { tx_bytes = txr->bytes/txr->itr; tx_itr = txr->itr; switch (txr->latency) { case IXL_LOW_LATENCY: if (tx_bytes > 10) { tx_latency = IXL_AVE_LATENCY; tx_itr = IXL_ITR_20K; } break; case IXL_AVE_LATENCY: if (tx_bytes > 20) { tx_latency = IXL_BULK_LATENCY; tx_itr = IXL_ITR_8K; } else if (tx_bytes <= 10) { tx_latency = IXL_LOW_LATENCY; tx_itr = IXL_ITR_100K; } break; case IXL_BULK_LATENCY: if (tx_bytes <= 20) { tx_latency = IXL_AVE_LATENCY; tx_itr = IXL_ITR_20K; } break; } txr->latency = tx_latency; if (tx_itr != txr->itr) { /* do an exponential smoothing */ tx_itr = (10 * tx_itr * txr->itr) / ((9 * tx_itr) + txr->itr); txr->itr = min(tx_itr, IXL_MAX_ITR); wr32(hw, I40E_VFINT_ITRN1(IXL_TX_ITR, que->me), txr->itr); } } else { /* We may have have toggled to non-dynamic */ if (vsi->tx_itr_setting & IXL_ITR_DYNAMIC) vsi->tx_itr_setting = ixlv_tx_itr; /* Update the hardware if needed */ if (txr->itr != vsi->tx_itr_setting) { txr->itr = vsi->tx_itr_setting; wr32(hw, I40E_VFINT_ITRN1(IXL_TX_ITR, que->me), txr->itr); } } txr->bytes = 0; txr->packets = 0; return; } /* ** ** MSIX Interrupt Handlers and Tasklets ** */ static void ixlv_handle_que(void *context, int pending) { struct ixl_queue *que = context; struct ixl_vsi *vsi = que->vsi; struct i40e_hw *hw = vsi->hw; struct tx_ring *txr = &que->txr; struct ifnet *ifp = vsi->ifp; bool more; if (ifp->if_drv_flags & IFF_DRV_RUNNING) { more = ixl_rxeof(que, IXL_RX_LIMIT); mtx_lock(&txr->mtx); ixl_txeof(que); if (!drbr_empty(ifp, txr->br)) ixl_mq_start_locked(ifp, txr); mtx_unlock(&txr->mtx); if (more) { taskqueue_enqueue(que->tq, &que->task); return; } } /* Reenable this interrupt - hmmm */ ixlv_enable_queue_irq(hw, que->me); return; } /********************************************************************* * * MSIX Queue Interrupt Service routine * **********************************************************************/ static void ixlv_msix_que(void *arg) { struct ixl_queue *que = arg; struct ixl_vsi *vsi = que->vsi; struct i40e_hw *hw = vsi->hw; struct tx_ring *txr = &que->txr; bool more_tx, more_rx; /* Spurious interrupts are ignored */ if (!(vsi->ifp->if_drv_flags & IFF_DRV_RUNNING)) return; ++que->irqs; more_rx = ixl_rxeof(que, IXL_RX_LIMIT); mtx_lock(&txr->mtx); more_tx = ixl_txeof(que); /* ** Make certain that if the stack ** has anything queued the task gets ** scheduled to handle it. */ if (!drbr_empty(vsi->ifp, txr->br)) more_tx = 1; mtx_unlock(&txr->mtx); ixlv_set_queue_rx_itr(que); ixlv_set_queue_tx_itr(que); if (more_tx || more_rx) taskqueue_enqueue(que->tq, &que->task); else ixlv_enable_queue_irq(hw, que->me); return; } /********************************************************************* * * Media Ioctl callback * * This routine is called whenever the user queries the status of * the interface using ifconfig. * **********************************************************************/ static void ixlv_media_status(struct ifnet * ifp, struct ifmediareq * ifmr) { struct ixl_vsi *vsi = ifp->if_softc; struct ixlv_sc *sc = vsi->back; INIT_DBG_IF(ifp, "begin"); mtx_lock(&sc->mtx); ixlv_update_link_status(sc); ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; if (!sc->link_up) { mtx_unlock(&sc->mtx); INIT_DBG_IF(ifp, "end: link not up"); return; } ifmr->ifm_status |= IFM_ACTIVE; /* Hardware is always full-duplex */ ifmr->ifm_active |= IFM_FDX; mtx_unlock(&sc->mtx); INIT_DBG_IF(ifp, "end"); return; } /********************************************************************* * * Media Ioctl callback * * This routine is called when the user changes speed/duplex using * media/mediopt option with ifconfig. * **********************************************************************/ static int ixlv_media_change(struct ifnet * ifp) { struct ixl_vsi *vsi = ifp->if_softc; struct ifmedia *ifm = &vsi->media; INIT_DBG_IF(ifp, "begin"); if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) return (EINVAL); INIT_DBG_IF(ifp, "end"); return (0); } /********************************************************************* * Multicast Initialization * * This routine is called by init to reset a fresh state. * **********************************************************************/ static void ixlv_init_multi(struct ixl_vsi *vsi) { struct ixlv_mac_filter *f; struct ixlv_sc *sc = vsi->back; int mcnt = 0; IOCTL_DBG_IF(vsi->ifp, "begin"); /* First clear any multicast filters */ SLIST_FOREACH(f, sc->mac_filters, next) { if ((f->flags & IXL_FILTER_USED) && (f->flags & IXL_FILTER_MC)) { f->flags |= IXL_FILTER_DEL; mcnt++; } } if (mcnt > 0) ixl_vc_enqueue(&sc->vc_mgr, &sc->del_multi_cmd, IXLV_FLAG_AQ_DEL_MAC_FILTER, ixl_init_cmd_complete, sc); IOCTL_DBG_IF(vsi->ifp, "end"); } static void ixlv_add_multi(struct ixl_vsi *vsi) { struct ifmultiaddr *ifma; struct ifnet *ifp = vsi->ifp; struct ixlv_sc *sc = vsi->back; int mcnt = 0; IOCTL_DBG_IF(ifp, "begin"); if_maddr_rlock(ifp); /* ** Get a count, to decide if we ** simply use multicast promiscuous. */ TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; mcnt++; } if_maddr_runlock(ifp); /* TODO: Remove -- cannot set promiscuous mode in a VF */ if (__predict_false(mcnt >= MAX_MULTICAST_ADDR)) { /* delete all multicast filters */ ixlv_init_multi(vsi); sc->promiscuous_flags |= I40E_FLAG_VF_MULTICAST_PROMISC; ixl_vc_enqueue(&sc->vc_mgr, &sc->add_multi_cmd, IXLV_FLAG_AQ_CONFIGURE_PROMISC, ixl_init_cmd_complete, sc); IOCTL_DEBUGOUT("%s: end: too many filters", __func__); return; } mcnt = 0; if_maddr_rlock(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; if (!ixlv_add_mac_filter(sc, (u8*)LLADDR((struct sockaddr_dl *) ifma->ifma_addr), IXL_FILTER_MC)) mcnt++; } if_maddr_runlock(ifp); /* ** Notify AQ task that sw filters need to be ** added to hw list */ if (mcnt > 0) ixl_vc_enqueue(&sc->vc_mgr, &sc->add_multi_cmd, IXLV_FLAG_AQ_ADD_MAC_FILTER, ixl_init_cmd_complete, sc); IOCTL_DBG_IF(ifp, "end"); } static void ixlv_del_multi(struct ixl_vsi *vsi) { struct ixlv_mac_filter *f; struct ifmultiaddr *ifma; struct ifnet *ifp = vsi->ifp; struct ixlv_sc *sc = vsi->back; int mcnt = 0; bool match = FALSE; IOCTL_DBG_IF(ifp, "begin"); /* Search for removed multicast addresses */ if_maddr_rlock(ifp); SLIST_FOREACH(f, sc->mac_filters, next) { if ((f->flags & IXL_FILTER_USED) && (f->flags & IXL_FILTER_MC)) { /* check if mac address in filter is in sc's list */ match = FALSE; TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; u8 *mc_addr = (u8 *)LLADDR((struct sockaddr_dl *)ifma->ifma_addr); if (cmp_etheraddr(f->macaddr, mc_addr)) { match = TRUE; break; } } /* if this filter is not in the sc's list, remove it */ if (match == FALSE && !(f->flags & IXL_FILTER_DEL)) { f->flags |= IXL_FILTER_DEL; mcnt++; IOCTL_DBG_IF(ifp, "marked: " MAC_FORMAT, MAC_FORMAT_ARGS(f->macaddr)); } else if (match == FALSE) IOCTL_DBG_IF(ifp, "exists: " MAC_FORMAT, MAC_FORMAT_ARGS(f->macaddr)); } } if_maddr_runlock(ifp); if (mcnt > 0) ixl_vc_enqueue(&sc->vc_mgr, &sc->del_multi_cmd, IXLV_FLAG_AQ_DEL_MAC_FILTER, ixl_init_cmd_complete, sc); IOCTL_DBG_IF(ifp, "end"); } /********************************************************************* * Timer routine * * This routine checks for link status,updates statistics, * and runs the watchdog check. * **********************************************************************/ static void ixlv_local_timer(void *arg) { struct ixlv_sc *sc = arg; struct i40e_hw *hw = &sc->hw; struct ixl_vsi *vsi = &sc->vsi; struct ixl_queue *que = vsi->queues; device_t dev = sc->dev; struct tx_ring *txr; int hung = 0; u32 mask, val; s32 timer, new_timer; IXLV_CORE_LOCK_ASSERT(sc); /* If Reset is in progress just bail */ if (sc->init_state == IXLV_RESET_PENDING) return; /* Check for when PF triggers a VF reset */ val = rd32(hw, I40E_VFGEN_RSTAT) & I40E_VFGEN_RSTAT_VFR_STATE_MASK; if (val != I40E_VFR_VFACTIVE && val != I40E_VFR_COMPLETED) { DDPRINTF(dev, "reset in progress! (%d)", val); return; } ixlv_request_stats(sc); /* clean and process any events */ taskqueue_enqueue(sc->tq, &sc->aq_irq); /* ** Check status on the queues for a hang */ mask = (I40E_VFINT_DYN_CTLN1_INTENA_MASK | I40E_VFINT_DYN_CTLN1_SWINT_TRIG_MASK | I40E_VFINT_DYN_CTLN1_ITR_INDX_MASK); for (int i = 0; i < vsi->num_queues; i++, que++) { txr = &que->txr; timer = atomic_load_acq_32(&txr->watchdog_timer); if (timer > 0) { new_timer = timer - hz; if (new_timer <= 0) { atomic_store_rel_32(&txr->watchdog_timer, -1); device_printf(dev, "WARNING: queue %d " "appears to be hung!\n", que->me); ++hung; } else { /* * If this fails, that means something in the TX path has updated * the watchdog, so it means the TX path is still working and * the watchdog doesn't need to countdown. */ atomic_cmpset_rel_32(&txr->watchdog_timer, timer, new_timer); /* Any queues with outstanding work get a sw irq */ wr32(hw, I40E_VFINT_DYN_CTLN1(que->me), mask); } } } /* Reset when a queue shows hung */ if (hung) goto hung; callout_reset(&sc->timer, hz, ixlv_local_timer, sc); return; hung: device_printf(dev, "WARNING: Resetting!\n"); sc->init_state = IXLV_RESET_REQUIRED; sc->watchdog_events++; ixlv_stop(sc); ixlv_init_locked(sc); } /* ** Note: this routine updates the OS on the link state ** the real check of the hardware only happens with ** a link interrupt. */ void ixlv_update_link_status(struct ixlv_sc *sc) { struct ixl_vsi *vsi = &sc->vsi; struct ifnet *ifp = vsi->ifp; if (sc->link_up){ if (vsi->link_active == FALSE) { if (bootverbose) if_printf(ifp,"Link is Up, %d Gbps\n", (sc->link_speed == I40E_LINK_SPEED_40GB) ? 40:10); vsi->link_active = TRUE; if_link_state_change(ifp, LINK_STATE_UP); } } else { /* Link down */ if (vsi->link_active == TRUE) { if (bootverbose) if_printf(ifp,"Link is Down\n"); if_link_state_change(ifp, LINK_STATE_DOWN); vsi->link_active = FALSE; } } return; } /********************************************************************* * * This routine disables all traffic on the adapter by issuing a * global reset on the MAC and deallocates TX/RX buffers. * **********************************************************************/ static void ixlv_stop(struct ixlv_sc *sc) { struct ifnet *ifp; int start; ifp = sc->vsi.ifp; INIT_DBG_IF(ifp, "begin"); IXLV_CORE_LOCK_ASSERT(sc); ixl_vc_flush(&sc->vc_mgr); ixlv_disable_queues(sc); start = ticks; while ((ifp->if_drv_flags & IFF_DRV_RUNNING) && ((ticks - start) < hz/10)) ixlv_do_adminq_locked(sc); /* Stop the local timer */ callout_stop(&sc->timer); INIT_DBG_IF(ifp, "end"); } /********************************************************************* * * Free all station queue structs. * **********************************************************************/ static void ixlv_free_queues(struct ixl_vsi *vsi) { struct ixlv_sc *sc = (struct ixlv_sc *)vsi->back; struct ixl_queue *que = vsi->queues; for (int i = 0; i < vsi->num_queues; i++, que++) { struct tx_ring *txr = &que->txr; struct rx_ring *rxr = &que->rxr; if (!mtx_initialized(&txr->mtx)) /* uninitialized */ continue; IXL_TX_LOCK(txr); ixl_free_que_tx(que); if (txr->base) i40e_free_dma_mem(&sc->hw, &txr->dma); IXL_TX_UNLOCK(txr); IXL_TX_LOCK_DESTROY(txr); if (!mtx_initialized(&rxr->mtx)) /* uninitialized */ continue; IXL_RX_LOCK(rxr); ixl_free_que_rx(que); if (rxr->base) i40e_free_dma_mem(&sc->hw, &rxr->dma); IXL_RX_UNLOCK(rxr); IXL_RX_LOCK_DESTROY(rxr); } free(vsi->queues, M_DEVBUF); } static void ixlv_config_rss_reg(struct ixlv_sc *sc) { struct i40e_hw *hw = &sc->hw; struct ixl_vsi *vsi = &sc->vsi; u32 lut = 0; u64 set_hena = 0, hena; int i, j, que_id; u32 rss_seed[IXL_RSS_KEY_SIZE_REG]; #ifdef RSS u32 rss_hash_config; #endif /* Don't set up RSS if using a single queue */ if (vsi->num_queues == 1) { wr32(hw, I40E_VFQF_HENA(0), 0); wr32(hw, I40E_VFQF_HENA(1), 0); ixl_flush(hw); return; } #ifdef RSS /* Fetch the configured RSS key */ rss_getkey((uint8_t *) &rss_seed); #else ixl_get_default_rss_key(rss_seed); #endif /* Fill out hash function seed */ for (i = 0; i < IXL_RSS_KEY_SIZE_REG; i++) wr32(hw, I40E_VFQF_HKEY(i), rss_seed[i]); /* Enable PCTYPES for RSS: */ #ifdef RSS rss_hash_config = rss_gethashconfig(); if (rss_hash_config & RSS_HASHTYPE_RSS_IPV4) set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV4_OTHER); if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV4) set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV4_TCP); if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV4) set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV4_UDP); if (rss_hash_config & RSS_HASHTYPE_RSS_IPV6) set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV6_OTHER); if (rss_hash_config & RSS_HASHTYPE_RSS_IPV6_EX) set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_FRAG_IPV6); if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV6) set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV6_TCP); if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV6) set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV6_UDP); #else set_hena = IXL_DEFAULT_RSS_HENA_XL710; #endif hena = (u64)rd32(hw, I40E_VFQF_HENA(0)) | ((u64)rd32(hw, I40E_VFQF_HENA(1)) << 32); hena |= set_hena; wr32(hw, I40E_VFQF_HENA(0), (u32)hena); wr32(hw, I40E_VFQF_HENA(1), (u32)(hena >> 32)); /* Populate the LUT with max no. of queues in round robin fashion */ for (i = 0, j = 0; i < IXL_RSS_VSI_LUT_SIZE; i++, j++) { if (j == vsi->num_queues) j = 0; #ifdef RSS /* * Fetch the RSS bucket id for the given indirection entry. * Cap it at the number of configured buckets (which is * num_queues.) */ que_id = rss_get_indirection_to_bucket(i); que_id = que_id % vsi->num_queues; #else que_id = j; #endif /* lut = 4-byte sliding window of 4 lut entries */ lut = (lut << 8) | (que_id & IXL_RSS_VF_LUT_ENTRY_MASK); /* On i = 3, we have 4 entries in lut; write to the register */ if ((i & 3) == 3) { wr32(hw, I40E_VFQF_HLUT(i >> 2), lut); DDPRINTF(sc->dev, "HLUT(%2d): %#010x", i, lut); } } ixl_flush(hw); } static void ixlv_config_rss_pf(struct ixlv_sc *sc) { ixl_vc_enqueue(&sc->vc_mgr, &sc->config_rss_key_cmd, IXLV_FLAG_AQ_CONFIG_RSS_KEY, ixl_init_cmd_complete, sc); ixl_vc_enqueue(&sc->vc_mgr, &sc->set_rss_hena_cmd, IXLV_FLAG_AQ_SET_RSS_HENA, ixl_init_cmd_complete, sc); ixl_vc_enqueue(&sc->vc_mgr, &sc->config_rss_lut_cmd, IXLV_FLAG_AQ_CONFIG_RSS_LUT, ixl_init_cmd_complete, sc); } /* ** ixlv_config_rss - setup RSS ** ** RSS keys and table are cleared on VF reset. */ static void ixlv_config_rss(struct ixlv_sc *sc) { if (sc->vf_res->vf_offload_flags & I40E_VIRTCHNL_VF_OFFLOAD_RSS_REG) { DDPRINTF(sc->dev, "Setting up RSS using VF registers..."); ixlv_config_rss_reg(sc); } else if (sc->vf_res->vf_offload_flags & I40E_VIRTCHNL_VF_OFFLOAD_RSS_PF) { DDPRINTF(sc->dev, "Setting up RSS using messages to PF..."); ixlv_config_rss_pf(sc); } else device_printf(sc->dev, "VF does not support RSS capability sent by PF.\n"); } /* ** This routine refreshes vlan filters, called by init ** it scans the filter table and then updates the AQ */ static void ixlv_setup_vlan_filters(struct ixlv_sc *sc) { struct ixl_vsi *vsi = &sc->vsi; struct ixlv_vlan_filter *f; int cnt = 0; if (vsi->num_vlans == 0) return; /* ** Scan the filter table for vlan entries, ** and if found call for the AQ update. */ SLIST_FOREACH(f, sc->vlan_filters, next) if (f->flags & IXL_FILTER_ADD) cnt++; if (cnt > 0) ixl_vc_enqueue(&sc->vc_mgr, &sc->add_vlan_cmd, IXLV_FLAG_AQ_ADD_VLAN_FILTER, ixl_init_cmd_complete, sc); } /* ** This routine adds new MAC filters to the sc's list; ** these are later added in hardware by sending a virtual ** channel message. */ static int ixlv_add_mac_filter(struct ixlv_sc *sc, u8 *macaddr, u16 flags) { struct ixlv_mac_filter *f; /* Does one already exist? */ f = ixlv_find_mac_filter(sc, macaddr); if (f != NULL) { IDPRINTF(sc->vsi.ifp, "exists: " MAC_FORMAT, MAC_FORMAT_ARGS(macaddr)); return (EEXIST); } /* If not, get a new empty filter */ f = ixlv_get_mac_filter(sc); if (f == NULL) { if_printf(sc->vsi.ifp, "%s: no filters available!!\n", __func__); return (ENOMEM); } IDPRINTF(sc->vsi.ifp, "marked: " MAC_FORMAT, MAC_FORMAT_ARGS(macaddr)); bcopy(macaddr, f->macaddr, ETHER_ADDR_LEN); f->flags |= (IXL_FILTER_ADD | IXL_FILTER_USED); f->flags |= flags; return (0); } /* ** Marks a MAC filter for deletion. */ static int ixlv_del_mac_filter(struct ixlv_sc *sc, u8 *macaddr) { struct ixlv_mac_filter *f; f = ixlv_find_mac_filter(sc, macaddr); if (f == NULL) return (ENOENT); f->flags |= IXL_FILTER_DEL; return (0); } /* ** Tasklet handler for MSIX Adminq interrupts ** - done outside interrupt context since it might sleep */ static void ixlv_do_adminq(void *context, int pending) { struct ixlv_sc *sc = context; mtx_lock(&sc->mtx); ixlv_do_adminq_locked(sc); mtx_unlock(&sc->mtx); return; } static void ixlv_do_adminq_locked(struct ixlv_sc *sc) { struct i40e_hw *hw = &sc->hw; struct i40e_arq_event_info event; struct i40e_virtchnl_msg *v_msg; device_t dev = sc->dev; u16 result = 0; u32 reg, oldreg; i40e_status ret; bool aq_error = false; IXLV_CORE_LOCK_ASSERT(sc); event.buf_len = IXL_AQ_BUF_SZ; event.msg_buf = sc->aq_buffer; v_msg = (struct i40e_virtchnl_msg *)&event.desc; do { ret = i40e_clean_arq_element(hw, &event, &result); if (ret) break; ixlv_vc_completion(sc, v_msg->v_opcode, v_msg->v_retval, event.msg_buf, event.msg_len); if (result != 0) bzero(event.msg_buf, IXL_AQ_BUF_SZ); } while (result); /* check for Admin queue errors */ oldreg = reg = rd32(hw, hw->aq.arq.len); if (reg & I40E_VF_ARQLEN1_ARQVFE_MASK) { device_printf(dev, "ARQ VF Error detected\n"); reg &= ~I40E_VF_ARQLEN1_ARQVFE_MASK; aq_error = true; } if (reg & I40E_VF_ARQLEN1_ARQOVFL_MASK) { device_printf(dev, "ARQ Overflow Error detected\n"); reg &= ~I40E_VF_ARQLEN1_ARQOVFL_MASK; aq_error = true; } if (reg & I40E_VF_ARQLEN1_ARQCRIT_MASK) { device_printf(dev, "ARQ Critical Error detected\n"); reg &= ~I40E_VF_ARQLEN1_ARQCRIT_MASK; aq_error = true; } if (oldreg != reg) wr32(hw, hw->aq.arq.len, reg); oldreg = reg = rd32(hw, hw->aq.asq.len); if (reg & I40E_VF_ATQLEN1_ATQVFE_MASK) { device_printf(dev, "ASQ VF Error detected\n"); reg &= ~I40E_VF_ATQLEN1_ATQVFE_MASK; aq_error = true; } if (reg & I40E_VF_ATQLEN1_ATQOVFL_MASK) { device_printf(dev, "ASQ Overflow Error detected\n"); reg &= ~I40E_VF_ATQLEN1_ATQOVFL_MASK; aq_error = true; } if (reg & I40E_VF_ATQLEN1_ATQCRIT_MASK) { device_printf(dev, "ASQ Critical Error detected\n"); reg &= ~I40E_VF_ATQLEN1_ATQCRIT_MASK; aq_error = true; } if (oldreg != reg) wr32(hw, hw->aq.asq.len, reg); if (aq_error) { /* Need to reset adapter */ device_printf(dev, "WARNING: Resetting!\n"); sc->init_state = IXLV_RESET_REQUIRED; ixlv_stop(sc); ixlv_init_locked(sc); } ixlv_enable_adminq_irq(hw); } static void ixlv_add_sysctls(struct ixlv_sc *sc) { device_t dev = sc->dev; struct ixl_vsi *vsi = &sc->vsi; struct i40e_eth_stats *es = &vsi->eth_stats; struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev); struct sysctl_oid *tree = device_get_sysctl_tree(dev); struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree); struct sysctl_oid *vsi_node, *queue_node; struct sysctl_oid_list *vsi_list, *queue_list; #define QUEUE_NAME_LEN 32 char queue_namebuf[QUEUE_NAME_LEN]; struct ixl_queue *queues = vsi->queues; struct tx_ring *txr; struct rx_ring *rxr; /* Driver statistics sysctls */ SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_events", CTLFLAG_RD, &sc->watchdog_events, "Watchdog timeouts"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "admin_irq", CTLFLAG_RD, &sc->admin_irq, "Admin Queue IRQ Handled"); /* VSI statistics sysctls */ vsi_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "vsi", CTLFLAG_RD, NULL, "VSI-specific statistics"); vsi_list = SYSCTL_CHILDREN(vsi_node); struct ixl_sysctl_info ctls[] = { {&es->rx_bytes, "good_octets_rcvd", "Good Octets Received"}, {&es->rx_unicast, "ucast_pkts_rcvd", "Unicast Packets Received"}, {&es->rx_multicast, "mcast_pkts_rcvd", "Multicast Packets Received"}, {&es->rx_broadcast, "bcast_pkts_rcvd", "Broadcast Packets Received"}, {&es->rx_discards, "rx_discards", "Discarded RX packets"}, {&es->rx_unknown_protocol, "rx_unknown_proto", "RX unknown protocol packets"}, {&es->tx_bytes, "good_octets_txd", "Good Octets Transmitted"}, {&es->tx_unicast, "ucast_pkts_txd", "Unicast Packets Transmitted"}, {&es->tx_multicast, "mcast_pkts_txd", "Multicast Packets Transmitted"}, {&es->tx_broadcast, "bcast_pkts_txd", "Broadcast Packets Transmitted"}, {&es->tx_errors, "tx_errors", "TX packet errors"}, // end {0,0,0} }; struct ixl_sysctl_info *entry = ctls; while (entry->stat != NULL) { SYSCTL_ADD_QUAD(ctx, child, OID_AUTO, entry->name, CTLFLAG_RD, entry->stat, entry->description); entry++; } /* Queue sysctls */ for (int q = 0; q < vsi->num_queues; q++) { snprintf(queue_namebuf, QUEUE_NAME_LEN, "que%d", q); queue_node = SYSCTL_ADD_NODE(ctx, vsi_list, OID_AUTO, queue_namebuf, CTLFLAG_RD, NULL, "Queue Name"); queue_list = SYSCTL_CHILDREN(queue_node); txr = &(queues[q].txr); rxr = &(queues[q].rxr); SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "mbuf_defrag_failed", CTLFLAG_RD, &(queues[q].mbuf_defrag_failed), "m_defrag() failed"); SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "dropped", CTLFLAG_RD, &(queues[q].dropped_pkts), "Driver dropped packets"); SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "irqs", CTLFLAG_RD, &(queues[q].irqs), "irqs on this queue"); SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "tso_tx", CTLFLAG_RD, &(queues[q].tso), "TSO"); SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "tx_dmamap_failed", CTLFLAG_RD, &(queues[q].tx_dmamap_failed), "Driver tx dma failure in xmit"); SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "no_desc_avail", CTLFLAG_RD, &(txr->no_desc), "Queue No Descriptor Available"); SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "tx_packets", CTLFLAG_RD, &(txr->total_packets), "Queue Packets Transmitted"); SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "tx_bytes", CTLFLAG_RD, &(txr->tx_bytes), "Queue Bytes Transmitted"); SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_packets", CTLFLAG_RD, &(rxr->rx_packets), "Queue Packets Received"); SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_bytes", CTLFLAG_RD, &(rxr->rx_bytes), "Queue Bytes Received"); SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "rx_itr", CTLFLAG_RD, &(rxr->itr), 0, "Queue Rx ITR Interval"); SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "tx_itr", CTLFLAG_RD, &(txr->itr), 0, "Queue Tx ITR Interval"); #ifdef IXL_DEBUG /* Examine queue state */ SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "qtx_head", CTLTYPE_UINT | CTLFLAG_RD, &queues[q], sizeof(struct ixl_queue), ixlv_sysctl_qtx_tail_handler, "IU", "Queue Transmit Descriptor Tail"); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "qrx_head", CTLTYPE_UINT | CTLFLAG_RD, &queues[q], sizeof(struct ixl_queue), ixlv_sysctl_qrx_tail_handler, "IU", "Queue Receive Descriptor Tail"); SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "watchdog_timer", CTLFLAG_RD, &(txr.watchdog_timer), 0, "Ticks before watchdog event is triggered"); #endif } } static void ixlv_init_filters(struct ixlv_sc *sc) { sc->mac_filters = malloc(sizeof(struct ixlv_mac_filter), M_DEVBUF, M_NOWAIT | M_ZERO); SLIST_INIT(sc->mac_filters); sc->vlan_filters = malloc(sizeof(struct ixlv_vlan_filter), M_DEVBUF, M_NOWAIT | M_ZERO); SLIST_INIT(sc->vlan_filters); return; } static void ixlv_free_filters(struct ixlv_sc *sc) { struct ixlv_mac_filter *f; struct ixlv_vlan_filter *v; while (!SLIST_EMPTY(sc->mac_filters)) { f = SLIST_FIRST(sc->mac_filters); SLIST_REMOVE_HEAD(sc->mac_filters, next); free(f, M_DEVBUF); } while (!SLIST_EMPTY(sc->vlan_filters)) { v = SLIST_FIRST(sc->vlan_filters); SLIST_REMOVE_HEAD(sc->vlan_filters, next); free(v, M_DEVBUF); } return; } #ifdef IXL_DEBUG /** * ixlv_sysctl_qtx_tail_handler * Retrieves I40E_QTX_TAIL1 value from hardware * for a sysctl. */ static int ixlv_sysctl_qtx_tail_handler(SYSCTL_HANDLER_ARGS) { struct ixl_queue *que; int error; u32 val; que = ((struct ixl_queue *)oidp->oid_arg1); if (!que) return 0; val = rd32(que->vsi->hw, que->txr.tail); error = sysctl_handle_int(oidp, &val, 0, req); if (error || !req->newptr) return error; return (0); } /** * ixlv_sysctl_qrx_tail_handler * Retrieves I40E_QRX_TAIL1 value from hardware * for a sysctl. */ static int ixlv_sysctl_qrx_tail_handler(SYSCTL_HANDLER_ARGS) { struct ixl_queue *que; int error; u32 val; que = ((struct ixl_queue *)oidp->oid_arg1); if (!que) return 0; val = rd32(que->vsi->hw, que->rxr.tail); error = sysctl_handle_int(oidp, &val, 0, req); if (error || !req->newptr) return error; return (0); } #endif Index: head/sys/dev/ixl/ixl_pf_iov.c =================================================================== --- head/sys/dev/ixl/ixl_pf_iov.c (revision 327827) +++ head/sys/dev/ixl/ixl_pf_iov.c (revision 327828) @@ -1,1865 +1,1865 @@ /****************************************************************************** Copyright (c) 2013-2015, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************/ /*$FreeBSD$*/ #include "ixl_pf_iov.h" /* Private functions */ static void ixl_vf_map_vsi_queue(struct i40e_hw *hw, struct ixl_vf *vf, int qnum, uint32_t val); static void ixl_vf_disable_queue_intr(struct i40e_hw *hw, uint32_t vfint_reg); static void ixl_vf_unregister_intr(struct i40e_hw *hw, uint32_t vpint_reg); static bool ixl_zero_mac(const uint8_t *addr); static bool ixl_bcast_mac(const uint8_t *addr); static int ixl_vc_opcode_level(uint16_t opcode); static int ixl_vf_mac_valid(struct ixl_vf *vf, const uint8_t *addr); static int ixl_vf_alloc_vsi(struct ixl_pf *pf, struct ixl_vf *vf); static int ixl_vf_setup_vsi(struct ixl_pf *pf, struct ixl_vf *vf); static void ixl_vf_map_queues(struct ixl_pf *pf, struct ixl_vf *vf); static void ixl_vf_vsi_release(struct ixl_pf *pf, struct ixl_vsi *vsi); static void ixl_vf_release_resources(struct ixl_pf *pf, struct ixl_vf *vf); static int ixl_flush_pcie(struct ixl_pf *pf, struct ixl_vf *vf); static void ixl_reset_vf(struct ixl_pf *pf, struct ixl_vf *vf); static void ixl_reinit_vf(struct ixl_pf *pf, struct ixl_vf *vf); static void ixl_send_vf_msg(struct ixl_pf *pf, struct ixl_vf *vf, uint16_t op, enum i40e_status_code status, void *msg, uint16_t len); static void ixl_send_vf_ack(struct ixl_pf *pf, struct ixl_vf *vf, uint16_t op); static void ixl_send_vf_nack_msg(struct ixl_pf *pf, struct ixl_vf *vf, uint16_t op, enum i40e_status_code status, const char *file, int line); static void ixl_vf_version_msg(struct ixl_pf *pf, struct ixl_vf *vf, void *msg, uint16_t msg_size); static void ixl_vf_reset_msg(struct ixl_pf *pf, struct ixl_vf *vf, void *msg, uint16_t msg_size); static void ixl_vf_get_resources_msg(struct ixl_pf *pf, struct ixl_vf *vf, void *msg, uint16_t msg_size); static int ixl_vf_config_tx_queue(struct ixl_pf *pf, struct ixl_vf *vf, struct i40e_virtchnl_txq_info *info); static int ixl_vf_config_rx_queue(struct ixl_pf *pf, struct ixl_vf *vf, struct i40e_virtchnl_rxq_info *info); static void ixl_vf_config_vsi_msg(struct ixl_pf *pf, struct ixl_vf *vf, void *msg, uint16_t msg_size); static void ixl_vf_set_qctl(struct ixl_pf *pf, const struct i40e_virtchnl_vector_map *vector, enum i40e_queue_type cur_type, uint16_t cur_queue, enum i40e_queue_type *last_type, uint16_t *last_queue); static void ixl_vf_config_vector(struct ixl_pf *pf, struct ixl_vf *vf, const struct i40e_virtchnl_vector_map *vector); static void ixl_vf_config_irq_msg(struct ixl_pf *pf, struct ixl_vf *vf, void *msg, uint16_t msg_size); static void ixl_vf_enable_queues_msg(struct ixl_pf *pf, struct ixl_vf *vf, void *msg, uint16_t msg_size); static void ixl_vf_disable_queues_msg(struct ixl_pf *pf, struct ixl_vf *vf, void *msg, uint16_t msg_size); static void ixl_vf_add_mac_msg(struct ixl_pf *pf, struct ixl_vf *vf, void *msg, uint16_t msg_size); static void ixl_vf_del_mac_msg(struct ixl_pf *pf, struct ixl_vf *vf, void *msg, uint16_t msg_size); static enum i40e_status_code ixl_vf_enable_vlan_strip(struct ixl_pf *pf, struct ixl_vf *vf); static void ixl_vf_add_vlan_msg(struct ixl_pf *pf, struct ixl_vf *vf, void *msg, uint16_t msg_size); static void ixl_vf_del_vlan_msg(struct ixl_pf *pf, struct ixl_vf *vf, void *msg, uint16_t msg_size); static void ixl_vf_config_promisc_msg(struct ixl_pf *pf, struct ixl_vf *vf, void *msg, uint16_t msg_size); static void ixl_vf_get_stats_msg(struct ixl_pf *pf, struct ixl_vf *vf, void *msg, uint16_t msg_size); static int ixl_vf_reserve_queues(struct ixl_pf *pf, struct ixl_vf *vf, int num_queues); static int ixl_adminq_err_to_errno(enum i40e_admin_queue_err err); void ixl_initialize_sriov(struct ixl_pf *pf) { device_t dev = pf->dev; struct i40e_hw *hw = &pf->hw; nvlist_t *pf_schema, *vf_schema; int iov_error; /* SR-IOV is only supported when MSI-X is in use. */ if (pf->msix <= 1) return; pf_schema = pci_iov_schema_alloc_node(); vf_schema = pci_iov_schema_alloc_node(); pci_iov_schema_add_unicast_mac(vf_schema, "mac-addr", 0, NULL); pci_iov_schema_add_bool(vf_schema, "mac-anti-spoof", IOV_SCHEMA_HASDEFAULT, TRUE); pci_iov_schema_add_bool(vf_schema, "allow-set-mac", IOV_SCHEMA_HASDEFAULT, FALSE); pci_iov_schema_add_bool(vf_schema, "allow-promisc", IOV_SCHEMA_HASDEFAULT, FALSE); pci_iov_schema_add_uint16(vf_schema, "num-queues", IOV_SCHEMA_HASDEFAULT, max(1, hw->func_caps.num_msix_vectors_vf - 1) % IXLV_MAX_QUEUES); iov_error = pci_iov_attach(dev, pf_schema, vf_schema); if (iov_error != 0) { device_printf(dev, "Failed to initialize SR-IOV (error=%d)\n", iov_error); } else device_printf(dev, "SR-IOV ready\n"); pf->vc_debug_lvl = 1; } /* * Allocate the VSI for a VF. */ static int ixl_vf_alloc_vsi(struct ixl_pf *pf, struct ixl_vf *vf) { device_t dev; struct i40e_hw *hw; struct ixl_vsi *vsi; struct i40e_vsi_context vsi_ctx; int i; enum i40e_status_code code; hw = &pf->hw; vsi = &pf->vsi; dev = pf->dev; vsi_ctx.pf_num = hw->pf_id; vsi_ctx.uplink_seid = pf->veb_seid; vsi_ctx.connection_type = IXL_VSI_DATA_PORT; vsi_ctx.vf_num = hw->func_caps.vf_base_id + vf->vf_num; vsi_ctx.flags = I40E_AQ_VSI_TYPE_VF; bzero(&vsi_ctx.info, sizeof(vsi_ctx.info)); vsi_ctx.info.valid_sections = htole16(I40E_AQ_VSI_PROP_SWITCH_VALID); vsi_ctx.info.switch_id = htole16(0); vsi_ctx.info.valid_sections |= htole16(I40E_AQ_VSI_PROP_SECURITY_VALID); vsi_ctx.info.sec_flags = 0; if (vf->vf_flags & VF_FLAG_MAC_ANTI_SPOOF) vsi_ctx.info.sec_flags |= I40E_AQ_VSI_SEC_FLAG_ENABLE_MAC_CHK; vsi_ctx.info.valid_sections |= htole16(I40E_AQ_VSI_PROP_VLAN_VALID); vsi_ctx.info.port_vlan_flags = I40E_AQ_VSI_PVLAN_MODE_ALL | I40E_AQ_VSI_PVLAN_EMOD_NOTHING; vsi_ctx.info.valid_sections |= htole16(I40E_AQ_VSI_PROP_QUEUE_MAP_VALID); vsi_ctx.info.mapping_flags = htole16(I40E_AQ_VSI_QUE_MAP_NONCONTIG); /* ERJ: Only scattered allocation is supported for VFs right now */ for (i = 0; i < vf->qtag.num_active; i++) vsi_ctx.info.queue_mapping[i] = vf->qtag.qidx[i]; for (; i < nitems(vsi_ctx.info.queue_mapping); i++) vsi_ctx.info.queue_mapping[i] = htole16(I40E_AQ_VSI_QUEUE_MASK); vsi_ctx.info.tc_mapping[0] = htole16( (0 << I40E_AQ_VSI_TC_QUE_OFFSET_SHIFT) | (bsrl(vf->qtag.num_allocated) << I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT)); code = i40e_aq_add_vsi(hw, &vsi_ctx, NULL); if (code != I40E_SUCCESS) return (ixl_adminq_err_to_errno(hw->aq.asq_last_status)); vf->vsi.seid = vsi_ctx.seid; vf->vsi.vsi_num = vsi_ctx.vsi_number; // vf->vsi.first_queue = vf->qtag.qidx[0]; vf->vsi.num_queues = vf->qtag.num_active; code = i40e_aq_get_vsi_params(hw, &vsi_ctx, NULL); if (code != I40E_SUCCESS) return (ixl_adminq_err_to_errno(hw->aq.asq_last_status)); code = i40e_aq_config_vsi_bw_limit(hw, vf->vsi.seid, 0, 0, NULL); if (code != I40E_SUCCESS) { device_printf(dev, "Failed to disable BW limit: %d\n", ixl_adminq_err_to_errno(hw->aq.asq_last_status)); return (ixl_adminq_err_to_errno(hw->aq.asq_last_status)); } memcpy(&vf->vsi.info, &vsi_ctx.info, sizeof(vf->vsi.info)); return (0); } static int ixl_vf_setup_vsi(struct ixl_pf *pf, struct ixl_vf *vf) { struct i40e_hw *hw; int error; hw = &pf->hw; error = ixl_vf_alloc_vsi(pf, vf); if (error != 0) return (error); vf->vsi.hw_filters_add = 0; vf->vsi.hw_filters_del = 0; ixl_add_filter(&vf->vsi, ixl_bcast_addr, IXL_VLAN_ANY); ixl_reconfigure_filters(&vf->vsi); return (0); } static void ixl_vf_map_vsi_queue(struct i40e_hw *hw, struct ixl_vf *vf, int qnum, uint32_t val) { uint32_t qtable; int index, shift; /* * Two queues are mapped in a single register, so we have to do some * gymnastics to convert the queue number into a register index and * shift. */ index = qnum / 2; shift = (qnum % 2) * I40E_VSILAN_QTABLE_QINDEX_1_SHIFT; qtable = i40e_read_rx_ctl(hw, I40E_VSILAN_QTABLE(index, vf->vsi.vsi_num)); qtable &= ~(I40E_VSILAN_QTABLE_QINDEX_0_MASK << shift); qtable |= val << shift; i40e_write_rx_ctl(hw, I40E_VSILAN_QTABLE(index, vf->vsi.vsi_num), qtable); } static void ixl_vf_map_queues(struct ixl_pf *pf, struct ixl_vf *vf) { struct i40e_hw *hw; uint32_t qtable; int i; hw = &pf->hw; /* * Contiguous mappings aren't actually supported by the hardware, * so we have to use non-contiguous mappings. */ i40e_write_rx_ctl(hw, I40E_VSILAN_QBASE(vf->vsi.vsi_num), I40E_VSILAN_QBASE_VSIQTABLE_ENA_MASK); /* Enable LAN traffic on this VF */ wr32(hw, I40E_VPLAN_MAPENA(vf->vf_num), I40E_VPLAN_MAPENA_TXRX_ENA_MASK); /* Program index of each VF queue into PF queue space * (This is only needed if QTABLE is enabled) */ for (i = 0; i < vf->vsi.num_queues; i++) { qtable = ixl_pf_qidx_from_vsi_qidx(&vf->qtag, i) << I40E_VPLAN_QTABLE_QINDEX_SHIFT; wr32(hw, I40E_VPLAN_QTABLE(i, vf->vf_num), qtable); } for (; i < IXL_MAX_VSI_QUEUES; i++) wr32(hw, I40E_VPLAN_QTABLE(i, vf->vf_num), I40E_VPLAN_QTABLE_QINDEX_MASK); /* Map queues allocated to VF to its VSI; * This mapping matches the VF-wide mapping since the VF * is only given a single VSI */ for (i = 0; i < vf->vsi.num_queues; i++) ixl_vf_map_vsi_queue(hw, vf, i, ixl_pf_qidx_from_vsi_qidx(&vf->qtag, i)); /* Set rest of VSI queues as unused. */ for (; i < IXL_MAX_VSI_QUEUES; i++) ixl_vf_map_vsi_queue(hw, vf, i, I40E_VSILAN_QTABLE_QINDEX_0_MASK); ixl_flush(hw); } static void ixl_vf_vsi_release(struct ixl_pf *pf, struct ixl_vsi *vsi) { struct i40e_hw *hw; hw = &pf->hw; if (vsi->seid == 0) return; i40e_aq_delete_element(hw, vsi->seid, NULL); } static void ixl_vf_disable_queue_intr(struct i40e_hw *hw, uint32_t vfint_reg) { wr32(hw, vfint_reg, I40E_VFINT_DYN_CTLN_CLEARPBA_MASK); ixl_flush(hw); } static void ixl_vf_unregister_intr(struct i40e_hw *hw, uint32_t vpint_reg) { wr32(hw, vpint_reg, I40E_VPINT_LNKLSTN_FIRSTQ_TYPE_MASK | I40E_VPINT_LNKLSTN_FIRSTQ_INDX_MASK); ixl_flush(hw); } static void ixl_vf_release_resources(struct ixl_pf *pf, struct ixl_vf *vf) { struct i40e_hw *hw; uint32_t vfint_reg, vpint_reg; int i; hw = &pf->hw; ixl_vf_vsi_release(pf, &vf->vsi); /* Index 0 has a special register. */ ixl_vf_disable_queue_intr(hw, I40E_VFINT_DYN_CTL0(vf->vf_num)); for (i = 1; i < hw->func_caps.num_msix_vectors_vf; i++) { vfint_reg = IXL_VFINT_DYN_CTLN_REG(hw, i , vf->vf_num); ixl_vf_disable_queue_intr(hw, vfint_reg); } /* Index 0 has a special register. */ ixl_vf_unregister_intr(hw, I40E_VPINT_LNKLST0(vf->vf_num)); for (i = 1; i < hw->func_caps.num_msix_vectors_vf; i++) { vpint_reg = IXL_VPINT_LNKLSTN_REG(hw, i, vf->vf_num); ixl_vf_unregister_intr(hw, vpint_reg); } vf->vsi.num_queues = 0; } static int ixl_flush_pcie(struct ixl_pf *pf, struct ixl_vf *vf) { struct i40e_hw *hw; int i; uint16_t global_vf_num; uint32_t ciad; hw = &pf->hw; global_vf_num = hw->func_caps.vf_base_id + vf->vf_num; wr32(hw, I40E_PF_PCI_CIAA, IXL_PF_PCI_CIAA_VF_DEVICE_STATUS | (global_vf_num << I40E_PF_PCI_CIAA_VF_NUM_SHIFT)); for (i = 0; i < IXL_VF_RESET_TIMEOUT; i++) { ciad = rd32(hw, I40E_PF_PCI_CIAD); if ((ciad & IXL_PF_PCI_CIAD_VF_TRANS_PENDING_MASK) == 0) return (0); DELAY(1); } return (ETIMEDOUT); } static void ixl_reset_vf(struct ixl_pf *pf, struct ixl_vf *vf) { struct i40e_hw *hw; uint32_t vfrtrig; hw = &pf->hw; vfrtrig = rd32(hw, I40E_VPGEN_VFRTRIG(vf->vf_num)); vfrtrig |= I40E_VPGEN_VFRTRIG_VFSWR_MASK; wr32(hw, I40E_VPGEN_VFRTRIG(vf->vf_num), vfrtrig); ixl_flush(hw); ixl_reinit_vf(pf, vf); } static void ixl_reinit_vf(struct ixl_pf *pf, struct ixl_vf *vf) { struct i40e_hw *hw; uint32_t vfrstat, vfrtrig; int i, error; hw = &pf->hw; error = ixl_flush_pcie(pf, vf); if (error != 0) device_printf(pf->dev, "Timed out waiting for PCIe activity to stop on VF-%d\n", vf->vf_num); for (i = 0; i < IXL_VF_RESET_TIMEOUT; i++) { DELAY(10); vfrstat = rd32(hw, I40E_VPGEN_VFRSTAT(vf->vf_num)); if (vfrstat & I40E_VPGEN_VFRSTAT_VFRD_MASK) break; } if (i == IXL_VF_RESET_TIMEOUT) device_printf(pf->dev, "VF %d failed to reset\n", vf->vf_num); wr32(hw, I40E_VFGEN_RSTAT1(vf->vf_num), I40E_VFR_COMPLETED); vfrtrig = rd32(hw, I40E_VPGEN_VFRTRIG(vf->vf_num)); vfrtrig &= ~I40E_VPGEN_VFRTRIG_VFSWR_MASK; wr32(hw, I40E_VPGEN_VFRTRIG(vf->vf_num), vfrtrig); if (vf->vsi.seid != 0) ixl_disable_rings(&vf->vsi); ixl_vf_release_resources(pf, vf); ixl_vf_setup_vsi(pf, vf); ixl_vf_map_queues(pf, vf); wr32(hw, I40E_VFGEN_RSTAT1(vf->vf_num), I40E_VFR_VFACTIVE); ixl_flush(hw); } static int ixl_vc_opcode_level(uint16_t opcode) { switch (opcode) { case I40E_VIRTCHNL_OP_GET_STATS: return (10); default: return (5); } } static void ixl_send_vf_msg(struct ixl_pf *pf, struct ixl_vf *vf, uint16_t op, enum i40e_status_code status, void *msg, uint16_t len) { struct i40e_hw *hw; int global_vf_id; hw = &pf->hw; global_vf_id = hw->func_caps.vf_base_id + vf->vf_num; I40E_VC_DEBUG(pf, ixl_vc_opcode_level(op), "Sending msg (op=%s[%d], status=%d) to VF-%d\n", ixl_vc_opcode_str(op), op, status, vf->vf_num); i40e_aq_send_msg_to_vf(hw, global_vf_id, op, status, msg, len, NULL); } static void ixl_send_vf_ack(struct ixl_pf *pf, struct ixl_vf *vf, uint16_t op) { ixl_send_vf_msg(pf, vf, op, I40E_SUCCESS, NULL, 0); } static void ixl_send_vf_nack_msg(struct ixl_pf *pf, struct ixl_vf *vf, uint16_t op, enum i40e_status_code status, const char *file, int line) { I40E_VC_DEBUG(pf, 1, "Sending NACK (op=%s[%d], err=%s[%d]) to VF-%d from %s:%d\n", ixl_vc_opcode_str(op), op, i40e_stat_str(&pf->hw, status), status, vf->vf_num, file, line); ixl_send_vf_msg(pf, vf, op, status, NULL, 0); } static void ixl_vf_version_msg(struct ixl_pf *pf, struct ixl_vf *vf, void *msg, uint16_t msg_size) { struct i40e_virtchnl_version_info reply; if (msg_size != sizeof(struct i40e_virtchnl_version_info)) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_VERSION, I40E_ERR_PARAM); return; } vf->version = ((struct i40e_virtchnl_version_info *)msg)->minor; reply.major = I40E_VIRTCHNL_VERSION_MAJOR; reply.minor = I40E_VIRTCHNL_VERSION_MINOR; ixl_send_vf_msg(pf, vf, I40E_VIRTCHNL_OP_VERSION, I40E_SUCCESS, &reply, sizeof(reply)); } static void ixl_vf_reset_msg(struct ixl_pf *pf, struct ixl_vf *vf, void *msg, uint16_t msg_size) { if (msg_size != 0) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_RESET_VF, I40E_ERR_PARAM); return; } ixl_reset_vf(pf, vf); /* No response to a reset message. */ } static void ixl_vf_get_resources_msg(struct ixl_pf *pf, struct ixl_vf *vf, void *msg, uint16_t msg_size) { struct i40e_virtchnl_vf_resource reply; if ((vf->version == 0 && msg_size != 0) || (vf->version == 1 && msg_size != 4)) { device_printf(pf->dev, "Invalid GET_VF_RESOURCES message size," " for VF version %d.%d\n", I40E_VIRTCHNL_VERSION_MAJOR, vf->version); i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_GET_VF_RESOURCES, I40E_ERR_PARAM); return; } bzero(&reply, sizeof(reply)); if (vf->version == I40E_VIRTCHNL_VERSION_MINOR_NO_VF_CAPS) reply.vf_offload_flags = I40E_VIRTCHNL_VF_OFFLOAD_L2 | I40E_VIRTCHNL_VF_OFFLOAD_RSS_REG | I40E_VIRTCHNL_VF_OFFLOAD_VLAN; else /* Force VF RSS setup by PF in 1.1+ VFs */ reply.vf_offload_flags = *(u32 *)msg & ( I40E_VIRTCHNL_VF_OFFLOAD_L2 | I40E_VIRTCHNL_VF_OFFLOAD_RSS_PF | I40E_VIRTCHNL_VF_OFFLOAD_VLAN); reply.num_vsis = 1; reply.num_queue_pairs = vf->vsi.num_queues; reply.max_vectors = pf->hw.func_caps.num_msix_vectors_vf; reply.rss_key_size = 52; reply.rss_lut_size = 64; reply.vsi_res[0].vsi_id = vf->vsi.vsi_num; reply.vsi_res[0].vsi_type = I40E_VSI_SRIOV; reply.vsi_res[0].num_queue_pairs = vf->vsi.num_queues; memcpy(reply.vsi_res[0].default_mac_addr, vf->mac, ETHER_ADDR_LEN); ixl_send_vf_msg(pf, vf, I40E_VIRTCHNL_OP_GET_VF_RESOURCES, I40E_SUCCESS, &reply, sizeof(reply)); } static int ixl_vf_config_tx_queue(struct ixl_pf *pf, struct ixl_vf *vf, struct i40e_virtchnl_txq_info *info) { struct i40e_hw *hw; struct i40e_hmc_obj_txq txq; uint16_t global_queue_num, global_vf_num; enum i40e_status_code status; uint32_t qtx_ctl; hw = &pf->hw; global_queue_num = ixl_pf_qidx_from_vsi_qidx(&vf->qtag, info->queue_id); global_vf_num = hw->func_caps.vf_base_id + vf->vf_num; bzero(&txq, sizeof(txq)); DDPRINTF(pf->dev, "VF %d: PF TX queue %d / VF TX queue %d (Global VF %d)\n", vf->vf_num, global_queue_num, info->queue_id, global_vf_num); status = i40e_clear_lan_tx_queue_context(hw, global_queue_num); if (status != I40E_SUCCESS) return (EINVAL); txq.base = info->dma_ring_addr / IXL_TX_CTX_BASE_UNITS; txq.head_wb_ena = info->headwb_enabled; txq.head_wb_addr = info->dma_headwb_addr; txq.qlen = info->ring_len; txq.rdylist = le16_to_cpu(vf->vsi.info.qs_handle[0]); txq.rdylist_act = 0; status = i40e_set_lan_tx_queue_context(hw, global_queue_num, &txq); if (status != I40E_SUCCESS) return (EINVAL); qtx_ctl = I40E_QTX_CTL_VF_QUEUE | (hw->pf_id << I40E_QTX_CTL_PF_INDX_SHIFT) | (global_vf_num << I40E_QTX_CTL_VFVM_INDX_SHIFT); wr32(hw, I40E_QTX_CTL(global_queue_num), qtx_ctl); ixl_flush(hw); ixl_pf_qmgr_mark_queue_configured(&vf->qtag, info->queue_id, true); return (0); } static int ixl_vf_config_rx_queue(struct ixl_pf *pf, struct ixl_vf *vf, struct i40e_virtchnl_rxq_info *info) { struct i40e_hw *hw; struct i40e_hmc_obj_rxq rxq; uint16_t global_queue_num; enum i40e_status_code status; hw = &pf->hw; global_queue_num = ixl_pf_qidx_from_vsi_qidx(&vf->qtag, info->queue_id); bzero(&rxq, sizeof(rxq)); DDPRINTF(pf->dev, "VF %d: PF RX queue %d / VF RX queue %d\n", vf->vf_num, global_queue_num, info->queue_id); if (info->databuffer_size > IXL_VF_MAX_BUFFER) return (EINVAL); if (info->max_pkt_size > IXL_VF_MAX_FRAME || info->max_pkt_size < ETHER_MIN_LEN) return (EINVAL); if (info->splithdr_enabled) { if (info->hdr_size > IXL_VF_MAX_HDR_BUFFER) return (EINVAL); rxq.hsplit_0 = info->rx_split_pos & (I40E_HMC_OBJ_RX_HSPLIT_0_SPLIT_L2 | I40E_HMC_OBJ_RX_HSPLIT_0_SPLIT_IP | I40E_HMC_OBJ_RX_HSPLIT_0_SPLIT_TCP_UDP | I40E_HMC_OBJ_RX_HSPLIT_0_SPLIT_SCTP); rxq.hbuff = info->hdr_size >> I40E_RXQ_CTX_HBUFF_SHIFT; rxq.dtype = 2; } status = i40e_clear_lan_rx_queue_context(hw, global_queue_num); if (status != I40E_SUCCESS) return (EINVAL); rxq.base = info->dma_ring_addr / IXL_RX_CTX_BASE_UNITS; rxq.qlen = info->ring_len; rxq.dbuff = info->databuffer_size >> I40E_RXQ_CTX_DBUFF_SHIFT; rxq.dsize = 1; rxq.crcstrip = 1; rxq.l2tsel = 1; rxq.rxmax = info->max_pkt_size; rxq.tphrdesc_ena = 1; rxq.tphwdesc_ena = 1; rxq.tphdata_ena = 1; rxq.tphhead_ena = 1; rxq.lrxqthresh = 2; rxq.prefena = 1; status = i40e_set_lan_rx_queue_context(hw, global_queue_num, &rxq); if (status != I40E_SUCCESS) return (EINVAL); ixl_pf_qmgr_mark_queue_configured(&vf->qtag, info->queue_id, false); return (0); } static void ixl_vf_config_vsi_msg(struct ixl_pf *pf, struct ixl_vf *vf, void *msg, uint16_t msg_size) { struct i40e_virtchnl_vsi_queue_config_info *info; struct i40e_virtchnl_queue_pair_info *pair; uint16_t expected_msg_size; int i; if (msg_size < sizeof(*info)) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_CONFIG_VSI_QUEUES, I40E_ERR_PARAM); return; } info = msg; if (info->num_queue_pairs == 0 || info->num_queue_pairs > vf->vsi.num_queues) { device_printf(pf->dev, "VF %d: invalid # of qpairs (msg has %d, VSI has %d)\n", vf->vf_num, info->num_queue_pairs, vf->vsi.num_queues); i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_CONFIG_VSI_QUEUES, I40E_ERR_PARAM); return; } expected_msg_size = sizeof(*info) + info->num_queue_pairs * sizeof(*pair); if (msg_size != expected_msg_size) { device_printf(pf->dev, "VF %d: size of recvd message (%d) does not match expected size (%d)\n", vf->vf_num, msg_size, expected_msg_size); i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_CONFIG_VSI_QUEUES, I40E_ERR_PARAM); return; } if (info->vsi_id != vf->vsi.vsi_num) { device_printf(pf->dev, "VF %d: VSI id in recvd message (%d) does not match expected id (%d)\n", vf->vf_num, info->vsi_id, vf->vsi.vsi_num); i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_CONFIG_VSI_QUEUES, I40E_ERR_PARAM); return; } for (i = 0; i < info->num_queue_pairs; i++) { pair = &info->qpair[i]; if (pair->txq.vsi_id != vf->vsi.vsi_num || pair->rxq.vsi_id != vf->vsi.vsi_num || pair->txq.queue_id != pair->rxq.queue_id || pair->txq.queue_id >= vf->vsi.num_queues) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_CONFIG_VSI_QUEUES, I40E_ERR_PARAM); return; } if (ixl_vf_config_tx_queue(pf, vf, &pair->txq) != 0) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_CONFIG_VSI_QUEUES, I40E_ERR_PARAM); return; } if (ixl_vf_config_rx_queue(pf, vf, &pair->rxq) != 0) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_CONFIG_VSI_QUEUES, I40E_ERR_PARAM); return; } } ixl_send_vf_ack(pf, vf, I40E_VIRTCHNL_OP_CONFIG_VSI_QUEUES); } static void ixl_vf_set_qctl(struct ixl_pf *pf, const struct i40e_virtchnl_vector_map *vector, enum i40e_queue_type cur_type, uint16_t cur_queue, enum i40e_queue_type *last_type, uint16_t *last_queue) { uint32_t offset, qctl; uint16_t itr_indx; if (cur_type == I40E_QUEUE_TYPE_RX) { offset = I40E_QINT_RQCTL(cur_queue); itr_indx = vector->rxitr_idx; } else { offset = I40E_QINT_TQCTL(cur_queue); itr_indx = vector->txitr_idx; } qctl = htole32((vector->vector_id << I40E_QINT_RQCTL_MSIX_INDX_SHIFT) | (*last_type << I40E_QINT_RQCTL_NEXTQ_TYPE_SHIFT) | (*last_queue << I40E_QINT_RQCTL_NEXTQ_INDX_SHIFT) | I40E_QINT_RQCTL_CAUSE_ENA_MASK | (itr_indx << I40E_QINT_RQCTL_ITR_INDX_SHIFT)); wr32(&pf->hw, offset, qctl); *last_type = cur_type; *last_queue = cur_queue; } static void ixl_vf_config_vector(struct ixl_pf *pf, struct ixl_vf *vf, const struct i40e_virtchnl_vector_map *vector) { struct i40e_hw *hw; u_int qindex; enum i40e_queue_type type, last_type; uint32_t lnklst_reg; uint16_t rxq_map, txq_map, cur_queue, last_queue; hw = &pf->hw; rxq_map = vector->rxq_map; txq_map = vector->txq_map; last_queue = IXL_END_OF_INTR_LNKLST; last_type = I40E_QUEUE_TYPE_RX; /* * The datasheet says to optimize performance, RX queues and TX queues * should be interleaved in the interrupt linked list, so we process * both at once here. */ while ((rxq_map != 0) || (txq_map != 0)) { if (txq_map != 0) { qindex = ffs(txq_map) - 1; type = I40E_QUEUE_TYPE_TX; cur_queue = ixl_pf_qidx_from_vsi_qidx(&vf->qtag, qindex); ixl_vf_set_qctl(pf, vector, type, cur_queue, &last_type, &last_queue); txq_map &= ~(1 << qindex); } if (rxq_map != 0) { qindex = ffs(rxq_map) - 1; type = I40E_QUEUE_TYPE_RX; cur_queue = ixl_pf_qidx_from_vsi_qidx(&vf->qtag, qindex); ixl_vf_set_qctl(pf, vector, type, cur_queue, &last_type, &last_queue); rxq_map &= ~(1 << qindex); } } if (vector->vector_id == 0) lnklst_reg = I40E_VPINT_LNKLST0(vf->vf_num); else lnklst_reg = IXL_VPINT_LNKLSTN_REG(hw, vector->vector_id, vf->vf_num); wr32(hw, lnklst_reg, (last_queue << I40E_VPINT_LNKLST0_FIRSTQ_INDX_SHIFT) | (last_type << I40E_VPINT_LNKLST0_FIRSTQ_TYPE_SHIFT)); ixl_flush(hw); } static void ixl_vf_config_irq_msg(struct ixl_pf *pf, struct ixl_vf *vf, void *msg, uint16_t msg_size) { struct i40e_virtchnl_irq_map_info *map; struct i40e_virtchnl_vector_map *vector; struct i40e_hw *hw; int i, largest_txq, largest_rxq; hw = &pf->hw; if (msg_size < sizeof(*map)) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_CONFIG_IRQ_MAP, I40E_ERR_PARAM); return; } map = msg; if (map->num_vectors == 0) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_CONFIG_IRQ_MAP, I40E_ERR_PARAM); return; } if (msg_size != sizeof(*map) + map->num_vectors * sizeof(*vector)) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_CONFIG_IRQ_MAP, I40E_ERR_PARAM); return; } for (i = 0; i < map->num_vectors; i++) { vector = &map->vecmap[i]; if ((vector->vector_id >= hw->func_caps.num_msix_vectors_vf) || vector->vsi_id != vf->vsi.vsi_num) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_CONFIG_IRQ_MAP, I40E_ERR_PARAM); return; } if (vector->rxq_map != 0) { largest_rxq = fls(vector->rxq_map) - 1; if (largest_rxq >= vf->vsi.num_queues) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_CONFIG_IRQ_MAP, I40E_ERR_PARAM); return; } } if (vector->txq_map != 0) { largest_txq = fls(vector->txq_map) - 1; if (largest_txq >= vf->vsi.num_queues) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_CONFIG_IRQ_MAP, I40E_ERR_PARAM); return; } } if (vector->rxitr_idx > IXL_MAX_ITR_IDX || vector->txitr_idx > IXL_MAX_ITR_IDX) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_CONFIG_IRQ_MAP, I40E_ERR_PARAM); return; } ixl_vf_config_vector(pf, vf, vector); } ixl_send_vf_ack(pf, vf, I40E_VIRTCHNL_OP_CONFIG_IRQ_MAP); } static void ixl_vf_enable_queues_msg(struct ixl_pf *pf, struct ixl_vf *vf, void *msg, uint16_t msg_size) { struct i40e_virtchnl_queue_select *select; int error = 0; if (msg_size != sizeof(*select)) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_ENABLE_QUEUES, I40E_ERR_PARAM); return; } select = msg; if (select->vsi_id != vf->vsi.vsi_num || select->rx_queues == 0 || select->tx_queues == 0) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_ENABLE_QUEUES, I40E_ERR_PARAM); return; } /* Enable TX rings selected by the VF */ for (int i = 0; i < 32; i++) { if ((1 << i) & select->tx_queues) { /* Warn if queue is out of VF allocation range */ if (i >= vf->vsi.num_queues) { device_printf(pf->dev, "VF %d: TX ring %d is outside of VF VSI allocation!\n", vf->vf_num, i); break; } /* Skip this queue if it hasn't been configured */ if (!ixl_pf_qmgr_is_queue_configured(&vf->qtag, i, true)) continue; /* Warn if this queue is already marked as enabled */ if (ixl_pf_qmgr_is_queue_enabled(&vf->qtag, i, true)) device_printf(pf->dev, "VF %d: TX ring %d is already enabled!\n", vf->vf_num, i); error = ixl_enable_tx_ring(pf, &vf->qtag, i); if (error) break; else ixl_pf_qmgr_mark_queue_enabled(&vf->qtag, i, true); } } /* Enable RX rings selected by the VF */ for (int i = 0; i < 32; i++) { if ((1 << i) & select->rx_queues) { /* Warn if queue is out of VF allocation range */ if (i >= vf->vsi.num_queues) { device_printf(pf->dev, "VF %d: RX ring %d is outside of VF VSI allocation!\n", vf->vf_num, i); break; } /* Skip this queue if it hasn't been configured */ if (!ixl_pf_qmgr_is_queue_configured(&vf->qtag, i, false)) continue; /* Warn if this queue is already marked as enabled */ if (ixl_pf_qmgr_is_queue_enabled(&vf->qtag, i, false)) device_printf(pf->dev, "VF %d: RX ring %d is already enabled!\n", vf->vf_num, i); error = ixl_enable_rx_ring(pf, &vf->qtag, i); if (error) break; else ixl_pf_qmgr_mark_queue_enabled(&vf->qtag, i, false); } } if (error) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_ENABLE_QUEUES, I40E_ERR_TIMEOUT); return; } ixl_send_vf_ack(pf, vf, I40E_VIRTCHNL_OP_ENABLE_QUEUES); } static void ixl_vf_disable_queues_msg(struct ixl_pf *pf, struct ixl_vf *vf, void *msg, uint16_t msg_size) { struct i40e_virtchnl_queue_select *select; int error = 0; if (msg_size != sizeof(*select)) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_DISABLE_QUEUES, I40E_ERR_PARAM); return; } select = msg; if (select->vsi_id != vf->vsi.vsi_num || select->rx_queues == 0 || select->tx_queues == 0) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_DISABLE_QUEUES, I40E_ERR_PARAM); return; } /* Disable TX rings selected by the VF */ for (int i = 0; i < 32; i++) { if ((1 << i) & select->tx_queues) { /* Warn if queue is out of VF allocation range */ if (i >= vf->vsi.num_queues) { device_printf(pf->dev, "VF %d: TX ring %d is outside of VF VSI allocation!\n", vf->vf_num, i); break; } /* Skip this queue if it hasn't been configured */ if (!ixl_pf_qmgr_is_queue_configured(&vf->qtag, i, true)) continue; /* Warn if this queue is already marked as disabled */ if (!ixl_pf_qmgr_is_queue_enabled(&vf->qtag, i, true)) { device_printf(pf->dev, "VF %d: TX ring %d is already disabled!\n", vf->vf_num, i); continue; } error = ixl_disable_tx_ring(pf, &vf->qtag, i); if (error) break; else ixl_pf_qmgr_mark_queue_disabled(&vf->qtag, i, true); } } /* Enable RX rings selected by the VF */ for (int i = 0; i < 32; i++) { if ((1 << i) & select->rx_queues) { /* Warn if queue is out of VF allocation range */ if (i >= vf->vsi.num_queues) { device_printf(pf->dev, "VF %d: RX ring %d is outside of VF VSI allocation!\n", vf->vf_num, i); break; } /* Skip this queue if it hasn't been configured */ if (!ixl_pf_qmgr_is_queue_configured(&vf->qtag, i, false)) continue; /* Warn if this queue is already marked as disabled */ if (!ixl_pf_qmgr_is_queue_enabled(&vf->qtag, i, false)) { device_printf(pf->dev, "VF %d: RX ring %d is already disabled!\n", vf->vf_num, i); continue; } error = ixl_disable_rx_ring(pf, &vf->qtag, i); if (error) break; else ixl_pf_qmgr_mark_queue_disabled(&vf->qtag, i, false); } } if (error) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_DISABLE_QUEUES, I40E_ERR_TIMEOUT); return; } ixl_send_vf_ack(pf, vf, I40E_VIRTCHNL_OP_DISABLE_QUEUES); } static bool ixl_zero_mac(const uint8_t *addr) { uint8_t zero[ETHER_ADDR_LEN] = {0, 0, 0, 0, 0, 0}; return (cmp_etheraddr(addr, zero)); } static bool ixl_bcast_mac(const uint8_t *addr) { return (cmp_etheraddr(addr, ixl_bcast_addr)); } static int ixl_vf_mac_valid(struct ixl_vf *vf, const uint8_t *addr) { if (ixl_zero_mac(addr) || ixl_bcast_mac(addr)) return (EINVAL); /* * If the VF is not allowed to change its MAC address, don't let it * set a MAC filter for an address that is not a multicast address and * is not its assigned MAC. */ if (!(vf->vf_flags & VF_FLAG_SET_MAC_CAP) && !(ETHER_IS_MULTICAST(addr) || cmp_etheraddr(addr, vf->mac))) return (EPERM); return (0); } static void ixl_vf_add_mac_msg(struct ixl_pf *pf, struct ixl_vf *vf, void *msg, uint16_t msg_size) { struct i40e_virtchnl_ether_addr_list *addr_list; struct i40e_virtchnl_ether_addr *addr; struct ixl_vsi *vsi; int i; size_t expected_size; vsi = &vf->vsi; if (msg_size < sizeof(*addr_list)) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_ADD_ETHER_ADDRESS, I40E_ERR_PARAM); return; } addr_list = msg; expected_size = sizeof(*addr_list) + addr_list->num_elements * sizeof(*addr); if (addr_list->num_elements == 0 || addr_list->vsi_id != vsi->vsi_num || msg_size != expected_size) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_ADD_ETHER_ADDRESS, I40E_ERR_PARAM); return; } for (i = 0; i < addr_list->num_elements; i++) { if (ixl_vf_mac_valid(vf, addr_list->list[i].addr) != 0) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_ADD_ETHER_ADDRESS, I40E_ERR_PARAM); return; } } for (i = 0; i < addr_list->num_elements; i++) { addr = &addr_list->list[i]; ixl_add_filter(vsi, addr->addr, IXL_VLAN_ANY); } ixl_send_vf_ack(pf, vf, I40E_VIRTCHNL_OP_ADD_ETHER_ADDRESS); } static void ixl_vf_del_mac_msg(struct ixl_pf *pf, struct ixl_vf *vf, void *msg, uint16_t msg_size) { struct i40e_virtchnl_ether_addr_list *addr_list; struct i40e_virtchnl_ether_addr *addr; size_t expected_size; int i; if (msg_size < sizeof(*addr_list)) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_ADD_ETHER_ADDRESS, I40E_ERR_PARAM); return; } addr_list = msg; expected_size = sizeof(*addr_list) + addr_list->num_elements * sizeof(*addr); if (addr_list->num_elements == 0 || addr_list->vsi_id != vf->vsi.vsi_num || msg_size != expected_size) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_ADD_ETHER_ADDRESS, I40E_ERR_PARAM); return; } for (i = 0; i < addr_list->num_elements; i++) { addr = &addr_list->list[i]; if (ixl_zero_mac(addr->addr) || ixl_bcast_mac(addr->addr)) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_ADD_ETHER_ADDRESS, I40E_ERR_PARAM); return; } } for (i = 0; i < addr_list->num_elements; i++) { addr = &addr_list->list[i]; ixl_del_filter(&vf->vsi, addr->addr, IXL_VLAN_ANY); } ixl_send_vf_ack(pf, vf, I40E_VIRTCHNL_OP_DEL_ETHER_ADDRESS); } static enum i40e_status_code ixl_vf_enable_vlan_strip(struct ixl_pf *pf, struct ixl_vf *vf) { struct i40e_vsi_context vsi_ctx; vsi_ctx.seid = vf->vsi.seid; bzero(&vsi_ctx.info, sizeof(vsi_ctx.info)); vsi_ctx.info.valid_sections = htole16(I40E_AQ_VSI_PROP_VLAN_VALID); vsi_ctx.info.port_vlan_flags = I40E_AQ_VSI_PVLAN_MODE_ALL | I40E_AQ_VSI_PVLAN_EMOD_STR_BOTH; return (i40e_aq_update_vsi_params(&pf->hw, &vsi_ctx, NULL)); } static void ixl_vf_add_vlan_msg(struct ixl_pf *pf, struct ixl_vf *vf, void *msg, uint16_t msg_size) { struct i40e_virtchnl_vlan_filter_list *filter_list; enum i40e_status_code code; size_t expected_size; int i; if (msg_size < sizeof(*filter_list)) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_ADD_VLAN, I40E_ERR_PARAM); return; } filter_list = msg; expected_size = sizeof(*filter_list) + filter_list->num_elements * sizeof(uint16_t); if (filter_list->num_elements == 0 || filter_list->vsi_id != vf->vsi.vsi_num || msg_size != expected_size) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_ADD_VLAN, I40E_ERR_PARAM); return; } if (!(vf->vf_flags & VF_FLAG_VLAN_CAP)) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_ADD_VLAN, I40E_ERR_PARAM); return; } for (i = 0; i < filter_list->num_elements; i++) { if (filter_list->vlan_id[i] > EVL_VLID_MASK) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_ADD_VLAN, I40E_ERR_PARAM); return; } } code = ixl_vf_enable_vlan_strip(pf, vf); if (code != I40E_SUCCESS) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_ADD_VLAN, I40E_ERR_PARAM); } for (i = 0; i < filter_list->num_elements; i++) ixl_add_filter(&vf->vsi, vf->mac, filter_list->vlan_id[i]); ixl_send_vf_ack(pf, vf, I40E_VIRTCHNL_OP_ADD_VLAN); } static void ixl_vf_del_vlan_msg(struct ixl_pf *pf, struct ixl_vf *vf, void *msg, uint16_t msg_size) { struct i40e_virtchnl_vlan_filter_list *filter_list; int i; size_t expected_size; if (msg_size < sizeof(*filter_list)) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_DEL_VLAN, I40E_ERR_PARAM); return; } filter_list = msg; expected_size = sizeof(*filter_list) + filter_list->num_elements * sizeof(uint16_t); if (filter_list->num_elements == 0 || filter_list->vsi_id != vf->vsi.vsi_num || msg_size != expected_size) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_DEL_VLAN, I40E_ERR_PARAM); return; } for (i = 0; i < filter_list->num_elements; i++) { if (filter_list->vlan_id[i] > EVL_VLID_MASK) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_ADD_VLAN, I40E_ERR_PARAM); return; } } if (!(vf->vf_flags & VF_FLAG_VLAN_CAP)) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_ADD_VLAN, I40E_ERR_PARAM); return; } for (i = 0; i < filter_list->num_elements; i++) ixl_del_filter(&vf->vsi, vf->mac, filter_list->vlan_id[i]); ixl_send_vf_ack(pf, vf, I40E_VIRTCHNL_OP_DEL_VLAN); } static void ixl_vf_config_promisc_msg(struct ixl_pf *pf, struct ixl_vf *vf, void *msg, uint16_t msg_size) { struct i40e_virtchnl_promisc_info *info; enum i40e_status_code code; if (msg_size != sizeof(*info)) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE, I40E_ERR_PARAM); return; } if (!(vf->vf_flags & VF_FLAG_PROMISC_CAP)) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE, I40E_ERR_PARAM); return; } info = msg; if (info->vsi_id != vf->vsi.vsi_num) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE, I40E_ERR_PARAM); return; } code = i40e_aq_set_vsi_unicast_promiscuous(&pf->hw, info->vsi_id, info->flags & I40E_FLAG_VF_UNICAST_PROMISC, NULL, TRUE); if (code != I40E_SUCCESS) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE, code); return; } code = i40e_aq_set_vsi_multicast_promiscuous(&pf->hw, info->vsi_id, info->flags & I40E_FLAG_VF_MULTICAST_PROMISC, NULL); if (code != I40E_SUCCESS) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE, code); return; } ixl_send_vf_ack(pf, vf, I40E_VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE); } static void ixl_vf_get_stats_msg(struct ixl_pf *pf, struct ixl_vf *vf, void *msg, uint16_t msg_size) { struct i40e_virtchnl_queue_select *queue; if (msg_size != sizeof(*queue)) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_GET_STATS, I40E_ERR_PARAM); return; } queue = msg; if (queue->vsi_id != vf->vsi.vsi_num) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_GET_STATS, I40E_ERR_PARAM); return; } ixl_update_eth_stats(&vf->vsi); ixl_send_vf_msg(pf, vf, I40E_VIRTCHNL_OP_GET_STATS, I40E_SUCCESS, &vf->vsi.eth_stats, sizeof(vf->vsi.eth_stats)); } static void ixl_vf_config_rss_key_msg(struct ixl_pf *pf, struct ixl_vf *vf, void *msg, uint16_t msg_size) { struct i40e_hw *hw; struct i40e_virtchnl_rss_key *key; struct i40e_aqc_get_set_rss_key_data key_data; enum i40e_status_code status; hw = &pf->hw; if (msg_size < sizeof(*key)) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_CONFIG_RSS_KEY, I40E_ERR_PARAM); return; } key = msg; if (key->key_len > 52) { device_printf(pf->dev, "VF %d: Key size in msg (%d) is greater than max key size (%d)\n", vf->vf_num, key->key_len, 52); i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_CONFIG_RSS_KEY, I40E_ERR_PARAM); return; } if (key->vsi_id != vf->vsi.vsi_num) { device_printf(pf->dev, "VF %d: VSI id in recvd message (%d) does not match expected id (%d)\n", vf->vf_num, key->vsi_id, vf->vsi.vsi_num); i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_CONFIG_RSS_KEY, I40E_ERR_PARAM); return; } /* Fill out hash using MAC-dependent method */ if (hw->mac.type == I40E_MAC_X722) { bzero(&key_data, sizeof(key_data)); if (key->key_len <= 40) bcopy(key->key, key_data.standard_rss_key, key->key_len); else { bcopy(key->key, key_data.standard_rss_key, 40); bcopy(&key->key[40], key_data.extended_hash_key, key->key_len - 40); } status = i40e_aq_set_rss_key(hw, vf->vsi.vsi_num, &key_data); if (status) { device_printf(pf->dev, "i40e_aq_set_rss_key status %s, error %s\n", i40e_stat_str(hw, status), i40e_aq_str(hw, hw->aq.asq_last_status)); i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_CONFIG_RSS_KEY, I40E_ERR_ADMIN_QUEUE_ERROR); return; } } else { for (int i = 0; i < (key->key_len / 4); i++) i40e_write_rx_ctl(hw, I40E_VFQF_HKEY1(i, IXL_GLOBAL_VF_NUM(hw, vf)), ((u32 *)key->key)[i]); } DDPRINTF(pf->dev, "VF %d: Programmed key starting with 0x%x ok!", vf->vf_num, key->key[0]); ixl_send_vf_ack(pf, vf, I40E_VIRTCHNL_OP_CONFIG_RSS_KEY); } static void ixl_vf_config_rss_lut_msg(struct ixl_pf *pf, struct ixl_vf *vf, void *msg, uint16_t msg_size) { struct i40e_hw *hw; struct i40e_virtchnl_rss_lut *lut; enum i40e_status_code status; hw = &pf->hw; if (msg_size < sizeof(*lut)) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_CONFIG_RSS_LUT, I40E_ERR_PARAM); return; } lut = msg; if (lut->lut_entries > 64) { device_printf(pf->dev, "VF %d: # of LUT entries in msg (%d) is greater than max (%d)\n", vf->vf_num, lut->lut_entries, 64); i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_CONFIG_RSS_LUT, I40E_ERR_PARAM); return; } if (lut->vsi_id != vf->vsi.vsi_num) { device_printf(pf->dev, "VF %d: VSI id in recvd message (%d) does not match expected id (%d)\n", vf->vf_num, lut->vsi_id, vf->vsi.vsi_num); i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_CONFIG_RSS_LUT, I40E_ERR_PARAM); return; } /* Fill out LUT using MAC-dependent method */ if (hw->mac.type == I40E_MAC_X722) { status = i40e_aq_set_rss_lut(hw, vf->vsi.vsi_num, false, lut->lut, lut->lut_entries); if (status) { device_printf(pf->dev, "i40e_aq_set_rss_lut status %s, error %s\n", i40e_stat_str(hw, status), i40e_aq_str(hw, hw->aq.asq_last_status)); i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_CONFIG_RSS_LUT, I40E_ERR_ADMIN_QUEUE_ERROR); return; } } else { for (int i = 0; i < (lut->lut_entries / 4); i++) i40e_write_rx_ctl(hw, I40E_VFQF_HLUT1(i, IXL_GLOBAL_VF_NUM(hw, vf)), ((u32 *)lut->lut)[i]); } DDPRINTF(pf->dev, "VF %d: Programmed LUT starting with 0x%x and length %d ok!", vf->vf_num, lut->lut[0], lut->lut_entries); ixl_send_vf_ack(pf, vf, I40E_VIRTCHNL_OP_CONFIG_RSS_LUT); } static void ixl_vf_set_rss_hena_msg(struct ixl_pf *pf, struct ixl_vf *vf, void *msg, uint16_t msg_size) { struct i40e_hw *hw; struct i40e_virtchnl_rss_hena *hena; hw = &pf->hw; if (msg_size < sizeof(*hena)) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_SET_RSS_HENA, I40E_ERR_PARAM); return; } hena = msg; /* Set HENA */ i40e_write_rx_ctl(hw, I40E_VFQF_HENA1(0, IXL_GLOBAL_VF_NUM(hw, vf)), (u32)hena->hena); i40e_write_rx_ctl(hw, I40E_VFQF_HENA1(1, IXL_GLOBAL_VF_NUM(hw, vf)), (u32)(hena->hena >> 32)); DDPRINTF(pf->dev, "VF %d: Programmed HENA with 0x%016lx", vf->vf_num, hena->hena); ixl_send_vf_ack(pf, vf, I40E_VIRTCHNL_OP_SET_RSS_HENA); } void ixl_handle_vf_msg(struct ixl_pf *pf, struct i40e_arq_event_info *event) { struct ixl_vf *vf; void *msg; uint16_t vf_num, msg_size; uint32_t opcode; vf_num = le16toh(event->desc.retval) - pf->hw.func_caps.vf_base_id; opcode = le32toh(event->desc.cookie_high); if (vf_num >= pf->num_vfs) { device_printf(pf->dev, "Got msg from illegal VF: %d\n", vf_num); return; } vf = &pf->vfs[vf_num]; msg = event->msg_buf; msg_size = event->msg_len; I40E_VC_DEBUG(pf, ixl_vc_opcode_level(opcode), "Got msg %s(%d) from%sVF-%d of size %d\n", ixl_vc_opcode_str(opcode), opcode, (vf->vf_flags & VF_FLAG_ENABLED) ? " " : " disabled ", vf_num, msg_size); /* This must be a stray msg from a previously destroyed VF. */ if (!(vf->vf_flags & VF_FLAG_ENABLED)) return; switch (opcode) { case I40E_VIRTCHNL_OP_VERSION: ixl_vf_version_msg(pf, vf, msg, msg_size); break; case I40E_VIRTCHNL_OP_RESET_VF: ixl_vf_reset_msg(pf, vf, msg, msg_size); break; case I40E_VIRTCHNL_OP_GET_VF_RESOURCES: ixl_vf_get_resources_msg(pf, vf, msg, msg_size); break; case I40E_VIRTCHNL_OP_CONFIG_VSI_QUEUES: ixl_vf_config_vsi_msg(pf, vf, msg, msg_size); break; case I40E_VIRTCHNL_OP_CONFIG_IRQ_MAP: ixl_vf_config_irq_msg(pf, vf, msg, msg_size); break; case I40E_VIRTCHNL_OP_ENABLE_QUEUES: ixl_vf_enable_queues_msg(pf, vf, msg, msg_size); break; case I40E_VIRTCHNL_OP_DISABLE_QUEUES: ixl_vf_disable_queues_msg(pf, vf, msg, msg_size); break; case I40E_VIRTCHNL_OP_ADD_ETHER_ADDRESS: ixl_vf_add_mac_msg(pf, vf, msg, msg_size); break; case I40E_VIRTCHNL_OP_DEL_ETHER_ADDRESS: ixl_vf_del_mac_msg(pf, vf, msg, msg_size); break; case I40E_VIRTCHNL_OP_ADD_VLAN: ixl_vf_add_vlan_msg(pf, vf, msg, msg_size); break; case I40E_VIRTCHNL_OP_DEL_VLAN: ixl_vf_del_vlan_msg(pf, vf, msg, msg_size); break; case I40E_VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE: ixl_vf_config_promisc_msg(pf, vf, msg, msg_size); break; case I40E_VIRTCHNL_OP_GET_STATS: ixl_vf_get_stats_msg(pf, vf, msg, msg_size); break; case I40E_VIRTCHNL_OP_CONFIG_RSS_KEY: ixl_vf_config_rss_key_msg(pf, vf, msg, msg_size); break; case I40E_VIRTCHNL_OP_CONFIG_RSS_LUT: ixl_vf_config_rss_lut_msg(pf, vf, msg, msg_size); break; case I40E_VIRTCHNL_OP_SET_RSS_HENA: ixl_vf_set_rss_hena_msg(pf, vf, msg, msg_size); break; /* These two opcodes have been superseded by CONFIG_VSI_QUEUES. */ case I40E_VIRTCHNL_OP_CONFIG_TX_QUEUE: case I40E_VIRTCHNL_OP_CONFIG_RX_QUEUE: default: i40e_send_vf_nack(pf, vf, opcode, I40E_ERR_NOT_IMPLEMENTED); break; } } /* Handle any VFs that have reset themselves via a Function Level Reset(FLR). */ void ixl_handle_vflr(void *arg, int pending) { struct ixl_pf *pf; struct ixl_vf *vf; struct i40e_hw *hw; uint16_t global_vf_num; uint32_t vflrstat_index, vflrstat_mask, vflrstat, icr0; int i; pf = arg; hw = &pf->hw; IXL_PF_LOCK(pf); for (i = 0; i < pf->num_vfs; i++) { global_vf_num = hw->func_caps.vf_base_id + i; vf = &pf->vfs[i]; if (!(vf->vf_flags & VF_FLAG_ENABLED)) continue; vflrstat_index = IXL_GLGEN_VFLRSTAT_INDEX(global_vf_num); vflrstat_mask = IXL_GLGEN_VFLRSTAT_MASK(global_vf_num); vflrstat = rd32(hw, I40E_GLGEN_VFLRSTAT(vflrstat_index)); if (vflrstat & vflrstat_mask) { wr32(hw, I40E_GLGEN_VFLRSTAT(vflrstat_index), vflrstat_mask); ixl_reinit_vf(pf, vf); } } icr0 = rd32(hw, I40E_PFINT_ICR0_ENA); icr0 |= I40E_PFINT_ICR0_ENA_VFLR_MASK; wr32(hw, I40E_PFINT_ICR0_ENA, icr0); ixl_flush(hw); IXL_PF_UNLOCK(pf); } static int ixl_adminq_err_to_errno(enum i40e_admin_queue_err err) { switch (err) { case I40E_AQ_RC_EPERM: return (EPERM); case I40E_AQ_RC_ENOENT: return (ENOENT); case I40E_AQ_RC_ESRCH: return (ESRCH); case I40E_AQ_RC_EINTR: return (EINTR); case I40E_AQ_RC_EIO: return (EIO); case I40E_AQ_RC_ENXIO: return (ENXIO); case I40E_AQ_RC_E2BIG: return (E2BIG); case I40E_AQ_RC_EAGAIN: return (EAGAIN); case I40E_AQ_RC_ENOMEM: return (ENOMEM); case I40E_AQ_RC_EACCES: return (EACCES); case I40E_AQ_RC_EFAULT: return (EFAULT); case I40E_AQ_RC_EBUSY: return (EBUSY); case I40E_AQ_RC_EEXIST: return (EEXIST); case I40E_AQ_RC_EINVAL: return (EINVAL); case I40E_AQ_RC_ENOTTY: return (ENOTTY); case I40E_AQ_RC_ENOSPC: return (ENOSPC); case I40E_AQ_RC_ENOSYS: return (ENOSYS); case I40E_AQ_RC_ERANGE: return (ERANGE); case I40E_AQ_RC_EFLUSHED: return (EINVAL); /* No exact equivalent in errno.h */ case I40E_AQ_RC_BAD_ADDR: return (EFAULT); case I40E_AQ_RC_EMODE: return (EPERM); case I40E_AQ_RC_EFBIG: return (EFBIG); default: return (EINVAL); } } int ixl_iov_init(device_t dev, uint16_t num_vfs, const nvlist_t *params) { struct ixl_pf *pf; struct i40e_hw *hw; struct ixl_vsi *pf_vsi; enum i40e_status_code ret; int i, error; pf = device_get_softc(dev); hw = &pf->hw; pf_vsi = &pf->vsi; IXL_PF_LOCK(pf); - pf->vfs = malloc(sizeof(struct ixl_vf) * num_vfs, M_IXL, M_NOWAIT | - M_ZERO); + pf->vfs = mallocarray(num_vfs, sizeof(struct ixl_vf), M_IXL, + M_NOWAIT | M_ZERO); if (pf->vfs == NULL) { error = ENOMEM; goto fail; } for (i = 0; i < num_vfs; i++) sysctl_ctx_init(&pf->vfs[i].ctx); ret = i40e_aq_add_veb(hw, pf_vsi->uplink_seid, pf_vsi->seid, 1, FALSE, &pf->veb_seid, FALSE, NULL); if (ret != I40E_SUCCESS) { error = ixl_adminq_err_to_errno(hw->aq.asq_last_status); device_printf(dev, "add_veb failed; code=%d error=%d", ret, error); goto fail; } pf->num_vfs = num_vfs; IXL_PF_UNLOCK(pf); return (0); fail: free(pf->vfs, M_IXL); pf->vfs = NULL; IXL_PF_UNLOCK(pf); return (error); } void ixl_iov_uninit(device_t dev) { struct ixl_pf *pf; struct i40e_hw *hw; struct ixl_vsi *vsi; struct ifnet *ifp; struct ixl_vf *vfs; int i, num_vfs; pf = device_get_softc(dev); hw = &pf->hw; vsi = &pf->vsi; ifp = vsi->ifp; IXL_PF_LOCK(pf); for (i = 0; i < pf->num_vfs; i++) { if (pf->vfs[i].vsi.seid != 0) i40e_aq_delete_element(hw, pf->vfs[i].vsi.seid, NULL); ixl_pf_qmgr_release(&pf->qmgr, &pf->vfs[i].qtag); DDPRINTF(dev, "VF %d: %d released\n", i, pf->vfs[i].qtag.num_allocated); DDPRINTF(dev, "Unallocated total: %d\n", ixl_pf_qmgr_get_num_free(&pf->qmgr)); } if (pf->veb_seid != 0) { i40e_aq_delete_element(hw, pf->veb_seid, NULL); pf->veb_seid = 0; } vfs = pf->vfs; num_vfs = pf->num_vfs; pf->vfs = NULL; pf->num_vfs = 0; IXL_PF_UNLOCK(pf); /* Do this after the unlock as sysctl_ctx_free might sleep. */ for (i = 0; i < num_vfs; i++) sysctl_ctx_free(&vfs[i].ctx); free(vfs, M_IXL); } static int ixl_vf_reserve_queues(struct ixl_pf *pf, struct ixl_vf *vf, int num_queues) { device_t dev = pf->dev; int error; /* Validate, and clamp value if invalid */ if (num_queues < 1 || num_queues > 16) device_printf(dev, "Invalid num-queues (%d) for VF %d\n", num_queues, vf->vf_num); if (num_queues < 1) { device_printf(dev, "Setting VF %d num-queues to 1\n", vf->vf_num); num_queues = 1; } else if (num_queues > 16) { device_printf(dev, "Setting VF %d num-queues to 16\n", vf->vf_num); num_queues = 16; } error = ixl_pf_qmgr_alloc_scattered(&pf->qmgr, num_queues, &vf->qtag); if (error) { device_printf(dev, "Error allocating %d queues for VF %d's VSI\n", num_queues, vf->vf_num); return (ENOSPC); } DDPRINTF(dev, "VF %d: %d allocated, %d active", vf->vf_num, vf->qtag.num_allocated, vf->qtag.num_active); DDPRINTF(dev, "Unallocated total: %d", ixl_pf_qmgr_get_num_free(&pf->qmgr)); return (0); } int ixl_add_vf(device_t dev, uint16_t vfnum, const nvlist_t *params) { char sysctl_name[QUEUE_NAME_LEN]; struct ixl_pf *pf; struct ixl_vf *vf; const void *mac; size_t size; int error; int vf_num_queues; pf = device_get_softc(dev); vf = &pf->vfs[vfnum]; IXL_PF_LOCK(pf); vf->vf_num = vfnum; vf->vsi.back = pf; vf->vf_flags = VF_FLAG_ENABLED; SLIST_INIT(&vf->vsi.ftl); /* Reserve queue allocation from PF */ vf_num_queues = nvlist_get_number(params, "num-queues"); error = ixl_vf_reserve_queues(pf, vf, vf_num_queues); if (error != 0) goto out; error = ixl_vf_setup_vsi(pf, vf); if (error != 0) goto out; if (nvlist_exists_binary(params, "mac-addr")) { mac = nvlist_get_binary(params, "mac-addr", &size); bcopy(mac, vf->mac, ETHER_ADDR_LEN); if (nvlist_get_bool(params, "allow-set-mac")) vf->vf_flags |= VF_FLAG_SET_MAC_CAP; } else /* * If the administrator has not specified a MAC address then * we must allow the VF to choose one. */ vf->vf_flags |= VF_FLAG_SET_MAC_CAP; if (nvlist_get_bool(params, "mac-anti-spoof")) vf->vf_flags |= VF_FLAG_MAC_ANTI_SPOOF; if (nvlist_get_bool(params, "allow-promisc")) vf->vf_flags |= VF_FLAG_PROMISC_CAP; vf->vf_flags |= VF_FLAG_VLAN_CAP; ixl_reset_vf(pf, vf); out: IXL_PF_UNLOCK(pf); if (error == 0) { snprintf(sysctl_name, sizeof(sysctl_name), "vf%d", vfnum); ixl_add_vsi_sysctls(pf, &vf->vsi, &vf->ctx, sysctl_name); } return (error); } Index: head/sys/dev/ixl/ixl_pf_main.c =================================================================== --- head/sys/dev/ixl/ixl_pf_main.c (revision 327827) +++ head/sys/dev/ixl/ixl_pf_main.c (revision 327828) @@ -1,6090 +1,6091 @@ /****************************************************************************** Copyright (c) 2013-2015, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************/ /*$FreeBSD$*/ #include "ixl_pf.h" #ifdef PCI_IOV #include "ixl_pf_iov.h" #endif #ifdef IXL_IW #include "ixl_iw.h" #include "ixl_iw_int.h" #endif #ifdef DEV_NETMAP #include #include #include #endif /* DEV_NETMAP */ static int ixl_setup_queue(struct ixl_queue *, struct ixl_pf *, int); static u64 ixl_max_aq_speed_to_value(u8); static u8 ixl_convert_sysctl_aq_link_speed(u8, bool); /* Sysctls */ static int ixl_set_flowcntl(SYSCTL_HANDLER_ARGS); static int ixl_set_advertise(SYSCTL_HANDLER_ARGS); static int ixl_current_speed(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_show_fw(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_unallocated_queues(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_pf_tx_itr(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_pf_rx_itr(SYSCTL_HANDLER_ARGS); /* Debug Sysctls */ static int ixl_sysctl_link_status(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_phy_abilities(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_sw_filter_list(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_hw_res_alloc(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_switch_config(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_hkey(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_hena(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_hlut(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_fw_link_management(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_read_i2c_byte(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_write_i2c_byte(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_fec_fc_ability(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_fec_rs_ability(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_fec_fc_request(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_fec_rs_request(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_fec_auto_enable(SYSCTL_HANDLER_ARGS); #ifdef IXL_DEBUG static int ixl_sysctl_qtx_tail_handler(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_qrx_tail_handler(SYSCTL_HANDLER_ARGS); #endif #ifdef IXL_IW extern int ixl_enable_iwarp; #endif const uint8_t ixl_bcast_addr[ETHER_ADDR_LEN] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; const char * const ixl_fc_string[6] = { "None", "Rx", "Tx", "Full", "Priority", "Default" }; MALLOC_DEFINE(M_IXL, "ixl", "ixl driver allocations"); void ixl_debug_core(struct ixl_pf *pf, enum ixl_dbg_mask mask, char *fmt, ...) { va_list args; if (!(mask & pf->dbg_mask)) return; /* Re-implement device_printf() */ device_print_prettyname(pf->dev); va_start(args, fmt); vprintf(fmt, args); va_end(args); } /* ** Put the FW, API, NVM, EEtrackID, and OEM version information into a string */ void ixl_nvm_version_str(struct i40e_hw *hw, struct sbuf *buf) { u8 oem_ver = (u8)(hw->nvm.oem_ver >> 24); u16 oem_build = (u16)((hw->nvm.oem_ver >> 16) & 0xFFFF); u8 oem_patch = (u8)(hw->nvm.oem_ver & 0xFF); sbuf_printf(buf, "fw %d.%d.%05d api %d.%d nvm %x.%02x etid %08x oem %d.%d.%d", hw->aq.fw_maj_ver, hw->aq.fw_min_ver, hw->aq.fw_build, hw->aq.api_maj_ver, hw->aq.api_min_ver, (hw->nvm.version & IXL_NVM_VERSION_HI_MASK) >> IXL_NVM_VERSION_HI_SHIFT, (hw->nvm.version & IXL_NVM_VERSION_LO_MASK) >> IXL_NVM_VERSION_LO_SHIFT, hw->nvm.eetrack, oem_ver, oem_build, oem_patch); } void ixl_print_nvm_version(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; struct sbuf *sbuf; sbuf = sbuf_new_auto(); ixl_nvm_version_str(hw, sbuf); sbuf_finish(sbuf); device_printf(dev, "%s\n", sbuf_data(sbuf)); sbuf_delete(sbuf); } static void ixl_configure_tx_itr(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; struct ixl_vsi *vsi = &pf->vsi; struct ixl_queue *que = vsi->queues; vsi->tx_itr_setting = pf->tx_itr; for (int i = 0; i < vsi->num_queues; i++, que++) { struct tx_ring *txr = &que->txr; wr32(hw, I40E_PFINT_ITRN(IXL_TX_ITR, i), vsi->tx_itr_setting); txr->itr = vsi->tx_itr_setting; txr->latency = IXL_AVE_LATENCY; } } static void ixl_configure_rx_itr(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; struct ixl_vsi *vsi = &pf->vsi; struct ixl_queue *que = vsi->queues; vsi->rx_itr_setting = pf->rx_itr; for (int i = 0; i < vsi->num_queues; i++, que++) { struct rx_ring *rxr = &que->rxr; wr32(hw, I40E_PFINT_ITRN(IXL_RX_ITR, i), vsi->rx_itr_setting); rxr->itr = vsi->rx_itr_setting; rxr->latency = IXL_AVE_LATENCY; } } /* * Write PF ITR values to queue ITR registers. */ void ixl_configure_itr(struct ixl_pf *pf) { ixl_configure_tx_itr(pf); ixl_configure_rx_itr(pf); } /********************************************************************* * Init entry point * * This routine is used in two ways. It is used by the stack as * init entry point in network interface structure. It is also used * by the driver as a hw/sw initialization routine to get to a * consistent state. * * return 0 on success, positive on failure **********************************************************************/ void ixl_init_locked(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; struct ixl_vsi *vsi = &pf->vsi; struct ifnet *ifp = vsi->ifp; device_t dev = pf->dev; struct i40e_filter_control_settings filter; u8 tmpaddr[ETHER_ADDR_LEN]; int ret; INIT_DEBUGOUT("ixl_init_locked: begin"); IXL_PF_LOCK_ASSERT(pf); ixl_stop_locked(pf); /* * If the aq is dead here, it probably means something outside of the driver * did something to the adapter, like a PF reset. * So rebuild the driver's state here if that occurs. */ if (!i40e_check_asq_alive(&pf->hw)) { device_printf(dev, "Admin Queue is down; resetting...\n"); ixl_teardown_hw_structs(pf); ixl_reset(pf); } /* Get the latest mac address... User might use a LAA */ bcopy(IF_LLADDR(vsi->ifp), tmpaddr, I40E_ETH_LENGTH_OF_ADDRESS); if (!cmp_etheraddr(hw->mac.addr, tmpaddr) && (i40e_validate_mac_addr(tmpaddr) == I40E_SUCCESS)) { ixl_del_filter(vsi, hw->mac.addr, IXL_VLAN_ANY); bcopy(tmpaddr, hw->mac.addr, I40E_ETH_LENGTH_OF_ADDRESS); ret = i40e_aq_mac_address_write(hw, I40E_AQC_WRITE_TYPE_LAA_ONLY, hw->mac.addr, NULL); if (ret) { device_printf(dev, "LLA address" "change failed!!\n"); return; } } ixl_add_filter(vsi, hw->mac.addr, IXL_VLAN_ANY); /* Set the various hardware offload abilities */ ifp->if_hwassist = 0; if (ifp->if_capenable & IFCAP_TSO) ifp->if_hwassist |= CSUM_TSO; if (ifp->if_capenable & IFCAP_TXCSUM) ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP); if (ifp->if_capenable & IFCAP_TXCSUM_IPV6) ifp->if_hwassist |= (CSUM_TCP_IPV6 | CSUM_UDP_IPV6); /* Set up the device filtering */ bzero(&filter, sizeof(filter)); filter.enable_ethtype = TRUE; filter.enable_macvlan = TRUE; filter.enable_fdir = FALSE; filter.hash_lut_size = I40E_HASH_LUT_SIZE_512; if (i40e_set_filter_control(hw, &filter)) device_printf(dev, "i40e_set_filter_control() failed\n"); /* Prepare the VSI: rings, hmc contexts, etc... */ if (ixl_initialize_vsi(vsi)) { device_printf(dev, "initialize vsi failed!!\n"); return; } /* Set up RSS */ ixl_config_rss(pf); /* Add protocol filters to list */ ixl_init_filters(vsi); /* Setup vlan's if needed */ ixl_setup_vlan_filters(vsi); /* Set up MSI/X routing and the ITR settings */ if (pf->msix > 1) { ixl_configure_queue_intr_msix(pf); ixl_configure_itr(pf); } else ixl_configure_legacy(pf); ixl_enable_rings(vsi); i40e_aq_set_default_vsi(hw, vsi->seid, NULL); ixl_reconfigure_filters(vsi); /* And now turn on interrupts */ ixl_enable_intr(vsi); /* Get link info */ hw->phy.get_link_info = TRUE; i40e_get_link_status(hw, &pf->link_up); ixl_update_link_status(pf); /* Start the local timer */ callout_reset(&pf->timer, hz, ixl_local_timer, pf); /* Now inform the stack we're ready */ ifp->if_drv_flags |= IFF_DRV_RUNNING; #ifdef IXL_IW if (ixl_enable_iwarp && pf->iw_enabled) { ret = ixl_iw_pf_init(pf); if (ret) device_printf(dev, "initialize iwarp failed, code %d\n", ret); } #endif } /********************************************************************* * * Get the hardware capabilities * **********************************************************************/ int ixl_get_hw_capabilities(struct ixl_pf *pf) { struct i40e_aqc_list_capabilities_element_resp *buf; struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; int error, len; u16 needed; bool again = TRUE; len = 40 * sizeof(struct i40e_aqc_list_capabilities_element_resp); retry: if (!(buf = (struct i40e_aqc_list_capabilities_element_resp *) malloc(len, M_DEVBUF, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate cap memory\n"); return (ENOMEM); } /* This populates the hw struct */ error = i40e_aq_discover_capabilities(hw, buf, len, &needed, i40e_aqc_opc_list_func_capabilities, NULL); free(buf, M_DEVBUF); if ((pf->hw.aq.asq_last_status == I40E_AQ_RC_ENOMEM) && (again == TRUE)) { /* retry once with a larger buffer */ again = FALSE; len = needed; goto retry; } else if (pf->hw.aq.asq_last_status != I40E_AQ_RC_OK) { device_printf(dev, "capability discovery failed: %d\n", pf->hw.aq.asq_last_status); return (ENODEV); } /* Capture this PF's starting queue pair */ pf->qbase = hw->func_caps.base_queue; #ifdef IXL_DEBUG device_printf(dev, "pf_id=%d, num_vfs=%d, msix_pf=%d, " "msix_vf=%d, fd_g=%d, fd_b=%d, tx_qp=%d rx_qp=%d qbase=%d\n", hw->pf_id, hw->func_caps.num_vfs, hw->func_caps.num_msix_vectors, hw->func_caps.num_msix_vectors_vf, hw->func_caps.fd_filters_guaranteed, hw->func_caps.fd_filters_best_effort, hw->func_caps.num_tx_qp, hw->func_caps.num_rx_qp, hw->func_caps.base_queue); #endif /* Print a subset of the capability information. */ device_printf(dev, "PF-ID[%d]: VFs %d, MSIX %d, VF MSIX %d, QPs %d, %s\n", hw->pf_id, hw->func_caps.num_vfs, hw->func_caps.num_msix_vectors, hw->func_caps.num_msix_vectors_vf, hw->func_caps.num_tx_qp, (hw->func_caps.mdio_port_mode == 2) ? "I2C" : (hw->func_caps.mdio_port_mode == 1) ? "MDIO dedicated" : "MDIO shared"); struct i40e_osdep *osdep = (struct i40e_osdep *)hw->back; osdep->i2c_intfc_num = ixl_find_i2c_interface(pf); if (osdep->i2c_intfc_num != -1) pf->has_i2c = true; return (error); } void ixl_cap_txcsum_tso(struct ixl_vsi *vsi, struct ifnet *ifp, int mask) { device_t dev = vsi->dev; /* Enable/disable TXCSUM/TSO4 */ if (!(ifp->if_capenable & IFCAP_TXCSUM) && !(ifp->if_capenable & IFCAP_TSO4)) { if (mask & IFCAP_TXCSUM) { ifp->if_capenable |= IFCAP_TXCSUM; /* enable TXCSUM, restore TSO if previously enabled */ if (vsi->flags & IXL_FLAGS_KEEP_TSO4) { vsi->flags &= ~IXL_FLAGS_KEEP_TSO4; ifp->if_capenable |= IFCAP_TSO4; } } else if (mask & IFCAP_TSO4) { ifp->if_capenable |= (IFCAP_TXCSUM | IFCAP_TSO4); vsi->flags &= ~IXL_FLAGS_KEEP_TSO4; device_printf(dev, "TSO4 requires txcsum, enabling both...\n"); } } else if((ifp->if_capenable & IFCAP_TXCSUM) && !(ifp->if_capenable & IFCAP_TSO4)) { if (mask & IFCAP_TXCSUM) ifp->if_capenable &= ~IFCAP_TXCSUM; else if (mask & IFCAP_TSO4) ifp->if_capenable |= IFCAP_TSO4; } else if((ifp->if_capenable & IFCAP_TXCSUM) && (ifp->if_capenable & IFCAP_TSO4)) { if (mask & IFCAP_TXCSUM) { vsi->flags |= IXL_FLAGS_KEEP_TSO4; ifp->if_capenable &= ~(IFCAP_TXCSUM | IFCAP_TSO4); device_printf(dev, "TSO4 requires txcsum, disabling both...\n"); } else if (mask & IFCAP_TSO4) ifp->if_capenable &= ~IFCAP_TSO4; } /* Enable/disable TXCSUM_IPV6/TSO6 */ if (!(ifp->if_capenable & IFCAP_TXCSUM_IPV6) && !(ifp->if_capenable & IFCAP_TSO6)) { if (mask & IFCAP_TXCSUM_IPV6) { ifp->if_capenable |= IFCAP_TXCSUM_IPV6; if (vsi->flags & IXL_FLAGS_KEEP_TSO6) { vsi->flags &= ~IXL_FLAGS_KEEP_TSO6; ifp->if_capenable |= IFCAP_TSO6; } } else if (mask & IFCAP_TSO6) { ifp->if_capenable |= (IFCAP_TXCSUM_IPV6 | IFCAP_TSO6); vsi->flags &= ~IXL_FLAGS_KEEP_TSO6; device_printf(dev, "TSO6 requires txcsum6, enabling both...\n"); } } else if((ifp->if_capenable & IFCAP_TXCSUM_IPV6) && !(ifp->if_capenable & IFCAP_TSO6)) { if (mask & IFCAP_TXCSUM_IPV6) ifp->if_capenable &= ~IFCAP_TXCSUM_IPV6; else if (mask & IFCAP_TSO6) ifp->if_capenable |= IFCAP_TSO6; } else if ((ifp->if_capenable & IFCAP_TXCSUM_IPV6) && (ifp->if_capenable & IFCAP_TSO6)) { if (mask & IFCAP_TXCSUM_IPV6) { vsi->flags |= IXL_FLAGS_KEEP_TSO6; ifp->if_capenable &= ~(IFCAP_TXCSUM_IPV6 | IFCAP_TSO6); device_printf(dev, "TSO6 requires txcsum6, disabling both...\n"); } else if (mask & IFCAP_TSO6) ifp->if_capenable &= ~IFCAP_TSO6; } } /* For the set_advertise sysctl */ void ixl_get_initial_advertised_speeds(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; enum i40e_status_code status; struct i40e_aq_get_phy_abilities_resp abilities; /* Set initial sysctl values */ status = i40e_aq_get_phy_capabilities(hw, FALSE, false, &abilities, NULL); if (status) { /* Non-fatal error */ device_printf(dev, "%s: i40e_aq_get_phy_capabilities() error %d\n", __func__, status); return; } pf->advertised_speed = ixl_convert_sysctl_aq_link_speed(abilities.link_speed, false); } int ixl_teardown_hw_structs(struct ixl_pf *pf) { enum i40e_status_code status = 0; struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; /* Shutdown LAN HMC */ if (hw->hmc.hmc_obj) { status = i40e_shutdown_lan_hmc(hw); if (status) { device_printf(dev, "init: LAN HMC shutdown failure; status %d\n", status); goto err_out; } } // XXX: This gets called when we know the adminq is inactive; // so we already know it's setup when we get here. /* Shutdown admin queue */ status = i40e_shutdown_adminq(hw); if (status) device_printf(dev, "init: Admin Queue shutdown failure; status %d\n", status); err_out: return (status); } int ixl_reset(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; u8 set_fc_err_mask; int error = 0; // XXX: clear_hw() actually writes to hw registers -- maybe this isn't necessary i40e_clear_hw(hw); error = i40e_pf_reset(hw); if (error) { device_printf(dev, "init: PF reset failure"); error = EIO; goto err_out; } error = i40e_init_adminq(hw); if (error) { device_printf(dev, "init: Admin queue init failure;" " status code %d", error); error = EIO; goto err_out; } i40e_clear_pxe_mode(hw); error = ixl_get_hw_capabilities(pf); if (error) { device_printf(dev, "init: Error retrieving HW capabilities;" " status code %d\n", error); goto err_out; } error = i40e_init_lan_hmc(hw, hw->func_caps.num_tx_qp, hw->func_caps.num_rx_qp, 0, 0); if (error) { device_printf(dev, "init: LAN HMC init failed; status code %d\n", error); error = EIO; goto err_out; } error = i40e_configure_lan_hmc(hw, I40E_HMC_MODEL_DIRECT_ONLY); if (error) { device_printf(dev, "init: LAN HMC config failed; status code %d\n", error); error = EIO; goto err_out; } // XXX: possible fix for panic, but our failure recovery is still broken error = ixl_switch_config(pf); if (error) { device_printf(dev, "init: ixl_switch_config() failed: %d\n", error); goto err_out; } error = i40e_aq_set_phy_int_mask(hw, IXL_DEFAULT_PHY_INT_MASK, NULL); if (error) { device_printf(dev, "init: i40e_aq_set_phy_mask() failed: err %d," " aq_err %d\n", error, hw->aq.asq_last_status); error = EIO; goto err_out; } error = i40e_set_fc(hw, &set_fc_err_mask, true); if (error) { device_printf(dev, "init: setting link flow control failed; retcode %d," " fc_err_mask 0x%02x\n", error, set_fc_err_mask); goto err_out; } // XXX: (Rebuild VSIs?) /* Firmware delay workaround */ if (((hw->aq.fw_maj_ver == 4) && (hw->aq.fw_min_ver < 33)) || (hw->aq.fw_maj_ver < 4)) { i40e_msec_delay(75); error = i40e_aq_set_link_restart_an(hw, TRUE, NULL); if (error) { device_printf(dev, "init: link restart failed, aq_err %d\n", hw->aq.asq_last_status); goto err_out; } } err_out: return (error); } /* ** MSIX Interrupt Handlers and Tasklets */ void ixl_handle_que(void *context, int pending) { struct ixl_queue *que = context; struct ixl_vsi *vsi = que->vsi; struct i40e_hw *hw = vsi->hw; struct tx_ring *txr = &que->txr; struct ifnet *ifp = vsi->ifp; bool more; if (ifp->if_drv_flags & IFF_DRV_RUNNING) { more = ixl_rxeof(que, IXL_RX_LIMIT); IXL_TX_LOCK(txr); ixl_txeof(que); if (!drbr_empty(ifp, txr->br)) ixl_mq_start_locked(ifp, txr); IXL_TX_UNLOCK(txr); if (more) { taskqueue_enqueue(que->tq, &que->task); return; } } /* Reenable this interrupt - hmmm */ ixl_enable_queue(hw, que->me); return; } /********************************************************************* * * Legacy Interrupt Service routine * **********************************************************************/ void ixl_intr(void *arg) { struct ixl_pf *pf = arg; struct i40e_hw *hw = &pf->hw; struct ixl_vsi *vsi = &pf->vsi; struct ixl_queue *que = vsi->queues; struct ifnet *ifp = vsi->ifp; struct tx_ring *txr = &que->txr; u32 icr0; bool more_tx, more_rx; pf->admin_irq++; /* Protect against spurious interrupts */ if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) return; icr0 = rd32(hw, I40E_PFINT_ICR0); #ifdef PCI_IOV if (icr0 & I40E_PFINT_ICR0_VFLR_MASK) taskqueue_enqueue(pf->tq, &pf->vflr_task); #endif if (icr0 & I40E_PFINT_ICR0_ADMINQ_MASK) { taskqueue_enqueue(pf->tq, &pf->adminq); } if (icr0 & I40E_PFINT_ICR0_QUEUE_0_MASK) { ++que->irqs; more_rx = ixl_rxeof(que, IXL_RX_LIMIT); IXL_TX_LOCK(txr); more_tx = ixl_txeof(que); if (!drbr_empty(vsi->ifp, txr->br)) more_tx = 1; IXL_TX_UNLOCK(txr); } ixl_enable_intr0(hw); } /********************************************************************* * * MSIX VSI Interrupt Service routine * **********************************************************************/ void ixl_msix_que(void *arg) { struct ixl_queue *que = arg; struct ixl_vsi *vsi = que->vsi; struct i40e_hw *hw = vsi->hw; struct tx_ring *txr = &que->txr; bool more_tx, more_rx; /* Protect against spurious interrupts */ if (!(vsi->ifp->if_drv_flags & IFF_DRV_RUNNING)) return; ++que->irqs; more_rx = ixl_rxeof(que, IXL_RX_LIMIT); IXL_TX_LOCK(txr); more_tx = ixl_txeof(que); /* ** Make certain that if the stack ** has anything queued the task gets ** scheduled to handle it. */ if (!drbr_empty(vsi->ifp, txr->br)) more_tx = 1; IXL_TX_UNLOCK(txr); ixl_set_queue_rx_itr(que); ixl_set_queue_tx_itr(que); if (more_tx || more_rx) taskqueue_enqueue(que->tq, &que->task); else ixl_enable_queue(hw, que->me); return; } /********************************************************************* * * MSIX Admin Queue Interrupt Service routine * **********************************************************************/ void ixl_msix_adminq(void *arg) { struct ixl_pf *pf = arg; struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; u32 reg, mask, rstat_reg; bool do_task = FALSE; ++pf->admin_irq; reg = rd32(hw, I40E_PFINT_ICR0); mask = rd32(hw, I40E_PFINT_ICR0_ENA); /* Check on the cause */ if (reg & I40E_PFINT_ICR0_ADMINQ_MASK) { mask &= ~I40E_PFINT_ICR0_ADMINQ_MASK; do_task = TRUE; } if (reg & I40E_PFINT_ICR0_MAL_DETECT_MASK) { ixl_handle_mdd_event(pf); mask &= ~I40E_PFINT_ICR0_MAL_DETECT_MASK; } if (reg & I40E_PFINT_ICR0_GRST_MASK) { device_printf(dev, "Reset Requested!\n"); rstat_reg = rd32(hw, I40E_GLGEN_RSTAT); rstat_reg = (rstat_reg & I40E_GLGEN_RSTAT_RESET_TYPE_MASK) >> I40E_GLGEN_RSTAT_RESET_TYPE_SHIFT; device_printf(dev, "Reset type: "); switch (rstat_reg) { /* These others might be handled similarly to an EMPR reset */ case I40E_RESET_CORER: printf("CORER\n"); break; case I40E_RESET_GLOBR: printf("GLOBR\n"); break; case I40E_RESET_EMPR: printf("EMPR\n"); atomic_set_int(&pf->state, IXL_PF_STATE_EMPR_RESETTING); break; default: printf("POR\n"); break; } /* overload admin queue task to check reset progress */ do_task = TRUE; } if (reg & I40E_PFINT_ICR0_ECC_ERR_MASK) { device_printf(dev, "ECC Error detected!\n"); } if (reg & I40E_PFINT_ICR0_HMC_ERR_MASK) { reg = rd32(hw, I40E_PFHMC_ERRORINFO); if (reg & I40E_PFHMC_ERRORINFO_ERROR_DETECTED_MASK) { device_printf(dev, "HMC Error detected!\n"); device_printf(dev, "INFO 0x%08x\n", reg); reg = rd32(hw, I40E_PFHMC_ERRORDATA); device_printf(dev, "DATA 0x%08x\n", reg); wr32(hw, I40E_PFHMC_ERRORINFO, 0); } } if (reg & I40E_PFINT_ICR0_PCI_EXCEPTION_MASK) { device_printf(dev, "PCI Exception detected!\n"); } #ifdef PCI_IOV if (reg & I40E_PFINT_ICR0_VFLR_MASK) { mask &= ~I40E_PFINT_ICR0_ENA_VFLR_MASK; taskqueue_enqueue(pf->tq, &pf->vflr_task); } #endif if (do_task) taskqueue_enqueue(pf->tq, &pf->adminq); else ixl_enable_intr0(hw); } void ixl_set_promisc(struct ixl_vsi *vsi) { struct ifnet *ifp = vsi->ifp; struct i40e_hw *hw = vsi->hw; int err, mcnt = 0; bool uni = FALSE, multi = FALSE; if (ifp->if_flags & IFF_ALLMULTI) multi = TRUE; else { /* Need to count the multicast addresses */ struct ifmultiaddr *ifma; if_maddr_rlock(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; if (mcnt == MAX_MULTICAST_ADDR) break; mcnt++; } if_maddr_runlock(ifp); } if (mcnt >= MAX_MULTICAST_ADDR) multi = TRUE; if (ifp->if_flags & IFF_PROMISC) uni = TRUE; err = i40e_aq_set_vsi_unicast_promiscuous(hw, vsi->seid, uni, NULL, TRUE); err = i40e_aq_set_vsi_multicast_promiscuous(hw, vsi->seid, multi, NULL); return; } /********************************************************************* * Filter Routines * * Routines for multicast and vlan filter management. * *********************************************************************/ void ixl_add_multi(struct ixl_vsi *vsi) { struct ifmultiaddr *ifma; struct ifnet *ifp = vsi->ifp; struct i40e_hw *hw = vsi->hw; int mcnt = 0, flags; IOCTL_DEBUGOUT("ixl_add_multi: begin"); if_maddr_rlock(ifp); /* ** First just get a count, to decide if we ** we simply use multicast promiscuous. */ TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; mcnt++; } if_maddr_runlock(ifp); if (__predict_false(mcnt >= MAX_MULTICAST_ADDR)) { /* delete existing MC filters */ ixl_del_hw_filters(vsi, mcnt); i40e_aq_set_vsi_multicast_promiscuous(hw, vsi->seid, TRUE, NULL); return; } mcnt = 0; if_maddr_rlock(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; ixl_add_mc_filter(vsi, (u8*)LLADDR((struct sockaddr_dl *) ifma->ifma_addr)); mcnt++; } if_maddr_runlock(ifp); if (mcnt > 0) { flags = (IXL_FILTER_ADD | IXL_FILTER_USED | IXL_FILTER_MC); ixl_add_hw_filters(vsi, flags, mcnt); } IOCTL_DEBUGOUT("ixl_add_multi: end"); return; } void ixl_del_multi(struct ixl_vsi *vsi) { struct ifnet *ifp = vsi->ifp; struct ifmultiaddr *ifma; struct ixl_mac_filter *f; int mcnt = 0; bool match = FALSE; IOCTL_DEBUGOUT("ixl_del_multi: begin"); /* Search for removed multicast addresses */ if_maddr_rlock(ifp); SLIST_FOREACH(f, &vsi->ftl, next) { if ((f->flags & IXL_FILTER_USED) && (f->flags & IXL_FILTER_MC)) { match = FALSE; TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; u8 *mc_addr = (u8 *)LLADDR((struct sockaddr_dl *)ifma->ifma_addr); if (cmp_etheraddr(f->macaddr, mc_addr)) { match = TRUE; break; } } if (match == FALSE) { f->flags |= IXL_FILTER_DEL; mcnt++; } } } if_maddr_runlock(ifp); if (mcnt > 0) ixl_del_hw_filters(vsi, mcnt); } /********************************************************************* * Timer routine * * This routine checks for link status,updates statistics, * and runs the watchdog check. * * Only runs when the driver is configured UP and RUNNING. * **********************************************************************/ void ixl_local_timer(void *arg) { struct ixl_pf *pf = arg; struct i40e_hw *hw = &pf->hw; struct ixl_vsi *vsi = &pf->vsi; struct ixl_queue *que = vsi->queues; device_t dev = pf->dev; struct tx_ring *txr; int hung = 0; u32 mask; s32 timer, new_timer; IXL_PF_LOCK_ASSERT(pf); /* Fire off the adminq task */ taskqueue_enqueue(pf->tq, &pf->adminq); /* Update stats */ ixl_update_stats_counters(pf); /* Check status of the queues */ mask = (I40E_PFINT_DYN_CTLN_INTENA_MASK | I40E_PFINT_DYN_CTLN_SWINT_TRIG_MASK | I40E_PFINT_DYN_CTLN_ITR_INDX_MASK); for (int i = 0; i < vsi->num_queues; i++, que++) { txr = &que->txr; timer = atomic_load_acq_32(&txr->watchdog_timer); if (timer > 0) { new_timer = timer - hz; if (new_timer <= 0) { atomic_store_rel_32(&txr->watchdog_timer, -1); device_printf(dev, "WARNING: queue %d " "appears to be hung!\n", que->me); ++hung; } else { /* * If this fails, that means something in the TX path has updated * the watchdog, so it means the TX path is still working and * the watchdog doesn't need to countdown. */ atomic_cmpset_rel_32(&txr->watchdog_timer, timer, new_timer); /* Any queues with outstanding work get a sw irq */ wr32(hw, I40E_PFINT_DYN_CTLN(que->me), mask); } } } /* Reset when a queue shows hung */ if (hung) goto hung; callout_reset(&pf->timer, hz, ixl_local_timer, pf); return; hung: device_printf(dev, "WARNING: Resetting!\n"); pf->watchdog_events++; ixl_init_locked(pf); } void ixl_link_up_msg(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; struct ifnet *ifp = pf->vsi.ifp; log(LOG_NOTICE, "%s: Link is up, %s Full Duplex, FEC: %s, Autoneg: %s, Flow Control: %s\n", ifp->if_xname, ixl_aq_speed_to_str(hw->phy.link_info.link_speed), (hw->phy.link_info.fec_info & I40E_AQ_CONFIG_FEC_KR_ENA) ? "Clause 74 BASE-R FEC" : (hw->phy.link_info.fec_info & I40E_AQ_CONFIG_FEC_RS_ENA) ? "Clause 108 RS-FEC" : "None", (hw->phy.link_info.an_info & I40E_AQ_AN_COMPLETED) ? "True" : "False", (hw->phy.link_info.an_info & I40E_AQ_LINK_PAUSE_TX && hw->phy.link_info.an_info & I40E_AQ_LINK_PAUSE_RX) ? ixl_fc_string[3] : (hw->phy.link_info.an_info & I40E_AQ_LINK_PAUSE_TX) ? ixl_fc_string[2] : (hw->phy.link_info.an_info & I40E_AQ_LINK_PAUSE_RX) ? ixl_fc_string[1] : ixl_fc_string[0]); } /* ** Note: this routine updates the OS on the link state ** the real check of the hardware only happens with ** a link interrupt. */ void ixl_update_link_status(struct ixl_pf *pf) { struct ixl_vsi *vsi = &pf->vsi; struct ifnet *ifp = vsi->ifp; device_t dev = pf->dev; if (pf->link_up) { if (vsi->link_active == FALSE) { vsi->link_active = TRUE; ifp->if_baudrate = ixl_max_aq_speed_to_value(pf->link_speed); if_link_state_change(ifp, LINK_STATE_UP); ixl_link_up_msg(pf); } } else { /* Link down */ if (vsi->link_active == TRUE) { if (bootverbose) device_printf(dev, "Link is Down\n"); if_link_state_change(ifp, LINK_STATE_DOWN); vsi->link_active = FALSE; } } return; } /********************************************************************* * * This routine disables all traffic on the adapter by issuing a * global reset on the MAC and deallocates TX/RX buffers. * **********************************************************************/ void ixl_stop_locked(struct ixl_pf *pf) { struct ixl_vsi *vsi = &pf->vsi; struct ifnet *ifp = vsi->ifp; INIT_DEBUGOUT("ixl_stop: begin\n"); IXL_PF_LOCK_ASSERT(pf); #ifdef IXL_IW /* Stop iWARP device */ if (ixl_enable_iwarp && pf->iw_enabled) ixl_iw_pf_stop(pf); #endif /* Stop the local timer */ callout_stop(&pf->timer); ixl_disable_rings_intr(vsi); ixl_disable_rings(vsi); /* Tell the stack that the interface is no longer active */ ifp->if_drv_flags &= ~(IFF_DRV_RUNNING); } void ixl_stop(struct ixl_pf *pf) { IXL_PF_LOCK(pf); ixl_stop_locked(pf); IXL_PF_UNLOCK(pf); } /********************************************************************* * * Setup MSIX Interrupt resources and handlers for the VSI * **********************************************************************/ int ixl_setup_legacy(struct ixl_pf *pf) { device_t dev = pf->dev; int error, rid = 0; if (pf->msix == 1) rid = 1; pf->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); if (pf->res == NULL) { device_printf(dev, "bus_alloc_resource_any() for" " legacy/msi interrupt\n"); return (ENXIO); } /* Set the handler function */ error = bus_setup_intr(dev, pf->res, INTR_TYPE_NET | INTR_MPSAFE, NULL, ixl_intr, pf, &pf->tag); if (error) { pf->res = NULL; device_printf(dev, "bus_setup_intr() for legacy/msi" " interrupt handler failed, error %d\n", error); return (ENXIO); } error = bus_describe_intr(dev, pf->res, pf->tag, "irq"); if (error) { /* non-fatal */ device_printf(dev, "bus_describe_intr() for Admin Queue" " interrupt name failed, error %d\n", error); } return (0); } int ixl_setup_adminq_tq(struct ixl_pf *pf) { device_t dev = pf->dev; int error = 0; /* Tasklet for Admin Queue interrupts */ TASK_INIT(&pf->adminq, 0, ixl_do_adminq, pf); #ifdef PCI_IOV /* VFLR Tasklet */ TASK_INIT(&pf->vflr_task, 0, ixl_handle_vflr, pf); #endif /* Create and start Admin Queue taskqueue */ pf->tq = taskqueue_create_fast("ixl_aq", M_NOWAIT, taskqueue_thread_enqueue, &pf->tq); if (!pf->tq) { device_printf(dev, "taskqueue_create_fast (for AQ) returned NULL!\n"); return (ENOMEM); } error = taskqueue_start_threads(&pf->tq, 1, PI_NET, "%s aq", device_get_nameunit(dev)); if (error) { device_printf(dev, "taskqueue_start_threads (for AQ) error: %d\n", error); taskqueue_free(pf->tq); return (error); } return (0); } int ixl_setup_queue_tqs(struct ixl_vsi *vsi) { struct ixl_queue *que = vsi->queues; device_t dev = vsi->dev; #ifdef RSS int cpu_id = 0; cpuset_t cpu_mask; #endif /* Create queue tasks and start queue taskqueues */ for (int i = 0; i < vsi->num_queues; i++, que++) { TASK_INIT(&que->tx_task, 0, ixl_deferred_mq_start, que); TASK_INIT(&que->task, 0, ixl_handle_que, que); que->tq = taskqueue_create_fast("ixl_que", M_NOWAIT, taskqueue_thread_enqueue, &que->tq); #ifdef RSS CPU_SETOF(cpu_id, &cpu_mask); taskqueue_start_threads_cpuset(&que->tq, 1, PI_NET, &cpu_mask, "%s (bucket %d)", device_get_nameunit(dev), cpu_id); #else taskqueue_start_threads(&que->tq, 1, PI_NET, "%s (que %d)", device_get_nameunit(dev), que->me); #endif } return (0); } void ixl_free_adminq_tq(struct ixl_pf *pf) { if (pf->tq) { taskqueue_free(pf->tq); pf->tq = NULL; } } void ixl_free_queue_tqs(struct ixl_vsi *vsi) { struct ixl_queue *que = vsi->queues; for (int i = 0; i < vsi->num_queues; i++, que++) { if (que->tq) { taskqueue_free(que->tq); que->tq = NULL; } } } int ixl_setup_adminq_msix(struct ixl_pf *pf) { device_t dev = pf->dev; int rid, error = 0; /* Admin IRQ rid is 1, vector is 0 */ rid = 1; /* Get interrupt resource from bus */ pf->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); if (!pf->res) { device_printf(dev, "bus_alloc_resource_any() for Admin Queue" " interrupt failed [rid=%d]\n", rid); return (ENXIO); } /* Then associate interrupt with handler */ error = bus_setup_intr(dev, pf->res, INTR_TYPE_NET | INTR_MPSAFE, NULL, ixl_msix_adminq, pf, &pf->tag); if (error) { pf->res = NULL; device_printf(dev, "bus_setup_intr() for Admin Queue" " interrupt handler failed, error %d\n", error); return (ENXIO); } error = bus_describe_intr(dev, pf->res, pf->tag, "aq"); if (error) { /* non-fatal */ device_printf(dev, "bus_describe_intr() for Admin Queue" " interrupt name failed, error %d\n", error); } pf->admvec = 0; return (0); } /* * Allocate interrupt resources from bus and associate an interrupt handler * to those for the VSI's queues. */ int ixl_setup_queue_msix(struct ixl_vsi *vsi) { device_t dev = vsi->dev; struct ixl_queue *que = vsi->queues; struct tx_ring *txr; int error, rid, vector = 1; /* Queue interrupt vector numbers start at 1 (adminq intr is 0) */ for (int i = 0; i < vsi->num_queues; i++, vector++, que++) { int cpu_id = i; rid = vector + 1; txr = &que->txr; que->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); if (!que->res) { device_printf(dev, "bus_alloc_resource_any() for" " Queue %d interrupt failed [rid=%d]\n", que->me, rid); return (ENXIO); } /* Set the handler function */ error = bus_setup_intr(dev, que->res, INTR_TYPE_NET | INTR_MPSAFE, NULL, ixl_msix_que, que, &que->tag); if (error) { device_printf(dev, "bus_setup_intr() for Queue %d" " interrupt handler failed, error %d\n", que->me, error); bus_release_resource(dev, SYS_RES_IRQ, rid, que->res); return (error); } error = bus_describe_intr(dev, que->res, que->tag, "q%d", i); if (error) { device_printf(dev, "bus_describe_intr() for Queue %d" " interrupt name failed, error %d\n", que->me, error); } /* Bind the vector to a CPU */ #ifdef RSS cpu_id = rss_getcpu(i % rss_getnumbuckets()); #endif error = bus_bind_intr(dev, que->res, cpu_id); if (error) { device_printf(dev, "bus_bind_intr() for Queue %d" " to CPU %d failed, error %d\n", que->me, cpu_id, error); } que->msix = vector; } return (0); } /* * When used in a virtualized environment PCI BUSMASTER capability may not be set * so explicity set it here and rewrite the ENABLE in the MSIX control register * at this point to cause the host to successfully initialize us. */ void ixl_set_busmaster(device_t dev) { u16 pci_cmd_word; pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2); pci_cmd_word |= PCIM_CMD_BUSMASTEREN; pci_write_config(dev, PCIR_COMMAND, pci_cmd_word, 2); } /* * rewrite the ENABLE in the MSIX control register * to cause the host to successfully initialize us. */ void ixl_set_msix_enable(device_t dev) { int msix_ctrl, rid; pci_find_cap(dev, PCIY_MSIX, &rid); rid += PCIR_MSIX_CTRL; msix_ctrl = pci_read_config(dev, rid, 2); msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE; pci_write_config(dev, rid, msix_ctrl, 2); } /* * Allocate MSI/X vectors from the OS. * Returns 0 for legacy, 1 for MSI, >1 for MSIX. */ int ixl_init_msix(struct ixl_pf *pf) { device_t dev = pf->dev; struct i40e_hw *hw = &pf->hw; int auto_max_queues; int rid, want, vectors, queues, available; #ifdef IXL_IW int iw_want, iw_vectors; pf->iw_msix = 0; #endif /* Override by tuneable */ if (!pf->enable_msix) goto no_msix; /* Ensure proper operation in virtualized environment */ ixl_set_busmaster(dev); /* First try MSI/X */ rid = PCIR_BAR(IXL_MSIX_BAR); pf->msix_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (!pf->msix_mem) { /* May not be enabled */ device_printf(pf->dev, "Unable to map MSIX table\n"); goto no_msix; } available = pci_msix_count(dev); if (available < 2) { /* system has msix disabled (0), or only one vector (1) */ bus_release_resource(dev, SYS_RES_MEMORY, rid, pf->msix_mem); pf->msix_mem = NULL; goto no_msix; } /* Clamp max number of queues based on: * - # of MSI-X vectors available * - # of cpus available * - # of queues that can be assigned to the LAN VSI */ auto_max_queues = min(mp_ncpus, available - 1); if (hw->mac.type == I40E_MAC_X722) auto_max_queues = min(auto_max_queues, 128); else auto_max_queues = min(auto_max_queues, 64); /* Override with tunable value if tunable is less than autoconfig count */ if ((pf->max_queues != 0) && (pf->max_queues <= auto_max_queues)) queues = pf->max_queues; /* Use autoconfig amount if that's lower */ else if ((pf->max_queues != 0) && (pf->max_queues > auto_max_queues)) { device_printf(dev, "ixl_max_queues (%d) is too large, using " "autoconfig amount (%d)...\n", pf->max_queues, auto_max_queues); queues = auto_max_queues; } /* Limit maximum auto-configured queues to 8 if no user value is set */ else queues = min(auto_max_queues, 8); #ifdef RSS /* If we're doing RSS, clamp at the number of RSS buckets */ if (queues > rss_getnumbuckets()) queues = rss_getnumbuckets(); #endif /* ** Want one vector (RX/TX pair) per queue ** plus an additional for the admin queue. */ want = queues + 1; if (want <= available) /* Have enough */ vectors = want; else { device_printf(pf->dev, "MSIX Configuration Problem, " "%d vectors available but %d wanted!\n", available, want); pf->msix_mem = NULL; goto no_msix; /* Will go to Legacy setup */ } #ifdef IXL_IW if (ixl_enable_iwarp) { /* iWARP wants additional vector for CQP */ iw_want = mp_ncpus + 1; available -= vectors; if (available > 0) { iw_vectors = (available >= iw_want) ? iw_want : available; vectors += iw_vectors; } else iw_vectors = 0; } #endif ixl_set_msix_enable(dev); if (pci_alloc_msix(dev, &vectors) == 0) { device_printf(pf->dev, "Using MSIX interrupts with %d vectors\n", vectors); pf->msix = vectors; #ifdef IXL_IW if (ixl_enable_iwarp) pf->iw_msix = iw_vectors; #endif pf->vsi.num_queues = queues; #ifdef RSS /* * If we're doing RSS, the number of queues needs to * match the number of RSS buckets that are configured. * * + If there's more queues than RSS buckets, we'll end * up with queues that get no traffic. * * + If there's more RSS buckets than queues, we'll end * up having multiple RSS buckets map to the same queue, * so there'll be some contention. */ if (queues != rss_getnumbuckets()) { device_printf(dev, "%s: queues (%d) != RSS buckets (%d)" "; performance will be impacted.\n", __func__, queues, rss_getnumbuckets()); } #endif return (vectors); } no_msix: vectors = pci_msi_count(dev); pf->vsi.num_queues = 1; pf->max_queues = 1; if (vectors == 1 && pci_alloc_msi(dev, &vectors) == 0) device_printf(pf->dev, "Using an MSI interrupt\n"); else { vectors = 0; device_printf(pf->dev, "Using a Legacy interrupt\n"); } return (vectors); } /* * Configure admin queue/misc interrupt cause registers in hardware. */ void ixl_configure_intr0_msix(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; u32 reg; /* First set up the adminq - vector 0 */ wr32(hw, I40E_PFINT_ICR0_ENA, 0); /* disable all */ rd32(hw, I40E_PFINT_ICR0); /* read to clear */ reg = I40E_PFINT_ICR0_ENA_ECC_ERR_MASK | I40E_PFINT_ICR0_ENA_GRST_MASK | I40E_PFINT_ICR0_ENA_HMC_ERR_MASK | I40E_PFINT_ICR0_ENA_ADMINQ_MASK | I40E_PFINT_ICR0_ENA_MAL_DETECT_MASK | I40E_PFINT_ICR0_ENA_VFLR_MASK | I40E_PFINT_ICR0_ENA_PCI_EXCEPTION_MASK; wr32(hw, I40E_PFINT_ICR0_ENA, reg); /* * 0x7FF is the end of the queue list. * This means we won't use MSI-X vector 0 for a queue interrupt * in MSIX mode. */ wr32(hw, I40E_PFINT_LNKLST0, 0x7FF); /* Value is in 2 usec units, so 0x3E is 62*2 = 124 usecs. */ wr32(hw, I40E_PFINT_ITR0(IXL_RX_ITR), 0x3E); wr32(hw, I40E_PFINT_DYN_CTL0, I40E_PFINT_DYN_CTL0_SW_ITR_INDX_MASK | I40E_PFINT_DYN_CTL0_INTENA_MSK_MASK); wr32(hw, I40E_PFINT_STAT_CTL0, 0); } /* * Configure queue interrupt cause registers in hardware. */ void ixl_configure_queue_intr_msix(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; struct ixl_vsi *vsi = &pf->vsi; u32 reg; u16 vector = 1; for (int i = 0; i < vsi->num_queues; i++, vector++) { wr32(hw, I40E_PFINT_DYN_CTLN(i), 0); /* First queue type is RX / 0 */ wr32(hw, I40E_PFINT_LNKLSTN(i), i); reg = I40E_QINT_RQCTL_CAUSE_ENA_MASK | (IXL_RX_ITR << I40E_QINT_RQCTL_ITR_INDX_SHIFT) | (vector << I40E_QINT_RQCTL_MSIX_INDX_SHIFT) | (i << I40E_QINT_RQCTL_NEXTQ_INDX_SHIFT) | (I40E_QUEUE_TYPE_TX << I40E_QINT_RQCTL_NEXTQ_TYPE_SHIFT); wr32(hw, I40E_QINT_RQCTL(i), reg); reg = I40E_QINT_TQCTL_CAUSE_ENA_MASK | (IXL_TX_ITR << I40E_QINT_TQCTL_ITR_INDX_SHIFT) | (vector << I40E_QINT_TQCTL_MSIX_INDX_SHIFT) | (IXL_QUEUE_EOL << I40E_QINT_TQCTL_NEXTQ_INDX_SHIFT) | (I40E_QUEUE_TYPE_RX << I40E_QINT_TQCTL_NEXTQ_TYPE_SHIFT); wr32(hw, I40E_QINT_TQCTL(i), reg); } } /* * Configure for MSI single vector operation */ void ixl_configure_legacy(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; struct ixl_vsi *vsi = &pf->vsi; struct ixl_queue *que = vsi->queues; struct rx_ring *rxr = &que->rxr; struct tx_ring *txr = &que->txr; u32 reg; /* Configure ITR */ vsi->tx_itr_setting = pf->tx_itr; wr32(hw, I40E_PFINT_ITR0(IXL_TX_ITR), vsi->tx_itr_setting); txr->itr = vsi->tx_itr_setting; vsi->rx_itr_setting = pf->rx_itr; wr32(hw, I40E_PFINT_ITR0(IXL_RX_ITR), vsi->rx_itr_setting); rxr->itr = vsi->rx_itr_setting; /* Setup "other" causes */ reg = I40E_PFINT_ICR0_ENA_ECC_ERR_MASK | I40E_PFINT_ICR0_ENA_MAL_DETECT_MASK | I40E_PFINT_ICR0_ENA_GRST_MASK | I40E_PFINT_ICR0_ENA_PCI_EXCEPTION_MASK | I40E_PFINT_ICR0_ENA_GPIO_MASK | I40E_PFINT_ICR0_ENA_LINK_STAT_CHANGE_MASK | I40E_PFINT_ICR0_ENA_HMC_ERR_MASK | I40E_PFINT_ICR0_ENA_PE_CRITERR_MASK | I40E_PFINT_ICR0_ENA_VFLR_MASK | I40E_PFINT_ICR0_ENA_ADMINQ_MASK ; wr32(hw, I40E_PFINT_ICR0_ENA, reg); /* No ITR for non-queue interrupts */ wr32(hw, I40E_PFINT_STAT_CTL0, IXL_ITR_NONE << I40E_PFINT_STAT_CTL0_OTHER_ITR_INDX_SHIFT); /* FIRSTQ_INDX = 0, FIRSTQ_TYPE = 0 (rx) */ wr32(hw, I40E_PFINT_LNKLST0, 0); /* Associate the queue pair to the vector and enable the q int */ reg = I40E_QINT_RQCTL_CAUSE_ENA_MASK | (IXL_RX_ITR << I40E_QINT_RQCTL_ITR_INDX_SHIFT) | (I40E_QUEUE_TYPE_TX << I40E_QINT_TQCTL_NEXTQ_TYPE_SHIFT); wr32(hw, I40E_QINT_RQCTL(0), reg); reg = I40E_QINT_TQCTL_CAUSE_ENA_MASK | (IXL_TX_ITR << I40E_QINT_TQCTL_ITR_INDX_SHIFT) | (IXL_QUEUE_EOL << I40E_QINT_TQCTL_NEXTQ_INDX_SHIFT); wr32(hw, I40E_QINT_TQCTL(0), reg); } int ixl_allocate_pci_resources(struct ixl_pf *pf) { int rid; struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; /* Map BAR0 */ rid = PCIR_BAR(0); pf->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (!(pf->pci_mem)) { device_printf(dev, "Unable to allocate bus resource: PCI memory\n"); return (ENXIO); } /* Save off the PCI information */ hw->vendor_id = pci_get_vendor(dev); hw->device_id = pci_get_device(dev); hw->revision_id = pci_read_config(dev, PCIR_REVID, 1); hw->subsystem_vendor_id = pci_read_config(dev, PCIR_SUBVEND_0, 2); hw->subsystem_device_id = pci_read_config(dev, PCIR_SUBDEV_0, 2); hw->bus.device = pci_get_slot(dev); hw->bus.func = pci_get_function(dev); /* Save off register access information */ pf->osdep.mem_bus_space_tag = rman_get_bustag(pf->pci_mem); pf->osdep.mem_bus_space_handle = rman_get_bushandle(pf->pci_mem); pf->osdep.mem_bus_space_size = rman_get_size(pf->pci_mem); pf->osdep.flush_reg = I40E_GLGEN_STAT; pf->hw.hw_addr = (u8 *) &pf->osdep.mem_bus_space_handle; pf->hw.back = &pf->osdep; return (0); } /* * Teardown and release the admin queue/misc vector * interrupt. */ int ixl_teardown_adminq_msix(struct ixl_pf *pf) { device_t dev = pf->dev; int rid, error = 0; if (pf->admvec) /* we are doing MSIX */ rid = pf->admvec + 1; else (pf->msix != 0) ? (rid = 1):(rid = 0); if (pf->tag != NULL) { bus_teardown_intr(dev, pf->res, pf->tag); if (error) { device_printf(dev, "bus_teardown_intr() for" " interrupt 0 failed\n"); // return (ENXIO); } pf->tag = NULL; } if (pf->res != NULL) { bus_release_resource(dev, SYS_RES_IRQ, rid, pf->res); if (error) { device_printf(dev, "bus_release_resource() for" " interrupt 0 failed [rid=%d]\n", rid); // return (ENXIO); } pf->res = NULL; } return (0); } int ixl_teardown_queue_msix(struct ixl_vsi *vsi) { struct ixl_pf *pf = (struct ixl_pf *)vsi->back; struct ixl_queue *que = vsi->queues; device_t dev = vsi->dev; int rid, error = 0; /* We may get here before stations are setup */ if ((pf->msix < 2) || (que == NULL)) return (0); /* Release all MSIX queue resources */ for (int i = 0; i < vsi->num_queues; i++, que++) { rid = que->msix + 1; if (que->tag != NULL) { error = bus_teardown_intr(dev, que->res, que->tag); if (error) { device_printf(dev, "bus_teardown_intr() for" " Queue %d interrupt failed\n", que->me); // return (ENXIO); } que->tag = NULL; } if (que->res != NULL) { error = bus_release_resource(dev, SYS_RES_IRQ, rid, que->res); if (error) { device_printf(dev, "bus_release_resource() for" " Queue %d interrupt failed [rid=%d]\n", que->me, rid); // return (ENXIO); } que->res = NULL; } } return (0); } void ixl_free_pci_resources(struct ixl_pf *pf) { device_t dev = pf->dev; int memrid; ixl_teardown_queue_msix(&pf->vsi); ixl_teardown_adminq_msix(pf); if (pf->msix > 0) pci_release_msi(dev); memrid = PCIR_BAR(IXL_MSIX_BAR); if (pf->msix_mem != NULL) bus_release_resource(dev, SYS_RES_MEMORY, memrid, pf->msix_mem); if (pf->pci_mem != NULL) bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(0), pf->pci_mem); return; } void ixl_add_ifmedia(struct ixl_vsi *vsi, u64 phy_types) { /* Display supported media types */ if (phy_types & (I40E_CAP_PHY_TYPE_100BASE_TX)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_100_TX, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_1000BASE_T)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_1000_T, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_1000BASE_SX)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_1000_SX, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_1000BASE_LX)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_1000_LX, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_XAUI) || phy_types & (I40E_CAP_PHY_TYPE_XFI) || phy_types & (I40E_CAP_PHY_TYPE_10GBASE_SFPP_CU)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_10G_TWINAX, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_10GBASE_SR)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_10G_SR, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_10GBASE_LR)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_10G_LR, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_10GBASE_T)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_10G_T, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_40GBASE_CR4) || phy_types & (I40E_CAP_PHY_TYPE_40GBASE_CR4_CU) || phy_types & (I40E_CAP_PHY_TYPE_40GBASE_AOC) || phy_types & (I40E_CAP_PHY_TYPE_XLAUI) || phy_types & (I40E_CAP_PHY_TYPE_40GBASE_KR4)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_40G_CR4, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_40GBASE_SR4)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_40G_SR4, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_40GBASE_LR4)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_40G_LR4, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_1000BASE_KX)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_1000_KX, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_10GBASE_CR1_CU) || phy_types & (I40E_CAP_PHY_TYPE_10GBASE_CR1)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_10G_CR1, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_10GBASE_AOC)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_10G_TWINAX_LONG, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_SFI)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_10G_SFI, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_10GBASE_KX4)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_10G_KX4, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_10GBASE_KR)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_10G_KR, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_20GBASE_KR2)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_20G_KR2, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_40GBASE_KR4)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_40G_KR4, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_XLPPI)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_40G_XLPPI, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_25GBASE_KR)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_25G_KR, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_25GBASE_CR)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_25G_CR, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_25GBASE_SR)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_25G_SR, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_25GBASE_LR)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_UNKNOWN, 0, NULL); } /********************************************************************* * * Setup networking device structure and register an interface. * **********************************************************************/ int ixl_setup_interface(device_t dev, struct ixl_vsi *vsi) { struct ixl_pf *pf = (struct ixl_pf *)vsi->back; struct ifnet *ifp; struct i40e_hw *hw = vsi->hw; struct ixl_queue *que = vsi->queues; struct i40e_aq_get_phy_abilities_resp abilities; enum i40e_status_code aq_error = 0; INIT_DEBUGOUT("ixl_setup_interface: begin"); ifp = vsi->ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { device_printf(dev, "can not allocate ifnet structure\n"); return (-1); } if_initname(ifp, device_get_name(dev), device_get_unit(dev)); ifp->if_mtu = ETHERMTU; ifp->if_init = ixl_init; ifp->if_softc = vsi; ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = ixl_ioctl; #if __FreeBSD_version >= 1100036 if_setgetcounterfn(ifp, ixl_get_counter); #endif ifp->if_transmit = ixl_mq_start; ifp->if_qflush = ixl_qflush; ifp->if_snd.ifq_maxlen = que->num_desc - 2; vsi->max_frame_size = ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN + ETHER_VLAN_ENCAP_LEN; /* Set TSO limits */ ifp->if_hw_tsomax = IP_MAXPACKET - (ETHER_HDR_LEN + ETHER_CRC_LEN); ifp->if_hw_tsomaxsegcount = IXL_MAX_TSO_SEGS; ifp->if_hw_tsomaxsegsize = PAGE_SIZE; /* * Tell the upper layer(s) we support long frames. */ ifp->if_hdrlen = sizeof(struct ether_vlan_header); ifp->if_capabilities |= IFCAP_HWCSUM; ifp->if_capabilities |= IFCAP_HWCSUM_IPV6; ifp->if_capabilities |= IFCAP_TSO; ifp->if_capabilities |= IFCAP_JUMBO_MTU; ifp->if_capabilities |= IFCAP_LRO; /* VLAN capabilties */ ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWTSO | IFCAP_VLAN_MTU | IFCAP_VLAN_HWCSUM; ifp->if_capenable = ifp->if_capabilities; /* ** Don't turn this on by default, if vlans are ** created on another pseudo device (eg. lagg) ** then vlan events are not passed thru, breaking ** operation, but with HW FILTER off it works. If ** using vlans directly on the ixl driver you can ** enable this and get full hardware tag filtering. */ ifp->if_capabilities |= IFCAP_VLAN_HWFILTER; /* * Specify the media types supported by this adapter and register * callbacks to update media and link information */ ifmedia_init(&vsi->media, IFM_IMASK, ixl_media_change, ixl_media_status); aq_error = i40e_aq_get_phy_capabilities(hw, FALSE, TRUE, &abilities, NULL); /* May need delay to detect fiber correctly */ if (aq_error == I40E_ERR_UNKNOWN_PHY) { i40e_msec_delay(200); aq_error = i40e_aq_get_phy_capabilities(hw, FALSE, TRUE, &abilities, NULL); } if (aq_error) { if (aq_error == I40E_ERR_UNKNOWN_PHY) device_printf(dev, "Unknown PHY type detected!\n"); else device_printf(dev, "Error getting supported media types, err %d," " AQ error %d\n", aq_error, hw->aq.asq_last_status); return (0); } pf->supported_speeds = abilities.link_speed; ifp->if_baudrate = ixl_max_aq_speed_to_value(pf->supported_speeds); ixl_add_ifmedia(vsi, hw->phy.phy_types); /* Use autoselect media by default */ ifmedia_add(&vsi->media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(&vsi->media, IFM_ETHER | IFM_AUTO); ether_ifattach(ifp, hw->mac.addr); return (0); } /* ** Run when the Admin Queue gets a link state change interrupt. */ void ixl_link_event(struct ixl_pf *pf, struct i40e_arq_event_info *e) { struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; struct i40e_aqc_get_link_status *status = (struct i40e_aqc_get_link_status *)&e->desc.params.raw; /* Request link status from adapter */ hw->phy.get_link_info = TRUE; i40e_get_link_status(hw, &pf->link_up); /* Print out message if an unqualified module is found */ if ((status->link_info & I40E_AQ_MEDIA_AVAILABLE) && (!(status->an_info & I40E_AQ_QUALIFIED_MODULE)) && (!(status->link_info & I40E_AQ_LINK_UP))) device_printf(dev, "Link failed because " "an unqualified module was detected!\n"); /* Update OS link info */ ixl_update_link_status(pf); } /********************************************************************* * * Get Firmware Switch configuration * - this will need to be more robust when more complex * switch configurations are enabled. * **********************************************************************/ int ixl_switch_config(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; struct ixl_vsi *vsi = &pf->vsi; device_t dev = vsi->dev; struct i40e_aqc_get_switch_config_resp *sw_config; u8 aq_buf[I40E_AQ_LARGE_BUF]; int ret; u16 next = 0; memset(&aq_buf, 0, sizeof(aq_buf)); sw_config = (struct i40e_aqc_get_switch_config_resp *)aq_buf; ret = i40e_aq_get_switch_config(hw, sw_config, sizeof(aq_buf), &next, NULL); if (ret) { device_printf(dev, "aq_get_switch_config() failed, error %d," " aq_error %d\n", ret, pf->hw.aq.asq_last_status); return (ret); } if (pf->dbg_mask & IXL_DBG_SWITCH_INFO) { device_printf(dev, "Switch config: header reported: %d in structure, %d total\n", sw_config->header.num_reported, sw_config->header.num_total); for (int i = 0; i < sw_config->header.num_reported; i++) { device_printf(dev, "-> %d: type=%d seid=%d uplink=%d downlink=%d\n", i, sw_config->element[i].element_type, sw_config->element[i].seid, sw_config->element[i].uplink_seid, sw_config->element[i].downlink_seid); } } /* Simplified due to a single VSI */ vsi->uplink_seid = sw_config->element[0].uplink_seid; vsi->downlink_seid = sw_config->element[0].downlink_seid; vsi->seid = sw_config->element[0].seid; return (ret); } /********************************************************************* * * Initialize the VSI: this handles contexts, which means things * like the number of descriptors, buffer size, * plus we init the rings thru this function. * **********************************************************************/ int ixl_initialize_vsi(struct ixl_vsi *vsi) { struct ixl_pf *pf = vsi->back; struct ixl_queue *que = vsi->queues; device_t dev = vsi->dev; struct i40e_hw *hw = vsi->hw; struct i40e_vsi_context ctxt; int tc_queues; int err = 0; memset(&ctxt, 0, sizeof(ctxt)); ctxt.seid = vsi->seid; if (pf->veb_seid != 0) ctxt.uplink_seid = pf->veb_seid; ctxt.pf_num = hw->pf_id; err = i40e_aq_get_vsi_params(hw, &ctxt, NULL); if (err) { device_printf(dev, "i40e_aq_get_vsi_params() failed, error %d" " aq_error %d\n", err, hw->aq.asq_last_status); return (err); } ixl_dbg(pf, IXL_DBG_SWITCH_INFO, "get_vsi_params: seid: %d, uplinkseid: %d, vsi_number: %d, " "vsis_allocated: %d, vsis_unallocated: %d, flags: 0x%x, " "pfnum: %d, vfnum: %d, stat idx: %d, enabled: %d\n", ctxt.seid, ctxt.uplink_seid, ctxt.vsi_number, ctxt.vsis_allocated, ctxt.vsis_unallocated, ctxt.flags, ctxt.pf_num, ctxt.vf_num, ctxt.info.stat_counter_idx, ctxt.info.up_enable_bits); /* ** Set the queue and traffic class bits ** - when multiple traffic classes are supported ** this will need to be more robust. */ ctxt.info.valid_sections = I40E_AQ_VSI_PROP_QUEUE_MAP_VALID; ctxt.info.mapping_flags |= I40E_AQ_VSI_QUE_MAP_CONTIG; /* In contig mode, que_mapping[0] is first queue index used by this VSI */ ctxt.info.queue_mapping[0] = 0; /* * This VSI will only use traffic class 0; start traffic class 0's * queue allocation at queue 0, and assign it 2^tc_queues queues (though * the driver may not use all of them). */ tc_queues = bsrl(pf->qtag.num_allocated); ctxt.info.tc_mapping[0] = ((0 << I40E_AQ_VSI_TC_QUE_OFFSET_SHIFT) & I40E_AQ_VSI_TC_QUE_OFFSET_MASK) | ((tc_queues << I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT) & I40E_AQ_VSI_TC_QUE_NUMBER_MASK); /* Set VLAN receive stripping mode */ ctxt.info.valid_sections |= I40E_AQ_VSI_PROP_VLAN_VALID; ctxt.info.port_vlan_flags = I40E_AQ_VSI_PVLAN_MODE_ALL; if (vsi->ifp->if_capenable & IFCAP_VLAN_HWTAGGING) ctxt.info.port_vlan_flags |= I40E_AQ_VSI_PVLAN_EMOD_STR_BOTH; else ctxt.info.port_vlan_flags |= I40E_AQ_VSI_PVLAN_EMOD_NOTHING; #ifdef IXL_IW /* Set TCP Enable for iWARP capable VSI */ if (ixl_enable_iwarp && pf->iw_enabled) { ctxt.info.valid_sections |= htole16(I40E_AQ_VSI_PROP_QUEUE_OPT_VALID); ctxt.info.queueing_opt_flags |= I40E_AQ_VSI_QUE_OPT_TCP_ENA; } #endif /* Save VSI number and info for use later */ vsi->vsi_num = ctxt.vsi_number; bcopy(&ctxt.info, &vsi->info, sizeof(vsi->info)); /* Reset VSI statistics */ ixl_vsi_reset_stats(vsi); vsi->hw_filters_add = 0; vsi->hw_filters_del = 0; ctxt.flags = htole16(I40E_AQ_VSI_TYPE_PF); err = i40e_aq_update_vsi_params(hw, &ctxt, NULL); if (err) { device_printf(dev, "i40e_aq_update_vsi_params() failed, error %d," " aq_error %d\n", err, hw->aq.asq_last_status); return (err); } for (int i = 0; i < vsi->num_queues; i++, que++) { struct tx_ring *txr = &que->txr; struct rx_ring *rxr = &que->rxr; struct i40e_hmc_obj_txq tctx; struct i40e_hmc_obj_rxq rctx; u32 txctl; u16 size; /* Setup the HMC TX Context */ size = que->num_desc * sizeof(struct i40e_tx_desc); memset(&tctx, 0, sizeof(struct i40e_hmc_obj_txq)); tctx.new_context = 1; tctx.base = (txr->dma.pa/IXL_TX_CTX_BASE_UNITS); tctx.qlen = que->num_desc; tctx.fc_ena = 0; tctx.rdylist = vsi->info.qs_handle[0]; /* index is TC */ /* Enable HEAD writeback */ tctx.head_wb_ena = 1; tctx.head_wb_addr = txr->dma.pa + (que->num_desc * sizeof(struct i40e_tx_desc)); tctx.rdylist_act = 0; err = i40e_clear_lan_tx_queue_context(hw, i); if (err) { device_printf(dev, "Unable to clear TX context\n"); break; } err = i40e_set_lan_tx_queue_context(hw, i, &tctx); if (err) { device_printf(dev, "Unable to set TX context\n"); break; } /* Associate the ring with this PF */ txctl = I40E_QTX_CTL_PF_QUEUE; txctl |= ((hw->pf_id << I40E_QTX_CTL_PF_INDX_SHIFT) & I40E_QTX_CTL_PF_INDX_MASK); wr32(hw, I40E_QTX_CTL(i), txctl); ixl_flush(hw); /* Do ring (re)init */ ixl_init_tx_ring(que); /* Next setup the HMC RX Context */ if (vsi->max_frame_size <= MCLBYTES) rxr->mbuf_sz = MCLBYTES; else rxr->mbuf_sz = MJUMPAGESIZE; u16 max_rxmax = rxr->mbuf_sz * hw->func_caps.rx_buf_chain_len; /* Set up an RX context for the HMC */ memset(&rctx, 0, sizeof(struct i40e_hmc_obj_rxq)); rctx.dbuff = rxr->mbuf_sz >> I40E_RXQ_CTX_DBUFF_SHIFT; /* ignore header split for now */ rctx.hbuff = 0 >> I40E_RXQ_CTX_HBUFF_SHIFT; rctx.rxmax = (vsi->max_frame_size < max_rxmax) ? vsi->max_frame_size : max_rxmax; rctx.dtype = 0; rctx.dsize = 1; /* do 32byte descriptors */ rctx.hsplit_0 = 0; /* no HDR split initially */ rctx.base = (rxr->dma.pa/IXL_RX_CTX_BASE_UNITS); rctx.qlen = que->num_desc; rctx.tphrdesc_ena = 1; rctx.tphwdesc_ena = 1; rctx.tphdata_ena = 0; rctx.tphhead_ena = 0; rctx.lrxqthresh = 2; rctx.crcstrip = 1; rctx.l2tsel = 1; rctx.showiv = 1; rctx.fc_ena = 0; rctx.prefena = 1; err = i40e_clear_lan_rx_queue_context(hw, i); if (err) { device_printf(dev, "Unable to clear RX context %d\n", i); break; } err = i40e_set_lan_rx_queue_context(hw, i, &rctx); if (err) { device_printf(dev, "Unable to set RX context %d\n", i); break; } err = ixl_init_rx_ring(que); if (err) { device_printf(dev, "Fail in init_rx_ring %d\n", i); break; } #ifdef DEV_NETMAP /* preserve queue */ if (vsi->ifp->if_capenable & IFCAP_NETMAP) { struct netmap_adapter *na = NA(vsi->ifp); struct netmap_kring *kring = &na->rx_rings[i]; int t = na->num_rx_desc - 1 - nm_kr_rxspace(kring); wr32(vsi->hw, I40E_QRX_TAIL(que->me), t); } else #endif /* DEV_NETMAP */ wr32(vsi->hw, I40E_QRX_TAIL(que->me), que->num_desc - 1); } return (err); } /********************************************************************* * * Free all VSI structs. * **********************************************************************/ void ixl_free_vsi(struct ixl_vsi *vsi) { struct ixl_pf *pf = (struct ixl_pf *)vsi->back; struct ixl_queue *que = vsi->queues; /* Free station queues */ if (!vsi->queues) goto free_filters; for (int i = 0; i < vsi->num_queues; i++, que++) { struct tx_ring *txr = &que->txr; struct rx_ring *rxr = &que->rxr; if (!mtx_initialized(&txr->mtx)) /* uninitialized */ continue; IXL_TX_LOCK(txr); ixl_free_que_tx(que); if (txr->base) i40e_free_dma_mem(&pf->hw, &txr->dma); IXL_TX_UNLOCK(txr); IXL_TX_LOCK_DESTROY(txr); if (!mtx_initialized(&rxr->mtx)) /* uninitialized */ continue; IXL_RX_LOCK(rxr); ixl_free_que_rx(que); if (rxr->base) i40e_free_dma_mem(&pf->hw, &rxr->dma); IXL_RX_UNLOCK(rxr); IXL_RX_LOCK_DESTROY(rxr); } free(vsi->queues, M_DEVBUF); free_filters: /* Free VSI filter list */ ixl_free_mac_filters(vsi); } void ixl_free_mac_filters(struct ixl_vsi *vsi) { struct ixl_mac_filter *f; while (!SLIST_EMPTY(&vsi->ftl)) { f = SLIST_FIRST(&vsi->ftl); SLIST_REMOVE_HEAD(&vsi->ftl, next); free(f, M_DEVBUF); } } /* * Fill out fields in queue struct and setup tx/rx memory and structs */ static int ixl_setup_queue(struct ixl_queue *que, struct ixl_pf *pf, int index) { device_t dev = pf->dev; struct i40e_hw *hw = &pf->hw; struct ixl_vsi *vsi = &pf->vsi; struct tx_ring *txr = &que->txr; struct rx_ring *rxr = &que->rxr; int error = 0; int rsize, tsize; que->num_desc = pf->ringsz; que->me = index; que->vsi = vsi; txr->que = que; txr->tail = I40E_QTX_TAIL(que->me); /* Initialize the TX lock */ snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)", device_get_nameunit(dev), que->me); mtx_init(&txr->mtx, txr->mtx_name, NULL, MTX_DEF); /* Create the TX descriptor ring */ tsize = roundup2((que->num_desc * sizeof(struct i40e_tx_desc)) + sizeof(u32), DBA_ALIGN); if (i40e_allocate_dma_mem(hw, &txr->dma, i40e_mem_reserved, tsize, DBA_ALIGN)) { device_printf(dev, "Unable to allocate TX Descriptor memory\n"); error = ENOMEM; goto fail; } txr->base = (struct i40e_tx_desc *)txr->dma.va; bzero((void *)txr->base, tsize); /* Now allocate transmit soft structs for the ring */ if (ixl_allocate_tx_data(que)) { device_printf(dev, "Critical Failure setting up TX structures\n"); error = ENOMEM; goto fail; } /* Allocate a buf ring */ txr->br = buf_ring_alloc(DEFAULT_TXBRSZ, M_DEVBUF, M_NOWAIT, &txr->mtx); if (txr->br == NULL) { device_printf(dev, "Critical Failure setting up TX buf ring\n"); error = ENOMEM; goto fail; } rsize = roundup2(que->num_desc * sizeof(union i40e_rx_desc), DBA_ALIGN); rxr->que = que; rxr->tail = I40E_QRX_TAIL(que->me); /* Initialize the RX side lock */ snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)", device_get_nameunit(dev), que->me); mtx_init(&rxr->mtx, rxr->mtx_name, NULL, MTX_DEF); if (i40e_allocate_dma_mem(hw, &rxr->dma, i40e_mem_reserved, rsize, 4096)) { device_printf(dev, "Unable to allocate RX Descriptor memory\n"); error = ENOMEM; goto fail; } rxr->base = (union i40e_rx_desc *)rxr->dma.va; bzero((void *)rxr->base, rsize); /* Allocate receive soft structs for the ring*/ if (ixl_allocate_rx_data(que)) { device_printf(dev, "Critical Failure setting up receive structs\n"); error = ENOMEM; goto fail; } return (0); fail: if (rxr->base) i40e_free_dma_mem(&pf->hw, &rxr->dma); if (mtx_initialized(&rxr->mtx)) mtx_destroy(&rxr->mtx); if (txr->br) { buf_ring_free(txr->br, M_DEVBUF); txr->br = NULL; } if (txr->base) i40e_free_dma_mem(&pf->hw, &txr->dma); if (mtx_initialized(&txr->mtx)) mtx_destroy(&txr->mtx); return (error); } /********************************************************************* * * Allocate memory for the VSI (virtual station interface) and their * associated queues, rings and the descriptors associated with each, * called only once at attach. * **********************************************************************/ int ixl_setup_stations(struct ixl_pf *pf) { device_t dev = pf->dev; struct ixl_vsi *vsi; struct ixl_queue *que; int error = 0; vsi = &pf->vsi; vsi->back = (void *)pf; vsi->hw = &pf->hw; vsi->id = 0; vsi->num_vlans = 0; vsi->back = pf; /* Get memory for the station queues */ if (!(vsi->queues = - (struct ixl_queue *) malloc(sizeof(struct ixl_queue) * - vsi->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { + (struct ixl_queue *) mallocarray(vsi->num_queues, + sizeof(struct ixl_queue), M_DEVBUF, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate queue memory\n"); error = ENOMEM; return (error); } /* Then setup each queue */ for (int i = 0; i < vsi->num_queues; i++) { que = &vsi->queues[i]; error = ixl_setup_queue(que, pf, i); if (error) return (error); } return (0); } /* ** Provide a update to the queue RX ** interrupt moderation value. */ void ixl_set_queue_rx_itr(struct ixl_queue *que) { struct ixl_vsi *vsi = que->vsi; struct ixl_pf *pf = (struct ixl_pf *)vsi->back; struct i40e_hw *hw = vsi->hw; struct rx_ring *rxr = &que->rxr; u16 rx_itr; u16 rx_latency = 0; int rx_bytes; /* Idle, do nothing */ if (rxr->bytes == 0) return; if (pf->dynamic_rx_itr) { rx_bytes = rxr->bytes/rxr->itr; rx_itr = rxr->itr; /* Adjust latency range */ switch (rxr->latency) { case IXL_LOW_LATENCY: if (rx_bytes > 10) { rx_latency = IXL_AVE_LATENCY; rx_itr = IXL_ITR_20K; } break; case IXL_AVE_LATENCY: if (rx_bytes > 20) { rx_latency = IXL_BULK_LATENCY; rx_itr = IXL_ITR_8K; } else if (rx_bytes <= 10) { rx_latency = IXL_LOW_LATENCY; rx_itr = IXL_ITR_100K; } break; case IXL_BULK_LATENCY: if (rx_bytes <= 20) { rx_latency = IXL_AVE_LATENCY; rx_itr = IXL_ITR_20K; } break; } rxr->latency = rx_latency; if (rx_itr != rxr->itr) { /* do an exponential smoothing */ rx_itr = (10 * rx_itr * rxr->itr) / ((9 * rx_itr) + rxr->itr); rxr->itr = min(rx_itr, IXL_MAX_ITR); wr32(hw, I40E_PFINT_ITRN(IXL_RX_ITR, que->me), rxr->itr); } } else { /* We may have have toggled to non-dynamic */ if (vsi->rx_itr_setting & IXL_ITR_DYNAMIC) vsi->rx_itr_setting = pf->rx_itr; /* Update the hardware if needed */ if (rxr->itr != vsi->rx_itr_setting) { rxr->itr = vsi->rx_itr_setting; wr32(hw, I40E_PFINT_ITRN(IXL_RX_ITR, que->me), rxr->itr); } } rxr->bytes = 0; rxr->packets = 0; return; } /* ** Provide a update to the queue TX ** interrupt moderation value. */ void ixl_set_queue_tx_itr(struct ixl_queue *que) { struct ixl_vsi *vsi = que->vsi; struct ixl_pf *pf = (struct ixl_pf *)vsi->back; struct i40e_hw *hw = vsi->hw; struct tx_ring *txr = &que->txr; u16 tx_itr; u16 tx_latency = 0; int tx_bytes; /* Idle, do nothing */ if (txr->bytes == 0) return; if (pf->dynamic_tx_itr) { tx_bytes = txr->bytes/txr->itr; tx_itr = txr->itr; switch (txr->latency) { case IXL_LOW_LATENCY: if (tx_bytes > 10) { tx_latency = IXL_AVE_LATENCY; tx_itr = IXL_ITR_20K; } break; case IXL_AVE_LATENCY: if (tx_bytes > 20) { tx_latency = IXL_BULK_LATENCY; tx_itr = IXL_ITR_8K; } else if (tx_bytes <= 10) { tx_latency = IXL_LOW_LATENCY; tx_itr = IXL_ITR_100K; } break; case IXL_BULK_LATENCY: if (tx_bytes <= 20) { tx_latency = IXL_AVE_LATENCY; tx_itr = IXL_ITR_20K; } break; } txr->latency = tx_latency; if (tx_itr != txr->itr) { /* do an exponential smoothing */ tx_itr = (10 * tx_itr * txr->itr) / ((9 * tx_itr) + txr->itr); txr->itr = min(tx_itr, IXL_MAX_ITR); wr32(hw, I40E_PFINT_ITRN(IXL_TX_ITR, que->me), txr->itr); } } else { /* We may have have toggled to non-dynamic */ if (vsi->tx_itr_setting & IXL_ITR_DYNAMIC) vsi->tx_itr_setting = pf->tx_itr; /* Update the hardware if needed */ if (txr->itr != vsi->tx_itr_setting) { txr->itr = vsi->tx_itr_setting; wr32(hw, I40E_PFINT_ITRN(IXL_TX_ITR, que->me), txr->itr); } } txr->bytes = 0; txr->packets = 0; return; } void ixl_add_vsi_sysctls(struct ixl_pf *pf, struct ixl_vsi *vsi, struct sysctl_ctx_list *ctx, const char *sysctl_name) { struct sysctl_oid *tree; struct sysctl_oid_list *child; struct sysctl_oid_list *vsi_list; tree = device_get_sysctl_tree(pf->dev); child = SYSCTL_CHILDREN(tree); vsi->vsi_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, sysctl_name, CTLFLAG_RD, NULL, "VSI Number"); vsi_list = SYSCTL_CHILDREN(vsi->vsi_node); ixl_add_sysctls_eth_stats(ctx, vsi_list, &vsi->eth_stats); } #ifdef IXL_DEBUG /** * ixl_sysctl_qtx_tail_handler * Retrieves I40E_QTX_TAIL value from hardware * for a sysctl. */ static int ixl_sysctl_qtx_tail_handler(SYSCTL_HANDLER_ARGS) { struct ixl_queue *que; int error; u32 val; que = ((struct ixl_queue *)oidp->oid_arg1); if (!que) return 0; val = rd32(que->vsi->hw, que->txr.tail); error = sysctl_handle_int(oidp, &val, 0, req); if (error || !req->newptr) return error; return (0); } /** * ixl_sysctl_qrx_tail_handler * Retrieves I40E_QRX_TAIL value from hardware * for a sysctl. */ static int ixl_sysctl_qrx_tail_handler(SYSCTL_HANDLER_ARGS) { struct ixl_queue *que; int error; u32 val; que = ((struct ixl_queue *)oidp->oid_arg1); if (!que) return 0; val = rd32(que->vsi->hw, que->rxr.tail); error = sysctl_handle_int(oidp, &val, 0, req); if (error || !req->newptr) return error; return (0); } #endif /* * Used to set the Tx ITR value for all of the PF LAN VSI's queues. * Writes to the ITR registers immediately. */ static int ixl_sysctl_pf_tx_itr(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; device_t dev = pf->dev; int error = 0; int requested_tx_itr; requested_tx_itr = pf->tx_itr; error = sysctl_handle_int(oidp, &requested_tx_itr, 0, req); if ((error) || (req->newptr == NULL)) return (error); if (pf->dynamic_tx_itr) { device_printf(dev, "Cannot set TX itr value while dynamic TX itr is enabled\n"); return (EINVAL); } if (requested_tx_itr < 0 || requested_tx_itr > IXL_MAX_ITR) { device_printf(dev, "Invalid TX itr value; value must be between 0 and %d\n", IXL_MAX_ITR); return (EINVAL); } pf->tx_itr = requested_tx_itr; ixl_configure_tx_itr(pf); return (error); } /* * Used to set the Rx ITR value for all of the PF LAN VSI's queues. * Writes to the ITR registers immediately. */ static int ixl_sysctl_pf_rx_itr(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; device_t dev = pf->dev; int error = 0; int requested_rx_itr; requested_rx_itr = pf->rx_itr; error = sysctl_handle_int(oidp, &requested_rx_itr, 0, req); if ((error) || (req->newptr == NULL)) return (error); if (pf->dynamic_rx_itr) { device_printf(dev, "Cannot set RX itr value while dynamic RX itr is enabled\n"); return (EINVAL); } if (requested_rx_itr < 0 || requested_rx_itr > IXL_MAX_ITR) { device_printf(dev, "Invalid RX itr value; value must be between 0 and %d\n", IXL_MAX_ITR); return (EINVAL); } pf->rx_itr = requested_rx_itr; ixl_configure_rx_itr(pf); return (error); } void ixl_add_hw_stats(struct ixl_pf *pf) { device_t dev = pf->dev; struct ixl_vsi *vsi = &pf->vsi; struct ixl_queue *queues = vsi->queues; struct i40e_hw_port_stats *pf_stats = &pf->stats; struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev); struct sysctl_oid *tree = device_get_sysctl_tree(dev); struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree); struct sysctl_oid_list *vsi_list; struct sysctl_oid *queue_node; struct sysctl_oid_list *queue_list; struct tx_ring *txr; struct rx_ring *rxr; char queue_namebuf[QUEUE_NAME_LEN]; /* Driver statistics */ SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_events", CTLFLAG_RD, &pf->watchdog_events, "Watchdog timeouts"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "admin_irq", CTLFLAG_RD, &pf->admin_irq, "Admin Queue IRQ Handled"); ixl_add_vsi_sysctls(pf, &pf->vsi, ctx, "pf"); vsi_list = SYSCTL_CHILDREN(pf->vsi.vsi_node); /* Queue statistics */ for (int q = 0; q < vsi->num_queues; q++) { snprintf(queue_namebuf, QUEUE_NAME_LEN, "que%d", q); queue_node = SYSCTL_ADD_NODE(ctx, vsi_list, OID_AUTO, queue_namebuf, CTLFLAG_RD, NULL, "Queue #"); queue_list = SYSCTL_CHILDREN(queue_node); txr = &(queues[q].txr); rxr = &(queues[q].rxr); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "mbuf_defrag_failed", CTLFLAG_RD, &(queues[q].mbuf_defrag_failed), "m_defrag() failed"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "irqs", CTLFLAG_RD, &(queues[q].irqs), "irqs on this queue"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tso_tx", CTLFLAG_RD, &(queues[q].tso), "TSO"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_dmamap_failed", CTLFLAG_RD, &(queues[q].tx_dmamap_failed), "Driver tx dma failure in xmit"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "mss_too_small", CTLFLAG_RD, &(queues[q].mss_too_small), "TSO sends with an MSS less than 64"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "no_desc_avail", CTLFLAG_RD, &(txr->no_desc), "Queue No Descriptor Available"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_packets", CTLFLAG_RD, &(txr->total_packets), "Queue Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_bytes", CTLFLAG_RD, &(txr->tx_bytes), "Queue Bytes Transmitted"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_packets", CTLFLAG_RD, &(rxr->rx_packets), "Queue Packets Received"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_bytes", CTLFLAG_RD, &(rxr->rx_bytes), "Queue Bytes Received"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_desc_err", CTLFLAG_RD, &(rxr->desc_errs), "Queue Rx Descriptor Errors"); SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "rx_itr", CTLFLAG_RD, &(rxr->itr), 0, "Queue Rx ITR Interval"); SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "tx_itr", CTLFLAG_RD, &(txr->itr), 0, "Queue Tx ITR Interval"); #ifdef IXL_DEBUG SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_not_done", CTLFLAG_RD, &(rxr->not_done), "Queue Rx Descriptors not Done"); SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "rx_next_refresh", CTLFLAG_RD, &(rxr->next_refresh), 0, "Queue Rx Descriptors not Done"); SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "rx_next_check", CTLFLAG_RD, &(rxr->next_check), 0, "Queue Rx Descriptors not Done"); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "qtx_tail", CTLTYPE_UINT | CTLFLAG_RD, &queues[q], sizeof(struct ixl_queue), ixl_sysctl_qtx_tail_handler, "IU", "Queue Transmit Descriptor Tail"); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "qrx_tail", CTLTYPE_UINT | CTLFLAG_RD, &queues[q], sizeof(struct ixl_queue), ixl_sysctl_qrx_tail_handler, "IU", "Queue Receive Descriptor Tail"); #endif } /* MAC stats */ ixl_add_sysctls_mac_stats(ctx, child, pf_stats); } void ixl_add_sysctls_eth_stats(struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child, struct i40e_eth_stats *eth_stats) { struct ixl_sysctl_info ctls[] = { {ð_stats->rx_bytes, "good_octets_rcvd", "Good Octets Received"}, {ð_stats->rx_unicast, "ucast_pkts_rcvd", "Unicast Packets Received"}, {ð_stats->rx_multicast, "mcast_pkts_rcvd", "Multicast Packets Received"}, {ð_stats->rx_broadcast, "bcast_pkts_rcvd", "Broadcast Packets Received"}, {ð_stats->rx_discards, "rx_discards", "Discarded RX packets"}, {ð_stats->tx_bytes, "good_octets_txd", "Good Octets Transmitted"}, {ð_stats->tx_unicast, "ucast_pkts_txd", "Unicast Packets Transmitted"}, {ð_stats->tx_multicast, "mcast_pkts_txd", "Multicast Packets Transmitted"}, {ð_stats->tx_broadcast, "bcast_pkts_txd", "Broadcast Packets Transmitted"}, // end {0,0,0} }; struct ixl_sysctl_info *entry = ctls; while (entry->stat != 0) { SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, entry->name, CTLFLAG_RD, entry->stat, entry->description); entry++; } } void ixl_add_sysctls_mac_stats(struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child, struct i40e_hw_port_stats *stats) { struct sysctl_oid *stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac", CTLFLAG_RD, NULL, "Mac Statistics"); struct sysctl_oid_list *stat_list = SYSCTL_CHILDREN(stat_node); struct i40e_eth_stats *eth_stats = &stats->eth; ixl_add_sysctls_eth_stats(ctx, stat_list, eth_stats); struct ixl_sysctl_info ctls[] = { {&stats->crc_errors, "crc_errors", "CRC Errors"}, {&stats->illegal_bytes, "illegal_bytes", "Illegal Byte Errors"}, {&stats->mac_local_faults, "local_faults", "MAC Local Faults"}, {&stats->mac_remote_faults, "remote_faults", "MAC Remote Faults"}, {&stats->rx_length_errors, "rx_length_errors", "Receive Length Errors"}, /* Packet Reception Stats */ {&stats->rx_size_64, "rx_frames_64", "64 byte frames received"}, {&stats->rx_size_127, "rx_frames_65_127", "65-127 byte frames received"}, {&stats->rx_size_255, "rx_frames_128_255", "128-255 byte frames received"}, {&stats->rx_size_511, "rx_frames_256_511", "256-511 byte frames received"}, {&stats->rx_size_1023, "rx_frames_512_1023", "512-1023 byte frames received"}, {&stats->rx_size_1522, "rx_frames_1024_1522", "1024-1522 byte frames received"}, {&stats->rx_size_big, "rx_frames_big", "1523-9522 byte frames received"}, {&stats->rx_undersize, "rx_undersize", "Undersized packets received"}, {&stats->rx_fragments, "rx_fragmented", "Fragmented packets received"}, {&stats->rx_oversize, "rx_oversized", "Oversized packets received"}, {&stats->rx_jabber, "rx_jabber", "Received Jabber"}, {&stats->checksum_error, "checksum_errors", "Checksum Errors"}, /* Packet Transmission Stats */ {&stats->tx_size_64, "tx_frames_64", "64 byte frames transmitted"}, {&stats->tx_size_127, "tx_frames_65_127", "65-127 byte frames transmitted"}, {&stats->tx_size_255, "tx_frames_128_255", "128-255 byte frames transmitted"}, {&stats->tx_size_511, "tx_frames_256_511", "256-511 byte frames transmitted"}, {&stats->tx_size_1023, "tx_frames_512_1023", "512-1023 byte frames transmitted"}, {&stats->tx_size_1522, "tx_frames_1024_1522", "1024-1522 byte frames transmitted"}, {&stats->tx_size_big, "tx_frames_big", "1523-9522 byte frames transmitted"}, /* Flow control */ {&stats->link_xon_tx, "xon_txd", "Link XON transmitted"}, {&stats->link_xon_rx, "xon_recvd", "Link XON received"}, {&stats->link_xoff_tx, "xoff_txd", "Link XOFF transmitted"}, {&stats->link_xoff_rx, "xoff_recvd", "Link XOFF received"}, /* End */ {0,0,0} }; struct ixl_sysctl_info *entry = ctls; while (entry->stat != 0) { SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, entry->name, CTLFLAG_RD, entry->stat, entry->description); entry++; } } void ixl_set_rss_key(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; struct ixl_vsi *vsi = &pf->vsi; device_t dev = pf->dev; enum i40e_status_code status; #ifdef RSS u32 rss_seed[IXL_RSS_KEY_SIZE_REG]; #else u32 rss_seed[IXL_RSS_KEY_SIZE_REG] = {0x41b01687, 0x183cfd8c, 0xce880440, 0x580cbc3c, 0x35897377, 0x328b25e1, 0x4fa98922, 0xb7d90c14, 0xd5bad70d, 0xcd15a2c1, 0x0, 0x0, 0x0}; #endif #ifdef RSS /* Fetch the configured RSS key */ rss_getkey((uint8_t *) &rss_seed); #endif /* Fill out hash function seed */ if (hw->mac.type == I40E_MAC_X722) { struct i40e_aqc_get_set_rss_key_data key_data; bcopy(rss_seed, key_data.standard_rss_key, 40); status = i40e_aq_set_rss_key(hw, vsi->vsi_num, &key_data); if (status) device_printf(dev, "i40e_aq_set_rss_key status %s, error %s\n", i40e_stat_str(hw, status), i40e_aq_str(hw, hw->aq.asq_last_status)); } else { for (int i = 0; i < IXL_RSS_KEY_SIZE_REG; i++) i40e_write_rx_ctl(hw, I40E_PFQF_HKEY(i), rss_seed[i]); } } /* * Configure enabled PCTYPES for RSS. */ void ixl_set_rss_pctypes(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; u64 set_hena = 0, hena; #ifdef RSS u32 rss_hash_config; rss_hash_config = rss_gethashconfig(); if (rss_hash_config & RSS_HASHTYPE_RSS_IPV4) set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV4_OTHER); if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV4) set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV4_TCP); if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV4) set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV4_UDP); if (rss_hash_config & RSS_HASHTYPE_RSS_IPV6) set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV6_OTHER); if (rss_hash_config & RSS_HASHTYPE_RSS_IPV6_EX) set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_FRAG_IPV6); if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV6) set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV6_TCP); if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV6) set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV6_UDP); #else if (hw->mac.type == I40E_MAC_X722) set_hena = IXL_DEFAULT_RSS_HENA_X722; else set_hena = IXL_DEFAULT_RSS_HENA_XL710; #endif hena = (u64)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(0)) | ((u64)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(1)) << 32); hena |= set_hena; i40e_write_rx_ctl(hw, I40E_PFQF_HENA(0), (u32)hena); i40e_write_rx_ctl(hw, I40E_PFQF_HENA(1), (u32)(hena >> 32)); } void ixl_set_rss_hlut(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; struct ixl_vsi *vsi = &pf->vsi; int i, que_id; int lut_entry_width; u32 lut = 0; enum i40e_status_code status; if (hw->mac.type == I40E_MAC_X722) lut_entry_width = 7; else lut_entry_width = pf->hw.func_caps.rss_table_entry_width; /* Populate the LUT with max no. of queues in round robin fashion */ u8 hlut_buf[512]; for (i = 0; i < pf->hw.func_caps.rss_table_size; i++) { #ifdef RSS /* * Fetch the RSS bucket id for the given indirection entry. * Cap it at the number of configured buckets (which is * num_queues.) */ que_id = rss_get_indirection_to_bucket(i); que_id = que_id % vsi->num_queues; #else que_id = i % vsi->num_queues; #endif lut = (que_id & ((0x1 << lut_entry_width) - 1)); hlut_buf[i] = lut; } if (hw->mac.type == I40E_MAC_X722) { status = i40e_aq_set_rss_lut(hw, vsi->vsi_num, TRUE, hlut_buf, sizeof(hlut_buf)); if (status) device_printf(dev, "i40e_aq_set_rss_lut status %s, error %s\n", i40e_stat_str(hw, status), i40e_aq_str(hw, hw->aq.asq_last_status)); } else { for (i = 0; i < pf->hw.func_caps.rss_table_size >> 2; i++) wr32(hw, I40E_PFQF_HLUT(i), ((u32 *)hlut_buf)[i]); ixl_flush(hw); } } /* ** Setup the PF's RSS parameters. */ void ixl_config_rss(struct ixl_pf *pf) { ixl_set_rss_key(pf); ixl_set_rss_pctypes(pf); ixl_set_rss_hlut(pf); } /* ** This routine is run via an vlan config EVENT, ** it enables us to use the HW Filter table since ** we can get the vlan id. This just creates the ** entry in the soft version of the VFTA, init will ** repopulate the real table. */ void ixl_register_vlan(void *arg, struct ifnet *ifp, u16 vtag) { struct ixl_vsi *vsi = ifp->if_softc; struct i40e_hw *hw = vsi->hw; struct ixl_pf *pf = (struct ixl_pf *)vsi->back; if (ifp->if_softc != arg) /* Not our event */ return; if ((vtag == 0) || (vtag > 4095)) /* Invalid */ return; IXL_PF_LOCK(pf); ++vsi->num_vlans; ixl_add_filter(vsi, hw->mac.addr, vtag); IXL_PF_UNLOCK(pf); } /* ** This routine is run via an vlan ** unconfig EVENT, remove our entry ** in the soft vfta. */ void ixl_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag) { struct ixl_vsi *vsi = ifp->if_softc; struct i40e_hw *hw = vsi->hw; struct ixl_pf *pf = (struct ixl_pf *)vsi->back; if (ifp->if_softc != arg) return; if ((vtag == 0) || (vtag > 4095)) /* Invalid */ return; IXL_PF_LOCK(pf); --vsi->num_vlans; ixl_del_filter(vsi, hw->mac.addr, vtag); IXL_PF_UNLOCK(pf); } /* ** This routine updates vlan filters, called by init ** it scans the filter table and then updates the hw ** after a soft reset. */ void ixl_setup_vlan_filters(struct ixl_vsi *vsi) { struct ixl_mac_filter *f; int cnt = 0, flags; if (vsi->num_vlans == 0) return; /* ** Scan the filter list for vlan entries, ** mark them for addition and then call ** for the AQ update. */ SLIST_FOREACH(f, &vsi->ftl, next) { if (f->flags & IXL_FILTER_VLAN) { f->flags |= (IXL_FILTER_ADD | IXL_FILTER_USED); cnt++; } } if (cnt == 0) { printf("setup vlan: no filters found!\n"); return; } flags = IXL_FILTER_VLAN; flags |= (IXL_FILTER_ADD | IXL_FILTER_USED); ixl_add_hw_filters(vsi, flags, cnt); return; } /* ** Initialize filter list and add filters that the hardware ** needs to know about. ** ** Requires VSI's filter list & seid to be set before calling. */ void ixl_init_filters(struct ixl_vsi *vsi) { struct ixl_pf *pf = (struct ixl_pf *)vsi->back; /* Add broadcast address */ ixl_add_filter(vsi, ixl_bcast_addr, IXL_VLAN_ANY); /* * Prevent Tx flow control frames from being sent out by * non-firmware transmitters. * This affects every VSI in the PF. */ if (pf->enable_tx_fc_filter) i40e_add_filter_to_drop_tx_flow_control_frames(vsi->hw, vsi->seid); } /* ** This routine adds mulicast filters */ void ixl_add_mc_filter(struct ixl_vsi *vsi, u8 *macaddr) { struct ixl_mac_filter *f; /* Does one already exist */ f = ixl_find_filter(vsi, macaddr, IXL_VLAN_ANY); if (f != NULL) return; f = ixl_get_filter(vsi); if (f == NULL) { printf("WARNING: no filter available!!\n"); return; } bcopy(macaddr, f->macaddr, ETHER_ADDR_LEN); f->vlan = IXL_VLAN_ANY; f->flags |= (IXL_FILTER_ADD | IXL_FILTER_USED | IXL_FILTER_MC); return; } void ixl_reconfigure_filters(struct ixl_vsi *vsi) { ixl_add_hw_filters(vsi, IXL_FILTER_USED, vsi->num_macs); } /* ** This routine adds macvlan filters */ void ixl_add_filter(struct ixl_vsi *vsi, const u8 *macaddr, s16 vlan) { struct ixl_mac_filter *f, *tmp; struct ixl_pf *pf; device_t dev; DEBUGOUT("ixl_add_filter: begin"); pf = vsi->back; dev = pf->dev; /* Does one already exist */ f = ixl_find_filter(vsi, macaddr, vlan); if (f != NULL) return; /* ** Is this the first vlan being registered, if so we ** need to remove the ANY filter that indicates we are ** not in a vlan, and replace that with a 0 filter. */ if ((vlan != IXL_VLAN_ANY) && (vsi->num_vlans == 1)) { tmp = ixl_find_filter(vsi, macaddr, IXL_VLAN_ANY); if (tmp != NULL) { ixl_del_filter(vsi, macaddr, IXL_VLAN_ANY); ixl_add_filter(vsi, macaddr, 0); } } f = ixl_get_filter(vsi); if (f == NULL) { device_printf(dev, "WARNING: no filter available!!\n"); return; } bcopy(macaddr, f->macaddr, ETHER_ADDR_LEN); f->vlan = vlan; f->flags |= (IXL_FILTER_ADD | IXL_FILTER_USED); if (f->vlan != IXL_VLAN_ANY) f->flags |= IXL_FILTER_VLAN; else vsi->num_macs++; ixl_add_hw_filters(vsi, f->flags, 1); return; } void ixl_del_filter(struct ixl_vsi *vsi, const u8 *macaddr, s16 vlan) { struct ixl_mac_filter *f; f = ixl_find_filter(vsi, macaddr, vlan); if (f == NULL) return; f->flags |= IXL_FILTER_DEL; ixl_del_hw_filters(vsi, 1); vsi->num_macs--; /* Check if this is the last vlan removal */ if (vlan != IXL_VLAN_ANY && vsi->num_vlans == 0) { /* Switch back to a non-vlan filter */ ixl_del_filter(vsi, macaddr, 0); ixl_add_filter(vsi, macaddr, IXL_VLAN_ANY); } return; } /* ** Find the filter with both matching mac addr and vlan id */ struct ixl_mac_filter * ixl_find_filter(struct ixl_vsi *vsi, const u8 *macaddr, s16 vlan) { struct ixl_mac_filter *f; bool match = FALSE; SLIST_FOREACH(f, &vsi->ftl, next) { if (!cmp_etheraddr(f->macaddr, macaddr)) continue; if (f->vlan == vlan) { match = TRUE; break; } } if (!match) f = NULL; return (f); } /* ** This routine takes additions to the vsi filter ** table and creates an Admin Queue call to create ** the filters in the hardware. */ void ixl_add_hw_filters(struct ixl_vsi *vsi, int flags, int cnt) { struct i40e_aqc_add_macvlan_element_data *a, *b; struct ixl_mac_filter *f; struct ixl_pf *pf; struct i40e_hw *hw; device_t dev; int err, j = 0; pf = vsi->back; dev = pf->dev; hw = &pf->hw; IXL_PF_LOCK_ASSERT(pf); - a = malloc(sizeof(struct i40e_aqc_add_macvlan_element_data) * cnt, + a = mallocarray(cnt, sizeof(struct i40e_aqc_add_macvlan_element_data), M_DEVBUF, M_NOWAIT | M_ZERO); if (a == NULL) { device_printf(dev, "add_hw_filters failed to get memory\n"); return; } /* ** Scan the filter list, each time we find one ** we add it to the admin queue array and turn off ** the add bit. */ SLIST_FOREACH(f, &vsi->ftl, next) { if (f->flags == flags) { b = &a[j]; // a pox on fvl long names :) bcopy(f->macaddr, b->mac_addr, ETHER_ADDR_LEN); if (f->vlan == IXL_VLAN_ANY) { b->vlan_tag = 0; b->flags = I40E_AQC_MACVLAN_ADD_IGNORE_VLAN; } else { b->vlan_tag = f->vlan; b->flags = 0; } b->flags |= I40E_AQC_MACVLAN_ADD_PERFECT_MATCH; f->flags &= ~IXL_FILTER_ADD; j++; } if (j == cnt) break; } if (j > 0) { err = i40e_aq_add_macvlan(hw, vsi->seid, a, j, NULL); if (err) device_printf(dev, "aq_add_macvlan err %d, " "aq_error %d\n", err, hw->aq.asq_last_status); else vsi->hw_filters_add += j; } free(a, M_DEVBUF); return; } /* ** This routine takes removals in the vsi filter ** table and creates an Admin Queue call to delete ** the filters in the hardware. */ void ixl_del_hw_filters(struct ixl_vsi *vsi, int cnt) { struct i40e_aqc_remove_macvlan_element_data *d, *e; struct ixl_pf *pf; struct i40e_hw *hw; device_t dev; struct ixl_mac_filter *f, *f_temp; int err, j = 0; DEBUGOUT("ixl_del_hw_filters: begin\n"); pf = vsi->back; hw = &pf->hw; dev = pf->dev; - d = malloc(sizeof(struct i40e_aqc_remove_macvlan_element_data) * cnt, + d = mallocarray(cnt, + sizeof(struct i40e_aqc_remove_macvlan_element_data), M_DEVBUF, M_NOWAIT | M_ZERO); if (d == NULL) { printf("del hw filter failed to get memory\n"); return; } SLIST_FOREACH_SAFE(f, &vsi->ftl, next, f_temp) { if (f->flags & IXL_FILTER_DEL) { e = &d[j]; // a pox on fvl long names :) bcopy(f->macaddr, e->mac_addr, ETHER_ADDR_LEN); e->vlan_tag = (f->vlan == IXL_VLAN_ANY ? 0 : f->vlan); e->flags = I40E_AQC_MACVLAN_DEL_PERFECT_MATCH; /* delete entry from vsi list */ SLIST_REMOVE(&vsi->ftl, f, ixl_mac_filter, next); free(f, M_DEVBUF); j++; } if (j == cnt) break; } if (j > 0) { err = i40e_aq_remove_macvlan(hw, vsi->seid, d, j, NULL); if (err && hw->aq.asq_last_status != I40E_AQ_RC_ENOENT) { int sc = 0; for (int i = 0; i < j; i++) sc += (!d[i].error_code); vsi->hw_filters_del += sc; device_printf(dev, "Failed to remove %d/%d filters, aq error %d\n", j - sc, j, hw->aq.asq_last_status); } else vsi->hw_filters_del += j; } free(d, M_DEVBUF); DEBUGOUT("ixl_del_hw_filters: end\n"); return; } int ixl_enable_tx_ring(struct ixl_pf *pf, struct ixl_pf_qtag *qtag, u16 vsi_qidx) { struct i40e_hw *hw = &pf->hw; int error = 0; u32 reg; u16 pf_qidx; pf_qidx = ixl_pf_qidx_from_vsi_qidx(qtag, vsi_qidx); ixl_dbg(pf, IXL_DBG_EN_DIS, "Enabling PF TX ring %4d / VSI TX ring %4d...\n", pf_qidx, vsi_qidx); i40e_pre_tx_queue_cfg(hw, pf_qidx, TRUE); reg = rd32(hw, I40E_QTX_ENA(pf_qidx)); reg |= I40E_QTX_ENA_QENA_REQ_MASK | I40E_QTX_ENA_QENA_STAT_MASK; wr32(hw, I40E_QTX_ENA(pf_qidx), reg); /* Verify the enable took */ for (int j = 0; j < 10; j++) { reg = rd32(hw, I40E_QTX_ENA(pf_qidx)); if (reg & I40E_QTX_ENA_QENA_STAT_MASK) break; i40e_msec_delay(10); } if ((reg & I40E_QTX_ENA_QENA_STAT_MASK) == 0) { device_printf(pf->dev, "TX queue %d still disabled!\n", pf_qidx); error = ETIMEDOUT; } return (error); } int ixl_enable_rx_ring(struct ixl_pf *pf, struct ixl_pf_qtag *qtag, u16 vsi_qidx) { struct i40e_hw *hw = &pf->hw; int error = 0; u32 reg; u16 pf_qidx; pf_qidx = ixl_pf_qidx_from_vsi_qidx(qtag, vsi_qidx); ixl_dbg(pf, IXL_DBG_EN_DIS, "Enabling PF RX ring %4d / VSI RX ring %4d...\n", pf_qidx, vsi_qidx); reg = rd32(hw, I40E_QRX_ENA(pf_qidx)); reg |= I40E_QRX_ENA_QENA_REQ_MASK | I40E_QRX_ENA_QENA_STAT_MASK; wr32(hw, I40E_QRX_ENA(pf_qidx), reg); /* Verify the enable took */ for (int j = 0; j < 10; j++) { reg = rd32(hw, I40E_QRX_ENA(pf_qidx)); if (reg & I40E_QRX_ENA_QENA_STAT_MASK) break; i40e_msec_delay(10); } if ((reg & I40E_QRX_ENA_QENA_STAT_MASK) == 0) { device_printf(pf->dev, "RX queue %d still disabled!\n", pf_qidx); error = ETIMEDOUT; } return (error); } int ixl_enable_ring(struct ixl_pf *pf, struct ixl_pf_qtag *qtag, u16 vsi_qidx) { int error = 0; error = ixl_enable_tx_ring(pf, qtag, vsi_qidx); /* Called function already prints error message */ if (error) return (error); error = ixl_enable_rx_ring(pf, qtag, vsi_qidx); return (error); } /* For PF VSI only */ int ixl_enable_rings(struct ixl_vsi *vsi) { struct ixl_pf *pf = vsi->back; int error = 0; for (int i = 0; i < vsi->num_queues; i++) { error = ixl_enable_ring(pf, &pf->qtag, i); if (error) return (error); } return (error); } int ixl_disable_tx_ring(struct ixl_pf *pf, struct ixl_pf_qtag *qtag, u16 vsi_qidx) { struct i40e_hw *hw = &pf->hw; int error = 0; u32 reg; u16 pf_qidx; pf_qidx = ixl_pf_qidx_from_vsi_qidx(qtag, vsi_qidx); i40e_pre_tx_queue_cfg(hw, pf_qidx, FALSE); i40e_usec_delay(500); reg = rd32(hw, I40E_QTX_ENA(pf_qidx)); reg &= ~I40E_QTX_ENA_QENA_REQ_MASK; wr32(hw, I40E_QTX_ENA(pf_qidx), reg); /* Verify the disable took */ for (int j = 0; j < 10; j++) { reg = rd32(hw, I40E_QTX_ENA(pf_qidx)); if (!(reg & I40E_QTX_ENA_QENA_STAT_MASK)) break; i40e_msec_delay(10); } if (reg & I40E_QTX_ENA_QENA_STAT_MASK) { device_printf(pf->dev, "TX queue %d still enabled!\n", pf_qidx); error = ETIMEDOUT; } return (error); } int ixl_disable_rx_ring(struct ixl_pf *pf, struct ixl_pf_qtag *qtag, u16 vsi_qidx) { struct i40e_hw *hw = &pf->hw; int error = 0; u32 reg; u16 pf_qidx; pf_qidx = ixl_pf_qidx_from_vsi_qidx(qtag, vsi_qidx); reg = rd32(hw, I40E_QRX_ENA(pf_qidx)); reg &= ~I40E_QRX_ENA_QENA_REQ_MASK; wr32(hw, I40E_QRX_ENA(pf_qidx), reg); /* Verify the disable took */ for (int j = 0; j < 10; j++) { reg = rd32(hw, I40E_QRX_ENA(pf_qidx)); if (!(reg & I40E_QRX_ENA_QENA_STAT_MASK)) break; i40e_msec_delay(10); } if (reg & I40E_QRX_ENA_QENA_STAT_MASK) { device_printf(pf->dev, "RX queue %d still enabled!\n", pf_qidx); error = ETIMEDOUT; } return (error); } int ixl_disable_ring(struct ixl_pf *pf, struct ixl_pf_qtag *qtag, u16 vsi_qidx) { int error = 0; error = ixl_disable_tx_ring(pf, qtag, vsi_qidx); /* Called function already prints error message */ if (error) return (error); error = ixl_disable_rx_ring(pf, qtag, vsi_qidx); return (error); } /* For PF VSI only */ int ixl_disable_rings(struct ixl_vsi *vsi) { struct ixl_pf *pf = vsi->back; int error = 0; for (int i = 0; i < vsi->num_queues; i++) { error = ixl_disable_ring(pf, &pf->qtag, i); if (error) return (error); } return (error); } /** * ixl_handle_mdd_event * * Called from interrupt handler to identify possibly malicious vfs * (But also detects events from the PF, as well) **/ void ixl_handle_mdd_event(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; bool mdd_detected = false; bool pf_mdd_detected = false; u32 reg; /* find what triggered the MDD event */ reg = rd32(hw, I40E_GL_MDET_TX); if (reg & I40E_GL_MDET_TX_VALID_MASK) { u8 pf_num = (reg & I40E_GL_MDET_TX_PF_NUM_MASK) >> I40E_GL_MDET_TX_PF_NUM_SHIFT; u8 event = (reg & I40E_GL_MDET_TX_EVENT_MASK) >> I40E_GL_MDET_TX_EVENT_SHIFT; u16 queue = (reg & I40E_GL_MDET_TX_QUEUE_MASK) >> I40E_GL_MDET_TX_QUEUE_SHIFT; device_printf(dev, "Malicious Driver Detection event %d" " on TX queue %d, pf number %d\n", event, queue, pf_num); wr32(hw, I40E_GL_MDET_TX, 0xffffffff); mdd_detected = true; } reg = rd32(hw, I40E_GL_MDET_RX); if (reg & I40E_GL_MDET_RX_VALID_MASK) { u8 pf_num = (reg & I40E_GL_MDET_RX_FUNCTION_MASK) >> I40E_GL_MDET_RX_FUNCTION_SHIFT; u8 event = (reg & I40E_GL_MDET_RX_EVENT_MASK) >> I40E_GL_MDET_RX_EVENT_SHIFT; u16 queue = (reg & I40E_GL_MDET_RX_QUEUE_MASK) >> I40E_GL_MDET_RX_QUEUE_SHIFT; device_printf(dev, "Malicious Driver Detection event %d" " on RX queue %d, pf number %d\n", event, queue, pf_num); wr32(hw, I40E_GL_MDET_RX, 0xffffffff); mdd_detected = true; } if (mdd_detected) { reg = rd32(hw, I40E_PF_MDET_TX); if (reg & I40E_PF_MDET_TX_VALID_MASK) { wr32(hw, I40E_PF_MDET_TX, 0xFFFF); device_printf(dev, "MDD TX event is for this function!"); pf_mdd_detected = true; } reg = rd32(hw, I40E_PF_MDET_RX); if (reg & I40E_PF_MDET_RX_VALID_MASK) { wr32(hw, I40E_PF_MDET_RX, 0xFFFF); device_printf(dev, "MDD RX event is for this function!"); pf_mdd_detected = true; } } /* re-enable mdd interrupt cause */ reg = rd32(hw, I40E_PFINT_ICR0_ENA); reg |= I40E_PFINT_ICR0_ENA_MAL_DETECT_MASK; wr32(hw, I40E_PFINT_ICR0_ENA, reg); ixl_flush(hw); } void ixl_enable_intr(struct ixl_vsi *vsi) { struct ixl_pf *pf = (struct ixl_pf *)vsi->back; struct i40e_hw *hw = vsi->hw; struct ixl_queue *que = vsi->queues; if (pf->msix > 1) { for (int i = 0; i < vsi->num_queues; i++, que++) ixl_enable_queue(hw, que->me); } else ixl_enable_intr0(hw); } void ixl_disable_rings_intr(struct ixl_vsi *vsi) { struct i40e_hw *hw = vsi->hw; struct ixl_queue *que = vsi->queues; for (int i = 0; i < vsi->num_queues; i++, que++) ixl_disable_queue(hw, que->me); } void ixl_enable_intr0(struct i40e_hw *hw) { u32 reg; /* Use IXL_ITR_NONE so ITR isn't updated here */ reg = I40E_PFINT_DYN_CTL0_INTENA_MASK | I40E_PFINT_DYN_CTL0_CLEARPBA_MASK | (IXL_ITR_NONE << I40E_PFINT_DYN_CTL0_ITR_INDX_SHIFT); wr32(hw, I40E_PFINT_DYN_CTL0, reg); } void ixl_disable_intr0(struct i40e_hw *hw) { u32 reg; reg = IXL_ITR_NONE << I40E_PFINT_DYN_CTL0_ITR_INDX_SHIFT; wr32(hw, I40E_PFINT_DYN_CTL0, reg); ixl_flush(hw); } void ixl_enable_queue(struct i40e_hw *hw, int id) { u32 reg; reg = I40E_PFINT_DYN_CTLN_INTENA_MASK | I40E_PFINT_DYN_CTLN_CLEARPBA_MASK | (IXL_ITR_NONE << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT); wr32(hw, I40E_PFINT_DYN_CTLN(id), reg); } void ixl_disable_queue(struct i40e_hw *hw, int id) { u32 reg; reg = IXL_ITR_NONE << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT; wr32(hw, I40E_PFINT_DYN_CTLN(id), reg); } void ixl_update_stats_counters(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; struct ixl_vsi *vsi = &pf->vsi; struct ixl_vf *vf; struct i40e_hw_port_stats *nsd = &pf->stats; struct i40e_hw_port_stats *osd = &pf->stats_offsets; /* Update hw stats */ ixl_stat_update32(hw, I40E_GLPRT_CRCERRS(hw->port), pf->stat_offsets_loaded, &osd->crc_errors, &nsd->crc_errors); ixl_stat_update32(hw, I40E_GLPRT_ILLERRC(hw->port), pf->stat_offsets_loaded, &osd->illegal_bytes, &nsd->illegal_bytes); ixl_stat_update48(hw, I40E_GLPRT_GORCH(hw->port), I40E_GLPRT_GORCL(hw->port), pf->stat_offsets_loaded, &osd->eth.rx_bytes, &nsd->eth.rx_bytes); ixl_stat_update48(hw, I40E_GLPRT_GOTCH(hw->port), I40E_GLPRT_GOTCL(hw->port), pf->stat_offsets_loaded, &osd->eth.tx_bytes, &nsd->eth.tx_bytes); ixl_stat_update32(hw, I40E_GLPRT_RDPC(hw->port), pf->stat_offsets_loaded, &osd->eth.rx_discards, &nsd->eth.rx_discards); ixl_stat_update48(hw, I40E_GLPRT_UPRCH(hw->port), I40E_GLPRT_UPRCL(hw->port), pf->stat_offsets_loaded, &osd->eth.rx_unicast, &nsd->eth.rx_unicast); ixl_stat_update48(hw, I40E_GLPRT_UPTCH(hw->port), I40E_GLPRT_UPTCL(hw->port), pf->stat_offsets_loaded, &osd->eth.tx_unicast, &nsd->eth.tx_unicast); ixl_stat_update48(hw, I40E_GLPRT_MPRCH(hw->port), I40E_GLPRT_MPRCL(hw->port), pf->stat_offsets_loaded, &osd->eth.rx_multicast, &nsd->eth.rx_multicast); ixl_stat_update48(hw, I40E_GLPRT_MPTCH(hw->port), I40E_GLPRT_MPTCL(hw->port), pf->stat_offsets_loaded, &osd->eth.tx_multicast, &nsd->eth.tx_multicast); ixl_stat_update48(hw, I40E_GLPRT_BPRCH(hw->port), I40E_GLPRT_BPRCL(hw->port), pf->stat_offsets_loaded, &osd->eth.rx_broadcast, &nsd->eth.rx_broadcast); ixl_stat_update48(hw, I40E_GLPRT_BPTCH(hw->port), I40E_GLPRT_BPTCL(hw->port), pf->stat_offsets_loaded, &osd->eth.tx_broadcast, &nsd->eth.tx_broadcast); ixl_stat_update32(hw, I40E_GLPRT_TDOLD(hw->port), pf->stat_offsets_loaded, &osd->tx_dropped_link_down, &nsd->tx_dropped_link_down); ixl_stat_update32(hw, I40E_GLPRT_MLFC(hw->port), pf->stat_offsets_loaded, &osd->mac_local_faults, &nsd->mac_local_faults); ixl_stat_update32(hw, I40E_GLPRT_MRFC(hw->port), pf->stat_offsets_loaded, &osd->mac_remote_faults, &nsd->mac_remote_faults); ixl_stat_update32(hw, I40E_GLPRT_RLEC(hw->port), pf->stat_offsets_loaded, &osd->rx_length_errors, &nsd->rx_length_errors); /* Flow control (LFC) stats */ ixl_stat_update32(hw, I40E_GLPRT_LXONRXC(hw->port), pf->stat_offsets_loaded, &osd->link_xon_rx, &nsd->link_xon_rx); ixl_stat_update32(hw, I40E_GLPRT_LXONTXC(hw->port), pf->stat_offsets_loaded, &osd->link_xon_tx, &nsd->link_xon_tx); ixl_stat_update32(hw, I40E_GLPRT_LXOFFRXC(hw->port), pf->stat_offsets_loaded, &osd->link_xoff_rx, &nsd->link_xoff_rx); ixl_stat_update32(hw, I40E_GLPRT_LXOFFTXC(hw->port), pf->stat_offsets_loaded, &osd->link_xoff_tx, &nsd->link_xoff_tx); /* Packet size stats rx */ ixl_stat_update48(hw, I40E_GLPRT_PRC64H(hw->port), I40E_GLPRT_PRC64L(hw->port), pf->stat_offsets_loaded, &osd->rx_size_64, &nsd->rx_size_64); ixl_stat_update48(hw, I40E_GLPRT_PRC127H(hw->port), I40E_GLPRT_PRC127L(hw->port), pf->stat_offsets_loaded, &osd->rx_size_127, &nsd->rx_size_127); ixl_stat_update48(hw, I40E_GLPRT_PRC255H(hw->port), I40E_GLPRT_PRC255L(hw->port), pf->stat_offsets_loaded, &osd->rx_size_255, &nsd->rx_size_255); ixl_stat_update48(hw, I40E_GLPRT_PRC511H(hw->port), I40E_GLPRT_PRC511L(hw->port), pf->stat_offsets_loaded, &osd->rx_size_511, &nsd->rx_size_511); ixl_stat_update48(hw, I40E_GLPRT_PRC1023H(hw->port), I40E_GLPRT_PRC1023L(hw->port), pf->stat_offsets_loaded, &osd->rx_size_1023, &nsd->rx_size_1023); ixl_stat_update48(hw, I40E_GLPRT_PRC1522H(hw->port), I40E_GLPRT_PRC1522L(hw->port), pf->stat_offsets_loaded, &osd->rx_size_1522, &nsd->rx_size_1522); ixl_stat_update48(hw, I40E_GLPRT_PRC9522H(hw->port), I40E_GLPRT_PRC9522L(hw->port), pf->stat_offsets_loaded, &osd->rx_size_big, &nsd->rx_size_big); /* Packet size stats tx */ ixl_stat_update48(hw, I40E_GLPRT_PTC64H(hw->port), I40E_GLPRT_PTC64L(hw->port), pf->stat_offsets_loaded, &osd->tx_size_64, &nsd->tx_size_64); ixl_stat_update48(hw, I40E_GLPRT_PTC127H(hw->port), I40E_GLPRT_PTC127L(hw->port), pf->stat_offsets_loaded, &osd->tx_size_127, &nsd->tx_size_127); ixl_stat_update48(hw, I40E_GLPRT_PTC255H(hw->port), I40E_GLPRT_PTC255L(hw->port), pf->stat_offsets_loaded, &osd->tx_size_255, &nsd->tx_size_255); ixl_stat_update48(hw, I40E_GLPRT_PTC511H(hw->port), I40E_GLPRT_PTC511L(hw->port), pf->stat_offsets_loaded, &osd->tx_size_511, &nsd->tx_size_511); ixl_stat_update48(hw, I40E_GLPRT_PTC1023H(hw->port), I40E_GLPRT_PTC1023L(hw->port), pf->stat_offsets_loaded, &osd->tx_size_1023, &nsd->tx_size_1023); ixl_stat_update48(hw, I40E_GLPRT_PTC1522H(hw->port), I40E_GLPRT_PTC1522L(hw->port), pf->stat_offsets_loaded, &osd->tx_size_1522, &nsd->tx_size_1522); ixl_stat_update48(hw, I40E_GLPRT_PTC9522H(hw->port), I40E_GLPRT_PTC9522L(hw->port), pf->stat_offsets_loaded, &osd->tx_size_big, &nsd->tx_size_big); ixl_stat_update32(hw, I40E_GLPRT_RUC(hw->port), pf->stat_offsets_loaded, &osd->rx_undersize, &nsd->rx_undersize); ixl_stat_update32(hw, I40E_GLPRT_RFC(hw->port), pf->stat_offsets_loaded, &osd->rx_fragments, &nsd->rx_fragments); ixl_stat_update32(hw, I40E_GLPRT_ROC(hw->port), pf->stat_offsets_loaded, &osd->rx_oversize, &nsd->rx_oversize); ixl_stat_update32(hw, I40E_GLPRT_RJC(hw->port), pf->stat_offsets_loaded, &osd->rx_jabber, &nsd->rx_jabber); pf->stat_offsets_loaded = true; /* End hw stats */ /* Update vsi stats */ ixl_update_vsi_stats(vsi); for (int i = 0; i < pf->num_vfs; i++) { vf = &pf->vfs[i]; if (vf->vf_flags & VF_FLAG_ENABLED) ixl_update_eth_stats(&pf->vfs[i].vsi); } } int ixl_rebuild_hw_structs_after_reset(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; struct ixl_vsi *vsi = &pf->vsi; device_t dev = pf->dev; bool is_up = false; int error = 0; is_up = !!(vsi->ifp->if_drv_flags & IFF_DRV_RUNNING); /* Teardown */ if (is_up) ixl_stop(pf); error = i40e_shutdown_lan_hmc(hw); if (error) device_printf(dev, "Shutdown LAN HMC failed with code %d\n", error); ixl_disable_intr0(hw); ixl_teardown_adminq_msix(pf); error = i40e_shutdown_adminq(hw); if (error) device_printf(dev, "Shutdown Admin queue failed with code %d\n", error); /* Setup */ error = i40e_init_adminq(hw); if (error != 0 && error != I40E_ERR_FIRMWARE_API_VERSION) { device_printf(dev, "Unable to initialize Admin Queue, error %d\n", error); } error = ixl_setup_adminq_msix(pf); if (error) { device_printf(dev, "ixl_setup_adminq_msix error: %d\n", error); } ixl_configure_intr0_msix(pf); ixl_enable_intr0(hw); error = i40e_init_lan_hmc(hw, hw->func_caps.num_tx_qp, hw->func_caps.num_rx_qp, 0, 0); if (error) { device_printf(dev, "init_lan_hmc failed: %d\n", error); } error = i40e_configure_lan_hmc(hw, I40E_HMC_MODEL_DIRECT_ONLY); if (error) { device_printf(dev, "configure_lan_hmc failed: %d\n", error); } if (is_up) ixl_init(pf); return (0); } void ixl_handle_empr_reset(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; int count = 0; u32 reg; /* Typically finishes within 3-4 seconds */ while (count++ < 100) { reg = rd32(hw, I40E_GLGEN_RSTAT) & I40E_GLGEN_RSTAT_DEVSTATE_MASK; if (reg) i40e_msec_delay(100); else break; } ixl_dbg(pf, IXL_DBG_INFO, "EMPR reset wait count: %d\n", count); device_printf(dev, "Rebuilding driver state...\n"); ixl_rebuild_hw_structs_after_reset(pf); device_printf(dev, "Rebuilding driver state done.\n"); atomic_clear_int(&pf->state, IXL_PF_STATE_EMPR_RESETTING); } /* ** Tasklet handler for MSIX Adminq interrupts ** - do outside interrupt since it might sleep */ void ixl_do_adminq(void *context, int pending) { struct ixl_pf *pf = context; struct i40e_hw *hw = &pf->hw; struct i40e_arq_event_info event; i40e_status ret; device_t dev = pf->dev; u32 loop = 0; u16 opcode, result; if (pf->state & IXL_PF_STATE_EMPR_RESETTING) { /* Flag cleared at end of this function */ ixl_handle_empr_reset(pf); return; } /* Admin Queue handling */ event.buf_len = IXL_AQ_BUF_SZ; event.msg_buf = malloc(event.buf_len, M_DEVBUF, M_NOWAIT | M_ZERO); if (!event.msg_buf) { device_printf(dev, "%s: Unable to allocate memory for Admin" " Queue event!\n", __func__); return; } IXL_PF_LOCK(pf); /* clean and process any events */ do { ret = i40e_clean_arq_element(hw, &event, &result); if (ret) break; opcode = LE16_TO_CPU(event.desc.opcode); ixl_dbg(pf, IXL_DBG_AQ, "Admin Queue event: %#06x\n", opcode); switch (opcode) { case i40e_aqc_opc_get_link_status: ixl_link_event(pf, &event); break; case i40e_aqc_opc_send_msg_to_pf: #ifdef PCI_IOV ixl_handle_vf_msg(pf, &event); #endif break; case i40e_aqc_opc_event_lan_overflow: default: break; } } while (result && (loop++ < IXL_ADM_LIMIT)); free(event.msg_buf, M_DEVBUF); /* * If there are still messages to process, reschedule ourselves. * Otherwise, re-enable our interrupt. */ if (result > 0) taskqueue_enqueue(pf->tq, &pf->adminq); else ixl_enable_intr0(hw); IXL_PF_UNLOCK(pf); } /** * Update VSI-specific ethernet statistics counters. **/ void ixl_update_eth_stats(struct ixl_vsi *vsi) { struct ixl_pf *pf = (struct ixl_pf *)vsi->back; struct i40e_hw *hw = &pf->hw; struct i40e_eth_stats *es; struct i40e_eth_stats *oes; struct i40e_hw_port_stats *nsd; u16 stat_idx = vsi->info.stat_counter_idx; es = &vsi->eth_stats; oes = &vsi->eth_stats_offsets; nsd = &pf->stats; /* Gather up the stats that the hw collects */ ixl_stat_update32(hw, I40E_GLV_TEPC(stat_idx), vsi->stat_offsets_loaded, &oes->tx_errors, &es->tx_errors); ixl_stat_update32(hw, I40E_GLV_RDPC(stat_idx), vsi->stat_offsets_loaded, &oes->rx_discards, &es->rx_discards); ixl_stat_update48(hw, I40E_GLV_GORCH(stat_idx), I40E_GLV_GORCL(stat_idx), vsi->stat_offsets_loaded, &oes->rx_bytes, &es->rx_bytes); ixl_stat_update48(hw, I40E_GLV_UPRCH(stat_idx), I40E_GLV_UPRCL(stat_idx), vsi->stat_offsets_loaded, &oes->rx_unicast, &es->rx_unicast); ixl_stat_update48(hw, I40E_GLV_MPRCH(stat_idx), I40E_GLV_MPRCL(stat_idx), vsi->stat_offsets_loaded, &oes->rx_multicast, &es->rx_multicast); ixl_stat_update48(hw, I40E_GLV_BPRCH(stat_idx), I40E_GLV_BPRCL(stat_idx), vsi->stat_offsets_loaded, &oes->rx_broadcast, &es->rx_broadcast); ixl_stat_update48(hw, I40E_GLV_GOTCH(stat_idx), I40E_GLV_GOTCL(stat_idx), vsi->stat_offsets_loaded, &oes->tx_bytes, &es->tx_bytes); ixl_stat_update48(hw, I40E_GLV_UPTCH(stat_idx), I40E_GLV_UPTCL(stat_idx), vsi->stat_offsets_loaded, &oes->tx_unicast, &es->tx_unicast); ixl_stat_update48(hw, I40E_GLV_MPTCH(stat_idx), I40E_GLV_MPTCL(stat_idx), vsi->stat_offsets_loaded, &oes->tx_multicast, &es->tx_multicast); ixl_stat_update48(hw, I40E_GLV_BPTCH(stat_idx), I40E_GLV_BPTCL(stat_idx), vsi->stat_offsets_loaded, &oes->tx_broadcast, &es->tx_broadcast); vsi->stat_offsets_loaded = true; } void ixl_update_vsi_stats(struct ixl_vsi *vsi) { struct ixl_pf *pf; struct ifnet *ifp; struct i40e_eth_stats *es; u64 tx_discards; struct i40e_hw_port_stats *nsd; pf = vsi->back; ifp = vsi->ifp; es = &vsi->eth_stats; nsd = &pf->stats; ixl_update_eth_stats(vsi); tx_discards = es->tx_discards + nsd->tx_dropped_link_down; for (int i = 0; i < vsi->num_queues; i++) tx_discards += vsi->queues[i].txr.br->br_drops; /* Update ifnet stats */ IXL_SET_IPACKETS(vsi, es->rx_unicast + es->rx_multicast + es->rx_broadcast); IXL_SET_OPACKETS(vsi, es->tx_unicast + es->tx_multicast + es->tx_broadcast); IXL_SET_IBYTES(vsi, es->rx_bytes); IXL_SET_OBYTES(vsi, es->tx_bytes); IXL_SET_IMCASTS(vsi, es->rx_multicast); IXL_SET_OMCASTS(vsi, es->tx_multicast); IXL_SET_IERRORS(vsi, nsd->crc_errors + nsd->illegal_bytes + nsd->rx_undersize + nsd->rx_oversize + nsd->rx_fragments + nsd->rx_jabber); IXL_SET_OERRORS(vsi, es->tx_errors); IXL_SET_IQDROPS(vsi, es->rx_discards + nsd->eth.rx_discards); IXL_SET_OQDROPS(vsi, tx_discards); IXL_SET_NOPROTO(vsi, es->rx_unknown_protocol); IXL_SET_COLLISIONS(vsi, 0); } /** * Reset all of the stats for the given pf **/ void ixl_pf_reset_stats(struct ixl_pf *pf) { bzero(&pf->stats, sizeof(struct i40e_hw_port_stats)); bzero(&pf->stats_offsets, sizeof(struct i40e_hw_port_stats)); pf->stat_offsets_loaded = false; } /** * Resets all stats of the given vsi **/ void ixl_vsi_reset_stats(struct ixl_vsi *vsi) { bzero(&vsi->eth_stats, sizeof(struct i40e_eth_stats)); bzero(&vsi->eth_stats_offsets, sizeof(struct i40e_eth_stats)); vsi->stat_offsets_loaded = false; } /** * Read and update a 48 bit stat from the hw * * Since the device stats are not reset at PFReset, they likely will not * be zeroed when the driver starts. We'll save the first values read * and use them as offsets to be subtracted from the raw values in order * to report stats that count from zero. **/ void ixl_stat_update48(struct i40e_hw *hw, u32 hireg, u32 loreg, bool offset_loaded, u64 *offset, u64 *stat) { u64 new_data; #if defined(__FreeBSD__) && (__FreeBSD_version >= 1000000) && defined(__amd64__) new_data = rd64(hw, loreg); #else /* * Use two rd32's instead of one rd64; FreeBSD versions before * 10 don't support 64-bit bus reads/writes. */ new_data = rd32(hw, loreg); new_data |= ((u64)(rd32(hw, hireg) & 0xFFFF)) << 32; #endif if (!offset_loaded) *offset = new_data; if (new_data >= *offset) *stat = new_data - *offset; else *stat = (new_data + ((u64)1 << 48)) - *offset; *stat &= 0xFFFFFFFFFFFFULL; } /** * Read and update a 32 bit stat from the hw **/ void ixl_stat_update32(struct i40e_hw *hw, u32 reg, bool offset_loaded, u64 *offset, u64 *stat) { u32 new_data; new_data = rd32(hw, reg); if (!offset_loaded) *offset = new_data; if (new_data >= *offset) *stat = (u32)(new_data - *offset); else *stat = (u32)((new_data + ((u64)1 << 32)) - *offset); } void ixl_add_device_sysctls(struct ixl_pf *pf) { device_t dev = pf->dev; struct i40e_hw *hw = &pf->hw; struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev); struct sysctl_oid_list *ctx_list = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); struct sysctl_oid *debug_node; struct sysctl_oid_list *debug_list; struct sysctl_oid *fec_node; struct sysctl_oid_list *fec_list; /* Set up sysctls */ SYSCTL_ADD_PROC(ctx, ctx_list, OID_AUTO, "fc", CTLTYPE_INT | CTLFLAG_RW, pf, 0, ixl_set_flowcntl, "I", IXL_SYSCTL_HELP_FC); SYSCTL_ADD_PROC(ctx, ctx_list, OID_AUTO, "advertise_speed", CTLTYPE_INT | CTLFLAG_RW, pf, 0, ixl_set_advertise, "I", IXL_SYSCTL_HELP_SET_ADVERTISE); SYSCTL_ADD_PROC(ctx, ctx_list, OID_AUTO, "current_speed", CTLTYPE_STRING | CTLFLAG_RD, pf, 0, ixl_current_speed, "A", "Current Port Speed"); SYSCTL_ADD_PROC(ctx, ctx_list, OID_AUTO, "fw_version", CTLTYPE_STRING | CTLFLAG_RD, pf, 0, ixl_sysctl_show_fw, "A", "Firmware version"); SYSCTL_ADD_PROC(ctx, ctx_list, OID_AUTO, "unallocated_queues", CTLTYPE_INT | CTLFLAG_RD, pf, 0, ixl_sysctl_unallocated_queues, "I", "Queues not allocated to a PF or VF"); SYSCTL_ADD_PROC(ctx, ctx_list, OID_AUTO, "tx_itr", CTLTYPE_INT | CTLFLAG_RW, pf, 0, ixl_sysctl_pf_tx_itr, "I", "Immediately set TX ITR value for all queues"); SYSCTL_ADD_PROC(ctx, ctx_list, OID_AUTO, "rx_itr", CTLTYPE_INT | CTLFLAG_RW, pf, 0, ixl_sysctl_pf_rx_itr, "I", "Immediately set RX ITR value for all queues"); SYSCTL_ADD_INT(ctx, ctx_list, OID_AUTO, "dynamic_rx_itr", CTLFLAG_RW, &pf->dynamic_rx_itr, 0, "Enable dynamic RX ITR"); SYSCTL_ADD_INT(ctx, ctx_list, OID_AUTO, "dynamic_tx_itr", CTLFLAG_RW, &pf->dynamic_tx_itr, 0, "Enable dynamic TX ITR"); /* Add FEC sysctls for 25G adapters */ /* * XXX: These settings can be changed, but that isn't supported, * so these are read-only for now. */ if (hw->device_id == I40E_DEV_ID_25G_B || hw->device_id == I40E_DEV_ID_25G_SFP28) { fec_node = SYSCTL_ADD_NODE(ctx, ctx_list, OID_AUTO, "fec", CTLFLAG_RD, NULL, "FEC Sysctls"); fec_list = SYSCTL_CHILDREN(fec_node); SYSCTL_ADD_PROC(ctx, fec_list, OID_AUTO, "fc_ability", CTLTYPE_INT | CTLFLAG_RD, pf, 0, ixl_sysctl_fec_fc_ability, "I", "FC FEC ability enabled"); SYSCTL_ADD_PROC(ctx, fec_list, OID_AUTO, "rs_ability", CTLTYPE_INT | CTLFLAG_RD, pf, 0, ixl_sysctl_fec_rs_ability, "I", "RS FEC ability enabled"); SYSCTL_ADD_PROC(ctx, fec_list, OID_AUTO, "fc_requested", CTLTYPE_INT | CTLFLAG_RD, pf, 0, ixl_sysctl_fec_fc_request, "I", "FC FEC mode requested on link"); SYSCTL_ADD_PROC(ctx, fec_list, OID_AUTO, "rs_requested", CTLTYPE_INT | CTLFLAG_RD, pf, 0, ixl_sysctl_fec_rs_request, "I", "RS FEC mode requested on link"); SYSCTL_ADD_PROC(ctx, fec_list, OID_AUTO, "auto_fec_enabled", CTLTYPE_INT | CTLFLAG_RD, pf, 0, ixl_sysctl_fec_auto_enable, "I", "Let FW decide FEC ability/request modes"); } /* Add sysctls meant to print debug information, but don't list them * in "sysctl -a" output. */ debug_node = SYSCTL_ADD_NODE(ctx, ctx_list, OID_AUTO, "debug", CTLFLAG_RD | CTLFLAG_SKIP, NULL, "Debug Sysctls"); debug_list = SYSCTL_CHILDREN(debug_node); SYSCTL_ADD_UINT(ctx, debug_list, OID_AUTO, "shared_debug_mask", CTLFLAG_RW, &pf->hw.debug_mask, 0, "Shared code debug message level"); SYSCTL_ADD_UINT(ctx, debug_list, OID_AUTO, "core_debug_mask", CTLFLAG_RW, &pf->dbg_mask, 0, "Non-hared code debug message level"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "link_status", CTLTYPE_STRING | CTLFLAG_RD, pf, 0, ixl_sysctl_link_status, "A", IXL_SYSCTL_HELP_LINK_STATUS); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "phy_abilities", CTLTYPE_STRING | CTLFLAG_RD, pf, 0, ixl_sysctl_phy_abilities, "A", "PHY Abilities"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "filter_list", CTLTYPE_STRING | CTLFLAG_RD, pf, 0, ixl_sysctl_sw_filter_list, "A", "SW Filter List"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "hw_res_alloc", CTLTYPE_STRING | CTLFLAG_RD, pf, 0, ixl_sysctl_hw_res_alloc, "A", "HW Resource Allocation"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "switch_config", CTLTYPE_STRING | CTLFLAG_RD, pf, 0, ixl_sysctl_switch_config, "A", "HW Switch Configuration"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "rss_key", CTLTYPE_STRING | CTLFLAG_RD, pf, 0, ixl_sysctl_hkey, "A", "View RSS key"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "rss_lut", CTLTYPE_STRING | CTLFLAG_RD, pf, 0, ixl_sysctl_hlut, "A", "View RSS lookup table"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "rss_hena", CTLTYPE_ULONG | CTLFLAG_RD, pf, 0, ixl_sysctl_hena, "LU", "View enabled packet types for RSS"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "disable_fw_link_management", CTLTYPE_INT | CTLFLAG_WR, pf, 0, ixl_sysctl_fw_link_management, "I", "Disable FW Link Management"); if (pf->has_i2c) { SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "read_i2c_byte", CTLTYPE_INT | CTLFLAG_RW, pf, 0, ixl_sysctl_read_i2c_byte, "I", "Read byte from I2C bus"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "write_i2c_byte", CTLTYPE_INT | CTLFLAG_RW, pf, 0, ixl_sysctl_write_i2c_byte, "I", "Write byte to I2C bus"); } #ifdef PCI_IOV SYSCTL_ADD_UINT(ctx, debug_list, OID_AUTO, "vc_debug_level", CTLFLAG_RW, &pf->vc_debug_lvl, 0, "PF/VF Virtual Channel debug level"); #endif } /* * Primarily for finding out how many queues can be assigned to VFs, * at runtime. */ static int ixl_sysctl_unallocated_queues(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; int queues; IXL_PF_LOCK(pf); queues = (int)ixl_pf_qmgr_get_num_free(&pf->qmgr); IXL_PF_UNLOCK(pf); return sysctl_handle_int(oidp, NULL, queues, req); } /* ** Set flow control using sysctl: ** 0 - off ** 1 - rx pause ** 2 - tx pause ** 3 - full */ int ixl_set_flowcntl(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; int requested_fc, error = 0; enum i40e_status_code aq_error = 0; u8 fc_aq_err = 0; /* Get request */ requested_fc = pf->fc; error = sysctl_handle_int(oidp, &requested_fc, 0, req); if ((error) || (req->newptr == NULL)) return (error); if (requested_fc < 0 || requested_fc > 3) { device_printf(dev, "Invalid fc mode; valid modes are 0 through 3\n"); return (EINVAL); } /* Set fc ability for port */ hw->fc.requested_mode = requested_fc; aq_error = i40e_set_fc(hw, &fc_aq_err, TRUE); if (aq_error) { device_printf(dev, "%s: Error setting new fc mode %d; fc_err %#x\n", __func__, aq_error, fc_aq_err); return (EIO); } pf->fc = requested_fc; /* Get new link state */ i40e_msec_delay(250); hw->phy.get_link_info = TRUE; i40e_get_link_status(hw, &pf->link_up); return (0); } char * ixl_aq_speed_to_str(enum i40e_aq_link_speed link_speed) { int index; char *speeds[] = { "Unknown", "100 Mbps", "1 Gbps", "10 Gbps", "40 Gbps", "20 Gbps", "25 Gbps", }; switch (link_speed) { case I40E_LINK_SPEED_100MB: index = 1; break; case I40E_LINK_SPEED_1GB: index = 2; break; case I40E_LINK_SPEED_10GB: index = 3; break; case I40E_LINK_SPEED_40GB: index = 4; break; case I40E_LINK_SPEED_20GB: index = 5; break; case I40E_LINK_SPEED_25GB: index = 6; break; case I40E_LINK_SPEED_UNKNOWN: default: index = 0; break; } return speeds[index]; } int ixl_current_speed(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; struct i40e_hw *hw = &pf->hw; int error = 0; ixl_update_link_status(pf); error = sysctl_handle_string(oidp, ixl_aq_speed_to_str(hw->phy.link_info.link_speed), 8, req); return (error); } static u8 ixl_convert_sysctl_aq_link_speed(u8 speeds, bool to_aq) { static u16 speedmap[6] = { (I40E_LINK_SPEED_100MB | (0x1 << 8)), (I40E_LINK_SPEED_1GB | (0x2 << 8)), (I40E_LINK_SPEED_10GB | (0x4 << 8)), (I40E_LINK_SPEED_20GB | (0x8 << 8)), (I40E_LINK_SPEED_25GB | (0x10 << 8)), (I40E_LINK_SPEED_40GB | (0x20 << 8)) }; u8 retval = 0; for (int i = 0; i < 6; i++) { if (to_aq) retval |= (speeds & (speedmap[i] >> 8)) ? (speedmap[i] & 0xff) : 0; else retval |= (speeds & speedmap[i]) ? (speedmap[i] >> 8) : 0; } return (retval); } int ixl_set_advertised_speeds(struct ixl_pf *pf, int speeds) { struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; struct i40e_aq_get_phy_abilities_resp abilities; struct i40e_aq_set_phy_config config; enum i40e_status_code aq_error = 0; /* Get current capability information */ aq_error = i40e_aq_get_phy_capabilities(hw, FALSE, FALSE, &abilities, NULL); if (aq_error) { device_printf(dev, "%s: Error getting phy capabilities %d," " aq error: %d\n", __func__, aq_error, hw->aq.asq_last_status); return (EIO); } /* Prepare new config */ bzero(&config, sizeof(config)); config.link_speed = ixl_convert_sysctl_aq_link_speed(speeds, true); config.phy_type = abilities.phy_type; config.phy_type_ext = abilities.phy_type_ext; config.abilities = abilities.abilities | I40E_AQ_PHY_ENABLE_ATOMIC_LINK; config.eee_capability = abilities.eee_capability; config.eeer = abilities.eeer_val; config.low_power_ctrl = abilities.d3_lpan; /* Do aq command & restart link */ aq_error = i40e_aq_set_phy_config(hw, &config, NULL); if (aq_error) { device_printf(dev, "%s: Error setting new phy config %d," " aq error: %d\n", __func__, aq_error, hw->aq.asq_last_status); return (EIO); } return (0); } /* ** Control link advertise speed: ** Flags: ** 0x1 - advertise 100 Mb ** 0x2 - advertise 1G ** 0x4 - advertise 10G ** 0x8 - advertise 20G ** 0x10 - advertise 25G ** 0x20 - advertise 40G ** ** Set to 0 to disable link */ int ixl_set_advertise(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; u8 converted_speeds; int requested_ls = 0; int error = 0; /* Read in new mode */ requested_ls = pf->advertised_speed; error = sysctl_handle_int(oidp, &requested_ls, 0, req); if ((error) || (req->newptr == NULL)) return (error); /* Check if changing speeds is supported */ switch (hw->device_id) { case I40E_DEV_ID_25G_B: case I40E_DEV_ID_25G_SFP28: device_printf(dev, "Changing advertised speeds not supported" " on this device.\n"); return (EINVAL); } if (requested_ls < 0 || requested_ls > 0xff) { } /* Check for valid value */ converted_speeds = ixl_convert_sysctl_aq_link_speed((u8)requested_ls, true); if ((converted_speeds | pf->supported_speeds) != pf->supported_speeds) { device_printf(dev, "Invalid advertised speed; " "valid flags are: 0x%02x\n", ixl_convert_sysctl_aq_link_speed(pf->supported_speeds, false)); return (EINVAL); } error = ixl_set_advertised_speeds(pf, requested_ls); if (error) return (error); pf->advertised_speed = requested_ls; ixl_update_link_status(pf); return (0); } /* * Input: bitmap of enum i40e_aq_link_speed */ static u64 ixl_max_aq_speed_to_value(u8 link_speeds) { if (link_speeds & I40E_LINK_SPEED_40GB) return IF_Gbps(40); if (link_speeds & I40E_LINK_SPEED_25GB) return IF_Gbps(25); if (link_speeds & I40E_LINK_SPEED_20GB) return IF_Gbps(20); if (link_speeds & I40E_LINK_SPEED_10GB) return IF_Gbps(10); if (link_speeds & I40E_LINK_SPEED_1GB) return IF_Gbps(1); if (link_speeds & I40E_LINK_SPEED_100MB) return IF_Mbps(100); else /* Minimum supported link speed */ return IF_Mbps(100); } /* ** Get the width and transaction speed of ** the bus this adapter is plugged into. */ void ixl_get_bus_info(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; u16 link; u32 offset, num_ports; u64 max_speed; /* Some devices don't use PCIE */ if (hw->mac.type == I40E_MAC_X722) return; /* Read PCI Express Capabilities Link Status Register */ pci_find_cap(dev, PCIY_EXPRESS, &offset); link = pci_read_config(dev, offset + PCIER_LINK_STA, 2); /* Fill out hw struct with PCIE info */ i40e_set_pci_config_data(hw, link); /* Use info to print out bandwidth messages */ device_printf(dev,"PCI Express Bus: Speed %s %s\n", ((hw->bus.speed == i40e_bus_speed_8000) ? "8.0GT/s": (hw->bus.speed == i40e_bus_speed_5000) ? "5.0GT/s": (hw->bus.speed == i40e_bus_speed_2500) ? "2.5GT/s":"Unknown"), (hw->bus.width == i40e_bus_width_pcie_x8) ? "Width x8" : (hw->bus.width == i40e_bus_width_pcie_x4) ? "Width x4" : (hw->bus.width == i40e_bus_width_pcie_x2) ? "Width x2" : (hw->bus.width == i40e_bus_width_pcie_x1) ? "Width x1" : ("Unknown")); /* * If adapter is in slot with maximum supported speed, * no warning message needs to be printed out. */ if (hw->bus.speed >= i40e_bus_speed_8000 && hw->bus.width >= i40e_bus_width_pcie_x8) return; num_ports = bitcount32(hw->func_caps.valid_functions); max_speed = ixl_max_aq_speed_to_value(pf->supported_speeds) / 1000000; if ((num_ports * max_speed) > hw->bus.speed * hw->bus.width) { device_printf(dev, "PCI-Express bandwidth available" " for this device may be insufficient for" " optimal performance.\n"); device_printf(dev, "Please move the device to a different" " PCI-e link with more lanes and/or higher" " transfer rate.\n"); } } static int ixl_sysctl_show_fw(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; struct i40e_hw *hw = &pf->hw; struct sbuf *sbuf; sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req); ixl_nvm_version_str(hw, sbuf); sbuf_finish(sbuf); sbuf_delete(sbuf); return 0; } void ixl_print_nvm_cmd(device_t dev, struct i40e_nvm_access *nvma) { if ((nvma->command == I40E_NVM_READ) && ((nvma->config & 0xFF) == 0xF) && (((nvma->config & 0xF00) >> 8) == 0xF) && (nvma->offset == 0) && (nvma->data_size == 1)) { // device_printf(dev, "- Get Driver Status Command\n"); } else if (nvma->command == I40E_NVM_READ) { } else { switch (nvma->command) { case 0xB: device_printf(dev, "- command: I40E_NVM_READ\n"); break; case 0xC: device_printf(dev, "- command: I40E_NVM_WRITE\n"); break; default: device_printf(dev, "- command: unknown 0x%08x\n", nvma->command); break; } device_printf(dev, "- config (ptr) : 0x%02x\n", nvma->config & 0xFF); device_printf(dev, "- config (flags): 0x%01x\n", (nvma->config & 0xF00) >> 8); device_printf(dev, "- offset : 0x%08x\n", nvma->offset); device_printf(dev, "- data_s : 0x%08x\n", nvma->data_size); } } int ixl_handle_nvmupd_cmd(struct ixl_pf *pf, struct ifdrv *ifd) { struct i40e_hw *hw = &pf->hw; struct i40e_nvm_access *nvma; device_t dev = pf->dev; enum i40e_status_code status = 0; int perrno; DEBUGFUNC("ixl_handle_nvmupd_cmd"); /* Sanity checks */ if (ifd->ifd_len < sizeof(struct i40e_nvm_access) || ifd->ifd_data == NULL) { device_printf(dev, "%s: incorrect ifdrv length or data pointer\n", __func__); device_printf(dev, "%s: ifdrv length: %lu, sizeof(struct i40e_nvm_access): %lu\n", __func__, ifd->ifd_len, sizeof(struct i40e_nvm_access)); device_printf(dev, "%s: data pointer: %p\n", __func__, ifd->ifd_data); return (EINVAL); } nvma = (struct i40e_nvm_access *)ifd->ifd_data; if (pf->dbg_mask & IXL_DBG_NVMUPD) ixl_print_nvm_cmd(dev, nvma); if (pf->state & IXL_PF_STATE_EMPR_RESETTING) { int count = 0; while (count++ < 100) { i40e_msec_delay(100); if (!(pf->state & IXL_PF_STATE_EMPR_RESETTING)) break; } } if (!(pf->state & IXL_PF_STATE_EMPR_RESETTING)) { IXL_PF_LOCK(pf); status = i40e_nvmupd_command(hw, nvma, nvma->data, &perrno); IXL_PF_UNLOCK(pf); } else { perrno = -EBUSY; } if (status) device_printf(dev, "i40e_nvmupd_command status %s, perrno %d\n", i40e_stat_str(hw, status), perrno); /* * -EPERM is actually ERESTART, which the kernel interprets as it needing * to run this ioctl again. So use -EACCES for -EPERM instead. */ if (perrno == -EPERM) return (-EACCES); else return (perrno); } /********************************************************************* * * Media Ioctl callback * * This routine is called whenever the user queries the status of * the interface using ifconfig. * **********************************************************************/ void ixl_media_status(struct ifnet * ifp, struct ifmediareq * ifmr) { struct ixl_vsi *vsi = ifp->if_softc; struct ixl_pf *pf = vsi->back; struct i40e_hw *hw = &pf->hw; INIT_DEBUGOUT("ixl_media_status: begin"); IXL_PF_LOCK(pf); hw->phy.get_link_info = TRUE; i40e_get_link_status(hw, &pf->link_up); ixl_update_link_status(pf); ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; if (!pf->link_up) { IXL_PF_UNLOCK(pf); return; } ifmr->ifm_status |= IFM_ACTIVE; /* Hardware always does full-duplex */ ifmr->ifm_active |= IFM_FDX; switch (hw->phy.link_info.phy_type) { /* 100 M */ case I40E_PHY_TYPE_100BASE_TX: ifmr->ifm_active |= IFM_100_TX; break; /* 1 G */ case I40E_PHY_TYPE_1000BASE_T: ifmr->ifm_active |= IFM_1000_T; break; case I40E_PHY_TYPE_1000BASE_SX: ifmr->ifm_active |= IFM_1000_SX; break; case I40E_PHY_TYPE_1000BASE_LX: ifmr->ifm_active |= IFM_1000_LX; break; case I40E_PHY_TYPE_1000BASE_T_OPTICAL: ifmr->ifm_active |= IFM_OTHER; break; /* 10 G */ case I40E_PHY_TYPE_10GBASE_SFPP_CU: ifmr->ifm_active |= IFM_10G_TWINAX; break; case I40E_PHY_TYPE_10GBASE_SR: ifmr->ifm_active |= IFM_10G_SR; break; case I40E_PHY_TYPE_10GBASE_LR: ifmr->ifm_active |= IFM_10G_LR; break; case I40E_PHY_TYPE_10GBASE_T: ifmr->ifm_active |= IFM_10G_T; break; case I40E_PHY_TYPE_XAUI: case I40E_PHY_TYPE_XFI: case I40E_PHY_TYPE_10GBASE_AOC: ifmr->ifm_active |= IFM_OTHER; break; /* 25 G */ case I40E_PHY_TYPE_25GBASE_KR: ifmr->ifm_active |= IFM_25G_KR; break; case I40E_PHY_TYPE_25GBASE_CR: ifmr->ifm_active |= IFM_25G_CR; break; case I40E_PHY_TYPE_25GBASE_SR: ifmr->ifm_active |= IFM_25G_SR; break; case I40E_PHY_TYPE_25GBASE_LR: ifmr->ifm_active |= IFM_UNKNOWN; break; /* 40 G */ case I40E_PHY_TYPE_40GBASE_CR4: case I40E_PHY_TYPE_40GBASE_CR4_CU: ifmr->ifm_active |= IFM_40G_CR4; break; case I40E_PHY_TYPE_40GBASE_SR4: ifmr->ifm_active |= IFM_40G_SR4; break; case I40E_PHY_TYPE_40GBASE_LR4: ifmr->ifm_active |= IFM_40G_LR4; break; case I40E_PHY_TYPE_XLAUI: ifmr->ifm_active |= IFM_OTHER; break; case I40E_PHY_TYPE_1000BASE_KX: ifmr->ifm_active |= IFM_1000_KX; break; case I40E_PHY_TYPE_SGMII: ifmr->ifm_active |= IFM_1000_SGMII; break; /* ERJ: What's the difference between these? */ case I40E_PHY_TYPE_10GBASE_CR1_CU: case I40E_PHY_TYPE_10GBASE_CR1: ifmr->ifm_active |= IFM_10G_CR1; break; case I40E_PHY_TYPE_10GBASE_KX4: ifmr->ifm_active |= IFM_10G_KX4; break; case I40E_PHY_TYPE_10GBASE_KR: ifmr->ifm_active |= IFM_10G_KR; break; case I40E_PHY_TYPE_SFI: ifmr->ifm_active |= IFM_10G_SFI; break; /* Our single 20G media type */ case I40E_PHY_TYPE_20GBASE_KR2: ifmr->ifm_active |= IFM_20G_KR2; break; case I40E_PHY_TYPE_40GBASE_KR4: ifmr->ifm_active |= IFM_40G_KR4; break; case I40E_PHY_TYPE_XLPPI: case I40E_PHY_TYPE_40GBASE_AOC: ifmr->ifm_active |= IFM_40G_XLPPI; break; /* Unknown to driver */ default: ifmr->ifm_active |= IFM_UNKNOWN; break; } /* Report flow control status as well */ if (hw->phy.link_info.an_info & I40E_AQ_LINK_PAUSE_TX) ifmr->ifm_active |= IFM_ETH_TXPAUSE; if (hw->phy.link_info.an_info & I40E_AQ_LINK_PAUSE_RX) ifmr->ifm_active |= IFM_ETH_RXPAUSE; IXL_PF_UNLOCK(pf); } void ixl_init(void *arg) { struct ixl_pf *pf = arg; IXL_PF_LOCK(pf); ixl_init_locked(pf); IXL_PF_UNLOCK(pf); } /* * NOTE: Fortville does not support forcing media speeds. Instead, * use the set_advertise sysctl to set the speeds Fortville * will advertise or be allowed to operate at. */ int ixl_media_change(struct ifnet * ifp) { struct ixl_vsi *vsi = ifp->if_softc; struct ifmedia *ifm = &vsi->media; INIT_DEBUGOUT("ixl_media_change: begin"); if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) return (EINVAL); if_printf(ifp, "Use 'advertise_speed' sysctl to change advertised speeds\n"); return (ENODEV); } /********************************************************************* * Ioctl entry point * * ixl_ioctl is called when the user wants to configure the * interface. * * return 0 on success, positive on failure **********************************************************************/ int ixl_ioctl(struct ifnet * ifp, u_long command, caddr_t data) { struct ixl_vsi *vsi = ifp->if_softc; struct ixl_pf *pf = vsi->back; struct ifreq *ifr = (struct ifreq *)data; struct ifdrv *ifd = (struct ifdrv *)data; #if defined(INET) || defined(INET6) struct ifaddr *ifa = (struct ifaddr *)data; bool avoid_reset = FALSE; #endif int error = 0; switch (command) { case SIOCSIFADDR: IOCTL_DEBUGOUT("ioctl: SIOCSIFADDR (Set Interface Address)"); #ifdef INET if (ifa->ifa_addr->sa_family == AF_INET) avoid_reset = TRUE; #endif #ifdef INET6 if (ifa->ifa_addr->sa_family == AF_INET6) avoid_reset = TRUE; #endif #if defined(INET) || defined(INET6) /* ** Calling init results in link renegotiation, ** so we avoid doing it when possible. */ if (avoid_reset) { ifp->if_flags |= IFF_UP; if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) ixl_init(pf); #ifdef INET if (!(ifp->if_flags & IFF_NOARP)) arp_ifinit(ifp, ifa); #endif } else error = ether_ioctl(ifp, command, data); break; #endif case SIOCSIFMTU: IOCTL_DEBUGOUT("ioctl: SIOCSIFMTU (Set Interface MTU)"); if (ifr->ifr_mtu > IXL_MAX_FRAME - ETHER_HDR_LEN - ETHER_CRC_LEN - ETHER_VLAN_ENCAP_LEN) { error = EINVAL; } else { IXL_PF_LOCK(pf); ifp->if_mtu = ifr->ifr_mtu; vsi->max_frame_size = ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN + ETHER_VLAN_ENCAP_LEN; if (ifp->if_drv_flags & IFF_DRV_RUNNING) ixl_init_locked(pf); IXL_PF_UNLOCK(pf); } break; case SIOCSIFFLAGS: IOCTL_DEBUGOUT("ioctl: SIOCSIFFLAGS (Set Interface Flags)"); IXL_PF_LOCK(pf); if (ifp->if_flags & IFF_UP) { if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) { if ((ifp->if_flags ^ pf->if_flags) & (IFF_PROMISC | IFF_ALLMULTI)) { ixl_set_promisc(vsi); } } else { IXL_PF_UNLOCK(pf); ixl_init(pf); IXL_PF_LOCK(pf); } } else { if (ifp->if_drv_flags & IFF_DRV_RUNNING) { ixl_stop_locked(pf); } } pf->if_flags = ifp->if_flags; IXL_PF_UNLOCK(pf); break; case SIOCSDRVSPEC: case SIOCGDRVSPEC: IOCTL_DEBUGOUT("ioctl: SIOCxDRVSPEC (Get/Set Driver-specific " "Info)\n"); /* NVM update command */ if (ifd->ifd_cmd == I40E_NVM_ACCESS) error = ixl_handle_nvmupd_cmd(pf, ifd); else error = EINVAL; break; case SIOCADDMULTI: IOCTL_DEBUGOUT("ioctl: SIOCADDMULTI"); if (ifp->if_drv_flags & IFF_DRV_RUNNING) { IXL_PF_LOCK(pf); ixl_disable_rings_intr(vsi); ixl_add_multi(vsi); ixl_enable_intr(vsi); IXL_PF_UNLOCK(pf); } break; case SIOCDELMULTI: IOCTL_DEBUGOUT("ioctl: SIOCDELMULTI"); if (ifp->if_drv_flags & IFF_DRV_RUNNING) { IXL_PF_LOCK(pf); ixl_disable_rings_intr(vsi); ixl_del_multi(vsi); ixl_enable_intr(vsi); IXL_PF_UNLOCK(pf); } break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: case SIOCGIFXMEDIA: IOCTL_DEBUGOUT("ioctl: SIOCxIFMEDIA (Get/Set Interface Media)"); error = ifmedia_ioctl(ifp, ifr, &vsi->media, command); break; case SIOCSIFCAP: { int mask = ifr->ifr_reqcap ^ ifp->if_capenable; IOCTL_DEBUGOUT("ioctl: SIOCSIFCAP (Set Capabilities)"); ixl_cap_txcsum_tso(vsi, ifp, mask); if (mask & IFCAP_RXCSUM) ifp->if_capenable ^= IFCAP_RXCSUM; if (mask & IFCAP_RXCSUM_IPV6) ifp->if_capenable ^= IFCAP_RXCSUM_IPV6; if (mask & IFCAP_LRO) ifp->if_capenable ^= IFCAP_LRO; if (mask & IFCAP_VLAN_HWTAGGING) ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; if (mask & IFCAP_VLAN_HWFILTER) ifp->if_capenable ^= IFCAP_VLAN_HWFILTER; if (mask & IFCAP_VLAN_HWTSO) ifp->if_capenable ^= IFCAP_VLAN_HWTSO; if (ifp->if_drv_flags & IFF_DRV_RUNNING) { IXL_PF_LOCK(pf); ixl_init_locked(pf); IXL_PF_UNLOCK(pf); } VLAN_CAPABILITIES(ifp); break; } #if __FreeBSD_version >= 1003000 case SIOCGI2C: { struct ifi2creq i2c; int i; IOCTL_DEBUGOUT("ioctl: SIOCGI2C (Get I2C Data)"); if (!pf->has_i2c) return (ENOTTY); error = copyin(ifr->ifr_data, &i2c, sizeof(i2c)); if (error != 0) break; if (i2c.dev_addr != 0xA0 && i2c.dev_addr != 0xA2) { error = EINVAL; break; } if (i2c.len > sizeof(i2c.data)) { error = EINVAL; break; } for (i = 0; i < i2c.len; i++) if (ixl_read_i2c_byte(pf, i2c.offset + i, i2c.dev_addr, &i2c.data[i])) return (EIO); error = copyout(&i2c, ifr->ifr_data, sizeof(i2c)); break; } #endif default: IOCTL_DEBUGOUT("ioctl: UNKNOWN (0x%X)\n", (int)command); error = ether_ioctl(ifp, command, data); break; } return (error); } int ixl_find_i2c_interface(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; bool i2c_en, port_matched; u32 reg; for (int i = 0; i < 4; i++) { reg = rd32(hw, I40E_GLGEN_MDIO_I2C_SEL(i)); i2c_en = (reg & I40E_GLGEN_MDIO_I2C_SEL_MDIO_I2C_SEL_MASK); port_matched = ((reg & I40E_GLGEN_MDIO_I2C_SEL_PHY_PORT_NUM_MASK) >> I40E_GLGEN_MDIO_I2C_SEL_PHY_PORT_NUM_SHIFT) & BIT(hw->port); if (i2c_en && port_matched) return (i); } return (-1); } static char * ixl_phy_type_string(u32 bit_pos, bool ext) { static char * phy_types_str[32] = { "SGMII", "1000BASE-KX", "10GBASE-KX4", "10GBASE-KR", "40GBASE-KR4", "XAUI", "XFI", "SFI", "XLAUI", "XLPPI", "40GBASE-CR4", "10GBASE-CR1", "Reserved (12)", "Reserved (13)", "Reserved (14)", "Reserved (15)", "Reserved (16)", "100BASE-TX", "1000BASE-T", "10GBASE-T", "10GBASE-SR", "10GBASE-LR", "10GBASE-SFP+Cu", "10GBASE-CR1", "40GBASE-CR4", "40GBASE-SR4", "40GBASE-LR4", "1000BASE-SX", "1000BASE-LX", "1000BASE-T Optical", "20GBASE-KR2", "Reserved (31)" }; static char * ext_phy_types_str[4] = { "25GBASE-KR", "25GBASE-CR", "25GBASE-SR", "25GBASE-LR" }; if (ext && bit_pos > 3) return "Invalid_Ext"; if (bit_pos > 31) return "Invalid"; return (ext) ? ext_phy_types_str[bit_pos] : phy_types_str[bit_pos]; } int ixl_aq_get_link_status(struct ixl_pf *pf, struct i40e_aqc_get_link_status *link_status) { device_t dev = pf->dev; struct i40e_hw *hw = &pf->hw; struct i40e_aq_desc desc; enum i40e_status_code status; struct i40e_aqc_get_link_status *aq_link_status = (struct i40e_aqc_get_link_status *)&desc.params.raw; i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_get_link_status); link_status->command_flags = CPU_TO_LE16(I40E_AQ_LSE_ENABLE); status = i40e_asq_send_command(hw, &desc, NULL, 0, NULL); if (status) { device_printf(dev, "%s: i40e_aqc_opc_get_link_status status %s, aq error %s\n", __func__, i40e_stat_str(hw, status), i40e_aq_str(hw, hw->aq.asq_last_status)); return (EIO); } bcopy(aq_link_status, link_status, sizeof(struct i40e_aqc_get_link_status)); return (0); } static char * ixl_phy_type_string_ls(u8 val) { if (val >= 0x1F) return ixl_phy_type_string(val - 0x1F, true); else return ixl_phy_type_string(val, false); } static int ixl_sysctl_link_status(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; device_t dev = pf->dev; struct sbuf *buf; int error = 0; buf = sbuf_new_for_sysctl(NULL, NULL, 128, req); if (!buf) { device_printf(dev, "Could not allocate sbuf for sysctl output.\n"); return (ENOMEM); } struct i40e_aqc_get_link_status link_status; error = ixl_aq_get_link_status(pf, &link_status); if (error) { sbuf_delete(buf); return (error); } /* TODO: Add 25G types */ sbuf_printf(buf, "\n" "PHY Type : 0x%02x<%s>\n" "Speed : 0x%02x\n" "Link info: 0x%02x\n" "AN info : 0x%02x\n" "Ext info : 0x%02x\n" "Loopback : 0x%02x\n" "Max Frame: %d\n" "Config : 0x%02x\n" "Power : 0x%02x", link_status.phy_type, ixl_phy_type_string_ls(link_status.phy_type), link_status.link_speed, link_status.link_info, link_status.an_info, link_status.ext_info, link_status.loopback, link_status.max_frame_size, link_status.config, link_status.power_desc); error = sbuf_finish(buf); if (error) device_printf(dev, "Error finishing sbuf: %d\n", error); sbuf_delete(buf); return (error); } static int ixl_sysctl_phy_abilities(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; enum i40e_status_code status; struct i40e_aq_get_phy_abilities_resp abilities; struct sbuf *buf; int error = 0; buf = sbuf_new_for_sysctl(NULL, NULL, 128, req); if (!buf) { device_printf(dev, "Could not allocate sbuf for sysctl output.\n"); return (ENOMEM); } status = i40e_aq_get_phy_capabilities(hw, FALSE, FALSE, &abilities, NULL); if (status) { device_printf(dev, "%s: i40e_aq_get_phy_capabilities() status %s, aq error %s\n", __func__, i40e_stat_str(hw, status), i40e_aq_str(hw, hw->aq.asq_last_status)); sbuf_delete(buf); return (EIO); } sbuf_printf(buf, "\n" "PHY Type : %08x", abilities.phy_type); if (abilities.phy_type != 0) { sbuf_printf(buf, "<"); for (int i = 0; i < 32; i++) if ((1 << i) & abilities.phy_type) sbuf_printf(buf, "%s,", ixl_phy_type_string(i, false)); sbuf_printf(buf, ">\n"); } sbuf_printf(buf, "PHY Ext : %02x", abilities.phy_type_ext); if (abilities.phy_type_ext != 0) { sbuf_printf(buf, "<"); for (int i = 0; i < 4; i++) if ((1 << i) & abilities.phy_type_ext) sbuf_printf(buf, "%s,", ixl_phy_type_string(i, true)); sbuf_printf(buf, ">"); } sbuf_printf(buf, "\n"); sbuf_printf(buf, "Speed : %02x\n" "Abilities: %02x\n" "EEE cap : %04x\n" "EEER reg : %08x\n" "D3 Lpan : %02x\n" "ID : %02x %02x %02x %02x\n" "ModType : %02x %02x %02x\n" "ModType E: %01x\n" "FEC Cfg : %02x\n" "Ext CC : %02x", abilities.link_speed, abilities.abilities, abilities.eee_capability, abilities.eeer_val, abilities.d3_lpan, abilities.phy_id[0], abilities.phy_id[1], abilities.phy_id[2], abilities.phy_id[3], abilities.module_type[0], abilities.module_type[1], abilities.module_type[2], abilities.phy_type_ext >> 5, abilities.phy_type_ext & 0x1F, abilities.ext_comp_code); error = sbuf_finish(buf); if (error) device_printf(dev, "Error finishing sbuf: %d\n", error); sbuf_delete(buf); return (error); } static int ixl_sysctl_sw_filter_list(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; struct ixl_vsi *vsi = &pf->vsi; struct ixl_mac_filter *f; char *buf, *buf_i; int error = 0; int ftl_len = 0; int ftl_counter = 0; int buf_len = 0; int entry_len = 42; SLIST_FOREACH(f, &vsi->ftl, next) { ftl_len++; } if (ftl_len < 1) { sysctl_handle_string(oidp, "(none)", 6, req); return (0); } buf_len = sizeof(char) * (entry_len + 1) * ftl_len + 2; buf = buf_i = malloc(buf_len, M_DEVBUF, M_NOWAIT); sprintf(buf_i++, "\n"); SLIST_FOREACH(f, &vsi->ftl, next) { sprintf(buf_i, MAC_FORMAT ", vlan %4d, flags %#06x", MAC_FORMAT_ARGS(f->macaddr), f->vlan, f->flags); buf_i += entry_len; /* don't print '\n' for last entry */ if (++ftl_counter != ftl_len) { sprintf(buf_i, "\n"); buf_i++; } } error = sysctl_handle_string(oidp, buf, strlen(buf), req); if (error) printf("sysctl error: %d\n", error); free(buf, M_DEVBUF); return error; } #define IXL_SW_RES_SIZE 0x14 int ixl_res_alloc_cmp(const void *a, const void *b) { const struct i40e_aqc_switch_resource_alloc_element_resp *one, *two; one = (const struct i40e_aqc_switch_resource_alloc_element_resp *)a; two = (const struct i40e_aqc_switch_resource_alloc_element_resp *)b; return ((int)one->resource_type - (int)two->resource_type); } /* * Longest string length: 25 */ char * ixl_switch_res_type_string(u8 type) { static char * ixl_switch_res_type_strings[0x14] = { "VEB", "VSI", "Perfect Match MAC address", "S-tag", "(Reserved)", "Multicast hash entry", "Unicast hash entry", "VLAN", "VSI List entry", "(Reserved)", "VLAN Statistic Pool", "Mirror Rule", "Queue Set", "Inner VLAN Forward filter", "(Reserved)", "Inner MAC", "IP", "GRE/VN1 Key", "VN2 Key", "Tunneling Port" }; if (type < 0x14) return ixl_switch_res_type_strings[type]; else return "(Reserved)"; } static int ixl_sysctl_hw_res_alloc(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; struct sbuf *buf; enum i40e_status_code status; int error = 0; u8 num_entries; struct i40e_aqc_switch_resource_alloc_element_resp resp[IXL_SW_RES_SIZE]; buf = sbuf_new_for_sysctl(NULL, NULL, 128, req); if (!buf) { device_printf(dev, "Could not allocate sbuf for output.\n"); return (ENOMEM); } bzero(resp, sizeof(resp)); status = i40e_aq_get_switch_resource_alloc(hw, &num_entries, resp, IXL_SW_RES_SIZE, NULL); if (status) { device_printf(dev, "%s: get_switch_resource_alloc() error %s, aq error %s\n", __func__, i40e_stat_str(hw, status), i40e_aq_str(hw, hw->aq.asq_last_status)); sbuf_delete(buf); return (error); } /* Sort entries by type for display */ qsort(resp, num_entries, sizeof(struct i40e_aqc_switch_resource_alloc_element_resp), &ixl_res_alloc_cmp); sbuf_cat(buf, "\n"); sbuf_printf(buf, "# of entries: %d\n", num_entries); sbuf_printf(buf, " Type | Guaranteed | Total | Used | Un-allocated\n" " | (this) | (all) | (this) | (all) \n"); for (int i = 0; i < num_entries; i++) { sbuf_printf(buf, "%25s | %10d %5d %6d %12d", ixl_switch_res_type_string(resp[i].resource_type), resp[i].guaranteed, resp[i].total, resp[i].used, resp[i].total_unalloced); if (i < num_entries - 1) sbuf_cat(buf, "\n"); } error = sbuf_finish(buf); if (error) device_printf(dev, "Error finishing sbuf: %d\n", error); sbuf_delete(buf); return (error); } /* ** Caller must init and delete sbuf; this function will clear and ** finish it for caller. ** ** XXX: Cannot use the SEID for this, since there is no longer a ** fixed mapping between SEID and element type. */ char * ixl_switch_element_string(struct sbuf *s, struct i40e_aqc_switch_config_element_resp *element) { sbuf_clear(s); switch (element->element_type) { case I40E_AQ_SW_ELEM_TYPE_MAC: sbuf_printf(s, "MAC %3d", element->element_info); break; case I40E_AQ_SW_ELEM_TYPE_PF: sbuf_printf(s, "PF %3d", element->element_info); break; case I40E_AQ_SW_ELEM_TYPE_VF: sbuf_printf(s, "VF %3d", element->element_info); break; case I40E_AQ_SW_ELEM_TYPE_EMP: sbuf_cat(s, "EMP"); break; case I40E_AQ_SW_ELEM_TYPE_BMC: sbuf_cat(s, "BMC"); break; case I40E_AQ_SW_ELEM_TYPE_PV: sbuf_cat(s, "PV"); break; case I40E_AQ_SW_ELEM_TYPE_VEB: sbuf_cat(s, "VEB"); break; case I40E_AQ_SW_ELEM_TYPE_PA: sbuf_cat(s, "PA"); break; case I40E_AQ_SW_ELEM_TYPE_VSI: sbuf_printf(s, "VSI %3d", element->element_info); break; default: sbuf_cat(s, "?"); break; } sbuf_finish(s); return sbuf_data(s); } static int ixl_sysctl_switch_config(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; struct sbuf *buf; struct sbuf *nmbuf; enum i40e_status_code status; int error = 0; u16 next = 0; u8 aq_buf[I40E_AQ_LARGE_BUF]; struct i40e_aqc_get_switch_config_resp *sw_config; sw_config = (struct i40e_aqc_get_switch_config_resp *)aq_buf; buf = sbuf_new_for_sysctl(NULL, NULL, 128, req); if (!buf) { device_printf(dev, "Could not allocate sbuf for sysctl output.\n"); return (ENOMEM); } status = i40e_aq_get_switch_config(hw, sw_config, sizeof(aq_buf), &next, NULL); if (status) { device_printf(dev, "%s: aq_get_switch_config() error %s, aq error %s\n", __func__, i40e_stat_str(hw, status), i40e_aq_str(hw, hw->aq.asq_last_status)); sbuf_delete(buf); return error; } if (next) device_printf(dev, "%s: TODO: get more config with SEID %d\n", __func__, next); nmbuf = sbuf_new_auto(); if (!nmbuf) { device_printf(dev, "Could not allocate sbuf for name output.\n"); sbuf_delete(buf); return (ENOMEM); } sbuf_cat(buf, "\n"); /* Assuming <= 255 elements in switch */ sbuf_printf(buf, "# of reported elements: %d\n", sw_config->header.num_reported); sbuf_printf(buf, "total # of elements: %d\n", sw_config->header.num_total); /* Exclude: ** Revision -- all elements are revision 1 for now */ sbuf_printf(buf, "SEID ( Name ) | Uplink | Downlink | Conn Type\n" " | | | (uplink)\n"); for (int i = 0; i < sw_config->header.num_reported; i++) { // "%4d (%8s) | %8s %8s %#8x", sbuf_printf(buf, "%4d", sw_config->element[i].seid); sbuf_cat(buf, " "); sbuf_printf(buf, "(%8s)", ixl_switch_element_string(nmbuf, &sw_config->element[i])); sbuf_cat(buf, " | "); sbuf_printf(buf, "%8d", sw_config->element[i].uplink_seid); sbuf_cat(buf, " "); sbuf_printf(buf, "%8d", sw_config->element[i].downlink_seid); sbuf_cat(buf, " "); sbuf_printf(buf, "%#8x", sw_config->element[i].connection_type); if (i < sw_config->header.num_reported - 1) sbuf_cat(buf, "\n"); } sbuf_delete(nmbuf); error = sbuf_finish(buf); if (error) device_printf(dev, "Error finishing sbuf: %d\n", error); sbuf_delete(buf); return (error); } static int ixl_sysctl_hkey(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; struct sbuf *buf; int error = 0; enum i40e_status_code status; u32 reg; struct i40e_aqc_get_set_rss_key_data key_data; buf = sbuf_new_for_sysctl(NULL, NULL, 128, req); if (!buf) { device_printf(dev, "Could not allocate sbuf for output.\n"); return (ENOMEM); } sbuf_cat(buf, "\n"); if (hw->mac.type == I40E_MAC_X722) { bzero(key_data.standard_rss_key, sizeof(key_data.standard_rss_key)); status = i40e_aq_get_rss_key(hw, pf->vsi.vsi_num, &key_data); if (status) device_printf(dev, "i40e_aq_get_rss_key status %s, error %s\n", i40e_stat_str(hw, status), i40e_aq_str(hw, hw->aq.asq_last_status)); sbuf_printf(buf, "%40D", (u_char *)key_data.standard_rss_key, ""); } else { for (int i = 0; i < IXL_RSS_KEY_SIZE_REG; i++) { reg = i40e_read_rx_ctl(hw, I40E_PFQF_HKEY(i)); sbuf_printf(buf, "%4D", (u_char *)®, ""); } } error = sbuf_finish(buf); if (error) device_printf(dev, "Error finishing sbuf: %d\n", error); sbuf_delete(buf); return (error); } static int ixl_sysctl_hlut(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; struct sbuf *buf; int error = 0; enum i40e_status_code status; u8 hlut[512]; u32 reg; buf = sbuf_new_for_sysctl(NULL, NULL, 128, req); if (!buf) { device_printf(dev, "Could not allocate sbuf for output.\n"); return (ENOMEM); } sbuf_cat(buf, "\n"); if (hw->mac.type == I40E_MAC_X722) { bzero(hlut, sizeof(hlut)); status = i40e_aq_get_rss_lut(hw, pf->vsi.vsi_num, TRUE, hlut, sizeof(hlut)); if (status) device_printf(dev, "i40e_aq_get_rss_lut status %s, error %s\n", i40e_stat_str(hw, status), i40e_aq_str(hw, hw->aq.asq_last_status)); sbuf_printf(buf, "%512D", (u_char *)hlut, ""); } else { for (int i = 0; i < hw->func_caps.rss_table_size >> 2; i++) { reg = rd32(hw, I40E_PFQF_HLUT(i)); sbuf_printf(buf, "%4D", (u_char *)®, ""); } } error = sbuf_finish(buf); if (error) device_printf(dev, "Error finishing sbuf: %d\n", error); sbuf_delete(buf); return (error); } static int ixl_sysctl_hena(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; struct i40e_hw *hw = &pf->hw; u64 hena; hena = (u64)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(0)) | ((u64)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(1)) << 32); return sysctl_handle_long(oidp, NULL, hena, req); } /* * Sysctl to disable firmware's link management * * 1 - Disable link management on this port * 0 - Re-enable link management * * On normal NVMs, firmware manages link by default. */ static int ixl_sysctl_fw_link_management(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; int requested_mode = -1; enum i40e_status_code status = 0; int error = 0; /* Read in new mode */ error = sysctl_handle_int(oidp, &requested_mode, 0, req); if ((error) || (req->newptr == NULL)) return (error); /* Check for sane value */ if (requested_mode < 0 || requested_mode > 1) { device_printf(dev, "Valid modes are 0 or 1\n"); return (EINVAL); } /* Set new mode */ status = i40e_aq_set_phy_debug(hw, !!(requested_mode) << 4, NULL); if (status) { device_printf(dev, "%s: Error setting new phy debug mode %s," " aq error: %s\n", __func__, i40e_stat_str(hw, status), i40e_aq_str(hw, hw->aq.asq_last_status)); return (EIO); } return (0); } /* * Sysctl to read a byte from I2C bus. * * Input: 32-bit value: * bits 0-7: device address (0xA0 or 0xA2) * bits 8-15: offset (0-255) * bits 16-31: unused * Output: 8-bit value read */ static int ixl_sysctl_read_i2c_byte(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; device_t dev = pf->dev; int input = -1, error = 0; device_printf(dev, "%s: start\n", __func__); u8 dev_addr, offset, output; /* Read in I2C read parameters */ error = sysctl_handle_int(oidp, &input, 0, req); if ((error) || (req->newptr == NULL)) return (error); /* Validate device address */ dev_addr = input & 0xFF; if (dev_addr != 0xA0 && dev_addr != 0xA2) { return (EINVAL); } offset = (input >> 8) & 0xFF; error = ixl_read_i2c_byte(pf, offset, dev_addr, &output); if (error) return (error); device_printf(dev, "%02X\n", output); return (0); } /* * Sysctl to write a byte to the I2C bus. * * Input: 32-bit value: * bits 0-7: device address (0xA0 or 0xA2) * bits 8-15: offset (0-255) * bits 16-23: value to write * bits 24-31: unused * Output: 8-bit value written */ static int ixl_sysctl_write_i2c_byte(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; device_t dev = pf->dev; int input = -1, error = 0; u8 dev_addr, offset, value; /* Read in I2C write parameters */ error = sysctl_handle_int(oidp, &input, 0, req); if ((error) || (req->newptr == NULL)) return (error); /* Validate device address */ dev_addr = input & 0xFF; if (dev_addr != 0xA0 && dev_addr != 0xA2) { return (EINVAL); } offset = (input >> 8) & 0xFF; value = (input >> 16) & 0xFF; error = ixl_write_i2c_byte(pf, offset, dev_addr, value); if (error) return (error); device_printf(dev, "%02X written\n", value); return (0); } static int ixl_get_fec_config(struct ixl_pf *pf, struct i40e_aq_get_phy_abilities_resp *abilities, u8 bit_pos, int *is_set) { device_t dev = pf->dev; struct i40e_hw *hw = &pf->hw; enum i40e_status_code status; status = i40e_aq_get_phy_capabilities(hw, FALSE, FALSE, abilities, NULL); if (status) { device_printf(dev, "%s: i40e_aq_get_phy_capabilities() status %s, aq error %s\n", __func__, i40e_stat_str(hw, status), i40e_aq_str(hw, hw->aq.asq_last_status)); return (EIO); } *is_set = !!(abilities->phy_type_ext & bit_pos); return (0); } static int ixl_set_fec_config(struct ixl_pf *pf, struct i40e_aq_get_phy_abilities_resp *abilities, u8 bit_pos, int set) { device_t dev = pf->dev; struct i40e_hw *hw = &pf->hw; struct i40e_aq_set_phy_config config; enum i40e_status_code status; /* Set new PHY config */ memset(&config, 0, sizeof(config)); config.fec_config = abilities->phy_type_ext & ~(bit_pos); if (set) config.fec_config |= bit_pos; if (config.fec_config != abilities->phy_type_ext) { config.abilities |= I40E_AQ_PHY_ENABLE_ATOMIC_LINK; config.phy_type = abilities->phy_type; config.phy_type_ext = abilities->phy_type_ext; config.link_speed = abilities->link_speed; config.eee_capability = abilities->eee_capability; config.eeer = abilities->eeer_val; config.low_power_ctrl = abilities->d3_lpan; status = i40e_aq_set_phy_config(hw, &config, NULL); if (status) { device_printf(dev, "%s: i40e_aq_set_phy_config() status %s, aq error %s\n", __func__, i40e_stat_str(hw, status), i40e_aq_str(hw, hw->aq.asq_last_status)); return (EIO); } } return (0); } static int ixl_sysctl_fec_fc_ability(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; int mode, error = 0; struct i40e_aq_get_phy_abilities_resp abilities; error = ixl_get_fec_config(pf, &abilities, I40E_AQ_SET_FEC_ABILITY_KR, &mode); if (error) return (error); /* Read in new mode */ error = sysctl_handle_int(oidp, &mode, 0, req); if ((error) || (req->newptr == NULL)) return (error); return ixl_set_fec_config(pf, &abilities, I40E_AQ_SET_FEC_ABILITY_KR, !!(mode)); } static int ixl_sysctl_fec_rs_ability(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; int mode, error = 0; struct i40e_aq_get_phy_abilities_resp abilities; error = ixl_get_fec_config(pf, &abilities, I40E_AQ_SET_FEC_ABILITY_RS, &mode); if (error) return (error); /* Read in new mode */ error = sysctl_handle_int(oidp, &mode, 0, req); if ((error) || (req->newptr == NULL)) return (error); return ixl_set_fec_config(pf, &abilities, I40E_AQ_SET_FEC_ABILITY_RS, !!(mode)); } static int ixl_sysctl_fec_fc_request(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; int mode, error = 0; struct i40e_aq_get_phy_abilities_resp abilities; error = ixl_get_fec_config(pf, &abilities, I40E_AQ_SET_FEC_REQUEST_KR, &mode); if (error) return (error); /* Read in new mode */ error = sysctl_handle_int(oidp, &mode, 0, req); if ((error) || (req->newptr == NULL)) return (error); return ixl_set_fec_config(pf, &abilities, I40E_AQ_SET_FEC_REQUEST_KR, !!(mode)); } static int ixl_sysctl_fec_rs_request(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; int mode, error = 0; struct i40e_aq_get_phy_abilities_resp abilities; error = ixl_get_fec_config(pf, &abilities, I40E_AQ_SET_FEC_REQUEST_RS, &mode); if (error) return (error); /* Read in new mode */ error = sysctl_handle_int(oidp, &mode, 0, req); if ((error) || (req->newptr == NULL)) return (error); return ixl_set_fec_config(pf, &abilities, I40E_AQ_SET_FEC_REQUEST_RS, !!(mode)); } static int ixl_sysctl_fec_auto_enable(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; int mode, error = 0; struct i40e_aq_get_phy_abilities_resp abilities; error = ixl_get_fec_config(pf, &abilities, I40E_AQ_SET_FEC_AUTO, &mode); if (error) return (error); /* Read in new mode */ error = sysctl_handle_int(oidp, &mode, 0, req); if ((error) || (req->newptr == NULL)) return (error); return ixl_set_fec_config(pf, &abilities, I40E_AQ_SET_FEC_AUTO, !!(mode)); }