Index: head/sys/dev/e1000/if_em.c
===================================================================
--- head/sys/dev/e1000/if_em.c	(revision 302383)
+++ head/sys/dev/e1000/if_em.c	(revision 302384)
@@ -1,6234 +1,6235 @@
/******************************************************************************

  Copyright (c) 2001-2015, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.
******************************************************************************/ /*$FreeBSD$*/ #include "opt_em.h" #include "opt_ddb.h" #include "opt_inet.h" #include "opt_inet6.h" #ifdef HAVE_KERNEL_OPTION_HEADERS #include "opt_device_polling.h" #endif #include #include #ifdef DDB #include #include #endif #if __FreeBSD_version >= 800000 #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "e1000_api.h" #include "e1000_82571.h" #include "if_em.h" /********************************************************************* * Driver version: *********************************************************************/ char em_driver_version[] = "7.6.1-k"; /********************************************************************* * PCI Device ID Table * * Used by probe to select devices to load on * Last field stores an index into e1000_strings * Last entry must be all 0s * * { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index } *********************************************************************/ static em_vendor_info_t em_vendor_info_array[] = { /* Intel(R) PRO/1000 Network Connection */ { 0x8086, E1000_DEV_ID_82571EB_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82571EB_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82571EB_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82572EI_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82572EI_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82572EI_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82572EI, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82573E, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82573E_IAMT, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82573L, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82583V, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH8_IGP_AMT, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH8_IGP_C, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH8_IFE, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH8_IFE_GT, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH8_IFE_G, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH8_IGP_M, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH8_82567V_3, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH9_IGP_AMT, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH9_IGP_C, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, 
E1000_DEV_ID_ICH9_IGP_M, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH9_IGP_M_V, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH9_IFE, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH9_IFE_GT, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH9_IFE_G, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH9_BM, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82574L, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82574LA, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH10_R_BM_LM, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH10_R_BM_LF, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH10_R_BM_V, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH10_D_BM_LM, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH10_D_BM_LF, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH10_D_BM_V, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_PCH_M_HV_LM, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_PCH_M_HV_LC, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_PCH_D_HV_DM, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_PCH_D_HV_DC, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_PCH2_LV_LM, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_PCH2_LV_V, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_PCH_LPT_I217_LM, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_PCH_LPT_I217_V, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_LM, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_V, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_PCH_I218_LM2, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_PCH_I218_V2, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_PCH_I218_LM3, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_PCH_I218_V3, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM2, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V2, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_PCH_LBG_I219_LM3, PCI_ANY_ID, PCI_ANY_ID, 0}, /* required last entry */ { 0, 0, 0, 0, 0} }; /********************************************************************* * Table of branding strings for all supported NICs. 
*********************************************************************/ static char *em_strings[] = { "Intel(R) PRO/1000 Network Connection" }; /********************************************************************* * Function prototypes *********************************************************************/ static int em_probe(device_t); static int em_attach(device_t); static int em_detach(device_t); static int em_shutdown(device_t); static int em_suspend(device_t); static int em_resume(device_t); #ifdef EM_MULTIQUEUE static int em_mq_start(if_t, struct mbuf *); static int em_mq_start_locked(if_t, struct tx_ring *); static void em_qflush(if_t); #else static void em_start(if_t); static void em_start_locked(if_t, struct tx_ring *); #endif static int em_ioctl(if_t, u_long, caddr_t); static uint64_t em_get_counter(if_t, ift_counter); static void em_init(void *); static void em_init_locked(struct adapter *); static void em_stop(void *); static void em_media_status(if_t, struct ifmediareq *); static int em_media_change(if_t); static void em_identify_hardware(struct adapter *); static int em_allocate_pci_resources(struct adapter *); static int em_allocate_legacy(struct adapter *); static int em_allocate_msix(struct adapter *); static int em_allocate_queues(struct adapter *); static int em_setup_msix(struct adapter *); static void em_free_pci_resources(struct adapter *); static void em_local_timer(void *); static void em_reset(struct adapter *); static int em_setup_interface(device_t, struct adapter *); static void em_flush_desc_rings(struct adapter *); static void em_setup_transmit_structures(struct adapter *); static void em_initialize_transmit_unit(struct adapter *); static int em_allocate_transmit_buffers(struct tx_ring *); static void em_free_transmit_structures(struct adapter *); static void em_free_transmit_buffers(struct tx_ring *); static int em_setup_receive_structures(struct adapter *); static int em_allocate_receive_buffers(struct rx_ring *); static void em_initialize_receive_unit(struct adapter *); static void em_free_receive_structures(struct adapter *); static void em_free_receive_buffers(struct rx_ring *); static void em_enable_intr(struct adapter *); static void em_disable_intr(struct adapter *); static void em_update_stats_counters(struct adapter *); static void em_add_hw_stats(struct adapter *adapter); static void em_txeof(struct tx_ring *); static bool em_rxeof(struct rx_ring *, int, int *); #ifndef __NO_STRICT_ALIGNMENT static int em_fixup_rx(struct rx_ring *); #endif static void em_setup_rxdesc(union e1000_rx_desc_extended *, const struct em_rxbuffer *rxbuf); static void em_receive_checksum(uint32_t status, struct mbuf *); static void em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int, struct ip *, u32 *, u32 *); static void em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *, struct tcphdr *, u32 *, u32 *); static void em_set_promisc(struct adapter *); static void em_disable_promisc(struct adapter *); static void em_set_multi(struct adapter *); static void em_update_link_status(struct adapter *); static void em_refresh_mbufs(struct rx_ring *, int); static void em_register_vlan(void *, if_t, u16); static void em_unregister_vlan(void *, if_t, u16); static void em_setup_vlan_hw_support(struct adapter *); static int em_xmit(struct tx_ring *, struct mbuf **); static int em_dma_malloc(struct adapter *, bus_size_t, struct em_dma_alloc *, int); static void em_dma_free(struct adapter *, struct em_dma_alloc *); static int em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS); 
static void em_print_nvm_info(struct adapter *); static int em_sysctl_debug_info(SYSCTL_HANDLER_ARGS); static void em_print_debug_info(struct adapter *); static int em_is_valid_ether_addr(u8 *); static int em_sysctl_int_delay(SYSCTL_HANDLER_ARGS); static void em_add_int_delay_sysctl(struct adapter *, const char *, const char *, struct em_int_delay_info *, int, int); /* Management and WOL Support */ static void em_init_manageability(struct adapter *); static void em_release_manageability(struct adapter *); static void em_get_hw_control(struct adapter *); static void em_release_hw_control(struct adapter *); static void em_get_wakeup(device_t); static void em_enable_wakeup(device_t); static int em_enable_phy_wakeup(struct adapter *); static void em_led_func(void *, int); static void em_disable_aspm(struct adapter *); static int em_irq_fast(void *); /* MSIX handlers */ static void em_msix_tx(void *); static void em_msix_rx(void *); static void em_msix_link(void *); static void em_handle_tx(void *context, int pending); static void em_handle_rx(void *context, int pending); static void em_handle_link(void *context, int pending); #ifdef EM_MULTIQUEUE static void em_enable_vectors_82574(struct adapter *); #endif static void em_set_sysctl_value(struct adapter *, const char *, const char *, int *, int); static int em_set_flowcntl(SYSCTL_HANDLER_ARGS); static int em_sysctl_eee(SYSCTL_HANDLER_ARGS); static __inline void em_rx_discard(struct rx_ring *, int); #ifdef DEVICE_POLLING static poll_handler_t em_poll; #endif /* POLLING */ /********************************************************************* * FreeBSD Device Interface Entry Points *********************************************************************/ static device_method_t em_methods[] = { /* Device interface */ DEVMETHOD(device_probe, em_probe), DEVMETHOD(device_attach, em_attach), DEVMETHOD(device_detach, em_detach), DEVMETHOD(device_shutdown, em_shutdown), DEVMETHOD(device_suspend, em_suspend), DEVMETHOD(device_resume, em_resume), DEVMETHOD_END }; static driver_t em_driver = { "em", em_methods, sizeof(struct adapter), }; devclass_t em_devclass; DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0); MODULE_DEPEND(em, pci, 1, 1, 1); MODULE_DEPEND(em, ether, 1, 1, 1); #ifdef DEV_NETMAP MODULE_DEPEND(em, netmap, 1, 1, 1); #endif /* DEV_NETMAP */ /********************************************************************* * Tunable default values. 
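 *
 * As a rough worked example of the conversion macros below (hypothetical
 * inputs): EM_TICKS_TO_USECS(100) = (1024*100 + 500)/1000 = 102 usecs,
 * and EM_USECS_TO_TICKS(102) = (1000*102 + 512)/1024 = 100 ticks, i.e.
 * the hardware counts in 1.024 usec units with rounding in both
 * directions. Likewise DEFAULT_ITR = 1000000000/(8000 * 256) = 488,
 * which in the ITR register's 256 ns granularity is roughly 125 usecs
 * between interrupts, matching MAX_INTS_PER_SEC = 8000.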
*********************************************************************/ #define EM_TICKS_TO_USECS(ticks) ((1024 * (ticks) + 500) / 1000) #define EM_USECS_TO_TICKS(usecs) ((1000 * (usecs) + 512) / 1024) #define M_TSO_LEN 66 #define MAX_INTS_PER_SEC 8000 #define DEFAULT_ITR (1000000000/(MAX_INTS_PER_SEC * 256)) /* Allow common code without TSO */ #ifndef CSUM_TSO #define CSUM_TSO 0 #endif #define TSO_WORKAROUND 4 static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters"); static int em_disable_crc_stripping = 0; SYSCTL_INT(_hw_em, OID_AUTO, disable_crc_stripping, CTLFLAG_RDTUN, &em_disable_crc_stripping, 0, "Disable CRC Stripping"); static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV); static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR); SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt, 0, "Default transmit interrupt delay in usecs"); SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt, 0, "Default receive interrupt delay in usecs"); static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV); static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV); SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN, &em_tx_abs_int_delay_dflt, 0, "Default transmit interrupt delay limit in usecs"); SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN, &em_rx_abs_int_delay_dflt, 0, "Default receive interrupt delay limit in usecs"); static int em_rxd = EM_DEFAULT_RXD; static int em_txd = EM_DEFAULT_TXD; SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0, "Number of receive descriptors per queue"); SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0, "Number of transmit descriptors per queue"); static int em_smart_pwr_down = FALSE; SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down, 0, "Set to true to leave smart power down enabled on newer adapters"); /* Controls whether promiscuous also shows bad packets */ static int em_debug_sbp = FALSE; SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0, "Show bad packets in promiscuous mode"); static int em_enable_msix = TRUE; SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0, "Enable MSI-X interrupts"); #ifdef EM_MULTIQUEUE static int em_num_queues = 1; SYSCTL_INT(_hw_em, OID_AUTO, num_queues, CTLFLAG_RDTUN, &em_num_queues, 0, "82574 only: Number of queues to configure, 0 indicates autoconfigure"); #endif /* ** Global variable to store last used CPU when binding queues ** to CPUs in igb_allocate_msix. Starts at CPU_FIRST and increments when a ** queue is bound to a cpu. */ static int em_last_bind_cpu = -1; /* How many packets rxeof tries to clean at a time */ static int em_rx_process_limit = 100; SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN, &em_rx_process_limit, 0, "Maximum number of received packets to process " "at a time, -1 means unlimited"); /* Energy efficient ethernet - default to OFF */ static int eee_setting = 1; SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0, "Enable Energy Efficient Ethernet"); /* Global used in WOL setup with multiport cards */ static int global_quad_port_a = 0; #ifdef DEV_NETMAP /* see ixgbe.c for details */ #include #endif /* DEV_NETMAP */ /********************************************************************* * Device identification routine * * em_probe determines if the driver should be loaded on * adapter based on PCI vendor/device id of the adapter. 
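 * For example, an 82574L part (vendor 0x8086, device E1000_DEV_ID_82574L)
 * matches its table entry above regardless of its subsystem IDs, since
 * that entry stores PCI_ANY_ID in the subvendor and subdevice fields.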
* * return BUS_PROBE_DEFAULT on success, positive on failure *********************************************************************/ static int em_probe(device_t dev) { char adapter_name[60]; uint16_t pci_vendor_id = 0; uint16_t pci_device_id = 0; uint16_t pci_subvendor_id = 0; uint16_t pci_subdevice_id = 0; em_vendor_info_t *ent; INIT_DEBUGOUT("em_probe: begin"); pci_vendor_id = pci_get_vendor(dev); if (pci_vendor_id != EM_VENDOR_ID) return (ENXIO); pci_device_id = pci_get_device(dev); pci_subvendor_id = pci_get_subvendor(dev); pci_subdevice_id = pci_get_subdevice(dev); ent = em_vendor_info_array; while (ent->vendor_id != 0) { if ((pci_vendor_id == ent->vendor_id) && (pci_device_id == ent->device_id) && ((pci_subvendor_id == ent->subvendor_id) || (ent->subvendor_id == PCI_ANY_ID)) && ((pci_subdevice_id == ent->subdevice_id) || (ent->subdevice_id == PCI_ANY_ID))) { sprintf(adapter_name, "%s %s", em_strings[ent->index], em_driver_version); device_set_desc_copy(dev, adapter_name); return (BUS_PROBE_DEFAULT); } ent++; } return (ENXIO); } /********************************************************************* * Device initialization routine * * The attach entry point is called when the driver is being loaded. * This routine identifies the type of hardware, allocates all resources * and initializes the hardware. * * return 0 on success, positive on failure *********************************************************************/ static int em_attach(device_t dev) { struct adapter *adapter; struct e1000_hw *hw; int error = 0; INIT_DEBUGOUT("em_attach: begin"); if (resource_disabled("em", device_get_unit(dev))) { device_printf(dev, "Disabled by device hint\n"); return (ENXIO); } adapter = device_get_softc(dev); adapter->dev = adapter->osdep.dev = dev; hw = &adapter->hw; EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev)); /* SYSCTL stuff */ SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0, em_sysctl_nvm_info, "I", "NVM Information"); SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0, em_sysctl_debug_info, "I", "Debug Information"); SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0, em_set_flowcntl, "I", "Flow Control"); callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0); /* Determine hardware and mac info */ em_identify_hardware(adapter); /* Setup PCI resources */ if (em_allocate_pci_resources(adapter)) { device_printf(dev, "Allocation of PCI resources failed\n"); error = ENXIO; goto err_pci; } /* ** For ICH8 and family we need to ** map the flash memory, and this ** must happen after the MAC is ** identified */ if ((hw->mac.type == e1000_ich8lan) || (hw->mac.type == e1000_ich9lan) || (hw->mac.type == e1000_ich10lan) || (hw->mac.type == e1000_pchlan) || (hw->mac.type == e1000_pch2lan) || (hw->mac.type == e1000_pch_lpt)) { int rid = EM_BAR_TYPE_FLASH; adapter->flash = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (adapter->flash == NULL) { device_printf(dev, "Mapping of Flash failed\n"); error = ENXIO; goto err_pci; } /* This is used in the shared code */ hw->flash_address = (u8 *)adapter->flash; adapter->osdep.flash_bus_space_tag = rman_get_bustag(adapter->flash); adapter->osdep.flash_bus_space_handle = rman_get_bushandle(adapter->flash); } /* ** In the new SPT device flash is not a ** separate BAR, 
rather it is also in BAR0, ** so use the same tag and an offset handle for the ** FLASH read/write macros in the shared code. */ else if (hw->mac.type == e1000_pch_spt) { adapter->osdep.flash_bus_space_tag = adapter->osdep.mem_bus_space_tag; adapter->osdep.flash_bus_space_handle = adapter->osdep.mem_bus_space_handle + E1000_FLASH_BASE_ADDR; } /* Do Shared Code initialization */ error = e1000_setup_init_funcs(hw, TRUE); if (error) { device_printf(dev, "Setup of Shared code failed, error %d\n", error); error = ENXIO; goto err_pci; } /* * Setup MSI/X or MSI if PCI Express */ adapter->msix = em_setup_msix(adapter); e1000_get_bus_info(hw); /* Set up some sysctls for the tunable interrupt delays */ em_add_int_delay_sysctl(adapter, "rx_int_delay", "receive interrupt delay in usecs", &adapter->rx_int_delay, E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt); em_add_int_delay_sysctl(adapter, "tx_int_delay", "transmit interrupt delay in usecs", &adapter->tx_int_delay, E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt); em_add_int_delay_sysctl(adapter, "rx_abs_int_delay", "receive interrupt delay limit in usecs", &adapter->rx_abs_int_delay, E1000_REGISTER(hw, E1000_RADV), em_rx_abs_int_delay_dflt); em_add_int_delay_sysctl(adapter, "tx_abs_int_delay", "transmit interrupt delay limit in usecs", &adapter->tx_abs_int_delay, E1000_REGISTER(hw, E1000_TADV), em_tx_abs_int_delay_dflt); em_add_int_delay_sysctl(adapter, "itr", "interrupt delay limit in usecs/4", &adapter->tx_itr, E1000_REGISTER(hw, E1000_ITR), DEFAULT_ITR); /* Sysctl for limiting the amount of work done in the taskqueue */ em_set_sysctl_value(adapter, "rx_processing_limit", "max number of rx packets to process", &adapter->rx_process_limit, em_rx_process_limit); /* * Validate number of transmit and receive descriptors. It * must not exceed hardware maximum, and must be multiple * of E1000_DBA_ALIGN. */ if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 || (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) { device_printf(dev, "Using %d TX descriptors instead of %d!\n", EM_DEFAULT_TXD, em_txd); adapter->num_tx_desc = EM_DEFAULT_TXD; } else adapter->num_tx_desc = em_txd; if (((em_rxd * sizeof(union e1000_rx_desc_extended)) % EM_DBA_ALIGN) != 0 || (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) { device_printf(dev, "Using %d RX descriptors instead of %d!\n", EM_DEFAULT_RXD, em_rxd); adapter->num_rx_desc = EM_DEFAULT_RXD; } else adapter->num_rx_desc = em_rxd; hw->mac.autoneg = DO_AUTO_NEG; hw->phy.autoneg_wait_to_complete = FALSE; hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT; /* Copper options */ if (hw->phy.media_type == e1000_media_type_copper) { hw->phy.mdix = AUTO_ALL_MODES; hw->phy.disable_polarity_correction = FALSE; hw->phy.ms_type = EM_MASTER_SLAVE; } /* * Set the frame limits assuming * standard ethernet sized frames. */ adapter->hw.mac.max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE; /* * This controls when hardware reports transmit completion * status. */ hw->mac.report_tx_early = 1; /* ** Get queue/ring memory */ if (em_allocate_queues(adapter)) { error = ENOMEM; goto err_pci; } /* Allocate multicast array memory. 
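	 * (ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES bytes; the array is
	 * released again in em_detach() and on the err_pci path below.)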
*/ adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT); if (adapter->mta == NULL) { device_printf(dev, "Can not allocate multicast setup array\n"); error = ENOMEM; goto err_late; } /* Check SOL/IDER usage */ if (e1000_check_reset_block(hw)) device_printf(dev, "PHY reset is blocked" " due to SOL/IDER session.\n"); /* Sysctl for setting Energy Efficient Ethernet */ hw->dev_spec.ich8lan.eee_disable = eee_setting; SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "eee_control", CTLTYPE_INT|CTLFLAG_RW, adapter, 0, em_sysctl_eee, "I", "Disable Energy Efficient Ethernet"); /* ** Start from a known state, this is ** important in reading the nvm and ** mac from that. */ e1000_reset_hw(hw); /* Make sure we have a good EEPROM before we read from it */ if (e1000_validate_nvm_checksum(hw) < 0) { /* ** Some PCI-E parts fail the first check due to ** the link being in sleep state, call it again, ** if it fails a second time its a real issue. */ if (e1000_validate_nvm_checksum(hw) < 0) { device_printf(dev, "The EEPROM Checksum Is Not Valid\n"); error = EIO; goto err_late; } } /* Copy the permanent MAC address out of the EEPROM */ if (e1000_read_mac_addr(hw) < 0) { device_printf(dev, "EEPROM read error while reading MAC" " address\n"); error = EIO; goto err_late; } if (!em_is_valid_ether_addr(hw->mac.addr)) { device_printf(dev, "Invalid MAC address\n"); error = EIO; goto err_late; } /* Disable ULP support */ e1000_disable_ulp_lpt_lp(hw, TRUE); /* ** Do interrupt configuration */ if (adapter->msix > 1) /* Do MSIX */ error = em_allocate_msix(adapter); else /* MSI or Legacy */ error = em_allocate_legacy(adapter); if (error) goto err_late; /* * Get Wake-on-Lan and Management info for later use */ em_get_wakeup(dev); /* Setup OS specific network interface */ if (em_setup_interface(dev, adapter) != 0) goto err_late; em_reset(adapter); /* Initialize statistics */ em_update_stats_counters(adapter); hw->mac.get_link_status = 1; em_update_link_status(adapter); /* Register for VLAN events */ adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config, em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST); adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig, em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST); em_add_hw_stats(adapter); /* Non-AMT based hardware can now take control from firmware */ if (adapter->has_manage && !adapter->has_amt) em_get_hw_control(adapter); /* Tell the stack that the interface is not active */ if_setdrvflagbits(adapter->ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING); adapter->led_dev = led_create(em_led_func, adapter, device_get_nameunit(dev)); #ifdef DEV_NETMAP em_netmap_attach(adapter); #endif /* DEV_NETMAP */ INIT_DEBUGOUT("em_attach: end"); return (0); err_late: em_free_transmit_structures(adapter); em_free_receive_structures(adapter); em_release_hw_control(adapter); if (adapter->ifp != (void *)NULL) if_free(adapter->ifp); err_pci: em_free_pci_resources(adapter); free(adapter->mta, M_DEVBUF); EM_CORE_LOCK_DESTROY(adapter); return (error); } /********************************************************************* * Device removal routine * * The detach entry point is called when the driver is being removed. * This routine stops the adapter and deallocates all the resources * that were allocated for driver operation. 
* * return 0 on success, positive on failure *********************************************************************/ static int em_detach(device_t dev) { struct adapter *adapter = device_get_softc(dev); if_t ifp = adapter->ifp; INIT_DEBUGOUT("em_detach: begin"); /* Make sure VLANS are not using driver */ if (if_vlantrunkinuse(ifp)) { device_printf(dev,"Vlan in use, detach first\n"); return (EBUSY); } #ifdef DEVICE_POLLING if (if_getcapenable(ifp) & IFCAP_POLLING) ether_poll_deregister(ifp); #endif if (adapter->led_dev != NULL) led_destroy(adapter->led_dev); EM_CORE_LOCK(adapter); adapter->in_detach = 1; em_stop(adapter); EM_CORE_UNLOCK(adapter); EM_CORE_LOCK_DESTROY(adapter); e1000_phy_hw_reset(&adapter->hw); em_release_manageability(adapter); em_release_hw_control(adapter); /* Unregister VLAN events */ if (adapter->vlan_attach != NULL) EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach); if (adapter->vlan_detach != NULL) EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach); ether_ifdetach(adapter->ifp); callout_drain(&adapter->timer); #ifdef DEV_NETMAP netmap_detach(ifp); #endif /* DEV_NETMAP */ em_free_pci_resources(adapter); bus_generic_detach(dev); if_free(ifp); em_free_transmit_structures(adapter); em_free_receive_structures(adapter); em_release_hw_control(adapter); free(adapter->mta, M_DEVBUF); return (0); } /********************************************************************* * * Shutdown entry point * **********************************************************************/ static int em_shutdown(device_t dev) { return em_suspend(dev); } /* * Suspend/resume device methods. */ static int em_suspend(device_t dev) { struct adapter *adapter = device_get_softc(dev); EM_CORE_LOCK(adapter); em_release_manageability(adapter); em_release_hw_control(adapter); em_enable_wakeup(dev); EM_CORE_UNLOCK(adapter); return bus_generic_suspend(dev); } static int em_resume(device_t dev) { struct adapter *adapter = device_get_softc(dev); struct tx_ring *txr = adapter->tx_rings; if_t ifp = adapter->ifp; EM_CORE_LOCK(adapter); if (adapter->hw.mac.type == e1000_pch2lan) e1000_resume_workarounds_pchlan(&adapter->hw); em_init_locked(adapter); em_init_manageability(adapter); if ((if_getflags(ifp) & IFF_UP) && (if_getdrvflags(ifp) & IFF_DRV_RUNNING) && adapter->link_active) { for (int i = 0; i < adapter->num_queues; i++, txr++) { EM_TX_LOCK(txr); #ifdef EM_MULTIQUEUE if (!drbr_empty(ifp, txr->br)) em_mq_start_locked(ifp, txr); #else if (!if_sendq_empty(ifp)) em_start_locked(ifp, txr); #endif EM_TX_UNLOCK(txr); } } EM_CORE_UNLOCK(adapter); return bus_generic_resume(dev); } #ifndef EM_MULTIQUEUE static void em_start_locked(if_t ifp, struct tx_ring *txr) { struct adapter *adapter = if_getsoftc(ifp); struct mbuf *m_head; EM_TX_LOCK_ASSERT(txr); if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING) return; if (!adapter->link_active) return; while (!if_sendq_empty(ifp)) { /* Call cleanup if number of TX descriptors low */ if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD) em_txeof(txr); if (txr->tx_avail < EM_MAX_SCATTER) { if_setdrvflagbits(ifp,IFF_DRV_OACTIVE, 0); break; } m_head = if_dequeue(ifp); if (m_head == NULL) break; /* * Encapsulation can modify our pointer, and or make it * NULL on failure. In that event, we can't requeue. 
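		 * (If m_head comes back NULL, em_xmit() already freed the
		 * chain and there is nothing to requeue; otherwise the
		 * possibly-modified chain is prepended back onto the send
		 * queue to be retried later.)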
*/ if (em_xmit(txr, &m_head)) { if (m_head == NULL) break; if_sendq_prepend(ifp, m_head); break; } /* Mark the queue as having work */ if (txr->busy == EM_TX_IDLE) txr->busy = EM_TX_BUSY; /* Send a copy of the frame to the BPF listener */ ETHER_BPF_MTAP(ifp, m_head); } return; } static void em_start(if_t ifp) { struct adapter *adapter = if_getsoftc(ifp); struct tx_ring *txr = adapter->tx_rings; if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { EM_TX_LOCK(txr); em_start_locked(ifp, txr); EM_TX_UNLOCK(txr); } return; } #else /* EM_MULTIQUEUE */ /********************************************************************* * Multiqueue Transmit routines * * em_mq_start is called by the stack to initiate a transmit. * however, if busy the driver can queue the request rather * than do an immediate send. It is this that is an advantage * in this driver, rather than also having multiple tx queues. **********************************************************************/ /* ** Multiqueue capable stack interface */ static int em_mq_start(if_t ifp, struct mbuf *m) { struct adapter *adapter = if_getsoftc(ifp); struct tx_ring *txr = adapter->tx_rings; unsigned int i, error; if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) i = m->m_pkthdr.flowid % adapter->num_queues; else i = curcpu % adapter->num_queues; txr = &adapter->tx_rings[i]; error = drbr_enqueue(ifp, txr->br, m); if (error) return (error); if (EM_TX_TRYLOCK(txr)) { em_mq_start_locked(ifp, txr); EM_TX_UNLOCK(txr); } else taskqueue_enqueue(txr->tq, &txr->tx_task); return (0); } static int em_mq_start_locked(if_t ifp, struct tx_ring *txr) { struct adapter *adapter = txr->adapter; struct mbuf *next; int err = 0, enq = 0; EM_TX_LOCK_ASSERT(txr); if (((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) || adapter->link_active == 0) { return (ENETDOWN); } /* Process the queue */ while ((next = drbr_peek(ifp, txr->br)) != NULL) { if ((err = em_xmit(txr, &next)) != 0) { if (next == NULL) { /* It was freed, move forward */ drbr_advance(ifp, txr->br); } else { /* * Still have one left, it may not be * the same since the transmit function * may have changed it. */ drbr_putback(ifp, txr->br, next); } break; } drbr_advance(ifp, txr->br); enq++; if_inc_counter(ifp, IFCOUNTER_OBYTES, next->m_pkthdr.len); if (next->m_flags & M_MCAST) if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); ETHER_BPF_MTAP(ifp, next); if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) break; } /* Mark the queue as having work */ if ((enq > 0) && (txr->busy == EM_TX_IDLE)) txr->busy = EM_TX_BUSY; if (txr->tx_avail < EM_MAX_SCATTER) em_txeof(txr); if (txr->tx_avail < EM_MAX_SCATTER) { if_setdrvflagbits(ifp, IFF_DRV_OACTIVE,0); } return (err); } /* ** Flush all ring buffers */ static void em_qflush(if_t ifp) { struct adapter *adapter = if_getsoftc(ifp); struct tx_ring *txr = adapter->tx_rings; struct mbuf *m; for (int i = 0; i < adapter->num_queues; i++, txr++) { EM_TX_LOCK(txr); while ((m = buf_ring_dequeue_sc(txr->br)) != NULL) m_freem(m); EM_TX_UNLOCK(txr); } if_qflush(ifp); } #endif /* EM_MULTIQUEUE */ /********************************************************************* * Ioctl entry point * * em_ioctl is called when the user wants to configure the * interface. 
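 * (SIOCSIFADDR, SIOCSIFMTU, SIOCSIFFLAGS, SIOCADDMULTI/SIOCDELMULTI,
 * SIOCSIFMEDIA/SIOCGIFMEDIA and SIOCSIFCAP are handled below; anything
 * else falls through to ether_ioctl().)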
 *
 *  return 0 on success, positive on failure
 **********************************************************************/
static int
em_ioctl(if_t ifp, u_long command, caddr_t data)
{
	struct adapter	*adapter = if_getsoftc(ifp);
	struct ifreq	*ifr = (struct ifreq *)data;
#if defined(INET) || defined(INET6)
	struct ifaddr	*ifa = (struct ifaddr *)data;
#endif
	bool		avoid_reset = FALSE;
	int		error = 0;

	if (adapter->in_detach)
		return (error);

	switch (command) {
	case SIOCSIFADDR:
#ifdef INET
		if (ifa->ifa_addr->sa_family == AF_INET)
			avoid_reset = TRUE;
#endif
#ifdef INET6
		if (ifa->ifa_addr->sa_family == AF_INET6)
			avoid_reset = TRUE;
#endif
		/*
		** Calling init results in link renegotiation,
		** so we avoid doing it when possible.
		*/
		if (avoid_reset) {
			if_setflagbits(ifp, IFF_UP, 0);
			if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING))
				em_init(adapter);
#ifdef INET
			if (!(if_getflags(ifp) & IFF_NOARP))
				arp_ifinit(ifp, ifa);
#endif
		} else
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFMTU:
	    {
		int max_frame_size;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

		EM_CORE_LOCK(adapter);
		switch (adapter->hw.mac.type) {
		case e1000_82571:
		case e1000_82572:
		case e1000_ich9lan:
		case e1000_ich10lan:
		case e1000_pch2lan:
		case e1000_pch_lpt:
		case e1000_pch_spt:
		case e1000_82574:
		case e1000_82583:
		case e1000_80003es2lan:	/* 9K Jumbo Frame size */
			max_frame_size = 9234;
			break;
		case e1000_pchlan:
			max_frame_size = 4096;
			break;
			/* Adapters that do not support jumbo frames */
		case e1000_ich8lan:
			max_frame_size = ETHER_MAX_LEN;
			break;
		default:
			max_frame_size = MAX_JUMBO_FRAME_SIZE;
		}
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			EM_CORE_UNLOCK(adapter);
			error = EINVAL;
			break;
		}

		if_setmtu(ifp, ifr->ifr_mtu);
		adapter->hw.mac.max_frame_size =
		    if_getmtu(ifp) + ETHER_HDR_LEN + ETHER_CRC_LEN;
-		em_init_locked(adapter);
+		if (if_getdrvflags(ifp) & IFF_DRV_RUNNING)
+			em_init_locked(adapter);
		EM_CORE_UNLOCK(adapter);
		break;
	    }
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl rcv'd:\
		    SIOCSIFFLAGS (Set Interface Flags)");
		EM_CORE_LOCK(adapter);
		if (if_getflags(ifp) & IFF_UP) {
			if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
				if ((if_getflags(ifp) ^ adapter->if_flags) &
				    (IFF_PROMISC | IFF_ALLMULTI)) {
					em_disable_promisc(adapter);
					em_set_promisc(adapter);
				}
			} else
				em_init_locked(adapter);
		} else if (if_getdrvflags(ifp) & IFF_DRV_RUNNING)
			em_stop(adapter);
		adapter->if_flags = if_getflags(ifp);
		EM_CORE_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
		if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
			EM_CORE_LOCK(adapter);
			em_disable_intr(adapter);
			em_set_multi(adapter);
#ifdef DEVICE_POLLING
			if (!(if_getcapenable(ifp) & IFCAP_POLLING))
#endif
				em_enable_intr(adapter);
			EM_CORE_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
		/* Check SOL/IDER usage */
		EM_CORE_LOCK(adapter);
		if (e1000_check_reset_block(&adapter->hw)) {
			EM_CORE_UNLOCK(adapter);
			device_printf(adapter->dev, "Media change is"
			    " blocked due to SOL/IDER session.\n");
			break;
		}
		EM_CORE_UNLOCK(adapter);
		/* falls thru */
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl rcv'd: \
		    SIOCxIFMEDIA (Get/Set Interface Media)");
		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
		reinit = 0;
		mask = ifr->ifr_reqcap ^ if_getcapenable(ifp);
#ifdef DEVICE_POLLING
		if (mask & IFCAP_POLLING) {
			if (ifr->ifr_reqcap & IFCAP_POLLING) {
				error = ether_poll_register(em_poll, ifp);
				if (error)
					return (error);
				EM_CORE_LOCK(adapter);
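				/*
				 * Polling now owns RX/TX processing, so the
				 * device interrupt stays masked until polling
				 * is deregistered again.
				 */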
em_disable_intr(adapter); if_setcapenablebit(ifp, IFCAP_POLLING, 0); EM_CORE_UNLOCK(adapter); } else { error = ether_poll_deregister(ifp); /* Enable interrupt even in error case */ EM_CORE_LOCK(adapter); em_enable_intr(adapter); if_setcapenablebit(ifp, 0, IFCAP_POLLING); EM_CORE_UNLOCK(adapter); } } #endif if (mask & IFCAP_HWCSUM) { if_togglecapenable(ifp,IFCAP_HWCSUM); reinit = 1; } if (mask & IFCAP_TSO4) { if_togglecapenable(ifp,IFCAP_TSO4); reinit = 1; } if (mask & IFCAP_VLAN_HWTAGGING) { if_togglecapenable(ifp,IFCAP_VLAN_HWTAGGING); reinit = 1; } if (mask & IFCAP_VLAN_HWFILTER) { if_togglecapenable(ifp, IFCAP_VLAN_HWFILTER); reinit = 1; } if (mask & IFCAP_VLAN_HWTSO) { if_togglecapenable(ifp, IFCAP_VLAN_HWTSO); reinit = 1; } if ((mask & IFCAP_WOL) && (if_getcapabilities(ifp) & IFCAP_WOL) != 0) { if (mask & IFCAP_WOL_MCAST) if_togglecapenable(ifp, IFCAP_WOL_MCAST); if (mask & IFCAP_WOL_MAGIC) if_togglecapenable(ifp, IFCAP_WOL_MAGIC); } if (reinit && (if_getdrvflags(ifp) & IFF_DRV_RUNNING)) em_init(adapter); if_vlancap(ifp); break; } default: error = ether_ioctl(ifp, command, data); break; } return (error); } /********************************************************************* * Init entry point * * This routine is used in two ways. It is used by the stack as * init entry point in network interface structure. It is also used * by the driver as a hw/sw initialization routine to get to a * consistent state. * * return 0 on success, positive on failure **********************************************************************/ static void em_init_locked(struct adapter *adapter) { if_t ifp = adapter->ifp; device_t dev = adapter->dev; INIT_DEBUGOUT("em_init: begin"); EM_CORE_LOCK_ASSERT(adapter); em_disable_intr(adapter); callout_stop(&adapter->timer); /* Get the latest mac address, User can use a LAA */ bcopy(if_getlladdr(adapter->ifp), adapter->hw.mac.addr, ETHER_ADDR_LEN); /* Put the address into the Receive Address Array */ e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0); /* * With the 82571 adapter, RAR[0] may be overwritten * when the other port is reset, we make a duplicate * in RAR[14] for that eventuality, this assures * the interface continues to function. */ if (adapter->hw.mac.type == e1000_82571) { e1000_set_laa_state_82571(&adapter->hw, TRUE); e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, E1000_RAR_ENTRIES - 1); } /* Initialize the hardware */ em_reset(adapter); em_update_link_status(adapter); /* Setup VLAN support, basic and offload if available */ E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN); /* Set hardware offload abilities */ if_clearhwassist(ifp); if (if_getcapenable(ifp) & IFCAP_TXCSUM) if_sethwassistbits(ifp, CSUM_TCP | CSUM_UDP, 0); /* ** There have proven to be problems with TSO when not ** at full gigabit speed, so disable the assist automatically ** when at lower speeds. 
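** (For example, after the link renegotiates to 100 Mb/s, CSUM_TSO is
** simply left out of if_hwassist below and the stack falls back to
** software segmentation until gigabit link returns.)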
-jfv */ if (if_getcapenable(ifp) & IFCAP_TSO4) { if (adapter->link_speed == SPEED_1000) if_sethwassistbits(ifp, CSUM_TSO, 0); } /* Configure for OS presence */ em_init_manageability(adapter); /* Prepare transmit descriptors and buffers */ em_setup_transmit_structures(adapter); em_initialize_transmit_unit(adapter); /* Setup Multicast table */ em_set_multi(adapter); /* ** Figure out the desired mbuf ** pool for doing jumbos */ if (adapter->hw.mac.max_frame_size <= 2048) adapter->rx_mbuf_sz = MCLBYTES; else if (adapter->hw.mac.max_frame_size <= 4096) adapter->rx_mbuf_sz = MJUMPAGESIZE; else adapter->rx_mbuf_sz = MJUM9BYTES; /* Prepare receive descriptors and buffers */ if (em_setup_receive_structures(adapter)) { device_printf(dev, "Could not setup receive structures\n"); em_stop(adapter); return; } em_initialize_receive_unit(adapter); /* Use real VLAN Filter support? */ if (if_getcapenable(ifp) & IFCAP_VLAN_HWTAGGING) { if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER) /* Use real VLAN Filter support */ em_setup_vlan_hw_support(adapter); else { u32 ctrl; ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL); ctrl |= E1000_CTRL_VME; E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl); } } /* Don't lose promiscuous settings */ em_set_promisc(adapter); /* Set the interface as ACTIVE */ if_setdrvflagbits(ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE); callout_reset(&adapter->timer, hz, em_local_timer, adapter); e1000_clear_hw_cntrs_base_generic(&adapter->hw); /* MSI/X configuration for 82574 */ if (adapter->hw.mac.type == e1000_82574) { int tmp; tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); tmp |= E1000_CTRL_EXT_PBA_CLR; E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp); /* Set the IVAR - interrupt vector routing. */ E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars); } #ifdef DEVICE_POLLING /* * Only enable interrupts if we are not polling, make sure * they are off otherwise. 
*/ if (if_getcapenable(ifp) & IFCAP_POLLING) em_disable_intr(adapter); else #endif /* DEVICE_POLLING */ em_enable_intr(adapter); /* AMT based hardware can now take control from firmware */ if (adapter->has_manage && adapter->has_amt) em_get_hw_control(adapter); } static void em_init(void *arg) { struct adapter *adapter = arg; EM_CORE_LOCK(adapter); em_init_locked(adapter); EM_CORE_UNLOCK(adapter); } #ifdef DEVICE_POLLING /********************************************************************* * * Legacy polling routine: note this only works with single queue * *********************************************************************/ static int em_poll(if_t ifp, enum poll_cmd cmd, int count) { struct adapter *adapter = if_getsoftc(ifp); struct tx_ring *txr = adapter->tx_rings; struct rx_ring *rxr = adapter->rx_rings; u32 reg_icr; int rx_done; EM_CORE_LOCK(adapter); if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) { EM_CORE_UNLOCK(adapter); return (0); } if (cmd == POLL_AND_CHECK_STATUS) { reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR); if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { callout_stop(&adapter->timer); adapter->hw.mac.get_link_status = 1; em_update_link_status(adapter); callout_reset(&adapter->timer, hz, em_local_timer, adapter); } } EM_CORE_UNLOCK(adapter); em_rxeof(rxr, count, &rx_done); EM_TX_LOCK(txr); em_txeof(txr); #ifdef EM_MULTIQUEUE if (!drbr_empty(ifp, txr->br)) em_mq_start_locked(ifp, txr); #else if (!if_sendq_empty(ifp)) em_start_locked(ifp, txr); #endif EM_TX_UNLOCK(txr); return (rx_done); } #endif /* DEVICE_POLLING */ /********************************************************************* * * Fast Legacy/MSI Combined Interrupt Service routine * *********************************************************************/ static int em_irq_fast(void *arg) { struct adapter *adapter = arg; if_t ifp; u32 reg_icr; ifp = adapter->ifp; reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR); /* Hot eject? */ if (reg_icr == 0xffffffff) return FILTER_STRAY; /* Definitely not our interrupt. */ if (reg_icr == 0x0) return FILTER_STRAY; /* * Starting with the 82571 chip, bit 31 should be used to * determine whether the interrupt belongs to us. 
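	 * (E1000_ICR_INT_ASSERTED is that bit; on a shared legacy interrupt
	 * line, finding it clear after the ICR read means another device
	 * raised the interrupt, so we return FILTER_STRAY.)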
*/ if (adapter->hw.mac.type >= e1000_82571 && (reg_icr & E1000_ICR_INT_ASSERTED) == 0) return FILTER_STRAY; em_disable_intr(adapter); taskqueue_enqueue(adapter->tq, &adapter->que_task); /* Link status change */ if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { adapter->hw.mac.get_link_status = 1; taskqueue_enqueue(taskqueue_fast, &adapter->link_task); } if (reg_icr & E1000_ICR_RXO) adapter->rx_overruns++; return FILTER_HANDLED; } /* Combined RX/TX handler, used by Legacy and MSI */ static void em_handle_que(void *context, int pending) { struct adapter *adapter = context; if_t ifp = adapter->ifp; struct tx_ring *txr = adapter->tx_rings; struct rx_ring *rxr = adapter->rx_rings; if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL); EM_TX_LOCK(txr); em_txeof(txr); #ifdef EM_MULTIQUEUE if (!drbr_empty(ifp, txr->br)) em_mq_start_locked(ifp, txr); #else if (!if_sendq_empty(ifp)) em_start_locked(ifp, txr); #endif EM_TX_UNLOCK(txr); if (more) { taskqueue_enqueue(adapter->tq, &adapter->que_task); return; } } em_enable_intr(adapter); return; } /********************************************************************* * * MSIX Interrupt Service Routines * **********************************************************************/ static void em_msix_tx(void *arg) { struct tx_ring *txr = arg; struct adapter *adapter = txr->adapter; if_t ifp = adapter->ifp; ++txr->tx_irq; EM_TX_LOCK(txr); em_txeof(txr); #ifdef EM_MULTIQUEUE if (!drbr_empty(ifp, txr->br)) em_mq_start_locked(ifp, txr); #else if (!if_sendq_empty(ifp)) em_start_locked(ifp, txr); #endif /* Reenable this interrupt */ E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims); EM_TX_UNLOCK(txr); return; } /********************************************************************* * * MSIX RX Interrupt Service routine * **********************************************************************/ static void em_msix_rx(void *arg) { struct rx_ring *rxr = arg; struct adapter *adapter = rxr->adapter; bool more; ++rxr->rx_irq; if (!(if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING)) return; more = em_rxeof(rxr, adapter->rx_process_limit, NULL); if (more) taskqueue_enqueue(rxr->tq, &rxr->rx_task); else { /* Reenable this interrupt */ E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims); } return; } /********************************************************************* * * MSIX Link Fast Interrupt Service routine * **********************************************************************/ static void em_msix_link(void *arg) { struct adapter *adapter = arg; u32 reg_icr; ++adapter->link_irq; reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR); if (reg_icr & E1000_ICR_RXO) adapter->rx_overruns++; if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { adapter->hw.mac.get_link_status = 1; em_handle_link(adapter, 0); } else E1000_WRITE_REG(&adapter->hw, E1000_IMS, EM_MSIX_LINK | E1000_IMS_LSC); /* ** Because we must read the ICR for this interrupt ** it may clear other causes using autoclear, for ** this reason we simply create a soft interrupt ** for all these vectors. 
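	** (Concretely: writing adapter->ims to E1000_ICS below re-raises
	** the RX/TX vector causes that the ICR read may have auto-cleared,
	** so their handlers still get a chance to run.)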
*/ if (reg_icr) { E1000_WRITE_REG(&adapter->hw, E1000_ICS, adapter->ims); } return; } static void em_handle_rx(void *context, int pending) { struct rx_ring *rxr = context; struct adapter *adapter = rxr->adapter; bool more; more = em_rxeof(rxr, adapter->rx_process_limit, NULL); if (more) taskqueue_enqueue(rxr->tq, &rxr->rx_task); else { /* Reenable this interrupt */ E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims); } } static void em_handle_tx(void *context, int pending) { struct tx_ring *txr = context; struct adapter *adapter = txr->adapter; if_t ifp = adapter->ifp; EM_TX_LOCK(txr); em_txeof(txr); #ifdef EM_MULTIQUEUE if (!drbr_empty(ifp, txr->br)) em_mq_start_locked(ifp, txr); #else if (!if_sendq_empty(ifp)) em_start_locked(ifp, txr); #endif E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims); EM_TX_UNLOCK(txr); } static void em_handle_link(void *context, int pending) { struct adapter *adapter = context; struct tx_ring *txr = adapter->tx_rings; if_t ifp = adapter->ifp; if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING)) return; EM_CORE_LOCK(adapter); callout_stop(&adapter->timer); em_update_link_status(adapter); callout_reset(&adapter->timer, hz, em_local_timer, adapter); E1000_WRITE_REG(&adapter->hw, E1000_IMS, EM_MSIX_LINK | E1000_IMS_LSC); if (adapter->link_active) { for (int i = 0; i < adapter->num_queues; i++, txr++) { EM_TX_LOCK(txr); #ifdef EM_MULTIQUEUE if (!drbr_empty(ifp, txr->br)) em_mq_start_locked(ifp, txr); #else if (if_sendq_empty(ifp)) em_start_locked(ifp, txr); #endif EM_TX_UNLOCK(txr); } } EM_CORE_UNLOCK(adapter); } /********************************************************************* * * Media Ioctl callback * * This routine is called whenever the user queries the status of * the interface using ifconfig. * **********************************************************************/ static void em_media_status(if_t ifp, struct ifmediareq *ifmr) { struct adapter *adapter = if_getsoftc(ifp); u_char fiber_type = IFM_1000_SX; INIT_DEBUGOUT("em_media_status: begin"); EM_CORE_LOCK(adapter); em_update_link_status(adapter); ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; if (!adapter->link_active) { EM_CORE_UNLOCK(adapter); return; } ifmr->ifm_status |= IFM_ACTIVE; if ((adapter->hw.phy.media_type == e1000_media_type_fiber) || (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) { ifmr->ifm_active |= fiber_type | IFM_FDX; } else { switch (adapter->link_speed) { case 10: ifmr->ifm_active |= IFM_10_T; break; case 100: ifmr->ifm_active |= IFM_100_TX; break; case 1000: ifmr->ifm_active |= IFM_1000_T; break; } if (adapter->link_duplex == FULL_DUPLEX) ifmr->ifm_active |= IFM_FDX; else ifmr->ifm_active |= IFM_HDX; } EM_CORE_UNLOCK(adapter); } /********************************************************************* * * Media Ioctl callback * * This routine is called when the user changes speed/duplex using * media/mediopt option with ifconfig. 
 *
 **********************************************************************/
static int
em_media_change(if_t ifp)
{
	struct adapter	*adapter = if_getsoftc(ifp);
	struct ifmedia	*ifm = &adapter->media;

	INIT_DEBUGOUT("em_media_change: begin");

	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
		return (EINVAL);

	EM_CORE_LOCK(adapter);
	switch (IFM_SUBTYPE(ifm->ifm_media)) {
	case IFM_AUTO:
		adapter->hw.mac.autoneg = DO_AUTO_NEG;
		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
		break;
	case IFM_1000_LX:
	case IFM_1000_SX:
	case IFM_1000_T:
		adapter->hw.mac.autoneg = DO_AUTO_NEG;
		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
		break;
	case IFM_100_TX:
		adapter->hw.mac.autoneg = FALSE;
		adapter->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
		else
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
		break;
	case IFM_10_T:
		adapter->hw.mac.autoneg = FALSE;
		adapter->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
		else
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
		break;
	default:
		device_printf(adapter->dev, "Unsupported media type\n");
	}

	em_init_locked(adapter);
	EM_CORE_UNLOCK(adapter);

	return (0);
}

/*********************************************************************
 *
 *  This routine maps the mbufs to tx descriptors.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/
static int
em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
{
	struct adapter		*adapter = txr->adapter;
	bus_dma_segment_t	segs[EM_MAX_SCATTER];
	bus_dmamap_t		map;
	struct em_txbuffer	*tx_buffer, *tx_buffer_mapped;
	struct e1000_tx_desc	*ctxd = NULL;
	struct mbuf		*m_head;
	struct ether_header	*eh;
	struct ip		*ip = NULL;
	struct tcphdr		*tp = NULL;
	u32			txd_upper = 0, txd_lower = 0;
	int			ip_off, poff;
	int			nsegs, i, j, first, last = 0;
	int			error;
	bool			do_tso, tso_desc, remap = TRUE;

	m_head = *m_headp;
	do_tso = (m_head->m_pkthdr.csum_flags & CSUM_TSO);
	tso_desc = FALSE;
	ip_off = poff = 0;

	/*
	 * Intel recommends that the entire IP/TCP header length reside in
	 * a single buffer. If multiple descriptors are used to describe
	 * the IP and TCP header, each descriptor should describe one or
	 * more complete headers; descriptors referencing only parts of
	 * headers are not supported. If all layer headers are not
	 * coalesced into a single buffer, each buffer should not cross a
	 * 4KB boundary, or be larger than the maximum read request size.
	 * The controller also requires modifying the IP/TCP header to
	 * make TSO work, so we first get a writable mbuf chain, then
	 * coalesce the ethernet/IP/TCP header into a single buffer to
	 * meet the controller's requirement. This also simplifies
	 * IP/TCP/UDP checksum offloading, which has similar restrictions.
	 */
	if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
		if (do_tso || (m_head->m_next != NULL &&
		    m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
			if (M_WRITABLE(*m_headp) == 0) {
				m_head = m_dup(*m_headp, M_NOWAIT);
				m_freem(*m_headp);
				if (m_head == NULL) {
					*m_headp = NULL;
					return (ENOBUFS);
				}
				*m_headp = m_head;
			}
		}
		/*
		 * XXX
		 * Assume IPv4, we don't have TSO/checksum offload support
		 * for IPv6 yet.
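		 * After the pullups that follow, ip_off is 14 bytes
		 * (sizeof(struct ether_header)), or 18 with a VLAN tag,
		 * and poff = ip_off + (ip->ip_hl << 2); for TSO the chain
		 * is additionally pulled up through the TCP header plus a
		 * TSO_WORKAROUND (4 byte) pad.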
*/ ip_off = sizeof(struct ether_header); if (m_head->m_len < ip_off) { m_head = m_pullup(m_head, ip_off); if (m_head == NULL) { *m_headp = NULL; return (ENOBUFS); } } eh = mtod(m_head, struct ether_header *); if (eh->ether_type == htons(ETHERTYPE_VLAN)) { ip_off = sizeof(struct ether_vlan_header); if (m_head->m_len < ip_off) { m_head = m_pullup(m_head, ip_off); if (m_head == NULL) { *m_headp = NULL; return (ENOBUFS); } } } if (m_head->m_len < ip_off + sizeof(struct ip)) { m_head = m_pullup(m_head, ip_off + sizeof(struct ip)); if (m_head == NULL) { *m_headp = NULL; return (ENOBUFS); } } ip = (struct ip *)(mtod(m_head, char *) + ip_off); poff = ip_off + (ip->ip_hl << 2); if (do_tso || (m_head->m_pkthdr.csum_flags & CSUM_TCP)) { if (m_head->m_len < poff + sizeof(struct tcphdr)) { m_head = m_pullup(m_head, poff + sizeof(struct tcphdr)); if (m_head == NULL) { *m_headp = NULL; return (ENOBUFS); } } tp = (struct tcphdr *)(mtod(m_head, char *) + poff); /* * TSO workaround: * pull 4 more bytes of data into it. */ if (m_head->m_len < poff + (tp->th_off << 2)) { m_head = m_pullup(m_head, poff + (tp->th_off << 2) + TSO_WORKAROUND); if (m_head == NULL) { *m_headp = NULL; return (ENOBUFS); } } ip = (struct ip *)(mtod(m_head, char *) + ip_off); tp = (struct tcphdr *)(mtod(m_head, char *) + poff); if (do_tso) { ip->ip_len = htons(m_head->m_pkthdr.tso_segsz + (ip->ip_hl << 2) + (tp->th_off << 2)); ip->ip_sum = 0; /* * The pseudo TCP checksum does not include TCP * payload length so driver should recompute * the checksum here what hardware expect to * see. This is adherence of Microsoft's Large * Send specification. */ tp->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htons(IPPROTO_TCP)); } } else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) { if (m_head->m_len < poff + sizeof(struct udphdr)) { m_head = m_pullup(m_head, poff + sizeof(struct udphdr)); if (m_head == NULL) { *m_headp = NULL; return (ENOBUFS); } } ip = (struct ip *)(mtod(m_head, char *) + ip_off); } *m_headp = m_head; } /* * Map the packet for DMA * * Capture the first descriptor index, * this descriptor will have the index * of the EOP which is the only one that * now gets a DONE bit writeback. */ first = txr->next_avail_desc; tx_buffer = &txr->tx_buffers[first]; tx_buffer_mapped = tx_buffer; map = tx_buffer->map; retry: error = bus_dmamap_load_mbuf_sg(txr->txtag, map, *m_headp, segs, &nsegs, BUS_DMA_NOWAIT); /* * There are two types of errors we can (try) to handle: * - EFBIG means the mbuf chain was too long and bus_dma ran * out of segments. Defragment the mbuf chain and try again. * - ENOMEM means bus_dma could not obtain enough bounce buffers * at this point in time. Defer sending and try again later. * All other errors, in particular EINVAL, are fatal and prevent the * mbuf chain from ever going through. Drop it and report error. */ if (error == EFBIG && remap) { struct mbuf *m; m = m_collapse(*m_headp, M_NOWAIT, EM_MAX_SCATTER); if (m == NULL) { adapter->mbuf_defrag_failed++; m_freem(*m_headp); *m_headp = NULL; return (ENOBUFS); } *m_headp = m; /* Try it again, but only once */ remap = FALSE; goto retry; } else if (error != 0) { adapter->no_tx_dma_setup++; m_freem(*m_headp); *m_headp = NULL; return (error); } /* * TSO Hardware workaround, if this packet is not * TSO, and is only a single descriptor long, and * it follows a TSO burst, then we need to add a * sentinel descriptor to prevent premature writeback. 
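	 * For example, a frame mapped as a single 1514-byte segment would
	 * be emitted as a 1510-byte descriptor plus a 4-byte
	 * (TSO_WORKAROUND) sentinel descriptor carved off its tail, so
	 * the DONE writeback cannot be reported before the whole frame
	 * has been fetched.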
*/ if ((!do_tso) && (txr->tx_tso == TRUE)) { if (nsegs == 1) tso_desc = TRUE; txr->tx_tso = FALSE; } if (txr->tx_avail < (nsegs + EM_MAX_SCATTER)) { txr->no_desc_avail++; bus_dmamap_unload(txr->txtag, map); return (ENOBUFS); } m_head = *m_headp; /* Do hardware assists */ if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { em_tso_setup(txr, m_head, ip_off, ip, tp, &txd_upper, &txd_lower); /* we need to make a final sentinel transmit desc */ tso_desc = TRUE; } else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) em_transmit_checksum_setup(txr, m_head, ip_off, ip, &txd_upper, &txd_lower); if (m_head->m_flags & M_VLANTAG) { /* Set the vlan id. */ txd_upper |= htole16(if_getvtag(m_head)) << 16; /* Tell hardware to add tag */ txd_lower |= htole32(E1000_TXD_CMD_VLE); } i = txr->next_avail_desc; /* Set up our transmit descriptors */ for (j = 0; j < nsegs; j++) { bus_size_t seg_len; bus_addr_t seg_addr; tx_buffer = &txr->tx_buffers[i]; ctxd = &txr->tx_base[i]; seg_addr = segs[j].ds_addr; seg_len = segs[j].ds_len; /* ** TSO Workaround: ** If this is the last descriptor, we want to ** split it so we have a small final sentinel */ if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) { seg_len -= TSO_WORKAROUND; ctxd->buffer_addr = htole64(seg_addr); ctxd->lower.data = htole32( adapter->txd_cmd | txd_lower | seg_len); ctxd->upper.data = htole32(txd_upper); if (++i == adapter->num_tx_desc) i = 0; /* Now make the sentinel */ txr->tx_avail--; ctxd = &txr->tx_base[i]; tx_buffer = &txr->tx_buffers[i]; ctxd->buffer_addr = htole64(seg_addr + seg_len); ctxd->lower.data = htole32( adapter->txd_cmd | txd_lower | TSO_WORKAROUND); ctxd->upper.data = htole32(txd_upper); last = i; if (++i == adapter->num_tx_desc) i = 0; } else { ctxd->buffer_addr = htole64(seg_addr); ctxd->lower.data = htole32( adapter->txd_cmd | txd_lower | seg_len); ctxd->upper.data = htole32(txd_upper); last = i; if (++i == adapter->num_tx_desc) i = 0; } tx_buffer->m_head = NULL; tx_buffer->next_eop = -1; } txr->next_avail_desc = i; txr->tx_avail -= nsegs; tx_buffer->m_head = m_head; /* ** Here we swap the map so the last descriptor, ** which gets the completion interrupt, has the ** real map, and the first descriptor gets the ** unused map from this descriptor. */ tx_buffer_mapped->map = tx_buffer->map; tx_buffer->map = map; bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE); /* * Last Descriptor of Packet * needs End Of Packet (EOP) * and Report Status (RS) */ ctxd->lower.data |= htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS); /* * Keep track in the first buffer of which * descriptor will be written back. */ tx_buffer = &txr->tx_buffers[first]; tx_buffer->next_eop = last; /* * Advance the Transmit Descriptor Tail (TDT); this tells the E1000 * that this frame is available to transmit.
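 *
 * Minimal sketch of the producer/consumer contract assumed here: the
 * hardware owns descriptors in [TDH, TDT) and the driver owns the
 * rest, so publishing a frame is just
 *
 *	txr->next_avail_desc = i;
 *	E1000_WRITE_REG(hw, E1000_TDT(txr->me), i);
 *
 * after which the device fetches everything up to, but excluding, TDT.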
*/ bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i); return (0); } static void em_set_promisc(struct adapter *adapter) { if_t ifp = adapter->ifp; u32 reg_rctl; reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); if (if_getflags(ifp) & IFF_PROMISC) { reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE); /* Turn this on if you want to see bad packets */ if (em_debug_sbp) reg_rctl |= E1000_RCTL_SBP; E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); } else if (if_getflags(ifp) & IFF_ALLMULTI) { reg_rctl |= E1000_RCTL_MPE; reg_rctl &= ~E1000_RCTL_UPE; E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); } } static void em_disable_promisc(struct adapter *adapter) { if_t ifp = adapter->ifp; u32 reg_rctl; int mcnt = 0; reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); reg_rctl &= (~E1000_RCTL_UPE); if (if_getflags(ifp) & IFF_ALLMULTI) mcnt = MAX_NUM_MULTICAST_ADDRESSES; else mcnt = if_multiaddr_count(ifp, MAX_NUM_MULTICAST_ADDRESSES); /* Don't disable if in MAX groups */ if (mcnt < MAX_NUM_MULTICAST_ADDRESSES) reg_rctl &= (~E1000_RCTL_MPE); reg_rctl &= (~E1000_RCTL_SBP); E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); } /********************************************************************* * Multicast Update * * This routine is called whenever multicast address list is updated. * **********************************************************************/ static void em_set_multi(struct adapter *adapter) { if_t ifp = adapter->ifp; u32 reg_rctl = 0; u8 *mta; /* Multicast array memory */ int mcnt = 0; IOCTL_DEBUGOUT("em_set_multi: begin"); mta = adapter->mta; bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES); if (adapter->hw.mac.type == e1000_82542 && adapter->hw.revision_id == E1000_REVISION_2) { reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE) e1000_pci_clear_mwi(&adapter->hw); reg_rctl |= E1000_RCTL_RST; E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); msec_delay(5); } if_multiaddr_array(ifp, mta, &mcnt, MAX_NUM_MULTICAST_ADDRESSES); if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) { reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); reg_rctl |= E1000_RCTL_MPE; E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); } else e1000_update_mc_addr_list(&adapter->hw, mta, mcnt); if (adapter->hw.mac.type == e1000_82542 && adapter->hw.revision_id == E1000_REVISION_2) { reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); reg_rctl &= ~E1000_RCTL_RST; E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); msec_delay(5); if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE) e1000_pci_set_mwi(&adapter->hw); } } /********************************************************************* * Timer routine * * This routine checks for link status and updates statistics. 
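 *
 * Sketch of the hang-detection states consulted below (names from
 * if_em.h; the exact numeric values are assumptions): EM_TX_IDLE
 * means the ring is empty and detection is disarmed; EM_TX_BUSY is
 * set whenever em_txeof() cleans something; each em_txeof() pass
 * that cleans nothing increments busy; once busy reaches
 * EM_TX_MAXTRIES this timer promotes it to EM_TX_HUNG and the next
 * tick triggers the watchdog reset.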
* **********************************************************************/ static void em_local_timer(void *arg) { struct adapter *adapter = arg; if_t ifp = adapter->ifp; struct tx_ring *txr = adapter->tx_rings; struct rx_ring *rxr = adapter->rx_rings; u32 trigger = 0; EM_CORE_LOCK_ASSERT(adapter); em_update_link_status(adapter); em_update_stats_counters(adapter); /* Reset LAA into RAR[0] on 82571 */ if ((adapter->hw.mac.type == e1000_82571) && e1000_get_laa_state_82571(&adapter->hw)) e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0); /* Mask to use in the irq trigger */ if (adapter->msix_mem) { for (int i = 0; i < adapter->num_queues; i++, rxr++) trigger |= rxr->ims; rxr = adapter->rx_rings; } else trigger = E1000_ICS_RXDMT0; /* ** Check on the state of the TX queue(s), this ** can be done without the lock because it's RO ** and the HUNG state will be static if set. */ for (int i = 0; i < adapter->num_queues; i++, txr++) { if (txr->busy == EM_TX_HUNG) goto hung; if (txr->busy >= EM_TX_MAXTRIES) txr->busy = EM_TX_HUNG; /* Schedule a TX tasklet if needed */ if (txr->tx_avail <= EM_MAX_SCATTER) taskqueue_enqueue(txr->tq, &txr->tx_task); } callout_reset(&adapter->timer, hz, em_local_timer, adapter); #ifndef DEVICE_POLLING /* Trigger an RX interrupt to guarantee mbuf refresh */ E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger); #endif return; hung: /* Looks like we're hung */ device_printf(adapter->dev, "Watchdog timeout Queue[%d]-- resetting\n", txr->me); em_print_debug_info(adapter); if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING); adapter->watchdog_events++; em_init_locked(adapter); } static void em_update_link_status(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; if_t ifp = adapter->ifp; device_t dev = adapter->dev; struct tx_ring *txr = adapter->tx_rings; u32 link_check = 0; /* Get the cached link value or read phy for real */ switch (hw->phy.media_type) { case e1000_media_type_copper: if (hw->mac.get_link_status) { if (hw->mac.type == e1000_pch_spt) msec_delay(50); /* Do the work to read phy */ e1000_check_for_link(hw); link_check = !hw->mac.get_link_status; if (link_check) /* ESB2 fix */ e1000_cfg_on_link_up(hw); } else link_check = TRUE; break; case e1000_media_type_fiber: e1000_check_for_link(hw); link_check = (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU); break; case e1000_media_type_internal_serdes: e1000_check_for_link(hw); link_check = adapter->hw.mac.serdes_has_link; break; default: case e1000_media_type_unknown: break; } /* Now check for a transition */ if (link_check && (adapter->link_active == 0)) { e1000_get_speed_and_duplex(hw, &adapter->link_speed, &adapter->link_duplex); /* Check if we must disable SPEED_MODE bit on PCI-E */ if ((adapter->link_speed != SPEED_1000) && ((hw->mac.type == e1000_82571) || (hw->mac.type == e1000_82572))) { int tarc0; tarc0 = E1000_READ_REG(hw, E1000_TARC(0)); tarc0 &= ~TARC_SPEED_MODE_BIT; E1000_WRITE_REG(hw, E1000_TARC(0), tarc0); } if (bootverbose) device_printf(dev, "Link is up %d Mbps %s\n", adapter->link_speed, ((adapter->link_duplex == FULL_DUPLEX) ?
"Full Duplex" : "Half Duplex")); adapter->link_active = 1; adapter->smartspeed = 0; if_setbaudrate(ifp, adapter->link_speed * 1000000); if_link_state_change(ifp, LINK_STATE_UP); } else if (!link_check && (adapter->link_active == 1)) { if_setbaudrate(ifp, 0); adapter->link_speed = 0; adapter->link_duplex = 0; if (bootverbose) device_printf(dev, "Link is Down\n"); adapter->link_active = 0; /* Link down, disable hang detection */ for (int i = 0; i < adapter->num_queues; i++, txr++) txr->busy = EM_TX_IDLE; if_link_state_change(ifp, LINK_STATE_DOWN); } } /********************************************************************* * * This routine disables all traffic on the adapter by issuing a * global reset on the MAC and deallocates TX/RX buffers. * * This routine should always be called with BOTH the CORE * and TX locks. **********************************************************************/ static void em_stop(void *arg) { struct adapter *adapter = arg; if_t ifp = adapter->ifp; struct tx_ring *txr = adapter->tx_rings; EM_CORE_LOCK_ASSERT(adapter); INIT_DEBUGOUT("em_stop: begin"); em_disable_intr(adapter); callout_stop(&adapter->timer); /* Tell the stack that the interface is no longer active */ if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING); /* Disarm Hang Detection. */ for (int i = 0; i < adapter->num_queues; i++, txr++) { EM_TX_LOCK(txr); txr->busy = EM_TX_IDLE; EM_TX_UNLOCK(txr); } /* I219 needs some special flushing to avoid hangs */ if (adapter->hw.mac.type == e1000_pch_spt) em_flush_desc_rings(adapter); e1000_reset_hw(&adapter->hw); E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0); e1000_led_off(&adapter->hw); e1000_cleanup_led(&adapter->hw); } /********************************************************************* * * Determine hardware revision. 
* **********************************************************************/ static void em_identify_hardware(struct adapter *adapter) { device_t dev = adapter->dev; /* Make sure our PCI config space has the necessary stuff set */ pci_enable_busmaster(dev); adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2); /* Save off the information about this board */ adapter->hw.vendor_id = pci_get_vendor(dev); adapter->hw.device_id = pci_get_device(dev); adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1); adapter->hw.subsystem_vendor_id = pci_read_config(dev, PCIR_SUBVEND_0, 2); adapter->hw.subsystem_device_id = pci_read_config(dev, PCIR_SUBDEV_0, 2); /* Do Shared Code Init and Setup */ if (e1000_set_mac_type(&adapter->hw)) { device_printf(dev, "Setup init failure\n"); return; } } static int em_allocate_pci_resources(struct adapter *adapter) { device_t dev = adapter->dev; int rid; rid = PCIR_BAR(0); adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (adapter->memory == NULL) { device_printf(dev, "Unable to allocate bus resource: memory\n"); return (ENXIO); } adapter->osdep.mem_bus_space_tag = rman_get_bustag(adapter->memory); adapter->osdep.mem_bus_space_handle = rman_get_bushandle(adapter->memory); adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle; adapter->hw.back = &adapter->osdep; return (0); } /********************************************************************* * * Setup the Legacy or MSI Interrupt handler * **********************************************************************/ int em_allocate_legacy(struct adapter *adapter) { device_t dev = adapter->dev; struct tx_ring *txr = adapter->tx_rings; int error, rid = 0; /* Manually turn off all interrupts */ E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff); if (adapter->msix == 1) /* using MSI */ rid = 1; /* We allocate a single interrupt resource */ adapter->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); if (adapter->res == NULL) { device_printf(dev, "Unable to allocate bus resource: " "interrupt\n"); return (ENXIO); } /* * Allocate a fast interrupt and the associated * deferred processing contexts. */ TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter); adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT, taskqueue_thread_enqueue, &adapter->tq); taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que", device_get_nameunit(adapter->dev)); /* Use a TX only tasklet for local timer */ TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr); txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT, taskqueue_thread_enqueue, &txr->tq); taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq", device_get_nameunit(adapter->dev)); TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter); if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET, em_irq_fast, NULL, adapter, &adapter->tag)) != 0) { device_printf(dev, "Failed to register fast interrupt " "handler: %d\n", error); taskqueue_free(adapter->tq); adapter->tq = NULL; return (error); } return (0); } /********************************************************************* * * Setup the MSIX Interrupt handlers * This is not really Multiqueue, rather * it's just separate interrupt vectors * for TX, RX, and Link.
* **********************************************************************/ int em_allocate_msix(struct adapter *adapter) { device_t dev = adapter->dev; struct tx_ring *txr = adapter->tx_rings; struct rx_ring *rxr = adapter->rx_rings; int error, rid, vector = 0; int cpu_id = 0; /* Make sure all interrupts are disabled */ E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff); /* First set up ring resources */ for (int i = 0; i < adapter->num_queues; i++, rxr++, vector++) { /* RX ring */ rid = vector + 1; rxr->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_ACTIVE); if (rxr->res == NULL) { device_printf(dev, "Unable to allocate bus resource: " "RX MSIX Interrupt %d\n", i); return (ENXIO); } if ((error = bus_setup_intr(dev, rxr->res, INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx, rxr, &rxr->tag)) != 0) { device_printf(dev, "Failed to register RX handler"); return (error); } #if __FreeBSD_version >= 800504 bus_describe_intr(dev, rxr->res, rxr->tag, "rx%d", i); #endif rxr->msix = vector; if (em_last_bind_cpu < 0) em_last_bind_cpu = CPU_FIRST(); cpu_id = em_last_bind_cpu; bus_bind_intr(dev, rxr->res, cpu_id); TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr); rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT, taskqueue_thread_enqueue, &rxr->tq); taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq (cpuid %d)", device_get_nameunit(adapter->dev), cpu_id); /* ** Set the bit to enable interrupt ** in E1000_IMS -- bits 20 and 21 ** are for RX0 and RX1, note this has ** NOTHING to do with the MSIX vector */ rxr->ims = 1 << (20 + i); adapter->ims |= rxr->ims; adapter->ivars |= (8 | rxr->msix) << (i * 4); em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu); } for (int i = 0; i < adapter->num_queues; i++, txr++, vector++) { /* TX ring */ rid = vector + 1; txr->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_ACTIVE); if (txr->res == NULL) { device_printf(dev, "Unable to allocate bus resource: " "TX MSIX Interrupt %d\n", i); return (ENXIO); } if ((error = bus_setup_intr(dev, txr->res, INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx, txr, &txr->tag)) != 0) { device_printf(dev, "Failed to register TX handler"); return (error); } #if __FreeBSD_version >= 800504 bus_describe_intr(dev, txr->res, txr->tag, "tx%d", i); #endif txr->msix = vector; if (em_last_bind_cpu < 0) em_last_bind_cpu = CPU_FIRST(); cpu_id = em_last_bind_cpu; bus_bind_intr(dev, txr->res, cpu_id); TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr); txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT, taskqueue_thread_enqueue, &txr->tq); taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq (cpuid %d)", device_get_nameunit(adapter->dev), cpu_id); /* ** Set the bit to enable interrupt ** in E1000_IMS -- bits 22 and 23 ** are for TX0 and TX1, note this has ** NOTHING to do with the MSIX vector */ txr->ims = 1 << (22 + i); adapter->ims |= txr->ims; adapter->ivars |= (8 | txr->msix) << (8 + (i * 4)); em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu); } /* Link interrupt */ rid = vector + 1; adapter->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); if (!adapter->res) { device_printf(dev,"Unable to allocate " "bus resource: Link interrupt [%d]\n", rid); return (ENXIO); } /* Set the link handler function */ error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_link, adapter, &adapter->tag); if (error) { adapter->res = NULL; device_printf(dev, "Failed to register LINK handler"); return (error); } #if __FreeBSD_version >= 800504 bus_describe_intr(dev, adapter->res, adapter->tag, "link"); #endif 
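	/*
	 * Sketch of the 82574 IVAR image built by the shifts above and
	 * below (one 4-bit field per interrupt cause, with bit 3 of
	 * each field marking it valid -- hence the "8 |"):
	 *
	 *	bits  3:0   RX0        bits  7:4   RX1
	 *	bits 11:8   TX0        bits 15:12  TX1
	 *	bits 19:16  link       bit  31     set last by the driver
	 */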
adapter->linkvec = vector; adapter->ivars |= (8 | vector) << 16; adapter->ivars |= 0x80000000; return (0); } static void em_free_pci_resources(struct adapter *adapter) { device_t dev = adapter->dev; struct tx_ring *txr; struct rx_ring *rxr; int rid; /* ** Release all the queue interrupt resources: */ for (int i = 0; i < adapter->num_queues; i++) { txr = &adapter->tx_rings[i]; /* an early abort? */ if (txr == NULL) break; rid = txr->msix +1; if (txr->tag != NULL) { bus_teardown_intr(dev, txr->res, txr->tag); txr->tag = NULL; } if (txr->res != NULL) bus_release_resource(dev, SYS_RES_IRQ, rid, txr->res); rxr = &adapter->rx_rings[i]; /* an early abort? */ if (rxr == NULL) break; rid = rxr->msix +1; if (rxr->tag != NULL) { bus_teardown_intr(dev, rxr->res, rxr->tag); rxr->tag = NULL; } if (rxr->res != NULL) bus_release_resource(dev, SYS_RES_IRQ, rid, rxr->res); } if (adapter->linkvec) /* we are doing MSIX */ rid = adapter->linkvec + 1; else (adapter->msix != 0) ? (rid = 1):(rid = 0); if (adapter->tag != NULL) { bus_teardown_intr(dev, adapter->res, adapter->tag); adapter->tag = NULL; } if (adapter->res != NULL) bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res); if (adapter->msix) pci_release_msi(dev); if (adapter->msix_mem != NULL) bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem); if (adapter->memory != NULL) bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(0), adapter->memory); if (adapter->flash != NULL) bus_release_resource(dev, SYS_RES_MEMORY, EM_FLASH, adapter->flash); } /* * Setup MSI or MSI/X */ static int em_setup_msix(struct adapter *adapter) { device_t dev = adapter->dev; int val; /* Nearly always going to use one queue */ adapter->num_queues = 1; /* ** Try using MSI-X for Hartwell adapters */ if ((adapter->hw.mac.type == e1000_82574) && (em_enable_msix == TRUE)) { #ifdef EM_MULTIQUEUE adapter->num_queues = (em_num_queues == 1) ? 1 : 2; if (adapter->num_queues > 1) em_enable_vectors_82574(adapter); #endif /* Map the MSIX BAR */ int rid = PCIR_BAR(EM_MSIX_BAR); adapter->msix_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (adapter->msix_mem == NULL) { /* May not be enabled */ device_printf(adapter->dev, "Unable to map MSIX table \n"); goto msi; } val = pci_msix_count(dev); #ifdef EM_MULTIQUEUE /* We need 5 vectors in the multiqueue case */ if (adapter->num_queues > 1 ) { if (val >= 5) val = 5; else { adapter->num_queues = 1; device_printf(adapter->dev, "Insufficient MSIX vectors for >1 queue, " "using single queue...\n"); goto msix_one; } } else { msix_one: #endif if (val >= 3) val = 3; else { device_printf(adapter->dev, "Insufficient MSIX vectors, using MSI\n"); goto msi; } #ifdef EM_MULTIQUEUE } #endif if ((pci_alloc_msix(dev, &val) == 0)) { device_printf(adapter->dev, "Using MSIX interrupts " "with %d vectors\n", val); return (val); } /* ** If MSIX alloc failed or provided us with ** less than needed, free and fall through to MSI */ pci_release_msi(dev); } msi: if (adapter->msix_mem != NULL) { bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem); adapter->msix_mem = NULL; } val = 1; if (pci_alloc_msi(dev, &val) == 0) { device_printf(adapter->dev, "Using an MSI interrupt\n"); return (val); } /* Should only happen due to manual configuration */ device_printf(adapter->dev,"No MSI/MSIX using a Legacy IRQ\n"); return (0); } /* ** The 3 following flush routines are used as a workaround in the ** I219 client parts and only for them. 
** ** em_flush_tx_ring - remove all descriptors from the tx_ring ** ** We want to clear all pending descriptors from the TX ring; ** zeroing happens when the HW reads the regs. We assign the ring itself as ** the data of the next descriptor. We don't care about the data since we are ** about to reset the HW. */ static void em_flush_tx_ring(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; struct tx_ring *txr = adapter->tx_rings; struct e1000_tx_desc *txd; u32 tctl, txd_lower = E1000_TXD_CMD_IFCS; u16 size = 512; tctl = E1000_READ_REG(hw, E1000_TCTL); E1000_WRITE_REG(hw, E1000_TCTL, tctl | E1000_TCTL_EN); txd = &txr->tx_base[txr->next_avail_desc++]; if (txr->next_avail_desc == adapter->num_tx_desc) txr->next_avail_desc = 0; /* Just use the ring as a dummy buffer addr */ txd->buffer_addr = txr->txdma.dma_paddr; txd->lower.data = htole32(txd_lower | size); txd->upper.data = 0; /* flush descriptors to memory before notifying the HW */ wmb(); E1000_WRITE_REG(hw, E1000_TDT(0), txr->next_avail_desc); mb(); usec_delay(250); } /* ** em_flush_rx_ring - remove all descriptors from the rx_ring ** ** Mark all descriptors in the RX ring as consumed and disable the rx ring */ static void em_flush_rx_ring(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; u32 rctl, rxdctl; rctl = E1000_READ_REG(hw, E1000_RCTL); E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN); E1000_WRITE_FLUSH(hw); usec_delay(150); rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0)); /* zero the lower 14 bits (prefetch and host thresholds) */ rxdctl &= 0xffffc000; /* * update thresholds: prefetch threshold to 31, host threshold to 1 * and make sure the granularity is "descriptors" and not "cache lines" */ rxdctl |= (0x1F | (1 << 8) | E1000_RXDCTL_THRESH_UNIT_DESC); E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl); /* momentarily enable the RX ring for the changes to take effect */ E1000_WRITE_REG(hw, E1000_RCTL, rctl | E1000_RCTL_EN); E1000_WRITE_FLUSH(hw); usec_delay(150); E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN); } /* ** em_flush_desc_rings - remove all descriptors from the descriptor rings ** ** On I219, the descriptor rings must be emptied before resetting the HW ** or before changing the device state to D3 during runtime (runtime PM). ** ** Failure to do this will cause the HW to enter a unit hang state which can ** only be released by a PCI reset on the device. ** */ static void em_flush_desc_rings(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; device_t dev = adapter->dev; u16 hang_state; u32 fext_nvm11, tdlen; /* First, disable MULR fix in FEXTNVM11 */ fext_nvm11 = E1000_READ_REG(hw, E1000_FEXTNVM11); fext_nvm11 |= E1000_FEXTNVM11_DISABLE_MULR_FIX; E1000_WRITE_REG(hw, E1000_FEXTNVM11, fext_nvm11); /* do nothing if we're not in faulty state, or if the queue is empty */ tdlen = E1000_READ_REG(hw, E1000_TDLEN(0)); hang_state = pci_read_config(dev, PCICFG_DESC_RING_STATUS, 2); if (!(hang_state & FLUSH_DESC_REQUIRED) || !tdlen) return; em_flush_tx_ring(adapter); /* recheck, maybe the fault is caused by the rx ring */ hang_state = pci_read_config(dev, PCICFG_DESC_RING_STATUS, 2); if (hang_state & FLUSH_DESC_REQUIRED) em_flush_rx_ring(adapter); } /********************************************************************* * * Initialize the hardware to a configuration * as specified by the adapter structure.
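 *
 * Worked example of the PBA/flow-control arithmetic below (editor's
 * sketch, assuming an 82571 where E1000_PBA_32K yields 32 KB of Rx
 * packet buffer and a 1522-byte max frame):
 *
 *	rx_buffer_size = (PBA & 0xffff) << 10         = 32768
 *	high_water     = 32768 - roundup2(1522, 1024) = 30720
 *	low_water      = 30720 - 1500                 = 29220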
* **********************************************************************/ static void em_reset(struct adapter *adapter) { device_t dev = adapter->dev; if_t ifp = adapter->ifp; struct e1000_hw *hw = &adapter->hw; u16 rx_buffer_size; u32 pba; INIT_DEBUGOUT("em_reset: begin"); /* Set up smart power down as default off on newer adapters. */ if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 || hw->mac.type == e1000_82572)) { u16 phy_tmp = 0; /* Speed up time to link by disabling smart power down. */ e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp); phy_tmp &= ~IGP02E1000_PM_SPD; e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp); } /* * Packet Buffer Allocation (PBA) * Writing PBA sets the receive portion of the buffer; * the remainder is used for the transmit buffer. */ switch (hw->mac.type) { /* Total Packet Buffer on these is 48K */ case e1000_82571: case e1000_82572: case e1000_80003es2lan: pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */ break; case e1000_82573: /* 82573: Total Packet Buffer is 32K */ pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */ break; case e1000_82574: case e1000_82583: pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */ break; case e1000_ich8lan: pba = E1000_PBA_8K; break; case e1000_ich9lan: case e1000_ich10lan: /* Boost Receive side for jumbo frames */ if (adapter->hw.mac.max_frame_size > 4096) pba = E1000_PBA_14K; else pba = E1000_PBA_10K; break; case e1000_pchlan: case e1000_pch2lan: case e1000_pch_lpt: case e1000_pch_spt: pba = E1000_PBA_26K; break; default: if (adapter->hw.mac.max_frame_size > 8192) pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */ else pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */ } E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba); /* * These parameters control the automatic generation (Tx) and * response (Rx) to Ethernet PAUSE frames. * - High water mark should allow for at least two frames to be * received after sending an XOFF. * - Low water mark works best when it is very near the high water mark. * This allows the receiver to restart by sending XON when it has * drained a bit. Here we use an arbitrary value of 1500, which will * restart after one full frame is pulled from the buffer. There * could be several smaller frames in the buffer and if so they will * not trigger the XON until their total number reduces the buffer * by 1500. * - The pause time is fairly large at 1000 x 512ns = 512 usec. */ rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10 ); hw->fc.high_water = rx_buffer_size - roundup2(adapter->hw.mac.max_frame_size, 1024); hw->fc.low_water = hw->fc.high_water - 1500; if (adapter->fc) /* locally set flow control value?
*/ hw->fc.requested_mode = adapter->fc; else hw->fc.requested_mode = e1000_fc_full; if (hw->mac.type == e1000_80003es2lan) hw->fc.pause_time = 0xFFFF; else hw->fc.pause_time = EM_FC_PAUSE_TIME; hw->fc.send_xon = TRUE; /* Device specific overrides/settings */ switch (hw->mac.type) { case e1000_pchlan: /* Workaround: no TX flow ctrl for PCH */ hw->fc.requested_mode = e1000_fc_rx_pause; hw->fc.pause_time = 0xFFFF; /* override */ if (if_getmtu(ifp) > ETHERMTU) { hw->fc.high_water = 0x3500; hw->fc.low_water = 0x1500; } else { hw->fc.high_water = 0x5000; hw->fc.low_water = 0x3000; } hw->fc.refresh_time = 0x1000; break; case e1000_pch2lan: case e1000_pch_lpt: case e1000_pch_spt: hw->fc.high_water = 0x5C20; hw->fc.low_water = 0x5048; hw->fc.pause_time = 0x0650; hw->fc.refresh_time = 0x0400; /* Jumbos need adjusted PBA */ if (if_getmtu(ifp) > ETHERMTU) E1000_WRITE_REG(hw, E1000_PBA, 12); else E1000_WRITE_REG(hw, E1000_PBA, 26); break; case e1000_ich9lan: case e1000_ich10lan: if (if_getmtu(ifp) > ETHERMTU) { hw->fc.high_water = 0x2800; hw->fc.low_water = hw->fc.high_water - 8; break; } /* else fall thru */ default: if (hw->mac.type == e1000_80003es2lan) hw->fc.pause_time = 0xFFFF; break; } /* I219 needs some special flushing to avoid hangs */ if (hw->mac.type == e1000_pch_spt) em_flush_desc_rings(adapter); /* Issue a global reset */ e1000_reset_hw(hw); E1000_WRITE_REG(hw, E1000_WUC, 0); em_disable_aspm(adapter); /* and a re-init */ if (e1000_init_hw(hw) < 0) { device_printf(dev, "Hardware Initialization Failed\n"); return; } E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN); e1000_get_phy_info(hw); e1000_check_for_link(hw); return; } /********************************************************************* * * Setup networking device structure and register an interface. * **********************************************************************/ static int em_setup_interface(device_t dev, struct adapter *adapter) { if_t ifp; INIT_DEBUGOUT("em_setup_interface: begin"); ifp = adapter->ifp = if_gethandle(IFT_ETHER); if (ifp == 0) { device_printf(dev, "can not allocate ifnet structure\n"); return (-1); } if_initname(ifp, device_get_name(dev), device_get_unit(dev)); if_setdev(ifp, dev); if_setinitfn(ifp, em_init); if_setsoftc(ifp, adapter); if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST); if_setioctlfn(ifp, em_ioctl); if_setgetcounterfn(ifp, em_get_counter); /* TSO parameters */ ifp->if_hw_tsomax = IP_MAXPACKET; /* Take m_pullup(9)'s in em_xmit() w/ TSO into account. */ ifp->if_hw_tsomaxsegcount = EM_MAX_SCATTER - 5; ifp->if_hw_tsomaxsegsize = EM_TSO_SEG_SIZE; #ifdef EM_MULTIQUEUE /* Multiqueue stack interface */ if_settransmitfn(ifp, em_mq_start); if_setqflushfn(ifp, em_qflush); #else if_setstartfn(ifp, em_start); if_setsendqlen(ifp, adapter->num_tx_desc - 1); if_setsendqready(ifp); #endif ether_ifattach(ifp, adapter->hw.mac.addr); if_setcapabilities(ifp, 0); if_setcapenable(ifp, 0); if_setcapabilitiesbit(ifp, IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM | IFCAP_TSO4, 0); /* * Tell the upper layer(s) we * support full VLAN capability */ if_setifheaderlen(ifp, sizeof(struct ether_vlan_header)); if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWTSO | IFCAP_VLAN_MTU, 0); if_setcapenable(ifp, if_getcapabilities(ifp)); /* ** Don't turn this on by default; if vlans are ** created on another pseudo device (e.g. lagg) ** then vlan events are not passed thru, breaking ** operation, but with HW FILTER off it works.
If ** using vlans directly on the em driver you can ** enable this and get full hardware tag filtering. */ if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWFILTER,0); #ifdef DEVICE_POLLING if_setcapabilitiesbit(ifp, IFCAP_POLLING,0); #endif /* Enable only WOL MAGIC by default */ if (adapter->wol) { if_setcapabilitiesbit(ifp, IFCAP_WOL, 0); if_setcapenablebit(ifp, IFCAP_WOL_MAGIC, 0); } /* * Specify the media types supported by this adapter and register * callbacks to update media and link information */ ifmedia_init(&adapter->media, IFM_IMASK, em_media_change, em_media_status); if ((adapter->hw.phy.media_type == e1000_media_type_fiber) || (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) { u_char fiber_type = IFM_1000_SX; /* default type */ ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX, 0, NULL); ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL); } else { ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL); ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX, 0, NULL); ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX, 0, NULL); ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX, 0, NULL); if (adapter->hw.phy.type != e1000_phy_ife) { ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL); ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_T, 0, NULL); } } ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO); return (0); } /* * Manage DMA'able memory. */ static void em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) { if (error) return; *(bus_addr_t *) arg = segs[0].ds_addr; } static int em_dma_malloc(struct adapter *adapter, bus_size_t size, struct em_dma_alloc *dma, int mapflags) { int error; error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */ EM_DBA_ALIGN, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ size, /* maxsize */ 1, /* nsegments */ size, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockarg */ &dma->dma_tag); if (error) { device_printf(adapter->dev, "%s: bus_dma_tag_create failed: %d\n", __func__, error); goto fail_0; } error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr, BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map); if (error) { device_printf(adapter->dev, "%s: bus_dmamem_alloc(%ju) failed: %d\n", __func__, (uintmax_t)size, error); goto fail_2; } dma->dma_paddr = 0; error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr, size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT); if (error || dma->dma_paddr == 0) { device_printf(adapter->dev, "%s: bus_dmamap_load failed: %d\n", __func__, error); goto fail_3; } return (0); fail_3: bus_dmamap_unload(dma->dma_tag, dma->dma_map); fail_2: bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map); bus_dma_tag_destroy(dma->dma_tag); fail_0: dma->dma_tag = NULL; return (error); } static void em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma) { if (dma->dma_tag == NULL) return; if (dma->dma_paddr != 0) { bus_dmamap_sync(dma->dma_tag, dma->dma_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(dma->dma_tag, dma->dma_map); dma->dma_paddr = 0; } if (dma->dma_vaddr != NULL) { bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map); dma->dma_vaddr = NULL; } bus_dma_tag_destroy(dma->dma_tag); dma->dma_tag = NULL; } /********************************************************************* * * Allocate 
memory for the transmit and receive rings, and then * the descriptors associated with each, called only once at attach. * **********************************************************************/ static int em_allocate_queues(struct adapter *adapter) { device_t dev = adapter->dev; struct tx_ring *txr = NULL; struct rx_ring *rxr = NULL; int rsize, tsize, error = E1000_SUCCESS; int txconf = 0, rxconf = 0; /* Allocate the TX ring struct memory */ if (!(adapter->tx_rings = (struct tx_ring *) malloc(sizeof(struct tx_ring) * adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate TX ring memory\n"); error = ENOMEM; goto fail; } /* Now allocate the RX */ if (!(adapter->rx_rings = (struct rx_ring *) malloc(sizeof(struct rx_ring) * adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate RX ring memory\n"); error = ENOMEM; goto rx_fail; } tsize = roundup2(adapter->num_tx_desc * sizeof(struct e1000_tx_desc), EM_DBA_ALIGN); /* * Now set up the TX queues; txconf is needed to handle the * possibility that things fail midcourse and we need to * undo memory gracefully */ for (int i = 0; i < adapter->num_queues; i++, txconf++) { /* Set up some basics */ txr = &adapter->tx_rings[i]; txr->adapter = adapter; txr->me = i; /* Initialize the TX lock */ snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)", device_get_nameunit(dev), txr->me); mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF); if (em_dma_malloc(adapter, tsize, &txr->txdma, BUS_DMA_NOWAIT)) { device_printf(dev, "Unable to allocate TX Descriptor memory\n"); error = ENOMEM; goto err_tx_desc; } txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr; bzero((void *)txr->tx_base, tsize); if (em_allocate_transmit_buffers(txr)) { device_printf(dev, "Critical Failure setting up transmit buffers\n"); error = ENOMEM; goto err_tx_desc; } #if __FreeBSD_version >= 800000 /* Allocate a buf ring */ txr->br = buf_ring_alloc(4096, M_DEVBUF, M_WAITOK, &txr->tx_mtx); #endif } /* * Next the RX queues... */ rsize = roundup2(adapter->num_rx_desc * sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN); for (int i = 0; i < adapter->num_queues; i++, rxconf++) { rxr = &adapter->rx_rings[i]; rxr->adapter = adapter; rxr->me = i; /* Initialize the RX lock */ snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)", device_get_nameunit(dev), rxr->me); mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF); if (em_dma_malloc(adapter, rsize, &rxr->rxdma, BUS_DMA_NOWAIT)) { device_printf(dev, "Unable to allocate RxDescriptor memory\n"); error = ENOMEM; goto err_rx_desc; } rxr->rx_base = (union e1000_rx_desc_extended *)rxr->rxdma.dma_vaddr; bzero((void *)rxr->rx_base, rsize); /* Allocate receive buffers for the ring */ if (em_allocate_receive_buffers(rxr)) { device_printf(dev, "Critical Failure setting up receive buffers\n"); error = ENOMEM; goto err_rx_desc; } } return (0); err_rx_desc: for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--) em_dma_free(adapter, &rxr->rxdma); err_tx_desc: for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--) em_dma_free(adapter, &txr->txdma); free(adapter->rx_rings, M_DEVBUF); rx_fail: #if __FreeBSD_version >= 800000 buf_ring_free(txr->br, M_DEVBUF); #endif free(adapter->tx_rings, M_DEVBUF); fail: return (error); } /********************************************************************* * * Allocate memory for tx_buffer structures. The tx_buffer stores all * the information needed to transmit a packet on the wire.
This is * called only once at attach; setup is done on every reset. * **********************************************************************/ static int em_allocate_transmit_buffers(struct tx_ring *txr) { struct adapter *adapter = txr->adapter; device_t dev = adapter->dev; struct em_txbuffer *txbuf; int error, i; /* * Setup DMA descriptor areas. */ if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ EM_TSO_SIZE, /* maxsize */ EM_MAX_SCATTER, /* nsegments */ PAGE_SIZE, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &txr->txtag))) { device_printf(dev,"Unable to allocate TX DMA tag\n"); goto fail; } if (!(txr->tx_buffers = (struct em_txbuffer *) malloc(sizeof(struct em_txbuffer) * adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate tx_buffer memory\n"); error = ENOMEM; goto fail; } /* Create the descriptor buffer dma maps */ txbuf = txr->tx_buffers; for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) { error = bus_dmamap_create(txr->txtag, 0, &txbuf->map); if (error != 0) { device_printf(dev, "Unable to create TX DMA map\n"); goto fail; } } return 0; fail: /* We free all; it handles the case where we are in the middle */ em_free_transmit_structures(adapter); return (error); } /********************************************************************* * * Initialize a transmit ring. * **********************************************************************/ static void em_setup_transmit_ring(struct tx_ring *txr) { struct adapter *adapter = txr->adapter; struct em_txbuffer *txbuf; int i; #ifdef DEV_NETMAP struct netmap_slot *slot; struct netmap_adapter *na = netmap_getna(adapter->ifp); #endif /* DEV_NETMAP */ /* Clear the old descriptor contents */ EM_TX_LOCK(txr); #ifdef DEV_NETMAP slot = netmap_reset(na, NR_TX, txr->me, 0); #endif /* DEV_NETMAP */ bzero((void *)txr->tx_base, (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc); /* Reset indices */ txr->next_avail_desc = 0; txr->next_to_clean = 0; /* Free any existing tx buffers. */ txbuf = txr->tx_buffers; for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) { if (txbuf->m_head != NULL) { bus_dmamap_sync(txr->txtag, txbuf->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(txr->txtag, txbuf->map); m_freem(txbuf->m_head); txbuf->m_head = NULL; } #ifdef DEV_NETMAP if (slot) { int si = netmap_idx_n2k(&na->tx_rings[txr->me], i); uint64_t paddr; void *addr; addr = PNMB(na, slot + si, &paddr); txr->tx_base[i].buffer_addr = htole64(paddr); /* reload the map for netmap mode */ netmap_load_map(na, txr->txtag, txbuf->map, addr); } #endif /* DEV_NETMAP */ /* clear the watch index */ txbuf->next_eop = -1; } /* Set number of descriptors available */ txr->tx_avail = adapter->num_tx_desc; txr->busy = EM_TX_IDLE; /* Clear checksum offload context. */ txr->last_hw_offload = 0; txr->last_hw_ipcss = 0; txr->last_hw_ipcso = 0; txr->last_hw_tucss = 0; txr->last_hw_tucso = 0; bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); EM_TX_UNLOCK(txr); } /********************************************************************* * * Initialize all transmit rings.
* **********************************************************************/ static void em_setup_transmit_structures(struct adapter *adapter) { struct tx_ring *txr = adapter->tx_rings; for (int i = 0; i < adapter->num_queues; i++, txr++) em_setup_transmit_ring(txr); return; } /********************************************************************* * * Enable transmit unit. * **********************************************************************/ static void em_initialize_transmit_unit(struct adapter *adapter) { struct tx_ring *txr = adapter->tx_rings; struct e1000_hw *hw = &adapter->hw; u32 tctl, txdctl = 0, tarc, tipg = 0; INIT_DEBUGOUT("em_initialize_transmit_unit: begin"); for (int i = 0; i < adapter->num_queues; i++, txr++) { u64 bus_addr = txr->txdma.dma_paddr; /* Base and Len of TX Ring */ E1000_WRITE_REG(hw, E1000_TDLEN(i), adapter->num_tx_desc * sizeof(struct e1000_tx_desc)); E1000_WRITE_REG(hw, E1000_TDBAH(i), (u32)(bus_addr >> 32)); E1000_WRITE_REG(hw, E1000_TDBAL(i), (u32)bus_addr); /* Init the HEAD/TAIL indices */ E1000_WRITE_REG(hw, E1000_TDT(i), 0); E1000_WRITE_REG(hw, E1000_TDH(i), 0); HW_DEBUGOUT2("Base = %x, Length = %x\n", E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)), E1000_READ_REG(&adapter->hw, E1000_TDLEN(i))); txr->busy = EM_TX_IDLE; txdctl = 0; /* clear txdctl */ txdctl |= 0x1f; /* PTHRESH */ txdctl |= 1 << 8; /* HTHRESH */ txdctl |= 1 << 16;/* WTHRESH */ txdctl |= 1 << 22; /* Reserved bit 22 must always be 1 */ txdctl |= E1000_TXDCTL_GRAN; txdctl |= 1 << 25; /* LWTHRESH */ E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl); } /* Set the default values for the Tx Inter Packet Gap timer */ switch (adapter->hw.mac.type) { case e1000_80003es2lan: tipg = DEFAULT_82543_TIPG_IPGR1; tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT; break; default: if ((adapter->hw.phy.media_type == e1000_media_type_fiber) || (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) tipg = DEFAULT_82543_TIPG_IPGT_FIBER; else tipg = DEFAULT_82543_TIPG_IPGT_COPPER; tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT; tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT; } E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg); E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value); if(adapter->hw.mac.type >= e1000_82540) E1000_WRITE_REG(&adapter->hw, E1000_TADV, adapter->tx_abs_int_delay.value); if ((adapter->hw.mac.type == e1000_82571) || (adapter->hw.mac.type == e1000_82572)) { tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0)); tarc |= TARC_SPEED_MODE_BIT; E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc); } else if (adapter->hw.mac.type == e1000_80003es2lan) { /* errata: program both queues to unweighted RR */ tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0)); tarc |= 1; E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc); tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1)); tarc |= 1; E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc); } else if (adapter->hw.mac.type == e1000_82574) { tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0)); tarc |= TARC_ERRATA_BIT; if ( adapter->num_queues > 1) { tarc |= (TARC_COMPENSATION_MODE | TARC_MQ_FIX); E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc); E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc); } else E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc); } adapter->txd_cmd = E1000_TXD_CMD_IFCS; if (adapter->tx_int_delay.value > 0) adapter->txd_cmd |= E1000_TXD_CMD_IDE; /* Program the Transmit Control Register */ tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL); tctl &= ~E1000_TCTL_CT; tctl |= (E1000_TCTL_PSP | 
E1000_TCTL_RTLC | E1000_TCTL_EN | (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT)); if (adapter->hw.mac.type >= e1000_82571) tctl |= E1000_TCTL_MULR; /* This write will effectively turn on the transmit unit. */ E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl); if (hw->mac.type == e1000_pch_spt) { u32 reg; reg = E1000_READ_REG(hw, E1000_IOSFPC); reg |= E1000_RCTL_RDMTS_HEX; E1000_WRITE_REG(hw, E1000_IOSFPC, reg); reg = E1000_READ_REG(hw, E1000_TARC(0)); reg |= E1000_TARC0_CB_MULTIQ_3_REQ; E1000_WRITE_REG(hw, E1000_TARC(0), reg); } } /********************************************************************* * * Free all transmit rings. * **********************************************************************/ static void em_free_transmit_structures(struct adapter *adapter) { struct tx_ring *txr = adapter->tx_rings; for (int i = 0; i < adapter->num_queues; i++, txr++) { EM_TX_LOCK(txr); em_free_transmit_buffers(txr); em_dma_free(adapter, &txr->txdma); EM_TX_UNLOCK(txr); EM_TX_LOCK_DESTROY(txr); } free(adapter->tx_rings, M_DEVBUF); } /********************************************************************* * * Free transmit ring related data structures. * **********************************************************************/ static void em_free_transmit_buffers(struct tx_ring *txr) { struct adapter *adapter = txr->adapter; struct em_txbuffer *txbuf; INIT_DEBUGOUT("free_transmit_ring: begin"); if (txr->tx_buffers == NULL) return; for (int i = 0; i < adapter->num_tx_desc; i++) { txbuf = &txr->tx_buffers[i]; if (txbuf->m_head != NULL) { bus_dmamap_sync(txr->txtag, txbuf->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(txr->txtag, txbuf->map); m_freem(txbuf->m_head); txbuf->m_head = NULL; if (txbuf->map != NULL) { bus_dmamap_destroy(txr->txtag, txbuf->map); txbuf->map = NULL; } } else if (txbuf->map != NULL) { bus_dmamap_unload(txr->txtag, txbuf->map); bus_dmamap_destroy(txr->txtag, txbuf->map); txbuf->map = NULL; } } #if __FreeBSD_version >= 800000 if (txr->br != NULL) buf_ring_free(txr->br, M_DEVBUF); #endif if (txr->tx_buffers != NULL) { free(txr->tx_buffers, M_DEVBUF); txr->tx_buffers = NULL; } if (txr->txtag != NULL) { bus_dma_tag_destroy(txr->txtag); txr->txtag = NULL; } return; } /********************************************************************* * The offload context is protocol specific (TCP/UDP) and thus * only needs to be set when the protocol changes. Frequent * context changes can be a performance detriment, and the * feature might be better just disabled. The reason arises in the way * in which the controller supports pipelined requests from the * Tx data DMA. Up to four requests can be pipelined, and they may * belong to the same packet or to multiple packets. However all * requests for one packet are issued before a request is issued * for a subsequent packet, and if a request for the next packet * requires a context change, that request will be stalled * until the previous request completes. This means setting up * a new context effectively disables pipelined Tx data DMA, which * in turn greatly slows down performance when sending small * frames.
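 *
 * Illustration: with data descriptors D and context descriptors C,
 * the sequence D1 D2 D3 D4 pipelines freely, while D1 C D2 stalls
 * D2 until D1 has completed; this is why the code below caches the
 * last context and skips emitting redundant context descriptors.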
**********************************************************************/ static void em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off, struct ip *ip, u32 *txd_upper, u32 *txd_lower) { struct adapter *adapter = txr->adapter; struct e1000_context_desc *TXD = NULL; struct em_txbuffer *tx_buffer; int cur, hdr_len; u32 cmd = 0; u16 offload = 0; u8 ipcso, ipcss, tucso, tucss; ipcss = ipcso = tucss = tucso = 0; hdr_len = ip_off + (ip->ip_hl << 2); cur = txr->next_avail_desc; /* Setup of IP header checksum. */ if (mp->m_pkthdr.csum_flags & CSUM_IP) { *txd_upper |= E1000_TXD_POPTS_IXSM << 8; offload |= CSUM_IP; ipcss = ip_off; ipcso = ip_off + offsetof(struct ip, ip_sum); /* * Start offset for header checksum calculation. * End offset for header checksum calculation. * Offset of place to put the checksum. */ TXD = (struct e1000_context_desc *)&txr->tx_base[cur]; TXD->lower_setup.ip_fields.ipcss = ipcss; TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len); TXD->lower_setup.ip_fields.ipcso = ipcso; cmd |= E1000_TXD_CMD_IP; } if (mp->m_pkthdr.csum_flags & CSUM_TCP) { *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D; *txd_upper |= E1000_TXD_POPTS_TXSM << 8; offload |= CSUM_TCP; tucss = hdr_len; tucso = hdr_len + offsetof(struct tcphdr, th_sum); /* * The 82574L can only remember the *last* context used * regardless of which queue it was used for. We cannot reuse * contexts on this hardware platform and must generate a new * context every time. 82574L hardware spec, section 7.2.6, * second note. */ if (adapter->num_queues < 2) { /* * Setting up a new checksum offload context for every * frame takes a lot of processing time in hardware. * This also reduces performance a lot for small * frames, so avoid it if the driver can use a previously * configured checksum offload context. */ if (txr->last_hw_offload == offload) { if (offload & CSUM_IP) { if (txr->last_hw_ipcss == ipcss && txr->last_hw_ipcso == ipcso && txr->last_hw_tucss == tucss && txr->last_hw_tucso == tucso) return; } else { if (txr->last_hw_tucss == tucss && txr->last_hw_tucso == tucso) return; } } txr->last_hw_offload = offload; txr->last_hw_tucss = tucss; txr->last_hw_tucso = tucso; } /* * Start offset for payload checksum calculation. * End offset for payload checksum calculation. * Offset of place to put the checksum. */ TXD = (struct e1000_context_desc *)&txr->tx_base[cur]; TXD->upper_setup.tcp_fields.tucss = hdr_len; TXD->upper_setup.tcp_fields.tucse = htole16(0); TXD->upper_setup.tcp_fields.tucso = tucso; cmd |= E1000_TXD_CMD_TCP; } else if (mp->m_pkthdr.csum_flags & CSUM_UDP) { *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D; *txd_upper |= E1000_TXD_POPTS_TXSM << 8; tucss = hdr_len; tucso = hdr_len + offsetof(struct udphdr, uh_sum); /* * The 82574L can only remember the *last* context used * regardless of which queue it was used for. We cannot reuse * contexts on this hardware platform and must generate a new * context every time. 82574L hardware spec, section 7.2.6, * second note. */ if (adapter->num_queues < 2) { /* * Setting up a new checksum offload context for every * frame takes a lot of processing time in hardware. * This also reduces performance a lot for small * frames, so avoid it if the driver can use a previously * configured checksum offload context.
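 *
 * For reference, the three fields programmed below (editor's sketch,
 * assuming a plain UDP/IPv4 frame with hdr_len = 14 + 20 = 34):
 *
 *	tucss = 34          start checksumming at the UDP header
 *	tucse = 0           zero is taken as "checksum to end of frame"
 *	tucso = 34 + 6 = 40 where to stuff uh_sum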
*/ if (txr->last_hw_offload == offload) { if (offload & CSUM_IP) { if (txr->last_hw_ipcss == ipcss && txr->last_hw_ipcso == ipcso && txr->last_hw_tucss == tucss && txr->last_hw_tucso == tucso) return; } else { if (txr->last_hw_tucss == tucss && txr->last_hw_tucso == tucso) return; } } txr->last_hw_offload = offload; txr->last_hw_tucss = tucss; txr->last_hw_tucso = tucso; } /* * Start offset for payload checksum calculation. * End offset for payload checksum calculation. * Offset of place to put the checksum. */ TXD = (struct e1000_context_desc *)&txr->tx_base[cur]; TXD->upper_setup.tcp_fields.tucss = tucss; TXD->upper_setup.tcp_fields.tucse = htole16(0); TXD->upper_setup.tcp_fields.tucso = tucso; } if (offload & CSUM_IP) { txr->last_hw_ipcss = ipcss; txr->last_hw_ipcso = ipcso; } TXD->tcp_seg_setup.data = htole32(0); TXD->cmd_and_length = htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd); tx_buffer = &txr->tx_buffers[cur]; tx_buffer->m_head = NULL; tx_buffer->next_eop = -1; if (++cur == adapter->num_tx_desc) cur = 0; txr->tx_avail--; txr->next_avail_desc = cur; } /********************************************************************** * * Setup work for hardware segmentation offload (TSO) * **********************************************************************/ static void em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off, struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower) { struct adapter *adapter = txr->adapter; struct e1000_context_desc *TXD; struct em_txbuffer *tx_buffer; int cur, hdr_len; /* * In theory we could reuse the same TSO context if and only if the * frame is the same type (IP/TCP) and has the same MSS. However, * checking whether a frame has the same IP/TCP structure is a * hard thing, so just ignore that and always establish a * new TSO context. */ hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2); *txd_lower = (E1000_TXD_CMD_DEXT | /* Extended descr type */ E1000_TXD_DTYP_D | /* Data descr type */ E1000_TXD_CMD_TSE); /* Do TSE on this packet */ /* IP and/or TCP header checksum calculation and insertion. */ *txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8; cur = txr->next_avail_desc; tx_buffer = &txr->tx_buffers[cur]; TXD = (struct e1000_context_desc *) &txr->tx_base[cur]; /* * Start offset for header checksum calculation. * End offset for header checksum calculation. * Offset of place to put the checksum. */ TXD->lower_setup.ip_fields.ipcss = ip_off; TXD->lower_setup.ip_fields.ipcse = htole16(ip_off + (ip->ip_hl << 2) - 1); TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum); /* * Start offset for payload checksum calculation. * End offset for payload checksum calculation. * Offset of place to put the checksum. */ TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2); TXD->upper_setup.tcp_fields.tucse = 0; TXD->upper_setup.tcp_fields.tucso = ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum); /* * Payload size per packet w/o any headers. * Length of all headers up to payload.
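 *
 * Worked example (standard Ethernet/IPv4/TCP, no options): hdr_len =
 * 14 + 20 + 20 = 54, mss = tso_segsz (typically 1460), and the total
 * length written below is m_pkthdr.len - 54, i.e. payload bytes only.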
*/ TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz); TXD->tcp_seg_setup.fields.hdr_len = hdr_len; TXD->cmd_and_length = htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | /* Extended descr */ E1000_TXD_CMD_TSE | /* TSE context */ E1000_TXD_CMD_IP | /* Do IP csum */ E1000_TXD_CMD_TCP | /* Do TCP checksum */ (mp->m_pkthdr.len - (hdr_len))); /* Total len */ tx_buffer->m_head = NULL; tx_buffer->next_eop = -1; if (++cur == adapter->num_tx_desc) cur = 0; txr->tx_avail--; txr->next_avail_desc = cur; txr->tx_tso = TRUE; } /********************************************************************** * * Examine each tx_buffer in the used queue. If the hardware is done * processing the packet then free associated resources. The * tx_buffer is put back on the free queue. * **********************************************************************/ static void em_txeof(struct tx_ring *txr) { struct adapter *adapter = txr->adapter; int first, last, done, processed; struct em_txbuffer *tx_buffer; struct e1000_tx_desc *tx_desc, *eop_desc; if_t ifp = adapter->ifp; EM_TX_LOCK_ASSERT(txr); #ifdef DEV_NETMAP if (netmap_tx_irq(ifp, txr->me)) return; #endif /* DEV_NETMAP */ /* No work, make sure hang detection is disabled */ if (txr->tx_avail == adapter->num_tx_desc) { txr->busy = EM_TX_IDLE; return; } processed = 0; first = txr->next_to_clean; tx_desc = &txr->tx_base[first]; tx_buffer = &txr->tx_buffers[first]; last = tx_buffer->next_eop; eop_desc = &txr->tx_base[last]; /* * What this does is get the index of the * first descriptor AFTER the EOP of the * first packet, that way we can do the * simple comparison on the inner while loop. */ if (++last == adapter->num_tx_desc) last = 0; done = last; bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, BUS_DMASYNC_POSTREAD); while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) { /* We clean the range of the packet */ while (first != done) { tx_desc->upper.data = 0; tx_desc->lower.data = 0; tx_desc->buffer_addr = 0; ++txr->tx_avail; ++processed; if (tx_buffer->m_head) { bus_dmamap_sync(txr->txtag, tx_buffer->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(txr->txtag, tx_buffer->map); m_freem(tx_buffer->m_head); tx_buffer->m_head = NULL; } tx_buffer->next_eop = -1; if (++first == adapter->num_tx_desc) first = 0; tx_buffer = &txr->tx_buffers[first]; tx_desc = &txr->tx_base[first]; } if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); /* See if we can continue to the next packet */ last = tx_buffer->next_eop; if (last != -1) { eop_desc = &txr->tx_base[last]; /* Get new done point */ if (++last == adapter->num_tx_desc) last = 0; done = last; } else break; } bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); txr->next_to_clean = first; /* ** Hang detection: we know there's work outstanding ** or the entry return would have been taken, so no ** descriptor processed here indicates a potential hang. ** The local timer will examine this and do a reset if needed. */ if (processed == 0) { if (txr->busy != EM_TX_HUNG) ++txr->busy; } else /* At least one descriptor was cleaned */ txr->busy = EM_TX_BUSY; /* note this clears HUNG */ /* * If we have a minimum free, clear IFF_DRV_OACTIVE * to tell the stack that it is OK to send packets. * Notice that all writes of OACTIVE happen under the * TX lock which, with a single queue, guarantees * sanity. 
*/ if (txr->tx_avail >= EM_MAX_SCATTER) { if_setdrvflagbits(ifp, 0, IFF_DRV_OACTIVE); } /* Disable hang detection if all clean */ if (txr->tx_avail == adapter->num_tx_desc) txr->busy = EM_TX_IDLE; } /********************************************************************* * * Refresh RX descriptor mbufs from system mbuf buffer pool. * **********************************************************************/ static void em_refresh_mbufs(struct rx_ring *rxr, int limit) { struct adapter *adapter = rxr->adapter; struct mbuf *m; bus_dma_segment_t segs; struct em_rxbuffer *rxbuf; int i, j, error, nsegs; bool cleaned = FALSE; i = j = rxr->next_to_refresh; /* ** Get one descriptor beyond ** our work mark to control ** the loop. */ if (++j == adapter->num_rx_desc) j = 0; while (j != limit) { rxbuf = &rxr->rx_buffers[i]; if (rxbuf->m_head == NULL) { m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, adapter->rx_mbuf_sz); /* ** If we have a temporary resource shortage ** that causes a failure, just abort refresh ** for now; we will return to this point when ** reinvoked from em_rxeof. */ if (m == NULL) goto update; } else m = rxbuf->m_head; m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz; m->m_flags |= M_PKTHDR; m->m_data = m->m_ext.ext_buf; /* Use bus_dma machinery to setup the memory mapping */ error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map, m, &segs, &nsegs, BUS_DMA_NOWAIT); if (error != 0) { printf("Refresh mbufs: hdr dmamap load" " failure - %d\n", error); m_free(m); rxbuf->m_head = NULL; goto update; } rxbuf->m_head = m; rxbuf->paddr = segs.ds_addr; bus_dmamap_sync(rxr->rxtag, rxbuf->map, BUS_DMASYNC_PREREAD); em_setup_rxdesc(&rxr->rx_base[i], rxbuf); cleaned = TRUE; i = j; /* Next is precalculated for us */ rxr->next_to_refresh = i; /* Calculate next controlling index */ if (++j == adapter->num_rx_desc) j = 0; } update: /* ** Update the tail pointer only if, ** and as far as, we have refreshed. */ if (cleaned) E1000_WRITE_REG(&adapter->hw, E1000_RDT(rxr->me), rxr->next_to_refresh); return; } /********************************************************************* * * Allocate memory for rx_buffer structures. Since we use one * rx_buffer per received packet, the maximum number of rx_buffer's * that we'll need is equal to the number of receive descriptors * that we've allocated.
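*
* For example, with an assumed ring of 1024 receive descriptors,
* exactly 1024 rx_buffer structures (and their DMA maps) are
* created below, one per descriptor slot.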
* **********************************************************************/ static int em_allocate_receive_buffers(struct rx_ring *rxr) { struct adapter *adapter = rxr->adapter; device_t dev = adapter->dev; struct em_rxbuffer *rxbuf; int error; rxr->rx_buffers = malloc(sizeof(struct em_rxbuffer) * adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO); if (rxr->rx_buffers == NULL) { device_printf(dev, "Unable to allocate rx_buffer memory\n"); return (ENOMEM); } error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ 1, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ MJUM9BYTES, /* maxsize */ 1, /* nsegments */ MJUM9BYTES, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockarg */ &rxr->rxtag); if (error) { device_printf(dev, "%s: bus_dma_tag_create failed %d\n", __func__, error); goto fail; } rxbuf = rxr->rx_buffers; for (int i = 0; i < adapter->num_rx_desc; i++, rxbuf++) { rxbuf = &rxr->rx_buffers[i]; error = bus_dmamap_create(rxr->rxtag, 0, &rxbuf->map); if (error) { device_printf(dev, "%s: bus_dmamap_create failed: %d\n", __func__, error); goto fail; } } return (0); fail: em_free_receive_structures(adapter); return (error); } /********************************************************************* * * Initialize a receive ring and its buffers. * **********************************************************************/ static int em_setup_receive_ring(struct rx_ring *rxr) { struct adapter *adapter = rxr->adapter; struct em_rxbuffer *rxbuf; bus_dma_segment_t seg[1]; int rsize, nsegs, error = 0; #ifdef DEV_NETMAP struct netmap_slot *slot; struct netmap_adapter *na = netmap_getna(adapter->ifp); #endif /* Clear the ring contents */ EM_RX_LOCK(rxr); rsize = roundup2(adapter->num_rx_desc * sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN); bzero((void *)rxr->rx_base, rsize); #ifdef DEV_NETMAP slot = netmap_reset(na, NR_RX, rxr->me, 0); #endif /* ** Free current RX buffer structs and their mbufs */ for (int i = 0; i < adapter->num_rx_desc; i++) { rxbuf = &rxr->rx_buffers[i]; if (rxbuf->m_head != NULL) { bus_dmamap_sync(rxr->rxtag, rxbuf->map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(rxr->rxtag, rxbuf->map); m_freem(rxbuf->m_head); rxbuf->m_head = NULL; /* mark as freed */ } } /* Now replenish the mbufs */ for (int j = 0; j != adapter->num_rx_desc; ++j) { rxbuf = &rxr->rx_buffers[j]; #ifdef DEV_NETMAP if (slot) { int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j); uint64_t paddr; void *addr; addr = PNMB(na, slot + si, &paddr); netmap_load_map(na, rxr->rxtag, rxbuf->map, addr); em_setup_rxdesc(&rxr->rx_base[j], rxbuf); continue; } #endif /* DEV_NETMAP */ rxbuf->m_head = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, adapter->rx_mbuf_sz); if (rxbuf->m_head == NULL) { error = ENOBUFS; goto fail; } rxbuf->m_head->m_len = adapter->rx_mbuf_sz; rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */ rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz; /* Get the memory mapping */ error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map, rxbuf->m_head, seg, &nsegs, BUS_DMA_NOWAIT); if (error != 0) { m_freem(rxbuf->m_head); rxbuf->m_head = NULL; goto fail; } bus_dmamap_sync(rxr->rxtag, rxbuf->map, BUS_DMASYNC_PREREAD); rxbuf->paddr = seg[0].ds_addr; em_setup_rxdesc(&rxr->rx_base[j], rxbuf); } rxr->next_to_check = 0; rxr->next_to_refresh = 0; bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); fail: EM_RX_UNLOCK(rxr); return (error); } 
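#if 0
/*
 * Illustrative sketch (not compiled): the ring-index arithmetic used
 * by both the TX and RX paths above.  Indices wrap with a compare
 * instead of a modulo, matching idioms like
 * "if (++cur == adapter->num_tx_desc) cur = 0;":
 */
static inline int
em_ring_next(int cur, int num_desc)
{

	if (++cur == num_desc)	/* wrap at the end of the ring */
		cur = 0;
	return (cur);
}
#endif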
/********************************************************************* * * Initialize all receive rings. * **********************************************************************/ static int em_setup_receive_structures(struct adapter *adapter) { struct rx_ring *rxr = adapter->rx_rings; int q; for (q = 0; q < adapter->num_queues; q++, rxr++) if (em_setup_receive_ring(rxr)) goto fail; return (0); fail: /* * Free the RX buffers allocated so far; we only handle * the rings that completed, as the failing ring has * cleaned up after itself. 'q' is the ring that failed, * so it's the terminus. */ for (int i = 0; i < q; ++i) { rxr = &adapter->rx_rings[i]; for (int n = 0; n < adapter->num_rx_desc; n++) { struct em_rxbuffer *rxbuf; rxbuf = &rxr->rx_buffers[n]; if (rxbuf->m_head != NULL) { bus_dmamap_sync(rxr->rxtag, rxbuf->map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(rxr->rxtag, rxbuf->map); m_freem(rxbuf->m_head); rxbuf->m_head = NULL; } } rxr->next_to_check = 0; rxr->next_to_refresh = 0; } return (ENOBUFS); } /********************************************************************* * * Free all receive rings. * **********************************************************************/ static void em_free_receive_structures(struct adapter *adapter) { struct rx_ring *rxr = adapter->rx_rings; for (int i = 0; i < adapter->num_queues; i++, rxr++) { em_free_receive_buffers(rxr); /* Free the ring memory as well */ em_dma_free(adapter, &rxr->rxdma); EM_RX_LOCK_DESTROY(rxr); } free(adapter->rx_rings, M_DEVBUF); } /********************************************************************* * * Free receive ring data structures * **********************************************************************/ static void em_free_receive_buffers(struct rx_ring *rxr) { struct adapter *adapter = rxr->adapter; struct em_rxbuffer *rxbuf = NULL; INIT_DEBUGOUT("free_receive_buffers: begin"); if (rxr->rx_buffers != NULL) { for (int i = 0; i < adapter->num_rx_desc; i++) { rxbuf = &rxr->rx_buffers[i]; if (rxbuf->map != NULL) { bus_dmamap_sync(rxr->rxtag, rxbuf->map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(rxr->rxtag, rxbuf->map); bus_dmamap_destroy(rxr->rxtag, rxbuf->map); } if (rxbuf->m_head != NULL) { m_freem(rxbuf->m_head); rxbuf->m_head = NULL; } } free(rxr->rx_buffers, M_DEVBUF); rxr->rx_buffers = NULL; rxr->next_to_check = 0; rxr->next_to_refresh = 0; } if (rxr->rxtag != NULL) { bus_dma_tag_destroy(rxr->rxtag); rxr->rxtag = NULL; } return; } /********************************************************************* * * Enable receive unit.
* **********************************************************************/ static void em_initialize_receive_unit(struct adapter *adapter) { struct rx_ring *rxr = adapter->rx_rings; if_t ifp = adapter->ifp; struct e1000_hw *hw = &adapter->hw; u32 rctl, rxcsum, rfctl; INIT_DEBUGOUT("em_initialize_receive_units: begin"); /* * Make sure receives are disabled while setting * up the descriptor ring */ rctl = E1000_READ_REG(hw, E1000_RCTL); /* Do not disable if ever enabled on this hardware */ if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583)) E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN); /* Setup the Receive Control Register */ rctl &= ~(3 << E1000_RCTL_MO_SHIFT); rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF | (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT); /* Do not store bad packets */ rctl &= ~E1000_RCTL_SBP; /* Enable Long Packet receive */ if (if_getmtu(ifp) > ETHERMTU) rctl |= E1000_RCTL_LPE; else rctl &= ~E1000_RCTL_LPE; /* Strip the CRC */ if (!em_disable_crc_stripping) rctl |= E1000_RCTL_SECRC; E1000_WRITE_REG(&adapter->hw, E1000_RADV, adapter->rx_abs_int_delay.value); E1000_WRITE_REG(&adapter->hw, E1000_RDTR, adapter->rx_int_delay.value); /* * Set the interrupt throttling rate. Value is calculated * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns) */ E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR); /* Use extended rx descriptor formats */ rfctl = E1000_READ_REG(hw, E1000_RFCTL); rfctl |= E1000_RFCTL_EXTEN; /* ** When using MSIX interrupts we need to throttle ** using the EITR register (82574 only) */ if (hw->mac.type == e1000_82574) { for (int i = 0; i < 4; i++) E1000_WRITE_REG(hw, E1000_EITR_82574(i), DEFAULT_ITR); /* Disable accelerated acknowledge */ rfctl |= E1000_RFCTL_ACK_DIS; } E1000_WRITE_REG(hw, E1000_RFCTL, rfctl); rxcsum = E1000_READ_REG(hw, E1000_RXCSUM); if (if_getcapenable(ifp) & IFCAP_RXCSUM) { #ifdef EM_MULTIQUEUE rxcsum |= E1000_RXCSUM_TUOFL | E1000_RXCSUM_IPOFL | E1000_RXCSUM_PCSD; #else rxcsum |= E1000_RXCSUM_TUOFL; #endif } else rxcsum &= ~E1000_RXCSUM_TUOFL; E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum); #ifdef EM_MULTIQUEUE #define RSSKEYLEN 10 if (adapter->num_queues > 1) { uint8_t rss_key[4 * RSSKEYLEN]; uint32_t reta = 0; int i; /* * Configure RSS key */ arc4rand(rss_key, sizeof(rss_key), 0); for (i = 0; i < RSSKEYLEN; ++i) { uint32_t rssrk = 0; rssrk = EM_RSSRK_VAL(rss_key, i); E1000_WRITE_REG(hw,E1000_RSSRK(i), rssrk); } /* * Configure RSS redirect table in following fashion: * (hash & ring_cnt_mask) == rdr_table[(hash & rdr_table_mask)] */ for (i = 0; i < sizeof(reta); ++i) { uint32_t q; q = (i % adapter->num_queues) << 7; reta |= q << (8 * i); } for (i = 0; i < 32; ++i) { E1000_WRITE_REG(hw, E1000_RETA(i), reta); } E1000_WRITE_REG(hw, E1000_MRQC, E1000_MRQC_RSS_ENABLE_2Q | E1000_MRQC_RSS_FIELD_IPV4_TCP | E1000_MRQC_RSS_FIELD_IPV4 | E1000_MRQC_RSS_FIELD_IPV6_TCP_EX | E1000_MRQC_RSS_FIELD_IPV6_EX | E1000_MRQC_RSS_FIELD_IPV6); } #endif /* ** XXX TEMPORARY WORKAROUND: on some systems with 82573 ** long latencies are observed, like Lenovo X60. This ** change eliminates the problem, but since having positive ** values in RDTR is a known source of problems on other ** platforms another solution is being sought. 
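**
** (For scale: RDTR counts in 1.024 usec units -- unit size per the
** 8254x documentation, treat as an assumption here -- so the 0x20
** written below delays receive interrupts by roughly
** 32 * 1.024 ~= 33 usec.)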
*/ if (hw->mac.type == e1000_82573) E1000_WRITE_REG(hw, E1000_RDTR, 0x20); for (int i = 0; i < adapter->num_queues; i++, rxr++) { /* Setup the Base and Length of the Rx Descriptor Ring */ u64 bus_addr = rxr->rxdma.dma_paddr; u32 rdt = adapter->num_rx_desc - 1; /* default */ E1000_WRITE_REG(hw, E1000_RDLEN(i), adapter->num_rx_desc * sizeof(union e1000_rx_desc_extended)); E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32)); E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr); /* Setup the Head and Tail Descriptor Pointers */ E1000_WRITE_REG(hw, E1000_RDH(i), 0); #ifdef DEV_NETMAP /* * an init() while a netmap client is active must * preserve the rx buffers passed to userspace. */ if (if_getcapenable(ifp) & IFCAP_NETMAP) { struct netmap_adapter *na = netmap_getna(adapter->ifp); rdt -= nm_kr_rxspace(&na->rx_rings[i]); } #endif /* DEV_NETMAP */ E1000_WRITE_REG(hw, E1000_RDT(i), rdt); } /* * Set PTHRESH for improved jumbo performance * According to 10.2.5.11 of Intel 82574 Datasheet, * RXDCTL(1) is written whenever RXDCTL(0) is written. * Only write to RXDCTL(1) if there is a need for different * settings. */ if (((adapter->hw.mac.type == e1000_ich9lan) || (adapter->hw.mac.type == e1000_pch2lan) || (adapter->hw.mac.type == e1000_ich10lan)) && (if_getmtu(ifp) > ETHERMTU)) { u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0)); E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3); } else if (adapter->hw.mac.type == e1000_82574) { for (int i = 0; i < adapter->num_queues; i++) { u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i)); rxdctl |= 0x20; /* PTHRESH */ rxdctl |= 4 << 8; /* HTHRESH */ rxdctl |= 4 << 16;/* WTHRESH */ rxdctl |= 1 << 24; /* Switch to granularity */ E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl); } } if (adapter->hw.mac.type >= e1000_pch2lan) { if (if_getmtu(ifp) > ETHERMTU) e1000_lv_jumbo_workaround_ich8lan(hw, TRUE); else e1000_lv_jumbo_workaround_ich8lan(hw, FALSE); } /* Make sure VLAN Filters are off */ rctl &= ~E1000_RCTL_VFE; if (adapter->rx_mbuf_sz == MCLBYTES) rctl |= E1000_RCTL_SZ_2048; else if (adapter->rx_mbuf_sz == MJUMPAGESIZE) rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX; else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX; /* ensure we clear use DTYPE of 00 here */ rctl &= ~0x00000C00; /* Write out the settings */ E1000_WRITE_REG(hw, E1000_RCTL, rctl); return; } /********************************************************************* * * This routine executes in interrupt context. It replenishes * the mbufs in the descriptor and sends data which has been * dma'ed into host memory to upper layer. * * We loop at most count times if count is > 0, or until done if * count < 0. 
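*
* For example, a poll budget of count = 64 (illustrative) stops the
* loop after 64 complete frames have been handed to the stack, while
* a negative count runs until no descriptor has the DD bit set.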
* * For polling we also now return the number of cleaned packets *********************************************************************/ static bool em_rxeof(struct rx_ring *rxr, int count, int *done) { struct adapter *adapter = rxr->adapter; if_t ifp = adapter->ifp; struct mbuf *mp, *sendmp; u32 status = 0; u16 len; int i, processed, rxdone = 0; bool eop; union e1000_rx_desc_extended *cur; EM_RX_LOCK(rxr); /* Sync the ring */ bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); #ifdef DEV_NETMAP if (netmap_rx_irq(ifp, rxr->me, &processed)) { EM_RX_UNLOCK(rxr); return (FALSE); } #endif /* DEV_NETMAP */ for (i = rxr->next_to_check, processed = 0; count != 0;) { if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) break; cur = &rxr->rx_base[i]; status = le32toh(cur->wb.upper.status_error); mp = sendmp = NULL; if ((status & E1000_RXD_STAT_DD) == 0) break; len = le16toh(cur->wb.upper.length); eop = (status & E1000_RXD_STAT_EOP) != 0; if ((status & E1000_RXDEXT_ERR_FRAME_ERR_MASK) || (rxr->discard == TRUE)) { adapter->dropped_pkts++; ++rxr->rx_discarded; if (!eop) /* Catch subsequent segs */ rxr->discard = TRUE; else rxr->discard = FALSE; em_rx_discard(rxr, i); goto next_desc; } bus_dmamap_unload(rxr->rxtag, rxr->rx_buffers[i].map); /* Assign correct length to the current fragment */ mp = rxr->rx_buffers[i].m_head; mp->m_len = len; /* Trigger for refresh */ rxr->rx_buffers[i].m_head = NULL; /* First segment? */ if (rxr->fmp == NULL) { mp->m_pkthdr.len = len; rxr->fmp = rxr->lmp = mp; } else { /* Chain mbuf's together */ mp->m_flags &= ~M_PKTHDR; rxr->lmp->m_next = mp; rxr->lmp = mp; rxr->fmp->m_pkthdr.len += len; } if (eop) { --count; sendmp = rxr->fmp; if_setrcvif(sendmp, ifp); if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); em_receive_checksum(status, sendmp); #ifndef __NO_STRICT_ALIGNMENT if (adapter->hw.mac.max_frame_size > (MCLBYTES - ETHER_ALIGN) && em_fixup_rx(rxr) != 0) goto skip; #endif if (status & E1000_RXD_STAT_VP) { if_setvtag(sendmp, le16toh(cur->wb.upper.vlan)); sendmp->m_flags |= M_VLANTAG; } #ifndef __NO_STRICT_ALIGNMENT skip: #endif rxr->fmp = rxr->lmp = NULL; } next_desc: /* Sync the ring */ bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); /* Zero out the receive descriptors status. */ cur->wb.upper.status_error &= htole32(~0xFF); ++rxdone; /* cumulative for POLL */ ++processed; /* Advance our pointers to the next descriptor. */ if (++i == adapter->num_rx_desc) i = 0; /* Send to the stack */ if (sendmp != NULL) { rxr->next_to_check = i; EM_RX_UNLOCK(rxr); if_input(ifp, sendmp); EM_RX_LOCK(rxr); i = rxr->next_to_check; } /* Only refresh mbufs every 8 descriptors */ if (processed == 8) { em_refresh_mbufs(rxr, i); processed = 0; } } /* Catch any remaining refresh work */ if (e1000_rx_unrefreshed(rxr)) em_refresh_mbufs(rxr, i); rxr->next_to_check = i; if (done != NULL) *done = rxdone; EM_RX_UNLOCK(rxr); return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE); } static __inline void em_rx_discard(struct rx_ring *rxr, int i) { struct em_rxbuffer *rbuf; rbuf = &rxr->rx_buffers[i]; bus_dmamap_unload(rxr->rxtag, rbuf->map); /* Free any previous pieces */ if (rxr->fmp != NULL) { rxr->fmp->m_flags |= M_PKTHDR; m_freem(rxr->fmp); rxr->fmp = NULL; rxr->lmp = NULL; } /* ** Free buffer and allow em_refresh_mbufs() ** to clean up and recharge buffer. 
*/ if (rbuf->m_head) { m_free(rbuf->m_head); rbuf->m_head = NULL; } return; } #ifndef __NO_STRICT_ALIGNMENT /* * When jumbo frames are enabled we should realign the entire payload on * architectures with strict alignment. This is a serious design mistake of the * 8254x, as it nullifies DMA operations: the 8254x only allows the RX buffer size * to be 2048/4096/8192/16384. What we really want is 2048 - ETHER_ALIGN, to align * the payload. On architectures without strict alignment restrictions the 8254x * still performs unaligned memory accesses, which reduce performance as well. * To avoid copying an entire frame to realign it, we allocate a new mbuf and * copy the ethernet header to the new mbuf. The new mbuf is prepended into the * existing mbuf chain. * * Be aware, the best performance of the 8254x is achieved only when jumbo frames * are not used at all on architectures with strict alignment. */ static int em_fixup_rx(struct rx_ring *rxr) { struct adapter *adapter = rxr->adapter; struct mbuf *m, *n; int error; error = 0; m = rxr->fmp; if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) { bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len); m->m_data += ETHER_HDR_LEN; } else { MGETHDR(n, M_NOWAIT, MT_DATA); if (n != NULL) { bcopy(m->m_data, n->m_data, ETHER_HDR_LEN); m->m_data += ETHER_HDR_LEN; m->m_len -= ETHER_HDR_LEN; n->m_len = ETHER_HDR_LEN; M_MOVE_PKTHDR(n, m); n->m_next = m; rxr->fmp = n; } else { adapter->dropped_pkts++; m_freem(rxr->fmp); rxr->fmp = NULL; error = ENOMEM; } } return (error); } #endif static void em_setup_rxdesc(union e1000_rx_desc_extended *rxd, const struct em_rxbuffer *rxbuf) { rxd->read.buffer_addr = htole64(rxbuf->paddr); /* DD bits must be cleared */ rxd->wb.upper.status_error = 0; } /********************************************************************* * * Verify that the hardware indicated that the checksum is valid. * Inform the stack about the status of the checksum so that the * stack doesn't spend time verifying it.
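*
* Decision sketch (mirroring the status bits tested below): IPCS set
* with IPE clear yields CSUM_IP_CHECKED|CSUM_IP_VALID; TCPCS set with
* TCPE clear, or UDPCS set, yields CSUM_DATA_VALID|CSUM_PSEUDO_HDR
* with csum_data = 0xffff, so the stack skips re-verification.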
* *********************************************************************/ static void em_receive_checksum(uint32_t status, struct mbuf *mp) { mp->m_pkthdr.csum_flags = 0; /* The Ignore Checksum (IXSM) bit is set */ if (status & E1000_RXD_STAT_IXSM) return; /* If the IP checksum exists and there is no IP Checksum error */ if ((status & (E1000_RXD_STAT_IPCS | E1000_RXDEXT_STATERR_IPE)) == E1000_RXD_STAT_IPCS) { mp->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID); } /* TCP or UDP checksum */ if ((status & (E1000_RXD_STAT_TCPCS | E1000_RXDEXT_STATERR_TCPE)) == E1000_RXD_STAT_TCPCS) { mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); mp->m_pkthdr.csum_data = htons(0xffff); } if (status & E1000_RXD_STAT_UDPCS) { mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); mp->m_pkthdr.csum_data = htons(0xffff); } } /* * This routine is run via a vlan * config EVENT */ static void em_register_vlan(void *arg, if_t ifp, u16 vtag) { struct adapter *adapter = if_getsoftc(ifp); u32 index, bit; if ((void*)adapter != arg) /* Not our event */ return; if ((vtag == 0) || (vtag > 4095)) /* Invalid ID */ return; EM_CORE_LOCK(adapter); index = (vtag >> 5) & 0x7F; bit = vtag & 0x1F; adapter->shadow_vfta[index] |= (1 << bit); ++adapter->num_vlans; /* Re-init to load the changes */ if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER) em_init_locked(adapter); EM_CORE_UNLOCK(adapter); } /* * This routine is run via a vlan * unconfig EVENT */ static void em_unregister_vlan(void *arg, if_t ifp, u16 vtag) { struct adapter *adapter = if_getsoftc(ifp); u32 index, bit; if (adapter != arg) return; if ((vtag == 0) || (vtag > 4095)) /* Invalid */ return; EM_CORE_LOCK(adapter); index = (vtag >> 5) & 0x7F; bit = vtag & 0x1F; adapter->shadow_vfta[index] &= ~(1 << bit); --adapter->num_vlans; /* Re-init to load the changes */ if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER) em_init_locked(adapter); EM_CORE_UNLOCK(adapter); } static void em_setup_vlan_hw_support(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; u32 reg; /* ** We get here thru init_locked, meaning ** a soft reset; that has already cleared ** the VFTA and other state, so if no ** vlans have been registered, do nothing. */ if (adapter->num_vlans == 0) return; /* ** A soft reset zeroes out the VFTA, so ** we need to repopulate it now. */ for (int i = 0; i < EM_VFTA_SIZE; i++) if (adapter->shadow_vfta[i] != 0) E1000_WRITE_REG_ARRAY(hw, E1000_VFTA, i, adapter->shadow_vfta[i]); reg = E1000_READ_REG(hw, E1000_CTRL); reg |= E1000_CTRL_VME; E1000_WRITE_REG(hw, E1000_CTRL, reg); /* Enable the Filter Table */ reg = E1000_READ_REG(hw, E1000_RCTL); reg &= ~E1000_RCTL_CFIEN; reg |= E1000_RCTL_VFE; E1000_WRITE_REG(hw, E1000_RCTL, reg); } static void em_enable_intr(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; u32 ims_mask = IMS_ENABLE_MASK; if (hw->mac.type == e1000_82574) { E1000_WRITE_REG(hw, EM_EIAC, adapter->ims); ims_mask |= adapter->ims; } E1000_WRITE_REG(hw, E1000_IMS, ims_mask); } static void em_disable_intr(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; if (hw->mac.type == e1000_82574) E1000_WRITE_REG(hw, EM_EIAC, 0); E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff); } /* * Bit of a misnomer: what this really means is * to enable OS management of the system...
aka * to disable special hardware management features */ static void em_init_manageability(struct adapter *adapter) { /* A shared code workaround */ #define E1000_82542_MANC2H E1000_MANC2H if (adapter->has_manage) { int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H); int manc = E1000_READ_REG(&adapter->hw, E1000_MANC); /* disable hardware interception of ARP */ manc &= ~(E1000_MANC_ARP_EN); /* enable receiving management packets to the host */ manc |= E1000_MANC_EN_MNG2HOST; #define E1000_MNG2HOST_PORT_623 (1 << 5) #define E1000_MNG2HOST_PORT_664 (1 << 6) manc2h |= E1000_MNG2HOST_PORT_623; manc2h |= E1000_MNG2HOST_PORT_664; E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h); E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc); } } /* * Give control back to hardware management * controller if there is one. */ static void em_release_manageability(struct adapter *adapter) { if (adapter->has_manage) { int manc = E1000_READ_REG(&adapter->hw, E1000_MANC); /* re-enable hardware interception of ARP */ manc |= E1000_MANC_ARP_EN; manc &= ~E1000_MANC_EN_MNG2HOST; E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc); } } /* * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit. * For ASF and Pass Through versions of f/w this means * that the driver is loaded. For AMT version type f/w * this means that the network i/f is open. */ static void em_get_hw_control(struct adapter *adapter) { u32 ctrl_ext, swsm; if (adapter->hw.mac.type == e1000_82573) { swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM); E1000_WRITE_REG(&adapter->hw, E1000_SWSM, swsm | E1000_SWSM_DRV_LOAD); return; } /* else */ ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext | E1000_CTRL_EXT_DRV_LOAD); return; } /* * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit. * For ASF and Pass Through versions of f/w this means that * the driver is no longer loaded. For AMT versions of the * f/w this means that the network i/f is closed. */ static void em_release_hw_control(struct adapter *adapter) { u32 ctrl_ext, swsm; if (!adapter->has_manage) return; if (adapter->hw.mac.type == e1000_82573) { swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM); E1000_WRITE_REG(&adapter->hw, E1000_SWSM, swsm & ~E1000_SWSM_DRV_LOAD); return; } /* else */ ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD); return; } static int em_is_valid_ether_addr(u8 *addr) { char zero_addr[6] = { 0, 0, 0, 0, 0, 0 }; if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) { return (FALSE); } return (TRUE); } /* ** Parse the interface capabilities with regard ** to both system management and wake-on-lan for ** later use. 
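**
** For example, on an assumed 82573 the switch below sets has_amt,
** reads NVM_INIT_CONTROL3 for the port, and seeds adapter->wol with
** (E1000_WUFC_MAG | E1000_WUFC_MC) when the APME bit is found set.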
*/ static void em_get_wakeup(device_t dev) { struct adapter *adapter = device_get_softc(dev); u16 eeprom_data = 0, device_id, apme_mask; adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw); apme_mask = EM_EEPROM_APME; switch (adapter->hw.mac.type) { case e1000_82573: case e1000_82583: adapter->has_amt = TRUE; /* Falls thru */ case e1000_82571: case e1000_82572: case e1000_80003es2lan: if (adapter->hw.bus.func == 1) { e1000_read_nvm(&adapter->hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data); break; } else e1000_read_nvm(&adapter->hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data); break; case e1000_ich8lan: case e1000_ich9lan: case e1000_ich10lan: case e1000_pchlan: case e1000_pch2lan: apme_mask = E1000_WUC_APME; adapter->has_amt = TRUE; eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC); break; default: e1000_read_nvm(&adapter->hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data); break; } if (eeprom_data & apme_mask) adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC); /* * We have the eeprom settings, now apply the special cases * where the eeprom may be wrong or the board won't support * wake on lan on a particular port */ device_id = pci_get_device(dev); switch (device_id) { case E1000_DEV_ID_82571EB_FIBER: /* Wake events only supported on port A for dual fiber * regardless of eeprom setting */ if (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_FUNC_1) adapter->wol = 0; break; case E1000_DEV_ID_82571EB_QUAD_COPPER: case E1000_DEV_ID_82571EB_QUAD_FIBER: case E1000_DEV_ID_82571EB_QUAD_COPPER_LP: /* if quad port adapter, disable WoL on all but port A */ if (global_quad_port_a != 0) adapter->wol = 0; /* Reset for multiple quad port adapters */ if (++global_quad_port_a == 4) global_quad_port_a = 0; break; } return; } /* * Enable PCI Wake On Lan capability */ static void em_enable_wakeup(device_t dev) { struct adapter *adapter = device_get_softc(dev); if_t ifp = adapter->ifp; u32 pmc, ctrl, ctrl_ext, rctl; u16 status; if ((pci_find_cap(dev, PCIY_PMG, &pmc) != 0)) return; /* Advertise the wakeup capability */ ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL); ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3); E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl); E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN); if ((adapter->hw.mac.type == e1000_ich8lan) || (adapter->hw.mac.type == e1000_pchlan) || (adapter->hw.mac.type == e1000_ich9lan) || (adapter->hw.mac.type == e1000_ich10lan)) e1000_suspend_workarounds_ich8lan(&adapter->hw); /* Keep the laser running on Fiber adapters */ if (adapter->hw.phy.media_type == e1000_media_type_fiber || adapter->hw.phy.media_type == e1000_media_type_internal_serdes) { ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA; E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext); } /* ** Determine type of Wakeup: note that wol ** is set with all bits on by default. 
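**
** Example: wol starts as (E1000_WUFC_MAG | E1000_WUFC_MC) from
** em_get_wakeup(); if the user enabled only IFCAP_WOL_MAGIC via
** ifconfig, the code below clears E1000_WUFC_MC, so the port wakes
** on magic packets but not on multicast traffic.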
*/ if ((if_getcapenable(ifp) & IFCAP_WOL_MAGIC) == 0) adapter->wol &= ~E1000_WUFC_MAG; if ((if_getcapenable(ifp) & IFCAP_WOL_MCAST) == 0) adapter->wol &= ~E1000_WUFC_MC; else { rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); rctl |= E1000_RCTL_MPE; E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl); } if ((adapter->hw.mac.type == e1000_pchlan) || (adapter->hw.mac.type == e1000_pch2lan)) { if (em_enable_phy_wakeup(adapter)) return; } else { E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN); E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol); } if (adapter->hw.phy.type == e1000_phy_igp_3) e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw); /* Request PME */ status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2); status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE); if (if_getcapenable(ifp) & IFCAP_WOL) status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE; pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2); return; } /* ** WOL in the newer chipset interfaces (pchlan) ** requires things to be copied into the PHY */ static int em_enable_phy_wakeup(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; u32 mreg, ret = 0; u16 preg; /* copy MAC RARs to PHY RARs */ e1000_copy_rx_addrs_to_phy_ich8lan(hw); /* copy MAC MTA to PHY MTA */ for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) { mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i); e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF)); e1000_write_phy_reg(hw, BM_MTA(i) + 1, (u16)((mreg >> 16) & 0xFFFF)); } /* configure PHY Rx Control register */ e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg); mreg = E1000_READ_REG(hw, E1000_RCTL); if (mreg & E1000_RCTL_UPE) preg |= BM_RCTL_UPE; if (mreg & E1000_RCTL_MPE) preg |= BM_RCTL_MPE; preg &= ~(BM_RCTL_MO_MASK); if (mreg & E1000_RCTL_MO_3) preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT) << BM_RCTL_MO_SHIFT); if (mreg & E1000_RCTL_BAM) preg |= BM_RCTL_BAM; if (mreg & E1000_RCTL_PMCF) preg |= BM_RCTL_PMCF; mreg = E1000_READ_REG(hw, E1000_CTRL); if (mreg & E1000_CTRL_RFCE) preg |= BM_RCTL_RFCE; e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg); /* enable PHY wakeup in MAC register */ E1000_WRITE_REG(hw, E1000_WUC, E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN); E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol); /* configure and enable PHY wakeup in PHY registers */ e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol); e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN); /* activate PHY wakeup */ ret = hw->phy.ops.acquire(hw); if (ret) { printf("Could not acquire PHY\n"); return ret; } e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT, (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT)); ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg); if (ret) { printf("Could not read PHY page 769\n"); goto out; } preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT; ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg); if (ret) printf("Could not set PHY Host Wakeup bit\n"); out: hw->phy.ops.release(hw); return ret; } static void em_led_func(void *arg, int onoff) { struct adapter *adapter = arg; EM_CORE_LOCK(adapter); if (onoff) { e1000_setup_led(&adapter->hw); e1000_led_on(&adapter->hw); } else { e1000_led_off(&adapter->hw); e1000_cleanup_led(&adapter->hw); } EM_CORE_UNLOCK(adapter); } /* ** Disable the L0s and L1 link states */ static void em_disable_aspm(struct adapter *adapter) { int base, reg; u16 link_cap, link_ctrl; device_t dev = adapter->dev; switch (adapter->hw.mac.type) { case e1000_82573: case e1000_82574: case e1000_82583: break; default: return; } if
(pci_find_cap(dev, PCIY_EXPRESS, &base) != 0) return; reg = base + PCIER_LINK_CAP; link_cap = pci_read_config(dev, reg, 2); if ((link_cap & PCIEM_LINK_CAP_ASPM) == 0) return; reg = base + PCIER_LINK_CTL; link_ctrl = pci_read_config(dev, reg, 2); link_ctrl &= ~PCIEM_LINK_CTL_ASPMC; pci_write_config(dev, reg, link_ctrl, 2); return; } /********************************************************************** * * Update the board statistics counters. * **********************************************************************/ static void em_update_stats_counters(struct adapter *adapter) { if(adapter->hw.phy.media_type == e1000_media_type_copper || (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) { adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS); adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC); } adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS); adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC); adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC); adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL); adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC); adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL); adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC); adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC); adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC); adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC); adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC); adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC); adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC); adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC); adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64); adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127); adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255); adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511); adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023); adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522); adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC); adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC); adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC); adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC); /* For the 64-bit byte counters the low dword must be read first. 
*/ /* Both registers clear on the read of the high dword */ adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) + ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32); adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) + ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32); adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC); adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC); adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC); adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC); adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC); adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH); adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH); adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR); adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT); adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64); adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127); adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255); adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511); adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023); adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522); adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC); adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC); /* Interrupt Counts */ adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC); adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC); adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC); adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC); adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC); adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC); adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC); adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC); adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC); if (adapter->hw.mac.type >= e1000_82543) { adapter->stats.algnerrc += E1000_READ_REG(&adapter->hw, E1000_ALGNERRC); adapter->stats.rxerrc += E1000_READ_REG(&adapter->hw, E1000_RXERRC); adapter->stats.tncrs += E1000_READ_REG(&adapter->hw, E1000_TNCRS); adapter->stats.cexterr += E1000_READ_REG(&adapter->hw, E1000_CEXTERR); adapter->stats.tsctc += E1000_READ_REG(&adapter->hw, E1000_TSCTC); adapter->stats.tsctfc += E1000_READ_REG(&adapter->hw, E1000_TSCTFC); } } static uint64_t em_get_counter(if_t ifp, ift_counter cnt) { struct adapter *adapter; adapter = if_getsoftc(ifp); switch (cnt) { case IFCOUNTER_COLLISIONS: return (adapter->stats.colc); case IFCOUNTER_IERRORS: return (adapter->dropped_pkts + adapter->stats.rxerrc + adapter->stats.crcerrs + adapter->stats.algnerrc + adapter->stats.ruc + adapter->stats.roc + adapter->stats.mpc + adapter->stats.cexterr); case IFCOUNTER_OERRORS: return (adapter->stats.ecol + adapter->stats.latecol + adapter->watchdog_events); default: return (if_get_counter_default(ifp, cnt)); } } /* Export a single 32-bit register via a read-only sysctl. */ static int em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS) { struct adapter *adapter; u_int val; adapter = oidp->oid_arg1; val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2); return (sysctl_handle_int(oidp, &val, 0, req)); } /* * Add sysctl variables, one per statistic, to the system. 
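*
* Usage example (node names assumed from the registrations below):
*
*   # sysctl dev.em.0.mac_stats.good_pkts_recvd
*   # sysctl dev.em.0.queue_tx_0.no_desc_avail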
*/ static void em_add_hw_stats(struct adapter *adapter) { device_t dev = adapter->dev; struct tx_ring *txr = adapter->tx_rings; struct rx_ring *rxr = adapter->rx_rings; struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev); struct sysctl_oid *tree = device_get_sysctl_tree(dev); struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree); struct e1000_hw_stats *stats = &adapter->stats; struct sysctl_oid *stat_node, *queue_node, *int_node; struct sysctl_oid_list *stat_list, *queue_list, *int_list; #define QUEUE_NAME_LEN 32 char namebuf[QUEUE_NAME_LEN]; /* Driver Statistics */ SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", CTLFLAG_RD, &adapter->dropped_pkts, "Driver dropped packets"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq", CTLFLAG_RD, &adapter->link_irq, "Link MSIX IRQ Handled"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_fail", CTLFLAG_RD, &adapter->mbuf_defrag_failed, "Defragmenting mbuf chain failed"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail", CTLFLAG_RD, &adapter->no_tx_dma_setup, "Driver tx dma failure in xmit"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns", CTLFLAG_RD, &adapter->rx_overruns, "RX overruns"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts", CTLFLAG_RD, &adapter->watchdog_events, "Watchdog timeouts"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control", CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL, em_sysctl_reg_handler, "IU", "Device Control Register"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control", CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL, em_sysctl_reg_handler, "IU", "Receiver Control Register"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water", CTLFLAG_RD, &adapter->hw.fc.high_water, 0, "Flow Control High Watermark"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water", CTLFLAG_RD, &adapter->hw.fc.low_water, 0, "Flow Control Low Watermark"); for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) { snprintf(namebuf, QUEUE_NAME_LEN, "queue_tx_%d", i); queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "TX Queue Name"); queue_list = SYSCTL_CHILDREN(queue_node); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDH(txr->me), em_sysctl_reg_handler, "IU", "Transmit Descriptor Head"); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDT(txr->me), em_sysctl_reg_handler, "IU", "Transmit Descriptor Tail"); SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq", CTLFLAG_RD, &txr->tx_irq, "Queue MSI-X Transmit Interrupts"); SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail", CTLFLAG_RD, &txr->no_desc_avail, "Queue No Descriptor Available"); snprintf(namebuf, QUEUE_NAME_LEN, "queue_rx_%d", i); queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "RX Queue Name"); queue_list = SYSCTL_CHILDREN(queue_node); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDH(rxr->me), em_sysctl_reg_handler, "IU", "Receive Descriptor Head"); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDT(rxr->me), em_sysctl_reg_handler, "IU", "Receive Descriptor Tail"); SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq", CTLFLAG_RD, &rxr->rx_irq, "Queue MSI-X Receive Interrupts"); } /* MAC stats get their own sub node */ stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", CTLFLAG_RD, NULL, "Statistics"); stat_list = SYSCTL_CHILDREN(stat_node); SYSCTL_ADD_UQUAD(ctx, stat_list, 
OID_AUTO, "excess_coll", CTLFLAG_RD, &stats->ecol, "Excessive collisions"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll", CTLFLAG_RD, &stats->scc, "Single collisions"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll", CTLFLAG_RD, &stats->mcc, "Multiple collisions"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll", CTLFLAG_RD, &stats->latecol, "Late collisions"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count", CTLFLAG_RD, &stats->colc, "Collision Count"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors", CTLFLAG_RD, &adapter->stats.symerrs, "Symbol Errors"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors", CTLFLAG_RD, &adapter->stats.sec, "Sequence Errors"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count", CTLFLAG_RD, &adapter->stats.dc, "Defer Count"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets", CTLFLAG_RD, &adapter->stats.mpc, "Missed Packets"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff", CTLFLAG_RD, &adapter->stats.rnbc, "Receive No Buffers"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize", CTLFLAG_RD, &adapter->stats.ruc, "Receive Undersize"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented", CTLFLAG_RD, &adapter->stats.rfc, "Fragmented Packets Received "); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize", CTLFLAG_RD, &adapter->stats.roc, "Oversized Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber", CTLFLAG_RD, &adapter->stats.rjc, "Recevied Jabber"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs", CTLFLAG_RD, &adapter->stats.rxerrc, "Receive Errors"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs", CTLFLAG_RD, &adapter->stats.crcerrs, "CRC errors"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs", CTLFLAG_RD, &adapter->stats.algnerrc, "Alignment Errors"); /* On 82575 these are collision counts */ SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs", CTLFLAG_RD, &adapter->stats.cexterr, "Collision/Carrier extension errors"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd", CTLFLAG_RD, &adapter->stats.xonrxc, "XON Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd", CTLFLAG_RD, &adapter->stats.xontxc, "XON Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd", CTLFLAG_RD, &adapter->stats.xoffrxc, "XOFF Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd", CTLFLAG_RD, &adapter->stats.xofftxc, "XOFF Transmitted"); /* Packet Reception Stats */ SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd", CTLFLAG_RD, &adapter->stats.tpr, "Total Packets Received "); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd", CTLFLAG_RD, &adapter->stats.gprc, "Good Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd", CTLFLAG_RD, &adapter->stats.bprc, "Broadcast Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd", CTLFLAG_RD, &adapter->stats.mprc, "Multicast Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64", CTLFLAG_RD, &adapter->stats.prc64, "64 byte frames received "); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127", CTLFLAG_RD, &adapter->stats.prc127, "65-127 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255", CTLFLAG_RD, &adapter->stats.prc255, "128-255 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511", CTLFLAG_RD, &adapter->stats.prc511, "256-511 byte frames 
received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023", CTLFLAG_RD, &adapter->stats.prc1023, "512-1023 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522", CTLFLAG_RD, &adapter->stats.prc1522, "1023-1522 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd", CTLFLAG_RD, &adapter->stats.gorc, "Good Octets Received"); /* Packet Transmission Stats */ SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd", CTLFLAG_RD, &adapter->stats.gotc, "Good Octets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd", CTLFLAG_RD, &adapter->stats.tpt, "Total Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd", CTLFLAG_RD, &adapter->stats.gptc, "Good Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd", CTLFLAG_RD, &adapter->stats.bptc, "Broadcast Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd", CTLFLAG_RD, &adapter->stats.mptc, "Multicast Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64", CTLFLAG_RD, &adapter->stats.ptc64, "64 byte frames transmitted "); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127", CTLFLAG_RD, &adapter->stats.ptc127, "65-127 byte frames transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255", CTLFLAG_RD, &adapter->stats.ptc255, "128-255 byte frames transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511", CTLFLAG_RD, &adapter->stats.ptc511, "256-511 byte frames transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023", CTLFLAG_RD, &adapter->stats.ptc1023, "512-1023 byte frames transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522", CTLFLAG_RD, &adapter->stats.ptc1522, "1024-1522 byte frames transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd", CTLFLAG_RD, &adapter->stats.tsctc, "TSO Contexts Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail", CTLFLAG_RD, &adapter->stats.tsctfc, "TSO Contexts Failed"); /* Interrupt Stats */ int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts", CTLFLAG_RD, NULL, "Interrupt Statistics"); int_list = SYSCTL_CHILDREN(int_node); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts", CTLFLAG_RD, &adapter->stats.iac, "Interrupt Assertion Count"); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer", CTLFLAG_RD, &adapter->stats.icrxptc, "Interrupt Cause Rx Pkt Timer Expire Count"); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer", CTLFLAG_RD, &adapter->stats.icrxatc, "Interrupt Cause Rx Abs Timer Expire Count"); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer", CTLFLAG_RD, &adapter->stats.ictxptc, "Interrupt Cause Tx Pkt Timer Expire Count"); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer", CTLFLAG_RD, &adapter->stats.ictxatc, "Interrupt Cause Tx Abs Timer Expire Count"); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty", CTLFLAG_RD, &adapter->stats.ictxqec, "Interrupt Cause Tx Queue Empty Count"); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh", CTLFLAG_RD, &adapter->stats.ictxqmtc, "Interrupt Cause Tx Queue Min Thresh Count"); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh", CTLFLAG_RD, &adapter->stats.icrxdmtc, "Interrupt Cause Rx Desc Min Thresh Count"); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun", CTLFLAG_RD, &adapter->stats.icrxoc, "Interrupt Cause Receiver Overrun Count"); } 
/********************************************************************** * * This routine provides a way to dump out the adapter eeprom, * often a useful debug/service tool. This only dumps the first * 32 words; the data that matters is within that extent. * **********************************************************************/ static int em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS) { struct adapter *adapter = (struct adapter *)arg1; int error; int result; result = -1; error = sysctl_handle_int(oidp, &result, 0, req); if (error || !req->newptr) return (error); /* * This value will cause a hex dump of the * first 32 16-bit words of the EEPROM to * the screen. */ if (result == 1) em_print_nvm_info(adapter); return (error); } static void em_print_nvm_info(struct adapter *adapter) { u16 eeprom_data; int i, j, row = 0; /* It's a bit crude, but it gets the job done */ printf("\nInterface EEPROM Dump:\n"); printf("Offset\n0x0000 "); for (i = 0, j = 0; i < 32; i++, j++) { if (j == 8) { /* Make the offset block */ j = 0; ++row; printf("\n0x00%x0 ", row); } e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data); printf("%04x ", eeprom_data); } printf("\n"); } static int em_sysctl_int_delay(SYSCTL_HANDLER_ARGS) { struct em_int_delay_info *info; struct adapter *adapter; u32 regval; int error, usecs, ticks; info = (struct em_int_delay_info *)arg1; usecs = info->value; error = sysctl_handle_int(oidp, &usecs, 0, req); if (error != 0 || req->newptr == NULL) return (error); if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535)) return (EINVAL); info->value = usecs; ticks = EM_USECS_TO_TICKS(usecs); if (info->offset == E1000_ITR) /* units are 256ns here */ ticks *= 4; adapter = info->adapter; EM_CORE_LOCK(adapter); regval = E1000_READ_OFFSET(&adapter->hw, info->offset); regval = (regval & ~0xffff) | (ticks & 0xffff); /* Handle a few special cases. */ switch (info->offset) { case E1000_RDTR: break; case E1000_TIDV: if (ticks == 0) { adapter->txd_cmd &= ~E1000_TXD_CMD_IDE; /* Don't write 0 into the TIDV register. */ regval++; } else adapter->txd_cmd |= E1000_TXD_CMD_IDE; break; } E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval); EM_CORE_UNLOCK(adapter); return (0); } static void em_add_int_delay_sysctl(struct adapter *adapter, const char *name, const char *description, struct em_int_delay_info *info, int offset, int value) { info->adapter = adapter; info->offset = offset; info->value = value; SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev), SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)), OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, info, 0, em_sysctl_int_delay, "I", description); } static void em_set_sysctl_value(struct adapter *adapter, const char *name, const char *description, int *limit, int value) { *limit = value; SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev), SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)), OID_AUTO, name, CTLFLAG_RW, limit, value, description); } /* ** Set flow control using sysctl: ** Flow control values: ** 0 - off ** 1 - rx pause ** 2 - tx pause ** 3 - full */ static int em_set_flowcntl(SYSCTL_HANDLER_ARGS) { int error; static int input = 3; /* default is full */ struct adapter *adapter = (struct adapter *) arg1; error = sysctl_handle_int(oidp, &input, 0, req); if ((error) || (req->newptr == NULL)) return (error); if (input == adapter->fc) /* no change?
*/ return (error); switch (input) { case e1000_fc_rx_pause: case e1000_fc_tx_pause: case e1000_fc_full: case e1000_fc_none: adapter->hw.fc.requested_mode = input; adapter->fc = input; break; default: /* Do nothing */ return (error); } adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode; e1000_force_mac_fc(&adapter->hw); return (error); } /* ** Manage Energy Efficient Ethernet: ** Control values: ** 0/1 - enabled/disabled */ static int em_sysctl_eee(SYSCTL_HANDLER_ARGS) { struct adapter *adapter = (struct adapter *) arg1; int error, value; value = adapter->hw.dev_spec.ich8lan.eee_disable; error = sysctl_handle_int(oidp, &value, 0, req); if (error || req->newptr == NULL) return (error); EM_CORE_LOCK(adapter); adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0); em_init_locked(adapter); EM_CORE_UNLOCK(adapter); return (0); } static int em_sysctl_debug_info(SYSCTL_HANDLER_ARGS) { struct adapter *adapter; int error; int result; result = -1; error = sysctl_handle_int(oidp, &result, 0, req); if (error || !req->newptr) return (error); if (result == 1) { adapter = (struct adapter *)arg1; em_print_debug_info(adapter); } return (error); } /* ** This routine is meant to be fluid, add whatever is ** needed for debugging a problem. -jfv */ static void em_print_debug_info(struct adapter *adapter) { device_t dev = adapter->dev; struct tx_ring *txr = adapter->tx_rings; struct rx_ring *rxr = adapter->rx_rings; if (if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING) printf("Interface is RUNNING "); else printf("Interface is NOT RUNNING\n"); if (if_getdrvflags(adapter->ifp) & IFF_DRV_OACTIVE) printf("and INACTIVE\n"); else printf("and ACTIVE\n"); for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) { device_printf(dev, "TX Queue %d ------\n", i); device_printf(dev, "hw tdh = %d, hw tdt = %d\n", E1000_READ_REG(&adapter->hw, E1000_TDH(i)), E1000_READ_REG(&adapter->hw, E1000_TDT(i))); device_printf(dev, "Tx Queue Status = %d\n", txr->busy); device_printf(dev, "TX descriptors avail = %d\n", txr->tx_avail); device_printf(dev, "Tx Descriptors avail failure = %ld\n", txr->no_desc_avail); device_printf(dev, "RX Queue %d ------\n", i); device_printf(dev, "hw rdh = %d, hw rdt = %d\n", E1000_READ_REG(&adapter->hw, E1000_RDH(i)), E1000_READ_REG(&adapter->hw, E1000_RDT(i))); device_printf(dev, "RX discarded packets = %ld\n", rxr->rx_discarded); device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check); device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh); } } #ifdef EM_MULTIQUEUE /* * 82574 only: * Write a new value to the EEPROM increasing the number of MSIX * vectors from 3 to 5, for proper multiqueue support. 
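*
* Worked example: the MSIX count appears to live in the
* EM_NVM_PCIE_CTRL word as (vectors - 1) -- inferred from the code
* below, where writing 4 into the field yields the 5 reported
* vectors; a factory image encoding 2 (3 vectors) is rewritten once
* and the NVM checksum updated.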
*/ static void em_enable_vectors_82574(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; device_t dev = adapter->dev; u16 edata; e1000_read_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata); printf("Current cap: %#06x\n", edata); if (((edata & EM_NVM_MSIX_N_MASK) >> EM_NVM_MSIX_N_SHIFT) != 4) { device_printf(dev, "Writing to eeprom: increasing " "reported MSIX vectors from 3 to 5...\n"); edata &= ~(EM_NVM_MSIX_N_MASK); edata |= 4 << EM_NVM_MSIX_N_SHIFT; e1000_write_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata); e1000_update_nvm_checksum(hw); device_printf(dev, "Writing to eeprom: done\n"); } } #endif #ifdef DDB DB_COMMAND(em_reset_dev, em_ddb_reset_dev) { devclass_t dc; int max_em; dc = devclass_find("em"); max_em = devclass_get_maxunit(dc); for (int index = 0; index < (max_em - 1); index++) { device_t dev; dev = devclass_get_device(dc, index); if (device_get_driver(dev) == &em_driver) { struct adapter *adapter = device_get_softc(dev); EM_CORE_LOCK(adapter); em_init_locked(adapter); EM_CORE_UNLOCK(adapter); } } } DB_COMMAND(em_dump_queue, em_ddb_dump_queue) { devclass_t dc; int max_em; dc = devclass_find("em"); max_em = devclass_get_maxunit(dc); for (int index = 0; index < (max_em - 1); index++) { device_t dev; dev = devclass_get_device(dc, index); if (device_get_driver(dev) == &em_driver) em_print_debug_info(device_get_softc(dev)); } } #endif Index: head/sys/dev/e1000/if_igb.c =================================================================== --- head/sys/dev/e1000/if_igb.c (revision 302383) +++ head/sys/dev/e1000/if_igb.c (revision 302384) @@ -1,6440 +1,6441 @@ /****************************************************************************** Copyright (c) 2001-2015, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
******************************************************************************/ /*$FreeBSD$*/ #include "opt_inet.h" #include "opt_inet6.h" #include "opt_rss.h" #ifdef HAVE_KERNEL_OPTION_HEADERS #include "opt_device_polling.h" #include "opt_altq.h" #endif #include "if_igb.h" /********************************************************************* * Driver version: *********************************************************************/ char igb_driver_version[] = "2.5.3-k"; /********************************************************************* * PCI Device ID Table * * Used by probe to select devices to load on * Last field stores an index into e1000_strings * Last entry must be all 0s * * { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index } *********************************************************************/ static igb_vendor_info_t igb_vendor_info_array[] = { {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82575EB_COPPER, 0, 0, 0}, {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82575EB_FIBER_SERDES, 0, 0, 0}, {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82575GB_QUAD_COPPER, 0, 0, 0}, {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576, 0, 0, 0}, {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_NS, 0, 0, 0}, {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_NS_SERDES, 0, 0, 0}, {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_FIBER, 0, 0, 0}, {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_SERDES, 0, 0, 0}, {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_SERDES_QUAD, 0, 0, 0}, {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_QUAD_COPPER, 0, 0, 0}, {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_QUAD_COPPER_ET2, 0, 0, 0}, {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_VF, 0, 0, 0}, {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_COPPER, 0, 0, 0}, {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_FIBER, 0, 0, 0}, {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_SERDES, 0, 0, 0}, {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_SGMII, 0, 0, 0}, {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_COPPER_DUAL, 0, 0, 0}, {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_QUAD_FIBER, 0, 0, 0}, {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_SERDES, 0, 0, 0}, {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_SGMII, 0, 0, 0}, {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_SFP, 0, 0, 0}, {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_BACKPLANE, 0, 0, 0}, {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_COPPER, 0, 0, 0}, {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_FIBER, 0, 0, 0}, {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_SERDES, 0, 0, 0}, {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_SGMII, 0, 0, 0}, {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_VF, 0, 0, 0}, {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER, 0, 0, 0}, {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER_IT, 0, 0, 0}, {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER_OEM1, 0, 0, 0}, {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER_FLASHLESS, 0, 0, 0}, {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_SERDES_FLASHLESS, 0, 0, 0}, {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_FIBER, 0, 0, 0}, {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_SERDES, 0, 0, 0}, {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_SGMII, 0, 0, 0}, {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I211_COPPER, 0, 0, 0}, {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I354_BACKPLANE_1GBPS, 0, 0, 0}, {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I354_BACKPLANE_2_5GBPS, 0, 0, 0}, {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I354_SGMII, 0, 0, 0}, /* required last entry */ {0, 0, 0, 0, 0} }; /********************************************************************* * Table of branding strings for all supported NICs. 
*********************************************************************/ static char *igb_strings[] = { "Intel(R) PRO/1000 Network Connection" }; /********************************************************************* * Function prototypes *********************************************************************/ static int igb_probe(device_t); static int igb_attach(device_t); static int igb_detach(device_t); static int igb_shutdown(device_t); static int igb_suspend(device_t); static int igb_resume(device_t); #ifndef IGB_LEGACY_TX static int igb_mq_start(struct ifnet *, struct mbuf *); static int igb_mq_start_locked(struct ifnet *, struct tx_ring *); static void igb_qflush(struct ifnet *); static void igb_deferred_mq_start(void *, int); #else static void igb_start(struct ifnet *); static void igb_start_locked(struct tx_ring *, struct ifnet *ifp); #endif static int igb_ioctl(struct ifnet *, u_long, caddr_t); static uint64_t igb_get_counter(if_t, ift_counter); static void igb_init(void *); static void igb_init_locked(struct adapter *); static void igb_stop(void *); static void igb_media_status(struct ifnet *, struct ifmediareq *); static int igb_media_change(struct ifnet *); static void igb_identify_hardware(struct adapter *); static int igb_allocate_pci_resources(struct adapter *); static int igb_allocate_msix(struct adapter *); static int igb_allocate_legacy(struct adapter *); static int igb_setup_msix(struct adapter *); static void igb_free_pci_resources(struct adapter *); static void igb_local_timer(void *); static void igb_reset(struct adapter *); static int igb_setup_interface(device_t, struct adapter *); static int igb_allocate_queues(struct adapter *); static void igb_configure_queues(struct adapter *); static int igb_allocate_transmit_buffers(struct tx_ring *); static void igb_setup_transmit_structures(struct adapter *); static void igb_setup_transmit_ring(struct tx_ring *); static void igb_initialize_transmit_units(struct adapter *); static void igb_free_transmit_structures(struct adapter *); static void igb_free_transmit_buffers(struct tx_ring *); static int igb_allocate_receive_buffers(struct rx_ring *); static int igb_setup_receive_structures(struct adapter *); static int igb_setup_receive_ring(struct rx_ring *); static void igb_initialize_receive_units(struct adapter *); static void igb_free_receive_structures(struct adapter *); static void igb_free_receive_buffers(struct rx_ring *); static void igb_free_receive_ring(struct rx_ring *); static void igb_enable_intr(struct adapter *); static void igb_disable_intr(struct adapter *); static void igb_update_stats_counters(struct adapter *); static bool igb_txeof(struct tx_ring *); static __inline void igb_rx_discard(struct rx_ring *, int); static __inline void igb_rx_input(struct rx_ring *, struct ifnet *, struct mbuf *, u32); static bool igb_rxeof(struct igb_queue *, int, int *); static void igb_rx_checksum(u32, struct mbuf *, u32); static int igb_tx_ctx_setup(struct tx_ring *, struct mbuf *, u32 *, u32 *); static int igb_tso_setup(struct tx_ring *, struct mbuf *, u32 *, u32 *); static void igb_set_promisc(struct adapter *); static void igb_disable_promisc(struct adapter *); static void igb_set_multi(struct adapter *); static void igb_update_link_status(struct adapter *); static void igb_refresh_mbufs(struct rx_ring *, int); static void igb_register_vlan(void *, struct ifnet *, u16); static void igb_unregister_vlan(void *, struct ifnet *, u16); static void igb_setup_vlan_hw_support(struct adapter *); static int igb_xmit(struct tx_ring *, 
struct mbuf **); static int igb_dma_malloc(struct adapter *, bus_size_t, struct igb_dma_alloc *, int); static void igb_dma_free(struct adapter *, struct igb_dma_alloc *); static int igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS); static void igb_print_nvm_info(struct adapter *); static int igb_is_valid_ether_addr(u8 *); static void igb_add_hw_stats(struct adapter *); static void igb_vf_init_stats(struct adapter *); static void igb_update_vf_stats_counters(struct adapter *); /* Management and WOL Support */ static void igb_init_manageability(struct adapter *); static void igb_release_manageability(struct adapter *); static void igb_get_hw_control(struct adapter *); static void igb_release_hw_control(struct adapter *); static void igb_enable_wakeup(device_t); static void igb_led_func(void *, int); static int igb_irq_fast(void *); static void igb_msix_que(void *); static void igb_msix_link(void *); static void igb_handle_que(void *context, int pending); static void igb_handle_link(void *context, int pending); static void igb_handle_link_locked(struct adapter *); static void igb_set_sysctl_value(struct adapter *, const char *, const char *, int *, int); static int igb_set_flowcntl(SYSCTL_HANDLER_ARGS); static int igb_sysctl_dmac(SYSCTL_HANDLER_ARGS); static int igb_sysctl_eee(SYSCTL_HANDLER_ARGS); #ifdef DEVICE_POLLING static poll_handler_t igb_poll; #endif /* POLLING */ /********************************************************************* * FreeBSD Device Interface Entry Points *********************************************************************/ static device_method_t igb_methods[] = { /* Device interface */ DEVMETHOD(device_probe, igb_probe), DEVMETHOD(device_attach, igb_attach), DEVMETHOD(device_detach, igb_detach), DEVMETHOD(device_shutdown, igb_shutdown), DEVMETHOD(device_suspend, igb_suspend), DEVMETHOD(device_resume, igb_resume), DEVMETHOD_END }; static driver_t igb_driver = { "igb", igb_methods, sizeof(struct adapter), }; static devclass_t igb_devclass; DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0); MODULE_DEPEND(igb, pci, 1, 1, 1); MODULE_DEPEND(igb, ether, 1, 1, 1); #ifdef DEV_NETMAP MODULE_DEPEND(igb, netmap, 1, 1, 1); #endif /* DEV_NETMAP */ /********************************************************************* * Tunable default values. *********************************************************************/ static SYSCTL_NODE(_hw, OID_AUTO, igb, CTLFLAG_RD, 0, "IGB driver parameters"); /* Descriptor defaults */ static int igb_rxd = IGB_DEFAULT_RXD; static int igb_txd = IGB_DEFAULT_TXD; SYSCTL_INT(_hw_igb, OID_AUTO, rxd, CTLFLAG_RDTUN, &igb_rxd, 0, "Number of receive descriptors per queue"); SYSCTL_INT(_hw_igb, OID_AUTO, txd, CTLFLAG_RDTUN, &igb_txd, 0, "Number of transmit descriptors per queue"); /* ** AIM: Adaptive Interrupt Moderation ** which means that the interrupt rate ** is varied over time based on the ** traffic for that interrupt vector */ static int igb_enable_aim = TRUE; SYSCTL_INT(_hw_igb, OID_AUTO, enable_aim, CTLFLAG_RWTUN, &igb_enable_aim, 0, "Enable adaptive interrupt moderation"); /* * MSIX should be the default for best performance, * but this allows it to be forced off for testing. 
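 *
 * Like the other CTLFLAG_RDTUN knobs in this file it is set from
 * loader.conf(5), e.g. "hw.igb.enable_msix=0" to fall back to
 * MSI/legacy interrupts.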
*/ static int igb_enable_msix = 1; SYSCTL_INT(_hw_igb, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &igb_enable_msix, 0, "Enable MSI-X interrupts"); /* ** Tunable interrupt rate */ static int igb_max_interrupt_rate = 8000; SYSCTL_INT(_hw_igb, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN, &igb_max_interrupt_rate, 0, "Maximum interrupts per second"); #ifndef IGB_LEGACY_TX /* ** Tunable number of buffers in the buf-ring (drbr_xxx) */ static int igb_buf_ring_size = IGB_BR_SIZE; SYSCTL_INT(_hw_igb, OID_AUTO, buf_ring_size, CTLFLAG_RDTUN, &igb_buf_ring_size, 0, "Size of the bufring"); #endif /* ** Header split causes the packet header to ** be dma'd to a separate mbuf from the payload. ** This can have memory alignment benefits, and ** small packets often fit entirely in the header ** mbuf and thus use no cluster. It is a very ** workload-dependent feature. */ static int igb_header_split = FALSE; SYSCTL_INT(_hw_igb, OID_AUTO, header_split, CTLFLAG_RDTUN, &igb_header_split, 0, "Enable receive mbuf header split"); /* ** This will autoconfigure based on the ** number of CPUs and max supported ** MSIX messages if left at 0. */ static int igb_num_queues = 0; SYSCTL_INT(_hw_igb, OID_AUTO, num_queues, CTLFLAG_RDTUN, &igb_num_queues, 0, "Number of queues to configure, 0 indicates autoconfigure"); /* ** Global variable to store last used CPU when binding queues ** to CPUs in igb_allocate_msix. Starts at CPU_FIRST and increments when a ** queue is bound to a cpu. */ static int igb_last_bind_cpu = -1; /* How many packets rxeof tries to clean at a time */ static int igb_rx_process_limit = 100; SYSCTL_INT(_hw_igb, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN, &igb_rx_process_limit, 0, "Maximum number of received packets to process at a time, -1 means unlimited"); /* How many packets txeof tries to clean at a time */ static int igb_tx_process_limit = -1; SYSCTL_INT(_hw_igb, OID_AUTO, tx_process_limit, CTLFLAG_RDTUN, &igb_tx_process_limit, 0, "Maximum number of sent packets to process at a time, -1 means unlimited"); #ifdef DEV_NETMAP /* see ixgbe.c for details */ #include <dev/netmap/if_igb_netmap.h> #endif /* DEV_NETMAP */ /********************************************************************* * Device identification routine * * igb_probe determines if the driver should be loaded on an * adapter, based on the PCI vendor/device ID of the adapter.
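 *
 * Every entry in igb_vendor_info_array leaves the subvendor and
 * subdevice fields 0, which the match loop below treats as a
 * wildcard, so matching is effectively on vendor and device ID alone.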
* * return BUS_PROBE_DEFAULT on success, positive on failure *********************************************************************/ static int igb_probe(device_t dev) { char adapter_name[256]; uint16_t pci_vendor_id = 0; uint16_t pci_device_id = 0; uint16_t pci_subvendor_id = 0; uint16_t pci_subdevice_id = 0; igb_vendor_info_t *ent; INIT_DEBUGOUT("igb_probe: begin"); pci_vendor_id = pci_get_vendor(dev); if (pci_vendor_id != IGB_INTEL_VENDOR_ID) return (ENXIO); pci_device_id = pci_get_device(dev); pci_subvendor_id = pci_get_subvendor(dev); pci_subdevice_id = pci_get_subdevice(dev); ent = igb_vendor_info_array; while (ent->vendor_id != 0) { if ((pci_vendor_id == ent->vendor_id) && (pci_device_id == ent->device_id) && ((pci_subvendor_id == ent->subvendor_id) || (ent->subvendor_id == 0)) && ((pci_subdevice_id == ent->subdevice_id) || (ent->subdevice_id == 0))) { sprintf(adapter_name, "%s, Version - %s", igb_strings[ent->index], igb_driver_version); device_set_desc_copy(dev, adapter_name); return (BUS_PROBE_DEFAULT); } ent++; } return (ENXIO); } /********************************************************************* * Device initialization routine * * The attach entry point is called when the driver is being loaded. * This routine identifies the type of hardware, allocates all resources * and initializes the hardware. * * return 0 on success, positive on failure *********************************************************************/ static int igb_attach(device_t dev) { struct adapter *adapter; int error = 0; u16 eeprom_data; INIT_DEBUGOUT("igb_attach: begin"); if (resource_disabled("igb", device_get_unit(dev))) { device_printf(dev, "Disabled by device hint\n"); return (ENXIO); } adapter = device_get_softc(dev); adapter->dev = adapter->osdep.dev = dev; IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev)); /* SYSCTLs */ SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0, igb_sysctl_nvm_info, "I", "NVM Information"); igb_set_sysctl_value(adapter, "enable_aim", "Interrupt Moderation", &adapter->enable_aim, igb_enable_aim); SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0, igb_set_flowcntl, "I", "Flow Control"); callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0); /* Determine hardware and mac info */ igb_identify_hardware(adapter); /* Setup PCI resources */ if (igb_allocate_pci_resources(adapter)) { device_printf(dev, "Allocation of PCI resources failed\n"); error = ENXIO; goto err_pci; } /* Do Shared Code initialization */ if (e1000_setup_init_funcs(&adapter->hw, TRUE)) { device_printf(dev, "Setup of Shared code failed\n"); error = ENXIO; goto err_pci; } e1000_get_bus_info(&adapter->hw); /* Sysctls for limiting the amount of work done in the taskqueues */ igb_set_sysctl_value(adapter, "rx_processing_limit", "max number of rx packets to process", &adapter->rx_process_limit, igb_rx_process_limit); igb_set_sysctl_value(adapter, "tx_processing_limit", "max number of tx packets to process", &adapter->tx_process_limit, igb_tx_process_limit); /* * Validate number of transmit and receive descriptors. It * must not exceed hardware maximum, and must be multiple * of E1000_DBA_ALIGN. 
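 *
 * Worked example (assuming the usual 16-byte descriptors and a
 * 128-byte IGB_DBA_ALIGN): the ring byte size is then a multiple
 * of 128 exactly when the descriptor count is a multiple of 8.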
*/ if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 || (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) { device_printf(dev, "Using %d TX descriptors instead of %d!\n", IGB_DEFAULT_TXD, igb_txd); adapter->num_tx_desc = IGB_DEFAULT_TXD; } else adapter->num_tx_desc = igb_txd; if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 || (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) { device_printf(dev, "Using %d RX descriptors instead of %d!\n", IGB_DEFAULT_RXD, igb_rxd); adapter->num_rx_desc = IGB_DEFAULT_RXD; } else adapter->num_rx_desc = igb_rxd; adapter->hw.mac.autoneg = DO_AUTO_NEG; adapter->hw.phy.autoneg_wait_to_complete = FALSE; adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT; /* Copper options */ if (adapter->hw.phy.media_type == e1000_media_type_copper) { adapter->hw.phy.mdix = AUTO_ALL_MODES; adapter->hw.phy.disable_polarity_correction = FALSE; adapter->hw.phy.ms_type = IGB_MASTER_SLAVE; } /* * Set the frame limits assuming * standard Ethernet-sized frames. */ adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE; /* ** Allocate and Setup Queues */ if (igb_allocate_queues(adapter)) { error = ENOMEM; goto err_pci; } /* Allocate the appropriate stats memory */ if (adapter->vf_ifp) { adapter->stats = (struct e1000_vf_stats *)malloc(sizeof(struct e1000_vf_stats), M_DEVBUF, M_NOWAIT | M_ZERO); igb_vf_init_stats(adapter); } else adapter->stats = (struct e1000_hw_stats *)malloc(sizeof(struct e1000_hw_stats), M_DEVBUF, M_NOWAIT | M_ZERO); if (adapter->stats == NULL) { device_printf(dev, "Cannot allocate stats memory\n"); error = ENOMEM; goto err_late; } /* Allocate multicast array memory. */ adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT); if (adapter->mta == NULL) { device_printf(dev, "Cannot allocate multicast setup array\n"); error = ENOMEM; goto err_late; } /* Some adapter-specific advanced features */ if (adapter->hw.mac.type >= e1000_i350) { SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "dmac", CTLTYPE_INT|CTLFLAG_RW, adapter, 0, igb_sysctl_dmac, "I", "DMA Coalesce"); SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "eee_disabled", CTLTYPE_INT|CTLFLAG_RW, adapter, 0, igb_sysctl_eee, "I", "Disable Energy Efficient Ethernet"); if (adapter->hw.phy.media_type == e1000_media_type_copper) { if (adapter->hw.mac.type == e1000_i354) e1000_set_eee_i354(&adapter->hw, TRUE, TRUE); else e1000_set_eee_i350(&adapter->hw, TRUE, TRUE); } } /* ** Start from a known state; this is ** important for reading the NVM and ** MAC address from it. */ e1000_reset_hw(&adapter->hw); /* Make sure we have a good EEPROM before we read from it */ if (((adapter->hw.mac.type != e1000_i210) && (adapter->hw.mac.type != e1000_i211)) && (e1000_validate_nvm_checksum(&adapter->hw) < 0)) { /* ** Some PCI-E parts fail the first check due to ** the link being in sleep state; call it again, ** and if it fails a second time it's a real issue.
*/ if (e1000_validate_nvm_checksum(&adapter->hw) < 0) { device_printf(dev, "The EEPROM Checksum Is Not Valid\n"); error = EIO; goto err_late; } } /* ** Copy the permanent MAC address out of the EEPROM */ if (e1000_read_mac_addr(&adapter->hw) < 0) { device_printf(dev, "EEPROM read error while reading MAC" " address\n"); error = EIO; goto err_late; } /* Check its sanity */ if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) { device_printf(dev, "Invalid MAC address\n"); error = EIO; goto err_late; } /* Setup OS specific network interface */ if (igb_setup_interface(dev, adapter) != 0) goto err_late; /* Now get a good starting state */ igb_reset(adapter); /* Initialize statistics */ igb_update_stats_counters(adapter); adapter->hw.mac.get_link_status = 1; igb_update_link_status(adapter); /* Indicate SOL/IDER usage */ if (e1000_check_reset_block(&adapter->hw)) device_printf(dev, "PHY reset is blocked due to SOL/IDER session.\n"); /* Determine if we have to control management hardware */ adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw); /* * Setup Wake-on-Lan */ /* APME bit in EEPROM is mapped to WUC.APME */ eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME; if (eeprom_data) adapter->wol = E1000_WUFC_MAG; /* Register for VLAN events */ adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config, igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST); adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig, igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST); igb_add_hw_stats(adapter); /* Tell the stack that the interface is not active */ adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING; adapter->ifp->if_drv_flags |= IFF_DRV_OACTIVE; adapter->led_dev = led_create(igb_led_func, adapter, device_get_nameunit(dev)); /* ** Configure Interrupts */ if ((adapter->msix > 1) && (igb_enable_msix)) error = igb_allocate_msix(adapter); else /* MSI or Legacy */ error = igb_allocate_legacy(adapter); if (error) goto err_late; #ifdef DEV_NETMAP igb_netmap_attach(adapter); #endif /* DEV_NETMAP */ INIT_DEBUGOUT("igb_attach: end"); return (0); err_late: if (igb_detach(dev) == 0) /* igb_detach() already did the cleanup */ return(error); igb_free_transmit_structures(adapter); igb_free_receive_structures(adapter); igb_release_hw_control(adapter); err_pci: igb_free_pci_resources(adapter); if (adapter->ifp != NULL) if_free(adapter->ifp); free(adapter->mta, M_DEVBUF); IGB_CORE_LOCK_DESTROY(adapter); return (error); } /********************************************************************* * Device removal routine * * The detach entry point is called when the driver is being removed. * This routine stops the adapter and deallocates all the resources * that were allocated for driver operation. 
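 *
 * Note that detach is refused with EBUSY while a vlan(4) trunk still
 * references the interface, so any VLANs must be destroyed first.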
* * return 0 on success, positive on failure *********************************************************************/ static int igb_detach(device_t dev) { struct adapter *adapter = device_get_softc(dev); struct ifnet *ifp = adapter->ifp; INIT_DEBUGOUT("igb_detach: begin"); /* Make sure VLANS are not using driver */ if (adapter->ifp->if_vlantrunk != NULL) { device_printf(dev,"Vlan in use, detach first\n"); return (EBUSY); } ether_ifdetach(adapter->ifp); if (adapter->led_dev != NULL) led_destroy(adapter->led_dev); #ifdef DEVICE_POLLING if (ifp->if_capenable & IFCAP_POLLING) ether_poll_deregister(ifp); #endif IGB_CORE_LOCK(adapter); adapter->in_detach = 1; igb_stop(adapter); IGB_CORE_UNLOCK(adapter); e1000_phy_hw_reset(&adapter->hw); /* Give control back to firmware */ igb_release_manageability(adapter); igb_release_hw_control(adapter); if (adapter->wol) { E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN); E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol); igb_enable_wakeup(dev); } /* Unregister VLAN events */ if (adapter->vlan_attach != NULL) EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach); if (adapter->vlan_detach != NULL) EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach); callout_drain(&adapter->timer); #ifdef DEV_NETMAP netmap_detach(adapter->ifp); #endif /* DEV_NETMAP */ igb_free_pci_resources(adapter); bus_generic_detach(dev); if_free(ifp); igb_free_transmit_structures(adapter); igb_free_receive_structures(adapter); if (adapter->mta != NULL) free(adapter->mta, M_DEVBUF); IGB_CORE_LOCK_DESTROY(adapter); return (0); } /********************************************************************* * * Shutdown entry point * **********************************************************************/ static int igb_shutdown(device_t dev) { return igb_suspend(dev); } /* * Suspend/resume device methods. */ static int igb_suspend(device_t dev) { struct adapter *adapter = device_get_softc(dev); IGB_CORE_LOCK(adapter); igb_stop(adapter); igb_release_manageability(adapter); igb_release_hw_control(adapter); if (adapter->wol) { E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN); E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol); igb_enable_wakeup(dev); } IGB_CORE_UNLOCK(adapter); return bus_generic_suspend(dev); } static int igb_resume(device_t dev) { struct adapter *adapter = device_get_softc(dev); struct tx_ring *txr = adapter->tx_rings; struct ifnet *ifp = adapter->ifp; IGB_CORE_LOCK(adapter); igb_init_locked(adapter); igb_init_manageability(adapter); if ((ifp->if_flags & IFF_UP) && (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) { for (int i = 0; i < adapter->num_queues; i++, txr++) { IGB_TX_LOCK(txr); #ifndef IGB_LEGACY_TX /* Process the stack queue only if not depleted */ if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) && !drbr_empty(ifp, txr->br)) igb_mq_start_locked(ifp, txr); #else if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) igb_start_locked(txr, ifp); #endif IGB_TX_UNLOCK(txr); } } IGB_CORE_UNLOCK(adapter); return bus_generic_resume(dev); } #ifdef IGB_LEGACY_TX /********************************************************************* * Transmit entry point * * igb_start is called by the stack to initiate a transmit. * The driver will remain in this routine as long as there are * packets to transmit and transmit resources are available. * In case resources are not available stack is notified and * the packet is requeued. 
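 *
 * ("Requeued" is literal: when igb_xmit() fails without consuming the
 * mbuf, it is pushed back with IFQ_DRV_PREPEND() and the queue is
 * marked depleted until txeof reclaims descriptors.)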
**********************************************************************/ static void igb_start_locked(struct tx_ring *txr, struct ifnet *ifp) { struct adapter *adapter = ifp->if_softc; struct mbuf *m_head; IGB_TX_LOCK_ASSERT(txr); if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING) return; if (!adapter->link_active) return; /* Call cleanup if number of TX descriptors low */ if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD) igb_txeof(txr); while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) { if (txr->tx_avail <= IGB_MAX_SCATTER) { txr->queue_status |= IGB_QUEUE_DEPLETED; break; } IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head); if (m_head == NULL) break; /* * Encapsulation can modify our pointer, and or make it * NULL on failure. In that event, we can't requeue. */ if (igb_xmit(txr, &m_head)) { if (m_head != NULL) IFQ_DRV_PREPEND(&ifp->if_snd, m_head); if (txr->tx_avail <= IGB_MAX_SCATTER) txr->queue_status |= IGB_QUEUE_DEPLETED; break; } /* Send a copy of the frame to the BPF listener */ ETHER_BPF_MTAP(ifp, m_head); /* Set watchdog on */ txr->watchdog_time = ticks; txr->queue_status |= IGB_QUEUE_WORKING; } } /* * Legacy TX driver routine, called from the * stack, always uses tx[0], and spins for it. * Should not be used with multiqueue tx */ static void igb_start(struct ifnet *ifp) { struct adapter *adapter = ifp->if_softc; struct tx_ring *txr = adapter->tx_rings; if (ifp->if_drv_flags & IFF_DRV_RUNNING) { IGB_TX_LOCK(txr); igb_start_locked(txr, ifp); IGB_TX_UNLOCK(txr); } return; } #else /* ~IGB_LEGACY_TX */ /* ** Multiqueue Transmit Entry: ** quick turnaround to the stack ** */ static int igb_mq_start(struct ifnet *ifp, struct mbuf *m) { struct adapter *adapter = ifp->if_softc; struct igb_queue *que; struct tx_ring *txr; int i, err = 0; #ifdef RSS uint32_t bucket_id; #endif /* Which queue to use */ /* * When doing RSS, map it to the same outbound queue * as the incoming flow would be mapped to. * * If everything is setup correctly, it should be the * same bucket that the current CPU we're on is. */ if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) { #ifdef RSS if (rss_hash2bucket(m->m_pkthdr.flowid, M_HASHTYPE_GET(m), &bucket_id) == 0) { /* XXX TODO: spit out something if bucket_id > num_queues? */ i = bucket_id % adapter->num_queues; } else { #endif i = m->m_pkthdr.flowid % adapter->num_queues; #ifdef RSS } #endif } else { i = curcpu % adapter->num_queues; } txr = &adapter->tx_rings[i]; que = &adapter->queues[i]; err = drbr_enqueue(ifp, txr->br, m); if (err) return (err); if (IGB_TX_TRYLOCK(txr)) { igb_mq_start_locked(ifp, txr); IGB_TX_UNLOCK(txr); } else taskqueue_enqueue(que->tq, &txr->txq_task); return (0); } static int igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr) { struct adapter *adapter = txr->adapter; struct mbuf *next; int err = 0, enq = 0; IGB_TX_LOCK_ASSERT(txr); if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) || adapter->link_active == 0) return (ENETDOWN); /* Process the queue */ while ((next = drbr_peek(ifp, txr->br)) != NULL) { if ((err = igb_xmit(txr, &next)) != 0) { if (next == NULL) { /* It was freed, move forward */ drbr_advance(ifp, txr->br); } else { /* * Still have one left, it may not be * the same since the transmit function * may have changed it. 
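 *
 * The buf_ring contract used by this loop: drbr_peek() reads the
 * head without consuming it, drbr_advance() consumes it after a
 * successful transmit, and drbr_putback() re-installs the possibly
 * rewritten mbuf so a later pass can retry it.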
*/ drbr_putback(ifp, txr->br, next); } break; } drbr_advance(ifp, txr->br); enq++; if (next->m_flags & M_MCAST && adapter->vf_ifp) if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); ETHER_BPF_MTAP(ifp, next); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) break; } if (enq > 0) { /* Set the watchdog */ txr->queue_status |= IGB_QUEUE_WORKING; txr->watchdog_time = ticks; } if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD) igb_txeof(txr); if (txr->tx_avail <= IGB_MAX_SCATTER) txr->queue_status |= IGB_QUEUE_DEPLETED; return (err); } /* * Called from a taskqueue to drain queued transmit packets. */ static void igb_deferred_mq_start(void *arg, int pending) { struct tx_ring *txr = arg; struct adapter *adapter = txr->adapter; struct ifnet *ifp = adapter->ifp; IGB_TX_LOCK(txr); if (!drbr_empty(ifp, txr->br)) igb_mq_start_locked(ifp, txr); IGB_TX_UNLOCK(txr); } /* ** Flush all ring buffers */ static void igb_qflush(struct ifnet *ifp) { struct adapter *adapter = ifp->if_softc; struct tx_ring *txr = adapter->tx_rings; struct mbuf *m; for (int i = 0; i < adapter->num_queues; i++, txr++) { IGB_TX_LOCK(txr); while ((m = buf_ring_dequeue_sc(txr->br)) != NULL) m_freem(m); IGB_TX_UNLOCK(txr); } if_qflush(ifp); } #endif /* ~IGB_LEGACY_TX */ /********************************************************************* * Ioctl entry point * * igb_ioctl is called when the user wants to configure the * interface. * * return 0 on success, positive on failure **********************************************************************/ static int igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data) { struct adapter *adapter = ifp->if_softc; struct ifreq *ifr = (struct ifreq *)data; #if defined(INET) || defined(INET6) struct ifaddr *ifa = (struct ifaddr *)data; #endif bool avoid_reset = FALSE; int error = 0; if (adapter->in_detach) return (error); switch (command) { case SIOCSIFADDR: #ifdef INET if (ifa->ifa_addr->sa_family == AF_INET) avoid_reset = TRUE; #endif #ifdef INET6 if (ifa->ifa_addr->sa_family == AF_INET6) avoid_reset = TRUE; #endif /* ** Calling init results in link renegotiation, ** so we avoid doing it when possible. 
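**
** (Only INET/INET6 address assignment takes this path: the interface
** is marked up, initialized once if not already running, and ARP is
** primed, without going through ether_ioctl() and renegotiating.)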
*/ if (avoid_reset) { ifp->if_flags |= IFF_UP; if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) igb_init(adapter); #ifdef INET if (!(ifp->if_flags & IFF_NOARP)) arp_ifinit(ifp, ifa); #endif } else error = ether_ioctl(ifp, command, data); break; case SIOCSIFMTU: { int max_frame_size; IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)"); IGB_CORE_LOCK(adapter); max_frame_size = 9234; if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN - ETHER_CRC_LEN) { IGB_CORE_UNLOCK(adapter); error = EINVAL; break; } ifp->if_mtu = ifr->ifr_mtu; adapter->max_frame_size = ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN; - igb_init_locked(adapter); + if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) + igb_init_locked(adapter); IGB_CORE_UNLOCK(adapter); break; } case SIOCSIFFLAGS: IOCTL_DEBUGOUT("ioctl rcv'd:\ SIOCSIFFLAGS (Set Interface Flags)"); IGB_CORE_LOCK(adapter); if (ifp->if_flags & IFF_UP) { if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) { if ((ifp->if_flags ^ adapter->if_flags) & (IFF_PROMISC | IFF_ALLMULTI)) { igb_disable_promisc(adapter); igb_set_promisc(adapter); } } else igb_init_locked(adapter); } else if (ifp->if_drv_flags & IFF_DRV_RUNNING) igb_stop(adapter); adapter->if_flags = ifp->if_flags; IGB_CORE_UNLOCK(adapter); break; case SIOCADDMULTI: case SIOCDELMULTI: IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI"); if (ifp->if_drv_flags & IFF_DRV_RUNNING) { IGB_CORE_LOCK(adapter); igb_disable_intr(adapter); igb_set_multi(adapter); #ifdef DEVICE_POLLING if (!(ifp->if_capenable & IFCAP_POLLING)) #endif igb_enable_intr(adapter); IGB_CORE_UNLOCK(adapter); } break; case SIOCSIFMEDIA: /* Check SOL/IDER usage */ IGB_CORE_LOCK(adapter); if (e1000_check_reset_block(&adapter->hw)) { IGB_CORE_UNLOCK(adapter); device_printf(adapter->dev, "Media change is" " blocked due to SOL/IDER session.\n"); break; } IGB_CORE_UNLOCK(adapter); case SIOCGIFMEDIA: IOCTL_DEBUGOUT("ioctl rcv'd: \ SIOCxIFMEDIA (Get/Set Interface Media)"); error = ifmedia_ioctl(ifp, ifr, &adapter->media, command); break; case SIOCSIFCAP: { int mask, reinit; IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)"); reinit = 0; mask = ifr->ifr_reqcap ^ ifp->if_capenable; #ifdef DEVICE_POLLING if (mask & IFCAP_POLLING) { if (ifr->ifr_reqcap & IFCAP_POLLING) { error = ether_poll_register(igb_poll, ifp); if (error) return (error); IGB_CORE_LOCK(adapter); igb_disable_intr(adapter); ifp->if_capenable |= IFCAP_POLLING; IGB_CORE_UNLOCK(adapter); } else { error = ether_poll_deregister(ifp); /* Enable interrupt even in error case */ IGB_CORE_LOCK(adapter); igb_enable_intr(adapter); ifp->if_capenable &= ~IFCAP_POLLING; IGB_CORE_UNLOCK(adapter); } } #endif #if __FreeBSD_version >= 1000000 /* HW cannot turn these on/off separately */ if (mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) { ifp->if_capenable ^= IFCAP_RXCSUM; ifp->if_capenable ^= IFCAP_RXCSUM_IPV6; reinit = 1; } if (mask & IFCAP_TXCSUM) { ifp->if_capenable ^= IFCAP_TXCSUM; reinit = 1; } if (mask & IFCAP_TXCSUM_IPV6) { ifp->if_capenable ^= IFCAP_TXCSUM_IPV6; reinit = 1; } #else if (mask & IFCAP_HWCSUM) { ifp->if_capenable ^= IFCAP_HWCSUM; reinit = 1; } #endif if (mask & IFCAP_TSO4) { ifp->if_capenable ^= IFCAP_TSO4; reinit = 1; } if (mask & IFCAP_TSO6) { ifp->if_capenable ^= IFCAP_TSO6; reinit = 1; } if (mask & IFCAP_VLAN_HWTAGGING) { ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; reinit = 1; } if (mask & IFCAP_VLAN_HWFILTER) { ifp->if_capenable ^= IFCAP_VLAN_HWFILTER; reinit = 1; } if (mask & IFCAP_VLAN_HWTSO) { ifp->if_capenable ^= IFCAP_VLAN_HWTSO; reinit = 1; } if (mask & IFCAP_LRO) { ifp->if_capenable ^= 
IFCAP_LRO; reinit = 1; } if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING)) igb_init(adapter); VLAN_CAPABILITIES(ifp); break; } default: error = ether_ioctl(ifp, command, data); break; } return (error); } /********************************************************************* * Init entry point * * This routine is used in two ways. It is used by the stack as * init entry point in network interface structure. It is also used * by the driver as a hw/sw initialization routine to get to a * consistent state. * * return 0 on success, positive on failure **********************************************************************/ static void igb_init_locked(struct adapter *adapter) { struct ifnet *ifp = adapter->ifp; device_t dev = adapter->dev; INIT_DEBUGOUT("igb_init: begin"); IGB_CORE_LOCK_ASSERT(adapter); igb_disable_intr(adapter); callout_stop(&adapter->timer); /* Get the latest mac address, User can use a LAA */ bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr, ETHER_ADDR_LEN); /* Put the address into the Receive Address Array */ e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0); igb_reset(adapter); igb_update_link_status(adapter); E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN); /* Set hardware offload abilities */ ifp->if_hwassist = 0; if (ifp->if_capenable & IFCAP_TXCSUM) { #if __FreeBSD_version >= 1000000 ifp->if_hwassist |= (CSUM_IP_TCP | CSUM_IP_UDP); if (adapter->hw.mac.type != e1000_82575) ifp->if_hwassist |= CSUM_IP_SCTP; #else ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP); #if __FreeBSD_version >= 800000 if (adapter->hw.mac.type != e1000_82575) ifp->if_hwassist |= CSUM_SCTP; #endif #endif } #if __FreeBSD_version >= 1000000 if (ifp->if_capenable & IFCAP_TXCSUM_IPV6) { ifp->if_hwassist |= (CSUM_IP6_TCP | CSUM_IP6_UDP); if (adapter->hw.mac.type != e1000_82575) ifp->if_hwassist |= CSUM_IP6_SCTP; } #endif if (ifp->if_capenable & IFCAP_TSO) ifp->if_hwassist |= CSUM_TSO; /* Clear bad data from Rx FIFOs */ e1000_rx_fifo_flush_82575(&adapter->hw); /* Configure for OS presence */ igb_init_manageability(adapter); /* Prepare transmit descriptors and buffers */ igb_setup_transmit_structures(adapter); igb_initialize_transmit_units(adapter); /* Setup Multicast table */ igb_set_multi(adapter); /* ** Figure out the desired mbuf pool ** for doing jumbo/packetsplit */ if (adapter->max_frame_size <= 2048) adapter->rx_mbuf_sz = MCLBYTES; else if (adapter->max_frame_size <= 4096) adapter->rx_mbuf_sz = MJUMPAGESIZE; else adapter->rx_mbuf_sz = MJUM9BYTES; /* Prepare receive descriptors and buffers */ if (igb_setup_receive_structures(adapter)) { device_printf(dev, "Could not setup receive structures\n"); return; } igb_initialize_receive_units(adapter); /* Enable VLAN support */ if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) igb_setup_vlan_hw_support(adapter); /* Don't lose promiscuous settings */ igb_set_promisc(adapter); ifp->if_drv_flags |= IFF_DRV_RUNNING; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; callout_reset(&adapter->timer, hz, igb_local_timer, adapter); e1000_clear_hw_cntrs_base_generic(&adapter->hw); if (adapter->msix > 1) /* Set up queue routing */ igb_configure_queues(adapter); /* this clears any pending interrupts */ E1000_READ_REG(&adapter->hw, E1000_ICR); #ifdef DEVICE_POLLING /* * Only enable interrupts if we are not polling, make sure * they are off otherwise. 
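 *
 * (IFCAP_POLLING is toggled at runtime through SIOCSIFCAP, e.g.
 * "ifconfig igb0 polling", so init must leave the interrupt state
 * consistent with the current capability.)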
*/ if (ifp->if_capenable & IFCAP_POLLING) igb_disable_intr(adapter); else #endif /* DEVICE_POLLING */ { igb_enable_intr(adapter); E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC); } /* Set Energy Efficient Ethernet */ if (adapter->hw.phy.media_type == e1000_media_type_copper) { if (adapter->hw.mac.type == e1000_i354) e1000_set_eee_i354(&adapter->hw, TRUE, TRUE); else e1000_set_eee_i350(&adapter->hw, TRUE, TRUE); } } static void igb_init(void *arg) { struct adapter *adapter = arg; IGB_CORE_LOCK(adapter); igb_init_locked(adapter); IGB_CORE_UNLOCK(adapter); } static void igb_handle_que(void *context, int pending) { struct igb_queue *que = context; struct adapter *adapter = que->adapter; struct tx_ring *txr = que->txr; struct ifnet *ifp = adapter->ifp; if (ifp->if_drv_flags & IFF_DRV_RUNNING) { bool more; more = igb_rxeof(que, adapter->rx_process_limit, NULL); IGB_TX_LOCK(txr); igb_txeof(txr); #ifndef IGB_LEGACY_TX /* Process the stack queue only if not depleted */ if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) && !drbr_empty(ifp, txr->br)) igb_mq_start_locked(ifp, txr); #else if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) igb_start_locked(txr, ifp); #endif IGB_TX_UNLOCK(txr); /* Do we need another? */ if (more) { taskqueue_enqueue(que->tq, &que->que_task); return; } } #ifdef DEVICE_POLLING if (ifp->if_capenable & IFCAP_POLLING) return; #endif /* Reenable this interrupt */ if (que->eims) E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims); else igb_enable_intr(adapter); } /* Deal with link in a sleepable context */ static void igb_handle_link(void *context, int pending) { struct adapter *adapter = context; IGB_CORE_LOCK(adapter); igb_handle_link_locked(adapter); IGB_CORE_UNLOCK(adapter); } static void igb_handle_link_locked(struct adapter *adapter) { struct tx_ring *txr = adapter->tx_rings; struct ifnet *ifp = adapter->ifp; IGB_CORE_LOCK_ASSERT(adapter); adapter->hw.mac.get_link_status = 1; igb_update_link_status(adapter); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) { for (int i = 0; i < adapter->num_queues; i++, txr++) { IGB_TX_LOCK(txr); #ifndef IGB_LEGACY_TX /* Process the stack queue only if not depleted */ if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) && !drbr_empty(ifp, txr->br)) igb_mq_start_locked(ifp, txr); #else if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) igb_start_locked(txr, ifp); #endif IGB_TX_UNLOCK(txr); } } } /********************************************************************* * * MSI/Legacy Deferred * Interrupt Service routine * *********************************************************************/ static int igb_irq_fast(void *arg) { struct adapter *adapter = arg; struct igb_queue *que = adapter->queues; u32 reg_icr; reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR); /* Hot eject? */ if (reg_icr == 0xffffffff) return FILTER_STRAY; /* Definitely not our interrupt. */ if (reg_icr == 0x0) return FILTER_STRAY; if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0) return FILTER_STRAY; /* * Mask interrupts until the taskqueue is finished running. This is * cheap, just assume that it is needed. This also works around the * MSI message reordering errata on certain systems. 
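 *
 * (The mask is strictly temporary: igb_handle_que() re-enables the
 * interrupt once it has drained the rings.)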
*/ igb_disable_intr(adapter); taskqueue_enqueue(que->tq, &que->que_task); /* Link status change */ if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) taskqueue_enqueue(que->tq, &adapter->link_task); if (reg_icr & E1000_ICR_RXO) adapter->rx_overruns++; return FILTER_HANDLED; } #ifdef DEVICE_POLLING #if __FreeBSD_version >= 800000 #define POLL_RETURN_COUNT(a) (a) static int #else #define POLL_RETURN_COUNT(a) static void #endif igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count) { struct adapter *adapter = ifp->if_softc; struct igb_queue *que; struct tx_ring *txr; u32 reg_icr, rx_done = 0; u32 loop = IGB_MAX_LOOP; bool more; IGB_CORE_LOCK(adapter); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { IGB_CORE_UNLOCK(adapter); return POLL_RETURN_COUNT(rx_done); } if (cmd == POLL_AND_CHECK_STATUS) { reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR); /* Link status change */ if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) igb_handle_link_locked(adapter); if (reg_icr & E1000_ICR_RXO) adapter->rx_overruns++; } IGB_CORE_UNLOCK(adapter); for (int i = 0; i < adapter->num_queues; i++) { que = &adapter->queues[i]; txr = que->txr; igb_rxeof(que, count, &rx_done); IGB_TX_LOCK(txr); do { more = igb_txeof(txr); } while (loop-- && more); #ifndef IGB_LEGACY_TX if (!drbr_empty(ifp, txr->br)) igb_mq_start_locked(ifp, txr); #else if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) igb_start_locked(txr, ifp); #endif IGB_TX_UNLOCK(txr); } return POLL_RETURN_COUNT(rx_done); } #endif /* DEVICE_POLLING */ /********************************************************************* * * MSIX Que Interrupt Service routine * **********************************************************************/ static void igb_msix_que(void *arg) { struct igb_queue *que = arg; struct adapter *adapter = que->adapter; struct ifnet *ifp = adapter->ifp; struct tx_ring *txr = que->txr; struct rx_ring *rxr = que->rxr; u32 newitr = 0; bool more_rx; /* Ignore spurious interrupts */ if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) return; E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims); ++que->irqs; IGB_TX_LOCK(txr); igb_txeof(txr); #ifndef IGB_LEGACY_TX /* Process the stack queue only if not depleted */ if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) && !drbr_empty(ifp, txr->br)) igb_mq_start_locked(ifp, txr); #else if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) igb_start_locked(txr, ifp); #endif IGB_TX_UNLOCK(txr); more_rx = igb_rxeof(que, adapter->rx_process_limit, NULL); if (adapter->enable_aim == FALSE) goto no_calc; /* ** Do Adaptive Interrupt Moderation: ** - Write out last calculated setting ** - Calculate based on average size over ** the last interval. 
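**
** Expressed as arithmetic, a hedged reading of the code below:
**	newitr = max(tx_bytes/tx_pkts, rx_bytes/rx_pkts) + 24;
**	newitr = min(newitr, 3000);
**	newitr /= (newitr > 300 && newitr < 1200) ? 3 : 2;
**	newitr &= 0x7FFC;
** with sub-gigabit links short-circuited to IGB_DEFAULT_ITR / 2.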
*/ if (que->eitr_setting) E1000_WRITE_REG(&adapter->hw, E1000_EITR(que->msix), que->eitr_setting); que->eitr_setting = 0; /* Idle, do nothing */ if ((txr->bytes == 0) && (rxr->bytes == 0)) goto no_calc; /* Use half the default if below gigabit */ if (adapter->link_speed != 1000) newitr = IGB_DEFAULT_ITR / 2; else { if ((txr->bytes) && (txr->packets)) newitr = txr->bytes/txr->packets; if ((rxr->bytes) && (rxr->packets)) newitr = max(newitr, (rxr->bytes / rxr->packets)); newitr += 24; /* account for hardware framing and CRC */ /* set an upper boundary */ newitr = min(newitr, 3000); /* Be nice to the mid range */ if ((newitr > 300) && (newitr < 1200)) newitr = (newitr / 3); else newitr = (newitr / 2); } newitr &= 0x7FFC; /* Mask invalid bits */ if (adapter->hw.mac.type == e1000_82575) newitr |= newitr << 16; else newitr |= E1000_EITR_CNT_IGNR; /* save for next interrupt */ que->eitr_setting = newitr; /* Reset state */ txr->bytes = 0; txr->packets = 0; rxr->bytes = 0; rxr->packets = 0; no_calc: /* Schedule a clean task if needed */ if (more_rx) taskqueue_enqueue(que->tq, &que->que_task); else /* Reenable this interrupt */ E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims); return; } /********************************************************************* * * MSIX Link Interrupt Service routine * **********************************************************************/ static void igb_msix_link(void *arg) { struct adapter *adapter = arg; u32 icr; ++adapter->link_irq; icr = E1000_READ_REG(&adapter->hw, E1000_ICR); if (!(icr & E1000_ICR_LSC)) goto spurious; igb_handle_link(adapter, 0); spurious: /* Rearm */ E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC); E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask); return; } /********************************************************************* * * Media Ioctl callback * * This routine is called whenever the user queries the status of * the interface using ifconfig. * **********************************************************************/ static void igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) { struct adapter *adapter = ifp->if_softc; INIT_DEBUGOUT("igb_media_status: begin"); IGB_CORE_LOCK(adapter); igb_update_link_status(adapter); ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; if (!adapter->link_active) { IGB_CORE_UNLOCK(adapter); return; } ifmr->ifm_status |= IFM_ACTIVE; switch (adapter->link_speed) { case 10: ifmr->ifm_active |= IFM_10_T; break; case 100: /* ** Support for 100Mb SFP - these are fiber, ** but the media type appears as serdes */ if (adapter->hw.phy.media_type == e1000_media_type_internal_serdes) ifmr->ifm_active |= IFM_100_FX; else ifmr->ifm_active |= IFM_100_TX; break; case 1000: ifmr->ifm_active |= IFM_1000_T; break; case 2500: ifmr->ifm_active |= IFM_2500_SX; break; } if (adapter->link_duplex == FULL_DUPLEX) ifmr->ifm_active |= IFM_FDX; else ifmr->ifm_active |= IFM_HDX; IGB_CORE_UNLOCK(adapter); } /********************************************************************* * * Media Ioctl callback * * This routine is called when the user changes speed/duplex using * the media/mediaopt options with ifconfig.
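 *
 * (For example, "ifconfig igb0 media 100baseTX mediaopt full-duplex"
 * lands in the IFM_100_TX case below with IFM_FDX set, forcing
 * 100/full with autonegotiation disabled.)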
* **********************************************************************/ static int igb_media_change(struct ifnet *ifp) { struct adapter *adapter = ifp->if_softc; struct ifmedia *ifm = &adapter->media; INIT_DEBUGOUT("igb_media_change: begin"); if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) return (EINVAL); IGB_CORE_LOCK(adapter); switch (IFM_SUBTYPE(ifm->ifm_media)) { case IFM_AUTO: adapter->hw.mac.autoneg = DO_AUTO_NEG; adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT; break; case IFM_1000_LX: case IFM_1000_SX: case IFM_1000_T: adapter->hw.mac.autoneg = DO_AUTO_NEG; adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL; break; case IFM_100_TX: adapter->hw.mac.autoneg = FALSE; adapter->hw.phy.autoneg_advertised = 0; if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX) adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL; else adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF; break; case IFM_10_T: adapter->hw.mac.autoneg = FALSE; adapter->hw.phy.autoneg_advertised = 0; if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX) adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL; else adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF; break; default: device_printf(adapter->dev, "Unsupported media type\n"); } igb_init_locked(adapter); IGB_CORE_UNLOCK(adapter); return (0); } /********************************************************************* * * This routine maps the mbufs to Advanced TX descriptors. * **********************************************************************/ static int igb_xmit(struct tx_ring *txr, struct mbuf **m_headp) { struct adapter *adapter = txr->adapter; u32 olinfo_status = 0, cmd_type_len; int i, j, error, nsegs; int first; bool remap = TRUE; struct mbuf *m_head; bus_dma_segment_t segs[IGB_MAX_SCATTER]; bus_dmamap_t map; struct igb_tx_buf *txbuf; union e1000_adv_tx_desc *txd = NULL; m_head = *m_headp; /* Basic descriptor defines */ cmd_type_len = (E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT); if (m_head->m_flags & M_VLANTAG) cmd_type_len |= E1000_ADVTXD_DCMD_VLE; /* * Important to capture the first descriptor * used because it will contain the index of * the one we tell the hardware to report back */ first = txr->next_avail_desc; txbuf = &txr->tx_buffers[first]; map = txbuf->map; /* * Map the packet for DMA. */ retry: error = bus_dmamap_load_mbuf_sg(txr->txtag, map, *m_headp, segs, &nsegs, BUS_DMA_NOWAIT); if (__predict_false(error)) { struct mbuf *m; switch (error) { case EFBIG: /* Try it again? 
- one try */ if (remap == TRUE) { remap = FALSE; m = m_collapse(*m_headp, M_NOWAIT, IGB_MAX_SCATTER); if (m == NULL) { adapter->mbuf_defrag_failed++; m_freem(*m_headp); *m_headp = NULL; return (ENOBUFS); } *m_headp = m; goto retry; } else return (error); default: txr->no_tx_dma_setup++; m_freem(*m_headp); *m_headp = NULL; return (error); } } /* Make certain there are enough descriptors */ if (txr->tx_avail < (nsegs + 2)) { txr->no_desc_avail++; bus_dmamap_unload(txr->txtag, map); return (ENOBUFS); } m_head = *m_headp; /* ** Set up the appropriate offload context ** this will consume the first descriptor */ error = igb_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status); if (__predict_false(error)) { m_freem(*m_headp); *m_headp = NULL; return (error); } /* 82575 needs the queue index added */ if (adapter->hw.mac.type == e1000_82575) olinfo_status |= txr->me << 4; i = txr->next_avail_desc; for (j = 0; j < nsegs; j++) { bus_size_t seglen; bus_addr_t segaddr; txbuf = &txr->tx_buffers[i]; txd = &txr->tx_base[i]; seglen = segs[j].ds_len; segaddr = htole64(segs[j].ds_addr); txd->read.buffer_addr = segaddr; txd->read.cmd_type_len = htole32(E1000_TXD_CMD_IFCS | cmd_type_len | seglen); txd->read.olinfo_status = htole32(olinfo_status); if (++i == txr->num_desc) i = 0; } txd->read.cmd_type_len |= htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS); txr->tx_avail -= nsegs; txr->next_avail_desc = i; txbuf->m_head = m_head; /* ** Here we swap the map so the last descriptor, ** which gets the completion interrupt has the ** real map, and the first descriptor gets the ** unused map from this descriptor. */ txr->tx_buffers[first].map = txbuf->map; txbuf->map = map; bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE); /* Set the EOP descriptor that will be marked done */ txbuf = &txr->tx_buffers[first]; txbuf->eop = txd; bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); /* * Advance the Transmit Descriptor Tail (Tdt), this tells the * hardware that this frame is available to transmit. 
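 *
 * (The ring is a producer/consumer pair: software fills descriptors
 * and advances the tail, hardware consumes from the head, and the
 * "if (++i == txr->num_desc) i = 0" above handles wraparound.)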
*/ ++txr->total_packets; E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i); return (0); } static void igb_set_promisc(struct adapter *adapter) { struct ifnet *ifp = adapter->ifp; struct e1000_hw *hw = &adapter->hw; u32 reg; if (adapter->vf_ifp) { e1000_promisc_set_vf(hw, e1000_promisc_enabled); return; } reg = E1000_READ_REG(hw, E1000_RCTL); if (ifp->if_flags & IFF_PROMISC) { reg |= (E1000_RCTL_UPE | E1000_RCTL_MPE); E1000_WRITE_REG(hw, E1000_RCTL, reg); } else if (ifp->if_flags & IFF_ALLMULTI) { reg |= E1000_RCTL_MPE; reg &= ~E1000_RCTL_UPE; E1000_WRITE_REG(hw, E1000_RCTL, reg); } } static void igb_disable_promisc(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; struct ifnet *ifp = adapter->ifp; u32 reg; int mcnt = 0; if (adapter->vf_ifp) { e1000_promisc_set_vf(hw, e1000_promisc_disabled); return; } reg = E1000_READ_REG(hw, E1000_RCTL); reg &= (~E1000_RCTL_UPE); if (ifp->if_flags & IFF_ALLMULTI) mcnt = MAX_NUM_MULTICAST_ADDRESSES; else { struct ifmultiaddr *ifma; #if __FreeBSD_version < 800000 IF_ADDR_LOCK(ifp); #else if_maddr_rlock(ifp); #endif TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; if (mcnt == MAX_NUM_MULTICAST_ADDRESSES) break; mcnt++; } #if __FreeBSD_version < 800000 IF_ADDR_UNLOCK(ifp); #else if_maddr_runlock(ifp); #endif } /* Don't disable if in MAX groups */ if (mcnt < MAX_NUM_MULTICAST_ADDRESSES) reg &= (~E1000_RCTL_MPE); E1000_WRITE_REG(hw, E1000_RCTL, reg); } /********************************************************************* * Multicast Update * * This routine is called whenever multicast address list is updated. * **********************************************************************/ static void igb_set_multi(struct adapter *adapter) { struct ifnet *ifp = adapter->ifp; struct ifmultiaddr *ifma; u32 reg_rctl = 0; u8 *mta; int mcnt = 0; IOCTL_DEBUGOUT("igb_set_multi: begin"); mta = adapter->mta; bzero(mta, sizeof(uint8_t) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES); #if __FreeBSD_version < 800000 IF_ADDR_LOCK(ifp); #else if_maddr_rlock(ifp); #endif TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; if (mcnt == MAX_NUM_MULTICAST_ADDRESSES) break; bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr), &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN); mcnt++; } #if __FreeBSD_version < 800000 IF_ADDR_UNLOCK(ifp); #else if_maddr_runlock(ifp); #endif if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) { reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); reg_rctl |= E1000_RCTL_MPE; E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); } else e1000_update_mc_addr_list(&adapter->hw, mta, mcnt); } /********************************************************************* * Timer routine: * This routine checks for link status, * updates statistics, and does the watchdog. 
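 *
 * (Runs once per second via callout_reset(..., hz, ...); the watchdog
 * fires only when every queue looks hung and no pause frames were
 * seen, so flow-control stalls are not misdiagnosed as hangs.)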
* **********************************************************************/ static void igb_local_timer(void *arg) { struct adapter *adapter = arg; device_t dev = adapter->dev; struct ifnet *ifp = adapter->ifp; struct tx_ring *txr = adapter->tx_rings; struct igb_queue *que = adapter->queues; int hung = 0, busy = 0; IGB_CORE_LOCK_ASSERT(adapter); igb_update_link_status(adapter); igb_update_stats_counters(adapter); /* ** Check the TX queues status ** - central locked handling of OACTIVE ** - watchdog only if all queues show hung */ for (int i = 0; i < adapter->num_queues; i++, que++, txr++) { if ((txr->queue_status & IGB_QUEUE_HUNG) && (adapter->pause_frames == 0)) ++hung; if (txr->queue_status & IGB_QUEUE_DEPLETED) ++busy; if ((txr->queue_status & IGB_QUEUE_IDLE) == 0) taskqueue_enqueue(que->tq, &que->que_task); } if (hung == adapter->num_queues) goto timeout; if (busy == adapter->num_queues) ifp->if_drv_flags |= IFF_DRV_OACTIVE; else if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) && (busy < adapter->num_queues)) ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; adapter->pause_frames = 0; callout_reset(&adapter->timer, hz, igb_local_timer, adapter); #ifndef DEVICE_POLLING /* Schedule all queue interrupts - deadlock protection */ E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->que_mask); #endif return; timeout: device_printf(adapter->dev, "Watchdog timeout -- resetting\n"); device_printf(dev,"Queue(%d) tdh = %d, hw tdt = %d\n", txr->me, E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)), E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me))); device_printf(dev,"TX(%d) desc avail = %d," "Next TX to Clean = %d\n", txr->me, txr->tx_avail, txr->next_to_clean); adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING; adapter->watchdog_events++; igb_init_locked(adapter); } static void igb_update_link_status(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; struct e1000_fc_info *fc = &hw->fc; struct ifnet *ifp = adapter->ifp; device_t dev = adapter->dev; struct tx_ring *txr = adapter->tx_rings; u32 link_check, thstat, ctrl; char *flowctl = NULL; link_check = thstat = ctrl = 0; /* Get the cached link value or read for real */ switch (hw->phy.media_type) { case e1000_media_type_copper: if (hw->mac.get_link_status) { /* Do the work to read phy */ e1000_check_for_link(hw); link_check = !hw->mac.get_link_status; } else link_check = TRUE; break; case e1000_media_type_fiber: e1000_check_for_link(hw); link_check = (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU); break; case e1000_media_type_internal_serdes: e1000_check_for_link(hw); link_check = adapter->hw.mac.serdes_has_link; break; /* VF device is type_unknown */ case e1000_media_type_unknown: e1000_check_for_link(hw); link_check = !hw->mac.get_link_status; /* Fall thru */ default: break; } /* Check for thermal downshift or shutdown */ if (hw->mac.type == e1000_i350) { thstat = E1000_READ_REG(hw, E1000_THSTAT); ctrl = E1000_READ_REG(hw, E1000_CTRL_EXT); } /* Get the flow control for display */ switch (fc->current_mode) { case e1000_fc_rx_pause: flowctl = "RX"; break; case e1000_fc_tx_pause: flowctl = "TX"; break; case e1000_fc_full: flowctl = "Full"; break; case e1000_fc_none: default: flowctl = "None"; break; } /* Now we check if a transition has happened */ if (link_check && (adapter->link_active == 0)) { e1000_get_speed_and_duplex(&adapter->hw, &adapter->link_speed, &adapter->link_duplex); if (bootverbose) device_printf(dev, "Link is up %d Mbps %s," " Flow Control: %s\n", adapter->link_speed, ((adapter->link_duplex == FULL_DUPLEX) ? 
"Full Duplex" : "Half Duplex"), flowctl); adapter->link_active = 1; ifp->if_baudrate = adapter->link_speed * 1000000; if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) && (thstat & E1000_THSTAT_LINK_THROTTLE)) device_printf(dev, "Link: thermal downshift\n"); /* Delay Link Up for Phy update */ if (((hw->mac.type == e1000_i210) || (hw->mac.type == e1000_i211)) && (hw->phy.id == I210_I_PHY_ID)) msec_delay(I210_LINK_DELAY); /* Reset if the media type changed. */ if (hw->dev_spec._82575.media_changed) { hw->dev_spec._82575.media_changed = false; adapter->flags |= IGB_MEDIA_RESET; igb_reset(adapter); } /* This can sleep */ if_link_state_change(ifp, LINK_STATE_UP); } else if (!link_check && (adapter->link_active == 1)) { ifp->if_baudrate = adapter->link_speed = 0; adapter->link_duplex = 0; if (bootverbose) device_printf(dev, "Link is Down\n"); if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) && (thstat & E1000_THSTAT_PWR_DOWN)) device_printf(dev, "Link: thermal shutdown\n"); adapter->link_active = 0; /* This can sleep */ if_link_state_change(ifp, LINK_STATE_DOWN); /* Reset queue state */ for (int i = 0; i < adapter->num_queues; i++, txr++) txr->queue_status = IGB_QUEUE_IDLE; } } /********************************************************************* * * This routine disables all traffic on the adapter by issuing a * global reset on the MAC and deallocates TX/RX buffers. * **********************************************************************/ static void igb_stop(void *arg) { struct adapter *adapter = arg; struct ifnet *ifp = adapter->ifp; struct tx_ring *txr = adapter->tx_rings; IGB_CORE_LOCK_ASSERT(adapter); INIT_DEBUGOUT("igb_stop: begin"); igb_disable_intr(adapter); callout_stop(&adapter->timer); /* Tell the stack that the interface is no longer active */ ifp->if_drv_flags &= ~IFF_DRV_RUNNING; ifp->if_drv_flags |= IFF_DRV_OACTIVE; /* Disarm watchdog timer. */ for (int i = 0; i < adapter->num_queues; i++, txr++) { IGB_TX_LOCK(txr); txr->queue_status = IGB_QUEUE_IDLE; IGB_TX_UNLOCK(txr); } e1000_reset_hw(&adapter->hw); E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0); e1000_led_off(&adapter->hw); e1000_cleanup_led(&adapter->hw); } /********************************************************************* * * Determine hardware revision. * **********************************************************************/ static void igb_identify_hardware(struct adapter *adapter) { device_t dev = adapter->dev; /* Make sure our PCI config space has the necessary stuff set */ pci_enable_busmaster(dev); adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2); /* Save off the information about this board */ adapter->hw.vendor_id = pci_get_vendor(dev); adapter->hw.device_id = pci_get_device(dev); adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1); adapter->hw.subsystem_vendor_id = pci_read_config(dev, PCIR_SUBVEND_0, 2); adapter->hw.subsystem_device_id = pci_read_config(dev, PCIR_SUBDEV_0, 2); /* Set MAC type early for PCI setup */ e1000_set_mac_type(&adapter->hw); /* Are we a VF device? 
 */
	if ((adapter->hw.mac.type == e1000_vfadapt) ||
	    (adapter->hw.mac.type == e1000_vfadapt_i350))
		adapter->vf_ifp = 1;
	else
		adapter->vf_ifp = 0;
}

static int
igb_allocate_pci_resources(struct adapter *adapter)
{
	device_t	dev = adapter->dev;
	int		rid;

	rid = PCIR_BAR(0);
	adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
	    &rid, RF_ACTIVE);
	if (adapter->pci_mem == NULL) {
		device_printf(dev, "Unable to allocate bus resource: memory\n");
		return (ENXIO);
	}
	adapter->osdep.mem_bus_space_tag =
	    rman_get_bustag(adapter->pci_mem);
	adapter->osdep.mem_bus_space_handle =
	    rman_get_bushandle(adapter->pci_mem);
	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;

	adapter->num_queues = 1; /* Defaults for Legacy or MSI */

	/* This will setup either MSI/X or MSI */
	adapter->msix = igb_setup_msix(adapter);
	adapter->hw.back = &adapter->osdep;

	return (0);
}

/*********************************************************************
 *
 *  Setup the Legacy or MSI Interrupt handler
 *
 **********************************************************************/
static int
igb_allocate_legacy(struct adapter *adapter)
{
	device_t		dev = adapter->dev;
	struct igb_queue	*que = adapter->queues;
#ifndef IGB_LEGACY_TX
	struct tx_ring		*txr = adapter->tx_rings;
#endif
	int			error, rid = 0;

	/* Turn off all interrupts */
	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);

	/* MSI RID is 1 */
	if (adapter->msix == 1)
		rid = 1;

	/* We allocate a single interrupt resource */
	adapter->res = bus_alloc_resource_any(dev,
	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
	if (adapter->res == NULL) {
		device_printf(dev, "Unable to allocate bus resource: "
		    "interrupt\n");
		return (ENXIO);
	}

#ifndef IGB_LEGACY_TX
	TASK_INIT(&txr->txq_task, 0, igb_deferred_mq_start, txr);
#endif

	/*
	 * Try allocating a fast interrupt and the associated deferred
	 * processing contexts.
	 */
	TASK_INIT(&que->que_task, 0, igb_handle_que, que);
	/* Make tasklet for deferred link handling */
	TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
	que->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
	    taskqueue_thread_enqueue, &que->tq);
	taskqueue_start_threads(&que->tq, 1, PI_NET, "%s taskq",
	    device_get_nameunit(adapter->dev));
	if ((error = bus_setup_intr(dev, adapter->res,
	    INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL,
	    adapter, &adapter->tag)) != 0) {
		device_printf(dev, "Failed to register fast interrupt "
		    "handler: %d\n", error);
		taskqueue_free(que->tq);
		que->tq = NULL;
		return (error);
	}

	return (0);
}

/*********************************************************************
 *
 *  Setup the MSIX Queue Interrupt handlers:
 *
 **********************************************************************/
static int
igb_allocate_msix(struct adapter *adapter)
{
	device_t		dev = adapter->dev;
	struct igb_queue	*que = adapter->queues;
	int			error, rid, vector = 0;
	int			cpu_id = 0;
#ifdef	RSS
	cpuset_t		cpu_mask;
#endif

	/* Be sure to start with all interrupts disabled */
	E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
	E1000_WRITE_FLUSH(&adapter->hw);

#ifdef RSS
	/*
	 * If we're doing RSS, the number of queues needs to
	 * match the number of RSS buckets that are configured.
	 *
	 * + If there's more queues than RSS buckets, we'll end
	 *   up with queues that get no traffic.
	 *
	 * + If there's more RSS buckets than queues, we'll end
	 *   up having multiple RSS buckets map to the same queue,
	 *   so there'll be some contention.
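	 *
	 *   For example (illustrative): eight RSS buckets on a
	 *   four-queue adapter leaves every queue serving two buckets.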
*/ if (adapter->num_queues != rss_getnumbuckets()) { device_printf(dev, "%s: number of queues (%d) != number of RSS buckets (%d)" "; performance will be impacted.\n", __func__, adapter->num_queues, rss_getnumbuckets()); } #endif for (int i = 0; i < adapter->num_queues; i++, vector++, que++) { rid = vector +1; que->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); if (que->res == NULL) { device_printf(dev, "Unable to allocate bus resource: " "MSIX Queue Interrupt\n"); return (ENXIO); } error = bus_setup_intr(dev, que->res, INTR_TYPE_NET | INTR_MPSAFE, NULL, igb_msix_que, que, &que->tag); if (error) { que->res = NULL; device_printf(dev, "Failed to register Queue handler"); return (error); } #if __FreeBSD_version >= 800504 bus_describe_intr(dev, que->res, que->tag, "que %d", i); #endif que->msix = vector; if (adapter->hw.mac.type == e1000_82575) que->eims = E1000_EICR_TX_QUEUE0 << i; else que->eims = 1 << vector; #ifdef RSS /* * The queue ID is used as the RSS layer bucket ID. * We look up the queue ID -> RSS CPU ID and select * that. */ cpu_id = rss_getcpu(i % rss_getnumbuckets()); #else /* * Bind the msix vector, and thus the * rings to the corresponding cpu. * * This just happens to match the default RSS round-robin * bucket -> queue -> CPU allocation. */ if (adapter->num_queues > 1) { if (igb_last_bind_cpu < 0) igb_last_bind_cpu = CPU_FIRST(); cpu_id = igb_last_bind_cpu; } #endif if (adapter->num_queues > 1) { bus_bind_intr(dev, que->res, cpu_id); #ifdef RSS device_printf(dev, "Bound queue %d to RSS bucket %d\n", i, cpu_id); #else device_printf(dev, "Bound queue %d to cpu %d\n", i, cpu_id); #endif } #ifndef IGB_LEGACY_TX TASK_INIT(&que->txr->txq_task, 0, igb_deferred_mq_start, que->txr); #endif /* Make tasklet for deferred handling */ TASK_INIT(&que->que_task, 0, igb_handle_que, que); que->tq = taskqueue_create("igb_que", M_NOWAIT, taskqueue_thread_enqueue, &que->tq); if (adapter->num_queues > 1) { /* * Only pin the taskqueue thread to a CPU if * RSS is in use. * * This again just happens to match the default RSS * round-robin bucket -> queue -> CPU allocation. 
*/ #ifdef RSS CPU_SETOF(cpu_id, &cpu_mask); taskqueue_start_threads_cpuset(&que->tq, 1, PI_NET, &cpu_mask, "%s que (bucket %d)", device_get_nameunit(adapter->dev), cpu_id); #else taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que (qid %d)", device_get_nameunit(adapter->dev), cpu_id); #endif } else { taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que", device_get_nameunit(adapter->dev)); } /* Finally update the last bound CPU id */ if (adapter->num_queues > 1) igb_last_bind_cpu = CPU_NEXT(igb_last_bind_cpu); } /* And Link */ rid = vector + 1; adapter->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); if (adapter->res == NULL) { device_printf(dev, "Unable to allocate bus resource: " "MSIX Link Interrupt\n"); return (ENXIO); } if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET | INTR_MPSAFE, NULL, igb_msix_link, adapter, &adapter->tag)) != 0) { device_printf(dev, "Failed to register Link handler"); return (error); } #if __FreeBSD_version >= 800504 bus_describe_intr(dev, adapter->res, adapter->tag, "link"); #endif adapter->linkvec = vector; return (0); } static void igb_configure_queues(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; struct igb_queue *que; u32 tmp, ivar = 0, newitr = 0; /* First turn on RSS capability */ if (adapter->hw.mac.type != e1000_82575) E1000_WRITE_REG(hw, E1000_GPIE, E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME | E1000_GPIE_PBA | E1000_GPIE_NSICR); /* Turn on MSIX */ switch (adapter->hw.mac.type) { case e1000_82580: case e1000_i350: case e1000_i354: case e1000_i210: case e1000_i211: case e1000_vfadapt: case e1000_vfadapt_i350: /* RX entries */ for (int i = 0; i < adapter->num_queues; i++) { u32 index = i >> 1; ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); que = &adapter->queues[i]; if (i & 1) { ivar &= 0xFF00FFFF; ivar |= (que->msix | E1000_IVAR_VALID) << 16; } else { ivar &= 0xFFFFFF00; ivar |= que->msix | E1000_IVAR_VALID; } E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); } /* TX entries */ for (int i = 0; i < adapter->num_queues; i++) { u32 index = i >> 1; ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); que = &adapter->queues[i]; if (i & 1) { ivar &= 0x00FFFFFF; ivar |= (que->msix | E1000_IVAR_VALID) << 24; } else { ivar &= 0xFFFF00FF; ivar |= (que->msix | E1000_IVAR_VALID) << 8; } E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); adapter->que_mask |= que->eims; } /* And for the link interrupt */ ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8; adapter->link_mask = 1 << adapter->linkvec; E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar); break; case e1000_82576: /* RX entries */ for (int i = 0; i < adapter->num_queues; i++) { u32 index = i & 0x7; /* Each IVAR has two entries */ ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); que = &adapter->queues[i]; if (i < 8) { ivar &= 0xFFFFFF00; ivar |= que->msix | E1000_IVAR_VALID; } else { ivar &= 0xFF00FFFF; ivar |= (que->msix | E1000_IVAR_VALID) << 16; } E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); adapter->que_mask |= que->eims; } /* TX entries */ for (int i = 0; i < adapter->num_queues; i++) { u32 index = i & 0x7; /* Each IVAR has two entries */ ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); que = &adapter->queues[i]; if (i < 8) { ivar &= 0xFFFF00FF; ivar |= (que->msix | E1000_IVAR_VALID) << 8; } else { ivar &= 0x00FFFFFF; ivar |= (que->msix | E1000_IVAR_VALID) << 24; } E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); adapter->que_mask |= que->eims; } /* And for the link interrupt */ ivar = (adapter->linkvec | E1000_IVAR_VALID) << 
8; adapter->link_mask = 1 << adapter->linkvec; E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar); break; case e1000_82575: /* enable MSI-X support*/ tmp = E1000_READ_REG(hw, E1000_CTRL_EXT); tmp |= E1000_CTRL_EXT_PBA_CLR; /* Auto-Mask interrupts upon ICR read. */ tmp |= E1000_CTRL_EXT_EIAME; tmp |= E1000_CTRL_EXT_IRCA; E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp); /* Queues */ for (int i = 0; i < adapter->num_queues; i++) { que = &adapter->queues[i]; tmp = E1000_EICR_RX_QUEUE0 << i; tmp |= E1000_EICR_TX_QUEUE0 << i; que->eims = tmp; E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0), i, que->eims); adapter->que_mask |= que->eims; } /* Link */ E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec), E1000_EIMS_OTHER); adapter->link_mask |= E1000_EIMS_OTHER; default: break; } /* Set the starting interrupt rate */ if (igb_max_interrupt_rate > 0) newitr = (4000000 / igb_max_interrupt_rate) & 0x7FFC; if (hw->mac.type == e1000_82575) newitr |= newitr << 16; else newitr |= E1000_EITR_CNT_IGNR; for (int i = 0; i < adapter->num_queues; i++) { que = &adapter->queues[i]; E1000_WRITE_REG(hw, E1000_EITR(que->msix), newitr); } return; } static void igb_free_pci_resources(struct adapter *adapter) { struct igb_queue *que = adapter->queues; device_t dev = adapter->dev; int rid; /* ** There is a slight possibility of a failure mode ** in attach that will result in entering this function ** before interrupt resources have been initialized, and ** in that case we do not want to execute the loops below ** We can detect this reliably by the state of the adapter ** res pointer. */ if (adapter->res == NULL) goto mem; /* * First release all the interrupt resources: */ for (int i = 0; i < adapter->num_queues; i++, que++) { rid = que->msix + 1; if (que->tag != NULL) { bus_teardown_intr(dev, que->res, que->tag); que->tag = NULL; } if (que->res != NULL) bus_release_resource(dev, SYS_RES_IRQ, rid, que->res); } /* Clean the Legacy or Link interrupt last */ if (adapter->linkvec) /* we are doing MSIX */ rid = adapter->linkvec + 1; else (adapter->msix != 0) ? (rid = 1):(rid = 0); que = adapter->queues; if (adapter->tag != NULL) { taskqueue_drain(que->tq, &adapter->link_task); bus_teardown_intr(dev, adapter->res, adapter->tag); adapter->tag = NULL; } if (adapter->res != NULL) bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res); for (int i = 0; i < adapter->num_queues; i++, que++) { if (que->tq != NULL) { #ifndef IGB_LEGACY_TX taskqueue_drain(que->tq, &que->txr->txq_task); #endif taskqueue_drain(que->tq, &que->que_task); taskqueue_free(que->tq); } } mem: if (adapter->msix) pci_release_msi(dev); if (adapter->msix_mem != NULL) bus_release_resource(dev, SYS_RES_MEMORY, adapter->memrid, adapter->msix_mem); if (adapter->pci_mem != NULL) bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(0), adapter->pci_mem); } /* * Setup Either MSI/X or MSI */ static int igb_setup_msix(struct adapter *adapter) { device_t dev = adapter->dev; int bar, want, queues, msgs, maxqueues; /* tuneable override */ if (igb_enable_msix == 0) goto msi; /* First try MSI/X */ msgs = pci_msix_count(dev); if (msgs == 0) goto msi; /* ** Some new devices, as with ixgbe, now may ** use a different BAR, so we need to keep ** track of which is used. 
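	** (If the BAR at IGB_MSIX_BAR reads back as zero, the table is
	** assumed to sit behind the next BAR, hence memrid += 4 below.)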
*/ adapter->memrid = PCIR_BAR(IGB_MSIX_BAR); bar = pci_read_config(dev, adapter->memrid, 4); if (bar == 0) /* use next bar */ adapter->memrid += 4; adapter->msix_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &adapter->memrid, RF_ACTIVE); if (adapter->msix_mem == NULL) { /* May not be enabled */ device_printf(adapter->dev, "Unable to map MSIX table \n"); goto msi; } queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus; /* Override via tuneable */ if (igb_num_queues != 0) queues = igb_num_queues; #ifdef RSS /* If we're doing RSS, clamp at the number of RSS buckets */ if (queues > rss_getnumbuckets()) queues = rss_getnumbuckets(); #endif /* Sanity check based on HW */ switch (adapter->hw.mac.type) { case e1000_82575: maxqueues = 4; break; case e1000_82576: case e1000_82580: case e1000_i350: case e1000_i354: maxqueues = 8; break; case e1000_i210: maxqueues = 4; break; case e1000_i211: maxqueues = 2; break; default: /* VF interfaces */ maxqueues = 1; break; } /* Final clamp on the actual hardware capability */ if (queues > maxqueues) queues = maxqueues; /* ** One vector (RX/TX pair) per queue ** plus an additional for Link interrupt */ want = queues + 1; if (msgs >= want) msgs = want; else { device_printf(adapter->dev, "MSIX Configuration Problem, " "%d vectors configured, but %d queues wanted!\n", msgs, want); goto msi; } if ((pci_alloc_msix(dev, &msgs) == 0) && (msgs == want)) { device_printf(adapter->dev, "Using MSIX interrupts with %d vectors\n", msgs); adapter->num_queues = queues; return (msgs); } /* ** If MSIX alloc failed or provided us with ** less than needed, free and fall through to MSI */ pci_release_msi(dev); msi: if (adapter->msix_mem != NULL) { bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem); adapter->msix_mem = NULL; } msgs = 1; if (pci_alloc_msi(dev, &msgs) == 0) { device_printf(adapter->dev," Using an MSI interrupt\n"); return (msgs); } device_printf(adapter->dev," Using a Legacy interrupt\n"); return (0); } /********************************************************************* * * Initialize the DMA Coalescing feature * **********************************************************************/ static void igb_init_dmac(struct adapter *adapter, u32 pba) { device_t dev = adapter->dev; struct e1000_hw *hw = &adapter->hw; u32 dmac, reg = ~E1000_DMACR_DMAC_EN; u16 hwm; if (hw->mac.type == e1000_i211) return; if (hw->mac.type > e1000_82580) { if (adapter->dmac == 0) { /* Disabling it */ E1000_WRITE_REG(hw, E1000_DMACR, reg); return; } else device_printf(dev, "DMA Coalescing enabled\n"); /* Set starting threshold */ E1000_WRITE_REG(hw, E1000_DMCTXTH, 0); hwm = 64 * pba - adapter->max_frame_size / 16; if (hwm < 64 * (pba - 6)) hwm = 64 * (pba - 6); reg = E1000_READ_REG(hw, E1000_FCRTC); reg &= ~E1000_FCRTC_RTH_COAL_MASK; reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT) & E1000_FCRTC_RTH_COAL_MASK); E1000_WRITE_REG(hw, E1000_FCRTC, reg); dmac = pba - adapter->max_frame_size / 512; if (dmac < pba - 10) dmac = pba - 10; reg = E1000_READ_REG(hw, E1000_DMACR); reg &= ~E1000_DMACR_DMACTHR_MASK; reg = ((dmac << E1000_DMACR_DMACTHR_SHIFT) & E1000_DMACR_DMACTHR_MASK); /* transition to L0x or L1 if available..*/ reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK); /* Check if status is 2.5Gb backplane connection * before configuration of watchdog timer, which is * in msec values in 12.8usec intervals * watchdog timer= msec values in 32usec intervals * for non 2.5Gb connection */ if (hw->mac.type == e1000_i354) { int status = E1000_READ_REG(hw, 
		    E1000_STATUS);
			if ((status & E1000_STATUS_2P5_SKU) &&
			    (!(status & E1000_STATUS_2P5_SKU_OVER)))
				reg |= ((adapter->dmac * 5) >> 6);
			else
				reg |= (adapter->dmac >> 5);
		} else {
			reg |= (adapter->dmac >> 5);
		}

		E1000_WRITE_REG(hw, E1000_DMACR, reg);

		E1000_WRITE_REG(hw, E1000_DMCRTRH, 0);

		/* Set the interval before transition */
		reg = E1000_READ_REG(hw, E1000_DMCTLX);
		if (hw->mac.type == e1000_i350)
			reg |= IGB_DMCTLX_DCFLUSH_DIS;
		/*
		** On a 2.5Gb connection the TTLX unit is 0.4 usec,
		** so a 4 usec delay needs the value 0xA instead of 0x4.
		*/
		if (hw->mac.type == e1000_i354) {
			int status = E1000_READ_REG(hw, E1000_STATUS);
			if ((status & E1000_STATUS_2P5_SKU) &&
			    (!(status & E1000_STATUS_2P5_SKU_OVER)))
				reg |= 0xA;
			else
				reg |= 0x4;
		} else {
			reg |= 0x4;
		}

		E1000_WRITE_REG(hw, E1000_DMCTLX, reg);

		/* free space in tx packet buffer to wake from DMA coal */
		E1000_WRITE_REG(hw, E1000_DMCTXTH, (IGB_TXPBSIZE -
		    (2 * adapter->max_frame_size)) >> 6);

		/* make low power state decision controlled by DMA coal */
		reg = E1000_READ_REG(hw, E1000_PCIEMISC);
		reg &= ~E1000_PCIEMISC_LX_DECISION;
		E1000_WRITE_REG(hw, E1000_PCIEMISC, reg);
	} else if (hw->mac.type == e1000_82580) {
		u32 reg = E1000_READ_REG(hw, E1000_PCIEMISC);
		E1000_WRITE_REG(hw, E1000_PCIEMISC,
		    reg & ~E1000_PCIEMISC_LX_DECISION);
		E1000_WRITE_REG(hw, E1000_DMACR, 0);
	}
}

/*********************************************************************
 *
 *  Set up a fresh starting state
 *
 **********************************************************************/
static void
igb_reset(struct adapter *adapter)
{
	device_t	dev = adapter->dev;
	struct e1000_hw *hw = &adapter->hw;
	struct e1000_fc_info *fc = &hw->fc;
	struct ifnet	*ifp = adapter->ifp;
	u32		pba = 0;
	u16		hwm;

	INIT_DEBUGOUT("igb_reset: begin");

	/* Let the firmware know the OS is in control */
	igb_get_hw_control(adapter);

	/*
	 * Packet Buffer Allocation (PBA)
	 * Writing PBA sets the receive portion of the buffer
	 * the remainder is used for the transmit buffer.
	 */
	switch (hw->mac.type) {
	case e1000_82575:
		pba = E1000_PBA_32K;
		break;
	case e1000_82576:
	case e1000_vfadapt:
		pba = E1000_READ_REG(hw, E1000_RXPBS);
		pba &= E1000_RXPBS_SIZE_MASK_82576;
		break;
	case e1000_82580:
	case e1000_i350:
	case e1000_i354:
	case e1000_vfadapt_i350:
		pba = E1000_READ_REG(hw, E1000_RXPBS);
		pba = e1000_rxpbs_adjust_82580(pba);
		break;
	case e1000_i210:
	case e1000_i211:
		pba = E1000_PBA_34K;
		break;
	default:
		break;
	}

	/* Special needs in case of Jumbo frames */
	if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) {
		u32 tx_space, min_tx, min_rx;
		pba = E1000_READ_REG(hw, E1000_PBA);
		tx_space = pba >> 16;
		pba &= 0xffff;
		min_tx = (adapter->max_frame_size +
		    sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2;
		min_tx = roundup2(min_tx, 1024);
		min_tx >>= 10;
		min_rx = adapter->max_frame_size;
		min_rx = roundup2(min_rx, 1024);
		min_rx >>= 10;
		if (tx_space < min_tx &&
		    ((min_tx - tx_space) < pba)) {
			pba = pba - (min_tx - tx_space);
			/*
			 * if short on rx space, rx wins
			 * and must trump tx adjustment
			 */
			if (pba < min_rx)
				pba = min_rx;
		}
		E1000_WRITE_REG(hw, E1000_PBA, pba);
	}

	INIT_DEBUGOUT1("igb_init: pba=%dK", pba);

	/*
	 * These parameters control the automatic generation (Tx) and
	 * response (Rx) to Ethernet PAUSE frames.
	 * - High water mark should allow for at least two frames to be
	 *   received after sending an XOFF.
	 * - Low water mark works best when it is very near the high water mark.
	 *   This allows the receiver to restart by sending XON when it has
	 *   drained a bit.
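	 *   For example (illustrative numbers): pba = 34 (KB) and a
	 *   1522-byte max frame give
	 *   hwm = min(34816 * 9 / 10, 34816 - 2 * 1522)
	 *       = min(31334, 31772) = 31334,
	 *   before the masking below rounds it down to the controller's
	 *   8- or 16-byte granularity.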
*/ hwm = min(((pba << 10) * 9 / 10), ((pba << 10) - 2 * adapter->max_frame_size)); if (hw->mac.type < e1000_82576) { fc->high_water = hwm & 0xFFF8; /* 8-byte granularity */ fc->low_water = fc->high_water - 8; } else { fc->high_water = hwm & 0xFFF0; /* 16-byte granularity */ fc->low_water = fc->high_water - 16; } fc->pause_time = IGB_FC_PAUSE_TIME; fc->send_xon = TRUE; if (adapter->fc) fc->requested_mode = adapter->fc; else fc->requested_mode = e1000_fc_default; /* Issue a global reset */ e1000_reset_hw(hw); E1000_WRITE_REG(hw, E1000_WUC, 0); /* Reset for AutoMediaDetect */ if (adapter->flags & IGB_MEDIA_RESET) { e1000_setup_init_funcs(hw, TRUE); e1000_get_bus_info(hw); adapter->flags &= ~IGB_MEDIA_RESET; } if (e1000_init_hw(hw) < 0) device_printf(dev, "Hardware Initialization Failed\n"); /* Setup DMA Coalescing */ igb_init_dmac(adapter, pba); E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN); e1000_get_phy_info(hw); e1000_check_for_link(hw); return; } /********************************************************************* * * Setup networking device structure and register an interface. * **********************************************************************/ static int igb_setup_interface(device_t dev, struct adapter *adapter) { struct ifnet *ifp; INIT_DEBUGOUT("igb_setup_interface: begin"); ifp = adapter->ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { device_printf(dev, "can not allocate ifnet structure\n"); return (-1); } if_initname(ifp, device_get_name(dev), device_get_unit(dev)); ifp->if_init = igb_init; ifp->if_softc = adapter; ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = igb_ioctl; ifp->if_get_counter = igb_get_counter; /* TSO parameters */ ifp->if_hw_tsomax = IP_MAXPACKET; ifp->if_hw_tsomaxsegcount = IGB_MAX_SCATTER; ifp->if_hw_tsomaxsegsize = IGB_TSO_SEG_SIZE; #ifndef IGB_LEGACY_TX ifp->if_transmit = igb_mq_start; ifp->if_qflush = igb_qflush; #else ifp->if_start = igb_start; IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1); ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1; IFQ_SET_READY(&ifp->if_snd); #endif ether_ifattach(ifp, adapter->hw.mac.addr); ifp->if_capabilities = ifp->if_capenable = 0; ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM; #if __FreeBSD_version >= 1000000 ifp->if_capabilities |= IFCAP_HWCSUM_IPV6; #endif ifp->if_capabilities |= IFCAP_TSO; ifp->if_capabilities |= IFCAP_JUMBO_MTU; ifp->if_capenable = ifp->if_capabilities; /* Don't enable LRO by default */ ifp->if_capabilities |= IFCAP_LRO; #ifdef DEVICE_POLLING ifp->if_capabilities |= IFCAP_POLLING; #endif /* * Tell the upper layer(s) we * support full VLAN capability. */ ifp->if_hdrlen = sizeof(struct ether_vlan_header); ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWTSO | IFCAP_VLAN_MTU; ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWTSO | IFCAP_VLAN_MTU; /* ** Don't turn this on by default, if vlans are ** created on another pseudo device (eg. lagg) ** then vlan events are not passed thru, breaking ** operation, but with HW FILTER off it works. If ** using vlans directly on the igb driver you can ** enable this and get full hardware tag filtering. 
*/ ifp->if_capabilities |= IFCAP_VLAN_HWFILTER; /* * Specify the media types supported by this adapter and register * callbacks to update media and link information */ ifmedia_init(&adapter->media, IFM_IMASK, igb_media_change, igb_media_status); if ((adapter->hw.phy.media_type == e1000_media_type_fiber) || (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) { ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX, 0, NULL); ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL); } else { ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL); ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX, 0, NULL); ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX, 0, NULL); ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX, 0, NULL); if (adapter->hw.phy.type != e1000_phy_ife) { ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL); ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_T, 0, NULL); } } ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO); return (0); } /* * Manage DMA'able memory. */ static void igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) { if (error) return; *(bus_addr_t *) arg = segs[0].ds_addr; } static int igb_dma_malloc(struct adapter *adapter, bus_size_t size, struct igb_dma_alloc *dma, int mapflags) { int error; error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */ IGB_DBA_ALIGN, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ size, /* maxsize */ 1, /* nsegments */ size, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockarg */ &dma->dma_tag); if (error) { device_printf(adapter->dev, "%s: bus_dma_tag_create failed: %d\n", __func__, error); goto fail_0; } error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr, BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map); if (error) { device_printf(adapter->dev, "%s: bus_dmamem_alloc(%ju) failed: %d\n", __func__, (uintmax_t)size, error); goto fail_2; } dma->dma_paddr = 0; error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr, size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT); if (error || dma->dma_paddr == 0) { device_printf(adapter->dev, "%s: bus_dmamap_load failed: %d\n", __func__, error); goto fail_3; } return (0); fail_3: bus_dmamap_unload(dma->dma_tag, dma->dma_map); fail_2: bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map); bus_dma_tag_destroy(dma->dma_tag); fail_0: dma->dma_tag = NULL; return (error); } static void igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma) { if (dma->dma_tag == NULL) return; if (dma->dma_paddr != 0) { bus_dmamap_sync(dma->dma_tag, dma->dma_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(dma->dma_tag, dma->dma_map); dma->dma_paddr = 0; } if (dma->dma_vaddr != NULL) { bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map); dma->dma_vaddr = NULL; } bus_dma_tag_destroy(dma->dma_tag); dma->dma_tag = NULL; } /********************************************************************* * * Allocate memory for the transmit and receive rings, and then * the descriptors associated with each, called only once at attach. 
* **********************************************************************/ static int igb_allocate_queues(struct adapter *adapter) { device_t dev = adapter->dev; struct igb_queue *que = NULL; struct tx_ring *txr = NULL; struct rx_ring *rxr = NULL; int rsize, tsize, error = E1000_SUCCESS; int txconf = 0, rxconf = 0; /* First allocate the top level queue structs */ if (!(adapter->queues = (struct igb_queue *) malloc(sizeof(struct igb_queue) * adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate queue memory\n"); error = ENOMEM; goto fail; } /* Next allocate the TX ring struct memory */ if (!(adapter->tx_rings = (struct tx_ring *) malloc(sizeof(struct tx_ring) * adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate TX ring memory\n"); error = ENOMEM; goto tx_fail; } /* Now allocate the RX */ if (!(adapter->rx_rings = (struct rx_ring *) malloc(sizeof(struct rx_ring) * adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate RX ring memory\n"); error = ENOMEM; goto rx_fail; } tsize = roundup2(adapter->num_tx_desc * sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN); /* * Now set up the TX queues, txconf is needed to handle the * possibility that things fail midcourse and we need to * undo memory gracefully */ for (int i = 0; i < adapter->num_queues; i++, txconf++) { /* Set up some basics */ txr = &adapter->tx_rings[i]; txr->adapter = adapter; txr->me = i; txr->num_desc = adapter->num_tx_desc; /* Initialize the TX lock */ snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)", device_get_nameunit(dev), txr->me); mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF); if (igb_dma_malloc(adapter, tsize, &txr->txdma, BUS_DMA_NOWAIT)) { device_printf(dev, "Unable to allocate TX Descriptor memory\n"); error = ENOMEM; goto err_tx_desc; } txr->tx_base = (union e1000_adv_tx_desc *)txr->txdma.dma_vaddr; bzero((void *)txr->tx_base, tsize); /* Now allocate transmit buffers for the ring */ if (igb_allocate_transmit_buffers(txr)) { device_printf(dev, "Critical Failure setting up transmit buffers\n"); error = ENOMEM; goto err_tx_desc; } #ifndef IGB_LEGACY_TX /* Allocate a buf ring */ txr->br = buf_ring_alloc(igb_buf_ring_size, M_DEVBUF, M_WAITOK, &txr->tx_mtx); #endif } /* * Next the RX queues... 
 */
	rsize = roundup2(adapter->num_rx_desc *
	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
		rxr = &adapter->rx_rings[i];
		rxr->adapter = adapter;
		rxr->me = i;

		/* Initialize the RX lock */
		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
		    device_get_nameunit(dev), rxr->me);
		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);

		if (igb_dma_malloc(adapter, rsize,
		    &rxr->rxdma, BUS_DMA_NOWAIT)) {
			device_printf(dev,
			    "Unable to allocate RX Descriptor memory\n");
			error = ENOMEM;
			goto err_rx_desc;
		}
		rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
		bzero((void *)rxr->rx_base, rsize);

		/* Allocate receive buffers for the ring */
		if (igb_allocate_receive_buffers(rxr)) {
			device_printf(dev,
			    "Critical Failure setting up receive buffers\n");
			error = ENOMEM;
			goto err_rx_desc;
		}
	}

	/*
	** Finally set up the queue holding structs
	*/
	for (int i = 0; i < adapter->num_queues; i++) {
		que = &adapter->queues[i];
		que->adapter = adapter;
		que->txr = &adapter->tx_rings[i];
		que->rxr = &adapter->rx_rings[i];
	}

	return (0);

err_rx_desc:
	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
		igb_dma_free(adapter, &rxr->rxdma);
err_tx_desc:
	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
		igb_dma_free(adapter, &txr->txdma);
	free(adapter->rx_rings, M_DEVBUF);
rx_fail:
#ifndef IGB_LEGACY_TX
	buf_ring_free(txr->br, M_DEVBUF);
#endif
	free(adapter->tx_rings, M_DEVBUF);
tx_fail:
	free(adapter->queues, M_DEVBUF);
fail:
	return (error);
}

/*********************************************************************
 *
 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
 *  the information needed to transmit a packet on the wire. This is
 *  called only once at attach, setup is done every reset.
 *
 **********************************************************************/
static int
igb_allocate_transmit_buffers(struct tx_ring *txr)
{
	struct adapter *adapter = txr->adapter;
	device_t dev = adapter->dev;
	struct igb_tx_buf *txbuf;
	int error, i;

	/*
	 * Setup DMA descriptor areas.
	 */
	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
	    1, 0,			/* alignment, bounds */
	    BUS_SPACE_MAXADDR,		/* lowaddr */
	    BUS_SPACE_MAXADDR,		/* highaddr */
	    NULL, NULL,			/* filter, filterarg */
	    IGB_TSO_SIZE,		/* maxsize */
	    IGB_MAX_SCATTER,		/* nsegments */
	    PAGE_SIZE,			/* maxsegsize */
	    0,				/* flags */
	    NULL,			/* lockfunc */
	    NULL,			/* lockfuncarg */
	    &txr->txtag))) {
		device_printf(dev, "Unable to allocate TX DMA tag\n");
		goto fail;
	}

	if (!(txr->tx_buffers = (struct igb_tx_buf *) malloc(
	    sizeof(struct igb_tx_buf) * adapter->num_tx_desc,
	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
		device_printf(dev, "Unable to allocate tx_buffer memory\n");
		error = ENOMEM;
		goto fail;
	}

	/* Create the descriptor buffer dma maps */
	txbuf = txr->tx_buffers;
	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
		if (error != 0) {
			device_printf(dev, "Unable to create TX DMA map\n");
			goto fail;
		}
	}

	return 0;
fail:
	/* We free all, it handles case where we are in the middle */
	igb_free_transmit_structures(adapter);
	return (error);
}

/*********************************************************************
 *
 *  Initialize a transmit ring.
* **********************************************************************/ static void igb_setup_transmit_ring(struct tx_ring *txr) { struct adapter *adapter = txr->adapter; struct igb_tx_buf *txbuf; int i; #ifdef DEV_NETMAP struct netmap_adapter *na = NA(adapter->ifp); struct netmap_slot *slot; #endif /* DEV_NETMAP */ /* Clear the old descriptor contents */ IGB_TX_LOCK(txr); #ifdef DEV_NETMAP slot = netmap_reset(na, NR_TX, txr->me, 0); #endif /* DEV_NETMAP */ bzero((void *)txr->tx_base, (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc); /* Reset indices */ txr->next_avail_desc = 0; txr->next_to_clean = 0; /* Free any existing tx buffers. */ txbuf = txr->tx_buffers; for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) { if (txbuf->m_head != NULL) { bus_dmamap_sync(txr->txtag, txbuf->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(txr->txtag, txbuf->map); m_freem(txbuf->m_head); txbuf->m_head = NULL; } #ifdef DEV_NETMAP if (slot) { int si = netmap_idx_n2k(&na->tx_rings[txr->me], i); /* no need to set the address */ netmap_load_map(na, txr->txtag, txbuf->map, NMB(na, slot + si)); } #endif /* DEV_NETMAP */ /* clear the watch index */ txbuf->eop = NULL; } /* Set number of descriptors available */ txr->tx_avail = adapter->num_tx_desc; bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); IGB_TX_UNLOCK(txr); } /********************************************************************* * * Initialize all transmit rings. * **********************************************************************/ static void igb_setup_transmit_structures(struct adapter *adapter) { struct tx_ring *txr = adapter->tx_rings; for (int i = 0; i < adapter->num_queues; i++, txr++) igb_setup_transmit_ring(txr); return; } /********************************************************************* * * Enable transmit unit. * **********************************************************************/ static void igb_initialize_transmit_units(struct adapter *adapter) { struct tx_ring *txr = adapter->tx_rings; struct e1000_hw *hw = &adapter->hw; u32 tctl, txdctl; INIT_DEBUGOUT("igb_initialize_transmit_units: begin"); tctl = txdctl = 0; /* Setup the Tx Descriptor Rings */ for (int i = 0; i < adapter->num_queues; i++, txr++) { u64 bus_addr = txr->txdma.dma_paddr; E1000_WRITE_REG(hw, E1000_TDLEN(i), adapter->num_tx_desc * sizeof(struct e1000_tx_desc)); E1000_WRITE_REG(hw, E1000_TDBAH(i), (uint32_t)(bus_addr >> 32)); E1000_WRITE_REG(hw, E1000_TDBAL(i), (uint32_t)bus_addr); /* Setup the HW Tx Head and Tail descriptor pointers */ E1000_WRITE_REG(hw, E1000_TDT(i), 0); E1000_WRITE_REG(hw, E1000_TDH(i), 0); HW_DEBUGOUT2("Base = %x, Length = %x\n", E1000_READ_REG(hw, E1000_TDBAL(i)), E1000_READ_REG(hw, E1000_TDLEN(i))); txr->queue_status = IGB_QUEUE_IDLE; txdctl |= IGB_TX_PTHRESH; txdctl |= IGB_TX_HTHRESH << 8; txdctl |= IGB_TX_WTHRESH << 16; txdctl |= E1000_TXDCTL_QUEUE_ENABLE; E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl); } if (adapter->vf_ifp) return; e1000_config_collision_dist(hw); /* Program the Transmit Control Register */ tctl = E1000_READ_REG(hw, E1000_TCTL); tctl &= ~E1000_TCTL_CT; tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN | (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT)); /* This write will effectively turn on the transmit unit. */ E1000_WRITE_REG(hw, E1000_TCTL, tctl); } /********************************************************************* * * Free all transmit rings. 
* **********************************************************************/ static void igb_free_transmit_structures(struct adapter *adapter) { struct tx_ring *txr = adapter->tx_rings; for (int i = 0; i < adapter->num_queues; i++, txr++) { IGB_TX_LOCK(txr); igb_free_transmit_buffers(txr); igb_dma_free(adapter, &txr->txdma); IGB_TX_UNLOCK(txr); IGB_TX_LOCK_DESTROY(txr); } free(adapter->tx_rings, M_DEVBUF); } /********************************************************************* * * Free transmit ring related data structures. * **********************************************************************/ static void igb_free_transmit_buffers(struct tx_ring *txr) { struct adapter *adapter = txr->adapter; struct igb_tx_buf *tx_buffer; int i; INIT_DEBUGOUT("free_transmit_ring: begin"); if (txr->tx_buffers == NULL) return; tx_buffer = txr->tx_buffers; for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) { if (tx_buffer->m_head != NULL) { bus_dmamap_sync(txr->txtag, tx_buffer->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(txr->txtag, tx_buffer->map); m_freem(tx_buffer->m_head); tx_buffer->m_head = NULL; if (tx_buffer->map != NULL) { bus_dmamap_destroy(txr->txtag, tx_buffer->map); tx_buffer->map = NULL; } } else if (tx_buffer->map != NULL) { bus_dmamap_unload(txr->txtag, tx_buffer->map); bus_dmamap_destroy(txr->txtag, tx_buffer->map); tx_buffer->map = NULL; } } #ifndef IGB_LEGACY_TX if (txr->br != NULL) buf_ring_free(txr->br, M_DEVBUF); #endif if (txr->tx_buffers != NULL) { free(txr->tx_buffers, M_DEVBUF); txr->tx_buffers = NULL; } if (txr->txtag != NULL) { bus_dma_tag_destroy(txr->txtag); txr->txtag = NULL; } return; } /********************************************************************** * * Setup work for hardware segmentation offload (TSO) on * adapters using advanced tx descriptors * **********************************************************************/ static int igb_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *cmd_type_len, u32 *olinfo_status) { struct adapter *adapter = txr->adapter; struct e1000_adv_tx_context_desc *TXD; u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0; u32 mss_l4len_idx = 0, paylen; u16 vtag = 0, eh_type; int ctxd, ehdrlen, ip_hlen, tcp_hlen; struct ether_vlan_header *eh; #ifdef INET6 struct ip6_hdr *ip6; #endif #ifdef INET struct ip *ip; #endif struct tcphdr *th; /* * Determine where frame payload starts. * Jump over vlan headers if already present */ eh = mtod(mp, struct ether_vlan_header *); if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; eh_type = eh->evl_proto; } else { ehdrlen = ETHER_HDR_LEN; eh_type = eh->evl_encap_proto; } switch (ntohs(eh_type)) { #ifdef INET6 case ETHERTYPE_IPV6: ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen); /* XXX-BZ For now we do not pretend to support ext. hdrs. */ if (ip6->ip6_nxt != IPPROTO_TCP) return (ENXIO); ip_hlen = sizeof(struct ip6_hdr); ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen); th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen); th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0); type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6; break; #endif #ifdef INET case ETHERTYPE_IP: ip = (struct ip *)(mp->m_data + ehdrlen); if (ip->ip_p != IPPROTO_TCP) return (ENXIO); ip->ip_sum = 0; ip_hlen = ip->ip_hl << 2; th = (struct tcphdr *)((caddr_t)ip + ip_hlen); th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htons(IPPROTO_TCP)); type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4; /* Tell transmit desc to also do IPv4 checksum. 
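		 * (IXSM in the descriptor POPTS field; the matching TXSM
		 * bit for the L4 checksum is set near the end of setup.)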
*/ *olinfo_status |= E1000_TXD_POPTS_IXSM << 8; break; #endif default: panic("%s: CSUM_TSO but no supported IP version (0x%04x)", __func__, ntohs(eh_type)); break; } ctxd = txr->next_avail_desc; TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd]; tcp_hlen = th->th_off << 2; /* This is used in the transmit desc in encap */ paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen; /* VLAN MACLEN IPLEN */ if (mp->m_flags & M_VLANTAG) { vtag = htole16(mp->m_pkthdr.ether_vtag); vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT); } vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT; vlan_macip_lens |= ip_hlen; TXD->vlan_macip_lens = htole32(vlan_macip_lens); /* ADV DTYPE TUCMD */ type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT; type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP; TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl); /* MSS L4LEN IDX */ mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT); mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT); /* 82575 needs the queue index added */ if (adapter->hw.mac.type == e1000_82575) mss_l4len_idx |= txr->me << 4; TXD->mss_l4len_idx = htole32(mss_l4len_idx); TXD->seqnum_seed = htole32(0); if (++ctxd == txr->num_desc) ctxd = 0; txr->tx_avail--; txr->next_avail_desc = ctxd; *cmd_type_len |= E1000_ADVTXD_DCMD_TSE; *olinfo_status |= E1000_TXD_POPTS_TXSM << 8; *olinfo_status |= paylen << E1000_ADVTXD_PAYLEN_SHIFT; ++txr->tso_tx; return (0); } /********************************************************************* * * Advanced Context Descriptor setup for VLAN, CSUM or TSO * **********************************************************************/ static int igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp, u32 *cmd_type_len, u32 *olinfo_status) { struct e1000_adv_tx_context_desc *TXD; struct adapter *adapter = txr->adapter; struct ether_vlan_header *eh; struct ip *ip; struct ip6_hdr *ip6; u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0, mss_l4len_idx = 0; int ehdrlen, ip_hlen = 0; u16 etype; u8 ipproto = 0; int offload = TRUE; int ctxd = txr->next_avail_desc; u16 vtag = 0; /* First check if TSO is to be used */ if (mp->m_pkthdr.csum_flags & CSUM_TSO) return (igb_tso_setup(txr, mp, cmd_type_len, olinfo_status)); if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0) offload = FALSE; /* Indicate the whole packet as payload when not doing TSO */ *olinfo_status |= mp->m_pkthdr.len << E1000_ADVTXD_PAYLEN_SHIFT; /* Now ready a context descriptor */ TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd]; /* ** In advanced descriptors the vlan tag must ** be placed into the context descriptor. Hence ** we need to make one even if not doing offloads. */ if (mp->m_flags & M_VLANTAG) { vtag = htole16(mp->m_pkthdr.ether_vtag); vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT); } else if (offload == FALSE) /* ... no offload to do */ return (0); /* * Determine where frame payload starts. * Jump over vlan headers if already present, * helpful for QinQ too. 
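	 * e.g. an untagged frame yields ehdrlen = ETHER_HDR_LEN (14),
	 * while a single 802.1Q tag adds ETHER_VLAN_ENCAP_LEN for 18.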
 */
	eh = mtod(mp, struct ether_vlan_header *);
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		etype = ntohs(eh->evl_proto);
		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	} else {
		etype = ntohs(eh->evl_encap_proto);
		ehdrlen = ETHER_HDR_LEN;
	}

	/* Set the ether header length */
	vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;

	switch (etype) {
	case ETHERTYPE_IP:
		ip = (struct ip *)(mp->m_data + ehdrlen);
		ip_hlen = ip->ip_hl << 2;
		ipproto = ip->ip_p;
		type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
		break;
	case ETHERTYPE_IPV6:
		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
		ip_hlen = sizeof(struct ip6_hdr);
		/* XXX-BZ this will go badly in case of ext hdrs. */
		ipproto = ip6->ip6_nxt;
		type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
		break;
	default:
		offload = FALSE;
		break;
	}

	vlan_macip_lens |= ip_hlen;
	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;

	switch (ipproto) {
	case IPPROTO_TCP:
#if __FreeBSD_version >= 1000000
		if (mp->m_pkthdr.csum_flags & (CSUM_IP_TCP | CSUM_IP6_TCP))
#else
		if (mp->m_pkthdr.csum_flags & CSUM_TCP)
#endif
			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
		break;
	case IPPROTO_UDP:
#if __FreeBSD_version >= 1000000
		if (mp->m_pkthdr.csum_flags & (CSUM_IP_UDP | CSUM_IP6_UDP))
#else
		if (mp->m_pkthdr.csum_flags & CSUM_UDP)
#endif
			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
		break;
#if __FreeBSD_version >= 800000
	case IPPROTO_SCTP:
#if __FreeBSD_version >= 1000000
		if (mp->m_pkthdr.csum_flags & (CSUM_IP_SCTP | CSUM_IP6_SCTP))
#else
		if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
#endif
			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
		break;
#endif
	default:
		offload = FALSE;
		break;
	}

	if (offload) /* For the TX descriptor setup */
		*olinfo_status |= E1000_TXD_POPTS_TXSM << 8;

	/* 82575 needs the queue index added */
	if (adapter->hw.mac.type == e1000_82575)
		mss_l4len_idx = txr->me << 4;

	/* Now copy bits into descriptor */
	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
	TXD->seqnum_seed = htole32(0);
	TXD->mss_l4len_idx = htole32(mss_l4len_idx);

	/* We've consumed the first desc, adjust counters */
	if (++ctxd == txr->num_desc)
		ctxd = 0;
	txr->next_avail_desc = ctxd;
	--txr->tx_avail;

	return (0);
}

/**********************************************************************
 *
 *  Examine each tx_buffer in the used queue. If the hardware is done
 *  processing the packet then free associated resources. The
 *  tx_buffer is put back on the free queue.
 *
 *  TRUE return means there's work in the ring to clean, FALSE it's empty.
 **********************************************************************/
static bool
igb_txeof(struct tx_ring *txr)
{
	struct adapter		*adapter = txr->adapter;
#ifdef DEV_NETMAP
	struct ifnet		*ifp = adapter->ifp;
#endif /* DEV_NETMAP */
	u32			work, processed = 0;
	int			limit = adapter->tx_process_limit;
	struct igb_tx_buf	*buf;
	union e1000_adv_tx_desc *txd;

	mtx_assert(&txr->tx_mtx, MA_OWNED);

#ifdef DEV_NETMAP
	if (netmap_tx_irq(ifp, txr->me))
		return (FALSE);
#endif /* DEV_NETMAP */

	if (txr->tx_avail == txr->num_desc) {
		txr->queue_status = IGB_QUEUE_IDLE;
		return FALSE;
	}

	/* Get work starting point */
	work = txr->next_to_clean;
	buf = &txr->tx_buffers[work];
	txd = &txr->tx_base[work];
	work -= txr->num_desc; /* The distance to ring end */
	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
	do {
		union e1000_adv_tx_desc *eop = buf->eop;
		if (eop == NULL) /* No work */
			break;

		if ((eop->wb.status & E1000_TXD_STAT_DD) == 0)
			break;	/* I/O not complete */

		if (buf->m_head) {
			txr->bytes += buf->m_head->m_pkthdr.len;
			bus_dmamap_sync(txr->txtag, buf->map,
			    BUS_DMASYNC_POSTWRITE);
			bus_dmamap_unload(txr->txtag, buf->map);
			m_freem(buf->m_head);
			buf->m_head = NULL;
		}
		buf->eop = NULL;
		++txr->tx_avail;

		/* We clean the range if multi segment */
		while (txd != eop) {
			++txd;
			++buf;
			++work;
			/* wrap the ring? */
			if (__predict_false(!work)) {
				work -= txr->num_desc;
				buf = txr->tx_buffers;
				txd = txr->tx_base;
			}
			if (buf->m_head) {
				txr->bytes += buf->m_head->m_pkthdr.len;
				bus_dmamap_sync(txr->txtag, buf->map,
				    BUS_DMASYNC_POSTWRITE);
				bus_dmamap_unload(txr->txtag, buf->map);
				m_freem(buf->m_head);
				buf->m_head = NULL;
			}
			++txr->tx_avail;
			buf->eop = NULL;
		}
		++txr->packets;
		++processed;
		txr->watchdog_time = ticks;

		/* Try the next packet */
		++txd;
		++buf;
		++work;
		/* reset with a wrap */
		if (__predict_false(!work)) {
			work -= txr->num_desc;
			buf = txr->tx_buffers;
			txd = txr->tx_base;
		}
		prefetch(txd);
	} while (__predict_true(--limit));

	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

	work += txr->num_desc;
	txr->next_to_clean = work;

	/*
	** Watchdog calculation, we know there's
	** work outstanding or the first return
	** would have been taken, so none processed
	** for too long indicates a hang.
	*/
	if ((!processed) && ((ticks - txr->watchdog_time) > IGB_WATCHDOG))
		txr->queue_status |= IGB_QUEUE_HUNG;

	if (txr->tx_avail >= IGB_QUEUE_THRESHOLD)
		txr->queue_status &= ~IGB_QUEUE_DEPLETED;

	if (txr->tx_avail == txr->num_desc) {
		txr->queue_status = IGB_QUEUE_IDLE;
		return (FALSE);
	}

	return (TRUE);
}

/*********************************************************************
 *
 *  Refresh mbuf buffers for RX descriptor rings
 *   - now keeps its own state so discards due to resource
 *     exhaustion are unnecessary, if an mbuf cannot be obtained
 *     it just returns, keeping its placeholder, thus it can simply
 *     be recalled to try again.
 *
 **********************************************************************/
static void
igb_refresh_mbufs(struct rx_ring *rxr, int limit)
{
	struct adapter		*adapter = rxr->adapter;
	bus_dma_segment_t	hseg[1];
	bus_dma_segment_t	pseg[1];
	struct igb_rx_buf	*rxbuf;
	struct mbuf		*mh, *mp;
	int			i, j, nsegs, error;
	bool			refreshed = FALSE;

	i = j = rxr->next_to_refresh;
	/*
	** Get one descriptor beyond
	** our work mark to control
	** the loop.
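	**
	** (i trails on the slot being refreshed while j runs one
	** ahead; the loop stops when j reaches the caller's limit,
	** so the ring is never refreshed completely full.)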
*/ if (++j == adapter->num_rx_desc) j = 0; while (j != limit) { rxbuf = &rxr->rx_buffers[i]; /* No hdr mbuf used with header split off */ if (rxr->hdr_split == FALSE) goto no_split; if (rxbuf->m_head == NULL) { mh = m_gethdr(M_NOWAIT, MT_DATA); if (mh == NULL) goto update; } else mh = rxbuf->m_head; mh->m_pkthdr.len = mh->m_len = MHLEN; mh->m_len = MHLEN; mh->m_flags |= M_PKTHDR; /* Get the memory mapping */ error = bus_dmamap_load_mbuf_sg(rxr->htag, rxbuf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT); if (error != 0) { printf("Refresh mbufs: hdr dmamap load" " failure - %d\n", error); m_free(mh); rxbuf->m_head = NULL; goto update; } rxbuf->m_head = mh; bus_dmamap_sync(rxr->htag, rxbuf->hmap, BUS_DMASYNC_PREREAD); rxr->rx_base[i].read.hdr_addr = htole64(hseg[0].ds_addr); no_split: if (rxbuf->m_pack == NULL) { mp = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, adapter->rx_mbuf_sz); if (mp == NULL) goto update; } else mp = rxbuf->m_pack; mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz; /* Get the memory mapping */ error = bus_dmamap_load_mbuf_sg(rxr->ptag, rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT); if (error != 0) { printf("Refresh mbufs: payload dmamap load" " failure - %d\n", error); m_free(mp); rxbuf->m_pack = NULL; goto update; } rxbuf->m_pack = mp; bus_dmamap_sync(rxr->ptag, rxbuf->pmap, BUS_DMASYNC_PREREAD); rxr->rx_base[i].read.pkt_addr = htole64(pseg[0].ds_addr); refreshed = TRUE; /* I feel wefreshed :) */ i = j; /* our next is precalculated */ rxr->next_to_refresh = i; if (++j == adapter->num_rx_desc) j = 0; } update: if (refreshed) /* update tail */ E1000_WRITE_REG(&adapter->hw, E1000_RDT(rxr->me), rxr->next_to_refresh); return; } /********************************************************************* * * Allocate memory for rx_buffer structures. Since we use one * rx_buffer per received packet, the maximum number of rx_buffer's * that we'll need is equal to the number of receive descriptors * that we've allocated. 
* **********************************************************************/ static int igb_allocate_receive_buffers(struct rx_ring *rxr) { struct adapter *adapter = rxr->adapter; device_t dev = adapter->dev; struct igb_rx_buf *rxbuf; int i, bsize, error; bsize = sizeof(struct igb_rx_buf) * adapter->num_rx_desc; if (!(rxr->rx_buffers = (struct igb_rx_buf *) malloc(bsize, M_DEVBUF, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate rx_buffer memory\n"); error = ENOMEM; goto fail; } if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ MSIZE, /* maxsize */ 1, /* nsegments */ MSIZE, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &rxr->htag))) { device_printf(dev, "Unable to create RX DMA tag\n"); goto fail; } if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ MJUM9BYTES, /* maxsize */ 1, /* nsegments */ MJUM9BYTES, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &rxr->ptag))) { device_printf(dev, "Unable to create RX payload DMA tag\n"); goto fail; } for (i = 0; i < adapter->num_rx_desc; i++) { rxbuf = &rxr->rx_buffers[i]; error = bus_dmamap_create(rxr->htag, 0, &rxbuf->hmap); if (error) { device_printf(dev, "Unable to create RX head DMA maps\n"); goto fail; } error = bus_dmamap_create(rxr->ptag, 0, &rxbuf->pmap); if (error) { device_printf(dev, "Unable to create RX packet DMA maps\n"); goto fail; } } return (0); fail: /* Frees all, but can handle partial completion */ igb_free_receive_structures(adapter); return (error); } static void igb_free_receive_ring(struct rx_ring *rxr) { struct adapter *adapter = rxr->adapter; struct igb_rx_buf *rxbuf; for (int i = 0; i < adapter->num_rx_desc; i++) { rxbuf = &rxr->rx_buffers[i]; if (rxbuf->m_head != NULL) { bus_dmamap_sync(rxr->htag, rxbuf->hmap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(rxr->htag, rxbuf->hmap); rxbuf->m_head->m_flags |= M_PKTHDR; m_freem(rxbuf->m_head); } if (rxbuf->m_pack != NULL) { bus_dmamap_sync(rxr->ptag, rxbuf->pmap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(rxr->ptag, rxbuf->pmap); rxbuf->m_pack->m_flags |= M_PKTHDR; m_freem(rxbuf->m_pack); } rxbuf->m_head = NULL; rxbuf->m_pack = NULL; } } /********************************************************************* * * Initialize a receive ring and its buffers. * **********************************************************************/ static int igb_setup_receive_ring(struct rx_ring *rxr) { struct adapter *adapter; struct ifnet *ifp; device_t dev; struct igb_rx_buf *rxbuf; bus_dma_segment_t pseg[1], hseg[1]; struct lro_ctrl *lro = &rxr->lro; int rsize, nsegs, error = 0; #ifdef DEV_NETMAP struct netmap_adapter *na = NA(rxr->adapter->ifp); struct netmap_slot *slot; #endif /* DEV_NETMAP */ adapter = rxr->adapter; dev = adapter->dev; ifp = adapter->ifp; /* Clear the ring contents */ IGB_RX_LOCK(rxr); #ifdef DEV_NETMAP slot = netmap_reset(na, NR_RX, rxr->me, 0); #endif /* DEV_NETMAP */ rsize = roundup2(adapter->num_rx_desc * sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN); bzero((void *)rxr->rx_base, rsize); /* ** Free current RX buffer structures and their mbufs */ igb_free_receive_ring(rxr); /* Configure for header split? 
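	 * (Off by default; the driver-wide igb_header_split knob
	 * checked below turns it on for every ring.)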
*/ if (igb_header_split) rxr->hdr_split = TRUE; /* Now replenish the ring mbufs */ for (int j = 0; j < adapter->num_rx_desc; ++j) { struct mbuf *mh, *mp; rxbuf = &rxr->rx_buffers[j]; #ifdef DEV_NETMAP if (slot) { /* slot sj is mapped to the j-th NIC-ring entry */ int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j); uint64_t paddr; void *addr; addr = PNMB(na, slot + sj, &paddr); netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr); /* Update descriptor */ rxr->rx_base[j].read.pkt_addr = htole64(paddr); continue; } #endif /* DEV_NETMAP */ if (rxr->hdr_split == FALSE) goto skip_head; /* First the header */ rxbuf->m_head = m_gethdr(M_NOWAIT, MT_DATA); if (rxbuf->m_head == NULL) { error = ENOBUFS; goto fail; } m_adj(rxbuf->m_head, ETHER_ALIGN); mh = rxbuf->m_head; mh->m_len = mh->m_pkthdr.len = MHLEN; mh->m_flags |= M_PKTHDR; /* Get the memory mapping */ error = bus_dmamap_load_mbuf_sg(rxr->htag, rxbuf->hmap, rxbuf->m_head, hseg, &nsegs, BUS_DMA_NOWAIT); if (error != 0) /* Nothing elegant to do here */ goto fail; bus_dmamap_sync(rxr->htag, rxbuf->hmap, BUS_DMASYNC_PREREAD); /* Update descriptor */ rxr->rx_base[j].read.hdr_addr = htole64(hseg[0].ds_addr); skip_head: /* Now the payload cluster */ rxbuf->m_pack = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, adapter->rx_mbuf_sz); if (rxbuf->m_pack == NULL) { error = ENOBUFS; goto fail; } mp = rxbuf->m_pack; mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz; /* Get the memory mapping */ error = bus_dmamap_load_mbuf_sg(rxr->ptag, rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT); if (error != 0) goto fail; bus_dmamap_sync(rxr->ptag, rxbuf->pmap, BUS_DMASYNC_PREREAD); /* Update descriptor */ rxr->rx_base[j].read.pkt_addr = htole64(pseg[0].ds_addr); } /* Setup our descriptor indices */ rxr->next_to_check = 0; rxr->next_to_refresh = adapter->num_rx_desc - 1; rxr->lro_enabled = FALSE; rxr->rx_split_packets = 0; rxr->rx_bytes = 0; rxr->fmp = NULL; rxr->lmp = NULL; bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); /* ** Now set up the LRO interface, we ** also only do head split when LRO ** is enabled, since so often they ** are undesirable in similar setups. */ if (ifp->if_capenable & IFCAP_LRO) { error = tcp_lro_init(lro); if (error) { device_printf(dev, "LRO Initialization failed!\n"); goto fail; } INIT_DEBUGOUT("RX LRO Initialized\n"); rxr->lro_enabled = TRUE; lro->ifp = adapter->ifp; } IGB_RX_UNLOCK(rxr); return (0); fail: igb_free_receive_ring(rxr); IGB_RX_UNLOCK(rxr); return (error); } /********************************************************************* * * Initialize all receive rings. * **********************************************************************/ static int igb_setup_receive_structures(struct adapter *adapter) { struct rx_ring *rxr = adapter->rx_rings; int i; for (i = 0; i < adapter->num_queues; i++, rxr++) if (igb_setup_receive_ring(rxr)) goto fail; return (0); fail: /* * Free RX buffers allocated so far, we will only handle * the rings that completed, the failing case will have * cleaned up for itself. 'i' is the endpoint. */ for (int j = 0; j < i; ++j) { rxr = &adapter->rx_rings[j]; IGB_RX_LOCK(rxr); igb_free_receive_ring(rxr); IGB_RX_UNLOCK(rxr); } return (ENOBUFS); } /* * Initialise the RSS mapping for NICs that support multiple transmit/ * receive rings. */ static void igb_initialise_rss_mapping(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; int i; int queue_id; u32 reta; u32 rss_key[10], mrqc, shift = 0; /* XXX? 
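	 * (The XXX refers to the 82575 quirk handled just below: its
	 * RETA entries want the queue index pre-shifted left by 6.)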
*/ if (adapter->hw.mac.type == e1000_82575) shift = 6; /* * The redirection table controls which destination * queue each bucket redirects traffic to. * Each DWORD represents four queues, with the LSB * being the first queue in the DWORD. * * This just allocates buckets to queues using round-robin * allocation. * * NOTE: It Just Happens to line up with the default * RSS allocation method. */ /* Warning FM follows */ reta = 0; for (i = 0; i < 128; i++) { #ifdef RSS queue_id = rss_get_indirection_to_bucket(i); /* * If we have more queues than buckets, we'll * end up mapping buckets to a subset of the * queues. * * If we have more buckets than queues, we'll * end up instead assigning multiple buckets * to queues. * * Both are suboptimal, but we need to handle * the case so we don't go out of bounds * indexing arrays and such. */ queue_id = queue_id % adapter->num_queues; #else queue_id = (i % adapter->num_queues); #endif /* Adjust if required */ queue_id = queue_id << shift; /* * The low 8 bits are for hash value (n+0); * The next 8 bits are for hash value (n+1), etc. */ reta = reta >> 8; reta = reta | ( ((uint32_t) queue_id) << 24); if ((i & 3) == 3) { E1000_WRITE_REG(hw, E1000_RETA(i >> 2), reta); reta = 0; } } /* Now fill in hash table */ /* * MRQC: Multiple Receive Queues Command * Set queuing to RSS control, number depends on the device. */ mrqc = E1000_MRQC_ENABLE_RSS_8Q; #ifdef RSS /* XXX ew typecasting */ rss_getkey((uint8_t *) &rss_key); #else arc4rand(&rss_key, sizeof(rss_key), 0); #endif for (i = 0; i < 10; i++) E1000_WRITE_REG_ARRAY(hw, E1000_RSSRK(0), i, rss_key[i]); /* * Configure the RSS fields to hash upon. */ mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 | E1000_MRQC_RSS_FIELD_IPV4_TCP); mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 | E1000_MRQC_RSS_FIELD_IPV6_TCP); mrqc |=( E1000_MRQC_RSS_FIELD_IPV4_UDP | E1000_MRQC_RSS_FIELD_IPV6_UDP); mrqc |=( E1000_MRQC_RSS_FIELD_IPV6_UDP_EX | E1000_MRQC_RSS_FIELD_IPV6_TCP_EX); E1000_WRITE_REG(hw, E1000_MRQC, mrqc); } /********************************************************************* * * Enable receive unit. * **********************************************************************/ static void igb_initialize_receive_units(struct adapter *adapter) { struct rx_ring *rxr = adapter->rx_rings; struct ifnet *ifp = adapter->ifp; struct e1000_hw *hw = &adapter->hw; u32 rctl, rxcsum, psize, srrctl = 0; INIT_DEBUGOUT("igb_initialize_receive_unit: begin"); /* * Make sure receives are disabled while setting * up the descriptor ring */ rctl = E1000_READ_REG(hw, E1000_RCTL); E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN); /* ** Set up for header split */ if (igb_header_split) { /* Use a standard mbuf for the header */ srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT; srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS; } else srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF; /* ** Set up for jumbo frames */ if (ifp->if_mtu > ETHERMTU) { rctl |= E1000_RCTL_LPE; if (adapter->rx_mbuf_sz == MJUMPAGESIZE) { srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT; rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX; } else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) { srrctl |= 8192 >> E1000_SRRCTL_BSIZEPKT_SHIFT; rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX; } /* Set maximum packet len */ psize = adapter->max_frame_size; /* are we on a vlan? 
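** (if a VLAN trunk is configured the limit grows by the 4-byte
** VLAN_TAG_SIZE below; e.g. a 9018-byte max frame would be programmed
** into RLPML as 9022 -- sizes given only as an illustration)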
*/ if (adapter->ifp->if_vlantrunk != NULL) psize += VLAN_TAG_SIZE; E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize); } else { rctl &= ~E1000_RCTL_LPE; srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT; rctl |= E1000_RCTL_SZ_2048; } /* * If TX flow control is disabled and there's >1 queue defined, * enable DROP. * * This drops frames rather than hanging the RX MAC for all queues. */ if ((adapter->num_queues > 1) && (adapter->fc == e1000_fc_none || adapter->fc == e1000_fc_rx_pause)) { srrctl |= E1000_SRRCTL_DROP_EN; } /* Setup the Base and Length of the Rx Descriptor Rings */ for (int i = 0; i < adapter->num_queues; i++, rxr++) { u64 bus_addr = rxr->rxdma.dma_paddr; u32 rxdctl; E1000_WRITE_REG(hw, E1000_RDLEN(i), adapter->num_rx_desc * sizeof(struct e1000_rx_desc)); E1000_WRITE_REG(hw, E1000_RDBAH(i), (uint32_t)(bus_addr >> 32)); E1000_WRITE_REG(hw, E1000_RDBAL(i), (uint32_t)bus_addr); E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl); /* Enable this Queue */ rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i)); rxdctl |= E1000_RXDCTL_QUEUE_ENABLE; rxdctl &= 0xFFF00000; rxdctl |= IGB_RX_PTHRESH; rxdctl |= IGB_RX_HTHRESH << 8; rxdctl |= IGB_RX_WTHRESH << 16; E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl); } /* ** Setup for RX MultiQueue */ rxcsum = E1000_READ_REG(hw, E1000_RXCSUM); if (adapter->num_queues >1) { /* rss setup */ igb_initialise_rss_mapping(adapter); /* ** NOTE: Receive Full-Packet Checksum Offload ** is mutually exclusive with Multiqueue. However ** this is not the same as TCP/IP checksums which ** still work. */ rxcsum |= E1000_RXCSUM_PCSD; #if __FreeBSD_version >= 800000 /* For SCTP Offload */ if ((hw->mac.type != e1000_82575) && (ifp->if_capenable & IFCAP_RXCSUM)) rxcsum |= E1000_RXCSUM_CRCOFL; #endif } else { /* Non RSS setup */ if (ifp->if_capenable & IFCAP_RXCSUM) { rxcsum |= E1000_RXCSUM_IPPCSE; #if __FreeBSD_version >= 800000 if (adapter->hw.mac.type != e1000_82575) rxcsum |= E1000_RXCSUM_CRCOFL; #endif } else rxcsum &= ~E1000_RXCSUM_TUOFL; } E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum); /* Setup the Receive Control Register */ rctl &= ~(3 << E1000_RCTL_MO_SHIFT); rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF | (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT); /* Strip CRC bytes. */ rctl |= E1000_RCTL_SECRC; /* Make sure VLAN Filters are off */ rctl &= ~E1000_RCTL_VFE; /* Don't store bad packets */ rctl &= ~E1000_RCTL_SBP; /* Enable Receives */ E1000_WRITE_REG(hw, E1000_RCTL, rctl); /* * Setup the HW Rx Head and Tail Descriptor Pointers * - needs to be after enable */ for (int i = 0; i < adapter->num_queues; i++) { rxr = &adapter->rx_rings[i]; E1000_WRITE_REG(hw, E1000_RDH(i), rxr->next_to_check); #ifdef DEV_NETMAP /* * an init() while a netmap client is active must * preserve the rx buffers passed to userspace. * In this driver it means we adjust RDT to * something different from next_to_refresh * (which is not used in netmap mode). */ if (ifp->if_capenable & IFCAP_NETMAP) { struct netmap_adapter *na = NA(adapter->ifp); struct netmap_kring *kring = &na->rx_rings[i]; int t = rxr->next_to_refresh - nm_kr_rxspace(kring); if (t >= adapter->num_rx_desc) t -= adapter->num_rx_desc; else if (t < 0) t += adapter->num_rx_desc; E1000_WRITE_REG(hw, E1000_RDT(i), t); } else #endif /* DEV_NETMAP */ E1000_WRITE_REG(hw, E1000_RDT(i), rxr->next_to_refresh); } return; } /********************************************************************* * * Free receive rings. 
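 *  Teardown runs in the reverse order of setup: each ring's buffer
 *  structures first, then its LRO state, then the descriptor DMA area.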
 *
 **********************************************************************/
static void
igb_free_receive_structures(struct adapter *adapter)
{
	struct rx_ring *rxr = adapter->rx_rings;

	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
		struct lro_ctrl	*lro = &rxr->lro;
		igb_free_receive_buffers(rxr);
		tcp_lro_free(lro);
		igb_dma_free(adapter, &rxr->rxdma);
	}

	free(adapter->rx_rings, M_DEVBUF);
}

/*********************************************************************
 *
 *  Free receive ring data structures.
 *
 **********************************************************************/
static void
igb_free_receive_buffers(struct rx_ring *rxr)
{
	struct adapter	*adapter = rxr->adapter;
	struct igb_rx_buf *rxbuf;
	int i;

	INIT_DEBUGOUT("free_receive_structures: begin");

	/* Cleanup any existing buffers */
	if (rxr->rx_buffers != NULL) {
		for (i = 0; i < adapter->num_rx_desc; i++) {
			rxbuf = &rxr->rx_buffers[i];
			if (rxbuf->m_head != NULL) {
				bus_dmamap_sync(rxr->htag, rxbuf->hmap,
				    BUS_DMASYNC_POSTREAD);
				bus_dmamap_unload(rxr->htag, rxbuf->hmap);
				rxbuf->m_head->m_flags |= M_PKTHDR;
				m_freem(rxbuf->m_head);
			}
			if (rxbuf->m_pack != NULL) {
				bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
				    BUS_DMASYNC_POSTREAD);
				bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
				rxbuf->m_pack->m_flags |= M_PKTHDR;
				m_freem(rxbuf->m_pack);
			}
			rxbuf->m_head = NULL;
			rxbuf->m_pack = NULL;
			if (rxbuf->hmap != NULL) {
				bus_dmamap_destroy(rxr->htag, rxbuf->hmap);
				rxbuf->hmap = NULL;
			}
			if (rxbuf->pmap != NULL) {
				bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
				rxbuf->pmap = NULL;
			}
		}
		if (rxr->rx_buffers != NULL) {
			free(rxr->rx_buffers, M_DEVBUF);
			rxr->rx_buffers = NULL;
		}
	}

	if (rxr->htag != NULL) {
		bus_dma_tag_destroy(rxr->htag);
		rxr->htag = NULL;
	}
	if (rxr->ptag != NULL) {
		bus_dma_tag_destroy(rxr->ptag);
		rxr->ptag = NULL;
	}
}

static __inline void
igb_rx_discard(struct rx_ring *rxr, int i)
{
	struct igb_rx_buf	*rbuf;

	rbuf = &rxr->rx_buffers[i];

	/* Partially received? Free the chain */
	if (rxr->fmp != NULL) {
		rxr->fmp->m_flags |= M_PKTHDR;
		m_freem(rxr->fmp);
		rxr->fmp = NULL;
		rxr->lmp = NULL;
	}

	/*
	** With advanced descriptors the writeback
	** clobbers the buffer addrs, so it's easier
	** to just free the existing mbufs and take
	** the normal refresh path to get new buffers
	** and mapping.
	*/
	if (rbuf->m_head) {
		m_free(rbuf->m_head);
		rbuf->m_head = NULL;
		bus_dmamap_unload(rxr->htag, rbuf->hmap);
	}
	if (rbuf->m_pack) {
		m_free(rbuf->m_pack);
		rbuf->m_pack = NULL;
		bus_dmamap_unload(rxr->ptag, rbuf->pmap);
	}

	return;
}

static __inline void
igb_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
{
	/*
	 * ATM LRO is only for IPv4/TCP packets and TCP checksum of the packet
	 * should be computed by hardware. Also it should not have VLAN tag in
	 * ethernet header.
	 */
	if (rxr->lro_enabled &&
	    (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
	    (ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
	    (ptype & (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP)) ==
	    (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP) &&
	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
		/*
		 * Send to the stack if:
		 *  - LRO not enabled, or
		 *  - no LRO resources, or
		 *  - lro enqueue fails
		 */
		if (rxr->lro.lro_cnt != 0)
			if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
				return;
	}
	IGB_RX_UNLOCK(rxr);
	(*ifp->if_input)(ifp, m);
	IGB_RX_LOCK(rxr);
}

/*********************************************************************
 *
 *  This routine executes in interrupt context.
It replenishes * the mbufs in the descriptor and sends data which has been * dma'ed into host memory to upper layer. * * We loop at most count times if count is > 0, or until done if * count < 0. * * Return TRUE if more to clean, FALSE otherwise *********************************************************************/ static bool igb_rxeof(struct igb_queue *que, int count, int *done) { struct adapter *adapter = que->adapter; struct rx_ring *rxr = que->rxr; struct ifnet *ifp = adapter->ifp; struct lro_ctrl *lro = &rxr->lro; int i, processed = 0, rxdone = 0; u32 ptype, staterr = 0; union e1000_adv_rx_desc *cur; IGB_RX_LOCK(rxr); /* Sync the ring. */ bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); #ifdef DEV_NETMAP if (netmap_rx_irq(ifp, rxr->me, &processed)) { IGB_RX_UNLOCK(rxr); return (FALSE); } #endif /* DEV_NETMAP */ /* Main clean loop */ for (i = rxr->next_to_check; count != 0;) { struct mbuf *sendmp, *mh, *mp; struct igb_rx_buf *rxbuf; u16 hlen, plen, hdr, vtag, pkt_info; bool eop = FALSE; cur = &rxr->rx_base[i]; staterr = le32toh(cur->wb.upper.status_error); if ((staterr & E1000_RXD_STAT_DD) == 0) break; if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) break; count--; sendmp = mh = mp = NULL; cur->wb.upper.status_error = 0; rxbuf = &rxr->rx_buffers[i]; plen = le16toh(cur->wb.upper.length); ptype = le32toh(cur->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK; if (((adapter->hw.mac.type == e1000_i350) || (adapter->hw.mac.type == e1000_i354)) && (staterr & E1000_RXDEXT_STATERR_LB)) vtag = be16toh(cur->wb.upper.vlan); else vtag = le16toh(cur->wb.upper.vlan); hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info); pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info); eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP); /* * Free the frame (all segments) if we're at EOP and * it's an error. * * The datasheet states that EOP + status is only valid for * the final segment in a multi-segment frame. */ if (eop && ((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0)) { adapter->dropped_pkts++; ++rxr->rx_discarded; igb_rx_discard(rxr, i); goto next_desc; } /* ** The way the hardware is configured to ** split, it will ONLY use the header buffer ** when header split is enabled, otherwise we ** get normal behavior, ie, both header and ** payload are DMA'd into the payload buffer. ** ** The fmp test is to catch the case where a ** packet spans multiple descriptors, in that ** case only the first header is valid. */ if (rxr->hdr_split && rxr->fmp == NULL) { bus_dmamap_unload(rxr->htag, rxbuf->hmap); hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT; if (hlen > IGB_HDR_BUF) hlen = IGB_HDR_BUF; mh = rxr->rx_buffers[i].m_head; mh->m_len = hlen; /* clear buf pointer for refresh */ rxbuf->m_head = NULL; /* ** Get the payload length, this ** could be zero if its a small ** packet. */ if (plen > 0) { mp = rxr->rx_buffers[i].m_pack; mp->m_len = plen; mh->m_next = mp; /* clear buf pointer */ rxbuf->m_pack = NULL; rxr->rx_split_packets++; } } else { /* ** Either no header split, or a ** secondary piece of a fragmented ** split packet. 
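** As a sketch of the chaining below: a 2500-byte frame received into
** 2048-byte clusters arrives as two descriptors; the first allocates
** fmp/lmp, the second is appended through lmp->m_next, and only the
** EOP descriptor's status/error bits are trusted for the whole frame.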
*/ mh = rxr->rx_buffers[i].m_pack; mh->m_len = plen; /* clear buf info for refresh */ rxbuf->m_pack = NULL; } bus_dmamap_unload(rxr->ptag, rxbuf->pmap); ++processed; /* So we know when to refresh */ /* Initial frame - setup */ if (rxr->fmp == NULL) { mh->m_pkthdr.len = mh->m_len; /* Save the head of the chain */ rxr->fmp = mh; rxr->lmp = mh; if (mp != NULL) { /* Add payload if split */ mh->m_pkthdr.len += mp->m_len; rxr->lmp = mh->m_next; } } else { /* Chain mbuf's together */ rxr->lmp->m_next = mh; rxr->lmp = rxr->lmp->m_next; rxr->fmp->m_pkthdr.len += mh->m_len; } if (eop) { rxr->fmp->m_pkthdr.rcvif = ifp; rxr->rx_packets++; /* capture data for AIM */ rxr->packets++; rxr->bytes += rxr->fmp->m_pkthdr.len; rxr->rx_bytes += rxr->fmp->m_pkthdr.len; if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) igb_rx_checksum(staterr, rxr->fmp, ptype); if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 && (staterr & E1000_RXD_STAT_VP) != 0) { rxr->fmp->m_pkthdr.ether_vtag = vtag; rxr->fmp->m_flags |= M_VLANTAG; } /* * In case of multiqueue, we have RXCSUM.PCSD bit set * and never cleared. This means we have RSS hash * available to be used. */ if (adapter->num_queues > 1) { rxr->fmp->m_pkthdr.flowid = le32toh(cur->wb.lower.hi_dword.rss); switch (pkt_info & E1000_RXDADV_RSSTYPE_MASK) { case E1000_RXDADV_RSSTYPE_IPV4_TCP: M_HASHTYPE_SET(rxr->fmp, M_HASHTYPE_RSS_TCP_IPV4); break; case E1000_RXDADV_RSSTYPE_IPV4: M_HASHTYPE_SET(rxr->fmp, M_HASHTYPE_RSS_IPV4); break; case E1000_RXDADV_RSSTYPE_IPV6_TCP: M_HASHTYPE_SET(rxr->fmp, M_HASHTYPE_RSS_TCP_IPV6); break; case E1000_RXDADV_RSSTYPE_IPV6_EX: M_HASHTYPE_SET(rxr->fmp, M_HASHTYPE_RSS_IPV6_EX); break; case E1000_RXDADV_RSSTYPE_IPV6: M_HASHTYPE_SET(rxr->fmp, M_HASHTYPE_RSS_IPV6); break; case E1000_RXDADV_RSSTYPE_IPV6_TCP_EX: M_HASHTYPE_SET(rxr->fmp, M_HASHTYPE_RSS_TCP_IPV6_EX); break; default: /* XXX fallthrough */ M_HASHTYPE_SET(rxr->fmp, M_HASHTYPE_OPAQUE_HASH); } } else { #ifndef IGB_LEGACY_TX rxr->fmp->m_pkthdr.flowid = que->msix; M_HASHTYPE_SET(rxr->fmp, M_HASHTYPE_OPAQUE); #endif } sendmp = rxr->fmp; /* Make sure to set M_PKTHDR. */ sendmp->m_flags |= M_PKTHDR; rxr->fmp = NULL; rxr->lmp = NULL; } next_desc: bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); /* Advance our pointers to the next descriptor. */ if (++i == adapter->num_rx_desc) i = 0; /* ** Send to the stack or LRO */ if (sendmp != NULL) { rxr->next_to_check = i; igb_rx_input(rxr, ifp, sendmp, ptype); i = rxr->next_to_check; rxdone++; } /* Every 8 descriptors we go to refresh mbufs */ if (processed == 8) { igb_refresh_mbufs(rxr, i); processed = 0; } } /* Catch any remainders */ if (igb_rx_unrefreshed(rxr)) igb_refresh_mbufs(rxr, i); rxr->next_to_check = i; /* * Flush any outstanding LRO work */ tcp_lro_flush_all(lro); if (done != NULL) *done += rxdone; IGB_RX_UNLOCK(rxr); return ((staterr & E1000_RXD_STAT_DD) ? TRUE : FALSE); } /********************************************************************* * * Verify that the hardware indicated that the checksum is valid. * Inform the stack about the status of checksum so that stack * doesn't spend time verifying the checksum. 
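 *  A good IP header checksum is reported as CSUM_IP_CHECKED |
 *  CSUM_IP_VALID; a good TCP/UDP checksum as CSUM_DATA_VALID |
 *  CSUM_PSEUDO_HDR with csum_data forced to 0xffff, so the stack
 *  skips recomputing it.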
 *
 *********************************************************************/
static void
igb_rx_checksum(u32 staterr, struct mbuf *mp, u32 ptype)
{
	u16 status = (u16)staterr;
	u8  errors = (u8) (staterr >> 24);
	int sctp;

	/* Ignore Checksum bit is set */
	if (status & E1000_RXD_STAT_IXSM) {
		mp->m_pkthdr.csum_flags = 0;
		return;
	}

	if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
	    (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0)
		sctp = 1;
	else
		sctp = 0;

	if (status & E1000_RXD_STAT_IPCS) {
		/* Did it pass? */
		if (!(errors & E1000_RXD_ERR_IPE)) {
			/* IP Checksum Good */
			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
		} else
			mp->m_pkthdr.csum_flags = 0;
	}

	if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
		u64 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
#if __FreeBSD_version >= 800000
		if (sctp) /* reassign */
			type = CSUM_SCTP_VALID;
#endif
		/* Did it pass? */
		if (!(errors & E1000_RXD_ERR_TCPE)) {
			mp->m_pkthdr.csum_flags |= type;
			if (sctp == 0)
				mp->m_pkthdr.csum_data = htons(0xffff);
		}
	}
	return;
}

/*
 * This routine is run via a VLAN
 * config EVENT
 */
static void
igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
{
	struct adapter	*adapter = ifp->if_softc;
	u32		index, bit;

	if (ifp->if_softc != arg)	/* Not our event */
		return;

	if ((vtag == 0) || (vtag > 4095))	/* Invalid */
		return;

	IGB_CORE_LOCK(adapter);
	index = (vtag >> 5) & 0x7F;
	bit = vtag & 0x1F;
	adapter->shadow_vfta[index] |= (1 << bit);
	++adapter->num_vlans;
	/* Change hw filter setting */
	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
		igb_setup_vlan_hw_support(adapter);
	IGB_CORE_UNLOCK(adapter);
}

/*
 * This routine is run via a VLAN
 * unconfig EVENT
 */
static void
igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
{
	struct adapter	*adapter = ifp->if_softc;
	u32		index, bit;

	if (ifp->if_softc != arg)
		return;

	if ((vtag == 0) || (vtag > 4095))	/* Invalid */
		return;

	IGB_CORE_LOCK(adapter);
	index = (vtag >> 5) & 0x7F;
	bit = vtag & 0x1F;
	adapter->shadow_vfta[index] &= ~(1 << bit);
	--adapter->num_vlans;
	/* Change hw filter setting */
	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
		igb_setup_vlan_hw_support(adapter);
	IGB_CORE_UNLOCK(adapter);
}

static void
igb_setup_vlan_hw_support(struct adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	struct ifnet	*ifp = adapter->ifp;
	u32		reg;

	if (adapter->vf_ifp) {
		e1000_rlpml_set_vf(hw,
		    adapter->max_frame_size + VLAN_TAG_SIZE);
		return;
	}

	reg = E1000_READ_REG(hw, E1000_CTRL);
	reg |= E1000_CTRL_VME;
	E1000_WRITE_REG(hw, E1000_CTRL, reg);

	/* Enable the Filter Table */
	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
		reg = E1000_READ_REG(hw, E1000_RCTL);
		reg &= ~E1000_RCTL_CFIEN;
		reg |= E1000_RCTL_VFE;
		E1000_WRITE_REG(hw, E1000_RCTL, reg);
	}

	/* Update the frame size */
	E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
	    adapter->max_frame_size + VLAN_TAG_SIZE);

	/* Don't bother with table if no vlans */
	if ((adapter->num_vlans == 0) ||
	    ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0))
		return;
	/*
	** A soft reset zeroes out the VFTA, so
	** we need to repopulate it now.
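** As a worked example: vtag 100 lives at shadow_vfta[100 >> 5], i.e.
** shadow_vfta[3], bit 100 & 0x1F = 4 -- the same index/bit math used
** by the register/unregister handlers above.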
*/ for (int i = 0; i < IGB_VFTA_SIZE; i++) if (adapter->shadow_vfta[i] != 0) { if (adapter->vf_ifp) e1000_vfta_set_vf(hw, adapter->shadow_vfta[i], TRUE); else e1000_write_vfta(hw, i, adapter->shadow_vfta[i]); } } static void igb_enable_intr(struct adapter *adapter) { /* With RSS set up what to auto clear */ if (adapter->msix_mem) { u32 mask = (adapter->que_mask | adapter->link_mask); E1000_WRITE_REG(&adapter->hw, E1000_EIAC, mask); E1000_WRITE_REG(&adapter->hw, E1000_EIAM, mask); E1000_WRITE_REG(&adapter->hw, E1000_EIMS, mask); E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC); } else { E1000_WRITE_REG(&adapter->hw, E1000_IMS, IMS_ENABLE_MASK); } E1000_WRITE_FLUSH(&adapter->hw); return; } static void igb_disable_intr(struct adapter *adapter) { if (adapter->msix_mem) { E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0); E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0); } E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0); E1000_WRITE_FLUSH(&adapter->hw); return; } /* * Bit of a misnomer, what this really means is * to enable OS management of the system... aka * to disable special hardware management features */ static void igb_init_manageability(struct adapter *adapter) { if (adapter->has_manage) { int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H); int manc = E1000_READ_REG(&adapter->hw, E1000_MANC); /* disable hardware interception of ARP */ manc &= ~(E1000_MANC_ARP_EN); /* enable receiving management packets to the host */ manc |= E1000_MANC_EN_MNG2HOST; manc2h |= 1 << 5; /* Mng Port 623 */ manc2h |= 1 << 6; /* Mng Port 664 */ E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h); E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc); } } /* * Give control back to hardware management * controller if there is one. */ static void igb_release_manageability(struct adapter *adapter) { if (adapter->has_manage) { int manc = E1000_READ_REG(&adapter->hw, E1000_MANC); /* re-enable hardware interception of ARP */ manc |= E1000_MANC_ARP_EN; manc &= ~E1000_MANC_EN_MNG2HOST; E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc); } } /* * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit. * For ASF and Pass Through versions of f/w this means that * the driver is loaded. * */ static void igb_get_hw_control(struct adapter *adapter) { u32 ctrl_ext; if (adapter->vf_ifp) return; /* Let firmware know the driver has taken over */ ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext | E1000_CTRL_EXT_DRV_LOAD); } /* * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit. * For ASF and Pass Through versions of f/w this means that the * driver is no longer loaded. 
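 * This is the counterpart of igb_get_hw_control() above; the pair
 * brackets the driver's ownership of the port so ASF/PT firmware
 * knows when it may take the hardware back.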
 *
 */
static void
igb_release_hw_control(struct adapter *adapter)
{
	u32 ctrl_ext;

	if (adapter->vf_ifp)
		return;

	/* Let firmware take over control of h/w */
	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
}

static int
igb_is_valid_ether_addr(uint8_t *addr)
{
	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };

	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
		return (FALSE);
	}

	return (TRUE);
}

/*
 * Enable PCI Wake On LAN capability
 */
static void
igb_enable_wakeup(device_t dev)
{
	u16	cap, status;
	u8	id;

	/* First find the capabilities pointer */
	cap = pci_read_config(dev, PCIR_CAP_PTR, 2);

	/* Read the PM Capabilities */
	id = pci_read_config(dev, cap, 1);
	if (id != PCIY_PMG)	/* Something wrong */
		return;

	/*
	 * OK, we have the power capabilities,
	 * so now get the status register
	 */
	cap += PCIR_POWER_STATUS;
	status = pci_read_config(dev, cap, 2);
	status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
	pci_write_config(dev, cap, status, 2);

	return;
}

static void
igb_led_func(void *arg, int onoff)
{
	struct adapter	*adapter = arg;

	IGB_CORE_LOCK(adapter);
	if (onoff) {
		e1000_setup_led(&adapter->hw);
		e1000_led_on(&adapter->hw);
	} else {
		e1000_led_off(&adapter->hw);
		e1000_cleanup_led(&adapter->hw);
	}
	IGB_CORE_UNLOCK(adapter);
}

static uint64_t
igb_get_vf_counter(if_t ifp, ift_counter cnt)
{
	struct adapter *adapter;
	struct e1000_vf_stats *stats;
#ifndef IGB_LEGACY_TX
	struct tx_ring *txr;
	uint64_t rv;
#endif

	adapter = if_getsoftc(ifp);
	stats = (struct e1000_vf_stats *)adapter->stats;

	switch (cnt) {
	case IFCOUNTER_IPACKETS:
		return (stats->gprc);
	case IFCOUNTER_OPACKETS:
		return (stats->gptc);
	case IFCOUNTER_IBYTES:
		return (stats->gorc);
	case IFCOUNTER_OBYTES:
		return (stats->gotc);
	case IFCOUNTER_IMCASTS:
		return (stats->mprc);
	case IFCOUNTER_IERRORS:
		return (adapter->dropped_pkts);
	case IFCOUNTER_OERRORS:
		return (adapter->watchdog_events);
#ifndef IGB_LEGACY_TX
	case IFCOUNTER_OQDROPS:
		rv = 0;
		txr = adapter->tx_rings;
		for (int i = 0; i < adapter->num_queues; i++, txr++)
			rv += txr->br->br_drops;
		return (rv);
#endif
	default:
		return (if_get_counter_default(ifp, cnt));
	}
}

static uint64_t
igb_get_counter(if_t ifp, ift_counter cnt)
{
	struct adapter *adapter;
	struct e1000_hw_stats *stats;
#ifndef IGB_LEGACY_TX
	struct tx_ring *txr;
	uint64_t rv;
#endif

	adapter = if_getsoftc(ifp);
	if (adapter->vf_ifp)
		return (igb_get_vf_counter(ifp, cnt));

	stats = (struct e1000_hw_stats *)adapter->stats;

	switch (cnt) {
	case IFCOUNTER_IPACKETS:
		return (stats->gprc);
	case IFCOUNTER_OPACKETS:
		return (stats->gptc);
	case IFCOUNTER_IBYTES:
		return (stats->gorc);
	case IFCOUNTER_OBYTES:
		return (stats->gotc);
	case IFCOUNTER_IMCASTS:
		return (stats->mprc);
	case IFCOUNTER_OMCASTS:
		return (stats->mptc);
	case IFCOUNTER_IERRORS:
		return (adapter->dropped_pkts + stats->rxerrc +
		    stats->crcerrs + stats->algnerrc +
		    stats->ruc + stats->roc + stats->cexterr);
	case IFCOUNTER_OERRORS:
		return (stats->ecol + stats->latecol +
		    adapter->watchdog_events);
	case IFCOUNTER_COLLISIONS:
		return (stats->colc);
	case IFCOUNTER_IQDROPS:
		return (stats->mpc);
#ifndef IGB_LEGACY_TX
	case IFCOUNTER_OQDROPS:
		rv = 0;
		txr = adapter->tx_rings;
		for (int i = 0; i < adapter->num_queues; i++, txr++)
			rv += txr->br->br_drops;
		return (rv);
#endif
	default:
		return (if_get_counter_default(ifp, cnt));
	}
}

/**********************************************************************
 *
 *  Update the board statistics counters.
* **********************************************************************/ static void igb_update_stats_counters(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; struct e1000_hw_stats *stats; /* ** The virtual function adapter has only a ** small controlled set of stats, do only ** those and return. */ if (adapter->vf_ifp) { igb_update_vf_stats_counters(adapter); return; } stats = (struct e1000_hw_stats *)adapter->stats; if (adapter->hw.phy.media_type == e1000_media_type_copper || (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) { stats->symerrs += E1000_READ_REG(hw,E1000_SYMERRS); stats->sec += E1000_READ_REG(hw, E1000_SEC); } stats->crcerrs += E1000_READ_REG(hw, E1000_CRCERRS); stats->mpc += E1000_READ_REG(hw, E1000_MPC); stats->scc += E1000_READ_REG(hw, E1000_SCC); stats->ecol += E1000_READ_REG(hw, E1000_ECOL); stats->mcc += E1000_READ_REG(hw, E1000_MCC); stats->latecol += E1000_READ_REG(hw, E1000_LATECOL); stats->colc += E1000_READ_REG(hw, E1000_COLC); stats->dc += E1000_READ_REG(hw, E1000_DC); stats->rlec += E1000_READ_REG(hw, E1000_RLEC); stats->xonrxc += E1000_READ_REG(hw, E1000_XONRXC); stats->xontxc += E1000_READ_REG(hw, E1000_XONTXC); /* ** For watchdog management we need to know if we have been ** paused during the last interval, so capture that here. */ adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC); stats->xoffrxc += adapter->pause_frames; stats->xofftxc += E1000_READ_REG(hw, E1000_XOFFTXC); stats->fcruc += E1000_READ_REG(hw, E1000_FCRUC); stats->prc64 += E1000_READ_REG(hw, E1000_PRC64); stats->prc127 += E1000_READ_REG(hw, E1000_PRC127); stats->prc255 += E1000_READ_REG(hw, E1000_PRC255); stats->prc511 += E1000_READ_REG(hw, E1000_PRC511); stats->prc1023 += E1000_READ_REG(hw, E1000_PRC1023); stats->prc1522 += E1000_READ_REG(hw, E1000_PRC1522); stats->gprc += E1000_READ_REG(hw, E1000_GPRC); stats->bprc += E1000_READ_REG(hw, E1000_BPRC); stats->mprc += E1000_READ_REG(hw, E1000_MPRC); stats->gptc += E1000_READ_REG(hw, E1000_GPTC); /* For the 64-bit byte counters the low dword must be read first. 
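 * (reading the high dword latches and clears the pair, so e.g. GORCL
 * must be read before GORCH or the low 32 bits of the octet count
 * would be lost; the accumulation below is low + ((u64)high << 32))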
*/ /* Both registers clear on the read of the high dword */ stats->gorc += E1000_READ_REG(hw, E1000_GORCL) + ((u64)E1000_READ_REG(hw, E1000_GORCH) << 32); stats->gotc += E1000_READ_REG(hw, E1000_GOTCL) + ((u64)E1000_READ_REG(hw, E1000_GOTCH) << 32); stats->rnbc += E1000_READ_REG(hw, E1000_RNBC); stats->ruc += E1000_READ_REG(hw, E1000_RUC); stats->rfc += E1000_READ_REG(hw, E1000_RFC); stats->roc += E1000_READ_REG(hw, E1000_ROC); stats->rjc += E1000_READ_REG(hw, E1000_RJC); stats->mgprc += E1000_READ_REG(hw, E1000_MGTPRC); stats->mgpdc += E1000_READ_REG(hw, E1000_MGTPDC); stats->mgptc += E1000_READ_REG(hw, E1000_MGTPTC); stats->tor += E1000_READ_REG(hw, E1000_TORL) + ((u64)E1000_READ_REG(hw, E1000_TORH) << 32); stats->tot += E1000_READ_REG(hw, E1000_TOTL) + ((u64)E1000_READ_REG(hw, E1000_TOTH) << 32); stats->tpr += E1000_READ_REG(hw, E1000_TPR); stats->tpt += E1000_READ_REG(hw, E1000_TPT); stats->ptc64 += E1000_READ_REG(hw, E1000_PTC64); stats->ptc127 += E1000_READ_REG(hw, E1000_PTC127); stats->ptc255 += E1000_READ_REG(hw, E1000_PTC255); stats->ptc511 += E1000_READ_REG(hw, E1000_PTC511); stats->ptc1023 += E1000_READ_REG(hw, E1000_PTC1023); stats->ptc1522 += E1000_READ_REG(hw, E1000_PTC1522); stats->mptc += E1000_READ_REG(hw, E1000_MPTC); stats->bptc += E1000_READ_REG(hw, E1000_BPTC); /* Interrupt Counts */ stats->iac += E1000_READ_REG(hw, E1000_IAC); stats->icrxptc += E1000_READ_REG(hw, E1000_ICRXPTC); stats->icrxatc += E1000_READ_REG(hw, E1000_ICRXATC); stats->ictxptc += E1000_READ_REG(hw, E1000_ICTXPTC); stats->ictxatc += E1000_READ_REG(hw, E1000_ICTXATC); stats->ictxqec += E1000_READ_REG(hw, E1000_ICTXQEC); stats->ictxqmtc += E1000_READ_REG(hw, E1000_ICTXQMTC); stats->icrxdmtc += E1000_READ_REG(hw, E1000_ICRXDMTC); stats->icrxoc += E1000_READ_REG(hw, E1000_ICRXOC); /* Host to Card Statistics */ stats->cbtmpc += E1000_READ_REG(hw, E1000_CBTMPC); stats->htdpmc += E1000_READ_REG(hw, E1000_HTDPMC); stats->cbrdpc += E1000_READ_REG(hw, E1000_CBRDPC); stats->cbrmpc += E1000_READ_REG(hw, E1000_CBRMPC); stats->rpthc += E1000_READ_REG(hw, E1000_RPTHC); stats->hgptc += E1000_READ_REG(hw, E1000_HGPTC); stats->htcbdpc += E1000_READ_REG(hw, E1000_HTCBDPC); stats->hgorc += (E1000_READ_REG(hw, E1000_HGORCL) + ((u64)E1000_READ_REG(hw, E1000_HGORCH) << 32)); stats->hgotc += (E1000_READ_REG(hw, E1000_HGOTCL) + ((u64)E1000_READ_REG(hw, E1000_HGOTCH) << 32)); stats->lenerrs += E1000_READ_REG(hw, E1000_LENERRS); stats->scvpc += E1000_READ_REG(hw, E1000_SCVPC); stats->hrmpc += E1000_READ_REG(hw, E1000_HRMPC); stats->algnerrc += E1000_READ_REG(hw, E1000_ALGNERRC); stats->rxerrc += E1000_READ_REG(hw, E1000_RXERRC); stats->tncrs += E1000_READ_REG(hw, E1000_TNCRS); stats->cexterr += E1000_READ_REG(hw, E1000_CEXTERR); stats->tsctc += E1000_READ_REG(hw, E1000_TSCTC); stats->tsctfc += E1000_READ_REG(hw, E1000_TSCTFC); /* Driver specific counters */ adapter->device_control = E1000_READ_REG(hw, E1000_CTRL); adapter->rx_control = E1000_READ_REG(hw, E1000_RCTL); adapter->int_mask = E1000_READ_REG(hw, E1000_IMS); adapter->eint_mask = E1000_READ_REG(hw, E1000_EIMS); adapter->packet_buf_alloc_tx = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff0000) >> 16); adapter->packet_buf_alloc_rx = (E1000_READ_REG(hw, E1000_PBA) & 0xffff); } /********************************************************************** * * Initialize the VF board statistics counters. 
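 *  The VF exposes only 32-bit counters, so the last_* snapshots seeded
 *  here let the update path accumulate deltas across register wraps
 *  (a reading of UPDATE_VF_REG's purpose; its definition lives in the
 *  header and is assumed to be the usual delta-accumulate macro).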
* **********************************************************************/ static void igb_vf_init_stats(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; struct e1000_vf_stats *stats; stats = (struct e1000_vf_stats *)adapter->stats; if (stats == NULL) return; stats->last_gprc = E1000_READ_REG(hw, E1000_VFGPRC); stats->last_gorc = E1000_READ_REG(hw, E1000_VFGORC); stats->last_gptc = E1000_READ_REG(hw, E1000_VFGPTC); stats->last_gotc = E1000_READ_REG(hw, E1000_VFGOTC); stats->last_mprc = E1000_READ_REG(hw, E1000_VFMPRC); } /********************************************************************** * * Update the VF board statistics counters. * **********************************************************************/ static void igb_update_vf_stats_counters(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; struct e1000_vf_stats *stats; if (adapter->link_speed == 0) return; stats = (struct e1000_vf_stats *)adapter->stats; UPDATE_VF_REG(E1000_VFGPRC, stats->last_gprc, stats->gprc); UPDATE_VF_REG(E1000_VFGORC, stats->last_gorc, stats->gorc); UPDATE_VF_REG(E1000_VFGPTC, stats->last_gptc, stats->gptc); UPDATE_VF_REG(E1000_VFGOTC, stats->last_gotc, stats->gotc); UPDATE_VF_REG(E1000_VFMPRC, stats->last_mprc, stats->mprc); } /* Export a single 32-bit register via a read-only sysctl. */ static int igb_sysctl_reg_handler(SYSCTL_HANDLER_ARGS) { struct adapter *adapter; u_int val; adapter = oidp->oid_arg1; val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2); return (sysctl_handle_int(oidp, &val, 0, req)); } /* ** Tuneable interrupt rate handler */ static int igb_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS) { struct igb_queue *que = ((struct igb_queue *)oidp->oid_arg1); int error; u32 reg, usec, rate; reg = E1000_READ_REG(&que->adapter->hw, E1000_EITR(que->msix)); usec = ((reg & 0x7FFC) >> 2); if (usec > 0) rate = 1000000 / usec; else rate = 0; error = sysctl_handle_int(oidp, &rate, 0, req); if (error || !req->newptr) return error; return 0; } /* * Add sysctl variables, one per statistic, to the system. 
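 * Everything lands under the device's sysctl tree, dev.igb.<unit>:
 * per-queue "queueN" nodes plus "mac_stats", "interrupts" and "host"
 * sub-trees, e.g. `sysctl dev.igb.0.mac_stats.good_pkts_recvd`
 * (unit number assumed).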
*/ static void igb_add_hw_stats(struct adapter *adapter) { device_t dev = adapter->dev; struct tx_ring *txr = adapter->tx_rings; struct rx_ring *rxr = adapter->rx_rings; struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev); struct sysctl_oid *tree = device_get_sysctl_tree(dev); struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree); struct e1000_hw_stats *stats = adapter->stats; struct sysctl_oid *stat_node, *queue_node, *int_node, *host_node; struct sysctl_oid_list *stat_list, *queue_list, *int_list, *host_list; #define QUEUE_NAME_LEN 32 char namebuf[QUEUE_NAME_LEN]; /* Driver Statistics */ SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", CTLFLAG_RD, &adapter->dropped_pkts, "Driver dropped packets"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq", CTLFLAG_RD, &adapter->link_irq, "Link MSIX IRQ Handled"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_fail", CTLFLAG_RD, &adapter->mbuf_defrag_failed, "Defragmenting mbuf chain failed"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail", CTLFLAG_RD, &adapter->no_tx_dma_setup, "Driver tx dma failure in xmit"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns", CTLFLAG_RD, &adapter->rx_overruns, "RX overruns"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts", CTLFLAG_RD, &adapter->watchdog_events, "Watchdog timeouts"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "device_control", CTLFLAG_RD, &adapter->device_control, "Device Control Register"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_control", CTLFLAG_RD, &adapter->rx_control, "Receiver Control Register"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "interrupt_mask", CTLFLAG_RD, &adapter->int_mask, "Interrupt Mask"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "extended_int_mask", CTLFLAG_RD, &adapter->eint_mask, "Extended Interrupt Mask"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_buf_alloc", CTLFLAG_RD, &adapter->packet_buf_alloc_tx, "Transmit Buffer Packet Allocation"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_buf_alloc", CTLFLAG_RD, &adapter->packet_buf_alloc_rx, "Receive Buffer Packet Allocation"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water", CTLFLAG_RD, &adapter->hw.fc.high_water, 0, "Flow Control High Watermark"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water", CTLFLAG_RD, &adapter->hw.fc.low_water, 0, "Flow Control Low Watermark"); for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) { struct lro_ctrl *lro = &rxr->lro; snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i); queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "Queue Name"); queue_list = SYSCTL_CHILDREN(queue_node); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate", CTLTYPE_UINT | CTLFLAG_RD, &adapter->queues[i], sizeof(&adapter->queues[i]), igb_sysctl_interrupt_rate_handler, "IU", "Interrupt Rate"); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDH(txr->me), igb_sysctl_reg_handler, "IU", "Transmit Descriptor Head"); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDT(txr->me), igb_sysctl_reg_handler, "IU", "Transmit Descriptor Tail"); SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "no_desc_avail", CTLFLAG_RD, &txr->no_desc_avail, "Queue Descriptors Unavailable"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_packets", CTLFLAG_RD, &txr->total_packets, "Queue Packets Transmitted"); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDH(rxr->me), igb_sysctl_reg_handler, "IU", "Receive Descriptor Head"); 
		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
		    CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDT(rxr->me),
		    igb_sysctl_reg_handler, "IU", "Receive Descriptor Tail");
		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_packets",
		    CTLFLAG_RD, &rxr->rx_packets, "Queue Packets Received");
		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_bytes",
		    CTLFLAG_RD, &rxr->rx_bytes, "Queue Bytes Received");
		SYSCTL_ADD_U64(ctx, queue_list, OID_AUTO, "lro_queued",
		    CTLFLAG_RD, &lro->lro_queued, 0, "LRO Queued");
		SYSCTL_ADD_U64(ctx, queue_list, OID_AUTO, "lro_flushed",
		    CTLFLAG_RD, &lro->lro_flushed, 0, "LRO Flushed");
	}

	/* MAC stats get their own sub node */

	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
	    CTLFLAG_RD, NULL, "MAC Statistics");
	stat_list = SYSCTL_CHILDREN(stat_node);

	/*
	** VF adapter has a very limited set of stats
	** since it's not managing the metal, so to speak.
	*/
	if (adapter->vf_ifp) {
		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
		    CTLFLAG_RD, &stats->gprc, "Good Packets Received");
		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
		    CTLFLAG_RD, &stats->gptc, "Good Packets Transmitted");
		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
		    CTLFLAG_RD, &stats->gorc, "Good Octets Received");
		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
		    CTLFLAG_RD, &stats->gotc, "Good Octets Transmitted");
		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
		    CTLFLAG_RD, &stats->mprc, "Multicast Packets Received");
		return;
	}

	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "excess_coll",
	    CTLFLAG_RD, &stats->ecol, "Excessive collisions");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "single_coll",
	    CTLFLAG_RD, &stats->scc, "Single collisions");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
	    CTLFLAG_RD, &stats->mcc, "Multiple collisions");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "late_coll",
	    CTLFLAG_RD, &stats->latecol, "Late collisions");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "collision_count",
	    CTLFLAG_RD, &stats->colc, "Collision Count");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
	    CTLFLAG_RD, &stats->symerrs, "Symbol Errors");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
	    CTLFLAG_RD, &stats->sec, "Sequence Errors");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "defer_count",
	    CTLFLAG_RD, &stats->dc, "Defer Count");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "missed_packets",
	    CTLFLAG_RD, &stats->mpc, "Missed Packets");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_length_errors",
	    CTLFLAG_RD, &stats->rlec, "Receive Length Errors");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
	    CTLFLAG_RD, &stats->rnbc, "Receive No Buffers");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
	    CTLFLAG_RD, &stats->ruc, "Receive Undersize");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
	    CTLFLAG_RD, &stats->rfc, "Fragmented Packets Received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
	    CTLFLAG_RD, &stats->roc, "Oversized Packets Received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
	    CTLFLAG_RD, &stats->rjc, "Received Jabber");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_errs",
	    CTLFLAG_RD, &stats->rxerrc, "Receive Errors");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "crc_errs",
	    CTLFLAG_RD, &stats->crcerrs, "CRC errors");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
	    CTLFLAG_RD, &stats->algnerrc, "Alignment Errors");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_no_crs",
	    CTLFLAG_RD, &stats->tncrs, "Transmit with No CRS");
	/* On 82575 these are collision counts */
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
	    CTLFLAG_RD, &stats->cexterr, "Collision/Carrier extension errors");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
	    CTLFLAG_RD, &stats->xonrxc, "XON Received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_txd",
	    CTLFLAG_RD, &stats->xontxc, "XON Transmitted");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
	    CTLFLAG_RD, &stats->xoffrxc, "XOFF Received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
	    CTLFLAG_RD, &stats->xofftxc, "XOFF Transmitted");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "unsupported_fc_recvd",
	    CTLFLAG_RD, &stats->fcruc, "Unsupported Flow Control Received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_recvd",
	    CTLFLAG_RD, &stats->mgprc, "Management Packets Received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_drop",
	    CTLFLAG_RD, &stats->mgpdc, "Management Packets Dropped");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_txd",
	    CTLFLAG_RD, &stats->mgptc, "Management Packets Transmitted");

	/* Packet Reception Stats */
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
	    CTLFLAG_RD, &stats->tpr, "Total Packets Received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
	    CTLFLAG_RD, &stats->gprc, "Good Packets Received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
	    CTLFLAG_RD, &stats->bprc, "Broadcast Packets Received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
	    CTLFLAG_RD, &stats->mprc, "Multicast Packets Received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
	    CTLFLAG_RD, &stats->prc64, "64 byte frames received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
	    CTLFLAG_RD, &stats->prc127, "65-127 byte frames received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
	    CTLFLAG_RD, &stats->prc255, "128-255 byte frames received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
	    CTLFLAG_RD, &stats->prc511, "256-511 byte frames received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
	    CTLFLAG_RD, &stats->prc1023, "512-1023 byte frames received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
	    CTLFLAG_RD, &stats->prc1522, "1024-1522 byte frames received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
	    CTLFLAG_RD, &stats->gorc, "Good Octets Received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_octets_recvd",
	    CTLFLAG_RD, &stats->tor, "Total Octets Received");

	/* Packet Transmission Stats */
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
	    CTLFLAG_RD, &stats->gotc, "Good Octets Transmitted");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_octets_txd",
	    CTLFLAG_RD, &stats->tot, "Total Octets Transmitted");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
	    CTLFLAG_RD, &stats->tpt, "Total Packets Transmitted");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
	    CTLFLAG_RD, &stats->gptc, "Good Packets Transmitted");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
	    CTLFLAG_RD, &stats->bptc, "Broadcast Packets Transmitted");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
	    CTLFLAG_RD, &stats->mptc, "Multicast Packets Transmitted");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
	    CTLFLAG_RD, &stats->ptc64, "64 byte frames transmitted");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
	    CTLFLAG_RD, &stats->ptc127, "65-127 byte frames transmitted");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
	    CTLFLAG_RD,
&stats->ptc255, "128-255 byte frames transmitted"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511", CTLFLAG_RD, &stats->ptc511, "256-511 byte frames transmitted"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023", CTLFLAG_RD, &stats->ptc1023, "512-1023 byte frames transmitted"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522", CTLFLAG_RD, &stats->ptc1522, "1024-1522 byte frames transmitted"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_txd", CTLFLAG_RD, &stats->tsctc, "TSO Contexts Transmitted"); SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail", CTLFLAG_RD, &stats->tsctfc, "TSO Contexts Failed"); /* Interrupt Stats */ int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts", CTLFLAG_RD, NULL, "Interrupt Statistics"); int_list = SYSCTL_CHILDREN(int_node); SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "asserts", CTLFLAG_RD, &stats->iac, "Interrupt Assertion Count"); SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer", CTLFLAG_RD, &stats->icrxptc, "Interrupt Cause Rx Pkt Timer Expire Count"); SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_abs_timer", CTLFLAG_RD, &stats->icrxatc, "Interrupt Cause Rx Abs Timer Expire Count"); SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer", CTLFLAG_RD, &stats->ictxptc, "Interrupt Cause Tx Pkt Timer Expire Count"); SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_abs_timer", CTLFLAG_RD, &stats->ictxatc, "Interrupt Cause Tx Abs Timer Expire Count"); SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_empty", CTLFLAG_RD, &stats->ictxqec, "Interrupt Cause Tx Queue Empty Count"); SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh", CTLFLAG_RD, &stats->ictxqmtc, "Interrupt Cause Tx Queue Min Thresh Count"); SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh", CTLFLAG_RD, &stats->icrxdmtc, "Interrupt Cause Rx Desc Min Thresh Count"); SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_overrun", CTLFLAG_RD, &stats->icrxoc, "Interrupt Cause Receiver Overrun Count"); /* Host to Card Stats */ host_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "host", CTLFLAG_RD, NULL, "Host to Card Statistics"); host_list = SYSCTL_CHILDREN(host_node); SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt", CTLFLAG_RD, &stats->cbtmpc, "Circuit Breaker Tx Packet Count"); SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "host_tx_pkt_discard", CTLFLAG_RD, &stats->htdpmc, "Host Transmit Discarded Packets"); SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_pkt", CTLFLAG_RD, &stats->rpthc, "Rx Packets To Host"); SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkts", CTLFLAG_RD, &stats->cbrmpc, "Circuit Breaker Rx Packet Count"); SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkt_drop", CTLFLAG_RD, &stats->cbrdpc, "Circuit Breaker Rx Dropped Count"); SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_pkt", CTLFLAG_RD, &stats->hgptc, "Host Good Packets Tx Count"); SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt_drop", CTLFLAG_RD, &stats->htcbdpc, "Host Tx Circuit Breaker Dropped Count"); SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_good_bytes", CTLFLAG_RD, &stats->hgorc, "Host Good Octets Received Count"); SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_bytes", CTLFLAG_RD, &stats->hgotc, "Host Good Octets Transmit Count"); SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "length_errors", CTLFLAG_RD, &stats->lenerrs, "Length Errors"); SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "serdes_violation_pkt", CTLFLAG_RD, &stats->scvpc, "SerDes/SGMII Code Violation Pkt Count"); SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, 
"header_redir_missed", CTLFLAG_RD, &stats->hrmpc, "Header Redirection Missed Packet Count"); } /********************************************************************** * * This routine provides a way to dump out the adapter eeprom, * often a useful debug/service tool. This only dumps the first * 32 words, stuff that matters is in that extent. * **********************************************************************/ static int igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS) { struct adapter *adapter; int error; int result; result = -1; error = sysctl_handle_int(oidp, &result, 0, req); if (error || !req->newptr) return (error); /* * This value will cause a hex dump of the * first 32 16-bit words of the EEPROM to * the screen. */ if (result == 1) { adapter = (struct adapter *)arg1; igb_print_nvm_info(adapter); } return (error); } static void igb_print_nvm_info(struct adapter *adapter) { u16 eeprom_data; int i, j, row = 0; /* Its a bit crude, but it gets the job done */ printf("\nInterface EEPROM Dump:\n"); printf("Offset\n0x0000 "); for (i = 0, j = 0; i < 32; i++, j++) { if (j == 8) { /* Make the offset block */ j = 0; ++row; printf("\n0x00%x0 ",row); } e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data); printf("%04x ", eeprom_data); } printf("\n"); } static void igb_set_sysctl_value(struct adapter *adapter, const char *name, const char *description, int *limit, int value) { *limit = value; SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev), SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)), OID_AUTO, name, CTLFLAG_RW, limit, value, description); } /* ** Set flow control using sysctl: ** Flow control values: ** 0 - off ** 1 - rx pause ** 2 - tx pause ** 3 - full */ static int igb_set_flowcntl(SYSCTL_HANDLER_ARGS) { int error; static int input = 3; /* default is full */ struct adapter *adapter = (struct adapter *) arg1; error = sysctl_handle_int(oidp, &input, 0, req); if ((error) || (req->newptr == NULL)) return (error); switch (input) { case e1000_fc_rx_pause: case e1000_fc_tx_pause: case e1000_fc_full: case e1000_fc_none: adapter->hw.fc.requested_mode = input; adapter->fc = input; break; default: /* Do nothing */ return (error); } adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode; e1000_force_mac_fc(&adapter->hw); /* XXX TODO: update DROP_EN on each RX queue if appropriate */ return (error); } /* ** Manage DMA Coalesce: ** Control values: ** 0/1 - off/on ** Legal timer values are: ** 250,500,1000-10000 in thousands */ static int igb_sysctl_dmac(SYSCTL_HANDLER_ARGS) { struct adapter *adapter = (struct adapter *) arg1; int error; error = sysctl_handle_int(oidp, &adapter->dmac, 0, req); if ((error) || (req->newptr == NULL)) return (error); switch (adapter->dmac) { case 0: /* Disabling */ break; case 1: /* Just enable and use default */ adapter->dmac = 1000; break; case 250: case 500: case 1000: case 2000: case 3000: case 4000: case 5000: case 6000: case 7000: case 8000: case 9000: case 10000: /* Legal values - allow */ break; default: /* Do nothing, illegal value */ adapter->dmac = 0; return (EINVAL); } /* Reinit the interface */ igb_init(adapter); return (error); } /* ** Manage Energy Efficient Ethernet: ** Control values: ** 0/1 - enabled/disabled */ static int igb_sysctl_eee(SYSCTL_HANDLER_ARGS) { struct adapter *adapter = (struct adapter *) arg1; int error, value; value = adapter->hw.dev_spec._82575.eee_disable; error = sysctl_handle_int(oidp, &value, 0, req); if (error || req->newptr == NULL) return (error); IGB_CORE_LOCK(adapter); adapter->hw.dev_spec._82575.eee_disable = (value != 
0); igb_init_locked(adapter); IGB_CORE_UNLOCK(adapter); return (0); } Index: head/sys/dev/e1000/if_lem.c =================================================================== --- head/sys/dev/e1000/if_lem.c (revision 302383) +++ head/sys/dev/e1000/if_lem.c (revision 302384) @@ -1,4874 +1,4875 @@ /****************************************************************************** Copyright (c) 2001-2015, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************/ /*$FreeBSD$*/ /* * Uncomment the following extensions for better performance in a VM, * especially if you have support in the hypervisor. 
* See http://info.iet.unipi.it/~luigi/netmap/ */ // #define BATCH_DISPATCH // #define NIC_SEND_COMBINING // #define NIC_PARAVIRT /* enable virtio-like synchronization */ #include "opt_inet.h" #include "opt_inet6.h" #ifdef HAVE_KERNEL_OPTION_HEADERS #include "opt_device_polling.h" #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "e1000_api.h" #include "if_lem.h" /********************************************************************* * Legacy Em Driver version: *********************************************************************/ char lem_driver_version[] = "1.1.0"; /********************************************************************* * PCI Device ID Table * * Used by probe to select devices to load on * Last field stores an index into e1000_strings * Last entry must be all 0s * * { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index } *********************************************************************/ static em_vendor_info_t lem_vendor_info_array[] = { /* Intel(R) PRO/1000 Network Connection */ { 0x8086, E1000_DEV_ID_82540EM, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82540EM_LOM, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82540EP, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82540EP_LOM, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82540EP_LP, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82541EI, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82541ER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82541ER_LOM, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82541EI_MOBILE, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82541GI, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82541GI_LF, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82541GI_MOBILE, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82542, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82543GC_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82543GC_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82544EI_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82544EI_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82544GC_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82544GC_LOM, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82545EM_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82545EM_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82545GM_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82545GM_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82545GM_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82546EB_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82546EB_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82546EB_QUAD_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82546GB_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82546GB_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82546GB_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82546GB_PCIE, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82546GB_QUAD_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82547EI, PCI_ANY_ID, PCI_ANY_ID, 0}, { 
0x8086, E1000_DEV_ID_82547EI_MOBILE, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82547GI, PCI_ANY_ID, PCI_ANY_ID, 0}, /* required last entry */ { 0, 0, 0, 0, 0} }; /********************************************************************* * Table of branding strings for all supported NICs. *********************************************************************/ static char *lem_strings[] = { "Intel(R) PRO/1000 Legacy Network Connection" }; /********************************************************************* * Function prototypes *********************************************************************/ static int lem_probe(device_t); static int lem_attach(device_t); static int lem_detach(device_t); static int lem_shutdown(device_t); static int lem_suspend(device_t); static int lem_resume(device_t); static void lem_start(if_t); static void lem_start_locked(if_t ifp); static int lem_ioctl(if_t, u_long, caddr_t); static uint64_t lem_get_counter(if_t, ift_counter); static void lem_init(void *); static void lem_init_locked(struct adapter *); static void lem_stop(void *); static void lem_media_status(if_t, struct ifmediareq *); static int lem_media_change(if_t); static void lem_identify_hardware(struct adapter *); static int lem_allocate_pci_resources(struct adapter *); static int lem_allocate_irq(struct adapter *adapter); static void lem_free_pci_resources(struct adapter *); static void lem_local_timer(void *); static int lem_hardware_init(struct adapter *); static int lem_setup_interface(device_t, struct adapter *); static void lem_setup_transmit_structures(struct adapter *); static void lem_initialize_transmit_unit(struct adapter *); static int lem_setup_receive_structures(struct adapter *); static void lem_initialize_receive_unit(struct adapter *); static void lem_enable_intr(struct adapter *); static void lem_disable_intr(struct adapter *); static void lem_free_transmit_structures(struct adapter *); static void lem_free_receive_structures(struct adapter *); static void lem_update_stats_counters(struct adapter *); static void lem_add_hw_stats(struct adapter *adapter); static void lem_txeof(struct adapter *); static void lem_tx_purge(struct adapter *); static int lem_allocate_receive_structures(struct adapter *); static int lem_allocate_transmit_structures(struct adapter *); static bool lem_rxeof(struct adapter *, int, int *); #ifndef __NO_STRICT_ALIGNMENT static int lem_fixup_rx(struct adapter *); #endif static void lem_receive_checksum(struct adapter *, struct e1000_rx_desc *, struct mbuf *); static void lem_transmit_checksum_setup(struct adapter *, struct mbuf *, u32 *, u32 *); static void lem_set_promisc(struct adapter *); static void lem_disable_promisc(struct adapter *); static void lem_set_multi(struct adapter *); static void lem_update_link_status(struct adapter *); static int lem_get_buf(struct adapter *, int); static void lem_register_vlan(void *, if_t, u16); static void lem_unregister_vlan(void *, if_t, u16); static void lem_setup_vlan_hw_support(struct adapter *); static int lem_xmit(struct adapter *, struct mbuf **); static void lem_smartspeed(struct adapter *); static int lem_82547_fifo_workaround(struct adapter *, int); static void lem_82547_update_fifo_head(struct adapter *, int); static int lem_82547_tx_fifo_reset(struct adapter *); static void lem_82547_move_tail(void *); static int lem_dma_malloc(struct adapter *, bus_size_t, struct em_dma_alloc *, int); static void lem_dma_free(struct adapter *, struct em_dma_alloc *); static int lem_sysctl_nvm_info(SYSCTL_HANDLER_ARGS); 
static void lem_print_nvm_info(struct adapter *); static int lem_is_valid_ether_addr(u8 *); static u32 lem_fill_descriptors (bus_addr_t address, u32 length, PDESC_ARRAY desc_array); static int lem_sysctl_int_delay(SYSCTL_HANDLER_ARGS); static void lem_add_int_delay_sysctl(struct adapter *, const char *, const char *, struct em_int_delay_info *, int, int); static void lem_set_flow_cntrl(struct adapter *, const char *, const char *, int *, int); /* Management and WOL Support */ static void lem_init_manageability(struct adapter *); static void lem_release_manageability(struct adapter *); static void lem_get_hw_control(struct adapter *); static void lem_release_hw_control(struct adapter *); static void lem_get_wakeup(device_t); static void lem_enable_wakeup(device_t); static int lem_enable_phy_wakeup(struct adapter *); static void lem_led_func(void *, int); static void lem_intr(void *); static int lem_irq_fast(void *); static void lem_handle_rxtx(void *context, int pending); static void lem_handle_link(void *context, int pending); static void lem_add_rx_process_limit(struct adapter *, const char *, const char *, int *, int); #ifdef DEVICE_POLLING static poll_handler_t lem_poll; #endif /* POLLING */ /********************************************************************* * FreeBSD Device Interface Entry Points *********************************************************************/ static device_method_t lem_methods[] = { /* Device interface */ DEVMETHOD(device_probe, lem_probe), DEVMETHOD(device_attach, lem_attach), DEVMETHOD(device_detach, lem_detach), DEVMETHOD(device_shutdown, lem_shutdown), DEVMETHOD(device_suspend, lem_suspend), DEVMETHOD(device_resume, lem_resume), DEVMETHOD_END }; static driver_t lem_driver = { "em", lem_methods, sizeof(struct adapter), }; extern devclass_t em_devclass; DRIVER_MODULE(lem, pci, lem_driver, em_devclass, 0, 0); MODULE_DEPEND(lem, pci, 1, 1, 1); MODULE_DEPEND(lem, ether, 1, 1, 1); #ifdef DEV_NETMAP MODULE_DEPEND(lem, netmap, 1, 1, 1); #endif /* DEV_NETMAP */ /********************************************************************* * Tunable default values. *********************************************************************/ #define EM_TICKS_TO_USECS(ticks) ((1024 * (ticks) + 500) / 1000) #define EM_USECS_TO_TICKS(usecs) ((1000 * (usecs) + 512) / 1024) #define MAX_INTS_PER_SEC 8000 #define DEFAULT_ITR (1000000000/(MAX_INTS_PER_SEC * 256)) static int lem_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV); static int lem_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR); static int lem_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV); static int lem_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV); /* * increase lem_rxd and lem_txd to at least 2048 in netmap mode * for better performance. 
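 *
 * These are loader tunables (see the TUNABLE_INT() calls below), so
 * they would normally be raised from /boot/loader.conf; an
 * illustrative (not prescriptive) example:
 *
 *	hw.em.rxd="2048"
 *	hw.em.txd="2048"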
*/ static int lem_rxd = EM_DEFAULT_RXD; static int lem_txd = EM_DEFAULT_TXD; static int lem_smart_pwr_down = FALSE; /* Controls whether promiscuous also shows bad packets */ static int lem_debug_sbp = FALSE; TUNABLE_INT("hw.em.tx_int_delay", &lem_tx_int_delay_dflt); TUNABLE_INT("hw.em.rx_int_delay", &lem_rx_int_delay_dflt); TUNABLE_INT("hw.em.tx_abs_int_delay", &lem_tx_abs_int_delay_dflt); TUNABLE_INT("hw.em.rx_abs_int_delay", &lem_rx_abs_int_delay_dflt); TUNABLE_INT("hw.em.rxd", &lem_rxd); TUNABLE_INT("hw.em.txd", &lem_txd); TUNABLE_INT("hw.em.smart_pwr_down", &lem_smart_pwr_down); TUNABLE_INT("hw.em.sbp", &lem_debug_sbp); /* Interrupt style - default to fast */ static int lem_use_legacy_irq = 0; TUNABLE_INT("hw.em.use_legacy_irq", &lem_use_legacy_irq); /* How many packets rxeof tries to clean at a time */ static int lem_rx_process_limit = 100; TUNABLE_INT("hw.em.rx_process_limit", &lem_rx_process_limit); /* Flow control setting - default to FULL */ static int lem_fc_setting = e1000_fc_full; TUNABLE_INT("hw.em.fc_setting", &lem_fc_setting); /* Global used in WOL setup with multiport cards */ static int global_quad_port_a = 0; #ifdef DEV_NETMAP /* see ixgbe.c for details */ #include #endif /* DEV_NETMAP */ /********************************************************************* * Device identification routine * * em_probe determines if the driver should be loaded on * adapter based on PCI vendor/device id of the adapter. * * return BUS_PROBE_DEFAULT on success, positive on failure *********************************************************************/ static int lem_probe(device_t dev) { char adapter_name[60]; u16 pci_vendor_id = 0; u16 pci_device_id = 0; u16 pci_subvendor_id = 0; u16 pci_subdevice_id = 0; em_vendor_info_t *ent; INIT_DEBUGOUT("em_probe: begin"); pci_vendor_id = pci_get_vendor(dev); if (pci_vendor_id != EM_VENDOR_ID) return (ENXIO); pci_device_id = pci_get_device(dev); pci_subvendor_id = pci_get_subvendor(dev); pci_subdevice_id = pci_get_subdevice(dev); ent = lem_vendor_info_array; while (ent->vendor_id != 0) { if ((pci_vendor_id == ent->vendor_id) && (pci_device_id == ent->device_id) && ((pci_subvendor_id == ent->subvendor_id) || (ent->subvendor_id == PCI_ANY_ID)) && ((pci_subdevice_id == ent->subdevice_id) || (ent->subdevice_id == PCI_ANY_ID))) { sprintf(adapter_name, "%s %s", lem_strings[ent->index], lem_driver_version); device_set_desc_copy(dev, adapter_name); return (BUS_PROBE_DEFAULT); } ent++; } return (ENXIO); } /********************************************************************* * Device initialization routine * * The attach entry point is called when the driver is being loaded. * This routine identifies the type of hardware, allocates all resources * and initializes the hardware. 
* * return 0 on success, positive on failure *********************************************************************/ static int lem_attach(device_t dev) { struct adapter *adapter; int tsize, rsize; int error = 0; INIT_DEBUGOUT("lem_attach: begin"); adapter = device_get_softc(dev); adapter->dev = adapter->osdep.dev = dev; EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev)); EM_TX_LOCK_INIT(adapter, device_get_nameunit(dev)); EM_RX_LOCK_INIT(adapter, device_get_nameunit(dev)); /* SYSCTL stuff */ SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0, lem_sysctl_nvm_info, "I", "NVM Information"); callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0); callout_init_mtx(&adapter->tx_fifo_timer, &adapter->tx_mtx, 0); /* Determine hardware and mac info */ lem_identify_hardware(adapter); /* Setup PCI resources */ if (lem_allocate_pci_resources(adapter)) { device_printf(dev, "Allocation of PCI resources failed\n"); error = ENXIO; goto err_pci; } /* Do Shared Code initialization */ if (e1000_setup_init_funcs(&adapter->hw, TRUE)) { device_printf(dev, "Setup of Shared code failed\n"); error = ENXIO; goto err_pci; } e1000_get_bus_info(&adapter->hw); /* Set up some sysctls for the tunable interrupt delays */ lem_add_int_delay_sysctl(adapter, "rx_int_delay", "receive interrupt delay in usecs", &adapter->rx_int_delay, E1000_REGISTER(&adapter->hw, E1000_RDTR), lem_rx_int_delay_dflt); lem_add_int_delay_sysctl(adapter, "tx_int_delay", "transmit interrupt delay in usecs", &adapter->tx_int_delay, E1000_REGISTER(&adapter->hw, E1000_TIDV), lem_tx_int_delay_dflt); if (adapter->hw.mac.type >= e1000_82540) { lem_add_int_delay_sysctl(adapter, "rx_abs_int_delay", "receive interrupt delay limit in usecs", &adapter->rx_abs_int_delay, E1000_REGISTER(&adapter->hw, E1000_RADV), lem_rx_abs_int_delay_dflt); lem_add_int_delay_sysctl(adapter, "tx_abs_int_delay", "transmit interrupt delay limit in usecs", &adapter->tx_abs_int_delay, E1000_REGISTER(&adapter->hw, E1000_TADV), lem_tx_abs_int_delay_dflt); lem_add_int_delay_sysctl(adapter, "itr", "interrupt delay limit in usecs/4", &adapter->tx_itr, E1000_REGISTER(&adapter->hw, E1000_ITR), DEFAULT_ITR); } /* Sysctls for limiting the amount of work done in the taskqueue */ lem_add_rx_process_limit(adapter, "rx_processing_limit", "max number of rx packets to process", &adapter->rx_process_limit, lem_rx_process_limit); #ifdef NIC_SEND_COMBINING /* Sysctls to control mitigation */ lem_add_rx_process_limit(adapter, "sc_enable", "driver TDT mitigation", &adapter->sc_enable, 0); #endif /* NIC_SEND_COMBINING */ #ifdef BATCH_DISPATCH lem_add_rx_process_limit(adapter, "batch_enable", "driver rx batch", &adapter->batch_enable, 0); #endif /* BATCH_DISPATCH */ #ifdef NIC_PARAVIRT lem_add_rx_process_limit(adapter, "rx_retries", "driver rx retries", &adapter->rx_retries, 0); #endif /* NIC_PARAVIRT */ /* Sysctl for setting the interface flow control */ lem_set_flow_cntrl(adapter, "flow_control", "flow control setting", &adapter->fc_setting, lem_fc_setting); /* * Validate number of transmit and receive descriptors. It * must not exceed hardware maximum, and must be multiple * of E1000_DBA_ALIGN. 
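 *
 * A worked example of the alignment check (illustrative numbers,
 * assuming the 16-byte legacy TX descriptor and the stock default of
 * EM_DEFAULT_TXD = 256): 256 * sizeof(struct e1000_tx_desc) =
 * 256 * 16 = 4096 bytes, which is a multiple of EM_DBA_ALIGN, so the
 * tunable is accepted; a value failing the checks below falls back to
 * the default.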
*/ if (((lem_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 || (adapter->hw.mac.type >= e1000_82544 && lem_txd > EM_MAX_TXD) || (adapter->hw.mac.type < e1000_82544 && lem_txd > EM_MAX_TXD_82543) || (lem_txd < EM_MIN_TXD)) { device_printf(dev, "Using %d TX descriptors instead of %d!\n", EM_DEFAULT_TXD, lem_txd); adapter->num_tx_desc = EM_DEFAULT_TXD; } else adapter->num_tx_desc = lem_txd; if (((lem_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 || (adapter->hw.mac.type >= e1000_82544 && lem_rxd > EM_MAX_RXD) || (adapter->hw.mac.type < e1000_82544 && lem_rxd > EM_MAX_RXD_82543) || (lem_rxd < EM_MIN_RXD)) { device_printf(dev, "Using %d RX descriptors instead of %d!\n", EM_DEFAULT_RXD, lem_rxd); adapter->num_rx_desc = EM_DEFAULT_RXD; } else adapter->num_rx_desc = lem_rxd; adapter->hw.mac.autoneg = DO_AUTO_NEG; adapter->hw.phy.autoneg_wait_to_complete = FALSE; adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT; adapter->rx_buffer_len = 2048; e1000_init_script_state_82541(&adapter->hw, TRUE); e1000_set_tbi_compatibility_82543(&adapter->hw, TRUE); /* Copper options */ if (adapter->hw.phy.media_type == e1000_media_type_copper) { adapter->hw.phy.mdix = AUTO_ALL_MODES; adapter->hw.phy.disable_polarity_correction = FALSE; adapter->hw.phy.ms_type = EM_MASTER_SLAVE; } /* * Set the frame limits assuming * standard ethernet sized frames. */ adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE; adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE; /* * This controls when hardware reports transmit completion * status. */ adapter->hw.mac.report_tx_early = 1; #ifdef NIC_PARAVIRT device_printf(dev, "driver supports paravirt, subdev 0x%x\n", adapter->hw.subsystem_device_id); if (adapter->hw.subsystem_device_id == E1000_PARA_SUBDEV) { uint64_t bus_addr; device_printf(dev, "paravirt support on dev %p\n", adapter); tsize = 4096; // XXX one page for the csb if (lem_dma_malloc(adapter, tsize, &adapter->csb_mem, BUS_DMA_NOWAIT)) { device_printf(dev, "Unable to allocate csb memory\n"); error = ENOMEM; goto err_csb; } /* Setup the Base of the CSB */ adapter->csb = (struct paravirt_csb *)adapter->csb_mem.dma_vaddr; /* force the first kick */ adapter->csb->host_need_txkick = 1; /* txring empty */ adapter->csb->guest_need_rxkick = 1; /* no rx packets */ bus_addr = adapter->csb_mem.dma_paddr; lem_add_rx_process_limit(adapter, "csb_on", "enable paravirt.", &adapter->csb->guest_csb_on, 0); lem_add_rx_process_limit(adapter, "txc_lim", "txc_lim", &adapter->csb->host_txcycles_lim, 1); /* some stats */ #define PA_SC(name, var, val) \ lem_add_rx_process_limit(adapter, name, name, var, val) PA_SC("host_need_txkick",&adapter->csb->host_need_txkick, 1); PA_SC("host_rxkick_at",&adapter->csb->host_rxkick_at, ~0); PA_SC("guest_need_txkick",&adapter->csb->guest_need_txkick, 0); PA_SC("guest_need_rxkick",&adapter->csb->guest_need_rxkick, 1); PA_SC("tdt_reg_count",&adapter->tdt_reg_count, 0); PA_SC("tdt_csb_count",&adapter->tdt_csb_count, 0); PA_SC("tdt_int_count",&adapter->tdt_int_count, 0); PA_SC("guest_need_kick_count",&adapter->guest_need_kick_count, 0); /* tell the host where the block is */ E1000_WRITE_REG(&adapter->hw, E1000_CSBAH, (u32)(bus_addr >> 32)); E1000_WRITE_REG(&adapter->hw, E1000_CSBAL, (u32)bus_addr); } #endif /* NIC_PARAVIRT */ tsize = roundup2(adapter->num_tx_desc * sizeof(struct e1000_tx_desc), EM_DBA_ALIGN); /* Allocate Transmit Descriptor ring */ if (lem_dma_malloc(adapter, tsize, &adapter->txdma, BUS_DMA_NOWAIT)) { device_printf(dev, "Unable to allocate tx_desc 
memory\n"); error = ENOMEM; goto err_tx_desc; } adapter->tx_desc_base = (struct e1000_tx_desc *)adapter->txdma.dma_vaddr; rsize = roundup2(adapter->num_rx_desc * sizeof(struct e1000_rx_desc), EM_DBA_ALIGN); /* Allocate Receive Descriptor ring */ if (lem_dma_malloc(adapter, rsize, &adapter->rxdma, BUS_DMA_NOWAIT)) { device_printf(dev, "Unable to allocate rx_desc memory\n"); error = ENOMEM; goto err_rx_desc; } adapter->rx_desc_base = (struct e1000_rx_desc *)adapter->rxdma.dma_vaddr; /* Allocate multicast array memory. */ adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT); if (adapter->mta == NULL) { device_printf(dev, "Can not allocate multicast setup array\n"); error = ENOMEM; goto err_hw_init; } /* ** Start from a known state, this is ** important in reading the nvm and ** mac from that. */ e1000_reset_hw(&adapter->hw); /* Make sure we have a good EEPROM before we read from it */ if (e1000_validate_nvm_checksum(&adapter->hw) < 0) { /* ** Some PCI-E parts fail the first check due to ** the link being in sleep state, call it again, ** if it fails a second time its a real issue. */ if (e1000_validate_nvm_checksum(&adapter->hw) < 0) { device_printf(dev, "The EEPROM Checksum Is Not Valid\n"); error = EIO; goto err_hw_init; } } /* Copy the permanent MAC address out of the EEPROM */ if (e1000_read_mac_addr(&adapter->hw) < 0) { device_printf(dev, "EEPROM read error while reading MAC" " address\n"); error = EIO; goto err_hw_init; } if (!lem_is_valid_ether_addr(adapter->hw.mac.addr)) { device_printf(dev, "Invalid MAC address\n"); error = EIO; goto err_hw_init; } /* Initialize the hardware */ if (lem_hardware_init(adapter)) { device_printf(dev, "Unable to initialize the hardware\n"); error = EIO; goto err_hw_init; } /* Allocate transmit descriptors and buffers */ if (lem_allocate_transmit_structures(adapter)) { device_printf(dev, "Could not setup transmit structures\n"); error = ENOMEM; goto err_tx_struct; } /* Allocate receive descriptors and buffers */ if (lem_allocate_receive_structures(adapter)) { device_printf(dev, "Could not setup receive structures\n"); error = ENOMEM; goto err_rx_struct; } /* ** Do interrupt configuration */ error = lem_allocate_irq(adapter); if (error) goto err_rx_struct; /* * Get Wake-on-Lan and Management info for later use */ lem_get_wakeup(dev); /* Setup OS specific network interface */ if (lem_setup_interface(dev, adapter) != 0) goto err_rx_struct; /* Initialize statistics */ lem_update_stats_counters(adapter); adapter->hw.mac.get_link_status = 1; lem_update_link_status(adapter); /* Indicate SOL/IDER usage */ if (e1000_check_reset_block(&adapter->hw)) device_printf(dev, "PHY reset is blocked due to SOL/IDER session.\n"); /* Do we need workaround for 82544 PCI-X adapter? 
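	 * (On a PCI-X bus the 82544 cannot DMA a TX buffer whose
	 * address/length combination crosses certain boundaries; when
	 * pcix_82544 is set, lem_xmit() uses lem_fill_descriptors() to
	 * split such segments across extra descriptors.)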
*/ if (adapter->hw.bus.type == e1000_bus_type_pcix && adapter->hw.mac.type == e1000_82544) adapter->pcix_82544 = TRUE; else adapter->pcix_82544 = FALSE; /* Register for VLAN events */ adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config, lem_register_vlan, adapter, EVENTHANDLER_PRI_FIRST); adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig, lem_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST); lem_add_hw_stats(adapter); /* Non-AMT based hardware can now take control from firmware */ if (adapter->has_manage && !adapter->has_amt) lem_get_hw_control(adapter); /* Tell the stack that the interface is not active */ if_setdrvflagbits(adapter->ifp, 0, IFF_DRV_OACTIVE | IFF_DRV_RUNNING); adapter->led_dev = led_create(lem_led_func, adapter, device_get_nameunit(dev)); #ifdef DEV_NETMAP lem_netmap_attach(adapter); #endif /* DEV_NETMAP */ INIT_DEBUGOUT("lem_attach: end"); return (0); err_rx_struct: lem_free_transmit_structures(adapter); err_tx_struct: err_hw_init: lem_release_hw_control(adapter); lem_dma_free(adapter, &adapter->rxdma); err_rx_desc: lem_dma_free(adapter, &adapter->txdma); err_tx_desc: #ifdef NIC_PARAVIRT lem_dma_free(adapter, &adapter->csb_mem); err_csb: #endif /* NIC_PARAVIRT */ err_pci: if (adapter->ifp != (void *)NULL) if_free(adapter->ifp); lem_free_pci_resources(adapter); free(adapter->mta, M_DEVBUF); EM_TX_LOCK_DESTROY(adapter); EM_RX_LOCK_DESTROY(adapter); EM_CORE_LOCK_DESTROY(adapter); return (error); } /********************************************************************* * Device removal routine * * The detach entry point is called when the driver is being removed. * This routine stops the adapter and deallocates all the resources * that were allocated for driver operation. * * return 0 on success, positive on failure *********************************************************************/ static int lem_detach(device_t dev) { struct adapter *adapter = device_get_softc(dev); if_t ifp = adapter->ifp; INIT_DEBUGOUT("em_detach: begin"); /* Make sure VLANS are not using driver */ if (if_vlantrunkinuse(ifp)) { device_printf(dev,"Vlan in use, detach first\n"); return (EBUSY); } #ifdef DEVICE_POLLING if (if_getcapenable(ifp) & IFCAP_POLLING) ether_poll_deregister(ifp); #endif if (adapter->led_dev != NULL) led_destroy(adapter->led_dev); EM_CORE_LOCK(adapter); EM_TX_LOCK(adapter); adapter->in_detach = 1; lem_stop(adapter); e1000_phy_hw_reset(&adapter->hw); lem_release_manageability(adapter); EM_TX_UNLOCK(adapter); EM_CORE_UNLOCK(adapter); /* Unregister VLAN events */ if (adapter->vlan_attach != NULL) EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach); if (adapter->vlan_detach != NULL) EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach); ether_ifdetach(adapter->ifp); callout_drain(&adapter->timer); callout_drain(&adapter->tx_fifo_timer); #ifdef DEV_NETMAP netmap_detach(ifp); #endif /* DEV_NETMAP */ lem_free_pci_resources(adapter); bus_generic_detach(dev); if_free(ifp); lem_free_transmit_structures(adapter); lem_free_receive_structures(adapter); /* Free Transmit Descriptor ring */ if (adapter->tx_desc_base) { lem_dma_free(adapter, &adapter->txdma); adapter->tx_desc_base = NULL; } /* Free Receive Descriptor ring */ if (adapter->rx_desc_base) { lem_dma_free(adapter, &adapter->rxdma); adapter->rx_desc_base = NULL; } #ifdef NIC_PARAVIRT if (adapter->csb) { lem_dma_free(adapter, &adapter->csb_mem); adapter->csb = NULL; } #endif /* NIC_PARAVIRT */ lem_release_hw_control(adapter); free(adapter->mta, M_DEVBUF); EM_TX_LOCK_DESTROY(adapter); EM_RX_LOCK_DESTROY(adapter); 
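	/*
	 * The core lock is destroyed last, after the TX and RX locks;
	 * lem_attach() created it first.
	 */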
EM_CORE_LOCK_DESTROY(adapter); return (0); } /********************************************************************* * * Shutdown entry point * **********************************************************************/ static int lem_shutdown(device_t dev) { return lem_suspend(dev); } /* * Suspend/resume device methods. */ static int lem_suspend(device_t dev) { struct adapter *adapter = device_get_softc(dev); EM_CORE_LOCK(adapter); lem_release_manageability(adapter); lem_release_hw_control(adapter); lem_enable_wakeup(dev); EM_CORE_UNLOCK(adapter); return bus_generic_suspend(dev); } static int lem_resume(device_t dev) { struct adapter *adapter = device_get_softc(dev); if_t ifp = adapter->ifp; EM_CORE_LOCK(adapter); lem_init_locked(adapter); lem_init_manageability(adapter); EM_CORE_UNLOCK(adapter); lem_start(ifp); return bus_generic_resume(dev); } static void lem_start_locked(if_t ifp) { struct adapter *adapter = if_getsoftc(ifp); struct mbuf *m_head; EM_TX_LOCK_ASSERT(adapter); if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING) return; if (!adapter->link_active) return; /* * Force a cleanup if number of TX descriptors * available hits the threshold */ if (adapter->num_tx_desc_avail <= EM_TX_CLEANUP_THRESHOLD) { lem_txeof(adapter); /* Now do we at least have a minimal? */ if (adapter->num_tx_desc_avail <= EM_TX_OP_THRESHOLD) { adapter->no_tx_desc_avail1++; return; } } while (!if_sendq_empty(ifp)) { m_head = if_dequeue(ifp); if (m_head == NULL) break; /* * Encapsulation can modify our pointer, and or make it * NULL on failure. In that event, we can't requeue. */ if (lem_xmit(adapter, &m_head)) { if (m_head == NULL) break; if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, 0); if_sendq_prepend(ifp, m_head); break; } /* Send a copy of the frame to the BPF listener */ if_etherbpfmtap(ifp, m_head); /* Set timeout in case hardware has problems transmitting. */ adapter->watchdog_check = TRUE; adapter->watchdog_time = ticks; } if (adapter->num_tx_desc_avail <= EM_TX_OP_THRESHOLD) if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, 0); #ifdef NIC_PARAVIRT if (if_getdrvflags(ifp) & IFF_DRV_OACTIVE && adapter->csb && adapter->csb->guest_csb_on && !(adapter->csb->guest_need_txkick & 1)) { adapter->csb->guest_need_txkick = 1; adapter->guest_need_kick_count++; // XXX memory barrier lem_txeof(adapter); // XXX possibly clear IFF_DRV_OACTIVE } #endif /* NIC_PARAVIRT */ return; } static void lem_start(if_t ifp) { struct adapter *adapter = if_getsoftc(ifp); EM_TX_LOCK(adapter); if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) lem_start_locked(ifp); EM_TX_UNLOCK(adapter); } /********************************************************************* * Ioctl entry point * * em_ioctl is called when the user wants to configure the * interface. * * return 0 on success, positive on failure **********************************************************************/ static int lem_ioctl(if_t ifp, u_long command, caddr_t data) { struct adapter *adapter = if_getsoftc(ifp); struct ifreq *ifr = (struct ifreq *)data; #if defined(INET) || defined(INET6) struct ifaddr *ifa = (struct ifaddr *)data; #endif bool avoid_reset = FALSE; int error = 0; if (adapter->in_detach) return (error); switch (command) { case SIOCSIFADDR: #ifdef INET if (ifa->ifa_addr->sa_family == AF_INET) avoid_reset = TRUE; #endif #ifdef INET6 if (ifa->ifa_addr->sa_family == AF_INET6) avoid_reset = TRUE; #endif /* ** Calling init results in link renegotiation, ** so we avoid doing it when possible. 
*/ if (avoid_reset) { if_setflagbits(ifp, IFF_UP, 0); if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING)) lem_init(adapter); #ifdef INET if (!(if_getflags(ifp) & IFF_NOARP)) arp_ifinit(ifp, ifa); #endif } else error = ether_ioctl(ifp, command, data); break; case SIOCSIFMTU: { int max_frame_size; IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)"); EM_CORE_LOCK(adapter); switch (adapter->hw.mac.type) { case e1000_82542: max_frame_size = ETHER_MAX_LEN; break; default: max_frame_size = MAX_JUMBO_FRAME_SIZE; } if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN - ETHER_CRC_LEN) { EM_CORE_UNLOCK(adapter); error = EINVAL; break; } if_setmtu(ifp, ifr->ifr_mtu); adapter->max_frame_size = if_getmtu(ifp) + ETHER_HDR_LEN + ETHER_CRC_LEN; - lem_init_locked(adapter); + if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING)) + lem_init_locked(adapter); EM_CORE_UNLOCK(adapter); break; } case SIOCSIFFLAGS: IOCTL_DEBUGOUT("ioctl rcv'd:\ SIOCSIFFLAGS (Set Interface Flags)"); EM_CORE_LOCK(adapter); if (if_getflags(ifp) & IFF_UP) { if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING)) { if ((if_getflags(ifp) ^ adapter->if_flags) & (IFF_PROMISC | IFF_ALLMULTI)) { lem_disable_promisc(adapter); lem_set_promisc(adapter); } } else lem_init_locked(adapter); } else if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { EM_TX_LOCK(adapter); lem_stop(adapter); EM_TX_UNLOCK(adapter); } adapter->if_flags = if_getflags(ifp); EM_CORE_UNLOCK(adapter); break; case SIOCADDMULTI: case SIOCDELMULTI: IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI"); if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { EM_CORE_LOCK(adapter); lem_disable_intr(adapter); lem_set_multi(adapter); if (adapter->hw.mac.type == e1000_82542 && adapter->hw.revision_id == E1000_REVISION_2) { lem_initialize_receive_unit(adapter); } #ifdef DEVICE_POLLING if (!(if_getcapenable(ifp) & IFCAP_POLLING)) #endif lem_enable_intr(adapter); EM_CORE_UNLOCK(adapter); } break; case SIOCSIFMEDIA: /* Check SOL/IDER usage */ EM_CORE_LOCK(adapter); if (e1000_check_reset_block(&adapter->hw)) { EM_CORE_UNLOCK(adapter); device_printf(adapter->dev, "Media change is" " blocked due to SOL/IDER session.\n"); break; } EM_CORE_UNLOCK(adapter); case SIOCGIFMEDIA: IOCTL_DEBUGOUT("ioctl rcv'd: \ SIOCxIFMEDIA (Get/Set Interface Media)"); error = ifmedia_ioctl(ifp, ifr, &adapter->media, command); break; case SIOCSIFCAP: { int mask, reinit; IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)"); reinit = 0; mask = ifr->ifr_reqcap ^ if_getcapenable(ifp); #ifdef DEVICE_POLLING if (mask & IFCAP_POLLING) { if (ifr->ifr_reqcap & IFCAP_POLLING) { error = ether_poll_register(lem_poll, ifp); if (error) return (error); EM_CORE_LOCK(adapter); lem_disable_intr(adapter); if_setcapenablebit(ifp, IFCAP_POLLING, 0); EM_CORE_UNLOCK(adapter); } else { error = ether_poll_deregister(ifp); /* Enable interrupt even in error case */ EM_CORE_LOCK(adapter); lem_enable_intr(adapter); if_setcapenablebit(ifp, 0, IFCAP_POLLING); EM_CORE_UNLOCK(adapter); } } #endif if (mask & IFCAP_HWCSUM) { if_togglecapenable(ifp, IFCAP_HWCSUM); reinit = 1; } if (mask & IFCAP_VLAN_HWTAGGING) { if_togglecapenable(ifp, IFCAP_VLAN_HWTAGGING); reinit = 1; } if ((mask & IFCAP_WOL) && (if_getcapabilities(ifp) & IFCAP_WOL) != 0) { if (mask & IFCAP_WOL_MCAST) if_togglecapenable(ifp, IFCAP_WOL_MCAST); if (mask & IFCAP_WOL_MAGIC) if_togglecapenable(ifp, IFCAP_WOL_MAGIC); } if (reinit && (if_getdrvflags(ifp) & IFF_DRV_RUNNING)) lem_init(adapter); if_vlancap(ifp); break; } default: error = ether_ioctl(ifp, command, data); break; } return (error); } 
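/*
 * Note on the SIOCSIFMTU case above: as of this change, lem_init_locked()
 * is only called when the interface is already running (IFF_DRV_RUNNING),
 * so setting the MTU on a down interface merely records the new value;
 * the reinitialization happens later, when the interface is brought up.
 * A hypothetical session (device name illustrative):
 *
 *	# ifconfig em0 mtu 1400		(interface down: no reinit)
 *	# ifconfig em0 up		(lem_init() applies the new MTU)
 */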
/********************************************************************* * Init entry point * * This routine is used in two ways. It is used by the stack as * init entry point in network interface structure. It is also used * by the driver as a hw/sw initialization routine to get to a * consistent state. * * return 0 on success, positive on failure **********************************************************************/ static void lem_init_locked(struct adapter *adapter) { if_t ifp = adapter->ifp; device_t dev = adapter->dev; u32 pba; INIT_DEBUGOUT("lem_init: begin"); EM_CORE_LOCK_ASSERT(adapter); EM_TX_LOCK(adapter); lem_stop(adapter); EM_TX_UNLOCK(adapter); /* * Packet Buffer Allocation (PBA) * Writing PBA sets the receive portion of the buffer * the remainder is used for the transmit buffer. * * Devices before the 82547 had a Packet Buffer of 64K. * Default allocation: PBA=48K for Rx, leaving 16K for Tx. * After the 82547 the buffer was reduced to 40K. * Default allocation: PBA=30K for Rx, leaving 10K for Tx. * Note: default does not leave enough room for Jumbo Frame >10k. */ switch (adapter->hw.mac.type) { case e1000_82547: case e1000_82547_rev_2: /* 82547: Total Packet Buffer is 40K */ if (adapter->max_frame_size > 8192) pba = E1000_PBA_22K; /* 22K for Rx, 18K for Tx */ else pba = E1000_PBA_30K; /* 30K for Rx, 10K for Tx */ adapter->tx_fifo_head = 0; adapter->tx_head_addr = pba << EM_TX_HEAD_ADDR_SHIFT; adapter->tx_fifo_size = (E1000_PBA_40K - pba) << EM_PBA_BYTES_SHIFT; break; default: /* Devices before 82547 had a Packet Buffer of 64K. */ if (adapter->max_frame_size > 8192) pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */ else pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */ } INIT_DEBUGOUT1("lem_init: pba=%dK",pba); E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba); /* Get the latest mac address, User can use a LAA */ bcopy(if_getlladdr(adapter->ifp), adapter->hw.mac.addr, ETHER_ADDR_LEN); /* Put the address into the Receive Address Array */ e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0); /* Initialize the hardware */ if (lem_hardware_init(adapter)) { device_printf(dev, "Unable to initialize the hardware\n"); return; } lem_update_link_status(adapter); /* Setup VLAN support, basic and offload if available */ E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN); /* Set hardware offload abilities */ if_clearhwassist(ifp); if (adapter->hw.mac.type >= e1000_82543) { if (if_getcapenable(ifp) & IFCAP_TXCSUM) if_sethwassistbits(ifp, CSUM_TCP | CSUM_UDP, 0); } /* Configure for OS presence */ lem_init_manageability(adapter); /* Prepare transmit descriptors and buffers */ lem_setup_transmit_structures(adapter); lem_initialize_transmit_unit(adapter); /* Setup Multicast table */ lem_set_multi(adapter); /* Prepare receive descriptors and buffers */ if (lem_setup_receive_structures(adapter)) { device_printf(dev, "Could not setup receive structures\n"); EM_TX_LOCK(adapter); lem_stop(adapter); EM_TX_UNLOCK(adapter); return; } lem_initialize_receive_unit(adapter); /* Use real VLAN Filter support? 
*/ if (if_getcapenable(ifp) & IFCAP_VLAN_HWTAGGING) { if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER) /* Use real VLAN Filter support */ lem_setup_vlan_hw_support(adapter); else { u32 ctrl; ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL); ctrl |= E1000_CTRL_VME; E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl); } } /* Don't lose promiscuous settings */ lem_set_promisc(adapter); if_setdrvflagbits(ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE); callout_reset(&adapter->timer, hz, lem_local_timer, adapter); e1000_clear_hw_cntrs_base_generic(&adapter->hw); #ifdef DEVICE_POLLING /* * Only enable interrupts if we are not polling, make sure * they are off otherwise. */ if (if_getcapenable(ifp) & IFCAP_POLLING) lem_disable_intr(adapter); else #endif /* DEVICE_POLLING */ lem_enable_intr(adapter); /* AMT based hardware can now take control from firmware */ if (adapter->has_manage && adapter->has_amt) lem_get_hw_control(adapter); } static void lem_init(void *arg) { struct adapter *adapter = arg; EM_CORE_LOCK(adapter); lem_init_locked(adapter); EM_CORE_UNLOCK(adapter); } #ifdef DEVICE_POLLING /********************************************************************* * * Legacy polling routine * *********************************************************************/ static int lem_poll(if_t ifp, enum poll_cmd cmd, int count) { struct adapter *adapter = if_getsoftc(ifp); u32 reg_icr, rx_done = 0; EM_CORE_LOCK(adapter); if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) { EM_CORE_UNLOCK(adapter); return (rx_done); } if (cmd == POLL_AND_CHECK_STATUS) { reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR); if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { callout_stop(&adapter->timer); adapter->hw.mac.get_link_status = 1; lem_update_link_status(adapter); callout_reset(&adapter->timer, hz, lem_local_timer, adapter); } } EM_CORE_UNLOCK(adapter); lem_rxeof(adapter, count, &rx_done); EM_TX_LOCK(adapter); lem_txeof(adapter); if(!if_sendq_empty(ifp)) lem_start_locked(ifp); EM_TX_UNLOCK(adapter); return (rx_done); } #endif /* DEVICE_POLLING */ /********************************************************************* * * Legacy Interrupt Service routine * *********************************************************************/ static void lem_intr(void *arg) { struct adapter *adapter = arg; if_t ifp = adapter->ifp; u32 reg_icr; if ((if_getcapenable(ifp) & IFCAP_POLLING) || ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)) return; EM_CORE_LOCK(adapter); reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR); if (reg_icr & E1000_ICR_RXO) adapter->rx_overruns++; if ((reg_icr == 0xffffffff) || (reg_icr == 0)) { EM_CORE_UNLOCK(adapter); return; } if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { callout_stop(&adapter->timer); adapter->hw.mac.get_link_status = 1; lem_update_link_status(adapter); /* Deal with TX cruft when link lost */ lem_tx_purge(adapter); callout_reset(&adapter->timer, hz, lem_local_timer, adapter); EM_CORE_UNLOCK(adapter); return; } EM_CORE_UNLOCK(adapter); lem_rxeof(adapter, -1, NULL); EM_TX_LOCK(adapter); lem_txeof(adapter); if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) && (!if_sendq_empty(ifp))) lem_start_locked(ifp); EM_TX_UNLOCK(adapter); return; } static void lem_handle_link(void *context, int pending) { struct adapter *adapter = context; if_t ifp = adapter->ifp; if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING)) return; EM_CORE_LOCK(adapter); callout_stop(&adapter->timer); lem_update_link_status(adapter); /* Deal with TX cruft when link lost */ lem_tx_purge(adapter); callout_reset(&adapter->timer, hz, lem_local_timer, adapter); 
	EM_CORE_UNLOCK(adapter);
}

/* Combined RX/TX handler, used by Legacy and MSI */
static void
lem_handle_rxtx(void *context, int pending)
{
	struct adapter	*adapter = context;
	if_t ifp = adapter->ifp;

	if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
		bool more = lem_rxeof(adapter, adapter->rx_process_limit, NULL);
		EM_TX_LOCK(adapter);
		lem_txeof(adapter);
		if (!if_sendq_empty(ifp))
			lem_start_locked(ifp);
		EM_TX_UNLOCK(adapter);
		if (more) {
			taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);
			return;
		}
	}

	if (if_getdrvflags(ifp) & IFF_DRV_RUNNING)
		lem_enable_intr(adapter);
}

/*********************************************************************
 *
 *  Fast Legacy/MSI Combined Interrupt Service routine
 *
 *********************************************************************/
static int
lem_irq_fast(void *arg)
{
	struct adapter	*adapter = arg;
	if_t ifp;
	u32		reg_icr;

	ifp = adapter->ifp;

	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	/* Hot eject? */
	if (reg_icr == 0xffffffff)
		return FILTER_STRAY;

	/* Definitely not our interrupt. */
	if (reg_icr == 0x0)
		return FILTER_STRAY;

	/*
	 * Mask interrupts until the taskqueue is finished running.  This is
	 * cheap, just assume that it is needed.  This also works around the
	 * MSI message reordering errata on certain systems.
	 */
	lem_disable_intr(adapter);
	taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);

	/* Link status change */
	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		adapter->hw.mac.get_link_status = 1;
		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
	}

	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;
	return FILTER_HANDLED;
}

/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called whenever the user queries the status of
 *  the interface using ifconfig.
 *
 **********************************************************************/
static void
lem_media_status(if_t ifp, struct ifmediareq *ifmr)
{
	struct adapter *adapter = if_getsoftc(ifp);
	u_char fiber_type = IFM_1000_SX;

	INIT_DEBUGOUT("lem_media_status: begin");

	EM_CORE_LOCK(adapter);
	lem_update_link_status(adapter);

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if (!adapter->link_active) {
		EM_CORE_UNLOCK(adapter);
		return;
	}

	ifmr->ifm_status |= IFM_ACTIVE;

	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
		if (adapter->hw.mac.type == e1000_82545)
			fiber_type = IFM_1000_LX;
		ifmr->ifm_active |= fiber_type | IFM_FDX;
	} else {
		switch (adapter->link_speed) {
		case 10:
			ifmr->ifm_active |= IFM_10_T;
			break;
		case 100:
			ifmr->ifm_active |= IFM_100_TX;
			break;
		case 1000:
			ifmr->ifm_active |= IFM_1000_T;
			break;
		}
		if (adapter->link_duplex == FULL_DUPLEX)
			ifmr->ifm_active |= IFM_FDX;
		else
			ifmr->ifm_active |= IFM_HDX;
	}
	EM_CORE_UNLOCK(adapter);
}

/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called when the user changes speed/duplex using
 *  the media/mediaopt option with ifconfig.
* **********************************************************************/ static int lem_media_change(if_t ifp) { struct adapter *adapter = if_getsoftc(ifp); struct ifmedia *ifm = &adapter->media; INIT_DEBUGOUT("lem_media_change: begin"); if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) return (EINVAL); EM_CORE_LOCK(adapter); switch (IFM_SUBTYPE(ifm->ifm_media)) { case IFM_AUTO: adapter->hw.mac.autoneg = DO_AUTO_NEG; adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT; break; case IFM_1000_LX: case IFM_1000_SX: case IFM_1000_T: adapter->hw.mac.autoneg = DO_AUTO_NEG; adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL; break; case IFM_100_TX: adapter->hw.mac.autoneg = FALSE; adapter->hw.phy.autoneg_advertised = 0; if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX) adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL; else adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF; break; case IFM_10_T: adapter->hw.mac.autoneg = FALSE; adapter->hw.phy.autoneg_advertised = 0; if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX) adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL; else adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF; break; default: device_printf(adapter->dev, "Unsupported media type\n"); } lem_init_locked(adapter); EM_CORE_UNLOCK(adapter); return (0); } /********************************************************************* * * This routine maps the mbufs to tx descriptors. * * return 0 on success, positive on failure **********************************************************************/ static int lem_xmit(struct adapter *adapter, struct mbuf **m_headp) { bus_dma_segment_t segs[EM_MAX_SCATTER]; bus_dmamap_t map; struct em_buffer *tx_buffer, *tx_buffer_mapped; struct e1000_tx_desc *ctxd = NULL; struct mbuf *m_head; u32 txd_upper, txd_lower, txd_used, txd_saved; int error, nsegs, i, j, first, last = 0; m_head = *m_headp; txd_upper = txd_lower = txd_used = txd_saved = 0; /* ** When doing checksum offload, it is critical to ** make sure the first mbuf has more than header, ** because that routine expects data to be present. */ if ((m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) && (m_head->m_len < ETHER_HDR_LEN + sizeof(struct ip))) { m_head = m_pullup(m_head, ETHER_HDR_LEN + sizeof(struct ip)); *m_headp = m_head; if (m_head == NULL) return (ENOBUFS); } /* * Map the packet for DMA * * Capture the first descriptor index, * this descriptor will have the index * of the EOP which is the only one that * now gets a DONE bit writeback. */ first = adapter->next_avail_tx_desc; tx_buffer = &adapter->tx_buffer_area[first]; tx_buffer_mapped = tx_buffer; map = tx_buffer->map; error = bus_dmamap_load_mbuf_sg(adapter->txtag, map, *m_headp, segs, &nsegs, BUS_DMA_NOWAIT); /* * There are two types of errors we can (try) to handle: * - EFBIG means the mbuf chain was too long and bus_dma ran * out of segments. Defragment the mbuf chain and try again. * - ENOMEM means bus_dma could not obtain enough bounce buffers * at this point in time. Defer sending and try again later. * All other errors, in particular EINVAL, are fatal and prevent the * mbuf chain from ever going through. Drop it and report error. 
*/ if (error == EFBIG) { struct mbuf *m; m = m_collapse(*m_headp, M_NOWAIT, EM_MAX_SCATTER); if (m == NULL) { adapter->mbuf_defrag_failed++; m_freem(*m_headp); *m_headp = NULL; return (ENOBUFS); } *m_headp = m; /* Try it again */ error = bus_dmamap_load_mbuf_sg(adapter->txtag, map, *m_headp, segs, &nsegs, BUS_DMA_NOWAIT); if (error) { adapter->no_tx_dma_setup++; m_freem(*m_headp); *m_headp = NULL; return (error); } } else if (error != 0) { adapter->no_tx_dma_setup++; return (error); } if (adapter->num_tx_desc_avail < (nsegs + 2)) { adapter->no_tx_desc_avail2++; bus_dmamap_unload(adapter->txtag, map); return (ENOBUFS); } m_head = *m_headp; /* Do hardware assists */ if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) lem_transmit_checksum_setup(adapter, m_head, &txd_upper, &txd_lower); i = adapter->next_avail_tx_desc; if (adapter->pcix_82544) txd_saved = i; /* Set up our transmit descriptors */ for (j = 0; j < nsegs; j++) { bus_size_t seg_len; bus_addr_t seg_addr; /* If adapter is 82544 and on PCIX bus */ if(adapter->pcix_82544) { DESC_ARRAY desc_array; u32 array_elements, counter; /* * Check the Address and Length combination and * split the data accordingly */ array_elements = lem_fill_descriptors(segs[j].ds_addr, segs[j].ds_len, &desc_array); for (counter = 0; counter < array_elements; counter++) { if (txd_used == adapter->num_tx_desc_avail) { adapter->next_avail_tx_desc = txd_saved; adapter->no_tx_desc_avail2++; bus_dmamap_unload(adapter->txtag, map); return (ENOBUFS); } tx_buffer = &adapter->tx_buffer_area[i]; ctxd = &adapter->tx_desc_base[i]; ctxd->buffer_addr = htole64( desc_array.descriptor[counter].address); ctxd->lower.data = htole32( (adapter->txd_cmd | txd_lower | (u16) desc_array.descriptor[counter].length)); ctxd->upper.data = htole32((txd_upper)); last = i; if (++i == adapter->num_tx_desc) i = 0; tx_buffer->m_head = NULL; tx_buffer->next_eop = -1; txd_used++; } } else { tx_buffer = &adapter->tx_buffer_area[i]; ctxd = &adapter->tx_desc_base[i]; seg_addr = segs[j].ds_addr; seg_len = segs[j].ds_len; ctxd->buffer_addr = htole64(seg_addr); ctxd->lower.data = htole32( adapter->txd_cmd | txd_lower | seg_len); ctxd->upper.data = htole32(txd_upper); last = i; if (++i == adapter->num_tx_desc) i = 0; tx_buffer->m_head = NULL; tx_buffer->next_eop = -1; } } adapter->next_avail_tx_desc = i; if (adapter->pcix_82544) adapter->num_tx_desc_avail -= txd_used; else adapter->num_tx_desc_avail -= nsegs; if (m_head->m_flags & M_VLANTAG) { /* Set the vlan id. */ ctxd->upper.fields.special = htole16(m_head->m_pkthdr.ether_vtag); /* Tell hardware to add tag */ ctxd->lower.data |= htole32(E1000_TXD_CMD_VLE); } tx_buffer->m_head = m_head; tx_buffer_mapped->map = tx_buffer->map; tx_buffer->map = map; bus_dmamap_sync(adapter->txtag, map, BUS_DMASYNC_PREWRITE); /* * Last Descriptor of Packet * needs End Of Packet (EOP) * and Report Status (RS) */ ctxd->lower.data |= htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS); /* * Keep track in the first buffer which * descriptor will be written back */ tx_buffer = &adapter->tx_buffer_area[first]; tx_buffer->next_eop = last; adapter->watchdog_time = ticks; /* * Advance the Transmit Descriptor Tail (TDT), this tells the E1000 * that this frame is available to transmit. */ bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); #ifdef NIC_PARAVIRT if (adapter->csb) { adapter->csb->guest_tdt = i; /* XXX memory barrier ? 
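		 * (the intent would be to make the guest_tdt store
		 * visible to the host before the guest_csb_on /
		 * host_need_txkick test below; whether a barrier is
		 * actually required here is unverified)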
		 */
		if (adapter->csb->guest_csb_on &&
		    !(adapter->csb->host_need_txkick & 1)) {
			/* XXX maybe useless
			 * clean the ring. maybe do it before?
			 * maybe a little bit of hysteresis?
			 */
			if (adapter->num_tx_desc_avail <= 64) { // XXX
				lem_txeof(adapter);
			}
			return (0);
		}
	}
#endif /* NIC_PARAVIRT */

#ifdef NIC_SEND_COMBINING
	if (adapter->sc_enable) {
		if (adapter->shadow_tdt & MIT_PENDING_INT) {
			/* signal intr and data pending */
			adapter->shadow_tdt = MIT_PENDING_TDT | (i & 0xffff);
			return (0);
		} else {
			adapter->shadow_tdt = MIT_PENDING_INT;
		}
	}
#endif /* NIC_SEND_COMBINING */

	if (adapter->hw.mac.type == e1000_82547 &&
	    adapter->link_duplex == HALF_DUPLEX)
		lem_82547_move_tail(adapter);
	else {
		E1000_WRITE_REG(&adapter->hw, E1000_TDT(0), i);
		if (adapter->hw.mac.type == e1000_82547)
			lem_82547_update_fifo_head(adapter,
			    m_head->m_pkthdr.len);
	}

	return (0);
}

/*********************************************************************
 *
 *  82547 workaround to avoid controller hang in a half-duplex
 *  environment.  The workaround is to avoid queuing a large packet that
 *  would span the internal Tx FIFO ring boundary.  We need to reset the
 *  FIFO pointers in this case.  We do that only when the FIFO is
 *  quiescent.
 *
 **********************************************************************/
static void
lem_82547_move_tail(void *arg)
{
	struct adapter *adapter = arg;
	struct e1000_tx_desc *tx_desc;
	u16	hw_tdt, sw_tdt, length = 0;
	bool	eop = 0;

	EM_TX_LOCK_ASSERT(adapter);

	hw_tdt = E1000_READ_REG(&adapter->hw, E1000_TDT(0));
	sw_tdt = adapter->next_avail_tx_desc;

	while (hw_tdt != sw_tdt) {
		tx_desc = &adapter->tx_desc_base[hw_tdt];
		length += tx_desc->lower.flags.length;
		eop = tx_desc->lower.data & E1000_TXD_CMD_EOP;
		if (++hw_tdt == adapter->num_tx_desc)
			hw_tdt = 0;

		if (eop) {
			if (lem_82547_fifo_workaround(adapter, length)) {
				adapter->tx_fifo_wrk_cnt++;
				callout_reset(&adapter->tx_fifo_timer, 1,
				    lem_82547_move_tail, adapter);
				break;
			}
			E1000_WRITE_REG(&adapter->hw, E1000_TDT(0), hw_tdt);
			lem_82547_update_fifo_head(adapter, length);
			length = 0;
		}
	}
}

static int
lem_82547_fifo_workaround(struct adapter *adapter, int len)
{
	int fifo_space, fifo_pkt_len;

	fifo_pkt_len = roundup2(len + EM_FIFO_HDR, EM_FIFO_HDR);

	if (adapter->link_duplex == HALF_DUPLEX) {
		fifo_space = adapter->tx_fifo_size - adapter->tx_fifo_head;

		if (fifo_pkt_len >= (EM_82547_PKT_THRESH + fifo_space)) {
			if (lem_82547_tx_fifo_reset(adapter))
				return (0);
			else
				return (1);
		}
	}

	return (0);
}

static void
lem_82547_update_fifo_head(struct adapter *adapter, int len)
{
	int fifo_pkt_len = roundup2(len + EM_FIFO_HDR, EM_FIFO_HDR);

	/* tx_fifo_head is always 16 byte aligned */
	adapter->tx_fifo_head += fifo_pkt_len;
	if (adapter->tx_fifo_head >= adapter->tx_fifo_size) {
		adapter->tx_fifo_head -= adapter->tx_fifo_size;
	}
}

static int
lem_82547_tx_fifo_reset(struct adapter *adapter)
{
	u32 tctl;

	if ((E1000_READ_REG(&adapter->hw, E1000_TDT(0)) ==
	    E1000_READ_REG(&adapter->hw, E1000_TDH(0))) &&
	    (E1000_READ_REG(&adapter->hw, E1000_TDFT) ==
	    E1000_READ_REG(&adapter->hw, E1000_TDFH)) &&
	    (E1000_READ_REG(&adapter->hw, E1000_TDFTS) ==
	    E1000_READ_REG(&adapter->hw, E1000_TDFHS)) &&
	    (E1000_READ_REG(&adapter->hw, E1000_TDFPC) == 0)) {
		/* Disable TX unit */
		tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
		E1000_WRITE_REG(&adapter->hw, E1000_TCTL,
		    tctl & ~E1000_TCTL_EN);

		/* Reset FIFO pointers */
		E1000_WRITE_REG(&adapter->hw, E1000_TDFT,
		    adapter->tx_head_addr);
		E1000_WRITE_REG(&adapter->hw, E1000_TDFH,
		    adapter->tx_head_addr);
		E1000_WRITE_REG(&adapter->hw, E1000_TDFTS,
		    adapter->tx_head_addr);
E1000_WRITE_REG(&adapter->hw, E1000_TDFHS, adapter->tx_head_addr); /* Re-enable TX unit */ E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl); E1000_WRITE_FLUSH(&adapter->hw); adapter->tx_fifo_head = 0; adapter->tx_fifo_reset_cnt++; return (TRUE); } else { return (FALSE); } } static void lem_set_promisc(struct adapter *adapter) { if_t ifp = adapter->ifp; u32 reg_rctl; reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); if (if_getflags(ifp) & IFF_PROMISC) { reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE); /* Turn this on if you want to see bad packets */ if (lem_debug_sbp) reg_rctl |= E1000_RCTL_SBP; E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); } else if (if_getflags(ifp) & IFF_ALLMULTI) { reg_rctl |= E1000_RCTL_MPE; reg_rctl &= ~E1000_RCTL_UPE; E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); } } static void lem_disable_promisc(struct adapter *adapter) { if_t ifp = adapter->ifp; u32 reg_rctl; int mcnt = 0; reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); reg_rctl &= (~E1000_RCTL_UPE); if (if_getflags(ifp) & IFF_ALLMULTI) mcnt = MAX_NUM_MULTICAST_ADDRESSES; else mcnt = if_multiaddr_count(ifp, MAX_NUM_MULTICAST_ADDRESSES); /* Don't disable if in MAX groups */ if (mcnt < MAX_NUM_MULTICAST_ADDRESSES) reg_rctl &= (~E1000_RCTL_MPE); reg_rctl &= (~E1000_RCTL_SBP); E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); } /********************************************************************* * Multicast Update * * This routine is called whenever multicast address list is updated. * **********************************************************************/ static void lem_set_multi(struct adapter *adapter) { if_t ifp = adapter->ifp; u32 reg_rctl = 0; u8 *mta; /* Multicast array memory */ int mcnt = 0; IOCTL_DEBUGOUT("lem_set_multi: begin"); mta = adapter->mta; bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES); if (adapter->hw.mac.type == e1000_82542 && adapter->hw.revision_id == E1000_REVISION_2) { reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE) e1000_pci_clear_mwi(&adapter->hw); reg_rctl |= E1000_RCTL_RST; E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); msec_delay(5); } if_multiaddr_array(ifp, mta, &mcnt, MAX_NUM_MULTICAST_ADDRESSES); if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) { reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); reg_rctl |= E1000_RCTL_MPE; E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); } else e1000_update_mc_addr_list(&adapter->hw, mta, mcnt); if (adapter->hw.mac.type == e1000_82542 && adapter->hw.revision_id == E1000_REVISION_2) { reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); reg_rctl &= ~E1000_RCTL_RST; E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); msec_delay(5); if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE) e1000_pci_set_mwi(&adapter->hw); } } /********************************************************************* * Timer routine * * This routine checks for link status and updates statistics. 
 *
 **********************************************************************/
static void
lem_local_timer(void *arg)
{
	struct adapter	*adapter = arg;

	EM_CORE_LOCK_ASSERT(adapter);

	lem_update_link_status(adapter);
	lem_update_stats_counters(adapter);

	lem_smartspeed(adapter);

#ifdef NIC_PARAVIRT
	/* recover space if needed */
	if (adapter->csb && adapter->csb->guest_csb_on &&
	    (adapter->watchdog_check == TRUE) &&
	    (ticks - adapter->watchdog_time > EM_WATCHDOG) &&
	    (adapter->num_tx_desc_avail != adapter->num_tx_desc)) {
		lem_txeof(adapter);
		/*
		 * lem_txeof() normally (except when space in the queue
		 * runs low XXX) clears watchdog_check so that
		 * we do not hang.
		 */
	}
#endif /* NIC_PARAVIRT */
	/*
	 * We check the watchdog: the time since
	 * the last TX descriptor was cleaned.
	 * This implies a functional TX engine.
	 */
	if ((adapter->watchdog_check == TRUE) &&
	    (ticks - adapter->watchdog_time > EM_WATCHDOG))
		goto hung;

	callout_reset(&adapter->timer, hz, lem_local_timer, adapter);
	return;
hung:
	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
	if_setdrvflagbits(adapter->ifp, 0, IFF_DRV_RUNNING);
	adapter->watchdog_events++;
	lem_init_locked(adapter);
}

static void
lem_update_link_status(struct adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	if_t ifp = adapter->ifp;
	device_t dev = adapter->dev;
	u32 link_check = 0;

	/* Get the cached link value or read phy for real */
	switch (hw->phy.media_type) {
	case e1000_media_type_copper:
		if (hw->mac.get_link_status) {
			/* Do the work to read phy */
			e1000_check_for_link(hw);
			link_check = !hw->mac.get_link_status;
			if (link_check) /* ESB2 fix */
				e1000_cfg_on_link_up(hw);
		} else
			link_check = TRUE;
		break;
	case e1000_media_type_fiber:
		e1000_check_for_link(hw);
		link_check = (E1000_READ_REG(hw, E1000_STATUS) &
		    E1000_STATUS_LU);
		break;
	case e1000_media_type_internal_serdes:
		e1000_check_for_link(hw);
		link_check = adapter->hw.mac.serdes_has_link;
		break;
	default:
	case e1000_media_type_unknown:
		break;
	}

	/* Now check for a transition */
	if (link_check && (adapter->link_active == 0)) {
		e1000_get_speed_and_duplex(hw, &adapter->link_speed,
		    &adapter->link_duplex);
		if (bootverbose)
			device_printf(dev, "Link is up %d Mbps %s\n",
			    adapter->link_speed,
			    ((adapter->link_duplex == FULL_DUPLEX) ?
			    "Full Duplex" : "Half Duplex"));
		adapter->link_active = 1;
		adapter->smartspeed = 0;
		if_setbaudrate(ifp, adapter->link_speed * 1000000);
		if_link_state_change(ifp, LINK_STATE_UP);
	} else if (!link_check && (adapter->link_active == 1)) {
		if_setbaudrate(ifp, 0);
		adapter->link_speed = 0;
		adapter->link_duplex = 0;
		if (bootverbose)
			device_printf(dev, "Link is Down\n");
		adapter->link_active = 0;
		/* Link down, disable watchdog */
		adapter->watchdog_check = FALSE;
		if_link_state_change(ifp, LINK_STATE_DOWN);
	}
}

/*********************************************************************
 *
 *  This routine disables all traffic on the adapter by issuing a
 *  global reset on the MAC and deallocates TX/RX buffers.
 *
 *  This routine should always be called with BOTH the CORE
 *  and TX locks.
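 *
 *  A typical caller pattern (cf. lem_detach() and lem_init_locked()):
 *
 *	EM_CORE_LOCK(adapter);
 *	EM_TX_LOCK(adapter);
 *	lem_stop(adapter);
 *	EM_TX_UNLOCK(adapter);
 *	EM_CORE_UNLOCK(adapter);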
 **********************************************************************/
static void
lem_stop(void *arg)
{
	struct adapter	*adapter = arg;
	if_t ifp = adapter->ifp;

	EM_CORE_LOCK_ASSERT(adapter);
	EM_TX_LOCK_ASSERT(adapter);

	INIT_DEBUGOUT("lem_stop: begin");

	lem_disable_intr(adapter);
	callout_stop(&adapter->timer);
	callout_stop(&adapter->tx_fifo_timer);

	/* Tell the stack that the interface is no longer active */
	if_setdrvflagbits(ifp, 0, (IFF_DRV_RUNNING | IFF_DRV_OACTIVE));

	e1000_reset_hw(&adapter->hw);
	if (adapter->hw.mac.type >= e1000_82544)
		E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);

	e1000_led_off(&adapter->hw);
	e1000_cleanup_led(&adapter->hw);
}

/*********************************************************************
 *
 *  Determine hardware revision.
 *
 **********************************************************************/
static void
lem_identify_hardware(struct adapter *adapter)
{
	device_t dev = adapter->dev;

	/* Make sure our PCI config space has the necessary stuff set */
	pci_enable_busmaster(dev);
	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);

	/* Save off the information about this board */
	adapter->hw.vendor_id = pci_get_vendor(dev);
	adapter->hw.device_id = pci_get_device(dev);
	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
	adapter->hw.subsystem_vendor_id =
	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
	adapter->hw.subsystem_device_id =
	    pci_read_config(dev, PCIR_SUBDEV_0, 2);

	/* Do Shared Code Init and Setup */
	if (e1000_set_mac_type(&adapter->hw)) {
		device_printf(dev, "Setup init failure\n");
		return;
	}
}

static int
lem_allocate_pci_resources(struct adapter *adapter)
{
	device_t dev = adapter->dev;
	int		val, rid, error = E1000_SUCCESS;

	rid = PCIR_BAR(0);
	adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
	    &rid, RF_ACTIVE);
	if (adapter->memory == NULL) {
		device_printf(dev, "Unable to allocate bus resource: memory\n");
		return (ENXIO);
	}
	adapter->osdep.mem_bus_space_tag =
	    rman_get_bustag(adapter->memory);
	adapter->osdep.mem_bus_space_handle =
	    rman_get_bushandle(adapter->memory);
	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;

	/* Only older adapters use IO mapping */
	if (adapter->hw.mac.type > e1000_82543) {
		/* Figure out where our IO BAR is
*/ for (rid = PCIR_BAR(0); rid < PCIR_CIS;) { val = pci_read_config(dev, rid, 4); if (EM_BAR_TYPE(val) == EM_BAR_TYPE_IO) { adapter->io_rid = rid; break; } rid += 4; /* check for 64bit BAR */ if (EM_BAR_MEM_TYPE(val) == EM_BAR_MEM_TYPE_64BIT) rid += 4; } if (rid >= PCIR_CIS) { device_printf(dev, "Unable to locate IO BAR\n"); return (ENXIO); } adapter->ioport = bus_alloc_resource_any(dev, SYS_RES_IOPORT, &adapter->io_rid, RF_ACTIVE); if (adapter->ioport == NULL) { device_printf(dev, "Unable to allocate bus resource: " "ioport\n"); return (ENXIO); } adapter->hw.io_base = 0; adapter->osdep.io_bus_space_tag = rman_get_bustag(adapter->ioport); adapter->osdep.io_bus_space_handle = rman_get_bushandle(adapter->ioport); } adapter->hw.back = &adapter->osdep; return (error); } /********************************************************************* * * Setup the Legacy or MSI Interrupt handler * **********************************************************************/ int lem_allocate_irq(struct adapter *adapter) { device_t dev = adapter->dev; int error, rid = 0; /* Manually turn off all interrupts */ E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff); /* We allocate a single interrupt resource */ adapter->res[0] = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); if (adapter->res[0] == NULL) { device_printf(dev, "Unable to allocate bus resource: " "interrupt\n"); return (ENXIO); } /* Do Legacy setup? */ if (lem_use_legacy_irq) { if ((error = bus_setup_intr(dev, adapter->res[0], INTR_TYPE_NET | INTR_MPSAFE, NULL, lem_intr, adapter, &adapter->tag[0])) != 0) { device_printf(dev, "Failed to register interrupt handler"); return (error); } return (0); } /* * Use a Fast interrupt and the associated * deferred processing contexts. */ TASK_INIT(&adapter->rxtx_task, 0, lem_handle_rxtx, adapter); TASK_INIT(&adapter->link_task, 0, lem_handle_link, adapter); adapter->tq = taskqueue_create_fast("lem_taskq", M_NOWAIT, taskqueue_thread_enqueue, &adapter->tq); taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq", device_get_nameunit(adapter->dev)); if ((error = bus_setup_intr(dev, adapter->res[0], INTR_TYPE_NET, lem_irq_fast, NULL, adapter, &adapter->tag[0])) != 0) { device_printf(dev, "Failed to register fast interrupt " "handler: %d\n", error); taskqueue_free(adapter->tq); adapter->tq = NULL; return (error); } return (0); } static void lem_free_pci_resources(struct adapter *adapter) { device_t dev = adapter->dev; if (adapter->tag[0] != NULL) { bus_teardown_intr(dev, adapter->res[0], adapter->tag[0]); adapter->tag[0] = NULL; } if (adapter->res[0] != NULL) { bus_release_resource(dev, SYS_RES_IRQ, 0, adapter->res[0]); } if (adapter->memory != NULL) bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(0), adapter->memory); if (adapter->ioport != NULL) bus_release_resource(dev, SYS_RES_IOPORT, adapter->io_rid, adapter->ioport); } /********************************************************************* * * Initialize the hardware to a configuration * as specified by the adapter structure. * **********************************************************************/ static int lem_hardware_init(struct adapter *adapter) { device_t dev = adapter->dev; u16 rx_buffer_size; INIT_DEBUGOUT("lem_hardware_init: begin"); /* Issue a global reset */ e1000_reset_hw(&adapter->hw); /* When hardware is reset, fifo_head is also reset */ adapter->tx_fifo_head = 0; /* * These parameters control the automatic generation (Tx) and * response (Rx) to Ethernet PAUSE frames. 
* - High water mark should allow for at least two frames to be * received after sending an XOFF. * - Low water mark works best when it is very near the high water mark. * This allows the receiver to restart by sending XON when it has * drained a bit. Here we use an arbitrary value of 1500 which will * restart after one full frame is pulled from the buffer. There * could be several smaller frames in the buffer and if so they will * not trigger the XON until their total number reduces the buffer * by 1500. * - The pause time is fairly large at 1000 x 512ns = 512 usec. */ rx_buffer_size = ((E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff) << 10 ); adapter->hw.fc.high_water = rx_buffer_size - roundup2(adapter->max_frame_size, 1024); adapter->hw.fc.low_water = adapter->hw.fc.high_water - 1500; adapter->hw.fc.pause_time = EM_FC_PAUSE_TIME; adapter->hw.fc.send_xon = TRUE; /* Set Flow control, use the tunable location if sane */ if ((lem_fc_setting >= 0) && (lem_fc_setting < 4)) adapter->hw.fc.requested_mode = lem_fc_setting; else adapter->hw.fc.requested_mode = e1000_fc_none; if (e1000_init_hw(&adapter->hw) < 0) { device_printf(dev, "Hardware Initialization Failed\n"); return (EIO); } e1000_check_for_link(&adapter->hw); return (0); } /********************************************************************* * * Setup networking device structure and register an interface. * **********************************************************************/ static int lem_setup_interface(device_t dev, struct adapter *adapter) { if_t ifp; INIT_DEBUGOUT("lem_setup_interface: begin"); ifp = adapter->ifp = if_gethandle(IFT_ETHER); if (ifp == (void *)NULL) { device_printf(dev, "can not allocate ifnet structure\n"); return (-1); } if_initname(ifp, device_get_name(dev), device_get_unit(dev)); if_setinitfn(ifp, lem_init); if_setsoftc(ifp, adapter); if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST); if_setioctlfn(ifp, lem_ioctl); if_setstartfn(ifp, lem_start); if_setgetcounterfn(ifp, lem_get_counter); if_setsendqlen(ifp, adapter->num_tx_desc - 1); if_setsendqready(ifp); ether_ifattach(ifp, adapter->hw.mac.addr); if_setcapabilities(ifp, 0); if (adapter->hw.mac.type >= e1000_82543) { if_setcapabilitiesbit(ifp, IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM, 0); if_setcapenablebit(ifp, IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM, 0); } /* * Tell the upper layer(s) we support long frames. */ if_setifheaderlen(ifp, sizeof(struct ether_vlan_header)); if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU, 0); if_setcapenablebit(ifp, IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU, 0); /* ** Don't turn this on by default: if vlans are ** created on another pseudo device (e.g. lagg) ** then vlan events are not passed through, breaking ** operation, but with HW FILTER off it works. If ** using vlans directly on the em driver you can ** enable this and get full hardware tag filtering.
*/ if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWFILTER, 0); #ifdef DEVICE_POLLING if_setcapabilitiesbit(ifp, IFCAP_POLLING, 0); #endif /* Enable only WOL MAGIC by default */ if (adapter->wol) { if_setcapabilitiesbit(ifp, IFCAP_WOL, 0); if_setcapenablebit(ifp, IFCAP_WOL_MAGIC, 0); } /* * Specify the media types supported by this adapter and register * callbacks to update media and link information */ ifmedia_init(&adapter->media, IFM_IMASK, lem_media_change, lem_media_status); if ((adapter->hw.phy.media_type == e1000_media_type_fiber) || (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) { u_char fiber_type = IFM_1000_SX; /* default type */ if (adapter->hw.mac.type == e1000_82545) fiber_type = IFM_1000_LX; ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX, 0, NULL); ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL); } else { ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL); ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX, 0, NULL); ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX, 0, NULL); ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX, 0, NULL); if (adapter->hw.phy.type != e1000_phy_ife) { ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL); ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_T, 0, NULL); } } ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO); return (0); } /********************************************************************* * * Workaround for SmartSpeed on 82541 and 82547 controllers * **********************************************************************/ static void lem_smartspeed(struct adapter *adapter) { u16 phy_tmp; if (adapter->link_active || (adapter->hw.phy.type != e1000_phy_igp) || adapter->hw.mac.autoneg == 0 || (adapter->hw.phy.autoneg_advertised & ADVERTISE_1000_FULL) == 0) return; if (adapter->smartspeed == 0) { /* If Master/Slave config fault is asserted twice, * we assume back-to-back */ e1000_read_phy_reg(&adapter->hw, PHY_1000T_STATUS, &phy_tmp); if (!(phy_tmp & SR_1000T_MS_CONFIG_FAULT)) return; e1000_read_phy_reg(&adapter->hw, PHY_1000T_STATUS, &phy_tmp); if (phy_tmp & SR_1000T_MS_CONFIG_FAULT) { e1000_read_phy_reg(&adapter->hw, PHY_1000T_CTRL, &phy_tmp); if(phy_tmp & CR_1000T_MS_ENABLE) { phy_tmp &= ~CR_1000T_MS_ENABLE; e1000_write_phy_reg(&adapter->hw, PHY_1000T_CTRL, phy_tmp); adapter->smartspeed++; if(adapter->hw.mac.autoneg && !e1000_copper_link_autoneg(&adapter->hw) && !e1000_read_phy_reg(&adapter->hw, PHY_CONTROL, &phy_tmp)) { phy_tmp |= (MII_CR_AUTO_NEG_EN | MII_CR_RESTART_AUTO_NEG); e1000_write_phy_reg(&adapter->hw, PHY_CONTROL, phy_tmp); } } } return; } else if(adapter->smartspeed == EM_SMARTSPEED_DOWNSHIFT) { /* If still no link, perhaps using 2/3 pair cable */ e1000_read_phy_reg(&adapter->hw, PHY_1000T_CTRL, &phy_tmp); phy_tmp |= CR_1000T_MS_ENABLE; e1000_write_phy_reg(&adapter->hw, PHY_1000T_CTRL, phy_tmp); if(adapter->hw.mac.autoneg && !e1000_copper_link_autoneg(&adapter->hw) && !e1000_read_phy_reg(&adapter->hw, PHY_CONTROL, &phy_tmp)) { phy_tmp |= (MII_CR_AUTO_NEG_EN | MII_CR_RESTART_AUTO_NEG); e1000_write_phy_reg(&adapter->hw, PHY_CONTROL, phy_tmp); } } /* Restart process after EM_SMARTSPEED_MAX iterations */ if(adapter->smartspeed++ == EM_SMARTSPEED_MAX) adapter->smartspeed = 0; } /* * Manage DMA'able memory. 
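* lem_dmamap_cb() below is the callback handed to bus_dmamap_load():
* on success it receives the resolved segment list and stores the
* single segment's bus address through the opaque arg pointer (the
* tags created here use nsegments == 1, so segs[0] covers the whole
* mapping).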
*/ static void lem_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) { if (error) return; *(bus_addr_t *) arg = segs[0].ds_addr; } static int lem_dma_malloc(struct adapter *adapter, bus_size_t size, struct em_dma_alloc *dma, int mapflags) { int error; error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */ EM_DBA_ALIGN, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ size, /* maxsize */ 1, /* nsegments */ size, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockarg */ &dma->dma_tag); if (error) { device_printf(adapter->dev, "%s: bus_dma_tag_create failed: %d\n", __func__, error); goto fail_0; } error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr, BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map); if (error) { device_printf(adapter->dev, "%s: bus_dmamem_alloc(%ju) failed: %d\n", __func__, (uintmax_t)size, error); goto fail_2; } dma->dma_paddr = 0; error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr, size, lem_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT); if (error || dma->dma_paddr == 0) { device_printf(adapter->dev, "%s: bus_dmamap_load failed: %d\n", __func__, error); goto fail_3; } return (0); fail_3: bus_dmamap_unload(dma->dma_tag, dma->dma_map); fail_2: bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map); bus_dma_tag_destroy(dma->dma_tag); fail_0: dma->dma_tag = NULL; return (error); } static void lem_dma_free(struct adapter *adapter, struct em_dma_alloc *dma) { if (dma->dma_tag == NULL) return; if (dma->dma_paddr != 0) { bus_dmamap_sync(dma->dma_tag, dma->dma_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(dma->dma_tag, dma->dma_map); dma->dma_paddr = 0; } if (dma->dma_vaddr != NULL) { bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map); dma->dma_vaddr = NULL; } bus_dma_tag_destroy(dma->dma_tag); dma->dma_tag = NULL; } /********************************************************************* * * Allocate memory for tx_buffer structures. The tx_buffer stores all * the information needed to transmit a packet on the wire. 
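* Each tx_buffer pairs a bus_dmamap_t with the head of the mbuf chain
* being transmitted and next_eop, the index of the packet's final
* (end-of-packet) descriptor, or -1 when the slot is idle.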
* **********************************************************************/ static int lem_allocate_transmit_structures(struct adapter *adapter) { device_t dev = adapter->dev; struct em_buffer *tx_buffer; int error; /* * Create DMA tags for tx descriptors */ if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ 1, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ MCLBYTES * EM_MAX_SCATTER, /* maxsize */ EM_MAX_SCATTER, /* nsegments */ MCLBYTES, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockarg */ &adapter->txtag)) != 0) { device_printf(dev, "Unable to allocate TX DMA tag\n"); goto fail; } adapter->tx_buffer_area = malloc(sizeof(struct em_buffer) * adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO); if (adapter->tx_buffer_area == NULL) { device_printf(dev, "Unable to allocate tx_buffer memory\n"); error = ENOMEM; goto fail; } /* Create the descriptor buffer dma maps */ for (int i = 0; i < adapter->num_tx_desc; i++) { tx_buffer = &adapter->tx_buffer_area[i]; error = bus_dmamap_create(adapter->txtag, 0, &tx_buffer->map); if (error != 0) { device_printf(dev, "Unable to create TX DMA map\n"); goto fail; } tx_buffer->next_eop = -1; } return (0); fail: lem_free_transmit_structures(adapter); return (error); } /********************************************************************* * * (Re)Initialize transmit structures. * **********************************************************************/ static void lem_setup_transmit_structures(struct adapter *adapter) { struct em_buffer *tx_buffer; #ifdef DEV_NETMAP /* we are already locked */ struct netmap_adapter *na = netmap_getna(adapter->ifp); struct netmap_slot *slot = netmap_reset(na, NR_TX, 0, 0); #endif /* DEV_NETMAP */ /* Clear the old ring contents */ bzero(adapter->tx_desc_base, (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc); /* Free any existing TX buffers */ for (int i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) { tx_buffer = &adapter->tx_buffer_area[i]; bus_dmamap_sync(adapter->txtag, tx_buffer->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(adapter->txtag, tx_buffer->map); m_freem(tx_buffer->m_head); tx_buffer->m_head = NULL; #ifdef DEV_NETMAP if (slot) { /* the i-th NIC entry goes to slot si */ int si = netmap_idx_n2k(&na->tx_rings[0], i); uint64_t paddr; void *addr; addr = PNMB(na, slot + si, &paddr); adapter->tx_desc_base[i].buffer_addr = htole64(paddr); /* reload the map for netmap mode */ netmap_load_map(na, adapter->txtag, tx_buffer->map, addr); } #endif /* DEV_NETMAP */ tx_buffer->next_eop = -1; } /* Reset state */ adapter->last_hw_offload = 0; adapter->next_avail_tx_desc = 0; adapter->next_tx_to_clean = 0; adapter->num_tx_desc_avail = adapter->num_tx_desc; bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); return; } /********************************************************************* * * Enable transmit unit. 
* **********************************************************************/ static void lem_initialize_transmit_unit(struct adapter *adapter) { u32 tctl, tipg = 0; u64 bus_addr; INIT_DEBUGOUT("lem_initialize_transmit_unit: begin"); /* Setup the Base and Length of the Tx Descriptor Ring */ bus_addr = adapter->txdma.dma_paddr; E1000_WRITE_REG(&adapter->hw, E1000_TDLEN(0), adapter->num_tx_desc * sizeof(struct e1000_tx_desc)); E1000_WRITE_REG(&adapter->hw, E1000_TDBAH(0), (u32)(bus_addr >> 32)); E1000_WRITE_REG(&adapter->hw, E1000_TDBAL(0), (u32)bus_addr); /* Setup the HW Tx Head and Tail descriptor pointers */ E1000_WRITE_REG(&adapter->hw, E1000_TDT(0), 0); E1000_WRITE_REG(&adapter->hw, E1000_TDH(0), 0); HW_DEBUGOUT2("Base = %x, Length = %x\n", E1000_READ_REG(&adapter->hw, E1000_TDBAL(0)), E1000_READ_REG(&adapter->hw, E1000_TDLEN(0))); /* Set the default values for the Tx Inter Packet Gap timer */ switch (adapter->hw.mac.type) { case e1000_82542: tipg = DEFAULT_82542_TIPG_IPGT; tipg |= DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT; tipg |= DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT; break; default: if ((adapter->hw.phy.media_type == e1000_media_type_fiber) || (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) tipg = DEFAULT_82543_TIPG_IPGT_FIBER; else tipg = DEFAULT_82543_TIPG_IPGT_COPPER; tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT; tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT; } E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg); E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value); if(adapter->hw.mac.type >= e1000_82540) E1000_WRITE_REG(&adapter->hw, E1000_TADV, adapter->tx_abs_int_delay.value); /* Program the Transmit Control Register */ tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL); tctl &= ~E1000_TCTL_CT; tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN | (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT)); /* This write will effectively turn on the transmit unit. */ E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl); /* Setup Transmit Descriptor Base Settings */ adapter->txd_cmd = E1000_TXD_CMD_IFCS; if (adapter->tx_int_delay.value > 0) adapter->txd_cmd |= E1000_TXD_CMD_IDE; } /********************************************************************* * * Free all transmit related data structures. * **********************************************************************/ static void lem_free_transmit_structures(struct adapter *adapter) { struct em_buffer *tx_buffer; INIT_DEBUGOUT("free_transmit_structures: begin"); if (adapter->tx_buffer_area != NULL) { for (int i = 0; i < adapter->num_tx_desc; i++) { tx_buffer = &adapter->tx_buffer_area[i]; if (tx_buffer->m_head != NULL) { bus_dmamap_sync(adapter->txtag, tx_buffer->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(adapter->txtag, tx_buffer->map); m_freem(tx_buffer->m_head); tx_buffer->m_head = NULL; } else if (tx_buffer->map != NULL) bus_dmamap_unload(adapter->txtag, tx_buffer->map); if (tx_buffer->map != NULL) { bus_dmamap_destroy(adapter->txtag, tx_buffer->map); tx_buffer->map = NULL; } } } if (adapter->tx_buffer_area != NULL) { free(adapter->tx_buffer_area, M_DEVBUF); adapter->tx_buffer_area = NULL; } if (adapter->txtag != NULL) { bus_dma_tag_destroy(adapter->txtag); adapter->txtag = NULL; } } /********************************************************************* * * The offload context needs to be set when we transfer the first * packet of a particular protocol (TCP/UDP). 
This routine has been * enhanced to deal with inserted VLAN headers, and IPV6 (not complete) * * Added back the old method of keeping the current context type * and not setting if unnecessary, as this is reported to be a * big performance win. -jfv **********************************************************************/ static void lem_transmit_checksum_setup(struct adapter *adapter, struct mbuf *mp, u32 *txd_upper, u32 *txd_lower) { struct e1000_context_desc *TXD = NULL; struct em_buffer *tx_buffer; struct ether_vlan_header *eh; struct ip *ip = NULL; struct ip6_hdr *ip6; int curr_txd, ehdrlen; u32 cmd, hdr_len, ip_hlen; u16 etype; u8 ipproto; cmd = hdr_len = ipproto = 0; *txd_upper = *txd_lower = 0; curr_txd = adapter->next_avail_tx_desc; /* * Determine where frame payload starts. * Jump over vlan headers if already present, * helpful for QinQ too. */ eh = mtod(mp, struct ether_vlan_header *); if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { etype = ntohs(eh->evl_proto); ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; } else { etype = ntohs(eh->evl_encap_proto); ehdrlen = ETHER_HDR_LEN; } /* * We only support TCP/UDP for IPv4 and IPv6 for the moment. * TODO: Support SCTP too when it hits the tree. */ switch (etype) { case ETHERTYPE_IP: ip = (struct ip *)(mp->m_data + ehdrlen); ip_hlen = ip->ip_hl << 2; /* Setup of IP header checksum. */ if (mp->m_pkthdr.csum_flags & CSUM_IP) { /* * Start offset for header checksum calculation. * End offset for header checksum calculation. * Offset of place to put the checksum. */ TXD = (struct e1000_context_desc *) &adapter->tx_desc_base[curr_txd]; TXD->lower_setup.ip_fields.ipcss = ehdrlen; TXD->lower_setup.ip_fields.ipcse = htole16(ehdrlen + ip_hlen); TXD->lower_setup.ip_fields.ipcso = ehdrlen + offsetof(struct ip, ip_sum); cmd |= E1000_TXD_CMD_IP; *txd_upper |= E1000_TXD_POPTS_IXSM << 8; } hdr_len = ehdrlen + ip_hlen; ipproto = ip->ip_p; break; case ETHERTYPE_IPV6: ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen); ip_hlen = sizeof(struct ip6_hdr); /* XXX: No header stacking. */ /* IPv6 doesn't have a header checksum. */ hdr_len = ehdrlen + ip_hlen; ipproto = ip6->ip6_nxt; break; default: return; } switch (ipproto) { case IPPROTO_TCP: if (mp->m_pkthdr.csum_flags & CSUM_TCP) { *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D; *txd_upper |= E1000_TXD_POPTS_TXSM << 8; /* no need for context if already set */ if (adapter->last_hw_offload == CSUM_TCP) return; adapter->last_hw_offload = CSUM_TCP; /* * Start offset for payload checksum calculation. * End offset for payload checksum calculation. * Offset of place to put the checksum. */ TXD = (struct e1000_context_desc *) &adapter->tx_desc_base[curr_txd]; TXD->upper_setup.tcp_fields.tucss = hdr_len; TXD->upper_setup.tcp_fields.tucse = htole16(0); TXD->upper_setup.tcp_fields.tucso = hdr_len + offsetof(struct tcphdr, th_sum); cmd |= E1000_TXD_CMD_TCP; } break; case IPPROTO_UDP: { if (mp->m_pkthdr.csum_flags & CSUM_UDP) { *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D; *txd_upper |= E1000_TXD_POPTS_TXSM << 8; /* no need for context if already set */ if (adapter->last_hw_offload == CSUM_UDP) return; adapter->last_hw_offload = CSUM_UDP; /* * Start offset for header checksum calculation. * End offset for header checksum calculation. * Offset of place to put the checksum. 
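* For example, for an untagged IPv4/UDP frame: ehdrlen = 14 and
* ip_hlen = 20, so tucss = hdr_len = 34, tucse = 0 (checksum runs to
* the end of the frame) and tucso = 34 + offsetof(struct udphdr,
* uh_sum) = 40, the position of the UDP checksum field.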
*/ TXD = (struct e1000_context_desc *) &adapter->tx_desc_base[curr_txd]; TXD->upper_setup.tcp_fields.tucss = hdr_len; TXD->upper_setup.tcp_fields.tucse = htole16(0); TXD->upper_setup.tcp_fields.tucso = hdr_len + offsetof(struct udphdr, uh_sum); } /* Fall Thru */ } default: break; } if (TXD == NULL) return; TXD->tcp_seg_setup.data = htole32(0); TXD->cmd_and_length = htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd); tx_buffer = &adapter->tx_buffer_area[curr_txd]; tx_buffer->m_head = NULL; tx_buffer->next_eop = -1; if (++curr_txd == adapter->num_tx_desc) curr_txd = 0; adapter->num_tx_desc_avail--; adapter->next_avail_tx_desc = curr_txd; } /********************************************************************** * * Examine each tx_buffer in the used queue. If the hardware is done * processing the packet then free associated resources. The * tx_buffer is put back on the free queue. * **********************************************************************/ static void lem_txeof(struct adapter *adapter) { int first, last, done, num_avail; struct em_buffer *tx_buffer; struct e1000_tx_desc *tx_desc, *eop_desc; if_t ifp = adapter->ifp; EM_TX_LOCK_ASSERT(adapter); #ifdef DEV_NETMAP if (netmap_tx_irq(ifp, 0)) return; #endif /* DEV_NETMAP */ if (adapter->num_tx_desc_avail == adapter->num_tx_desc) return; num_avail = adapter->num_tx_desc_avail; first = adapter->next_tx_to_clean; tx_desc = &adapter->tx_desc_base[first]; tx_buffer = &adapter->tx_buffer_area[first]; last = tx_buffer->next_eop; eop_desc = &adapter->tx_desc_base[last]; /* * What this does is get the index of the * first descriptor AFTER the EOP of the * first packet, that way we can do the * simple comparison on the inner while loop. */ if (++last == adapter->num_tx_desc) last = 0; done = last; bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map, BUS_DMASYNC_POSTREAD); while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) { /* We clean the range of the packet */ while (first != done) { tx_desc->upper.data = 0; tx_desc->lower.data = 0; tx_desc->buffer_addr = 0; ++num_avail; if (tx_buffer->m_head) { if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); bus_dmamap_sync(adapter->txtag, tx_buffer->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(adapter->txtag, tx_buffer->map); m_freem(tx_buffer->m_head); tx_buffer->m_head = NULL; } tx_buffer->next_eop = -1; adapter->watchdog_time = ticks; if (++first == adapter->num_tx_desc) first = 0; tx_buffer = &adapter->tx_buffer_area[first]; tx_desc = &adapter->tx_desc_base[first]; } /* See if we can continue to the next packet */ last = tx_buffer->next_eop; if (last != -1) { eop_desc = &adapter->tx_desc_base[last]; /* Get new done point */ if (++last == adapter->num_tx_desc) last = 0; done = last; } else break; } bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); adapter->next_tx_to_clean = first; adapter->num_tx_desc_avail = num_avail; #ifdef NIC_SEND_COMBINING if ((adapter->shadow_tdt & MIT_PENDING_TDT) == MIT_PENDING_TDT) { /* a tdt write is pending, do it */ E1000_WRITE_REG(&adapter->hw, E1000_TDT(0), 0xffff & adapter->shadow_tdt); adapter->shadow_tdt = MIT_PENDING_INT; } else { adapter->shadow_tdt = 0; // disable } #endif /* NIC_SEND_COMBINING */ /* * If we have enough room, clear IFF_DRV_OACTIVE to * tell the stack that it is OK to send packets. * If there are no pending descriptors, clear the watchdog. 
*/ if (adapter->num_tx_desc_avail > EM_TX_CLEANUP_THRESHOLD) { if_setdrvflagbits(ifp, 0, IFF_DRV_OACTIVE); #ifdef NIC_PARAVIRT if (adapter->csb) { // XXX also csb_on ? adapter->csb->guest_need_txkick = 2; /* acked */ // XXX memory barrier } #endif /* NIC_PARAVIRT */ if (adapter->num_tx_desc_avail == adapter->num_tx_desc) { adapter->watchdog_check = FALSE; return; } } } /********************************************************************* * * When link is lost there is sometimes work still pending in the TX ring, * which could trigger a watchdog event; rather than allow that, we attempt * a cleanup here and, if work is still outstanding, reinit. Note that this * has been seen mostly with fiber adapters. * **********************************************************************/ static void lem_tx_purge(struct adapter *adapter) { if ((!adapter->link_active) && (adapter->watchdog_check)) { EM_TX_LOCK(adapter); lem_txeof(adapter); EM_TX_UNLOCK(adapter); if (adapter->watchdog_check) /* Still outstanding? */ lem_init_locked(adapter); } } /********************************************************************* * * Get a buffer from the system mbuf cluster pool. * **********************************************************************/ static int lem_get_buf(struct adapter *adapter, int i) { struct mbuf *m; bus_dma_segment_t segs[1]; bus_dmamap_t map; struct em_buffer *rx_buffer; int error, nsegs; m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); if (m == NULL) { adapter->mbuf_cluster_failed++; return (ENOBUFS); } m->m_len = m->m_pkthdr.len = MCLBYTES; if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN)) m_adj(m, ETHER_ALIGN); /* * Using memory from the mbuf cluster pool, invoke the * bus_dma machinery to arrange the memory mapping. */ error = bus_dmamap_load_mbuf_sg(adapter->rxtag, adapter->rx_sparemap, m, segs, &nsegs, BUS_DMA_NOWAIT); if (error != 0) { m_free(m); return (error); } /* If nsegs is wrong then the stack is corrupt. */ KASSERT(nsegs == 1, ("Too many segments returned!")); rx_buffer = &adapter->rx_buffer_area[i]; if (rx_buffer->m_head != NULL) bus_dmamap_unload(adapter->rxtag, rx_buffer->map); map = rx_buffer->map; rx_buffer->map = adapter->rx_sparemap; adapter->rx_sparemap = map; bus_dmamap_sync(adapter->rxtag, rx_buffer->map, BUS_DMASYNC_PREREAD); rx_buffer->m_head = m; adapter->rx_desc_base[i].buffer_addr = htole64(segs[0].ds_addr); return (0); } /********************************************************************* * * Allocate memory for rx_buffer structures. Since we use one * rx_buffer per received packet, the maximum number of rx_buffers * that we'll need is equal to the number of receive descriptors * that we've allocated.
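* A spare DMA map (rx_sparemap) is kept as well: lem_get_buf() loads
* the replacement cluster on the spare map first and swaps maps only
* on success, so a failed load leaves the old buffer in place.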
* **********************************************************************/ static int lem_allocate_receive_structures(struct adapter *adapter) { device_t dev = adapter->dev; struct em_buffer *rx_buffer; int i, error; adapter->rx_buffer_area = malloc(sizeof(struct em_buffer) * adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO); if (adapter->rx_buffer_area == NULL) { device_printf(dev, "Unable to allocate rx_buffer memory\n"); return (ENOMEM); } error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ 1, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ MCLBYTES, /* maxsize */ 1, /* nsegments */ MCLBYTES, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockarg */ &adapter->rxtag); if (error) { device_printf(dev, "%s: bus_dma_tag_create failed %d\n", __func__, error); goto fail; } /* Create the spare map (used by getbuf) */ error = bus_dmamap_create(adapter->rxtag, 0, &adapter->rx_sparemap); if (error) { device_printf(dev, "%s: bus_dmamap_create failed: %d\n", __func__, error); goto fail; } rx_buffer = adapter->rx_buffer_area; for (i = 0; i < adapter->num_rx_desc; i++, rx_buffer++) { error = bus_dmamap_create(adapter->rxtag, 0, &rx_buffer->map); if (error) { device_printf(dev, "%s: bus_dmamap_create failed: %d\n", __func__, error); goto fail; } } return (0); fail: lem_free_receive_structures(adapter); return (error); } /********************************************************************* * * (Re)initialize receive structures. * **********************************************************************/ static int lem_setup_receive_structures(struct adapter *adapter) { struct em_buffer *rx_buffer; int i, error; #ifdef DEV_NETMAP /* we are already under lock */ struct netmap_adapter *na = netmap_getna(adapter->ifp); struct netmap_slot *slot = netmap_reset(na, NR_RX, 0, 0); #endif /* Reset descriptor ring */ bzero(adapter->rx_desc_base, (sizeof(struct e1000_rx_desc)) * adapter->num_rx_desc); /* Free current RX buffers. */ rx_buffer = adapter->rx_buffer_area; for (i = 0; i < adapter->num_rx_desc; i++, rx_buffer++) { if (rx_buffer->m_head != NULL) { bus_dmamap_sync(adapter->rxtag, rx_buffer->map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(adapter->rxtag, rx_buffer->map); m_freem(rx_buffer->m_head); rx_buffer->m_head = NULL; } } /* Allocate new ones. */ for (i = 0; i < adapter->num_rx_desc; i++) { #ifdef DEV_NETMAP if (slot) { /* the i-th NIC entry goes to slot si */ int si = netmap_idx_n2k(&na->rx_rings[0], i); uint64_t paddr; void *addr; addr = PNMB(na, slot + si, &paddr); netmap_load_map(na, adapter->rxtag, rx_buffer->map, addr); /* Update descriptor */ adapter->rx_desc_base[i].buffer_addr = htole64(paddr); continue; } #endif /* DEV_NETMAP */ error = lem_get_buf(adapter, i); if (error) return (error); } /* Setup our descriptor pointers */ adapter->next_rx_desc_to_check = 0; bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); return (0); } /********************************************************************* * * Enable receive unit. 
* **********************************************************************/ static void lem_initialize_receive_unit(struct adapter *adapter) { if_t ifp = adapter->ifp; u64 bus_addr; u32 rctl, rxcsum; INIT_DEBUGOUT("lem_initialize_receive_unit: begin"); /* * Make sure receives are disabled while setting * up the descriptor ring */ rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl & ~E1000_RCTL_EN); if (adapter->hw.mac.type >= e1000_82540) { E1000_WRITE_REG(&adapter->hw, E1000_RADV, adapter->rx_abs_int_delay.value); /* * Set the interrupt throttling rate. Value is calculated * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns) */ E1000_WRITE_REG(&adapter->hw, E1000_ITR, DEFAULT_ITR); } /* Setup the Base and Length of the Rx Descriptor Ring */ bus_addr = adapter->rxdma.dma_paddr; E1000_WRITE_REG(&adapter->hw, E1000_RDLEN(0), adapter->num_rx_desc * sizeof(struct e1000_rx_desc)); E1000_WRITE_REG(&adapter->hw, E1000_RDBAH(0), (u32)(bus_addr >> 32)); E1000_WRITE_REG(&adapter->hw, E1000_RDBAL(0), (u32)bus_addr); /* Setup the Receive Control Register */ rctl &= ~(3 << E1000_RCTL_MO_SHIFT); rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF | (adapter->hw.mac.mc_filter_type << E1000_RCTL_MO_SHIFT); /* Make sure VLAN Filters are off */ rctl &= ~E1000_RCTL_VFE; if (e1000_tbi_sbp_enabled_82543(&adapter->hw)) rctl |= E1000_RCTL_SBP; else rctl &= ~E1000_RCTL_SBP; switch (adapter->rx_buffer_len) { default: case 2048: rctl |= E1000_RCTL_SZ_2048; break; case 4096: rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX | E1000_RCTL_LPE; break; case 8192: rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX | E1000_RCTL_LPE; break; case 16384: rctl |= E1000_RCTL_SZ_16384 | E1000_RCTL_BSEX | E1000_RCTL_LPE; break; } if (if_getmtu(ifp) > ETHERMTU) rctl |= E1000_RCTL_LPE; else rctl &= ~E1000_RCTL_LPE; /* Enable 82543 Receive Checksum Offload for TCP and UDP */ if ((adapter->hw.mac.type >= e1000_82543) && (if_getcapenable(ifp) & IFCAP_RXCSUM)) { rxcsum = E1000_READ_REG(&adapter->hw, E1000_RXCSUM); rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL); E1000_WRITE_REG(&adapter->hw, E1000_RXCSUM, rxcsum); } /* Enable Receives */ E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl); /* * Setup the HW Rx Head and * Tail Descriptor Pointers */ E1000_WRITE_REG(&adapter->hw, E1000_RDH(0), 0); rctl = adapter->num_rx_desc - 1; /* default RDT value */ #ifdef DEV_NETMAP /* preserve buffers already made available to clients */ if (if_getcapenable(ifp) & IFCAP_NETMAP) { struct netmap_adapter *na = netmap_getna(adapter->ifp); rctl -= nm_kr_rxspace(&na->rx_rings[0]); } #endif /* DEV_NETMAP */ E1000_WRITE_REG(&adapter->hw, E1000_RDT(0), rctl); return; } /********************************************************************* * * Free receive related data structures. 
* **********************************************************************/ static void lem_free_receive_structures(struct adapter *adapter) { struct em_buffer *rx_buffer; int i; INIT_DEBUGOUT("free_receive_structures: begin"); if (adapter->rx_sparemap) { bus_dmamap_destroy(adapter->rxtag, adapter->rx_sparemap); adapter->rx_sparemap = NULL; } /* Cleanup any existing buffers */ if (adapter->rx_buffer_area != NULL) { rx_buffer = adapter->rx_buffer_area; for (i = 0; i < adapter->num_rx_desc; i++, rx_buffer++) { if (rx_buffer->m_head != NULL) { bus_dmamap_sync(adapter->rxtag, rx_buffer->map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(adapter->rxtag, rx_buffer->map); m_freem(rx_buffer->m_head); rx_buffer->m_head = NULL; } else if (rx_buffer->map != NULL) bus_dmamap_unload(adapter->rxtag, rx_buffer->map); if (rx_buffer->map != NULL) { bus_dmamap_destroy(adapter->rxtag, rx_buffer->map); rx_buffer->map = NULL; } } } if (adapter->rx_buffer_area != NULL) { free(adapter->rx_buffer_area, M_DEVBUF); adapter->rx_buffer_area = NULL; } if (adapter->rxtag != NULL) { bus_dma_tag_destroy(adapter->rxtag); adapter->rxtag = NULL; } } /********************************************************************* * * This routine executes in interrupt context. It replenishes * the mbufs in the descriptor and sends data which has been * dma'ed into host memory to upper layer. * * We loop at most count times if count is > 0, or until done if * count < 0. * * For polling we also now return the number of cleaned packets *********************************************************************/ static bool lem_rxeof(struct adapter *adapter, int count, int *done) { if_t ifp = adapter->ifp; struct mbuf *mp; u8 status = 0, accept_frame = 0, eop = 0; u16 len, desc_len, prev_len_adj; int i, rx_sent = 0; struct e1000_rx_desc *current_desc; #ifdef BATCH_DISPATCH struct mbuf *mh = NULL, *mt = NULL; #endif /* BATCH_DISPATCH */ #ifdef NIC_PARAVIRT int retries = 0; struct paravirt_csb* csb = adapter->csb; int csb_mode = csb && csb->guest_csb_on; //ND("clear guest_rxkick at %d", adapter->next_rx_desc_to_check); if (csb_mode && csb->guest_need_rxkick) csb->guest_need_rxkick = 0; #endif /* NIC_PARAVIRT */ EM_RX_LOCK(adapter); #ifdef BATCH_DISPATCH batch_again: #endif /* BATCH_DISPATCH */ i = adapter->next_rx_desc_to_check; current_desc = &adapter->rx_desc_base[i]; bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map, BUS_DMASYNC_POSTREAD); #ifdef DEV_NETMAP if (netmap_rx_irq(ifp, 0, &rx_sent)) { EM_RX_UNLOCK(adapter); return (FALSE); } #endif /* DEV_NETMAP */ #if 1 // XXX optimization ? if (!((current_desc->status) & E1000_RXD_STAT_DD)) { if (done != NULL) *done = rx_sent; EM_RX_UNLOCK(adapter); return (FALSE); } #endif /* 0 */ while (count != 0 && if_getdrvflags(ifp) & IFF_DRV_RUNNING) { struct mbuf *m = NULL; status = current_desc->status; if ((status & E1000_RXD_STAT_DD) == 0) { #ifdef NIC_PARAVIRT if (csb_mode) { /* buffer not ready yet. Retry a few times before giving up */ if (++retries <= adapter->rx_retries) { continue; } if (csb->guest_need_rxkick == 0) { // ND("set guest_rxkick at %d", adapter->next_rx_desc_to_check); csb->guest_need_rxkick = 1; // XXX memory barrier, status volatile ? 
continue; /* double check */ } } /* no buffer ready, give up */ #endif /* NIC_PARAVIRT */ break; } #ifdef NIC_PARAVIRT if (csb_mode) { if (csb->guest_need_rxkick) // ND("clear again guest_rxkick at %d", adapter->next_rx_desc_to_check); csb->guest_need_rxkick = 0; retries = 0; } #endif /* NIC_PARAVIRT */ mp = adapter->rx_buffer_area[i].m_head; /* * Can't defer bus_dmamap_sync(9) because TBI_ACCEPT * needs to access the last received byte in the mbuf. */ bus_dmamap_sync(adapter->rxtag, adapter->rx_buffer_area[i].map, BUS_DMASYNC_POSTREAD); accept_frame = 1; prev_len_adj = 0; desc_len = le16toh(current_desc->length); if (status & E1000_RXD_STAT_EOP) { count--; eop = 1; if (desc_len < ETHER_CRC_LEN) { len = 0; prev_len_adj = ETHER_CRC_LEN - desc_len; } else len = desc_len - ETHER_CRC_LEN; } else { eop = 0; len = desc_len; } if (current_desc->errors & E1000_RXD_ERR_FRAME_ERR_MASK) { u8 last_byte; u32 pkt_len = desc_len; if (adapter->fmp != NULL) pkt_len += adapter->fmp->m_pkthdr.len; last_byte = *(mtod(mp, caddr_t) + desc_len - 1); if (TBI_ACCEPT(&adapter->hw, status, current_desc->errors, pkt_len, last_byte, adapter->min_frame_size, adapter->max_frame_size)) { e1000_tbi_adjust_stats_82543(&adapter->hw, &adapter->stats, pkt_len, adapter->hw.mac.addr, adapter->max_frame_size); if (len > 0) len--; } else accept_frame = 0; } if (accept_frame) { if (lem_get_buf(adapter, i) != 0) { if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); goto discard; } /* Assign correct length to the current fragment */ mp->m_len = len; if (adapter->fmp == NULL) { mp->m_pkthdr.len = len; adapter->fmp = mp; /* Store the first mbuf */ adapter->lmp = mp; } else { /* Chain mbuf's together */ mp->m_flags &= ~M_PKTHDR; /* * Adjust length of previous mbuf in chain if * we received less than 4 bytes in the last * descriptor. */ if (prev_len_adj > 0) { adapter->lmp->m_len -= prev_len_adj; adapter->fmp->m_pkthdr.len -= prev_len_adj; } adapter->lmp->m_next = mp; adapter->lmp = adapter->lmp->m_next; adapter->fmp->m_pkthdr.len += len; } if (eop) { if_setrcvif(adapter->fmp, ifp); if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); lem_receive_checksum(adapter, current_desc, adapter->fmp); #ifndef __NO_STRICT_ALIGNMENT if (adapter->max_frame_size > (MCLBYTES - ETHER_ALIGN) && lem_fixup_rx(adapter) != 0) goto skip; #endif if (status & E1000_RXD_STAT_VP) { adapter->fmp->m_pkthdr.ether_vtag = le16toh(current_desc->special); adapter->fmp->m_flags |= M_VLANTAG; } #ifndef __NO_STRICT_ALIGNMENT skip: #endif m = adapter->fmp; adapter->fmp = NULL; adapter->lmp = NULL; } } else { adapter->dropped_pkts++; discard: /* Reuse loaded DMA map and just update mbuf chain */ mp = adapter->rx_buffer_area[i].m_head; mp->m_len = mp->m_pkthdr.len = MCLBYTES; mp->m_data = mp->m_ext.ext_buf; mp->m_next = NULL; if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN)) m_adj(mp, ETHER_ALIGN); if (adapter->fmp != NULL) { m_freem(adapter->fmp); adapter->fmp = NULL; adapter->lmp = NULL; } m = NULL; } /* Zero out the receive descriptors status. */ current_desc->status = 0; bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); #ifdef NIC_PARAVIRT if (csb_mode) { /* the buffer at i has been already replaced by lem_get_buf() * so it is safe to set guest_rdt = i and possibly send a kick. * XXX see if we can optimize it later. */ csb->guest_rdt = i; // XXX memory barrier if (i == csb->host_rxkick_at) E1000_WRITE_REG(&adapter->hw, E1000_RDT(0), i); } #endif /* NIC_PARAVIRT */ /* Advance our pointers to the next descriptor. 
*/ if (++i == adapter->num_rx_desc) i = 0; /* Call into the stack */ if (m != NULL) { #ifdef BATCH_DISPATCH if (adapter->batch_enable) { if (mh == NULL) mh = mt = m; else mt->m_nextpkt = m; mt = m; m->m_nextpkt = NULL; rx_sent++; current_desc = &adapter->rx_desc_base[i]; continue; } #endif /* BATCH_DISPATCH */ adapter->next_rx_desc_to_check = i; EM_RX_UNLOCK(adapter); if_input(ifp, m); EM_RX_LOCK(adapter); rx_sent++; i = adapter->next_rx_desc_to_check; } current_desc = &adapter->rx_desc_base[i]; } adapter->next_rx_desc_to_check = i; #ifdef BATCH_DISPATCH if (mh) { EM_RX_UNLOCK(adapter); while ( (mt = mh) != NULL) { mh = mh->m_nextpkt; mt->m_nextpkt = NULL; if_input(ifp, mt); } EM_RX_LOCK(adapter); i = adapter->next_rx_desc_to_check; /* in case of interrupts */ if (count > 0) goto batch_again; } #endif /* BATCH_DISPATCH */ /* Advance the E1000's Receive Queue #0 "Tail Pointer". */ if (--i < 0) i = adapter->num_rx_desc - 1; #ifdef NIC_PARAVIRT if (!csb_mode) /* filter out writes */ #endif /* NIC_PARAVIRT */ E1000_WRITE_REG(&adapter->hw, E1000_RDT(0), i); if (done != NULL) *done = rx_sent; EM_RX_UNLOCK(adapter); return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE); } #ifndef __NO_STRICT_ALIGNMENT /* * When jumbo frames are enabled we should realign the entire payload on * architectures with strict alignment. This is a serious design mistake of the * 8254x as it nullifies the benefit of DMA. The 8254x only allows the RX buffer * size to be 2048/4096/8192/16384. What we really want is 2048 - ETHER_ALIGN to * align its payload. On architectures without strict alignment restrictions the * 8254x still performs unaligned memory accesses, which reduces performance too. * To avoid copying over an entire frame to align, we allocate a new mbuf and * copy the ethernet header to the new mbuf. The new mbuf is prepended into the * existing mbuf chain. * * Be aware, the best performance of the 8254x is achieved only when jumbo * frames are not used at all on architectures with strict alignment. */ static int lem_fixup_rx(struct adapter *adapter) { struct mbuf *m, *n; int error; error = 0; m = adapter->fmp; if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) { bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len); m->m_data += ETHER_HDR_LEN; } else { MGETHDR(n, M_NOWAIT, MT_DATA); if (n != NULL) { bcopy(m->m_data, n->m_data, ETHER_HDR_LEN); m->m_data += ETHER_HDR_LEN; m->m_len -= ETHER_HDR_LEN; n->m_len = ETHER_HDR_LEN; M_MOVE_PKTHDR(n, m); n->m_next = m; adapter->fmp = n; } else { adapter->dropped_pkts++; m_freem(adapter->fmp); adapter->fmp = NULL; error = ENOMEM; } } return (error); } #endif /********************************************************************* * * Verify that the hardware indicated that the checksum is valid. * Inform the stack about the status of the checksum so that the stack * doesn't spend time verifying it. * *********************************************************************/ static void lem_receive_checksum(struct adapter *adapter, struct e1000_rx_desc *rx_desc, struct mbuf *mp) { /* 82543 or newer only */ if ((adapter->hw.mac.type < e1000_82543) || /* Ignore Checksum bit is set */ (rx_desc->status & E1000_RXD_STAT_IXSM)) { mp->m_pkthdr.csum_flags = 0; return; } if (rx_desc->status & E1000_RXD_STAT_IPCS) { /* Did it pass? */ if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) { /* IP Checksum Good */ mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED; mp->m_pkthdr.csum_flags |= CSUM_IP_VALID; } else { mp->m_pkthdr.csum_flags = 0; } } if (rx_desc->status & E1000_RXD_STAT_TCPCS) { /* Did it pass?
*/ if (!(rx_desc->errors & E1000_RXD_ERR_TCPE)) { mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); mp->m_pkthdr.csum_data = htons(0xffff); } } } /* * This routine is run via a vlan * config EVENT */ static void lem_register_vlan(void *arg, if_t ifp, u16 vtag) { struct adapter *adapter = if_getsoftc(ifp); u32 index, bit; if (if_getsoftc(ifp) != arg) /* Not our event */ return; if ((vtag == 0) || (vtag > 4095)) /* Invalid ID */ return; EM_CORE_LOCK(adapter); index = (vtag >> 5) & 0x7F; bit = vtag & 0x1F; adapter->shadow_vfta[index] |= (1 << bit); ++adapter->num_vlans; /* Re-init to load the changes */ if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER) lem_init_locked(adapter); EM_CORE_UNLOCK(adapter); } /* * This routine is run via a vlan * unconfig EVENT */ static void lem_unregister_vlan(void *arg, if_t ifp, u16 vtag) { struct adapter *adapter = if_getsoftc(ifp); u32 index, bit; if (if_getsoftc(ifp) != arg) return; if ((vtag == 0) || (vtag > 4095)) /* Invalid */ return; EM_CORE_LOCK(adapter); index = (vtag >> 5) & 0x7F; bit = vtag & 0x1F; adapter->shadow_vfta[index] &= ~(1 << bit); --adapter->num_vlans; /* Re-init to load the changes */ if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER) lem_init_locked(adapter); EM_CORE_UNLOCK(adapter); } static void lem_setup_vlan_hw_support(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; u32 reg; /* ** We get here through init_locked, meaning ** a soft reset; this has already cleared ** the VFTA and other state, so if no ** vlans have been registered do nothing. */ if (adapter->num_vlans == 0) return; /* ** A soft reset zeroes out the VFTA, so ** we need to repopulate it now. */ for (int i = 0; i < EM_VFTA_SIZE; i++) if (adapter->shadow_vfta[i] != 0) E1000_WRITE_REG_ARRAY(hw, E1000_VFTA, i, adapter->shadow_vfta[i]); reg = E1000_READ_REG(hw, E1000_CTRL); reg |= E1000_CTRL_VME; E1000_WRITE_REG(hw, E1000_CTRL, reg); /* Enable the Filter Table */ reg = E1000_READ_REG(hw, E1000_RCTL); reg &= ~E1000_RCTL_CFIEN; reg |= E1000_RCTL_VFE; E1000_WRITE_REG(hw, E1000_RCTL, reg); } static void lem_enable_intr(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; u32 ims_mask = IMS_ENABLE_MASK; E1000_WRITE_REG(hw, E1000_IMS, ims_mask); } static void lem_disable_intr(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff); } /* * Bit of a misnomer: what this really means is * to enable OS management of the system, i.e. * to disable special hardware management features */ static void lem_init_manageability(struct adapter *adapter) { /* A shared code workaround */ if (adapter->has_manage) { int manc = E1000_READ_REG(&adapter->hw, E1000_MANC); /* disable hardware interception of ARP */ manc &= ~(E1000_MANC_ARP_EN); E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc); } } /* * Give control back to hardware management * controller if there is one. */ static void lem_release_manageability(struct adapter *adapter) { if (adapter->has_manage) { int manc = E1000_READ_REG(&adapter->hw, E1000_MANC); /* re-enable hardware interception of ARP */ manc |= E1000_MANC_ARP_EN; E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc); } } /* * lem_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit. * For ASF and Pass Through versions of f/w this means * that the driver is loaded. For AMT versions of the f/w * this means that the network i/f is open.
*/ static void lem_get_hw_control(struct adapter *adapter) { u32 ctrl_ext; ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext | E1000_CTRL_EXT_DRV_LOAD); return; } /* * lem_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit. * For ASF and Pass Through versions of f/w this means that * the driver is no longer loaded. For AMT versions of the * f/w this means that the network i/f is closed. */ static void lem_release_hw_control(struct adapter *adapter) { u32 ctrl_ext; if (!adapter->has_manage) return; ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD); return; } static int lem_is_valid_ether_addr(u8 *addr) { char zero_addr[6] = { 0, 0, 0, 0, 0, 0 }; if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) { return (FALSE); } return (TRUE); } /* ** Parse the interface capabilities with regard ** to both system management and wake-on-lan for ** later use. */ static void lem_get_wakeup(device_t dev) { struct adapter *adapter = device_get_softc(dev); u16 eeprom_data = 0, device_id, apme_mask; adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw); apme_mask = EM_EEPROM_APME; switch (adapter->hw.mac.type) { case e1000_82542: case e1000_82543: break; case e1000_82544: e1000_read_nvm(&adapter->hw, NVM_INIT_CONTROL2_REG, 1, &eeprom_data); apme_mask = EM_82544_APME; break; case e1000_82546: case e1000_82546_rev_3: if (adapter->hw.bus.func == 1) { e1000_read_nvm(&adapter->hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data); break; } else e1000_read_nvm(&adapter->hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data); break; default: e1000_read_nvm(&adapter->hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data); break; } if (eeprom_data & apme_mask) adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC); /* * We have the eeprom settings, now apply the special cases * where the eeprom may be wrong or the board won't support * wake on lan on a particular port */ device_id = pci_get_device(dev); switch (device_id) { case E1000_DEV_ID_82546GB_PCIE: adapter->wol = 0; break; case E1000_DEV_ID_82546EB_FIBER: case E1000_DEV_ID_82546GB_FIBER: /* Wake events only supported on port A for dual fiber * regardless of eeprom setting */ if (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_FUNC_1) adapter->wol = 0; break; case E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3: /* if quad port adapter, disable WoL on all but port A */ if (global_quad_port_a != 0) adapter->wol = 0; /* Reset for multiple quad port adapters */ if (++global_quad_port_a == 4) global_quad_port_a = 0; break; } return; } /* * Enable PCI Wake On Lan capability */ static void lem_enable_wakeup(device_t dev) { struct adapter *adapter = device_get_softc(dev); if_t ifp = adapter->ifp; u32 pmc, ctrl, ctrl_ext, rctl; u16 status; if ((pci_find_cap(dev, PCIY_PMG, &pmc) != 0)) return; /* Advertise the wakeup capability */ ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL); ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3); E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl); E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN); /* Keep the laser running on Fiber adapters */ if (adapter->hw.phy.media_type == e1000_media_type_fiber || adapter->hw.phy.media_type == e1000_media_type_internal_serdes) { ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA; E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext); } /* ** Determine type of Wakeup: note that wol ** is set with all bits on 
by default. */ if ((if_getcapenable(ifp) & IFCAP_WOL_MAGIC) == 0) adapter->wol &= ~E1000_WUFC_MAG; if ((if_getcapenable(ifp) & IFCAP_WOL_MCAST) == 0) adapter->wol &= ~E1000_WUFC_MC; else { rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); rctl |= E1000_RCTL_MPE; E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl); } if (adapter->hw.mac.type == e1000_pchlan) { if (lem_enable_phy_wakeup(adapter)) return; } else { E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN); E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol); } /* Request PME */ status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2); status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE); if (if_getcapenable(ifp) & IFCAP_WOL) status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE; pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2); return; } /* ** WOL in the newer chipset interfaces (pchlan) ** requires things to be copied into the PHY */ static int lem_enable_phy_wakeup(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; u32 mreg, ret = 0; u16 preg; /* copy MAC RARs to PHY RARs */ for (int i = 0; i < adapter->hw.mac.rar_entry_count; i++) { mreg = E1000_READ_REG(hw, E1000_RAL(i)); e1000_write_phy_reg(hw, BM_RAR_L(i), (u16)(mreg & 0xFFFF)); e1000_write_phy_reg(hw, BM_RAR_M(i), (u16)((mreg >> 16) & 0xFFFF)); mreg = E1000_READ_REG(hw, E1000_RAH(i)); e1000_write_phy_reg(hw, BM_RAR_H(i), (u16)(mreg & 0xFFFF)); e1000_write_phy_reg(hw, BM_RAR_CTRL(i), (u16)((mreg >> 16) & 0xFFFF)); } /* copy MAC MTA to PHY MTA */ for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) { mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i); e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF)); e1000_write_phy_reg(hw, BM_MTA(i) + 1, (u16)((mreg >> 16) & 0xFFFF)); } /* configure PHY Rx Control register */ e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg); mreg = E1000_READ_REG(hw, E1000_RCTL); if (mreg & E1000_RCTL_UPE) preg |= BM_RCTL_UPE; if (mreg & E1000_RCTL_MPE) preg |= BM_RCTL_MPE; preg &= ~(BM_RCTL_MO_MASK); if (mreg & E1000_RCTL_MO_3) preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT) << BM_RCTL_MO_SHIFT); if (mreg & E1000_RCTL_BAM) preg |= BM_RCTL_BAM; if (mreg & E1000_RCTL_PMCF) preg |= BM_RCTL_PMCF; mreg = E1000_READ_REG(hw, E1000_CTRL); if (mreg & E1000_CTRL_RFCE) preg |= BM_RCTL_RFCE; e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg); /* enable PHY wakeup in MAC register */ E1000_WRITE_REG(hw, E1000_WUC, E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN); E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol); /* configure and enable PHY wakeup in PHY registers */ e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol); e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN); /* activate PHY wakeup */ ret = hw->phy.ops.acquire(hw); if (ret) { printf("Could not acquire PHY\n"); return ret; } e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT, (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT)); ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg); if (ret) { printf("Could not read PHY page 769\n"); goto out; } preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT; ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg); if (ret) printf("Could not set PHY Host Wakeup bit\n"); out: hw->phy.ops.release(hw); return ret; } static void lem_led_func(void *arg, int onoff) { struct adapter *adapter = arg; EM_CORE_LOCK(adapter); if (onoff) { e1000_setup_led(&adapter->hw); e1000_led_on(&adapter->hw); } else { e1000_led_off(&adapter->hw); e1000_cleanup_led(&adapter->hw); } EM_CORE_UNLOCK(adapter); }
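/*
 * A worked example of the ending-address check implemented in
 * lem_fill_descriptors() below (illustrative numbers, not taken from
 * the errata text): a segment at address 0x1001 with length 0x10
 * yields safe_terminator = ((0x1001 & 0x7) + (0x10 & 0xF)) & 0xF = 1,
 * which falls in the hang range 1-4, so the segment is split into
 * (0x1001, 0xC) plus a trailing 4-byte descriptor at 0x100D.
 */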
/********************************************************************* * 82544 Coexistence issue workaround. * There are 2 issues. * 1. Transmit Hang issue. * To detect this issue, the following equation can be used: * SIZE[3:0] + ADDR[2:0] = SUM[3:0]. * If SUM[3:0] is between 1 and 4, we will have this issue. * * 2. DAC issue. * To detect this issue, the following equation can be used: * SIZE[3:0] + ADDR[2:0] = SUM[3:0]. * If SUM[3:0] is between 9 and c, we will have this issue. * * * WORKAROUND: * Make sure we do not have an ending address (SUM[3:0]) * of 1,2,3,4 (Hang) or 9,a,b,c (DAC) * *************************************************************************/ static u32 lem_fill_descriptors (bus_addr_t address, u32 length, PDESC_ARRAY desc_array) { u32 safe_terminator; /* Since the issue is sensitive to both length and address, * handle the short-length case first. */ if (length <= 4) { desc_array->descriptor[0].address = address; desc_array->descriptor[0].length = length; desc_array->elements = 1; return (desc_array->elements); } safe_terminator = (u32)((((u32)address & 0x7) + (length & 0xF)) & 0xF); /* if it does not fall in 0x1-0x4 or 0x9-0xC, a single descriptor is safe */ if (safe_terminator == 0 || (safe_terminator > 4 && safe_terminator < 9) || (safe_terminator > 0xC && safe_terminator <= 0xF)) { desc_array->descriptor[0].address = address; desc_array->descriptor[0].length = length; desc_array->elements = 1; return (desc_array->elements); } desc_array->descriptor[0].address = address; desc_array->descriptor[0].length = length - 4; desc_array->descriptor[1].address = address + (length - 4); desc_array->descriptor[1].length = 4; desc_array->elements = 2; return (desc_array->elements); } /********************************************************************** * * Update the board statistics counters.
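* The hardware statistics registers are clear-on-read, which is why
* every register read below is accumulated (+=) into the driver's
* soft copies.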
* **********************************************************************/ static void lem_update_stats_counters(struct adapter *adapter) { if(adapter->hw.phy.media_type == e1000_media_type_copper || (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) { adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS); adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC); } adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS); adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC); adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC); adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL); adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC); adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL); adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC); adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC); adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC); adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC); adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC); adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC); adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC); adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC); adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64); adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127); adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255); adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511); adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023); adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522); adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC); adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC); adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC); adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC); /* For the 64-bit byte counters the low dword must be read first. 
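* e.g. E1000_GORCL is read before E1000_GORCH; the high-dword read
* then returns the upper half and clears the pair, so the two reads
* form one consistent 64-bit sample.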
*/ /* Both registers clear on the read of the high dword */ adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) + ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32); adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) + ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32); adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC); adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC); adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC); adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC); adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC); adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH); adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH); adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR); adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT); adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64); adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127); adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255); adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511); adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023); adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522); adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC); adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC); if (adapter->hw.mac.type >= e1000_82543) { adapter->stats.algnerrc += E1000_READ_REG(&adapter->hw, E1000_ALGNERRC); adapter->stats.rxerrc += E1000_READ_REG(&adapter->hw, E1000_RXERRC); adapter->stats.tncrs += E1000_READ_REG(&adapter->hw, E1000_TNCRS); adapter->stats.cexterr += E1000_READ_REG(&adapter->hw, E1000_CEXTERR); adapter->stats.tsctc += E1000_READ_REG(&adapter->hw, E1000_TSCTC); adapter->stats.tsctfc += E1000_READ_REG(&adapter->hw, E1000_TSCTFC); } } static uint64_t lem_get_counter(if_t ifp, ift_counter cnt) { struct adapter *adapter; adapter = if_getsoftc(ifp); switch (cnt) { case IFCOUNTER_COLLISIONS: return (adapter->stats.colc); case IFCOUNTER_IERRORS: return (adapter->dropped_pkts + adapter->stats.rxerrc + adapter->stats.crcerrs + adapter->stats.algnerrc + adapter->stats.ruc + adapter->stats.roc + adapter->stats.mpc + adapter->stats.cexterr); case IFCOUNTER_OERRORS: return (adapter->stats.ecol + adapter->stats.latecol + adapter->watchdog_events); default: return (if_get_counter_default(ifp, cnt)); } } /* Export a single 32-bit register via a read-only sysctl. */ static int lem_sysctl_reg_handler(SYSCTL_HANDLER_ARGS) { struct adapter *adapter; u_int val; adapter = oidp->oid_arg1; val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2); return (sysctl_handle_int(oidp, &val, 0, req)); } /* * Add sysctl variables, one per statistic, to the system. 
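* e.g. "sysctl dev.em.0.mac_stats.missed_packets" (assuming unit 0)
* would read one of the MAC counters registered below.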
*/ static void lem_add_hw_stats(struct adapter *adapter) { device_t dev = adapter->dev; struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev); struct sysctl_oid *tree = device_get_sysctl_tree(dev); struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree); struct e1000_hw_stats *stats = &adapter->stats; struct sysctl_oid *stat_node; struct sysctl_oid_list *stat_list; /* Driver Statistics */ SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "cluster_alloc_fail", CTLFLAG_RD, &adapter->mbuf_cluster_failed, "Std mbuf cluster failed"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_fail", CTLFLAG_RD, &adapter->mbuf_defrag_failed, "Defragmenting mbuf chain failed"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", CTLFLAG_RD, &adapter->dropped_pkts, "Driver dropped packets"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail", CTLFLAG_RD, &adapter->no_tx_dma_setup, "Driver tx dma failure in xmit"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_desc_fail1", CTLFLAG_RD, &adapter->no_tx_desc_avail1, "Not enough tx descriptors failure in xmit"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_desc_fail2", CTLFLAG_RD, &adapter->no_tx_desc_avail2, "Not enough tx descriptors failure in xmit"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns", CTLFLAG_RD, &adapter->rx_overruns, "RX overruns"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts", CTLFLAG_RD, &adapter->watchdog_events, "Watchdog timeouts"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control", CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL, lem_sysctl_reg_handler, "IU", "Device Control Register"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control", CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL, lem_sysctl_reg_handler, "IU", "Receiver Control Register"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water", CTLFLAG_RD, &adapter->hw.fc.high_water, 0, "Flow Control High Watermark"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water", CTLFLAG_RD, &adapter->hw.fc.low_water, 0, "Flow Control Low Watermark"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "fifo_workaround", CTLFLAG_RD, &adapter->tx_fifo_wrk_cnt, "TX FIFO workaround events"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "fifo_reset", CTLFLAG_RD, &adapter->tx_fifo_reset_cnt, "TX FIFO resets"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "txd_head", CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDH(0), lem_sysctl_reg_handler, "IU", "Transmit Descriptor Head"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "txd_tail", CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDT(0), lem_sysctl_reg_handler, "IU", "Transmit Descriptor Tail"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rxd_head", CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDH(0), lem_sysctl_reg_handler, "IU", "Receive Descriptor Head"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rxd_tail", CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDT(0), lem_sysctl_reg_handler, "IU", "Receive Descriptor Tail"); /* MAC stats get their own sub node */ stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", CTLFLAG_RD, NULL, "Statistics"); stat_list = SYSCTL_CHILDREN(stat_node); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll", CTLFLAG_RD, &stats->ecol, "Excessive collisions"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll", CTLFLAG_RD, &stats->scc, "Single collisions"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll", CTLFLAG_RD, &stats->mcc, "Multiple collisions"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll", CTLFLAG_RD, &stats->latecol, "Late collisions"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count", CTLFLAG_RD, 
&stats->colc, "Collision Count"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors", CTLFLAG_RD, &adapter->stats.symerrs, "Symbol Errors"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors", CTLFLAG_RD, &adapter->stats.sec, "Sequence Errors"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count", CTLFLAG_RD, &adapter->stats.dc, "Defer Count"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets", CTLFLAG_RD, &adapter->stats.mpc, "Missed Packets"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff", CTLFLAG_RD, &adapter->stats.rnbc, "Receive No Buffers"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize", CTLFLAG_RD, &adapter->stats.ruc, "Receive Undersize"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented", CTLFLAG_RD, &adapter->stats.rfc, "Fragmented Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize", CTLFLAG_RD, &adapter->stats.roc, "Oversized Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber", CTLFLAG_RD, &adapter->stats.rjc, "Received Jabber"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs", CTLFLAG_RD, &adapter->stats.rxerrc, "Receive Errors"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs", CTLFLAG_RD, &adapter->stats.crcerrs, "CRC errors"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs", CTLFLAG_RD, &adapter->stats.algnerrc, "Alignment Errors"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs", CTLFLAG_RD, &adapter->stats.cexterr, "Collision/Carrier extension errors"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd", CTLFLAG_RD, &adapter->stats.xonrxc, "XON Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd", CTLFLAG_RD, &adapter->stats.xontxc, "XON Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd", CTLFLAG_RD, &adapter->stats.xoffrxc, "XOFF Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd", CTLFLAG_RD, &adapter->stats.xofftxc, "XOFF Transmitted"); /* Packet Reception Stats */ SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd", CTLFLAG_RD, &adapter->stats.tpr, "Total Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd", CTLFLAG_RD, &adapter->stats.gprc, "Good Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd", CTLFLAG_RD, &adapter->stats.bprc, "Broadcast Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd", CTLFLAG_RD, &adapter->stats.mprc, "Multicast Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64", CTLFLAG_RD, &adapter->stats.prc64, "64 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127", CTLFLAG_RD, &adapter->stats.prc127, "65-127 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255", CTLFLAG_RD, &adapter->stats.prc255, "128-255 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511", CTLFLAG_RD, &adapter->stats.prc511, "256-511 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023", CTLFLAG_RD, &adapter->stats.prc1023, "512-1023 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522", CTLFLAG_RD, &adapter->stats.prc1522, "1024-1522 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd", CTLFLAG_RD, &adapter->stats.gorc, "Good Octets Received"); /* Packet Transmission Stats */ SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd", CTLFLAG_RD,
&adapter->stats.gotc, "Good Octets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd", CTLFLAG_RD, &adapter->stats.tpt, "Total Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd", CTLFLAG_RD, &adapter->stats.gptc, "Good Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd", CTLFLAG_RD, &adapter->stats.bptc, "Broadcast Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd", CTLFLAG_RD, &adapter->stats.mptc, "Multicast Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64", CTLFLAG_RD, &adapter->stats.ptc64, "64 byte frames transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127", CTLFLAG_RD, &adapter->stats.ptc127, "65-127 byte frames transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255", CTLFLAG_RD, &adapter->stats.ptc255, "128-255 byte frames transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511", CTLFLAG_RD, &adapter->stats.ptc511, "256-511 byte frames transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023", CTLFLAG_RD, &adapter->stats.ptc1023, "512-1023 byte frames transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522", CTLFLAG_RD, &adapter->stats.ptc1522, "1024-1522 byte frames transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd", CTLFLAG_RD, &adapter->stats.tsctc, "TSO Contexts Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail", CTLFLAG_RD, &adapter->stats.tsctfc, "TSO Contexts Failed"); } /********************************************************************** * * This routine provides a way to dump out the adapter eeprom, * often a useful debug/service tool. This only dumps the first * 32 words; the fields that matter are within that range. * **********************************************************************/ static int lem_sysctl_nvm_info(SYSCTL_HANDLER_ARGS) { struct adapter *adapter; int error; int result; result = -1; error = sysctl_handle_int(oidp, &result, 0, req); if (error || !req->newptr) return (error); /* * This value will cause a hex dump of the * first 32 16-bit words of the EEPROM to * the screen. */ if (result == 1) { adapter = (struct adapter *)arg1; lem_print_nvm_info(adapter); } return (error); } static void lem_print_nvm_info(struct adapter *adapter) { u16 eeprom_data; int i, j, row = 0; /* It's a bit crude, but it gets the job done */ printf("\nInterface EEPROM Dump:\n"); printf("Offset\n0x0000 "); for (i = 0, j = 0; i < 32; i++, j++) { if (j == 8) { /* Make the offset block */ j = 0; ++row; printf("\n0x00%x0 ", row); } e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data); printf("%04x ", eeprom_data); } printf("\n"); } static int lem_sysctl_int_delay(SYSCTL_HANDLER_ARGS) { struct em_int_delay_info *info; struct adapter *adapter; u32 regval; int error; int usecs; int ticks; info = (struct em_int_delay_info *)arg1; usecs = info->value; error = sysctl_handle_int(oidp, &usecs, 0, req); if (error != 0 || req->newptr == NULL) return (error); if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535)) return (EINVAL); info->value = usecs; ticks = EM_USECS_TO_TICKS(usecs); if (info->offset == E1000_ITR) /* units are 256ns here */ ticks *= 4; adapter = info->adapter; EM_CORE_LOCK(adapter); regval = E1000_READ_OFFSET(&adapter->hw, info->offset); regval = (regval & ~0xffff) | (ticks & 0xffff); /* Handle a few special cases.
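   Assuming the usual e1000 convention that a delay tick is 1.024 us,
   the ITR's 256 ns units explain the ticks *= 4 above (1024 / 256 = 4).
   The register update itself is a read-modify-write that preserves the
   upper 16 bits; as a standalone sketch:

	regval = (regval & ~0xffff) | (ticks & 0xffff);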
*/ switch (info->offset) { case E1000_RDTR: break; case E1000_TIDV: if (ticks == 0) { adapter->txd_cmd &= ~E1000_TXD_CMD_IDE; /* Don't write 0 into the TIDV register. */ regval++; } else adapter->txd_cmd |= E1000_TXD_CMD_IDE; break; } E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval); EM_CORE_UNLOCK(adapter); return (0); } static void lem_add_int_delay_sysctl(struct adapter *adapter, const char *name, const char *description, struct em_int_delay_info *info, int offset, int value) { info->adapter = adapter; info->offset = offset; info->value = value; SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev), SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)), OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, info, 0, lem_sysctl_int_delay, "I", description); } static void lem_set_flow_cntrl(struct adapter *adapter, const char *name, const char *description, int *limit, int value) { *limit = value; SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev), SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)), OID_AUTO, name, CTLFLAG_RW, limit, value, description); } static void lem_add_rx_process_limit(struct adapter *adapter, const char *name, const char *description, int *limit, int value) { *limit = value; SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev), SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)), OID_AUTO, name, CTLFLAG_RW, limit, value, description); } Index: head/sys/dev/ixgb/if_ixgb.c =================================================================== --- head/sys/dev/ixgb/if_ixgb.c (revision 302383) +++ head/sys/dev/ixgb/if_ixgb.c (revision 302384) @@ -1,2536 +1,2537 @@ /******************************************************************************* Copyright (c) 2001-2004, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
***************************************************************************/ /*$FreeBSD$*/ #ifdef HAVE_KERNEL_OPTION_HEADERS #include "opt_device_polling.h" #endif #include /********************************************************************* * Set this to one to display debug statistics *********************************************************************/ int ixgb_display_debug_stats = 0; /********************************************************************* * Linked list of board private structures for all NICs found *********************************************************************/ struct adapter *ixgb_adapter_list = NULL; /********************************************************************* * Driver version *********************************************************************/ char ixgb_driver_version[] = "1.0.6"; char ixgb_copyright[] = "Copyright (c) 2001-2004 Intel Corporation."; /********************************************************************* * PCI Device ID Table * * Used by probe to select devices to load on * Last field stores an index into ixgb_strings * Last entry must be all 0s * * { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index } *********************************************************************/ static ixgb_vendor_info_t ixgb_vendor_info_array[] = { /* Intel(R) PRO/10000 Network Connection */ {IXGB_VENDOR_ID, IXGB_DEVICE_ID_82597EX, PCI_ANY_ID, PCI_ANY_ID, 0}, {IXGB_VENDOR_ID, IXGB_DEVICE_ID_82597EX_SR, PCI_ANY_ID, PCI_ANY_ID, 0}, /* required last entry */ {0, 0, 0, 0, 0} }; /********************************************************************* * Table of branding strings for all supported NICs. *********************************************************************/ static char *ixgb_strings[] = { "Intel(R) PRO/10GbE Network Driver" }; /********************************************************************* * Function prototypes *********************************************************************/ static int ixgb_probe(device_t); static int ixgb_attach(device_t); static int ixgb_detach(device_t); static int ixgb_shutdown(device_t); static void ixgb_intr(void *); static void ixgb_start(struct ifnet *); static void ixgb_start_locked(struct ifnet *); static int ixgb_ioctl(struct ifnet *, IOCTL_CMD_TYPE, caddr_t); static uint64_t ixgb_get_counter(struct ifnet *, ift_counter); static void ixgb_watchdog(struct adapter *); static void ixgb_init(void *); static void ixgb_init_locked(struct adapter *); static void ixgb_stop(void *); static void ixgb_media_status(struct ifnet *, struct ifmediareq *); static int ixgb_media_change(struct ifnet *); static void ixgb_identify_hardware(struct adapter *); static int ixgb_allocate_pci_resources(struct adapter *); static void ixgb_free_pci_resources(struct adapter *); static void ixgb_local_timer(void *); static int ixgb_hardware_init(struct adapter *); static int ixgb_setup_interface(device_t, struct adapter *); static int ixgb_setup_transmit_structures(struct adapter *); static void ixgb_initialize_transmit_unit(struct adapter *); static int ixgb_setup_receive_structures(struct adapter *); static void ixgb_initialize_receive_unit(struct adapter *); static void ixgb_enable_intr(struct adapter *); static void ixgb_disable_intr(struct adapter *); static void ixgb_free_transmit_structures(struct adapter *); static void ixgb_free_receive_structures(struct adapter *); static void ixgb_update_stats_counters(struct adapter *); static void ixgb_clean_transmit_interrupts(struct adapter *); static int 
ixgb_allocate_receive_structures(struct adapter *); static int ixgb_allocate_transmit_structures(struct adapter *); static int ixgb_process_receive_interrupts(struct adapter *, int); static void ixgb_receive_checksum(struct adapter *, struct ixgb_rx_desc * rx_desc, struct mbuf *); static void ixgb_transmit_checksum_setup(struct adapter *, struct mbuf *, u_int8_t *); static void ixgb_set_promisc(struct adapter *); static void ixgb_disable_promisc(struct adapter *); static void ixgb_set_multi(struct adapter *); static void ixgb_print_hw_stats(struct adapter *); static void ixgb_print_link_status(struct adapter *); static int ixgb_get_buf(int i, struct adapter *, struct mbuf *); static void ixgb_enable_vlans(struct adapter * adapter); static int ixgb_encap(struct adapter * adapter, struct mbuf * m_head); static int ixgb_sysctl_stats(SYSCTL_HANDLER_ARGS); static int ixgb_dma_malloc(struct adapter *, bus_size_t, struct ixgb_dma_alloc *, int); static void ixgb_dma_free(struct adapter *, struct ixgb_dma_alloc *); #ifdef DEVICE_POLLING static poll_handler_t ixgb_poll; #endif /********************************************************************* * FreeBSD Device Interface Entry Points *********************************************************************/ static device_method_t ixgb_methods[] = { /* Device interface */ DEVMETHOD(device_probe, ixgb_probe), DEVMETHOD(device_attach, ixgb_attach), DEVMETHOD(device_detach, ixgb_detach), DEVMETHOD(device_shutdown, ixgb_shutdown), DEVMETHOD_END }; static driver_t ixgb_driver = { "ixgb", ixgb_methods, sizeof(struct adapter), }; static devclass_t ixgb_devclass; DRIVER_MODULE(ixgb, pci, ixgb_driver, ixgb_devclass, 0, 0); MODULE_DEPEND(ixgb, pci, 1, 1, 1); MODULE_DEPEND(ixgb, ether, 1, 1, 1); /* some defines for controlling descriptor fetches in h/w */ #define RXDCTL_PTHRESH_DEFAULT 128 /* chip considers prefetch below this */ #define RXDCTL_HTHRESH_DEFAULT 16 /* chip will only prefetch if tail is * pushed this many descriptors from * head */ #define RXDCTL_WTHRESH_DEFAULT 0 /* chip writes back at this many or RXT0 */ /********************************************************************* * Device identification routine * * ixgb_probe determines if the driver should be loaded on the * adapter based on PCI vendor/device id of the adapter. * * return 0 on success, positive on failure *********************************************************************/ static int ixgb_probe(device_t dev) { ixgb_vendor_info_t *ent; u_int16_t pci_vendor_id = 0; u_int16_t pci_device_id = 0; u_int16_t pci_subvendor_id = 0; u_int16_t pci_subdevice_id = 0; char adapter_name[60]; INIT_DEBUGOUT("ixgb_probe: begin"); pci_vendor_id = pci_get_vendor(dev); if (pci_vendor_id != IXGB_VENDOR_ID) return (ENXIO); pci_device_id = pci_get_device(dev); pci_subvendor_id = pci_get_subvendor(dev); pci_subdevice_id = pci_get_subdevice(dev); ent = ixgb_vendor_info_array; while (ent->vendor_id != 0) { if ((pci_vendor_id == ent->vendor_id) && (pci_device_id == ent->device_id) && ((pci_subvendor_id == ent->subvendor_id) || (ent->subvendor_id == PCI_ANY_ID)) && ((pci_subdevice_id == ent->subdevice_id) || (ent->subdevice_id == PCI_ANY_ID))) { sprintf(adapter_name, "%s, Version - %s", ixgb_strings[ent->index], ixgb_driver_version); device_set_desc_copy(dev, adapter_name); return (BUS_PROBE_DEFAULT); } ent++; } return (ENXIO); } /********************************************************************* * Device initialization routine * * The attach entry point is called when the driver is being loaded.
* This routine identifies the type of hardware, allocates all resources * and initializes the hardware. * * return 0 on success, positive on failure *********************************************************************/ static int ixgb_attach(device_t dev) { struct adapter *adapter; int tsize, rsize; int error = 0; device_printf(dev, "%s\n", ixgb_copyright); INIT_DEBUGOUT("ixgb_attach: begin"); /* Allocate, clear, and link in our adapter structure */ if (!(adapter = device_get_softc(dev))) { device_printf(dev, "adapter structure allocation failed\n"); return (ENOMEM); } bzero(adapter, sizeof(struct adapter)); adapter->dev = dev; adapter->osdep.dev = dev; IXGB_LOCK_INIT(adapter, device_get_nameunit(dev)); if (ixgb_adapter_list != NULL) ixgb_adapter_list->prev = adapter; adapter->next = ixgb_adapter_list; ixgb_adapter_list = adapter; /* SYSCTL APIs */ SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "stats", CTLTYPE_INT | CTLFLAG_RW, (void *)adapter, 0, ixgb_sysctl_stats, "I", "Statistics"); callout_init_mtx(&adapter->timer, &adapter->mtx, 0); /* Determine hardware revision */ ixgb_identify_hardware(adapter); /* Parameters (to be read from user) */ adapter->num_tx_desc = IXGB_MAX_TXD; adapter->num_rx_desc = IXGB_MAX_RXD; adapter->tx_int_delay = TIDV; adapter->rx_int_delay = RDTR; adapter->rx_buffer_len = IXGB_RXBUFFER_2048; adapter->hw.fc.high_water = FCRTH; adapter->hw.fc.low_water = FCRTL; adapter->hw.fc.pause_time = FCPAUSE; adapter->hw.fc.send_xon = TRUE; adapter->hw.fc.type = FLOW_CONTROL; /* Set the max frame size assuming standard ethernet sized frames */ adapter->hw.max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHER_CRC_LEN; if (ixgb_allocate_pci_resources(adapter)) { device_printf(dev, "Allocation of PCI resources failed\n"); error = ENXIO; goto err_pci; } tsize = IXGB_ROUNDUP(adapter->num_tx_desc * sizeof(struct ixgb_tx_desc), 4096); /* Allocate Transmit Descriptor ring */ if (ixgb_dma_malloc(adapter, tsize, &adapter->txdma, BUS_DMA_NOWAIT)) { device_printf(dev, "Unable to allocate TxDescriptor memory\n"); error = ENOMEM; goto err_tx_desc; } adapter->tx_desc_base = (struct ixgb_tx_desc *) adapter->txdma.dma_vaddr; rsize = IXGB_ROUNDUP(adapter->num_rx_desc * sizeof(struct ixgb_rx_desc), 4096); /* Allocate Receive Descriptor ring */ if (ixgb_dma_malloc(adapter, rsize, &adapter->rxdma, BUS_DMA_NOWAIT)) { device_printf(dev, "Unable to allocate rx_desc memory\n"); error = ENOMEM; goto err_rx_desc; } adapter->rx_desc_base = (struct ixgb_rx_desc *) adapter->rxdma.dma_vaddr; /* Allocate multicast array memory. 
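   The table is a flat byte array holding up to MAX_NUM_MULTICAST_ADDRESSES
   entries, each IXGB_ETH_LENGTH_OF_ADDRESS (six) bytes wide; entry i is
   addressed the same way ixgb_set_multi() fills it in later:

	entry = &adapter->mta[i * IXGB_ETH_LENGTH_OF_ADDRESS];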
*/ adapter->mta = malloc(sizeof(u_int8_t) * IXGB_ETH_LENGTH_OF_ADDRESS * MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT); if (adapter->mta == NULL) { device_printf(dev, "Cannot allocate multicast setup array\n"); error = ENOMEM; goto err_hw_init; } /* Initialize the hardware */ if (ixgb_hardware_init(adapter)) { device_printf(dev, "Unable to initialize the hardware\n"); error = EIO; goto err_hw_init; } /* Setup OS specific network interface */ if (ixgb_setup_interface(dev, adapter) != 0) goto err_hw_init; /* Initialize statistics */ ixgb_clear_hw_cntrs(&adapter->hw); ixgb_update_stats_counters(adapter); INIT_DEBUGOUT("ixgb_attach: end"); return (0); err_hw_init: ixgb_dma_free(adapter, &adapter->rxdma); err_rx_desc: ixgb_dma_free(adapter, &adapter->txdma); err_tx_desc: err_pci: if (adapter->ifp != NULL) if_free(adapter->ifp); ixgb_free_pci_resources(adapter); sysctl_ctx_free(&adapter->sysctl_ctx); free(adapter->mta, M_DEVBUF); return (error); } /********************************************************************* * Device removal routine * * The detach entry point is called when the driver is being removed. * This routine stops the adapter and deallocates all the resources * that were allocated for driver operation. * * return 0 on success, positive on failure *********************************************************************/ static int ixgb_detach(device_t dev) { struct adapter *adapter = device_get_softc(dev); struct ifnet *ifp = adapter->ifp; INIT_DEBUGOUT("ixgb_detach: begin"); #ifdef DEVICE_POLLING if (ifp->if_capenable & IFCAP_POLLING) ether_poll_deregister(ifp); #endif IXGB_LOCK(adapter); adapter->in_detach = 1; ixgb_stop(adapter); IXGB_UNLOCK(adapter); #if __FreeBSD_version < 500000 ether_ifdetach(ifp, ETHER_BPF_SUPPORTED); #else ether_ifdetach(ifp); #endif callout_drain(&adapter->timer); ixgb_free_pci_resources(adapter); #if __FreeBSD_version >= 500000 if_free(ifp); #endif /* Free Transmit Descriptor ring */ if (adapter->tx_desc_base) { ixgb_dma_free(adapter, &adapter->txdma); adapter->tx_desc_base = NULL; } /* Free Receive Descriptor ring */ if (adapter->rx_desc_base) { ixgb_dma_free(adapter, &adapter->rxdma); adapter->rx_desc_base = NULL; } /* Remove from the adapter list */ if (ixgb_adapter_list == adapter) ixgb_adapter_list = adapter->next; if (adapter->next != NULL) adapter->next->prev = adapter->prev; if (adapter->prev != NULL) adapter->prev->next = adapter->next; free(adapter->mta, M_DEVBUF); IXGB_LOCK_DESTROY(adapter); return (0); } /********************************************************************* * * Shutdown entry point * **********************************************************************/ static int ixgb_shutdown(device_t dev) { struct adapter *adapter = device_get_softc(dev); IXGB_LOCK(adapter); ixgb_stop(adapter); IXGB_UNLOCK(adapter); return (0); } /********************************************************************* * Transmit entry point * * ixgb_start is called by the stack to initiate a transmit. * The driver will remain in this routine as long as there are * packets to transmit and transmit resources are available. * If resources are not available, the stack is notified and * the packet is requeued.
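 *
 * The requeue path relies on the IFF_DRV_OACTIVE handshake: the
 * transmit side sets the flag when descriptors run out, and the
 * cleanup path clears it once enough are reclaimed. A condensed
 * sketch of the two halves as implemented below:
 *
 *	if (ixgb_encap(adapter, m_head)) {
 *		ifp->if_drv_flags |= IFF_DRV_OACTIVE;
 *		IF_PREPEND(&ifp->if_snd, m_head);
 *		break;
 *	}
 *
 * and, from ixgb_clean_transmit_interrupts():
 *
 *	if (num_avail > IXGB_TX_CLEANUP_THRESHOLD)
 *		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;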
**********************************************************************/ static void ixgb_start_locked(struct ifnet * ifp) { struct mbuf *m_head; struct adapter *adapter = ifp->if_softc; IXGB_LOCK_ASSERT(adapter); if (!adapter->link_active) return; while (ifp->if_snd.ifq_head != NULL) { IF_DEQUEUE(&ifp->if_snd, m_head); if (m_head == NULL) break; if (ixgb_encap(adapter, m_head)) { ifp->if_drv_flags |= IFF_DRV_OACTIVE; IF_PREPEND(&ifp->if_snd, m_head); break; } /* Send a copy of the frame to the BPF listener */ #if __FreeBSD_version < 500000 if (ifp->if_bpf) bpf_mtap(ifp, m_head); #else ETHER_BPF_MTAP(ifp, m_head); #endif /* Set timeout in case hardware has problems transmitting */ adapter->tx_timer = IXGB_TX_TIMEOUT; } return; } static void ixgb_start(struct ifnet *ifp) { struct adapter *adapter = ifp->if_softc; IXGB_LOCK(adapter); ixgb_start_locked(ifp); IXGB_UNLOCK(adapter); return; } /********************************************************************* * Ioctl entry point * * ixgb_ioctl is called when the user wants to configure the * interface. * * return 0 on success, positive on failure **********************************************************************/ static int ixgb_ioctl(struct ifnet * ifp, IOCTL_CMD_TYPE command, caddr_t data) { int mask, error = 0; struct ifreq *ifr = (struct ifreq *) data; struct adapter *adapter = ifp->if_softc; if (adapter->in_detach) goto out; switch (command) { case SIOCSIFADDR: case SIOCGIFADDR: IOCTL_DEBUGOUT("ioctl rcv'd: SIOCxIFADDR (Get/Set Interface Addr)"); ether_ioctl(ifp, command, data); break; case SIOCSIFMTU: IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)"); if (ifr->ifr_mtu > IXGB_MAX_JUMBO_FRAME_SIZE - ETHER_HDR_LEN) { error = EINVAL; } else { IXGB_LOCK(adapter); ifp->if_mtu = ifr->ifr_mtu; adapter->hw.max_frame_size = ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN; - ixgb_init_locked(adapter); + if (ifp->if_drv_flags & IFF_DRV_RUNNING) + ixgb_init_locked(adapter); IXGB_UNLOCK(adapter); } break; case SIOCSIFFLAGS: IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFFLAGS (Set Interface Flags)"); IXGB_LOCK(adapter); if (ifp->if_flags & IFF_UP) { if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { ixgb_init_locked(adapter); } ixgb_disable_promisc(adapter); ixgb_set_promisc(adapter); } else { if (ifp->if_drv_flags & IFF_DRV_RUNNING) { ixgb_stop(adapter); } } IXGB_UNLOCK(adapter); break; case SIOCADDMULTI: case SIOCDELMULTI: IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI"); if (ifp->if_drv_flags & IFF_DRV_RUNNING) { IXGB_LOCK(adapter); ixgb_disable_intr(adapter); ixgb_set_multi(adapter); ixgb_enable_intr(adapter); IXGB_UNLOCK(adapter); } break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: IOCTL_DEBUGOUT("ioctl rcv'd: SIOCxIFMEDIA (Get/Set Interface Media)"); error = ifmedia_ioctl(ifp, ifr, &adapter->media, command); break; case SIOCSIFCAP: IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)"); mask = ifr->ifr_reqcap ^ ifp->if_capenable; #ifdef DEVICE_POLLING if (mask & IFCAP_POLLING) { if (ifr->ifr_reqcap & IFCAP_POLLING) { error = ether_poll_register(ixgb_poll, ifp); if (error) return(error); IXGB_LOCK(adapter); ixgb_disable_intr(adapter); ifp->if_capenable |= IFCAP_POLLING; IXGB_UNLOCK(adapter); } else { error = ether_poll_deregister(ifp); /* Enable interrupt even in error case */ IXGB_LOCK(adapter); ixgb_enable_intr(adapter); ifp->if_capenable &= ~IFCAP_POLLING; IXGB_UNLOCK(adapter); } } #endif /* DEVICE_POLLING */ if (mask & IFCAP_HWCSUM) { if (IFCAP_HWCSUM & ifp->if_capenable) ifp->if_capenable &= ~IFCAP_HWCSUM; else ifp->if_capenable |= 
IFCAP_HWCSUM; if (ifp->if_drv_flags & IFF_DRV_RUNNING) ixgb_init(adapter); } break; default: IOCTL_DEBUGOUT1("ioctl received: UNKNOWN (0x%X)\n", (int)command); error = EINVAL; } out: return (error); } /********************************************************************* * Watchdog entry point * * This routine is called whenever hardware quits transmitting. * **********************************************************************/ static void ixgb_watchdog(struct adapter *adapter) { struct ifnet *ifp; ifp = adapter->ifp; /* * If we are in this routine because of pause frames, then don't * reset the hardware. */ if (IXGB_READ_REG(&adapter->hw, STATUS) & IXGB_STATUS_TXOFF) { adapter->tx_timer = IXGB_TX_TIMEOUT; return; } if_printf(ifp, "watchdog timeout -- resetting\n"); ixgb_stop(adapter); ixgb_init_locked(adapter); if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); return; } /********************************************************************* * Init entry point * * This routine is used in two ways. It is used by the stack as * init entry point in network interface structure. It is also used * by the driver as a hw/sw initialization routine to get to a * consistent state. * * return 0 on success, positive on failure **********************************************************************/ static void ixgb_init_locked(struct adapter *adapter) { struct ifnet *ifp; INIT_DEBUGOUT("ixgb_init: begin"); IXGB_LOCK_ASSERT(adapter); ixgb_stop(adapter); ifp = adapter->ifp; /* Get the latest mac address, User can use a LAA */ bcopy(IF_LLADDR(ifp), adapter->hw.curr_mac_addr, IXGB_ETH_LENGTH_OF_ADDRESS); /* Initialize the hardware */ if (ixgb_hardware_init(adapter)) { if_printf(ifp, "Unable to initialize the hardware\n"); return; } ixgb_enable_vlans(adapter); /* Prepare transmit descriptors and buffers */ if (ixgb_setup_transmit_structures(adapter)) { if_printf(ifp, "Could not setup transmit structures\n"); ixgb_stop(adapter); return; } ixgb_initialize_transmit_unit(adapter); /* Setup Multicast table */ ixgb_set_multi(adapter); /* Prepare receive descriptors and buffers */ if (ixgb_setup_receive_structures(adapter)) { if_printf(ifp, "Could not setup receive structures\n"); ixgb_stop(adapter); return; } ixgb_initialize_receive_unit(adapter); /* Don't lose promiscuous settings */ ixgb_set_promisc(adapter); ifp = adapter->ifp; ifp->if_drv_flags |= IFF_DRV_RUNNING; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; if (ifp->if_capenable & IFCAP_TXCSUM) ifp->if_hwassist = IXGB_CHECKSUM_FEATURES; else ifp->if_hwassist = 0; /* Enable jumbo frames */ if (ifp->if_mtu > ETHERMTU) { uint32_t temp_reg; IXGB_WRITE_REG(&adapter->hw, MFS, adapter->hw.max_frame_size << IXGB_MFS_SHIFT); temp_reg = IXGB_READ_REG(&adapter->hw, CTRL0); temp_reg |= IXGB_CTRL0_JFE; IXGB_WRITE_REG(&adapter->hw, CTRL0, temp_reg); } callout_reset(&adapter->timer, hz, ixgb_local_timer, adapter); ixgb_clear_hw_cntrs(&adapter->hw); #ifdef DEVICE_POLLING /* * Only disable interrupts if we are polling, make sure they are on * otherwise. 
*/ if (ifp->if_capenable & IFCAP_POLLING) ixgb_disable_intr(adapter); else #endif ixgb_enable_intr(adapter); return; } static void ixgb_init(void *arg) { struct adapter *adapter = arg; IXGB_LOCK(adapter); ixgb_init_locked(adapter); IXGB_UNLOCK(adapter); return; } #ifdef DEVICE_POLLING static int ixgb_poll_locked(struct ifnet * ifp, enum poll_cmd cmd, int count) { struct adapter *adapter = ifp->if_softc; u_int32_t reg_icr; int rx_npkts; IXGB_LOCK_ASSERT(adapter); if (cmd == POLL_AND_CHECK_STATUS) { reg_icr = IXGB_READ_REG(&adapter->hw, ICR); if (reg_icr & (IXGB_INT_RXSEQ | IXGB_INT_LSC)) { ixgb_check_for_link(&adapter->hw); ixgb_print_link_status(adapter); } } rx_npkts = ixgb_process_receive_interrupts(adapter, count); ixgb_clean_transmit_interrupts(adapter); if (ifp->if_snd.ifq_head != NULL) ixgb_start_locked(ifp); return (rx_npkts); } static int ixgb_poll(struct ifnet * ifp, enum poll_cmd cmd, int count) { struct adapter *adapter = ifp->if_softc; int rx_npkts = 0; IXGB_LOCK(adapter); if (ifp->if_drv_flags & IFF_DRV_RUNNING) rx_npkts = ixgb_poll_locked(ifp, cmd, count); IXGB_UNLOCK(adapter); return (rx_npkts); } #endif /* DEVICE_POLLING */ /********************************************************************* * * Interrupt Service routine * **********************************************************************/ static void ixgb_intr(void *arg) { u_int32_t loop_cnt = IXGB_MAX_INTR; u_int32_t reg_icr; struct ifnet *ifp; struct adapter *adapter = arg; boolean_t rxdmt0 = FALSE; IXGB_LOCK(adapter); ifp = adapter->ifp; #ifdef DEVICE_POLLING if (ifp->if_capenable & IFCAP_POLLING) { IXGB_UNLOCK(adapter); return; } #endif reg_icr = IXGB_READ_REG(&adapter->hw, ICR); if (reg_icr == 0) { IXGB_UNLOCK(adapter); return; } if (reg_icr & IXGB_INT_RXDMT0) rxdmt0 = TRUE; #ifdef _SV_ if (reg_icr & IXGB_INT_RXDMT0) adapter->sv_stats.icr_rxdmt0++; if (reg_icr & IXGB_INT_RXO) adapter->sv_stats.icr_rxo++; if (reg_icr & IXGB_INT_RXT0) adapter->sv_stats.icr_rxt0++; if (reg_icr & IXGB_INT_TXDW) adapter->sv_stats.icr_TXDW++; #endif /* _SV_ */ /* Link status change */ if (reg_icr & (IXGB_INT_RXSEQ | IXGB_INT_LSC)) { ixgb_check_for_link(&adapter->hw); ixgb_print_link_status(adapter); } while (loop_cnt > 0) { if (ifp->if_drv_flags & IFF_DRV_RUNNING) { ixgb_process_receive_interrupts(adapter, -1); ixgb_clean_transmit_interrupts(adapter); } loop_cnt--; } if (rxdmt0 && adapter->raidc) { IXGB_WRITE_REG(&adapter->hw, IMC, IXGB_INT_RXDMT0); IXGB_WRITE_REG(&adapter->hw, IMS, IXGB_INT_RXDMT0); } if (ifp->if_drv_flags & IFF_DRV_RUNNING && ifp->if_snd.ifq_head != NULL) ixgb_start_locked(ifp); IXGB_UNLOCK(adapter); return; } /********************************************************************* * * Media Ioctl callback * * This routine is called whenever the user queries the status of * the interface using ifconfig. * **********************************************************************/ static void ixgb_media_status(struct ifnet * ifp, struct ifmediareq * ifmr) { struct adapter *adapter = ifp->if_softc; INIT_DEBUGOUT("ixgb_media_status: begin"); ixgb_check_for_link(&adapter->hw); ixgb_print_link_status(adapter); ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; if (!adapter->hw.link_up) return; ifmr->ifm_status |= IFM_ACTIVE; ifmr->ifm_active |= IFM_1000_SX | IFM_FDX; return; } /********************************************************************* * * Media Ioctl callback * * This routine is called when the user changes speed/duplex using * media/mediaopt option with ifconfig.
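 *
 * ixgb_media_change() below only has to validate the media type,
 * since this is a fixed 10GbE part with nothing to select. A
 * hypothetical driver with selectable media would also switch on
 * the subtype, along these lines:
 *
 *	switch (IFM_SUBTYPE(ifm->ifm_media)) {
 *	case IFM_AUTO:
 *		break;
 *	default:
 *		return (EINVAL);
 *	}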
* **********************************************************************/ static int ixgb_media_change(struct ifnet * ifp) { struct adapter *adapter = ifp->if_softc; struct ifmedia *ifm = &adapter->media; INIT_DEBUGOUT("ixgb_media_change: begin"); if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) return (EINVAL); return (0); } /********************************************************************* * * This routine maps the mbufs to tx descriptors. * * return 0 on success, positive on failure **********************************************************************/ static int ixgb_encap(struct adapter * adapter, struct mbuf * m_head) { u_int8_t txd_popts; int i, j, error, nsegs; #if __FreeBSD_version < 500000 struct ifvlan *ifv = NULL; #elif __FreeBSD_version < 700000 struct m_tag *mtag = NULL; #endif bus_dma_segment_t segs[IXGB_MAX_SCATTER]; bus_dmamap_t map; struct ixgb_buffer *tx_buffer = NULL; struct ixgb_tx_desc *current_tx_desc = NULL; struct ifnet *ifp = adapter->ifp; /* * Force a cleanup if number of TX descriptors available hits the * threshold */ if (adapter->num_tx_desc_avail <= IXGB_TX_CLEANUP_THRESHOLD) { ixgb_clean_transmit_interrupts(adapter); } if (adapter->num_tx_desc_avail <= IXGB_TX_CLEANUP_THRESHOLD) { adapter->no_tx_desc_avail1++; return (ENOBUFS); } /* * Map the packet for DMA. */ if (bus_dmamap_create(adapter->txtag, BUS_DMA_NOWAIT, &map)) { adapter->no_tx_map_avail++; return (ENOMEM); } error = bus_dmamap_load_mbuf_sg(adapter->txtag, map, m_head, segs, &nsegs, BUS_DMA_NOWAIT); if (error != 0) { adapter->no_tx_dma_setup++; if_printf(ifp, "ixgb_encap: bus_dmamap_load_mbuf failed; " "error %u\n", error); bus_dmamap_destroy(adapter->txtag, map); return (error); } KASSERT(nsegs != 0, ("ixgb_encap: empty packet")); if (nsegs > adapter->num_tx_desc_avail) { adapter->no_tx_desc_avail2++; bus_dmamap_destroy(adapter->txtag, map); return (ENOBUFS); } if (ifp->if_hwassist > 0) { ixgb_transmit_checksum_setup(adapter, m_head, &txd_popts); } else txd_popts = 0; /* Find out if we are in vlan mode */ #if __FreeBSD_version < 500000 if ((m_head->m_flags & (M_PROTO1 | M_PKTHDR)) == (M_PROTO1 | M_PKTHDR) && m_head->m_pkthdr.rcvif != NULL && m_head->m_pkthdr.rcvif->if_type == IFT_L2VLAN) ifv = m_head->m_pkthdr.rcvif->if_softc; #elif __FreeBSD_version < 700000 mtag = VLAN_OUTPUT_TAG(ifp, m_head); #endif i = adapter->next_avail_tx_desc; for (j = 0; j < nsegs; j++) { tx_buffer = &adapter->tx_buffer_area[i]; current_tx_desc = &adapter->tx_desc_base[i]; current_tx_desc->buff_addr = htole64(segs[j].ds_addr); current_tx_desc->cmd_type_len = (adapter->txd_cmd | segs[j].ds_len); current_tx_desc->popts = txd_popts; if (++i == adapter->num_tx_desc) i = 0; tx_buffer->m_head = NULL; } adapter->num_tx_desc_avail -= nsegs; adapter->next_avail_tx_desc = i; #if __FreeBSD_version < 500000 if (ifv != NULL) { /* Set the vlan id */ current_tx_desc->vlan = ifv->ifv_tag; #elif __FreeBSD_version < 700000 if (mtag != NULL) { /* Set the vlan id */ current_tx_desc->vlan = VLAN_TAG_VALUE(mtag); #else if (m_head->m_flags & M_VLANTAG) { current_tx_desc->vlan = m_head->m_pkthdr.ether_vtag; #endif /* Tell hardware to add tag */ current_tx_desc->cmd_type_len |= IXGB_TX_DESC_CMD_VLE; } tx_buffer->m_head = m_head; tx_buffer->map = map; bus_dmamap_sync(adapter->txtag, map, BUS_DMASYNC_PREWRITE); /* * Last Descriptor of Packet needs End Of Packet (EOP) */ current_tx_desc->cmd_type_len |= (IXGB_TX_DESC_CMD_EOP); /* * Advance the Transmit Descriptor Tail (Tdt), this tells the hardware * that this frame is available to transmit.
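 *
 * The tail is the producer index: descriptors from head up to, but
 * not including, tail belong to the hardware. In outline, with the
 * wraparound already handled in the loop above:
 *
 *	if (++i == adapter->num_tx_desc)
 *		i = 0;
 *	IXGB_WRITE_REG(&adapter->hw, TDT, i);
 *
 * The tail write must come after the descriptor stores and the
 * PREWRITE sync above, or the hardware could fetch stale data.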
*/ IXGB_WRITE_REG(&adapter->hw, TDT, i); return (0); } static void ixgb_set_promisc(struct adapter * adapter) { u_int32_t reg_rctl; struct ifnet *ifp = adapter->ifp; reg_rctl = IXGB_READ_REG(&adapter->hw, RCTL); if (ifp->if_flags & IFF_PROMISC) { reg_rctl |= (IXGB_RCTL_UPE | IXGB_RCTL_MPE); IXGB_WRITE_REG(&adapter->hw, RCTL, reg_rctl); } else if (ifp->if_flags & IFF_ALLMULTI) { reg_rctl |= IXGB_RCTL_MPE; reg_rctl &= ~IXGB_RCTL_UPE; IXGB_WRITE_REG(&adapter->hw, RCTL, reg_rctl); } return; } static void ixgb_disable_promisc(struct adapter * adapter) { u_int32_t reg_rctl; reg_rctl = IXGB_READ_REG(&adapter->hw, RCTL); reg_rctl &= (~IXGB_RCTL_UPE); reg_rctl &= (~IXGB_RCTL_MPE); IXGB_WRITE_REG(&adapter->hw, RCTL, reg_rctl); return; } /********************************************************************* * Multicast Update * * This routine is called whenever the multicast address list is updated. * **********************************************************************/ static void ixgb_set_multi(struct adapter * adapter) { u_int32_t reg_rctl = 0; u_int8_t *mta; struct ifmultiaddr *ifma; int mcnt = 0; struct ifnet *ifp = adapter->ifp; IOCTL_DEBUGOUT("ixgb_set_multi: begin"); mta = adapter->mta; bzero(mta, sizeof(u_int8_t) * IXGB_ETH_LENGTH_OF_ADDRESS * MAX_NUM_MULTICAST_ADDRESSES); if_maddr_rlock(ifp); #if __FreeBSD_version < 500000 LIST_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { #else TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { #endif if (ifma->ifma_addr->sa_family != AF_LINK) continue; if (mcnt == MAX_NUM_MULTICAST_ADDRESSES) break; bcopy(LLADDR((struct sockaddr_dl *) ifma->ifma_addr), &mta[mcnt * IXGB_ETH_LENGTH_OF_ADDRESS], IXGB_ETH_LENGTH_OF_ADDRESS); mcnt++; } if_maddr_runlock(ifp); if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) { reg_rctl = IXGB_READ_REG(&adapter->hw, RCTL); reg_rctl |= IXGB_RCTL_MPE; IXGB_WRITE_REG(&adapter->hw, RCTL, reg_rctl); } else ixgb_mc_addr_list_update(&adapter->hw, mta, mcnt, 0); return; } /********************************************************************* * Timer routine * * This routine checks for link status and updates statistics. * **********************************************************************/ static void ixgb_local_timer(void *arg) { struct ifnet *ifp; struct adapter *adapter = arg; ifp = adapter->ifp; IXGB_LOCK_ASSERT(adapter); ixgb_check_for_link(&adapter->hw); ixgb_print_link_status(adapter); ixgb_update_stats_counters(adapter); if (ixgb_display_debug_stats && ifp->if_drv_flags & IFF_DRV_RUNNING) { ixgb_print_hw_stats(adapter); } if (adapter->tx_timer != 0 && --adapter->tx_timer == 0) ixgb_watchdog(adapter); callout_reset(&adapter->timer, hz, ixgb_local_timer, adapter); } static void ixgb_print_link_status(struct adapter * adapter) { if (adapter->hw.link_up) { if (!adapter->link_active) { if_printf(adapter->ifp, "Link is up %d Mbps %s\n", 10000, "Full Duplex"); adapter->link_active = 1; } } else { if (adapter->link_active) { if_printf(adapter->ifp, "Link is Down\n"); adapter->link_active = 0; } } return; } /********************************************************************* * * This routine disables all traffic on the adapter by issuing a * global reset on the MAC and deallocates TX/RX buffers.
* **********************************************************************/ static void ixgb_stop(void *arg) { struct ifnet *ifp; struct adapter *adapter = arg; ifp = adapter->ifp; IXGB_LOCK_ASSERT(adapter); INIT_DEBUGOUT("ixgb_stop: begin\n"); ixgb_disable_intr(adapter); adapter->hw.adapter_stopped = FALSE; ixgb_adapter_stop(&adapter->hw); callout_stop(&adapter->timer); ixgb_free_transmit_structures(adapter); ixgb_free_receive_structures(adapter); /* Tell the stack that the interface is no longer active */ ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); adapter->tx_timer = 0; return; } /********************************************************************* * * Determine hardware revision. * **********************************************************************/ static void ixgb_identify_hardware(struct adapter * adapter) { device_t dev = adapter->dev; /* Make sure our PCI config space has the necessary stuff set */ pci_enable_busmaster(dev); adapter->hw.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2); /* Save off the information about this board */ adapter->hw.vendor_id = pci_get_vendor(dev); adapter->hw.device_id = pci_get_device(dev); adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1); adapter->hw.subsystem_vendor_id = pci_read_config(dev, PCIR_SUBVEND_0, 2); adapter->hw.subsystem_id = pci_read_config(dev, PCIR_SUBDEV_0, 2); /* Set MacType, etc. based on this PCI info */ switch (adapter->hw.device_id) { case IXGB_DEVICE_ID_82597EX: case IXGB_DEVICE_ID_82597EX_SR: adapter->hw.mac_type = ixgb_82597; break; default: INIT_DEBUGOUT1("Unknown device id 0x%x", adapter->hw.device_id); device_printf(dev, "unsupported device id 0x%x\n", adapter->hw.device_id); } return; } static int ixgb_allocate_pci_resources(struct adapter * adapter) { int rid; device_t dev = adapter->dev; rid = IXGB_MMBA; adapter->res_memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (!(adapter->res_memory)) { device_printf(dev, "Unable to allocate bus resource: memory\n"); return (ENXIO); } adapter->osdep.mem_bus_space_tag = rman_get_bustag(adapter->res_memory); adapter->osdep.mem_bus_space_handle = rman_get_bushandle(adapter->res_memory); adapter->hw.hw_addr = (uint8_t *) & adapter->osdep.mem_bus_space_handle; rid = 0x0; adapter->res_interrupt = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); if (!(adapter->res_interrupt)) { device_printf(dev, "Unable to allocate bus resource: interrupt\n"); return (ENXIO); } if (bus_setup_intr(dev, adapter->res_interrupt, INTR_TYPE_NET | INTR_MPSAFE, NULL, (void (*) (void *))ixgb_intr, adapter, &adapter->int_handler_tag)) { device_printf(dev, "Error registering interrupt handler!\n"); return (ENXIO); } adapter->hw.back = &adapter->osdep; return (0); } static void ixgb_free_pci_resources(struct adapter * adapter) { device_t dev = adapter->dev; if (adapter->res_interrupt != NULL) { bus_teardown_intr(dev, adapter->res_interrupt, adapter->int_handler_tag); bus_release_resource(dev, SYS_RES_IRQ, 0, adapter->res_interrupt); } if (adapter->res_memory != NULL) { bus_release_resource(dev, SYS_RES_MEMORY, IXGB_MMBA, adapter->res_memory); } if (adapter->res_ioport != NULL) { bus_release_resource(dev, SYS_RES_IOPORT, adapter->io_rid, adapter->res_ioport); } return; } /********************************************************************* * * Initialize the hardware to a configuration as specified by the * adapter structure.
The controller is reset, the EEPROM is * verified, the MAC address is set, then the shared initialization * routines are called. * **********************************************************************/ static int ixgb_hardware_init(struct adapter * adapter) { /* Issue a global reset */ adapter->hw.adapter_stopped = FALSE; ixgb_adapter_stop(&adapter->hw); /* Make sure we have a good EEPROM before we read from it */ if (!ixgb_validate_eeprom_checksum(&adapter->hw)) { device_printf(adapter->dev, "The EEPROM Checksum Is Not Valid\n"); return (EIO); } if (!ixgb_init_hw(&adapter->hw)) { device_printf(adapter->dev, "Hardware Initialization Failed\n"); return (EIO); } return (0); } /********************************************************************* * * Setup networking device structure and register an interface. * **********************************************************************/ static int ixgb_setup_interface(device_t dev, struct adapter * adapter) { struct ifnet *ifp; INIT_DEBUGOUT("ixgb_setup_interface: begin"); ifp = adapter->ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { device_printf(dev, "cannot allocate ifnet structure\n"); return (-1); } #if __FreeBSD_version >= 502000 if_initname(ifp, device_get_name(dev), device_get_unit(dev)); #else ifp->if_unit = device_get_unit(dev); ifp->if_name = "ixgb"; #endif ifp->if_baudrate = 1000000000; ifp->if_init = ixgb_init; ifp->if_softc = adapter; ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = ixgb_ioctl; ifp->if_start = ixgb_start; ifp->if_get_counter = ixgb_get_counter; ifp->if_snd.ifq_maxlen = adapter->num_tx_desc - 1; #if __FreeBSD_version < 500000 ether_ifattach(ifp, ETHER_BPF_SUPPORTED); #else ether_ifattach(ifp, adapter->hw.curr_mac_addr); #endif ifp->if_capabilities = IFCAP_HWCSUM; /* * Tell the upper layer(s) we support long frames. */ ifp->if_hdrlen = sizeof(struct ether_vlan_header); #if __FreeBSD_version >= 500000 ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU; #endif ifp->if_capenable = ifp->if_capabilities; #ifdef DEVICE_POLLING ifp->if_capabilities |= IFCAP_POLLING; #endif /* * Specify the media types supported by this adapter and register * callbacks to update media and link information */ ifmedia_init(&adapter->media, IFM_IMASK, ixgb_media_change, ixgb_media_status); ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX, 0, NULL); ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL); ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO); return (0); } /******************************************************************** * Manage DMA'able memory.
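 *
 * Typical use from attach, as a sketch: carve out one descriptor
 * ring, keep both the kernel virtual address and the bus address.
 *
 *	struct ixgb_dma_alloc dma;
 *	struct ixgb_tx_desc *ring;
 *
 *	if (ixgb_dma_malloc(adapter, tsize, &dma, BUS_DMA_NOWAIT) != 0)
 *		return (ENOMEM);
 *	ring = (struct ixgb_tx_desc *)dma.dma_vaddr;
 *
 * dma.dma_paddr then carries the bus address that gets programmed
 * into the hardware (TDBAL/TDBAH below), and ixgb_dma_free() undoes
 * the whole allocation on teardown.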
*******************************************************************/ static void ixgb_dmamap_cb(void *arg, bus_dma_segment_t * segs, int nseg, int error) { if (error) return; *(bus_addr_t *) arg = segs->ds_addr; return; } static int ixgb_dma_malloc(struct adapter * adapter, bus_size_t size, struct ixgb_dma_alloc * dma, int mapflags) { device_t dev; int r; dev = adapter->dev; r = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ PAGE_SIZE, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ size, /* maxsize */ 1, /* nsegments */ size, /* maxsegsize */ BUS_DMA_ALLOCNOW, /* flags */ #if __FreeBSD_version >= 502000 NULL, /* lockfunc */ NULL, /* lockfuncarg */ #endif &dma->dma_tag); if (r != 0) { device_printf(dev, "ixgb_dma_malloc: bus_dma_tag_create failed; " "error %u\n", r); goto fail_0; } r = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr, BUS_DMA_NOWAIT, &dma->dma_map); if (r != 0) { device_printf(dev, "ixgb_dma_malloc: bus_dmamem_alloc failed; " "error %u\n", r); goto fail_1; } r = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr, size, ixgb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT); if (r != 0) { device_printf(dev, "ixgb_dma_malloc: bus_dmamap_load failed; " "error %u\n", r); goto fail_2; } dma->dma_size = size; return (0); fail_2: bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map); fail_1: bus_dma_tag_destroy(dma->dma_tag); fail_0: dma->dma_tag = NULL; return (r); } static void ixgb_dma_free(struct adapter * adapter, struct ixgb_dma_alloc * dma) { bus_dmamap_unload(dma->dma_tag, dma->dma_map); bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map); bus_dma_tag_destroy(dma->dma_tag); } /********************************************************************* * * Allocate memory for tx_buffer structures. The tx_buffer stores all * the information needed to transmit a packet on the wire. * **********************************************************************/ static int ixgb_allocate_transmit_structures(struct adapter * adapter) { if (!(adapter->tx_buffer_area = (struct ixgb_buffer *) malloc(sizeof(struct ixgb_buffer) * adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) { device_printf(adapter->dev, "Unable to allocate tx_buffer memory\n"); return ENOMEM; } bzero(adapter->tx_buffer_area, sizeof(struct ixgb_buffer) * adapter->num_tx_desc); return 0; } /********************************************************************* * * Allocate and initialize transmit structures. * **********************************************************************/ static int ixgb_setup_transmit_structures(struct adapter * adapter) { /* * Setup DMA descriptor areas. 
*/ if (bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */ PAGE_SIZE, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ MCLBYTES * IXGB_MAX_SCATTER, /* maxsize */ IXGB_MAX_SCATTER, /* nsegments */ MCLBYTES, /* maxsegsize */ BUS_DMA_ALLOCNOW, /* flags */ #if __FreeBSD_version >= 502000 NULL, /* lockfunc */ NULL, /* lockfuncarg */ #endif &adapter->txtag)) { device_printf(adapter->dev, "Unable to allocate TX DMA tag\n"); return (ENOMEM); } if (ixgb_allocate_transmit_structures(adapter)) return ENOMEM; bzero((void *)adapter->tx_desc_base, (sizeof(struct ixgb_tx_desc)) * adapter->num_tx_desc); adapter->next_avail_tx_desc = 0; adapter->oldest_used_tx_desc = 0; /* Set number of descriptors available */ adapter->num_tx_desc_avail = adapter->num_tx_desc; /* Set checksum context */ adapter->active_checksum_context = OFFLOAD_NONE; return 0; } /********************************************************************* * * Enable transmit unit. * **********************************************************************/ static void ixgb_initialize_transmit_unit(struct adapter * adapter) { u_int32_t reg_tctl; u_int64_t tdba = adapter->txdma.dma_paddr; /* Setup the Base and Length of the Tx Descriptor Ring */ IXGB_WRITE_REG(&adapter->hw, TDBAL, (tdba & 0x00000000ffffffffULL)); IXGB_WRITE_REG(&adapter->hw, TDBAH, (tdba >> 32)); IXGB_WRITE_REG(&adapter->hw, TDLEN, adapter->num_tx_desc * sizeof(struct ixgb_tx_desc)); /* Setup the HW Tx Head and Tail descriptor pointers */ IXGB_WRITE_REG(&adapter->hw, TDH, 0); IXGB_WRITE_REG(&adapter->hw, TDT, 0); HW_DEBUGOUT2("Base = %x, Length = %x\n", IXGB_READ_REG(&adapter->hw, TDBAL), IXGB_READ_REG(&adapter->hw, TDLEN)); IXGB_WRITE_REG(&adapter->hw, TIDV, adapter->tx_int_delay); /* Program the Transmit Control Register */ reg_tctl = IXGB_READ_REG(&adapter->hw, TCTL); reg_tctl = IXGB_TCTL_TCE | IXGB_TCTL_TXEN | IXGB_TCTL_TPDE; IXGB_WRITE_REG(&adapter->hw, TCTL, reg_tctl); /* Setup Transmit Descriptor Settings for this adapter */ adapter->txd_cmd = IXGB_TX_DESC_TYPE | IXGB_TX_DESC_CMD_RS; if (adapter->tx_int_delay > 0) adapter->txd_cmd |= IXGB_TX_DESC_CMD_IDE; return; } /********************************************************************* * * Free all transmit related data structures. * **********************************************************************/ static void ixgb_free_transmit_structures(struct adapter * adapter) { struct ixgb_buffer *tx_buffer; int i; INIT_DEBUGOUT("free_transmit_structures: begin"); if (adapter->tx_buffer_area != NULL) { tx_buffer = adapter->tx_buffer_area; for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) { if (tx_buffer->m_head != NULL) { bus_dmamap_unload(adapter->txtag, tx_buffer->map); bus_dmamap_destroy(adapter->txtag, tx_buffer->map); m_freem(tx_buffer->m_head); } tx_buffer->m_head = NULL; } } if (adapter->tx_buffer_area != NULL) { free(adapter->tx_buffer_area, M_DEVBUF); adapter->tx_buffer_area = NULL; } if (adapter->txtag != NULL) { bus_dma_tag_destroy(adapter->txtag); adapter->txtag = NULL; } return; } /********************************************************************* * * The offload context needs to be set when we transfer the first * packet of a particular protocol (TCP/UDP). We change the * context only if the protocol type changes. 
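 *
 * The context descriptor fields are byte offsets from the start of
 * the frame. For plain IPv4 over Ethernet (no VLAN tag, no IP
 * options), the TCP case below works out to:
 *
 *	tucso = ENET_HEADER_SIZE + sizeof(struct ip)
 *	    + offsetof(struct tcphdr, th_sum)
 *	      = 14 + 20 + 16 = 50
 *
 * and the UDP case to 14 + 20 + 6 = 40 (uh_sum is the fourth 16-bit
 * field of the UDP header).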
* **********************************************************************/ static void ixgb_transmit_checksum_setup(struct adapter * adapter, struct mbuf * mp, u_int8_t * txd_popts) { struct ixgb_context_desc *TXD; struct ixgb_buffer *tx_buffer; int curr_txd; if (mp->m_pkthdr.csum_flags) { if (mp->m_pkthdr.csum_flags & CSUM_TCP) { *txd_popts = IXGB_TX_DESC_POPTS_TXSM; if (adapter->active_checksum_context == OFFLOAD_TCP_IP) return; else adapter->active_checksum_context = OFFLOAD_TCP_IP; } else if (mp->m_pkthdr.csum_flags & CSUM_UDP) { *txd_popts = IXGB_TX_DESC_POPTS_TXSM; if (adapter->active_checksum_context == OFFLOAD_UDP_IP) return; else adapter->active_checksum_context = OFFLOAD_UDP_IP; } else { *txd_popts = 0; return; } } else { *txd_popts = 0; return; } /* * If we reach this point, the checksum offload context needs to be * reset. */ curr_txd = adapter->next_avail_tx_desc; tx_buffer = &adapter->tx_buffer_area[curr_txd]; TXD = (struct ixgb_context_desc *) & adapter->tx_desc_base[curr_txd]; TXD->tucss = ENET_HEADER_SIZE + sizeof(struct ip); TXD->tucse = 0; TXD->mss = 0; if (adapter->active_checksum_context == OFFLOAD_TCP_IP) { TXD->tucso = ENET_HEADER_SIZE + sizeof(struct ip) + offsetof(struct tcphdr, th_sum); } else if (adapter->active_checksum_context == OFFLOAD_UDP_IP) { TXD->tucso = ENET_HEADER_SIZE + sizeof(struct ip) + offsetof(struct udphdr, uh_sum); } TXD->cmd_type_len = IXGB_CONTEXT_DESC_CMD_TCP | IXGB_TX_DESC_CMD_RS | IXGB_CONTEXT_DESC_CMD_IDE; tx_buffer->m_head = NULL; if (++curr_txd == adapter->num_tx_desc) curr_txd = 0; adapter->num_tx_desc_avail--; adapter->next_avail_tx_desc = curr_txd; return; } /********************************************************************** * * Examine each tx_buffer in the used queue. If the hardware is done * processing the packet then free associated resources. The * tx_buffer is put back on the free queue. * **********************************************************************/ static void ixgb_clean_transmit_interrupts(struct adapter * adapter) { int i, num_avail; struct ixgb_buffer *tx_buffer; struct ixgb_tx_desc *tx_desc; IXGB_LOCK_ASSERT(adapter); if (adapter->num_tx_desc_avail == adapter->num_tx_desc) return; #ifdef _SV_ adapter->clean_tx_interrupts++; #endif num_avail = adapter->num_tx_desc_avail; i = adapter->oldest_used_tx_desc; tx_buffer = &adapter->tx_buffer_area[i]; tx_desc = &adapter->tx_desc_base[i]; while (tx_desc->status & IXGB_TX_DESC_STATUS_DD) { tx_desc->status = 0; num_avail++; if (tx_buffer->m_head) { bus_dmamap_sync(adapter->txtag, tx_buffer->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(adapter->txtag, tx_buffer->map); bus_dmamap_destroy(adapter->txtag, tx_buffer->map); m_freem(tx_buffer->m_head); tx_buffer->m_head = NULL; } if (++i == adapter->num_tx_desc) i = 0; tx_buffer = &adapter->tx_buffer_area[i]; tx_desc = &adapter->tx_desc_base[i]; } adapter->oldest_used_tx_desc = i; /* * If we have enough room, clear IFF_DRV_OACTIVE to tell the stack that * it is OK to send packets. If there are no pending descriptors, * clear the timeout. Otherwise, if some descriptors have been freed, * restart the timeout. 
*/ if (num_avail > IXGB_TX_CLEANUP_THRESHOLD) { struct ifnet *ifp = adapter->ifp; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; if (num_avail == adapter->num_tx_desc) adapter->tx_timer = 0; else if (num_avail == adapter->num_tx_desc_avail) adapter->tx_timer = IXGB_TX_TIMEOUT; } adapter->num_tx_desc_avail = num_avail; return; } /********************************************************************* * * Get a buffer from system mbuf buffer pool. * **********************************************************************/ static int ixgb_get_buf(int i, struct adapter * adapter, struct mbuf * nmp) { register struct mbuf *mp = nmp; struct ixgb_buffer *rx_buffer; struct ifnet *ifp; bus_addr_t paddr; int error; ifp = adapter->ifp; if (mp == NULL) { mp = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); if (mp == NULL) { adapter->mbuf_alloc_failed++; return (ENOBUFS); } mp->m_len = mp->m_pkthdr.len = MCLBYTES; } else { mp->m_len = mp->m_pkthdr.len = MCLBYTES; mp->m_data = mp->m_ext.ext_buf; mp->m_next = NULL; } if (ifp->if_mtu <= ETHERMTU) { m_adj(mp, ETHER_ALIGN); } rx_buffer = &adapter->rx_buffer_area[i]; /* * Using memory from the mbuf cluster pool, invoke the bus_dma * machinery to arrange the memory mapping. */ error = bus_dmamap_load(adapter->rxtag, rx_buffer->map, mtod(mp, void *), mp->m_len, ixgb_dmamap_cb, &paddr, 0); if (error) { m_free(mp); return (error); } rx_buffer->m_head = mp; adapter->rx_desc_base[i].buff_addr = htole64(paddr); bus_dmamap_sync(adapter->rxtag, rx_buffer->map, BUS_DMASYNC_PREREAD); return (0); } /********************************************************************* * * Allocate memory for rx_buffer structures. Since we use one * rx_buffer per received packet, the maximum number of rx_buffer's * that we'll need is equal to the number of receive descriptors * that we've allocated. 
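 *
 * First, a note on the m_adj(mp, ETHER_ALIGN) call in ixgb_get_buf()
 * above: shifting the buffer start by two bytes makes the 14-byte
 * Ethernet header end on a 4-byte boundary, so the IP header that
 * follows it is naturally aligned. A stand-alone arithmetic check
 * (sketch only; the SK_* names are made up for illustration):
 */
#if 0	/* illustrative sketch only, not compiled into the driver */
#include <stdio.h>

#define SK_ETHER_HDR_LEN	14	/* standard Ethernet header */
#define SK_ETHER_ALIGN		2	/* pad applied via m_adj() */

int
main(void)
{
	unsigned long buf = 0x1000;	/* cluster start, 4-byte aligned */

	printf("IP offset %% 4, no pad:   %lu\n",
	    (buf + SK_ETHER_HDR_LEN) % 4);			/* 2 */
	printf("IP offset %% 4, with pad: %lu\n",
	    (buf + SK_ETHER_ALIGN + SK_ETHER_HDR_LEN) % 4);	/* 0 */
	return (0);
}
#endif
/*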
* **********************************************************************/ static int ixgb_allocate_receive_structures(struct adapter * adapter) { int i, error; struct ixgb_buffer *rx_buffer; if (!(adapter->rx_buffer_area = (struct ixgb_buffer *) malloc(sizeof(struct ixgb_buffer) * adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) { device_printf(adapter->dev, "Unable to allocate rx_buffer memory\n"); return (ENOMEM); } bzero(adapter->rx_buffer_area, sizeof(struct ixgb_buffer) * adapter->num_rx_desc); error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev),/* parent */ PAGE_SIZE, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ MCLBYTES, /* maxsize */ 1, /* nsegments */ MCLBYTES, /* maxsegsize */ BUS_DMA_ALLOCNOW, /* flags */ #if __FreeBSD_version >= 502000 NULL, /* lockfunc */ NULL, /* lockfuncarg */ #endif &adapter->rxtag); if (error != 0) { device_printf(adapter->dev, "ixgb_allocate_receive_structures: " "bus_dma_tag_create failed; error %u\n", error); goto fail_0; } rx_buffer = adapter->rx_buffer_area; for (i = 0; i < adapter->num_rx_desc; i++, rx_buffer++) { error = bus_dmamap_create(adapter->rxtag, BUS_DMA_NOWAIT, &rx_buffer->map); if (error != 0) { device_printf(adapter->dev, "ixgb_allocate_receive_structures: " "bus_dmamap_create failed; error %u\n", error); goto fail_1; } } for (i = 0; i < adapter->num_rx_desc; i++) { if (ixgb_get_buf(i, adapter, NULL) == ENOBUFS) { adapter->rx_buffer_area[i].m_head = NULL; adapter->rx_desc_base[i].buff_addr = 0; return (ENOBUFS); } } return (0); fail_1: bus_dma_tag_destroy(adapter->rxtag); fail_0: adapter->rxtag = NULL; free(adapter->rx_buffer_area, M_DEVBUF); adapter->rx_buffer_area = NULL; return (error); } /********************************************************************* * * Allocate and initialize receive structures. * **********************************************************************/ static int ixgb_setup_receive_structures(struct adapter * adapter) { bzero((void *)adapter->rx_desc_base, (sizeof(struct ixgb_rx_desc)) * adapter->num_rx_desc); if (ixgb_allocate_receive_structures(adapter)) return ENOMEM; /* Setup our descriptor pointers */ adapter->next_rx_desc_to_check = 0; adapter->next_rx_desc_to_use = 0; return (0); } /********************************************************************* * * Enable receive unit. 
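 *
 * Note the error handling in ixgb_allocate_receive_structures() above:
 * it is the classic goto-unwind idiom, where each failure label undoes
 * the steps that succeeded before it, in reverse order. (The direct
 * ENOBUFS return in the final mbuf-population loop bypasses those
 * labels and leaves teardown to the caller.) A generic stand-alone
 * sketch of the idiom, with hypothetical names:
 */
#if 0	/* illustrative sketch only, not compiled into the driver */
#include <stdlib.h>

static int
sk_setup_two(void **a, void **b)
{
	if ((*a = malloc(64)) == NULL)
		goto fail_0;
	if ((*b = malloc(64)) == NULL)
		goto fail_1;
	return (0);		/* success: caller owns both */
fail_1:
	free(*a);		/* undo step 1, in reverse order */
	*a = NULL;
fail_0:
	return (-1);
}

int
main(void)
{
	void *a, *b;

	if (sk_setup_two(&a, &b) == 0) {
		free(b);
		free(a);
	}
	return (0);
}
#endif
/*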
* **********************************************************************/ static void ixgb_initialize_receive_unit(struct adapter * adapter) { u_int32_t reg_rctl; u_int32_t reg_rxcsum; u_int32_t reg_rxdctl; struct ifnet *ifp; u_int64_t rdba = adapter->rxdma.dma_paddr; ifp = adapter->ifp; /* * Make sure receives are disabled while setting up the descriptor * ring */ reg_rctl = IXGB_READ_REG(&adapter->hw, RCTL); IXGB_WRITE_REG(&adapter->hw, RCTL, reg_rctl & ~IXGB_RCTL_RXEN); /* Set the Receive Delay Timer Register */ IXGB_WRITE_REG(&adapter->hw, RDTR, adapter->rx_int_delay); /* Setup the Base and Length of the Rx Descriptor Ring */ IXGB_WRITE_REG(&adapter->hw, RDBAL, (rdba & 0x00000000ffffffffULL)); IXGB_WRITE_REG(&adapter->hw, RDBAH, (rdba >> 32)); IXGB_WRITE_REG(&adapter->hw, RDLEN, adapter->num_rx_desc * sizeof(struct ixgb_rx_desc)); /* Setup the HW Rx Head and Tail Descriptor Pointers */ IXGB_WRITE_REG(&adapter->hw, RDH, 0); IXGB_WRITE_REG(&adapter->hw, RDT, adapter->num_rx_desc - 1); reg_rxdctl = RXDCTL_WTHRESH_DEFAULT << IXGB_RXDCTL_WTHRESH_SHIFT | RXDCTL_HTHRESH_DEFAULT << IXGB_RXDCTL_HTHRESH_SHIFT | RXDCTL_PTHRESH_DEFAULT << IXGB_RXDCTL_PTHRESH_SHIFT; IXGB_WRITE_REG(&adapter->hw, RXDCTL, reg_rxdctl); adapter->raidc = 1; if (adapter->raidc) { uint32_t raidc; uint8_t poll_threshold; #define IXGB_RAIDC_POLL_DEFAULT 120 poll_threshold = ((adapter->num_rx_desc - 1) >> 3); poll_threshold >>= 1; poll_threshold &= 0x3F; raidc = IXGB_RAIDC_EN | IXGB_RAIDC_RXT_GATE | (IXGB_RAIDC_POLL_DEFAULT << IXGB_RAIDC_POLL_SHIFT) | (adapter->rx_int_delay << IXGB_RAIDC_DELAY_SHIFT) | poll_threshold; IXGB_WRITE_REG(&adapter->hw, RAIDC, raidc); } /* Enable Receive Checksum Offload for TCP and UDP ? */ if (ifp->if_capenable & IFCAP_RXCSUM) { reg_rxcsum = IXGB_READ_REG(&adapter->hw, RXCSUM); reg_rxcsum |= IXGB_RXCSUM_TUOFL; IXGB_WRITE_REG(&adapter->hw, RXCSUM, reg_rxcsum); } /* Setup the Receive Control Register */ reg_rctl = IXGB_READ_REG(&adapter->hw, RCTL); reg_rctl &= ~(3 << IXGB_RCTL_MO_SHIFT); reg_rctl |= IXGB_RCTL_BAM | IXGB_RCTL_RDMTS_1_2 | IXGB_RCTL_SECRC | IXGB_RCTL_CFF | (adapter->hw.mc_filter_type << IXGB_RCTL_MO_SHIFT); switch (adapter->rx_buffer_len) { default: case IXGB_RXBUFFER_2048: reg_rctl |= IXGB_RCTL_BSIZE_2048; break; case IXGB_RXBUFFER_4096: reg_rctl |= IXGB_RCTL_BSIZE_4096; break; case IXGB_RXBUFFER_8192: reg_rctl |= IXGB_RCTL_BSIZE_8192; break; case IXGB_RXBUFFER_16384: reg_rctl |= IXGB_RCTL_BSIZE_16384; break; } reg_rctl |= IXGB_RCTL_RXEN; /* Enable Receives */ IXGB_WRITE_REG(&adapter->hw, RCTL, reg_rctl); return; } /********************************************************************* * * Free receive related data structures. 
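 *
 * Before that, note how ixgb_initialize_receive_unit() above programs
 * the 64-bit ring base address: the low word goes to RDBAL and the
 * high word to RDBAH, because each register is only 32 bits wide. A
 * stand-alone check that the split is lossless (sketch only; the
 * address used is arbitrary):
 */
#if 0	/* illustrative sketch only, not compiled into the driver */
#include <stdio.h>
#include <stdint.h>

int
main(void)
{
	uint64_t rdba = 0x00000001234abc00ULL;	/* example physical address */
	uint32_t rdbal = (uint32_t)(rdba & 0x00000000ffffffffULL);
	uint32_t rdbah = (uint32_t)(rdba >> 32);

	printf("RDBAL=0x%08x RDBAH=0x%08x\n", rdbal, rdbah);
	printf("lossless=%d\n",
	    (((uint64_t)rdbah << 32) | rdbal) == rdba);		/* 1 */
	return (0);
}
#endif
/*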
* **********************************************************************/ static void ixgb_free_receive_structures(struct adapter * adapter) { struct ixgb_buffer *rx_buffer; int i; INIT_DEBUGOUT("free_receive_structures: begin"); if (adapter->rx_buffer_area != NULL) { rx_buffer = adapter->rx_buffer_area; for (i = 0; i < adapter->num_rx_desc; i++, rx_buffer++) { if (rx_buffer->map != NULL) { bus_dmamap_unload(adapter->rxtag, rx_buffer->map); bus_dmamap_destroy(adapter->rxtag, rx_buffer->map); } if (rx_buffer->m_head != NULL) m_freem(rx_buffer->m_head); rx_buffer->m_head = NULL; } } if (adapter->rx_buffer_area != NULL) { free(adapter->rx_buffer_area, M_DEVBUF); adapter->rx_buffer_area = NULL; } if (adapter->rxtag != NULL) { bus_dma_tag_destroy(adapter->rxtag); adapter->rxtag = NULL; } return; } /********************************************************************* * * This routine executes in interrupt context. It replenishes * the mbufs in the descriptor and sends data which has been * dma'ed into host memory to upper layer. * * We loop at most count times if count is > 0, or until done if * count < 0. * *********************************************************************/ static int ixgb_process_receive_interrupts(struct adapter * adapter, int count) { struct ifnet *ifp; struct mbuf *mp; #if __FreeBSD_version < 500000 struct ether_header *eh; #endif int eop = 0; int len; u_int8_t accept_frame = 0; int i; int next_to_use = 0; int eop_desc; int rx_npkts = 0; /* Pointer to the receive descriptor being examined. */ struct ixgb_rx_desc *current_desc; IXGB_LOCK_ASSERT(adapter); ifp = adapter->ifp; i = adapter->next_rx_desc_to_check; next_to_use = adapter->next_rx_desc_to_use; eop_desc = adapter->next_rx_desc_to_check; current_desc = &adapter->rx_desc_base[i]; if (!((current_desc->status) & IXGB_RX_DESC_STATUS_DD)) { #ifdef _SV_ adapter->no_pkts_avail++; #endif return (rx_npkts); } while ((current_desc->status & IXGB_RX_DESC_STATUS_DD) && (count != 0)) { mp = adapter->rx_buffer_area[i].m_head; bus_dmamap_sync(adapter->rxtag, adapter->rx_buffer_area[i].map, BUS_DMASYNC_POSTREAD); accept_frame = 1; if (current_desc->status & IXGB_RX_DESC_STATUS_EOP) { count--; eop = 1; } else { eop = 0; } len = current_desc->length; if (current_desc->errors & (IXGB_RX_DESC_ERRORS_CE | IXGB_RX_DESC_ERRORS_SE | IXGB_RX_DESC_ERRORS_P | IXGB_RX_DESC_ERRORS_RXE)) { accept_frame = 0; } if (accept_frame) { /* Assign correct length to the current fragment */ mp->m_len = len; if (adapter->fmp == NULL) { mp->m_pkthdr.len = len; adapter->fmp = mp; /* Store the first mbuf */ adapter->lmp = mp; } else { /* Chain mbuf's together */ mp->m_flags &= ~M_PKTHDR; adapter->lmp->m_next = mp; adapter->lmp = adapter->lmp->m_next; adapter->fmp->m_pkthdr.len += len; } if (eop) { eop_desc = i; adapter->fmp->m_pkthdr.rcvif = ifp; #if __FreeBSD_version < 500000 eh = mtod(adapter->fmp, struct ether_header *); /* Remove ethernet header from mbuf */ m_adj(adapter->fmp, sizeof(struct ether_header)); ixgb_receive_checksum(adapter, current_desc, adapter->fmp); if (current_desc->status & IXGB_RX_DESC_STATUS_VP) VLAN_INPUT_TAG(eh, adapter->fmp, current_desc->special); else ether_input(ifp, eh, adapter->fmp); #else ixgb_receive_checksum(adapter, current_desc, adapter->fmp); #if __FreeBSD_version < 700000 if (current_desc->status & IXGB_RX_DESC_STATUS_VP) VLAN_INPUT_TAG(ifp, adapter->fmp, current_desc->special); #else if (current_desc->status & IXGB_RX_DESC_STATUS_VP) { adapter->fmp->m_pkthdr.ether_vtag = current_desc->special; adapter->fmp->m_flags |= 
M_VLANTAG; } #endif if (adapter->fmp != NULL) { IXGB_UNLOCK(adapter); (*ifp->if_input) (ifp, adapter->fmp); IXGB_LOCK(adapter); rx_npkts++; } #endif adapter->fmp = NULL; adapter->lmp = NULL; } adapter->rx_buffer_area[i].m_head = NULL; } else { adapter->dropped_pkts++; if (adapter->fmp != NULL) m_freem(adapter->fmp); adapter->fmp = NULL; adapter->lmp = NULL; } /* Zero out the receive descriptors status */ current_desc->status = 0; /* Advance our pointers to the next descriptor */ if (++i == adapter->num_rx_desc) { i = 0; current_desc = adapter->rx_desc_base; } else current_desc++; } adapter->next_rx_desc_to_check = i; if (--i < 0) i = (adapter->num_rx_desc - 1); /* * 82597EX: Workaround for redundent write back in receive descriptor ring (causes * memory corruption). Avoid using and re-submitting the most recently received RX * descriptor back to hardware. * * if(Last written back descriptor == EOP bit set descriptor) * then avoid re-submitting the most recently received RX descriptor * back to hardware. * if(Last written back descriptor != EOP bit set descriptor) * then avoid re-submitting the most recently received RX descriptors * till last EOP bit set descriptor. */ if (eop_desc != i) { if (++eop_desc == adapter->num_rx_desc) eop_desc = 0; i = eop_desc; } /* Replenish the descriptors with new mbufs till last EOP bit set descriptor */ while (next_to_use != i) { current_desc = &adapter->rx_desc_base[next_to_use]; if ((current_desc->errors & (IXGB_RX_DESC_ERRORS_CE | IXGB_RX_DESC_ERRORS_SE | IXGB_RX_DESC_ERRORS_P | IXGB_RX_DESC_ERRORS_RXE))) { mp = adapter->rx_buffer_area[next_to_use].m_head; ixgb_get_buf(next_to_use, adapter, mp); } else { if (ixgb_get_buf(next_to_use, adapter, NULL) == ENOBUFS) break; } /* Advance our pointers to the next descriptor */ if (++next_to_use == adapter->num_rx_desc) { next_to_use = 0; current_desc = adapter->rx_desc_base; } else current_desc++; } adapter->next_rx_desc_to_use = next_to_use; if (--next_to_use < 0) next_to_use = (adapter->num_rx_desc - 1); /* Advance the IXGB's Receive Queue #0 "Tail Pointer" */ IXGB_WRITE_REG(&adapter->hw, RDT, next_to_use); return (rx_npkts); } /********************************************************************* * * Verify that the hardware indicated that the checksum is valid. * Inform the stack about the status of checksum so that stack * doesn't spend time verifying the checksum. * *********************************************************************/ static void ixgb_receive_checksum(struct adapter * adapter, struct ixgb_rx_desc * rx_desc, struct mbuf * mp) { if (rx_desc->status & IXGB_RX_DESC_STATUS_IXSM) { mp->m_pkthdr.csum_flags = 0; return; } if (rx_desc->status & IXGB_RX_DESC_STATUS_IPCS) { /* Did it pass? */ if (!(rx_desc->errors & IXGB_RX_DESC_ERRORS_IPE)) { /* IP Checksum Good */ mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED; mp->m_pkthdr.csum_flags |= CSUM_IP_VALID; } else { mp->m_pkthdr.csum_flags = 0; } } if (rx_desc->status & IXGB_RX_DESC_STATUS_TCPCS) { /* Did it pass? 
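 * (TCPCS covers the TCP/UDP checksum. On success the stack is told
 * the payload checksum is already verified: CSUM_DATA_VALID together
 * with CSUM_PSEUDO_HDR and a csum_data of 0xffff is FreeBSD's
 * convention for "fully checked, pseudo-header included".)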
*/ if (!(rx_desc->errors & IXGB_RX_DESC_ERRORS_TCPE)) { mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); mp->m_pkthdr.csum_data = htons(0xffff); } } return; } static void ixgb_enable_vlans(struct adapter * adapter) { uint32_t ctrl; ctrl = IXGB_READ_REG(&adapter->hw, CTRL0); ctrl |= IXGB_CTRL0_VME; IXGB_WRITE_REG(&adapter->hw, CTRL0, ctrl); return; } static void ixgb_enable_intr(struct adapter * adapter) { IXGB_WRITE_REG(&adapter->hw, IMS, (IXGB_INT_RXT0 | IXGB_INT_TXDW | IXGB_INT_RXDMT0 | IXGB_INT_LSC | IXGB_INT_RXO)); return; } static void ixgb_disable_intr(struct adapter * adapter) { IXGB_WRITE_REG(&adapter->hw, IMC, ~0); return; } void ixgb_write_pci_cfg(struct ixgb_hw * hw, uint32_t reg, uint16_t * value) { pci_write_config(((struct ixgb_osdep *) hw->back)->dev, reg, *value, 2); } /********************************************************************** * * Update the board statistics counters. * **********************************************************************/ static void ixgb_update_stats_counters(struct adapter * adapter) { adapter->stats.crcerrs += IXGB_READ_REG(&adapter->hw, CRCERRS); adapter->stats.gprcl += IXGB_READ_REG(&adapter->hw, GPRCL); adapter->stats.gprch += IXGB_READ_REG(&adapter->hw, GPRCH); adapter->stats.gorcl += IXGB_READ_REG(&adapter->hw, GORCL); adapter->stats.gorch += IXGB_READ_REG(&adapter->hw, GORCH); adapter->stats.bprcl += IXGB_READ_REG(&adapter->hw, BPRCL); adapter->stats.bprch += IXGB_READ_REG(&adapter->hw, BPRCH); adapter->stats.mprcl += IXGB_READ_REG(&adapter->hw, MPRCL); adapter->stats.mprch += IXGB_READ_REG(&adapter->hw, MPRCH); adapter->stats.roc += IXGB_READ_REG(&adapter->hw, ROC); adapter->stats.mpc += IXGB_READ_REG(&adapter->hw, MPC); adapter->stats.dc += IXGB_READ_REG(&adapter->hw, DC); adapter->stats.rlec += IXGB_READ_REG(&adapter->hw, RLEC); adapter->stats.xonrxc += IXGB_READ_REG(&adapter->hw, XONRXC); adapter->stats.xontxc += IXGB_READ_REG(&adapter->hw, XONTXC); adapter->stats.xoffrxc += IXGB_READ_REG(&adapter->hw, XOFFRXC); adapter->stats.xofftxc += IXGB_READ_REG(&adapter->hw, XOFFTXC); adapter->stats.gptcl += IXGB_READ_REG(&adapter->hw, GPTCL); adapter->stats.gptch += IXGB_READ_REG(&adapter->hw, GPTCH); adapter->stats.gotcl += IXGB_READ_REG(&adapter->hw, GOTCL); adapter->stats.gotch += IXGB_READ_REG(&adapter->hw, GOTCH); adapter->stats.ruc += IXGB_READ_REG(&adapter->hw, RUC); adapter->stats.rfc += IXGB_READ_REG(&adapter->hw, RFC); adapter->stats.rjc += IXGB_READ_REG(&adapter->hw, RJC); adapter->stats.torl += IXGB_READ_REG(&adapter->hw, TORL); adapter->stats.torh += IXGB_READ_REG(&adapter->hw, TORH); adapter->stats.totl += IXGB_READ_REG(&adapter->hw, TOTL); adapter->stats.toth += IXGB_READ_REG(&adapter->hw, TOTH); adapter->stats.tprl += IXGB_READ_REG(&adapter->hw, TPRL); adapter->stats.tprh += IXGB_READ_REG(&adapter->hw, TPRH); adapter->stats.tptl += IXGB_READ_REG(&adapter->hw, TPTL); adapter->stats.tpth += IXGB_READ_REG(&adapter->hw, TPTH); adapter->stats.plt64c += IXGB_READ_REG(&adapter->hw, PLT64C); adapter->stats.mptcl += IXGB_READ_REG(&adapter->hw, MPTCL); adapter->stats.mptch += IXGB_READ_REG(&adapter->hw, MPTCH); adapter->stats.bptcl += IXGB_READ_REG(&adapter->hw, BPTCL); adapter->stats.bptch += IXGB_READ_REG(&adapter->hw, BPTCH); adapter->stats.uprcl += IXGB_READ_REG(&adapter->hw, UPRCL); adapter->stats.uprch += IXGB_READ_REG(&adapter->hw, UPRCH); adapter->stats.vprcl += IXGB_READ_REG(&adapter->hw, VPRCL); adapter->stats.vprch += IXGB_READ_REG(&adapter->hw, VPRCH); adapter->stats.jprcl += IXGB_READ_REG(&adapter->hw, 
JPRCL); adapter->stats.jprch += IXGB_READ_REG(&adapter->hw, JPRCH); adapter->stats.rnbc += IXGB_READ_REG(&adapter->hw, RNBC); adapter->stats.icbc += IXGB_READ_REG(&adapter->hw, ICBC); adapter->stats.ecbc += IXGB_READ_REG(&adapter->hw, ECBC); adapter->stats.uptcl += IXGB_READ_REG(&adapter->hw, UPTCL); adapter->stats.uptch += IXGB_READ_REG(&adapter->hw, UPTCH); adapter->stats.vptcl += IXGB_READ_REG(&adapter->hw, VPTCL); adapter->stats.vptch += IXGB_READ_REG(&adapter->hw, VPTCH); adapter->stats.jptcl += IXGB_READ_REG(&adapter->hw, JPTCL); adapter->stats.jptch += IXGB_READ_REG(&adapter->hw, JPTCH); adapter->stats.tsctc += IXGB_READ_REG(&adapter->hw, TSCTC); adapter->stats.tsctfc += IXGB_READ_REG(&adapter->hw, TSCTFC); adapter->stats.ibic += IXGB_READ_REG(&adapter->hw, IBIC); adapter->stats.lfc += IXGB_READ_REG(&adapter->hw, LFC); adapter->stats.pfrc += IXGB_READ_REG(&adapter->hw, PFRC); adapter->stats.pftc += IXGB_READ_REG(&adapter->hw, PFTC); adapter->stats.mcfrc += IXGB_READ_REG(&adapter->hw, MCFRC); } static uint64_t ixgb_get_counter(struct ifnet *ifp, ift_counter cnt) { struct adapter *adapter; adapter = if_getsoftc(ifp); switch (cnt) { case IFCOUNTER_IPACKETS: return (adapter->stats.gprcl); case IFCOUNTER_OPACKETS: return ( adapter->stats.gptcl); case IFCOUNTER_IBYTES: return (adapter->stats.gorcl); case IFCOUNTER_OBYTES: return (adapter->stats.gotcl); case IFCOUNTER_IMCASTS: return ( adapter->stats.mprcl); case IFCOUNTER_COLLISIONS: return (0); case IFCOUNTER_IERRORS: return (adapter->dropped_pkts + adapter->stats.crcerrs + adapter->stats.rnbc + adapter->stats.mpc + adapter->stats.rlec); default: return (if_get_counter_default(ifp, cnt)); } } /********************************************************************** * * This routine is called only when ixgb_display_debug_stats is enabled. * This routine provides a way to take a look at important statistics * maintained by the driver and hardware. * **********************************************************************/ static void ixgb_print_hw_stats(struct adapter * adapter) { char buf_speed[100], buf_type[100]; ixgb_bus_speed bus_speed; ixgb_bus_type bus_type; device_t dev; dev = adapter->dev; #ifdef _SV_ device_printf(dev, "Packets not Avail = %ld\n", adapter->no_pkts_avail); device_printf(dev, "CleanTxInterrupts = %ld\n", adapter->clean_tx_interrupts); device_printf(dev, "ICR RXDMT0 = %lld\n", (long long)adapter->sv_stats.icr_rxdmt0); device_printf(dev, "ICR RXO = %lld\n", (long long)adapter->sv_stats.icr_rxo); device_printf(dev, "ICR RXT0 = %lld\n", (long long)adapter->sv_stats.icr_rxt0); device_printf(dev, "ICR TXDW = %lld\n", (long long)adapter->sv_stats.icr_TXDW); #endif /* _SV_ */ bus_speed = adapter->hw.bus.speed; bus_type = adapter->hw.bus.type; sprintf(buf_speed, bus_speed == ixgb_bus_speed_33 ? "33MHz" : bus_speed == ixgb_bus_speed_66 ? "66MHz" : bus_speed == ixgb_bus_speed_100 ? "100MHz" : bus_speed == ixgb_bus_speed_133 ? "133MHz" : "UNKNOWN"); device_printf(dev, "PCI_Bus_Speed = %s\n", buf_speed); sprintf(buf_type, bus_type == ixgb_bus_type_pci ? "PCI" : bus_type == ixgb_bus_type_pcix ? 
"PCI-X" : "UNKNOWN"); device_printf(dev, "PCI_Bus_Type = %s\n", buf_type); device_printf(dev, "Tx Descriptors not Avail1 = %ld\n", adapter->no_tx_desc_avail1); device_printf(dev, "Tx Descriptors not Avail2 = %ld\n", adapter->no_tx_desc_avail2); device_printf(dev, "Std Mbuf Failed = %ld\n", adapter->mbuf_alloc_failed); device_printf(dev, "Std Cluster Failed = %ld\n", adapter->mbuf_cluster_failed); device_printf(dev, "Defer count = %lld\n", (long long)adapter->stats.dc); device_printf(dev, "Missed Packets = %lld\n", (long long)adapter->stats.mpc); device_printf(dev, "Receive No Buffers = %lld\n", (long long)adapter->stats.rnbc); device_printf(dev, "Receive length errors = %lld\n", (long long)adapter->stats.rlec); device_printf(dev, "Crc errors = %lld\n", (long long)adapter->stats.crcerrs); device_printf(dev, "Driver dropped packets = %ld\n", adapter->dropped_pkts); device_printf(dev, "XON Rcvd = %lld\n", (long long)adapter->stats.xonrxc); device_printf(dev, "XON Xmtd = %lld\n", (long long)adapter->stats.xontxc); device_printf(dev, "XOFF Rcvd = %lld\n", (long long)adapter->stats.xoffrxc); device_printf(dev, "XOFF Xmtd = %lld\n", (long long)adapter->stats.xofftxc); device_printf(dev, "Good Packets Rcvd = %lld\n", (long long)adapter->stats.gprcl); device_printf(dev, "Good Packets Xmtd = %lld\n", (long long)adapter->stats.gptcl); device_printf(dev, "Jumbo frames recvd = %lld\n", (long long)adapter->stats.jprcl); device_printf(dev, "Jumbo frames Xmtd = %lld\n", (long long)adapter->stats.jptcl); return; } static int ixgb_sysctl_stats(SYSCTL_HANDLER_ARGS) { int error; int result; struct adapter *adapter; result = -1; error = sysctl_handle_int(oidp, &result, 0, req); if (error || !req->newptr) return (error); if (result == 1) { adapter = (struct adapter *) arg1; ixgb_print_hw_stats(adapter); } return error; } Index: head/sys/dev/ixgbe/if_ix.c =================================================================== --- head/sys/dev/ixgbe/if_ix.c (revision 302383) +++ head/sys/dev/ixgbe/if_ix.c (revision 302384) @@ -1,6000 +1,6001 @@ /****************************************************************************** Copyright (c) 2001-2015, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************/ /*$FreeBSD$*/ #ifndef IXGBE_STANDALONE_BUILD #include "opt_inet.h" #include "opt_inet6.h" #include "opt_rss.h" #endif #include "ixgbe.h" #ifdef RSS #include #include #endif /********************************************************************* * Driver version *********************************************************************/ char ixgbe_driver_version[] = "3.1.13-k"; /********************************************************************* * PCI Device ID Table * * Used by probe to select devices to load on * Last field stores an index into ixgbe_strings * Last entry must be all 0s * * { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index } *********************************************************************/ static ixgbe_vendor_info_t ixgbe_vendor_info_array[] = { {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_DUAL_PORT, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_SINGLE_PORT, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_CX4, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT2, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_DA_DUAL_PORT, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_CX4_DUAL_PORT, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_XF_LR, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_SFP_LOM, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4_MEZZ, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_XAUI_LOM, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_CX4, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_T3_LOM, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_COMBO_BACKPLANE, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_BACKPLANE_FCOE, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_SF2, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_FCOE, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599EN_SFP, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_SF_QP, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_QSFP_SF_QP, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540T, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540T1, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550T, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550T1, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_X_KR, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_X_KX4, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_X_10G_T, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_X_SFP, 0, 0, 0}, /* required last entry */ {0, 0, 0, 0, 0} }; /********************************************************************* * Table of branding strings 
*********************************************************************/ static char *ixgbe_strings[] = { "Intel(R) PRO/10GbE PCI-Express Network Driver" }; /********************************************************************* * Function prototypes *********************************************************************/ static int ixgbe_probe(device_t); static int ixgbe_attach(device_t); static int ixgbe_detach(device_t); static int ixgbe_shutdown(device_t); static int ixgbe_suspend(device_t); static int ixgbe_resume(device_t); static int ixgbe_ioctl(struct ifnet *, u_long, caddr_t); static void ixgbe_init(void *); static void ixgbe_init_locked(struct adapter *); static void ixgbe_stop(void *); #if __FreeBSD_version >= 1100036 static uint64_t ixgbe_get_counter(struct ifnet *, ift_counter); #endif static void ixgbe_add_media_types(struct adapter *); static void ixgbe_media_status(struct ifnet *, struct ifmediareq *); static int ixgbe_media_change(struct ifnet *); static void ixgbe_identify_hardware(struct adapter *); static int ixgbe_allocate_pci_resources(struct adapter *); static void ixgbe_get_slot_info(struct adapter *); static int ixgbe_allocate_msix(struct adapter *); static int ixgbe_allocate_legacy(struct adapter *); static int ixgbe_setup_msix(struct adapter *); static void ixgbe_free_pci_resources(struct adapter *); static void ixgbe_local_timer(void *); static int ixgbe_setup_interface(device_t, struct adapter *); static void ixgbe_config_gpie(struct adapter *); static void ixgbe_config_dmac(struct adapter *); static void ixgbe_config_delay_values(struct adapter *); static void ixgbe_config_link(struct adapter *); static void ixgbe_check_wol_support(struct adapter *); static int ixgbe_setup_low_power_mode(struct adapter *); static void ixgbe_rearm_queues(struct adapter *, u64); static void ixgbe_initialize_transmit_units(struct adapter *); static void ixgbe_initialize_receive_units(struct adapter *); static void ixgbe_enable_rx_drop(struct adapter *); static void ixgbe_disable_rx_drop(struct adapter *); static void ixgbe_initialize_rss_mapping(struct adapter *); static void ixgbe_enable_intr(struct adapter *); static void ixgbe_disable_intr(struct adapter *); static void ixgbe_update_stats_counters(struct adapter *); static void ixgbe_set_promisc(struct adapter *); static void ixgbe_set_multi(struct adapter *); static void ixgbe_update_link_status(struct adapter *); static void ixgbe_set_ivar(struct adapter *, u8, u8, s8); static void ixgbe_configure_ivars(struct adapter *); static u8 * ixgbe_mc_array_itr(struct ixgbe_hw *, u8 **, u32 *); static void ixgbe_setup_vlan_hw_support(struct adapter *); static void ixgbe_register_vlan(void *, struct ifnet *, u16); static void ixgbe_unregister_vlan(void *, struct ifnet *, u16); static void ixgbe_add_device_sysctls(struct adapter *); static void ixgbe_add_hw_stats(struct adapter *); static int ixgbe_set_flowcntl(struct adapter *, int); static int ixgbe_set_advertise(struct adapter *, int); /* Sysctl handlers */ static void ixgbe_set_sysctl_value(struct adapter *, const char *, const char *, int *, int); static int ixgbe_sysctl_flowcntl(SYSCTL_HANDLER_ARGS); static int ixgbe_sysctl_advertise(SYSCTL_HANDLER_ARGS); static int ixgbe_sysctl_thermal_test(SYSCTL_HANDLER_ARGS); static int ixgbe_sysctl_dmac(SYSCTL_HANDLER_ARGS); static int ixgbe_sysctl_phy_temp(SYSCTL_HANDLER_ARGS); static int ixgbe_sysctl_phy_overtemp_occurred(SYSCTL_HANDLER_ARGS); #ifdef IXGBE_DEBUG static int ixgbe_sysctl_power_state(SYSCTL_HANDLER_ARGS); static int 
ixgbe_sysctl_print_rss_config(SYSCTL_HANDLER_ARGS); #endif static int ixgbe_sysctl_wol_enable(SYSCTL_HANDLER_ARGS); static int ixgbe_sysctl_wufc(SYSCTL_HANDLER_ARGS); static int ixgbe_sysctl_eee_enable(SYSCTL_HANDLER_ARGS); static int ixgbe_sysctl_eee_negotiated(SYSCTL_HANDLER_ARGS); static int ixgbe_sysctl_eee_rx_lpi_status(SYSCTL_HANDLER_ARGS); static int ixgbe_sysctl_eee_tx_lpi_status(SYSCTL_HANDLER_ARGS); static int ixgbe_sysctl_eee_tx_lpi_delay(SYSCTL_HANDLER_ARGS); /* Support for pluggable optic modules */ static bool ixgbe_sfp_probe(struct adapter *); static void ixgbe_setup_optics(struct adapter *); /* Legacy (single vector interrupt handler */ static void ixgbe_legacy_irq(void *); /* The MSI/X Interrupt handlers */ static void ixgbe_msix_que(void *); static void ixgbe_msix_link(void *); /* Deferred interrupt tasklets */ static void ixgbe_handle_que(void *, int); static void ixgbe_handle_link(void *, int); static void ixgbe_handle_msf(void *, int); static void ixgbe_handle_mod(void *, int); static void ixgbe_handle_phy(void *, int); #ifdef IXGBE_FDIR static void ixgbe_reinit_fdir(void *, int); #endif #ifdef PCI_IOV static void ixgbe_ping_all_vfs(struct adapter *); static void ixgbe_handle_mbx(void *, int); static int ixgbe_init_iov(device_t, u16, const nvlist_t *); static void ixgbe_uninit_iov(device_t); static int ixgbe_add_vf(device_t, u16, const nvlist_t *); static void ixgbe_initialize_iov(struct adapter *); static void ixgbe_recalculate_max_frame(struct adapter *); static void ixgbe_init_vf(struct adapter *, struct ixgbe_vf *); #endif /* PCI_IOV */ /********************************************************************* * FreeBSD Device Interface Entry Points *********************************************************************/ static device_method_t ix_methods[] = { /* Device interface */ DEVMETHOD(device_probe, ixgbe_probe), DEVMETHOD(device_attach, ixgbe_attach), DEVMETHOD(device_detach, ixgbe_detach), DEVMETHOD(device_shutdown, ixgbe_shutdown), DEVMETHOD(device_suspend, ixgbe_suspend), DEVMETHOD(device_resume, ixgbe_resume), #ifdef PCI_IOV DEVMETHOD(pci_iov_init, ixgbe_init_iov), DEVMETHOD(pci_iov_uninit, ixgbe_uninit_iov), DEVMETHOD(pci_iov_add_vf, ixgbe_add_vf), #endif /* PCI_IOV */ DEVMETHOD_END }; static driver_t ix_driver = { "ix", ix_methods, sizeof(struct adapter), }; devclass_t ix_devclass; DRIVER_MODULE(ix, pci, ix_driver, ix_devclass, 0, 0); MODULE_DEPEND(ix, pci, 1, 1, 1); MODULE_DEPEND(ix, ether, 1, 1, 1); #ifdef DEV_NETMAP MODULE_DEPEND(ix, netmap, 1, 1, 1); #endif /* DEV_NETMAP */ /* ** TUNEABLE PARAMETERS: */ static SYSCTL_NODE(_hw, OID_AUTO, ix, CTLFLAG_RD, 0, "IXGBE driver parameters"); /* ** AIM: Adaptive Interrupt Moderation ** which means that the interrupt rate ** is varied over time based on the ** traffic for that interrupt vector */ static int ixgbe_enable_aim = TRUE; SYSCTL_INT(_hw_ix, OID_AUTO, enable_aim, CTLFLAG_RWTUN, &ixgbe_enable_aim, 0, "Enable adaptive interrupt moderation"); static int ixgbe_max_interrupt_rate = (4000000 / IXGBE_LOW_LATENCY); SYSCTL_INT(_hw_ix, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN, &ixgbe_max_interrupt_rate, 0, "Maximum interrupts per second"); /* How many packets rxeof tries to clean at a time */ static int ixgbe_rx_process_limit = 256; SYSCTL_INT(_hw_ix, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN, &ixgbe_rx_process_limit, 0, "Maximum number of received packets to process at a time," "-1 means unlimited"); /* How many packets txeof tries to clean at a time */ static int ixgbe_tx_process_limit = 256; 
SYSCTL_INT(_hw_ix, OID_AUTO, tx_process_limit, CTLFLAG_RDTUN, &ixgbe_tx_process_limit, 0, "Maximum number of sent packets to process at a time," "-1 means unlimited"); /* Flow control setting, default to full */ static int ixgbe_flow_control = ixgbe_fc_full; SYSCTL_INT(_hw_ix, OID_AUTO, flow_control, CTLFLAG_RDTUN, &ixgbe_flow_control, 0, "Default flow control used for all adapters"); /* Advertise Speed, default to 0 (auto) */ static int ixgbe_advertise_speed = 0; SYSCTL_INT(_hw_ix, OID_AUTO, advertise_speed, CTLFLAG_RDTUN, &ixgbe_advertise_speed, 0, "Default advertised speed for all adapters"); /* ** Smart speed setting, default to on ** this only works as a compile option ** right now as its during attach, set ** this to 'ixgbe_smart_speed_off' to ** disable. */ static int ixgbe_smart_speed = ixgbe_smart_speed_on; /* * MSIX should be the default for best performance, * but this allows it to be forced off for testing. */ static int ixgbe_enable_msix = 1; SYSCTL_INT(_hw_ix, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &ixgbe_enable_msix, 0, "Enable MSI-X interrupts"); /* * Number of Queues, can be set to 0, * it then autoconfigures based on the * number of cpus with a max of 8. This * can be overriden manually here. */ static int ixgbe_num_queues = 0; SYSCTL_INT(_hw_ix, OID_AUTO, num_queues, CTLFLAG_RDTUN, &ixgbe_num_queues, 0, "Number of queues to configure, 0 indicates autoconfigure"); /* ** Number of TX descriptors per ring, ** setting higher than RX as this seems ** the better performing choice. */ static int ixgbe_txd = PERFORM_TXD; SYSCTL_INT(_hw_ix, OID_AUTO, txd, CTLFLAG_RDTUN, &ixgbe_txd, 0, "Number of transmit descriptors per queue"); /* Number of RX descriptors per ring */ static int ixgbe_rxd = PERFORM_RXD; SYSCTL_INT(_hw_ix, OID_AUTO, rxd, CTLFLAG_RDTUN, &ixgbe_rxd, 0, "Number of receive descriptors per queue"); /* ** Defining this on will allow the use ** of unsupported SFP+ modules, note that ** doing so you are on your own :) */ static int allow_unsupported_sfp = FALSE; TUNABLE_INT("hw.ix.unsupported_sfp", &allow_unsupported_sfp); /* Keep running tab on them for sanity check */ static int ixgbe_total_ports; #ifdef IXGBE_FDIR /* ** Flow Director actually 'steals' ** part of the packet buffer as its ** filter pool, this variable controls ** how much it uses: ** 0 = 64K, 1 = 128K, 2 = 256K */ static int fdir_pballoc = 1; #endif #ifdef DEV_NETMAP /* * The #ifdef DEV_NETMAP / #endif blocks in this file are meant to * be a reference on how to implement netmap support in a driver. * Additional comments are in ixgbe_netmap.h . * * contains functions for netmap support * that extend the standard driver. */ #include #endif /* DEV_NETMAP */ static MALLOC_DEFINE(M_IXGBE, "ix", "ix driver allocations"); /********************************************************************* * Device identification routine * * ixgbe_probe determines if the driver should be loaded on * adapter based on PCI vendor/device id of the adapter. 
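 *
 * The scan below walks the sentinel-terminated device table: vendor
 * and device IDs must match exactly, while a zero subvendor or
 * subdevice entry acts as a wildcard. A stand-alone sketch of that
 * matching rule (the table contents are made up for illustration):
 */
#if 0	/* illustrative sketch only, not compiled into the driver */
#include <stdio.h>

struct sk_dev_id {
	unsigned	vendor_id, device_id;
	unsigned	subvendor_id, subdevice_id;	/* 0 == wildcard */
};

static const struct sk_dev_id sk_table[] = {
	{0x8086, 0x10fb, 0, 0},		/* hypothetical wildcard entry */
	{0x8086, 0x1528, 0x8086, 1},	/* hypothetical exact entry */
	{0, 0, 0, 0}			/* required all-zero sentinel */
};

static int
sk_probe_match(unsigned v, unsigned d, unsigned sv, unsigned sd)
{
	const struct sk_dev_id *ent;

	for (ent = sk_table; ent->vendor_id != 0; ent++)
		if (v == ent->vendor_id && d == ent->device_id &&
		    (sv == ent->subvendor_id || ent->subvendor_id == 0) &&
		    (sd == ent->subdevice_id || ent->subdevice_id == 0))
			return (1);
	return (0);
}

int
main(void)
{
	printf("%d\n", sk_probe_match(0x8086, 0x10fb, 0x1043, 0x85aa)); /* 1 */
	printf("%d\n", sk_probe_match(0x8086, 0x1528, 0x8086, 2));	/* 0 */
	return (0);
}
#endif
/*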
* * return BUS_PROBE_DEFAULT on success, positive on failure *********************************************************************/ static int ixgbe_probe(device_t dev) { ixgbe_vendor_info_t *ent; u16 pci_vendor_id = 0; u16 pci_device_id = 0; u16 pci_subvendor_id = 0; u16 pci_subdevice_id = 0; char adapter_name[256]; INIT_DEBUGOUT("ixgbe_probe: begin"); pci_vendor_id = pci_get_vendor(dev); if (pci_vendor_id != IXGBE_INTEL_VENDOR_ID) return (ENXIO); pci_device_id = pci_get_device(dev); pci_subvendor_id = pci_get_subvendor(dev); pci_subdevice_id = pci_get_subdevice(dev); ent = ixgbe_vendor_info_array; while (ent->vendor_id != 0) { if ((pci_vendor_id == ent->vendor_id) && (pci_device_id == ent->device_id) && ((pci_subvendor_id == ent->subvendor_id) || (ent->subvendor_id == 0)) && ((pci_subdevice_id == ent->subdevice_id) || (ent->subdevice_id == 0))) { sprintf(adapter_name, "%s, Version - %s", ixgbe_strings[ent->index], ixgbe_driver_version); device_set_desc_copy(dev, adapter_name); ++ixgbe_total_ports; return (BUS_PROBE_DEFAULT); } ent++; } return (ENXIO); } /********************************************************************* * Device initialization routine * * The attach entry point is called when the driver is being loaded. * This routine identifies the type of hardware, allocates all resources * and initializes the hardware. * * return 0 on success, positive on failure *********************************************************************/ static int ixgbe_attach(device_t dev) { struct adapter *adapter; struct ixgbe_hw *hw; int error = 0; u16 csum; u32 ctrl_ext; INIT_DEBUGOUT("ixgbe_attach: begin"); /* Allocate, clear, and link in our adapter structure */ adapter = device_get_softc(dev); adapter->dev = dev; hw = &adapter->hw; #ifdef DEV_NETMAP adapter->init_locked = ixgbe_init_locked; adapter->stop_locked = ixgbe_stop; #endif /* Core Lock Init*/ IXGBE_CORE_LOCK_INIT(adapter, device_get_nameunit(dev)); /* Set up the timer callout */ callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0); /* Determine hardware revision */ ixgbe_identify_hardware(adapter); /* Do base PCI setup - map BAR0 */ if (ixgbe_allocate_pci_resources(adapter)) { device_printf(dev, "Allocation of PCI resources failed\n"); error = ENXIO; goto err_out; } /* Sysctls for limiting the amount of work done in the taskqueues */ ixgbe_set_sysctl_value(adapter, "rx_processing_limit", "max number of rx packets to process", &adapter->rx_process_limit, ixgbe_rx_process_limit); ixgbe_set_sysctl_value(adapter, "tx_processing_limit", "max number of tx packets to process", &adapter->tx_process_limit, ixgbe_tx_process_limit); /* Do descriptor calc and sanity checks */ if (((ixgbe_txd * sizeof(union ixgbe_adv_tx_desc)) % DBA_ALIGN) != 0 || ixgbe_txd < MIN_TXD || ixgbe_txd > MAX_TXD) { device_printf(dev, "TXD config issue, using default!\n"); adapter->num_tx_desc = DEFAULT_TXD; } else adapter->num_tx_desc = ixgbe_txd; /* ** With many RX rings it is easy to exceed the ** system mbuf allocation. Tuning nmbclusters ** can alleviate this. 
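 * (The check below estimates the worst-case cluster demand as
 * rxd * queues * ports and falls back to DEFAULT_RXD when that
 * estimate would exceed nmbclusters.)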
*/ if (nmbclusters > 0) { int s; s = (ixgbe_rxd * adapter->num_queues) * ixgbe_total_ports; if (s > nmbclusters) { device_printf(dev, "RX Descriptors exceed " "system mbuf max, using default instead!\n"); ixgbe_rxd = DEFAULT_RXD; } } if (((ixgbe_rxd * sizeof(union ixgbe_adv_rx_desc)) % DBA_ALIGN) != 0 || ixgbe_rxd < MIN_RXD || ixgbe_rxd > MAX_RXD) { device_printf(dev, "RXD config issue, using default!\n"); adapter->num_rx_desc = DEFAULT_RXD; } else adapter->num_rx_desc = ixgbe_rxd; /* Allocate our TX/RX Queues */ if (ixgbe_allocate_queues(adapter)) { error = ENOMEM; goto err_out; } /* Allocate multicast array memory. */ adapter->mta = malloc(sizeof(*adapter->mta) * MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT); if (adapter->mta == NULL) { device_printf(dev, "Can not allocate multicast setup array\n"); error = ENOMEM; goto err_late; } /* Initialize the shared code */ hw->allow_unsupported_sfp = allow_unsupported_sfp; error = ixgbe_init_shared_code(hw); if (error == IXGBE_ERR_SFP_NOT_PRESENT) { /* ** No optics in this port, set up ** so the timer routine will probe ** for later insertion. */ adapter->sfp_probe = TRUE; error = 0; } else if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) { device_printf(dev, "Unsupported SFP+ module detected!\n"); error = EIO; goto err_late; } else if (error) { device_printf(dev, "Unable to initialize the shared code\n"); error = EIO; goto err_late; } /* Make sure we have a good EEPROM before we read from it */ if (ixgbe_validate_eeprom_checksum(&adapter->hw, &csum) < 0) { device_printf(dev, "The EEPROM Checksum Is Not Valid\n"); error = EIO; goto err_late; } error = ixgbe_init_hw(hw); switch (error) { case IXGBE_ERR_EEPROM_VERSION: device_printf(dev, "This device is a pre-production adapter/" "LOM. Please be aware there may be issues associated " "with your hardware.\nIf you are experiencing problems " "please contact your Intel or hardware representative " "who provided you with this hardware.\n"); break; case IXGBE_ERR_SFP_NOT_SUPPORTED: device_printf(dev, "Unsupported SFP+ Module\n"); error = EIO; goto err_late; case IXGBE_ERR_SFP_NOT_PRESENT: device_printf(dev, "No SFP+ Module found\n"); /* falls thru */ default: break; } /* hw.ix defaults init */ ixgbe_set_advertise(adapter, ixgbe_advertise_speed); ixgbe_set_flowcntl(adapter, ixgbe_flow_control); adapter->enable_aim = ixgbe_enable_aim; if ((adapter->msix > 1) && (ixgbe_enable_msix)) error = ixgbe_allocate_msix(adapter); else error = ixgbe_allocate_legacy(adapter); if (error) goto err_late; /* Enable the optics for 82599 SFP+ fiber */ ixgbe_enable_tx_laser(hw); /* Enable power to the phy. 
*/ ixgbe_set_phy_power(hw, TRUE); /* Setup OS specific network interface */ if (ixgbe_setup_interface(dev, adapter) != 0) goto err_late; /* Initialize statistics */ ixgbe_update_stats_counters(adapter); /* Register for VLAN events */ adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config, ixgbe_register_vlan, adapter, EVENTHANDLER_PRI_FIRST); adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig, ixgbe_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST); /* Check PCIE slot type/speed/width */ ixgbe_get_slot_info(adapter); /* Set an initial default flow control & dmac value */ adapter->fc = ixgbe_fc_full; adapter->dmac = 0; adapter->eee_enabled = 0; #ifdef PCI_IOV if ((hw->mac.type != ixgbe_mac_82598EB) && (adapter->msix > 1)) { nvlist_t *pf_schema, *vf_schema; hw->mbx.ops.init_params(hw); pf_schema = pci_iov_schema_alloc_node(); vf_schema = pci_iov_schema_alloc_node(); pci_iov_schema_add_unicast_mac(vf_schema, "mac-addr", 0, NULL); pci_iov_schema_add_bool(vf_schema, "mac-anti-spoof", IOV_SCHEMA_HASDEFAULT, TRUE); pci_iov_schema_add_bool(vf_schema, "allow-set-mac", IOV_SCHEMA_HASDEFAULT, FALSE); pci_iov_schema_add_bool(vf_schema, "allow-promisc", IOV_SCHEMA_HASDEFAULT, FALSE); error = pci_iov_attach(dev, pf_schema, vf_schema); if (error != 0) { device_printf(dev, "Error %d setting up SR-IOV\n", error); } } #endif /* PCI_IOV */ /* Check for certain supported features */ ixgbe_check_wol_support(adapter); /* Add sysctls */ ixgbe_add_device_sysctls(adapter); ixgbe_add_hw_stats(adapter); /* let hardware know driver is loaded */ ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT); ctrl_ext |= IXGBE_CTRL_EXT_DRV_LOAD; IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext); #ifdef DEV_NETMAP ixgbe_netmap_attach(adapter); #endif /* DEV_NETMAP */ INIT_DEBUGOUT("ixgbe_attach: end"); return (0); err_late: ixgbe_free_transmit_structures(adapter); ixgbe_free_receive_structures(adapter); err_out: if (adapter->ifp != NULL) if_free(adapter->ifp); ixgbe_free_pci_resources(adapter); free(adapter->mta, M_DEVBUF); return (error); } /********************************************************************* * Device removal routine * * The detach entry point is called when the driver is being removed. * This routine stops the adapter and deallocates all the resources * that were allocated for driver operation. 
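 *
 * Detach refuses to proceed while a dependent subsystem (a VLAN trunk,
 * or attached VFs under PCI_IOV) still references the port, returning
 * EBUSY so the caller can retry once the dependency is gone. A generic
 * stand-alone sketch of that guard, with hypothetical names:
 */
#if 0	/* illustrative sketch only, not compiled into the driver */
#include <stdio.h>
#include <errno.h>

struct sk_port {
	int	vlan_refs;	/* dependents that must detach first */
	int	running;
};

static int
sk_port_detach(struct sk_port *p)
{
	if (p->vlan_refs != 0)
		return (EBUSY);	/* caller must remove the VLANs first */
	p->running = 0;
	/* ... drain deferred work, then free resources ... */
	return (0);
}

int
main(void)
{
	struct sk_port p = { 1, 1 };

	printf("%d\n", sk_port_detach(&p));	/* EBUSY */
	p.vlan_refs = 0;
	printf("%d\n", sk_port_detach(&p));	/* 0 */
	return (0);
}
#endif
/*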
* * return 0 on success, positive on failure *********************************************************************/ static int ixgbe_detach(device_t dev) { struct adapter *adapter = device_get_softc(dev); struct ix_queue *que = adapter->queues; struct tx_ring *txr = adapter->tx_rings; u32 ctrl_ext; INIT_DEBUGOUT("ixgbe_detach: begin"); /* Make sure VLANS are not using driver */ if (adapter->ifp->if_vlantrunk != NULL) { device_printf(dev,"Vlan in use, detach first\n"); return (EBUSY); } #ifdef PCI_IOV if (pci_iov_detach(dev) != 0) { device_printf(dev, "SR-IOV in use; detach first.\n"); return (EBUSY); } #endif /* PCI_IOV */ ether_ifdetach(adapter->ifp); /* Stop the adapter */ IXGBE_CORE_LOCK(adapter); ixgbe_setup_low_power_mode(adapter); IXGBE_CORE_UNLOCK(adapter); for (int i = 0; i < adapter->num_queues; i++, que++, txr++) { if (que->tq) { #ifndef IXGBE_LEGACY_TX taskqueue_drain(que->tq, &txr->txq_task); #endif taskqueue_drain(que->tq, &que->que_task); taskqueue_free(que->tq); } } /* Drain the Link queue */ if (adapter->tq) { taskqueue_drain(adapter->tq, &adapter->link_task); taskqueue_drain(adapter->tq, &adapter->mod_task); taskqueue_drain(adapter->tq, &adapter->msf_task); #ifdef PCI_IOV taskqueue_drain(adapter->tq, &adapter->mbx_task); #endif taskqueue_drain(adapter->tq, &adapter->phy_task); #ifdef IXGBE_FDIR taskqueue_drain(adapter->tq, &adapter->fdir_task); #endif taskqueue_free(adapter->tq); } /* let hardware know driver is unloading */ ctrl_ext = IXGBE_READ_REG(&adapter->hw, IXGBE_CTRL_EXT); ctrl_ext &= ~IXGBE_CTRL_EXT_DRV_LOAD; IXGBE_WRITE_REG(&adapter->hw, IXGBE_CTRL_EXT, ctrl_ext); /* Unregister VLAN events */ if (adapter->vlan_attach != NULL) EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach); if (adapter->vlan_detach != NULL) EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach); callout_drain(&adapter->timer); #ifdef DEV_NETMAP netmap_detach(adapter->ifp); #endif /* DEV_NETMAP */ ixgbe_free_pci_resources(adapter); bus_generic_detach(dev); if_free(adapter->ifp); ixgbe_free_transmit_structures(adapter); ixgbe_free_receive_structures(adapter); free(adapter->mta, M_DEVBUF); IXGBE_CORE_LOCK_DESTROY(adapter); return (0); } /********************************************************************* * * Shutdown entry point * **********************************************************************/ static int ixgbe_shutdown(device_t dev) { struct adapter *adapter = device_get_softc(dev); int error = 0; INIT_DEBUGOUT("ixgbe_shutdown: begin"); IXGBE_CORE_LOCK(adapter); error = ixgbe_setup_low_power_mode(adapter); IXGBE_CORE_UNLOCK(adapter); return (error); } /** * Methods for going from: * D0 -> D3: ixgbe_suspend * D3 -> D0: ixgbe_resume */ static int ixgbe_suspend(device_t dev) { struct adapter *adapter = device_get_softc(dev); int error = 0; INIT_DEBUGOUT("ixgbe_suspend: begin"); IXGBE_CORE_LOCK(adapter); error = ixgbe_setup_low_power_mode(adapter); IXGBE_CORE_UNLOCK(adapter); return (error); } static int ixgbe_resume(device_t dev) { struct adapter *adapter = device_get_softc(dev); struct ifnet *ifp = adapter->ifp; struct ixgbe_hw *hw = &adapter->hw; u32 wus; INIT_DEBUGOUT("ixgbe_resume: begin"); IXGBE_CORE_LOCK(adapter); /* Read & clear WUS register */ wus = IXGBE_READ_REG(hw, IXGBE_WUS); if (wus) device_printf(dev, "Woken up by (WUS): %#010x\n", IXGBE_READ_REG(hw, IXGBE_WUS)); IXGBE_WRITE_REG(hw, IXGBE_WUS, 0xffffffff); /* And clear WUFC until next low-power transition */ IXGBE_WRITE_REG(hw, IXGBE_WUFC, 0); /* * Required after D3->D0 transition; * will re-advertise all 
previous advertised speeds */ if (ifp->if_flags & IFF_UP) ixgbe_init_locked(adapter); IXGBE_CORE_UNLOCK(adapter); return (0); } /********************************************************************* * Ioctl entry point * * ixgbe_ioctl is called when the user wants to configure the * interface. * * return 0 on success, positive on failure **********************************************************************/ static int ixgbe_ioctl(struct ifnet * ifp, u_long command, caddr_t data) { struct adapter *adapter = ifp->if_softc; struct ifreq *ifr = (struct ifreq *) data; #if defined(INET) || defined(INET6) struct ifaddr *ifa = (struct ifaddr *)data; #endif int error = 0; bool avoid_reset = FALSE; switch (command) { case SIOCSIFADDR: #ifdef INET if (ifa->ifa_addr->sa_family == AF_INET) avoid_reset = TRUE; #endif #ifdef INET6 if (ifa->ifa_addr->sa_family == AF_INET6) avoid_reset = TRUE; #endif /* ** Calling init results in link renegotiation, ** so we avoid doing it when possible. */ if (avoid_reset) { ifp->if_flags |= IFF_UP; if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) ixgbe_init(adapter); #ifdef INET if (!(ifp->if_flags & IFF_NOARP)) arp_ifinit(ifp, ifa); #endif } else error = ether_ioctl(ifp, command, data); break; case SIOCSIFMTU: IOCTL_DEBUGOUT("ioctl: SIOCSIFMTU (Set Interface MTU)"); if (ifr->ifr_mtu > IXGBE_MAX_MTU) { error = EINVAL; } else { IXGBE_CORE_LOCK(adapter); ifp->if_mtu = ifr->ifr_mtu; adapter->max_frame_size = ifp->if_mtu + IXGBE_MTU_HDR; - ixgbe_init_locked(adapter); + if (ifp->if_drv_flags & IFF_DRV_RUNNING) + ixgbe_init_locked(adapter); #ifdef PCI_IOV ixgbe_recalculate_max_frame(adapter); #endif IXGBE_CORE_UNLOCK(adapter); } break; case SIOCSIFFLAGS: IOCTL_DEBUGOUT("ioctl: SIOCSIFFLAGS (Set Interface Flags)"); IXGBE_CORE_LOCK(adapter); if (ifp->if_flags & IFF_UP) { if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) { if ((ifp->if_flags ^ adapter->if_flags) & (IFF_PROMISC | IFF_ALLMULTI)) { ixgbe_set_promisc(adapter); } } else ixgbe_init_locked(adapter); } else if (ifp->if_drv_flags & IFF_DRV_RUNNING) ixgbe_stop(adapter); adapter->if_flags = ifp->if_flags; IXGBE_CORE_UNLOCK(adapter); break; case SIOCADDMULTI: case SIOCDELMULTI: IOCTL_DEBUGOUT("ioctl: SIOC(ADD|DEL)MULTI"); if (ifp->if_drv_flags & IFF_DRV_RUNNING) { IXGBE_CORE_LOCK(adapter); ixgbe_disable_intr(adapter); ixgbe_set_multi(adapter); ixgbe_enable_intr(adapter); IXGBE_CORE_UNLOCK(adapter); } break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: IOCTL_DEBUGOUT("ioctl: SIOCxIFMEDIA (Get/Set Interface Media)"); error = ifmedia_ioctl(ifp, ifr, &adapter->media, command); break; case SIOCSIFCAP: { IOCTL_DEBUGOUT("ioctl: SIOCSIFCAP (Set Capabilities)"); int mask = ifr->ifr_reqcap ^ ifp->if_capenable; if (!mask) break; /* HW cannot turn these on/off separately */ if (mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) { ifp->if_capenable ^= IFCAP_RXCSUM; ifp->if_capenable ^= IFCAP_RXCSUM_IPV6; } if (mask & IFCAP_TXCSUM) ifp->if_capenable ^= IFCAP_TXCSUM; if (mask & IFCAP_TXCSUM_IPV6) ifp->if_capenable ^= IFCAP_TXCSUM_IPV6; if (mask & IFCAP_TSO4) ifp->if_capenable ^= IFCAP_TSO4; if (mask & IFCAP_TSO6) ifp->if_capenable ^= IFCAP_TSO6; if (mask & IFCAP_LRO) ifp->if_capenable ^= IFCAP_LRO; if (mask & IFCAP_VLAN_HWTAGGING) ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; if (mask & IFCAP_VLAN_HWFILTER) ifp->if_capenable ^= IFCAP_VLAN_HWFILTER; if (mask & IFCAP_VLAN_HWTSO) ifp->if_capenable ^= IFCAP_VLAN_HWTSO; if (ifp->if_drv_flags & IFF_DRV_RUNNING) { IXGBE_CORE_LOCK(adapter); ixgbe_init_locked(adapter); IXGBE_CORE_UNLOCK(adapter); } VLAN_CAPABILITIES(ifp); 
break; } #if __FreeBSD_version >= 1100036 case SIOCGI2C: { struct ixgbe_hw *hw = &adapter->hw; struct ifi2creq i2c; int i; IOCTL_DEBUGOUT("ioctl: SIOCGI2C (Get I2C Data)"); error = copyin(ifr->ifr_data, &i2c, sizeof(i2c)); if (error != 0) break; if (i2c.dev_addr != 0xA0 && i2c.dev_addr != 0xA2) { error = EINVAL; break; } if (i2c.len > sizeof(i2c.data)) { error = EINVAL; break; } for (i = 0; i < i2c.len; i++) hw->phy.ops.read_i2c_byte(hw, i2c.offset + i, i2c.dev_addr, &i2c.data[i]); error = copyout(&i2c, ifr->ifr_data, sizeof(i2c)); break; } #endif default: IOCTL_DEBUGOUT1("ioctl: UNKNOWN (0x%X)\n", (int)command); error = ether_ioctl(ifp, command, data); break; } return (error); } /* * Set the various hardware offload abilities. * * This takes the ifnet's if_capenable flags (e.g. set by the user using * ifconfig) and indicates to the OS via the ifnet's if_hwassist field what * mbuf offload flags the driver will understand. */ static void ixgbe_set_if_hwassist(struct adapter *adapter) { struct ifnet *ifp = adapter->ifp; struct ixgbe_hw *hw = &adapter->hw; ifp->if_hwassist = 0; #if __FreeBSD_version >= 1000000 if (ifp->if_capenable & IFCAP_TSO4) ifp->if_hwassist |= CSUM_IP_TSO; if (ifp->if_capenable & IFCAP_TSO6) ifp->if_hwassist |= CSUM_IP6_TSO; if (ifp->if_capenable & IFCAP_TXCSUM) { ifp->if_hwassist |= (CSUM_IP | CSUM_IP_UDP | CSUM_IP_TCP); if (hw->mac.type != ixgbe_mac_82598EB) ifp->if_hwassist |= CSUM_IP_SCTP; } if (ifp->if_capenable & IFCAP_TXCSUM_IPV6) { ifp->if_hwassist |= (CSUM_IP6_UDP | CSUM_IP6_TCP); if (hw->mac.type != ixgbe_mac_82598EB) ifp->if_hwassist |= CSUM_IP6_SCTP; } #else if (ifp->if_capenable & IFCAP_TSO) ifp->if_hwassist |= CSUM_TSO; if (ifp->if_capenable & IFCAP_TXCSUM) { ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP); if (hw->mac.type != ixgbe_mac_82598EB) ifp->if_hwassist |= CSUM_SCTP; } #endif } /********************************************************************* * Init entry point * * This routine is used in two ways. It is used by the stack as * init entry point in network interface structure. It is also used * by the driver as a hw/sw initialization routine to get to a * consistent state. * * return 0 on success, positive on failure **********************************************************************/ #define IXGBE_MHADD_MFS_SHIFT 16 static void ixgbe_init_locked(struct adapter *adapter) { struct ifnet *ifp = adapter->ifp; device_t dev = adapter->dev; struct ixgbe_hw *hw = &adapter->hw; struct tx_ring *txr; struct rx_ring *rxr; u32 txdctl, mhadd; u32 rxdctl, rxctrl; int err = 0; #ifdef PCI_IOV enum ixgbe_iov_mode mode; #endif mtx_assert(&adapter->core_mtx, MA_OWNED); INIT_DEBUGOUT("ixgbe_init_locked: begin"); hw->adapter_stopped = FALSE; ixgbe_stop_adapter(hw); callout_stop(&adapter->timer); #ifdef PCI_IOV mode = ixgbe_get_iov_mode(adapter); adapter->pool = ixgbe_max_vfs(mode); /* Queue indices may change with IOV mode */ for (int i = 0; i < adapter->num_queues; i++) { adapter->rx_rings[i].me = ixgbe_pf_que_index(mode, i); adapter->tx_rings[i].me = ixgbe_pf_que_index(mode, i); } #endif /* reprogram the RAR[0] in case user changed it. 
*/ ixgbe_set_rar(hw, 0, hw->mac.addr, adapter->pool, IXGBE_RAH_AV); /* Get the latest mac address, User can use a LAA */ bcopy(IF_LLADDR(ifp), hw->mac.addr, IXGBE_ETH_LENGTH_OF_ADDRESS); ixgbe_set_rar(hw, 0, hw->mac.addr, adapter->pool, 1); hw->addr_ctrl.rar_used_count = 1; /* Set hardware offload abilities from ifnet flags */ ixgbe_set_if_hwassist(adapter); /* Prepare transmit descriptors and buffers */ if (ixgbe_setup_transmit_structures(adapter)) { device_printf(dev, "Could not setup transmit structures\n"); ixgbe_stop(adapter); return; } ixgbe_init_hw(hw); #ifdef PCI_IOV ixgbe_initialize_iov(adapter); #endif ixgbe_initialize_transmit_units(adapter); /* Setup Multicast table */ ixgbe_set_multi(adapter); /* Determine the correct mbuf pool, based on frame size */ if (adapter->max_frame_size <= MCLBYTES) adapter->rx_mbuf_sz = MCLBYTES; else adapter->rx_mbuf_sz = MJUMPAGESIZE; /* Prepare receive descriptors and buffers */ if (ixgbe_setup_receive_structures(adapter)) { device_printf(dev, "Could not setup receive structures\n"); ixgbe_stop(adapter); return; } /* Configure RX settings */ ixgbe_initialize_receive_units(adapter); /* Enable SDP & MSIX interrupts based on adapter */ ixgbe_config_gpie(adapter); /* Set MTU size */ if (ifp->if_mtu > ETHERMTU) { /* aka IXGBE_MAXFRS on 82599 and newer */ mhadd = IXGBE_READ_REG(hw, IXGBE_MHADD); mhadd &= ~IXGBE_MHADD_MFS_MASK; mhadd |= adapter->max_frame_size << IXGBE_MHADD_MFS_SHIFT; IXGBE_WRITE_REG(hw, IXGBE_MHADD, mhadd); } /* Now enable all the queues */ for (int i = 0; i < adapter->num_queues; i++) { txr = &adapter->tx_rings[i]; txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txr->me)); txdctl |= IXGBE_TXDCTL_ENABLE; /* Set WTHRESH to 8, burst writeback */ txdctl |= (8 << 16); /* * When the internal queue falls below PTHRESH (32), * start prefetching as long as there are at least * HTHRESH (1) buffers ready. The values are taken * from the Intel linux driver 3.8.21. * Prefetching enables tx line rate even with 1 queue. */ txdctl |= (32 << 0) | (1 << 8); IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txr->me), txdctl); } for (int i = 0, j = 0; i < adapter->num_queues; i++) { rxr = &adapter->rx_rings[i]; rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxr->me)); if (hw->mac.type == ixgbe_mac_82598EB) { /* ** PTHRESH = 21 ** HTHRESH = 4 ** WTHRESH = 8 */ rxdctl &= ~0x3FFFFF; rxdctl |= 0x080420; } rxdctl |= IXGBE_RXDCTL_ENABLE; IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxr->me), rxdctl); for (; j < 10; j++) { if (IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxr->me)) & IXGBE_RXDCTL_ENABLE) break; else msec_delay(1); } wmb(); #ifdef DEV_NETMAP /* * In netmap mode, we must preserve the buffers made * available to userspace before the if_init() * (this is true by default on the TX side, because * init makes all buffers available to userspace). * * netmap_reset() and the device specific routines * (e.g. ixgbe_setup_receive_rings()) map these * buffers at the end of the NIC ring, so here we * must set the RDT (tail) register to make sure * they are not overwritten. * * In this driver the NIC ring starts at RDH = 0, * RDT points to the last slot available for reception (?), * so RDT = num_rx_desc - 1 means the whole ring is available. 
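 * (That reservation is what the nm_kr_rxspace() term below expresses:
 * the tail is backed off by the number of slots userspace still holds,
 * so the NIC cannot overwrite them before they are returned.)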
*/ if (ifp->if_capenable & IFCAP_NETMAP) { struct netmap_adapter *na = NA(adapter->ifp); struct netmap_kring *kring = &na->rx_rings[i]; int t = na->num_rx_desc - 1 - nm_kr_rxspace(kring); IXGBE_WRITE_REG(hw, IXGBE_RDT(rxr->me), t); } else #endif /* DEV_NETMAP */ IXGBE_WRITE_REG(hw, IXGBE_RDT(rxr->me), adapter->num_rx_desc - 1); } /* Enable Receive engine */ rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL); if (hw->mac.type == ixgbe_mac_82598EB) rxctrl |= IXGBE_RXCTRL_DMBYPS; rxctrl |= IXGBE_RXCTRL_RXEN; ixgbe_enable_rx_dma(hw, rxctrl); callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter); /* Set up MSI/X routing */ if (ixgbe_enable_msix) { ixgbe_configure_ivars(adapter); /* Set up auto-mask */ if (hw->mac.type == ixgbe_mac_82598EB) IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE); else { IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(0), 0xFFFFFFFF); IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(1), 0xFFFFFFFF); } } else { /* Simple settings for Legacy/MSI */ ixgbe_set_ivar(adapter, 0, 0, 0); ixgbe_set_ivar(adapter, 0, 0, 1); IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE); } #ifdef IXGBE_FDIR /* Init Flow director */ if (hw->mac.type != ixgbe_mac_82598EB) { u32 hdrm = 32 << fdir_pballoc; hw->mac.ops.setup_rxpba(hw, 0, hdrm, PBA_STRATEGY_EQUAL); ixgbe_init_fdir_signature_82599(&adapter->hw, fdir_pballoc); } #endif /* * Check on any SFP devices that * need to be kick-started */ if (hw->phy.type == ixgbe_phy_none) { err = hw->phy.ops.identify(hw); if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) { device_printf(dev, "Unsupported SFP+ module type was detected.\n"); return; } } /* Set moderation on the Link interrupt */ IXGBE_WRITE_REG(hw, IXGBE_EITR(adapter->vector), IXGBE_LINK_ITR); /* Configure Energy Efficient Ethernet for supported devices */ if (hw->mac.ops.setup_eee) { err = hw->mac.ops.setup_eee(hw, adapter->eee_enabled); if (err) device_printf(dev, "Error setting up EEE: %d\n", err); } /* Enable power to the phy. 
*/ ixgbe_set_phy_power(hw, TRUE); /* Config/Enable Link */ ixgbe_config_link(adapter); /* Hardware Packet Buffer & Flow Control setup */ ixgbe_config_delay_values(adapter); /* Initialize the FC settings */ ixgbe_start_hw(hw); /* Set up VLAN support and filter */ ixgbe_setup_vlan_hw_support(adapter); /* Setup DMA Coalescing */ ixgbe_config_dmac(adapter); /* And now turn on interrupts */ ixgbe_enable_intr(adapter); #ifdef PCI_IOV /* Enable the use of the MBX by the VF's */ { u32 reg = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT); reg |= IXGBE_CTRL_EXT_PFRSTD; IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, reg); } #endif /* Now inform the stack we're ready */ ifp->if_drv_flags |= IFF_DRV_RUNNING; return; } static void ixgbe_init(void *arg) { struct adapter *adapter = arg; IXGBE_CORE_LOCK(adapter); ixgbe_init_locked(adapter); IXGBE_CORE_UNLOCK(adapter); return; } static void ixgbe_config_gpie(struct adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; u32 gpie; gpie = IXGBE_READ_REG(hw, IXGBE_GPIE); /* Fan Failure Interrupt */ if (hw->device_id == IXGBE_DEV_ID_82598AT) gpie |= IXGBE_SDP1_GPIEN; /* * Module detection (SDP2) * Media ready (SDP1) */ if (hw->mac.type == ixgbe_mac_82599EB) { gpie |= IXGBE_SDP2_GPIEN; if (hw->device_id != IXGBE_DEV_ID_82599_QSFP_SF_QP) gpie |= IXGBE_SDP1_GPIEN; } /* * Thermal Failure Detection (X540) * Link Detection (X552 SFP+, X552/X557-AT) */ if (hw->mac.type == ixgbe_mac_X540 || hw->device_id == IXGBE_DEV_ID_X550EM_X_SFP || hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T) gpie |= IXGBE_SDP0_GPIEN_X540; if (adapter->msix > 1) { /* Enable Enhanced MSIX mode */ gpie |= IXGBE_GPIE_MSIX_MODE; gpie |= IXGBE_GPIE_EIAME | IXGBE_GPIE_PBA_SUPPORT | IXGBE_GPIE_OCD; } IXGBE_WRITE_REG(hw, IXGBE_GPIE, gpie); return; } /* * Requires adapter->max_frame_size to be set. 
*/ static void ixgbe_config_delay_values(struct adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; u32 rxpb, frame, size, tmp; frame = adapter->max_frame_size; /* Calculate High Water */ switch (hw->mac.type) { case ixgbe_mac_X540: case ixgbe_mac_X550: case ixgbe_mac_X550EM_x: tmp = IXGBE_DV_X540(frame, frame); break; default: tmp = IXGBE_DV(frame, frame); break; } size = IXGBE_BT2KB(tmp); rxpb = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(0)) >> 10; hw->fc.high_water[0] = rxpb - size; /* Now calculate Low Water */ switch (hw->mac.type) { case ixgbe_mac_X540: case ixgbe_mac_X550: case ixgbe_mac_X550EM_x: tmp = IXGBE_LOW_DV_X540(frame); break; default: tmp = IXGBE_LOW_DV(frame); break; } hw->fc.low_water[0] = IXGBE_BT2KB(tmp); hw->fc.requested_mode = adapter->fc; hw->fc.pause_time = IXGBE_FC_PAUSE; hw->fc.send_xon = TRUE; }
/* ** ** MSIX Interrupt Handlers and Tasklets ** */ static inline void ixgbe_enable_queue(struct adapter *adapter, u32 vector) { struct ixgbe_hw *hw = &adapter->hw; u64 queue = (u64)1 << vector; u32 mask; if (hw->mac.type == ixgbe_mac_82598EB) { mask = (IXGBE_EIMS_RTX_QUEUE & queue); IXGBE_WRITE_REG(hw, IXGBE_EIMS, mask); } else { mask = (queue & 0xFFFFFFFF); if (mask) IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(0), mask); mask = (queue >> 32); if (mask) IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(1), mask); } } static inline void ixgbe_disable_queue(struct adapter *adapter, u32 vector) { struct ixgbe_hw *hw = &adapter->hw; u64 queue = (u64)1 << vector; u32 mask; if (hw->mac.type == ixgbe_mac_82598EB) { mask = (IXGBE_EIMS_RTX_QUEUE & queue); IXGBE_WRITE_REG(hw, IXGBE_EIMC, mask); } else { mask = (queue & 0xFFFFFFFF); if (mask) IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(0), mask); mask = (queue >> 32); if (mask) IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(1), mask); } } static void ixgbe_handle_que(void *context, int pending) { struct ix_queue *que = context; struct adapter *adapter = que->adapter; struct tx_ring *txr = que->txr; struct ifnet *ifp = adapter->ifp; if (ifp->if_drv_flags & IFF_DRV_RUNNING) { ixgbe_rxeof(que); IXGBE_TX_LOCK(txr); ixgbe_txeof(txr); #ifndef IXGBE_LEGACY_TX if (!drbr_empty(ifp, txr->br)) ixgbe_mq_start_locked(ifp, txr); #else if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) ixgbe_start_locked(txr, ifp); #endif IXGBE_TX_UNLOCK(txr); } /* Reenable this interrupt */ if (que->res != NULL) ixgbe_enable_queue(adapter, que->msix); else ixgbe_enable_intr(adapter); return; }
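#if 0	/* Illustrative sketch only, not compiled. It restates the
	 * EIMS/EIMC arithmetic used by ixgbe_enable_queue() and
	 * ixgbe_disable_queue() above: MACs newer than 82598 expose 64
	 * queue interrupt bits through two 32-bit EIMS_EX/EIMC_EX
	 * registers, so a 64-bit queue bitmask is split into a low and
	 * a high word. ix_split_mask() is a hypothetical name. */
static void
ix_split_mask(u64 queues, u32 *lo, u32 *hi)
{
	*lo = (u32)(queues & 0xFFFFFFFF);	/* -> IXGBE_EIMS_EX(0) */
	*hi = (u32)(queues >> 32);		/* -> IXGBE_EIMS_EX(1) */
}
/* Example: vector 5 gives lo = 0x20, hi = 0, so only EIMS_EX(0) is
 * written; vector 33 gives lo = 0, hi = 0x2, touching only EIMS_EX(1).
 * This is also why the (u64)1 << vector shift must be done in 64 bits. */
#endif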
" "REPLACE IMMEDIATELY!!\n"); IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EICR_GPI_SDP1_BY_MAC(hw)); } /* Link status change */ if (reg_eicr & IXGBE_EICR_LSC) taskqueue_enqueue(adapter->tq, &adapter->link_task); /* External PHY interrupt */ if (hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T && (reg_eicr & IXGBE_EICR_GPI_SDP0_X540)) taskqueue_enqueue(adapter->tq, &adapter->phy_task); if (more) taskqueue_enqueue(que->tq, &que->que_task); else ixgbe_enable_intr(adapter); return; } /********************************************************************* * * MSIX Queue Interrupt Service routine * **********************************************************************/ void ixgbe_msix_que(void *arg) { struct ix_queue *que = arg; struct adapter *adapter = que->adapter; struct ifnet *ifp = adapter->ifp; struct tx_ring *txr = que->txr; struct rx_ring *rxr = que->rxr; bool more; u32 newitr = 0; /* Protect against spurious interrupts */ if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) return; ixgbe_disable_queue(adapter, que->msix); ++que->irqs; more = ixgbe_rxeof(que); IXGBE_TX_LOCK(txr); ixgbe_txeof(txr); #ifdef IXGBE_LEGACY_TX if (!IFQ_DRV_IS_EMPTY(ifp->if_snd)) ixgbe_start_locked(txr, ifp); #else if (!drbr_empty(ifp, txr->br)) ixgbe_mq_start_locked(ifp, txr); #endif IXGBE_TX_UNLOCK(txr); /* Do AIM now? */ if (adapter->enable_aim == FALSE) goto no_calc; /* ** Do Adaptive Interrupt Moderation: ** - Write out last calculated setting ** - Calculate based on average size over ** the last interval. */ if (que->eitr_setting) IXGBE_WRITE_REG(&adapter->hw, IXGBE_EITR(que->msix), que->eitr_setting); que->eitr_setting = 0; /* Idle, do nothing */ if ((txr->bytes == 0) && (rxr->bytes == 0)) goto no_calc; if ((txr->bytes) && (txr->packets)) newitr = txr->bytes/txr->packets; if ((rxr->bytes) && (rxr->packets)) newitr = max(newitr, (rxr->bytes / rxr->packets)); newitr += 24; /* account for hardware frame, crc */ /* set an upper boundary */ newitr = min(newitr, 3000); /* Be nice to the mid range */ if ((newitr > 300) && (newitr < 1200)) newitr = (newitr / 3); else newitr = (newitr / 2); if (adapter->hw.mac.type == ixgbe_mac_82598EB) newitr |= newitr << 16; else newitr |= IXGBE_EITR_CNT_WDIS; /* save for next interrupt */ que->eitr_setting = newitr; /* Reset state */ txr->bytes = 0; txr->packets = 0; rxr->bytes = 0; rxr->packets = 0; no_calc: if (more) taskqueue_enqueue(que->tq, &que->que_task); else ixgbe_enable_queue(adapter, que->msix); return; } static void ixgbe_msix_link(void *arg) { struct adapter *adapter = arg; struct ixgbe_hw *hw = &adapter->hw; u32 reg_eicr, mod_mask; ++adapter->link_irq; /* Pause other interrupts */ IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_EIMC_OTHER); /* First get the cause */ reg_eicr = IXGBE_READ_REG(hw, IXGBE_EICS); /* Be sure the queue bits are not cleared */ reg_eicr &= ~IXGBE_EICR_RTX_QUEUE; /* Clear interrupt with write */ IXGBE_WRITE_REG(hw, IXGBE_EICR, reg_eicr); /* Link status change */ if (reg_eicr & IXGBE_EICR_LSC) { IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_EIMC_LSC); taskqueue_enqueue(adapter->tq, &adapter->link_task); } if (adapter->hw.mac.type != ixgbe_mac_82598EB) { #ifdef IXGBE_FDIR if (reg_eicr & IXGBE_EICR_FLOW_DIR) { /* This is probably overkill :) */ if (!atomic_cmpset_int(&adapter->fdir_reinit, 0, 1)) return; /* Disable the interrupt */ IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_EICR_FLOW_DIR); taskqueue_enqueue(adapter->tq, &adapter->fdir_task); } else #endif if (reg_eicr & IXGBE_EICR_ECC) { device_printf(adapter->dev, "CRITICAL: ECC ERROR!! 
" "Please Reboot!!\n"); IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_ECC); } /* Check for over temp condition */ if (reg_eicr & IXGBE_EICR_TS) { device_printf(adapter->dev, "CRITICAL: OVER TEMP!! " "PHY IS SHUT DOWN!!\n"); device_printf(adapter->dev, "System shutdown required!\n"); IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_TS); } #ifdef PCI_IOV if (reg_eicr & IXGBE_EICR_MAILBOX) taskqueue_enqueue(adapter->tq, &adapter->mbx_task); #endif } /* Pluggable optics-related interrupt */ if (hw->device_id == IXGBE_DEV_ID_X550EM_X_SFP) mod_mask = IXGBE_EICR_GPI_SDP0_X540; else mod_mask = IXGBE_EICR_GPI_SDP2_BY_MAC(hw); if (ixgbe_is_sfp(hw)) { if (reg_eicr & IXGBE_EICR_GPI_SDP1_BY_MAC(hw)) { IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1_BY_MAC(hw)); taskqueue_enqueue(adapter->tq, &adapter->msf_task); } else if (reg_eicr & mod_mask) { IXGBE_WRITE_REG(hw, IXGBE_EICR, mod_mask); taskqueue_enqueue(adapter->tq, &adapter->mod_task); } } /* Check for fan failure */ if ((hw->device_id == IXGBE_DEV_ID_82598AT) && (reg_eicr & IXGBE_EICR_GPI_SDP1)) { IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1); device_printf(adapter->dev, "\nCRITICAL: FAN FAILURE!! " "REPLACE IMMEDIATELY!!\n"); } /* External PHY interrupt */ if (hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T && (reg_eicr & IXGBE_EICR_GPI_SDP0_X540)) { IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP0_X540); taskqueue_enqueue(adapter->tq, &adapter->phy_task); } /* Re-enable other interrupts */ IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, IXGBE_EIMS_OTHER); return; } /********************************************************************* * * Media Ioctl callback * * This routine is called whenever the user queries the status of * the interface using ifconfig. * **********************************************************************/ static void ixgbe_media_status(struct ifnet * ifp, struct ifmediareq * ifmr) { struct adapter *adapter = ifp->if_softc; struct ixgbe_hw *hw = &adapter->hw; int layer; INIT_DEBUGOUT("ixgbe_media_status: begin"); IXGBE_CORE_LOCK(adapter); ixgbe_update_link_status(adapter); ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; if (!adapter->link_active) { IXGBE_CORE_UNLOCK(adapter); return; } ifmr->ifm_status |= IFM_ACTIVE; layer = adapter->phy_layer; if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_T || layer & IXGBE_PHYSICAL_LAYER_1000BASE_T || layer & IXGBE_PHYSICAL_LAYER_100BASE_TX) switch (adapter->link_speed) { case IXGBE_LINK_SPEED_10GB_FULL: ifmr->ifm_active |= IFM_10G_T | IFM_FDX; break; case IXGBE_LINK_SPEED_1GB_FULL: ifmr->ifm_active |= IFM_1000_T | IFM_FDX; break; case IXGBE_LINK_SPEED_100_FULL: ifmr->ifm_active |= IFM_100_TX | IFM_FDX; break; } if (layer & IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU || layer & IXGBE_PHYSICAL_LAYER_SFP_ACTIVE_DA) switch (adapter->link_speed) { case IXGBE_LINK_SPEED_10GB_FULL: ifmr->ifm_active |= IFM_10G_TWINAX | IFM_FDX; break; } if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_LR) switch (adapter->link_speed) { case IXGBE_LINK_SPEED_10GB_FULL: ifmr->ifm_active |= IFM_10G_LR | IFM_FDX; break; case IXGBE_LINK_SPEED_1GB_FULL: ifmr->ifm_active |= IFM_1000_LX | IFM_FDX; break; } if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_LRM) switch (adapter->link_speed) { case IXGBE_LINK_SPEED_10GB_FULL: ifmr->ifm_active |= IFM_10G_LRM | IFM_FDX; break; case IXGBE_LINK_SPEED_1GB_FULL: ifmr->ifm_active |= IFM_1000_LX | IFM_FDX; break; } if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_SR || layer & IXGBE_PHYSICAL_LAYER_1000BASE_SX) switch (adapter->link_speed) { case IXGBE_LINK_SPEED_10GB_FULL: ifmr->ifm_active |= IFM_10G_SR | 
IFM_FDX; break; case IXGBE_LINK_SPEED_1GB_FULL: ifmr->ifm_active |= IFM_1000_SX | IFM_FDX; break; } if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_CX4) switch (adapter->link_speed) { case IXGBE_LINK_SPEED_10GB_FULL: ifmr->ifm_active |= IFM_10G_CX4 | IFM_FDX; break; } /* ** XXX: These need to use the proper media types once ** they're added. */ #ifndef IFM_ETH_XTYPE if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KR) switch (adapter->link_speed) { case IXGBE_LINK_SPEED_10GB_FULL: ifmr->ifm_active |= IFM_10G_SR | IFM_FDX; break; case IXGBE_LINK_SPEED_2_5GB_FULL: ifmr->ifm_active |= IFM_2500_SX | IFM_FDX; break; case IXGBE_LINK_SPEED_1GB_FULL: ifmr->ifm_active |= IFM_1000_CX | IFM_FDX; break; } else if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KX4 || layer & IXGBE_PHYSICAL_LAYER_1000BASE_KX) switch (adapter->link_speed) { case IXGBE_LINK_SPEED_10GB_FULL: ifmr->ifm_active |= IFM_10G_CX4 | IFM_FDX; break; case IXGBE_LINK_SPEED_2_5GB_FULL: ifmr->ifm_active |= IFM_2500_SX | IFM_FDX; break; case IXGBE_LINK_SPEED_1GB_FULL: ifmr->ifm_active |= IFM_1000_CX | IFM_FDX; break; } #else if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KR) switch (adapter->link_speed) { case IXGBE_LINK_SPEED_10GB_FULL: ifmr->ifm_active |= IFM_10G_KR | IFM_FDX; break; case IXGBE_LINK_SPEED_2_5GB_FULL: ifmr->ifm_active |= IFM_2500_KX | IFM_FDX; break; case IXGBE_LINK_SPEED_1GB_FULL: ifmr->ifm_active |= IFM_1000_KX | IFM_FDX; break; } else if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KX4 || layer & IXGBE_PHYSICAL_LAYER_1000BASE_KX) switch (adapter->link_speed) { case IXGBE_LINK_SPEED_10GB_FULL: ifmr->ifm_active |= IFM_10G_KX4 | IFM_FDX; break; case IXGBE_LINK_SPEED_2_5GB_FULL: ifmr->ifm_active |= IFM_2500_KX | IFM_FDX; break; case IXGBE_LINK_SPEED_1GB_FULL: ifmr->ifm_active |= IFM_1000_KX | IFM_FDX; break; } #endif /* If nothing is recognized... */ if (IFM_SUBTYPE(ifmr->ifm_active) == 0) ifmr->ifm_active |= IFM_UNKNOWN; #if __FreeBSD_version >= 900025 /* Display current flow control setting used on link */ if (hw->fc.current_mode == ixgbe_fc_rx_pause || hw->fc.current_mode == ixgbe_fc_full) ifmr->ifm_active |= IFM_ETH_RXPAUSE; if (hw->fc.current_mode == ixgbe_fc_tx_pause || hw->fc.current_mode == ixgbe_fc_full) ifmr->ifm_active |= IFM_ETH_TXPAUSE; #endif IXGBE_CORE_UNLOCK(adapter); return; }
/********************************************************************* * * Media Ioctl callback * * This routine is called when the user changes speed/duplex using * media/mediaopt option with ifconfig. * **********************************************************************/ static int ixgbe_media_change(struct ifnet * ifp) { struct adapter *adapter = ifp->if_softc; struct ifmedia *ifm = &adapter->media; struct ixgbe_hw *hw = &adapter->hw; ixgbe_link_speed speed = 0; INIT_DEBUGOUT("ixgbe_media_change: begin"); if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) return (EINVAL); if (hw->phy.media_type == ixgbe_media_type_backplane) return (ENODEV); /* ** We don't actually need to check against the supported ** media types of the adapter; ifmedia will take care of ** that for us.
*/ #ifndef IFM_ETH_XTYPE switch (IFM_SUBTYPE(ifm->ifm_media)) { case IFM_AUTO: case IFM_10G_T: speed |= IXGBE_LINK_SPEED_100_FULL; case IFM_10G_LRM: case IFM_10G_SR: /* KR, too */ case IFM_10G_LR: case IFM_10G_CX4: /* KX4 */ speed |= IXGBE_LINK_SPEED_1GB_FULL; case IFM_10G_TWINAX: speed |= IXGBE_LINK_SPEED_10GB_FULL; break; case IFM_1000_T: speed |= IXGBE_LINK_SPEED_100_FULL; case IFM_1000_LX: case IFM_1000_SX: case IFM_1000_CX: /* KX */ speed |= IXGBE_LINK_SPEED_1GB_FULL; break; case IFM_100_TX: speed |= IXGBE_LINK_SPEED_100_FULL; break; default: goto invalid; } #else switch (IFM_SUBTYPE(ifm->ifm_media)) { case IFM_AUTO: case IFM_10G_T: speed |= IXGBE_LINK_SPEED_100_FULL; case IFM_10G_LRM: case IFM_10G_KR: case IFM_10G_LR: case IFM_10G_KX4: speed |= IXGBE_LINK_SPEED_1GB_FULL; case IFM_10G_TWINAX: speed |= IXGBE_LINK_SPEED_10GB_FULL; break; case IFM_1000_T: speed |= IXGBE_LINK_SPEED_100_FULL; case IFM_1000_LX: case IFM_1000_SX: case IFM_1000_KX: speed |= IXGBE_LINK_SPEED_1GB_FULL; break; case IFM_100_TX: speed |= IXGBE_LINK_SPEED_100_FULL; break; default: goto invalid; } #endif hw->mac.autotry_restart = TRUE; hw->mac.ops.setup_link(hw, speed, TRUE); if (IFM_SUBTYPE(ifm->ifm_media) == IFM_AUTO) { adapter->advertise = 0; } else { if ((speed & IXGBE_LINK_SPEED_10GB_FULL) != 0) adapter->advertise |= 1 << 2; if ((speed & IXGBE_LINK_SPEED_1GB_FULL) != 0) adapter->advertise |= 1 << 1; if ((speed & IXGBE_LINK_SPEED_100_FULL) != 0) adapter->advertise |= 1 << 0; } return (0); invalid: device_printf(adapter->dev, "Invalid media type!\n"); return (EINVAL); } static void ixgbe_set_promisc(struct adapter *adapter) { u_int32_t reg_rctl; struct ifnet *ifp = adapter->ifp; int mcnt = 0; reg_rctl = IXGBE_READ_REG(&adapter->hw, IXGBE_FCTRL); reg_rctl &= (~IXGBE_FCTRL_UPE); if (ifp->if_flags & IFF_ALLMULTI) mcnt = MAX_NUM_MULTICAST_ADDRESSES; else { struct ifmultiaddr *ifma; #if __FreeBSD_version < 800000 IF_ADDR_LOCK(ifp); #else if_maddr_rlock(ifp); #endif TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; if (mcnt == MAX_NUM_MULTICAST_ADDRESSES) break; mcnt++; } #if __FreeBSD_version < 800000 IF_ADDR_UNLOCK(ifp); #else if_maddr_runlock(ifp); #endif } if (mcnt < MAX_NUM_MULTICAST_ADDRESSES) reg_rctl &= (~IXGBE_FCTRL_MPE); IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl); if (ifp->if_flags & IFF_PROMISC) { reg_rctl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE); IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl); } else if (ifp->if_flags & IFF_ALLMULTI) { reg_rctl |= IXGBE_FCTRL_MPE; reg_rctl &= ~IXGBE_FCTRL_UPE; IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl); } return; } /********************************************************************* * Multicast Update * * This routine is called whenever multicast address list is updated. 
* **********************************************************************/ #define IXGBE_RAR_ENTRIES 16 static void ixgbe_set_multi(struct adapter *adapter) { u32 fctrl; u8 *update_ptr; struct ifmultiaddr *ifma; struct ixgbe_mc_addr *mta; int mcnt = 0; struct ifnet *ifp = adapter->ifp; IOCTL_DEBUGOUT("ixgbe_set_multi: begin"); mta = adapter->mta; bzero(mta, sizeof(*mta) * MAX_NUM_MULTICAST_ADDRESSES); #if __FreeBSD_version < 800000 IF_ADDR_LOCK(ifp); #else if_maddr_rlock(ifp); #endif TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; if (mcnt == MAX_NUM_MULTICAST_ADDRESSES) break; bcopy(LLADDR((struct sockaddr_dl *) ifma->ifma_addr), mta[mcnt].addr, IXGBE_ETH_LENGTH_OF_ADDRESS); mta[mcnt].vmdq = adapter->pool; mcnt++; } #if __FreeBSD_version < 800000 IF_ADDR_UNLOCK(ifp); #else if_maddr_runlock(ifp); #endif fctrl = IXGBE_READ_REG(&adapter->hw, IXGBE_FCTRL); fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE); if (ifp->if_flags & IFF_PROMISC) fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE); else if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES || ifp->if_flags & IFF_ALLMULTI) { fctrl |= IXGBE_FCTRL_MPE; fctrl &= ~IXGBE_FCTRL_UPE; } else fctrl &= ~(IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE); IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, fctrl); if (mcnt < MAX_NUM_MULTICAST_ADDRESSES) { update_ptr = (u8 *)mta; ixgbe_update_mc_addr_list(&adapter->hw, update_ptr, mcnt, ixgbe_mc_array_itr, TRUE); } return; }
/* * This is an iterator function now needed by the multicast * shared code. It simply feeds the shared code routine the * addresses in the array of ixgbe_set_multi() one by one. */ static u8 * ixgbe_mc_array_itr(struct ixgbe_hw *hw, u8 **update_ptr, u32 *vmdq) { struct ixgbe_mc_addr *mta; mta = (struct ixgbe_mc_addr *)*update_ptr; *vmdq = mta->vmdq; *update_ptr = (u8*)(mta + 1); return (mta->addr); }
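#if 0	/* Illustrative sketch only, not compiled. It shows how the
	 * shared code consumes ixgbe_mc_array_itr() above: the iterator
	 * advances an opaque cursor (update_ptr) through the mta array,
	 * handing back one MAC address and its VMDq pool per call. The
	 * consumer loop paraphrases ixgbe_update_mc_addr_list();
	 * ix_walk_mc_list and hypothetical_hash are made-up names. */
static void
ix_walk_mc_list(struct ixgbe_hw *hw, u8 *list, u32 count)
{
	u8 *addr;
	u32 vmdq;

	for (u32 i = 0; i < count; i++) {
		/* Each call returns mta[i].addr and bumps the cursor
		 * past one struct ixgbe_mc_addr. */
		addr = ixgbe_mc_array_itr(hw, &list, &vmdq);
		/* hypothetical_hash(hw, addr, vmdq); */
	}
}
#endif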
/********************************************************************* * Timer routine * * This routine checks for link status, updates statistics, * and runs the watchdog check. * **********************************************************************/ static void ixgbe_local_timer(void *arg) { struct adapter *adapter = arg; device_t dev = adapter->dev; struct ix_queue *que = adapter->queues; u64 queues = 0; int hung = 0; mtx_assert(&adapter->core_mtx, MA_OWNED); /* Check for pluggable optics */ if (adapter->sfp_probe) if (!ixgbe_sfp_probe(adapter)) goto out; /* Nothing to do */ ixgbe_update_link_status(adapter); ixgbe_update_stats_counters(adapter); /* ** Check the TX queues status ** - mark hung queues so we don't schedule on them ** - watchdog only if all queues show hung */ for (int i = 0; i < adapter->num_queues; i++, que++) { /* Keep track of queues with work for soft irq */ if (que->txr->busy) queues |= ((u64)1 << que->me); /* ** Each time txeof runs without cleaning, but there ** are uncleaned descriptors, it increments busy. If ** we get to the MAX we declare it hung. */ if (que->busy == IXGBE_QUEUE_HUNG) { ++hung; /* Mark the queue as inactive */ adapter->active_queues &= ~((u64)1 << que->me); continue; } else { /* Check if we've come back from hung */ if ((adapter->active_queues & ((u64)1 << que->me)) == 0) adapter->active_queues |= ((u64)1 << que->me); } if (que->busy >= IXGBE_MAX_TX_BUSY) { device_printf(dev, "Warning queue %d " "appears to be hung!\n", i); que->txr->busy = IXGBE_QUEUE_HUNG; ++hung; } } /* Only truly watchdog if all queues show hung */ if (hung == adapter->num_queues) goto watchdog; else if (queues != 0) { /* Force an IRQ on queues with work */ ixgbe_rearm_queues(adapter, queues); } out: callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter); return; watchdog: device_printf(adapter->dev, "Watchdog timeout -- resetting\n"); adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING; adapter->watchdog_events++; ixgbe_init_locked(adapter); }
/* ** Note: this routine updates the OS on the link state; ** the real check of the hardware only happens with ** a link interrupt. */ static void ixgbe_update_link_status(struct adapter *adapter) { struct ifnet *ifp = adapter->ifp; device_t dev = adapter->dev; if (adapter->link_up) { if (adapter->link_active == FALSE) { if (bootverbose) device_printf(dev, "Link is up %d Gbps %s \n", ((adapter->link_speed == 128) ? 10 : 1), "Full Duplex"); adapter->link_active = TRUE; /* Update any Flow Control changes */ ixgbe_fc_enable(&adapter->hw); /* Update DMA coalescing config */ ixgbe_config_dmac(adapter); if_link_state_change(ifp, LINK_STATE_UP); #ifdef PCI_IOV ixgbe_ping_all_vfs(adapter); #endif } } else { /* Link down */ if (adapter->link_active == TRUE) { if (bootverbose) device_printf(dev, "Link is Down\n"); if_link_state_change(ifp, LINK_STATE_DOWN); adapter->link_active = FALSE; #ifdef PCI_IOV ixgbe_ping_all_vfs(adapter); #endif } } return; }
/********************************************************************* * * This routine disables all traffic on the adapter by issuing a * global reset on the MAC and deallocates TX/RX buffers. * **********************************************************************/ static void ixgbe_stop(void *arg) { struct ifnet *ifp; struct adapter *adapter = arg; struct ixgbe_hw *hw = &adapter->hw; ifp = adapter->ifp; mtx_assert(&adapter->core_mtx, MA_OWNED); INIT_DEBUGOUT("ixgbe_stop: begin\n"); ixgbe_disable_intr(adapter); callout_stop(&adapter->timer); /* Let the stack know...*/ ifp->if_drv_flags &= ~IFF_DRV_RUNNING; ixgbe_reset_hw(hw); hw->adapter_stopped = FALSE; ixgbe_stop_adapter(hw); if (hw->mac.type == ixgbe_mac_82599EB) ixgbe_stop_mac_link_on_d3_82599(hw); /* Turn off the laser - noop with no optics */ ixgbe_disable_tx_laser(hw); /* Update the stack */ adapter->link_up = FALSE; ixgbe_update_link_status(adapter); /* reprogram the RAR[0] in case the user changed it. */ ixgbe_set_rar(&adapter->hw, 0, adapter->hw.mac.addr, 0, IXGBE_RAH_AV); return; }
/********************************************************************* * * Determine hardware revision.
* **********************************************************************/ static void ixgbe_identify_hardware(struct adapter *adapter) { device_t dev = adapter->dev; struct ixgbe_hw *hw = &adapter->hw; /* Save off the information about this board */ hw->vendor_id = pci_get_vendor(dev); hw->device_id = pci_get_device(dev); hw->revision_id = pci_read_config(dev, PCIR_REVID, 1); hw->subsystem_vendor_id = pci_read_config(dev, PCIR_SUBVEND_0, 2); hw->subsystem_device_id = pci_read_config(dev, PCIR_SUBDEV_0, 2); /* ** Make sure BUSMASTER is set */ pci_enable_busmaster(dev); /* We need this here to set the num_segs below */ ixgbe_set_mac_type(hw); /* Pick up the 82599 settings */ if (hw->mac.type != ixgbe_mac_82598EB) { hw->phy.smart_speed = ixgbe_smart_speed; adapter->num_segs = IXGBE_82599_SCATTER; } else adapter->num_segs = IXGBE_82598_SCATTER; return; } /********************************************************************* * * Determine optic type * **********************************************************************/ static void ixgbe_setup_optics(struct adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; int layer; layer = adapter->phy_layer = ixgbe_get_supported_physical_layer(hw); if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_T) { adapter->optics = IFM_10G_T; return; } if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_T) { adapter->optics = IFM_1000_T; return; } if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_SX) { adapter->optics = IFM_1000_SX; return; } if (layer & (IXGBE_PHYSICAL_LAYER_10GBASE_LR | IXGBE_PHYSICAL_LAYER_10GBASE_LRM)) { adapter->optics = IFM_10G_LR; return; } if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_SR) { adapter->optics = IFM_10G_SR; return; } if (layer & IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU) { adapter->optics = IFM_10G_TWINAX; return; } if (layer & (IXGBE_PHYSICAL_LAYER_10GBASE_KX4 | IXGBE_PHYSICAL_LAYER_10GBASE_CX4)) { adapter->optics = IFM_10G_CX4; return; } /* If we get here just set the default */ adapter->optics = IFM_ETHER | IFM_AUTO; return; } /********************************************************************* * * Setup the Legacy or MSI Interrupt handler * **********************************************************************/ static int ixgbe_allocate_legacy(struct adapter *adapter) { device_t dev = adapter->dev; struct ix_queue *que = adapter->queues; #ifndef IXGBE_LEGACY_TX struct tx_ring *txr = adapter->tx_rings; #endif int error, rid = 0; /* MSI RID at 1 */ if (adapter->msix == 1) rid = 1; /* We allocate a single interrupt resource */ adapter->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); if (adapter->res == NULL) { device_printf(dev, "Unable to allocate bus resource: " "interrupt\n"); return (ENXIO); } /* * Try allocating a fast interrupt and the associated deferred * processing contexts. 
*/ #ifndef IXGBE_LEGACY_TX TASK_INIT(&txr->txq_task, 0, ixgbe_deferred_mq_start, txr); #endif TASK_INIT(&que->que_task, 0, ixgbe_handle_que, que); que->tq = taskqueue_create_fast("ixgbe_que", M_NOWAIT, taskqueue_thread_enqueue, &que->tq); taskqueue_start_threads(&que->tq, 1, PI_NET, "%s ixq", device_get_nameunit(adapter->dev)); /* Tasklets for Link, SFP and Multispeed Fiber */ TASK_INIT(&adapter->link_task, 0, ixgbe_handle_link, adapter); TASK_INIT(&adapter->mod_task, 0, ixgbe_handle_mod, adapter); TASK_INIT(&adapter->msf_task, 0, ixgbe_handle_msf, adapter); TASK_INIT(&adapter->phy_task, 0, ixgbe_handle_phy, adapter); #ifdef IXGBE_FDIR TASK_INIT(&adapter->fdir_task, 0, ixgbe_reinit_fdir, adapter); #endif adapter->tq = taskqueue_create_fast("ixgbe_link", M_NOWAIT, taskqueue_thread_enqueue, &adapter->tq); taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s linkq", device_get_nameunit(adapter->dev)); if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET | INTR_MPSAFE, NULL, ixgbe_legacy_irq, que, &adapter->tag)) != 0) { device_printf(dev, "Failed to register fast interrupt " "handler: %d\n", error); taskqueue_free(que->tq); taskqueue_free(adapter->tq); que->tq = NULL; adapter->tq = NULL; return (error); } /* For simplicity in the handlers */ adapter->active_queues = IXGBE_EIMS_ENABLE_MASK; return (0); } /********************************************************************* * * Setup MSIX Interrupt resources and handlers * **********************************************************************/ static int ixgbe_allocate_msix(struct adapter *adapter) { device_t dev = adapter->dev; struct ix_queue *que = adapter->queues; struct tx_ring *txr = adapter->tx_rings; int error, rid, vector = 0; int cpu_id = 0; #ifdef RSS cpuset_t cpu_mask; #endif #ifdef RSS /* * If we're doing RSS, the number of queues needs to * match the number of RSS buckets that are configured. * * + If there's more queues than RSS buckets, we'll end * up with queues that get no traffic. * * + If there's more RSS buckets than queues, we'll end * up having multiple RSS buckets map to the same queue, * so there'll be some contention. */ if (adapter->num_queues != rss_getnumbuckets()) { device_printf(dev, "%s: number of queues (%d) != number of RSS buckets (%d)" "; performance will be impacted.\n", __func__, adapter->num_queues, rss_getnumbuckets()); } #endif for (int i = 0; i < adapter->num_queues; i++, vector++, que++, txr++) { rid = vector + 1; que->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); if (que->res == NULL) { device_printf(dev,"Unable to allocate" " bus resource: que interrupt [%d]\n", vector); return (ENXIO); } /* Set the handler function */ error = bus_setup_intr(dev, que->res, INTR_TYPE_NET | INTR_MPSAFE, NULL, ixgbe_msix_que, que, &que->tag); if (error) { que->res = NULL; device_printf(dev, "Failed to register QUE handler"); return (error); } #if __FreeBSD_version >= 800504 bus_describe_intr(dev, que->res, que->tag, "q%d", i); #endif que->msix = vector; adapter->active_queues |= (u64)(1 << que->msix); #ifdef RSS /* * The queue ID is used as the RSS layer bucket ID. * We look up the queue ID -> RSS CPU ID and select * that. */ cpu_id = rss_getcpu(i % rss_getnumbuckets()); #else /* * Bind the msix vector, and thus the * rings to the corresponding cpu. * * This just happens to match the default RSS round-robin * bucket -> queue -> CPU allocation. 
*/ if (adapter->num_queues > 1) cpu_id = i; #endif if (adapter->num_queues > 1) bus_bind_intr(dev, que->res, cpu_id); #ifdef IXGBE_DEBUG #ifdef RSS device_printf(dev, "Bound RSS bucket %d to CPU %d\n", i, cpu_id); #else device_printf(dev, "Bound queue %d to cpu %d\n", i, cpu_id); #endif #endif /* IXGBE_DEBUG */ #ifndef IXGBE_LEGACY_TX TASK_INIT(&txr->txq_task, 0, ixgbe_deferred_mq_start, txr); #endif TASK_INIT(&que->que_task, 0, ixgbe_handle_que, que); que->tq = taskqueue_create_fast("ixgbe_que", M_NOWAIT, taskqueue_thread_enqueue, &que->tq); #ifdef RSS CPU_SETOF(cpu_id, &cpu_mask); taskqueue_start_threads_cpuset(&que->tq, 1, PI_NET, &cpu_mask, "%s (bucket %d)", device_get_nameunit(adapter->dev), cpu_id); #else taskqueue_start_threads(&que->tq, 1, PI_NET, "%s:q%d", device_get_nameunit(adapter->dev), i); #endif } /* and Link */ rid = vector + 1; adapter->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); if (!adapter->res) { device_printf(dev,"Unable to allocate" " bus resource: Link interrupt [%d]\n", rid); return (ENXIO); } /* Set the link handler function */ error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET | INTR_MPSAFE, NULL, ixgbe_msix_link, adapter, &adapter->tag); if (error) { adapter->res = NULL; device_printf(dev, "Failed to register LINK handler"); return (error); } #if __FreeBSD_version >= 800504 bus_describe_intr(dev, adapter->res, adapter->tag, "link"); #endif adapter->vector = vector; /* Tasklets for Link, SFP and Multispeed Fiber */ TASK_INIT(&adapter->link_task, 0, ixgbe_handle_link, adapter); TASK_INIT(&adapter->mod_task, 0, ixgbe_handle_mod, adapter); TASK_INIT(&adapter->msf_task, 0, ixgbe_handle_msf, adapter); #ifdef PCI_IOV TASK_INIT(&adapter->mbx_task, 0, ixgbe_handle_mbx, adapter); #endif TASK_INIT(&adapter->phy_task, 0, ixgbe_handle_phy, adapter); #ifdef IXGBE_FDIR TASK_INIT(&adapter->fdir_task, 0, ixgbe_reinit_fdir, adapter); #endif adapter->tq = taskqueue_create_fast("ixgbe_link", M_NOWAIT, taskqueue_thread_enqueue, &adapter->tq); taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s linkq", device_get_nameunit(adapter->dev)); return (0); } /* * Setup Either MSI/X or MSI */ static int ixgbe_setup_msix(struct adapter *adapter) { device_t dev = adapter->dev; int rid, want, queues, msgs; /* Override by tuneable */ if (ixgbe_enable_msix == 0) goto msi; /* First try MSI/X */ msgs = pci_msix_count(dev); if (msgs == 0) goto msi; rid = PCIR_BAR(MSIX_82598_BAR); adapter->msix_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (adapter->msix_mem == NULL) { rid += 4; /* 82599 maps in higher BAR */ adapter->msix_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); } if (adapter->msix_mem == NULL) { /* May not be enabled */ device_printf(adapter->dev, "Unable to map MSIX table \n"); goto msi; } /* Figure out a reasonable auto config value */ queues = (mp_ncpus > (msgs - 1)) ? (msgs - 1) : mp_ncpus; #ifdef RSS /* If we're doing RSS, clamp at the number of RSS buckets */ if (queues > rss_getnumbuckets()) queues = rss_getnumbuckets(); #endif if (ixgbe_num_queues != 0) queues = ixgbe_num_queues; /* Set max queues to 8 when autoconfiguring */ else if ((ixgbe_num_queues == 0) && (queues > 8)) queues = 8; /* reflect correct sysctl value */ ixgbe_num_queues = queues; /* ** Want one vector (RX/TX pair) per queue ** plus an additional for Link. 
*/ want = queues + 1; if (msgs >= want) msgs = want; else { device_printf(adapter->dev, "MSIX Configuration Problem, " "%d vectors but %d queues wanted!\n", msgs, want); goto msi; } if ((pci_alloc_msix(dev, &msgs) == 0) && (msgs == want)) { device_printf(adapter->dev, "Using MSIX interrupts with %d vectors\n", msgs); adapter->num_queues = queues; return (msgs); } /* ** If MSIX alloc failed or provided us with ** less than needed, free and fall through to MSI */ pci_release_msi(dev); msi: if (adapter->msix_mem != NULL) { bus_release_resource(dev, SYS_RES_MEMORY, rid, adapter->msix_mem); adapter->msix_mem = NULL; } msgs = 1; if (pci_alloc_msi(dev, &msgs) == 0) { device_printf(adapter->dev, "Using an MSI interrupt\n"); return (msgs); } device_printf(adapter->dev, "Using a Legacy interrupt\n"); return (0); } static int ixgbe_allocate_pci_resources(struct adapter *adapter) { int rid; device_t dev = adapter->dev; rid = PCIR_BAR(0); adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (!(adapter->pci_mem)) { device_printf(dev, "Unable to allocate bus resource: memory\n"); return (ENXIO); } /* Save bus_space values for READ/WRITE_REG macros */ adapter->osdep.mem_bus_space_tag = rman_get_bustag(adapter->pci_mem); adapter->osdep.mem_bus_space_handle = rman_get_bushandle(adapter->pci_mem); /* Set hw values for shared code */ adapter->hw.hw_addr = (u8 *) &adapter->osdep.mem_bus_space_handle; adapter->hw.back = adapter; /* Default to 1 queue if MSI-X setup fails */ adapter->num_queues = 1; /* ** Now setup MSI or MSI-X, should ** return us the number of supported ** vectors. (Will be 1 for MSI) */ adapter->msix = ixgbe_setup_msix(adapter); return (0); } static void ixgbe_free_pci_resources(struct adapter * adapter) { struct ix_queue *que = adapter->queues; device_t dev = adapter->dev; int rid, memrid; if (adapter->hw.mac.type == ixgbe_mac_82598EB) memrid = PCIR_BAR(MSIX_82598_BAR); else memrid = PCIR_BAR(MSIX_82599_BAR); /* ** There is a slight possibility of a failure mode ** in attach that will result in entering this function ** before interrupt resources have been initialized, and ** in that case we do not want to execute the loops below ** We can detect this reliably by the state of the adapter ** res pointer. */ if (adapter->res == NULL) goto mem; /* ** Release all msix queue resources: */ for (int i = 0; i < adapter->num_queues; i++, que++) { rid = que->msix + 1; if (que->tag != NULL) { bus_teardown_intr(dev, que->res, que->tag); que->tag = NULL; } if (que->res != NULL) bus_release_resource(dev, SYS_RES_IRQ, rid, que->res); } /* Clean the Legacy or Link interrupt last */ if (adapter->vector) /* we are doing MSIX */ rid = adapter->vector + 1; else (adapter->msix != 0) ? (rid = 1):(rid = 0); if (adapter->tag != NULL) { bus_teardown_intr(dev, adapter->res, adapter->tag); adapter->tag = NULL; } if (adapter->res != NULL) bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res); mem: if (adapter->msix) pci_release_msi(dev); if (adapter->msix_mem != NULL) bus_release_resource(dev, SYS_RES_MEMORY, memrid, adapter->msix_mem); if (adapter->pci_mem != NULL) bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(0), adapter->pci_mem); return; } /********************************************************************* * * Setup networking device structure and register an interface. 
* **********************************************************************/ static int ixgbe_setup_interface(device_t dev, struct adapter *adapter) { struct ifnet *ifp; INIT_DEBUGOUT("ixgbe_setup_interface: begin"); ifp = adapter->ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { device_printf(dev, "can not allocate ifnet structure\n"); return (-1); } if_initname(ifp, device_get_name(dev), device_get_unit(dev)); ifp->if_baudrate = IF_Gbps(10); ifp->if_init = ixgbe_init; ifp->if_softc = adapter; ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = ixgbe_ioctl; #if __FreeBSD_version >= 1100036 if_setgetcounterfn(ifp, ixgbe_get_counter); #endif #if __FreeBSD_version >= 1100045 /* TSO parameters */ ifp->if_hw_tsomax = 65518; ifp->if_hw_tsomaxsegcount = IXGBE_82599_SCATTER; ifp->if_hw_tsomaxsegsize = 2048; #endif #ifndef IXGBE_LEGACY_TX ifp->if_transmit = ixgbe_mq_start; ifp->if_qflush = ixgbe_qflush; #else ifp->if_start = ixgbe_start; IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 2); ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 2; IFQ_SET_READY(&ifp->if_snd); #endif ether_ifattach(ifp, adapter->hw.mac.addr); adapter->max_frame_size = ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN; /* * Tell the upper layer(s) we support long frames. */ ifp->if_hdrlen = sizeof(struct ether_vlan_header); /* Set capability flags */ ifp->if_capabilities |= IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_TXCSUM_IPV6 | IFCAP_TSO4 | IFCAP_TSO6 | IFCAP_LRO | IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWTSO | IFCAP_VLAN_HWCSUM | IFCAP_JUMBO_MTU | IFCAP_VLAN_MTU | IFCAP_HWSTATS; /* Enable the above capabilities by default */ ifp->if_capenable = ifp->if_capabilities; /* ** Don't turn this on by default, if vlans are ** created on another pseudo device (eg. lagg) ** then vlan events are not passed thru, breaking ** operation, but with HW FILTER off it works. If ** using vlans directly on the ixgbe driver you can ** enable this and get full hardware tag filtering. 
*/ ifp->if_capabilities |= IFCAP_VLAN_HWFILTER; /* * Specify the media types supported by this adapter and register * callbacks to update media and link information */ ifmedia_init(&adapter->media, IFM_IMASK, ixgbe_media_change, ixgbe_media_status); adapter->phy_layer = ixgbe_get_supported_physical_layer(&adapter->hw); ixgbe_add_media_types(adapter); /* Set autoselect media by default */ ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO); return (0); } static void ixgbe_add_media_types(struct adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; device_t dev = adapter->dev; int layer; layer = adapter->phy_layer; /* Media types with matching FreeBSD media defines */ if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_T) ifmedia_add(&adapter->media, IFM_ETHER | IFM_10G_T, 0, NULL); if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_T) ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_T, 0, NULL); if (layer & IXGBE_PHYSICAL_LAYER_100BASE_TX) ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX, 0, NULL); if (layer & IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU || layer & IXGBE_PHYSICAL_LAYER_SFP_ACTIVE_DA) ifmedia_add(&adapter->media, IFM_ETHER | IFM_10G_TWINAX, 0, NULL); if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_LR) { ifmedia_add(&adapter->media, IFM_ETHER | IFM_10G_LR, 0, NULL); if (hw->phy.multispeed_fiber) ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_LX, 0, NULL); } if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_SR) { ifmedia_add(&adapter->media, IFM_ETHER | IFM_10G_SR, 0, NULL); if (hw->phy.multispeed_fiber) ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL); } else if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_SX) ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL); if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_CX4) ifmedia_add(&adapter->media, IFM_ETHER | IFM_10G_CX4, 0, NULL); #ifdef IFM_ETH_XTYPE if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KR) ifmedia_add(&adapter->media, IFM_ETHER | IFM_10G_KR, 0, NULL); if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KX4) ifmedia_add(&adapter->media, IFM_ETHER | IFM_10G_KX4, 0, NULL); if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_KX) ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_KX, 0, NULL); #else if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KR) { device_printf(dev, "Media supported: 10GbaseKR\n"); device_printf(dev, "10GbaseKR mapped to 10GbaseSR\n"); ifmedia_add(&adapter->media, IFM_ETHER | IFM_10G_SR, 0, NULL); } if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KX4) { device_printf(dev, "Media supported: 10GbaseKX4\n"); device_printf(dev, "10GbaseKX4 mapped to 10GbaseCX4\n"); ifmedia_add(&adapter->media, IFM_ETHER | IFM_10G_CX4, 0, NULL); } if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_KX) { device_printf(dev, "Media supported: 1000baseKX\n"); device_printf(dev, "1000baseKX mapped to 1000baseCX\n"); ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_CX, 0, NULL); } #endif if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_BX) device_printf(dev, "Media supported: 1000baseBX\n"); if (hw->device_id == IXGBE_DEV_ID_82598AT) { ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL); ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_T, 0, NULL); } ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL); } static void ixgbe_config_link(struct adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; u32 autoneg, err = 0; bool sfp, negotiate; sfp = ixgbe_is_sfp(hw); if (sfp) { taskqueue_enqueue(adapter->tq, &adapter->mod_task); } else { if (hw->mac.ops.check_link) err = ixgbe_check_link(hw, &adapter->link_speed, &adapter->link_up, FALSE); if (err) goto out; autoneg = 
hw->phy.autoneg_advertised; if ((!autoneg) && (hw->mac.ops.get_link_capabilities)) err = hw->mac.ops.get_link_capabilities(hw, &autoneg, &negotiate); if (err) goto out; if (hw->mac.ops.setup_link) err = hw->mac.ops.setup_link(hw, autoneg, adapter->link_up); } out: return; }
/********************************************************************* * * Enable transmit units. * **********************************************************************/ static void ixgbe_initialize_transmit_units(struct adapter *adapter) { struct tx_ring *txr = adapter->tx_rings; struct ixgbe_hw *hw = &adapter->hw; /* Setup the Base and Length of the Tx Descriptor Ring */ for (int i = 0; i < adapter->num_queues; i++, txr++) { u64 tdba = txr->txdma.dma_paddr; u32 txctrl = 0; int j = txr->me; IXGBE_WRITE_REG(hw, IXGBE_TDBAL(j), (tdba & 0x00000000ffffffffULL)); IXGBE_WRITE_REG(hw, IXGBE_TDBAH(j), (tdba >> 32)); IXGBE_WRITE_REG(hw, IXGBE_TDLEN(j), adapter->num_tx_desc * sizeof(union ixgbe_adv_tx_desc)); /* Setup the HW Tx Head and Tail descriptor pointers */ IXGBE_WRITE_REG(hw, IXGBE_TDH(j), 0); IXGBE_WRITE_REG(hw, IXGBE_TDT(j), 0); /* Cache the tail address */ txr->tail = IXGBE_TDT(j); /* Disable Head Writeback */ /* * Note: for X550 series devices, these registers are actually * prefixed with TPH_ instead of DCA_, but the addresses and * fields remain the same. */ switch (hw->mac.type) { case ixgbe_mac_82598EB: txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(j)); break; default: txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL_82599(j)); break; } txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN; switch (hw->mac.type) { case ixgbe_mac_82598EB: IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(j), txctrl); break; default: IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(j), txctrl); break; } } if (hw->mac.type != ixgbe_mac_82598EB) { u32 dmatxctl, rttdcs; #ifdef PCI_IOV enum ixgbe_iov_mode mode = ixgbe_get_iov_mode(adapter); #endif dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL); dmatxctl |= IXGBE_DMATXCTL_TE; IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl); /* Disable arbiter to set MTQC */ rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS); rttdcs |= IXGBE_RTTDCS_ARBDIS; IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs); #ifdef PCI_IOV IXGBE_WRITE_REG(hw, IXGBE_MTQC, ixgbe_get_mtqc(mode)); #else IXGBE_WRITE_REG(hw, IXGBE_MTQC, IXGBE_MTQC_64Q_1PB); #endif rttdcs &= ~IXGBE_RTTDCS_ARBDIS; IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs); } return; }
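#if 0	/* Illustrative sketch only, not compiled. It restates the
	 * TDBAL/TDBAH/TDLEN programming in
	 * ixgbe_initialize_transmit_units() above: a 64-bit DMA ring
	 * address is split into two 32-bit register writes, and the
	 * ring length register takes the ring size in bytes.
	 * ix_ring_base() is a hypothetical name. */
static void
ix_ring_base(u64 tdba, u32 *lo, u32 *hi)
{
	*lo = (u32)(tdba & 0x00000000ffffffffULL);	/* -> IXGBE_TDBAL */
	*hi = (u32)(tdba >> 32);			/* -> IXGBE_TDBAH */
}
/* Example: with 2048 descriptors of 16 bytes each (the advanced TX
 * descriptor size), TDLEN = 2048 * 16 = 0x8000 bytes. */
#endif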
static void ixgbe_initialize_rss_mapping(struct adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; u32 reta = 0, mrqc, rss_key[10]; int queue_id, table_size, index_mult; #ifdef RSS u32 rss_hash_config; #endif #ifdef PCI_IOV enum ixgbe_iov_mode mode; #endif #ifdef RSS /* Fetch the configured RSS key */ rss_getkey((uint8_t *) &rss_key); #else /* set up random bits */ arc4rand(&rss_key, sizeof(rss_key), 0); #endif /* Set multiplier for RETA setup and table size based on MAC */ index_mult = 0x1; table_size = 128; switch (adapter->hw.mac.type) { case ixgbe_mac_82598EB: index_mult = 0x11; break; case ixgbe_mac_X550: case ixgbe_mac_X550EM_x: table_size = 512; break; default: break; } /* Set up the redirection table */ for (int i = 0, j = 0; i < table_size; i++, j++) { if (j == adapter->num_queues) j = 0; #ifdef RSS /* * Fetch the RSS bucket id for the given indirection entry. * Cap it at the number of configured buckets (which is * num_queues.) */ queue_id = rss_get_indirection_to_bucket(i); queue_id = queue_id % adapter->num_queues; #else queue_id = (j * index_mult); #endif /* * The low 8 bits are for hash value (n+0); * The next 8 bits are for hash value (n+1), etc. */ reta = reta >> 8; reta = reta | ( ((uint32_t) queue_id) << 24); if ((i & 3) == 3) { if (i < 128) IXGBE_WRITE_REG(hw, IXGBE_RETA(i >> 2), reta); else IXGBE_WRITE_REG(hw, IXGBE_ERETA((i >> 2) - 32), reta); reta = 0; } } /* Now fill our hash function seeds */ for (int i = 0; i < 10; i++) IXGBE_WRITE_REG(hw, IXGBE_RSSRK(i), rss_key[i]); /* Perform hash on these packet types */ #ifdef RSS mrqc = IXGBE_MRQC_RSSEN; rss_hash_config = rss_gethashconfig(); if (rss_hash_config & RSS_HASHTYPE_RSS_IPV4) mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4; if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV4) mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_TCP; if (rss_hash_config & RSS_HASHTYPE_RSS_IPV6) mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6; if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV6) mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_TCP; if (rss_hash_config & RSS_HASHTYPE_RSS_IPV6_EX) mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX; if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV6_EX) mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP; if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV4) mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP; if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV4_EX) device_printf(adapter->dev, "%s: RSS_HASHTYPE_RSS_UDP_IPV4_EX defined, " "but not supported\n", __func__); if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV6) mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP; if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV6_EX) mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP; #else /* * Disable UDP - IP fragments aren't currently being handled * and so we end up with a mix of 2-tuple and 4-tuple * traffic. */ mrqc = IXGBE_MRQC_RSSEN | IXGBE_MRQC_RSS_FIELD_IPV4 | IXGBE_MRQC_RSS_FIELD_IPV4_TCP | IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP | IXGBE_MRQC_RSS_FIELD_IPV6_EX | IXGBE_MRQC_RSS_FIELD_IPV6 | IXGBE_MRQC_RSS_FIELD_IPV6_TCP ; #endif /* RSS */ #ifdef PCI_IOV mode = ixgbe_get_iov_mode(adapter); mrqc |= ixgbe_get_mrqc(mode); #endif IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc); }
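#if 0	/* Illustrative sketch only, not compiled. It reproduces the
	 * RETA packing arithmetic from ixgbe_initialize_rss_mapping()
	 * above: four 8-bit queue indices are shifted into one 32-bit
	 * RETA register, least-significant byte first. ix_pack_reta()
	 * is a hypothetical name. */
static u32
ix_pack_reta(const u8 qid[4])
{
	u32 reta = 0;

	for (int n = 0; n < 4; n++) {
		reta >>= 8;			/* make room at the top */
		reta |= (u32)qid[n] << 24;	/* newest entry in byte 3 */
	}
	return (reta);
}
/* Example: queue ids {0, 1, 2, 3} pack to 0x03020100, matching one
 * IXGBE_RETA(i >> 2) write every fourth iteration of the loop above. */
#endif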
/********************************************************************* * * Setup receive registers and features. * **********************************************************************/ #define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT 2 #define BSIZEPKT_ROUNDUP ((1<<IXGBE_SRRCTL_BSIZEPKT_SHIFT)-1) static void ixgbe_initialize_receive_units(struct adapter *adapter) { struct rx_ring *rxr = adapter->rx_rings; struct ixgbe_hw *hw = &adapter->hw; struct ifnet *ifp = adapter->ifp; u32 bufsz, fctrl, srrctl, rxcsum; u32 hlreg; /* * Make sure receives are disabled while * setting up the descriptor ring */ ixgbe_disable_rx(hw); /* Enable broadcasts */ fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL); fctrl |= IXGBE_FCTRL_BAM; if (adapter->hw.mac.type == ixgbe_mac_82598EB) { fctrl |= IXGBE_FCTRL_DPF; fctrl |= IXGBE_FCTRL_PMCF; } IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl); /* Set for Jumbo Frames? */ hlreg = IXGBE_READ_REG(hw, IXGBE_HLREG0); if (ifp->if_mtu > ETHERMTU) hlreg |= IXGBE_HLREG0_JUMBOEN; else hlreg &= ~IXGBE_HLREG0_JUMBOEN; #ifdef DEV_NETMAP /* crcstrip is conditional in netmap (in RDRXCTL too ?) */ if (ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip) hlreg &= ~IXGBE_HLREG0_RXCRCSTRP; else hlreg |= IXGBE_HLREG0_RXCRCSTRP; #endif /* DEV_NETMAP */ IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg); bufsz = (adapter->rx_mbuf_sz + BSIZEPKT_ROUNDUP) >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; for (int i = 0; i < adapter->num_queues; i++, rxr++) { u64 rdba = rxr->rxdma.dma_paddr; int j = rxr->me; /* Setup the Base and Length of the Rx Descriptor Ring */ IXGBE_WRITE_REG(hw, IXGBE_RDBAL(j), (rdba & 0x00000000ffffffffULL)); IXGBE_WRITE_REG(hw, IXGBE_RDBAH(j), (rdba >> 32)); IXGBE_WRITE_REG(hw, IXGBE_RDLEN(j), adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc)); /* Set up the SRRCTL register */ srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(j)); srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK; srrctl &= ~IXGBE_SRRCTL_BSIZEPKT_MASK; srrctl |= bufsz; srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF; /* * Set DROP_EN iff we have no flow control and >1 queue. * Note that srrctl was cleared shortly before during reset, * so we do not need to clear the bit, but do it just in case * this code is moved elsewhere. */ if (adapter->num_queues > 1 && adapter->hw.fc.requested_mode == ixgbe_fc_none) { srrctl |= IXGBE_SRRCTL_DROP_EN; } else { srrctl &= ~IXGBE_SRRCTL_DROP_EN; } IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(j), srrctl); /* Setup the HW Rx Head and Tail Descriptor Pointers */ IXGBE_WRITE_REG(hw, IXGBE_RDH(j), 0); IXGBE_WRITE_REG(hw, IXGBE_RDT(j), 0); /* Set the driver rx tail address */ rxr->tail = IXGBE_RDT(rxr->me); } if (adapter->hw.mac.type != ixgbe_mac_82598EB) { u32 psrtype = IXGBE_PSRTYPE_TCPHDR | IXGBE_PSRTYPE_UDPHDR | IXGBE_PSRTYPE_IPV4HDR | IXGBE_PSRTYPE_IPV6HDR; IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0), psrtype); } rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM); ixgbe_initialize_rss_mapping(adapter); if (adapter->num_queues > 1) { /* RSS and RX IPP Checksum are mutually exclusive */ rxcsum |= IXGBE_RXCSUM_PCSD; } if (ifp->if_capenable & IFCAP_RXCSUM) rxcsum |= IXGBE_RXCSUM_PCSD; /* This is useful for calculating UDP/IP fragment checksums */ if (!(rxcsum & IXGBE_RXCSUM_PCSD)) rxcsum |= IXGBE_RXCSUM_IPPCSE; IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum); return; }
/* ** This routine is run via a vlan config EVENT, ** it enables us to use the HW Filter table since ** we can get the vlan id. This just creates the ** entry in the soft version of the VFTA, init will ** repopulate the real table. */ static void ixgbe_register_vlan(void *arg, struct ifnet *ifp, u16 vtag) { struct adapter *adapter = ifp->if_softc; u16 index, bit; if (ifp->if_softc != arg) /* Not our event */ return; if ((vtag == 0) || (vtag > 4095)) /* Invalid */ return; IXGBE_CORE_LOCK(adapter); index = (vtag >> 5) & 0x7F; bit = vtag & 0x1F; adapter->shadow_vfta[index] |= (1 << bit); ++adapter->num_vlans; ixgbe_setup_vlan_hw_support(adapter); IXGBE_CORE_UNLOCK(adapter); }
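#if 0	/* Illustrative sketch only, not compiled. It restates the
	 * shadow-VFTA indexing used by ixgbe_register_vlan() above: a
	 * 12-bit VLAN id selects one of 128 32-bit words (bits 11:5)
	 * and a bit within that word (bits 4:0). ix_vfta_slot() is a
	 * hypothetical name. */
static void
ix_vfta_slot(u16 vtag, u16 *index, u16 *bit)
{
	*index = (vtag >> 5) & 0x7F;	/* which shadow_vfta[] word */
	*bit = vtag & 0x1F;		/* which bit inside that word */
}
/* Example: vtag 100 gives index 3, bit 4, i.e. the registration sets
 * shadow_vfta[3] |= (1 << 4). */
#endif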
/* ** This routine is run via a vlan ** unconfig EVENT, removing our entry ** in the soft vfta. */ static void ixgbe_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag) { struct adapter *adapter = ifp->if_softc; u16 index, bit; if (ifp->if_softc != arg) return; if ((vtag == 0) || (vtag > 4095)) /* Invalid */ return; IXGBE_CORE_LOCK(adapter); index = (vtag >> 5) & 0x7F; bit = vtag & 0x1F; adapter->shadow_vfta[index] &= ~(1 << bit); --adapter->num_vlans; /* Re-init to load the changes */ ixgbe_setup_vlan_hw_support(adapter); IXGBE_CORE_UNLOCK(adapter); }
static void ixgbe_setup_vlan_hw_support(struct adapter *adapter) { struct ifnet *ifp = adapter->ifp; struct ixgbe_hw *hw = &adapter->hw; struct rx_ring *rxr; u32 ctrl; /* ** We get here thru init_locked, meaning ** a soft reset, this has already cleared ** the VFTA and other state, so if there ** have been no vlans registered do nothing. */ if (adapter->num_vlans == 0) return; /* Setup the queues for vlans */ for (int i = 0; i < adapter->num_queues; i++) { rxr = &adapter->rx_rings[i]; /* On 82599 the VLAN enable is per/queue in RXDCTL */ if (hw->mac.type != ixgbe_mac_82598EB) { ctrl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxr->me)); ctrl |= IXGBE_RXDCTL_VME; IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxr->me), ctrl); } rxr->vtag_strip = TRUE; } if ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0) return; /* ** A soft reset zeros out the VFTA, so ** we need to repopulate it now. */ for (int i = 0; i < IXGBE_VFTA_SIZE; i++) if (adapter->shadow_vfta[i] != 0) IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), adapter->shadow_vfta[i]); ctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL); /* Enable the Filter Table if enabled */ if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) { ctrl &= ~IXGBE_VLNCTRL_CFIEN; ctrl |= IXGBE_VLNCTRL_VFE; } if (hw->mac.type == ixgbe_mac_82598EB) ctrl |= IXGBE_VLNCTRL_VME; IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, ctrl); }
static void ixgbe_enable_intr(struct adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; struct ix_queue *que = adapter->queues; u32 mask, fwsm; mask = (IXGBE_EIMS_ENABLE_MASK & ~IXGBE_EIMS_RTX_QUEUE); /* Enable Fan Failure detection */ if (hw->device_id == IXGBE_DEV_ID_82598AT) mask |= IXGBE_EIMS_GPI_SDP1; switch (adapter->hw.mac.type) { case ixgbe_mac_82599EB: mask |= IXGBE_EIMS_ECC; /* Temperature sensor on some adapters */ mask |= IXGBE_EIMS_GPI_SDP0; /* SFP+ (RX_LOS_N & MOD_ABS_N) */ mask |= IXGBE_EIMS_GPI_SDP1; mask |= IXGBE_EIMS_GPI_SDP2; #ifdef IXGBE_FDIR mask |= IXGBE_EIMS_FLOW_DIR; #endif #ifdef PCI_IOV mask |= IXGBE_EIMS_MAILBOX; #endif break; case ixgbe_mac_X540: /* Detect if Thermal Sensor is enabled */ fwsm = IXGBE_READ_REG(hw, IXGBE_FWSM); if (fwsm & IXGBE_FWSM_TS_ENABLED) mask |= IXGBE_EIMS_TS; mask |= IXGBE_EIMS_ECC; #ifdef IXGBE_FDIR mask |= IXGBE_EIMS_FLOW_DIR; #endif break; case ixgbe_mac_X550: case ixgbe_mac_X550EM_x: /* MAC thermal sensor is automatically enabled */ mask |= IXGBE_EIMS_TS; /* Some devices use SDP0 for important information */ if (hw->device_id == IXGBE_DEV_ID_X550EM_X_SFP || hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T) mask |= IXGBE_EIMS_GPI_SDP0_BY_MAC(hw); mask |= IXGBE_EIMS_ECC; #ifdef IXGBE_FDIR mask |= IXGBE_EIMS_FLOW_DIR; #endif #ifdef PCI_IOV mask |= IXGBE_EIMS_MAILBOX; #endif /* falls through */ default: break; } IXGBE_WRITE_REG(hw, IXGBE_EIMS, mask); /* With MSI-X we use auto clear */ if (adapter->msix_mem) { mask = IXGBE_EIMS_ENABLE_MASK; /* Don't autoclear Link */ mask &= ~IXGBE_EIMS_OTHER; mask &= ~IXGBE_EIMS_LSC; #ifdef PCI_IOV mask &= ~IXGBE_EIMS_MAILBOX; #endif IXGBE_WRITE_REG(hw, IXGBE_EIAC, mask); } /* ** Now enable all queues, this is done separately to ** allow for handling the
extended (beyond 32) MSIX ** vectors that can be used by 82599 */ for (int i = 0; i < adapter->num_queues; i++, que++) ixgbe_enable_queue(adapter, que->msix); IXGBE_WRITE_FLUSH(hw); return; } static void ixgbe_disable_intr(struct adapter *adapter) { if (adapter->msix_mem) IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIAC, 0); if (adapter->hw.mac.type == ixgbe_mac_82598EB) { IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, ~0); } else { IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, 0xFFFF0000); IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC_EX(0), ~0); IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC_EX(1), ~0); } IXGBE_WRITE_FLUSH(&adapter->hw); return; } /* ** Get the width and transaction speed of ** the slot this adapter is plugged into. */ static void ixgbe_get_slot_info(struct adapter *adapter) { device_t dev = adapter->dev; struct ixgbe_hw *hw = &adapter->hw; struct ixgbe_mac_info *mac = &hw->mac; u16 link; u32 offset; /* For most devices simply call the shared code routine */ if (hw->device_id != IXGBE_DEV_ID_82599_SFP_SF_QP) { ixgbe_get_bus_info(hw); /* These devices don't use PCI-E */ switch (hw->mac.type) { case ixgbe_mac_X550EM_x: return; default: goto display; } } /* ** For the Quad port adapter we need to parse back ** up the PCI tree to find the speed of the expansion ** slot into which this adapter is plugged. A bit more work. */ dev = device_get_parent(device_get_parent(dev)); #ifdef IXGBE_DEBUG device_printf(dev, "parent pcib = %x,%x,%x\n", pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev)); #endif dev = device_get_parent(device_get_parent(dev)); #ifdef IXGBE_DEBUG device_printf(dev, "slot pcib = %x,%x,%x\n", pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev)); #endif /* Now get the PCI Express Capabilities offset */ pci_find_cap(dev, PCIY_EXPRESS, &offset); /* ...and read the Link Status Register */ link = pci_read_config(dev, offset + PCIER_LINK_STA, 2); switch (link & IXGBE_PCI_LINK_WIDTH) { case IXGBE_PCI_LINK_WIDTH_1: hw->bus.width = ixgbe_bus_width_pcie_x1; break; case IXGBE_PCI_LINK_WIDTH_2: hw->bus.width = ixgbe_bus_width_pcie_x2; break; case IXGBE_PCI_LINK_WIDTH_4: hw->bus.width = ixgbe_bus_width_pcie_x4; break; case IXGBE_PCI_LINK_WIDTH_8: hw->bus.width = ixgbe_bus_width_pcie_x8; break; default: hw->bus.width = ixgbe_bus_width_unknown; break; } switch (link & IXGBE_PCI_LINK_SPEED) { case IXGBE_PCI_LINK_SPEED_2500: hw->bus.speed = ixgbe_bus_speed_2500; break; case IXGBE_PCI_LINK_SPEED_5000: hw->bus.speed = ixgbe_bus_speed_5000; break; case IXGBE_PCI_LINK_SPEED_8000: hw->bus.speed = ixgbe_bus_speed_8000; break; default: hw->bus.speed = ixgbe_bus_speed_unknown; break; } mac->ops.set_lan_id(hw); display: device_printf(dev,"PCI Express Bus: Speed %s %s\n", ((hw->bus.speed == ixgbe_bus_speed_8000) ? "8.0GT/s": (hw->bus.speed == ixgbe_bus_speed_5000) ? "5.0GT/s": (hw->bus.speed == ixgbe_bus_speed_2500) ? "2.5GT/s":"Unknown"), (hw->bus.width == ixgbe_bus_width_pcie_x8) ? "Width x8" : (hw->bus.width == ixgbe_bus_width_pcie_x4) ? "Width x4" : (hw->bus.width == ixgbe_bus_width_pcie_x1) ? 
"Width x1" : ("Unknown")); if ((hw->device_id != IXGBE_DEV_ID_82599_SFP_SF_QP) && ((hw->bus.width <= ixgbe_bus_width_pcie_x4) && (hw->bus.speed == ixgbe_bus_speed_2500))) { device_printf(dev, "PCI-Express bandwidth available" " for this card\n is not sufficient for" " optimal performance.\n"); device_printf(dev, "For optimal performance a x8 " "PCIE, or x4 PCIE Gen2 slot is required.\n"); } if ((hw->device_id == IXGBE_DEV_ID_82599_SFP_SF_QP) && ((hw->bus.width <= ixgbe_bus_width_pcie_x8) && (hw->bus.speed < ixgbe_bus_speed_8000))) { device_printf(dev, "PCI-Express bandwidth available" " for this card\n is not sufficient for" " optimal performance.\n"); device_printf(dev, "For optimal performance a x8 " "PCIE Gen3 slot is required.\n"); } return; } /* ** Setup the correct IVAR register for a particular MSIX interrupt ** (yes this is all very magic and confusing :) ** - entry is the register array entry ** - vector is the MSIX vector for this queue ** - type is RX/TX/MISC */ static void ixgbe_set_ivar(struct adapter *adapter, u8 entry, u8 vector, s8 type) { struct ixgbe_hw *hw = &adapter->hw; u32 ivar, index; vector |= IXGBE_IVAR_ALLOC_VAL; switch (hw->mac.type) { case ixgbe_mac_82598EB: if (type == -1) entry = IXGBE_IVAR_OTHER_CAUSES_INDEX; else entry += (type * 64); index = (entry >> 2) & 0x1F; ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index)); ivar &= ~(0xFF << (8 * (entry & 0x3))); ivar |= (vector << (8 * (entry & 0x3))); IXGBE_WRITE_REG(&adapter->hw, IXGBE_IVAR(index), ivar); break; case ixgbe_mac_82599EB: case ixgbe_mac_X540: case ixgbe_mac_X550: case ixgbe_mac_X550EM_x: if (type == -1) { /* MISC IVAR */ index = (entry & 1) * 8; ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC); ivar &= ~(0xFF << index); ivar |= (vector << index); IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar); } else { /* RX/TX IVARS */ index = (16 * (entry & 1)) + (8 * type); ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1)); ivar &= ~(0xFF << index); ivar |= (vector << index); IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar); } default: break; } } static void ixgbe_configure_ivars(struct adapter *adapter) { struct ix_queue *que = adapter->queues; u32 newitr; if (ixgbe_max_interrupt_rate > 0) newitr = (4000000 / ixgbe_max_interrupt_rate) & 0x0FF8; else { /* ** Disable DMA coalescing if interrupt moderation is ** disabled. */ adapter->dmac = 0; newitr = 0; } for (int i = 0; i < adapter->num_queues; i++, que++) { struct rx_ring *rxr = &adapter->rx_rings[i]; struct tx_ring *txr = &adapter->tx_rings[i]; /* First the RX queue entry */ ixgbe_set_ivar(adapter, rxr->me, que->msix, 0); /* ... and the TX */ ixgbe_set_ivar(adapter, txr->me, que->msix, 1); /* Set an Initial EITR value */ IXGBE_WRITE_REG(&adapter->hw, IXGBE_EITR(que->msix), newitr); } /* For the Link interrupt */ ixgbe_set_ivar(adapter, 1, adapter->vector, -1); } /* ** ixgbe_sfp_probe - called in the local timer to ** determine if a port had optics inserted. 
*/ static bool ixgbe_sfp_probe(struct adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; device_t dev = adapter->dev; bool result = FALSE; if ((hw->phy.type == ixgbe_phy_nl) && (hw->phy.sfp_type == ixgbe_sfp_type_not_present)) { s32 ret = hw->phy.ops.identify_sfp(hw); if (ret) goto out; ret = hw->phy.ops.reset(hw); if (ret == IXGBE_ERR_SFP_NOT_SUPPORTED) { device_printf(dev, "Unsupported SFP+ module detected!"); device_printf(dev, "Reload driver with supported module.\n"); adapter->sfp_probe = FALSE; goto out; } else device_printf(dev, "SFP+ module detected!\n"); /* We now have supported optics */ adapter->sfp_probe = FALSE; /* Set the optics type so system reports correctly */ ixgbe_setup_optics(adapter); result = TRUE; } out: return (result); } /* ** Tasklet handler for MSIX Link interrupts ** - do outside interrupt since it might sleep */ static void ixgbe_handle_link(void *context, int pending) { struct adapter *adapter = context; struct ixgbe_hw *hw = &adapter->hw; ixgbe_check_link(hw, &adapter->link_speed, &adapter->link_up, 0); ixgbe_update_link_status(adapter); /* Re-enable link interrupts */ IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EIMS_LSC); } /* ** Tasklet for handling SFP module interrupts */ static void ixgbe_handle_mod(void *context, int pending) { struct adapter *adapter = context; struct ixgbe_hw *hw = &adapter->hw; enum ixgbe_phy_type orig_type = hw->phy.type; device_t dev = adapter->dev; u32 err; IXGBE_CORE_LOCK(adapter); /* Check to see if the PHY type changed */ if (hw->phy.ops.identify) { hw->phy.type = ixgbe_phy_unknown; hw->phy.ops.identify(hw); } if (hw->phy.type != orig_type) { device_printf(dev, "Detected phy_type %d\n", hw->phy.type); if (hw->phy.type == ixgbe_phy_none) { hw->phy.sfp_type = ixgbe_sfp_type_unknown; goto out; } /* Try to do the initialization that was skipped before */ if (hw->phy.ops.init) hw->phy.ops.init(hw); if (hw->phy.ops.reset) hw->phy.ops.reset(hw); } err = hw->phy.ops.identify_sfp(hw); if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) { device_printf(dev, "Unsupported SFP+ module type was detected.\n"); goto out; } err = hw->mac.ops.setup_sfp(hw); if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) { device_printf(dev, "Setup failure - unsupported SFP+ module type.\n"); goto out; } if (hw->phy.multispeed_fiber) taskqueue_enqueue(adapter->tq, &adapter->msf_task); out: /* Update media type */ switch (hw->mac.ops.get_media_type(hw)) { case ixgbe_media_type_fiber: adapter->optics = IFM_10G_SR; break; case ixgbe_media_type_copper: adapter->optics = IFM_10G_TWINAX; break; case ixgbe_media_type_cx4: adapter->optics = IFM_10G_CX4; break; default: adapter->optics = 0; break; } IXGBE_CORE_UNLOCK(adapter); return; } /* ** Tasklet for handling MSF (multispeed fiber) interrupts */ static void ixgbe_handle_msf(void *context, int pending) { struct adapter *adapter = context; struct ixgbe_hw *hw = &adapter->hw; u32 autoneg; bool negotiate; IXGBE_CORE_LOCK(adapter); /* get_supported_phy_layer will call hw->phy.ops.identify_sfp() */ adapter->phy_layer = ixgbe_get_supported_physical_layer(hw); autoneg = hw->phy.autoneg_advertised; if ((!autoneg) && (hw->mac.ops.get_link_capabilities)) hw->mac.ops.get_link_capabilities(hw, &autoneg, &negotiate); if (hw->mac.ops.setup_link) hw->mac.ops.setup_link(hw, autoneg, TRUE); /* Adjust media types shown in ifconfig */ ifmedia_removeall(&adapter->media); ixgbe_add_media_types(adapter); IXGBE_CORE_UNLOCK(adapter); return; } /* ** Tasklet for handling interrupts from an external PHY */ static void ixgbe_handle_phy(void *context, int pending) 
{ struct adapter *adapter = context; struct ixgbe_hw *hw = &adapter->hw; int error; error = hw->phy.ops.handle_lasi(hw); if (error == IXGBE_ERR_OVERTEMP) device_printf(adapter->dev, "CRITICAL: EXTERNAL PHY OVER TEMP!! " " PHY will downshift to lower power state!\n"); else if (error) device_printf(adapter->dev, "Error handling LASI interrupt: %d\n", error); return; } #ifdef IXGBE_FDIR /* ** Tasklet for reinitializing the Flow Director filter table */ static void ixgbe_reinit_fdir(void *context, int pending) { struct adapter *adapter = context; struct ifnet *ifp = adapter->ifp; if (adapter->fdir_reinit != 1) /* Shouldn't happen */ return; ixgbe_reinit_fdir_tables_82599(&adapter->hw); adapter->fdir_reinit = 0; /* re-enable flow director interrupts */ IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, IXGBE_EIMS_FLOW_DIR); /* Restart the interface */ ifp->if_drv_flags |= IFF_DRV_RUNNING; return; } #endif /********************************************************************* * * Configure DMA Coalescing * **********************************************************************/ static void ixgbe_config_dmac(struct adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; struct ixgbe_dmac_config *dcfg = &hw->mac.dmac_config; if (hw->mac.type < ixgbe_mac_X550 || !hw->mac.ops.dmac_config) return; if (dcfg->watchdog_timer ^ adapter->dmac || dcfg->link_speed ^ adapter->link_speed) { dcfg->watchdog_timer = adapter->dmac; dcfg->fcoe_en = false; dcfg->link_speed = adapter->link_speed; dcfg->num_tcs = 1; INIT_DEBUGOUT2("dmac settings: watchdog %d, link speed %d\n", dcfg->watchdog_timer, dcfg->link_speed); hw->mac.ops.dmac_config(hw); } } /* * Checks whether the adapter's ports are capable of * Wake On LAN by reading the adapter's NVM. * * Sets each port's hw->wol_enabled value depending * on the value read here. */ static void ixgbe_check_wol_support(struct adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; u16 dev_caps = 0; /* Find out WoL support for port */ adapter->wol_support = hw->wol_enabled = 0; ixgbe_get_device_caps(hw, &dev_caps); if ((dev_caps & IXGBE_DEVICE_CAPS_WOL_PORT0_1) || ((dev_caps & IXGBE_DEVICE_CAPS_WOL_PORT0) && hw->bus.func == 0)) adapter->wol_support = hw->wol_enabled = 1; /* Save initial wake up filter configuration */ adapter->wufc = IXGBE_READ_REG(hw, IXGBE_WUFC); return; } /* * Prepare the adapter/port for LPLU and/or WoL */ static int ixgbe_setup_low_power_mode(struct adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; device_t dev = adapter->dev; s32 error = 0; mtx_assert(&adapter->core_mtx, MA_OWNED); if (!hw->wol_enabled) ixgbe_set_phy_power(hw, FALSE); /* Limit power management flow to X550EM baseT */ if (hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T && hw->phy.ops.enter_lplu) { /* Turn off support for APM wakeup. (Using ACPI instead) */ IXGBE_WRITE_REG(hw, IXGBE_GRC, IXGBE_READ_REG(hw, IXGBE_GRC) & ~(u32)2); /* * Clear Wake Up Status register to prevent any previous wakeup * events from waking us up immediately after we suspend. 
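 * (The WUS bits are write-one-to-clear, so writing all ones
 * below discards every previously latched wake event.)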
*/ IXGBE_WRITE_REG(hw, IXGBE_WUS, 0xffffffff); /* * Program the Wakeup Filter Control register with user filter * settings */ IXGBE_WRITE_REG(hw, IXGBE_WUFC, adapter->wufc); /* Enable wakeups and power management in Wakeup Control */ IXGBE_WRITE_REG(hw, IXGBE_WUC, IXGBE_WUC_WKEN | IXGBE_WUC_PME_EN); /* X550EM baseT adapters need a special LPLU flow */ hw->phy.reset_disable = true; ixgbe_stop(adapter); error = hw->phy.ops.enter_lplu(hw); if (error) device_printf(dev, "Error entering LPLU: %d\n", error); hw->phy.reset_disable = false; } else { /* Just stop for other adapters */ ixgbe_stop(adapter); } return error; } /********************************************************************** * * Update the board statistics counters. * **********************************************************************/ static void ixgbe_update_stats_counters(struct adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; u32 missed_rx = 0, bprc, lxon, lxoff, total; u64 total_missed_rx = 0; adapter->stats.pf.crcerrs += IXGBE_READ_REG(hw, IXGBE_CRCERRS); adapter->stats.pf.illerrc += IXGBE_READ_REG(hw, IXGBE_ILLERRC); adapter->stats.pf.errbc += IXGBE_READ_REG(hw, IXGBE_ERRBC); adapter->stats.pf.mspdc += IXGBE_READ_REG(hw, IXGBE_MSPDC); for (int i = 0; i < 16; i++) { adapter->stats.pf.qprc[i] += IXGBE_READ_REG(hw, IXGBE_QPRC(i)); adapter->stats.pf.qptc[i] += IXGBE_READ_REG(hw, IXGBE_QPTC(i)); adapter->stats.pf.qprdc[i] += IXGBE_READ_REG(hw, IXGBE_QPRDC(i)); } adapter->stats.pf.mlfc += IXGBE_READ_REG(hw, IXGBE_MLFC); adapter->stats.pf.mrfc += IXGBE_READ_REG(hw, IXGBE_MRFC); adapter->stats.pf.rlec += IXGBE_READ_REG(hw, IXGBE_RLEC); /* Hardware workaround, gprc counts missed packets */ adapter->stats.pf.gprc += IXGBE_READ_REG(hw, IXGBE_GPRC); adapter->stats.pf.gprc -= missed_rx; if (hw->mac.type != ixgbe_mac_82598EB) { adapter->stats.pf.gorc += IXGBE_READ_REG(hw, IXGBE_GORCL) + ((u64)IXGBE_READ_REG(hw, IXGBE_GORCH) << 32); adapter->stats.pf.gotc += IXGBE_READ_REG(hw, IXGBE_GOTCL) + ((u64)IXGBE_READ_REG(hw, IXGBE_GOTCH) << 32); adapter->stats.pf.tor += IXGBE_READ_REG(hw, IXGBE_TORL) + ((u64)IXGBE_READ_REG(hw, IXGBE_TORH) << 32); adapter->stats.pf.lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXCNT); adapter->stats.pf.lxoffrxc += IXGBE_READ_REG(hw, IXGBE_LXOFFRXCNT); } else { adapter->stats.pf.lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXC); adapter->stats.pf.lxoffrxc += IXGBE_READ_REG(hw, IXGBE_LXOFFRXC); /* 82598 only has a counter in the high register */ adapter->stats.pf.gorc += IXGBE_READ_REG(hw, IXGBE_GORCH); adapter->stats.pf.gotc += IXGBE_READ_REG(hw, IXGBE_GOTCH); adapter->stats.pf.tor += IXGBE_READ_REG(hw, IXGBE_TORH); } /* * Workaround: mprc hardware is incorrectly counting * broadcasts, so for now we subtract those. 
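 * bprc is sampled first so the same broadcast count can be
 * subtracted below; only the 82598 needs this adjustment.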
*/ bprc = IXGBE_READ_REG(hw, IXGBE_BPRC); adapter->stats.pf.bprc += bprc; adapter->stats.pf.mprc += IXGBE_READ_REG(hw, IXGBE_MPRC); if (hw->mac.type == ixgbe_mac_82598EB) adapter->stats.pf.mprc -= bprc; adapter->stats.pf.prc64 += IXGBE_READ_REG(hw, IXGBE_PRC64); adapter->stats.pf.prc127 += IXGBE_READ_REG(hw, IXGBE_PRC127); adapter->stats.pf.prc255 += IXGBE_READ_REG(hw, IXGBE_PRC255); adapter->stats.pf.prc511 += IXGBE_READ_REG(hw, IXGBE_PRC511); adapter->stats.pf.prc1023 += IXGBE_READ_REG(hw, IXGBE_PRC1023); adapter->stats.pf.prc1522 += IXGBE_READ_REG(hw, IXGBE_PRC1522); lxon = IXGBE_READ_REG(hw, IXGBE_LXONTXC); adapter->stats.pf.lxontxc += lxon; lxoff = IXGBE_READ_REG(hw, IXGBE_LXOFFTXC); adapter->stats.pf.lxofftxc += lxoff; total = lxon + lxoff; adapter->stats.pf.gptc += IXGBE_READ_REG(hw, IXGBE_GPTC); adapter->stats.pf.mptc += IXGBE_READ_REG(hw, IXGBE_MPTC); adapter->stats.pf.ptc64 += IXGBE_READ_REG(hw, IXGBE_PTC64); adapter->stats.pf.gptc -= total; adapter->stats.pf.mptc -= total; adapter->stats.pf.ptc64 -= total; adapter->stats.pf.gotc -= total * ETHER_MIN_LEN; adapter->stats.pf.ruc += IXGBE_READ_REG(hw, IXGBE_RUC); adapter->stats.pf.rfc += IXGBE_READ_REG(hw, IXGBE_RFC); adapter->stats.pf.roc += IXGBE_READ_REG(hw, IXGBE_ROC); adapter->stats.pf.rjc += IXGBE_READ_REG(hw, IXGBE_RJC); adapter->stats.pf.mngprc += IXGBE_READ_REG(hw, IXGBE_MNGPRC); adapter->stats.pf.mngpdc += IXGBE_READ_REG(hw, IXGBE_MNGPDC); adapter->stats.pf.mngptc += IXGBE_READ_REG(hw, IXGBE_MNGPTC); adapter->stats.pf.tpr += IXGBE_READ_REG(hw, IXGBE_TPR); adapter->stats.pf.tpt += IXGBE_READ_REG(hw, IXGBE_TPT); adapter->stats.pf.ptc127 += IXGBE_READ_REG(hw, IXGBE_PTC127); adapter->stats.pf.ptc255 += IXGBE_READ_REG(hw, IXGBE_PTC255); adapter->stats.pf.ptc511 += IXGBE_READ_REG(hw, IXGBE_PTC511); adapter->stats.pf.ptc1023 += IXGBE_READ_REG(hw, IXGBE_PTC1023); adapter->stats.pf.ptc1522 += IXGBE_READ_REG(hw, IXGBE_PTC1522); adapter->stats.pf.bptc += IXGBE_READ_REG(hw, IXGBE_BPTC); adapter->stats.pf.xec += IXGBE_READ_REG(hw, IXGBE_XEC); adapter->stats.pf.fccrc += IXGBE_READ_REG(hw, IXGBE_FCCRC); adapter->stats.pf.fclast += IXGBE_READ_REG(hw, IXGBE_FCLAST); /* Only read FCOE on 82599 */ if (hw->mac.type != ixgbe_mac_82598EB) { adapter->stats.pf.fcoerpdc += IXGBE_READ_REG(hw, IXGBE_FCOERPDC); adapter->stats.pf.fcoeprc += IXGBE_READ_REG(hw, IXGBE_FCOEPRC); adapter->stats.pf.fcoeptc += IXGBE_READ_REG(hw, IXGBE_FCOEPTC); adapter->stats.pf.fcoedwrc += IXGBE_READ_REG(hw, IXGBE_FCOEDWRC); adapter->stats.pf.fcoedwtc += IXGBE_READ_REG(hw, IXGBE_FCOEDWTC); } /* Fill out the OS statistics structure */ IXGBE_SET_IPACKETS(adapter, adapter->stats.pf.gprc); IXGBE_SET_OPACKETS(adapter, adapter->stats.pf.gptc); IXGBE_SET_IBYTES(adapter, adapter->stats.pf.gorc); IXGBE_SET_OBYTES(adapter, adapter->stats.pf.gotc); IXGBE_SET_IMCASTS(adapter, adapter->stats.pf.mprc); IXGBE_SET_OMCASTS(adapter, adapter->stats.pf.mptc); IXGBE_SET_COLLISIONS(adapter, 0); IXGBE_SET_IQDROPS(adapter, total_missed_rx); IXGBE_SET_IERRORS(adapter, adapter->stats.pf.crcerrs + adapter->stats.pf.rlec); } #if __FreeBSD_version >= 1100036 static uint64_t ixgbe_get_counter(struct ifnet *ifp, ift_counter cnt) { struct adapter *adapter; struct tx_ring *txr; uint64_t rv; adapter = if_getsoftc(ifp); switch (cnt) { case IFCOUNTER_IPACKETS: return (adapter->ipackets); case IFCOUNTER_OPACKETS: return (adapter->opackets); case IFCOUNTER_IBYTES: return (adapter->ibytes); case IFCOUNTER_OBYTES: return (adapter->obytes); case IFCOUNTER_IMCASTS: return (adapter->imcasts); case IFCOUNTER_OMCASTS: 
return (adapter->omcasts); case IFCOUNTER_COLLISIONS: return (0); case IFCOUNTER_IQDROPS: return (adapter->iqdrops); case IFCOUNTER_OQDROPS: rv = 0; txr = adapter->tx_rings; for (int i = 0; i < adapter->num_queues; i++, txr++) rv += txr->br->br_drops; return (rv); case IFCOUNTER_IERRORS: return (adapter->ierrors); default: return (if_get_counter_default(ifp, cnt)); } } #endif /** ixgbe_sysctl_tdh_handler - Handler function * Retrieves the TDH value from the hardware */ static int ixgbe_sysctl_tdh_handler(SYSCTL_HANDLER_ARGS) { int error; struct tx_ring *txr = ((struct tx_ring *)oidp->oid_arg1); if (!txr) return 0; unsigned val = IXGBE_READ_REG(&txr->adapter->hw, IXGBE_TDH(txr->me)); error = sysctl_handle_int(oidp, &val, 0, req); if (error || !req->newptr) return error; return 0; } /** ixgbe_sysctl_tdt_handler - Handler function * Retrieves the TDT value from the hardware */ static int ixgbe_sysctl_tdt_handler(SYSCTL_HANDLER_ARGS) { int error; struct tx_ring *txr = ((struct tx_ring *)oidp->oid_arg1); if (!txr) return 0; unsigned val = IXGBE_READ_REG(&txr->adapter->hw, IXGBE_TDT(txr->me)); error = sysctl_handle_int(oidp, &val, 0, req); if (error || !req->newptr) return error; return 0; } /** ixgbe_sysctl_rdh_handler - Handler function * Retrieves the RDH value from the hardware */ static int ixgbe_sysctl_rdh_handler(SYSCTL_HANDLER_ARGS) { int error; struct rx_ring *rxr = ((struct rx_ring *)oidp->oid_arg1); if (!rxr) return 0; unsigned val = IXGBE_READ_REG(&rxr->adapter->hw, IXGBE_RDH(rxr->me)); error = sysctl_handle_int(oidp, &val, 0, req); if (error || !req->newptr) return error; return 0; } /** ixgbe_sysctl_rdt_handler - Handler function * Retrieves the RDT value from the hardware */ static int ixgbe_sysctl_rdt_handler(SYSCTL_HANDLER_ARGS) { int error; struct rx_ring *rxr = ((struct rx_ring *)oidp->oid_arg1); if (!rxr) return 0; unsigned val = IXGBE_READ_REG(&rxr->adapter->hw, IXGBE_RDT(rxr->me)); error = sysctl_handle_int(oidp, &val, 0, req); if (error || !req->newptr) return error; return 0; } static int ixgbe_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS) { int error; struct ix_queue *que = ((struct ix_queue *)oidp->oid_arg1); unsigned int reg, usec, rate; reg = IXGBE_READ_REG(&que->adapter->hw, IXGBE_EITR(que->msix)); usec = ((reg & 0x0FF8) >> 3); if (usec > 0) rate = 500000 / usec; else rate = 0; error = sysctl_handle_int(oidp, &rate, 0, req); if (error || !req->newptr) return error; reg &= ~0xfff; /* default, no limitation */ ixgbe_max_interrupt_rate = 0; if (rate > 0 && rate < 500000) { if (rate < 1000) rate = 1000; ixgbe_max_interrupt_rate = rate; reg |= ((4000000/rate) & 0xff8 ); } IXGBE_WRITE_REG(&que->adapter->hw, IXGBE_EITR(que->msix), reg); return 0; } static void ixgbe_add_device_sysctls(struct adapter *adapter) { device_t dev = adapter->dev; struct ixgbe_hw *hw = &adapter->hw; struct sysctl_oid_list *child; struct sysctl_ctx_list *ctx; ctx = device_get_sysctl_ctx(dev); child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); /* Sysctls for all devices */ SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "fc", CTLTYPE_INT | CTLFLAG_RW, adapter, 0, ixgbe_sysctl_flowcntl, "I", IXGBE_SYSCTL_DESC_SET_FC); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "enable_aim", CTLFLAG_RW, &ixgbe_enable_aim, 1, "Interrupt Moderation"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "advertise_speed", CTLTYPE_INT | CTLFLAG_RW, adapter, 0, ixgbe_sysctl_advertise, "I", IXGBE_SYSCTL_DESC_ADV_SPEED); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "thermal_test", CTLTYPE_INT | CTLFLAG_RW, adapter, 0, ixgbe_sysctl_thermal_test, "I", 
"Thermal Test"); #ifdef IXGBE_DEBUG /* testing sysctls (for all devices) */ SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "power_state", CTLTYPE_INT | CTLFLAG_RW, adapter, 0, ixgbe_sysctl_power_state, "I", "PCI Power State"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "print_rss_config", CTLTYPE_STRING | CTLFLAG_RD, adapter, 0, ixgbe_sysctl_print_rss_config, "A", "Prints RSS Configuration"); #endif /* for X550 series devices */ if (hw->mac.type >= ixgbe_mac_X550) SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "dmac", CTLTYPE_INT | CTLFLAG_RW, adapter, 0, ixgbe_sysctl_dmac, "I", "DMA Coalesce"); /* for X552 backplane devices */ if (hw->device_id == IXGBE_DEV_ID_X550EM_X_KR) { struct sysctl_oid *eee_node; struct sysctl_oid_list *eee_list; eee_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "eee", CTLFLAG_RD, NULL, "Energy Efficient Ethernet sysctls"); eee_list = SYSCTL_CHILDREN(eee_node); SYSCTL_ADD_PROC(ctx, eee_list, OID_AUTO, "enable", CTLTYPE_INT | CTLFLAG_RW, adapter, 0, ixgbe_sysctl_eee_enable, "I", "Enable or Disable EEE"); SYSCTL_ADD_PROC(ctx, eee_list, OID_AUTO, "negotiated", CTLTYPE_INT | CTLFLAG_RD, adapter, 0, ixgbe_sysctl_eee_negotiated, "I", "EEE negotiated on link"); SYSCTL_ADD_PROC(ctx, eee_list, OID_AUTO, "tx_lpi_status", CTLTYPE_INT | CTLFLAG_RD, adapter, 0, ixgbe_sysctl_eee_tx_lpi_status, "I", "Whether or not TX link is in LPI state"); SYSCTL_ADD_PROC(ctx, eee_list, OID_AUTO, "rx_lpi_status", CTLTYPE_INT | CTLFLAG_RD, adapter, 0, ixgbe_sysctl_eee_rx_lpi_status, "I", "Whether or not RX link is in LPI state"); SYSCTL_ADD_PROC(ctx, eee_list, OID_AUTO, "tx_lpi_delay", CTLTYPE_INT | CTLFLAG_RD, adapter, 0, ixgbe_sysctl_eee_tx_lpi_delay, "I", "TX LPI entry delay in microseconds"); } /* for WoL-capable devices */ if (hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T) { SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "wol_enable", CTLTYPE_INT | CTLFLAG_RW, adapter, 0, ixgbe_sysctl_wol_enable, "I", "Enable/Disable Wake on LAN"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "wufc", CTLTYPE_INT | CTLFLAG_RW, adapter, 0, ixgbe_sysctl_wufc, "I", "Enable/Disable Wake Up Filters"); } /* for X552/X557-AT devices */ if (hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T) { struct sysctl_oid *phy_node; struct sysctl_oid_list *phy_list; phy_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "phy", CTLFLAG_RD, NULL, "External PHY sysctls"); phy_list = SYSCTL_CHILDREN(phy_node); SYSCTL_ADD_PROC(ctx, phy_list, OID_AUTO, "temp", CTLTYPE_INT | CTLFLAG_RD, adapter, 0, ixgbe_sysctl_phy_temp, "I", "Current External PHY Temperature (Celsius)"); SYSCTL_ADD_PROC(ctx, phy_list, OID_AUTO, "overtemp_occurred", CTLTYPE_INT | CTLFLAG_RD, adapter, 0, ixgbe_sysctl_phy_overtemp_occurred, "I", "External PHY High Temperature Event Occurred"); } } /* * Add sysctl variables, one per statistic, to the system. 
*/ static void ixgbe_add_hw_stats(struct adapter *adapter) { device_t dev = adapter->dev; struct tx_ring *txr = adapter->tx_rings; struct rx_ring *rxr = adapter->rx_rings; struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev); struct sysctl_oid *tree = device_get_sysctl_tree(dev); struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree); struct ixgbe_hw_stats *stats = &adapter->stats.pf; struct sysctl_oid *stat_node, *queue_node; struct sysctl_oid_list *stat_list, *queue_list; #define QUEUE_NAME_LEN 32 char namebuf[QUEUE_NAME_LEN]; /* Driver Statistics */ SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", CTLFLAG_RD, &adapter->dropped_pkts, "Driver dropped packets"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_failed", CTLFLAG_RD, &adapter->mbuf_defrag_failed, "m_defrag() failed"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_events", CTLFLAG_RD, &adapter->watchdog_events, "Watchdog timeouts"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq", CTLFLAG_RD, &adapter->link_irq, "Link MSIX IRQ Handled"); for (int i = 0; i < adapter->num_queues; i++, txr++) { snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i); queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "Queue Name"); queue_list = SYSCTL_CHILDREN(queue_node); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate", CTLTYPE_UINT | CTLFLAG_RW, &adapter->queues[i], sizeof(&adapter->queues[i]), ixgbe_sysctl_interrupt_rate_handler, "IU", "Interrupt Rate"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "irqs", CTLFLAG_RD, &(adapter->queues[i].irqs), "irqs on this queue"); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", CTLTYPE_UINT | CTLFLAG_RD, txr, sizeof(txr), ixgbe_sysctl_tdh_handler, "IU", "Transmit Descriptor Head"); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", CTLTYPE_UINT | CTLFLAG_RD, txr, sizeof(txr), ixgbe_sysctl_tdt_handler, "IU", "Transmit Descriptor Tail"); SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tso_tx", CTLFLAG_RD, &txr->tso_tx, "TSO"); SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_tx_dma_setup", CTLFLAG_RD, &txr->no_tx_dma_setup, "Driver tx dma failure in xmit"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "no_desc_avail", CTLFLAG_RD, &txr->no_desc_avail, "Queue No Descriptor Available"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_packets", CTLFLAG_RD, &txr->total_packets, "Queue Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "br_drops", CTLFLAG_RD, &txr->br->br_drops, "Packets dropped in buf_ring"); } for (int i = 0; i < adapter->num_queues; i++, rxr++) { struct lro_ctrl *lro = &rxr->lro; snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i); queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "Queue Name"); queue_list = SYSCTL_CHILDREN(queue_node); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", CTLTYPE_UINT | CTLFLAG_RD, rxr, sizeof(rxr), ixgbe_sysctl_rdh_handler, "IU", "Receive Descriptor Head"); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", CTLTYPE_UINT | CTLFLAG_RD, rxr, sizeof(rxr), ixgbe_sysctl_rdt_handler, "IU", "Receive Descriptor Tail"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_packets", CTLFLAG_RD, &rxr->rx_packets, "Queue Packets Received"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_bytes", CTLFLAG_RD, &rxr->rx_bytes, "Queue Bytes Received"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_copies",
CTLFLAG_RD, &rxr->rx_copies, "Copied RX Frames"); SYSCTL_ADD_U64(ctx, queue_list, OID_AUTO, "lro_queued", CTLFLAG_RD, &lro->lro_queued, 0, "LRO Queued"); SYSCTL_ADD_U64(ctx, queue_list, OID_AUTO, "lro_flushed", CTLFLAG_RD, &lro->lro_flushed, 0, "LRO Flushed"); } /* MAC stats get their own sub node */ stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", CTLFLAG_RD, NULL, "MAC Statistics"); stat_list = SYSCTL_CHILDREN(stat_node); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs", CTLFLAG_RD, &stats->crcerrs, "CRC Errors"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "ill_errs", CTLFLAG_RD, &stats->illerrc, "Illegal Byte Errors"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "byte_errs", CTLFLAG_RD, &stats->errbc, "Byte Errors"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "short_discards", CTLFLAG_RD, &stats->mspdc, "MAC Short Packets Discarded"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "local_faults", CTLFLAG_RD, &stats->mlfc, "MAC Local Faults"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "remote_faults", CTLFLAG_RD, &stats->mrfc, "MAC Remote Faults"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rec_len_errs", CTLFLAG_RD, &stats->rlec, "Receive Length Errors"); /* Flow Control stats */ SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd", CTLFLAG_RD, &stats->lxontxc, "Link XON Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd", CTLFLAG_RD, &stats->lxonrxc, "Link XON Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd", CTLFLAG_RD, &stats->lxofftxc, "Link XOFF Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd", CTLFLAG_RD, &stats->lxoffrxc, "Link XOFF Received"); /* Packet Reception Stats */ SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_octets_rcvd", CTLFLAG_RD, &stats->tor, "Total Octets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_rcvd", CTLFLAG_RD, &stats->gorc, "Good Octets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_rcvd", CTLFLAG_RD, &stats->tpr, "Total Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_rcvd", CTLFLAG_RD, &stats->gprc, "Good Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_rcvd", CTLFLAG_RD, &stats->mprc, "Multicast Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_rcvd", CTLFLAG_RD, &stats->bprc, "Broadcast Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64", CTLFLAG_RD, &stats->prc64, "64 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127", CTLFLAG_RD, &stats->prc127, "65-127 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255", CTLFLAG_RD, &stats->prc255, "128-255 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511", CTLFLAG_RD, &stats->prc511, "256-511 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023", CTLFLAG_RD, &stats->prc1023, "512-1023 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522", CTLFLAG_RD, &stats->prc1522, "1024-1522 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersized", CTLFLAG_RD, &stats->ruc, "Receive Undersized"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented", CTLFLAG_RD, &stats->rfc, "Fragmented Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversized", CTLFLAG_RD, &stats->roc, "Oversized Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabberd", CTLFLAG_RD, &stats->rjc,
"Received Jabber"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_rcvd", CTLFLAG_RD, &stats->mngprc, "Management Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_drpd", CTLFLAG_RD, &stats->mngptc, "Management Packets Dropped"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "checksum_errs", CTLFLAG_RD, &stats->xec, "Checksum Errors"); /* Packet Transmission Stats */ SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd", CTLFLAG_RD, &stats->gotc, "Good Octets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd", CTLFLAG_RD, &stats->tpt, "Total Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd", CTLFLAG_RD, &stats->gptc, "Good Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd", CTLFLAG_RD, &stats->bptc, "Broadcast Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd", CTLFLAG_RD, &stats->mptc, "Multicast Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_txd", CTLFLAG_RD, &stats->mngptc, "Management Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64", CTLFLAG_RD, &stats->ptc64, "64 byte frames transmitted "); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127", CTLFLAG_RD, &stats->ptc127, "65-127 byte frames transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255", CTLFLAG_RD, &stats->ptc255, "128-255 byte frames transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511", CTLFLAG_RD, &stats->ptc511, "256-511 byte frames transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023", CTLFLAG_RD, &stats->ptc1023, "512-1023 byte frames transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522", CTLFLAG_RD, &stats->ptc1522, "1024-1522 byte frames transmitted"); } static void ixgbe_set_sysctl_value(struct adapter *adapter, const char *name, const char *description, int *limit, int value) { *limit = value; SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev), SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)), OID_AUTO, name, CTLFLAG_RW, limit, value, description); } /* ** Set flow control using sysctl: ** Flow control values: ** 0 - off ** 1 - rx pause ** 2 - tx pause ** 3 - full */ static int ixgbe_sysctl_flowcntl(SYSCTL_HANDLER_ARGS) { int error, fc; struct adapter *adapter; adapter = (struct adapter *) arg1; fc = adapter->fc; error = sysctl_handle_int(oidp, &fc, 0, req); if ((error) || (req->newptr == NULL)) return (error); /* Don't bother if it's not changed */ if (adapter->fc == fc) return (0); return ixgbe_set_flowcntl(adapter, fc); } static int ixgbe_set_flowcntl(struct adapter *adapter, int fc) { switch (fc) { case ixgbe_fc_rx_pause: case ixgbe_fc_tx_pause: case ixgbe_fc_full: adapter->hw.fc.requested_mode = adapter->fc; if (adapter->num_queues > 1) ixgbe_disable_rx_drop(adapter); break; case ixgbe_fc_none: adapter->hw.fc.requested_mode = ixgbe_fc_none; if (adapter->num_queues > 1) ixgbe_enable_rx_drop(adapter); break; default: return (EINVAL); } adapter->fc = fc; /* Don't autoneg if forcing a value */ adapter->hw.fc.disable_fc_autoneg = TRUE; ixgbe_fc_enable(&adapter->hw); return (0); } /* ** Control advertised link speed: ** Flags: ** 0x1 - advertise 100 Mb ** 0x2 - advertise 1G ** 0x4 - advertise 10G */ static int ixgbe_sysctl_advertise(SYSCTL_HANDLER_ARGS) { int error, advertise; struct adapter *adapter; adapter = (struct adapter *) arg1; advertise = adapter->advertise; error = 
sysctl_handle_int(oidp, &advertise, 0, req); if ((error) || (req->newptr == NULL)) return (error); return ixgbe_set_advertise(adapter, advertise); } static int ixgbe_set_advertise(struct adapter *adapter, int advertise) { device_t dev; struct ixgbe_hw *hw; ixgbe_link_speed speed; /* Checks to validate new value */ if (adapter->advertise == advertise) /* no change */ return (0); hw = &adapter->hw; dev = adapter->dev; /* No speed changes for backplane media */ if (hw->phy.media_type == ixgbe_media_type_backplane) return (ENODEV); if (!((hw->phy.media_type == ixgbe_media_type_copper) || (hw->phy.multispeed_fiber))) { device_printf(dev, "Advertised speed can only be set on copper or " "multispeed fiber media types.\n"); return (EINVAL); } if (advertise < 0x1 || advertise > 0x7) { device_printf(dev, "Invalid advertised speed; valid modes are 0x1 through 0x7\n"); return (EINVAL); } if ((advertise & 0x1) && (hw->mac.type != ixgbe_mac_X540) && (hw->mac.type != ixgbe_mac_X550)) { device_printf(dev, "Set Advertise: 100Mb on X540/X550 only\n"); return (EINVAL); } /* Set new value and report new advertised mode */ speed = 0; if (advertise & 0x1) speed |= IXGBE_LINK_SPEED_100_FULL; if (advertise & 0x2) speed |= IXGBE_LINK_SPEED_1GB_FULL; if (advertise & 0x4) speed |= IXGBE_LINK_SPEED_10GB_FULL; adapter->advertise = advertise; hw->mac.autotry_restart = TRUE; hw->mac.ops.setup_link(hw, speed, TRUE); return (0); } /* * The following two sysctls are for X552/X557-AT devices; * they deal with the external PHY used in them. */ static int ixgbe_sysctl_phy_temp(SYSCTL_HANDLER_ARGS) { struct adapter *adapter = (struct adapter *) arg1; struct ixgbe_hw *hw = &adapter->hw; u16 reg; if (hw->device_id != IXGBE_DEV_ID_X550EM_X_10G_T) { device_printf(adapter->dev, "Device has no supported external thermal sensor.\n"); return (ENODEV); } if (hw->phy.ops.read_reg(hw, IXGBE_PHY_CURRENT_TEMP, IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, ®)) { device_printf(adapter->dev, "Error reading from PHY's current temperature register\n"); return (EAGAIN); } /* Shift temp for output */ reg = reg >> 8; return (sysctl_handle_int(oidp, NULL, reg, req)); } /* * Reports whether the current PHY temperature is over * the overtemp threshold. * - This is reported directly from the PHY */ static int ixgbe_sysctl_phy_overtemp_occurred(SYSCTL_HANDLER_ARGS) { struct adapter *adapter = (struct adapter *) arg1; struct ixgbe_hw *hw = &adapter->hw; u16 reg; if (hw->device_id != IXGBE_DEV_ID_X550EM_X_10G_T) { device_printf(adapter->dev, "Device has no supported external thermal sensor.\n"); return (ENODEV); } if (hw->phy.ops.read_reg(hw, IXGBE_PHY_OVERTEMP_STATUS, IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, ®)) { device_printf(adapter->dev, "Error reading from PHY's temperature status register\n"); return (EAGAIN); } /* Get occurrence bit */ reg = !!(reg & 0x4000); return (sysctl_handle_int(oidp, 0, reg, req)); } /* ** Thermal Shutdown Trigger (internal MAC) ** - Set this to 1 to cause an overtemp event to occur */ static int ixgbe_sysctl_thermal_test(SYSCTL_HANDLER_ARGS) { struct adapter *adapter = (struct adapter *) arg1; struct ixgbe_hw *hw = &adapter->hw; int error, fire = 0; error = sysctl_handle_int(oidp, &fire, 0, req); if ((error) || (req->newptr == NULL)) return (error); if (fire) { u32 reg = IXGBE_READ_REG(hw, IXGBE_EICS); reg |= IXGBE_EICR_TS; IXGBE_WRITE_REG(hw, IXGBE_EICS, reg); } return (0); } /* ** Manage DMA Coalescing. 
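** (DMA coalescing lets the adapter briefly buffer incoming
** traffic so the PCIe link and memory can remain in low-power
** states between bursts.)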
** Control values: ** 0/1 - off / on (use default value of 1000) ** ** Legal timer values are: ** 50,100,250,500,1000,2000,5000,10000 ** ** Turning off interrupt moderation will also turn this off. */ static int ixgbe_sysctl_dmac(SYSCTL_HANDLER_ARGS) { struct adapter *adapter = (struct adapter *) arg1; struct ifnet *ifp = adapter->ifp; int error; u32 newval; newval = adapter->dmac; error = sysctl_handle_int(oidp, &newval, 0, req); if ((error) || (req->newptr == NULL)) return (error); switch (newval) { case 0: /* Disabled */ adapter->dmac = 0; break; case 1: /* Enable and use default */ adapter->dmac = 1000; break; case 50: case 100: case 250: case 500: case 1000: case 2000: case 5000: case 10000: /* Legal values - allow */ adapter->dmac = newval; break; default: /* Do nothing, illegal value */ return (EINVAL); } /* Re-initialize hardware if it's already running */ if (ifp->if_drv_flags & IFF_DRV_RUNNING) ixgbe_init(adapter); return (0); } #ifdef IXGBE_DEBUG /** * Sysctl to test power states * Values: * 0 - set device to D0 * 3 - set device to D3 * (none) - get current device power state */ static int ixgbe_sysctl_power_state(SYSCTL_HANDLER_ARGS) { struct adapter *adapter = (struct adapter *) arg1; device_t dev = adapter->dev; int curr_ps, new_ps, error = 0; curr_ps = new_ps = pci_get_powerstate(dev); error = sysctl_handle_int(oidp, &new_ps, 0, req); if ((error) || (req->newptr == NULL)) return (error); if (new_ps == curr_ps) return (0); if (new_ps == 3 && curr_ps == 0) error = DEVICE_SUSPEND(dev); else if (new_ps == 0 && curr_ps == 3) error = DEVICE_RESUME(dev); else return (EINVAL); device_printf(dev, "New state: %d\n", pci_get_powerstate(dev)); return (error); } #endif /* * Sysctl to enable/disable the WoL capability, if supported by the adapter. * Values: * 0 - disabled * 1 - enabled */ static int ixgbe_sysctl_wol_enable(SYSCTL_HANDLER_ARGS) { struct adapter *adapter = (struct adapter *) arg1; struct ixgbe_hw *hw = &adapter->hw; int new_wol_enabled; int error = 0; new_wol_enabled = hw->wol_enabled; error = sysctl_handle_int(oidp, &new_wol_enabled, 0, req); if ((error) || (req->newptr == NULL)) return (error); new_wol_enabled = !!(new_wol_enabled); if (new_wol_enabled == hw->wol_enabled) return (0); if (new_wol_enabled > 0 && !adapter->wol_support) return (ENODEV); else hw->wol_enabled = new_wol_enabled; return (0); } /* * Sysctl to enable/disable the Energy Efficient Ethernet capability, * if supported by the adapter. * Values: * 0 - disabled * 1 - enabled */ static int ixgbe_sysctl_eee_enable(SYSCTL_HANDLER_ARGS) { struct adapter *adapter = (struct adapter *) arg1; struct ixgbe_hw *hw = &adapter->hw; struct ifnet *ifp = adapter->ifp; int new_eee_enabled, error = 0; new_eee_enabled = adapter->eee_enabled; error = sysctl_handle_int(oidp, &new_eee_enabled, 0, req); if ((error) || (req->newptr == NULL)) return (error); new_eee_enabled = !!(new_eee_enabled); if (new_eee_enabled == adapter->eee_enabled) return (0); if (new_eee_enabled > 0 && !hw->mac.ops.setup_eee) return (ENODEV); else adapter->eee_enabled = new_eee_enabled; /* Re-initialize hardware if it's already running */ if (ifp->if_drv_flags & IFF_DRV_RUNNING) ixgbe_init(adapter); return (0); } /* * Read-only sysctl indicating whether EEE support was negotiated * on the link. 
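 * (Reports the negotiation bit of IXGBE_EEE_STAT, which is set
 * once both link partners have agreed to use EEE.)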
*/ static int ixgbe_sysctl_eee_negotiated(SYSCTL_HANDLER_ARGS) { struct adapter *adapter = (struct adapter *) arg1; struct ixgbe_hw *hw = &adapter->hw; bool status; status = !!(IXGBE_READ_REG(hw, IXGBE_EEE_STAT) & IXGBE_EEE_STAT_NEG); return (sysctl_handle_int(oidp, 0, status, req)); } /* * Read-only sysctl indicating whether RX Link is in LPI state. */ static int ixgbe_sysctl_eee_rx_lpi_status(SYSCTL_HANDLER_ARGS) { struct adapter *adapter = (struct adapter *) arg1; struct ixgbe_hw *hw = &adapter->hw; bool status; status = !!(IXGBE_READ_REG(hw, IXGBE_EEE_STAT) & IXGBE_EEE_RX_LPI_STATUS); return (sysctl_handle_int(oidp, 0, status, req)); } /* * Read-only sysctl indicating whether TX Link is in LPI state. */ static int ixgbe_sysctl_eee_tx_lpi_status(SYSCTL_HANDLER_ARGS) { struct adapter *adapter = (struct adapter *) arg1; struct ixgbe_hw *hw = &adapter->hw; bool status; status = !!(IXGBE_READ_REG(hw, IXGBE_EEE_STAT) & IXGBE_EEE_TX_LPI_STATUS); return (sysctl_handle_int(oidp, 0, status, req)); } /* * Read-only sysctl indicating TX Link LPI delay */ static int ixgbe_sysctl_eee_tx_lpi_delay(SYSCTL_HANDLER_ARGS) { struct adapter *adapter = (struct adapter *) arg1; struct ixgbe_hw *hw = &adapter->hw; u32 reg; reg = IXGBE_READ_REG(hw, IXGBE_EEE_SU); return (sysctl_handle_int(oidp, 0, reg >> 26, req)); } /* * Sysctl to enable/disable the types of packets that the * adapter will wake up on receipt of. * WUFC - Wake Up Filter Control * Flags: * 0x1 - Link Status Change * 0x2 - Magic Packet * 0x4 - Direct Exact * 0x8 - Directed Multicast * 0x10 - Broadcast * 0x20 - ARP/IPv4 Request Packet * 0x40 - Direct IPv4 Packet * 0x80 - Direct IPv6 Packet * * Setting any other flag will cause the sysctl to return an * error. */ static int ixgbe_sysctl_wufc(SYSCTL_HANDLER_ARGS) { struct adapter *adapter = (struct adapter *) arg1; int error = 0; u32 new_wufc; new_wufc = adapter->wufc; error = sysctl_handle_int(oidp, &new_wufc, 0, req); if ((error) || (req->newptr == NULL)) return (error); if (new_wufc == adapter->wufc) return (0); if (new_wufc & 0xffffff00) return (EINVAL); else { new_wufc &= 0xff; new_wufc |= (0xffffff & adapter->wufc); adapter->wufc = new_wufc; } return (0); } #ifdef IXGBE_DEBUG static int ixgbe_sysctl_print_rss_config(SYSCTL_HANDLER_ARGS) { struct adapter *adapter = (struct adapter *)arg1; struct ixgbe_hw *hw = &adapter->hw; device_t dev = adapter->dev; int error = 0, reta_size; struct sbuf *buf; u32 reg; buf = sbuf_new_for_sysctl(NULL, NULL, 128, req); if (!buf) { device_printf(dev, "Could not allocate sbuf for output.\n"); return (ENOMEM); } // TODO: use sbufs to make a string to print out /* Set multiplier for RETA setup and table size based on MAC */ switch (adapter->hw.mac.type) { case ixgbe_mac_X550: case ixgbe_mac_X550EM_x: reta_size = 128; break; default: reta_size = 32; break; } /* Print out the redirection table */ sbuf_cat(buf, "\n"); for (int i = 0; i < reta_size; i++) { if (i < 32) { reg = IXGBE_READ_REG(hw, IXGBE_RETA(i)); sbuf_printf(buf, "RETA(%2d): 0x%08x\n", i, reg); } else { reg = IXGBE_READ_REG(hw, IXGBE_ERETA(i - 32)); sbuf_printf(buf, "ERETA(%2d): 0x%08x\n", i - 32, reg); } } // TODO: print more config error = sbuf_finish(buf); if (error) device_printf(dev, "Error finishing sbuf: %d\n", error); sbuf_delete(buf); return (0); } #endif /* IXGBE_DEBUG */ /* ** Enable the hardware to drop packets when the buffer is ** full. This is useful with multiqueue, so that no single ** queue being full stalls the entire RX engine.
We only ** enable this when multiqueue is used AND flow control is ** disabled. */ static void ixgbe_enable_rx_drop(struct adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; for (int i = 0; i < adapter->num_queues; i++) { struct rx_ring *rxr = &adapter->rx_rings[i]; u32 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(rxr->me)); srrctl |= IXGBE_SRRCTL_DROP_EN; IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxr->me), srrctl); } #ifdef PCI_IOV /* enable drop for each vf */ for (int i = 0; i < adapter->num_vfs; i++) { IXGBE_WRITE_REG(hw, IXGBE_QDE, (IXGBE_QDE_WRITE | (i << IXGBE_QDE_IDX_SHIFT) | IXGBE_QDE_ENABLE)); } #endif } static void ixgbe_disable_rx_drop(struct adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; for (int i = 0; i < adapter->num_queues; i++) { struct rx_ring *rxr = &adapter->rx_rings[i]; u32 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(rxr->me)); srrctl &= ~IXGBE_SRRCTL_DROP_EN; IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxr->me), srrctl); } #ifdef PCI_IOV /* disable drop for each vf */ for (int i = 0; i < adapter->num_vfs; i++) { IXGBE_WRITE_REG(hw, IXGBE_QDE, (IXGBE_QDE_WRITE | (i << IXGBE_QDE_IDX_SHIFT))); } #endif } static void ixgbe_rearm_queues(struct adapter *adapter, u64 queues) { u32 mask; switch (adapter->hw.mac.type) { case ixgbe_mac_82598EB: mask = (IXGBE_EIMS_RTX_QUEUE & queues); IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS, mask); break; case ixgbe_mac_82599EB: case ixgbe_mac_X540: case ixgbe_mac_X550: case ixgbe_mac_X550EM_x: mask = (queues & 0xFFFFFFFF); IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS_EX(0), mask); mask = (queues >> 32); IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS_EX(1), mask); break; default: break; } } #ifdef PCI_IOV /* ** Support functions for SRIOV/VF management */ static void ixgbe_ping_all_vfs(struct adapter *adapter) { struct ixgbe_vf *vf; for (int i = 0; i < adapter->num_vfs; i++) { vf = &adapter->vfs[i]; if (vf->flags & IXGBE_VF_ACTIVE) ixgbe_send_vf_msg(adapter, vf, IXGBE_PF_CONTROL_MSG); } } static void ixgbe_vf_set_default_vlan(struct adapter *adapter, struct ixgbe_vf *vf, uint16_t tag) { struct ixgbe_hw *hw; uint32_t vmolr, vmvir; hw = &adapter->hw; vf->vlan_tag = tag; vmolr = IXGBE_READ_REG(hw, IXGBE_VMOLR(vf->pool)); /* Do not receive packets that pass inexact filters. */ vmolr &= ~(IXGBE_VMOLR_ROMPE | IXGBE_VMOLR_ROPE); /* Disable Multicast Promiscuous Mode. */ vmolr &= ~IXGBE_VMOLR_MPE; /* Accept broadcasts. */ vmolr |= IXGBE_VMOLR_BAM; if (tag == 0) { /* Accept non-vlan tagged traffic. */ //vmolr |= IXGBE_VMOLR_AUPE; /* Allow VM to tag outgoing traffic; no default tag. */ vmvir = 0; } else { /* Require vlan-tagged traffic. */ vmolr &= ~IXGBE_VMOLR_AUPE; /* Tag all traffic with provided vlan tag. */ vmvir = (tag | IXGBE_VMVIR_VLANA_DEFAULT); } IXGBE_WRITE_REG(hw, IXGBE_VMOLR(vf->pool), vmolr); IXGBE_WRITE_REG(hw, IXGBE_VMVIR(vf->pool), vmvir); } static boolean_t ixgbe_vf_frame_size_compatible(struct adapter *adapter, struct ixgbe_vf *vf) { /* * Frame size compatibility between PF and VF is only a problem on * 82599-based cards. X540 and later support any combination of jumbo * frames on PFs and VFs. */ if (adapter->hw.mac.type != ixgbe_mac_82599EB) return (TRUE); switch (vf->api_ver) { case IXGBE_API_VER_1_0: case IXGBE_API_VER_UNKNOWN: /* * On legacy (1.0 and older) VF versions, we don't support jumbo * frames on either the PF or the VF.
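 * (ETHER_MAX_LEN is the standard 1518-byte maximum, so anything
 * larger is treated as a jumbo frame here.)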
*/ if (adapter->max_frame_size > ETHER_MAX_LEN || vf->max_frame_size > ETHER_MAX_LEN) return (FALSE); return (TRUE); break; case IXGBE_API_VER_1_1: default: /* * 1.1 or later VF versions always work if they aren't using * jumbo frames. */ if (vf->max_frame_size <= ETHER_MAX_LEN) return (TRUE); /* * Jumbo frames only work with VFs if the PF is also using jumbo * frames. */ if (adapter->max_frame_size <= ETHER_MAX_LEN) return (TRUE); return (FALSE); } } static void ixgbe_process_vf_reset(struct adapter *adapter, struct ixgbe_vf *vf) { ixgbe_vf_set_default_vlan(adapter, vf, vf->default_vlan); // XXX clear multicast addresses ixgbe_clear_rar(&adapter->hw, vf->rar_index); vf->api_ver = IXGBE_API_VER_UNKNOWN; } static void ixgbe_vf_enable_transmit(struct adapter *adapter, struct ixgbe_vf *vf) { struct ixgbe_hw *hw; uint32_t vf_index, vfte; hw = &adapter->hw; vf_index = IXGBE_VF_INDEX(vf->pool); vfte = IXGBE_READ_REG(hw, IXGBE_VFTE(vf_index)); vfte |= IXGBE_VF_BIT(vf->pool); IXGBE_WRITE_REG(hw, IXGBE_VFTE(vf_index), vfte); } static void ixgbe_vf_enable_receive(struct adapter *adapter, struct ixgbe_vf *vf) { struct ixgbe_hw *hw; uint32_t vf_index, vfre; hw = &adapter->hw; vf_index = IXGBE_VF_INDEX(vf->pool); vfre = IXGBE_READ_REG(hw, IXGBE_VFRE(vf_index)); if (ixgbe_vf_frame_size_compatible(adapter, vf)) vfre |= IXGBE_VF_BIT(vf->pool); else vfre &= ~IXGBE_VF_BIT(vf->pool); IXGBE_WRITE_REG(hw, IXGBE_VFRE(vf_index), vfre); } static void ixgbe_vf_reset_msg(struct adapter *adapter, struct ixgbe_vf *vf, uint32_t *msg) { struct ixgbe_hw *hw; uint32_t ack; uint32_t resp[IXGBE_VF_PERMADDR_MSG_LEN]; hw = &adapter->hw; ixgbe_process_vf_reset(adapter, vf); if (ixgbe_validate_mac_addr(vf->ether_addr) == 0) { ixgbe_set_rar(&adapter->hw, vf->rar_index, vf->ether_addr, vf->pool, TRUE); ack = IXGBE_VT_MSGTYPE_ACK; } else ack = IXGBE_VT_MSGTYPE_NACK; ixgbe_vf_enable_transmit(adapter, vf); ixgbe_vf_enable_receive(adapter, vf); vf->flags |= IXGBE_VF_CTS; resp[0] = IXGBE_VF_RESET | ack | IXGBE_VT_MSGTYPE_CTS; bcopy(vf->ether_addr, &resp[1], ETHER_ADDR_LEN); resp[3] = hw->mac.mc_filter_type; ixgbe_write_mbx(hw, resp, IXGBE_VF_PERMADDR_MSG_LEN, vf->pool); } static void ixgbe_vf_set_mac(struct adapter *adapter, struct ixgbe_vf *vf, uint32_t *msg) { uint8_t *mac; mac = (uint8_t*)&msg[1]; /* Check that the VF has permission to change the MAC address. */ if (!(vf->flags & IXGBE_VF_CAP_MAC) && ixgbe_vf_mac_changed(vf, mac)) { ixgbe_send_vf_nack(adapter, vf, msg[0]); return; } if (ixgbe_validate_mac_addr(mac) != 0) { ixgbe_send_vf_nack(adapter, vf, msg[0]); return; } bcopy(mac, vf->ether_addr, ETHER_ADDR_LEN); ixgbe_set_rar(&adapter->hw, vf->rar_index, vf->ether_addr, vf->pool, TRUE); ixgbe_send_vf_ack(adapter, vf, msg[0]); } /* ** VF multicast addresses are set by using the appropriate bit in ** 1 of 128 32 bit addresses (4096 possible). 
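** For example, a hash value of 0x823 selects MTA register 65
** (0x823 >> 5) and bit 3 (0x823 & 0x1F) within it.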
*/ static void ixgbe_vf_set_mc_addr(struct adapter *adapter, struct ixgbe_vf *vf, u32 *msg) { u16 *list = (u16*)&msg[1]; int entries; u32 vmolr, vec_bit, vec_reg, mta_reg; entries = (msg[0] & IXGBE_VT_MSGINFO_MASK) >> IXGBE_VT_MSGINFO_SHIFT; entries = min(entries, IXGBE_MAX_VF_MC); vmolr = IXGBE_READ_REG(&adapter->hw, IXGBE_VMOLR(vf->pool)); vf->num_mc_hashes = entries; /* Set the appropriate MTA bit */ for (int i = 0; i < entries; i++) { vf->mc_hash[i] = list[i]; vec_reg = (vf->mc_hash[i] >> 5) & 0x7F; vec_bit = vf->mc_hash[i] & 0x1F; mta_reg = IXGBE_READ_REG(&adapter->hw, IXGBE_MTA(vec_reg)); mta_reg |= (1 << vec_bit); IXGBE_WRITE_REG(&adapter->hw, IXGBE_MTA(vec_reg), mta_reg); } vmolr |= IXGBE_VMOLR_ROMPE; IXGBE_WRITE_REG(&adapter->hw, IXGBE_VMOLR(vf->pool), vmolr); ixgbe_send_vf_ack(adapter, vf, msg[0]); return; } static void ixgbe_vf_set_vlan(struct adapter *adapter, struct ixgbe_vf *vf, uint32_t *msg) { struct ixgbe_hw *hw; int enable; uint16_t tag; hw = &adapter->hw; enable = IXGBE_VT_MSGINFO(msg[0]); tag = msg[1] & IXGBE_VLVF_VLANID_MASK; if (!(vf->flags & IXGBE_VF_CAP_VLAN)) { ixgbe_send_vf_nack(adapter, vf, msg[0]); return; } /* It is illegal to enable vlan tag 0. */ if (tag == 0 && enable != 0){ ixgbe_send_vf_nack(adapter, vf, msg[0]); return; } ixgbe_set_vfta(hw, tag, vf->pool, enable); ixgbe_send_vf_ack(adapter, vf, msg[0]); } static void ixgbe_vf_set_lpe(struct adapter *adapter, struct ixgbe_vf *vf, uint32_t *msg) { struct ixgbe_hw *hw; uint32_t vf_max_size, pf_max_size, mhadd; hw = &adapter->hw; vf_max_size = msg[1]; if (vf_max_size < ETHER_CRC_LEN) { /* We intentionally ACK invalid LPE requests. */ ixgbe_send_vf_ack(adapter, vf, msg[0]); return; } vf_max_size -= ETHER_CRC_LEN; if (vf_max_size > IXGBE_MAX_FRAME_SIZE) { /* We intentionally ACK invalid LPE requests. */ ixgbe_send_vf_ack(adapter, vf, msg[0]); return; } vf->max_frame_size = vf_max_size; ixgbe_update_max_frame(adapter, vf->max_frame_size); /* * We might have to disable reception to this VF if the frame size is * not compatible with the config on the PF. */ ixgbe_vf_enable_receive(adapter, vf); mhadd = IXGBE_READ_REG(hw, IXGBE_MHADD); pf_max_size = (mhadd & IXGBE_MHADD_MFS_MASK) >> IXGBE_MHADD_MFS_SHIFT; if (pf_max_size < adapter->max_frame_size) { mhadd &= ~IXGBE_MHADD_MFS_MASK; mhadd |= adapter->max_frame_size << IXGBE_MHADD_MFS_SHIFT; IXGBE_WRITE_REG(hw, IXGBE_MHADD, mhadd); } ixgbe_send_vf_ack(adapter, vf, msg[0]); } static void ixgbe_vf_set_macvlan(struct adapter *adapter, struct ixgbe_vf *vf, uint32_t *msg) { //XXX implement this ixgbe_send_vf_nack(adapter, vf, msg[0]); } static void ixgbe_vf_api_negotiate(struct adapter *adapter, struct ixgbe_vf *vf, uint32_t *msg) { switch (msg[1]) { case IXGBE_API_VER_1_0: case IXGBE_API_VER_1_1: vf->api_ver = msg[1]; ixgbe_send_vf_ack(adapter, vf, msg[0]); break; default: vf->api_ver = IXGBE_API_VER_UNKNOWN; ixgbe_send_vf_nack(adapter, vf, msg[0]); break; } } static void ixgbe_vf_get_queues(struct adapter *adapter, struct ixgbe_vf *vf, uint32_t *msg) { struct ixgbe_hw *hw; uint32_t resp[IXGBE_VF_GET_QUEUES_RESP_LEN]; int num_queues; hw = &adapter->hw; /* GET_QUEUES is not supported on pre-1.1 APIs. 
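 * A VF that has not negotiated at least API version 1.1 is
 * NACKed here.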
*/ switch (vf->api_ver) { case IXGBE_API_VER_1_0: case IXGBE_API_VER_UNKNOWN: ixgbe_send_vf_nack(adapter, vf, msg[0]); return; } resp[0] = IXGBE_VF_GET_QUEUES | IXGBE_VT_MSGTYPE_ACK | IXGBE_VT_MSGTYPE_CTS; num_queues = ixgbe_vf_queues(ixgbe_get_iov_mode(adapter)); resp[IXGBE_VF_TX_QUEUES] = num_queues; resp[IXGBE_VF_RX_QUEUES] = num_queues; resp[IXGBE_VF_TRANS_VLAN] = (vf->default_vlan != 0); resp[IXGBE_VF_DEF_QUEUE] = 0; ixgbe_write_mbx(hw, resp, IXGBE_VF_GET_QUEUES_RESP_LEN, vf->pool); } static void ixgbe_process_vf_msg(struct adapter *adapter, struct ixgbe_vf *vf) { struct ixgbe_hw *hw; uint32_t msg[IXGBE_VFMAILBOX_SIZE]; int error; hw = &adapter->hw; error = ixgbe_read_mbx(hw, msg, IXGBE_VFMAILBOX_SIZE, vf->pool); if (error != 0) return; CTR3(KTR_MALLOC, "%s: received msg %x from %d", adapter->ifp->if_xname, msg[0], vf->pool); if (msg[0] == IXGBE_VF_RESET) { ixgbe_vf_reset_msg(adapter, vf, msg); return; } if (!(vf->flags & IXGBE_VF_CTS)) { ixgbe_send_vf_nack(adapter, vf, msg[0]); return; } switch (msg[0] & IXGBE_VT_MSG_MASK) { case IXGBE_VF_SET_MAC_ADDR: ixgbe_vf_set_mac(adapter, vf, msg); break; case IXGBE_VF_SET_MULTICAST: ixgbe_vf_set_mc_addr(adapter, vf, msg); break; case IXGBE_VF_SET_VLAN: ixgbe_vf_set_vlan(adapter, vf, msg); break; case IXGBE_VF_SET_LPE: ixgbe_vf_set_lpe(adapter, vf, msg); break; case IXGBE_VF_SET_MACVLAN: ixgbe_vf_set_macvlan(adapter, vf, msg); break; case IXGBE_VF_API_NEGOTIATE: ixgbe_vf_api_negotiate(adapter, vf, msg); break; case IXGBE_VF_GET_QUEUES: ixgbe_vf_get_queues(adapter, vf, msg); break; default: ixgbe_send_vf_nack(adapter, vf, msg[0]); } } /* * Tasklet for handling VF -> PF mailbox messages. */ static void ixgbe_handle_mbx(void *context, int pending) { struct adapter *adapter; struct ixgbe_hw *hw; struct ixgbe_vf *vf; int i; adapter = context; hw = &adapter->hw; IXGBE_CORE_LOCK(adapter); for (i = 0; i < adapter->num_vfs; i++) { vf = &adapter->vfs[i]; if (vf->flags & IXGBE_VF_ACTIVE) { if (ixgbe_check_for_rst(hw, vf->pool) == 0) ixgbe_process_vf_reset(adapter, vf); if (ixgbe_check_for_msg(hw, vf->pool) == 0) ixgbe_process_vf_msg(adapter, vf); if (ixgbe_check_for_ack(hw, vf->pool) == 0) ixgbe_process_vf_ack(adapter, vf); } } IXGBE_CORE_UNLOCK(adapter); } static int ixgbe_init_iov(device_t dev, u16 num_vfs, const nvlist_t *config) { struct adapter *adapter; enum ixgbe_iov_mode mode; adapter = device_get_softc(dev); adapter->num_vfs = num_vfs; mode = ixgbe_get_iov_mode(adapter); if (num_vfs > ixgbe_max_vfs(mode)) { adapter->num_vfs = 0; return (ENOSPC); } IXGBE_CORE_LOCK(adapter); adapter->vfs = malloc(sizeof(*adapter->vfs) * num_vfs, M_IXGBE, M_NOWAIT | M_ZERO); if (adapter->vfs == NULL) { adapter->num_vfs = 0; IXGBE_CORE_UNLOCK(adapter); return (ENOMEM); } ixgbe_init_locked(adapter); IXGBE_CORE_UNLOCK(adapter); return (0); } static void ixgbe_uninit_iov(device_t dev) { struct ixgbe_hw *hw; struct adapter *adapter; uint32_t pf_reg, vf_reg; adapter = device_get_softc(dev); hw = &adapter->hw; IXGBE_CORE_LOCK(adapter); /* Enable rx/tx for the PF and disable it for all VFs.
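 * Each VFRE/VFTE register holds enable bits for 32 pools;
 * IXGBE_VF_INDEX() selects the register and IXGBE_VF_BIT() the
 * bit for a given pool, so the other register is simply cleared.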
*/ pf_reg = IXGBE_VF_INDEX(adapter->pool); IXGBE_WRITE_REG(hw, IXGBE_VFRE(pf_reg), IXGBE_VF_BIT(adapter->pool)); IXGBE_WRITE_REG(hw, IXGBE_VFTE(pf_reg), IXGBE_VF_BIT(adapter->pool)); if (pf_reg == 0) vf_reg = 1; else vf_reg = 0; IXGBE_WRITE_REG(hw, IXGBE_VFRE(vf_reg), 0); IXGBE_WRITE_REG(hw, IXGBE_VFTE(vf_reg), 0); IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0); free(adapter->vfs, M_IXGBE); adapter->vfs = NULL; adapter->num_vfs = 0; IXGBE_CORE_UNLOCK(adapter); } static void ixgbe_initialize_iov(struct adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; uint32_t mrqc, mtqc, vt_ctl, vf_reg, gcr_ext, gpie; enum ixgbe_iov_mode mode; int i; mode = ixgbe_get_iov_mode(adapter); if (mode == IXGBE_NO_VM) return; IXGBE_CORE_LOCK_ASSERT(adapter); mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC); mrqc &= ~IXGBE_MRQC_MRQE_MASK; switch (mode) { case IXGBE_64_VM: mrqc |= IXGBE_MRQC_VMDQRSS64EN; break; case IXGBE_32_VM: mrqc |= IXGBE_MRQC_VMDQRSS32EN; break; default: panic("Unexpected SR-IOV mode %d", mode); } IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc); mtqc = IXGBE_MTQC_VT_ENA; switch (mode) { case IXGBE_64_VM: mtqc |= IXGBE_MTQC_64VF; break; case IXGBE_32_VM: mtqc |= IXGBE_MTQC_32VF; break; default: panic("Unexpected SR-IOV mode %d", mode); } IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc); gcr_ext = IXGBE_READ_REG(hw, IXGBE_GCR_EXT); gcr_ext |= IXGBE_GCR_EXT_MSIX_EN; gcr_ext &= ~IXGBE_GCR_EXT_VT_MODE_MASK; switch (mode) { case IXGBE_64_VM: gcr_ext |= IXGBE_GCR_EXT_VT_MODE_64; break; case IXGBE_32_VM: gcr_ext |= IXGBE_GCR_EXT_VT_MODE_32; break; default: panic("Unexpected SR-IOV mode %d", mode); } IXGBE_WRITE_REG(hw, IXGBE_GCR_EXT, gcr_ext); gpie = IXGBE_READ_REG(hw, IXGBE_GPIE); gpie &= ~IXGBE_GPIE_VTMODE_MASK; switch (mode) { case IXGBE_64_VM: gpie |= IXGBE_GPIE_VTMODE_64; break; case IXGBE_32_VM: gpie |= IXGBE_GPIE_VTMODE_32; break; default: panic("Unexpected SR-IOV mode %d", mode); } IXGBE_WRITE_REG(hw, IXGBE_GPIE, gpie); /* Enable rx/tx for the PF. */ vf_reg = IXGBE_VF_INDEX(adapter->pool); IXGBE_WRITE_REG(hw, IXGBE_VFRE(vf_reg), IXGBE_VF_BIT(adapter->pool)); IXGBE_WRITE_REG(hw, IXGBE_VFTE(vf_reg), IXGBE_VF_BIT(adapter->pool)); /* Allow VM-to-VM communication. 
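The VT_LBEN bit in PFDTXGSWC turns on the internal Tx switch, so frames sent from one pool to a MAC address owned by another pool are looped back inside the MAC instead of going out on the wire.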
*/ IXGBE_WRITE_REG(hw, IXGBE_PFDTXGSWC, IXGBE_PFDTXGSWC_VT_LBEN); vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN; vt_ctl |= (adapter->pool << IXGBE_VT_CTL_POOL_SHIFT); IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl); for (i = 0; i < adapter->num_vfs; i++) ixgbe_init_vf(adapter, &adapter->vfs[i]); } /* ** Check the max frame setting of all active VF's */ static void ixgbe_recalculate_max_frame(struct adapter *adapter) { struct ixgbe_vf *vf; IXGBE_CORE_LOCK_ASSERT(adapter); for (int i = 0; i < adapter->num_vfs; i++) { vf = &adapter->vfs[i]; if (vf->flags & IXGBE_VF_ACTIVE) ixgbe_update_max_frame(adapter, vf->max_frame_size); } } static void ixgbe_init_vf(struct adapter *adapter, struct ixgbe_vf *vf) { struct ixgbe_hw *hw; uint32_t vf_index, pfmbimr; IXGBE_CORE_LOCK_ASSERT(adapter); hw = &adapter->hw; if (!(vf->flags & IXGBE_VF_ACTIVE)) return; vf_index = IXGBE_VF_INDEX(vf->pool); pfmbimr = IXGBE_READ_REG(hw, IXGBE_PFMBIMR(vf_index)); pfmbimr |= IXGBE_VF_BIT(vf->pool); IXGBE_WRITE_REG(hw, IXGBE_PFMBIMR(vf_index), pfmbimr); ixgbe_vf_set_default_vlan(adapter, vf, vf->vlan_tag); // XXX multicast addresses if (ixgbe_validate_mac_addr(vf->ether_addr) == 0) { ixgbe_set_rar(&adapter->hw, vf->rar_index, vf->ether_addr, vf->pool, TRUE); } ixgbe_vf_enable_transmit(adapter, vf); ixgbe_vf_enable_receive(adapter, vf); ixgbe_send_vf_msg(adapter, vf, IXGBE_PF_CONTROL_MSG); } static int ixgbe_add_vf(device_t dev, u16 vfnum, const nvlist_t *config) { struct adapter *adapter; struct ixgbe_vf *vf; const void *mac; adapter = device_get_softc(dev); KASSERT(vfnum < adapter->num_vfs, ("VF index %d is out of range %d", vfnum, adapter->num_vfs)); IXGBE_CORE_LOCK(adapter); vf = &adapter->vfs[vfnum]; vf->pool = vfnum; /* RAR[0] is used by the PF so use vfnum + 1 for VF RAR. */ vf->rar_index = vfnum + 1; vf->default_vlan = 0; vf->max_frame_size = ETHER_MAX_LEN; ixgbe_update_max_frame(adapter, vf->max_frame_size); if (nvlist_exists_binary(config, "mac-addr")) { mac = nvlist_get_binary(config, "mac-addr", NULL); bcopy(mac, vf->ether_addr, ETHER_ADDR_LEN); if (nvlist_get_bool(config, "allow-set-mac")) vf->flags |= IXGBE_VF_CAP_MAC; } else /* * If the administrator has not specified a MAC address then * we must allow the VF to choose one. */ vf->flags |= IXGBE_VF_CAP_MAC; vf->flags |= IXGBE_VF_ACTIVE; ixgbe_init_vf(adapter, vf); IXGBE_CORE_UNLOCK(adapter); return (0); } #endif /* PCI_IOV */ Index: head/sys/dev/ixgbe/if_ixv.c =================================================================== --- head/sys/dev/ixgbe/if_ixv.c (revision 302383) +++ head/sys/dev/ixgbe/if_ixv.c (revision 302384) @@ -1,2203 +1,2204 @@ /****************************************************************************** Copyright (c) 2001-2015, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************/ /*$FreeBSD$*/ #ifndef IXGBE_STANDALONE_BUILD #include "opt_inet.h" #include "opt_inet6.h" #endif #include "ixgbe.h" /********************************************************************* * Driver version *********************************************************************/ char ixv_driver_version[] = "1.4.6-k"; /********************************************************************* * PCI Device ID Table * * Used by probe to select devices to load on * Last field stores an index into ixv_strings * Last entry must be all 0s * * { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index } *********************************************************************/ static ixgbe_vendor_info_t ixv_vendor_info_array[] = { {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_VF, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540_VF, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550_VF, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_X_VF, 0, 0, 0}, /* required last entry */ {0, 0, 0, 0, 0} }; /********************************************************************* * Table of branding strings *********************************************************************/ static char *ixv_strings[] = { "Intel(R) PRO/10GbE Virtual Function Network Driver" }; /********************************************************************* * Function prototypes *********************************************************************/ static int ixv_probe(device_t); static int ixv_attach(device_t); static int ixv_detach(device_t); static int ixv_shutdown(device_t); static int ixv_ioctl(struct ifnet *, u_long, caddr_t); static void ixv_init(void *); static void ixv_init_locked(struct adapter *); static void ixv_stop(void *); static void ixv_media_status(struct ifnet *, struct ifmediareq *); static int ixv_media_change(struct ifnet *); static void ixv_identify_hardware(struct adapter *); static int ixv_allocate_pci_resources(struct adapter *); static int ixv_allocate_msix(struct adapter *); static int ixv_setup_msix(struct adapter *); static void ixv_free_pci_resources(struct adapter *); static void ixv_local_timer(void *); static void ixv_setup_interface(device_t, struct adapter *); static void ixv_config_link(struct adapter *); static void ixv_initialize_transmit_units(struct adapter *); static void ixv_initialize_receive_units(struct adapter *); static void ixv_enable_intr(struct adapter *); static void ixv_disable_intr(struct adapter *); static void ixv_set_multi(struct adapter *); static void ixv_update_link_status(struct adapter *); static int ixv_sysctl_debug(SYSCTL_HANDLER_ARGS); static void ixv_set_ivar(struct adapter *, u8, u8, s8); static void ixv_configure_ivars(struct adapter *); static u8 * 
ixv_mc_array_itr(struct ixgbe_hw *, u8 **, u32 *); static void ixv_setup_vlan_support(struct adapter *); static void ixv_register_vlan(void *, struct ifnet *, u16); static void ixv_unregister_vlan(void *, struct ifnet *, u16); static void ixv_save_stats(struct adapter *); static void ixv_init_stats(struct adapter *); static void ixv_update_stats(struct adapter *); static void ixv_add_stats_sysctls(struct adapter *); static void ixv_set_sysctl_value(struct adapter *, const char *, const char *, int *, int); /* The MSI/X Interrupt handlers */ static void ixv_msix_que(void *); static void ixv_msix_mbx(void *); /* Deferred interrupt tasklets */ static void ixv_handle_que(void *, int); static void ixv_handle_mbx(void *, int); #ifdef DEV_NETMAP /* * This is defined in , which is included by * if_ix.c. */ extern void ixgbe_netmap_attach(struct adapter *adapter); #include #include #include #endif /* DEV_NETMAP */ /********************************************************************* * FreeBSD Device Interface Entry Points *********************************************************************/ static device_method_t ixv_methods[] = { /* Device interface */ DEVMETHOD(device_probe, ixv_probe), DEVMETHOD(device_attach, ixv_attach), DEVMETHOD(device_detach, ixv_detach), DEVMETHOD(device_shutdown, ixv_shutdown), DEVMETHOD_END }; static driver_t ixv_driver = { "ixv", ixv_methods, sizeof(struct adapter), }; devclass_t ixv_devclass; DRIVER_MODULE(ixv, pci, ixv_driver, ixv_devclass, 0, 0); MODULE_DEPEND(ixv, pci, 1, 1, 1); MODULE_DEPEND(ixv, ether, 1, 1, 1); #ifdef DEV_NETMAP MODULE_DEPEND(ix, netmap, 1, 1, 1); #endif /* DEV_NETMAP */ /* XXX depend on 'ix' ? */ /* ** TUNEABLE PARAMETERS: */ /* Number of Queues - do not exceed MSIX vectors - 1 */ static int ixv_num_queues = 1; TUNABLE_INT("hw.ixv.num_queues", &ixv_num_queues); /* ** AIM: Adaptive Interrupt Moderation ** which means that the interrupt rate ** is varied over time based on the ** traffic for that interrupt vector */ static int ixv_enable_aim = FALSE; TUNABLE_INT("hw.ixv.enable_aim", &ixv_enable_aim); /* How many packets rxeof tries to clean at a time */ static int ixv_rx_process_limit = 256; TUNABLE_INT("hw.ixv.rx_process_limit", &ixv_rx_process_limit); /* How many packets txeof tries to clean at a time */ static int ixv_tx_process_limit = 256; TUNABLE_INT("hw.ixv.tx_process_limit", &ixv_tx_process_limit); /* Flow control setting, default to full */ static int ixv_flow_control = ixgbe_fc_full; TUNABLE_INT("hw.ixv.flow_control", &ixv_flow_control); /* * Header split: this causes the hardware to DMA * the header into a separate mbuf from the payload, * it can be a performance win in some workloads, but * in others it actually hurts, its off by default. */ static int ixv_header_split = FALSE; TUNABLE_INT("hw.ixv.hdr_split", &ixv_header_split); /* ** Number of TX descriptors per ring, ** setting higher than RX as this seems ** the better performing choice. */ static int ixv_txd = DEFAULT_TXD; TUNABLE_INT("hw.ixv.txd", &ixv_txd); /* Number of RX descriptors per ring */ static int ixv_rxd = DEFAULT_RXD; TUNABLE_INT("hw.ixv.rxd", &ixv_rxd); /* ** Shadow VFTA table, this is needed because ** the real filter table gets cleared during ** a soft reset and we need to repopulate it. */ static u32 ixv_shadow_vfta[IXGBE_VFTA_SIZE]; /********************************************************************* * Device identification routine * * ixv_probe determines if the driver should be loaded on * adapter based on PCI vendor/device id of the adapter. 
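* A zero subvendor or subdevice ID in a table entry acts as a wildcard, so the bare {vendor, device} pairs above match any subsystem IDs.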
* * return BUS_PROBE_DEFAULT on success, positive on failure *********************************************************************/ static int ixv_probe(device_t dev) { ixgbe_vendor_info_t *ent; u16 pci_vendor_id = 0; u16 pci_device_id = 0; u16 pci_subvendor_id = 0; u16 pci_subdevice_id = 0; char adapter_name[256]; pci_vendor_id = pci_get_vendor(dev); if (pci_vendor_id != IXGBE_INTEL_VENDOR_ID) return (ENXIO); pci_device_id = pci_get_device(dev); pci_subvendor_id = pci_get_subvendor(dev); pci_subdevice_id = pci_get_subdevice(dev); ent = ixv_vendor_info_array; while (ent->vendor_id != 0) { if ((pci_vendor_id == ent->vendor_id) && (pci_device_id == ent->device_id) && ((pci_subvendor_id == ent->subvendor_id) || (ent->subvendor_id == 0)) && ((pci_subdevice_id == ent->subdevice_id) || (ent->subdevice_id == 0))) { sprintf(adapter_name, "%s, Version - %s", ixv_strings[ent->index], ixv_driver_version); device_set_desc_copy(dev, adapter_name); return (BUS_PROBE_DEFAULT); } ent++; } return (ENXIO); } /********************************************************************* * Device initialization routine * * The attach entry point is called when the driver is being loaded. * This routine identifies the type of hardware, allocates all resources * and initializes the hardware. * * return 0 on success, positive on failure *********************************************************************/ static int ixv_attach(device_t dev) { struct adapter *adapter; struct ixgbe_hw *hw; int error = 0; INIT_DEBUGOUT("ixv_attach: begin"); /* Allocate, clear, and link in our adapter structure */ adapter = device_get_softc(dev); adapter->dev = dev; hw = &adapter->hw; #ifdef DEV_NETMAP adapter->init_locked = ixv_init_locked; adapter->stop_locked = ixv_stop; #endif /* Core Lock Init*/ IXGBE_CORE_LOCK_INIT(adapter, device_get_nameunit(dev)); /* SYSCTL APIs */ SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "debug", CTLTYPE_INT | CTLFLAG_RW, adapter, 0, ixv_sysctl_debug, "I", "Debug Info"); SYSCTL_ADD_INT(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "enable_aim", CTLFLAG_RW, &ixv_enable_aim, 1, "Interrupt Moderation"); /* Set up the timer callout */ callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0); /* Determine hardware revision */ ixv_identify_hardware(adapter); /* Do base PCI setup - map BAR0 */ if (ixv_allocate_pci_resources(adapter)) { device_printf(dev, "ixv_allocate_pci_resources() failed!\n"); error = ENXIO; goto err_out; } /* Sysctls for limiting the amount of work done in the taskqueues */ ixv_set_sysctl_value(adapter, "rx_processing_limit", "max number of rx packets to process", &adapter->rx_process_limit, ixv_rx_process_limit); ixv_set_sysctl_value(adapter, "tx_processing_limit", "max number of tx packets to process", &adapter->tx_process_limit, ixv_tx_process_limit); /* Do descriptor calc and sanity checks */ if (((ixv_txd * sizeof(union ixgbe_adv_tx_desc)) % DBA_ALIGN) != 0 || ixv_txd < MIN_TXD || ixv_txd > MAX_TXD) { device_printf(dev, "TXD config issue, using default!\n"); adapter->num_tx_desc = DEFAULT_TXD; } else adapter->num_tx_desc = ixv_txd; if (((ixv_rxd * sizeof(union ixgbe_adv_rx_desc)) % DBA_ALIGN) != 0 || ixv_rxd < MIN_RXD || ixv_rxd > MAX_RXD) { device_printf(dev, "RXD config issue, using default!\n"); adapter->num_rx_desc = DEFAULT_RXD; } else adapter->num_rx_desc = ixv_rxd; /* Allocate our TX/RX Queues */ if (ixgbe_allocate_queues(adapter)) { device_printf(dev, "ixgbe_allocate_queues() failed!\n"); 
error = ENOMEM; goto err_out; } /* ** Initialize the shared code: its ** at this point the mac type is set. */ error = ixgbe_init_shared_code(hw); if (error) { device_printf(dev, "ixgbe_init_shared_code() failed!\n"); error = EIO; goto err_late; } /* Setup the mailbox */ ixgbe_init_mbx_params_vf(hw); /* Reset mbox api to 1.0 */ error = ixgbe_reset_hw(hw); if (error == IXGBE_ERR_RESET_FAILED) device_printf(dev, "ixgbe_reset_hw() failure: Reset Failed!\n"); else if (error) device_printf(dev, "ixgbe_reset_hw() failed with error %d\n", error); if (error) { error = EIO; goto err_late; } /* Negotiate mailbox API version */ error = ixgbevf_negotiate_api_version(hw, ixgbe_mbox_api_11); if (error) { device_printf(dev, "MBX API 1.1 negotiation failed! Error %d\n", error); error = EIO; goto err_late; } error = ixgbe_init_hw(hw); if (error) { device_printf(dev, "ixgbe_init_hw() failed!\n"); error = EIO; goto err_late; } error = ixv_allocate_msix(adapter); if (error) { device_printf(dev, "ixv_allocate_msix() failed!\n"); goto err_late; } /* If no mac address was assigned, make a random one */ if (!ixv_check_ether_addr(hw->mac.addr)) { u8 addr[ETHER_ADDR_LEN]; arc4rand(&addr, sizeof(addr), 0); addr[0] &= 0xFE; addr[0] |= 0x02; bcopy(addr, hw->mac.addr, sizeof(addr)); } /* Setup OS specific network interface */ ixv_setup_interface(dev, adapter); /* Do the stats setup */ ixv_save_stats(adapter); ixv_init_stats(adapter); ixv_add_stats_sysctls(adapter); /* Register for VLAN events */ adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config, ixv_register_vlan, adapter, EVENTHANDLER_PRI_FIRST); adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig, ixv_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST); #ifdef DEV_NETMAP ixgbe_netmap_attach(adapter); #endif /* DEV_NETMAP */ INIT_DEBUGOUT("ixv_attach: end"); return (0); err_late: ixgbe_free_transmit_structures(adapter); ixgbe_free_receive_structures(adapter); err_out: ixv_free_pci_resources(adapter); return (error); } /********************************************************************* * Device removal routine * * The detach entry point is called when the driver is being removed. * This routine stops the adapter and deallocates all the resources * that were allocated for driver operation. 
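* If a VLAN trunk is still attached to the interface, the detach is refused with EBUSY.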
* * return 0 on success, positive on failure *********************************************************************/ static int ixv_detach(device_t dev) { struct adapter *adapter = device_get_softc(dev); struct ix_queue *que = adapter->queues; INIT_DEBUGOUT("ixv_detach: begin"); /* Make sure VLANS are not using driver */ if (adapter->ifp->if_vlantrunk != NULL) { device_printf(dev, "Vlan in use, detach first\n"); return (EBUSY); } IXGBE_CORE_LOCK(adapter); ixv_stop(adapter); IXGBE_CORE_UNLOCK(adapter); for (int i = 0; i < adapter->num_queues; i++, que++) { if (que->tq) { struct tx_ring *txr = que->txr; taskqueue_drain(que->tq, &txr->txq_task); taskqueue_drain(que->tq, &que->que_task); taskqueue_free(que->tq); } } /* Drain the Mailbox(link) queue */ if (adapter->tq) { taskqueue_drain(adapter->tq, &adapter->link_task); taskqueue_free(adapter->tq); } /* Unregister VLAN events */ if (adapter->vlan_attach != NULL) EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach); if (adapter->vlan_detach != NULL) EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach); ether_ifdetach(adapter->ifp); callout_drain(&adapter->timer); #ifdef DEV_NETMAP netmap_detach(adapter->ifp); #endif /* DEV_NETMAP */ ixv_free_pci_resources(adapter); bus_generic_detach(dev); if_free(adapter->ifp); ixgbe_free_transmit_structures(adapter); ixgbe_free_receive_structures(adapter); IXGBE_CORE_LOCK_DESTROY(adapter); return (0); } /********************************************************************* * * Shutdown entry point * **********************************************************************/ static int ixv_shutdown(device_t dev) { struct adapter *adapter = device_get_softc(dev); IXGBE_CORE_LOCK(adapter); ixv_stop(adapter); IXGBE_CORE_UNLOCK(adapter); return (0); } /********************************************************************* * Ioctl entry point * * ixv_ioctl is called when the user wants to configure the * interface. * * return 0 on success, positive on failure **********************************************************************/ static int ixv_ioctl(struct ifnet * ifp, u_long command, caddr_t data) { struct adapter *adapter = ifp->if_softc; struct ifreq *ifr = (struct ifreq *) data; #if defined(INET) || defined(INET6) struct ifaddr *ifa = (struct ifaddr *) data; bool avoid_reset = FALSE; #endif int error = 0; switch (command) { case SIOCSIFADDR: #ifdef INET if (ifa->ifa_addr->sa_family == AF_INET) avoid_reset = TRUE; #endif #ifdef INET6 if (ifa->ifa_addr->sa_family == AF_INET6) avoid_reset = TRUE; #endif #if defined(INET) || defined(INET6) /* ** Calling init results in link renegotiation, ** so we avoid doing it when possible. 
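** For INET/INET6 addresses it is enough to mark the interface up, start it if it was not running, and let arp_ifinit() refresh ARP; anything else falls through to ether_ioctl().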
*/ if (avoid_reset) { ifp->if_flags |= IFF_UP; if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) ixv_init(adapter); if (!(ifp->if_flags & IFF_NOARP)) arp_ifinit(ifp, ifa); } else error = ether_ioctl(ifp, command, data); break; #endif case SIOCSIFMTU: IOCTL_DEBUGOUT("ioctl: SIOCSIFMTU (Set Interface MTU)"); if (ifr->ifr_mtu > IXGBE_MAX_FRAME_SIZE - IXGBE_MTU_HDR) { error = EINVAL; } else { IXGBE_CORE_LOCK(adapter); ifp->if_mtu = ifr->ifr_mtu; adapter->max_frame_size = ifp->if_mtu + IXGBE_MTU_HDR; - ixv_init_locked(adapter); + if (ifp->if_drv_flags & IFF_DRV_RUNNING) + ixv_init_locked(adapter); IXGBE_CORE_UNLOCK(adapter); } break; case SIOCSIFFLAGS: IOCTL_DEBUGOUT("ioctl: SIOCSIFFLAGS (Set Interface Flags)"); IXGBE_CORE_LOCK(adapter); if (ifp->if_flags & IFF_UP) { if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ixv_init_locked(adapter); } else if (ifp->if_drv_flags & IFF_DRV_RUNNING) ixv_stop(adapter); adapter->if_flags = ifp->if_flags; IXGBE_CORE_UNLOCK(adapter); break; case SIOCADDMULTI: case SIOCDELMULTI: IOCTL_DEBUGOUT("ioctl: SIOC(ADD|DEL)MULTI"); if (ifp->if_drv_flags & IFF_DRV_RUNNING) { IXGBE_CORE_LOCK(adapter); ixv_disable_intr(adapter); ixv_set_multi(adapter); ixv_enable_intr(adapter); IXGBE_CORE_UNLOCK(adapter); } break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: IOCTL_DEBUGOUT("ioctl: SIOCxIFMEDIA (Get/Set Interface Media)"); error = ifmedia_ioctl(ifp, ifr, &adapter->media, command); break; case SIOCSIFCAP: { int mask = ifr->ifr_reqcap ^ ifp->if_capenable; IOCTL_DEBUGOUT("ioctl: SIOCSIFCAP (Set Capabilities)"); if (mask & IFCAP_HWCSUM) ifp->if_capenable ^= IFCAP_HWCSUM; if (mask & IFCAP_TSO4) ifp->if_capenable ^= IFCAP_TSO4; if (mask & IFCAP_LRO) ifp->if_capenable ^= IFCAP_LRO; if (mask & IFCAP_VLAN_HWTAGGING) ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; if (ifp->if_drv_flags & IFF_DRV_RUNNING) { IXGBE_CORE_LOCK(adapter); ixv_init_locked(adapter); IXGBE_CORE_UNLOCK(adapter); } VLAN_CAPABILITIES(ifp); break; } default: IOCTL_DEBUGOUT1("ioctl: UNKNOWN (0x%X)\n", (int)command); error = ether_ioctl(ifp, command, data); break; } return (error); } /********************************************************************* * Init entry point * * This routine is used in two ways. It is used by the stack as * init entry point in network interface structure. It is also used * by the driver as a hw/sw initialization routine to get to a * consistent state. * * return 0 on success, positive on failure **********************************************************************/ #define IXGBE_MHADD_MFS_SHIFT 16 static void ixv_init_locked(struct adapter *adapter) { struct ifnet *ifp = adapter->ifp; device_t dev = adapter->dev; struct ixgbe_hw *hw = &adapter->hw; int error = 0; INIT_DEBUGOUT("ixv_init_locked: begin"); mtx_assert(&adapter->core_mtx, MA_OWNED); hw->adapter_stopped = FALSE; ixgbe_stop_adapter(hw); callout_stop(&adapter->timer); /* reprogram the RAR[0] in case user changed it. */ ixgbe_set_rar(hw, 0, hw->mac.addr, 0, IXGBE_RAH_AV); /* Get the latest mac address, User can use a LAA */ bcopy(IF_LLADDR(adapter->ifp), hw->mac.addr, IXGBE_ETH_LENGTH_OF_ADDRESS); ixgbe_set_rar(hw, 0, hw->mac.addr, 0, 1); hw->addr_ctrl.rar_used_count = 1; /* Prepare transmit descriptors and buffers */ if (ixgbe_setup_transmit_structures(adapter)) { device_printf(dev, "Could not setup transmit structures\n"); ixv_stop(adapter); return; } /* Reset VF and renegotiate mailbox API version */ ixgbe_reset_hw(hw); error = ixgbevf_negotiate_api_version(hw, ixgbe_mbox_api_11); if (error) device_printf(dev, "MBX API 1.1 negotiation failed! 
Error %d\n", error); ixv_initialize_transmit_units(adapter); /* Setup Multicast table */ ixv_set_multi(adapter); /* ** Determine the correct mbuf pool ** for doing jumbo/headersplit */ if (ifp->if_mtu > ETHERMTU) adapter->rx_mbuf_sz = MJUMPAGESIZE; else adapter->rx_mbuf_sz = MCLBYTES; /* Prepare receive descriptors and buffers */ if (ixgbe_setup_receive_structures(adapter)) { device_printf(dev, "Could not setup receive structures\n"); ixv_stop(adapter); return; } /* Configure RX settings */ ixv_initialize_receive_units(adapter); /* Set the various hardware offload abilities */ ifp->if_hwassist = 0; if (ifp->if_capenable & IFCAP_TSO4) ifp->if_hwassist |= CSUM_TSO; if (ifp->if_capenable & IFCAP_TXCSUM) { ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP); #if __FreeBSD_version >= 800000 ifp->if_hwassist |= CSUM_SCTP; #endif } /* Set up VLAN offload and filter */ ixv_setup_vlan_support(adapter); /* Set up MSI/X routing */ ixv_configure_ivars(adapter); /* Set up auto-mask */ IXGBE_WRITE_REG(hw, IXGBE_VTEIAM, IXGBE_EICS_RTX_QUEUE); /* Set moderation on the Link interrupt */ IXGBE_WRITE_REG(hw, IXGBE_VTEITR(adapter->vector), IXGBE_LINK_ITR); /* Stats init */ ixv_init_stats(adapter); /* Config/Enable Link */ ixv_config_link(adapter); /* Start watchdog */ callout_reset(&adapter->timer, hz, ixv_local_timer, adapter); /* And now turn on interrupts */ ixv_enable_intr(adapter); /* Now inform the stack we're ready */ ifp->if_drv_flags |= IFF_DRV_RUNNING; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; return; } static void ixv_init(void *arg) { struct adapter *adapter = arg; IXGBE_CORE_LOCK(adapter); ixv_init_locked(adapter); IXGBE_CORE_UNLOCK(adapter); return; } /* ** ** MSIX Interrupt Handlers and Tasklets ** */ static inline void ixv_enable_queue(struct adapter *adapter, u32 vector) { struct ixgbe_hw *hw = &adapter->hw; u32 queue = 1 << vector; u32 mask; mask = (IXGBE_EIMS_RTX_QUEUE & queue); IXGBE_WRITE_REG(hw, IXGBE_VTEIMS, mask); } static inline void ixv_disable_queue(struct adapter *adapter, u32 vector) { struct ixgbe_hw *hw = &adapter->hw; u64 queue = (u64)(1 << vector); u32 mask; mask = (IXGBE_EIMS_RTX_QUEUE & queue); IXGBE_WRITE_REG(hw, IXGBE_VTEIMC, mask); } static inline void ixv_rearm_queues(struct adapter *adapter, u64 queues) { u32 mask = (IXGBE_EIMS_RTX_QUEUE & queues); IXGBE_WRITE_REG(&adapter->hw, IXGBE_VTEICS, mask); } static void ixv_handle_que(void *context, int pending) { struct ix_queue *que = context; struct adapter *adapter = que->adapter; struct tx_ring *txr = que->txr; struct ifnet *ifp = adapter->ifp; bool more; if (ifp->if_drv_flags & IFF_DRV_RUNNING) { more = ixgbe_rxeof(que); IXGBE_TX_LOCK(txr); ixgbe_txeof(txr); #if __FreeBSD_version >= 800000 if (!drbr_empty(ifp, txr->br)) ixgbe_mq_start_locked(ifp, txr); #else if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) ixgbe_start_locked(txr, ifp); #endif IXGBE_TX_UNLOCK(txr); if (more) { taskqueue_enqueue(que->tq, &que->que_task); return; } } /* Reenable this interrupt */ ixv_enable_queue(adapter, que->msix); return; } /********************************************************************* * * MSI Queue Interrupt Service routine * **********************************************************************/ void ixv_msix_que(void *arg) { struct ix_queue *que = arg; struct adapter *adapter = que->adapter; struct ifnet *ifp = adapter->ifp; struct tx_ring *txr = que->txr; struct rx_ring *rxr = que->rxr; bool more; u32 newitr = 0; ixv_disable_queue(adapter, que->msix); ++que->irqs; more = ixgbe_rxeof(que); IXGBE_TX_LOCK(txr); ixgbe_txeof(txr); /* ** Make certain that 
if the stack ** has anything queued the task gets ** scheduled to handle it. */ #ifdef IXGBE_LEGACY_TX if (!IFQ_DRV_IS_EMPTY(&adapter->ifp->if_snd)) ixgbe_start_locked(txr, ifp); #else if (!drbr_empty(adapter->ifp, txr->br)) ixgbe_mq_start_locked(ifp, txr); #endif IXGBE_TX_UNLOCK(txr); /* Do AIM now? */ if (ixv_enable_aim == FALSE) goto no_calc; /* ** Do Adaptive Interrupt Moderation: ** - Write out last calculated setting ** - Calculate based on average size over ** the last interval. */ if (que->eitr_setting) IXGBE_WRITE_REG(&adapter->hw, IXGBE_VTEITR(que->msix), que->eitr_setting); que->eitr_setting = 0; /* Idle, do nothing */ if ((txr->bytes == 0) && (rxr->bytes == 0)) goto no_calc; if ((txr->bytes) && (txr->packets)) newitr = txr->bytes/txr->packets; if ((rxr->bytes) && (rxr->packets)) newitr = max(newitr, (rxr->bytes / rxr->packets)); newitr += 24; /* account for hardware frame, crc */ /* set an upper boundary */ newitr = min(newitr, 3000); /* Be nice to the mid range */ if ((newitr > 300) && (newitr < 1200)) newitr = (newitr / 3); else newitr = (newitr / 2); newitr |= newitr << 16; /* save for next interrupt */ que->eitr_setting = newitr; /* Reset state */ txr->bytes = 0; txr->packets = 0; rxr->bytes = 0; rxr->packets = 0; no_calc: if (more) taskqueue_enqueue(que->tq, &que->que_task); else /* Reenable this interrupt */ ixv_enable_queue(adapter, que->msix); return; } static void ixv_msix_mbx(void *arg) { struct adapter *adapter = arg; struct ixgbe_hw *hw = &adapter->hw; u32 reg; ++adapter->link_irq; /* First get the cause */ reg = IXGBE_READ_REG(hw, IXGBE_VTEICS); /* Clear interrupt with write */ IXGBE_WRITE_REG(hw, IXGBE_VTEICR, reg); /* Link status change */ if (reg & IXGBE_EICR_LSC) taskqueue_enqueue(adapter->tq, &adapter->link_task); IXGBE_WRITE_REG(hw, IXGBE_VTEIMS, IXGBE_EIMS_OTHER); return; } /********************************************************************* * * Media Ioctl callback * * This routine is called whenever the user queries the status of * the interface using ifconfig. * **********************************************************************/ static void ixv_media_status(struct ifnet * ifp, struct ifmediareq * ifmr) { struct adapter *adapter = ifp->if_softc; INIT_DEBUGOUT("ixv_media_status: begin"); IXGBE_CORE_LOCK(adapter); ixv_update_link_status(adapter); ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; if (!adapter->link_active) { IXGBE_CORE_UNLOCK(adapter); return; } ifmr->ifm_status |= IFM_ACTIVE; switch (adapter->link_speed) { case IXGBE_LINK_SPEED_1GB_FULL: ifmr->ifm_active |= IFM_1000_T | IFM_FDX; break; case IXGBE_LINK_SPEED_10GB_FULL: ifmr->ifm_active |= IFM_FDX; break; } IXGBE_CORE_UNLOCK(adapter); return; } /********************************************************************* * * Media Ioctl callback * * This routine is called when the user changes speed/duplex using * media/mediopt option with ifconfig. * **********************************************************************/ static int ixv_media_change(struct ifnet * ifp) { struct adapter *adapter = ifp->if_softc; struct ifmedia *ifm = &adapter->media; INIT_DEBUGOUT("ixv_media_change: begin"); if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) return (EINVAL); switch (IFM_SUBTYPE(ifm->ifm_media)) { case IFM_AUTO: break; default: device_printf(adapter->dev, "Only auto media type\n"); return (EINVAL); } return (0); } /********************************************************************* * Multicast Update * * This routine is called whenever multicast address list is updated. 
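* The addresses are gathered into a flat array and handed to the shared code, which relays them to the PF over the mailbox one entry at a time via the ixv_mc_array_itr() iterator.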
* **********************************************************************/ #define IXGBE_RAR_ENTRIES 16 static void ixv_set_multi(struct adapter *adapter) { u8 mta[MAX_NUM_MULTICAST_ADDRESSES * IXGBE_ETH_LENGTH_OF_ADDRESS]; u8 *update_ptr; struct ifmultiaddr *ifma; int mcnt = 0; struct ifnet *ifp = adapter->ifp; IOCTL_DEBUGOUT("ixv_set_multi: begin"); #if __FreeBSD_version < 800000 IF_ADDR_LOCK(ifp); #else if_maddr_rlock(ifp); #endif TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; bcopy(LLADDR((struct sockaddr_dl *) ifma->ifma_addr), &mta[mcnt * IXGBE_ETH_LENGTH_OF_ADDRESS], IXGBE_ETH_LENGTH_OF_ADDRESS); mcnt++; } #if __FreeBSD_version < 800000 IF_ADDR_UNLOCK(ifp); #else if_maddr_runlock(ifp); #endif update_ptr = mta; ixgbe_update_mc_addr_list(&adapter->hw, update_ptr, mcnt, ixv_mc_array_itr, TRUE); return; } /* * This is an iterator function now needed by the multicast * shared code. It simply feeds the shared code routine the * addresses in the array of ixv_set_multi() one by one. */ static u8 * ixv_mc_array_itr(struct ixgbe_hw *hw, u8 **update_ptr, u32 *vmdq) { u8 *addr = *update_ptr; u8 *newptr; *vmdq = 0; newptr = addr + IXGBE_ETH_LENGTH_OF_ADDRESS; *update_ptr = newptr; return addr; } /********************************************************************* * Timer routine * * This routine checks for link status,updates statistics, * and runs the watchdog check. * **********************************************************************/ static void ixv_local_timer(void *arg) { struct adapter *adapter = arg; device_t dev = adapter->dev; struct ix_queue *que = adapter->queues; u64 queues = 0; int hung = 0; mtx_assert(&adapter->core_mtx, MA_OWNED); ixv_update_link_status(adapter); /* Stats Update */ ixv_update_stats(adapter); /* ** Check the TX queues status ** - mark hung queues so we don't schedule on them ** - watchdog only if all queues show hung */ for (int i = 0; i < adapter->num_queues; i++, que++) { /* Keep track of queues with work for soft irq */ if (que->txr->busy) queues |= ((u64)1 << que->me); /* ** Each time txeof runs without cleaning, but there ** are uncleaned descriptors it increments busy. If ** we get to the MAX we declare it hung. */ if (que->busy == IXGBE_QUEUE_HUNG) { ++hung; /* Mark the queue as inactive */ adapter->active_queues &= ~((u64)1 << que->me); continue; } else { /* Check if we've come back from hung */ if ((adapter->active_queues & ((u64)1 << que->me)) == 0) adapter->active_queues |= ((u64)1 << que->me); } if (que->busy >= IXGBE_MAX_TX_BUSY) { device_printf(dev,"Warning queue %d " "appears to be hung!\n", i); que->txr->busy = IXGBE_QUEUE_HUNG; ++hung; } } /* Only truly watchdog if all queues show hung */ if (hung == adapter->num_queues) goto watchdog; else if (queues != 0) { /* Force an IRQ on queues with work */ ixv_rearm_queues(adapter, queues); } callout_reset(&adapter->timer, hz, ixv_local_timer, adapter); return; watchdog: device_printf(adapter->dev, "Watchdog timeout -- resetting\n"); adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING; adapter->watchdog_events++; ixv_init_locked(adapter); } /* ** Note: this routine updates the OS on the link state ** the real check of the hardware only happens with ** a link interrupt. */ static void ixv_update_link_status(struct adapter *adapter) { struct ifnet *ifp = adapter->ifp; device_t dev = adapter->dev; if (adapter->link_up){ if (adapter->link_active == FALSE) { if (bootverbose) device_printf(dev,"Link is up %d Gbps %s \n", ((adapter->link_speed == 128)? 
10:1), "Full Duplex"); adapter->link_active = TRUE; if_link_state_change(ifp, LINK_STATE_UP); } } else { /* Link down */ if (adapter->link_active == TRUE) { if (bootverbose) device_printf(dev,"Link is Down\n"); if_link_state_change(ifp, LINK_STATE_DOWN); adapter->link_active = FALSE; } } return; } /********************************************************************* * * This routine disables all traffic on the adapter by issuing a * global reset on the MAC and deallocates TX/RX buffers. * **********************************************************************/ static void ixv_stop(void *arg) { struct ifnet *ifp; struct adapter *adapter = arg; struct ixgbe_hw *hw = &adapter->hw; ifp = adapter->ifp; mtx_assert(&adapter->core_mtx, MA_OWNED); INIT_DEBUGOUT("ixv_stop: begin\n"); ixv_disable_intr(adapter); /* Tell the stack that the interface is no longer active */ ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); ixgbe_reset_hw(hw); adapter->hw.adapter_stopped = FALSE; ixgbe_stop_adapter(hw); callout_stop(&adapter->timer); /* reprogram the RAR[0] in case user changed it. */ ixgbe_set_rar(hw, 0, hw->mac.addr, 0, IXGBE_RAH_AV); return; } /********************************************************************* * * Determine hardware revision. * **********************************************************************/ static void ixv_identify_hardware(struct adapter *adapter) { device_t dev = adapter->dev; struct ixgbe_hw *hw = &adapter->hw; /* ** Make sure BUSMASTER is set, on a VM under ** KVM it may not be and will break things. */ pci_enable_busmaster(dev); /* Save off the information about this board */ hw->vendor_id = pci_get_vendor(dev); hw->device_id = pci_get_device(dev); hw->revision_id = pci_read_config(dev, PCIR_REVID, 1); hw->subsystem_vendor_id = pci_read_config(dev, PCIR_SUBVEND_0, 2); hw->subsystem_device_id = pci_read_config(dev, PCIR_SUBDEV_0, 2); /* We need this to determine device-specific things */ ixgbe_set_mac_type(hw); /* Set the right number of segments */ adapter->num_segs = IXGBE_82599_SCATTER; return; } /********************************************************************* * * Setup MSIX Interrupt resources and handlers * **********************************************************************/ static int ixv_allocate_msix(struct adapter *adapter) { device_t dev = adapter->dev; struct ix_queue *que = adapter->queues; struct tx_ring *txr = adapter->tx_rings; int error, rid, vector = 0; for (int i = 0; i < adapter->num_queues; i++, vector++, que++, txr++) { rid = vector + 1; que->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); if (que->res == NULL) { device_printf(dev,"Unable to allocate" " bus resource: que interrupt [%d]\n", vector); return (ENXIO); } /* Set the handler function */ error = bus_setup_intr(dev, que->res, INTR_TYPE_NET | INTR_MPSAFE, NULL, ixv_msix_que, que, &que->tag); if (error) { que->res = NULL; device_printf(dev, "Failed to register QUE handler"); return (error); } #if __FreeBSD_version >= 800504 bus_describe_intr(dev, que->res, que->tag, "que %d", i); #endif que->msix = vector; adapter->active_queues |= (u64)(1 << que->msix); /* ** Bind the msix vector, and thus the ** ring to the corresponding cpu. 
*/ if (adapter->num_queues > 1) bus_bind_intr(dev, que->res, i); TASK_INIT(&txr->txq_task, 0, ixgbe_deferred_mq_start, txr); TASK_INIT(&que->que_task, 0, ixv_handle_que, que); que->tq = taskqueue_create_fast("ixv_que", M_NOWAIT, taskqueue_thread_enqueue, &que->tq); taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que", device_get_nameunit(adapter->dev)); } /* and Mailbox */ rid = vector + 1; adapter->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); if (!adapter->res) { device_printf(dev,"Unable to allocate" " bus resource: MBX interrupt [%d]\n", rid); return (ENXIO); } /* Set the mbx handler function */ error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET | INTR_MPSAFE, NULL, ixv_msix_mbx, adapter, &adapter->tag); if (error) { adapter->res = NULL; device_printf(dev, "Failed to register LINK handler"); return (error); } #if __FreeBSD_version >= 800504 bus_describe_intr(dev, adapter->res, adapter->tag, "mbx"); #endif adapter->vector = vector; /* Tasklets for Mailbox */ TASK_INIT(&adapter->link_task, 0, ixv_handle_mbx, adapter); adapter->tq = taskqueue_create_fast("ixv_mbx", M_NOWAIT, taskqueue_thread_enqueue, &adapter->tq); taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s mbxq", device_get_nameunit(adapter->dev)); /* ** Due to a broken design QEMU will fail to properly ** enable the guest for MSIX unless the vectors in ** the table are all set up, so we must rewrite the ** ENABLE in the MSIX control register again at this ** point to cause it to successfully initialize us. */ if (adapter->hw.mac.type == ixgbe_mac_82599_vf) { int msix_ctrl; pci_find_cap(dev, PCIY_MSIX, &rid); rid += PCIR_MSIX_CTRL; msix_ctrl = pci_read_config(dev, rid, 2); msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE; pci_write_config(dev, rid, msix_ctrl, 2); } return (0); } /* * Setup MSIX resources, note that the VF * device MUST use MSIX, there is no fallback. */ static int ixv_setup_msix(struct adapter *adapter) { device_t dev = adapter->dev; int rid, want, msgs; /* Must have at least 2 MSIX vectors */ msgs = pci_msix_count(dev); if (msgs < 2) goto out; rid = PCIR_BAR(3); adapter->msix_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (adapter->msix_mem == NULL) { device_printf(adapter->dev, "Unable to map MSIX table \n"); goto out; } /* ** Want vectors for the queues, ** plus an additional for mailbox. 
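** e.g. four queues ask for five vectors; if the device only exposes ** three messages we fall back to two queues plus the mailbox vector.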
*/ want = adapter->num_queues + 1; if (want > msgs) { want = msgs; adapter->num_queues = msgs - 1; } else msgs = want; if ((pci_alloc_msix(dev, &msgs) == 0) && (msgs == want)) { device_printf(adapter->dev, "Using MSIX interrupts with %d vectors\n", want); return (want); } /* Release in case alloc was insufficient */ pci_release_msi(dev); out: if (adapter->msix_mem != NULL) { bus_release_resource(dev, SYS_RES_MEMORY, rid, adapter->msix_mem); adapter->msix_mem = NULL; } device_printf(adapter->dev,"MSIX config error\n"); return (ENXIO); } static int ixv_allocate_pci_resources(struct adapter *adapter) { int rid; device_t dev = adapter->dev; rid = PCIR_BAR(0); adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (!(adapter->pci_mem)) { device_printf(dev, "Unable to allocate bus resource: memory\n"); return (ENXIO); } adapter->osdep.mem_bus_space_tag = rman_get_bustag(adapter->pci_mem); adapter->osdep.mem_bus_space_handle = rman_get_bushandle(adapter->pci_mem); adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle; /* Pick up the tuneable queues */ adapter->num_queues = ixv_num_queues; adapter->hw.back = adapter; /* ** Now setup MSI/X, should ** return us the number of ** configured vectors. */ adapter->msix = ixv_setup_msix(adapter); if (adapter->msix == ENXIO) return (ENXIO); else return (0); } static void ixv_free_pci_resources(struct adapter * adapter) { struct ix_queue *que = adapter->queues; device_t dev = adapter->dev; int rid, memrid; memrid = PCIR_BAR(MSIX_82598_BAR); /* ** There is a slight possibility of a failure mode ** in attach that will result in entering this function ** before interrupt resources have been initialized, and ** in that case we do not want to execute the loops below ** We can detect this reliably by the state of the adapter ** res pointer. */ if (adapter->res == NULL) goto mem; /* ** Release all msix queue resources: */ for (int i = 0; i < adapter->num_queues; i++, que++) { rid = que->msix + 1; if (que->tag != NULL) { bus_teardown_intr(dev, que->res, que->tag); que->tag = NULL; } if (que->res != NULL) bus_release_resource(dev, SYS_RES_IRQ, rid, que->res); } /* Clean the Legacy or Link interrupt last */ if (adapter->vector) /* we are doing MSIX */ rid = adapter->vector + 1; else (adapter->msix != 0) ? (rid = 1):(rid = 0); if (adapter->tag != NULL) { bus_teardown_intr(dev, adapter->res, adapter->tag); adapter->tag = NULL; } if (adapter->res != NULL) bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res); mem: if (adapter->msix) pci_release_msi(dev); if (adapter->msix_mem != NULL) bus_release_resource(dev, SYS_RES_MEMORY, memrid, adapter->msix_mem); if (adapter->pci_mem != NULL) bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(0), adapter->pci_mem); return; } /********************************************************************* * * Setup networking device structure and register an interface. 
* **********************************************************************/ static void ixv_setup_interface(device_t dev, struct adapter *adapter) { struct ifnet *ifp; INIT_DEBUGOUT("ixv_setup_interface: begin"); ifp = adapter->ifp = if_alloc(IFT_ETHER); if (ifp == NULL) panic("%s: can not if_alloc()\n", device_get_nameunit(dev)); if_initname(ifp, device_get_name(dev), device_get_unit(dev)); ifp->if_baudrate = 1000000000; ifp->if_init = ixv_init; ifp->if_softc = adapter; ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = ixv_ioctl; #if __FreeBSD_version >= 800000 ifp->if_transmit = ixgbe_mq_start; ifp->if_qflush = ixgbe_qflush; #else ifp->if_start = ixgbe_start; #endif ifp->if_snd.ifq_maxlen = adapter->num_tx_desc - 2; ether_ifattach(ifp, adapter->hw.mac.addr); adapter->max_frame_size = ifp->if_mtu + IXGBE_MTU_HDR_VLAN; /* * Tell the upper layer(s) we support long frames. */ ifp->if_hdrlen = sizeof(struct ether_vlan_header); ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_TSO4 | IFCAP_VLAN_HWCSUM; ifp->if_capabilities |= IFCAP_JUMBO_MTU; ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWTSO | IFCAP_VLAN_MTU; ifp->if_capabilities |= IFCAP_LRO; ifp->if_capenable = ifp->if_capabilities; /* * Specify the media types supported by this adapter and register * callbacks to update media and link information */ ifmedia_init(&adapter->media, IFM_IMASK, ixv_media_change, ixv_media_status); ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO); return; } static void ixv_config_link(struct adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; u32 autoneg; if (hw->mac.ops.check_link) hw->mac.ops.check_link(hw, &autoneg, &adapter->link_up, FALSE); } /********************************************************************* * * Enable transmit unit. * **********************************************************************/ static void ixv_initialize_transmit_units(struct adapter *adapter) { struct tx_ring *txr = adapter->tx_rings; struct ixgbe_hw *hw = &adapter->hw; for (int i = 0; i < adapter->num_queues; i++, txr++) { u64 tdba = txr->txdma.dma_paddr; u32 txctrl, txdctl; /* Set WTHRESH to 8, burst writeback */ txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i)); txdctl |= (8 << 16); IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl); /* Set the HW Tx Head and Tail indices */ IXGBE_WRITE_REG(&adapter->hw, IXGBE_VFTDH(i), 0); IXGBE_WRITE_REG(&adapter->hw, IXGBE_VFTDT(i), 0); /* Set Tx Tail register */ txr->tail = IXGBE_VFTDT(i); /* Set Ring parameters */ IXGBE_WRITE_REG(hw, IXGBE_VFTDBAL(i), (tdba & 0x00000000ffffffffULL)); IXGBE_WRITE_REG(hw, IXGBE_VFTDBAH(i), (tdba >> 32)); IXGBE_WRITE_REG(hw, IXGBE_VFTDLEN(i), adapter->num_tx_desc * sizeof(struct ixgbe_legacy_tx_desc)); txctrl = IXGBE_READ_REG(hw, IXGBE_VFDCA_TXCTRL(i)); txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN; IXGBE_WRITE_REG(hw, IXGBE_VFDCA_TXCTRL(i), txctrl); /* Now enable */ txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i)); txdctl |= IXGBE_TXDCTL_ENABLE; IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl); } return; } /********************************************************************* * * Setup receive registers and features. 
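* Note that SRRCTL expresses the packet buffer size in 1KB units (IXGBE_SRRCTL_BSIZEPKT_SHIFT is 10), so the 2048 and 4096 byte buffers below are programmed as 2 and 4 respectively.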
* **********************************************************************/ #define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT 2 static void ixv_initialize_receive_units(struct adapter *adapter) { struct rx_ring *rxr = adapter->rx_rings; struct ixgbe_hw *hw = &adapter->hw; struct ifnet *ifp = adapter->ifp; u32 bufsz, rxcsum, psrtype; if (ifp->if_mtu > ETHERMTU) bufsz = 4096 >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; else bufsz = 2048 >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; psrtype = IXGBE_PSRTYPE_TCPHDR | IXGBE_PSRTYPE_UDPHDR | IXGBE_PSRTYPE_IPV4HDR | IXGBE_PSRTYPE_IPV6HDR | IXGBE_PSRTYPE_L2HDR; IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype); /* Tell PF our max_frame size */ ixgbevf_rlpml_set_vf(hw, adapter->max_frame_size); for (int i = 0; i < adapter->num_queues; i++, rxr++) { u64 rdba = rxr->rxdma.dma_paddr; u32 reg, rxdctl; /* Disable the queue */ rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i)); rxdctl &= ~IXGBE_RXDCTL_ENABLE; IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(i), rxdctl); for (int j = 0; j < 10; j++) { if (IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i)) & IXGBE_RXDCTL_ENABLE) msec_delay(1); else break; } wmb(); /* Setup the Base and Length of the Rx Descriptor Ring */ IXGBE_WRITE_REG(hw, IXGBE_VFRDBAL(i), (rdba & 0x00000000ffffffffULL)); IXGBE_WRITE_REG(hw, IXGBE_VFRDBAH(i), (rdba >> 32)); IXGBE_WRITE_REG(hw, IXGBE_VFRDLEN(i), adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc)); /* Reset the ring indices */ IXGBE_WRITE_REG(hw, IXGBE_VFRDH(rxr->me), 0); IXGBE_WRITE_REG(hw, IXGBE_VFRDT(rxr->me), 0); /* Set up the SRRCTL register */ reg = IXGBE_READ_REG(hw, IXGBE_VFSRRCTL(i)); reg &= ~IXGBE_SRRCTL_BSIZEHDR_MASK; reg &= ~IXGBE_SRRCTL_BSIZEPKT_MASK; reg |= bufsz; reg |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF; IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(i), reg); /* Capture Rx Tail index */ rxr->tail = IXGBE_VFRDT(rxr->me); /* Do the queue enabling last */ rxdctl |= IXGBE_RXDCTL_ENABLE | IXGBE_RXDCTL_VME; IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(i), rxdctl); for (int k = 0; k < 10; k++) { if (IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i)) & IXGBE_RXDCTL_ENABLE) break; else msec_delay(1); } wmb(); /* Set the Tail Pointer */ #ifdef DEV_NETMAP /* * In netmap mode, we must preserve the buffers made * available to userspace before the if_init() * (this is true by default on the TX side, because * init makes all buffers available to userspace). * * netmap_reset() and the device specific routines * (e.g. ixgbe_setup_receive_rings()) map these * buffers at the end of the NIC ring, so here we * must set the RDT (tail) register to make sure * they are not overwritten. * * In this driver the NIC ring starts at RDH = 0, * RDT points to the last slot available for reception (?), * so RDT = num_rx_desc - 1 means the whole ring is available. 
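* nm_kr_rxspace() reports how many slots userspace still owns, so the RDT write below backs the tail off by exactly that amount.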
*/ if (ifp->if_capenable & IFCAP_NETMAP) { struct netmap_adapter *na = NA(adapter->ifp); struct netmap_kring *kring = &na->rx_rings[i]; int t = na->num_rx_desc - 1 - nm_kr_rxspace(kring); IXGBE_WRITE_REG(hw, IXGBE_VFRDT(rxr->me), t); } else #endif /* DEV_NETMAP */ IXGBE_WRITE_REG(hw, IXGBE_VFRDT(rxr->me), adapter->num_rx_desc - 1); } rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM); if (ifp->if_capenable & IFCAP_RXCSUM) rxcsum |= IXGBE_RXCSUM_PCSD; if (!(rxcsum & IXGBE_RXCSUM_PCSD)) rxcsum |= IXGBE_RXCSUM_IPPCSE; IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum); return; } static void ixv_setup_vlan_support(struct adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; u32 ctrl, vid, vfta, retry; struct rx_ring *rxr; /* ** We get here thru init_locked, meaning ** a soft reset, this has already cleared ** the VFTA and other state, so if there ** have been no vlan's registered do nothing. */ if (adapter->num_vlans == 0) return; /* Enable the queues */ for (int i = 0; i < adapter->num_queues; i++) { ctrl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i)); ctrl |= IXGBE_RXDCTL_VME; IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(i), ctrl); /* * Let Rx path know that it needs to store VLAN tag * as part of extra mbuf info. */ rxr = &adapter->rx_rings[i]; rxr->vtag_strip = TRUE; } /* ** A soft reset zero's out the VFTA, so ** we need to repopulate it now. */ for (int i = 0; i < IXGBE_VFTA_SIZE; i++) { if (ixv_shadow_vfta[i] == 0) continue; vfta = ixv_shadow_vfta[i]; /* ** Reconstruct the vlan id's ** based on the bits set in each ** of the array ints. */ for (int j = 0; j < 32; j++) { retry = 0; if ((vfta & (1 << j)) == 0) continue; vid = (i * 32) + j; /* Call the shared code mailbox routine */ while (ixgbe_set_vfta(hw, vid, 0, TRUE)) { if (++retry > 5) break; } } } } /* ** This routine is run via an vlan config EVENT, ** it enables us to use the HW Filter table since ** we can get the vlan id. This just creates the ** entry in the soft version of the VFTA, init will ** repopulate the real table. */ static void ixv_register_vlan(void *arg, struct ifnet *ifp, u16 vtag) { struct adapter *adapter = ifp->if_softc; u16 index, bit; if (ifp->if_softc != arg) /* Not our event */ return; if ((vtag == 0) || (vtag > 4095)) /* Invalid */ return; IXGBE_CORE_LOCK(adapter); index = (vtag >> 5) & 0x7F; bit = vtag & 0x1F; ixv_shadow_vfta[index] |= (1 << bit); ++adapter->num_vlans; /* Re-init to load the changes */ ixv_init_locked(adapter); IXGBE_CORE_UNLOCK(adapter); } /* ** This routine is run via an vlan ** unconfig EVENT, remove our entry ** in the soft vfta. 
*/ static void ixv_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag) { struct adapter *adapter = ifp->if_softc; u16 index, bit; if (ifp->if_softc != arg) return; if ((vtag == 0) || (vtag > 4095)) /* Invalid */ return; IXGBE_CORE_LOCK(adapter); index = (vtag >> 5) & 0x7F; bit = vtag & 0x1F; ixv_shadow_vfta[index] &= ~(1 << bit); --adapter->num_vlans; /* Re-init to load the changes */ ixv_init_locked(adapter); IXGBE_CORE_UNLOCK(adapter); } static void ixv_enable_intr(struct adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; struct ix_queue *que = adapter->queues; u32 mask = (IXGBE_EIMS_ENABLE_MASK & ~IXGBE_EIMS_RTX_QUEUE); IXGBE_WRITE_REG(hw, IXGBE_VTEIMS, mask); mask = IXGBE_EIMS_ENABLE_MASK; mask &= ~(IXGBE_EIMS_OTHER | IXGBE_EIMS_LSC); IXGBE_WRITE_REG(hw, IXGBE_VTEIAC, mask); for (int i = 0; i < adapter->num_queues; i++, que++) ixv_enable_queue(adapter, que->msix); IXGBE_WRITE_FLUSH(hw); return; } static void ixv_disable_intr(struct adapter *adapter) { IXGBE_WRITE_REG(&adapter->hw, IXGBE_VTEIAC, 0); IXGBE_WRITE_REG(&adapter->hw, IXGBE_VTEIMC, ~0); IXGBE_WRITE_FLUSH(&adapter->hw); return; } /* ** Setup the correct IVAR register for a particular MSIX interrupt ** - entry is the register array entry ** - vector is the MSIX vector for this queue ** - type is RX/TX/MISC */ static void ixv_set_ivar(struct adapter *adapter, u8 entry, u8 vector, s8 type) { struct ixgbe_hw *hw = &adapter->hw; u32 ivar, index; vector |= IXGBE_IVAR_ALLOC_VAL; if (type == -1) { /* MISC IVAR */ ivar = IXGBE_READ_REG(hw, IXGBE_VTIVAR_MISC); ivar &= ~0xFF; ivar |= vector; IXGBE_WRITE_REG(hw, IXGBE_VTIVAR_MISC, ivar); } else { /* RX/TX IVARS */ index = (16 * (entry & 1)) + (8 * type); ivar = IXGBE_READ_REG(hw, IXGBE_VTIVAR(entry >> 1)); ivar &= ~(0xFF << index); ivar |= (vector << index); IXGBE_WRITE_REG(hw, IXGBE_VTIVAR(entry >> 1), ivar); } } static void ixv_configure_ivars(struct adapter *adapter) { struct ix_queue *que = adapter->queues; for (int i = 0; i < adapter->num_queues; i++, que++) { /* First the RX queue entry */ ixv_set_ivar(adapter, i, que->msix, 0); /* ... and the TX */ ixv_set_ivar(adapter, i, que->msix, 1); /* Set an initial value in EITR */ IXGBE_WRITE_REG(&adapter->hw, IXGBE_VTEITR(que->msix), IXV_EITR_DEFAULT); } /* For the mailbox interrupt */ ixv_set_ivar(adapter, 1, adapter->vector, -1); } /* ** Tasklet handler for MSIX MBX interrupts ** - do outside interrupt since it might sleep */ static void ixv_handle_mbx(void *context, int pending) { struct adapter *adapter = context; ixgbe_check_link(&adapter->hw, &adapter->link_speed, &adapter->link_up, 0); ixv_update_link_status(adapter); } /* ** The VF stats registers never have a truly virgin ** starting point, so this routine tries to make an ** artificial one, marking ground zero on attach as ** it were. 
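** ixv_init_stats() then records the current hardware counters as the ** base_*/last_* values, and the UPDATE_STAT_32/36 macros below widen ** the 32- and 36-bit registers by detecting wraparound (current < last) ** and crediting another 2^32 or 2^36 to the running count.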
*/ static void ixv_save_stats(struct adapter *adapter) { if (adapter->stats.vf.vfgprc || adapter->stats.vf.vfgptc) { adapter->stats.vf.saved_reset_vfgprc += adapter->stats.vf.vfgprc - adapter->stats.vf.base_vfgprc; adapter->stats.vf.saved_reset_vfgptc += adapter->stats.vf.vfgptc - adapter->stats.vf.base_vfgptc; adapter->stats.vf.saved_reset_vfgorc += adapter->stats.vf.vfgorc - adapter->stats.vf.base_vfgorc; adapter->stats.vf.saved_reset_vfgotc += adapter->stats.vf.vfgotc - adapter->stats.vf.base_vfgotc; adapter->stats.vf.saved_reset_vfmprc += adapter->stats.vf.vfmprc - adapter->stats.vf.base_vfmprc; } } static void ixv_init_stats(struct adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; adapter->stats.vf.last_vfgprc = IXGBE_READ_REG(hw, IXGBE_VFGPRC); adapter->stats.vf.last_vfgorc = IXGBE_READ_REG(hw, IXGBE_VFGORC_LSB); adapter->stats.vf.last_vfgorc |= (((u64)(IXGBE_READ_REG(hw, IXGBE_VFGORC_MSB))) << 32); adapter->stats.vf.last_vfgptc = IXGBE_READ_REG(hw, IXGBE_VFGPTC); adapter->stats.vf.last_vfgotc = IXGBE_READ_REG(hw, IXGBE_VFGOTC_LSB); adapter->stats.vf.last_vfgotc |= (((u64)(IXGBE_READ_REG(hw, IXGBE_VFGOTC_MSB))) << 32); adapter->stats.vf.last_vfmprc = IXGBE_READ_REG(hw, IXGBE_VFMPRC); adapter->stats.vf.base_vfgprc = adapter->stats.vf.last_vfgprc; adapter->stats.vf.base_vfgorc = adapter->stats.vf.last_vfgorc; adapter->stats.vf.base_vfgptc = adapter->stats.vf.last_vfgptc; adapter->stats.vf.base_vfgotc = adapter->stats.vf.last_vfgotc; adapter->stats.vf.base_vfmprc = adapter->stats.vf.last_vfmprc; } #define UPDATE_STAT_32(reg, last, count) \ { \ u32 current = IXGBE_READ_REG(hw, reg); \ if (current < last) \ count += 0x100000000LL; \ last = current; \ count &= 0xFFFFFFFF00000000LL; \ count |= current; \ } #define UPDATE_STAT_36(lsb, msb, last, count) \ { \ u64 cur_lsb = IXGBE_READ_REG(hw, lsb); \ u64 cur_msb = IXGBE_READ_REG(hw, msb); \ u64 current = ((cur_msb << 32) | cur_lsb); \ if (current < last) \ count += 0x1000000000LL; \ last = current; \ count &= 0xFFFFFFF000000000LL; \ count |= current; \ } /* ** ixv_update_stats - Update the board statistics counters. */ void ixv_update_stats(struct adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; UPDATE_STAT_32(IXGBE_VFGPRC, adapter->stats.vf.last_vfgprc, adapter->stats.vf.vfgprc); UPDATE_STAT_32(IXGBE_VFGPTC, adapter->stats.vf.last_vfgptc, adapter->stats.vf.vfgptc); UPDATE_STAT_36(IXGBE_VFGORC_LSB, IXGBE_VFGORC_MSB, adapter->stats.vf.last_vfgorc, adapter->stats.vf.vfgorc); UPDATE_STAT_36(IXGBE_VFGOTC_LSB, IXGBE_VFGOTC_MSB, adapter->stats.vf.last_vfgotc, adapter->stats.vf.vfgotc); UPDATE_STAT_32(IXGBE_VFMPRC, adapter->stats.vf.last_vfmprc, adapter->stats.vf.vfmprc); } /* * Add statistic sysctls for the VF. 
*/ static void ixv_add_stats_sysctls(struct adapter *adapter) { device_t dev = adapter->dev; struct ix_queue *que = &adapter->queues[0]; struct tx_ring *txr = que->txr; struct rx_ring *rxr = que->rxr; struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev); struct sysctl_oid *tree = device_get_sysctl_tree(dev); struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree); struct ixgbevf_hw_stats *stats = &adapter->stats.vf; struct sysctl_oid *stat_node, *queue_node; struct sysctl_oid_list *stat_list, *queue_list; /* Driver Statistics */ SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", CTLFLAG_RD, &adapter->dropped_pkts, "Driver dropped packets"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_failed", CTLFLAG_RD, &adapter->mbuf_defrag_failed, "m_defrag() failed"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_events", CTLFLAG_RD, &adapter->watchdog_events, "Watchdog timeouts"); stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac", CTLFLAG_RD, NULL, "VF Statistics (read from HW registers)"); stat_list = SYSCTL_CHILDREN(stat_node); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_rcvd", CTLFLAG_RD, &stats->vfgprc, "Good Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_rcvd", CTLFLAG_RD, &stats->vfgorc, "Good Octets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_rcvd", CTLFLAG_RD, &stats->vfmprc, "Multicast Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd", CTLFLAG_RD, &stats->vfgptc, "Good Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd", CTLFLAG_RD, &stats->vfgotc, "Good Octets Transmitted"); queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "que", CTLFLAG_RD, NULL, "Queue Statistics (collected by SW)"); queue_list = SYSCTL_CHILDREN(queue_node); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "irqs", CTLFLAG_RD, &(que->irqs), "IRQs on queue"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_irqs", CTLFLAG_RD, &(rxr->rx_irq), "RX irqs on queue"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_packets", CTLFLAG_RD, &(rxr->rx_packets), "RX packets"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_bytes", CTLFLAG_RD, &(rxr->rx_bytes), "RX bytes"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_discarded", CTLFLAG_RD, &(rxr->rx_discarded), "Discarded RX packets"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_packets", CTLFLAG_RD, &(txr->total_packets), "TX Packets"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_no_desc", CTLFLAG_RD, &(txr->no_desc_avail), "# of times not enough descriptors were available during TX"); } static void ixv_set_sysctl_value(struct adapter *adapter, const char *name, const char *description, int *limit, int value) { *limit = value; SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev), SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)), OID_AUTO, name, CTLFLAG_RW, limit, value, description); } /********************************************************************** * * This routine is called only when em_display_debug_stats is enabled. * This routine provides a way to take a look at important statistics * maintained by the driver and hardware. 
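 *
 * [Editor's note] The nodes registered above appear under the device's
 * sysctl tree. A hedged userland sketch of reading one of them follows;
 * the unit number (0) in the OID path is an illustrative assumption.
 */

#if 0	/* userland example, not kernel code */
#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
	uint64_t pkts;
	size_t len = sizeof(pkts);

	/* "mac" node and "good_pkts_rcvd" leaf as registered above. */
	if (sysctlbyname("dev.ixv.0.mac.good_pkts_rcvd",
	    &pkts, &len, NULL, 0) == 0)
		printf("good packets received: %llu\n",
		    (unsigned long long)pkts);
	return (0);
}
#endif

/*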
* **********************************************************************/ static void ixv_print_debug_info(struct adapter *adapter) { device_t dev = adapter->dev; struct ixgbe_hw *hw = &adapter->hw; struct ix_queue *que = adapter->queues; struct rx_ring *rxr; struct tx_ring *txr; struct lro_ctrl *lro; device_printf(dev,"Error Byte Count = %u \n", IXGBE_READ_REG(hw, IXGBE_ERRBC)); for (int i = 0; i < adapter->num_queues; i++, que++) { txr = que->txr; rxr = que->rxr; lro = &rxr->lro; device_printf(dev,"QUE(%d) IRQs Handled: %lu\n", que->msix, (long)que->irqs); device_printf(dev,"RX(%d) Packets Received: %lld\n", rxr->me, (long long)rxr->rx_packets); device_printf(dev,"RX(%d) Bytes Received: %lu\n", rxr->me, (long)rxr->rx_bytes); device_printf(dev,"RX(%d) LRO Queued= %lld\n", rxr->me, (long long)lro->lro_queued); device_printf(dev,"RX(%d) LRO Flushed= %lld\n", rxr->me, (long long)lro->lro_flushed); device_printf(dev,"TX(%d) Packets Sent: %lu\n", txr->me, (long)txr->total_packets); device_printf(dev,"TX(%d) NO Desc Avail: %lu\n", txr->me, (long)txr->no_desc_avail); } device_printf(dev,"MBX IRQ Handled: %lu\n", (long)adapter->link_irq); return; } static int ixv_sysctl_debug(SYSCTL_HANDLER_ARGS) { int error, result; struct adapter *adapter; result = -1; error = sysctl_handle_int(oidp, &result, 0, req); if (error || !req->newptr) return (error); if (result == 1) { adapter = (struct adapter *) arg1; ixv_print_debug_info(adapter); } return error; } Index: head/sys/dev/ixl/if_ixl.c =================================================================== --- head/sys/dev/ixl/if_ixl.c (revision 302383) +++ head/sys/dev/ixl/if_ixl.c (revision 302384) @@ -1,7478 +1,7479 @@ /****************************************************************************** Copyright (c) 2013-2015, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
******************************************************************************/ /*$FreeBSD$*/ #ifndef IXL_STANDALONE_BUILD #include "opt_inet.h" #include "opt_inet6.h" #include "opt_rss.h" #endif #include "ixl.h" #include "ixl_pf.h" #ifdef RSS #include #endif /********************************************************************* * Driver version *********************************************************************/ char ixl_driver_version[] = "1.4.27-k"; /********************************************************************* * PCI Device ID Table * * Used by probe to select devices to load on * Last field stores an index into ixl_strings * Last entry must be all 0s * * { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index } *********************************************************************/ static ixl_vendor_info_t ixl_vendor_info_array[] = { {I40E_INTEL_VENDOR_ID, I40E_DEV_ID_SFP_XL710, 0, 0, 0}, {I40E_INTEL_VENDOR_ID, I40E_DEV_ID_KX_B, 0, 0, 0}, {I40E_INTEL_VENDOR_ID, I40E_DEV_ID_KX_C, 0, 0, 0}, {I40E_INTEL_VENDOR_ID, I40E_DEV_ID_QSFP_A, 0, 0, 0}, {I40E_INTEL_VENDOR_ID, I40E_DEV_ID_QSFP_B, 0, 0, 0}, {I40E_INTEL_VENDOR_ID, I40E_DEV_ID_QSFP_C, 0, 0, 0}, {I40E_INTEL_VENDOR_ID, I40E_DEV_ID_10G_BASE_T, 0, 0, 0}, {I40E_INTEL_VENDOR_ID, I40E_DEV_ID_10G_BASE_T4, 0, 0, 0}, /* required last entry */ {0, 0, 0, 0, 0} }; /********************************************************************* * Table of branding strings *********************************************************************/ static char *ixl_strings[] = { "Intel(R) Ethernet Connection XL710 Driver" }; /********************************************************************* * Function prototypes *********************************************************************/ static int ixl_probe(device_t); static int ixl_attach(device_t); static int ixl_detach(device_t); static int ixl_shutdown(device_t); static int ixl_get_hw_capabilities(struct ixl_pf *); static void ixl_cap_txcsum_tso(struct ixl_vsi *, struct ifnet *, int); static int ixl_ioctl(struct ifnet *, u_long, caddr_t); static void ixl_init(void *); static void ixl_init_locked(struct ixl_pf *); static void ixl_stop(struct ixl_pf *); static void ixl_stop_locked(struct ixl_pf *); static void ixl_media_status(struct ifnet *, struct ifmediareq *); static int ixl_media_change(struct ifnet *); static void ixl_update_link_status(struct ixl_pf *); static int ixl_allocate_pci_resources(struct ixl_pf *); static u16 ixl_get_bus_info(struct i40e_hw *, device_t); static int ixl_setup_stations(struct ixl_pf *); static int ixl_switch_config(struct ixl_pf *); static int ixl_initialize_vsi(struct ixl_vsi *); static int ixl_setup_adminq_msix(struct ixl_pf *); static int ixl_setup_adminq_tq(struct ixl_pf *); static int ixl_setup_queue_msix(struct ixl_vsi *); static int ixl_setup_queue_tqs(struct ixl_vsi *); static int ixl_teardown_adminq_msix(struct ixl_pf *); static int ixl_teardown_queue_msix(struct ixl_vsi *); static void ixl_configure_intr0_msix(struct ixl_pf *); static void ixl_configure_queue_intr_msix(struct ixl_pf *); static void ixl_free_queue_tqs(struct ixl_vsi *); static void ixl_free_adminq_tq(struct ixl_pf *); static int ixl_assign_vsi_legacy(struct ixl_pf *); static int ixl_init_msix(struct ixl_pf *); static void ixl_configure_itr(struct ixl_pf *); static void ixl_configure_legacy(struct ixl_pf *); static void ixl_free_pci_resources(struct ixl_pf *); static void ixl_local_timer(void *); static int ixl_setup_interface(device_t, struct ixl_vsi *); static void ixl_link_event(struct 
ixl_pf *, struct i40e_arq_event_info *); static void ixl_config_rss(struct ixl_vsi *); static void ixl_set_queue_rx_itr(struct ixl_queue *); static void ixl_set_queue_tx_itr(struct ixl_queue *); static int ixl_set_advertised_speeds(struct ixl_pf *, int); static void ixl_get_initial_advertised_speeds(struct ixl_pf *); static int ixl_enable_rings(struct ixl_vsi *); static int ixl_disable_rings(struct ixl_vsi *); static void ixl_enable_intr(struct ixl_vsi *); static void ixl_disable_intr(struct ixl_vsi *); static void ixl_disable_rings_intr(struct ixl_vsi *); static void ixl_enable_adminq(struct i40e_hw *); static void ixl_disable_adminq(struct i40e_hw *); static void ixl_enable_queue(struct i40e_hw *, int); static void ixl_disable_queue(struct i40e_hw *, int); static void ixl_enable_legacy(struct i40e_hw *); static void ixl_disable_legacy(struct i40e_hw *); static void ixl_set_promisc(struct ixl_vsi *); static void ixl_add_multi(struct ixl_vsi *); static void ixl_del_multi(struct ixl_vsi *); static void ixl_register_vlan(void *, struct ifnet *, u16); static void ixl_unregister_vlan(void *, struct ifnet *, u16); static void ixl_setup_vlan_filters(struct ixl_vsi *); static void ixl_init_filters(struct ixl_vsi *); static void ixl_reconfigure_filters(struct ixl_vsi *vsi); static void ixl_add_filter(struct ixl_vsi *, u8 *, s16 vlan); static void ixl_del_filter(struct ixl_vsi *, u8 *, s16 vlan); static void ixl_add_hw_filters(struct ixl_vsi *, int, int); static void ixl_del_hw_filters(struct ixl_vsi *, int); static struct ixl_mac_filter * ixl_find_filter(struct ixl_vsi *, u8 *, s16); static void ixl_add_mc_filter(struct ixl_vsi *, u8 *); static void ixl_free_mac_filters(struct ixl_vsi *vsi); /* Sysctls*/ static void ixl_add_device_sysctls(struct ixl_pf *); static int ixl_set_flowcntl(SYSCTL_HANDLER_ARGS); static int ixl_set_advertise(SYSCTL_HANDLER_ARGS); static int ixl_current_speed(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_show_fw(SYSCTL_HANDLER_ARGS); #ifdef IXL_DEBUG_SYSCTL static int ixl_debug_info(SYSCTL_HANDLER_ARGS); static void ixl_print_debug_info(struct ixl_pf *); static int ixl_sysctl_link_status(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_phy_abilities(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_sw_filter_list(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_hw_res_alloc(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_switch_config(SYSCTL_HANDLER_ARGS); #endif /* The MSI/X Interrupt handlers */ static void ixl_intr(void *); static void ixl_msix_que(void *); static void ixl_msix_adminq(void *); static void ixl_handle_mdd_event(struct ixl_pf *); /* Deferred interrupt tasklets */ static void ixl_do_adminq(void *, int); /* Statistics */ static void ixl_add_hw_stats(struct ixl_pf *); static void ixl_add_sysctls_mac_stats(struct sysctl_ctx_list *, struct sysctl_oid_list *, struct i40e_hw_port_stats *); static void ixl_add_sysctls_eth_stats(struct sysctl_ctx_list *, struct sysctl_oid_list *, struct i40e_eth_stats *); static void ixl_update_stats_counters(struct ixl_pf *); static void ixl_update_eth_stats(struct ixl_vsi *); static void ixl_update_vsi_stats(struct ixl_vsi *); static void ixl_pf_reset_stats(struct ixl_pf *); static void ixl_vsi_reset_stats(struct ixl_vsi *); static void ixl_stat_update48(struct i40e_hw *, u32, u32, bool, u64 *, u64 *); static void ixl_stat_update32(struct i40e_hw *, u32, bool, u64 *, u64 *); /* NVM update */ static int ixl_handle_nvmupd_cmd(struct ixl_pf *, struct ifdrv *); static void ixl_handle_empr_reset(struct ixl_pf *); static int 
ixl_rebuild_hw_structs_after_reset(struct ixl_pf *); /* Debug helper functions */ #ifdef IXL_DEBUG static void ixl_print_nvm_cmd(device_t, struct i40e_nvm_access *); #endif #ifdef PCI_IOV static int ixl_adminq_err_to_errno(enum i40e_admin_queue_err err); static int ixl_iov_init(device_t dev, uint16_t num_vfs, const nvlist_t*); static void ixl_iov_uninit(device_t dev); static int ixl_add_vf(device_t dev, uint16_t vfnum, const nvlist_t*); static void ixl_handle_vf_msg(struct ixl_pf *, struct i40e_arq_event_info *); static void ixl_handle_vflr(void *arg, int pending); static void ixl_reset_vf(struct ixl_pf *pf, struct ixl_vf *vf); static void ixl_reinit_vf(struct ixl_pf *pf, struct ixl_vf *vf); #endif /********************************************************************* * FreeBSD Device Interface Entry Points *********************************************************************/ static device_method_t ixl_methods[] = { /* Device interface */ DEVMETHOD(device_probe, ixl_probe), DEVMETHOD(device_attach, ixl_attach), DEVMETHOD(device_detach, ixl_detach), DEVMETHOD(device_shutdown, ixl_shutdown), #ifdef PCI_IOV DEVMETHOD(pci_iov_init, ixl_iov_init), DEVMETHOD(pci_iov_uninit, ixl_iov_uninit), DEVMETHOD(pci_iov_add_vf, ixl_add_vf), #endif {0, 0} }; static driver_t ixl_driver = { "ixl", ixl_methods, sizeof(struct ixl_pf), }; devclass_t ixl_devclass; DRIVER_MODULE(ixl, pci, ixl_driver, ixl_devclass, 0, 0); MODULE_DEPEND(ixl, pci, 1, 1, 1); MODULE_DEPEND(ixl, ether, 1, 1, 1); #ifdef DEV_NETMAP MODULE_DEPEND(ixl, netmap, 1, 1, 1); #endif /* DEV_NETMAP */ /* ** Global reset mutex */ static struct mtx ixl_reset_mtx; /* ** TUNEABLE PARAMETERS: */ static SYSCTL_NODE(_hw, OID_AUTO, ixl, CTLFLAG_RD, 0, "IXL driver parameters"); /* * MSIX should be the default for best performance, * but this allows it to be forced off for testing. */ static int ixl_enable_msix = 1; TUNABLE_INT("hw.ixl.enable_msix", &ixl_enable_msix); SYSCTL_INT(_hw_ixl, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &ixl_enable_msix, 0, "Enable MSI-X interrupts"); /* ** Number of descriptors per ring: ** - TX and RX are the same size */ static int ixl_ringsz = DEFAULT_RING; TUNABLE_INT("hw.ixl.ringsz", &ixl_ringsz); SYSCTL_INT(_hw_ixl, OID_AUTO, ring_size, CTLFLAG_RDTUN, &ixl_ringsz, 0, "Descriptor Ring Size"); /* ** This can be set manually, if left as 0 the ** number of queues will be calculated based ** on cpus and msix vectors available. 
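**
** [Editor's note] These CTLFLAG_RDTUN knobs are read once at boot; a
** hedged example of setting them from /boot/loader.conf (the values
** shown are illustrative only):
**
**   hw.ixl.enable_msix="1"
**   hw.ixl.ringsz="2048"
**   hw.ixl.max_queues="4"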
*/ int ixl_max_queues = 0; TUNABLE_INT("hw.ixl.max_queues", &ixl_max_queues); SYSCTL_INT(_hw_ixl, OID_AUTO, max_queues, CTLFLAG_RDTUN, &ixl_max_queues, 0, "Number of Queues"); /* ** Controls for Interrupt Throttling ** - true/false for dynamic adjustment ** - default values for static ITR */ int ixl_dynamic_rx_itr = 1; TUNABLE_INT("hw.ixl.dynamic_rx_itr", &ixl_dynamic_rx_itr); SYSCTL_INT(_hw_ixl, OID_AUTO, dynamic_rx_itr, CTLFLAG_RDTUN, &ixl_dynamic_rx_itr, 0, "Dynamic RX Interrupt Rate"); int ixl_dynamic_tx_itr = 1; TUNABLE_INT("hw.ixl.dynamic_tx_itr", &ixl_dynamic_tx_itr); SYSCTL_INT(_hw_ixl, OID_AUTO, dynamic_tx_itr, CTLFLAG_RDTUN, &ixl_dynamic_tx_itr, 0, "Dynamic TX Interrupt Rate"); int ixl_rx_itr = IXL_ITR_8K; TUNABLE_INT("hw.ixl.rx_itr", &ixl_rx_itr); SYSCTL_INT(_hw_ixl, OID_AUTO, rx_itr, CTLFLAG_RDTUN, &ixl_rx_itr, 0, "RX Interrupt Rate"); int ixl_tx_itr = IXL_ITR_4K; TUNABLE_INT("hw.ixl.tx_itr", &ixl_tx_itr); SYSCTL_INT(_hw_ixl, OID_AUTO, tx_itr, CTLFLAG_RDTUN, &ixl_tx_itr, 0, "TX Interrupt Rate"); #ifdef IXL_FDIR static int ixl_enable_fdir = 1; TUNABLE_INT("hw.ixl.enable_fdir", &ixl_enable_fdir); /* Rate at which we sample */ int ixl_atr_rate = 20; TUNABLE_INT("hw.ixl.atr_rate", &ixl_atr_rate); #endif #ifdef DEV_NETMAP #define NETMAP_IXL_MAIN /* only bring in one part of the netmap code */ #include #endif /* DEV_NETMAP */ static char *ixl_fc_string[6] = { "None", "Rx", "Tx", "Full", "Priority", "Default" }; static MALLOC_DEFINE(M_IXL, "ixl", "ixl driver allocations"); static uint8_t ixl_bcast_addr[ETHER_ADDR_LEN] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; /********************************************************************* * Device identification routine * * ixl_probe determines if the driver should be loaded on * the hardware based on PCI vendor/device id of the device. * * return BUS_PROBE_DEFAULT on success, positive on failure *********************************************************************/ static int ixl_probe(device_t dev) { ixl_vendor_info_t *ent; u16 pci_vendor_id, pci_device_id; u16 pci_subvendor_id, pci_subdevice_id; char device_name[256]; static bool lock_init = FALSE; #if 0 INIT_DEBUGOUT("ixl_probe: begin"); #endif pci_vendor_id = pci_get_vendor(dev); if (pci_vendor_id != I40E_INTEL_VENDOR_ID) return (ENXIO); pci_device_id = pci_get_device(dev); pci_subvendor_id = pci_get_subvendor(dev); pci_subdevice_id = pci_get_subdevice(dev); ent = ixl_vendor_info_array; while (ent->vendor_id != 0) { if ((pci_vendor_id == ent->vendor_id) && (pci_device_id == ent->device_id) && ((pci_subvendor_id == ent->subvendor_id) || (ent->subvendor_id == 0)) && ((pci_subdevice_id == ent->subdevice_id) || (ent->subdevice_id == 0))) { sprintf(device_name, "%s, Version - %s", ixl_strings[ent->index], ixl_driver_version); device_set_desc_copy(dev, device_name); /* One shot mutex init */ if (lock_init == FALSE) { lock_init = TRUE; mtx_init(&ixl_reset_mtx, "ixl_reset", "IXL RESET Lock", MTX_DEF); } return (BUS_PROBE_DEFAULT); } ent++; } return (ENXIO); } /********************************************************************* * Device initialization routine * * The attach entry point is called when the driver is being loaded. * This routine identifies the type of hardware, allocates all resources * and initializes the hardware. 
* * return 0 on success, positive on failure *********************************************************************/ static int ixl_attach(device_t dev) { struct ixl_pf *pf; struct i40e_hw *hw; struct ixl_vsi *vsi; u16 bus; int error = 0; #ifdef PCI_IOV nvlist_t *pf_schema, *vf_schema; int iov_error; #endif INIT_DEBUGOUT("ixl_attach: begin"); /* Allocate, clear, and link in our primary soft structure */ pf = device_get_softc(dev); pf->dev = pf->osdep.dev = dev; hw = &pf->hw; /* ** Note this assumes we have a single embedded VSI, ** this could be enhanced later to allocate multiple */ vsi = &pf->vsi; vsi->dev = pf->dev; /* Core Lock Init*/ IXL_PF_LOCK_INIT(pf, device_get_nameunit(dev)); /* Set up the timer callout */ callout_init_mtx(&pf->timer, &pf->pf_mtx, 0); /* Save off the PCI information */ hw->vendor_id = pci_get_vendor(dev); hw->device_id = pci_get_device(dev); hw->revision_id = pci_read_config(dev, PCIR_REVID, 1); hw->subsystem_vendor_id = pci_read_config(dev, PCIR_SUBVEND_0, 2); hw->subsystem_device_id = pci_read_config(dev, PCIR_SUBDEV_0, 2); hw->bus.device = pci_get_slot(dev); hw->bus.func = pci_get_function(dev); pf->vc_debug_lvl = 1; /* Do PCI setup - map BAR0, etc */ if (ixl_allocate_pci_resources(pf)) { device_printf(dev, "Allocation of PCI resources failed\n"); error = ENXIO; goto err_out; } /* Establish a clean starting point */ i40e_clear_hw(hw); error = i40e_pf_reset(hw); if (error) { device_printf(dev, "PF reset failure %d\n", error); error = EIO; goto err_out; } /* Set admin queue parameters */ hw->aq.num_arq_entries = IXL_AQ_LEN; hw->aq.num_asq_entries = IXL_AQ_LEN; hw->aq.arq_buf_size = IXL_AQ_BUFSZ; hw->aq.asq_buf_size = IXL_AQ_BUFSZ; /* Initialize mac filter list for VSI */ SLIST_INIT(&vsi->ftl); /* Initialize the shared code */ error = i40e_init_shared_code(hw); if (error) { device_printf(dev, "Unable to initialize shared code, error %d\n", error); error = EIO; goto err_out; } /* Set up the admin queue */ error = i40e_init_adminq(hw); if (error != 0 && error != I40E_ERR_FIRMWARE_API_VERSION) { device_printf(dev, "Unable to initialize Admin Queue, error %d\n", error); error = EIO; goto err_out; } ixl_print_nvm_version(pf); if (error == I40E_ERR_FIRMWARE_API_VERSION) { device_printf(dev, "The driver for the device stopped " "because the NVM image is newer than expected.\n" "You must install the most recent version of " "the network driver.\n"); error = EIO; goto err_out; } if (hw->aq.api_maj_ver == I40E_FW_API_VERSION_MAJOR && hw->aq.api_min_ver > I40E_FW_API_VERSION_MINOR) device_printf(dev, "The driver for the device detected " "a newer version of the NVM image than expected.\n" "Please install the most recent version of the network driver.\n"); else if (hw->aq.api_maj_ver < I40E_FW_API_VERSION_MAJOR || hw->aq.api_min_ver < (I40E_FW_API_VERSION_MINOR - 1)) device_printf(dev, "The driver for the device detected " "an older version of the NVM image than expected.\n" "Please update the NVM image.\n"); /* Clear PXE mode */ i40e_clear_pxe_mode(hw); /* Get capabilities from the device */ error = ixl_get_hw_capabilities(pf); if (error) { device_printf(dev, "HW capabilities failure!\n"); goto err_get_cap; } /* Set up host memory cache */ error = i40e_init_lan_hmc(hw, hw->func_caps.num_tx_qp, hw->func_caps.num_rx_qp, 0, 0); if (error) { device_printf(dev, "init_lan_hmc failed: %d\n", error); goto err_get_cap; } error = i40e_configure_lan_hmc(hw, I40E_HMC_MODEL_DIRECT_ONLY); if (error) { device_printf(dev, "configure_lan_hmc failed: %d\n", error); goto err_mac_hmc; } /* 
Disable LLDP from the firmware for certain NVM versions */ if (((pf->hw.aq.fw_maj_ver == 4) && (pf->hw.aq.fw_min_ver < 3)) || (pf->hw.aq.fw_maj_ver < 4)) i40e_aq_stop_lldp(hw, TRUE, NULL); i40e_get_mac_addr(hw, hw->mac.addr); error = i40e_validate_mac_addr(hw->mac.addr); if (error) { device_printf(dev, "validate_mac_addr failed: %d\n", error); goto err_mac_hmc; } bcopy(hw->mac.addr, hw->mac.perm_addr, ETHER_ADDR_LEN); i40e_get_port_mac_addr(hw, hw->mac.port_addr); /* Set up VSI and queues */ if (ixl_setup_stations(pf) != 0) { device_printf(dev, "setup stations failed!\n"); error = ENOMEM; goto err_mac_hmc; } if (((hw->aq.fw_maj_ver == 4) && (hw->aq.fw_min_ver < 33)) || (hw->aq.fw_maj_ver < 4)) { i40e_msec_delay(75); error = i40e_aq_set_link_restart_an(hw, TRUE, NULL); if (error) { device_printf(dev, "link restart failed, aq_err=%d\n", pf->hw.aq.asq_last_status); goto err_late; } } /* Determine link state */ hw->phy.get_link_info = TRUE; i40e_get_link_status(hw, &pf->link_up); /* Setup OS network interface / ifnet */ if (ixl_setup_interface(dev, vsi) != 0) { device_printf(dev, "interface setup failed!\n"); error = EIO; goto err_late; } error = ixl_switch_config(pf); if (error) { device_printf(dev, "Initial ixl_switch_config() failed: %d\n", error); goto err_late; } /* Limit PHY interrupts to link, autoneg, and modules failure */ error = i40e_aq_set_phy_int_mask(hw, IXL_DEFAULT_PHY_INT_MASK, NULL); if (error) { device_printf(dev, "i40e_aq_set_phy_mask() failed: err %d," " aq_err %d\n", error, hw->aq.asq_last_status); goto err_late; } /* Get the bus configuration and set the shared code's config */ bus = ixl_get_bus_info(hw, dev); i40e_set_pci_config_data(hw, bus); /* * In MSI-X mode, initialize the Admin Queue interrupt, * so userland tools can communicate with the adapter regardless of * the ifnet interface's status. */ if (pf->msix > 1) { error = ixl_setup_adminq_msix(pf); if (error) { device_printf(dev, "ixl_setup_adminq_msix error: %d\n", error); goto err_late; } error = ixl_setup_adminq_tq(pf); if (error) { device_printf(dev, "ixl_setup_adminq_tq error: %d\n", error); goto err_late; } ixl_configure_intr0_msix(pf); ixl_enable_adminq(hw); } /* Initialize statistics & add sysctls */ ixl_add_device_sysctls(pf); ixl_pf_reset_stats(pf); ixl_update_stats_counters(pf); ixl_add_hw_stats(pf); /* Register for VLAN events */ vsi->vlan_attach = EVENTHANDLER_REGISTER(vlan_config, ixl_register_vlan, vsi, EVENTHANDLER_PRI_FIRST); vsi->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig, ixl_unregister_vlan, vsi, EVENTHANDLER_PRI_FIRST); #ifdef PCI_IOV /* SR-IOV is only supported when MSI-X is in use. 
*/ if (pf->msix > 1) { pf_schema = pci_iov_schema_alloc_node(); vf_schema = pci_iov_schema_alloc_node(); pci_iov_schema_add_unicast_mac(vf_schema, "mac-addr", 0, NULL); pci_iov_schema_add_bool(vf_schema, "mac-anti-spoof", IOV_SCHEMA_HASDEFAULT, TRUE); pci_iov_schema_add_bool(vf_schema, "allow-set-mac", IOV_SCHEMA_HASDEFAULT, FALSE); pci_iov_schema_add_bool(vf_schema, "allow-promisc", IOV_SCHEMA_HASDEFAULT, FALSE); iov_error = pci_iov_attach(dev, pf_schema, vf_schema); if (iov_error != 0) { device_printf(dev, "Failed to initialize SR-IOV (error=%d)\n", iov_error); } else device_printf(dev, "SR-IOV ready\n"); } #endif #ifdef DEV_NETMAP ixl_netmap_attach(vsi); #endif /* DEV_NETMAP */ INIT_DEBUGOUT("ixl_attach: end"); return (0); err_late: if (vsi->ifp != NULL) if_free(vsi->ifp); err_mac_hmc: i40e_shutdown_lan_hmc(hw); err_get_cap: i40e_shutdown_adminq(hw); err_out: ixl_free_pci_resources(pf); ixl_free_vsi(vsi); IXL_PF_LOCK_DESTROY(pf); return (error); } /********************************************************************* * Device removal routine * * The detach entry point is called when the driver is being removed. * This routine stops the adapter and deallocates all the resources * that were allocated for driver operation. * * return 0 on success, positive on failure *********************************************************************/ static int ixl_detach(device_t dev) { struct ixl_pf *pf = device_get_softc(dev); struct i40e_hw *hw = &pf->hw; struct ixl_vsi *vsi = &pf->vsi; enum i40e_status_code status; #ifdef PCI_IOV int error; #endif INIT_DEBUGOUT("ixl_detach: begin"); /* Make sure VLANS are not using driver */ if (vsi->ifp->if_vlantrunk != NULL) { device_printf(dev, "Vlan in use, detach first\n"); return (EBUSY); } #ifdef PCI_IOV error = pci_iov_detach(dev); if (error != 0) { device_printf(dev, "SR-IOV in use; detach first.\n"); return (error); } #endif ether_ifdetach(vsi->ifp); if (vsi->ifp->if_drv_flags & IFF_DRV_RUNNING) ixl_stop(pf); ixl_free_queue_tqs(vsi); /* Shutdown LAN HMC */ status = i40e_shutdown_lan_hmc(hw); if (status) device_printf(dev, "Shutdown LAN HMC failed with code %d\n", status); /* Shutdown admin queue */ ixl_disable_adminq(hw); ixl_free_adminq_tq(pf); ixl_teardown_adminq_msix(pf); status = i40e_shutdown_adminq(hw); if (status) device_printf(dev, "Shutdown Admin queue failed with code %d\n", status); /* Unregister VLAN events */ if (vsi->vlan_attach != NULL) EVENTHANDLER_DEREGISTER(vlan_config, vsi->vlan_attach); if (vsi->vlan_detach != NULL) EVENTHANDLER_DEREGISTER(vlan_unconfig, vsi->vlan_detach); callout_drain(&pf->timer); #ifdef DEV_NETMAP netmap_detach(vsi->ifp); #endif /* DEV_NETMAP */ ixl_free_pci_resources(pf); bus_generic_detach(dev); if_free(vsi->ifp); ixl_free_vsi(vsi); IXL_PF_LOCK_DESTROY(pf); return (0); } /********************************************************************* * * Shutdown entry point * **********************************************************************/ static int ixl_shutdown(device_t dev) { struct ixl_pf *pf = device_get_softc(dev); ixl_stop(pf); return (0); } /********************************************************************* * * Get the hardware capabilities * **********************************************************************/ static int ixl_get_hw_capabilities(struct ixl_pf *pf) { struct i40e_aqc_list_capabilities_element_resp *buf; struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; int error, len; u16 needed; bool again = TRUE; len = 40 * sizeof(struct i40e_aqc_list_capabilities_element_resp); retry: if (!(buf = 
(struct i40e_aqc_list_capabilities_element_resp *) malloc(len, M_DEVBUF, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate cap memory\n"); return (ENOMEM); } /* This populates the hw struct */ error = i40e_aq_discover_capabilities(hw, buf, len, &needed, i40e_aqc_opc_list_func_capabilities, NULL); free(buf, M_DEVBUF); if ((pf->hw.aq.asq_last_status == I40E_AQ_RC_ENOMEM) && (again == TRUE)) { /* retry once with a larger buffer */ again = FALSE; len = needed; goto retry; } else if (pf->hw.aq.asq_last_status != I40E_AQ_RC_OK) { device_printf(dev, "capability discovery failed: %d\n", pf->hw.aq.asq_last_status); return (ENODEV); } /* Capture this PF's starting queue pair */ pf->qbase = hw->func_caps.base_queue; #ifdef IXL_DEBUG device_printf(dev, "pf_id=%d, num_vfs=%d, msix_pf=%d, " "msix_vf=%d, fd_g=%d, fd_b=%d, tx_qp=%d rx_qp=%d qbase=%d\n", hw->pf_id, hw->func_caps.num_vfs, hw->func_caps.num_msix_vectors, hw->func_caps.num_msix_vectors_vf, hw->func_caps.fd_filters_guaranteed, hw->func_caps.fd_filters_best_effort, hw->func_caps.num_tx_qp, hw->func_caps.num_rx_qp, hw->func_caps.base_queue); #endif return (error); } static void ixl_cap_txcsum_tso(struct ixl_vsi *vsi, struct ifnet *ifp, int mask) { device_t dev = vsi->dev; /* Enable/disable TXCSUM/TSO4 */ if (!(ifp->if_capenable & IFCAP_TXCSUM) && !(ifp->if_capenable & IFCAP_TSO4)) { if (mask & IFCAP_TXCSUM) { ifp->if_capenable |= IFCAP_TXCSUM; /* enable TXCSUM, restore TSO if previously enabled */ if (vsi->flags & IXL_FLAGS_KEEP_TSO4) { vsi->flags &= ~IXL_FLAGS_KEEP_TSO4; ifp->if_capenable |= IFCAP_TSO4; } } else if (mask & IFCAP_TSO4) { ifp->if_capenable |= (IFCAP_TXCSUM | IFCAP_TSO4); vsi->flags &= ~IXL_FLAGS_KEEP_TSO4; device_printf(dev, "TSO4 requires txcsum, enabling both...\n"); } } else if((ifp->if_capenable & IFCAP_TXCSUM) && !(ifp->if_capenable & IFCAP_TSO4)) { if (mask & IFCAP_TXCSUM) ifp->if_capenable &= ~IFCAP_TXCSUM; else if (mask & IFCAP_TSO4) ifp->if_capenable |= IFCAP_TSO4; } else if((ifp->if_capenable & IFCAP_TXCSUM) && (ifp->if_capenable & IFCAP_TSO4)) { if (mask & IFCAP_TXCSUM) { vsi->flags |= IXL_FLAGS_KEEP_TSO4; ifp->if_capenable &= ~(IFCAP_TXCSUM | IFCAP_TSO4); device_printf(dev, "TSO4 requires txcsum, disabling both...\n"); } else if (mask & IFCAP_TSO4) ifp->if_capenable &= ~IFCAP_TSO4; } /* Enable/disable TXCSUM_IPV6/TSO6 */ if (!(ifp->if_capenable & IFCAP_TXCSUM_IPV6) && !(ifp->if_capenable & IFCAP_TSO6)) { if (mask & IFCAP_TXCSUM_IPV6) { ifp->if_capenable |= IFCAP_TXCSUM_IPV6; if (vsi->flags & IXL_FLAGS_KEEP_TSO6) { vsi->flags &= ~IXL_FLAGS_KEEP_TSO6; ifp->if_capenable |= IFCAP_TSO6; } } else if (mask & IFCAP_TSO6) { ifp->if_capenable |= (IFCAP_TXCSUM_IPV6 | IFCAP_TSO6); vsi->flags &= ~IXL_FLAGS_KEEP_TSO6; device_printf(dev, "TSO6 requires txcsum6, enabling both...\n"); } } else if((ifp->if_capenable & IFCAP_TXCSUM_IPV6) && !(ifp->if_capenable & IFCAP_TSO6)) { if (mask & IFCAP_TXCSUM_IPV6) ifp->if_capenable &= ~IFCAP_TXCSUM_IPV6; else if (mask & IFCAP_TSO6) ifp->if_capenable |= IFCAP_TSO6; } else if ((ifp->if_capenable & IFCAP_TXCSUM_IPV6) && (ifp->if_capenable & IFCAP_TSO6)) { if (mask & IFCAP_TXCSUM_IPV6) { vsi->flags |= IXL_FLAGS_KEEP_TSO6; ifp->if_capenable &= ~(IFCAP_TXCSUM_IPV6 | IFCAP_TSO6); device_printf(dev, "TSO6 requires txcsum6, disabling both...\n"); } else if (mask & IFCAP_TSO6) ifp->if_capenable &= ~IFCAP_TSO6; } } /********************************************************************* * Ioctl entry point * * ixl_ioctl is called when the user wants to configure the * interface. 
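 *
 * [Editor's note] ixl_cap_txcsum_tso() above enforces one invariant:
 * TSO may only be enabled while the matching TX checksum offload is,
 * with the IXL_FLAGS_KEEP_TSO* flags remembering a TSO setting that was
 * dropped alongside txcsum. A hedged sketch of the core rule on a
 * capability mask (hypothetical helper, IPv4 case only):
 */

static inline int
caps_enforce_tso_rule(int capenable)
{
	/* TSO4 without TXCSUM is invalid: drop TSO4. */
	if ((capenable & IFCAP_TSO4) && !(capenable & IFCAP_TXCSUM))
		capenable &= ~IFCAP_TSO4;
	return (capenable);
}

/*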
* * return 0 on success, positive on failure **********************************************************************/ static int ixl_ioctl(struct ifnet * ifp, u_long command, caddr_t data) { struct ixl_vsi *vsi = ifp->if_softc; struct ixl_pf *pf = vsi->back; struct ifreq *ifr = (struct ifreq *)data; struct ifdrv *ifd = (struct ifdrv *)data; #if defined(INET) || defined(INET6) struct ifaddr *ifa = (struct ifaddr *)data; bool avoid_reset = FALSE; #endif int error = 0; switch (command) { case SIOCSIFADDR: #ifdef INET if (ifa->ifa_addr->sa_family == AF_INET) avoid_reset = TRUE; #endif #ifdef INET6 if (ifa->ifa_addr->sa_family == AF_INET6) avoid_reset = TRUE; #endif #if defined(INET) || defined(INET6) /* ** Calling init results in link renegotiation, ** so we avoid doing it when possible. */ if (avoid_reset) { ifp->if_flags |= IFF_UP; if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) ixl_init(pf); #ifdef INET if (!(ifp->if_flags & IFF_NOARP)) arp_ifinit(ifp, ifa); #endif } else error = ether_ioctl(ifp, command, data); break; #endif case SIOCSIFMTU: IOCTL_DEBUGOUT("ioctl: SIOCSIFMTU (Set Interface MTU)"); if (ifr->ifr_mtu > IXL_MAX_FRAME - ETHER_HDR_LEN - ETHER_CRC_LEN - ETHER_VLAN_ENCAP_LEN) { error = EINVAL; } else { IXL_PF_LOCK(pf); ifp->if_mtu = ifr->ifr_mtu; vsi->max_frame_size = ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN + ETHER_VLAN_ENCAP_LEN; - ixl_init_locked(pf); + if (ifp->if_drv_flags & IFF_DRV_RUNNING) + ixl_init_locked(pf); IXL_PF_UNLOCK(pf); } break; case SIOCSIFFLAGS: IOCTL_DEBUGOUT("ioctl: SIOCSIFFLAGS (Set Interface Flags)"); IXL_PF_LOCK(pf); if (ifp->if_flags & IFF_UP) { if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) { if ((ifp->if_flags ^ pf->if_flags) & (IFF_PROMISC | IFF_ALLMULTI)) { ixl_set_promisc(vsi); } } else { IXL_PF_UNLOCK(pf); ixl_init(pf); IXL_PF_LOCK(pf); } } else { if (ifp->if_drv_flags & IFF_DRV_RUNNING) { IXL_PF_UNLOCK(pf); ixl_stop(pf); IXL_PF_LOCK(pf); } } pf->if_flags = ifp->if_flags; IXL_PF_UNLOCK(pf); break; case SIOCSDRVSPEC: case SIOCGDRVSPEC: IOCTL_DEBUGOUT("ioctl: SIOCxDRVSPEC (Get/Set Driver-specific " "Info)\n"); /* NVM update command */ if (ifd->ifd_cmd == I40E_NVM_ACCESS) error = ixl_handle_nvmupd_cmd(pf, ifd); else error = EINVAL; break; case SIOCADDMULTI: IOCTL_DEBUGOUT("ioctl: SIOCADDMULTI"); if (ifp->if_drv_flags & IFF_DRV_RUNNING) { IXL_PF_LOCK(pf); ixl_disable_intr(vsi); ixl_add_multi(vsi); ixl_enable_intr(vsi); IXL_PF_UNLOCK(pf); } break; case SIOCDELMULTI: IOCTL_DEBUGOUT("ioctl: SIOCDELMULTI"); if (ifp->if_drv_flags & IFF_DRV_RUNNING) { IXL_PF_LOCK(pf); ixl_disable_intr(vsi); ixl_del_multi(vsi); ixl_enable_intr(vsi); IXL_PF_UNLOCK(pf); } break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: #ifdef IFM_ETH_XTYPE case SIOCGIFXMEDIA: #endif IOCTL_DEBUGOUT("ioctl: SIOCxIFMEDIA (Get/Set Interface Media)"); error = ifmedia_ioctl(ifp, ifr, &vsi->media, command); break; case SIOCSIFCAP: { int mask = ifr->ifr_reqcap ^ ifp->if_capenable; IOCTL_DEBUGOUT("ioctl: SIOCSIFCAP (Set Capabilities)"); ixl_cap_txcsum_tso(vsi, ifp, mask); if (mask & IFCAP_RXCSUM) ifp->if_capenable ^= IFCAP_RXCSUM; if (mask & IFCAP_RXCSUM_IPV6) ifp->if_capenable ^= IFCAP_RXCSUM_IPV6; if (mask & IFCAP_LRO) ifp->if_capenable ^= IFCAP_LRO; if (mask & IFCAP_VLAN_HWTAGGING) ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; if (mask & IFCAP_VLAN_HWFILTER) ifp->if_capenable ^= IFCAP_VLAN_HWFILTER; if (mask & IFCAP_VLAN_HWTSO) ifp->if_capenable ^= IFCAP_VLAN_HWTSO; if (ifp->if_drv_flags & IFF_DRV_RUNNING) { IXL_PF_LOCK(pf); ixl_init_locked(pf); IXL_PF_UNLOCK(pf); } VLAN_CAPABILITIES(ifp); break; } default: 
IOCTL_DEBUGOUT("ioctl: UNKNOWN (0x%X)\n", (int)command); error = ether_ioctl(ifp, command, data); break; } return (error); } /********************************************************************* * Init entry point * * This routine is used in two ways. It is used by the stack as * init entry point in network interface structure. It is also used * by the driver as a hw/sw initialization routine to get to a * consistent state. * * return 0 on success, positive on failure **********************************************************************/ static void ixl_init_locked(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; struct ixl_vsi *vsi = &pf->vsi; struct ifnet *ifp = vsi->ifp; device_t dev = pf->dev; struct i40e_filter_control_settings filter; u8 tmpaddr[ETHER_ADDR_LEN]; int ret; mtx_assert(&pf->pf_mtx, MA_OWNED); INIT_DEBUGOUT("ixl_init_locked: begin"); ixl_stop_locked(pf); /* Get the latest mac address... User might use a LAA */ bcopy(IF_LLADDR(vsi->ifp), tmpaddr, I40E_ETH_LENGTH_OF_ADDRESS); if (!cmp_etheraddr(hw->mac.addr, tmpaddr) && (i40e_validate_mac_addr(tmpaddr) == I40E_SUCCESS)) { ixl_del_filter(vsi, hw->mac.addr, IXL_VLAN_ANY); bcopy(tmpaddr, hw->mac.addr, I40E_ETH_LENGTH_OF_ADDRESS); ret = i40e_aq_mac_address_write(hw, I40E_AQC_WRITE_TYPE_LAA_ONLY, hw->mac.addr, NULL); if (ret) { device_printf(dev, "LLA address" "change failed!!\n"); return; } } ixl_add_filter(vsi, hw->mac.addr, IXL_VLAN_ANY); /* Set the various hardware offload abilities */ ifp->if_hwassist = 0; if (ifp->if_capenable & IFCAP_TSO) ifp->if_hwassist |= CSUM_TSO; if (ifp->if_capenable & IFCAP_TXCSUM) ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP); if (ifp->if_capenable & IFCAP_TXCSUM_IPV6) ifp->if_hwassist |= (CSUM_TCP_IPV6 | CSUM_UDP_IPV6); /* Set up the device filtering */ bzero(&filter, sizeof(filter)); filter.enable_ethtype = TRUE; filter.enable_macvlan = TRUE; #ifdef IXL_FDIR filter.enable_fdir = TRUE; #endif filter.hash_lut_size = I40E_HASH_LUT_SIZE_512; if (i40e_set_filter_control(hw, &filter)) device_printf(dev, "i40e_set_filter_control() failed\n"); /* Set up RSS */ ixl_config_rss(vsi); /* Prepare the VSI: rings, hmc contexts, etc... 
*/ if (ixl_initialize_vsi(vsi)) { device_printf(dev, "initialize vsi failed!!\n"); return; } /* Add protocol filters to list */ ixl_init_filters(vsi); /* Setup vlan's if needed */ ixl_setup_vlan_filters(vsi); /* Set up MSI/X routing and the ITR settings */ if (ixl_enable_msix) { ixl_configure_queue_intr_msix(pf); ixl_configure_itr(pf); } else ixl_configure_legacy(pf); ixl_enable_rings(vsi); i40e_aq_set_default_vsi(hw, vsi->seid, NULL); ixl_reconfigure_filters(vsi); /* And now turn on interrupts */ ixl_enable_intr(vsi); /* Get link info */ hw->phy.get_link_info = TRUE; i40e_get_link_status(hw, &pf->link_up); ixl_update_link_status(pf); /* Set initial advertised speed sysctl value */ ixl_get_initial_advertised_speeds(pf); /* Start the local timer */ callout_reset(&pf->timer, hz, ixl_local_timer, pf); /* Now inform the stack we're ready */ ifp->if_drv_flags |= IFF_DRV_RUNNING; return; } /* For the set_advertise sysctl */ static void ixl_get_initial_advertised_speeds(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; enum i40e_status_code status; struct i40e_aq_get_phy_abilities_resp abilities; /* Set initial sysctl values */ status = i40e_aq_get_phy_capabilities(hw, FALSE, false, &abilities, NULL); if (status) { /* Non-fatal error */ device_printf(dev, "%s: i40e_aq_get_phy_capabilities() error %d\n", __func__, status); return; } if (abilities.link_speed & I40E_LINK_SPEED_40GB) pf->advertised_speed |= 0x10; if (abilities.link_speed & I40E_LINK_SPEED_20GB) pf->advertised_speed |= 0x8; if (abilities.link_speed & I40E_LINK_SPEED_10GB) pf->advertised_speed |= 0x4; if (abilities.link_speed & I40E_LINK_SPEED_1GB) pf->advertised_speed |= 0x2; if (abilities.link_speed & I40E_LINK_SPEED_100MB) pf->advertised_speed |= 0x1; } static int ixl_teardown_hw_structs(struct ixl_pf *pf) { enum i40e_status_code status = 0; struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; /* Shutdown LAN HMC */ if (hw->hmc.hmc_obj) { status = i40e_shutdown_lan_hmc(hw); if (status) { device_printf(dev, "init: LAN HMC shutdown failure; status %d\n", status); goto err_out; } } // XXX: This gets called when we know the adminq is inactive; // so we already know it's setup when we get here. 
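// [Editor's note] The NULL check on hw->hmc.hmc_obj above guards
// against tearing down an HMC that was never set up (e.g. when an
// earlier attach or rebuild failed before LAN HMC initialization).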
/* Shutdown admin queue */ status = i40e_shutdown_adminq(hw); if (status) device_printf(dev, "init: Admin Queue shutdown failure; status %d\n", status); err_out: return (status); } static int ixl_reset(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; u8 set_fc_err_mask; int error = 0; // XXX: clear_hw() actually writes to hw registers -- maybe this isn't necessary i40e_clear_hw(hw); error = i40e_pf_reset(hw); if (error) { device_printf(dev, "init: PF reset failure"); error = EIO; goto err_out; } error = i40e_init_adminq(hw); if (error) { device_printf(dev, "init: Admin queue init failure;" " status code %d", error); error = EIO; goto err_out; } i40e_clear_pxe_mode(hw); error = ixl_get_hw_capabilities(pf); if (error) { device_printf(dev, "init: Error retrieving HW capabilities;" " status code %d\n", error); goto err_out; } error = i40e_init_lan_hmc(hw, hw->func_caps.num_tx_qp, hw->func_caps.num_rx_qp, 0, 0); if (error) { device_printf(dev, "init: LAN HMC init failed; status code %d\n", error); error = EIO; goto err_out; } error = i40e_configure_lan_hmc(hw, I40E_HMC_MODEL_DIRECT_ONLY); if (error) { device_printf(dev, "init: LAN HMC config failed; status code %d\n", error); error = EIO; goto err_out; } // XXX: possible fix for panic, but our failure recovery is still broken error = ixl_switch_config(pf); if (error) { device_printf(dev, "init: ixl_switch_config() failed: %d\n", error); goto err_out; } error = i40e_aq_set_phy_int_mask(hw, IXL_DEFAULT_PHY_INT_MASK, NULL); if (error) { device_printf(dev, "init: i40e_aq_set_phy_mask() failed: err %d," " aq_err %d\n", error, hw->aq.asq_last_status); error = EIO; goto err_out; } error = i40e_set_fc(hw, &set_fc_err_mask, true); if (error) { device_printf(dev, "init: setting link flow control failed; retcode %d," " fc_err_mask 0x%02x\n", error, set_fc_err_mask); goto err_out; } // XXX: (Rebuild VSIs?) /* Firmware delay workaround */ if (((hw->aq.fw_maj_ver == 4) && (hw->aq.fw_min_ver < 33)) || (hw->aq.fw_maj_ver < 4)) { i40e_msec_delay(75); error = i40e_aq_set_link_restart_an(hw, TRUE, NULL); if (error) { device_printf(dev, "init: link restart failed, aq_err %d\n", hw->aq.asq_last_status); goto err_out; } } err_out: return (error); } static void ixl_init(void *arg) { struct ixl_pf *pf = arg; struct ixl_vsi *vsi = &pf->vsi; device_t dev = pf->dev; int error = 0; /* * If the aq is dead here, it probably means something outside of the driver * did something to the adapter, like a PF reset. * So rebuild the driver's state here if that occurs. */ if (!i40e_check_asq_alive(&pf->hw)) { device_printf(dev, "Admin Queue is down; resetting...\n"); IXL_PF_LOCK(pf); ixl_teardown_hw_structs(pf); ixl_reset(pf); IXL_PF_UNLOCK(pf); } /* * Set up LAN queue interrupts here. * Kernel interrupt setup functions cannot be called while holding a lock, * so this is done outside of init_locked(). 
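 *
 * [Editor's note] bus_setup_intr() and the taskqueue setup routines may
 * sleep, so they cannot run under the PF mutex; hence the pattern here:
 * tear down and re-create the vectors unlocked, then take IXL_PF_LOCK()
 * only for ixl_init_locked() itself.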
*/ if (pf->msix > 1) { /* Teardown existing interrupts, if they exist */ ixl_teardown_queue_msix(vsi); ixl_free_queue_tqs(vsi); /* Then set them up again */ error = ixl_setup_queue_msix(vsi); if (error) device_printf(dev, "ixl_setup_queue_msix() error: %d\n", error); error = ixl_setup_queue_tqs(vsi); if (error) device_printf(dev, "ixl_setup_queue_tqs() error: %d\n", error); } else // possibly broken error = ixl_assign_vsi_legacy(pf); if (error) { device_printf(pf->dev, "assign_vsi_msix/legacy error: %d\n", error); return; } IXL_PF_LOCK(pf); ixl_init_locked(pf); IXL_PF_UNLOCK(pf); } /* ** MSIX Interrupt Handlers and Tasklets */ static void ixl_handle_que(void *context, int pending) { struct ixl_queue *que = context; struct ixl_vsi *vsi = que->vsi; struct i40e_hw *hw = vsi->hw; struct tx_ring *txr = &que->txr; struct ifnet *ifp = vsi->ifp; bool more; if (ifp->if_drv_flags & IFF_DRV_RUNNING) { more = ixl_rxeof(que, IXL_RX_LIMIT); IXL_TX_LOCK(txr); ixl_txeof(que); if (!drbr_empty(ifp, txr->br)) ixl_mq_start_locked(ifp, txr); IXL_TX_UNLOCK(txr); if (more) { taskqueue_enqueue(que->tq, &que->task); return; } } /* Reenable this interrupt - hmmm */ ixl_enable_queue(hw, que->me); return; } /********************************************************************* * * Legacy Interrupt Service routine * **********************************************************************/ void ixl_intr(void *arg) { struct ixl_pf *pf = arg; struct i40e_hw *hw = &pf->hw; struct ixl_vsi *vsi = &pf->vsi; struct ixl_queue *que = vsi->queues; struct ifnet *ifp = vsi->ifp; struct tx_ring *txr = &que->txr; u32 reg, icr0, mask; bool more_tx, more_rx; ++que->irqs; /* Protect against spurious interrupts */ if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) return; icr0 = rd32(hw, I40E_PFINT_ICR0); reg = rd32(hw, I40E_PFINT_DYN_CTL0); reg = reg | I40E_PFINT_DYN_CTL0_CLEARPBA_MASK; wr32(hw, I40E_PFINT_DYN_CTL0, reg); mask = rd32(hw, I40E_PFINT_ICR0_ENA); #ifdef PCI_IOV if (icr0 & I40E_PFINT_ICR0_VFLR_MASK) taskqueue_enqueue(pf->tq, &pf->vflr_task); #endif if (icr0 & I40E_PFINT_ICR0_ADMINQ_MASK) { taskqueue_enqueue(pf->tq, &pf->adminq); return; } more_rx = ixl_rxeof(que, IXL_RX_LIMIT); IXL_TX_LOCK(txr); more_tx = ixl_txeof(que); if (!drbr_empty(vsi->ifp, txr->br)) more_tx = 1; IXL_TX_UNLOCK(txr); /* re-enable other interrupt causes */ wr32(hw, I40E_PFINT_ICR0_ENA, mask); /* And now the queues */ reg = rd32(hw, I40E_QINT_RQCTL(0)); reg |= I40E_QINT_RQCTL_CAUSE_ENA_MASK; wr32(hw, I40E_QINT_RQCTL(0), reg); reg = rd32(hw, I40E_QINT_TQCTL(0)); reg |= I40E_QINT_TQCTL_CAUSE_ENA_MASK; reg &= ~I40E_PFINT_ICR0_INTEVENT_MASK; wr32(hw, I40E_QINT_TQCTL(0), reg); ixl_enable_legacy(hw); return; } /********************************************************************* * * MSIX VSI Interrupt Service routine * **********************************************************************/ void ixl_msix_que(void *arg) { struct ixl_queue *que = arg; struct ixl_vsi *vsi = que->vsi; struct i40e_hw *hw = vsi->hw; struct tx_ring *txr = &que->txr; bool more_tx, more_rx; /* Protect against spurious interrupts */ if (!(vsi->ifp->if_drv_flags & IFF_DRV_RUNNING)) return; ++que->irqs; more_rx = ixl_rxeof(que, IXL_RX_LIMIT); IXL_TX_LOCK(txr); more_tx = ixl_txeof(que); /* ** Make certain that if the stack ** has anything queued the task gets ** scheduled to handle it. 
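**
** [Editor's note] This is the usual interrupt/polling hand-off: while
** either ring reports more work the taskqueue keeps servicing the
** queue, and only once both are drained is the MSI-X vector re-armed
** via ixl_enable_queue() below.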
*/ if (!drbr_empty(vsi->ifp, txr->br)) more_tx = 1; IXL_TX_UNLOCK(txr); ixl_set_queue_rx_itr(que); ixl_set_queue_tx_itr(que); if (more_tx || more_rx) taskqueue_enqueue(que->tq, &que->task); else ixl_enable_queue(hw, que->me); return; } /********************************************************************* * * MSIX Admin Queue Interrupt Service routine * **********************************************************************/ static void ixl_msix_adminq(void *arg) { struct ixl_pf *pf = arg; struct i40e_hw *hw = &pf->hw; u32 reg, mask, rstat_reg; bool do_task = FALSE; ++pf->admin_irq; reg = rd32(hw, I40E_PFINT_ICR0); mask = rd32(hw, I40E_PFINT_ICR0_ENA); /* Check on the cause */ if (reg & I40E_PFINT_ICR0_ADMINQ_MASK) { mask &= ~I40E_PFINT_ICR0_ADMINQ_MASK; do_task = TRUE; } if (reg & I40E_PFINT_ICR0_MAL_DETECT_MASK) { ixl_handle_mdd_event(pf); mask &= ~I40E_PFINT_ICR0_MAL_DETECT_MASK; } if (reg & I40E_PFINT_ICR0_GRST_MASK) { device_printf(pf->dev, "Reset Requested!\n"); rstat_reg = rd32(hw, I40E_GLGEN_RSTAT); rstat_reg = (rstat_reg & I40E_GLGEN_RSTAT_RESET_TYPE_MASK) >> I40E_GLGEN_RSTAT_RESET_TYPE_SHIFT; device_printf(pf->dev, "Reset type: "); switch (rstat_reg) { /* These others might be handled similarly to an EMPR reset */ case I40E_RESET_CORER: printf("CORER\n"); break; case I40E_RESET_GLOBR: printf("GLOBR\n"); break; case I40E_RESET_EMPR: printf("EMPR\n"); atomic_set_int(&pf->state, IXL_PF_STATE_EMPR_RESETTING); break; default: printf("?\n"); break; } // overload admin queue task to check reset progress? do_task = TRUE; } if (reg & I40E_PFINT_ICR0_ECC_ERR_MASK) { device_printf(pf->dev, "ECC Error detected!\n"); } if (reg & I40E_PFINT_ICR0_HMC_ERR_MASK) { device_printf(pf->dev, "HMC Error detected!\n"); } if (reg & I40E_PFINT_ICR0_PCI_EXCEPTION_MASK) { device_printf(pf->dev, "PCI Exception detected!\n"); } #ifdef PCI_IOV if (reg & I40E_PFINT_ICR0_VFLR_MASK) { mask &= ~I40E_PFINT_ICR0_ENA_VFLR_MASK; taskqueue_enqueue(pf->tq, &pf->vflr_task); } #endif reg = rd32(hw, I40E_PFINT_DYN_CTL0); reg = reg | I40E_PFINT_DYN_CTL0_CLEARPBA_MASK; wr32(hw, I40E_PFINT_DYN_CTL0, reg); if (do_task) taskqueue_enqueue(pf->tq, &pf->adminq); } /********************************************************************* * * Media Ioctl callback * * This routine is called whenever the user queries the status of * the interface using ifconfig. 
* **********************************************************************/ static void ixl_media_status(struct ifnet * ifp, struct ifmediareq * ifmr) { struct ixl_vsi *vsi = ifp->if_softc; struct ixl_pf *pf = vsi->back; struct i40e_hw *hw = &pf->hw; INIT_DEBUGOUT("ixl_media_status: begin"); IXL_PF_LOCK(pf); hw->phy.get_link_info = TRUE; i40e_get_link_status(hw, &pf->link_up); ixl_update_link_status(pf); ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; if (!pf->link_up) { IXL_PF_UNLOCK(pf); return; } ifmr->ifm_status |= IFM_ACTIVE; /* Hardware always does full-duplex */ ifmr->ifm_active |= IFM_FDX; switch (hw->phy.link_info.phy_type) { /* 100 M */ case I40E_PHY_TYPE_100BASE_TX: ifmr->ifm_active |= IFM_100_TX; break; /* 1 G */ case I40E_PHY_TYPE_1000BASE_T: ifmr->ifm_active |= IFM_1000_T; break; case I40E_PHY_TYPE_1000BASE_SX: ifmr->ifm_active |= IFM_1000_SX; break; case I40E_PHY_TYPE_1000BASE_LX: ifmr->ifm_active |= IFM_1000_LX; break; case I40E_PHY_TYPE_1000BASE_T_OPTICAL: ifmr->ifm_active |= IFM_OTHER; break; /* 10 G */ case I40E_PHY_TYPE_10GBASE_SFPP_CU: ifmr->ifm_active |= IFM_10G_TWINAX; break; case I40E_PHY_TYPE_10GBASE_SR: ifmr->ifm_active |= IFM_10G_SR; break; case I40E_PHY_TYPE_10GBASE_LR: ifmr->ifm_active |= IFM_10G_LR; break; case I40E_PHY_TYPE_10GBASE_T: ifmr->ifm_active |= IFM_10G_T; break; case I40E_PHY_TYPE_XAUI: case I40E_PHY_TYPE_XFI: case I40E_PHY_TYPE_10GBASE_AOC: ifmr->ifm_active |= IFM_OTHER; break; /* 40 G */ case I40E_PHY_TYPE_40GBASE_CR4: case I40E_PHY_TYPE_40GBASE_CR4_CU: ifmr->ifm_active |= IFM_40G_CR4; break; case I40E_PHY_TYPE_40GBASE_SR4: ifmr->ifm_active |= IFM_40G_SR4; break; case I40E_PHY_TYPE_40GBASE_LR4: ifmr->ifm_active |= IFM_40G_LR4; break; case I40E_PHY_TYPE_XLAUI: ifmr->ifm_active |= IFM_OTHER; break; #ifndef IFM_ETH_XTYPE case I40E_PHY_TYPE_1000BASE_KX: ifmr->ifm_active |= IFM_1000_CX; break; case I40E_PHY_TYPE_SGMII: ifmr->ifm_active |= IFM_OTHER; break; case I40E_PHY_TYPE_10GBASE_CR1_CU: case I40E_PHY_TYPE_10GBASE_CR1: ifmr->ifm_active |= IFM_10G_TWINAX; break; case I40E_PHY_TYPE_10GBASE_KX4: ifmr->ifm_active |= IFM_10G_CX4; break; case I40E_PHY_TYPE_10GBASE_KR: ifmr->ifm_active |= IFM_10G_SR; break; case I40E_PHY_TYPE_SFI: ifmr->ifm_active |= IFM_OTHER; break; case I40E_PHY_TYPE_40GBASE_KR4: case I40E_PHY_TYPE_XLPPI: case I40E_PHY_TYPE_40GBASE_AOC: ifmr->ifm_active |= IFM_40G_SR4; break; #else case I40E_PHY_TYPE_1000BASE_KX: ifmr->ifm_active |= IFM_1000_KX; break; case I40E_PHY_TYPE_SGMII: ifmr->ifm_active |= IFM_1000_SGMII; break; /* ERJ: What's the difference between these? 
*/ case I40E_PHY_TYPE_10GBASE_CR1_CU: case I40E_PHY_TYPE_10GBASE_CR1: ifmr->ifm_active |= IFM_10G_CR1; break; case I40E_PHY_TYPE_10GBASE_KX4: ifmr->ifm_active |= IFM_10G_KX4; break; case I40E_PHY_TYPE_10GBASE_KR: ifmr->ifm_active |= IFM_10G_KR; break; case I40E_PHY_TYPE_SFI: ifmr->ifm_active |= IFM_10G_SFI; break; /* Our single 20G media type */ case I40E_PHY_TYPE_20GBASE_KR2: ifmr->ifm_active |= IFM_20G_KR2; break; case I40E_PHY_TYPE_40GBASE_KR4: ifmr->ifm_active |= IFM_40G_KR4; break; case I40E_PHY_TYPE_XLPPI: case I40E_PHY_TYPE_40GBASE_AOC: ifmr->ifm_active |= IFM_40G_XLPPI; break; #endif /* Unknown to driver */ default: ifmr->ifm_active |= IFM_UNKNOWN; break; } /* Report flow control status as well */ if (hw->phy.link_info.an_info & I40E_AQ_LINK_PAUSE_TX) ifmr->ifm_active |= IFM_ETH_TXPAUSE; if (hw->phy.link_info.an_info & I40E_AQ_LINK_PAUSE_RX) ifmr->ifm_active |= IFM_ETH_RXPAUSE; IXL_PF_UNLOCK(pf); return; } /* * NOTE: Fortville does not support forcing media speeds. Instead, * use the set_advertise sysctl to set the speeds Fortville * will advertise or be allowed to operate at. */ static int ixl_media_change(struct ifnet * ifp) { struct ixl_vsi *vsi = ifp->if_softc; struct ifmedia *ifm = &vsi->media; INIT_DEBUGOUT("ixl_media_change: begin"); if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) return (EINVAL); if_printf(ifp, "Media change is not supported.\n"); return (ENODEV); } #ifdef IXL_FDIR /* ** ATR: Application Targeted Receive - creates a filter ** based on TX flow info that will keep the receive ** portion of the flow on the same queue. Based on the ** implementation this is only available for TCP connections. */ void ixl_atr(struct ixl_queue *que, struct tcphdr *th, int etype) { struct ixl_vsi *vsi = que->vsi; struct tx_ring *txr = &que->txr; struct i40e_filter_program_desc *FDIR; u32 ptype, dtype; int idx; /* check if ATR is enabled and sample rate */ if ((!ixl_enable_fdir) || (!txr->atr_rate)) return; /* ** We sample all TCP SYN/FIN packets, ** or at the selected sample rate */ txr->atr_count++; if (((th->th_flags & (TH_FIN | TH_SYN)) == 0) && (txr->atr_count < txr->atr_rate)) return; txr->atr_count = 0; /* Get a descriptor to use */ idx = txr->next_avail; FDIR = (struct i40e_filter_program_desc *) &txr->base[idx]; if (++idx == que->num_desc) idx = 0; txr->avail--; txr->next_avail = idx; ptype = (que->me << I40E_TXD_FLTR_QW0_QINDEX_SHIFT) & I40E_TXD_FLTR_QW0_QINDEX_MASK; ptype |= (etype == ETHERTYPE_IP) ? (I40E_FILTER_PCTYPE_NONF_IPV4_TCP << I40E_TXD_FLTR_QW0_PCTYPE_SHIFT) : (I40E_FILTER_PCTYPE_NONF_IPV6_TCP << I40E_TXD_FLTR_QW0_PCTYPE_SHIFT); ptype |= vsi->id << I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT; dtype = I40E_TX_DESC_DTYPE_FILTER_PROG; /* ** We use the TCP TH_FIN as a trigger to remove ** the filter, otherwise it's an update. */ dtype |= (th->th_flags & TH_FIN) ?
(I40E_FILTER_PROGRAM_DESC_PCMD_REMOVE << I40E_TXD_FLTR_QW1_PCMD_SHIFT) : (I40E_FILTER_PROGRAM_DESC_PCMD_ADD_UPDATE << I40E_TXD_FLTR_QW1_PCMD_SHIFT); dtype |= I40E_FILTER_PROGRAM_DESC_DEST_DIRECT_PACKET_QINDEX << I40E_TXD_FLTR_QW1_DEST_SHIFT; dtype |= I40E_FILTER_PROGRAM_DESC_FD_STATUS_FD_ID << I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT; FDIR->qindex_flex_ptype_vsi = htole32(ptype); FDIR->dtype_cmd_cntindex = htole32(dtype); return; } #endif static void ixl_set_promisc(struct ixl_vsi *vsi) { struct ifnet *ifp = vsi->ifp; struct i40e_hw *hw = vsi->hw; int err, mcnt = 0; bool uni = FALSE, multi = FALSE; if (ifp->if_flags & IFF_ALLMULTI) multi = TRUE; else { /* Need to count the multicast addresses */ struct ifmultiaddr *ifma; if_maddr_rlock(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; if (mcnt == MAX_MULTICAST_ADDR) break; mcnt++; } if_maddr_runlock(ifp); } if (mcnt >= MAX_MULTICAST_ADDR) multi = TRUE; if (ifp->if_flags & IFF_PROMISC) uni = TRUE; err = i40e_aq_set_vsi_unicast_promiscuous(hw, vsi->seid, uni, NULL); err = i40e_aq_set_vsi_multicast_promiscuous(hw, vsi->seid, multi, NULL); return; } /********************************************************************* * Filter Routines * * Routines for multicast and vlan filter management. * *********************************************************************/ static void ixl_add_multi(struct ixl_vsi *vsi) { struct ifmultiaddr *ifma; struct ifnet *ifp = vsi->ifp; struct i40e_hw *hw = vsi->hw; int mcnt = 0, flags; IOCTL_DEBUGOUT("ixl_add_multi: begin"); if_maddr_rlock(ifp); /* ** First just get a count, to decide if ** we should simply use multicast promiscuous. */ TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; mcnt++; } if_maddr_runlock(ifp); if (__predict_false(mcnt >= MAX_MULTICAST_ADDR)) { /* delete existing MC filters */ ixl_del_hw_filters(vsi, mcnt); i40e_aq_set_vsi_multicast_promiscuous(hw, vsi->seid, TRUE, NULL); return; } mcnt = 0; if_maddr_rlock(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; ixl_add_mc_filter(vsi, (u8*)LLADDR((struct sockaddr_dl *) ifma->ifma_addr)); mcnt++; } if_maddr_runlock(ifp); if (mcnt > 0) { flags = (IXL_FILTER_ADD | IXL_FILTER_USED | IXL_FILTER_MC); ixl_add_hw_filters(vsi, flags, mcnt); } IOCTL_DEBUGOUT("ixl_add_multi: end"); return; } static void ixl_del_multi(struct ixl_vsi *vsi) { struct ifnet *ifp = vsi->ifp; struct ifmultiaddr *ifma; struct ixl_mac_filter *f; int mcnt = 0; bool match = FALSE; IOCTL_DEBUGOUT("ixl_del_multi: begin"); /* Search for removed multicast addresses */ if_maddr_rlock(ifp); SLIST_FOREACH(f, &vsi->ftl, next) { if ((f->flags & IXL_FILTER_USED) && (f->flags & IXL_FILTER_MC)) { match = FALSE; TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; u8 *mc_addr = (u8 *)LLADDR((struct sockaddr_dl *)ifma->ifma_addr); if (cmp_etheraddr(f->macaddr, mc_addr)) { match = TRUE; break; } } if (match == FALSE) { f->flags |= IXL_FILTER_DEL; mcnt++; } } } if_maddr_runlock(ifp); if (mcnt > 0) ixl_del_hw_filters(vsi, mcnt); } /********************************************************************* * Timer routine * * This routine checks for link status, updates statistics, * and runs the watchdog check. * * Only runs when the driver is configured UP and RUNNING.
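 *
 * [Editor's note] Stepping back to ixl_del_multi() above: it is a
 * mark-and-sweep pass, looking each driver-side MC filter up in the
 * live if_multiaddrs list, flagging it IXL_FILTER_DEL when absent, and
 * then removing all flagged entries in one batched ixl_del_hw_filters()
 * call. The lookup step could be factored as the hedged sketch below
 * (hypothetical helper, not part of the driver):
 */

static bool
mc_addr_still_present(struct ifnet *ifp, u8 *macaddr)
{
	struct ifmultiaddr *ifma;

	/* Caller is expected to hold the if_maddr rlock. */
	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
		if (ifma->ifma_addr->sa_family != AF_LINK)
			continue;
		if (cmp_etheraddr(macaddr,
		    (u8 *)LLADDR((struct sockaddr_dl *)ifma->ifma_addr)))
			return (TRUE);
	}
	return (FALSE);
}

/*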
 **********************************************************************/

static void
ixl_local_timer(void *arg)
{
	struct ixl_pf		*pf = arg;
	struct i40e_hw		*hw = &pf->hw;
	struct ixl_vsi		*vsi = &pf->vsi;
	struct ixl_queue	*que = vsi->queues;
	device_t		dev = pf->dev;
	int			hung = 0;
	u32			mask;

	mtx_assert(&pf->pf_mtx, MA_OWNED);

	/* Fire off the adminq task */
	taskqueue_enqueue(pf->tq, &pf->adminq);

	/* Update stats */
	ixl_update_stats_counters(pf);

	/*
	** Check status of the queues
	*/
	mask = (I40E_PFINT_DYN_CTLN_INTENA_MASK |
	    I40E_PFINT_DYN_CTLN_SWINT_TRIG_MASK);

	for (int i = 0; i < vsi->num_queues; i++, que++) {
		/* Any queues with outstanding work get a sw irq */
		if (que->busy)
			wr32(hw, I40E_PFINT_DYN_CTLN(que->me), mask);
		/*
		** Each time txeof runs without cleaning while there
		** are uncleaned descriptors, it increments busy. If
		** we reach IXL_MAX_TX_BUSY we declare the queue hung.
		*/
		if (que->busy == IXL_QUEUE_HUNG) {
			++hung;
			/* Mark the queue as inactive */
			vsi->active_queues &= ~((u64)1 << que->me);
			continue;
		} else {
			/* Check if we've come back from hung */
			if ((vsi->active_queues & ((u64)1 << que->me)) == 0)
				vsi->active_queues |= ((u64)1 << que->me);
		}
		if (que->busy >= IXL_MAX_TX_BUSY) {
#ifdef IXL_DEBUG
			device_printf(dev, "Warning: queue %d "
			    "appears to be hung!\n", i);
#endif
			que->busy = IXL_QUEUE_HUNG;
			++hung;
		}
	}
	/* Only reinit if all queues show hung */
	if (hung == vsi->num_queues)
		goto hung;

	callout_reset(&pf->timer, hz, ixl_local_timer, pf);
	return;

hung:
	device_printf(dev, "Local Timer: HANG DETECT - Resetting!!\n");
	ixl_init_locked(pf);
}

/*
** Note: this routine updates the OS on the link state;
** the real check of the hardware only happens with
** a link interrupt.
*/
static void
ixl_update_link_status(struct ixl_pf *pf)
{
	struct ixl_vsi		*vsi = &pf->vsi;
	struct i40e_hw		*hw = &pf->hw;
	struct ifnet		*ifp = vsi->ifp;
	device_t		dev = pf->dev;

	if (pf->link_up) {
		if (vsi->link_active == FALSE) {
			pf->fc = hw->fc.current_mode;
			if (bootverbose) {
				device_printf(dev, "Link is up %d Gbps %s,"
				    " Flow Control: %s\n",
				    ((pf->link_speed ==
				    I40E_LINK_SPEED_40GB)? 40:10),
				    "Full Duplex", ixl_fc_string[pf->fc]);
			}
			vsi->link_active = TRUE;
			/*
			** Warn user if link speed on NPAR enabled
			** partition is not at least 10GB
			*/
			if (hw->func_caps.npar_enable &&
			    (hw->phy.link_info.link_speed ==
			    I40E_LINK_SPEED_1GB ||
			    hw->phy.link_info.link_speed ==
			    I40E_LINK_SPEED_100MB))
				device_printf(dev, "The partition detected "
				    "a link speed that is less than 10Gbps\n");
			if_link_state_change(ifp, LINK_STATE_UP);
		}
	} else { /* Link down */
		if (vsi->link_active == TRUE) {
			if (bootverbose)
				device_printf(dev, "Link is Down\n");
			if_link_state_change(ifp, LINK_STATE_DOWN);
			vsi->link_active = FALSE;
		}
	}

	return;
}

static void
ixl_stop(struct ixl_pf *pf)
{
	IXL_PF_LOCK(pf);
	ixl_stop_locked(pf);
	IXL_PF_UNLOCK(pf);

	ixl_teardown_queue_msix(&pf->vsi);
	ixl_free_queue_tqs(&pf->vsi);
}

/*********************************************************************
 *
 *  This routine stops all traffic on the adapter: it stops the
 *  local timer and disables the rings and their interrupts.
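 *
 *  The caller must hold the PF lock; ixl_stop() above takes it
 *  around this routine and then tears down the queue interrupt
 *  vectors and taskqueues after dropping it.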
 **********************************************************************/
static void
ixl_stop_locked(struct ixl_pf *pf)
{
	struct ixl_vsi	*vsi = &pf->vsi;
	struct ifnet	*ifp = vsi->ifp;

	INIT_DEBUGOUT("ixl_stop: begin\n");

	IXL_PF_LOCK_ASSERT(pf);

	/* Stop the local timer */
	callout_stop(&pf->timer);

	ixl_disable_rings_intr(vsi);
	ixl_disable_rings(vsi);

	/* Tell the stack that the interface is no longer active */
	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING);
}

/*********************************************************************
 *
 *  Setup the Legacy or MSI Interrupt handler for the VSI
 *
 **********************************************************************/
static int
ixl_assign_vsi_legacy(struct ixl_pf *pf)
{
	device_t	dev = pf->dev;
	struct		ixl_vsi *vsi = &pf->vsi;
	struct		ixl_queue *que = vsi->queues;
	int		error, rid = 0;

	if (pf->msix == 1)
		rid = 1;
	pf->res = bus_alloc_resource_any(dev, SYS_RES_IRQ,
	    &rid, RF_SHAREABLE | RF_ACTIVE);
	if (pf->res == NULL) {
		device_printf(dev, "Unable to allocate"
		    " bus resource: vsi legacy/msi interrupt\n");
		return (ENXIO);
	}

	/* Set the handler function */
	error = bus_setup_intr(dev, pf->res,
	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
	    ixl_intr, pf, &pf->tag);
	if (error) {
		pf->res = NULL;
		device_printf(dev, "Failed to register legacy/msi handler\n");
		return (error);
	}
	bus_describe_intr(dev, pf->res, pf->tag, "irq0");
	TASK_INIT(&que->tx_task, 0, ixl_deferred_mq_start, que);
	TASK_INIT(&que->task, 0, ixl_handle_que, que);
	que->tq = taskqueue_create_fast("ixl_que", M_NOWAIT,
	    taskqueue_thread_enqueue, &que->tq);
	taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
	    device_get_nameunit(dev));
	TASK_INIT(&pf->adminq, 0, ixl_do_adminq, pf);

	pf->tq = taskqueue_create_fast("ixl_adm", M_NOWAIT,
	    taskqueue_thread_enqueue, &pf->tq);
	taskqueue_start_threads(&pf->tq, 1, PI_NET, "%s adminq",
	    device_get_nameunit(dev));

	return (0);
}

static int
ixl_setup_adminq_tq(struct ixl_pf *pf)
{
	device_t dev = pf->dev;
	int error = 0;

	/* Tasklet for Admin Queue interrupts */
	TASK_INIT(&pf->adminq, 0, ixl_do_adminq, pf);
#ifdef PCI_IOV
	/* VFLR Tasklet */
	TASK_INIT(&pf->vflr_task, 0, ixl_handle_vflr, pf);
#endif
	/* Create and start Admin Queue taskqueue */
	pf->tq = taskqueue_create_fast("ixl_aq", M_NOWAIT,
	    taskqueue_thread_enqueue, &pf->tq);
	if (!pf->tq) {
		device_printf(dev, "taskqueue_create_fast (for AQ) returned NULL!\n");
		return (ENOMEM);
	}
	error = taskqueue_start_threads(&pf->tq, 1, PI_NET, "%s aq",
	    device_get_nameunit(dev));
	if (error) {
		device_printf(dev, "taskqueue_start_threads (for AQ) error: %d\n",
		    error);
		taskqueue_free(pf->tq);
		return (error);
	}
	return (0);
}

static int
ixl_setup_queue_tqs(struct ixl_vsi *vsi)
{
	struct ixl_queue *que = vsi->queues;
	device_t dev = vsi->dev;
#ifdef RSS
	cpuset_t cpu_mask;
	int cpu_id;
#endif

	/* Create queue tasks and start queue taskqueues */
	for (int i = 0; i < vsi->num_queues; i++, que++) {
		TASK_INIT(&que->tx_task, 0, ixl_deferred_mq_start, que);
		TASK_INIT(&que->task, 0, ixl_handle_que, que);
		que->tq = taskqueue_create_fast("ixl_que", M_NOWAIT,
		    taskqueue_thread_enqueue, &que->tq);
#ifdef RSS
		cpu_id = rss_getcpu(i % rss_getnumbuckets());
		CPU_SETOF(cpu_id, &cpu_mask);
		taskqueue_start_threads_cpuset(&que->tq, 1, PI_NET,
		    &cpu_mask, "%s (bucket %d)",
		    device_get_nameunit(dev), cpu_id);
#else
		taskqueue_start_threads(&que->tq, 1, PI_NET,
		    "%s (que %d)", device_get_nameunit(dev), que->me);
#endif
	}

	return (0);
}

static void
ixl_free_adminq_tq(struct ixl_pf *pf)
{
	if (pf->tq) {
		taskqueue_free(pf->tq);
		pf->tq = NULL;
	}
}

static void
ixl_free_queue_tqs(struct ixl_vsi *vsi)
{
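	/* Release the taskqueues created for each queue */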
	struct ixl_queue *que = vsi->queues;

	for (int i = 0; i < vsi->num_queues; i++, que++) {
		if (que->tq) {
			taskqueue_free(que->tq);
			que->tq = NULL;
		}
	}
}

static int
ixl_setup_adminq_msix(struct ixl_pf *pf)
{
	device_t dev = pf->dev;
	int rid, error = 0;

	/* Admin IRQ rid is 1, vector is 0 */
	rid = 1;
	/* Get interrupt resource from bus */
	pf->res = bus_alloc_resource_any(dev, SYS_RES_IRQ,
	    &rid, RF_SHAREABLE | RF_ACTIVE);
	if (!pf->res) {
		device_printf(dev, "bus_alloc_resource_any() for Admin Queue"
		    " interrupt failed [rid=%d]\n", rid);
		return (ENXIO);
	}
	/* Then associate interrupt with handler */
	error = bus_setup_intr(dev, pf->res, INTR_TYPE_NET | INTR_MPSAFE,
	    NULL, ixl_msix_adminq, pf, &pf->tag);
	if (error) {
		pf->res = NULL;
		device_printf(dev, "bus_setup_intr() for Admin Queue"
		    " interrupt handler failed, error %d\n", error);
		return (ENXIO);
	}
	error = bus_describe_intr(dev, pf->res, pf->tag, "aq");
	if (error) {
		/* Probably non-fatal? */
		device_printf(dev, "bus_describe_intr() for Admin Queue"
		    " interrupt name failed, error %d\n", error);
	}
	pf->admvec = 0;

	return (0);
}

/*
 * Allocate interrupt resources from bus and associate an interrupt handler
 * to those for the VSI's queues.
 */
static int
ixl_setup_queue_msix(struct ixl_vsi *vsi)
{
	device_t	dev = vsi->dev;
	struct		ixl_queue *que = vsi->queues;
	struct		tx_ring	 *txr;
	int		error, rid, vector = 1;

	/* Queue interrupt vector numbers start at 1 (adminq intr is 0) */
	for (int i = 0; i < vsi->num_queues; i++, vector++, que++) {
		int cpu_id = i;
		rid = vector + 1;
		txr = &que->txr;
		que->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
		    RF_SHAREABLE | RF_ACTIVE);
		if (!que->res) {
			device_printf(dev, "bus_alloc_resource_any() for"
			    " Queue %d interrupt failed [rid=%d]\n",
			    que->me, rid);
			return (ENXIO);
		}
		/* Set the handler function */
		error = bus_setup_intr(dev, que->res,
		    INTR_TYPE_NET | INTR_MPSAFE, NULL,
		    ixl_msix_que, que, &que->tag);
		if (error) {
			device_printf(dev, "bus_setup_intr() for Queue %d"
			    " interrupt handler failed, error %d\n",
			    que->me, error);
			// TODO: Check for error from this?
			bus_release_resource(dev, SYS_RES_IRQ, rid, que->res);
			return (error);
		}
		error = bus_describe_intr(dev, que->res, que->tag, "q%d", i);
		if (error) {
			device_printf(dev, "bus_describe_intr() for Queue %d"
			    " interrupt name failed, error %d\n",
			    que->me, error);
		}
		/* Bind the vector to a CPU */
#ifdef RSS
		cpu_id = rss_getcpu(i % rss_getnumbuckets());
#endif
		error = bus_bind_intr(dev, que->res, cpu_id);
		if (error) {
			device_printf(dev, "bus_bind_intr() for Queue %d"
			    " to CPU %d failed, error %d\n",
			    que->me, cpu_id, error);
		}
		que->msix = vector;
	}

	return (0);
}

/*
 * Allocate MSI/X vectors
 */
static int
ixl_init_msix(struct ixl_pf *pf)
{
	device_t dev = pf->dev;
	int rid, want, vectors, queues, available;

	/* Override by tunable */
	if (ixl_enable_msix == 0)
		goto no_msix;

	/*
	** When used in a virtualized environment
	** PCI BUSMASTER capability may not be set
	** so explicitly set it here and rewrite
	** the ENABLE in the MSIX control register
	** at this point to cause the host to
	** successfully initialize us.
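	**
	** (For example, a hypervisor doing PCI passthrough may leave
	** BUSMASTER clear; rewriting bits that are already set is a
	** harmless no-op.)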
*/ { u16 pci_cmd_word; int msix_ctrl; pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2); pci_cmd_word |= PCIM_CMD_BUSMASTEREN; pci_write_config(dev, PCIR_COMMAND, pci_cmd_word, 2); pci_find_cap(dev, PCIY_MSIX, &rid); rid += PCIR_MSIX_CTRL; msix_ctrl = pci_read_config(dev, rid, 2); msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE; pci_write_config(dev, rid, msix_ctrl, 2); } /* First try MSI/X */ rid = PCIR_BAR(IXL_BAR); pf->msix_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (!pf->msix_mem) { /* May not be enabled */ device_printf(pf->dev, "Unable to map MSIX table\n"); goto no_msix; } available = pci_msix_count(dev); if (available == 0) { /* system has msix disabled */ bus_release_resource(dev, SYS_RES_MEMORY, rid, pf->msix_mem); pf->msix_mem = NULL; goto no_msix; } /* Figure out a reasonable auto config value */ queues = (mp_ncpus > (available - 1)) ? (available - 1) : mp_ncpus; /* Override with tunable value if tunable is less than autoconfig count */ if ((ixl_max_queues != 0) && (ixl_max_queues <= queues)) queues = ixl_max_queues; else if ((ixl_max_queues != 0) && (ixl_max_queues > queues)) device_printf(dev, "ixl_max_queues > # of cpus, using " "autoconfig amount...\n"); /* Or limit maximum auto-configured queues to 8 */ else if ((ixl_max_queues == 0) && (queues > 8)) queues = 8; #ifdef RSS /* If we're doing RSS, clamp at the number of RSS buckets */ if (queues > rss_getnumbuckets()) queues = rss_getnumbuckets(); #endif /* ** Want one vector (RX/TX pair) per queue ** plus an additional for the admin queue. */ want = queues + 1; if (want <= available) /* Have enough */ vectors = want; else { device_printf(pf->dev, "MSIX Configuration Problem, " "%d vectors available but %d wanted!\n", available, want); return (0); /* Will go to Legacy setup */ } if (pci_alloc_msix(dev, &vectors) == 0) { device_printf(pf->dev, "Using MSIX interrupts with %d vectors\n", vectors); pf->msix = vectors; pf->vsi.num_queues = queues; #ifdef RSS /* * If we're doing RSS, the number of queues needs to * match the number of RSS buckets that are configured. * * + If there's more queues than RSS buckets, we'll end * up with queues that get no traffic. * * + If there's more RSS buckets than queues, we'll end * up having multiple RSS buckets map to the same queue, * so there'll be some contention. */ if (queues != rss_getnumbuckets()) { device_printf(dev, "%s: queues (%d) != RSS buckets (%d)" "; performance will be impacted.\n", __func__, queues, rss_getnumbuckets()); } #endif return (vectors); } no_msix: vectors = pci_msi_count(dev); pf->vsi.num_queues = 1; ixl_max_queues = 1; ixl_enable_msix = 0; if (vectors == 1 && pci_alloc_msi(dev, &vectors) == 0) device_printf(pf->dev, "Using an MSI interrupt\n"); else { vectors = 0; device_printf(pf->dev, "Using a Legacy interrupt\n"); } return (vectors); } /* * Configure admin queue/misc interrupt cause registers in hardware. */ static void ixl_configure_intr0_msix(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; u32 reg; /* First set up the adminq - vector 0 */ wr32(hw, I40E_PFINT_ICR0_ENA, 0); /* disable all */ rd32(hw, I40E_PFINT_ICR0); /* read to clear */ reg = I40E_PFINT_ICR0_ENA_ECC_ERR_MASK | I40E_PFINT_ICR0_ENA_GRST_MASK | I40E_PFINT_ICR0_ENA_HMC_ERR_MASK | I40E_PFINT_ICR0_ENA_ADMINQ_MASK | I40E_PFINT_ICR0_ENA_MAL_DETECT_MASK | I40E_PFINT_ICR0_ENA_VFLR_MASK | I40E_PFINT_ICR0_ENA_PCI_EXCEPTION_MASK; wr32(hw, I40E_PFINT_ICR0_ENA, reg); /* * 0x7FF is the end of the queue list. * This means we won't use MSI-X vector 0 for a queue interrupt * in MSIX mode. 
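	 * (0x7FF is the IXL_QUEUE_EOL sentinel also used for the
	 * per-queue TQCTL next-queue fields below.)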
*/ wr32(hw, I40E_PFINT_LNKLST0, 0x7FF); /* Value is in 2 usec units, so 0x3E is 62*2 = 124 usecs. */ wr32(hw, I40E_PFINT_ITR0(IXL_RX_ITR), 0x3E); wr32(hw, I40E_PFINT_DYN_CTL0, I40E_PFINT_DYN_CTL0_SW_ITR_INDX_MASK | I40E_PFINT_DYN_CTL0_INTENA_MSK_MASK); wr32(hw, I40E_PFINT_STAT_CTL0, 0); } /* * Configure queue interrupt cause registers in hardware. */ static void ixl_configure_queue_intr_msix(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; struct ixl_vsi *vsi = &pf->vsi; u32 reg; u16 vector = 1; for (int i = 0; i < vsi->num_queues; i++, vector++) { wr32(hw, I40E_PFINT_DYN_CTLN(i), 0); /* First queue type is RX / 0 */ wr32(hw, I40E_PFINT_LNKLSTN(i), i); reg = I40E_QINT_RQCTL_CAUSE_ENA_MASK | (IXL_RX_ITR << I40E_QINT_RQCTL_ITR_INDX_SHIFT) | (vector << I40E_QINT_RQCTL_MSIX_INDX_SHIFT) | (i << I40E_QINT_RQCTL_NEXTQ_INDX_SHIFT) | (I40E_QUEUE_TYPE_TX << I40E_QINT_RQCTL_NEXTQ_TYPE_SHIFT); wr32(hw, I40E_QINT_RQCTL(i), reg); reg = I40E_QINT_TQCTL_CAUSE_ENA_MASK | (IXL_TX_ITR << I40E_QINT_TQCTL_ITR_INDX_SHIFT) | (vector << I40E_QINT_TQCTL_MSIX_INDX_SHIFT) | (IXL_QUEUE_EOL << I40E_QINT_TQCTL_NEXTQ_INDX_SHIFT) | (I40E_QUEUE_TYPE_RX << I40E_QINT_TQCTL_NEXTQ_TYPE_SHIFT); wr32(hw, I40E_QINT_TQCTL(i), reg); } } /* * Configure for MSI single vector operation */ static void ixl_configure_legacy(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; u32 reg; wr32(hw, I40E_PFINT_ITR0(0), 0); wr32(hw, I40E_PFINT_ITR0(1), 0); /* Setup "other" causes */ reg = I40E_PFINT_ICR0_ENA_ECC_ERR_MASK | I40E_PFINT_ICR0_ENA_MAL_DETECT_MASK | I40E_PFINT_ICR0_ENA_GRST_MASK | I40E_PFINT_ICR0_ENA_PCI_EXCEPTION_MASK | I40E_PFINT_ICR0_ENA_GPIO_MASK | I40E_PFINT_ICR0_ENA_LINK_STAT_CHANGE_MASK | I40E_PFINT_ICR0_ENA_HMC_ERR_MASK | I40E_PFINT_ICR0_ENA_PE_CRITERR_MASK | I40E_PFINT_ICR0_ENA_VFLR_MASK | I40E_PFINT_ICR0_ENA_ADMINQ_MASK ; wr32(hw, I40E_PFINT_ICR0_ENA, reg); /* SW_ITR_IDX = 0, but don't change INTENA */ wr32(hw, I40E_PFINT_DYN_CTL0, I40E_PFINT_DYN_CTLN_SW_ITR_INDX_MASK | I40E_PFINT_DYN_CTLN_INTENA_MSK_MASK); /* SW_ITR_IDX = 0, OTHER_ITR_IDX = 0 */ wr32(hw, I40E_PFINT_STAT_CTL0, 0); /* FIRSTQ_INDX = 0, FIRSTQ_TYPE = 0 (rx) */ wr32(hw, I40E_PFINT_LNKLST0, 0); /* Associate the queue pair to the vector and enable the q int */ reg = I40E_QINT_RQCTL_CAUSE_ENA_MASK | (IXL_RX_ITR << I40E_QINT_RQCTL_ITR_INDX_SHIFT) | (I40E_QUEUE_TYPE_TX << I40E_QINT_TQCTL_NEXTQ_TYPE_SHIFT); wr32(hw, I40E_QINT_RQCTL(0), reg); reg = I40E_QINT_TQCTL_CAUSE_ENA_MASK | (IXL_TX_ITR << I40E_QINT_TQCTL_ITR_INDX_SHIFT) | (IXL_QUEUE_EOL << I40E_QINT_TQCTL_NEXTQ_INDX_SHIFT); wr32(hw, I40E_QINT_TQCTL(0), reg); } /* * Get initial ITR values from tunable values. 
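 * (ITR values are expressed in 2-usec units, as with the
 * I40E_PFINT_ITR0 setting above.)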
*/ static void ixl_configure_itr(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; struct ixl_vsi *vsi = &pf->vsi; struct ixl_queue *que = vsi->queues; vsi->rx_itr_setting = ixl_rx_itr; vsi->tx_itr_setting = ixl_tx_itr; for (int i = 0; i < vsi->num_queues; i++, que++) { struct tx_ring *txr = &que->txr; struct rx_ring *rxr = &que->rxr; wr32(hw, I40E_PFINT_ITRN(IXL_RX_ITR, i), vsi->rx_itr_setting); rxr->itr = vsi->rx_itr_setting; rxr->latency = IXL_AVE_LATENCY; wr32(hw, I40E_PFINT_ITRN(IXL_TX_ITR, i), vsi->tx_itr_setting); txr->itr = vsi->tx_itr_setting; txr->latency = IXL_AVE_LATENCY; } } static int ixl_allocate_pci_resources(struct ixl_pf *pf) { int rid; device_t dev = pf->dev; rid = PCIR_BAR(0); pf->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (!(pf->pci_mem)) { device_printf(dev, "Unable to allocate bus resource: PCI memory\n"); return (ENXIO); } pf->osdep.mem_bus_space_tag = rman_get_bustag(pf->pci_mem); pf->osdep.mem_bus_space_handle = rman_get_bushandle(pf->pci_mem); pf->osdep.mem_bus_space_size = rman_get_size(pf->pci_mem); pf->osdep.flush_reg = I40E_GLGEN_STAT; pf->hw.hw_addr = (u8 *) &pf->osdep.mem_bus_space_handle; pf->hw.back = &pf->osdep; /* ** Now setup MSI or MSI/X, should ** return us the number of supported ** vectors. (Will be 1 for MSI) */ pf->msix = ixl_init_msix(pf); return (0); } /* * Teardown and release the admin queue/misc vector * interrupt. */ static int ixl_teardown_adminq_msix(struct ixl_pf *pf) { device_t dev = pf->dev; int rid; if (pf->admvec) /* we are doing MSIX */ rid = pf->admvec + 1; else (pf->msix != 0) ? (rid = 1):(rid = 0); // TODO: Check for errors from bus_teardown_intr // TODO: Check for errors from bus_release_resource if (pf->tag != NULL) { bus_teardown_intr(dev, pf->res, pf->tag); pf->tag = NULL; } if (pf->res != NULL) { bus_release_resource(dev, SYS_RES_IRQ, rid, pf->res); pf->res = NULL; } return (0); } static int ixl_teardown_queue_msix(struct ixl_vsi *vsi) { struct ixl_queue *que = vsi->queues; device_t dev = vsi->dev; int rid, error = 0; /* We may get here before stations are setup */ if ((!ixl_enable_msix) || (que == NULL)) return (0); /* Release all MSIX queue resources */ for (int i = 0; i < vsi->num_queues; i++, que++) { rid = que->msix + 1; if (que->tag != NULL) { error = bus_teardown_intr(dev, que->res, que->tag); if (error) { device_printf(dev, "bus_teardown_intr() for" " Queue %d interrupt failed\n", que->me); // return (ENXIO); } que->tag = NULL; } if (que->res != NULL) { error = bus_release_resource(dev, SYS_RES_IRQ, rid, que->res); if (error) { device_printf(dev, "bus_release_resource() for" " Queue %d interrupt failed [rid=%d]\n", que->me, rid); // return (ENXIO); } que->res = NULL; } } return (0); } static void ixl_free_pci_resources(struct ixl_pf *pf) { device_t dev = pf->dev; int memrid; ixl_teardown_queue_msix(&pf->vsi); ixl_teardown_adminq_msix(pf); if (pf->msix) pci_release_msi(dev); memrid = PCIR_BAR(IXL_BAR); if (pf->msix_mem != NULL) bus_release_resource(dev, SYS_RES_MEMORY, memrid, pf->msix_mem); if (pf->pci_mem != NULL) bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(0), pf->pci_mem); return; } static void ixl_add_ifmedia(struct ixl_vsi *vsi, u32 phy_type) { /* Display supported media types */ if (phy_type & (1 << I40E_PHY_TYPE_100BASE_TX)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_100_TX, 0, NULL); if (phy_type & (1 << I40E_PHY_TYPE_1000BASE_T)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_1000_T, 0, NULL); if (phy_type & (1 << I40E_PHY_TYPE_1000BASE_SX)) ifmedia_add(&vsi->media, IFM_ETHER 
| IFM_1000_SX, 0, NULL); if (phy_type & (1 << I40E_PHY_TYPE_1000BASE_LX)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_1000_LX, 0, NULL); if (phy_type & (1 << I40E_PHY_TYPE_XAUI) || phy_type & (1 << I40E_PHY_TYPE_XFI) || phy_type & (1 << I40E_PHY_TYPE_10GBASE_SFPP_CU)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_10G_TWINAX, 0, NULL); if (phy_type & (1 << I40E_PHY_TYPE_10GBASE_SR)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_10G_SR, 0, NULL); if (phy_type & (1 << I40E_PHY_TYPE_10GBASE_LR)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_10G_LR, 0, NULL); if (phy_type & (1 << I40E_PHY_TYPE_10GBASE_T)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_10G_T, 0, NULL); if (phy_type & (1 << I40E_PHY_TYPE_40GBASE_CR4) || phy_type & (1 << I40E_PHY_TYPE_40GBASE_CR4_CU) || phy_type & (1 << I40E_PHY_TYPE_40GBASE_AOC) || phy_type & (1 << I40E_PHY_TYPE_XLAUI) || phy_type & (1 << I40E_PHY_TYPE_40GBASE_KR4)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_40G_CR4, 0, NULL); if (phy_type & (1 << I40E_PHY_TYPE_40GBASE_SR4)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_40G_SR4, 0, NULL); if (phy_type & (1 << I40E_PHY_TYPE_40GBASE_LR4)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_40G_LR4, 0, NULL); #ifndef IFM_ETH_XTYPE if (phy_type & (1 << I40E_PHY_TYPE_1000BASE_KX)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_1000_CX, 0, NULL); if (phy_type & (1 << I40E_PHY_TYPE_10GBASE_CR1_CU) || phy_type & (1 << I40E_PHY_TYPE_10GBASE_CR1) || phy_type & (1 << I40E_PHY_TYPE_10GBASE_AOC) || phy_type & (1 << I40E_PHY_TYPE_SFI)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_10G_TWINAX, 0, NULL); if (phy_type & (1 << I40E_PHY_TYPE_10GBASE_KX4)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_10G_CX4, 0, NULL); if (phy_type & (1 << I40E_PHY_TYPE_10GBASE_KR)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_10G_SR, 0, NULL); if (phy_type & (1 << I40E_PHY_TYPE_40GBASE_KR4)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_40G_SR4, 0, NULL); if (phy_type & (1 << I40E_PHY_TYPE_XLPPI)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_40G_CR4, 0, NULL); #else if (phy_type & (1 << I40E_PHY_TYPE_1000BASE_KX)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_1000_KX, 0, NULL); if (phy_type & (1 << I40E_PHY_TYPE_10GBASE_CR1_CU) || phy_type & (1 << I40E_PHY_TYPE_10GBASE_CR1)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_10G_CR1, 0, NULL); if (phy_type & (1 << I40E_PHY_TYPE_10GBASE_AOC)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_10G_TWINAX_LONG, 0, NULL); if (phy_type & (1 << I40E_PHY_TYPE_SFI)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_10G_SFI, 0, NULL); if (phy_type & (1 << I40E_PHY_TYPE_10GBASE_KX4)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_10G_KX4, 0, NULL); if (phy_type & (1 << I40E_PHY_TYPE_10GBASE_KR)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_10G_KR, 0, NULL); if (phy_type & (1 << I40E_PHY_TYPE_20GBASE_KR2)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_20G_KR2, 0, NULL); if (phy_type & (1 << I40E_PHY_TYPE_40GBASE_KR4)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_40G_KR4, 0, NULL); if (phy_type & (1 << I40E_PHY_TYPE_XLPPI)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_40G_XLPPI, 0, NULL); #endif } /********************************************************************* * * Setup networking device structure and register an interface. 
 **********************************************************************/
static int
ixl_setup_interface(device_t dev, struct ixl_vsi *vsi)
{
	struct ifnet		*ifp;
	struct i40e_hw		*hw = vsi->hw;
	struct ixl_queue	*que = vsi->queues;
	struct i40e_aq_get_phy_abilities_resp abilities;
	enum i40e_status_code aq_error = 0;

	INIT_DEBUGOUT("ixl_setup_interface: begin");

	ifp = vsi->ifp = if_alloc(IFT_ETHER);
	if (ifp == NULL) {
		device_printf(dev, "cannot allocate ifnet structure\n");
		return (-1);
	}
	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
	ifp->if_mtu = ETHERMTU;
	ifp->if_baudrate = IF_Gbps(40);
	ifp->if_init = ixl_init;
	ifp->if_softc = vsi;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_ioctl = ixl_ioctl;

#if __FreeBSD_version >= 1100036
	if_setgetcounterfn(ifp, ixl_get_counter);
#endif

	ifp->if_transmit = ixl_mq_start;

	ifp->if_qflush = ixl_qflush;

	ifp->if_snd.ifq_maxlen = que->num_desc - 2;

	vsi->max_frame_size =
	    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN
	    + ETHER_VLAN_ENCAP_LEN;

	/*
	 * Tell the upper layer(s) we support long frames.
	 */
	ifp->if_hdrlen = sizeof(struct ether_vlan_header);

	ifp->if_capabilities |= IFCAP_HWCSUM;
	ifp->if_capabilities |= IFCAP_HWCSUM_IPV6;
	ifp->if_capabilities |= IFCAP_TSO;
	ifp->if_capabilities |= IFCAP_JUMBO_MTU;
	ifp->if_capabilities |= IFCAP_LRO;

	/* VLAN capabilities */
	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
			     |  IFCAP_VLAN_HWTSO
			     |  IFCAP_VLAN_MTU
			     |  IFCAP_VLAN_HWCSUM;
	ifp->if_capenable = ifp->if_capabilities;

	/*
	** Don't turn this on by default: if vlans are
	** created on another pseudo device (e.g. lagg)
	** then vlan events are not passed through, breaking
	** operation, but with HW FILTER off it works. If
	** using vlans directly on the ixl driver you can
	** enable this and get full hardware tag filtering.
	*/
	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;

	/*
	 * Specify the media types supported by this adapter and register
	 * callbacks to update media and link information
	 */
	ifmedia_init(&vsi->media, IFM_IMASK, ixl_media_change,
	    ixl_media_status);

	aq_error = i40e_aq_get_phy_capabilities(hw,
	    FALSE, TRUE, &abilities, NULL);
	/* May need delay to detect fiber correctly */
	if (aq_error == I40E_ERR_UNKNOWN_PHY) {
		i40e_msec_delay(200);
		aq_error = i40e_aq_get_phy_capabilities(hw, FALSE,
		    TRUE, &abilities, NULL);
	}
	if (aq_error) {
		if (aq_error == I40E_ERR_UNKNOWN_PHY)
			device_printf(dev, "Unknown PHY type detected!\n");
		else
			device_printf(dev,
			    "Error getting supported media types, err %d,"
			    " AQ error %d\n", aq_error, hw->aq.asq_last_status);
		return (0);
	}

	ixl_add_ifmedia(vsi, abilities.phy_type);

	/* Use autoselect media by default */
	ifmedia_add(&vsi->media, IFM_ETHER | IFM_AUTO, 0, NULL);
	ifmedia_set(&vsi->media, IFM_ETHER | IFM_AUTO);

	ether_ifattach(ifp, hw->mac.addr);

	return (0);
}

/*
** Run when the Admin Queue gets a link state change interrupt.
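** The event payload is only a hint that state changed; the handler
** re-reads the current link state from the hardware before pushing
** it to the stack.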
*/ static void ixl_link_event(struct ixl_pf *pf, struct i40e_arq_event_info *e) { struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; struct i40e_aqc_get_link_status *status = (struct i40e_aqc_get_link_status *)&e->desc.params.raw; /* Request link status from adapter */ hw->phy.get_link_info = TRUE; i40e_get_link_status(hw, &pf->link_up); /* Print out message if an unqualified module is found */ if ((status->link_info & I40E_AQ_MEDIA_AVAILABLE) && (!(status->an_info & I40E_AQ_QUALIFIED_MODULE)) && (!(status->link_info & I40E_AQ_LINK_UP))) device_printf(dev, "Link failed because " "an unqualified module was detected!\n"); /* Update OS link info */ ixl_update_link_status(pf); } /********************************************************************* * * Get Firmware Switch configuration * - this will need to be more robust when more complex * switch configurations are enabled. * **********************************************************************/ static int ixl_switch_config(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; struct ixl_vsi *vsi = &pf->vsi; device_t dev = vsi->dev; struct i40e_aqc_get_switch_config_resp *sw_config; u8 aq_buf[I40E_AQ_LARGE_BUF]; int ret; u16 next = 0; memset(&aq_buf, 0, sizeof(aq_buf)); sw_config = (struct i40e_aqc_get_switch_config_resp *)aq_buf; ret = i40e_aq_get_switch_config(hw, sw_config, sizeof(aq_buf), &next, NULL); if (ret) { device_printf(dev, "aq_get_switch_config() failed, error %d," " aq_error %d\n", ret, pf->hw.aq.asq_last_status); return (ret); } #ifdef IXL_DEBUG device_printf(dev, "Switch config: header reported: %d in structure, %d total\n", sw_config->header.num_reported, sw_config->header.num_total); for (int i = 0; i < sw_config->header.num_reported; i++) { device_printf(dev, "%d: type=%d seid=%d uplink=%d downlink=%d\n", i, sw_config->element[i].element_type, sw_config->element[i].seid, sw_config->element[i].uplink_seid, sw_config->element[i].downlink_seid); } #endif /* Simplified due to a single VSI at the moment */ vsi->uplink_seid = sw_config->element[0].uplink_seid; vsi->downlink_seid = sw_config->element[0].downlink_seid; vsi->seid = sw_config->element[0].seid; return (ret); } /********************************************************************* * * Initialize the VSI: this handles contexts, which means things * like the number of descriptors, buffer size, * plus we init the rings thru this function. * **********************************************************************/ static int ixl_initialize_vsi(struct ixl_vsi *vsi) { struct ixl_pf *pf = vsi->back; struct ixl_queue *que = vsi->queues; device_t dev = vsi->dev; struct i40e_hw *hw = vsi->hw; struct i40e_vsi_context ctxt; int err = 0; memset(&ctxt, 0, sizeof(ctxt)); ctxt.seid = vsi->seid; if (pf->veb_seid != 0) ctxt.uplink_seid = pf->veb_seid; ctxt.pf_num = hw->pf_id; err = i40e_aq_get_vsi_params(hw, &ctxt, NULL); if (err) { device_printf(dev, "i40e_aq_get_vsi_params() failed, error %d" " aq_error %d\n", err, hw->aq.asq_last_status); return (err); } #ifdef IXL_DEBUG device_printf(dev, "get_vsi_params: seid: %d, uplinkseid: %d, vsi_number: %d, " "vsis_allocated: %d, vsis_unallocated: %d, flags: 0x%x, " "pfnum: %d, vfnum: %d, stat idx: %d, enabled: %d\n", ctxt.seid, ctxt.uplink_seid, ctxt.vsi_number, ctxt.vsis_allocated, ctxt.vsis_unallocated, ctxt.flags, ctxt.pf_num, ctxt.vf_num, ctxt.info.stat_counter_idx, ctxt.info.up_enable_bits); #endif /* ** Set the queue and traffic class bits ** - when multiple traffic classes are supported ** this will need to be more robust. 
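	**
	** Worked example of the tc_mapping encoding used below:
	** a queue offset of 0 with a count exponent of 6 gives
	** traffic class 0 ownership of queues 0 through 63.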
*/ ctxt.info.valid_sections = I40E_AQ_VSI_PROP_QUEUE_MAP_VALID; ctxt.info.mapping_flags |= I40E_AQ_VSI_QUE_MAP_CONTIG; /* In contig mode, que_mapping[0] is first queue index used by this VSI */ ctxt.info.queue_mapping[0] = 0; /* * This VSI will only use traffic class 0; start traffic class 0's * queue allocation at queue 0, and assign it 64 (2^6) queues (though * the driver may not use all of them). */ ctxt.info.tc_mapping[0] = ((0 << I40E_AQ_VSI_TC_QUE_OFFSET_SHIFT) & I40E_AQ_VSI_TC_QUE_OFFSET_MASK) | ((6 << I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT) & I40E_AQ_VSI_TC_QUE_NUMBER_MASK); /* Set VLAN receive stripping mode */ ctxt.info.valid_sections |= I40E_AQ_VSI_PROP_VLAN_VALID; ctxt.info.port_vlan_flags = I40E_AQ_VSI_PVLAN_MODE_ALL; if (vsi->ifp->if_capenable & IFCAP_VLAN_HWTAGGING) ctxt.info.port_vlan_flags |= I40E_AQ_VSI_PVLAN_EMOD_STR_BOTH; else ctxt.info.port_vlan_flags |= I40E_AQ_VSI_PVLAN_EMOD_NOTHING; /* Keep copy of VSI info in VSI for statistic counters */ memcpy(&vsi->info, &ctxt.info, sizeof(ctxt.info)); /* Reset VSI statistics */ ixl_vsi_reset_stats(vsi); vsi->hw_filters_add = 0; vsi->hw_filters_del = 0; ctxt.flags = htole16(I40E_AQ_VSI_TYPE_PF); err = i40e_aq_update_vsi_params(hw, &ctxt, NULL); if (err) { device_printf(dev, "i40e_aq_update_vsi_params() failed, error %d, aq_error %d\n", err, hw->aq.asq_last_status); return (err); } for (int i = 0; i < vsi->num_queues; i++, que++) { struct tx_ring *txr = &que->txr; struct rx_ring *rxr = &que->rxr; struct i40e_hmc_obj_txq tctx; struct i40e_hmc_obj_rxq rctx; u32 txctl; u16 size; /* Setup the HMC TX Context */ size = que->num_desc * sizeof(struct i40e_tx_desc); memset(&tctx, 0, sizeof(struct i40e_hmc_obj_txq)); tctx.new_context = 1; tctx.base = (txr->dma.pa/IXL_TX_CTX_BASE_UNITS); tctx.qlen = que->num_desc; tctx.fc_ena = 0; tctx.rdylist = vsi->info.qs_handle[0]; /* index is TC */ /* Enable HEAD writeback */ tctx.head_wb_ena = 1; tctx.head_wb_addr = txr->dma.pa + (que->num_desc * sizeof(struct i40e_tx_desc)); tctx.rdylist_act = 0; err = i40e_clear_lan_tx_queue_context(hw, i); if (err) { device_printf(dev, "Unable to clear TX context\n"); break; } err = i40e_set_lan_tx_queue_context(hw, i, &tctx); if (err) { device_printf(dev, "Unable to set TX context\n"); break; } /* Associate the ring with this PF */ txctl = I40E_QTX_CTL_PF_QUEUE; txctl |= ((hw->pf_id << I40E_QTX_CTL_PF_INDX_SHIFT) & I40E_QTX_CTL_PF_INDX_MASK); wr32(hw, I40E_QTX_CTL(i), txctl); ixl_flush(hw); /* Do ring (re)init */ ixl_init_tx_ring(que); /* Next setup the HMC RX Context */ if (vsi->max_frame_size <= MCLBYTES) rxr->mbuf_sz = MCLBYTES; else rxr->mbuf_sz = MJUMPAGESIZE; u16 max_rxmax = rxr->mbuf_sz * hw->func_caps.rx_buf_chain_len; /* Set up an RX context for the HMC */ memset(&rctx, 0, sizeof(struct i40e_hmc_obj_rxq)); rctx.dbuff = rxr->mbuf_sz >> I40E_RXQ_CTX_DBUFF_SHIFT; /* ignore header split for now */ rctx.hbuff = 0 >> I40E_RXQ_CTX_HBUFF_SHIFT; rctx.rxmax = (vsi->max_frame_size < max_rxmax) ? 
vsi->max_frame_size : max_rxmax; rctx.dtype = 0; rctx.dsize = 1; /* do 32byte descriptors */ rctx.hsplit_0 = 0; /* no HDR split initially */ rctx.base = (rxr->dma.pa/IXL_RX_CTX_BASE_UNITS); rctx.qlen = que->num_desc; rctx.tphrdesc_ena = 1; rctx.tphwdesc_ena = 1; rctx.tphdata_ena = 0; rctx.tphhead_ena = 0; rctx.lrxqthresh = 2; rctx.crcstrip = 1; rctx.l2tsel = 1; rctx.showiv = 1; rctx.fc_ena = 0; rctx.prefena = 1; err = i40e_clear_lan_rx_queue_context(hw, i); if (err) { device_printf(dev, "Unable to clear RX context %d\n", i); break; } err = i40e_set_lan_rx_queue_context(hw, i, &rctx); if (err) { device_printf(dev, "Unable to set RX context %d\n", i); break; } err = ixl_init_rx_ring(que); if (err) { device_printf(dev, "Fail in init_rx_ring %d\n", i); break; } #ifdef DEV_NETMAP /* preserve queue */ if (vsi->ifp->if_capenable & IFCAP_NETMAP) { struct netmap_adapter *na = NA(vsi->ifp); struct netmap_kring *kring = &na->rx_rings[i]; int t = na->num_rx_desc - 1 - nm_kr_rxspace(kring); wr32(vsi->hw, I40E_QRX_TAIL(que->me), t); } else #endif /* DEV_NETMAP */ wr32(vsi->hw, I40E_QRX_TAIL(que->me), que->num_desc - 1); } return (err); } /********************************************************************* * * Free all VSI structs. * **********************************************************************/ void ixl_free_vsi(struct ixl_vsi *vsi) { struct ixl_pf *pf = (struct ixl_pf *)vsi->back; struct ixl_queue *que = vsi->queues; /* Free station queues */ if (!vsi->queues) goto free_filters; for (int i = 0; i < vsi->num_queues; i++, que++) { struct tx_ring *txr = &que->txr; struct rx_ring *rxr = &que->rxr; if (!mtx_initialized(&txr->mtx)) /* uninitialized */ continue; IXL_TX_LOCK(txr); ixl_free_que_tx(que); if (txr->base) i40e_free_dma_mem(&pf->hw, &txr->dma); IXL_TX_UNLOCK(txr); IXL_TX_LOCK_DESTROY(txr); if (!mtx_initialized(&rxr->mtx)) /* uninitialized */ continue; IXL_RX_LOCK(rxr); ixl_free_que_rx(que); if (rxr->base) i40e_free_dma_mem(&pf->hw, &rxr->dma); IXL_RX_UNLOCK(rxr); IXL_RX_LOCK_DESTROY(rxr); } free(vsi->queues, M_DEVBUF); free_filters: /* Free VSI filter list */ ixl_free_mac_filters(vsi); } static void ixl_free_mac_filters(struct ixl_vsi *vsi) { struct ixl_mac_filter *f; while (!SLIST_EMPTY(&vsi->ftl)) { f = SLIST_FIRST(&vsi->ftl); SLIST_REMOVE_HEAD(&vsi->ftl, next); free(f, M_DEVBUF); } } /********************************************************************* * * Allocate memory for the VSI (virtual station interface) and their * associated queues, rings and the descriptors associated with each, * called only once at attach. 
 **********************************************************************/
static int
ixl_setup_stations(struct ixl_pf *pf)
{
	device_t		dev = pf->dev;
	struct ixl_vsi		*vsi;
	struct ixl_queue	*que;
	struct tx_ring		*txr;
	struct rx_ring		*rxr;
	int			rsize, tsize;
	int			error = I40E_SUCCESS;

	vsi = &pf->vsi;
	vsi->back = (void *)pf;
	vsi->hw = &pf->hw;
	vsi->id = 0;
	vsi->num_vlans = 0;
	vsi->back = pf;

	/* Get memory for the station queues */
	if (!(vsi->queues =
	    (struct ixl_queue *) malloc(sizeof(struct ixl_queue) *
	    vsi->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
		device_printf(dev, "Unable to allocate queue memory\n");
		error = ENOMEM;
		goto early;
	}

	for (int i = 0; i < vsi->num_queues; i++) {
		que = &vsi->queues[i];
		que->num_desc = ixl_ringsz;
		que->me = i;
		que->vsi = vsi;
		/* mark the queue as active */
		vsi->active_queues |= (u64)1 << que->me;
		txr = &que->txr;
		txr->que = que;
		txr->tail = I40E_QTX_TAIL(que->me);

		/* Initialize the TX lock */
		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
		    device_get_nameunit(dev), que->me);
		mtx_init(&txr->mtx, txr->mtx_name, NULL, MTX_DEF);
		/* Create the TX descriptor ring */
		tsize = roundup2((que->num_desc *
		    sizeof(struct i40e_tx_desc)) +
		    sizeof(u32), DBA_ALIGN);
		if (i40e_allocate_dma_mem(&pf->hw,
		    &txr->dma, i40e_mem_reserved, tsize, DBA_ALIGN)) {
			device_printf(dev,
			    "Unable to allocate TX Descriptor memory\n");
			error = ENOMEM;
			goto fail;
		}
		txr->base = (struct i40e_tx_desc *)txr->dma.va;
		bzero((void *)txr->base, tsize);
		/* Now allocate transmit soft structs for the ring */
		if (ixl_allocate_tx_data(que)) {
			device_printf(dev,
			    "Critical Failure setting up TX structures\n");
			error = ENOMEM;
			goto fail;
		}
		/* Allocate a buf ring */
		txr->br = buf_ring_alloc(4096, M_DEVBUF,
		    M_NOWAIT, &txr->mtx);
		if (txr->br == NULL) {
			device_printf(dev,
			    "Critical Failure setting up TX buf ring\n");
			error = ENOMEM;
			goto fail;
		}

		/*
		 * Next the RX queues...
		 */
		rsize = roundup2(que->num_desc *
		    sizeof(union i40e_rx_desc), DBA_ALIGN);
		rxr = &que->rxr;
		rxr->que = que;
		rxr->tail = I40E_QRX_TAIL(que->me);

		/* Initialize the RX side lock */
		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
		    device_get_nameunit(dev), que->me);
		mtx_init(&rxr->mtx, rxr->mtx_name, NULL, MTX_DEF);

		if (i40e_allocate_dma_mem(&pf->hw,
		    &rxr->dma, i40e_mem_reserved, rsize, 4096)) {
			device_printf(dev,
			    "Unable to allocate RX Descriptor memory\n");
			error = ENOMEM;
			goto fail;
		}
		rxr->base = (union i40e_rx_desc *)rxr->dma.va;
		bzero((void *)rxr->base, rsize);

		/* Allocate receive soft structs for the ring */
		if (ixl_allocate_rx_data(que)) {
			device_printf(dev,
			    "Critical Failure setting up receive structs\n");
			error = ENOMEM;
			goto fail;
		}
	}

	return (0);

fail:
	for (int i = 0; i < vsi->num_queues; i++) {
		que = &vsi->queues[i];
		rxr = &que->rxr;
		txr = &que->txr;
		if (rxr->base)
			i40e_free_dma_mem(&pf->hw, &rxr->dma);
		if (txr->base)
			i40e_free_dma_mem(&pf->hw, &txr->dma);
	}

early:
	return (error);
}

/*
** Provide an update to the queue RX
** interrupt moderation value.
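**
** With dynamic ITR the new target is blended into the current
** value as new = (10 * target * old) / (9 * target + old), a
** harmonic mean weighted 9:1 toward the old value, so the
** moderation interval adapts gradually (e.g. old = 62 and
** target = 25 yield roughly 54).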
*/
static void
ixl_set_queue_rx_itr(struct ixl_queue *que)
{
	struct ixl_vsi	*vsi = que->vsi;
	struct i40e_hw	*hw = vsi->hw;
	struct rx_ring	*rxr = &que->rxr;
	u16		rx_itr;
	u16		rx_latency = 0;
	int		rx_bytes;

	/* Idle, do nothing */
	if (rxr->bytes == 0)
		return;

	if (ixl_dynamic_rx_itr) {
		rx_bytes = rxr->bytes/rxr->itr;
		rx_itr = rxr->itr;

		/* Adjust latency range */
		switch (rxr->latency) {
		case IXL_LOW_LATENCY:
			if (rx_bytes > 10) {
				rx_latency = IXL_AVE_LATENCY;
				rx_itr = IXL_ITR_20K;
			}
			break;
		case IXL_AVE_LATENCY:
			if (rx_bytes > 20) {
				rx_latency = IXL_BULK_LATENCY;
				rx_itr = IXL_ITR_8K;
			} else if (rx_bytes <= 10) {
				rx_latency = IXL_LOW_LATENCY;
				rx_itr = IXL_ITR_100K;
			}
			break;
		case IXL_BULK_LATENCY:
			if (rx_bytes <= 20) {
				rx_latency = IXL_AVE_LATENCY;
				rx_itr = IXL_ITR_20K;
			}
			break;
		}

		rxr->latency = rx_latency;

		if (rx_itr != rxr->itr) {
			/* do an exponential smoothing */
			rx_itr = (10 * rx_itr * rxr->itr) /
			    ((9 * rx_itr) + rxr->itr);
			rxr->itr = rx_itr & IXL_MAX_ITR;
			wr32(hw, I40E_PFINT_ITRN(IXL_RX_ITR,
			    que->me), rxr->itr);
		}
	} else { /* We may have toggled to non-dynamic */
		if (vsi->rx_itr_setting & IXL_ITR_DYNAMIC)
			vsi->rx_itr_setting = ixl_rx_itr;
		/* Update the hardware if needed */
		if (rxr->itr != vsi->rx_itr_setting) {
			rxr->itr = vsi->rx_itr_setting;
			wr32(hw, I40E_PFINT_ITRN(IXL_RX_ITR,
			    que->me), rxr->itr);
		}
	}
	rxr->bytes = 0;
	rxr->packets = 0;
	return;
}

/*
** Provide an update to the queue TX
** interrupt moderation value.
*/
static void
ixl_set_queue_tx_itr(struct ixl_queue *que)
{
	struct ixl_vsi	*vsi = que->vsi;
	struct i40e_hw	*hw = vsi->hw;
	struct tx_ring	*txr = &que->txr;
	u16		tx_itr;
	u16		tx_latency = 0;
	int		tx_bytes;

	/* Idle, do nothing */
	if (txr->bytes == 0)
		return;

	if (ixl_dynamic_tx_itr) {
		tx_bytes = txr->bytes/txr->itr;
		tx_itr = txr->itr;

		switch (txr->latency) {
		case IXL_LOW_LATENCY:
			if (tx_bytes > 10) {
				tx_latency = IXL_AVE_LATENCY;
				tx_itr = IXL_ITR_20K;
			}
			break;
		case IXL_AVE_LATENCY:
			if (tx_bytes > 20) {
				tx_latency = IXL_BULK_LATENCY;
				tx_itr = IXL_ITR_8K;
			} else if (tx_bytes <= 10) {
				tx_latency = IXL_LOW_LATENCY;
				tx_itr = IXL_ITR_100K;
			}
			break;
		case IXL_BULK_LATENCY:
			if (tx_bytes <= 20) {
				tx_latency = IXL_AVE_LATENCY;
				tx_itr = IXL_ITR_20K;
			}
			break;
		}

		txr->latency = tx_latency;

		if (tx_itr != txr->itr) {
			/* do an exponential smoothing */
			tx_itr = (10 * tx_itr * txr->itr) /
			    ((9 * tx_itr) + txr->itr);
			txr->itr = tx_itr & IXL_MAX_ITR;
			wr32(hw, I40E_PFINT_ITRN(IXL_TX_ITR,
			    que->me), txr->itr);
		}

	} else { /* We may have toggled to non-dynamic */
		if (vsi->tx_itr_setting & IXL_ITR_DYNAMIC)
			vsi->tx_itr_setting = ixl_tx_itr;
		/* Update the hardware if needed */
		if (txr->itr != vsi->tx_itr_setting) {
			txr->itr = vsi->tx_itr_setting;
			wr32(hw, I40E_PFINT_ITRN(IXL_TX_ITR,
			    que->me), txr->itr);
		}
	}
	txr->bytes = 0;
	txr->packets = 0;
	return;
}

#define QUEUE_NAME_LEN 32

static void
ixl_add_vsi_sysctls(struct ixl_pf *pf, struct ixl_vsi *vsi,
    struct sysctl_ctx_list *ctx, const char *sysctl_name)
{
	struct sysctl_oid *tree;
	struct sysctl_oid_list *child;
	struct sysctl_oid_list *vsi_list;

	tree = device_get_sysctl_tree(pf->dev);
	child = SYSCTL_CHILDREN(tree);
	vsi->vsi_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, sysctl_name,
	    CTLFLAG_RD, NULL, "VSI Number");
	vsi_list = SYSCTL_CHILDREN(vsi->vsi_node);

	ixl_add_sysctls_eth_stats(ctx, vsi_list, &vsi->eth_stats);
}

#ifdef IXL_DEBUG
/**
 * ixl_sysctl_qtx_tail_handler
 * Retrieves I40E_QTX_TAIL value from hardware
 * for a sysctl.
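 * The target queue is passed via oidp->oid_arg1 and the register
 * is read fresh on each access.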
*/ static int ixl_sysctl_qtx_tail_handler(SYSCTL_HANDLER_ARGS) { struct ixl_queue *que; int error; u32 val; que = ((struct ixl_queue *)oidp->oid_arg1); if (!que) return 0; val = rd32(que->vsi->hw, que->txr.tail); error = sysctl_handle_int(oidp, &val, 0, req); if (error || !req->newptr) return error; return (0); } /** * ixl_sysctl_qrx_tail_handler * Retrieves I40E_QRX_TAIL value from hardware * for a sysctl. */ static int ixl_sysctl_qrx_tail_handler(SYSCTL_HANDLER_ARGS) { struct ixl_queue *que; int error; u32 val; que = ((struct ixl_queue *)oidp->oid_arg1); if (!que) return 0; val = rd32(que->vsi->hw, que->rxr.tail); error = sysctl_handle_int(oidp, &val, 0, req); if (error || !req->newptr) return error; return (0); } #endif static void ixl_add_hw_stats(struct ixl_pf *pf) { device_t dev = pf->dev; struct ixl_vsi *vsi = &pf->vsi; struct ixl_queue *queues = vsi->queues; struct i40e_hw_port_stats *pf_stats = &pf->stats; struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev); struct sysctl_oid *tree = device_get_sysctl_tree(dev); struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree); struct sysctl_oid_list *vsi_list; struct sysctl_oid *queue_node; struct sysctl_oid_list *queue_list; struct tx_ring *txr; struct rx_ring *rxr; char queue_namebuf[QUEUE_NAME_LEN]; /* Driver statistics */ SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_events", CTLFLAG_RD, &pf->watchdog_events, "Watchdog timeouts"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "admin_irq", CTLFLAG_RD, &pf->admin_irq, "Admin Queue IRQ Handled"); ixl_add_vsi_sysctls(pf, &pf->vsi, ctx, "pf"); vsi_list = SYSCTL_CHILDREN(pf->vsi.vsi_node); /* Queue statistics */ for (int q = 0; q < vsi->num_queues; q++) { snprintf(queue_namebuf, QUEUE_NAME_LEN, "que%d", q); queue_node = SYSCTL_ADD_NODE(ctx, vsi_list, OID_AUTO, queue_namebuf, CTLFLAG_RD, NULL, "Queue #"); queue_list = SYSCTL_CHILDREN(queue_node); txr = &(queues[q].txr); rxr = &(queues[q].rxr); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "mbuf_defrag_failed", CTLFLAG_RD, &(queues[q].mbuf_defrag_failed), "m_defrag() failed"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "irqs", CTLFLAG_RD, &(queues[q].irqs), "irqs on this queue"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tso_tx", CTLFLAG_RD, &(queues[q].tso), "TSO"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_dma_setup", CTLFLAG_RD, &(queues[q].tx_dma_setup), "Driver tx dma failure in xmit"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "no_desc_avail", CTLFLAG_RD, &(txr->no_desc), "Queue No Descriptor Available"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_packets", CTLFLAG_RD, &(txr->total_packets), "Queue Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_bytes", CTLFLAG_RD, &(txr->tx_bytes), "Queue Bytes Transmitted"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_packets", CTLFLAG_RD, &(rxr->rx_packets), "Queue Packets Received"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_bytes", CTLFLAG_RD, &(rxr->rx_bytes), "Queue Bytes Received"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_desc_err", CTLFLAG_RD, &(rxr->desc_errs), "Queue Rx Descriptor Errors"); SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "rx_itr", CTLFLAG_RD, &(rxr->itr), 0, "Queue Rx ITR Interval"); SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "tx_itr", CTLFLAG_RD, &(txr->itr), 0, "Queue Tx ITR Interval"); // Not actual latency; just a calculated value to put in a register // TODO: Put in better descriptions here SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "rx_latency", CTLFLAG_RD, &(rxr->latency), 0, "Queue Rx ITRL Average Interval"); 
SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "tx_latency", CTLFLAG_RD, &(txr->latency), 0, "Queue Tx ITRL Average Interval"); #ifdef IXL_DEBUG SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_not_done", CTLFLAG_RD, &(rxr->not_done), "Queue Rx Descriptors not Done"); SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "rx_next_refresh", CTLFLAG_RD, &(rxr->next_refresh), 0, "Queue Rx Descriptors not Done"); SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "rx_next_check", CTLFLAG_RD, &(rxr->next_check), 0, "Queue Rx Descriptors not Done"); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "qtx_tail", CTLTYPE_UINT | CTLFLAG_RD, &queues[q], sizeof(struct ixl_queue), ixl_sysctl_qtx_tail_handler, "IU", "Queue Transmit Descriptor Tail"); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "qrx_tail", CTLTYPE_UINT | CTLFLAG_RD, &queues[q], sizeof(struct ixl_queue), ixl_sysctl_qrx_tail_handler, "IU", "Queue Receive Descriptor Tail"); #endif } /* MAC stats */ ixl_add_sysctls_mac_stats(ctx, child, pf_stats); } static void ixl_add_sysctls_eth_stats(struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child, struct i40e_eth_stats *eth_stats) { struct ixl_sysctl_info ctls[] = { {ð_stats->rx_bytes, "good_octets_rcvd", "Good Octets Received"}, {ð_stats->rx_unicast, "ucast_pkts_rcvd", "Unicast Packets Received"}, {ð_stats->rx_multicast, "mcast_pkts_rcvd", "Multicast Packets Received"}, {ð_stats->rx_broadcast, "bcast_pkts_rcvd", "Broadcast Packets Received"}, {ð_stats->rx_discards, "rx_discards", "Discarded RX packets"}, {ð_stats->tx_bytes, "good_octets_txd", "Good Octets Transmitted"}, {ð_stats->tx_unicast, "ucast_pkts_txd", "Unicast Packets Transmitted"}, {ð_stats->tx_multicast, "mcast_pkts_txd", "Multicast Packets Transmitted"}, {ð_stats->tx_broadcast, "bcast_pkts_txd", "Broadcast Packets Transmitted"}, // end {0,0,0} }; struct ixl_sysctl_info *entry = ctls; while (entry->stat != NULL) { SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, entry->name, CTLFLAG_RD, entry->stat, entry->description); entry++; } } static void ixl_add_sysctls_mac_stats(struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child, struct i40e_hw_port_stats *stats) { struct sysctl_oid *stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac", CTLFLAG_RD, NULL, "Mac Statistics"); struct sysctl_oid_list *stat_list = SYSCTL_CHILDREN(stat_node); struct i40e_eth_stats *eth_stats = &stats->eth; ixl_add_sysctls_eth_stats(ctx, stat_list, eth_stats); struct ixl_sysctl_info ctls[] = { {&stats->crc_errors, "crc_errors", "CRC Errors"}, {&stats->illegal_bytes, "illegal_bytes", "Illegal Byte Errors"}, {&stats->mac_local_faults, "local_faults", "MAC Local Faults"}, {&stats->mac_remote_faults, "remote_faults", "MAC Remote Faults"}, {&stats->rx_length_errors, "rx_length_errors", "Receive Length Errors"}, /* Packet Reception Stats */ {&stats->rx_size_64, "rx_frames_64", "64 byte frames received"}, {&stats->rx_size_127, "rx_frames_65_127", "65-127 byte frames received"}, {&stats->rx_size_255, "rx_frames_128_255", "128-255 byte frames received"}, {&stats->rx_size_511, "rx_frames_256_511", "256-511 byte frames received"}, {&stats->rx_size_1023, "rx_frames_512_1023", "512-1023 byte frames received"}, {&stats->rx_size_1522, "rx_frames_1024_1522", "1024-1522 byte frames received"}, {&stats->rx_size_big, "rx_frames_big", "1523-9522 byte frames received"}, {&stats->rx_undersize, "rx_undersize", "Undersized packets received"}, {&stats->rx_fragments, "rx_fragmented", "Fragmented packets received"}, {&stats->rx_oversize, "rx_oversized", "Oversized packets received"}, {&stats->rx_jabber, "rx_jabber", 
"Received Jabber"}, {&stats->checksum_error, "checksum_errors", "Checksum Errors"}, /* Packet Transmission Stats */ {&stats->tx_size_64, "tx_frames_64", "64 byte frames transmitted"}, {&stats->tx_size_127, "tx_frames_65_127", "65-127 byte frames transmitted"}, {&stats->tx_size_255, "tx_frames_128_255", "128-255 byte frames transmitted"}, {&stats->tx_size_511, "tx_frames_256_511", "256-511 byte frames transmitted"}, {&stats->tx_size_1023, "tx_frames_512_1023", "512-1023 byte frames transmitted"}, {&stats->tx_size_1522, "tx_frames_1024_1522", "1024-1522 byte frames transmitted"}, {&stats->tx_size_big, "tx_frames_big", "1523-9522 byte frames transmitted"}, /* Flow control */ {&stats->link_xon_tx, "xon_txd", "Link XON transmitted"}, {&stats->link_xon_rx, "xon_recvd", "Link XON received"}, {&stats->link_xoff_tx, "xoff_txd", "Link XOFF transmitted"}, {&stats->link_xoff_rx, "xoff_recvd", "Link XOFF received"}, /* End */ {0,0,0} }; struct ixl_sysctl_info *entry = ctls; while (entry->stat != NULL) { SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, entry->name, CTLFLAG_RD, entry->stat, entry->description); entry++; } } /* ** ixl_config_rss - setup RSS ** - note this is done for the single vsi */ static void ixl_config_rss(struct ixl_vsi *vsi) { struct ixl_pf *pf = (struct ixl_pf *)vsi->back; struct i40e_hw *hw = vsi->hw; u32 lut = 0; u64 set_hena = 0, hena; int i, j, que_id; #ifdef RSS u32 rss_hash_config; u32 rss_seed[IXL_KEYSZ]; #else u32 rss_seed[IXL_KEYSZ] = {0x41b01687, 0x183cfd8c, 0xce880440, 0x580cbc3c, 0x35897377, 0x328b25e1, 0x4fa98922, 0xb7d90c14, 0xd5bad70d, 0xcd15a2c1}; #endif #ifdef RSS /* Fetch the configured RSS key */ rss_getkey((uint8_t *) &rss_seed); #endif /* Fill out hash function seed */ for (i = 0; i < IXL_KEYSZ; i++) i40e_write_rx_ctl(hw, I40E_PFQF_HKEY(i), rss_seed[i]); /* Enable PCTYPES for RSS: */ #ifdef RSS rss_hash_config = rss_gethashconfig(); if (rss_hash_config & RSS_HASHTYPE_RSS_IPV4) set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV4_OTHER); if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV4) set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV4_TCP); if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV4) set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV4_UDP); if (rss_hash_config & RSS_HASHTYPE_RSS_IPV6) set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV6_OTHER); if (rss_hash_config & RSS_HASHTYPE_RSS_IPV6_EX) set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_FRAG_IPV6); if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV6) set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV6_TCP); if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV6) set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV6_UDP); #else set_hena = ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV4_UDP) | ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV4_TCP) | ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV4_SCTP) | ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV4_OTHER) | ((u64)1 << I40E_FILTER_PCTYPE_FRAG_IPV4) | ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV6_UDP) | ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV6_TCP) | ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV6_SCTP) | ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV6_OTHER) | ((u64)1 << I40E_FILTER_PCTYPE_FRAG_IPV6) | ((u64)1 << I40E_FILTER_PCTYPE_L2_PAYLOAD); #endif hena = (u64)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(0)) | ((u64)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(1)) << 32); hena |= set_hena; i40e_write_rx_ctl(hw, I40E_PFQF_HENA(0), (u32)hena); i40e_write_rx_ctl(hw, I40E_PFQF_HENA(1), (u32)(hena >> 32)); /* Populate the LUT with max no. 
of queues in round robin fashion */
	for (i = j = 0; i < pf->hw.func_caps.rss_table_size; i++, j++) {
		if (j == vsi->num_queues)
			j = 0;
#ifdef RSS
		/*
		 * Fetch the RSS bucket id for the given indirection entry.
		 * Cap it at the number of configured buckets (which is
		 * num_queues.)
		 */
		que_id = rss_get_indirection_to_bucket(i);
		que_id = que_id % vsi->num_queues;
#else
		que_id = j;
#endif
		/* lut = 4-byte sliding window of 4 lut entries */
		lut = (lut << 8) | (que_id &
		    ((0x1 << pf->hw.func_caps.rss_table_entry_width) - 1));
		/* On i = 3, we have 4 entries in lut; write to the register */
		if ((i & 3) == 3)
			wr32(hw, I40E_PFQF_HLUT(i >> 2), lut);
	}
	ixl_flush(hw);
}

/*
** This routine is run via a vlan config EVENT;
** it enables us to use the HW Filter table since
** we can get the vlan id. This just creates the
** entry in the soft version of the VFTA, init will
** repopulate the real table.
*/
static void
ixl_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
{
	struct ixl_vsi	*vsi = ifp->if_softc;
	struct i40e_hw	*hw = vsi->hw;
	struct ixl_pf	*pf = (struct ixl_pf *)vsi->back;

	if (ifp->if_softc != arg)	/* Not our event */
		return;

	if ((vtag == 0) || (vtag > 4095))	/* Invalid */
		return;

	IXL_PF_LOCK(pf);
	++vsi->num_vlans;
	ixl_add_filter(vsi, hw->mac.addr, vtag);
	IXL_PF_UNLOCK(pf);
}

/*
** This routine is run via a vlan
** unconfig EVENT; remove our entry
** in the soft vfta.
*/
static void
ixl_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
{
	struct ixl_vsi	*vsi = ifp->if_softc;
	struct i40e_hw	*hw = vsi->hw;
	struct ixl_pf	*pf = (struct ixl_pf *)vsi->back;

	if (ifp->if_softc != arg)
		return;

	if ((vtag == 0) || (vtag > 4095))	/* Invalid */
		return;

	IXL_PF_LOCK(pf);
	--vsi->num_vlans;
	ixl_del_filter(vsi, hw->mac.addr, vtag);
	IXL_PF_UNLOCK(pf);
}

/*
** This routine updates vlan filters; called by init,
** it scans the filter table and then updates the hw
** after a soft reset.
*/
static void
ixl_setup_vlan_filters(struct ixl_vsi *vsi)
{
	struct ixl_mac_filter	*f;
	int			cnt = 0, flags;

	if (vsi->num_vlans == 0)
		return;
	/*
	** Scan the filter list for vlan entries,
	** mark them for addition and then call
	** for the AQ update.
	*/
	SLIST_FOREACH(f, &vsi->ftl, next) {
		if (f->flags & IXL_FILTER_VLAN) {
			f->flags |=
			    (IXL_FILTER_ADD |
			    IXL_FILTER_USED);
			cnt++;
		}
	}
	if (cnt == 0) {
		printf("setup vlan: no filters found!\n");
		return;
	}
	flags = IXL_FILTER_VLAN;
	flags |= (IXL_FILTER_ADD | IXL_FILTER_USED);
	ixl_add_hw_filters(vsi, flags, cnt);
	return;
}

/*
** Initialize filter list and add filters that the hardware
** needs to know about.
**
** Requires VSI's filter list & seid to be set before calling.
*/
static void
ixl_init_filters(struct ixl_vsi *vsi)
{
	/* Add broadcast address */
	ixl_add_filter(vsi, ixl_bcast_addr, IXL_VLAN_ANY);

	/*
	 * Prevent Tx flow control frames from being sent out by
	 * non-firmware transmitters.
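	 * (The filter is keyed on the IEEE pause-frame ethertype,
	 * 0x8808, so software-queued flow control frames are dropped
	 * while firmware-generated ones still pass.)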
	 */
	i40e_add_filter_to_drop_tx_flow_control_frames(vsi->hw, vsi->seid);
}

/*
** This routine adds multicast filters
*/
static void
ixl_add_mc_filter(struct ixl_vsi *vsi, u8 *macaddr)
{
	struct ixl_mac_filter *f;

	/* Does one already exist? */
	f = ixl_find_filter(vsi, macaddr, IXL_VLAN_ANY);
	if (f != NULL)
		return;

	f = ixl_get_filter(vsi);
	if (f == NULL) {
		printf("WARNING: no filter available!!\n");
		return;
	}
	bcopy(macaddr, f->macaddr, ETHER_ADDR_LEN);
	f->vlan = IXL_VLAN_ANY;
	f->flags |= (IXL_FILTER_ADD | IXL_FILTER_USED
	    | IXL_FILTER_MC);

	return;
}

static void
ixl_reconfigure_filters(struct ixl_vsi *vsi)
{
	ixl_add_hw_filters(vsi, IXL_FILTER_USED, vsi->num_macs);
}

/*
** This routine adds macvlan filters
*/
static void
ixl_add_filter(struct ixl_vsi *vsi, u8 *macaddr, s16 vlan)
{
	struct ixl_mac_filter	*f, *tmp;
	struct ixl_pf		*pf;
	device_t		dev;

	DEBUGOUT("ixl_add_filter: begin");

	pf = vsi->back;
	dev = pf->dev;

	/* Does one already exist? */
	f = ixl_find_filter(vsi, macaddr, vlan);
	if (f != NULL)
		return;
	/*
	** If this is the first vlan being registered, we
	** need to remove the ANY filter that indicates we are
	** not in a vlan, and replace that with a 0 filter.
	*/
	if ((vlan != IXL_VLAN_ANY) && (vsi->num_vlans == 1)) {
		tmp = ixl_find_filter(vsi, macaddr, IXL_VLAN_ANY);
		if (tmp != NULL) {
			ixl_del_filter(vsi, macaddr, IXL_VLAN_ANY);
			ixl_add_filter(vsi, macaddr, 0);
		}
	}

	f = ixl_get_filter(vsi);
	if (f == NULL) {
		device_printf(dev, "WARNING: no filter available!!\n");
		return;
	}
	bcopy(macaddr, f->macaddr, ETHER_ADDR_LEN);
	f->vlan = vlan;
	f->flags |= (IXL_FILTER_ADD | IXL_FILTER_USED);
	if (f->vlan != IXL_VLAN_ANY)
		f->flags |= IXL_FILTER_VLAN;
	else
		vsi->num_macs++;

	ixl_add_hw_filters(vsi, f->flags, 1);
	return;
}

static void
ixl_del_filter(struct ixl_vsi *vsi, u8 *macaddr, s16 vlan)
{
	struct ixl_mac_filter *f;

	f = ixl_find_filter(vsi, macaddr, vlan);
	if (f == NULL)
		return;

	f->flags |= IXL_FILTER_DEL;
	ixl_del_hw_filters(vsi, 1);
	vsi->num_macs--;

	/* Check if this is the last vlan removal */
	if (vlan != IXL_VLAN_ANY && vsi->num_vlans == 0) {
		/* Switch back to a non-vlan filter */
		ixl_del_filter(vsi, macaddr, 0);
		ixl_add_filter(vsi, macaddr, IXL_VLAN_ANY);
	}
	return;
}

/*
** Find the filter with both matching mac addr and vlan id
*/
static struct ixl_mac_filter *
ixl_find_filter(struct ixl_vsi *vsi, u8 *macaddr, s16 vlan)
{
	struct ixl_mac_filter	*f;
	bool			match = FALSE;

	SLIST_FOREACH(f, &vsi->ftl, next) {
		if (!cmp_etheraddr(f->macaddr, macaddr))
			continue;
		if (f->vlan == vlan) {
			match = TRUE;
			break;
		}
	}

	if (!match)
		f = NULL;
	return (f);
}

/*
** This routine takes additions to the vsi filter
** table and creates an Admin Queue call to create
** the filters in the hardware.
*/
static void
ixl_add_hw_filters(struct ixl_vsi *vsi, int flags, int cnt)
{
	struct i40e_aqc_add_macvlan_element_data *a, *b;
	struct ixl_mac_filter	*f;
	struct ixl_pf		*pf;
	struct i40e_hw		*hw;
	device_t		dev;
	int			err, j = 0;

	pf = vsi->back;
	dev = pf->dev;
	hw = &pf->hw;
	IXL_PF_LOCK_ASSERT(pf);

	a = malloc(sizeof(struct i40e_aqc_add_macvlan_element_data) * cnt,
	    M_DEVBUF, M_NOWAIT | M_ZERO);
	if (a == NULL) {
		device_printf(dev, "add_hw_filters failed to get memory\n");
		return;
	}

	/*
	** Scan the filter list; each time we find one,
	** we add it to the admin queue array and turn off
	** the add bit.
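	**
	** At most cnt entries are batched into the single
	** i40e_aq_add_macvlan() admin queue call below.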
*/ SLIST_FOREACH(f, &vsi->ftl, next) { if (f->flags == flags) { b = &a[j]; // a pox on fvl long names :) bcopy(f->macaddr, b->mac_addr, ETHER_ADDR_LEN); if (f->vlan == IXL_VLAN_ANY) { b->vlan_tag = 0; b->flags = I40E_AQC_MACVLAN_ADD_IGNORE_VLAN; } else { b->vlan_tag = f->vlan; b->flags = 0; } b->flags |= I40E_AQC_MACVLAN_ADD_PERFECT_MATCH; f->flags &= ~IXL_FILTER_ADD; j++; } if (j == cnt) break; } if (j > 0) { err = i40e_aq_add_macvlan(hw, vsi->seid, a, j, NULL); if (err) device_printf(dev, "aq_add_macvlan err %d, " "aq_error %d\n", err, hw->aq.asq_last_status); else vsi->hw_filters_add += j; } free(a, M_DEVBUF); return; } /* ** This routine takes removals in the vsi filter ** table and creates an Admin Queue call to delete ** the filters in the hardware. */ static void ixl_del_hw_filters(struct ixl_vsi *vsi, int cnt) { struct i40e_aqc_remove_macvlan_element_data *d, *e; struct ixl_pf *pf; struct i40e_hw *hw; device_t dev; struct ixl_mac_filter *f, *f_temp; int err, j = 0; DEBUGOUT("ixl_del_hw_filters: begin\n"); pf = vsi->back; hw = &pf->hw; dev = pf->dev; d = malloc(sizeof(struct i40e_aqc_remove_macvlan_element_data) * cnt, M_DEVBUF, M_NOWAIT | M_ZERO); if (d == NULL) { printf("del hw filter failed to get memory\n"); return; } SLIST_FOREACH_SAFE(f, &vsi->ftl, next, f_temp) { if (f->flags & IXL_FILTER_DEL) { e = &d[j]; // a pox on fvl long names :) bcopy(f->macaddr, e->mac_addr, ETHER_ADDR_LEN); e->vlan_tag = (f->vlan == IXL_VLAN_ANY ? 0 : f->vlan); e->flags = I40E_AQC_MACVLAN_DEL_PERFECT_MATCH; /* delete entry from vsi list */ SLIST_REMOVE(&vsi->ftl, f, ixl_mac_filter, next); free(f, M_DEVBUF); j++; } if (j == cnt) break; } if (j > 0) { err = i40e_aq_remove_macvlan(hw, vsi->seid, d, j, NULL); /* NOTE: returns ENOENT every time but seems to work fine, so we'll ignore that specific error. */ // TODO: Does this still occur on current firmwares? 
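/*
 * Per-element status: d[i].error_code is zero on success, so summing
 * !d[i].error_code counts the removals that actually succeeded; the
 * difference (j - sc) is the number of failures. Example: with j == 5
 * queued removals and only d[2].error_code set, sc == 4 and
 * "Failed to remove 1/5 filters" is printed.
 */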
if (err && hw->aq.asq_last_status != I40E_AQ_RC_ENOENT) { int sc = 0; for (int i = 0; i < j; i++) sc += (!d[i].error_code); vsi->hw_filters_del += sc; device_printf(dev, "Failed to remove %d/%d filters, aq error %d\n", j - sc, j, hw->aq.asq_last_status); } else vsi->hw_filters_del += j; } free(d, M_DEVBUF); DEBUGOUT("ixl_del_hw_filters: end\n"); return; } static int ixl_enable_rings(struct ixl_vsi *vsi) { struct ixl_pf *pf = vsi->back; struct i40e_hw *hw = &pf->hw; int index, error; u32 reg; error = 0; for (int i = 0; i < vsi->num_queues; i++) { index = vsi->first_queue + i; i40e_pre_tx_queue_cfg(hw, index, TRUE); reg = rd32(hw, I40E_QTX_ENA(index)); reg |= I40E_QTX_ENA_QENA_REQ_MASK | I40E_QTX_ENA_QENA_STAT_MASK; wr32(hw, I40E_QTX_ENA(index), reg); /* Verify the enable took */ for (int j = 0; j < 10; j++) { reg = rd32(hw, I40E_QTX_ENA(index)); if (reg & I40E_QTX_ENA_QENA_STAT_MASK) break; i40e_msec_delay(10); } if ((reg & I40E_QTX_ENA_QENA_STAT_MASK) == 0) { device_printf(pf->dev, "TX queue %d disabled!\n", index); error = ETIMEDOUT; } reg = rd32(hw, I40E_QRX_ENA(index)); reg |= I40E_QRX_ENA_QENA_REQ_MASK | I40E_QRX_ENA_QENA_STAT_MASK; wr32(hw, I40E_QRX_ENA(index), reg); /* Verify the enable took */ for (int j = 0; j < 10; j++) { reg = rd32(hw, I40E_QRX_ENA(index)); if (reg & I40E_QRX_ENA_QENA_STAT_MASK) break; i40e_msec_delay(10); } if ((reg & I40E_QRX_ENA_QENA_STAT_MASK) == 0) { device_printf(pf->dev, "RX queue %d disabled!\n", index); error = ETIMEDOUT; } } return (error); } static int ixl_disable_rings(struct ixl_vsi *vsi) { struct ixl_pf *pf = vsi->back; struct i40e_hw *hw = &pf->hw; int index, error; u32 reg; error = 0; for (int i = 0; i < vsi->num_queues; i++) { index = vsi->first_queue + i; i40e_pre_tx_queue_cfg(hw, index, FALSE); i40e_usec_delay(500); reg = rd32(hw, I40E_QTX_ENA(index)); reg &= ~I40E_QTX_ENA_QENA_REQ_MASK; wr32(hw, I40E_QTX_ENA(index), reg); /* Verify the disable took */ for (int j = 0; j < 10; j++) { reg = rd32(hw, I40E_QTX_ENA(index)); if (!(reg & I40E_QTX_ENA_QENA_STAT_MASK)) break; i40e_msec_delay(10); } if (reg & I40E_QTX_ENA_QENA_STAT_MASK) { device_printf(pf->dev, "TX queue %d still enabled!\n", index); error = ETIMEDOUT; } reg = rd32(hw, I40E_QRX_ENA(index)); reg &= ~I40E_QRX_ENA_QENA_REQ_MASK; wr32(hw, I40E_QRX_ENA(index), reg); /* Verify the disable took */ for (int j = 0; j < 10; j++) { reg = rd32(hw, I40E_QRX_ENA(index)); if (!(reg & I40E_QRX_ENA_QENA_STAT_MASK)) break; i40e_msec_delay(10); } if (reg & I40E_QRX_ENA_QENA_STAT_MASK) { device_printf(pf->dev, "RX queue %d still enabled!\n", index); error = ETIMEDOUT; } } return (error); } /** * ixl_handle_mdd_event * * Called from interrupt handler to identify possibly malicious vfs * (But also detects events from the PF, as well) **/ static void ixl_handle_mdd_event(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; bool mdd_detected = false; bool pf_mdd_detected = false; u32 reg; /* find what triggered the MDD event */ reg = rd32(hw, I40E_GL_MDET_TX); if (reg & I40E_GL_MDET_TX_VALID_MASK) { u8 pf_num = (reg & I40E_GL_MDET_TX_PF_NUM_MASK) >> I40E_GL_MDET_TX_PF_NUM_SHIFT; u8 event = (reg & I40E_GL_MDET_TX_EVENT_MASK) >> I40E_GL_MDET_TX_EVENT_SHIFT; u8 queue = (reg & I40E_GL_MDET_TX_QUEUE_MASK) >> I40E_GL_MDET_TX_QUEUE_SHIFT; device_printf(dev, "Malicious Driver Detection event 0x%02x" " on TX queue %d pf number 0x%02x\n", event, queue, pf_num); wr32(hw, I40E_GL_MDET_TX, 0xffffffff); mdd_detected = true; } reg = rd32(hw, I40E_GL_MDET_RX); if (reg & I40E_GL_MDET_RX_VALID_MASK) { u8 
func = (reg & I40E_GL_MDET_RX_FUNCTION_MASK) >> I40E_GL_MDET_RX_FUNCTION_SHIFT; u8 event = (reg & I40E_GL_MDET_RX_EVENT_MASK) >> I40E_GL_MDET_RX_EVENT_SHIFT; u8 queue = (reg & I40E_GL_MDET_RX_QUEUE_MASK) >> I40E_GL_MDET_RX_QUEUE_SHIFT; device_printf(dev, "Malicious Driver Detection event 0x%02x" " on RX queue %d of function 0x%02x\n", event, queue, func); wr32(hw, I40E_GL_MDET_RX, 0xffffffff); mdd_detected = true; } if (mdd_detected) { reg = rd32(hw, I40E_PF_MDET_TX); if (reg & I40E_PF_MDET_TX_VALID_MASK) { wr32(hw, I40E_PF_MDET_TX, 0xFFFF); device_printf(dev, "MDD TX event is for this function 0x%08x\n", reg); pf_mdd_detected = true; } reg = rd32(hw, I40E_PF_MDET_RX); if (reg & I40E_PF_MDET_RX_VALID_MASK) { wr32(hw, I40E_PF_MDET_RX, 0xFFFF); device_printf(dev, "MDD RX event is for this function 0x%08x\n", reg); pf_mdd_detected = true; } } /* re-enable mdd interrupt cause */ reg = rd32(hw, I40E_PFINT_ICR0_ENA); reg |= I40E_PFINT_ICR0_ENA_MAL_DETECT_MASK; wr32(hw, I40E_PFINT_ICR0_ENA, reg); ixl_flush(hw); } static void ixl_enable_intr(struct ixl_vsi *vsi) { struct i40e_hw *hw = vsi->hw; struct ixl_queue *que = vsi->queues; if (ixl_enable_msix) { for (int i = 0; i < vsi->num_queues; i++, que++) ixl_enable_queue(hw, que->me); } else ixl_enable_legacy(hw); } static void ixl_disable_rings_intr(struct ixl_vsi *vsi) { struct i40e_hw *hw = vsi->hw; struct ixl_queue *que = vsi->queues; for (int i = 0; i < vsi->num_queues; i++, que++) ixl_disable_queue(hw, que->me); } static void ixl_disable_intr(struct ixl_vsi *vsi) { struct i40e_hw *hw = vsi->hw; if (ixl_enable_msix) ixl_disable_adminq(hw); else ixl_disable_legacy(hw); } static void ixl_enable_adminq(struct i40e_hw *hw) { u32 reg; reg = I40E_PFINT_DYN_CTL0_INTENA_MASK | I40E_PFINT_DYN_CTL0_CLEARPBA_MASK | (IXL_ITR_NONE << I40E_PFINT_DYN_CTL0_ITR_INDX_SHIFT); wr32(hw, I40E_PFINT_DYN_CTL0, reg); ixl_flush(hw); } static void ixl_disable_adminq(struct i40e_hw *hw) { u32 reg; reg = IXL_ITR_NONE << I40E_PFINT_DYN_CTL0_ITR_INDX_SHIFT; wr32(hw, I40E_PFINT_DYN_CTL0, reg); ixl_flush(hw); } static void ixl_enable_queue(struct i40e_hw *hw, int id) { u32 reg; reg = I40E_PFINT_DYN_CTLN_INTENA_MASK | I40E_PFINT_DYN_CTLN_CLEARPBA_MASK | (IXL_ITR_NONE << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT); wr32(hw, I40E_PFINT_DYN_CTLN(id), reg); } static void ixl_disable_queue(struct i40e_hw *hw, int id) { u32 reg; reg = IXL_ITR_NONE << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT; wr32(hw, I40E_PFINT_DYN_CTLN(id), reg); } static void ixl_enable_legacy(struct i40e_hw *hw) { u32 reg; reg = I40E_PFINT_DYN_CTL0_INTENA_MASK | I40E_PFINT_DYN_CTL0_CLEARPBA_MASK | (IXL_ITR_NONE << I40E_PFINT_DYN_CTL0_ITR_INDX_SHIFT); wr32(hw, I40E_PFINT_DYN_CTL0, reg); } static void ixl_disable_legacy(struct i40e_hw *hw) { u32 reg; reg = IXL_ITR_NONE << I40E_PFINT_DYN_CTL0_ITR_INDX_SHIFT; wr32(hw, I40E_PFINT_DYN_CTL0, reg); } static void ixl_update_stats_counters(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; struct ixl_vsi *vsi = &pf->vsi; struct ixl_vf *vf; struct i40e_hw_port_stats *nsd = &pf->stats; struct i40e_hw_port_stats *osd = &pf->stats_offsets; /* Update hw stats */ ixl_stat_update32(hw, I40E_GLPRT_CRCERRS(hw->port), pf->stat_offsets_loaded, &osd->crc_errors, &nsd->crc_errors); ixl_stat_update32(hw, I40E_GLPRT_ILLERRC(hw->port), pf->stat_offsets_loaded, &osd->illegal_bytes, &nsd->illegal_bytes); ixl_stat_update48(hw, I40E_GLPRT_GORCH(hw->port), I40E_GLPRT_GORCL(hw->port), pf->stat_offsets_loaded, &osd->eth.rx_bytes, &nsd->eth.rx_bytes); ixl_stat_update48(hw, I40E_GLPRT_GOTCH(hw->port),
I40E_GLPRT_GOTCL(hw->port), pf->stat_offsets_loaded, &osd->eth.tx_bytes, &nsd->eth.tx_bytes); ixl_stat_update32(hw, I40E_GLPRT_RDPC(hw->port), pf->stat_offsets_loaded, &osd->eth.rx_discards, &nsd->eth.rx_discards); ixl_stat_update48(hw, I40E_GLPRT_UPRCH(hw->port), I40E_GLPRT_UPRCL(hw->port), pf->stat_offsets_loaded, &osd->eth.rx_unicast, &nsd->eth.rx_unicast); ixl_stat_update48(hw, I40E_GLPRT_UPTCH(hw->port), I40E_GLPRT_UPTCL(hw->port), pf->stat_offsets_loaded, &osd->eth.tx_unicast, &nsd->eth.tx_unicast); ixl_stat_update48(hw, I40E_GLPRT_MPRCH(hw->port), I40E_GLPRT_MPRCL(hw->port), pf->stat_offsets_loaded, &osd->eth.rx_multicast, &nsd->eth.rx_multicast); ixl_stat_update48(hw, I40E_GLPRT_MPTCH(hw->port), I40E_GLPRT_MPTCL(hw->port), pf->stat_offsets_loaded, &osd->eth.tx_multicast, &nsd->eth.tx_multicast); ixl_stat_update48(hw, I40E_GLPRT_BPRCH(hw->port), I40E_GLPRT_BPRCL(hw->port), pf->stat_offsets_loaded, &osd->eth.rx_broadcast, &nsd->eth.rx_broadcast); ixl_stat_update48(hw, I40E_GLPRT_BPTCH(hw->port), I40E_GLPRT_BPTCL(hw->port), pf->stat_offsets_loaded, &osd->eth.tx_broadcast, &nsd->eth.tx_broadcast); ixl_stat_update32(hw, I40E_GLPRT_TDOLD(hw->port), pf->stat_offsets_loaded, &osd->tx_dropped_link_down, &nsd->tx_dropped_link_down); ixl_stat_update32(hw, I40E_GLPRT_MLFC(hw->port), pf->stat_offsets_loaded, &osd->mac_local_faults, &nsd->mac_local_faults); ixl_stat_update32(hw, I40E_GLPRT_MRFC(hw->port), pf->stat_offsets_loaded, &osd->mac_remote_faults, &nsd->mac_remote_faults); ixl_stat_update32(hw, I40E_GLPRT_RLEC(hw->port), pf->stat_offsets_loaded, &osd->rx_length_errors, &nsd->rx_length_errors); /* Flow control (LFC) stats */ ixl_stat_update32(hw, I40E_GLPRT_LXONRXC(hw->port), pf->stat_offsets_loaded, &osd->link_xon_rx, &nsd->link_xon_rx); ixl_stat_update32(hw, I40E_GLPRT_LXONTXC(hw->port), pf->stat_offsets_loaded, &osd->link_xon_tx, &nsd->link_xon_tx); ixl_stat_update32(hw, I40E_GLPRT_LXOFFRXC(hw->port), pf->stat_offsets_loaded, &osd->link_xoff_rx, &nsd->link_xoff_rx); ixl_stat_update32(hw, I40E_GLPRT_LXOFFTXC(hw->port), pf->stat_offsets_loaded, &osd->link_xoff_tx, &nsd->link_xoff_tx); /* Packet size stats rx */ ixl_stat_update48(hw, I40E_GLPRT_PRC64H(hw->port), I40E_GLPRT_PRC64L(hw->port), pf->stat_offsets_loaded, &osd->rx_size_64, &nsd->rx_size_64); ixl_stat_update48(hw, I40E_GLPRT_PRC127H(hw->port), I40E_GLPRT_PRC127L(hw->port), pf->stat_offsets_loaded, &osd->rx_size_127, &nsd->rx_size_127); ixl_stat_update48(hw, I40E_GLPRT_PRC255H(hw->port), I40E_GLPRT_PRC255L(hw->port), pf->stat_offsets_loaded, &osd->rx_size_255, &nsd->rx_size_255); ixl_stat_update48(hw, I40E_GLPRT_PRC511H(hw->port), I40E_GLPRT_PRC511L(hw->port), pf->stat_offsets_loaded, &osd->rx_size_511, &nsd->rx_size_511); ixl_stat_update48(hw, I40E_GLPRT_PRC1023H(hw->port), I40E_GLPRT_PRC1023L(hw->port), pf->stat_offsets_loaded, &osd->rx_size_1023, &nsd->rx_size_1023); ixl_stat_update48(hw, I40E_GLPRT_PRC1522H(hw->port), I40E_GLPRT_PRC1522L(hw->port), pf->stat_offsets_loaded, &osd->rx_size_1522, &nsd->rx_size_1522); ixl_stat_update48(hw, I40E_GLPRT_PRC9522H(hw->port), I40E_GLPRT_PRC9522L(hw->port), pf->stat_offsets_loaded, &osd->rx_size_big, &nsd->rx_size_big); /* Packet size stats tx */ ixl_stat_update48(hw, I40E_GLPRT_PTC64H(hw->port), I40E_GLPRT_PTC64L(hw->port), pf->stat_offsets_loaded, &osd->tx_size_64, &nsd->tx_size_64); ixl_stat_update48(hw, I40E_GLPRT_PTC127H(hw->port), I40E_GLPRT_PTC127L(hw->port), pf->stat_offsets_loaded, &osd->tx_size_127, &nsd->tx_size_127); ixl_stat_update48(hw, I40E_GLPRT_PTC255H(hw->port), 
I40E_GLPRT_PTC255L(hw->port), pf->stat_offsets_loaded, &osd->tx_size_255, &nsd->tx_size_255); ixl_stat_update48(hw, I40E_GLPRT_PTC511H(hw->port), I40E_GLPRT_PTC511L(hw->port), pf->stat_offsets_loaded, &osd->tx_size_511, &nsd->tx_size_511); ixl_stat_update48(hw, I40E_GLPRT_PTC1023H(hw->port), I40E_GLPRT_PTC1023L(hw->port), pf->stat_offsets_loaded, &osd->tx_size_1023, &nsd->tx_size_1023); ixl_stat_update48(hw, I40E_GLPRT_PTC1522H(hw->port), I40E_GLPRT_PTC1522L(hw->port), pf->stat_offsets_loaded, &osd->tx_size_1522, &nsd->tx_size_1522); ixl_stat_update48(hw, I40E_GLPRT_PTC9522H(hw->port), I40E_GLPRT_PTC9522L(hw->port), pf->stat_offsets_loaded, &osd->tx_size_big, &nsd->tx_size_big); ixl_stat_update32(hw, I40E_GLPRT_RUC(hw->port), pf->stat_offsets_loaded, &osd->rx_undersize, &nsd->rx_undersize); ixl_stat_update32(hw, I40E_GLPRT_RFC(hw->port), pf->stat_offsets_loaded, &osd->rx_fragments, &nsd->rx_fragments); ixl_stat_update32(hw, I40E_GLPRT_ROC(hw->port), pf->stat_offsets_loaded, &osd->rx_oversize, &nsd->rx_oversize); ixl_stat_update32(hw, I40E_GLPRT_RJC(hw->port), pf->stat_offsets_loaded, &osd->rx_jabber, &nsd->rx_jabber); pf->stat_offsets_loaded = true; /* End hw stats */ /* Update vsi stats */ ixl_update_vsi_stats(vsi); for (int i = 0; i < pf->num_vfs; i++) { vf = &pf->vfs[i]; if (vf->vf_flags & VF_FLAG_ENABLED) ixl_update_eth_stats(&pf->vfs[i].vsi); } } static int ixl_rebuild_hw_structs_after_reset(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; struct ixl_vsi *vsi = &pf->vsi; device_t dev = pf->dev; bool is_up = false; int error = 0; is_up = !!(vsi->ifp->if_drv_flags & IFF_DRV_RUNNING); /* Teardown */ if (is_up) ixl_stop(pf); error = i40e_shutdown_lan_hmc(hw); if (error) device_printf(dev, "Shutdown LAN HMC failed with code %d\n", error); ixl_disable_adminq(hw); ixl_teardown_adminq_msix(pf); error = i40e_shutdown_adminq(hw); if (error) device_printf(dev, "Shutdown Admin queue failed with code %d\n", error); /* Setup */ error = i40e_init_adminq(hw); if (error != 0 && error != I40E_ERR_FIRMWARE_API_VERSION) { device_printf(dev, "Unable to initialize Admin Queue, error %d\n", error); } error = ixl_setup_adminq_msix(pf); if (error) { device_printf(dev, "ixl_setup_adminq_msix error: %d\n", error); } ixl_configure_intr0_msix(pf); ixl_enable_adminq(hw); error = i40e_init_lan_hmc(hw, hw->func_caps.num_tx_qp, hw->func_caps.num_rx_qp, 0, 0); if (error) { device_printf(dev, "init_lan_hmc failed: %d\n", error); } error = i40e_configure_lan_hmc(hw, I40E_HMC_MODEL_DIRECT_ONLY); if (error) { device_printf(dev, "configure_lan_hmc failed: %d\n", error); } if (is_up) ixl_init(pf); return (0); } static void ixl_handle_empr_reset(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; int count = 0; u32 reg; /* Typically finishes within 3-4 seconds */ while (count++ < 100) { reg = rd32(hw, I40E_GLGEN_RSTAT) & I40E_GLGEN_RSTAT_DEVSTATE_MASK; if (reg) i40e_msec_delay(100); else break; } #ifdef IXL_DEBUG // Reset-related device_printf(dev, "EMPR reset wait count: %d\n", count); #endif device_printf(dev, "Rebuilding driver state...\n"); ixl_rebuild_hw_structs_after_reset(pf); device_printf(dev, "Rebuilding driver state done.\n"); atomic_clear_int(&pf->state, IXL_PF_STATE_EMPR_RESETTING); } /* ** Tasklet handler for MSIX Adminq interrupts ** - do outside interrupt since it might sleep */ static void ixl_do_adminq(void *context, int pending) { struct ixl_pf *pf = context; struct i40e_hw *hw = &pf->hw; struct i40e_arq_event_info event; i40e_status ret; device_t dev = pf->dev; u32 loop = 0; u16 
opcode, result; if (pf->state & IXL_PF_STATE_EMPR_RESETTING) { /* Flag cleared at end of this function */ ixl_handle_empr_reset(pf); return; } /* Admin Queue handling */ event.buf_len = IXL_AQ_BUF_SZ; event.msg_buf = malloc(event.buf_len, M_DEVBUF, M_NOWAIT | M_ZERO); if (!event.msg_buf) { device_printf(dev, "%s: Unable to allocate memory for Admin" " Queue event!\n", __func__); return; } IXL_PF_LOCK(pf); /* clean and process any events */ do { ret = i40e_clean_arq_element(hw, &event, &result); if (ret) break; opcode = LE16_TO_CPU(event.desc.opcode); #ifdef IXL_DEBUG device_printf(dev, "%s: Admin Queue event: %#06x\n", __func__, opcode); #endif switch (opcode) { case i40e_aqc_opc_get_link_status: ixl_link_event(pf, &event); break; case i40e_aqc_opc_send_msg_to_pf: #ifdef PCI_IOV ixl_handle_vf_msg(pf, &event); #endif break; case i40e_aqc_opc_event_lan_overflow: default: break; } } while (result && (loop++ < IXL_ADM_LIMIT)); free(event.msg_buf, M_DEVBUF); /* * If there are still messages to process, reschedule ourselves. * Otherwise, re-enable our interrupt and go to sleep. */ if (result > 0) taskqueue_enqueue(pf->tq, &pf->adminq); else ixl_enable_adminq(hw); IXL_PF_UNLOCK(pf); } /** * Update VSI-specific ethernet statistics counters. **/ void ixl_update_eth_stats(struct ixl_vsi *vsi) { struct ixl_pf *pf = (struct ixl_pf *)vsi->back; struct i40e_hw *hw = &pf->hw; struct i40e_eth_stats *es; struct i40e_eth_stats *oes; struct i40e_hw_port_stats *nsd; u16 stat_idx = vsi->info.stat_counter_idx; es = &vsi->eth_stats; oes = &vsi->eth_stats_offsets; nsd = &pf->stats; /* Gather up the stats that the hw collects */ ixl_stat_update32(hw, I40E_GLV_TEPC(stat_idx), vsi->stat_offsets_loaded, &oes->tx_errors, &es->tx_errors); ixl_stat_update32(hw, I40E_GLV_RDPC(stat_idx), vsi->stat_offsets_loaded, &oes->rx_discards, &es->rx_discards); ixl_stat_update48(hw, I40E_GLV_GORCH(stat_idx), I40E_GLV_GORCL(stat_idx), vsi->stat_offsets_loaded, &oes->rx_bytes, &es->rx_bytes); ixl_stat_update48(hw, I40E_GLV_UPRCH(stat_idx), I40E_GLV_UPRCL(stat_idx), vsi->stat_offsets_loaded, &oes->rx_unicast, &es->rx_unicast); ixl_stat_update48(hw, I40E_GLV_MPRCH(stat_idx), I40E_GLV_MPRCL(stat_idx), vsi->stat_offsets_loaded, &oes->rx_multicast, &es->rx_multicast); ixl_stat_update48(hw, I40E_GLV_BPRCH(stat_idx), I40E_GLV_BPRCL(stat_idx), vsi->stat_offsets_loaded, &oes->rx_broadcast, &es->rx_broadcast); ixl_stat_update48(hw, I40E_GLV_GOTCH(stat_idx), I40E_GLV_GOTCL(stat_idx), vsi->stat_offsets_loaded, &oes->tx_bytes, &es->tx_bytes); ixl_stat_update48(hw, I40E_GLV_UPTCH(stat_idx), I40E_GLV_UPTCL(stat_idx), vsi->stat_offsets_loaded, &oes->tx_unicast, &es->tx_unicast); ixl_stat_update48(hw, I40E_GLV_MPTCH(stat_idx), I40E_GLV_MPTCL(stat_idx), vsi->stat_offsets_loaded, &oes->tx_multicast, &es->tx_multicast); ixl_stat_update48(hw, I40E_GLV_BPTCH(stat_idx), I40E_GLV_BPTCL(stat_idx), vsi->stat_offsets_loaded, &oes->tx_broadcast, &es->tx_broadcast); vsi->stat_offsets_loaded = true; } static void ixl_update_vsi_stats(struct ixl_vsi *vsi) { struct ixl_pf *pf; struct ifnet *ifp; struct i40e_eth_stats *es; u64 tx_discards; struct i40e_hw_port_stats *nsd; pf = vsi->back; ifp = vsi->ifp; es = &vsi->eth_stats; nsd = &pf->stats; ixl_update_eth_stats(vsi); tx_discards = es->tx_discards + nsd->tx_dropped_link_down; for (int i = 0; i < vsi->num_queues; i++) tx_discards += vsi->queues[i].txr.br->br_drops; /* Update ifnet stats */ IXL_SET_IPACKETS(vsi, es->rx_unicast + es->rx_multicast + es->rx_broadcast); IXL_SET_OPACKETS(vsi, es->tx_unicast + 
es->tx_multicast + es->tx_broadcast); IXL_SET_IBYTES(vsi, es->rx_bytes); IXL_SET_OBYTES(vsi, es->tx_bytes); IXL_SET_IMCASTS(vsi, es->rx_multicast); IXL_SET_OMCASTS(vsi, es->tx_multicast); IXL_SET_IERRORS(vsi, nsd->crc_errors + nsd->illegal_bytes + nsd->rx_undersize + nsd->rx_oversize + nsd->rx_fragments + nsd->rx_jabber); IXL_SET_OERRORS(vsi, es->tx_errors); IXL_SET_IQDROPS(vsi, es->rx_discards + nsd->eth.rx_discards); IXL_SET_OQDROPS(vsi, tx_discards); IXL_SET_NOPROTO(vsi, es->rx_unknown_protocol); IXL_SET_COLLISIONS(vsi, 0); } /** * Reset all of the stats for the given pf **/ void ixl_pf_reset_stats(struct ixl_pf *pf) { bzero(&pf->stats, sizeof(struct i40e_hw_port_stats)); bzero(&pf->stats_offsets, sizeof(struct i40e_hw_port_stats)); pf->stat_offsets_loaded = false; } /** * Resets all stats of the given vsi **/ void ixl_vsi_reset_stats(struct ixl_vsi *vsi) { bzero(&vsi->eth_stats, sizeof(struct i40e_eth_stats)); bzero(&vsi->eth_stats_offsets, sizeof(struct i40e_eth_stats)); vsi->stat_offsets_loaded = false; } /** * Read and update a 48 bit stat from the hw * * Since the device stats are not reset at PFReset, they likely will not * be zeroed when the driver starts. We'll save the first values read * and use them as offsets to be subtracted from the raw values in order * to report stats that count from zero. **/ static void ixl_stat_update48(struct i40e_hw *hw, u32 hireg, u32 loreg, bool offset_loaded, u64 *offset, u64 *stat) { u64 new_data; #if defined(__FreeBSD__) && (__FreeBSD_version >= 1000000) && defined(__amd64__) new_data = rd64(hw, loreg); #else /* * Use two rd32's instead of one rd64; FreeBSD versions before * 10 don't support 8 byte bus reads/writes. */ new_data = rd32(hw, loreg); new_data |= ((u64)(rd32(hw, hireg) & 0xFFFF)) << 32; #endif if (!offset_loaded) *offset = new_data; if (new_data >= *offset) *stat = new_data - *offset; else *stat = (new_data + ((u64)1 << 48)) - *offset; *stat &= 0xFFFFFFFFFFFFULL; } /** * Read and update a 32 bit stat from the hw **/ static void ixl_stat_update32(struct i40e_hw *hw, u32 reg, bool offset_loaded, u64 *offset, u64 *stat) { u32 new_data; new_data = rd32(hw, reg); if (!offset_loaded) *offset = new_data; if (new_data >= *offset) *stat = (u32)(new_data - *offset); else *stat = (u32)((new_data + ((u64)1 << 32)) - *offset); } static void ixl_add_device_sysctls(struct ixl_pf *pf) { device_t dev = pf->dev; /* Set up sysctls */ SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "fc", CTLTYPE_INT | CTLFLAG_RW, pf, 0, ixl_set_flowcntl, "I", IXL_SYSCTL_HELP_FC); SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "advertise_speed", CTLTYPE_INT | CTLFLAG_RW, pf, 0, ixl_set_advertise, "I", IXL_SYSCTL_HELP_SET_ADVERTISE); SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "current_speed", CTLTYPE_STRING | CTLFLAG_RD, pf, 0, ixl_current_speed, "A", "Current Port Speed"); SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "fw_version", CTLTYPE_STRING | CTLFLAG_RD, pf, 0, ixl_sysctl_show_fw, "A", "Firmware version"); #if 0 SYSCTL_ADD_INT(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "rx_itr", CTLFLAG_RW, &ixl_rx_itr, 0, "RX ITR"); SYSCTL_ADD_INT(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "dynamic_rx_itr", CTLFLAG_RW, &ixl_dynamic_rx_itr, 0, "Dynamic RX ITR"); 
SYSCTL_ADD_INT(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "tx_itr", CTLFLAG_RW, &ixl_tx_itr, 0, "TX ITR"); SYSCTL_ADD_INT(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "dynamic_tx_itr", CTLFLAG_RW, &ixl_dynamic_tx_itr, 0, "Dynamic TX ITR"); #endif #ifdef IXL_DEBUG_SYSCTL SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, pf, 0, ixl_debug_info, "I", "Debug Information"); /* Shared-code debug message level */ SYSCTL_ADD_UINT(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "debug_mask", CTLFLAG_RW, &pf->hw.debug_mask, 0, "Debug Message Level"); SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "link_status", CTLTYPE_STRING | CTLFLAG_RD, pf, 0, ixl_sysctl_link_status, "A", IXL_SYSCTL_HELP_LINK_STATUS); SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "phy_abilities", CTLTYPE_STRING | CTLFLAG_RD, pf, 0, ixl_sysctl_phy_abilities, "A", "PHY Abilities"); SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "filter_list", CTLTYPE_STRING | CTLFLAG_RD, pf, 0, ixl_sysctl_sw_filter_list, "A", "SW Filter List"); SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "hw_res_alloc", CTLTYPE_STRING | CTLFLAG_RD, pf, 0, ixl_sysctl_hw_res_alloc, "A", "HW Resource Allocation"); SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "switch_config", CTLTYPE_STRING | CTLFLAG_RD, pf, 0, ixl_sysctl_switch_config, "A", "HW Switch Configuration"); #ifdef PCI_IOV SYSCTL_ADD_UINT(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "vc_debug_level", CTLFLAG_RW, &pf->vc_debug_lvl, 0, "PF/VF Virtual Channel debug level"); #endif #endif } /* ** Set flow control using sysctl: ** 0 - off ** 1 - rx pause ** 2 - tx pause ** 3 - full */ static int ixl_set_flowcntl(SYSCTL_HANDLER_ARGS) { /* * TODO: ensure tx CRC by hardware should be enabled * if tx flow control is enabled. 
* ^ N/A for 40G ports */ struct ixl_pf *pf = (struct ixl_pf *)arg1; struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; int requested_fc, error = 0; enum i40e_status_code aq_error = 0; u8 fc_aq_err = 0; /* Get request */ requested_fc = pf->fc; error = sysctl_handle_int(oidp, &requested_fc, 0, req); if ((error) || (req->newptr == NULL)) return (error); if (requested_fc < 0 || requested_fc > 3) { device_printf(dev, "Invalid fc mode; valid modes are 0 through 3\n"); return (EINVAL); } /* Set fc ability for port */ hw->fc.requested_mode = requested_fc; aq_error = i40e_set_fc(hw, &fc_aq_err, TRUE); if (aq_error) { device_printf(dev, "%s: Error setting new fc mode %d; fc_err %#x\n", __func__, aq_error, fc_aq_err); return (EIO); } pf->fc = requested_fc; /* Get new link state */ i40e_msec_delay(250); hw->phy.get_link_info = TRUE; i40e_get_link_status(hw, &pf->link_up); return (0); } static int ixl_current_speed(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; struct i40e_hw *hw = &pf->hw; int error = 0, index = 0; char *speeds[] = { "Unknown", "100M", "1G", "10G", "40G", "20G" }; ixl_update_link_status(pf); switch (hw->phy.link_info.link_speed) { case I40E_LINK_SPEED_100MB: index = 1; break; case I40E_LINK_SPEED_1GB: index = 2; break; case I40E_LINK_SPEED_10GB: index = 3; break; case I40E_LINK_SPEED_40GB: index = 4; break; case I40E_LINK_SPEED_20GB: index = 5; break; case I40E_LINK_SPEED_UNKNOWN: default: index = 0; break; } error = sysctl_handle_string(oidp, speeds[index], strlen(speeds[index]), req); return (error); } static int ixl_set_advertised_speeds(struct ixl_pf *pf, int speeds) { struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; struct i40e_aq_get_phy_abilities_resp abilities; struct i40e_aq_set_phy_config config; enum i40e_status_code aq_error = 0; /* Get current capability information */ aq_error = i40e_aq_get_phy_capabilities(hw, FALSE, FALSE, &abilities, NULL); if (aq_error) { device_printf(dev, "%s: Error getting phy capabilities %d," " aq error: %d\n", __func__, aq_error, hw->aq.asq_last_status); return (EAGAIN); } /* Prepare new config */ bzero(&config, sizeof(config)); config.phy_type = abilities.phy_type; config.abilities = abilities.abilities | I40E_AQ_PHY_ENABLE_ATOMIC_LINK; config.eee_capability = abilities.eee_capability; config.eeer = abilities.eeer_val; config.low_power_ctrl = abilities.d3_lpan; /* Translate into aq cmd link_speed */ if (speeds & 0x10) config.link_speed |= I40E_LINK_SPEED_40GB; if (speeds & 0x8) config.link_speed |= I40E_LINK_SPEED_20GB; if (speeds & 0x4) config.link_speed |= I40E_LINK_SPEED_10GB; if (speeds & 0x2) config.link_speed |= I40E_LINK_SPEED_1GB; if (speeds & 0x1) config.link_speed |= I40E_LINK_SPEED_100MB; /* Do aq command & restart link */ aq_error = i40e_aq_set_phy_config(hw, &config, NULL); if (aq_error) { device_printf(dev, "%s: Error setting new phy config %d," " aq error: %d\n", __func__, aq_error, hw->aq.asq_last_status); return (EAGAIN); } /* ** This seems a bit heavy handed, but we ** need to get a reinit on some devices */ IXL_PF_LOCK(pf); ixl_stop_locked(pf); ixl_init_locked(pf); IXL_PF_UNLOCK(pf); return (0); } /* ** Control link advertise speed: ** Flags: ** 0x1 - advertise 100 Mb ** 0x2 - advertise 1G ** 0x4 - advertise 10G ** 0x8 - advertise 20G ** 0x10 - advertise 40G ** ** Set to 0 to disable link */ static int ixl_set_advertise(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; int requested_ls = 0; int error = 0; /* Read in new mode */ 
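/* (Speed flags combine, e.g. 0x6 advertises both 1G and 10G.) */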
requested_ls = pf->advertised_speed; error = sysctl_handle_int(oidp, &requested_ls, 0, req); if ((error) || (req->newptr == NULL)) return (error); /* Check for sane value */ if (requested_ls > 0x10) { device_printf(dev, "Invalid advertised speed; " "valid modes are 0x1 through 0x10\n"); return (EINVAL); } /* Then check for validity based on adapter type */ switch (hw->device_id) { case I40E_DEV_ID_10G_BASE_T: case I40E_DEV_ID_10G_BASE_T4: /* BaseT */ if (requested_ls & ~(0x7)) { device_printf(dev, "Only 100M/1G/10G speeds supported on this device.\n"); return (EINVAL); } break; case I40E_DEV_ID_20G_KR2: case I40E_DEV_ID_20G_KR2_A: /* 20G */ if (requested_ls & ~(0xE)) { device_printf(dev, "Only 1G/10G/20G speeds supported on this device.\n"); return (EINVAL); } break; case I40E_DEV_ID_KX_B: case I40E_DEV_ID_QSFP_A: case I40E_DEV_ID_QSFP_B: /* 40G */ if (requested_ls & ~(0x10)) { device_printf(dev, "Only 40G speeds supported on this device.\n"); return (EINVAL); } break; default: /* 10G (1G) */ if (requested_ls & ~(0x6)) { device_printf(dev, "Only 1G/10G speeds are supported on this device.\n"); return (EINVAL); } break; } /* Exit if no change */ if (pf->advertised_speed == requested_ls) return (0); error = ixl_set_advertised_speeds(pf, requested_ls); if (error) return (error); pf->advertised_speed = requested_ls; ixl_update_link_status(pf); return (0); } /* ** Get the width and transaction speed of ** the bus this adapter is plugged into. */ static u16 ixl_get_bus_info(struct i40e_hw *hw, device_t dev) { u16 link; u32 offset; /* Get the PCI Express Capabilities offset */ pci_find_cap(dev, PCIY_EXPRESS, &offset); /* ...and read the Link Status Register */ link = pci_read_config(dev, offset + PCIER_LINK_STA, 2); switch (link & I40E_PCI_LINK_WIDTH) { case I40E_PCI_LINK_WIDTH_1: hw->bus.width = i40e_bus_width_pcie_x1; break; case I40E_PCI_LINK_WIDTH_2: hw->bus.width = i40e_bus_width_pcie_x2; break; case I40E_PCI_LINK_WIDTH_4: hw->bus.width = i40e_bus_width_pcie_x4; break; case I40E_PCI_LINK_WIDTH_8: hw->bus.width = i40e_bus_width_pcie_x8; break; default: hw->bus.width = i40e_bus_width_unknown; break; } switch (link & I40E_PCI_LINK_SPEED) { case I40E_PCI_LINK_SPEED_2500: hw->bus.speed = i40e_bus_speed_2500; break; case I40E_PCI_LINK_SPEED_5000: hw->bus.speed = i40e_bus_speed_5000; break; case I40E_PCI_LINK_SPEED_8000: hw->bus.speed = i40e_bus_speed_8000; break; default: hw->bus.speed = i40e_bus_speed_unknown; break; } device_printf(dev, "PCI Express Bus: Speed %s %s\n", ((hw->bus.speed == i40e_bus_speed_8000) ? "8.0GT/s": (hw->bus.speed == i40e_bus_speed_5000) ? "5.0GT/s": (hw->bus.speed == i40e_bus_speed_2500) ? "2.5GT/s":"Unknown"), (hw->bus.width == i40e_bus_width_pcie_x8) ? "Width x8" : (hw->bus.width == i40e_bus_width_pcie_x4) ? "Width x4" : (hw->bus.width == i40e_bus_width_pcie_x1) ? "Width x1" : ("Unknown")); if ((hw->bus.width <= i40e_bus_width_pcie_x8) && (hw->bus.speed < i40e_bus_speed_8000)) { device_printf(dev, "PCI-Express bandwidth available" " for this device may be insufficient for" " optimal performance.\n"); device_printf(dev, "For expected performance a x8 " "PCIE Gen3 slot is required.\n"); } return (link); } static int ixl_sysctl_show_fw(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; struct i40e_hw *hw = &pf->hw; struct sbuf *sbuf; sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req); ixl_nvm_version_str(hw, sbuf); sbuf_finish(sbuf); sbuf_delete(sbuf); return (0); } #ifdef IXL_DEBUG static void ixl_print_nvm_cmd(device_t dev, struct i40e_nvm_access *nvma) { if ((nvma->command == I40E_NVM_READ) && ((nvma->config & 0xFF) == 0xF) && (((nvma->config & 0xF00) >> 8) == 0xF) && (nvma->offset == 0) && (nvma->data_size == 1)) { // device_printf(dev, "- Get Driver Status Command\n"); } else if (nvma->command == I40E_NVM_READ) { } else { switch (nvma->command) { case 0xB: device_printf(dev, "- command: I40E_NVM_READ\n"); break; case 0xC: device_printf(dev, "- command: I40E_NVM_WRITE\n"); break; default: device_printf(dev, "- command: unknown 0x%08x\n", nvma->command); break; } device_printf(dev, "- config (ptr) : 0x%02x\n", nvma->config & 0xFF); device_printf(dev, "- config (flags): 0x%01x\n", (nvma->config & 0xF00) >> 8); device_printf(dev, "- offset : 0x%08x\n", nvma->offset); device_printf(dev, "- data_s : 0x%08x\n", nvma->data_size); } } #endif static int ixl_handle_nvmupd_cmd(struct ixl_pf *pf, struct ifdrv *ifd) { struct i40e_hw *hw = &pf->hw; struct i40e_nvm_access *nvma; device_t dev = pf->dev; enum i40e_status_code status = 0; int perrno; DEBUGFUNC("ixl_handle_nvmupd_cmd"); /* Sanity checks */ if (ifd->ifd_len < sizeof(struct i40e_nvm_access) || ifd->ifd_data == NULL) { device_printf(dev, "%s: incorrect ifdrv length or data pointer\n", __func__); device_printf(dev, "%s: ifdrv length: %lu, sizeof(struct i40e_nvm_access): %lu\n", __func__, ifd->ifd_len, sizeof(struct i40e_nvm_access)); device_printf(dev, "%s: data pointer: %p\n", __func__, ifd->ifd_data); return (EINVAL); } nvma = (struct i40e_nvm_access *)ifd->ifd_data; #ifdef IXL_DEBUG ixl_print_nvm_cmd(dev, nvma); #endif if (pf->state & IXL_PF_STATE_EMPR_RESETTING) { int count = 0; while (count++ < 100) { i40e_msec_delay(100); if (!(pf->state & IXL_PF_STATE_EMPR_RESETTING)) break; } } if (!(pf->state & IXL_PF_STATE_EMPR_RESETTING)) { IXL_PF_LOCK(pf); status = i40e_nvmupd_command(hw, nvma, nvma->data, &perrno); IXL_PF_UNLOCK(pf); } else { perrno = -EBUSY; } if (status) device_printf(dev, "i40e_nvmupd_command status %d, perrno %d\n", status, perrno); /* * -EPERM is actually ERESTART, which the kernel interprets as it needing * to run this ioctl again. So use -EACCES for -EPERM instead. */ if (perrno == -EPERM) return (-EACCES); else return (perrno); } #ifdef IXL_DEBUG_SYSCTL static int ixl_sysctl_link_status(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; struct i40e_hw *hw = &pf->hw; struct i40e_link_status link_status; char buf[512]; enum i40e_status_code aq_error = 0; aq_error = i40e_aq_get_link_info(hw, TRUE, &link_status, NULL); if (aq_error) { printf("i40e_aq_get_link_info() error %d\n", aq_error); return (EPERM); } sprintf(buf, "\n" "PHY Type : %#04x\n" "Speed : %#04x\n" "Link info: %#04x\n" "AN info : %#04x\n" "Ext info : %#04x\n" "Max Frame: %d\n" "Pacing : %#04x\n" "CRC En?
: %d", link_status.phy_type, link_status.link_speed, link_status.link_info, link_status.an_info, link_status.ext_info, link_status.max_frame_size, link_status.pacing, link_status.crc_enable); return (sysctl_handle_string(oidp, buf, strlen(buf), req)); } static int ixl_sysctl_phy_abilities(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; struct i40e_hw *hw = &pf->hw; char buf[512]; enum i40e_status_code aq_error = 0; struct i40e_aq_get_phy_abilities_resp abilities; aq_error = i40e_aq_get_phy_capabilities(hw, TRUE, FALSE, &abilities, NULL); if (aq_error) { printf("i40e_aq_get_phy_capabilities() error %d\n", aq_error); return (EPERM); } sprintf(buf, "\n" "PHY Type : %#010x\n" "Speed : %#04x\n" "Abilities: %#04x\n" "EEE cap : %#06x\n" "EEER reg : %#010x\n" "D3 Lpan : %#04x", abilities.phy_type, abilities.link_speed, abilities.abilities, abilities.eee_capability, abilities.eeer_val, abilities.d3_lpan); return (sysctl_handle_string(oidp, buf, strlen(buf), req)); } static int ixl_sysctl_sw_filter_list(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; struct ixl_vsi *vsi = &pf->vsi; struct ixl_mac_filter *f; char *buf, *buf_i; int error = 0; int ftl_len = 0; int ftl_counter = 0; int buf_len = 0; int entry_len = 42; SLIST_FOREACH(f, &vsi->ftl, next) { ftl_len++; } if (ftl_len < 1) { sysctl_handle_string(oidp, "(none)", 6, req); return (0); } buf_len = sizeof(char) * (entry_len + 1) * ftl_len + 2; buf = buf_i = malloc(buf_len, M_DEVBUF, M_NOWAIT); sprintf(buf_i++, "\n"); SLIST_FOREACH(f, &vsi->ftl, next) { sprintf(buf_i, MAC_FORMAT ", vlan %4d, flags %#06x", MAC_FORMAT_ARGS(f->macaddr), f->vlan, f->flags); buf_i += entry_len; /* don't print '\n' for last entry */ if (++ftl_counter != ftl_len) { sprintf(buf_i, "\n"); buf_i++; } } error = sysctl_handle_string(oidp, buf, strlen(buf), req); if (error) printf("sysctl error: %d\n", error); free(buf, M_DEVBUF); return error; } #define IXL_SW_RES_SIZE 0x14 static int ixl_res_alloc_cmp(const void *a, const void *b) { const struct i40e_aqc_switch_resource_alloc_element_resp *one, *two; one = (const struct i40e_aqc_switch_resource_alloc_element_resp *)a; two = (const struct i40e_aqc_switch_resource_alloc_element_resp *)b; return ((int)one->resource_type - (int)two->resource_type); } /* * Longest string length: 25 */ static char * ixl_switch_res_type_string(u8 type) { static char * ixl_switch_res_type_strings[0x14] = { "VEB", "VSI", "Perfect Match MAC address", "S-tag", "(Reserved)", "Multicast hash entry", "Unicast hash entry", "VLAN", "VSI List entry", "(Reserved)", "VLAN Statistic Pool", "Mirror Rule", "Queue Set", "Inner VLAN Forward filter", "(Reserved)", "Inner MAC", "IP", "GRE/VN1 Key", "VN2 Key", "Tunneling Port" }; if (type < 0x14) return ixl_switch_res_type_strings[type]; else return "(Reserved)"; } static int ixl_sysctl_hw_res_alloc(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; struct sbuf *buf; int error = 0; u8 num_entries; struct i40e_aqc_switch_resource_alloc_element_resp resp[IXL_SW_RES_SIZE]; buf = sbuf_new_for_sysctl(NULL, NULL, 128, req); if (!buf) { device_printf(dev, "Could not allocate sbuf for output.\n"); return (ENOMEM); } bzero(resp, sizeof(resp)); error = i40e_aq_get_switch_resource_alloc(hw, &num_entries, resp, IXL_SW_RES_SIZE, NULL); if (error) { device_printf(dev, "%s: get_switch_resource_alloc() error %d, aq error %d\n", __func__, error, hw->aq.asq_last_status); sbuf_delete(buf); return error; } /* Sort entries by type for 
display */ qsort(resp, num_entries, sizeof(struct i40e_aqc_switch_resource_alloc_element_resp), &ixl_res_alloc_cmp); sbuf_cat(buf, "\n"); sbuf_printf(buf, "# of entries: %d\n", num_entries); sbuf_printf(buf, " Type | Guaranteed | Total | Used | Un-allocated\n" " | (this) | (all) | (this) | (all) \n"); for (int i = 0; i < num_entries; i++) { sbuf_printf(buf, "%25s | %10d %5d %6d %12d", ixl_switch_res_type_string(resp[i].resource_type), resp[i].guaranteed, resp[i].total, resp[i].used, resp[i].total_unalloced); if (i < num_entries - 1) sbuf_cat(buf, "\n"); } error = sbuf_finish(buf); if (error) device_printf(dev, "Error finishing sbuf: %d\n", error); sbuf_delete(buf); return error; } /* ** Caller must init and delete sbuf; this function will clear and ** finish it for caller. ** ** XXX: Cannot use the SEID for this, since there is no longer a ** fixed mapping between SEID and element type. */ static char * ixl_switch_element_string(struct sbuf *s, struct i40e_aqc_switch_config_element_resp *element) { sbuf_clear(s); switch (element->element_type) { case I40E_AQ_SW_ELEM_TYPE_MAC: sbuf_printf(s, "MAC %3d", element->element_info); break; case I40E_AQ_SW_ELEM_TYPE_PF: sbuf_printf(s, "PF %3d", element->element_info); break; case I40E_AQ_SW_ELEM_TYPE_VF: sbuf_printf(s, "VF %3d", element->element_info); break; case I40E_AQ_SW_ELEM_TYPE_EMP: sbuf_cat(s, "EMP"); break; case I40E_AQ_SW_ELEM_TYPE_BMC: sbuf_cat(s, "BMC"); break; case I40E_AQ_SW_ELEM_TYPE_PV: sbuf_cat(s, "PV"); break; case I40E_AQ_SW_ELEM_TYPE_VEB: sbuf_cat(s, "VEB"); break; case I40E_AQ_SW_ELEM_TYPE_PA: sbuf_cat(s, "PA"); break; case I40E_AQ_SW_ELEM_TYPE_VSI: sbuf_printf(s, "VSI %3d", element->element_info); break; default: sbuf_cat(s, "?"); break; } sbuf_finish(s); return sbuf_data(s); } static int ixl_sysctl_switch_config(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; struct sbuf *buf; struct sbuf *nmbuf; int error = 0; u16 next = 0; u8 aq_buf[I40E_AQ_LARGE_BUF]; struct i40e_aqc_get_switch_config_resp *sw_config; sw_config = (struct i40e_aqc_get_switch_config_resp *)aq_buf; buf = sbuf_new_for_sysctl(NULL, NULL, 128, req); if (!buf) { device_printf(dev, "Could not allocate sbuf for sysctl output.\n"); return (ENOMEM); } error = i40e_aq_get_switch_config(hw, sw_config, sizeof(aq_buf), &next, NULL); if (error) { device_printf(dev, "%s: aq_get_switch_config() error %d, aq error %d\n", __func__, error, hw->aq.asq_last_status); sbuf_delete(buf); return error; } if (next) device_printf(dev, "%s: TODO: get more config with SEID %d\n", __func__, next); nmbuf = sbuf_new_auto(); if (!nmbuf) { device_printf(dev, "Could not allocate sbuf for name output.\n"); sbuf_delete(buf); return (ENOMEM); } sbuf_cat(buf, "\n"); // Assuming <= 255 elements in switch sbuf_printf(buf, "# of reported elements: %d\n", sw_config->header.num_reported); sbuf_printf(buf, "total # of elements: %d\n", sw_config->header.num_total); /* Exclude: ** Revision -- all elements are revision 1 for now */ sbuf_printf(buf, "SEID ( Name ) | Uplink | Downlink | Conn Type\n" " | | | (uplink)\n"); for (int i = 0; i < sw_config->header.num_reported; i++) { // "%4d (%8s) | %8s %8s %#8x", sbuf_printf(buf, "%4d", sw_config->element[i].seid); sbuf_cat(buf, " "); sbuf_printf(buf, "(%8s)", ixl_switch_element_string(nmbuf, &sw_config->element[i])); sbuf_cat(buf, " | "); sbuf_printf(buf, "%8d", sw_config->element[i].uplink_seid); sbuf_cat(buf, " "); sbuf_printf(buf, "%8d", sw_config->element[i].downlink_seid); 
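/* Connection type describes the uplink side, per the column header. */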
sbuf_cat(buf, " "); sbuf_printf(buf, "%#8x", sw_config->element[i].connection_type); if (i < sw_config->header.num_reported - 1) sbuf_cat(buf, "\n"); } sbuf_delete(nmbuf); error = sbuf_finish(buf); if (error) device_printf(dev, "Error finishing sbuf: %d\n", error); sbuf_delete(buf); return (error); } static int ixl_debug_info(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf; int error, input = 0; error = sysctl_handle_int(oidp, &input, 0, req); if (error || !req->newptr) return (error); if (input == 1) { pf = (struct ixl_pf *)arg1; ixl_print_debug_info(pf); } return (error); } static void ixl_print_debug_info(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; struct ixl_vsi *vsi = &pf->vsi; struct ixl_queue *que = vsi->queues; struct rx_ring *rxr = &que->rxr; struct tx_ring *txr = &que->txr; u32 reg; printf("Queue irqs = %jx\n", (uintmax_t)que->irqs); printf("AdminQ irqs = %jx\n", (uintmax_t)pf->admin_irq); printf("RX next check = %x\n", rxr->next_check); printf("RX not ready = %jx\n", (uintmax_t)rxr->not_done); printf("RX packets = %jx\n", (uintmax_t)rxr->rx_packets); printf("TX desc avail = %x\n", txr->avail); reg = rd32(hw, I40E_GLV_GORCL(0xc)); printf("RX Bytes = %x\n", reg); reg = rd32(hw, I40E_GLPRT_GORCL(hw->port)); printf("Port RX Bytes = %x\n", reg); reg = rd32(hw, I40E_GLV_RDPC(0xc)); printf("RX discard = %x\n", reg); reg = rd32(hw, I40E_GLPRT_RDPC(hw->port)); printf("Port RX discard = %x\n", reg); reg = rd32(hw, I40E_GLV_TEPC(0xc)); printf("TX errors = %x\n", reg); reg = rd32(hw, I40E_GLV_GOTCL(0xc)); printf("TX Bytes = %x\n", reg); reg = rd32(hw, I40E_GLPRT_RUC(hw->port)); printf("RX undersize = %x\n", reg); reg = rd32(hw, I40E_GLPRT_RFC(hw->port)); printf("RX fragments = %x\n", reg); reg = rd32(hw, I40E_GLPRT_ROC(hw->port)); printf("RX oversize = %x\n", reg); reg = rd32(hw, I40E_GLPRT_RLEC(hw->port)); printf("RX length error = %x\n", reg); reg = rd32(hw, I40E_GLPRT_MRFC(hw->port)); printf("mac remote fault = %x\n", reg); reg = rd32(hw, I40E_GLPRT_MLFC(hw->port)); printf("mac local fault = %x\n", reg); } #endif /* IXL_DEBUG_SYSCTL */ #ifdef PCI_IOV static int ixl_vf_alloc_vsi(struct ixl_pf *pf, struct ixl_vf *vf) { struct i40e_hw *hw; struct ixl_vsi *vsi; struct i40e_vsi_context vsi_ctx; int i; uint16_t first_queue; enum i40e_status_code code; hw = &pf->hw; vsi = &pf->vsi; vsi_ctx.pf_num = hw->pf_id; vsi_ctx.uplink_seid = pf->veb_seid; vsi_ctx.connection_type = IXL_VSI_DATA_PORT; vsi_ctx.vf_num = hw->func_caps.vf_base_id + vf->vf_num; vsi_ctx.flags = I40E_AQ_VSI_TYPE_VF; bzero(&vsi_ctx.info, sizeof(vsi_ctx.info)); vsi_ctx.info.valid_sections = htole16(I40E_AQ_VSI_PROP_SWITCH_VALID); vsi_ctx.info.switch_id = htole16(0); vsi_ctx.info.valid_sections |= htole16(I40E_AQ_VSI_PROP_SECURITY_VALID); vsi_ctx.info.sec_flags = 0; if (vf->vf_flags & VF_FLAG_MAC_ANTI_SPOOF) vsi_ctx.info.sec_flags |= I40E_AQ_VSI_SEC_FLAG_ENABLE_MAC_CHK; /* TODO: If a port VLAN is set, then this needs to be changed */ vsi_ctx.info.valid_sections |= htole16(I40E_AQ_VSI_PROP_VLAN_VALID); vsi_ctx.info.port_vlan_flags = I40E_AQ_VSI_PVLAN_MODE_ALL | I40E_AQ_VSI_PVLAN_EMOD_NOTHING; vsi_ctx.info.valid_sections |= htole16(I40E_AQ_VSI_PROP_QUEUE_MAP_VALID); vsi_ctx.info.mapping_flags = htole16(I40E_AQ_VSI_QUE_MAP_NONCONTIG); first_queue = vsi->num_queues + vf->vf_num * IXLV_MAX_QUEUES; for (i = 0; i < IXLV_MAX_QUEUES; i++) vsi_ctx.info.queue_mapping[i] = htole16(first_queue + i); for (; i < nitems(vsi_ctx.info.queue_mapping); i++) vsi_ctx.info.queue_mapping[i] = htole16(I40E_AQ_VSI_QUEUE_MASK); vsi_ctx.info.tc_mapping[0] = 
htole16( (0 << I40E_AQ_VSI_TC_QUE_OFFSET_SHIFT) | (1 << I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT)); code = i40e_aq_add_vsi(hw, &vsi_ctx, NULL); if (code != I40E_SUCCESS) return (ixl_adminq_err_to_errno(hw->aq.asq_last_status)); vf->vsi.seid = vsi_ctx.seid; vf->vsi.vsi_num = vsi_ctx.vsi_number; vf->vsi.first_queue = first_queue; vf->vsi.num_queues = IXLV_MAX_QUEUES; code = i40e_aq_get_vsi_params(hw, &vsi_ctx, NULL); if (code != I40E_SUCCESS) return (ixl_adminq_err_to_errno(hw->aq.asq_last_status)); code = i40e_aq_config_vsi_bw_limit(hw, vf->vsi.seid, 0, 0, NULL); if (code != I40E_SUCCESS) { device_printf(pf->dev, "Failed to disable BW limit: %d\n", ixl_adminq_err_to_errno(hw->aq.asq_last_status)); return (ixl_adminq_err_to_errno(hw->aq.asq_last_status)); } memcpy(&vf->vsi.info, &vsi_ctx.info, sizeof(vf->vsi.info)); return (0); } static int ixl_vf_setup_vsi(struct ixl_pf *pf, struct ixl_vf *vf) { struct i40e_hw *hw; int error; hw = &pf->hw; error = ixl_vf_alloc_vsi(pf, vf); if (error != 0) return (error); vf->vsi.hw_filters_add = 0; vf->vsi.hw_filters_del = 0; ixl_add_filter(&vf->vsi, ixl_bcast_addr, IXL_VLAN_ANY); ixl_reconfigure_filters(&vf->vsi); return (0); } static void ixl_vf_map_vsi_queue(struct i40e_hw *hw, struct ixl_vf *vf, int qnum, uint32_t val) { uint32_t qtable; int index, shift; /* * Two queues are mapped in a single register, so we have to do some * gymnastics to convert the queue number into a register index and * shift. */ index = qnum / 2; shift = (qnum % 2) * I40E_VSILAN_QTABLE_QINDEX_1_SHIFT; qtable = i40e_read_rx_ctl(hw, I40E_VSILAN_QTABLE(index, vf->vsi.vsi_num)); qtable &= ~(I40E_VSILAN_QTABLE_QINDEX_0_MASK << shift); qtable |= val << shift; i40e_write_rx_ctl(hw, I40E_VSILAN_QTABLE(index, vf->vsi.vsi_num), qtable); } static void ixl_vf_map_queues(struct ixl_pf *pf, struct ixl_vf *vf) { struct i40e_hw *hw; uint32_t qtable; int i; hw = &pf->hw; /* * Contiguous mappings aren't actually supported by the hardware, * so we have to use non-contiguous mappings. */ i40e_write_rx_ctl(hw, I40E_VSILAN_QBASE(vf->vsi.vsi_num), I40E_VSILAN_QBASE_VSIQTABLE_ENA_MASK); wr32(hw, I40E_VPLAN_MAPENA(vf->vf_num), I40E_VPLAN_MAPENA_TXRX_ENA_MASK); for (i = 0; i < vf->vsi.num_queues; i++) { qtable = (vf->vsi.first_queue + i) << I40E_VPLAN_QTABLE_QINDEX_SHIFT; wr32(hw, I40E_VPLAN_QTABLE(i, vf->vf_num), qtable); } /* Map queues allocated to VF to its VSI. */ for (i = 0; i < vf->vsi.num_queues; i++) ixl_vf_map_vsi_queue(hw, vf, i, vf->vsi.first_queue + i); /* Set rest of VSI queues as unused. */ for (; i < IXL_MAX_VSI_QUEUES; i++) ixl_vf_map_vsi_queue(hw, vf, i, I40E_VSILAN_QTABLE_QINDEX_0_MASK); ixl_flush(hw); } static void ixl_vf_vsi_release(struct ixl_pf *pf, struct ixl_vsi *vsi) { struct i40e_hw *hw; hw = &pf->hw; if (vsi->seid == 0) return; i40e_aq_delete_element(hw, vsi->seid, NULL); } static void ixl_vf_disable_queue_intr(struct i40e_hw *hw, uint32_t vfint_reg) { wr32(hw, vfint_reg, I40E_VFINT_DYN_CTLN_CLEARPBA_MASK); ixl_flush(hw); } static void ixl_vf_unregister_intr(struct i40e_hw *hw, uint32_t vpint_reg) { wr32(hw, vpint_reg, I40E_VPINT_LNKLSTN_FIRSTQ_TYPE_MASK | I40E_VPINT_LNKLSTN_FIRSTQ_INDX_MASK); ixl_flush(hw); } static void ixl_vf_release_resources(struct ixl_pf *pf, struct ixl_vf *vf) { struct i40e_hw *hw; uint32_t vfint_reg, vpint_reg; int i; hw = &pf->hw; ixl_vf_vsi_release(pf, &vf->vsi); /* Index 0 has a special register. 
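 * Vector 0 uses I40E_VFINT_DYN_CTL0 and I40E_VPINT_LNKLST0; vectors
 * 1..n use the DYN_CTLN/LNKLSTN variants computed below.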
*/ ixl_vf_disable_queue_intr(hw, I40E_VFINT_DYN_CTL0(vf->vf_num)); for (i = 1; i < hw->func_caps.num_msix_vectors_vf; i++) { vfint_reg = IXL_VFINT_DYN_CTLN_REG(hw, i , vf->vf_num); ixl_vf_disable_queue_intr(hw, vfint_reg); } /* Index 0 has a special register. */ ixl_vf_unregister_intr(hw, I40E_VPINT_LNKLST0(vf->vf_num)); for (i = 1; i < hw->func_caps.num_msix_vectors_vf; i++) { vpint_reg = IXL_VPINT_LNKLSTN_REG(hw, i, vf->vf_num); ixl_vf_unregister_intr(hw, vpint_reg); } vf->vsi.num_queues = 0; } static int ixl_flush_pcie(struct ixl_pf *pf, struct ixl_vf *vf) { struct i40e_hw *hw; int i; uint16_t global_vf_num; uint32_t ciad; hw = &pf->hw; global_vf_num = hw->func_caps.vf_base_id + vf->vf_num; wr32(hw, I40E_PF_PCI_CIAA, IXL_PF_PCI_CIAA_VF_DEVICE_STATUS | (global_vf_num << I40E_PF_PCI_CIAA_VF_NUM_SHIFT)); for (i = 0; i < IXL_VF_RESET_TIMEOUT; i++) { ciad = rd32(hw, I40E_PF_PCI_CIAD); if ((ciad & IXL_PF_PCI_CIAD_VF_TRANS_PENDING_MASK) == 0) return (0); DELAY(1); } return (ETIMEDOUT); } static void ixl_reset_vf(struct ixl_pf *pf, struct ixl_vf *vf) { struct i40e_hw *hw; uint32_t vfrtrig; hw = &pf->hw; vfrtrig = rd32(hw, I40E_VPGEN_VFRTRIG(vf->vf_num)); vfrtrig |= I40E_VPGEN_VFRTRIG_VFSWR_MASK; wr32(hw, I40E_VPGEN_VFRTRIG(vf->vf_num), vfrtrig); ixl_flush(hw); ixl_reinit_vf(pf, vf); } static void ixl_reinit_vf(struct ixl_pf *pf, struct ixl_vf *vf) { struct i40e_hw *hw; uint32_t vfrstat, vfrtrig; int i, error; hw = &pf->hw; error = ixl_flush_pcie(pf, vf); if (error != 0) device_printf(pf->dev, "Timed out waiting for PCIe activity to stop on VF-%d\n", vf->vf_num); for (i = 0; i < IXL_VF_RESET_TIMEOUT; i++) { DELAY(10); vfrstat = rd32(hw, I40E_VPGEN_VFRSTAT(vf->vf_num)); if (vfrstat & I40E_VPGEN_VFRSTAT_VFRD_MASK) break; } if (i == IXL_VF_RESET_TIMEOUT) device_printf(pf->dev, "VF %d failed to reset\n", vf->vf_num); wr32(hw, I40E_VFGEN_RSTAT1(vf->vf_num), I40E_VFR_COMPLETED); vfrtrig = rd32(hw, I40E_VPGEN_VFRTRIG(vf->vf_num)); vfrtrig &= ~I40E_VPGEN_VFRTRIG_VFSWR_MASK; wr32(hw, I40E_VPGEN_VFRTRIG(vf->vf_num), vfrtrig); if (vf->vsi.seid != 0) ixl_disable_rings(&vf->vsi); ixl_vf_release_resources(pf, vf); ixl_vf_setup_vsi(pf, vf); ixl_vf_map_queues(pf, vf); wr32(hw, I40E_VFGEN_RSTAT1(vf->vf_num), I40E_VFR_VFACTIVE); ixl_flush(hw); } static const char * ixl_vc_opcode_str(uint16_t op) { switch (op) { case I40E_VIRTCHNL_OP_VERSION: return ("VERSION"); case I40E_VIRTCHNL_OP_RESET_VF: return ("RESET_VF"); case I40E_VIRTCHNL_OP_GET_VF_RESOURCES: return ("GET_VF_RESOURCES"); case I40E_VIRTCHNL_OP_CONFIG_TX_QUEUE: return ("CONFIG_TX_QUEUE"); case I40E_VIRTCHNL_OP_CONFIG_RX_QUEUE: return ("CONFIG_RX_QUEUE"); case I40E_VIRTCHNL_OP_CONFIG_VSI_QUEUES: return ("CONFIG_VSI_QUEUES"); case I40E_VIRTCHNL_OP_CONFIG_IRQ_MAP: return ("CONFIG_IRQ_MAP"); case I40E_VIRTCHNL_OP_ENABLE_QUEUES: return ("ENABLE_QUEUES"); case I40E_VIRTCHNL_OP_DISABLE_QUEUES: return ("DISABLE_QUEUES"); case I40E_VIRTCHNL_OP_ADD_ETHER_ADDRESS: return ("ADD_ETHER_ADDRESS"); case I40E_VIRTCHNL_OP_DEL_ETHER_ADDRESS: return ("DEL_ETHER_ADDRESS"); case I40E_VIRTCHNL_OP_ADD_VLAN: return ("ADD_VLAN"); case I40E_VIRTCHNL_OP_DEL_VLAN: return ("DEL_VLAN"); case I40E_VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE: return ("CONFIG_PROMISCUOUS_MODE"); case I40E_VIRTCHNL_OP_GET_STATS: return ("GET_STATS"); case I40E_VIRTCHNL_OP_FCOE: return ("FCOE"); case I40E_VIRTCHNL_OP_EVENT: return ("EVENT"); default: return ("UNKNOWN"); } } static int ixl_vc_opcode_level(uint16_t opcode) { switch (opcode) { case I40E_VIRTCHNL_OP_GET_STATS: return (10); default: return (5); } } 
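/*
 * GET_STATS arrives continuously from active VFs, so it is logged only
 * at the highest verbosity (10); every other opcode logs at level 5.
 * I40E_VC_DEBUG() compares these levels against the vc_debug_level
 * sysctl before printing.
 */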
static void ixl_send_vf_msg(struct ixl_pf *pf, struct ixl_vf *vf, uint16_t op, enum i40e_status_code status, void *msg, uint16_t len) { struct i40e_hw *hw; int global_vf_id; hw = &pf->hw; global_vf_id = hw->func_caps.vf_base_id + vf->vf_num; I40E_VC_DEBUG(pf, ixl_vc_opcode_level(op), "Sending msg (op=%s[%d], status=%d) to VF-%d\n", ixl_vc_opcode_str(op), op, status, vf->vf_num); i40e_aq_send_msg_to_vf(hw, global_vf_id, op, status, msg, len, NULL); } static void ixl_send_vf_ack(struct ixl_pf *pf, struct ixl_vf *vf, uint16_t op) { ixl_send_vf_msg(pf, vf, op, I40E_SUCCESS, NULL, 0); } static void ixl_send_vf_nack_msg(struct ixl_pf *pf, struct ixl_vf *vf, uint16_t op, enum i40e_status_code status, const char *file, int line) { I40E_VC_DEBUG(pf, 1, "Sending NACK (op=%s[%d], err=%d) to VF-%d from %s:%d\n", ixl_vc_opcode_str(op), op, status, vf->vf_num, file, line); ixl_send_vf_msg(pf, vf, op, status, NULL, 0); } static void ixl_vf_version_msg(struct ixl_pf *pf, struct ixl_vf *vf, void *msg, uint16_t msg_size) { struct i40e_virtchnl_version_info reply; if (msg_size != sizeof(struct i40e_virtchnl_version_info)) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_VERSION, I40E_ERR_PARAM); return; } vf->version = ((struct i40e_virtchnl_version_info *)msg)->minor; reply.major = I40E_VIRTCHNL_VERSION_MAJOR; reply.minor = I40E_VIRTCHNL_VERSION_MINOR; ixl_send_vf_msg(pf, vf, I40E_VIRTCHNL_OP_VERSION, I40E_SUCCESS, &reply, sizeof(reply)); } static void ixl_vf_reset_msg(struct ixl_pf *pf, struct ixl_vf *vf, void *msg, uint16_t msg_size) { if (msg_size != 0) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_RESET_VF, I40E_ERR_PARAM); return; } ixl_reset_vf(pf, vf); /* No response to a reset message. */ } static void ixl_vf_get_resources_msg(struct ixl_pf *pf, struct ixl_vf *vf, void *msg, uint16_t msg_size) { struct i40e_virtchnl_vf_resource reply; if ((vf->version == 0 && msg_size != 0) || (vf->version == 1 && msg_size != 4)) { device_printf(pf->dev, "Invalid GET_VF_RESOURCES message size," " for VF version %d.%d\n", I40E_VIRTCHNL_VERSION_MAJOR, vf->version); i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_GET_VF_RESOURCES, I40E_ERR_PARAM); return; } bzero(&reply, sizeof(reply)); if (vf->version == I40E_VIRTCHNL_VERSION_MINOR_NO_VF_CAPS) reply.vf_offload_flags = I40E_VIRTCHNL_VF_OFFLOAD_L2 | I40E_VIRTCHNL_VF_OFFLOAD_RSS_REG | I40E_VIRTCHNL_VF_OFFLOAD_VLAN; else reply.vf_offload_flags = *(u32 *)msg; reply.num_vsis = 1; reply.num_queue_pairs = vf->vsi.num_queues; reply.max_vectors = pf->hw.func_caps.num_msix_vectors_vf; reply.vsi_res[0].vsi_id = vf->vsi.vsi_num; reply.vsi_res[0].vsi_type = I40E_VSI_SRIOV; reply.vsi_res[0].num_queue_pairs = vf->vsi.num_queues; memcpy(reply.vsi_res[0].default_mac_addr, vf->mac, ETHER_ADDR_LEN); ixl_send_vf_msg(pf, vf, I40E_VIRTCHNL_OP_GET_VF_RESOURCES, I40E_SUCCESS, &reply, sizeof(reply)); } static int ixl_vf_config_tx_queue(struct ixl_pf *pf, struct ixl_vf *vf, struct i40e_virtchnl_txq_info *info) { struct i40e_hw *hw; struct i40e_hmc_obj_txq txq; uint16_t global_queue_num, global_vf_num; enum i40e_status_code status; uint32_t qtx_ctl; hw = &pf->hw; global_queue_num = vf->vsi.first_queue + info->queue_id; global_vf_num = hw->func_caps.vf_base_id + vf->vf_num; bzero(&txq, sizeof(txq)); status = i40e_clear_lan_tx_queue_context(hw, global_queue_num); if (status != I40E_SUCCESS) return (EINVAL); txq.base = info->dma_ring_addr / IXL_TX_CTX_BASE_UNITS; txq.head_wb_ena = info->headwb_enabled; txq.head_wb_addr = info->dma_headwb_addr; txq.qlen = info->ring_len; txq.rdylist = 
le16_to_cpu(vf->vsi.info.qs_handle[0]); txq.rdylist_act = 0; status = i40e_set_lan_tx_queue_context(hw, global_queue_num, &txq); if (status != I40E_SUCCESS) return (EINVAL); qtx_ctl = I40E_QTX_CTL_VF_QUEUE | (hw->pf_id << I40E_QTX_CTL_PF_INDX_SHIFT) | (global_vf_num << I40E_QTX_CTL_VFVM_INDX_SHIFT); wr32(hw, I40E_QTX_CTL(global_queue_num), qtx_ctl); ixl_flush(hw); return (0); } static int ixl_vf_config_rx_queue(struct ixl_pf *pf, struct ixl_vf *vf, struct i40e_virtchnl_rxq_info *info) { struct i40e_hw *hw; struct i40e_hmc_obj_rxq rxq; uint16_t global_queue_num; enum i40e_status_code status; hw = &pf->hw; global_queue_num = vf->vsi.first_queue + info->queue_id; bzero(&rxq, sizeof(rxq)); if (info->databuffer_size > IXL_VF_MAX_BUFFER) return (EINVAL); if (info->max_pkt_size > IXL_VF_MAX_FRAME || info->max_pkt_size < ETHER_MIN_LEN) return (EINVAL); if (info->splithdr_enabled) { if (info->hdr_size > IXL_VF_MAX_HDR_BUFFER) return (EINVAL); rxq.hsplit_0 = info->rx_split_pos & (I40E_HMC_OBJ_RX_HSPLIT_0_SPLIT_L2 | I40E_HMC_OBJ_RX_HSPLIT_0_SPLIT_IP | I40E_HMC_OBJ_RX_HSPLIT_0_SPLIT_TCP_UDP | I40E_HMC_OBJ_RX_HSPLIT_0_SPLIT_SCTP); rxq.hbuff = info->hdr_size >> I40E_RXQ_CTX_HBUFF_SHIFT; rxq.dtype = 2; } status = i40e_clear_lan_rx_queue_context(hw, global_queue_num); if (status != I40E_SUCCESS) return (EINVAL); rxq.base = info->dma_ring_addr / IXL_RX_CTX_BASE_UNITS; rxq.qlen = info->ring_len; rxq.dbuff = info->databuffer_size >> I40E_RXQ_CTX_DBUFF_SHIFT; rxq.dsize = 1; rxq.crcstrip = 1; rxq.l2tsel = 1; rxq.rxmax = info->max_pkt_size; rxq.tphrdesc_ena = 1; rxq.tphwdesc_ena = 1; rxq.tphdata_ena = 1; rxq.tphhead_ena = 1; rxq.lrxqthresh = 2; rxq.prefena = 1; status = i40e_set_lan_rx_queue_context(hw, global_queue_num, &rxq); if (status != I40E_SUCCESS) return (EINVAL); return (0); } static void ixl_vf_config_vsi_msg(struct ixl_pf *pf, struct ixl_vf *vf, void *msg, uint16_t msg_size) { struct i40e_virtchnl_vsi_queue_config_info *info; struct i40e_virtchnl_queue_pair_info *pair; int i; if (msg_size < sizeof(*info)) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_CONFIG_VSI_QUEUES, I40E_ERR_PARAM); return; } info = msg; if (info->num_queue_pairs == 0) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_CONFIG_VSI_QUEUES, I40E_ERR_PARAM); return; } if (msg_size != sizeof(*info) + info->num_queue_pairs * sizeof(*pair)) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_CONFIG_VSI_QUEUES, I40E_ERR_PARAM); return; } if (info->vsi_id != vf->vsi.vsi_num) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_CONFIG_VSI_QUEUES, I40E_ERR_PARAM); return; } for (i = 0; i < info->num_queue_pairs; i++) { pair = &info->qpair[i]; if (pair->txq.vsi_id != vf->vsi.vsi_num || pair->rxq.vsi_id != vf->vsi.vsi_num || pair->txq.queue_id != pair->rxq.queue_id || pair->txq.queue_id >= vf->vsi.num_queues) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_CONFIG_VSI_QUEUES, I40E_ERR_PARAM); return; } if (ixl_vf_config_tx_queue(pf, vf, &pair->txq) != 0) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_CONFIG_VSI_QUEUES, I40E_ERR_PARAM); return; } if (ixl_vf_config_rx_queue(pf, vf, &pair->rxq) != 0) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_CONFIG_VSI_QUEUES, I40E_ERR_PARAM); return; } } ixl_send_vf_ack(pf, vf, I40E_VIRTCHNL_OP_CONFIG_VSI_QUEUES); } static void ixl_vf_set_qctl(struct ixl_pf *pf, const struct i40e_virtchnl_vector_map *vector, enum i40e_queue_type cur_type, uint16_t cur_queue, enum i40e_queue_type *last_type, uint16_t *last_queue) { uint32_t offset, qctl; uint16_t itr_indx; if (cur_type == I40E_QUEUE_TYPE_RX) { offset = I40E_QINT_RQCTL(cur_queue); 
itr_indx = vector->rxitr_idx; } else { offset = I40E_QINT_TQCTL(cur_queue); itr_indx = vector->txitr_idx; } qctl = htole32((vector->vector_id << I40E_QINT_RQCTL_MSIX_INDX_SHIFT) | (*last_type << I40E_QINT_RQCTL_NEXTQ_TYPE_SHIFT) | (*last_queue << I40E_QINT_RQCTL_NEXTQ_INDX_SHIFT) | I40E_QINT_RQCTL_CAUSE_ENA_MASK | (itr_indx << I40E_QINT_RQCTL_ITR_INDX_SHIFT)); wr32(&pf->hw, offset, qctl); *last_type = cur_type; *last_queue = cur_queue; } static void ixl_vf_config_vector(struct ixl_pf *pf, struct ixl_vf *vf, const struct i40e_virtchnl_vector_map *vector) { struct i40e_hw *hw; u_int qindex; enum i40e_queue_type type, last_type; uint32_t lnklst_reg; uint16_t rxq_map, txq_map, cur_queue, last_queue; hw = &pf->hw; rxq_map = vector->rxq_map; txq_map = vector->txq_map; last_queue = IXL_END_OF_INTR_LNKLST; last_type = I40E_QUEUE_TYPE_RX; /* * The datasheet says to optimize performance, RX queues and TX queues * should be interleaved in the interrupt linked list, so we process * both at once here. */ while ((rxq_map != 0) || (txq_map != 0)) { if (txq_map != 0) { qindex = ffs(txq_map) - 1; type = I40E_QUEUE_TYPE_TX; cur_queue = vf->vsi.first_queue + qindex; ixl_vf_set_qctl(pf, vector, type, cur_queue, &last_type, &last_queue); txq_map &= ~(1 << qindex); } if (rxq_map != 0) { qindex = ffs(rxq_map) - 1; type = I40E_QUEUE_TYPE_RX; cur_queue = vf->vsi.first_queue + qindex; ixl_vf_set_qctl(pf, vector, type, cur_queue, &last_type, &last_queue); rxq_map &= ~(1 << qindex); } } if (vector->vector_id == 0) lnklst_reg = I40E_VPINT_LNKLST0(vf->vf_num); else lnklst_reg = IXL_VPINT_LNKLSTN_REG(hw, vector->vector_id, vf->vf_num); wr32(hw, lnklst_reg, (last_queue << I40E_VPINT_LNKLST0_FIRSTQ_INDX_SHIFT) | (last_type << I40E_VPINT_LNKLST0_FIRSTQ_TYPE_SHIFT)); ixl_flush(hw); } static void ixl_vf_config_irq_msg(struct ixl_pf *pf, struct ixl_vf *vf, void *msg, uint16_t msg_size) { struct i40e_virtchnl_irq_map_info *map; struct i40e_virtchnl_vector_map *vector; struct i40e_hw *hw; int i, largest_txq, largest_rxq; hw = &pf->hw; if (msg_size < sizeof(*map)) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_CONFIG_IRQ_MAP, I40E_ERR_PARAM); return; } map = msg; if (map->num_vectors == 0) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_CONFIG_IRQ_MAP, I40E_ERR_PARAM); return; } if (msg_size != sizeof(*map) + map->num_vectors * sizeof(*vector)) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_CONFIG_IRQ_MAP, I40E_ERR_PARAM); return; } for (i = 0; i < map->num_vectors; i++) { vector = &map->vecmap[i]; if ((vector->vector_id >= hw->func_caps.num_msix_vectors_vf) || vector->vsi_id != vf->vsi.vsi_num) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_CONFIG_IRQ_MAP, I40E_ERR_PARAM); return; } if (vector->rxq_map != 0) { largest_rxq = fls(vector->rxq_map) - 1; if (largest_rxq >= vf->vsi.num_queues) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_CONFIG_IRQ_MAP, I40E_ERR_PARAM); return; } } if (vector->txq_map != 0) { largest_txq = fls(vector->txq_map) - 1; if (largest_txq >= vf->vsi.num_queues) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_CONFIG_IRQ_MAP, I40E_ERR_PARAM); return; } } if (vector->rxitr_idx > IXL_MAX_ITR_IDX || vector->txitr_idx > IXL_MAX_ITR_IDX) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_CONFIG_IRQ_MAP, I40E_ERR_PARAM); return; } ixl_vf_config_vector(pf, vf, vector); } ixl_send_vf_ack(pf, vf, I40E_VIRTCHNL_OP_CONFIG_IRQ_MAP); } static void ixl_vf_enable_queues_msg(struct ixl_pf *pf, struct ixl_vf *vf, void *msg, uint16_t msg_size) { struct i40e_virtchnl_queue_select *select; int error; if (msg_size != sizeof(*select)) { 
i40e_send_vf_nack(pf, vf,
		    I40E_VIRTCHNL_OP_ENABLE_QUEUES, I40E_ERR_PARAM);
		return;
	}

	select = msg;
	if (select->vsi_id != vf->vsi.vsi_num ||
	    select->rx_queues == 0 || select->tx_queues == 0) {
		i40e_send_vf_nack(pf, vf,
		    I40E_VIRTCHNL_OP_ENABLE_QUEUES, I40E_ERR_PARAM);
		return;
	}

	error = ixl_enable_rings(&vf->vsi);
	if (error) {
		i40e_send_vf_nack(pf, vf,
		    I40E_VIRTCHNL_OP_ENABLE_QUEUES, I40E_ERR_TIMEOUT);
		return;
	}

	ixl_send_vf_ack(pf, vf, I40E_VIRTCHNL_OP_ENABLE_QUEUES);
}

static void
ixl_vf_disable_queues_msg(struct ixl_pf *pf, struct ixl_vf *vf,
    void *msg, uint16_t msg_size)
{
	struct i40e_virtchnl_queue_select *select;
	int error;

	if (msg_size != sizeof(*select)) {
		i40e_send_vf_nack(pf, vf,
		    I40E_VIRTCHNL_OP_DISABLE_QUEUES, I40E_ERR_PARAM);
		return;
	}

	select = msg;
	if (select->vsi_id != vf->vsi.vsi_num ||
	    select->rx_queues == 0 || select->tx_queues == 0) {
		i40e_send_vf_nack(pf, vf,
		    I40E_VIRTCHNL_OP_DISABLE_QUEUES, I40E_ERR_PARAM);
		return;
	}

	error = ixl_disable_rings(&vf->vsi);
	if (error) {
		i40e_send_vf_nack(pf, vf,
		    I40E_VIRTCHNL_OP_DISABLE_QUEUES, I40E_ERR_TIMEOUT);
		return;
	}

	ixl_send_vf_ack(pf, vf, I40E_VIRTCHNL_OP_DISABLE_QUEUES);
}

static boolean_t
ixl_zero_mac(const uint8_t *addr)
{
	uint8_t zero[ETHER_ADDR_LEN] = {0, 0, 0, 0, 0, 0};

	return (cmp_etheraddr(addr, zero));
}

static boolean_t
ixl_bcast_mac(const uint8_t *addr)
{

	return (cmp_etheraddr(addr, ixl_bcast_addr));
}

static int
ixl_vf_mac_valid(struct ixl_vf *vf, const uint8_t *addr)
{

	if (ixl_zero_mac(addr) || ixl_bcast_mac(addr))
		return (EINVAL);

	/*
	 * If the VF is not allowed to change its MAC address, don't let it
	 * set a MAC filter for an address that is not a multicast address
	 * and is not its assigned MAC.
	 */
	if (!(vf->vf_flags & VF_FLAG_SET_MAC_CAP) &&
	    !(ETHER_IS_MULTICAST(addr) || cmp_etheraddr(addr, vf->mac)))
		return (EPERM);

	return (0);
}

static void
ixl_vf_add_mac_msg(struct ixl_pf *pf, struct ixl_vf *vf, void *msg,
    uint16_t msg_size)
{
	struct i40e_virtchnl_ether_addr_list *addr_list;
	struct i40e_virtchnl_ether_addr *addr;
	struct ixl_vsi *vsi;
	int i;
	size_t expected_size;

	vsi = &vf->vsi;

	if (msg_size < sizeof(*addr_list)) {
		i40e_send_vf_nack(pf, vf,
		    I40E_VIRTCHNL_OP_ADD_ETHER_ADDRESS, I40E_ERR_PARAM);
		return;
	}

	addr_list = msg;
	expected_size = sizeof(*addr_list) +
	    addr_list->num_elements * sizeof(*addr);

	if (addr_list->num_elements == 0 ||
	    addr_list->vsi_id != vsi->vsi_num ||
	    msg_size != expected_size) {
		i40e_send_vf_nack(pf, vf,
		    I40E_VIRTCHNL_OP_ADD_ETHER_ADDRESS, I40E_ERR_PARAM);
		return;
	}

	for (i = 0; i < addr_list->num_elements; i++) {
		if (ixl_vf_mac_valid(vf, addr_list->list[i].addr) != 0) {
			i40e_send_vf_nack(pf, vf,
			    I40E_VIRTCHNL_OP_ADD_ETHER_ADDRESS,
			    I40E_ERR_PARAM);
			return;
		}
	}

	for (i = 0; i < addr_list->num_elements; i++) {
		addr = &addr_list->list[i];
		ixl_add_filter(vsi, addr->addr, IXL_VLAN_ANY);
	}

	ixl_send_vf_ack(pf, vf, I40E_VIRTCHNL_OP_ADD_ETHER_ADDRESS);
}

static void
ixl_vf_del_mac_msg(struct ixl_pf *pf, struct ixl_vf *vf, void *msg,
    uint16_t msg_size)
{
	struct i40e_virtchnl_ether_addr_list *addr_list;
	struct i40e_virtchnl_ether_addr *addr;
	size_t expected_size;
	int i;

	if (msg_size < sizeof(*addr_list)) {
		i40e_send_vf_nack(pf, vf,
		    I40E_VIRTCHNL_OP_DEL_ETHER_ADDRESS, I40E_ERR_PARAM);
		return;
	}

	addr_list = msg;
	expected_size = sizeof(*addr_list) +
	    addr_list->num_elements * sizeof(*addr);

	if (addr_list->num_elements == 0 ||
	    addr_list->vsi_id != vf->vsi.vsi_num ||
	    msg_size != expected_size) {
		i40e_send_vf_nack(pf, vf,
		    I40E_VIRTCHNL_OP_DEL_ETHER_ADDRESS, I40E_ERR_PARAM);
		return;
	}

	for (i = 0; i < addr_list->num_elements; i++) {
		addr = &addr_list->list[i];
		if (ixl_zero_mac(addr->addr) || ixl_bcast_mac(addr->addr)) {
			i40e_send_vf_nack(pf, vf,
			    I40E_VIRTCHNL_OP_DEL_ETHER_ADDRESS,
			    I40E_ERR_PARAM);
			return;
		}
	}

	for (i = 0; i < addr_list->num_elements; i++) {
		addr = &addr_list->list[i];
		ixl_del_filter(&vf->vsi, addr->addr, IXL_VLAN_ANY);
	}

	ixl_send_vf_ack(pf, vf, I40E_VIRTCHNL_OP_DEL_ETHER_ADDRESS);
}

static enum i40e_status_code
ixl_vf_enable_vlan_strip(struct ixl_pf *pf, struct ixl_vf *vf)
{
	struct i40e_vsi_context vsi_ctx;

	vsi_ctx.seid = vf->vsi.seid;

	bzero(&vsi_ctx.info, sizeof(vsi_ctx.info));
	vsi_ctx.info.valid_sections = htole16(I40E_AQ_VSI_PROP_VLAN_VALID);
	vsi_ctx.info.port_vlan_flags = I40E_AQ_VSI_PVLAN_MODE_ALL |
	    I40E_AQ_VSI_PVLAN_EMOD_STR_BOTH;

	return (i40e_aq_update_vsi_params(&pf->hw, &vsi_ctx, NULL));
}

static void
ixl_vf_add_vlan_msg(struct ixl_pf *pf, struct ixl_vf *vf, void *msg,
    uint16_t msg_size)
{
	struct i40e_virtchnl_vlan_filter_list *filter_list;
	enum i40e_status_code code;
	size_t expected_size;
	int i;

	if (msg_size < sizeof(*filter_list)) {
		i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_ADD_VLAN,
		    I40E_ERR_PARAM);
		return;
	}

	filter_list = msg;
	expected_size = sizeof(*filter_list) +
	    filter_list->num_elements * sizeof(uint16_t);
	if (filter_list->num_elements == 0 ||
	    filter_list->vsi_id != vf->vsi.vsi_num ||
	    msg_size != expected_size) {
		i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_ADD_VLAN,
		    I40E_ERR_PARAM);
		return;
	}

	if (!(vf->vf_flags & VF_FLAG_VLAN_CAP)) {
		i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_ADD_VLAN,
		    I40E_ERR_PARAM);
		return;
	}

	for (i = 0; i < filter_list->num_elements; i++) {
		if (filter_list->vlan_id[i] > EVL_VLID_MASK) {
			i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_ADD_VLAN,
			    I40E_ERR_PARAM);
			return;
		}
	}

	code = ixl_vf_enable_vlan_strip(pf, vf);
	if (code != I40E_SUCCESS) {
		i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_ADD_VLAN,
		    I40E_ERR_PARAM);
		return;
	}

	for (i = 0; i < filter_list->num_elements; i++)
		ixl_add_filter(&vf->vsi, vf->mac, filter_list->vlan_id[i]);

	ixl_send_vf_ack(pf, vf, I40E_VIRTCHNL_OP_ADD_VLAN);
}

static void
ixl_vf_del_vlan_msg(struct ixl_pf *pf, struct ixl_vf *vf, void *msg,
    uint16_t msg_size)
{
	struct i40e_virtchnl_vlan_filter_list *filter_list;
	int i;
	size_t expected_size;

	if (msg_size < sizeof(*filter_list)) {
		i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_DEL_VLAN,
		    I40E_ERR_PARAM);
		return;
	}

	filter_list = msg;
	expected_size = sizeof(*filter_list) +
	    filter_list->num_elements * sizeof(uint16_t);
	if (filter_list->num_elements == 0 ||
	    filter_list->vsi_id != vf->vsi.vsi_num ||
	    msg_size != expected_size) {
		i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_DEL_VLAN,
		    I40E_ERR_PARAM);
		return;
	}

	for (i = 0; i < filter_list->num_elements; i++) {
		if (filter_list->vlan_id[i] > EVL_VLID_MASK) {
			i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_DEL_VLAN,
			    I40E_ERR_PARAM);
			return;
		}
	}

	if (!(vf->vf_flags & VF_FLAG_VLAN_CAP)) {
		i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_DEL_VLAN,
		    I40E_ERR_PARAM);
		return;
	}

	for (i = 0; i < filter_list->num_elements; i++)
		ixl_del_filter(&vf->vsi, vf->mac, filter_list->vlan_id[i]);

	ixl_send_vf_ack(pf, vf, I40E_VIRTCHNL_OP_DEL_VLAN);
}

static void
ixl_vf_config_promisc_msg(struct ixl_pf *pf, struct ixl_vf *vf,
    void *msg, uint16_t msg_size)
{
	struct i40e_virtchnl_promisc_info *info;
	enum i40e_status_code code;

	if (msg_size != sizeof(*info)) {
		i40e_send_vf_nack(pf, vf,
		    I40E_VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE, I40E_ERR_PARAM);
		return;
	}

	if (!(vf->vf_flags & VF_FLAG_PROMISC_CAP)) {
		i40e_send_vf_nack(pf, vf,
		    I40E_VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE, I40E_ERR_PARAM);
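	/*
	 * Every list-style handler above applies the same length rule: the
	 * message must be exactly a header plus num_elements records.  A
	 * stand-in version of that check (the struct layout is illustrative,
	 * not the real virtchnl definition):
	 */
#if 0
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct addr_list {
	uint16_t vsi_id;
	uint16_t num_elements;
	uint8_t list[][6];	/* flexible array of MAC addresses */
};

static bool
addr_list_len_ok(const struct addr_list *al, size_t msg_size)
{
	size_t expected;

	if (msg_size < sizeof(*al) || al->num_elements == 0)
		return (false);
	/* the uint16_t count cannot overflow this size_t product */
	expected = sizeof(*al) +
	    (size_t)al->num_elements * sizeof(al->list[0]);
	return (msg_size == expected);
}

int
main(void)
{
	struct addr_list al = { 1, 2 };

	printf("ok=%d\n", addr_list_len_ok(&al, sizeof(al) + 2 * 6));
	return (0);
}
#endif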
return; } info = msg; if (info->vsi_id != vf->vsi.vsi_num) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE, I40E_ERR_PARAM); return; } code = i40e_aq_set_vsi_unicast_promiscuous(&pf->hw, info->vsi_id, info->flags & I40E_FLAG_VF_UNICAST_PROMISC, NULL); if (code != I40E_SUCCESS) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE, code); return; } code = i40e_aq_set_vsi_multicast_promiscuous(&pf->hw, info->vsi_id, info->flags & I40E_FLAG_VF_MULTICAST_PROMISC, NULL); if (code != I40E_SUCCESS) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE, code); return; } ixl_send_vf_ack(pf, vf, I40E_VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE); } static void ixl_vf_get_stats_msg(struct ixl_pf *pf, struct ixl_vf *vf, void *msg, uint16_t msg_size) { struct i40e_virtchnl_queue_select *queue; if (msg_size != sizeof(*queue)) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_GET_STATS, I40E_ERR_PARAM); return; } queue = msg; if (queue->vsi_id != vf->vsi.vsi_num) { i40e_send_vf_nack(pf, vf, I40E_VIRTCHNL_OP_GET_STATS, I40E_ERR_PARAM); return; } ixl_update_eth_stats(&vf->vsi); ixl_send_vf_msg(pf, vf, I40E_VIRTCHNL_OP_GET_STATS, I40E_SUCCESS, &vf->vsi.eth_stats, sizeof(vf->vsi.eth_stats)); } static void ixl_handle_vf_msg(struct ixl_pf *pf, struct i40e_arq_event_info *event) { struct ixl_vf *vf; void *msg; uint16_t vf_num, msg_size; uint32_t opcode; vf_num = le16toh(event->desc.retval) - pf->hw.func_caps.vf_base_id; opcode = le32toh(event->desc.cookie_high); if (vf_num >= pf->num_vfs) { device_printf(pf->dev, "Got msg from illegal VF: %d\n", vf_num); return; } vf = &pf->vfs[vf_num]; msg = event->msg_buf; msg_size = event->msg_len; I40E_VC_DEBUG(pf, ixl_vc_opcode_level(opcode), "Got msg %s(%d) from VF-%d of size %d\n", ixl_vc_opcode_str(opcode), opcode, vf_num, msg_size); switch (opcode) { case I40E_VIRTCHNL_OP_VERSION: ixl_vf_version_msg(pf, vf, msg, msg_size); break; case I40E_VIRTCHNL_OP_RESET_VF: ixl_vf_reset_msg(pf, vf, msg, msg_size); break; case I40E_VIRTCHNL_OP_GET_VF_RESOURCES: ixl_vf_get_resources_msg(pf, vf, msg, msg_size); break; case I40E_VIRTCHNL_OP_CONFIG_VSI_QUEUES: ixl_vf_config_vsi_msg(pf, vf, msg, msg_size); break; case I40E_VIRTCHNL_OP_CONFIG_IRQ_MAP: ixl_vf_config_irq_msg(pf, vf, msg, msg_size); break; case I40E_VIRTCHNL_OP_ENABLE_QUEUES: ixl_vf_enable_queues_msg(pf, vf, msg, msg_size); break; case I40E_VIRTCHNL_OP_DISABLE_QUEUES: ixl_vf_disable_queues_msg(pf, vf, msg, msg_size); break; case I40E_VIRTCHNL_OP_ADD_ETHER_ADDRESS: ixl_vf_add_mac_msg(pf, vf, msg, msg_size); break; case I40E_VIRTCHNL_OP_DEL_ETHER_ADDRESS: ixl_vf_del_mac_msg(pf, vf, msg, msg_size); break; case I40E_VIRTCHNL_OP_ADD_VLAN: ixl_vf_add_vlan_msg(pf, vf, msg, msg_size); break; case I40E_VIRTCHNL_OP_DEL_VLAN: ixl_vf_del_vlan_msg(pf, vf, msg, msg_size); break; case I40E_VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE: ixl_vf_config_promisc_msg(pf, vf, msg, msg_size); break; case I40E_VIRTCHNL_OP_GET_STATS: ixl_vf_get_stats_msg(pf, vf, msg, msg_size); break; /* These two opcodes have been superseded by CONFIG_VSI_QUEUES. */ case I40E_VIRTCHNL_OP_CONFIG_TX_QUEUE: case I40E_VIRTCHNL_OP_CONFIG_RX_QUEUE: default: i40e_send_vf_nack(pf, vf, opcode, I40E_ERR_NOT_IMPLEMENTED); break; } } /* Handle any VFs that have reset themselves via a Function Level Reset(FLR). 
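 * Each VF latches its FLR event as one bit in the GLGEN_VFLRSTAT register
 * array, so the handler below derives a register index and a bit mask from
 * the global VF number.  A minimal sketch of that arithmetic, assuming the
 * usual 32-bits-per-register layout behind IXL_GLGEN_VFLRSTAT_INDEX/_MASK:
 */
#if 0
#include <stdint.h>
#include <stdio.h>

#define VFLRSTAT_INDEX(vf)	((vf) >> 5)	/* which 32-bit register */
#define VFLRSTAT_MASK(vf)	(UINT32_C(1) << ((vf) & 31))	/* which bit */

int
main(void)
{
	uint16_t global_vf_num = 37;

	printf("VF %u -> GLGEN_VFLRSTAT(%u), mask %#010x\n",
	    global_vf_num, VFLRSTAT_INDEX(global_vf_num),
	    VFLRSTAT_MASK(global_vf_num));
	return (0);
}
#endif
/*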
*/ static void ixl_handle_vflr(void *arg, int pending) { struct ixl_pf *pf; struct i40e_hw *hw; uint16_t global_vf_num; uint32_t vflrstat_index, vflrstat_mask, vflrstat, icr0; int i; pf = arg; hw = &pf->hw; IXL_PF_LOCK(pf); for (i = 0; i < pf->num_vfs; i++) { global_vf_num = hw->func_caps.vf_base_id + i; vflrstat_index = IXL_GLGEN_VFLRSTAT_INDEX(global_vf_num); vflrstat_mask = IXL_GLGEN_VFLRSTAT_MASK(global_vf_num); vflrstat = rd32(hw, I40E_GLGEN_VFLRSTAT(vflrstat_index)); if (vflrstat & vflrstat_mask) { wr32(hw, I40E_GLGEN_VFLRSTAT(vflrstat_index), vflrstat_mask); ixl_reinit_vf(pf, &pf->vfs[i]); } } icr0 = rd32(hw, I40E_PFINT_ICR0_ENA); icr0 |= I40E_PFINT_ICR0_ENA_VFLR_MASK; wr32(hw, I40E_PFINT_ICR0_ENA, icr0); ixl_flush(hw); IXL_PF_UNLOCK(pf); } static int ixl_adminq_err_to_errno(enum i40e_admin_queue_err err) { switch (err) { case I40E_AQ_RC_EPERM: return (EPERM); case I40E_AQ_RC_ENOENT: return (ENOENT); case I40E_AQ_RC_ESRCH: return (ESRCH); case I40E_AQ_RC_EINTR: return (EINTR); case I40E_AQ_RC_EIO: return (EIO); case I40E_AQ_RC_ENXIO: return (ENXIO); case I40E_AQ_RC_E2BIG: return (E2BIG); case I40E_AQ_RC_EAGAIN: return (EAGAIN); case I40E_AQ_RC_ENOMEM: return (ENOMEM); case I40E_AQ_RC_EACCES: return (EACCES); case I40E_AQ_RC_EFAULT: return (EFAULT); case I40E_AQ_RC_EBUSY: return (EBUSY); case I40E_AQ_RC_EEXIST: return (EEXIST); case I40E_AQ_RC_EINVAL: return (EINVAL); case I40E_AQ_RC_ENOTTY: return (ENOTTY); case I40E_AQ_RC_ENOSPC: return (ENOSPC); case I40E_AQ_RC_ENOSYS: return (ENOSYS); case I40E_AQ_RC_ERANGE: return (ERANGE); case I40E_AQ_RC_EFLUSHED: return (EINVAL); /* No exact equivalent in errno.h */ case I40E_AQ_RC_BAD_ADDR: return (EFAULT); case I40E_AQ_RC_EMODE: return (EPERM); case I40E_AQ_RC_EFBIG: return (EFBIG); default: return (EINVAL); } } static int ixl_iov_init(device_t dev, uint16_t num_vfs, const nvlist_t *params) { struct ixl_pf *pf; struct i40e_hw *hw; struct ixl_vsi *pf_vsi; enum i40e_status_code ret; int i, error; pf = device_get_softc(dev); hw = &pf->hw; pf_vsi = &pf->vsi; IXL_PF_LOCK(pf); pf->vfs = malloc(sizeof(struct ixl_vf) * num_vfs, M_IXL, M_NOWAIT | M_ZERO); if (pf->vfs == NULL) { error = ENOMEM; goto fail; } for (i = 0; i < num_vfs; i++) sysctl_ctx_init(&pf->vfs[i].ctx); ret = i40e_aq_add_veb(hw, pf_vsi->uplink_seid, pf_vsi->seid, 1, FALSE, &pf->veb_seid, FALSE, NULL); if (ret != I40E_SUCCESS) { error = ixl_adminq_err_to_errno(hw->aq.asq_last_status); device_printf(dev, "add_veb failed; code=%d error=%d", ret, error); goto fail; } // TODO: [Configure MSI-X here] ixl_enable_adminq(hw); pf->num_vfs = num_vfs; IXL_PF_UNLOCK(pf); return (0); fail: free(pf->vfs, M_IXL); pf->vfs = NULL; IXL_PF_UNLOCK(pf); return (error); } static void ixl_iov_uninit(device_t dev) { struct ixl_pf *pf; struct i40e_hw *hw; struct ixl_vsi *vsi; struct ifnet *ifp; struct ixl_vf *vfs; int i, num_vfs; pf = device_get_softc(dev); hw = &pf->hw; vsi = &pf->vsi; ifp = vsi->ifp; IXL_PF_LOCK(pf); for (i = 0; i < pf->num_vfs; i++) { if (pf->vfs[i].vsi.seid != 0) i40e_aq_delete_element(hw, pf->vfs[i].vsi.seid, NULL); } if (pf->veb_seid != 0) { i40e_aq_delete_element(hw, pf->veb_seid, NULL); pf->veb_seid = 0; } if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) ixl_disable_intr(vsi); vfs = pf->vfs; num_vfs = pf->num_vfs; pf->vfs = NULL; pf->num_vfs = 0; IXL_PF_UNLOCK(pf); /* Do this after the unlock as sysctl_ctx_free might sleep. 
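 * A minimal user-space sketch of that ordering, with a pthread mutex and
 * stand-in types in place of the PF lock and softc: unhook the per-VF
 * state while locked, then do the sleepable cleanup on private copies.
 */
#if 0
#include <pthread.h>
#include <stdlib.h>

struct pf_state {
	pthread_mutex_t lock;
	void *vfs;
	int num_vfs;
};

static void
teardown(struct pf_state *pf)
{
	void *vfs;
	int num_vfs;

	pthread_mutex_lock(&pf->lock);
	vfs = pf->vfs;			/* unhook under the lock */
	num_vfs = pf->num_vfs;
	pf->vfs = NULL;
	pf->num_vfs = 0;
	pthread_mutex_unlock(&pf->lock);

	(void)num_vfs;			/* per-VF sleepable cleanup goes here */
	free(vfs);			/* safe: nothing can reach it now */
}

int
main(void)
{
	struct pf_state pf;

	pthread_mutex_init(&pf.lock, NULL);
	pf.vfs = malloc(64);
	pf.num_vfs = 4;
	teardown(&pf);
	return (0);
}
#endif
/*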
*/ for (i = 0; i < num_vfs; i++) sysctl_ctx_free(&vfs[i].ctx); free(vfs, M_IXL); } static int ixl_add_vf(device_t dev, uint16_t vfnum, const nvlist_t *params) { char sysctl_name[QUEUE_NAME_LEN]; struct ixl_pf *pf; struct ixl_vf *vf; const void *mac; size_t size; int error; pf = device_get_softc(dev); vf = &pf->vfs[vfnum]; IXL_PF_LOCK(pf); vf->vf_num = vfnum; vf->vsi.back = pf; vf->vf_flags = VF_FLAG_ENABLED; SLIST_INIT(&vf->vsi.ftl); error = ixl_vf_setup_vsi(pf, vf); if (error != 0) goto out; if (nvlist_exists_binary(params, "mac-addr")) { mac = nvlist_get_binary(params, "mac-addr", &size); bcopy(mac, vf->mac, ETHER_ADDR_LEN); if (nvlist_get_bool(params, "allow-set-mac")) vf->vf_flags |= VF_FLAG_SET_MAC_CAP; } else /* * If the administrator has not specified a MAC address then * we must allow the VF to choose one. */ vf->vf_flags |= VF_FLAG_SET_MAC_CAP; if (nvlist_get_bool(params, "mac-anti-spoof")) vf->vf_flags |= VF_FLAG_MAC_ANTI_SPOOF; if (nvlist_get_bool(params, "allow-promisc")) vf->vf_flags |= VF_FLAG_PROMISC_CAP; /* TODO: Get VLAN that PF has set for the VF */ vf->vf_flags |= VF_FLAG_VLAN_CAP; ixl_reset_vf(pf, vf); out: IXL_PF_UNLOCK(pf); if (error == 0) { snprintf(sysctl_name, sizeof(sysctl_name), "vf%d", vfnum); ixl_add_vsi_sysctls(pf, &vf->vsi, &vf->ctx, sysctl_name); } return (error); } #endif /* PCI_IOV */ Index: head/sys/dev/ixl/if_ixlv.c =================================================================== --- head/sys/dev/ixl/if_ixlv.c (revision 302383) +++ head/sys/dev/ixl/if_ixlv.c (revision 302384) @@ -1,2977 +1,2978 @@ /****************************************************************************** Copyright (c) 2013-2015, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
******************************************************************************/ /*$FreeBSD$*/ #ifndef IXL_STANDALONE_BUILD #include "opt_inet.h" #include "opt_inet6.h" #include "opt_rss.h" #endif #include "ixl.h" #include "ixlv.h" #ifdef RSS #include #endif /********************************************************************* * Driver version *********************************************************************/ char ixlv_driver_version[] = "1.2.11-k"; /********************************************************************* * PCI Device ID Table * * Used by probe to select devices to load on * Last field stores an index into ixlv_strings * Last entry must be all 0s * * { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index } *********************************************************************/ static ixl_vendor_info_t ixlv_vendor_info_array[] = { {I40E_INTEL_VENDOR_ID, I40E_DEV_ID_VF, 0, 0, 0}, {I40E_INTEL_VENDOR_ID, I40E_DEV_ID_VF_HV, 0, 0, 0}, /* required last entry */ {0, 0, 0, 0, 0} }; /********************************************************************* * Table of branding strings *********************************************************************/ static char *ixlv_strings[] = { "Intel(R) Ethernet Connection XL710 VF Driver" }; /********************************************************************* * Function prototypes *********************************************************************/ static int ixlv_probe(device_t); static int ixlv_attach(device_t); static int ixlv_detach(device_t); static int ixlv_shutdown(device_t); static void ixlv_init_locked(struct ixlv_sc *); static int ixlv_allocate_pci_resources(struct ixlv_sc *); static void ixlv_free_pci_resources(struct ixlv_sc *); static int ixlv_assign_msix(struct ixlv_sc *); static int ixlv_init_msix(struct ixlv_sc *); static int ixlv_init_taskqueue(struct ixlv_sc *); static int ixlv_setup_queues(struct ixlv_sc *); static void ixlv_config_rss(struct ixlv_sc *); static void ixlv_stop(struct ixlv_sc *); static void ixlv_add_multi(struct ixl_vsi *); static void ixlv_del_multi(struct ixl_vsi *); static void ixlv_free_queues(struct ixl_vsi *); static int ixlv_setup_interface(device_t, struct ixlv_sc *); static int ixlv_media_change(struct ifnet *); static void ixlv_media_status(struct ifnet *, struct ifmediareq *); static void ixlv_local_timer(void *); static int ixlv_add_mac_filter(struct ixlv_sc *, u8 *, u16); static int ixlv_del_mac_filter(struct ixlv_sc *sc, u8 *macaddr); static void ixlv_init_filters(struct ixlv_sc *); static void ixlv_free_filters(struct ixlv_sc *); static void ixlv_msix_que(void *); static void ixlv_msix_adminq(void *); static void ixlv_do_adminq(void *, int); static void ixlv_do_adminq_locked(struct ixlv_sc *sc); static void ixlv_handle_que(void *, int); static int ixlv_reset(struct ixlv_sc *); static int ixlv_reset_complete(struct i40e_hw *); static void ixlv_set_queue_rx_itr(struct ixl_queue *); static void ixlv_set_queue_tx_itr(struct ixl_queue *); static void ixl_init_cmd_complete(struct ixl_vc_cmd *, void *, enum i40e_status_code); static void ixlv_enable_adminq_irq(struct i40e_hw *); static void ixlv_disable_adminq_irq(struct i40e_hw *); static void ixlv_enable_queue_irq(struct i40e_hw *, int); static void ixlv_disable_queue_irq(struct i40e_hw *, int); static void ixlv_setup_vlan_filters(struct ixlv_sc *); static void ixlv_register_vlan(void *, struct ifnet *, u16); static void ixlv_unregister_vlan(void *, struct ifnet *, u16); static void ixlv_init_hw(struct ixlv_sc *); static int 
ixlv_setup_vc(struct ixlv_sc *); static int ixlv_vf_config(struct ixlv_sc *); static void ixlv_cap_txcsum_tso(struct ixl_vsi *, struct ifnet *, int); static void ixlv_add_sysctls(struct ixlv_sc *); static int ixlv_sysctl_qtx_tail_handler(SYSCTL_HANDLER_ARGS); static int ixlv_sysctl_qrx_tail_handler(SYSCTL_HANDLER_ARGS); /********************************************************************* * FreeBSD Device Interface Entry Points *********************************************************************/ static device_method_t ixlv_methods[] = { /* Device interface */ DEVMETHOD(device_probe, ixlv_probe), DEVMETHOD(device_attach, ixlv_attach), DEVMETHOD(device_detach, ixlv_detach), DEVMETHOD(device_shutdown, ixlv_shutdown), {0, 0} }; static driver_t ixlv_driver = { "ixlv", ixlv_methods, sizeof(struct ixlv_sc), }; devclass_t ixlv_devclass; DRIVER_MODULE(ixlv, pci, ixlv_driver, ixlv_devclass, 0, 0); MODULE_DEPEND(ixlv, pci, 1, 1, 1); MODULE_DEPEND(ixlv, ether, 1, 1, 1); /* ** TUNEABLE PARAMETERS: */ static SYSCTL_NODE(_hw, OID_AUTO, ixlv, CTLFLAG_RD, 0, "IXLV driver parameters"); /* ** Number of descriptors per ring: ** - TX and RX are the same size */ static int ixlv_ringsz = DEFAULT_RING; TUNABLE_INT("hw.ixlv.ringsz", &ixlv_ringsz); SYSCTL_INT(_hw_ixlv, OID_AUTO, ring_size, CTLFLAG_RDTUN, &ixlv_ringsz, 0, "Descriptor Ring Size"); /* Set to zero to auto calculate */ int ixlv_max_queues = 0; TUNABLE_INT("hw.ixlv.max_queues", &ixlv_max_queues); SYSCTL_INT(_hw_ixlv, OID_AUTO, max_queues, CTLFLAG_RDTUN, &ixlv_max_queues, 0, "Number of Queues"); /* ** Number of entries in Tx queue buf_ring. ** Increasing this will reduce the number of ** errors when transmitting fragmented UDP ** packets. */ static int ixlv_txbrsz = DEFAULT_TXBRSZ; TUNABLE_INT("hw.ixlv.txbrsz", &ixlv_txbrsz); SYSCTL_INT(_hw_ixlv, OID_AUTO, txbr_size, CTLFLAG_RDTUN, &ixlv_txbrsz, 0, "TX Buf Ring Size"); /* ** Controls for Interrupt Throttling ** - true/false for dynamic adjustment ** - default values for static ITR */ int ixlv_dynamic_rx_itr = 0; TUNABLE_INT("hw.ixlv.dynamic_rx_itr", &ixlv_dynamic_rx_itr); SYSCTL_INT(_hw_ixlv, OID_AUTO, dynamic_rx_itr, CTLFLAG_RDTUN, &ixlv_dynamic_rx_itr, 0, "Dynamic RX Interrupt Rate"); int ixlv_dynamic_tx_itr = 0; TUNABLE_INT("hw.ixlv.dynamic_tx_itr", &ixlv_dynamic_tx_itr); SYSCTL_INT(_hw_ixlv, OID_AUTO, dynamic_tx_itr, CTLFLAG_RDTUN, &ixlv_dynamic_tx_itr, 0, "Dynamic TX Interrupt Rate"); int ixlv_rx_itr = IXL_ITR_8K; TUNABLE_INT("hw.ixlv.rx_itr", &ixlv_rx_itr); SYSCTL_INT(_hw_ixlv, OID_AUTO, rx_itr, CTLFLAG_RDTUN, &ixlv_rx_itr, 0, "RX Interrupt Rate"); int ixlv_tx_itr = IXL_ITR_4K; TUNABLE_INT("hw.ixlv.tx_itr", &ixlv_tx_itr); SYSCTL_INT(_hw_ixlv, OID_AUTO, tx_itr, CTLFLAG_RDTUN, &ixlv_tx_itr, 0, "TX Interrupt Rate"); /********************************************************************* * Device identification routine * * ixlv_probe determines if the driver should be loaded on * the hardware based on PCI vendor/device id of the device. 
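 *
 * The ID table walk below treats a zero subvendor/subdevice as a wildcard
 * and a zero vendor_id as the terminator.  A plain-C reduction of the loop
 * follows (the IDs are illustrative stand-ins):
 */
#if 0
#include <stdint.h>
#include <stdio.h>

struct vid {
	uint16_t vendor, device, subvendor, subdevice;
};

static const struct vid table[] = {
	{ 0x8086, 0x154c, 0, 0 },	/* example VF entry */
	{ 0, 0, 0, 0 }			/* required last entry */
};

static int
probe_match(uint16_t v, uint16_t d, uint16_t sv, uint16_t sd)
{
	const struct vid *ent;

	for (ent = table; ent->vendor != 0; ent++) {
		if (v == ent->vendor && d == ent->device &&
		    (sv == ent->subvendor || ent->subvendor == 0) &&
		    (sd == ent->subdevice || ent->subdevice == 0))
			return (1);
	}
	return (0);
}

int
main(void)
{
	printf("match=%d\n", probe_match(0x8086, 0x154c, 0x1137, 0));
	return (0);
}
#endif
/*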
* * return BUS_PROBE_DEFAULT on success, positive on failure *********************************************************************/ static int ixlv_probe(device_t dev) { ixl_vendor_info_t *ent; u16 pci_vendor_id, pci_device_id; u16 pci_subvendor_id, pci_subdevice_id; char device_name[256]; #if 0 INIT_DEBUGOUT("ixlv_probe: begin"); #endif pci_vendor_id = pci_get_vendor(dev); if (pci_vendor_id != I40E_INTEL_VENDOR_ID) return (ENXIO); pci_device_id = pci_get_device(dev); pci_subvendor_id = pci_get_subvendor(dev); pci_subdevice_id = pci_get_subdevice(dev); ent = ixlv_vendor_info_array; while (ent->vendor_id != 0) { if ((pci_vendor_id == ent->vendor_id) && (pci_device_id == ent->device_id) && ((pci_subvendor_id == ent->subvendor_id) || (ent->subvendor_id == 0)) && ((pci_subdevice_id == ent->subdevice_id) || (ent->subdevice_id == 0))) { sprintf(device_name, "%s, Version - %s", ixlv_strings[ent->index], ixlv_driver_version); device_set_desc_copy(dev, device_name); return (BUS_PROBE_DEFAULT); } ent++; } return (ENXIO); } /********************************************************************* * Device initialization routine * * The attach entry point is called when the driver is being loaded. * This routine identifies the type of hardware, allocates all resources * and initializes the hardware. * * return 0 on success, positive on failure *********************************************************************/ static int ixlv_attach(device_t dev) { struct ixlv_sc *sc; struct i40e_hw *hw; struct ixl_vsi *vsi; int error = 0; INIT_DBG_DEV(dev, "begin"); /* Allocate, clear, and link in our primary soft structure */ sc = device_get_softc(dev); sc->dev = sc->osdep.dev = dev; hw = &sc->hw; vsi = &sc->vsi; vsi->dev = dev; /* Initialize hw struct */ ixlv_init_hw(sc); /* Allocate filter lists */ ixlv_init_filters(sc); /* Core Lock Init*/ mtx_init(&sc->mtx, device_get_nameunit(dev), "IXL SC Lock", MTX_DEF); /* Set up the timer callout */ callout_init_mtx(&sc->timer, &sc->mtx, 0); /* Do PCI setup - map BAR0, etc */ if (ixlv_allocate_pci_resources(sc)) { device_printf(dev, "%s: Allocation of PCI resources failed\n", __func__); error = ENXIO; goto err_early; } INIT_DBG_DEV(dev, "Allocated PCI resources and MSIX vectors"); error = i40e_set_mac_type(hw); if (error) { device_printf(dev, "%s: set_mac_type failed: %d\n", __func__, error); goto err_pci_res; } error = ixlv_reset_complete(hw); if (error) { device_printf(dev, "%s: Device is still being reset\n", __func__); goto err_pci_res; } INIT_DBG_DEV(dev, "VF Device is ready for configuration"); error = ixlv_setup_vc(sc); if (error) { device_printf(dev, "%s: Error setting up PF comms, %d\n", __func__, error); goto err_pci_res; } INIT_DBG_DEV(dev, "PF API version verified"); /* Need API version before sending reset message */ error = ixlv_reset(sc); if (error) { device_printf(dev, "VF reset failed; reload the driver\n"); goto err_aq; } INIT_DBG_DEV(dev, "VF reset complete"); /* Ask for VF config from PF */ error = ixlv_vf_config(sc); if (error) { device_printf(dev, "Error getting configuration from PF: %d\n", error); goto err_aq; } INIT_DBG_DEV(dev, "VF config from PF:"); INIT_DBG_DEV(dev, "VSIs %d, Queues %d, Max Vectors %d, Max MTU %d", sc->vf_res->num_vsis, sc->vf_res->num_queue_pairs, sc->vf_res->max_vectors, sc->vf_res->max_mtu); INIT_DBG_DEV(dev, "Offload flags: %#010x", sc->vf_res->vf_offload_flags); /* got VF config message back from PF, now we can parse it */ for (int i = 0; i < sc->vf_res->num_vsis; i++) { if (sc->vf_res->vsi_res[i].vsi_type == I40E_VSI_SRIOV) 
sc->vsi_res = &sc->vf_res->vsi_res[i]; } if (!sc->vsi_res) { device_printf(dev, "%s: no LAN VSI found\n", __func__); error = EIO; goto err_res_buf; } INIT_DBG_DEV(dev, "Resource Acquisition complete"); /* If no mac address was assigned just make a random one */ if (!ixlv_check_ether_addr(hw->mac.addr)) { u8 addr[ETHER_ADDR_LEN]; arc4rand(&addr, sizeof(addr), 0); addr[0] &= 0xFE; addr[0] |= 0x02; bcopy(addr, hw->mac.addr, sizeof(addr)); } vsi->id = sc->vsi_res->vsi_id; vsi->back = (void *)sc; sc->link_up = TRUE; /* This allocates the memory and early settings */ if (ixlv_setup_queues(sc) != 0) { device_printf(dev, "%s: setup queues failed!\n", __func__); error = EIO; goto out; } /* Setup the stack interface */ if (ixlv_setup_interface(dev, sc) != 0) { device_printf(dev, "%s: setup interface failed!\n", __func__); error = EIO; goto out; } INIT_DBG_DEV(dev, "Queue memory and interface setup"); /* Do queue interrupt setup */ ixlv_assign_msix(sc); /* Start AdminQ taskqueue */ ixlv_init_taskqueue(sc); /* Initialize stats */ bzero(&sc->vsi.eth_stats, sizeof(struct i40e_eth_stats)); ixlv_add_sysctls(sc); /* Register for VLAN events */ vsi->vlan_attach = EVENTHANDLER_REGISTER(vlan_config, ixlv_register_vlan, vsi, EVENTHANDLER_PRI_FIRST); vsi->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig, ixlv_unregister_vlan, vsi, EVENTHANDLER_PRI_FIRST); /* We want AQ enabled early */ ixlv_enable_adminq_irq(hw); /* Set things up to run init */ sc->init_state = IXLV_INIT_READY; ixl_vc_init_mgr(sc, &sc->vc_mgr); INIT_DBG_DEV(dev, "end"); return (error); out: ixlv_free_queues(vsi); err_res_buf: free(sc->vf_res, M_DEVBUF); err_aq: i40e_shutdown_adminq(hw); err_pci_res: ixlv_free_pci_resources(sc); err_early: mtx_destroy(&sc->mtx); ixlv_free_filters(sc); INIT_DBG_DEV(dev, "end: error %d", error); return (error); } /********************************************************************* * Device removal routine * * The detach entry point is called when the driver is being removed. * This routine stops the adapter and deallocates all the resources * that were allocated for driver operation. 
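 * (One attach-path detail from just above deserves a note: when the PF
 * assigns no MAC, the driver fabricates a locally administered address.
 * A user-space sketch, with rand() standing in for the kernel's
 * arc4rand():)
 */
#if 0
#include <stdio.h>
#include <stdlib.h>

int
main(void)
{
	unsigned char addr[6];
	int i;

	for (i = 0; i < 6; i++)
		addr[i] = rand() & 0xff;
	addr[0] &= 0xFE;	/* clear the multicast bit */
	addr[0] |= 0x02;	/* set the locally administered bit */
	printf("%02x:%02x:%02x:%02x:%02x:%02x\n",
	    addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]);
	return (0);
}
#endif
/*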
* * return 0 on success, positive on failure *********************************************************************/ static int ixlv_detach(device_t dev) { struct ixlv_sc *sc = device_get_softc(dev); struct ixl_vsi *vsi = &sc->vsi; INIT_DBG_DEV(dev, "begin"); /* Make sure VLANS are not using driver */ if (vsi->ifp->if_vlantrunk != NULL) { if_printf(vsi->ifp, "Vlan in use, detach first\n"); INIT_DBG_DEV(dev, "end"); return (EBUSY); } /* Stop driver */ ether_ifdetach(vsi->ifp); if (vsi->ifp->if_drv_flags & IFF_DRV_RUNNING) { mtx_lock(&sc->mtx); ixlv_stop(sc); mtx_unlock(&sc->mtx); } /* Unregister VLAN events */ if (vsi->vlan_attach != NULL) EVENTHANDLER_DEREGISTER(vlan_config, vsi->vlan_attach); if (vsi->vlan_detach != NULL) EVENTHANDLER_DEREGISTER(vlan_unconfig, vsi->vlan_detach); /* Drain VC mgr */ callout_drain(&sc->vc_mgr.callout); i40e_shutdown_adminq(&sc->hw); taskqueue_free(sc->tq); if_free(vsi->ifp); free(sc->vf_res, M_DEVBUF); ixlv_free_pci_resources(sc); ixlv_free_queues(vsi); mtx_destroy(&sc->mtx); ixlv_free_filters(sc); bus_generic_detach(dev); INIT_DBG_DEV(dev, "end"); return (0); } /********************************************************************* * * Shutdown entry point * **********************************************************************/ static int ixlv_shutdown(device_t dev) { struct ixlv_sc *sc = device_get_softc(dev); INIT_DBG_DEV(dev, "begin"); mtx_lock(&sc->mtx); ixlv_stop(sc); mtx_unlock(&sc->mtx); INIT_DBG_DEV(dev, "end"); return (0); } /* * Configure TXCSUM(IPV6) and TSO(4/6) * - the hardware handles these together so we * need to tweak them */ static void ixlv_cap_txcsum_tso(struct ixl_vsi *vsi, struct ifnet *ifp, int mask) { /* Enable/disable TXCSUM/TSO4 */ if (!(ifp->if_capenable & IFCAP_TXCSUM) && !(ifp->if_capenable & IFCAP_TSO4)) { if (mask & IFCAP_TXCSUM) { ifp->if_capenable |= IFCAP_TXCSUM; /* enable TXCSUM, restore TSO if previously enabled */ if (vsi->flags & IXL_FLAGS_KEEP_TSO4) { vsi->flags &= ~IXL_FLAGS_KEEP_TSO4; ifp->if_capenable |= IFCAP_TSO4; } } else if (mask & IFCAP_TSO4) { ifp->if_capenable |= (IFCAP_TXCSUM | IFCAP_TSO4); vsi->flags &= ~IXL_FLAGS_KEEP_TSO4; if_printf(ifp, "TSO4 requires txcsum, enabling both...\n"); } } else if((ifp->if_capenable & IFCAP_TXCSUM) && !(ifp->if_capenable & IFCAP_TSO4)) { if (mask & IFCAP_TXCSUM) ifp->if_capenable &= ~IFCAP_TXCSUM; else if (mask & IFCAP_TSO4) ifp->if_capenable |= IFCAP_TSO4; } else if((ifp->if_capenable & IFCAP_TXCSUM) && (ifp->if_capenable & IFCAP_TSO4)) { if (mask & IFCAP_TXCSUM) { vsi->flags |= IXL_FLAGS_KEEP_TSO4; ifp->if_capenable &= ~(IFCAP_TXCSUM | IFCAP_TSO4); if_printf(ifp, "TSO4 requires txcsum, disabling both...\n"); } else if (mask & IFCAP_TSO4) ifp->if_capenable &= ~IFCAP_TSO4; } /* Enable/disable TXCSUM_IPV6/TSO6 */ if (!(ifp->if_capenable & IFCAP_TXCSUM_IPV6) && !(ifp->if_capenable & IFCAP_TSO6)) { if (mask & IFCAP_TXCSUM_IPV6) { ifp->if_capenable |= IFCAP_TXCSUM_IPV6; if (vsi->flags & IXL_FLAGS_KEEP_TSO6) { vsi->flags &= ~IXL_FLAGS_KEEP_TSO6; ifp->if_capenable |= IFCAP_TSO6; } } else if (mask & IFCAP_TSO6) { ifp->if_capenable |= (IFCAP_TXCSUM_IPV6 | IFCAP_TSO6); vsi->flags &= ~IXL_FLAGS_KEEP_TSO6; if_printf(ifp, "TSO6 requires txcsum6, enabling both...\n"); } } else if((ifp->if_capenable & IFCAP_TXCSUM_IPV6) && !(ifp->if_capenable & IFCAP_TSO6)) { if (mask & IFCAP_TXCSUM_IPV6) ifp->if_capenable &= ~IFCAP_TXCSUM_IPV6; else if (mask & IFCAP_TSO6) ifp->if_capenable |= IFCAP_TSO6; } else if ((ifp->if_capenable & IFCAP_TXCSUM_IPV6) && (ifp->if_capenable & IFCAP_TSO6)) { if (mask & 
IFCAP_TXCSUM_IPV6) { vsi->flags |= IXL_FLAGS_KEEP_TSO6; ifp->if_capenable &= ~(IFCAP_TXCSUM_IPV6 | IFCAP_TSO6); if_printf(ifp, "TSO6 requires txcsum6, disabling both...\n"); } else if (mask & IFCAP_TSO6) ifp->if_capenable &= ~IFCAP_TSO6; } } /********************************************************************* * Ioctl entry point * * ixlv_ioctl is called when the user wants to configure the * interface. * * return 0 on success, positive on failure **********************************************************************/ static int ixlv_ioctl(struct ifnet *ifp, u_long command, caddr_t data) { struct ixl_vsi *vsi = ifp->if_softc; struct ixlv_sc *sc = vsi->back; struct ifreq *ifr = (struct ifreq *)data; #if defined(INET) || defined(INET6) struct ifaddr *ifa = (struct ifaddr *)data; bool avoid_reset = FALSE; #endif int error = 0; switch (command) { case SIOCSIFADDR: #ifdef INET if (ifa->ifa_addr->sa_family == AF_INET) avoid_reset = TRUE; #endif #ifdef INET6 if (ifa->ifa_addr->sa_family == AF_INET6) avoid_reset = TRUE; #endif #if defined(INET) || defined(INET6) /* ** Calling init results in link renegotiation, ** so we avoid doing it when possible. */ if (avoid_reset) { ifp->if_flags |= IFF_UP; if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) ixlv_init(vsi); #ifdef INET if (!(ifp->if_flags & IFF_NOARP)) arp_ifinit(ifp, ifa); #endif } else error = ether_ioctl(ifp, command, data); break; #endif case SIOCSIFMTU: IOCTL_DBG_IF2(ifp, "SIOCSIFMTU (Set Interface MTU)"); mtx_lock(&sc->mtx); if (ifr->ifr_mtu > IXL_MAX_FRAME - ETHER_HDR_LEN - ETHER_CRC_LEN - ETHER_VLAN_ENCAP_LEN) { error = EINVAL; IOCTL_DBG_IF(ifp, "mtu too large"); } else { IOCTL_DBG_IF2(ifp, "mtu: %lu -> %d", (u_long)ifp->if_mtu, ifr->ifr_mtu); // ERJ: Interestingly enough, these types don't match ifp->if_mtu = (u_long)ifr->ifr_mtu; vsi->max_frame_size = ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN + ETHER_VLAN_ENCAP_LEN; - ixlv_init_locked(sc); + if (ifp->if_drv_flags & IFF_DRV_RUNNING) + ixlv_init_locked(sc); } mtx_unlock(&sc->mtx); break; case SIOCSIFFLAGS: IOCTL_DBG_IF2(ifp, "SIOCSIFFLAGS (Set Interface Flags)"); mtx_lock(&sc->mtx); if (ifp->if_flags & IFF_UP) { if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ixlv_init_locked(sc); } else if (ifp->if_drv_flags & IFF_DRV_RUNNING) ixlv_stop(sc); sc->if_flags = ifp->if_flags; mtx_unlock(&sc->mtx); break; case SIOCADDMULTI: IOCTL_DBG_IF2(ifp, "SIOCADDMULTI"); if (ifp->if_drv_flags & IFF_DRV_RUNNING) { mtx_lock(&sc->mtx); ixlv_disable_intr(vsi); ixlv_add_multi(vsi); ixlv_enable_intr(vsi); mtx_unlock(&sc->mtx); } break; case SIOCDELMULTI: IOCTL_DBG_IF2(ifp, "SIOCDELMULTI"); if (sc->init_state == IXLV_RUNNING) { mtx_lock(&sc->mtx); ixlv_disable_intr(vsi); ixlv_del_multi(vsi); ixlv_enable_intr(vsi); mtx_unlock(&sc->mtx); } break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: IOCTL_DBG_IF2(ifp, "SIOCxIFMEDIA (Get/Set Interface Media)"); error = ifmedia_ioctl(ifp, ifr, &sc->media, command); break; case SIOCSIFCAP: { int mask = ifr->ifr_reqcap ^ ifp->if_capenable; IOCTL_DBG_IF2(ifp, "SIOCSIFCAP (Set Capabilities)"); ixlv_cap_txcsum_tso(vsi, ifp, mask); if (mask & IFCAP_RXCSUM) ifp->if_capenable ^= IFCAP_RXCSUM; if (mask & IFCAP_RXCSUM_IPV6) ifp->if_capenable ^= IFCAP_RXCSUM_IPV6; if (mask & IFCAP_LRO) ifp->if_capenable ^= IFCAP_LRO; if (mask & IFCAP_VLAN_HWTAGGING) ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; if (mask & IFCAP_VLAN_HWFILTER) ifp->if_capenable ^= IFCAP_VLAN_HWFILTER; if (mask & IFCAP_VLAN_HWTSO) ifp->if_capenable ^= IFCAP_VLAN_HWTSO; if (ifp->if_drv_flags & IFF_DRV_RUNNING) { ixlv_init(vsi); } 
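	/*
	 * The capability handling above hinges on one idiom: mask is the
	 * XOR of the requested and current sets, so exactly the bits the
	 * caller asked to change get toggled.  A standalone sketch with
	 * made-up flag values:
	 */
#if 0
#include <stdio.h>

#define CAP_RXCSUM	0x0001
#define CAP_LRO		0x0002

int
main(void)
{
	unsigned capenable = CAP_RXCSUM;	/* currently: RXCSUM on */
	unsigned reqcap = CAP_LRO;		/* request: LRO on, RXCSUM off */
	unsigned mask = reqcap ^ capenable;	/* bits that must change */

	if (mask & CAP_RXCSUM)
		capenable ^= CAP_RXCSUM;
	if (mask & CAP_LRO)
		capenable ^= CAP_LRO;
	printf("capenable=%#x\n", capenable);	/* 0x2: only LRO set */
	return (0);
}
#endif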
VLAN_CAPABILITIES(ifp); break; } default: IOCTL_DBG_IF2(ifp, "UNKNOWN (0x%X)", (int)command); error = ether_ioctl(ifp, command, data); break; } return (error); } /* ** To do a reinit on the VF is unfortunately more complicated ** than a physical device, we must have the PF more or less ** completely recreate our memory, so many things that were ** done only once at attach in traditional drivers now must be ** redone at each reinitialization. This function does that ** 'prelude' so we can then call the normal locked init code. */ int ixlv_reinit_locked(struct ixlv_sc *sc) { struct i40e_hw *hw = &sc->hw; struct ixl_vsi *vsi = &sc->vsi; struct ifnet *ifp = vsi->ifp; struct ixlv_mac_filter *mf, *mf_temp; struct ixlv_vlan_filter *vf; int error = 0; INIT_DBG_IF(ifp, "begin"); if (ifp->if_drv_flags & IFF_DRV_RUNNING) ixlv_stop(sc); error = ixlv_reset(sc); INIT_DBG_IF(ifp, "VF was reset"); /* set the state in case we went thru RESET */ sc->init_state = IXLV_RUNNING; /* ** Resetting the VF drops all filters from hardware; ** we need to mark them to be re-added in init. */ SLIST_FOREACH_SAFE(mf, sc->mac_filters, next, mf_temp) { if (mf->flags & IXL_FILTER_DEL) { SLIST_REMOVE(sc->mac_filters, mf, ixlv_mac_filter, next); free(mf, M_DEVBUF); } else mf->flags |= IXL_FILTER_ADD; } if (vsi->num_vlans != 0) SLIST_FOREACH(vf, sc->vlan_filters, next) vf->flags = IXL_FILTER_ADD; else { /* clean any stale filters */ while (!SLIST_EMPTY(sc->vlan_filters)) { vf = SLIST_FIRST(sc->vlan_filters); SLIST_REMOVE_HEAD(sc->vlan_filters, next); free(vf, M_DEVBUF); } } ixlv_enable_adminq_irq(hw); ixl_vc_flush(&sc->vc_mgr); INIT_DBG_IF(ifp, "end"); return (error); } static void ixl_init_cmd_complete(struct ixl_vc_cmd *cmd, void *arg, enum i40e_status_code code) { struct ixlv_sc *sc; sc = arg; /* * Ignore "Adapter Stopped" message as that happens if an ifconfig down * happens while a command is in progress, so we don't print an error * in that case. */ if (code != I40E_SUCCESS && code != I40E_ERR_ADAPTER_STOPPED) { if_printf(sc->vsi.ifp, "Error %d waiting for PF to complete operation %d\n", code, cmd->request); } } static void ixlv_init_locked(struct ixlv_sc *sc) { struct i40e_hw *hw = &sc->hw; struct ixl_vsi *vsi = &sc->vsi; struct ixl_queue *que = vsi->queues; struct ifnet *ifp = vsi->ifp; int error = 0; INIT_DBG_IF(ifp, "begin"); IXLV_CORE_LOCK_ASSERT(sc); /* Do a reinit first if an init has already been done */ if ((sc->init_state == IXLV_RUNNING) || (sc->init_state == IXLV_RESET_REQUIRED) || (sc->init_state == IXLV_RESET_PENDING)) error = ixlv_reinit_locked(sc); /* Don't bother with init if we failed reinit */ if (error) goto init_done; /* Remove existing MAC filter if new MAC addr is set */ if (bcmp(IF_LLADDR(ifp), hw->mac.addr, ETHER_ADDR_LEN) != 0) { error = ixlv_del_mac_filter(sc, hw->mac.addr); if (error == 0) ixl_vc_enqueue(&sc->vc_mgr, &sc->del_mac_cmd, IXLV_FLAG_AQ_DEL_MAC_FILTER, ixl_init_cmd_complete, sc); } /* Check for an LAA mac address... 
*/ bcopy(IF_LLADDR(ifp), hw->mac.addr, ETHER_ADDR_LEN); ifp->if_hwassist = 0; if (ifp->if_capenable & IFCAP_TSO) ifp->if_hwassist |= CSUM_TSO; if (ifp->if_capenable & IFCAP_TXCSUM) ifp->if_hwassist |= (CSUM_OFFLOAD_IPV4 & ~CSUM_IP); if (ifp->if_capenable & IFCAP_TXCSUM_IPV6) ifp->if_hwassist |= CSUM_OFFLOAD_IPV6; /* Add mac filter for this VF to PF */ if (i40e_validate_mac_addr(hw->mac.addr) == I40E_SUCCESS) { error = ixlv_add_mac_filter(sc, hw->mac.addr, 0); if (!error || error == EEXIST) ixl_vc_enqueue(&sc->vc_mgr, &sc->add_mac_cmd, IXLV_FLAG_AQ_ADD_MAC_FILTER, ixl_init_cmd_complete, sc); } /* Setup vlan's if needed */ ixlv_setup_vlan_filters(sc); /* Prepare the queues for operation */ for (int i = 0; i < vsi->num_queues; i++, que++) { struct rx_ring *rxr = &que->rxr; ixl_init_tx_ring(que); if (vsi->max_frame_size <= MCLBYTES) rxr->mbuf_sz = MCLBYTES; else rxr->mbuf_sz = MJUMPAGESIZE; ixl_init_rx_ring(que); } /* Configure queues */ ixl_vc_enqueue(&sc->vc_mgr, &sc->config_queues_cmd, IXLV_FLAG_AQ_CONFIGURE_QUEUES, ixl_init_cmd_complete, sc); /* Set up RSS */ ixlv_config_rss(sc); /* Map vectors */ ixl_vc_enqueue(&sc->vc_mgr, &sc->map_vectors_cmd, IXLV_FLAG_AQ_MAP_VECTORS, ixl_init_cmd_complete, sc); /* Enable queues */ ixl_vc_enqueue(&sc->vc_mgr, &sc->enable_queues_cmd, IXLV_FLAG_AQ_ENABLE_QUEUES, ixl_init_cmd_complete, sc); /* Start the local timer */ callout_reset(&sc->timer, hz, ixlv_local_timer, sc); sc->init_state = IXLV_RUNNING; init_done: INIT_DBG_IF(ifp, "end"); return; } /* ** Init entry point for the stack */ void ixlv_init(void *arg) { struct ixl_vsi *vsi = (struct ixl_vsi *)arg; struct ixlv_sc *sc = vsi->back; int retries = 0; mtx_lock(&sc->mtx); ixlv_init_locked(sc); mtx_unlock(&sc->mtx); /* Wait for init_locked to finish */ while (!(vsi->ifp->if_drv_flags & IFF_DRV_RUNNING) && ++retries < IXLV_AQ_MAX_ERR) { i40e_msec_delay(25); } if (retries >= IXLV_AQ_MAX_ERR) if_printf(vsi->ifp, "Init failed to complete in allotted time!\n"); } /* * ixlv_attach() helper function; gathers information about * the (virtual) hardware for use elsewhere in the driver. */ static void ixlv_init_hw(struct ixlv_sc *sc) { struct i40e_hw *hw = &sc->hw; device_t dev = sc->dev; /* Save off the information about this board */ hw->vendor_id = pci_get_vendor(dev); hw->device_id = pci_get_device(dev); hw->revision_id = pci_read_config(dev, PCIR_REVID, 1); hw->subsystem_vendor_id = pci_read_config(dev, PCIR_SUBVEND_0, 2); hw->subsystem_device_id = pci_read_config(dev, PCIR_SUBDEV_0, 2); hw->bus.device = pci_get_slot(dev); hw->bus.func = pci_get_function(dev); } /* * ixlv_attach() helper function; initalizes the admin queue * and attempts to establish contact with the PF by * retrying the initial "API version" message several times * or until the PF responds. 
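 * The control flow is easier to see stripped of the AQ details: an outer
 * budget of attempts, and a single in-place resend when only the verify
 * step times out.  A skeleton with hypothetical try_*() helpers standing
 * in for the admin queue calls (the budget is illustrative):
 */
#if 0
#include <stdbool.h>
#include <stdio.h>

#define MAX_ATTEMPTS	30	/* illustrative budget */

static bool try_init_adminq(void) { return (true); }
static bool try_send_api_ver(void) { return (true); }
static bool try_verify_api_ver(int resend) { return (resend > 0); }

int
main(void)
{
	for (int i = 0; i < MAX_ATTEMPTS; i++) {
		if (!try_init_adminq())
			continue;	/* retry from scratch */
		for (int resend = 0; resend < 2; resend++) {
			if (!try_send_api_ver())
				break;
			if (try_verify_api_ver(resend)) {
				puts("API version verified");
				return (0);
			}
			/* timeout: resend once before giving up */
		}
		break;
	}
	return (1);
}
#endif
/*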
*/ static int ixlv_setup_vc(struct ixlv_sc *sc) { struct i40e_hw *hw = &sc->hw; device_t dev = sc->dev; int error = 0, ret_error = 0, asq_retries = 0; bool send_api_ver_retried = 0; /* Need to set these AQ paramters before initializing AQ */ hw->aq.num_arq_entries = IXL_AQ_LEN; hw->aq.num_asq_entries = IXL_AQ_LEN; hw->aq.arq_buf_size = IXL_AQ_BUFSZ; hw->aq.asq_buf_size = IXL_AQ_BUFSZ; for (int i = 0; i < IXLV_AQ_MAX_ERR; i++) { /* Initialize admin queue */ error = i40e_init_adminq(hw); if (error) { device_printf(dev, "%s: init_adminq failed: %d\n", __func__, error); ret_error = 1; continue; } INIT_DBG_DEV(dev, "Initialized Admin Queue; starting" " send_api_ver attempt %d", i+1); retry_send: /* Send VF's API version */ error = ixlv_send_api_ver(sc); if (error) { i40e_shutdown_adminq(hw); ret_error = 2; device_printf(dev, "%s: unable to send api" " version to PF on attempt %d, error %d\n", __func__, i+1, error); } asq_retries = 0; while (!i40e_asq_done(hw)) { if (++asq_retries > IXLV_AQ_MAX_ERR) { i40e_shutdown_adminq(hw); DDPRINTF(dev, "Admin Queue timeout " "(waiting for send_api_ver), %d more retries...", IXLV_AQ_MAX_ERR - (i + 1)); ret_error = 3; break; } i40e_msec_delay(10); } if (asq_retries > IXLV_AQ_MAX_ERR) continue; INIT_DBG_DEV(dev, "Sent API version message to PF"); /* Verify that the VF accepts the PF's API version */ error = ixlv_verify_api_ver(sc); if (error == ETIMEDOUT) { if (!send_api_ver_retried) { /* Resend message, one more time */ send_api_ver_retried++; device_printf(dev, "%s: Timeout while verifying API version on first" " try!\n", __func__); goto retry_send; } else { device_printf(dev, "%s: Timeout while verifying API version on second" " try!\n", __func__); ret_error = 4; break; } } if (error) { device_printf(dev, "%s: Unable to verify API version," " error %d\n", __func__, error); ret_error = 5; } break; } if (ret_error >= 4) i40e_shutdown_adminq(hw); return (ret_error); } /* * ixlv_attach() helper function; asks the PF for this VF's * configuration, and saves the information if it receives it. 
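 * Because the reply is a header plus a variable number of VSI records,
 * the buffer is sized for the worst case before the request is sent.  A
 * sketch with stand-in structures (not the real virtchnl layouts):
 */
#if 0
#include <stdio.h>
#include <stdlib.h>

#define MAX_VF_VSI	3	/* illustrative cap */

struct vsi_resource { int vsi_id, num_queue_pairs; };
struct vf_resource {
	int num_vsis, num_queue_pairs, max_vectors;
	struct vsi_resource vsi_res[];	/* flexible array member */
};

int
main(void)
{
	size_t bufsz = sizeof(struct vf_resource) +
	    MAX_VF_VSI * sizeof(struct vsi_resource);
	struct vf_resource *res = malloc(bufsz);

	if (res == NULL)
		return (1);
	printf("worst-case reply buffer: %zu bytes\n", bufsz);
	free(res);
	return (0);
}
#endif
/*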
*/ static int ixlv_vf_config(struct ixlv_sc *sc) { struct i40e_hw *hw = &sc->hw; device_t dev = sc->dev; int bufsz, error = 0, ret_error = 0; int asq_retries, retried = 0; retry_config: error = ixlv_send_vf_config_msg(sc); if (error) { device_printf(dev, "%s: Unable to send VF config request, attempt %d," " error %d\n", __func__, retried + 1, error); ret_error = 2; } asq_retries = 0; while (!i40e_asq_done(hw)) { if (++asq_retries > IXLV_AQ_MAX_ERR) { device_printf(dev, "%s: Admin Queue timeout " "(waiting for send_vf_config_msg), attempt %d\n", __func__, retried + 1); ret_error = 3; goto fail; } i40e_msec_delay(10); } INIT_DBG_DEV(dev, "Sent VF config message to PF, attempt %d", retried + 1); if (!sc->vf_res) { bufsz = sizeof(struct i40e_virtchnl_vf_resource) + (I40E_MAX_VF_VSI * sizeof(struct i40e_virtchnl_vsi_resource)); sc->vf_res = malloc(bufsz, M_DEVBUF, M_NOWAIT); if (!sc->vf_res) { device_printf(dev, "%s: Unable to allocate memory for VF configuration" " message from PF on attempt %d\n", __func__, retried + 1); ret_error = 1; goto fail; } } /* Check for VF config response */ error = ixlv_get_vf_config(sc); if (error == ETIMEDOUT) { /* The 1st time we timeout, send the configuration message again */ if (!retried) { retried++; goto retry_config; } device_printf(dev, "%s: ixlv_get_vf_config() timed out waiting for a response\n", __func__); } if (error) { device_printf(dev, "%s: Unable to get VF configuration from PF after %d tries!\n", __func__, retried + 1); ret_error = 4; } goto done; fail: free(sc->vf_res, M_DEVBUF); done: return (ret_error); } /* * Allocate MSI/X vectors, setup the AQ vector early */ static int ixlv_init_msix(struct ixlv_sc *sc) { device_t dev = sc->dev; int rid, want, vectors, queues, available; rid = PCIR_BAR(IXL_BAR); sc->msix_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (!sc->msix_mem) { /* May not be enabled */ device_printf(sc->dev, "Unable to map MSIX table \n"); goto fail; } available = pci_msix_count(dev); if (available == 0) { /* system has msix disabled */ bus_release_resource(dev, SYS_RES_MEMORY, rid, sc->msix_mem); sc->msix_mem = NULL; goto fail; } /* Figure out a reasonable auto config value */ queues = (mp_ncpus > (available - 1)) ? (available - 1) : mp_ncpus; /* Override with hardcoded value if sane */ if ((ixlv_max_queues != 0) && (ixlv_max_queues <= queues)) queues = ixlv_max_queues; #ifdef RSS /* If we're doing RSS, clamp at the number of RSS buckets */ if (queues > rss_getnumbuckets()) queues = rss_getnumbuckets(); #endif /* Enforce the VF max value */ if (queues > IXLV_MAX_QUEUES) queues = IXLV_MAX_QUEUES; /* ** Want one vector (RX/TX pair) per queue ** plus an additional for the admin queue. */ want = queues + 1; if (want <= available) /* Have enough */ vectors = want; else { device_printf(sc->dev, "MSIX Configuration Problem, " "%d vectors available but %d wanted!\n", available, want); goto fail; } #ifdef RSS /* * If we're doing RSS, the number of queues needs to * match the number of RSS buckets that are configured. * * + If there's more queues than RSS buckets, we'll end * up with queues that get no traffic. * * + If there's more RSS buckets than queues, we'll end * up having multiple RSS buckets map to the same queue, * so there'll be some contention. 
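 *
 * The whole sizing policy collapses into one clamping chain, sketched
 * here with example limits:
 */
#if 0
#include <stdio.h>

static int
clamp_queues(int ncpus, int avail_vectors, int tunable, int rss_buckets,
    int vf_max)
{
	/* one vector is reserved for the admin queue */
	int queues = (ncpus > avail_vectors - 1) ? avail_vectors - 1 : ncpus;

	if (tunable != 0 && tunable <= queues)
		queues = tunable;
	if (queues > rss_buckets)
		queues = rss_buckets;
	if (queues > vf_max)
		queues = vf_max;
	return (queues);
}

int
main(void)
{
	printf("queues=%d\n", clamp_queues(8, 5, 0, 4, 16));	/* -> 4 */
	return (0);
}
#endif
/*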
*/ if (queues != rss_getnumbuckets()) { device_printf(dev, "%s: queues (%d) != RSS buckets (%d)" "; performance will be impacted.\n", __func__, queues, rss_getnumbuckets()); } #endif if (pci_alloc_msix(dev, &vectors) == 0) { device_printf(sc->dev, "Using MSIX interrupts with %d vectors\n", vectors); sc->msix = vectors; sc->vsi.num_queues = queues; } /* ** Explicitly set the guest PCI BUSMASTER capability ** and we must rewrite the ENABLE in the MSIX control ** register again at this point to cause the host to ** successfully initialize us. */ { u16 pci_cmd_word; int msix_ctrl; pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2); pci_cmd_word |= PCIM_CMD_BUSMASTEREN; pci_write_config(dev, PCIR_COMMAND, pci_cmd_word, 2); pci_find_cap(dev, PCIY_MSIX, &rid); rid += PCIR_MSIX_CTRL; msix_ctrl = pci_read_config(dev, rid, 2); msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE; pci_write_config(dev, rid, msix_ctrl, 2); } /* Next we need to setup the vector for the Admin Queue */ rid = 1; // zero vector + 1 sc->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); if (sc->res == NULL) { device_printf(dev,"Unable to allocate" " bus resource: AQ interrupt \n"); goto fail; } if (bus_setup_intr(dev, sc->res, INTR_TYPE_NET | INTR_MPSAFE, NULL, ixlv_msix_adminq, sc, &sc->tag)) { sc->res = NULL; device_printf(dev, "Failed to register AQ handler"); goto fail; } bus_describe_intr(dev, sc->res, sc->tag, "adminq"); return (vectors); fail: /* The VF driver MUST use MSIX */ return (0); } static int ixlv_allocate_pci_resources(struct ixlv_sc *sc) { int rid; device_t dev = sc->dev; rid = PCIR_BAR(0); sc->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (!(sc->pci_mem)) { device_printf(dev,"Unable to allocate bus resource: memory\n"); return (ENXIO); } sc->osdep.mem_bus_space_tag = rman_get_bustag(sc->pci_mem); sc->osdep.mem_bus_space_handle = rman_get_bushandle(sc->pci_mem); sc->osdep.mem_bus_space_size = rman_get_size(sc->pci_mem); sc->osdep.flush_reg = I40E_VFGEN_RSTAT; sc->hw.hw_addr = (u8 *) &sc->osdep.mem_bus_space_handle; sc->hw.back = &sc->osdep; /* Disable adminq interrupts */ ixlv_disable_adminq_irq(&sc->hw); /* ** Now setup MSI/X, it will return ** us the number of supported vectors */ sc->msix = ixlv_init_msix(sc); /* We fail without MSIX support */ if (sc->msix == 0) return (ENXIO); return (0); } static void ixlv_free_pci_resources(struct ixlv_sc *sc) { struct ixl_vsi *vsi = &sc->vsi; struct ixl_queue *que = vsi->queues; device_t dev = sc->dev; /* We may get here before stations are setup */ if (que == NULL) goto early; /* ** Release all msix queue resources: */ for (int i = 0; i < vsi->num_queues; i++, que++) { int rid = que->msix + 1; if (que->tag != NULL) { bus_teardown_intr(dev, que->res, que->tag); que->tag = NULL; } if (que->res != NULL) bus_release_resource(dev, SYS_RES_IRQ, rid, que->res); } early: /* Clean the AdminQ interrupt */ if (sc->tag != NULL) { bus_teardown_intr(dev, sc->res, sc->tag); sc->tag = NULL; } if (sc->res != NULL) bus_release_resource(dev, SYS_RES_IRQ, 1, sc->res); pci_release_msi(dev); if (sc->msix_mem != NULL) bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(IXL_BAR), sc->msix_mem); if (sc->pci_mem != NULL) bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(0), sc->pci_mem); return; } /* * Create taskqueue and tasklet for Admin Queue interrupts. 
*/ static int ixlv_init_taskqueue(struct ixlv_sc *sc) { int error = 0; TASK_INIT(&sc->aq_irq, 0, ixlv_do_adminq, sc); sc->tq = taskqueue_create_fast("ixl_adm", M_NOWAIT, taskqueue_thread_enqueue, &sc->tq); taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s sc->tq", device_get_nameunit(sc->dev)); return (error); } /********************************************************************* * * Setup MSIX Interrupt resources and handlers for the VSI queues * **********************************************************************/ static int ixlv_assign_msix(struct ixlv_sc *sc) { device_t dev = sc->dev; struct ixl_vsi *vsi = &sc->vsi; struct ixl_queue *que = vsi->queues; struct tx_ring *txr; int error, rid, vector = 1; #ifdef RSS cpuset_t cpu_mask; #endif for (int i = 0; i < vsi->num_queues; i++, vector++, que++) { int cpu_id = i; rid = vector + 1; txr = &que->txr; que->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); if (que->res == NULL) { device_printf(dev,"Unable to allocate" " bus resource: que interrupt [%d]\n", vector); return (ENXIO); } /* Set the handler function */ error = bus_setup_intr(dev, que->res, INTR_TYPE_NET | INTR_MPSAFE, NULL, ixlv_msix_que, que, &que->tag); if (error) { que->res = NULL; device_printf(dev, "Failed to register que handler"); return (error); } bus_describe_intr(dev, que->res, que->tag, "que %d", i); /* Bind the vector to a CPU */ #ifdef RSS cpu_id = rss_getcpu(i % rss_getnumbuckets()); #endif bus_bind_intr(dev, que->res, cpu_id); que->msix = vector; vsi->que_mask |= (u64)(1 << que->msix); TASK_INIT(&que->tx_task, 0, ixl_deferred_mq_start, que); TASK_INIT(&que->task, 0, ixlv_handle_que, que); que->tq = taskqueue_create_fast("ixlv_que", M_NOWAIT, taskqueue_thread_enqueue, &que->tq); #ifdef RSS CPU_SETOF(cpu_id, &cpu_mask); taskqueue_start_threads_cpuset(&que->tq, 1, PI_NET, &cpu_mask, "%s (bucket %d)", device_get_nameunit(dev), cpu_id); #else taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que", device_get_nameunit(dev)); #endif } return (0); } /* ** Requests a VF reset from the PF. ** ** Requires the VF's Admin Queue to be initialized. */ static int ixlv_reset(struct ixlv_sc *sc) { struct i40e_hw *hw = &sc->hw; device_t dev = sc->dev; int error = 0; /* Ask the PF to reset us if we are initiating */ if (sc->init_state != IXLV_RESET_PENDING) ixlv_request_reset(sc); i40e_msec_delay(100); error = ixlv_reset_complete(hw); if (error) { device_printf(dev, "%s: VF reset failed\n", __func__); return (error); } error = i40e_shutdown_adminq(hw); if (error) { device_printf(dev, "%s: shutdown_adminq failed: %d\n", __func__, error); return (error); } error = i40e_init_adminq(hw); if (error) { device_printf(dev, "%s: init_adminq failed: %d\n", __func__, error); return(error); } return (0); } static int ixlv_reset_complete(struct i40e_hw *hw) { u32 reg; for (int i = 0; i < 100; i++) { reg = rd32(hw, I40E_VFGEN_RSTAT) & I40E_VFGEN_RSTAT_VFR_STATE_MASK; if ((reg == I40E_VFR_VFACTIVE) || (reg == I40E_VFR_COMPLETED)) return (0); i40e_msec_delay(100); } return (EBUSY); } /********************************************************************* * * Setup networking device structure and register an interface. 
* **********************************************************************/ static int ixlv_setup_interface(device_t dev, struct ixlv_sc *sc) { struct ifnet *ifp; struct ixl_vsi *vsi = &sc->vsi; struct ixl_queue *que = vsi->queues; INIT_DBG_DEV(dev, "begin"); ifp = vsi->ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { device_printf(dev, "%s: could not allocate ifnet" " structure!\n", __func__); return (-1); } if_initname(ifp, device_get_name(dev), device_get_unit(dev)); ifp->if_mtu = ETHERMTU; ifp->if_baudrate = 4000000000; // ?? ifp->if_init = ixlv_init; ifp->if_softc = vsi; ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = ixlv_ioctl; #if __FreeBSD_version >= 1100000 if_setgetcounterfn(ifp, ixl_get_counter); #endif ifp->if_transmit = ixl_mq_start; ifp->if_qflush = ixl_qflush; ifp->if_snd.ifq_maxlen = que->num_desc - 2; ether_ifattach(ifp, sc->hw.mac.addr); vsi->max_frame_size = ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN + ETHER_VLAN_ENCAP_LEN; /* * Tell the upper layer(s) we support long frames. */ ifp->if_hdrlen = sizeof(struct ether_vlan_header); ifp->if_capabilities |= IFCAP_HWCSUM; ifp->if_capabilities |= IFCAP_HWCSUM_IPV6; ifp->if_capabilities |= IFCAP_TSO; ifp->if_capabilities |= IFCAP_JUMBO_MTU; ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWTSO | IFCAP_VLAN_MTU | IFCAP_VLAN_HWCSUM | IFCAP_LRO; ifp->if_capenable = ifp->if_capabilities; /* ** Don't turn this on by default, if vlans are ** created on another pseudo device (eg. lagg) ** then vlan events are not passed thru, breaking ** operation, but with HW FILTER off it works. If ** using vlans directly on the ixl driver you can ** enable this and get full hardware tag filtering. */ ifp->if_capabilities |= IFCAP_VLAN_HWFILTER; /* * Specify the media types supported by this adapter and register * callbacks to update media and link information */ ifmedia_init(&sc->media, IFM_IMASK, ixlv_media_change, ixlv_media_status); // JFV Add media types later? ifmedia_add(&sc->media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(&sc->media, IFM_ETHER | IFM_AUTO); INIT_DBG_DEV(dev, "end"); return (0); } /* ** Allocate and setup the interface queues */ static int ixlv_setup_queues(struct ixlv_sc *sc) { device_t dev = sc->dev; struct ixl_vsi *vsi; struct ixl_queue *que; struct tx_ring *txr; struct rx_ring *rxr; int rsize, tsize; int error = I40E_SUCCESS; vsi = &sc->vsi; vsi->back = (void *)sc; vsi->hw = &sc->hw; vsi->num_vlans = 0; /* Get memory for the station queues */ if (!(vsi->queues = (struct ixl_queue *) malloc(sizeof(struct ixl_queue) * vsi->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate queue memory\n"); error = ENOMEM; goto early; } for (int i = 0; i < vsi->num_queues; i++) { que = &vsi->queues[i]; que->num_desc = ixlv_ringsz; que->me = i; que->vsi = vsi; /* mark the queue as active */ vsi->active_queues |= (u64)1 << que->me; txr = &que->txr; txr->que = que; txr->tail = I40E_QTX_TAIL1(que->me); /* Initialize the TX lock */ snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)", device_get_nameunit(dev), que->me); mtx_init(&txr->mtx, txr->mtx_name, NULL, MTX_DEF); /* ** Create the TX descriptor ring, the extra int is ** added as the location for HEAD WB. 
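		** The ring proper needs num_desc * sizeof(struct i40e_tx_desc)
		** bytes; the extra u32 gives the hardware a slot to write
		** back the head index, and the whole allocation is rounded
		** up to DBA_ALIGN below.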
		*/
		tsize = roundup2((que->num_desc *
		    sizeof(struct i40e_tx_desc)) +
		    sizeof(u32), DBA_ALIGN);
		if (i40e_allocate_dma_mem(&sc->hw,
		    &txr->dma, i40e_mem_reserved, tsize, DBA_ALIGN)) {
			device_printf(dev,
			    "Unable to allocate TX Descriptor memory\n");
			error = ENOMEM;
			goto fail;
		}
		txr->base = (struct i40e_tx_desc *)txr->dma.va;
		bzero((void *)txr->base, tsize);
		/* Now allocate transmit soft structs for the ring */
		if (ixl_allocate_tx_data(que)) {
			device_printf(dev,
			    "Critical Failure setting up TX structures\n");
			error = ENOMEM;
			goto fail;
		}
		/* Allocate a buf ring */
		txr->br = buf_ring_alloc(ixlv_txbrsz, M_DEVBUF,
		    M_WAITOK, &txr->mtx);
		if (txr->br == NULL) {
			device_printf(dev,
			    "Critical Failure setting up TX buf ring\n");
			error = ENOMEM;
			goto fail;
		}

		/*
		 * Next the RX queues...
		 */
		rsize = roundup2(que->num_desc *
		    sizeof(union i40e_rx_desc), DBA_ALIGN);
		rxr = &que->rxr;
		rxr->que = que;
		rxr->tail = I40E_QRX_TAIL1(que->me);

		/* Initialize the RX side lock */
		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
		    device_get_nameunit(dev), que->me);
		mtx_init(&rxr->mtx, rxr->mtx_name, NULL, MTX_DEF);

		if (i40e_allocate_dma_mem(&sc->hw,
		    &rxr->dma, i40e_mem_reserved, rsize,
		    4096)) { //JFV - should this be DBA?
			device_printf(dev,
			    "Unable to allocate RX Descriptor memory\n");
			error = ENOMEM;
			goto fail;
		}
		rxr->base = (union i40e_rx_desc *)rxr->dma.va;
		bzero((void *)rxr->base, rsize);

		/* Allocate receive soft structs for the ring */
		if (ixl_allocate_rx_data(que)) {
			device_printf(dev,
			    "Critical Failure setting up receive structs\n");
			error = ENOMEM;
			goto fail;
		}
	}

	return (0);

fail:
	for (int i = 0; i < vsi->num_queues; i++) {
		que = &vsi->queues[i];
		rxr = &que->rxr;
		txr = &que->txr;
		if (rxr->base)
			i40e_free_dma_mem(&sc->hw, &rxr->dma);
		if (txr->base)
			i40e_free_dma_mem(&sc->hw, &txr->dma);
	}
	free(vsi->queues, M_DEVBUF);

early:
	return (error);
}

/*
** This routine is run via a vlan config EVENT; it enables us to use
** the HW Filter table since we can get the vlan id. This just creates
** the entry in the soft version of the VFTA; init will repopulate
** the real table.
*/
static void
ixlv_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
{
	struct ixl_vsi		*vsi = arg;
	struct ixlv_sc		*sc = vsi->back;
	struct ixlv_vlan_filter	*v;

	if (ifp->if_softc != arg)	/* Not our event */
		return;

	if ((vtag == 0) || (vtag > 4095))	/* Invalid */
		return;

	/* Sanity check - make sure it doesn't already exist */
	SLIST_FOREACH(v, sc->vlan_filters, next) {
		if (v->vlan == vtag)
			return;
	}

	mtx_lock(&sc->mtx);
	v = malloc(sizeof(struct ixlv_vlan_filter),
	    M_DEVBUF, M_NOWAIT | M_ZERO);
	/* The M_NOWAIT allocation can fail; bail rather than insert NULL */
	if (v == NULL) {
		mtx_unlock(&sc->mtx);
		return;
	}
	++vsi->num_vlans;
	v->vlan = vtag;
	v->flags = IXL_FILTER_ADD;
	SLIST_INSERT_HEAD(sc->vlan_filters, v, next);
	ixl_vc_enqueue(&sc->vc_mgr, &sc->add_vlan_cmd,
	    IXLV_FLAG_AQ_ADD_VLAN_FILTER, ixl_init_cmd_complete, sc);
	mtx_unlock(&sc->mtx);
	return;
}

/*
** This routine is run via a vlan unconfig EVENT; it removes our
** entry from the soft vfta.
*/
static void
ixlv_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
{
	struct ixl_vsi		*vsi = arg;
	struct ixlv_sc		*sc = vsi->back;
	struct ixlv_vlan_filter	*v;
	int			i = 0;

	if (ifp->if_softc != arg)
		return;

	if ((vtag == 0) || (vtag > 4095))	/* Invalid */
		return;

	mtx_lock(&sc->mtx);
	SLIST_FOREACH(v, sc->vlan_filters, next) {
		if (v->vlan == vtag) {
			v->flags = IXL_FILTER_DEL;
			++i;
			--vsi->num_vlans;
		}
	}
	if (i)
		ixl_vc_enqueue(&sc->vc_mgr, &sc->del_vlan_cmd,
		    IXLV_FLAG_AQ_DEL_VLAN_FILTER, ixl_init_cmd_complete, sc);
	mtx_unlock(&sc->mtx);
	return;
}

/*
** Get a new filter and add it to the mac filter list.
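**
** A NULL return means the M_NOWAIT allocation failed; on success the
** filter is already linked into sc->mac_filters, and the caller only
** fills in the address and flags (see ixlv_add_mac_filter).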
*/
static struct ixlv_mac_filter *
ixlv_get_mac_filter(struct ixlv_sc *sc)
{
	struct ixlv_mac_filter	*f;

	f = malloc(sizeof(struct ixlv_mac_filter),
	    M_DEVBUF, M_NOWAIT | M_ZERO);
	if (f)
		SLIST_INSERT_HEAD(sc->mac_filters, f, next);

	return (f);
}

/*
** Find the filter with matching MAC address
*/
static struct ixlv_mac_filter *
ixlv_find_mac_filter(struct ixlv_sc *sc, u8 *macaddr)
{
	struct ixlv_mac_filter	*f;
	bool			match = FALSE;

	SLIST_FOREACH(f, sc->mac_filters, next) {
		if (cmp_etheraddr(f->macaddr, macaddr)) {
			match = TRUE;
			break;
		}
	}

	if (!match)
		f = NULL;
	return (f);
}

/*
** Admin Queue interrupt handler
*/
static void
ixlv_msix_adminq(void *arg)
{
	struct ixlv_sc	*sc = arg;
	struct i40e_hw	*hw = &sc->hw;
	u32		reg, mask;

	/* Reading the interrupt cause registers acknowledges the cause */
	reg = rd32(hw, I40E_VFINT_ICR01);
	mask = rd32(hw, I40E_VFINT_ICR0_ENA1);

	reg = rd32(hw, I40E_VFINT_DYN_CTL01);
	reg |= I40E_VFINT_DYN_CTL01_CLEARPBA_MASK;
	wr32(hw, I40E_VFINT_DYN_CTL01, reg);

	/* schedule task */
	taskqueue_enqueue(sc->tq, &sc->aq_irq);
	return;
}

void
ixlv_enable_intr(struct ixl_vsi *vsi)
{
	struct i40e_hw		*hw = vsi->hw;
	struct ixl_queue	*que = vsi->queues;

	ixlv_enable_adminq_irq(hw);
	for (int i = 0; i < vsi->num_queues; i++, que++)
		ixlv_enable_queue_irq(hw, que->me);
}

void
ixlv_disable_intr(struct ixl_vsi *vsi)
{
	struct i40e_hw		*hw = vsi->hw;
	struct ixl_queue	*que = vsi->queues;

	ixlv_disable_adminq_irq(hw);
	for (int i = 0; i < vsi->num_queues; i++, que++)
		ixlv_disable_queue_irq(hw, que->me);
}

static void
ixlv_disable_adminq_irq(struct i40e_hw *hw)
{
	wr32(hw, I40E_VFINT_DYN_CTL01, 0);
	wr32(hw, I40E_VFINT_ICR0_ENA1, 0);
	/* flush */
	rd32(hw, I40E_VFGEN_RSTAT);
	return;
}

static void
ixlv_enable_adminq_irq(struct i40e_hw *hw)
{
	wr32(hw, I40E_VFINT_DYN_CTL01,
	    I40E_VFINT_DYN_CTL01_INTENA_MASK |
	    I40E_VFINT_DYN_CTL01_ITR_INDX_MASK);
	wr32(hw, I40E_VFINT_ICR0_ENA1, I40E_VFINT_ICR0_ENA1_ADMINQ_MASK);
	/* flush */
	rd32(hw, I40E_VFGEN_RSTAT);
	return;
}

static void
ixlv_enable_queue_irq(struct i40e_hw *hw, int id)
{
	u32		reg;

	reg = I40E_VFINT_DYN_CTLN1_INTENA_MASK |
	    I40E_VFINT_DYN_CTLN1_CLEARPBA_MASK;
	wr32(hw, I40E_VFINT_DYN_CTLN1(id), reg);
}

static void
ixlv_disable_queue_irq(struct i40e_hw *hw, int id)
{
	wr32(hw, I40E_VFINT_DYN_CTLN1(id), 0);
	rd32(hw, I40E_VFGEN_RSTAT);
	return;
}

/*
** Provide an update to the queue RX
** interrupt moderation value.
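**
** When dynamic ITR is enabled, the bytes-per-interrupt ratio picks a
** latency class, and the new value is blended with the old one:
**
**	itr' = (10 * target * itr) / (9 * target + itr)
**
** e.g. stepping from an ITR of 100 toward a target of 20 gives
** (10*20*100) / (9*20 + 100) = 20000 / 280 ~= 71, so the rate moves
** toward the target over several intervals instead of jumping.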
*/
static void
ixlv_set_queue_rx_itr(struct ixl_queue *que)
{
	struct ixl_vsi	*vsi = que->vsi;
	struct i40e_hw	*hw = vsi->hw;
	struct rx_ring	*rxr = &que->rxr;
	u16		rx_itr;
	u16		rx_latency = 0;
	int		rx_bytes;

	/* Idle, do nothing */
	if (rxr->bytes == 0)
		return;

	if (ixlv_dynamic_rx_itr) {
		rx_bytes = rxr->bytes / rxr->itr;
		rx_itr = rxr->itr;

		/* Adjust latency range */
		switch (rxr->latency) {
		case IXL_LOW_LATENCY:
			if (rx_bytes > 10) {
				rx_latency = IXL_AVE_LATENCY;
				rx_itr = IXL_ITR_20K;
			}
			break;
		case IXL_AVE_LATENCY:
			if (rx_bytes > 20) {
				rx_latency = IXL_BULK_LATENCY;
				rx_itr = IXL_ITR_8K;
			} else if (rx_bytes <= 10) {
				rx_latency = IXL_LOW_LATENCY;
				rx_itr = IXL_ITR_100K;
			}
			break;
		case IXL_BULK_LATENCY:
			if (rx_bytes <= 20) {
				rx_latency = IXL_AVE_LATENCY;
				rx_itr = IXL_ITR_20K;
			}
			break;
		}

		rxr->latency = rx_latency;

		if (rx_itr != rxr->itr) {
			/* do an exponential smoothing */
			rx_itr = (10 * rx_itr * rxr->itr) /
			    ((9 * rx_itr) + rxr->itr);
			rxr->itr = rx_itr & IXL_MAX_ITR;
			wr32(hw, I40E_VFINT_ITRN1(IXL_RX_ITR,
			    que->me), rxr->itr);
		}
	} else { /* We may have toggled to non-dynamic */
		if (vsi->rx_itr_setting & IXL_ITR_DYNAMIC)
			vsi->rx_itr_setting = ixlv_rx_itr;
		/* Update the hardware if needed */
		if (rxr->itr != vsi->rx_itr_setting) {
			rxr->itr = vsi->rx_itr_setting;
			wr32(hw, I40E_VFINT_ITRN1(IXL_RX_ITR,
			    que->me), rxr->itr);
		}
	}
	rxr->bytes = 0;
	rxr->packets = 0;
	return;
}

/*
** Provide an update to the queue TX
** interrupt moderation value.
*/
static void
ixlv_set_queue_tx_itr(struct ixl_queue *que)
{
	struct ixl_vsi	*vsi = que->vsi;
	struct i40e_hw	*hw = vsi->hw;
	struct tx_ring	*txr = &que->txr;
	u16		tx_itr;
	u16		tx_latency = 0;
	int		tx_bytes;

	/* Idle, do nothing */
	if (txr->bytes == 0)
		return;

	if (ixlv_dynamic_tx_itr) {
		tx_bytes = txr->bytes / txr->itr;
		tx_itr = txr->itr;

		switch (txr->latency) {
		case IXL_LOW_LATENCY:
			if (tx_bytes > 10) {
				tx_latency = IXL_AVE_LATENCY;
				tx_itr = IXL_ITR_20K;
			}
			break;
		case IXL_AVE_LATENCY:
			if (tx_bytes > 20) {
				tx_latency = IXL_BULK_LATENCY;
				tx_itr = IXL_ITR_8K;
			} else if (tx_bytes <= 10) {
				tx_latency = IXL_LOW_LATENCY;
				tx_itr = IXL_ITR_100K;
			}
			break;
		case IXL_BULK_LATENCY:
			if (tx_bytes <= 20) {
				tx_latency = IXL_AVE_LATENCY;
				tx_itr = IXL_ITR_20K;
			}
			break;
		}

		txr->latency = tx_latency;

		if (tx_itr != txr->itr) {
			/* do an exponential smoothing */
			tx_itr = (10 * tx_itr * txr->itr) /
			    ((9 * tx_itr) + txr->itr);
			txr->itr = tx_itr & IXL_MAX_ITR;
			wr32(hw, I40E_VFINT_ITRN1(IXL_TX_ITR,
			    que->me), txr->itr);
		}
	} else { /* We may have toggled to non-dynamic */
		if (vsi->tx_itr_setting & IXL_ITR_DYNAMIC)
			vsi->tx_itr_setting = ixlv_tx_itr;
		/* Update the hardware if needed */
		if (txr->itr != vsi->tx_itr_setting) {
			txr->itr = vsi->tx_itr_setting;
			wr32(hw, I40E_VFINT_ITRN1(IXL_TX_ITR,
			    que->me), txr->itr);
		}
	}
	txr->bytes = 0;
	txr->packets = 0;
	return;
}

/*
**
** MSIX Interrupt Handlers and Tasklets
**
*/
static void
ixlv_handle_que(void *context, int pending)
{
	struct ixl_queue *que = context;
	struct ixl_vsi *vsi = que->vsi;
	struct i40e_hw *hw = vsi->hw;
	struct tx_ring *txr = &que->txr;
	struct ifnet *ifp = vsi->ifp;
	bool more;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		more = ixl_rxeof(que, IXL_RX_LIMIT);
		mtx_lock(&txr->mtx);
		ixl_txeof(que);
		if (!drbr_empty(ifp, txr->br))
			ixl_mq_start_locked(ifp, txr);
		mtx_unlock(&txr->mtx);
		if (more) {
			taskqueue_enqueue(que->tq, &que->task);
			return;
		}
	}

	/* Re-enable this queue's interrupt */
	ixlv_enable_queue_irq(hw, que->me);
	return;
}

/*********************************************************************
 *
 *  MSIX Queue Interrupt Service routine
 *
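 *  One pass cleans up to IXL_RX_LIMIT received packets and the
 *  completed TX descriptors under the ring lock; if either side
 *  still has work, or the stack has frames queued in the buf_ring,
 *  the queue task is rescheduled, otherwise the vector is re-armed.
 *  The ITR values are refreshed on every pass.
 *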
 **********************************************************************/
static void
ixlv_msix_que(void *arg)
{
	struct ixl_queue	*que = arg;
	struct ixl_vsi		*vsi = que->vsi;
	struct i40e_hw		*hw = vsi->hw;
	struct tx_ring		*txr = &que->txr;
	bool			more_tx, more_rx;

	/* Spurious interrupts are ignored */
	if (!(vsi->ifp->if_drv_flags & IFF_DRV_RUNNING))
		return;

	++que->irqs;

	more_rx = ixl_rxeof(que, IXL_RX_LIMIT);

	mtx_lock(&txr->mtx);
	more_tx = ixl_txeof(que);
	/*
	** Make certain that if the stack
	** has anything queued the task gets
	** scheduled to handle it.
	*/
	if (!drbr_empty(vsi->ifp, txr->br))
		more_tx = 1;
	mtx_unlock(&txr->mtx);

	ixlv_set_queue_rx_itr(que);
	ixlv_set_queue_tx_itr(que);

	if (more_tx || more_rx)
		taskqueue_enqueue(que->tq, &que->task);
	else
		ixlv_enable_queue_irq(hw, que->me);

	return;
}

/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called whenever the user queries the status of
 *  the interface using ifconfig.
 *
 **********************************************************************/
static void
ixlv_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct ixl_vsi	*vsi = ifp->if_softc;
	struct ixlv_sc	*sc = vsi->back;

	INIT_DBG_IF(ifp, "begin");

	mtx_lock(&sc->mtx);

	ixlv_update_link_status(sc);

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if (!sc->link_up) {
		mtx_unlock(&sc->mtx);
		INIT_DBG_IF(ifp, "end: link not up");
		return;
	}

	ifmr->ifm_status |= IFM_ACTIVE;
	/* Hardware is always full-duplex */
	ifmr->ifm_active |= IFM_FDX;
	mtx_unlock(&sc->mtx);
	INIT_DBG_IF(ifp, "end");
	return;
}

/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called when the user changes speed/duplex using
 *  the media/mediaopt option with ifconfig.
 *
 **********************************************************************/
static int
ixlv_media_change(struct ifnet *ifp)
{
	struct ixl_vsi *vsi = ifp->if_softc;
	struct ifmedia *ifm = &vsi->media;

	INIT_DBG_IF(ifp, "begin");

	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
		return (EINVAL);

	INIT_DBG_IF(ifp, "end");
	return (0);
}

/*********************************************************************
 *  Multicast Initialization
 *
 *  This routine is called by init to reset to a fresh state.
 *
 **********************************************************************/
static void
ixlv_init_multi(struct ixl_vsi *vsi)
{
	struct ixlv_mac_filter	*f;
	struct ixlv_sc		*sc = vsi->back;
	int			mcnt = 0;

	IOCTL_DBG_IF(vsi->ifp, "begin");

	/* First clear any multicast filters */
	SLIST_FOREACH(f, sc->mac_filters, next) {
		if ((f->flags & IXL_FILTER_USED)
		    && (f->flags & IXL_FILTER_MC)) {
			f->flags |= IXL_FILTER_DEL;
			mcnt++;
		}
	}
	if (mcnt > 0)
		ixl_vc_enqueue(&sc->vc_mgr, &sc->del_multi_cmd,
		    IXLV_FLAG_AQ_DEL_MAC_FILTER, ixl_init_cmd_complete, sc);

	IOCTL_DBG_IF(vsi->ifp, "end");
}

static void
ixlv_add_multi(struct ixl_vsi *vsi)
{
	struct ifmultiaddr	*ifma;
	struct ifnet		*ifp = vsi->ifp;
	struct ixlv_sc		*sc = vsi->back;
	int			mcnt = 0;

	IOCTL_DBG_IF(ifp, "begin");

	if_maddr_rlock(ifp);
	/*
	** Get a count to decide if we
	** simply use multicast promiscuous.
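	** If the count reaches MAX_MULTICAST_ADDR we drop the
	** individual filters and request multicast promiscuous
	** mode from the PF instead (see below).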
	*/
	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
		if (ifma->ifma_addr->sa_family != AF_LINK)
			continue;
		mcnt++;
	}
	if_maddr_runlock(ifp);

	// TODO: Remove -- cannot set promiscuous mode in a VF
	if (__predict_false(mcnt >= MAX_MULTICAST_ADDR)) {
		/* delete all multicast filters */
		ixlv_init_multi(vsi);
		sc->promiscuous_flags |= I40E_FLAG_VF_MULTICAST_PROMISC;
		ixl_vc_enqueue(&sc->vc_mgr, &sc->add_multi_cmd,
		    IXLV_FLAG_AQ_CONFIGURE_PROMISC, ixl_init_cmd_complete,
		    sc);
		IOCTL_DEBUGOUT("%s: end: too many filters", __func__);
		return;
	}

	mcnt = 0;
	if_maddr_rlock(ifp);
	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
		if (ifma->ifma_addr->sa_family != AF_LINK)
			continue;
		if (!ixlv_add_mac_filter(sc,
		    (u8*)LLADDR((struct sockaddr_dl *) ifma->ifma_addr),
		    IXL_FILTER_MC))
			mcnt++;
	}
	if_maddr_runlock(ifp);
	/*
	** Notify AQ task that sw filters need to be
	** added to hw list
	*/
	if (mcnt > 0)
		ixl_vc_enqueue(&sc->vc_mgr, &sc->add_multi_cmd,
		    IXLV_FLAG_AQ_ADD_MAC_FILTER, ixl_init_cmd_complete,
		    sc);

	IOCTL_DBG_IF(ifp, "end");
}

static void
ixlv_del_multi(struct ixl_vsi *vsi)
{
	struct ixlv_mac_filter	*f;
	struct ifmultiaddr	*ifma;
	struct ifnet		*ifp = vsi->ifp;
	struct ixlv_sc		*sc = vsi->back;
	int			mcnt = 0;
	bool			match = FALSE;

	IOCTL_DBG_IF(ifp, "begin");

	/* Search for removed multicast addresses */
	if_maddr_rlock(ifp);
	SLIST_FOREACH(f, sc->mac_filters, next) {
		if ((f->flags & IXL_FILTER_USED)
		    && (f->flags & IXL_FILTER_MC)) {
			/* check if mac address in filter is in sc's list */
			match = FALSE;
			TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
				if (ifma->ifma_addr->sa_family != AF_LINK)
					continue;
				u8 *mc_addr =
				    (u8 *)LLADDR((struct sockaddr_dl *)ifma->ifma_addr);
				if (cmp_etheraddr(f->macaddr, mc_addr)) {
					match = TRUE;
					break;
				}
			}
			/* if this filter is not in the sc's list, remove it */
			if (match == FALSE && !(f->flags & IXL_FILTER_DEL)) {
				f->flags |= IXL_FILTER_DEL;
				mcnt++;
				IOCTL_DBG_IF(ifp, "marked: " MAC_FORMAT,
				    MAC_FORMAT_ARGS(f->macaddr));
			}
			else if (match == FALSE)
				IOCTL_DBG_IF(ifp, "exists: " MAC_FORMAT,
				    MAC_FORMAT_ARGS(f->macaddr));
		}
	}
	if_maddr_runlock(ifp);

	if (mcnt > 0)
		ixl_vc_enqueue(&sc->vc_mgr, &sc->del_multi_cmd,
		    IXLV_FLAG_AQ_DEL_MAC_FILTER, ixl_init_cmd_complete,
		    sc);

	IOCTL_DBG_IF(ifp, "end");
}

/*********************************************************************
 *  Timer routine
 *
 *  This routine checks for link status, updates statistics,
 *  and runs the watchdog check.
 *
 **********************************************************************/
static void
ixlv_local_timer(void *arg)
{
	struct ixlv_sc		*sc = arg;
	struct i40e_hw		*hw = &sc->hw;
	struct ixl_vsi		*vsi = &sc->vsi;
	struct ixl_queue	*que = vsi->queues;
	device_t		dev = sc->dev;
	int			hung = 0;
	u32			mask, val;

	IXLV_CORE_LOCK_ASSERT(sc);

	/* If Reset is in progress just bail */
	if (sc->init_state == IXLV_RESET_PENDING)
		return;

	/* Check for when PF triggers a VF reset */
	val = rd32(hw, I40E_VFGEN_RSTAT) &
	    I40E_VFGEN_RSTAT_VFR_STATE_MASK;

	if (val != I40E_VFR_VFACTIVE
	    && val != I40E_VFR_COMPLETED) {
		DDPRINTF(dev, "reset in progress! (%d)", val);
		return;
	}

	ixlv_request_stats(sc);

	/* clean and process any events */
	taskqueue_enqueue(sc->tq, &sc->aq_irq);

	/*
	** Check status on the queues for a hang
	*/
	mask = (I40E_VFINT_DYN_CTLN1_INTENA_MASK |
	    I40E_VFINT_DYN_CTLN1_SWINT_TRIG_MASK);

	for (int i = 0; i < vsi->num_queues; i++, que++) {
		/* Any queues with outstanding work get a sw irq */
		if (que->busy)
			wr32(hw, I40E_VFINT_DYN_CTLN1(que->me), mask);
		/*
		** Each time txeof runs without cleaning while there
		** are uncleaned descriptors, it increments busy.
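		** This timer fires once a second (see the callout_reset
		** below), so a queue must stay busy for several ticks,
		** getting a software-interrupt kick each time, before it
		** is actually declared hung.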
		** Once busy reaches IXL_MAX_TX_BUSY we declare the
		** queue hung.
		*/
		if (que->busy == IXL_QUEUE_HUNG) {
			++hung;
			/* Mark the queue as inactive */
			vsi->active_queues &= ~((u64)1 << que->me);
			continue;
		} else {
			/* Check if we've come back from hung */
			if ((vsi->active_queues & ((u64)1 << que->me)) == 0)
				vsi->active_queues |= ((u64)1 << que->me);
		}
		if (que->busy >= IXL_MAX_TX_BUSY) {
			device_printf(dev, "Warning: queue %d "
			    "appears to be hung!\n", i);
			que->busy = IXL_QUEUE_HUNG;
			++hung;
		}
	}
	/* Only reset when all queues show hung */
	if (hung == vsi->num_queues)
		goto hung;
	callout_reset(&sc->timer, hz, ixlv_local_timer, sc);
	return;

hung:
	device_printf(dev, "Local Timer: TX HANG DETECTED - Resetting!!\n");
	sc->init_state = IXLV_RESET_REQUIRED;
	ixlv_init_locked(sc);
}

/*
** Note: this routine updates the OS on the link state;
** the real check of the hardware only happens with
** a link interrupt.
*/
void
ixlv_update_link_status(struct ixlv_sc *sc)
{
	struct ixl_vsi	*vsi = &sc->vsi;
	struct ifnet	*ifp = vsi->ifp;

	if (sc->link_up) {
		if (vsi->link_active == FALSE) {
			if (bootverbose)
				if_printf(ifp, "Link is Up, %d Gbps\n",
				    (sc->link_speed == I40E_LINK_SPEED_40GB) ? 40 : 10);
			vsi->link_active = TRUE;
			if_link_state_change(ifp, LINK_STATE_UP);
		}
	} else { /* Link down */
		if (vsi->link_active == TRUE) {
			if (bootverbose)
				if_printf(ifp, "Link is Down\n");
			if_link_state_change(ifp, LINK_STATE_DOWN);
			vsi->link_active = FALSE;
		}
	}

	return;
}

/*********************************************************************
 *
 *  This routine disables all traffic on the adapter by issuing a
 *  global reset on the MAC and deallocates TX/RX buffers.
 *
 **********************************************************************/
static void
ixlv_stop(struct ixlv_sc *sc)
{
	struct ifnet *ifp;
	int start;

	ifp = sc->vsi.ifp;
	INIT_DBG_IF(ifp, "begin");

	IXLV_CORE_LOCK_ASSERT(sc);

	ixl_vc_flush(&sc->vc_mgr);
	ixlv_disable_queues(sc);

	start = ticks;
	while ((ifp->if_drv_flags & IFF_DRV_RUNNING) &&
	    ((ticks - start) < hz/10))
		ixlv_do_adminq_locked(sc);

	/* Stop the local timer */
	callout_stop(&sc->timer);

	INIT_DBG_IF(ifp, "end");
}

/*********************************************************************
 *
 *  Free all station queue structs.
 *
 **********************************************************************/
static void
ixlv_free_queues(struct ixl_vsi *vsi)
{
	struct ixlv_sc		*sc = (struct ixlv_sc *)vsi->back;
	struct ixl_queue	*que = vsi->queues;

	for (int i = 0; i < vsi->num_queues; i++, que++) {
		struct tx_ring *txr = &que->txr;
		struct rx_ring *rxr = &que->rxr;

		if (!mtx_initialized(&txr->mtx)) /* uninitialized */
			continue;
		IXL_TX_LOCK(txr);
		ixl_free_que_tx(que);
		if (txr->base)
			i40e_free_dma_mem(&sc->hw, &txr->dma);
		IXL_TX_UNLOCK(txr);
		IXL_TX_LOCK_DESTROY(txr);

		if (!mtx_initialized(&rxr->mtx)) /* uninitialized */
			continue;
		IXL_RX_LOCK(rxr);
		ixl_free_que_rx(que);
		if (rxr->base)
			i40e_free_dma_mem(&sc->hw, &rxr->dma);
		IXL_RX_UNLOCK(rxr);
		IXL_RX_LOCK_DESTROY(rxr);
	}
	free(vsi->queues, M_DEVBUF);
}

/*
** ixlv_config_rss - setup RSS
**
** RSS keys and table are cleared on VF reset.
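**
** The VF has its own 40-byte hash key (VFQF_HKEY, IXL_KEYSZ u32
** registers), hash-enable bits (VFQF_HENA), and a 64-entry
** indirection table packed four entries per VFQF_HLUT register.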
*/
static void
ixlv_config_rss(struct ixlv_sc *sc)
{
	struct i40e_hw	*hw = &sc->hw;
	struct ixl_vsi	*vsi = &sc->vsi;
	u32		lut = 0;
	u64		set_hena = 0, hena;
	int		i, j, que_id;
#ifdef RSS
	u32		rss_hash_config;
	u32		rss_seed[IXL_KEYSZ];
#else
	u32		rss_seed[IXL_KEYSZ] = {0x41b01687,
			    0x183cfd8c, 0xce880440, 0x580cbc3c,
			    0x35897377, 0x328b25e1, 0x4fa98922,
			    0xb7d90c14, 0xd5bad70d, 0xcd15a2c1};
#endif

	/* Don't set up RSS if using a single queue */
	if (vsi->num_queues == 1) {
		wr32(hw, I40E_VFQF_HENA(0), 0);
		wr32(hw, I40E_VFQF_HENA(1), 0);
		ixl_flush(hw);
		return;
	}

#ifdef RSS
	/* Fetch the configured RSS key */
	rss_getkey((uint8_t *) &rss_seed);
#endif
	/* Fill out hash function seed (rss_seed has IXL_KEYSZ entries) */
	for (i = 0; i < IXL_KEYSZ; i++)
		wr32(hw, I40E_VFQF_HKEY(i), rss_seed[i]);

	/* Enable PCTYPES for RSS: */
#ifdef RSS
	rss_hash_config = rss_gethashconfig();
	if (rss_hash_config & RSS_HASHTYPE_RSS_IPV4)
		set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV4_OTHER);
	if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV4)
		set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV4_TCP);
	if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV4)
		set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV4_UDP);
	if (rss_hash_config & RSS_HASHTYPE_RSS_IPV6)
		set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV6_OTHER);
	if (rss_hash_config & RSS_HASHTYPE_RSS_IPV6_EX)
		set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_FRAG_IPV6);
	if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV6)
		set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV6_TCP);
	if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV6)
		set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV6_UDP);
#else
	set_hena =
	    ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV4_UDP) |
	    ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV4_TCP) |
	    ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV4_SCTP) |
	    ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV4_OTHER) |
	    ((u64)1 << I40E_FILTER_PCTYPE_FRAG_IPV4) |
	    ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV6_UDP) |
	    ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV6_TCP) |
	    ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV6_SCTP) |
	    ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV6_OTHER) |
	    ((u64)1 << I40E_FILTER_PCTYPE_FRAG_IPV6) |
	    ((u64)1 << I40E_FILTER_PCTYPE_L2_PAYLOAD);
#endif
	hena = (u64)rd32(hw, I40E_VFQF_HENA(0)) |
	    ((u64)rd32(hw, I40E_VFQF_HENA(1)) << 32);
	hena |= set_hena;
	wr32(hw, I40E_VFQF_HENA(0), (u32)hena);
	wr32(hw, I40E_VFQF_HENA(1), (u32)(hena >> 32));

	// TODO: Fix -- only 3,7,11,15 are filled out, instead of all 16 registers
	/* Populate the LUT with max no. of queues in round-robin fashion */
	for (i = 0, j = 0; i <= I40E_VFQF_HLUT_MAX_INDEX; i++, j++) {
		if (j == vsi->num_queues)
			j = 0;
#ifdef RSS
		/*
		 * Fetch the RSS bucket id for the given indirection entry.
		 * Cap it at the number of configured buckets (which is
		 * num_queues.)
		 */
		que_id = rss_get_indirection_to_bucket(i);
		que_id = que_id % vsi->num_queues;
#else
		que_id = j;
#endif
		/* lut = 4-byte sliding window of 4 lut entries */
		lut = (lut << 8) | (que_id & 0xF);
		/* On i = 3, we have 4 entries in lut; write to the register */
		if ((i & 3) == 3) {
			wr32(hw, I40E_VFQF_HLUT(i), lut);
			DDPRINTF(sc->dev, "HLUT(%2d): %#010x", i, lut);
		}
	}
	ixl_flush(hw);
}

/*
** This routine refreshes vlan filters; called by init,
** it scans the filter table and then updates the AQ.
*/
static void
ixlv_setup_vlan_filters(struct ixlv_sc *sc)
{
	struct ixl_vsi		*vsi = &sc->vsi;
	struct ixlv_vlan_filter	*f;
	int			cnt = 0;

	if (vsi->num_vlans == 0)
		return;
	/*
	** Scan the filter table for vlan entries;
	** if any are found, call for the AQ update.
*/ SLIST_FOREACH(f, sc->vlan_filters, next) if (f->flags & IXL_FILTER_ADD) cnt++; if (cnt > 0) ixl_vc_enqueue(&sc->vc_mgr, &sc->add_vlan_cmd, IXLV_FLAG_AQ_ADD_VLAN_FILTER, ixl_init_cmd_complete, sc); } /* ** This routine adds new MAC filters to the sc's list; ** these are later added in hardware by sending a virtual ** channel message. */ static int ixlv_add_mac_filter(struct ixlv_sc *sc, u8 *macaddr, u16 flags) { struct ixlv_mac_filter *f; /* Does one already exist? */ f = ixlv_find_mac_filter(sc, macaddr); if (f != NULL) { IDPRINTF(sc->vsi.ifp, "exists: " MAC_FORMAT, MAC_FORMAT_ARGS(macaddr)); return (EEXIST); } /* If not, get a new empty filter */ f = ixlv_get_mac_filter(sc); if (f == NULL) { if_printf(sc->vsi.ifp, "%s: no filters available!!\n", __func__); return (ENOMEM); } IDPRINTF(sc->vsi.ifp, "marked: " MAC_FORMAT, MAC_FORMAT_ARGS(macaddr)); bcopy(macaddr, f->macaddr, ETHER_ADDR_LEN); f->flags |= (IXL_FILTER_ADD | IXL_FILTER_USED); f->flags |= flags; return (0); } /* ** Marks a MAC filter for deletion. */ static int ixlv_del_mac_filter(struct ixlv_sc *sc, u8 *macaddr) { struct ixlv_mac_filter *f; f = ixlv_find_mac_filter(sc, macaddr); if (f == NULL) return (ENOENT); f->flags |= IXL_FILTER_DEL; return (0); } /* ** Tasklet handler for MSIX Adminq interrupts ** - done outside interrupt context since it might sleep */ static void ixlv_do_adminq(void *context, int pending) { struct ixlv_sc *sc = context; mtx_lock(&sc->mtx); ixlv_do_adminq_locked(sc); mtx_unlock(&sc->mtx); return; } static void ixlv_do_adminq_locked(struct ixlv_sc *sc) { struct i40e_hw *hw = &sc->hw; struct i40e_arq_event_info event; struct i40e_virtchnl_msg *v_msg; device_t dev = sc->dev; u16 result = 0; u32 reg, oldreg; i40e_status ret; IXLV_CORE_LOCK_ASSERT(sc); event.buf_len = IXL_AQ_BUF_SZ; event.msg_buf = sc->aq_buffer; v_msg = (struct i40e_virtchnl_msg *)&event.desc; do { ret = i40e_clean_arq_element(hw, &event, &result); if (ret) break; ixlv_vc_completion(sc, v_msg->v_opcode, v_msg->v_retval, event.msg_buf, event.msg_len); if (result != 0) bzero(event.msg_buf, IXL_AQ_BUF_SZ); } while (result); /* check for Admin queue errors */ oldreg = reg = rd32(hw, hw->aq.arq.len); if (reg & I40E_VF_ARQLEN1_ARQVFE_MASK) { device_printf(dev, "ARQ VF Error detected\n"); reg &= ~I40E_VF_ARQLEN1_ARQVFE_MASK; } if (reg & I40E_VF_ARQLEN1_ARQOVFL_MASK) { device_printf(dev, "ARQ Overflow Error detected\n"); reg &= ~I40E_VF_ARQLEN1_ARQOVFL_MASK; } if (reg & I40E_VF_ARQLEN1_ARQCRIT_MASK) { device_printf(dev, "ARQ Critical Error detected\n"); reg &= ~I40E_VF_ARQLEN1_ARQCRIT_MASK; } if (oldreg != reg) wr32(hw, hw->aq.arq.len, reg); oldreg = reg = rd32(hw, hw->aq.asq.len); if (reg & I40E_VF_ATQLEN1_ATQVFE_MASK) { device_printf(dev, "ASQ VF Error detected\n"); reg &= ~I40E_VF_ATQLEN1_ATQVFE_MASK; } if (reg & I40E_VF_ATQLEN1_ATQOVFL_MASK) { device_printf(dev, "ASQ Overflow Error detected\n"); reg &= ~I40E_VF_ATQLEN1_ATQOVFL_MASK; } if (reg & I40E_VF_ATQLEN1_ATQCRIT_MASK) { device_printf(dev, "ASQ Critical Error detected\n"); reg &= ~I40E_VF_ATQLEN1_ATQCRIT_MASK; } if (oldreg != reg) wr32(hw, hw->aq.asq.len, reg); ixlv_enable_adminq_irq(hw); } static void ixlv_add_sysctls(struct ixlv_sc *sc) { device_t dev = sc->dev; struct ixl_vsi *vsi = &sc->vsi; struct i40e_eth_stats *es = &vsi->eth_stats; struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev); struct sysctl_oid *tree = device_get_sysctl_tree(dev); struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree); struct sysctl_oid *vsi_node, *queue_node; struct sysctl_oid_list *vsi_list, 
	    *queue_list;

#define QUEUE_NAME_LEN 32
	char queue_namebuf[QUEUE_NAME_LEN];

	struct ixl_queue *queues = vsi->queues;
	struct tx_ring *txr;
	struct rx_ring *rxr;

	/* Driver statistics sysctls */
	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_events",
	    CTLFLAG_RD, &sc->watchdog_events,
	    "Watchdog timeouts");
	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "admin_irq",
	    CTLFLAG_RD, &sc->admin_irq,
	    "Admin Queue IRQ Handled");

	/* VSI statistics sysctls */
	vsi_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "vsi",
	    CTLFLAG_RD, NULL, "VSI-specific statistics");
	vsi_list = SYSCTL_CHILDREN(vsi_node);

	struct ixl_sysctl_info ctls[] =
	{
		{&es->rx_bytes, "good_octets_rcvd", "Good Octets Received"},
		{&es->rx_unicast, "ucast_pkts_rcvd",
			"Unicast Packets Received"},
		{&es->rx_multicast, "mcast_pkts_rcvd",
			"Multicast Packets Received"},
		{&es->rx_broadcast, "bcast_pkts_rcvd",
			"Broadcast Packets Received"},
		{&es->rx_discards, "rx_discards", "Discarded RX packets"},
		{&es->rx_unknown_protocol, "rx_unknown_proto",
			"RX unknown protocol packets"},
		{&es->tx_bytes, "good_octets_txd", "Good Octets Transmitted"},
		{&es->tx_unicast, "ucast_pkts_txd",
			"Unicast Packets Transmitted"},
		{&es->tx_multicast, "mcast_pkts_txd",
			"Multicast Packets Transmitted"},
		{&es->tx_broadcast, "bcast_pkts_txd",
			"Broadcast Packets Transmitted"},
		{&es->tx_errors, "tx_errors", "TX packet errors"},
		// end
		{0,0,0}
	};
	struct ixl_sysctl_info *entry = ctls;
	while (entry->stat != NULL)
	{
		SYSCTL_ADD_QUAD(ctx, child, OID_AUTO, entry->name,
		    CTLFLAG_RD, entry->stat, entry->description);
		entry++;
	}

	/* Queue sysctls */
	for (int q = 0; q < vsi->num_queues; q++) {
		snprintf(queue_namebuf, QUEUE_NAME_LEN, "que%d", q);
		queue_node = SYSCTL_ADD_NODE(ctx, vsi_list, OID_AUTO,
		    queue_namebuf, CTLFLAG_RD, NULL, "Queue Name");
		queue_list = SYSCTL_CHILDREN(queue_node);

		txr = &(queues[q].txr);
		rxr = &(queues[q].rxr);

		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "mbuf_defrag_failed",
		    CTLFLAG_RD, &(queues[q].mbuf_defrag_failed),
		    "m_defrag() failed");
		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "dropped",
		    CTLFLAG_RD, &(queues[q].dropped_pkts),
		    "Driver dropped packets");
		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "irqs",
		    CTLFLAG_RD, &(queues[q].irqs),
		    "irqs on this queue");
		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "tso_tx",
		    CTLFLAG_RD, &(queues[q].tso),
		    "TSO");
		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "tx_dma_setup",
		    CTLFLAG_RD, &(queues[q].tx_dma_setup),
		    "Driver tx dma failure in xmit");
		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "no_desc_avail",
		    CTLFLAG_RD, &(txr->no_desc),
		    "Queue No Descriptor Available");
		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "tx_packets",
		    CTLFLAG_RD, &(txr->total_packets),
		    "Queue Packets Transmitted");
		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "tx_bytes",
		    CTLFLAG_RD, &(txr->tx_bytes),
		    "Queue Bytes Transmitted");
		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_packets",
		    CTLFLAG_RD, &(rxr->rx_packets),
		    "Queue Packets Received");
		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_bytes",
		    CTLFLAG_RD, &(rxr->rx_bytes),
		    "Queue Bytes Received");

		/* Examine queue state */
		/* These report the tail registers, so name them accordingly */
		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "qtx_tail",
		    CTLTYPE_UINT | CTLFLAG_RD, &queues[q],
		    sizeof(struct ixl_queue),
		    ixlv_sysctl_qtx_tail_handler, "IU",
		    "Queue Transmit Descriptor Tail");
		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "qrx_tail",
		    CTLTYPE_UINT | CTLFLAG_RD, &queues[q],
		    sizeof(struct ixl_queue),
		    ixlv_sysctl_qrx_tail_handler, "IU",
		    "Queue Receive Descriptor Tail");
	}
}

static void
ixlv_init_filters(struct ixlv_sc *sc)
{
	sc->mac_filters = malloc(sizeof(struct ixlv_mac_filter),
	    M_DEVBUF, M_NOWAIT | M_ZERO);
	SLIST_INIT(sc->mac_filters);
	sc->vlan_filters = malloc(sizeof(struct ixlv_vlan_filter),
	    M_DEVBUF, M_NOWAIT | M_ZERO);
	SLIST_INIT(sc->vlan_filters);
	return;
}

static void
ixlv_free_filters(struct ixlv_sc *sc)
{
	struct ixlv_mac_filter *f;
	struct ixlv_vlan_filter *v;

	while (!SLIST_EMPTY(sc->mac_filters)) {
		f = SLIST_FIRST(sc->mac_filters);
		SLIST_REMOVE_HEAD(sc->mac_filters, next);
		free(f, M_DEVBUF);
	}
	while (!SLIST_EMPTY(sc->vlan_filters)) {
		v = SLIST_FIRST(sc->vlan_filters);
		SLIST_REMOVE_HEAD(sc->vlan_filters, next);
		free(v, M_DEVBUF);
	}
	return;
}

/**
 * ixlv_sysctl_qtx_tail_handler
 * Retrieves I40E_QTX_TAIL1 value from hardware
 * for a sysctl.
 */
static int
ixlv_sysctl_qtx_tail_handler(SYSCTL_HANDLER_ARGS)
{
	struct ixl_queue *que;
	int error;
	u32 val;

	que = ((struct ixl_queue *)oidp->oid_arg1);
	if (!que)
		return (0);

	val = rd32(que->vsi->hw, que->txr.tail);
	error = sysctl_handle_int(oidp, &val, 0, req);
	if (error || !req->newptr)
		return (error);
	return (0);
}

/**
 * ixlv_sysctl_qrx_tail_handler
 * Retrieves I40E_QRX_TAIL1 value from hardware
 * for a sysctl.
 */
static int
ixlv_sysctl_qrx_tail_handler(SYSCTL_HANDLER_ARGS)
{
	struct ixl_queue *que;
	int error;
	u32 val;

	que = ((struct ixl_queue *)oidp->oid_arg1);
	if (!que)
		return (0);

	val = rd32(que->vsi->hw, que->rxr.tail);
	error = sysctl_handle_int(oidp, &val, 0, req);
	if (error || !req->newptr)
		return (error);
	return (0);
}