Index: stable/11/sys/dev/ixl/if_ixl.c =================================================================== --- stable/11/sys/dev/ixl/if_ixl.c (revision 343558) +++ stable/11/sys/dev/ixl/if_ixl.c (revision 343559) @@ -1,816 +1,810 @@ /****************************************************************************** Copyright (c) 2013-2017, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************/ /*$FreeBSD$*/ #include "ixl.h" #include "ixl_pf.h" #ifdef IXL_IW #include "ixl_iw.h" #include "ixl_iw_int.h" #endif #ifdef PCI_IOV #include "ixl_pf_iov.h" #endif /********************************************************************* * Driver version *********************************************************************/ #define IXL_DRIVER_VERSION_MAJOR 1 #define IXL_DRIVER_VERSION_MINOR 9 #define IXL_DRIVER_VERSION_BUILD 9 char ixl_driver_version[] = __XSTRING(IXL_DRIVER_VERSION_MAJOR) "." __XSTRING(IXL_DRIVER_VERSION_MINOR) "." 
__XSTRING(IXL_DRIVER_VERSION_BUILD) "-k"; /********************************************************************* * PCI Device ID Table * * Used by probe to select devices to load on * Last field stores an index into ixl_strings * Last entry must be all 0s * * { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index } *********************************************************************/ static ixl_vendor_info_t ixl_vendor_info_array[] = { {I40E_INTEL_VENDOR_ID, I40E_DEV_ID_SFP_XL710, 0, 0, 0}, {I40E_INTEL_VENDOR_ID, I40E_DEV_ID_KX_B, 0, 0, 0}, {I40E_INTEL_VENDOR_ID, I40E_DEV_ID_KX_C, 0, 0, 0}, {I40E_INTEL_VENDOR_ID, I40E_DEV_ID_QSFP_A, 0, 0, 0}, {I40E_INTEL_VENDOR_ID, I40E_DEV_ID_QSFP_B, 0, 0, 0}, {I40E_INTEL_VENDOR_ID, I40E_DEV_ID_QSFP_C, 0, 0, 0}, {I40E_INTEL_VENDOR_ID, I40E_DEV_ID_10G_BASE_T, 0, 0, 0}, {I40E_INTEL_VENDOR_ID, I40E_DEV_ID_10G_BASE_T4, 0, 0, 0}, {I40E_INTEL_VENDOR_ID, I40E_DEV_ID_KX_X722, 0, 0, 0}, {I40E_INTEL_VENDOR_ID, I40E_DEV_ID_QSFP_X722, 0, 0, 0}, {I40E_INTEL_VENDOR_ID, I40E_DEV_ID_SFP_X722, 0, 0, 0}, {I40E_INTEL_VENDOR_ID, I40E_DEV_ID_1G_BASE_T_X722, 0, 0, 0}, {I40E_INTEL_VENDOR_ID, I40E_DEV_ID_10G_BASE_T_X722, 0, 0, 0}, {I40E_INTEL_VENDOR_ID, I40E_DEV_ID_SFP_I_X722, 0, 0, 0}, {I40E_INTEL_VENDOR_ID, I40E_DEV_ID_25G_B, 0, 0, 0}, {I40E_INTEL_VENDOR_ID, I40E_DEV_ID_25G_SFP28, 0, 0, 0}, /* required last entry */ {0, 0, 0, 0, 0} }; /********************************************************************* * Table of branding strings *********************************************************************/ static char *ixl_strings[] = { "Intel(R) Ethernet Connection 700 Series PF Driver" }; /********************************************************************* * Function prototypes *********************************************************************/ static int ixl_probe(device_t); static int ixl_attach(device_t); static int ixl_detach(device_t); static int ixl_shutdown(device_t); static int ixl_save_pf_tunables(struct ixl_pf *); /********************************************************************* * FreeBSD Device Interface Entry Points *********************************************************************/ static device_method_t ixl_methods[] = { /* Device interface */ DEVMETHOD(device_probe, ixl_probe), DEVMETHOD(device_attach, ixl_attach), DEVMETHOD(device_detach, ixl_detach), DEVMETHOD(device_shutdown, ixl_shutdown), #ifdef PCI_IOV DEVMETHOD(pci_iov_init, ixl_iov_init), DEVMETHOD(pci_iov_uninit, ixl_iov_uninit), DEVMETHOD(pci_iov_add_vf, ixl_add_vf), #endif {0, 0} }; static driver_t ixl_driver = { "ixl", ixl_methods, sizeof(struct ixl_pf), }; devclass_t ixl_devclass; DRIVER_MODULE(ixl, pci, ixl_driver, ixl_devclass, 0, 0); MODULE_VERSION(ixl, 1); MODULE_DEPEND(ixl, pci, 1, 1, 1); MODULE_DEPEND(ixl, ether, 1, 1, 1); #if defined(DEV_NETMAP) && __FreeBSD_version >= 1100000 MODULE_DEPEND(ixl, netmap, 1, 1, 1); #endif /* DEV_NETMAP */ /* ** TUNEABLE PARAMETERS: */ static SYSCTL_NODE(_hw, OID_AUTO, ixl, CTLFLAG_RD, 0, "IXL driver parameters"); /* * MSIX should be the default for best performance, * but this allows it to be forced off for testing. 
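 *
 * For example, MSI-X could be forced off for testing from loader.conf(5)
 * before boot (the value shown is only an illustration):
 *   hw.ixl.enable_msix="0"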
*/ static int ixl_enable_msix = 1; TUNABLE_INT("hw.ixl.enable_msix", &ixl_enable_msix); SYSCTL_INT(_hw_ixl, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &ixl_enable_msix, 0, "Enable MSI-X interrupts"); /* ** Number of descriptors per ring ** - TX and RX sizes are independently configurable */ static int ixl_tx_ring_size = IXL_DEFAULT_RING; TUNABLE_INT("hw.ixl.tx_ring_size", &ixl_tx_ring_size); SYSCTL_INT(_hw_ixl, OID_AUTO, tx_ring_size, CTLFLAG_RDTUN, &ixl_tx_ring_size, 0, "TX Descriptor Ring Size"); static int ixl_rx_ring_size = IXL_DEFAULT_RING; TUNABLE_INT("hw.ixl.rx_ring_size", &ixl_rx_ring_size); SYSCTL_INT(_hw_ixl, OID_AUTO, rx_ring_size, CTLFLAG_RDTUN, &ixl_rx_ring_size, 0, "RX Descriptor Ring Size"); /* ** This can be set manually, if left as 0 the ** number of queues will be calculated based ** on cpus and msix vectors available. */ static int ixl_max_queues = 0; TUNABLE_INT("hw.ixl.max_queues", &ixl_max_queues); SYSCTL_INT(_hw_ixl, OID_AUTO, max_queues, CTLFLAG_RDTUN, &ixl_max_queues, 0, "Number of Queues"); /* * Leave this on unless you need to send flow control * frames (or other control frames) from software */ static int ixl_enable_tx_fc_filter = 1; TUNABLE_INT("hw.ixl.enable_tx_fc_filter", &ixl_enable_tx_fc_filter); SYSCTL_INT(_hw_ixl, OID_AUTO, enable_tx_fc_filter, CTLFLAG_RDTUN, &ixl_enable_tx_fc_filter, 0, "Filter out packets with Ethertype 0x8808 from being sent out by non-HW sources"); /* * Different method for processing TX descriptor * completion. */ static int ixl_enable_head_writeback = 1; TUNABLE_INT("hw.ixl.enable_head_writeback", &ixl_enable_head_writeback); SYSCTL_INT(_hw_ixl, OID_AUTO, enable_head_writeback, CTLFLAG_RDTUN, &ixl_enable_head_writeback, 0, "For detecting last completed TX descriptor by hardware, use value written by HW instead of checking descriptors"); static int ixl_core_debug_mask = 0; TUNABLE_INT("hw.ixl.core_debug_mask", &ixl_core_debug_mask); SYSCTL_INT(_hw_ixl, OID_AUTO, core_debug_mask, CTLFLAG_RDTUN, &ixl_core_debug_mask, 0, "Display debug statements that are printed in non-shared code"); static int ixl_shared_debug_mask = 0; TUNABLE_INT("hw.ixl.shared_debug_mask", &ixl_shared_debug_mask); SYSCTL_INT(_hw_ixl, OID_AUTO, shared_debug_mask, CTLFLAG_RDTUN, &ixl_shared_debug_mask, 0, "Display debug statements that are printed in shared code"); /* ** Controls for Interrupt Throttling ** - true/false for dynamic adjustment ** - default values for static ITR */ static int ixl_dynamic_rx_itr = 1; TUNABLE_INT("hw.ixl.dynamic_rx_itr", &ixl_dynamic_rx_itr); SYSCTL_INT(_hw_ixl, OID_AUTO, dynamic_rx_itr, CTLFLAG_RDTUN, &ixl_dynamic_rx_itr, 0, "Dynamic RX Interrupt Rate"); static int ixl_dynamic_tx_itr = 1; TUNABLE_INT("hw.ixl.dynamic_tx_itr", &ixl_dynamic_tx_itr); SYSCTL_INT(_hw_ixl, OID_AUTO, dynamic_tx_itr, CTLFLAG_RDTUN, &ixl_dynamic_tx_itr, 0, "Dynamic TX Interrupt Rate"); static int ixl_rx_itr = IXL_ITR_8K; TUNABLE_INT("hw.ixl.rx_itr", &ixl_rx_itr); SYSCTL_INT(_hw_ixl, OID_AUTO, rx_itr, CTLFLAG_RDTUN, &ixl_rx_itr, 0, "RX Interrupt Rate"); static int ixl_tx_itr = IXL_ITR_4K; TUNABLE_INT("hw.ixl.tx_itr", &ixl_tx_itr); SYSCTL_INT(_hw_ixl, OID_AUTO, tx_itr, CTLFLAG_RDTUN, &ixl_tx_itr, 0, "TX Interrupt Rate"); #ifdef IXL_IW int ixl_enable_iwarp = 0; TUNABLE_INT("hw.ixl.enable_iwarp", &ixl_enable_iwarp); SYSCTL_INT(_hw_ixl, OID_AUTO, enable_iwarp, CTLFLAG_RDTUN, &ixl_enable_iwarp, 0, "iWARP enabled"); #if __FreeBSD_version < 1100000 int ixl_limit_iwarp_msix = 1; #else int ixl_limit_iwarp_msix = IXL_IW_MAX_MSIX; #endif TUNABLE_INT("hw.ixl.limit_iwarp_msix", 
&ixl_limit_iwarp_msix); SYSCTL_INT(_hw_ixl, OID_AUTO, limit_iwarp_msix, CTLFLAG_RDTUN, &ixl_limit_iwarp_msix, 0, "Limit MSIX vectors assigned to iWARP"); #endif #ifdef DEV_NETMAP #define NETMAP_IXL_MAIN /* only bring in one part of the netmap code */ #include #endif /* DEV_NETMAP */ /********************************************************************* * Device identification routine * * ixl_probe determines if the driver should be loaded on * the hardware based on PCI vendor/device id of the device. * * return BUS_PROBE_DEFAULT on success, positive on failure *********************************************************************/ static int ixl_probe(device_t dev) { ixl_vendor_info_t *ent; u16 pci_vendor_id, pci_device_id; u16 pci_subvendor_id, pci_subdevice_id; char device_name[256]; #if 0 INIT_DEBUGOUT("ixl_probe: begin"); #endif pci_vendor_id = pci_get_vendor(dev); if (pci_vendor_id != I40E_INTEL_VENDOR_ID) return (ENXIO); pci_device_id = pci_get_device(dev); pci_subvendor_id = pci_get_subvendor(dev); pci_subdevice_id = pci_get_subdevice(dev); ent = ixl_vendor_info_array; while (ent->vendor_id != 0) { if ((pci_vendor_id == ent->vendor_id) && (pci_device_id == ent->device_id) && ((pci_subvendor_id == ent->subvendor_id) || (ent->subvendor_id == 0)) && ((pci_subdevice_id == ent->subdevice_id) || (ent->subdevice_id == 0))) { sprintf(device_name, "%s, Version - %s", ixl_strings[ent->index], ixl_driver_version); device_set_desc_copy(dev, device_name); return (BUS_PROBE_DEFAULT); } ent++; } return (ENXIO); } /* * Sanity check and save off tunable values. */ static int ixl_save_pf_tunables(struct ixl_pf *pf) { device_t dev = pf->dev; /* Save tunable information */ pf->enable_msix = ixl_enable_msix; pf->max_queues = ixl_max_queues; pf->enable_tx_fc_filter = ixl_enable_tx_fc_filter; pf->dynamic_rx_itr = ixl_dynamic_rx_itr; pf->dynamic_tx_itr = ixl_dynamic_tx_itr; pf->dbg_mask = ixl_core_debug_mask; pf->hw.debug_mask = ixl_shared_debug_mask; #ifdef DEV_NETMAP if (ixl_enable_head_writeback == 0) device_printf(dev, "Head writeback mode cannot be disabled " "when netmap is enabled\n"); pf->vsi.enable_head_writeback = 1; #else pf->vsi.enable_head_writeback = !!(ixl_enable_head_writeback); #endif ixl_vsi_setup_rings_size(&pf->vsi, ixl_tx_ring_size, ixl_rx_ring_size); if (ixl_tx_itr < 0 || ixl_tx_itr > IXL_MAX_ITR) { device_printf(dev, "Invalid tx_itr value of %d set!\n", ixl_tx_itr); device_printf(dev, "tx_itr must be between %d and %d, " "inclusive\n", 0, IXL_MAX_ITR); device_printf(dev, "Using default value of %d instead\n", IXL_ITR_4K); pf->tx_itr = IXL_ITR_4K; } else pf->tx_itr = ixl_tx_itr; if (ixl_rx_itr < 0 || ixl_rx_itr > IXL_MAX_ITR) { device_printf(dev, "Invalid rx_itr value of %d set!\n", ixl_rx_itr); device_printf(dev, "rx_itr must be between %d and %d, " "inclusive\n", 0, IXL_MAX_ITR); device_printf(dev, "Using default value of %d instead\n", IXL_ITR_8K); pf->rx_itr = IXL_ITR_8K; } else pf->rx_itr = ixl_rx_itr; return (0); } /********************************************************************* * Device initialization routine * * The attach entry point is called when the driver is being loaded. * This routine identifies the type of hardware, allocates all resources * and initializes the hardware. 
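 * On any failure, attach unwinds through the err_late, err_mac_hmc,
 * err_get_cap and err_out labels below, releasing resources in roughly
 * the reverse order in which they were acquired.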
* * return 0 on success, positive on failure *********************************************************************/ static int ixl_attach(device_t dev) { struct ixl_pf *pf; struct i40e_hw *hw; struct ixl_vsi *vsi; enum i40e_status_code status; int error = 0; INIT_DEBUGOUT("ixl_attach: begin"); /* Allocate, clear, and link in our primary soft structure */ pf = device_get_softc(dev); pf->dev = pf->osdep.dev = dev; hw = &pf->hw; /* ** Note this assumes we have a single embedded VSI, ** this could be enhanced later to allocate multiple */ vsi = &pf->vsi; vsi->dev = pf->dev; vsi->back = pf; /* Save tunable values */ error = ixl_save_pf_tunables(pf); if (error) return (error); /* Core Lock Init*/ IXL_PF_LOCK_INIT(pf, device_get_nameunit(dev)); /* Set up the timer callout */ callout_init_mtx(&pf->timer, &pf->pf_mtx, 0); /* Do PCI setup - map BAR0, etc */ if (ixl_allocate_pci_resources(pf)) { device_printf(dev, "Allocation of PCI resources failed\n"); error = ENXIO; goto err_out; } /* Establish a clean starting point */ i40e_clear_hw(hw); status = i40e_pf_reset(hw); if (status) { device_printf(dev, "PF reset failure %s\n", i40e_stat_str(hw, status)); error = EIO; goto err_out; } /* Initialize the shared code */ status = i40e_init_shared_code(hw); if (status) { device_printf(dev, "Unable to initialize shared code, error %s\n", i40e_stat_str(hw, status)); error = EIO; goto err_out; } /* Set up the admin queue */ hw->aq.num_arq_entries = IXL_AQ_LEN; hw->aq.num_asq_entries = IXL_AQ_LEN; hw->aq.arq_buf_size = IXL_AQ_BUF_SZ; hw->aq.asq_buf_size = IXL_AQ_BUF_SZ; status = i40e_init_adminq(hw); if (status != 0 && status != I40E_ERR_FIRMWARE_API_VERSION) { device_printf(dev, "Unable to initialize Admin Queue, error %s\n", i40e_stat_str(hw, status)); error = EIO; goto err_out; } ixl_print_nvm_version(pf); if (status == I40E_ERR_FIRMWARE_API_VERSION) { device_printf(dev, "The driver for the device stopped " "because the NVM image is newer than expected.\n"); device_printf(dev, "You must install the most recent version of " "the network driver.\n"); error = EIO; goto err_out; } if (hw->aq.api_maj_ver == I40E_FW_API_VERSION_MAJOR && hw->aq.api_min_ver > I40E_FW_MINOR_VERSION(hw)) { device_printf(dev, "The driver for the device detected " "a newer version of the NVM image than expected.\n"); device_printf(dev, "Please install the most recent version " "of the network driver.\n"); } else if (hw->aq.api_maj_ver == 1 && hw->aq.api_min_ver < 4) { device_printf(dev, "The driver for the device detected " "an older version of the NVM image than expected.\n"); device_printf(dev, "Please update the NVM image.\n"); } /* Clear PXE mode */ i40e_clear_pxe_mode(hw); /* Get capabilities from the device */ error = ixl_get_hw_capabilities(pf); if (error) { device_printf(dev, "HW capabilities failure!\n"); goto err_get_cap; } /* * Allocate interrupts and figure out number of queues to use * for PF interface */ pf->msix = ixl_init_msix(pf); /* Set up host memory cache */ status = i40e_init_lan_hmc(hw, hw->func_caps.num_tx_qp, hw->func_caps.num_rx_qp, 0, 0); if (status) { device_printf(dev, "init_lan_hmc failed: %s\n", i40e_stat_str(hw, status)); goto err_get_cap; } status = i40e_configure_lan_hmc(hw, I40E_HMC_MODEL_DIRECT_ONLY); if (status) { device_printf(dev, "configure_lan_hmc failed: %s\n", i40e_stat_str(hw, status)); goto err_mac_hmc; } /* Init queue allocation manager */ error = ixl_pf_qmgr_init(&pf->qmgr, hw->func_caps.num_tx_qp); if (error) { device_printf(dev, "Failed to init queue manager for PF queues, error %d\n", 
error); goto err_mac_hmc; } /* reserve a contiguous allocation for the PF's VSI */ error = ixl_pf_qmgr_alloc_contiguous(&pf->qmgr, vsi->num_queues, &pf->qtag); if (error) { device_printf(dev, "Failed to reserve queues for PF LAN VSI, error %d\n", error); goto err_mac_hmc; } device_printf(dev, "Allocating %d queues for PF LAN VSI; %d queues active\n", pf->qtag.num_allocated, pf->qtag.num_active); /* Disable LLDP from the firmware for certain NVM versions */ if (((pf->hw.aq.fw_maj_ver == 4) && (pf->hw.aq.fw_min_ver < 3)) || (pf->hw.aq.fw_maj_ver < 4)) { i40e_aq_stop_lldp(hw, TRUE, NULL); pf->state |= IXL_PF_STATE_FW_LLDP_DISABLED; } /* Get MAC addresses from hardware */ i40e_get_mac_addr(hw, hw->mac.addr); error = i40e_validate_mac_addr(hw->mac.addr); if (error) { device_printf(dev, "validate_mac_addr failed: %d\n", error); goto err_mac_hmc; } bcopy(hw->mac.addr, hw->mac.perm_addr, ETHER_ADDR_LEN); i40e_get_port_mac_addr(hw, hw->mac.port_addr); /* Query device FW LLDP status */ ixl_get_fw_lldp_status(pf); /* Tell FW to apply DCB config on link up */ if ((hw->mac.type != I40E_MAC_X722) && ((pf->hw.aq.api_maj_ver > 1) || (pf->hw.aq.api_maj_ver == 1 && pf->hw.aq.api_min_ver >= 7))) i40e_aq_set_dcb_parameters(hw, true, NULL); /* Initialize mac filter list for VSI */ SLIST_INIT(&vsi->ftl); /* Set up SW VSI and allocate queue memory and rings */ if (ixl_setup_stations(pf)) { device_printf(dev, "setup stations failed!\n"); error = ENOMEM; goto err_mac_hmc; } /* Setup OS network interface / ifnet */ if (ixl_setup_interface(dev, vsi)) { device_printf(dev, "interface setup failed!\n"); error = EIO; goto err_late; } /* Determine link state */ if (ixl_attach_get_link_status(pf)) { error = EINVAL; goto err_late; } error = ixl_switch_config(pf); if (error) { device_printf(dev, "Initial ixl_switch_config() failed: %d\n", error); goto err_late; } /* Limit PHY interrupts to link, autoneg, and modules failure */ status = i40e_aq_set_phy_int_mask(hw, IXL_DEFAULT_PHY_INT_MASK, NULL); if (status) { device_printf(dev, "i40e_aq_set_phy_mask() failed: err %s," " aq_err %s\n", i40e_stat_str(hw, status), i40e_aq_str(hw, hw->aq.asq_last_status)); goto err_late; } /* Get the bus configuration and set the shared code's config */ ixl_get_bus_info(pf); /* * In MSI-X mode, initialize the Admin Queue interrupt, * so userland tools can communicate with the adapter regardless of * the ifnet interface's status. 
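 *
 * When fewer than two MSI-X vectors were obtained, the driver instead
 * takes the legacy/MSI path (ixl_setup_legacy()) in the else branch
 * below, where the admin queue and the LAN queues share one interrupt.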
*/ if (pf->msix > 1) { error = ixl_setup_adminq_msix(pf); if (error) { device_printf(dev, "ixl_setup_adminq_msix() error: %d\n", error); goto err_late; } error = ixl_setup_adminq_tq(pf); if (error) { device_printf(dev, "ixl_setup_adminq_tq() error: %d\n", error); goto err_late; } ixl_configure_intr0_msix(pf); ixl_enable_intr0(hw); error = ixl_setup_queue_msix(vsi); if (error) device_printf(dev, "ixl_setup_queue_msix() error: %d\n", error); error = ixl_setup_queue_tqs(vsi); if (error) device_printf(dev, "ixl_setup_queue_tqs() error: %d\n", error); } else { error = ixl_setup_legacy(pf); error = ixl_setup_adminq_tq(pf); if (error) { device_printf(dev, "ixl_setup_adminq_tq() error: %d\n", error); goto err_late; } error = ixl_setup_queue_tqs(vsi); if (error) device_printf(dev, "ixl_setup_queue_tqs() error: %d\n", error); } if (error) { device_printf(dev, "interrupt setup error: %d\n", error); } /* Set initial advertised speed sysctl value */ ixl_set_initial_advertised_speeds(pf); /* Initialize statistics & add sysctls */ ixl_add_device_sysctls(pf); ixl_pf_reset_stats(pf); ixl_update_stats_counters(pf); ixl_add_hw_stats(pf); /* Register for VLAN events */ vsi->vlan_attach = EVENTHANDLER_REGISTER(vlan_config, ixl_register_vlan, vsi, EVENTHANDLER_PRI_FIRST); vsi->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig, ixl_unregister_vlan, vsi, EVENTHANDLER_PRI_FIRST); #ifdef PCI_IOV ixl_initialize_sriov(pf); #endif #ifdef DEV_NETMAP - if (vsi->num_rx_desc == vsi->num_tx_desc) { - vsi->queues[0].num_desc = vsi->num_rx_desc; - ixl_netmap_attach(vsi); - } else - device_printf(dev, - "Netmap is not supported when RX and TX descriptor ring sizes differ\n"); - + ixl_netmap_attach(vsi); #endif /* DEV_NETMAP */ #ifdef IXL_IW if (hw->func_caps.iwarp && ixl_enable_iwarp) { pf->iw_enabled = (pf->iw_msix > 0) ? true : false; if (pf->iw_enabled) { error = ixl_iw_pf_attach(pf); if (error) { device_printf(dev, "interfacing to iwarp driver failed: %d\n", error); goto err_late; } else device_printf(dev, "iWARP ready\n"); } else device_printf(dev, "iwarp disabled on this device (no msix vectors)\n"); } else { pf->iw_enabled = false; device_printf(dev, "The device is not iWARP enabled\n"); } #endif INIT_DEBUGOUT("ixl_attach: end"); return (0); err_late: if (vsi->ifp != NULL) { ether_ifdetach(vsi->ifp); if_free(vsi->ifp); } err_mac_hmc: i40e_shutdown_lan_hmc(hw); err_get_cap: i40e_shutdown_adminq(hw); err_out: ixl_free_pci_resources(pf); ixl_free_vsi(vsi); IXL_PF_LOCK_DESTROY(pf); return (error); } /********************************************************************* * Device removal routine * * The detach entry point is called when the driver is being removed. * This routine stops the adapter and deallocates all the resources * that were allocated for driver operation. 
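 * Detach is refused while VLAN interfaces are still configured on the
 * port (EBUSY) or while SR-IOV is still in use; those must be torn
 * down first.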
* * return 0 on success, positive on failure *********************************************************************/ static int ixl_detach(device_t dev) { struct ixl_pf *pf = device_get_softc(dev); struct i40e_hw *hw = &pf->hw; struct ixl_vsi *vsi = &pf->vsi; enum i40e_status_code status; #if defined(PCI_IOV) || defined(IXL_IW) int error; #endif INIT_DEBUGOUT("ixl_detach: begin"); /* Make sure VLANS are not using driver */ if (vsi->ifp->if_vlantrunk != NULL) { device_printf(dev, "Vlan in use, detach first\n"); return (EBUSY); } #ifdef PCI_IOV error = pci_iov_detach(dev); if (error != 0) { device_printf(dev, "SR-IOV in use; detach first.\n"); return (error); } #endif /* Remove all previously allocated media types */ ifmedia_removeall(&vsi->media); ether_ifdetach(vsi->ifp); if (vsi->ifp->if_drv_flags & IFF_DRV_RUNNING) ixl_stop(pf); /* Shutdown LAN HMC */ status = i40e_shutdown_lan_hmc(hw); if (status) device_printf(dev, "Shutdown LAN HMC failed with code %d\n", status); /* Teardown LAN queue resources */ ixl_teardown_queue_msix(vsi); ixl_free_queue_tqs(vsi); /* Shutdown admin queue */ ixl_disable_intr0(hw); ixl_teardown_adminq_msix(pf); ixl_free_adminq_tq(pf); status = i40e_shutdown_adminq(hw); if (status) device_printf(dev, "Shutdown Admin queue failed with code %d\n", status); /* Unregister VLAN events */ if (vsi->vlan_attach != NULL) EVENTHANDLER_DEREGISTER(vlan_config, vsi->vlan_attach); if (vsi->vlan_detach != NULL) EVENTHANDLER_DEREGISTER(vlan_unconfig, vsi->vlan_detach); callout_drain(&pf->timer); #ifdef IXL_IW if (ixl_enable_iwarp && pf->iw_enabled) { error = ixl_iw_pf_detach(pf); if (error == EBUSY) { device_printf(dev, "iwarp in use; stop it first.\n"); return (error); } } #endif #ifdef DEV_NETMAP netmap_detach(vsi->ifp); #endif /* DEV_NETMAP */ ixl_pf_qmgr_destroy(&pf->qmgr); ixl_free_pci_resources(pf); bus_generic_detach(dev); if_free(vsi->ifp); ixl_free_vsi(vsi); IXL_PF_LOCK_DESTROY(pf); return (0); } /********************************************************************* * * Shutdown entry point * **********************************************************************/ static int ixl_shutdown(device_t dev) { struct ixl_pf *pf = device_get_softc(dev); ixl_stop(pf); return (0); } Index: stable/11/sys/dev/ixl/ixl.h =================================================================== --- stable/11/sys/dev/ixl/ixl.h (revision 343558) +++ stable/11/sys/dev/ixl/ixl.h (revision 343559) @@ -1,712 +1,709 @@ /****************************************************************************** Copyright (c) 2013-2017, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************/ /*$FreeBSD$*/ #ifndef _IXL_H_ #define _IXL_H_ #include "opt_inet.h" #include "opt_inet6.h" #include "opt_rss.h" #include "opt_ixl.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef RSS #include #include #endif #include "i40e_type.h" #include "i40e_prototype.h" #define MAC_FORMAT "%02x:%02x:%02x:%02x:%02x:%02x" #define MAC_FORMAT_ARGS(mac_addr) \ (mac_addr)[0], (mac_addr)[1], (mac_addr)[2], (mac_addr)[3], \ (mac_addr)[4], (mac_addr)[5] #define ON_OFF_STR(is_set) ((is_set) ? "On" : "Off") #ifdef IXL_DEBUG #define _DBG_PRINTF(S, ...) printf("%s: " S "\n", __func__, ##__VA_ARGS__) #define _DEV_DBG_PRINTF(dev, S, ...) device_printf(dev, "%s: " S "\n", __func__, ##__VA_ARGS__) #define _IF_DBG_PRINTF(ifp, S, ...) if_printf(ifp, "%s: " S "\n", __func__, ##__VA_ARGS__) /* Defines for printing generic debug information */ #define DPRINTF(...) _DBG_PRINTF(__VA_ARGS__) #define DDPRINTF(...) _DEV_DBG_PRINTF(__VA_ARGS__) #define IDPRINTF(...) _IF_DBG_PRINTF(__VA_ARGS__) /* Defines for printing specific debug information */ #define DEBUG_INIT 1 #define DEBUG_IOCTL 1 #define DEBUG_HW 1 #define INIT_DEBUGOUT(...) if (DEBUG_INIT) _DBG_PRINTF(__VA_ARGS__) #define INIT_DBG_DEV(...) if (DEBUG_INIT) _DEV_DBG_PRINTF(__VA_ARGS__) #define INIT_DBG_IF(...) if (DEBUG_INIT) _IF_DBG_PRINTF(__VA_ARGS__) #define IOCTL_DEBUGOUT(...) if (DEBUG_IOCTL) _DBG_PRINTF(__VA_ARGS__) #define IOCTL_DBG_IF2(ifp, S, ...) if (DEBUG_IOCTL) \ if_printf(ifp, S "\n", ##__VA_ARGS__) #define IOCTL_DBG_IF(...) if (DEBUG_IOCTL) _IF_DBG_PRINTF(__VA_ARGS__) #define HW_DEBUGOUT(...) if (DEBUG_HW) _DBG_PRINTF(__VA_ARGS__) #else /* no IXL_DEBUG */ #define DEBUG_INIT 0 #define DEBUG_IOCTL 0 #define DEBUG_HW 0 #define DPRINTF(...) #define DDPRINTF(...) #define IDPRINTF(...) #define INIT_DEBUGOUT(...) #define INIT_DBG_DEV(...) #define INIT_DBG_IF(...) #define IOCTL_DEBUGOUT(...) #define IOCTL_DBG_IF2(...) #define IOCTL_DBG_IF(...) #define HW_DEBUGOUT(...) #endif /* IXL_DEBUG */ enum ixl_dbg_mask { IXL_DBG_INFO = 0x00000001, IXL_DBG_EN_DIS = 0x00000002, IXL_DBG_AQ = 0x00000004, IXL_DBG_NVMUPD = 0x00000008, IXL_DBG_IOCTL_KNOWN = 0x00000010, IXL_DBG_IOCTL_UNKNOWN = 0x00000020, IXL_DBG_IOCTL_ALL = 0x00000030, I40E_DEBUG_RSS = 0x00000100, IXL_DBG_IOV = 0x00001000, IXL_DBG_IOV_VC = 0x00002000, IXL_DBG_SWITCH_INFO = 0x00010000, IXL_DBG_I2C = 0x00020000, IXL_DBG_ALL = 0xFFFFFFFF }; /* Tunables */ /* * Ring Descriptors Valid Range: 32-4096 Default Value: 1024 This value is the * number of tx/rx descriptors allocated by the driver. 
Increasing this * value allows the driver to queue more operations. * * Tx descriptors are always 16 bytes, but Rx descriptors can be 32 bytes. * The driver currently always uses 32 byte Rx descriptors. */ #define IXL_DEFAULT_RING 1024 #define IXL_MAX_RING 4096 #define IXL_MIN_RING 64 #define IXL_RING_INCREMENT 32 #define IXL_AQ_LEN 256 #define IXL_AQ_LEN_MAX 1024 /* ** Default number of entries in Tx queue buf_ring. */ #define DEFAULT_TXBRSZ 4096 /* Alignment for rings */ #define DBA_ALIGN 128 /* * This is the max watchdog interval, ie. the time that can * pass between any two TX clean operations, such only happening * when the TX hardware is functioning. * * XXX: Watchdog currently counts down in units of (hz) * Set this to just (hz) if you want queues to hang under a little bit of stress */ #define IXL_WATCHDOG (10 * hz) /* * This parameters control when the driver calls the routine to reclaim * transmit descriptors. */ #define IXL_TX_CLEANUP_THRESHOLD (que->num_tx_desc / 8) #define IXL_TX_OP_THRESHOLD (que->num_tx_desc / 32) #define MAX_MULTICAST_ADDR 128 #define IXL_MSIX_BAR 3 #define IXL_ADM_LIMIT 2 #define IXL_TSO_SIZE 65535 #define IXL_AQ_BUF_SZ ((u32) 4096) #define IXL_RX_HDR 128 #define IXL_RX_LIMIT 512 #define IXL_RX_ITR 0 #define IXL_TX_ITR 1 #define IXL_ITR_NONE 3 #define IXL_QUEUE_EOL 0x7FF #define IXL_MAX_FRAME 9728 #define IXL_MAX_TX_SEGS 8 #define IXL_MAX_TSO_SEGS 128 #define IXL_SPARSE_CHAIN 7 #define IXL_QUEUE_HUNG 0x80000000 #define IXL_MIN_TSO_MSS 64 #define IXL_MAX_DMA_SEG_SIZE ((16 * 1024) - 1) #define IXL_RSS_KEY_SIZE_REG 13 #define IXL_RSS_KEY_SIZE (IXL_RSS_KEY_SIZE_REG * 4) #define IXL_RSS_VSI_LUT_SIZE 64 /* X722 -> VSI, X710 -> VF */ #define IXL_RSS_VSI_LUT_ENTRY_MASK 0x3F #define IXL_RSS_VF_LUT_ENTRY_MASK 0xF #define IXL_VF_MAX_BUFFER 0x3F80 #define IXL_VF_MAX_HDR_BUFFER 0x840 #define IXL_VF_MAX_FRAME 0x3FFF /* ERJ: hardware can support ~2k (SW5+) filters between all functions */ #define IXL_MAX_FILTERS 256 #define IXL_MAX_TX_BUSY 10 #define IXL_NVM_VERSION_LO_SHIFT 0 #define IXL_NVM_VERSION_LO_MASK (0xff << IXL_NVM_VERSION_LO_SHIFT) #define IXL_NVM_VERSION_HI_SHIFT 12 #define IXL_NVM_VERSION_HI_MASK (0xf << IXL_NVM_VERSION_HI_SHIFT) /* * Interrupt Moderation parameters * Multiply ITR values by 2 for real ITR value */ #define IXL_MAX_ITR 0x0FF0 #define IXL_ITR_100K 0x0005 #define IXL_ITR_20K 0x0019 #define IXL_ITR_8K 0x003E #define IXL_ITR_4K 0x007A #define IXL_ITR_1K 0x01F4 #define IXL_ITR_DYNAMIC 0x8000 #define IXL_LOW_LATENCY 0 #define IXL_AVE_LATENCY 1 #define IXL_BULK_LATENCY 2 /* MacVlan Flags */ #define IXL_FILTER_USED (u16)(1 << 0) #define IXL_FILTER_VLAN (u16)(1 << 1) #define IXL_FILTER_ADD (u16)(1 << 2) #define IXL_FILTER_DEL (u16)(1 << 3) #define IXL_FILTER_MC (u16)(1 << 4) /* used in the vlan field of the filter when not a vlan */ #define IXL_VLAN_ANY -1 #define CSUM_OFFLOAD_IPV4 (CSUM_IP|CSUM_TCP|CSUM_UDP|CSUM_SCTP) #define CSUM_OFFLOAD_IPV6 (CSUM_TCP_IPV6|CSUM_UDP_IPV6|CSUM_SCTP_IPV6) #define CSUM_OFFLOAD (CSUM_OFFLOAD_IPV4|CSUM_OFFLOAD_IPV6|CSUM_TSO) /* Misc flags for ixl_vsi.flags */ #define IXL_FLAGS_KEEP_TSO4 (1 << 0) #define IXL_FLAGS_KEEP_TSO6 (1 << 1) #define IXL_FLAGS_USES_MSIX (1 << 2) #define IXL_FLAGS_IS_VF (1 << 3) #define IXL_VF_RESET_TIMEOUT 100 #define IXL_VSI_DATA_PORT 0x01 #define IXLV_MAX_QUEUES 16 #define IXL_MAX_VSI_QUEUES (2 * (I40E_VSILAN_QTABLE_MAX_INDEX + 1)) #define IXL_RX_CTX_BASE_UNITS 128 #define IXL_TX_CTX_BASE_UNITS 128 #define IXL_VPINT_LNKLSTN_REG(hw, vector, vf_num) \ I40E_VPINT_LNKLSTN(((vector) - 1) + \ 
(((hw)->func_caps.num_msix_vectors_vf - 1) * (vf_num))) #define IXL_VFINT_DYN_CTLN_REG(hw, vector, vf_num) \ I40E_VFINT_DYN_CTLN(((vector) - 1) + \ (((hw)->func_caps.num_msix_vectors_vf - 1) * (vf_num))) #define IXL_PF_PCI_CIAA_VF_DEVICE_STATUS 0xAA #define IXL_PF_PCI_CIAD_VF_TRANS_PENDING_MASK 0x20 #define IXL_GLGEN_VFLRSTAT_INDEX(glb_vf) ((glb_vf) / 32) #define IXL_GLGEN_VFLRSTAT_MASK(glb_vf) (1 << ((glb_vf) % 32)) #define IXL_MAX_ITR_IDX 3 #define IXL_END_OF_INTR_LNKLST 0x7FF #define IXL_DEFAULT_RSS_HENA_BASE (\ BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_UDP) | \ BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_TCP) | \ BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_SCTP) | \ BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_OTHER) | \ BIT_ULL(I40E_FILTER_PCTYPE_FRAG_IPV4) | \ BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_UDP) | \ BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_TCP) | \ BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_SCTP) | \ BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_OTHER) | \ BIT_ULL(I40E_FILTER_PCTYPE_FRAG_IPV6) | \ BIT_ULL(I40E_FILTER_PCTYPE_L2_PAYLOAD)) #define IXL_DEFAULT_RSS_HENA_XL710 IXL_DEFAULT_RSS_HENA_BASE #define IXL_DEFAULT_RSS_HENA_X722 (\ IXL_DEFAULT_RSS_HENA_BASE | \ BIT_ULL(I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP) | \ BIT_ULL(I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV4_UDP) | \ BIT_ULL(I40E_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP) | \ BIT_ULL(I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP) | \ BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN_NO_ACK) | \ BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_TCP_SYN_NO_ACK)) #define IXL_TX_LOCK(_sc) mtx_lock(&(_sc)->mtx) #define IXL_TX_UNLOCK(_sc) mtx_unlock(&(_sc)->mtx) #define IXL_TX_LOCK_DESTROY(_sc) mtx_destroy(&(_sc)->mtx) #define IXL_TX_TRYLOCK(_sc) mtx_trylock(&(_sc)->mtx) #define IXL_TX_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->mtx, MA_OWNED) #define IXL_RX_LOCK(_sc) mtx_lock(&(_sc)->mtx) #define IXL_RX_UNLOCK(_sc) mtx_unlock(&(_sc)->mtx) #define IXL_RX_LOCK_DESTROY(_sc) mtx_destroy(&(_sc)->mtx) /* Pre-11 counter(9) compatibility */ #if __FreeBSD_version >= 1100036 #define IXL_SET_IPACKETS(vsi, count) (vsi)->ipackets = (count) #define IXL_SET_IERRORS(vsi, count) (vsi)->ierrors = (count) #define IXL_SET_OPACKETS(vsi, count) (vsi)->opackets = (count) #define IXL_SET_OERRORS(vsi, count) (vsi)->oerrors = (count) #define IXL_SET_COLLISIONS(vsi, count) /* Do nothing; collisions is always 0. 
*/ #define IXL_SET_IBYTES(vsi, count) (vsi)->ibytes = (count) #define IXL_SET_OBYTES(vsi, count) (vsi)->obytes = (count) #define IXL_SET_IMCASTS(vsi, count) (vsi)->imcasts = (count) #define IXL_SET_OMCASTS(vsi, count) (vsi)->omcasts = (count) #define IXL_SET_IQDROPS(vsi, count) (vsi)->iqdrops = (count) #define IXL_SET_OQDROPS(vsi, count) (vsi)->oqdrops = (count) #define IXL_SET_NOPROTO(vsi, count) (vsi)->noproto = (count) #else #define IXL_SET_IPACKETS(vsi, count) (vsi)->ifp->if_ipackets = (count) #define IXL_SET_IERRORS(vsi, count) (vsi)->ifp->if_ierrors = (count) #define IXL_SET_OPACKETS(vsi, count) (vsi)->ifp->if_opackets = (count) #define IXL_SET_OERRORS(vsi, count) (vsi)->ifp->if_oerrors = (count) #define IXL_SET_COLLISIONS(vsi, count) (vsi)->ifp->if_collisions = (count) #define IXL_SET_IBYTES(vsi, count) (vsi)->ifp->if_ibytes = (count) #define IXL_SET_OBYTES(vsi, count) (vsi)->ifp->if_obytes = (count) #define IXL_SET_IMCASTS(vsi, count) (vsi)->ifp->if_imcasts = (count) #define IXL_SET_OMCASTS(vsi, count) (vsi)->ifp->if_omcasts = (count) #define IXL_SET_IQDROPS(vsi, count) (vsi)->ifp->if_iqdrops = (count) #define IXL_SET_OQDROPS(vsi, odrops) (vsi)->ifp->if_snd.ifq_drops = (odrops) #define IXL_SET_NOPROTO(vsi, count) (vsi)->noproto = (count) #endif /* ***************************************************************************** * vendor_info_array * * This array contains the list of Subvendor/Subdevice IDs on which the driver * should load. * ***************************************************************************** */ typedef struct _ixl_vendor_info_t { unsigned int vendor_id; unsigned int device_id; unsigned int subvendor_id; unsigned int subdevice_id; unsigned int index; } ixl_vendor_info_t; struct ixl_tx_buf { u32 eop_index; struct mbuf *m_head; bus_dmamap_t map; bus_dma_tag_t tag; }; struct ixl_rx_buf { struct mbuf *m_head; struct mbuf *m_pack; struct mbuf *fmp; bus_dmamap_t hmap; bus_dmamap_t pmap; }; /* ** This struct has multiple uses, multicast ** addresses, vlans, and mac filters all use it. */ struct ixl_mac_filter { SLIST_ENTRY(ixl_mac_filter) next; u8 macaddr[ETHER_ADDR_LEN]; s16 vlan; u16 flags; }; /* * The Transmit ring control struct */ struct tx_ring { struct ixl_queue *que; struct mtx mtx; u32 tail; struct i40e_tx_desc *base; struct i40e_dma_mem dma; u16 next_avail; u16 next_to_clean; u16 atr_rate; u16 atr_count; u32 itr; u32 latency; struct ixl_tx_buf *buffers; volatile u16 avail; u32 cmd; bus_dma_tag_t tx_tag; bus_dma_tag_t tso_tag; char mtx_name[16]; struct buf_ring *br; s32 watchdog_timer; /* Used for Dynamic ITR calculation */ u32 packets; u32 bytes; /* Soft Stats */ u64 tx_bytes; u64 no_desc; u64 total_packets; }; /* * The Receive ring control struct */ struct rx_ring { struct ixl_queue *que; struct mtx mtx; union i40e_rx_desc *base; struct i40e_dma_mem dma; struct lro_ctrl lro; bool lro_enabled; bool hdr_split; bool discard; u32 next_refresh; u32 next_check; u32 itr; u32 latency; char mtx_name[16]; struct ixl_rx_buf *buffers; u32 mbuf_sz; u32 tail; bus_dma_tag_t htag; bus_dma_tag_t ptag; /* Used for Dynamic ITR calculation */ u32 packets; u32 bytes; /* Soft stats */ u64 split; u64 rx_packets; u64 rx_bytes; u64 desc_errs; u64 not_done; }; /* ** Driver queue struct: this is the interrupt container ** for the associated tx and rx ring pair. 
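** Each queue owns one tx_ring/rx_ring pair, its own taskqueue and,
** when MSI-X is used, its own interrupt vector ('msix'/'eims' below),
** plus a set of per-queue soft statistics.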
*/ struct ixl_queue { struct ixl_vsi *vsi; u32 me; u32 msix; /* This queue's MSIX vector */ u32 eims; /* This queue's EIMS bit */ struct resource *res; void *tag; int num_tx_desc; /* both tx and rx */ int num_rx_desc; /* both tx and rx */ -#ifdef DEV_NETMAP - int num_desc; /* for compatibility with current netmap code in kernel */ -#endif struct tx_ring txr; struct rx_ring rxr; struct task task; struct task tx_task; struct taskqueue *tq; /* Queue stats */ u64 irqs; u64 tso; u64 mbuf_defrag_failed; u64 mbuf_hdr_failed; u64 mbuf_pkt_failed; u64 tx_dmamap_failed; u64 dropped_pkts; u64 mss_too_small; }; /* ** Virtual Station Interface */ SLIST_HEAD(ixl_ftl_head, ixl_mac_filter); struct ixl_vsi { void *back; struct ifnet *ifp; struct device *dev; struct i40e_hw *hw; struct ifmedia media; enum i40e_vsi_type type; int id; u16 num_queues; int num_tx_desc; int num_rx_desc; u32 rx_itr_setting; u32 tx_itr_setting; u16 max_frame_size; bool enable_head_writeback; struct ixl_queue *queues; /* head of queues */ u16 vsi_num; bool link_active; u16 seid; u16 uplink_seid; u16 downlink_seid; /* MAC/VLAN Filter list */ struct ixl_ftl_head ftl; u16 num_macs; /* Contains readylist & stat counter id */ struct i40e_aqc_vsi_properties_data info; eventhandler_tag vlan_attach; eventhandler_tag vlan_detach; u16 num_vlans; /* Per-VSI stats from hardware */ struct i40e_eth_stats eth_stats; struct i40e_eth_stats eth_stats_offsets; bool stat_offsets_loaded; /* VSI stat counters */ u64 ipackets; u64 ierrors; u64 opackets; u64 oerrors; u64 ibytes; u64 obytes; u64 imcasts; u64 omcasts; u64 iqdrops; u64 oqdrops; u64 noproto; /* Driver statistics */ u64 hw_filters_del; u64 hw_filters_add; /* Misc. */ u64 flags; struct sysctl_oid *vsi_node; }; /* ** Find the number of unrefreshed RX descriptors */ static inline u16 ixl_rx_unrefreshed(struct ixl_queue *que) { struct rx_ring *rxr = &que->rxr; if (rxr->next_check > rxr->next_refresh) return (rxr->next_check - rxr->next_refresh - 1); else return ((que->num_rx_desc + rxr->next_check) - rxr->next_refresh - 1); } /* ** Find the next available unused filter */ static inline struct ixl_mac_filter * ixl_get_filter(struct ixl_vsi *vsi) { struct ixl_mac_filter *f; /* create a new empty filter */ f = malloc(sizeof(struct ixl_mac_filter), M_DEVBUF, M_NOWAIT | M_ZERO); if (f) SLIST_INSERT_HEAD(&vsi->ftl, f, next); return (f); } /* ** Compare two ethernet addresses */ static inline bool cmp_etheraddr(const u8 *ea1, const u8 *ea2) { bool cmp = FALSE; if ((ea1[0] == ea2[0]) && (ea1[1] == ea2[1]) && (ea1[2] == ea2[2]) && (ea1[3] == ea2[3]) && (ea1[4] == ea2[4]) && (ea1[5] == ea2[5])) cmp = TRUE; return (cmp); } /* * Return next largest power of 2, unsigned * * Public domain, from Bit Twiddling Hacks */ static inline u32 next_power_of_two(u32 n) { n--; n |= n >> 1; n |= n >> 2; n |= n >> 4; n |= n >> 8; n |= n >> 16; n++; /* Next power of two > 0 is 1 */ n += (n == 0); return (n); } /* * Info for stats sysctls */ struct ixl_sysctl_info { u64 *stat; char *name; char *description; }; extern const uint8_t ixl_bcast_addr[ETHER_ADDR_LEN]; /********************************************************************* * TXRX Function prototypes *********************************************************************/ int ixl_allocate_tx_data(struct ixl_queue *); int ixl_allocate_rx_data(struct ixl_queue *); void ixl_init_tx_ring(struct ixl_queue *); int ixl_init_rx_ring(struct ixl_queue *); bool ixl_rxeof(struct ixl_queue *, int); bool ixl_txeof(struct ixl_queue *); void ixl_free_que_tx(struct ixl_queue *); void 
ixl_free_que_rx(struct ixl_queue *); int ixl_mq_start(struct ifnet *, struct mbuf *); int ixl_mq_start_locked(struct ifnet *, struct tx_ring *); void ixl_deferred_mq_start(void *, int); void ixl_vsi_setup_rings_size(struct ixl_vsi *, int, int); int ixl_queue_hang_check(struct ixl_vsi *); void ixl_free_vsi(struct ixl_vsi *); void ixl_qflush(struct ifnet *); /* Common function prototypes between PF/VF driver */ #if __FreeBSD_version >= 1100000 uint64_t ixl_get_counter(if_t ifp, ift_counter cnt); #endif void ixl_get_default_rss_key(u32 *); const char * i40e_vc_stat_str(struct i40e_hw *hw, enum virtchnl_status_code stat_err); void ixl_set_busmaster(device_t); void ixl_set_msix_enable(device_t); #endif /* _IXL_H_ */ Index: stable/11/sys/dev/ixl/ixl_txrx.c =================================================================== --- stable/11/sys/dev/ixl/ixl_txrx.c (revision 343558) +++ stable/11/sys/dev/ixl/ixl_txrx.c (revision 343559) @@ -1,2149 +1,2149 @@ /****************************************************************************** Copyright (c) 2013-2017, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************/ /*$FreeBSD$*/ /* ** IXL driver TX/RX Routines: ** This was seperated to allow usage by ** both the PF and VF drivers. 
*/ #ifndef IXL_STANDALONE_BUILD #include "opt_inet.h" #include "opt_inet6.h" #include "opt_rss.h" #endif #include "ixl.h" #ifdef RSS #include #endif /* Local Prototypes */ static void ixl_rx_checksum(struct mbuf *, u32, u32, u8); static void ixl_refresh_mbufs(struct ixl_queue *, int); static int ixl_xmit(struct ixl_queue *, struct mbuf **); static int ixl_tx_setup_offload(struct ixl_queue *, struct mbuf *, u32 *, u32 *); static bool ixl_tso_setup(struct ixl_queue *, struct mbuf *); static void ixl_queue_sw_irq(struct ixl_vsi *, int); static inline void ixl_rx_discard(struct rx_ring *, int); static inline void ixl_rx_input(struct rx_ring *, struct ifnet *, struct mbuf *, u8); static inline bool ixl_tso_detect_sparse(struct mbuf *mp); static inline u32 ixl_get_tx_head(struct ixl_queue *que); #ifdef DEV_NETMAP #include #endif /* DEV_NETMAP */ /* * @key key is saved into this parameter */ void ixl_get_default_rss_key(u32 *key) { MPASS(key != NULL); u32 rss_seed[IXL_RSS_KEY_SIZE_REG] = {0x41b01687, 0x183cfd8c, 0xce880440, 0x580cbc3c, 0x35897377, 0x328b25e1, 0x4fa98922, 0xb7d90c14, 0xd5bad70d, 0xcd15a2c1, 0x0, 0x0, 0x0}; bcopy(rss_seed, key, IXL_RSS_KEY_SIZE); } /** * i40e_vc_stat_str - convert virtchnl status err code to a string * @hw: pointer to the HW structure * @stat_err: the status error code to convert **/ const char * i40e_vc_stat_str(struct i40e_hw *hw, enum virtchnl_status_code stat_err) { switch (stat_err) { case VIRTCHNL_STATUS_SUCCESS: return "OK"; case VIRTCHNL_ERR_PARAM: return "VIRTCHNL_ERR_PARAM"; case VIRTCHNL_STATUS_ERR_OPCODE_MISMATCH: return "VIRTCHNL_STATUS_ERR_OPCODE_MISMATCH"; case VIRTCHNL_STATUS_ERR_CQP_COMPL_ERROR: return "VIRTCHNL_STATUS_ERR_CQP_COMPL_ERROR"; case VIRTCHNL_STATUS_ERR_INVALID_VF_ID: return "VIRTCHNL_STATUS_ERR_INVALID_VF_ID"; case VIRTCHNL_STATUS_NOT_SUPPORTED: return "VIRTCHNL_STATUS_NOT_SUPPORTED"; } snprintf(hw->err_str, sizeof(hw->err_str), "%d", stat_err); return hw->err_str; } /* * PCI BUSMASTER needs to be set for proper operation. */ void ixl_set_busmaster(device_t dev) { u16 pci_cmd_word; pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2); pci_cmd_word |= PCIM_CMD_BUSMASTEREN; pci_write_config(dev, PCIR_COMMAND, pci_cmd_word, 2); } /* * Rewrite the ENABLE bit in the MSIX control register */ void ixl_set_msix_enable(device_t dev) { int msix_ctrl, rid; pci_find_cap(dev, PCIY_MSIX, &rid); rid += PCIR_MSIX_CTRL; msix_ctrl = pci_read_config(dev, rid, 2); msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE; pci_write_config(dev, rid, msix_ctrl, 2); } /* ** Multiqueue Transmit driver */ int ixl_mq_start(struct ifnet *ifp, struct mbuf *m) { struct ixl_vsi *vsi = ifp->if_softc; struct ixl_queue *que; struct tx_ring *txr; int err, i; #ifdef RSS u32 bucket_id; #endif /* * Which queue to use: * * When doing RSS, map it to the same outbound * queue as the incoming flow would be mapped to. * If everything is setup correctly, it should be * the same bucket that the current CPU we're on is. 
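 *
 * As an illustration only: with vsi->num_queues == 8, a flow whose
 * flowid hashes to 42 is transmitted on queue 42 % 8 == 2.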
*/ if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) { #ifdef RSS if (rss_hash2bucket(m->m_pkthdr.flowid, M_HASHTYPE_GET(m), &bucket_id) == 0) { i = bucket_id % vsi->num_queues; } else #endif i = m->m_pkthdr.flowid % vsi->num_queues; } else i = curcpu % vsi->num_queues; que = &vsi->queues[i]; txr = &que->txr; err = drbr_enqueue(ifp, txr->br, m); if (err) return (err); if (IXL_TX_TRYLOCK(txr)) { ixl_mq_start_locked(ifp, txr); IXL_TX_UNLOCK(txr); } else taskqueue_enqueue(que->tq, &que->tx_task); return (0); } int ixl_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr) { struct ixl_queue *que = txr->que; struct ixl_vsi *vsi = que->vsi; struct mbuf *next; int err = 0; if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) || vsi->link_active == 0) return (ENETDOWN); /* Process the transmit queue */ while ((next = drbr_peek(ifp, txr->br)) != NULL) { if ((err = ixl_xmit(que, &next)) != 0) { if (next == NULL) drbr_advance(ifp, txr->br); else drbr_putback(ifp, txr->br, next); break; } drbr_advance(ifp, txr->br); /* Send a copy of the frame to the BPF listener */ ETHER_BPF_MTAP(ifp, next); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) break; } if (txr->avail < IXL_TX_CLEANUP_THRESHOLD) ixl_txeof(que); return (err); } /* * Called from a taskqueue to drain queued transmit packets. */ void ixl_deferred_mq_start(void *arg, int pending) { struct ixl_queue *que = arg; struct tx_ring *txr = &que->txr; struct ixl_vsi *vsi = que->vsi; struct ifnet *ifp = vsi->ifp; IXL_TX_LOCK(txr); if (!drbr_empty(ifp, txr->br)) ixl_mq_start_locked(ifp, txr); IXL_TX_UNLOCK(txr); } /* ** Flush all queue ring buffers */ void ixl_qflush(struct ifnet *ifp) { struct ixl_vsi *vsi = ifp->if_softc; for (int i = 0; i < vsi->num_queues; i++) { struct ixl_queue *que = &vsi->queues[i]; struct tx_ring *txr = &que->txr; struct mbuf *m; IXL_TX_LOCK(txr); while ((m = buf_ring_dequeue_sc(txr->br)) != NULL) m_freem(m); IXL_TX_UNLOCK(txr); } if_qflush(ifp); } static inline bool ixl_tso_detect_sparse(struct mbuf *mp) { struct mbuf *m; int num, mss; num = 0; mss = mp->m_pkthdr.tso_segsz; /* Exclude first mbuf; assume it contains all headers */ for (m = mp->m_next; m != NULL; m = m->m_next) { if (m == NULL) break; num++; mss -= m->m_len % mp->m_pkthdr.tso_segsz; if (num > IXL_SPARSE_CHAIN) return (true); if (mss < 1) { num = (mss == 0) ? 0 : 1; mss += mp->m_pkthdr.tso_segsz; } } return (false); } /********************************************************************* * * This routine maps the mbufs to tx descriptors, allowing the * TX engine to transmit the packets. 
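 * A frame consumes one data descriptor per DMA segment (up to
 * IXL_MAX_TX_SEGS, or IXL_MAX_TSO_SEGS for TSO); if the ring does not
 * have nsegs + 2 free descriptors, the frame is deferred and the
 * no_desc counter is bumped.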
* - return 0 on success, positive on failure * **********************************************************************/ #define IXL_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS) static int ixl_xmit(struct ixl_queue *que, struct mbuf **m_headp) { struct ixl_vsi *vsi = que->vsi; struct i40e_hw *hw = vsi->hw; struct tx_ring *txr = &que->txr; struct ixl_tx_buf *buf; struct i40e_tx_desc *txd = NULL; struct mbuf *m_head, *m; int i, j, error, nsegs; int first, last = 0; u16 vtag = 0; u32 cmd, off; bus_dmamap_t map; bus_dma_tag_t tag; bus_dma_segment_t segs[IXL_MAX_TSO_SEGS]; cmd = off = 0; m_head = *m_headp; /* * Important to capture the first descriptor * used because it will contain the index of * the one we tell the hardware to report back */ first = txr->next_avail; buf = &txr->buffers[first]; map = buf->map; tag = txr->tx_tag; if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { /* Use larger mapping for TSO */ tag = txr->tso_tag; if (ixl_tso_detect_sparse(m_head)) { m = m_defrag(m_head, M_NOWAIT); if (m == NULL) { m_freem(*m_headp); *m_headp = NULL; return (ENOBUFS); } *m_headp = m; } } /* * Map the packet for DMA. */ error = bus_dmamap_load_mbuf_sg(tag, map, *m_headp, segs, &nsegs, BUS_DMA_NOWAIT); if (error == EFBIG) { struct mbuf *m; m = m_defrag(*m_headp, M_NOWAIT); if (m == NULL) { que->mbuf_defrag_failed++; m_freem(*m_headp); *m_headp = NULL; return (ENOBUFS); } *m_headp = m; /* Try it again */ error = bus_dmamap_load_mbuf_sg(tag, map, *m_headp, segs, &nsegs, BUS_DMA_NOWAIT); if (error != 0) { que->tx_dmamap_failed++; m_freem(*m_headp); *m_headp = NULL; return (error); } } else if (error != 0) { que->tx_dmamap_failed++; m_freem(*m_headp); *m_headp = NULL; return (error); } /* Make certain there are enough descriptors */ if (nsegs > txr->avail - 2) { txr->no_desc++; error = ENOBUFS; goto xmit_fail; } m_head = *m_headp; /* Set up the TSO/CSUM offload */ if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) { error = ixl_tx_setup_offload(que, m_head, &cmd, &off); if (error) goto xmit_fail; } cmd |= I40E_TX_DESC_CMD_ICRC; /* Grab the VLAN tag */ if (m_head->m_flags & M_VLANTAG) { cmd |= I40E_TX_DESC_CMD_IL2TAG1; vtag = htole16(m_head->m_pkthdr.ether_vtag); } i = txr->next_avail; for (j = 0; j < nsegs; j++) { bus_size_t seglen; buf = &txr->buffers[i]; buf->tag = tag; /* Keep track of the type tag */ txd = &txr->base[i]; seglen = segs[j].ds_len; txd->buffer_addr = htole64(segs[j].ds_addr); txd->cmd_type_offset_bsz = htole64(I40E_TX_DESC_DTYPE_DATA | ((u64)cmd << I40E_TXD_QW1_CMD_SHIFT) | ((u64)off << I40E_TXD_QW1_OFFSET_SHIFT) | ((u64)seglen << I40E_TXD_QW1_TX_BUF_SZ_SHIFT) | ((u64)vtag << I40E_TXD_QW1_L2TAG1_SHIFT)); last = i; /* descriptor that will get completion IRQ */ if (++i == que->num_tx_desc) i = 0; buf->m_head = NULL; buf->eop_index = -1; } /* Set the last descriptor for report */ txd->cmd_type_offset_bsz |= htole64(((u64)IXL_TXD_CMD << I40E_TXD_QW1_CMD_SHIFT)); txr->avail -= nsegs; txr->next_avail = i; buf->m_head = m_head; /* Swap the dma map between the first and last descriptor. * The descriptor that gets checked on completion will now * have the real map from the first descriptor. 
*/ txr->buffers[first].map = buf->map; buf->map = map; bus_dmamap_sync(tag, map, BUS_DMASYNC_PREWRITE); /* Set the index of the descriptor that will be marked done */ buf = &txr->buffers[first]; buf->eop_index = last; bus_dmamap_sync(txr->dma.tag, txr->dma.map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); /* * Advance the Transmit Descriptor Tail (Tdt), this tells the * hardware that this frame is available to transmit. */ ++txr->total_packets; wr32(hw, txr->tail, i); /* Mark outstanding work */ atomic_store_rel_32(&txr->watchdog_timer, IXL_WATCHDOG); return (0); xmit_fail: bus_dmamap_unload(tag, buf->map); return (error); } /********************************************************************* * * Allocate memory for tx_buffer structures. The tx_buffer stores all * the information needed to transmit a packet on the wire. This is * called only once at attach, setup is done every reset. * **********************************************************************/ int ixl_allocate_tx_data(struct ixl_queue *que) { struct tx_ring *txr = &que->txr; struct ixl_vsi *vsi = que->vsi; device_t dev = vsi->dev; struct ixl_tx_buf *buf; int i, error = 0; /* * Setup DMA descriptor areas. */ if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ 1, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ IXL_TSO_SIZE, /* maxsize */ IXL_MAX_TX_SEGS, /* nsegments */ IXL_MAX_DMA_SEG_SIZE, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &txr->tx_tag))) { device_printf(dev,"Unable to allocate TX DMA tag\n"); return (error); } /* Make a special tag for TSO */ if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ 1, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ IXL_TSO_SIZE, /* maxsize */ IXL_MAX_TSO_SEGS, /* nsegments */ IXL_MAX_DMA_SEG_SIZE, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &txr->tso_tag))) { device_printf(dev,"Unable to allocate TX TSO DMA tag\n"); goto free_tx_dma; } if (!(txr->buffers = (struct ixl_tx_buf *) malloc(sizeof(struct ixl_tx_buf) * que->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate tx_buffer memory\n"); error = ENOMEM; goto free_tx_tso_dma; } /* Create the descriptor buffer default dma maps */ buf = txr->buffers; for (i = 0; i < que->num_tx_desc; i++, buf++) { buf->tag = txr->tx_tag; error = bus_dmamap_create(buf->tag, 0, &buf->map); if (error != 0) { device_printf(dev, "Unable to create TX DMA map\n"); goto free_buffers; } } return 0; free_buffers: while (i--) { buf--; bus_dmamap_destroy(buf->tag, buf->map); } free(txr->buffers, M_DEVBUF); txr->buffers = NULL; free_tx_tso_dma: bus_dma_tag_destroy(txr->tso_tag); txr->tso_tag = NULL; free_tx_dma: bus_dma_tag_destroy(txr->tx_tag); txr->tx_tag = NULL; return (error); } /********************************************************************* * * (Re)Initialize a queue transmit ring. 
* - called by init, it clears the descriptor ring, * and frees any stale mbufs * **********************************************************************/ void ixl_init_tx_ring(struct ixl_queue *que) { #ifdef DEV_NETMAP struct netmap_adapter *na = NA(que->vsi->ifp); struct netmap_slot *slot; #endif /* DEV_NETMAP */ struct tx_ring *txr = &que->txr; struct ixl_tx_buf *buf; /* Clear the old ring contents */ IXL_TX_LOCK(txr); #ifdef DEV_NETMAP /* * (under lock): if in netmap mode, do some consistency * checks and set slot to entry 0 of the netmap ring. */ slot = netmap_reset(na, NR_TX, que->me, 0); #endif /* DEV_NETMAP */ bzero((void *)txr->base, (sizeof(struct i40e_tx_desc)) * que->num_tx_desc); /* Reset indices */ txr->next_avail = 0; txr->next_to_clean = 0; /* Reset watchdog status */ txr->watchdog_timer = 0; /* Free any existing tx mbufs. */ buf = txr->buffers; for (int i = 0; i < que->num_tx_desc; i++, buf++) { if (buf->m_head != NULL) { bus_dmamap_sync(buf->tag, buf->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(buf->tag, buf->map); m_freem(buf->m_head); buf->m_head = NULL; } #ifdef DEV_NETMAP /* * In netmap mode, set the map for the packet buffer. * NOTE: Some drivers (not this one) also need to set * the physical buffer address in the NIC ring. * netmap_idx_n2k() maps a nic index, i, into the corresponding * netmap slot index, si */ if (slot) { int si = netmap_idx_n2k(na->tx_rings[que->me], i); netmap_load_map(na, buf->tag, buf->map, NMB(na, slot + si)); } #endif /* DEV_NETMAP */ /* Clear the EOP index */ buf->eop_index = -1; } /* Set number of descriptors available */ txr->avail = que->num_tx_desc; bus_dmamap_sync(txr->dma.tag, txr->dma.map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); IXL_TX_UNLOCK(txr); } /********************************************************************* * * Free transmit ring related data structures. * **********************************************************************/ void ixl_free_que_tx(struct ixl_queue *que) { struct tx_ring *txr = &que->txr; struct ixl_tx_buf *buf; INIT_DBG_IF(que->vsi->ifp, "queue %d: begin", que->me); for (int i = 0; i < que->num_tx_desc; i++) { buf = &txr->buffers[i]; if (buf->m_head != NULL) { bus_dmamap_sync(buf->tag, buf->map, BUS_DMASYNC_POSTWRITE); m_freem(buf->m_head); buf->m_head = NULL; } bus_dmamap_unload(buf->tag, buf->map); bus_dmamap_destroy(buf->tag, buf->map); } if (txr->buffers != NULL) { free(txr->buffers, M_DEVBUF); txr->buffers = NULL; } if (txr->tx_tag != NULL) { bus_dma_tag_destroy(txr->tx_tag); txr->tx_tag = NULL; } if (txr->tso_tag != NULL) { bus_dma_tag_destroy(txr->tso_tag); txr->tso_tag = NULL; } INIT_DBG_IF(que->vsi->ifp, "queue %d: end", que->me); return; } /********************************************************************* * * Setup descriptor for hw offloads * **********************************************************************/ static int ixl_tx_setup_offload(struct ixl_queue *que, struct mbuf *mp, u32 *cmd, u32 *off) { struct ether_vlan_header *eh; #ifdef INET struct ip *ip = NULL; #endif struct tcphdr *th = NULL; #ifdef INET6 struct ip6_hdr *ip6; #endif int elen, ip_hlen = 0, tcp_hlen; u16 etype; u8 ipproto = 0; bool tso = FALSE; /* Set up the TSO context descriptor if required */ if (mp->m_pkthdr.csum_flags & CSUM_TSO) { tso = ixl_tso_setup(que, mp); if (tso) ++que->tso; else return (ENXIO); } /* * Determine where frame payload starts. * Jump over vlan headers if already present, * helpful for QinQ too. 
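 *
 * For an 802.1Q-tagged frame, for instance, the MAC header length works
 * out to ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN = 14 + 4 = 18 bytes.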
*/ eh = mtod(mp, struct ether_vlan_header *); if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { etype = ntohs(eh->evl_proto); elen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; } else { etype = ntohs(eh->evl_encap_proto); elen = ETHER_HDR_LEN; } switch (etype) { #ifdef INET case ETHERTYPE_IP: ip = (struct ip *)(mp->m_data + elen); ip_hlen = ip->ip_hl << 2; ipproto = ip->ip_p; th = (struct tcphdr *)((caddr_t)ip + ip_hlen); /* The IP checksum must be recalculated with TSO */ if (tso) *cmd |= I40E_TX_DESC_CMD_IIPT_IPV4_CSUM; else *cmd |= I40E_TX_DESC_CMD_IIPT_IPV4; break; #endif #ifdef INET6 case ETHERTYPE_IPV6: ip6 = (struct ip6_hdr *)(mp->m_data + elen); ip_hlen = sizeof(struct ip6_hdr); ipproto = ip6->ip6_nxt; th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen); *cmd |= I40E_TX_DESC_CMD_IIPT_IPV6; break; #endif default: break; } *off |= (elen >> 1) << I40E_TX_DESC_LENGTH_MACLEN_SHIFT; *off |= (ip_hlen >> 2) << I40E_TX_DESC_LENGTH_IPLEN_SHIFT; switch (ipproto) { case IPPROTO_TCP: tcp_hlen = th->th_off << 2; if (mp->m_pkthdr.csum_flags & (CSUM_TCP|CSUM_TCP_IPV6)) { *cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP; *off |= (tcp_hlen >> 2) << I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT; } break; case IPPROTO_UDP: if (mp->m_pkthdr.csum_flags & (CSUM_UDP|CSUM_UDP_IPV6)) { *cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP; *off |= (sizeof(struct udphdr) >> 2) << I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT; } break; case IPPROTO_SCTP: if (mp->m_pkthdr.csum_flags & (CSUM_SCTP|CSUM_SCTP_IPV6)) { *cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP; *off |= (sizeof(struct sctphdr) >> 2) << I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT; } /* Fall Thru */ default: break; } return (0); } /********************************************************************** * * Setup context for hardware segmentation offload (TSO) * **********************************************************************/ static bool ixl_tso_setup(struct ixl_queue *que, struct mbuf *mp) { struct tx_ring *txr = &que->txr; struct i40e_tx_context_desc *TXD; struct ixl_tx_buf *buf; u32 cmd, mss, type, tsolen; u16 etype; int idx, elen, ip_hlen, tcp_hlen; struct ether_vlan_header *eh; #ifdef INET struct ip *ip; #endif #ifdef INET6 struct ip6_hdr *ip6; #endif #if defined(INET6) || defined(INET) struct tcphdr *th; #endif u64 type_cmd_tso_mss; /* * Determine where frame payload starts. * Jump over vlan headers if already present */ eh = mtod(mp, struct ether_vlan_header *); if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { elen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; etype = eh->evl_proto; } else { elen = ETHER_HDR_LEN; etype = eh->evl_encap_proto; } switch (ntohs(etype)) { #ifdef INET6 case ETHERTYPE_IPV6: ip6 = (struct ip6_hdr *)(mp->m_data + elen); if (ip6->ip6_nxt != IPPROTO_TCP) return (ENXIO); ip_hlen = sizeof(struct ip6_hdr); th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen); th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0); tcp_hlen = th->th_off << 2; /* * The corresponding flag is set by the stack in the IPv4 * TSO case, but not in IPv6 (at least in FreeBSD 10.2). * So, set it here because the rest of the flow requires it. 
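 *
 * (Note also that the pseudo-header checksum seeded above via
 * in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0) deliberately passes a zero
 * length; for TSO the hardware fills in the per-segment lengths itself.)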
*/ mp->m_pkthdr.csum_flags |= CSUM_TCP_IPV6; break; #endif #ifdef INET case ETHERTYPE_IP: ip = (struct ip *)(mp->m_data + elen); if (ip->ip_p != IPPROTO_TCP) return (ENXIO); ip->ip_sum = 0; ip_hlen = ip->ip_hl << 2; th = (struct tcphdr *)((caddr_t)ip + ip_hlen); th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htons(IPPROTO_TCP)); tcp_hlen = th->th_off << 2; break; #endif default: printf("%s: CSUM_TSO but no supported IP version (0x%04x)", __func__, ntohs(etype)); return FALSE; } /* Ensure we have at least the IP+TCP header in the first mbuf. */ if (mp->m_len < elen + ip_hlen + sizeof(struct tcphdr)) return FALSE; idx = txr->next_avail; buf = &txr->buffers[idx]; TXD = (struct i40e_tx_context_desc *) &txr->base[idx]; tsolen = mp->m_pkthdr.len - (elen + ip_hlen + tcp_hlen); type = I40E_TX_DESC_DTYPE_CONTEXT; cmd = I40E_TX_CTX_DESC_TSO; /* TSO MSS must not be less than 64 */ if (mp->m_pkthdr.tso_segsz < IXL_MIN_TSO_MSS) { que->mss_too_small++; mp->m_pkthdr.tso_segsz = IXL_MIN_TSO_MSS; } mss = mp->m_pkthdr.tso_segsz; type_cmd_tso_mss = ((u64)type << I40E_TXD_CTX_QW1_DTYPE_SHIFT) | ((u64)cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) | ((u64)tsolen << I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) | ((u64)mss << I40E_TXD_CTX_QW1_MSS_SHIFT); TXD->type_cmd_tso_mss = htole64(type_cmd_tso_mss); TXD->tunneling_params = htole32(0); buf->m_head = NULL; buf->eop_index = -1; if (++idx == que->num_tx_desc) idx = 0; txr->avail--; txr->next_avail = idx; return TRUE; } /* * ixl_get_tx_head - Retrieve the value from the * location the HW records its HEAD index */ static inline u32 ixl_get_tx_head(struct ixl_queue *que) { struct tx_ring *txr = &que->txr; void *head = &txr->base[que->num_tx_desc]; return LE32_TO_CPU(*(volatile __le32 *)head); } /********************************************************************** * * Get index of last used descriptor/buffer from hardware, and clean * the descriptors/buffers up to that index. * **********************************************************************/ static bool ixl_txeof_hwb(struct ixl_queue *que) { struct tx_ring *txr = &que->txr; u32 first, last, head, done; struct ixl_tx_buf *buf; struct i40e_tx_desc *tx_desc, *eop_desc; mtx_assert(&txr->mtx, MA_OWNED); #ifdef DEV_NETMAP // XXX todo: implement moderation if (netmap_tx_irq(que->vsi->ifp, que->me)) return FALSE; #endif /* DEF_NETMAP */ /* These are not the descriptors you seek, move along :) */ if (txr->avail == que->num_tx_desc) { atomic_store_rel_32(&txr->watchdog_timer, 0); return FALSE; } first = txr->next_to_clean; buf = &txr->buffers[first]; tx_desc = (struct i40e_tx_desc *)&txr->base[first]; last = buf->eop_index; if (last == -1) return FALSE; eop_desc = (struct i40e_tx_desc *)&txr->base[last]; /* Sync DMA before reading head index from ring */ bus_dmamap_sync(txr->dma.tag, txr->dma.map, BUS_DMASYNC_POSTREAD); /* Get the Head WB value */ head = ixl_get_tx_head(que); /* ** Get the index of the first descriptor ** BEYOND the EOP and call that 'done'. ** I do this so the comparison in the ** inner while loop below can be simple */ if (++last == que->num_tx_desc) last = 0; done = last; /* ** The HEAD index of the ring is written in a ** defined location, this rather than a done bit ** is what is used to keep track of what must be ** 'cleaned'. 
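**
** For illustration, that location is the slot just past the last
** descriptor, as read by ixl_get_tx_head() above:
**
**   head = LE32_TO_CPU(*(volatile __le32 *)&txr->base[que->num_tx_desc]);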
*/ while (first != head) { /* We clean the range of the packet */ while (first != done) { ++txr->avail; if (buf->m_head) { txr->bytes += /* for ITR adjustment */ buf->m_head->m_pkthdr.len; txr->tx_bytes += /* for TX stats */ buf->m_head->m_pkthdr.len; bus_dmamap_sync(buf->tag, buf->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(buf->tag, buf->map); m_freem(buf->m_head); buf->m_head = NULL; } buf->eop_index = -1; if (++first == que->num_tx_desc) first = 0; buf = &txr->buffers[first]; tx_desc = &txr->base[first]; } ++txr->packets; /* If a packet was successfully cleaned, reset the watchdog timer */ atomic_store_rel_32(&txr->watchdog_timer, IXL_WATCHDOG); /* See if there is more work now */ last = buf->eop_index; if (last != -1) { eop_desc = &txr->base[last]; /* Get next done point */ if (++last == que->num_tx_desc) last = 0; done = last; } else break; } bus_dmamap_sync(txr->dma.tag, txr->dma.map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); txr->next_to_clean = first; /* * If there are no pending descriptors, clear the timeout. */ if (txr->avail == que->num_tx_desc) { atomic_store_rel_32(&txr->watchdog_timer, 0); return FALSE; } return TRUE; } /********************************************************************** * * Use index kept by driver and the flag on each descriptor to find used * descriptor/buffers and clean them up for re-use. * * This method of reclaiming descriptors is current incompatible with * DEV_NETMAP. * * Returns TRUE if there are more descriptors to be cleaned after this * function exits. * **********************************************************************/ static bool ixl_txeof_dwb(struct ixl_queue *que) { struct tx_ring *txr = &que->txr; u32 first, last, done; u32 limit = 256; struct ixl_tx_buf *buf; struct i40e_tx_desc *tx_desc, *eop_desc; mtx_assert(&txr->mtx, MA_OWNED); /* There are no descriptors to clean */ if (txr->avail == que->num_tx_desc) { atomic_store_rel_32(&txr->watchdog_timer, 0); return FALSE; } /* Set starting index/descriptor/buffer */ first = txr->next_to_clean; buf = &txr->buffers[first]; tx_desc = &txr->base[first]; /* * This function operates per-packet -- identifies the start of the * packet and gets the index of the last descriptor of the packet from * it, from eop_index. * * If the last descriptor is marked "done" by the hardware, then all * of the descriptors for the packet are cleaned. */ last = buf->eop_index; if (last == -1) return FALSE; eop_desc = &txr->base[last]; /* Sync DMA before reading from ring */ bus_dmamap_sync(txr->dma.tag, txr->dma.map, BUS_DMASYNC_POSTREAD); /* * Get the index of the first descriptor beyond the EOP and call that * 'done'. Simplifies the comparison for the inner loop below. */ if (++last == que->num_tx_desc) last = 0; done = last; /* * We find the last completed descriptor by examining each * descriptor's status bits to see if it's done. */ do { /* Break if last descriptor in packet isn't marked done */ if ((eop_desc->cmd_type_offset_bsz & I40E_TXD_QW1_DTYPE_MASK) != I40E_TX_DESC_DTYPE_DESC_DONE) break; /* Clean the descriptors that make up the processed packet */ while (first != done) { /* * If there was a buffer attached to this descriptor, * prevent the adapter from accessing it, and add its * length to the queue's TX stats. 
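 * (The ordering below matters: sync POSTWRITE, bus_dmamap_unload(),
 * then m_freem(), so the device mapping is gone before the mbuf is
 * returned to the allocator.)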
*/ if (buf->m_head) { txr->bytes += buf->m_head->m_pkthdr.len; txr->tx_bytes += buf->m_head->m_pkthdr.len; bus_dmamap_sync(buf->tag, buf->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(buf->tag, buf->map); m_freem(buf->m_head); buf->m_head = NULL; } buf->eop_index = -1; ++txr->avail; if (++first == que->num_tx_desc) first = 0; buf = &txr->buffers[first]; tx_desc = &txr->base[first]; } ++txr->packets; /* If a packet was successfully cleaned, reset the watchdog timer */ atomic_store_rel_32(&txr->watchdog_timer, IXL_WATCHDOG); /* * Since buf is the first buffer after the one that was just * cleaned, check if the packet it starts is done, too. */ last = buf->eop_index; if (last != -1) { eop_desc = &txr->base[last]; /* Get next done point */ if (++last == que->num_tx_desc) last = 0; done = last; } else break; } while (--limit); bus_dmamap_sync(txr->dma.tag, txr->dma.map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); txr->next_to_clean = first; /* * If there are no pending descriptors, clear the watchdog timer. */ if (txr->avail == que->num_tx_desc) { atomic_store_rel_32(&txr->watchdog_timer, 0); return FALSE; } return TRUE; } bool ixl_txeof(struct ixl_queue *que) { struct ixl_vsi *vsi = que->vsi; return (vsi->enable_head_writeback) ? ixl_txeof_hwb(que) : ixl_txeof_dwb(que); } /********************************************************************* * * Refresh mbuf buffers for RX descriptor rings * - now keeps its own state so discards due to resource * exhaustion are unnecessary, if an mbuf cannot be obtained * it just returns, keeping its placeholder, thus it can simply * be recalled to try again. * **********************************************************************/ static void ixl_refresh_mbufs(struct ixl_queue *que, int limit) { struct ixl_vsi *vsi = que->vsi; struct rx_ring *rxr = &que->rxr; bus_dma_segment_t hseg[1]; bus_dma_segment_t pseg[1]; struct ixl_rx_buf *buf; struct mbuf *mh, *mp; int i, j, nsegs, error; bool refreshed = FALSE; i = j = rxr->next_refresh; /* Control the loop with one beyond */ if (++j == que->num_rx_desc) j = 0; while (j != limit) { buf = &rxr->buffers[i]; if (rxr->hdr_split == FALSE) goto no_split; if (buf->m_head == NULL) { mh = m_gethdr(M_NOWAIT, MT_DATA); if (mh == NULL) goto update; } else mh = buf->m_head; mh->m_pkthdr.len = mh->m_len = MHLEN; mh->m_len = MHLEN; mh->m_flags |= M_PKTHDR; /* Get the memory mapping */ error = bus_dmamap_load_mbuf_sg(rxr->htag, buf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT); if (error != 0) { printf("Refresh mbufs: hdr dmamap load" " failure - %d\n", error); m_free(mh); buf->m_head = NULL; goto update; } buf->m_head = mh; bus_dmamap_sync(rxr->htag, buf->hmap, BUS_DMASYNC_PREREAD); rxr->base[i].read.hdr_addr = htole64(hseg[0].ds_addr); no_split: if (buf->m_pack == NULL) { mp = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, rxr->mbuf_sz); if (mp == NULL) goto update; } else mp = buf->m_pack; mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz; /* Get the memory mapping */ error = bus_dmamap_load_mbuf_sg(rxr->ptag, buf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT); if (error != 0) { printf("Refresh mbufs: payload dmamap load" " failure - %d\n", error); m_free(mp); buf->m_pack = NULL; goto update; } buf->m_pack = mp; bus_dmamap_sync(rxr->ptag, buf->pmap, BUS_DMASYNC_PREREAD); rxr->base[i].read.pkt_addr = htole64(pseg[0].ds_addr); /* Used only when doing header split */ rxr->base[i].read.hdr_addr = 0; refreshed = TRUE; /* Next is precalculated */ i = j; rxr->next_refresh = i; if (++j == que->num_rx_desc) j = 0; } update: if (refreshed) /* Update hardware 
tail index */ wr32(vsi->hw, rxr->tail, rxr->next_refresh); return; } /********************************************************************* * * Allocate memory for rx_buffer structures. Since we use one * rx_buffer per descriptor, the maximum number of rx_buffer's * that we'll need is equal to the number of receive descriptors * that we've defined. * **********************************************************************/ int ixl_allocate_rx_data(struct ixl_queue *que) { struct rx_ring *rxr = &que->rxr; struct ixl_vsi *vsi = que->vsi; device_t dev = vsi->dev; struct ixl_rx_buf *buf; int i, bsize, error; if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ 1, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ MSIZE, /* maxsize */ 1, /* nsegments */ MSIZE, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &rxr->htag))) { device_printf(dev, "Unable to create RX DMA htag\n"); return (error); } if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ 1, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ MJUM16BYTES, /* maxsize */ 1, /* nsegments */ MJUM16BYTES, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &rxr->ptag))) { device_printf(dev, "Unable to create RX DMA ptag\n"); goto free_rx_htag; } bsize = sizeof(struct ixl_rx_buf) * que->num_rx_desc; if (!(rxr->buffers = (struct ixl_rx_buf *) malloc(bsize, M_DEVBUF, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate rx_buffer memory\n"); error = ENOMEM; goto free_rx_ptag; } for (i = 0; i < que->num_rx_desc; i++) { buf = &rxr->buffers[i]; error = bus_dmamap_create(rxr->htag, BUS_DMA_NOWAIT, &buf->hmap); if (error) { device_printf(dev, "Unable to create RX head map\n"); goto free_buffers; } error = bus_dmamap_create(rxr->ptag, BUS_DMA_NOWAIT, &buf->pmap); if (error) { bus_dmamap_destroy(rxr->htag, buf->hmap); device_printf(dev, "Unable to create RX pkt map\n"); goto free_buffers; } } return 0; free_buffers: while (i--) { buf = &rxr->buffers[i]; bus_dmamap_destroy(rxr->ptag, buf->pmap); bus_dmamap_destroy(rxr->htag, buf->hmap); } free(rxr->buffers, M_DEVBUF); rxr->buffers = NULL; free_rx_ptag: bus_dma_tag_destroy(rxr->ptag); rxr->ptag = NULL; free_rx_htag: bus_dma_tag_destroy(rxr->htag); rxr->htag = NULL; return (error); } /********************************************************************* * * (Re)Initialize the queue receive ring and its buffers. 
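 *  (Safe to call again at re-init time: the function takes IXL_RX_LOCK
 *  itself and frees any mbufs left over from a previous run before the
 *  ring is repopulated.)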
* **********************************************************************/ int ixl_init_rx_ring(struct ixl_queue *que) { struct rx_ring *rxr = &que->rxr; struct ixl_vsi *vsi = que->vsi; #if defined(INET6) || defined(INET) struct ifnet *ifp = vsi->ifp; struct lro_ctrl *lro = &rxr->lro; #endif struct ixl_rx_buf *buf; bus_dma_segment_t pseg[1], hseg[1]; int rsize, nsegs, error = 0; #ifdef DEV_NETMAP struct netmap_adapter *na = NA(que->vsi->ifp); struct netmap_slot *slot; #endif /* DEV_NETMAP */ IXL_RX_LOCK(rxr); #ifdef DEV_NETMAP /* same as in ixl_init_tx_ring() */ slot = netmap_reset(na, NR_RX, que->me, 0); #endif /* DEV_NETMAP */ /* Clear the ring contents */ rsize = roundup2(que->num_rx_desc * sizeof(union i40e_rx_desc), DBA_ALIGN); bzero((void *)rxr->base, rsize); /* Cleanup any existing buffers */ for (int i = 0; i < que->num_rx_desc; i++) { buf = &rxr->buffers[i]; if (buf->m_head != NULL) { bus_dmamap_sync(rxr->htag, buf->hmap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(rxr->htag, buf->hmap); buf->m_head->m_flags |= M_PKTHDR; m_freem(buf->m_head); } if (buf->m_pack != NULL) { bus_dmamap_sync(rxr->ptag, buf->pmap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(rxr->ptag, buf->pmap); buf->m_pack->m_flags |= M_PKTHDR; m_freem(buf->m_pack); } buf->m_head = NULL; buf->m_pack = NULL; } /* header split is off */ rxr->hdr_split = FALSE; /* Now replenish the mbufs */ for (int j = 0; j != que->num_rx_desc; ++j) { struct mbuf *mh, *mp; buf = &rxr->buffers[j]; #ifdef DEV_NETMAP /* * In netmap mode, fill the map and set the buffer * address in the NIC ring, considering the offset * between the netmap and NIC rings (see comment in * ixgbe_setup_transmit_ring() ). No need to allocate * an mbuf, so end the block with a continue; */ if (slot) { int sj = netmap_idx_n2k(na->rx_rings[que->me], j); uint64_t paddr; void *addr; addr = PNMB(na, slot + sj, &paddr); netmap_load_map(na, rxr->dma.tag, buf->pmap, addr); /* Update descriptor and the cached value */ rxr->base[j].read.pkt_addr = htole64(paddr); rxr->base[j].read.hdr_addr = 0; continue; } #endif /* DEV_NETMAP */ /* ** Don't allocate mbufs if not ** doing header split, its wasteful */ if (rxr->hdr_split == FALSE) goto skip_head; /* First the header */ buf->m_head = m_gethdr(M_NOWAIT, MT_DATA); if (buf->m_head == NULL) { error = ENOBUFS; goto fail; } m_adj(buf->m_head, ETHER_ALIGN); mh = buf->m_head; mh->m_len = mh->m_pkthdr.len = MHLEN; mh->m_flags |= M_PKTHDR; /* Get the memory mapping */ error = bus_dmamap_load_mbuf_sg(rxr->htag, buf->hmap, buf->m_head, hseg, &nsegs, BUS_DMA_NOWAIT); if (error != 0) /* Nothing elegant to do here */ goto fail; bus_dmamap_sync(rxr->htag, buf->hmap, BUS_DMASYNC_PREREAD); /* Update descriptor */ rxr->base[j].read.hdr_addr = htole64(hseg[0].ds_addr); skip_head: /* Now the payload cluster */ buf->m_pack = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, rxr->mbuf_sz); if (buf->m_pack == NULL) { error = ENOBUFS; goto fail; } mp = buf->m_pack; mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz; /* Get the memory mapping */ error = bus_dmamap_load_mbuf_sg(rxr->ptag, buf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT); if (error != 0) goto fail; bus_dmamap_sync(rxr->ptag, buf->pmap, BUS_DMASYNC_PREREAD); /* Update descriptor */ rxr->base[j].read.pkt_addr = htole64(pseg[0].ds_addr); rxr->base[j].read.hdr_addr = 0; } /* Setup our descriptor indices */ rxr->next_check = 0; rxr->next_refresh = 0; rxr->lro_enabled = FALSE; rxr->split = 0; rxr->bytes = 0; rxr->discard = FALSE; wr32(vsi->hw, rxr->tail, que->num_rx_desc - 1); ixl_flush(vsi->hw); #if defined(INET6) || 
defined(INET) /* ** Now set up the LRO interface: */ if (ifp->if_capenable & IFCAP_LRO) { int err = tcp_lro_init(lro); if (err) { if_printf(ifp, "queue %d: LRO Initialization failed!\n", que->me); goto fail; } INIT_DBG_IF(ifp, "queue %d: RX Soft LRO Initialized", que->me); rxr->lro_enabled = TRUE; lro->ifp = vsi->ifp; } #endif bus_dmamap_sync(rxr->dma.tag, rxr->dma.map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); fail: IXL_RX_UNLOCK(rxr); return (error); } /********************************************************************* * * Free station receive ring data structures * **********************************************************************/ void ixl_free_que_rx(struct ixl_queue *que) { struct rx_ring *rxr = &que->rxr; struct ixl_rx_buf *buf; /* Cleanup any existing buffers */ if (rxr->buffers != NULL) { for (int i = 0; i < que->num_rx_desc; i++) { buf = &rxr->buffers[i]; /* Free buffers and unload dma maps */ ixl_rx_discard(rxr, i); bus_dmamap_destroy(rxr->htag, buf->hmap); bus_dmamap_destroy(rxr->ptag, buf->pmap); } free(rxr->buffers, M_DEVBUF); rxr->buffers = NULL; } if (rxr->htag != NULL) { bus_dma_tag_destroy(rxr->htag); rxr->htag = NULL; } if (rxr->ptag != NULL) { bus_dma_tag_destroy(rxr->ptag); rxr->ptag = NULL; } } static inline void ixl_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u8 ptype) { #if defined(INET6) || defined(INET) /* * ATM LRO is only for IPv4/TCP packets and TCP checksum of the packet * should be computed by hardware. Also it should not have VLAN tag in * ethernet header. */ if (rxr->lro_enabled && (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 && (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) == (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) { /* * Send to the stack if: ** - LRO not enabled, or ** - no LRO resources, or ** - lro enqueue fails */ if (rxr->lro.lro_cnt != 0) if (tcp_lro_rx(&rxr->lro, m, 0) == 0) return; } #endif IXL_RX_UNLOCK(rxr); (*ifp->if_input)(ifp, m); IXL_RX_LOCK(rxr); } static inline void ixl_rx_discard(struct rx_ring *rxr, int i) { struct ixl_rx_buf *rbuf; KASSERT(rxr != NULL, ("Receive ring pointer cannot be null")); - KASSERT(i < rxr->que->num_rx_desc, ("Descriptor index must be less than que->num_desc")); + KASSERT(i < rxr->que->num_rx_desc, ("Descriptor index must be less than que->num_rx_desc")); rbuf = &rxr->buffers[i]; /* Free the mbufs in the current chain for the packet */ if (rbuf->fmp != NULL) { bus_dmamap_sync(rxr->ptag, rbuf->pmap, BUS_DMASYNC_POSTREAD); m_freem(rbuf->fmp); rbuf->fmp = NULL; } /* * Free the mbufs for the current descriptor; and let ixl_refresh_mbufs() * assign new mbufs to these. */ if (rbuf->m_head) { bus_dmamap_sync(rxr->htag, rbuf->hmap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(rxr->htag, rbuf->hmap); m_free(rbuf->m_head); rbuf->m_head = NULL; } if (rbuf->m_pack) { bus_dmamap_sync(rxr->ptag, rbuf->pmap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(rxr->ptag, rbuf->pmap); m_free(rbuf->m_pack); rbuf->m_pack = NULL; } } #ifdef RSS /* ** i40e_ptype_to_hash: parse the packet type ** to determine the appropriate hash. 
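**
** For example, a known IPv4/TCP ptype decodes to M_HASHTYPE_RSS_TCP_IPV4,
** while unknown or non-IP types fall back to M_HASHTYPE_OPAQUE_HASH.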
*/ static inline int ixl_ptype_to_hash(u8 ptype) { struct i40e_rx_ptype_decoded decoded; u8 ex = 0; decoded = decode_rx_desc_ptype(ptype); ex = decoded.outer_frag; if (!decoded.known) return M_HASHTYPE_OPAQUE_HASH; if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_L2) return M_HASHTYPE_OPAQUE_HASH; /* Note: anything that gets to this point is IP */ if (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6) { switch (decoded.inner_prot) { case I40E_RX_PTYPE_INNER_PROT_TCP: if (ex) return M_HASHTYPE_RSS_TCP_IPV6_EX; else return M_HASHTYPE_RSS_TCP_IPV6; case I40E_RX_PTYPE_INNER_PROT_UDP: if (ex) return M_HASHTYPE_RSS_UDP_IPV6_EX; else return M_HASHTYPE_RSS_UDP_IPV6; default: if (ex) return M_HASHTYPE_RSS_IPV6_EX; else return M_HASHTYPE_RSS_IPV6; } } if (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4) { switch (decoded.inner_prot) { case I40E_RX_PTYPE_INNER_PROT_TCP: return M_HASHTYPE_RSS_TCP_IPV4; case I40E_RX_PTYPE_INNER_PROT_UDP: if (ex) return M_HASHTYPE_RSS_UDP_IPV4_EX; else return M_HASHTYPE_RSS_UDP_IPV4; default: return M_HASHTYPE_RSS_IPV4; } } /* We should never get here!! */ return M_HASHTYPE_OPAQUE_HASH; } #endif /* RSS */ /********************************************************************* * * This routine executes in interrupt context. It replenishes * the mbufs in the descriptor and sends data which has been * dma'ed into host memory to upper layer. * * We loop at most count times if count is > 0, or until done if * count < 0. * * Return TRUE for more work, FALSE for all clean. *********************************************************************/ bool ixl_rxeof(struct ixl_queue *que, int count) { struct ixl_vsi *vsi = que->vsi; struct rx_ring *rxr = &que->rxr; struct ifnet *ifp = vsi->ifp; #if defined(INET6) || defined(INET) struct lro_ctrl *lro = &rxr->lro; #endif int i, nextp, processed = 0; union i40e_rx_desc *cur; struct ixl_rx_buf *rbuf, *nbuf; IXL_RX_LOCK(rxr); #ifdef DEV_NETMAP if (netmap_rx_irq(ifp, que->me, &count)) { IXL_RX_UNLOCK(rxr); return (FALSE); } #endif /* DEV_NETMAP */ for (i = rxr->next_check; count != 0;) { struct mbuf *sendmp, *mh, *mp; u32 status, error; u16 hlen, plen, vtag; u64 qword; u8 ptype; bool eop; /* Sync the ring. */ bus_dmamap_sync(rxr->dma.tag, rxr->dma.map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); cur = &rxr->base[i]; qword = le64toh(cur->wb.qword1.status_error_len); status = (qword & I40E_RXD_QW1_STATUS_MASK) >> I40E_RXD_QW1_STATUS_SHIFT; error = (qword & I40E_RXD_QW1_ERROR_MASK) >> I40E_RXD_QW1_ERROR_SHIFT; plen = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >> I40E_RXD_QW1_LENGTH_PBUF_SHIFT; hlen = (qword & I40E_RXD_QW1_LENGTH_HBUF_MASK) >> I40E_RXD_QW1_LENGTH_HBUF_SHIFT; ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >> I40E_RXD_QW1_PTYPE_SHIFT; if ((status & (1 << I40E_RX_DESC_STATUS_DD_SHIFT)) == 0) { ++rxr->not_done; break; } if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) break; count--; sendmp = NULL; nbuf = NULL; cur->wb.qword1.status_error_len = 0; rbuf = &rxr->buffers[i]; mh = rbuf->m_head; mp = rbuf->m_pack; eop = (status & (1 << I40E_RX_DESC_STATUS_EOF_SHIFT)); if (status & (1 << I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)) vtag = le16toh(cur->wb.qword0.lo_dword.l2tag1); else vtag = 0; /* Remove device access to the rx buffers. 
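 * (Both the header and payload maps get a POSTREAD sync and an unload
 * here, so the CPU owns the DMA'd data before the mbuf chain is built.)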
*/ if (rbuf->m_head != NULL) { bus_dmamap_sync(rxr->htag, rbuf->hmap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(rxr->htag, rbuf->hmap); } if (rbuf->m_pack != NULL) { bus_dmamap_sync(rxr->ptag, rbuf->pmap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(rxr->ptag, rbuf->pmap); } /* ** Make sure bad packets are discarded, ** note that only EOP descriptor has valid ** error results. */ if (eop && (error & (1 << I40E_RX_DESC_ERROR_RXE_SHIFT))) { rxr->desc_errs++; ixl_rx_discard(rxr, i); goto next_desc; } /* Prefetch the next buffer */ if (!eop) { nextp = i + 1; if (nextp == que->num_rx_desc) nextp = 0; nbuf = &rxr->buffers[nextp]; prefetch(nbuf); } /* ** The header mbuf is ONLY used when header ** split is enabled, otherwise we get normal ** behavior, ie, both header and payload ** are DMA'd into the payload buffer. ** ** Rather than using the fmp/lmp global pointers ** we now keep the head of a packet chain in the ** buffer struct and pass this along from one ** descriptor to the next, until we get EOP. */ if (rxr->hdr_split && (rbuf->fmp == NULL)) { if (hlen > IXL_RX_HDR) hlen = IXL_RX_HDR; mh->m_len = hlen; mh->m_flags |= M_PKTHDR; mh->m_next = NULL; mh->m_pkthdr.len = mh->m_len; /* Null buf pointer so it is refreshed */ rbuf->m_head = NULL; /* ** Check the payload length, this ** could be zero if its a small ** packet. */ if (plen > 0) { mp->m_len = plen; mp->m_next = NULL; mp->m_flags &= ~M_PKTHDR; mh->m_next = mp; mh->m_pkthdr.len += mp->m_len; /* Null buf pointer so it is refreshed */ rbuf->m_pack = NULL; rxr->split++; } /* ** Now create the forward ** chain so when complete ** we wont have to. */ if (eop == 0) { /* stash the chain head */ nbuf->fmp = mh; /* Make forward chain */ if (plen) mp->m_next = nbuf->m_pack; else mh->m_next = nbuf->m_pack; } else { /* Singlet, prepare to send */ sendmp = mh; if (vtag) { sendmp->m_pkthdr.ether_vtag = vtag; sendmp->m_flags |= M_VLANTAG; } } } else { /* ** Either no header split, or a ** secondary piece of a fragmented ** split packet. */ mp->m_len = plen; /* ** See if there is a stored head ** that determines what we are */ sendmp = rbuf->fmp; rbuf->m_pack = rbuf->fmp = NULL; if (sendmp != NULL) /* secondary frag */ sendmp->m_pkthdr.len += mp->m_len; else { /* first desc of a non-ps chain */ sendmp = mp; sendmp->m_flags |= M_PKTHDR; sendmp->m_pkthdr.len = mp->m_len; } /* Pass the head pointer on */ if (eop == 0) { nbuf->fmp = sendmp; sendmp = NULL; mp->m_next = nbuf->m_pack; } } ++processed; /* Sending this frame? */ if (eop) { sendmp->m_pkthdr.rcvif = ifp; /* gather stats */ rxr->rx_packets++; rxr->rx_bytes += sendmp->m_pkthdr.len; /* capture data for dynamic ITR adjustment */ rxr->packets++; rxr->bytes += sendmp->m_pkthdr.len; /* Set VLAN tag (field only valid in eop desc) */ if (vtag) { sendmp->m_pkthdr.ether_vtag = vtag; sendmp->m_flags |= M_VLANTAG; } if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) ixl_rx_checksum(sendmp, status, error, ptype); #ifdef RSS sendmp->m_pkthdr.flowid = le32toh(cur->wb.qword0.hi_dword.rss); M_HASHTYPE_SET(sendmp, ixl_ptype_to_hash(ptype)); #else sendmp->m_pkthdr.flowid = que->msix; M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE); #endif } next_desc: bus_dmamap_sync(rxr->dma.tag, rxr->dma.map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); /* Advance our pointers to the next descriptor. 
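 * (i wraps modulo que->num_rx_desc; every 8 processed descriptors the
 * loop below also calls ixl_refresh_mbufs() to re-arm the ring.)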
*/ if (++i == que->num_rx_desc) i = 0; /* Now send to the stack or do LRO */ if (sendmp != NULL) { rxr->next_check = i; ixl_rx_input(rxr, ifp, sendmp, ptype); /* * Update index used in loop in case another * ixl_rxeof() call executes when lock is released */ i = rxr->next_check; } /* Every 8 descriptors we go to refresh mbufs */ if (processed == 8) { ixl_refresh_mbufs(que, i); processed = 0; } } /* Refresh any remaining buf structs */ if (ixl_rx_unrefreshed(que)) ixl_refresh_mbufs(que, i); rxr->next_check = i; #if defined(INET6) || defined(INET) /* * Flush any outstanding LRO work */ #if __FreeBSD_version >= 1100105 tcp_lro_flush_all(lro); #else struct lro_entry *queued; while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) { SLIST_REMOVE_HEAD(&lro->lro_active, next); tcp_lro_flush(lro, queued); } #endif #endif /* defined(INET6) || defined(INET) */ IXL_RX_UNLOCK(rxr); return (FALSE); } /********************************************************************* * * Verify that the hardware indicated that the checksum is valid. * Inform the stack about the status of checksum so that stack * doesn't spend time verifying the checksum. * *********************************************************************/ static void ixl_rx_checksum(struct mbuf * mp, u32 status, u32 error, u8 ptype) { struct i40e_rx_ptype_decoded decoded; decoded = decode_rx_desc_ptype(ptype); /* Errors? */ if (error & ((1 << I40E_RX_DESC_ERROR_IPE_SHIFT) | (1 << I40E_RX_DESC_ERROR_L4E_SHIFT))) { mp->m_pkthdr.csum_flags = 0; return; } /* IPv6 with extension headers likely have bad csum */ if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP && decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6) if (status & (1 << I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT)) { mp->m_pkthdr.csum_flags = 0; return; } /* IP Checksum Good */ mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED; mp->m_pkthdr.csum_flags |= CSUM_IP_VALID; if (status & (1 << I40E_RX_DESC_STATUS_L3L4P_SHIFT)) { mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); mp->m_pkthdr.csum_data |= htons(0xffff); } return; } #if __FreeBSD_version >= 1100000 uint64_t ixl_get_counter(if_t ifp, ift_counter cnt) { struct ixl_vsi *vsi; vsi = if_getsoftc(ifp); switch (cnt) { case IFCOUNTER_IPACKETS: return (vsi->ipackets); case IFCOUNTER_IERRORS: return (vsi->ierrors); case IFCOUNTER_OPACKETS: return (vsi->opackets); case IFCOUNTER_OERRORS: return (vsi->oerrors); case IFCOUNTER_COLLISIONS: /* Collisions are by standard impossible in 40G/10G Ethernet */ return (0); case IFCOUNTER_IBYTES: return (vsi->ibytes); case IFCOUNTER_OBYTES: return (vsi->obytes); case IFCOUNTER_IMCASTS: return (vsi->imcasts); case IFCOUNTER_OMCASTS: return (vsi->omcasts); case IFCOUNTER_IQDROPS: return (vsi->iqdrops); case IFCOUNTER_OQDROPS: return (vsi->oqdrops); case IFCOUNTER_NOPROTO: return (vsi->noproto); default: return (if_get_counter_default(ifp, cnt)); } } #endif /* * Set TX and RX ring size adjusting value to supported range */ void ixl_vsi_setup_rings_size(struct ixl_vsi * vsi, int tx_ring_size, int rx_ring_size) { struct device * dev = vsi->dev; if (tx_ring_size < IXL_MIN_RING || tx_ring_size > IXL_MAX_RING || tx_ring_size % IXL_RING_INCREMENT != 0) { device_printf(dev, "Invalid tx_ring_size value of %d set!\n", tx_ring_size); device_printf(dev, "tx_ring_size must be between %d and %d, " "inclusive, and must be a multiple of %d\n", IXL_MIN_RING, IXL_MAX_RING, IXL_RING_INCREMENT); device_printf(dev, "Using default value of %d instead\n", IXL_DEFAULT_RING); vsi->num_tx_desc = IXL_DEFAULT_RING; } else vsi->num_tx_desc = 
tx_ring_size; if (rx_ring_size < IXL_MIN_RING || rx_ring_size > IXL_MAX_RING || rx_ring_size % IXL_RING_INCREMENT != 0) { device_printf(dev, "Invalid rx_ring_size value of %d set!\n", rx_ring_size); device_printf(dev, "rx_ring_size must be between %d and %d, " "inclusive, and must be a multiple of %d\n", IXL_MIN_RING, IXL_MAX_RING, IXL_RING_INCREMENT); device_printf(dev, "Using default value of %d instead\n", IXL_DEFAULT_RING); vsi->num_rx_desc = IXL_DEFAULT_RING; } else vsi->num_rx_desc = rx_ring_size; device_printf(dev, "using %d tx descriptors and %d rx descriptors\n", vsi->num_tx_desc, vsi->num_rx_desc); } static void ixl_queue_sw_irq(struct ixl_vsi *vsi, int qidx) { struct i40e_hw *hw = vsi->hw; u32 reg, mask; if ((vsi->flags & IXL_FLAGS_IS_VF) != 0) { mask = (I40E_VFINT_DYN_CTLN1_INTENA_MASK | I40E_VFINT_DYN_CTLN1_SWINT_TRIG_MASK | I40E_VFINT_DYN_CTLN1_ITR_INDX_MASK); reg = I40E_VFINT_DYN_CTLN1(qidx); } else { mask = (I40E_PFINT_DYN_CTLN_INTENA_MASK | I40E_PFINT_DYN_CTLN_SWINT_TRIG_MASK | I40E_PFINT_DYN_CTLN_ITR_INDX_MASK); reg = ((vsi->flags & IXL_FLAGS_USES_MSIX) != 0) ? I40E_PFINT_DYN_CTLN(qidx) : I40E_PFINT_DYN_CTL0; } wr32(hw, reg, mask); } int ixl_queue_hang_check(struct ixl_vsi *vsi) { struct ixl_queue *que = vsi->queues; device_t dev = vsi->dev; struct tx_ring *txr; s32 timer, new_timer; int hung = 0; for (int i = 0; i < vsi->num_queues; i++, que++) { txr = &que->txr; /* * If watchdog_timer is equal to the default value set by ixl_txeof * just subtract hz and move on - the queue is most probably * running. Otherwise check the value. */ if (atomic_cmpset_rel_32(&txr->watchdog_timer, IXL_WATCHDOG, (IXL_WATCHDOG) - hz) == 0) { timer = atomic_load_acq_32(&txr->watchdog_timer); /* * Again - if the timer was reset to default value * then queue is running. Otherwise check if watchdog * expired and act accordingly. */ if (timer > 0 && timer != IXL_WATCHDOG) { new_timer = timer - hz; if (new_timer <= 0) { atomic_store_rel_32(&txr->watchdog_timer, -1); device_printf(dev, "WARNING: queue %d " "appears to be hung!\n", que->me); ++hung; /* Try to unblock the queue with SW IRQ */ ixl_queue_sw_irq(vsi, i); } else { /* * If this fails, that means something in the TX path * has updated the watchdog, so it means the TX path * is still working and the watchdog doesn't need * to count down. */ atomic_cmpset_rel_32(&txr->watchdog_timer, timer, new_timer); } } } } return (hung); } Index: stable/11/sys/dev/netmap/if_ixl_netmap.h =================================================================== --- stable/11/sys/dev/netmap/if_ixl_netmap.h (revision 343558) +++ stable/11/sys/dev/netmap/if_ixl_netmap.h (revision 343559) @@ -1,421 +1,419 @@ /* * Copyright (C) 2015, Luigi Rizzo. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * $FreeBSD$ * * netmap support for: ixl * * derived from ixgbe * netmap support for a network driver. * This file contains code but only static or inline functions used * by a single driver. To avoid replication of code we just #include * it near the beginning of the standard driver. * For ixl the file is imported in two places, hence the conditional at the * beginning. */ #include <net/netmap.h> #include <sys/selinfo.h> /* * Some drivers may need the following headers. Others * already include them by default #include <vm/vm.h> #include <vm/pmap.h> */ #include <dev/netmap/netmap_kern.h> int ixl_netmap_txsync(struct netmap_kring *kring, int flags); int ixl_netmap_rxsync(struct netmap_kring *kring, int flags); extern int ixl_rx_miss, ixl_rx_miss_bufs, ixl_crcstrip; #ifdef NETMAP_IXL_MAIN /* * device-specific sysctl variables: * * ixl_crcstrip: 0: NIC keeps CRC in rx frames, 1: NIC strips it (default). * During regular operations the CRC is stripped, but on some * hardware reception of frames not multiple of 64 is slower, * so using crcstrip=0 helps in benchmarks. * * ixl_rx_miss, ixl_rx_miss_bufs: * count packets that might be missed due to lost interrupts. */ int ixl_rx_miss, ixl_rx_miss_bufs, ixl_crcstrip = 1; SYSCTL_DECL(_dev_netmap); /* * The ixl driver by default strips CRCs and we do not override it. */ #if 0 SYSCTL_INT(_dev_netmap, OID_AUTO, ixl_crcstrip, CTLFLAG_RW, &ixl_crcstrip, 1, "NIC strips CRC on rx frames"); #endif SYSCTL_INT(_dev_netmap, OID_AUTO, ixl_rx_miss, CTLFLAG_RW, &ixl_rx_miss, 0, "potentially missed rx intr"); SYSCTL_INT(_dev_netmap, OID_AUTO, ixl_rx_miss_bufs, CTLFLAG_RW, &ixl_rx_miss_bufs, 0, "potentially missed rx intr bufs"); /* * Register/unregister. We are already under netmap lock. * Only called on the first register or the last unregister. */ static int ixl_netmap_reg(struct netmap_adapter *na, int onoff) { struct ifnet *ifp = na->ifp; struct ixl_vsi *vsi = ifp->if_softc; struct ixl_pf *pf = (struct ixl_pf *)vsi->back; IXL_PF_LOCK(pf); ixl_disable_intr(vsi); /* Tell the stack that the interface is no longer active */ ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); //set_crcstrip(&adapter->hw, onoff); /* enable or disable flags and callbacks in na and ifp */ if (onoff) { nm_set_native_flags(na); } else { nm_clear_native_flags(na); } ixl_init_locked(pf); /* also enables intr */ //set_crcstrip(&adapter->hw, onoff); // XXX why twice ? IXL_PF_UNLOCK(pf); return (ifp->if_drv_flags & IFF_DRV_RUNNING ? 0 : 1); } /* * The attach routine, called near the end of ixl_attach(), * fills the parameters for netmap_attach() and calls it. * It cannot fail, in the worst case (such as no memory) * netmap mode will be disabled and the driver will only * operate in standard mode. */ static void ixl_netmap_attach(struct ixl_vsi *vsi) { struct netmap_adapter na; bzero(&na, sizeof(na)); na.ifp = vsi->ifp; na.na_flags = NAF_BDG_MAYSLEEP; - // XXX check that queues is set.
- nm_prinf("queues is %p", vsi->queues); - if (vsi->queues) { - na.num_tx_desc = vsi->queues[0].num_desc; - na.num_rx_desc = vsi->queues[0].num_desc; - } + na.num_tx_desc = vsi->num_tx_desc; + na.num_rx_desc = vsi->num_rx_desc; na.nm_txsync = ixl_netmap_txsync; na.nm_rxsync = ixl_netmap_rxsync; na.nm_register = ixl_netmap_reg; na.num_tx_rings = na.num_rx_rings = vsi->num_queues; netmap_attach(&na); } #else /* !NETMAP_IXL_MAIN, code for ixl_txrx.c */ /* * Reconcile kernel and user view of the transmit ring. * * All information is in the kring. * Userspace wants to send packets up to the one before kring->rhead, * kernel knows kring->nr_hwcur is the first unsent packet. * * Here we push packets out (as many as possible), and possibly * reclaim buffers from previously completed transmission. * * The caller (netmap) guarantees that there is only one instance * running at any time. Any interference with other driver * methods should be handled by the individual drivers. */ int ixl_netmap_txsync(struct netmap_kring *kring, int flags) { struct netmap_adapter *na = kring->na; struct ifnet *ifp = na->ifp; struct netmap_ring *ring = kring->ring; u_int nm_i; /* index into the netmap ring */ u_int nic_i; /* index into the NIC ring */ u_int n; u_int const lim = kring->nkr_num_slots - 1; u_int const head = kring->rhead; /* * interrupts on every tx packet are expensive so request * them every half ring, or where NS_REPORT is set */ u_int report_frequency = kring->nkr_num_slots >> 1; /* device-specific */ struct ixl_vsi *vsi = ifp->if_softc; struct ixl_queue *que = &vsi->queues[kring->ring_id]; struct tx_ring *txr = &que->txr; bus_dmamap_sync(txr->dma.tag, txr->dma.map, BUS_DMASYNC_POSTREAD); /* * First part: process new packets to send. * nm_i is the current index in the netmap ring, * nic_i is the corresponding index in the NIC ring. * * If we have packets to send (nm_i != head) * iterate over the netmap ring, fetch length and update * the corresponding slot in the NIC ring. Some drivers also * need to update the buffer's physical address in the NIC slot * even NS_BUF_CHANGED is not set (PNMB computes the addresses). * * The netmap_reload_map() calls is especially expensive, * even when (as in this case) the tag is 0, so do only * when the buffer has actually changed. * * If possible do not set the report/intr bit on all slots, * but only a few times per ring or when NS_REPORT is set. * * Finally, on 10G and faster drivers, it might be useful * to prefetch the next slot and txr entry. */ nm_i = kring->nr_hwcur; if (nm_i != head) { /* we have new packets to send */ nic_i = netmap_idx_k2n(kring, nm_i); __builtin_prefetch(&ring->slot[nm_i]); __builtin_prefetch(&txr->buffers[nic_i]); for (n = 0; nm_i != head; n++) { struct netmap_slot *slot = &ring->slot[nm_i]; u_int len = slot->len; uint64_t paddr; void *addr = PNMB(na, slot, &paddr); /* device-specific */ struct i40e_tx_desc *curr = &txr->base[nic_i]; struct ixl_tx_buf *txbuf = &txr->buffers[nic_i]; u64 flags = (slot->flags & NS_REPORT || nic_i == 0 || nic_i == report_frequency) ? ((u64)I40E_TX_DESC_CMD_RS << I40E_TXD_QW1_CMD_SHIFT) : 0; /* prefetch for next round */ __builtin_prefetch(&ring->slot[nm_i + 1]); __builtin_prefetch(&txr->buffers[nic_i + 1]); NM_CHECK_ADDR_LEN(na, addr, len); if (slot->flags & NS_BUF_CHANGED) { /* buffer has changed, reload map */ netmap_reload_map(na, txr->dma.tag, txbuf->map, addr); } slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED); /* Fill the slot in the NIC ring. 
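 * (Each netmap slot becomes one data descriptor: the buffer address plus
 * a cmd_type_offset_bsz word carrying the length, EOP and, on selected
 * slots, the RS bit already computed in flags above.)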
*/ curr->buffer_addr = htole64(paddr); curr->cmd_type_offset_bsz = htole64( ((u64)len << I40E_TXD_QW1_TX_BUF_SZ_SHIFT) | flags | ((u64)I40E_TX_DESC_CMD_EOP << I40E_TXD_QW1_CMD_SHIFT) ); // XXX more ? /* make sure changes to the buffer are synced */ bus_dmamap_sync(txr->dma.tag, txbuf->map, BUS_DMASYNC_PREWRITE); nm_i = nm_next(nm_i, lim); nic_i = nm_next(nic_i, lim); } kring->nr_hwcur = head; /* synchronize the NIC ring */ bus_dmamap_sync(txr->dma.tag, txr->dma.map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); /* (re)start the tx unit up to slot nic_i (excluded) */ wr32(vsi->hw, txr->tail, nic_i); } /* * Second part: reclaim buffers for completed transmissions. */ - nic_i = LE32_TO_CPU(*(volatile __le32 *)&txr->base[que->num_desc]); - if (nic_i != txr->next_to_clean) { + nic_i = LE32_TO_CPU(*(volatile __le32 *)&txr->base[que->num_tx_desc]); + if (unlikely(nic_i >= que->num_tx_desc)) { + nm_prerr("error: invalid value of hw head index %u", nic_i); + } else if (nic_i != txr->next_to_clean) { /* some tx completed, increment avail */ txr->next_to_clean = nic_i; kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim); } return 0; } /* * Reconcile kernel and user view of the receive ring. * Same as for the txsync, this routine must be efficient. * The caller guarantees a single invocations, but races against * the rest of the driver should be handled here. * * On call, kring->rhead is the first packet that userspace wants * to keep, and kring->rcur is the wakeup point. * The kernel has previously reported packets up to kring->rtail. * * If (flags & NAF_FORCE_READ) also check for incoming packets irrespective * of whether or not we received an interrupt. */ int ixl_netmap_rxsync(struct netmap_kring *kring, int flags) { struct netmap_adapter *na = kring->na; struct ifnet *ifp = na->ifp; struct netmap_ring *ring = kring->ring; u_int nm_i; /* index into the netmap ring */ u_int nic_i; /* index into the NIC ring */ u_int n; u_int const lim = kring->nkr_num_slots - 1; u_int const head = kring->rhead; int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR; /* device-specific */ struct ixl_vsi *vsi = ifp->if_softc; struct ixl_queue *que = &vsi->queues[kring->ring_id]; struct rx_ring *rxr = &que->rxr; if (head > lim) return netmap_ring_reinit(kring); /* XXX check sync modes */ bus_dmamap_sync(rxr->dma.tag, rxr->dma.map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); /* * First part: import newly received packets. * * nm_i is the index of the next free slot in the netmap ring, * nic_i is the index of the next received packet in the NIC ring, * and they may differ in case if_init() has been called while * in netmap mode. For the receive ring we have * * nic_i = rxr->next_check; * nm_i = kring->nr_hwtail (previous) * and * nm_i == (nic_i + kring->nkr_hwofs) % ring_size * * rxr->next_check is set to 0 on a ring reinit */ if (netmap_no_pendintr || force_update) { int crclen = ixl_crcstrip ? 
0 : 4; nic_i = rxr->next_check; // or also k2n(kring->nr_hwtail) nm_i = netmap_idx_n2k(kring, nic_i); for (n = 0; ; n++) { union i40e_32byte_rx_desc *curr = &rxr->base[nic_i]; uint64_t qword = le64toh(curr->wb.qword1.status_error_len); uint32_t staterr = (qword & I40E_RXD_QW1_STATUS_MASK) >> I40E_RXD_QW1_STATUS_SHIFT; if ((staterr & (1 << I40E_RX_DESC_STATUS_DD_SHIFT)) == 0) break; ring->slot[nm_i].len = ((qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >> I40E_RXD_QW1_LENGTH_PBUF_SHIFT) - crclen; ring->slot[nm_i].flags = 0; bus_dmamap_sync(rxr->ptag, rxr->buffers[nic_i].pmap, BUS_DMASYNC_POSTREAD); nm_i = nm_next(nm_i, lim); nic_i = nm_next(nic_i, lim); } if (n) { /* update the state variables */ if (netmap_no_pendintr && !force_update) { /* diagnostics */ ixl_rx_miss ++; ixl_rx_miss_bufs += n; } rxr->next_check = nic_i; kring->nr_hwtail = nm_i; } kring->nr_kflags &= ~NKR_PENDINTR; } /* * Second part: skip past packets that userspace has released. * (kring->nr_hwcur to head excluded), * and make the buffers available for reception. * As usual nm_i is the index in the netmap ring, * nic_i is the index in the NIC ring, and * nm_i == (nic_i + kring->nkr_hwofs) % ring_size */ nm_i = kring->nr_hwcur; if (nm_i != head) { nic_i = netmap_idx_k2n(kring, nm_i); for (n = 0; nm_i != head; n++) { struct netmap_slot *slot = &ring->slot[nm_i]; uint64_t paddr; void *addr = PNMB(na, slot, &paddr); union i40e_32byte_rx_desc *curr = &rxr->base[nic_i]; struct ixl_rx_buf *rxbuf = &rxr->buffers[nic_i]; if (addr == NETMAP_BUF_BASE(na)) /* bad buf */ goto ring_reset; if (slot->flags & NS_BUF_CHANGED) { /* buffer has changed, reload map */ netmap_reload_map(na, rxr->ptag, rxbuf->pmap, addr); slot->flags &= ~NS_BUF_CHANGED; } curr->read.pkt_addr = htole64(paddr); curr->read.hdr_addr = 0; // XXX needed bus_dmamap_sync(rxr->ptag, rxbuf->pmap, BUS_DMASYNC_PREREAD); nm_i = nm_next(nm_i, lim); nic_i = nm_next(nic_i, lim); } kring->nr_hwcur = head; bus_dmamap_sync(rxr->dma.tag, rxr->dma.map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); /* * IMPORTANT: we must leave one free slot in the ring, * so move nic_i back by one unit */ nic_i = nm_prev(nic_i, lim); wr32(vsi->hw, rxr->tail, nic_i); } return 0; ring_reset: return netmap_ring_reinit(kring); } #endif /* !NETMAP_IXL_MAIN */ /* end of file */