Index: sys/amd64/conf/GENERIC
===================================================================
--- sys/amd64/conf/GENERIC
+++ sys/amd64/conf/GENERIC
@@ -367,3 +367,5 @@
 
 # The crypto framework is required by IPSEC
 device		crypto		# Required by IPSEC
+
+options 	IFLIB
Index: sys/conf/files
===================================================================
--- sys/conf/files
+++ sys/conf/files
@@ -2038,6 +2038,8 @@
 	compile-with "${NORMAL_C} -I$S/dev/ixgbe"
 dev/ixgbe/ixgbe_osdep.c	optional ix inet | ixv inet \
 	compile-with "${NORMAL_C} -I$S/dev/ixgbe"
+dev/ixgbe/ixgbe_sysctl.c	optional ix inet | ixv inet \
+	compile-with "${NORMAL_C} -I$S/dev/ixgbe"
 dev/ixgbe/ixgbe_phy.c	optional ix inet | ixv inet \
 	compile-with "${NORMAL_C} -I$S/dev/ixgbe"
 dev/ixgbe/ixgbe_api.c	optional ix inet | ixv inet \
 	compile-with "${NORMAL_C} -I$S/dev/ixgbe"
Index: sys/dev/ixgbe/if_ix.c
===================================================================
--- sys/dev/ixgbe/if_ix.c
+++ sys/dev/ixgbe/if_ix.c
@@ -32,5970 +32,13 @@
 ******************************************************************************/
 /*$FreeBSD$*/
-
-#ifndef IXGBE_STANDALONE_BUILD
-#include "opt_inet.h"
-#include "opt_inet6.h"
-#include "opt_rss.h"
-#endif
-
-#include "ixgbe.h"
-
-#ifdef RSS
-#include <net/rss_config.h>
-#include <netinet/in_rss.h>
-#endif
-
-/*********************************************************************
- *  Driver version
- *********************************************************************/
-char ixgbe_driver_version[] = "3.1.13-k";
-
-
-/*********************************************************************
- *  PCI Device ID Table
- *
- *  Used by probe to select devices to load on
- *  Last field stores an index into ixgbe_strings
- *  Last entry must be all 0s
- *
- *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
- *********************************************************************/
-
-static ixgbe_vendor_info_t ixgbe_vendor_info_array[] =
-{
-	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_DUAL_PORT, 0, 0, 0},
-	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_SINGLE_PORT, 0, 0, 0},
-	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_CX4, 0, 0, 0},
-	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT, 0, 0, 0},
-	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT2, 0, 0, 0},
-	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598, 0, 0, 0},
-	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_DA_DUAL_PORT, 0, 0, 0},
-	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_CX4_DUAL_PORT, 0, 0, 0},
-	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_XF_LR, 0, 0, 0},
-	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM, 0, 0, 0},
-	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_SFP_LOM, 0, 0, 0},
-	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4, 0, 0, 0},
-	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4_MEZZ, 0, 0, 0},
-	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP, 0, 0, 0},
-	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_XAUI_LOM, 0, 0, 0},
-	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_CX4, 0, 0, 0},
-	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_T3_LOM, 0, 0, 0},
-	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_COMBO_BACKPLANE, 0, 0, 0},
-	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_BACKPLANE_FCOE, 0, 0, 0},
-	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_SF2, 0, 0, 0},
-	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_FCOE, 0, 0, 0},
-	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599EN_SFP, 0, 0, 0},
-	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_SF_QP, 0, 0, 0},
-	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_QSFP_SF_QP, 0, 0, 0},
-	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540T, 0, 0, 0},
-	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540T1, 0, 0, 0},
-	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550T, 0, 0, 0},
-	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550T1, 0, 0, 0},
-	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_X_KR, 0, 0, 0},
-	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_X_KX4, 0, 0, 0},
-	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_X_10G_T, 0, 0, 0},
-	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_X_SFP, 0, 0, 0},
-	/* required last entry */
-	{0, 0, 0, 0, 0}
-};
-
-/*********************************************************************
- *  Table of branding strings
- *********************************************************************/
-
-static char *ixgbe_strings[] = {
-	"Intel(R) PRO/10GbE PCI-Express Network Driver"
-};
-
-/*********************************************************************
- *  Function prototypes
- *********************************************************************/
-static int	ixgbe_probe(device_t);
-static int	ixgbe_attach(device_t);
-static int	ixgbe_detach(device_t);
-static int	ixgbe_shutdown(device_t);
-static int	ixgbe_suspend(device_t);
-static int	ixgbe_resume(device_t);
-static int	ixgbe_ioctl(struct ifnet *, u_long, caddr_t);
-static void	ixgbe_init(void *);
-static void	ixgbe_init_locked(struct adapter *);
-static void	ixgbe_stop(void *);
-#if __FreeBSD_version >= 1100036
-static uint64_t	ixgbe_get_counter(struct ifnet *, ift_counter);
-#endif
-static void	ixgbe_add_media_types(struct adapter *);
-static void	ixgbe_media_status(struct ifnet *, struct ifmediareq *);
-static int	ixgbe_media_change(struct ifnet *);
-static void	ixgbe_identify_hardware(struct adapter *);
-static int	ixgbe_allocate_pci_resources(struct adapter *);
-static void	ixgbe_get_slot_info(struct adapter *);
-static int	ixgbe_allocate_msix(struct adapter *);
-static int	ixgbe_allocate_legacy(struct adapter *);
-static int	ixgbe_setup_msix(struct adapter *);
-static void	ixgbe_free_pci_resources(struct adapter *);
-static void	ixgbe_local_timer(void *);
-static int	ixgbe_setup_interface(device_t, struct adapter *);
-static void	ixgbe_config_gpie(struct adapter *);
-static void	ixgbe_config_dmac(struct adapter *);
-static void	ixgbe_config_delay_values(struct adapter *);
-static void	ixgbe_config_link(struct adapter *);
-static void	ixgbe_check_wol_support(struct adapter *);
-static int	ixgbe_setup_low_power_mode(struct adapter *);
-static void	ixgbe_rearm_queues(struct adapter *, u64);
-
-static void	ixgbe_initialize_transmit_units(struct adapter *);
-static void	ixgbe_initialize_receive_units(struct adapter *);
-static void	ixgbe_enable_rx_drop(struct adapter *);
-static void	ixgbe_disable_rx_drop(struct adapter *);
-static void	ixgbe_initialize_rss_mapping(struct adapter *);
-
-static void	ixgbe_enable_intr(struct adapter *);
-static void	ixgbe_disable_intr(struct adapter *);
-static void	ixgbe_update_stats_counters(struct adapter *);
-static void	ixgbe_set_promisc(struct adapter *);
-static void	ixgbe_set_multi(struct adapter *);
-static void	ixgbe_update_link_status(struct adapter *);
-static void	ixgbe_set_ivar(struct adapter *, u8, u8, s8);
-static void	ixgbe_configure_ivars(struct adapter *);
-static u8 *	ixgbe_mc_array_itr(struct ixgbe_hw *, u8 **, u32 *);
-
-static void	ixgbe_setup_vlan_hw_support(struct adapter *);
-static void	ixgbe_register_vlan(void *, struct ifnet *, u16);
-static void	ixgbe_unregister_vlan(void *, struct ifnet *, u16);
-
-static void	ixgbe_add_device_sysctls(struct adapter *);
-static void	ixgbe_add_hw_stats(struct adapter *);
-static int	ixgbe_set_flowcntl(struct adapter *, int);
-static int	ixgbe_set_advertise(struct adapter *, int);
-
-/* Sysctl handlers */
-static void	ixgbe_set_sysctl_value(struct adapter *, const char *,
-		    const char *, int *, int);
-static int	ixgbe_sysctl_flowcntl(SYSCTL_HANDLER_ARGS);
-static int	ixgbe_sysctl_advertise(SYSCTL_HANDLER_ARGS);
-static int	ixgbe_sysctl_thermal_test(SYSCTL_HANDLER_ARGS);
-static int	ixgbe_sysctl_dmac(SYSCTL_HANDLER_ARGS);
-static int	ixgbe_sysctl_phy_temp(SYSCTL_HANDLER_ARGS);
-static int	ixgbe_sysctl_phy_overtemp_occurred(SYSCTL_HANDLER_ARGS);
-#ifdef IXGBE_DEBUG
-static int	ixgbe_sysctl_power_state(SYSCTL_HANDLER_ARGS);
-static int	ixgbe_sysctl_print_rss_config(SYSCTL_HANDLER_ARGS);
-#endif
-static int	ixgbe_sysctl_wol_enable(SYSCTL_HANDLER_ARGS);
-static int	ixgbe_sysctl_wufc(SYSCTL_HANDLER_ARGS);
-static int	ixgbe_sysctl_eee_enable(SYSCTL_HANDLER_ARGS);
-static int	ixgbe_sysctl_eee_negotiated(SYSCTL_HANDLER_ARGS);
-static int	ixgbe_sysctl_eee_rx_lpi_status(SYSCTL_HANDLER_ARGS);
-static int	ixgbe_sysctl_eee_tx_lpi_status(SYSCTL_HANDLER_ARGS);
-static int	ixgbe_sysctl_eee_tx_lpi_delay(SYSCTL_HANDLER_ARGS);
-
-/* Support for pluggable optic modules */
-static bool	ixgbe_sfp_probe(struct adapter *);
-static void	ixgbe_setup_optics(struct adapter *);
-
-/* Legacy (single vector) interrupt handler */
-static void	ixgbe_legacy_irq(void *);
-
-/* The MSI/X Interrupt handlers */
-static void	ixgbe_msix_que(void *);
-static void	ixgbe_msix_link(void *);
-
-/* Deferred interrupt tasklets */
-static void	ixgbe_handle_que(void *, int);
-static void	ixgbe_handle_link(void *, int);
-static void	ixgbe_handle_msf(void *, int);
-static void	ixgbe_handle_mod(void *, int);
-static void	ixgbe_handle_phy(void *, int);
-
-#ifdef IXGBE_FDIR
-static void	ixgbe_reinit_fdir(void *, int);
-#endif
-
-#ifdef PCI_IOV
-static void	ixgbe_ping_all_vfs(struct adapter *);
-static void	ixgbe_handle_mbx(void *, int);
-static int	ixgbe_init_iov(device_t, u16, const nvlist_t *);
-static void	ixgbe_uninit_iov(device_t);
-static int	ixgbe_add_vf(device_t, u16, const nvlist_t *);
-static void	ixgbe_initialize_iov(struct adapter *);
-static void	ixgbe_recalculate_max_frame(struct adapter *);
-static void	ixgbe_init_vf(struct adapter *, struct ixgbe_vf *);
-#endif /* PCI_IOV */
-
-
-/*********************************************************************
- *  FreeBSD Device Interface Entry Points
- *********************************************************************/
-
-static device_method_t ix_methods[] = {
-	/* Device interface */
-	DEVMETHOD(device_probe, ixgbe_probe),
-	DEVMETHOD(device_attach, ixgbe_attach),
-	DEVMETHOD(device_detach, ixgbe_detach),
-	DEVMETHOD(device_shutdown, ixgbe_shutdown),
-	DEVMETHOD(device_suspend, ixgbe_suspend),
-	DEVMETHOD(device_resume, ixgbe_resume),
-#ifdef PCI_IOV
-	DEVMETHOD(pci_iov_init, ixgbe_init_iov),
-	DEVMETHOD(pci_iov_uninit, ixgbe_uninit_iov),
-	DEVMETHOD(pci_iov_add_vf, ixgbe_add_vf),
-#endif /* PCI_IOV */
-	DEVMETHOD_END
-};
-
-static driver_t ix_driver = {
-	"ix", ix_methods, sizeof(struct adapter),
-};
-
-devclass_t ix_devclass;
-DRIVER_MODULE(ix, pci, ix_driver, ix_devclass, 0, 0);
-
-MODULE_DEPEND(ix, pci, 1, 1, 1);
-MODULE_DEPEND(ix, ether, 1, 1, 1);
-#ifdef DEV_NETMAP
-MODULE_DEPEND(ix, netmap, 1, 1, 1);
-#endif /* DEV_NETMAP */
-
-/*
-** TUNEABLE PARAMETERS:
-*/
-
-static SYSCTL_NODE(_hw, OID_AUTO, ix, CTLFLAG_RD, 0,
-    "IXGBE driver parameters");
-
-/*
-** AIM: Adaptive Interrupt Moderation
-** which means that the interrupt rate
-** is varied over time based on the
-** traffic for that interrupt vector
-*/
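/*
 * Editorial aside, not part of the patch: a self-contained sketch of the AIM
 * computation performed by the removed ixgbe_msix_que() (its body appears
 * further down in this diff).  On each interrupt the driver derives a new
 * EITR interval from the average frame size seen on the queue since the last
 * write.  The function name and parameter list here are hypothetical.
 */
#include <stdbool.h>
#include <stdint.h>

#define IXGBE_EITR_CNT_WDIS 0x80000000	/* write-disable bit, non-82598 MACs */

static uint32_t
aim_newitr(uint32_t tx_bytes, uint32_t tx_packets,
    uint32_t rx_bytes, uint32_t rx_packets, bool is_82598)
{
	uint32_t newitr = 0;

	/* Average frame size, taking the larger of the TX and RX sides. */
	if (tx_bytes != 0 && tx_packets != 0)
		newitr = tx_bytes / tx_packets;
	if (rx_bytes != 0 && rx_packets != 0 &&
	    rx_bytes / rx_packets > newitr)
		newitr = rx_bytes / rx_packets;
	newitr += 24;			/* account for hardware frame, CRC */
	if (newitr > 3000)		/* set an upper boundary */
		newitr = 3000;
	/* Be nice to the mid range. */
	if (newitr > 300 && newitr < 1200)
		newitr = newitr / 3;
	else
		newitr = newitr / 2;
	/* 82598 latches the interval in both halves of the register. */
	if (is_82598)
		newitr |= newitr << 16;
	else
		newitr |= IXGBE_EITR_CNT_WDIS;
	return (newitr);
}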
-static int ixgbe_enable_aim = TRUE;
-SYSCTL_INT(_hw_ix, OID_AUTO, enable_aim, CTLFLAG_RWTUN, &ixgbe_enable_aim, 0,
-    "Enable adaptive interrupt moderation");
-
-static int ixgbe_max_interrupt_rate = (4000000 / IXGBE_LOW_LATENCY);
-SYSCTL_INT(_hw_ix, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN,
-    &ixgbe_max_interrupt_rate, 0, "Maximum interrupts per second");
-
-/* How many packets rxeof tries to clean at a time */
-static int ixgbe_rx_process_limit = 256;
-SYSCTL_INT(_hw_ix, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
-    &ixgbe_rx_process_limit, 0,
-    "Maximum number of received packets to process at a time,"
-    "-1 means unlimited");
-
-/* How many packets txeof tries to clean at a time */
-static int ixgbe_tx_process_limit = 256;
-SYSCTL_INT(_hw_ix, OID_AUTO, tx_process_limit, CTLFLAG_RDTUN,
-    &ixgbe_tx_process_limit, 0,
-    "Maximum number of sent packets to process at a time,"
-    "-1 means unlimited");
-
-/* Flow control setting, default to full */
-static int ixgbe_flow_control = ixgbe_fc_full;
-SYSCTL_INT(_hw_ix, OID_AUTO, flow_control, CTLFLAG_RDTUN,
-    &ixgbe_flow_control, 0, "Default flow control used for all adapters");
-
-/* Advertise Speed, default to 0 (auto) */
-static int ixgbe_advertise_speed = 0;
-SYSCTL_INT(_hw_ix, OID_AUTO, advertise_speed, CTLFLAG_RDTUN,
-    &ixgbe_advertise_speed, 0, "Default advertised speed for all adapters");
-
-/*
-** Smart speed setting, default to on.
-** This only works as a compile option
-** right now as it's set during attach;
-** set this to 'ixgbe_smart_speed_off'
-** to disable.
-*/
-static int ixgbe_smart_speed = ixgbe_smart_speed_on;
-
-/*
- * MSIX should be the default for best performance,
- * but this allows it to be forced off for testing.
- */
-static int ixgbe_enable_msix = 1;
-SYSCTL_INT(_hw_ix, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &ixgbe_enable_msix, 0,
-    "Enable MSI-X interrupts");
-
-/*
- * Number of Queues, can be set to 0,
- * it then autoconfigures based on the
- * number of cpus with a max of 8. This
- * can be overridden manually here.
- */
-static int ixgbe_num_queues = 0;
-SYSCTL_INT(_hw_ix, OID_AUTO, num_queues, CTLFLAG_RDTUN, &ixgbe_num_queues, 0,
-    "Number of queues to configure, 0 indicates autoconfigure");
-
-/*
-** Number of TX descriptors per ring,
-** setting higher than RX as this seems
-** the better performing choice.
-*/
-static int ixgbe_txd = PERFORM_TXD;
-SYSCTL_INT(_hw_ix, OID_AUTO, txd, CTLFLAG_RDTUN, &ixgbe_txd, 0,
-    "Number of transmit descriptors per queue");
-
-/* Number of RX descriptors per ring */
-static int ixgbe_rxd = PERFORM_RXD;
-SYSCTL_INT(_hw_ix, OID_AUTO, rxd, CTLFLAG_RDTUN, &ixgbe_rxd, 0,
-    "Number of receive descriptors per queue");
-
-/*
-** Defining this on will allow the use
-** of unsupported SFP+ modules, note that
-** doing so you are on your own :)
-*/
-static int allow_unsupported_sfp = FALSE;
-TUNABLE_INT("hw.ix.unsupported_sfp", &allow_unsupported_sfp);
-
-/* Keep running tab on them for sanity check */
-static int ixgbe_total_ports;
-
-#ifdef IXGBE_FDIR
-/*
-** Flow Director actually 'steals'
-** part of the packet buffer as its
-** filter pool, this variable controls
-** how much it uses:
-**  0 = 64K, 1 = 128K, 2 = 256K
-*/
-static int fdir_pballoc = 1;
-#endif
-
-#ifdef DEV_NETMAP
-/*
- * The #ifdef DEV_NETMAP / #endif blocks in this file are meant to
- * be a reference on how to implement netmap support in a driver.
- * Additional comments are in ixgbe_netmap.h .
- *
- * <dev/netmap/ixgbe_netmap.h> contains functions for netmap support
- * that extend the standard driver.
- */
-#include <dev/netmap/ixgbe_netmap.h>
-#endif /* DEV_NETMAP */
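/*
 * Editorial aside, not part of the patch: how ixgbe_attach() (below) vets the
 * hw.ix.txd/hw.ix.rxd tunables declared above.  A ring must satisfy the
 * descriptor-base alignment and the MIN/MAX bounds, otherwise the driver
 * falls back to the default with a console warning.  A minimal sketch,
 * assuming DBA_ALIGN is 128 as in ixgbe.h; the function name is hypothetical,
 * and desc_size would be sizeof(union ixgbe_adv_tx_desc) or the RX equivalent.
 */
#include <stddef.h>

#define DBA_ALIGN 128			/* descriptor base address alignment */

static int
ring_size_or_default(int requested, size_t desc_size, int min_d, int max_d,
    int default_d)
{
	if (((size_t)requested * desc_size) % DBA_ALIGN != 0 ||
	    requested < min_d || requested > max_d)
		return (default_d);	/* "config issue, using default!" */
	return (requested);
}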
-
-static MALLOC_DEFINE(M_IXGBE, "ix", "ix driver allocations");
-
-/*********************************************************************
- *  Device identification routine
- *
- *  ixgbe_probe determines if the driver should be loaded on
- *  adapter based on PCI vendor/device id of the adapter.
- *
- *  return BUS_PROBE_DEFAULT on success, positive on failure
- *********************************************************************/
-
-static int
-ixgbe_probe(device_t dev)
-{
-	ixgbe_vendor_info_t *ent;
-
-	u16	pci_vendor_id = 0;
-	u16	pci_device_id = 0;
-	u16	pci_subvendor_id = 0;
-	u16	pci_subdevice_id = 0;
-	char	adapter_name[256];
-
-	INIT_DEBUGOUT("ixgbe_probe: begin");
-
-	pci_vendor_id = pci_get_vendor(dev);
-	if (pci_vendor_id != IXGBE_INTEL_VENDOR_ID)
-		return (ENXIO);
-
-	pci_device_id = pci_get_device(dev);
-	pci_subvendor_id = pci_get_subvendor(dev);
-	pci_subdevice_id = pci_get_subdevice(dev);
-
-	ent = ixgbe_vendor_info_array;
-	while (ent->vendor_id != 0) {
-		if ((pci_vendor_id == ent->vendor_id) &&
-		    (pci_device_id == ent->device_id) &&
-
-		    ((pci_subvendor_id == ent->subvendor_id) ||
-		     (ent->subvendor_id == 0)) &&
-
-		    ((pci_subdevice_id == ent->subdevice_id) ||
-		     (ent->subdevice_id == 0))) {
-			sprintf(adapter_name, "%s, Version - %s",
-			    ixgbe_strings[ent->index],
-			    ixgbe_driver_version);
-			device_set_desc_copy(dev, adapter_name);
-			++ixgbe_total_ports;
-			return (BUS_PROBE_DEFAULT);
-		}
-		ent++;
-	}
-	return (ENXIO);
-}
-
-/*********************************************************************
- *  Device initialization routine
- *
- *  The attach entry point is called when the driver is being loaded.
- *  This routine identifies the type of hardware, allocates all resources
- *  and initializes the hardware.
- *
- *  return 0 on success, positive on failure
- *********************************************************************/
-
-static int
-ixgbe_attach(device_t dev)
-{
-	struct adapter *adapter;
-	struct ixgbe_hw *hw;
-	int		error = 0;
-	u16		csum;
-	u32		ctrl_ext;
-
-	INIT_DEBUGOUT("ixgbe_attach: begin");
-
-	/* Allocate, clear, and link in our adapter structure */
-	adapter = device_get_softc(dev);
-	adapter->dev = dev;
-	hw = &adapter->hw;
-
-#ifdef DEV_NETMAP
-	adapter->init_locked = ixgbe_init_locked;
-	adapter->stop_locked = ixgbe_stop;
-#endif
-
-	/* Core Lock Init*/
-	IXGBE_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
-
-	/* Set up the timer callout */
-	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
-
-	/* Determine hardware revision */
-	ixgbe_identify_hardware(adapter);
-
-	/* Do base PCI setup - map BAR0 */
-	if (ixgbe_allocate_pci_resources(adapter)) {
-		device_printf(dev, "Allocation of PCI resources failed\n");
-		error = ENXIO;
-		goto err_out;
-	}
-
-	/* Sysctls for limiting the amount of work done in the taskqueues */
-	ixgbe_set_sysctl_value(adapter, "rx_processing_limit",
-	    "max number of rx packets to process",
-	    &adapter->rx_process_limit, ixgbe_rx_process_limit);
-
-	ixgbe_set_sysctl_value(adapter, "tx_processing_limit",
-	    "max number of tx packets to process",
-	    &adapter->tx_process_limit, ixgbe_tx_process_limit);
-
-	/* Do descriptor calc and sanity checks */
-	if (((ixgbe_txd * sizeof(union ixgbe_adv_tx_desc)) % DBA_ALIGN) != 0 ||
-	    ixgbe_txd < MIN_TXD || ixgbe_txd > MAX_TXD) {
-		device_printf(dev, "TXD config issue, using default!\n");
-		adapter->num_tx_desc = DEFAULT_TXD;
-	} else
-		adapter->num_tx_desc = ixgbe_txd;
-
-	/*
-	** With many RX rings it is easy to exceed the
-	** system mbuf allocation.
Tuning nmbclusters - ** can alleviate this. - */ - if (nmbclusters > 0) { - int s; - s = (ixgbe_rxd * adapter->num_queues) * ixgbe_total_ports; - if (s > nmbclusters) { - device_printf(dev, "RX Descriptors exceed " - "system mbuf max, using default instead!\n"); - ixgbe_rxd = DEFAULT_RXD; - } - } - - if (((ixgbe_rxd * sizeof(union ixgbe_adv_rx_desc)) % DBA_ALIGN) != 0 || - ixgbe_rxd < MIN_RXD || ixgbe_rxd > MAX_RXD) { - device_printf(dev, "RXD config issue, using default!\n"); - adapter->num_rx_desc = DEFAULT_RXD; - } else - adapter->num_rx_desc = ixgbe_rxd; - - /* Allocate our TX/RX Queues */ - if (ixgbe_allocate_queues(adapter)) { - error = ENOMEM; - goto err_out; - } - - /* Allocate multicast array memory. */ - adapter->mta = malloc(sizeof(*adapter->mta) * - MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT); - if (adapter->mta == NULL) { - device_printf(dev, "Can not allocate multicast setup array\n"); - error = ENOMEM; - goto err_late; - } - - /* Initialize the shared code */ - hw->allow_unsupported_sfp = allow_unsupported_sfp; - error = ixgbe_init_shared_code(hw); - if (error == IXGBE_ERR_SFP_NOT_PRESENT) { - /* - ** No optics in this port, set up - ** so the timer routine will probe - ** for later insertion. - */ - adapter->sfp_probe = TRUE; - error = 0; - } else if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) { - device_printf(dev, "Unsupported SFP+ module detected!\n"); - error = EIO; - goto err_late; - } else if (error) { - device_printf(dev, "Unable to initialize the shared code\n"); - error = EIO; - goto err_late; - } - - /* Make sure we have a good EEPROM before we read from it */ - if (ixgbe_validate_eeprom_checksum(&adapter->hw, &csum) < 0) { - device_printf(dev, "The EEPROM Checksum Is Not Valid\n"); - error = EIO; - goto err_late; - } - - error = ixgbe_init_hw(hw); - switch (error) { - case IXGBE_ERR_EEPROM_VERSION: - device_printf(dev, "This device is a pre-production adapter/" - "LOM. Please be aware there may be issues associated " - "with your hardware.\nIf you are experiencing problems " - "please contact your Intel or hardware representative " - "who provided you with this hardware.\n"); - break; - case IXGBE_ERR_SFP_NOT_SUPPORTED: - device_printf(dev, "Unsupported SFP+ Module\n"); - error = EIO; - goto err_late; - case IXGBE_ERR_SFP_NOT_PRESENT: - device_printf(dev, "No SFP+ Module found\n"); - /* falls thru */ - default: - break; - } - - /* hw.ix defaults init */ - ixgbe_set_advertise(adapter, ixgbe_advertise_speed); - ixgbe_set_flowcntl(adapter, ixgbe_flow_control); - adapter->enable_aim = ixgbe_enable_aim; - - if ((adapter->msix > 1) && (ixgbe_enable_msix)) - error = ixgbe_allocate_msix(adapter); - else - error = ixgbe_allocate_legacy(adapter); - if (error) - goto err_late; - - /* Enable the optics for 82599 SFP+ fiber */ - ixgbe_enable_tx_laser(hw); - - /* Enable power to the phy. 
*/ - ixgbe_set_phy_power(hw, TRUE); - - /* Setup OS specific network interface */ - if (ixgbe_setup_interface(dev, adapter) != 0) - goto err_late; - - /* Initialize statistics */ - ixgbe_update_stats_counters(adapter); - - /* Register for VLAN events */ - adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config, - ixgbe_register_vlan, adapter, EVENTHANDLER_PRI_FIRST); - adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig, - ixgbe_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST); - - /* Check PCIE slot type/speed/width */ - ixgbe_get_slot_info(adapter); - - /* Set an initial default flow control & dmac value */ - adapter->fc = ixgbe_fc_full; - adapter->dmac = 0; - adapter->eee_enabled = 0; - -#ifdef PCI_IOV - if ((hw->mac.type != ixgbe_mac_82598EB) && (adapter->msix > 1)) { - nvlist_t *pf_schema, *vf_schema; - - hw->mbx.ops.init_params(hw); - pf_schema = pci_iov_schema_alloc_node(); - vf_schema = pci_iov_schema_alloc_node(); - pci_iov_schema_add_unicast_mac(vf_schema, "mac-addr", 0, NULL); - pci_iov_schema_add_bool(vf_schema, "mac-anti-spoof", - IOV_SCHEMA_HASDEFAULT, TRUE); - pci_iov_schema_add_bool(vf_schema, "allow-set-mac", - IOV_SCHEMA_HASDEFAULT, FALSE); - pci_iov_schema_add_bool(vf_schema, "allow-promisc", - IOV_SCHEMA_HASDEFAULT, FALSE); - error = pci_iov_attach(dev, pf_schema, vf_schema); - if (error != 0) { - device_printf(dev, - "Error %d setting up SR-IOV\n", error); - } - } -#endif /* PCI_IOV */ - - /* Check for certain supported features */ - ixgbe_check_wol_support(adapter); - - /* Add sysctls */ - ixgbe_add_device_sysctls(adapter); - ixgbe_add_hw_stats(adapter); - - /* let hardware know driver is loaded */ - ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT); - ctrl_ext |= IXGBE_CTRL_EXT_DRV_LOAD; - IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext); - -#ifdef DEV_NETMAP - ixgbe_netmap_attach(adapter); -#endif /* DEV_NETMAP */ - INIT_DEBUGOUT("ixgbe_attach: end"); - return (0); - -err_late: - ixgbe_free_transmit_structures(adapter); - ixgbe_free_receive_structures(adapter); -err_out: - if (adapter->ifp != NULL) - if_free(adapter->ifp); - ixgbe_free_pci_resources(adapter); - free(adapter->mta, M_DEVBUF); - return (error); -} - -/********************************************************************* - * Device removal routine - * - * The detach entry point is called when the driver is being removed. - * This routine stops the adapter and deallocates all the resources - * that were allocated for driver operation. 
- * - * return 0 on success, positive on failure - *********************************************************************/ - -static int -ixgbe_detach(device_t dev) -{ - struct adapter *adapter = device_get_softc(dev); - struct ix_queue *que = adapter->queues; - struct tx_ring *txr = adapter->tx_rings; - u32 ctrl_ext; - - INIT_DEBUGOUT("ixgbe_detach: begin"); - - /* Make sure VLANS are not using driver */ - if (adapter->ifp->if_vlantrunk != NULL) { - device_printf(dev,"Vlan in use, detach first\n"); - return (EBUSY); - } - -#ifdef PCI_IOV - if (pci_iov_detach(dev) != 0) { - device_printf(dev, "SR-IOV in use; detach first.\n"); - return (EBUSY); - } -#endif /* PCI_IOV */ - - ether_ifdetach(adapter->ifp); - /* Stop the adapter */ - IXGBE_CORE_LOCK(adapter); - ixgbe_setup_low_power_mode(adapter); - IXGBE_CORE_UNLOCK(adapter); - - for (int i = 0; i < adapter->num_queues; i++, que++, txr++) { - if (que->tq) { -#ifndef IXGBE_LEGACY_TX - taskqueue_drain(que->tq, &txr->txq_task); -#endif - taskqueue_drain(que->tq, &que->que_task); - taskqueue_free(que->tq); - } - } - - /* Drain the Link queue */ - if (adapter->tq) { - taskqueue_drain(adapter->tq, &adapter->link_task); - taskqueue_drain(adapter->tq, &adapter->mod_task); - taskqueue_drain(adapter->tq, &adapter->msf_task); -#ifdef PCI_IOV - taskqueue_drain(adapter->tq, &adapter->mbx_task); -#endif - taskqueue_drain(adapter->tq, &adapter->phy_task); -#ifdef IXGBE_FDIR - taskqueue_drain(adapter->tq, &adapter->fdir_task); -#endif - taskqueue_free(adapter->tq); - } - - /* let hardware know driver is unloading */ - ctrl_ext = IXGBE_READ_REG(&adapter->hw, IXGBE_CTRL_EXT); - ctrl_ext &= ~IXGBE_CTRL_EXT_DRV_LOAD; - IXGBE_WRITE_REG(&adapter->hw, IXGBE_CTRL_EXT, ctrl_ext); - - /* Unregister VLAN events */ - if (adapter->vlan_attach != NULL) - EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach); - if (adapter->vlan_detach != NULL) - EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach); - - callout_drain(&adapter->timer); -#ifdef DEV_NETMAP - netmap_detach(adapter->ifp); -#endif /* DEV_NETMAP */ - ixgbe_free_pci_resources(adapter); - bus_generic_detach(dev); - if_free(adapter->ifp); - - ixgbe_free_transmit_structures(adapter); - ixgbe_free_receive_structures(adapter); - free(adapter->mta, M_DEVBUF); - - IXGBE_CORE_LOCK_DESTROY(adapter); - return (0); -} - -/********************************************************************* - * - * Shutdown entry point - * - **********************************************************************/ - -static int -ixgbe_shutdown(device_t dev) -{ - struct adapter *adapter = device_get_softc(dev); - int error = 0; - - INIT_DEBUGOUT("ixgbe_shutdown: begin"); - - IXGBE_CORE_LOCK(adapter); - error = ixgbe_setup_low_power_mode(adapter); - IXGBE_CORE_UNLOCK(adapter); - - return (error); -} - -/** - * Methods for going from: - * D0 -> D3: ixgbe_suspend - * D3 -> D0: ixgbe_resume - */ -static int -ixgbe_suspend(device_t dev) -{ - struct adapter *adapter = device_get_softc(dev); - int error = 0; - - INIT_DEBUGOUT("ixgbe_suspend: begin"); - - IXGBE_CORE_LOCK(adapter); - - error = ixgbe_setup_low_power_mode(adapter); - - IXGBE_CORE_UNLOCK(adapter); - - return (error); -} - -static int -ixgbe_resume(device_t dev) -{ - struct adapter *adapter = device_get_softc(dev); - struct ifnet *ifp = adapter->ifp; - struct ixgbe_hw *hw = &adapter->hw; - u32 wus; - - INIT_DEBUGOUT("ixgbe_resume: begin"); - - IXGBE_CORE_LOCK(adapter); - - /* Read & clear WUS register */ - wus = IXGBE_READ_REG(hw, IXGBE_WUS); - if (wus) - device_printf(dev, 
"Woken up by (WUS): %#010x\n", - IXGBE_READ_REG(hw, IXGBE_WUS)); - IXGBE_WRITE_REG(hw, IXGBE_WUS, 0xffffffff); - /* And clear WUFC until next low-power transition */ - IXGBE_WRITE_REG(hw, IXGBE_WUFC, 0); - - /* - * Required after D3->D0 transition; - * will re-advertise all previous advertised speeds - */ - if (ifp->if_flags & IFF_UP) - ixgbe_init_locked(adapter); - - IXGBE_CORE_UNLOCK(adapter); - - return (0); -} - - -/********************************************************************* - * Ioctl entry point - * - * ixgbe_ioctl is called when the user wants to configure the - * interface. - * - * return 0 on success, positive on failure - **********************************************************************/ - -static int -ixgbe_ioctl(struct ifnet * ifp, u_long command, caddr_t data) -{ - struct adapter *adapter = ifp->if_softc; - struct ifreq *ifr = (struct ifreq *) data; -#if defined(INET) || defined(INET6) - struct ifaddr *ifa = (struct ifaddr *)data; -#endif - int error = 0; - bool avoid_reset = FALSE; - - switch (command) { - - case SIOCSIFADDR: -#ifdef INET - if (ifa->ifa_addr->sa_family == AF_INET) - avoid_reset = TRUE; -#endif -#ifdef INET6 - if (ifa->ifa_addr->sa_family == AF_INET6) - avoid_reset = TRUE; -#endif - /* - ** Calling init results in link renegotiation, - ** so we avoid doing it when possible. - */ - if (avoid_reset) { - ifp->if_flags |= IFF_UP; - if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) - ixgbe_init(adapter); -#ifdef INET - if (!(ifp->if_flags & IFF_NOARP)) - arp_ifinit(ifp, ifa); -#endif - } else - error = ether_ioctl(ifp, command, data); - break; - case SIOCSIFMTU: - IOCTL_DEBUGOUT("ioctl: SIOCSIFMTU (Set Interface MTU)"); - if (ifr->ifr_mtu > IXGBE_MAX_MTU) { - error = EINVAL; - } else { - IXGBE_CORE_LOCK(adapter); - ifp->if_mtu = ifr->ifr_mtu; - adapter->max_frame_size = - ifp->if_mtu + IXGBE_MTU_HDR; - if (ifp->if_drv_flags & IFF_DRV_RUNNING) - ixgbe_init_locked(adapter); -#ifdef PCI_IOV - ixgbe_recalculate_max_frame(adapter); -#endif - IXGBE_CORE_UNLOCK(adapter); - } - break; - case SIOCSIFFLAGS: - IOCTL_DEBUGOUT("ioctl: SIOCSIFFLAGS (Set Interface Flags)"); - IXGBE_CORE_LOCK(adapter); - if (ifp->if_flags & IFF_UP) { - if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) { - if ((ifp->if_flags ^ adapter->if_flags) & - (IFF_PROMISC | IFF_ALLMULTI)) { - ixgbe_set_promisc(adapter); - } - } else - ixgbe_init_locked(adapter); - } else - if (ifp->if_drv_flags & IFF_DRV_RUNNING) - ixgbe_stop(adapter); - adapter->if_flags = ifp->if_flags; - IXGBE_CORE_UNLOCK(adapter); - break; - case SIOCADDMULTI: - case SIOCDELMULTI: - IOCTL_DEBUGOUT("ioctl: SIOC(ADD|DEL)MULTI"); - if (ifp->if_drv_flags & IFF_DRV_RUNNING) { - IXGBE_CORE_LOCK(adapter); - ixgbe_disable_intr(adapter); - ixgbe_set_multi(adapter); - ixgbe_enable_intr(adapter); - IXGBE_CORE_UNLOCK(adapter); - } - break; - case SIOCSIFMEDIA: - case SIOCGIFMEDIA: - IOCTL_DEBUGOUT("ioctl: SIOCxIFMEDIA (Get/Set Interface Media)"); - error = ifmedia_ioctl(ifp, ifr, &adapter->media, command); - break; - case SIOCSIFCAP: - { - IOCTL_DEBUGOUT("ioctl: SIOCSIFCAP (Set Capabilities)"); - - int mask = ifr->ifr_reqcap ^ ifp->if_capenable; - if (!mask) - break; - - /* HW cannot turn these on/off separately */ - if (mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) { - ifp->if_capenable ^= IFCAP_RXCSUM; - ifp->if_capenable ^= IFCAP_RXCSUM_IPV6; - } - if (mask & IFCAP_TXCSUM) - ifp->if_capenable ^= IFCAP_TXCSUM; - if (mask & IFCAP_TXCSUM_IPV6) - ifp->if_capenable ^= IFCAP_TXCSUM_IPV6; - if (mask & IFCAP_TSO4) - ifp->if_capenable ^= IFCAP_TSO4; - if 
(mask & IFCAP_TSO6) - ifp->if_capenable ^= IFCAP_TSO6; - if (mask & IFCAP_LRO) - ifp->if_capenable ^= IFCAP_LRO; - if (mask & IFCAP_VLAN_HWTAGGING) - ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; - if (mask & IFCAP_VLAN_HWFILTER) - ifp->if_capenable ^= IFCAP_VLAN_HWFILTER; - if (mask & IFCAP_VLAN_HWTSO) - ifp->if_capenable ^= IFCAP_VLAN_HWTSO; - - if (ifp->if_drv_flags & IFF_DRV_RUNNING) { - IXGBE_CORE_LOCK(adapter); - ixgbe_init_locked(adapter); - IXGBE_CORE_UNLOCK(adapter); - } - VLAN_CAPABILITIES(ifp); - break; - } -#if __FreeBSD_version >= 1100036 - case SIOCGI2C: - { - struct ixgbe_hw *hw = &adapter->hw; - struct ifi2creq i2c; - int i; - IOCTL_DEBUGOUT("ioctl: SIOCGI2C (Get I2C Data)"); - error = copyin(ifr->ifr_data, &i2c, sizeof(i2c)); - if (error != 0) - break; - if (i2c.dev_addr != 0xA0 && i2c.dev_addr != 0xA2) { - error = EINVAL; - break; - } - if (i2c.len > sizeof(i2c.data)) { - error = EINVAL; - break; - } - - for (i = 0; i < i2c.len; i++) - hw->phy.ops.read_i2c_byte(hw, i2c.offset + i, - i2c.dev_addr, &i2c.data[i]); - error = copyout(&i2c, ifr->ifr_data, sizeof(i2c)); - break; - } -#endif - default: - IOCTL_DEBUGOUT1("ioctl: UNKNOWN (0x%X)\n", (int)command); - error = ether_ioctl(ifp, command, data); - break; - } - - return (error); -} - -/* - * Set the various hardware offload abilities. - * - * This takes the ifnet's if_capenable flags (e.g. set by the user using - * ifconfig) and indicates to the OS via the ifnet's if_hwassist field what - * mbuf offload flags the driver will understand. - */ -static void -ixgbe_set_if_hwassist(struct adapter *adapter) -{ - struct ifnet *ifp = adapter->ifp; - struct ixgbe_hw *hw = &adapter->hw; - - ifp->if_hwassist = 0; -#if __FreeBSD_version >= 1000000 - if (ifp->if_capenable & IFCAP_TSO4) - ifp->if_hwassist |= CSUM_IP_TSO; - if (ifp->if_capenable & IFCAP_TSO6) - ifp->if_hwassist |= CSUM_IP6_TSO; - if (ifp->if_capenable & IFCAP_TXCSUM) { - ifp->if_hwassist |= (CSUM_IP | CSUM_IP_UDP | CSUM_IP_TCP); - if (hw->mac.type != ixgbe_mac_82598EB) - ifp->if_hwassist |= CSUM_IP_SCTP; - } - if (ifp->if_capenable & IFCAP_TXCSUM_IPV6) { - ifp->if_hwassist |= (CSUM_IP6_UDP | CSUM_IP6_TCP); - if (hw->mac.type != ixgbe_mac_82598EB) - ifp->if_hwassist |= CSUM_IP6_SCTP; - } -#else - if (ifp->if_capenable & IFCAP_TSO) - ifp->if_hwassist |= CSUM_TSO; - if (ifp->if_capenable & IFCAP_TXCSUM) { - ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP); - if (hw->mac.type != ixgbe_mac_82598EB) - ifp->if_hwassist |= CSUM_SCTP; - } -#endif -} - -/********************************************************************* - * Init entry point - * - * This routine is used in two ways. It is used by the stack as - * init entry point in network interface structure. It is also used - * by the driver as a hw/sw initialization routine to get to a - * consistent state. 
- * - * return 0 on success, positive on failure - **********************************************************************/ -#define IXGBE_MHADD_MFS_SHIFT 16 - -static void -ixgbe_init_locked(struct adapter *adapter) -{ - struct ifnet *ifp = adapter->ifp; - device_t dev = adapter->dev; - struct ixgbe_hw *hw = &adapter->hw; - struct tx_ring *txr; - struct rx_ring *rxr; - u32 txdctl, mhadd; - u32 rxdctl, rxctrl; - int err = 0; -#ifdef PCI_IOV - enum ixgbe_iov_mode mode; -#endif - - mtx_assert(&adapter->core_mtx, MA_OWNED); - INIT_DEBUGOUT("ixgbe_init_locked: begin"); - - hw->adapter_stopped = FALSE; - ixgbe_stop_adapter(hw); - callout_stop(&adapter->timer); - -#ifdef PCI_IOV - mode = ixgbe_get_iov_mode(adapter); - adapter->pool = ixgbe_max_vfs(mode); - /* Queue indices may change with IOV mode */ - for (int i = 0; i < adapter->num_queues; i++) { - adapter->rx_rings[i].me = ixgbe_pf_que_index(mode, i); - adapter->tx_rings[i].me = ixgbe_pf_que_index(mode, i); - } -#endif - /* reprogram the RAR[0] in case user changed it. */ - ixgbe_set_rar(hw, 0, hw->mac.addr, adapter->pool, IXGBE_RAH_AV); - - /* Get the latest mac address, User can use a LAA */ - bcopy(IF_LLADDR(ifp), hw->mac.addr, IXGBE_ETH_LENGTH_OF_ADDRESS); - ixgbe_set_rar(hw, 0, hw->mac.addr, adapter->pool, 1); - hw->addr_ctrl.rar_used_count = 1; - - /* Set hardware offload abilities from ifnet flags */ - ixgbe_set_if_hwassist(adapter); - - /* Prepare transmit descriptors and buffers */ - if (ixgbe_setup_transmit_structures(adapter)) { - device_printf(dev, "Could not setup transmit structures\n"); - ixgbe_stop(adapter); - return; - } - - ixgbe_init_hw(hw); -#ifdef PCI_IOV - ixgbe_initialize_iov(adapter); -#endif - ixgbe_initialize_transmit_units(adapter); - - /* Setup Multicast table */ - ixgbe_set_multi(adapter); - - /* Determine the correct mbuf pool, based on frame size */ - if (adapter->max_frame_size <= MCLBYTES) - adapter->rx_mbuf_sz = MCLBYTES; - else - adapter->rx_mbuf_sz = MJUMPAGESIZE; - - /* Prepare receive descriptors and buffers */ - if (ixgbe_setup_receive_structures(adapter)) { - device_printf(dev, "Could not setup receive structures\n"); - ixgbe_stop(adapter); - return; - } - - /* Configure RX settings */ - ixgbe_initialize_receive_units(adapter); - - /* Enable SDP & MSIX interrupts based on adapter */ - ixgbe_config_gpie(adapter); - - /* Set MTU size */ - if (ifp->if_mtu > ETHERMTU) { - /* aka IXGBE_MAXFRS on 82599 and newer */ - mhadd = IXGBE_READ_REG(hw, IXGBE_MHADD); - mhadd &= ~IXGBE_MHADD_MFS_MASK; - mhadd |= adapter->max_frame_size << IXGBE_MHADD_MFS_SHIFT; - IXGBE_WRITE_REG(hw, IXGBE_MHADD, mhadd); - } - - /* Now enable all the queues */ - for (int i = 0; i < adapter->num_queues; i++) { - txr = &adapter->tx_rings[i]; - txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txr->me)); - txdctl |= IXGBE_TXDCTL_ENABLE; - /* Set WTHRESH to 8, burst writeback */ - txdctl |= (8 << 16); - /* - * When the internal queue falls below PTHRESH (32), - * start prefetching as long as there are at least - * HTHRESH (1) buffers ready. The values are taken - * from the Intel linux driver 3.8.21. - * Prefetching enables tx line rate even with 1 queue. 
- */ - txdctl |= (32 << 0) | (1 << 8); - IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txr->me), txdctl); - } - - for (int i = 0, j = 0; i < adapter->num_queues; i++) { - rxr = &adapter->rx_rings[i]; - rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxr->me)); - if (hw->mac.type == ixgbe_mac_82598EB) { - /* - ** PTHRESH = 21 - ** HTHRESH = 4 - ** WTHRESH = 8 - */ - rxdctl &= ~0x3FFFFF; - rxdctl |= 0x080420; - } - rxdctl |= IXGBE_RXDCTL_ENABLE; - IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxr->me), rxdctl); - for (; j < 10; j++) { - if (IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxr->me)) & - IXGBE_RXDCTL_ENABLE) - break; - else - msec_delay(1); - } - wmb(); -#ifdef DEV_NETMAP - /* - * In netmap mode, we must preserve the buffers made - * available to userspace before the if_init() - * (this is true by default on the TX side, because - * init makes all buffers available to userspace). - * - * netmap_reset() and the device specific routines - * (e.g. ixgbe_setup_receive_rings()) map these - * buffers at the end of the NIC ring, so here we - * must set the RDT (tail) register to make sure - * they are not overwritten. - * - * In this driver the NIC ring starts at RDH = 0, - * RDT points to the last slot available for reception (?), - * so RDT = num_rx_desc - 1 means the whole ring is available. - */ - if (ifp->if_capenable & IFCAP_NETMAP) { - struct netmap_adapter *na = NA(adapter->ifp); - struct netmap_kring *kring = &na->rx_rings[i]; - int t = na->num_rx_desc - 1 - nm_kr_rxspace(kring); - - IXGBE_WRITE_REG(hw, IXGBE_RDT(rxr->me), t); - } else -#endif /* DEV_NETMAP */ - IXGBE_WRITE_REG(hw, IXGBE_RDT(rxr->me), adapter->num_rx_desc - 1); - } - - /* Enable Receive engine */ - rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL); - if (hw->mac.type == ixgbe_mac_82598EB) - rxctrl |= IXGBE_RXCTRL_DMBYPS; - rxctrl |= IXGBE_RXCTRL_RXEN; - ixgbe_enable_rx_dma(hw, rxctrl); - - callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter); - - /* Set up MSI/X routing */ - if (ixgbe_enable_msix) { - ixgbe_configure_ivars(adapter); - /* Set up auto-mask */ - if (hw->mac.type == ixgbe_mac_82598EB) - IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE); - else { - IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(0), 0xFFFFFFFF); - IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(1), 0xFFFFFFFF); - } - } else { /* Simple settings for Legacy/MSI */ - ixgbe_set_ivar(adapter, 0, 0, 0); - ixgbe_set_ivar(adapter, 0, 0, 1); - IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE); - } - -#ifdef IXGBE_FDIR - /* Init Flow director */ - if (hw->mac.type != ixgbe_mac_82598EB) { - u32 hdrm = 32 << fdir_pballoc; - - hw->mac.ops.setup_rxpba(hw, 0, hdrm, PBA_STRATEGY_EQUAL); - ixgbe_init_fdir_signature_82599(&adapter->hw, fdir_pballoc); - } -#endif - - /* - * Check on any SFP devices that - * need to be kick-started - */ - if (hw->phy.type == ixgbe_phy_none) { - err = hw->phy.ops.identify(hw); - if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) { - device_printf(dev, - "Unsupported SFP+ module type was detected.\n"); - return; - } - } - - /* Set moderation on the Link interrupt */ - IXGBE_WRITE_REG(hw, IXGBE_EITR(adapter->vector), IXGBE_LINK_ITR); - - /* Configure Energy Efficient Ethernet for supported devices */ - if (hw->mac.ops.setup_eee) { - err = hw->mac.ops.setup_eee(hw, adapter->eee_enabled); - if (err) - device_printf(dev, "Error setting up EEE: %d\n", err); - } - - /* Enable power to the phy. 
*/ - ixgbe_set_phy_power(hw, TRUE); - - /* Config/Enable Link */ - ixgbe_config_link(adapter); - - /* Hardware Packet Buffer & Flow Control setup */ - ixgbe_config_delay_values(adapter); - - /* Initialize the FC settings */ - ixgbe_start_hw(hw); - - /* Set up VLAN support and filter */ - ixgbe_setup_vlan_hw_support(adapter); - - /* Setup DMA Coalescing */ - ixgbe_config_dmac(adapter); - - /* And now turn on interrupts */ - ixgbe_enable_intr(adapter); - -#ifdef PCI_IOV - /* Enable the use of the MBX by the VF's */ - { - u32 reg = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT); - reg |= IXGBE_CTRL_EXT_PFRSTD; - IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, reg); - } -#endif - - /* Now inform the stack we're ready */ - ifp->if_drv_flags |= IFF_DRV_RUNNING; - - return; -} - -static void -ixgbe_init(void *arg) -{ - struct adapter *adapter = arg; - - IXGBE_CORE_LOCK(adapter); - ixgbe_init_locked(adapter); - IXGBE_CORE_UNLOCK(adapter); - return; -} - -static void -ixgbe_config_gpie(struct adapter *adapter) -{ - struct ixgbe_hw *hw = &adapter->hw; - u32 gpie; - - gpie = IXGBE_READ_REG(hw, IXGBE_GPIE); - - /* Fan Failure Interrupt */ - if (hw->device_id == IXGBE_DEV_ID_82598AT) - gpie |= IXGBE_SDP1_GPIEN; - - /* - * Module detection (SDP2) - * Media ready (SDP1) - */ - if (hw->mac.type == ixgbe_mac_82599EB) { - gpie |= IXGBE_SDP2_GPIEN; - if (hw->device_id != IXGBE_DEV_ID_82599_QSFP_SF_QP) - gpie |= IXGBE_SDP1_GPIEN; - } - - /* - * Thermal Failure Detection (X540) - * Link Detection (X552 SFP+, X552/X557-AT) - */ - if (hw->mac.type == ixgbe_mac_X540 || - hw->device_id == IXGBE_DEV_ID_X550EM_X_SFP || - hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T) - gpie |= IXGBE_SDP0_GPIEN_X540; - - if (adapter->msix > 1) { - /* Enable Enhanced MSIX mode */ - gpie |= IXGBE_GPIE_MSIX_MODE; - gpie |= IXGBE_GPIE_EIAME | IXGBE_GPIE_PBA_SUPPORT | - IXGBE_GPIE_OCD; - } - - IXGBE_WRITE_REG(hw, IXGBE_GPIE, gpie); - return; -} - -/* - * Requires adapter->max_frame_size to be set. 
- */ -static void -ixgbe_config_delay_values(struct adapter *adapter) -{ - struct ixgbe_hw *hw = &adapter->hw; - u32 rxpb, frame, size, tmp; - - frame = adapter->max_frame_size; - - /* Calculate High Water */ - switch (hw->mac.type) { - case ixgbe_mac_X540: - case ixgbe_mac_X550: - case ixgbe_mac_X550EM_x: - tmp = IXGBE_DV_X540(frame, frame); - break; - default: - tmp = IXGBE_DV(frame, frame); - break; - } - size = IXGBE_BT2KB(tmp); - rxpb = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(0)) >> 10; - hw->fc.high_water[0] = rxpb - size; - - /* Now calculate Low Water */ - switch (hw->mac.type) { - case ixgbe_mac_X540: - case ixgbe_mac_X550: - case ixgbe_mac_X550EM_x: - tmp = IXGBE_LOW_DV_X540(frame); - break; - default: - tmp = IXGBE_LOW_DV(frame); - break; - } - hw->fc.low_water[0] = IXGBE_BT2KB(tmp); - - hw->fc.requested_mode = adapter->fc; - hw->fc.pause_time = IXGBE_FC_PAUSE; - hw->fc.send_xon = TRUE; -} - -/* -** -** MSIX Interrupt Handlers and Tasklets -** -*/ - -static inline void -ixgbe_enable_queue(struct adapter *adapter, u32 vector) -{ - struct ixgbe_hw *hw = &adapter->hw; - u64 queue = (u64)(1 << vector); - u32 mask; - - if (hw->mac.type == ixgbe_mac_82598EB) { - mask = (IXGBE_EIMS_RTX_QUEUE & queue); - IXGBE_WRITE_REG(hw, IXGBE_EIMS, mask); - } else { - mask = (queue & 0xFFFFFFFF); - if (mask) - IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(0), mask); - mask = (queue >> 32); - if (mask) - IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(1), mask); - } -} - -static inline void -ixgbe_disable_queue(struct adapter *adapter, u32 vector) -{ - struct ixgbe_hw *hw = &adapter->hw; - u64 queue = (u64)(1 << vector); - u32 mask; - - if (hw->mac.type == ixgbe_mac_82598EB) { - mask = (IXGBE_EIMS_RTX_QUEUE & queue); - IXGBE_WRITE_REG(hw, IXGBE_EIMC, mask); - } else { - mask = (queue & 0xFFFFFFFF); - if (mask) - IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(0), mask); - mask = (queue >> 32); - if (mask) - IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(1), mask); - } -} - -static void -ixgbe_handle_que(void *context, int pending) -{ - struct ix_queue *que = context; - struct adapter *adapter = que->adapter; - struct tx_ring *txr = que->txr; - struct ifnet *ifp = adapter->ifp; - - if (ifp->if_drv_flags & IFF_DRV_RUNNING) { - ixgbe_rxeof(que); - IXGBE_TX_LOCK(txr); - ixgbe_txeof(txr); -#ifndef IXGBE_LEGACY_TX - if (!drbr_empty(ifp, txr->br)) - ixgbe_mq_start_locked(ifp, txr); -#else - if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) - ixgbe_start_locked(txr, ifp); -#endif - IXGBE_TX_UNLOCK(txr); - } - - /* Reenable this interrupt */ - if (que->res != NULL) - ixgbe_enable_queue(adapter, que->msix); - else - ixgbe_enable_intr(adapter); - return; -} - - -/********************************************************************* - * - * Legacy Interrupt Service routine - * - **********************************************************************/ - -static void -ixgbe_legacy_irq(void *arg) -{ - struct ix_queue *que = arg; - struct adapter *adapter = que->adapter; - struct ixgbe_hw *hw = &adapter->hw; - struct ifnet *ifp = adapter->ifp; - struct tx_ring *txr = adapter->tx_rings; - bool more; - u32 reg_eicr; - - - reg_eicr = IXGBE_READ_REG(hw, IXGBE_EICR); - - ++que->irqs; - if (reg_eicr == 0) { - ixgbe_enable_intr(adapter); - return; - } - - more = ixgbe_rxeof(que); - - IXGBE_TX_LOCK(txr); - ixgbe_txeof(txr); -#ifdef IXGBE_LEGACY_TX - if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) - ixgbe_start_locked(txr, ifp); -#else - if (!drbr_empty(ifp, txr->br)) - ixgbe_mq_start_locked(ifp, txr); +#ifndef KLD_MODULE +#include "opt_iflib.h" #endif - IXGBE_TX_UNLOCK(txr); - - /* Check for fan failure */ 
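/*
 * Editorial aside, not part of the patch: the MSI-X queue-mask handling used
 * by ixgbe_enable_queue()/ixgbe_disable_queue() above.  On 82598 a single
 * EIMS/EIMC register covers all vectors; later MACs split the 64-bit vector
 * space across two _EX registers.  The write callback below is hypothetical.
 * Note that the 1 is widened to 64 bits before shifting; the removed code
 * computes (u64)(1 << vector), which shifts a 32-bit value first.
 */
#include <stdint.h>

static void
queue_mask_write(uint32_t vector, void (*write_ex)(int idx, uint32_t mask))
{
	uint64_t queue = (uint64_t)1 << vector;	/* widen, then shift */
	uint32_t mask;

	mask = (uint32_t)(queue & 0xFFFFFFFF);	/* vectors 0..31 */
	if (mask)
		write_ex(0, mask);
	mask = (uint32_t)(queue >> 32);		/* vectors 32..63 */
	if (mask)
		write_ex(1, mask);
}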
- if ((hw->device_id == IXGBE_DEV_ID_82598AT) && - (reg_eicr & IXGBE_EICR_GPI_SDP1)) { - device_printf(adapter->dev, "\nCRITICAL: FAN FAILURE!! " - "REPLACE IMMEDIATELY!!\n"); - IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EICR_GPI_SDP1_BY_MAC(hw)); - } - /* Link status change */ - if (reg_eicr & IXGBE_EICR_LSC) - taskqueue_enqueue(adapter->tq, &adapter->link_task); - - /* External PHY interrupt */ - if (hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T && - (reg_eicr & IXGBE_EICR_GPI_SDP0_X540)) - taskqueue_enqueue(adapter->tq, &adapter->phy_task); - - if (more) - taskqueue_enqueue(que->tq, &que->que_task); - else - ixgbe_enable_intr(adapter); - return; -} - - -/********************************************************************* - * - * MSIX Queue Interrupt Service routine - * - **********************************************************************/ -void -ixgbe_msix_que(void *arg) -{ - struct ix_queue *que = arg; - struct adapter *adapter = que->adapter; - struct ifnet *ifp = adapter->ifp; - struct tx_ring *txr = que->txr; - struct rx_ring *rxr = que->rxr; - bool more; - u32 newitr = 0; - - - /* Protect against spurious interrupts */ - if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) - return; - - ixgbe_disable_queue(adapter, que->msix); - ++que->irqs; - - more = ixgbe_rxeof(que); - - IXGBE_TX_LOCK(txr); - ixgbe_txeof(txr); -#ifdef IXGBE_LEGACY_TX - if (!IFQ_DRV_IS_EMPTY(ifp->if_snd)) - ixgbe_start_locked(txr, ifp); +#ifdef IFLIB +#include #else - if (!drbr_empty(ifp, txr->br)) - ixgbe_mq_start_locked(ifp, txr); -#endif - IXGBE_TX_UNLOCK(txr); - - /* Do AIM now? */ - - if (adapter->enable_aim == FALSE) - goto no_calc; - /* - ** Do Adaptive Interrupt Moderation: - ** - Write out last calculated setting - ** - Calculate based on average size over - ** the last interval. 
- */ - if (que->eitr_setting) - IXGBE_WRITE_REG(&adapter->hw, - IXGBE_EITR(que->msix), que->eitr_setting); - - que->eitr_setting = 0; - - /* Idle, do nothing */ - if ((txr->bytes == 0) && (rxr->bytes == 0)) - goto no_calc; - - if ((txr->bytes) && (txr->packets)) - newitr = txr->bytes/txr->packets; - if ((rxr->bytes) && (rxr->packets)) - newitr = max(newitr, - (rxr->bytes / rxr->packets)); - newitr += 24; /* account for hardware frame, crc */ - - /* set an upper boundary */ - newitr = min(newitr, 3000); - - /* Be nice to the mid range */ - if ((newitr > 300) && (newitr < 1200)) - newitr = (newitr / 3); - else - newitr = (newitr / 2); - - if (adapter->hw.mac.type == ixgbe_mac_82598EB) - newitr |= newitr << 16; - else - newitr |= IXGBE_EITR_CNT_WDIS; - - /* save for next interrupt */ - que->eitr_setting = newitr; - - /* Reset state */ - txr->bytes = 0; - txr->packets = 0; - rxr->bytes = 0; - rxr->packets = 0; - -no_calc: - if (more) - taskqueue_enqueue(que->tq, &que->que_task); - else - ixgbe_enable_queue(adapter, que->msix); - return; -} - - -static void -ixgbe_msix_link(void *arg) -{ - struct adapter *adapter = arg; - struct ixgbe_hw *hw = &adapter->hw; - u32 reg_eicr, mod_mask; - - ++adapter->link_irq; - - /* Pause other interrupts */ - IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_EIMC_OTHER); - - /* First get the cause */ - reg_eicr = IXGBE_READ_REG(hw, IXGBE_EICS); - /* Be sure the queue bits are not cleared */ - reg_eicr &= ~IXGBE_EICR_RTX_QUEUE; - /* Clear interrupt with write */ - IXGBE_WRITE_REG(hw, IXGBE_EICR, reg_eicr); - - /* Link status change */ - if (reg_eicr & IXGBE_EICR_LSC) { - IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_EIMC_LSC); - taskqueue_enqueue(adapter->tq, &adapter->link_task); - } - - if (adapter->hw.mac.type != ixgbe_mac_82598EB) { -#ifdef IXGBE_FDIR - if (reg_eicr & IXGBE_EICR_FLOW_DIR) { - /* This is probably overkill :) */ - if (!atomic_cmpset_int(&adapter->fdir_reinit, 0, 1)) - return; - /* Disable the interrupt */ - IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_EICR_FLOW_DIR); - taskqueue_enqueue(adapter->tq, &adapter->fdir_task); - } else -#endif - if (reg_eicr & IXGBE_EICR_ECC) { - device_printf(adapter->dev, "CRITICAL: ECC ERROR!! " - "Please Reboot!!\n"); - IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_ECC); - } - - /* Check for over temp condition */ - if (reg_eicr & IXGBE_EICR_TS) { - device_printf(adapter->dev, "CRITICAL: OVER TEMP!! " - "PHY IS SHUT DOWN!!\n"); - device_printf(adapter->dev, "System shutdown required!\n"); - IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_TS); - } -#ifdef PCI_IOV - if (reg_eicr & IXGBE_EICR_MAILBOX) - taskqueue_enqueue(adapter->tq, &adapter->mbx_task); +#include #endif - } - - /* Pluggable optics-related interrupt */ - if (hw->device_id == IXGBE_DEV_ID_X550EM_X_SFP) - mod_mask = IXGBE_EICR_GPI_SDP0_X540; - else - mod_mask = IXGBE_EICR_GPI_SDP2_BY_MAC(hw); - - if (ixgbe_is_sfp(hw)) { - if (reg_eicr & IXGBE_EICR_GPI_SDP1_BY_MAC(hw)) { - IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1_BY_MAC(hw)); - taskqueue_enqueue(adapter->tq, &adapter->msf_task); - } else if (reg_eicr & mod_mask) { - IXGBE_WRITE_REG(hw, IXGBE_EICR, mod_mask); - taskqueue_enqueue(adapter->tq, &adapter->mod_task); - } - } - - /* Check for fan failure */ - if ((hw->device_id == IXGBE_DEV_ID_82598AT) && - (reg_eicr & IXGBE_EICR_GPI_SDP1)) { - IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1); - device_printf(adapter->dev, "\nCRITICAL: FAN FAILURE!! 
" - "REPLACE IMMEDIATELY!!\n"); - } - - /* External PHY interrupt */ - if (hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T && - (reg_eicr & IXGBE_EICR_GPI_SDP0_X540)) { - IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP0_X540); - taskqueue_enqueue(adapter->tq, &adapter->phy_task); - } - - /* Re-enable other interrupts */ - IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, IXGBE_EIMS_OTHER); - return; -} - -/********************************************************************* - * - * Media Ioctl callback - * - * This routine is called whenever the user queries the status of - * the interface using ifconfig. - * - **********************************************************************/ -static void -ixgbe_media_status(struct ifnet * ifp, struct ifmediareq * ifmr) -{ - struct adapter *adapter = ifp->if_softc; - struct ixgbe_hw *hw = &adapter->hw; - int layer; - - INIT_DEBUGOUT("ixgbe_media_status: begin"); - IXGBE_CORE_LOCK(adapter); - ixgbe_update_link_status(adapter); - - ifmr->ifm_status = IFM_AVALID; - ifmr->ifm_active = IFM_ETHER; - - if (!adapter->link_active) { - IXGBE_CORE_UNLOCK(adapter); - return; - } - - ifmr->ifm_status |= IFM_ACTIVE; - layer = adapter->phy_layer; - - if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_T || - layer & IXGBE_PHYSICAL_LAYER_1000BASE_T || - layer & IXGBE_PHYSICAL_LAYER_100BASE_TX) - switch (adapter->link_speed) { - case IXGBE_LINK_SPEED_10GB_FULL: - ifmr->ifm_active |= IFM_10G_T | IFM_FDX; - break; - case IXGBE_LINK_SPEED_1GB_FULL: - ifmr->ifm_active |= IFM_1000_T | IFM_FDX; - break; - case IXGBE_LINK_SPEED_100_FULL: - ifmr->ifm_active |= IFM_100_TX | IFM_FDX; - break; - } - if (layer & IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU || - layer & IXGBE_PHYSICAL_LAYER_SFP_ACTIVE_DA) - switch (adapter->link_speed) { - case IXGBE_LINK_SPEED_10GB_FULL: - ifmr->ifm_active |= IFM_10G_TWINAX | IFM_FDX; - break; - } - if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_LR) - switch (adapter->link_speed) { - case IXGBE_LINK_SPEED_10GB_FULL: - ifmr->ifm_active |= IFM_10G_LR | IFM_FDX; - break; - case IXGBE_LINK_SPEED_1GB_FULL: - ifmr->ifm_active |= IFM_1000_LX | IFM_FDX; - break; - } - if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_LRM) - switch (adapter->link_speed) { - case IXGBE_LINK_SPEED_10GB_FULL: - ifmr->ifm_active |= IFM_10G_LRM | IFM_FDX; - break; - case IXGBE_LINK_SPEED_1GB_FULL: - ifmr->ifm_active |= IFM_1000_LX | IFM_FDX; - break; - } - if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_SR || - layer & IXGBE_PHYSICAL_LAYER_1000BASE_SX) - switch (adapter->link_speed) { - case IXGBE_LINK_SPEED_10GB_FULL: - ifmr->ifm_active |= IFM_10G_SR | IFM_FDX; - break; - case IXGBE_LINK_SPEED_1GB_FULL: - ifmr->ifm_active |= IFM_1000_SX | IFM_FDX; - break; - } - if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_CX4) - switch (adapter->link_speed) { - case IXGBE_LINK_SPEED_10GB_FULL: - ifmr->ifm_active |= IFM_10G_CX4 | IFM_FDX; - break; - } - /* - ** XXX: These need to use the proper media types once - ** they're added. 
- */ -#ifndef IFM_ETH_XTYPE - if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KR) - switch (adapter->link_speed) { - case IXGBE_LINK_SPEED_10GB_FULL: - ifmr->ifm_active |= IFM_10G_SR | IFM_FDX; - break; - case IXGBE_LINK_SPEED_2_5GB_FULL: - ifmr->ifm_active |= IFM_2500_SX | IFM_FDX; - break; - case IXGBE_LINK_SPEED_1GB_FULL: - ifmr->ifm_active |= IFM_1000_CX | IFM_FDX; - break; - } - else if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KX4 - || layer & IXGBE_PHYSICAL_LAYER_1000BASE_KX) - switch (adapter->link_speed) { - case IXGBE_LINK_SPEED_10GB_FULL: - ifmr->ifm_active |= IFM_10G_CX4 | IFM_FDX; - break; - case IXGBE_LINK_SPEED_2_5GB_FULL: - ifmr->ifm_active |= IFM_2500_SX | IFM_FDX; - break; - case IXGBE_LINK_SPEED_1GB_FULL: - ifmr->ifm_active |= IFM_1000_CX | IFM_FDX; - break; - } -#else - if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KR) - switch (adapter->link_speed) { - case IXGBE_LINK_SPEED_10GB_FULL: - ifmr->ifm_active |= IFM_10G_KR | IFM_FDX; - break; - case IXGBE_LINK_SPEED_2_5GB_FULL: - ifmr->ifm_active |= IFM_2500_KX | IFM_FDX; - break; - case IXGBE_LINK_SPEED_1GB_FULL: - ifmr->ifm_active |= IFM_1000_KX | IFM_FDX; - break; - } - else if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KX4 - || layer & IXGBE_PHYSICAL_LAYER_1000BASE_KX) - switch (adapter->link_speed) { - case IXGBE_LINK_SPEED_10GB_FULL: - ifmr->ifm_active |= IFM_10G_KX4 | IFM_FDX; - break; - case IXGBE_LINK_SPEED_2_5GB_FULL: - ifmr->ifm_active |= IFM_2500_KX | IFM_FDX; - break; - case IXGBE_LINK_SPEED_1GB_FULL: - ifmr->ifm_active |= IFM_1000_KX | IFM_FDX; - break; - } -#endif - - /* If nothing is recognized... */ - if (IFM_SUBTYPE(ifmr->ifm_active) == 0) - ifmr->ifm_active |= IFM_UNKNOWN; - -#if __FreeBSD_version >= 900025 - /* Display current flow control setting used on link */ - if (hw->fc.current_mode == ixgbe_fc_rx_pause || - hw->fc.current_mode == ixgbe_fc_full) - ifmr->ifm_active |= IFM_ETH_RXPAUSE; - if (hw->fc.current_mode == ixgbe_fc_tx_pause || - hw->fc.current_mode == ixgbe_fc_full) - ifmr->ifm_active |= IFM_ETH_TXPAUSE; -#endif - - IXGBE_CORE_UNLOCK(adapter); - - return; -} - -/********************************************************************* - * - * Media Ioctl callback - * - * This routine is called when the user changes speed/duplex using - * media/mediopt option with ifconfig. - * - **********************************************************************/ -static int -ixgbe_media_change(struct ifnet * ifp) -{ - struct adapter *adapter = ifp->if_softc; - struct ifmedia *ifm = &adapter->media; - struct ixgbe_hw *hw = &adapter->hw; - ixgbe_link_speed speed = 0; - - INIT_DEBUGOUT("ixgbe_media_change: begin"); - - if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) - return (EINVAL); - - if (hw->phy.media_type == ixgbe_media_type_backplane) - return (ENODEV); - - /* - ** We don't actually need to check against the supported - ** media types of the adapter; ifmedia will take care of - ** that for us. 
- */ -#ifndef IFM_ETH_XTYPE - switch (IFM_SUBTYPE(ifm->ifm_media)) { - case IFM_AUTO: - case IFM_10G_T: - speed |= IXGBE_LINK_SPEED_100_FULL; - case IFM_10G_LRM: - case IFM_10G_SR: /* KR, too */ - case IFM_10G_LR: - case IFM_10G_CX4: /* KX4 */ - speed |= IXGBE_LINK_SPEED_1GB_FULL; - case IFM_10G_TWINAX: - speed |= IXGBE_LINK_SPEED_10GB_FULL; - break; - case IFM_1000_T: - speed |= IXGBE_LINK_SPEED_100_FULL; - case IFM_1000_LX: - case IFM_1000_SX: - case IFM_1000_CX: /* KX */ - speed |= IXGBE_LINK_SPEED_1GB_FULL; - break; - case IFM_100_TX: - speed |= IXGBE_LINK_SPEED_100_FULL; - break; - default: - goto invalid; - } -#else - switch (IFM_SUBTYPE(ifm->ifm_media)) { - case IFM_AUTO: - case IFM_10G_T: - speed |= IXGBE_LINK_SPEED_100_FULL; - case IFM_10G_LRM: - case IFM_10G_KR: - case IFM_10G_LR: - case IFM_10G_KX4: - speed |= IXGBE_LINK_SPEED_1GB_FULL; - case IFM_10G_TWINAX: - speed |= IXGBE_LINK_SPEED_10GB_FULL; - break; - case IFM_1000_T: - speed |= IXGBE_LINK_SPEED_100_FULL; - case IFM_1000_LX: - case IFM_1000_SX: - case IFM_1000_KX: - speed |= IXGBE_LINK_SPEED_1GB_FULL; - break; - case IFM_100_TX: - speed |= IXGBE_LINK_SPEED_100_FULL; - break; - default: - goto invalid; - } -#endif - - hw->mac.autotry_restart = TRUE; - hw->mac.ops.setup_link(hw, speed, TRUE); - if (IFM_SUBTYPE(ifm->ifm_media) == IFM_AUTO) { - adapter->advertise = 0; - } else { - if ((speed & IXGBE_LINK_SPEED_10GB_FULL) != 0) - adapter->advertise |= 1 << 2; - if ((speed & IXGBE_LINK_SPEED_1GB_FULL) != 0) - adapter->advertise |= 1 << 1; - if ((speed & IXGBE_LINK_SPEED_100_FULL) != 0) - adapter->advertise |= 1 << 0; - } - - return (0); - -invalid: - device_printf(adapter->dev, "Invalid media type!\n"); - return (EINVAL); -} - -static void -ixgbe_set_promisc(struct adapter *adapter) -{ - u_int32_t reg_rctl; - struct ifnet *ifp = adapter->ifp; - int mcnt = 0; - - reg_rctl = IXGBE_READ_REG(&adapter->hw, IXGBE_FCTRL); - reg_rctl &= (~IXGBE_FCTRL_UPE); - if (ifp->if_flags & IFF_ALLMULTI) - mcnt = MAX_NUM_MULTICAST_ADDRESSES; - else { - struct ifmultiaddr *ifma; -#if __FreeBSD_version < 800000 - IF_ADDR_LOCK(ifp); -#else - if_maddr_rlock(ifp); -#endif - TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { - if (ifma->ifma_addr->sa_family != AF_LINK) - continue; - if (mcnt == MAX_NUM_MULTICAST_ADDRESSES) - break; - mcnt++; - } -#if __FreeBSD_version < 800000 - IF_ADDR_UNLOCK(ifp); -#else - if_maddr_runlock(ifp); -#endif - } - if (mcnt < MAX_NUM_MULTICAST_ADDRESSES) - reg_rctl &= (~IXGBE_FCTRL_MPE); - IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl); - - if (ifp->if_flags & IFF_PROMISC) { - reg_rctl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE); - IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl); - } else if (ifp->if_flags & IFF_ALLMULTI) { - reg_rctl |= IXGBE_FCTRL_MPE; - reg_rctl &= ~IXGBE_FCTRL_UPE; - IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl); - } - return; -} - - -/********************************************************************* - * Multicast Update - * - * This routine is called whenever multicast address list is updated. 
- * - **********************************************************************/ -#define IXGBE_RAR_ENTRIES 16 - -static void -ixgbe_set_multi(struct adapter *adapter) -{ - u32 fctrl; - u8 *update_ptr; - struct ifmultiaddr *ifma; - struct ixgbe_mc_addr *mta; - int mcnt = 0; - struct ifnet *ifp = adapter->ifp; - - IOCTL_DEBUGOUT("ixgbe_set_multi: begin"); - - mta = adapter->mta; - bzero(mta, sizeof(*mta) * MAX_NUM_MULTICAST_ADDRESSES); - -#if __FreeBSD_version < 800000 - IF_ADDR_LOCK(ifp); -#else - if_maddr_rlock(ifp); -#endif - TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { - if (ifma->ifma_addr->sa_family != AF_LINK) - continue; - if (mcnt == MAX_NUM_MULTICAST_ADDRESSES) - break; - bcopy(LLADDR((struct sockaddr_dl *) ifma->ifma_addr), - mta[mcnt].addr, IXGBE_ETH_LENGTH_OF_ADDRESS); - mta[mcnt].vmdq = adapter->pool; - mcnt++; - } -#if __FreeBSD_version < 800000 - IF_ADDR_UNLOCK(ifp); -#else - if_maddr_runlock(ifp); -#endif - - fctrl = IXGBE_READ_REG(&adapter->hw, IXGBE_FCTRL); - fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE); - if (ifp->if_flags & IFF_PROMISC) - fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE); - else if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES || - ifp->if_flags & IFF_ALLMULTI) { - fctrl |= IXGBE_FCTRL_MPE; - fctrl &= ~IXGBE_FCTRL_UPE; - } else - fctrl &= ~(IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE); - - IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, fctrl); - - if (mcnt < MAX_NUM_MULTICAST_ADDRESSES) { - update_ptr = (u8 *)mta; - ixgbe_update_mc_addr_list(&adapter->hw, - update_ptr, mcnt, ixgbe_mc_array_itr, TRUE); - } - - return; -} - -/* - * This is an iterator function now needed by the multicast - * shared code. It simply feeds the shared code routine the - * addresses in the array of ixgbe_set_multi() one by one. - */ -static u8 * -ixgbe_mc_array_itr(struct ixgbe_hw *hw, u8 **update_ptr, u32 *vmdq) -{ - struct ixgbe_mc_addr *mta; - - mta = (struct ixgbe_mc_addr *)*update_ptr; - *vmdq = mta->vmdq; - - *update_ptr = (u8*)(mta + 1); - return (mta->addr); -} - - -/********************************************************************* - * Timer routine - * - * This routine checks for link status,updates statistics, - * and runs the watchdog check. - * - **********************************************************************/ - -static void -ixgbe_local_timer(void *arg) -{ - struct adapter *adapter = arg; - device_t dev = adapter->dev; - struct ix_queue *que = adapter->queues; - u64 queues = 0; - int hung = 0; - - mtx_assert(&adapter->core_mtx, MA_OWNED); - - /* Check for pluggable optics */ - if (adapter->sfp_probe) - if (!ixgbe_sfp_probe(adapter)) - goto out; /* Nothing to do */ - - ixgbe_update_link_status(adapter); - ixgbe_update_stats_counters(adapter); - - /* - ** Check the TX queues status - ** - mark hung queues so we don't schedule on them - ** - watchdog only if all queues show hung - */ - for (int i = 0; i < adapter->num_queues; i++, que++) { - /* Keep track of queues with work for soft irq */ - if (que->txr->busy) - queues |= ((u64)1 << que->me); - /* - ** Each time txeof runs without cleaning, but there - ** are uncleaned descriptors it increments busy. If - ** we get to the MAX we declare it hung. 
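The ixgbe_mc_array_itr() above is a cursor-style iterator: the shared multicast code pulls one address per call while the callback advances an opaque byte pointer through the driver's table. A minimal standalone imitation of the pattern; the struct layout and names here are invented for the sketch, not the shared code's types:

#include <stdio.h>
#include <stdint.h>

struct mc_addr {			/* stand-in for struct ixgbe_mc_addr */
	uint8_t  addr[6];
	uint32_t vmdq;
};

/*
 * Return the current address, report its VMDq pool, and advance the
 * opaque cursor to the next table element.
 */
static uint8_t *
mc_array_itr(uint8_t **cursor, uint32_t *vmdq)
{
	struct mc_addr *mta = (struct mc_addr *)*cursor;

	*vmdq = mta->vmdq;
	*cursor = (uint8_t *)(mta + 1);
	return (mta->addr);
}

int
main(void)
{
	struct mc_addr table[2] = {
		{ { 0x01, 0x00, 0x5e, 0x00, 0x00, 0x01 }, 0 },
		{ { 0x01, 0x00, 0x5e, 0x00, 0x00, 0x02 }, 0 },
	};
	uint8_t *cursor = (uint8_t *)table;
	uint32_t vmdq;

	/* The consumer never sees the element layout, only the cursor. */
	for (int i = 0; i < 2; i++) {
		uint8_t *a = mc_array_itr(&cursor, &vmdq);
		printf("addr[%d] ends in %02x, pool %u\n", i, a[5],
		    (unsigned)vmdq);
	}
	return (0);
}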
- */ - if (que->busy == IXGBE_QUEUE_HUNG) { - ++hung; - /* Mark the queue as inactive */ - adapter->active_queues &= ~((u64)1 << que->me); - continue; - } else { - /* Check if we've come back from hung */ - if ((adapter->active_queues & ((u64)1 << que->me)) == 0) - adapter->active_queues |= ((u64)1 << que->me); - } - if (que->busy >= IXGBE_MAX_TX_BUSY) { - device_printf(dev,"Warning queue %d " - "appears to be hung!\n", i); - que->txr->busy = IXGBE_QUEUE_HUNG; - ++hung; - } - - } - - /* Only truly watchdog if all queues show hung */ - if (hung == adapter->num_queues) - goto watchdog; - else if (queues != 0) { /* Force an IRQ on queues with work */ - ixgbe_rearm_queues(adapter, queues); - } - -out: - callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter); - return; - -watchdog: - device_printf(adapter->dev, "Watchdog timeout -- resetting\n"); - adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING; - adapter->watchdog_events++; - ixgbe_init_locked(adapter); -} - - -/* -** Note: this routine updates the OS on the link state -** the real check of the hardware only happens with -** a link interrupt. -*/ -static void -ixgbe_update_link_status(struct adapter *adapter) -{ - struct ifnet *ifp = adapter->ifp; - device_t dev = adapter->dev; - - if (adapter->link_up){ - if (adapter->link_active == FALSE) { - if (bootverbose) - device_printf(dev,"Link is up %d Gbps %s \n", - ((adapter->link_speed == 128)? 10:1), - "Full Duplex"); - adapter->link_active = TRUE; - /* Update any Flow Control changes */ - ixgbe_fc_enable(&adapter->hw); - /* Update DMA coalescing config */ - ixgbe_config_dmac(adapter); - if_link_state_change(ifp, LINK_STATE_UP); -#ifdef PCI_IOV - ixgbe_ping_all_vfs(adapter); -#endif - } - } else { /* Link down */ - if (adapter->link_active == TRUE) { - if (bootverbose) - device_printf(dev,"Link is Down\n"); - if_link_state_change(ifp, LINK_STATE_DOWN); - adapter->link_active = FALSE; -#ifdef PCI_IOV - ixgbe_ping_all_vfs(adapter); -#endif - } - } - - return; -} - - -/********************************************************************* - * - * This routine disables all traffic on the adapter by issuing a - * global reset on the MAC and deallocates TX/RX buffers. - * - **********************************************************************/ - -static void -ixgbe_stop(void *arg) -{ - struct ifnet *ifp; - struct adapter *adapter = arg; - struct ixgbe_hw *hw = &adapter->hw; - ifp = adapter->ifp; - - mtx_assert(&adapter->core_mtx, MA_OWNED); - - INIT_DEBUGOUT("ixgbe_stop: begin\n"); - ixgbe_disable_intr(adapter); - callout_stop(&adapter->timer); - - /* Let the stack know...*/ - ifp->if_drv_flags &= ~IFF_DRV_RUNNING; - - ixgbe_reset_hw(hw); - hw->adapter_stopped = FALSE; - ixgbe_stop_adapter(hw); - if (hw->mac.type == ixgbe_mac_82599EB) - ixgbe_stop_mac_link_on_d3_82599(hw); - /* Turn off the laser - noop with no optics */ - ixgbe_disable_tx_laser(hw); - - /* Update the stack */ - adapter->link_up = FALSE; - ixgbe_update_link_status(adapter); - - /* reprogram the RAR[0] in case user changed it. */ - ixgbe_set_rar(&adapter->hw, 0, adapter->hw.mac.addr, 0, IXGBE_RAH_AV); - - return; -} - - -/********************************************************************* - * - * Determine hardware revision. 
- * - **********************************************************************/ -static void -ixgbe_identify_hardware(struct adapter *adapter) -{ - device_t dev = adapter->dev; - struct ixgbe_hw *hw = &adapter->hw; - - /* Save off the information about this board */ - hw->vendor_id = pci_get_vendor(dev); - hw->device_id = pci_get_device(dev); - hw->revision_id = pci_read_config(dev, PCIR_REVID, 1); - hw->subsystem_vendor_id = - pci_read_config(dev, PCIR_SUBVEND_0, 2); - hw->subsystem_device_id = - pci_read_config(dev, PCIR_SUBDEV_0, 2); - - /* - ** Make sure BUSMASTER is set - */ - pci_enable_busmaster(dev); - - /* We need this here to set the num_segs below */ - ixgbe_set_mac_type(hw); - - /* Pick up the 82599 settings */ - if (hw->mac.type != ixgbe_mac_82598EB) { - hw->phy.smart_speed = ixgbe_smart_speed; - adapter->num_segs = IXGBE_82599_SCATTER; - } else - adapter->num_segs = IXGBE_82598_SCATTER; - - return; -} - -/********************************************************************* - * - * Determine optic type - * - **********************************************************************/ -static void -ixgbe_setup_optics(struct adapter *adapter) -{ - struct ixgbe_hw *hw = &adapter->hw; - int layer; - - layer = adapter->phy_layer = ixgbe_get_supported_physical_layer(hw); - - if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_T) { - adapter->optics = IFM_10G_T; - return; - } - - if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_T) { - adapter->optics = IFM_1000_T; - return; - } - - if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_SX) { - adapter->optics = IFM_1000_SX; - return; - } - - if (layer & (IXGBE_PHYSICAL_LAYER_10GBASE_LR | - IXGBE_PHYSICAL_LAYER_10GBASE_LRM)) { - adapter->optics = IFM_10G_LR; - return; - } - - if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_SR) { - adapter->optics = IFM_10G_SR; - return; - } - - if (layer & IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU) { - adapter->optics = IFM_10G_TWINAX; - return; - } - - if (layer & (IXGBE_PHYSICAL_LAYER_10GBASE_KX4 | - IXGBE_PHYSICAL_LAYER_10GBASE_CX4)) { - adapter->optics = IFM_10G_CX4; - return; - } - - /* If we get here just set the default */ - adapter->optics = IFM_ETHER | IFM_AUTO; - return; -} - -/********************************************************************* - * - * Setup the Legacy or MSI Interrupt handler - * - **********************************************************************/ -static int -ixgbe_allocate_legacy(struct adapter *adapter) -{ - device_t dev = adapter->dev; - struct ix_queue *que = adapter->queues; -#ifndef IXGBE_LEGACY_TX - struct tx_ring *txr = adapter->tx_rings; -#endif - int error, rid = 0; - - /* MSI RID at 1 */ - if (adapter->msix == 1) - rid = 1; - - /* We allocate a single interrupt resource */ - adapter->res = bus_alloc_resource_any(dev, - SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); - if (adapter->res == NULL) { - device_printf(dev, "Unable to allocate bus resource: " - "interrupt\n"); - return (ENXIO); - } - - /* - * Try allocating a fast interrupt and the associated deferred - * processing contexts. 
- */ -#ifndef IXGBE_LEGACY_TX - TASK_INIT(&txr->txq_task, 0, ixgbe_deferred_mq_start, txr); -#endif - TASK_INIT(&que->que_task, 0, ixgbe_handle_que, que); - que->tq = taskqueue_create_fast("ixgbe_que", M_NOWAIT, - taskqueue_thread_enqueue, &que->tq); - taskqueue_start_threads(&que->tq, 1, PI_NET, "%s ixq", - device_get_nameunit(adapter->dev)); - - /* Tasklets for Link, SFP and Multispeed Fiber */ - TASK_INIT(&adapter->link_task, 0, ixgbe_handle_link, adapter); - TASK_INIT(&adapter->mod_task, 0, ixgbe_handle_mod, adapter); - TASK_INIT(&adapter->msf_task, 0, ixgbe_handle_msf, adapter); - TASK_INIT(&adapter->phy_task, 0, ixgbe_handle_phy, adapter); -#ifdef IXGBE_FDIR - TASK_INIT(&adapter->fdir_task, 0, ixgbe_reinit_fdir, adapter); -#endif - adapter->tq = taskqueue_create_fast("ixgbe_link", M_NOWAIT, - taskqueue_thread_enqueue, &adapter->tq); - taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s linkq", - device_get_nameunit(adapter->dev)); - - if ((error = bus_setup_intr(dev, adapter->res, - INTR_TYPE_NET | INTR_MPSAFE, NULL, ixgbe_legacy_irq, - que, &adapter->tag)) != 0) { - device_printf(dev, "Failed to register fast interrupt " - "handler: %d\n", error); - taskqueue_free(que->tq); - taskqueue_free(adapter->tq); - que->tq = NULL; - adapter->tq = NULL; - return (error); - } - /* For simplicity in the handlers */ - adapter->active_queues = IXGBE_EIMS_ENABLE_MASK; - - return (0); -} - - -/********************************************************************* - * - * Setup MSIX Interrupt resources and handlers - * - **********************************************************************/ -static int -ixgbe_allocate_msix(struct adapter *adapter) -{ - device_t dev = adapter->dev; - struct ix_queue *que = adapter->queues; - struct tx_ring *txr = adapter->tx_rings; - int error, rid, vector = 0; - int cpu_id = 0; -#ifdef RSS - cpuset_t cpu_mask; -#endif - -#ifdef RSS - /* - * If we're doing RSS, the number of queues needs to - * match the number of RSS buckets that are configured. - * - * + If there's more queues than RSS buckets, we'll end - * up with queues that get no traffic. - * - * + If there's more RSS buckets than queues, we'll end - * up having multiple RSS buckets map to the same queue, - * so there'll be some contention. - */ - if (adapter->num_queues != rss_getnumbuckets()) { - device_printf(dev, - "%s: number of queues (%d) != number of RSS buckets (%d)" - "; performance will be impacted.\n", - __func__, - adapter->num_queues, - rss_getnumbuckets()); - } -#endif - - for (int i = 0; i < adapter->num_queues; i++, vector++, que++, txr++) { - rid = vector + 1; - que->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, - RF_SHAREABLE | RF_ACTIVE); - if (que->res == NULL) { - device_printf(dev,"Unable to allocate" - " bus resource: que interrupt [%d]\n", vector); - return (ENXIO); - } - /* Set the handler function */ - error = bus_setup_intr(dev, que->res, - INTR_TYPE_NET | INTR_MPSAFE, NULL, - ixgbe_msix_que, que, &que->tag); - if (error) { - que->res = NULL; - device_printf(dev, "Failed to register QUE handler"); - return (error); - } -#if __FreeBSD_version >= 800504 - bus_describe_intr(dev, que->res, que->tag, "q%d", i); -#endif - que->msix = vector; - adapter->active_queues |= (u64)(1 << que->msix); -#ifdef RSS - /* - * The queue ID is used as the RSS layer bucket ID. - * We look up the queue ID -> RSS CPU ID and select - * that. - */ - cpu_id = rss_getcpu(i % rss_getnumbuckets()); -#else - /* - * Bind the msix vector, and thus the - * rings to the corresponding cpu. 
- * - * This just happens to match the default RSS round-robin - * bucket -> queue -> CPU allocation. - */ - if (adapter->num_queues > 1) - cpu_id = i; -#endif - if (adapter->num_queues > 1) - bus_bind_intr(dev, que->res, cpu_id); -#ifdef IXGBE_DEBUG -#ifdef RSS - device_printf(dev, - "Bound RSS bucket %d to CPU %d\n", - i, cpu_id); -#else - device_printf(dev, - "Bound queue %d to cpu %d\n", - i, cpu_id); -#endif -#endif /* IXGBE_DEBUG */ - - -#ifndef IXGBE_LEGACY_TX - TASK_INIT(&txr->txq_task, 0, ixgbe_deferred_mq_start, txr); -#endif - TASK_INIT(&que->que_task, 0, ixgbe_handle_que, que); - que->tq = taskqueue_create_fast("ixgbe_que", M_NOWAIT, - taskqueue_thread_enqueue, &que->tq); -#ifdef RSS - CPU_SETOF(cpu_id, &cpu_mask); - taskqueue_start_threads_cpuset(&que->tq, 1, PI_NET, - &cpu_mask, - "%s (bucket %d)", - device_get_nameunit(adapter->dev), - cpu_id); -#else - taskqueue_start_threads(&que->tq, 1, PI_NET, "%s:q%d", - device_get_nameunit(adapter->dev), i); -#endif - } - - /* and Link */ - rid = vector + 1; - adapter->res = bus_alloc_resource_any(dev, - SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); - if (!adapter->res) { - device_printf(dev,"Unable to allocate" - " bus resource: Link interrupt [%d]\n", rid); - return (ENXIO); - } - /* Set the link handler function */ - error = bus_setup_intr(dev, adapter->res, - INTR_TYPE_NET | INTR_MPSAFE, NULL, - ixgbe_msix_link, adapter, &adapter->tag); - if (error) { - adapter->res = NULL; - device_printf(dev, "Failed to register LINK handler"); - return (error); - } -#if __FreeBSD_version >= 800504 - bus_describe_intr(dev, adapter->res, adapter->tag, "link"); -#endif - adapter->vector = vector; - /* Tasklets for Link, SFP and Multispeed Fiber */ - TASK_INIT(&adapter->link_task, 0, ixgbe_handle_link, adapter); - TASK_INIT(&adapter->mod_task, 0, ixgbe_handle_mod, adapter); - TASK_INIT(&adapter->msf_task, 0, ixgbe_handle_msf, adapter); -#ifdef PCI_IOV - TASK_INIT(&adapter->mbx_task, 0, ixgbe_handle_mbx, adapter); -#endif - TASK_INIT(&adapter->phy_task, 0, ixgbe_handle_phy, adapter); -#ifdef IXGBE_FDIR - TASK_INIT(&adapter->fdir_task, 0, ixgbe_reinit_fdir, adapter); -#endif - adapter->tq = taskqueue_create_fast("ixgbe_link", M_NOWAIT, - taskqueue_thread_enqueue, &adapter->tq); - taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s linkq", - device_get_nameunit(adapter->dev)); - - return (0); -} - -/* - * Setup Either MSI/X or MSI - */ -static int -ixgbe_setup_msix(struct adapter *adapter) -{ - device_t dev = adapter->dev; - int rid, want, queues, msgs; - - /* Override by tuneable */ - if (ixgbe_enable_msix == 0) - goto msi; - - /* First try MSI/X */ - msgs = pci_msix_count(dev); - if (msgs == 0) - goto msi; - rid = PCIR_BAR(MSIX_82598_BAR); - adapter->msix_mem = bus_alloc_resource_any(dev, - SYS_RES_MEMORY, &rid, RF_ACTIVE); - if (adapter->msix_mem == NULL) { - rid += 4; /* 82599 maps in higher BAR */ - adapter->msix_mem = bus_alloc_resource_any(dev, - SYS_RES_MEMORY, &rid, RF_ACTIVE); - } - if (adapter->msix_mem == NULL) { - /* May not be enabled */ - device_printf(adapter->dev, - "Unable to map MSIX table \n"); - goto msi; - } - - /* Figure out a reasonable auto config value */ - queues = (mp_ncpus > (msgs - 1)) ? 
(msgs - 1) : mp_ncpus; - -#ifdef RSS - /* If we're doing RSS, clamp at the number of RSS buckets */ - if (queues > rss_getnumbuckets()) - queues = rss_getnumbuckets(); -#endif - - if (ixgbe_num_queues != 0) - queues = ixgbe_num_queues; - /* Set max queues to 8 when autoconfiguring */ - else if ((ixgbe_num_queues == 0) && (queues > 8)) - queues = 8; - - /* reflect correct sysctl value */ - ixgbe_num_queues = queues; - - /* - ** Want one vector (RX/TX pair) per queue - ** plus an additional for Link. - */ - want = queues + 1; - if (msgs >= want) - msgs = want; - else { - device_printf(adapter->dev, - "MSIX Configuration Problem, " - "%d vectors but %d queues wanted!\n", - msgs, want); - goto msi; - } - if ((pci_alloc_msix(dev, &msgs) == 0) && (msgs == want)) { - device_printf(adapter->dev, - "Using MSIX interrupts with %d vectors\n", msgs); - adapter->num_queues = queues; - return (msgs); - } - /* - ** If MSIX alloc failed or provided us with - ** less than needed, free and fall through to MSI - */ - pci_release_msi(dev); - -msi: - if (adapter->msix_mem != NULL) { - bus_release_resource(dev, SYS_RES_MEMORY, - rid, adapter->msix_mem); - adapter->msix_mem = NULL; - } - msgs = 1; - if (pci_alloc_msi(dev, &msgs) == 0) { - device_printf(adapter->dev, "Using an MSI interrupt\n"); - return (msgs); - } - device_printf(adapter->dev, "Using a Legacy interrupt\n"); - return (0); -} - - -static int -ixgbe_allocate_pci_resources(struct adapter *adapter) -{ - int rid; - device_t dev = adapter->dev; - - rid = PCIR_BAR(0); - adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, - &rid, RF_ACTIVE); - - if (!(adapter->pci_mem)) { - device_printf(dev, "Unable to allocate bus resource: memory\n"); - return (ENXIO); - } - - /* Save bus_space values for READ/WRITE_REG macros */ - adapter->osdep.mem_bus_space_tag = - rman_get_bustag(adapter->pci_mem); - adapter->osdep.mem_bus_space_handle = - rman_get_bushandle(adapter->pci_mem); - /* Set hw values for shared code */ - adapter->hw.hw_addr = (u8 *) &adapter->osdep.mem_bus_space_handle; - adapter->hw.back = adapter; - - /* Default to 1 queue if MSI-X setup fails */ - adapter->num_queues = 1; - - /* - ** Now setup MSI or MSI-X, should - ** return us the number of supported - ** vectors. (Will be 1 for MSI) - */ - adapter->msix = ixgbe_setup_msix(adapter); - return (0); -} - -static void -ixgbe_free_pci_resources(struct adapter * adapter) -{ - struct ix_queue *que = adapter->queues; - device_t dev = adapter->dev; - int rid, memrid; - - if (adapter->hw.mac.type == ixgbe_mac_82598EB) - memrid = PCIR_BAR(MSIX_82598_BAR); - else - memrid = PCIR_BAR(MSIX_82599_BAR); - - /* - ** There is a slight possibility of a failure mode - ** in attach that will result in entering this function - ** before interrupt resources have been initialized, and - ** in that case we do not want to execute the loops below - ** We can detect this reliably by the state of the adapter - ** res pointer. - */ - if (adapter->res == NULL) - goto mem; - - /* - ** Release all msix queue resources: - */ - for (int i = 0; i < adapter->num_queues; i++, que++) { - rid = que->msix + 1; - if (que->tag != NULL) { - bus_teardown_intr(dev, que->res, que->tag); - que->tag = NULL; - } - if (que->res != NULL) - bus_release_resource(dev, SYS_RES_IRQ, rid, que->res); - } - - - /* Clean the Legacy or Link interrupt last */ - if (adapter->vector) /* we are doing MSIX */ - rid = adapter->vector + 1; - else - (adapter->msix != 0) ? 
(rid = 1):(rid = 0); - - if (adapter->tag != NULL) { - bus_teardown_intr(dev, adapter->res, adapter->tag); - adapter->tag = NULL; - } - if (adapter->res != NULL) - bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res); - -mem: - if (adapter->msix) - pci_release_msi(dev); - - if (adapter->msix_mem != NULL) - bus_release_resource(dev, SYS_RES_MEMORY, - memrid, adapter->msix_mem); - - if (adapter->pci_mem != NULL) - bus_release_resource(dev, SYS_RES_MEMORY, - PCIR_BAR(0), adapter->pci_mem); - - return; -} - -/********************************************************************* - * - * Setup networking device structure and register an interface. - * - **********************************************************************/ -static int -ixgbe_setup_interface(device_t dev, struct adapter *adapter) -{ - struct ifnet *ifp; - - INIT_DEBUGOUT("ixgbe_setup_interface: begin"); - - ifp = adapter->ifp = if_alloc(IFT_ETHER); - if (ifp == NULL) { - device_printf(dev, "can not allocate ifnet structure\n"); - return (-1); - } - if_initname(ifp, device_get_name(dev), device_get_unit(dev)); - ifp->if_baudrate = IF_Gbps(10); - ifp->if_init = ixgbe_init; - ifp->if_softc = adapter; - ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; - ifp->if_ioctl = ixgbe_ioctl; -#if __FreeBSD_version >= 1100036 - if_setgetcounterfn(ifp, ixgbe_get_counter); -#endif -#if __FreeBSD_version >= 1100045 - /* TSO parameters */ - ifp->if_hw_tsomax = 65518; - ifp->if_hw_tsomaxsegcount = IXGBE_82599_SCATTER; - ifp->if_hw_tsomaxsegsize = 2048; -#endif -#ifndef IXGBE_LEGACY_TX - ifp->if_transmit = ixgbe_mq_start; - ifp->if_qflush = ixgbe_qflush; -#else - ifp->if_start = ixgbe_start; - IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 2); - ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 2; - IFQ_SET_READY(&ifp->if_snd); -#endif - - ether_ifattach(ifp, adapter->hw.mac.addr); - - adapter->max_frame_size = - ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN; - - /* - * Tell the upper layer(s) we support long frames. - */ - ifp->if_hdrlen = sizeof(struct ether_vlan_header); - - /* Set capability flags */ - ifp->if_capabilities |= IFCAP_RXCSUM - | IFCAP_TXCSUM - | IFCAP_RXCSUM_IPV6 - | IFCAP_TXCSUM_IPV6 - | IFCAP_TSO4 - | IFCAP_TSO6 - | IFCAP_LRO - | IFCAP_VLAN_HWTAGGING - | IFCAP_VLAN_HWTSO - | IFCAP_VLAN_HWCSUM - | IFCAP_JUMBO_MTU - | IFCAP_VLAN_MTU - | IFCAP_HWSTATS; - - /* Enable the above capabilities by default */ - ifp->if_capenable = ifp->if_capabilities; - - /* - ** Don't turn this on by default, if vlans are - ** created on another pseudo device (eg. lagg) - ** then vlan events are not passed thru, breaking - ** operation, but with HW FILTER off it works. If - ** using vlans directly on the ixgbe driver you can - ** enable this and get full hardware tag filtering. 
- */ - ifp->if_capabilities |= IFCAP_VLAN_HWFILTER; - - /* - * Specify the media types supported by this adapter and register - * callbacks to update media and link information - */ - ifmedia_init(&adapter->media, IFM_IMASK, ixgbe_media_change, - ixgbe_media_status); - - adapter->phy_layer = ixgbe_get_supported_physical_layer(&adapter->hw); - ixgbe_add_media_types(adapter); - - /* Set autoselect media by default */ - ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO); - - return (0); -} - -static void -ixgbe_add_media_types(struct adapter *adapter) -{ - struct ixgbe_hw *hw = &adapter->hw; - device_t dev = adapter->dev; - int layer; - - layer = adapter->phy_layer; - - /* Media types with matching FreeBSD media defines */ - if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_T) - ifmedia_add(&adapter->media, IFM_ETHER | IFM_10G_T, 0, NULL); - if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_T) - ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_T, 0, NULL); - if (layer & IXGBE_PHYSICAL_LAYER_100BASE_TX) - ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX, 0, NULL); - - if (layer & IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU || - layer & IXGBE_PHYSICAL_LAYER_SFP_ACTIVE_DA) - ifmedia_add(&adapter->media, IFM_ETHER | IFM_10G_TWINAX, 0, NULL); - - if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_LR) { - ifmedia_add(&adapter->media, IFM_ETHER | IFM_10G_LR, 0, NULL); - if (hw->phy.multispeed_fiber) - ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_LX, 0, NULL); - } - if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_SR) { - ifmedia_add(&adapter->media, IFM_ETHER | IFM_10G_SR, 0, NULL); - if (hw->phy.multispeed_fiber) - ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL); - } else if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_SX) - ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL); - if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_CX4) - ifmedia_add(&adapter->media, IFM_ETHER | IFM_10G_CX4, 0, NULL); - -#ifdef IFM_ETH_XTYPE - if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KR) - ifmedia_add(&adapter->media, IFM_ETHER | IFM_10G_KR, 0, NULL); - if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KX4) - ifmedia_add(&adapter->media, IFM_ETHER | IFM_10G_KX4, 0, NULL); - if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_KX) - ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_KX, 0, NULL); -#else - if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KR) { - device_printf(dev, "Media supported: 10GbaseKR\n"); - device_printf(dev, "10GbaseKR mapped to 10GbaseSR\n"); - ifmedia_add(&adapter->media, IFM_ETHER | IFM_10G_SR, 0, NULL); - } - if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KX4) { - device_printf(dev, "Media supported: 10GbaseKX4\n"); - device_printf(dev, "10GbaseKX4 mapped to 10GbaseCX4\n"); - ifmedia_add(&adapter->media, IFM_ETHER | IFM_10G_CX4, 0, NULL); - } - if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_KX) { - device_printf(dev, "Media supported: 1000baseKX\n"); - device_printf(dev, "1000baseKX mapped to 1000baseCX\n"); - ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_CX, 0, NULL); - } -#endif - if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_BX) - device_printf(dev, "Media supported: 1000baseBX\n"); - - if (hw->device_id == IXGBE_DEV_ID_82598AT) { - ifmedia_add(&adapter->media, - IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL); - ifmedia_add(&adapter->media, - IFM_ETHER | IFM_1000_T, 0, NULL); - } - - ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL); -} - -static void -ixgbe_config_link(struct adapter *adapter) -{ - struct ixgbe_hw *hw = &adapter->hw; - u32 autoneg, err = 0; - bool sfp, negotiate; - - sfp = ixgbe_is_sfp(hw); - - if (sfp) { - 
taskqueue_enqueue(adapter->tq, &adapter->mod_task); - } else { - if (hw->mac.ops.check_link) - err = ixgbe_check_link(hw, &adapter->link_speed, - &adapter->link_up, FALSE); - if (err) - goto out; - autoneg = hw->phy.autoneg_advertised; - if ((!autoneg) && (hw->mac.ops.get_link_capabilities)) - err = hw->mac.ops.get_link_capabilities(hw, - &autoneg, &negotiate); - if (err) - goto out; - if (hw->mac.ops.setup_link) - err = hw->mac.ops.setup_link(hw, - autoneg, adapter->link_up); - } -out: - return; -} - - -/********************************************************************* - * - * Enable transmit units. - * - **********************************************************************/ -static void -ixgbe_initialize_transmit_units(struct adapter *adapter) -{ - struct tx_ring *txr = adapter->tx_rings; - struct ixgbe_hw *hw = &adapter->hw; - - /* Setup the Base and Length of the Tx Descriptor Ring */ - for (int i = 0; i < adapter->num_queues; i++, txr++) { - u64 tdba = txr->txdma.dma_paddr; - u32 txctrl = 0; - int j = txr->me; - - IXGBE_WRITE_REG(hw, IXGBE_TDBAL(j), - (tdba & 0x00000000ffffffffULL)); - IXGBE_WRITE_REG(hw, IXGBE_TDBAH(j), (tdba >> 32)); - IXGBE_WRITE_REG(hw, IXGBE_TDLEN(j), - adapter->num_tx_desc * sizeof(union ixgbe_adv_tx_desc)); - - /* Setup the HW Tx Head and Tail descriptor pointers */ - IXGBE_WRITE_REG(hw, IXGBE_TDH(j), 0); - IXGBE_WRITE_REG(hw, IXGBE_TDT(j), 0); - - /* Cache the tail address */ - txr->tail = IXGBE_TDT(j); - - /* Disable Head Writeback */ - /* - * Note: for X550 series devices, these registers are actually - * prefixed with TPH_ instead of DCA_, but the addresses and - * fields remain the same. - */ - switch (hw->mac.type) { - case ixgbe_mac_82598EB: - txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(j)); - break; - default: - txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL_82599(j)); - break; - } - txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN; - switch (hw->mac.type) { - case ixgbe_mac_82598EB: - IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(j), txctrl); - break; - default: - IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(j), txctrl); - break; - } - - } - - if (hw->mac.type != ixgbe_mac_82598EB) { - u32 dmatxctl, rttdcs; -#ifdef PCI_IOV - enum ixgbe_iov_mode mode = ixgbe_get_iov_mode(adapter); -#endif - dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL); - dmatxctl |= IXGBE_DMATXCTL_TE; - IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl); - /* Disable arbiter to set MTQC */ - rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS); - rttdcs |= IXGBE_RTTDCS_ARBDIS; - IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs); -#ifdef PCI_IOV - IXGBE_WRITE_REG(hw, IXGBE_MTQC, ixgbe_get_mtqc(mode)); -#else - IXGBE_WRITE_REG(hw, IXGBE_MTQC, IXGBE_MTQC_64Q_1PB); -#endif - rttdcs &= ~IXGBE_RTTDCS_ARBDIS; - IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs); - } - - return; -} - -static void -ixgbe_initialize_rss_mapping(struct adapter *adapter) -{ - struct ixgbe_hw *hw = &adapter->hw; - u32 reta = 0, mrqc, rss_key[10]; - int queue_id, table_size, index_mult; -#ifdef RSS - u32 rss_hash_config; -#endif -#ifdef PCI_IOV - enum ixgbe_iov_mode mode; -#endif - -#ifdef RSS - /* Fetch the configured RSS key */ - rss_getkey((uint8_t *) &rss_key); -#else - /* set up random bits */ - arc4rand(&rss_key, sizeof(rss_key), 0); -#endif - - /* Set multiplier for RETA setup and table size based on MAC */ - index_mult = 0x1; - table_size = 128; - switch (adapter->hw.mac.type) { - case ixgbe_mac_82598EB: - index_mult = 0x11; - break; - case ixgbe_mac_X550: - case ixgbe_mac_X550EM_x: - table_size = 512; - break; - default: - break; - } - - /* Set up the
redirection table */ - for (int i = 0, j = 0; i < table_size; i++, j++) { - if (j == adapter->num_queues) j = 0; -#ifdef RSS - /* - * Fetch the RSS bucket id for the given indirection entry. - * Cap it at the number of configured buckets (which is - * num_queues.) - */ - queue_id = rss_get_indirection_to_bucket(i); - queue_id = queue_id % adapter->num_queues; -#else - queue_id = (j * index_mult); -#endif - /* - * The low 8 bits are for hash value (n+0); - * The next 8 bits are for hash value (n+1), etc. - */ - reta = reta >> 8; - reta = reta | ( ((uint32_t) queue_id) << 24); - if ((i & 3) == 3) { - if (i < 128) - IXGBE_WRITE_REG(hw, IXGBE_RETA(i >> 2), reta); - else - IXGBE_WRITE_REG(hw, IXGBE_ERETA((i >> 2) - 32), reta); - reta = 0; - } - } - - /* Now fill our hash function seeds */ - for (int i = 0; i < 10; i++) - IXGBE_WRITE_REG(hw, IXGBE_RSSRK(i), rss_key[i]); - - /* Perform hash on these packet types */ -#ifdef RSS - mrqc = IXGBE_MRQC_RSSEN; - rss_hash_config = rss_gethashconfig(); - if (rss_hash_config & RSS_HASHTYPE_RSS_IPV4) - mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4; - if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV4) - mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_TCP; - if (rss_hash_config & RSS_HASHTYPE_RSS_IPV6) - mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6; - if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV6) - mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_TCP; - if (rss_hash_config & RSS_HASHTYPE_RSS_IPV6_EX) - mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX; - if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV6_EX) - mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP; - if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV4) - mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP; - if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV4_EX) - device_printf(adapter->dev, - "%s: RSS_HASHTYPE_RSS_UDP_IPV4_EX defined, " - "but not supported\n", __func__); - if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV6) - mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP; - if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV6_EX) - mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP; -#else - /* - * Disable UDP - IP fragments aren't currently being handled - * and so we end up with a mix of 2-tuple and 4-tuple - * traffic. - */ - mrqc = IXGBE_MRQC_RSSEN - | IXGBE_MRQC_RSS_FIELD_IPV4 - | IXGBE_MRQC_RSS_FIELD_IPV4_TCP - | IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP - | IXGBE_MRQC_RSS_FIELD_IPV6_EX - | IXGBE_MRQC_RSS_FIELD_IPV6 - | IXGBE_MRQC_RSS_FIELD_IPV6_TCP - ; -#endif /* RSS */ -#ifdef PCI_IOV - mode = ixgbe_get_iov_mode(adapter); - mrqc |= ixgbe_get_mrqc(mode); -#endif - IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc); -} - - -/********************************************************************* - * - * Setup receive registers and features. - * - **********************************************************************/ -#define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT 2 - -#define BSIZEPKT_ROUNDUP ((1<<IXGBE_SRRCTL_BSIZEPKT_SHIFT)-1) - -static void -ixgbe_initialize_receive_units(struct adapter *adapter) -{ - struct rx_ring *rxr = adapter->rx_rings; - struct ixgbe_hw *hw = &adapter->hw; - struct ifnet *ifp = adapter->ifp; - u32 bufsz, fctrl, srrctl, rxcsum; - u32 hlreg; - - /* - * Make sure receives are disabled while - * setting up the descriptor ring - */ - ixgbe_disable_rx(hw); - - /* Enable broadcasts */ - fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL); - fctrl |= IXGBE_FCTRL_BAM; - if (adapter->hw.mac.type == ixgbe_mac_82598EB) { - fctrl |= IXGBE_FCTRL_DPF; - fctrl |= IXGBE_FCTRL_PMCF; - } - IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl); - - /* Set for Jumbo Frames? */ - hlreg = IXGBE_READ_REG(hw, IXGBE_HLREG0); - if (ifp->if_mtu > ETHERMTU) - hlreg |= IXGBE_HLREG0_JUMBOEN; - else - hlreg &= ~IXGBE_HLREG0_JUMBOEN; -#ifdef DEV_NETMAP - /* crcstrip is conditional in netmap (in RDRXCTL too ?)
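On the redirection-table loop above: four 8-bit queue indices are packed into each 32-bit RETA word (ERETA past entry 127 on the 512-entry X550 tables) by shifting the accumulator right one byte and inserting the newest index into the top byte, so table entry i ends up in byte (i & 3) of word i >> 2. A standalone sketch of just the packing, with the register write replaced by a print:

#include <stdio.h>
#include <stdint.h>

#define TABLE_SIZE	128	/* 512 on X550-class MACs */

int
main(void)
{
	uint32_t reta = 0;
	int num_queues = 4;	/* arbitrary value for the sketch */

	for (int i = 0, j = 0; i < TABLE_SIZE; i++, j++) {
		if (j == num_queues)
			j = 0;
		/*
		 * Shift the accumulator down one byte and place the new
		 * queue index in bits 31:24; after four entries the
		 * oldest sits in byte 0 and the newest in byte 3.
		 */
		reta >>= 8;
		reta |= (uint32_t)j << 24;
		if ((i & 3) == 3) {	/* one word per four entries */
			printf("RETA[%d] = 0x%08x\n", i >> 2,
			    (unsigned)reta);
			reta = 0;
		}
	}
	return (0);
}

The first word prints as 0x03020100, i.e. queues 0..3 for the first four hash buckets, matching the byte order described in the comment above.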
*/ - if (ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip) - hlreg &= ~IXGBE_HLREG0_RXCRCSTRP; - else - hlreg |= IXGBE_HLREG0_RXCRCSTRP; -#endif /* DEV_NETMAP */ - IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg); - - bufsz = (adapter->rx_mbuf_sz + - BSIZEPKT_ROUNDUP) >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; - - for (int i = 0; i < adapter->num_queues; i++, rxr++) { - u64 rdba = rxr->rxdma.dma_paddr; - int j = rxr->me; - - /* Setup the Base and Length of the Rx Descriptor Ring */ - IXGBE_WRITE_REG(hw, IXGBE_RDBAL(j), - (rdba & 0x00000000ffffffffULL)); - IXGBE_WRITE_REG(hw, IXGBE_RDBAH(j), (rdba >> 32)); - IXGBE_WRITE_REG(hw, IXGBE_RDLEN(j), - adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc)); - - /* Set up the SRRCTL register */ - srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(j)); - srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK; - srrctl &= ~IXGBE_SRRCTL_BSIZEPKT_MASK; - srrctl |= bufsz; - srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF; - - /* - * Set DROP_EN iff we have no flow control and >1 queue. - * Note that srrctl was cleared shortly before during reset, - * so we do not need to clear the bit, but do it just in case - * this code is moved elsewhere. - */ - if (adapter->num_queues > 1 && - adapter->hw.fc.requested_mode == ixgbe_fc_none) { - srrctl |= IXGBE_SRRCTL_DROP_EN; - } else { - srrctl &= ~IXGBE_SRRCTL_DROP_EN; - } - - IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(j), srrctl); - - /* Setup the HW Rx Head and Tail Descriptor Pointers */ - IXGBE_WRITE_REG(hw, IXGBE_RDH(j), 0); - IXGBE_WRITE_REG(hw, IXGBE_RDT(j), 0); - - /* Set the driver rx tail address */ - rxr->tail = IXGBE_RDT(rxr->me); - } - - if (adapter->hw.mac.type != ixgbe_mac_82598EB) { - u32 psrtype = IXGBE_PSRTYPE_TCPHDR | - IXGBE_PSRTYPE_UDPHDR | - IXGBE_PSRTYPE_IPV4HDR | - IXGBE_PSRTYPE_IPV6HDR; - IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0), psrtype); - } - - rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM); - - ixgbe_initialize_rss_mapping(adapter); - - if (adapter->num_queues > 1) { - /* RSS and RX IPP Checksum are mutually exclusive */ - rxcsum |= IXGBE_RXCSUM_PCSD; - } - - if (ifp->if_capenable & IFCAP_RXCSUM) - rxcsum |= IXGBE_RXCSUM_PCSD; - - /* This is useful for calculating UDP/IP fragment checksums */ - if (!(rxcsum & IXGBE_RXCSUM_PCSD)) - rxcsum |= IXGBE_RXCSUM_IPPCSE; - - IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum); - - return; -} - - -/* -** This routine is run via a vlan config EVENT, -** it enables us to use the HW Filter table since -** we can get the vlan id. This just creates the -** entry in the soft version of the VFTA, init will -** repopulate the real table. -*/ -static void -ixgbe_register_vlan(void *arg, struct ifnet *ifp, u16 vtag) -{ - struct adapter *adapter = ifp->if_softc; - u16 index, bit; - - if (ifp->if_softc != arg) /* Not our event */ - return; - - if ((vtag == 0) || (vtag > 4095)) /* Invalid */ - return; - - IXGBE_CORE_LOCK(adapter); - index = (vtag >> 5) & 0x7F; - bit = vtag & 0x1F; - adapter->shadow_vfta[index] |= (1 << bit); - ++adapter->num_vlans; - ixgbe_setup_vlan_hw_support(adapter); - IXGBE_CORE_UNLOCK(adapter); -} - -/* -** This routine is run via a vlan -** unconfig EVENT, remove our entry -** in the soft vfta.
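A worked check of the shadow-VFTA arithmetic in ixgbe_register_vlan() above: the 12-bit VLAN ID is split into a 7-bit word index (bits 11:5) and a 5-bit bit position (bits 4:0) into an array of 128 32-bit words, one bit per possible VLAN. A standalone sketch:

#include <stdio.h>
#include <stdint.h>

#define VFTA_SIZE	128	/* 128 * 32 bits = 4096 VLAN IDs */

int
main(void)
{
	uint32_t shadow_vfta[VFTA_SIZE] = { 0 };
	uint16_t vtag = 1234;	/* example VLAN ID */

	uint16_t index = (vtag >> 5) & 0x7F;	/* which word */
	uint16_t bit = vtag & 0x1F;		/* which bit within it */

	shadow_vfta[index] |= 1u << bit;	/* register: set the bit */
	printf("vlan %u -> word %u, bit %u\n", vtag, index, bit);

	shadow_vfta[index] &= ~(1u << bit);	/* unregister: clear it */
	return (0);
}

VLAN 1234 lands in word 38, bit 18; init later repopulates the hardware VFTA from this soft copy, as the comment above notes.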
-*/ -static void -ixgbe_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag) -{ - struct adapter *adapter = ifp->if_softc; - u16 index, bit; - - if (ifp->if_softc != arg) - return; - - if ((vtag == 0) || (vtag > 4095)) /* Invalid */ - return; - - IXGBE_CORE_LOCK(adapter); - index = (vtag >> 5) & 0x7F; - bit = vtag & 0x1F; - adapter->shadow_vfta[index] &= ~(1 << bit); - --adapter->num_vlans; - /* Re-init to load the changes */ - ixgbe_setup_vlan_hw_support(adapter); - IXGBE_CORE_UNLOCK(adapter); -} - -static void -ixgbe_setup_vlan_hw_support(struct adapter *adapter) -{ - struct ifnet *ifp = adapter->ifp; - struct ixgbe_hw *hw = &adapter->hw; - struct rx_ring *rxr; - u32 ctrl; - - - /* - ** We get here thru init_locked, meaning - ** a soft reset, this has already cleared - ** the VFTA and other state, so if there - ** have been no vlan's registered do nothing. - */ - if (adapter->num_vlans == 0) - return; - - /* Setup the queues for vlans */ - for (int i = 0; i < adapter->num_queues; i++) { - rxr = &adapter->rx_rings[i]; - /* On 82599 the VLAN enable is per/queue in RXDCTL */ - if (hw->mac.type != ixgbe_mac_82598EB) { - ctrl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxr->me)); - ctrl |= IXGBE_RXDCTL_VME; - IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxr->me), ctrl); - } - rxr->vtag_strip = TRUE; - } - - if ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0) - return; - /* - ** A soft reset zero's out the VFTA, so - ** we need to repopulate it now. - */ - for (int i = 0; i < IXGBE_VFTA_SIZE; i++) - if (adapter->shadow_vfta[i] != 0) - IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), - adapter->shadow_vfta[i]); - - ctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL); - /* Enable the Filter Table if enabled */ - if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) { - ctrl &= ~IXGBE_VLNCTRL_CFIEN; - ctrl |= IXGBE_VLNCTRL_VFE; - } - if (hw->mac.type == ixgbe_mac_82598EB) - ctrl |= IXGBE_VLNCTRL_VME; - IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, ctrl); -} - -static void -ixgbe_enable_intr(struct adapter *adapter) -{ - struct ixgbe_hw *hw = &adapter->hw; - struct ix_queue *que = adapter->queues; - u32 mask, fwsm; - - mask = (IXGBE_EIMS_ENABLE_MASK & ~IXGBE_EIMS_RTX_QUEUE); - /* Enable Fan Failure detection */ - if (hw->device_id == IXGBE_DEV_ID_82598AT) - mask |= IXGBE_EIMS_GPI_SDP1; - - switch (adapter->hw.mac.type) { - case ixgbe_mac_82599EB: - mask |= IXGBE_EIMS_ECC; - /* Temperature sensor on some adapters */ - mask |= IXGBE_EIMS_GPI_SDP0; - /* SFP+ (RX_LOS_N & MOD_ABS_N) */ - mask |= IXGBE_EIMS_GPI_SDP1; - mask |= IXGBE_EIMS_GPI_SDP2; -#ifdef IXGBE_FDIR - mask |= IXGBE_EIMS_FLOW_DIR; -#endif -#ifdef PCI_IOV - mask |= IXGBE_EIMS_MAILBOX; -#endif - break; - case ixgbe_mac_X540: - /* Detect if Thermal Sensor is enabled */ - fwsm = IXGBE_READ_REG(hw, IXGBE_FWSM); - if (fwsm & IXGBE_FWSM_TS_ENABLED) - mask |= IXGBE_EIMS_TS; - mask |= IXGBE_EIMS_ECC; -#ifdef IXGBE_FDIR - mask |= IXGBE_EIMS_FLOW_DIR; -#endif - break; - case ixgbe_mac_X550: - case ixgbe_mac_X550EM_x: - /* MAC thermal sensor is automatically enabled */ - mask |= IXGBE_EIMS_TS; - /* Some devices use SDP0 for important information */ - if (hw->device_id == IXGBE_DEV_ID_X550EM_X_SFP || - hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T) - mask |= IXGBE_EIMS_GPI_SDP0_BY_MAC(hw); - mask |= IXGBE_EIMS_ECC; -#ifdef IXGBE_FDIR - mask |= IXGBE_EIMS_FLOW_DIR; -#endif -#ifdef PCI_IOV - mask |= IXGBE_EIMS_MAILBOX; -#endif - /* falls through */ - default: - break; - } - - IXGBE_WRITE_REG(hw, IXGBE_EIMS, mask); - - /* With MSI-X we use auto clear */ - if (adapter->msix_mem) { - mask = IXGBE_EIMS_ENABLE_MASK; - /* 
Don't autoclear Link */ - mask &= ~IXGBE_EIMS_OTHER; - mask &= ~IXGBE_EIMS_LSC; -#ifdef PCI_IOV - mask &= ~IXGBE_EIMS_MAILBOX; -#endif - IXGBE_WRITE_REG(hw, IXGBE_EIAC, mask); - } - - /* - ** Now enable all queues, this is done separately to - ** allow for handling the extended (beyond 32) MSIX - ** vectors that can be used by 82599 - */ - for (int i = 0; i < adapter->num_queues; i++, que++) - ixgbe_enable_queue(adapter, que->msix); - - IXGBE_WRITE_FLUSH(hw); - - return; -} - -static void -ixgbe_disable_intr(struct adapter *adapter) -{ - if (adapter->msix_mem) - IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIAC, 0); - if (adapter->hw.mac.type == ixgbe_mac_82598EB) { - IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, ~0); - } else { - IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, 0xFFFF0000); - IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC_EX(0), ~0); - IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC_EX(1), ~0); - } - IXGBE_WRITE_FLUSH(&adapter->hw); - return; -} - -/* -** Get the width and transaction speed of -** the slot this adapter is plugged into. -*/ -static void -ixgbe_get_slot_info(struct adapter *adapter) -{ - device_t dev = adapter->dev; - struct ixgbe_hw *hw = &adapter->hw; - struct ixgbe_mac_info *mac = &hw->mac; - u16 link; - u32 offset; - - /* For most devices simply call the shared code routine */ - if (hw->device_id != IXGBE_DEV_ID_82599_SFP_SF_QP) { - ixgbe_get_bus_info(hw); - /* These devices don't use PCI-E */ - switch (hw->mac.type) { - case ixgbe_mac_X550EM_x: - return; - default: - goto display; - } - } - - /* - ** For the Quad port adapter we need to parse back - ** up the PCI tree to find the speed of the expansion - ** slot into which this adapter is plugged. A bit more work. - */ - dev = device_get_parent(device_get_parent(dev)); -#ifdef IXGBE_DEBUG - device_printf(dev, "parent pcib = %x,%x,%x\n", - pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev)); -#endif - dev = device_get_parent(device_get_parent(dev)); -#ifdef IXGBE_DEBUG - device_printf(dev, "slot pcib = %x,%x,%x\n", - pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev)); -#endif - /* Now get the PCI Express Capabilities offset */ - pci_find_cap(dev, PCIY_EXPRESS, &offset); - /* ...and read the Link Status Register */ - link = pci_read_config(dev, offset + PCIER_LINK_STA, 2); - switch (link & IXGBE_PCI_LINK_WIDTH) { - case IXGBE_PCI_LINK_WIDTH_1: - hw->bus.width = ixgbe_bus_width_pcie_x1; - break; - case IXGBE_PCI_LINK_WIDTH_2: - hw->bus.width = ixgbe_bus_width_pcie_x2; - break; - case IXGBE_PCI_LINK_WIDTH_4: - hw->bus.width = ixgbe_bus_width_pcie_x4; - break; - case IXGBE_PCI_LINK_WIDTH_8: - hw->bus.width = ixgbe_bus_width_pcie_x8; - break; - default: - hw->bus.width = ixgbe_bus_width_unknown; - break; - } - - switch (link & IXGBE_PCI_LINK_SPEED) { - case IXGBE_PCI_LINK_SPEED_2500: - hw->bus.speed = ixgbe_bus_speed_2500; - break; - case IXGBE_PCI_LINK_SPEED_5000: - hw->bus.speed = ixgbe_bus_speed_5000; - break; - case IXGBE_PCI_LINK_SPEED_8000: - hw->bus.speed = ixgbe_bus_speed_8000; - break; - default: - hw->bus.speed = ixgbe_bus_speed_unknown; - break; - } - - mac->ops.set_lan_id(hw); - -display: - device_printf(dev,"PCI Express Bus: Speed %s %s\n", - ((hw->bus.speed == ixgbe_bus_speed_8000) ? "8.0GT/s": - (hw->bus.speed == ixgbe_bus_speed_5000) ? "5.0GT/s": - (hw->bus.speed == ixgbe_bus_speed_2500) ? "2.5GT/s":"Unknown"), - (hw->bus.width == ixgbe_bus_width_pcie_x8) ? "Width x8" : - (hw->bus.width == ixgbe_bus_width_pcie_x4) ? "Width x4" : - (hw->bus.width == ixgbe_bus_width_pcie_x1) ? 
"Width x1" : - ("Unknown")); - - if ((hw->device_id != IXGBE_DEV_ID_82599_SFP_SF_QP) && - ((hw->bus.width <= ixgbe_bus_width_pcie_x4) && - (hw->bus.speed == ixgbe_bus_speed_2500))) { - device_printf(dev, "PCI-Express bandwidth available" - " for this card\n is not sufficient for" - " optimal performance.\n"); - device_printf(dev, "For optimal performance a x8 " - "PCIE, or x4 PCIE Gen2 slot is required.\n"); - } - if ((hw->device_id == IXGBE_DEV_ID_82599_SFP_SF_QP) && - ((hw->bus.width <= ixgbe_bus_width_pcie_x8) && - (hw->bus.speed < ixgbe_bus_speed_8000))) { - device_printf(dev, "PCI-Express bandwidth available" - " for this card\n is not sufficient for" - " optimal performance.\n"); - device_printf(dev, "For optimal performance a x8 " - "PCIE Gen3 slot is required.\n"); - } - - return; -} - - -/* -** Setup the correct IVAR register for a particular MSIX interrupt -** (yes this is all very magic and confusing :) -** - entry is the register array entry -** - vector is the MSIX vector for this queue -** - type is RX/TX/MISC -*/ -static void -ixgbe_set_ivar(struct adapter *adapter, u8 entry, u8 vector, s8 type) -{ - struct ixgbe_hw *hw = &adapter->hw; - u32 ivar, index; - - vector |= IXGBE_IVAR_ALLOC_VAL; - - switch (hw->mac.type) { - - case ixgbe_mac_82598EB: - if (type == -1) - entry = IXGBE_IVAR_OTHER_CAUSES_INDEX; - else - entry += (type * 64); - index = (entry >> 2) & 0x1F; - ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index)); - ivar &= ~(0xFF << (8 * (entry & 0x3))); - ivar |= (vector << (8 * (entry & 0x3))); - IXGBE_WRITE_REG(&adapter->hw, IXGBE_IVAR(index), ivar); - break; - - case ixgbe_mac_82599EB: - case ixgbe_mac_X540: - case ixgbe_mac_X550: - case ixgbe_mac_X550EM_x: - if (type == -1) { /* MISC IVAR */ - index = (entry & 1) * 8; - ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC); - ivar &= ~(0xFF << index); - ivar |= (vector << index); - IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar); - } else { /* RX/TX IVARS */ - index = (16 * (entry & 1)) + (8 * type); - ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1)); - ivar &= ~(0xFF << index); - ivar |= (vector << index); - IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar); - } - - default: - break; - } -} - -static void -ixgbe_configure_ivars(struct adapter *adapter) -{ - struct ix_queue *que = adapter->queues; - u32 newitr; - - if (ixgbe_max_interrupt_rate > 0) - newitr = (4000000 / ixgbe_max_interrupt_rate) & 0x0FF8; - else { - /* - ** Disable DMA coalescing if interrupt moderation is - ** disabled. - */ - adapter->dmac = 0; - newitr = 0; - } - - for (int i = 0; i < adapter->num_queues; i++, que++) { - struct rx_ring *rxr = &adapter->rx_rings[i]; - struct tx_ring *txr = &adapter->tx_rings[i]; - /* First the RX queue entry */ - ixgbe_set_ivar(adapter, rxr->me, que->msix, 0); - /* ... and the TX */ - ixgbe_set_ivar(adapter, txr->me, que->msix, 1); - /* Set an Initial EITR value */ - IXGBE_WRITE_REG(&adapter->hw, - IXGBE_EITR(que->msix), newitr); - } - - /* For the Link interrupt */ - ixgbe_set_ivar(adapter, 1, adapter->vector, -1); -} - -/* -** ixgbe_sfp_probe - called in the local timer to -** determine if a port had optics inserted. 
-*/ -static bool -ixgbe_sfp_probe(struct adapter *adapter) -{ - struct ixgbe_hw *hw = &adapter->hw; - device_t dev = adapter->dev; - bool result = FALSE; - - if ((hw->phy.type == ixgbe_phy_nl) && - (hw->phy.sfp_type == ixgbe_sfp_type_not_present)) { - s32 ret = hw->phy.ops.identify_sfp(hw); - if (ret) - goto out; - ret = hw->phy.ops.reset(hw); - if (ret == IXGBE_ERR_SFP_NOT_SUPPORTED) { - device_printf(dev, "Unsupported SFP+ module detected!"); - device_printf(dev, "Reload driver with supported module.\n"); - adapter->sfp_probe = FALSE; - goto out; - } else - device_printf(dev, "SFP+ module detected!\n"); - /* We now have supported optics */ - adapter->sfp_probe = FALSE; - /* Set the optics type so system reports correctly */ - ixgbe_setup_optics(adapter); - result = TRUE; - } -out: - return (result); -} - -/* -** Tasklet handler for MSIX Link interrupts -** - do outside interrupt since it might sleep -*/ -static void -ixgbe_handle_link(void *context, int pending) -{ - struct adapter *adapter = context; - struct ixgbe_hw *hw = &adapter->hw; - - ixgbe_check_link(hw, - &adapter->link_speed, &adapter->link_up, 0); - ixgbe_update_link_status(adapter); - - /* Re-enable link interrupts */ - IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EIMS_LSC); -} - -/* -** Tasklet for handling SFP module interrupts -*/ -static void -ixgbe_handle_mod(void *context, int pending) -{ - struct adapter *adapter = context; - struct ixgbe_hw *hw = &adapter->hw; - enum ixgbe_phy_type orig_type = hw->phy.type; - device_t dev = adapter->dev; - u32 err; - - IXGBE_CORE_LOCK(adapter); - - /* Check to see if the PHY type changed */ - if (hw->phy.ops.identify) { - hw->phy.type = ixgbe_phy_unknown; - hw->phy.ops.identify(hw); - } - - if (hw->phy.type != orig_type) { - device_printf(dev, "Detected phy_type %d\n", hw->phy.type); - - if (hw->phy.type == ixgbe_phy_none) { - hw->phy.sfp_type = ixgbe_sfp_type_unknown; - goto out; - } - - /* Try to do the initialization that was skipped before */ - if (hw->phy.ops.init) - hw->phy.ops.init(hw); - if (hw->phy.ops.reset) - hw->phy.ops.reset(hw); - } - - err = hw->phy.ops.identify_sfp(hw); - if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) { - device_printf(dev, - "Unsupported SFP+ module type was detected.\n"); - goto out; - } - - err = hw->mac.ops.setup_sfp(hw); - if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) { - device_printf(dev, - "Setup failure - unsupported SFP+ module type.\n"); - goto out; - } - if (hw->phy.multispeed_fiber) - taskqueue_enqueue(adapter->tq, &adapter->msf_task); -out: - /* Update media type */ - switch (hw->mac.ops.get_media_type(hw)) { - case ixgbe_media_type_fiber: - adapter->optics = IFM_10G_SR; - break; - case ixgbe_media_type_copper: - adapter->optics = IFM_10G_TWINAX; - break; - case ixgbe_media_type_cx4: - adapter->optics = IFM_10G_CX4; - break; - default: - adapter->optics = 0; - break; - } - - IXGBE_CORE_UNLOCK(adapter); - return; -} - - -/* -** Tasklet for handling MSF (multispeed fiber) interrupts -*/ -static void -ixgbe_handle_msf(void *context, int pending) -{ - struct adapter *adapter = context; - struct ixgbe_hw *hw = &adapter->hw; - u32 autoneg; - bool negotiate; - - IXGBE_CORE_LOCK(adapter); - /* get_supported_phy_layer will call hw->phy.ops.identify_sfp() */ - adapter->phy_layer = ixgbe_get_supported_physical_layer(hw); - - autoneg = hw->phy.autoneg_advertised; - if ((!autoneg) && (hw->mac.ops.get_link_capabilities)) - hw->mac.ops.get_link_capabilities(hw, &autoneg, &negotiate); - if (hw->mac.ops.setup_link) - hw->mac.ops.setup_link(hw, autoneg, TRUE); - - /* Adjust 
media types shown in ifconfig */ - ifmedia_removeall(&adapter->media); - ixgbe_add_media_types(adapter); - IXGBE_CORE_UNLOCK(adapter); - return; -} - -/* -** Tasklet for handling interrupts from an external PHY -*/ -static void -ixgbe_handle_phy(void *context, int pending) -{ - struct adapter *adapter = context; - struct ixgbe_hw *hw = &adapter->hw; - int error; - - error = hw->phy.ops.handle_lasi(hw); - if (error == IXGBE_ERR_OVERTEMP) - device_printf(adapter->dev, - "CRITICAL: EXTERNAL PHY OVER TEMP!! " - " PHY will downshift to lower power state!\n"); - else if (error) - device_printf(adapter->dev, - "Error handling LASI interrupt: %d\n", - error); - return; -} - -#ifdef IXGBE_FDIR -/* -** Tasklet for reinitializing the Flow Director filter table -*/ -static void -ixgbe_reinit_fdir(void *context, int pending) -{ - struct adapter *adapter = context; - struct ifnet *ifp = adapter->ifp; - - if (adapter->fdir_reinit != 1) /* Shouldn't happen */ - return; - ixgbe_reinit_fdir_tables_82599(&adapter->hw); - adapter->fdir_reinit = 0; - /* re-enable flow director interrupts */ - IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, IXGBE_EIMS_FLOW_DIR); - /* Restart the interface */ - ifp->if_drv_flags |= IFF_DRV_RUNNING; - return; -} -#endif - -/********************************************************************* - * - * Configure DMA Coalescing - * - **********************************************************************/ -static void -ixgbe_config_dmac(struct adapter *adapter) -{ - struct ixgbe_hw *hw = &adapter->hw; - struct ixgbe_dmac_config *dcfg = &hw->mac.dmac_config; - - if (hw->mac.type < ixgbe_mac_X550 || - !hw->mac.ops.dmac_config) - return; - - if (dcfg->watchdog_timer ^ adapter->dmac || - dcfg->link_speed ^ adapter->link_speed) { - dcfg->watchdog_timer = adapter->dmac; - dcfg->fcoe_en = false; - dcfg->link_speed = adapter->link_speed; - dcfg->num_tcs = 1; - - INIT_DEBUGOUT2("dmac settings: watchdog %d, link speed %d\n", - dcfg->watchdog_timer, dcfg->link_speed); - - hw->mac.ops.dmac_config(hw); - } -} - -/* - * Checks whether the adapter's ports are capable of - * Wake On LAN by reading the adapter's NVM. - * - * Sets each port's hw->wol_enabled value depending - * on the value read here. - */ -static void -ixgbe_check_wol_support(struct adapter *adapter) -{ - struct ixgbe_hw *hw = &adapter->hw; - u16 dev_caps = 0; - - /* Find out WoL support for port */ - adapter->wol_support = hw->wol_enabled = 0; - ixgbe_get_device_caps(hw, &dev_caps); - if ((dev_caps & IXGBE_DEVICE_CAPS_WOL_PORT0_1) || - ((dev_caps & IXGBE_DEVICE_CAPS_WOL_PORT0) && - hw->bus.func == 0)) - adapter->wol_support = hw->wol_enabled = 1; - - /* Save initial wake up filter configuration */ - adapter->wufc = IXGBE_READ_REG(hw, IXGBE_WUFC); - - return; -} - -/* - * Prepare the adapter/port for LPLU and/or WoL - */ -static int -ixgbe_setup_low_power_mode(struct adapter *adapter) -{ - struct ixgbe_hw *hw = &adapter->hw; - device_t dev = adapter->dev; - s32 error = 0; - - mtx_assert(&adapter->core_mtx, MA_OWNED); - - if (!hw->wol_enabled) - ixgbe_set_phy_power(hw, FALSE); - - /* Limit power management flow to X550EM baseT */ - if (hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T - && hw->phy.ops.enter_lplu) { - /* Turn off support for APM wakeup. (Using ACPI instead) */ - IXGBE_WRITE_REG(hw, IXGBE_GRC, - IXGBE_READ_REG(hw, IXGBE_GRC) & ~(u32)2); - - /* - * Clear Wake Up Status register to prevent any previous wakeup - * events from waking us up immediately after we suspend. 
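A small note on the change test in ixgbe_config_dmac() above: integer XOR is nonzero exactly when its operands differ, so "dcfg->watchdog_timer ^ adapter->dmac || dcfg->link_speed ^ adapter->link_speed" reads as "has either value changed", and the hardware is only reprogrammed when one did. The idiom in isolation:

#include <stdio.h>
#include <stdint.h>

int
main(void)
{
	uint16_t cur = 0, want = 250;	/* e.g. DMAC watchdog values */

	if (cur ^ want)			/* equivalent to cur != want */
		printf("reprogram: %u -> %u\n", cur, want);
	return (0);
}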
- */ - IXGBE_WRITE_REG(hw, IXGBE_WUS, 0xffffffff); - - /* - * Program the Wakeup Filter Control register with user filter - * settings - */ - IXGBE_WRITE_REG(hw, IXGBE_WUFC, adapter->wufc); - - /* Enable wakeups and power management in Wakeup Control */ - IXGBE_WRITE_REG(hw, IXGBE_WUC, - IXGBE_WUC_WKEN | IXGBE_WUC_PME_EN); - - /* X550EM baseT adapters need a special LPLU flow */ - hw->phy.reset_disable = true; - ixgbe_stop(adapter); - error = hw->phy.ops.enter_lplu(hw); - if (error) - device_printf(dev, - "Error entering LPLU: %d\n", error); - hw->phy.reset_disable = false; - } else { - /* Just stop for other adapters */ - ixgbe_stop(adapter); - } - - return error; -} - -/********************************************************************** - * - * Update the board statistics counters. - * - **********************************************************************/ -static void -ixgbe_update_stats_counters(struct adapter *adapter) -{ - struct ixgbe_hw *hw = &adapter->hw; - u32 missed_rx = 0, bprc, lxon, lxoff, total; - u64 total_missed_rx = 0; - - adapter->stats.pf.crcerrs += IXGBE_READ_REG(hw, IXGBE_CRCERRS); - adapter->stats.pf.illerrc += IXGBE_READ_REG(hw, IXGBE_ILLERRC); - adapter->stats.pf.errbc += IXGBE_READ_REG(hw, IXGBE_ERRBC); - adapter->stats.pf.mspdc += IXGBE_READ_REG(hw, IXGBE_MSPDC); - - for (int i = 0; i < 16; i++) { - adapter->stats.pf.qprc[i] += IXGBE_READ_REG(hw, IXGBE_QPRC(i)); - adapter->stats.pf.qptc[i] += IXGBE_READ_REG(hw, IXGBE_QPTC(i)); - adapter->stats.pf.qprdc[i] += IXGBE_READ_REG(hw, IXGBE_QPRDC(i)); - } - adapter->stats.pf.mlfc += IXGBE_READ_REG(hw, IXGBE_MLFC); - adapter->stats.pf.mrfc += IXGBE_READ_REG(hw, IXGBE_MRFC); - adapter->stats.pf.rlec += IXGBE_READ_REG(hw, IXGBE_RLEC); - - /* Hardware workaround, gprc counts missed packets */ - adapter->stats.pf.gprc += IXGBE_READ_REG(hw, IXGBE_GPRC); - adapter->stats.pf.gprc -= missed_rx; - - if (hw->mac.type != ixgbe_mac_82598EB) { - adapter->stats.pf.gorc += IXGBE_READ_REG(hw, IXGBE_GORCL) + - ((u64)IXGBE_READ_REG(hw, IXGBE_GORCH) << 32); - adapter->stats.pf.gotc += IXGBE_READ_REG(hw, IXGBE_GOTCL) + - ((u64)IXGBE_READ_REG(hw, IXGBE_GOTCH) << 32); - adapter->stats.pf.tor += IXGBE_READ_REG(hw, IXGBE_TORL) + - ((u64)IXGBE_READ_REG(hw, IXGBE_TORH) << 32); - adapter->stats.pf.lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXCNT); - adapter->stats.pf.lxoffrxc += IXGBE_READ_REG(hw, IXGBE_LXOFFRXCNT); - } else { - adapter->stats.pf.lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXC); - adapter->stats.pf.lxoffrxc += IXGBE_READ_REG(hw, IXGBE_LXOFFRXC); - /* 82598 only has a counter in the high register */ - adapter->stats.pf.gorc += IXGBE_READ_REG(hw, IXGBE_GORCH); - adapter->stats.pf.gotc += IXGBE_READ_REG(hw, IXGBE_GOTCH); - adapter->stats.pf.tor += IXGBE_READ_REG(hw, IXGBE_TORH); - } - - /* - * Workaround: mprc hardware is incorrectly counting - * broadcasts, so for now we subtract those. 
- */ - bprc = IXGBE_READ_REG(hw, IXGBE_BPRC); - adapter->stats.pf.bprc += bprc; - adapter->stats.pf.mprc += IXGBE_READ_REG(hw, IXGBE_MPRC); - if (hw->mac.type == ixgbe_mac_82598EB) - adapter->stats.pf.mprc -= bprc; - - adapter->stats.pf.prc64 += IXGBE_READ_REG(hw, IXGBE_PRC64); - adapter->stats.pf.prc127 += IXGBE_READ_REG(hw, IXGBE_PRC127); - adapter->stats.pf.prc255 += IXGBE_READ_REG(hw, IXGBE_PRC255); - adapter->stats.pf.prc511 += IXGBE_READ_REG(hw, IXGBE_PRC511); - adapter->stats.pf.prc1023 += IXGBE_READ_REG(hw, IXGBE_PRC1023); - adapter->stats.pf.prc1522 += IXGBE_READ_REG(hw, IXGBE_PRC1522); - - lxon = IXGBE_READ_REG(hw, IXGBE_LXONTXC); - adapter->stats.pf.lxontxc += lxon; - lxoff = IXGBE_READ_REG(hw, IXGBE_LXOFFTXC); - adapter->stats.pf.lxofftxc += lxoff; - total = lxon + lxoff; - - adapter->stats.pf.gptc += IXGBE_READ_REG(hw, IXGBE_GPTC); - adapter->stats.pf.mptc += IXGBE_READ_REG(hw, IXGBE_MPTC); - adapter->stats.pf.ptc64 += IXGBE_READ_REG(hw, IXGBE_PTC64); - adapter->stats.pf.gptc -= total; - adapter->stats.pf.mptc -= total; - adapter->stats.pf.ptc64 -= total; - adapter->stats.pf.gotc -= total * ETHER_MIN_LEN; - - adapter->stats.pf.ruc += IXGBE_READ_REG(hw, IXGBE_RUC); - adapter->stats.pf.rfc += IXGBE_READ_REG(hw, IXGBE_RFC); - adapter->stats.pf.roc += IXGBE_READ_REG(hw, IXGBE_ROC); - adapter->stats.pf.rjc += IXGBE_READ_REG(hw, IXGBE_RJC); - adapter->stats.pf.mngprc += IXGBE_READ_REG(hw, IXGBE_MNGPRC); - adapter->stats.pf.mngpdc += IXGBE_READ_REG(hw, IXGBE_MNGPDC); - adapter->stats.pf.mngptc += IXGBE_READ_REG(hw, IXGBE_MNGPTC); - adapter->stats.pf.tpr += IXGBE_READ_REG(hw, IXGBE_TPR); - adapter->stats.pf.tpt += IXGBE_READ_REG(hw, IXGBE_TPT); - adapter->stats.pf.ptc127 += IXGBE_READ_REG(hw, IXGBE_PTC127); - adapter->stats.pf.ptc255 += IXGBE_READ_REG(hw, IXGBE_PTC255); - adapter->stats.pf.ptc511 += IXGBE_READ_REG(hw, IXGBE_PTC511); - adapter->stats.pf.ptc1023 += IXGBE_READ_REG(hw, IXGBE_PTC1023); - adapter->stats.pf.ptc1522 += IXGBE_READ_REG(hw, IXGBE_PTC1522); - adapter->stats.pf.bptc += IXGBE_READ_REG(hw, IXGBE_BPTC); - adapter->stats.pf.xec += IXGBE_READ_REG(hw, IXGBE_XEC); - adapter->stats.pf.fccrc += IXGBE_READ_REG(hw, IXGBE_FCCRC); - adapter->stats.pf.fclast += IXGBE_READ_REG(hw, IXGBE_FCLAST); - /* Only read FCOE on 82599 */ - if (hw->mac.type != ixgbe_mac_82598EB) { - adapter->stats.pf.fcoerpdc += IXGBE_READ_REG(hw, IXGBE_FCOERPDC); - adapter->stats.pf.fcoeprc += IXGBE_READ_REG(hw, IXGBE_FCOEPRC); - adapter->stats.pf.fcoeptc += IXGBE_READ_REG(hw, IXGBE_FCOEPTC); - adapter->stats.pf.fcoedwrc += IXGBE_READ_REG(hw, IXGBE_FCOEDWRC); - adapter->stats.pf.fcoedwtc += IXGBE_READ_REG(hw, IXGBE_FCOEDWTC); - } - - /* Fill out the OS statistics structure */ - IXGBE_SET_IPACKETS(adapter, adapter->stats.pf.gprc); - IXGBE_SET_OPACKETS(adapter, adapter->stats.pf.gptc); - IXGBE_SET_IBYTES(adapter, adapter->stats.pf.gorc); - IXGBE_SET_OBYTES(adapter, adapter->stats.pf.gotc); - IXGBE_SET_IMCASTS(adapter, adapter->stats.pf.mprc); - IXGBE_SET_OMCASTS(adapter, adapter->stats.pf.mptc); - IXGBE_SET_COLLISIONS(adapter, 0); - IXGBE_SET_IQDROPS(adapter, total_missed_rx); - IXGBE_SET_IERRORS(adapter, adapter->stats.pf.crcerrs - + adapter->stats.pf.rlec); -} - -#if __FreeBSD_version >= 1100036 -static uint64_t -ixgbe_get_counter(struct ifnet *ifp, ift_counter cnt) -{ - struct adapter *adapter; - struct tx_ring *txr; - uint64_t rv; - - adapter = if_getsoftc(ifp); - - switch (cnt) { - case IFCOUNTER_IPACKETS: - return (adapter->ipackets); - case IFCOUNTER_OPACKETS: - return (adapter->opackets); - case 
IFCOUNTER_IBYTES: - return (adapter->ibytes); - case IFCOUNTER_OBYTES: - return (adapter->obytes); - case IFCOUNTER_IMCASTS: - return (adapter->imcasts); - case IFCOUNTER_OMCASTS: - return (adapter->omcasts); - case IFCOUNTER_COLLISIONS: - return (0); - case IFCOUNTER_IQDROPS: - return (adapter->iqdrops); - case IFCOUNTER_OQDROPS: - rv = 0; - txr = adapter->tx_rings; - for (int i = 0; i < adapter->num_queues; i++, txr++) - rv += txr->br->br_drops; - return (rv); - case IFCOUNTER_IERRORS: - return (adapter->ierrors); - default: - return (if_get_counter_default(ifp, cnt)); - } -} -#endif - -/** ixgbe_sysctl_tdh_handler - Handler function - * Retrieves the TDH value from the hardware - */ -static int -ixgbe_sysctl_tdh_handler(SYSCTL_HANDLER_ARGS) -{ - int error; - - struct tx_ring *txr = ((struct tx_ring *)oidp->oid_arg1); - if (!txr) return 0; - - unsigned val = IXGBE_READ_REG(&txr->adapter->hw, IXGBE_TDH(txr->me)); - error = sysctl_handle_int(oidp, &val, 0, req); - if (error || !req->newptr) - return error; - return 0; -} - -/** ixgbe_sysctl_tdt_handler - Handler function - * Retrieves the TDT value from the hardware - */ -static int -ixgbe_sysctl_tdt_handler(SYSCTL_HANDLER_ARGS) -{ - int error; - - struct tx_ring *txr = ((struct tx_ring *)oidp->oid_arg1); - if (!txr) return 0; - - unsigned val = IXGBE_READ_REG(&txr->adapter->hw, IXGBE_TDT(txr->me)); - error = sysctl_handle_int(oidp, &val, 0, req); - if (error || !req->newptr) - return error; - return 0; -} - -/** ixgbe_sysctl_rdh_handler - Handler function - * Retrieves the RDH value from the hardware - */ -static int -ixgbe_sysctl_rdh_handler(SYSCTL_HANDLER_ARGS) -{ - int error; - - struct rx_ring *rxr = ((struct rx_ring *)oidp->oid_arg1); - if (!rxr) return 0; - - unsigned val = IXGBE_READ_REG(&rxr->adapter->hw, IXGBE_RDH(rxr->me)); - error = sysctl_handle_int(oidp, &val, 0, req); - if (error || !req->newptr) - return error; - return 0; -} - -/** ixgbe_sysctl_rdt_handler - Handler function - * Retrieves the RDT value from the hardware - */ -static int -ixgbe_sysctl_rdt_handler(SYSCTL_HANDLER_ARGS) -{ - int error; - - struct rx_ring *rxr = ((struct rx_ring *)oidp->oid_arg1); - if (!rxr) return 0; - - unsigned val = IXGBE_READ_REG(&rxr->adapter->hw, IXGBE_RDT(rxr->me)); - error = sysctl_handle_int(oidp, &val, 0, req); - if (error || !req->newptr) - return error; - return 0; -} - -static int -ixgbe_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS) -{ - int error; - struct ix_queue *que = ((struct ix_queue *)oidp->oid_arg1); - unsigned int reg, usec, rate; - - reg = IXGBE_READ_REG(&que->adapter->hw, IXGBE_EITR(que->msix)); - usec = ((reg & 0x0FF8) >> 3); - if (usec > 0) - rate = 500000 / usec; - else - rate = 0; - error = sysctl_handle_int(oidp, &rate, 0, req); - if (error || !req->newptr) - return error; - reg &= ~0xfff; /* default, no limitation */ - ixgbe_max_interrupt_rate = 0; - if (rate > 0 && rate < 500000) { - if (rate < 1000) - rate = 1000; - ixgbe_max_interrupt_rate = rate; - reg |= ((4000000/rate) & 0xff8 ); - } - IXGBE_WRITE_REG(&que->adapter->hw, IXGBE_EITR(que->msix), reg); - return 0; -} - -static void -ixgbe_add_device_sysctls(struct adapter *adapter) -{ - device_t dev = adapter->dev; - struct ixgbe_hw *hw = &adapter->hw; - struct sysctl_oid_list *child; - struct sysctl_ctx_list *ctx; - - ctx = device_get_sysctl_ctx(dev); - child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); - - /* Sysctls for all devices */ - SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "fc", - CTLTYPE_INT | CTLFLAG_RW, adapter, 0, - ixgbe_sysctl_flowcntl, "I", 
IXGBE_SYSCTL_DESC_SET_FC); - - SYSCTL_ADD_INT(ctx, child, OID_AUTO, "enable_aim", - CTLFLAG_RW, - &ixgbe_enable_aim, 1, "Interrupt Moderation"); - - SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "advertise_speed", - CTLTYPE_INT | CTLFLAG_RW, adapter, 0, - ixgbe_sysctl_advertise, "I", IXGBE_SYSCTL_DESC_ADV_SPEED); - - SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "thermal_test", - CTLTYPE_INT | CTLFLAG_RW, adapter, 0, - ixgbe_sysctl_thermal_test, "I", "Thermal Test"); - -#ifdef IXGBE_DEBUG - /* testing sysctls (for all devices) */ - SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "power_state", - CTLTYPE_INT | CTLFLAG_RW, adapter, 0, - ixgbe_sysctl_power_state, "I", "PCI Power State"); - - SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "print_rss_config", - CTLTYPE_STRING | CTLFLAG_RD, adapter, 0, - ixgbe_sysctl_print_rss_config, "A", "Prints RSS Configuration"); -#endif - /* for X550 series devices */ - if (hw->mac.type >= ixgbe_mac_X550) - SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "dmac", - CTLTYPE_INT | CTLFLAG_RW, adapter, 0, - ixgbe_sysctl_dmac, "I", "DMA Coalesce"); - - /* for X552 backplane devices */ - if (hw->device_id == IXGBE_DEV_ID_X550EM_X_KR) { - struct sysctl_oid *eee_node; - struct sysctl_oid_list *eee_list; - - eee_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "eee", - CTLFLAG_RD, NULL, - "Energy Efficient Ethernet sysctls"); - eee_list = SYSCTL_CHILDREN(eee_node); - - SYSCTL_ADD_PROC(ctx, eee_list, OID_AUTO, "enable", - CTLTYPE_INT | CTLFLAG_RW, adapter, 0, - ixgbe_sysctl_eee_enable, "I", - "Enable or Disable EEE"); - - SYSCTL_ADD_PROC(ctx, eee_list, OID_AUTO, "negotiated", - CTLTYPE_INT | CTLFLAG_RD, adapter, 0, - ixgbe_sysctl_eee_negotiated, "I", - "EEE negotiated on link"); - - SYSCTL_ADD_PROC(ctx, eee_list, OID_AUTO, "tx_lpi_status", - CTLTYPE_INT | CTLFLAG_RD, adapter, 0, - ixgbe_sysctl_eee_tx_lpi_status, "I", - "Whether or not TX link is in LPI state"); - - SYSCTL_ADD_PROC(ctx, eee_list, OID_AUTO, "rx_lpi_status", - CTLTYPE_INT | CTLFLAG_RD, adapter, 0, - ixgbe_sysctl_eee_rx_lpi_status, "I", - "Whether or not RX link is in LPI state"); - - SYSCTL_ADD_PROC(ctx, eee_list, OID_AUTO, "tx_lpi_delay", - CTLTYPE_INT | CTLFLAG_RD, adapter, 0, - ixgbe_sysctl_eee_tx_lpi_delay, "I", - "TX LPI entry delay in microseconds"); - } - - /* for WoL-capable devices */ - if (hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T) { - SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "wol_enable", - CTLTYPE_INT | CTLFLAG_RW, adapter, 0, - ixgbe_sysctl_wol_enable, "I", - "Enable/Disable Wake on LAN"); - - SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "wufc", - CTLTYPE_INT | CTLFLAG_RW, adapter, 0, - ixgbe_sysctl_wufc, "I", - "Enable/Disable Wake Up Filters"); - } - - /* for X552/X557-AT devices */ - if (hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T) { - struct sysctl_oid *phy_node; - struct sysctl_oid_list *phy_list; - - phy_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "phy", - CTLFLAG_RD, NULL, - "External PHY sysctls"); - phy_list = SYSCTL_CHILDREN(phy_node); - - SYSCTL_ADD_PROC(ctx, phy_list, OID_AUTO, "temp", - CTLTYPE_INT | CTLFLAG_RD, adapter, 0, - ixgbe_sysctl_phy_temp, "I", - "Current External PHY Temperature (Celsius)"); - - SYSCTL_ADD_PROC(ctx, phy_list, OID_AUTO, "overtemp_occurred", - CTLTYPE_INT | CTLFLAG_RD, adapter, 0, - ixgbe_sysctl_phy_overtemp_occurred, "I", - "External PHY High Temperature Event Occurred"); - } -} - -/* - * Add sysctl variables, one per statistic, to the system. 
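For illustration only, a minimal userland sketch (not part of this patch) of driving one of the sysctls registered above from C; the node name "dev.ix.0.fc" assumes the first ix(4) instance, and the value 3 selects full flow control per the table given with ixgbe_sysctl_flowcntl() below:

#include <sys/types.h>
#include <sys/sysctl.h>

#include <err.h>
#include <stdio.h>

int
main(void)
{
	int fc = 3;	/* 0 off, 1 rx pause, 2 tx pause, 3 full */

	if (sysctlbyname("dev.ix.0.fc", NULL, NULL, &fc, sizeof(fc)) == -1)
		err(1, "sysctlbyname(dev.ix.0.fc)");
	printf("requested flow control mode %d\n", fc);
	return (0);
}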
- */ -static void -ixgbe_add_hw_stats(struct adapter *adapter) -{ - device_t dev = adapter->dev; - - struct tx_ring *txr = adapter->tx_rings; - struct rx_ring *rxr = adapter->rx_rings; - - struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev); - struct sysctl_oid *tree = device_get_sysctl_tree(dev); - struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree); - struct ixgbe_hw_stats *stats = &adapter->stats.pf; - - struct sysctl_oid *stat_node, *queue_node; - struct sysctl_oid_list *stat_list, *queue_list; - -#define QUEUE_NAME_LEN 32 - char namebuf[QUEUE_NAME_LEN]; - - /* Driver Statistics */ - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", - CTLFLAG_RD, &adapter->dropped_pkts, - "Driver dropped packets"); - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_failed", - CTLFLAG_RD, &adapter->mbuf_defrag_failed, - "m_defrag() failed"); - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_events", - CTLFLAG_RD, &adapter->watchdog_events, - "Watchdog timeouts"); - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq", - CTLFLAG_RD, &adapter->link_irq, - "Link MSIX IRQ Handled"); - - for (int i = 0; i < adapter->num_queues; i++, txr++) { - snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i); - queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, - CTLFLAG_RD, NULL, "Queue Name"); - queue_list = SYSCTL_CHILDREN(queue_node); - - SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate", - CTLTYPE_UINT | CTLFLAG_RW, &adapter->queues[i], - sizeof(&adapter->queues[i]), - ixgbe_sysctl_interrupt_rate_handler, "IU", - "Interrupt Rate"); - SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "irqs", - CTLFLAG_RD, &(adapter->queues[i].irqs), - "irqs on this queue"); - SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", - CTLTYPE_UINT | CTLFLAG_RD, txr, sizeof(txr), - ixgbe_sysctl_tdh_handler, "IU", - "Transmit Descriptor Head"); - SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", - CTLTYPE_UINT | CTLFLAG_RD, txr, sizeof(txr), - ixgbe_sysctl_tdt_handler, "IU", - "Transmit Descriptor Tail"); - SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tso_tx", - CTLFLAG_RD, &txr->tso_tx, - "TSO"); - SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_tx_dma_setup", - CTLFLAG_RD, &txr->no_tx_dma_setup, - "Driver tx dma failure in xmit"); - SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "no_desc_avail", - CTLFLAG_RD, &txr->no_desc_avail, - "Queue No Descriptor Available"); - SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_packets", - CTLFLAG_RD, &txr->total_packets, - "Queue Packets Transmitted"); - SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "br_drops", - CTLFLAG_RD, &txr->br->br_drops, - "Packets dropped in buf_ring"); - } - - for (int i = 0; i < adapter->num_queues; i++, rxr++) { - snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i); - queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, - CTLFLAG_RD, NULL, "Queue Name"); - queue_list = SYSCTL_CHILDREN(queue_node); - - struct lro_ctrl *lro = &rxr->lro; - - SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", - CTLTYPE_UINT | CTLFLAG_RD, rxr, sizeof(rxr), - ixgbe_sysctl_rdh_handler, "IU", - "Receive Descriptor Head"); - SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", - CTLTYPE_UINT | CTLFLAG_RD, rxr, sizeof(rxr), - ixgbe_sysctl_rdt_handler, "IU", - "Receive Descriptor Tail"); - SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_packets", - CTLFLAG_RD, &rxr->rx_packets, -
"Queue Packets Received"); - SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_bytes", - CTLFLAG_RD, &rxr->rx_bytes, - "Queue Bytes Received"); - SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_copies", - CTLFLAG_RD, &rxr->rx_copies, - "Copied RX Frames"); - SYSCTL_ADD_U64(ctx, queue_list, OID_AUTO, "lro_queued", - CTLFLAG_RD, &lro->lro_queued, 0, - "LRO Queued"); - SYSCTL_ADD_U64(ctx, queue_list, OID_AUTO, "lro_flushed", - CTLFLAG_RD, &lro->lro_flushed, 0, - "LRO Flushed"); - } - - /* MAC stats get the own sub node */ - - stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", - CTLFLAG_RD, NULL, "MAC Statistics"); - stat_list = SYSCTL_CHILDREN(stat_node); - - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs", - CTLFLAG_RD, &stats->crcerrs, - "CRC Errors"); - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "ill_errs", - CTLFLAG_RD, &stats->illerrc, - "Illegal Byte Errors"); - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "byte_errs", - CTLFLAG_RD, &stats->errbc, - "Byte Errors"); - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "short_discards", - CTLFLAG_RD, &stats->mspdc, - "MAC Short Packets Discarded"); - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "local_faults", - CTLFLAG_RD, &stats->mlfc, - "MAC Local Faults"); - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "remote_faults", - CTLFLAG_RD, &stats->mrfc, - "MAC Remote Faults"); - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rec_len_errs", - CTLFLAG_RD, &stats->rlec, - "Receive Length Errors"); - - /* Flow Control stats */ - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd", - CTLFLAG_RD, &stats->lxontxc, - "Link XON Transmitted"); - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd", - CTLFLAG_RD, &stats->lxonrxc, - "Link XON Received"); - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd", - CTLFLAG_RD, &stats->lxofftxc, - "Link XOFF Transmitted"); - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd", - CTLFLAG_RD, &stats->lxoffrxc, - "Link XOFF Received"); - - /* Packet Reception Stats */ - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_octets_rcvd", - CTLFLAG_RD, &stats->tor, - "Total Octets Received"); - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_rcvd", - CTLFLAG_RD, &stats->gorc, - "Good Octets Received"); - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_rcvd", - CTLFLAG_RD, &stats->tpr, - "Total Packets Received"); - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_rcvd", - CTLFLAG_RD, &stats->gprc, - "Good Packets Received"); - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_rcvd", - CTLFLAG_RD, &stats->mprc, - "Multicast Packets Received"); - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_rcvd", - CTLFLAG_RD, &stats->bprc, - "Broadcast Packets Received"); - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64", - CTLFLAG_RD, &stats->prc64, - "64 byte frames received "); - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127", - CTLFLAG_RD, &stats->prc127, - "65-127 byte frames received"); - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255", - CTLFLAG_RD, &stats->prc255, - "128-255 byte frames received"); - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511", - CTLFLAG_RD, &stats->prc511, - "256-511 byte frames received"); - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023", - CTLFLAG_RD, &stats->prc1023, - "512-1023 byte frames received"); - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522", - CTLFLAG_RD, &stats->prc1522, - "1023-1522 byte frames received"); - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, 
"recv_undersized", - CTLFLAG_RD, &stats->ruc, - "Receive Undersized"); - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented", - CTLFLAG_RD, &stats->rfc, - "Fragmented Packets Received "); - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversized", - CTLFLAG_RD, &stats->roc, - "Oversized Packets Received"); - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabberd", - CTLFLAG_RD, &stats->rjc, - "Received Jabber"); - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_rcvd", - CTLFLAG_RD, &stats->mngprc, - "Management Packets Received"); - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_drpd", - CTLFLAG_RD, &stats->mngptc, - "Management Packets Dropped"); - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "checksum_errs", - CTLFLAG_RD, &stats->xec, - "Checksum Errors"); - - /* Packet Transmission Stats */ - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd", - CTLFLAG_RD, &stats->gotc, - "Good Octets Transmitted"); - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd", - CTLFLAG_RD, &stats->tpt, - "Total Packets Transmitted"); - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd", - CTLFLAG_RD, &stats->gptc, - "Good Packets Transmitted"); - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd", - CTLFLAG_RD, &stats->bptc, - "Broadcast Packets Transmitted"); - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd", - CTLFLAG_RD, &stats->mptc, - "Multicast Packets Transmitted"); - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_txd", - CTLFLAG_RD, &stats->mngptc, - "Management Packets Transmitted"); - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64", - CTLFLAG_RD, &stats->ptc64, - "64 byte frames transmitted "); - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127", - CTLFLAG_RD, &stats->ptc127, - "65-127 byte frames transmitted"); - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255", - CTLFLAG_RD, &stats->ptc255, - "128-255 byte frames transmitted"); - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511", - CTLFLAG_RD, &stats->ptc511, - "256-511 byte frames transmitted"); - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023", - CTLFLAG_RD, &stats->ptc1023, - "512-1023 byte frames transmitted"); - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522", - CTLFLAG_RD, &stats->ptc1522, - "1024-1522 byte frames transmitted"); -} - -static void -ixgbe_set_sysctl_value(struct adapter *adapter, const char *name, - const char *description, int *limit, int value) -{ - *limit = value; - SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev), - SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)), - OID_AUTO, name, CTLFLAG_RW, limit, value, description); -} - -/* -** Set flow control using sysctl: -** Flow control values: -** 0 - off -** 1 - rx pause -** 2 - tx pause -** 3 - full -*/ -static int -ixgbe_sysctl_flowcntl(SYSCTL_HANDLER_ARGS) -{ - int error, fc; - struct adapter *adapter; - - adapter = (struct adapter *) arg1; - fc = adapter->fc; - - error = sysctl_handle_int(oidp, &fc, 0, req); - if ((error) || (req->newptr == NULL)) - return (error); - - /* Don't bother if it's not changed */ - if (adapter->fc == fc) - return (0); - - return ixgbe_set_flowcntl(adapter, fc); -} - - -static int -ixgbe_set_flowcntl(struct adapter *adapter, int fc) -{ - - switch (fc) { - case ixgbe_fc_rx_pause: - case ixgbe_fc_tx_pause: - case ixgbe_fc_full: - adapter->hw.fc.requested_mode = adapter->fc; - if (adapter->num_queues > 1) - ixgbe_disable_rx_drop(adapter); - break; - case 
ixgbe_fc_none: - adapter->hw.fc.requested_mode = ixgbe_fc_none; - if (adapter->num_queues > 1) - ixgbe_enable_rx_drop(adapter); - break; - default: - return (EINVAL); - } - adapter->fc = fc; - /* Don't autoneg if forcing a value */ - adapter->hw.fc.disable_fc_autoneg = TRUE; - ixgbe_fc_enable(&adapter->hw); - return (0); -} - -/* -** Control advertised link speed: -** Flags: -** 0x1 - advertise 100 Mb -** 0x2 - advertise 1G -** 0x4 - advertise 10G -*/ -static int -ixgbe_sysctl_advertise(SYSCTL_HANDLER_ARGS) -{ - int error, advertise; - struct adapter *adapter; - - adapter = (struct adapter *) arg1; - advertise = adapter->advertise; - - error = sysctl_handle_int(oidp, &advertise, 0, req); - if ((error) || (req->newptr == NULL)) - return (error); - - return ixgbe_set_advertise(adapter, advertise); -} - -static int -ixgbe_set_advertise(struct adapter *adapter, int advertise) -{ - device_t dev; - struct ixgbe_hw *hw; - ixgbe_link_speed speed; - - /* Checks to validate new value */ - if (adapter->advertise == advertise) /* no change */ - return (0); - - hw = &adapter->hw; - dev = adapter->dev; - - /* No speed changes for backplane media */ - if (hw->phy.media_type == ixgbe_media_type_backplane) - return (ENODEV); - - if (!((hw->phy.media_type == ixgbe_media_type_copper) || - (hw->phy.multispeed_fiber))) { - device_printf(dev, - "Advertised speed can only be set on copper or " - "multispeed fiber media types.\n"); - return (EINVAL); - } - - if (advertise < 0x1 || advertise > 0x7) { - device_printf(dev, - "Invalid advertised speed; valid modes are 0x1 through 0x7\n"); - return (EINVAL); - } - - if ((advertise & 0x1) - && (hw->mac.type != ixgbe_mac_X540) - && (hw->mac.type != ixgbe_mac_X550)) { - device_printf(dev, "Set Advertise: 100Mb on X540/X550 only\n"); - return (EINVAL); - } - - /* Set new value and report new advertised mode */ - speed = 0; - if (advertise & 0x1) - speed |= IXGBE_LINK_SPEED_100_FULL; - if (advertise & 0x2) - speed |= IXGBE_LINK_SPEED_1GB_FULL; - if (advertise & 0x4) - speed |= IXGBE_LINK_SPEED_10GB_FULL; - adapter->advertise = advertise; - - hw->mac.autotry_restart = TRUE; - hw->mac.ops.setup_link(hw, speed, TRUE); - - return (0); -} - -/* - * The following two sysctls are for X552/X557-AT devices; - * they deal with the external PHY used in them. - */ -static int -ixgbe_sysctl_phy_temp(SYSCTL_HANDLER_ARGS) -{ - struct adapter *adapter = (struct adapter *) arg1; - struct ixgbe_hw *hw = &adapter->hw; - u16 reg; - - if (hw->device_id != IXGBE_DEV_ID_X550EM_X_10G_T) { - device_printf(adapter->dev, - "Device has no supported external thermal sensor.\n"); - return (ENODEV); - } - - if (hw->phy.ops.read_reg(hw, IXGBE_PHY_CURRENT_TEMP, - IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, - ®)) { - device_printf(adapter->dev, - "Error reading from PHY's current temperature register\n"); - return (EAGAIN); - } - - /* Shift temp for output */ - reg = reg >> 8; - - return (sysctl_handle_int(oidp, NULL, reg, req)); -} - -/* - * Reports whether the current PHY temperature is over - * the overtemp threshold. 
- * - This is reported directly from the PHY - */ -static int -ixgbe_sysctl_phy_overtemp_occurred(SYSCTL_HANDLER_ARGS) -{ - struct adapter *adapter = (struct adapter *) arg1; - struct ixgbe_hw *hw = &adapter->hw; - u16 reg; - - if (hw->device_id != IXGBE_DEV_ID_X550EM_X_10G_T) { - device_printf(adapter->dev, - "Device has no supported external thermal sensor.\n"); - return (ENODEV); - } - - if (hw->phy.ops.read_reg(hw, IXGBE_PHY_OVERTEMP_STATUS, - IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, - ®)) { - device_printf(adapter->dev, - "Error reading from PHY's temperature status register\n"); - return (EAGAIN); - } - - /* Get occurrence bit */ - reg = !!(reg & 0x4000); - return (sysctl_handle_int(oidp, 0, reg, req)); -} - -/* -** Thermal Shutdown Trigger (internal MAC) -** - Set this to 1 to cause an overtemp event to occur -*/ -static int -ixgbe_sysctl_thermal_test(SYSCTL_HANDLER_ARGS) -{ - struct adapter *adapter = (struct adapter *) arg1; - struct ixgbe_hw *hw = &adapter->hw; - int error, fire = 0; - - error = sysctl_handle_int(oidp, &fire, 0, req); - if ((error) || (req->newptr == NULL)) - return (error); - - if (fire) { - u32 reg = IXGBE_READ_REG(hw, IXGBE_EICS); - reg |= IXGBE_EICR_TS; - IXGBE_WRITE_REG(hw, IXGBE_EICS, reg); - } - - return (0); -} - -/* -** Manage DMA Coalescing. -** Control values: -** 0/1 - off / on (use default value of 1000) -** -** Legal timer values are: -** 50,100,250,500,1000,2000,5000,10000 -** -** Turning off interrupt moderation will also turn this off. -*/ -static int -ixgbe_sysctl_dmac(SYSCTL_HANDLER_ARGS) -{ - struct adapter *adapter = (struct adapter *) arg1; - struct ifnet *ifp = adapter->ifp; - int error; - u32 newval; - - newval = adapter->dmac; - error = sysctl_handle_int(oidp, &newval, 0, req); - if ((error) || (req->newptr == NULL)) - return (error); - - switch (newval) { - case 0: - /* Disabled */ - adapter->dmac = 0; - break; - case 1: - /* Enable and use default */ - adapter->dmac = 1000; - break; - case 50: - case 100: - case 250: - case 500: - case 1000: - case 2000: - case 5000: - case 10000: - /* Legal values - allow */ - adapter->dmac = newval; - break; - default: - /* Do nothing, illegal value */ - return (EINVAL); - } - - /* Re-initialize hardware if it's already running */ - if (ifp->if_drv_flags & IFF_DRV_RUNNING) - ixgbe_init(adapter); - - return (0); -} - -#ifdef IXGBE_DEBUG -/** - * Sysctl to test power states - * Values: - * 0 - set device to D0 - * 3 - set device to D3 - * (none) - get current device power state - */ -static int -ixgbe_sysctl_power_state(SYSCTL_HANDLER_ARGS) -{ - struct adapter *adapter = (struct adapter *) arg1; - device_t dev = adapter->dev; - int curr_ps, new_ps, error = 0; - - curr_ps = new_ps = pci_get_powerstate(dev); - - error = sysctl_handle_int(oidp, &new_ps, 0, req); - if ((error) || (req->newptr == NULL)) - return (error); - - if (new_ps == curr_ps) - return (0); - - if (new_ps == 3 && curr_ps == 0) - error = DEVICE_SUSPEND(dev); - else if (new_ps == 0 && curr_ps == 3) - error = DEVICE_RESUME(dev); - else - return (EINVAL); - - device_printf(dev, "New state: %d\n", pci_get_powerstate(dev)); - - return (error); -} -#endif -/* - * Sysctl to enable/disable the WoL capability, if supported by the adapter. 
- * Values: - * 0 - disabled - * 1 - enabled - */ -static int -ixgbe_sysctl_wol_enable(SYSCTL_HANDLER_ARGS) -{ - struct adapter *adapter = (struct adapter *) arg1; - struct ixgbe_hw *hw = &adapter->hw; - int new_wol_enabled; - int error = 0; - - new_wol_enabled = hw->wol_enabled; - error = sysctl_handle_int(oidp, &new_wol_enabled, 0, req); - if ((error) || (req->newptr == NULL)) - return (error); - new_wol_enabled = !!(new_wol_enabled); - if (new_wol_enabled == hw->wol_enabled) - return (0); - - if (new_wol_enabled > 0 && !adapter->wol_support) - return (ENODEV); - else - hw->wol_enabled = new_wol_enabled; - - return (0); -} - -/* - * Sysctl to enable/disable the Energy Efficient Ethernet capability, - * if supported by the adapter. - * Values: - * 0 - disabled - * 1 - enabled - */ -static int -ixgbe_sysctl_eee_enable(SYSCTL_HANDLER_ARGS) -{ - struct adapter *adapter = (struct adapter *) arg1; - struct ixgbe_hw *hw = &adapter->hw; - struct ifnet *ifp = adapter->ifp; - int new_eee_enabled, error = 0; - - new_eee_enabled = adapter->eee_enabled; - error = sysctl_handle_int(oidp, &new_eee_enabled, 0, req); - if ((error) || (req->newptr == NULL)) - return (error); - new_eee_enabled = !!(new_eee_enabled); - if (new_eee_enabled == adapter->eee_enabled) - return (0); - - if (new_eee_enabled > 0 && !hw->mac.ops.setup_eee) - return (ENODEV); - else - adapter->eee_enabled = new_eee_enabled; - - /* Re-initialize hardware if it's already running */ - if (ifp->if_drv_flags & IFF_DRV_RUNNING) - ixgbe_init(adapter); - - return (0); -} - -/* - * Read-only sysctl indicating whether EEE support was negotiated - * on the link. - */ -static int -ixgbe_sysctl_eee_negotiated(SYSCTL_HANDLER_ARGS) -{ - struct adapter *adapter = (struct adapter *) arg1; - struct ixgbe_hw *hw = &adapter->hw; - bool status; - - status = !!(IXGBE_READ_REG(hw, IXGBE_EEE_STAT) & IXGBE_EEE_STAT_NEG); - - return (sysctl_handle_int(oidp, 0, status, req)); -} - -/* - * Read-only sysctl indicating whether RX Link is in LPI state. - */ -static int -ixgbe_sysctl_eee_rx_lpi_status(SYSCTL_HANDLER_ARGS) -{ - struct adapter *adapter = (struct adapter *) arg1; - struct ixgbe_hw *hw = &adapter->hw; - bool status; - - status = !!(IXGBE_READ_REG(hw, IXGBE_EEE_STAT) & - IXGBE_EEE_RX_LPI_STATUS); - - return (sysctl_handle_int(oidp, 0, status, req)); -} - -/* - * Read-only sysctl indicating whether TX Link is in LPI state. - */ -static int -ixgbe_sysctl_eee_tx_lpi_status(SYSCTL_HANDLER_ARGS) -{ - struct adapter *adapter = (struct adapter *) arg1; - struct ixgbe_hw *hw = &adapter->hw; - bool status; - - status = !!(IXGBE_READ_REG(hw, IXGBE_EEE_STAT) & - IXGBE_EEE_TX_LPI_STATUS); - - return (sysctl_handle_int(oidp, 0, status, req)); -} - -/* - * Read-only sysctl indicating TX Link LPI delay - */ -static int -ixgbe_sysctl_eee_tx_lpi_delay(SYSCTL_HANDLER_ARGS) -{ - struct adapter *adapter = (struct adapter *) arg1; - struct ixgbe_hw *hw = &adapter->hw; - u32 reg; - - reg = IXGBE_READ_REG(hw, IXGBE_EEE_SU); - - return (sysctl_handle_int(oidp, 0, reg >> 26, req)); -} - -/* - * Sysctl to enable/disable the types of packets that the - * adapter will wake up on upon receipt. - * WUFC - Wake Up Filter Control - * Flags: - * 0x1 - Link Status Change - * 0x2 - Magic Packet - * 0x4 - Direct Exact - * 0x8 - Directed Multicast - * 0x10 - Broadcast - * 0x20 - ARP/IPv4 Request Packet - * 0x40 - Direct IPv4 Packet - * 0x80 - Direct IPv6 Packet - * - * Setting another flag will cause the sysctl to return an - * error. 
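A small standalone sketch of composing a wakeup filter mask from those flag bits; the WUFC_* macro names here are invented for the example, and the high-bits check mirrors what the handler below enforces:

#include <stdint.h>
#include <stdio.h>

#define WUFC_LNKC 0x01	/* Link Status Change */
#define WUFC_MAG  0x02	/* Magic Packet */
#define WUFC_BC   0x10	/* Broadcast */

int
main(void)
{
	uint32_t wufc = WUFC_LNKC | WUFC_MAG;

	if (wufc & 0xffffff00)
		printf("rejected: only the low 8 bits are valid flags\n");
	else
		printf("wufc = 0x%02x\n", wufc);
	return (0);
}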
- */ -static int -ixgbe_sysctl_wufc(SYSCTL_HANDLER_ARGS) -{ - struct adapter *adapter = (struct adapter *) arg1; - int error = 0; - u32 new_wufc; - - new_wufc = adapter->wufc; - - error = sysctl_handle_int(oidp, &new_wufc, 0, req); - if ((error) || (req->newptr == NULL)) - return (error); - if (new_wufc == adapter->wufc) - return (0); - - if (new_wufc & 0xffffff00) - return (EINVAL); - else { - new_wufc &= 0xff; - new_wufc |= (0xffffff00 & adapter->wufc); - adapter->wufc = new_wufc; - } - - return (0); -} - -#ifdef IXGBE_DEBUG -static int -ixgbe_sysctl_print_rss_config(SYSCTL_HANDLER_ARGS) -{ - struct adapter *adapter = (struct adapter *)arg1; - struct ixgbe_hw *hw = &adapter->hw; - device_t dev = adapter->dev; - int error = 0, reta_size; - struct sbuf *buf; - u32 reg; - - buf = sbuf_new_for_sysctl(NULL, NULL, 128, req); - if (!buf) { - device_printf(dev, "Could not allocate sbuf for output.\n"); - return (ENOMEM); - } - - // TODO: use sbufs to make a string to print out - /* Set multiplier for RETA setup and table size based on MAC */ - switch (adapter->hw.mac.type) { - case ixgbe_mac_X550: - case ixgbe_mac_X550EM_x: - reta_size = 128; - break; - default: - reta_size = 32; - break; - } - - /* Print out the redirection table */ - sbuf_cat(buf, "\n"); - for (int i = 0; i < reta_size; i++) { - if (i < 32) { - reg = IXGBE_READ_REG(hw, IXGBE_RETA(i)); - sbuf_printf(buf, "RETA(%2d): 0x%08x\n", i, reg); - } else { - reg = IXGBE_READ_REG(hw, IXGBE_ERETA(i - 32)); - sbuf_printf(buf, "ERETA(%2d): 0x%08x\n", i - 32, reg); - } - } - - // TODO: print more config - - error = sbuf_finish(buf); - if (error) - device_printf(dev, "Error finishing sbuf: %d\n", error); - - sbuf_delete(buf); - return (0); -} -#endif /* IXGBE_DEBUG */ - -/* -** Enable the hardware to drop packets when the buffer is -** full. This is useful when multiqueue, so that no single -** queue being full stalls the entire RX engine. We only -** enable this when Multiqueue AND when Flow Control is -** disabled.
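That policy can be restated as a tiny decision function; this toy userland model (names invented for the example) mirrors how ixgbe_set_flowcntl() above toggles the drop bits:

#include <stdbool.h>
#include <stdio.h>

/* Drop-on-full only helps with multiple queues, and only when flow
 * control is off, so a paused link cannot back packets up instead. */
static bool
rx_drop_should_enable(int num_queues, bool fc_enabled)
{
	return (num_queues > 1 && !fc_enabled);
}

int
main(void)
{
	printf("%d\n", rx_drop_should_enable(8, false));	/* 1: enable drop */
	printf("%d\n", rx_drop_should_enable(8, true));		/* 0: FC active */
	return (0);
}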
-*/ -static void -ixgbe_enable_rx_drop(struct adapter *adapter) -{ - struct ixgbe_hw *hw = &adapter->hw; - - for (int i = 0; i < adapter->num_queues; i++) { - struct rx_ring *rxr = &adapter->rx_rings[i]; - u32 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(rxr->me)); - srrctl |= IXGBE_SRRCTL_DROP_EN; - IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxr->me), srrctl); - } -#ifdef PCI_IOV - /* enable drop for each vf */ - for (int i = 0; i < adapter->num_vfs; i++) { - IXGBE_WRITE_REG(hw, IXGBE_QDE, - (IXGBE_QDE_WRITE | (i << IXGBE_QDE_IDX_SHIFT) | - IXGBE_QDE_ENABLE)); - } -#endif -} - -static void -ixgbe_disable_rx_drop(struct adapter *adapter) -{ - struct ixgbe_hw *hw = &adapter->hw; - - for (int i = 0; i < adapter->num_queues; i++) { - struct rx_ring *rxr = &adapter->rx_rings[i]; - u32 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(rxr->me)); - srrctl &= ~IXGBE_SRRCTL_DROP_EN; - IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxr->me), srrctl); - } -#ifdef PCI_IOV - /* disable drop for each vf */ - for (int i = 0; i < adapter->num_vfs; i++) { - IXGBE_WRITE_REG(hw, IXGBE_QDE, - (IXGBE_QDE_WRITE | (i << IXGBE_QDE_IDX_SHIFT))); - } -#endif -} - -static void -ixgbe_rearm_queues(struct adapter *adapter, u64 queues) -{ - u32 mask; - - switch (adapter->hw.mac.type) { - case ixgbe_mac_82598EB: - mask = (IXGBE_EIMS_RTX_QUEUE & queues); - IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS, mask); - break; - case ixgbe_mac_82599EB: - case ixgbe_mac_X540: - case ixgbe_mac_X550: - case ixgbe_mac_X550EM_x: - mask = (queues & 0xFFFFFFFF); - IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS_EX(0), mask); - mask = (queues >> 32); - IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS_EX(1), mask); - break; - default: - break; - } -} - -#ifdef PCI_IOV - -/* -** Support functions for SRIOV/VF management -*/ - -static void -ixgbe_ping_all_vfs(struct adapter *adapter) -{ - struct ixgbe_vf *vf; - - for (int i = 0; i < adapter->num_vfs; i++) { - vf = &adapter->vfs[i]; - if (vf->flags & IXGBE_VF_ACTIVE) - ixgbe_send_vf_msg(adapter, vf, IXGBE_PF_CONTROL_MSG); - } -} - - -static void -ixgbe_vf_set_default_vlan(struct adapter *adapter, struct ixgbe_vf *vf, - uint16_t tag) -{ - struct ixgbe_hw *hw; - uint32_t vmolr, vmvir; - - hw = &adapter->hw; - - vf->vlan_tag = tag; - - vmolr = IXGBE_READ_REG(hw, IXGBE_VMOLR(vf->pool)); - - /* Do not receive packets that pass inexact filters. */ - vmolr &= ~(IXGBE_VMOLR_ROMPE | IXGBE_VMOLR_ROPE); - - /* Disable Multicast Promiscuous Mode. */ - vmolr &= ~IXGBE_VMOLR_MPE; - - /* Accept broadcasts. */ - vmolr |= IXGBE_VMOLR_BAM; - - if (tag == 0) { - /* Accept non-vlan tagged traffic. */ - //vmolr |= IXGBE_VMOLR_AUPE; - - /* Allow VM to tag outgoing traffic; no default tag. */ - vmvir = 0; - } else { - /* Require vlan-tagged traffic. */ - vmolr &= ~IXGBE_VMOLR_AUPE; - - /* Tag all traffic with provided vlan tag. */ - vmvir = (tag | IXGBE_VMVIR_VLANA_DEFAULT); - } - IXGBE_WRITE_REG(hw, IXGBE_VMOLR(vf->pool), vmolr); - IXGBE_WRITE_REG(hw, IXGBE_VMVIR(vf->pool), vmvir); -} - - -static boolean_t -ixgbe_vf_frame_size_compatible(struct adapter *adapter, struct ixgbe_vf *vf) -{ - - /* - * Frame size compatibility between PF and VF is only a problem on - * 82599-based cards. X540 and later support any combination of jumbo - * frames on PFs and VFs. - */ - if (adapter->hw.mac.type != ixgbe_mac_82599EB) - return (TRUE); - - switch (vf->api_ver) { - case IXGBE_API_VER_1_0: - case IXGBE_API_VER_UNKNOWN: - /* - * On legacy (1.0 and older) VF versions, we don't support jumbo - * frames on either the PF or the VF.
- */ - if (adapter->max_frame_size > ETHER_MAX_LEN || - vf->max_frame_size > ETHER_MAX_LEN) - return (FALSE); - - return (TRUE); - - break; - case IXGBE_API_VER_1_1: - default: - /* - * 1.1 or later VF versions always work if they aren't using - * jumbo frames. - */ - if (vf->max_frame_size <= ETHER_MAX_LEN) - return (TRUE); - - /* - * Jumbo frames only work with VFs if the PF is also using jumbo - * frames. - */ - if (adapter->max_frame_size <= ETHER_MAX_LEN) - return (TRUE); - - return (FALSE); - - } -} - - -static void -ixgbe_process_vf_reset(struct adapter *adapter, struct ixgbe_vf *vf) -{ - ixgbe_vf_set_default_vlan(adapter, vf, vf->default_vlan); - - // XXX clear multicast addresses - - ixgbe_clear_rar(&adapter->hw, vf->rar_index); - - vf->api_ver = IXGBE_API_VER_UNKNOWN; -} - - -static void -ixgbe_vf_enable_transmit(struct adapter *adapter, struct ixgbe_vf *vf) -{ - struct ixgbe_hw *hw; - uint32_t vf_index, vfte; - - hw = &adapter->hw; - - vf_index = IXGBE_VF_INDEX(vf->pool); - vfte = IXGBE_READ_REG(hw, IXGBE_VFTE(vf_index)); - vfte |= IXGBE_VF_BIT(vf->pool); - IXGBE_WRITE_REG(hw, IXGBE_VFTE(vf_index), vfte); -} - - -static void -ixgbe_vf_enable_receive(struct adapter *adapter, struct ixgbe_vf *vf) -{ - struct ixgbe_hw *hw; - uint32_t vf_index, vfre; - - hw = &adapter->hw; - - vf_index = IXGBE_VF_INDEX(vf->pool); - vfre = IXGBE_READ_REG(hw, IXGBE_VFRE(vf_index)); - if (ixgbe_vf_frame_size_compatible(adapter, vf)) - vfre |= IXGBE_VF_BIT(vf->pool); - else - vfre &= ~IXGBE_VF_BIT(vf->pool); - IXGBE_WRITE_REG(hw, IXGBE_VFRE(vf_index), vfre); -} - - -static void -ixgbe_vf_reset_msg(struct adapter *adapter, struct ixgbe_vf *vf, uint32_t *msg) -{ - struct ixgbe_hw *hw; - uint32_t ack; - uint32_t resp[IXGBE_VF_PERMADDR_MSG_LEN]; - - hw = &adapter->hw; - - ixgbe_process_vf_reset(adapter, vf); - - if (ixgbe_validate_mac_addr(vf->ether_addr) == 0) { - ixgbe_set_rar(&adapter->hw, vf->rar_index, - vf->ether_addr, vf->pool, TRUE); - ack = IXGBE_VT_MSGTYPE_ACK; - } else - ack = IXGBE_VT_MSGTYPE_NACK; - - ixgbe_vf_enable_transmit(adapter, vf); - ixgbe_vf_enable_receive(adapter, vf); - - vf->flags |= IXGBE_VF_CTS; - - resp[0] = IXGBE_VF_RESET | ack | IXGBE_VT_MSGTYPE_CTS; - bcopy(vf->ether_addr, &resp[1], ETHER_ADDR_LEN); - resp[3] = hw->mac.mc_filter_type; - ixgbe_write_mbx(hw, resp, IXGBE_VF_PERMADDR_MSG_LEN, vf->pool); -} - - -static void -ixgbe_vf_set_mac(struct adapter *adapter, struct ixgbe_vf *vf, uint32_t *msg) -{ - uint8_t *mac; - - mac = (uint8_t*)&msg[1]; - - /* Check that the VF has permission to change the MAC address. */ - if (!(vf->flags & IXGBE_VF_CAP_MAC) && ixgbe_vf_mac_changed(vf, mac)) { - ixgbe_send_vf_nack(adapter, vf, msg[0]); - return; - } - - if (ixgbe_validate_mac_addr(mac) != 0) { - ixgbe_send_vf_nack(adapter, vf, msg[0]); - return; - } - - bcopy(mac, vf->ether_addr, ETHER_ADDR_LEN); - - ixgbe_set_rar(&adapter->hw, vf->rar_index, vf->ether_addr, - vf->pool, TRUE); - - ixgbe_send_vf_ack(adapter, vf, msg[0]); -} - - -/* -** VF multicast addresses are set by using the appropriate bit in -** 1 of 128 32 bit addresses (4096 possible). 
-*/ -static void -ixgbe_vf_set_mc_addr(struct adapter *adapter, struct ixgbe_vf *vf, u32 *msg) -{ - u16 *list = (u16*)&msg[1]; - int entries; - u32 vmolr, vec_bit, vec_reg, mta_reg; - - entries = (msg[0] & IXGBE_VT_MSGINFO_MASK) >> IXGBE_VT_MSGINFO_SHIFT; - entries = min(entries, IXGBE_MAX_VF_MC); - - vmolr = IXGBE_READ_REG(&adapter->hw, IXGBE_VMOLR(vf->pool)); - - vf->num_mc_hashes = entries; - - /* Set the appropriate MTA bit */ - for (int i = 0; i < entries; i++) { - vf->mc_hash[i] = list[i]; - vec_reg = (vf->mc_hash[i] >> 5) & 0x7F; - vec_bit = vf->mc_hash[i] & 0x1F; - mta_reg = IXGBE_READ_REG(&adapter->hw, IXGBE_MTA(vec_reg)); - mta_reg |= (1 << vec_bit); - IXGBE_WRITE_REG(&adapter->hw, IXGBE_MTA(vec_reg), mta_reg); - } - - vmolr |= IXGBE_VMOLR_ROMPE; - IXGBE_WRITE_REG(&adapter->hw, IXGBE_VMOLR(vf->pool), vmolr); - ixgbe_send_vf_ack(adapter, vf, msg[0]); - return; -} - - -static void -ixgbe_vf_set_vlan(struct adapter *adapter, struct ixgbe_vf *vf, uint32_t *msg) -{ - struct ixgbe_hw *hw; - int enable; - uint16_t tag; - - hw = &adapter->hw; - enable = IXGBE_VT_MSGINFO(msg[0]); - tag = msg[1] & IXGBE_VLVF_VLANID_MASK; - - if (!(vf->flags & IXGBE_VF_CAP_VLAN)) { - ixgbe_send_vf_nack(adapter, vf, msg[0]); - return; - } - - /* It is illegal to enable vlan tag 0. */ - if (tag == 0 && enable != 0){ - ixgbe_send_vf_nack(adapter, vf, msg[0]); - return; - } - - ixgbe_set_vfta(hw, tag, vf->pool, enable); - ixgbe_send_vf_ack(adapter, vf, msg[0]); -} - - -static void -ixgbe_vf_set_lpe(struct adapter *adapter, struct ixgbe_vf *vf, uint32_t *msg) -{ - struct ixgbe_hw *hw; - uint32_t vf_max_size, pf_max_size, mhadd; - - hw = &adapter->hw; - vf_max_size = msg[1]; - - if (vf_max_size < ETHER_CRC_LEN) { - /* We intentionally ACK invalid LPE requests. */ - ixgbe_send_vf_ack(adapter, vf, msg[0]); - return; - } - - vf_max_size -= ETHER_CRC_LEN; - - if (vf_max_size > IXGBE_MAX_FRAME_SIZE) { - /* We intentionally ACK invalid LPE requests. */ - ixgbe_send_vf_ack(adapter, vf, msg[0]); - return; - } - - vf->max_frame_size = vf_max_size; - ixgbe_update_max_frame(adapter, vf->max_frame_size); - - /* - * We might have to disable reception to this VF if the frame size is - * not compatible with the config on the PF. - */ - ixgbe_vf_enable_receive(adapter, vf); - - mhadd = IXGBE_READ_REG(hw, IXGBE_MHADD); - pf_max_size = (mhadd & IXGBE_MHADD_MFS_MASK) >> IXGBE_MHADD_MFS_SHIFT; - - if (pf_max_size < adapter->max_frame_size) { - mhadd &= ~IXGBE_MHADD_MFS_MASK; - mhadd |= adapter->max_frame_size << IXGBE_MHADD_MFS_SHIFT; - IXGBE_WRITE_REG(hw, IXGBE_MHADD, mhadd); - } - - ixgbe_send_vf_ack(adapter, vf, msg[0]); -} - - -static void -ixgbe_vf_set_macvlan(struct adapter *adapter, struct ixgbe_vf *vf, - uint32_t *msg) -{ - //XXX implement this - ixgbe_send_vf_nack(adapter, vf, msg[0]); -} - - -static void -ixgbe_vf_api_negotiate(struct adapter *adapter, struct ixgbe_vf *vf, - uint32_t *msg) -{ - - switch (msg[1]) { - case IXGBE_API_VER_1_0: - case IXGBE_API_VER_1_1: - vf->api_ver = msg[1]; - ixgbe_send_vf_ack(adapter, vf, msg[0]); - break; - default: - vf->api_ver = IXGBE_API_VER_UNKNOWN; - ixgbe_send_vf_nack(adapter, vf, msg[0]); - break; - } -} - - -static void -ixgbe_vf_get_queues(struct adapter *adapter, struct ixgbe_vf *vf, - uint32_t *msg) -{ - struct ixgbe_hw *hw; - uint32_t resp[IXGBE_VF_GET_QUEUES_RESP_LEN]; - int num_queues; - - hw = &adapter->hw; - - /* GET_QUEUES is not supported on pre-1.1 APIs. 
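That version gate can be pictured with a toy predicate; the enum values here are illustrative stand-ins, not the real mailbox constants:

#include <stdbool.h>
#include <stdio.h>

enum vf_api { API_UNKNOWN, API_1_0, API_1_1 };	/* illustrative ordering */

static bool
get_queues_allowed(enum vf_api v)
{
	return (v >= API_1_1);	/* pre-1.1 negotiations get a NACK */
}

int
main(void)
{
	printf("%d %d\n", get_queues_allowed(API_1_0),
	    get_queues_allowed(API_1_1));	/* prints: 0 1 */
	return (0);
}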
*/ - switch (vf->api_ver) { - case IXGBE_API_VER_1_0: - case IXGBE_API_VER_UNKNOWN: - ixgbe_send_vf_nack(adapter, vf, msg[0]); - return; - } - - resp[0] = IXGBE_VF_GET_QUEUES | IXGBE_VT_MSGTYPE_ACK | - IXGBE_VT_MSGTYPE_CTS; - - num_queues = ixgbe_vf_queues(ixgbe_get_iov_mode(adapter)); - resp[IXGBE_VF_TX_QUEUES] = num_queues; - resp[IXGBE_VF_RX_QUEUES] = num_queues; - resp[IXGBE_VF_TRANS_VLAN] = (vf->default_vlan != 0); - resp[IXGBE_VF_DEF_QUEUE] = 0; - - ixgbe_write_mbx(hw, resp, IXGBE_VF_GET_QUEUES_RESP_LEN, vf->pool); -} - - -static void -ixgbe_process_vf_msg(struct adapter *adapter, struct ixgbe_vf *vf) -{ - struct ixgbe_hw *hw; - uint32_t msg[IXGBE_VFMAILBOX_SIZE]; - int error; - - hw = &adapter->hw; - - error = ixgbe_read_mbx(hw, msg, IXGBE_VFMAILBOX_SIZE, vf->pool); - - if (error != 0) - return; - - CTR3(KTR_MALLOC, "%s: received msg %x from %d", - adapter->ifp->if_xname, msg[0], vf->pool); - if (msg[0] == IXGBE_VF_RESET) { - ixgbe_vf_reset_msg(adapter, vf, msg); - return; - } - - if (!(vf->flags & IXGBE_VF_CTS)) { - ixgbe_send_vf_nack(adapter, vf, msg[0]); - return; - } - - switch (msg[0] & IXGBE_VT_MSG_MASK) { - case IXGBE_VF_SET_MAC_ADDR: - ixgbe_vf_set_mac(adapter, vf, msg); - break; - case IXGBE_VF_SET_MULTICAST: - ixgbe_vf_set_mc_addr(adapter, vf, msg); - break; - case IXGBE_VF_SET_VLAN: - ixgbe_vf_set_vlan(adapter, vf, msg); - break; - case IXGBE_VF_SET_LPE: - ixgbe_vf_set_lpe(adapter, vf, msg); - break; - case IXGBE_VF_SET_MACVLAN: - ixgbe_vf_set_macvlan(adapter, vf, msg); - break; - case IXGBE_VF_API_NEGOTIATE: - ixgbe_vf_api_negotiate(adapter, vf, msg); - break; - case IXGBE_VF_GET_QUEUES: - ixgbe_vf_get_queues(adapter, vf, msg); - break; - default: - ixgbe_send_vf_nack(adapter, vf, msg[0]); - } -} - - -/* - * Tasklet for handling VF -> PF mailbox messages. - */ -static void -ixgbe_handle_mbx(void *context, int pending) -{ - struct adapter *adapter; - struct ixgbe_hw *hw; - struct ixgbe_vf *vf; - int i; - - adapter = context; - hw = &adapter->hw; - - IXGBE_CORE_LOCK(adapter); - for (i = 0; i < adapter->num_vfs; i++) { - vf = &adapter->vfs[i]; - - if (vf->flags & IXGBE_VF_ACTIVE) { - if (ixgbe_check_for_rst(hw, vf->pool) == 0) - ixgbe_process_vf_reset(adapter, vf); - - if (ixgbe_check_for_msg(hw, vf->pool) == 0) - ixgbe_process_vf_msg(adapter, vf); - - if (ixgbe_check_for_ack(hw, vf->pool) == 0) - ixgbe_process_vf_ack(adapter, vf); - } - } - IXGBE_CORE_UNLOCK(adapter); -} - - -static int -ixgbe_init_iov(device_t dev, u16 num_vfs, const nvlist_t *config) -{ - struct adapter *adapter; - enum ixgbe_iov_mode mode; - - adapter = device_get_softc(dev); - adapter->num_vfs = num_vfs; - mode = ixgbe_get_iov_mode(adapter); - - if (num_vfs > ixgbe_max_vfs(mode)) { - adapter->num_vfs = 0; - return (ENOSPC); - } - - IXGBE_CORE_LOCK(adapter); - - adapter->vfs = malloc(sizeof(*adapter->vfs) * num_vfs, M_IXGBE, - M_NOWAIT | M_ZERO); - - if (adapter->vfs == NULL) { - adapter->num_vfs = 0; - IXGBE_CORE_UNLOCK(adapter); - return (ENOMEM); - } - - ixgbe_init_locked(adapter); - - IXGBE_CORE_UNLOCK(adapter); - - return (0); -} - - -static void -ixgbe_uninit_iov(device_t dev) -{ - struct ixgbe_hw *hw; - struct adapter *adapter; - uint32_t pf_reg, vf_reg; - - adapter = device_get_softc(dev); - hw = &adapter->hw; - - IXGBE_CORE_LOCK(adapter); - - /* Enable rx/tx for the PF and disable it for all VFs.
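The VFRE/VFTE registers written below are arrays of 32-bit enable masks, so a pool number splits into a word index and a bit index; a standalone sketch, with macro bodies assumed to match the driver's IXGBE_VF_INDEX/IXGBE_VF_BIT:

#include <stdio.h>

#define VF_INDEX(p)	((p) / 32)		/* which 32-bit register */
#define VF_BIT(p)	(1u << ((p) % 32))	/* which bit inside it */

int
main(void)
{
	int pool = 37;	/* hypothetical VF pool number */

	printf("VFRE[%d] |= 0x%08x\n", VF_INDEX(pool), VF_BIT(pool));
	return (0);
}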
*/ - pf_reg = IXGBE_VF_INDEX(adapter->pool); - IXGBE_WRITE_REG(hw, IXGBE_VFRE(pf_reg), - IXGBE_VF_BIT(adapter->pool)); - IXGBE_WRITE_REG(hw, IXGBE_VFTE(pf_reg), - IXGBE_VF_BIT(adapter->pool)); - - if (pf_reg == 0) - vf_reg = 1; - else - vf_reg = 0; - IXGBE_WRITE_REG(hw, IXGBE_VFRE(vf_reg), 0); - IXGBE_WRITE_REG(hw, IXGBE_VFTE(vf_reg), 0); - - IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0); - - free(adapter->vfs, M_IXGBE); - adapter->vfs = NULL; - adapter->num_vfs = 0; - - IXGBE_CORE_UNLOCK(adapter); -} - - -static void -ixgbe_initialize_iov(struct adapter *adapter) -{ - struct ixgbe_hw *hw = &adapter->hw; - uint32_t mrqc, mtqc, vt_ctl, vf_reg, gcr_ext, gpie; - enum ixgbe_iov_mode mode; - int i; - - mode = ixgbe_get_iov_mode(adapter); - if (mode == IXGBE_NO_VM) - return; - - IXGBE_CORE_LOCK_ASSERT(adapter); - - mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC); - mrqc &= ~IXGBE_MRQC_MRQE_MASK; - - switch (mode) { - case IXGBE_64_VM: - mrqc |= IXGBE_MRQC_VMDQRSS64EN; - break; - case IXGBE_32_VM: - mrqc |= IXGBE_MRQC_VMDQRSS32EN; - break; - default: - panic("Unexpected SR-IOV mode %d", mode); - } - IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc); - - mtqc = IXGBE_MTQC_VT_ENA; - switch (mode) { - case IXGBE_64_VM: - mtqc |= IXGBE_MTQC_64VF; - break; - case IXGBE_32_VM: - mtqc |= IXGBE_MTQC_32VF; - break; - default: - panic("Unexpected SR-IOV mode %d", mode); - } - IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc); - - - gcr_ext = IXGBE_READ_REG(hw, IXGBE_GCR_EXT); - gcr_ext |= IXGBE_GCR_EXT_MSIX_EN; - gcr_ext &= ~IXGBE_GCR_EXT_VT_MODE_MASK; - switch (mode) { - case IXGBE_64_VM: - gcr_ext |= IXGBE_GCR_EXT_VT_MODE_64; - break; - case IXGBE_32_VM: - gcr_ext |= IXGBE_GCR_EXT_VT_MODE_32; - break; - default: - panic("Unexpected SR-IOV mode %d", mode); - } - IXGBE_WRITE_REG(hw, IXGBE_GCR_EXT, gcr_ext); - - - gpie = IXGBE_READ_REG(hw, IXGBE_GPIE); - gpie &= ~IXGBE_GPIE_VTMODE_MASK; - switch (mode) { - case IXGBE_64_VM: - gpie |= IXGBE_GPIE_VTMODE_64; - break; - case IXGBE_32_VM: - gpie |= IXGBE_GPIE_VTMODE_32; - break; - default: - panic("Unexpected SR-IOV mode %d", mode); - } - IXGBE_WRITE_REG(hw, IXGBE_GPIE, gpie); - - /* Enable rx/tx for the PF. */ - vf_reg = IXGBE_VF_INDEX(adapter->pool); - IXGBE_WRITE_REG(hw, IXGBE_VFRE(vf_reg), - IXGBE_VF_BIT(adapter->pool)); - IXGBE_WRITE_REG(hw, IXGBE_VFTE(vf_reg), - IXGBE_VF_BIT(adapter->pool)); - - /* Allow VM-to-VM communication.
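The VT_CTL write that follows packs three fields into one register; a sketch of the composition, where the bit values are illustrative stand-ins rather than copies from ixgbe_type.h:

#include <stdint.h>
#include <stdio.h>

#define VT_CTL_VT_ENABLE  0x00000001u	/* illustrative */
#define VT_CTL_REPLEN     0x40000000u	/* illustrative */
#define VT_CTL_POOL_SHIFT 7		/* illustrative */

int
main(void)
{
	uint32_t pool = 32;	/* the PF pool sits above the VF pools */
	uint32_t vt_ctl = VT_CTL_VT_ENABLE | VT_CTL_REPLEN |
	    (pool << VT_CTL_POOL_SHIFT);

	printf("VT_CTL = 0x%08x\n", vt_ctl);
	return (0);
}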
*/ - IXGBE_WRITE_REG(hw, IXGBE_PFDTXGSWC, IXGBE_PFDTXGSWC_VT_LBEN); - - vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN; - vt_ctl |= (adapter->pool << IXGBE_VT_CTL_POOL_SHIFT); - IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl); - - for (i = 0; i < adapter->num_vfs; i++) - ixgbe_init_vf(adapter, &adapter->vfs[i]); -} - - -/* -** Check the max frame setting of all active VF's -*/ -static void -ixgbe_recalculate_max_frame(struct adapter *adapter) -{ - struct ixgbe_vf *vf; - - IXGBE_CORE_LOCK_ASSERT(adapter); - - for (int i = 0; i < adapter->num_vfs; i++) { - vf = &adapter->vfs[i]; - if (vf->flags & IXGBE_VF_ACTIVE) - ixgbe_update_max_frame(adapter, vf->max_frame_size); - } -} - - -static void -ixgbe_init_vf(struct adapter *adapter, struct ixgbe_vf *vf) -{ - struct ixgbe_hw *hw; - uint32_t vf_index, pfmbimr; - - IXGBE_CORE_LOCK_ASSERT(adapter); - - hw = &adapter->hw; - - if (!(vf->flags & IXGBE_VF_ACTIVE)) - return; - - vf_index = IXGBE_VF_INDEX(vf->pool); - pfmbimr = IXGBE_READ_REG(hw, IXGBE_PFMBIMR(vf_index)); - pfmbimr |= IXGBE_VF_BIT(vf->pool); - IXGBE_WRITE_REG(hw, IXGBE_PFMBIMR(vf_index), pfmbimr); - - ixgbe_vf_set_default_vlan(adapter, vf, vf->vlan_tag); - - // XXX multicast addresses - - if (ixgbe_validate_mac_addr(vf->ether_addr) == 0) { - ixgbe_set_rar(&adapter->hw, vf->rar_index, - vf->ether_addr, vf->pool, TRUE); - } - - ixgbe_vf_enable_transmit(adapter, vf); - ixgbe_vf_enable_receive(adapter, vf); - - ixgbe_send_vf_msg(adapter, vf, IXGBE_PF_CONTROL_MSG); -} - -static int -ixgbe_add_vf(device_t dev, u16 vfnum, const nvlist_t *config) -{ - struct adapter *adapter; - struct ixgbe_vf *vf; - const void *mac; - - adapter = device_get_softc(dev); - - KASSERT(vfnum < adapter->num_vfs, ("VF index %d is out of range %d", - vfnum, adapter->num_vfs)); - - IXGBE_CORE_LOCK(adapter); - vf = &adapter->vfs[vfnum]; - vf->pool= vfnum; - - /* RAR[0] is used by the PF so use vfnum + 1 for VF RAR. */ - vf->rar_index = vfnum + 1; - vf->default_vlan = 0; - vf->max_frame_size = ETHER_MAX_LEN; - ixgbe_update_max_frame(adapter, vf->max_frame_size); - - if (nvlist_exists_binary(config, "mac-addr")) { - mac = nvlist_get_binary(config, "mac-addr", NULL); - bcopy(mac, vf->ether_addr, ETHER_ADDR_LEN); - if (nvlist_get_bool(config, "allow-set-mac")) - vf->flags |= IXGBE_VF_CAP_MAC; - } else - /* - * If the administrator has not specified a MAC address then - * we must allow the VF to choose one. 
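For context, the nvlist consumed here is built by the SR-IOV infrastructure from iovctl(8) configuration; a userland sketch of an equivalent list using libnv (link with -lnv; the MAC value is invented), matching the "mac-addr" and "allow-set-mac" keys the code reads:

#include <sys/nv.h>

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	nvlist_t *vf = nvlist_create(0);
	static const uint8_t mac[6] = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 };

	nvlist_add_binary(vf, "mac-addr", mac, sizeof(mac));
	nvlist_add_bool(vf, "allow-set-mac", true);

	printf("mac-addr present: %d\n", nvlist_exists_binary(vf, "mac-addr"));
	nvlist_destroy(vf);
	return (0);
}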
- */ - vf->flags |= IXGBE_VF_CAP_MAC; - - vf->flags = IXGBE_VF_ACTIVE; - - ixgbe_init_vf(adapter, vf); - IXGBE_CORE_UNLOCK(adapter); - - return (0); -} -#endif /* PCI_IOV */ Index: sys/dev/ixgbe/if_ixv.c =================================================================== --- sys/dev/ixgbe/if_ixv.c +++ sys/dev/ixgbe/if_ixv.c @@ -32,2173 +32,12 @@ ******************************************************************************/ /*$FreeBSD$*/ - -#ifndef IXGBE_STANDALONE_BUILD -#include "opt_inet.h" -#include "opt_inet6.h" -#endif - -#include "ixgbe.h" - -/********************************************************************* - * Driver version - *********************************************************************/ -char ixv_driver_version[] = "1.4.6-k"; - -/********************************************************************* - * PCI Device ID Table - * - * Used by probe to select devices to load on - * Last field stores an index into ixv_strings - * Last entry must be all 0s - * - * { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index } - *********************************************************************/ - -static ixgbe_vendor_info_t ixv_vendor_info_array[] = -{ - {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_VF, 0, 0, 0}, - {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540_VF, 0, 0, 0}, - {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550_VF, 0, 0, 0}, - {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_X_VF, 0, 0, 0}, - /* required last entry */ - {0, 0, 0, 0, 0} -}; - -/********************************************************************* - * Table of branding strings - *********************************************************************/ - -static char *ixv_strings[] = { - "Intel(R) PRO/10GbE Virtual Function Network Driver" -}; - -/********************************************************************* - * Function prototypes - *********************************************************************/ -static int ixv_probe(device_t); -static int ixv_attach(device_t); -static int ixv_detach(device_t); -static int ixv_shutdown(device_t); -static int ixv_ioctl(struct ifnet *, u_long, caddr_t); -static void ixv_init(void *); -static void ixv_init_locked(struct adapter *); -static void ixv_stop(void *); -static void ixv_media_status(struct ifnet *, struct ifmediareq *); -static int ixv_media_change(struct ifnet *); -static void ixv_identify_hardware(struct adapter *); -static int ixv_allocate_pci_resources(struct adapter *); -static int ixv_allocate_msix(struct adapter *); -static int ixv_setup_msix(struct adapter *); -static void ixv_free_pci_resources(struct adapter *); -static void ixv_local_timer(void *); -static void ixv_setup_interface(device_t, struct adapter *); -static void ixv_config_link(struct adapter *); - -static void ixv_initialize_transmit_units(struct adapter *); -static void ixv_initialize_receive_units(struct adapter *); - -static void ixv_enable_intr(struct adapter *); -static void ixv_disable_intr(struct adapter *); -static void ixv_set_multi(struct adapter *); -static void ixv_update_link_status(struct adapter *); -static int ixv_sysctl_debug(SYSCTL_HANDLER_ARGS); -static void ixv_set_ivar(struct adapter *, u8, u8, s8); -static void ixv_configure_ivars(struct adapter *); -static u8 * ixv_mc_array_itr(struct ixgbe_hw *, u8 **, u32 *); - -static void ixv_setup_vlan_support(struct adapter *); -static void ixv_register_vlan(void *, struct ifnet *, u16); -static void ixv_unregister_vlan(void *, struct ifnet *, u16); - -static void ixv_save_stats(struct adapter *); -static void 
ixv_init_stats(struct adapter *); -static void ixv_update_stats(struct adapter *); -static void ixv_add_stats_sysctls(struct adapter *); -static void ixv_set_sysctl_value(struct adapter *, const char *, - const char *, int *, int); - -/* The MSI/X Interrupt handlers */ -static void ixv_msix_que(void *); -static void ixv_msix_mbx(void *); - -/* Deferred interrupt tasklets */ -static void ixv_handle_que(void *, int); -static void ixv_handle_mbx(void *, int); - -#ifdef DEV_NETMAP -/* - * This is defined in <dev/netmap/ixgbe_netmap.h>, which is included by - * if_ix.c. - */ -extern void ixgbe_netmap_attach(struct adapter *adapter); - -#include <net/netmap.h> -#include <sys/selinfo.h> -#include <dev/netmap/netmap_kern.h> -#endif /* DEV_NETMAP */ - -/********************************************************************* - * FreeBSD Device Interface Entry Points - *********************************************************************/ - -static device_method_t ixv_methods[] = { - /* Device interface */ - DEVMETHOD(device_probe, ixv_probe), - DEVMETHOD(device_attach, ixv_attach), - DEVMETHOD(device_detach, ixv_detach), - DEVMETHOD(device_shutdown, ixv_shutdown), - DEVMETHOD_END -}; - -static driver_t ixv_driver = { - "ixv", ixv_methods, sizeof(struct adapter), -}; - -devclass_t ixv_devclass; -DRIVER_MODULE(ixv, pci, ixv_driver, ixv_devclass, 0, 0); -MODULE_DEPEND(ixv, pci, 1, 1, 1); -MODULE_DEPEND(ixv, ether, 1, 1, 1); -#ifdef DEV_NETMAP -MODULE_DEPEND(ix, netmap, 1, 1, 1); -#endif /* DEV_NETMAP */ -/* XXX depend on 'ix' ? */ - -/* -** TUNEABLE PARAMETERS: -*/ - -/* Number of Queues - do not exceed MSIX vectors - 1 */ -static int ixv_num_queues = 1; -TUNABLE_INT("hw.ixv.num_queues", &ixv_num_queues); - -/* -** AIM: Adaptive Interrupt Moderation -** which means that the interrupt rate -** is varied over time based on the -** traffic for that interrupt vector -*/ -static int ixv_enable_aim = FALSE; -TUNABLE_INT("hw.ixv.enable_aim", &ixv_enable_aim); - -/* How many packets rxeof tries to clean at a time */ -static int ixv_rx_process_limit = 256; -TUNABLE_INT("hw.ixv.rx_process_limit", &ixv_rx_process_limit); - -/* How many packets txeof tries to clean at a time */ -static int ixv_tx_process_limit = 256; -TUNABLE_INT("hw.ixv.tx_process_limit", &ixv_tx_process_limit); - -/* Flow control setting, default to full */ -static int ixv_flow_control = ixgbe_fc_full; -TUNABLE_INT("hw.ixv.flow_control", &ixv_flow_control); - -/* - * Header split: this causes the hardware to DMA - * the header into a separate mbuf from the payload, - * it can be a performance win in some workloads, but - * in others it actually hurts; it's off by default. - */ -static int ixv_header_split = FALSE; -TUNABLE_INT("hw.ixv.hdr_split", &ixv_header_split); - -/* -** Number of TX descriptors per ring, -** setting higher than RX as this seems -** the better performing choice. -*/ -static int ixv_txd = DEFAULT_TXD; -TUNABLE_INT("hw.ixv.txd", &ixv_txd); - -/* Number of RX descriptors per ring */ -static int ixv_rxd = DEFAULT_RXD; -TUNABLE_INT("hw.ixv.rxd", &ixv_rxd); - -/* -** Shadow VFTA table, this is needed because -** the real filter table gets cleared during -** a soft reset and we need to repopulate it. -*/ -static u32 ixv_shadow_vfta[IXGBE_VFTA_SIZE]; - -/********************************************************************* - * Device identification routine - * - * ixv_probe determines if the driver should be loaded on - * the adapter based on PCI vendor/device id of the adapter.
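Since the hw.ixv.* knobs above are boot-time tunables, they are normally set from loader.conf(5); a hypothetical fragment (values purely illustrative, and subject to the MIN/MAX descriptor checks in ixv_attach()):

hw.ixv.num_queues="2"
hw.ixv.enable_aim="1"
hw.ixv.txd="2048"
hw.ixv.rxd="2048"
hw.ixv.rx_process_limit="256"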
- * - * return BUS_PROBE_DEFAULT on success, positive on failure - *********************************************************************/ - -static int -ixv_probe(device_t dev) -{ - ixgbe_vendor_info_t *ent; - - u16 pci_vendor_id = 0; - u16 pci_device_id = 0; - u16 pci_subvendor_id = 0; - u16 pci_subdevice_id = 0; - char adapter_name[256]; - - - pci_vendor_id = pci_get_vendor(dev); - if (pci_vendor_id != IXGBE_INTEL_VENDOR_ID) - return (ENXIO); - - pci_device_id = pci_get_device(dev); - pci_subvendor_id = pci_get_subvendor(dev); - pci_subdevice_id = pci_get_subdevice(dev); - - ent = ixv_vendor_info_array; - while (ent->vendor_id != 0) { - if ((pci_vendor_id == ent->vendor_id) && - (pci_device_id == ent->device_id) && - - ((pci_subvendor_id == ent->subvendor_id) || - (ent->subvendor_id == 0)) && - - ((pci_subdevice_id == ent->subdevice_id) || - (ent->subdevice_id == 0))) { - sprintf(adapter_name, "%s, Version - %s", - ixv_strings[ent->index], - ixv_driver_version); - device_set_desc_copy(dev, adapter_name); - return (BUS_PROBE_DEFAULT); - } - ent++; - } - return (ENXIO); -} - -/********************************************************************* - * Device initialization routine - * - * The attach entry point is called when the driver is being loaded. - * This routine identifies the type of hardware, allocates all resources - * and initializes the hardware. - * - * return 0 on success, positive on failure - *********************************************************************/ - -static int -ixv_attach(device_t dev) -{ - struct adapter *adapter; - struct ixgbe_hw *hw; - int error = 0; - - INIT_DEBUGOUT("ixv_attach: begin"); - - /* Allocate, clear, and link in our adapter structure */ - adapter = device_get_softc(dev); - adapter->dev = dev; - hw = &adapter->hw; - -#ifdef DEV_NETMAP - adapter->init_locked = ixv_init_locked; - adapter->stop_locked = ixv_stop; -#endif - - /* Core Lock Init*/ - IXGBE_CORE_LOCK_INIT(adapter, device_get_nameunit(dev)); - - /* SYSCTL APIs */ - SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), - SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), - OID_AUTO, "debug", CTLTYPE_INT | CTLFLAG_RW, - adapter, 0, ixv_sysctl_debug, "I", "Debug Info"); - - SYSCTL_ADD_INT(device_get_sysctl_ctx(dev), - SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), - OID_AUTO, "enable_aim", CTLFLAG_RW, - &ixv_enable_aim, 1, "Interrupt Moderation"); - - /* Set up the timer callout */ - callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0); - - /* Determine hardware revision */ - ixv_identify_hardware(adapter); - - /* Do base PCI setup - map BAR0 */ - if (ixv_allocate_pci_resources(adapter)) { - device_printf(dev, "ixv_allocate_pci_resources() failed!\n"); - error = ENXIO; - goto err_out; - } - - /* Sysctls for limiting the amount of work done in the taskqueues */ - ixv_set_sysctl_value(adapter, "rx_processing_limit", - "max number of rx packets to process", - &adapter->rx_process_limit, ixv_rx_process_limit); - - ixv_set_sysctl_value(adapter, "tx_processing_limit", - "max number of tx packets to process", - &adapter->tx_process_limit, ixv_tx_process_limit); - - /* Do descriptor calc and sanity checks */ - if (((ixv_txd * sizeof(union ixgbe_adv_tx_desc)) % DBA_ALIGN) != 0 || - ixv_txd < MIN_TXD || ixv_txd > MAX_TXD) { - device_printf(dev, "TXD config issue, using default!\n"); - adapter->num_tx_desc = DEFAULT_TXD; - } else - adapter->num_tx_desc = ixv_txd; - - if (((ixv_rxd * sizeof(union ixgbe_adv_rx_desc)) % DBA_ALIGN) != 0 || - ixv_rxd < MIN_RXD || ixv_rxd > MAX_RXD) { - device_printf(dev, "RXD config 
issue, using default!\n"); - adapter->num_rx_desc = DEFAULT_RXD; - } else - adapter->num_rx_desc = ixv_rxd; - - /* Allocate our TX/RX Queues */ - if (ixgbe_allocate_queues(adapter)) { - device_printf(dev, "ixgbe_allocate_queues() failed!\n"); - error = ENOMEM; - goto err_out; - } - - /* - ** Initialize the shared code: its - ** at this point the mac type is set. - */ - error = ixgbe_init_shared_code(hw); - if (error) { - device_printf(dev, "ixgbe_init_shared_code() failed!\n"); - error = EIO; - goto err_late; - } - - /* Setup the mailbox */ - ixgbe_init_mbx_params_vf(hw); - - /* Reset mbox api to 1.0 */ - error = ixgbe_reset_hw(hw); - if (error == IXGBE_ERR_RESET_FAILED) - device_printf(dev, "ixgbe_reset_hw() failure: Reset Failed!\n"); - else if (error) - device_printf(dev, "ixgbe_reset_hw() failed with error %d\n", error); - if (error) { - error = EIO; - goto err_late; - } - - /* Negotiate mailbox API version */ - error = ixgbevf_negotiate_api_version(hw, ixgbe_mbox_api_11); - if (error) { - device_printf(dev, "MBX API 1.1 negotiation failed! Error %d\n", error); - error = EIO; - goto err_late; - } - - error = ixgbe_init_hw(hw); - if (error) { - device_printf(dev, "ixgbe_init_hw() failed!\n"); - error = EIO; - goto err_late; - } - - error = ixv_allocate_msix(adapter); - if (error) { - device_printf(dev, "ixv_allocate_msix() failed!\n"); - goto err_late; - } - - /* If no mac address was assigned, make a random one */ - if (!ixv_check_ether_addr(hw->mac.addr)) { - u8 addr[ETHER_ADDR_LEN]; - arc4rand(&addr, sizeof(addr), 0); - addr[0] &= 0xFE; - addr[0] |= 0x02; - bcopy(addr, hw->mac.addr, sizeof(addr)); - } - - /* Setup OS specific network interface */ - ixv_setup_interface(dev, adapter); - - /* Do the stats setup */ - ixv_save_stats(adapter); - ixv_init_stats(adapter); - ixv_add_stats_sysctls(adapter); - - /* Register for VLAN events */ - adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config, - ixv_register_vlan, adapter, EVENTHANDLER_PRI_FIRST); - adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig, - ixv_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST); - -#ifdef DEV_NETMAP - ixgbe_netmap_attach(adapter); -#endif /* DEV_NETMAP */ - INIT_DEBUGOUT("ixv_attach: end"); - return (0); - -err_late: - ixgbe_free_transmit_structures(adapter); - ixgbe_free_receive_structures(adapter); -err_out: - ixv_free_pci_resources(adapter); - return (error); - -} - -/********************************************************************* - * Device removal routine - * - * The detach entry point is called when the driver is being removed. - * This routine stops the adapter and deallocates all the resources - * that were allocated for driver operation. 
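 * Teardown order matters: stop the hardware under the core lock,
 * drain and free the per-queue and mailbox taskqueues, deregister
 * the VLAN event handlers, detach the ifnet, and only then release
 * PCI resources. The detach is refused with EBUSY while a VLAN is
 * still configured on the trunk.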
- * - * return 0 on success, positive on failure - *********************************************************************/ - -static int -ixv_detach(device_t dev) -{ - struct adapter *adapter = device_get_softc(dev); - struct ix_queue *que = adapter->queues; - - INIT_DEBUGOUT("ixv_detach: begin"); - - /* Make sure VLANS are not using driver */ - if (adapter->ifp->if_vlantrunk != NULL) { - device_printf(dev, "Vlan in use, detach first\n"); - return (EBUSY); - } - - IXGBE_CORE_LOCK(adapter); - ixv_stop(adapter); - IXGBE_CORE_UNLOCK(adapter); - - for (int i = 0; i < adapter->num_queues; i++, que++) { - if (que->tq) { - struct tx_ring *txr = que->txr; - taskqueue_drain(que->tq, &txr->txq_task); - taskqueue_drain(que->tq, &que->que_task); - taskqueue_free(que->tq); - } - } - - /* Drain the Mailbox(link) queue */ - if (adapter->tq) { - taskqueue_drain(adapter->tq, &adapter->link_task); - taskqueue_free(adapter->tq); - } - - /* Unregister VLAN events */ - if (adapter->vlan_attach != NULL) - EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach); - if (adapter->vlan_detach != NULL) - EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach); - - ether_ifdetach(adapter->ifp); - callout_drain(&adapter->timer); -#ifdef DEV_NETMAP - netmap_detach(adapter->ifp); -#endif /* DEV_NETMAP */ - ixv_free_pci_resources(adapter); - bus_generic_detach(dev); - if_free(adapter->ifp); - - ixgbe_free_transmit_structures(adapter); - ixgbe_free_receive_structures(adapter); - - IXGBE_CORE_LOCK_DESTROY(adapter); - return (0); -} - -/********************************************************************* - * - * Shutdown entry point - * - **********************************************************************/ -static int -ixv_shutdown(device_t dev) -{ - struct adapter *adapter = device_get_softc(dev); - IXGBE_CORE_LOCK(adapter); - ixv_stop(adapter); - IXGBE_CORE_UNLOCK(adapter); - return (0); -} - - -/********************************************************************* - * Ioctl entry point - * - * ixv_ioctl is called when the user wants to configure the - * interface. - * - * return 0 on success, positive on failure - **********************************************************************/ - -static int -ixv_ioctl(struct ifnet * ifp, u_long command, caddr_t data) -{ - struct adapter *adapter = ifp->if_softc; - struct ifreq *ifr = (struct ifreq *) data; -#if defined(INET) || defined(INET6) - struct ifaddr *ifa = (struct ifaddr *) data; - bool avoid_reset = FALSE; -#endif - int error = 0; - - switch (command) { - - case SIOCSIFADDR: -#ifdef INET - if (ifa->ifa_addr->sa_family == AF_INET) - avoid_reset = TRUE; -#endif -#ifdef INET6 - if (ifa->ifa_addr->sa_family == AF_INET6) - avoid_reset = TRUE; -#endif -#if defined(INET) || defined(INET6) - /* - ** Calling init results in link renegotiation, - ** so we avoid doing it when possible. 
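	** For example, "ifconfig ixv0 inet 10.0.0.1/24" arrives here as
	** SIOCSIFADDR: rather than a full re-init (and the link
	** renegotiation it entails), the code below only marks the
	** interface up, starts it via ixv_init() if it is not already
	** running, and seeds ARP on the new address.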
- */ - if (avoid_reset) { - ifp->if_flags |= IFF_UP; - if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) - ixv_init(adapter); - if (!(ifp->if_flags & IFF_NOARP)) - arp_ifinit(ifp, ifa); - } else - error = ether_ioctl(ifp, command, data); - break; -#endif - case SIOCSIFMTU: - IOCTL_DEBUGOUT("ioctl: SIOCSIFMTU (Set Interface MTU)"); - if (ifr->ifr_mtu > IXGBE_MAX_FRAME_SIZE - IXGBE_MTU_HDR) { - error = EINVAL; - } else { - IXGBE_CORE_LOCK(adapter); - ifp->if_mtu = ifr->ifr_mtu; - adapter->max_frame_size = - ifp->if_mtu + IXGBE_MTU_HDR; - if (ifp->if_drv_flags & IFF_DRV_RUNNING) - ixv_init_locked(adapter); - IXGBE_CORE_UNLOCK(adapter); - } - break; - case SIOCSIFFLAGS: - IOCTL_DEBUGOUT("ioctl: SIOCSIFFLAGS (Set Interface Flags)"); - IXGBE_CORE_LOCK(adapter); - if (ifp->if_flags & IFF_UP) { - if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) - ixv_init_locked(adapter); - } else - if (ifp->if_drv_flags & IFF_DRV_RUNNING) - ixv_stop(adapter); - adapter->if_flags = ifp->if_flags; - IXGBE_CORE_UNLOCK(adapter); - break; - case SIOCADDMULTI: - case SIOCDELMULTI: - IOCTL_DEBUGOUT("ioctl: SIOC(ADD|DEL)MULTI"); - if (ifp->if_drv_flags & IFF_DRV_RUNNING) { - IXGBE_CORE_LOCK(adapter); - ixv_disable_intr(adapter); - ixv_set_multi(adapter); - ixv_enable_intr(adapter); - IXGBE_CORE_UNLOCK(adapter); - } - break; - case SIOCSIFMEDIA: - case SIOCGIFMEDIA: - IOCTL_DEBUGOUT("ioctl: SIOCxIFMEDIA (Get/Set Interface Media)"); - error = ifmedia_ioctl(ifp, ifr, &adapter->media, command); - break; - case SIOCSIFCAP: - { - int mask = ifr->ifr_reqcap ^ ifp->if_capenable; - IOCTL_DEBUGOUT("ioctl: SIOCSIFCAP (Set Capabilities)"); - if (mask & IFCAP_HWCSUM) - ifp->if_capenable ^= IFCAP_HWCSUM; - if (mask & IFCAP_TSO4) - ifp->if_capenable ^= IFCAP_TSO4; - if (mask & IFCAP_LRO) - ifp->if_capenable ^= IFCAP_LRO; - if (mask & IFCAP_VLAN_HWTAGGING) - ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; - if (ifp->if_drv_flags & IFF_DRV_RUNNING) { - IXGBE_CORE_LOCK(adapter); - ixv_init_locked(adapter); - IXGBE_CORE_UNLOCK(adapter); - } - VLAN_CAPABILITIES(ifp); - break; - } - - default: - IOCTL_DEBUGOUT1("ioctl: UNKNOWN (0x%X)\n", (int)command); - error = ether_ioctl(ifp, command, data); - break; - } - - return (error); -} - -/********************************************************************* - * Init entry point - * - * This routine is used in two ways. It is used by the stack as - * init entry point in network interface structure. It is also used - * by the driver as a hw/sw initialization routine to get to a - * consistent state. - * - * return 0 on success, positive on failure - **********************************************************************/ -#define IXGBE_MHADD_MFS_SHIFT 16 - -static void -ixv_init_locked(struct adapter *adapter) -{ - struct ifnet *ifp = adapter->ifp; - device_t dev = adapter->dev; - struct ixgbe_hw *hw = &adapter->hw; - int error = 0; - - INIT_DEBUGOUT("ixv_init_locked: begin"); - mtx_assert(&adapter->core_mtx, MA_OWNED); - hw->adapter_stopped = FALSE; - ixgbe_stop_adapter(hw); - callout_stop(&adapter->timer); - - /* reprogram the RAR[0] in case user changed it. 
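	 * RAR[0] is the VF's unicast MAC filter slot, programmed via the
	 * PF mailbox, so a locally administered address set with
	 * "ifconfig ether" survives the re-init; the current lladdr is
	 * copied back from the ifnet just below.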
*/ - ixgbe_set_rar(hw, 0, hw->mac.addr, 0, IXGBE_RAH_AV); - - /* Get the latest mac address, User can use a LAA */ - bcopy(IF_LLADDR(adapter->ifp), hw->mac.addr, - IXGBE_ETH_LENGTH_OF_ADDRESS); - ixgbe_set_rar(hw, 0, hw->mac.addr, 0, 1); - hw->addr_ctrl.rar_used_count = 1; - - /* Prepare transmit descriptors and buffers */ - if (ixgbe_setup_transmit_structures(adapter)) { - device_printf(dev, "Could not setup transmit structures\n"); - ixv_stop(adapter); - return; - } - - /* Reset VF and renegotiate mailbox API version */ - ixgbe_reset_hw(hw); - error = ixgbevf_negotiate_api_version(hw, ixgbe_mbox_api_11); - if (error) - device_printf(dev, "MBX API 1.1 negotiation failed! Error %d\n", error); - - ixv_initialize_transmit_units(adapter); - - /* Setup Multicast table */ - ixv_set_multi(adapter); - - /* - ** Determine the correct mbuf pool - ** for doing jumbo/headersplit - */ - if (ifp->if_mtu > ETHERMTU) - adapter->rx_mbuf_sz = MJUMPAGESIZE; - else - adapter->rx_mbuf_sz = MCLBYTES; - - /* Prepare receive descriptors and buffers */ - if (ixgbe_setup_receive_structures(adapter)) { - device_printf(dev, "Could not setup receive structures\n"); - ixv_stop(adapter); - return; - } - - /* Configure RX settings */ - ixv_initialize_receive_units(adapter); - - /* Set the various hardware offload abilities */ - ifp->if_hwassist = 0; - if (ifp->if_capenable & IFCAP_TSO4) - ifp->if_hwassist |= CSUM_TSO; - if (ifp->if_capenable & IFCAP_TXCSUM) { - ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP); -#if __FreeBSD_version >= 800000 - ifp->if_hwassist |= CSUM_SCTP; -#endif - } - - /* Set up VLAN offload and filter */ - ixv_setup_vlan_support(adapter); - - /* Set up MSI/X routing */ - ixv_configure_ivars(adapter); - - /* Set up auto-mask */ - IXGBE_WRITE_REG(hw, IXGBE_VTEIAM, IXGBE_EICS_RTX_QUEUE); - - /* Set moderation on the Link interrupt */ - IXGBE_WRITE_REG(hw, IXGBE_VTEITR(adapter->vector), IXGBE_LINK_ITR); - - /* Stats init */ - ixv_init_stats(adapter); - - /* Config/Enable Link */ - ixv_config_link(adapter); - - /* Start watchdog */ - callout_reset(&adapter->timer, hz, ixv_local_timer, adapter); - - /* And now turn on interrupts */ - ixv_enable_intr(adapter); - - /* Now inform the stack we're ready */ - ifp->if_drv_flags |= IFF_DRV_RUNNING; - ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; - - return; -} - -static void -ixv_init(void *arg) -{ - struct adapter *adapter = arg; - - IXGBE_CORE_LOCK(adapter); - ixv_init_locked(adapter); - IXGBE_CORE_UNLOCK(adapter); - return; -} - - -/* -** -** MSIX Interrupt Handlers and Tasklets -** -*/ - -static inline void -ixv_enable_queue(struct adapter *adapter, u32 vector) -{ - struct ixgbe_hw *hw = &adapter->hw; - u32 queue = 1 << vector; - u32 mask; - - mask = (IXGBE_EIMS_RTX_QUEUE & queue); - IXGBE_WRITE_REG(hw, IXGBE_VTEIMS, mask); -} - -static inline void -ixv_disable_queue(struct adapter *adapter, u32 vector) -{ - struct ixgbe_hw *hw = &adapter->hw; - u64 queue = (u64)(1 << vector); - u32 mask; - - mask = (IXGBE_EIMS_RTX_QUEUE & queue); - IXGBE_WRITE_REG(hw, IXGBE_VTEIMC, mask); -} - -static inline void -ixv_rearm_queues(struct adapter *adapter, u64 queues) -{ - u32 mask = (IXGBE_EIMS_RTX_QUEUE & queues); - IXGBE_WRITE_REG(&adapter->hw, IXGBE_VTEICS, mask); -} - - -static void -ixv_handle_que(void *context, int pending) -{ - struct ix_queue *que = context; - struct adapter *adapter = que->adapter; - struct tx_ring *txr = que->txr; - struct ifnet *ifp = adapter->ifp; - bool more; - - if (ifp->if_drv_flags & IFF_DRV_RUNNING) { - more = ixgbe_rxeof(que); - IXGBE_TX_LOCK(txr); - 
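	/*
	 * Drain completed transmit descriptors while the TX lock is
	 * held, then restart output if the stack still has packets
	 * queued: the drbr ring on multiqueue (800000+) kernels, the
	 * legacy if_snd queue otherwise.
	 */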
ixgbe_txeof(txr); -#if __FreeBSD_version >= 800000 - if (!drbr_empty(ifp, txr->br)) - ixgbe_mq_start_locked(ifp, txr); -#else - if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) - ixgbe_start_locked(txr, ifp); -#endif - IXGBE_TX_UNLOCK(txr); - if (more) { - taskqueue_enqueue(que->tq, &que->que_task); - return; - } - } - - /* Reenable this interrupt */ - ixv_enable_queue(adapter, que->msix); - return; -} - -/********************************************************************* - * - * MSI Queue Interrupt Service routine - * - **********************************************************************/ -void -ixv_msix_que(void *arg) -{ - struct ix_queue *que = arg; - struct adapter *adapter = que->adapter; - struct ifnet *ifp = adapter->ifp; - struct tx_ring *txr = que->txr; - struct rx_ring *rxr = que->rxr; - bool more; - u32 newitr = 0; - - ixv_disable_queue(adapter, que->msix); - ++que->irqs; - - more = ixgbe_rxeof(que); - - IXGBE_TX_LOCK(txr); - ixgbe_txeof(txr); - /* - ** Make certain that if the stack - ** has anything queued the task gets - ** scheduled to handle it. - */ -#ifdef IXGBE_LEGACY_TX - if (!IFQ_DRV_IS_EMPTY(&adapter->ifp->if_snd)) - ixgbe_start_locked(txr, ifp); -#else - if (!drbr_empty(adapter->ifp, txr->br)) - ixgbe_mq_start_locked(ifp, txr); -#endif - IXGBE_TX_UNLOCK(txr); - - /* Do AIM now? */ - - if (ixv_enable_aim == FALSE) - goto no_calc; - /* - ** Do Adaptive Interrupt Moderation: - ** - Write out last calculated setting - ** - Calculate based on average size over - ** the last interval. - */ - if (que->eitr_setting) - IXGBE_WRITE_REG(&adapter->hw, - IXGBE_VTEITR(que->msix), - que->eitr_setting); - - que->eitr_setting = 0; - - /* Idle, do nothing */ - if ((txr->bytes == 0) && (rxr->bytes == 0)) - goto no_calc; - - if ((txr->bytes) && (txr->packets)) - newitr = txr->bytes/txr->packets; - if ((rxr->bytes) && (rxr->packets)) - newitr = max(newitr, - (rxr->bytes / rxr->packets)); - newitr += 24; /* account for hardware frame, crc */ - - /* set an upper boundary */ - newitr = min(newitr, 3000); - - /* Be nice to the mid range */ - if ((newitr > 300) && (newitr < 1200)) - newitr = (newitr / 3); - else - newitr = (newitr / 2); - - newitr |= newitr << 16; - - /* save for next interrupt */ - que->eitr_setting = newitr; - - /* Reset state */ - txr->bytes = 0; - txr->packets = 0; - rxr->bytes = 0; - rxr->packets = 0; - -no_calc: - if (more) - taskqueue_enqueue(que->tq, &que->que_task); - else /* Reenable this interrupt */ - ixv_enable_queue(adapter, que->msix); - return; -} - -static void -ixv_msix_mbx(void *arg) -{ - struct adapter *adapter = arg; - struct ixgbe_hw *hw = &adapter->hw; - u32 reg; - - ++adapter->link_irq; - - /* First get the cause */ - reg = IXGBE_READ_REG(hw, IXGBE_VTEICS); - /* Clear interrupt with write */ - IXGBE_WRITE_REG(hw, IXGBE_VTEICR, reg); - - /* Link status change */ - if (reg & IXGBE_EICR_LSC) - taskqueue_enqueue(adapter->tq, &adapter->link_task); - - IXGBE_WRITE_REG(hw, IXGBE_VTEIMS, IXGBE_EIMS_OTHER); - return; -} - -/********************************************************************* - * - * Media Ioctl callback - * - * This routine is called whenever the user queries the status of - * the interface using ifconfig. 
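 * The VF never owns the PHY (the PF does), so only link state and
 * speed are reflected: 1 Gb/s reports IFM_1000_T | IFM_FDX, while
 * the 10 Gb/s case below sets IFM_FDX with no specific media
 * subtype.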
- * - **********************************************************************/ -static void -ixv_media_status(struct ifnet * ifp, struct ifmediareq * ifmr) -{ - struct adapter *adapter = ifp->if_softc; - - INIT_DEBUGOUT("ixv_media_status: begin"); - IXGBE_CORE_LOCK(adapter); - ixv_update_link_status(adapter); - - ifmr->ifm_status = IFM_AVALID; - ifmr->ifm_active = IFM_ETHER; - - if (!adapter->link_active) { - IXGBE_CORE_UNLOCK(adapter); - return; - } - - ifmr->ifm_status |= IFM_ACTIVE; - - switch (adapter->link_speed) { - case IXGBE_LINK_SPEED_1GB_FULL: - ifmr->ifm_active |= IFM_1000_T | IFM_FDX; - break; - case IXGBE_LINK_SPEED_10GB_FULL: - ifmr->ifm_active |= IFM_FDX; - break; - } - - IXGBE_CORE_UNLOCK(adapter); - - return; -} - -/********************************************************************* - * - * Media Ioctl callback - * - * This routine is called when the user changes speed/duplex using - * media/mediopt option with ifconfig. - * - **********************************************************************/ -static int -ixv_media_change(struct ifnet * ifp) -{ - struct adapter *adapter = ifp->if_softc; - struct ifmedia *ifm = &adapter->media; - - INIT_DEBUGOUT("ixv_media_change: begin"); - - if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) - return (EINVAL); - - switch (IFM_SUBTYPE(ifm->ifm_media)) { - case IFM_AUTO: - break; - default: - device_printf(adapter->dev, "Only auto media type\n"); - return (EINVAL); - } - - return (0); -} - - -/********************************************************************* - * Multicast Update - * - * This routine is called whenever multicast address list is updated. - * - **********************************************************************/ -#define IXGBE_RAR_ENTRIES 16 - -static void -ixv_set_multi(struct adapter *adapter) -{ - u8 mta[MAX_NUM_MULTICAST_ADDRESSES * IXGBE_ETH_LENGTH_OF_ADDRESS]; - u8 *update_ptr; - struct ifmultiaddr *ifma; - int mcnt = 0; - struct ifnet *ifp = adapter->ifp; - - IOCTL_DEBUGOUT("ixv_set_multi: begin"); - -#if __FreeBSD_version < 800000 - IF_ADDR_LOCK(ifp); -#else - if_maddr_rlock(ifp); -#endif - TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { - if (ifma->ifma_addr->sa_family != AF_LINK) - continue; - bcopy(LLADDR((struct sockaddr_dl *) ifma->ifma_addr), - &mta[mcnt * IXGBE_ETH_LENGTH_OF_ADDRESS], - IXGBE_ETH_LENGTH_OF_ADDRESS); - mcnt++; - } -#if __FreeBSD_version < 800000 - IF_ADDR_UNLOCK(ifp); -#else - if_maddr_runlock(ifp); -#endif - - update_ptr = mta; - - ixgbe_update_mc_addr_list(&adapter->hw, - update_ptr, mcnt, ixv_mc_array_itr, TRUE); - - return; -} - -/* - * This is an iterator function now needed by the multicast - * shared code. It simply feeds the shared code routine the - * addresses in the array of ixv_set_multi() one by one. - */ -static u8 * -ixv_mc_array_itr(struct ixgbe_hw *hw, u8 **update_ptr, u32 *vmdq) -{ - u8 *addr = *update_ptr; - u8 *newptr; - *vmdq = 0; - - newptr = addr + IXGBE_ETH_LENGTH_OF_ADDRESS; - *update_ptr = newptr; - return addr; -} - -/********************************************************************* - * Timer routine - * - * This routine checks for link status,updates statistics, - * and runs the watchdog check. 
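 * The watchdog is deliberately conservative: a queue that stops
 * making txeof progress is first marked hung and dropped from the
 * active set, and a full reset fires only once every queue reports
 * hung; e.g. with 4 queues, 3 hung queues just get skipped while
 * queues with pending work are re-armed with a forced interrupt.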
- * - **********************************************************************/ - -static void -ixv_local_timer(void *arg) -{ - struct adapter *adapter = arg; - device_t dev = adapter->dev; - struct ix_queue *que = adapter->queues; - u64 queues = 0; - int hung = 0; - - mtx_assert(&adapter->core_mtx, MA_OWNED); - - ixv_update_link_status(adapter); - - /* Stats Update */ - ixv_update_stats(adapter); - - /* - ** Check the TX queues status - ** - mark hung queues so we don't schedule on them - ** - watchdog only if all queues show hung - */ - for (int i = 0; i < adapter->num_queues; i++, que++) { - /* Keep track of queues with work for soft irq */ - if (que->txr->busy) - queues |= ((u64)1 << que->me); - /* - ** Each time txeof runs without cleaning, but there - ** are uncleaned descriptors it increments busy. If - ** we get to the MAX we declare it hung. - */ - if (que->busy == IXGBE_QUEUE_HUNG) { - ++hung; - /* Mark the queue as inactive */ - adapter->active_queues &= ~((u64)1 << que->me); - continue; - } else { - /* Check if we've come back from hung */ - if ((adapter->active_queues & ((u64)1 << que->me)) == 0) - adapter->active_queues |= ((u64)1 << que->me); - } - if (que->busy >= IXGBE_MAX_TX_BUSY) { - device_printf(dev,"Warning queue %d " - "appears to be hung!\n", i); - que->txr->busy = IXGBE_QUEUE_HUNG; - ++hung; - } - - } - - /* Only truly watchdog if all queues show hung */ - if (hung == adapter->num_queues) - goto watchdog; - else if (queues != 0) { /* Force an IRQ on queues with work */ - ixv_rearm_queues(adapter, queues); - } - - callout_reset(&adapter->timer, hz, ixv_local_timer, adapter); - return; - -watchdog: - device_printf(adapter->dev, "Watchdog timeout -- resetting\n"); - adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING; - adapter->watchdog_events++; - ixv_init_locked(adapter); -} - -/* -** Note: this routine updates the OS on the link state -** the real check of the hardware only happens with -** a link interrupt. -*/ -static void -ixv_update_link_status(struct adapter *adapter) -{ - struct ifnet *ifp = adapter->ifp; - device_t dev = adapter->dev; - - if (adapter->link_up){ - if (adapter->link_active == FALSE) { - if (bootverbose) - device_printf(dev,"Link is up %d Gbps %s \n", - ((adapter->link_speed == 128)? 10:1), - "Full Duplex"); - adapter->link_active = TRUE; - if_link_state_change(ifp, LINK_STATE_UP); - } - } else { /* Link down */ - if (adapter->link_active == TRUE) { - if (bootverbose) - device_printf(dev,"Link is Down\n"); - if_link_state_change(ifp, LINK_STATE_DOWN); - adapter->link_active = FALSE; - } - } - - return; -} - - -/********************************************************************* - * - * This routine disables all traffic on the adapter by issuing a - * global reset on the MAC and deallocates TX/RX buffers. - * - **********************************************************************/ - -static void -ixv_stop(void *arg) -{ - struct ifnet *ifp; - struct adapter *adapter = arg; - struct ixgbe_hw *hw = &adapter->hw; - ifp = adapter->ifp; - - mtx_assert(&adapter->core_mtx, MA_OWNED); - - INIT_DEBUGOUT("ixv_stop: begin\n"); - ixv_disable_intr(adapter); - - /* Tell the stack that the interface is no longer active */ - ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); - - ixgbe_reset_hw(hw); - adapter->hw.adapter_stopped = FALSE; - ixgbe_stop_adapter(hw); - callout_stop(&adapter->timer); - - /* reprogram the RAR[0] in case user changed it. 
*/ - ixgbe_set_rar(hw, 0, hw->mac.addr, 0, IXGBE_RAH_AV); - - return; -} - - -/********************************************************************* - * - * Determine hardware revision. - * - **********************************************************************/ -static void -ixv_identify_hardware(struct adapter *adapter) -{ - device_t dev = adapter->dev; - struct ixgbe_hw *hw = &adapter->hw; - - /* - ** Make sure BUSMASTER is set, on a VM under - ** KVM it may not be and will break things. - */ - pci_enable_busmaster(dev); - - /* Save off the information about this board */ - hw->vendor_id = pci_get_vendor(dev); - hw->device_id = pci_get_device(dev); - hw->revision_id = pci_read_config(dev, PCIR_REVID, 1); - hw->subsystem_vendor_id = - pci_read_config(dev, PCIR_SUBVEND_0, 2); - hw->subsystem_device_id = - pci_read_config(dev, PCIR_SUBDEV_0, 2); - - /* We need this to determine device-specific things */ - ixgbe_set_mac_type(hw); - - /* Set the right number of segments */ - adapter->num_segs = IXGBE_82599_SCATTER; - - return; -} - -/********************************************************************* - * - * Setup MSIX Interrupt resources and handlers - * - **********************************************************************/ -static int -ixv_allocate_msix(struct adapter *adapter) -{ - device_t dev = adapter->dev; - struct ix_queue *que = adapter->queues; - struct tx_ring *txr = adapter->tx_rings; - int error, rid, vector = 0; - - for (int i = 0; i < adapter->num_queues; i++, vector++, que++, txr++) { - rid = vector + 1; - que->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, - RF_SHAREABLE | RF_ACTIVE); - if (que->res == NULL) { - device_printf(dev,"Unable to allocate" - " bus resource: que interrupt [%d]\n", vector); - return (ENXIO); - } - /* Set the handler function */ - error = bus_setup_intr(dev, que->res, - INTR_TYPE_NET | INTR_MPSAFE, NULL, - ixv_msix_que, que, &que->tag); - if (error) { - que->res = NULL; - device_printf(dev, "Failed to register QUE handler"); - return (error); - } -#if __FreeBSD_version >= 800504 - bus_describe_intr(dev, que->res, que->tag, "que %d", i); -#endif - que->msix = vector; - adapter->active_queues |= (u64)(1 << que->msix); - /* - ** Bind the msix vector, and thus the - ** ring to the corresponding cpu. 
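		** The mapping is 1:1:1: queue i is serviced by MSIX vector i
		** and, when more than one queue exists, pinned to CPU i; the
		** bus resource id is vector + 1 because rid 0 names the
		** legacy INTx interrupt.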
- */ - if (adapter->num_queues > 1) - bus_bind_intr(dev, que->res, i); - TASK_INIT(&txr->txq_task, 0, ixgbe_deferred_mq_start, txr); - TASK_INIT(&que->que_task, 0, ixv_handle_que, que); - que->tq = taskqueue_create_fast("ixv_que", M_NOWAIT, - taskqueue_thread_enqueue, &que->tq); - taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que", - device_get_nameunit(adapter->dev)); - } - - /* and Mailbox */ - rid = vector + 1; - adapter->res = bus_alloc_resource_any(dev, - SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); - if (!adapter->res) { - device_printf(dev,"Unable to allocate" - " bus resource: MBX interrupt [%d]\n", rid); - return (ENXIO); - } - /* Set the mbx handler function */ - error = bus_setup_intr(dev, adapter->res, - INTR_TYPE_NET | INTR_MPSAFE, NULL, - ixv_msix_mbx, adapter, &adapter->tag); - if (error) { - adapter->res = NULL; - device_printf(dev, "Failed to register LINK handler"); - return (error); - } -#if __FreeBSD_version >= 800504 - bus_describe_intr(dev, adapter->res, adapter->tag, "mbx"); +#ifndef KLD_MODULE +#include "opt_iflib.h" #endif - adapter->vector = vector; - /* Tasklets for Mailbox */ - TASK_INIT(&adapter->link_task, 0, ixv_handle_mbx, adapter); - adapter->tq = taskqueue_create_fast("ixv_mbx", M_NOWAIT, - taskqueue_thread_enqueue, &adapter->tq); - taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s mbxq", - device_get_nameunit(adapter->dev)); - /* - ** Due to a broken design QEMU will fail to properly - ** enable the guest for MSIX unless the vectors in - ** the table are all set up, so we must rewrite the - ** ENABLE in the MSIX control register again at this - ** point to cause it to successfully initialize us. - */ - if (adapter->hw.mac.type == ixgbe_mac_82599_vf) { - int msix_ctrl; - pci_find_cap(dev, PCIY_MSIX, &rid); - rid += PCIR_MSIX_CTRL; - msix_ctrl = pci_read_config(dev, rid, 2); - msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE; - pci_write_config(dev, rid, msix_ctrl, 2); - } - - return (0); -} - -/* - * Setup MSIX resources, note that the VF - * device MUST use MSIX, there is no fallback. - */ -static int -ixv_setup_msix(struct adapter *adapter) -{ - device_t dev = adapter->dev; - int rid, want, msgs; - - - /* Must have at least 2 MSIX vectors */ - msgs = pci_msix_count(dev); - if (msgs < 2) - goto out; - rid = PCIR_BAR(3); - adapter->msix_mem = bus_alloc_resource_any(dev, - SYS_RES_MEMORY, &rid, RF_ACTIVE); - if (adapter->msix_mem == NULL) { - device_printf(adapter->dev, - "Unable to map MSIX table \n"); - goto out; - } - - /* - ** Want vectors for the queues, - ** plus an additional for mailbox. 
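	** An illustrative budget, with hypothetical counts: for 4 queues
	** the code below computes want = 4 + 1 = 5; if the device only
	** advertises msgs = 3 vectors, the queue count is trimmed to
	** msgs - 1 = 2 so the mailbox always keeps a vector of its own.
	** Note there is no INTx/MSI fallback for the VF.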
- */ - want = adapter->num_queues + 1; - if (want > msgs) { - want = msgs; - adapter->num_queues = msgs - 1; - } else - msgs = want; - if ((pci_alloc_msix(dev, &msgs) == 0) && (msgs == want)) { - device_printf(adapter->dev, - "Using MSIX interrupts with %d vectors\n", want); - return (want); - } - /* Release in case alloc was insufficient */ - pci_release_msi(dev); -out: - if (adapter->msix_mem != NULL) { - bus_release_resource(dev, SYS_RES_MEMORY, - rid, adapter->msix_mem); - adapter->msix_mem = NULL; - } - device_printf(adapter->dev,"MSIX config error\n"); - return (ENXIO); -} - - -static int -ixv_allocate_pci_resources(struct adapter *adapter) -{ - int rid; - device_t dev = adapter->dev; - - rid = PCIR_BAR(0); - adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, - &rid, RF_ACTIVE); - - if (!(adapter->pci_mem)) { - device_printf(dev, "Unable to allocate bus resource: memory\n"); - return (ENXIO); - } - - adapter->osdep.mem_bus_space_tag = - rman_get_bustag(adapter->pci_mem); - adapter->osdep.mem_bus_space_handle = - rman_get_bushandle(adapter->pci_mem); - adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle; - - /* Pick up the tuneable queues */ - adapter->num_queues = ixv_num_queues; - adapter->hw.back = adapter; - - /* - ** Now setup MSI/X, should - ** return us the number of - ** configured vectors. - */ - adapter->msix = ixv_setup_msix(adapter); - if (adapter->msix == ENXIO) - return (ENXIO); - else - return (0); -} - -static void -ixv_free_pci_resources(struct adapter * adapter) -{ - struct ix_queue *que = adapter->queues; - device_t dev = adapter->dev; - int rid, memrid; - - memrid = PCIR_BAR(MSIX_82598_BAR); - - /* - ** There is a slight possibility of a failure mode - ** in attach that will result in entering this function - ** before interrupt resources have been initialized, and - ** in that case we do not want to execute the loops below - ** We can detect this reliably by the state of the adapter - ** res pointer. - */ - if (adapter->res == NULL) - goto mem; - - /* - ** Release all msix queue resources: - */ - for (int i = 0; i < adapter->num_queues; i++, que++) { - rid = que->msix + 1; - if (que->tag != NULL) { - bus_teardown_intr(dev, que->res, que->tag); - que->tag = NULL; - } - if (que->res != NULL) - bus_release_resource(dev, SYS_RES_IRQ, rid, que->res); - } - - /* Clean the Legacy or Link interrupt last */ - if (adapter->vector) /* we are doing MSIX */ - rid = adapter->vector + 1; - else - (adapter->msix != 0) ? (rid = 1):(rid = 0); - - if (adapter->tag != NULL) { - bus_teardown_intr(dev, adapter->res, adapter->tag); - adapter->tag = NULL; - } - if (adapter->res != NULL) - bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res); - -mem: - if (adapter->msix) - pci_release_msi(dev); - - if (adapter->msix_mem != NULL) - bus_release_resource(dev, SYS_RES_MEMORY, - memrid, adapter->msix_mem); - - if (adapter->pci_mem != NULL) - bus_release_resource(dev, SYS_RES_MEMORY, - PCIR_BAR(0), adapter->pci_mem); - - return; -} - -/********************************************************************* - * - * Setup networking device structure and register an interface. 
- * - **********************************************************************/ -static void -ixv_setup_interface(device_t dev, struct adapter *adapter) -{ - struct ifnet *ifp; - - INIT_DEBUGOUT("ixv_setup_interface: begin"); - - ifp = adapter->ifp = if_alloc(IFT_ETHER); - if (ifp == NULL) - panic("%s: can not if_alloc()\n", device_get_nameunit(dev)); - if_initname(ifp, device_get_name(dev), device_get_unit(dev)); - ifp->if_baudrate = 1000000000; - ifp->if_init = ixv_init; - ifp->if_softc = adapter; - ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; - ifp->if_ioctl = ixv_ioctl; -#if __FreeBSD_version >= 800000 - ifp->if_transmit = ixgbe_mq_start; - ifp->if_qflush = ixgbe_qflush; +#ifdef IFLIB +#include #else - ifp->if_start = ixgbe_start; +#include #endif - ifp->if_snd.ifq_maxlen = adapter->num_tx_desc - 2; - - ether_ifattach(ifp, adapter->hw.mac.addr); - - adapter->max_frame_size = - ifp->if_mtu + IXGBE_MTU_HDR_VLAN; - - /* - * Tell the upper layer(s) we support long frames. - */ - ifp->if_hdrlen = sizeof(struct ether_vlan_header); - - ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_TSO4 | IFCAP_VLAN_HWCSUM; - ifp->if_capabilities |= IFCAP_JUMBO_MTU; - ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING - | IFCAP_VLAN_HWTSO - | IFCAP_VLAN_MTU; - ifp->if_capabilities |= IFCAP_LRO; - ifp->if_capenable = ifp->if_capabilities; - - /* - * Specify the media types supported by this adapter and register - * callbacks to update media and link information - */ - ifmedia_init(&adapter->media, IFM_IMASK, ixv_media_change, - ixv_media_status); - ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL); - ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO); - - return; -} - -static void -ixv_config_link(struct adapter *adapter) -{ - struct ixgbe_hw *hw = &adapter->hw; - u32 autoneg; - - if (hw->mac.ops.check_link) - hw->mac.ops.check_link(hw, &autoneg, - &adapter->link_up, FALSE); -} - - -/********************************************************************* - * - * Enable transmit unit. - * - **********************************************************************/ -static void -ixv_initialize_transmit_units(struct adapter *adapter) -{ - struct tx_ring *txr = adapter->tx_rings; - struct ixgbe_hw *hw = &adapter->hw; - - - for (int i = 0; i < adapter->num_queues; i++, txr++) { - u64 tdba = txr->txdma.dma_paddr; - u32 txctrl, txdctl; - - /* Set WTHRESH to 8, burst writeback */ - txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i)); - txdctl |= (8 << 16); - IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl); - - /* Set the HW Tx Head and Tail indices */ - IXGBE_WRITE_REG(&adapter->hw, IXGBE_VFTDH(i), 0); - IXGBE_WRITE_REG(&adapter->hw, IXGBE_VFTDT(i), 0); - - /* Set Tx Tail register */ - txr->tail = IXGBE_VFTDT(i); - - /* Set Ring parameters */ - IXGBE_WRITE_REG(hw, IXGBE_VFTDBAL(i), - (tdba & 0x00000000ffffffffULL)); - IXGBE_WRITE_REG(hw, IXGBE_VFTDBAH(i), (tdba >> 32)); - IXGBE_WRITE_REG(hw, IXGBE_VFTDLEN(i), - adapter->num_tx_desc * - sizeof(struct ixgbe_legacy_tx_desc)); - txctrl = IXGBE_READ_REG(hw, IXGBE_VFDCA_TXCTRL(i)); - txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN; - IXGBE_WRITE_REG(hw, IXGBE_VFDCA_TXCTRL(i), txctrl); - - /* Now enable */ - txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i)); - txdctl |= IXGBE_TXDCTL_ENABLE; - IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl); - } - - return; -} - - -/********************************************************************* - * - * Setup receive registers and features. 
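 * Receive buffer sizes are programmed into SRRCTL's BSIZEPKT field
 * in 1 KB units (IXGBE_SRRCTL_BSIZEPKT_SHIFT is 10 on this
 * hardware): 2048 >> 10 = 2 for a standard MTU, 4096 >> 10 = 4
 * for jumbo frames.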
- * - **********************************************************************/ -#define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT 2 - -static void -ixv_initialize_receive_units(struct adapter *adapter) -{ - struct rx_ring *rxr = adapter->rx_rings; - struct ixgbe_hw *hw = &adapter->hw; - struct ifnet *ifp = adapter->ifp; - u32 bufsz, rxcsum, psrtype; - - if (ifp->if_mtu > ETHERMTU) - bufsz = 4096 >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; - else - bufsz = 2048 >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; - - psrtype = IXGBE_PSRTYPE_TCPHDR | IXGBE_PSRTYPE_UDPHDR | - IXGBE_PSRTYPE_IPV4HDR | IXGBE_PSRTYPE_IPV6HDR | - IXGBE_PSRTYPE_L2HDR; - - IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype); - - /* Tell PF our max_frame size */ - ixgbevf_rlpml_set_vf(hw, adapter->max_frame_size); - - for (int i = 0; i < adapter->num_queues; i++, rxr++) { - u64 rdba = rxr->rxdma.dma_paddr; - u32 reg, rxdctl; - - /* Disable the queue */ - rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i)); - rxdctl &= ~IXGBE_RXDCTL_ENABLE; - IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(i), rxdctl); - for (int j = 0; j < 10; j++) { - if (IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i)) & - IXGBE_RXDCTL_ENABLE) - msec_delay(1); - else - break; - } - wmb(); - /* Setup the Base and Length of the Rx Descriptor Ring */ - IXGBE_WRITE_REG(hw, IXGBE_VFRDBAL(i), - (rdba & 0x00000000ffffffffULL)); - IXGBE_WRITE_REG(hw, IXGBE_VFRDBAH(i), - (rdba >> 32)); - IXGBE_WRITE_REG(hw, IXGBE_VFRDLEN(i), - adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc)); - - /* Reset the ring indices */ - IXGBE_WRITE_REG(hw, IXGBE_VFRDH(rxr->me), 0); - IXGBE_WRITE_REG(hw, IXGBE_VFRDT(rxr->me), 0); - - /* Set up the SRRCTL register */ - reg = IXGBE_READ_REG(hw, IXGBE_VFSRRCTL(i)); - reg &= ~IXGBE_SRRCTL_BSIZEHDR_MASK; - reg &= ~IXGBE_SRRCTL_BSIZEPKT_MASK; - reg |= bufsz; - reg |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF; - IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(i), reg); - - /* Capture Rx Tail index */ - rxr->tail = IXGBE_VFRDT(rxr->me); - - /* Do the queue enabling last */ - rxdctl |= IXGBE_RXDCTL_ENABLE | IXGBE_RXDCTL_VME; - IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(i), rxdctl); - for (int k = 0; k < 10; k++) { - if (IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i)) & - IXGBE_RXDCTL_ENABLE) - break; - else - msec_delay(1); - } - wmb(); - - /* Set the Tail Pointer */ -#ifdef DEV_NETMAP - /* - * In netmap mode, we must preserve the buffers made - * available to userspace before the if_init() - * (this is true by default on the TX side, because - * init makes all buffers available to userspace). - * - * netmap_reset() and the device specific routines - * (e.g. ixgbe_setup_receive_rings()) map these - * buffers at the end of the NIC ring, so here we - * must set the RDT (tail) register to make sure - * they are not overwritten. - * - * In this driver the NIC ring starts at RDH = 0, - * RDT points to the last slot available for reception (?), - * so RDT = num_rx_desc - 1 means the whole ring is available. 
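	 * Concretely: with num_rx_desc = 1024 and an empty kring this
	 * writes RDT = 1023; any slots still held by userspace shrink
	 * that by nm_kr_rxspace(kring).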
- */ - if (ifp->if_capenable & IFCAP_NETMAP) { - struct netmap_adapter *na = NA(adapter->ifp); - struct netmap_kring *kring = &na->rx_rings[i]; - int t = na->num_rx_desc - 1 - nm_kr_rxspace(kring); - - IXGBE_WRITE_REG(hw, IXGBE_VFRDT(rxr->me), t); - } else -#endif /* DEV_NETMAP */ - IXGBE_WRITE_REG(hw, IXGBE_VFRDT(rxr->me), - adapter->num_rx_desc - 1); - } - - rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM); - - if (ifp->if_capenable & IFCAP_RXCSUM) - rxcsum |= IXGBE_RXCSUM_PCSD; - - if (!(rxcsum & IXGBE_RXCSUM_PCSD)) - rxcsum |= IXGBE_RXCSUM_IPPCSE; - - IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum); - - return; -} - -static void -ixv_setup_vlan_support(struct adapter *adapter) -{ - struct ixgbe_hw *hw = &adapter->hw; - u32 ctrl, vid, vfta, retry; - struct rx_ring *rxr; - - /* - ** We get here thru init_locked, meaning - ** a soft reset, this has already cleared - ** the VFTA and other state, so if there - ** have been no vlan's registered do nothing. - */ - if (adapter->num_vlans == 0) - return; - - /* Enable the queues */ - for (int i = 0; i < adapter->num_queues; i++) { - ctrl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i)); - ctrl |= IXGBE_RXDCTL_VME; - IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(i), ctrl); - /* - * Let Rx path know that it needs to store VLAN tag - * as part of extra mbuf info. - */ - rxr = &adapter->rx_rings[i]; - rxr->vtag_strip = TRUE; - } - - /* - ** A soft reset zero's out the VFTA, so - ** we need to repopulate it now. - */ - for (int i = 0; i < IXGBE_VFTA_SIZE; i++) { - if (ixv_shadow_vfta[i] == 0) - continue; - vfta = ixv_shadow_vfta[i]; - /* - ** Reconstruct the vlan id's - ** based on the bits set in each - ** of the array ints. - */ - for (int j = 0; j < 32; j++) { - retry = 0; - if ((vfta & (1 << j)) == 0) - continue; - vid = (i * 32) + j; - /* Call the shared code mailbox routine */ - while (ixgbe_set_vfta(hw, vid, 0, TRUE)) { - if (++retry > 5) - break; - } - } - } -} - -/* -** This routine is run via an vlan config EVENT, -** it enables us to use the HW Filter table since -** we can get the vlan id. This just creates the -** entry in the soft version of the VFTA, init will -** repopulate the real table. -*/ -static void -ixv_register_vlan(void *arg, struct ifnet *ifp, u16 vtag) -{ - struct adapter *adapter = ifp->if_softc; - u16 index, bit; - - if (ifp->if_softc != arg) /* Not our event */ - return; - - if ((vtag == 0) || (vtag > 4095)) /* Invalid */ - return; - - IXGBE_CORE_LOCK(adapter); - index = (vtag >> 5) & 0x7F; - bit = vtag & 0x1F; - ixv_shadow_vfta[index] |= (1 << bit); - ++adapter->num_vlans; - /* Re-init to load the changes */ - ixv_init_locked(adapter); - IXGBE_CORE_UNLOCK(adapter); -} - -/* -** This routine is run via an vlan -** unconfig EVENT, remove our entry -** in the soft vfta. 
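** The shadow VFTA is an array of 32-bit words (the 0x7F index mask
** allows 128 of them): vtag 1234 lands in word (1234 >> 5) & 0x7F
** = 38, bit 1234 & 0x1F = 18. Clearing the bit here only reaches
** hardware through the re-init below, which replays the table via
** the PF mailbox.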
-*/ -static void -ixv_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag) -{ - struct adapter *adapter = ifp->if_softc; - u16 index, bit; - - if (ifp->if_softc != arg) - return; - - if ((vtag == 0) || (vtag > 4095)) /* Invalid */ - return; - - IXGBE_CORE_LOCK(adapter); - index = (vtag >> 5) & 0x7F; - bit = vtag & 0x1F; - ixv_shadow_vfta[index] &= ~(1 << bit); - --adapter->num_vlans; - /* Re-init to load the changes */ - ixv_init_locked(adapter); - IXGBE_CORE_UNLOCK(adapter); -} - -static void -ixv_enable_intr(struct adapter *adapter) -{ - struct ixgbe_hw *hw = &adapter->hw; - struct ix_queue *que = adapter->queues; - u32 mask = (IXGBE_EIMS_ENABLE_MASK & ~IXGBE_EIMS_RTX_QUEUE); - - - IXGBE_WRITE_REG(hw, IXGBE_VTEIMS, mask); - - mask = IXGBE_EIMS_ENABLE_MASK; - mask &= ~(IXGBE_EIMS_OTHER | IXGBE_EIMS_LSC); - IXGBE_WRITE_REG(hw, IXGBE_VTEIAC, mask); - - for (int i = 0; i < adapter->num_queues; i++, que++) - ixv_enable_queue(adapter, que->msix); - - IXGBE_WRITE_FLUSH(hw); - - return; -} - -static void -ixv_disable_intr(struct adapter *adapter) -{ - IXGBE_WRITE_REG(&adapter->hw, IXGBE_VTEIAC, 0); - IXGBE_WRITE_REG(&adapter->hw, IXGBE_VTEIMC, ~0); - IXGBE_WRITE_FLUSH(&adapter->hw); - return; -} - -/* -** Setup the correct IVAR register for a particular MSIX interrupt -** - entry is the register array entry -** - vector is the MSIX vector for this queue -** - type is RX/TX/MISC -*/ -static void -ixv_set_ivar(struct adapter *adapter, u8 entry, u8 vector, s8 type) -{ - struct ixgbe_hw *hw = &adapter->hw; - u32 ivar, index; - - vector |= IXGBE_IVAR_ALLOC_VAL; - - if (type == -1) { /* MISC IVAR */ - ivar = IXGBE_READ_REG(hw, IXGBE_VTIVAR_MISC); - ivar &= ~0xFF; - ivar |= vector; - IXGBE_WRITE_REG(hw, IXGBE_VTIVAR_MISC, ivar); - } else { /* RX/TX IVARS */ - index = (16 * (entry & 1)) + (8 * type); - ivar = IXGBE_READ_REG(hw, IXGBE_VTIVAR(entry >> 1)); - ivar &= ~(0xFF << index); - ivar |= (vector << index); - IXGBE_WRITE_REG(hw, IXGBE_VTIVAR(entry >> 1), ivar); - } -} - -static void -ixv_configure_ivars(struct adapter *adapter) -{ - struct ix_queue *que = adapter->queues; - - for (int i = 0; i < adapter->num_queues; i++, que++) { - /* First the RX queue entry */ - ixv_set_ivar(adapter, i, que->msix, 0); - /* ... and the TX */ - ixv_set_ivar(adapter, i, que->msix, 1); - /* Set an initial value in EITR */ - IXGBE_WRITE_REG(&adapter->hw, - IXGBE_VTEITR(que->msix), IXV_EITR_DEFAULT); - } - - /* For the mailbox interrupt */ - ixv_set_ivar(adapter, 1, adapter->vector, -1); -} - - -/* -** Tasklet handler for MSIX MBX interrupts -** - do outside interrupt since it might sleep -*/ -static void -ixv_handle_mbx(void *context, int pending) -{ - struct adapter *adapter = context; - - ixgbe_check_link(&adapter->hw, - &adapter->link_speed, &adapter->link_up, 0); - ixv_update_link_status(adapter); -} - -/* -** The VF stats registers never have a truly virgin -** starting point, so this routine tries to make an -** artificial one, marking ground zero on attach as -** it were. 
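** The bookkeeping is three-level: last_* tracks the raw register,
** base_* records where it stood after the most recent reset, and
** saved_reset_* accumulates the (current - base) deltas across
** resets so totals are not lost when the VF is reset.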
-*/ -static void -ixv_save_stats(struct adapter *adapter) -{ - if (adapter->stats.vf.vfgprc || adapter->stats.vf.vfgptc) { - adapter->stats.vf.saved_reset_vfgprc += - adapter->stats.vf.vfgprc - adapter->stats.vf.base_vfgprc; - adapter->stats.vf.saved_reset_vfgptc += - adapter->stats.vf.vfgptc - adapter->stats.vf.base_vfgptc; - adapter->stats.vf.saved_reset_vfgorc += - adapter->stats.vf.vfgorc - adapter->stats.vf.base_vfgorc; - adapter->stats.vf.saved_reset_vfgotc += - adapter->stats.vf.vfgotc - adapter->stats.vf.base_vfgotc; - adapter->stats.vf.saved_reset_vfmprc += - adapter->stats.vf.vfmprc - adapter->stats.vf.base_vfmprc; - } -} - -static void -ixv_init_stats(struct adapter *adapter) -{ - struct ixgbe_hw *hw = &adapter->hw; - - adapter->stats.vf.last_vfgprc = IXGBE_READ_REG(hw, IXGBE_VFGPRC); - adapter->stats.vf.last_vfgorc = IXGBE_READ_REG(hw, IXGBE_VFGORC_LSB); - adapter->stats.vf.last_vfgorc |= - (((u64)(IXGBE_READ_REG(hw, IXGBE_VFGORC_MSB))) << 32); - - adapter->stats.vf.last_vfgptc = IXGBE_READ_REG(hw, IXGBE_VFGPTC); - adapter->stats.vf.last_vfgotc = IXGBE_READ_REG(hw, IXGBE_VFGOTC_LSB); - adapter->stats.vf.last_vfgotc |= - (((u64)(IXGBE_READ_REG(hw, IXGBE_VFGOTC_MSB))) << 32); - - adapter->stats.vf.last_vfmprc = IXGBE_READ_REG(hw, IXGBE_VFMPRC); - - adapter->stats.vf.base_vfgprc = adapter->stats.vf.last_vfgprc; - adapter->stats.vf.base_vfgorc = adapter->stats.vf.last_vfgorc; - adapter->stats.vf.base_vfgptc = adapter->stats.vf.last_vfgptc; - adapter->stats.vf.base_vfgotc = adapter->stats.vf.last_vfgotc; - adapter->stats.vf.base_vfmprc = adapter->stats.vf.last_vfmprc; -} - -#define UPDATE_STAT_32(reg, last, count) \ -{ \ - u32 current = IXGBE_READ_REG(hw, reg); \ - if (current < last) \ - count += 0x100000000LL; \ - last = current; \ - count &= 0xFFFFFFFF00000000LL; \ - count |= current; \ -} - -#define UPDATE_STAT_36(lsb, msb, last, count) \ -{ \ - u64 cur_lsb = IXGBE_READ_REG(hw, lsb); \ - u64 cur_msb = IXGBE_READ_REG(hw, msb); \ - u64 current = ((cur_msb << 32) | cur_lsb); \ - if (current < last) \ - count += 0x1000000000LL; \ - last = current; \ - count &= 0xFFFFFFF000000000LL; \ - count |= current; \ -} - -/* -** ixv_update_stats - Update the board statistics counters. -*/ -void -ixv_update_stats(struct adapter *adapter) -{ - struct ixgbe_hw *hw = &adapter->hw; - - UPDATE_STAT_32(IXGBE_VFGPRC, adapter->stats.vf.last_vfgprc, - adapter->stats.vf.vfgprc); - UPDATE_STAT_32(IXGBE_VFGPTC, adapter->stats.vf.last_vfgptc, - adapter->stats.vf.vfgptc); - UPDATE_STAT_36(IXGBE_VFGORC_LSB, IXGBE_VFGORC_MSB, - adapter->stats.vf.last_vfgorc, adapter->stats.vf.vfgorc); - UPDATE_STAT_36(IXGBE_VFGOTC_LSB, IXGBE_VFGOTC_MSB, - adapter->stats.vf.last_vfgotc, adapter->stats.vf.vfgotc); - UPDATE_STAT_32(IXGBE_VFMPRC, adapter->stats.vf.last_vfmprc, - adapter->stats.vf.vfmprc); -} - -/* - * Add statistic sysctls for the VF. 
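 * The UPDATE_STAT_32/36 macros above widen the free-running
 * hardware counters: when a register reads below its previous
 * value (e.g. last 0xFFFFFFF0, now 0x10) a wrap is assumed, so
 * 2^32 (or 2^36 for the octet counters) is added to the running
 * 64-bit total before the low bits are merged back in.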
- */ -static void -ixv_add_stats_sysctls(struct adapter *adapter) -{ - device_t dev = adapter->dev; - struct ix_queue *que = &adapter->queues[0]; - struct tx_ring *txr = que->txr; - struct rx_ring *rxr = que->rxr; - - struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev); - struct sysctl_oid *tree = device_get_sysctl_tree(dev); - struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree); - struct ixgbevf_hw_stats *stats = &adapter->stats.vf; - - struct sysctl_oid *stat_node, *queue_node; - struct sysctl_oid_list *stat_list, *queue_list; - - /* Driver Statistics */ - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", - CTLFLAG_RD, &adapter->dropped_pkts, - "Driver dropped packets"); - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_failed", - CTLFLAG_RD, &adapter->mbuf_defrag_failed, - "m_defrag() failed"); - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_events", - CTLFLAG_RD, &adapter->watchdog_events, - "Watchdog timeouts"); - - stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac", - CTLFLAG_RD, NULL, - "VF Statistics (read from HW registers)"); - stat_list = SYSCTL_CHILDREN(stat_node); - - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_rcvd", - CTLFLAG_RD, &stats->vfgprc, - "Good Packets Received"); - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_rcvd", - CTLFLAG_RD, &stats->vfgorc, - "Good Octets Received"); - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_rcvd", - CTLFLAG_RD, &stats->vfmprc, - "Multicast Packets Received"); - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd", - CTLFLAG_RD, &stats->vfgptc, - "Good Packets Transmitted"); - SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd", - CTLFLAG_RD, &stats->vfgotc, - "Good Octets Transmitted"); - - queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "que", - CTLFLAG_RD, NULL, - "Queue Statistics (collected by SW)"); - queue_list = SYSCTL_CHILDREN(queue_node); - - SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "irqs", - CTLFLAG_RD, &(que->irqs), - "IRQs on queue"); - SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_irqs", - CTLFLAG_RD, &(rxr->rx_irq), - "RX irqs on queue"); - SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_packets", - CTLFLAG_RD, &(rxr->rx_packets), - "RX packets"); - SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_bytes", - CTLFLAG_RD, &(rxr->rx_bytes), - "RX bytes"); - SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_discarded", - CTLFLAG_RD, &(rxr->rx_discarded), - "Discarded RX packets"); - - SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_packets", - CTLFLAG_RD, &(txr->total_packets), - "TX Packets"); - - SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_no_desc", - CTLFLAG_RD, &(txr->no_desc_avail), - "# of times not enough descriptors were available during TX"); -} - -static void -ixv_set_sysctl_value(struct adapter *adapter, const char *name, - const char *description, int *limit, int value) -{ - *limit = value; - SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev), - SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)), - OID_AUTO, name, CTLFLAG_RW, limit, value, description); -} - -/********************************************************************** - * - * This routine is called only when em_display_debug_stats is enabled. - * This routine provides a way to take a look at important statistics - * maintained by the driver and hardware. 
- * - **********************************************************************/ -static void -ixv_print_debug_info(struct adapter *adapter) -{ - device_t dev = adapter->dev; - struct ixgbe_hw *hw = &adapter->hw; - struct ix_queue *que = adapter->queues; - struct rx_ring *rxr; - struct tx_ring *txr; - struct lro_ctrl *lro; - - device_printf(dev,"Error Byte Count = %u \n", - IXGBE_READ_REG(hw, IXGBE_ERRBC)); - - for (int i = 0; i < adapter->num_queues; i++, que++) { - txr = que->txr; - rxr = que->rxr; - lro = &rxr->lro; - device_printf(dev,"QUE(%d) IRQs Handled: %lu\n", - que->msix, (long)que->irqs); - device_printf(dev,"RX(%d) Packets Received: %lld\n", - rxr->me, (long long)rxr->rx_packets); - device_printf(dev,"RX(%d) Bytes Received: %lu\n", - rxr->me, (long)rxr->rx_bytes); - device_printf(dev,"RX(%d) LRO Queued= %lld\n", - rxr->me, (long long)lro->lro_queued); - device_printf(dev,"RX(%d) LRO Flushed= %lld\n", - rxr->me, (long long)lro->lro_flushed); - device_printf(dev,"TX(%d) Packets Sent: %lu\n", - txr->me, (long)txr->total_packets); - device_printf(dev,"TX(%d) NO Desc Avail: %lu\n", - txr->me, (long)txr->no_desc_avail); - } - - device_printf(dev,"MBX IRQ Handled: %lu\n", - (long)adapter->link_irq); - return; -} - -static int -ixv_sysctl_debug(SYSCTL_HANDLER_ARGS) -{ - int error, result; - struct adapter *adapter; - - result = -1; - error = sysctl_handle_int(oidp, &result, 0, req); - - if (error || !req->newptr) - return (error); - - if (result == 1) { - adapter = (struct adapter *) arg1; - ixv_print_debug_info(adapter); - } - return error; -} - Index: sys/dev/ixgbe/iflib_if_ix.c =================================================================== --- /dev/null +++ sys/dev/ixgbe/iflib_if_ix.c @@ -0,0 +1,5350 @@ +/****************************************************************************** + + Copyright (c) 2001-2015, Intel Corporation + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. Neither the name of the Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ +******************************************************************************/ +/* $FreeBSD$*/ + + +#include "opt_inet.h" +#include "opt_inet6.h" +#include "opt_rss.h" + +#include "ixgbe.h" +#include "ifdi_if.h" + +#include +#include + +#ifdef RSS +#include +#include +#endif /* RSS */ + +/********************************************************************* + * Driver version + *********************************************************************/ +char ixgbe_driver_version[] = "3.1.13-k"; + +/********************************************************************* + * PCI Device ID Table + * + * Used by probe to select devices to load on + * Last field stores an index into ixgbe_strings + * Last entry must be all 0s + * + * { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index } + *********************************************************************/ +static pci_vendor_info_t ixgbe_vendor_info_array[] = +{ + PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_DUAL_PORT, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_SINGLE_PORT, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_CX4, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT2, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_DA_DUAL_PORT, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_CX4_DUAL_PORT, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_XF_LR, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_SFP_LOM, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4_MEZZ, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_XAUI_LOM, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_CX4, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_T3_LOM, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_COMBO_BACKPLANE, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_BACKPLANE_FCOE, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_SF2, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_FCOE, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599EN_SFP, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_SF_QP, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_QSFP_SF_QP, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + 
PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540T, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540T1, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550T, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550T1, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_X_KR, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_X_KX4, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_X_10G_T, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_X_SFP, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + + /* required last entry */ + PVID_END +}; + +/********************************************************************* + * Function prototypes + *********************************************************************/ +static void *ixgbe_register(device_t dev); +static int ixgbe_if_attach_pre(if_ctx_t ctx); +static int ixgbe_if_attach_post(if_ctx_t ctx); +static int ixgbe_if_detach(if_ctx_t ctx); +static int ixgbe_if_shutdown(if_ctx_t ctx); +static int ixgbe_if_suspend(if_ctx_t ctx); +static int ixgbe_if_resume(if_ctx_t ctx); + +static void ixgbe_if_stop(if_ctx_t ctx); +static void ixgbe_if_init(if_ctx_t ctx); +void ixgbe_if_enable_intr(if_ctx_t ctx); +static void ixgbe_if_disable_intr(if_ctx_t ctx); +static int ixgbe_if_queue_intr_enable(if_ctx_t ctx, uint16_t qid); +static void ixgbe_if_media_status(if_ctx_t ctx, struct ifmediareq * ifmr); +static int ixgbe_if_media_change(if_ctx_t ctx); +static int ixgbe_if_msix_intr_assign(if_ctx_t, int); +static int ixgbe_if_mtu_set(if_ctx_t ctx, uint32_t mtu); +static void ixgbe_if_crcstrip_set(if_ctx_t ctx, int onoff); +static void ixgbe_if_multi_set(if_ctx_t ctx); +static int ixgbe_if_promisc_set(if_ctx_t ctx, int flags); +static int ixgbe_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int nrxqs, int nrxqsets); +static int ixgbe_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int nrxqs, int nrxqsets); +static void ixgbe_if_queues_free(if_ctx_t ctx); +static void ixgbe_if_timer(if_ctx_t ctx, uint16_t); +static void ixgbe_if_update_admin_status(if_ctx_t ctx); +static void ixgbe_if_vlan_register(if_ctx_t ctx, u16 vtag); +static void ixgbe_if_vlan_unregister(if_ctx_t ctx, u16 vtag); + +int ixgbe_intr(void *arg); + +#if __FreeBSD_version >= 1100036 +static uint64_t ixgbe_if_get_counter(if_ctx_t, ift_counter); +#endif + +static void ixgbe_enable_queue(struct adapter *adapter, u32 vector); +static void ixgbe_disable_queue(struct adapter *adapter, u32 vector); +static void ixgbe_add_device_sysctls(if_ctx_t ctx); +static int ixgbe_allocate_pci_resources(if_ctx_t ctx); +static int ixgbe_setup_low_power_mode(if_ctx_t ctx); + +static void ixgbe_config_dmac(struct adapter *adapter); +static void ixgbe_configure_ivars(struct adapter *adapter); +static void ixgbe_set_ivar(struct adapter *adapter, u8 entry, u8 vector, s8 type); +static u8 * ixgbe_mc_array_itr(struct ixgbe_hw *, u8 **, u32 *); +static bool ixgbe_sfp_probe(if_ctx_t ctx); + +static void ixgbe_identify_hardware(if_ctx_t ctx); +static void ixgbe_free_pci_resources(if_ctx_t ctx); + +static int ixgbe_msix_link(void *arg); +static int ixgbe_msix_que(void *arg); +static void ixgbe_initialize_rss_mapping(struct adapter *adapter); +static void 
+static void ixgbe_initialize_transmit_units(if_ctx_t ctx);
+
+static int  ixgbe_interface_setup(if_ctx_t ctx);
+static void ixgbe_add_media_types(if_ctx_t ctx);
+static void ixgbe_update_stats_counters(struct adapter *adapter);
+static void ixgbe_config_link(struct adapter *adapter);
+static void ixgbe_get_slot_info(struct adapter *);
+static void ixgbe_check_wol_support(struct adapter *adapter);
+static void ixgbe_enable_rx_drop(struct adapter *);
+static void ixgbe_disable_rx_drop(struct adapter *);
+
+static void ixgbe_add_hw_stats(struct adapter *adapter);
+static int  ixgbe_set_flowcntl(struct adapter *, int);
+static int  ixgbe_set_advertise(struct adapter *, int);
+static void ixgbe_setup_vlan_hw_support(if_ctx_t ctx);
+static void ixgbe_config_gpie(struct adapter *adapter);
+static void ixgbe_config_delay_values(struct adapter *adapter);
+
+/* Sysctl handlers */
+static void ixgbe_set_sysctl_value(struct adapter *adapter, const char *name,
+    const char *description, int *limit, int value);
+static int  ixgbe_sysctl_flowcntl(SYSCTL_HANDLER_ARGS);
+static int  ixgbe_sysctl_advertise(SYSCTL_HANDLER_ARGS);
+static int  ixgbe_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS);
+static int  ixgbe_sysctl_thermal_test(SYSCTL_HANDLER_ARGS);
+static int  ixgbe_sysctl_dmac(SYSCTL_HANDLER_ARGS);
+static int  ixgbe_sysctl_phy_temp(SYSCTL_HANDLER_ARGS);
+static int  ixgbe_sysctl_phy_overtemp_occurred(SYSCTL_HANDLER_ARGS);
+#ifdef IXGBE_DEBUG
+static int  ixgbe_sysctl_power_state(SYSCTL_HANDLER_ARGS);
+static int  ixgbe_sysctl_print_rss_config(SYSCTL_HANDLER_ARGS);
+#endif
+static int  ixgbe_sysctl_rdh_handler(SYSCTL_HANDLER_ARGS);
+static int  ixgbe_sysctl_rdt_handler(SYSCTL_HANDLER_ARGS);
+static int  ixgbe_sysctl_tdt_handler(SYSCTL_HANDLER_ARGS);
+static int  ixgbe_sysctl_tdh_handler(SYSCTL_HANDLER_ARGS);
+static int  ixgbe_sysctl_wol_enable(SYSCTL_HANDLER_ARGS);
+static int  ixgbe_sysctl_wufc(SYSCTL_HANDLER_ARGS);
+static int  ixgbe_sysctl_eee_enable(SYSCTL_HANDLER_ARGS);
+static int  ixgbe_sysctl_eee_negotiated(SYSCTL_HANDLER_ARGS);
+static int  ixgbe_sysctl_eee_rx_lpi_status(SYSCTL_HANDLER_ARGS);
+static int  ixgbe_sysctl_eee_tx_lpi_status(SYSCTL_HANDLER_ARGS);
+static int  ixgbe_sysctl_eee_tx_lpi_delay(SYSCTL_HANDLER_ARGS);
+
+/* Support for pluggable optic modules */
+static void ixgbe_setup_optics(struct adapter *);
+
+/* Deferred interrupt tasklets */
+static void ixgbe_handle_msf(void *);
+static void ixgbe_handle_mod(void *);
+static void ixgbe_handle_phy(void *);
+
+#ifdef IXGBE_FDIR
+static void ixgbe_reinit_fdir(void *, int);
+#endif
+
+#ifdef PCI_IOV
+static void ixgbe_ping_all_vfs(struct adapter *);
+static void ixgbe_handle_mbx(void *);
+static int  ixgbe_init_iov(device_t, u16, const nvlist_t *);
+static void ixgbe_uninit_iov(device_t);
+static int  ixgbe_add_vf(device_t, u16, const nvlist_t *);
+static void ixgbe_initialize_iov(struct adapter *);
+static void ixgbe_init_vf(struct adapter *, struct ixgbe_vf *);
+#if 0
+static void ixgbe_recalculate_max_frame(struct adapter *);
+#endif
+#endif /* PCI_IOV */
+
+/**********************************************************************
+ *  FreeBSD Device Interface Entry Points
+ *********************************************************************/
+static device_method_t ixgbe_methods[] = {
+	/* Device interface */
+	DEVMETHOD(device_register, ixgbe_register),
+	DEVMETHOD(device_probe, iflib_device_probe),
+	DEVMETHOD(device_attach, iflib_device_attach),
+	DEVMETHOD(device_detach, iflib_device_detach),
+	DEVMETHOD(device_shutdown, iflib_device_shutdown),
+	DEVMETHOD(device_suspend, iflib_device_suspend),
+	DEVMETHOD(device_resume, iflib_device_resume),
+#ifdef PCI_IOV
+	DEVMETHOD(pci_iov_init, ixgbe_init_iov),
+	DEVMETHOD(pci_iov_uninit, ixgbe_uninit_iov),
+	DEVMETHOD(pci_iov_add_vf, ixgbe_add_vf),
+#endif /* PCI_IOV */
+	DEVMETHOD_END
+};
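A reviewer sketch of the call flow these two method tables set up, not part of the patch. The IFDI_* dispatchers are generated from ifdi_if.m (included above as "ifdi_if.h"); the ordering below paraphrases iflib's attach path rather than quoting its source:

    /* newbus -> iflib */
    sctx = DEVICE_REGISTER(dev);      /* ixgbe_register() returns ixgbe_sctx */
    iflib_device_attach(dev);
    /* iflib -> driver, via the ifdi method table below */
    IFDI_ATTACH_PRE(ctx);             /* -> ixgbe_if_attach_pre()  */
    IFDI_TX_QUEUES_ALLOC(ctx, vaddrs, paddrs, ntxqs, ntxqsets);
    IFDI_MSIX_INTR_ASSIGN(ctx, msix);
    IFDI_ATTACH_POST(ctx);            /* -> ixgbe_if_attach_post() */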
+
+static driver_t ixgbe_driver = {
+	"ix", ixgbe_methods, sizeof(struct adapter),
+};
+
+devclass_t ix_devclass;
+DRIVER_MODULE(ix, pci, ixgbe_driver, ix_devclass, 0, 0);
+
+MODULE_DEPEND(ix, pci, 1, 1, 1);
+MODULE_DEPEND(ix, ether, 1, 1, 1);
+MODULE_DEPEND(ix, iflib, 1, 1, 1);
+
+#ifdef DEV_NETMAP
+MODULE_DEPEND(ix, netmap, 1, 1, 1);
+#endif /* DEV_NETMAP */
+
+static device_method_t ixgbe_if_methods[] = {
+	DEVMETHOD(ifdi_attach_pre, ixgbe_if_attach_pre),
+	DEVMETHOD(ifdi_attach_post, ixgbe_if_attach_post),
+	DEVMETHOD(ifdi_detach, ixgbe_if_detach),
+	DEVMETHOD(ifdi_shutdown, ixgbe_if_shutdown),
+	DEVMETHOD(ifdi_suspend, ixgbe_if_suspend),
+	DEVMETHOD(ifdi_resume, ixgbe_if_resume),
+	DEVMETHOD(ifdi_init, ixgbe_if_init),
+	DEVMETHOD(ifdi_stop, ixgbe_if_stop),
+	DEVMETHOD(ifdi_msix_intr_assign, ixgbe_if_msix_intr_assign),
+	DEVMETHOD(ifdi_intr_enable, ixgbe_if_enable_intr),
+	DEVMETHOD(ifdi_intr_disable, ixgbe_if_disable_intr),
+	DEVMETHOD(ifdi_queue_intr_enable, ixgbe_if_queue_intr_enable),
+	DEVMETHOD(ifdi_tx_queues_alloc, ixgbe_if_tx_queues_alloc),
+	DEVMETHOD(ifdi_rx_queues_alloc, ixgbe_if_rx_queues_alloc),
+	DEVMETHOD(ifdi_queues_free, ixgbe_if_queues_free),
+	DEVMETHOD(ifdi_update_admin_status, ixgbe_if_update_admin_status),
+	DEVMETHOD(ifdi_multi_set, ixgbe_if_multi_set),
+	DEVMETHOD(ifdi_mtu_set, ixgbe_if_mtu_set),
+	DEVMETHOD(ifdi_crcstrip_set, ixgbe_if_crcstrip_set),
+	DEVMETHOD(ifdi_media_status, ixgbe_if_media_status),
+	DEVMETHOD(ifdi_media_change, ixgbe_if_media_change),
+	DEVMETHOD(ifdi_promisc_set, ixgbe_if_promisc_set),
+	DEVMETHOD(ifdi_timer, ixgbe_if_timer),
+	DEVMETHOD(ifdi_vlan_register, ixgbe_if_vlan_register),
+	DEVMETHOD(ifdi_vlan_unregister, ixgbe_if_vlan_unregister),
+	DEVMETHOD(ifdi_get_counter, ixgbe_if_get_counter),
+	DEVMETHOD_END
+};
+
+/*
+ * Note: legacy checks of (adapter->msix_mem) are replaced by
+ * (adapter->intr_type == IFLIB_INTR_MSIX) in the iflib world.
+ */
+
+static driver_t ixgbe_if_driver = {
+	"ixgbe_if", ixgbe_if_methods, sizeof(struct adapter)
+};
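A concrete before/after of the note above, as a reviewer illustration only; the helper named here is hypothetical and stands in for any MSI-X-only cleanup:

    /* legacy driver: presence of the mapped MSI-X BAR implied MSI-X mode */
    if (adapter->msix_mem)
        ixgbe_free_msix_resources(adapter);   /* hypothetical helper */

    /* iflib conversion: iflib records the negotiated interrupt type */
    if (adapter->intr_type == IFLIB_INTR_MSIX)
        ixgbe_free_msix_resources(adapter);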
+
+/*
+** TUNEABLE PARAMETERS:
+*/
+
+static SYSCTL_NODE(_hw, OID_AUTO, ix, CTLFLAG_RD, 0,
+    "IXGBE driver parameters");
+
+/*
+** AIM: Adaptive Interrupt Moderation
+** which means that the interrupt rate
+** is varied over time based on the
+** traffic for that interrupt vector
+*/
+static int ixgbe_enable_aim = TRUE;
+SYSCTL_INT(_hw_ix, OID_AUTO, enable_aim, CTLFLAG_RWTUN, &ixgbe_enable_aim, 0,
+    "Enable adaptive interrupt moderation");
+
+static int ixgbe_max_interrupt_rate = (4000000 / IXGBE_LOW_LATENCY);
+SYSCTL_INT(_hw_ix, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN,
+    &ixgbe_max_interrupt_rate, 0, "Maximum interrupts per second");
+
+/* How many packets rxeof tries to clean at a time */
+static int ixgbe_rx_process_limit = 256;
+SYSCTL_INT(_hw_ix, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
+    &ixgbe_rx_process_limit, 0,
+    "Maximum number of received packets to process at a time, "
+    "-1 means unlimited");
+
+/* How many packets txeof tries to clean at a time */
+static int ixgbe_tx_process_limit = 256;
+SYSCTL_INT(_hw_ix, OID_AUTO, tx_process_limit, CTLFLAG_RDTUN,
+    &ixgbe_tx_process_limit, 0,
+    "Maximum number of sent packets to process at a time, "
+    "-1 means unlimited");
+
+/* Flow control setting, default to full */
+static int ixgbe_flow_control = ixgbe_fc_full;
+SYSCTL_INT(_hw_ix, OID_AUTO, flow_control, CTLFLAG_RDTUN,
+    &ixgbe_flow_control, 0, "Default flow control used for all adapters");
+
+/* Advertise Speed, default to 0 (auto) */
+static int ixgbe_advertise_speed = 0;
+SYSCTL_INT(_hw_ix, OID_AUTO, advertise_speed, CTLFLAG_RDTUN,
+    &ixgbe_advertise_speed, 0, "Default advertised speed for all adapters");
+
+/*
+** Smart speed setting, default to on.
+** This only works as a compile-time option
+** right now as it's applied during attach;
+** set this to 'ixgbe_smart_speed_off' to
+** disable.
+*/
+static int ixgbe_smart_speed = ixgbe_smart_speed_on;
+
+/*
+ * MSIX should be the default for best performance,
+ * but this allows it to be forced off for testing.
+ */
+static int ixgbe_enable_msix = 1;
+SYSCTL_INT(_hw_ix, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &ixgbe_enable_msix, 0,
+    "Enable MSI-X interrupts");
+
+/*
+ * Number of Queues, can be set to 0,
+ * it then autoconfigures based on the
+ * number of cpus with a max of 8. This
+ * can be overridden manually here.
+ */
+#if 0
+#endif
+
+/*
+** Number of TX descriptors per ring,
+** setting higher than RX as this seems
+** the better performing choice.
+*/
+static int ixgbe_txd = PERFORM_TXD;
+SYSCTL_INT(_hw_ix, OID_AUTO, txd, CTLFLAG_RDTUN, &ixgbe_txd, 0,
+    "Number of transmit descriptors per queue");
+
+/* Number of RX descriptors per ring */
+static int ixgbe_rxd = PERFORM_RXD;
+SYSCTL_INT(_hw_ix, OID_AUTO, rxd, CTLFLAG_RDTUN, &ixgbe_rxd, 0,
+    "Number of receive descriptors per queue");
+
+
+#ifdef DEV_NETMAP
+/* ix_crcstrip: 0: keep CRC in rx frames (default), 1: strip it.
+ * During regular operations the CRC is stripped, but on some
+ * hardware reception of frames not multiple of 64 is slower,
+ * so using crcstrip=0 helps in benchmarks.
+ */ +SYSCTL_DECL(_dev_netmap); +int ix_crcstrip; +SYSCTL_INT(_dev_netmap, OID_AUTO, ix_crcstrip, + CTLFLAG_RW, &ix_crcstrip, 0, "strip CRC on rx frames"); +#endif /* DEV_NETMAP */ + +/* +** Defining this on will allow the use +** of unsupported SFP+ modules, note that +** doing so you are on your own :) +*/ +static int allow_unsupported_sfp = FALSE; +TUNABLE_INT("hw.ix.unsupported_sfp", &allow_unsupported_sfp); + +#if 0 +/* Keep running tab on them for sanity check */ +static int ixgbe_total_ports; +#endif + +#ifdef IXGBE_FDIR +/* +** Flow Director actually 'steals' +** part of the packet buffer as its +** filter pool, this variable controls +** how much it uses: +** 0 = 64K, 1 = 128K, 2 = 256K +*/ +static int fdir_pballoc = 1; +#endif + +static MALLOC_DEFINE(M_IXGBE, "ix", "ix driver allocations"); + +extern struct if_txrx ixgbe_txrx; + +static struct if_shared_ctx ixgbe_sctx_init = { + .isc_magic = IFLIB_MAGIC, + .isc_q_align = PAGE_SIZE,/* max(DBA_ALIGN, PAGE_SIZE) */ + .isc_tx_maxsize = IXGBE_TSO_SIZE, + + .isc_tx_maxsegsize = PAGE_SIZE, + + .isc_rx_maxsize = PAGE_SIZE*4, + .isc_rx_nsegments = 1, + .isc_rx_maxsegsize = PAGE_SIZE*4, + .isc_nfl = 1, + .isc_ntxqs = 1, + .isc_nrxqs = 1, + + .isc_admin_intrcnt = 1, + .isc_vendor_info = ixgbe_vendor_info_array, + .isc_driver_version = ixgbe_driver_version, + .isc_txrx = &ixgbe_txrx, + .isc_driver = &ixgbe_if_driver, + + .isc_nrxd_min = {MIN_RXD}, + .isc_ntxd_min = {MIN_TXD}, + .isc_nrxd_max = {MAX_RXD}, + .isc_ntxd_max = {MAX_TXD}, + .isc_nrxd_default = {DEFAULT_RXD}, + .isc_ntxd_default = {DEFAULT_TXD}, +}; + +if_shared_ctx_t ixgbe_sctx = &ixgbe_sctx_init; + +static int +ixgbe_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int ntxqs, int ntxqsets) +{ + struct adapter *adapter = iflib_get_softc(ctx); + if_softc_ctx_t scctx = adapter->shared; + struct ix_tx_queue *que; + int i, j, error; +#ifdef PCI_IOV + enum ixgbe_iov_mode mode; +#endif + + MPASS(adapter->num_tx_queues > 0); + MPASS(adapter->num_tx_queues == ntxqsets); + MPASS(ntxqs == 1); + + /* Allocate queue structure memory */ + if (!(adapter->tx_queues = + (struct ix_tx_queue *) malloc(sizeof(struct ix_tx_queue) * + ntxqsets, M_DEVBUF, M_NOWAIT | M_ZERO))) { + device_printf(iflib_get_dev(ctx), "Unable to allocate TX ring memory\n"); + return (ENOMEM); + } + +#ifdef PCI_IOV + mode = ixgbe_get_iov_mode(adapter); + adapter->pool = ixgbe_max_vfs(mode); +#else + adapter->pool = 0; +#endif + + for (i = 0, que = adapter->tx_queues; i < ntxqsets; i++, que++) { + struct tx_ring *txr = &que->txr; + + if (!(txr->tx_buffers = (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) * scctx->isc_ntxd[0], M_DEVBUF, M_NOWAIT | M_ZERO))) { + device_printf(iflib_get_dev(ctx), "failed to allocate tx_buffer memory\n"); + error = ENOMEM; + goto fail; + } +#ifdef PCI_IOV + txr->me = ixgbe_pf_que_index(mode, i); +#else + txr->me = i; +#endif + + txr->adapter = que->adapter = adapter; + adapter->active_queues |= (u64)1 << txr->me; + + /* get the virtual and physical address of the hardware queues */ + txr->tail = IXGBE_TDT(txr->me); + txr->tx_base = (union ixgbe_adv_tx_desc *)vaddrs[i]; + txr->tx_paddr = paddrs[i]; + + txr->que = que; + for (j = 0; j < scctx->isc_ntxd[0]; j++) { + txr->tx_buffers[j].eop = -1; + } + txr->bytes = 0; + txr->total_packets = 0; + +#ifdef IXGBE_FDIR + /* Set the rate at which we sample packets */ + if (adapter->hw.mac.type != ixgbe_mac_82598EB) + txr->atr_sample = atr_sample_rate; +#endif + + } + + iflib_config_gtask_init(ctx, &adapter->mod_task, 
ixgbe_handle_mod, "mod_task");
+	iflib_config_gtask_init(ctx, &adapter->msf_task, ixgbe_handle_msf, "msf_task");
+	iflib_config_gtask_init(ctx, &adapter->phy_task, ixgbe_handle_phy, "phy_task");
+#ifdef PCI_IOV
+	iflib_config_gtask_init(ctx, &adapter->mbx_task, ixgbe_handle_mbx, "mbx_task");
+#endif
+
+	device_printf(iflib_get_dev(ctx), "allocated for %d queues\n", adapter->num_tx_queues);
+	return (0);
+
+ fail:
+	ixgbe_if_queues_free(ctx);
+	return (error);
+}
+
+static int
+ixgbe_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int nrxqs, int nrxqsets)
+{
+	struct adapter *adapter = iflib_get_softc(ctx);
+	struct ix_rx_queue *que;
+	int i;
+#ifdef PCI_IOV
+	enum ixgbe_iov_mode mode;
+#endif
+
+	MPASS(adapter->num_rx_queues > 0);
+	MPASS(adapter->num_rx_queues == nrxqsets);
+	MPASS(nrxqs == 1);
+
+	/* Allocate queue structure memory */
+	if (!(adapter->rx_queues =
+	    (struct ix_rx_queue *) malloc(sizeof(struct ix_rx_queue) * nrxqsets, M_DEVBUF, M_NOWAIT | M_ZERO))) {
+		device_printf(iflib_get_dev(ctx), "Unable to allocate RX ring memory\n");
+		return (ENOMEM);
+	}
+
+#ifdef PCI_IOV
+	mode = ixgbe_get_iov_mode(adapter);
+	adapter->pool = ixgbe_max_vfs(mode);
+#else
+	adapter->pool = 0;
+#endif
+
+	for (i = 0, que = adapter->rx_queues; i < nrxqsets; i++, que++) {
+		struct rx_ring *rxr = &que->rxr;
+
+#ifdef PCI_IOV
+		rxr->me = ixgbe_pf_que_index(mode, i);
+#else
+		rxr->me = i;
+#endif
+
+		rxr->adapter = que->adapter = adapter;
+
+		/* get the virtual and physical address of the hardware queues */
+
+		rxr->tail = IXGBE_RDT(rxr->me);
+		rxr->rx_base = (union ixgbe_adv_rx_desc *)vaddrs[i];
+		rxr->rx_paddr = paddrs[i];
+		rxr->bytes = 0;
+		rxr->que = que;
+	}
+
+	device_printf(iflib_get_dev(ctx), "allocated for %d rx queues\n", adapter->num_rx_queues);
+	return (0);
+}
+
+static void
+ixgbe_if_queues_free(if_ctx_t ctx)
+{
+	struct adapter *adapter = iflib_get_softc(ctx);
+	struct ix_tx_queue *tx_que = adapter->tx_queues;
+	struct ix_rx_queue *rx_que = adapter->rx_queues;
+	int i;
+
+	if (tx_que == NULL && rx_que == NULL)
+		return;
+
+	for (i = 0; i < adapter->num_tx_queues; i++, tx_que++) {
+		struct tx_ring *txr = &tx_que->txr;
+		if (txr->tx_buffers == NULL)
+			break;
+
+		free(txr->tx_buffers, M_DEVBUF);
+		txr->tx_buffers = NULL;
+	}
+
+	free(adapter->tx_queues, M_DEVBUF);
+	free(adapter->rx_queues, M_DEVBUF);
+	adapter->rx_queues = NULL;
+	adapter->tx_queues = NULL;
+}
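A reviewer sketch of the iflib contract assumed by the two allocation callbacks above, not part of the patch: iflib DMA-allocates the descriptor rings itself (isc_ntxqs/isc_nrxqs rings per queue set, sized per isc_txqsizes/isc_rxqsizes) and hands their kernel virtual and physical addresses in; the driver never allocates descriptor memory. With more than one ring per set the usual indexing is vaddrs[i * ntxqs + q]; here ntxqs == 1, so the code indexes vaddrs[i] directly:

    /* shape of the contract, generalized for ntxqs rings per set */
    txr->tx_base  = (union ixgbe_adv_tx_desc *)vaddrs[i * ntxqs];
    txr->tx_paddr = paddrs[i * ntxqs];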
+
+static void
+ixgbe_initialize_rss_mapping(struct adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 reta = 0, mrqc, rss_key[10];
+	int queue_id, table_size, index_mult;
+#ifdef RSS
+	u32 rss_hash_config;
+#endif
+#ifdef PCI_IOV
+	enum ixgbe_iov_mode mode;
+#endif
+
+#ifdef RSS
+	/* Fetch the configured RSS key */
+	rss_getkey((uint8_t *) &rss_key);
+#else
+	/* set up random bits */
+	arc4rand(&rss_key, sizeof(rss_key), 0);
+#endif
+
+	/* Set multiplier for RETA setup and table size based on MAC */
+	index_mult = 0x1;
+	table_size = 128;
+	switch (adapter->hw.mac.type) {
+	case ixgbe_mac_82598EB:
+		index_mult = 0x11;
+		break;
+	case ixgbe_mac_X550:
+	case ixgbe_mac_X550EM_x:
+		table_size = 512;
+		break;
+	default:
+		break;
+	}
+
+	/* Set up the redirection table */
+	for (int i = 0, j = 0; i < table_size; i++, j++) {
+		if (j == adapter->num_rx_queues) j = 0;
+#ifdef RSS
+		/*
+		 * Fetch the RSS bucket id for the given indirection entry.
+		 * Cap it at the number of configured buckets (which is
+		 * num_queues.)
+		 */
+		queue_id = rss_get_indirection_to_bucket(i);
+		queue_id = queue_id % adapter->num_rx_queues;
+#else
+		queue_id = (j * index_mult);
+#endif
+		/*
+		 * The low 8 bits are for hash value (n+0);
+		 * The next 8 bits are for hash value (n+1), etc.
+		 */
+		reta = reta >> 8;
+		reta = reta | ( ((uint32_t) queue_id) << 24);
+		if ((i & 3) == 3) {
+			if (i < 128)
+				IXGBE_WRITE_REG(hw, IXGBE_RETA(i >> 2), reta);
+			else
+				IXGBE_WRITE_REG(hw, IXGBE_ERETA((i >> 2) - 32), reta);
+			reta = 0;
+		}
+	}
+
+	/* Now fill our hash function seeds */
+	for (int i = 0; i < 10; i++)
+		IXGBE_WRITE_REG(hw, IXGBE_RSSRK(i), rss_key[i]);
+
+	/* Perform hash on these packet types */
+#ifdef RSS
+	mrqc = IXGBE_MRQC_RSSEN;
+	rss_hash_config = rss_gethashconfig();
+	if (rss_hash_config & RSS_HASHTYPE_RSS_IPV4)
+		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4;
+	if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV4)
+		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_TCP;
+	if (rss_hash_config & RSS_HASHTYPE_RSS_IPV6)
+		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6;
+	if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV6)
+		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_TCP;
+	if (rss_hash_config & RSS_HASHTYPE_RSS_IPV6_EX)
+		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX;
+	if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV6_EX)
+		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP;
+	if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV4)
+		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP;
+	if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV4_EX)
+		device_printf(adapter->dev,
+		    "%s: RSS_HASHTYPE_RSS_UDP_IPV4_EX defined, "
+		    "but not supported\n", __func__);
+	if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV6)
+		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP;
+	if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV6_EX)
+		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
+#else
+	/*
+	 * Disable UDP - IP fragments aren't currently being handled
+	 * and so we end up with a mix of 2-tuple and 4-tuple
+	 * traffic.
+	 */
+	mrqc = IXGBE_MRQC_RSSEN
+	     | IXGBE_MRQC_RSS_FIELD_IPV4
+	     | IXGBE_MRQC_RSS_FIELD_IPV4_TCP
+	     | IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP
+	     | IXGBE_MRQC_RSS_FIELD_IPV6_EX
+	     | IXGBE_MRQC_RSS_FIELD_IPV6
+	     | IXGBE_MRQC_RSS_FIELD_IPV6_TCP
+	;
+#endif /* RSS */
+#ifdef PCI_IOV
+	mode = ixgbe_get_iov_mode(adapter);
+	mrqc |= ixgbe_get_mrqc(mode);
+#endif
+	IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
+}
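A worked example of the RETA packing above, as a reviewer note (not part of the patch): each 32-bit register holds four 8-bit queue indices, filled least-significant byte first by the shift-and-insert, and written on every fourth iteration.

    /*
     * With num_rx_queues == 2 and index_mult == 1:
     *   i=0: reta = 0x00000000            (queue 0 in top byte, shifted down)
     *   i=1: reta = 0x01000000
     *   i=2: reta = 0x00010000
     *   i=3: reta = 0x01000100  -> written to RETA(0)
     * i.e. entries {0, 1, 0, 1}, so hash values n..n+3 alternate
     * between the two queues.
     */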
+
+
+/*********************************************************************
+ *
+ *  Setup receive registers and features.
+ *
+ **********************************************************************/
+#define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT 2
+
+#define BSIZEPKT_ROUNDUP ((1<<IXGBE_SRRCTL_BSIZEPKT_SHIFT)-1)
+
+static void
+ixgbe_initialize_receive_units(if_ctx_t ctx)
+{
+	struct adapter *adapter = iflib_get_softc(ctx);
+	if_softc_ctx_t scctx = adapter->shared;
+	struct ixgbe_hw *hw = &adapter->hw;
+	struct ifnet *ifp = iflib_get_ifp(ctx);
+
+	struct ix_rx_queue *que;
+	u32 bufsz, fctrl, srrctl, rxcsum;
+	u32 hlreg;
+	int i;
+
+	/*
+	 * Make sure receives are disabled while
+	 * setting up the descriptor ring
+	 */
+	ixgbe_disable_rx(hw);
+
+	/* Enable broadcasts */
+	fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
+	fctrl |= IXGBE_FCTRL_BAM;
+	if (adapter->hw.mac.type == ixgbe_mac_82598EB) {
+		fctrl |= IXGBE_FCTRL_DPF;
+		fctrl |= IXGBE_FCTRL_PMCF;
+	}
+	IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
+
+	/* Set for Jumbo Frames? */
+	hlreg = IXGBE_READ_REG(hw, IXGBE_HLREG0);
+	if (ifp->if_mtu > ETHERMTU)
+		hlreg |= IXGBE_HLREG0_JUMBOEN;
+	else
+		hlreg &= ~IXGBE_HLREG0_JUMBOEN;
+#ifdef DEV_NETMAP
+	/* crcstrip is conditional in netmap (in RDRXCTL too?) */
+	if (ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
+		hlreg &= ~IXGBE_HLREG0_RXCRCSTRP;
+	else
+		hlreg |= IXGBE_HLREG0_RXCRCSTRP;
+#endif /* DEV_NETMAP */
+	IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg);
+
+	bufsz = (adapter->rx_mbuf_sz +
+	    BSIZEPKT_ROUNDUP) >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
+
+	/* Setup the Base and Length of the Rx Descriptor Ring */
+	for (i = 0, que = adapter->rx_queues; i < adapter->num_rx_queues; i++, que++) {
+		struct rx_ring *rxr = &que->rxr;
+		u64 rdba = rxr->rx_paddr;
+		int j = rxr->me;
+
+		/* Setup the Base and Length of the Rx Descriptor Ring */
+		IXGBE_WRITE_REG(hw, IXGBE_RDBAL(j),
+		    (rdba & 0x00000000ffffffffULL));
+		IXGBE_WRITE_REG(hw, IXGBE_RDBAH(j), (rdba >> 32));
+		IXGBE_WRITE_REG(hw, IXGBE_RDLEN(j),
+		    scctx->isc_nrxd[0] * sizeof(union ixgbe_adv_rx_desc));
+
+		/* Set up the SRRCTL register */
+		srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(j));
+		srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
+		srrctl &= ~IXGBE_SRRCTL_BSIZEPKT_MASK;
+		srrctl |= bufsz;
+		srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
+
+		/*
+		 * Set DROP_EN iff we have no flow control and >1 queue.
+		 * Note that srrctl was cleared shortly before during reset,
+		 * so we do not need to clear the bit, but do it just in case
+		 * this code is moved elsewhere.
+		 */
+		if (adapter->num_rx_queues > 1 &&
+		    adapter->hw.fc.requested_mode == ixgbe_fc_none) {
+			srrctl |= IXGBE_SRRCTL_DROP_EN;
+		} else {
+			srrctl &= ~IXGBE_SRRCTL_DROP_EN;
+		}
+
+		IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(j), srrctl);
+
+		/* Setup the HW Rx Head and Tail Descriptor Pointers */
+		IXGBE_WRITE_REG(hw, IXGBE_RDH(j), 0);
+		IXGBE_WRITE_REG(hw, IXGBE_RDT(j), 0);
+
+		/* Set the driver rx tail address */
+		rxr->tail = IXGBE_RDT(rxr->me);
+	}
+
+	if (adapter->hw.mac.type != ixgbe_mac_82598EB) {
+		u32 psrtype = IXGBE_PSRTYPE_TCPHDR |
+		    IXGBE_PSRTYPE_UDPHDR |
+		    IXGBE_PSRTYPE_IPV4HDR |
+		    IXGBE_PSRTYPE_IPV6HDR;
+		IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0), psrtype);
+	}
+
+	rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
+
+	ixgbe_initialize_rss_mapping(adapter);
+
+	if (adapter->num_rx_queues > 1) {
+		/* RSS and RX IPP Checksum are mutually exclusive */
+		rxcsum |= IXGBE_RXCSUM_PCSD;
+	}
+
+	if (ifp->if_capenable & IFCAP_RXCSUM)
+		rxcsum |= IXGBE_RXCSUM_PCSD;
+
+	if (!(rxcsum & IXGBE_RXCSUM_PCSD))
+		rxcsum |= IXGBE_RXCSUM_IPPCSE;
+
+	IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
+}
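A reviewer note on the bufsz computation above, not part of the patch: SRRCTL expresses the packet buffer size in 1KB units (IXGBE_SRRCTL_BSIZEPKT_SHIFT is 10 in ixgbe_type.h), so adding BSIZEPKT_ROUNDUP before shifting is a ceiling division.

    /*
     * (2048 + 1023) >> 10 == 2   -> 2KB hardware buffer for MCLBYTES
     * (4096 + 1023) >> 10 == 4   -> 4KB buffer for larger clusters
     */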
+
+
+
+/*********************************************************************
+ *
+ *  Enable transmit units.
+ *
+ **********************************************************************/
+static void
+ixgbe_initialize_transmit_units(if_ctx_t ctx)
+{
+	struct adapter *adapter = iflib_get_softc(ctx);
+	if_softc_ctx_t scctx = adapter->shared;
+	struct ixgbe_hw *hw = &adapter->hw;
+	struct ix_tx_queue *que;
+	int i;
+
+	/* Setup the Base and Length of the Tx Descriptor Ring */
+	for (i = 0, que = adapter->tx_queues; i < adapter->num_tx_queues; i++, que++) {
+		struct tx_ring *txr = &que->txr;
+		u64 tdba = txr->tx_paddr;
+		u32 txctrl = 0;
+		int j = txr->me;
+
+		IXGBE_WRITE_REG(hw, IXGBE_TDBAL(j),
+		    (tdba & 0x00000000ffffffffULL));
+		IXGBE_WRITE_REG(hw, IXGBE_TDBAH(j), (tdba >> 32));
+		IXGBE_WRITE_REG(hw, IXGBE_TDLEN(j),
+		    scctx->isc_ntxd[0] * sizeof(union ixgbe_adv_tx_desc));
+
+		/* Setup the HW Tx Head and Tail descriptor pointers */
+		IXGBE_WRITE_REG(hw, IXGBE_TDH(j), 0);
+		IXGBE_WRITE_REG(hw, IXGBE_TDT(j), 0);
+
+		/*
+		 * Note: for X550 series devices, these registers are actually
+		 * prefixed with TPH_ instead of DCA_, but the addresses and
+		 * fields remain the same.
+		 */
+		/* Disable Head Writeback */
+		switch (hw->mac.type) {
+		case ixgbe_mac_82598EB:
+			txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(j));
+			break;
+		default:
+			txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL_82599(j));
+			break;
+		}
+		txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
+		switch (hw->mac.type) {
+		case ixgbe_mac_82598EB:
+			IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(j), txctrl);
+			break;
+		default:
+			IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(j), txctrl);
+			break;
+		}
+
+	}
+
+	if (hw->mac.type != ixgbe_mac_82598EB) {
+		u32 dmatxctl, rttdcs;
+#ifdef PCI_IOV
+		enum ixgbe_iov_mode mode = ixgbe_get_iov_mode(adapter);
+#endif
+		dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
+		dmatxctl |= IXGBE_DMATXCTL_TE;
+		IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
+		/* Disable arbiter to set MTQC */
+		rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
+		rttdcs |= IXGBE_RTTDCS_ARBDIS;
+		IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
+#ifdef PCI_IOV
+		IXGBE_WRITE_REG(hw, IXGBE_MTQC, ixgbe_get_mtqc(mode));
+#else
+		IXGBE_WRITE_REG(hw, IXGBE_MTQC, IXGBE_MTQC_64Q_1PB);
+#endif
+		rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
+		IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
+	}
+}
+
+
+static void *
+ixgbe_register(device_t dev)
+{
+	return (ixgbe_sctx);
+}
+
+/*********************************************************************
+ * Device initialization routine
+ *
+ * The attach entry point is called when the driver is being loaded.
+ * This routine identifies the type of hardware, allocates all resources
+ * and initializes the hardware.
+ *
+ * return 0 on success, positive on failure
+ *********************************************************************/
+
+static int
+ixgbe_if_attach_pre(if_ctx_t ctx)
+{
+	device_t dev;
+	struct adapter *adapter;
+	if_softc_ctx_t scctx;
+	struct ixgbe_hw *hw;
+	uint16_t csum;
+	int error = 0;
+
+	INIT_DEBUGOUT("ixgbe_attach: begin");
+
+	/* Allocate, clear, and link in our adapter structure */
+	dev = iflib_get_dev(ctx);
+	adapter = iflib_get_softc(ctx);
+	adapter->ctx = ctx;
+	adapter->dev = dev;
+	scctx = adapter->shared = iflib_get_softc_ctx(ctx);
+	adapter->media = iflib_get_media(ctx);
+	hw = &adapter->hw;
+
+	/* Identify hardware revision */
+	ixgbe_identify_hardware(ctx);
+
+	if (adapter->hw.mac.type == ixgbe_mac_82598EB) {
+		scctx->isc_tx_nsegments = IXGBE_82598_SCATTER;
+		scctx->isc_msix_bar = PCIR_BAR(MSIX_82598_BAR);
+	} else {
+		hw->phy.smart_speed = ixgbe_smart_speed;
+		scctx->isc_tx_nsegments = IXGBE_82599_SCATTER;
+		scctx->isc_msix_bar = PCIR_BAR(MSIX_82599_BAR);
+	}
+	scctx->isc_txqsizes[0] = roundup2(scctx->isc_ntxd[0] * sizeof(union ixgbe_adv_tx_desc) + sizeof(u32), DBA_ALIGN);
+	scctx->isc_rxqsizes[0] = roundup2(scctx->isc_nrxd[0] * sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
+
+	scctx->isc_tx_tso_segments_max = scctx->isc_tx_nsegments;
+	scctx->isc_tx_tso_size_max = IXGBE_TSO_SIZE;
+	scctx->isc_tx_tso_segsize_max = PAGE_SIZE;
+	/* XXX */
+	scctx->isc_max_txqsets = scctx->isc_max_rxqsets = 32;
+
+	/* Sysctls */
+	ixgbe_add_device_sysctls(ctx);
+
+	/* Do base PCI setup - map BAR0 */
+	if (ixgbe_allocate_pci_resources(ctx)) {
+		device_printf(dev, "Allocation of PCI resources failed\n");
+		return (ENXIO);
+	}
+
+	/* Sysctls for limiting the amount of work done in the taskqueues */
+	ixgbe_set_sysctl_value(adapter, "rx_processing_limit",
+	    "max number of rx packets to process",
+	    &adapter->rx_process_limit, ixgbe_rx_process_limit);
+
+	ixgbe_set_sysctl_value(adapter, "tx_processing_limit",
+	    "max number of tx packets to process",
+	    &adapter->tx_process_limit, ixgbe_tx_process_limit);
+
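A reviewer note on the queue sizing just above, not part of the patch. Each advanced descriptor is 16 bytes; assuming DBA_ALIGN is 128 and a ring of 2048 TX descriptors (PERFORM_TXD as defined in ixgbe.h), the arithmetic works out to:

    /*
     * roundup2(2048 * 16 + 4, 128) == 32896 bytes per TX ring;
     * the extra u32 leaves padded room for a head write-back word,
     * even though head write-back itself is disabled above.
     */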
+ /* Allocate multicast array memory. */ + adapter->mta = malloc(sizeof(*adapter->mta) * + MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT); + if (adapter->mta == NULL) { + device_printf(dev, "Can not allocate multicast setup array\n"); + error = ENOMEM; + goto err_pci; + } + + /* Initialize the shared code */ + hw->allow_unsupported_sfp = allow_unsupported_sfp; + error = ixgbe_init_shared_code(hw); + if (error == IXGBE_ERR_SFP_NOT_PRESENT) { + /* + ** No optics in this port, set up + ** so the timer routine will probe + ** for later insertion. + */ + adapter->sfp_probe = TRUE; + error = 0; + } else if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) { + device_printf(dev,"Unsupported SFP+ module detected!\n"); + error = EIO; + goto err_late; + } else if (error) { + device_printf(dev,"Unable to initialize the shared code\n"); + error = EIO; + goto err_late; + } + + /* Make sure we have a good EEPROM before we read from it */ + if (ixgbe_validate_eeprom_checksum(&adapter->hw, &csum) < 0) { + device_printf(dev,"The EEPROM Checksum Is Not Valid\n"); + error = EIO; + goto err_late; + } + + error = ixgbe_init_hw(hw); + + switch (error) { + case IXGBE_ERR_EEPROM_VERSION: + device_printf(dev, "This device is a pre-production adapter/" + "LOM. Please be aware there may be issues associated " + "with your hardware.\n If you are experiencing problems " + "please contact your Intel or hardware representative " + "who provided you with this hardware.\n"); + break; + case IXGBE_ERR_SFP_NOT_SUPPORTED: + device_printf(dev,"Unsupported SFP+ Module\n"); + error = EIO; + goto err_late; + case IXGBE_ERR_SFP_NOT_PRESENT: + device_printf(dev,"No SFP+ Module found\n"); + /* falls thru */ + default: + break; + } + + iflib_set_mac(ctx, hw->mac.addr); + switch (adapter->hw.mac.type) { + case ixgbe_mac_X550: + case ixgbe_mac_X550EM_x: + scctx->isc_rss_table_size = 512; + break; + default: + scctx->isc_rss_table_size = 128; + } + return (0); +err_late: + free(adapter->mta, M_DEVBUF); +err_pci: + ixgbe_free_pci_resources(ctx); + + return (error); +} + +static int +ixgbe_if_attach_post(if_ctx_t ctx) +{ + device_t dev; + struct adapter *adapter; + struct ixgbe_hw *hw; + int error = 0; + u32 ctrl_ext; + + dev = iflib_get_dev(ctx); + adapter = iflib_get_softc(ctx); + hw = &adapter->hw; + + + /* hw.ix defaults init */ + ixgbe_set_advertise(adapter, ixgbe_advertise_speed); + ixgbe_set_flowcntl(adapter, ixgbe_flow_control); + adapter->enable_aim = ixgbe_enable_aim; + + /* Enable the optics for 82599 SFP+ fiber */ + ixgbe_enable_tx_laser(hw); + + /* Enable power to the phy. 
*/ + ixgbe_set_phy_power(hw, TRUE); + +#ifdef PCI_IOV + ixgbe_initialize_iov(adapter); +#endif + + error = ixgbe_interface_setup(ctx); + if (error) { + device_printf(dev, "Interface setup failed: %d\n", error); + goto err; + } + + /* Initialize statistics */ + ixgbe_update_stats_counters(adapter); + ixgbe_add_hw_stats(adapter); + + /* Check PCIE slot type/speed/width */ + ixgbe_get_slot_info(adapter); + + /* Set an initial default flow control & dmac value */ + adapter->fc = ixgbe_fc_full; + adapter->dmac = 0; + adapter->eee_enabled = 0; +#ifdef PCI_IOV + if ((hw->mac.type != ixgbe_mac_82598EB) && (adapter->intr_type == IFLIB_INTR_MSIX)) { + nvlist_t *pf_schema, *vf_schema; + + hw->mbx.ops.init_params(hw); + pf_schema = pci_iov_schema_alloc_node(); + vf_schema = pci_iov_schema_alloc_node(); + pci_iov_schema_add_unicast_mac(vf_schema, "mac-addr", 0, NULL); + pci_iov_schema_add_bool(vf_schema, "mac-anti-spoof", + IOV_SCHEMA_HASDEFAULT, TRUE); + pci_iov_schema_add_bool(vf_schema, "allow-set-mac", + IOV_SCHEMA_HASDEFAULT, FALSE); + pci_iov_schema_add_bool(vf_schema, "allow-promisc", + IOV_SCHEMA_HASDEFAULT, FALSE); + error = pci_iov_attach(dev, pf_schema, vf_schema); + if (error != 0) { + device_printf(dev, + "Error %d setting up SR-IOV\n", error); + } + } else { + device_printf(dev, "PCI_IOV enabled but not configured: mac_type: %x intr_type: %d\n", + hw->mac.type, adapter->intr_type); + } +#endif /* PCI_IOV */ + + /* Check for certain supported features */ + ixgbe_check_wol_support(adapter); + + /* let hardware know driver is loaded */ + ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT); + ctrl_ext |= IXGBE_CTRL_EXT_DRV_LOAD; + IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext); + + return (0); +err: + ixgbe_if_detach(ctx); + return (error); +} + +/* + * Checks whether the adapter's ports are capable of + * Wake On LAN by reading the adapter's NVM. + * + * Sets each port's hw->wol_enabled value depending + * on the value read here. + */ +static void +ixgbe_check_wol_support(struct adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + u16 dev_caps = 0; + + /* Find out WoL support for port */ + adapter->wol_support = hw->wol_enabled = 0; + ixgbe_get_device_caps(hw, &dev_caps); + if ((dev_caps & IXGBE_DEVICE_CAPS_WOL_PORT0_1) || + ((dev_caps & IXGBE_DEVICE_CAPS_WOL_PORT0) && + hw->bus.func == 0)) + adapter->wol_support = hw->wol_enabled = 1; + + /* Save initial wake up filter configuration */ + adapter->wufc = IXGBE_READ_REG(hw, IXGBE_WUFC); + + return; +} + +/********************************************************************* + * + * Setup networking device structure and register an interface. 
+ * + **********************************************************************/ +#define IXGBE_IFCAPABILITIES \ + (IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_TXCSUM_IPV6 | IFCAP_TSO4 | \ + IFCAP_TSO6 | IFCAP_LRO | IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWTSO | IFCAP_VLAN_HWCSUM | \ + IFCAP_JUMBO_MTU | IFCAP_VLAN_MTU | IFCAP_HWSTATS) + +static int +ixgbe_interface_setup(if_ctx_t ctx) +{ + struct ifnet *ifp = iflib_get_ifp(ctx); + struct adapter *adapter = iflib_get_softc(ctx); + uint64_t cap; + + INIT_DEBUGOUT("ixgbe_interface_setup: begin"); + + cap = IXGBE_IFCAPABILITIES; + + if_setifheaderlen(ifp, sizeof(struct ether_vlan_header)); + if_setcapabilitiesbit(ifp, cap, 0); + if_setcapenable(ifp, if_getcapabilities(ifp)); + if_setbaudrate(ifp, 1000000000); + + adapter->max_frame_size = + ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN; + + /* + ** Don't turn this on by default, if vlans are + ** created on another pseudo device (eg. lagg) + ** then vlan events are not passed thru, breaking + ** operation, but with HW FILTER off it works. If + ** using vlans directly on the ixgbe driver you can + ** enable this and get full hardware tag filtering. + */ + ifp->if_capabilities |= IFCAP_VLAN_HWFILTER; + adapter->phy_layer = ixgbe_get_supported_physical_layer(&adapter->hw); + + ixgbe_add_media_types(ctx); + + /* Autoselect media by default */ + ifmedia_set(adapter->media, IFM_ETHER | IFM_AUTO); + + return (0); +} + +static uint64_t +ixgbe_if_get_counter(if_ctx_t ctx, ift_counter cnt) +{ + struct adapter *adapter = iflib_get_softc(ctx); + if_t ifp = iflib_get_ifp(ctx); + + switch (cnt) { + case IFCOUNTER_IPACKETS: + return (adapter->ipackets); + case IFCOUNTER_OPACKETS: + return (adapter->opackets); + case IFCOUNTER_IBYTES: + return (adapter->ibytes); + case IFCOUNTER_OBYTES: + return (adapter->obytes); + case IFCOUNTER_IMCASTS: + return (adapter->imcasts); + case IFCOUNTER_OMCASTS: + return (adapter->omcasts); + case IFCOUNTER_COLLISIONS: + return (0); + case IFCOUNTER_IQDROPS: + return (adapter->iqdrops); + case IFCOUNTER_OQDROPS: + return (0); + case IFCOUNTER_IERRORS: + return (adapter->ierrors); + default: + return (if_get_counter_default(ifp, cnt)); + } +} + +static void +ixgbe_add_media_types(if_ctx_t ctx) +{ + struct adapter *adapter = iflib_get_softc(ctx); + struct ixgbe_hw *hw = &adapter->hw; + device_t dev = iflib_get_dev(ctx); + int layer; + + layer = adapter->phy_layer = ixgbe_get_supported_physical_layer(hw); + + /* Media types with matching FreeBSD media defines */ + if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_T) + ifmedia_add(adapter->media, IFM_ETHER | IFM_10G_T, 0, NULL); + if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_T) + ifmedia_add(adapter->media, IFM_ETHER | IFM_1000_T, 0, NULL); + if (layer & IXGBE_PHYSICAL_LAYER_100BASE_TX) + ifmedia_add(adapter->media, IFM_ETHER | IFM_100_TX, 0, NULL); + + if (layer & IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU || + layer & IXGBE_PHYSICAL_LAYER_SFP_ACTIVE_DA) + ifmedia_add(adapter->media, IFM_ETHER | IFM_10G_TWINAX, 0, NULL); + + if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_LR) + ifmedia_add(adapter->media, IFM_ETHER | IFM_10G_LR, 0, NULL); + if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_SR) { + ifmedia_add(adapter->media, IFM_ETHER | IFM_10G_SR, 0, NULL); + if (hw->phy.multispeed_fiber) + ifmedia_add(adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL); + } else if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_SX) + ifmedia_add(adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL); + if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_CX4) + ifmedia_add(adapter->media, IFM_ETHER | 
IFM_10G_CX4, 0, NULL); +#ifdef IFM_ETH_XTYPE + if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KR) + ifmedia_add(adapter->media, IFM_ETHER | IFM_10G_KR, 0, NULL); + if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KX4) + ifmedia_add( adapter->media, IFM_ETHER | IFM_10G_KX4, 0, NULL); + if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_KX) + ifmedia_add(adapter->media, IFM_ETHER | IFM_1000_KX, 0, NULL); +#else + if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KR) { + device_printf(dev, "Media supported: 10GbaseKR\n"); + device_printf(dev, "10GbaseKR mapped to 10GbaseSR\n"); + ifmedia_add(adapter->media, IFM_ETHER | IFM_10G_SR, 0, NULL); + } + if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KX4) { + device_printf(dev, "Media supported: 10GbaseKX4\n"); + device_printf(dev, "10GbaseKX4 mapped to 10GbaseCX4\n"); + ifmedia_add(adapter->media, IFM_ETHER | IFM_10G_CX4, 0, NULL); + } + if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_KX) { + device_printf(dev, "Media supported: 1000baseKX\n"); + device_printf(dev, "1000baseKX mapped to 1000baseCX\n"); + ifmedia_add(adapter->media, IFM_ETHER | IFM_1000_CX, 0, NULL); + } +#endif + if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_BX) { + /* Someday, someone will care about you... */ + device_printf(dev, "Media supported: 1000baseBX\n"); + } + + if (hw->device_id == IXGBE_DEV_ID_82598AT) { + ifmedia_add(adapter->media, + IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL); + ifmedia_add(adapter->media, + IFM_ETHER | IFM_1000_T, 0, NULL); + } + + ifmedia_add(adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL); +} + +static void +ixgbe_config_link(struct adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + u32 autoneg, err = 0; + bool sfp, negotiate; + + sfp = ixgbe_is_sfp(hw); + + if (sfp) { + GROUPTASK_ENQUEUE(&adapter->mod_task); + } else { + if (hw->mac.ops.check_link) + err = ixgbe_check_link(hw, &adapter->link_speed, + &adapter->link_up, FALSE); + if (err) + return; + autoneg = hw->phy.autoneg_advertised; + if ((!autoneg) && (hw->mac.ops.get_link_capabilities)) + err = hw->mac.ops.get_link_capabilities(hw, + &autoneg, &negotiate); + if (err) + return; + if (hw->mac.ops.setup_link) + err = hw->mac.ops.setup_link(hw, + autoneg, adapter->link_up); + } + +} + + +/********************************************************************** + * + * Update the board statistics counters. 
+ * + **********************************************************************/ +static void +ixgbe_update_stats_counters(struct adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + u32 missed_rx = 0, bprc, lxon, lxoff, total; + u64 total_missed_rx = 0; + + adapter->stats.pf.crcerrs += IXGBE_READ_REG(hw, IXGBE_CRCERRS); + adapter->stats.pf.illerrc += IXGBE_READ_REG(hw, IXGBE_ILLERRC); + adapter->stats.pf.errbc += IXGBE_READ_REG(hw, IXGBE_ERRBC); + adapter->stats.pf.mspdc += IXGBE_READ_REG(hw, IXGBE_MSPDC); + + for (int i = 0; i < 16; i++) { + adapter->stats.pf.qprc[i] += IXGBE_READ_REG(hw, IXGBE_QPRC(i)); + adapter->stats.pf.qptc[i] += IXGBE_READ_REG(hw, IXGBE_QPTC(i)); + adapter->stats.pf.qprdc[i] += IXGBE_READ_REG(hw, IXGBE_QPRDC(i)); + } + adapter->stats.pf.mlfc += IXGBE_READ_REG(hw, IXGBE_MLFC); + adapter->stats.pf.mrfc += IXGBE_READ_REG(hw, IXGBE_MRFC); + adapter->stats.pf.rlec += IXGBE_READ_REG(hw, IXGBE_RLEC); + + /* Hardware workaround, gprc counts missed packets */ + adapter->stats.pf.gprc += IXGBE_READ_REG(hw, IXGBE_GPRC); + adapter->stats.pf.gprc -= missed_rx; + + if (hw->mac.type != ixgbe_mac_82598EB) { + adapter->stats.pf.gorc += IXGBE_READ_REG(hw, IXGBE_GORCL) + + ((u64)IXGBE_READ_REG(hw, IXGBE_GORCH) << 32); + adapter->stats.pf.gotc += IXGBE_READ_REG(hw, IXGBE_GOTCL) + + ((u64)IXGBE_READ_REG(hw, IXGBE_GOTCH) << 32); + adapter->stats.pf.tor += IXGBE_READ_REG(hw, IXGBE_TORL) + + ((u64)IXGBE_READ_REG(hw, IXGBE_TORH) << 32); + adapter->stats.pf.lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXCNT); + adapter->stats.pf.lxoffrxc += IXGBE_READ_REG(hw, IXGBE_LXOFFRXCNT); + } else { + adapter->stats.pf.lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXC); + adapter->stats.pf.lxoffrxc += IXGBE_READ_REG(hw, IXGBE_LXOFFRXC); + /* 82598 only has a counter in the high register */ + adapter->stats.pf.gorc += IXGBE_READ_REG(hw, IXGBE_GORCH); + adapter->stats.pf.gotc += IXGBE_READ_REG(hw, IXGBE_GOTCH); + adapter->stats.pf.tor += IXGBE_READ_REG(hw, IXGBE_TORH); + } + + /* + * Workaround: mprc hardware is incorrectly counting + * broadcasts, so for now we subtract those. 
+ */ + bprc = IXGBE_READ_REG(hw, IXGBE_BPRC); + adapter->stats.pf.bprc += bprc; + adapter->stats.pf.mprc += IXGBE_READ_REG(hw, IXGBE_MPRC); + if (hw->mac.type == ixgbe_mac_82598EB) + adapter->stats.pf.mprc -= bprc; + + adapter->stats.pf.prc64 += IXGBE_READ_REG(hw, IXGBE_PRC64); + adapter->stats.pf.prc127 += IXGBE_READ_REG(hw, IXGBE_PRC127); + adapter->stats.pf.prc255 += IXGBE_READ_REG(hw, IXGBE_PRC255); + adapter->stats.pf.prc511 += IXGBE_READ_REG(hw, IXGBE_PRC511); + adapter->stats.pf.prc1023 += IXGBE_READ_REG(hw, IXGBE_PRC1023); + adapter->stats.pf.prc1522 += IXGBE_READ_REG(hw, IXGBE_PRC1522); + + lxon = IXGBE_READ_REG(hw, IXGBE_LXONTXC); + adapter->stats.pf.lxontxc += lxon; + lxoff = IXGBE_READ_REG(hw, IXGBE_LXOFFTXC); + adapter->stats.pf.lxofftxc += lxoff; + total = lxon + lxoff; + + adapter->stats.pf.gptc += IXGBE_READ_REG(hw, IXGBE_GPTC); + adapter->stats.pf.mptc += IXGBE_READ_REG(hw, IXGBE_MPTC); + adapter->stats.pf.ptc64 += IXGBE_READ_REG(hw, IXGBE_PTC64); + adapter->stats.pf.gptc -= total; + adapter->stats.pf.mptc -= total; + adapter->stats.pf.ptc64 -= total; + adapter->stats.pf.gotc -= total * ETHER_MIN_LEN; + + adapter->stats.pf.ruc += IXGBE_READ_REG(hw, IXGBE_RUC); + adapter->stats.pf.rfc += IXGBE_READ_REG(hw, IXGBE_RFC); + adapter->stats.pf.roc += IXGBE_READ_REG(hw, IXGBE_ROC); + adapter->stats.pf.rjc += IXGBE_READ_REG(hw, IXGBE_RJC); + adapter->stats.pf.mngprc += IXGBE_READ_REG(hw, IXGBE_MNGPRC); + adapter->stats.pf.mngpdc += IXGBE_READ_REG(hw, IXGBE_MNGPDC); + adapter->stats.pf.mngptc += IXGBE_READ_REG(hw, IXGBE_MNGPTC); + adapter->stats.pf.tpr += IXGBE_READ_REG(hw, IXGBE_TPR); + adapter->stats.pf.tpt += IXGBE_READ_REG(hw, IXGBE_TPT); + adapter->stats.pf.ptc127 += IXGBE_READ_REG(hw, IXGBE_PTC127); + adapter->stats.pf.ptc255 += IXGBE_READ_REG(hw, IXGBE_PTC255); + adapter->stats.pf.ptc511 += IXGBE_READ_REG(hw, IXGBE_PTC511); + adapter->stats.pf.ptc1023 += IXGBE_READ_REG(hw, IXGBE_PTC1023); + adapter->stats.pf.ptc1522 += IXGBE_READ_REG(hw, IXGBE_PTC1522); + adapter->stats.pf.bptc += IXGBE_READ_REG(hw, IXGBE_BPTC); + adapter->stats.pf.xec += IXGBE_READ_REG(hw, IXGBE_XEC); + adapter->stats.pf.fccrc += IXGBE_READ_REG(hw, IXGBE_FCCRC); + adapter->stats.pf.fclast += IXGBE_READ_REG(hw, IXGBE_FCLAST); + /* Only read FCOE on 82599 */ + if (hw->mac.type != ixgbe_mac_82598EB) { + adapter->stats.pf.fcoerpdc += IXGBE_READ_REG(hw, IXGBE_FCOERPDC); + adapter->stats.pf.fcoeprc += IXGBE_READ_REG(hw, IXGBE_FCOEPRC); + adapter->stats.pf.fcoeptc += IXGBE_READ_REG(hw, IXGBE_FCOEPTC); + adapter->stats.pf.fcoedwrc += IXGBE_READ_REG(hw, IXGBE_FCOEDWRC); + adapter->stats.pf.fcoedwtc += IXGBE_READ_REG(hw, IXGBE_FCOEDWTC); + } + + /* Fill out the OS statistics structure */ + IXGBE_SET_IPACKETS(adapter, adapter->stats.pf.gprc); + IXGBE_SET_OPACKETS(adapter, adapter->stats.pf.gptc); + IXGBE_SET_IBYTES(adapter, adapter->stats.pf.gorc); + IXGBE_SET_OBYTES(adapter, adapter->stats.pf.gotc); + IXGBE_SET_IMCASTS(adapter, adapter->stats.pf.mprc); + IXGBE_SET_OMCASTS(adapter, adapter->stats.pf.mptc); + IXGBE_SET_COLLISIONS(adapter, 0); + IXGBE_SET_IQDROPS(adapter, total_missed_rx); + IXGBE_SET_IERRORS(adapter, adapter->stats.pf.crcerrs + + adapter->stats.pf.rlec); +} + +/* + * Add sysctl variables, one per statistic, to the system. 
+ */
+static void
+ixgbe_add_hw_stats(struct adapter *adapter)
+{
+	device_t dev = iflib_get_dev(adapter->ctx);
+	struct ix_rx_queue *rx_que;
+	struct ix_tx_queue *tx_que;
+	int i;
+
+	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
+	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
+	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
+	struct ixgbe_hw_stats *stats = &adapter->stats.pf;
+
+	struct sysctl_oid *stat_node, *queue_node;
+	struct sysctl_oid_list *stat_list, *queue_list;
+
+#define QUEUE_NAME_LEN 32
+	char namebuf[QUEUE_NAME_LEN];
+
+	/* Driver Statistics */
+	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_events",
+	    CTLFLAG_RD, &adapter->watchdog_events,
+	    "Watchdog timeouts");
+	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
+	    CTLFLAG_RD, &adapter->link_irq,
+	    "Link MSIX IRQ Handled");
+
+	for (i = 0, tx_que = adapter->tx_queues; i < adapter->num_tx_queues; i++, tx_que++) {
+		struct tx_ring *txr = &tx_que->txr;
+		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
+		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
+		    CTLFLAG_RD, NULL, "Queue Name");
+		queue_list = SYSCTL_CHILDREN(queue_node);
+		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
+		    CTLTYPE_UINT | CTLFLAG_RD, txr, sizeof(txr),
+		    ixgbe_sysctl_tdh_handler, "IU",
+		    "Transmit Descriptor Head");
+		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
+		    CTLTYPE_UINT | CTLFLAG_RD, txr, sizeof(txr),
+		    ixgbe_sysctl_tdt_handler, "IU",
+		    "Transmit Descriptor Tail");
+		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tso_tx",
+		    CTLFLAG_RD, &txr->tso_tx,
+		    "TSO");
+		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_packets",
+		    CTLFLAG_RD, &txr->total_packets,
+		    "Queue Packets Transmitted");
+	}
+
+	for (i = 0, rx_que = adapter->rx_queues; i < adapter->num_rx_queues; i++, rx_que++) {
+		struct rx_ring *rxr = &rx_que->rxr;
+		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
+		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
+		    CTLFLAG_RD, NULL, "Queue Name");
+		queue_list = SYSCTL_CHILDREN(queue_node);
+
+		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate",
+		    CTLTYPE_UINT | CTLFLAG_RW, &adapter->rx_queues[i],
+		    sizeof(&adapter->rx_queues[i]),
+		    ixgbe_sysctl_interrupt_rate_handler, "IU",
+		    "Interrupt Rate");
+		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "irqs",
+		    CTLFLAG_RD, &(adapter->rx_queues[i].irqs),
+		    "irqs on this queue");
+		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
+		    CTLTYPE_UINT | CTLFLAG_RD, rxr, sizeof(rxr),
+		    ixgbe_sysctl_rdh_handler, "IU",
+		    "Receive Descriptor Head");
+		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
+		    CTLTYPE_UINT | CTLFLAG_RD, rxr, sizeof(rxr),
+		    ixgbe_sysctl_rdt_handler, "IU",
+		    "Receive Descriptor Tail");
+		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_packets",
+		    CTLFLAG_RD, &rxr->rx_packets,
+		    "Queue Packets Received");
+		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_bytes",
+		    CTLFLAG_RD, &rxr->rx_bytes,
+		    "Queue Bytes Received");
+		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_copies",
+		    CTLFLAG_RD, &rxr->rx_copies,
+		    "Copied RX Frames");
+	}
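A hedged illustration of where these leaves land, for reviewers poking at a running system (the unit number is assumed; names follow the node/leaf strings registered above and below):

    # per-queue counters and the MAC statistics sub-node, e.g. unit 0:
    sysctl dev.ix.0.queue0.txd_head
    sysctl dev.ix.0.queue0.rx_packets
    sysctl dev.ix.0.mac_stats.crc_errs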
Errors"); + SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "ill_errs", + CTLFLAG_RD, &stats->illerrc, + "Illegal Byte Errors"); + SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "byte_errs", + CTLFLAG_RD, &stats->errbc, + "Byte Errors"); + SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "short_discards", + CTLFLAG_RD, &stats->mspdc, + "MAC Short Packets Discarded"); + SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "local_faults", + CTLFLAG_RD, &stats->mlfc, + "MAC Local Faults"); + SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "remote_faults", + CTLFLAG_RD, &stats->mrfc, + "MAC Remote Faults"); + SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rec_len_errs", + CTLFLAG_RD, &stats->rlec, + "Receive Length Errors"); + + /* Flow Control stats */ + SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd", + CTLFLAG_RD, &stats->lxontxc, + "Link XON Transmitted"); + SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd", + CTLFLAG_RD, &stats->lxonrxc, + "Link XON Received"); + SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd", + CTLFLAG_RD, &stats->lxofftxc, + "Link XOFF Transmitted"); + SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd", + CTLFLAG_RD, &stats->lxoffrxc, + "Link XOFF Received"); + + /* Packet Reception Stats */ + SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_octets_rcvd", + CTLFLAG_RD, &stats->tor, + "Total Octets Received"); + SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_rcvd", + CTLFLAG_RD, &stats->gorc, + "Good Octets Received"); + SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_rcvd", + CTLFLAG_RD, &stats->tpr, + "Total Packets Received"); + SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_rcvd", + CTLFLAG_RD, &stats->gprc, + "Good Packets Received"); + SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_rcvd", + CTLFLAG_RD, &stats->mprc, + "Multicast Packets Received"); + SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_rcvd", + CTLFLAG_RD, &stats->bprc, + "Broadcast Packets Received"); + SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64", + CTLFLAG_RD, &stats->prc64, + "64 byte frames received "); + SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127", + CTLFLAG_RD, &stats->prc127, + "65-127 byte frames received"); + SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255", + CTLFLAG_RD, &stats->prc255, + "128-255 byte frames received"); + SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511", + CTLFLAG_RD, &stats->prc511, + "256-511 byte frames received"); + SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023", + CTLFLAG_RD, &stats->prc1023, + "512-1023 byte frames received"); + SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522", + CTLFLAG_RD, &stats->prc1522, + "1023-1522 byte frames received"); + SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersized", + CTLFLAG_RD, &stats->ruc, + "Receive Undersized"); + SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented", + CTLFLAG_RD, &stats->rfc, + "Fragmented Packets Received "); + SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversized", + CTLFLAG_RD, &stats->roc, + "Oversized Packets Received"); + SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabberd", + CTLFLAG_RD, &stats->rjc, + "Received Jabber"); + SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_rcvd", + CTLFLAG_RD, &stats->mngprc, + "Management Packets Received"); + SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_drpd", + CTLFLAG_RD, &stats->mngptc, + "Management Packets Dropped"); + SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "checksum_errs", + CTLFLAG_RD, 
&stats->xec, + "Checksum Errors"); + + /* Packet Transmission Stats */ + SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd", + CTLFLAG_RD, &stats->gotc, + "Good Octets Transmitted"); + SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd", + CTLFLAG_RD, &stats->tpt, + "Total Packets Transmitted"); + SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd", + CTLFLAG_RD, &stats->gptc, + "Good Packets Transmitted"); + SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd", + CTLFLAG_RD, &stats->bptc, + "Broadcast Packets Transmitted"); + SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd", + CTLFLAG_RD, &stats->mptc, + "Multicast Packets Transmitted"); + SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_txd", + CTLFLAG_RD, &stats->mngptc, + "Management Packets Transmitted"); + SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64", + CTLFLAG_RD, &stats->ptc64, + "64 byte frames transmitted "); + SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127", + CTLFLAG_RD, &stats->ptc127, + "65-127 byte frames transmitted"); + SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255", + CTLFLAG_RD, &stats->ptc255, + "128-255 byte frames transmitted"); + SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511", + CTLFLAG_RD, &stats->ptc511, + "256-511 byte frames transmitted"); + SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023", + CTLFLAG_RD, &stats->ptc1023, + "512-1023 byte frames transmitted"); + SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522", + CTLFLAG_RD, &stats->ptc1522, + "1024-1522 byte frames transmitted"); +} + +/** ixgbe_sysctl_tdh_handler - Handler function + * Retrieves the TDH value from the hardware + */ +static int +ixgbe_sysctl_tdh_handler(SYSCTL_HANDLER_ARGS) +{ + int error; + + struct tx_ring *txr = ((struct tx_ring *)oidp->oid_arg1); + if (!txr) return 0; + + unsigned val = IXGBE_READ_REG(&txr->adapter->hw, IXGBE_TDH(txr->me)); + error = sysctl_handle_int(oidp, &val, 0, req); + if (error || !req->newptr) + return error; + return 0; +} + +/** ixgbe_sysctl_tdt_handler - Handler function + * Retrieves the TDT value from the hardware + */ +static int +ixgbe_sysctl_tdt_handler(SYSCTL_HANDLER_ARGS) +{ + int error; + + struct tx_ring *txr = ((struct tx_ring *)oidp->oid_arg1); + if (!txr) return 0; + + unsigned val = IXGBE_READ_REG(&txr->adapter->hw, IXGBE_TDT(txr->me)); + error = sysctl_handle_int(oidp, &val, 0, req); + if (error || !req->newptr) + return error; + return 0; +} + +/** ixgbe_sysctl_rdh_handler - Handler function + * Retrieves the RDH value from the hardware + */ +static int +ixgbe_sysctl_rdh_handler(SYSCTL_HANDLER_ARGS) +{ + int error; + + struct rx_ring *rxr = ((struct rx_ring *)oidp->oid_arg1); + if (!rxr) return 0; + + unsigned val = IXGBE_READ_REG(&rxr->adapter->hw, IXGBE_RDH(rxr->me)); + error = sysctl_handle_int(oidp, &val, 0, req); + if (error || !req->newptr) + return error; + return 0; +} + +/** ixgbe_sysctl_rdt_handler - Handler function + * Retrieves the RDT value from the hardware + */ +static int +ixgbe_sysctl_rdt_handler(SYSCTL_HANDLER_ARGS) +{ + int error; + + struct rx_ring *rxr = ((struct rx_ring *)oidp->oid_arg1); + if (!rxr) return 0; + + unsigned val = IXGBE_READ_REG(&rxr->adapter->hw, IXGBE_RDT(rxr->me)); + error = sysctl_handle_int(oidp, &val, 0, req); + if (error || !req->newptr) + return error; + return 0; +} + +/* +** This routine is run via an vlan config EVENT, +** it enables us to use the HW Filter table since +** we can get the vlan id. 
This just creates the +** entry in the soft version of the VFTA, init will +** repopulate the real table. +*/ +static void +ixgbe_if_vlan_register(if_ctx_t ctx, u16 vtag) +{ + struct adapter *adapter = iflib_get_softc(ctx); + u16 index, bit; + + index = (vtag >> 5) & 0x7F; + bit = vtag & 0x1F; + adapter->shadow_vfta[index] |= (1 << bit); + ++adapter->num_vlans; + ixgbe_setup_vlan_hw_support(ctx); +} + +/* +** This routine is run via an vlan +** unconfig EVENT, remove our entry +** in the soft vfta. +*/ +static void +ixgbe_if_vlan_unregister(if_ctx_t ctx, u16 vtag) +{ + struct adapter *adapter = iflib_get_softc(ctx); + u16 index, bit; + + index = (vtag >> 5) & 0x7F; + bit = vtag & 0x1F; + adapter->shadow_vfta[index] &= ~(1 << bit); + --adapter->num_vlans; + /* Re-init to load the changes */ + ixgbe_setup_vlan_hw_support(ctx); +} + +static void +ixgbe_setup_vlan_hw_support(if_ctx_t ctx) +{ + struct adapter *adapter = iflib_get_softc(ctx); + struct ixgbe_hw *hw = &adapter->hw; + struct ifnet *ifp = iflib_get_ifp(ctx); + struct rx_ring *rxr; + u32 ctrl; + + /* We get here thru init_locked, meaning + ** a soft reset, this has already cleared + ** the VFTA and other state, so if there + ** have been no vlan's registered do nothing. + */ + if (adapter->num_vlans == 0) + return; + + /* Setup the queues for vlans */ + for (int i = 0; i < adapter->num_rx_queues; i++) { + rxr = &adapter->rx_queues[i].rxr; + /* On 82599 the VLAN enable is per/queue in RXDCTL */ + if (hw->mac.type != ixgbe_mac_82598EB) { + ctrl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxr->me)); + ctrl |= IXGBE_RXDCTL_VME; + IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxr->me), ctrl); + } + rxr->vtag_strip = TRUE; + } + + if ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0) + return; + + /* A soft reset zero's out the VFTA, so + ** we need to repopulate it now. + */ + for (int i = 0; i < IXGBE_VFTA_SIZE; i++) + if (adapter->shadow_vfta[i] != 0) + IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), + adapter->shadow_vfta[i]); + + ctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL); + /* Enable the Filter Table if enabled */ + if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) { + ctrl &= ~IXGBE_VLNCTRL_CFIEN; + ctrl |= IXGBE_VLNCTRL_VFE; + } + if (hw->mac.type == ixgbe_mac_82598EB) + ctrl |= IXGBE_VLNCTRL_VME; + IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, ctrl); +} + +/* +** Get the width and transaction speed of +** the slot this adapter is plugged into. +*/ +static void +ixgbe_get_slot_info(struct adapter *adapter) +{ + device_t dev = iflib_get_dev(adapter->ctx); + struct ixgbe_hw *hw = &adapter->hw; + struct ixgbe_mac_info *mac = &hw->mac; + u16 link; + u32 offset; + + MPASS(hw->back != NULL); + /* For most devices simply call the shared code routine */ + if (hw->device_id != IXGBE_DEV_ID_82599_SFP_SF_QP) { + ixgbe_get_bus_info(hw); + /* These devices don't use PCI-E */ + switch (hw->mac.type) { + case ixgbe_mac_X550EM_x: + return; + default: + goto display; + } + } + /* + ** For the Quad port adapter we need to parse back + ** up the PCI tree to find the speed of the expansion + ** slot into which this adapter is plugged. A bit more work. 
+ */ + dev = device_get_parent(device_get_parent(dev)); +#ifdef IXGBE_DEBUG + device_printf(dev, "parent pcib = %x,%x,%x\n", + pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev)); +#endif + dev = device_get_parent(device_get_parent(dev)); +#ifdef IXGBE_DEBUG + device_printf(dev, "slot pcib = %x,%x,%x\n", + pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev)); +#endif + /* Now get the PCI Express Capabilities offset */ + pci_find_cap(dev, PCIY_EXPRESS, &offset); + /* ...and read the Link Status Register */ + link = pci_read_config(dev, offset + PCIER_LINK_STA, 2); + switch (link & IXGBE_PCI_LINK_WIDTH) { + case IXGBE_PCI_LINK_WIDTH_1: + hw->bus.width = ixgbe_bus_width_pcie_x1; + break; + case IXGBE_PCI_LINK_WIDTH_2: + hw->bus.width = ixgbe_bus_width_pcie_x2; + break; + case IXGBE_PCI_LINK_WIDTH_4: + hw->bus.width = ixgbe_bus_width_pcie_x4; + break; + case IXGBE_PCI_LINK_WIDTH_8: + hw->bus.width = ixgbe_bus_width_pcie_x8; + break; + default: + hw->bus.width = ixgbe_bus_width_unknown; + break; + } + switch (link & IXGBE_PCI_LINK_SPEED) { + case IXGBE_PCI_LINK_SPEED_2500: + hw->bus.speed = ixgbe_bus_speed_2500; + break; + case IXGBE_PCI_LINK_SPEED_5000: + hw->bus.speed = ixgbe_bus_speed_5000; + break; + case IXGBE_PCI_LINK_SPEED_8000: + hw->bus.speed = ixgbe_bus_speed_8000; + break; + default: + hw->bus.speed = ixgbe_bus_speed_unknown; + break; + } + + mac->ops.set_lan_id(hw); + +display: + device_printf(dev,"PCI Express Bus: Speed %s %s\n", + ((hw->bus.speed == ixgbe_bus_speed_8000) ? "8.0GT/s": + (hw->bus.speed == ixgbe_bus_speed_5000) ? "5.0GT/s": + (hw->bus.speed == ixgbe_bus_speed_2500) ? "2.5GT/s":"Unknown"), + (hw->bus.width == ixgbe_bus_width_pcie_x8) ? "Width x8" : + (hw->bus.width == ixgbe_bus_width_pcie_x4) ? "Width x4" : + (hw->bus.width == ixgbe_bus_width_pcie_x1) ? 
"Width x1" : + ("Unknown")); + + if ((hw->device_id != IXGBE_DEV_ID_82599_SFP_SF_QP) && + ((hw->bus.width <= ixgbe_bus_width_pcie_x4) && + (hw->bus.speed == ixgbe_bus_speed_2500))) { + device_printf(dev, "PCI-Express bandwidth available" + " for this card\n is not sufficient for" + " optimal performance.\n"); + device_printf(dev, "For optimal performance a x8 " + "PCIE, or x4 PCIE Gen2 slot is required.\n"); + } + if ((hw->device_id == IXGBE_DEV_ID_82599_SFP_SF_QP) && + ((hw->bus.width <= ixgbe_bus_width_pcie_x8) && + (hw->bus.speed < ixgbe_bus_speed_8000))) { + device_printf(dev, "PCI-Express bandwidth available" + " for this card\n is not sufficient for" + " optimal performance.\n"); + device_printf(dev, "For optimal performance a x8 " + "PCIE Gen3 slot is required.\n"); + } + + return; +} + +/********************************************************************* + * + * Setup MSIX Interrupt resources and handlers + * + **********************************************************************/ +static int +ixgbe_if_msix_intr_assign(if_ctx_t ctx, int msix) +{ + struct adapter *adapter = iflib_get_softc(ctx); + struct ix_rx_queue *rx_que = adapter->rx_queues; + struct ix_tx_queue *tx_que; + int error, rid, vector = 0; + int cpu_id = 0; + char buf[16]; + + /* Admin Que is vector 0*/ + rid = vector + 1; + for (int i = 0; i < adapter->num_rx_queues; i++, vector++, rx_que++) { + rid = vector + 1; + + snprintf(buf, sizeof(buf), "rxq%d", i); + error = iflib_irq_alloc_generic(ctx, &rx_que->que_irq, rid, IFLIB_INTR_RX, + ixgbe_msix_que, rx_que, rx_que->rxr.me, buf); + + if (error) { + device_printf(iflib_get_dev(ctx), "Failed to allocate que int %d err: %d", i, error); + adapter->num_rx_queues = i + 1; + goto fail; + } + + rx_que->msix = vector; + adapter->active_queues |= (u64)(1 << rx_que->msix); +#ifdef RSS + /* + * The queue ID is used as the RSS layer bucket ID. + * We look up the queue ID -> RSS CPU ID and select + * that. + */ + cpu_id = rss_getcpu(i % rss_getnumbuckets()); +#else + /* + * Bind the msix vector, and thus the + * rings to the corresponding cpu. + * + * This just happens to match the default RSS round-robin + * bucket -> queue -> CPU allocation. 
+ */ + if (adapter->num_rx_queues > 1) + cpu_id = i; +#endif + + } + for (int i = 0; i < adapter->num_tx_queues; i++) { + snprintf(buf, sizeof(buf), "txq%d", i); + tx_que = &adapter->tx_queues[i]; + tx_que->msix = i % adapter->num_rx_queues; + iflib_softirq_alloc_generic(ctx, rid, IFLIB_INTR_TX, tx_que, tx_que->txr.me, buf); + } + rid = vector + 1; + error = iflib_irq_alloc_generic(ctx, &adapter->irq, rid, IFLIB_INTR_ADMIN, + ixgbe_msix_link, adapter, 0, "aq"); + if (error) { + device_printf(iflib_get_dev(ctx), "Failed to register admin handler"); + return (error); + } + + + adapter->vector = vector; + return (0); +fail: + iflib_irq_free(ctx, &adapter->irq); + rx_que = adapter->rx_queues; + for (int i = 0; i < adapter->num_rx_queues; i++, rx_que++) + iflib_irq_free(ctx, &rx_que->que_irq); + return (error); +} + +/********************************************************************* + * + * MSIX Queue Interrupt Service routine + * + **********************************************************************/ +static int +ixgbe_msix_que(void *arg) +{ + struct ix_rx_queue *que = arg; + struct adapter *adapter = que->adapter; +#ifdef notyet + struct tx_ring *txr = &que->txr; +#endif + struct rx_ring *rxr = &que->rxr; + struct ifnet *ifp = iflib_get_ifp(que->adapter->ctx); + u32 newitr = 0; + + /* Protect against spurious interrupts */ + if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) + return 0; + + ixgbe_disable_queue(adapter, que->msix); + ++que->irqs; + + if (ixgbe_enable_aim == FALSE) + goto no_calc; + /* + ** Do Adaptive Interrupt Moderation: + ** - Write out last calculated setting + ** - Calculate based on average size over + ** the last interval. + */ + if (que->eitr_setting) { + IXGBE_WRITE_REG(&adapter->hw, IXGBE_EITR(que->msix), que->eitr_setting); + } + + que->eitr_setting = 0; + + /* Idle, do nothing */ +#ifdef notyet + if ((txr->bytes) && (txr->packets)) + newitr = txr->bytes/txr->packets; +#endif + if ((rxr->bytes) && (rxr->packets)) + newitr = max(newitr, + (rxr->bytes / rxr->packets)); + newitr += 24; /* account for hardware frame, crc */ + + /* set an upper boundary */ + newitr = min(newitr, 3000); + + /* Be nice to the mid range */ + if ((newitr > 300) && (newitr < 1200)) + newitr = (newitr / 3); + else + newitr = (newitr / 2); + + if (adapter->hw.mac.type == ixgbe_mac_82598EB) + newitr |= newitr << 16; + else + newitr |= IXGBE_EITR_CNT_WDIS; + + /* save for next interrupt */ + que->eitr_setting = newitr; + + /* Reset state */ +#ifdef notyet + txr->bytes = 0; + txr->packets = 0; +#endif + rxr->bytes = 0; + rxr->packets = 0; + +no_calc: + return (FILTER_SCHEDULE_THREAD); +} + +/********************************************************************* + * + * Media Ioctl callback + * + * This routine is called whenever the user queries the status of + * the interface using ifconfig. 
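+ *  For example (illustrative), a 10GBASE-T port linked at full
+ *  speed is reported back as IFM_ETHER | IFM_10G_T | IFM_FDX,
+ *  which ifconfig renders as "10Gbase-T <full-duplex>".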
+ * + **********************************************************************/ +static void +ixgbe_if_media_status(if_ctx_t ctx, struct ifmediareq * ifmr) +{ + struct adapter *adapter = iflib_get_softc(ctx); + struct ixgbe_hw *hw = &adapter->hw; + int layer; + + INIT_DEBUGOUT("ixgbe_if_media_status: begin"); + ixgbe_if_update_admin_status(ctx); + + ifmr->ifm_status = IFM_AVALID; + ifmr->ifm_active = IFM_ETHER; + + if (!adapter->link_active) { + return; + } + + ifmr->ifm_status |= IFM_ACTIVE; + layer = adapter->phy_layer; + + if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_T || + layer & IXGBE_PHYSICAL_LAYER_1000BASE_T || + layer & IXGBE_PHYSICAL_LAYER_100BASE_TX) + switch (adapter->link_speed) { + case IXGBE_LINK_SPEED_10GB_FULL: + ifmr->ifm_active |= IFM_10G_T | IFM_FDX; + break; + case IXGBE_LINK_SPEED_1GB_FULL: + ifmr->ifm_active |= IFM_1000_T | IFM_FDX; + break; + case IXGBE_LINK_SPEED_100_FULL: + ifmr->ifm_active |= IFM_100_TX | IFM_FDX; + break; + } + if (layer & IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU || + layer & IXGBE_PHYSICAL_LAYER_SFP_ACTIVE_DA) + switch (adapter->link_speed) { + case IXGBE_LINK_SPEED_10GB_FULL: + ifmr->ifm_active |= IFM_10G_TWINAX | IFM_FDX; + break; + } + if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_LR) + switch (adapter->link_speed) { + case IXGBE_LINK_SPEED_10GB_FULL: + ifmr->ifm_active |= IFM_10G_LR | IFM_FDX; + break; + case IXGBE_LINK_SPEED_1GB_FULL: + ifmr->ifm_active |= IFM_1000_LX | IFM_FDX; + break; + } + if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_LRM) + switch (adapter->link_speed) { + case IXGBE_LINK_SPEED_10GB_FULL: + ifmr->ifm_active |= IFM_10G_LRM | IFM_FDX; + break; + case IXGBE_LINK_SPEED_1GB_FULL: + ifmr->ifm_active |= IFM_1000_LX | IFM_FDX; + break; + } + if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_SR || + layer & IXGBE_PHYSICAL_LAYER_1000BASE_SX) + switch (adapter->link_speed) { + case IXGBE_LINK_SPEED_10GB_FULL: + ifmr->ifm_active |= IFM_10G_SR | IFM_FDX; + break; + case IXGBE_LINK_SPEED_1GB_FULL: + ifmr->ifm_active |= IFM_1000_SX | IFM_FDX; + break; + } + if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_CX4) + switch (adapter->link_speed) { + case IXGBE_LINK_SPEED_10GB_FULL: + ifmr->ifm_active |= IFM_10G_CX4 | IFM_FDX; + break; + } + /* + ** XXX: These need to use the proper media types once + ** they're added. 
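+** (e.g. on kernels built without IFM_ETH_XTYPE a 10GBASE-KR
+** backplane link is reported as IFM_10G_SR below; with it, the
+** proper IFM_10G_KR type is used instead.)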
+ */ +#ifndef IFM_ETH_XTYPE + if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KR) + switch (adapter->link_speed) { + case IXGBE_LINK_SPEED_10GB_FULL: + ifmr->ifm_active |= IFM_10G_SR | IFM_FDX; + break; + case IXGBE_LINK_SPEED_2_5GB_FULL: + ifmr->ifm_active |= IFM_2500_SX | IFM_FDX; + break; + case IXGBE_LINK_SPEED_1GB_FULL: + ifmr->ifm_active |= IFM_1000_CX | IFM_FDX; + break; + } + else if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KX4 + || layer & IXGBE_PHYSICAL_LAYER_1000BASE_KX) + switch (adapter->link_speed) { + case IXGBE_LINK_SPEED_10GB_FULL: + ifmr->ifm_active |= IFM_10G_CX4 | IFM_FDX; + break; + case IXGBE_LINK_SPEED_2_5GB_FULL: + ifmr->ifm_active |= IFM_2500_SX | IFM_FDX; + break; + case IXGBE_LINK_SPEED_1GB_FULL: + ifmr->ifm_active |= IFM_1000_CX | IFM_FDX; + break; + } +#else + if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KR) + switch (adapter->link_speed) { + case IXGBE_LINK_SPEED_10GB_FULL: + ifmr->ifm_active |= IFM_10G_KR | IFM_FDX; + break; + case IXGBE_LINK_SPEED_2_5GB_FULL: + ifmr->ifm_active |= IFM_2500_KX | IFM_FDX; + break; + case IXGBE_LINK_SPEED_1GB_FULL: + ifmr->ifm_active |= IFM_1000_KX | IFM_FDX; + break; + } + else if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KX4 + || layer & IXGBE_PHYSICAL_LAYER_1000BASE_KX) + switch (adapter->link_speed) { + case IXGBE_LINK_SPEED_10GB_FULL: + ifmr->ifm_active |= IFM_10G_KX4 | IFM_FDX; + break; + case IXGBE_LINK_SPEED_2_5GB_FULL: + ifmr->ifm_active |= IFM_2500_KX | IFM_FDX; + break; + case IXGBE_LINK_SPEED_1GB_FULL: + ifmr->ifm_active |= IFM_1000_KX | IFM_FDX; + break; + } +#endif + /* If nothing is recognized... */ + if (IFM_SUBTYPE(ifmr->ifm_active) == 0) + ifmr->ifm_active |= IFM_UNKNOWN; + + /* Display current flow control setting used on link */ + if (hw->fc.current_mode == ixgbe_fc_rx_pause || + hw->fc.current_mode == ixgbe_fc_full) + ifmr->ifm_active |= IFM_ETH_RXPAUSE; + if (hw->fc.current_mode == ixgbe_fc_tx_pause || + hw->fc.current_mode == ixgbe_fc_full) + ifmr->ifm_active |= IFM_ETH_TXPAUSE; +} + +/********************************************************************* + * + * Media Ioctl callback + * + * This routine is called when the user changes speed/duplex using + * media/mediopt option with ifconfig. + * + **********************************************************************/ +static int +ixgbe_if_media_change(if_ctx_t ctx) +{ + struct adapter *adapter = iflib_get_softc(ctx); + struct ifmedia *ifm = iflib_get_media(ctx); + struct ixgbe_hw *hw = &adapter->hw; + ixgbe_link_speed speed = 0; + + INIT_DEBUGOUT("ixgbe_if_media_change: begin"); + + if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) + return (EINVAL); + + if (hw->phy.media_type == ixgbe_media_type_backplane) + return (EPERM); + + /* + ** We don't actually need to check against the supported + ** media types of the adapter; ifmedia will take care of + ** that for us. 
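+** Note the deliberate case fall-through below: selecting e.g.
+** IFM_10G_T accumulates IXGBE_LINK_SPEED_100_FULL, _1GB_FULL and
+** _10GB_FULL into 'speed', so autonegotiation may fall back to a
+** lower rate (illustrative of intent, not an exhaustive matrix).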
+ */ +#ifndef IFM_ETH_XTYPE + switch (IFM_SUBTYPE(ifm->ifm_media)) { + case IFM_AUTO: + case IFM_10G_T: + speed |= IXGBE_LINK_SPEED_100_FULL; + case IFM_10G_LRM: + case IFM_10G_SR: /* KR, too */ + case IFM_10G_LR: + case IFM_10G_CX4: /* KX4 */ + speed |= IXGBE_LINK_SPEED_1GB_FULL; + case IFM_10G_TWINAX: + speed |= IXGBE_LINK_SPEED_10GB_FULL; + break; + case IFM_1000_T: + speed |= IXGBE_LINK_SPEED_100_FULL; + case IFM_1000_LX: + case IFM_1000_SX: + case IFM_1000_CX: /* KX */ + speed |= IXGBE_LINK_SPEED_1GB_FULL; + break; + case IFM_100_TX: + speed |= IXGBE_LINK_SPEED_100_FULL; + break; + default: + goto invalid; + } +#else + switch (IFM_SUBTYPE(ifm->ifm_media)) { + case IFM_AUTO: + case IFM_10G_T: + speed |= IXGBE_LINK_SPEED_100_FULL; + case IFM_10G_LRM: + case IFM_10G_KR: + case IFM_10G_LR: + case IFM_10G_KX4: + speed |= IXGBE_LINK_SPEED_1GB_FULL; + case IFM_10G_TWINAX: + speed |= IXGBE_LINK_SPEED_10GB_FULL; + break; + case IFM_1000_T: + speed |= IXGBE_LINK_SPEED_100_FULL; + case IFM_1000_LX: + case IFM_1000_SX: + case IFM_1000_KX: + speed |= IXGBE_LINK_SPEED_1GB_FULL; + break; + case IFM_100_TX: + speed |= IXGBE_LINK_SPEED_100_FULL; + break; + default: + goto invalid; + } +#endif + hw->mac.autotry_restart = TRUE; + hw->mac.ops.setup_link(hw, speed, TRUE); + adapter->advertise = + ((speed & IXGBE_LINK_SPEED_10GB_FULL) << 2) | + ((speed & IXGBE_LINK_SPEED_1GB_FULL) << 1) | + ((speed & IXGBE_LINK_SPEED_100_FULL) << 0); + + return (0); + +invalid: + device_printf(iflib_get_dev(ctx), "Invalid media type!\n"); + return (EINVAL); +} + +static int +ixgbe_if_promisc_set(if_ctx_t ctx, int flags) +{ + struct adapter *adapter = iflib_get_softc(ctx); + struct ifnet *ifp = iflib_get_ifp(ctx); + u_int32_t reg_rctl; + int mcnt = 0; + + reg_rctl = IXGBE_READ_REG(&adapter->hw, IXGBE_FCTRL); + reg_rctl &= (~IXGBE_FCTRL_UPE); + if (ifp->if_flags & IFF_ALLMULTI) + mcnt = MAX_NUM_MULTICAST_ADDRESSES; + else { + mcnt = if_multiaddr_count(ifp, MAX_NUM_MULTICAST_ADDRESSES); + } + if (mcnt < MAX_NUM_MULTICAST_ADDRESSES) + reg_rctl &= (~IXGBE_FCTRL_MPE); + + /* clear promiscuous mode and multicast filters before enabling */ + IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl); + + if (ifp->if_flags & IFF_PROMISC) { + reg_rctl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE); + IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl); + } else if (ifp->if_flags & IFF_ALLMULTI) { + reg_rctl |= IXGBE_FCTRL_MPE; + reg_rctl &= ~IXGBE_FCTRL_UPE; + IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl); + } + + return (0); +} + +static int +ixgbe_msix_link(void *arg) +{ + struct adapter *adapter = arg; + struct ixgbe_hw *hw = &adapter->hw; + u32 reg_eicr, mod_mask; + + ++adapter->link_irq; + + /* First get the cause */ + reg_eicr = IXGBE_READ_REG(hw, IXGBE_EICS); + + /* Be sure the queue bits are not cleared */ + reg_eicr &= ~IXGBE_EICR_RTX_QUEUE; + /* Clear interrupt with write */ + IXGBE_WRITE_REG(hw, IXGBE_EICR, reg_eicr); + + /* Link status change */ + if (reg_eicr & IXGBE_EICR_LSC) + iflib_admin_intr_deferred(adapter->ctx); + + if (adapter->hw.mac.type != ixgbe_mac_82598EB) { +#ifdef IXGBE_FDIR + if (reg_eicr & IXGBE_EICR_FLOW_DIR) { + /* This is probably overkill :) */ + if (!atomic_cmpset_int(&adapter->fdir_reinit, 0, 1)) + return; + /* Disable the interrupt */ + IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_EICR_FLOW_DIR); + GROUPTASK_ENQUEUE(&adapter->fdir_task); + } else +#endif + if (reg_eicr & IXGBE_EICR_ECC) { + device_printf(iflib_get_dev(adapter->ctx), "\nCRITICAL: ECC ERROR!! 
" + "Please Reboot!!\n"); + IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_ECC); + } + + /* Check for over temp condition */ + if (reg_eicr & IXGBE_EICR_TS) { + device_printf(iflib_get_dev(adapter->ctx), "\nCRITICAL: OVER TEMP!! " + "PHY IS SHUT DOWN!!\n"); + device_printf(iflib_get_dev(adapter->ctx), "System shutdown required!\n"); + IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_TS); + } + +#ifdef PCI_IOV + if (reg_eicr & IXGBE_EICR_MAILBOX) + GROUPTASK_ENQUEUE(&adapter->mbx_task); +#endif + } + /* Pluggable optics-related interrupt */ + if (hw->device_id == IXGBE_DEV_ID_X550EM_X_SFP) + mod_mask = IXGBE_EICR_GPI_SDP0_X540; + else + mod_mask = IXGBE_EICR_GPI_SDP2_BY_MAC(hw); + + if (ixgbe_is_sfp(hw)) { + if (reg_eicr & IXGBE_EICR_GPI_SDP1_BY_MAC(hw)) { + IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1_BY_MAC(hw)); + GROUPTASK_ENQUEUE(&adapter->msf_task); + } else if (reg_eicr & mod_mask) { + IXGBE_WRITE_REG(hw, IXGBE_EICR, mod_mask); + GROUPTASK_ENQUEUE(&adapter->mod_task); + } + } + + /* Check for fan failure */ + if ((hw->device_id == IXGBE_DEV_ID_82598AT) && + (reg_eicr & IXGBE_EICR_GPI_SDP1)) { + IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1); + device_printf(iflib_get_dev(adapter->ctx), "\nCRITICAL: FAN FAILURE!! " + "REPLACE IMMEDIATELY!!\n"); + } + + /* External PHY interrupt */ + if (hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T && + (reg_eicr & IXGBE_EICR_GPI_SDP0_X540)) { + IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP0_X540); + GROUPTASK_ENQUEUE(&adapter->phy_task); + } + + IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, IXGBE_EIMS_OTHER); + return (FILTER_HANDLED); + } + +static int +ixgbe_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS) +{ + int error; + struct ix_rx_queue *que = ((struct ix_rx_queue *)oidp->oid_arg1); + unsigned int reg, usec, rate; + + reg = IXGBE_READ_REG(&que->adapter->hw, IXGBE_EITR(que->msix)); + usec = ((reg & 0x0FF8) >> 3); + if (usec > 0) + rate = 500000 / usec; + else + rate = 0; + error = sysctl_handle_int(oidp, &rate, 0, req); + if (error || !req->newptr) + return error; + reg &= ~0xfff; /* default, no limitation */ + ixgbe_max_interrupt_rate = 0; + if (rate > 0 && rate < 500000) { + if (rate < 1000) + rate = 1000; + ixgbe_max_interrupt_rate = rate; + reg |= ((4000000/rate) & 0xff8 ); + } + IXGBE_WRITE_REG(&que->adapter->hw, IXGBE_EITR(que->msix), reg); + return 0; +} + +static void +ixgbe_add_device_sysctls(if_ctx_t ctx) +{ + device_t dev = iflib_get_dev(ctx); + struct adapter *adapter = iflib_get_softc(ctx); + struct ixgbe_hw *hw = &adapter->hw; + struct sysctl_oid_list *child; + struct sysctl_ctx_list *ctx_list; + + ctx_list = device_get_sysctl_ctx(dev); + child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); + + /* Sysctls for all devices */ + SYSCTL_ADD_PROC(ctx_list, child, OID_AUTO, "fc", + CTLTYPE_INT | CTLFLAG_RW, adapter, 0, + ixgbe_sysctl_flowcntl, "I", IXGBE_SYSCTL_DESC_SET_FC); + + SYSCTL_ADD_INT(ctx_list, child, OID_AUTO, "enable_aim", + CTLFLAG_RW, + &ixgbe_enable_aim, 1, "Interrupt Moderation"); + + SYSCTL_ADD_PROC(ctx_list, child, OID_AUTO, "advertise_speed", + CTLTYPE_INT | CTLFLAG_RW, adapter, 0, + ixgbe_sysctl_advertise, "I", IXGBE_SYSCTL_DESC_ADV_SPEED); + + SYSCTL_ADD_PROC(ctx_list, child, OID_AUTO, "thermal_test", + CTLTYPE_INT | CTLFLAG_RW, adapter, 0, + ixgbe_sysctl_thermal_test, "I", "Thermal Test"); + + SYSCTL_ADD_PROC(ctx_list, child, OID_AUTO, "reg_dump", + CTLTYPE_STRING | CTLFLAG_RD, adapter, 0, + ixgbe_get_regs, "A", "Dump Registers"); + +#ifdef IXGBE_DEBUG + /* testing sysctls (for all devices) */ + 
+	SYSCTL_ADD_PROC(ctx_list, child, OID_AUTO, "power_state",
+	    CTLTYPE_INT | CTLFLAG_RW, adapter, 0,
+	    ixgbe_sysctl_power_state, "I", "PCI Power State");
+
+	SYSCTL_ADD_PROC(ctx_list, child, OID_AUTO, "print_rss_config",
+	    CTLTYPE_STRING | CTLFLAG_RD, adapter, 0,
+	    ixgbe_sysctl_print_rss_config, "A", "Prints RSS Configuration");
+#endif
+	/* for X550 series devices */
+	if (hw->mac.type >= ixgbe_mac_X550)
+		SYSCTL_ADD_PROC(ctx_list, child, OID_AUTO, "dmac",
+		    CTLTYPE_INT | CTLFLAG_RW, adapter, 0,
+		    ixgbe_sysctl_dmac, "I", "DMA Coalesce");
+
+	/* for X552 backplane devices */
+	if (hw->device_id == IXGBE_DEV_ID_X550EM_X_KR) {
+		struct sysctl_oid *eee_node;
+		struct sysctl_oid_list *eee_list;
+
+		eee_node = SYSCTL_ADD_NODE(ctx_list, child, OID_AUTO, "eee",
+		    CTLFLAG_RD, NULL,
+		    "Energy Efficient Ethernet sysctls");
+		eee_list = SYSCTL_CHILDREN(eee_node);
+
+		SYSCTL_ADD_PROC(ctx_list, eee_list, OID_AUTO, "enable",
+		    CTLTYPE_INT | CTLFLAG_RW, adapter, 0,
+		    ixgbe_sysctl_eee_enable, "I",
+		    "Enable or Disable EEE");
+
+		SYSCTL_ADD_PROC(ctx_list, eee_list, OID_AUTO, "negotiated",
+		    CTLTYPE_INT | CTLFLAG_RD, adapter, 0,
+		    ixgbe_sysctl_eee_negotiated, "I",
+		    "EEE negotiated on link");
+
+		SYSCTL_ADD_PROC(ctx_list, eee_list, OID_AUTO, "tx_lpi_status",
+		    CTLTYPE_INT | CTLFLAG_RD, adapter, 0,
+		    ixgbe_sysctl_eee_tx_lpi_status, "I",
+		    "Whether or not TX link is in LPI state");
+
+		SYSCTL_ADD_PROC(ctx_list, eee_list, OID_AUTO, "rx_lpi_status",
+		    CTLTYPE_INT | CTLFLAG_RD, adapter, 0,
+		    ixgbe_sysctl_eee_rx_lpi_status, "I",
+		    "Whether or not RX link is in LPI state");
+
+		SYSCTL_ADD_PROC(ctx_list, eee_list, OID_AUTO, "tx_lpi_delay",
+		    CTLTYPE_INT | CTLFLAG_RD, adapter, 0,
+		    ixgbe_sysctl_eee_tx_lpi_delay, "I",
+		    "TX LPI entry delay in microseconds");
+	}
+
+	/* for WoL-capable devices */
+	if (hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T) {
+		SYSCTL_ADD_PROC(ctx_list, child, OID_AUTO, "wol_enable",
+		    CTLTYPE_INT | CTLFLAG_RW, adapter, 0,
+		    ixgbe_sysctl_wol_enable, "I",
+		    "Enable/Disable Wake on LAN");
+
+		SYSCTL_ADD_PROC(ctx_list, child, OID_AUTO, "wufc",
+		    CTLTYPE_INT | CTLFLAG_RW, adapter, 0,
+		    ixgbe_sysctl_wufc, "I",
+		    "Enable/Disable Wake Up Filters");
+	}
+
+	/* for X550EM 10GBaseT devices */
+	if (hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T) {
+		struct sysctl_oid *phy_node;
+		struct sysctl_oid_list *phy_list;
+
+		phy_node = SYSCTL_ADD_NODE(ctx_list, child, OID_AUTO, "phy",
+		    CTLFLAG_RD, NULL,
+		    "External PHY sysctls");
+		phy_list = SYSCTL_CHILDREN(phy_node);
+
+		SYSCTL_ADD_PROC(ctx_list, phy_list, OID_AUTO, "temp",
+		    CTLTYPE_INT | CTLFLAG_RD, adapter, 0,
+		    ixgbe_sysctl_phy_temp, "I",
+		    "Current External PHY Temperature (Celsius)");
+
+		SYSCTL_ADD_PROC(ctx_list, phy_list, OID_AUTO, "overtemp_occurred",
+		    CTLTYPE_INT | CTLFLAG_RD, adapter, 0,
+		    ixgbe_sysctl_phy_overtemp_occurred, "I",
+		    "External PHY High Temperature Event Occurred");
+	}
+}
+
+/*********************************************************************
+ *
+ *  Determine hardware revision.
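+ *  (For example, an 82599-based SFP+ card would typically read back
+ *  vendor 0x8086 and device 0x10FB here; values illustrative only.)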
+ * + **********************************************************************/ +static void +ixgbe_identify_hardware(if_ctx_t ctx) +{ + struct adapter *adapter = iflib_get_softc(ctx); + device_t dev = iflib_get_dev(ctx); + struct ixgbe_hw *hw = &adapter->hw; + + /* Save off the information about this board */ + hw->vendor_id = pci_get_vendor(dev); + hw->device_id = pci_get_device(dev); + hw->revision_id = pci_read_config(dev, PCIR_REVID, 1); + hw->subsystem_vendor_id = + pci_read_config(dev, PCIR_SUBVEND_0, 2); + hw->subsystem_device_id = + pci_read_config(dev, PCIR_SUBDEV_0, 2); + + /* We need this here to set the num_segs in the code that follows */ + ixgbe_set_mac_type(hw); +} + +/********************************************************************* + * + * Determine optic type + * + **********************************************************************/ +static void +ixgbe_setup_optics(struct adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + int layer; + + layer = adapter->phy_layer = ixgbe_get_supported_physical_layer(hw); + + if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_T) { + adapter->optics = IFM_10G_T; + return; + } + + if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_T) { + adapter->optics = IFM_1000_T; + return; + + } + + if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_SX) { + adapter->optics = IFM_1000_SX; + return; + } + + if (layer & (IXGBE_PHYSICAL_LAYER_10GBASE_LR | + IXGBE_PHYSICAL_LAYER_10GBASE_LRM)) { + adapter->optics = IFM_10G_LR; + return; + } + + if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_SR) { + adapter->optics = IFM_10G_SR; + return; + } + + if (layer & IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU) { + adapter->optics = IFM_10G_TWINAX; + return; + } + + if (layer & (IXGBE_PHYSICAL_LAYER_10GBASE_KX4 | + IXGBE_PHYSICAL_LAYER_10GBASE_CX4)) { + adapter->optics = IFM_10G_CX4; + return; + } + + /* If we get here just set the default */ + adapter->optics = IFM_ETHER | IFM_AUTO; +} + +static int +ixgbe_allocate_pci_resources(if_ctx_t ctx) +{ + int rid; + struct adapter *adapter = iflib_get_softc(ctx); + device_t dev = iflib_get_dev(ctx); + + rid = PCIR_BAR(0); + adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, + &rid, RF_ACTIVE); + + if (!(adapter->pci_mem)) { + device_printf(dev,"Unable to allocate bus resource: memory\n"); + return (ENXIO); + } + + adapter->osdep.mem_bus_space_tag = + rman_get_bustag(adapter->pci_mem); + adapter->osdep.mem_bus_space_handle = + rman_get_bushandle(adapter->pci_mem); + adapter->hw.hw_addr = (u8 *) &adapter->osdep.mem_bus_space_handle; + adapter->hw.back = adapter; + return (0); +} + +/********************************************************************* + * Device removal routine + * + * The detach entry point is called when the driver is being removed. + * This routine stops the adapter and deallocates all the resources + * that were allocated for driver operation. 
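+ *  Illustratively, kldunload'ing the module lands here after iflib
+ *  has already stopped the interface and torn down the queues.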
+ * + * return 0 on success, positive on failure + *********************************************************************/ +static int +ixgbe_if_detach(if_ctx_t ctx) +{ + struct adapter *adapter = iflib_get_softc(ctx); + u32 ctrl_ext; +#ifdef PCI_IOV + device_t dev = iflib_get_dev(ctx); +#endif + + INIT_DEBUGOUT("ixgbe_detach: begin"); + +#ifdef PCI_IOV + if (pci_iov_detach(dev) != 0) { + device_printf(dev, "SR-IOV in use; detach first.\n"); + return (EBUSY); + } +#endif /* PCI_IOV */ + + iflib_config_gtask_deinit(&adapter->mod_task); + iflib_config_gtask_deinit(&adapter->msf_task); + iflib_config_gtask_deinit(&adapter->phy_task); +#ifdef PCI_IOV + iflib_config_gtask_deinit(&adapter->mbx_task); +#endif + + ixgbe_setup_low_power_mode(ctx); + + /* let hardware know driver is unloading */ + ctrl_ext = IXGBE_READ_REG(&adapter->hw, IXGBE_CTRL_EXT); + ctrl_ext &= ~IXGBE_CTRL_EXT_DRV_LOAD; + IXGBE_WRITE_REG(&adapter->hw, IXGBE_CTRL_EXT, ctrl_ext); + + ixgbe_free_pci_resources(ctx); + free(adapter->mta, M_DEVBUF); + + return (0); +} + +static int +ixgbe_setup_low_power_mode(if_ctx_t ctx) +{ + struct adapter *adapter = iflib_get_softc(ctx); + struct ixgbe_hw *hw = &adapter->hw; + device_t dev = iflib_get_dev(ctx); + s32 error = 0; + + if (!hw->wol_enabled) + ixgbe_set_phy_power(hw, FALSE); + + /* Limit power management flow to X550EM baseT */ + if (hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T + && hw->phy.ops.enter_lplu) { + /* Turn off support for APM wakeup. (Using ACPI instead) */ + IXGBE_WRITE_REG(hw, IXGBE_GRC, + IXGBE_READ_REG(hw, IXGBE_GRC) & ~(u32)2); + + /* + * Clear Wake Up Status register to prevent any previous wakeup + * events from waking us up immediately after we suspend. + */ + IXGBE_WRITE_REG(hw, IXGBE_WUS, 0xffffffff); + + /* + * Program the Wakeup Filter Control register with user filter + * settings + */ + IXGBE_WRITE_REG(hw, IXGBE_WUFC, adapter->wufc); + + /* Enable wakeups and power management in Wakeup Control */ + IXGBE_WRITE_REG(hw, IXGBE_WUC, + IXGBE_WUC_WKEN | IXGBE_WUC_PME_EN); + + /* X550EM baseT adapters need a special LPLU flow */ + hw->phy.reset_disable = true; + error = hw->phy.ops.enter_lplu(hw); + if (error) + device_printf(dev, + "Error entering LPLU: %d\n", error); + hw->phy.reset_disable = false; + } + + return error; +} + +/********************************************************************* + * + * Shutdown entry point + * + **********************************************************************/ +static int +ixgbe_if_shutdown(if_ctx_t ctx) +{ + int error = 0; + + INIT_DEBUGOUT("ixgbe_shutdown: begin"); + + error = ixgbe_setup_low_power_mode(ctx); + return (error); +} + +static int +ixgbe_if_suspend(if_ctx_t ctx) +{ + int error = 0; + + INIT_DEBUGOUT("ixgbe_suspend: begin"); + + error = ixgbe_setup_low_power_mode(ctx); + + return (error); +} + +static int +ixgbe_if_resume(if_ctx_t ctx) +{ + struct adapter *adapter = iflib_get_softc(ctx); + device_t dev = iflib_get_dev(ctx); + struct ifnet *ifp = iflib_get_ifp(ctx); + struct ixgbe_hw *hw = &adapter->hw; + u32 wus; + + INIT_DEBUGOUT("ixgbe_resume: begin"); + + /* Read & clear WUS register */ + wus = IXGBE_READ_REG(hw, IXGBE_WUS); + if (wus) + device_printf(dev, "Woken up by (WUS): %#010x\n", + IXGBE_READ_REG(hw, IXGBE_WUS)); + IXGBE_WRITE_REG(hw, IXGBE_WUS, 0xffffffff); + /* And clear WUFC until next low-power transition */ + IXGBE_WRITE_REG(hw, IXGBE_WUFC, 0); + + /* + * Required after D3->D0 transition; + * will re-advertise all previous advertised speeds + */ + if (ifp->if_flags & IFF_UP) + 
ixgbe_if_init(ctx); + + INIT_DEBUGOUT("ixgbe_resume: end"); + return (0); +} + +/********************************************************************* + * Ioctl mtu entry point + * + * + * return 0 on success, EINVAL on failure + **********************************************************************/ +static int +ixgbe_if_mtu_set(if_ctx_t ctx, uint32_t mtu) +{ + int error = 0; + struct adapter *adapter = iflib_get_softc(ctx); + + IOCTL_DEBUGOUT("ioctl: SIOCIFMTU (Set Interface MTU)"); + + if (mtu > IXGBE_MAX_MTU) { + error = EINVAL; + } else { + adapter->max_frame_size = mtu + IXGBE_MTU_HDR; + } + + return error; +} + + +static void +ixgbe_if_crcstrip_set(if_ctx_t ctx, int onoff) +{ +#ifdef DEV_NETMAP + struct adapter *sc = iflib_get_softc(ctx); + struct ixgbe_hw *hw = &sc->hw; + /* crc stripping is set in two places: + * IXGBE_HLREG0 (modified on init_locked and hw reset) + * IXGBE_RDRXCTL (set by the original driver in + * ixgbe_setup_hw_rsc() called in init_locked. + * We disable the setting when netmap is compiled in). + * We update the values here, but also in ixgbe.c because + * init_locked sometimes is called outside our control. + */ + uint32_t hl, rxc; + + hl = IXGBE_READ_REG(hw, IXGBE_HLREG0); + rxc = IXGBE_READ_REG(hw, IXGBE_RDRXCTL); + if (netmap_verbose) + D("%s read HLREG 0x%x rxc 0x%x", + onoff ? "enter" : "exit", hl, rxc); + /* hw requirements ... */ + rxc &= ~IXGBE_RDRXCTL_RSCFRSTSIZE; + rxc |= IXGBE_RDRXCTL_RSCACKC; + if (onoff && !ix_crcstrip) { + /* keep the crc. Fast rx */ + hl &= ~IXGBE_HLREG0_RXCRCSTRP; + rxc &= ~IXGBE_RDRXCTL_CRCSTRIP; + } else { + /* reset default mode */ + hl |= IXGBE_HLREG0_RXCRCSTRP; + rxc |= IXGBE_RDRXCTL_CRCSTRIP; + } + if (netmap_verbose) + D("%s write HLREG 0x%x rxc 0x%x", + onoff ? "enter" : "exit", hl, rxc); + IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hl); + IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rxc); +#endif /* DEV_NETMAP */ +} + + + +/* + * Set the various hardware offload abilities. + * + * This takes the ifnet's if_capenable flags (e.g. set by the user using + * ifconfig) and indicates to the OS via the ifnet's if_hwassist field what + * mbuf offload flags the driver will understand. + */ +static void +ixgbe_set_if_hwassist(struct adapter *adapter) +{ + struct ifnet *ifp = iflib_get_ifp(adapter->ctx); + struct ixgbe_hw *hw = &adapter->hw; + + ifp->if_hwassist = 0; +#if __FreeBSD_version >= 1000000 + if (ifp->if_capenable & IFCAP_TSO4) + ifp->if_hwassist |= CSUM_IP_TSO; + if (ifp->if_capenable & IFCAP_TSO6) + ifp->if_hwassist |= CSUM_IP6_TSO; + if (ifp->if_capenable & IFCAP_TXCSUM) { + ifp->if_hwassist |= (CSUM_IP | CSUM_IP_UDP | CSUM_IP_TCP); + if (hw->mac.type != ixgbe_mac_82598EB) + ifp->if_hwassist |= CSUM_IP_SCTP; + } + if (ifp->if_capenable & IFCAP_TXCSUM_IPV6) { + ifp->if_hwassist |= (CSUM_IP6_UDP | CSUM_IP6_TCP); + if (hw->mac.type != ixgbe_mac_82598EB) + ifp->if_hwassist |= CSUM_IP6_SCTP; + } +#else + if (ifp->if_capenable & IFCAP_TSO) + ifp->if_hwassist |= CSUM_TSO; + if (ifp->if_capenable & IFCAP_TXCSUM) { + ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP); + if (hw->mac.type != ixgbe_mac_82598EB) + ifp->if_hwassist |= CSUM_SCTP; + } +#endif +} + +/********************************************************************* + * Init entry point + * + * This routine is used in two ways. It is used by the stack as + * init entry point in network interface structure. It is also used + * by the driver as a hw/sw initialization routine to get to a + * consistent state. 
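+ *  Rough order of operations below: refresh RAR[0]/MAC address,
+ *  init the hw, program TX units, multicast table, RX units and
+ *  GPIE, enable the rings, set up MSIX routing, link and flow
+ *  control, then finally enable interrupts.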
+ * + * return 0 on success, positive on failure + **********************************************************************/ +#define IXGBE_MHADD_MFS_SHIFT 16 + +static void +ixgbe_if_init(if_ctx_t ctx) +{ + struct adapter *adapter = iflib_get_softc(ctx); + struct ifnet *ifp = iflib_get_ifp(ctx); + device_t dev = iflib_get_dev(ctx); + struct ixgbe_hw *hw = &adapter->hw; + struct ix_rx_queue *rx_que; + struct ix_tx_queue *tx_que; + + u32 txdctl, mhadd; + u32 rxdctl, rxctrl; + int i, err; + +#ifdef PCI_IOV + enum ixgbe_iov_mode mode; +#endif + + INIT_DEBUGOUT("ixgbe_if_init: begin"); + +#ifdef PCI_IOV + mode = ixgbe_get_iov_mode(adapter); + adapter->pool = ixgbe_max_vfs(mode); + /* Queue indices may change with IOV mode */ + for (int i = 0; i < adapter->num_rx_queues; i++) { + adapter->rx_queues[i].rxr.me = ixgbe_pf_que_index(mode, i); + adapter->tx_queues[i].txr.me = ixgbe_pf_que_index(mode, i); + } +#endif + /* reprogram the RAR[0] in case user changed it. */ + ixgbe_set_rar(hw, 0, hw->mac.addr, adapter->pool, IXGBE_RAH_AV); + + /* Get the latest mac address, User can use a LAA */ + bcopy(IF_LLADDR(ifp), hw->mac.addr, IXGBE_ETH_LENGTH_OF_ADDRESS); + ixgbe_set_rar(hw, 0, hw->mac.addr, adapter->pool, 1); + hw->addr_ctrl.rar_used_count = 1; + + ixgbe_set_if_hwassist(adapter); + ixgbe_init_hw(hw); + +#ifdef PCI_IOV + ixgbe_initialize_iov(adapter); +#endif + + ixgbe_initialize_transmit_units(ctx); + + /* Setup Multicast table */ + ixgbe_if_multi_set(ctx); + + /* + ** Determine the correct mbuf pool + ** for doing jumbo frames + */ + if (adapter->max_frame_size <= MCLBYTES) + adapter->rx_mbuf_sz = MCLBYTES; + else + adapter->rx_mbuf_sz = MJUMPAGESIZE; + + /* Configure RX settings */ + ixgbe_initialize_receive_units(ctx); + + /* Enable SDP & MSIX interrupts based on adapter */ + ixgbe_config_gpie(adapter); + + /* Set MTU size */ + if (ifp->if_mtu > ETHERMTU) { + /* aka IXGBE_MAXFRS on 82599 and newer */ + mhadd = IXGBE_READ_REG(hw, IXGBE_MHADD); + mhadd &= ~IXGBE_MHADD_MFS_MASK; + mhadd |= adapter->max_frame_size << IXGBE_MHADD_MFS_SHIFT; + IXGBE_WRITE_REG(hw, IXGBE_MHADD, mhadd); + } + + /* Now enable all the queues */ + for (i = 0, tx_que = adapter->tx_queues; i < adapter->num_tx_queues; i++, tx_que++) { + struct tx_ring *txr = &tx_que->txr; + + ixgbe_init_tx_ring(tx_que); + txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txr->me)); + txdctl |= IXGBE_TXDCTL_ENABLE; + /* Set WTHRESH to 8, burst writeback */ + txdctl |= (8 << 16); + /* + * When the internal queue falls below PTHRESH (32), + * start prefetching as long as there are at least + * HTHRESH (1) buffers ready. The values are taken + * from the Intel linux driver 3.8.21. + * Prefetching enables tx line rate even with 1 queue. 
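+			 * Illustrative combined value, assuming the usual
+			 * 82599 TXDCTL layout (PTHRESH bits 6:0, HTHRESH
+			 * bits 14:8, WTHRESH bits 22:16): together with
+			 * the (8 << 16) above this yields
+			 *   (32 << 0) | (1 << 8) | (8 << 16) == 0x00080120
+			 * plus IXGBE_TXDCTL_ENABLE.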
+ */ + txdctl |= (32 << 0) | (1 << 8); + IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txr->me), txdctl); + } + + for (i = 0, rx_que = adapter->rx_queues; i < adapter->num_rx_queues; i++, rx_que++) { + struct rx_ring *rxr = &rx_que->rxr; + rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxr->me)); + if (hw->mac.type == ixgbe_mac_82598EB) { + /* + ** PTHRESH = 21 + ** HTHRESH = 4 + ** WTHRESH = 8 + */ + rxdctl &= ~0x3FFFFF; + rxdctl |= 0x080420; + } + rxdctl |= IXGBE_RXDCTL_ENABLE; + IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxr->me), rxdctl); + for (int j = 0; j < 10; j++) { + if (IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxr->me)) & + IXGBE_RXDCTL_ENABLE) + break; + else + msec_delay(1); + } + wmb(); + } + + /* Enable Receive engine */ + rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL); + if (hw->mac.type == ixgbe_mac_82598EB) + rxctrl |= IXGBE_RXCTRL_DMBYPS; + rxctrl |= IXGBE_RXCTRL_RXEN; + ixgbe_enable_rx_dma(hw, rxctrl); + + /* Set up MSI/X routing */ + if (ixgbe_enable_msix) { + ixgbe_configure_ivars(adapter); + /* Set up auto-mask */ + if (hw->mac.type == ixgbe_mac_82598EB) + IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE); + else { + IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(0), 0xFFFFFFFF); + IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(1), 0xFFFFFFFF); + } + } else { /* Simple settings for Legacy/MSI */ + ixgbe_set_ivar(adapter, 0, 0, 0); + ixgbe_set_ivar(adapter, 0, 0, 1); + IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE); + } + +#ifdef IXGBE_FDIR + /* Init Flow director */ + if (hw->mac.type != ixgbe_mac_82598EB) { + u32 hdrm = 32 << fdir_pballoc; + + hw->mac.ops.setup_rxpba(hw, 0, hdrm, PBA_STRATEGY_EQUAL); + ixgbe_init_fdir_signature_82599(&adapter->hw, fdir_pballoc); + } +#endif + /* + * Check on any SFP devices that + * need to be kick-started + */ + if (hw->phy.type == ixgbe_phy_none) { + int err = hw->phy.ops.identify(hw); + if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) { + device_printf(dev, + "Unsupported SFP+ module type was detected.\n"); + return; + } + } + + /* Set moderation on the Link interrupt */ + IXGBE_WRITE_REG(hw, IXGBE_EITR(adapter->vector), IXGBE_LINK_ITR); + + /* Configure Energy Efficient Ethernet for supported devices */ + if (hw->mac.ops.setup_eee) { + err = hw->mac.ops.setup_eee(hw, adapter->eee_enabled); + if (err) + device_printf(dev, "Error setting up EEE: %d\n", err); + } + + /* Enable power to the phy. 
*/ + ixgbe_set_phy_power(hw, TRUE); + + + /* Config/Enable Link */ + ixgbe_config_link(adapter); + + /* Hardware Packet Buffer & Flow Control setup */ + ixgbe_config_delay_values(adapter); + + /* Initialize the FC settings */ + ixgbe_start_hw(hw); + + /* Set up VLAN support and filter */ + ixgbe_setup_vlan_hw_support(ctx); + + /* Setup DMA Coalescing */ + ixgbe_config_dmac(adapter); + + /* And now turn on interrupts */ + ixgbe_if_enable_intr(ctx); + +#ifdef PCI_IOV + /* Enable the use of the MBX by the VF's */ + { + u32 reg = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT); + reg |= IXGBE_CTRL_EXT_PFRSTD; + IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, reg); + } +#endif +} + +/* +** Setup the correct IVAR register for a particular MSIX interrupt +** (yes this is all very magic and confusing :) +** - entry is the register array entry +** - vector is the MSIX vector for this queue +** - type is RX/TX/MISC +*/ +static void +ixgbe_set_ivar(struct adapter *adapter, u8 entry, u8 vector, s8 type) +{ + struct ixgbe_hw *hw = &adapter->hw; + u32 ivar, index; + + vector |= IXGBE_IVAR_ALLOC_VAL; + + switch (hw->mac.type) { + + case ixgbe_mac_82598EB: + if (type == -1) + entry = IXGBE_IVAR_OTHER_CAUSES_INDEX; + else + entry += (type * 64); + index = (entry >> 2) & 0x1F; + ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index)); + ivar &= ~(0xFF << (8 * (entry & 0x3))); + ivar |= (vector << (8 * (entry & 0x3))); + IXGBE_WRITE_REG(&adapter->hw, IXGBE_IVAR(index), ivar); + break; + + case ixgbe_mac_82599EB: + case ixgbe_mac_X540: + case ixgbe_mac_X550: + case ixgbe_mac_X550EM_x: + if (type == -1) { /* MISC IVAR */ + index = (entry & 1) * 8; + ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC); + ivar &= ~(0xFF << index); + ivar |= (vector << index); + IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar); + } else { /* RX/TX IVARS */ + index = (16 * (entry & 1)) + (8 * type); + ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1)); + ivar &= ~(0xFF << index); + ivar |= (vector << index); + IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar); + } + + default: + break; + } +} + +static void +ixgbe_configure_ivars(struct adapter *adapter) +{ + struct ix_rx_queue *rx_que = adapter->rx_queues; + struct ix_tx_queue *tx_que = adapter->tx_queues; + u32 newitr; + + if (ixgbe_max_interrupt_rate > 0) + newitr = (4000000 / ixgbe_max_interrupt_rate) & 0x0FF8; + else { + /* + ** Disable DMA coalescing if interrupt moderation is + ** disabled. + */ + adapter->dmac = 0; + newitr = 0; + } + + for (int i = 0; i < adapter->num_rx_queues; i++, rx_que++) { + struct rx_ring *rxr = &rx_que->rxr; + + /* First the RX queue entry */ + ixgbe_set_ivar(adapter, rxr->me, rx_que->msix, 0); + + /* Set an Initial EITR value */ + IXGBE_WRITE_REG(&adapter->hw, IXGBE_EITR(rx_que->msix), newitr); + } + for (int i = 0; i < adapter->num_tx_queues; i++, tx_que++) { + struct tx_ring *txr = &tx_que->txr; + + /* ... 
and the TX */ + ixgbe_set_ivar(adapter, txr->me, tx_que->msix, 1); + } + /* For the Link interrupt */ + ixgbe_set_ivar(adapter, 1, adapter->vector, -1); +} + +static void +ixgbe_config_gpie(struct adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + u32 gpie; + + gpie = IXGBE_READ_REG(hw, IXGBE_GPIE); + + /* Fan Failure Interrupt */ + if (hw->device_id == IXGBE_DEV_ID_82598AT) + gpie |= IXGBE_SDP1_GPIEN; + + /* + * Module detection (SDP2) + * Media ready (SDP1) + */ + if (hw->mac.type == ixgbe_mac_82599EB) { + gpie |= IXGBE_SDP2_GPIEN; + if (hw->device_id != IXGBE_DEV_ID_82599_QSFP_SF_QP) + gpie |= IXGBE_SDP1_GPIEN; + } + + /* + * Thermal Failure Detection (X540) + * Link Detection (X557) + */ + if (hw->mac.type == ixgbe_mac_X540 || + hw->device_id == IXGBE_DEV_ID_X550EM_X_SFP || + hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T) + gpie |= IXGBE_SDP0_GPIEN_X540; + + if (adapter->intr_type == IFLIB_INTR_MSIX) { + /* Enable Enhanced MSIX mode */ + gpie |= IXGBE_GPIE_MSIX_MODE; + gpie |= IXGBE_GPIE_EIAME | IXGBE_GPIE_PBA_SUPPORT | + IXGBE_GPIE_OCD; + } + + IXGBE_WRITE_REG(hw, IXGBE_GPIE, gpie); +} + +/* + * Requires adapter->max_frame_size to be set. + */ +static void +ixgbe_config_delay_values(struct adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + u32 rxpb, frame, size, tmp; + + frame = adapter->max_frame_size; + + /* Calculate High Water */ + switch (hw->mac.type) { + case ixgbe_mac_X540: + case ixgbe_mac_X550: + case ixgbe_mac_X550EM_x: + tmp = IXGBE_DV_X540(frame, frame); + break; + default: + tmp = IXGBE_DV(frame, frame); + break; + } + size = IXGBE_BT2KB(tmp); + rxpb = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(0)) >> 10; + hw->fc.high_water[0] = rxpb - size; + + /* Now calculate Low Water */ + switch (hw->mac.type) { + case ixgbe_mac_X540: + case ixgbe_mac_X550: + case ixgbe_mac_X550EM_x: + tmp = IXGBE_LOW_DV_X540(frame); + break; + default: + tmp = IXGBE_LOW_DV(frame); + break; + } + hw->fc.low_water[0] = IXGBE_BT2KB(tmp); + + hw->fc.requested_mode = adapter->fc; + hw->fc.pause_time = IXGBE_FC_PAUSE; + hw->fc.send_xon = TRUE; +} + +/********************************************************************* + * Multicast Update + * + * This routine is called whenever multicast address list is updated. 
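+ *  Resulting filter state (illustrative): IFF_PROMISC ends up with
+ *  UPE|MPE set in FCTRL; IFF_ALLMULTI, or overflowing
+ *  MAX_NUM_MULTICAST_ADDRESSES groups, sets MPE alone; otherwise the
+ *  exact list is written to the MTA via ixgbe_update_mc_addr_list().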
+ *
+ **********************************************************************/
+#define IXGBE_RAR_ENTRIES 16
+
+static int
+ixgbe_mc_filter_apply(void *arg, struct ifmultiaddr *ifma, int count)
+{
+	struct adapter *adapter = arg;
+	struct ixgbe_mc_addr *mta = adapter->mta;
+
+	if (ifma->ifma_addr->sa_family != AF_LINK)
+		return (0);
+	if (count == MAX_NUM_MULTICAST_ADDRESSES)
+		return (0);
+	bcopy(LLADDR((struct sockaddr_dl *) ifma->ifma_addr),
+	    mta[count].addr, IXGBE_ETH_LENGTH_OF_ADDRESS);
+	mta[count].vmdq = adapter->pool;
+	return (1);
+}
+
+static void
+ixgbe_if_multi_set(if_ctx_t ctx)
+{
+	u32 fctrl;
+	u8 *update_ptr;
+	struct adapter *adapter = iflib_get_softc(ctx);
+	struct ixgbe_mc_addr *mta;
+	int mcnt = 0;
+	struct ifnet *ifp = iflib_get_ifp(ctx);
+
+	IOCTL_DEBUGOUT("ixgbe_if_multi_set: begin");
+
+	mta = adapter->mta;
+	bzero(mta, sizeof(*mta) * MAX_NUM_MULTICAST_ADDRESSES);
+
+	mcnt = if_multi_apply(iflib_get_ifp(ctx), ixgbe_mc_filter_apply, adapter);
+
+	fctrl = IXGBE_READ_REG(&adapter->hw, IXGBE_FCTRL);
+	fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
+	if (ifp->if_flags & IFF_PROMISC)
+		fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
+	else if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES ||
+	    ifp->if_flags & IFF_ALLMULTI) {
+		fctrl |= IXGBE_FCTRL_MPE;
+		fctrl &= ~IXGBE_FCTRL_UPE;
+	} else
+		fctrl &= ~(IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
+
+	IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, fctrl);
+
+	if (mcnt < MAX_NUM_MULTICAST_ADDRESSES) {
+		update_ptr = (u8 *)mta;
+		ixgbe_update_mc_addr_list(&adapter->hw, update_ptr, mcnt,
+		    ixgbe_mc_array_itr, TRUE);
+	}
+
+	IOCTL_DEBUGOUT("ixgbe_if_multi_set: end");
+}
+
+/*
+ * This is an iterator function now needed by the multicast
+ * shared code. It simply feeds the shared code routine the
+ * addresses in the array of ixgbe_if_multi_set() one by one.
+ */
+static u8 *
+ixgbe_mc_array_itr(struct ixgbe_hw *hw, u8 **update_ptr, u32 *vmdq)
+{
+	struct ixgbe_mc_addr *mta;
+
+	mta = (struct ixgbe_mc_addr *)*update_ptr;
+	*vmdq = mta->vmdq;
+
+	*update_ptr = (u8 *)(mta + 1);
+	return (mta->addr);
+}
+
+/*********************************************************************
+ *  Timer routine
+ *
+ *  This routine checks for link status, updates statistics,
+ *  and runs the watchdog check.
+ *
+ **********************************************************************/
+
+static void
+ixgbe_if_timer(if_ctx_t ctx, uint16_t qid)
+{
+	struct adapter *adapter = iflib_get_softc(ctx);
+	struct ix_tx_queue *que = &adapter->tx_queues[qid];
+	u64 queues = 0;
+
+	/* Keep track of queues with work for soft irq */
+	if (que->txr.busy)
+		queues |= ((u64)1 << que->txr.me);
+
+	if (qid != 0)
+		return;
+
+	/* Check for pluggable optics */
+	if (adapter->sfp_probe)
+		if (!ixgbe_sfp_probe(ctx))
+			return; /* Nothing to do */
+
+	ixgbe_check_link(&adapter->hw,
+	    &adapter->link_speed, &adapter->link_up, 0);
+	ixgbe_if_update_admin_status(ctx);
+	ixgbe_update_stats_counters(adapter);
+
+	/* Fire off the adminq task */
+	iflib_admin_intr_deferred(ctx);
+}
+
+/*
+** ixgbe_sfp_probe - called in the local timer to
+** determine if a port had optics inserted.
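+** (e.g. an SFP+ module hot-plugged into an 82598 "nl"-PHY port is
+** noticed here on a later timer tick rather than via an interrupt.)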
+*/ +static bool +ixgbe_sfp_probe(if_ctx_t ctx) +{ + struct adapter *adapter = iflib_get_softc(ctx); + struct ixgbe_hw *hw = &adapter->hw; + device_t dev = iflib_get_dev(ctx); + bool result = FALSE; + + if ((hw->phy.type == ixgbe_phy_nl) && + (hw->phy.sfp_type == ixgbe_sfp_type_not_present)) { + s32 ret = hw->phy.ops.identify_sfp(hw); + if (ret) + goto out; + ret = hw->phy.ops.reset(hw); + if (ret == IXGBE_ERR_SFP_NOT_SUPPORTED) { + device_printf(dev,"Unsupported SFP+ module detected!"); + printf(" Reload driver with supported module.\n"); + adapter->sfp_probe = FALSE; + goto out; + } else + device_printf(dev,"SFP+ module detected!\n"); + /* We now have supported optics */ + adapter->sfp_probe = FALSE; + /* Set the optics type so system reports correctly */ + ixgbe_setup_optics(adapter); + result = TRUE; + } +out: + return (result); +} + +/************************************************************************* + * + * Tasklet handler - ixgbe_handle_mod, ixgbe_handle_msf, ixgbe_handle_phy + * + *************************************************************************/ +/* +** Tasklet for handling SFP module interrupts +*/ +static void +ixgbe_handle_mod(void *context) +{ + if_ctx_t ctx = context; + struct adapter *adapter = iflib_get_softc(ctx); + struct ixgbe_hw *hw = &adapter->hw; + enum ixgbe_phy_type orig_type = hw->phy.type; + device_t dev = iflib_get_dev(ctx); + u32 err; + + /* Check to see if the PHY type changed */ + if (hw->phy.ops.identify) { + hw->phy.type = ixgbe_phy_unknown; + hw->phy.ops.identify(hw); + } + + if (hw->phy.type != orig_type) { + device_printf(dev, "Detected phy_type %d\n", hw->phy.type); + + if (hw->phy.type == ixgbe_phy_none) { + hw->phy.sfp_type = ixgbe_sfp_type_unknown; + goto out; + } + + /* Try to do the initialization that was skipped before */ + if (hw->phy.ops.init) + hw->phy.ops.init(hw); + if (hw->phy.ops.reset) + hw->phy.ops.reset(hw); + } + + err = hw->phy.ops.identify_sfp(hw); + if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) { + device_printf(dev, + "Unsupported SFP+ module type was detected.\n"); + goto out; + } + + err = hw->mac.ops.setup_sfp(hw); + if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) { + device_printf(dev, + "Setup failure - unsupported SFP+ module type.\n"); + goto out; + } + if (hw->phy.multispeed_fiber) + GROUPTASK_ENQUEUE(&adapter->msf_task); +out: + /* Update media type */ + switch (hw->mac.ops.get_media_type(hw)) { + case ixgbe_media_type_fiber: + adapter->optics = IFM_10G_SR; + break; + case ixgbe_media_type_copper: + adapter->optics = IFM_10G_TWINAX; + break; + case ixgbe_media_type_cx4: + adapter->optics = IFM_10G_CX4; + break; + default: + adapter->optics = 0; + break; + } +} + + +/* Tasklet for handling MSF (multispeed fiber) interrupts */ + +static void +ixgbe_handle_msf(void *context) +{ + if_ctx_t ctx = context; + struct adapter *adapter = iflib_get_softc(ctx); + struct ixgbe_hw *hw = &adapter->hw; + u32 autoneg; + bool negotiate; + + /* get_supported_phy_layer will call hw->phy.ops.identify_sfp() */ + adapter->phy_layer = ixgbe_get_supported_physical_layer(hw); + + autoneg = hw->phy.autoneg_advertised; + if ((!autoneg) && (hw->mac.ops.get_link_capabilities)) + hw->mac.ops.get_link_capabilities(hw, &autoneg, &negotiate); + if (hw->mac.ops.setup_link) + hw->mac.ops.setup_link(hw, autoneg, TRUE); + /* Adjust media types shown in ifconfig */ + ifmedia_removeall(adapter->media); + ixgbe_add_media_types(adapter->ctx); +} + + +/* Tasklet for handling interrupts from an external PHY */ + +static void +ixgbe_handle_phy(void *context) +{ + 
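+	/*
+	 * Illustrative: handle_lasi() services Link Alarm Status
+	 * Interrupt events raised by the external PHY (e.g. X557);
+	 * an IXGBE_ERR_OVERTEMP return means the PHY's thermal alarm
+	 * has tripped and it will downshift to protect itself.
+	 */
+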
if_ctx_t ctx = context; + struct adapter *adapter = iflib_get_softc(ctx); + struct ixgbe_hw *hw = &adapter->hw; + int error; + + error = hw->phy.ops.handle_lasi(hw); + if (error == IXGBE_ERR_OVERTEMP) + device_printf(adapter->dev, + "CRITICAL: EXTERNAL PHY OVER TEMP!! " + " PHY will downshift to lower power state!\n"); + else if (error) + device_printf(adapter->dev, + "Error handling LASI interrupt: %d\n", + error); + return; +} + + +/********************************************************************* + * + * This routine disables all traffic on the adapter by issuing a + * global reset on the MAC and deallocates TX/RX buffers. + * + **********************************************************************/ +static void +ixgbe_if_stop(if_ctx_t ctx) +{ + struct ifnet *ifp; + struct adapter *adapter = iflib_get_softc(ctx); + + struct ixgbe_hw *hw = &adapter->hw; + ifp = iflib_get_ifp(ctx); + + INIT_DEBUGOUT("ixgbe_stop: begin\n"); + + ixgbe_reset_hw(hw); + hw->adapter_stopped = FALSE; + ixgbe_stop_adapter(hw); + if (hw->mac.type == ixgbe_mac_82599EB) + ixgbe_stop_mac_link_on_d3_82599(hw); + /* Turn off the laser - noop with no optics */ + ixgbe_disable_tx_laser(hw); + + /* Update the stack */ + adapter->link_up = FALSE; + ixgbe_if_update_admin_status(ctx); + + /* reprogram the RAR[0] in case user changed it. */ + ixgbe_set_rar(&adapter->hw, 0, adapter->hw.mac.addr, 0, IXGBE_RAH_AV); + + return; +} + +/* +** Note: this routine updates the OS on the link state +** the real check of the hardware only happens with +** a link interrupt. +*/ +static void +ixgbe_if_update_admin_status(if_ctx_t ctx) +{ + struct adapter *adapter = iflib_get_softc(ctx); + struct ixgbe_hw *hw = &adapter->hw; + device_t dev = iflib_get_dev(ctx); + + if (adapter->link_up){ + if (adapter->link_active == FALSE) { + if (bootverbose) + device_printf(dev,"Link is up %d Gbps %s \n", + ((adapter->link_speed == 128)? 
10:1), + "Full Duplex"); + adapter->link_active = TRUE; + /* Update any Flow Control changes */ + ixgbe_fc_enable(hw); + /* Update DMA coalescing config */ + ixgbe_config_dmac(adapter); + /* should actually be negotiated value */ + iflib_link_state_change(ctx, LINK_STATE_UP, IF_Gbps(10)); + +#ifdef PCI_IOV + ixgbe_ping_all_vfs(adapter); +#endif + + } + } else { /* Link down */ + if (adapter->link_active == TRUE) { + if (bootverbose) + device_printf(dev,"Link is Down\n"); + iflib_link_state_change(ctx, LINK_STATE_DOWN, 0); + adapter->link_active = FALSE; +#ifdef PCI_IOV + ixgbe_ping_all_vfs(adapter); +#endif + } + } + /* Re-enable link interrupts */ + IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EIMS_LSC); +} + +#ifdef IXGBE_FDIR +/* +** Tasklet for reinitializing the Flow Director filter table +*/ +static void +ixgbe_reinit_fdir(void *context, int pending) +{ + struct adapter *adapter = context; + struct ifnet *ifp = iflib_get_ifp(adapter->ctx); + + if (adapter->fdir_reinit != 1) /* Shouldn't happen */ + return; + ixgbe_reinit_fdir_tables_82599(&adapter->hw); + adapter->fdir_reinit = 0; + /* re-enable flow director interrupts */ + IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, IXGBE_EIMS_FLOW_DIR); + /* Restart the interface */ + ifp->if_drv_flags |= IFF_DRV_RUNNING; +} +#endif + + +/********************************************************************* + * + * Configure DMA Coalescing + * + **********************************************************************/ +static void +ixgbe_config_dmac(struct adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + struct ixgbe_dmac_config *dcfg = &hw->mac.dmac_config; + + if (hw->mac.type < ixgbe_mac_X550 || + !hw->mac.ops.dmac_config) + return; + + if (dcfg->watchdog_timer ^ adapter->dmac || + dcfg->link_speed ^ adapter->link_speed) { + dcfg->watchdog_timer = adapter->dmac; + dcfg->fcoe_en = false; + dcfg->link_speed = adapter->link_speed; + dcfg->num_tcs = 1; + + INIT_DEBUGOUT2("dmac settings: watchdog %d, link speed %d\n", + dcfg->watchdog_timer, dcfg->link_speed); + + hw->mac.ops.dmac_config(hw); + } +} + +void +ixgbe_if_enable_intr(if_ctx_t ctx) +{ + struct adapter *adapter = iflib_get_softc(ctx); + struct ixgbe_hw *hw = &adapter->hw; + struct ix_rx_queue *que = adapter->rx_queues; + u32 mask, fwsm; + + mask = (IXGBE_EIMS_ENABLE_MASK & ~IXGBE_EIMS_RTX_QUEUE); + /* Enable Fan Failure detection */ + if (hw->device_id == IXGBE_DEV_ID_82598AT) + mask |= IXGBE_EIMS_GPI_SDP1; + + switch (adapter->hw.mac.type) { + case ixgbe_mac_82599EB: + mask |= IXGBE_EIMS_ECC; + /* Temperature sensor on some adapters */ + mask |= IXGBE_EIMS_GPI_SDP0; + /* SFP+ (RX_LOS_N & MOD_ABS_N) */ + mask |= IXGBE_EIMS_GPI_SDP1; + mask |= IXGBE_EIMS_GPI_SDP2; +#ifdef IXGBE_FDIR + mask |= IXGBE_EIMS_FLOW_DIR; +#endif +#ifdef PCI_IOV + mask |= IXGBE_EIMS_MAILBOX; +#endif + break; + case ixgbe_mac_X540: + /* Detect if Thermal Sensor is enabled */ + fwsm = IXGBE_READ_REG(hw, IXGBE_FWSM); + if (fwsm & IXGBE_FWSM_TS_ENABLED) + mask |= IXGBE_EIMS_TS; + mask |= IXGBE_EIMS_ECC; +#ifdef IXGBE_FDIR + mask |= IXGBE_EIMS_FLOW_DIR; +#endif + break; + case ixgbe_mac_X550: + case ixgbe_mac_X550EM_x: + /* MAC thermal sensor is automatically enabled */ + mask |= IXGBE_EIMS_TS; + /* Some devices use SDP0 for important information */ + if (hw->device_id == IXGBE_DEV_ID_X550EM_X_SFP || + hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T) + mask |= IXGBE_EIMS_GPI_SDP0_BY_MAC(hw); + mask |= IXGBE_EIMS_ECC; +#ifdef IXGBE_FDIR + mask |= IXGBE_EIMS_FLOW_DIR; +#endif +#ifdef PCI_IOV + mask |= IXGBE_EIMS_MAILBOX; 
+#endif + /* falls through */ + default: + break; + } + + IXGBE_WRITE_REG(hw, IXGBE_EIMS, mask); + + /* With MSI-X we use auto clear */ + if (adapter->intr_type == IFLIB_INTR_MSIX) { + mask = IXGBE_EIMS_ENABLE_MASK; + /* Don't autoclear Link */ + mask &= ~IXGBE_EIMS_OTHER; + mask &= ~IXGBE_EIMS_LSC; +#ifdef PCI_IOV + mask &= ~IXGBE_EIMS_MAILBOX; +#endif + IXGBE_WRITE_REG(hw, IXGBE_EIAC, mask); + } + + /* + ** Now enable all queues, this is done separately to + ** allow for handling the extended (beyond 32) MSIX + ** vectors that can be used by 82599 + */ + for (int i = 0; i < adapter->num_rx_queues; i++, que++) + ixgbe_enable_queue(adapter, que->msix); + + IXGBE_WRITE_FLUSH(hw); +} + +static void +ixgbe_if_disable_intr(if_ctx_t ctx) +{ + struct adapter *adapter = iflib_get_softc(ctx); + if (adapter->intr_type == IFLIB_INTR_MSIX) + IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIAC, 0); + if (adapter->hw.mac.type == ixgbe_mac_82598EB) { + IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, ~0); + } else { + IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, 0xFFFF0000); + IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC_EX(0), ~0); + IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC_EX(1), ~0); + } + IXGBE_WRITE_FLUSH(&adapter->hw); +} + + +static int +ixgbe_if_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid) +{ + struct adapter *adapter = iflib_get_softc(ctx); + struct ix_rx_queue *que = &adapter->rx_queues[rxqid]; + + ixgbe_enable_queue(adapter, que->rxr.me); + return (0); +} + +/* +** +** MSIX Interrupt Handlers and Tasklets +** +*/ + +static void +ixgbe_enable_queue(struct adapter *adapter, u32 vector) +{ + struct ixgbe_hw *hw = &adapter->hw; + u64 queue = (u64)(1 << vector); + u32 mask; + + if (hw->mac.type == ixgbe_mac_82598EB) { + mask = (IXGBE_EIMS_RTX_QUEUE & queue); + IXGBE_WRITE_REG(hw, IXGBE_EIMS, mask); + } else { + mask = (queue & 0xFFFFFFFF); + if (mask) + IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(0), mask); + mask = (queue >> 32); + if (mask) + IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(1), mask); + } +} + +static void +ixgbe_disable_queue(struct adapter *adapter, u32 vector) +{ + struct ixgbe_hw *hw = &adapter->hw; + u64 queue = (u64)(1 << vector); + u32 mask; + + if (hw->mac.type == ixgbe_mac_82598EB) { + mask = (IXGBE_EIMS_RTX_QUEUE & queue); + IXGBE_WRITE_REG(hw, IXGBE_EIMC, mask); + } else { + mask = (queue & 0xFFFFFFFF); + if (mask) + IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(0), mask); + mask = (queue >> 32); + if (mask) + IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(1), mask); + } +} + + +/********************************************************************* + * + * Legacy Interrupt Service routine + * + **********************************************************************/ +int +ixgbe_intr(void *arg) +{ + struct ix_rx_queue *que = arg; + struct adapter *adapter = que->adapter; + struct ixgbe_hw *hw = &adapter->hw; + if_ctx_t ctx = adapter->ctx; + u32 reg_eicr; + + reg_eicr = IXGBE_READ_REG(hw, IXGBE_EICR); + + ++que->irqs; + if (reg_eicr == 0) { + ixgbe_if_enable_intr(ctx); + return (FILTER_HANDLED); + } + + /* Check for fan failure */ + if ((hw->device_id == IXGBE_DEV_ID_82598AT) && + (reg_eicr & IXGBE_EICR_GPI_SDP1)) { + device_printf(adapter->dev, "\nCRITICAL: FAN FAILURE!! 
" + "REPLACE IMMEDIATELY!!\n"); + IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EICR_GPI_SDP1_BY_MAC(hw)); + } + + /* Link status change */ + if (reg_eicr & IXGBE_EICR_LSC) { + IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_EIMC_LSC); + iflib_admin_intr_deferred(ctx); + } + /* External PHY interrupt */ + if (hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T && + (reg_eicr & IXGBE_EICR_GPI_SDP0_X540)) { + GROUPTASK_ENQUEUE(&adapter->phy_task); + } + + return (FILTER_SCHEDULE_THREAD); +} + +static void +ixgbe_free_pci_resources(if_ctx_t ctx) +{ + struct adapter *adapter = iflib_get_softc(ctx); + struct ix_rx_queue *que = adapter->rx_queues; + device_t dev = iflib_get_dev(ctx); + + /* Release all msix queue resources */ + if (adapter->intr_type == IFLIB_INTR_MSIX) + iflib_irq_free(ctx, &adapter->irq); + + for (int i = 0; i < adapter->num_rx_queues; i++, que++) { + iflib_irq_free(ctx, &que->que_irq); + } + + /* + * Free link/admin interrupt + */ + if (adapter->pci_mem != NULL) + bus_release_resource(dev, SYS_RES_MEMORY, + PCIR_BAR(0), adapter->pci_mem); +} + +static void +ixgbe_set_sysctl_value(struct adapter *adapter, const char *name, + const char *description, int *limit, int value) +{ + *limit = value; + SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev), + SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)), + OID_AUTO, name, CTLFLAG_RW, limit, value, description); +} + +/* Sysctls */ +/* +** Set flow control using sysctl: +** Flow control values: +** 0 - off +** 1 - rx pause +** 2 - tx pause +** 3 - full +*/ +static int +ixgbe_sysctl_flowcntl(SYSCTL_HANDLER_ARGS) +{ + int error, fc; + struct adapter *adapter; + + adapter = (struct adapter *) arg1; + fc = adapter->fc; + + error = sysctl_handle_int(oidp, &fc, 0, req); + if ((error) || (req->newptr == NULL)) + return (error); + + /* Don't bother if it's not changed */ + if (adapter->fc == fc) + return (0); + + return ixgbe_set_flowcntl(adapter, fc); +} + +static int +ixgbe_set_flowcntl(struct adapter *adapter, int fc) +{ + switch (adapter->fc) { + case ixgbe_fc_rx_pause: + case ixgbe_fc_tx_pause: + case ixgbe_fc_full: + adapter->hw.fc.requested_mode = adapter->fc; + if (adapter->num_rx_queues > 1) + ixgbe_disable_rx_drop(adapter); + break; + case ixgbe_fc_none: + adapter->hw.fc.requested_mode = ixgbe_fc_none; + if (adapter->num_rx_queues > 1) + ixgbe_enable_rx_drop(adapter); + break; + default: + return (EINVAL); + } + + adapter->fc = fc; + /* Don't autoneg if forcing a value */ + adapter->hw.fc.disable_fc_autoneg = TRUE; + ixgbe_fc_enable(&adapter->hw); + return (0); +} + +/* +** Enable the hardware to drop packets when the buffer is +** full. This is useful when multiqueue,so that no single +** queue being full stalls the entire RX engine. We only +** enable this when Multiqueue AND when Flow Control is +** disabled. 
+*/ +static void +ixgbe_enable_rx_drop(struct adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + + for (int i = 0; i < adapter->num_rx_queues; i++) { + struct rx_ring *rxr = &adapter->rx_queues[i].rxr; + u32 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(rxr->me)); + srrctl |= IXGBE_SRRCTL_DROP_EN; + IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxr->me), srrctl); + } +#ifdef PCI_IOV + /* enable drop for each vf */ + for (int i = 0; i < adapter->num_vfs; i++) { + IXGBE_WRITE_REG(hw, IXGBE_QDE, + (IXGBE_QDE_WRITE | (i << IXGBE_QDE_IDX_SHIFT) | + IXGBE_QDE_ENABLE)); + } +#endif +} + +static void +ixgbe_disable_rx_drop(struct adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + + for (int i = 0; i < adapter->num_rx_queues; i++) { + struct rx_ring *rxr = &adapter->rx_queues[i].rxr; + u32 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(rxr->me)); + srrctl &= ~IXGBE_SRRCTL_DROP_EN; + IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxr->me), srrctl); + } +#ifdef PCI_IOV + /* disable drop for each vf */ + for (int i = 0; i < adapter->num_vfs; i++) { + IXGBE_WRITE_REG(hw, IXGBE_QDE, + (IXGBE_QDE_WRITE | (i << IXGBE_QDE_IDX_SHIFT))); + } +#endif +} + +/* +** Control advertised link speed: +** Flags: +** 0x1 - advertise 100 Mb +** 0x2 - advertise 1G +** 0x4 - advertise 10G +*/ +static int +ixgbe_sysctl_advertise(SYSCTL_HANDLER_ARGS) +{ + int error, advertise; + struct adapter *adapter; + + adapter = (struct adapter *) arg1; + advertise = adapter->advertise; + + error = sysctl_handle_int(oidp, &advertise, 0, req); + if ((error) || (req->newptr == NULL)) + return (error); + + /* Checks to validate new value */ + if (adapter->advertise == advertise) /* no change */ + return (0); + + return ixgbe_set_advertise(adapter, advertise); +} + +static int +ixgbe_set_advertise(struct adapter *adapter, int advertise) +{ + device_t dev = iflib_get_dev(adapter->ctx); + struct ixgbe_hw *hw; + ixgbe_link_speed speed; + + hw = &adapter->hw; + /* No speed changes for backplane media */ + if (hw->phy.media_type == ixgbe_media_type_backplane) + return (ENODEV); + + if (!((hw->phy.media_type == ixgbe_media_type_copper) || + (hw->phy.multispeed_fiber))) { + device_printf(dev, "Advertised speed can only be set on copper or " + "multispeed fiber media types.\n"); + return (EINVAL); + } + + if (advertise < 0x1 || advertise > 0x7) { + device_printf(dev, "Invalid advertised speed; valid modes are 0x1 through 0x7\n"); + return (EINVAL); + } + + if ((advertise & 0x1) + && (hw->mac.type != ixgbe_mac_X540) + && (hw->mac.type != ixgbe_mac_X550)) { + device_printf(dev, "Set Advertise: 100Mb on X540/X550 only\n"); + return (EINVAL); + } + + /* Set new value and report new advertised mode */ + speed = 0; + if (advertise & 0x1) + speed |= IXGBE_LINK_SPEED_100_FULL; + if (advertise & 0x2) + speed |= IXGBE_LINK_SPEED_1GB_FULL; + if (advertise & 0x4) + speed |= IXGBE_LINK_SPEED_10GB_FULL; + + hw->mac.autotry_restart = TRUE; + hw->mac.ops.setup_link(hw, speed, TRUE); + adapter->advertise = advertise; + + return (0); +} + +/** Thermal Shutdown Trigger (internal MAC) + ** - Set this to 1 to cause an overtemp event to occur + */ +static int +ixgbe_sysctl_thermal_test(SYSCTL_HANDLER_ARGS) +{ + struct adapter *adapter = (struct adapter *) arg1; + struct ixgbe_hw *hw = &adapter->hw; + int error, fire = 0; + + error = sysctl_handle_int(oidp, &fire, 0, req); + if ((error) || (req->newptr == NULL)) + return (error); + + if (fire) { + printf("Warning: Thermal Shutdown trigger\n"); + u32 reg = IXGBE_READ_REG(hw, IXGBE_EICS); + reg |= IXGBE_EICR_TS; + IXGBE_WRITE_REG(hw, 
IXGBE_EICS, reg); + } + + return (0); +} + +/* Manage DMA Coalescing. +** Control values: +** 0/1 - off / on (use default value of 1000) +** +** Legal timer values are: +** 50,100,250,500,1000,2000,5000,10000 +** +** Turning off interrupt moderation will also turn this off. +*/ +static int +ixgbe_sysctl_dmac(SYSCTL_HANDLER_ARGS) +{ + struct adapter *adapter = (struct adapter *) arg1; + struct ifnet *ifp = iflib_get_ifp(adapter->ctx); + int error; + u16 newval; + + newval = adapter->dmac; + error = sysctl_handle_int(oidp, &newval, 0, req); + if ((error) || (req->newptr == NULL)) + return (error); + + switch (newval) { + case 0: + /* Disabled */ + adapter->dmac = 0; + break; + case 1: /* Enable and use default */ + adapter->dmac = 1000; + break; + case 50: + case 100: + case 250: + case 500: + case 1000: + case 2000: + case 5000: + case 10000: + /* Legal values - allow */ + adapter->dmac = newval; + break; + default: + /* Do nothing, illegal value */ + return (EINVAL); + } + + /* Re-initialize hardware if it's already running */ + if (ifp->if_drv_flags & IFF_DRV_RUNNING) + ifp->if_init(ifp); + + return (0); +} + +#ifdef IXGBE_DEBUG +/** + * Sysctl to test power states + * Values: + * 0 - set device to D0 + * 3 - set device to D3 + * (none) - get current device power state + */ +static int +ixgbe_sysctl_power_state(SYSCTL_HANDLER_ARGS) +{ + struct adapter *adapter = (struct adapter *) arg1; + device_t dev = adapter->dev; + int curr_ps, new_ps, error = 0; + + curr_ps = new_ps = pci_get_powerstate(dev); + + error = sysctl_handle_int(oidp, &new_ps, 0, req); + if ((error) || (req->newptr == NULL)) + return (error); + + if (new_ps == curr_ps) + return (0); + + if (new_ps == 3 && curr_ps == 0) + error = DEVICE_SUSPEND(dev); + else if (new_ps == 0 && curr_ps == 3) + error = DEVICE_RESUME(dev); + else + return (EINVAL); + + device_printf(dev, "New state: %d\n", pci_get_powerstate(dev)); + + return (error); +} +#endif + +/* + * Sysctl to enable/disable the WoL capability, if supported by the adapter. + * Values: + * 0 - disabled + * 1 - enabled + */ +static int +ixgbe_sysctl_wol_enable(SYSCTL_HANDLER_ARGS) +{ + struct adapter *adapter = (struct adapter *) arg1; + struct ixgbe_hw *hw = &adapter->hw; + int new_wol_enabled; + int error = 0; + + new_wol_enabled = hw->wol_enabled; + error = sysctl_handle_int(oidp, &new_wol_enabled, 0, req); + if ((error) || (req->newptr == NULL)) + return (error); + if (new_wol_enabled == hw->wol_enabled) + return (0); + + new_wol_enabled = !!(new_wol_enabled); + if (new_wol_enabled > 0 && !adapter->wol_support) + return (ENODEV); + else + hw->wol_enabled = new_wol_enabled; + + return (0); +} + +/* + * Read-only sysctl indicating whether EEE support was negotiated + * on the link. + */ +static int +ixgbe_sysctl_eee_negotiated(SYSCTL_HANDLER_ARGS) +{ + struct adapter *adapter = (struct adapter *) arg1; + struct ixgbe_hw *hw = &adapter->hw; + bool status; + + status = !!(IXGBE_READ_REG(hw, IXGBE_EEE_STAT) & IXGBE_EEE_STAT_NEG); + + return (sysctl_handle_int(oidp, 0, status, req)); +} + +/* + * Read-only sysctl indicating whether RX Link is in LPI state. + */ +static int +ixgbe_sysctl_eee_rx_lpi_status(SYSCTL_HANDLER_ARGS) +{ + struct adapter *adapter = (struct adapter *) arg1; + struct ixgbe_hw *hw = &adapter->hw; + bool status; + + status = !!(IXGBE_READ_REG(hw, IXGBE_EEE_STAT) & + IXGBE_EEE_RX_LPI_STATUS); + + return (sysctl_handle_int(oidp, 0, status, req)); +} + +/* + * Read-only sysctl indicating whether TX Link is in LPI state. 
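For reference, a hedged sketch of how a read-only node like the handlers above is typically attached; the real registration lives in ixgbe_add_device_sysctls(), which is not part of this hunk, so the node name and description here are illustrative:

    SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
        SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO,
        "eee_rx_lpi_status", CTLTYPE_INT | CTLFLAG_RD,
        adapter, 0, ixgbe_sysctl_eee_rx_lpi_status, "I",
        "Whether the RX link is in LPI state");

Passing the value through arg2 of sysctl_handle_int() with a NULL variable pointer, as these handlers do, is what keeps the nodes read-only even without the flag.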
+ */ +static int +ixgbe_sysctl_eee_tx_lpi_status(SYSCTL_HANDLER_ARGS) +{ + struct adapter *adapter = (struct adapter *) arg1; + struct ixgbe_hw *hw = &adapter->hw; + bool status; + + status = !!(IXGBE_READ_REG(hw, IXGBE_EEE_STAT) & + IXGBE_EEE_TX_LPI_STATUS); + + return (sysctl_handle_int(oidp, 0, status, req)); +} + +/* + * Read-only sysctl indicating TX Link LPI delay + */ +static int +ixgbe_sysctl_eee_tx_lpi_delay(SYSCTL_HANDLER_ARGS) +{ + struct adapter *adapter = (struct adapter *) arg1; + struct ixgbe_hw *hw = &adapter->hw; + u32 reg; + + reg = IXGBE_READ_REG(hw, IXGBE_EEE_SU); + + return (sysctl_handle_int(oidp, 0, reg >> 26, req)); +} + +/* + * Sysctl to enable/disable the types of packets that the + * adapter will wake up on upon receipt. + * WUFC - Wake Up Filter Control + * Flags: + * 0x1 - Link Status Change + * 0x2 - Magic Packet + * 0x4 - Direct Exact + * 0x8 - Directed Multicast + * 0x10 - Broadcast + * 0x20 - ARP/IPv4 Request Packet + * 0x40 - Direct IPv4 Packet + * 0x80 - Direct IPv6 Packet + * + * Setting another flag will cause the sysctl to return an + * error. + */ +static int +ixgbe_sysctl_wufc(SYSCTL_HANDLER_ARGS) +{ + struct adapter *adapter = (struct adapter *) arg1; + int error = 0; + u32 new_wufc; + + new_wufc = adapter->wufc; + + error = sysctl_handle_int(oidp, &new_wufc, 0, req); + if ((error) || (req->newptr == NULL)) + return (error); + if (new_wufc == adapter->wufc) + return (0); + + if (new_wufc & 0xffffff00) + return (EINVAL); + else { + new_wufc &= 0xff; + new_wufc |= (0xffffff & adapter->wufc); + adapter->wufc = new_wufc; + } + + return (0); +} + + +#ifdef IXGBE_DEBUG +static int +ixgbe_sysctl_print_rss_config(SYSCTL_HANDLER_ARGS) +{ + struct adapter *adapter = (struct adapter *)arg1; + struct ixgbe_hw *hw = &adapter->hw; + device_t dev = adapter->dev; + int error = 0, reta_size; + struct sbuf *buf; + u32 reg; + + buf = sbuf_new_for_sysctl(NULL, NULL, 128, req); + if (!buf) { + device_printf(dev, "Could not allocate sbuf for output.\n"); + return (ENOMEM); + } + + // TODO: use sbufs to make a string to print out + /* Set multiplier for RETA setup and table size based on MAC */ + switch (adapter->hw.mac.type) { + case ixgbe_mac_X550: + case ixgbe_mac_X550EM_x: + reta_size = 128; + break; + default: + reta_size = 32; + break; + } + + /* Print out the redirection table */ + sbuf_cat(buf, "\n"); + for (int i = 0; i < reta_size; i++) { + if (i < 32) { + reg = IXGBE_READ_REG(hw, IXGBE_RETA(i)); + sbuf_printf(buf, "RETA(%2d): 0x%08x\n", i, reg); + } else { + reg = IXGBE_READ_REG(hw, IXGBE_ERETA(i - 32)); + sbuf_printf(buf, "ERETA(%2d): 0x%08x\n", i - 32, reg); + } + } + + // TODO: print more config + + error = sbuf_finish(buf); + if (error) + device_printf(dev, "Error finishing sbuf: %d\n", error); + + sbuf_delete(buf); + return (0); +} +#endif /* IXGBE_DEBUG */ + +/* + * The following two sysctls are for X550 BaseT devices; + * they deal with the external PHY used in them. 
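A small worked example of the redirection table dump above: each 32-bit RETA/ERETA register packs four 8-bit RX queue indices, least-significant byte first, so a single register decodes as:

    u32 reta = 0x03020100;          /* sample register contents */
    for (int j = 0; j < 4; j++)     /* entries 0..3 map to queues 0..3 here */
            printf("entry %d -> queue %u\n", j, (reta >> (j * 8)) & 0xff);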
+ */ +static int +ixgbe_sysctl_phy_temp(SYSCTL_HANDLER_ARGS) +{ + struct adapter *adapter = (struct adapter *) arg1; + struct ixgbe_hw *hw = &adapter->hw; + u16 reg; + + if (hw->device_id != IXGBE_DEV_ID_X550EM_X_10G_T) { + device_printf(iflib_get_dev(adapter->ctx), + "Device has no supported external thermal sensor.\n"); + return (ENODEV); + } + + if (hw->phy.ops.read_reg(hw, IXGBE_PHY_CURRENT_TEMP, + IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, + ®)) { + device_printf(iflib_get_dev(adapter->ctx), + "Error reading from PHY's current temperature register\n"); + return (EAGAIN); + } + + /* Shift temp for output */ + reg = reg >> 8; + + return (sysctl_handle_int(oidp, NULL, reg, req)); +} + +/* + * Reports whether the current PHY temperature is over + * the overtemp threshold. + * - This is reported directly from the PHY + */ +static int +ixgbe_sysctl_phy_overtemp_occurred(SYSCTL_HANDLER_ARGS) +{ + struct adapter *adapter = (struct adapter *) arg1; + struct ixgbe_hw *hw = &adapter->hw; + u16 reg; + + if (hw->device_id != IXGBE_DEV_ID_X550EM_X_10G_T) { + device_printf(iflib_get_dev(adapter->ctx), + "Device has no supported external thermal sensor.\n"); + return (ENODEV); + } + + if (hw->phy.ops.read_reg(hw, IXGBE_PHY_OVERTEMP_STATUS, + IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, + ®)) { + device_printf(iflib_get_dev(adapter->ctx), + "Error reading from PHY's temperature status register\n"); + return (EAGAIN); + } + + /* Get occurrence bit */ + reg = !!(reg & 0x4000); + return (sysctl_handle_int(oidp, 0, reg, req)); +} + +/* + * Sysctl to enable/disable the Energy Efficient Ethernet capability, + * if supported by the adapter. + * Values: + * 0 - disabled + * 1 - enabled + */ +static int +ixgbe_sysctl_eee_enable(SYSCTL_HANDLER_ARGS) +{ + struct adapter *adapter = (struct adapter *) arg1; + struct ixgbe_hw *hw = &adapter->hw; + struct ifnet *ifp = iflib_get_ifp(adapter->ctx); + int new_eee_enabled, error = 0; + + new_eee_enabled = adapter->eee_enabled; + error = sysctl_handle_int(oidp, &new_eee_enabled, 0, req); + if ((error) || (req->newptr == NULL)) + return (error); + new_eee_enabled = !!(new_eee_enabled); + if (new_eee_enabled == adapter->eee_enabled) + return (0); + + if (new_eee_enabled > 0 && !hw->mac.ops.setup_eee) + return (ENODEV); + else + adapter->eee_enabled = new_eee_enabled; + + /* Re-initialize hardware if it's already running */ + if (ifp->if_drv_flags & IFF_DRV_RUNNING) + ifp->if_init(ifp); + + return (0); +} + +#ifdef PCI_IOV + +/* +** Support functions for SRIOV/VF management +*/ + +static void +ixgbe_ping_all_vfs(struct adapter *adapter) +{ + struct ixgbe_vf *vf; + + for (int i = 0; i < adapter->num_vfs; i++) { + vf = &adapter->vfs[i]; + if (vf->flags & IXGBE_VF_ACTIVE) + ixgbe_send_vf_msg(adapter, vf, IXGBE_PF_CONTROL_MSG); + } +} + + +static void +ixgbe_vf_set_default_vlan(struct adapter *adapter, struct ixgbe_vf *vf, + uint16_t tag) +{ + struct ixgbe_hw *hw; + uint32_t vmolr, vmvir; + + hw = &adapter->hw; + + vf->vlan_tag = tag; + + vmolr = IXGBE_READ_REG(hw, IXGBE_VMOLR(vf->pool)); + + /* Do not receive packets that pass inexact filters. */ + vmolr &= ~(IXGBE_VMOLR_ROMPE | IXGBE_VMOLR_ROPE); + + /* Disable Multicast Promicuous Mode. */ + vmolr &= ~IXGBE_VMOLR_MPE; + + /* Accept broadcasts. */ + vmolr |= IXGBE_VMOLR_BAM; + + if (tag == 0) { + /* Accept non-vlan tagged traffic. */ + //vmolr |= IXGBE_VMOLR_AUPE; + + /* Allow VM to tag outgoing traffic; no default tag. */ + vmvir = 0; + } else { + /* Require vlan-tagged traffic. 
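A worked example of the VMVIR setup in ixgbe_vf_set_default_vlan() above: for a default tag of 100 the register gets the VLAN ID in its low bits plus the "always insert" flag, so the PF tags everything the VF transmits:

    uint16_t tag = 100;                                  /* example VLAN id */
    uint32_t vmvir = tag | IXGBE_VMVIR_VLANA_DEFAULT;    /* insert tag 100 on TX */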
*/ + vmolr &= ~IXGBE_VMOLR_AUPE; + + /* Tag all traffic with provided vlan tag. */ + vmvir = (tag | IXGBE_VMVIR_VLANA_DEFAULT); + } + IXGBE_WRITE_REG(hw, IXGBE_VMOLR(vf->pool), vmolr); + IXGBE_WRITE_REG(hw, IXGBE_VMVIR(vf->pool), vmvir); +} + + +static boolean_t +ixgbe_vf_frame_size_compatible(struct adapter *adapter, struct ixgbe_vf *vf) +{ + + /* + * Frame size compatibility between PF and VF is only a problem on + * 82599-based cards. X540 and later support any combination of jumbo + * frames on PFs and VFs. + */ + if (adapter->hw.mac.type != ixgbe_mac_82599EB) + return (TRUE); + + switch (vf->api_ver) { + case IXGBE_API_VER_1_0: + case IXGBE_API_VER_UNKNOWN: + /* + * On legacy (1.0 and older) VF versions, we don't support jumbo + * frames on either the PF or the VF. + */ + if (adapter->max_frame_size > ETHER_MAX_LEN || + vf->maximum_frame_size > ETHER_MAX_LEN) + return (FALSE); + + return (TRUE); + + break; + case IXGBE_API_VER_1_1: + default: + /* + * 1.1 or later VF versions always work if they aren't using + * jumbo frames. + */ + if (vf->maximum_frame_size <= ETHER_MAX_LEN) + return (TRUE); + + /* + * Jumbo frames only work with VFs if the PF is also using jumbo + * frames. + */ + if (adapter->max_frame_size <= ETHER_MAX_LEN) + return (TRUE); + + return (FALSE); + + } +} + + +static void +ixgbe_process_vf_reset(struct adapter *adapter, struct ixgbe_vf *vf) +{ + ixgbe_vf_set_default_vlan(adapter, vf, vf->default_vlan); + + // XXX clear multicast addresses + + ixgbe_clear_rar(&adapter->hw, vf->rar_index); + + vf->api_ver = IXGBE_API_VER_UNKNOWN; +} + + +static void +ixgbe_vf_enable_transmit(struct adapter *adapter, struct ixgbe_vf *vf) +{ + struct ixgbe_hw *hw; + uint32_t vf_index, vfte; + + hw = &adapter->hw; + + vf_index = IXGBE_VF_INDEX(vf->pool); + vfte = IXGBE_READ_REG(hw, IXGBE_VFTE(vf_index)); + vfte |= IXGBE_VF_BIT(vf->pool); + IXGBE_WRITE_REG(hw, IXGBE_VFTE(vf_index), vfte); +} + + +static void +ixgbe_vf_enable_receive(struct adapter *adapter, struct ixgbe_vf *vf) +{ + struct ixgbe_hw *hw; + uint32_t vf_index, vfre; + + hw = &adapter->hw; + + vf_index = IXGBE_VF_INDEX(vf->pool); + vfre = IXGBE_READ_REG(hw, IXGBE_VFRE(vf_index)); + if (ixgbe_vf_frame_size_compatible(adapter, vf)) + vfre |= IXGBE_VF_BIT(vf->pool); + else + vfre &= ~IXGBE_VF_BIT(vf->pool); + IXGBE_WRITE_REG(hw, IXGBE_VFRE(vf_index), vfre); +} + + +static void +ixgbe_vf_reset_msg(struct adapter *adapter, struct ixgbe_vf *vf, uint32_t *msg) +{ + struct ixgbe_hw *hw; + uint32_t ack; + uint32_t resp[IXGBE_VF_PERMADDR_MSG_LEN]; + + hw = &adapter->hw; + + ixgbe_process_vf_reset(adapter, vf); + + if (ixgbe_validate_mac_addr(vf->ether_addr) == 0) { + ixgbe_set_rar(&adapter->hw, vf->rar_index, + vf->ether_addr, vf->pool, TRUE); + ack = IXGBE_VT_MSGTYPE_ACK; + } else + ack = IXGBE_VT_MSGTYPE_NACK; + + ixgbe_vf_enable_transmit(adapter, vf); + ixgbe_vf_enable_receive(adapter, vf); + + vf->flags |= IXGBE_VF_CTS; + + resp[0] = IXGBE_VF_RESET | ack | IXGBE_VT_MSGTYPE_CTS; + bcopy(vf->ether_addr, &resp[1], ETHER_ADDR_LEN); + resp[3] = hw->mac.mc_filter_type; + ixgbe_write_mbx(hw, resp, IXGBE_VF_PERMADDR_MSG_LEN, vf->pool); +} + + +static void +ixgbe_vf_set_mac(struct adapter *adapter, struct ixgbe_vf *vf, uint32_t *msg) +{ + uint8_t *mac; + + mac = (uint8_t*)&msg[1]; + + /* Check that the VF has permission to change the MAC address. 
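A hypothetical VF-side view of the reset reply assembled in ixgbe_vf_reset_msg() above (the VF driver is not part of this diff): word 0 carries the message type plus ACK/NACK and CTS, words 1-2 the permanent MAC, and word 3 the multicast filter type:

    uint32_t resp[IXGBE_VF_PERMADDR_MSG_LEN];   /* as read back by the VF */
    uint8_t mac[ETHER_ADDR_LEN];

    if (resp[0] & IXGBE_VT_MSGTYPE_ACK) {
            bcopy(&resp[1], mac, ETHER_ADDR_LEN);   /* words 1-2: permanent MAC */
            /* resp[3] is hw->mac.mc_filter_type, used for multicast hashing */
    }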
*/ + if (!(vf->flags & IXGBE_VF_CAP_MAC) && ixgbe_vf_mac_changed(vf, mac)) { + ixgbe_send_vf_nack(adapter, vf, msg[0]); + return; + } + + if (ixgbe_validate_mac_addr(mac) != 0) { + ixgbe_send_vf_nack(adapter, vf, msg[0]); + return; + } + + bcopy(mac, vf->ether_addr, ETHER_ADDR_LEN); + + ixgbe_set_rar(&adapter->hw, vf->rar_index, vf->ether_addr, + vf->pool, TRUE); + + ixgbe_send_vf_ack(adapter, vf, msg[0]); +} + + +/* +** VF multicast addresses are set by using the appropriate bit in +** 1 of 128 32 bit addresses (4096 possible). +*/ +static void +ixgbe_vf_set_mc_addr(struct adapter *adapter, struct ixgbe_vf *vf, u32 *msg) +{ + u16 *list = (u16*)&msg[1]; + int entries; + u32 vmolr, vec_bit, vec_reg, mta_reg; + + entries = (msg[0] & IXGBE_VT_MSGINFO_MASK) >> IXGBE_VT_MSGINFO_SHIFT; + entries = min(entries, IXGBE_MAX_VF_MC); + + vmolr = IXGBE_READ_REG(&adapter->hw, IXGBE_VMOLR(vf->pool)); + + vf->num_mc_hashes = entries; + + /* Set the appropriate MTA bit */ + for (int i = 0; i < entries; i++) { + vf->mc_hash[i] = list[i]; + vec_reg = (vf->mc_hash[i] >> 5) & 0x7F; + vec_bit = vf->mc_hash[i] & 0x1F; + mta_reg = IXGBE_READ_REG(&adapter->hw, IXGBE_MTA(vec_reg)); + mta_reg |= (1 << vec_bit); + IXGBE_WRITE_REG(&adapter->hw, IXGBE_MTA(vec_reg), mta_reg); + } + + vmolr |= IXGBE_VMOLR_ROMPE; + IXGBE_WRITE_REG(&adapter->hw, IXGBE_VMOLR(vf->pool), vmolr); + ixgbe_send_vf_ack(adapter, vf, msg[0]); +} + + +static void +ixgbe_vf_set_vlan(struct adapter *adapter, struct ixgbe_vf *vf, uint32_t *msg) +{ + struct ixgbe_hw *hw; + int enable; + uint16_t tag; + + hw = &adapter->hw; + enable = IXGBE_VT_MSGINFO(msg[0]); + tag = msg[1] & IXGBE_VLVF_VLANID_MASK; + + if (!(vf->flags & IXGBE_VF_CAP_VLAN)) { + ixgbe_send_vf_nack(adapter, vf, msg[0]); + return; + } + + /* It is illegal to enable vlan tag 0. */ + if (tag == 0 && enable != 0){ + ixgbe_send_vf_nack(adapter, vf, msg[0]); + return; + } + + ixgbe_set_vfta(hw, tag, vf->pool, enable); + ixgbe_send_vf_ack(adapter, vf, msg[0]); +} + + +static void +ixgbe_vf_set_lpe(struct adapter *adapter, struct ixgbe_vf *vf, uint32_t *msg) +{ + struct ixgbe_hw *hw; + uint32_t vf_max_size, pf_max_size, mhadd; + + hw = &adapter->hw; + vf_max_size = msg[1]; + + if (vf_max_size < ETHER_CRC_LEN) { + /* We intentionally ACK invalid LPE requests. */ + ixgbe_send_vf_ack(adapter, vf, msg[0]); + return; + } + + vf_max_size -= ETHER_CRC_LEN; + + if (vf_max_size > IXGBE_MAX_FRAME_SIZE) { + /* We intentionally ACK invalid LPE requests. */ + ixgbe_send_vf_ack(adapter, vf, msg[0]); + return; + } + + vf->maximum_frame_size = vf_max_size; + ixgbe_update_max_frame(adapter, vf->maximum_frame_size); + + /* + * We might have to disable reception to this VF if the frame size is + * not compatible with the config on the PF. 
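A worked example of the MTA indexing used in ixgbe_vf_set_mc_addr() above, for a 12-bit hash value of 0x876:

    u16 hash = 0x0876;
    u32 vec_reg = (hash >> 5) & 0x7F;   /* = 0x43: which of the 128 MTA registers */
    u32 vec_bit = hash & 0x1F;          /* = 22: which bit within that register   */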
+ */ + ixgbe_vf_enable_receive(adapter, vf); + + mhadd = IXGBE_READ_REG(hw, IXGBE_MHADD); + pf_max_size = (mhadd & IXGBE_MHADD_MFS_MASK) >> IXGBE_MHADD_MFS_SHIFT; + + if (pf_max_size < adapter->max_frame_size) { + mhadd &= ~IXGBE_MHADD_MFS_MASK; + mhadd |= adapter->max_frame_size << IXGBE_MHADD_MFS_SHIFT; + IXGBE_WRITE_REG(hw, IXGBE_MHADD, mhadd); + } + + ixgbe_send_vf_ack(adapter, vf, msg[0]); +} + + +static void +ixgbe_vf_set_macvlan(struct adapter *adapter, struct ixgbe_vf *vf, + uint32_t *msg) +{ + //XXX implement this + ixgbe_send_vf_nack(adapter, vf, msg[0]); +} + + +static void +ixgbe_vf_api_negotiate(struct adapter *adapter, struct ixgbe_vf *vf, + uint32_t *msg) +{ + + switch (msg[1]) { + case IXGBE_API_VER_1_0: + case IXGBE_API_VER_1_1: + vf->api_ver = msg[1]; + ixgbe_send_vf_ack(adapter, vf, msg[0]); + break; + default: + vf->api_ver = IXGBE_API_VER_UNKNOWN; + ixgbe_send_vf_nack(adapter, vf, msg[0]); + break; + } +} + + +static void +ixgbe_vf_get_queues(struct adapter *adapter, struct ixgbe_vf *vf, + uint32_t *msg) +{ + struct ixgbe_hw *hw; + uint32_t resp[IXGBE_VF_GET_QUEUES_RESP_LEN]; + int number_queues; + + hw = &adapter->hw; + + /* GET_QUEUES is not supported on pre-1.1 APIs. */ + switch (msg[0]) { + case IXGBE_API_VER_1_0: + case IXGBE_API_VER_UNKNOWN: + ixgbe_send_vf_nack(adapter, vf, msg[0]); + return; + } + + resp[0] = IXGBE_VF_GET_QUEUES | IXGBE_VT_MSGTYPE_ACK | + IXGBE_VT_MSGTYPE_CTS; + + number_queues = ixgbe_vf_queues(ixgbe_get_iov_mode(adapter)); + resp[IXGBE_VF_TX_QUEUES] = number_queues; + resp[IXGBE_VF_RX_QUEUES] = number_queues; + resp[IXGBE_VF_TRANS_VLAN] = (vf->default_vlan != 0); + resp[IXGBE_VF_DEF_QUEUE] = 0; + + ixgbe_write_mbx(hw, resp, IXGBE_VF_GET_QUEUES_RESP_LEN, vf->pool); +} + + +static void +ixgbe_process_vf_msg(struct adapter *adapter, struct ixgbe_vf *vf) +{ + struct ixgbe_hw *hw; + uint32_t msg[IXGBE_VFMAILBOX_SIZE]; + int error; + + hw = &adapter->hw; + + error = ixgbe_read_mbx(hw, msg, IXGBE_VFMAILBOX_SIZE, vf->pool); + + if (error != 0) + return; + + CTR3(KTR_MALLOC, "%s: received msg %x from %d", + adapter->ifp->if_xname, msg[0], vf->pool); + if (msg[0] == IXGBE_VF_RESET) { + ixgbe_vf_reset_msg(adapter, vf, msg); + return; + } + + if (!(vf->flags & IXGBE_VF_CTS)) { + ixgbe_send_vf_nack(adapter, vf, msg[0]); + return; + } + + switch (msg[0] & IXGBE_VT_MSG_MASK) { + case IXGBE_VF_SET_MAC_ADDR: + ixgbe_vf_set_mac(adapter, vf, msg); + break; + case IXGBE_VF_SET_MULTICAST: + ixgbe_vf_set_mc_addr(adapter, vf, msg); + break; + case IXGBE_VF_SET_VLAN: + ixgbe_vf_set_vlan(adapter, vf, msg); + break; + case IXGBE_VF_SET_LPE: + ixgbe_vf_set_lpe(adapter, vf, msg); + break; + case IXGBE_VF_SET_MACVLAN: + ixgbe_vf_set_macvlan(adapter, vf, msg); + break; + case IXGBE_VF_API_NEGOTIATE: + ixgbe_vf_api_negotiate(adapter, vf, msg); + break; + case IXGBE_VF_GET_QUEUES: + ixgbe_vf_get_queues(adapter, vf, msg); + break; + default: + ixgbe_send_vf_nack(adapter, vf, msg[0]); + } +} + + +/* + * Tasklet for handling VF -> PF mailbox messages. 
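A hypothetical sketch of the message words this dispatcher consumes, as a VF might build them (VF-side code is not in this diff; the layout follows from the IXGBE_VT_MSGINFO handling in ixgbe_vf_set_vlan() above):

    uint32_t msg[2];

    msg[0] = IXGBE_VF_SET_VLAN | (1 << IXGBE_VT_MSGINFO_SHIFT);  /* enable = 1 */
    msg[1] = 100;                                                /* VLAN id    */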
+ */
+static void
+ixgbe_handle_mbx(void *context)
+{
+	struct adapter *adapter;
+	struct ixgbe_hw *hw;
+	struct ixgbe_vf *vf;
+	int i;
+
+	adapter = context;
+	hw = &adapter->hw;
+
+	for (i = 0; i < adapter->num_vfs; i++) {
+		vf = &adapter->vfs[i];
+
+		if (vf->flags & IXGBE_VF_ACTIVE) {
+			if (ixgbe_check_for_rst(hw, vf->pool) == 0)
+				ixgbe_process_vf_reset(adapter, vf);
+
+			if (ixgbe_check_for_msg(hw, vf->pool) == 0)
+				ixgbe_process_vf_msg(adapter, vf);
+
+			if (ixgbe_check_for_ack(hw, vf->pool) == 0)
+				ixgbe_process_vf_ack(adapter, vf);
+		}
+	}
+}
+
+static int
+ixgbe_init_iov(device_t dev, u16 num_vfs, const nvlist_t *config)
+{
+	struct adapter *adapter;
+	enum ixgbe_iov_mode mode;
+
+	adapter = device_get_softc(dev);
+	adapter->num_vfs = num_vfs;
+	mode = ixgbe_get_iov_mode(adapter);
+
+	if (num_vfs > ixgbe_max_vfs(mode)) {
+		adapter->num_vfs = 0;
+		return (ENOSPC);
+	}
+
+	adapter->vfs = malloc(sizeof(*adapter->vfs) * num_vfs, M_IXGBE,
+	    M_NOWAIT | M_ZERO);
+
+	if (adapter->vfs == NULL) {
+		adapter->num_vfs = 0;
+		return (ENOMEM);
+	}
+
+	ixgbe_if_init(adapter->ctx);
+
+	return (0);
+}
+
+static void
+ixgbe_uninit_iov(device_t dev)
+{
+	struct ixgbe_hw *hw;
+	struct adapter *adapter;
+	uint32_t pf_reg, vf_reg;
+
+	adapter = device_get_softc(dev);
+	hw = &adapter->hw;
+
+	/* Enable rx/tx for the PF and disable it for all VFs. */
+	pf_reg = IXGBE_VF_INDEX(adapter->pool);
+	IXGBE_WRITE_REG(hw, IXGBE_VFRE(pf_reg),
+	    IXGBE_VF_BIT(adapter->pool));
+	IXGBE_WRITE_REG(hw, IXGBE_VFTE(pf_reg),
+	    IXGBE_VF_BIT(adapter->pool));
+
+	if (pf_reg == 0)
+		vf_reg = 1;
+	else
+		vf_reg = 0;
+	IXGBE_WRITE_REG(hw, IXGBE_VFRE(vf_reg), 0);
+	IXGBE_WRITE_REG(hw, IXGBE_VFTE(vf_reg), 0);
+
+	IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
+
+	free(adapter->vfs, M_IXGBE);
+	adapter->vfs = NULL;
+	adapter->num_vfs = 0;
+}
+
+static void
+ixgbe_initialize_iov(struct adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	uint32_t mrqc, mtqc, vt_ctl, vf_reg, gcr_ext, gpie;
+	enum ixgbe_iov_mode mode;
+	int i;
+
+	mode = ixgbe_get_iov_mode(adapter);
+	if (mode == IXGBE_NO_VM)
+		return;
+
+	mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
+	mrqc &= ~IXGBE_MRQC_MRQE_MASK;
+
+	switch (mode) {
+	case IXGBE_64_VM:
+		mrqc |= IXGBE_MRQC_VMDQRSS64EN;
+		break;
+	case IXGBE_32_VM:
+		mrqc |= IXGBE_MRQC_VMDQRSS32EN;
+		break;
+	default:
+		panic("Unexpected SR-IOV mode %d", mode);
+	}
+	IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
+
+	mtqc = IXGBE_MTQC_VT_ENA;
+	switch (mode) {
+	case IXGBE_64_VM:
+		mtqc |= IXGBE_MTQC_64VF;
+		break;
+	case IXGBE_32_VM:
+		mtqc |= IXGBE_MTQC_32VF;
+		break;
+	default:
+		panic("Unexpected SR-IOV mode %d", mode);
+	}
+	IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
+
+	gcr_ext = IXGBE_READ_REG(hw, IXGBE_GCR_EXT);
+	gcr_ext |= IXGBE_GCR_EXT_MSIX_EN;
+	gcr_ext &= ~IXGBE_GCR_EXT_VT_MODE_MASK;
+	switch (mode) {
+	case IXGBE_64_VM:
+		gcr_ext |= IXGBE_GCR_EXT_VT_MODE_64;
+		break;
+	case IXGBE_32_VM:
+		gcr_ext |= IXGBE_GCR_EXT_VT_MODE_32;
+		break;
+	default:
+		panic("Unexpected SR-IOV mode %d", mode);
+	}
+	IXGBE_WRITE_REG(hw, IXGBE_GCR_EXT, gcr_ext);
+
+	gpie = IXGBE_READ_REG(hw, IXGBE_GPIE);
+	gpie &= ~IXGBE_GPIE_VTMODE_MASK;
+	switch (mode) {
+	case IXGBE_64_VM:
+		gpie |= IXGBE_GPIE_VTMODE_64;
+		break;
+	case IXGBE_32_VM:
+		gpie |= IXGBE_GPIE_VTMODE_32;
+		break;
+	default:
+		panic("Unexpected SR-IOV mode %d", mode);
+	}
+	IXGBE_WRITE_REG(hw, IXGBE_GPIE, gpie);
+
+	/* Enable rx/tx for the PF. */
+	vf_reg = IXGBE_VF_INDEX(adapter->pool);
+	IXGBE_WRITE_REG(hw, IXGBE_VFRE(vf_reg),
+	    IXGBE_VF_BIT(adapter->pool));
+	IXGBE_WRITE_REG(hw, IXGBE_VFTE(vf_reg),
+	    IXGBE_VF_BIT(adapter->pool));
+
+	/* Allow VM-to-VM communication. */
+	IXGBE_WRITE_REG(hw, IXGBE_PFDTXGSWC, IXGBE_PFDTXGSWC_VT_LBEN);
+
+	vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
+	vt_ctl |= (adapter->pool << IXGBE_VT_CTL_POOL_SHIFT);
+	IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
+
+	for (i = 0; i < adapter->num_vfs; i++)
+		ixgbe_init_vf(adapter, &adapter->vfs[i]);
+}
+
+#if 0
+/*
+** Check the max frame setting of all active VF's
+*/
+static void
+ixgbe_recalculate_max_frame(struct adapter *adapter)
+{
+	struct ixgbe_vf *vf;
+
+	for (int i = 0; i < adapter->num_vfs; i++) {
+		vf = &adapter->vfs[i];
+		if (vf->flags & IXGBE_VF_ACTIVE)
+			ixgbe_update_max_frame(adapter, vf->maximum_frame_size);
+	}
+}
+#endif
+
+static void
+ixgbe_init_vf(struct adapter *adapter, struct ixgbe_vf *vf)
+{
+	struct ixgbe_hw *hw;
+	uint32_t vf_index, pfmbimr;
+
+	hw = &adapter->hw;
+
+	if (!(vf->flags & IXGBE_VF_ACTIVE))
+		return;
+
+	vf_index = IXGBE_VF_INDEX(vf->pool);
+	pfmbimr = IXGBE_READ_REG(hw, IXGBE_PFMBIMR(vf_index));
+	pfmbimr |= IXGBE_VF_BIT(vf->pool);
+	IXGBE_WRITE_REG(hw, IXGBE_PFMBIMR(vf_index), pfmbimr);
+
+	ixgbe_vf_set_default_vlan(adapter, vf, vf->vlan_tag);
+
+	// XXX multicast addresses
+
+	if (ixgbe_validate_mac_addr(vf->ether_addr) == 0) {
+		ixgbe_set_rar(&adapter->hw, vf->rar_index,
+		    vf->ether_addr, vf->pool, TRUE);
+	}
+
+	ixgbe_vf_enable_transmit(adapter, vf);
+	ixgbe_vf_enable_receive(adapter, vf);
+
+	ixgbe_send_vf_msg(adapter, vf, IXGBE_PF_CONTROL_MSG);
+}
+
+static int
+ixgbe_add_vf(device_t dev, u16 vfnum, const nvlist_t *config)
+{
+	struct adapter *adapter;
+	struct ixgbe_vf *vf;
+	const void *mac;
+
+	adapter = device_get_softc(dev);
+
+	KASSERT(vfnum < adapter->num_vfs, ("VF index %d is out of range %d",
+	    vfnum, adapter->num_vfs));
+
+	vf = &adapter->vfs[vfnum];
+	vf->pool = vfnum;
+
+	/* RAR[0] is used by the PF so use vfnum + 1 for VF RAR. */
+	vf->rar_index = vfnum + 1;
+	vf->default_vlan = 0;
+	vf->maximum_frame_size = ETHER_MAX_LEN;
+	ixgbe_update_max_frame(adapter, vf->maximum_frame_size);
+
+	if (nvlist_exists_binary(config, "mac-addr")) {
+		mac = nvlist_get_binary(config, "mac-addr", NULL);
+		bcopy(mac, vf->ether_addr, ETHER_ADDR_LEN);
+		if (nvlist_get_bool(config, "allow-set-mac"))
+			vf->flags |= IXGBE_VF_CAP_MAC;
+	} else
+		/*
+		 * If the administrator has not specified a MAC address then
+		 * we must allow the VF to choose one.
+		 */
+		vf->flags |= IXGBE_VF_CAP_MAC;
+
+	vf->flags |= IXGBE_VF_ACTIVE;
+
+	ixgbe_init_vf(adapter, vf);
+
+	return (0);
+}
+#endif /* PCI_IOV */
+
Index: sys/dev/ixgbe/iflib_if_ixv.c
===================================================================
--- /dev/null
+++ sys/dev/ixgbe/iflib_if_ixv.c
@@ -0,0 +1,1783 @@
+/******************************************************************************
+
+  Copyright (c) 2001-2015, Intel Corporation
+  All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions are met:
+
+  1. Redistributions of source code must retain the above copyright notice,
+     this list of conditions and the following disclaimer.
+
+  2. Redistributions in binary form must reproduce the above copyright
+     notice, this list of conditions and the following disclaimer in the
+     documentation and/or other materials provided with the distribution.
+
+  3.
Neither the name of the Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + +******************************************************************************/ +/* $FreeBSD$*/ + + +#include "opt_inet.h" +#include "opt_inet6.h" + +#include "ixgbe.h" +#include "ifdi_if.h" + +#include +#include + +/********************************************************************* + * Driver version + *********************************************************************/ +char ixv_driver_version[] = "1.4.6-k"; + +/********************************************************************* + * PCI Device ID Table + * + * Used by probe to select devices to load on + * Last field stores an index into ixv_strings + * Last entry must be all 0s + * + * { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index } + *********************************************************************/ + +static pci_vendor_info_t ixv_vendor_info_array[] = +{ + PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_VF, "Intel(R) PRO/10GbE Virtual Function Network Driver"), + PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540_VF, "Intel(R) PRO/10GbE Virtual Function Network Driver"), + PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550_VF, "Intel(R) PRO/10GbE Virtual Function Network Driver"), + PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_X_VF, "Intel(R) PRO/10GbE Virtual Function Network Driver"), + /* required last entry */ +PVID_END +}; + +/********************************************************************* + * Function prototypes + *********************************************************************/ +static void *ixv_register(device_t dev); +static int ixv_if_attach_pre(if_ctx_t ctx); +static int ixv_if_attach_post(if_ctx_t ctx); +static int ixv_if_detach(if_ctx_t ctx); + +static int ixv_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int nqs, int nqsets); +static int ixv_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int nqs, int nqsets); +static void ixv_if_queues_free(if_ctx_t ctx); +static void ixv_identify_hardware(if_ctx_t ctx); +static int ixv_allocate_pci_resources(if_ctx_t ctx); +static void ixv_free_pci_resources(if_ctx_t ctx); +static int ixv_setup_interface(if_ctx_t ctx); +static void ixv_if_media_status(if_ctx_t , struct ifmediareq *); +static int ixv_if_media_change(if_ctx_t ctx); +static void ixv_if_update_admin_status(if_ctx_t ctx); +static int ixv_if_msix_intr_assign(if_ctx_t ctx, int msix); + +static int ixv_if_mtu_set(if_ctx_t ctx, uint32_t mtu); +static void ixv_if_init(if_ctx_t ctx); +static void ixv_if_local_timer(if_ctx_t ctx, uint16_t qid); +static void ixv_if_stop(if_ctx_t ctx); + +static void 
ixv_initialize_transmit_units(if_ctx_t ctx); +static void ixv_initialize_receive_units(if_ctx_t ctx); + +static void ixv_setup_vlan_support(if_ctx_t ctx); +static void ixv_configure_ivars(struct adapter *); +static void ixv_if_enable_intr(if_ctx_t ctx); +static void ixv_if_disable_intr(if_ctx_t ctx); +static void ixv_if_set_multi(if_ctx_t ctx); + +static void ixv_if_register_vlan(if_ctx_t, u16); +static void ixv_if_unregister_vlan(if_ctx_t, u16); +static void ixv_save_stats(struct adapter *); +static void ixv_init_stats(struct adapter *); +static void ixv_update_stats(struct adapter *); +static void ixv_add_stats_sysctls(struct adapter *adapter); + +static int ixv_sysctl_debug(SYSCTL_HANDLER_ARGS); +static void ixv_set_ivar(struct adapter *, u8, u8, s8); + +static u8 * ixv_mc_array_itr(struct ixgbe_hw *, u8 **, u32 *); +static void ixv_set_sysctl_value(struct adapter *, const char *, + const char *, int *, int); + +/* The MSI/X Interrupt handlers */ +static int ixv_msix_que(void *); +static int ixv_msix_mbx(void *); + +/********************************************************************* + * FreeBSD Device Interface Entry Points + *********************************************************************/ + +static device_method_t ixv_methods[] = { + /* Device interface */ + DEVMETHOD(device_register, ixv_register), + DEVMETHOD(device_probe, iflib_device_probe), + DEVMETHOD(device_attach, iflib_device_attach), + DEVMETHOD(device_detach, iflib_device_detach), + DEVMETHOD(device_shutdown, iflib_device_shutdown), + DEVMETHOD_END +}; + +static driver_t ixv_driver = { + "ixv", ixv_methods, sizeof(struct adapter), +}; + +devclass_t ixv_devclass; +DRIVER_MODULE(ixv, pci, ixv_driver, ixv_devclass, 0, 0); +MODULE_DEPEND(ixv, pci, 1, 1, 1); +MODULE_DEPEND(ixv, ether, 1, 1, 1); +#ifdef DEV_NETMAP +MODULE_DEPEND(ix, netmap, 1, 1, 1); +#endif /* DEV_NETMAP */ +/* XXX depend on 'ix' ? 
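The tunables declared just below can be seeded from /boot/loader.conf; an illustrative (not default) configuration might be:

    hw.ixv.num_queues=2
    hw.ixv.enable_aim=1
    hw.ixv.txd=2048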
*/ + +static device_method_t ixv_if_methods[] = { + DEVMETHOD(ifdi_attach_pre, ixv_if_attach_pre), + DEVMETHOD(ifdi_attach_post, ixv_if_attach_post), + DEVMETHOD(ifdi_tx_queues_alloc, ixv_if_tx_queues_alloc), + DEVMETHOD(ifdi_rx_queues_alloc, ixv_if_rx_queues_alloc), + DEVMETHOD(ifdi_queues_free, ixv_if_queues_free), + DEVMETHOD(ifdi_detach, ixv_if_detach), + DEVMETHOD(ifdi_media_status, ixv_if_media_status), + DEVMETHOD(ifdi_media_change, ixv_if_media_change), + DEVMETHOD(ifdi_update_admin_status, ixv_if_update_admin_status), + DEVMETHOD(ifdi_mtu_set, ixv_if_mtu_set), + DEVMETHOD(ifdi_init, ixv_if_init), + DEVMETHOD(ifdi_multi_set, ixv_if_set_multi), + DEVMETHOD(ifdi_timer, ixv_if_local_timer), + DEVMETHOD(ifdi_stop, ixv_if_stop), + DEVMETHOD(ifdi_msix_intr_assign, ixv_if_msix_intr_assign), + DEVMETHOD(ifdi_intr_enable, ixv_if_enable_intr), + DEVMETHOD(ifdi_intr_disable, ixv_if_disable_intr), + DEVMETHOD(ifdi_vlan_register, ixv_if_register_vlan), + DEVMETHOD(ifdi_vlan_unregister, ixv_if_unregister_vlan), + DEVMETHOD_END +}; + +static driver_t ixv_if_driver = { + "ixv_if", ixv_if_methods, sizeof(struct adapter) +}; + +/* +** TUNEABLE PARAMETERS: +*/ + +/* Number of Queues - do not exceed MSIX vectors - 1 */ +static int ixv_num_queues = 1; +TUNABLE_INT("hw.ixv.num_queues", &ixv_num_queues); + +/* +** AIM: Adaptive Interrupt Moderation +** which means that the interrupt rate +** is varied over time based on the +** traffic for that interrupt vector +*/ +static int ixv_enable_aim = FALSE; +TUNABLE_INT("hw.ixv.enable_aim", &ixv_enable_aim); + +/* How many packets rxeof tries to clean at a time */ +static int ixv_rx_process_limit = 256; +TUNABLE_INT("hw.ixv.rx_process_limit", &ixv_rx_process_limit); + +/* How many packets txeof tries to clean at a time */ +static int ixv_tx_process_limit = 256; +TUNABLE_INT("hw.ixv.tx_process_limit", &ixv_tx_process_limit); + +/* Flow control setting, default to full */ +static int ixv_flow_control = ixgbe_fc_full; +TUNABLE_INT("hw.ixv.flow_control", &ixv_flow_control); + +/* + * Header split: this causes the hardware to DMA + * the header into a separate mbuf from the payload, + * it can be a performance win in some workloads, but + * in others it actually hurts, its off by default. + */ +static int ixv_header_split = FALSE; +TUNABLE_INT("hw.ixv.hdr_split", &ixv_header_split); + +/* +** Number of TX descriptors per ring, +** setting higher than RX as this seems +** the better performing choice. +*/ +static int ixv_txd = DEFAULT_TXD; +TUNABLE_INT("hw.ixv.txd", &ixv_txd); + +/* Number of RX descriptors per ring */ +static int ixv_rxd = DEFAULT_RXD; +TUNABLE_INT("hw.ixv.rxd", &ixv_rxd); + +/* +** Shadow VFTA table, this is needed because +** the real filter table gets cleared during +** a soft reset and we need to repopulate it. 
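A minimal sketch of the repopulation this comment describes, assuming it is replayed from ixv_setup_vlan_support() with the usual struct ixgbe_hw pointer in scope:

    for (int i = 0; i < IXGBE_VFTA_SIZE; i++)
            if (ixv_shadow_vfta[i] != 0)
                    IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), ixv_shadow_vfta[i]);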
+*/
+static u32 ixv_shadow_vfta[IXGBE_VFTA_SIZE];
+extern struct if_txrx ixgbe_txrx;
+
+static struct if_shared_ctx ixv_sctx_init = {
+	.isc_magic = IFLIB_MAGIC,
+	.isc_q_align = PAGE_SIZE, /* max(DBA_ALIGN, PAGE_SIZE) */
+	.isc_tx_maxsize = IXGBE_TSO_SIZE,
+	.isc_tx_maxsegsize = PAGE_SIZE*4,
+	.isc_rx_maxsize = PAGE_SIZE*4,
+	.isc_rx_nsegments = 1,
+	.isc_rx_maxsegsize = PAGE_SIZE*4,
+	.isc_nfl = 1,
+	.isc_ntxqs = 1,
+	.isc_nrxqs = 1,
+	.isc_admin_intrcnt = 1,
+	.isc_vendor_info = ixv_vendor_info_array,
+	.isc_driver_version = ixv_driver_version,
+	.isc_txrx = &ixgbe_txrx,
+	.isc_driver = &ixv_if_driver,
+};
+
+if_shared_ctx_t ixv_sctx = &ixv_sctx_init;
+
+static void *
+ixv_register(device_t dev)
+{
+
+	return (ixv_sctx);
+}
+
+static int
+ixv_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int ntxqs, int ntxqsets)
+{
+	struct adapter *adapter = iflib_get_softc(ctx);
+	if_softc_ctx_t scctx = adapter->shared;
+	struct ix_tx_queue *que;
+	int i, j, error;
+#ifdef PCI_IOV
+	enum ixgbe_iov_mode iov_mode;
+#endif
+
+	MPASS(adapter->num_tx_queues == ntxqsets);
+	MPASS(ntxqs == 1);
+
+	/* Allocate queue structure memory */
+	if (!(adapter->tx_queues =
+	    (struct ix_tx_queue *) malloc(sizeof(struct ix_tx_queue) * ntxqsets,
+	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
+		device_printf(iflib_get_dev(ctx), "Unable to allocate TX ring memory\n");
+		return (ENOMEM);
+	}
+
+#ifdef PCI_IOV
+	iov_mode = ixgbe_get_iov_mode(adapter);
+	adapter->pool = ixgbe_max_vfs(iov_mode);
+#else
+	adapter->pool = 0;
+#endif
+
+	for (i = 0, que = adapter->tx_queues; i < ntxqsets; i++, que++) {
+		struct tx_ring *txr = &que->txr;
+
+		if (!(txr->tx_buffers = (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) * scctx->isc_ntxd[0], M_DEVBUF, M_NOWAIT | M_ZERO))) {
+			device_printf(iflib_get_dev(ctx), "failed to allocate tx_buffer memory\n");
+			error = ENOMEM;
+			goto fail;
+		}
+#ifdef PCI_IOV
+		txr->me = ixgbe_pf_que_index(iov_mode, i);
+#else
+		txr->me = i;
+#endif
+		txr->adapter = que->adapter = adapter;
+		adapter->active_queues |= (u64)1 << txr->me;
+
+		/* get the virtual and physical address of the hardware queues */
+		txr->tail = IXGBE_TDT(txr->me);
+		txr->tx_base = (union ixgbe_adv_tx_desc *)vaddrs[i];
+		txr->tx_paddr = paddrs[i];
+		txr->que = que;
+		/* Mark every buffer as not yet holding an end-of-packet index */
+		for (j = 0; j < scctx->isc_ntxd[0]; j++)
+			txr->tx_buffers[j].eop = -1;
+		txr->bytes = 0;
+		txr->total_packets = 0;
+
+#ifdef IXGBE_FDIR
+		/* Set the rate at which we sample packets */
+		if (adapter->hw.mac.type != ixgbe_mac_82598EB)
+			txr->atr_sample = atr_sample_rate;
+#endif
+	}
+
+	device_printf(iflib_get_dev(ctx), "allocated for %d queues\n", adapter->num_tx_queues);
+	return (0);
+
+fail:
+	ixv_if_queues_free(ctx);
+	return (error);
+}
+
+static int
+ixv_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int nrxqs, int nrxqsets)
+{
+	struct adapter *adapter = iflib_get_softc(ctx);
+	struct ix_rx_queue *que;
+	int i;
+#ifdef PCI_IOV
+	enum ixgbe_iov_mode iov_mode;
+#endif
+
+	MPASS(adapter->num_rx_queues == nrxqsets);
+	MPASS(nrxqs == 1);
+
+	/* Allocate queue structure memory */
+	if (!(adapter->rx_queues =
+	    (struct ix_rx_queue *) malloc(sizeof(struct ix_rx_queue) * nrxqsets,
+	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
+		device_printf(iflib_get_dev(ctx), "Unable to allocate RX queue memory\n");
+		return (ENOMEM);
+	}
+
+#ifdef PCI_IOV
+	iov_mode = ixgbe_get_iov_mode(adapter);
+	adapter->pool = ixgbe_max_vfs(iov_mode);
+#else
+	adapter->pool = 0;
+#endif
+
+	for (i = 0, que = adapter->rx_queues; i < nrxqsets; i++, que++) {
+		struct rx_ring *rxr = &que->rxr;
+
+#ifdef PCI_IOV
+		rxr->me = ixgbe_pf_que_index(iov_mode, i);
+#else
+		rxr->me = i;
+#endif
+		rxr->adapter = que->adapter = adapter;
+
+		/* get the virtual and physical address of the hardware queues */
+		rxr->tail = IXGBE_RDT(rxr->me);
+		rxr->rx_base = (union ixgbe_adv_rx_desc *)vaddrs[i];
+		rxr->rx_paddr = paddrs[i];
+		rxr->bytes = 0;
+		rxr->que = que;
+	}
+
+	device_printf(iflib_get_dev(ctx), "allocated for %d rx queues\n", adapter->num_rx_queues);
+	return (0);
+}
+
+static void
+ixv_if_queues_free(if_ctx_t ctx)
+{
+	struct adapter *adapter = iflib_get_softc(ctx);
+	struct ix_tx_queue *que = adapter->tx_queues;
+	int i;
+
+	if (que == NULL)
+		goto free;
+
+	for (i = 0; i < adapter->num_tx_queues; i++, que++) {
+		struct tx_ring *txr = &que->txr;
+		if (txr->tx_buffers == NULL)
+			break;
+
+		free(txr->tx_buffers, M_DEVBUF);
+		txr->tx_buffers = NULL;
+	}
+	if (adapter->tx_queues != NULL)
+		free(adapter->tx_queues, M_DEVBUF);
+free:
+	if (adapter->rx_queues != NULL)
+		free(adapter->rx_queues, M_DEVBUF);
+	adapter->tx_queues = NULL;
+	adapter->rx_queues = NULL;
+}
+
+/*********************************************************************
+ * Device initialization routine
+ *
+ * The attach entry point is called when the driver is being loaded.
+ * This routine identifies the type of hardware, allocates all resources
+ * and initializes the hardware.
+ *
+ * return 0 on success, positive on failure
+ *********************************************************************/
+
+static int
+ixv_if_attach_pre(if_ctx_t ctx)
+{
+	device_t dev;
+	struct adapter *adapter;
+	if_softc_ctx_t scctx;
+	struct ixgbe_hw *hw;
+	int error = 0;
+
+	INIT_DEBUGOUT("ixv_attach: begin");
+
+	/* Allocate, clear, and link in our adapter structure */
+	dev = iflib_get_dev(ctx);
+	adapter = iflib_get_softc(ctx);
+	adapter->dev = dev;
+	adapter->ctx = ctx;
+	scctx = adapter->shared = iflib_get_softc_ctx(ctx);
+	adapter->media = iflib_get_media(ctx);
+	hw = &adapter->hw;
+
+	/* SYSCTL APIs */
+	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
+	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
+	    OID_AUTO, "debug", CTLTYPE_INT | CTLFLAG_RW,
+	    adapter, 0, ixv_sysctl_debug, "I", "Debug Info");
+
+	SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
+	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
+	    OID_AUTO, "enable_aim", CTLFLAG_RW,
+	    &ixv_enable_aim, 1, "Interrupt Moderation");
+
+	/* Determine hardware revision */
+	ixv_identify_hardware(ctx);
+	adapter->shared->isc_tx_nsegments = IXGBE_82599_SCATTER;
+
+	/* Do base PCI setup - map BAR0 */
+	if (ixv_allocate_pci_resources(ctx)) {
+		device_printf(dev, "ixv_allocate_pci_resources() failed!\n");
+		error = ENXIO;
+		goto err;
+	}
+
+	/* Sysctls for limiting the amount of work done in the taskqueues */
+	ixv_set_sysctl_value(adapter, "rx_processing_limit",
+	    "max number of rx packets to process",
+	    &adapter->rx_process_limit, ixv_rx_process_limit);
+
+	ixv_set_sysctl_value(adapter, "tx_processing_limit",
+	    "max number of tx packets to process",
+	    &adapter->tx_process_limit, ixv_tx_process_limit);
+
+	if (scctx->isc_ntxd[0] == 0)
+		scctx->isc_ntxd[0] = DEFAULT_TXD;
+	if (scctx->isc_nrxd[0] == 0)
+		scctx->isc_nrxd[0] = DEFAULT_RXD;
+	if (scctx->isc_nrxd[0] < MIN_RXD || scctx->isc_nrxd[0] > MAX_RXD) {
+		device_printf(dev, "nrxd: %d not within permitted range of %d-%d, setting to default value: %d\n",
+		    scctx->isc_nrxd[0], MIN_RXD, MAX_RXD, DEFAULT_RXD);
+		scctx->isc_nrxd[0] = DEFAULT_RXD;
+	}
+	if (scctx->isc_ntxd[0] < MIN_TXD || scctx->isc_ntxd[0] > MAX_TXD) {
+		device_printf(dev, "ntxd: %d not within permitted range of %d-%d, setting to default value: %d\n",
+		    scctx->isc_ntxd[0], MIN_TXD, MAX_TXD, DEFAULT_TXD);
+		scctx->isc_ntxd[0] = DEFAULT_TXD;
+	}
+	scctx->isc_txqsizes[0] = roundup2(scctx->isc_ntxd[0] *
+	    sizeof(union ixgbe_adv_tx_desc) + sizeof(u32), DBA_ALIGN);
+	scctx->isc_rxqsizes[0] = roundup2(scctx->isc_nrxd[0] *
+	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
+
+	/*
+	** Initialize the shared code; at this point
+	** the mac type is set.
+	*/
+	error = ixgbe_init_shared_code(hw);
+	if (error) {
+		device_printf(dev, "ixgbe_init_shared_code() failed!\n");
+		error = EIO;
+		goto err;
+	}
+
+	/* Setup the mailbox */
+	ixgbe_init_mbx_params_vf(hw);
+
+	/* Reset mbox api to 1.0 */
+	error = ixgbe_reset_hw(hw);
+	if (error == IXGBE_ERR_RESET_FAILED)
+		device_printf(dev, "ixgbe_reset_hw() failure: Reset Failed!\n");
+	else if (error)
+		device_printf(dev, "ixgbe_reset_hw() failed with error %d\n", error);
+	if (error) {
+		error = EIO;
+		goto err;
+	}
+
+	/* Negotiate mailbox API version */
+	error = ixgbevf_negotiate_api_version(hw, ixgbe_mbox_api_11);
+	if (error) {
+		device_printf(dev, "MBX API 1.1 negotiation failed! Error %d\n", error);
+		error = EIO;
+		goto err;
+	}
+
+	error = ixgbe_init_hw(hw);
+	if (error) {
+		device_printf(dev, "ixgbe_init_hw() failed!\n");
+		error = EIO;
+		goto err;
+	}
+
+	/* If no mac address was assigned, make a random one */
+	if (!ixv_check_ether_addr(hw->mac.addr)) {
+		u8 addr[ETHER_ADDR_LEN];
+		arc4rand(&addr, sizeof(addr), 0);
+		addr[0] &= 0xFE;
+		addr[0] |= 0x02;
+		bcopy(addr, hw->mac.addr, sizeof(addr));
+	}
+
+	INIT_DEBUGOUT("ixv_attach: end");
+	return (0);
+
+err:
+	ixv_free_pci_resources(ctx);
+	return (error);
+}
+
+static int
+ixv_if_attach_post(if_ctx_t ctx)
+{
+	struct adapter *adapter = iflib_get_softc(ctx);
+	device_t dev = iflib_get_dev(ctx);
+	int error = 0;
+
+	/* Setup OS specific network interface */
+	error = ixv_setup_interface(ctx);
+	if (error) {
+		device_printf(dev, "Interface setup failed: %d\n", error);
+		goto end;
+	}
+
+	/* Do the stats setup */
+	ixv_save_stats(adapter);
+	ixv_init_stats(adapter);
+	ixv_add_stats_sysctls(adapter);
+
+	INIT_DEBUGOUT("ixv_attachpost: end");
+
+end:
+	return error;
+}
+
+/*********************************************************************
+ * Device removal routine
+ *
+ * The detach entry point is called when the driver is being removed.
+ * This routine stops the adapter and deallocates all the resources
+ * that were allocated for driver operation.
+ *
+ * return 0 on success, positive on failure
+ *********************************************************************/
+
+static int
+ixv_if_detach(if_ctx_t ctx)
+{
+	INIT_DEBUGOUT("ixv_detach: begin");
+
+	ixv_free_pci_resources(ctx);
+
+	return (0);
+}
+
+static int
+ixv_if_mtu_set(if_ctx_t ctx, uint32_t mtu)
+{
+	struct adapter *adapter = iflib_get_softc(ctx);
+	struct ifnet *ifp = iflib_get_ifp(ctx);
+	int error = 0;
+
+	IOCTL_DEBUGOUT("ioctl: SIOCSIFMTU (Set Interface MTU)");
+	if (mtu > IXGBE_MAX_FRAME_SIZE - IXGBE_MTU_HDR) {
+		error = EINVAL;
+	} else {
+		ifp->if_mtu = mtu;
+		adapter->max_frame_size = ifp->if_mtu + IXGBE_MTU_HDR;
+	}
+
+	return error;
+}
+
+/*********************************************************************
+ * Init entry point
+ *
+ * This routine is used in two ways. It is used by the stack as
+ * init entry point in network interface structure.
It is also used + * by the driver as a hw/sw initialization routine to get to a + * consistent state. + * + * return 0 on success, positive on failure + **********************************************************************/ +#define IXGBE_MHADD_MFS_SHIFT 16 + +static void +ixv_if_init(if_ctx_t ctx) +{ + struct adapter *adapter = iflib_get_softc(ctx); + struct ifnet *ifp = iflib_get_ifp(ctx); + device_t dev = iflib_get_dev(ctx); + struct ixgbe_hw *hw = &adapter->hw; + int error = 0; + + INIT_DEBUGOUT("ixv_init: begin"); + hw->adapter_stopped = FALSE; + + /* reprogram the RAR[0] in case user changed it. */ + ixgbe_set_rar(hw, 0, hw->mac.addr, 0, IXGBE_RAH_AV); + + /* Get the latest mac address, User can use a LAA */ + bcopy(IF_LLADDR(adapter->ifp), hw->mac.addr, + IXGBE_ETH_LENGTH_OF_ADDRESS); + ixgbe_set_rar(hw, 0, hw->mac.addr, 0, 1); + hw->addr_ctrl.rar_used_count = 1; + + /* Reset VF and renegotiate mailbox API version */ + ixgbe_reset_hw(hw); + error = ixgbevf_negotiate_api_version(hw, ixgbe_mbox_api_11); + if (error) + device_printf(dev, "MBX API 1.1 negotiation failed! Error %d\n", error); + + ixv_initialize_transmit_units(ctx); + + /* Setup Multicast table */ + ixv_if_set_multi(ctx); + + /* + ** Determine the correct mbuf pool + ** for doing jumbo/headersplit + */ + if (ifp->if_mtu > ETHERMTU) + adapter->rx_mbuf_sz = MJUMPAGESIZE; + else + adapter->rx_mbuf_sz = MCLBYTES; + + /* Configure RX settings */ + ixv_initialize_receive_units(ctx); + + /* Set the various hardware offload abilities */ + ifp->if_hwassist = 0; + if (ifp->if_capenable & IFCAP_TSO4) + ifp->if_hwassist |= CSUM_TSO; + if (ifp->if_capenable & IFCAP_TXCSUM) { + ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP); +#if __FreeBSD_version >= 800000 + ifp->if_hwassist |= CSUM_SCTP; +#endif + } + + /* Set up VLAN offload and filter */ + ixv_setup_vlan_support(ctx); + + /* Set up MSI/X routing */ + ixv_configure_ivars(adapter); + + /* Set up auto-mask */ + IXGBE_WRITE_REG(hw, IXGBE_VTEIAM, IXGBE_EICS_RTX_QUEUE); + + /* Set moderation on the Link interrupt */ + IXGBE_WRITE_REG(hw, IXGBE_VTEITR(adapter->vector), IXGBE_LINK_ITR); + + /* Stats init */ + ixv_init_stats(adapter); + + return; +} + +/* +** +** MSIX Interrupt Handlers and Tasklets +** +*/ + +static inline void +ixv_enable_queue(struct adapter *adapter, u32 vector) +{ + struct ixgbe_hw *hw = &adapter->hw; + u32 queue = 1 << vector; + u32 mask; + + mask = (IXGBE_EIMS_RTX_QUEUE & queue); + IXGBE_WRITE_REG(hw, IXGBE_VTEIMS, mask); +} + +static inline void +ixv_disable_queue(struct adapter *adapter, u32 vector) +{ + struct ixgbe_hw *hw = &adapter->hw; + u64 queue = (u64)(1 << vector); + u32 mask; + + mask = (IXGBE_EIMS_RTX_QUEUE & queue); + IXGBE_WRITE_REG(hw, IXGBE_VTEIMC, mask); +} + +static inline void +ixv_rearm_queues(struct adapter *adapter, u64 queues) +{ + u32 mask = (IXGBE_EIMS_RTX_QUEUE & queues); + IXGBE_WRITE_REG(&adapter->hw, IXGBE_VTEICS, mask); +} + + +/********************************************************************* + * + * MSI Queue Interrupt Service routine + * + **********************************************************************/ +static int +ixv_msix_que(void *arg) +{ + struct ix_rx_queue *que = arg; + struct adapter *adapter = que->adapter; + struct rx_ring *rxr = &que->rxr; + u32 newitr = 0; +#ifdef notyet + struct tx_ring *txr = &que->txr; +#endif + + ixv_disable_queue(adapter, que->msix); + ++que->irqs; + + /* Do AIM now? 
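A worked pass through the AIM arithmetic below, for an interval that moved 48 packets totalling 72000 bytes:

    u32 newitr = 72000 / 48;    /* average frame = 1500 bytes            */
    newitr += 24;               /* hardware framing and CRC -> 1524      */
    newitr = min(newitr, 3000); /* upper clamp                           */
    newitr = newitr / 2;        /* 1524 is not in (300,1200), so halved  */
    newitr |= newitr << 16;     /* mirror into both EITR intervals       */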
*/ + + if (ixv_enable_aim == FALSE) + goto no_calc; + /* + ** Do Adaptive Interrupt Moderation: + ** - Write out last calculated setting + ** - Calculate based on average size over + ** the last interval. + */ + if (que->eitr_setting) + IXGBE_WRITE_REG(&adapter->hw, + IXGBE_VTEITR(que->msix), + que->eitr_setting); + + que->eitr_setting = 0; + +#ifdef notyet + if ((txr->bytes) && (txr->packets)) + newitr = txr->bytes/txr->packets; +#endif + if (rxr->bytes == 0) + goto no_calc; + + if ((rxr->bytes) && (rxr->packets)) + newitr = max(newitr, + (rxr->bytes / rxr->packets)); + newitr += 24; /* account for hardware frame, crc */ + + /* set an upper boundary */ + newitr = min(newitr, 3000); + + /* Be nice to the mid range */ + if ((newitr > 300) && (newitr < 1200)) + newitr = (newitr / 3); + else + newitr = (newitr / 2); + + newitr |= newitr << 16; + + /* save for next interrupt */ + que->eitr_setting = newitr; + +#if 0 + /* Reset state */ + txr->bytes = 0; + txr->packets = 0; +#endif + rxr->bytes = 0; + rxr->packets = 0; + +no_calc: + return (FILTER_SCHEDULE_THREAD); +} + +static int +ixv_msix_mbx(void *arg) +{ + struct adapter *adapter = arg; + struct ixgbe_hw *hw = &adapter->hw; + u32 reg; + + ++adapter->link_irq; + + /* First get the cause */ + reg = IXGBE_READ_REG(hw, IXGBE_VTEICS); + /* Clear interrupt with write */ + IXGBE_WRITE_REG(hw, IXGBE_VTEICR, reg); + + /* Link status change */ + if (reg & IXGBE_EICR_LSC) + iflib_admin_intr_deferred(adapter->ctx); + + IXGBE_WRITE_REG(hw, IXGBE_VTEIMS, IXGBE_EIMS_OTHER); + return (FILTER_HANDLED); +} + +/********************************************************************* + * + * Media Ioctl callback + * + * This routine is called whenever the user queries the status of + * the interface using ifconfig. + * + **********************************************************************/ +static void +ixv_if_media_status(if_ctx_t ctx, struct ifmediareq * ifmr) +{ + struct adapter *adapter = iflib_get_softc(ctx); + + INIT_DEBUGOUT("ixv_media_status: begin"); + ixv_if_update_admin_status(ctx); + + ifmr->ifm_status = IFM_AVALID; + ifmr->ifm_active = IFM_ETHER; + + if (!adapter->link_active) { + return; + } + + ifmr->ifm_status |= IFM_ACTIVE; + + switch (adapter->link_speed) { + case IXGBE_LINK_SPEED_1GB_FULL: + ifmr->ifm_active |= IFM_1000_T | IFM_FDX; + break; + case IXGBE_LINK_SPEED_10GB_FULL: + ifmr->ifm_active |= IFM_FDX; + break; + } + + return; +} + +/********************************************************************* + * + * Media Ioctl callback + * + * This routine is called when the user changes speed/duplex using + * media/mediopt option with ifconfig. + * + **********************************************************************/ +static int +ixv_if_media_change(if_ctx_t ctx) +{ + struct adapter *adapter = iflib_get_softc(ctx); + struct ifmedia *ifm = adapter->media; + + INIT_DEBUGOUT("ixv_media_change: begin"); + + if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) + return (EINVAL); + + switch (IFM_SUBTYPE(ifm->ifm_media)) { + case IFM_AUTO: + break; + default: + device_printf(adapter->dev, "Only auto media type\n"); + return (EINVAL); + } + + return (0); +} + + +/********************************************************************* + * Multicast Update + * + * This routine is called whenever multicast address list is updated. 
+ *
+ **********************************************************************/
+#define IXGBE_RAR_ENTRIES 16
+
+static void
+ixv_if_set_multi(if_ctx_t ctx)
+{
+	struct adapter *adapter = iflib_get_softc(ctx);
+	if_t ifp = iflib_get_ifp(ctx);
+	u8 mta[MAX_NUM_MULTICAST_ADDRESSES * IXGBE_ETH_LENGTH_OF_ADDRESS];
+	u8 *update_ptr;
+	struct ifmultiaddr *ifma;
+	int mcnt = 0;
+
+	IOCTL_DEBUGOUT("ixv_set_multi: begin");
+
+	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
+		if (ifma->ifma_addr->sa_family != AF_LINK)
+			continue;
+		bcopy(LLADDR((struct sockaddr_dl *) ifma->ifma_addr),
+		    &mta[mcnt * IXGBE_ETH_LENGTH_OF_ADDRESS],
+		    IXGBE_ETH_LENGTH_OF_ADDRESS);
+		mcnt++;
+	}
+
+	update_ptr = mta;
+
+	ixgbe_update_mc_addr_list(&adapter->hw,
+	    update_ptr, mcnt, ixv_mc_array_itr, TRUE);
+}
+
+/*
+ * This is an iterator function now needed by the multicast
+ * shared code. It simply feeds the shared code routine the
+ * addresses in the array of ixv_if_set_multi() one by one.
+ */
+static u8 *
+ixv_mc_array_itr(struct ixgbe_hw *hw, u8 **update_ptr, u32 *vmdq)
+{
+	u8 *addr = *update_ptr;
+	u8 *newptr;
+
+	*vmdq = 0;
+
+	newptr = addr + IXGBE_ETH_LENGTH_OF_ADDRESS;
+	*update_ptr = newptr;
+	return addr;
+}
+
+/*********************************************************************
+ * Timer routine
+ *
+ * This routine checks for link status, updates statistics,
+ * and runs the watchdog check.
+ *
+ **********************************************************************/
+
+static void
+ixv_if_local_timer(if_ctx_t ctx, uint16_t qid)
+{
+	struct adapter *adapter = iflib_get_softc(ctx);
+	struct ix_tx_queue *que = &adapter->tx_queues[qid];
+	u64 queues = 0;
+
+	ixv_if_update_admin_status(ctx);
+
+	/* Stats Update */
+	ixv_update_stats(adapter);
+
+	/* Keep track of queues with work for soft irq */
+	if (que->txr.busy)
+		queues |= ((u64)1 << que->txr.me);
+
+	if (que->txr.busy == IXGBE_QUEUE_HUNG) {
+		/* Mark the queue as inactive */
+		adapter->active_queues &= ~((u64)1 << que->txr.me);
+	} else {
+		/* Check if we've come back from hung */
+		if ((adapter->active_queues & ((u64)1 << que->txr.me)) == 0)
+			adapter->active_queues |= ((u64)1 << que->txr.me);
+	}
+	if (que->txr.busy >= IXGBE_MAX_TX_BUSY) {
+		que->txr.busy = IXGBE_QUEUE_HUNG;
+	}
+
+	if (queues != 0) { /* Force an IRQ on queues with work */
+		ixv_rearm_queues(adapter, queues);
+	}
+}
+
+/*
+** Note: this routine updates the OS on the link state; the
+** real check of the hardware only happens with a link
+** interrupt.
+*/
+static void
+ixv_if_update_admin_status(if_ctx_t ctx)
+{
+	struct adapter *adapter = iflib_get_softc(ctx);
+	device_t dev = iflib_get_dev(ctx);
+
+	if (adapter->link_up) {
+		if (adapter->link_active == FALSE) {
+			if (bootverbose)
+				device_printf(dev, "Link is up %d Gbps %s\n",
+				    ((adapter->link_speed == 128) ? 10 : 1),
+				    "Full Duplex");
+			adapter->link_active = TRUE;
+			iflib_link_state_change(ctx, LINK_STATE_UP, IF_Gbps(10));
+		}
+	} else { /* Link down */
+		if (adapter->link_active == TRUE) {
+			if (bootverbose)
+				device_printf(dev, "Link is Down\n");
+			iflib_link_state_change(ctx, LINK_STATE_DOWN, 0);
+			adapter->link_active = FALSE;
+		}
+	}
+}
+
+/*********************************************************************
+ *
+ * This routine disables all traffic on the adapter by issuing a
+ * global reset on the MAC and deallocates TX/RX buffers.
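The bare 128 in the bootverbose message above works because IXGBE_LINK_SPEED_10GB_FULL is 0x80; an equivalent, more self-documenting form of the same test would be:

    device_printf(dev, "Link is up %d Gbps Full Duplex\n",
        (adapter->link_speed == IXGBE_LINK_SPEED_10GB_FULL) ? 10 : 1);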
+ * + **********************************************************************/ + +static void +ixv_if_stop(if_ctx_t ctx) +{ + struct adapter *adapter = iflib_get_softc(ctx); + struct ixgbe_hw *hw = &adapter->hw; + + INIT_DEBUGOUT("ixv_stop: begin"); + + ixgbe_reset_hw(hw); + adapter->hw.adapter_stopped = FALSE; + ixgbe_stop_adapter(hw); + + /* reprogram the RAR[0] in case user changed it. */ + ixgbe_set_rar(hw, 0, hw->mac.addr, 0, IXGBE_RAH_AV); + + return; +} + + +/********************************************************************* + * + * Determine hardware revision. + * + **********************************************************************/ +static void +ixv_identify_hardware(if_ctx_t ctx) +{ + struct adapter *adapter = iflib_get_softc(ctx); + device_t dev = iflib_get_dev(ctx); + struct ixgbe_hw *hw = &adapter->hw; + + /* Save off the information about this board */ + hw->vendor_id = pci_get_vendor(dev); + hw->device_id = pci_get_device(dev); + hw->revision_id = pci_read_config(dev, PCIR_REVID, 1); + hw->subsystem_vendor_id = + pci_read_config(dev, PCIR_SUBVEND_0, 2); + hw->subsystem_device_id = + pci_read_config(dev, PCIR_SUBDEV_0, 2); + + /* We need this to determine device-specific things */ + ixgbe_set_mac_type(hw); + + return; +} + +/********************************************************************* + * + * Setup MSIX Interrupt resources and handlers + * + **********************************************************************/ +static int +ixv_if_msix_intr_assign(if_ctx_t ctx, int msix) +{ + struct adapter *adapter = iflib_get_softc(ctx); + device_t dev = iflib_get_dev(ctx); + struct ix_rx_queue *que = adapter->rx_queues; + struct ix_tx_queue *tx_que = adapter->tx_queues; + int error, rid, vector = 0; + char buf[16]; + + /* Admin Queue is vector 0 */ + rid = vector + 1; + for (int i = 0; i < adapter->num_rx_queues; i++, vector++, que++) { + rid = vector + 1; + + snprintf(buf, sizeof(buf), "rxq%d", i); + error = iflib_irq_alloc_generic(ctx, &que->que_irq, rid, IFLIB_INTR_RX, + ixv_msix_que, que, que->rxr.me, buf); + + if (error) { + device_printf(iflib_get_dev(ctx), "Failed to allocate que int %d err: %d\n", i, error); + adapter->num_rx_queues = i + 1; + goto fail; + } + + que->msix = vector; + adapter->active_queues |= (u64)(1 << que->msix); + + } + + for (int i = 0; i < adapter->num_tx_queues; i++, tx_que++) { + snprintf(buf, sizeof(buf), "txq%d", i); + iflib_softirq_alloc_generic(ctx, i + 1, IFLIB_INTR_TX, tx_que, tx_que->txr.me, buf); + } + rid = vector + 1; + error = iflib_irq_alloc_generic(ctx, &adapter->irq, rid, IFLIB_INTR_ADMIN, + ixv_msix_mbx, adapter, 0, "aq"); + if (error) { + device_printf(iflib_get_dev(ctx), "Failed to register admin handler\n"); + return (error); + } + + adapter->vector = vector; + /* + ** Due to a broken design, QEMU will fail to properly + ** enable the guest for MSIX unless the vectors in + ** the table are all set up, so we must rewrite the + ** ENABLE in the MSIX control register again at this + ** point to cause it to successfully initialize us.
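 + ** + ** Concretely, the workaround below locates the MSIX capability with + ** pci_find_cap(), reads the 16-bit message control word at + ** PCIR_MSIX_CTRL, ORs in PCIM_MSIXCTRL_MSIX_ENABLE and writes it + ** back.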
*/ + if (adapter->hw.mac.type == ixgbe_mac_82599_vf) { + int msix_ctrl; + pci_find_cap(dev, PCIY_MSIX, &rid); + rid += PCIR_MSIX_CTRL; + msix_ctrl = pci_read_config(dev, rid, 2); + msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE; + pci_write_config(dev, rid, msix_ctrl, 2); + } + + return (0); + +fail: + iflib_irq_free(ctx, &adapter->irq); + que = adapter->rx_queues; + for (int i = 0; i < adapter->num_rx_queues; i++, que++) + iflib_irq_free(ctx, &que->que_irq); + return (error); +} + +static int +ixv_allocate_pci_resources(if_ctx_t ctx) +{ + struct adapter *adapter = iflib_get_softc(ctx); + int rid; + device_t dev = iflib_get_dev(ctx); + + rid = PCIR_BAR(0); + adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, + &rid, RF_ACTIVE); + + if (!(adapter->pci_mem)) { + device_printf(dev, "Unable to allocate bus resource: memory\n"); + return (ENXIO); + } + + adapter->osdep.mem_bus_space_tag = + rman_get_bustag(adapter->pci_mem); + adapter->osdep.mem_bus_space_handle = + rman_get_bushandle(adapter->pci_mem); + adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle; + + /* Pick up the tuneable queues */ + adapter->num_rx_queues = adapter->num_tx_queues = ixv_num_queues; + adapter->hw.back = adapter; + return (0); +} + +static void +ixv_free_pci_resources(if_ctx_t ctx) +{ + struct adapter *adapter = iflib_get_softc(ctx); + struct ix_rx_queue *que = adapter->rx_queues; + device_t dev = iflib_get_dev(ctx); + + /* Release the admin/link MSIX interrupt */ + if (adapter->intr_type == IFLIB_INTR_MSIX) + iflib_irq_free(ctx, &adapter->irq); + + /* Release the MSIX queue interrupts */ + for (int i = 0; i < adapter->num_rx_queues; i++, que++) { + iflib_irq_free(ctx, &que->que_irq); + } + + /* Finally, release the PCI memory resource */ + if (adapter->pci_mem != NULL) + bus_release_resource(dev, SYS_RES_MEMORY, + PCIR_BAR(0), adapter->pci_mem); + + return; +} + +/********************************************************************* + * + * Setup networking device structure and register an interface. + * + **********************************************************************/ +static int +ixv_setup_interface(if_ctx_t ctx) +{ + struct adapter *adapter = iflib_get_softc(ctx); + if_softc_ctx_t scctx = adapter->shared; + struct ifnet *ifp = iflib_get_ifp(ctx); + uint64_t cap = 0; + + INIT_DEBUGOUT("ixv_setup_interface: begin"); + + if_setbaudrate(ifp, 1000000000); + ifp->if_snd.ifq_maxlen = scctx->isc_ntxd[0] - 2; + + adapter->max_frame_size = + ifp->if_mtu + IXGBE_MTU_HDR_VLAN; + + /* + * Tell the upper layer(s) we support long frames. + */ + + cap |= IFCAP_HWCSUM | IFCAP_TSO4 | IFCAP_VLAN_HWCSUM; + cap |= IFCAP_JUMBO_MTU; + cap |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWTSO | IFCAP_VLAN_MTU; + cap |= IFCAP_LRO; + + if_setifheaderlen(ifp, sizeof(struct ether_vlan_header)); + if_setcapabilitiesbit(ifp, cap, 0); + if_setcapenable(ifp, if_getcapabilities(ifp)); + + ifmedia_set(adapter->media, IFM_ETHER | IFM_AUTO); + + return (0); +} + +/********************************************************************* + * + * Enable transmit unit.
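 + * + * For each ring this programs the descriptor base and length + * (VFTDBAL/VFTDBAH/VFTDLEN), zeroes the head and tail indices, sets + * WTHRESH in VFTXDCTL for burst writeback, and only then sets + * IXGBE_TXDCTL_ENABLE.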
+ * + **********************************************************************/ +static void +ixv_initialize_transmit_units(if_ctx_t ctx) +{ + struct adapter *adapter = iflib_get_softc(ctx); + if_softc_ctx_t scctx = adapter->shared; + struct ix_tx_queue *que = adapter->tx_queues; + struct ixgbe_hw *hw = &adapter->hw; + int i; + + for (i = 0; i < adapter->num_tx_queues; i++, que++) { + struct tx_ring *txr = &que->txr; + u64 tdba = txr->tx_paddr; + u32 txctrl, txdctl; + int j = txr->me; + + /* Set WTHRESH to 8, burst writeback */ + txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(j)); + txdctl |= (8 << 16); + IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(j), txdctl); + + /* Set the HW Tx Head and Tail indices */ + IXGBE_WRITE_REG(&adapter->hw, IXGBE_VFTDH(j), 0); + IXGBE_WRITE_REG(&adapter->hw, IXGBE_VFTDT(j), 0); + + /* Set Tx Tail register */ + txr->tail = IXGBE_VFTDT(j); + + /* Set Ring parameters */ + IXGBE_WRITE_REG(hw, IXGBE_VFTDBAL(j), + (tdba & 0x00000000ffffffffULL)); + IXGBE_WRITE_REG(hw, IXGBE_VFTDBAH(j), (tdba >> 32)); + IXGBE_WRITE_REG(hw, IXGBE_VFTDLEN(j), + scctx->isc_ntxd[0] * + sizeof(struct ixgbe_legacy_tx_desc)); + txctrl = IXGBE_READ_REG(hw, IXGBE_VFDCA_TXCTRL(j)); + txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN; + IXGBE_WRITE_REG(hw, IXGBE_VFDCA_TXCTRL(j), txctrl); + + /* Now enable */ + txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(j)); + txdctl |= IXGBE_TXDCTL_ENABLE; + IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(j), txdctl); + } + + return; +} + + +/********************************************************************* + * + * Setup receive registers and features. + * + **********************************************************************/ +#define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT 2 + +static void +ixv_initialize_receive_units(if_ctx_t ctx) +{ + struct adapter *adapter = iflib_get_softc(ctx); + if_softc_ctx_t scctx; + struct ixgbe_hw *hw = &adapter->hw; + struct ifnet *ifp = adapter->ifp; + struct ix_rx_queue *que = adapter->rx_queues; + u32 bufsz, rxcsum, psrtype; + + if (ifp->if_mtu > ETHERMTU) + bufsz = 4096 >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; + else + bufsz = 2048 >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; + + psrtype = IXGBE_PSRTYPE_TCPHDR | IXGBE_PSRTYPE_UDPHDR | + IXGBE_PSRTYPE_IPV4HDR | IXGBE_PSRTYPE_IPV6HDR | + IXGBE_PSRTYPE_L2HDR; + + IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype); + + /* Tell PF our max_frame size */ + ixgbevf_rlpml_set_vf(hw, adapter->max_frame_size); + scctx = adapter->shared; + + for (int i = 0; i < adapter->num_rx_queues; i++, que++) { + struct rx_ring *rxr = &que->rxr; + u64 rdba = rxr->rx_paddr; + u32 reg, rxdctl; + int j = rxr->me; + + /* Disable the queue */ + rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(j)); + rxdctl &= ~IXGBE_RXDCTL_ENABLE; + IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(j), rxdctl); + for (int k = 0; k < 10; k++) { + if (IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(j)) & + IXGBE_RXDCTL_ENABLE) + msec_delay(1); + else + break; + } + wmb(); + /* Setup the Base and Length of the Rx Descriptor Ring */ + IXGBE_WRITE_REG(hw, IXGBE_VFRDBAL(j), + (rdba & 0x00000000ffffffffULL)); + IXGBE_WRITE_REG(hw, IXGBE_VFRDBAH(j), + (rdba >> 32)); + IXGBE_WRITE_REG(hw, IXGBE_VFRDLEN(j), + scctx->isc_nrxd[0] * sizeof(union ixgbe_adv_rx_desc)); + + /* Reset the ring indices */ + IXGBE_WRITE_REG(hw, IXGBE_VFRDH(rxr->me), 0); + IXGBE_WRITE_REG(hw, IXGBE_VFRDT(rxr->me), 0); + + /* Set up the SRRCTL register */ + reg = IXGBE_READ_REG(hw, IXGBE_VFSRRCTL(j)); + reg &= ~IXGBE_SRRCTL_BSIZEHDR_MASK; + reg &= ~IXGBE_SRRCTL_BSIZEPKT_MASK; + reg |= bufsz; + reg |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF; + IXGBE_WRITE_REG(hw, 
IXGBE_VFSRRCTL(j), reg); + + /* Capture Rx Tail index */ + rxr->tail = IXGBE_VFRDT(rxr->me); + + /* Do the queue enabling last */ + rxdctl |= IXGBE_RXDCTL_ENABLE | IXGBE_RXDCTL_VME; + IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(j), rxdctl); + for (int l = 0; l < 10; l++) { + if (IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(j)) & + IXGBE_RXDCTL_ENABLE) + break; + else + msec_delay(1); + } + wmb(); + + /* Set the Tail Pointer */ +#ifdef DEV_NETMAP + /* + * In netmap mode, we must preserve the buffers made + * available to userspace before the if_init() + * (this is true by default on the TX side, because + * init makes all buffers available to userspace). + * + * netmap_reset() and the device specific routines + * (e.g. ixgbe_setup_receive_rings()) map these + * buffers at the end of the NIC ring, so here we + * must set the RDT (tail) register to make sure + * they are not overwritten. + * + * In this driver the NIC ring starts at RDH = 0, + * RDT points to the last slot available for reception (?), + * so RDT = num_rx_desc - 1 means the whole ring is available. + */ + if (ifp->if_capenable & IFCAP_NETMAP) { + struct netmap_adapter *na = NA(adapter->ifp); + struct netmap_kring *kring = &na->rx_rings[j]; + int t = na->num_rx_desc - 1 - nm_kr_rxspace(kring); + + IXGBE_WRITE_REG(hw, IXGBE_VFRDT(rxr->me), t); + } else +#endif /* DEV_NETMAP */ + IXGBE_WRITE_REG(hw, IXGBE_VFRDT(rxr->me), + scctx->isc_nrxd[0] - 1); + } + + rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM); + + if (ifp->if_capenable & IFCAP_RXCSUM) + rxcsum |= IXGBE_RXCSUM_PCSD; + + if (!(rxcsum & IXGBE_RXCSUM_PCSD)) + rxcsum |= IXGBE_RXCSUM_IPPCSE; + + IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum); + + return; +} + +static void +ixv_setup_vlan_support(if_ctx_t ctx) +{ + struct adapter *adapter = iflib_get_softc(ctx); + struct ixgbe_hw *hw = &adapter->hw; + u32 ctrl, vid, vfta, retry; + struct rx_ring *rxr; + + /* + ** We get here thru init_locked, meaning + ** a soft reset; this has already cleared + ** the VFTA and other state, so if there + ** have been no vlans registered do nothing. + */ + if (adapter->num_vlans == 0) + return; + + /* Enable the queues */ + for (int i = 0; i < adapter->num_rx_queues; i++) { + rxr = &adapter->rx_queues[i].rxr; + ctrl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i)); + ctrl |= IXGBE_RXDCTL_VME; + IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(i), ctrl); + /* + * Let Rx path know that it needs to store VLAN tag + * as part of extra mbuf info. + */ + rxr->vtag_strip = TRUE; + } + + /* + ** A soft reset zeroes out the VFTA, so + ** we need to repopulate it now. + */ + for (int i = 0; i < IXGBE_VFTA_SIZE; i++) { + if (ixv_shadow_vfta[i] == 0) + continue; + vfta = ixv_shadow_vfta[i]; + /* + ** Reconstruct the vlan ids + ** based on the bits set in each + ** of the array ints. + */ + for (int j = 0; j < 32; j++) { + retry = 0; + if ((vfta & (1 << j)) == 0) + continue; + vid = (i * 32) + j; + /* Call the shared code mailbox routine */ + while (ixgbe_set_vfta(hw, vid, 0, TRUE)) { + if (++retry > 5) + break; + } + } + } +} + +/* +** This routine is run via a vlan config EVENT, +** it enables us to use the HW Filter table since +** we can get the vlan id. This just creates the +** entry in the soft version of the VFTA, init will +** repopulate the real table.
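+** +** The shadow table is an array of 32-bit words, so a given vlan id +** maps to word (vtag >> 5) & 0x7F and bit vtag & 0x1F; vlan id 100, +** for example, lands in word 3, bit 4.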
+*/ +static void +ixv_if_register_vlan(if_ctx_t ctx, u16 vtag) +{ + struct adapter *adapter = iflib_get_softc(ctx); + u16 index, bit; + + index = (vtag >> 5) & 0x7F; + bit = vtag & 0x1F; + ixv_shadow_vfta[index] |= (1 << bit); + ++adapter->num_vlans; +} + +/* +** This routine is run via a vlan +** unconfig EVENT, removing our entry +** in the soft vfta. +*/ +static void +ixv_if_unregister_vlan(if_ctx_t ctx, u16 vtag) +{ + struct adapter *adapter = iflib_get_softc(ctx); + u16 index, bit; + + index = (vtag >> 5) & 0x7F; + bit = vtag & 0x1F; + ixv_shadow_vfta[index] &= ~(1 << bit); + --adapter->num_vlans; +} + +static void +ixv_if_enable_intr(if_ctx_t ctx) +{ + struct adapter *adapter = iflib_get_softc(ctx); + struct ixgbe_hw *hw = &adapter->hw; + struct ix_rx_queue *que = adapter->rx_queues; + u32 mask = (IXGBE_EIMS_ENABLE_MASK & ~IXGBE_EIMS_RTX_QUEUE); + + IXGBE_WRITE_REG(hw, IXGBE_VTEIMS, mask); + + mask = IXGBE_EIMS_ENABLE_MASK; + mask &= ~(IXGBE_EIMS_OTHER | IXGBE_EIMS_LSC); + IXGBE_WRITE_REG(hw, IXGBE_VTEIAC, mask); + + for (int i = 0; i < adapter->num_rx_queues; i++, que++) + ixv_enable_queue(adapter, que->msix); + + IXGBE_WRITE_FLUSH(hw); + + return; +} + +static void +ixv_if_disable_intr(if_ctx_t ctx) +{ + struct adapter *adapter = iflib_get_softc(ctx); + IXGBE_WRITE_REG(&adapter->hw, IXGBE_VTEIAC, 0); + IXGBE_WRITE_REG(&adapter->hw, IXGBE_VTEIMC, ~0); + IXGBE_WRITE_FLUSH(&adapter->hw); + return; +} + +/* +** Setup the correct IVAR register for a particular MSIX interrupt +** - entry is the register array entry +** - vector is the MSIX vector for this queue +** - type is RX/TX/MISC +*/ +static void +ixv_set_ivar(struct adapter *adapter, u8 entry, u8 vector, s8 type) +{ + struct ixgbe_hw *hw = &adapter->hw; + u32 ivar, index; + + vector |= IXGBE_IVAR_ALLOC_VAL; + + if (type == -1) { /* MISC IVAR */ + ivar = IXGBE_READ_REG(hw, IXGBE_VTIVAR_MISC); + ivar &= ~0xFF; + ivar |= vector; + IXGBE_WRITE_REG(hw, IXGBE_VTIVAR_MISC, ivar); + } else { /* RX/TX IVARS */ + index = (16 * (entry & 1)) + (8 * type); + ivar = IXGBE_READ_REG(hw, IXGBE_VTIVAR(entry >> 1)); + ivar &= ~(0xFF << index); + ivar |= (vector << index); + IXGBE_WRITE_REG(hw, IXGBE_VTIVAR(entry >> 1), ivar); + } +} + +static void +ixv_configure_ivars(struct adapter *adapter) +{ + struct ix_rx_queue *que = adapter->rx_queues; + + MPASS(adapter->num_rx_queues == adapter->num_tx_queues); + + for (int i = 0; i < adapter->num_rx_queues; i++, que++) { + /* First the RX queue entry */ + ixv_set_ivar(adapter, i, que->msix, 0); + /* ... and the TX */ + ixv_set_ivar(adapter, i, que->msix, 1); + /* Set an initial value in EITR */ + IXGBE_WRITE_REG(&adapter->hw, + IXGBE_VTEITR(que->msix), IXV_EITR_DEFAULT); + } + + /* For the mailbox interrupt */ + ixv_set_ivar(adapter, 1, adapter->vector, -1); +} + +/* +** The VF stats registers never have a truly virgin +** starting point, so this routine tries to make an +** artificial one, marking ground zero on attach as +** it were.
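+** +** ixv_save_stats() folds the deltas accumulated since the last base +** into the saved_reset_* counters, and ixv_init_stats() then +** snapshots the current register values as the new base_* and last_* +** marks.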
+*/ +static void +ixv_save_stats(struct adapter *adapter) +{ + if (adapter->stats.vf.vfgprc || adapter->stats.vf.vfgptc) { + adapter->stats.vf.saved_reset_vfgprc += + adapter->stats.vf.vfgprc - adapter->stats.vf.base_vfgprc; + adapter->stats.vf.saved_reset_vfgptc += + adapter->stats.vf.vfgptc - adapter->stats.vf.base_vfgptc; + adapter->stats.vf.saved_reset_vfgorc += + adapter->stats.vf.vfgorc - adapter->stats.vf.base_vfgorc; + adapter->stats.vf.saved_reset_vfgotc += + adapter->stats.vf.vfgotc - adapter->stats.vf.base_vfgotc; + adapter->stats.vf.saved_reset_vfmprc += + adapter->stats.vf.vfmprc - adapter->stats.vf.base_vfmprc; + } +} + +static void +ixv_init_stats(struct adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + + adapter->stats.vf.last_vfgprc = IXGBE_READ_REG(hw, IXGBE_VFGPRC); + adapter->stats.vf.last_vfgorc = IXGBE_READ_REG(hw, IXGBE_VFGORC_LSB); + adapter->stats.vf.last_vfgorc |= + (((u64)(IXGBE_READ_REG(hw, IXGBE_VFGORC_MSB))) << 32); + + adapter->stats.vf.last_vfgptc = IXGBE_READ_REG(hw, IXGBE_VFGPTC); + adapter->stats.vf.last_vfgotc = IXGBE_READ_REG(hw, IXGBE_VFGOTC_LSB); + adapter->stats.vf.last_vfgotc |= + (((u64)(IXGBE_READ_REG(hw, IXGBE_VFGOTC_MSB))) << 32); + + adapter->stats.vf.last_vfmprc = IXGBE_READ_REG(hw, IXGBE_VFMPRC); + + adapter->stats.vf.base_vfgprc = adapter->stats.vf.last_vfgprc; + adapter->stats.vf.base_vfgorc = adapter->stats.vf.last_vfgorc; + adapter->stats.vf.base_vfgptc = adapter->stats.vf.last_vfgptc; + adapter->stats.vf.base_vfgotc = adapter->stats.vf.last_vfgotc; + adapter->stats.vf.base_vfmprc = adapter->stats.vf.last_vfmprc; +} + +#define UPDATE_STAT_32(reg, last, count) \ +{ \ + u32 current = IXGBE_READ_REG(hw, reg); \ + if (current < last) \ + count += 0x100000000LL; \ + last = current; \ + count &= 0xFFFFFFFF00000000LL; \ + count |= current; \ +} + +#define UPDATE_STAT_36(lsb, msb, last, count) \ +{ \ + u64 cur_lsb = IXGBE_READ_REG(hw, lsb); \ + u64 cur_msb = IXGBE_READ_REG(hw, msb); \ + u64 current = ((cur_msb << 32) | cur_lsb); \ + if (current < last) \ + count += 0x1000000000LL; \ + last = current; \ + count &= 0xFFFFFFF000000000LL; \ + count |= current; \ +} + +/* +** ixv_update_stats - Update the board statistics counters. +*/ +void +ixv_update_stats(struct adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + + UPDATE_STAT_32(IXGBE_VFGPRC, adapter->stats.vf.last_vfgprc, + adapter->stats.vf.vfgprc); + UPDATE_STAT_32(IXGBE_VFGPTC, adapter->stats.vf.last_vfgptc, + adapter->stats.vf.vfgptc); + UPDATE_STAT_36(IXGBE_VFGORC_LSB, IXGBE_VFGORC_MSB, + adapter->stats.vf.last_vfgorc, adapter->stats.vf.vfgorc); + UPDATE_STAT_36(IXGBE_VFGOTC_LSB, IXGBE_VFGOTC_MSB, + adapter->stats.vf.last_vfgotc, adapter->stats.vf.vfgotc); + UPDATE_STAT_32(IXGBE_VFMPRC, adapter->stats.vf.last_vfmprc, + adapter->stats.vf.vfmprc); +} + +/* + * Add statistic sysctls for the VF. 
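 + * + * The driver-level counter sits at the top of the device's sysctl + * tree and the hardware counters under a "mac" child node, e.g. + * dev.ixv.0.mac.good_pkts_rcvd for the first unit (assuming the + * usual ixv device name).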
+ */ +static void +ixv_add_stats_sysctls(struct adapter *adapter) +{ + device_t dev = adapter->dev; + + struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev); + struct sysctl_oid *tree = device_get_sysctl_tree(dev); + struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree); + struct ixgbevf_hw_stats *stats = &adapter->stats.vf; + + struct sysctl_oid *stat_node; + struct sysctl_oid_list *stat_list; + + /* Driver Statistics */ + SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_events", + CTLFLAG_RD, &adapter->watchdog_events, + "Watchdog timeouts"); + + stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac", + CTLFLAG_RD, NULL, + "VF Statistics (read from HW registers)"); + stat_list = SYSCTL_CHILDREN(stat_node); + + SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_rcvd", + CTLFLAG_RD, &stats->vfgprc, + "Good Packets Received"); + SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_rcvd", + CTLFLAG_RD, &stats->vfgorc, + "Good Octets Received"); + SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_rcvd", + CTLFLAG_RD, &stats->vfmprc, + "Multicast Packets Received"); + SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd", + CTLFLAG_RD, &stats->vfgptc, + "Good Packets Transmitted"); + SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd", + CTLFLAG_RD, &stats->vfgotc, + "Good Octets Transmitted"); +} + +static void +ixv_set_sysctl_value(struct adapter *adapter, const char *name, + const char *description, int *limit, int value) +{ + *limit = value; + SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev), + SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)), + OID_AUTO, name, CTLFLAG_RW, limit, value, description); +} + +/********************************************************************** + * + * This routine is called only when the debug sysctl below is set. + * It provides a way to take a look at important statistics + * maintained by the driver and hardware. + * + **********************************************************************/ +static void +ixv_print_debug_info(struct adapter *adapter) +{ + device_t dev = adapter->dev; + struct ixgbe_hw *hw = &adapter->hw; + + device_printf(dev, "Error Byte Count = %u\n", + IXGBE_READ_REG(hw, IXGBE_ERRBC)); + + device_printf(dev, "MBX IRQ Handled: %lu\n", + (unsigned long)adapter->link_irq); + return; +} + +static int +ixv_sysctl_debug(SYSCTL_HANDLER_ARGS) +{ + int error, result; + struct adapter *adapter; + + result = -1; + error = sysctl_handle_int(oidp, &result, 0, req); + + if (error || !req->newptr) + return (error); + + if (result == 1) { + adapter = (struct adapter *) arg1; + ixv_print_debug_info(adapter); + } + return (error); +} + Index: sys/dev/ixgbe/iflib_ix_txrx.c =================================================================== --- /dev/null +++ sys/dev/ixgbe/iflib_ix_txrx.c @@ -0,0 +1,541 @@ +/****************************************************************************** + + Copyright (c) 2001-2015, Intel Corporation + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. 
Neither the name of the Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + +******************************************************************************/ +/* $FreeBSD$*/ + + +#ifndef IXGBE_STANDALONE_BUILD +#include "opt_inet.h" +#include "opt_inet6.h" +#include "opt_rss.h" +#endif + +#include "ixgbe.h" + +#ifdef RSS +#include +#include +#endif + +#undef CSUM_TCP +#define CSUM_TCP (CSUM_IP_TCP | CSUM_IP6_TCP) +#undef CSUM_UDP +#define CSUM_UDP (CSUM_IP_UDP | CSUM_IP6_UDP) +#undef CSUM_SCTP +#define CSUM_SCTP (CSUM_IP_SCTP | CSUM_IP6_SCTP) + + + +/********************************************************************* + * Local Function prototypes + *********************************************************************/ +static int ixgbe_isc_txd_encap(void *arg, if_pkt_info_t pi); +static void ixgbe_isc_txd_flush(void *arg, uint16_t txqid, uint32_t pidx); +static int ixgbe_isc_txd_credits_update(void *arg, uint16_t txqid, uint32_t cidx, bool clear); + +static void ixgbe_isc_rxd_refill(void *arg, uint16_t rxqid, uint8_t flid __unused, + uint32_t pidx, uint64_t *paddrs, caddr_t *vaddrs __unused, uint16_t count, uint16_t buf_len); +static void ixgbe_isc_rxd_flush(void *arg, uint16_t rxqid, uint8_t flid __unused, uint32_t pidx); +static int ixgbe_isc_rxd_available(void *arg, uint16_t rxqid, uint32_t idx, + int budget); +static int ixgbe_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri); + +static void ixgbe_rx_checksum(u32 staterr, if_rxd_info_t ri, u32 ptype); + +extern void ixgbe_if_enable_intr(if_ctx_t ctx); +extern int ixgbe_intr(void *arg); +static int ixgbe_determine_rsstype(u16 pkt_info); + +struct if_txrx ixgbe_txrx = { + ixgbe_isc_txd_encap, + ixgbe_isc_txd_flush, + ixgbe_isc_txd_credits_update, + ixgbe_isc_rxd_available, + ixgbe_isc_rxd_pkt_get, + ixgbe_isc_rxd_refill, + ixgbe_isc_rxd_flush, + ixgbe_intr +}; + +extern if_shared_ctx_t ixgbe_sctx; + +void +ixgbe_init_tx_ring(struct ix_tx_queue *que) +{ + struct tx_ring *txr = &que->txr; + if_softc_ctx_t scctx = que->adapter->shared; + struct ixgbe_tx_buf *buf; + + buf = txr->tx_buffers; + for (int i = 0; i < scctx->isc_ntxd[0]; i++, buf++) { + buf->eop = -1; + } +} +/********************************************************************* + * + * Advanced Context Descriptor setup for VLAN, CSUM or TSO + * + **********************************************************************/ +static int +ixgbe_tx_ctx_setup(struct ixgbe_adv_tx_context_desc *TXD, if_pkt_info_t pi) +{ + u32 vlan_macip_lens, type_tucmd_mlhl; + u32 olinfo_status, mss_l4len_idx, pktlen, offload; + + offload = TRUE; + olinfo_status = mss_l4len_idx = vlan_macip_lens = type_tucmd_mlhl = 0; + /* VLAN 
MACLEN IPLEN */ + vlan_macip_lens |= (htole16(pi->ipi_vtag) << IXGBE_ADVTXD_VLAN_SHIFT); + vlan_macip_lens |= pi->ipi_ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT; + + pktlen = pi->ipi_len; + /* First check if TSO is to be used */ + if (pi->ipi_csum_flags & CSUM_TSO) { + /* This is used in the transmit desc in encap */ + pktlen = pi->ipi_len - pi->ipi_ehdrlen - pi->ipi_ip_hlen - pi->ipi_tcp_hlen; + mss_l4len_idx |= (pi->ipi_tso_segsz << IXGBE_ADVTXD_MSS_SHIFT); + mss_l4len_idx |= (pi->ipi_tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT); + } + + olinfo_status |= pktlen << IXGBE_ADVTXD_PAYLEN_SHIFT; + + if (pi->ipi_flags & IPI_TX_IPV4) { + type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4; + /* Tell transmit desc to also do IPv4 checksum. */ + if (pi->ipi_csum_flags & (CSUM_IP|CSUM_TSO)) + olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8; + } else if (pi->ipi_flags & IPI_TX_IPV6) + type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6; + else + offload = FALSE; + + vlan_macip_lens |= pi->ipi_ip_hlen; + + switch (pi->ipi_ipproto) { + case IPPROTO_TCP: + if (pi->ipi_csum_flags & CSUM_TCP) + type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP; + else + offload = FALSE; + break; + case IPPROTO_UDP: + if (pi->ipi_csum_flags & CSUM_UDP) + type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP; + else + offload = FALSE; + break; + case IPPROTO_SCTP: + if (pi->ipi_csum_flags & CSUM_SCTP) + type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP; + else + offload = FALSE; + break; + default: + offload = FALSE; + break; + } +/* Insert L4 checksum into data descriptors */ + if (offload) + olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8; + + type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT; + + /* Now copy bits into descriptor */ + TXD->vlan_macip_lens = htole32(vlan_macip_lens); + TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl); + TXD->seqnum_seed = htole32(0); + TXD->mss_l4len_idx = htole32(mss_l4len_idx); + + return (olinfo_status); +} + +static int +ixgbe_isc_txd_encap(void *arg, if_pkt_info_t pi) +{ + struct adapter *sc = arg; + if_softc_ctx_t scctx = sc->shared; + struct ix_tx_queue *que = &sc->tx_queues[pi->ipi_qsidx]; + struct tx_ring *txr = &que->txr; + struct ixgbe_tx_buf *buf; + int nsegs = pi->ipi_nsegs; + bus_dma_segment_t *segs = pi->ipi_segs; + union ixgbe_adv_tx_desc *txd = NULL; + struct ixgbe_adv_tx_context_desc *TXD; + int i, j, first, cidx_last; + u32 olinfo_status, cmd, flags; + + cmd = (IXGBE_ADVTXD_DTYP_DATA | + IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT); + + if (pi->ipi_mflags & M_VLANTAG) + cmd |= IXGBE_ADVTXD_DCMD_VLE; + + i = first = pi->ipi_pidx; + flags = (pi->ipi_flags & IPI_TX_INTR) ? 
IXGBE_TXD_CMD_RS : 0; + + TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[first]; + if (pi->ipi_csum_flags & CSUM_OFFLOAD || IXGBE_IS_X550VF(sc) || pi->ipi_vtag) { + /********************************************* + * Set up the appropriate offload context; + * this will consume the first descriptor + *********************************************/ + olinfo_status = ixgbe_tx_ctx_setup(TXD, pi); + if (pi->ipi_csum_flags & CSUM_TSO) { + cmd |= IXGBE_ADVTXD_DCMD_TSE; + ++txr->tso_tx; + } + + if (++i == scctx->isc_ntxd[0]) + i = 0; + } else { + /* Indicate the whole packet as payload when not doing TSO */ + olinfo_status = pi->ipi_len << IXGBE_ADVTXD_PAYLEN_SHIFT; + } + + olinfo_status |= IXGBE_ADVTXD_CC; + for (j = 0; j < nsegs; j++) { + bus_size_t seglen; + bus_addr_t segaddr; + + txd = &txr->tx_base[i]; + buf = &txr->tx_buffers[i]; + seglen = segs[j].ds_len; + segaddr = htole64(segs[j].ds_addr); + + txd->read.buffer_addr = segaddr; + txd->read.cmd_type_len = htole32(flags | cmd | seglen); + txd->read.olinfo_status = htole32(olinfo_status); + + cidx_last = i; + if (++i == scctx->isc_ntxd[0]) { + i = 0; + } + } + + txd->read.cmd_type_len |= + htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS); + + /* Set the EOP descriptor that will be marked done */ + buf = &txr->tx_buffers[first]; + buf->eop = cidx_last; + + txr->bytes += pi->ipi_len; + pi->ipi_new_pidx = i; + + ++txr->total_packets; + + return (0); +} + +static void +ixgbe_isc_txd_flush(void *arg, uint16_t txqid, uint32_t pidx) +{ + struct adapter *sc = arg; + struct ix_tx_queue *que = &sc->tx_queues[txqid]; + struct tx_ring *txr = &que->txr; + + IXGBE_WRITE_REG(&sc->hw, txr->tail, pidx); +} + +static int +ixgbe_isc_txd_credits_update(void *arg, uint16_t txqid, uint32_t cidx_init, bool clear) +{ + struct adapter *sc = arg; + if_softc_ctx_t scctx = sc->shared; + struct ix_tx_queue *que = &sc->tx_queues[txqid]; + struct tx_ring *txr = &que->txr; + + u32 cidx, ntxd, processed = 0; + u32 limit = sc->tx_process_limit; + + struct ixgbe_tx_buf *buf; + union ixgbe_adv_tx_desc *txd; + + cidx = cidx_init; + + buf = &txr->tx_buffers[cidx]; + txd = &txr->tx_base[cidx]; + ntxd = scctx->isc_ntxd[0]; + do { + int delta, eop = buf->eop; + union ixgbe_adv_tx_desc *eopd; + + if (eop == -1) { /* No work */ + break; + } + + eopd = &txr->tx_base[eop]; + if ((eopd->wb.status & IXGBE_TXD_STAT_DD) == 0) { + break; /* I/O not complete */ + } else if (clear) + buf->eop = -1; /* clear indicates processed */ + + /* + * Update for the multi-segment case. + */ + if (eop != cidx) { + delta = eop - cidx; + if (eop < cidx) + delta += ntxd; + processed += delta; + txd = eopd; + buf = &txr->tx_buffers[eop]; + cidx = eop; + } + processed++; + if (clear) + ++txr->packets; + + /* Try the next packet */ + txd++; + buf++; + cidx++; + /* reset with a wrap */ + if (__predict_false(cidx == scctx->isc_ntxd[0])) { + cidx = 0; + buf = txr->tx_buffers; + txd = txr->tx_base; + } + prefetch(txd); + prefetch(txd+1); + } while (__predict_true(--limit) && cidx != cidx_init); + + return (processed); +} + +static void +ixgbe_isc_rxd_refill(void *arg, uint16_t rxqid, uint8_t flid __unused, + uint32_t pidx, uint64_t *paddrs, caddr_t *vaddrs __unused, uint16_t count, uint16_t buf_len) +{ + struct adapter *sc = arg; + struct ix_rx_queue *que = &sc->rx_queues[rxqid]; + struct rx_ring *rxr = &que->rxr; + int i; + uint32_t next_pidx; + + for (i = 0, next_pidx = pidx; i < count; i++) { + rxr->rx_base[next_pidx].read.pkt_addr = htole64(paddrs[i]); + if (++next_pidx == sc->shared->isc_nrxd[0]) + 
next_pidx = 0; + } +} + +static void +ixgbe_isc_rxd_flush(void *arg, uint16_t rxqid, uint8_t flid __unused, uint32_t pidx) +{ + struct adapter *sc = arg; + struct ix_rx_queue *que = &sc->rx_queues[rxqid]; + struct rx_ring *rxr = &que->rxr; + + IXGBE_WRITE_REG(&sc->hw, rxr->tail, pidx); +} + +static int +ixgbe_isc_rxd_available(void *arg, uint16_t rxqid, uint32_t idx, int budget) +{ + struct adapter *sc = arg; + struct ix_rx_queue *que = &sc->rx_queues[rxqid]; + struct rx_ring *rxr = &que->rxr; + union ixgbe_adv_rx_desc *rxd; + u32 staterr; + int cnt, i, nrxd; + + nrxd = sc->shared->isc_nrxd[0]; + for (cnt = 0, i = idx; cnt < nrxd-1 && cnt <= budget;) { + rxd = &rxr->rx_base[i]; + staterr = le32toh(rxd->wb.upper.status_error); + + if ((staterr & IXGBE_RXD_STAT_DD) == 0) + break; + if (++i == nrxd) + i = 0; + if (staterr & IXGBE_RXD_STAT_EOP) + cnt++; + } + + return (cnt); +} + +/**************************************************************** + * This routine passes data that has been DMA'ed into host memory + * to the upper layer and initializes the ri structure. + * + * Returns 0 upon success, errno on failure + ***************************************************************/ + +static int +ixgbe_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri) +{ + struct adapter *adapter = arg; + struct ix_rx_queue *que = &adapter->rx_queues[ri->iri_qsidx]; + struct rx_ring *rxr = &que->rxr; + struct ifnet *ifp = iflib_get_ifp(adapter->ctx); + union ixgbe_adv_rx_desc *rxd; + + u16 pkt_info, len, cidx, i; + u16 vtag = 0; + u32 ptype; + u32 staterr = 0; + bool eop; + + i = 0; + cidx = ri->iri_cidx; + do { + rxd = &rxr->rx_base[cidx]; + staterr = le32toh(rxd->wb.upper.status_error); + pkt_info = le16toh(rxd->wb.lower.lo_dword.hs_rss.pkt_info); + + /* Error Checking then decrement count */ + MPASS((staterr & IXGBE_RXD_STAT_DD) != 0); + + len = le16toh(rxd->wb.upper.length); + ptype = le32toh(rxd->wb.lower.lo_dword.data) & + IXGBE_RXDADV_PKTTYPE_MASK; + + ri->iri_len += len; + rxr->bytes += len; + + rxd->wb.upper.status_error = 0; + eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0); + if (staterr & IXGBE_RXD_STAT_VP) { + vtag = le16toh(rxd->wb.upper.vlan); + } else { + vtag = 0; + } + + /* Make sure bad packets are discarded */ + if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) { + +#if __FreeBSD_version >= 1100036 + if (IXGBE_IS_VF(adapter)) + if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); +#endif + + rxr->rx_discarded++; + return (EBADMSG); + } + ri->iri_frags[i].irf_flid = 0; + ri->iri_frags[i].irf_idx = cidx; + ri->iri_frags[i].irf_len = len; + if (++cidx == adapter->shared->isc_nrxd[0]) + cidx = 0; + i++; + /* even a 16K packet shouldn't consume more than 8 clusters */ + MPASS(i < 9); + } while (!eop); + + rxr->rx_packets++; + rxr->packets++; + + if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) + ixgbe_rx_checksum(staterr, ri, ptype); + + ri->iri_flowid = le32toh(rxd->wb.lower.hi_dword.rss); + ri->iri_rsstype = ixgbe_determine_rsstype(pkt_info); + ri->iri_vtag = vtag; + ri->iri_nfrags = i; + if (vtag) + ri->iri_flags |= M_VLANTAG; + return (0); +} + +/********************************************************************* + * + * Verify that the hardware indicated that the checksum is valid. + * Inform the stack about the status of the checksum so that the + * stack doesn't spend time verifying it. 
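 + * + * staterr carries the descriptor status in its low bits and the + * error bits in bits 24-31: IPCS with IPE clear yields + * CSUM_IP_CHECKED | CSUM_IP_VALID, and L4CS with TCPE clear yields + * CSUM_DATA_VALID | CSUM_PSEUDO_HDR (CSUM_SCTP_VALID for SCTP).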
+ * + *********************************************************************/ +static void +ixgbe_rx_checksum(u32 staterr, if_rxd_info_t ri, u32 ptype) +{ + u16 status = (u16) staterr; + u8 errors = (u8) (staterr >> 24); + bool sctp = FALSE; + + if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 && + (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0) + sctp = TRUE; + + if (status & IXGBE_RXD_STAT_IPCS) { + if (!(errors & IXGBE_RXD_ERR_IPE)) { + /* IP Checksum Good */ + ri->iri_csum_flags = CSUM_IP_CHECKED | CSUM_IP_VALID; + } else + ri->iri_csum_flags = 0; + } + if (status & IXGBE_RXD_STAT_L4CS) { + u64 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); +#if __FreeBSD_version >= 800000 + if (sctp) + type = CSUM_SCTP_VALID; +#endif + if (!(errors & IXGBE_RXD_ERR_TCPE)) { + ri->iri_csum_flags |= type; + if (!sctp) + ri->iri_csum_data = htons(0xffff); + } + } +} + +/******************************************************************** + * + * Parse the packet type to determine the appropriate hash + * + ******************************************************************/ +static int +ixgbe_determine_rsstype(u16 pkt_info) +{ + switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) { + case IXGBE_RXDADV_RSSTYPE_IPV4_TCP: + return M_HASHTYPE_RSS_TCP_IPV4; + case IXGBE_RXDADV_RSSTYPE_IPV4: + return M_HASHTYPE_RSS_IPV4; + case IXGBE_RXDADV_RSSTYPE_IPV6_TCP: + return M_HASHTYPE_RSS_TCP_IPV6; + case IXGBE_RXDADV_RSSTYPE_IPV6_EX: + return M_HASHTYPE_RSS_IPV6_EX; + case IXGBE_RXDADV_RSSTYPE_IPV6: + return M_HASHTYPE_RSS_IPV6; + case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX: + return M_HASHTYPE_RSS_TCP_IPV6_EX; + case IXGBE_RXDADV_RSSTYPE_IPV4_UDP: + return M_HASHTYPE_RSS_UDP_IPV4; + case IXGBE_RXDADV_RSSTYPE_IPV6_UDP: + return M_HASHTYPE_RSS_UDP_IPV6; + case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX: + return M_HASHTYPE_RSS_UDP_IPV6_EX; + default: + return M_HASHTYPE_OPAQUE; + } +} Index: sys/dev/ixgbe/iflib_ixgbe.h =================================================================== --- /dev/null +++ sys/dev/ixgbe/iflib_ixgbe.h @@ -0,0 +1,804 @@ +/****************************************************************************** + + Copyright (c) 2001-2015, Intel Corporation + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. Neither the name of the Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + +******************************************************************************/ +/* $FreeBSD$*/ + + +#ifndef _IFLIB_IXGBE_H_ +#define _IFLIB_IXGBE_H_ + + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef PCI_IOV +#include +#include +#include +#endif + +#include "ixgbe_api.h" +#include "ixgbe_common.h" +#include "ixgbe_phy.h" +#include "ixgbe_vf.h" + +#ifdef PCI_IOV +#include "ixgbe_common.h" +#include "ixgbe_mbx.h" +#endif + +/* Tunables */ + +/* + * TxDescriptors Valid Range: 64-4096 Default Value: 256 This value is the + * number of transmit descriptors allocated by the driver. Increasing this + * value allows the driver to queue more transmits. Each descriptor is 16 + * bytes. Performance tests have shown the 2K value to be optimal for top + * performance. + */ +#define DEFAULT_TXD 1024 +#define PERFORM_TXD 2048 +#define MAX_TXD 4096 +#define MIN_TXD 64 + +/* + * RxDescriptors Valid Range: 64-4096 Default Value: 256 This value is the + * number of receive descriptors allocated for each RX queue. Increasing this + * value allows the driver to buffer more incoming packets. Each descriptor + * is 16 bytes. A receive buffer is also allocated for each descriptor. + * + * Note: with 8 rings and a dual port card, it is possible to bump up + * against the system mbuf pool limit; you can tune nmbclusters + * to adjust for this. + */ +#define DEFAULT_RXD 1024 +#define PERFORM_RXD 2048 +#define MAX_RXD 4096 +#define MIN_RXD 64 + +/* Alignment for rings */ +#define DBA_ALIGN 128 + +/* + * This is the max watchdog interval, i.e. the time that can + * pass between any two TX clean operations, which only happens + * while the TX hardware is functioning. + */ +#define IXGBE_WATCHDOG (10 * hz) + +/* + * These parameters control when the driver calls the routine to reclaim + * transmit descriptors. + */ +#define IXGBE_TX_CLEANUP_THRESHOLD (adapter->num_tx_desc / 8) +#define IXGBE_TX_OP_THRESHOLD (adapter->num_tx_desc / 32) + +/* These defines are used in MTU calculations */ +#define IXGBE_MAX_FRAME_SIZE 9728 +#define IXGBE_MTU_HDR (ETHER_HDR_LEN + ETHER_CRC_LEN) +#define IXGBE_MTU_HDR_VLAN (ETHER_HDR_LEN + ETHER_CRC_LEN + \ + ETHER_VLAN_ENCAP_LEN) +#define IXGBE_MAX_MTU (IXGBE_MAX_FRAME_SIZE - IXGBE_MTU_HDR) +#define IXGBE_MAX_MTU_VLAN (IXGBE_MAX_FRAME_SIZE - IXGBE_MTU_HDR_VLAN) + +/* Flow control constants */ +#define IXGBE_FC_PAUSE 0xFFFF +#define IXGBE_FC_HI 0x20000 +#define IXGBE_FC_LO 0x10000 + +/* + * Used for optimizing small rx mbufs. Effort is made to keep the copy + * small and aligned for the CPU L1 cache. + * + * MHLEN is typically 168 bytes, giving us 8-byte alignment. 
Getting + * 32 byte alignment needed for the fast bcopy results in 8 bytes being + * wasted. Getting 64 byte alignment, which _should_ be ideal for + * modern Intel CPUs, results in 40 bytes wasted and a significant drop + * in observed efficiency of the optimization, 97.9% -> 81.8%. + */ +#if __FreeBSD_version < 1002000 +#define MPKTHSIZE (sizeof(struct m_hdr) + sizeof(struct pkthdr)) +#endif +#define IXGBE_RX_COPY_HDR_PADDED ((((MPKTHSIZE - 1) / 32) + 1) * 32) +#define IXGBE_RX_COPY_LEN (MSIZE - IXGBE_RX_COPY_HDR_PADDED) +#define IXGBE_RX_COPY_ALIGN (IXGBE_RX_COPY_HDR_PADDED - MPKTHSIZE) + +/* Keep older OS drivers building... */ +#if !defined(SYSCTL_ADD_UQUAD) +#define SYSCTL_ADD_UQUAD SYSCTL_ADD_QUAD +#endif + +/* Defines for printing debug information */ +#define DEBUG_INIT 0 +#define DEBUG_IOCTL 0 +#define DEBUG_HW 0 + +#define INIT_DEBUGOUT(S) if (DEBUG_INIT) printf(S "\n") +#define INIT_DEBUGOUT1(S, A) if (DEBUG_INIT) printf(S "\n", A) +#define INIT_DEBUGOUT2(S, A, B) if (DEBUG_INIT) printf(S "\n", A, B) +#define IOCTL_DEBUGOUT(S) if (DEBUG_IOCTL) printf(S "\n") +#define IOCTL_DEBUGOUT1(S, A) if (DEBUG_IOCTL) printf(S "\n", A) +#define IOCTL_DEBUGOUT2(S, A, B) if (DEBUG_IOCTL) printf(S "\n", A, B) +#define HW_DEBUGOUT(S) if (DEBUG_HW) printf(S "\n") +#define HW_DEBUGOUT1(S, A) if (DEBUG_HW) printf(S "\n", A) +#define HW_DEBUGOUT2(S, A, B) if (DEBUG_HW) printf(S "\n", A, B) + +#define MAX_NUM_MULTICAST_ADDRESSES 128 +#define IXGBE_82598_SCATTER 100 +#define IXGBE_82599_SCATTER 32 +#define MSIX_82598_BAR 3 +#define MSIX_82599_BAR 4 +#define IXGBE_TSO_SIZE 262140 +#define IXGBE_RX_HDR 128 +#define IXGBE_VFTA_SIZE 128 +#define IXGBE_BR_SIZE 4096 +#define IXGBE_QUEUE_MIN_FREE 32 +#define IXGBE_MAX_TX_BUSY 10 +#define IXGBE_QUEUE_HUNG 0x80000000 + +#define IXV_EITR_DEFAULT 128 + +/* Supported offload bits in mbuf flag */ +#if __FreeBSD_version >= 1000000 +#define CSUM_OFFLOAD (CSUM_IP_TSO|CSUM_IP6_TSO|CSUM_IP| \ + CSUM_IP_UDP|CSUM_IP_TCP|CSUM_IP_SCTP| \ + CSUM_IP6_UDP|CSUM_IP6_TCP|CSUM_IP6_SCTP) +#elif __FreeBSD_version >= 800000 +#define CSUM_OFFLOAD (CSUM_IP|CSUM_TCP|CSUM_UDP|CSUM_SCTP) +#else +#define CSUM_OFFLOAD (CSUM_IP|CSUM_TCP|CSUM_UDP) +#endif + +/* Backward compatibility items for very old versions */ +#ifndef pci_find_cap +#define pci_find_cap pci_find_extcap +#endif + +#ifndef DEVMETHOD_END +#define DEVMETHOD_END { NULL, NULL } +#endif + +/* + * Interrupt Moderation parameters + */ +#define IXGBE_LOW_LATENCY 128 +#define IXGBE_AVE_LATENCY 400 +#define IXGBE_BULK_LATENCY 1200 + +/* Using 1FF (the max value), the interval is ~1.05ms */ +#define IXGBE_LINK_ITR_QUANTA 0x1FF +#define IXGBE_LINK_ITR ((IXGBE_LINK_ITR_QUANTA << 3) & \ + IXGBE_EITR_ITR_INT_MASK) + +/* MAC type macros */ +#define IXGBE_IS_X550VF(_adapter) \ + ((_adapter->hw.mac.type == ixgbe_mac_X550_vf) || \ + (_adapter->hw.mac.type == ixgbe_mac_X550EM_x_vf)) + +#define IXGBE_IS_VF(_adapter) \ + (IXGBE_IS_X550VF(_adapter) || \ + (_adapter->hw.mac.type == ixgbe_mac_X540_vf) || \ + (_adapter->hw.mac.type == ixgbe_mac_82599_vf)) + +#ifdef PCI_IOV +#define IXGBE_VF_INDEX(vmdq) ((vmdq) / 32) +#define IXGBE_VF_BIT(vmdq) (1 << ((vmdq) % 32)) + +#define IXGBE_VT_MSG_MASK 0xFFFF + +#define IXGBE_VT_MSGINFO(msg) \ + (((msg) & IXGBE_VT_MSGINFO_MASK) >> IXGBE_VT_MSGINFO_SHIFT) + +#define IXGBE_VF_GET_QUEUES_RESP_LEN 5 + +#define IXGBE_API_VER_1_0 0 +#define IXGBE_API_VER_2_0 1 /* Solaris API. Not supported. 
*/ +#define IXGBE_API_VER_1_1 2 +#define IXGBE_API_VER_UNKNOWN UINT16_MAX + +enum ixgbe_iov_mode { + IXGBE_64_VM, + IXGBE_32_VM, + IXGBE_NO_VM +}; +#endif /* PCI_IOV */ + + +/* + ***************************************************************************** + * vendor_info_array + * + * This array contains the list of Subvendor/Subdevice IDs on which the driver + * should load. + * + ***************************************************************************** + */ +typedef struct _ixgbe_vendor_info_t { + unsigned int vendor_id; + unsigned int device_id; + unsigned int subvendor_id; + unsigned int subdevice_id; + unsigned int index; +} ixgbe_vendor_info_t; + + +struct ixgbe_tx_buf { + int eop; +}; + + +/* + * Bus dma allocation structure used by ixgbe_dma_malloc and ixgbe_dma_free. + */ +struct ixgbe_dma_alloc { + bus_addr_t dma_paddr; + caddr_t dma_vaddr; + bus_dma_tag_t dma_tag; + bus_dmamap_t dma_map; + bus_dma_segment_t dma_seg; + bus_size_t dma_size; + int dma_nseg; +}; + +struct ixgbe_mc_addr { + u8 addr[IXGBE_ETH_LENGTH_OF_ADDRESS]; + u32 vmdq; +}; + +/* + * The transmit ring, one per queue + */ +struct tx_ring { + struct ix_tx_queue *que; + struct adapter *adapter; + u32 me; + u32 tail; + int busy; + union ixgbe_adv_tx_desc *tx_base; + struct ixgbe_tx_buf *tx_buffers; + uint64_t tx_paddr; +#ifdef IXGBE_FDIR + u16 atr_sample; + u16 atr_count; +#endif + u32 bytes; /* used for AIM */ + u32 packets; + /* Soft Stats */ + unsigned long tso_tx; + u64 total_packets; +}; + + +/* + * The Receive ring, one per rx queue + */ +struct rx_ring { + struct ix_rx_queue *que; + struct adapter *adapter; + u32 me; + u32 tail; + union ixgbe_adv_rx_desc *rx_base; + uint64_t rx_paddr; + bool hw_rsc; + bool vtag_strip; + bus_dma_tag_t ptag; + + u32 bytes; /* Used for AIM calc */ + u32 packets; + + /* Soft stats */ + u64 rx_irq; + u64 rx_copies; + u64 rx_packets; + u64 rx_bytes; + u64 rx_discarded; + u64 rsc_num; +#ifdef IXGBE_FDIR + u64 flm; +#endif +}; + +/* +** Driver queue struct: this is the interrupt container +** for the associated tx and rx ring. +*/ +struct ix_rx_queue { + struct adapter *adapter; + u32 msix; /* This queue's MSIX vector */ + u32 eims; /* This queue's EIMS bit */ + u32 eitr_setting; + struct resource *res; + void *tag; + int busy; + struct rx_ring rxr; + struct if_irq que_irq; + u64 irqs; +}; + +struct ix_tx_queue { + struct adapter *adapter; + u32 msix; /* This queue's MSIX vector */ + struct tx_ring txr; +}; + +#ifdef PCI_IOV +#define IXGBE_VF_CTS (1 << 0) /* VF is clear to send. */ +#define IXGBE_VF_CAP_MAC (1 << 1) /* VF is permitted to change MAC. */ +#define IXGBE_VF_CAP_VLAN (1 << 2) /* VF is permitted to join vlans. */ +#define IXGBE_VF_ACTIVE (1 << 3) /* VF is active. 
*/ + +#define IXGBE_MAX_VF_MC 30 /* Max number of multicast entries */ + +struct ixgbe_vf { + u_int pool; + u_int rar_index; + u_int maximum_frame_size; + uint32_t flags; + uint8_t ether_addr[ETHER_ADDR_LEN]; + uint16_t mc_hash[IXGBE_MAX_VF_MC]; + uint16_t num_mc_hashes; + uint16_t default_vlan; + uint16_t vlan_tag; + uint16_t api_ver; +}; +#endif /* PCI_IOV */ + +/* Our adapter structure */ +struct adapter { + /* much of the code assumes this is first :< */ + struct ixgbe_hw hw; + struct ixgbe_osdep osdep; + if_ctx_t ctx; + if_softc_ctx_t shared; +#define num_tx_queues shared->isc_ntxqsets +#define num_rx_queues shared->isc_nrxqsets +#define max_frame_size shared->isc_max_frame_size +#define intr_type shared->isc_intr + struct ifnet *ifp; + + struct device *dev; + + struct resource *pci_mem; + + /* + * Interrupt resources: this set is + * either used for legacy, or for Link + * when doing MSIX + */ + struct if_irq irq; + void *tag; + struct resource *res; + + struct ifmedia *media; + int msix; + int if_flags; + + u16 num_vlans; + + /* + ** Shadow VFTA table; this is needed because + ** the real vlan filter table gets cleared during + ** a soft reset and the driver needs to be able + ** to repopulate it. + */ + u32 shadow_vfta[IXGBE_VFTA_SIZE]; + + /* Info about the interface */ + u32 optics; + u32 fc; /* local flow ctrl setting */ + int advertise; /* link speeds */ + bool enable_aim; /* adaptive interrupt moderation */ + bool link_active; + u16 num_segs; + u32 link_speed; + bool link_up; + u32 vector; + u16 dmac; + bool eee_enabled; + u32 phy_layer; + + /* Power management-related */ + bool wol_support; + u32 wufc; + + /* Support for pluggable optics */ + bool sfp_probe; + + struct grouptask mod_task; /* SFP tasklet */ + struct grouptask msf_task; /* Multispeed Fiber */ + +#ifdef PCI_IOV + struct grouptask mbx_task; /* VF -> PF mailbox interrupt */ +#endif /* PCI_IOV */ +#ifdef IXGBE_FDIR + int fdir_reinit; + struct grouptask fdir_task; +#endif + + struct grouptask phy_task; /* PHY intr tasklet */ + + /* + ** Queues: + ** This is the irq holder, it has + ** an RX/TX pair of rings associated + ** with it. 
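 + ** + ** Note that num_tx_queues/num_rx_queues are #defined above onto the + ** iflib softc context fields, so these counts always reflect what + ** iflib negotiated at attach time.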
+ */ + struct ix_tx_queue *tx_queues; + struct ix_rx_queue *rx_queues; + u64 active_queues; + + u32 tx_process_limit; + u32 rx_process_limit; + + /* Multicast array memory */ + struct ixgbe_mc_addr *mta; + int num_vfs; + int pool; +#ifdef PCI_IOV + struct ixgbe_vf *vfs; +#endif +#ifdef DEV_NETMAP + void (*init_locked)(struct adapter *); + void (*stop_locked)(void *); +#endif + + /* Misc stats maintained by the driver */ + unsigned long rx_mbuf_sz; + unsigned long mbuf_header_failed; + unsigned long mbuf_packet_failed; + unsigned long watchdog_events; + unsigned long link_irq; + union { + struct ixgbe_hw_stats pf; + struct ixgbevf_hw_stats vf; + } stats; +#if __FreeBSD_version >= 1100036 + /* counter(9) stats */ + u64 ipackets; + u64 ierrors; + u64 opackets; + u64 oerrors; + u64 ibytes; + u64 obytes; + u64 imcasts; + u64 omcasts; + u64 iqdrops; + u64 noproto; +#endif +}; + +/* Precision Time Sync (IEEE 1588) defines */ +#define ETHERTYPE_IEEE1588 0x88F7 +#define PICOSECS_PER_TICK 20833 +#define TSYNC_UDP_PORT 319 /* UDP port for the protocol */ +#define IXGBE_ADVTXD_TSTAMP 0x00080000 + +/* For backward compatibility */ +#if !defined(PCIER_LINK_STA) +#define PCIER_LINK_STA PCIR_EXPRESS_LINK_STA +#endif + +/* Stats macros */ +#if __FreeBSD_version >= 1100036 +#define IXGBE_SET_IPACKETS(sc, count) (sc)->ipackets = (count) +#define IXGBE_SET_IERRORS(sc, count) (sc)->ierrors = (count) +#define IXGBE_SET_OPACKETS(sc, count) (sc)->opackets = (count) +#define IXGBE_SET_OERRORS(sc, count) (sc)->oerrors = (count) +#define IXGBE_SET_COLLISIONS(sc, count) +#define IXGBE_SET_IBYTES(sc, count) (sc)->ibytes = (count) +#define IXGBE_SET_OBYTES(sc, count) (sc)->obytes = (count) +#define IXGBE_SET_IMCASTS(sc, count) (sc)->imcasts = (count) +#define IXGBE_SET_OMCASTS(sc, count) (sc)->omcasts = (count) +#define IXGBE_SET_IQDROPS(sc, count) (sc)->iqdrops = (count) +#else +#define IXGBE_SET_IPACKETS(sc, count) (sc)->ifp->if_ipackets = (count) +#define IXGBE_SET_IERRORS(sc, count) (sc)->ifp->if_ierrors = (count) +#define IXGBE_SET_OPACKETS(sc, count) (sc)->ifp->if_opackets = (count) +#define IXGBE_SET_OERRORS(sc, count) (sc)->ifp->if_oerrors = (count) +#define IXGBE_SET_COLLISIONS(sc, count) (sc)->ifp->if_collisions = (count) +#define IXGBE_SET_IBYTES(sc, count) (sc)->ifp->if_ibytes = (count) +#define IXGBE_SET_OBYTES(sc, count) (sc)->ifp->if_obytes = (count) +#define IXGBE_SET_IMCASTS(sc, count) (sc)->ifp->if_imcasts = (count) +#define IXGBE_SET_OMCASTS(sc, count) (sc)->ifp->if_omcasts = (count) +#define IXGBE_SET_IQDROPS(sc, count) (sc)->ifp->if_iqdrops = (count) +#endif + +/* External PHY register addresses */ +#define IXGBE_PHY_CURRENT_TEMP 0xC820 +#define IXGBE_PHY_OVERTEMP_STATUS 0xC830 + +/* Sysctl help messages; displayed with sysctl -d */ +#define IXGBE_SYSCTL_DESC_ADV_SPEED \ + "\nControl advertised link speed using these flags:\n" \ + "\t0x1 - advertise 100M\n" \ + "\t0x2 - advertise 1G\n" \ + "\t0x4 - advertise 10G\n\n" \ + "\t100M is only supported on certain 10GBaseT adapters.\n" + +#define IXGBE_SYSCTL_DESC_SET_FC \ + "\nSet flow control mode using these values:\n" \ + "\t0 - off\n" \ + "\t1 - rx pause\n" \ + "\t2 - tx pause\n" \ + "\t3 - tx and rx pause" + +static inline bool +ixgbe_is_sfp(struct ixgbe_hw *hw) +{ + switch (hw->phy.type) { + case ixgbe_phy_sfp_avago: + case ixgbe_phy_sfp_ftl: + case ixgbe_phy_sfp_intel: + case ixgbe_phy_sfp_unknown: + case ixgbe_phy_sfp_passive_tyco: + case ixgbe_phy_sfp_passive_unknown: + case ixgbe_phy_qsfp_passive_unknown: + case ixgbe_phy_qsfp_active_unknown: + 
case ixgbe_phy_qsfp_intel: + case ixgbe_phy_qsfp_unknown: + return TRUE; + default: + return FALSE; + } +} + +/* Workaround to make 8.0 buildable */ +#if __FreeBSD_version >= 800000 && __FreeBSD_version < 800504 +static __inline int +drbr_needs_enqueue(struct ifnet *ifp, struct buf_ring *br) +{ +#ifdef ALTQ + if (ALTQ_IS_ENABLED(&ifp->if_snd)) + return (1); +#endif + return (!buf_ring_empty(br)); +} +#endif + +/* +** This checks for a zero mac addr, something that is likely +** unless the Admin on the Host has created one. +*/ +static inline bool +ixv_check_ether_addr(u8 *addr) +{ + bool status = TRUE; + + if ((addr[0] == 0 && addr[1] == 0 && addr[2] == 0 && + addr[3] == 0 && addr[4] == 0 && addr[5] == 0)) + status = FALSE; + return (status); +} + +/* Shared Prototypes */ + +int ixgbe_allocate_queues(struct adapter *); +int ixgbe_allocate_transmit_buffers(struct tx_ring *); +int ixgbe_setup_transmit_structures(struct adapter *); +void ixgbe_free_transmit_structures(struct adapter *); +int ixgbe_allocate_receive_buffers(struct rx_ring *); +int ixgbe_setup_receive_structures(struct adapter *); +void ixgbe_free_receive_structures(struct adapter *); + +int ixgbe_dma_malloc(struct adapter *, + bus_size_t, struct ixgbe_dma_alloc *, int); +void ixgbe_dma_free(struct adapter *, struct ixgbe_dma_alloc *); +int ixgbe_get_regs(SYSCTL_HANDLER_ARGS); +void ixgbe_init_tx_ring(struct ix_tx_queue *que); + +#ifdef PCI_IOV + +static inline boolean_t +ixgbe_vf_mac_changed(struct ixgbe_vf *vf, const uint8_t *mac) +{ + return (bcmp(mac, vf->ether_addr, ETHER_ADDR_LEN) != 0); +} + +static inline void +ixgbe_send_vf_msg(struct adapter *adapter, struct ixgbe_vf *vf, u32 msg) +{ + + if (vf->flags & IXGBE_VF_CTS) + msg |= IXGBE_VT_MSGTYPE_CTS; + + ixgbe_write_mbx(&adapter->hw, &msg, 1, vf->pool); +} + +static inline void +ixgbe_send_vf_ack(struct adapter *adapter, struct ixgbe_vf *vf, u32 msg) +{ + msg &= IXGBE_VT_MSG_MASK; + ixgbe_send_vf_msg(adapter, vf, msg | IXGBE_VT_MSGTYPE_ACK); +} + +static inline void +ixgbe_send_vf_nack(struct adapter *adapter, struct ixgbe_vf *vf, u32 msg) +{ + msg &= IXGBE_VT_MSG_MASK; + ixgbe_send_vf_msg(adapter, vf, msg | IXGBE_VT_MSGTYPE_NACK); +} + +static inline void +ixgbe_process_vf_ack(struct adapter *adapter, struct ixgbe_vf *vf) +{ + if (!(vf->flags & IXGBE_VF_CTS)) + ixgbe_send_vf_nack(adapter, vf, 0); +} + +static inline enum ixgbe_iov_mode +ixgbe_get_iov_mode(struct adapter *adapter) +{ + if (adapter->num_vfs == 0) + return (IXGBE_NO_VM); + if (adapter->num_tx_queues <= 2) + return (IXGBE_64_VM); + else if (adapter->num_tx_queues <= 4) + return (IXGBE_32_VM); + else + return (IXGBE_NO_VM); +} + +static inline u16 +ixgbe_max_vfs(enum ixgbe_iov_mode mode) +{ + /* + * We return odd numbers below because we + * reserve 1 VM's worth of queues for the PF. 
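 + * + * In IXGBE_64_VM mode, for example, the 64 pools split into 63 + * usable VFs with one pool (two queues, per ixgbe_vf_queues()) + * left for the PF.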
+ */ + switch (mode) { + case IXGBE_64_VM: + return (63); + case IXGBE_32_VM: + return (31); + case IXGBE_NO_VM: + default: + return (0); + } +} + +static inline int +ixgbe_vf_queues(enum ixgbe_iov_mode mode) +{ + switch (mode) { + case IXGBE_64_VM: + return (2); + case IXGBE_32_VM: + return (4); + case IXGBE_NO_VM: + default: + return (0); + } +} + +static inline int +ixgbe_vf_que_index(enum ixgbe_iov_mode mode, u32 vfnum, int num) +{ + return ((vfnum * ixgbe_vf_queues(mode)) + num); +} + +static inline int +ixgbe_pf_que_index(enum ixgbe_iov_mode mode, int num) +{ + return (ixgbe_vf_que_index(mode, ixgbe_max_vfs(mode), num)); +} + +static inline void +ixgbe_update_max_frame(struct adapter *adapter, int max_frame) +{ + if (adapter->max_frame_size < max_frame) + adapter->max_frame_size = max_frame; +} + +static inline u32 +ixgbe_get_mrqc(enum ixgbe_iov_mode mode) +{ + u32 mrqc = 0; + switch (mode) { + case IXGBE_64_VM: + mrqc = IXGBE_MRQC_VMDQRSS64EN; + break; + case IXGBE_32_VM: + mrqc = IXGBE_MRQC_VMDQRSS32EN; + break; + case IXGBE_NO_VM: + mrqc = 0; + break; + default: + panic("Unexpected SR-IOV mode %d", mode); + } + return (mrqc); +} + + +static inline u32 +ixgbe_get_mtqc(enum ixgbe_iov_mode mode) +{ + uint32_t mtqc = 0; + switch (mode) { + case IXGBE_64_VM: + mtqc |= IXGBE_MTQC_64VF | IXGBE_MTQC_VT_ENA; + break; + case IXGBE_32_VM: + mtqc |= IXGBE_MTQC_32VF | IXGBE_MTQC_VT_ENA; + break; + case IXGBE_NO_VM: + mtqc = IXGBE_MTQC_64Q_1PB; + break; + default: + panic("Unexpected SR-IOV mode %d", mode); + } + return (mtqc); +} +#endif /* PCI_IOV */ + +#endif /* _IFLIB_IXGBE_H_ */ Index: sys/dev/ixgbe/ix_txrx.c =================================================================== --- sys/dev/ixgbe/ix_txrx.c +++ sys/dev/ixgbe/ix_txrx.c @@ -32,2272 +32,12 @@ ******************************************************************************/ /*$FreeBSD$*/ - -#ifndef IXGBE_STANDALONE_BUILD -#include "opt_inet.h" -#include "opt_inet6.h" -#include "opt_rss.h" -#endif - -#include "ixgbe.h" - -#ifdef RSS -#include -#include -#endif - -#ifdef DEV_NETMAP -#include -#include -#include - -extern int ix_crcstrip; -#endif - -/* -** HW RSC control: -** this feature only works with -** IPv4, and only on 82599 and later. -** Also this will cause IP forwarding to -** fail and that can't be controlled by -** the stack as LRO can. For all these -** reasons I've deemed it best to leave -** this off and not bother with a tuneable -** interface, this would need to be compiled -** to enable. -*/ -static bool ixgbe_rsc_enable = FALSE; - -#ifdef IXGBE_FDIR -/* -** For Flow Director: this is the -** number of TX packets we sample -** for the filter pool, this means -** every 20th packet will be probed. -** -** This feature can be disabled by -** setting this to 0. 
-*/ -static int atr_sample_rate = 20; +#ifndef KLD_MODULE +#include "opt_iflib.h" #endif -/********************************************************************* - * Local Function prototypes - *********************************************************************/ -static void ixgbe_setup_transmit_ring(struct tx_ring *); -static void ixgbe_free_transmit_buffers(struct tx_ring *); -static int ixgbe_setup_receive_ring(struct rx_ring *); -static void ixgbe_free_receive_buffers(struct rx_ring *); - -static void ixgbe_rx_checksum(u32, struct mbuf *, u32); -static void ixgbe_refresh_mbufs(struct rx_ring *, int); -static int ixgbe_xmit(struct tx_ring *, struct mbuf **); -static int ixgbe_tx_ctx_setup(struct tx_ring *, - struct mbuf *, u32 *, u32 *); -static int ixgbe_tso_setup(struct tx_ring *, - struct mbuf *, u32 *, u32 *); -#ifdef IXGBE_FDIR -static void ixgbe_atr(struct tx_ring *, struct mbuf *); -#endif -static __inline void ixgbe_rx_discard(struct rx_ring *, int); -static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *, - struct mbuf *, u32); - -#ifdef IXGBE_LEGACY_TX -/********************************************************************* - * Transmit entry point - * - * ixgbe_start is called by the stack to initiate a transmit. - * The driver will remain in this routine as long as there are - * packets to transmit and transmit resources are available. - * In case resources are not available stack is notified and - * the packet is requeued. - **********************************************************************/ - -void -ixgbe_start_locked(struct tx_ring *txr, struct ifnet * ifp) -{ - struct mbuf *m_head; - struct adapter *adapter = txr->adapter; - - IXGBE_TX_LOCK_ASSERT(txr); - - if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) - return; - if (!adapter->link_active) - return; - - while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) { - if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE) - break; - - IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head); - if (m_head == NULL) - break; - - if (ixgbe_xmit(txr, &m_head)) { - if (m_head != NULL) - IFQ_DRV_PREPEND(&ifp->if_snd, m_head); - break; - } - /* Send a copy of the frame to the BPF listener */ - ETHER_BPF_MTAP(ifp, m_head); - } - return; -} - -/* - * Legacy TX start - called by the stack, this - * always uses the first tx ring, and should - * not be used with multiqueue tx enabled. - */ -void -ixgbe_start(struct ifnet *ifp) -{ - struct adapter *adapter = ifp->if_softc; - struct tx_ring *txr = adapter->tx_rings; - - if (ifp->if_drv_flags & IFF_DRV_RUNNING) { - IXGBE_TX_LOCK(txr); - ixgbe_start_locked(txr, ifp); - IXGBE_TX_UNLOCK(txr); - } - return; -} - -#else /* ! IXGBE_LEGACY_TX */ - -/* -** Multiqueue Transmit Entry Point -** (if_transmit function) -*/ -int -ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m) -{ - struct adapter *adapter = ifp->if_softc; - struct ix_queue *que; - struct tx_ring *txr; - int i, err = 0; -#ifdef RSS - uint32_t bucket_id; -#endif - - /* - * When doing RSS, map it to the same outbound queue - * as the incoming flow would be mapped to. - * - * If everything is setup correctly, it should be the - * same bucket that the current CPU we're on is. 
- */ - if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) { -#ifdef RSS - if (rss_hash2bucket(m->m_pkthdr.flowid, - M_HASHTYPE_GET(m), &bucket_id) == 0) { - i = bucket_id % adapter->num_queues; -#ifdef IXGBE_DEBUG - if (bucket_id > adapter->num_queues) - if_printf(ifp, "bucket_id (%d) > num_queues " - "(%d)\n", bucket_id, adapter->num_queues); -#endif - } else -#endif - i = m->m_pkthdr.flowid % adapter->num_queues; - } else - i = curcpu % adapter->num_queues; - - /* Check for a hung queue and pick alternative */ - if (((1 << i) & adapter->active_queues) == 0) - i = ffsl(adapter->active_queues); - - txr = &adapter->tx_rings[i]; - que = &adapter->queues[i]; - - err = drbr_enqueue(ifp, txr->br, m); - if (err) - return (err); - if (IXGBE_TX_TRYLOCK(txr)) { - ixgbe_mq_start_locked(ifp, txr); - IXGBE_TX_UNLOCK(txr); - } else - taskqueue_enqueue(que->tq, &txr->txq_task); - - return (0); -} - -int -ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr) -{ - struct adapter *adapter = txr->adapter; - struct mbuf *next; - int enqueued = 0, err = 0; - - if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) || - adapter->link_active == 0) - return (ENETDOWN); - - /* Process the queue */ -#if __FreeBSD_version < 901504 - next = drbr_dequeue(ifp, txr->br); - while (next != NULL) { - if ((err = ixgbe_xmit(txr, &next)) != 0) { - if (next != NULL) - err = drbr_enqueue(ifp, txr->br, next); +#ifdef IFLIB +#include #else - while ((next = drbr_peek(ifp, txr->br)) != NULL) { - if ((err = ixgbe_xmit(txr, &next)) != 0) { - if (next == NULL) { - drbr_advance(ifp, txr->br); - } else { - drbr_putback(ifp, txr->br, next); - } -#endif - break; - } -#if __FreeBSD_version >= 901504 - drbr_advance(ifp, txr->br); +#include #endif - enqueued++; -#if 0 // this is VF-only -#if __FreeBSD_version >= 1100036 - /* - * Since we're looking at the tx ring, we can check - * to see if we're a VF by examing our tail register - * address. - */ - if (txr->tail < IXGBE_TDT(0) && next->m_flags & M_MCAST) - if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); -#endif -#endif - /* Send a copy of the frame to the BPF listener */ - ETHER_BPF_MTAP(ifp, next); - if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) - break; -#if __FreeBSD_version < 901504 - next = drbr_dequeue(ifp, txr->br); -#endif - } - - if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD) - ixgbe_txeof(txr); - - return (err); -} - -/* - * Called from a taskqueue to drain queued transmit packets. - */ -void -ixgbe_deferred_mq_start(void *arg, int pending) -{ - struct tx_ring *txr = arg; - struct adapter *adapter = txr->adapter; - struct ifnet *ifp = adapter->ifp; - - IXGBE_TX_LOCK(txr); - if (!drbr_empty(ifp, txr->br)) - ixgbe_mq_start_locked(ifp, txr); - IXGBE_TX_UNLOCK(txr); -} - -/* - * Flush all ring buffers - */ -void -ixgbe_qflush(struct ifnet *ifp) -{ - struct adapter *adapter = ifp->if_softc; - struct tx_ring *txr = adapter->tx_rings; - struct mbuf *m; - - for (int i = 0; i < adapter->num_queues; i++, txr++) { - IXGBE_TX_LOCK(txr); - while ((m = buf_ring_dequeue_sc(txr->br)) != NULL) - m_freem(m); - IXGBE_TX_UNLOCK(txr); - } - if_qflush(ifp); -} -#endif /* IXGBE_LEGACY_TX */ - - -/********************************************************************* - * - * This routine maps the mbufs to tx descriptors, allowing the - * TX engine to transmit the packets. 
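(Illustrative aside, not part of the patch.) The "+" fragments above show the shape of the conversion: opt_iflib.h is generated from the kernel configuration at build time, and each source file then selects the iflib or legacy implementation with the preprocessor. The include targets were lost from this copy of the patch, so the header names in the sketch below are placeholders, not the real ones.

#ifndef KLD_MODULE
#include "opt_iflib.h"          /* generated from the kernel config */
#endif

#ifdef IFLIB
#include "placeholder_iflib.h"  /* placeholder: iflib-based code path */
#else
#include "placeholder_legacy.h" /* placeholder: pre-iflib code path */
#endif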
- * - return 0 on success, positive on failure - * - **********************************************************************/ - -static int -ixgbe_xmit(struct tx_ring *txr, struct mbuf **m_headp) -{ - struct adapter *adapter = txr->adapter; - u32 olinfo_status = 0, cmd_type_len; - int i, j, error, nsegs; - int first; - bool remap = TRUE; - struct mbuf *m_head; - bus_dma_segment_t segs[adapter->num_segs]; - bus_dmamap_t map; - struct ixgbe_tx_buf *txbuf; - union ixgbe_adv_tx_desc *txd = NULL; - - m_head = *m_headp; - - /* Basic descriptor defines */ - cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA | - IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT); - - if (m_head->m_flags & M_VLANTAG) - cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE; - - /* - * Important to capture the first descriptor - * used because it will contain the index of - * the one we tell the hardware to report back - */ - first = txr->next_avail_desc; - txbuf = &txr->tx_buffers[first]; - map = txbuf->map; - - /* - * Map the packet for DMA. - */ -retry: - error = bus_dmamap_load_mbuf_sg(txr->txtag, map, - *m_headp, segs, &nsegs, BUS_DMA_NOWAIT); - - if (__predict_false(error)) { - struct mbuf *m; - - switch (error) { - case EFBIG: - /* Try it again? - one try */ - if (remap == TRUE) { - remap = FALSE; - /* - * XXX: m_defrag will choke on - * non-MCLBYTES-sized clusters - */ - m = m_defrag(*m_headp, M_NOWAIT); - if (m == NULL) { - adapter->mbuf_defrag_failed++; - m_freem(*m_headp); - *m_headp = NULL; - return (ENOBUFS); - } - *m_headp = m; - goto retry; - } else - return (error); - case ENOMEM: - txr->no_tx_dma_setup++; - return (error); - default: - txr->no_tx_dma_setup++; - m_freem(*m_headp); - *m_headp = NULL; - return (error); - } - } - - /* Make certain there are enough descriptors */ - if (txr->tx_avail < (nsegs + 2)) { - txr->no_desc_avail++; - bus_dmamap_unload(txr->txtag, map); - return (ENOBUFS); - } - m_head = *m_headp; - - /* - * Set up the appropriate offload context - * this will consume the first descriptor - */ - error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status); - if (__predict_false(error)) { - if (error == ENOBUFS) - *m_headp = NULL; - return (error); - } - -#ifdef IXGBE_FDIR - /* Do the flow director magic */ - if ((txr->atr_sample) && (!adapter->fdir_reinit)) { - ++txr->atr_count; - if (txr->atr_count >= atr_sample_rate) { - ixgbe_atr(txr, m_head); - txr->atr_count = 0; - } - } -#endif - - olinfo_status |= IXGBE_ADVTXD_CC; - i = txr->next_avail_desc; - for (j = 0; j < nsegs; j++) { - bus_size_t seglen; - bus_addr_t segaddr; - - txbuf = &txr->tx_buffers[i]; - txd = &txr->tx_base[i]; - seglen = segs[j].ds_len; - segaddr = htole64(segs[j].ds_addr); - - txd->read.buffer_addr = segaddr; - txd->read.cmd_type_len = htole32(txr->txd_cmd | - cmd_type_len |seglen); - txd->read.olinfo_status = htole32(olinfo_status); - - if (++i == txr->num_desc) - i = 0; - } - - txd->read.cmd_type_len |= - htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS); - txr->tx_avail -= nsegs; - txr->next_avail_desc = i; - - txbuf->m_head = m_head; - /* - * Here we swap the map so the last descriptor, - * which gets the completion interrupt has the - * real map, and the first descriptor gets the - * unused map from this descriptor. 
- */ - txr->tx_buffers[first].map = txbuf->map; - txbuf->map = map; - bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE); - - /* Set the EOP descriptor that will be marked done */ - txbuf = &txr->tx_buffers[first]; - txbuf->eop = txd; - - bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, - BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); - /* - * Advance the Transmit Descriptor Tail (Tdt), this tells the - * hardware that this frame is available to transmit. - */ - ++txr->total_packets; - IXGBE_WRITE_REG(&adapter->hw, txr->tail, i); - - /* Mark queue as having work */ - if (txr->busy == 0) - txr->busy = 1; - - return (0); -} - - -/********************************************************************* - * - * Allocate memory for tx_buffer structures. The tx_buffer stores all - * the information needed to transmit a packet on the wire. This is - * called only once at attach, setup is done every reset. - * - **********************************************************************/ -int -ixgbe_allocate_transmit_buffers(struct tx_ring *txr) -{ - struct adapter *adapter = txr->adapter; - device_t dev = adapter->dev; - struct ixgbe_tx_buf *txbuf; - int error, i; - - /* - * Setup DMA descriptor areas. - */ - if ((error = bus_dma_tag_create( - bus_get_dma_tag(adapter->dev), /* parent */ - 1, 0, /* alignment, bounds */ - BUS_SPACE_MAXADDR, /* lowaddr */ - BUS_SPACE_MAXADDR, /* highaddr */ - NULL, NULL, /* filter, filterarg */ - IXGBE_TSO_SIZE, /* maxsize */ - adapter->num_segs, /* nsegments */ - PAGE_SIZE, /* maxsegsize */ - 0, /* flags */ - NULL, /* lockfunc */ - NULL, /* lockfuncarg */ - &txr->txtag))) { - device_printf(dev,"Unable to allocate TX DMA tag\n"); - goto fail; - } - - if (!(txr->tx_buffers = - (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) * - adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) { - device_printf(dev, "Unable to allocate tx_buffer memory\n"); - error = ENOMEM; - goto fail; - } - - /* Create the descriptor buffer dma maps */ - txbuf = txr->tx_buffers; - for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) { - error = bus_dmamap_create(txr->txtag, 0, &txbuf->map); - if (error != 0) { - device_printf(dev, "Unable to create TX DMA map\n"); - goto fail; - } - } - - return 0; -fail: - /* We free all, it handles case where we are in the middle */ - ixgbe_free_transmit_structures(adapter); - return (error); -} - -/********************************************************************* - * - * Initialize a transmit ring. - * - **********************************************************************/ -static void -ixgbe_setup_transmit_ring(struct tx_ring *txr) -{ - struct adapter *adapter = txr->adapter; - struct ixgbe_tx_buf *txbuf; -#ifdef DEV_NETMAP - struct netmap_adapter *na = NA(adapter->ifp); - struct netmap_slot *slot; -#endif /* DEV_NETMAP */ - - /* Clear the old ring contents */ - IXGBE_TX_LOCK(txr); -#ifdef DEV_NETMAP - /* - * (under lock): if in netmap mode, do some consistency - * checks and set slot to entry 0 of the netmap ring. - */ - slot = netmap_reset(na, NR_TX, txr->me, 0); -#endif /* DEV_NETMAP */ - bzero((void *)txr->tx_base, - (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc); - /* Reset indices */ - txr->next_avail_desc = 0; - txr->next_to_clean = 0; - - /* Free any existing tx buffers. 
*/ - txbuf = txr->tx_buffers; - for (int i = 0; i < txr->num_desc; i++, txbuf++) { - if (txbuf->m_head != NULL) { - bus_dmamap_sync(txr->txtag, txbuf->map, - BUS_DMASYNC_POSTWRITE); - bus_dmamap_unload(txr->txtag, txbuf->map); - m_freem(txbuf->m_head); - txbuf->m_head = NULL; - } -#ifdef DEV_NETMAP - /* - * In netmap mode, set the map for the packet buffer. - * NOTE: Some drivers (not this one) also need to set - * the physical buffer address in the NIC ring. - * Slots in the netmap ring (indexed by "si") are - * kring->nkr_hwofs positions "ahead" wrt the - * corresponding slot in the NIC ring. In some drivers - * (not here) nkr_hwofs can be negative. Function - * netmap_idx_n2k() handles wraparounds properly. - */ - if (slot) { - int si = netmap_idx_n2k(&na->tx_rings[txr->me], i); - netmap_load_map(na, txr->txtag, - txbuf->map, NMB(na, slot + si)); - } -#endif /* DEV_NETMAP */ - /* Clear the EOP descriptor pointer */ - txbuf->eop = NULL; - } - -#ifdef IXGBE_FDIR - /* Set the rate at which we sample packets */ - if (adapter->hw.mac.type != ixgbe_mac_82598EB) - txr->atr_sample = atr_sample_rate; -#endif - - /* Set number of descriptors available */ - txr->tx_avail = adapter->num_tx_desc; - - bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, - BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); - IXGBE_TX_UNLOCK(txr); -} - -/********************************************************************* - * - * Initialize all transmit rings. - * - **********************************************************************/ -int -ixgbe_setup_transmit_structures(struct adapter *adapter) -{ - struct tx_ring *txr = adapter->tx_rings; - - for (int i = 0; i < adapter->num_queues; i++, txr++) - ixgbe_setup_transmit_ring(txr); - - return (0); -} - -/********************************************************************* - * - * Free all transmit rings. - * - **********************************************************************/ -void -ixgbe_free_transmit_structures(struct adapter *adapter) -{ - struct tx_ring *txr = adapter->tx_rings; - - for (int i = 0; i < adapter->num_queues; i++, txr++) { - IXGBE_TX_LOCK(txr); - ixgbe_free_transmit_buffers(txr); - ixgbe_dma_free(adapter, &txr->txdma); - IXGBE_TX_UNLOCK(txr); - IXGBE_TX_LOCK_DESTROY(txr); - } - free(adapter->tx_rings, M_DEVBUF); -} - -/********************************************************************* - * - * Free transmit ring related data structures. 
- * - **********************************************************************/ -static void -ixgbe_free_transmit_buffers(struct tx_ring *txr) -{ - struct adapter *adapter = txr->adapter; - struct ixgbe_tx_buf *tx_buffer; - int i; - - INIT_DEBUGOUT("ixgbe_free_transmit_ring: begin"); - - if (txr->tx_buffers == NULL) - return; - - tx_buffer = txr->tx_buffers; - for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) { - if (tx_buffer->m_head != NULL) { - bus_dmamap_sync(txr->txtag, tx_buffer->map, - BUS_DMASYNC_POSTWRITE); - bus_dmamap_unload(txr->txtag, - tx_buffer->map); - m_freem(tx_buffer->m_head); - tx_buffer->m_head = NULL; - if (tx_buffer->map != NULL) { - bus_dmamap_destroy(txr->txtag, - tx_buffer->map); - tx_buffer->map = NULL; - } - } else if (tx_buffer->map != NULL) { - bus_dmamap_unload(txr->txtag, - tx_buffer->map); - bus_dmamap_destroy(txr->txtag, - tx_buffer->map); - tx_buffer->map = NULL; - } - } -#ifdef IXGBE_LEGACY_TX - if (txr->br != NULL) - buf_ring_free(txr->br, M_DEVBUF); -#endif - if (txr->tx_buffers != NULL) { - free(txr->tx_buffers, M_DEVBUF); - txr->tx_buffers = NULL; - } - if (txr->txtag != NULL) { - bus_dma_tag_destroy(txr->txtag); - txr->txtag = NULL; - } - return; -} - -/********************************************************************* - * - * Advanced Context Descriptor setup for VLAN, CSUM or TSO - * - **********************************************************************/ - -static int -ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp, - u32 *cmd_type_len, u32 *olinfo_status) -{ - struct adapter *adapter = txr->adapter; - struct ixgbe_adv_tx_context_desc *TXD; - struct ether_vlan_header *eh; -#ifdef INET - struct ip *ip; -#endif -#ifdef INET6 - struct ip6_hdr *ip6; -#endif - u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0; - int ehdrlen, ip_hlen = 0; - u16 etype; - u8 ipproto = 0; - int offload = TRUE; - int ctxd = txr->next_avail_desc; - u16 vtag = 0; - caddr_t l3d; - - - /* First check if TSO is to be used */ - if (mp->m_pkthdr.csum_flags & (CSUM_IP_TSO|CSUM_IP6_TSO)) - return (ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status)); - - if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0) - offload = FALSE; - - /* Indicate the whole packet as payload when not doing TSO */ - *olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT; - - /* Now ready a context descriptor */ - TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd]; - - /* - ** In advanced descriptors the vlan tag must - ** be placed into the context descriptor. Hence - ** we need to make one even if not doing offloads. - */ - if (mp->m_flags & M_VLANTAG) { - vtag = htole16(mp->m_pkthdr.ether_vtag); - vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT); - } else if (!IXGBE_IS_X550VF(adapter) && (offload == FALSE)) - return (0); - - /* - * Determine where frame payload starts. - * Jump over vlan headers if already present, - * helpful for QinQ too. 
- */ - eh = mtod(mp, struct ether_vlan_header *); - if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { - etype = ntohs(eh->evl_proto); - ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; - } else { - etype = ntohs(eh->evl_encap_proto); - ehdrlen = ETHER_HDR_LEN; - } - - /* Set the ether header length */ - vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT; - - if (offload == FALSE) - goto no_offloads; - - /* - * If the first mbuf only includes the ethernet header, jump to the next one - * XXX: This assumes the stack splits mbufs containing headers on header boundaries - * XXX: And assumes the entire IP header is contained in one mbuf - */ - if (mp->m_len == ehdrlen && mp->m_next) - l3d = mtod(mp->m_next, caddr_t); - else - l3d = mtod(mp, caddr_t) + ehdrlen; - - switch (etype) { -#ifdef INET - case ETHERTYPE_IP: - ip = (struct ip *)(l3d); - ip_hlen = ip->ip_hl << 2; - ipproto = ip->ip_p; - type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4; - /* Insert IPv4 checksum into data descriptors */ - if (mp->m_pkthdr.csum_flags & CSUM_IP) { - ip->ip_sum = 0; - *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8; - } - break; -#endif -#ifdef INET6 - case ETHERTYPE_IPV6: - ip6 = (struct ip6_hdr *)(l3d); - ip_hlen = sizeof(struct ip6_hdr); - ipproto = ip6->ip6_nxt; - type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6; - break; -#endif - default: - offload = FALSE; - break; - } - - vlan_macip_lens |= ip_hlen; - - /* No support for offloads for non-L4 next headers */ - switch (ipproto) { - case IPPROTO_TCP: - if (mp->m_pkthdr.csum_flags & (CSUM_IP_TCP | CSUM_IP6_TCP)) - type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP; - else - offload = false; - break; - case IPPROTO_UDP: - if (mp->m_pkthdr.csum_flags & (CSUM_IP_UDP | CSUM_IP6_UDP)) - type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP; - else - offload = false; - break; - case IPPROTO_SCTP: - if (mp->m_pkthdr.csum_flags & (CSUM_IP_SCTP | CSUM_IP6_SCTP)) - type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP; - else - offload = false; - break; - default: - offload = false; - break; - } - - if (offload) /* Insert L4 checksum into data descriptors */ - *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8; - -no_offloads: - type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT; - - /* Now copy bits into descriptor */ - TXD->vlan_macip_lens = htole32(vlan_macip_lens); - TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl); - TXD->seqnum_seed = htole32(0); - TXD->mss_l4len_idx = htole32(0); - - /* We've consumed the first desc, adjust counters */ - if (++ctxd == txr->num_desc) - ctxd = 0; - txr->next_avail_desc = ctxd; - --txr->tx_avail; - - return (0); -} - -/********************************************************************** - * - * Setup work for hardware segmentation offload (TSO) on - * adapters using advanced tx descriptors - * - **********************************************************************/ -static int -ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp, - u32 *cmd_type_len, u32 *olinfo_status) -{ - struct ixgbe_adv_tx_context_desc *TXD; - u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0; - u32 mss_l4len_idx = 0, paylen; - u16 vtag = 0, eh_type; - int ctxd, ehdrlen, ip_hlen, tcp_hlen; - struct ether_vlan_header *eh; -#ifdef INET6 - struct ip6_hdr *ip6; -#endif -#ifdef INET - struct ip *ip; -#endif - struct tcphdr *th; - - /* - * Determine where frame payload starts. 
- * Jump over vlan headers if already present - */ - eh = mtod(mp, struct ether_vlan_header *); - if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { - ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; - eh_type = eh->evl_proto; - } else { - ehdrlen = ETHER_HDR_LEN; - eh_type = eh->evl_encap_proto; - } - - switch (ntohs(eh_type)) { -#ifdef INET6 - case ETHERTYPE_IPV6: - ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen); - /* XXX-BZ For now we do not pretend to support ext. hdrs. */ - if (ip6->ip6_nxt != IPPROTO_TCP) - return (ENXIO); - ip_hlen = sizeof(struct ip6_hdr); - ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen); - th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen); - th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0); - type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6; - break; -#endif -#ifdef INET - case ETHERTYPE_IP: - ip = (struct ip *)(mp->m_data + ehdrlen); - if (ip->ip_p != IPPROTO_TCP) - return (ENXIO); - ip->ip_sum = 0; - ip_hlen = ip->ip_hl << 2; - th = (struct tcphdr *)((caddr_t)ip + ip_hlen); - th->th_sum = in_pseudo(ip->ip_src.s_addr, - ip->ip_dst.s_addr, htons(IPPROTO_TCP)); - type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4; - /* Tell transmit desc to also do IPv4 checksum. */ - *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8; - break; -#endif - default: - panic("%s: CSUM_TSO but no supported IP version (0x%04x)", - __func__, ntohs(eh_type)); - break; - } - - ctxd = txr->next_avail_desc; - TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd]; - - tcp_hlen = th->th_off << 2; - - /* This is used in the transmit desc in encap */ - paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen; - - /* VLAN MACLEN IPLEN */ - if (mp->m_flags & M_VLANTAG) { - vtag = htole16(mp->m_pkthdr.ether_vtag); - vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT); - } - - vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT; - vlan_macip_lens |= ip_hlen; - TXD->vlan_macip_lens = htole32(vlan_macip_lens); - - /* ADV DTYPE TUCMD */ - type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT; - type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP; - TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl); - - /* MSS L4LEN IDX */ - mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT); - mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT); - TXD->mss_l4len_idx = htole32(mss_l4len_idx); - - TXD->seqnum_seed = htole32(0); - - if (++ctxd == txr->num_desc) - ctxd = 0; - - txr->tx_avail--; - txr->next_avail_desc = ctxd; - *cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE; - *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8; - *olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT; - ++txr->tso_tx; - return (0); -} - - -/********************************************************************** - * - * Examine each tx_buffer in the used queue. If the hardware is done - * processing the packet then free associated resources. The - * tx_buffer is put back on the free queue. 
- * - **********************************************************************/ -void -ixgbe_txeof(struct tx_ring *txr) -{ - struct adapter *adapter = txr->adapter; -#ifdef DEV_NETMAP - struct ifnet *ifp = adapter->ifp; -#endif - u32 work, processed = 0; - u32 limit = adapter->tx_process_limit; - struct ixgbe_tx_buf *buf; - union ixgbe_adv_tx_desc *txd; - - mtx_assert(&txr->tx_mtx, MA_OWNED); - -#ifdef DEV_NETMAP - if (ifp->if_capenable & IFCAP_NETMAP) { - struct netmap_adapter *na = NA(ifp); - struct netmap_kring *kring = &na->tx_rings[txr->me]; - txd = txr->tx_base; - bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, - BUS_DMASYNC_POSTREAD); - /* - * In netmap mode, all the work is done in the context - * of the client thread. Interrupt handlers only wake up - * clients, which may be sleeping on individual rings - * or on a global resource for all rings. - * To implement tx interrupt mitigation, we wake up the client - * thread roughly every half ring, even if the NIC interrupts - * more frequently. This is implemented as follows: - * - ixgbe_txsync() sets kring->nr_kflags with the index of - * the slot that should wake up the thread (nkr_num_slots - * means the user thread should not be woken up); - * - the driver ignores tx interrupts unless netmap_mitigate=0 - * or the slot has the DD bit set. - */ - if (!netmap_mitigate || - (kring->nr_kflags < kring->nkr_num_slots && - txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) { - netmap_tx_irq(ifp, txr->me); - } - return; - } -#endif /* DEV_NETMAP */ - - if (txr->tx_avail == txr->num_desc) { - txr->busy = 0; - return; - } - - /* Get work starting point */ - work = txr->next_to_clean; - buf = &txr->tx_buffers[work]; - txd = &txr->tx_base[work]; - work -= txr->num_desc; /* The distance to ring end */ - bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, - BUS_DMASYNC_POSTREAD); - - do { - union ixgbe_adv_tx_desc *eop = buf->eop; - if (eop == NULL) /* No work */ - break; - - if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0) - break; /* I/O not complete */ - - if (buf->m_head) { - txr->bytes += - buf->m_head->m_pkthdr.len; - bus_dmamap_sync(txr->txtag, - buf->map, - BUS_DMASYNC_POSTWRITE); - bus_dmamap_unload(txr->txtag, - buf->map); - m_freem(buf->m_head); - buf->m_head = NULL; - } - buf->eop = NULL; - ++txr->tx_avail; - - /* We clean the range if multi segment */ - while (txd != eop) { - ++txd; - ++buf; - ++work; - /* wrap the ring? */ - if (__predict_false(!work)) { - work -= txr->num_desc; - buf = txr->tx_buffers; - txd = txr->tx_base; - } - if (buf->m_head) { - txr->bytes += - buf->m_head->m_pkthdr.len; - bus_dmamap_sync(txr->txtag, - buf->map, - BUS_DMASYNC_POSTWRITE); - bus_dmamap_unload(txr->txtag, - buf->map); - m_freem(buf->m_head); - buf->m_head = NULL; - } - ++txr->tx_avail; - buf->eop = NULL; - - } - ++txr->packets; - ++processed; - - /* Try the next packet */ - ++txd; - ++buf; - ++work; - /* reset with a wrap */ - if (__predict_false(!work)) { - work -= txr->num_desc; - buf = txr->tx_buffers; - txd = txr->tx_base; - } - prefetch(txd); - } while (__predict_true(--limit)); - - bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, - BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); - - work += txr->num_desc; - txr->next_to_clean = work; - - /* - ** Queue Hang detection, we know there's - ** work outstanding or the first return - ** would have been taken, so increment busy - ** if nothing managed to get cleaned, then - ** in local_timer it will be checked and - ** marked as HUNG if it exceeds a MAX attempt. 
- */ - if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG)) - ++txr->busy; - /* - ** If anything gets cleaned we reset state to 1, - ** note this will turn off HUNG if its set. - */ - if (processed) - txr->busy = 1; - - if (txr->tx_avail == txr->num_desc) - txr->busy = 0; - - return; -} - - -#ifdef IXGBE_FDIR -/* -** This routine parses packet headers so that Flow -** Director can make a hashed filter table entry -** allowing traffic flows to be identified and kept -** on the same cpu. This would be a performance -** hit, but we only do it at IXGBE_FDIR_RATE of -** packets. -*/ -static void -ixgbe_atr(struct tx_ring *txr, struct mbuf *mp) -{ - struct adapter *adapter = txr->adapter; - struct ix_queue *que; - struct ip *ip; - struct tcphdr *th; - struct udphdr *uh; - struct ether_vlan_header *eh; - union ixgbe_atr_hash_dword input = {.dword = 0}; - union ixgbe_atr_hash_dword common = {.dword = 0}; - int ehdrlen, ip_hlen; - u16 etype; - - eh = mtod(mp, struct ether_vlan_header *); - if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { - ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; - etype = eh->evl_proto; - } else { - ehdrlen = ETHER_HDR_LEN; - etype = eh->evl_encap_proto; - } - - /* Only handling IPv4 */ - if (etype != htons(ETHERTYPE_IP)) - return; - - ip = (struct ip *)(mp->m_data + ehdrlen); - ip_hlen = ip->ip_hl << 2; - - /* check if we're UDP or TCP */ - switch (ip->ip_p) { - case IPPROTO_TCP: - th = (struct tcphdr *)((caddr_t)ip + ip_hlen); - /* src and dst are inverted */ - common.port.dst ^= th->th_sport; - common.port.src ^= th->th_dport; - input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_TCPV4; - break; - case IPPROTO_UDP: - uh = (struct udphdr *)((caddr_t)ip + ip_hlen); - /* src and dst are inverted */ - common.port.dst ^= uh->uh_sport; - common.port.src ^= uh->uh_dport; - input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_UDPV4; - break; - default: - return; - } - - input.formatted.vlan_id = htobe16(mp->m_pkthdr.ether_vtag); - if (mp->m_pkthdr.ether_vtag) - common.flex_bytes ^= htons(ETHERTYPE_VLAN); - else - common.flex_bytes ^= etype; - common.ip ^= ip->ip_src.s_addr ^ ip->ip_dst.s_addr; - - que = &adapter->queues[txr->me]; - /* - ** This assumes the Rx queue and Tx - ** queue are bound to the same CPU - */ - ixgbe_fdir_add_signature_filter_82599(&adapter->hw, - input, common, que->msix); -} -#endif /* IXGBE_FDIR */ - -/* -** Used to detect a descriptor that has -** been merged by Hardware RSC. -*/ -static inline u32 -ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx) -{ - return (le32toh(rx->wb.lower.lo_dword.data) & - IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT; -} - -/********************************************************************* - * - * Initialize Hardware RSC (LRO) feature on 82599 - * for an RX ring, this is toggled by the LRO capability - * even though it is transparent to the stack. - * - * NOTE: since this HW feature only works with IPV4 and - * our testing has shown soft LRO to be as effective - * I have decided to disable this by default. 
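(Illustrative aside, not part of the patch.) The txr->busy counter maintained at the end of ixgbe_txeof() above is only the producer half of the hang-detection protocol; the consumer half runs from the driver's periodic timer and is not in this hunk. A minimal sketch, with a hypothetical name for the retry limit:

/* Hypothetical threshold; the real limit lives in the watchdog path. */
#define IX_SKETCH_MAX_TX_BUSY   8

static void
ix_sketch_check_hang(struct tx_ring *txr)
{
        /* ixgbe_txeof() bumps txr->busy on every pass that cleans nothing */
        if (txr->busy != 0 && txr->busy != IXGBE_QUEUE_HUNG &&
            txr->busy > IX_SKETCH_MAX_TX_BUSY)
                txr->busy = IXGBE_QUEUE_HUNG;   /* caller schedules a reset */
}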
- * - **********************************************************************/ -static void -ixgbe_setup_hw_rsc(struct rx_ring *rxr) -{ - struct adapter *adapter = rxr->adapter; - struct ixgbe_hw *hw = &adapter->hw; - u32 rscctrl, rdrxctl; - - /* If turning LRO/RSC off we need to disable it */ - if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) { - rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me)); - rscctrl &= ~IXGBE_RSCCTL_RSCEN; - return; - } - - rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL); - rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE; -#ifdef DEV_NETMAP /* crcstrip is optional in netmap */ - if (adapter->ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip) -#endif /* DEV_NETMAP */ - rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP; - rdrxctl |= IXGBE_RDRXCTL_RSCACKC; - IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl); - - rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me)); - rscctrl |= IXGBE_RSCCTL_RSCEN; - /* - ** Limit the total number of descriptors that - ** can be combined, so it does not exceed 64K - */ - if (rxr->mbuf_sz == MCLBYTES) - rscctrl |= IXGBE_RSCCTL_MAXDESC_16; - else if (rxr->mbuf_sz == MJUMPAGESIZE) - rscctrl |= IXGBE_RSCCTL_MAXDESC_8; - else if (rxr->mbuf_sz == MJUM9BYTES) - rscctrl |= IXGBE_RSCCTL_MAXDESC_4; - else /* Using 16K cluster */ - rscctrl |= IXGBE_RSCCTL_MAXDESC_1; - - IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl); - - /* Enable TCP header recognition */ - IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0), - (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) | - IXGBE_PSRTYPE_TCPHDR)); - - /* Disable RSC for ACK packets */ - IXGBE_WRITE_REG(hw, IXGBE_RSCDBU, - (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU))); - - rxr->hw_rsc = TRUE; -} - -/********************************************************************* - * - * Refresh mbuf buffers for RX descriptor rings - * - now keeps its own state so discards due to resource - * exhaustion are unnecessary, if an mbuf cannot be obtained - * it just returns, keeping its placeholder, thus it can simply - * be recalled to try again. - * - **********************************************************************/ -static void -ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit) -{ - struct adapter *adapter = rxr->adapter; - bus_dma_segment_t seg[1]; - struct ixgbe_rx_buf *rxbuf; - struct mbuf *mp; - int i, j, nsegs, error; - bool refreshed = FALSE; - - i = j = rxr->next_to_refresh; - /* Control the loop with one beyond */ - if (++j == rxr->num_desc) - j = 0; - - while (j != limit) { - rxbuf = &rxr->rx_buffers[i]; - if (rxbuf->buf == NULL) { - mp = m_getjcl(M_NOWAIT, MT_DATA, - M_PKTHDR, rxr->mbuf_sz); - if (mp == NULL) - goto update; - if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN)) - m_adj(mp, ETHER_ALIGN); - } else - mp = rxbuf->buf; - - mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz; - - /* If we're dealing with an mbuf that was copied rather - * than replaced, there's no need to go through busdma. 
- */ - if ((rxbuf->flags & IXGBE_RX_COPY) == 0) { - /* Get the memory mapping */ - bus_dmamap_unload(rxr->ptag, rxbuf->pmap); - error = bus_dmamap_load_mbuf_sg(rxr->ptag, - rxbuf->pmap, mp, seg, &nsegs, BUS_DMA_NOWAIT); - if (error != 0) { - printf("Refresh mbufs: payload dmamap load" - " failure - %d\n", error); - m_free(mp); - rxbuf->buf = NULL; - goto update; - } - rxbuf->buf = mp; - bus_dmamap_sync(rxr->ptag, rxbuf->pmap, - BUS_DMASYNC_PREREAD); - rxbuf->addr = rxr->rx_base[i].read.pkt_addr = - htole64(seg[0].ds_addr); - } else { - rxr->rx_base[i].read.pkt_addr = rxbuf->addr; - rxbuf->flags &= ~IXGBE_RX_COPY; - } - - refreshed = TRUE; - /* Next is precalculated */ - i = j; - rxr->next_to_refresh = i; - if (++j == rxr->num_desc) - j = 0; - } -update: - if (refreshed) /* Update hardware tail index */ - IXGBE_WRITE_REG(&adapter->hw, - rxr->tail, rxr->next_to_refresh); - return; -} - -/********************************************************************* - * - * Allocate memory for rx_buffer structures. Since we use one - * rx_buffer per received packet, the maximum number of rx_buffer's - * that we'll need is equal to the number of receive descriptors - * that we've allocated. - * - **********************************************************************/ -int -ixgbe_allocate_receive_buffers(struct rx_ring *rxr) -{ - struct adapter *adapter = rxr->adapter; - device_t dev = adapter->dev; - struct ixgbe_rx_buf *rxbuf; - int bsize, error; - - bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc; - if (!(rxr->rx_buffers = - (struct ixgbe_rx_buf *) malloc(bsize, - M_DEVBUF, M_NOWAIT | M_ZERO))) { - device_printf(dev, "Unable to allocate rx_buffer memory\n"); - error = ENOMEM; - goto fail; - } - - if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ - 1, 0, /* alignment, bounds */ - BUS_SPACE_MAXADDR, /* lowaddr */ - BUS_SPACE_MAXADDR, /* highaddr */ - NULL, NULL, /* filter, filterarg */ - MJUM16BYTES, /* maxsize */ - 1, /* nsegments */ - MJUM16BYTES, /* maxsegsize */ - 0, /* flags */ - NULL, /* lockfunc */ - NULL, /* lockfuncarg */ - &rxr->ptag))) { - device_printf(dev, "Unable to create RX DMA tag\n"); - goto fail; - } - - for (int i = 0; i < rxr->num_desc; i++, rxbuf++) { - rxbuf = &rxr->rx_buffers[i]; - error = bus_dmamap_create(rxr->ptag, 0, &rxbuf->pmap); - if (error) { - device_printf(dev, "Unable to create RX dma map\n"); - goto fail; - } - } - - return (0); - -fail: - /* Frees all, but can handle partial completion */ - ixgbe_free_receive_structures(adapter); - return (error); -} - -static void -ixgbe_free_receive_ring(struct rx_ring *rxr) -{ - struct ixgbe_rx_buf *rxbuf; - - for (int i = 0; i < rxr->num_desc; i++) { - rxbuf = &rxr->rx_buffers[i]; - if (rxbuf->buf != NULL) { - bus_dmamap_sync(rxr->ptag, rxbuf->pmap, - BUS_DMASYNC_POSTREAD); - bus_dmamap_unload(rxr->ptag, rxbuf->pmap); - rxbuf->buf->m_flags |= M_PKTHDR; - m_freem(rxbuf->buf); - rxbuf->buf = NULL; - rxbuf->flags = 0; - } - } -} - -/********************************************************************* - * - * Initialize a receive ring and its buffers. 
- * - **********************************************************************/ -static int -ixgbe_setup_receive_ring(struct rx_ring *rxr) -{ - struct adapter *adapter; - struct ifnet *ifp; - device_t dev; - struct ixgbe_rx_buf *rxbuf; - bus_dma_segment_t seg[1]; - struct lro_ctrl *lro = &rxr->lro; - int rsize, nsegs, error = 0; -#ifdef DEV_NETMAP - struct netmap_adapter *na = NA(rxr->adapter->ifp); - struct netmap_slot *slot; -#endif /* DEV_NETMAP */ - - adapter = rxr->adapter; - ifp = adapter->ifp; - dev = adapter->dev; - - /* Clear the ring contents */ - IXGBE_RX_LOCK(rxr); -#ifdef DEV_NETMAP - /* same as in ixgbe_setup_transmit_ring() */ - slot = netmap_reset(na, NR_RX, rxr->me, 0); -#endif /* DEV_NETMAP */ - rsize = roundup2(adapter->num_rx_desc * - sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN); - bzero((void *)rxr->rx_base, rsize); - /* Cache the size */ - rxr->mbuf_sz = adapter->rx_mbuf_sz; - - /* Free current RX buffer structs and their mbufs */ - ixgbe_free_receive_ring(rxr); - - /* Now replenish the mbufs */ - for (int j = 0; j != rxr->num_desc; ++j) { - struct mbuf *mp; - - rxbuf = &rxr->rx_buffers[j]; -#ifdef DEV_NETMAP - /* - * In netmap mode, fill the map and set the buffer - * address in the NIC ring, considering the offset - * between the netmap and NIC rings (see comment in - * ixgbe_setup_transmit_ring() ). No need to allocate - * an mbuf, so end the block with a continue; - */ - if (slot) { - int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j); - uint64_t paddr; - void *addr; - - addr = PNMB(na, slot + sj, &paddr); - netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr); - /* Update descriptor and the cached value */ - rxr->rx_base[j].read.pkt_addr = htole64(paddr); - rxbuf->addr = htole64(paddr); - continue; - } -#endif /* DEV_NETMAP */ - rxbuf->flags = 0; - rxbuf->buf = m_getjcl(M_NOWAIT, MT_DATA, - M_PKTHDR, adapter->rx_mbuf_sz); - if (rxbuf->buf == NULL) { - error = ENOBUFS; - goto fail; - } - mp = rxbuf->buf; - mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz; - /* Get the memory mapping */ - error = bus_dmamap_load_mbuf_sg(rxr->ptag, - rxbuf->pmap, mp, seg, - &nsegs, BUS_DMA_NOWAIT); - if (error != 0) - goto fail; - bus_dmamap_sync(rxr->ptag, - rxbuf->pmap, BUS_DMASYNC_PREREAD); - /* Update the descriptor and the cached value */ - rxr->rx_base[j].read.pkt_addr = htole64(seg[0].ds_addr); - rxbuf->addr = htole64(seg[0].ds_addr); - } - - - /* Setup our descriptor indices */ - rxr->next_to_check = 0; - rxr->next_to_refresh = 0; - rxr->lro_enabled = FALSE; - rxr->rx_copies = 0; - rxr->rx_bytes = 0; - rxr->vtag_strip = FALSE; - - bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, - BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); - - /* - ** Now set up the LRO interface: - */ - if (ixgbe_rsc_enable) - ixgbe_setup_hw_rsc(rxr); - else if (ifp->if_capenable & IFCAP_LRO) { - int err = tcp_lro_init(lro); - if (err) { - device_printf(dev, "LRO Initialization failed!\n"); - goto fail; - } - INIT_DEBUGOUT("RX Soft LRO Initialized\n"); - rxr->lro_enabled = TRUE; - lro->ifp = adapter->ifp; - } - - IXGBE_RX_UNLOCK(rxr); - return (0); - -fail: - ixgbe_free_receive_ring(rxr); - IXGBE_RX_UNLOCK(rxr); - return (error); -} - -/********************************************************************* - * - * Initialize all receive rings. 
- * - **********************************************************************/ -int -ixgbe_setup_receive_structures(struct adapter *adapter) -{ - struct rx_ring *rxr = adapter->rx_rings; - int j; - - for (j = 0; j < adapter->num_queues; j++, rxr++) - if (ixgbe_setup_receive_ring(rxr)) - goto fail; - - return (0); -fail: - /* - * Free RX buffers allocated so far, we will only handle - * the rings that completed, the failing case will have - * cleaned up for itself. 'j' failed, so its the terminus. - */ - for (int i = 0; i < j; ++i) { - rxr = &adapter->rx_rings[i]; - ixgbe_free_receive_ring(rxr); - } - - return (ENOBUFS); -} - - -/********************************************************************* - * - * Free all receive rings. - * - **********************************************************************/ -void -ixgbe_free_receive_structures(struct adapter *adapter) -{ - struct rx_ring *rxr = adapter->rx_rings; - - INIT_DEBUGOUT("ixgbe_free_receive_structures: begin"); - - for (int i = 0; i < adapter->num_queues; i++, rxr++) { - struct lro_ctrl *lro = &rxr->lro; - ixgbe_free_receive_buffers(rxr); - /* Free LRO memory */ - tcp_lro_free(lro); - /* Free the ring memory as well */ - ixgbe_dma_free(adapter, &rxr->rxdma); - } - - free(adapter->rx_rings, M_DEVBUF); -} - - -/********************************************************************* - * - * Free receive ring data structures - * - **********************************************************************/ -void -ixgbe_free_receive_buffers(struct rx_ring *rxr) -{ - struct adapter *adapter = rxr->adapter; - struct ixgbe_rx_buf *rxbuf; - - INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin"); - - /* Cleanup any existing buffers */ - if (rxr->rx_buffers != NULL) { - for (int i = 0; i < adapter->num_rx_desc; i++) { - rxbuf = &rxr->rx_buffers[i]; - if (rxbuf->buf != NULL) { - bus_dmamap_sync(rxr->ptag, rxbuf->pmap, - BUS_DMASYNC_POSTREAD); - bus_dmamap_unload(rxr->ptag, rxbuf->pmap); - rxbuf->buf->m_flags |= M_PKTHDR; - m_freem(rxbuf->buf); - } - rxbuf->buf = NULL; - if (rxbuf->pmap != NULL) { - bus_dmamap_destroy(rxr->ptag, rxbuf->pmap); - rxbuf->pmap = NULL; - } - } - if (rxr->rx_buffers != NULL) { - free(rxr->rx_buffers, M_DEVBUF); - rxr->rx_buffers = NULL; - } - } - - if (rxr->ptag != NULL) { - bus_dma_tag_destroy(rxr->ptag); - rxr->ptag = NULL; - } - - return; -} - -static __inline void -ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype) -{ - - /* - * ATM LRO is only for IP/TCP packets and TCP checksum of the packet - * should be computed by hardware. Also it should not have VLAN tag in - * ethernet header. In case of IPv6 we do not yet support ext. hdrs. 
- */ - if (rxr->lro_enabled && - (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 && - (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 && - ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) == - (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) || - (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) == - (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) && - (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) == - (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) { - /* - * Send to the stack if: - ** - LRO not enabled, or - ** - no LRO resources, or - ** - lro enqueue fails - */ - if (rxr->lro.lro_cnt != 0) - if (tcp_lro_rx(&rxr->lro, m, 0) == 0) - return; - } - IXGBE_RX_UNLOCK(rxr); - (*ifp->if_input)(ifp, m); - IXGBE_RX_LOCK(rxr); -} - -static __inline void -ixgbe_rx_discard(struct rx_ring *rxr, int i) -{ - struct ixgbe_rx_buf *rbuf; - - rbuf = &rxr->rx_buffers[i]; - - - /* - ** With advanced descriptors the writeback - ** clobbers the buffer addrs, so its easier - ** to just free the existing mbufs and take - ** the normal refresh path to get new buffers - ** and mapping. - */ - - if (rbuf->fmp != NULL) {/* Partial chain ? */ - rbuf->fmp->m_flags |= M_PKTHDR; - m_freem(rbuf->fmp); - rbuf->fmp = NULL; - rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */ - } else if (rbuf->buf) { - m_free(rbuf->buf); - rbuf->buf = NULL; - } - bus_dmamap_unload(rxr->ptag, rbuf->pmap); - - rbuf->flags = 0; - - return; -} - - -/********************************************************************* - * - * This routine executes in interrupt context. It replenishes - * the mbufs in the descriptor and sends data which has been - * dma'ed into host memory to upper layer. - * - * Return TRUE for more work, FALSE for all clean. - *********************************************************************/ -bool -ixgbe_rxeof(struct ix_queue *que) -{ - struct adapter *adapter = que->adapter; - struct rx_ring *rxr = que->rxr; - struct ifnet *ifp = adapter->ifp; - struct lro_ctrl *lro = &rxr->lro; - int i, nextp, processed = 0; - u32 staterr = 0; - u32 count = adapter->rx_process_limit; - union ixgbe_adv_rx_desc *cur; - struct ixgbe_rx_buf *rbuf, *nbuf; - u16 pkt_info; - - IXGBE_RX_LOCK(rxr); - -#ifdef DEV_NETMAP - /* Same as the txeof routine: wakeup clients on intr. */ - if (netmap_rx_irq(ifp, rxr->me, &processed)) { - IXGBE_RX_UNLOCK(rxr); - return (FALSE); - } -#endif /* DEV_NETMAP */ - - for (i = rxr->next_to_check; count != 0;) { - struct mbuf *sendmp, *mp; - u32 rsc, ptype; - u16 len; - u16 vtag = 0; - bool eop; - - /* Sync the ring. 
*/ - bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, - BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); - - cur = &rxr->rx_base[i]; - staterr = le32toh(cur->wb.upper.status_error); - pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info); - - if ((staterr & IXGBE_RXD_STAT_DD) == 0) - break; - if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) - break; - - count--; - sendmp = NULL; - nbuf = NULL; - rsc = 0; - cur->wb.upper.status_error = 0; - rbuf = &rxr->rx_buffers[i]; - mp = rbuf->buf; - - len = le16toh(cur->wb.upper.length); - ptype = le32toh(cur->wb.lower.lo_dword.data) & - IXGBE_RXDADV_PKTTYPE_MASK; - eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0); - - /* Make sure bad packets are discarded */ - if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) { -#if __FreeBSD_version >= 1100036 - if (IXGBE_IS_VF(adapter)) - if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); -#endif - rxr->rx_discarded++; - ixgbe_rx_discard(rxr, i); - goto next_desc; - } - - /* - ** On 82599 which supports a hardware - ** LRO (called HW RSC), packets need - ** not be fragmented across sequential - ** descriptors, rather the next descriptor - ** is indicated in bits of the descriptor. - ** This also means that we might proceses - ** more than one packet at a time, something - ** that has never been true before, it - ** required eliminating global chain pointers - ** in favor of what we are doing here. -jfv - */ - if (!eop) { - /* - ** Figure out the next descriptor - ** of this frame. - */ - if (rxr->hw_rsc == TRUE) { - rsc = ixgbe_rsc_count(cur); - rxr->rsc_num += (rsc - 1); - } - if (rsc) { /* Get hardware index */ - nextp = ((staterr & - IXGBE_RXDADV_NEXTP_MASK) >> - IXGBE_RXDADV_NEXTP_SHIFT); - } else { /* Just sequential */ - nextp = i + 1; - if (nextp == adapter->num_rx_desc) - nextp = 0; - } - nbuf = &rxr->rx_buffers[nextp]; - prefetch(nbuf); - } - /* - ** Rather than using the fmp/lmp global pointers - ** we now keep the head of a packet chain in the - ** buffer struct and pass this along from one - ** descriptor to the next, until we get EOP. - */ - mp->m_len = len; - /* - ** See if there is a stored head - ** that determines what we are - */ - sendmp = rbuf->fmp; - if (sendmp != NULL) { /* secondary frag */ - rbuf->buf = rbuf->fmp = NULL; - mp->m_flags &= ~M_PKTHDR; - sendmp->m_pkthdr.len += mp->m_len; - } else { - /* - * Optimize. This might be a small packet, - * maybe just a TCP ACK. Do a fast copy that - * is cache aligned into a new mbuf, and - * leave the old mbuf+cluster for re-use. 
- */ - if (eop && len <= IXGBE_RX_COPY_LEN) { - sendmp = m_gethdr(M_NOWAIT, MT_DATA); - if (sendmp != NULL) { - sendmp->m_data += - IXGBE_RX_COPY_ALIGN; - ixgbe_bcopy(mp->m_data, - sendmp->m_data, len); - sendmp->m_len = len; - rxr->rx_copies++; - rbuf->flags |= IXGBE_RX_COPY; - } - } - if (sendmp == NULL) { - rbuf->buf = rbuf->fmp = NULL; - sendmp = mp; - } - - /* first desc of a non-ps chain */ - sendmp->m_flags |= M_PKTHDR; - sendmp->m_pkthdr.len = mp->m_len; - } - ++processed; - - /* Pass the head pointer on */ - if (eop == 0) { - nbuf->fmp = sendmp; - sendmp = NULL; - mp->m_next = nbuf->buf; - } else { /* Sending this frame */ - sendmp->m_pkthdr.rcvif = ifp; - rxr->rx_packets++; - /* capture data for AIM */ - rxr->bytes += sendmp->m_pkthdr.len; - rxr->rx_bytes += sendmp->m_pkthdr.len; - /* Process vlan info */ - if ((rxr->vtag_strip) && - (staterr & IXGBE_RXD_STAT_VP)) - vtag = le16toh(cur->wb.upper.vlan); - if (vtag) { - sendmp->m_pkthdr.ether_vtag = vtag; - sendmp->m_flags |= M_VLANTAG; - } - if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) - ixgbe_rx_checksum(staterr, sendmp, ptype); - - /* - * In case of multiqueue, we have RXCSUM.PCSD bit set - * and never cleared. This means we have RSS hash - * available to be used. - */ - if (adapter->num_queues > 1) { - sendmp->m_pkthdr.flowid = - le32toh(cur->wb.lower.hi_dword.rss); - switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) { - case IXGBE_RXDADV_RSSTYPE_IPV4: - M_HASHTYPE_SET(sendmp, - M_HASHTYPE_RSS_IPV4); - break; - case IXGBE_RXDADV_RSSTYPE_IPV4_TCP: - M_HASHTYPE_SET(sendmp, - M_HASHTYPE_RSS_TCP_IPV4); - break; - case IXGBE_RXDADV_RSSTYPE_IPV6: - M_HASHTYPE_SET(sendmp, - M_HASHTYPE_RSS_IPV6); - break; - case IXGBE_RXDADV_RSSTYPE_IPV6_TCP: - M_HASHTYPE_SET(sendmp, - M_HASHTYPE_RSS_TCP_IPV6); - break; - case IXGBE_RXDADV_RSSTYPE_IPV6_EX: - M_HASHTYPE_SET(sendmp, - M_HASHTYPE_RSS_IPV6_EX); - break; - case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX: - M_HASHTYPE_SET(sendmp, - M_HASHTYPE_RSS_TCP_IPV6_EX); - break; -#if __FreeBSD_version > 1100000 - case IXGBE_RXDADV_RSSTYPE_IPV4_UDP: - M_HASHTYPE_SET(sendmp, - M_HASHTYPE_RSS_UDP_IPV4); - break; - case IXGBE_RXDADV_RSSTYPE_IPV6_UDP: - M_HASHTYPE_SET(sendmp, - M_HASHTYPE_RSS_UDP_IPV6); - break; - case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX: - M_HASHTYPE_SET(sendmp, - M_HASHTYPE_RSS_UDP_IPV6_EX); - break; -#endif - default: - M_HASHTYPE_SET(sendmp, - M_HASHTYPE_OPAQUE_HASH); - } - } else { - sendmp->m_pkthdr.flowid = que->msix; - M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE); - } - } -next_desc: - bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, - BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); - - /* Advance our pointers to the next descriptor. */ - if (++i == rxr->num_desc) - i = 0; - - /* Now send to the stack or do LRO */ - if (sendmp != NULL) { - rxr->next_to_check = i; - ixgbe_rx_input(rxr, ifp, sendmp, ptype); - i = rxr->next_to_check; - } - - /* Every 8 descriptors we go to refresh mbufs */ - if (processed == 8) { - ixgbe_refresh_mbufs(rxr, i); - processed = 0; - } - } - - /* Refresh any remaining buf structs */ - if (ixgbe_rx_unrefreshed(rxr)) - ixgbe_refresh_mbufs(rxr, i); - - rxr->next_to_check = i; - - /* - * Flush any outstanding LRO work - */ - tcp_lro_flush_all(lro); - - IXGBE_RX_UNLOCK(rxr); - - /* - ** Still have cleaning to do? - */ - if ((staterr & IXGBE_RXD_STAT_DD) != 0) - return (TRUE); - else - return (FALSE); -} - - -/********************************************************************* - * - * Verify that the hardware indicated that the checksum is valid. 
- * Inform the stack about the status of checksum so that stack - * doesn't spend time verifying the checksum. - * - *********************************************************************/ -static void -ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype) -{ - u16 status = (u16) staterr; - u8 errors = (u8) (staterr >> 24); - bool sctp = false; - - if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 && - (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0) - sctp = true; - - /* IPv4 checksum */ - if (status & IXGBE_RXD_STAT_IPCS) { - mp->m_pkthdr.csum_flags |= CSUM_L3_CALC; - /* IP Checksum Good */ - if (!(errors & IXGBE_RXD_ERR_IPE)) - mp->m_pkthdr.csum_flags |= CSUM_L3_VALID; - } - /* TCP/UDP/SCTP checksum */ - if (status & IXGBE_RXD_STAT_L4CS) { - mp->m_pkthdr.csum_flags |= CSUM_L4_CALC; - if (!(errors & IXGBE_RXD_ERR_TCPE)) { - mp->m_pkthdr.csum_flags |= CSUM_L4_VALID; - if (!sctp) - mp->m_pkthdr.csum_data = htons(0xffff); - } - } -} - -/******************************************************************** - * Manage DMA'able memory. - *******************************************************************/ -static void -ixgbe_dmamap_cb(void *arg, bus_dma_segment_t * segs, int nseg, int error) -{ - if (error) - return; - *(bus_addr_t *) arg = segs->ds_addr; - return; -} - -int -ixgbe_dma_malloc(struct adapter *adapter, bus_size_t size, - struct ixgbe_dma_alloc *dma, int mapflags) -{ - device_t dev = adapter->dev; - int r; - - r = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */ - DBA_ALIGN, 0, /* alignment, bounds */ - BUS_SPACE_MAXADDR, /* lowaddr */ - BUS_SPACE_MAXADDR, /* highaddr */ - NULL, NULL, /* filter, filterarg */ - size, /* maxsize */ - 1, /* nsegments */ - size, /* maxsegsize */ - BUS_DMA_ALLOCNOW, /* flags */ - NULL, /* lockfunc */ - NULL, /* lockfuncarg */ - &dma->dma_tag); - if (r != 0) { - device_printf(dev,"ixgbe_dma_malloc: bus_dma_tag_create failed; " - "error %u\n", r); - goto fail_0; - } - r = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr, - BUS_DMA_NOWAIT, &dma->dma_map); - if (r != 0) { - device_printf(dev,"ixgbe_dma_malloc: bus_dmamem_alloc failed; " - "error %u\n", r); - goto fail_1; - } - r = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr, - size, - ixgbe_dmamap_cb, - &dma->dma_paddr, - mapflags | BUS_DMA_NOWAIT); - if (r != 0) { - device_printf(dev,"ixgbe_dma_malloc: bus_dmamap_load failed; " - "error %u\n", r); - goto fail_2; - } - dma->dma_size = size; - return (0); -fail_2: - bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map); -fail_1: - bus_dma_tag_destroy(dma->dma_tag); -fail_0: - dma->dma_tag = NULL; - return (r); -} - -void -ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma) -{ - bus_dmamap_sync(dma->dma_tag, dma->dma_map, - BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); - bus_dmamap_unload(dma->dma_tag, dma->dma_map); - bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map); - bus_dma_tag_destroy(dma->dma_tag); -} - - -/********************************************************************* - * - * Allocate memory for the transmit and receive rings, and then - * the descriptors associated with each, called only once at attach. 
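(Illustrative aside, not part of the patch.) ixgbe_dma_malloc() above bundles the usual three-step busdma dance -- create a tag, allocate the memory, load the map through a callback that captures the bus address -- behind one call. A condensed caller looks like the sketch below; the full version, with error unwinding, is ixgbe_allocate_queues() just after.

static int
ix_sketch_alloc_ring(struct adapter *adapter, struct ixgbe_dma_alloc *ring)
{
        int tsize;

        tsize = roundup2(adapter->num_tx_desc *
            sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);
        if (ixgbe_dma_malloc(adapter, tsize, ring, BUS_DMA_NOWAIT) != 0)
                return (ENOMEM);
        /* ring->dma_vaddr is the KVA; ring->dma_paddr the bus address */
        bzero((void *)ring->dma_vaddr, tsize);
        /* ... use the ring; pair with ixgbe_dma_free() on teardown ... */
        return (0);
}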
- * - **********************************************************************/ -int -ixgbe_allocate_queues(struct adapter *adapter) -{ - device_t dev = adapter->dev; - struct ix_queue *que; - struct tx_ring *txr; - struct rx_ring *rxr; - int rsize, tsize, error = IXGBE_SUCCESS; - int txconf = 0, rxconf = 0; -#ifdef PCI_IOV - enum ixgbe_iov_mode iov_mode; -#endif - - /* First allocate the top level queue structs */ - if (!(adapter->queues = - (struct ix_queue *) malloc(sizeof(struct ix_queue) * - adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { - device_printf(dev, "Unable to allocate queue memory\n"); - error = ENOMEM; - goto fail; - } - - /* First allocate the TX ring struct memory */ - if (!(adapter->tx_rings = - (struct tx_ring *) malloc(sizeof(struct tx_ring) * - adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { - device_printf(dev, "Unable to allocate TX ring memory\n"); - error = ENOMEM; - goto tx_fail; - } - - /* Next allocate the RX */ - if (!(adapter->rx_rings = - (struct rx_ring *) malloc(sizeof(struct rx_ring) * - adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { - device_printf(dev, "Unable to allocate RX ring memory\n"); - error = ENOMEM; - goto rx_fail; - } - - /* For the ring itself */ - tsize = roundup2(adapter->num_tx_desc * - sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN); - -#ifdef PCI_IOV - iov_mode = ixgbe_get_iov_mode(adapter); - adapter->pool = ixgbe_max_vfs(iov_mode); -#else - adapter->pool = 0; -#endif - /* - * Now set up the TX queues, txconf is needed to handle the - * possibility that things fail midcourse and we need to - * undo memory gracefully - */ - for (int i = 0; i < adapter->num_queues; i++, txconf++) { - /* Set up some basics */ - txr = &adapter->tx_rings[i]; - txr->adapter = adapter; -#ifdef PCI_IOV - txr->me = ixgbe_pf_que_index(iov_mode, i); -#else - txr->me = i; -#endif - txr->num_desc = adapter->num_tx_desc; - - /* Initialize the TX side lock */ - snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)", - device_get_nameunit(dev), txr->me); - mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF); - - if (ixgbe_dma_malloc(adapter, tsize, - &txr->txdma, BUS_DMA_NOWAIT)) { - device_printf(dev, - "Unable to allocate TX Descriptor memory\n"); - error = ENOMEM; - goto err_tx_desc; - } - txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr; - bzero((void *)txr->tx_base, tsize); - - /* Now allocate transmit buffers for the ring */ - if (ixgbe_allocate_transmit_buffers(txr)) { - device_printf(dev, - "Critical Failure setting up transmit buffers\n"); - error = ENOMEM; - goto err_tx_desc; - } -#ifndef IXGBE_LEGACY_TX - /* Allocate a buf ring */ - txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_DEVBUF, - M_WAITOK, &txr->tx_mtx); - if (txr->br == NULL) { - device_printf(dev, - "Critical Failure setting up buf ring\n"); - error = ENOMEM; - goto err_tx_desc; - } -#endif - } - - /* - * Next the RX queues... 
- */ - rsize = roundup2(adapter->num_rx_desc * - sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN); - for (int i = 0; i < adapter->num_queues; i++, rxconf++) { - rxr = &adapter->rx_rings[i]; - /* Set up some basics */ - rxr->adapter = adapter; -#ifdef PCI_IOV - rxr->me = ixgbe_pf_que_index(iov_mode, i); -#else - rxr->me = i; -#endif - rxr->num_desc = adapter->num_rx_desc; - - /* Initialize the RX side lock */ - snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)", - device_get_nameunit(dev), rxr->me); - mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF); - - if (ixgbe_dma_malloc(adapter, rsize, - &rxr->rxdma, BUS_DMA_NOWAIT)) { - device_printf(dev, - "Unable to allocate RxDescriptor memory\n"); - error = ENOMEM; - goto err_rx_desc; - } - rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr; - bzero((void *)rxr->rx_base, rsize); - - /* Allocate receive buffers for the ring*/ - if (ixgbe_allocate_receive_buffers(rxr)) { - device_printf(dev, - "Critical Failure setting up receive buffers\n"); - error = ENOMEM; - goto err_rx_desc; - } - } - - /* - ** Finally set up the queue holding structs - */ - for (int i = 0; i < adapter->num_queues; i++) { - que = &adapter->queues[i]; - que->adapter = adapter; - que->me = i; - que->txr = &adapter->tx_rings[i]; - que->rxr = &adapter->rx_rings[i]; - } - - return (0); - -err_rx_desc: - for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--) - ixgbe_dma_free(adapter, &rxr->rxdma); -err_tx_desc: - for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--) - ixgbe_dma_free(adapter, &txr->txdma); - free(adapter->rx_rings, M_DEVBUF); -rx_fail: - free(adapter->tx_rings, M_DEVBUF); -tx_fail: - free(adapter->queues, M_DEVBUF); -fail: - return (error); -} Index: sys/dev/ixgbe/ixgbe.h =================================================================== --- sys/dev/ixgbe/ixgbe.h +++ sys/dev/ixgbe/ixgbe.h @@ -32,869 +32,12 @@ ******************************************************************************/ /*$FreeBSD$*/ - -#ifndef _IXGBE_H_ -#define _IXGBE_H_ - - -#include -#include -#ifndef IXGBE_LEGACY_TX -#include -#endif -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef PCI_IOV -#include -#include -#include -#endif - -#include "ixgbe_api.h" -#include "ixgbe_common.h" -#include "ixgbe_phy.h" -#include "ixgbe_vf.h" - -#ifdef PCI_IOV -#include "ixgbe_common.h" -#include "ixgbe_mbx.h" -#endif - -/* Tunables */ - -/* - * TxDescriptors Valid Range: 64-4096 Default Value: 256 This value is the - * number of transmit descriptors allocated by the driver. Increasing this - * value allows the driver to queue more transmits. Each descriptor is 16 - * bytes. Performance tests have show the 2K value to be optimal for top - * performance. - */ -#define DEFAULT_TXD 1024 -#define PERFORM_TXD 2048 -#define MAX_TXD 4096 -#define MIN_TXD 64 - -/* - * RxDescriptors Valid Range: 64-4096 Default Value: 256 This value is the - * number of receive descriptors allocated for each RX queue. Increasing this - * value allows the driver to buffer more incoming packets. Each descriptor - * is 16 bytes. A receive buffer is also allocated for each descriptor. 
- * - * Note: with 8 rings and a dual port card, it is possible to bump up - * against the system mbuf pool limit, you can tune nmbclusters - * to adjust for this. - */ -#define DEFAULT_RXD 1024 -#define PERFORM_RXD 2048 -#define MAX_RXD 4096 -#define MIN_RXD 64 - -/* Alignment for rings */ -#define DBA_ALIGN 128 - -/* - * This is the max watchdog interval, ie. the time that can - * pass between any two TX clean operations, such only happening - * when the TX hardware is functioning. - */ -#define IXGBE_WATCHDOG (10 * hz) - -/* - * This parameters control when the driver calls the routine to reclaim - * transmit descriptors. - */ -#define IXGBE_TX_CLEANUP_THRESHOLD (adapter->num_tx_desc / 8) -#define IXGBE_TX_OP_THRESHOLD (adapter->num_tx_desc / 32) - -/* These defines are used in MTU calculations */ -#define IXGBE_MAX_FRAME_SIZE 9728 -#define IXGBE_MTU_HDR (ETHER_HDR_LEN + ETHER_CRC_LEN) -#define IXGBE_MTU_HDR_VLAN (ETHER_HDR_LEN + ETHER_CRC_LEN + \ - ETHER_VLAN_ENCAP_LEN) -#define IXGBE_MAX_MTU (IXGBE_MAX_FRAME_SIZE - IXGBE_MTU_HDR) -#define IXGBE_MAX_MTU_VLAN (IXGBE_MAX_FRAME_SIZE - IXGBE_MTU_HDR_VLAN) - -/* Flow control constants */ -#define IXGBE_FC_PAUSE 0xFFFF -#define IXGBE_FC_HI 0x20000 -#define IXGBE_FC_LO 0x10000 - -/* - * Used for optimizing small rx mbufs. Effort is made to keep the copy - * small and aligned for the CPU L1 cache. - * - * MHLEN is typically 168 bytes, giving us 8-byte alignment. Getting - * 32 byte alignment needed for the fast bcopy results in 8 bytes being - * wasted. Getting 64 byte alignment, which _should_ be ideal for - * modern Intel CPUs, results in 40 bytes wasted and a significant drop - * in observed efficiency of the optimization, 97.9% -> 81.8%. - */ -#if __FreeBSD_version < 1002000 -#define MPKTHSIZE (sizeof(struct m_hdr) + sizeof(struct pkthdr)) -#endif -#define IXGBE_RX_COPY_HDR_PADDED ((((MPKTHSIZE - 1) / 32) + 1) * 32) -#define IXGBE_RX_COPY_LEN (MSIZE - IXGBE_RX_COPY_HDR_PADDED) -#define IXGBE_RX_COPY_ALIGN (IXGBE_RX_COPY_HDR_PADDED - MPKTHSIZE) - -/* Keep older OS drivers building... 
*/ -#if !defined(SYSCTL_ADD_UQUAD) -#define SYSCTL_ADD_UQUAD SYSCTL_ADD_QUAD -#endif - -/* Defines for printing debug information */ -#define DEBUG_INIT 0 -#define DEBUG_IOCTL 0 -#define DEBUG_HW 0 - -#define INIT_DEBUGOUT(S) if (DEBUG_INIT) printf(S "\n") -#define INIT_DEBUGOUT1(S, A) if (DEBUG_INIT) printf(S "\n", A) -#define INIT_DEBUGOUT2(S, A, B) if (DEBUG_INIT) printf(S "\n", A, B) -#define IOCTL_DEBUGOUT(S) if (DEBUG_IOCTL) printf(S "\n") -#define IOCTL_DEBUGOUT1(S, A) if (DEBUG_IOCTL) printf(S "\n", A) -#define IOCTL_DEBUGOUT2(S, A, B) if (DEBUG_IOCTL) printf(S "\n", A, B) -#define HW_DEBUGOUT(S) if (DEBUG_HW) printf(S "\n") -#define HW_DEBUGOUT1(S, A) if (DEBUG_HW) printf(S "\n", A) -#define HW_DEBUGOUT2(S, A, B) if (DEBUG_HW) printf(S "\n", A, B) - -#define MAX_NUM_MULTICAST_ADDRESSES 128 -#define IXGBE_82598_SCATTER 100 -#define IXGBE_82599_SCATTER 32 -#define MSIX_82598_BAR 3 -#define MSIX_82599_BAR 4 -#define IXGBE_TSO_SIZE 262140 -#define IXGBE_RX_HDR 128 -#define IXGBE_VFTA_SIZE 128 -#define IXGBE_BR_SIZE 4096 -#define IXGBE_QUEUE_MIN_FREE 32 -#define IXGBE_MAX_TX_BUSY 10 -#define IXGBE_QUEUE_HUNG 0x80000000 - -#define IXV_EITR_DEFAULT 128 - -/* Supported offload bits in mbuf flag */ -#if __FreeBSD_version >= 1000000 -#define CSUM_OFFLOAD (CSUM_IP_TSO|CSUM_IP6_TSO|CSUM_IP| \ - CSUM_IP_UDP|CSUM_IP_TCP|CSUM_IP_SCTP| \ - CSUM_IP6_UDP|CSUM_IP6_TCP|CSUM_IP6_SCTP) -#elif __FreeBSD_version >= 800000 -#define CSUM_OFFLOAD (CSUM_IP|CSUM_TCP|CSUM_UDP|CSUM_SCTP) -#else -#define CSUM_OFFLOAD (CSUM_IP|CSUM_TCP|CSUM_UDP) -#endif - -/* Backward compatibility items for very old versions */ -#ifndef pci_find_cap -#define pci_find_cap pci_find_extcap -#endif - -#ifndef DEVMETHOD_END -#define DEVMETHOD_END { NULL, NULL } -#endif - -/* - * Interrupt Moderation parameters - */ -#define IXGBE_LOW_LATENCY 128 -#define IXGBE_AVE_LATENCY 400 -#define IXGBE_BULK_LATENCY 1200 - -/* Using 1FF (the max value), the interval is ~1.05ms */ -#define IXGBE_LINK_ITR_QUANTA 0x1FF -#define IXGBE_LINK_ITR ((IXGBE_LINK_ITR_QUANTA << 3) & \ - IXGBE_EITR_ITR_INT_MASK) - -/* MAC type macros */ -#define IXGBE_IS_X550VF(_adapter) \ - ((_adapter->hw.mac.type == ixgbe_mac_X550_vf) || \ - (_adapter->hw.mac.type == ixgbe_mac_X550EM_x_vf)) - -#define IXGBE_IS_VF(_adapter) \ - (IXGBE_IS_X550VF(_adapter) || \ - (_adapter->hw.mac.type == ixgbe_mac_X540_vf) || \ - (_adapter->hw.mac.type == ixgbe_mac_82599_vf)) - -#ifdef PCI_IOV -#define IXGBE_VF_INDEX(vmdq) ((vmdq) / 32) -#define IXGBE_VF_BIT(vmdq) (1 << ((vmdq) % 32)) - -#define IXGBE_VT_MSG_MASK 0xFFFF - -#define IXGBE_VT_MSGINFO(msg) \ - (((msg) & IXGBE_VT_MSGINFO_MASK) >> IXGBE_VT_MSGINFO_SHIFT) - -#define IXGBE_VF_GET_QUEUES_RESP_LEN 5 - -#define IXGBE_API_VER_1_0 0 -#define IXGBE_API_VER_2_0 1 /* Solaris API. Not supported. */ -#define IXGBE_API_VER_1_1 2 -#define IXGBE_API_VER_UNKNOWN UINT16_MAX - -enum ixgbe_iov_mode { - IXGBE_64_VM, - IXGBE_32_VM, - IXGBE_NO_VM -}; -#endif /* PCI_IOV */ - - -/* - ***************************************************************************** - * vendor_info_array - * - * This array contains the list of Subvendor/Subdevice IDs on which the driver - * should load. 
- * - ***************************************************************************** - */ -typedef struct _ixgbe_vendor_info_t { - unsigned int vendor_id; - unsigned int device_id; - unsigned int subvendor_id; - unsigned int subdevice_id; - unsigned int index; -} ixgbe_vendor_info_t; - - -struct ixgbe_tx_buf { - union ixgbe_adv_tx_desc *eop; - struct mbuf *m_head; - bus_dmamap_t map; -}; - -struct ixgbe_rx_buf { - struct mbuf *buf; - struct mbuf *fmp; - bus_dmamap_t pmap; - u_int flags; -#define IXGBE_RX_COPY 0x01 - uint64_t addr; -}; - -/* - * Bus dma allocation structure used by ixgbe_dma_malloc and ixgbe_dma_free. - */ -struct ixgbe_dma_alloc { - bus_addr_t dma_paddr; - caddr_t dma_vaddr; - bus_dma_tag_t dma_tag; - bus_dmamap_t dma_map; - bus_dma_segment_t dma_seg; - bus_size_t dma_size; - int dma_nseg; -}; - -struct ixgbe_mc_addr { - u8 addr[IXGBE_ETH_LENGTH_OF_ADDRESS]; - u32 vmdq; -}; - -/* -** Driver queue struct: this is the interrupt container -** for the associated tx and rx ring. -*/ -struct ix_queue { - struct adapter *adapter; - u32 msix; /* This queue's MSIX vector */ - u32 eims; /* This queue's EIMS bit */ - u32 eitr_setting; - u32 me; - struct resource *res; - void *tag; - int busy; - struct tx_ring *txr; - struct rx_ring *rxr; - struct task que_task; - struct taskqueue *tq; - u64 irqs; -}; - -/* - * The transmit ring, one per queue - */ -struct tx_ring { - struct adapter *adapter; - struct mtx tx_mtx; - u32 me; - u32 tail; - int busy; - union ixgbe_adv_tx_desc *tx_base; - struct ixgbe_tx_buf *tx_buffers; - struct ixgbe_dma_alloc txdma; - volatile u16 tx_avail; - u16 next_avail_desc; - u16 next_to_clean; - u16 num_desc; - u32 txd_cmd; - bus_dma_tag_t txtag; - char mtx_name[16]; -#ifndef IXGBE_LEGACY_TX - struct buf_ring *br; - struct task txq_task; -#endif -#ifdef IXGBE_FDIR - u16 atr_sample; - u16 atr_count; -#endif - u32 bytes; /* used for AIM */ - u32 packets; - /* Soft Stats */ - unsigned long tso_tx; - unsigned long no_tx_map_avail; - unsigned long no_tx_dma_setup; - u64 no_desc_avail; - u64 total_packets; -}; - - -/* - * The Receive ring, one per rx queue - */ -struct rx_ring { - struct adapter *adapter; - struct mtx rx_mtx; - u32 me; - u32 tail; - union ixgbe_adv_rx_desc *rx_base; - struct ixgbe_dma_alloc rxdma; - struct lro_ctrl lro; - bool lro_enabled; - bool hw_rsc; - bool vtag_strip; - u16 next_to_refresh; - u16 next_to_check; - u16 num_desc; - u16 mbuf_sz; - char mtx_name[16]; - struct ixgbe_rx_buf *rx_buffers; - bus_dma_tag_t ptag; - - u32 bytes; /* Used for AIM calc */ - u32 packets; - - /* Soft stats */ - u64 rx_irq; - u64 rx_copies; - u64 rx_packets; - u64 rx_bytes; - u64 rx_discarded; - u64 rsc_num; -#ifdef IXGBE_FDIR - u64 flm; +#ifndef KLD_MODULE +#include "opt_iflib.h" #endif -}; -#ifdef PCI_IOV -#define IXGBE_VF_CTS (1 << 0) /* VF is clear to send. */ -#define IXGBE_VF_CAP_MAC (1 << 1) /* VF is permitted to change MAC. */ -#define IXGBE_VF_CAP_VLAN (1 << 2) /* VF is permitted to join vlans. */ -#define IXGBE_VF_ACTIVE (1 << 3) /* VF is active. 
*/ - -#define IXGBE_MAX_VF_MC 30 /* Max number of multicast entries */ - -struct ixgbe_vf { - u_int pool; - u_int rar_index; - u_int max_frame_size; - uint32_t flags; - uint8_t ether_addr[ETHER_ADDR_LEN]; - uint16_t mc_hash[IXGBE_MAX_VF_MC]; - uint16_t num_mc_hashes; - uint16_t default_vlan; - uint16_t vlan_tag; - uint16_t api_ver; -}; -#endif /* PCI_IOV */ - -/* Our adapter structure */ -struct adapter { - struct ixgbe_hw hw; - struct ixgbe_osdep osdep; - - device_t dev; - struct ifnet *ifp; - - struct resource *pci_mem; - struct resource *msix_mem; - - /* - * Interrupt resources: this set is - * either used for legacy, or for Link - * when doing MSIX - */ - void *tag; - struct resource *res; - - struct ifmedia media; - struct callout timer; - int msix; - int if_flags; - - struct mtx core_mtx; - - eventhandler_tag vlan_attach; - eventhandler_tag vlan_detach; - - u16 num_vlans; - u16 num_queues; - - /* - ** Shadow VFTA table, this is needed because - ** the real vlan filter table gets cleared during - ** a soft reset and the driver needs to be able - ** to repopulate it. - */ - u32 shadow_vfta[IXGBE_VFTA_SIZE]; - - /* Info about the interface */ - u32 optics; - u32 fc; /* local flow ctrl setting */ - int advertise; /* link speeds */ - bool enable_aim; /* adaptive interrupt moderation */ - bool link_active; - u16 max_frame_size; - u16 num_segs; - u32 link_speed; - bool link_up; - u32 vector; - u16 dmac; - bool eee_enabled; - u32 phy_layer; - - /* Power management-related */ - bool wol_support; - u32 wufc; - - /* Mbuf cluster size */ - u32 rx_mbuf_sz; - - /* Support for pluggable optics */ - bool sfp_probe; - struct task link_task; /* Link tasklet */ - struct task mod_task; /* SFP tasklet */ - struct task msf_task; /* Multispeed Fiber */ -#ifdef PCI_IOV - struct task mbx_task; /* VF -> PF mailbox interrupt */ -#endif /* PCI_IOV */ -#ifdef IXGBE_FDIR - int fdir_reinit; - struct task fdir_task; -#endif - struct task phy_task; /* PHY intr tasklet */ - struct taskqueue *tq; - - /* - ** Queues: - ** This is the irq holder, it has - ** and RX/TX pair or rings associated - ** with it. - */ - struct ix_queue *queues; - - /* - * Transmit rings: - * Allocated at run time, an array of rings. - */ - struct tx_ring *tx_rings; - u32 num_tx_desc; - u32 tx_process_limit; - - /* - * Receive rings: - * Allocated at run time, an array of rings. 
- */ - struct rx_ring *rx_rings; - u64 active_queues; - u32 num_rx_desc; - u32 rx_process_limit; - - /* Multicast array memory */ - struct ixgbe_mc_addr *mta; - int num_vfs; - int pool; -#ifdef PCI_IOV - struct ixgbe_vf *vfs; -#endif -#ifdef DEV_NETMAP - void (*init_locked)(struct adapter *); - void (*stop_locked)(void *); -#endif - - /* Misc stats maintained by the driver */ - unsigned long dropped_pkts; - unsigned long mbuf_defrag_failed; - unsigned long mbuf_header_failed; - unsigned long mbuf_packet_failed; - unsigned long watchdog_events; - unsigned long link_irq; - union { - struct ixgbe_hw_stats pf; - struct ixgbevf_hw_stats vf; - } stats; -#if __FreeBSD_version >= 1100036 - /* counter(9) stats */ - u64 ipackets; - u64 ierrors; - u64 opackets; - u64 oerrors; - u64 ibytes; - u64 obytes; - u64 imcasts; - u64 omcasts; - u64 iqdrops; - u64 noproto; -#endif -}; - - -/* Precision Time Sync (IEEE 1588) defines */ -#define ETHERTYPE_IEEE1588 0x88F7 -#define PICOSECS_PER_TICK 20833 -#define TSYNC_UDP_PORT 319 /* UDP port for the protocol */ -#define IXGBE_ADVTXD_TSTAMP 0x00080000 - - -#define IXGBE_CORE_LOCK_INIT(_sc, _name) \ - mtx_init(&(_sc)->core_mtx, _name, "IXGBE Core Lock", MTX_DEF) -#define IXGBE_CORE_LOCK_DESTROY(_sc) mtx_destroy(&(_sc)->core_mtx) -#define IXGBE_TX_LOCK_DESTROY(_sc) mtx_destroy(&(_sc)->tx_mtx) -#define IXGBE_RX_LOCK_DESTROY(_sc) mtx_destroy(&(_sc)->rx_mtx) -#define IXGBE_CORE_LOCK(_sc) mtx_lock(&(_sc)->core_mtx) -#define IXGBE_TX_LOCK(_sc) mtx_lock(&(_sc)->tx_mtx) -#define IXGBE_TX_TRYLOCK(_sc) mtx_trylock(&(_sc)->tx_mtx) -#define IXGBE_RX_LOCK(_sc) mtx_lock(&(_sc)->rx_mtx) -#define IXGBE_CORE_UNLOCK(_sc) mtx_unlock(&(_sc)->core_mtx) -#define IXGBE_TX_UNLOCK(_sc) mtx_unlock(&(_sc)->tx_mtx) -#define IXGBE_RX_UNLOCK(_sc) mtx_unlock(&(_sc)->rx_mtx) -#define IXGBE_CORE_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->core_mtx, MA_OWNED) -#define IXGBE_TX_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->tx_mtx, MA_OWNED) - -/* For backward compatibility */ -#if !defined(PCIER_LINK_STA) -#define PCIER_LINK_STA PCIR_EXPRESS_LINK_STA -#endif - -/* Stats macros */ -#if __FreeBSD_version >= 1100036 -#define IXGBE_SET_IPACKETS(sc, count) (sc)->ipackets = (count) -#define IXGBE_SET_IERRORS(sc, count) (sc)->ierrors = (count) -#define IXGBE_SET_OPACKETS(sc, count) (sc)->opackets = (count) -#define IXGBE_SET_OERRORS(sc, count) (sc)->oerrors = (count) -#define IXGBE_SET_COLLISIONS(sc, count) -#define IXGBE_SET_IBYTES(sc, count) (sc)->ibytes = (count) -#define IXGBE_SET_OBYTES(sc, count) (sc)->obytes = (count) -#define IXGBE_SET_IMCASTS(sc, count) (sc)->imcasts = (count) -#define IXGBE_SET_OMCASTS(sc, count) (sc)->omcasts = (count) -#define IXGBE_SET_IQDROPS(sc, count) (sc)->iqdrops = (count) +#ifdef IFLIB +#include #else -#define IXGBE_SET_IPACKETS(sc, count) (sc)->ifp->if_ipackets = (count) -#define IXGBE_SET_IERRORS(sc, count) (sc)->ifp->if_ierrors = (count) -#define IXGBE_SET_OPACKETS(sc, count) (sc)->ifp->if_opackets = (count) -#define IXGBE_SET_OERRORS(sc, count) (sc)->ifp->if_oerrors = (count) -#define IXGBE_SET_COLLISIONS(sc, count) (sc)->ifp->if_collisions = (count) -#define IXGBE_SET_IBYTES(sc, count) (sc)->ifp->if_ibytes = (count) -#define IXGBE_SET_OBYTES(sc, count) (sc)->ifp->if_obytes = (count) -#define IXGBE_SET_IMCASTS(sc, count) (sc)->ifp->if_imcasts = (count) -#define IXGBE_SET_OMCASTS(sc, count) (sc)->ifp->if_omcasts = (count) -#define IXGBE_SET_IQDROPS(sc, count) (sc)->ifp->if_iqdrops = (count) +#include #endif - -/* External PHY register addresses */ -#define IXGBE_PHY_CURRENT_TEMP 
0xC820 -#define IXGBE_PHY_OVERTEMP_STATUS 0xC830 - -/* Sysctl help messages; displayed with sysctl -d */ -#define IXGBE_SYSCTL_DESC_ADV_SPEED \ - "\nControl advertised link speed using these flags:\n" \ - "\t0x1 - advertise 100M\n" \ - "\t0x2 - advertise 1G\n" \ - "\t0x4 - advertise 10G\n\n" \ - "\t100M is only supported on certain 10GBaseT adapters.\n" - -#define IXGBE_SYSCTL_DESC_SET_FC \ - "\nSet flow control mode using these values:\n" \ - "\t0 - off\n" \ - "\t1 - rx pause\n" \ - "\t2 - tx pause\n" \ - "\t3 - tx and rx pause" - -static inline bool -ixgbe_is_sfp(struct ixgbe_hw *hw) -{ - switch (hw->phy.type) { - case ixgbe_phy_sfp_avago: - case ixgbe_phy_sfp_ftl: - case ixgbe_phy_sfp_intel: - case ixgbe_phy_sfp_unknown: - case ixgbe_phy_sfp_passive_tyco: - case ixgbe_phy_sfp_passive_unknown: - case ixgbe_phy_qsfp_passive_unknown: - case ixgbe_phy_qsfp_active_unknown: - case ixgbe_phy_qsfp_intel: - case ixgbe_phy_qsfp_unknown: - return TRUE; - default: - return FALSE; - } -} - -/* Workaround to make 8.0 buildable */ -#if __FreeBSD_version >= 800000 && __FreeBSD_version < 800504 -static __inline int -drbr_needs_enqueue(struct ifnet *ifp, struct buf_ring *br) -{ -#ifdef ALTQ - if (ALTQ_IS_ENABLED(&ifp->if_snd)) - return (1); -#endif - return (!buf_ring_empty(br)); -} -#endif - -/* -** Find the number of unrefreshed RX descriptors -*/ -static inline u16 -ixgbe_rx_unrefreshed(struct rx_ring *rxr) -{ - if (rxr->next_to_check > rxr->next_to_refresh) - return (rxr->next_to_check - rxr->next_to_refresh - 1); - else - return ((rxr->num_desc + rxr->next_to_check) - - rxr->next_to_refresh - 1); -} - -/* -** This checks for a zero mac addr, something that will be likely -** unless the Admin on the Host has created one. -*/ -static inline bool -ixv_check_ether_addr(u8 *addr) -{ - bool status = TRUE; - - if ((addr[0] == 0 && addr[1]== 0 && addr[2] == 0 && - addr[3] == 0 && addr[4]== 0 && addr[5] == 0)) - status = FALSE; - return (status); -} - -/* Shared Prototypes */ - -#ifdef IXGBE_LEGACY_TX -void ixgbe_start(struct ifnet *); -void ixgbe_start_locked(struct tx_ring *, struct ifnet *); -#else /* ! 
IXGBE_LEGACY_TX */ -int ixgbe_mq_start(struct ifnet *, struct mbuf *); -int ixgbe_mq_start_locked(struct ifnet *, struct tx_ring *); -void ixgbe_qflush(struct ifnet *); -void ixgbe_deferred_mq_start(void *, int); -#endif /* IXGBE_LEGACY_TX */ - -int ixgbe_allocate_queues(struct adapter *); -int ixgbe_allocate_transmit_buffers(struct tx_ring *); -int ixgbe_setup_transmit_structures(struct adapter *); -void ixgbe_free_transmit_structures(struct adapter *); -int ixgbe_allocate_receive_buffers(struct rx_ring *); -int ixgbe_setup_receive_structures(struct adapter *); -void ixgbe_free_receive_structures(struct adapter *); -void ixgbe_txeof(struct tx_ring *); -bool ixgbe_rxeof(struct ix_queue *); - -int ixgbe_dma_malloc(struct adapter *, - bus_size_t, struct ixgbe_dma_alloc *, int); -void ixgbe_dma_free(struct adapter *, struct ixgbe_dma_alloc *); - -#ifdef PCI_IOV - -static inline boolean_t -ixgbe_vf_mac_changed(struct ixgbe_vf *vf, const uint8_t *mac) -{ - return (bcmp(mac, vf->ether_addr, ETHER_ADDR_LEN) != 0); -} - -static inline void -ixgbe_send_vf_msg(struct adapter *adapter, struct ixgbe_vf *vf, u32 msg) -{ - - if (vf->flags & IXGBE_VF_CTS) - msg |= IXGBE_VT_MSGTYPE_CTS; - - ixgbe_write_mbx(&adapter->hw, &msg, 1, vf->pool); -} - -static inline void -ixgbe_send_vf_ack(struct adapter *adapter, struct ixgbe_vf *vf, u32 msg) -{ - msg &= IXGBE_VT_MSG_MASK; - ixgbe_send_vf_msg(adapter, vf, msg | IXGBE_VT_MSGTYPE_ACK); -} - -static inline void -ixgbe_send_vf_nack(struct adapter *adapter, struct ixgbe_vf *vf, u32 msg) -{ - msg &= IXGBE_VT_MSG_MASK; - ixgbe_send_vf_msg(adapter, vf, msg | IXGBE_VT_MSGTYPE_NACK); -} - -static inline void -ixgbe_process_vf_ack(struct adapter *adapter, struct ixgbe_vf *vf) -{ - if (!(vf->flags & IXGBE_VF_CTS)) - ixgbe_send_vf_nack(adapter, vf, 0); -} - -static inline enum ixgbe_iov_mode -ixgbe_get_iov_mode(struct adapter *adapter) -{ - if (adapter->num_vfs == 0) - return (IXGBE_NO_VM); - if (adapter->num_queues <= 2) - return (IXGBE_64_VM); - else if (adapter->num_queues <= 4) - return (IXGBE_32_VM); - else - return (IXGBE_NO_VM); -} - -static inline u16 -ixgbe_max_vfs(enum ixgbe_iov_mode mode) -{ - /* - * We return odd numbers below because we - * reserve 1 VM's worth of queues for the PF. 
- */ - switch (mode) { - case IXGBE_64_VM: - return (63); - case IXGBE_32_VM: - return (31); - case IXGBE_NO_VM: - default: - return (0); - } -} - -static inline int -ixgbe_vf_queues(enum ixgbe_iov_mode mode) -{ - switch (mode) { - case IXGBE_64_VM: - return (2); - case IXGBE_32_VM: - return (4); - case IXGBE_NO_VM: - default: - return (0); - } -} - -static inline int -ixgbe_vf_que_index(enum ixgbe_iov_mode mode, u32 vfnum, int num) -{ - return ((vfnum * ixgbe_vf_queues(mode)) + num); -} - -static inline int -ixgbe_pf_que_index(enum ixgbe_iov_mode mode, int num) -{ - return (ixgbe_vf_que_index(mode, ixgbe_max_vfs(mode), num)); -} - -static inline void -ixgbe_update_max_frame(struct adapter * adapter, int max_frame) -{ - if (adapter->max_frame_size < max_frame) - adapter->max_frame_size = max_frame; -} - -static inline u32 -ixgbe_get_mrqc(enum ixgbe_iov_mode mode) -{ - u32 mrqc = 0; - switch (mode) { - case IXGBE_64_VM: - mrqc = IXGBE_MRQC_VMDQRSS64EN; - break; - case IXGBE_32_VM: - mrqc = IXGBE_MRQC_VMDQRSS32EN; - break; - case IXGBE_NO_VM: - mrqc = 0; - break; - default: - panic("Unexpected SR-IOV mode %d", mode); - } - return(mrqc); -} - - -static inline u32 -ixgbe_get_mtqc(enum ixgbe_iov_mode mode) -{ - uint32_t mtqc = 0; - switch (mode) { - case IXGBE_64_VM: - mtqc |= IXGBE_MTQC_64VF | IXGBE_MTQC_VT_ENA; - break; - case IXGBE_32_VM: - mtqc |= IXGBE_MTQC_32VF | IXGBE_MTQC_VT_ENA; - break; - case IXGBE_NO_VM: - mtqc = IXGBE_MTQC_64Q_1PB; - break; - default: - panic("Unexpected SR-IOV mode %d", mode); - } - return(mtqc); -} -#endif /* PCI_IOV */ - -#endif /* _IXGBE_H_ */ Index: sys/dev/ixgbe/ixgbe_sysctl.c =================================================================== --- /dev/null +++ sys/dev/ixgbe/ixgbe_sysctl.c @@ -0,0 +1,559 @@ +/*- + * Copyright (c) 2016, Matthew Macy + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Neither the name of Matthew Macy nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#ifndef KLD_MODULE
+#include "opt_iflib.h"
+#endif
+
+#ifdef IFLIB
+#include "ixgbe.h"
+/********************************************************************
+ *
+ *  Dump Registers
+ *
+ ********************************************************************/
+#define IXGBE_REGS_LEN  1139
+extern if_shared_ctx_t ixgbe_sctx;
+
+/*
+ * Sysctl handler that dumps the hardware register state into a
+ * human-readable sbuf.  It is typically wired up at attach time as a
+ * read-only CTLTYPE_STRING node, e.g. via SYSCTL_ADD_PROC(...,
+ * CTLTYPE_STRING | CTLFLAG_RD, adapter, 0, ixgbe_get_regs, "A", ...);
+ * the oid name is up to the caller.
+ */
+int
+ixgbe_get_regs(SYSCTL_HANDLER_ARGS)
+{
+    struct adapter *adapter = (struct adapter *) arg1;
+    struct ixgbe_hw *hw = &adapter->hw;
+#if 0
+    struct ix_tx_queue *tx_que = &adapter->tx_queues[0];
+    struct ix_rx_queue *rx_que = &adapter->rx_queues[0];
+    struct rx_ring *rxr = &rx_que->rxr;
+    struct tx_ring *txr = &tx_que->txr;
+    int ntxd = ixgbe_sctx->isc_ntxd[0];
+    int nrxd = ixgbe_sctx->isc_nrxd[0];
+    int j;
+#endif
+
+    struct sbuf *sb;
+    u32 *regs_buff;
+    int i;
+    int rc;
+
+    /* M_WAITOK | M_ZERO cannot fail, so no NULL check is needed. */
+    regs_buff = malloc(sizeof(u32) * IXGBE_REGS_LEN, M_DEVBUF,
+        M_WAITOK | M_ZERO);
+
+    rc = sysctl_wire_old_buffer(req, 0);
+    MPASS(rc == 0);
+    if (rc != 0) {
+        free(regs_buff, M_DEVBUF);
+        return (rc);
+    }
+
+    sb = sbuf_new_for_sysctl(NULL, NULL, 32*PAGE_SIZE, req);
+    MPASS(sb != NULL);
+    if (sb == NULL) {
+        free(regs_buff, M_DEVBUF);
+        return (ENOMEM);
+    }
+
+    /* General Registers */
+    sbuf_printf(sb, "General Registers\n");
+    regs_buff[0] = IXGBE_READ_REG(hw, IXGBE_CTRL);
+    sbuf_printf(sb, "\tIXGBE_CTRL\t %08x\n", regs_buff[0]);
+    regs_buff[1] = IXGBE_READ_REG(hw, IXGBE_STATUS);
+    sbuf_printf(sb, "\tIXGBE_STATUS\t %08x\n", regs_buff[1]);
+    regs_buff[2] = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT);
+    sbuf_printf(sb, "\tIXGBE_CTRL_EXT\t %08x\n", regs_buff[2]);
+    regs_buff[3] = IXGBE_READ_REG(hw, IXGBE_ESDP);
+    sbuf_printf(sb, "\tIXGBE_ESDP\t %08x\n", regs_buff[3]);
+    regs_buff[4] = IXGBE_READ_REG(hw, IXGBE_EODSDP);
+    sbuf_printf(sb, "\tIXGBE_EODSDP\t %08x\n", regs_buff[4]);
+    regs_buff[5] = IXGBE_READ_REG(hw, IXGBE_LEDCTL);
+    sbuf_printf(sb, "\tIXGBE_LEDCTL\t %08x\n", regs_buff[5]);
+    regs_buff[6] = IXGBE_READ_REG(hw, IXGBE_FRTIMER);
+    sbuf_printf(sb, "\tIXGBE_FRTIMER\t %08x\n", regs_buff[6]);
+    regs_buff[7] = IXGBE_READ_REG(hw, IXGBE_TCPTIMER);
+    sbuf_printf(sb, "\tIXGBE_TCPTIMER\t %08x\n\n", regs_buff[7]);
+
+    /* Interrupt */
+    /* don't read EICR because it can clear interrupt causes, instead
+     * read EICS which is a shadow but doesn't clear EICR */
+    sbuf_printf(sb, "Interrupt\n");
+    regs_buff[18] = IXGBE_READ_REG(hw, IXGBE_EICS);
+    sbuf_printf(sb, "\tIXGBE_EICS\t %08x\n", regs_buff[18]);
+    regs_buff[19] = IXGBE_READ_REG(hw, IXGBE_EICS);
+    sbuf_printf(sb, "\tIXGBE_EICS\t %08x\n", regs_buff[19]);
+    regs_buff[20] = IXGBE_READ_REG(hw, IXGBE_EIMS);
+    sbuf_printf(sb, "\tIXGBE_EIMS\t %08x\n", regs_buff[20]);
+    regs_buff[21] = IXGBE_READ_REG(hw, IXGBE_EIMC);
+    sbuf_printf(sb, "\tIXGBE_EIMC\t %08x\n", regs_buff[21]);
+    regs_buff[22] = IXGBE_READ_REG(hw, IXGBE_EIAC);
+    sbuf_printf(sb, "\tIXGBE_EIAC\t %08x\n", regs_buff[22]);
+    regs_buff[23] = IXGBE_READ_REG(hw, IXGBE_EIAM);
+    sbuf_printf(sb, "\tIXGBE_EIAM\t %08x\n", regs_buff[23]);
+    regs_buff[24] = IXGBE_READ_REG(hw, IXGBE_EITR(0));
+    sbuf_printf(sb, "\tIXGBE_EITR(0)\t %08x\n", regs_buff[24]);
+    regs_buff[25] = IXGBE_READ_REG(hw, IXGBE_IVAR(0));
+    sbuf_printf(sb, "\tIXGBE_IVAR(0)\t %08x\n", regs_buff[25]);
+    regs_buff[26] = IXGBE_READ_REG(hw, IXGBE_MSIXT);
+    sbuf_printf(sb, "\tIXGBE_MSIXT\t %08x\n", regs_buff[26]);
+    regs_buff[27] = IXGBE_READ_REG(hw, IXGBE_MSIXPBA);
+    sbuf_printf(sb, "\tIXGBE_MSIXPBA\t %08x\n", regs_buff[27]);
+    regs_buff[28] = IXGBE_READ_REG(hw, IXGBE_PBACL(0));
+    sbuf_printf(sb, "\tIXGBE_PBACL(0)\t %08x\n", regs_buff[28]);
+    regs_buff[29] = IXGBE_READ_REG(hw, IXGBE_GPIE);
+    sbuf_printf(sb, "\tIXGBE_GPIE\t %08x\n", regs_buff[29]);
+
+    /* Flow Control */
+    sbuf_printf(sb, "\nFlow Control\n");
+    regs_buff[30] = IXGBE_READ_REG(hw, IXGBE_PFCTOP);
+    sbuf_printf(sb, "\tIXGBE_PFCTOP\t %08x\n", regs_buff[30]);
+    regs_buff[31] = IXGBE_READ_REG(hw, IXGBE_FCTTV(0));
+    sbuf_printf(sb, "\tIXGBE_FCTTV(0)\t %08x\n", regs_buff[31]);
+    regs_buff[32] = IXGBE_READ_REG(hw, IXGBE_FCTTV(1));
+    sbuf_printf(sb, "\tIXGBE_FCTTV(1)\t %08x\n", regs_buff[32]);
+    regs_buff[33] = IXGBE_READ_REG(hw, IXGBE_FCTTV(2));
+    sbuf_printf(sb, "\tIXGBE_FCTTV(2)\t %08x\n", regs_buff[33]);
+    regs_buff[34] = IXGBE_READ_REG(hw, IXGBE_FCTTV(3));
+    sbuf_printf(sb, "\tIXGBE_FCTTV(3)\t %08x\n", regs_buff[34]);
+
+    for (i = 0; i < adapter->num_tx_queues; i++) {
+        switch (hw->mac.type) {
+        case ixgbe_mac_82598EB:
+            regs_buff[35 + i] = IXGBE_READ_REG(hw, IXGBE_FCRTL(i));
+            sbuf_printf(sb, "\tIXGBE_FCRTL(%2d)\t %08x\n", i, regs_buff[35+i]);
+            regs_buff[43 + i] = IXGBE_READ_REG(hw, IXGBE_FCRTH(i));
+            sbuf_printf(sb, "\tIXGBE_FCRTH(%2d)\t %08x\n", i, regs_buff[43+i]);
+            break;
+        case ixgbe_mac_82599EB:
+        case ixgbe_mac_X540:
+        case ixgbe_mac_X550:
+        case ixgbe_mac_X550EM_x:
+            regs_buff[35 + i] = IXGBE_READ_REG(hw, IXGBE_FCRTL_82599(i));
+            regs_buff[43 + i] = IXGBE_READ_REG(hw, IXGBE_FCRTH_82599(i));
+            break;
+        default:
+            break;
+        }
+    }
+    regs_buff[51] = IXGBE_READ_REG(hw, IXGBE_FCRTV);
+    sbuf_printf(sb, "\tIXGBE_FCRTV\t %08x\n", regs_buff[51]);
+    regs_buff[52] = IXGBE_READ_REG(hw, IXGBE_TFCS);
+    sbuf_printf(sb, "\tIXGBE_TFCS\t %08x\n", regs_buff[52]);
+
+    /* Receive DMA */
+    sbuf_printf(sb, "\nReceive DMA\n");
+    for (i = 0; i < adapter->num_rx_queues; i++) {
+        regs_buff[53 + i] = IXGBE_READ_REG(hw, IXGBE_RDBAL(i));
+        sbuf_printf(sb, "\tIXGBE_RDBAL(%2d)\t %08x\n", i, regs_buff[53+i]);
+    }
+    for (i = 0; i < adapter->num_rx_queues; i++) {
+        regs_buff[117 + i] = IXGBE_READ_REG(hw, IXGBE_RDBAH(i));
+        sbuf_printf(sb, "\tIXGBE_RDBAH(%2d)\t %08x\n", i, regs_buff[117+i]);
+    }
+    for (i = 0; i < adapter->num_rx_queues; i++) {
+        regs_buff[181 + i] = IXGBE_READ_REG(hw, IXGBE_RDLEN(i));
+        sbuf_printf(sb, "\tIXGBE_RDLEN(%2d)\t %08x\n", i, regs_buff[181+i]);
+    }
+    for (i = 0; i < adapter->num_rx_queues; i++) {
+        regs_buff[245 + i] = IXGBE_READ_REG(hw, IXGBE_RDH(i));
+        sbuf_printf(sb, "\tIXGBE_RDH(%2d)\t %08x\n", i, regs_buff[245+i]);
+    }
+    for (i = 0; i < adapter->num_rx_queues; i++) {
+        regs_buff[309 + i] = IXGBE_READ_REG(hw, IXGBE_RDT(i));
+        sbuf_printf(sb, "\tIXGBE_RDT(%2d)\t %08x\n", i, regs_buff[309+i]);
+    }
+    for (i = 0; i < adapter->num_rx_queues; i++) {
+        regs_buff[373 + i] = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
+        sbuf_printf(sb, "\tIXGBE_RXDCTL(%2d)\t %08x\n", i, regs_buff[373+i]);
+    }
+    for (i = 0; i < adapter->num_rx_queues; i++) {
+        regs_buff[437 + i] = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
+        sbuf_printf(sb, "\tIXGBE_SRRCTL(%2d)\t %08x\n", i, regs_buff[437+i]);
+    }
+    for (i = 0; i < adapter->num_rx_queues; i++) {
+        regs_buff[453 + i] = IXGBE_READ_REG(hw, IXGBE_DCA_RXCTRL(i));
+        sbuf_printf(sb, "\tIXGBE_DCA_RXCTRL(%2d)\t %08x\n", i, regs_buff[453+i]);
+    }
+    regs_buff[469] = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
+    sbuf_printf(sb, "\tIXGBE_RDRXCTL\t %08x\n", regs_buff[469]);
+
+    for (i = 0; i < adapter->num_rx_queues; i++) {
+        regs_buff[470 + i] = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
+        sbuf_printf(sb, "\tIXGBE_RXPBSIZE(%2d)\t %08x\n", i, regs_buff[470+i]);
+    }
+    regs_buff[478] = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
+    sbuf_printf(sb, "\tIXGBE_RXCTRL\t %08x\n", regs_buff[478]);
+    regs_buff[479] = IXGBE_READ_REG(hw, IXGBE_DROPEN);
+    sbuf_printf(sb, "\tIXGBE_DROPEN\t %08x\n", regs_buff[479]);
+
+    /* Receive */
+    sbuf_printf(sb, "\nReceive\n");
+    regs_buff[480] = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
+    sbuf_printf(sb, "\tIXGBE_RXCSUM\t %08x\n", regs_buff[480]);
+    regs_buff[481] = IXGBE_READ_REG(hw, IXGBE_RFCTL);
+    sbuf_printf(sb, "\tIXGBE_RFCTL\t %08x\n", regs_buff[481]);
+
+    for (i = 0; i < adapter->num_rx_queues; i++) {
+        regs_buff[482 + i] = IXGBE_READ_REG(hw, IXGBE_RAL(i));
+        sbuf_printf(sb, "\tIXGBE_RAL(%2d)\t %08x\n", i, regs_buff[482+i]);
+    }
+    for (i = 0; i < adapter->num_rx_queues; i++) {
+        regs_buff[498 + i] = IXGBE_READ_REG(hw, IXGBE_RAH(i));
+        sbuf_printf(sb, "\tIXGBE_RAH(%2d)\t %08x\n", i, regs_buff[498+i]);
+    }
+    regs_buff[514] = IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0));
+    sbuf_printf(sb, "\tIXGBE_PSRTYPE\t %08x\n", regs_buff[514]);
+    regs_buff[515] = IXGBE_READ_REG(hw, IXGBE_FCTRL);
+    sbuf_printf(sb, "\tIXGBE_FCTRL\t %08x\n", regs_buff[515]);
+    regs_buff[516] = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
+    sbuf_printf(sb, "\tIXGBE_VLNCTRL\t %08x\n", regs_buff[516]);
+    regs_buff[517] = IXGBE_READ_REG(hw, IXGBE_MCSTCTRL);
+    sbuf_printf(sb, "\tIXGBE_MCSTCTRL\t %08x\n", regs_buff[517]);
+    regs_buff[518] = IXGBE_READ_REG(hw, IXGBE_MRQC);
+    sbuf_printf(sb, "\tIXGBE_MRQC\t %08x\n", regs_buff[518]);
+    regs_buff[519] = IXGBE_READ_REG(hw, IXGBE_VMD_CTL);
+    sbuf_printf(sb, "\tIXGBE_VMD_CTL\t %08x\n", regs_buff[519]);
+
+    for (i = 0; i < adapter->num_rx_queues; i++) {
+        regs_buff[520 + i] = IXGBE_READ_REG(hw, IXGBE_IMIR(i));
+        sbuf_printf(sb, "\tIXGBE_IMIR(%2d)\t %08x\n", i, regs_buff[520+i]);
+    }
+    for (i = 0; i < adapter->num_rx_queues; i++) {
+        regs_buff[528 + i] = IXGBE_READ_REG(hw, IXGBE_IMIREXT(i));
+        sbuf_printf(sb, "\tIXGBE_IMIREXT(%2d)\t %08x\n", i, regs_buff[528+i]);
+    }
+    regs_buff[536] = IXGBE_READ_REG(hw, IXGBE_IMIRVP);
+    sbuf_printf(sb, "\tIXGBE_IMIRVP\t %08x\n", regs_buff[536]);
+
+    /* Transmit */
+    sbuf_printf(sb, "\nTransmit\n");
+    for (i = 0; i < adapter->num_tx_queues; i++) {
+        regs_buff[537 + i] = IXGBE_READ_REG(hw, IXGBE_TDBAL(i));
+        sbuf_printf(sb, "\tIXGBE_TDBAL(%2d)\t %08x\n", i, regs_buff[537+i]);
+    }
+
+    for (i = 0; i < adapter->num_tx_queues; i++) {
+        regs_buff[569 + i] = IXGBE_READ_REG(hw, IXGBE_TDBAH(i));
+        sbuf_printf(sb, "\tIXGBE_TDBAH(%2d)\t %08x\n", i, regs_buff[569+i]);
+    }
+    for (i = 0; i < adapter->num_tx_queues; i++) {
+        regs_buff[601 + i] = IXGBE_READ_REG(hw, IXGBE_TDLEN(i));
+        sbuf_printf(sb, "\tIXGBE_TDLEN(%2d)\t %08x\n", i, regs_buff[601+i]);
+    }
+    for (i = 0; i < adapter->num_tx_queues; i++) {
+        regs_buff[633 + i] = IXGBE_READ_REG(hw, IXGBE_TDH(i));
+        sbuf_printf(sb, "\tIXGBE_TDH(%2d)\t %08x\n", i, regs_buff[633+i]);
+    }
+    for (i = 0; i < adapter->num_tx_queues; i++) {
+        regs_buff[665 + i] = IXGBE_READ_REG(hw, IXGBE_TDT(i));
+        sbuf_printf(sb, "\tIXGBE_TDT(%2d)\t %08x\n", i, regs_buff[665+i]);
+    }
+    for (i = 0; i < adapter->num_tx_queues; i++) {
+        regs_buff[697 + i] = IXGBE_READ_REG(hw, IXGBE_TXDCTL(i));
+        sbuf_printf(sb, "\tIXGBE_TXDCTL(%2d)\t %08x\n", i, regs_buff[697+i]);
+    }
+    for (i = 0; i < adapter->num_tx_queues; i++) {
+        regs_buff[729 + i] = IXGBE_READ_REG(hw, IXGBE_TDWBAL(i));
+        sbuf_printf(sb, "\tIXGBE_TDWBAL(%2d)\t %08x\n", i, regs_buff[729+i]);
+    }
+    for (i = 0; i < adapter->num_tx_queues; i++) {
+        regs_buff[761 + i] = IXGBE_READ_REG(hw, IXGBE_TDWBAH(i));
+        sbuf_printf(sb, "\tIXGBE_TDWBAH(%2d)\t %08x\n", i, regs_buff[761+i]);
+    }
+    regs_buff[793] = IXGBE_READ_REG(hw, IXGBE_DTXCTL);
+    sbuf_printf(sb, "\tIXGBE_DTXCTL\t %08x\n", regs_buff[793]);
+
+    /* Per-queue DCA (Direct Cache Access) transmit control */
+    for (i = 0; i < adapter->num_tx_queues; i++) {
+        regs_buff[794 + i] = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(i));
+        sbuf_printf(sb, "\tIXGBE_DCA_TXCTRL(%2d)\t %08x\n", i, regs_buff[794+i]);
+    }
+    regs_buff[810] = IXGBE_READ_REG(hw, IXGBE_TIPG);
+    sbuf_printf(sb, "\tIXGBE_TIPG\t %08x\n", regs_buff[810]);
+
+    for (i = 0; i < adapter->num_tx_queues; i++) {
+        regs_buff[811 + i] = IXGBE_READ_REG(hw, IXGBE_TXPBSIZE(i));
+        sbuf_printf(sb, "\tIXGBE_TXPBSIZE(%2d)\t %08x\n", i, regs_buff[811+i]);
+    }
+    regs_buff[819] = IXGBE_READ_REG(hw, IXGBE_MNGTXMAP);
+    sbuf_printf(sb, "\tIXGBE_MNGTXMAP\t %08x\n", regs_buff[819]);
+
+    /* Wake Up */
+    sbuf_printf(sb, "\nWake Up\n");
+    regs_buff[820] = IXGBE_READ_REG(hw, IXGBE_WUC);
+    sbuf_printf(sb, "\tIXGBE_WUC\t %08x\n", regs_buff[820]);
+    regs_buff[821] = IXGBE_READ_REG(hw, IXGBE_WUFC);
+    sbuf_printf(sb, "\tIXGBE_WUFC\t %08x\n", regs_buff[821]);
+    regs_buff[822] = IXGBE_READ_REG(hw, IXGBE_WUS);
+    sbuf_printf(sb, "\tIXGBE_WUS\t %08x\n", regs_buff[822]);
+    regs_buff[823] = IXGBE_READ_REG(hw, IXGBE_IPAV);
+    sbuf_printf(sb, "\tIXGBE_IPAV\t %08x\n", regs_buff[823]);
+    regs_buff[824] = IXGBE_READ_REG(hw, IXGBE_IP4AT);
+    sbuf_printf(sb, "\tIXGBE_IP4AT\t %08x\n", regs_buff[824]);
+    regs_buff[825] = IXGBE_READ_REG(hw, IXGBE_IP6AT);
+    sbuf_printf(sb, "\tIXGBE_IP6AT\t %08x\n", regs_buff[825]);
+    regs_buff[826] = IXGBE_READ_REG(hw, IXGBE_WUPL);
+    sbuf_printf(sb, "\tIXGBE_WUPL\t %08x\n", regs_buff[826]);
+    regs_buff[827] = IXGBE_READ_REG(hw, IXGBE_WUPM);
+    sbuf_printf(sb, "\tIXGBE_WUPM\t %08x\n", regs_buff[827]);
+    regs_buff[828] = IXGBE_READ_REG(hw, IXGBE_FHFT(0));
+    sbuf_printf(sb, "\tIXGBE_FHFT\t %08x\n", regs_buff[828]);
+
+    /* DCB */
+    sbuf_printf(sb, "\nDCB\n");
+    regs_buff[829] = IXGBE_READ_REG(hw, IXGBE_RMCS);   /* same as FCCFG  */
+    sbuf_printf(sb, "\tIXGBE_RMCS\t %08x\n", regs_buff[829]);
+    regs_buff[831] = IXGBE_READ_REG(hw, IXGBE_PDPMCS); /* same as RTTPCS */
+    sbuf_printf(sb, "\tIXGBE_PDPMCS\t %08x\n", regs_buff[831]);
+
+    switch (hw->mac.type) {
+    case ixgbe_mac_82598EB:
+        regs_buff[830] = IXGBE_READ_REG(hw, IXGBE_DPMCS);
+        sbuf_printf(sb, "\tIXGBE_DPMCS\t %08x\n", regs_buff[830]);
+        regs_buff[832] = IXGBE_READ_REG(hw, IXGBE_RUPPBMR);
+        sbuf_printf(sb, "\tIXGBE_RUPPBMR\t %08x\n", regs_buff[832]);
+        for (i = 0; i < adapter->num_tx_queues; i++) {
+            regs_buff[833 + i] = IXGBE_READ_REG(hw, IXGBE_RT2CR(i));
+            sbuf_printf(sb, "\tIXGBE_RT2CR(%2d)\t %08x\n", i, regs_buff[833+i]);
+        }
+        for (i = 0; i < adapter->num_tx_queues; i++) {
+            regs_buff[841 + i] = IXGBE_READ_REG(hw, IXGBE_RT2SR(i));
+            sbuf_printf(sb, "\tIXGBE_RT2SR(%2d)\t %08x\n", i, regs_buff[841+i]);
+        }
+        for (i = 0; i < adapter->num_tx_queues; i++) {
+            regs_buff[849 + i] = IXGBE_READ_REG(hw, IXGBE_TDTQ2TCCR(i));
+            sbuf_printf(sb, "\tIXGBE_TDTQ2TCCR(%2d)\t %08x\n", i, regs_buff[849+i]);
+        }
+        for (i = 0; i < adapter->num_tx_queues; i++) {
+            regs_buff[857 + i] = IXGBE_READ_REG(hw, IXGBE_TDTQ2TCSR(i));
+            sbuf_printf(sb, "\tIXGBE_TDTQ2TCSR(%2d)\t %08x\n", i, regs_buff[857+i]);
+        }
+        break;
+    case ixgbe_mac_82599EB:
+    case ixgbe_mac_X540:
+    case ixgbe_mac_X550:
+    case ixgbe_mac_X550EM_x:
+        regs_buff[830] = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
+        regs_buff[832] = IXGBE_READ_REG(hw, IXGBE_RTRPCS);
+        for (i = 0; i < adapter->num_tx_queues; i++)
+            regs_buff[833 + i] =
+                IXGBE_READ_REG(hw, IXGBE_RTRPT4C(i));
+        for (i = 0; i < adapter->num_tx_queues; i++)
+            regs_buff[841 + i] =
+                IXGBE_READ_REG(hw, IXGBE_RTRPT4S(i));
+        for (i = 0; i < adapter->num_tx_queues; i++)
+            regs_buff[849 + i] =
+                IXGBE_READ_REG(hw, IXGBE_RTTDT2C(i));
+        for (i = 0; i < adapter->num_tx_queues; i++)
+            regs_buff[857 + i] =
+                IXGBE_READ_REG(hw, IXGBE_RTTDT2S(i));
+        break;
+    default:
+        break;
+    }
+    for (i = 0; i < adapter->num_tx_queues; i++) {
+        regs_buff[865 + i] = IXGBE_READ_REG(hw, IXGBE_TDPT2TCCR(i)); /* same as RTTPT2C */
+        sbuf_printf(sb, "\tIXGBE_TDPT2TCCR(%2d)\t %08x\n", i, regs_buff[865+i]);
+    }
+    for (i = 0; i < adapter->num_tx_queues; i++) {
+        regs_buff[873 + i] = IXGBE_READ_REG(hw, IXGBE_TDPT2TCSR(i)); /* same as RTTPT2S */
+        sbuf_printf(sb, "\tIXGBE_TDPT2TCSR(%2d)\t %08x\n", i, regs_buff[873+i]);
+    }
+    /* MAC */
+    sbuf_printf(sb, "\nMAC\n");
+    regs_buff[1038] = IXGBE_READ_REG(hw, IXGBE_PCS1GCFIG);
+    sbuf_printf(sb, "\tIXGBE_PCS1GCFIG\t %08x\n", regs_buff[1038]);
+    regs_buff[1039] = IXGBE_READ_REG(hw, IXGBE_PCS1GLCTL);
+    sbuf_printf(sb, "\tIXGBE_PCS1GLCTL\t %08x\n", regs_buff[1039]);
+    regs_buff[1040] = IXGBE_READ_REG(hw, IXGBE_PCS1GLSTA);
+    sbuf_printf(sb, "\tIXGBE_PCS1GLSTA\t %08x\n", regs_buff[1040]);
+    regs_buff[1041] = IXGBE_READ_REG(hw, IXGBE_PCS1GDBG0);
+    sbuf_printf(sb, "\tIXGBE_PCS1GDBG0\t %08x\n", regs_buff[1041]);
+    regs_buff[1042] = IXGBE_READ_REG(hw, IXGBE_PCS1GDBG1);
+    sbuf_printf(sb, "\tIXGBE_PCS1GDBG1\t %08x\n", regs_buff[1042]);
+    regs_buff[1043] = IXGBE_READ_REG(hw, IXGBE_PCS1GANA);
+    sbuf_printf(sb, "\tIXGBE_PCS1GANA\t %08x\n", regs_buff[1043]);
+    regs_buff[1044] = IXGBE_READ_REG(hw, IXGBE_PCS1GANLP);
+    sbuf_printf(sb, "\tIXGBE_PCS1GANLP\t %08x\n", regs_buff[1044]);
+    regs_buff[1045] = IXGBE_READ_REG(hw, IXGBE_PCS1GANNP);
+    sbuf_printf(sb, "\tIXGBE_PCS1GANNP\t %08x\n", regs_buff[1045]);
+    regs_buff[1046] = IXGBE_READ_REG(hw, IXGBE_PCS1GANLPNP);
+    sbuf_printf(sb, "\tIXGBE_PCS1GANLPNP\t %08x\n", regs_buff[1046]);
+    regs_buff[1047] = IXGBE_READ_REG(hw, IXGBE_HLREG0);
+    sbuf_printf(sb, "\tIXGBE_HLREG0\t %08x\n", regs_buff[1047]);
+    regs_buff[1048] = IXGBE_READ_REG(hw, IXGBE_HLREG1);
+    sbuf_printf(sb, "\tIXGBE_HLREG1\t %08x\n", regs_buff[1048]);
+    regs_buff[1049] = IXGBE_READ_REG(hw, IXGBE_PAP);
+    sbuf_printf(sb, "\tIXGBE_PAP\t %08x\n", regs_buff[1049]);
+    regs_buff[1050] = IXGBE_READ_REG(hw, IXGBE_MACA);
+    sbuf_printf(sb, "\tIXGBE_MACA\t %08x\n", regs_buff[1050]);
+    regs_buff[1051] = IXGBE_READ_REG(hw, IXGBE_APAE);
+    sbuf_printf(sb, "\tIXGBE_APAE\t %08x\n", regs_buff[1051]);
+    regs_buff[1052] = IXGBE_READ_REG(hw, IXGBE_ARD);
+    sbuf_printf(sb, "\tIXGBE_ARD\t %08x\n", regs_buff[1052]);
+    regs_buff[1053] = IXGBE_READ_REG(hw, IXGBE_AIS);
+    sbuf_printf(sb, "\tIXGBE_AIS\t %08x\n", regs_buff[1053]);
+    regs_buff[1054] = IXGBE_READ_REG(hw, IXGBE_MSCA);
+    sbuf_printf(sb, "\tIXGBE_MSCA\t %08x\n", regs_buff[1054]);
+    regs_buff[1055] = IXGBE_READ_REG(hw, IXGBE_MSRWD);
+    sbuf_printf(sb, "\tIXGBE_MSRWD\t %08x\n", regs_buff[1055]);
+    regs_buff[1056] = IXGBE_READ_REG(hw, IXGBE_MLADD);
+    sbuf_printf(sb, "\tIXGBE_MLADD\t %08x\n", regs_buff[1056]);
+    regs_buff[1057] = IXGBE_READ_REG(hw, IXGBE_MHADD);
+    sbuf_printf(sb, "\tIXGBE_MHADD\t %08x\n", regs_buff[1057]);
+    regs_buff[1058] = IXGBE_READ_REG(hw, IXGBE_TREG);
+    sbuf_printf(sb, "\tIXGBE_TREG\t %08x\n", regs_buff[1058]);
+    regs_buff[1059] = IXGBE_READ_REG(hw, IXGBE_PCSS1);
+    sbuf_printf(sb, "\tIXGBE_PCSS1\t %08x\n", regs_buff[1059]);
+    regs_buff[1060] = IXGBE_READ_REG(hw, IXGBE_PCSS2);
+    sbuf_printf(sb, "\tIXGBE_PCSS2\t %08x\n", regs_buff[1060]);
+    regs_buff[1061] = IXGBE_READ_REG(hw, IXGBE_XPCSS);
+    sbuf_printf(sb, "\tIXGBE_XPCSS\t %08x\n", regs_buff[1061]);
+    regs_buff[1062] = IXGBE_READ_REG(hw, IXGBE_SERDESC);
+    sbuf_printf(sb, "\tIXGBE_SERDESC\t %08x\n", regs_buff[1062]);
+    regs_buff[1063] = IXGBE_READ_REG(hw, IXGBE_MACS);
+    sbuf_printf(sb, "\tIXGBE_MACS\t %08x\n", regs_buff[1063]);
+    regs_buff[1064] = IXGBE_READ_REG(hw, IXGBE_AUTOC);
+    sbuf_printf(sb, "\tIXGBE_AUTOC\t %08x\n", regs_buff[1064]);
+    regs_buff[1065] = IXGBE_READ_REG(hw, IXGBE_LINKS);
+    sbuf_printf(sb, "\tIXGBE_LINKS\t %08x\n", regs_buff[1065]);
+    regs_buff[1066] = IXGBE_READ_REG(hw, IXGBE_AUTOC2);
+    sbuf_printf(sb, "\tIXGBE_AUTOC2\t %08x\n", regs_buff[1066]);
+    regs_buff[1067] = IXGBE_READ_REG(hw, IXGBE_AUTOC3);
+    sbuf_printf(sb, "\tIXGBE_AUTOC3\t %08x\n", regs_buff[1067]);
+    regs_buff[1068] = IXGBE_READ_REG(hw, IXGBE_ANLP1);
+    sbuf_printf(sb, "\tIXGBE_ANLP1\t %08x\n", regs_buff[1068]);
+    regs_buff[1069] = IXGBE_READ_REG(hw, IXGBE_ANLP2);
+    sbuf_printf(sb, "\tIXGBE_ANLP2\t %08x\n", regs_buff[1069]);
+    regs_buff[1070] = IXGBE_READ_REG(hw, IXGBE_ATLASCTL);
+    sbuf_printf(sb, "\tIXGBE_ATLASCTL\t %08x\n", regs_buff[1070]);
+
+    /* Diagnostic */
+    sbuf_printf(sb, "\nDiagnostic\n");
+    regs_buff[1071] = IXGBE_READ_REG(hw, IXGBE_RDSTATCTL);
+    sbuf_printf(sb, "\tIXGBE_RDSTATCTL\t %08x\n", regs_buff[1071]);
+    for (i = 0; i < adapter->num_rx_queues; i++) {
+        regs_buff[1072 + i] = IXGBE_READ_REG(hw, IXGBE_RDSTAT(i));
+        sbuf_printf(sb, "\tIXGBE_RDSTAT(%2d)\t %08x\n", i, regs_buff[1072+i]);
+    }
+    regs_buff[1080] = IXGBE_READ_REG(hw, IXGBE_RDHMPN);
+    sbuf_printf(sb, "\tIXGBE_RDHMPN\t %08x\n", regs_buff[1080]);
+    for (i = 0; i < 4; i++) {
+        regs_buff[1081 + i] = IXGBE_READ_REG(hw, IXGBE_RIC_DW(i));
+        sbuf_printf(sb, "\tIXGBE_RIC_DW(%2d)\t %08x\n", i, regs_buff[1081+i]);
+    }
+    regs_buff[1085] = IXGBE_READ_REG(hw, IXGBE_RDPROBE);
+    sbuf_printf(sb, "\tIXGBE_RDPROBE\t %08x\n", regs_buff[1085]);
+    /* regs_buff[1086] = IXGBE_READ_REG(hw, IXGBE_TDSTATCTL);
+    sbuf_printf(sb, "\tIXGBE_TDSTATCTL\t %08x\n", regs_buff[1086]); */
+    /* for (i = 0; i < adapter->num_queues; i++) {
+        regs_buff[1087 + i] = IXGBE_READ_REG(hw, IXGBE_TDSTAT(i));
+        sbuf_printf(sb, "\n\tIXGBE_TDSTAT(%2d)\t %08x\n", i, regs_buff[1087+i]);
+    } */
+    regs_buff[1095] = IXGBE_READ_REG(hw, IXGBE_TDHMPN);
+    sbuf_printf(sb, "\tIXGBE_TDHMPN\t %08x\n", regs_buff[1095]);
+
+    for (i = 0; i < 4; i++) {
+        regs_buff[1096 + i] = IXGBE_READ_REG(hw, IXGBE_TIC_DW(i));
+        sbuf_printf(sb, "\tIXGBE_TIC_DW(%2d)\t %08x\n", i, regs_buff[1096+i]);
+    }
+    regs_buff[1100] = IXGBE_READ_REG(hw, IXGBE_TDPROBE);
+    sbuf_printf(sb, "\tIXGBE_TDPROBE\t %08x\n", regs_buff[1100]);
+    regs_buff[1101] = IXGBE_READ_REG(hw, IXGBE_TXBUFCTRL);
+    sbuf_printf(sb, "\tIXGBE_TXBUFCTRL\t %08x\n", regs_buff[1101]);
+    regs_buff[1102] = IXGBE_READ_REG(hw, IXGBE_TXBUFDATA0);
+    sbuf_printf(sb, "\tIXGBE_TXBUFDATA0\t %08x\n", regs_buff[1102]);
+    regs_buff[1103] = IXGBE_READ_REG(hw, IXGBE_TXBUFDATA1);
+    sbuf_printf(sb, "\tIXGBE_TXBUFDATA1\t %08x\n", regs_buff[1103]);
+    regs_buff[1104] = IXGBE_READ_REG(hw, IXGBE_TXBUFDATA2);
+    sbuf_printf(sb, "\tIXGBE_TXBUFDATA2\t %08x\n", regs_buff[1104]);
+    regs_buff[1105] = IXGBE_READ_REG(hw, IXGBE_TXBUFDATA3);
+    sbuf_printf(sb, "\tIXGBE_TXBUFDATA3\t %08x\n", regs_buff[1105]);
+    regs_buff[1106] = IXGBE_READ_REG(hw, IXGBE_RXBUFCTRL);
+    sbuf_printf(sb, "\tIXGBE_RXBUFCTRL\t %08x\n", regs_buff[1106]);
+    regs_buff[1107] = IXGBE_READ_REG(hw, IXGBE_RXBUFDATA0);
+    sbuf_printf(sb, "\tIXGBE_RXBUFDATA0\t %08x\n", regs_buff[1107]);
+    regs_buff[1108] = IXGBE_READ_REG(hw, IXGBE_RXBUFDATA1);
+    sbuf_printf(sb, "\tIXGBE_RXBUFDATA1\t %08x\n", regs_buff[1108]);
+    regs_buff[1109] = IXGBE_READ_REG(hw, IXGBE_RXBUFDATA2);
+    sbuf_printf(sb, "\tIXGBE_RXBUFDATA2\t %08x\n", regs_buff[1109]);
+    regs_buff[1110] = IXGBE_READ_REG(hw, IXGBE_RXBUFDATA3);
+    sbuf_printf(sb, "\tIXGBE_RXBUFDATA3\t %08x\n", regs_buff[1110]);
+    for (i = 0; i < adapter->num_rx_queues; i++) {
+        regs_buff[1111 + i] = IXGBE_READ_REG(hw, IXGBE_PCIE_DIAG(i));
+        sbuf_printf(sb, "\tIXGBE_PCIE_DIAG(%2d)\t %08x\n", i, regs_buff[1111+i]);
+    }
+    regs_buff[1119] = IXGBE_READ_REG(hw, IXGBE_RFVAL);
+    sbuf_printf(sb, "\tIXGBE_RFVAL\t %08x\n", regs_buff[1119]);
+    regs_buff[1120] = IXGBE_READ_REG(hw, IXGBE_MDFTC1);
+    sbuf_printf(sb, "\tIXGBE_MDFTC1\t %08x\n", regs_buff[1120]);
+    regs_buff[1121] = IXGBE_READ_REG(hw, IXGBE_MDFTC2);
+    sbuf_printf(sb, "\tIXGBE_MDFTC2\t %08x\n", regs_buff[1121]);
+    regs_buff[1122] = IXGBE_READ_REG(hw, IXGBE_MDFTFIFO1);
+    sbuf_printf(sb, "\tIXGBE_MDFTFIFO1\t %08x\n", regs_buff[1122]);
+    regs_buff[1123] = IXGBE_READ_REG(hw, IXGBE_MDFTFIFO2);
+    sbuf_printf(sb, "\tIXGBE_MDFTFIFO2\t %08x\n", regs_buff[1123]);
+    regs_buff[1124] = IXGBE_READ_REG(hw, IXGBE_MDFTS);
+    sbuf_printf(sb, "\tIXGBE_MDFTS\t %08x\n", regs_buff[1124]);
+    regs_buff[1125] = IXGBE_READ_REG(hw, IXGBE_PCIEECCCTL);
+    sbuf_printf(sb, "\tIXGBE_PCIEECCCTL\t %08x\n", regs_buff[1125]);
+    regs_buff[1126] = IXGBE_READ_REG(hw, IXGBE_PBTXECC);
+    sbuf_printf(sb, "\tIXGBE_PBTXECC\t %08x\n", regs_buff[1126]);
+    regs_buff[1127] = IXGBE_READ_REG(hw, IXGBE_PBRXECC);
+    sbuf_printf(sb, "\tIXGBE_PBRXECC\t %08x\n", regs_buff[1127]);
+
+    /* 82599 X540 specific registers  */
+    regs_buff[1128] = IXGBE_READ_REG(hw, IXGBE_MFLCN);
+
+    /* 82599 X540 specific DCB registers  */
+    regs_buff[1129] = IXGBE_READ_REG(hw, IXGBE_RTRUP2TC);
+    regs_buff[1130] = IXGBE_READ_REG(hw, IXGBE_RTTUP2TC);
+    for (i = 0; i < 4; i++) {
+        regs_buff[1131 + i] = IXGBE_READ_REG(hw, IXGBE_TXLLQ(i));
+        sbuf_printf(sb, "\tIXGBE_TXLLQ(%2d)\t %08x\n", i, regs_buff[1131+i]);
+    }
+    regs_buff[1135] = IXGBE_READ_REG(hw, IXGBE_RTTBCNRM);
+    /* same as RTTQCNRM */
+    sbuf_printf(sb, "\tIXGBE_RTTBCNRM\t %08x\n", regs_buff[1135]);
+    regs_buff[1136] = IXGBE_READ_REG(hw, IXGBE_RTTBCNRD);
+    /* same as RTTQCNRR */
+    sbuf_printf(sb, "\tIXGBE_RTTBCNRD\t %08x\n", regs_buff[1136]);
+#ifdef PRINT_QSET
+    /* Note: this block relies on the ring pointers and descriptor counts
+     * declared in the "#if 0" block at the top of this function; enable
+     * both blocks together. */
+    for (j = 0; j < nrxd; j++) {
+        u32 staterr = le32toh(rxr->rx_base[j].wb.upper.status_error);
+        u32 length = le32toh(rxr->rx_base[j].wb.upper.length);
+        sbuf_printf(sb, "\tReceive Descriptor Address %d: %08lx  Error:%d  Length:%d\n", j, rxr->rx_base[j].read.pkt_addr, staterr, length);
+    }
+
+    for (j = 0; j < min(ntxd, 256); j++) {
+        struct ixgbe_tx_buf *buf = &txr->tx_buffers[j];
+        unsigned int *ptr = (unsigned int *)&txr->tx_base[j].read;
+
+        sbuf_printf(sb, "\tTXD[%03d] [0]: %08x [1]: %08x [2]: %08x [3]: %08x  eop: %d DD=%d\n",
+            j, ptr[0], ptr[1], ptr[2], ptr[3], buf->eop,
+            buf->eop != -1 ? txr->tx_base[buf->eop].wb.status & IXGBE_TXD_STAT_DD : 0);
+
+    }
+#endif
+    /* X540 specific DCB registers
+    regs_buff[1137] = IXGBE_READ_REG(hw, IXGBE_RTTQCNCR);
+    regs_buff[1138] = IXGBE_READ_REG(hw, IXGBE_RTTQCNTG); */
+
+    free(regs_buff, M_DEVBUF);
+
+    rc = sbuf_finish(sb);
+    sbuf_delete(sb);
+    return (rc);
+}
+#endif
Index: sys/dev/ixgbe/legacy_if_ix.c
===================================================================
--- /dev/null
+++ sys/dev/ixgbe/legacy_if_ix.c
@@ -0,0 +1,6001 @@
+/******************************************************************************
+
+  Copyright (c) 2001-2015, Intel Corporation
+  All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions are met:
+
+   1. Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+
+   2. Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+
+   3. Neither the name of the Intel Corporation nor the names of its
+      contributors may be used to endorse or promote products derived from
+      this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+  POSSIBILITY OF SUCH DAMAGE.
+
+******************************************************************************/
+/*$FreeBSD$*/
+
+
+#ifndef IXGBE_STANDALONE_BUILD
+#include "opt_inet.h"
+#include "opt_inet6.h"
+#include "opt_rss.h"
+#endif
+
+#include "ixgbe.h"
+
+#ifdef RSS
+#include
+#include
+#endif
+
+/*********************************************************************
+ *  Driver version
+ *********************************************************************/
+char ixgbe_driver_version[] = "3.1.13-k";
+
+
+/*********************************************************************
+ *  PCI Device ID Table
+ *
+ *  Used by probe to select devices to load on
+ *  Last field stores an index into ixgbe_strings
+ *  Last entry must be all 0s
+ *
+ *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
+ *********************************************************************/
+
+static ixgbe_vendor_info_t ixgbe_vendor_info_array[] =
+{
+    {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_DUAL_PORT, 0, 0, 0},
+    {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_SINGLE_PORT, 0, 0, 0},
+    {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_CX4, 0, 0, 0},
+    {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT, 0, 0, 0},
+    {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT2, 0, 0, 0},
+    {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598, 0, 0, 0},
+    {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_DA_DUAL_PORT, 0, 0, 0},
+    {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_CX4_DUAL_PORT, 0, 0, 0},
+    {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_XF_LR, 0, 0, 0},
+    {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM, 0, 0, 0},
+    {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_SFP_LOM, 0, 0, 0},
+    {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4, 0, 0, 0},
+    {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4_MEZZ, 0, 0, 0},
+    {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP, 0, 0, 0},
+    {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_XAUI_LOM, 0, 0, 0},
+    {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_CX4, 0, 0, 0},
+    {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_T3_LOM, 0, 0, 0},
+    {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_COMBO_BACKPLANE, 0, 0, 0},
+    {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_BACKPLANE_FCOE, 0, 0, 0},
+    {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_SF2, 0, 0, 0},
+    {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_FCOE, 0, 0, 0},
+    {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599EN_SFP, 0, 0, 0},
+    {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_SF_QP, 0, 0, 0},
+    {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_QSFP_SF_QP, 0, 0, 0},
+    {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540T, 0, 0, 0},
+    {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540T1, 0, 0, 0},
+    {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550T, 0, 0, 0},
+    {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550T1, 0, 0, 0},
+    {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_X_KR, 0, 0, 0},
+    {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_X_KX4, 0, 0, 0},
+    {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_X_10G_T, 0, 0, 0},
+    {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_X_SFP, 0, 0, 0},
+    /* required last entry */
+    {0, 0, 0, 0, 0}
+};
+
+/*********************************************************************
+ *  Table of branding strings
+ *********************************************************************/
+
+static char *ixgbe_strings[] = {
+    "Intel(R) PRO/10GbE PCI-Express Network Driver"
+};
+
+/*********************************************************************
+ *  Function prototypes
+ *********************************************************************/
+static int ixgbe_probe(device_t);
+static int ixgbe_attach(device_t);
+static int ixgbe_detach(device_t);
+static int ixgbe_shutdown(device_t);
+static int ixgbe_suspend(device_t);
+static int ixgbe_resume(device_t);
+static int ixgbe_ioctl(struct ifnet *, u_long, caddr_t);
+static void ixgbe_init(void *);
+static void ixgbe_init_locked(struct adapter *);
+static void ixgbe_stop(void *);
+#if __FreeBSD_version >= 1100036
+static uint64_t ixgbe_get_counter(struct ifnet *, ift_counter);
+#endif
+static void ixgbe_add_media_types(struct adapter *);
+static void ixgbe_media_status(struct ifnet *, struct ifmediareq *);
+static int ixgbe_media_change(struct ifnet *);
+static void ixgbe_identify_hardware(struct adapter *);
+static int ixgbe_allocate_pci_resources(struct adapter *);
+static void ixgbe_get_slot_info(struct adapter *);
+static int ixgbe_allocate_msix(struct adapter *);
+static int ixgbe_allocate_legacy(struct adapter *);
+static int ixgbe_setup_msix(struct adapter *);
+static void ixgbe_free_pci_resources(struct adapter *);
+static void ixgbe_local_timer(void *);
+static int ixgbe_setup_interface(device_t, struct adapter *);
+static void ixgbe_config_gpie(struct adapter *);
+static void ixgbe_config_dmac(struct adapter *);
+static void ixgbe_config_delay_values(struct adapter *);
+static void ixgbe_config_link(struct adapter *);
+static void ixgbe_check_wol_support(struct adapter *);
+static int ixgbe_setup_low_power_mode(struct adapter *);
+static void ixgbe_rearm_queues(struct adapter *, u64);
+
+static void ixgbe_initialize_transmit_units(struct adapter *);
+static void ixgbe_initialize_receive_units(struct adapter *);
+static void ixgbe_enable_rx_drop(struct adapter *);
+static void ixgbe_disable_rx_drop(struct adapter *);
+static void ixgbe_initialize_rss_mapping(struct adapter *);
+
+static void ixgbe_enable_intr(struct adapter *);
+static void ixgbe_disable_intr(struct adapter *);
+static void ixgbe_update_stats_counters(struct adapter *);
+static void ixgbe_set_promisc(struct adapter *);
+static void ixgbe_set_multi(struct adapter *);
+static void ixgbe_update_link_status(struct adapter *);
+static void ixgbe_set_ivar(struct adapter *, u8, u8, s8);
+static void ixgbe_configure_ivars(struct adapter *);
+static u8 * ixgbe_mc_array_itr(struct ixgbe_hw *, u8 **, u32 *);
+
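+/* VLAN hardware filter support */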
+static void	ixgbe_setup_vlan_hw_support(struct adapter *);
+static void	ixgbe_register_vlan(void *, struct ifnet *, u16);
+static void	ixgbe_unregister_vlan(void *, struct ifnet *, u16);
+
+static void	ixgbe_add_device_sysctls(struct adapter *);
+static void	ixgbe_add_hw_stats(struct adapter *);
+static int	ixgbe_set_flowcntl(struct adapter *, int);
+static int	ixgbe_set_advertise(struct adapter *, int);
+
+/* Sysctl handlers */
+static void	ixgbe_set_sysctl_value(struct adapter *, const char *,
+		    const char *, int *, int);
+static int	ixgbe_sysctl_flowcntl(SYSCTL_HANDLER_ARGS);
+static int	ixgbe_sysctl_advertise(SYSCTL_HANDLER_ARGS);
+static int	ixgbe_sysctl_thermal_test(SYSCTL_HANDLER_ARGS);
+static int	ixgbe_sysctl_dmac(SYSCTL_HANDLER_ARGS);
+static int	ixgbe_sysctl_phy_temp(SYSCTL_HANDLER_ARGS);
+static int	ixgbe_sysctl_phy_overtemp_occurred(SYSCTL_HANDLER_ARGS);
+#ifdef IXGBE_DEBUG
+static int	ixgbe_sysctl_power_state(SYSCTL_HANDLER_ARGS);
+static int	ixgbe_sysctl_print_rss_config(SYSCTL_HANDLER_ARGS);
+#endif
+static int	ixgbe_sysctl_wol_enable(SYSCTL_HANDLER_ARGS);
+static int	ixgbe_sysctl_wufc(SYSCTL_HANDLER_ARGS);
+static int	ixgbe_sysctl_eee_enable(SYSCTL_HANDLER_ARGS);
+static int	ixgbe_sysctl_eee_negotiated(SYSCTL_HANDLER_ARGS);
+static int	ixgbe_sysctl_eee_rx_lpi_status(SYSCTL_HANDLER_ARGS);
+static int	ixgbe_sysctl_eee_tx_lpi_status(SYSCTL_HANDLER_ARGS);
+static int	ixgbe_sysctl_eee_tx_lpi_delay(SYSCTL_HANDLER_ARGS);
+
+/* Support for pluggable optic modules */
+static bool	ixgbe_sfp_probe(struct adapter *);
+static void	ixgbe_setup_optics(struct adapter *);
+
+/* Legacy (single vector) interrupt handler */
+static void	ixgbe_legacy_irq(void *);
+
+/* The MSI/X Interrupt handlers */
+static void	ixgbe_msix_que(void *);
+static void	ixgbe_msix_link(void *);
+
+/* Deferred interrupt tasklets */
+static void	ixgbe_handle_que(void *, int);
+static void	ixgbe_handle_link(void *, int);
+static void	ixgbe_handle_msf(void *, int);
+static void	ixgbe_handle_mod(void *, int);
+static void	ixgbe_handle_phy(void *, int);
+
+#ifdef IXGBE_FDIR
+static void	ixgbe_reinit_fdir(void *, int);
+#endif
+
+#ifdef PCI_IOV
+static void	ixgbe_ping_all_vfs(struct adapter *);
+static void	ixgbe_handle_mbx(void *, int);
+static int	ixgbe_init_iov(device_t, u16, const nvlist_t *);
+static void	ixgbe_uninit_iov(device_t);
+static int	ixgbe_add_vf(device_t, u16, const nvlist_t *);
+static void	ixgbe_initialize_iov(struct adapter *);
+static void	ixgbe_recalculate_max_frame(struct adapter *);
+static void	ixgbe_init_vf(struct adapter *, struct ixgbe_vf *);
+#endif /* PCI_IOV */
+
+
+/*********************************************************************
+ *  FreeBSD Device Interface Entry Points
+ *********************************************************************/
+
+static device_method_t ix_methods[] = {
+	/* Device interface */
+	DEVMETHOD(device_probe, ixgbe_probe),
+	DEVMETHOD(device_attach, ixgbe_attach),
+	DEVMETHOD(device_detach, ixgbe_detach),
+	DEVMETHOD(device_shutdown, ixgbe_shutdown),
+	DEVMETHOD(device_suspend, ixgbe_suspend),
+	DEVMETHOD(device_resume, ixgbe_resume),
+#ifdef PCI_IOV
+	DEVMETHOD(pci_iov_init, ixgbe_init_iov),
+	DEVMETHOD(pci_iov_uninit, ixgbe_uninit_iov),
+	DEVMETHOD(pci_iov_add_vf, ixgbe_add_vf),
+#endif /* PCI_IOV */
+	DEVMETHOD_END
+};
+
+static driver_t ix_driver = {
+	"ix", ix_methods, sizeof(struct adapter),
+};
+
+devclass_t ix_devclass;
+DRIVER_MODULE(ix, pci, ix_driver, ix_devclass, 0, 0);
+
+MODULE_DEPEND(ix, pci, 1, 1, 1);
+MODULE_DEPEND(ix, ether, 1, 1, 1);
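+/* netmap(4) support is optional; the dependency exists only with DEV_NETMAP */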
+#ifdef DEV_NETMAP
+MODULE_DEPEND(ix, netmap, 1, 1, 1);
+#endif /* DEV_NETMAP */
+
+/*
+** TUNEABLE PARAMETERS:
+*/
+
+static SYSCTL_NODE(_hw, OID_AUTO, ix, CTLFLAG_RD, 0,
+    "IXGBE driver parameters");
+
+/*
+** AIM: Adaptive Interrupt Moderation,
+** which means that the interrupt rate
+** is varied over time based on the
+** traffic for that interrupt vector.
+*/
+static int ixgbe_enable_aim = TRUE;
+SYSCTL_INT(_hw_ix, OID_AUTO, enable_aim, CTLFLAG_RWTUN, &ixgbe_enable_aim, 0,
+    "Enable adaptive interrupt moderation");
+
+static int ixgbe_max_interrupt_rate = (4000000 / IXGBE_LOW_LATENCY);
+SYSCTL_INT(_hw_ix, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN,
+    &ixgbe_max_interrupt_rate, 0, "Maximum interrupts per second");
+
+/* How many packets rxeof tries to clean at a time */
+static int ixgbe_rx_process_limit = 256;
+SYSCTL_INT(_hw_ix, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
+    &ixgbe_rx_process_limit, 0,
+    "Maximum number of received packets to process at a time, "
+    "-1 means unlimited");
+
+/* How many packets txeof tries to clean at a time */
+static int ixgbe_tx_process_limit = 256;
+SYSCTL_INT(_hw_ix, OID_AUTO, tx_process_limit, CTLFLAG_RDTUN,
+    &ixgbe_tx_process_limit, 0,
+    "Maximum number of sent packets to process at a time, "
+    "-1 means unlimited");
+
+/* Flow control setting, default to full */
+static int ixgbe_flow_control = ixgbe_fc_full;
+SYSCTL_INT(_hw_ix, OID_AUTO, flow_control, CTLFLAG_RDTUN,
+    &ixgbe_flow_control, 0, "Default flow control used for all adapters");
+
+/* Advertise Speed, default to 0 (auto) */
+static int ixgbe_advertise_speed = 0;
+SYSCTL_INT(_hw_ix, OID_AUTO, advertise_speed, CTLFLAG_RDTUN,
+    &ixgbe_advertise_speed, 0, "Default advertised speed for all adapters");
+
+/*
+** Smart speed setting, default to on.
+** This only works as a compile option
+** right now, as it is set during attach;
+** set this to 'ixgbe_smart_speed_off'
+** to disable.
+*/
+static int ixgbe_smart_speed = ixgbe_smart_speed_on;
+
+/*
+ * MSIX should be the default for best performance,
+ * but this allows it to be forced off for testing.
+ */
+static int ixgbe_enable_msix = 1;
+SYSCTL_INT(_hw_ix, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &ixgbe_enable_msix, 0,
+    "Enable MSI-X interrupts");
+
+/*
+ * Number of queues; if set to 0 it
+ * autoconfigures based on the number
+ * of cpus, with a maximum of 8. It can
+ * be overridden manually here.
+ */
+static int ixgbe_num_queues = 0;
+SYSCTL_INT(_hw_ix, OID_AUTO, num_queues, CTLFLAG_RDTUN, &ixgbe_num_queues, 0,
+    "Number of queues to configure, 0 indicates autoconfigure");
+
+/*
+** Number of TX descriptors per ring;
+** setting this higher than RX seems
+** the better performing choice.
+*/
+static int ixgbe_txd = PERFORM_TXD;
+SYSCTL_INT(_hw_ix, OID_AUTO, txd, CTLFLAG_RDTUN, &ixgbe_txd, 0,
+    "Number of transmit descriptors per queue");
+
+/* Number of RX descriptors per ring */
+static int ixgbe_rxd = PERFORM_RXD;
+SYSCTL_INT(_hw_ix, OID_AUTO, rxd, CTLFLAG_RDTUN, &ixgbe_rxd, 0,
+    "Number of receive descriptors per queue");
+
+/*
+** Defining this on will allow the use
+** of unsupported SFP+ modules; note that
+** if you do so, you are on your own :)
+*/
+static int allow_unsupported_sfp = FALSE;
+TUNABLE_INT("hw.ix.unsupported_sfp", &allow_unsupported_sfp);
+
+/* Keep running tab on them for sanity check */
+static int ixgbe_total_ports;
+
+#ifdef IXGBE_FDIR
+/*
+** Flow Director actually 'steals'
+** part of the packet buffer as its
+** filter pool; this variable controls
+** how much it uses:
+**  0 = 64K, 1 = 128K, 2 = 256K
+*/
+static int fdir_pballoc = 1;
+#endif
+
+#ifdef DEV_NETMAP
+/*
+ * The #ifdef DEV_NETMAP / #endif blocks in this file are meant to
+ * be a reference on how to implement netmap support in a driver.
+ * Additional comments are in ixgbe_netmap.h .
+ *
+ * <dev/netmap/ixgbe_netmap.h> contains functions for netmap support
+ * that extend the standard driver.
+ */
+#include <dev/netmap/ixgbe_netmap.h>
+#endif /* DEV_NETMAP */
+
+static MALLOC_DEFINE(M_IXGBE, "ix", "ix driver allocations");
+
+/*********************************************************************
+ *  Device identification routine
+ *
+ *  ixgbe_probe determines if the driver should be loaded on
+ *  the adapter, based on the PCI vendor/device id of the adapter.
+ *
+ *  return BUS_PROBE_DEFAULT on success, positive on failure
+ *********************************************************************/
+
+static int
+ixgbe_probe(device_t dev)
+{
+	ixgbe_vendor_info_t *ent;
+
+	u16	pci_vendor_id = 0;
+	u16	pci_device_id = 0;
+	u16	pci_subvendor_id = 0;
+	u16	pci_subdevice_id = 0;
+	char	adapter_name[256];
+
+	INIT_DEBUGOUT("ixgbe_probe: begin");
+
+	pci_vendor_id = pci_get_vendor(dev);
+	if (pci_vendor_id != IXGBE_INTEL_VENDOR_ID)
+		return (ENXIO);
+
+	pci_device_id = pci_get_device(dev);
+	pci_subvendor_id = pci_get_subvendor(dev);
+	pci_subdevice_id = pci_get_subdevice(dev);
+
+	ent = ixgbe_vendor_info_array;
+	while (ent->vendor_id != 0) {
+		if ((pci_vendor_id == ent->vendor_id) &&
+		    (pci_device_id == ent->device_id) &&
+
+		    ((pci_subvendor_id == ent->subvendor_id) ||
+		     (ent->subvendor_id == 0)) &&
+
+		    ((pci_subdevice_id == ent->subdevice_id) ||
+		     (ent->subdevice_id == 0))) {
+			sprintf(adapter_name, "%s, Version - %s",
+			    ixgbe_strings[ent->index],
+			    ixgbe_driver_version);
+			device_set_desc_copy(dev, adapter_name);
+			++ixgbe_total_ports;
+			return (BUS_PROBE_DEFAULT);
+		}
+		ent++;
+	}
+	return (ENXIO);
+}
+
+/*********************************************************************
+ *  Device initialization routine
+ *
+ *  The attach entry point is called when the driver is being loaded.
+ *  This routine identifies the type of hardware, allocates all resources
+ *  and initializes the hardware.
+ * + * return 0 on success, positive on failure + *********************************************************************/ + +static int +ixgbe_attach(device_t dev) +{ + struct adapter *adapter; + struct ixgbe_hw *hw; + int error = 0; + u16 csum; + u32 ctrl_ext; + + INIT_DEBUGOUT("ixgbe_attach: begin"); + + /* Allocate, clear, and link in our adapter structure */ + adapter = device_get_softc(dev); + adapter->dev = dev; + hw = &adapter->hw; + +#ifdef DEV_NETMAP + adapter->init_locked = ixgbe_init_locked; + adapter->stop_locked = ixgbe_stop; +#endif + + /* Core Lock Init*/ + IXGBE_CORE_LOCK_INIT(adapter, device_get_nameunit(dev)); + + /* Set up the timer callout */ + callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0); + + /* Determine hardware revision */ + ixgbe_identify_hardware(adapter); + + /* Do base PCI setup - map BAR0 */ + if (ixgbe_allocate_pci_resources(adapter)) { + device_printf(dev, "Allocation of PCI resources failed\n"); + error = ENXIO; + goto err_out; + } + + /* Sysctls for limiting the amount of work done in the taskqueues */ + ixgbe_set_sysctl_value(adapter, "rx_processing_limit", + "max number of rx packets to process", + &adapter->rx_process_limit, ixgbe_rx_process_limit); + + ixgbe_set_sysctl_value(adapter, "tx_processing_limit", + "max number of tx packets to process", + &adapter->tx_process_limit, ixgbe_tx_process_limit); + + /* Do descriptor calc and sanity checks */ + if (((ixgbe_txd * sizeof(union ixgbe_adv_tx_desc)) % DBA_ALIGN) != 0 || + ixgbe_txd < MIN_TXD || ixgbe_txd > MAX_TXD) { + device_printf(dev, "TXD config issue, using default!\n"); + adapter->num_tx_desc = DEFAULT_TXD; + } else + adapter->num_tx_desc = ixgbe_txd; + + /* + ** With many RX rings it is easy to exceed the + ** system mbuf allocation. Tuning nmbclusters + ** can alleviate this. + */ + if (nmbclusters > 0) { + int s; + s = (ixgbe_rxd * adapter->num_queues) * ixgbe_total_ports; + if (s > nmbclusters) { + device_printf(dev, "RX Descriptors exceed " + "system mbuf max, using default instead!\n"); + ixgbe_rxd = DEFAULT_RXD; + } + } + + if (((ixgbe_rxd * sizeof(union ixgbe_adv_rx_desc)) % DBA_ALIGN) != 0 || + ixgbe_rxd < MIN_RXD || ixgbe_rxd > MAX_RXD) { + device_printf(dev, "RXD config issue, using default!\n"); + adapter->num_rx_desc = DEFAULT_RXD; + } else + adapter->num_rx_desc = ixgbe_rxd; + + /* Allocate our TX/RX Queues */ + if (ixgbe_allocate_queues(adapter)) { + error = ENOMEM; + goto err_out; + } + + /* Allocate multicast array memory. */ + adapter->mta = malloc(sizeof(*adapter->mta) * + MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT); + if (adapter->mta == NULL) { + device_printf(dev, "Can not allocate multicast setup array\n"); + error = ENOMEM; + goto err_late; + } + + /* Initialize the shared code */ + hw->allow_unsupported_sfp = allow_unsupported_sfp; + error = ixgbe_init_shared_code(hw); + if (error == IXGBE_ERR_SFP_NOT_PRESENT) { + /* + ** No optics in this port, set up + ** so the timer routine will probe + ** for later insertion. 
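+		** ixgbe_local_timer() performs that probe.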
+ */ + adapter->sfp_probe = TRUE; + error = 0; + } else if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) { + device_printf(dev, "Unsupported SFP+ module detected!\n"); + error = EIO; + goto err_late; + } else if (error) { + device_printf(dev, "Unable to initialize the shared code\n"); + error = EIO; + goto err_late; + } + + /* Make sure we have a good EEPROM before we read from it */ + if (ixgbe_validate_eeprom_checksum(&adapter->hw, &csum) < 0) { + device_printf(dev, "The EEPROM Checksum Is Not Valid\n"); + error = EIO; + goto err_late; + } + + error = ixgbe_init_hw(hw); + switch (error) { + case IXGBE_ERR_EEPROM_VERSION: + device_printf(dev, "This device is a pre-production adapter/" + "LOM. Please be aware there may be issues associated " + "with your hardware.\nIf you are experiencing problems " + "please contact your Intel or hardware representative " + "who provided you with this hardware.\n"); + break; + case IXGBE_ERR_SFP_NOT_SUPPORTED: + device_printf(dev, "Unsupported SFP+ Module\n"); + error = EIO; + goto err_late; + case IXGBE_ERR_SFP_NOT_PRESENT: + device_printf(dev, "No SFP+ Module found\n"); + /* falls thru */ + default: + break; + } + + /* hw.ix defaults init */ + ixgbe_set_advertise(adapter, ixgbe_advertise_speed); + ixgbe_set_flowcntl(adapter, ixgbe_flow_control); + adapter->enable_aim = ixgbe_enable_aim; + + if ((adapter->msix > 1) && (ixgbe_enable_msix)) + error = ixgbe_allocate_msix(adapter); + else + error = ixgbe_allocate_legacy(adapter); + if (error) + goto err_late; + + /* Enable the optics for 82599 SFP+ fiber */ + ixgbe_enable_tx_laser(hw); + + /* Enable power to the phy. */ + ixgbe_set_phy_power(hw, TRUE); + + /* Setup OS specific network interface */ + if (ixgbe_setup_interface(dev, adapter) != 0) + goto err_late; + + /* Initialize statistics */ + ixgbe_update_stats_counters(adapter); + + /* Register for VLAN events */ + adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config, + ixgbe_register_vlan, adapter, EVENTHANDLER_PRI_FIRST); + adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig, + ixgbe_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST); + + /* Check PCIE slot type/speed/width */ + ixgbe_get_slot_info(adapter); + + /* Set an initial default flow control & dmac value */ + adapter->fc = ixgbe_fc_full; + adapter->dmac = 0; + adapter->eee_enabled = 0; + +#ifdef PCI_IOV + if ((hw->mac.type != ixgbe_mac_82598EB) && (adapter->msix > 1)) { + nvlist_t *pf_schema, *vf_schema; + + hw->mbx.ops.init_params(hw); + pf_schema = pci_iov_schema_alloc_node(); + vf_schema = pci_iov_schema_alloc_node(); + pci_iov_schema_add_unicast_mac(vf_schema, "mac-addr", 0, NULL); + pci_iov_schema_add_bool(vf_schema, "mac-anti-spoof", + IOV_SCHEMA_HASDEFAULT, TRUE); + pci_iov_schema_add_bool(vf_schema, "allow-set-mac", + IOV_SCHEMA_HASDEFAULT, FALSE); + pci_iov_schema_add_bool(vf_schema, "allow-promisc", + IOV_SCHEMA_HASDEFAULT, FALSE); + error = pci_iov_attach(dev, pf_schema, vf_schema); + if (error != 0) { + device_printf(dev, + "Error %d setting up SR-IOV\n", error); + } + } +#endif /* PCI_IOV */ + + /* Check for certain supported features */ + ixgbe_check_wol_support(adapter); + + /* Add sysctls */ + ixgbe_add_device_sysctls(adapter); + ixgbe_add_hw_stats(adapter); + + /* let hardware know driver is loaded */ + ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT); + ctrl_ext |= IXGBE_CTRL_EXT_DRV_LOAD; + IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext); + +#ifdef DEV_NETMAP + ixgbe_netmap_attach(adapter); +#endif /* DEV_NETMAP */ + INIT_DEBUGOUT("ixgbe_attach: end"); + return (0); + 
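+/* Error return paths: free whatever was allocated before the failure */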
+err_late: + ixgbe_free_transmit_structures(adapter); + ixgbe_free_receive_structures(adapter); +err_out: + if (adapter->ifp != NULL) + if_free(adapter->ifp); + ixgbe_free_pci_resources(adapter); + free(adapter->mta, M_DEVBUF); + return (error); +} + +/********************************************************************* + * Device removal routine + * + * The detach entry point is called when the driver is being removed. + * This routine stops the adapter and deallocates all the resources + * that were allocated for driver operation. + * + * return 0 on success, positive on failure + *********************************************************************/ + +static int +ixgbe_detach(device_t dev) +{ + struct adapter *adapter = device_get_softc(dev); + struct ix_queue *que = adapter->queues; + struct tx_ring *txr = adapter->tx_rings; + u32 ctrl_ext; + + INIT_DEBUGOUT("ixgbe_detach: begin"); + + /* Make sure VLANS are not using driver */ + if (adapter->ifp->if_vlantrunk != NULL) { + device_printf(dev,"Vlan in use, detach first\n"); + return (EBUSY); + } + +#ifdef PCI_IOV + if (pci_iov_detach(dev) != 0) { + device_printf(dev, "SR-IOV in use; detach first.\n"); + return (EBUSY); + } +#endif /* PCI_IOV */ + + ether_ifdetach(adapter->ifp); + /* Stop the adapter */ + IXGBE_CORE_LOCK(adapter); + ixgbe_setup_low_power_mode(adapter); + IXGBE_CORE_UNLOCK(adapter); + + for (int i = 0; i < adapter->num_queues; i++, que++, txr++) { + if (que->tq) { +#ifndef IXGBE_LEGACY_TX + taskqueue_drain(que->tq, &txr->txq_task); +#endif + taskqueue_drain(que->tq, &que->que_task); + taskqueue_free(que->tq); + } + } + + /* Drain the Link queue */ + if (adapter->tq) { + taskqueue_drain(adapter->tq, &adapter->link_task); + taskqueue_drain(adapter->tq, &adapter->mod_task); + taskqueue_drain(adapter->tq, &adapter->msf_task); +#ifdef PCI_IOV + taskqueue_drain(adapter->tq, &adapter->mbx_task); +#endif + taskqueue_drain(adapter->tq, &adapter->phy_task); +#ifdef IXGBE_FDIR + taskqueue_drain(adapter->tq, &adapter->fdir_task); +#endif + taskqueue_free(adapter->tq); + } + + /* let hardware know driver is unloading */ + ctrl_ext = IXGBE_READ_REG(&adapter->hw, IXGBE_CTRL_EXT); + ctrl_ext &= ~IXGBE_CTRL_EXT_DRV_LOAD; + IXGBE_WRITE_REG(&adapter->hw, IXGBE_CTRL_EXT, ctrl_ext); + + /* Unregister VLAN events */ + if (adapter->vlan_attach != NULL) + EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach); + if (adapter->vlan_detach != NULL) + EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach); + + callout_drain(&adapter->timer); +#ifdef DEV_NETMAP + netmap_detach(adapter->ifp); +#endif /* DEV_NETMAP */ + ixgbe_free_pci_resources(adapter); + bus_generic_detach(dev); + if_free(adapter->ifp); + + ixgbe_free_transmit_structures(adapter); + ixgbe_free_receive_structures(adapter); + free(adapter->mta, M_DEVBUF); + + IXGBE_CORE_LOCK_DESTROY(adapter); + return (0); +} + +/********************************************************************* + * + * Shutdown entry point + * + **********************************************************************/ + +static int +ixgbe_shutdown(device_t dev) +{ + struct adapter *adapter = device_get_softc(dev); + int error = 0; + + INIT_DEBUGOUT("ixgbe_shutdown: begin"); + + IXGBE_CORE_LOCK(adapter); + error = ixgbe_setup_low_power_mode(adapter); + IXGBE_CORE_UNLOCK(adapter); + + return (error); +} + +/** + * Methods for going from: + * D0 -> D3: ixgbe_suspend + * D3 -> D0: ixgbe_resume + */ +static int +ixgbe_suspend(device_t dev) +{ + struct adapter *adapter = device_get_softc(dev); + int error = 0; + 
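+	/* Suspend shares the same low-power path as shutdown */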
+ INIT_DEBUGOUT("ixgbe_suspend: begin"); + + IXGBE_CORE_LOCK(adapter); + + error = ixgbe_setup_low_power_mode(adapter); + + IXGBE_CORE_UNLOCK(adapter); + + return (error); +} + +static int +ixgbe_resume(device_t dev) +{ + struct adapter *adapter = device_get_softc(dev); + struct ifnet *ifp = adapter->ifp; + struct ixgbe_hw *hw = &adapter->hw; + u32 wus; + + INIT_DEBUGOUT("ixgbe_resume: begin"); + + IXGBE_CORE_LOCK(adapter); + + /* Read & clear WUS register */ + wus = IXGBE_READ_REG(hw, IXGBE_WUS); + if (wus) + device_printf(dev, "Woken up by (WUS): %#010x\n", + IXGBE_READ_REG(hw, IXGBE_WUS)); + IXGBE_WRITE_REG(hw, IXGBE_WUS, 0xffffffff); + /* And clear WUFC until next low-power transition */ + IXGBE_WRITE_REG(hw, IXGBE_WUFC, 0); + + /* + * Required after D3->D0 transition; + * will re-advertise all previous advertised speeds + */ + if (ifp->if_flags & IFF_UP) + ixgbe_init_locked(adapter); + + IXGBE_CORE_UNLOCK(adapter); + + return (0); +} + + +/********************************************************************* + * Ioctl entry point + * + * ixgbe_ioctl is called when the user wants to configure the + * interface. + * + * return 0 on success, positive on failure + **********************************************************************/ + +static int +ixgbe_ioctl(struct ifnet * ifp, u_long command, caddr_t data) +{ + struct adapter *adapter = ifp->if_softc; + struct ifreq *ifr = (struct ifreq *) data; +#if defined(INET) || defined(INET6) + struct ifaddr *ifa = (struct ifaddr *)data; +#endif + int error = 0; + bool avoid_reset = FALSE; + + switch (command) { + + case SIOCSIFADDR: +#ifdef INET + if (ifa->ifa_addr->sa_family == AF_INET) + avoid_reset = TRUE; +#endif +#ifdef INET6 + if (ifa->ifa_addr->sa_family == AF_INET6) + avoid_reset = TRUE; +#endif + /* + ** Calling init results in link renegotiation, + ** so we avoid doing it when possible. 
+ */ + if (avoid_reset) { + ifp->if_flags |= IFF_UP; + if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) + ixgbe_init(adapter); +#ifdef INET + if (!(ifp->if_flags & IFF_NOARP)) + arp_ifinit(ifp, ifa); +#endif + } else + error = ether_ioctl(ifp, command, data); + break; + case SIOCSIFMTU: + IOCTL_DEBUGOUT("ioctl: SIOCSIFMTU (Set Interface MTU)"); + if (ifr->ifr_mtu > IXGBE_MAX_MTU) { + error = EINVAL; + } else { + IXGBE_CORE_LOCK(adapter); + ifp->if_mtu = ifr->ifr_mtu; + adapter->max_frame_size = + ifp->if_mtu + IXGBE_MTU_HDR; + if (ifp->if_drv_flags & IFF_DRV_RUNNING) + ixgbe_init_locked(adapter); +#ifdef PCI_IOV + ixgbe_recalculate_max_frame(adapter); +#endif + IXGBE_CORE_UNLOCK(adapter); + } + break; + case SIOCSIFFLAGS: + IOCTL_DEBUGOUT("ioctl: SIOCSIFFLAGS (Set Interface Flags)"); + IXGBE_CORE_LOCK(adapter); + if (ifp->if_flags & IFF_UP) { + if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) { + if ((ifp->if_flags ^ adapter->if_flags) & + (IFF_PROMISC | IFF_ALLMULTI)) { + ixgbe_set_promisc(adapter); + } + } else + ixgbe_init_locked(adapter); + } else + if (ifp->if_drv_flags & IFF_DRV_RUNNING) + ixgbe_stop(adapter); + adapter->if_flags = ifp->if_flags; + IXGBE_CORE_UNLOCK(adapter); + break; + case SIOCADDMULTI: + case SIOCDELMULTI: + IOCTL_DEBUGOUT("ioctl: SIOC(ADD|DEL)MULTI"); + if (ifp->if_drv_flags & IFF_DRV_RUNNING) { + IXGBE_CORE_LOCK(adapter); + ixgbe_disable_intr(adapter); + ixgbe_set_multi(adapter); + ixgbe_enable_intr(adapter); + IXGBE_CORE_UNLOCK(adapter); + } + break; + case SIOCSIFMEDIA: + case SIOCGIFMEDIA: + IOCTL_DEBUGOUT("ioctl: SIOCxIFMEDIA (Get/Set Interface Media)"); + error = ifmedia_ioctl(ifp, ifr, &adapter->media, command); + break; + case SIOCSIFCAP: + { + IOCTL_DEBUGOUT("ioctl: SIOCSIFCAP (Set Capabilities)"); + + int mask = ifr->ifr_reqcap ^ ifp->if_capenable; + if (!mask) + break; + + /* HW cannot turn these on/off separately */ + if (mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) { + ifp->if_capenable ^= IFCAP_RXCSUM; + ifp->if_capenable ^= IFCAP_RXCSUM_IPV6; + } + if (mask & IFCAP_TXCSUM) + ifp->if_capenable ^= IFCAP_TXCSUM; + if (mask & IFCAP_TXCSUM_IPV6) + ifp->if_capenable ^= IFCAP_TXCSUM_IPV6; + if (mask & IFCAP_TSO4) + ifp->if_capenable ^= IFCAP_TSO4; + if (mask & IFCAP_TSO6) + ifp->if_capenable ^= IFCAP_TSO6; + if (mask & IFCAP_LRO) + ifp->if_capenable ^= IFCAP_LRO; + if (mask & IFCAP_VLAN_HWTAGGING) + ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; + if (mask & IFCAP_VLAN_HWFILTER) + ifp->if_capenable ^= IFCAP_VLAN_HWFILTER; + if (mask & IFCAP_VLAN_HWTSO) + ifp->if_capenable ^= IFCAP_VLAN_HWTSO; + + if (ifp->if_drv_flags & IFF_DRV_RUNNING) { + IXGBE_CORE_LOCK(adapter); + ixgbe_init_locked(adapter); + IXGBE_CORE_UNLOCK(adapter); + } + VLAN_CAPABILITIES(ifp); + break; + } +#if __FreeBSD_version >= 1100036 + case SIOCGI2C: + { + struct ixgbe_hw *hw = &adapter->hw; + struct ifi2creq i2c; + int i; + IOCTL_DEBUGOUT("ioctl: SIOCGI2C (Get I2C Data)"); + error = copyin(ifr->ifr_data, &i2c, sizeof(i2c)); + if (error != 0) + break; + if (i2c.dev_addr != 0xA0 && i2c.dev_addr != 0xA2) { + error = EINVAL; + break; + } + if (i2c.len > sizeof(i2c.data)) { + error = EINVAL; + break; + } + + for (i = 0; i < i2c.len; i++) + hw->phy.ops.read_i2c_byte(hw, i2c.offset + i, + i2c.dev_addr, &i2c.data[i]); + error = copyout(&i2c, ifr->ifr_data, sizeof(i2c)); + break; + } +#endif + default: + IOCTL_DEBUGOUT1("ioctl: UNKNOWN (0x%X)\n", (int)command); + error = ether_ioctl(ifp, command, data); + break; + } + + return (error); +} + +/* + * Set the various hardware offload abilities. 
+ * + * This takes the ifnet's if_capenable flags (e.g. set by the user using + * ifconfig) and indicates to the OS via the ifnet's if_hwassist field what + * mbuf offload flags the driver will understand. + */ +static void +ixgbe_set_if_hwassist(struct adapter *adapter) +{ + struct ifnet *ifp = adapter->ifp; + struct ixgbe_hw *hw = &adapter->hw; + + ifp->if_hwassist = 0; +#if __FreeBSD_version >= 1000000 + if (ifp->if_capenable & IFCAP_TSO4) + ifp->if_hwassist |= CSUM_IP_TSO; + if (ifp->if_capenable & IFCAP_TSO6) + ifp->if_hwassist |= CSUM_IP6_TSO; + if (ifp->if_capenable & IFCAP_TXCSUM) { + ifp->if_hwassist |= (CSUM_IP | CSUM_IP_UDP | CSUM_IP_TCP); + if (hw->mac.type != ixgbe_mac_82598EB) + ifp->if_hwassist |= CSUM_IP_SCTP; + } + if (ifp->if_capenable & IFCAP_TXCSUM_IPV6) { + ifp->if_hwassist |= (CSUM_IP6_UDP | CSUM_IP6_TCP); + if (hw->mac.type != ixgbe_mac_82598EB) + ifp->if_hwassist |= CSUM_IP6_SCTP; + } +#else + if (ifp->if_capenable & IFCAP_TSO) + ifp->if_hwassist |= CSUM_TSO; + if (ifp->if_capenable & IFCAP_TXCSUM) { + ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP); + if (hw->mac.type != ixgbe_mac_82598EB) + ifp->if_hwassist |= CSUM_SCTP; + } +#endif +} + +/********************************************************************* + * Init entry point + * + * This routine is used in two ways. It is used by the stack as + * init entry point in network interface structure. It is also used + * by the driver as a hw/sw initialization routine to get to a + * consistent state. + * + * return 0 on success, positive on failure + **********************************************************************/ +#define IXGBE_MHADD_MFS_SHIFT 16 + +static void +ixgbe_init_locked(struct adapter *adapter) +{ + struct ifnet *ifp = adapter->ifp; + device_t dev = adapter->dev; + struct ixgbe_hw *hw = &adapter->hw; + struct tx_ring *txr; + struct rx_ring *rxr; + u32 txdctl, mhadd; + u32 rxdctl, rxctrl; + int err = 0; +#ifdef PCI_IOV + enum ixgbe_iov_mode mode; +#endif + + mtx_assert(&adapter->core_mtx, MA_OWNED); + INIT_DEBUGOUT("ixgbe_init_locked: begin"); + + hw->adapter_stopped = FALSE; + ixgbe_stop_adapter(hw); + callout_stop(&adapter->timer); + +#ifdef PCI_IOV + mode = ixgbe_get_iov_mode(adapter); + adapter->pool = ixgbe_max_vfs(mode); + /* Queue indices may change with IOV mode */ + for (int i = 0; i < adapter->num_queues; i++) { + adapter->rx_rings[i].me = ixgbe_pf_que_index(mode, i); + adapter->tx_rings[i].me = ixgbe_pf_que_index(mode, i); + } +#endif + /* reprogram the RAR[0] in case user changed it. 
*/ + ixgbe_set_rar(hw, 0, hw->mac.addr, adapter->pool, IXGBE_RAH_AV); + + /* Get the latest mac address, User can use a LAA */ + bcopy(IF_LLADDR(ifp), hw->mac.addr, IXGBE_ETH_LENGTH_OF_ADDRESS); + ixgbe_set_rar(hw, 0, hw->mac.addr, adapter->pool, 1); + hw->addr_ctrl.rar_used_count = 1; + + /* Set hardware offload abilities from ifnet flags */ + ixgbe_set_if_hwassist(adapter); + + /* Prepare transmit descriptors and buffers */ + if (ixgbe_setup_transmit_structures(adapter)) { + device_printf(dev, "Could not setup transmit structures\n"); + ixgbe_stop(adapter); + return; + } + + ixgbe_init_hw(hw); +#ifdef PCI_IOV + ixgbe_initialize_iov(adapter); +#endif + ixgbe_initialize_transmit_units(adapter); + + /* Setup Multicast table */ + ixgbe_set_multi(adapter); + + /* Determine the correct mbuf pool, based on frame size */ + if (adapter->max_frame_size <= MCLBYTES) + adapter->rx_mbuf_sz = MCLBYTES; + else + adapter->rx_mbuf_sz = MJUMPAGESIZE; + + /* Prepare receive descriptors and buffers */ + if (ixgbe_setup_receive_structures(adapter)) { + device_printf(dev, "Could not setup receive structures\n"); + ixgbe_stop(adapter); + return; + } + + /* Configure RX settings */ + ixgbe_initialize_receive_units(adapter); + + /* Enable SDP & MSIX interrupts based on adapter */ + ixgbe_config_gpie(adapter); + + /* Set MTU size */ + if (ifp->if_mtu > ETHERMTU) { + /* aka IXGBE_MAXFRS on 82599 and newer */ + mhadd = IXGBE_READ_REG(hw, IXGBE_MHADD); + mhadd &= ~IXGBE_MHADD_MFS_MASK; + mhadd |= adapter->max_frame_size << IXGBE_MHADD_MFS_SHIFT; + IXGBE_WRITE_REG(hw, IXGBE_MHADD, mhadd); + } + + /* Now enable all the queues */ + for (int i = 0; i < adapter->num_queues; i++) { + txr = &adapter->tx_rings[i]; + txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txr->me)); + txdctl |= IXGBE_TXDCTL_ENABLE; + /* Set WTHRESH to 8, burst writeback */ + txdctl |= (8 << 16); + /* + * When the internal queue falls below PTHRESH (32), + * start prefetching as long as there are at least + * HTHRESH (1) buffers ready. The values are taken + * from the Intel linux driver 3.8.21. + * Prefetching enables tx line rate even with 1 queue. + */ + txdctl |= (32 << 0) | (1 << 8); + IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txr->me), txdctl); + } + + for (int i = 0, j = 0; i < adapter->num_queues; i++) { + rxr = &adapter->rx_rings[i]; + rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxr->me)); + if (hw->mac.type == ixgbe_mac_82598EB) { + /* + ** PTHRESH = 21 + ** HTHRESH = 4 + ** WTHRESH = 8 + */ + rxdctl &= ~0x3FFFFF; + rxdctl |= 0x080420; + } + rxdctl |= IXGBE_RXDCTL_ENABLE; + IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxr->me), rxdctl); + for (; j < 10; j++) { + if (IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxr->me)) & + IXGBE_RXDCTL_ENABLE) + break; + else + msec_delay(1); + } + wmb(); +#ifdef DEV_NETMAP + /* + * In netmap mode, we must preserve the buffers made + * available to userspace before the if_init() + * (this is true by default on the TX side, because + * init makes all buffers available to userspace). + * + * netmap_reset() and the device specific routines + * (e.g. ixgbe_setup_receive_rings()) map these + * buffers at the end of the NIC ring, so here we + * must set the RDT (tail) register to make sure + * they are not overwritten. + * + * In this driver the NIC ring starts at RDH = 0, + * RDT points to the last slot available for reception (?), + * so RDT = num_rx_desc - 1 means the whole ring is available. 
+ */ + if (ifp->if_capenable & IFCAP_NETMAP) { + struct netmap_adapter *na = NA(adapter->ifp); + struct netmap_kring *kring = &na->rx_rings[i]; + int t = na->num_rx_desc - 1 - nm_kr_rxspace(kring); + + IXGBE_WRITE_REG(hw, IXGBE_RDT(rxr->me), t); + } else +#endif /* DEV_NETMAP */ + IXGBE_WRITE_REG(hw, IXGBE_RDT(rxr->me), adapter->num_rx_desc - 1); + } + + /* Enable Receive engine */ + rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL); + if (hw->mac.type == ixgbe_mac_82598EB) + rxctrl |= IXGBE_RXCTRL_DMBYPS; + rxctrl |= IXGBE_RXCTRL_RXEN; + ixgbe_enable_rx_dma(hw, rxctrl); + + callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter); + + /* Set up MSI/X routing */ + if (ixgbe_enable_msix) { + ixgbe_configure_ivars(adapter); + /* Set up auto-mask */ + if (hw->mac.type == ixgbe_mac_82598EB) + IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE); + else { + IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(0), 0xFFFFFFFF); + IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(1), 0xFFFFFFFF); + } + } else { /* Simple settings for Legacy/MSI */ + ixgbe_set_ivar(adapter, 0, 0, 0); + ixgbe_set_ivar(adapter, 0, 0, 1); + IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE); + } + +#ifdef IXGBE_FDIR + /* Init Flow director */ + if (hw->mac.type != ixgbe_mac_82598EB) { + u32 hdrm = 32 << fdir_pballoc; + + hw->mac.ops.setup_rxpba(hw, 0, hdrm, PBA_STRATEGY_EQUAL); + ixgbe_init_fdir_signature_82599(&adapter->hw, fdir_pballoc); + } +#endif + + /* + * Check on any SFP devices that + * need to be kick-started + */ + if (hw->phy.type == ixgbe_phy_none) { + err = hw->phy.ops.identify(hw); + if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) { + device_printf(dev, + "Unsupported SFP+ module type was detected.\n"); + return; + } + } + + /* Set moderation on the Link interrupt */ + IXGBE_WRITE_REG(hw, IXGBE_EITR(adapter->vector), IXGBE_LINK_ITR); + + /* Configure Energy Efficient Ethernet for supported devices */ + if (hw->mac.ops.setup_eee) { + err = hw->mac.ops.setup_eee(hw, adapter->eee_enabled); + if (err) + device_printf(dev, "Error setting up EEE: %d\n", err); + } + + /* Enable power to the phy. 
*/ + ixgbe_set_phy_power(hw, TRUE); + + /* Config/Enable Link */ + ixgbe_config_link(adapter); + + /* Hardware Packet Buffer & Flow Control setup */ + ixgbe_config_delay_values(adapter); + + /* Initialize the FC settings */ + ixgbe_start_hw(hw); + + /* Set up VLAN support and filter */ + ixgbe_setup_vlan_hw_support(adapter); + + /* Setup DMA Coalescing */ + ixgbe_config_dmac(adapter); + + /* And now turn on interrupts */ + ixgbe_enable_intr(adapter); + +#ifdef PCI_IOV + /* Enable the use of the MBX by the VF's */ + { + u32 reg = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT); + reg |= IXGBE_CTRL_EXT_PFRSTD; + IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, reg); + } +#endif + + /* Now inform the stack we're ready */ + ifp->if_drv_flags |= IFF_DRV_RUNNING; + + return; +} + +static void +ixgbe_init(void *arg) +{ + struct adapter *adapter = arg; + + IXGBE_CORE_LOCK(adapter); + ixgbe_init_locked(adapter); + IXGBE_CORE_UNLOCK(adapter); + return; +} + +static void +ixgbe_config_gpie(struct adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + u32 gpie; + + gpie = IXGBE_READ_REG(hw, IXGBE_GPIE); + + /* Fan Failure Interrupt */ + if (hw->device_id == IXGBE_DEV_ID_82598AT) + gpie |= IXGBE_SDP1_GPIEN; + + /* + * Module detection (SDP2) + * Media ready (SDP1) + */ + if (hw->mac.type == ixgbe_mac_82599EB) { + gpie |= IXGBE_SDP2_GPIEN; + if (hw->device_id != IXGBE_DEV_ID_82599_QSFP_SF_QP) + gpie |= IXGBE_SDP1_GPIEN; + } + + /* + * Thermal Failure Detection (X540) + * Link Detection (X552 SFP+, X552/X557-AT) + */ + if (hw->mac.type == ixgbe_mac_X540 || + hw->device_id == IXGBE_DEV_ID_X550EM_X_SFP || + hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T) + gpie |= IXGBE_SDP0_GPIEN_X540; + + if (adapter->msix > 1) { + /* Enable Enhanced MSIX mode */ + gpie |= IXGBE_GPIE_MSIX_MODE; + gpie |= IXGBE_GPIE_EIAME | IXGBE_GPIE_PBA_SUPPORT | + IXGBE_GPIE_OCD; + } + + IXGBE_WRITE_REG(hw, IXGBE_GPIE, gpie); + return; +} + +/* + * Requires adapter->max_frame_size to be set. 
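+ * Called from ixgbe_init_locked() once the frame size is known.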
+ */
+static void
+ixgbe_config_delay_values(struct adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 rxpb, frame, size, tmp;
+
+	frame = adapter->max_frame_size;
+
+	/* Calculate High Water */
+	switch (hw->mac.type) {
+	case ixgbe_mac_X540:
+	case ixgbe_mac_X550:
+	case ixgbe_mac_X550EM_x:
+		tmp = IXGBE_DV_X540(frame, frame);
+		break;
+	default:
+		tmp = IXGBE_DV(frame, frame);
+		break;
+	}
+	size = IXGBE_BT2KB(tmp);
+	rxpb = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(0)) >> 10;
+	hw->fc.high_water[0] = rxpb - size;
+
+	/* Now calculate Low Water */
+	switch (hw->mac.type) {
+	case ixgbe_mac_X540:
+	case ixgbe_mac_X550:
+	case ixgbe_mac_X550EM_x:
+		tmp = IXGBE_LOW_DV_X540(frame);
+		break;
+	default:
+		tmp = IXGBE_LOW_DV(frame);
+		break;
+	}
+	hw->fc.low_water[0] = IXGBE_BT2KB(tmp);
+
+	hw->fc.requested_mode = adapter->fc;
+	hw->fc.pause_time = IXGBE_FC_PAUSE;
+	hw->fc.send_xon = TRUE;
+}
+
+/*
+**
+** MSIX Interrupt Handlers and Tasklets
+**
+*/
+
+static inline void
+ixgbe_enable_queue(struct adapter *adapter, u32 vector)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	u64 queue = (u64)1 << vector;
+	u32 mask;
+
+	if (hw->mac.type == ixgbe_mac_82598EB) {
+		mask = (IXGBE_EIMS_RTX_QUEUE & queue);
+		IXGBE_WRITE_REG(hw, IXGBE_EIMS, mask);
+	} else {
+		mask = (queue & 0xFFFFFFFF);
+		if (mask)
+			IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(0), mask);
+		mask = (queue >> 32);
+		if (mask)
+			IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(1), mask);
+	}
+}
+
+static inline void
+ixgbe_disable_queue(struct adapter *adapter, u32 vector)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	u64 queue = (u64)1 << vector;
+	u32 mask;
+
+	if (hw->mac.type == ixgbe_mac_82598EB) {
+		mask = (IXGBE_EIMS_RTX_QUEUE & queue);
+		IXGBE_WRITE_REG(hw, IXGBE_EIMC, mask);
+	} else {
+		mask = (queue & 0xFFFFFFFF);
+		if (mask)
+			IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(0), mask);
+		mask = (queue >> 32);
+		if (mask)
+			IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(1), mask);
+	}
+}
+
+static void
+ixgbe_handle_que(void *context, int pending)
+{
+	struct ix_queue *que = context;
+	struct adapter *adapter = que->adapter;
+	struct tx_ring *txr = que->txr;
+	struct ifnet *ifp = adapter->ifp;
+
+	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
+		ixgbe_rxeof(que);
+		IXGBE_TX_LOCK(txr);
+		ixgbe_txeof(txr);
+#ifndef IXGBE_LEGACY_TX
+		if (!drbr_empty(ifp, txr->br))
+			ixgbe_mq_start_locked(ifp, txr);
+#else
+		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
+			ixgbe_start_locked(txr, ifp);
+#endif
+		IXGBE_TX_UNLOCK(txr);
+	}
+
+	/* Re-enable this interrupt */
+	if (que->res != NULL)
+		ixgbe_enable_queue(adapter, que->msix);
+	else
+		ixgbe_enable_intr(adapter);
+	return;
+}
+
+
+/*********************************************************************
+ *
+ *  Legacy Interrupt Service routine
+ *
+ **********************************************************************/
+
+static void
+ixgbe_legacy_irq(void *arg)
+{
+	struct ix_queue *que = arg;
+	struct adapter	*adapter = que->adapter;
+	struct ixgbe_hw	*hw = &adapter->hw;
+	struct ifnet	*ifp = adapter->ifp;
+	struct tx_ring	*txr = adapter->tx_rings;
+	bool		more;
+	u32		reg_eicr;
+
+
+	reg_eicr = IXGBE_READ_REG(hw, IXGBE_EICR);
+
+	++que->irqs;
+	if (reg_eicr == 0) {
+		ixgbe_enable_intr(adapter);
+		return;
+	}
+
+	more = ixgbe_rxeof(que);
+
+	IXGBE_TX_LOCK(txr);
+	ixgbe_txeof(txr);
+#ifdef IXGBE_LEGACY_TX
+	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
+		ixgbe_start_locked(txr, ifp);
+#else
+	if (!drbr_empty(ifp, txr->br))
+		ixgbe_mq_start_locked(ifp, txr);
+#endif
+	IXGBE_TX_UNLOCK(txr);
+
+	/* Check for fan failure */
+	if ((hw->device_id ==
IXGBE_DEV_ID_82598AT) &&
+	    (reg_eicr & IXGBE_EICR_GPI_SDP1)) {
+		device_printf(adapter->dev, "\nCRITICAL: FAN FAILURE!! "
+		    "REPLACE IMMEDIATELY!!\n");
+		IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EICR_GPI_SDP1_BY_MAC(hw));
+	}
+
+	/* Link status change */
+	if (reg_eicr & IXGBE_EICR_LSC)
+		taskqueue_enqueue(adapter->tq, &adapter->link_task);
+
+	/* External PHY interrupt */
+	if (hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T &&
+	    (reg_eicr & IXGBE_EICR_GPI_SDP0_X540))
+		taskqueue_enqueue(adapter->tq, &adapter->phy_task);
+
+	if (more)
+		taskqueue_enqueue(que->tq, &que->que_task);
+	else
+		ixgbe_enable_intr(adapter);
+	return;
+}
+
+
+/*********************************************************************
+ *
+ *  MSIX Queue Interrupt Service routine
+ *
+ **********************************************************************/
+void
+ixgbe_msix_que(void *arg)
+{
+	struct ix_queue	*que = arg;
+	struct adapter	*adapter = que->adapter;
+	struct ifnet	*ifp = adapter->ifp;
+	struct tx_ring	*txr = que->txr;
+	struct rx_ring	*rxr = que->rxr;
+	bool		more;
+	u32		newitr = 0;
+
+
+	/* Protect against spurious interrupts */
+	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
+		return;
+
+	ixgbe_disable_queue(adapter, que->msix);
+	++que->irqs;
+
+	more = ixgbe_rxeof(que);
+
+	IXGBE_TX_LOCK(txr);
+	ixgbe_txeof(txr);
+#ifdef IXGBE_LEGACY_TX
+	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
+		ixgbe_start_locked(txr, ifp);
+#else
+	if (!drbr_empty(ifp, txr->br))
+		ixgbe_mq_start_locked(ifp, txr);
+#endif
+	IXGBE_TX_UNLOCK(txr);
+
+	/* Do AIM now? */
+
+	if (adapter->enable_aim == FALSE)
+		goto no_calc;
+	/*
+	** Do Adaptive Interrupt Moderation:
+	**  - Write out last calculated setting
+	**  - Calculate based on average size over
+	**    the last interval.
+	*/
+	if (que->eitr_setting)
+		IXGBE_WRITE_REG(&adapter->hw,
+		    IXGBE_EITR(que->msix), que->eitr_setting);
+
+	que->eitr_setting = 0;
+
+	/* Idle, do nothing */
+	if ((txr->bytes == 0) && (rxr->bytes == 0))
+		goto no_calc;
+
+	if ((txr->bytes) && (txr->packets))
+		newitr = txr->bytes/txr->packets;
+	if ((rxr->bytes) && (rxr->packets))
+		newitr = max(newitr,
+		    (rxr->bytes / rxr->packets));
+	newitr += 24; /* account for hardware frame, crc */
+
+	/* set an upper boundary */
+	newitr = min(newitr, 3000);
+
+	/* Be nice to the mid range */
+	if ((newitr > 300) && (newitr < 1200))
+		newitr = (newitr / 3);
+	else
+		newitr = (newitr / 2);
+
+	if (adapter->hw.mac.type == ixgbe_mac_82598EB)
+		newitr |= newitr << 16;
+	else
+		newitr |= IXGBE_EITR_CNT_WDIS;
+
+	/* save for next interrupt */
+	que->eitr_setting = newitr;
+
+	/* Reset state */
+	txr->bytes = 0;
+	txr->packets = 0;
+	rxr->bytes = 0;
+	rxr->packets = 0;
+
+no_calc:
+	if (more)
+		taskqueue_enqueue(que->tq, &que->que_task);
+	else
+		ixgbe_enable_queue(adapter, que->msix);
+	return;
+}
+
+
+static void
+ixgbe_msix_link(void *arg)
+{
+	struct adapter *adapter = arg;
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 reg_eicr, mod_mask;
+
+	++adapter->link_irq;
+
+	/* Pause other interrupts */
+	IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_EIMC_OTHER);
+
+	/* First get the cause */
+	reg_eicr = IXGBE_READ_REG(hw, IXGBE_EICS);
+	/* Be sure the queue bits are not cleared */
+	reg_eicr &= ~IXGBE_EICR_RTX_QUEUE;
+	/* Clear interrupt with write */
+	IXGBE_WRITE_REG(hw, IXGBE_EICR, reg_eicr);
+
+	/* Link status change */
+	if (reg_eicr & IXGBE_EICR_LSC) {
+		IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_EIMC_LSC);
+		taskqueue_enqueue(adapter->tq, &adapter->link_task);
+	}
+
+	if (adapter->hw.mac.type != ixgbe_mac_82598EB) {
+#ifdef IXGBE_FDIR
+		if (reg_eicr &
IXGBE_EICR_FLOW_DIR) { + /* This is probably overkill :) */ + if (!atomic_cmpset_int(&adapter->fdir_reinit, 0, 1)) + return; + /* Disable the interrupt */ + IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_EICR_FLOW_DIR); + taskqueue_enqueue(adapter->tq, &adapter->fdir_task); + } else +#endif + if (reg_eicr & IXGBE_EICR_ECC) { + device_printf(adapter->dev, "CRITICAL: ECC ERROR!! " + "Please Reboot!!\n"); + IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_ECC); + } + + /* Check for over temp condition */ + if (reg_eicr & IXGBE_EICR_TS) { + device_printf(adapter->dev, "CRITICAL: OVER TEMP!! " + "PHY IS SHUT DOWN!!\n"); + device_printf(adapter->dev, "System shutdown required!\n"); + IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_TS); + } +#ifdef PCI_IOV + if (reg_eicr & IXGBE_EICR_MAILBOX) + taskqueue_enqueue(adapter->tq, &adapter->mbx_task); +#endif + } + + /* Pluggable optics-related interrupt */ + if (hw->device_id == IXGBE_DEV_ID_X550EM_X_SFP) + mod_mask = IXGBE_EICR_GPI_SDP0_X540; + else + mod_mask = IXGBE_EICR_GPI_SDP2_BY_MAC(hw); + + if (ixgbe_is_sfp(hw)) { + if (reg_eicr & IXGBE_EICR_GPI_SDP1_BY_MAC(hw)) { + IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1_BY_MAC(hw)); + taskqueue_enqueue(adapter->tq, &adapter->msf_task); + } else if (reg_eicr & mod_mask) { + IXGBE_WRITE_REG(hw, IXGBE_EICR, mod_mask); + taskqueue_enqueue(adapter->tq, &adapter->mod_task); + } + } + + /* Check for fan failure */ + if ((hw->device_id == IXGBE_DEV_ID_82598AT) && + (reg_eicr & IXGBE_EICR_GPI_SDP1)) { + IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1); + device_printf(adapter->dev, "\nCRITICAL: FAN FAILURE!! " + "REPLACE IMMEDIATELY!!\n"); + } + + /* External PHY interrupt */ + if (hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T && + (reg_eicr & IXGBE_EICR_GPI_SDP0_X540)) { + IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP0_X540); + taskqueue_enqueue(adapter->tq, &adapter->phy_task); + } + + /* Re-enable other interrupts */ + IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, IXGBE_EIMS_OTHER); + return; +} + +/********************************************************************* + * + * Media Ioctl callback + * + * This routine is called whenever the user queries the status of + * the interface using ifconfig. 
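+ * It reports link state, media subtype, duplex and pause settings.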
+ * + **********************************************************************/ +static void +ixgbe_media_status(struct ifnet * ifp, struct ifmediareq * ifmr) +{ + struct adapter *adapter = ifp->if_softc; + struct ixgbe_hw *hw = &adapter->hw; + int layer; + + INIT_DEBUGOUT("ixgbe_media_status: begin"); + IXGBE_CORE_LOCK(adapter); + ixgbe_update_link_status(adapter); + + ifmr->ifm_status = IFM_AVALID; + ifmr->ifm_active = IFM_ETHER; + + if (!adapter->link_active) { + IXGBE_CORE_UNLOCK(adapter); + return; + } + + ifmr->ifm_status |= IFM_ACTIVE; + layer = adapter->phy_layer; + + if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_T || + layer & IXGBE_PHYSICAL_LAYER_1000BASE_T || + layer & IXGBE_PHYSICAL_LAYER_100BASE_TX) + switch (adapter->link_speed) { + case IXGBE_LINK_SPEED_10GB_FULL: + ifmr->ifm_active |= IFM_10G_T | IFM_FDX; + break; + case IXGBE_LINK_SPEED_1GB_FULL: + ifmr->ifm_active |= IFM_1000_T | IFM_FDX; + break; + case IXGBE_LINK_SPEED_100_FULL: + ifmr->ifm_active |= IFM_100_TX | IFM_FDX; + break; + } + if (layer & IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU || + layer & IXGBE_PHYSICAL_LAYER_SFP_ACTIVE_DA) + switch (adapter->link_speed) { + case IXGBE_LINK_SPEED_10GB_FULL: + ifmr->ifm_active |= IFM_10G_TWINAX | IFM_FDX; + break; + } + if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_LR) + switch (adapter->link_speed) { + case IXGBE_LINK_SPEED_10GB_FULL: + ifmr->ifm_active |= IFM_10G_LR | IFM_FDX; + break; + case IXGBE_LINK_SPEED_1GB_FULL: + ifmr->ifm_active |= IFM_1000_LX | IFM_FDX; + break; + } + if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_LRM) + switch (adapter->link_speed) { + case IXGBE_LINK_SPEED_10GB_FULL: + ifmr->ifm_active |= IFM_10G_LRM | IFM_FDX; + break; + case IXGBE_LINK_SPEED_1GB_FULL: + ifmr->ifm_active |= IFM_1000_LX | IFM_FDX; + break; + } + if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_SR || + layer & IXGBE_PHYSICAL_LAYER_1000BASE_SX) + switch (adapter->link_speed) { + case IXGBE_LINK_SPEED_10GB_FULL: + ifmr->ifm_active |= IFM_10G_SR | IFM_FDX; + break; + case IXGBE_LINK_SPEED_1GB_FULL: + ifmr->ifm_active |= IFM_1000_SX | IFM_FDX; + break; + } + if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_CX4) + switch (adapter->link_speed) { + case IXGBE_LINK_SPEED_10GB_FULL: + ifmr->ifm_active |= IFM_10G_CX4 | IFM_FDX; + break; + } + /* + ** XXX: These need to use the proper media types once + ** they're added. 
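+	** (With IFM_ETH_XTYPE the proper KR/KX4/KX subtypes are used below.)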
+ */ +#ifndef IFM_ETH_XTYPE + if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KR) + switch (adapter->link_speed) { + case IXGBE_LINK_SPEED_10GB_FULL: + ifmr->ifm_active |= IFM_10G_SR | IFM_FDX; + break; + case IXGBE_LINK_SPEED_2_5GB_FULL: + ifmr->ifm_active |= IFM_2500_SX | IFM_FDX; + break; + case IXGBE_LINK_SPEED_1GB_FULL: + ifmr->ifm_active |= IFM_1000_CX | IFM_FDX; + break; + } + else if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KX4 + || layer & IXGBE_PHYSICAL_LAYER_1000BASE_KX) + switch (adapter->link_speed) { + case IXGBE_LINK_SPEED_10GB_FULL: + ifmr->ifm_active |= IFM_10G_CX4 | IFM_FDX; + break; + case IXGBE_LINK_SPEED_2_5GB_FULL: + ifmr->ifm_active |= IFM_2500_SX | IFM_FDX; + break; + case IXGBE_LINK_SPEED_1GB_FULL: + ifmr->ifm_active |= IFM_1000_CX | IFM_FDX; + break; + } +#else + if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KR) + switch (adapter->link_speed) { + case IXGBE_LINK_SPEED_10GB_FULL: + ifmr->ifm_active |= IFM_10G_KR | IFM_FDX; + break; + case IXGBE_LINK_SPEED_2_5GB_FULL: + ifmr->ifm_active |= IFM_2500_KX | IFM_FDX; + break; + case IXGBE_LINK_SPEED_1GB_FULL: + ifmr->ifm_active |= IFM_1000_KX | IFM_FDX; + break; + } + else if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KX4 + || layer & IXGBE_PHYSICAL_LAYER_1000BASE_KX) + switch (adapter->link_speed) { + case IXGBE_LINK_SPEED_10GB_FULL: + ifmr->ifm_active |= IFM_10G_KX4 | IFM_FDX; + break; + case IXGBE_LINK_SPEED_2_5GB_FULL: + ifmr->ifm_active |= IFM_2500_KX | IFM_FDX; + break; + case IXGBE_LINK_SPEED_1GB_FULL: + ifmr->ifm_active |= IFM_1000_KX | IFM_FDX; + break; + } +#endif + + /* If nothing is recognized... */ + if (IFM_SUBTYPE(ifmr->ifm_active) == 0) + ifmr->ifm_active |= IFM_UNKNOWN; + +#if __FreeBSD_version >= 900025 + /* Display current flow control setting used on link */ + if (hw->fc.current_mode == ixgbe_fc_rx_pause || + hw->fc.current_mode == ixgbe_fc_full) + ifmr->ifm_active |= IFM_ETH_RXPAUSE; + if (hw->fc.current_mode == ixgbe_fc_tx_pause || + hw->fc.current_mode == ixgbe_fc_full) + ifmr->ifm_active |= IFM_ETH_TXPAUSE; +#endif + + IXGBE_CORE_UNLOCK(adapter); + + return; +} + +/********************************************************************* + * + * Media Ioctl callback + * + * This routine is called when the user changes speed/duplex using + * media/mediopt option with ifconfig. + * + **********************************************************************/ +static int +ixgbe_media_change(struct ifnet * ifp) +{ + struct adapter *adapter = ifp->if_softc; + struct ifmedia *ifm = &adapter->media; + struct ixgbe_hw *hw = &adapter->hw; + ixgbe_link_speed speed = 0; + + INIT_DEBUGOUT("ixgbe_media_change: begin"); + + if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) + return (EINVAL); + + if (hw->phy.media_type == ixgbe_media_type_backplane) + return (ENODEV); + + /* + ** We don't actually need to check against the supported + ** media types of the adapter; ifmedia will take care of + ** that for us. 
+ */ +#ifndef IFM_ETH_XTYPE + switch (IFM_SUBTYPE(ifm->ifm_media)) { + case IFM_AUTO: + case IFM_10G_T: + speed |= IXGBE_LINK_SPEED_100_FULL; + case IFM_10G_LRM: + case IFM_10G_SR: /* KR, too */ + case IFM_10G_LR: + case IFM_10G_CX4: /* KX4 */ + speed |= IXGBE_LINK_SPEED_1GB_FULL; + case IFM_10G_TWINAX: + speed |= IXGBE_LINK_SPEED_10GB_FULL; + break; + case IFM_1000_T: + speed |= IXGBE_LINK_SPEED_100_FULL; + case IFM_1000_LX: + case IFM_1000_SX: + case IFM_1000_CX: /* KX */ + speed |= IXGBE_LINK_SPEED_1GB_FULL; + break; + case IFM_100_TX: + speed |= IXGBE_LINK_SPEED_100_FULL; + break; + default: + goto invalid; + } +#else + switch (IFM_SUBTYPE(ifm->ifm_media)) { + case IFM_AUTO: + case IFM_10G_T: + speed |= IXGBE_LINK_SPEED_100_FULL; + case IFM_10G_LRM: + case IFM_10G_KR: + case IFM_10G_LR: + case IFM_10G_KX4: + speed |= IXGBE_LINK_SPEED_1GB_FULL; + case IFM_10G_TWINAX: + speed |= IXGBE_LINK_SPEED_10GB_FULL; + break; + case IFM_1000_T: + speed |= IXGBE_LINK_SPEED_100_FULL; + case IFM_1000_LX: + case IFM_1000_SX: + case IFM_1000_KX: + speed |= IXGBE_LINK_SPEED_1GB_FULL; + break; + case IFM_100_TX: + speed |= IXGBE_LINK_SPEED_100_FULL; + break; + default: + goto invalid; + } +#endif + + hw->mac.autotry_restart = TRUE; + hw->mac.ops.setup_link(hw, speed, TRUE); + if (IFM_SUBTYPE(ifm->ifm_media) == IFM_AUTO) { + adapter->advertise = 0; + } else { + if ((speed & IXGBE_LINK_SPEED_10GB_FULL) != 0) + adapter->advertise |= 1 << 2; + if ((speed & IXGBE_LINK_SPEED_1GB_FULL) != 0) + adapter->advertise |= 1 << 1; + if ((speed & IXGBE_LINK_SPEED_100_FULL) != 0) + adapter->advertise |= 1 << 0; + } + + return (0); + +invalid: + device_printf(adapter->dev, "Invalid media type!\n"); + return (EINVAL); +} + +static void +ixgbe_set_promisc(struct adapter *adapter) +{ + u_int32_t reg_rctl; + struct ifnet *ifp = adapter->ifp; + int mcnt = 0; + + reg_rctl = IXGBE_READ_REG(&adapter->hw, IXGBE_FCTRL); + reg_rctl &= (~IXGBE_FCTRL_UPE); + if (ifp->if_flags & IFF_ALLMULTI) + mcnt = MAX_NUM_MULTICAST_ADDRESSES; + else { + struct ifmultiaddr *ifma; +#if __FreeBSD_version < 800000 + IF_ADDR_LOCK(ifp); +#else + if_maddr_rlock(ifp); +#endif + TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { + if (ifma->ifma_addr->sa_family != AF_LINK) + continue; + if (mcnt == MAX_NUM_MULTICAST_ADDRESSES) + break; + mcnt++; + } +#if __FreeBSD_version < 800000 + IF_ADDR_UNLOCK(ifp); +#else + if_maddr_runlock(ifp); +#endif + } + if (mcnt < MAX_NUM_MULTICAST_ADDRESSES) + reg_rctl &= (~IXGBE_FCTRL_MPE); + IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl); + + if (ifp->if_flags & IFF_PROMISC) { + reg_rctl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE); + IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl); + } else if (ifp->if_flags & IFF_ALLMULTI) { + reg_rctl |= IXGBE_FCTRL_MPE; + reg_rctl &= ~IXGBE_FCTRL_UPE; + IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl); + } + return; +} + + +/********************************************************************* + * Multicast Update + * + * This routine is called whenever multicast address list is updated. 
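+ * It reprograms the MTA and the promiscuous mode bits.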
+ * + **********************************************************************/ +#define IXGBE_RAR_ENTRIES 16 + +static void +ixgbe_set_multi(struct adapter *adapter) +{ + u32 fctrl; + u8 *update_ptr; + struct ifmultiaddr *ifma; + struct ixgbe_mc_addr *mta; + int mcnt = 0; + struct ifnet *ifp = adapter->ifp; + + IOCTL_DEBUGOUT("ixgbe_set_multi: begin"); + + mta = adapter->mta; + bzero(mta, sizeof(*mta) * MAX_NUM_MULTICAST_ADDRESSES); + +#if __FreeBSD_version < 800000 + IF_ADDR_LOCK(ifp); +#else + if_maddr_rlock(ifp); +#endif + TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { + if (ifma->ifma_addr->sa_family != AF_LINK) + continue; + if (mcnt == MAX_NUM_MULTICAST_ADDRESSES) + break; + bcopy(LLADDR((struct sockaddr_dl *) ifma->ifma_addr), + mta[mcnt].addr, IXGBE_ETH_LENGTH_OF_ADDRESS); + mta[mcnt].vmdq = adapter->pool; + mcnt++; + } +#if __FreeBSD_version < 800000 + IF_ADDR_UNLOCK(ifp); +#else + if_maddr_runlock(ifp); +#endif + + fctrl = IXGBE_READ_REG(&adapter->hw, IXGBE_FCTRL); + fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE); + if (ifp->if_flags & IFF_PROMISC) + fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE); + else if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES || + ifp->if_flags & IFF_ALLMULTI) { + fctrl |= IXGBE_FCTRL_MPE; + fctrl &= ~IXGBE_FCTRL_UPE; + } else + fctrl &= ~(IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE); + + IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, fctrl); + + if (mcnt < MAX_NUM_MULTICAST_ADDRESSES) { + update_ptr = (u8 *)mta; + ixgbe_update_mc_addr_list(&adapter->hw, + update_ptr, mcnt, ixgbe_mc_array_itr, TRUE); + } + + return; +} + +/* + * This is an iterator function now needed by the multicast + * shared code. It simply feeds the shared code routine the + * addresses in the array of ixgbe_set_multi() one by one. + */ +static u8 * +ixgbe_mc_array_itr(struct ixgbe_hw *hw, u8 **update_ptr, u32 *vmdq) +{ + struct ixgbe_mc_addr *mta; + + mta = (struct ixgbe_mc_addr *)*update_ptr; + *vmdq = mta->vmdq; + + *update_ptr = (u8*)(mta + 1); + return (mta->addr); +} + + +/********************************************************************* + * Timer routine + * + * This routine checks for link status,updates statistics, + * and runs the watchdog check. + * + **********************************************************************/ + +static void +ixgbe_local_timer(void *arg) +{ + struct adapter *adapter = arg; + device_t dev = adapter->dev; + struct ix_queue *que = adapter->queues; + u64 queues = 0; + int hung = 0; + + mtx_assert(&adapter->core_mtx, MA_OWNED); + + /* Check for pluggable optics */ + if (adapter->sfp_probe) + if (!ixgbe_sfp_probe(adapter)) + goto out; /* Nothing to do */ + + ixgbe_update_link_status(adapter); + ixgbe_update_stats_counters(adapter); + + /* + ** Check the TX queues status + ** - mark hung queues so we don't schedule on them + ** - watchdog only if all queues show hung + */ + for (int i = 0; i < adapter->num_queues; i++, que++) { + /* Keep track of queues with work for soft irq */ + if (que->txr->busy) + queues |= ((u64)1 << que->me); + /* + ** Each time txeof runs without cleaning, but there + ** are uncleaned descriptors it increments busy. If + ** we get to the MAX we declare it hung. 
+ */ + if (que->busy == IXGBE_QUEUE_HUNG) { + ++hung; + /* Mark the queue as inactive */ + adapter->active_queues &= ~((u64)1 << que->me); + continue; + } else { + /* Check if we've come back from hung */ + if ((adapter->active_queues & ((u64)1 << que->me)) == 0) + adapter->active_queues |= ((u64)1 << que->me); + } + if (que->busy >= IXGBE_MAX_TX_BUSY) { + device_printf(dev,"Warning queue %d " + "appears to be hung!\n", i); + que->txr->busy = IXGBE_QUEUE_HUNG; + ++hung; + } + + } + + /* Only truly watchdog if all queues show hung */ + if (hung == adapter->num_queues) + goto watchdog; + else if (queues != 0) { /* Force an IRQ on queues with work */ + ixgbe_rearm_queues(adapter, queues); + } + +out: + callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter); + return; + +watchdog: + device_printf(adapter->dev, "Watchdog timeout -- resetting\n"); + adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING; + adapter->watchdog_events++; + ixgbe_init_locked(adapter); +} + + +/* +** Note: this routine updates the OS on the link state +** the real check of the hardware only happens with +** a link interrupt. +*/ +static void +ixgbe_update_link_status(struct adapter *adapter) +{ + struct ifnet *ifp = adapter->ifp; + device_t dev = adapter->dev; + + if (adapter->link_up){ + if (adapter->link_active == FALSE) { + if (bootverbose) + device_printf(dev,"Link is up %d Gbps %s \n", + ((adapter->link_speed == 128)? 10:1), + "Full Duplex"); + adapter->link_active = TRUE; + /* Update any Flow Control changes */ + ixgbe_fc_enable(&adapter->hw); + /* Update DMA coalescing config */ + ixgbe_config_dmac(adapter); + if_link_state_change(ifp, LINK_STATE_UP); +#ifdef PCI_IOV + ixgbe_ping_all_vfs(adapter); +#endif + } + } else { /* Link down */ + if (adapter->link_active == TRUE) { + if (bootverbose) + device_printf(dev,"Link is Down\n"); + if_link_state_change(ifp, LINK_STATE_DOWN); + adapter->link_active = FALSE; +#ifdef PCI_IOV + ixgbe_ping_all_vfs(adapter); +#endif + } + } + + return; +} + + +/********************************************************************* + * + * This routine disables all traffic on the adapter by issuing a + * global reset on the MAC and deallocates TX/RX buffers. + * + **********************************************************************/ + +static void +ixgbe_stop(void *arg) +{ + struct ifnet *ifp; + struct adapter *adapter = arg; + struct ixgbe_hw *hw = &adapter->hw; + ifp = adapter->ifp; + + mtx_assert(&adapter->core_mtx, MA_OWNED); + + INIT_DEBUGOUT("ixgbe_stop: begin\n"); + ixgbe_disable_intr(adapter); + callout_stop(&adapter->timer); + + /* Let the stack know...*/ + ifp->if_drv_flags &= ~IFF_DRV_RUNNING; + + ixgbe_reset_hw(hw); + hw->adapter_stopped = FALSE; + ixgbe_stop_adapter(hw); + if (hw->mac.type == ixgbe_mac_82599EB) + ixgbe_stop_mac_link_on_d3_82599(hw); + /* Turn off the laser - noop with no optics */ + ixgbe_disable_tx_laser(hw); + + /* Update the stack */ + adapter->link_up = FALSE; + ixgbe_update_link_status(adapter); + + /* reprogram the RAR[0] in case user changed it. */ + ixgbe_set_rar(&adapter->hw, 0, adapter->hw.mac.addr, 0, IXGBE_RAH_AV); + + return; +} + + +/********************************************************************* + * + * Determine hardware revision. 
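+ * Reads the PCI IDs and sets the MAC type and DMA scatter count.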
+ * + **********************************************************************/ +static void +ixgbe_identify_hardware(struct adapter *adapter) +{ + device_t dev = adapter->dev; + struct ixgbe_hw *hw = &adapter->hw; + + /* Save off the information about this board */ + hw->vendor_id = pci_get_vendor(dev); + hw->device_id = pci_get_device(dev); + hw->revision_id = pci_read_config(dev, PCIR_REVID, 1); + hw->subsystem_vendor_id = + pci_read_config(dev, PCIR_SUBVEND_0, 2); + hw->subsystem_device_id = + pci_read_config(dev, PCIR_SUBDEV_0, 2); + + /* + ** Make sure BUSMASTER is set + */ + pci_enable_busmaster(dev); + + /* We need this here to set the num_segs below */ + ixgbe_set_mac_type(hw); + + /* Pick up the 82599 settings */ + if (hw->mac.type != ixgbe_mac_82598EB) { + hw->phy.smart_speed = ixgbe_smart_speed; + adapter->num_segs = IXGBE_82599_SCATTER; + } else + adapter->num_segs = IXGBE_82598_SCATTER; + + return; +} + +/********************************************************************* + * + * Determine optic type + * + **********************************************************************/ +static void +ixgbe_setup_optics(struct adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + int layer; + + layer = adapter->phy_layer = ixgbe_get_supported_physical_layer(hw); + + if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_T) { + adapter->optics = IFM_10G_T; + return; + } + + if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_T) { + adapter->optics = IFM_1000_T; + return; + } + + if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_SX) { + adapter->optics = IFM_1000_SX; + return; + } + + if (layer & (IXGBE_PHYSICAL_LAYER_10GBASE_LR | + IXGBE_PHYSICAL_LAYER_10GBASE_LRM)) { + adapter->optics = IFM_10G_LR; + return; + } + + if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_SR) { + adapter->optics = IFM_10G_SR; + return; + } + + if (layer & IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU) { + adapter->optics = IFM_10G_TWINAX; + return; + } + + if (layer & (IXGBE_PHYSICAL_LAYER_10GBASE_KX4 | + IXGBE_PHYSICAL_LAYER_10GBASE_CX4)) { + adapter->optics = IFM_10G_CX4; + return; + } + + /* If we get here just set the default */ + adapter->optics = IFM_ETHER | IFM_AUTO; + return; +} + +/********************************************************************* + * + * Setup the Legacy or MSI Interrupt handler + * + **********************************************************************/ +static int +ixgbe_allocate_legacy(struct adapter *adapter) +{ + device_t dev = adapter->dev; + struct ix_queue *que = adapter->queues; +#ifndef IXGBE_LEGACY_TX + struct tx_ring *txr = adapter->tx_rings; +#endif + int error, rid = 0; + + /* MSI RID at 1 */ + if (adapter->msix == 1) + rid = 1; + + /* We allocate a single interrupt resource */ + adapter->res = bus_alloc_resource_any(dev, + SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); + if (adapter->res == NULL) { + device_printf(dev, "Unable to allocate bus resource: " + "interrupt\n"); + return (ENXIO); + } + + /* + * Try allocating a fast interrupt and the associated deferred + * processing contexts. 
+ */ +#ifndef IXGBE_LEGACY_TX + TASK_INIT(&txr->txq_task, 0, ixgbe_deferred_mq_start, txr); +#endif + TASK_INIT(&que->que_task, 0, ixgbe_handle_que, que); + que->tq = taskqueue_create_fast("ixgbe_que", M_NOWAIT, + taskqueue_thread_enqueue, &que->tq); + taskqueue_start_threads(&que->tq, 1, PI_NET, "%s ixq", + device_get_nameunit(adapter->dev)); + + /* Tasklets for Link, SFP and Multispeed Fiber */ + TASK_INIT(&adapter->link_task, 0, ixgbe_handle_link, adapter); + TASK_INIT(&adapter->mod_task, 0, ixgbe_handle_mod, adapter); + TASK_INIT(&adapter->msf_task, 0, ixgbe_handle_msf, adapter); + TASK_INIT(&adapter->phy_task, 0, ixgbe_handle_phy, adapter); +#ifdef IXGBE_FDIR + TASK_INIT(&adapter->fdir_task, 0, ixgbe_reinit_fdir, adapter); +#endif + adapter->tq = taskqueue_create_fast("ixgbe_link", M_NOWAIT, + taskqueue_thread_enqueue, &adapter->tq); + taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s linkq", + device_get_nameunit(adapter->dev)); + + if ((error = bus_setup_intr(dev, adapter->res, + INTR_TYPE_NET | INTR_MPSAFE, NULL, ixgbe_legacy_irq, + que, &adapter->tag)) != 0) { + device_printf(dev, "Failed to register fast interrupt " + "handler: %d\n", error); + taskqueue_free(que->tq); + taskqueue_free(adapter->tq); + que->tq = NULL; + adapter->tq = NULL; + return (error); + } + /* For simplicity in the handlers */ + adapter->active_queues = IXGBE_EIMS_ENABLE_MASK; + + return (0); +} + + +/********************************************************************* + * + * Setup MSIX Interrupt resources and handlers + * + **********************************************************************/ +static int +ixgbe_allocate_msix(struct adapter *adapter) +{ + device_t dev = adapter->dev; + struct ix_queue *que = adapter->queues; + struct tx_ring *txr = adapter->tx_rings; + int error, rid, vector = 0; + int cpu_id = 0; +#ifdef RSS + cpuset_t cpu_mask; +#endif + +#ifdef RSS + /* + * If we're doing RSS, the number of queues needs to + * match the number of RSS buckets that are configured. + * + * + If there's more queues than RSS buckets, we'll end + * up with queues that get no traffic. + * + * + If there's more RSS buckets than queues, we'll end + * up having multiple RSS buckets map to the same queue, + * so there'll be some contention. + */ + if (adapter->num_queues != rss_getnumbuckets()) { + device_printf(dev, + "%s: number of queues (%d) != number of RSS buckets (%d)" + "; performance will be impacted.\n", + __func__, + adapter->num_queues, + rss_getnumbuckets()); + } +#endif + + for (int i = 0; i < adapter->num_queues; i++, vector++, que++, txr++) { + rid = vector + 1; + que->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, + RF_SHAREABLE | RF_ACTIVE); + if (que->res == NULL) { + device_printf(dev,"Unable to allocate" + " bus resource: que interrupt [%d]\n", vector); + return (ENXIO); + } + /* Set the handler function */ + error = bus_setup_intr(dev, que->res, + INTR_TYPE_NET | INTR_MPSAFE, NULL, + ixgbe_msix_que, que, &que->tag); + if (error) { + que->res = NULL; + device_printf(dev, "Failed to register QUE handler"); + return (error); + } +#if __FreeBSD_version >= 800504 + bus_describe_intr(dev, que->res, que->tag, "q%d", i); +#endif + que->msix = vector; + adapter->active_queues |= (u64)(1 << que->msix); +#ifdef RSS + /* + * The queue ID is used as the RSS layer bucket ID. + * We look up the queue ID -> RSS CPU ID and select + * that. + */ + cpu_id = rss_getcpu(i % rss_getnumbuckets()); +#else + /* + * Bind the msix vector, and thus the + * rings to the corresponding cpu. 
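+	 * (Illustrative mapping, not new behavior: with 4 queues,
+	 * MSI-X table entries 0..3 (bus RIDs 1..4) service queues
+	 * 0..3 and cpu_id equals the queue index, so queue i lands
+	 * on CPU i; the link vector is allocated afterwards and is
+	 * left unbound.)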
+ * + * This just happens to match the default RSS round-robin + * bucket -> queue -> CPU allocation. + */ + if (adapter->num_queues > 1) + cpu_id = i; +#endif + if (adapter->num_queues > 1) + bus_bind_intr(dev, que->res, cpu_id); +#ifdef IXGBE_DEBUG +#ifdef RSS + device_printf(dev, + "Bound RSS bucket %d to CPU %d\n", + i, cpu_id); +#else + device_printf(dev, + "Bound queue %d to cpu %d\n", + i, cpu_id); +#endif +#endif /* IXGBE_DEBUG */ + + +#ifndef IXGBE_LEGACY_TX + TASK_INIT(&txr->txq_task, 0, ixgbe_deferred_mq_start, txr); +#endif + TASK_INIT(&que->que_task, 0, ixgbe_handle_que, que); + que->tq = taskqueue_create_fast("ixgbe_que", M_NOWAIT, + taskqueue_thread_enqueue, &que->tq); +#ifdef RSS + CPU_SETOF(cpu_id, &cpu_mask); + taskqueue_start_threads_cpuset(&que->tq, 1, PI_NET, + &cpu_mask, + "%s (bucket %d)", + device_get_nameunit(adapter->dev), + cpu_id); +#else + taskqueue_start_threads(&que->tq, 1, PI_NET, "%s:q%d", + device_get_nameunit(adapter->dev), i); +#endif + } + + /* and Link */ + rid = vector + 1; + adapter->res = bus_alloc_resource_any(dev, + SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); + if (!adapter->res) { + device_printf(dev,"Unable to allocate" + " bus resource: Link interrupt [%d]\n", rid); + return (ENXIO); + } + /* Set the link handler function */ + error = bus_setup_intr(dev, adapter->res, + INTR_TYPE_NET | INTR_MPSAFE, NULL, + ixgbe_msix_link, adapter, &adapter->tag); + if (error) { + adapter->res = NULL; + device_printf(dev, "Failed to register LINK handler"); + return (error); + } +#if __FreeBSD_version >= 800504 + bus_describe_intr(dev, adapter->res, adapter->tag, "link"); +#endif + adapter->vector = vector; + /* Tasklets for Link, SFP and Multispeed Fiber */ + TASK_INIT(&adapter->link_task, 0, ixgbe_handle_link, adapter); + TASK_INIT(&adapter->mod_task, 0, ixgbe_handle_mod, adapter); + TASK_INIT(&adapter->msf_task, 0, ixgbe_handle_msf, adapter); +#ifdef PCI_IOV + TASK_INIT(&adapter->mbx_task, 0, ixgbe_handle_mbx, adapter); +#endif + TASK_INIT(&adapter->phy_task, 0, ixgbe_handle_phy, adapter); +#ifdef IXGBE_FDIR + TASK_INIT(&adapter->fdir_task, 0, ixgbe_reinit_fdir, adapter); +#endif + adapter->tq = taskqueue_create_fast("ixgbe_link", M_NOWAIT, + taskqueue_thread_enqueue, &adapter->tq); + taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s linkq", + device_get_nameunit(adapter->dev)); + + return (0); +} + +/* + * Setup Either MSI/X or MSI + */ +static int +ixgbe_setup_msix(struct adapter *adapter) +{ + device_t dev = adapter->dev; + int rid, want, queues, msgs; + + /* Override by tuneable */ + if (ixgbe_enable_msix == 0) + goto msi; + + /* First try MSI/X */ + msgs = pci_msix_count(dev); + if (msgs == 0) + goto msi; + rid = PCIR_BAR(MSIX_82598_BAR); + adapter->msix_mem = bus_alloc_resource_any(dev, + SYS_RES_MEMORY, &rid, RF_ACTIVE); + if (adapter->msix_mem == NULL) { + rid += 4; /* 82599 maps in higher BAR */ + adapter->msix_mem = bus_alloc_resource_any(dev, + SYS_RES_MEMORY, &rid, RF_ACTIVE); + } + if (adapter->msix_mem == NULL) { + /* May not be enabled */ + device_printf(adapter->dev, + "Unable to map MSIX table \n"); + goto msi; + } + + /* Figure out a reasonable auto config value */ + queues = (mp_ncpus > (msgs - 1)) ? 
(msgs - 1) : mp_ncpus; + +#ifdef RSS + /* If we're doing RSS, clamp at the number of RSS buckets */ + if (queues > rss_getnumbuckets()) + queues = rss_getnumbuckets(); +#endif + + if (ixgbe_num_queues != 0) + queues = ixgbe_num_queues; + /* Set max queues to 8 when autoconfiguring */ + else if ((ixgbe_num_queues == 0) && (queues > 8)) + queues = 8; + + /* reflect correct sysctl value */ + ixgbe_num_queues = queues; + + /* + ** Want one vector (RX/TX pair) per queue + ** plus an additional for Link. + */ + want = queues + 1; + if (msgs >= want) + msgs = want; + else { + device_printf(adapter->dev, + "MSIX Configuration Problem, " + "%d vectors but %d queues wanted!\n", + msgs, want); + goto msi; + } + if ((pci_alloc_msix(dev, &msgs) == 0) && (msgs == want)) { + device_printf(adapter->dev, + "Using MSIX interrupts with %d vectors\n", msgs); + adapter->num_queues = queues; + return (msgs); + } + /* + ** If MSIX alloc failed or provided us with + ** less than needed, free and fall through to MSI + */ + pci_release_msi(dev); + +msi: + if (adapter->msix_mem != NULL) { + bus_release_resource(dev, SYS_RES_MEMORY, + rid, adapter->msix_mem); + adapter->msix_mem = NULL; + } + msgs = 1; + if (pci_alloc_msi(dev, &msgs) == 0) { + device_printf(adapter->dev, "Using an MSI interrupt\n"); + return (msgs); + } + device_printf(adapter->dev, "Using a Legacy interrupt\n"); + return (0); +} + + +static int +ixgbe_allocate_pci_resources(struct adapter *adapter) +{ + int rid; + device_t dev = adapter->dev; + + rid = PCIR_BAR(0); + adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, + &rid, RF_ACTIVE); + + if (!(adapter->pci_mem)) { + device_printf(dev, "Unable to allocate bus resource: memory\n"); + return (ENXIO); + } + + /* Save bus_space values for READ/WRITE_REG macros */ + adapter->osdep.mem_bus_space_tag = + rman_get_bustag(adapter->pci_mem); + adapter->osdep.mem_bus_space_handle = + rman_get_bushandle(adapter->pci_mem); + /* Set hw values for shared code */ + adapter->hw.hw_addr = (u8 *) &adapter->osdep.mem_bus_space_handle; + adapter->hw.back = adapter; + + /* Default to 1 queue if MSI-X setup fails */ + adapter->num_queues = 1; + + /* + ** Now setup MSI or MSI-X, should + ** return us the number of supported + ** vectors. (Will be 1 for MSI) + */ + adapter->msix = ixgbe_setup_msix(adapter); + return (0); +} + +static void +ixgbe_free_pci_resources(struct adapter * adapter) +{ + struct ix_queue *que = adapter->queues; + device_t dev = adapter->dev; + int rid, memrid; + + if (adapter->hw.mac.type == ixgbe_mac_82598EB) + memrid = PCIR_BAR(MSIX_82598_BAR); + else + memrid = PCIR_BAR(MSIX_82599_BAR); + + /* + ** There is a slight possibility of a failure mode + ** in attach that will result in entering this function + ** before interrupt resources have been initialized, and + ** in that case we do not want to execute the loops below + ** We can detect this reliably by the state of the adapter + ** res pointer. + */ + if (adapter->res == NULL) + goto mem; + + /* + ** Release all msix queue resources: + */ + for (int i = 0; i < adapter->num_queues; i++, que++) { + rid = que->msix + 1; + if (que->tag != NULL) { + bus_teardown_intr(dev, que->res, que->tag); + que->tag = NULL; + } + if (que->res != NULL) + bus_release_resource(dev, SYS_RES_IRQ, rid, que->res); + } + + + /* Clean the Legacy or Link interrupt last */ + if (adapter->vector) /* we are doing MSIX */ + rid = adapter->vector + 1; + else + (adapter->msix != 0) ? 
(rid = 1):(rid = 0);
+
+	if (adapter->tag != NULL) {
+		bus_teardown_intr(dev, adapter->res, adapter->tag);
+		adapter->tag = NULL;
+	}
+	if (adapter->res != NULL)
+		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
+
+mem:
+	if (adapter->msix)
+		pci_release_msi(dev);
+
+	if (adapter->msix_mem != NULL)
+		bus_release_resource(dev, SYS_RES_MEMORY,
+		    memrid, adapter->msix_mem);
+
+	if (adapter->pci_mem != NULL)
+		bus_release_resource(dev, SYS_RES_MEMORY,
+		    PCIR_BAR(0), adapter->pci_mem);
+
+	return;
+}
+
+/*********************************************************************
+ *
+ * Setup networking device structure and register an interface.
+ *
+ **********************************************************************/
+static int
+ixgbe_setup_interface(device_t dev, struct adapter *adapter)
+{
+	struct ifnet *ifp;
+
+	INIT_DEBUGOUT("ixgbe_setup_interface: begin");
+
+	ifp = adapter->ifp = if_alloc(IFT_ETHER);
+	if (ifp == NULL) {
+		device_printf(dev, "cannot allocate ifnet structure\n");
+		return (-1);
+	}
+	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
+	ifp->if_baudrate = IF_Gbps(10);
+	ifp->if_init = ixgbe_init;
+	ifp->if_softc = adapter;
+	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
+	ifp->if_ioctl = ixgbe_ioctl;
+#if __FreeBSD_version >= 1100036
+	if_setgetcounterfn(ifp, ixgbe_get_counter);
+#endif
+#if __FreeBSD_version >= 1100045
+	/* TSO parameters */
+	ifp->if_hw_tsomax = 65518;
+	ifp->if_hw_tsomaxsegcount = IXGBE_82599_SCATTER;
+	ifp->if_hw_tsomaxsegsize = 2048;
+#endif
+#ifndef IXGBE_LEGACY_TX
+	ifp->if_transmit = ixgbe_mq_start;
+	ifp->if_qflush = ixgbe_qflush;
+#else
+	ifp->if_start = ixgbe_start;
+	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 2);
+	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 2;
+	IFQ_SET_READY(&ifp->if_snd);
+#endif
+
+	ether_ifattach(ifp, adapter->hw.mac.addr);
+
+	adapter->max_frame_size =
+	    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
+
+	/*
+	 * Tell the upper layer(s) we support long frames.
+	 */
+	ifp->if_hdrlen = sizeof(struct ether_vlan_header);
+
+	/* Set capability flags */
+	ifp->if_capabilities |= IFCAP_RXCSUM
+			     |  IFCAP_TXCSUM
+			     |  IFCAP_RXCSUM_IPV6
+			     |  IFCAP_TXCSUM_IPV6
+			     |  IFCAP_TSO4
+			     |  IFCAP_TSO6
+			     |  IFCAP_LRO
+			     |  IFCAP_VLAN_HWTAGGING
+			     |  IFCAP_VLAN_HWTSO
+			     |  IFCAP_VLAN_HWCSUM
+			     |  IFCAP_JUMBO_MTU
+			     |  IFCAP_VLAN_MTU
+			     |  IFCAP_HWSTATS;
+
+	/* Enable the above capabilities by default */
+	ifp->if_capenable = ifp->if_capabilities;
+
+	/*
+	** Don't turn this on by default: if vlans are
+	** created on another pseudo device (e.g. lagg)
+	** then vlan events are not passed through, breaking
+	** operation, but with HW FILTER off it works. If
+	** using vlans directly on the ixgbe driver you can
+	** enable this and get full hardware tag filtering.
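+	** (Illustrative usage, assuming the stock ifconfig(8)
+	** capability keywords:
+	**	ifconfig ix0 vlanhwfilter	# enable at runtime
+	**	ifconfig ix0 -vlanhwfilter	# back to soft filtering
+	** once the capability is advertised below.)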
+ */ + ifp->if_capabilities |= IFCAP_VLAN_HWFILTER; + + /* + * Specify the media types supported by this adapter and register + * callbacks to update media and link information + */ + ifmedia_init(&adapter->media, IFM_IMASK, ixgbe_media_change, + ixgbe_media_status); + + adapter->phy_layer = ixgbe_get_supported_physical_layer(&adapter->hw); + ixgbe_add_media_types(adapter); + + /* Set autoselect media by default */ + ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO); + + return (0); +} + +static void +ixgbe_add_media_types(struct adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + device_t dev = adapter->dev; + int layer; + + layer = adapter->phy_layer; + + /* Media types with matching FreeBSD media defines */ + if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_T) + ifmedia_add(&adapter->media, IFM_ETHER | IFM_10G_T, 0, NULL); + if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_T) + ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_T, 0, NULL); + if (layer & IXGBE_PHYSICAL_LAYER_100BASE_TX) + ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX, 0, NULL); + + if (layer & IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU || + layer & IXGBE_PHYSICAL_LAYER_SFP_ACTIVE_DA) + ifmedia_add(&adapter->media, IFM_ETHER | IFM_10G_TWINAX, 0, NULL); + + if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_LR) { + ifmedia_add(&adapter->media, IFM_ETHER | IFM_10G_LR, 0, NULL); + if (hw->phy.multispeed_fiber) + ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_LX, 0, NULL); + } + if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_SR) { + ifmedia_add(&adapter->media, IFM_ETHER | IFM_10G_SR, 0, NULL); + if (hw->phy.multispeed_fiber) + ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL); + } else if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_SX) + ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL); + if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_CX4) + ifmedia_add(&adapter->media, IFM_ETHER | IFM_10G_CX4, 0, NULL); + +#ifdef IFM_ETH_XTYPE + if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KR) + ifmedia_add(&adapter->media, IFM_ETHER | IFM_10G_KR, 0, NULL); + if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KX4) + ifmedia_add(&adapter->media, IFM_ETHER | IFM_10G_KX4, 0, NULL); + if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_KX) + ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_KX, 0, NULL); +#else + if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KR) { + device_printf(dev, "Media supported: 10GbaseKR\n"); + device_printf(dev, "10GbaseKR mapped to 10GbaseSR\n"); + ifmedia_add(&adapter->media, IFM_ETHER | IFM_10G_SR, 0, NULL); + } + if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KX4) { + device_printf(dev, "Media supported: 10GbaseKX4\n"); + device_printf(dev, "10GbaseKX4 mapped to 10GbaseCX4\n"); + ifmedia_add(&adapter->media, IFM_ETHER | IFM_10G_CX4, 0, NULL); + } + if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_KX) { + device_printf(dev, "Media supported: 1000baseKX\n"); + device_printf(dev, "1000baseKX mapped to 1000baseCX\n"); + ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_CX, 0, NULL); + } +#endif + if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_BX) + device_printf(dev, "Media supported: 1000baseBX\n"); + + if (hw->device_id == IXGBE_DEV_ID_82598AT) { + ifmedia_add(&adapter->media, + IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL); + ifmedia_add(&adapter->media, + IFM_ETHER | IFM_1000_T, 0, NULL); + } + + ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL); +} + +static void +ixgbe_config_link(struct adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + u32 autoneg, err = 0; + bool sfp, negotiate; + + sfp = ixgbe_is_sfp(hw); + + if (sfp) { + 
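+		/*
+		 * Defer module identification/setup to the mod
+		 * tasklet; it may sleep, so it must run outside
+		 * interrupt context.
+		 */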
taskqueue_enqueue(adapter->tq, &adapter->mod_task);
+	} else {
+		if (hw->mac.ops.check_link)
+			err = ixgbe_check_link(hw, &adapter->link_speed,
+			    &adapter->link_up, FALSE);
+		if (err)
+			goto out;
+		autoneg = hw->phy.autoneg_advertised;
+		if ((!autoneg) && (hw->mac.ops.get_link_capabilities))
+			err = hw->mac.ops.get_link_capabilities(hw,
+			    &autoneg, &negotiate);
+		if (err)
+			goto out;
+		if (hw->mac.ops.setup_link)
+			err = hw->mac.ops.setup_link(hw,
+			    autoneg, adapter->link_up);
+	}
+out:
+	return;
+}
+
+
+/*********************************************************************
+ *
+ * Enable transmit units.
+ *
+ **********************************************************************/
+static void
+ixgbe_initialize_transmit_units(struct adapter *adapter)
+{
+	struct tx_ring	*txr = adapter->tx_rings;
+	struct ixgbe_hw	*hw = &adapter->hw;
+
+	/* Setup the Base and Length of the Tx Descriptor Ring */
+	for (int i = 0; i < adapter->num_queues; i++, txr++) {
+		u64	tdba = txr->txdma.dma_paddr;
+		u32	txctrl = 0;
+		int	j = txr->me;
+
+		IXGBE_WRITE_REG(hw, IXGBE_TDBAL(j),
+		    (tdba & 0x00000000ffffffffULL));
+		IXGBE_WRITE_REG(hw, IXGBE_TDBAH(j), (tdba >> 32));
+		IXGBE_WRITE_REG(hw, IXGBE_TDLEN(j),
+		    adapter->num_tx_desc * sizeof(union ixgbe_adv_tx_desc));
+
+		/* Setup the HW Tx Head and Tail descriptor pointers */
+		IXGBE_WRITE_REG(hw, IXGBE_TDH(j), 0);
+		IXGBE_WRITE_REG(hw, IXGBE_TDT(j), 0);
+
+		/* Cache the tail address */
+		txr->tail = IXGBE_TDT(j);
+
+		/* Disable Head Writeback */
+		/*
+		 * Note: for X550 series devices, these registers are actually
+		 * prefixed with TPH_ instead of DCA_, but the addresses and
+		 * fields remain the same.
+		 */
+		switch (hw->mac.type) {
+		case ixgbe_mac_82598EB:
+			txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(j));
+			break;
+		default:
+			txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL_82599(j));
+			break;
+		}
+		txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
+		switch (hw->mac.type) {
+		case ixgbe_mac_82598EB:
+			IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(j), txctrl);
+			break;
+		default:
+			IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(j), txctrl);
+			break;
+		}
+
+	}
+
+	if (hw->mac.type != ixgbe_mac_82598EB) {
+		u32 dmatxctl, rttdcs;
+#ifdef PCI_IOV
+		enum ixgbe_iov_mode mode = ixgbe_get_iov_mode(adapter);
+#endif
+		dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
+		dmatxctl |= IXGBE_DMATXCTL_TE;
+		IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
+		/* Disable arbiter to set MTQC */
+		rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
+		rttdcs |= IXGBE_RTTDCS_ARBDIS;
+		IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
+#ifdef PCI_IOV
+		IXGBE_WRITE_REG(hw, IXGBE_MTQC, ixgbe_get_mtqc(mode));
+#else
+		IXGBE_WRITE_REG(hw, IXGBE_MTQC, IXGBE_MTQC_64Q_1PB);
+#endif
+		rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
+		IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
+	}
+
+	return;
+}
+
+static void
+ixgbe_initialize_rss_mapping(struct adapter *adapter)
+{
+	struct ixgbe_hw	*hw = &adapter->hw;
+	u32 reta = 0, mrqc, rss_key[10];
+	int queue_id, table_size, index_mult;
+#ifdef RSS
+	u32 rss_hash_config;
+#endif
+#ifdef PCI_IOV
+	enum ixgbe_iov_mode mode;
+#endif
+
+#ifdef RSS
+	/* Fetch the configured RSS key */
+	rss_getkey((uint8_t *) &rss_key);
+#else
+	/* set up random bits */
+	arc4rand(&rss_key, sizeof(rss_key), 0);
+#endif
+
+	/* Set multiplier for RETA setup and table size based on MAC */
+	index_mult = 0x1;
+	table_size = 128;
+	switch (adapter->hw.mac.type) {
+	case ixgbe_mac_82598EB:
+		index_mult = 0x11;
+		break;
+	case ixgbe_mac_X550:
+	case ixgbe_mac_X550EM_x:
+		table_size = 512;
+		break;
+	default:
+		break;
+	}
+
+	/* Set up the
redirection table */
+	for (int i = 0, j = 0; i < table_size; i++, j++) {
+		if (j == adapter->num_queues) j = 0;
+#ifdef RSS
+		/*
+		 * Fetch the RSS bucket id for the given indirection entry.
+		 * Cap it at the number of configured buckets (which is
+		 * num_queues.)
+		 */
+		queue_id = rss_get_indirection_to_bucket(i);
+		queue_id = queue_id % adapter->num_queues;
+#else
+		queue_id = (j * index_mult);
+#endif
+		/*
+		 * The low 8 bits are for hash value (n+0);
+		 * The next 8 bits are for hash value (n+1), etc.
+		 */
+		reta = reta >> 8;
+		reta = reta | ( ((uint32_t) queue_id) << 24);
+		if ((i & 3) == 3) {
+			if (i < 128)
+				IXGBE_WRITE_REG(hw, IXGBE_RETA(i >> 2), reta);
+			else
+				IXGBE_WRITE_REG(hw, IXGBE_ERETA((i >> 2) - 32), reta);
+			reta = 0;
+		}
+	}
+
+	/* Now fill our hash function seeds */
+	for (int i = 0; i < 10; i++)
+		IXGBE_WRITE_REG(hw, IXGBE_RSSRK(i), rss_key[i]);
+
+	/* Perform hash on these packet types */
+#ifdef RSS
+	mrqc = IXGBE_MRQC_RSSEN;
+	rss_hash_config = rss_gethashconfig();
+	if (rss_hash_config & RSS_HASHTYPE_RSS_IPV4)
+		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4;
+	if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV4)
+		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_TCP;
+	if (rss_hash_config & RSS_HASHTYPE_RSS_IPV6)
+		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6;
+	if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV6)
+		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_TCP;
+	if (rss_hash_config & RSS_HASHTYPE_RSS_IPV6_EX)
+		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX;
+	if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV6_EX)
+		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP;
+	if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV4)
+		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP;
+	if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV4_EX)
+		device_printf(adapter->dev,
+		    "%s: RSS_HASHTYPE_RSS_UDP_IPV4_EX defined, "
+		    "but not supported\n", __func__);
+	if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV6)
+		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP;
+	if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV6_EX)
+		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
+#else
+	/*
+	 * Disable UDP - IP fragments aren't currently being handled
+	 * and so we end up with a mix of 2-tuple and 4-tuple
+	 * traffic.
+	 */
+	mrqc = IXGBE_MRQC_RSSEN
+	     | IXGBE_MRQC_RSS_FIELD_IPV4
+	     | IXGBE_MRQC_RSS_FIELD_IPV4_TCP
+	     | IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP
+	     | IXGBE_MRQC_RSS_FIELD_IPV6_EX
+	     | IXGBE_MRQC_RSS_FIELD_IPV6
+	     | IXGBE_MRQC_RSS_FIELD_IPV6_TCP
+	;
+#endif /* RSS */
+#ifdef PCI_IOV
+	mode = ixgbe_get_iov_mode(adapter);
+	mrqc |= ixgbe_get_mrqc(mode);
+#endif
+	IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
+}
+
+
+/*********************************************************************
+ *
+ * Setup receive registers and features.
+ *
+ **********************************************************************/
+#define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT 2
+
+#define BSIZEPKT_ROUNDUP ((1<<IXGBE_SRRCTL_BSIZEPKT_SHIFT)-1)
+
+static void
+ixgbe_initialize_receive_units(struct adapter *adapter)
+{
+	struct rx_ring	*rxr = adapter->rx_rings;
+	struct ixgbe_hw	*hw = &adapter->hw;
+	struct ifnet	*ifp = adapter->ifp;
+	u32		bufsz, fctrl, srrctl, rxcsum;
+	u32		hlreg;
+
+	/*
+	 * Make sure receives are disabled while
+	 * setting up the descriptor ring
+	 */
+	ixgbe_disable_rx(hw);
+
+	/* Enable broadcasts */
+	fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
+	fctrl |= IXGBE_FCTRL_BAM;
+	if (adapter->hw.mac.type == ixgbe_mac_82598EB) {
+		fctrl |= IXGBE_FCTRL_DPF;
+		fctrl |= IXGBE_FCTRL_PMCF;
+	}
+	IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
+
+	/* Set for Jumbo Frames? */
+	hlreg = IXGBE_READ_REG(hw, IXGBE_HLREG0);
+	if (ifp->if_mtu > ETHERMTU)
+		hlreg |= IXGBE_HLREG0_JUMBOEN;
+	else
+		hlreg &= ~IXGBE_HLREG0_JUMBOEN;
+#ifdef DEV_NETMAP
+	/* crcstrip is conditional in netmap (in RDRXCTL too ?)
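+	 * (Assumption for context: ix_crcstrip is the netmap-side
+	 * tunable; with stripping disabled the 4-byte FCS stays in
+	 * the buffer so netmap sees frames exactly as on the wire.)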
*/
+	if (ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
+		hlreg &= ~IXGBE_HLREG0_RXCRCSTRP;
+	else
+		hlreg |= IXGBE_HLREG0_RXCRCSTRP;
+#endif /* DEV_NETMAP */
+	IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg);
+
+	bufsz = (adapter->rx_mbuf_sz +
+	    BSIZEPKT_ROUNDUP) >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
+
+	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
+		u64 rdba = rxr->rxdma.dma_paddr;
+		int j = rxr->me;
+
+		/* Setup the Base and Length of the Rx Descriptor Ring */
+		IXGBE_WRITE_REG(hw, IXGBE_RDBAL(j),
+		    (rdba & 0x00000000ffffffffULL));
+		IXGBE_WRITE_REG(hw, IXGBE_RDBAH(j), (rdba >> 32));
+		IXGBE_WRITE_REG(hw, IXGBE_RDLEN(j),
+		    adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc));
+
+		/* Set up the SRRCTL register */
+		srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(j));
+		srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
+		srrctl &= ~IXGBE_SRRCTL_BSIZEPKT_MASK;
+		srrctl |= bufsz;
+		srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
+
+		/*
+		 * Set DROP_EN iff we have no flow control and >1 queue.
+		 * Note that srrctl was cleared shortly before during reset,
+		 * so we do not need to clear the bit, but do it just in case
+		 * this code is moved elsewhere.
+		 */
+		if (adapter->num_queues > 1 &&
+		    adapter->hw.fc.requested_mode == ixgbe_fc_none) {
+			srrctl |= IXGBE_SRRCTL_DROP_EN;
+		} else {
+			srrctl &= ~IXGBE_SRRCTL_DROP_EN;
+		}
+
+		IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(j), srrctl);
+
+		/* Setup the HW Rx Head and Tail Descriptor Pointers */
+		IXGBE_WRITE_REG(hw, IXGBE_RDH(j), 0);
+		IXGBE_WRITE_REG(hw, IXGBE_RDT(j), 0);
+
+		/* Set the driver rx tail address */
+		rxr->tail = IXGBE_RDT(rxr->me);
+	}
+
+	if (adapter->hw.mac.type != ixgbe_mac_82598EB) {
+		u32 psrtype = IXGBE_PSRTYPE_TCPHDR |
+			      IXGBE_PSRTYPE_UDPHDR |
+			      IXGBE_PSRTYPE_IPV4HDR |
+			      IXGBE_PSRTYPE_IPV6HDR;
+		IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0), psrtype);
+	}
+
+	rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
+
+	ixgbe_initialize_rss_mapping(adapter);
+
+	if (adapter->num_queues > 1) {
+		/* RSS and RX IPP Checksum are mutually exclusive */
+		rxcsum |= IXGBE_RXCSUM_PCSD;
+	}
+
+	if (ifp->if_capenable & IFCAP_RXCSUM)
+		rxcsum |= IXGBE_RXCSUM_PCSD;
+
+	/* This is useful for calculating UDP/IP fragment checksums */
+	if (!(rxcsum & IXGBE_RXCSUM_PCSD))
+		rxcsum |= IXGBE_RXCSUM_IPPCSE;
+
+	IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
+
+	return;
+}
+
+
+/*
+** This routine is run via a vlan config EVENT; it enables us
+** to use the HW Filter table since we can get the vlan id.
+** This just creates the entry in the soft version of the
+** VFTA; init will repopulate the real table.
+*/
+static void
+ixgbe_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
+{
+	struct adapter	*adapter = ifp->if_softc;
+	u16		index, bit;
+
+	if (ifp->if_softc != arg)   /* Not our event */
+		return;
+
+	if ((vtag == 0) || (vtag > 4095))	/* Invalid */
+		return;
+
+	IXGBE_CORE_LOCK(adapter);
+	index = (vtag >> 5) & 0x7F;
+	bit = vtag & 0x1F;
+	adapter->shadow_vfta[index] |= (1 << bit);
+	++adapter->num_vlans;
+	ixgbe_setup_vlan_hw_support(adapter);
+	IXGBE_CORE_UNLOCK(adapter);
+}
+
+/*
+** This routine is run via a vlan
+** unconfig EVENT, removing our entry
+** in the soft vfta.
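+** (Worked example of the index/bit math used on both paths:
+** vtag 100 gives index = (100 >> 5) & 0x7F = 3 and
+** bit = 100 & 0x1F = 4, so unregistering clears bit 4 of
+** shadow_vfta[3] and registering sets the same bit.)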
+*/
+static void
+ixgbe_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
+{
+	struct adapter	*adapter = ifp->if_softc;
+	u16		index, bit;
+
+	if (ifp->if_softc != arg)
+		return;
+
+	if ((vtag == 0) || (vtag > 4095))	/* Invalid */
+		return;
+
+	IXGBE_CORE_LOCK(adapter);
+	index = (vtag >> 5) & 0x7F;
+	bit = vtag & 0x1F;
+	adapter->shadow_vfta[index] &= ~(1 << bit);
+	--adapter->num_vlans;
+	/* Re-init to load the changes */
+	ixgbe_setup_vlan_hw_support(adapter);
+	IXGBE_CORE_UNLOCK(adapter);
+}
+
+static void
+ixgbe_setup_vlan_hw_support(struct adapter *adapter)
+{
+	struct ifnet	*ifp = adapter->ifp;
+	struct ixgbe_hw *hw = &adapter->hw;
+	struct rx_ring	*rxr;
+	u32		ctrl;
+
+
+	/*
+	** We get here thru init_locked, meaning
+	** a soft reset; this has already cleared
+	** the VFTA and other state, so if there
+	** have been no vlans registered do nothing.
+	*/
+	if (adapter->num_vlans == 0)
+		return;
+
+	/* Setup the queues for vlans */
+	for (int i = 0; i < adapter->num_queues; i++) {
+		rxr = &adapter->rx_rings[i];
+		/* On 82599 the VLAN enable is per/queue in RXDCTL */
+		if (hw->mac.type != ixgbe_mac_82598EB) {
+			ctrl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxr->me));
+			ctrl |= IXGBE_RXDCTL_VME;
+			IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxr->me), ctrl);
+		}
+		rxr->vtag_strip = TRUE;
+	}
+
+	if ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0)
+		return;
+	/*
+	** A soft reset zeros out the VFTA, so
+	** we need to repopulate it now.
+	*/
+	for (int i = 0; i < IXGBE_VFTA_SIZE; i++)
+		if (adapter->shadow_vfta[i] != 0)
+			IXGBE_WRITE_REG(hw, IXGBE_VFTA(i),
+			    adapter->shadow_vfta[i]);
+
+	ctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
+	/* Enable the Filter Table if enabled */
+	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
+		ctrl &= ~IXGBE_VLNCTRL_CFIEN;
+		ctrl |= IXGBE_VLNCTRL_VFE;
+	}
+	if (hw->mac.type == ixgbe_mac_82598EB)
+		ctrl |= IXGBE_VLNCTRL_VME;
+	IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, ctrl);
+}
+
+static void
+ixgbe_enable_intr(struct adapter *adapter)
+{
+	struct ixgbe_hw	*hw = &adapter->hw;
+	struct ix_queue	*que = adapter->queues;
+	u32		mask, fwsm;
+
+	mask = (IXGBE_EIMS_ENABLE_MASK & ~IXGBE_EIMS_RTX_QUEUE);
+	/* Enable Fan Failure detection */
+	if (hw->device_id == IXGBE_DEV_ID_82598AT)
+		mask |= IXGBE_EIMS_GPI_SDP1;
+
+	switch (adapter->hw.mac.type) {
+	case ixgbe_mac_82599EB:
+		mask |= IXGBE_EIMS_ECC;
+		/* Temperature sensor on some adapters */
+		mask |= IXGBE_EIMS_GPI_SDP0;
+		/* SFP+ (RX_LOS_N & MOD_ABS_N) */
+		mask |= IXGBE_EIMS_GPI_SDP1;
+		mask |= IXGBE_EIMS_GPI_SDP2;
+#ifdef IXGBE_FDIR
+		mask |= IXGBE_EIMS_FLOW_DIR;
+#endif
+#ifdef PCI_IOV
+		mask |= IXGBE_EIMS_MAILBOX;
+#endif
+		break;
+	case ixgbe_mac_X540:
+		/* Detect if Thermal Sensor is enabled */
+		fwsm = IXGBE_READ_REG(hw, IXGBE_FWSM);
+		if (fwsm & IXGBE_FWSM_TS_ENABLED)
+			mask |= IXGBE_EIMS_TS;
+		mask |= IXGBE_EIMS_ECC;
+#ifdef IXGBE_FDIR
+		mask |= IXGBE_EIMS_FLOW_DIR;
+#endif
+		break;
+	case ixgbe_mac_X550:
+	case ixgbe_mac_X550EM_x:
+		/* MAC thermal sensor is automatically enabled */
+		mask |= IXGBE_EIMS_TS;
+		/* Some devices use SDP0 for important information */
+		if (hw->device_id == IXGBE_DEV_ID_X550EM_X_SFP ||
+		    hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T)
+			mask |= IXGBE_EIMS_GPI_SDP0_BY_MAC(hw);
+		mask |= IXGBE_EIMS_ECC;
+#ifdef IXGBE_FDIR
+		mask |= IXGBE_EIMS_FLOW_DIR;
+#endif
+#ifdef PCI_IOV
+		mask |= IXGBE_EIMS_MAILBOX;
+#endif
+		/* falls through */
+	default:
+		break;
+	}
+
+	IXGBE_WRITE_REG(hw, IXGBE_EIMS, mask);
+
+	/* With MSI-X we use auto clear */
+	if (adapter->msix_mem) {
+		mask = IXGBE_EIMS_ENABLE_MASK;
+		/*
Don't autoclear Link */ + mask &= ~IXGBE_EIMS_OTHER; + mask &= ~IXGBE_EIMS_LSC; +#ifdef PCI_IOV + mask &= ~IXGBE_EIMS_MAILBOX; +#endif + IXGBE_WRITE_REG(hw, IXGBE_EIAC, mask); + } + + /* + ** Now enable all queues, this is done separately to + ** allow for handling the extended (beyond 32) MSIX + ** vectors that can be used by 82599 + */ + for (int i = 0; i < adapter->num_queues; i++, que++) + ixgbe_enable_queue(adapter, que->msix); + + IXGBE_WRITE_FLUSH(hw); + + return; +} + +static void +ixgbe_disable_intr(struct adapter *adapter) +{ + if (adapter->msix_mem) + IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIAC, 0); + if (adapter->hw.mac.type == ixgbe_mac_82598EB) { + IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, ~0); + } else { + IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, 0xFFFF0000); + IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC_EX(0), ~0); + IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC_EX(1), ~0); + } + IXGBE_WRITE_FLUSH(&adapter->hw); + return; +} + +/* +** Get the width and transaction speed of +** the slot this adapter is plugged into. +*/ +static void +ixgbe_get_slot_info(struct adapter *adapter) +{ + device_t dev = adapter->dev; + struct ixgbe_hw *hw = &adapter->hw; + struct ixgbe_mac_info *mac = &hw->mac; + u16 link; + u32 offset; + + /* For most devices simply call the shared code routine */ + if (hw->device_id != IXGBE_DEV_ID_82599_SFP_SF_QP) { + ixgbe_get_bus_info(hw); + /* These devices don't use PCI-E */ + switch (hw->mac.type) { + case ixgbe_mac_X550EM_x: + return; + default: + goto display; + } + } + + /* + ** For the Quad port adapter we need to parse back + ** up the PCI tree to find the speed of the expansion + ** slot into which this adapter is plugged. A bit more work. + */ + dev = device_get_parent(device_get_parent(dev)); +#ifdef IXGBE_DEBUG + device_printf(dev, "parent pcib = %x,%x,%x\n", + pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev)); +#endif + dev = device_get_parent(device_get_parent(dev)); +#ifdef IXGBE_DEBUG + device_printf(dev, "slot pcib = %x,%x,%x\n", + pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev)); +#endif + /* Now get the PCI Express Capabilities offset */ + pci_find_cap(dev, PCIY_EXPRESS, &offset); + /* ...and read the Link Status Register */ + link = pci_read_config(dev, offset + PCIER_LINK_STA, 2); + switch (link & IXGBE_PCI_LINK_WIDTH) { + case IXGBE_PCI_LINK_WIDTH_1: + hw->bus.width = ixgbe_bus_width_pcie_x1; + break; + case IXGBE_PCI_LINK_WIDTH_2: + hw->bus.width = ixgbe_bus_width_pcie_x2; + break; + case IXGBE_PCI_LINK_WIDTH_4: + hw->bus.width = ixgbe_bus_width_pcie_x4; + break; + case IXGBE_PCI_LINK_WIDTH_8: + hw->bus.width = ixgbe_bus_width_pcie_x8; + break; + default: + hw->bus.width = ixgbe_bus_width_unknown; + break; + } + + switch (link & IXGBE_PCI_LINK_SPEED) { + case IXGBE_PCI_LINK_SPEED_2500: + hw->bus.speed = ixgbe_bus_speed_2500; + break; + case IXGBE_PCI_LINK_SPEED_5000: + hw->bus.speed = ixgbe_bus_speed_5000; + break; + case IXGBE_PCI_LINK_SPEED_8000: + hw->bus.speed = ixgbe_bus_speed_8000; + break; + default: + hw->bus.speed = ixgbe_bus_speed_unknown; + break; + } + + mac->ops.set_lan_id(hw); + +display: + device_printf(dev,"PCI Express Bus: Speed %s %s\n", + ((hw->bus.speed == ixgbe_bus_speed_8000) ? "8.0GT/s": + (hw->bus.speed == ixgbe_bus_speed_5000) ? "5.0GT/s": + (hw->bus.speed == ixgbe_bus_speed_2500) ? "2.5GT/s":"Unknown"), + (hw->bus.width == ixgbe_bus_width_pcie_x8) ? "Width x8" : + (hw->bus.width == ixgbe_bus_width_pcie_x4) ? "Width x4" : + (hw->bus.width == ixgbe_bus_width_pcie_x1) ? 
"Width x1" : + ("Unknown")); + + if ((hw->device_id != IXGBE_DEV_ID_82599_SFP_SF_QP) && + ((hw->bus.width <= ixgbe_bus_width_pcie_x4) && + (hw->bus.speed == ixgbe_bus_speed_2500))) { + device_printf(dev, "PCI-Express bandwidth available" + " for this card\n is not sufficient for" + " optimal performance.\n"); + device_printf(dev, "For optimal performance a x8 " + "PCIE, or x4 PCIE Gen2 slot is required.\n"); + } + if ((hw->device_id == IXGBE_DEV_ID_82599_SFP_SF_QP) && + ((hw->bus.width <= ixgbe_bus_width_pcie_x8) && + (hw->bus.speed < ixgbe_bus_speed_8000))) { + device_printf(dev, "PCI-Express bandwidth available" + " for this card\n is not sufficient for" + " optimal performance.\n"); + device_printf(dev, "For optimal performance a x8 " + "PCIE Gen3 slot is required.\n"); + } + + return; +} + + +/* +** Setup the correct IVAR register for a particular MSIX interrupt +** (yes this is all very magic and confusing :) +** - entry is the register array entry +** - vector is the MSIX vector for this queue +** - type is RX/TX/MISC +*/ +static void +ixgbe_set_ivar(struct adapter *adapter, u8 entry, u8 vector, s8 type) +{ + struct ixgbe_hw *hw = &adapter->hw; + u32 ivar, index; + + vector |= IXGBE_IVAR_ALLOC_VAL; + + switch (hw->mac.type) { + + case ixgbe_mac_82598EB: + if (type == -1) + entry = IXGBE_IVAR_OTHER_CAUSES_INDEX; + else + entry += (type * 64); + index = (entry >> 2) & 0x1F; + ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index)); + ivar &= ~(0xFF << (8 * (entry & 0x3))); + ivar |= (vector << (8 * (entry & 0x3))); + IXGBE_WRITE_REG(&adapter->hw, IXGBE_IVAR(index), ivar); + break; + + case ixgbe_mac_82599EB: + case ixgbe_mac_X540: + case ixgbe_mac_X550: + case ixgbe_mac_X550EM_x: + if (type == -1) { /* MISC IVAR */ + index = (entry & 1) * 8; + ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC); + ivar &= ~(0xFF << index); + ivar |= (vector << index); + IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar); + } else { /* RX/TX IVARS */ + index = (16 * (entry & 1)) + (8 * type); + ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1)); + ivar &= ~(0xFF << index); + ivar |= (vector << index); + IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar); + } + + default: + break; + } +} + +static void +ixgbe_configure_ivars(struct adapter *adapter) +{ + struct ix_queue *que = adapter->queues; + u32 newitr; + + if (ixgbe_max_interrupt_rate > 0) + newitr = (4000000 / ixgbe_max_interrupt_rate) & 0x0FF8; + else { + /* + ** Disable DMA coalescing if interrupt moderation is + ** disabled. + */ + adapter->dmac = 0; + newitr = 0; + } + + for (int i = 0; i < adapter->num_queues; i++, que++) { + struct rx_ring *rxr = &adapter->rx_rings[i]; + struct tx_ring *txr = &adapter->tx_rings[i]; + /* First the RX queue entry */ + ixgbe_set_ivar(adapter, rxr->me, que->msix, 0); + /* ... and the TX */ + ixgbe_set_ivar(adapter, txr->me, que->msix, 1); + /* Set an Initial EITR value */ + IXGBE_WRITE_REG(&adapter->hw, + IXGBE_EITR(que->msix), newitr); + } + + /* For the Link interrupt */ + ixgbe_set_ivar(adapter, 1, adapter->vector, -1); +} + +/* +** ixgbe_sfp_probe - called in the local timer to +** determine if a port had optics inserted. 
+*/ +static bool +ixgbe_sfp_probe(struct adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + device_t dev = adapter->dev; + bool result = FALSE; + + if ((hw->phy.type == ixgbe_phy_nl) && + (hw->phy.sfp_type == ixgbe_sfp_type_not_present)) { + s32 ret = hw->phy.ops.identify_sfp(hw); + if (ret) + goto out; + ret = hw->phy.ops.reset(hw); + if (ret == IXGBE_ERR_SFP_NOT_SUPPORTED) { + device_printf(dev, "Unsupported SFP+ module detected!"); + device_printf(dev, "Reload driver with supported module.\n"); + adapter->sfp_probe = FALSE; + goto out; + } else + device_printf(dev, "SFP+ module detected!\n"); + /* We now have supported optics */ + adapter->sfp_probe = FALSE; + /* Set the optics type so system reports correctly */ + ixgbe_setup_optics(adapter); + result = TRUE; + } +out: + return (result); +} + +/* +** Tasklet handler for MSIX Link interrupts +** - do outside interrupt since it might sleep +*/ +static void +ixgbe_handle_link(void *context, int pending) +{ + struct adapter *adapter = context; + struct ixgbe_hw *hw = &adapter->hw; + + ixgbe_check_link(hw, + &adapter->link_speed, &adapter->link_up, 0); + ixgbe_update_link_status(adapter); + + /* Re-enable link interrupts */ + IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EIMS_LSC); +} + +/* +** Tasklet for handling SFP module interrupts +*/ +static void +ixgbe_handle_mod(void *context, int pending) +{ + struct adapter *adapter = context; + struct ixgbe_hw *hw = &adapter->hw; + enum ixgbe_phy_type orig_type = hw->phy.type; + device_t dev = adapter->dev; + u32 err; + + IXGBE_CORE_LOCK(adapter); + + /* Check to see if the PHY type changed */ + if (hw->phy.ops.identify) { + hw->phy.type = ixgbe_phy_unknown; + hw->phy.ops.identify(hw); + } + + if (hw->phy.type != orig_type) { + device_printf(dev, "Detected phy_type %d\n", hw->phy.type); + + if (hw->phy.type == ixgbe_phy_none) { + hw->phy.sfp_type = ixgbe_sfp_type_unknown; + goto out; + } + + /* Try to do the initialization that was skipped before */ + if (hw->phy.ops.init) + hw->phy.ops.init(hw); + if (hw->phy.ops.reset) + hw->phy.ops.reset(hw); + } + + err = hw->phy.ops.identify_sfp(hw); + if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) { + device_printf(dev, + "Unsupported SFP+ module type was detected.\n"); + goto out; + } + + err = hw->mac.ops.setup_sfp(hw); + if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) { + device_printf(dev, + "Setup failure - unsupported SFP+ module type.\n"); + goto out; + } + if (hw->phy.multispeed_fiber) + taskqueue_enqueue(adapter->tq, &adapter->msf_task); +out: + /* Update media type */ + switch (hw->mac.ops.get_media_type(hw)) { + case ixgbe_media_type_fiber: + adapter->optics = IFM_10G_SR; + break; + case ixgbe_media_type_copper: + adapter->optics = IFM_10G_TWINAX; + break; + case ixgbe_media_type_cx4: + adapter->optics = IFM_10G_CX4; + break; + default: + adapter->optics = 0; + break; + } + + IXGBE_CORE_UNLOCK(adapter); + return; +} + + +/* +** Tasklet for handling MSF (multispeed fiber) interrupts +*/ +static void +ixgbe_handle_msf(void *context, int pending) +{ + struct adapter *adapter = context; + struct ixgbe_hw *hw = &adapter->hw; + u32 autoneg; + bool negotiate; + + IXGBE_CORE_LOCK(adapter); + /* get_supported_phy_layer will call hw->phy.ops.identify_sfp() */ + adapter->phy_layer = ixgbe_get_supported_physical_layer(hw); + + autoneg = hw->phy.autoneg_advertised; + if ((!autoneg) && (hw->mac.ops.get_link_capabilities)) + hw->mac.ops.get_link_capabilities(hw, &autoneg, &negotiate); + if (hw->mac.ops.setup_link) + hw->mac.ops.setup_link(hw, autoneg, TRUE); + + /* Adjust 
media types shown in ifconfig */ + ifmedia_removeall(&adapter->media); + ixgbe_add_media_types(adapter); + IXGBE_CORE_UNLOCK(adapter); + return; +} + +/* +** Tasklet for handling interrupts from an external PHY +*/ +static void +ixgbe_handle_phy(void *context, int pending) +{ + struct adapter *adapter = context; + struct ixgbe_hw *hw = &adapter->hw; + int error; + + error = hw->phy.ops.handle_lasi(hw); + if (error == IXGBE_ERR_OVERTEMP) + device_printf(adapter->dev, + "CRITICAL: EXTERNAL PHY OVER TEMP!! " + " PHY will downshift to lower power state!\n"); + else if (error) + device_printf(adapter->dev, + "Error handling LASI interrupt: %d\n", + error); + return; +} + +#ifdef IXGBE_FDIR +/* +** Tasklet for reinitializing the Flow Director filter table +*/ +static void +ixgbe_reinit_fdir(void *context, int pending) +{ + struct adapter *adapter = context; + struct ifnet *ifp = adapter->ifp; + + if (adapter->fdir_reinit != 1) /* Shouldn't happen */ + return; + ixgbe_reinit_fdir_tables_82599(&adapter->hw); + adapter->fdir_reinit = 0; + /* re-enable flow director interrupts */ + IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, IXGBE_EIMS_FLOW_DIR); + /* Restart the interface */ + ifp->if_drv_flags |= IFF_DRV_RUNNING; + return; +} +#endif + +/********************************************************************* + * + * Configure DMA Coalescing + * + **********************************************************************/ +static void +ixgbe_config_dmac(struct adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + struct ixgbe_dmac_config *dcfg = &hw->mac.dmac_config; + + if (hw->mac.type < ixgbe_mac_X550 || + !hw->mac.ops.dmac_config) + return; + + if (dcfg->watchdog_timer ^ adapter->dmac || + dcfg->link_speed ^ adapter->link_speed) { + dcfg->watchdog_timer = adapter->dmac; + dcfg->fcoe_en = false; + dcfg->link_speed = adapter->link_speed; + dcfg->num_tcs = 1; + + INIT_DEBUGOUT2("dmac settings: watchdog %d, link speed %d\n", + dcfg->watchdog_timer, dcfg->link_speed); + + hw->mac.ops.dmac_config(hw); + } +} + +/* + * Checks whether the adapter's ports are capable of + * Wake On LAN by reading the adapter's NVM. + * + * Sets each port's hw->wol_enabled value depending + * on the value read here. + */ +static void +ixgbe_check_wol_support(struct adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + u16 dev_caps = 0; + + /* Find out WoL support for port */ + adapter->wol_support = hw->wol_enabled = 0; + ixgbe_get_device_caps(hw, &dev_caps); + if ((dev_caps & IXGBE_DEVICE_CAPS_WOL_PORT0_1) || + ((dev_caps & IXGBE_DEVICE_CAPS_WOL_PORT0) && + hw->bus.func == 0)) + adapter->wol_support = hw->wol_enabled = 1; + + /* Save initial wake up filter configuration */ + adapter->wufc = IXGBE_READ_REG(hw, IXGBE_WUFC); + + return; +} + +/* + * Prepare the adapter/port for LPLU and/or WoL + */ +static int +ixgbe_setup_low_power_mode(struct adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + device_t dev = adapter->dev; + s32 error = 0; + + mtx_assert(&adapter->core_mtx, MA_OWNED); + + if (!hw->wol_enabled) + ixgbe_set_phy_power(hw, FALSE); + + /* Limit power management flow to X550EM baseT */ + if (hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T + && hw->phy.ops.enter_lplu) { + /* Turn off support for APM wakeup. (Using ACPI instead) */ + IXGBE_WRITE_REG(hw, IXGBE_GRC, + IXGBE_READ_REG(hw, IXGBE_GRC) & ~(u32)2); + + /* + * Clear Wake Up Status register to prevent any previous wakeup + * events from waking us up immediately after we suspend. 
+ */ + IXGBE_WRITE_REG(hw, IXGBE_WUS, 0xffffffff); + + /* + * Program the Wakeup Filter Control register with user filter + * settings + */ + IXGBE_WRITE_REG(hw, IXGBE_WUFC, adapter->wufc); + + /* Enable wakeups and power management in Wakeup Control */ + IXGBE_WRITE_REG(hw, IXGBE_WUC, + IXGBE_WUC_WKEN | IXGBE_WUC_PME_EN); + + /* X550EM baseT adapters need a special LPLU flow */ + hw->phy.reset_disable = true; + ixgbe_stop(adapter); + error = hw->phy.ops.enter_lplu(hw); + if (error) + device_printf(dev, + "Error entering LPLU: %d\n", error); + hw->phy.reset_disable = false; + } else { + /* Just stop for other adapters */ + ixgbe_stop(adapter); + } + + return error; +} + +/********************************************************************** + * + * Update the board statistics counters. + * + **********************************************************************/ +static void +ixgbe_update_stats_counters(struct adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + u32 missed_rx = 0, bprc, lxon, lxoff, total; + u64 total_missed_rx = 0; + + adapter->stats.pf.crcerrs += IXGBE_READ_REG(hw, IXGBE_CRCERRS); + adapter->stats.pf.illerrc += IXGBE_READ_REG(hw, IXGBE_ILLERRC); + adapter->stats.pf.errbc += IXGBE_READ_REG(hw, IXGBE_ERRBC); + adapter->stats.pf.mspdc += IXGBE_READ_REG(hw, IXGBE_MSPDC); + + for (int i = 0; i < 16; i++) { + adapter->stats.pf.qprc[i] += IXGBE_READ_REG(hw, IXGBE_QPRC(i)); + adapter->stats.pf.qptc[i] += IXGBE_READ_REG(hw, IXGBE_QPTC(i)); + adapter->stats.pf.qprdc[i] += IXGBE_READ_REG(hw, IXGBE_QPRDC(i)); + } + adapter->stats.pf.mlfc += IXGBE_READ_REG(hw, IXGBE_MLFC); + adapter->stats.pf.mrfc += IXGBE_READ_REG(hw, IXGBE_MRFC); + adapter->stats.pf.rlec += IXGBE_READ_REG(hw, IXGBE_RLEC); + + /* Hardware workaround, gprc counts missed packets */ + adapter->stats.pf.gprc += IXGBE_READ_REG(hw, IXGBE_GPRC); + adapter->stats.pf.gprc -= missed_rx; + + if (hw->mac.type != ixgbe_mac_82598EB) { + adapter->stats.pf.gorc += IXGBE_READ_REG(hw, IXGBE_GORCL) + + ((u64)IXGBE_READ_REG(hw, IXGBE_GORCH) << 32); + adapter->stats.pf.gotc += IXGBE_READ_REG(hw, IXGBE_GOTCL) + + ((u64)IXGBE_READ_REG(hw, IXGBE_GOTCH) << 32); + adapter->stats.pf.tor += IXGBE_READ_REG(hw, IXGBE_TORL) + + ((u64)IXGBE_READ_REG(hw, IXGBE_TORH) << 32); + adapter->stats.pf.lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXCNT); + adapter->stats.pf.lxoffrxc += IXGBE_READ_REG(hw, IXGBE_LXOFFRXCNT); + } else { + adapter->stats.pf.lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXC); + adapter->stats.pf.lxoffrxc += IXGBE_READ_REG(hw, IXGBE_LXOFFRXC); + /* 82598 only has a counter in the high register */ + adapter->stats.pf.gorc += IXGBE_READ_REG(hw, IXGBE_GORCH); + adapter->stats.pf.gotc += IXGBE_READ_REG(hw, IXGBE_GOTCH); + adapter->stats.pf.tor += IXGBE_READ_REG(hw, IXGBE_TORH); + } + + /* + * Workaround: mprc hardware is incorrectly counting + * broadcasts, so for now we subtract those. 
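+	 * (Concretely: bprc is sampled once, accumulated into its
+	 * own counter, and on 82598 subtracted from the running
+	 * mprc total so the multicast count excludes broadcasts.)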
+ */ + bprc = IXGBE_READ_REG(hw, IXGBE_BPRC); + adapter->stats.pf.bprc += bprc; + adapter->stats.pf.mprc += IXGBE_READ_REG(hw, IXGBE_MPRC); + if (hw->mac.type == ixgbe_mac_82598EB) + adapter->stats.pf.mprc -= bprc; + + adapter->stats.pf.prc64 += IXGBE_READ_REG(hw, IXGBE_PRC64); + adapter->stats.pf.prc127 += IXGBE_READ_REG(hw, IXGBE_PRC127); + adapter->stats.pf.prc255 += IXGBE_READ_REG(hw, IXGBE_PRC255); + adapter->stats.pf.prc511 += IXGBE_READ_REG(hw, IXGBE_PRC511); + adapter->stats.pf.prc1023 += IXGBE_READ_REG(hw, IXGBE_PRC1023); + adapter->stats.pf.prc1522 += IXGBE_READ_REG(hw, IXGBE_PRC1522); + + lxon = IXGBE_READ_REG(hw, IXGBE_LXONTXC); + adapter->stats.pf.lxontxc += lxon; + lxoff = IXGBE_READ_REG(hw, IXGBE_LXOFFTXC); + adapter->stats.pf.lxofftxc += lxoff; + total = lxon + lxoff; + + adapter->stats.pf.gptc += IXGBE_READ_REG(hw, IXGBE_GPTC); + adapter->stats.pf.mptc += IXGBE_READ_REG(hw, IXGBE_MPTC); + adapter->stats.pf.ptc64 += IXGBE_READ_REG(hw, IXGBE_PTC64); + adapter->stats.pf.gptc -= total; + adapter->stats.pf.mptc -= total; + adapter->stats.pf.ptc64 -= total; + adapter->stats.pf.gotc -= total * ETHER_MIN_LEN; + + adapter->stats.pf.ruc += IXGBE_READ_REG(hw, IXGBE_RUC); + adapter->stats.pf.rfc += IXGBE_READ_REG(hw, IXGBE_RFC); + adapter->stats.pf.roc += IXGBE_READ_REG(hw, IXGBE_ROC); + adapter->stats.pf.rjc += IXGBE_READ_REG(hw, IXGBE_RJC); + adapter->stats.pf.mngprc += IXGBE_READ_REG(hw, IXGBE_MNGPRC); + adapter->stats.pf.mngpdc += IXGBE_READ_REG(hw, IXGBE_MNGPDC); + adapter->stats.pf.mngptc += IXGBE_READ_REG(hw, IXGBE_MNGPTC); + adapter->stats.pf.tpr += IXGBE_READ_REG(hw, IXGBE_TPR); + adapter->stats.pf.tpt += IXGBE_READ_REG(hw, IXGBE_TPT); + adapter->stats.pf.ptc127 += IXGBE_READ_REG(hw, IXGBE_PTC127); + adapter->stats.pf.ptc255 += IXGBE_READ_REG(hw, IXGBE_PTC255); + adapter->stats.pf.ptc511 += IXGBE_READ_REG(hw, IXGBE_PTC511); + adapter->stats.pf.ptc1023 += IXGBE_READ_REG(hw, IXGBE_PTC1023); + adapter->stats.pf.ptc1522 += IXGBE_READ_REG(hw, IXGBE_PTC1522); + adapter->stats.pf.bptc += IXGBE_READ_REG(hw, IXGBE_BPTC); + adapter->stats.pf.xec += IXGBE_READ_REG(hw, IXGBE_XEC); + adapter->stats.pf.fccrc += IXGBE_READ_REG(hw, IXGBE_FCCRC); + adapter->stats.pf.fclast += IXGBE_READ_REG(hw, IXGBE_FCLAST); + /* Only read FCOE on 82599 */ + if (hw->mac.type != ixgbe_mac_82598EB) { + adapter->stats.pf.fcoerpdc += IXGBE_READ_REG(hw, IXGBE_FCOERPDC); + adapter->stats.pf.fcoeprc += IXGBE_READ_REG(hw, IXGBE_FCOEPRC); + adapter->stats.pf.fcoeptc += IXGBE_READ_REG(hw, IXGBE_FCOEPTC); + adapter->stats.pf.fcoedwrc += IXGBE_READ_REG(hw, IXGBE_FCOEDWRC); + adapter->stats.pf.fcoedwtc += IXGBE_READ_REG(hw, IXGBE_FCOEDWTC); + } + + /* Fill out the OS statistics structure */ + IXGBE_SET_IPACKETS(adapter, adapter->stats.pf.gprc); + IXGBE_SET_OPACKETS(adapter, adapter->stats.pf.gptc); + IXGBE_SET_IBYTES(adapter, adapter->stats.pf.gorc); + IXGBE_SET_OBYTES(adapter, adapter->stats.pf.gotc); + IXGBE_SET_IMCASTS(adapter, adapter->stats.pf.mprc); + IXGBE_SET_OMCASTS(adapter, adapter->stats.pf.mptc); + IXGBE_SET_COLLISIONS(adapter, 0); + IXGBE_SET_IQDROPS(adapter, total_missed_rx); + IXGBE_SET_IERRORS(adapter, adapter->stats.pf.crcerrs + + adapter->stats.pf.rlec); +} + +#if __FreeBSD_version >= 1100036 +static uint64_t +ixgbe_get_counter(struct ifnet *ifp, ift_counter cnt) +{ + struct adapter *adapter; + struct tx_ring *txr; + uint64_t rv; + + adapter = if_getsoftc(ifp); + + switch (cnt) { + case IFCOUNTER_IPACKETS: + return (adapter->ipackets); + case IFCOUNTER_OPACKETS: + return (adapter->opackets); + case 
IFCOUNTER_IBYTES: + return (adapter->ibytes); + case IFCOUNTER_OBYTES: + return (adapter->obytes); + case IFCOUNTER_IMCASTS: + return (adapter->imcasts); + case IFCOUNTER_OMCASTS: + return (adapter->omcasts); + case IFCOUNTER_COLLISIONS: + return (0); + case IFCOUNTER_IQDROPS: + return (adapter->iqdrops); + case IFCOUNTER_OQDROPS: + rv = 0; + txr = adapter->tx_rings; + for (int i = 0; i < adapter->num_queues; i++, txr++) + rv += txr->br->br_drops; + return (rv); + case IFCOUNTER_IERRORS: + return (adapter->ierrors); + default: + return (if_get_counter_default(ifp, cnt)); + } +} +#endif + +/** ixgbe_sysctl_tdh_handler - Handler function + * Retrieves the TDH value from the hardware + */ +static int +ixgbe_sysctl_tdh_handler(SYSCTL_HANDLER_ARGS) +{ + int error; + + struct tx_ring *txr = ((struct tx_ring *)oidp->oid_arg1); + if (!txr) return 0; + + unsigned val = IXGBE_READ_REG(&txr->adapter->hw, IXGBE_TDH(txr->me)); + error = sysctl_handle_int(oidp, &val, 0, req); + if (error || !req->newptr) + return error; + return 0; +} + +/** ixgbe_sysctl_tdt_handler - Handler function + * Retrieves the TDT value from the hardware + */ +static int +ixgbe_sysctl_tdt_handler(SYSCTL_HANDLER_ARGS) +{ + int error; + + struct tx_ring *txr = ((struct tx_ring *)oidp->oid_arg1); + if (!txr) return 0; + + unsigned val = IXGBE_READ_REG(&txr->adapter->hw, IXGBE_TDT(txr->me)); + error = sysctl_handle_int(oidp, &val, 0, req); + if (error || !req->newptr) + return error; + return 0; +} + +/** ixgbe_sysctl_rdh_handler - Handler function + * Retrieves the RDH value from the hardware + */ +static int +ixgbe_sysctl_rdh_handler(SYSCTL_HANDLER_ARGS) +{ + int error; + + struct rx_ring *rxr = ((struct rx_ring *)oidp->oid_arg1); + if (!rxr) return 0; + + unsigned val = IXGBE_READ_REG(&rxr->adapter->hw, IXGBE_RDH(rxr->me)); + error = sysctl_handle_int(oidp, &val, 0, req); + if (error || !req->newptr) + return error; + return 0; +} + +/** ixgbe_sysctl_rdt_handler - Handler function + * Retrieves the RDT value from the hardware + */ +static int +ixgbe_sysctl_rdt_handler(SYSCTL_HANDLER_ARGS) +{ + int error; + + struct rx_ring *rxr = ((struct rx_ring *)oidp->oid_arg1); + if (!rxr) return 0; + + unsigned val = IXGBE_READ_REG(&rxr->adapter->hw, IXGBE_RDT(rxr->me)); + error = sysctl_handle_int(oidp, &val, 0, req); + if (error || !req->newptr) + return error; + return 0; +} + +static int +ixgbe_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS) +{ + int error; + struct ix_queue *que = ((struct ix_queue *)oidp->oid_arg1); + unsigned int reg, usec, rate; + + reg = IXGBE_READ_REG(&que->adapter->hw, IXGBE_EITR(que->msix)); + usec = ((reg & 0x0FF8) >> 3); + if (usec > 0) + rate = 500000 / usec; + else + rate = 0; + error = sysctl_handle_int(oidp, &rate, 0, req); + if (error || !req->newptr) + return error; + reg &= ~0xfff; /* default, no limitation */ + ixgbe_max_interrupt_rate = 0; + if (rate > 0 && rate < 500000) { + if (rate < 1000) + rate = 1000; + ixgbe_max_interrupt_rate = rate; + reg |= ((4000000/rate) & 0xff8 ); + } + IXGBE_WRITE_REG(&que->adapter->hw, IXGBE_EITR(que->msix), reg); + return 0; +} + +static void +ixgbe_add_device_sysctls(struct adapter *adapter) +{ + device_t dev = adapter->dev; + struct ixgbe_hw *hw = &adapter->hw; + struct sysctl_oid_list *child; + struct sysctl_ctx_list *ctx; + + ctx = device_get_sysctl_ctx(dev); + child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); + + /* Sysctls for all devices */ + SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "fc", + CTLTYPE_INT | CTLFLAG_RW, adapter, 0, + ixgbe_sysctl_flowcntl, "I", 
IXGBE_SYSCTL_DESC_SET_FC); + + SYSCTL_ADD_INT(ctx, child, OID_AUTO, "enable_aim", + CTLFLAG_RW, + &ixgbe_enable_aim, 1, "Interrupt Moderation"); + + SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "advertise_speed", + CTLTYPE_INT | CTLFLAG_RW, adapter, 0, + ixgbe_sysctl_advertise, "I", IXGBE_SYSCTL_DESC_ADV_SPEED); + + SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "thermal_test", + CTLTYPE_INT | CTLFLAG_RW, adapter, 0, + ixgbe_sysctl_thermal_test, "I", "Thermal Test"); + +#ifdef IXGBE_DEBUG + /* testing sysctls (for all devices) */ + SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "power_state", + CTLTYPE_INT | CTLFLAG_RW, adapter, 0, + ixgbe_sysctl_power_state, "I", "PCI Power State"); + + SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "print_rss_config", + CTLTYPE_STRING | CTLFLAG_RD, adapter, 0, + ixgbe_sysctl_print_rss_config, "A", "Prints RSS Configuration"); +#endif + /* for X550 series devices */ + if (hw->mac.type >= ixgbe_mac_X550) + SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "dmac", + CTLTYPE_INT | CTLFLAG_RW, adapter, 0, + ixgbe_sysctl_dmac, "I", "DMA Coalesce"); + + /* for X552 backplane devices */ + if (hw->device_id == IXGBE_DEV_ID_X550EM_X_KR) { + struct sysctl_oid *eee_node; + struct sysctl_oid_list *eee_list; + + eee_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "eee", + CTLFLAG_RD, NULL, + "Energy Efficient Ethernet sysctls"); + eee_list = SYSCTL_CHILDREN(eee_node); + + SYSCTL_ADD_PROC(ctx, eee_list, OID_AUTO, "enable", + CTLTYPE_INT | CTLFLAG_RW, adapter, 0, + ixgbe_sysctl_eee_enable, "I", + "Enable or Disable EEE"); + + SYSCTL_ADD_PROC(ctx, eee_list, OID_AUTO, "negotiated", + CTLTYPE_INT | CTLFLAG_RD, adapter, 0, + ixgbe_sysctl_eee_negotiated, "I", + "EEE negotiated on link"); + + SYSCTL_ADD_PROC(ctx, eee_list, OID_AUTO, "tx_lpi_status", + CTLTYPE_INT | CTLFLAG_RD, adapter, 0, + ixgbe_sysctl_eee_tx_lpi_status, "I", + "Whether or not TX link is in LPI state"); + + SYSCTL_ADD_PROC(ctx, eee_list, OID_AUTO, "rx_lpi_status", + CTLTYPE_INT | CTLFLAG_RD, adapter, 0, + ixgbe_sysctl_eee_rx_lpi_status, "I", + "Whether or not RX link is in LPI state"); + + SYSCTL_ADD_PROC(ctx, eee_list, OID_AUTO, "tx_lpi_delay", + CTLTYPE_INT | CTLFLAG_RD, adapter, 0, + ixgbe_sysctl_eee_tx_lpi_delay, "I", + "TX LPI entry delay in microseconds"); + } + + /* for WoL-capable devices */ + if (hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T) { + SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "wol_enable", + CTLTYPE_INT | CTLFLAG_RW, adapter, 0, + ixgbe_sysctl_wol_enable, "I", + "Enable/Disable Wake on LAN"); + + SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "wufc", + CTLTYPE_INT | CTLFLAG_RW, adapter, 0, + ixgbe_sysctl_wufc, "I", + "Enable/Disable Wake Up Filters"); + } + + /* for X552/X557-AT devices */ + if (hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T) { + struct sysctl_oid *phy_node; + struct sysctl_oid_list *phy_list; + + phy_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "phy", + CTLFLAG_RD, NULL, + "External PHY sysctls"); + phy_list = SYSCTL_CHILDREN(phy_node); + + SYSCTL_ADD_PROC(ctx, phy_list, OID_AUTO, "temp", + CTLTYPE_INT | CTLFLAG_RD, adapter, 0, + ixgbe_sysctl_phy_temp, "I", + "Current External PHY Temperature (Celsius)"); + + SYSCTL_ADD_PROC(ctx, phy_list, OID_AUTO, "overtemp_occurred", + CTLTYPE_INT | CTLFLAG_RD, adapter, 0, + ixgbe_sysctl_phy_overtemp_occurred, "I", + "External PHY High Temperature Event Occurred"); + } +} + +/* + * Add sysctl variables, one per statistic, to the system. 
+ */
+static void
+ixgbe_add_hw_stats(struct adapter *adapter)
+{
+    device_t dev = adapter->dev;
+
+    struct tx_ring *txr = adapter->tx_rings;
+    struct rx_ring *rxr = adapter->rx_rings;
+
+    struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
+    struct sysctl_oid *tree = device_get_sysctl_tree(dev);
+    struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
+    struct ixgbe_hw_stats *stats = &adapter->stats.pf;
+
+    struct sysctl_oid *stat_node, *queue_node;
+    struct sysctl_oid_list *stat_list, *queue_list;
+
+#define QUEUE_NAME_LEN 32
+    char namebuf[QUEUE_NAME_LEN];
+
+    /* Driver Statistics */
+    SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
+        CTLFLAG_RD, &adapter->dropped_pkts,
+        "Driver dropped packets");
+    SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_failed",
+        CTLFLAG_RD, &adapter->mbuf_defrag_failed,
+        "m_defrag() failed");
+    SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_events",
+        CTLFLAG_RD, &adapter->watchdog_events,
+        "Watchdog timeouts");
+    SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
+        CTLFLAG_RD, &adapter->link_irq,
+        "Link MSIX IRQ Handled");
+
+    for (int i = 0; i < adapter->num_queues; i++, txr++) {
+        snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
+        queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
+            CTLFLAG_RD, NULL, "Queue Name");
+        queue_list = SYSCTL_CHILDREN(queue_node);
+
+        SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate",
+            CTLTYPE_UINT | CTLFLAG_RW, &adapter->queues[i],
+            sizeof(&adapter->queues[i]),
+            ixgbe_sysctl_interrupt_rate_handler, "IU",
+            "Interrupt Rate");
+        SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "irqs",
+            CTLFLAG_RD, &(adapter->queues[i].irqs),
+            "irqs on this queue");
+        SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
+            CTLTYPE_UINT | CTLFLAG_RD, txr, sizeof(txr),
+            ixgbe_sysctl_tdh_handler, "IU",
+            "Transmit Descriptor Head");
+        SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
+            CTLTYPE_UINT | CTLFLAG_RD, txr, sizeof(txr),
+            ixgbe_sysctl_tdt_handler, "IU",
+            "Transmit Descriptor Tail");
+        SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tso_tx",
+            CTLFLAG_RD, &txr->tso_tx,
+            "TSO");
+        SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_tx_dma_setup",
+            CTLFLAG_RD, &txr->no_tx_dma_setup,
+            "Driver tx dma failure in xmit");
+        SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "no_desc_avail",
+            CTLFLAG_RD, &txr->no_desc_avail,
+            "Queue No Descriptor Available");
+        SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_packets",
+            CTLFLAG_RD, &txr->total_packets,
+            "Queue Packets Transmitted");
+        SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "br_drops",
+            CTLFLAG_RD, &txr->br->br_drops,
+            "Packets dropped in buf_ring");
+    }
+
+    for (int i = 0; i < adapter->num_queues; i++, rxr++) {
+        struct lro_ctrl *lro = &rxr->lro;
+
+        snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
+        queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
+            CTLFLAG_RD, NULL, "Queue Name");
+        queue_list = SYSCTL_CHILDREN(queue_node);
+
+        SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
+            CTLTYPE_UINT | CTLFLAG_RD, rxr, sizeof(rxr),
+            ixgbe_sysctl_rdh_handler, "IU",
+            "Receive Descriptor Head");
+        SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
+            CTLTYPE_UINT | CTLFLAG_RD, rxr, sizeof(rxr),
+            ixgbe_sysctl_rdt_handler, "IU",
+            "Receive Descriptor Tail");
+        SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_packets",
+            CTLFLAG_RD, &rxr->rx_packets,
+            "Queue Packets Received");
"Queue Packets Received"); + SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_bytes", + CTLFLAG_RD, &rxr->rx_bytes, + "Queue Bytes Received"); + SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_copies", + CTLFLAG_RD, &rxr->rx_copies, + "Copied RX Frames"); + SYSCTL_ADD_U64(ctx, queue_list, OID_AUTO, "lro_queued", + CTLFLAG_RD, &lro->lro_queued, 0, + "LRO Queued"); + SYSCTL_ADD_U64(ctx, queue_list, OID_AUTO, "lro_flushed", + CTLFLAG_RD, &lro->lro_flushed, 0, + "LRO Flushed"); + } + + /* MAC stats get the own sub node */ + + stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", + CTLFLAG_RD, NULL, "MAC Statistics"); + stat_list = SYSCTL_CHILDREN(stat_node); + + SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs", + CTLFLAG_RD, &stats->crcerrs, + "CRC Errors"); + SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "ill_errs", + CTLFLAG_RD, &stats->illerrc, + "Illegal Byte Errors"); + SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "byte_errs", + CTLFLAG_RD, &stats->errbc, + "Byte Errors"); + SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "short_discards", + CTLFLAG_RD, &stats->mspdc, + "MAC Short Packets Discarded"); + SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "local_faults", + CTLFLAG_RD, &stats->mlfc, + "MAC Local Faults"); + SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "remote_faults", + CTLFLAG_RD, &stats->mrfc, + "MAC Remote Faults"); + SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rec_len_errs", + CTLFLAG_RD, &stats->rlec, + "Receive Length Errors"); + + /* Flow Control stats */ + SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd", + CTLFLAG_RD, &stats->lxontxc, + "Link XON Transmitted"); + SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd", + CTLFLAG_RD, &stats->lxonrxc, + "Link XON Received"); + SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd", + CTLFLAG_RD, &stats->lxofftxc, + "Link XOFF Transmitted"); + SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd", + CTLFLAG_RD, &stats->lxoffrxc, + "Link XOFF Received"); + + /* Packet Reception Stats */ + SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_octets_rcvd", + CTLFLAG_RD, &stats->tor, + "Total Octets Received"); + SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_rcvd", + CTLFLAG_RD, &stats->gorc, + "Good Octets Received"); + SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_rcvd", + CTLFLAG_RD, &stats->tpr, + "Total Packets Received"); + SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_rcvd", + CTLFLAG_RD, &stats->gprc, + "Good Packets Received"); + SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_rcvd", + CTLFLAG_RD, &stats->mprc, + "Multicast Packets Received"); + SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_rcvd", + CTLFLAG_RD, &stats->bprc, + "Broadcast Packets Received"); + SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64", + CTLFLAG_RD, &stats->prc64, + "64 byte frames received "); + SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127", + CTLFLAG_RD, &stats->prc127, + "65-127 byte frames received"); + SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255", + CTLFLAG_RD, &stats->prc255, + "128-255 byte frames received"); + SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511", + CTLFLAG_RD, &stats->prc511, + "256-511 byte frames received"); + SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023", + CTLFLAG_RD, &stats->prc1023, + "512-1023 byte frames received"); + SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522", + CTLFLAG_RD, &stats->prc1522, + "1023-1522 byte frames received"); + SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, 
"recv_undersized", + CTLFLAG_RD, &stats->ruc, + "Receive Undersized"); + SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented", + CTLFLAG_RD, &stats->rfc, + "Fragmented Packets Received "); + SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversized", + CTLFLAG_RD, &stats->roc, + "Oversized Packets Received"); + SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabberd", + CTLFLAG_RD, &stats->rjc, + "Received Jabber"); + SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_rcvd", + CTLFLAG_RD, &stats->mngprc, + "Management Packets Received"); + SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_drpd", + CTLFLAG_RD, &stats->mngptc, + "Management Packets Dropped"); + SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "checksum_errs", + CTLFLAG_RD, &stats->xec, + "Checksum Errors"); + + /* Packet Transmission Stats */ + SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd", + CTLFLAG_RD, &stats->gotc, + "Good Octets Transmitted"); + SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd", + CTLFLAG_RD, &stats->tpt, + "Total Packets Transmitted"); + SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd", + CTLFLAG_RD, &stats->gptc, + "Good Packets Transmitted"); + SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd", + CTLFLAG_RD, &stats->bptc, + "Broadcast Packets Transmitted"); + SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd", + CTLFLAG_RD, &stats->mptc, + "Multicast Packets Transmitted"); + SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_txd", + CTLFLAG_RD, &stats->mngptc, + "Management Packets Transmitted"); + SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64", + CTLFLAG_RD, &stats->ptc64, + "64 byte frames transmitted "); + SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127", + CTLFLAG_RD, &stats->ptc127, + "65-127 byte frames transmitted"); + SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255", + CTLFLAG_RD, &stats->ptc255, + "128-255 byte frames transmitted"); + SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511", + CTLFLAG_RD, &stats->ptc511, + "256-511 byte frames transmitted"); + SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023", + CTLFLAG_RD, &stats->ptc1023, + "512-1023 byte frames transmitted"); + SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522", + CTLFLAG_RD, &stats->ptc1522, + "1024-1522 byte frames transmitted"); +} + +static void +ixgbe_set_sysctl_value(struct adapter *adapter, const char *name, + const char *description, int *limit, int value) +{ + *limit = value; + SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev), + SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)), + OID_AUTO, name, CTLFLAG_RW, limit, value, description); +} + +/* +** Set flow control using sysctl: +** Flow control values: +** 0 - off +** 1 - rx pause +** 2 - tx pause +** 3 - full +*/ +static int +ixgbe_sysctl_flowcntl(SYSCTL_HANDLER_ARGS) +{ + int error, fc; + struct adapter *adapter; + + adapter = (struct adapter *) arg1; + fc = adapter->fc; + + error = sysctl_handle_int(oidp, &fc, 0, req); + if ((error) || (req->newptr == NULL)) + return (error); + + /* Don't bother if it's not changed */ + if (adapter->fc == fc) + return (0); + + return ixgbe_set_flowcntl(adapter, fc); +} + + +static int +ixgbe_set_flowcntl(struct adapter *adapter, int fc) +{ + + switch (fc) { + case ixgbe_fc_rx_pause: + case ixgbe_fc_tx_pause: + case ixgbe_fc_full: + adapter->hw.fc.requested_mode = adapter->fc; + if (adapter->num_queues > 1) + ixgbe_disable_rx_drop(adapter); + break; + case 
+
+static int
+ixgbe_set_flowcntl(struct adapter *adapter, int fc)
+{
+
+    switch (fc) {
+    case ixgbe_fc_rx_pause:
+    case ixgbe_fc_tx_pause:
+    case ixgbe_fc_full:
+        adapter->hw.fc.requested_mode = fc;
+        if (adapter->num_queues > 1)
+            ixgbe_disable_rx_drop(adapter);
+        break;
+    case ixgbe_fc_none:
+        adapter->hw.fc.requested_mode = ixgbe_fc_none;
+        if (adapter->num_queues > 1)
+            ixgbe_enable_rx_drop(adapter);
+        break;
+    default:
+        return (EINVAL);
+    }
+    adapter->fc = fc;
+    /* Don't autoneg if forcing a value */
+    adapter->hw.fc.disable_fc_autoneg = TRUE;
+    ixgbe_fc_enable(&adapter->hw);
+    return (0);
+}
+
+/*
+** Control advertised link speed:
+** Flags:
+**    0x1 - advertise 100 Mb
+**    0x2 - advertise 1G
+**    0x4 - advertise 10G
+*/
+static int
+ixgbe_sysctl_advertise(SYSCTL_HANDLER_ARGS)
+{
+    int error, advertise;
+    struct adapter *adapter;
+
+    adapter = (struct adapter *) arg1;
+    advertise = adapter->advertise;
+
+    error = sysctl_handle_int(oidp, &advertise, 0, req);
+    if ((error) || (req->newptr == NULL))
+        return (error);
+
+    return ixgbe_set_advertise(adapter, advertise);
+}
+
+static int
+ixgbe_set_advertise(struct adapter *adapter, int advertise)
+{
+    device_t dev;
+    struct ixgbe_hw *hw;
+    ixgbe_link_speed speed;
+
+    /* Checks to validate new value */
+    if (adapter->advertise == advertise) /* no change */
+        return (0);
+
+    hw = &adapter->hw;
+    dev = adapter->dev;
+
+    /* No speed changes for backplane media */
+    if (hw->phy.media_type == ixgbe_media_type_backplane)
+        return (ENODEV);
+
+    if (!((hw->phy.media_type == ixgbe_media_type_copper) ||
+        (hw->phy.multispeed_fiber))) {
+        device_printf(dev,
+            "Advertised speed can only be set on copper or "
+            "multispeed fiber media types.\n");
+        return (EINVAL);
+    }
+
+    if (advertise < 0x1 || advertise > 0x7) {
+        device_printf(dev,
+            "Invalid advertised speed; valid modes are 0x1 through 0x7\n");
+        return (EINVAL);
+    }
+
+    if ((advertise & 0x1)
+        && (hw->mac.type != ixgbe_mac_X540)
+        && (hw->mac.type != ixgbe_mac_X550)) {
+        device_printf(dev, "Set Advertise: 100Mb on X540/X550 only\n");
+        return (EINVAL);
+    }
+
+    /* Set new value and report new advertised mode */
+    speed = 0;
+    if (advertise & 0x1)
+        speed |= IXGBE_LINK_SPEED_100_FULL;
+    if (advertise & 0x2)
+        speed |= IXGBE_LINK_SPEED_1GB_FULL;
+    if (advertise & 0x4)
+        speed |= IXGBE_LINK_SPEED_10GB_FULL;
+    adapter->advertise = advertise;
+
+    hw->mac.autotry_restart = TRUE;
+    hw->mac.ops.setup_link(hw, speed, TRUE);
+
+    return (0);
+}
+
+/*
+ * The following two sysctls are for X552/X557-AT devices;
+ * they deal with the external PHY used in them.
+ */
+static int
+ixgbe_sysctl_phy_temp(SYSCTL_HANDLER_ARGS)
+{
+    struct adapter *adapter = (struct adapter *) arg1;
+    struct ixgbe_hw *hw = &adapter->hw;
+    u16 reg;
+
+    if (hw->device_id != IXGBE_DEV_ID_X550EM_X_10G_T) {
+        device_printf(adapter->dev,
+            "Device has no supported external thermal sensor.\n");
+        return (ENODEV);
+    }
+
+    if (hw->phy.ops.read_reg(hw, IXGBE_PHY_CURRENT_TEMP,
+        IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE,
+        &reg)) {
+        device_printf(adapter->dev,
+            "Error reading from PHY's current temperature register\n");
+        return (EAGAIN);
+    }
+
+    /* Shift temp for output */
+    reg = reg >> 8;
+
+    return (sysctl_handle_int(oidp, NULL, reg, req));
+}
+
+/*
+ * Reports whether the current PHY temperature is over
+ * the overtemp threshold.
+ *  - This is reported directly from the PHY
+ */
+static int
+ixgbe_sysctl_phy_overtemp_occurred(SYSCTL_HANDLER_ARGS)
+{
+    struct adapter *adapter = (struct adapter *) arg1;
+    struct ixgbe_hw *hw = &adapter->hw;
+    u16 reg;
+
+    if (hw->device_id != IXGBE_DEV_ID_X550EM_X_10G_T) {
+        device_printf(adapter->dev,
+            "Device has no supported external thermal sensor.\n");
+        return (ENODEV);
+    }
+
+    if (hw->phy.ops.read_reg(hw, IXGBE_PHY_OVERTEMP_STATUS,
+        IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE,
+        &reg)) {
+        device_printf(adapter->dev,
+            "Error reading from PHY's temperature status register\n");
+        return (EAGAIN);
+    }
+
+    /* Get occurrence bit */
+    reg = !!(reg & 0x4000);
+    return (sysctl_handle_int(oidp, 0, reg, req));
+}
+
+/*
+** Thermal Shutdown Trigger (internal MAC)
+**   - Set this to 1 to cause an overtemp event to occur
+*/
+static int
+ixgbe_sysctl_thermal_test(SYSCTL_HANDLER_ARGS)
+{
+    struct adapter *adapter = (struct adapter *) arg1;
+    struct ixgbe_hw *hw = &adapter->hw;
+    int error, fire = 0;
+
+    error = sysctl_handle_int(oidp, &fire, 0, req);
+    if ((error) || (req->newptr == NULL))
+        return (error);
+
+    if (fire) {
+        u32 reg = IXGBE_READ_REG(hw, IXGBE_EICS);
+        reg |= IXGBE_EICR_TS;
+        IXGBE_WRITE_REG(hw, IXGBE_EICS, reg);
+    }
+
+    return (0);
+}
+
+/*
+** Manage DMA Coalescing.
+** Control values:
+**     0/1 - off / on (use default value of 1000)
+**
+**     Legal timer values are:
+**     50,100,250,500,1000,2000,5000,10000
+**
+**     Turning off interrupt moderation will also turn this off.
+*/
+static int
+ixgbe_sysctl_dmac(SYSCTL_HANDLER_ARGS)
+{
+    struct adapter *adapter = (struct adapter *) arg1;
+    struct ifnet *ifp = adapter->ifp;
+    int error;
+    u32 newval;
+
+    newval = adapter->dmac;
+    error = sysctl_handle_int(oidp, &newval, 0, req);
+    if ((error) || (req->newptr == NULL))
+        return (error);
+
+    switch (newval) {
+    case 0:
+        /* Disabled */
+        adapter->dmac = 0;
+        break;
+    case 1:
+        /* Enable and use default */
+        adapter->dmac = 1000;
+        break;
+    case 50:
+    case 100:
+    case 250:
+    case 500:
+    case 1000:
+    case 2000:
+    case 5000:
+    case 10000:
+        /* Legal values - allow */
+        adapter->dmac = newval;
+        break;
+    default:
+        /* Do nothing, illegal value */
+        return (EINVAL);
+    }
+
+    /* Re-initialize hardware if it's already running */
+    if (ifp->if_drv_flags & IFF_DRV_RUNNING)
+        ixgbe_init(adapter);
+
+    return (0);
+}
+
+#ifdef IXGBE_DEBUG
+/**
+ * Sysctl to test power states
+ * Values:
+ *     0      - set device to D0
+ *     3      - set device to D3
+ *     (none) - get current device power state
+ */
+static int
+ixgbe_sysctl_power_state(SYSCTL_HANDLER_ARGS)
+{
+    struct adapter *adapter = (struct adapter *) arg1;
+    device_t dev = adapter->dev;
+    int curr_ps, new_ps, error = 0;
+
+    curr_ps = new_ps = pci_get_powerstate(dev);
+
+    error = sysctl_handle_int(oidp, &new_ps, 0, req);
+    if ((error) || (req->newptr == NULL))
+        return (error);
+
+    if (new_ps == curr_ps)
+        return (0);
+
+    if (new_ps == 3 && curr_ps == 0)
+        error = DEVICE_SUSPEND(dev);
+    else if (new_ps == 0 && curr_ps == 3)
+        error = DEVICE_RESUME(dev);
+    else
+        return (EINVAL);
+
+    device_printf(dev, "New state: %d\n", pci_get_powerstate(dev));
+
+    return (error);
+}
+#endif
+
+/*
+ * Sysctl to enable/disable the WoL capability, if supported by the adapter.
+ * Values: + * 0 - disabled + * 1 - enabled + */ +static int +ixgbe_sysctl_wol_enable(SYSCTL_HANDLER_ARGS) +{ + struct adapter *adapter = (struct adapter *) arg1; + struct ixgbe_hw *hw = &adapter->hw; + int new_wol_enabled; + int error = 0; + + new_wol_enabled = hw->wol_enabled; + error = sysctl_handle_int(oidp, &new_wol_enabled, 0, req); + if ((error) || (req->newptr == NULL)) + return (error); + new_wol_enabled = !!(new_wol_enabled); + if (new_wol_enabled == hw->wol_enabled) + return (0); + + if (new_wol_enabled > 0 && !adapter->wol_support) + return (ENODEV); + else + hw->wol_enabled = new_wol_enabled; + + return (0); +} + +/* + * Sysctl to enable/disable the Energy Efficient Ethernet capability, + * if supported by the adapter. + * Values: + * 0 - disabled + * 1 - enabled + */ +static int +ixgbe_sysctl_eee_enable(SYSCTL_HANDLER_ARGS) +{ + struct adapter *adapter = (struct adapter *) arg1; + struct ixgbe_hw *hw = &adapter->hw; + struct ifnet *ifp = adapter->ifp; + int new_eee_enabled, error = 0; + + new_eee_enabled = adapter->eee_enabled; + error = sysctl_handle_int(oidp, &new_eee_enabled, 0, req); + if ((error) || (req->newptr == NULL)) + return (error); + new_eee_enabled = !!(new_eee_enabled); + if (new_eee_enabled == adapter->eee_enabled) + return (0); + + if (new_eee_enabled > 0 && !hw->mac.ops.setup_eee) + return (ENODEV); + else + adapter->eee_enabled = new_eee_enabled; + + /* Re-initialize hardware if it's already running */ + if (ifp->if_drv_flags & IFF_DRV_RUNNING) + ixgbe_init(adapter); + + return (0); +} + +/* + * Read-only sysctl indicating whether EEE support was negotiated + * on the link. + */ +static int +ixgbe_sysctl_eee_negotiated(SYSCTL_HANDLER_ARGS) +{ + struct adapter *adapter = (struct adapter *) arg1; + struct ixgbe_hw *hw = &adapter->hw; + bool status; + + status = !!(IXGBE_READ_REG(hw, IXGBE_EEE_STAT) & IXGBE_EEE_STAT_NEG); + + return (sysctl_handle_int(oidp, 0, status, req)); +} + +/* + * Read-only sysctl indicating whether RX Link is in LPI state. + */ +static int +ixgbe_sysctl_eee_rx_lpi_status(SYSCTL_HANDLER_ARGS) +{ + struct adapter *adapter = (struct adapter *) arg1; + struct ixgbe_hw *hw = &adapter->hw; + bool status; + + status = !!(IXGBE_READ_REG(hw, IXGBE_EEE_STAT) & + IXGBE_EEE_RX_LPI_STATUS); + + return (sysctl_handle_int(oidp, 0, status, req)); +} + +/* + * Read-only sysctl indicating whether TX Link is in LPI state. + */ +static int +ixgbe_sysctl_eee_tx_lpi_status(SYSCTL_HANDLER_ARGS) +{ + struct adapter *adapter = (struct adapter *) arg1; + struct ixgbe_hw *hw = &adapter->hw; + bool status; + + status = !!(IXGBE_READ_REG(hw, IXGBE_EEE_STAT) & + IXGBE_EEE_TX_LPI_STATUS); + + return (sysctl_handle_int(oidp, 0, status, req)); +} + +/* + * Read-only sysctl indicating TX Link LPI delay + */ +static int +ixgbe_sysctl_eee_tx_lpi_delay(SYSCTL_HANDLER_ARGS) +{ + struct adapter *adapter = (struct adapter *) arg1; + struct ixgbe_hw *hw = &adapter->hw; + u32 reg; + + reg = IXGBE_READ_REG(hw, IXGBE_EEE_SU); + + return (sysctl_handle_int(oidp, 0, reg >> 26, req)); +} + +/* + * Sysctl to enable/disable the types of packets that the + * adapter will wake up on upon receipt. + * WUFC - Wake Up Filter Control + * Flags: + * 0x1 - Link Status Change + * 0x2 - Magic Packet + * 0x4 - Direct Exact + * 0x8 - Directed Multicast + * 0x10 - Broadcast + * 0x20 - ARP/IPv4 Request Packet + * 0x40 - Direct IPv4 Packet + * 0x80 - Direct IPv6 Packet + * + * Setting another flag will cause the sysctl to return an + * error. 
+ */
+static int
+ixgbe_sysctl_wufc(SYSCTL_HANDLER_ARGS)
+{
+    struct adapter *adapter = (struct adapter *) arg1;
+    int error = 0;
+    u32 new_wufc;
+
+    new_wufc = adapter->wufc;
+
+    error = sysctl_handle_int(oidp, &new_wufc, 0, req);
+    if ((error) || (req->newptr == NULL))
+        return (error);
+    if (new_wufc == adapter->wufc)
+        return (0);
+
+    if (new_wufc & 0xffffff00)
+        return (EINVAL);
+    else {
+        new_wufc &= 0xff;
+        new_wufc |= (0xffffff & adapter->wufc);
+        adapter->wufc = new_wufc;
+    }
+
+    return (0);
+}
+
+#ifdef IXGBE_DEBUG
+static int
+ixgbe_sysctl_print_rss_config(SYSCTL_HANDLER_ARGS)
+{
+    struct adapter *adapter = (struct adapter *)arg1;
+    struct ixgbe_hw *hw = &adapter->hw;
+    device_t dev = adapter->dev;
+    int error = 0, reta_size;
+    struct sbuf *buf;
+    u32 reg;
+
+    buf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
+    if (!buf) {
+        device_printf(dev, "Could not allocate sbuf for output.\n");
+        return (ENOMEM);
+    }
+
+    // TODO: use sbufs to make a string to print out
+    /* Set multiplier for RETA setup and table size based on MAC */
+    switch (adapter->hw.mac.type) {
+    case ixgbe_mac_X550:
+    case ixgbe_mac_X550EM_x:
+        reta_size = 128;
+        break;
+    default:
+        reta_size = 32;
+        break;
+    }
+
+    /* Print out the redirection table */
+    sbuf_cat(buf, "\n");
+    for (int i = 0; i < reta_size; i++) {
+        if (i < 32) {
+            reg = IXGBE_READ_REG(hw, IXGBE_RETA(i));
+            sbuf_printf(buf, "RETA(%2d): 0x%08x\n", i, reg);
+        } else {
+            reg = IXGBE_READ_REG(hw, IXGBE_ERETA(i - 32));
+            sbuf_printf(buf, "ERETA(%2d): 0x%08x\n", i - 32, reg);
+        }
+    }
+
+    // TODO: print more config
+
+    error = sbuf_finish(buf);
+    if (error)
+        device_printf(dev, "Error finishing sbuf: %d\n", error);
+
+    sbuf_delete(buf);
+    return (0);
+}
+#endif /* IXGBE_DEBUG */
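[Editor's sketch, not part of the patch] The string-valued handler above can be fetched from userland with the usual two-step size probe; unit 0 and a kernel built with IXGBE_DEBUG are assumed:

    #include <sys/types.h>
    #include <sys/sysctl.h>
    #include <stdio.h>
    #include <stdlib.h>

    /* Fetch and print the RETA/ERETA dump produced by the handler above. */
    int
    print_ix_rss_config(void)
    {
        char *buf;
        size_t len = 0;

        if (sysctlbyname("dev.ix.0.print_rss_config", NULL, &len,
            NULL, 0) == -1)
            return (-1);        /* size probe */
        if ((buf = malloc(len + 1)) == NULL)
            return (-1);
        if (sysctlbyname("dev.ix.0.print_rss_config", buf, &len,
            NULL, 0) == -1) {
            free(buf);
            return (-1);
        }
        buf[len] = '\0';
        printf("%s", buf);
        free(buf);
        return (0);
    }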
+
+/*
+** Enable the hardware to drop packets when the buffer is
+** full. This is useful with multiple queues, so that no
+** single queue being full stalls the entire RX engine. We
+** only enable this when Multiqueue is enabled AND when
+** Flow Control is disabled.
+*/
+static void
+ixgbe_enable_rx_drop(struct adapter *adapter)
+{
+    struct ixgbe_hw *hw = &adapter->hw;
+
+    for (int i = 0; i < adapter->num_queues; i++) {
+        struct rx_ring *rxr = &adapter->rx_rings[i];
+        u32 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(rxr->me));
+        srrctl |= IXGBE_SRRCTL_DROP_EN;
+        IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxr->me), srrctl);
+    }
+#ifdef PCI_IOV
+    /* enable drop for each vf */
+    for (int i = 0; i < adapter->num_vfs; i++) {
+        IXGBE_WRITE_REG(hw, IXGBE_QDE,
+            (IXGBE_QDE_WRITE | (i << IXGBE_QDE_IDX_SHIFT) |
+            IXGBE_QDE_ENABLE));
+    }
+#endif
+}
+
+static void
+ixgbe_disable_rx_drop(struct adapter *adapter)
+{
+    struct ixgbe_hw *hw = &adapter->hw;
+
+    for (int i = 0; i < adapter->num_queues; i++) {
+        struct rx_ring *rxr = &adapter->rx_rings[i];
+        u32 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(rxr->me));
+        srrctl &= ~IXGBE_SRRCTL_DROP_EN;
+        IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxr->me), srrctl);
+    }
+#ifdef PCI_IOV
+    /* disable drop for each vf */
+    for (int i = 0; i < adapter->num_vfs; i++) {
+        IXGBE_WRITE_REG(hw, IXGBE_QDE,
+            (IXGBE_QDE_WRITE | (i << IXGBE_QDE_IDX_SHIFT)));
+    }
+#endif
+}
+
+static void
+ixgbe_rearm_queues(struct adapter *adapter, u64 queues)
+{
+    u32 mask;
+
+    switch (adapter->hw.mac.type) {
+    case ixgbe_mac_82598EB:
+        mask = (IXGBE_EIMS_RTX_QUEUE & queues);
+        IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS, mask);
+        break;
+    case ixgbe_mac_82599EB:
+    case ixgbe_mac_X540:
+    case ixgbe_mac_X550:
+    case ixgbe_mac_X550EM_x:
+        mask = (queues & 0xFFFFFFFF);
+        IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS_EX(0), mask);
+        mask = (queues >> 32);
+        IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS_EX(1), mask);
+        break;
+    default:
+        break;
+    }
+}
+
+#ifdef PCI_IOV
+
+/*
+** Support functions for SRIOV/VF management
+*/
+
+static void
+ixgbe_ping_all_vfs(struct adapter *adapter)
+{
+    struct ixgbe_vf *vf;
+
+    for (int i = 0; i < adapter->num_vfs; i++) {
+        vf = &adapter->vfs[i];
+        if (vf->flags & IXGBE_VF_ACTIVE)
+            ixgbe_send_vf_msg(adapter, vf, IXGBE_PF_CONTROL_MSG);
+    }
+}
+
+
+static void
+ixgbe_vf_set_default_vlan(struct adapter *adapter, struct ixgbe_vf *vf,
+    uint16_t tag)
+{
+    struct ixgbe_hw *hw;
+    uint32_t vmolr, vmvir;
+
+    hw = &adapter->hw;
+
+    vf->vlan_tag = tag;
+
+    vmolr = IXGBE_READ_REG(hw, IXGBE_VMOLR(vf->pool));
+
+    /* Do not receive packets that pass inexact filters. */
+    vmolr &= ~(IXGBE_VMOLR_ROMPE | IXGBE_VMOLR_ROPE);
+
+    /* Disable Multicast Promiscuous Mode. */
+    vmolr &= ~IXGBE_VMOLR_MPE;
+
+    /* Accept broadcasts. */
+    vmolr |= IXGBE_VMOLR_BAM;
+
+    if (tag == 0) {
+        /* Accept non-vlan tagged traffic. */
+        //vmolr |= IXGBE_VMOLR_AUPE;
+
+        /* Allow VM to tag outgoing traffic; no default tag. */
+        vmvir = 0;
+    } else {
+        /* Require vlan-tagged traffic. */
+        vmolr &= ~IXGBE_VMOLR_AUPE;
+
+        /* Tag all traffic with provided vlan tag. */
+        vmvir = (tag | IXGBE_VMVIR_VLANA_DEFAULT);
+    }
+    IXGBE_WRITE_REG(hw, IXGBE_VMOLR(vf->pool), vmolr);
+    IXGBE_WRITE_REG(hw, IXGBE_VMVIR(vf->pool), vmvir);
+}
+
+
+static boolean_t
+ixgbe_vf_frame_size_compatible(struct adapter *adapter, struct ixgbe_vf *vf)
+{
+
+    /*
+     * Frame size compatibility between PF and VF is only a problem on
+     * 82599-based cards.  X540 and later support any combination of jumbo
+     * frames on PFs and VFs.
+     */
+    if (adapter->hw.mac.type != ixgbe_mac_82599EB)
+        return (TRUE);
+
+    switch (vf->api_ver) {
+    case IXGBE_API_VER_1_0:
+    case IXGBE_API_VER_UNKNOWN:
+        /*
+         * On legacy (1.0 and older) VF versions, we don't support jumbo
+         * frames on either the PF or the VF.
+ */ + if (adapter->max_frame_size > ETHER_MAX_LEN || + vf->max_frame_size > ETHER_MAX_LEN) + return (FALSE); + + return (TRUE); + + break; + case IXGBE_API_VER_1_1: + default: + /* + * 1.1 or later VF versions always work if they aren't using + * jumbo frames. + */ + if (vf->max_frame_size <= ETHER_MAX_LEN) + return (TRUE); + + /* + * Jumbo frames only work with VFs if the PF is also using jumbo + * frames. + */ + if (adapter->max_frame_size <= ETHER_MAX_LEN) + return (TRUE); + + return (FALSE); + + } +} + + +static void +ixgbe_process_vf_reset(struct adapter *adapter, struct ixgbe_vf *vf) +{ + ixgbe_vf_set_default_vlan(adapter, vf, vf->default_vlan); + + // XXX clear multicast addresses + + ixgbe_clear_rar(&adapter->hw, vf->rar_index); + + vf->api_ver = IXGBE_API_VER_UNKNOWN; +} + + +static void +ixgbe_vf_enable_transmit(struct adapter *adapter, struct ixgbe_vf *vf) +{ + struct ixgbe_hw *hw; + uint32_t vf_index, vfte; + + hw = &adapter->hw; + + vf_index = IXGBE_VF_INDEX(vf->pool); + vfte = IXGBE_READ_REG(hw, IXGBE_VFTE(vf_index)); + vfte |= IXGBE_VF_BIT(vf->pool); + IXGBE_WRITE_REG(hw, IXGBE_VFTE(vf_index), vfte); +} + + +static void +ixgbe_vf_enable_receive(struct adapter *adapter, struct ixgbe_vf *vf) +{ + struct ixgbe_hw *hw; + uint32_t vf_index, vfre; + + hw = &adapter->hw; + + vf_index = IXGBE_VF_INDEX(vf->pool); + vfre = IXGBE_READ_REG(hw, IXGBE_VFRE(vf_index)); + if (ixgbe_vf_frame_size_compatible(adapter, vf)) + vfre |= IXGBE_VF_BIT(vf->pool); + else + vfre &= ~IXGBE_VF_BIT(vf->pool); + IXGBE_WRITE_REG(hw, IXGBE_VFRE(vf_index), vfre); +} + + +static void +ixgbe_vf_reset_msg(struct adapter *adapter, struct ixgbe_vf *vf, uint32_t *msg) +{ + struct ixgbe_hw *hw; + uint32_t ack; + uint32_t resp[IXGBE_VF_PERMADDR_MSG_LEN]; + + hw = &adapter->hw; + + ixgbe_process_vf_reset(adapter, vf); + + if (ixgbe_validate_mac_addr(vf->ether_addr) == 0) { + ixgbe_set_rar(&adapter->hw, vf->rar_index, + vf->ether_addr, vf->pool, TRUE); + ack = IXGBE_VT_MSGTYPE_ACK; + } else + ack = IXGBE_VT_MSGTYPE_NACK; + + ixgbe_vf_enable_transmit(adapter, vf); + ixgbe_vf_enable_receive(adapter, vf); + + vf->flags |= IXGBE_VF_CTS; + + resp[0] = IXGBE_VF_RESET | ack | IXGBE_VT_MSGTYPE_CTS; + bcopy(vf->ether_addr, &resp[1], ETHER_ADDR_LEN); + resp[3] = hw->mac.mc_filter_type; + ixgbe_write_mbx(hw, resp, IXGBE_VF_PERMADDR_MSG_LEN, vf->pool); +} + + +static void +ixgbe_vf_set_mac(struct adapter *adapter, struct ixgbe_vf *vf, uint32_t *msg) +{ + uint8_t *mac; + + mac = (uint8_t*)&msg[1]; + + /* Check that the VF has permission to change the MAC address. */ + if (!(vf->flags & IXGBE_VF_CAP_MAC) && ixgbe_vf_mac_changed(vf, mac)) { + ixgbe_send_vf_nack(adapter, vf, msg[0]); + return; + } + + if (ixgbe_validate_mac_addr(mac) != 0) { + ixgbe_send_vf_nack(adapter, vf, msg[0]); + return; + } + + bcopy(mac, vf->ether_addr, ETHER_ADDR_LEN); + + ixgbe_set_rar(&adapter->hw, vf->rar_index, vf->ether_addr, + vf->pool, TRUE); + + ixgbe_send_vf_ack(adapter, vf, msg[0]); +} + + +/* +** VF multicast addresses are set by using the appropriate bit in +** 1 of 128 32 bit addresses (4096 possible). 
+*/ +static void +ixgbe_vf_set_mc_addr(struct adapter *adapter, struct ixgbe_vf *vf, u32 *msg) +{ + u16 *list = (u16*)&msg[1]; + int entries; + u32 vmolr, vec_bit, vec_reg, mta_reg; + + entries = (msg[0] & IXGBE_VT_MSGINFO_MASK) >> IXGBE_VT_MSGINFO_SHIFT; + entries = min(entries, IXGBE_MAX_VF_MC); + + vmolr = IXGBE_READ_REG(&adapter->hw, IXGBE_VMOLR(vf->pool)); + + vf->num_mc_hashes = entries; + + /* Set the appropriate MTA bit */ + for (int i = 0; i < entries; i++) { + vf->mc_hash[i] = list[i]; + vec_reg = (vf->mc_hash[i] >> 5) & 0x7F; + vec_bit = vf->mc_hash[i] & 0x1F; + mta_reg = IXGBE_READ_REG(&adapter->hw, IXGBE_MTA(vec_reg)); + mta_reg |= (1 << vec_bit); + IXGBE_WRITE_REG(&adapter->hw, IXGBE_MTA(vec_reg), mta_reg); + } + + vmolr |= IXGBE_VMOLR_ROMPE; + IXGBE_WRITE_REG(&adapter->hw, IXGBE_VMOLR(vf->pool), vmolr); + ixgbe_send_vf_ack(adapter, vf, msg[0]); + return; +} + + +static void +ixgbe_vf_set_vlan(struct adapter *adapter, struct ixgbe_vf *vf, uint32_t *msg) +{ + struct ixgbe_hw *hw; + int enable; + uint16_t tag; + + hw = &adapter->hw; + enable = IXGBE_VT_MSGINFO(msg[0]); + tag = msg[1] & IXGBE_VLVF_VLANID_MASK; + + if (!(vf->flags & IXGBE_VF_CAP_VLAN)) { + ixgbe_send_vf_nack(adapter, vf, msg[0]); + return; + } + + /* It is illegal to enable vlan tag 0. */ + if (tag == 0 && enable != 0){ + ixgbe_send_vf_nack(adapter, vf, msg[0]); + return; + } + + ixgbe_set_vfta(hw, tag, vf->pool, enable); + ixgbe_send_vf_ack(adapter, vf, msg[0]); +} + + +static void +ixgbe_vf_set_lpe(struct adapter *adapter, struct ixgbe_vf *vf, uint32_t *msg) +{ + struct ixgbe_hw *hw; + uint32_t vf_max_size, pf_max_size, mhadd; + + hw = &adapter->hw; + vf_max_size = msg[1]; + + if (vf_max_size < ETHER_CRC_LEN) { + /* We intentionally ACK invalid LPE requests. */ + ixgbe_send_vf_ack(adapter, vf, msg[0]); + return; + } + + vf_max_size -= ETHER_CRC_LEN; + + if (vf_max_size > IXGBE_MAX_FRAME_SIZE) { + /* We intentionally ACK invalid LPE requests. */ + ixgbe_send_vf_ack(adapter, vf, msg[0]); + return; + } + + vf->max_frame_size = vf_max_size; + ixgbe_update_max_frame(adapter, vf->max_frame_size); + + /* + * We might have to disable reception to this VF if the frame size is + * not compatible with the config on the PF. + */ + ixgbe_vf_enable_receive(adapter, vf); + + mhadd = IXGBE_READ_REG(hw, IXGBE_MHADD); + pf_max_size = (mhadd & IXGBE_MHADD_MFS_MASK) >> IXGBE_MHADD_MFS_SHIFT; + + if (pf_max_size < adapter->max_frame_size) { + mhadd &= ~IXGBE_MHADD_MFS_MASK; + mhadd |= adapter->max_frame_size << IXGBE_MHADD_MFS_SHIFT; + IXGBE_WRITE_REG(hw, IXGBE_MHADD, mhadd); + } + + ixgbe_send_vf_ack(adapter, vf, msg[0]); +} + + +static void +ixgbe_vf_set_macvlan(struct adapter *adapter, struct ixgbe_vf *vf, + uint32_t *msg) +{ + //XXX implement this + ixgbe_send_vf_nack(adapter, vf, msg[0]); +} + + +static void +ixgbe_vf_api_negotiate(struct adapter *adapter, struct ixgbe_vf *vf, + uint32_t *msg) +{ + + switch (msg[1]) { + case IXGBE_API_VER_1_0: + case IXGBE_API_VER_1_1: + vf->api_ver = msg[1]; + ixgbe_send_vf_ack(adapter, vf, msg[0]); + break; + default: + vf->api_ver = IXGBE_API_VER_UNKNOWN; + ixgbe_send_vf_nack(adapter, vf, msg[0]); + break; + } +} + + +static void +ixgbe_vf_get_queues(struct adapter *adapter, struct ixgbe_vf *vf, + uint32_t *msg) +{ + struct ixgbe_hw *hw; + uint32_t resp[IXGBE_VF_GET_QUEUES_RESP_LEN]; + int num_queues; + + hw = &adapter->hw; + + /* GET_QUEUES is not supported on pre-1.1 APIs. 
 */
+    switch (vf->api_ver) {
+    case IXGBE_API_VER_1_0:
+    case IXGBE_API_VER_UNKNOWN:
+        ixgbe_send_vf_nack(adapter, vf, msg[0]);
+        return;
+    }
+
+    resp[0] = IXGBE_VF_GET_QUEUES | IXGBE_VT_MSGTYPE_ACK |
+        IXGBE_VT_MSGTYPE_CTS;
+
+    num_queues = ixgbe_vf_queues(ixgbe_get_iov_mode(adapter));
+    resp[IXGBE_VF_TX_QUEUES] = num_queues;
+    resp[IXGBE_VF_RX_QUEUES] = num_queues;
+    resp[IXGBE_VF_TRANS_VLAN] = (vf->default_vlan != 0);
+    resp[IXGBE_VF_DEF_QUEUE] = 0;
+
+    ixgbe_write_mbx(hw, resp, IXGBE_VF_GET_QUEUES_RESP_LEN, vf->pool);
+}
+
+
+static void
+ixgbe_process_vf_msg(struct adapter *adapter, struct ixgbe_vf *vf)
+{
+    struct ixgbe_hw *hw;
+    uint32_t msg[IXGBE_VFMAILBOX_SIZE];
+    int error;
+
+    hw = &adapter->hw;
+
+    error = ixgbe_read_mbx(hw, msg, IXGBE_VFMAILBOX_SIZE, vf->pool);
+
+    if (error != 0)
+        return;
+
+    CTR3(KTR_MALLOC, "%s: received msg %x from %d",
+        adapter->ifp->if_xname, msg[0], vf->pool);
+    if (msg[0] == IXGBE_VF_RESET) {
+        ixgbe_vf_reset_msg(adapter, vf, msg);
+        return;
+    }
+
+    if (!(vf->flags & IXGBE_VF_CTS)) {
+        ixgbe_send_vf_nack(adapter, vf, msg[0]);
+        return;
+    }
+
+    switch (msg[0] & IXGBE_VT_MSG_MASK) {
+    case IXGBE_VF_SET_MAC_ADDR:
+        ixgbe_vf_set_mac(adapter, vf, msg);
+        break;
+    case IXGBE_VF_SET_MULTICAST:
+        ixgbe_vf_set_mc_addr(adapter, vf, msg);
+        break;
+    case IXGBE_VF_SET_VLAN:
+        ixgbe_vf_set_vlan(adapter, vf, msg);
+        break;
+    case IXGBE_VF_SET_LPE:
+        ixgbe_vf_set_lpe(adapter, vf, msg);
+        break;
+    case IXGBE_VF_SET_MACVLAN:
+        ixgbe_vf_set_macvlan(adapter, vf, msg);
+        break;
+    case IXGBE_VF_API_NEGOTIATE:
+        ixgbe_vf_api_negotiate(adapter, vf, msg);
+        break;
+    case IXGBE_VF_GET_QUEUES:
+        ixgbe_vf_get_queues(adapter, vf, msg);
+        break;
+    default:
+        ixgbe_send_vf_nack(adapter, vf, msg[0]);
+    }
+}
+
+
+/*
+ * Tasklet for handling VF -> PF mailbox messages.
+ */
+static void
+ixgbe_handle_mbx(void *context, int pending)
+{
+    struct adapter *adapter;
+    struct ixgbe_hw *hw;
+    struct ixgbe_vf *vf;
+    int i;
+
+    adapter = context;
+    hw = &adapter->hw;
+
+    IXGBE_CORE_LOCK(adapter);
+    for (i = 0; i < adapter->num_vfs; i++) {
+        vf = &adapter->vfs[i];
+
+        if (vf->flags & IXGBE_VF_ACTIVE) {
+            if (ixgbe_check_for_rst(hw, vf->pool) == 0)
+                ixgbe_process_vf_reset(adapter, vf);
+
+            if (ixgbe_check_for_msg(hw, vf->pool) == 0)
+                ixgbe_process_vf_msg(adapter, vf);
+
+            if (ixgbe_check_for_ack(hw, vf->pool) == 0)
+                ixgbe_process_vf_ack(adapter, vf);
+        }
+    }
+    IXGBE_CORE_UNLOCK(adapter);
+}
+
+
+static int
+ixgbe_init_iov(device_t dev, u16 num_vfs, const nvlist_t *config)
+{
+    struct adapter *adapter;
+    enum ixgbe_iov_mode mode;
+
+    adapter = device_get_softc(dev);
+    adapter->num_vfs = num_vfs;
+    mode = ixgbe_get_iov_mode(adapter);
+
+    if (num_vfs > ixgbe_max_vfs(mode)) {
+        adapter->num_vfs = 0;
+        return (ENOSPC);
+    }
+
+    IXGBE_CORE_LOCK(adapter);
+
+    adapter->vfs = malloc(sizeof(*adapter->vfs) * num_vfs, M_IXGBE,
+        M_NOWAIT | M_ZERO);
+
+    if (adapter->vfs == NULL) {
+        adapter->num_vfs = 0;
+        IXGBE_CORE_UNLOCK(adapter);
+        return (ENOMEM);
+    }
+
+    ixgbe_init_locked(adapter);
+
+    IXGBE_CORE_UNLOCK(adapter);
+
+    return (0);
+}
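[Editor's sketch, not part of the patch] For reference, a minimal construction of the per-VF config nvlist that ixgbe_add_vf() below consumes; the "mac-addr" and "allow-set-mac" key names are taken from that function, and the MAC value is illustrative only:

    #include <sys/nv.h>

    #include <net/ethernet.h>    /* ETHER_ADDR_LEN */

    #include <stdbool.h>
    #include <stdint.h>

    /* Build a per-VF config nvlist with the keys ixgbe_add_vf() reads. */
    static nvlist_t *
    make_vf_config(void)
    {
        /* Locally administered example address; replace as appropriate. */
        static const uint8_t mac[ETHER_ADDR_LEN] =
            { 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 };
        nvlist_t *cfg;

        cfg = nvlist_create(0);
        nvlist_add_binary(cfg, "mac-addr", mac, sizeof(mac));
        nvlist_add_bool(cfg, "allow-set-mac", true);
        return (cfg);
    }

In practice this nvlist is assembled by the SR-IOV framework from iovctl.conf rather than built by hand; the sketch only makes the expected schema concrete.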
+
+
+static void
+ixgbe_uninit_iov(device_t dev)
+{
+    struct ixgbe_hw *hw;
+    struct adapter *adapter;
+    uint32_t pf_reg, vf_reg;
+
+    adapter = device_get_softc(dev);
+    hw = &adapter->hw;
+
+    IXGBE_CORE_LOCK(adapter);
+
+    /* Enable rx/tx for the PF and disable it for all VFs. */
+    pf_reg = IXGBE_VF_INDEX(adapter->pool);
+    IXGBE_WRITE_REG(hw, IXGBE_VFRE(pf_reg),
+        IXGBE_VF_BIT(adapter->pool));
+    IXGBE_WRITE_REG(hw, IXGBE_VFTE(pf_reg),
+        IXGBE_VF_BIT(adapter->pool));
+
+    if (pf_reg == 0)
+        vf_reg = 1;
+    else
+        vf_reg = 0;
+    IXGBE_WRITE_REG(hw, IXGBE_VFRE(vf_reg), 0);
+    IXGBE_WRITE_REG(hw, IXGBE_VFTE(vf_reg), 0);
+
+    IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
+
+    free(adapter->vfs, M_IXGBE);
+    adapter->vfs = NULL;
+    adapter->num_vfs = 0;
+
+    IXGBE_CORE_UNLOCK(adapter);
+}
+
+
+static void
+ixgbe_initialize_iov(struct adapter *adapter)
+{
+    struct ixgbe_hw *hw = &adapter->hw;
+    uint32_t mrqc, mtqc, vt_ctl, vf_reg, gcr_ext, gpie;
+    enum ixgbe_iov_mode mode;
+    int i;
+
+    mode = ixgbe_get_iov_mode(adapter);
+    if (mode == IXGBE_NO_VM)
+        return;
+
+    IXGBE_CORE_LOCK_ASSERT(adapter);
+
+    mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
+    mrqc &= ~IXGBE_MRQC_MRQE_MASK;
+
+    switch (mode) {
+    case IXGBE_64_VM:
+        mrqc |= IXGBE_MRQC_VMDQRSS64EN;
+        break;
+    case IXGBE_32_VM:
+        mrqc |= IXGBE_MRQC_VMDQRSS32EN;
+        break;
+    default:
+        panic("Unexpected SR-IOV mode %d", mode);
+    }
+    IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
+
+    mtqc = IXGBE_MTQC_VT_ENA;
+    switch (mode) {
+    case IXGBE_64_VM:
+        mtqc |= IXGBE_MTQC_64VF;
+        break;
+    case IXGBE_32_VM:
+        mtqc |= IXGBE_MTQC_32VF;
+        break;
+    default:
+        panic("Unexpected SR-IOV mode %d", mode);
+    }
+    IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
+
+
+    gcr_ext = IXGBE_READ_REG(hw, IXGBE_GCR_EXT);
+    gcr_ext |= IXGBE_GCR_EXT_MSIX_EN;
+    gcr_ext &= ~IXGBE_GCR_EXT_VT_MODE_MASK;
+    switch (mode) {
+    case IXGBE_64_VM:
+        gcr_ext |= IXGBE_GCR_EXT_VT_MODE_64;
+        break;
+    case IXGBE_32_VM:
+        gcr_ext |= IXGBE_GCR_EXT_VT_MODE_32;
+        break;
+    default:
+        panic("Unexpected SR-IOV mode %d", mode);
+    }
+    IXGBE_WRITE_REG(hw, IXGBE_GCR_EXT, gcr_ext);
+
+
+    gpie = IXGBE_READ_REG(hw, IXGBE_GPIE);
+    gpie &= ~IXGBE_GPIE_VTMODE_MASK;
+    switch (mode) {
+    case IXGBE_64_VM:
+        gpie |= IXGBE_GPIE_VTMODE_64;
+        break;
+    case IXGBE_32_VM:
+        gpie |= IXGBE_GPIE_VTMODE_32;
+        break;
+    default:
+        panic("Unexpected SR-IOV mode %d", mode);
+    }
+    IXGBE_WRITE_REG(hw, IXGBE_GPIE, gpie);
+
+    /* Enable rx/tx for the PF. */
+    vf_reg = IXGBE_VF_INDEX(adapter->pool);
+    IXGBE_WRITE_REG(hw, IXGBE_VFRE(vf_reg),
+        IXGBE_VF_BIT(adapter->pool));
+    IXGBE_WRITE_REG(hw, IXGBE_VFTE(vf_reg),
+        IXGBE_VF_BIT(adapter->pool));
+
+    /* Allow VM-to-VM communication. */
+    IXGBE_WRITE_REG(hw, IXGBE_PFDTXGSWC, IXGBE_PFDTXGSWC_VT_LBEN);
+
+    vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
+    vt_ctl |= (adapter->pool << IXGBE_VT_CTL_POOL_SHIFT);
+    IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
+
+    for (i = 0; i < adapter->num_vfs; i++)
+        ixgbe_init_vf(adapter, &adapter->vfs[i]);
+}
+
+
+/*
+** Check the max frame setting of all active VF's
+*/
+static void
+ixgbe_recalculate_max_frame(struct adapter *adapter)
+{
+    struct ixgbe_vf *vf;
+
+    IXGBE_CORE_LOCK_ASSERT(adapter);
+
+    for (int i = 0; i < adapter->num_vfs; i++) {
+        vf = &adapter->vfs[i];
+        if (vf->flags & IXGBE_VF_ACTIVE)
+            ixgbe_update_max_frame(adapter, vf->max_frame_size);
+    }
+}
+
+
+static void
+ixgbe_init_vf(struct adapter *adapter, struct ixgbe_vf *vf)
+{
+    struct ixgbe_hw *hw;
+    uint32_t vf_index, pfmbimr;
+
+    IXGBE_CORE_LOCK_ASSERT(adapter);
+
+    hw = &adapter->hw;
+
+    if (!(vf->flags & IXGBE_VF_ACTIVE))
+        return;
+
+    vf_index = IXGBE_VF_INDEX(vf->pool);
+    pfmbimr = IXGBE_READ_REG(hw, IXGBE_PFMBIMR(vf_index));
+    pfmbimr |= IXGBE_VF_BIT(vf->pool);
+    IXGBE_WRITE_REG(hw, IXGBE_PFMBIMR(vf_index), pfmbimr);
+
+    ixgbe_vf_set_default_vlan(adapter, vf, vf->vlan_tag);
+
+    // XXX multicast addresses
+
+    if (ixgbe_validate_mac_addr(vf->ether_addr) == 0) {
+        ixgbe_set_rar(&adapter->hw, vf->rar_index,
+            vf->ether_addr, vf->pool, TRUE);
+    }
+
+    ixgbe_vf_enable_transmit(adapter, vf);
+    ixgbe_vf_enable_receive(adapter, vf);
+
+    ixgbe_send_vf_msg(adapter, vf, IXGBE_PF_CONTROL_MSG);
+}
+
+static int
+ixgbe_add_vf(device_t dev, u16 vfnum, const nvlist_t *config)
+{
+    struct adapter *adapter;
+    struct ixgbe_vf *vf;
+    const void *mac;
+
+    adapter = device_get_softc(dev);
+
+    KASSERT(vfnum < adapter->num_vfs, ("VF index %d is out of range %d",
+        vfnum, adapter->num_vfs));
+
+    IXGBE_CORE_LOCK(adapter);
+    vf = &adapter->vfs[vfnum];
+    vf->pool = vfnum;
+
+    /* RAR[0] is used by the PF so use vfnum + 1 for VF RAR. */
+    vf->rar_index = vfnum + 1;
+    vf->default_vlan = 0;
+    vf->max_frame_size = ETHER_MAX_LEN;
+    ixgbe_update_max_frame(adapter, vf->max_frame_size);
+
+    if (nvlist_exists_binary(config, "mac-addr")) {
+        mac = nvlist_get_binary(config, "mac-addr", NULL);
+        bcopy(mac, vf->ether_addr, ETHER_ADDR_LEN);
+        if (nvlist_get_bool(config, "allow-set-mac"))
+            vf->flags |= IXGBE_VF_CAP_MAC;
+    } else
+        /*
+         * If the administrator has not specified a MAC address then
+         * we must allow the VF to choose one.
+         */
+        vf->flags |= IXGBE_VF_CAP_MAC;
+
+    vf->flags |= IXGBE_VF_ACTIVE;
+
+    ixgbe_init_vf(adapter, vf);
+    IXGBE_CORE_UNLOCK(adapter);
+
+    return (0);
+}
+#endif /* PCI_IOV */
+
Index: sys/dev/ixgbe/legacy_if_ixv.c
===================================================================
--- /dev/null
+++ sys/dev/ixgbe/legacy_if_ixv.c
@@ -0,0 +1,2203 @@
+/******************************************************************************
+
+  Copyright (c) 2001-2015, Intel Corporation
+  All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions are met:
+
+   1. Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+
+   2. Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+
+   3.
Neither the name of the Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + +******************************************************************************/ +/*$FreeBSD$*/ + + +#ifndef IXGBE_STANDALONE_BUILD +#include "opt_inet.h" +#include "opt_inet6.h" +#endif + +#include "ixgbe.h" + +#include +#include + +/********************************************************************* + * Driver version + *********************************************************************/ +char ixv_driver_version[] = "1.4.6-k"; + +/********************************************************************* + * PCI Device ID Table + * + * Used by probe to select devices to load on + * Last field stores an index into ixv_strings + * Last entry must be all 0s + * + * { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index } + *********************************************************************/ + +static ixgbe_vendor_info_t ixv_vendor_info_array[] = +{ + {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_VF, 0, 0, 0}, + {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540_VF, 0, 0, 0}, + {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550_VF, 0, 0, 0}, + {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_X_VF, 0, 0, 0}, + /* required last entry */ + {0, 0, 0, 0, 0} +}; + +/********************************************************************* + * Table of branding strings + *********************************************************************/ + +static char *ixv_strings[] = { + "Intel(R) PRO/10GbE Virtual Function Network Driver" +}; + +/********************************************************************* + * Function prototypes + *********************************************************************/ +static int ixv_probe(device_t); +static int ixv_attach(device_t); +static int ixv_detach(device_t); +static int ixv_shutdown(device_t); +static int ixv_ioctl(struct ifnet *, u_long, caddr_t); +static void ixv_init(void *); +static void ixv_init_locked(struct adapter *); +static void ixv_stop(void *); +static void ixv_media_status(struct ifnet *, struct ifmediareq *); +static int ixv_media_change(struct ifnet *); +static void ixv_identify_hardware(struct adapter *); +static int ixv_allocate_pci_resources(struct adapter *); +static int ixv_allocate_msix(struct adapter *); +static int ixv_setup_msix(struct adapter *); +static void ixv_free_pci_resources(struct adapter *); +static void ixv_local_timer(void *); +static void ixv_setup_interface(device_t, struct adapter *); +static void ixv_config_link(struct adapter *); + +static void ixv_initialize_transmit_units(struct adapter *); +static void ixv_initialize_receive_units(struct adapter *); + +static void 
ixv_enable_intr(struct adapter *);
+static void	ixv_disable_intr(struct adapter *);
+static void	ixv_set_multi(struct adapter *);
+static void	ixv_update_link_status(struct adapter *);
+static int	ixv_sysctl_debug(SYSCTL_HANDLER_ARGS);
+static void	ixv_set_ivar(struct adapter *, u8, u8, s8);
+static void	ixv_configure_ivars(struct adapter *);
+static u8 *	ixv_mc_array_itr(struct ixgbe_hw *, u8 **, u32 *);
+
+static void	ixv_setup_vlan_support(struct adapter *);
+static void	ixv_register_vlan(void *, struct ifnet *, u16);
+static void	ixv_unregister_vlan(void *, struct ifnet *, u16);
+
+static void	ixv_save_stats(struct adapter *);
+static void	ixv_init_stats(struct adapter *);
+static void	ixv_update_stats(struct adapter *);
+static void	ixv_add_stats_sysctls(struct adapter *);
+static void	ixv_set_sysctl_value(struct adapter *, const char *,
+		    const char *, int *, int);
+
+/* The MSI/X Interrupt handlers */
+static void	ixv_msix_que(void *);
+static void	ixv_msix_mbx(void *);
+
+/* Deferred interrupt tasklets */
+static void	ixv_handle_que(void *, int);
+static void	ixv_handle_mbx(void *, int);
+
+#ifdef DEV_NETMAP
+/*
+ * This is defined in <dev/netmap/ixgbe_netmap.h>, which is included by
+ * if_ix.c.
+ */
+extern void ixgbe_netmap_attach(struct adapter *adapter);
+#endif /* DEV_NETMAP */
+
+/*********************************************************************
+ *  FreeBSD Device Interface Entry Points
+ *********************************************************************/
+
+static device_method_t ixv_methods[] = {
+	/* Device interface */
+	DEVMETHOD(device_probe, ixv_probe),
+	DEVMETHOD(device_attach, ixv_attach),
+	DEVMETHOD(device_detach, ixv_detach),
+	DEVMETHOD(device_shutdown, ixv_shutdown),
+	DEVMETHOD_END
+};
+
+static driver_t ixv_driver = {
+	"ixv", ixv_methods, sizeof(struct adapter),
+};
+
+devclass_t ixv_devclass;
+DRIVER_MODULE(ixv, pci, ixv_driver, ixv_devclass, 0, 0);
+MODULE_DEPEND(ixv, pci, 1, 1, 1);
+MODULE_DEPEND(ixv, ether, 1, 1, 1);
+#ifdef DEV_NETMAP
+MODULE_DEPEND(ixv, netmap, 1, 1, 1);
+#endif /* DEV_NETMAP */
+/* XXX depend on 'ix' ? */
+
+/*
+** TUNEABLE PARAMETERS:
+*/
+
+/* Number of Queues - do not exceed MSIX vectors - 1 */
+static int ixv_num_queues = 1;
+TUNABLE_INT("hw.ixv.num_queues", &ixv_num_queues);
+
+/*
+** AIM: Adaptive Interrupt Moderation
+** which means that the interrupt rate
+** is varied over time based on the
+** traffic for that interrupt vector
+*/
+static int ixv_enable_aim = FALSE;
+TUNABLE_INT("hw.ixv.enable_aim", &ixv_enable_aim);
+
+/* How many packets rxeof tries to clean at a time */
+static int ixv_rx_process_limit = 256;
+TUNABLE_INT("hw.ixv.rx_process_limit", &ixv_rx_process_limit);
+
+/* How many packets txeof tries to clean at a time */
+static int ixv_tx_process_limit = 256;
+TUNABLE_INT("hw.ixv.tx_process_limit", &ixv_tx_process_limit);
+
+/* Flow control setting, default to full */
+static int ixv_flow_control = ixgbe_fc_full;
+TUNABLE_INT("hw.ixv.flow_control", &ixv_flow_control);
+
+/*
+ * Header split: this causes the hardware to DMA
+ * the header into a separate mbuf from the payload;
+ * it can be a performance win in some workloads, but
+ * in others it actually hurts, so it's off by default.
+ */
+static int ixv_header_split = FALSE;
+TUNABLE_INT("hw.ixv.hdr_split", &ixv_header_split);
+
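[Editor's note, not part of the patch] These TUNABLE_INT knobs, like the descriptor-count tunables that follow, are read at boot, so they are normally set from /boot/loader.conf; the values below are illustrative only:

    hw.ixv.num_queues="4"
    hw.ixv.enable_aim="1"
    hw.ixv.txd="2048"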
+/*
+** Number of TX descriptors per ring,
+** setting higher than RX as this seems
+** the better performing choice.
+*/
+static int ixv_txd = DEFAULT_TXD;
+TUNABLE_INT("hw.ixv.txd", &ixv_txd);
+
+/* Number of RX descriptors per ring */
+static int ixv_rxd = DEFAULT_RXD;
+TUNABLE_INT("hw.ixv.rxd", &ixv_rxd);
+
+/*
+** Shadow VFTA table, this is needed because
+** the real filter table gets cleared during
+** a soft reset and we need to repopulate it.
+*/
+static u32 ixv_shadow_vfta[IXGBE_VFTA_SIZE];
+
+/*********************************************************************
+ *  Device identification routine
+ *
+ *  ixv_probe determines if the driver should be loaded on
+ *  adapter based on PCI vendor/device id of the adapter.
+ *
+ *  return BUS_PROBE_DEFAULT on success, positive on failure
+ *********************************************************************/
+
+static int
+ixv_probe(device_t dev)
+{
+	ixgbe_vendor_info_t *ent;
+
+	u16 pci_vendor_id = 0;
+	u16 pci_device_id = 0;
+	u16 pci_subvendor_id = 0;
+	u16 pci_subdevice_id = 0;
+	char adapter_name[256];
+
+
+	pci_vendor_id = pci_get_vendor(dev);
+	if (pci_vendor_id != IXGBE_INTEL_VENDOR_ID)
+		return (ENXIO);
+
+	pci_device_id = pci_get_device(dev);
+	pci_subvendor_id = pci_get_subvendor(dev);
+	pci_subdevice_id = pci_get_subdevice(dev);
+
+	ent = ixv_vendor_info_array;
+	while (ent->vendor_id != 0) {
+		if ((pci_vendor_id == ent->vendor_id) &&
+		    (pci_device_id == ent->device_id) &&
+
+		    ((pci_subvendor_id == ent->subvendor_id) ||
+		     (ent->subvendor_id == 0)) &&
+
+		    ((pci_subdevice_id == ent->subdevice_id) ||
+		     (ent->subdevice_id == 0))) {
+			sprintf(adapter_name, "%s, Version - %s",
+			    ixv_strings[ent->index],
+			    ixv_driver_version);
+			device_set_desc_copy(dev, adapter_name);
+			return (BUS_PROBE_DEFAULT);
+		}
+		ent++;
+	}
+	return (ENXIO);
+}
+
+/*********************************************************************
+ *  Device initialization routine
+ *
+ *  The attach entry point is called when the driver is being loaded.
+ *  This routine identifies the type of hardware, allocates all resources
+ *  and initializes the hardware.
+ * + * return 0 on success, positive on failure + *********************************************************************/ + +static int +ixv_attach(device_t dev) +{ + struct adapter *adapter; + struct ixgbe_hw *hw; + int error = 0; + + INIT_DEBUGOUT("ixv_attach: begin"); + + /* Allocate, clear, and link in our adapter structure */ + adapter = device_get_softc(dev); + adapter->dev = dev; + hw = &adapter->hw; + +#ifdef DEV_NETMAP + adapter->init_locked = ixv_init_locked; + adapter->stop_locked = ixv_stop; +#endif + + /* Core Lock Init*/ + IXGBE_CORE_LOCK_INIT(adapter, device_get_nameunit(dev)); + + /* SYSCTL APIs */ + SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), + SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), + OID_AUTO, "debug", CTLTYPE_INT | CTLFLAG_RW, + adapter, 0, ixv_sysctl_debug, "I", "Debug Info"); + + SYSCTL_ADD_INT(device_get_sysctl_ctx(dev), + SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), + OID_AUTO, "enable_aim", CTLFLAG_RW, + &ixv_enable_aim, 1, "Interrupt Moderation"); + + /* Set up the timer callout */ + callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0); + + /* Determine hardware revision */ + ixv_identify_hardware(adapter); + + /* Do base PCI setup - map BAR0 */ + if (ixv_allocate_pci_resources(adapter)) { + device_printf(dev, "ixv_allocate_pci_resources() failed!\n"); + error = ENXIO; + goto err_out; + } + + /* Sysctls for limiting the amount of work done in the taskqueues */ + ixv_set_sysctl_value(adapter, "rx_processing_limit", + "max number of rx packets to process", + &adapter->rx_process_limit, ixv_rx_process_limit); + + ixv_set_sysctl_value(adapter, "tx_processing_limit", + "max number of tx packets to process", + &adapter->tx_process_limit, ixv_tx_process_limit); + + /* Do descriptor calc and sanity checks */ + if (((ixv_txd * sizeof(union ixgbe_adv_tx_desc)) % DBA_ALIGN) != 0 || + ixv_txd < MIN_TXD || ixv_txd > MAX_TXD) { + device_printf(dev, "TXD config issue, using default!\n"); + adapter->num_tx_desc = DEFAULT_TXD; + } else + adapter->num_tx_desc = ixv_txd; + + if (((ixv_rxd * sizeof(union ixgbe_adv_rx_desc)) % DBA_ALIGN) != 0 || + ixv_rxd < MIN_RXD || ixv_rxd > MAX_RXD) { + device_printf(dev, "RXD config issue, using default!\n"); + adapter->num_rx_desc = DEFAULT_RXD; + } else + adapter->num_rx_desc = ixv_rxd; + + /* Allocate our TX/RX Queues */ + if (ixgbe_allocate_queues(adapter)) { + device_printf(dev, "ixgbe_allocate_queues() failed!\n"); + error = ENOMEM; + goto err_out; + } + + /* + ** Initialize the shared code: its + ** at this point the mac type is set. + */ + error = ixgbe_init_shared_code(hw); + if (error) { + device_printf(dev, "ixgbe_init_shared_code() failed!\n"); + error = EIO; + goto err_late; + } + + /* Setup the mailbox */ + ixgbe_init_mbx_params_vf(hw); + + /* Reset mbox api to 1.0 */ + error = ixgbe_reset_hw(hw); + if (error == IXGBE_ERR_RESET_FAILED) + device_printf(dev, "ixgbe_reset_hw() failure: Reset Failed!\n"); + else if (error) + device_printf(dev, "ixgbe_reset_hw() failed with error %d\n", error); + if (error) { + error = EIO; + goto err_late; + } + + /* Negotiate mailbox API version */ + error = ixgbevf_negotiate_api_version(hw, ixgbe_mbox_api_11); + if (error) { + device_printf(dev, "MBX API 1.1 negotiation failed! 
Error %d\n", error); + error = EIO; + goto err_late; + } + + error = ixgbe_init_hw(hw); + if (error) { + device_printf(dev, "ixgbe_init_hw() failed!\n"); + error = EIO; + goto err_late; + } + + error = ixv_allocate_msix(adapter); + if (error) { + device_printf(dev, "ixv_allocate_msix() failed!\n"); + goto err_late; + } + + /* If no mac address was assigned, make a random one */ + if (!ixv_check_ether_addr(hw->mac.addr)) { + u8 addr[ETHER_ADDR_LEN]; + arc4rand(&addr, sizeof(addr), 0); + addr[0] &= 0xFE; + addr[0] |= 0x02; + bcopy(addr, hw->mac.addr, sizeof(addr)); + } + + /* Setup OS specific network interface */ + ixv_setup_interface(dev, adapter); + + /* Do the stats setup */ + ixv_save_stats(adapter); + ixv_init_stats(adapter); + ixv_add_stats_sysctls(adapter); + + /* Register for VLAN events */ + adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config, + ixv_register_vlan, adapter, EVENTHANDLER_PRI_FIRST); + adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig, + ixv_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST); + +#ifdef DEV_NETMAP + ixgbe_netmap_attach(adapter); +#endif /* DEV_NETMAP */ + INIT_DEBUGOUT("ixv_attach: end"); + return (0); + +err_late: + ixgbe_free_transmit_structures(adapter); + ixgbe_free_receive_structures(adapter); +err_out: + ixv_free_pci_resources(adapter); + return (error); + +} + +/********************************************************************* + * Device removal routine + * + * The detach entry point is called when the driver is being removed. + * This routine stops the adapter and deallocates all the resources + * that were allocated for driver operation. + * + * return 0 on success, positive on failure + *********************************************************************/ + +static int +ixv_detach(device_t dev) +{ + struct adapter *adapter = device_get_softc(dev); + struct ix_queue *que = adapter->queues; + + INIT_DEBUGOUT("ixv_detach: begin"); + + /* Make sure VLANS are not using driver */ + if (adapter->ifp->if_vlantrunk != NULL) { + device_printf(dev, "Vlan in use, detach first\n"); + return (EBUSY); + } + + IXGBE_CORE_LOCK(adapter); + ixv_stop(adapter); + IXGBE_CORE_UNLOCK(adapter); + + for (int i = 0; i < adapter->num_queues; i++, que++) { + if (que->tq) { + struct tx_ring *txr = que->txr; + taskqueue_drain(que->tq, &txr->txq_task); + taskqueue_drain(que->tq, &que->que_task); + taskqueue_free(que->tq); + } + } + + /* Drain the Mailbox(link) queue */ + if (adapter->tq) { + taskqueue_drain(adapter->tq, &adapter->link_task); + taskqueue_free(adapter->tq); + } + + /* Unregister VLAN events */ + if (adapter->vlan_attach != NULL) + EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach); + if (adapter->vlan_detach != NULL) + EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach); + + ether_ifdetach(adapter->ifp); + callout_drain(&adapter->timer); +#ifdef DEV_NETMAP + netmap_detach(adapter->ifp); +#endif /* DEV_NETMAP */ + ixv_free_pci_resources(adapter); + bus_generic_detach(dev); + if_free(adapter->ifp); + + ixgbe_free_transmit_structures(adapter); + ixgbe_free_receive_structures(adapter); + + IXGBE_CORE_LOCK_DESTROY(adapter); + return (0); +} + +/********************************************************************* + * + * Shutdown entry point + * + **********************************************************************/ +static int +ixv_shutdown(device_t dev) +{ + struct adapter *adapter = device_get_softc(dev); + IXGBE_CORE_LOCK(adapter); + ixv_stop(adapter); + IXGBE_CORE_UNLOCK(adapter); + return (0); +} + + 
+/********************************************************************* + * Ioctl entry point + * + * ixv_ioctl is called when the user wants to configure the + * interface. + * + * return 0 on success, positive on failure + **********************************************************************/ + +static int +ixv_ioctl(struct ifnet * ifp, u_long command, caddr_t data) +{ + struct adapter *adapter = ifp->if_softc; + struct ifreq *ifr = (struct ifreq *) data; +#if defined(INET) || defined(INET6) + struct ifaddr *ifa = (struct ifaddr *) data; + bool avoid_reset = FALSE; +#endif + int error = 0; + + switch (command) { + + case SIOCSIFADDR: +#ifdef INET + if (ifa->ifa_addr->sa_family == AF_INET) + avoid_reset = TRUE; +#endif +#ifdef INET6 + if (ifa->ifa_addr->sa_family == AF_INET6) + avoid_reset = TRUE; +#endif +#if defined(INET) || defined(INET6) + /* + ** Calling init results in link renegotiation, + ** so we avoid doing it when possible. + */ + if (avoid_reset) { + ifp->if_flags |= IFF_UP; + if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) + ixv_init(adapter); + if (!(ifp->if_flags & IFF_NOARP)) + arp_ifinit(ifp, ifa); + } else + error = ether_ioctl(ifp, command, data); + break; +#endif + case SIOCSIFMTU: + IOCTL_DEBUGOUT("ioctl: SIOCSIFMTU (Set Interface MTU)"); + if (ifr->ifr_mtu > IXGBE_MAX_FRAME_SIZE - IXGBE_MTU_HDR) { + error = EINVAL; + } else { + IXGBE_CORE_LOCK(adapter); + ifp->if_mtu = ifr->ifr_mtu; + adapter->max_frame_size = + ifp->if_mtu + IXGBE_MTU_HDR; + if (ifp->if_drv_flags & IFF_DRV_RUNNING) + ixv_init_locked(adapter); + IXGBE_CORE_UNLOCK(adapter); + } + break; + case SIOCSIFFLAGS: + IOCTL_DEBUGOUT("ioctl: SIOCSIFFLAGS (Set Interface Flags)"); + IXGBE_CORE_LOCK(adapter); + if (ifp->if_flags & IFF_UP) { + if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) + ixv_init_locked(adapter); + } else + if (ifp->if_drv_flags & IFF_DRV_RUNNING) + ixv_stop(adapter); + adapter->if_flags = ifp->if_flags; + IXGBE_CORE_UNLOCK(adapter); + break; + case SIOCADDMULTI: + case SIOCDELMULTI: + IOCTL_DEBUGOUT("ioctl: SIOC(ADD|DEL)MULTI"); + if (ifp->if_drv_flags & IFF_DRV_RUNNING) { + IXGBE_CORE_LOCK(adapter); + ixv_disable_intr(adapter); + ixv_set_multi(adapter); + ixv_enable_intr(adapter); + IXGBE_CORE_UNLOCK(adapter); + } + break; + case SIOCSIFMEDIA: + case SIOCGIFMEDIA: + IOCTL_DEBUGOUT("ioctl: SIOCxIFMEDIA (Get/Set Interface Media)"); + error = ifmedia_ioctl(ifp, ifr, &adapter->media, command); + break; + case SIOCSIFCAP: + { + int mask = ifr->ifr_reqcap ^ ifp->if_capenable; + IOCTL_DEBUGOUT("ioctl: SIOCSIFCAP (Set Capabilities)"); + if (mask & IFCAP_HWCSUM) + ifp->if_capenable ^= IFCAP_HWCSUM; + if (mask & IFCAP_TSO4) + ifp->if_capenable ^= IFCAP_TSO4; + if (mask & IFCAP_LRO) + ifp->if_capenable ^= IFCAP_LRO; + if (mask & IFCAP_VLAN_HWTAGGING) + ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; + if (ifp->if_drv_flags & IFF_DRV_RUNNING) { + IXGBE_CORE_LOCK(adapter); + ixv_init_locked(adapter); + IXGBE_CORE_UNLOCK(adapter); + } + VLAN_CAPABILITIES(ifp); + break; + } + + default: + IOCTL_DEBUGOUT1("ioctl: UNKNOWN (0x%X)\n", (int)command); + error = ether_ioctl(ifp, command, data); + break; + } + + return (error); +} + +/********************************************************************* + * Init entry point + * + * This routine is used in two ways. It is used by the stack as + * init entry point in network interface structure. It is also used + * by the driver as a hw/sw initialization routine to get to a + * consistent state. 
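+ *
+ *  Usage note (editor's example, assuming the interface attaches as
+ *  ixv0): "ifconfig ixv0 up" reaches ixv_init() through the
+ *  ifp->if_init hook installed in ixv_setup_interface(), while the
+ *  driver's internal paths call ixv_init_locked() directly with the
+ *  core lock already held.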
+ * + * return 0 on success, positive on failure + **********************************************************************/ +#define IXGBE_MHADD_MFS_SHIFT 16 + +static void +ixv_init_locked(struct adapter *adapter) +{ + struct ifnet *ifp = adapter->ifp; + device_t dev = adapter->dev; + struct ixgbe_hw *hw = &adapter->hw; + int error = 0; + + INIT_DEBUGOUT("ixv_init_locked: begin"); + mtx_assert(&adapter->core_mtx, MA_OWNED); + hw->adapter_stopped = FALSE; + ixgbe_stop_adapter(hw); + callout_stop(&adapter->timer); + + /* reprogram the RAR[0] in case user changed it. */ + ixgbe_set_rar(hw, 0, hw->mac.addr, 0, IXGBE_RAH_AV); + + /* Get the latest mac address, User can use a LAA */ + bcopy(IF_LLADDR(adapter->ifp), hw->mac.addr, + IXGBE_ETH_LENGTH_OF_ADDRESS); + ixgbe_set_rar(hw, 0, hw->mac.addr, 0, 1); + hw->addr_ctrl.rar_used_count = 1; + + /* Prepare transmit descriptors and buffers */ + if (ixgbe_setup_transmit_structures(adapter)) { + device_printf(dev, "Could not setup transmit structures\n"); + ixv_stop(adapter); + return; + } + + /* Reset VF and renegotiate mailbox API version */ + ixgbe_reset_hw(hw); + error = ixgbevf_negotiate_api_version(hw, ixgbe_mbox_api_11); + if (error) + device_printf(dev, "MBX API 1.1 negotiation failed! Error %d\n", error); + + ixv_initialize_transmit_units(adapter); + + /* Setup Multicast table */ + ixv_set_multi(adapter); + + /* + ** Determine the correct mbuf pool + ** for doing jumbo/headersplit + */ + if (ifp->if_mtu > ETHERMTU) + adapter->rx_mbuf_sz = MJUMPAGESIZE; + else + adapter->rx_mbuf_sz = MCLBYTES; + + /* Prepare receive descriptors and buffers */ + if (ixgbe_setup_receive_structures(adapter)) { + device_printf(dev, "Could not setup receive structures\n"); + ixv_stop(adapter); + return; + } + + /* Configure RX settings */ + ixv_initialize_receive_units(adapter); + + /* Set the various hardware offload abilities */ + ifp->if_hwassist = 0; + if (ifp->if_capenable & IFCAP_TSO4) + ifp->if_hwassist |= CSUM_TSO; + if (ifp->if_capenable & IFCAP_TXCSUM) { + ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP); +#if __FreeBSD_version >= 800000 + ifp->if_hwassist |= CSUM_SCTP; +#endif + } + + /* Set up VLAN offload and filter */ + ixv_setup_vlan_support(adapter); + + /* Set up MSI/X routing */ + ixv_configure_ivars(adapter); + + /* Set up auto-mask */ + IXGBE_WRITE_REG(hw, IXGBE_VTEIAM, IXGBE_EICS_RTX_QUEUE); + + /* Set moderation on the Link interrupt */ + IXGBE_WRITE_REG(hw, IXGBE_VTEITR(adapter->vector), IXGBE_LINK_ITR); + + /* Stats init */ + ixv_init_stats(adapter); + + /* Config/Enable Link */ + ixv_config_link(adapter); + + /* Start watchdog */ + callout_reset(&adapter->timer, hz, ixv_local_timer, adapter); + + /* And now turn on interrupts */ + ixv_enable_intr(adapter); + + /* Now inform the stack we're ready */ + ifp->if_drv_flags |= IFF_DRV_RUNNING; + ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; + + return; +} + +static void +ixv_init(void *arg) +{ + struct adapter *adapter = arg; + + IXGBE_CORE_LOCK(adapter); + ixv_init_locked(adapter); + IXGBE_CORE_UNLOCK(adapter); + return; +} + + +/* +** +** MSIX Interrupt Handlers and Tasklets +** +*/ + +static inline void +ixv_enable_queue(struct adapter *adapter, u32 vector) +{ + struct ixgbe_hw *hw = &adapter->hw; + u32 queue = 1 << vector; + u32 mask; + + mask = (IXGBE_EIMS_RTX_QUEUE & queue); + IXGBE_WRITE_REG(hw, IXGBE_VTEIMS, mask); +} + +static inline void +ixv_disable_queue(struct adapter *adapter, u32 vector) +{ + struct ixgbe_hw *hw = &adapter->hw; + u64 queue = (u64)(1 << vector); + u32 mask; + + mask = 
(IXGBE_EIMS_RTX_QUEUE & queue);
+	IXGBE_WRITE_REG(hw, IXGBE_VTEIMC, mask);
+}
+
+static inline void
+ixv_rearm_queues(struct adapter *adapter, u64 queues)
+{
+	u32 mask = (IXGBE_EIMS_RTX_QUEUE & queues);
+	IXGBE_WRITE_REG(&adapter->hw, IXGBE_VTEICS, mask);
+}
+
+
+static void
+ixv_handle_que(void *context, int pending)
+{
+	struct ix_queue *que = context;
+	struct adapter  *adapter = que->adapter;
+	struct tx_ring  *txr = que->txr;
+	struct ifnet    *ifp = adapter->ifp;
+	bool		more;
+
+	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
+		more = ixgbe_rxeof(que);
+		IXGBE_TX_LOCK(txr);
+		ixgbe_txeof(txr);
+#if __FreeBSD_version >= 800000
+		if (!drbr_empty(ifp, txr->br))
+			ixgbe_mq_start_locked(ifp, txr);
+#else
+		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
+			ixgbe_start_locked(txr, ifp);
+#endif
+		IXGBE_TX_UNLOCK(txr);
+		if (more) {
+			taskqueue_enqueue(que->tq, &que->que_task);
+			return;
+		}
+	}
+
+	/* Re-enable this interrupt */
+	ixv_enable_queue(adapter, que->msix);
+	return;
+}
+
+/*********************************************************************
+ *
+ *  MSI-X Queue Interrupt Service routine
+ *
+ **********************************************************************/
+void
+ixv_msix_que(void *arg)
+{
+	struct ix_queue	*que = arg;
+	struct adapter  *adapter = que->adapter;
+	struct ifnet    *ifp = adapter->ifp;
+	struct tx_ring	*txr = que->txr;
+	struct rx_ring	*rxr = que->rxr;
+	bool		more;
+	u32		newitr = 0;
+
+	ixv_disable_queue(adapter, que->msix);
+	++que->irqs;
+
+	more = ixgbe_rxeof(que);
+
+	IXGBE_TX_LOCK(txr);
+	ixgbe_txeof(txr);
+	/*
+	** Make certain that if the stack
+	** has anything queued, the task gets
+	** scheduled to handle it.
+	*/
+#ifdef IXGBE_LEGACY_TX
+	if (!IFQ_DRV_IS_EMPTY(&adapter->ifp->if_snd))
+		ixgbe_start_locked(txr, ifp);
+#else
+	if (!drbr_empty(adapter->ifp, txr->br))
+		ixgbe_mq_start_locked(ifp, txr);
+#endif
+	IXGBE_TX_UNLOCK(txr);
+
+	/* Do AIM (adaptive interrupt moderation) now? */
+	if (ixv_enable_aim == FALSE)
+		goto no_calc;
+	/*
+	** Do Adaptive Interrupt Moderation:
+	**  - Write out the last calculated setting
+	**  - Calculate based on the average packet size
+	**    over the last interval.
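+	**
+	** Worked example (editor's illustration): with an average of
+	** 1500-byte frames, newitr = 1500 + 24 = 1524; that is above
+	** the 300-1200 "mid range", so it is halved to 762 and then
+	** replicated into the high 16 bits (newitr |= newitr << 16)
+	** before being written to VTEITR on the next interrupt.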
+	*/
+	if (que->eitr_setting)
+		IXGBE_WRITE_REG(&adapter->hw,
+		    IXGBE_VTEITR(que->msix),
+		    que->eitr_setting);
+
+	que->eitr_setting = 0;
+
+	/* Idle, do nothing */
+	if ((txr->bytes == 0) && (rxr->bytes == 0))
+		goto no_calc;
+
+	if ((txr->bytes) && (txr->packets))
+		newitr = txr->bytes/txr->packets;
+	if ((rxr->bytes) && (rxr->packets))
+		newitr = max(newitr,
+		    (rxr->bytes / rxr->packets));
+	newitr += 24; /* account for hardware frame, crc */
+
+	/* set an upper boundary */
+	newitr = min(newitr, 3000);
+
+	/* Be nice to the mid range */
+	if ((newitr > 300) && (newitr < 1200))
+		newitr = (newitr / 3);
+	else
+		newitr = (newitr / 2);
+
+	newitr |= newitr << 16;
+
+	/* save for next interrupt */
+	que->eitr_setting = newitr;
+
+	/* Reset state */
+	txr->bytes = 0;
+	txr->packets = 0;
+	rxr->bytes = 0;
+	rxr->packets = 0;
+
+no_calc:
+	if (more)
+		taskqueue_enqueue(que->tq, &que->que_task);
+	else /* Re-enable this interrupt */
+		ixv_enable_queue(adapter, que->msix);
+	return;
+}
+
+static void
+ixv_msix_mbx(void *arg)
+{
+	struct adapter	*adapter = arg;
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32		reg;
+
+	++adapter->link_irq;
+
+	/* First get the cause */
+	reg = IXGBE_READ_REG(hw, IXGBE_VTEICS);
+	/* Clear interrupt with write */
+	IXGBE_WRITE_REG(hw, IXGBE_VTEICR, reg);
+
+	/* Link status change */
+	if (reg & IXGBE_EICR_LSC)
+		taskqueue_enqueue(adapter->tq, &adapter->link_task);
+
+	IXGBE_WRITE_REG(hw, IXGBE_VTEIMS, IXGBE_EIMS_OTHER);
+	return;
+}
+
+/*********************************************************************
+ *
+ *  Media Ioctl callback
+ *
+ *  This routine is called whenever the user queries the status of
+ *  the interface using ifconfig.
+ *
+ **********************************************************************/
+static void
+ixv_media_status(struct ifnet * ifp, struct ifmediareq * ifmr)
+{
+	struct adapter *adapter = ifp->if_softc;
+
+	INIT_DEBUGOUT("ixv_media_status: begin");
+	IXGBE_CORE_LOCK(adapter);
+	ixv_update_link_status(adapter);
+
+	ifmr->ifm_status = IFM_AVALID;
+	ifmr->ifm_active = IFM_ETHER;
+
+	if (!adapter->link_active) {
+		IXGBE_CORE_UNLOCK(adapter);
+		return;
+	}
+
+	ifmr->ifm_status |= IFM_ACTIVE;
+
+	switch (adapter->link_speed) {
+	case IXGBE_LINK_SPEED_1GB_FULL:
+		ifmr->ifm_active |= IFM_1000_T | IFM_FDX;
+		break;
+	case IXGBE_LINK_SPEED_10GB_FULL:
+		ifmr->ifm_active |= IFM_FDX;
+		break;
+	}
+
+	IXGBE_CORE_UNLOCK(adapter);
+
+	return;
+}
+
+/*********************************************************************
+ *
+ *  Media Ioctl callback
+ *
+ *  This routine is called when the user changes speed/duplex using
+ *  the media/mediaopt options with ifconfig.
+ *
+ **********************************************************************/
+static int
+ixv_media_change(struct ifnet * ifp)
+{
+	struct adapter *adapter = ifp->if_softc;
+	struct ifmedia *ifm = &adapter->media;
+
+	INIT_DEBUGOUT("ixv_media_change: begin");
+
+	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
+		return (EINVAL);
+
+	switch (IFM_SUBTYPE(ifm->ifm_media)) {
+	case IFM_AUTO:
+		break;
+	default:
+		device_printf(adapter->dev, "Only auto media type\n");
+		return (EINVAL);
+	}
+
+	return (0);
+}
+
+
+/*********************************************************************
+ *  Multicast Update
+ *
+ *  This routine is called whenever the multicast address list is
+ *  updated.
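+ *
+ *  Note (editor's annotation): the mta[] staging array below is sized
+ *  for MAX_NUM_MULTICAST_ADDRESSES entries; the copy loop assumes the
+ *  interface's multicast list fits within that bound.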
+ * + **********************************************************************/ +#define IXGBE_RAR_ENTRIES 16 + +static void +ixv_set_multi(struct adapter *adapter) +{ + u8 mta[MAX_NUM_MULTICAST_ADDRESSES * IXGBE_ETH_LENGTH_OF_ADDRESS]; + u8 *update_ptr; + struct ifmultiaddr *ifma; + int mcnt = 0; + struct ifnet *ifp = adapter->ifp; + + IOCTL_DEBUGOUT("ixv_set_multi: begin"); + +#if __FreeBSD_version < 800000 + IF_ADDR_LOCK(ifp); +#else + if_maddr_rlock(ifp); +#endif + TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { + if (ifma->ifma_addr->sa_family != AF_LINK) + continue; + bcopy(LLADDR((struct sockaddr_dl *) ifma->ifma_addr), + &mta[mcnt * IXGBE_ETH_LENGTH_OF_ADDRESS], + IXGBE_ETH_LENGTH_OF_ADDRESS); + mcnt++; + } +#if __FreeBSD_version < 800000 + IF_ADDR_UNLOCK(ifp); +#else + if_maddr_runlock(ifp); +#endif + + update_ptr = mta; + + ixgbe_update_mc_addr_list(&adapter->hw, + update_ptr, mcnt, ixv_mc_array_itr, TRUE); + + return; +} + +/* + * This is an iterator function now needed by the multicast + * shared code. It simply feeds the shared code routine the + * addresses in the array of ixv_set_multi() one by one. + */ +static u8 * +ixv_mc_array_itr(struct ixgbe_hw *hw, u8 **update_ptr, u32 *vmdq) +{ + u8 *addr = *update_ptr; + u8 *newptr; + *vmdq = 0; + + newptr = addr + IXGBE_ETH_LENGTH_OF_ADDRESS; + *update_ptr = newptr; + return addr; +} + +/********************************************************************* + * Timer routine + * + * This routine checks for link status,updates statistics, + * and runs the watchdog check. + * + **********************************************************************/ + +static void +ixv_local_timer(void *arg) +{ + struct adapter *adapter = arg; + device_t dev = adapter->dev; + struct ix_queue *que = adapter->queues; + u64 queues = 0; + int hung = 0; + + mtx_assert(&adapter->core_mtx, MA_OWNED); + + ixv_update_link_status(adapter); + + /* Stats Update */ + ixv_update_stats(adapter); + + /* + ** Check the TX queues status + ** - mark hung queues so we don't schedule on them + ** - watchdog only if all queues show hung + */ + for (int i = 0; i < adapter->num_queues; i++, que++) { + /* Keep track of queues with work for soft irq */ + if (que->txr->busy) + queues |= ((u64)1 << que->me); + /* + ** Each time txeof runs without cleaning, but there + ** are uncleaned descriptors it increments busy. If + ** we get to the MAX we declare it hung. + */ + if (que->busy == IXGBE_QUEUE_HUNG) { + ++hung; + /* Mark the queue as inactive */ + adapter->active_queues &= ~((u64)1 << que->me); + continue; + } else { + /* Check if we've come back from hung */ + if ((adapter->active_queues & ((u64)1 << que->me)) == 0) + adapter->active_queues |= ((u64)1 << que->me); + } + if (que->busy >= IXGBE_MAX_TX_BUSY) { + device_printf(dev,"Warning queue %d " + "appears to be hung!\n", i); + que->txr->busy = IXGBE_QUEUE_HUNG; + ++hung; + } + + } + + /* Only truly watchdog if all queues show hung */ + if (hung == adapter->num_queues) + goto watchdog; + else if (queues != 0) { /* Force an IRQ on queues with work */ + ixv_rearm_queues(adapter, queues); + } + + callout_reset(&adapter->timer, hz, ixv_local_timer, adapter); + return; + +watchdog: + device_printf(adapter->dev, "Watchdog timeout -- resetting\n"); + adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING; + adapter->watchdog_events++; + ixv_init_locked(adapter); +} + +/* +** Note: this routine updates the OS on the link state +** the real check of the hardware only happens with +** a link interrupt. 
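+**
+** (Editor's note: adapter->link_up and adapter->link_speed are
+** refreshed by ixv_handle_mbx() via ixgbe_check_link() when the
+** mailbox vector fires; this routine only propagates that state to
+** the ifnet with if_link_state_change().)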
+*/ +static void +ixv_update_link_status(struct adapter *adapter) +{ + struct ifnet *ifp = adapter->ifp; + device_t dev = adapter->dev; + + if (adapter->link_up){ + if (adapter->link_active == FALSE) { + if (bootverbose) + device_printf(dev,"Link is up %d Gbps %s \n", + ((adapter->link_speed == 128)? 10:1), + "Full Duplex"); + adapter->link_active = TRUE; + if_link_state_change(ifp, LINK_STATE_UP); + } + } else { /* Link down */ + if (adapter->link_active == TRUE) { + if (bootverbose) + device_printf(dev,"Link is Down\n"); + if_link_state_change(ifp, LINK_STATE_DOWN); + adapter->link_active = FALSE; + } + } + + return; +} + + +/********************************************************************* + * + * This routine disables all traffic on the adapter by issuing a + * global reset on the MAC and deallocates TX/RX buffers. + * + **********************************************************************/ + +static void +ixv_stop(void *arg) +{ + struct ifnet *ifp; + struct adapter *adapter = arg; + struct ixgbe_hw *hw = &adapter->hw; + ifp = adapter->ifp; + + mtx_assert(&adapter->core_mtx, MA_OWNED); + + INIT_DEBUGOUT("ixv_stop: begin\n"); + ixv_disable_intr(adapter); + + /* Tell the stack that the interface is no longer active */ + ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); + + ixgbe_reset_hw(hw); + adapter->hw.adapter_stopped = FALSE; + ixgbe_stop_adapter(hw); + callout_stop(&adapter->timer); + + /* reprogram the RAR[0] in case user changed it. */ + ixgbe_set_rar(hw, 0, hw->mac.addr, 0, IXGBE_RAH_AV); + + return; +} + + +/********************************************************************* + * + * Determine hardware revision. + * + **********************************************************************/ +static void +ixv_identify_hardware(struct adapter *adapter) +{ + device_t dev = adapter->dev; + struct ixgbe_hw *hw = &adapter->hw; + + /* + ** Make sure BUSMASTER is set, on a VM under + ** KVM it may not be and will break things. 
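+	**
+	** (Editor's note: pci_enable_busmaster(9) sets the bus-master
+	** enable bit in the PCI command register; without it the VF
+	** cannot perform DMA, so the descriptor rings would never be
+	** fetched.)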
+ */ + pci_enable_busmaster(dev); + + /* Save off the information about this board */ + hw->vendor_id = pci_get_vendor(dev); + hw->device_id = pci_get_device(dev); + hw->revision_id = pci_read_config(dev, PCIR_REVID, 1); + hw->subsystem_vendor_id = + pci_read_config(dev, PCIR_SUBVEND_0, 2); + hw->subsystem_device_id = + pci_read_config(dev, PCIR_SUBDEV_0, 2); + + /* We need this to determine device-specific things */ + ixgbe_set_mac_type(hw); + + /* Set the right number of segments */ + adapter->num_segs = IXGBE_82599_SCATTER; + + return; +} + +/********************************************************************* + * + * Setup MSIX Interrupt resources and handlers + * + **********************************************************************/ +static int +ixv_allocate_msix(struct adapter *adapter) +{ + device_t dev = adapter->dev; + struct ix_queue *que = adapter->queues; + struct tx_ring *txr = adapter->tx_rings; + int error, rid, vector = 0; + + for (int i = 0; i < adapter->num_queues; i++, vector++, que++, txr++) { + rid = vector + 1; + que->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, + RF_SHAREABLE | RF_ACTIVE); + if (que->res == NULL) { + device_printf(dev,"Unable to allocate" + " bus resource: que interrupt [%d]\n", vector); + return (ENXIO); + } + /* Set the handler function */ + error = bus_setup_intr(dev, que->res, + INTR_TYPE_NET | INTR_MPSAFE, NULL, + ixv_msix_que, que, &que->tag); + if (error) { + que->res = NULL; + device_printf(dev, "Failed to register QUE handler"); + return (error); + } +#if __FreeBSD_version >= 800504 + bus_describe_intr(dev, que->res, que->tag, "que %d", i); +#endif + que->msix = vector; + adapter->active_queues |= (u64)(1 << que->msix); + /* + ** Bind the msix vector, and thus the + ** ring to the corresponding cpu. + */ + if (adapter->num_queues > 1) + bus_bind_intr(dev, que->res, i); + TASK_INIT(&txr->txq_task, 0, ixgbe_deferred_mq_start, txr); + TASK_INIT(&que->que_task, 0, ixv_handle_que, que); + que->tq = taskqueue_create_fast("ixv_que", M_NOWAIT, + taskqueue_thread_enqueue, &que->tq); + taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que", + device_get_nameunit(adapter->dev)); + } + + /* and Mailbox */ + rid = vector + 1; + adapter->res = bus_alloc_resource_any(dev, + SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); + if (!adapter->res) { + device_printf(dev,"Unable to allocate" + " bus resource: MBX interrupt [%d]\n", rid); + return (ENXIO); + } + /* Set the mbx handler function */ + error = bus_setup_intr(dev, adapter->res, + INTR_TYPE_NET | INTR_MPSAFE, NULL, + ixv_msix_mbx, adapter, &adapter->tag); + if (error) { + adapter->res = NULL; + device_printf(dev, "Failed to register LINK handler"); + return (error); + } +#if __FreeBSD_version >= 800504 + bus_describe_intr(dev, adapter->res, adapter->tag, "mbx"); +#endif + adapter->vector = vector; + /* Tasklets for Mailbox */ + TASK_INIT(&adapter->link_task, 0, ixv_handle_mbx, adapter); + adapter->tq = taskqueue_create_fast("ixv_mbx", M_NOWAIT, + taskqueue_thread_enqueue, &adapter->tq); + taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s mbxq", + device_get_nameunit(adapter->dev)); + /* + ** Due to a broken design QEMU will fail to properly + ** enable the guest for MSIX unless the vectors in + ** the table are all set up, so we must rewrite the + ** ENABLE in the MSIX control register again at this + ** point to cause it to successfully initialize us. 
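+	**
+	** (Editor's sketch of the workaround below: pci_find_cap()
+	** locates the MSI-X capability, the message control word at
+	** offset PCIR_MSIX_CTRL is read back, and the
+	** PCIM_MSIXCTRL_MSIX_ENABLE bit is OR'd in and written again.)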
+ */ + if (adapter->hw.mac.type == ixgbe_mac_82599_vf) { + int msix_ctrl; + pci_find_cap(dev, PCIY_MSIX, &rid); + rid += PCIR_MSIX_CTRL; + msix_ctrl = pci_read_config(dev, rid, 2); + msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE; + pci_write_config(dev, rid, msix_ctrl, 2); + } + + return (0); +} + +/* + * Setup MSIX resources, note that the VF + * device MUST use MSIX, there is no fallback. + */ +static int +ixv_setup_msix(struct adapter *adapter) +{ + device_t dev = adapter->dev; + int rid, want, msgs; + + + /* Must have at least 2 MSIX vectors */ + msgs = pci_msix_count(dev); + if (msgs < 2) + goto out; + rid = PCIR_BAR(3); + adapter->msix_mem = bus_alloc_resource_any(dev, + SYS_RES_MEMORY, &rid, RF_ACTIVE); + if (adapter->msix_mem == NULL) { + device_printf(adapter->dev, + "Unable to map MSIX table \n"); + goto out; + } + + /* + ** Want vectors for the queues, + ** plus an additional for mailbox. + */ + want = adapter->num_queues + 1; + if (want > msgs) { + want = msgs; + adapter->num_queues = msgs - 1; + } else + msgs = want; + if ((pci_alloc_msix(dev, &msgs) == 0) && (msgs == want)) { + device_printf(adapter->dev, + "Using MSIX interrupts with %d vectors\n", want); + return (want); + } + /* Release in case alloc was insufficient */ + pci_release_msi(dev); +out: + if (adapter->msix_mem != NULL) { + bus_release_resource(dev, SYS_RES_MEMORY, + rid, adapter->msix_mem); + adapter->msix_mem = NULL; + } + device_printf(adapter->dev,"MSIX config error\n"); + return (ENXIO); +} + + +static int +ixv_allocate_pci_resources(struct adapter *adapter) +{ + int rid; + device_t dev = adapter->dev; + + rid = PCIR_BAR(0); + adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, + &rid, RF_ACTIVE); + + if (!(adapter->pci_mem)) { + device_printf(dev, "Unable to allocate bus resource: memory\n"); + return (ENXIO); + } + + adapter->osdep.mem_bus_space_tag = + rman_get_bustag(adapter->pci_mem); + adapter->osdep.mem_bus_space_handle = + rman_get_bushandle(adapter->pci_mem); + adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle; + + /* Pick up the tuneable queues */ + adapter->num_queues = ixv_num_queues; + adapter->hw.back = adapter; + + /* + ** Now setup MSI/X, should + ** return us the number of + ** configured vectors. + */ + adapter->msix = ixv_setup_msix(adapter); + if (adapter->msix == ENXIO) + return (ENXIO); + else + return (0); +} + +static void +ixv_free_pci_resources(struct adapter * adapter) +{ + struct ix_queue *que = adapter->queues; + device_t dev = adapter->dev; + int rid, memrid; + + memrid = PCIR_BAR(MSIX_82598_BAR); + + /* + ** There is a slight possibility of a failure mode + ** in attach that will result in entering this function + ** before interrupt resources have been initialized, and + ** in that case we do not want to execute the loops below + ** We can detect this reliably by the state of the adapter + ** res pointer. + */ + if (adapter->res == NULL) + goto mem; + + /* + ** Release all msix queue resources: + */ + for (int i = 0; i < adapter->num_queues; i++, que++) { + rid = que->msix + 1; + if (que->tag != NULL) { + bus_teardown_intr(dev, que->res, que->tag); + que->tag = NULL; + } + if (que->res != NULL) + bus_release_resource(dev, SYS_RES_IRQ, rid, que->res); + } + + + /* Clean the Legacy or Link interrupt last */ + if (adapter->vector) /* we are doing MSIX */ + rid = adapter->vector + 1; + else + (adapter->msix != 0) ? 
(rid = 1):(rid = 0); + + if (adapter->tag != NULL) { + bus_teardown_intr(dev, adapter->res, adapter->tag); + adapter->tag = NULL; + } + if (adapter->res != NULL) + bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res); + +mem: + if (adapter->msix) + pci_release_msi(dev); + + if (adapter->msix_mem != NULL) + bus_release_resource(dev, SYS_RES_MEMORY, + memrid, adapter->msix_mem); + + if (adapter->pci_mem != NULL) + bus_release_resource(dev, SYS_RES_MEMORY, + PCIR_BAR(0), adapter->pci_mem); + + return; +} + +/********************************************************************* + * + * Setup networking device structure and register an interface. + * + **********************************************************************/ +static void +ixv_setup_interface(device_t dev, struct adapter *adapter) +{ + struct ifnet *ifp; + + INIT_DEBUGOUT("ixv_setup_interface: begin"); + + ifp = adapter->ifp = if_alloc(IFT_ETHER); + if (ifp == NULL) + panic("%s: can not if_alloc()\n", device_get_nameunit(dev)); + if_initname(ifp, device_get_name(dev), device_get_unit(dev)); + ifp->if_baudrate = 1000000000; + ifp->if_init = ixv_init; + ifp->if_softc = adapter; + ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; + ifp->if_ioctl = ixv_ioctl; +#if __FreeBSD_version >= 800000 + ifp->if_transmit = ixgbe_mq_start; + ifp->if_qflush = ixgbe_qflush; +#else + ifp->if_start = ixgbe_start; +#endif + ifp->if_snd.ifq_maxlen = adapter->num_tx_desc - 2; + + ether_ifattach(ifp, adapter->hw.mac.addr); + + adapter->max_frame_size = + ifp->if_mtu + IXGBE_MTU_HDR_VLAN; + + /* + * Tell the upper layer(s) we support long frames. + */ + ifp->if_hdrlen = sizeof(struct ether_vlan_header); + + ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_TSO4 | IFCAP_VLAN_HWCSUM; + ifp->if_capabilities |= IFCAP_JUMBO_MTU; + ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING + | IFCAP_VLAN_HWTSO + | IFCAP_VLAN_MTU; + ifp->if_capabilities |= IFCAP_LRO; + ifp->if_capenable = ifp->if_capabilities; + + /* + * Specify the media types supported by this adapter and register + * callbacks to update media and link information + */ + ifmedia_init(&adapter->media, IFM_IMASK, ixv_media_change, + ixv_media_status); + ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL); + ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO); + + return; +} + +static void +ixv_config_link(struct adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + u32 autoneg; + + if (hw->mac.ops.check_link) + hw->mac.ops.check_link(hw, &autoneg, + &adapter->link_up, FALSE); +} + + +/********************************************************************* + * + * Enable transmit unit. 
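+ *
+ *  (Editor's sketch of the per-ring programming below: the 64-bit
+ *  descriptor base tdba is split across VFTDBAL/VFTDBAH, VFTDLEN is
+ *  written with the ring size in bytes, the head/tail indices start
+ *  at zero, and WTHRESH = 8 is placed in bits 16+ of VFTXDCTL for
+ *  burst writeback before the ENABLE bit is finally set.)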
+ * + **********************************************************************/ +static void +ixv_initialize_transmit_units(struct adapter *adapter) +{ + struct tx_ring *txr = adapter->tx_rings; + struct ixgbe_hw *hw = &adapter->hw; + + + for (int i = 0; i < adapter->num_queues; i++, txr++) { + u64 tdba = txr->txdma.dma_paddr; + u32 txctrl, txdctl; + + /* Set WTHRESH to 8, burst writeback */ + txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i)); + txdctl |= (8 << 16); + IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl); + + /* Set the HW Tx Head and Tail indices */ + IXGBE_WRITE_REG(&adapter->hw, IXGBE_VFTDH(i), 0); + IXGBE_WRITE_REG(&adapter->hw, IXGBE_VFTDT(i), 0); + + /* Set Tx Tail register */ + txr->tail = IXGBE_VFTDT(i); + + /* Set Ring parameters */ + IXGBE_WRITE_REG(hw, IXGBE_VFTDBAL(i), + (tdba & 0x00000000ffffffffULL)); + IXGBE_WRITE_REG(hw, IXGBE_VFTDBAH(i), (tdba >> 32)); + IXGBE_WRITE_REG(hw, IXGBE_VFTDLEN(i), + adapter->num_tx_desc * + sizeof(struct ixgbe_legacy_tx_desc)); + txctrl = IXGBE_READ_REG(hw, IXGBE_VFDCA_TXCTRL(i)); + txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN; + IXGBE_WRITE_REG(hw, IXGBE_VFDCA_TXCTRL(i), txctrl); + + /* Now enable */ + txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i)); + txdctl |= IXGBE_TXDCTL_ENABLE; + IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl); + } + + return; +} + + +/********************************************************************* + * + * Setup receive registers and features. + * + **********************************************************************/ +#define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT 2 + +static void +ixv_initialize_receive_units(struct adapter *adapter) +{ + struct rx_ring *rxr = adapter->rx_rings; + struct ixgbe_hw *hw = &adapter->hw; + struct ifnet *ifp = adapter->ifp; + u32 bufsz, rxcsum, psrtype; + + if (ifp->if_mtu > ETHERMTU) + bufsz = 4096 >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; + else + bufsz = 2048 >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; + + psrtype = IXGBE_PSRTYPE_TCPHDR | IXGBE_PSRTYPE_UDPHDR | + IXGBE_PSRTYPE_IPV4HDR | IXGBE_PSRTYPE_IPV6HDR | + IXGBE_PSRTYPE_L2HDR; + + IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype); + + /* Tell PF our max_frame size */ + ixgbevf_rlpml_set_vf(hw, adapter->max_frame_size); + + for (int i = 0; i < adapter->num_queues; i++, rxr++) { + u64 rdba = rxr->rxdma.dma_paddr; + u32 reg, rxdctl; + + /* Disable the queue */ + rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i)); + rxdctl &= ~IXGBE_RXDCTL_ENABLE; + IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(i), rxdctl); + for (int j = 0; j < 10; j++) { + if (IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i)) & + IXGBE_RXDCTL_ENABLE) + msec_delay(1); + else + break; + } + wmb(); + /* Setup the Base and Length of the Rx Descriptor Ring */ + IXGBE_WRITE_REG(hw, IXGBE_VFRDBAL(i), + (rdba & 0x00000000ffffffffULL)); + IXGBE_WRITE_REG(hw, IXGBE_VFRDBAH(i), + (rdba >> 32)); + IXGBE_WRITE_REG(hw, IXGBE_VFRDLEN(i), + adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc)); + + /* Reset the ring indices */ + IXGBE_WRITE_REG(hw, IXGBE_VFRDH(rxr->me), 0); + IXGBE_WRITE_REG(hw, IXGBE_VFRDT(rxr->me), 0); + + /* Set up the SRRCTL register */ + reg = IXGBE_READ_REG(hw, IXGBE_VFSRRCTL(i)); + reg &= ~IXGBE_SRRCTL_BSIZEHDR_MASK; + reg &= ~IXGBE_SRRCTL_BSIZEPKT_MASK; + reg |= bufsz; + reg |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF; + IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(i), reg); + + /* Capture Rx Tail index */ + rxr->tail = IXGBE_VFRDT(rxr->me); + + /* Do the queue enabling last */ + rxdctl |= IXGBE_RXDCTL_ENABLE | IXGBE_RXDCTL_VME; + IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(i), rxdctl); + for (int k = 0; k < 10; k++) { + if 
(IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i)) & + IXGBE_RXDCTL_ENABLE) + break; + else + msec_delay(1); + } + wmb(); + + /* Set the Tail Pointer */ +#ifdef DEV_NETMAP + /* + * In netmap mode, we must preserve the buffers made + * available to userspace before the if_init() + * (this is true by default on the TX side, because + * init makes all buffers available to userspace). + * + * netmap_reset() and the device specific routines + * (e.g. ixgbe_setup_receive_rings()) map these + * buffers at the end of the NIC ring, so here we + * must set the RDT (tail) register to make sure + * they are not overwritten. + * + * In this driver the NIC ring starts at RDH = 0, + * RDT points to the last slot available for reception (?), + * so RDT = num_rx_desc - 1 means the whole ring is available. + */ + if (ifp->if_capenable & IFCAP_NETMAP) { + struct netmap_adapter *na = NA(adapter->ifp); + struct netmap_kring *kring = &na->rx_rings[i]; + int t = na->num_rx_desc - 1 - nm_kr_rxspace(kring); + + IXGBE_WRITE_REG(hw, IXGBE_VFRDT(rxr->me), t); + } else +#endif /* DEV_NETMAP */ + IXGBE_WRITE_REG(hw, IXGBE_VFRDT(rxr->me), + adapter->num_rx_desc - 1); + } + + rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM); + + if (ifp->if_capenable & IFCAP_RXCSUM) + rxcsum |= IXGBE_RXCSUM_PCSD; + + if (!(rxcsum & IXGBE_RXCSUM_PCSD)) + rxcsum |= IXGBE_RXCSUM_IPPCSE; + + IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum); + + return; +} + +static void +ixv_setup_vlan_support(struct adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + u32 ctrl, vid, vfta, retry; + struct rx_ring *rxr; + + /* + ** We get here thru init_locked, meaning + ** a soft reset, this has already cleared + ** the VFTA and other state, so if there + ** have been no vlan's registered do nothing. + */ + if (adapter->num_vlans == 0) + return; + + /* Enable the queues */ + for (int i = 0; i < adapter->num_queues; i++) { + ctrl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i)); + ctrl |= IXGBE_RXDCTL_VME; + IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(i), ctrl); + /* + * Let Rx path know that it needs to store VLAN tag + * as part of extra mbuf info. + */ + rxr = &adapter->rx_rings[i]; + rxr->vtag_strip = TRUE; + } + + /* + ** A soft reset zero's out the VFTA, so + ** we need to repopulate it now. + */ + for (int i = 0; i < IXGBE_VFTA_SIZE; i++) { + if (ixv_shadow_vfta[i] == 0) + continue; + vfta = ixv_shadow_vfta[i]; + /* + ** Reconstruct the vlan id's + ** based on the bits set in each + ** of the array ints. + */ + for (int j = 0; j < 32; j++) { + retry = 0; + if ((vfta & (1 << j)) == 0) + continue; + vid = (i * 32) + j; + /* Call the shared code mailbox routine */ + while (ixgbe_set_vfta(hw, vid, 0, TRUE)) { + if (++retry > 5) + break; + } + } + } +} + +/* +** This routine is run via an vlan config EVENT, +** it enables us to use the HW Filter table since +** we can get the vlan id. This just creates the +** entry in the soft version of the VFTA, init will +** repopulate the real table. +*/ +static void +ixv_register_vlan(void *arg, struct ifnet *ifp, u16 vtag) +{ + struct adapter *adapter = ifp->if_softc; + u16 index, bit; + + if (ifp->if_softc != arg) /* Not our event */ + return; + + if ((vtag == 0) || (vtag > 4095)) /* Invalid */ + return; + + IXGBE_CORE_LOCK(adapter); + index = (vtag >> 5) & 0x7F; + bit = vtag & 0x1F; + ixv_shadow_vfta[index] |= (1 << bit); + ++adapter->num_vlans; + /* Re-init to load the changes */ + ixv_init_locked(adapter); + IXGBE_CORE_UNLOCK(adapter); +} + +/* +** This routine is run via an vlan +** unconfig EVENT, remove our entry +** in the soft vfta. 
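+**
+** Worked example (editor's illustration) of the index/bit math used
+** by both VLAN event handlers: for vtag 2049,
+**	index = (2049 >> 5) & 0x7F = 64
+**	bit   = 2049 & 0x1F       = 1
+** so VLAN 2049 lives at bit 1 of ixv_shadow_vfta[64], and
+** ixv_setup_vlan_support() later replays it to the PF as
+** vid = (64 * 32) + 1.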
+*/ +static void +ixv_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag) +{ + struct adapter *adapter = ifp->if_softc; + u16 index, bit; + + if (ifp->if_softc != arg) + return; + + if ((vtag == 0) || (vtag > 4095)) /* Invalid */ + return; + + IXGBE_CORE_LOCK(adapter); + index = (vtag >> 5) & 0x7F; + bit = vtag & 0x1F; + ixv_shadow_vfta[index] &= ~(1 << bit); + --adapter->num_vlans; + /* Re-init to load the changes */ + ixv_init_locked(adapter); + IXGBE_CORE_UNLOCK(adapter); +} + +static void +ixv_enable_intr(struct adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + struct ix_queue *que = adapter->queues; + u32 mask = (IXGBE_EIMS_ENABLE_MASK & ~IXGBE_EIMS_RTX_QUEUE); + + + IXGBE_WRITE_REG(hw, IXGBE_VTEIMS, mask); + + mask = IXGBE_EIMS_ENABLE_MASK; + mask &= ~(IXGBE_EIMS_OTHER | IXGBE_EIMS_LSC); + IXGBE_WRITE_REG(hw, IXGBE_VTEIAC, mask); + + for (int i = 0; i < adapter->num_queues; i++, que++) + ixv_enable_queue(adapter, que->msix); + + IXGBE_WRITE_FLUSH(hw); + + return; +} + +static void +ixv_disable_intr(struct adapter *adapter) +{ + IXGBE_WRITE_REG(&adapter->hw, IXGBE_VTEIAC, 0); + IXGBE_WRITE_REG(&adapter->hw, IXGBE_VTEIMC, ~0); + IXGBE_WRITE_FLUSH(&adapter->hw); + return; +} + +/* +** Setup the correct IVAR register for a particular MSIX interrupt +** - entry is the register array entry +** - vector is the MSIX vector for this queue +** - type is RX/TX/MISC +*/ +static void +ixv_set_ivar(struct adapter *adapter, u8 entry, u8 vector, s8 type) +{ + struct ixgbe_hw *hw = &adapter->hw; + u32 ivar, index; + + vector |= IXGBE_IVAR_ALLOC_VAL; + + if (type == -1) { /* MISC IVAR */ + ivar = IXGBE_READ_REG(hw, IXGBE_VTIVAR_MISC); + ivar &= ~0xFF; + ivar |= vector; + IXGBE_WRITE_REG(hw, IXGBE_VTIVAR_MISC, ivar); + } else { /* RX/TX IVARS */ + index = (16 * (entry & 1)) + (8 * type); + ivar = IXGBE_READ_REG(hw, IXGBE_VTIVAR(entry >> 1)); + ivar &= ~(0xFF << index); + ivar |= (vector << index); + IXGBE_WRITE_REG(hw, IXGBE_VTIVAR(entry >> 1), ivar); + } +} + +static void +ixv_configure_ivars(struct adapter *adapter) +{ + struct ix_queue *que = adapter->queues; + + for (int i = 0; i < adapter->num_queues; i++, que++) { + /* First the RX queue entry */ + ixv_set_ivar(adapter, i, que->msix, 0); + /* ... and the TX */ + ixv_set_ivar(adapter, i, que->msix, 1); + /* Set an initial value in EITR */ + IXGBE_WRITE_REG(&adapter->hw, + IXGBE_VTEITR(que->msix), IXV_EITR_DEFAULT); + } + + /* For the mailbox interrupt */ + ixv_set_ivar(adapter, 1, adapter->vector, -1); +} + + +/* +** Tasklet handler for MSIX MBX interrupts +** - do outside interrupt since it might sleep +*/ +static void +ixv_handle_mbx(void *context, int pending) +{ + struct adapter *adapter = context; + + ixgbe_check_link(&adapter->hw, + &adapter->link_speed, &adapter->link_up, 0); + ixv_update_link_status(adapter); +} + +/* +** The VF stats registers never have a truly virgin +** starting point, so this routine tries to make an +** artificial one, marking ground zero on attach as +** it were. 
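+**
+** (Editor's note: the UPDATE_STAT_32/36 macros below extend the
+** narrow hardware counters in software by detecting wrap. Worked
+** example for UPDATE_STAT_32: with last = 0xFFFFFFF0 and a fresh
+** read of 0x10, current < last, so 2^32 is added to the running
+** count and its low 32 bits are replaced with 0x10 -- a net
+** advance of 0x20, exactly the number of events since the wrap.)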
+*/ +static void +ixv_save_stats(struct adapter *adapter) +{ + if (adapter->stats.vf.vfgprc || adapter->stats.vf.vfgptc) { + adapter->stats.vf.saved_reset_vfgprc += + adapter->stats.vf.vfgprc - adapter->stats.vf.base_vfgprc; + adapter->stats.vf.saved_reset_vfgptc += + adapter->stats.vf.vfgptc - adapter->stats.vf.base_vfgptc; + adapter->stats.vf.saved_reset_vfgorc += + adapter->stats.vf.vfgorc - adapter->stats.vf.base_vfgorc; + adapter->stats.vf.saved_reset_vfgotc += + adapter->stats.vf.vfgotc - adapter->stats.vf.base_vfgotc; + adapter->stats.vf.saved_reset_vfmprc += + adapter->stats.vf.vfmprc - adapter->stats.vf.base_vfmprc; + } +} + +static void +ixv_init_stats(struct adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + + adapter->stats.vf.last_vfgprc = IXGBE_READ_REG(hw, IXGBE_VFGPRC); + adapter->stats.vf.last_vfgorc = IXGBE_READ_REG(hw, IXGBE_VFGORC_LSB); + adapter->stats.vf.last_vfgorc |= + (((u64)(IXGBE_READ_REG(hw, IXGBE_VFGORC_MSB))) << 32); + + adapter->stats.vf.last_vfgptc = IXGBE_READ_REG(hw, IXGBE_VFGPTC); + adapter->stats.vf.last_vfgotc = IXGBE_READ_REG(hw, IXGBE_VFGOTC_LSB); + adapter->stats.vf.last_vfgotc |= + (((u64)(IXGBE_READ_REG(hw, IXGBE_VFGOTC_MSB))) << 32); + + adapter->stats.vf.last_vfmprc = IXGBE_READ_REG(hw, IXGBE_VFMPRC); + + adapter->stats.vf.base_vfgprc = adapter->stats.vf.last_vfgprc; + adapter->stats.vf.base_vfgorc = adapter->stats.vf.last_vfgorc; + adapter->stats.vf.base_vfgptc = adapter->stats.vf.last_vfgptc; + adapter->stats.vf.base_vfgotc = adapter->stats.vf.last_vfgotc; + adapter->stats.vf.base_vfmprc = adapter->stats.vf.last_vfmprc; +} + +#define UPDATE_STAT_32(reg, last, count) \ +{ \ + u32 current = IXGBE_READ_REG(hw, reg); \ + if (current < last) \ + count += 0x100000000LL; \ + last = current; \ + count &= 0xFFFFFFFF00000000LL; \ + count |= current; \ +} + +#define UPDATE_STAT_36(lsb, msb, last, count) \ +{ \ + u64 cur_lsb = IXGBE_READ_REG(hw, lsb); \ + u64 cur_msb = IXGBE_READ_REG(hw, msb); \ + u64 current = ((cur_msb << 32) | cur_lsb); \ + if (current < last) \ + count += 0x1000000000LL; \ + last = current; \ + count &= 0xFFFFFFF000000000LL; \ + count |= current; \ +} + +/* +** ixv_update_stats - Update the board statistics counters. +*/ +void +ixv_update_stats(struct adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + + UPDATE_STAT_32(IXGBE_VFGPRC, adapter->stats.vf.last_vfgprc, + adapter->stats.vf.vfgprc); + UPDATE_STAT_32(IXGBE_VFGPTC, adapter->stats.vf.last_vfgptc, + adapter->stats.vf.vfgptc); + UPDATE_STAT_36(IXGBE_VFGORC_LSB, IXGBE_VFGORC_MSB, + adapter->stats.vf.last_vfgorc, adapter->stats.vf.vfgorc); + UPDATE_STAT_36(IXGBE_VFGOTC_LSB, IXGBE_VFGOTC_MSB, + adapter->stats.vf.last_vfgotc, adapter->stats.vf.vfgotc); + UPDATE_STAT_32(IXGBE_VFMPRC, adapter->stats.vf.last_vfmprc, + adapter->stats.vf.vfmprc); +} + +/* + * Add statistic sysctls for the VF. 
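+ *
+ * Usage sketch (editor's example, assuming device unit 0): the nodes
+ * built here appear under the device's sysctl tree, e.g.
+ *
+ *	sysctl dev.ixv.0.mac.good_pkts_rcvd
+ *	sysctl dev.ixv.0.que.tx_packets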
+ */
+static void
+ixv_add_stats_sysctls(struct adapter *adapter)
+{
+	device_t dev = adapter->dev;
+	struct ix_queue *que = &adapter->queues[0];
+	struct tx_ring *txr = que->txr;
+	struct rx_ring *rxr = que->rxr;
+
+	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
+	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
+	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
+	struct ixgbevf_hw_stats *stats = &adapter->stats.vf;
+
+	struct sysctl_oid *stat_node, *queue_node;
+	struct sysctl_oid_list *stat_list, *queue_list;
+
+	/* Driver Statistics */
+	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
+	    CTLFLAG_RD, &adapter->dropped_pkts,
+	    "Driver dropped packets");
+	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_failed",
+	    CTLFLAG_RD, &adapter->mbuf_defrag_failed,
+	    "m_defrag() failed");
+	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_events",
+	    CTLFLAG_RD, &adapter->watchdog_events,
+	    "Watchdog timeouts");
+
+	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac",
+	    CTLFLAG_RD, NULL,
+	    "VF Statistics (read from HW registers)");
+	stat_list = SYSCTL_CHILDREN(stat_node);
+
+	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_rcvd",
+	    CTLFLAG_RD, &stats->vfgprc,
+	    "Good Packets Received");
+	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_rcvd",
+	    CTLFLAG_RD, &stats->vfgorc,
+	    "Good Octets Received");
+	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_rcvd",
+	    CTLFLAG_RD, &stats->vfmprc,
+	    "Multicast Packets Received");
+	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
+	    CTLFLAG_RD, &stats->vfgptc,
+	    "Good Packets Transmitted");
+	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
+	    CTLFLAG_RD, &stats->vfgotc,
+	    "Good Octets Transmitted");
+
+	queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "que",
+	    CTLFLAG_RD, NULL,
+	    "Queue Statistics (collected by SW)");
+	queue_list = SYSCTL_CHILDREN(queue_node);
+
+	SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "irqs",
+	    CTLFLAG_RD, &(que->irqs),
+	    "IRQs on queue");
+	SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_irqs",
+	    CTLFLAG_RD, &(rxr->rx_irq),
+	    "RX irqs on queue");
+	SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_packets",
+	    CTLFLAG_RD, &(rxr->rx_packets),
+	    "RX packets");
+	SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_bytes",
+	    CTLFLAG_RD, &(rxr->rx_bytes),
+	    "RX bytes");
+	SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_discarded",
+	    CTLFLAG_RD, &(rxr->rx_discarded),
+	    "Discarded RX packets");
+
+	SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_packets",
+	    CTLFLAG_RD, &(txr->total_packets),
+	    "TX Packets");
+
+	SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_no_desc",
+	    CTLFLAG_RD, &(txr->no_desc_avail),
+	    "# of times not enough descriptors were available during TX");
+}
+
+static void
+ixv_set_sysctl_value(struct adapter *adapter, const char *name,
+	const char *description, int *limit, int value)
+{
+	*limit = value;
+	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
+	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
+	    OID_AUTO, name, CTLFLAG_RW, limit, value, description);
+}
+
+/**********************************************************************
+ *
+ *  This routine is invoked from the "debug" sysctl handler
+ *  (ixv_sysctl_debug) and provides a way to take a look at important
+ *  statistics maintained by the driver and hardware.
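+ *
+ *  Usage sketch (editor's example, assuming device unit 0):
+ *
+ *	sysctl dev.ixv.0.debug=1
+ *
+ *  triggers ixv_sysctl_debug(), which calls this routine.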
+ * + **********************************************************************/ +static void +ixv_print_debug_info(struct adapter *adapter) +{ + device_t dev = adapter->dev; + struct ixgbe_hw *hw = &adapter->hw; + struct ix_queue *que = adapter->queues; + struct rx_ring *rxr; + struct tx_ring *txr; + struct lro_ctrl *lro; + + device_printf(dev,"Error Byte Count = %u \n", + IXGBE_READ_REG(hw, IXGBE_ERRBC)); + + for (int i = 0; i < adapter->num_queues; i++, que++) { + txr = que->txr; + rxr = que->rxr; + lro = &rxr->lro; + device_printf(dev,"QUE(%d) IRQs Handled: %lu\n", + que->msix, (long)que->irqs); + device_printf(dev,"RX(%d) Packets Received: %lld\n", + rxr->me, (long long)rxr->rx_packets); + device_printf(dev,"RX(%d) Bytes Received: %lu\n", + rxr->me, (long)rxr->rx_bytes); + device_printf(dev,"RX(%d) LRO Queued= %lld\n", + rxr->me, (long long)lro->lro_queued); + device_printf(dev,"RX(%d) LRO Flushed= %lld\n", + rxr->me, (long long)lro->lro_flushed); + device_printf(dev,"TX(%d) Packets Sent: %lu\n", + txr->me, (long)txr->total_packets); + device_printf(dev,"TX(%d) NO Desc Avail: %lu\n", + txr->me, (long)txr->no_desc_avail); + } + + device_printf(dev,"MBX IRQ Handled: %lu\n", + (long)adapter->link_irq); + return; +} + +static int +ixv_sysctl_debug(SYSCTL_HANDLER_ARGS) +{ + int error, result; + struct adapter *adapter; + + result = -1; + error = sysctl_handle_int(oidp, &result, 0, req); + + if (error || !req->newptr) + return (error); + + if (result == 1) { + adapter = (struct adapter *) arg1; + ixv_print_debug_info(adapter); + } + return error; +} + Index: sys/dev/ixgbe/legacy_ix_txrx.c =================================================================== --- /dev/null +++ sys/dev/ixgbe/legacy_ix_txrx.c @@ -0,0 +1,2302 @@ +/****************************************************************************** + + Copyright (c) 2001-2015, Intel Corporation + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. Neither the name of the Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+ +******************************************************************************/ +/*$FreeBSD$*/ + + +#ifndef IXGBE_STANDALONE_BUILD +#include "opt_inet.h" +#include "opt_inet6.h" +#include "opt_rss.h" +#endif + +#include "ixgbe.h" + +#ifdef RSS +#include +#include +#endif /* RSS */ + +#include +#include + +#ifdef DEV_NETMAP +extern int ix_crcstrip; +#endif /* DEV_NETMAP */ + +/* +** HW RSC control: +** this feature only works with +** IPv4, and only on 82599 and later. +** Also this will cause IP forwarding to +** fail and that can't be controlled by +** the stack as LRO can. For all these +** reasons I've deemed it best to leave +** this off and not bother with a tuneable +** interface, this would need to be compiled +** to enable. +*/ +static bool ixgbe_rsc_enable = FALSE; + +#ifdef IXGBE_FDIR +/* +** For Flow Director: this is the +** number of TX packets we sample +** for the filter pool, this means +** every 20th packet will be probed. +** +** This feature can be disabled by +** setting this to 0. +*/ +static int atr_sample_rate = 20; +#endif + +/********************************************************************* + * Local Function prototypes + *********************************************************************/ +static void ixgbe_setup_transmit_ring(struct tx_ring *); +static void ixgbe_free_transmit_buffers(struct tx_ring *); +static int ixgbe_setup_receive_ring(struct rx_ring *); +static void ixgbe_free_receive_buffers(struct rx_ring *); + +static void ixgbe_rx_checksum(u32, struct mbuf *, u32); +static void ixgbe_refresh_mbufs(struct rx_ring *, int); +static int ixgbe_xmit(struct tx_ring *, struct mbuf **); +static int ixgbe_tx_ctx_setup(struct tx_ring *, + struct mbuf *, u32 *, u32 *); +static int ixgbe_tso_setup(struct tx_ring *, + struct mbuf *, u32 *, u32 *); +#ifdef IXGBE_FDIR +static void ixgbe_atr(struct tx_ring *, struct mbuf *); +#endif +static __inline void ixgbe_rx_discard(struct rx_ring *, int); +static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *, + struct mbuf *, u32); + +#ifdef IXGBE_LEGACY_TX +/********************************************************************* + * Transmit entry point + * + * ixgbe_start is called by the stack to initiate a transmit. + * The driver will remain in this routine as long as there are + * packets to transmit and transmit resources are available. + * In case resources are not available stack is notified and + * the packet is requeued. + **********************************************************************/ + +void +ixgbe_start_locked(struct tx_ring *txr, struct ifnet * ifp) +{ + struct mbuf *m_head; + struct adapter *adapter = txr->adapter; + + IXGBE_TX_LOCK_ASSERT(txr); + + if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) + return; + if (!adapter->link_active) + return; + + while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) { + if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE) + break; + + IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head); + if (m_head == NULL) + break; + + if (ixgbe_xmit(txr, &m_head)) { + if (m_head != NULL) + IFQ_DRV_PREPEND(&ifp->if_snd, m_head); + break; + } + /* Send a copy of the frame to the BPF listener */ + ETHER_BPF_MTAP(ifp, m_head); + } + return; +} + +/* + * Legacy TX start - called by the stack, this + * always uses the first tx ring, and should + * not be used with multiqueue tx enabled. 
+ */ +void +ixgbe_start(struct ifnet *ifp) +{ + struct adapter *adapter = ifp->if_softc; + struct tx_ring *txr = adapter->tx_rings; + + if (ifp->if_drv_flags & IFF_DRV_RUNNING) { + IXGBE_TX_LOCK(txr); + ixgbe_start_locked(txr, ifp); + IXGBE_TX_UNLOCK(txr); + } + return; +} + +#else /* ! IXGBE_LEGACY_TX */ + +/* +** Multiqueue Transmit Entry Point +** (if_transmit function) +*/ +int +ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m) +{ + struct adapter *adapter = ifp->if_softc; + struct ix_queue *que; + struct tx_ring *txr; + int i, err = 0; +#ifdef RSS + uint32_t bucket_id; +#endif + + /* + * When doing RSS, map it to the same outbound queue + * as the incoming flow would be mapped to. + * + * If everything is setup correctly, it should be the + * same bucket that the current CPU we're on is. + */ + if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) { +#ifdef RSS + if (rss_hash2bucket(m->m_pkthdr.flowid, + M_HASHTYPE_GET(m), &bucket_id) == 0) { + i = bucket_id % adapter->num_queues; +#ifdef IXGBE_DEBUG + if (bucket_id > adapter->num_queues) + if_printf(ifp, "bucket_id (%d) > num_queues " + "(%d)\n", bucket_id, adapter->num_queues); +#endif + } else +#endif + i = m->m_pkthdr.flowid % adapter->num_queues; + } else + i = curcpu % adapter->num_queues; + + /* Check for a hung queue and pick alternative */ + if (((1 << i) & adapter->active_queues) == 0) + i = ffsl(adapter->active_queues); + + txr = &adapter->tx_rings[i]; + que = &adapter->queues[i]; + + err = drbr_enqueue(ifp, txr->br, m); + if (err) + return (err); + if (IXGBE_TX_TRYLOCK(txr)) { + ixgbe_mq_start_locked(ifp, txr); + IXGBE_TX_UNLOCK(txr); + } else + taskqueue_enqueue(que->tq, &txr->txq_task); + + return (0); +} + +int +ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr) +{ + struct adapter *adapter = txr->adapter; + struct mbuf *next; + int enqueued = 0, err = 0; + + if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) || + adapter->link_active == 0) + return (ENETDOWN); + + /* Process the queue */ +#if __FreeBSD_version < 901504 + next = drbr_dequeue(ifp, txr->br); + while (next != NULL) { + if ((err = ixgbe_xmit(txr, &next)) != 0) { + if (next != NULL) + err = drbr_enqueue(ifp, txr->br, next); +#else + while ((next = drbr_peek(ifp, txr->br)) != NULL) { + if ((err = ixgbe_xmit(txr, &next)) != 0) { + if (next == NULL) { + drbr_advance(ifp, txr->br); + } else { + drbr_putback(ifp, txr->br, next); + } +#endif + break; + } +#if __FreeBSD_version >= 901504 + drbr_advance(ifp, txr->br); +#endif + enqueued++; +#if 0 // this is VF-only +#if __FreeBSD_version >= 1100036 + /* + * Since we're looking at the tx ring, we can check + * to see if we're a VF by examing our tail register + * address. + */ + if (txr->tail < IXGBE_TDT(0) && next->m_flags & M_MCAST) + if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); +#endif +#endif + /* Send a copy of the frame to the BPF listener */ + ETHER_BPF_MTAP(ifp, next); + if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) + break; +#if __FreeBSD_version < 901504 + next = drbr_dequeue(ifp, txr->br); +#endif + } + + if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD) + ixgbe_txeof(txr); + + return (err); +} + +/* + * Called from a taskqueue to drain queued transmit packets. 
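+ * (Editor's note: ixgbe_mq_start() enqueues this task whenever it
+ * fails to take the TX lock with IXGBE_TX_TRYLOCK(), so contended
+ * transmits are completed from the taskqueue instead of blocking in
+ * if_transmit.)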
+ */ +void +ixgbe_deferred_mq_start(void *arg, int pending) +{ + struct tx_ring *txr = arg; + struct adapter *adapter = txr->adapter; + struct ifnet *ifp = adapter->ifp; + + IXGBE_TX_LOCK(txr); + if (!drbr_empty(ifp, txr->br)) + ixgbe_mq_start_locked(ifp, txr); + IXGBE_TX_UNLOCK(txr); +} + +/* + * Flush all ring buffers + */ +void +ixgbe_qflush(struct ifnet *ifp) +{ + struct adapter *adapter = ifp->if_softc; + struct tx_ring *txr = adapter->tx_rings; + struct mbuf *m; + + for (int i = 0; i < adapter->num_queues; i++, txr++) { + IXGBE_TX_LOCK(txr); + while ((m = buf_ring_dequeue_sc(txr->br)) != NULL) + m_freem(m); + IXGBE_TX_UNLOCK(txr); + } + if_qflush(ifp); +} +#endif /* IXGBE_LEGACY_TX */ + + +/********************************************************************* + * + * This routine maps the mbufs to tx descriptors, allowing the + * TX engine to transmit the packets. + * - return 0 on success, positive on failure + * + **********************************************************************/ + +static int +ixgbe_xmit(struct tx_ring *txr, struct mbuf **m_headp) +{ + struct adapter *adapter = txr->adapter; + u32 olinfo_status = 0, cmd_type_len; + int i, j, error, nsegs; + int first; + bool remap = TRUE; + struct mbuf *m_head; + bus_dma_segment_t segs[adapter->num_segs]; + bus_dmamap_t map; + struct ixgbe_tx_buf *txbuf; + union ixgbe_adv_tx_desc *txd = NULL; + + m_head = *m_headp; + + /* Basic descriptor defines */ + cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA | + IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT); + + if (m_head->m_flags & M_VLANTAG) + cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE; + + /* + * Important to capture the first descriptor + * used because it will contain the index of + * the one we tell the hardware to report back + */ + first = txr->next_avail_desc; + txbuf = &txr->tx_buffers[first]; + map = txbuf->map; + + /* + * Map the packet for DMA. + */ +retry: + error = bus_dmamap_load_mbuf_sg(txr->txtag, map, + *m_headp, segs, &nsegs, BUS_DMA_NOWAIT); + + if (__predict_false(error)) { + struct mbuf *m; + + switch (error) { + case EFBIG: + /* Try it again? 
- one try */ + if (remap == TRUE) { + remap = FALSE; + /* + * XXX: m_defrag will choke on + * non-MCLBYTES-sized clusters + */ + m = m_defrag(*m_headp, M_NOWAIT); + if (m == NULL) { + adapter->mbuf_defrag_failed++; + m_freem(*m_headp); + *m_headp = NULL; + return (ENOBUFS); + } + *m_headp = m; + goto retry; + } else + return (error); + case ENOMEM: + txr->no_tx_dma_setup++; + return (error); + default: + txr->no_tx_dma_setup++; + m_freem(*m_headp); + *m_headp = NULL; + return (error); + } + } + + /* Make certain there are enough descriptors */ + if (txr->tx_avail < (nsegs + 2)) { + txr->no_desc_avail++; + bus_dmamap_unload(txr->txtag, map); + return (ENOBUFS); + } + m_head = *m_headp; + + /* + * Set up the appropriate offload context + * this will consume the first descriptor + */ + error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status); + if (__predict_false(error)) { + if (error == ENOBUFS) + *m_headp = NULL; + return (error); + } + +#ifdef IXGBE_FDIR + /* Do the flow director magic */ + if ((txr->atr_sample) && (!adapter->fdir_reinit)) { + ++txr->atr_count; + if (txr->atr_count >= atr_sample_rate) { + ixgbe_atr(txr, m_head); + txr->atr_count = 0; + } + } +#endif + + olinfo_status |= IXGBE_ADVTXD_CC; + i = txr->next_avail_desc; + for (j = 0; j < nsegs; j++) { + bus_size_t seglen; + bus_addr_t segaddr; + + txbuf = &txr->tx_buffers[i]; + txd = &txr->tx_base[i]; + seglen = segs[j].ds_len; + segaddr = htole64(segs[j].ds_addr); + + txd->read.buffer_addr = segaddr; + txd->read.cmd_type_len = htole32(txr->txd_cmd | + cmd_type_len |seglen); + txd->read.olinfo_status = htole32(olinfo_status); + + if (++i == txr->num_desc) + i = 0; + } + + txd->read.cmd_type_len |= + htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS); + txr->tx_avail -= nsegs; + txr->next_avail_desc = i; + + txbuf->m_head = m_head; + /* + * Here we swap the map so the last descriptor, + * which gets the completion interrupt has the + * real map, and the first descriptor gets the + * unused map from this descriptor. + */ + txr->tx_buffers[first].map = txbuf->map; + txbuf->map = map; + bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE); + + /* Set the EOP descriptor that will be marked done */ + txbuf = &txr->tx_buffers[first]; + txbuf->eop = txd; + + bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, + BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); + /* + * Advance the Transmit Descriptor Tail (Tdt), this tells the + * hardware that this frame is available to transmit. + */ + ++txr->total_packets; + IXGBE_WRITE_REG(&adapter->hw, txr->tail, i); + + /* Mark queue as having work */ + if (txr->busy == 0) + txr->busy = 1; + + return (0); +} + + +/********************************************************************* + * + * Allocate memory for tx_buffer structures. The tx_buffer stores all + * the information needed to transmit a packet on the wire. This is + * called only once at attach, setup is done every reset. + * + **********************************************************************/ +int +ixgbe_allocate_transmit_buffers(struct tx_ring *txr) +{ + struct adapter *adapter = txr->adapter; + device_t dev = adapter->dev; + struct ixgbe_tx_buf *txbuf; + int error, i; + + /* + * Setup DMA descriptor areas. 
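+	 * The tag created just below bounds a single packet: at most
+	 * IXGBE_TSO_SIZE bytes (262140 in this patch, just under 256KB)
+	 * split across up to adapter->num_segs segments of at most
+	 * PAGE_SIZE each.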
+ */ + if ((error = bus_dma_tag_create( + bus_get_dma_tag(adapter->dev), /* parent */ + 1, 0, /* alignment, bounds */ + BUS_SPACE_MAXADDR, /* lowaddr */ + BUS_SPACE_MAXADDR, /* highaddr */ + NULL, NULL, /* filter, filterarg */ + IXGBE_TSO_SIZE, /* maxsize */ + adapter->num_segs, /* nsegments */ + PAGE_SIZE, /* maxsegsize */ + 0, /* flags */ + NULL, /* lockfunc */ + NULL, /* lockfuncarg */ + &txr->txtag))) { + device_printf(dev,"Unable to allocate TX DMA tag\n"); + goto fail; + } + + if (!(txr->tx_buffers = + (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) * + adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) { + device_printf(dev, "Unable to allocate tx_buffer memory\n"); + error = ENOMEM; + goto fail; + } + + /* Create the descriptor buffer dma maps */ + txbuf = txr->tx_buffers; + for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) { + error = bus_dmamap_create(txr->txtag, 0, &txbuf->map); + if (error != 0) { + device_printf(dev, "Unable to create TX DMA map\n"); + goto fail; + } + } + + return 0; +fail: + /* We free all, it handles case where we are in the middle */ + ixgbe_free_transmit_structures(adapter); + return (error); +} + +/********************************************************************* + * + * Initialize a transmit ring. + * + **********************************************************************/ +static void +ixgbe_setup_transmit_ring(struct tx_ring *txr) +{ + struct adapter *adapter = txr->adapter; + struct ixgbe_tx_buf *txbuf; +#ifdef DEV_NETMAP + struct netmap_adapter *na = NA(adapter->ifp); + struct netmap_slot *slot; +#endif /* DEV_NETMAP */ + + /* Clear the old ring contents */ + IXGBE_TX_LOCK(txr); +#ifdef DEV_NETMAP + /* + * (under lock): if in netmap mode, do some consistency + * checks and set slot to entry 0 of the netmap ring. + */ + slot = netmap_reset(na, NR_TX, txr->me, 0); +#endif /* DEV_NETMAP */ + bzero((void *)txr->tx_base, + (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc); + /* Reset indices */ + txr->next_avail_desc = 0; + txr->next_to_clean = 0; + + /* Free any existing tx buffers. */ + txbuf = txr->tx_buffers; + for (int i = 0; i < txr->num_desc; i++, txbuf++) { + if (txbuf->m_head != NULL) { + bus_dmamap_sync(txr->txtag, txbuf->map, + BUS_DMASYNC_POSTWRITE); + bus_dmamap_unload(txr->txtag, txbuf->map); + m_freem(txbuf->m_head); + txbuf->m_head = NULL; + } +#ifdef DEV_NETMAP + /* + * In netmap mode, set the map for the packet buffer. + * NOTE: Some drivers (not this one) also need to set + * the physical buffer address in the NIC ring. + * Slots in the netmap ring (indexed by "si") are + * kring->nkr_hwofs positions "ahead" wrt the + * corresponding slot in the NIC ring. In some drivers + * (not here) nkr_hwofs can be negative. Function + * netmap_idx_n2k() handles wraparounds properly. + */ + if (slot) { + int si = netmap_idx_n2k(&na->tx_rings[txr->me], i); + netmap_load_map(na, txr->txtag, + txbuf->map, NMB(na, slot + si)); + } +#endif /* DEV_NETMAP */ + /* Clear the EOP descriptor pointer */ + txbuf->eop = NULL; + } + +#ifdef IXGBE_FDIR + /* Set the rate at which we sample packets */ + if (adapter->hw.mac.type != ixgbe_mac_82598EB) + txr->atr_sample = atr_sample_rate; +#endif + + /* Set number of descriptors available */ + txr->tx_avail = adapter->num_tx_desc; + + bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, + BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); + IXGBE_TX_UNLOCK(txr); +} + +/********************************************************************* + * + * Initialize all transmit rings. 
+ * + **********************************************************************/ +int +ixgbe_setup_transmit_structures(struct adapter *adapter) +{ + struct tx_ring *txr = adapter->tx_rings; + + for (int i = 0; i < adapter->num_queues; i++, txr++) + ixgbe_setup_transmit_ring(txr); + + return (0); +} + +/********************************************************************* + * + * Free all transmit rings. + * + **********************************************************************/ +void +ixgbe_free_transmit_structures(struct adapter *adapter) +{ + struct tx_ring *txr = adapter->tx_rings; + + for (int i = 0; i < adapter->num_queues; i++, txr++) { + IXGBE_TX_LOCK(txr); + ixgbe_free_transmit_buffers(txr); + ixgbe_dma_free(adapter, &txr->txdma); + IXGBE_TX_UNLOCK(txr); + IXGBE_TX_LOCK_DESTROY(txr); + } + free(adapter->tx_rings, M_DEVBUF); +} + +/********************************************************************* + * + * Free transmit ring related data structures. + * + **********************************************************************/ +static void +ixgbe_free_transmit_buffers(struct tx_ring *txr) +{ + struct adapter *adapter = txr->adapter; + struct ixgbe_tx_buf *tx_buffer; + int i; + + INIT_DEBUGOUT("ixgbe_free_transmit_ring: begin"); + + if (txr->tx_buffers == NULL) + return; + + tx_buffer = txr->tx_buffers; + for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) { + if (tx_buffer->m_head != NULL) { + bus_dmamap_sync(txr->txtag, tx_buffer->map, + BUS_DMASYNC_POSTWRITE); + bus_dmamap_unload(txr->txtag, + tx_buffer->map); + m_freem(tx_buffer->m_head); + tx_buffer->m_head = NULL; + if (tx_buffer->map != NULL) { + bus_dmamap_destroy(txr->txtag, + tx_buffer->map); + tx_buffer->map = NULL; + } + } else if (tx_buffer->map != NULL) { + bus_dmamap_unload(txr->txtag, + tx_buffer->map); + bus_dmamap_destroy(txr->txtag, + tx_buffer->map); + tx_buffer->map = NULL; + } + } +#ifdef IXGBE_LEGACY_TX + if (txr->br != NULL) + buf_ring_free(txr->br, M_DEVBUF); +#endif + if (txr->tx_buffers != NULL) { + free(txr->tx_buffers, M_DEVBUF); + txr->tx_buffers = NULL; + } + if (txr->txtag != NULL) { + bus_dma_tag_destroy(txr->txtag); + txr->txtag = NULL; + } + return; +} + +/********************************************************************* + * + * Advanced Context Descriptor setup for VLAN, CSUM or TSO + * + **********************************************************************/ + +static int +ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp, + u32 *cmd_type_len, u32 *olinfo_status) +{ + struct adapter *adapter = txr->adapter; + struct ixgbe_adv_tx_context_desc *TXD; + struct ether_vlan_header *eh; +#ifdef INET + struct ip *ip; +#endif +#ifdef INET6 + struct ip6_hdr *ip6; +#endif + u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0; + int ehdrlen, ip_hlen = 0; + u16 etype; + u8 ipproto = 0; + int offload = TRUE; + int ctxd = txr->next_avail_desc; + u16 vtag = 0; + caddr_t l3d; + + + /* First check if TSO is to be used */ + if (mp->m_pkthdr.csum_flags & (CSUM_IP_TSO|CSUM_IP6_TSO)) + return (ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status)); + + if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0) + offload = FALSE; + + /* Indicate the whole packet as payload when not doing TSO */ + *olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT; + + /* Now ready a context descriptor */ + TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd]; + + /* + ** In advanced descriptors the vlan tag must + ** be placed into the context descriptor. Hence + ** we need to make one even if not doing offloads. 
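+**
+** Worked example of the packing (illustrative numbers): an untagged
+** TCP/IPv4 frame has ehdrlen = 14 and ip_hlen = 20, so below we end
+** up with ip_hlen in the low IPLEN bits, ehdrlen shifted into the
+** MACLEN field above it, and, when present, the VLAN tag shifted
+** into the high bits by IXGBE_ADVTXD_VLAN_SHIFT.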
+ */ + if (mp->m_flags & M_VLANTAG) { + vtag = htole16(mp->m_pkthdr.ether_vtag); + vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT); + } else if (!IXGBE_IS_X550VF(adapter) && (offload == FALSE)) + return (0); + + /* + * Determine where frame payload starts. + * Jump over vlan headers if already present, + * helpful for QinQ too. + */ + eh = mtod(mp, struct ether_vlan_header *); + if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { + etype = ntohs(eh->evl_proto); + ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; + } else { + etype = ntohs(eh->evl_encap_proto); + ehdrlen = ETHER_HDR_LEN; + } + + /* Set the ether header length */ + vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT; + + if (offload == FALSE) + goto no_offloads; + + /* + * If the first mbuf only includes the ethernet header, jump to the next one + * XXX: This assumes the stack splits mbufs containing headers on header boundaries + * XXX: And assumes the entire IP header is contained in one mbuf + */ + if (mp->m_len == ehdrlen && mp->m_next) + l3d = mtod(mp->m_next, caddr_t); + else + l3d = mtod(mp, caddr_t) + ehdrlen; + + switch (etype) { +#ifdef INET + case ETHERTYPE_IP: + ip = (struct ip *)(l3d); + ip_hlen = ip->ip_hl << 2; + ipproto = ip->ip_p; + type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4; + /* Insert IPv4 checksum into data descriptors */ + if (mp->m_pkthdr.csum_flags & CSUM_IP) { + ip->ip_sum = 0; + *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8; + } + break; +#endif +#ifdef INET6 + case ETHERTYPE_IPV6: + ip6 = (struct ip6_hdr *)(l3d); + ip_hlen = sizeof(struct ip6_hdr); + ipproto = ip6->ip6_nxt; + type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6; + break; +#endif + default: + offload = FALSE; + break; + } + + vlan_macip_lens |= ip_hlen; + + /* No support for offloads for non-L4 next headers */ + switch (ipproto) { + case IPPROTO_TCP: + if (mp->m_pkthdr.csum_flags & (CSUM_IP_TCP | CSUM_IP6_TCP)) + type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP; + else + offload = false; + break; + case IPPROTO_UDP: + if (mp->m_pkthdr.csum_flags & (CSUM_IP_UDP | CSUM_IP6_UDP)) + type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP; + else + offload = false; + break; + case IPPROTO_SCTP: + if (mp->m_pkthdr.csum_flags & (CSUM_IP_SCTP | CSUM_IP6_SCTP)) + type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP; + else + offload = false; + break; + default: + offload = false; + break; + } + + if (offload) /* Insert L4 checksum into data descriptors */ + *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8; + +no_offloads: + type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT; + + /* Now copy bits into descriptor */ + TXD->vlan_macip_lens = htole32(vlan_macip_lens); + TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl); + TXD->seqnum_seed = htole32(0); + TXD->mss_l4len_idx = htole32(0); + + /* We've consumed the first desc, adjust counters */ + if (++ctxd == txr->num_desc) + ctxd = 0; + txr->next_avail_desc = ctxd; + --txr->tx_avail; + + return (0); +} + +/********************************************************************** + * + * Setup work for hardware segmentation offload (TSO) on + * adapters using advanced tx descriptors + * + **********************************************************************/ +static int +ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp, + u32 *cmd_type_len, u32 *olinfo_status) +{ + struct ixgbe_adv_tx_context_desc *TXD; + u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0; + u32 mss_l4len_idx = 0, paylen; + u16 vtag = 0, eh_type; + int ctxd, ehdrlen, ip_hlen, tcp_hlen; + struct ether_vlan_header *eh; +#ifdef INET6 + struct ip6_hdr 
*ip6; +#endif +#ifdef INET + struct ip *ip; +#endif + struct tcphdr *th; + + /* + * Determine where frame payload starts. + * Jump over vlan headers if already present + */ + eh = mtod(mp, struct ether_vlan_header *); + if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { + ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; + eh_type = eh->evl_proto; + } else { + ehdrlen = ETHER_HDR_LEN; + eh_type = eh->evl_encap_proto; + } + + switch (ntohs(eh_type)) { +#ifdef INET6 + case ETHERTYPE_IPV6: + ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen); + /* XXX-BZ For now we do not pretend to support ext. hdrs. */ + if (ip6->ip6_nxt != IPPROTO_TCP) + return (ENXIO); + ip_hlen = sizeof(struct ip6_hdr); + ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen); + th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen); + th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0); + type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6; + break; +#endif +#ifdef INET + case ETHERTYPE_IP: + ip = (struct ip *)(mp->m_data + ehdrlen); + if (ip->ip_p != IPPROTO_TCP) + return (ENXIO); + ip->ip_sum = 0; + ip_hlen = ip->ip_hl << 2; + th = (struct tcphdr *)((caddr_t)ip + ip_hlen); + th->th_sum = in_pseudo(ip->ip_src.s_addr, + ip->ip_dst.s_addr, htons(IPPROTO_TCP)); + type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4; + /* Tell transmit desc to also do IPv4 checksum. */ + *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8; + break; +#endif + default: + panic("%s: CSUM_TSO but no supported IP version (0x%04x)", + __func__, ntohs(eh_type)); + break; + } + + ctxd = txr->next_avail_desc; + TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd]; + + tcp_hlen = th->th_off << 2; + + /* This is used in the transmit desc in encap */ + paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen; + + /* VLAN MACLEN IPLEN */ + if (mp->m_flags & M_VLANTAG) { + vtag = htole16(mp->m_pkthdr.ether_vtag); + vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT); + } + + vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT; + vlan_macip_lens |= ip_hlen; + TXD->vlan_macip_lens = htole32(vlan_macip_lens); + + /* ADV DTYPE TUCMD */ + type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT; + type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP; + TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl); + + /* MSS L4LEN IDX */ + mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT); + mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT); + TXD->mss_l4len_idx = htole32(mss_l4len_idx); + + TXD->seqnum_seed = htole32(0); + + if (++ctxd == txr->num_desc) + ctxd = 0; + + txr->tx_avail--; + txr->next_avail_desc = ctxd; + *cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE; + *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8; + *olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT; + ++txr->tso_tx; + return (0); +} + + +/********************************************************************** + * + * Examine each tx_buffer in the used queue. If the hardware is done + * processing the packet then free associated resources. The + * tx_buffer is put back on the free queue. 
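+ *
+ *  The per-call budget comes from adapter->tx_process_limit; in this
+ *  driver generation that limit is expected to be a tunable (e.g. a
+ *  hw.ix.tx_process_limit sysctl, assumed here), so one pass retires
+ *  at most that many descriptors before returning.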
+ * + **********************************************************************/ +void +ixgbe_txeof(struct tx_ring *txr) +{ + struct adapter *adapter = txr->adapter; +#ifdef DEV_NETMAP + struct ifnet *ifp = adapter->ifp; +#endif + u32 work, processed = 0; + u32 limit = adapter->tx_process_limit; + struct ixgbe_tx_buf *buf; + union ixgbe_adv_tx_desc *txd; + + mtx_assert(&txr->tx_mtx, MA_OWNED); + +#ifdef DEV_NETMAP + if (ifp->if_capenable & IFCAP_NETMAP) { + struct netmap_adapter *na = NA(ifp); + struct netmap_kring *kring = &na->tx_rings[txr->me]; + txd = txr->tx_base; + bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, + BUS_DMASYNC_POSTREAD); + /* + * In netmap mode, all the work is done in the context + * of the client thread. Interrupt handlers only wake up + * clients, which may be sleeping on individual rings + * or on a global resource for all rings. + * To implement tx interrupt mitigation, we wake up the client + * thread roughly every half ring, even if the NIC interrupts + * more frequently. This is implemented as follows: + * - ixgbe_txsync() sets kring->nr_kflags with the index of + * the slot that should wake up the thread (nkr_num_slots + * means the user thread should not be woken up); + * - the driver ignores tx interrupts unless netmap_mitigate=0 + * or the slot has the DD bit set. + */ + if (!netmap_mitigate || + (kring->nr_kflags < kring->nkr_num_slots && + txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) { + netmap_tx_irq(ifp, txr->me); + } + return; + } +#endif /* DEV_NETMAP */ + + if (txr->tx_avail == txr->num_desc) { + txr->busy = 0; + return; + } + + /* Get work starting point */ + work = txr->next_to_clean; + buf = &txr->tx_buffers[work]; + txd = &txr->tx_base[work]; + work -= txr->num_desc; /* The distance to ring end */ + bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, + BUS_DMASYNC_POSTREAD); + + do { + union ixgbe_adv_tx_desc *eop = buf->eop; + if (eop == NULL) /* No work */ + break; + + if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0) + break; /* I/O not complete */ + + if (buf->m_head) { + txr->bytes += + buf->m_head->m_pkthdr.len; + bus_dmamap_sync(txr->txtag, + buf->map, + BUS_DMASYNC_POSTWRITE); + bus_dmamap_unload(txr->txtag, + buf->map); + m_freem(buf->m_head); + buf->m_head = NULL; + } + buf->eop = NULL; + ++txr->tx_avail; + + /* We clean the range if multi segment */ + while (txd != eop) { + ++txd; + ++buf; + ++work; + /* wrap the ring? */ + if (__predict_false(!work)) { + work -= txr->num_desc; + buf = txr->tx_buffers; + txd = txr->tx_base; + } + if (buf->m_head) { + txr->bytes += + buf->m_head->m_pkthdr.len; + bus_dmamap_sync(txr->txtag, + buf->map, + BUS_DMASYNC_POSTWRITE); + bus_dmamap_unload(txr->txtag, + buf->map); + m_freem(buf->m_head); + buf->m_head = NULL; + } + ++txr->tx_avail; + buf->eop = NULL; + + } + ++txr->packets; + ++processed; + + /* Try the next packet */ + ++txd; + ++buf; + ++work; + /* reset with a wrap */ + if (__predict_false(!work)) { + work -= txr->num_desc; + buf = txr->tx_buffers; + txd = txr->tx_base; + } + prefetch(txd); + } while (__predict_true(--limit)); + + bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, + BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); + + work += txr->num_desc; + txr->next_to_clean = work; + + /* + ** Queue Hang detection, we know there's + ** work outstanding or the first return + ** would have been taken, so increment busy + ** if nothing managed to get cleaned, then + ** in local_timer it will be checked and + ** marked as HUNG if it exceeds a MAX attempt. 
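+	**
+	** For example (illustrative numbers): if four consecutive passes
+	** find descriptors outstanding but clean none, busy climbs to 4;
+	** with IXGBE_MAX_TX_BUSY defined as 10 later in this patch, the
+	** queue is only declared HUNG once the count exceeds that limit.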
+ */ + if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG)) + ++txr->busy; + /* + ** If anything gets cleaned we reset state to 1, + ** note this will turn off HUNG if its set. + */ + if (processed) + txr->busy = 1; + + if (txr->tx_avail == txr->num_desc) + txr->busy = 0; + + return; +} + + +#ifdef IXGBE_FDIR +/* +** This routine parses packet headers so that Flow +** Director can make a hashed filter table entry +** allowing traffic flows to be identified and kept +** on the same cpu. This would be a performance +** hit, but we only do it at IXGBE_FDIR_RATE of +** packets. +*/ +static void +ixgbe_atr(struct tx_ring *txr, struct mbuf *mp) +{ + struct adapter *adapter = txr->adapter; + struct ix_queue *que; + struct ip *ip; + struct tcphdr *th; + struct udphdr *uh; + struct ether_vlan_header *eh; + union ixgbe_atr_hash_dword input = {.dword = 0}; + union ixgbe_atr_hash_dword common = {.dword = 0}; + int ehdrlen, ip_hlen; + u16 etype; + + eh = mtod(mp, struct ether_vlan_header *); + if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { + ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; + etype = eh->evl_proto; + } else { + ehdrlen = ETHER_HDR_LEN; + etype = eh->evl_encap_proto; + } + + /* Only handling IPv4 */ + if (etype != htons(ETHERTYPE_IP)) + return; + + ip = (struct ip *)(mp->m_data + ehdrlen); + ip_hlen = ip->ip_hl << 2; + + /* check if we're UDP or TCP */ + switch (ip->ip_p) { + case IPPROTO_TCP: + th = (struct tcphdr *)((caddr_t)ip + ip_hlen); + /* src and dst are inverted */ + common.port.dst ^= th->th_sport; + common.port.src ^= th->th_dport; + input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_TCPV4; + break; + case IPPROTO_UDP: + uh = (struct udphdr *)((caddr_t)ip + ip_hlen); + /* src and dst are inverted */ + common.port.dst ^= uh->uh_sport; + common.port.src ^= uh->uh_dport; + input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_UDPV4; + break; + default: + return; + } + + input.formatted.vlan_id = htobe16(mp->m_pkthdr.ether_vtag); + if (mp->m_pkthdr.ether_vtag) + common.flex_bytes ^= htons(ETHERTYPE_VLAN); + else + common.flex_bytes ^= etype; + common.ip ^= ip->ip_src.s_addr ^ ip->ip_dst.s_addr; + + que = &adapter->queues[txr->me]; + /* + ** This assumes the Rx queue and Tx + ** queue are bound to the same CPU + */ + ixgbe_fdir_add_signature_filter_82599(&adapter->hw, + input, common, que->msix); +} +#endif /* IXGBE_FDIR */ + +/* +** Used to detect a descriptor that has +** been merged by Hardware RSC. +*/ +static inline u32 +ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx) +{ + return (le32toh(rx->wb.lower.lo_dword.data) & + IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT; +} + +/********************************************************************* + * + * Initialize Hardware RSC (LRO) feature on 82599 + * for an RX ring, this is toggled by the LRO capability + * even though it is transparent to the stack. + * + * NOTE: since this HW feature only works with IPV4 and + * our testing has shown soft LRO to be as effective + * I have decided to disable this by default. 
+ * + **********************************************************************/ +static void +ixgbe_setup_hw_rsc(struct rx_ring *rxr) +{ + struct adapter *adapter = rxr->adapter; + struct ixgbe_hw *hw = &adapter->hw; + u32 rscctrl, rdrxctl; + + /* If turning LRO/RSC off we need to disable it */ + if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) { + rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me)); + rscctrl &= ~IXGBE_RSCCTL_RSCEN; + return; + } + + rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL); + rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE; +#ifdef DEV_NETMAP /* crcstrip is optional in netmap */ + if (adapter->ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip) +#endif /* DEV_NETMAP */ + rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP; + rdrxctl |= IXGBE_RDRXCTL_RSCACKC; + IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl); + + rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me)); + rscctrl |= IXGBE_RSCCTL_RSCEN; + /* + ** Limit the total number of descriptors that + ** can be combined, so it does not exceed 64K + */ + if (rxr->mbuf_sz == MCLBYTES) + rscctrl |= IXGBE_RSCCTL_MAXDESC_16; + else if (rxr->mbuf_sz == MJUMPAGESIZE) + rscctrl |= IXGBE_RSCCTL_MAXDESC_8; + else if (rxr->mbuf_sz == MJUM9BYTES) + rscctrl |= IXGBE_RSCCTL_MAXDESC_4; + else /* Using 16K cluster */ + rscctrl |= IXGBE_RSCCTL_MAXDESC_1; + + IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl); + + /* Enable TCP header recognition */ + IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0), + (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) | + IXGBE_PSRTYPE_TCPHDR)); + + /* Disable RSC for ACK packets */ + IXGBE_WRITE_REG(hw, IXGBE_RSCDBU, + (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU))); + + rxr->hw_rsc = TRUE; +} + +/********************************************************************* + * + * Refresh mbuf buffers for RX descriptor rings + * - now keeps its own state so discards due to resource + * exhaustion are unnecessary, if an mbuf cannot be obtained + * it just returns, keeping its placeholder, thus it can simply + * be recalled to try again. + * + **********************************************************************/ +static void +ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit) +{ + struct adapter *adapter = rxr->adapter; + bus_dma_segment_t seg[1]; + struct ixgbe_rx_buf *rxbuf; + struct mbuf *mp; + int i, j, nsegs, error; + bool refreshed = FALSE; + + i = j = rxr->next_to_refresh; + /* Control the loop with one beyond */ + if (++j == rxr->num_desc) + j = 0; + + while (j != limit) { + rxbuf = &rxr->rx_buffers[i]; + if (rxbuf->buf == NULL) { + mp = m_getjcl(M_NOWAIT, MT_DATA, + M_PKTHDR, rxr->mbuf_sz); + if (mp == NULL) + goto update; + if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN)) + m_adj(mp, ETHER_ALIGN); + } else + mp = rxbuf->buf; + + mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz; + + /* If we're dealing with an mbuf that was copied rather + * than replaced, there's no need to go through busdma. 
+ */ + if ((rxbuf->flags & IXGBE_RX_COPY) == 0) { + /* Get the memory mapping */ + bus_dmamap_unload(rxr->ptag, rxbuf->pmap); + error = bus_dmamap_load_mbuf_sg(rxr->ptag, + rxbuf->pmap, mp, seg, &nsegs, BUS_DMA_NOWAIT); + if (error != 0) { + printf("Refresh mbufs: payload dmamap load" + " failure - %d\n", error); + m_free(mp); + rxbuf->buf = NULL; + goto update; + } + rxbuf->buf = mp; + bus_dmamap_sync(rxr->ptag, rxbuf->pmap, + BUS_DMASYNC_PREREAD); + rxbuf->addr = rxr->rx_base[i].read.pkt_addr = + htole64(seg[0].ds_addr); + } else { + rxr->rx_base[i].read.pkt_addr = rxbuf->addr; + rxbuf->flags &= ~IXGBE_RX_COPY; + } + + refreshed = TRUE; + /* Next is precalculated */ + i = j; + rxr->next_to_refresh = i; + if (++j == rxr->num_desc) + j = 0; + } +update: + if (refreshed) /* Update hardware tail index */ + IXGBE_WRITE_REG(&adapter->hw, + rxr->tail, rxr->next_to_refresh); + return; +} + +/********************************************************************* + * + * Allocate memory for rx_buffer structures. Since we use one + * rx_buffer per received packet, the maximum number of rx_buffer's + * that we'll need is equal to the number of receive descriptors + * that we've allocated. + * + **********************************************************************/ +int +ixgbe_allocate_receive_buffers(struct rx_ring *rxr) +{ + struct adapter *adapter = rxr->adapter; + device_t dev = adapter->dev; + struct ixgbe_rx_buf *rxbuf; + int bsize, error; + + bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc; + if (!(rxr->rx_buffers = + (struct ixgbe_rx_buf *) malloc(bsize, + M_DEVBUF, M_NOWAIT | M_ZERO))) { + device_printf(dev, "Unable to allocate rx_buffer memory\n"); + error = ENOMEM; + goto fail; + } + + if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ + 1, 0, /* alignment, bounds */ + BUS_SPACE_MAXADDR, /* lowaddr */ + BUS_SPACE_MAXADDR, /* highaddr */ + NULL, NULL, /* filter, filterarg */ + MJUM16BYTES, /* maxsize */ + 1, /* nsegments */ + MJUM16BYTES, /* maxsegsize */ + 0, /* flags */ + NULL, /* lockfunc */ + NULL, /* lockfuncarg */ + &rxr->ptag))) { + device_printf(dev, "Unable to create RX DMA tag\n"); + goto fail; + } + + for (int i = 0; i < rxr->num_desc; i++, rxbuf++) { + rxbuf = &rxr->rx_buffers[i]; + error = bus_dmamap_create(rxr->ptag, 0, &rxbuf->pmap); + if (error) { + device_printf(dev, "Unable to create RX dma map\n"); + goto fail; + } + } + + return (0); + +fail: + /* Frees all, but can handle partial completion */ + ixgbe_free_receive_structures(adapter); + return (error); +} + +static void +ixgbe_free_receive_ring(struct rx_ring *rxr) +{ + struct ixgbe_rx_buf *rxbuf; + + for (int i = 0; i < rxr->num_desc; i++) { + rxbuf = &rxr->rx_buffers[i]; + if (rxbuf->buf != NULL) { + bus_dmamap_sync(rxr->ptag, rxbuf->pmap, + BUS_DMASYNC_POSTREAD); + bus_dmamap_unload(rxr->ptag, rxbuf->pmap); + rxbuf->buf->m_flags |= M_PKTHDR; + m_freem(rxbuf->buf); + rxbuf->buf = NULL; + rxbuf->flags = 0; + } + } +} + +/********************************************************************* + * + * Initialize a receive ring and its buffers. 
+ * + **********************************************************************/ +static int +ixgbe_setup_receive_ring(struct rx_ring *rxr) +{ + struct adapter *adapter; + struct ifnet *ifp; + device_t dev; + struct ixgbe_rx_buf *rxbuf; + bus_dma_segment_t seg[1]; + struct lro_ctrl *lro = &rxr->lro; + int rsize, nsegs, error = 0; +#ifdef DEV_NETMAP + struct netmap_adapter *na = NA(rxr->adapter->ifp); + struct netmap_slot *slot; +#endif /* DEV_NETMAP */ + + adapter = rxr->adapter; + ifp = adapter->ifp; + dev = adapter->dev; + + /* Clear the ring contents */ + IXGBE_RX_LOCK(rxr); +#ifdef DEV_NETMAP + /* same as in ixgbe_setup_transmit_ring() */ + slot = netmap_reset(na, NR_RX, rxr->me, 0); +#endif /* DEV_NETMAP */ + rsize = roundup2(adapter->num_rx_desc * + sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN); + bzero((void *)rxr->rx_base, rsize); + /* Cache the size */ + rxr->mbuf_sz = adapter->rx_mbuf_sz; + + /* Free current RX buffer structs and their mbufs */ + ixgbe_free_receive_ring(rxr); + + /* Now replenish the mbufs */ + for (int j = 0; j != rxr->num_desc; ++j) { + struct mbuf *mp; + + rxbuf = &rxr->rx_buffers[j]; +#ifdef DEV_NETMAP + /* + * In netmap mode, fill the map and set the buffer + * address in the NIC ring, considering the offset + * between the netmap and NIC rings (see comment in + * ixgbe_setup_transmit_ring() ). No need to allocate + * an mbuf, so end the block with a continue; + */ + if (slot) { + int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j); + uint64_t paddr; + void *addr; + + addr = PNMB(na, slot + sj, &paddr); + netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr); + /* Update descriptor and the cached value */ + rxr->rx_base[j].read.pkt_addr = htole64(paddr); + rxbuf->addr = htole64(paddr); + continue; + } +#endif /* DEV_NETMAP */ + rxbuf->flags = 0; + rxbuf->buf = m_getjcl(M_NOWAIT, MT_DATA, + M_PKTHDR, adapter->rx_mbuf_sz); + if (rxbuf->buf == NULL) { + error = ENOBUFS; + goto fail; + } + mp = rxbuf->buf; + mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz; + /* Get the memory mapping */ + error = bus_dmamap_load_mbuf_sg(rxr->ptag, + rxbuf->pmap, mp, seg, + &nsegs, BUS_DMA_NOWAIT); + if (error != 0) + goto fail; + bus_dmamap_sync(rxr->ptag, + rxbuf->pmap, BUS_DMASYNC_PREREAD); + /* Update the descriptor and the cached value */ + rxr->rx_base[j].read.pkt_addr = htole64(seg[0].ds_addr); + rxbuf->addr = htole64(seg[0].ds_addr); + } + + + /* Setup our descriptor indices */ + rxr->next_to_check = 0; + rxr->next_to_refresh = 0; + rxr->lro_enabled = FALSE; + rxr->rx_copies = 0; + rxr->rx_bytes = 0; + rxr->vtag_strip = FALSE; + + bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, + BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); + + /* + ** Now set up the LRO interface: + */ + if (ixgbe_rsc_enable) + ixgbe_setup_hw_rsc(rxr); + else if (ifp->if_capenable & IFCAP_LRO) { + int err = tcp_lro_init(lro); + if (err) { + device_printf(dev, "LRO Initialization failed!\n"); + goto fail; + } + INIT_DEBUGOUT("RX Soft LRO Initialized\n"); + rxr->lro_enabled = TRUE; + lro->ifp = adapter->ifp; + } + + IXGBE_RX_UNLOCK(rxr); + return (0); + +fail: + ixgbe_free_receive_ring(rxr); + IXGBE_RX_UNLOCK(rxr); + return (error); +} + +/********************************************************************* + * + * Initialize all receive rings. 
+ * + **********************************************************************/ +int +ixgbe_setup_receive_structures(struct adapter *adapter) +{ + struct rx_ring *rxr = adapter->rx_rings; + int j; + + for (j = 0; j < adapter->num_queues; j++, rxr++) + if (ixgbe_setup_receive_ring(rxr)) + goto fail; + + return (0); +fail: + /* + * Free RX buffers allocated so far, we will only handle + * the rings that completed, the failing case will have + * cleaned up for itself. 'j' failed, so its the terminus. + */ + for (int i = 0; i < j; ++i) { + rxr = &adapter->rx_rings[i]; + ixgbe_free_receive_ring(rxr); + } + + return (ENOBUFS); +} + + +/********************************************************************* + * + * Free all receive rings. + * + **********************************************************************/ +void +ixgbe_free_receive_structures(struct adapter *adapter) +{ + struct rx_ring *rxr = adapter->rx_rings; + + INIT_DEBUGOUT("ixgbe_free_receive_structures: begin"); + + for (int i = 0; i < adapter->num_queues; i++, rxr++) { + struct lro_ctrl *lro = &rxr->lro; + ixgbe_free_receive_buffers(rxr); + /* Free LRO memory */ + tcp_lro_free(lro); + /* Free the ring memory as well */ + ixgbe_dma_free(adapter, &rxr->rxdma); + } + + free(adapter->rx_rings, M_DEVBUF); +} + + +/********************************************************************* + * + * Free receive ring data structures + * + **********************************************************************/ +void +ixgbe_free_receive_buffers(struct rx_ring *rxr) +{ + struct adapter *adapter = rxr->adapter; + struct ixgbe_rx_buf *rxbuf; + + INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin"); + + /* Cleanup any existing buffers */ + if (rxr->rx_buffers != NULL) { + for (int i = 0; i < adapter->num_rx_desc; i++) { + rxbuf = &rxr->rx_buffers[i]; + if (rxbuf->buf != NULL) { + bus_dmamap_sync(rxr->ptag, rxbuf->pmap, + BUS_DMASYNC_POSTREAD); + bus_dmamap_unload(rxr->ptag, rxbuf->pmap); + rxbuf->buf->m_flags |= M_PKTHDR; + m_freem(rxbuf->buf); + } + rxbuf->buf = NULL; + if (rxbuf->pmap != NULL) { + bus_dmamap_destroy(rxr->ptag, rxbuf->pmap); + rxbuf->pmap = NULL; + } + } + if (rxr->rx_buffers != NULL) { + free(rxr->rx_buffers, M_DEVBUF); + rxr->rx_buffers = NULL; + } + } + + if (rxr->ptag != NULL) { + bus_dma_tag_destroy(rxr->ptag); + rxr->ptag = NULL; + } + + return; +} + +static __inline void +ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype) +{ + + /* + * ATM LRO is only for IP/TCP packets and TCP checksum of the packet + * should be computed by hardware. Also it should not have VLAN tag in + * ethernet header. In case of IPv6 we do not yet support ext. hdrs. 
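+	 *
+	 * Concretely: the mbuf must carry both CSUM_DATA_VALID and
+	 * CSUM_PSEUDO_HDR, and the descriptor packet type must be
+	 * IPv4/TCP or IPv6/TCP with no ETQF match; anything else falls
+	 * through to the plain if_input() path below.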
+ */ + if (rxr->lro_enabled && + (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 && + (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 && + ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) == + (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) || + (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) == + (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) && + (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) == + (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) { + /* + * Send to the stack if: + ** - LRO not enabled, or + ** - no LRO resources, or + ** - lro enqueue fails + */ + if (rxr->lro.lro_cnt != 0) + if (tcp_lro_rx(&rxr->lro, m, 0) == 0) + return; + } + IXGBE_RX_UNLOCK(rxr); + (*ifp->if_input)(ifp, m); + IXGBE_RX_LOCK(rxr); +} + +static __inline void +ixgbe_rx_discard(struct rx_ring *rxr, int i) +{ + struct ixgbe_rx_buf *rbuf; + + rbuf = &rxr->rx_buffers[i]; + + + /* + ** With advanced descriptors the writeback + ** clobbers the buffer addrs, so its easier + ** to just free the existing mbufs and take + ** the normal refresh path to get new buffers + ** and mapping. + */ + + if (rbuf->fmp != NULL) {/* Partial chain ? */ + rbuf->fmp->m_flags |= M_PKTHDR; + m_freem(rbuf->fmp); + rbuf->fmp = NULL; + rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */ + } else if (rbuf->buf) { + m_free(rbuf->buf); + rbuf->buf = NULL; + } + bus_dmamap_unload(rxr->ptag, rbuf->pmap); + + rbuf->flags = 0; + + return; +} + + +/********************************************************************* + * + * This routine executes in interrupt context. It replenishes + * the mbufs in the descriptor and sends data which has been + * dma'ed into host memory to upper layer. + * + * Return TRUE for more work, FALSE for all clean. + *********************************************************************/ +bool +ixgbe_rxeof(struct ix_queue *que) +{ + struct adapter *adapter = que->adapter; + struct rx_ring *rxr = que->rxr; + struct ifnet *ifp = adapter->ifp; + struct lro_ctrl *lro = &rxr->lro; + int i, nextp, processed = 0; + u32 staterr = 0; + u32 count = adapter->rx_process_limit; + union ixgbe_adv_rx_desc *cur; + struct ixgbe_rx_buf *rbuf, *nbuf; + u16 pkt_info; + + IXGBE_RX_LOCK(rxr); + +#ifdef DEV_NETMAP + /* Same as the txeof routine: wakeup clients on intr. */ + if (netmap_rx_irq(ifp, rxr->me, &processed)) { + IXGBE_RX_UNLOCK(rxr); + return (FALSE); + } +#endif /* DEV_NETMAP */ + + for (i = rxr->next_to_check; count != 0;) { + struct mbuf *sendmp, *mp; + u32 rsc, ptype; + u16 len; + u16 vtag = 0; + bool eop; + + /* Sync the ring. 
*/
+		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
+		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
+
+		cur = &rxr->rx_base[i];
+		staterr = le32toh(cur->wb.upper.status_error);
+		pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
+
+		if ((staterr & IXGBE_RXD_STAT_DD) == 0)
+			break;
+		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
+			break;
+
+		count--;
+		sendmp = NULL;
+		nbuf = NULL;
+		rsc = 0;
+		cur->wb.upper.status_error = 0;
+		rbuf = &rxr->rx_buffers[i];
+		mp = rbuf->buf;
+
+		len = le16toh(cur->wb.upper.length);
+		ptype = le32toh(cur->wb.lower.lo_dword.data) &
+		    IXGBE_RXDADV_PKTTYPE_MASK;
+		eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
+
+		/* Make sure bad packets are discarded */
+		if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
+#if __FreeBSD_version >= 1100036
+			if (IXGBE_IS_VF(adapter))
+				if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
+#endif
+			rxr->rx_discarded++;
+			ixgbe_rx_discard(rxr, i);
+			goto next_desc;
+		}
+
+		/*
+		** On 82599 which supports a hardware
+		** LRO (called HW RSC), packets need
+		** not be fragmented across sequential
+		** descriptors, rather the next descriptor
+		** is indicated in bits of the descriptor.
+		** This also means that we might process
+		** more than one packet at a time, something
+		** that was never true before; it required
+		** eliminating global chain pointers
+		** in favor of what we are doing here. -jfv
+		*/
+		if (!eop) {
+			/*
+			** Figure out the next descriptor
+			** of this frame.
+			*/
+			if (rxr->hw_rsc == TRUE) {
+				rsc = ixgbe_rsc_count(cur);
+				rxr->rsc_num += (rsc - 1);
+			}
+			if (rsc) { /* Get hardware index */
+				nextp = ((staterr &
+				    IXGBE_RXDADV_NEXTP_MASK) >>
+				    IXGBE_RXDADV_NEXTP_SHIFT);
+			} else { /* Just sequential */
+				nextp = i + 1;
+				if (nextp == adapter->num_rx_desc)
+					nextp = 0;
+			}
+			nbuf = &rxr->rx_buffers[nextp];
+			prefetch(nbuf);
+		}
+		/*
+		** Rather than using the fmp/lmp global pointers
+		** we now keep the head of a packet chain in the
+		** buffer struct and pass this along from one
+		** descriptor to the next, until we get EOP.
+		*/
+		mp->m_len = len;
+		/*
+		** See if there is a stored head
+		** that determines what we are
+		*/
+		sendmp = rbuf->fmp;
+		if (sendmp != NULL) {  /* secondary frag */
+			rbuf->buf = rbuf->fmp = NULL;
+			mp->m_flags &= ~M_PKTHDR;
+			sendmp->m_pkthdr.len += mp->m_len;
+		} else {
+			/*
+			 * Optimize.  This might be a small packet,
+			 * maybe just a TCP ACK.  Do a fast copy that
+			 * is cache aligned into a new mbuf, and
+			 * leave the old mbuf+cluster for re-use.
+ */ + if (eop && len <= IXGBE_RX_COPY_LEN) { + sendmp = m_gethdr(M_NOWAIT, MT_DATA); + if (sendmp != NULL) { + sendmp->m_data += + IXGBE_RX_COPY_ALIGN; + ixgbe_bcopy(mp->m_data, + sendmp->m_data, len); + sendmp->m_len = len; + rxr->rx_copies++; + rbuf->flags |= IXGBE_RX_COPY; + } + } + if (sendmp == NULL) { + rbuf->buf = rbuf->fmp = NULL; + sendmp = mp; + } + + /* first desc of a non-ps chain */ + sendmp->m_flags |= M_PKTHDR; + sendmp->m_pkthdr.len = mp->m_len; + } + ++processed; + + /* Pass the head pointer on */ + if (eop == 0) { + nbuf->fmp = sendmp; + sendmp = NULL; + mp->m_next = nbuf->buf; + } else { /* Sending this frame */ + sendmp->m_pkthdr.rcvif = ifp; + rxr->rx_packets++; + /* capture data for AIM */ + rxr->bytes += sendmp->m_pkthdr.len; + rxr->rx_bytes += sendmp->m_pkthdr.len; + /* Process vlan info */ + if ((rxr->vtag_strip) && + (staterr & IXGBE_RXD_STAT_VP)) + vtag = le16toh(cur->wb.upper.vlan); + if (vtag) { + sendmp->m_pkthdr.ether_vtag = vtag; + sendmp->m_flags |= M_VLANTAG; + } + if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) + ixgbe_rx_checksum(staterr, sendmp, ptype); + + /* + * In case of multiqueue, we have RXCSUM.PCSD bit set + * and never cleared. This means we have RSS hash + * available to be used. + */ + if (adapter->num_queues > 1) { + sendmp->m_pkthdr.flowid = + le32toh(cur->wb.lower.hi_dword.rss); + switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) { + case IXGBE_RXDADV_RSSTYPE_IPV4: + M_HASHTYPE_SET(sendmp, + M_HASHTYPE_RSS_IPV4); + break; + case IXGBE_RXDADV_RSSTYPE_IPV4_TCP: + M_HASHTYPE_SET(sendmp, + M_HASHTYPE_RSS_TCP_IPV4); + break; + case IXGBE_RXDADV_RSSTYPE_IPV6: + M_HASHTYPE_SET(sendmp, + M_HASHTYPE_RSS_IPV6); + break; + case IXGBE_RXDADV_RSSTYPE_IPV6_TCP: + M_HASHTYPE_SET(sendmp, + M_HASHTYPE_RSS_TCP_IPV6); + break; + case IXGBE_RXDADV_RSSTYPE_IPV6_EX: + M_HASHTYPE_SET(sendmp, + M_HASHTYPE_RSS_IPV6_EX); + break; + case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX: + M_HASHTYPE_SET(sendmp, + M_HASHTYPE_RSS_TCP_IPV6_EX); + break; +#if __FreeBSD_version > 1100000 + case IXGBE_RXDADV_RSSTYPE_IPV4_UDP: + M_HASHTYPE_SET(sendmp, + M_HASHTYPE_RSS_UDP_IPV4); + break; + case IXGBE_RXDADV_RSSTYPE_IPV6_UDP: + M_HASHTYPE_SET(sendmp, + M_HASHTYPE_RSS_UDP_IPV6); + break; + case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX: + M_HASHTYPE_SET(sendmp, + M_HASHTYPE_RSS_UDP_IPV6_EX); + break; +#endif + default: + M_HASHTYPE_SET(sendmp, + M_HASHTYPE_OPAQUE_HASH); + } + } else { + sendmp->m_pkthdr.flowid = que->msix; + M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE); + } + } +next_desc: + bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, + BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); + + /* Advance our pointers to the next descriptor. */ + if (++i == rxr->num_desc) + i = 0; + + /* Now send to the stack or do LRO */ + if (sendmp != NULL) { + rxr->next_to_check = i; + ixgbe_rx_input(rxr, ifp, sendmp, ptype); + i = rxr->next_to_check; + } + + /* Every 8 descriptors we go to refresh mbufs */ + if (processed == 8) { + ixgbe_refresh_mbufs(rxr, i); + processed = 0; + } + } + + /* Refresh any remaining buf structs */ + if (ixgbe_rx_unrefreshed(rxr)) + ixgbe_refresh_mbufs(rxr, i); + + rxr->next_to_check = i; + + /* + * Flush any outstanding LRO work + */ + tcp_lro_flush_all(lro); + + IXGBE_RX_UNLOCK(rxr); + + /* + ** Still have cleaning to do? + */ + if ((staterr & IXGBE_RXD_STAT_DD) != 0) + return (TRUE); + else + return (FALSE); +} + + +/********************************************************************* + * + * Verify that the hardware indicated that the checksum is valid. 
+ * Inform the stack about the status of checksum so that stack + * doesn't spend time verifying the checksum. + * + *********************************************************************/ +static void +ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype) +{ + u16 status = (u16) staterr; + u8 errors = (u8) (staterr >> 24); + bool sctp = false; + + if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 && + (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0) + sctp = true; + + /* IPv4 checksum */ + if (status & IXGBE_RXD_STAT_IPCS) { + mp->m_pkthdr.csum_flags |= CSUM_L3_CALC; + /* IP Checksum Good */ + if (!(errors & IXGBE_RXD_ERR_IPE)) + mp->m_pkthdr.csum_flags |= CSUM_L3_VALID; + } + /* TCP/UDP/SCTP checksum */ + if (status & IXGBE_RXD_STAT_L4CS) { + mp->m_pkthdr.csum_flags |= CSUM_L4_CALC; + if (!(errors & IXGBE_RXD_ERR_TCPE)) { + mp->m_pkthdr.csum_flags |= CSUM_L4_VALID; + if (!sctp) + mp->m_pkthdr.csum_data = htons(0xffff); + } + } +} + +/******************************************************************** + * Manage DMA'able memory. + *******************************************************************/ +static void +ixgbe_dmamap_cb(void *arg, bus_dma_segment_t * segs, int nseg, int error) +{ + if (error) + return; + *(bus_addr_t *) arg = segs->ds_addr; + return; +} + +int +ixgbe_dma_malloc(struct adapter *adapter, bus_size_t size, + struct ixgbe_dma_alloc *dma, int mapflags) +{ + device_t dev = adapter->dev; + int r; + + r = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */ + DBA_ALIGN, 0, /* alignment, bounds */ + BUS_SPACE_MAXADDR, /* lowaddr */ + BUS_SPACE_MAXADDR, /* highaddr */ + NULL, NULL, /* filter, filterarg */ + size, /* maxsize */ + 1, /* nsegments */ + size, /* maxsegsize */ + BUS_DMA_ALLOCNOW, /* flags */ + NULL, /* lockfunc */ + NULL, /* lockfuncarg */ + &dma->dma_tag); + if (r != 0) { + device_printf(dev,"ixgbe_dma_malloc: bus_dma_tag_create failed; " + "error %u\n", r); + goto fail_0; + } + r = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr, + BUS_DMA_NOWAIT, &dma->dma_map); + if (r != 0) { + device_printf(dev,"ixgbe_dma_malloc: bus_dmamem_alloc failed; " + "error %u\n", r); + goto fail_1; + } + r = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr, + size, + ixgbe_dmamap_cb, + &dma->dma_paddr, + mapflags | BUS_DMA_NOWAIT); + if (r != 0) { + device_printf(dev,"ixgbe_dma_malloc: bus_dmamap_load failed; " + "error %u\n", r); + goto fail_2; + } + dma->dma_size = size; + return (0); +fail_2: + bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map); +fail_1: + bus_dma_tag_destroy(dma->dma_tag); +fail_0: + dma->dma_tag = NULL; + return (r); +} + +void +ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma) +{ + bus_dmamap_sync(dma->dma_tag, dma->dma_map, + BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); + bus_dmamap_unload(dma->dma_tag, dma->dma_map); + bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map); + bus_dma_tag_destroy(dma->dma_tag); +} + + +/********************************************************************* + * + * Allocate memory for the transmit and receive rings, and then + * the descriptors associated with each, called only once at attach. 
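+ *
+ *  The descriptor areas come from the ixgbe_dma_malloc() helper just
+ *  above; a minimal usage sketch (error unwinding trimmed):
+ */
+#if 0	/* illustrative sketch only, not compiled */
+static int
+example_ring_dma(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
+{
+	int tsize = roundup2(adapter->num_tx_desc *
+	    sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);
+
+	if (ixgbe_dma_malloc(adapter, tsize, dma, BUS_DMA_NOWAIT))
+		return (ENOMEM);
+	/* ... use dma->dma_vaddr / dma->dma_paddr, then ... */
+	ixgbe_dma_free(adapter, dma);
+	return (0);
+}
+#endif
+/*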
+ * + **********************************************************************/ +int +ixgbe_allocate_queues(struct adapter *adapter) +{ + device_t dev = adapter->dev; + struct ix_queue *que; + struct tx_ring *txr; + struct rx_ring *rxr; + int rsize, tsize, error = IXGBE_SUCCESS; + int txconf = 0, rxconf = 0; +#ifdef PCI_IOV + enum ixgbe_iov_mode iov_mode; +#endif + + /* First allocate the top level queue structs */ + if (!(adapter->queues = + (struct ix_queue *) malloc(sizeof(struct ix_queue) * + adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { + device_printf(dev, "Unable to allocate queue memory\n"); + error = ENOMEM; + goto fail; + } + + /* First allocate the TX ring struct memory */ + if (!(adapter->tx_rings = + (struct tx_ring *) malloc(sizeof(struct tx_ring) * + adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { + device_printf(dev, "Unable to allocate TX ring memory\n"); + error = ENOMEM; + goto tx_fail; + } + + /* Next allocate the RX */ + if (!(adapter->rx_rings = + (struct rx_ring *) malloc(sizeof(struct rx_ring) * + adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { + device_printf(dev, "Unable to allocate RX ring memory\n"); + error = ENOMEM; + goto rx_fail; + } + + /* For the ring itself */ + tsize = roundup2(adapter->num_tx_desc * + sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN); + +#ifdef PCI_IOV + iov_mode = ixgbe_get_iov_mode(adapter); + adapter->pool = ixgbe_max_vfs(iov_mode); +#else + adapter->pool = 0; +#endif + /* + * Now set up the TX queues, txconf is needed to handle the + * possibility that things fail midcourse and we need to + * undo memory gracefully + */ + for (int i = 0; i < adapter->num_queues; i++, txconf++) { + /* Set up some basics */ + txr = &adapter->tx_rings[i]; + txr->adapter = adapter; +#ifdef PCI_IOV + txr->me = ixgbe_pf_que_index(iov_mode, i); +#else + txr->me = i; +#endif + txr->num_desc = adapter->num_tx_desc; + + /* Initialize the TX side lock */ + snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)", + device_get_nameunit(dev), txr->me); + mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF); + + if (ixgbe_dma_malloc(adapter, tsize, + &txr->txdma, BUS_DMA_NOWAIT)) { + device_printf(dev, + "Unable to allocate TX Descriptor memory\n"); + error = ENOMEM; + goto err_tx_desc; + } + txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr; + bzero((void *)txr->tx_base, tsize); + + /* Now allocate transmit buffers for the ring */ + if (ixgbe_allocate_transmit_buffers(txr)) { + device_printf(dev, + "Critical Failure setting up transmit buffers\n"); + error = ENOMEM; + goto err_tx_desc; + } +#ifndef IXGBE_LEGACY_TX + /* Allocate a buf ring */ + txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_DEVBUF, + M_WAITOK, &txr->tx_mtx); + if (txr->br == NULL) { + device_printf(dev, + "Critical Failure setting up buf ring\n"); + error = ENOMEM; + goto err_tx_desc; + } +#endif + } + + /* + * Next the RX queues... 
+ */ + rsize = roundup2(adapter->num_rx_desc * + sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN); + for (int i = 0; i < adapter->num_queues; i++, rxconf++) { + rxr = &adapter->rx_rings[i]; + /* Set up some basics */ + rxr->adapter = adapter; +#ifdef PCI_IOV + rxr->me = ixgbe_pf_que_index(iov_mode, i); +#else + rxr->me = i; +#endif + rxr->num_desc = adapter->num_rx_desc; + + /* Initialize the RX side lock */ + snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)", + device_get_nameunit(dev), rxr->me); + mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF); + + if (ixgbe_dma_malloc(adapter, rsize, + &rxr->rxdma, BUS_DMA_NOWAIT)) { + device_printf(dev, + "Unable to allocate RxDescriptor memory\n"); + error = ENOMEM; + goto err_rx_desc; + } + rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr; + bzero((void *)rxr->rx_base, rsize); + + /* Allocate receive buffers for the ring*/ + if (ixgbe_allocate_receive_buffers(rxr)) { + device_printf(dev, + "Critical Failure setting up receive buffers\n"); + error = ENOMEM; + goto err_rx_desc; + } + } + + /* + ** Finally set up the queue holding structs + */ + for (int i = 0; i < adapter->num_queues; i++) { + que = &adapter->queues[i]; + que->adapter = adapter; + que->me = i; + que->txr = &adapter->tx_rings[i]; + que->rxr = &adapter->rx_rings[i]; + } + + return (0); + +err_rx_desc: + for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--) + ixgbe_dma_free(adapter, &rxr->rxdma); +err_tx_desc: + for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--) + ixgbe_dma_free(adapter, &txr->txdma); + free(adapter->rx_rings, M_DEVBUF); +rx_fail: + free(adapter->tx_rings, M_DEVBUF); +tx_fail: + free(adapter->queues, M_DEVBUF); +fail: + return (error); +} Index: sys/dev/ixgbe/legacy_ixgbe.h =================================================================== --- /dev/null +++ sys/dev/ixgbe/legacy_ixgbe.h @@ -0,0 +1,900 @@ +/****************************************************************************** + + Copyright (c) 2001-2015, Intel Corporation + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. Neither the name of the Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+
+******************************************************************************/
+/*$FreeBSD$*/
+
+
+#ifndef _IXGBE_H_
+#define _IXGBE_H_
+
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#ifndef IXGBE_LEGACY_TX
+#include <sys/buf_ring.h>
+#endif
+#include <sys/mbuf.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/malloc.h>
+#include <sys/kernel.h>
+#include <sys/module.h>
+#include <sys/sockio.h>
+#include <sys/eventhandler.h>
+
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/if_arp.h>
+#include <net/bpf.h>
+#include <net/ethernet.h>
+#include <net/if_dl.h>
+#include <net/if_media.h>
+
+#include <net/bpf.h>
+#include <net/if_types.h>
+#include <net/if_vlan_var.h>
+
+#include <netinet/in_systm.h>
+#include <netinet/in.h>
+#include <netinet/if_ether.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/tcp.h>
+#include <netinet/tcp_lro.h>
+#include <netinet/udp.h>
+
+#include <machine/in_cksum.h>
+
+#include <sys/bus.h>
+#include <machine/bus.h>
+#include <sys/rman.h>
+#include <machine/resource.h>
+#include <vm/vm.h>
+#include <vm/pmap.h>
+#include <machine/clock.h>
+#include <dev/pci/pcivar.h>
+#include <dev/pci/pcireg.h>
+#include <sys/proc.h>
+#include <sys/sysctl.h>
+#include <sys/endian.h>
+#include <sys/taskqueue.h>
+#include <sys/pcpu.h>
+#include <sys/smp.h>
+#include <machine/smp.h>
+#include <sys/sbuf.h>
+
+#ifdef PCI_IOV
+#include <sys/nv.h>
+#include <sys/iov_schema.h>
+#include <dev/pci/pci_iov.h>
+#endif
+
+#include "ixgbe_api.h"
+#include "ixgbe_common.h"
+#include "ixgbe_phy.h"
+#include "ixgbe_vf.h"
+
+#ifdef PCI_IOV
+#include "ixgbe_common.h"
+#include "ixgbe_mbx.h"
+#endif
+
+/* Tunables */
+
+/*
+ * TxDescriptors Valid Range: 64-4096 Default Value: 1024 This value is the
+ * number of transmit descriptors allocated by the driver. Increasing this
+ * value allows the driver to queue more transmits. Each descriptor is 16
+ * bytes. Performance tests have shown the 2K value to be optimal for top
+ * performance.
+ */
+#define DEFAULT_TXD	1024
+#define PERFORM_TXD	2048
+#define MAX_TXD		4096
+#define MIN_TXD		64
+
+/*
+ * RxDescriptors Valid Range: 64-4096 Default Value: 1024 This value is the
+ * number of receive descriptors allocated for each RX queue. Increasing this
+ * value allows the driver to buffer more incoming packets. Each descriptor
+ * is 16 bytes.  A receive buffer is also allocated for each descriptor.
+ *
+ * Note: with 8 rings and a dual port card, it is possible to bump up
+ *	against the system mbuf pool limit, you can tune nmbclusters
+ *	to adjust for this.
+ */
+#define DEFAULT_RXD	1024
+#define PERFORM_RXD	2048
+#define MAX_RXD		4096
+#define MIN_RXD		64
+
+/* Alignment for rings */
+#define DBA_ALIGN	128
+
+/*
+ * This is the max watchdog interval, i.e. the longest time that may
+ * pass between any two TX clean operations; those only happen while
+ * the TX hardware is functioning.
+ */
+#define IXGBE_WATCHDOG	(10 * hz)
+
+/*
+ * These parameters control when the driver calls the routine to reclaim
+ * transmit descriptors.
+ */
+#define IXGBE_TX_CLEANUP_THRESHOLD	(adapter->num_tx_desc / 8)
+#define IXGBE_TX_OP_THRESHOLD		(adapter->num_tx_desc / 32)
+
+/* These defines are used in MTU calculations */
+#define IXGBE_MAX_FRAME_SIZE	9728
+#define IXGBE_MTU_HDR		(ETHER_HDR_LEN + ETHER_CRC_LEN)
+#define IXGBE_MTU_HDR_VLAN	(ETHER_HDR_LEN + ETHER_CRC_LEN + \
+				 ETHER_VLAN_ENCAP_LEN)
+#define IXGBE_MAX_MTU		(IXGBE_MAX_FRAME_SIZE - IXGBE_MTU_HDR)
+#define IXGBE_MAX_MTU_VLAN	(IXGBE_MAX_FRAME_SIZE - IXGBE_MTU_HDR_VLAN)
+
+/* Flow control constants */
+#define IXGBE_FC_PAUSE	0xFFFF
+#define IXGBE_FC_HI	0x20000
+#define IXGBE_FC_LO	0x10000
+
+/*
+ * Used for optimizing small rx mbufs.  Effort is made to keep the copy
+ * small and aligned for the CPU L1 cache.
+ *
+ * MHLEN is typically 168 bytes, giving us 8-byte alignment.  Getting
+ * 32 byte alignment needed for the fast bcopy results in 8 bytes being
+ * wasted.  Getting 64 byte alignment, which _should_ be ideal for
+ * modern Intel CPUs, results in 40 bytes wasted and a significant drop
+ * in observed efficiency of the optimization, 97.9% -> 81.8%.
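+ *
+ * Worked example (values assumed for illustration): with MSIZE = 256
+ * and MPKTHSIZE = 88, the padded header size below rounds 88 up to 96,
+ * so IXGBE_RX_COPY_LEN comes out to 160 and IXGBE_RX_COPY_ALIGN to 8,
+ * the 8 wasted bytes described above.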
+/*
+ * Used for optimizing small rx mbufs. Effort is made to keep the copy
+ * small and aligned for the CPU L1 cache.
+ *
+ * MHLEN is typically 168 bytes, giving us 8-byte alignment. Getting
+ * 32 byte alignment needed for the fast bcopy results in 8 bytes being
+ * wasted. Getting 64 byte alignment, which _should_ be ideal for
+ * modern Intel CPUs, results in 40 bytes wasted and a significant drop
+ * in observed efficiency of the optimization, 97.9% -> 81.8%.
+ */
+#if __FreeBSD_version < 1002000
+#define MPKTHSIZE	(sizeof(struct m_hdr) + sizeof(struct pkthdr))
+#endif
+#define IXGBE_RX_COPY_HDR_PADDED	((((MPKTHSIZE - 1) / 32) + 1) * 32)
+#define IXGBE_RX_COPY_LEN		(MSIZE - IXGBE_RX_COPY_HDR_PADDED)
+#define IXGBE_RX_COPY_ALIGN		(IXGBE_RX_COPY_HDR_PADDED - MPKTHSIZE)
+
+/* Keep older OS drivers building... */
+#if !defined(SYSCTL_ADD_UQUAD)
+#define SYSCTL_ADD_UQUAD	SYSCTL_ADD_QUAD
+#endif
+
+/* Defines for printing debug information */
+#define DEBUG_INIT	0
+#define DEBUG_IOCTL	0
+#define DEBUG_HW	0
+
+#define INIT_DEBUGOUT(S)		if (DEBUG_INIT)  printf(S "\n")
+#define INIT_DEBUGOUT1(S, A)		if (DEBUG_INIT)  printf(S "\n", A)
+#define INIT_DEBUGOUT2(S, A, B)		if (DEBUG_INIT)  printf(S "\n", A, B)
+#define IOCTL_DEBUGOUT(S)		if (DEBUG_IOCTL) printf(S "\n")
+#define IOCTL_DEBUGOUT1(S, A)		if (DEBUG_IOCTL) printf(S "\n", A)
+#define IOCTL_DEBUGOUT2(S, A, B)	if (DEBUG_IOCTL) printf(S "\n", A, B)
+#define HW_DEBUGOUT(S)			if (DEBUG_HW) printf(S "\n")
+#define HW_DEBUGOUT1(S, A)		if (DEBUG_HW) printf(S "\n", A)
+#define HW_DEBUGOUT2(S, A, B)		if (DEBUG_HW) printf(S "\n", A, B)
+
+#define MAX_NUM_MULTICAST_ADDRESSES	128
+#define IXGBE_82598_SCATTER		100
+#define IXGBE_82599_SCATTER		32
+#define MSIX_82598_BAR			3
+#define MSIX_82599_BAR			4
+#define IXGBE_TSO_SIZE			262140
+#define IXGBE_RX_HDR			128
+#define IXGBE_VFTA_SIZE			128
+#define IXGBE_BR_SIZE			4096
+#define IXGBE_QUEUE_MIN_FREE		32
+#define IXGBE_MAX_TX_BUSY		10
+#define IXGBE_QUEUE_HUNG		0x80000000
+
+#define IXV_EITR_DEFAULT		128
+
+/* Supported offload bits in mbuf flag */
+#if __FreeBSD_version >= 1000000
+#define CSUM_OFFLOAD	(CSUM_IP_TSO|CSUM_IP6_TSO|CSUM_IP| \
+			 CSUM_IP_UDP|CSUM_IP_TCP|CSUM_IP_SCTP| \
+			 CSUM_IP6_UDP|CSUM_IP6_TCP|CSUM_IP6_SCTP)
+#elif __FreeBSD_version >= 800000
+#define CSUM_OFFLOAD	(CSUM_IP|CSUM_TCP|CSUM_UDP|CSUM_SCTP)
+#else
+#define CSUM_OFFLOAD	(CSUM_IP|CSUM_TCP|CSUM_UDP)
+#endif
+
+/* Backward compatibility items for very old versions */
+#ifndef pci_find_cap
+#define pci_find_cap pci_find_extcap
+#endif
+
+#ifndef DEVMETHOD_END
+#define DEVMETHOD_END { NULL, NULL }
+#endif
+
+/*
+ * Interrupt Moderation parameters
+ */
+#define IXGBE_LOW_LATENCY	128
+#define IXGBE_AVE_LATENCY	400
+#define IXGBE_BULK_LATENCY	1200
+
+/* Using 1FF (the max value), the interval is ~1.05ms */
+#define IXGBE_LINK_ITR_QUANTA	0x1FF
+#define IXGBE_LINK_ITR		((IXGBE_LINK_ITR_QUANTA << 3) & \
+				 IXGBE_EITR_ITR_INT_MASK)
+
+/* MAC type macros */
+#define IXGBE_IS_X550VF(_adapter) \
+	((_adapter->hw.mac.type == ixgbe_mac_X550_vf) || \
+	 (_adapter->hw.mac.type == ixgbe_mac_X550EM_x_vf))
+
+#define IXGBE_IS_VF(_adapter) \
+	(IXGBE_IS_X550VF(_adapter) || \
+	 (_adapter->hw.mac.type == ixgbe_mac_X540_vf) || \
+	 (_adapter->hw.mac.type == ixgbe_mac_82599_vf))
+
+#ifdef PCI_IOV
+#define IXGBE_VF_INDEX(vmdq)	((vmdq) / 32)
+#define IXGBE_VF_BIT(vmdq)	(1 << ((vmdq) % 32))
+
+#define IXGBE_VT_MSG_MASK	0xFFFF
+
+#define IXGBE_VT_MSGINFO(msg)	\
+	(((msg) & IXGBE_VT_MSGINFO_MASK) >> IXGBE_VT_MSGINFO_SHIFT)
+
+#define IXGBE_VF_GET_QUEUES_RESP_LEN	5
+
+#define IXGBE_API_VER_1_0	0
+#define IXGBE_API_VER_2_0	1	/* Solaris API. Not supported. */
+#define IXGBE_API_VER_1_1	2
+#define IXGBE_API_VER_UNKNOWN	UINT16_MAX
+
+enum ixgbe_iov_mode {
+	IXGBE_64_VM,
+	IXGBE_32_VM,
+	IXGBE_NO_VM
+};
+#endif /* PCI_IOV */
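To make the "~1.05ms" remark concrete: assuming the usual 2.048 us EITR
granularity on these MACs (an assumption worth checking against the datasheet
for a given part), the maximum quanta value expands as follows (standalone
illustration, not driver code):

    #include <stdio.h>

    int
    main(void)
    {
        unsigned quanta = 0x1FF;    /* IXGBE_LINK_ITR_QUANTA */
        double us = quanta * 2.048; /* assumed EITR step, usec */

        printf("link ITR interval ~= %.1f us\n", us); /* ~1046.5 */
        return (0);
    }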
+/*
+ *****************************************************************************
+ * vendor_info_array
+ *
+ * This array contains the list of Subvendor/Subdevice IDs on which the driver
+ * should load.
+ *
+ *****************************************************************************
+ */
+typedef struct _ixgbe_vendor_info_t {
+	unsigned int	vendor_id;
+	unsigned int	device_id;
+	unsigned int	subvendor_id;
+	unsigned int	subdevice_id;
+	unsigned int	index;
+} ixgbe_vendor_info_t;
+
+
+struct ixgbe_tx_buf {
+	union ixgbe_adv_tx_desc	*eop;
+	struct mbuf		*m_head;
+	bus_dmamap_t		map;
+};
+
+struct ixgbe_rx_buf {
+	struct mbuf	*buf;
+	struct mbuf	*fmp;
+	bus_dmamap_t	pmap;
+	u_int		flags;
+#define IXGBE_RX_COPY	0x01
+	uint64_t	addr;
+};
+
+/*
+ * Bus dma allocation structure used by ixgbe_dma_malloc and ixgbe_dma_free.
+ */
+struct ixgbe_dma_alloc {
+	bus_addr_t		dma_paddr;
+	caddr_t			dma_vaddr;
+	bus_dma_tag_t		dma_tag;
+	bus_dmamap_t		dma_map;
+	bus_dma_segment_t	dma_seg;
+	bus_size_t		dma_size;
+	int			dma_nseg;
+};
+
+struct ixgbe_mc_addr {
+	u8	addr[IXGBE_ETH_LENGTH_OF_ADDRESS];
+	u32	vmdq;
+};
+
+/*
+** Driver queue struct: this is the interrupt container
+** for the associated tx and rx ring.
+*/
+struct ix_queue {
+	struct adapter		*adapter;
+	u32			msix;	/* This queue's MSIX vector */
+	u32			eims;	/* This queue's EIMS bit */
+	u32			eitr_setting;
+	u32			me;
+	struct resource		*res;
+	void			*tag;
+	int			busy;
+	struct tx_ring		*txr;
+	struct rx_ring		*rxr;
+	struct task		que_task;
+	struct taskqueue	*tq;
+	u64			irqs;
+};
+
+/*
+ * The transmit ring, one per queue
+ */
+struct tx_ring {
+	struct adapter		*adapter;
+	struct mtx		tx_mtx;
+	u32			me;
+	u32			tail;
+	int			busy;
+	union ixgbe_adv_tx_desc	*tx_base;
+	struct ixgbe_tx_buf	*tx_buffers;
+	struct ixgbe_dma_alloc	txdma;
+	volatile u16		tx_avail;
+	u16			next_avail_desc;
+	u16			next_to_clean;
+	u16			num_desc;
+	u32			txd_cmd;
+	bus_dma_tag_t		txtag;
+	char			mtx_name[16];
+#ifndef IXGBE_LEGACY_TX
+	struct buf_ring		*br;
+	struct task		txq_task;
+#endif
+#ifdef IXGBE_FDIR
+	u16			atr_sample;
+	u16			atr_count;
+#endif
+	u32			bytes;	/* used for AIM */
+	u32			packets;
+	/* Soft Stats */
+	unsigned long		tso_tx;
+	unsigned long		no_tx_map_avail;
+	unsigned long		no_tx_dma_setup;
+	u64			no_desc_avail;
+	u64			total_packets;
+};
+
+
+/*
+ * The Receive ring, one per rx queue
+ */
+struct rx_ring {
+	struct adapter		*adapter;
+	struct mtx		rx_mtx;
+	u32			me;
+	u32			tail;
+	union ixgbe_adv_rx_desc	*rx_base;
+	struct ixgbe_dma_alloc	rxdma;
+	struct lro_ctrl		lro;
+	bool			lro_enabled;
+	bool			hw_rsc;
+	bool			vtag_strip;
+	u16			next_to_refresh;
+	u16			next_to_check;
+	u16			num_desc;
+	u16			mbuf_sz;
+	char			mtx_name[16];
+	struct ixgbe_rx_buf	*rx_buffers;
+	bus_dma_tag_t		ptag;
+
+	u32			bytes;	/* Used for AIM calc */
+	u32			packets;
+
+	/* Soft stats */
+	u64			rx_irq;
+	u64			rx_copies;
+	u64			rx_packets;
+	u64			rx_bytes;
+	u64			rx_discarded;
+	u64			rsc_num;
+#ifdef IXGBE_FDIR
+	u64			flm;
+#endif
+};
+#ifdef PCI_IOV
+#define IXGBE_VF_CTS		(1 << 0) /* VF is clear to send. */
+#define IXGBE_VF_CAP_MAC	(1 << 1) /* VF is permitted to change MAC. */
+#define IXGBE_VF_CAP_VLAN	(1 << 2) /* VF is permitted to join vlans. */
+#define IXGBE_VF_ACTIVE		(1 << 3) /* VF is active. */
+
+#define IXGBE_MAX_VF_MC	30	/* Max number of multicast entries */
+
+struct ixgbe_vf {
+	u_int		pool;
+	u_int		rar_index;
+	u_int		max_frame_size;
+	uint32_t	flags;
+	uint8_t		ether_addr[ETHER_ADDR_LEN];
+	uint16_t	mc_hash[IXGBE_MAX_VF_MC];
+	uint16_t	num_mc_hashes;
+	uint16_t	default_vlan;
+	uint16_t	vlan_tag;
+	uint16_t	api_ver;
+};
+#endif /* PCI_IOV */
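The flag bits above gate the PF-side mailbox helpers later in this header; a
hypothetical helper (vf_is_ready is illustration, not driver code) showing how
they are meant to combine:

    static inline int
    vf_is_ready(const struct ixgbe_vf *vf)
    {
        /* Message a VF only once it is both active and clear-to-send. */
        return ((vf->flags & (IXGBE_VF_ACTIVE | IXGBE_VF_CTS)) ==
            (IXGBE_VF_ACTIVE | IXGBE_VF_CTS));
    }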
+/* Our adapter structure */
+struct adapter {
+	struct ixgbe_hw		hw;
+	struct ixgbe_osdep	osdep;
+
+	device_t		dev;
+	struct ifnet		*ifp;
+
+	struct resource		*pci_mem;
+	struct resource		*msix_mem;
+
+	/*
+	 * Interrupt resources: this set is
+	 * either used for legacy, or for Link
+	 * when doing MSIX
+	 */
+	void			*tag;
+	struct resource		*res;
+
+	struct ifmedia		media;
+	struct callout		timer;
+	int			msix;
+	int			if_flags;
+
+	struct mtx		core_mtx;
+
+	eventhandler_tag	vlan_attach;
+	eventhandler_tag	vlan_detach;
+
+	u16			num_vlans;
+	u16			num_queues;
+
+	/*
+	** Shadow VFTA table, this is needed because
+	** the real vlan filter table gets cleared during
+	** a soft reset and the driver needs to be able
+	** to repopulate it.
+	*/
+	u32			shadow_vfta[IXGBE_VFTA_SIZE];
+
+	/* Info about the interface */
+	u32			optics;
+	u32			fc;		/* local flow ctrl setting */
+	int			advertise;	/* link speeds */
+	bool			enable_aim;	/* adaptive interrupt moderation */
+	bool			link_active;
+	u16			max_frame_size;
+	u16			num_segs;
+	u32			link_speed;
+	bool			link_up;
+	u32			vector;
+	u16			dmac;
+	bool			eee_enabled;
+	u32			phy_layer;
+
+	/* Power management-related */
+	bool			wol_support;
+	u32			wufc;
+
+	/* Mbuf cluster size */
+	u32			rx_mbuf_sz;
+
+	/* Support for pluggable optics */
+	bool			sfp_probe;
+	struct task		link_task;	/* Link tasklet */
+	struct task		mod_task;	/* SFP tasklet */
+	struct task		msf_task;	/* Multispeed Fiber */
+#ifdef PCI_IOV
+	struct task		mbx_task;	/* VF -> PF mailbox interrupt */
+#endif /* PCI_IOV */
+#ifdef IXGBE_FDIR
+	int			fdir_reinit;
+	struct task		fdir_task;
+#endif
+	struct task		phy_task;	/* PHY intr tasklet */
+	struct taskqueue	*tq;
+
+	/*
+	** Queues:
+	** This is the irq holder, it has
+	** an RX/TX pair of rings associated
+	** with it.
+	*/
+	struct ix_queue		*queues;
+
+	/*
+	 * Transmit rings:
+	 *	Allocated at run time, an array of rings.
+	 */
+	struct tx_ring		*tx_rings;
+	u32			num_tx_desc;
+	u32			tx_process_limit;
+
+	/*
+	 * Receive rings:
+	 *	Allocated at run time, an array of rings.
+	 */
+	struct rx_ring		*rx_rings;
+	u64			active_queues;
+	u32			num_rx_desc;
+	u32			rx_process_limit;
+
+	/* Multicast array memory */
+	struct ixgbe_mc_addr	*mta;
+	int			num_vfs;
+	int			pool;
+#ifdef PCI_IOV
+	struct ixgbe_vf		*vfs;
+#endif
+#ifdef DEV_NETMAP
+	void			(*init_locked)(struct adapter *);
+	void			(*stop_locked)(void *);
+#endif
+
+	/* Misc stats maintained by the driver */
+	unsigned long		dropped_pkts;
+	unsigned long		mbuf_defrag_failed;
+	unsigned long		mbuf_header_failed;
+	unsigned long		mbuf_packet_failed;
+	unsigned long		watchdog_events;
+	unsigned long		link_irq;
+	union {
+		struct ixgbe_hw_stats pf;
+		struct ixgbevf_hw_stats vf;
+	} stats;
+#if __FreeBSD_version >= 1100036
+	/* counter(9) stats */
+	u64			ipackets;
+	u64			ierrors;
+	u64			opackets;
+	u64			oerrors;
+	u64			ibytes;
+	u64			obytes;
+	u64			imcasts;
+	u64			omcasts;
+	u64			iqdrops;
+	u64			noproto;
+#endif
+};
+
+
+/* Precision Time Sync (IEEE 1588) defines */
+#define ETHERTYPE_IEEE1588	0x88F7
+#define PICOSECS_PER_TICK	20833
+#define TSYNC_UDP_PORT		319	/* UDP port for the protocol */
+#define IXGBE_ADVTXD_TSTAMP	0x00080000
+
+
+#define IXGBE_CORE_LOCK_INIT(_sc, _name) \
+	mtx_init(&(_sc)->core_mtx, _name, "IXGBE Core Lock", MTX_DEF)
+#define IXGBE_CORE_LOCK_DESTROY(_sc)	mtx_destroy(&(_sc)->core_mtx)
+#define IXGBE_TX_LOCK_DESTROY(_sc)	mtx_destroy(&(_sc)->tx_mtx)
+#define IXGBE_RX_LOCK_DESTROY(_sc)	mtx_destroy(&(_sc)->rx_mtx)
+#define IXGBE_CORE_LOCK(_sc)		mtx_lock(&(_sc)->core_mtx)
+#define IXGBE_TX_LOCK(_sc)		mtx_lock(&(_sc)->tx_mtx)
+#define IXGBE_TX_TRYLOCK(_sc)		mtx_trylock(&(_sc)->tx_mtx)
+#define IXGBE_RX_LOCK(_sc)		mtx_lock(&(_sc)->rx_mtx)
+#define IXGBE_CORE_UNLOCK(_sc)		mtx_unlock(&(_sc)->core_mtx)
+#define IXGBE_TX_UNLOCK(_sc)		mtx_unlock(&(_sc)->tx_mtx)
+#define IXGBE_RX_UNLOCK(_sc)		mtx_unlock(&(_sc)->rx_mtx)
+#define IXGBE_CORE_LOCK_ASSERT(_sc)	mtx_assert(&(_sc)->core_mtx, MA_OWNED)
+#define IXGBE_TX_LOCK_ASSERT(_sc)	mtx_assert(&(_sc)->tx_mtx, MA_OWNED)
+
+/* For backward compatibility */
+#if !defined(PCIER_LINK_STA)
+#define PCIER_LINK_STA PCIR_EXPRESS_LINK_STA
+#endif
+
+/* Stats macros */
+#if __FreeBSD_version >= 1100036
+#define IXGBE_SET_IPACKETS(sc, count)	(sc)->ipackets = (count)
+#define IXGBE_SET_IERRORS(sc, count)	(sc)->ierrors = (count)
+#define IXGBE_SET_OPACKETS(sc, count)	(sc)->opackets = (count)
+#define IXGBE_SET_OERRORS(sc, count)	(sc)->oerrors = (count)
+#define IXGBE_SET_COLLISIONS(sc, count)
+#define IXGBE_SET_IBYTES(sc, count)	(sc)->ibytes = (count)
+#define IXGBE_SET_OBYTES(sc, count)	(sc)->obytes = (count)
+#define IXGBE_SET_IMCASTS(sc, count)	(sc)->imcasts = (count)
+#define IXGBE_SET_OMCASTS(sc, count)	(sc)->omcasts = (count)
+#define IXGBE_SET_IQDROPS(sc, count)	(sc)->iqdrops = (count)
+#else
+#define IXGBE_SET_IPACKETS(sc, count)	(sc)->ifp->if_ipackets = (count)
+#define IXGBE_SET_IERRORS(sc, count)	(sc)->ifp->if_ierrors = (count)
+#define IXGBE_SET_OPACKETS(sc, count)	(sc)->ifp->if_opackets = (count)
+#define IXGBE_SET_OERRORS(sc, count)	(sc)->ifp->if_oerrors = (count)
+#define IXGBE_SET_COLLISIONS(sc, count)	(sc)->ifp->if_collisions = (count)
+#define IXGBE_SET_IBYTES(sc, count)	(sc)->ifp->if_ibytes = (count)
+#define IXGBE_SET_OBYTES(sc, count)	(sc)->ifp->if_obytes = (count)
+#define IXGBE_SET_IMCASTS(sc, count)	(sc)->ifp->if_imcasts = (count)
+#define IXGBE_SET_OMCASTS(sc, count)	(sc)->ifp->if_omcasts = (count)
+#define IXGBE_SET_IQDROPS(sc, count)	(sc)->ifp->if_iqdrops = (count)
+#endif
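A usage note on the lock wrappers above: paths that reconfigure the interface
take the core lock around a *_locked routine, in the spirit of the sketch
below (assumes this header's context; example_reinit is hypothetical,
ixgbe_init_locked is the driver routine of that name):

    static void
    example_reinit(struct adapter *adapter)
    {
        IXGBE_CORE_LOCK(adapter);
        ixgbe_init_locked(adapter); /* core lock held across re-init */
        IXGBE_CORE_UNLOCK(adapter);
    }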
+/* External PHY register addresses */
+#define IXGBE_PHY_CURRENT_TEMP		0xC820
+#define IXGBE_PHY_OVERTEMP_STATUS	0xC830
+
+/* Sysctl help messages; displayed with sysctl -d */
+#define IXGBE_SYSCTL_DESC_ADV_SPEED \
+	"\nControl advertised link speed using these flags:\n" \
+	"\t0x1 - advertise 100M\n" \
+	"\t0x2 - advertise 1G\n" \
+	"\t0x4 - advertise 10G\n\n" \
+	"\t100M is only supported on certain 10GBaseT adapters.\n"
+
+#define IXGBE_SYSCTL_DESC_SET_FC \
+	"\nSet flow control mode using these values:\n" \
+	"\t0 - off\n" \
+	"\t1 - rx pause\n" \
+	"\t2 - tx pause\n" \
+	"\t3 - tx and rx pause"
+
+static inline bool
+ixgbe_is_sfp(struct ixgbe_hw *hw)
+{
+	switch (hw->phy.type) {
+	case ixgbe_phy_sfp_avago:
+	case ixgbe_phy_sfp_ftl:
+	case ixgbe_phy_sfp_intel:
+	case ixgbe_phy_sfp_unknown:
+	case ixgbe_phy_sfp_passive_tyco:
+	case ixgbe_phy_sfp_passive_unknown:
+	case ixgbe_phy_qsfp_passive_unknown:
+	case ixgbe_phy_qsfp_active_unknown:
+	case ixgbe_phy_qsfp_intel:
+	case ixgbe_phy_qsfp_unknown:
+		return TRUE;
+	default:
+		return FALSE;
+	}
+}
+
+/* Workaround to make 8.0 buildable */
+#if __FreeBSD_version >= 800000 && __FreeBSD_version < 800504
+static __inline int
+drbr_needs_enqueue(struct ifnet *ifp, struct buf_ring *br)
+{
+#ifdef ALTQ
+	if (ALTQ_IS_ENABLED(&ifp->if_snd))
+		return (1);
+#endif
+	return (!buf_ring_empty(br));
+}
+#endif
+
+/*
+** Find the number of unrefreshed RX descriptors
+*/
+static inline u16
+ixgbe_rx_unrefreshed(struct rx_ring *rxr)
+{
+	if (rxr->next_to_check > rxr->next_to_refresh)
+		return (rxr->next_to_check - rxr->next_to_refresh - 1);
+	else
+		return ((rxr->num_desc + rxr->next_to_check) -
+		    rxr->next_to_refresh - 1);
+}
+
+/*
+** This checks for a zero mac addr, something that is likely
+** unless the Admin on the Host has created one.
+*/
+static inline bool
+ixv_check_ether_addr(u8 *addr)
+{
+	bool status = TRUE;
+
+	if ((addr[0] == 0 && addr[1] == 0 && addr[2] == 0 &&
+	    addr[3] == 0 && addr[4] == 0 && addr[5] == 0))
+		status = FALSE;
+	return (status);
+}
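Concrete numbers for the wrap handling in ixgbe_rx_unrefreshed() above
(illustrative values; standalone check, not driver code):

    #include <assert.h>

    /* Mirrors the index arithmetic of ixgbe_rx_unrefreshed(). */
    static unsigned
    unrefreshed(unsigned num_desc, unsigned check, unsigned refresh)
    {
        return (check > refresh ? check - refresh - 1 :
            num_desc + check - refresh - 1);
    }

    int
    main(void)
    {
        assert(unrefreshed(1024, 500, 100) == 399); /* no wrap */
        assert(unrefreshed(1024, 10, 1000) == 33);  /* wrapped */
        return (0);
    }

The trailing -1 keeps refresh from fully catching up to check, so a completely
full ring stays distinguishable from an empty one.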
+/* Shared Prototypes */
+
+#ifdef IXGBE_LEGACY_TX
+void	ixgbe_start(struct ifnet *);
+void	ixgbe_start_locked(struct tx_ring *, struct ifnet *);
+#else /* ! IXGBE_LEGACY_TX */
+int	ixgbe_mq_start(struct ifnet *, struct mbuf *);
+int	ixgbe_mq_start_locked(struct ifnet *, struct tx_ring *);
+void	ixgbe_qflush(struct ifnet *);
+void	ixgbe_deferred_mq_start(void *, int);
+#endif /* IXGBE_LEGACY_TX */
+
+int	ixgbe_allocate_queues(struct adapter *);
+int	ixgbe_allocate_transmit_buffers(struct tx_ring *);
+int	ixgbe_setup_transmit_structures(struct adapter *);
+void	ixgbe_free_transmit_structures(struct adapter *);
+int	ixgbe_allocate_receive_buffers(struct rx_ring *);
+int	ixgbe_setup_receive_structures(struct adapter *);
+void	ixgbe_free_receive_structures(struct adapter *);
+void	ixgbe_txeof(struct tx_ring *);
+bool	ixgbe_rxeof(struct ix_queue *);
+
+int	ixgbe_dma_malloc(struct adapter *,
+	    bus_size_t, struct ixgbe_dma_alloc *, int);
+void	ixgbe_dma_free(struct adapter *, struct ixgbe_dma_alloc *);
+
+#ifdef PCI_IOV
+
+static inline boolean_t
+ixgbe_vf_mac_changed(struct ixgbe_vf *vf, const uint8_t *mac)
+{
+	return (bcmp(mac, vf->ether_addr, ETHER_ADDR_LEN) != 0);
+}
+
+static inline void
+ixgbe_send_vf_msg(struct adapter *adapter, struct ixgbe_vf *vf, u32 msg)
+{
+
+	if (vf->flags & IXGBE_VF_CTS)
+		msg |= IXGBE_VT_MSGTYPE_CTS;
+
+	ixgbe_write_mbx(&adapter->hw, &msg, 1, vf->pool);
+}
+
+static inline void
+ixgbe_send_vf_ack(struct adapter *adapter, struct ixgbe_vf *vf, u32 msg)
+{
+	msg &= IXGBE_VT_MSG_MASK;
+	ixgbe_send_vf_msg(adapter, vf, msg | IXGBE_VT_MSGTYPE_ACK);
+}
+
+static inline void
+ixgbe_send_vf_nack(struct adapter *adapter, struct ixgbe_vf *vf, u32 msg)
+{
+	msg &= IXGBE_VT_MSG_MASK;
+	ixgbe_send_vf_msg(adapter, vf, msg | IXGBE_VT_MSGTYPE_NACK);
+}
+
+static inline void
+ixgbe_process_vf_ack(struct adapter *adapter, struct ixgbe_vf *vf)
+{
+	if (!(vf->flags & IXGBE_VF_CTS))
+		ixgbe_send_vf_nack(adapter, vf, 0);
+}
+
+static inline enum ixgbe_iov_mode
+ixgbe_get_iov_mode(struct adapter *adapter)
+{
+	if (adapter->num_vfs == 0)
+		return (IXGBE_NO_VM);
+	if (adapter->num_queues <= 2)
+		return (IXGBE_64_VM);
+	else if (adapter->num_queues <= 4)
+		return (IXGBE_32_VM);
+	else
+		return (IXGBE_NO_VM);
+}
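A sketch of how the ACK/NACK helpers above are meant to be driven from a
PF-side message handler (handle_vf_msg and the 'ok' parameter are
hypothetical, not part of this patch):

    static void
    handle_vf_msg(struct adapter *adapter, struct ixgbe_vf *vf,
        u32 msg, bool ok)
    {
        if (ok)
            ixgbe_send_vf_ack(adapter, vf, msg);
        else
            ixgbe_send_vf_nack(adapter, vf, msg);
    }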
+static inline u16
+ixgbe_max_vfs(enum ixgbe_iov_mode mode)
+{
+	/*
+	 * We return odd numbers below because we
+	 * reserve 1 VM's worth of queues for the PF.
+	 */
+	switch (mode) {
+	case IXGBE_64_VM:
+		return (63);
+	case IXGBE_32_VM:
+		return (31);
+	case IXGBE_NO_VM:
+	default:
+		return (0);
+	}
+}
+
+static inline int
+ixgbe_vf_queues(enum ixgbe_iov_mode mode)
+{
+	switch (mode) {
+	case IXGBE_64_VM:
+		return (2);
+	case IXGBE_32_VM:
+		return (4);
+	case IXGBE_NO_VM:
+	default:
+		return (0);
+	}
+}
+
+static inline int
+ixgbe_vf_que_index(enum ixgbe_iov_mode mode, u32 vfnum, int num)
+{
+	return ((vfnum * ixgbe_vf_queues(mode)) + num);
+}
+
+static inline int
+ixgbe_pf_que_index(enum ixgbe_iov_mode mode, int num)
+{
+	return (ixgbe_vf_que_index(mode, ixgbe_max_vfs(mode), num));
+}
+
+static inline void
+ixgbe_update_max_frame(struct adapter *adapter, int max_frame)
+{
+	if (adapter->max_frame_size < max_frame)
+		adapter->max_frame_size = max_frame;
+}
+
+static inline u32
+ixgbe_get_mrqc(enum ixgbe_iov_mode mode)
+{
+	u32 mrqc = 0;
+
+	switch (mode) {
+	case IXGBE_64_VM:
+		mrqc = IXGBE_MRQC_VMDQRSS64EN;
+		break;
+	case IXGBE_32_VM:
+		mrqc = IXGBE_MRQC_VMDQRSS32EN;
+		break;
+	case IXGBE_NO_VM:
+		mrqc = 0;
+		break;
+	default:
+		panic("Unexpected SR-IOV mode %d", mode);
+	}
+	return (mrqc);
+}
+
+
+static inline u32
+ixgbe_get_mtqc(enum ixgbe_iov_mode mode)
+{
+	uint32_t mtqc = 0;
+
+	switch (mode) {
+	case IXGBE_64_VM:
+		mtqc |= IXGBE_MTQC_64VF | IXGBE_MTQC_VT_ENA;
+		break;
+	case IXGBE_32_VM:
+		mtqc |= IXGBE_MTQC_32VF | IXGBE_MTQC_VT_ENA;
+		break;
+	case IXGBE_NO_VM:
+		mtqc = IXGBE_MTQC_64Q_1PB;
+		break;
+	default:
+		panic("Unexpected SR-IOV mode %d", mode);
+	}
+	return (mtqc);
+}
+#endif /* PCI_IOV */
+
+#endif /* _IXGBE_H_ */
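Worked numbers for the queue-indexing helpers above, under IXGBE_32_VM
(32 pools of 4 queues, the last pool reserved for the PF; standalone
illustration):

    #include <assert.h>

    int
    main(void)
    {
        int qpp = 4;      /* ixgbe_vf_queues(IXGBE_32_VM) */
        int max_vfs = 31; /* ixgbe_max_vfs(IXGBE_32_VM) */

        assert(5 * qpp + 2 == 22);        /* ixgbe_vf_que_index(mode, 5, 2) */
        assert(max_vfs * qpp + 0 == 124); /* ixgbe_pf_que_index(mode, 0) */
        return (0);
    }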
+.include "${SYSDIR}/conf/kern.opts.mk" -KMOD = if_ix -SRCS = device_if.h bus_if.h pci_if.h pci_iov_if.h -SRCS += opt_inet.h opt_inet6.h opt_rss.h -SRCS += if_ix.c ix_txrx.c ixgbe_osdep.c -# Shared source -SRCS += ixgbe_common.c ixgbe_api.c ixgbe_phy.c ixgbe_mbx.c ixgbe_vf.c -SRCS += ixgbe_dcb.c ixgbe_dcb_82598.c ixgbe_dcb_82599.c -SRCS += ixgbe_82598.c ixgbe_82599.c ixgbe_x540.c ixgbe_x550.c -CFLAGS+= -I${.CURDIR}/../../dev/ixgbe -DSMP -.include + +SUBDIR = \ + ix_iflib \ + ix_legacy + +.include Index: sys/modules/ix/ix_iflib/Makefile =================================================================== --- /dev/null +++ sys/modules/ix/ix_iflib/Makefile @@ -0,0 +1,17 @@ +# $FreeBSD$ + +.PATH: ${.CURDIR}/../../../dev/ixgbe + +# Switch this to if_ix when IFLIB becomes default +KMOD = if_ix_iflib +SRCS = device_if.h bus_if.h pci_if.h pci_iov_if.h ifdi_if.h +SRCS += opt_inet.h opt_inet6.h opt_rss.h opt_iflib.h +SRCS += if_ix.c ix_txrx.c ixgbe_osdep.c ixgbe_sysctl.c +# Shared source +SRCS += ixgbe_common.c ixgbe_api.c ixgbe_phy.c ixgbe_mbx.c ixgbe_vf.c +SRCS += ixgbe_dcb.c ixgbe_dcb_82598.c ixgbe_dcb_82599.c +SRCS += ixgbe_82598.c ixgbe_82599.c ixgbe_x540.c ixgbe_x550.c +CFLAGS+= -I${.CURDIR}/../../../dev/ixgbe -DSMP +CFLAGS+= -DIFLIB + +.include Index: sys/modules/ix/ix_legacy/Makefile =================================================================== --- /dev/null +++ sys/modules/ix/ix_legacy/Makefile @@ -0,0 +1,16 @@ +#$FreeBSD$ + +.PATH: ${.CURDIR}/../../../dev/ixgbe + +# Switch this to if_ix_legacy when IFLIB becomes default +KMOD = if_ix +SRCS = device_if.h bus_if.h pci_if.h pci_iov_if.h ifdi_if.h +SRCS += opt_inet.h opt_inet6.h opt_rss.h opt_iflib.h +SRCS += if_ix.c ix_txrx.c ixgbe_osdep.c ixgbe_sysctl.c +# Shared source +SRCS += ixgbe_common.c ixgbe_api.c ixgbe_phy.c ixgbe_mbx.c ixgbe_vf.c +SRCS += ixgbe_dcb.c ixgbe_dcb_82598.c ixgbe_dcb_82599.c +SRCS += ixgbe_82598.c ixgbe_82599.c ixgbe_x540.c ixgbe_x550.c +CFLAGS+= -I${.CURDIR}/../../../dev/ixgbe -DSMP + +.include Index: sys/net/iflib.c =================================================================== --- sys/net/iflib.c +++ sys/net/iflib.c @@ -3590,6 +3590,10 @@ if (scctx->isc_rss_table_size == 0) scctx->isc_rss_table_size = 64; scctx->isc_rss_table_mask = scctx->isc_rss_table_size-1; + + GROUPTASK_INIT(&ctx->ifc_admin_task, 0, _task_fn_admin, ctx); + /* XXX format name */ + taskqgroup_attach(qgroup_if_config_tqg, &ctx->ifc_admin_task, ctx, -1, "admin"); /* ** Now setup MSI or MSI/X, should ** return us the number of supported @@ -4342,6 +4346,7 @@ gtask = &ctx->ifc_txqs[qid].ift_task; tqg = qgroup_if_io_tqg; fn = _task_fn_tx; + GROUPTASK_INIT(gtask, 0, fn, q); break; case IFLIB_INTR_RX: q = &ctx->ifc_rxqs[qid]; @@ -4349,6 +4354,7 @@ gtask = &ctx->ifc_rxqs[qid].ifr_task; tqg = qgroup_if_io_tqg; fn = _task_fn_rx; + GROUPTASK_INIT(gtask, 0, fn, q); break; case IFLIB_INTR_ADMIN: q = ctx; @@ -4360,17 +4366,20 @@ default: panic("unknown net intr type"); } - GROUPTASK_INIT(gtask, 0, fn, q); info->ifi_filter = filter; info->ifi_filter_arg = filter_arg; info->ifi_task = gtask; - /* XXX query cpu that rid belongs to */ - err = _iflib_irq_alloc(ctx, irq, rid, iflib_fast_intr, NULL, info, name); - if (err != 0) + if (err != 0) { + device_printf(ctx->ifc_dev, "_iflib_irq_alloc failed %d\n", err); return (err); + } + + if (type == IFLIB_INTR_ADMIN) + return (0); + if (tqrid != -1) { cpuid = find_nth(ctx, &cpus, qid); taskqgroup_attach_cpu(tqg, gtask, q, cpuid, irq->ii_rid, name); @@ -4402,13 +4411,6 @@ tqg = qgroup_if_io_tqg; fn = _task_fn_rx; 
Index: sys/net/iflib.c
===================================================================
--- sys/net/iflib.c
+++ sys/net/iflib.c
@@ -3590,6 +3590,10 @@
 	if (scctx->isc_rss_table_size == 0)
 		scctx->isc_rss_table_size = 64;
 	scctx->isc_rss_table_mask = scctx->isc_rss_table_size-1;
+
+	GROUPTASK_INIT(&ctx->ifc_admin_task, 0, _task_fn_admin, ctx);
+	/* XXX format name */
+	taskqgroup_attach(qgroup_if_config_tqg, &ctx->ifc_admin_task, ctx, -1, "admin");
 	/*
 	** Now setup MSI or MSI/X, should
 	** return us the number of supported
@@ -4342,6 +4346,7 @@
 		gtask = &ctx->ifc_txqs[qid].ift_task;
 		tqg = qgroup_if_io_tqg;
 		fn = _task_fn_tx;
+		GROUPTASK_INIT(gtask, 0, fn, q);
 		break;
 	case IFLIB_INTR_RX:
 		q = &ctx->ifc_rxqs[qid];
@@ -4349,6 +4354,7 @@
 		gtask = &ctx->ifc_rxqs[qid].ifr_task;
 		tqg = qgroup_if_io_tqg;
 		fn = _task_fn_rx;
+		GROUPTASK_INIT(gtask, 0, fn, q);
 		break;
 	case IFLIB_INTR_ADMIN:
 		q = ctx;
@@ -4360,17 +4366,20 @@
 	default:
 		panic("unknown net intr type");
 	}
-	GROUPTASK_INIT(gtask, 0, fn, q);
 	info->ifi_filter = filter;
 	info->ifi_filter_arg = filter_arg;
 	info->ifi_task = gtask;
-	/* XXX query cpu that rid belongs to */
-	err = _iflib_irq_alloc(ctx, irq, rid, iflib_fast_intr, NULL, info, name);
-	if (err != 0)
+	if (err != 0) {
+		device_printf(ctx->ifc_dev, "_iflib_irq_alloc failed %d\n", err);
 		return (err);
+	}
+
+	if (type == IFLIB_INTR_ADMIN)
+		return (0);
+
 	if (tqrid != -1) {
 		cpuid = find_nth(ctx, &cpus, qid);
 		taskqgroup_attach_cpu(tqg, gtask, q, cpuid, irq->ii_rid, name);
@@ -4402,13 +4411,6 @@
 		tqg = qgroup_if_io_tqg;
 		fn = _task_fn_rx;
 		break;
-	case IFLIB_INTR_ADMIN:
-		q = ctx;
-		gtask = &ctx->ifc_admin_task;
-		tqg = qgroup_if_config_tqg;
-		rid = -1;
-		fn = _task_fn_admin;
-		break;
 	case IFLIB_INTR_IOV:
 		q = ctx;
 		gtask = &ctx->ifc_vflr_task;