diff --git a/sys/dev/bnxt/if_bnxt.c b/sys/dev/bnxt/if_bnxt.c index 7811f4fdebf0..9990e26263b3 100644 --- a/sys/dev/bnxt/if_bnxt.c +++ b/sys/dev/bnxt/if_bnxt.c @@ -1,2507 +1,2505 @@ /*- * Broadcom NetXtreme-C/E network driver. * * Copyright (c) 2016 Broadcom, All Rights Reserved. * The term Broadcom refers to Broadcom Limited and/or its subsidiaries * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS' * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "opt_inet.h" #include "opt_inet6.h" #include "opt_rss.h" #include "ifdi_if.h" #include "bnxt.h" #include "bnxt_hwrm.h" #include "bnxt_ioctl.h" #include "bnxt_sysctl.h" #include "hsi_struct_def.h" /* * PCI Device ID Table */ static pci_vendor_info_t bnxt_vendor_info_array[] = { PVID(BROADCOM_VENDOR_ID, BCM57301, "Broadcom BCM57301 NetXtreme-C 10Gb Ethernet Controller"), PVID(BROADCOM_VENDOR_ID, BCM57302, "Broadcom BCM57302 NetXtreme-C 10Gb/25Gb Ethernet Controller"), PVID(BROADCOM_VENDOR_ID, BCM57304, "Broadcom BCM57304 NetXtreme-C 10Gb/25Gb/40Gb/50Gb Ethernet Controller"), PVID(BROADCOM_VENDOR_ID, BCM57311, "Broadcom BCM57311 NetXtreme-C 10Gb Ethernet"), PVID(BROADCOM_VENDOR_ID, BCM57312, "Broadcom BCM57312 NetXtreme-C 10Gb/25Gb Ethernet"), PVID(BROADCOM_VENDOR_ID, BCM57314, "Broadcom BCM57314 NetXtreme-C 10Gb/25Gb/40Gb/50Gb Ethernet"), PVID(BROADCOM_VENDOR_ID, BCM57402, "Broadcom BCM57402 NetXtreme-E 10Gb Ethernet Controller"), PVID(BROADCOM_VENDOR_ID, BCM57402_NPAR, "Broadcom BCM57402 NetXtreme-E Partition"), PVID(BROADCOM_VENDOR_ID, BCM57404, "Broadcom BCM57404 NetXtreme-E 10Gb/25Gb Ethernet Controller"), PVID(BROADCOM_VENDOR_ID, BCM57404_NPAR, "Broadcom BCM57404 NetXtreme-E Partition"), PVID(BROADCOM_VENDOR_ID, BCM57406, "Broadcom BCM57406 NetXtreme-E 10GBase-T Ethernet Controller"), PVID(BROADCOM_VENDOR_ID, BCM57406_NPAR, "Broadcom BCM57406 NetXtreme-E Partition"), PVID(BROADCOM_VENDOR_ID, BCM57407, "Broadcom BCM57407 NetXtreme-E 10GBase-T Ethernet Controller"), PVID(BROADCOM_VENDOR_ID, BCM57407_NPAR, "Broadcom BCM57407 NetXtreme-E Ethernet Partition"), PVID(BROADCOM_VENDOR_ID, BCM57407_SFP, "Broadcom BCM57407 NetXtreme-E 25Gb Ethernet Controller"), PVID(BROADCOM_VENDOR_ID, BCM57412, "Broadcom BCM57412 NetXtreme-E 10Gb Ethernet"), PVID(BROADCOM_VENDOR_ID, BCM57412_NPAR1, "Broadcom BCM57412 NetXtreme-E Ethernet Partition"), PVID(BROADCOM_VENDOR_ID, BCM57412_NPAR2, "Broadcom BCM57412 NetXtreme-E Ethernet Partition"), PVID(BROADCOM_VENDOR_ID, BCM57414, "Broadcom BCM57414 NetXtreme-E 10Gb/25Gb Ethernet"), PVID(BROADCOM_VENDOR_ID, BCM57414_NPAR1, "Broadcom BCM57414 NetXtreme-E Ethernet Partition"), PVID(BROADCOM_VENDOR_ID, BCM57414_NPAR2, "Broadcom BCM57414 NetXtreme-E Ethernet Partition"), PVID(BROADCOM_VENDOR_ID, BCM57416, "Broadcom BCM57416 NetXtreme-E 10GBase-T Ethernet"), PVID(BROADCOM_VENDOR_ID, BCM57416_NPAR1, "Broadcom BCM57416 NetXtreme-E Ethernet Partition"), PVID(BROADCOM_VENDOR_ID, BCM57416_NPAR2, "Broadcom BCM57416 NetXtreme-E Ethernet Partition"), PVID(BROADCOM_VENDOR_ID, BCM57416_SFP, "Broadcom BCM57416 NetXtreme-E 10Gb Ethernet"), PVID(BROADCOM_VENDOR_ID, BCM57417, "Broadcom BCM57417 NetXtreme-E 10GBase-T Ethernet"), PVID(BROADCOM_VENDOR_ID, BCM57417_NPAR1, "Broadcom BCM57417 NetXtreme-E Ethernet Partition"), PVID(BROADCOM_VENDOR_ID, BCM57417_NPAR2, "Broadcom BCM57417 NetXtreme-E Ethernet Partition"), PVID(BROADCOM_VENDOR_ID, BCM57417_SFP, "Broadcom BCM57417 NetXtreme-E 10Gb/25Gb Ethernet"), PVID(BROADCOM_VENDOR_ID, BCM57454, "Broadcom BCM57454 NetXtreme-E 10Gb/25Gb/40Gb/50Gb/100Gb Ethernet"), PVID(BROADCOM_VENDOR_ID, BCM58700, "Broadcom BCM58700 Nitro 1Gb/2.5Gb/10Gb Ethernet"), PVID(BROADCOM_VENDOR_ID, NETXTREME_C_VF1, "Broadcom NetXtreme-C Ethernet Virtual Function"), PVID(BROADCOM_VENDOR_ID, NETXTREME_C_VF2, "Broadcom NetXtreme-C Ethernet Virtual Function"), PVID(BROADCOM_VENDOR_ID, NETXTREME_C_VF3, "Broadcom NetXtreme-C Ethernet Virtual Function"), PVID(BROADCOM_VENDOR_ID, NETXTREME_E_VF1, "Broadcom NetXtreme-E Ethernet Virtual Function"), PVID(BROADCOM_VENDOR_ID, NETXTREME_E_VF2, "Broadcom NetXtreme-E Ethernet Virtual Function"), PVID(BROADCOM_VENDOR_ID, NETXTREME_E_VF3, "Broadcom NetXtreme-E Ethernet Virtual Function"), /* required last entry */ PVID_END }; /* * Function prototypes */ static void *bnxt_register(device_t dev); /* Soft queue setup and teardown */ static int bnxt_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int ntxqs, int ntxqsets); static int bnxt_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int nrxqs, int nrxqsets); static void bnxt_queues_free(if_ctx_t ctx); /* Device setup and teardown */ static int bnxt_attach_pre(if_ctx_t ctx); static int bnxt_attach_post(if_ctx_t ctx); static int bnxt_detach(if_ctx_t ctx); /* Device configuration */ static void bnxt_init(if_ctx_t ctx); static void bnxt_stop(if_ctx_t ctx); static void bnxt_multi_set(if_ctx_t ctx); static int bnxt_mtu_set(if_ctx_t ctx, uint32_t mtu); static void bnxt_media_status(if_ctx_t ctx, struct ifmediareq * ifmr); static int bnxt_media_change(if_ctx_t ctx); static int bnxt_promisc_set(if_ctx_t ctx, int flags); static uint64_t bnxt_get_counter(if_ctx_t, ift_counter); static void bnxt_update_admin_status(if_ctx_t ctx); static void bnxt_if_timer(if_ctx_t ctx, uint16_t qid); /* Interrupt enable / disable */ static void bnxt_intr_enable(if_ctx_t ctx); static int bnxt_rx_queue_intr_enable(if_ctx_t ctx, uint16_t qid); static int bnxt_tx_queue_intr_enable(if_ctx_t ctx, uint16_t qid); static void bnxt_disable_intr(if_ctx_t ctx); static int bnxt_msix_intr_assign(if_ctx_t ctx, int msix); /* vlan support */ static void bnxt_vlan_register(if_ctx_t ctx, uint16_t vtag); static void bnxt_vlan_unregister(if_ctx_t ctx, uint16_t vtag); /* ioctl */ static int bnxt_priv_ioctl(if_ctx_t ctx, u_long command, caddr_t data); static int bnxt_shutdown(if_ctx_t ctx); static int bnxt_suspend(if_ctx_t ctx); static int bnxt_resume(if_ctx_t ctx); /* Internal support functions */ static int bnxt_probe_phy(struct bnxt_softc *softc); static void bnxt_add_media_types(struct bnxt_softc *softc); static int bnxt_pci_mapping(struct bnxt_softc *softc); static void bnxt_pci_mapping_free(struct bnxt_softc *softc); static int bnxt_update_link(struct bnxt_softc *softc, bool chng_link_state); static int bnxt_handle_def_cp(void *arg); static int bnxt_handle_rx_cp(void *arg); static void bnxt_clear_ids(struct bnxt_softc *softc); static void inline bnxt_do_enable_intr(struct bnxt_cp_ring *cpr); static void inline bnxt_do_disable_intr(struct bnxt_cp_ring *cpr); static void bnxt_mark_cpr_invalid(struct bnxt_cp_ring *cpr); static void bnxt_def_cp_task(void *context); static void bnxt_handle_async_event(struct bnxt_softc *softc, struct cmpl_base *cmpl); static uint8_t get_phy_type(struct bnxt_softc *softc); static uint64_t bnxt_get_baudrate(struct bnxt_link_info *link); static void bnxt_get_wol_settings(struct bnxt_softc *softc); static int bnxt_wol_config(if_ctx_t ctx); /* * Device Interface Declaration */ static device_method_t bnxt_methods[] = { /* Device interface */ DEVMETHOD(device_register, bnxt_register), DEVMETHOD(device_probe, iflib_device_probe), DEVMETHOD(device_attach, iflib_device_attach), DEVMETHOD(device_detach, iflib_device_detach), DEVMETHOD(device_shutdown, iflib_device_shutdown), DEVMETHOD(device_suspend, iflib_device_suspend), DEVMETHOD(device_resume, iflib_device_resume), DEVMETHOD_END }; static driver_t bnxt_driver = { "bnxt", bnxt_methods, sizeof(struct bnxt_softc), }; devclass_t bnxt_devclass; DRIVER_MODULE(bnxt, pci, bnxt_driver, bnxt_devclass, 0, 0); MODULE_DEPEND(bnxt, pci, 1, 1, 1); MODULE_DEPEND(bnxt, ether, 1, 1, 1); MODULE_DEPEND(bnxt, iflib, 1, 1, 1); IFLIB_PNP_INFO(pci, bnxt, bnxt_vendor_info_array); static device_method_t bnxt_iflib_methods[] = { DEVMETHOD(ifdi_tx_queues_alloc, bnxt_tx_queues_alloc), DEVMETHOD(ifdi_rx_queues_alloc, bnxt_rx_queues_alloc), DEVMETHOD(ifdi_queues_free, bnxt_queues_free), DEVMETHOD(ifdi_attach_pre, bnxt_attach_pre), DEVMETHOD(ifdi_attach_post, bnxt_attach_post), DEVMETHOD(ifdi_detach, bnxt_detach), DEVMETHOD(ifdi_init, bnxt_init), DEVMETHOD(ifdi_stop, bnxt_stop), DEVMETHOD(ifdi_multi_set, bnxt_multi_set), DEVMETHOD(ifdi_mtu_set, bnxt_mtu_set), DEVMETHOD(ifdi_media_status, bnxt_media_status), DEVMETHOD(ifdi_media_change, bnxt_media_change), DEVMETHOD(ifdi_promisc_set, bnxt_promisc_set), DEVMETHOD(ifdi_get_counter, bnxt_get_counter), DEVMETHOD(ifdi_update_admin_status, bnxt_update_admin_status), DEVMETHOD(ifdi_timer, bnxt_if_timer), DEVMETHOD(ifdi_intr_enable, bnxt_intr_enable), DEVMETHOD(ifdi_tx_queue_intr_enable, bnxt_tx_queue_intr_enable), DEVMETHOD(ifdi_rx_queue_intr_enable, bnxt_rx_queue_intr_enable), DEVMETHOD(ifdi_intr_disable, bnxt_disable_intr), DEVMETHOD(ifdi_msix_intr_assign, bnxt_msix_intr_assign), DEVMETHOD(ifdi_vlan_register, bnxt_vlan_register), DEVMETHOD(ifdi_vlan_unregister, bnxt_vlan_unregister), DEVMETHOD(ifdi_priv_ioctl, bnxt_priv_ioctl), DEVMETHOD(ifdi_suspend, bnxt_suspend), DEVMETHOD(ifdi_shutdown, bnxt_shutdown), DEVMETHOD(ifdi_resume, bnxt_resume), DEVMETHOD_END }; static driver_t bnxt_iflib_driver = { "bnxt", bnxt_iflib_methods, sizeof(struct bnxt_softc) }; /* * iflib shared context */ #define BNXT_DRIVER_VERSION "1.0.0.2" char bnxt_driver_version[] = BNXT_DRIVER_VERSION; extern struct if_txrx bnxt_txrx; static struct if_shared_ctx bnxt_sctx_init = { .isc_magic = IFLIB_MAGIC, .isc_driver = &bnxt_iflib_driver, .isc_nfl = 2, // Number of Free Lists .isc_flags = IFLIB_HAS_RXCQ | IFLIB_HAS_TXCQ | IFLIB_NEED_ETHER_PAD, .isc_q_align = PAGE_SIZE, .isc_tx_maxsize = BNXT_TSO_SIZE + sizeof(struct ether_vlan_header), .isc_tx_maxsegsize = BNXT_TSO_SIZE + sizeof(struct ether_vlan_header), .isc_tso_maxsize = BNXT_TSO_SIZE + sizeof(struct ether_vlan_header), .isc_tso_maxsegsize = BNXT_TSO_SIZE + sizeof(struct ether_vlan_header), .isc_rx_maxsize = BNXT_TSO_SIZE + sizeof(struct ether_vlan_header), .isc_rx_maxsegsize = BNXT_TSO_SIZE + sizeof(struct ether_vlan_header), // Only use a single segment to avoid page size constraints .isc_rx_nsegments = 1, .isc_ntxqs = 2, .isc_nrxqs = 3, .isc_nrxd_min = {16, 16, 16}, .isc_nrxd_default = {PAGE_SIZE / sizeof(struct cmpl_base) * 8, PAGE_SIZE / sizeof(struct rx_prod_pkt_bd), PAGE_SIZE / sizeof(struct rx_prod_pkt_bd)}, .isc_nrxd_max = {BNXT_MAX_RXD, BNXT_MAX_RXD, BNXT_MAX_RXD}, .isc_ntxd_min = {16, 16, 16}, .isc_ntxd_default = {PAGE_SIZE / sizeof(struct cmpl_base) * 2, PAGE_SIZE / sizeof(struct tx_bd_short)}, .isc_ntxd_max = {BNXT_MAX_TXD, BNXT_MAX_TXD, BNXT_MAX_TXD}, .isc_admin_intrcnt = 1, .isc_vendor_info = bnxt_vendor_info_array, .isc_driver_version = bnxt_driver_version, }; -if_shared_ctx_t bnxt_sctx = &bnxt_sctx_init; - /* * Device Methods */ static void * bnxt_register(device_t dev) { - return bnxt_sctx; + return (&bnxt_sctx_init); } /* * Device Dependent Configuration Functions */ /* Soft queue setup and teardown */ static int bnxt_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int ntxqs, int ntxqsets) { struct bnxt_softc *softc; int i; int rc; softc = iflib_get_softc(ctx); softc->tx_cp_rings = malloc(sizeof(struct bnxt_cp_ring) * ntxqsets, M_DEVBUF, M_NOWAIT | M_ZERO); if (!softc->tx_cp_rings) { device_printf(iflib_get_dev(ctx), "unable to allocate TX completion rings\n"); rc = ENOMEM; goto cp_alloc_fail; } softc->tx_rings = malloc(sizeof(struct bnxt_ring) * ntxqsets, M_DEVBUF, M_NOWAIT | M_ZERO); if (!softc->tx_rings) { device_printf(iflib_get_dev(ctx), "unable to allocate TX rings\n"); rc = ENOMEM; goto ring_alloc_fail; } rc = iflib_dma_alloc(ctx, sizeof(struct ctx_hw_stats) * ntxqsets, &softc->tx_stats, 0); if (rc) goto dma_alloc_fail; bus_dmamap_sync(softc->tx_stats.idi_tag, softc->tx_stats.idi_map, BUS_DMASYNC_PREREAD); for (i = 0; i < ntxqsets; i++) { /* Set up the completion ring */ softc->tx_cp_rings[i].stats_ctx_id = HWRM_NA_SIGNATURE; softc->tx_cp_rings[i].ring.phys_id = (uint16_t)HWRM_NA_SIGNATURE; softc->tx_cp_rings[i].ring.softc = softc; softc->tx_cp_rings[i].ring.id = (softc->scctx->isc_nrxqsets * 2) + 1 + i; softc->tx_cp_rings[i].ring.doorbell = softc->tx_cp_rings[i].ring.id * 0x80; softc->tx_cp_rings[i].ring.ring_size = softc->scctx->isc_ntxd[0]; softc->tx_cp_rings[i].ring.vaddr = vaddrs[i * ntxqs]; softc->tx_cp_rings[i].ring.paddr = paddrs[i * ntxqs]; /* Set up the TX ring */ softc->tx_rings[i].phys_id = (uint16_t)HWRM_NA_SIGNATURE; softc->tx_rings[i].softc = softc; softc->tx_rings[i].id = (softc->scctx->isc_nrxqsets * 2) + 1 + i; softc->tx_rings[i].doorbell = softc->tx_rings[i].id * 0x80; softc->tx_rings[i].ring_size = softc->scctx->isc_ntxd[1]; softc->tx_rings[i].vaddr = vaddrs[i * ntxqs + 1]; softc->tx_rings[i].paddr = paddrs[i * ntxqs + 1]; bnxt_create_tx_sysctls(softc, i); } softc->ntxqsets = ntxqsets; return rc; dma_alloc_fail: free(softc->tx_rings, M_DEVBUF); ring_alloc_fail: free(softc->tx_cp_rings, M_DEVBUF); cp_alloc_fail: return rc; } static void bnxt_queues_free(if_ctx_t ctx) { struct bnxt_softc *softc = iflib_get_softc(ctx); // Free TX queues iflib_dma_free(&softc->tx_stats); free(softc->tx_rings, M_DEVBUF); softc->tx_rings = NULL; free(softc->tx_cp_rings, M_DEVBUF); softc->tx_cp_rings = NULL; softc->ntxqsets = 0; // Free RX queues iflib_dma_free(&softc->rx_stats); iflib_dma_free(&softc->hw_tx_port_stats); iflib_dma_free(&softc->hw_rx_port_stats); free(softc->grp_info, M_DEVBUF); free(softc->ag_rings, M_DEVBUF); free(softc->rx_rings, M_DEVBUF); free(softc->rx_cp_rings, M_DEVBUF); } static int bnxt_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int nrxqs, int nrxqsets) { struct bnxt_softc *softc; int i; int rc; softc = iflib_get_softc(ctx); softc->rx_cp_rings = malloc(sizeof(struct bnxt_cp_ring) * nrxqsets, M_DEVBUF, M_NOWAIT | M_ZERO); if (!softc->rx_cp_rings) { device_printf(iflib_get_dev(ctx), "unable to allocate RX completion rings\n"); rc = ENOMEM; goto cp_alloc_fail; } softc->rx_rings = malloc(sizeof(struct bnxt_ring) * nrxqsets, M_DEVBUF, M_NOWAIT | M_ZERO); if (!softc->rx_rings) { device_printf(iflib_get_dev(ctx), "unable to allocate RX rings\n"); rc = ENOMEM; goto ring_alloc_fail; } softc->ag_rings = malloc(sizeof(struct bnxt_ring) * nrxqsets, M_DEVBUF, M_NOWAIT | M_ZERO); if (!softc->ag_rings) { device_printf(iflib_get_dev(ctx), "unable to allocate aggregation rings\n"); rc = ENOMEM; goto ag_alloc_fail; } softc->grp_info = malloc(sizeof(struct bnxt_grp_info) * nrxqsets, M_DEVBUF, M_NOWAIT | M_ZERO); if (!softc->grp_info) { device_printf(iflib_get_dev(ctx), "unable to allocate ring groups\n"); rc = ENOMEM; goto grp_alloc_fail; } rc = iflib_dma_alloc(ctx, sizeof(struct ctx_hw_stats) * nrxqsets, &softc->rx_stats, 0); if (rc) goto hw_stats_alloc_fail; bus_dmamap_sync(softc->rx_stats.idi_tag, softc->rx_stats.idi_map, BUS_DMASYNC_PREREAD); /* * Additional 512 bytes for future expansion. * To prevent corruption when loaded with newer firmwares with added counters. * This can be deleted when there will be no further additions of counters. */ #define BNXT_PORT_STAT_PADDING 512 rc = iflib_dma_alloc(ctx, sizeof(struct rx_port_stats) + BNXT_PORT_STAT_PADDING, &softc->hw_rx_port_stats, 0); if (rc) goto hw_port_rx_stats_alloc_fail; bus_dmamap_sync(softc->hw_rx_port_stats.idi_tag, softc->hw_rx_port_stats.idi_map, BUS_DMASYNC_PREREAD); rc = iflib_dma_alloc(ctx, sizeof(struct tx_port_stats) + BNXT_PORT_STAT_PADDING, &softc->hw_tx_port_stats, 0); if (rc) goto hw_port_tx_stats_alloc_fail; bus_dmamap_sync(softc->hw_tx_port_stats.idi_tag, softc->hw_tx_port_stats.idi_map, BUS_DMASYNC_PREREAD); softc->rx_port_stats = (void *) softc->hw_rx_port_stats.idi_vaddr; softc->tx_port_stats = (void *) softc->hw_tx_port_stats.idi_vaddr; for (i = 0; i < nrxqsets; i++) { /* Allocation the completion ring */ softc->rx_cp_rings[i].stats_ctx_id = HWRM_NA_SIGNATURE; softc->rx_cp_rings[i].ring.phys_id = (uint16_t)HWRM_NA_SIGNATURE; softc->rx_cp_rings[i].ring.softc = softc; softc->rx_cp_rings[i].ring.id = i + 1; softc->rx_cp_rings[i].ring.doorbell = softc->rx_cp_rings[i].ring.id * 0x80; /* * If this ring overflows, RX stops working. */ softc->rx_cp_rings[i].ring.ring_size = softc->scctx->isc_nrxd[0]; softc->rx_cp_rings[i].ring.vaddr = vaddrs[i * nrxqs]; softc->rx_cp_rings[i].ring.paddr = paddrs[i * nrxqs]; /* Allocate the RX ring */ softc->rx_rings[i].phys_id = (uint16_t)HWRM_NA_SIGNATURE; softc->rx_rings[i].softc = softc; softc->rx_rings[i].id = i + 1; softc->rx_rings[i].doorbell = softc->rx_rings[i].id * 0x80; softc->rx_rings[i].ring_size = softc->scctx->isc_nrxd[1]; softc->rx_rings[i].vaddr = vaddrs[i * nrxqs + 1]; softc->rx_rings[i].paddr = paddrs[i * nrxqs + 1]; /* Allocate the TPA start buffer */ softc->rx_rings[i].tpa_start = malloc(sizeof(struct bnxt_full_tpa_start) * (RX_TPA_START_CMPL_AGG_ID_MASK >> RX_TPA_START_CMPL_AGG_ID_SFT), M_DEVBUF, M_NOWAIT | M_ZERO); if (softc->rx_rings[i].tpa_start == NULL) { rc = -ENOMEM; device_printf(softc->dev, "Unable to allocate space for TPA\n"); goto tpa_alloc_fail; } /* Allocate the AG ring */ softc->ag_rings[i].phys_id = (uint16_t)HWRM_NA_SIGNATURE; softc->ag_rings[i].softc = softc; softc->ag_rings[i].id = nrxqsets + i + 1; softc->ag_rings[i].doorbell = softc->ag_rings[i].id * 0x80; softc->ag_rings[i].ring_size = softc->scctx->isc_nrxd[2]; softc->ag_rings[i].vaddr = vaddrs[i * nrxqs + 2]; softc->ag_rings[i].paddr = paddrs[i * nrxqs + 2]; /* Allocate the ring group */ softc->grp_info[i].grp_id = (uint16_t)HWRM_NA_SIGNATURE; softc->grp_info[i].stats_ctx = softc->rx_cp_rings[i].stats_ctx_id; softc->grp_info[i].rx_ring_id = softc->rx_rings[i].phys_id; softc->grp_info[i].ag_ring_id = softc->ag_rings[i].phys_id; softc->grp_info[i].cp_ring_id = softc->rx_cp_rings[i].ring.phys_id; bnxt_create_rx_sysctls(softc, i); } /* * When SR-IOV is enabled, avoid each VF sending PORT_QSTATS * HWRM every sec with which firmware timeouts can happen */ if (BNXT_PF(softc)) bnxt_create_port_stats_sysctls(softc); /* And finally, the VNIC */ softc->vnic_info.id = (uint16_t)HWRM_NA_SIGNATURE; softc->vnic_info.flow_id = (uint16_t)HWRM_NA_SIGNATURE; softc->vnic_info.filter_id = -1; softc->vnic_info.def_ring_grp = (uint16_t)HWRM_NA_SIGNATURE; softc->vnic_info.cos_rule = (uint16_t)HWRM_NA_SIGNATURE; softc->vnic_info.lb_rule = (uint16_t)HWRM_NA_SIGNATURE; softc->vnic_info.rx_mask = HWRM_CFA_L2_SET_RX_MASK_INPUT_MASK_BCAST; softc->vnic_info.mc_list_count = 0; softc->vnic_info.flags = BNXT_VNIC_FLAG_DEFAULT; rc = iflib_dma_alloc(ctx, BNXT_MAX_MC_ADDRS * ETHER_ADDR_LEN, &softc->vnic_info.mc_list, 0); if (rc) goto mc_list_alloc_fail; /* The VNIC RSS Hash Key */ rc = iflib_dma_alloc(ctx, HW_HASH_KEY_SIZE, &softc->vnic_info.rss_hash_key_tbl, 0); if (rc) goto rss_hash_alloc_fail; bus_dmamap_sync(softc->vnic_info.rss_hash_key_tbl.idi_tag, softc->vnic_info.rss_hash_key_tbl.idi_map, BUS_DMASYNC_PREWRITE); memcpy(softc->vnic_info.rss_hash_key_tbl.idi_vaddr, softc->vnic_info.rss_hash_key, HW_HASH_KEY_SIZE); /* Allocate the RSS tables */ rc = iflib_dma_alloc(ctx, HW_HASH_INDEX_SIZE * sizeof(uint16_t), &softc->vnic_info.rss_grp_tbl, 0); if (rc) goto rss_grp_alloc_fail; bus_dmamap_sync(softc->vnic_info.rss_grp_tbl.idi_tag, softc->vnic_info.rss_grp_tbl.idi_map, BUS_DMASYNC_PREWRITE); memset(softc->vnic_info.rss_grp_tbl.idi_vaddr, 0xff, softc->vnic_info.rss_grp_tbl.idi_size); softc->nrxqsets = nrxqsets; return rc; rss_grp_alloc_fail: iflib_dma_free(&softc->vnic_info.rss_hash_key_tbl); rss_hash_alloc_fail: iflib_dma_free(&softc->vnic_info.mc_list); tpa_alloc_fail: mc_list_alloc_fail: for (i = i - 1; i >= 0; i--) free(softc->rx_rings[i].tpa_start, M_DEVBUF); iflib_dma_free(&softc->hw_tx_port_stats); hw_port_tx_stats_alloc_fail: iflib_dma_free(&softc->hw_rx_port_stats); hw_port_rx_stats_alloc_fail: iflib_dma_free(&softc->rx_stats); hw_stats_alloc_fail: free(softc->grp_info, M_DEVBUF); grp_alloc_fail: free(softc->ag_rings, M_DEVBUF); ag_alloc_fail: free(softc->rx_rings, M_DEVBUF); ring_alloc_fail: free(softc->rx_cp_rings, M_DEVBUF); cp_alloc_fail: return rc; } static void bnxt_free_hwrm_short_cmd_req(struct bnxt_softc *softc) { if (softc->hwrm_short_cmd_req_addr.idi_vaddr) iflib_dma_free(&softc->hwrm_short_cmd_req_addr); softc->hwrm_short_cmd_req_addr.idi_vaddr = NULL; } static int bnxt_alloc_hwrm_short_cmd_req(struct bnxt_softc *softc) { int rc; rc = iflib_dma_alloc(softc->ctx, softc->hwrm_max_req_len, &softc->hwrm_short_cmd_req_addr, BUS_DMA_NOWAIT); return rc; } /* Device setup and teardown */ static int bnxt_attach_pre(if_ctx_t ctx) { struct bnxt_softc *softc = iflib_get_softc(ctx); if_softc_ctx_t scctx; int rc = 0; softc->ctx = ctx; softc->dev = iflib_get_dev(ctx); softc->media = iflib_get_media(ctx); softc->scctx = iflib_get_softc_ctx(ctx); softc->sctx = iflib_get_sctx(ctx); scctx = softc->scctx; /* TODO: Better way of detecting NPAR/VF is needed */ switch (pci_get_device(softc->dev)) { case BCM57402_NPAR: case BCM57404_NPAR: case BCM57406_NPAR: case BCM57407_NPAR: case BCM57412_NPAR1: case BCM57412_NPAR2: case BCM57414_NPAR1: case BCM57414_NPAR2: case BCM57416_NPAR1: case BCM57416_NPAR2: softc->flags |= BNXT_FLAG_NPAR; break; case NETXTREME_C_VF1: case NETXTREME_C_VF2: case NETXTREME_C_VF3: case NETXTREME_E_VF1: case NETXTREME_E_VF2: case NETXTREME_E_VF3: softc->flags |= BNXT_FLAG_VF; break; } pci_enable_busmaster(softc->dev); if (bnxt_pci_mapping(softc)) return (ENXIO); /* HWRM setup/init */ BNXT_HWRM_LOCK_INIT(softc, device_get_nameunit(softc->dev)); rc = bnxt_alloc_hwrm_dma_mem(softc); if (rc) goto dma_fail; /* Get firmware version and compare with driver */ softc->ver_info = malloc(sizeof(struct bnxt_ver_info), M_DEVBUF, M_NOWAIT | M_ZERO); if (softc->ver_info == NULL) { rc = ENOMEM; device_printf(softc->dev, "Unable to allocate space for version info\n"); goto ver_alloc_fail; } /* Default minimum required HWRM version */ softc->ver_info->hwrm_min_major = 1; softc->ver_info->hwrm_min_minor = 2; softc->ver_info->hwrm_min_update = 2; rc = bnxt_hwrm_ver_get(softc); if (rc) { device_printf(softc->dev, "attach: hwrm ver get failed\n"); goto ver_fail; } if (softc->flags & BNXT_FLAG_SHORT_CMD) { rc = bnxt_alloc_hwrm_short_cmd_req(softc); if (rc) goto hwrm_short_cmd_alloc_fail; } /* Get NVRAM info */ if (BNXT_PF(softc)) { softc->nvm_info = malloc(sizeof(struct bnxt_nvram_info), M_DEVBUF, M_NOWAIT | M_ZERO); if (softc->nvm_info == NULL) { rc = ENOMEM; device_printf(softc->dev, "Unable to allocate space for NVRAM info\n"); goto nvm_alloc_fail; } rc = bnxt_hwrm_nvm_get_dev_info(softc, &softc->nvm_info->mfg_id, &softc->nvm_info->device_id, &softc->nvm_info->sector_size, &softc->nvm_info->size, &softc->nvm_info->reserved_size, &softc->nvm_info->available_size); } /* Register the driver with the FW */ rc = bnxt_hwrm_func_drv_rgtr(softc); if (rc) { device_printf(softc->dev, "attach: hwrm drv rgtr failed\n"); goto drv_rgtr_fail; } rc = bnxt_hwrm_func_rgtr_async_events(softc, NULL, 0); if (rc) { device_printf(softc->dev, "attach: hwrm rgtr async evts failed\n"); goto drv_rgtr_fail; } /* Get the HW capabilities */ rc = bnxt_hwrm_func_qcaps(softc); if (rc) goto failed; /* Get the current configuration of this function */ rc = bnxt_hwrm_func_qcfg(softc); if (rc) { device_printf(softc->dev, "attach: hwrm func qcfg failed\n"); goto failed; } iflib_set_mac(ctx, softc->func.mac_addr); scctx->isc_txrx = &bnxt_txrx; scctx->isc_tx_csum_flags = (CSUM_IP | CSUM_TCP | CSUM_UDP | CSUM_TCP_IPV6 | CSUM_UDP_IPV6 | CSUM_TSO); scctx->isc_capabilities = scctx->isc_capenable = /* These are translated to hwassit bits */ IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6 | IFCAP_TSO4 | IFCAP_TSO6 | /* These are checked by iflib */ IFCAP_LRO | IFCAP_VLAN_HWFILTER | /* These are part of the iflib mask */ IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWTSO | /* These likely get lost... */ IFCAP_VLAN_HWCSUM | IFCAP_JUMBO_MTU; if (bnxt_wol_supported(softc)) scctx->isc_capenable |= IFCAP_WOL_MAGIC; /* Get the queue config */ rc = bnxt_hwrm_queue_qportcfg(softc); if (rc) { device_printf(softc->dev, "attach: hwrm qportcfg failed\n"); goto failed; } bnxt_get_wol_settings(softc); /* Now perform a function reset */ rc = bnxt_hwrm_func_reset(softc); bnxt_clear_ids(softc); if (rc) goto failed; /* Now set up iflib sc */ scctx->isc_tx_nsegments = 31, scctx->isc_tx_tso_segments_max = 31; scctx->isc_tx_tso_size_max = BNXT_TSO_SIZE; scctx->isc_tx_tso_segsize_max = BNXT_TSO_SIZE; scctx->isc_vectors = softc->func.max_cp_rings; scctx->isc_min_frame_size = BNXT_MIN_FRAME_SIZE; scctx->isc_txrx = &bnxt_txrx; if (scctx->isc_nrxd[0] < ((scctx->isc_nrxd[1] * 4) + scctx->isc_nrxd[2])) device_printf(softc->dev, "WARNING: nrxd0 (%d) should be at least 4 * nrxd1 (%d) + nrxd2 (%d). Driver may be unstable\n", scctx->isc_nrxd[0], scctx->isc_nrxd[1], scctx->isc_nrxd[2]); if (scctx->isc_ntxd[0] < scctx->isc_ntxd[1] * 2) device_printf(softc->dev, "WARNING: ntxd0 (%d) should be at least 2 * ntxd1 (%d). Driver may be unstable\n", scctx->isc_ntxd[0], scctx->isc_ntxd[1]); scctx->isc_txqsizes[0] = sizeof(struct cmpl_base) * scctx->isc_ntxd[0]; scctx->isc_txqsizes[1] = sizeof(struct tx_bd_short) * scctx->isc_ntxd[1]; scctx->isc_rxqsizes[0] = sizeof(struct cmpl_base) * scctx->isc_nrxd[0]; scctx->isc_rxqsizes[1] = sizeof(struct rx_prod_pkt_bd) * scctx->isc_nrxd[1]; scctx->isc_rxqsizes[2] = sizeof(struct rx_prod_pkt_bd) * scctx->isc_nrxd[2]; scctx->isc_nrxqsets_max = min(pci_msix_count(softc->dev)-1, softc->fn_qcfg.alloc_completion_rings - 1); scctx->isc_nrxqsets_max = min(scctx->isc_nrxqsets_max, softc->fn_qcfg.alloc_rx_rings); scctx->isc_nrxqsets_max = min(scctx->isc_nrxqsets_max, softc->fn_qcfg.alloc_vnics); scctx->isc_ntxqsets_max = min(softc->fn_qcfg.alloc_tx_rings, softc->fn_qcfg.alloc_completion_rings - scctx->isc_nrxqsets_max - 1); scctx->isc_rss_table_size = HW_HASH_INDEX_SIZE; scctx->isc_rss_table_mask = scctx->isc_rss_table_size - 1; /* iflib will map and release this bar */ scctx->isc_msix_bar = pci_msix_table_bar(softc->dev); /* * Default settings for HW LRO (TPA): * Disable HW LRO by default * Can be enabled after taking care of 'packet forwarding' */ softc->hw_lro.enable = 0; softc->hw_lro.is_mode_gro = 0; softc->hw_lro.max_agg_segs = 5; /* 2^5 = 32 segs */ softc->hw_lro.max_aggs = HWRM_VNIC_TPA_CFG_INPUT_MAX_AGGS_MAX; softc->hw_lro.min_agg_len = 512; /* Allocate the default completion ring */ softc->def_cp_ring.stats_ctx_id = HWRM_NA_SIGNATURE; softc->def_cp_ring.ring.phys_id = (uint16_t)HWRM_NA_SIGNATURE; softc->def_cp_ring.ring.softc = softc; softc->def_cp_ring.ring.id = 0; softc->def_cp_ring.ring.doorbell = softc->def_cp_ring.ring.id * 0x80; softc->def_cp_ring.ring.ring_size = PAGE_SIZE / sizeof(struct cmpl_base); rc = iflib_dma_alloc(ctx, sizeof(struct cmpl_base) * softc->def_cp_ring.ring.ring_size, &softc->def_cp_ring_mem, 0); softc->def_cp_ring.ring.vaddr = softc->def_cp_ring_mem.idi_vaddr; softc->def_cp_ring.ring.paddr = softc->def_cp_ring_mem.idi_paddr; iflib_config_gtask_init(ctx, &softc->def_cp_task, bnxt_def_cp_task, "dflt_cp"); rc = bnxt_init_sysctl_ctx(softc); if (rc) goto init_sysctl_failed; if (BNXT_PF(softc)) { rc = bnxt_create_nvram_sysctls(softc->nvm_info); if (rc) goto failed; } arc4rand(softc->vnic_info.rss_hash_key, HW_HASH_KEY_SIZE, 0); softc->vnic_info.rss_hash_type = HWRM_VNIC_RSS_CFG_INPUT_HASH_TYPE_IPV4 | HWRM_VNIC_RSS_CFG_INPUT_HASH_TYPE_TCP_IPV4 | HWRM_VNIC_RSS_CFG_INPUT_HASH_TYPE_UDP_IPV4 | HWRM_VNIC_RSS_CFG_INPUT_HASH_TYPE_IPV6 | HWRM_VNIC_RSS_CFG_INPUT_HASH_TYPE_TCP_IPV6 | HWRM_VNIC_RSS_CFG_INPUT_HASH_TYPE_UDP_IPV6; rc = bnxt_create_config_sysctls_pre(softc); if (rc) goto failed; rc = bnxt_create_hw_lro_sysctls(softc); if (rc) goto failed; rc = bnxt_create_pause_fc_sysctls(softc); if (rc) goto failed; /* Initialize the vlan list */ SLIST_INIT(&softc->vnic_info.vlan_tags); softc->vnic_info.vlan_tag_list.idi_vaddr = NULL; return (rc); failed: bnxt_free_sysctl_ctx(softc); init_sysctl_failed: bnxt_hwrm_func_drv_unrgtr(softc, false); drv_rgtr_fail: if (BNXT_PF(softc)) free(softc->nvm_info, M_DEVBUF); nvm_alloc_fail: bnxt_free_hwrm_short_cmd_req(softc); hwrm_short_cmd_alloc_fail: ver_fail: free(softc->ver_info, M_DEVBUF); ver_alloc_fail: bnxt_free_hwrm_dma_mem(softc); dma_fail: BNXT_HWRM_LOCK_DESTROY(softc); bnxt_pci_mapping_free(softc); pci_disable_busmaster(softc->dev); return (rc); } static int bnxt_attach_post(if_ctx_t ctx) { struct bnxt_softc *softc = iflib_get_softc(ctx); if_t ifp = iflib_get_ifp(ctx); int rc; bnxt_create_config_sysctls_post(softc); /* Update link state etc... */ rc = bnxt_probe_phy(softc); if (rc) goto failed; /* Needs to be done after probing the phy */ bnxt_create_ver_sysctls(softc); bnxt_add_media_types(softc); ifmedia_set(softc->media, IFM_ETHER | IFM_AUTO); softc->scctx->isc_max_frame_size = ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN; failed: return rc; } static int bnxt_detach(if_ctx_t ctx) { struct bnxt_softc *softc = iflib_get_softc(ctx); struct bnxt_vlan_tag *tag; struct bnxt_vlan_tag *tmp; int i; bnxt_wol_config(ctx); bnxt_do_disable_intr(&softc->def_cp_ring); bnxt_free_sysctl_ctx(softc); bnxt_hwrm_func_reset(softc); bnxt_clear_ids(softc); iflib_irq_free(ctx, &softc->def_cp_ring.irq); iflib_config_gtask_deinit(&softc->def_cp_task); /* We need to free() these here... */ for (i = softc->nrxqsets-1; i>=0; i--) { iflib_irq_free(ctx, &softc->rx_cp_rings[i].irq); } iflib_dma_free(&softc->vnic_info.mc_list); iflib_dma_free(&softc->vnic_info.rss_hash_key_tbl); iflib_dma_free(&softc->vnic_info.rss_grp_tbl); if (softc->vnic_info.vlan_tag_list.idi_vaddr) iflib_dma_free(&softc->vnic_info.vlan_tag_list); SLIST_FOREACH_SAFE(tag, &softc->vnic_info.vlan_tags, next, tmp) free(tag, M_DEVBUF); iflib_dma_free(&softc->def_cp_ring_mem); for (i = 0; i < softc->nrxqsets; i++) free(softc->rx_rings[i].tpa_start, M_DEVBUF); free(softc->ver_info, M_DEVBUF); if (BNXT_PF(softc)) free(softc->nvm_info, M_DEVBUF); bnxt_hwrm_func_drv_unrgtr(softc, false); bnxt_free_hwrm_dma_mem(softc); bnxt_free_hwrm_short_cmd_req(softc); BNXT_HWRM_LOCK_DESTROY(softc); pci_disable_busmaster(softc->dev); bnxt_pci_mapping_free(softc); return 0; } /* Device configuration */ static void bnxt_init(if_ctx_t ctx) { struct bnxt_softc *softc = iflib_get_softc(ctx); struct ifmediareq ifmr; int i, j; int rc; rc = bnxt_hwrm_func_reset(softc); if (rc) return; bnxt_clear_ids(softc); /* Allocate the default completion ring */ softc->def_cp_ring.cons = UINT32_MAX; softc->def_cp_ring.v_bit = 1; bnxt_mark_cpr_invalid(&softc->def_cp_ring); rc = bnxt_hwrm_ring_alloc(softc, HWRM_RING_ALLOC_INPUT_RING_TYPE_L2_CMPL, &softc->def_cp_ring.ring, (uint16_t)HWRM_NA_SIGNATURE, HWRM_NA_SIGNATURE, true); if (rc) goto fail; /* And now set the default CP ring as the async CP ring */ rc = bnxt_cfg_async_cr(softc); if (rc) goto fail; for (i = 0; i < softc->nrxqsets; i++) { /* Allocate the statistics context */ rc = bnxt_hwrm_stat_ctx_alloc(softc, &softc->rx_cp_rings[i], softc->rx_stats.idi_paddr + (sizeof(struct ctx_hw_stats) * i)); if (rc) goto fail; /* Allocate the completion ring */ softc->rx_cp_rings[i].cons = UINT32_MAX; softc->rx_cp_rings[i].v_bit = 1; softc->rx_cp_rings[i].last_idx = UINT32_MAX; bnxt_mark_cpr_invalid(&softc->rx_cp_rings[i]); rc = bnxt_hwrm_ring_alloc(softc, HWRM_RING_ALLOC_INPUT_RING_TYPE_L2_CMPL, &softc->rx_cp_rings[i].ring, (uint16_t)HWRM_NA_SIGNATURE, HWRM_NA_SIGNATURE, true); if (rc) goto fail; /* Allocate the RX ring */ rc = bnxt_hwrm_ring_alloc(softc, HWRM_RING_ALLOC_INPUT_RING_TYPE_RX, &softc->rx_rings[i], (uint16_t)HWRM_NA_SIGNATURE, HWRM_NA_SIGNATURE, false); if (rc) goto fail; BNXT_RX_DB(&softc->rx_rings[i], 0); /* TODO: Cumulus+ doesn't need the double doorbell */ BNXT_RX_DB(&softc->rx_rings[i], 0); /* Allocate the AG ring */ rc = bnxt_hwrm_ring_alloc(softc, HWRM_RING_ALLOC_INPUT_RING_TYPE_RX, &softc->ag_rings[i], (uint16_t)HWRM_NA_SIGNATURE, HWRM_NA_SIGNATURE, false); if (rc) goto fail; BNXT_RX_DB(&softc->rx_rings[i], 0); /* TODO: Cumulus+ doesn't need the double doorbell */ BNXT_RX_DB(&softc->ag_rings[i], 0); /* Allocate the ring group */ softc->grp_info[i].stats_ctx = softc->rx_cp_rings[i].stats_ctx_id; softc->grp_info[i].rx_ring_id = softc->rx_rings[i].phys_id; softc->grp_info[i].ag_ring_id = softc->ag_rings[i].phys_id; softc->grp_info[i].cp_ring_id = softc->rx_cp_rings[i].ring.phys_id; rc = bnxt_hwrm_ring_grp_alloc(softc, &softc->grp_info[i]); if (rc) goto fail; } /* Allocate the VNIC RSS context */ rc = bnxt_hwrm_vnic_ctx_alloc(softc, &softc->vnic_info.rss_id); if (rc) goto fail; /* Allocate the vnic */ softc->vnic_info.def_ring_grp = softc->grp_info[0].grp_id; softc->vnic_info.mru = softc->scctx->isc_max_frame_size; rc = bnxt_hwrm_vnic_alloc(softc, &softc->vnic_info); if (rc) goto fail; rc = bnxt_hwrm_vnic_cfg(softc, &softc->vnic_info); if (rc) goto fail; rc = bnxt_hwrm_set_filter(softc, &softc->vnic_info); if (rc) goto fail; /* Enable RSS on the VNICs */ for (i = 0, j = 0; i < HW_HASH_INDEX_SIZE; i++) { ((uint16_t *) softc->vnic_info.rss_grp_tbl.idi_vaddr)[i] = htole16(softc->grp_info[j].grp_id); if (++j == softc->nrxqsets) j = 0; } rc = bnxt_hwrm_rss_cfg(softc, &softc->vnic_info, softc->vnic_info.rss_hash_type); if (rc) goto fail; rc = bnxt_hwrm_vnic_tpa_cfg(softc); if (rc) goto fail; for (i = 0; i < softc->ntxqsets; i++) { /* Allocate the statistics context */ rc = bnxt_hwrm_stat_ctx_alloc(softc, &softc->tx_cp_rings[i], softc->tx_stats.idi_paddr + (sizeof(struct ctx_hw_stats) * i)); if (rc) goto fail; /* Allocate the completion ring */ softc->tx_cp_rings[i].cons = UINT32_MAX; softc->tx_cp_rings[i].v_bit = 1; bnxt_mark_cpr_invalid(&softc->tx_cp_rings[i]); rc = bnxt_hwrm_ring_alloc(softc, HWRM_RING_ALLOC_INPUT_RING_TYPE_L2_CMPL, &softc->tx_cp_rings[i].ring, (uint16_t)HWRM_NA_SIGNATURE, HWRM_NA_SIGNATURE, false); if (rc) goto fail; /* Allocate the TX ring */ rc = bnxt_hwrm_ring_alloc(softc, HWRM_RING_ALLOC_INPUT_RING_TYPE_TX, &softc->tx_rings[i], softc->tx_cp_rings[i].ring.phys_id, softc->tx_cp_rings[i].stats_ctx_id, false); if (rc) goto fail; BNXT_TX_DB(&softc->tx_rings[i], 0); /* TODO: Cumulus+ doesn't need the double doorbell */ BNXT_TX_DB(&softc->tx_rings[i], 0); } bnxt_do_enable_intr(&softc->def_cp_ring); bnxt_media_status(softc->ctx, &ifmr); bnxt_hwrm_cfa_l2_set_rx_mask(softc, &softc->vnic_info); return; fail: bnxt_hwrm_func_reset(softc); bnxt_clear_ids(softc); return; } static void bnxt_stop(if_ctx_t ctx) { struct bnxt_softc *softc = iflib_get_softc(ctx); bnxt_do_disable_intr(&softc->def_cp_ring); bnxt_hwrm_func_reset(softc); bnxt_clear_ids(softc); return; } static u_int bnxt_copy_maddr(void *arg, struct sockaddr_dl *sdl, u_int cnt) { uint8_t *mta = arg; if (cnt == BNXT_MAX_MC_ADDRS) return (1); bcopy(LLADDR(sdl), &mta[cnt * ETHER_ADDR_LEN], ETHER_ADDR_LEN); return (1); } static void bnxt_multi_set(if_ctx_t ctx) { struct bnxt_softc *softc = iflib_get_softc(ctx); if_t ifp = iflib_get_ifp(ctx); uint8_t *mta; int mcnt; mta = softc->vnic_info.mc_list.idi_vaddr; bzero(mta, softc->vnic_info.mc_list.idi_size); mcnt = if_foreach_llmaddr(ifp, bnxt_copy_maddr, mta); if (mcnt > BNXT_MAX_MC_ADDRS) { softc->vnic_info.rx_mask |= HWRM_CFA_L2_SET_RX_MASK_INPUT_MASK_ALL_MCAST; bnxt_hwrm_cfa_l2_set_rx_mask(softc, &softc->vnic_info); } else { softc->vnic_info.rx_mask &= ~HWRM_CFA_L2_SET_RX_MASK_INPUT_MASK_ALL_MCAST; bus_dmamap_sync(softc->vnic_info.mc_list.idi_tag, softc->vnic_info.mc_list.idi_map, BUS_DMASYNC_PREWRITE); softc->vnic_info.mc_list_count = mcnt; softc->vnic_info.rx_mask |= HWRM_CFA_L2_SET_RX_MASK_INPUT_MASK_MCAST; if (bnxt_hwrm_cfa_l2_set_rx_mask(softc, &softc->vnic_info)) device_printf(softc->dev, "set_multi: rx_mask set failed\n"); } } static int bnxt_mtu_set(if_ctx_t ctx, uint32_t mtu) { struct bnxt_softc *softc = iflib_get_softc(ctx); if (mtu > BNXT_MAX_MTU) return EINVAL; softc->scctx->isc_max_frame_size = mtu + ETHER_HDR_LEN + ETHER_CRC_LEN; return 0; } static void bnxt_media_status(if_ctx_t ctx, struct ifmediareq * ifmr) { struct bnxt_softc *softc = iflib_get_softc(ctx); struct bnxt_link_info *link_info = &softc->link_info; struct ifmedia_entry *next; uint64_t target_baudrate = bnxt_get_baudrate(link_info); int active_media = IFM_UNKNOWN; bnxt_update_link(softc, true); ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; if (link_info->link_up) ifmr->ifm_status |= IFM_ACTIVE; else ifmr->ifm_status &= ~IFM_ACTIVE; if (link_info->duplex == HWRM_PORT_PHY_QCFG_OUTPUT_DUPLEX_CFG_FULL) ifmr->ifm_active |= IFM_FDX; else ifmr->ifm_active |= IFM_HDX; /* * Go through the list of supported media which got prepared * as part of bnxt_add_media_types() using api ifmedia_add(). */ LIST_FOREACH(next, &(iflib_get_media(ctx)->ifm_list), ifm_list) { if (ifmedia_baudrate(next->ifm_media) == target_baudrate) { active_media = next->ifm_media; break; } } ifmr->ifm_active |= active_media; if (link_info->flow_ctrl.rx) ifmr->ifm_active |= IFM_ETH_RXPAUSE; if (link_info->flow_ctrl.tx) ifmr->ifm_active |= IFM_ETH_TXPAUSE; bnxt_report_link(softc); return; } static int bnxt_media_change(if_ctx_t ctx) { struct bnxt_softc *softc = iflib_get_softc(ctx); struct ifmedia *ifm = iflib_get_media(ctx); struct ifmediareq ifmr; int rc; if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) return EINVAL; switch (IFM_SUBTYPE(ifm->ifm_media)) { case IFM_100_T: softc->link_info.autoneg &= ~BNXT_AUTONEG_SPEED; softc->link_info.req_link_speed = HWRM_PORT_PHY_CFG_INPUT_FORCE_LINK_SPEED_100MB; break; case IFM_1000_KX: case IFM_1000_T: case IFM_1000_SGMII: softc->link_info.autoneg &= ~BNXT_AUTONEG_SPEED; softc->link_info.req_link_speed = HWRM_PORT_PHY_CFG_INPUT_FORCE_LINK_SPEED_1GB; break; case IFM_2500_KX: case IFM_2500_T: softc->link_info.autoneg &= ~BNXT_AUTONEG_SPEED; softc->link_info.req_link_speed = HWRM_PORT_PHY_CFG_INPUT_FORCE_LINK_SPEED_2_5GB; break; case IFM_10G_CR1: case IFM_10G_KR: case IFM_10G_LR: case IFM_10G_SR: case IFM_10G_T: softc->link_info.autoneg &= ~BNXT_AUTONEG_SPEED; softc->link_info.req_link_speed = HWRM_PORT_PHY_CFG_INPUT_FORCE_LINK_SPEED_10GB; break; case IFM_20G_KR2: softc->link_info.autoneg &= ~BNXT_AUTONEG_SPEED; softc->link_info.req_link_speed = HWRM_PORT_PHY_CFG_INPUT_FORCE_LINK_SPEED_20GB; break; case IFM_25G_CR: case IFM_25G_KR: case IFM_25G_SR: softc->link_info.autoneg &= ~BNXT_AUTONEG_SPEED; softc->link_info.req_link_speed = HWRM_PORT_PHY_CFG_INPUT_FORCE_LINK_SPEED_25GB; break; case IFM_40G_CR4: case IFM_40G_KR4: case IFM_40G_LR4: case IFM_40G_SR4: softc->link_info.autoneg &= ~BNXT_AUTONEG_SPEED; softc->link_info.req_link_speed = HWRM_PORT_PHY_CFG_INPUT_FORCE_LINK_SPEED_40GB; break; case IFM_50G_CR2: case IFM_50G_KR2: softc->link_info.autoneg &= ~BNXT_AUTONEG_SPEED; softc->link_info.req_link_speed = HWRM_PORT_PHY_CFG_INPUT_FORCE_LINK_SPEED_50GB; break; case IFM_100G_CR4: case IFM_100G_KR4: case IFM_100G_LR4: case IFM_100G_SR4: softc->link_info.autoneg &= ~BNXT_AUTONEG_SPEED; softc->link_info.req_link_speed = HWRM_PORT_PHY_CFG_INPUT_FORCE_LINK_SPEED_100GB; break; default: device_printf(softc->dev, "Unsupported media type! Using auto\n"); /* Fall-through */ case IFM_AUTO: // Auto softc->link_info.autoneg |= BNXT_AUTONEG_SPEED; break; } rc = bnxt_hwrm_set_link_setting(softc, true, true, true); bnxt_media_status(softc->ctx, &ifmr); return rc; } static int bnxt_promisc_set(if_ctx_t ctx, int flags) { struct bnxt_softc *softc = iflib_get_softc(ctx); if_t ifp = iflib_get_ifp(ctx); int rc; if (ifp->if_flags & IFF_ALLMULTI || if_llmaddr_count(ifp) > BNXT_MAX_MC_ADDRS) softc->vnic_info.rx_mask |= HWRM_CFA_L2_SET_RX_MASK_INPUT_MASK_ALL_MCAST; else softc->vnic_info.rx_mask &= ~HWRM_CFA_L2_SET_RX_MASK_INPUT_MASK_ALL_MCAST; if (ifp->if_flags & IFF_PROMISC) softc->vnic_info.rx_mask |= HWRM_CFA_L2_SET_RX_MASK_INPUT_MASK_PROMISCUOUS | HWRM_CFA_L2_SET_RX_MASK_INPUT_MASK_ANYVLAN_NONVLAN; else softc->vnic_info.rx_mask &= ~(HWRM_CFA_L2_SET_RX_MASK_INPUT_MASK_PROMISCUOUS | HWRM_CFA_L2_SET_RX_MASK_INPUT_MASK_ANYVLAN_NONVLAN); rc = bnxt_hwrm_cfa_l2_set_rx_mask(softc, &softc->vnic_info); return rc; } static uint64_t bnxt_get_counter(if_ctx_t ctx, ift_counter cnt) { if_t ifp = iflib_get_ifp(ctx); if (cnt < IFCOUNTERS) return if_get_counter_default(ifp, cnt); return 0; } static void bnxt_update_admin_status(if_ctx_t ctx) { struct bnxt_softc *softc = iflib_get_softc(ctx); /* * When SR-IOV is enabled, avoid each VF sending this HWRM * request every sec with which firmware timeouts can happen */ if (BNXT_PF(softc)) { bnxt_hwrm_port_qstats(softc); } return; } static void bnxt_if_timer(if_ctx_t ctx, uint16_t qid) { struct bnxt_softc *softc = iflib_get_softc(ctx); uint64_t ticks_now = ticks; /* Schedule bnxt_update_admin_status() once per sec */ if (ticks_now - softc->admin_ticks >= hz) { softc->admin_ticks = ticks_now; iflib_admin_intr_deferred(ctx); } return; } static void inline bnxt_do_enable_intr(struct bnxt_cp_ring *cpr) { if (cpr->ring.phys_id != (uint16_t)HWRM_NA_SIGNATURE) { /* First time enabling, do not set index */ if (cpr->cons == UINT32_MAX) BNXT_CP_ENABLE_DB(&cpr->ring); else BNXT_CP_IDX_ENABLE_DB(&cpr->ring, cpr->cons); } } static void inline bnxt_do_disable_intr(struct bnxt_cp_ring *cpr) { if (cpr->ring.phys_id != (uint16_t)HWRM_NA_SIGNATURE) BNXT_CP_DISABLE_DB(&cpr->ring); } /* Enable all interrupts */ static void bnxt_intr_enable(if_ctx_t ctx) { struct bnxt_softc *softc = iflib_get_softc(ctx); int i; bnxt_do_enable_intr(&softc->def_cp_ring); for (i = 0; i < softc->nrxqsets; i++) bnxt_do_enable_intr(&softc->rx_cp_rings[i]); return; } /* Enable interrupt for a single queue */ static int bnxt_tx_queue_intr_enable(if_ctx_t ctx, uint16_t qid) { struct bnxt_softc *softc = iflib_get_softc(ctx); bnxt_do_enable_intr(&softc->tx_cp_rings[qid]); return 0; } static int bnxt_rx_queue_intr_enable(if_ctx_t ctx, uint16_t qid) { struct bnxt_softc *softc = iflib_get_softc(ctx); bnxt_do_enable_intr(&softc->rx_cp_rings[qid]); return 0; } /* Disable all interrupts */ static void bnxt_disable_intr(if_ctx_t ctx) { struct bnxt_softc *softc = iflib_get_softc(ctx); int i; /* * NOTE: These TX interrupts should never get enabled, so don't * update the index */ for (i = 0; i < softc->ntxqsets; i++) bnxt_do_disable_intr(&softc->tx_cp_rings[i]); for (i = 0; i < softc->nrxqsets; i++) bnxt_do_disable_intr(&softc->rx_cp_rings[i]); return; } static int bnxt_msix_intr_assign(if_ctx_t ctx, int msix) { struct bnxt_softc *softc = iflib_get_softc(ctx); int rc; int i; char irq_name[16]; rc = iflib_irq_alloc_generic(ctx, &softc->def_cp_ring.irq, softc->def_cp_ring.ring.id + 1, IFLIB_INTR_ADMIN, bnxt_handle_def_cp, softc, 0, "def_cp"); if (rc) { device_printf(iflib_get_dev(ctx), "Failed to register default completion ring handler\n"); return rc; } for (i=0; iscctx->isc_nrxqsets; i++) { snprintf(irq_name, sizeof(irq_name), "rxq%d", i); rc = iflib_irq_alloc_generic(ctx, &softc->rx_cp_rings[i].irq, softc->rx_cp_rings[i].ring.id + 1, IFLIB_INTR_RXTX, bnxt_handle_rx_cp, &softc->rx_cp_rings[i], i, irq_name); if (rc) { device_printf(iflib_get_dev(ctx), "Failed to register RX completion ring handler\n"); i--; goto fail; } } for (i=0; iscctx->isc_ntxqsets; i++) iflib_softirq_alloc_generic(ctx, NULL, IFLIB_INTR_TX, NULL, i, "tx_cp"); return rc; fail: for (; i>=0; i--) iflib_irq_free(ctx, &softc->rx_cp_rings[i].irq); iflib_irq_free(ctx, &softc->def_cp_ring.irq); return rc; } /* * We're explicitly allowing duplicates here. They will need to be * removed as many times as they are added. */ static void bnxt_vlan_register(if_ctx_t ctx, uint16_t vtag) { struct bnxt_softc *softc = iflib_get_softc(ctx); struct bnxt_vlan_tag *new_tag; new_tag = malloc(sizeof(struct bnxt_vlan_tag), M_DEVBUF, M_NOWAIT); if (new_tag == NULL) return; new_tag->tag = vtag; new_tag->tpid = 8100; SLIST_INSERT_HEAD(&softc->vnic_info.vlan_tags, new_tag, next); }; static void bnxt_vlan_unregister(if_ctx_t ctx, uint16_t vtag) { struct bnxt_softc *softc = iflib_get_softc(ctx); struct bnxt_vlan_tag *vlan_tag; SLIST_FOREACH(vlan_tag, &softc->vnic_info.vlan_tags, next) { if (vlan_tag->tag == vtag) { SLIST_REMOVE(&softc->vnic_info.vlan_tags, vlan_tag, bnxt_vlan_tag, next); free(vlan_tag, M_DEVBUF); break; } } } static int bnxt_wol_config(if_ctx_t ctx) { struct bnxt_softc *softc = iflib_get_softc(ctx); if_t ifp = iflib_get_ifp(ctx); if (!softc) return -EBUSY; if (!bnxt_wol_supported(softc)) return -ENOTSUP; if (if_getcapabilities(ifp) & IFCAP_WOL_MAGIC) { if (!softc->wol) { if (bnxt_hwrm_alloc_wol_fltr(softc)) return -EBUSY; softc->wol = 1; } } else { if (softc->wol) { if (bnxt_hwrm_free_wol_fltr(softc)) return -EBUSY; softc->wol = 0; } } return 0; } static int bnxt_shutdown(if_ctx_t ctx) { bnxt_wol_config(ctx); return 0; } static int bnxt_suspend(if_ctx_t ctx) { bnxt_wol_config(ctx); return 0; } static int bnxt_resume(if_ctx_t ctx) { struct bnxt_softc *softc = iflib_get_softc(ctx); bnxt_get_wol_settings(softc); return 0; } static int bnxt_priv_ioctl(if_ctx_t ctx, u_long command, caddr_t data) { struct bnxt_softc *softc = iflib_get_softc(ctx); struct ifreq *ifr = (struct ifreq *)data; struct bnxt_ioctl_header *ioh; size_t iol; int rc = ENOTSUP; struct bnxt_ioctl_data iod_storage, *iod = &iod_storage; switch (command) { case SIOCGPRIVATE_0: if ((rc = priv_check(curthread, PRIV_DRIVER)) != 0) goto exit; ioh = ifr_buffer_get_buffer(ifr); iol = ifr_buffer_get_length(ifr); if (iol > sizeof(iod_storage)) return (EINVAL); if ((rc = copyin(ioh, iod, iol)) != 0) goto exit; switch (iod->hdr.type) { case BNXT_HWRM_NVM_FIND_DIR_ENTRY: { struct bnxt_ioctl_hwrm_nvm_find_dir_entry *find = &iod->find; rc = bnxt_hwrm_nvm_find_dir_entry(softc, find->type, &find->ordinal, find->ext, &find->index, find->use_index, find->search_opt, &find->data_length, &find->item_length, &find->fw_ver); if (rc) { iod->hdr.rc = rc; copyout(&iod->hdr.rc, &ioh->rc, sizeof(ioh->rc)); } else { iod->hdr.rc = 0; copyout(iod, ioh, iol); } rc = 0; goto exit; } case BNXT_HWRM_NVM_READ: { struct bnxt_ioctl_hwrm_nvm_read *rd = &iod->read; struct iflib_dma_info dma_data; size_t offset; size_t remain; size_t csize; /* * Some HWRM versions can't read more than 0x8000 bytes */ rc = iflib_dma_alloc(softc->ctx, min(rd->length, 0x8000), &dma_data, BUS_DMA_NOWAIT); if (rc) break; for (remain = rd->length, offset = 0; remain && offset < rd->length; offset += 0x8000) { csize = min(remain, 0x8000); rc = bnxt_hwrm_nvm_read(softc, rd->index, rd->offset + offset, csize, &dma_data); if (rc) { iod->hdr.rc = rc; copyout(&iod->hdr.rc, &ioh->rc, sizeof(ioh->rc)); break; } else { copyout(dma_data.idi_vaddr, rd->data + offset, csize); iod->hdr.rc = 0; } remain -= csize; } if (iod->hdr.rc == 0) copyout(iod, ioh, iol); iflib_dma_free(&dma_data); rc = 0; goto exit; } case BNXT_HWRM_FW_RESET: { struct bnxt_ioctl_hwrm_fw_reset *rst = &iod->reset; rc = bnxt_hwrm_fw_reset(softc, rst->processor, &rst->selfreset); if (rc) { iod->hdr.rc = rc; copyout(&iod->hdr.rc, &ioh->rc, sizeof(ioh->rc)); } else { iod->hdr.rc = 0; copyout(iod, ioh, iol); } rc = 0; goto exit; } case BNXT_HWRM_FW_QSTATUS: { struct bnxt_ioctl_hwrm_fw_qstatus *qstat = &iod->status; rc = bnxt_hwrm_fw_qstatus(softc, qstat->processor, &qstat->selfreset); if (rc) { iod->hdr.rc = rc; copyout(&iod->hdr.rc, &ioh->rc, sizeof(ioh->rc)); } else { iod->hdr.rc = 0; copyout(iod, ioh, iol); } rc = 0; goto exit; } case BNXT_HWRM_NVM_WRITE: { struct bnxt_ioctl_hwrm_nvm_write *wr = &iod->write; rc = bnxt_hwrm_nvm_write(softc, wr->data, true, wr->type, wr->ordinal, wr->ext, wr->attr, wr->option, wr->data_length, wr->keep, &wr->item_length, &wr->index); if (rc) { iod->hdr.rc = rc; copyout(&iod->hdr.rc, &ioh->rc, sizeof(ioh->rc)); } else { iod->hdr.rc = 0; copyout(iod, ioh, iol); } rc = 0; goto exit; } case BNXT_HWRM_NVM_ERASE_DIR_ENTRY: { struct bnxt_ioctl_hwrm_nvm_erase_dir_entry *erase = &iod->erase; rc = bnxt_hwrm_nvm_erase_dir_entry(softc, erase->index); if (rc) { iod->hdr.rc = rc; copyout(&iod->hdr.rc, &ioh->rc, sizeof(ioh->rc)); } else { iod->hdr.rc = 0; copyout(iod, ioh, iol); } rc = 0; goto exit; } case BNXT_HWRM_NVM_GET_DIR_INFO: { struct bnxt_ioctl_hwrm_nvm_get_dir_info *info = &iod->dir_info; rc = bnxt_hwrm_nvm_get_dir_info(softc, &info->entries, &info->entry_length); if (rc) { iod->hdr.rc = rc; copyout(&iod->hdr.rc, &ioh->rc, sizeof(ioh->rc)); } else { iod->hdr.rc = 0; copyout(iod, ioh, iol); } rc = 0; goto exit; } case BNXT_HWRM_NVM_GET_DIR_ENTRIES: { struct bnxt_ioctl_hwrm_nvm_get_dir_entries *get = &iod->dir_entries; struct iflib_dma_info dma_data; rc = iflib_dma_alloc(softc->ctx, get->max_size, &dma_data, BUS_DMA_NOWAIT); if (rc) break; rc = bnxt_hwrm_nvm_get_dir_entries(softc, &get->entries, &get->entry_length, &dma_data); if (rc) { iod->hdr.rc = rc; copyout(&iod->hdr.rc, &ioh->rc, sizeof(ioh->rc)); } else { copyout(dma_data.idi_vaddr, get->data, get->entry_length * get->entries); iod->hdr.rc = 0; copyout(iod, ioh, iol); } iflib_dma_free(&dma_data); rc = 0; goto exit; } case BNXT_HWRM_NVM_VERIFY_UPDATE: { struct bnxt_ioctl_hwrm_nvm_verify_update *vrfy = &iod->verify; rc = bnxt_hwrm_nvm_verify_update(softc, vrfy->type, vrfy->ordinal, vrfy->ext); if (rc) { iod->hdr.rc = rc; copyout(&iod->hdr.rc, &ioh->rc, sizeof(ioh->rc)); } else { iod->hdr.rc = 0; copyout(iod, ioh, iol); } rc = 0; goto exit; } case BNXT_HWRM_NVM_INSTALL_UPDATE: { struct bnxt_ioctl_hwrm_nvm_install_update *inst = &iod->install; rc = bnxt_hwrm_nvm_install_update(softc, inst->install_type, &inst->installed_items, &inst->result, &inst->problem_item, &inst->reset_required); if (rc) { iod->hdr.rc = rc; copyout(&iod->hdr.rc, &ioh->rc, sizeof(ioh->rc)); } else { iod->hdr.rc = 0; copyout(iod, ioh, iol); } rc = 0; goto exit; } case BNXT_HWRM_NVM_MODIFY: { struct bnxt_ioctl_hwrm_nvm_modify *mod = &iod->modify; rc = bnxt_hwrm_nvm_modify(softc, mod->index, mod->offset, mod->data, true, mod->length); if (rc) { iod->hdr.rc = rc; copyout(&iod->hdr.rc, &ioh->rc, sizeof(ioh->rc)); } else { iod->hdr.rc = 0; copyout(iod, ioh, iol); } rc = 0; goto exit; } case BNXT_HWRM_FW_GET_TIME: { struct bnxt_ioctl_hwrm_fw_get_time *gtm = &iod->get_time; rc = bnxt_hwrm_fw_get_time(softc, >m->year, >m->month, >m->day, >m->hour, >m->minute, >m->second, >m->millisecond, >m->zone); if (rc) { iod->hdr.rc = rc; copyout(&iod->hdr.rc, &ioh->rc, sizeof(ioh->rc)); } else { iod->hdr.rc = 0; copyout(iod, ioh, iol); } rc = 0; goto exit; } case BNXT_HWRM_FW_SET_TIME: { struct bnxt_ioctl_hwrm_fw_set_time *stm = &iod->set_time; rc = bnxt_hwrm_fw_set_time(softc, stm->year, stm->month, stm->day, stm->hour, stm->minute, stm->second, stm->millisecond, stm->zone); if (rc) { iod->hdr.rc = rc; copyout(&iod->hdr.rc, &ioh->rc, sizeof(ioh->rc)); } else { iod->hdr.rc = 0; copyout(iod, ioh, iol); } rc = 0; goto exit; } } break; } exit: return rc; } /* * Support functions */ static int bnxt_probe_phy(struct bnxt_softc *softc) { struct bnxt_link_info *link_info = &softc->link_info; int rc = 0; rc = bnxt_update_link(softc, false); if (rc) { device_printf(softc->dev, "Probe phy can't update link (rc: %x)\n", rc); return (rc); } /*initialize the ethool setting copy with NVM settings */ if (link_info->auto_mode != HWRM_PORT_PHY_QCFG_OUTPUT_AUTO_MODE_NONE) link_info->autoneg |= BNXT_AUTONEG_SPEED; link_info->req_duplex = link_info->duplex_setting; if (link_info->autoneg & BNXT_AUTONEG_SPEED) link_info->req_link_speed = link_info->auto_link_speed; else link_info->req_link_speed = link_info->force_link_speed; return (rc); } static void bnxt_add_media_types(struct bnxt_softc *softc) { struct bnxt_link_info *link_info = &softc->link_info; uint16_t supported; uint8_t phy_type = get_phy_type(softc); supported = link_info->support_speeds; /* Auto is always supported */ ifmedia_add(softc->media, IFM_ETHER | IFM_AUTO, 0, NULL); if (softc->flags & BNXT_FLAG_NPAR) return; switch (phy_type) { case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_100G_BASECR4: case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_40G_BASECR4: case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_25G_BASECR_CA_L: case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_25G_BASECR_CA_S: case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_25G_BASECR_CA_N: case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASECR: BNXT_IFMEDIA_ADD(supported, SPEEDS_100GB, IFM_100G_CR4); BNXT_IFMEDIA_ADD(supported, SPEEDS_50GB, IFM_50G_CR2); BNXT_IFMEDIA_ADD(supported, SPEEDS_40GB, IFM_40G_CR4); BNXT_IFMEDIA_ADD(supported, SPEEDS_25GB, IFM_25G_CR); BNXT_IFMEDIA_ADD(supported, SPEEDS_10GB, IFM_10G_CR1); BNXT_IFMEDIA_ADD(supported, SPEEDS_1GB, IFM_1000_T); break; case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_100G_BASELR4: case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_40G_BASELR4: case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASELR: BNXT_IFMEDIA_ADD(supported, SPEEDS_100GB, IFM_100G_LR4); BNXT_IFMEDIA_ADD(supported, SPEEDS_40GB, IFM_40G_LR4); BNXT_IFMEDIA_ADD(supported, SPEEDS_25GB, IFM_25G_LR); BNXT_IFMEDIA_ADD(supported, SPEEDS_10GB, IFM_10G_LR); BNXT_IFMEDIA_ADD(supported, SPEEDS_1GB, IFM_1000_LX); break; case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_100G_BASESR10: case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_100G_BASESR4: case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_40G_BASESR4: case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASESR: case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_40G_BASEER4: case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_100G_BASEER4: case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_25G_BASESR: case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_1G_BASESX: BNXT_IFMEDIA_ADD(supported, SPEEDS_100GB, IFM_100G_SR4); BNXT_IFMEDIA_ADD(supported, SPEEDS_40GB, IFM_40G_SR4); BNXT_IFMEDIA_ADD(supported, SPEEDS_25GB, IFM_25G_SR); BNXT_IFMEDIA_ADD(supported, SPEEDS_10GB, IFM_10G_SR); BNXT_IFMEDIA_ADD(supported, SPEEDS_1GB, IFM_1000_SX); break; case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASEKR4: case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASEKR2: case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASEKR: BNXT_IFMEDIA_ADD(supported, SPEEDS_100GB, IFM_100G_KR4); BNXT_IFMEDIA_ADD(supported, SPEEDS_50GB, IFM_50G_KR2); BNXT_IFMEDIA_ADD(supported, SPEEDS_40GB, IFM_40G_KR4); BNXT_IFMEDIA_ADD(supported, SPEEDS_25GB, IFM_25G_KR); BNXT_IFMEDIA_ADD(supported, SPEEDS_20GB, IFM_20G_KR2); BNXT_IFMEDIA_ADD(supported, SPEEDS_10GB, IFM_10G_KR); BNXT_IFMEDIA_ADD(supported, SPEEDS_1GB, IFM_1000_KX); break; case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_40G_ACTIVE_CABLE: BNXT_IFMEDIA_ADD(supported, SPEEDS_25GB, IFM_25G_ACC); BNXT_IFMEDIA_ADD(supported, SPEEDS_10GB, IFM_10G_AOC); break; case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_1G_BASECX: BNXT_IFMEDIA_ADD(supported, SPEEDS_1GBHD, IFM_1000_CX); break; case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_1G_BASET: case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASET: case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASETE: BNXT_IFMEDIA_ADD(supported, SPEEDS_10GB, IFM_10G_T); BNXT_IFMEDIA_ADD(supported, SPEEDS_2_5GB, IFM_2500_T); BNXT_IFMEDIA_ADD(supported, SPEEDS_1GB, IFM_1000_T); BNXT_IFMEDIA_ADD(supported, SPEEDS_100MB, IFM_100_T); BNXT_IFMEDIA_ADD(supported, SPEEDS_10MB, IFM_10_T); break; case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASEKX: BNXT_IFMEDIA_ADD(supported, SPEEDS_10GB, IFM_10G_KR); BNXT_IFMEDIA_ADD(supported, SPEEDS_2_5GB, IFM_2500_KX); BNXT_IFMEDIA_ADD(supported, SPEEDS_1GB, IFM_1000_KX); break; case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_SGMIIEXTPHY: BNXT_IFMEDIA_ADD(supported, SPEEDS_1GB, IFM_1000_SGMII); break; case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_UNKNOWN: /* Only Autoneg is supported for TYPE_UNKNOWN */ device_printf(softc->dev, "Unknown phy type\n"); break; default: /* Only Autoneg is supported for new phy type values */ device_printf(softc->dev, "phy type %d not supported by driver\n", phy_type); break; } return; } static int bnxt_map_bar(struct bnxt_softc *softc, struct bnxt_bar_info *bar, int bar_num, bool shareable) { uint32_t flag; if (bar->res != NULL) { device_printf(softc->dev, "Bar %d already mapped\n", bar_num); return EDOOFUS; } bar->rid = PCIR_BAR(bar_num); flag = RF_ACTIVE; if (shareable) flag |= RF_SHAREABLE; if ((bar->res = bus_alloc_resource_any(softc->dev, SYS_RES_MEMORY, &bar->rid, flag)) == NULL) { device_printf(softc->dev, "PCI BAR%d mapping failure\n", bar_num); return (ENXIO); } bar->tag = rman_get_bustag(bar->res); bar->handle = rman_get_bushandle(bar->res); bar->size = rman_get_size(bar->res); return 0; } static int bnxt_pci_mapping(struct bnxt_softc *softc) { int rc; rc = bnxt_map_bar(softc, &softc->hwrm_bar, 0, true); if (rc) return rc; rc = bnxt_map_bar(softc, &softc->doorbell_bar, 2, false); return rc; } static void bnxt_pci_mapping_free(struct bnxt_softc *softc) { if (softc->hwrm_bar.res != NULL) bus_release_resource(softc->dev, SYS_RES_MEMORY, softc->hwrm_bar.rid, softc->hwrm_bar.res); softc->hwrm_bar.res = NULL; if (softc->doorbell_bar.res != NULL) bus_release_resource(softc->dev, SYS_RES_MEMORY, softc->doorbell_bar.rid, softc->doorbell_bar.res); softc->doorbell_bar.res = NULL; } static int bnxt_update_link(struct bnxt_softc *softc, bool chng_link_state) { struct bnxt_link_info *link_info = &softc->link_info; uint8_t link_up = link_info->link_up; int rc = 0; rc = bnxt_hwrm_port_phy_qcfg(softc); if (rc) goto exit; /* TODO: need to add more logic to report VF link */ if (chng_link_state) { if (link_info->phy_link_status == HWRM_PORT_PHY_QCFG_OUTPUT_LINK_LINK) link_info->link_up = 1; else link_info->link_up = 0; if (link_up != link_info->link_up) bnxt_report_link(softc); } else { /* always link down if not require to update link state */ link_info->link_up = 0; } exit: return rc; } void bnxt_report_link(struct bnxt_softc *softc) { struct bnxt_link_info *link_info = &softc->link_info; const char *duplex = NULL, *flow_ctrl = NULL; if (link_info->link_up == link_info->last_link_up) { if (!link_info->link_up) return; if ((link_info->duplex == link_info->last_duplex) && (!(BNXT_IS_FLOW_CTRL_CHANGED(link_info)))) return; } if (link_info->link_up) { if (link_info->duplex == HWRM_PORT_PHY_QCFG_OUTPUT_DUPLEX_CFG_FULL) duplex = "full duplex"; else duplex = "half duplex"; if (link_info->flow_ctrl.tx & link_info->flow_ctrl.rx) flow_ctrl = "FC - receive & transmit"; else if (link_info->flow_ctrl.tx) flow_ctrl = "FC - transmit"; else if (link_info->flow_ctrl.rx) flow_ctrl = "FC - receive"; else flow_ctrl = "FC - none"; iflib_link_state_change(softc->ctx, LINK_STATE_UP, IF_Gbps(100)); device_printf(softc->dev, "Link is UP %s, %s - %d Mbps \n", duplex, flow_ctrl, (link_info->link_speed * 100)); } else { iflib_link_state_change(softc->ctx, LINK_STATE_DOWN, bnxt_get_baudrate(&softc->link_info)); device_printf(softc->dev, "Link is Down\n"); } link_info->last_link_up = link_info->link_up; link_info->last_duplex = link_info->duplex; link_info->last_flow_ctrl.tx = link_info->flow_ctrl.tx; link_info->last_flow_ctrl.rx = link_info->flow_ctrl.rx; link_info->last_flow_ctrl.autoneg = link_info->flow_ctrl.autoneg; /* update media types */ ifmedia_removeall(softc->media); bnxt_add_media_types(softc); ifmedia_set(softc->media, IFM_ETHER | IFM_AUTO); } static int bnxt_handle_rx_cp(void *arg) { struct bnxt_cp_ring *cpr = arg; /* Disable further interrupts for this queue */ BNXT_CP_DISABLE_DB(&cpr->ring); return FILTER_SCHEDULE_THREAD; } static int bnxt_handle_def_cp(void *arg) { struct bnxt_softc *softc = arg; BNXT_CP_DISABLE_DB(&softc->def_cp_ring.ring); GROUPTASK_ENQUEUE(&softc->def_cp_task); return FILTER_HANDLED; } static void bnxt_clear_ids(struct bnxt_softc *softc) { int i; softc->def_cp_ring.stats_ctx_id = HWRM_NA_SIGNATURE; softc->def_cp_ring.ring.phys_id = (uint16_t)HWRM_NA_SIGNATURE; for (i = 0; i < softc->ntxqsets; i++) { softc->tx_cp_rings[i].stats_ctx_id = HWRM_NA_SIGNATURE; softc->tx_cp_rings[i].ring.phys_id = (uint16_t)HWRM_NA_SIGNATURE; softc->tx_rings[i].phys_id = (uint16_t)HWRM_NA_SIGNATURE; } for (i = 0; i < softc->nrxqsets; i++) { softc->rx_cp_rings[i].stats_ctx_id = HWRM_NA_SIGNATURE; softc->rx_cp_rings[i].ring.phys_id = (uint16_t)HWRM_NA_SIGNATURE; softc->rx_rings[i].phys_id = (uint16_t)HWRM_NA_SIGNATURE; softc->ag_rings[i].phys_id = (uint16_t)HWRM_NA_SIGNATURE; softc->grp_info[i].grp_id = (uint16_t)HWRM_NA_SIGNATURE; } softc->vnic_info.filter_id = -1; softc->vnic_info.id = (uint16_t)HWRM_NA_SIGNATURE; softc->vnic_info.rss_id = (uint16_t)HWRM_NA_SIGNATURE; memset(softc->vnic_info.rss_grp_tbl.idi_vaddr, 0xff, softc->vnic_info.rss_grp_tbl.idi_size); } static void bnxt_mark_cpr_invalid(struct bnxt_cp_ring *cpr) { struct cmpl_base *cmp = (void *)cpr->ring.vaddr; int i; for (i = 0; i < cpr->ring.ring_size; i++) cmp[i].info3_v = !cpr->v_bit; } static void bnxt_handle_async_event(struct bnxt_softc *softc, struct cmpl_base *cmpl) { struct hwrm_async_event_cmpl *ae = (void *)cmpl; uint16_t async_id = le16toh(ae->event_id); struct ifmediareq ifmr; switch (async_id) { case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_STATUS_CHANGE: case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_SPEED_CHANGE: case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_SPEED_CFG_CHANGE: bnxt_media_status(softc->ctx, &ifmr); break; case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_MTU_CHANGE: case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_DCB_CONFIG_CHANGE: case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PORT_CONN_NOT_ALLOWED: case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_SPEED_CFG_NOT_ALLOWED: case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_FUNC_DRVR_UNLOAD: case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_FUNC_DRVR_LOAD: case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PF_DRVR_UNLOAD: case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PF_DRVR_LOAD: case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_VF_FLR: case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_VF_MAC_ADDR_CHANGE: case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PF_VF_COMM_STATUS_CHANGE: case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_VF_CFG_CHANGE: case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_HWRM_ERROR: device_printf(softc->dev, "Unhandled async completion type %u\n", async_id); break; default: device_printf(softc->dev, "Unknown async completion type %u\n", async_id); break; } } static void bnxt_def_cp_task(void *context) { if_ctx_t ctx = context; struct bnxt_softc *softc = iflib_get_softc(ctx); struct bnxt_cp_ring *cpr = &softc->def_cp_ring; /* Handle completions on the default completion ring */ struct cmpl_base *cmpl; uint32_t cons = cpr->cons; bool v_bit = cpr->v_bit; bool last_v_bit; uint32_t last_cons; uint16_t type; for (;;) { last_cons = cons; last_v_bit = v_bit; NEXT_CP_CONS_V(&cpr->ring, cons, v_bit); cmpl = &((struct cmpl_base *)cpr->ring.vaddr)[cons]; if (!CMP_VALID(cmpl, v_bit)) break; type = le16toh(cmpl->type) & CMPL_BASE_TYPE_MASK; switch (type) { case CMPL_BASE_TYPE_HWRM_ASYNC_EVENT: bnxt_handle_async_event(softc, cmpl); break; case CMPL_BASE_TYPE_TX_L2: case CMPL_BASE_TYPE_RX_L2: case CMPL_BASE_TYPE_RX_AGG: case CMPL_BASE_TYPE_RX_TPA_START: case CMPL_BASE_TYPE_RX_TPA_END: case CMPL_BASE_TYPE_STAT_EJECT: case CMPL_BASE_TYPE_HWRM_DONE: case CMPL_BASE_TYPE_HWRM_FWD_REQ: case CMPL_BASE_TYPE_HWRM_FWD_RESP: case CMPL_BASE_TYPE_CQ_NOTIFICATION: case CMPL_BASE_TYPE_SRQ_EVENT: case CMPL_BASE_TYPE_DBQ_EVENT: case CMPL_BASE_TYPE_QP_EVENT: case CMPL_BASE_TYPE_FUNC_EVENT: device_printf(softc->dev, "Unhandled completion type %u\n", type); break; default: device_printf(softc->dev, "Unknown completion type %u\n", type); break; } } cpr->cons = last_cons; cpr->v_bit = last_v_bit; BNXT_CP_IDX_ENABLE_DB(&cpr->ring, cpr->cons); } static uint8_t get_phy_type(struct bnxt_softc *softc) { struct bnxt_link_info *link_info = &softc->link_info; uint8_t phy_type = link_info->phy_type; uint16_t supported; if (phy_type != HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_UNKNOWN) return phy_type; /* Deduce the phy type from the media type and supported speeds */ supported = link_info->support_speeds; if (link_info->media_type == HWRM_PORT_PHY_QCFG_OUTPUT_MEDIA_TYPE_TP) return HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASET; if (link_info->media_type == HWRM_PORT_PHY_QCFG_OUTPUT_MEDIA_TYPE_DAC) { if (supported & HWRM_PORT_PHY_QCFG_OUTPUT_SUPPORT_SPEEDS_2_5GB) return HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASEKX; if (supported & HWRM_PORT_PHY_QCFG_OUTPUT_SUPPORT_SPEEDS_20GB) return HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASEKR; return HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASECR; } if (link_info->media_type == HWRM_PORT_PHY_QCFG_OUTPUT_MEDIA_TYPE_FIBRE) return HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASESR; return phy_type; } bool bnxt_check_hwrm_version(struct bnxt_softc *softc) { char buf[16]; sprintf(buf, "%hhu.%hhu.%hhu", softc->ver_info->hwrm_min_major, softc->ver_info->hwrm_min_minor, softc->ver_info->hwrm_min_update); if (softc->ver_info->hwrm_min_major > softc->ver_info->hwrm_if_major) { device_printf(softc->dev, "WARNING: HWRM version %s is too old (older than %s)\n", softc->ver_info->hwrm_if_ver, buf); return false; } else if(softc->ver_info->hwrm_min_major == softc->ver_info->hwrm_if_major) { if (softc->ver_info->hwrm_min_minor > softc->ver_info->hwrm_if_minor) { device_printf(softc->dev, "WARNING: HWRM version %s is too old (older than %s)\n", softc->ver_info->hwrm_if_ver, buf); return false; } else if (softc->ver_info->hwrm_min_minor == softc->ver_info->hwrm_if_minor) { if (softc->ver_info->hwrm_min_update > softc->ver_info->hwrm_if_update) { device_printf(softc->dev, "WARNING: HWRM version %s is too old (older than %s)\n", softc->ver_info->hwrm_if_ver, buf); return false; } } } return true; } static uint64_t bnxt_get_baudrate(struct bnxt_link_info *link) { switch (link->link_speed) { case HWRM_PORT_PHY_QCFG_OUTPUT_LINK_SPEED_100MB: return IF_Mbps(100); case HWRM_PORT_PHY_QCFG_OUTPUT_LINK_SPEED_1GB: return IF_Gbps(1); case HWRM_PORT_PHY_QCFG_OUTPUT_LINK_SPEED_2GB: return IF_Gbps(2); case HWRM_PORT_PHY_QCFG_OUTPUT_LINK_SPEED_2_5GB: return IF_Mbps(2500); case HWRM_PORT_PHY_QCFG_OUTPUT_LINK_SPEED_10GB: return IF_Gbps(10); case HWRM_PORT_PHY_QCFG_OUTPUT_LINK_SPEED_20GB: return IF_Gbps(20); case HWRM_PORT_PHY_QCFG_OUTPUT_LINK_SPEED_25GB: return IF_Gbps(25); case HWRM_PORT_PHY_QCFG_OUTPUT_LINK_SPEED_40GB: return IF_Gbps(40); case HWRM_PORT_PHY_QCFG_OUTPUT_LINK_SPEED_50GB: return IF_Gbps(50); case HWRM_PORT_PHY_QCFG_OUTPUT_LINK_SPEED_100GB: return IF_Gbps(100); case HWRM_PORT_PHY_QCFG_OUTPUT_LINK_SPEED_10MB: return IF_Mbps(10); } return IF_Gbps(100); } static void bnxt_get_wol_settings(struct bnxt_softc *softc) { uint16_t wol_handle = 0; if (!bnxt_wol_supported(softc)) return; do { wol_handle = bnxt_hwrm_get_wol_fltrs(softc, wol_handle); } while (wol_handle && wol_handle != BNXT_NO_MORE_WOL_FILTERS); } diff --git a/sys/dev/e1000/if_em.c b/sys/dev/e1000/if_em.c index 11d11e7bf790..abc78952a560 100644 --- a/sys/dev/e1000/if_em.c +++ b/sys/dev/e1000/if_em.c @@ -1,4634 +1,4630 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2016 Nicole Graziano * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* $FreeBSD$ */ #include "if_em.h" #include #include #define em_mac_min e1000_82547 #define igb_mac_min e1000_82575 /********************************************************************* * Driver version: *********************************************************************/ char em_driver_version[] = "7.6.1-k"; /********************************************************************* * PCI Device ID Table * * Used by probe to select devices to load on * Last field stores an index into e1000_strings * Last entry must be all 0s * * { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index } *********************************************************************/ static pci_vendor_info_t em_vendor_info_array[] = { /* Intel(R) PRO/1000 Network Connection - Legacy em*/ PVID(0x8086, E1000_DEV_ID_82540EM, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82540EM_LOM, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82540EP, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82540EP_LOM, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82540EP_LP, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82541EI, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82541ER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82541ER_LOM, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82541EI_MOBILE, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82541GI, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82541GI_LF, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82541GI_MOBILE, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82542, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82543GC_FIBER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82543GC_COPPER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82544EI_COPPER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82544EI_FIBER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82544GC_COPPER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82544GC_LOM, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82545EM_COPPER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82545EM_FIBER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82545GM_COPPER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82545GM_FIBER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82545GM_SERDES, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82546EB_COPPER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82546EB_FIBER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82546EB_QUAD_COPPER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82546GB_COPPER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82546GB_FIBER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82546GB_SERDES, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82546GB_PCIE, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82546GB_QUAD_COPPER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82547EI, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82547EI_MOBILE, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82547GI, "Intel(R) PRO/1000 Network Connection"), /* Intel(R) PRO/1000 Network Connection - em */ PVID(0x8086, E1000_DEV_ID_82571EB_COPPER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82571EB_FIBER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82571EB_SERDES, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82572EI, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82572EI_COPPER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82572EI_FIBER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82572EI_SERDES, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82573E, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82573E_IAMT, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82573L, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82583V, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH8_IGP_AMT, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH8_IGP_C, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH8_IFE, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH8_IFE_GT, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH8_IFE_G, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH8_IGP_M, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH8_82567V_3, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH9_IGP_AMT, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH9_IGP_C, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH9_IGP_M, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH9_IGP_M_V, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH9_IFE, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH9_IFE_GT, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH9_IFE_G, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH9_BM, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82574L, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82574LA, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH10_R_BM_LM, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH10_R_BM_LF, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH10_R_BM_V, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH10_D_BM_LM, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH10_D_BM_LF, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH10_D_BM_V, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_M_HV_LM, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_M_HV_LC, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_D_HV_DM, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_D_HV_DC, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH2_LV_LM, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH2_LV_V, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_LPT_I217_LM, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_LPT_I217_V, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_LPTLP_I218_LM, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_LPTLP_I218_V, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_I218_LM2, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_I218_V2, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_I218_LM3, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_I218_V3, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_SPT_I219_LM, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_SPT_I219_V, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_SPT_I219_LM2, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_SPT_I219_V2, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_LBG_I219_LM3, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_SPT_I219_LM4, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_SPT_I219_V4, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_SPT_I219_LM5, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_SPT_I219_V5, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_CNP_I219_LM6, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_CNP_I219_V6, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_CNP_I219_LM7, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_CNP_I219_V7, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_ICP_I219_LM8, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_ICP_I219_V8, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_ICP_I219_LM9, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_ICP_I219_V9, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_CMP_I219_LM10, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_CMP_I219_V10, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_CMP_I219_LM11, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_CMP_I219_V11, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_CMP_I219_LM12, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_CMP_I219_V12, "Intel(R) PRO/1000 Network Connection"), /* required last entry */ PVID_END }; static pci_vendor_info_t igb_vendor_info_array[] = { /* Intel(R) PRO/1000 Network Connection - igb */ PVID(0x8086, E1000_DEV_ID_82575EB_COPPER, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_82576, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_82576_NS, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_82576_NS_SERDES, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_82576_FIBER, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_82576_SERDES, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_82576_SERDES_QUAD, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_82576_QUAD_COPPER, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_82576_QUAD_COPPER_ET2, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_82576_VF, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_82580_COPPER, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_82580_FIBER, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_82580_SERDES, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_82580_SGMII, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_82580_COPPER_DUAL, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_82580_QUAD_FIBER, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_DH89XXCC_SERDES, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_DH89XXCC_SGMII, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_DH89XXCC_SFP, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_DH89XXCC_BACKPLANE, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_I350_COPPER, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_I350_FIBER, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_I350_SERDES, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_I350_SGMII, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_I350_VF, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_I210_COPPER, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_I210_COPPER_IT, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_I210_COPPER_OEM1, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_I210_COPPER_FLASHLESS, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_I210_SERDES_FLASHLESS, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_I210_FIBER, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_I210_SERDES, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_I210_SGMII, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_I211_COPPER, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_I354_BACKPLANE_1GBPS, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_I354_BACKPLANE_2_5GBPS, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_I354_SGMII, "Intel(R) PRO/1000 PCI-Express Network Driver"), /* required last entry */ PVID_END }; /********************************************************************* * Function prototypes *********************************************************************/ static void *em_register(device_t dev); static void *igb_register(device_t dev); static int em_if_attach_pre(if_ctx_t ctx); static int em_if_attach_post(if_ctx_t ctx); static int em_if_detach(if_ctx_t ctx); static int em_if_shutdown(if_ctx_t ctx); static int em_if_suspend(if_ctx_t ctx); static int em_if_resume(if_ctx_t ctx); static int em_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int ntxqs, int ntxqsets); static int em_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int nrxqs, int nrxqsets); static void em_if_queues_free(if_ctx_t ctx); static uint64_t em_if_get_counter(if_ctx_t, ift_counter); static void em_if_init(if_ctx_t ctx); static void em_if_stop(if_ctx_t ctx); static void em_if_media_status(if_ctx_t, struct ifmediareq *); static int em_if_media_change(if_ctx_t ctx); static int em_if_mtu_set(if_ctx_t ctx, uint32_t mtu); static void em_if_timer(if_ctx_t ctx, uint16_t qid); static void em_if_vlan_register(if_ctx_t ctx, u16 vtag); static void em_if_vlan_unregister(if_ctx_t ctx, u16 vtag); static void em_if_watchdog_reset(if_ctx_t ctx); static bool em_if_needs_restart(if_ctx_t ctx, enum iflib_restart_event event); static void em_identify_hardware(if_ctx_t ctx); static int em_allocate_pci_resources(if_ctx_t ctx); static void em_free_pci_resources(if_ctx_t ctx); static void em_reset(if_ctx_t ctx); static int em_setup_interface(if_ctx_t ctx); static int em_setup_msix(if_ctx_t ctx); static void em_initialize_transmit_unit(if_ctx_t ctx); static void em_initialize_receive_unit(if_ctx_t ctx); static void em_if_intr_enable(if_ctx_t ctx); static void em_if_intr_disable(if_ctx_t ctx); static void igb_if_intr_enable(if_ctx_t ctx); static void igb_if_intr_disable(if_ctx_t ctx); static int em_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid); static int em_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid); static int igb_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid); static int igb_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid); static void em_if_multi_set(if_ctx_t ctx); static void em_if_update_admin_status(if_ctx_t ctx); static void em_if_debug(if_ctx_t ctx); static void em_update_stats_counters(struct adapter *); static void em_add_hw_stats(struct adapter *adapter); static int em_if_set_promisc(if_ctx_t ctx, int flags); static void em_setup_vlan_hw_support(struct adapter *); static int em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS); static void em_print_nvm_info(struct adapter *); static int em_sysctl_debug_info(SYSCTL_HANDLER_ARGS); static int em_get_rs(SYSCTL_HANDLER_ARGS); static void em_print_debug_info(struct adapter *); static int em_is_valid_ether_addr(u8 *); static int em_sysctl_int_delay(SYSCTL_HANDLER_ARGS); static void em_add_int_delay_sysctl(struct adapter *, const char *, const char *, struct em_int_delay_info *, int, int); /* Management and WOL Support */ static void em_init_manageability(struct adapter *); static void em_release_manageability(struct adapter *); static void em_get_hw_control(struct adapter *); static void em_release_hw_control(struct adapter *); static void em_get_wakeup(if_ctx_t ctx); static void em_enable_wakeup(if_ctx_t ctx); static int em_enable_phy_wakeup(struct adapter *); static void em_disable_aspm(struct adapter *); int em_intr(void *arg); static void em_disable_promisc(if_ctx_t ctx); /* MSI-X handlers */ static int em_if_msix_intr_assign(if_ctx_t, int); static int em_msix_link(void *); static void em_handle_link(void *context); static void em_enable_vectors_82574(if_ctx_t); static int em_set_flowcntl(SYSCTL_HANDLER_ARGS); static int em_sysctl_eee(SYSCTL_HANDLER_ARGS); static void em_if_led_func(if_ctx_t ctx, int onoff); static int em_get_regs(SYSCTL_HANDLER_ARGS); static void lem_smartspeed(struct adapter *adapter); static void igb_configure_queues(struct adapter *adapter); /********************************************************************* * FreeBSD Device Interface Entry Points *********************************************************************/ static device_method_t em_methods[] = { /* Device interface */ DEVMETHOD(device_register, em_register), DEVMETHOD(device_probe, iflib_device_probe), DEVMETHOD(device_attach, iflib_device_attach), DEVMETHOD(device_detach, iflib_device_detach), DEVMETHOD(device_shutdown, iflib_device_shutdown), DEVMETHOD(device_suspend, iflib_device_suspend), DEVMETHOD(device_resume, iflib_device_resume), DEVMETHOD_END }; static device_method_t igb_methods[] = { /* Device interface */ DEVMETHOD(device_register, igb_register), DEVMETHOD(device_probe, iflib_device_probe), DEVMETHOD(device_attach, iflib_device_attach), DEVMETHOD(device_detach, iflib_device_detach), DEVMETHOD(device_shutdown, iflib_device_shutdown), DEVMETHOD(device_suspend, iflib_device_suspend), DEVMETHOD(device_resume, iflib_device_resume), DEVMETHOD_END }; static driver_t em_driver = { "em", em_methods, sizeof(struct adapter), }; static devclass_t em_devclass; DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0); MODULE_DEPEND(em, pci, 1, 1, 1); MODULE_DEPEND(em, ether, 1, 1, 1); MODULE_DEPEND(em, iflib, 1, 1, 1); IFLIB_PNP_INFO(pci, em, em_vendor_info_array); static driver_t igb_driver = { "igb", igb_methods, sizeof(struct adapter), }; static devclass_t igb_devclass; DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0); MODULE_DEPEND(igb, pci, 1, 1, 1); MODULE_DEPEND(igb, ether, 1, 1, 1); MODULE_DEPEND(igb, iflib, 1, 1, 1); IFLIB_PNP_INFO(pci, igb, igb_vendor_info_array); static device_method_t em_if_methods[] = { DEVMETHOD(ifdi_attach_pre, em_if_attach_pre), DEVMETHOD(ifdi_attach_post, em_if_attach_post), DEVMETHOD(ifdi_detach, em_if_detach), DEVMETHOD(ifdi_shutdown, em_if_shutdown), DEVMETHOD(ifdi_suspend, em_if_suspend), DEVMETHOD(ifdi_resume, em_if_resume), DEVMETHOD(ifdi_init, em_if_init), DEVMETHOD(ifdi_stop, em_if_stop), DEVMETHOD(ifdi_msix_intr_assign, em_if_msix_intr_assign), DEVMETHOD(ifdi_intr_enable, em_if_intr_enable), DEVMETHOD(ifdi_intr_disable, em_if_intr_disable), DEVMETHOD(ifdi_tx_queues_alloc, em_if_tx_queues_alloc), DEVMETHOD(ifdi_rx_queues_alloc, em_if_rx_queues_alloc), DEVMETHOD(ifdi_queues_free, em_if_queues_free), DEVMETHOD(ifdi_update_admin_status, em_if_update_admin_status), DEVMETHOD(ifdi_multi_set, em_if_multi_set), DEVMETHOD(ifdi_media_status, em_if_media_status), DEVMETHOD(ifdi_media_change, em_if_media_change), DEVMETHOD(ifdi_mtu_set, em_if_mtu_set), DEVMETHOD(ifdi_promisc_set, em_if_set_promisc), DEVMETHOD(ifdi_timer, em_if_timer), DEVMETHOD(ifdi_watchdog_reset, em_if_watchdog_reset), DEVMETHOD(ifdi_vlan_register, em_if_vlan_register), DEVMETHOD(ifdi_vlan_unregister, em_if_vlan_unregister), DEVMETHOD(ifdi_get_counter, em_if_get_counter), DEVMETHOD(ifdi_led_func, em_if_led_func), DEVMETHOD(ifdi_rx_queue_intr_enable, em_if_rx_queue_intr_enable), DEVMETHOD(ifdi_tx_queue_intr_enable, em_if_tx_queue_intr_enable), DEVMETHOD(ifdi_debug, em_if_debug), DEVMETHOD(ifdi_needs_restart, em_if_needs_restart), DEVMETHOD_END }; static driver_t em_if_driver = { "em_if", em_if_methods, sizeof(struct adapter) }; static device_method_t igb_if_methods[] = { DEVMETHOD(ifdi_attach_pre, em_if_attach_pre), DEVMETHOD(ifdi_attach_post, em_if_attach_post), DEVMETHOD(ifdi_detach, em_if_detach), DEVMETHOD(ifdi_shutdown, em_if_shutdown), DEVMETHOD(ifdi_suspend, em_if_suspend), DEVMETHOD(ifdi_resume, em_if_resume), DEVMETHOD(ifdi_init, em_if_init), DEVMETHOD(ifdi_stop, em_if_stop), DEVMETHOD(ifdi_msix_intr_assign, em_if_msix_intr_assign), DEVMETHOD(ifdi_intr_enable, igb_if_intr_enable), DEVMETHOD(ifdi_intr_disable, igb_if_intr_disable), DEVMETHOD(ifdi_tx_queues_alloc, em_if_tx_queues_alloc), DEVMETHOD(ifdi_rx_queues_alloc, em_if_rx_queues_alloc), DEVMETHOD(ifdi_queues_free, em_if_queues_free), DEVMETHOD(ifdi_update_admin_status, em_if_update_admin_status), DEVMETHOD(ifdi_multi_set, em_if_multi_set), DEVMETHOD(ifdi_media_status, em_if_media_status), DEVMETHOD(ifdi_media_change, em_if_media_change), DEVMETHOD(ifdi_mtu_set, em_if_mtu_set), DEVMETHOD(ifdi_promisc_set, em_if_set_promisc), DEVMETHOD(ifdi_timer, em_if_timer), DEVMETHOD(ifdi_watchdog_reset, em_if_watchdog_reset), DEVMETHOD(ifdi_vlan_register, em_if_vlan_register), DEVMETHOD(ifdi_vlan_unregister, em_if_vlan_unregister), DEVMETHOD(ifdi_get_counter, em_if_get_counter), DEVMETHOD(ifdi_led_func, em_if_led_func), DEVMETHOD(ifdi_rx_queue_intr_enable, igb_if_rx_queue_intr_enable), DEVMETHOD(ifdi_tx_queue_intr_enable, igb_if_tx_queue_intr_enable), DEVMETHOD(ifdi_debug, em_if_debug), DEVMETHOD(ifdi_needs_restart, em_if_needs_restart), DEVMETHOD_END }; static driver_t igb_if_driver = { "igb_if", igb_if_methods, sizeof(struct adapter) }; /********************************************************************* * Tunable default values. *********************************************************************/ #define EM_TICKS_TO_USECS(ticks) ((1024 * (ticks) + 500) / 1000) #define EM_USECS_TO_TICKS(usecs) ((1000 * (usecs) + 512) / 1024) #define MAX_INTS_PER_SEC 8000 #define DEFAULT_ITR (1000000000/(MAX_INTS_PER_SEC * 256)) /* Allow common code without TSO */ #ifndef CSUM_TSO #define CSUM_TSO 0 #endif static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "EM driver parameters"); static int em_disable_crc_stripping = 0; SYSCTL_INT(_hw_em, OID_AUTO, disable_crc_stripping, CTLFLAG_RDTUN, &em_disable_crc_stripping, 0, "Disable CRC Stripping"); static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV); static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR); SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt, 0, "Default transmit interrupt delay in usecs"); SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt, 0, "Default receive interrupt delay in usecs"); static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV); static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV); SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN, &em_tx_abs_int_delay_dflt, 0, "Default transmit interrupt delay limit in usecs"); SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN, &em_rx_abs_int_delay_dflt, 0, "Default receive interrupt delay limit in usecs"); static int em_smart_pwr_down = FALSE; SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down, 0, "Set to true to leave smart power down enabled on newer adapters"); /* Controls whether promiscuous also shows bad packets */ static int em_debug_sbp = TRUE; SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0, "Show bad packets in promiscuous mode"); /* How many packets rxeof tries to clean at a time */ static int em_rx_process_limit = 100; SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN, &em_rx_process_limit, 0, "Maximum number of received packets to process " "at a time, -1 means unlimited"); /* Energy efficient ethernet - default to OFF */ static int eee_setting = 1; SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0, "Enable Energy Efficient Ethernet"); /* ** Tuneable Interrupt rate */ static int em_max_interrupt_rate = 8000; SYSCTL_INT(_hw_em, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN, &em_max_interrupt_rate, 0, "Maximum interrupts per second"); /* Global used in WOL setup with multiport cards */ static int global_quad_port_a = 0; extern struct if_txrx igb_txrx; extern struct if_txrx em_txrx; extern struct if_txrx lem_txrx; static struct if_shared_ctx em_sctx_init = { .isc_magic = IFLIB_MAGIC, .isc_q_align = PAGE_SIZE, .isc_tx_maxsize = EM_TSO_SIZE + sizeof(struct ether_vlan_header), .isc_tx_maxsegsize = PAGE_SIZE, .isc_tso_maxsize = EM_TSO_SIZE + sizeof(struct ether_vlan_header), .isc_tso_maxsegsize = EM_TSO_SEG_SIZE, .isc_rx_maxsize = MJUM9BYTES, .isc_rx_nsegments = 1, .isc_rx_maxsegsize = MJUM9BYTES, .isc_nfl = 1, .isc_nrxqs = 1, .isc_ntxqs = 1, .isc_admin_intrcnt = 1, .isc_vendor_info = em_vendor_info_array, .isc_driver_version = em_driver_version, .isc_driver = &em_if_driver, .isc_flags = IFLIB_NEED_SCRATCH | IFLIB_TSO_INIT_IP | IFLIB_NEED_ZERO_CSUM, .isc_nrxd_min = {EM_MIN_RXD}, .isc_ntxd_min = {EM_MIN_TXD}, .isc_nrxd_max = {EM_MAX_RXD}, .isc_ntxd_max = {EM_MAX_TXD}, .isc_nrxd_default = {EM_DEFAULT_RXD}, .isc_ntxd_default = {EM_DEFAULT_TXD}, }; -if_shared_ctx_t em_sctx = &em_sctx_init; - static struct if_shared_ctx igb_sctx_init = { .isc_magic = IFLIB_MAGIC, .isc_q_align = PAGE_SIZE, .isc_tx_maxsize = EM_TSO_SIZE + sizeof(struct ether_vlan_header), .isc_tx_maxsegsize = PAGE_SIZE, .isc_tso_maxsize = EM_TSO_SIZE + sizeof(struct ether_vlan_header), .isc_tso_maxsegsize = EM_TSO_SEG_SIZE, .isc_rx_maxsize = MJUM9BYTES, .isc_rx_nsegments = 1, .isc_rx_maxsegsize = MJUM9BYTES, .isc_nfl = 1, .isc_nrxqs = 1, .isc_ntxqs = 1, .isc_admin_intrcnt = 1, .isc_vendor_info = igb_vendor_info_array, .isc_driver_version = em_driver_version, .isc_driver = &igb_if_driver, .isc_flags = IFLIB_NEED_SCRATCH | IFLIB_TSO_INIT_IP | IFLIB_NEED_ZERO_CSUM, .isc_nrxd_min = {EM_MIN_RXD}, .isc_ntxd_min = {EM_MIN_TXD}, .isc_nrxd_max = {IGB_MAX_RXD}, .isc_ntxd_max = {IGB_MAX_TXD}, .isc_nrxd_default = {EM_DEFAULT_RXD}, .isc_ntxd_default = {EM_DEFAULT_TXD}, }; -if_shared_ctx_t igb_sctx = &igb_sctx_init; - /***************************************************************** * * Dump Registers * ****************************************************************/ #define IGB_REGS_LEN 739 static int em_get_regs(SYSCTL_HANDLER_ARGS) { struct adapter *adapter = (struct adapter *)arg1; struct e1000_hw *hw = &adapter->hw; struct sbuf *sb; u32 *regs_buff; int rc; regs_buff = malloc(sizeof(u32) * IGB_REGS_LEN, M_DEVBUF, M_WAITOK); memset(regs_buff, 0, IGB_REGS_LEN * sizeof(u32)); rc = sysctl_wire_old_buffer(req, 0); MPASS(rc == 0); if (rc != 0) { free(regs_buff, M_DEVBUF); return (rc); } sb = sbuf_new_for_sysctl(NULL, NULL, 32*400, req); MPASS(sb != NULL); if (sb == NULL) { free(regs_buff, M_DEVBUF); return (ENOMEM); } /* General Registers */ regs_buff[0] = E1000_READ_REG(hw, E1000_CTRL); regs_buff[1] = E1000_READ_REG(hw, E1000_STATUS); regs_buff[2] = E1000_READ_REG(hw, E1000_CTRL_EXT); regs_buff[3] = E1000_READ_REG(hw, E1000_ICR); regs_buff[4] = E1000_READ_REG(hw, E1000_RCTL); regs_buff[5] = E1000_READ_REG(hw, E1000_RDLEN(0)); regs_buff[6] = E1000_READ_REG(hw, E1000_RDH(0)); regs_buff[7] = E1000_READ_REG(hw, E1000_RDT(0)); regs_buff[8] = E1000_READ_REG(hw, E1000_RXDCTL(0)); regs_buff[9] = E1000_READ_REG(hw, E1000_RDBAL(0)); regs_buff[10] = E1000_READ_REG(hw, E1000_RDBAH(0)); regs_buff[11] = E1000_READ_REG(hw, E1000_TCTL); regs_buff[12] = E1000_READ_REG(hw, E1000_TDBAL(0)); regs_buff[13] = E1000_READ_REG(hw, E1000_TDBAH(0)); regs_buff[14] = E1000_READ_REG(hw, E1000_TDLEN(0)); regs_buff[15] = E1000_READ_REG(hw, E1000_TDH(0)); regs_buff[16] = E1000_READ_REG(hw, E1000_TDT(0)); regs_buff[17] = E1000_READ_REG(hw, E1000_TXDCTL(0)); regs_buff[18] = E1000_READ_REG(hw, E1000_TDFH); regs_buff[19] = E1000_READ_REG(hw, E1000_TDFT); regs_buff[20] = E1000_READ_REG(hw, E1000_TDFHS); regs_buff[21] = E1000_READ_REG(hw, E1000_TDFPC); sbuf_printf(sb, "General Registers\n"); sbuf_printf(sb, "\tCTRL\t %08x\n", regs_buff[0]); sbuf_printf(sb, "\tSTATUS\t %08x\n", regs_buff[1]); sbuf_printf(sb, "\tCTRL_EXIT\t %08x\n\n", regs_buff[2]); sbuf_printf(sb, "Interrupt Registers\n"); sbuf_printf(sb, "\tICR\t %08x\n\n", regs_buff[3]); sbuf_printf(sb, "RX Registers\n"); sbuf_printf(sb, "\tRCTL\t %08x\n", regs_buff[4]); sbuf_printf(sb, "\tRDLEN\t %08x\n", regs_buff[5]); sbuf_printf(sb, "\tRDH\t %08x\n", regs_buff[6]); sbuf_printf(sb, "\tRDT\t %08x\n", regs_buff[7]); sbuf_printf(sb, "\tRXDCTL\t %08x\n", regs_buff[8]); sbuf_printf(sb, "\tRDBAL\t %08x\n", regs_buff[9]); sbuf_printf(sb, "\tRDBAH\t %08x\n\n", regs_buff[10]); sbuf_printf(sb, "TX Registers\n"); sbuf_printf(sb, "\tTCTL\t %08x\n", regs_buff[11]); sbuf_printf(sb, "\tTDBAL\t %08x\n", regs_buff[12]); sbuf_printf(sb, "\tTDBAH\t %08x\n", regs_buff[13]); sbuf_printf(sb, "\tTDLEN\t %08x\n", regs_buff[14]); sbuf_printf(sb, "\tTDH\t %08x\n", regs_buff[15]); sbuf_printf(sb, "\tTDT\t %08x\n", regs_buff[16]); sbuf_printf(sb, "\tTXDCTL\t %08x\n", regs_buff[17]); sbuf_printf(sb, "\tTDFH\t %08x\n", regs_buff[18]); sbuf_printf(sb, "\tTDFT\t %08x\n", regs_buff[19]); sbuf_printf(sb, "\tTDFHS\t %08x\n", regs_buff[20]); sbuf_printf(sb, "\tTDFPC\t %08x\n\n", regs_buff[21]); free(regs_buff, M_DEVBUF); #ifdef DUMP_DESCS { if_softc_ctx_t scctx = adapter->shared; struct rx_ring *rxr = &rx_que->rxr; struct tx_ring *txr = &tx_que->txr; int ntxd = scctx->isc_ntxd[0]; int nrxd = scctx->isc_nrxd[0]; int j; for (j = 0; j < nrxd; j++) { u32 staterr = le32toh(rxr->rx_base[j].wb.upper.status_error); u32 length = le32toh(rxr->rx_base[j].wb.upper.length); sbuf_printf(sb, "\tReceive Descriptor Address %d: %08" PRIx64 " Error:%d Length:%d\n", j, rxr->rx_base[j].read.buffer_addr, staterr, length); } for (j = 0; j < min(ntxd, 256); j++) { unsigned int *ptr = (unsigned int *)&txr->tx_base[j]; sbuf_printf(sb, "\tTXD[%03d] [0]: %08x [1]: %08x [2]: %08x [3]: %08x eop: %d DD=%d\n", j, ptr[0], ptr[1], ptr[2], ptr[3], buf->eop, buf->eop != -1 ? txr->tx_base[buf->eop].upper.fields.status & E1000_TXD_STAT_DD : 0); } } #endif rc = sbuf_finish(sb); sbuf_delete(sb); return(rc); } static void * em_register(device_t dev) { - return (em_sctx); + return (&em_sctx_init); } static void * igb_register(device_t dev) { - return (igb_sctx); + return (&igb_sctx_init); } static int em_set_num_queues(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); int maxqueues; /* Sanity check based on HW */ switch (adapter->hw.mac.type) { case e1000_82576: case e1000_82580: case e1000_i350: case e1000_i354: maxqueues = 8; break; case e1000_i210: case e1000_82575: maxqueues = 4; break; case e1000_i211: case e1000_82574: maxqueues = 2; break; default: maxqueues = 1; break; } return (maxqueues); } #define LEM_CAPS \ IFCAP_HWCSUM | IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING | \ IFCAP_VLAN_HWCSUM | IFCAP_WOL | IFCAP_VLAN_HWFILTER #define EM_CAPS \ IFCAP_HWCSUM | IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING | \ IFCAP_VLAN_HWCSUM | IFCAP_WOL | IFCAP_VLAN_HWFILTER | IFCAP_TSO4 | \ IFCAP_LRO | IFCAP_VLAN_HWTSO #define IGB_CAPS \ IFCAP_HWCSUM | IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING | \ IFCAP_VLAN_HWCSUM | IFCAP_WOL | IFCAP_VLAN_HWFILTER | IFCAP_TSO4 | \ IFCAP_LRO | IFCAP_VLAN_HWTSO | IFCAP_JUMBO_MTU | IFCAP_HWCSUM_IPV6 |\ IFCAP_TSO6 /********************************************************************* * Device initialization routine * * The attach entry point is called when the driver is being loaded. * This routine identifies the type of hardware, allocates all resources * and initializes the hardware. * * return 0 on success, positive on failure *********************************************************************/ static int em_if_attach_pre(if_ctx_t ctx) { struct adapter *adapter; if_softc_ctx_t scctx; device_t dev; struct e1000_hw *hw; int error = 0; INIT_DEBUGOUT("em_if_attach_pre: begin"); dev = iflib_get_dev(ctx); adapter = iflib_get_softc(ctx); adapter->ctx = adapter->osdep.ctx = ctx; adapter->dev = adapter->osdep.dev = dev; scctx = adapter->shared = iflib_get_softc_ctx(ctx); adapter->media = iflib_get_media(ctx); hw = &adapter->hw; adapter->tx_process_limit = scctx->isc_ntxd[0]; /* SYSCTL stuff */ SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "nvm", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, adapter, 0, em_sysctl_nvm_info, "I", "NVM Information"); SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "debug", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, adapter, 0, em_sysctl_debug_info, "I", "Debug Information"); SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "fc", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, adapter, 0, em_set_flowcntl, "I", "Flow Control"); SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "reg_dump", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NEEDGIANT, adapter, 0, em_get_regs, "A", "Dump Registers"); SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "rs_dump", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, adapter, 0, em_get_rs, "I", "Dump RS indexes"); /* Determine hardware and mac info */ em_identify_hardware(ctx); scctx->isc_tx_nsegments = EM_MAX_SCATTER; scctx->isc_nrxqsets_max = scctx->isc_ntxqsets_max = em_set_num_queues(ctx); if (bootverbose) device_printf(dev, "attach_pre capping queues at %d\n", scctx->isc_ntxqsets_max); if (adapter->hw.mac.type >= igb_mac_min) { scctx->isc_txqsizes[0] = roundup2(scctx->isc_ntxd[0] * sizeof(union e1000_adv_tx_desc), EM_DBA_ALIGN); scctx->isc_rxqsizes[0] = roundup2(scctx->isc_nrxd[0] * sizeof(union e1000_adv_rx_desc), EM_DBA_ALIGN); scctx->isc_txd_size[0] = sizeof(union e1000_adv_tx_desc); scctx->isc_rxd_size[0] = sizeof(union e1000_adv_rx_desc); scctx->isc_txrx = &igb_txrx; scctx->isc_tx_tso_segments_max = EM_MAX_SCATTER; scctx->isc_tx_tso_size_max = EM_TSO_SIZE; scctx->isc_tx_tso_segsize_max = EM_TSO_SEG_SIZE; scctx->isc_capabilities = scctx->isc_capenable = IGB_CAPS; scctx->isc_tx_csum_flags = CSUM_TCP | CSUM_UDP | CSUM_TSO | CSUM_IP6_TCP | CSUM_IP6_UDP; if (adapter->hw.mac.type != e1000_82575) scctx->isc_tx_csum_flags |= CSUM_SCTP | CSUM_IP6_SCTP; /* ** Some new devices, as with ixgbe, now may ** use a different BAR, so we need to keep ** track of which is used. */ scctx->isc_msix_bar = pci_msix_table_bar(dev); } else if (adapter->hw.mac.type >= em_mac_min) { scctx->isc_txqsizes[0] = roundup2(scctx->isc_ntxd[0]* sizeof(struct e1000_tx_desc), EM_DBA_ALIGN); scctx->isc_rxqsizes[0] = roundup2(scctx->isc_nrxd[0] * sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN); scctx->isc_txd_size[0] = sizeof(struct e1000_tx_desc); scctx->isc_rxd_size[0] = sizeof(union e1000_rx_desc_extended); scctx->isc_txrx = &em_txrx; scctx->isc_tx_tso_segments_max = EM_MAX_SCATTER; scctx->isc_tx_tso_size_max = EM_TSO_SIZE; scctx->isc_tx_tso_segsize_max = EM_TSO_SEG_SIZE; scctx->isc_capabilities = scctx->isc_capenable = EM_CAPS; /* * For EM-class devices, don't enable IFCAP_{TSO4,VLAN_HWTSO} * by default as we don't have workarounds for all associated * silicon errata. E. g., with several MACs such as 82573E, * TSO only works at Gigabit speed and otherwise can cause the * hardware to hang (which also would be next to impossible to * work around given that already queued TSO-using descriptors * would need to be flushed and vlan(4) reconfigured at runtime * in case of a link speed change). Moreover, MACs like 82579 * still can hang at Gigabit even with all publicly documented * TSO workarounds implemented. Generally, the penality of * these workarounds is rather high and may involve copying * mbuf data around so advantages of TSO lapse. Still, TSO may * work for a few MACs of this class - at least when sticking * with Gigabit - in which case users may enable TSO manually. */ scctx->isc_capenable &= ~(IFCAP_TSO4 | IFCAP_VLAN_HWTSO); scctx->isc_tx_csum_flags = CSUM_TCP | CSUM_UDP | CSUM_IP_TSO; /* * We support MSI-X with 82574 only, but indicate to iflib(4) * that it shall give MSI at least a try with other devices. */ if (adapter->hw.mac.type == e1000_82574) { scctx->isc_msix_bar = pci_msix_table_bar(dev);; } else { scctx->isc_msix_bar = -1; scctx->isc_disable_msix = 1; } } else { scctx->isc_txqsizes[0] = roundup2((scctx->isc_ntxd[0] + 1) * sizeof(struct e1000_tx_desc), EM_DBA_ALIGN); scctx->isc_rxqsizes[0] = roundup2((scctx->isc_nrxd[0] + 1) * sizeof(struct e1000_rx_desc), EM_DBA_ALIGN); scctx->isc_txd_size[0] = sizeof(struct e1000_tx_desc); scctx->isc_rxd_size[0] = sizeof(struct e1000_rx_desc); scctx->isc_tx_csum_flags = CSUM_TCP | CSUM_UDP; scctx->isc_txrx = &lem_txrx; scctx->isc_capabilities = scctx->isc_capenable = LEM_CAPS; if (adapter->hw.mac.type < e1000_82543) scctx->isc_capenable &= ~(IFCAP_HWCSUM|IFCAP_VLAN_HWCSUM); /* INTx only */ scctx->isc_msix_bar = 0; } /* Setup PCI resources */ if (em_allocate_pci_resources(ctx)) { device_printf(dev, "Allocation of PCI resources failed\n"); error = ENXIO; goto err_pci; } /* ** For ICH8 and family we need to ** map the flash memory, and this ** must happen after the MAC is ** identified */ if ((hw->mac.type == e1000_ich8lan) || (hw->mac.type == e1000_ich9lan) || (hw->mac.type == e1000_ich10lan) || (hw->mac.type == e1000_pchlan) || (hw->mac.type == e1000_pch2lan) || (hw->mac.type == e1000_pch_lpt)) { int rid = EM_BAR_TYPE_FLASH; adapter->flash = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (adapter->flash == NULL) { device_printf(dev, "Mapping of Flash failed\n"); error = ENXIO; goto err_pci; } /* This is used in the shared code */ hw->flash_address = (u8 *)adapter->flash; adapter->osdep.flash_bus_space_tag = rman_get_bustag(adapter->flash); adapter->osdep.flash_bus_space_handle = rman_get_bushandle(adapter->flash); } /* ** In the new SPT device flash is not a ** separate BAR, rather it is also in BAR0, ** so use the same tag and an offset handle for the ** FLASH read/write macros in the shared code. */ else if (hw->mac.type >= e1000_pch_spt) { adapter->osdep.flash_bus_space_tag = adapter->osdep.mem_bus_space_tag; adapter->osdep.flash_bus_space_handle = adapter->osdep.mem_bus_space_handle + E1000_FLASH_BASE_ADDR; } /* Do Shared Code initialization */ error = e1000_setup_init_funcs(hw, TRUE); if (error) { device_printf(dev, "Setup of Shared code failed, error %d\n", error); error = ENXIO; goto err_pci; } em_setup_msix(ctx); e1000_get_bus_info(hw); /* Set up some sysctls for the tunable interrupt delays */ em_add_int_delay_sysctl(adapter, "rx_int_delay", "receive interrupt delay in usecs", &adapter->rx_int_delay, E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt); em_add_int_delay_sysctl(adapter, "tx_int_delay", "transmit interrupt delay in usecs", &adapter->tx_int_delay, E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt); em_add_int_delay_sysctl(adapter, "rx_abs_int_delay", "receive interrupt delay limit in usecs", &adapter->rx_abs_int_delay, E1000_REGISTER(hw, E1000_RADV), em_rx_abs_int_delay_dflt); em_add_int_delay_sysctl(adapter, "tx_abs_int_delay", "transmit interrupt delay limit in usecs", &adapter->tx_abs_int_delay, E1000_REGISTER(hw, E1000_TADV), em_tx_abs_int_delay_dflt); em_add_int_delay_sysctl(adapter, "itr", "interrupt delay limit in usecs/4", &adapter->tx_itr, E1000_REGISTER(hw, E1000_ITR), DEFAULT_ITR); hw->mac.autoneg = DO_AUTO_NEG; hw->phy.autoneg_wait_to_complete = FALSE; hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT; if (adapter->hw.mac.type < em_mac_min) { e1000_init_script_state_82541(&adapter->hw, TRUE); e1000_set_tbi_compatibility_82543(&adapter->hw, TRUE); } /* Copper options */ if (hw->phy.media_type == e1000_media_type_copper) { hw->phy.mdix = AUTO_ALL_MODES; hw->phy.disable_polarity_correction = FALSE; hw->phy.ms_type = EM_MASTER_SLAVE; } /* * Set the frame limits assuming * standard ethernet sized frames. */ scctx->isc_max_frame_size = adapter->hw.mac.max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE; /* * This controls when hardware reports transmit completion * status. */ hw->mac.report_tx_early = 1; /* Allocate multicast array memory. */ adapter->mta = malloc(sizeof(u8) * ETHER_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT); if (adapter->mta == NULL) { device_printf(dev, "Can not allocate multicast setup array\n"); error = ENOMEM; goto err_late; } /* Check SOL/IDER usage */ if (e1000_check_reset_block(hw)) device_printf(dev, "PHY reset is blocked" " due to SOL/IDER session.\n"); /* Sysctl for setting Energy Efficient Ethernet */ hw->dev_spec.ich8lan.eee_disable = eee_setting; SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "eee_control", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, adapter, 0, em_sysctl_eee, "I", "Disable Energy Efficient Ethernet"); /* ** Start from a known state, this is ** important in reading the nvm and ** mac from that. */ e1000_reset_hw(hw); /* Make sure we have a good EEPROM before we read from it */ if (e1000_validate_nvm_checksum(hw) < 0) { /* ** Some PCI-E parts fail the first check due to ** the link being in sleep state, call it again, ** if it fails a second time its a real issue. */ if (e1000_validate_nvm_checksum(hw) < 0) { device_printf(dev, "The EEPROM Checksum Is Not Valid\n"); error = EIO; goto err_late; } } /* Copy the permanent MAC address out of the EEPROM */ if (e1000_read_mac_addr(hw) < 0) { device_printf(dev, "EEPROM read error while reading MAC" " address\n"); error = EIO; goto err_late; } if (!em_is_valid_ether_addr(hw->mac.addr)) { device_printf(dev, "Invalid MAC address\n"); error = EIO; goto err_late; } /* Disable ULP support */ e1000_disable_ulp_lpt_lp(hw, TRUE); /* * Get Wake-on-Lan and Management info for later use */ em_get_wakeup(ctx); /* Enable only WOL MAGIC by default */ scctx->isc_capenable &= ~IFCAP_WOL; if (adapter->wol != 0) scctx->isc_capenable |= IFCAP_WOL_MAGIC; iflib_set_mac(ctx, hw->mac.addr); return (0); err_late: em_release_hw_control(adapter); err_pci: em_free_pci_resources(ctx); free(adapter->mta, M_DEVBUF); return (error); } static int em_if_attach_post(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct e1000_hw *hw = &adapter->hw; int error = 0; /* Setup OS specific network interface */ error = em_setup_interface(ctx); if (error != 0) { device_printf(adapter->dev, "Interface setup failed: %d\n", error); goto err_late; } em_reset(ctx); /* Initialize statistics */ em_update_stats_counters(adapter); hw->mac.get_link_status = 1; em_if_update_admin_status(ctx); em_add_hw_stats(adapter); /* Non-AMT based hardware can now take control from firmware */ if (adapter->has_manage && !adapter->has_amt) em_get_hw_control(adapter); INIT_DEBUGOUT("em_if_attach_post: end"); return (0); err_late: /* upon attach_post() error, iflib calls _if_detach() to free resources. */ return (error); } /********************************************************************* * Device removal routine * * The detach entry point is called when the driver is being removed. * This routine stops the adapter and deallocates all the resources * that were allocated for driver operation. * * return 0 on success, positive on failure *********************************************************************/ static int em_if_detach(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); INIT_DEBUGOUT("em_if_detach: begin"); e1000_phy_hw_reset(&adapter->hw); em_release_manageability(adapter); em_release_hw_control(adapter); em_free_pci_resources(ctx); free(adapter->mta, M_DEVBUF); adapter->mta = NULL; return (0); } /********************************************************************* * * Shutdown entry point * **********************************************************************/ static int em_if_shutdown(if_ctx_t ctx) { return em_if_suspend(ctx); } /* * Suspend/resume device methods. */ static int em_if_suspend(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); em_release_manageability(adapter); em_release_hw_control(adapter); em_enable_wakeup(ctx); return (0); } static int em_if_resume(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); if (adapter->hw.mac.type == e1000_pch2lan) e1000_resume_workarounds_pchlan(&adapter->hw); em_if_init(ctx); em_init_manageability(adapter); return(0); } static int em_if_mtu_set(if_ctx_t ctx, uint32_t mtu) { int max_frame_size; struct adapter *adapter = iflib_get_softc(ctx); if_softc_ctx_t scctx = iflib_get_softc_ctx(ctx); IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)"); switch (adapter->hw.mac.type) { case e1000_82571: case e1000_82572: case e1000_ich9lan: case e1000_ich10lan: case e1000_pch2lan: case e1000_pch_lpt: case e1000_pch_spt: case e1000_pch_cnp: case e1000_82574: case e1000_82583: case e1000_80003es2lan: /* 9K Jumbo Frame size */ max_frame_size = 9234; break; case e1000_pchlan: max_frame_size = 4096; break; case e1000_82542: case e1000_ich8lan: /* Adapters that do not support jumbo frames */ max_frame_size = ETHER_MAX_LEN; break; default: if (adapter->hw.mac.type >= igb_mac_min) max_frame_size = 9234; else /* lem */ max_frame_size = MAX_JUMBO_FRAME_SIZE; } if (mtu > max_frame_size - ETHER_HDR_LEN - ETHER_CRC_LEN) { return (EINVAL); } scctx->isc_max_frame_size = adapter->hw.mac.max_frame_size = mtu + ETHER_HDR_LEN + ETHER_CRC_LEN; return (0); } /********************************************************************* * Init entry point * * This routine is used in two ways. It is used by the stack as * init entry point in network interface structure. It is also used * by the driver as a hw/sw initialization routine to get to a * consistent state. * **********************************************************************/ static void em_if_init(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); if_softc_ctx_t scctx = adapter->shared; struct ifnet *ifp = iflib_get_ifp(ctx); struct em_tx_queue *tx_que; int i; INIT_DEBUGOUT("em_if_init: begin"); /* Get the latest mac address, User can use a LAA */ bcopy(if_getlladdr(ifp), adapter->hw.mac.addr, ETHER_ADDR_LEN); /* Put the address into the Receive Address Array */ e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0); /* * With the 82571 adapter, RAR[0] may be overwritten * when the other port is reset, we make a duplicate * in RAR[14] for that eventuality, this assures * the interface continues to function. */ if (adapter->hw.mac.type == e1000_82571) { e1000_set_laa_state_82571(&adapter->hw, TRUE); e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, E1000_RAR_ENTRIES - 1); } /* Initialize the hardware */ em_reset(ctx); em_if_update_admin_status(ctx); for (i = 0, tx_que = adapter->tx_queues; i < adapter->tx_num_queues; i++, tx_que++) { struct tx_ring *txr = &tx_que->txr; txr->tx_rs_cidx = txr->tx_rs_pidx; /* Initialize the last processed descriptor to be the end of * the ring, rather than the start, so that we avoid an * off-by-one error when calculating how many descriptors are * done in the credits_update function. */ txr->tx_cidx_processed = scctx->isc_ntxd[0] - 1; } /* Setup VLAN support, basic and offload if available */ E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN); /* Clear bad data from Rx FIFOs */ if (adapter->hw.mac.type >= igb_mac_min) e1000_rx_fifo_flush_82575(&adapter->hw); /* Configure for OS presence */ em_init_manageability(adapter); /* Prepare transmit descriptors and buffers */ em_initialize_transmit_unit(ctx); /* Setup Multicast table */ em_if_multi_set(ctx); adapter->rx_mbuf_sz = iflib_get_rx_mbuf_sz(ctx); em_initialize_receive_unit(ctx); /* Use real VLAN Filter support? */ if (if_getcapenable(ifp) & IFCAP_VLAN_HWTAGGING) { if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER) /* Use real VLAN Filter support */ em_setup_vlan_hw_support(adapter); else { u32 ctrl; ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL); ctrl |= E1000_CTRL_VME; E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl); } } else { u32 ctrl; ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL); ctrl &= ~E1000_CTRL_VME; E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl); } /* Don't lose promiscuous settings */ em_if_set_promisc(ctx, if_getflags(ifp)); e1000_clear_hw_cntrs_base_generic(&adapter->hw); /* MSI-X configuration for 82574 */ if (adapter->hw.mac.type == e1000_82574) { int tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); tmp |= E1000_CTRL_EXT_PBA_CLR; E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp); /* Set the IVAR - interrupt vector routing. */ E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars); } else if (adapter->intr_type == IFLIB_INTR_MSIX) /* Set up queue routing */ igb_configure_queues(adapter); /* this clears any pending interrupts */ E1000_READ_REG(&adapter->hw, E1000_ICR); E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC); /* AMT based hardware can now take control from firmware */ if (adapter->has_manage && adapter->has_amt) em_get_hw_control(adapter); /* Set Energy Efficient Ethernet */ if (adapter->hw.mac.type >= igb_mac_min && adapter->hw.phy.media_type == e1000_media_type_copper) { if (adapter->hw.mac.type == e1000_i354) e1000_set_eee_i354(&adapter->hw, TRUE, TRUE); else e1000_set_eee_i350(&adapter->hw, TRUE, TRUE); } } /********************************************************************* * * Fast Legacy/MSI Combined Interrupt Service routine * *********************************************************************/ int em_intr(void *arg) { struct adapter *adapter = arg; if_ctx_t ctx = adapter->ctx; u32 reg_icr; reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR); /* Hot eject? */ if (reg_icr == 0xffffffff) return FILTER_STRAY; /* Definitely not our interrupt. */ if (reg_icr == 0x0) return FILTER_STRAY; /* * Starting with the 82571 chip, bit 31 should be used to * determine whether the interrupt belongs to us. */ if (adapter->hw.mac.type >= e1000_82571 && (reg_icr & E1000_ICR_INT_ASSERTED) == 0) return FILTER_STRAY; /* * Only MSI-X interrupts have one-shot behavior by taking advantage * of the EIAC register. Thus, explicitly disable interrupts. This * also works around the MSI message reordering errata on certain * systems. */ IFDI_INTR_DISABLE(ctx); /* Link status change */ if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) em_handle_link(ctx); if (reg_icr & E1000_ICR_RXO) adapter->rx_overruns++; return (FILTER_SCHEDULE_THREAD); } static int em_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid) { struct adapter *adapter = iflib_get_softc(ctx); struct em_rx_queue *rxq = &adapter->rx_queues[rxqid]; E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxq->eims); return (0); } static int em_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid) { struct adapter *adapter = iflib_get_softc(ctx); struct em_tx_queue *txq = &adapter->tx_queues[txqid]; E1000_WRITE_REG(&adapter->hw, E1000_IMS, txq->eims); return (0); } static int igb_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid) { struct adapter *adapter = iflib_get_softc(ctx); struct em_rx_queue *rxq = &adapter->rx_queues[rxqid]; E1000_WRITE_REG(&adapter->hw, E1000_EIMS, rxq->eims); return (0); } static int igb_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid) { struct adapter *adapter = iflib_get_softc(ctx); struct em_tx_queue *txq = &adapter->tx_queues[txqid]; E1000_WRITE_REG(&adapter->hw, E1000_EIMS, txq->eims); return (0); } /********************************************************************* * * MSI-X RX Interrupt Service routine * **********************************************************************/ static int em_msix_que(void *arg) { struct em_rx_queue *que = arg; ++que->irqs; return (FILTER_SCHEDULE_THREAD); } /********************************************************************* * * MSI-X Link Fast Interrupt Service routine * **********************************************************************/ static int em_msix_link(void *arg) { struct adapter *adapter = arg; u32 reg_icr; ++adapter->link_irq; MPASS(adapter->hw.back != NULL); reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR); if (reg_icr & E1000_ICR_RXO) adapter->rx_overruns++; if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { em_handle_link(adapter->ctx); } else if (adapter->hw.mac.type == e1000_82574) { /* Only re-arm 82574 if em_if_update_admin_status() won't. */ E1000_WRITE_REG(&adapter->hw, E1000_IMS, EM_MSIX_LINK | E1000_IMS_LSC); } if (adapter->hw.mac.type == e1000_82574) { /* * Because we must read the ICR for this interrupt it may * clear other causes using autoclear, for this reason we * simply create a soft interrupt for all these vectors. */ if (reg_icr) E1000_WRITE_REG(&adapter->hw, E1000_ICS, adapter->ims); } else { /* Re-arm unconditionally */ E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC); E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask); } return (FILTER_HANDLED); } static void em_handle_link(void *context) { if_ctx_t ctx = context; struct adapter *adapter = iflib_get_softc(ctx); adapter->hw.mac.get_link_status = 1; iflib_admin_intr_deferred(ctx); } /********************************************************************* * * Media Ioctl callback * * This routine is called whenever the user queries the status of * the interface using ifconfig. * **********************************************************************/ static void em_if_media_status(if_ctx_t ctx, struct ifmediareq *ifmr) { struct adapter *adapter = iflib_get_softc(ctx); u_char fiber_type = IFM_1000_SX; INIT_DEBUGOUT("em_if_media_status: begin"); iflib_admin_intr_deferred(ctx); ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; if (!adapter->link_active) { return; } ifmr->ifm_status |= IFM_ACTIVE; if ((adapter->hw.phy.media_type == e1000_media_type_fiber) || (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) { if (adapter->hw.mac.type == e1000_82545) fiber_type = IFM_1000_LX; ifmr->ifm_active |= fiber_type | IFM_FDX; } else { switch (adapter->link_speed) { case 10: ifmr->ifm_active |= IFM_10_T; break; case 100: ifmr->ifm_active |= IFM_100_TX; break; case 1000: ifmr->ifm_active |= IFM_1000_T; break; } if (adapter->link_duplex == FULL_DUPLEX) ifmr->ifm_active |= IFM_FDX; else ifmr->ifm_active |= IFM_HDX; } } /********************************************************************* * * Media Ioctl callback * * This routine is called when the user changes speed/duplex using * media/mediopt option with ifconfig. * **********************************************************************/ static int em_if_media_change(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct ifmedia *ifm = iflib_get_media(ctx); INIT_DEBUGOUT("em_if_media_change: begin"); if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) return (EINVAL); switch (IFM_SUBTYPE(ifm->ifm_media)) { case IFM_AUTO: adapter->hw.mac.autoneg = DO_AUTO_NEG; adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT; break; case IFM_1000_LX: case IFM_1000_SX: case IFM_1000_T: adapter->hw.mac.autoneg = DO_AUTO_NEG; adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL; break; case IFM_100_TX: adapter->hw.mac.autoneg = FALSE; adapter->hw.phy.autoneg_advertised = 0; if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX) adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL; else adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF; break; case IFM_10_T: adapter->hw.mac.autoneg = FALSE; adapter->hw.phy.autoneg_advertised = 0; if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX) adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL; else adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF; break; default: device_printf(adapter->dev, "Unsupported media type\n"); } em_if_init(ctx); return (0); } static int em_if_set_promisc(if_ctx_t ctx, int flags) { struct adapter *adapter = iflib_get_softc(ctx); u32 reg_rctl; em_disable_promisc(ctx); reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); if (flags & IFF_PROMISC) { reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE); /* Turn this on if you want to see bad packets */ if (em_debug_sbp) reg_rctl |= E1000_RCTL_SBP; E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); } else if (flags & IFF_ALLMULTI) { reg_rctl |= E1000_RCTL_MPE; reg_rctl &= ~E1000_RCTL_UPE; E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); } return (0); } static void em_disable_promisc(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct ifnet *ifp = iflib_get_ifp(ctx); u32 reg_rctl; int mcnt = 0; reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); reg_rctl &= (~E1000_RCTL_UPE); if (if_getflags(ifp) & IFF_ALLMULTI) mcnt = MAX_NUM_MULTICAST_ADDRESSES; else mcnt = if_llmaddr_count(ifp); /* Don't disable if in MAX groups */ if (mcnt < MAX_NUM_MULTICAST_ADDRESSES) reg_rctl &= (~E1000_RCTL_MPE); reg_rctl &= (~E1000_RCTL_SBP); E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); } static u_int em_copy_maddr(void *arg, struct sockaddr_dl *sdl, u_int cnt) { u8 *mta = arg; if (cnt == MAX_NUM_MULTICAST_ADDRESSES) return (1); bcopy(LLADDR(sdl), &mta[cnt * ETHER_ADDR_LEN], ETHER_ADDR_LEN); return (1); } /********************************************************************* * Multicast Update * * This routine is called whenever multicast address list is updated. * **********************************************************************/ static void em_if_multi_set(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct ifnet *ifp = iflib_get_ifp(ctx); u32 reg_rctl = 0; u8 *mta; /* Multicast array memory */ int mcnt = 0; IOCTL_DEBUGOUT("em_set_multi: begin"); mta = adapter->mta; bzero(mta, sizeof(u8) * ETHER_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES); if (adapter->hw.mac.type == e1000_82542 && adapter->hw.revision_id == E1000_REVISION_2) { reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE) e1000_pci_clear_mwi(&adapter->hw); reg_rctl |= E1000_RCTL_RST; E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); msec_delay(5); } mcnt = if_foreach_llmaddr(ifp, em_copy_maddr, mta); if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) { reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); reg_rctl |= E1000_RCTL_MPE; E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); } else e1000_update_mc_addr_list(&adapter->hw, mta, mcnt); if (adapter->hw.mac.type == e1000_82542 && adapter->hw.revision_id == E1000_REVISION_2) { reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); reg_rctl &= ~E1000_RCTL_RST; E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); msec_delay(5); if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE) e1000_pci_set_mwi(&adapter->hw); } } /********************************************************************* * Timer routine * * This routine schedules em_if_update_admin_status() to check for * link status and to gather statistics as well as to perform some * controller-specific hardware patting. * **********************************************************************/ static void em_if_timer(if_ctx_t ctx, uint16_t qid) { if (qid != 0) return; iflib_admin_intr_deferred(ctx); } static void em_if_update_admin_status(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct e1000_hw *hw = &adapter->hw; device_t dev = iflib_get_dev(ctx); u32 link_check, thstat, ctrl; link_check = thstat = ctrl = 0; /* Get the cached link value or read phy for real */ switch (hw->phy.media_type) { case e1000_media_type_copper: if (hw->mac.get_link_status) { if (hw->mac.type == e1000_pch_spt) msec_delay(50); /* Do the work to read phy */ e1000_check_for_link(hw); link_check = !hw->mac.get_link_status; if (link_check) /* ESB2 fix */ e1000_cfg_on_link_up(hw); } else { link_check = TRUE; } break; case e1000_media_type_fiber: e1000_check_for_link(hw); link_check = (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU); break; case e1000_media_type_internal_serdes: e1000_check_for_link(hw); link_check = adapter->hw.mac.serdes_has_link; break; /* VF device is type_unknown */ case e1000_media_type_unknown: e1000_check_for_link(hw); link_check = !hw->mac.get_link_status; /* FALLTHROUGH */ default: break; } /* Check for thermal downshift or shutdown */ if (hw->mac.type == e1000_i350) { thstat = E1000_READ_REG(hw, E1000_THSTAT); ctrl = E1000_READ_REG(hw, E1000_CTRL_EXT); } /* Now check for a transition */ if (link_check && (adapter->link_active == 0)) { e1000_get_speed_and_duplex(hw, &adapter->link_speed, &adapter->link_duplex); /* Check if we must disable SPEED_MODE bit on PCI-E */ if ((adapter->link_speed != SPEED_1000) && ((hw->mac.type == e1000_82571) || (hw->mac.type == e1000_82572))) { int tarc0; tarc0 = E1000_READ_REG(hw, E1000_TARC(0)); tarc0 &= ~TARC_SPEED_MODE_BIT; E1000_WRITE_REG(hw, E1000_TARC(0), tarc0); } if (bootverbose) device_printf(dev, "Link is up %d Mbps %s\n", adapter->link_speed, ((adapter->link_duplex == FULL_DUPLEX) ? "Full Duplex" : "Half Duplex")); adapter->link_active = 1; adapter->smartspeed = 0; if ((ctrl & E1000_CTRL_EXT_LINK_MODE_MASK) == E1000_CTRL_EXT_LINK_MODE_GMII && (thstat & E1000_THSTAT_LINK_THROTTLE)) device_printf(dev, "Link: thermal downshift\n"); /* Delay Link Up for Phy update */ if (((hw->mac.type == e1000_i210) || (hw->mac.type == e1000_i211)) && (hw->phy.id == I210_I_PHY_ID)) msec_delay(I210_LINK_DELAY); /* Reset if the media type changed. */ if ((hw->dev_spec._82575.media_changed) && (adapter->hw.mac.type >= igb_mac_min)) { hw->dev_spec._82575.media_changed = false; adapter->flags |= IGB_MEDIA_RESET; em_reset(ctx); } iflib_link_state_change(ctx, LINK_STATE_UP, IF_Mbps(adapter->link_speed)); } else if (!link_check && (adapter->link_active == 1)) { adapter->link_speed = 0; adapter->link_duplex = 0; adapter->link_active = 0; iflib_link_state_change(ctx, LINK_STATE_DOWN, 0); } em_update_stats_counters(adapter); /* Reset LAA into RAR[0] on 82571 */ if (hw->mac.type == e1000_82571 && e1000_get_laa_state_82571(hw)) e1000_rar_set(hw, hw->mac.addr, 0); if (hw->mac.type < em_mac_min) lem_smartspeed(adapter); else if (hw->mac.type == e1000_82574 && adapter->intr_type == IFLIB_INTR_MSIX) E1000_WRITE_REG(&adapter->hw, E1000_IMS, EM_MSIX_LINK | E1000_IMS_LSC); } static void em_if_watchdog_reset(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); /* * Just count the event; iflib(4) will already trigger a * sufficient reset of the controller. */ adapter->watchdog_events++; } /********************************************************************* * * This routine disables all traffic on the adapter by issuing a * global reset on the MAC. * **********************************************************************/ static void em_if_stop(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); INIT_DEBUGOUT("em_if_stop: begin"); e1000_reset_hw(&adapter->hw); if (adapter->hw.mac.type >= e1000_82544) E1000_WRITE_REG(&adapter->hw, E1000_WUFC, 0); e1000_led_off(&adapter->hw); e1000_cleanup_led(&adapter->hw); } /********************************************************************* * * Determine hardware revision. * **********************************************************************/ static void em_identify_hardware(if_ctx_t ctx) { device_t dev = iflib_get_dev(ctx); struct adapter *adapter = iflib_get_softc(ctx); /* Make sure our PCI config space has the necessary stuff set */ adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2); /* Save off the information about this board */ adapter->hw.vendor_id = pci_get_vendor(dev); adapter->hw.device_id = pci_get_device(dev); adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1); adapter->hw.subsystem_vendor_id = pci_read_config(dev, PCIR_SUBVEND_0, 2); adapter->hw.subsystem_device_id = pci_read_config(dev, PCIR_SUBDEV_0, 2); /* Do Shared Code Init and Setup */ if (e1000_set_mac_type(&adapter->hw)) { device_printf(dev, "Setup init failure\n"); return; } } static int em_allocate_pci_resources(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); device_t dev = iflib_get_dev(ctx); int rid, val; rid = PCIR_BAR(0); adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (adapter->memory == NULL) { device_printf(dev, "Unable to allocate bus resource: memory\n"); return (ENXIO); } adapter->osdep.mem_bus_space_tag = rman_get_bustag(adapter->memory); adapter->osdep.mem_bus_space_handle = rman_get_bushandle(adapter->memory); adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle; /* Only older adapters use IO mapping */ if (adapter->hw.mac.type < em_mac_min && adapter->hw.mac.type > e1000_82543) { /* Figure our where our IO BAR is ? */ for (rid = PCIR_BAR(0); rid < PCIR_CIS;) { val = pci_read_config(dev, rid, 4); if (EM_BAR_TYPE(val) == EM_BAR_TYPE_IO) { break; } rid += 4; /* check for 64bit BAR */ if (EM_BAR_MEM_TYPE(val) == EM_BAR_MEM_TYPE_64BIT) rid += 4; } if (rid >= PCIR_CIS) { device_printf(dev, "Unable to locate IO BAR\n"); return (ENXIO); } adapter->ioport = bus_alloc_resource_any(dev, SYS_RES_IOPORT, &rid, RF_ACTIVE); if (adapter->ioport == NULL) { device_printf(dev, "Unable to allocate bus resource: " "ioport\n"); return (ENXIO); } adapter->hw.io_base = 0; adapter->osdep.io_bus_space_tag = rman_get_bustag(adapter->ioport); adapter->osdep.io_bus_space_handle = rman_get_bushandle(adapter->ioport); } adapter->hw.back = &adapter->osdep; return (0); } /********************************************************************* * * Set up the MSI-X Interrupt handlers * **********************************************************************/ static int em_if_msix_intr_assign(if_ctx_t ctx, int msix) { struct adapter *adapter = iflib_get_softc(ctx); struct em_rx_queue *rx_que = adapter->rx_queues; struct em_tx_queue *tx_que = adapter->tx_queues; int error, rid, i, vector = 0, rx_vectors; char buf[16]; /* First set up ring resources */ for (i = 0; i < adapter->rx_num_queues; i++, rx_que++, vector++) { rid = vector + 1; snprintf(buf, sizeof(buf), "rxq%d", i); error = iflib_irq_alloc_generic(ctx, &rx_que->que_irq, rid, IFLIB_INTR_RXTX, em_msix_que, rx_que, rx_que->me, buf); if (error) { device_printf(iflib_get_dev(ctx), "Failed to allocate que int %d err: %d", i, error); adapter->rx_num_queues = i + 1; goto fail; } rx_que->msix = vector; /* * Set the bit to enable interrupt * in E1000_IMS -- bits 20 and 21 * are for RX0 and RX1, note this has * NOTHING to do with the MSI-X vector */ if (adapter->hw.mac.type == e1000_82574) { rx_que->eims = 1 << (20 + i); adapter->ims |= rx_que->eims; adapter->ivars |= (8 | rx_que->msix) << (i * 4); } else if (adapter->hw.mac.type == e1000_82575) rx_que->eims = E1000_EICR_TX_QUEUE0 << vector; else rx_que->eims = 1 << vector; } rx_vectors = vector; vector = 0; for (i = 0; i < adapter->tx_num_queues; i++, tx_que++, vector++) { snprintf(buf, sizeof(buf), "txq%d", i); tx_que = &adapter->tx_queues[i]; iflib_softirq_alloc_generic(ctx, &adapter->rx_queues[i % adapter->rx_num_queues].que_irq, IFLIB_INTR_TX, tx_que, tx_que->me, buf); tx_que->msix = (vector % adapter->rx_num_queues); /* * Set the bit to enable interrupt * in E1000_IMS -- bits 22 and 23 * are for TX0 and TX1, note this has * NOTHING to do with the MSI-X vector */ if (adapter->hw.mac.type == e1000_82574) { tx_que->eims = 1 << (22 + i); adapter->ims |= tx_que->eims; adapter->ivars |= (8 | tx_que->msix) << (8 + (i * 4)); } else if (adapter->hw.mac.type == e1000_82575) { tx_que->eims = E1000_EICR_TX_QUEUE0 << i; } else { tx_que->eims = 1 << i; } } /* Link interrupt */ rid = rx_vectors + 1; error = iflib_irq_alloc_generic(ctx, &adapter->irq, rid, IFLIB_INTR_ADMIN, em_msix_link, adapter, 0, "aq"); if (error) { device_printf(iflib_get_dev(ctx), "Failed to register admin handler"); goto fail; } adapter->linkvec = rx_vectors; if (adapter->hw.mac.type < igb_mac_min) { adapter->ivars |= (8 | rx_vectors) << 16; adapter->ivars |= 0x80000000; } return (0); fail: iflib_irq_free(ctx, &adapter->irq); rx_que = adapter->rx_queues; for (int i = 0; i < adapter->rx_num_queues; i++, rx_que++) iflib_irq_free(ctx, &rx_que->que_irq); return (error); } static void igb_configure_queues(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; struct em_rx_queue *rx_que; struct em_tx_queue *tx_que; u32 tmp, ivar = 0, newitr = 0; /* First turn on RSS capability */ if (adapter->hw.mac.type != e1000_82575) E1000_WRITE_REG(hw, E1000_GPIE, E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME | E1000_GPIE_PBA | E1000_GPIE_NSICR); /* Turn on MSI-X */ switch (adapter->hw.mac.type) { case e1000_82580: case e1000_i350: case e1000_i354: case e1000_i210: case e1000_i211: case e1000_vfadapt: case e1000_vfadapt_i350: /* RX entries */ for (int i = 0; i < adapter->rx_num_queues; i++) { u32 index = i >> 1; ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); rx_que = &adapter->rx_queues[i]; if (i & 1) { ivar &= 0xFF00FFFF; ivar |= (rx_que->msix | E1000_IVAR_VALID) << 16; } else { ivar &= 0xFFFFFF00; ivar |= rx_que->msix | E1000_IVAR_VALID; } E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); } /* TX entries */ for (int i = 0; i < adapter->tx_num_queues; i++) { u32 index = i >> 1; ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); tx_que = &adapter->tx_queues[i]; if (i & 1) { ivar &= 0x00FFFFFF; ivar |= (tx_que->msix | E1000_IVAR_VALID) << 24; } else { ivar &= 0xFFFF00FF; ivar |= (tx_que->msix | E1000_IVAR_VALID) << 8; } E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); adapter->que_mask |= tx_que->eims; } /* And for the link interrupt */ ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8; adapter->link_mask = 1 << adapter->linkvec; E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar); break; case e1000_82576: /* RX entries */ for (int i = 0; i < adapter->rx_num_queues; i++) { u32 index = i & 0x7; /* Each IVAR has two entries */ ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); rx_que = &adapter->rx_queues[i]; if (i < 8) { ivar &= 0xFFFFFF00; ivar |= rx_que->msix | E1000_IVAR_VALID; } else { ivar &= 0xFF00FFFF; ivar |= (rx_que->msix | E1000_IVAR_VALID) << 16; } E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); adapter->que_mask |= rx_que->eims; } /* TX entries */ for (int i = 0; i < adapter->tx_num_queues; i++) { u32 index = i & 0x7; /* Each IVAR has two entries */ ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); tx_que = &adapter->tx_queues[i]; if (i < 8) { ivar &= 0xFFFF00FF; ivar |= (tx_que->msix | E1000_IVAR_VALID) << 8; } else { ivar &= 0x00FFFFFF; ivar |= (tx_que->msix | E1000_IVAR_VALID) << 24; } E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); adapter->que_mask |= tx_que->eims; } /* And for the link interrupt */ ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8; adapter->link_mask = 1 << adapter->linkvec; E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar); break; case e1000_82575: /* enable MSI-X support*/ tmp = E1000_READ_REG(hw, E1000_CTRL_EXT); tmp |= E1000_CTRL_EXT_PBA_CLR; /* Auto-Mask interrupts upon ICR read. */ tmp |= E1000_CTRL_EXT_EIAME; tmp |= E1000_CTRL_EXT_IRCA; E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp); /* Queues */ for (int i = 0; i < adapter->rx_num_queues; i++) { rx_que = &adapter->rx_queues[i]; tmp = E1000_EICR_RX_QUEUE0 << i; tmp |= E1000_EICR_TX_QUEUE0 << i; rx_que->eims = tmp; E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0), i, rx_que->eims); adapter->que_mask |= rx_que->eims; } /* Link */ E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec), E1000_EIMS_OTHER); adapter->link_mask |= E1000_EIMS_OTHER; default: break; } /* Set the starting interrupt rate */ if (em_max_interrupt_rate > 0) newitr = (4000000 / em_max_interrupt_rate) & 0x7FFC; if (hw->mac.type == e1000_82575) newitr |= newitr << 16; else newitr |= E1000_EITR_CNT_IGNR; for (int i = 0; i < adapter->rx_num_queues; i++) { rx_que = &adapter->rx_queues[i]; E1000_WRITE_REG(hw, E1000_EITR(rx_que->msix), newitr); } return; } static void em_free_pci_resources(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct em_rx_queue *que = adapter->rx_queues; device_t dev = iflib_get_dev(ctx); /* Release all MSI-X queue resources */ if (adapter->intr_type == IFLIB_INTR_MSIX) iflib_irq_free(ctx, &adapter->irq); if (que != NULL) { for (int i = 0; i < adapter->rx_num_queues; i++, que++) { iflib_irq_free(ctx, &que->que_irq); } } if (adapter->memory != NULL) { bus_release_resource(dev, SYS_RES_MEMORY, rman_get_rid(adapter->memory), adapter->memory); adapter->memory = NULL; } if (adapter->flash != NULL) { bus_release_resource(dev, SYS_RES_MEMORY, rman_get_rid(adapter->flash), adapter->flash); adapter->flash = NULL; } if (adapter->ioport != NULL) { bus_release_resource(dev, SYS_RES_IOPORT, rman_get_rid(adapter->ioport), adapter->ioport); adapter->ioport = NULL; } } /* Set up MSI or MSI-X */ static int em_setup_msix(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); if (adapter->hw.mac.type == e1000_82574) { em_enable_vectors_82574(ctx); } return (0); } /********************************************************************* * * Workaround for SmartSpeed on 82541 and 82547 controllers * **********************************************************************/ static void lem_smartspeed(struct adapter *adapter) { u16 phy_tmp; if (adapter->link_active || (adapter->hw.phy.type != e1000_phy_igp) || adapter->hw.mac.autoneg == 0 || (adapter->hw.phy.autoneg_advertised & ADVERTISE_1000_FULL) == 0) return; if (adapter->smartspeed == 0) { /* If Master/Slave config fault is asserted twice, * we assume back-to-back */ e1000_read_phy_reg(&adapter->hw, PHY_1000T_STATUS, &phy_tmp); if (!(phy_tmp & SR_1000T_MS_CONFIG_FAULT)) return; e1000_read_phy_reg(&adapter->hw, PHY_1000T_STATUS, &phy_tmp); if (phy_tmp & SR_1000T_MS_CONFIG_FAULT) { e1000_read_phy_reg(&adapter->hw, PHY_1000T_CTRL, &phy_tmp); if(phy_tmp & CR_1000T_MS_ENABLE) { phy_tmp &= ~CR_1000T_MS_ENABLE; e1000_write_phy_reg(&adapter->hw, PHY_1000T_CTRL, phy_tmp); adapter->smartspeed++; if(adapter->hw.mac.autoneg && !e1000_copper_link_autoneg(&adapter->hw) && !e1000_read_phy_reg(&adapter->hw, PHY_CONTROL, &phy_tmp)) { phy_tmp |= (MII_CR_AUTO_NEG_EN | MII_CR_RESTART_AUTO_NEG); e1000_write_phy_reg(&adapter->hw, PHY_CONTROL, phy_tmp); } } } return; } else if(adapter->smartspeed == EM_SMARTSPEED_DOWNSHIFT) { /* If still no link, perhaps using 2/3 pair cable */ e1000_read_phy_reg(&adapter->hw, PHY_1000T_CTRL, &phy_tmp); phy_tmp |= CR_1000T_MS_ENABLE; e1000_write_phy_reg(&adapter->hw, PHY_1000T_CTRL, phy_tmp); if(adapter->hw.mac.autoneg && !e1000_copper_link_autoneg(&adapter->hw) && !e1000_read_phy_reg(&adapter->hw, PHY_CONTROL, &phy_tmp)) { phy_tmp |= (MII_CR_AUTO_NEG_EN | MII_CR_RESTART_AUTO_NEG); e1000_write_phy_reg(&adapter->hw, PHY_CONTROL, phy_tmp); } } /* Restart process after EM_SMARTSPEED_MAX iterations */ if(adapter->smartspeed++ == EM_SMARTSPEED_MAX) adapter->smartspeed = 0; } /********************************************************************* * * Initialize the DMA Coalescing feature * **********************************************************************/ static void igb_init_dmac(struct adapter *adapter, u32 pba) { device_t dev = adapter->dev; struct e1000_hw *hw = &adapter->hw; u32 dmac, reg = ~E1000_DMACR_DMAC_EN; u16 hwm; u16 max_frame_size; if (hw->mac.type == e1000_i211) return; max_frame_size = adapter->shared->isc_max_frame_size; if (hw->mac.type > e1000_82580) { if (adapter->dmac == 0) { /* Disabling it */ E1000_WRITE_REG(hw, E1000_DMACR, reg); return; } else device_printf(dev, "DMA Coalescing enabled\n"); /* Set starting threshold */ E1000_WRITE_REG(hw, E1000_DMCTXTH, 0); hwm = 64 * pba - max_frame_size / 16; if (hwm < 64 * (pba - 6)) hwm = 64 * (pba - 6); reg = E1000_READ_REG(hw, E1000_FCRTC); reg &= ~E1000_FCRTC_RTH_COAL_MASK; reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT) & E1000_FCRTC_RTH_COAL_MASK); E1000_WRITE_REG(hw, E1000_FCRTC, reg); dmac = pba - max_frame_size / 512; if (dmac < pba - 10) dmac = pba - 10; reg = E1000_READ_REG(hw, E1000_DMACR); reg &= ~E1000_DMACR_DMACTHR_MASK; reg |= ((dmac << E1000_DMACR_DMACTHR_SHIFT) & E1000_DMACR_DMACTHR_MASK); /* transition to L0x or L1 if available..*/ reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK); /* Check if status is 2.5Gb backplane connection * before configuration of watchdog timer, which is * in msec values in 12.8usec intervals * watchdog timer= msec values in 32usec intervals * for non 2.5Gb connection */ if (hw->mac.type == e1000_i354) { int status = E1000_READ_REG(hw, E1000_STATUS); if ((status & E1000_STATUS_2P5_SKU) && (!(status & E1000_STATUS_2P5_SKU_OVER))) reg |= ((adapter->dmac * 5) >> 6); else reg |= (adapter->dmac >> 5); } else { reg |= (adapter->dmac >> 5); } E1000_WRITE_REG(hw, E1000_DMACR, reg); E1000_WRITE_REG(hw, E1000_DMCRTRH, 0); /* Set the interval before transition */ reg = E1000_READ_REG(hw, E1000_DMCTLX); if (hw->mac.type == e1000_i350) reg |= IGB_DMCTLX_DCFLUSH_DIS; /* ** in 2.5Gb connection, TTLX unit is 0.4 usec ** which is 0x4*2 = 0xA. But delay is still 4 usec */ if (hw->mac.type == e1000_i354) { int status = E1000_READ_REG(hw, E1000_STATUS); if ((status & E1000_STATUS_2P5_SKU) && (!(status & E1000_STATUS_2P5_SKU_OVER))) reg |= 0xA; else reg |= 0x4; } else { reg |= 0x4; } E1000_WRITE_REG(hw, E1000_DMCTLX, reg); /* free space in tx packet buffer to wake from DMA coal */ E1000_WRITE_REG(hw, E1000_DMCTXTH, (IGB_TXPBSIZE - (2 * max_frame_size)) >> 6); /* make low power state decision controlled by DMA coal */ reg = E1000_READ_REG(hw, E1000_PCIEMISC); reg &= ~E1000_PCIEMISC_LX_DECISION; E1000_WRITE_REG(hw, E1000_PCIEMISC, reg); } else if (hw->mac.type == e1000_82580) { u32 reg = E1000_READ_REG(hw, E1000_PCIEMISC); E1000_WRITE_REG(hw, E1000_PCIEMISC, reg & ~E1000_PCIEMISC_LX_DECISION); E1000_WRITE_REG(hw, E1000_DMACR, 0); } } /********************************************************************* * * Initialize the hardware to a configuration as specified by the * adapter structure. * **********************************************************************/ static void em_reset(if_ctx_t ctx) { device_t dev = iflib_get_dev(ctx); struct adapter *adapter = iflib_get_softc(ctx); struct ifnet *ifp = iflib_get_ifp(ctx); struct e1000_hw *hw = &adapter->hw; u16 rx_buffer_size; u32 pba; INIT_DEBUGOUT("em_reset: begin"); /* Let the firmware know the OS is in control */ em_get_hw_control(adapter); /* Set up smart power down as default off on newer adapters. */ if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 || hw->mac.type == e1000_82572)) { u16 phy_tmp = 0; /* Speed up time to link by disabling smart power down. */ e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp); phy_tmp &= ~IGP02E1000_PM_SPD; e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp); } /* * Packet Buffer Allocation (PBA) * Writing PBA sets the receive portion of the buffer * the remainder is used for the transmit buffer. */ switch (hw->mac.type) { /* Total Packet Buffer on these is 48K */ case e1000_82571: case e1000_82572: case e1000_80003es2lan: pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */ break; case e1000_82573: /* 82573: Total Packet Buffer is 32K */ pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */ break; case e1000_82574: case e1000_82583: pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */ break; case e1000_ich8lan: pba = E1000_PBA_8K; break; case e1000_ich9lan: case e1000_ich10lan: /* Boost Receive side for jumbo frames */ if (adapter->hw.mac.max_frame_size > 4096) pba = E1000_PBA_14K; else pba = E1000_PBA_10K; break; case e1000_pchlan: case e1000_pch2lan: case e1000_pch_lpt: case e1000_pch_spt: case e1000_pch_cnp: pba = E1000_PBA_26K; break; case e1000_82575: pba = E1000_PBA_32K; break; case e1000_82576: case e1000_vfadapt: pba = E1000_READ_REG(hw, E1000_RXPBS); pba &= E1000_RXPBS_SIZE_MASK_82576; break; case e1000_82580: case e1000_i350: case e1000_i354: case e1000_vfadapt_i350: pba = E1000_READ_REG(hw, E1000_RXPBS); pba = e1000_rxpbs_adjust_82580(pba); break; case e1000_i210: case e1000_i211: pba = E1000_PBA_34K; break; default: if (adapter->hw.mac.max_frame_size > 8192) pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */ else pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */ } /* Special needs in case of Jumbo frames */ if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) { u32 tx_space, min_tx, min_rx; pba = E1000_READ_REG(hw, E1000_PBA); tx_space = pba >> 16; pba &= 0xffff; min_tx = (adapter->hw.mac.max_frame_size + sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2; min_tx = roundup2(min_tx, 1024); min_tx >>= 10; min_rx = adapter->hw.mac.max_frame_size; min_rx = roundup2(min_rx, 1024); min_rx >>= 10; if (tx_space < min_tx && ((min_tx - tx_space) < pba)) { pba = pba - (min_tx - tx_space); /* * if short on rx space, rx wins * and must trump tx adjustment */ if (pba < min_rx) pba = min_rx; } E1000_WRITE_REG(hw, E1000_PBA, pba); } if (hw->mac.type < igb_mac_min) E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba); INIT_DEBUGOUT1("em_reset: pba=%dK",pba); /* * These parameters control the automatic generation (Tx) and * response (Rx) to Ethernet PAUSE frames. * - High water mark should allow for at least two frames to be * received after sending an XOFF. * - Low water mark works best when it is very near the high water mark. * This allows the receiver to restart by sending XON when it has * drained a bit. Here we use an arbitrary value of 1500 which will * restart after one full frame is pulled from the buffer. There * could be several smaller frames in the buffer and if so they will * not trigger the XON until their total number reduces the buffer * by 1500. * - The pause time is fairly large at 1000 x 512ns = 512 usec. */ rx_buffer_size = (pba & 0xffff) << 10; hw->fc.high_water = rx_buffer_size - roundup2(adapter->hw.mac.max_frame_size, 1024); hw->fc.low_water = hw->fc.high_water - 1500; if (adapter->fc) /* locally set flow control value? */ hw->fc.requested_mode = adapter->fc; else hw->fc.requested_mode = e1000_fc_full; if (hw->mac.type == e1000_80003es2lan) hw->fc.pause_time = 0xFFFF; else hw->fc.pause_time = EM_FC_PAUSE_TIME; hw->fc.send_xon = TRUE; /* Device specific overrides/settings */ switch (hw->mac.type) { case e1000_pchlan: /* Workaround: no TX flow ctrl for PCH */ hw->fc.requested_mode = e1000_fc_rx_pause; hw->fc.pause_time = 0xFFFF; /* override */ if (if_getmtu(ifp) > ETHERMTU) { hw->fc.high_water = 0x3500; hw->fc.low_water = 0x1500; } else { hw->fc.high_water = 0x5000; hw->fc.low_water = 0x3000; } hw->fc.refresh_time = 0x1000; break; case e1000_pch2lan: case e1000_pch_lpt: case e1000_pch_spt: case e1000_pch_cnp: hw->fc.high_water = 0x5C20; hw->fc.low_water = 0x5048; hw->fc.pause_time = 0x0650; hw->fc.refresh_time = 0x0400; /* Jumbos need adjusted PBA */ if (if_getmtu(ifp) > ETHERMTU) E1000_WRITE_REG(hw, E1000_PBA, 12); else E1000_WRITE_REG(hw, E1000_PBA, 26); break; case e1000_82575: case e1000_82576: /* 8-byte granularity */ hw->fc.low_water = hw->fc.high_water - 8; break; case e1000_82580: case e1000_i350: case e1000_i354: case e1000_i210: case e1000_i211: case e1000_vfadapt: case e1000_vfadapt_i350: /* 16-byte granularity */ hw->fc.low_water = hw->fc.high_water - 16; break; case e1000_ich9lan: case e1000_ich10lan: if (if_getmtu(ifp) > ETHERMTU) { hw->fc.high_water = 0x2800; hw->fc.low_water = hw->fc.high_water - 8; break; } /* FALLTHROUGH */ default: if (hw->mac.type == e1000_80003es2lan) hw->fc.pause_time = 0xFFFF; break; } /* Issue a global reset */ e1000_reset_hw(hw); if (adapter->hw.mac.type >= igb_mac_min) { E1000_WRITE_REG(hw, E1000_WUC, 0); } else { E1000_WRITE_REG(hw, E1000_WUFC, 0); em_disable_aspm(adapter); } if (adapter->flags & IGB_MEDIA_RESET) { e1000_setup_init_funcs(hw, TRUE); e1000_get_bus_info(hw); adapter->flags &= ~IGB_MEDIA_RESET; } /* and a re-init */ if (e1000_init_hw(hw) < 0) { device_printf(dev, "Hardware Initialization Failed\n"); return; } if (adapter->hw.mac.type >= igb_mac_min) igb_init_dmac(adapter, pba); E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN); e1000_get_phy_info(hw); e1000_check_for_link(hw); } /* * Initialise the RSS mapping for NICs that support multiple transmit/ * receive rings. */ #define RSSKEYLEN 10 static void em_initialize_rss_mapping(struct adapter *adapter) { uint8_t rss_key[4 * RSSKEYLEN]; uint32_t reta = 0; struct e1000_hw *hw = &adapter->hw; int i; /* * Configure RSS key */ arc4rand(rss_key, sizeof(rss_key), 0); for (i = 0; i < RSSKEYLEN; ++i) { uint32_t rssrk = 0; rssrk = EM_RSSRK_VAL(rss_key, i); E1000_WRITE_REG(hw,E1000_RSSRK(i), rssrk); } /* * Configure RSS redirect table in following fashion: * (hash & ring_cnt_mask) == rdr_table[(hash & rdr_table_mask)] */ for (i = 0; i < sizeof(reta); ++i) { uint32_t q; q = (i % adapter->rx_num_queues) << 7; reta |= q << (8 * i); } for (i = 0; i < 32; ++i) E1000_WRITE_REG(hw, E1000_RETA(i), reta); E1000_WRITE_REG(hw, E1000_MRQC, E1000_MRQC_RSS_ENABLE_2Q | E1000_MRQC_RSS_FIELD_IPV4_TCP | E1000_MRQC_RSS_FIELD_IPV4 | E1000_MRQC_RSS_FIELD_IPV6_TCP_EX | E1000_MRQC_RSS_FIELD_IPV6_EX | E1000_MRQC_RSS_FIELD_IPV6); } static void igb_initialize_rss_mapping(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; int i; int queue_id; u32 reta; u32 rss_key[10], mrqc, shift = 0; /* XXX? */ if (adapter->hw.mac.type == e1000_82575) shift = 6; /* * The redirection table controls which destination * queue each bucket redirects traffic to. * Each DWORD represents four queues, with the LSB * being the first queue in the DWORD. * * This just allocates buckets to queues using round-robin * allocation. * * NOTE: It Just Happens to line up with the default * RSS allocation method. */ /* Warning FM follows */ reta = 0; for (i = 0; i < 128; i++) { #ifdef RSS queue_id = rss_get_indirection_to_bucket(i); /* * If we have more queues than buckets, we'll * end up mapping buckets to a subset of the * queues. * * If we have more buckets than queues, we'll * end up instead assigning multiple buckets * to queues. * * Both are suboptimal, but we need to handle * the case so we don't go out of bounds * indexing arrays and such. */ queue_id = queue_id % adapter->rx_num_queues; #else queue_id = (i % adapter->rx_num_queues); #endif /* Adjust if required */ queue_id = queue_id << shift; /* * The low 8 bits are for hash value (n+0); * The next 8 bits are for hash value (n+1), etc. */ reta = reta >> 8; reta = reta | ( ((uint32_t) queue_id) << 24); if ((i & 3) == 3) { E1000_WRITE_REG(hw, E1000_RETA(i >> 2), reta); reta = 0; } } /* Now fill in hash table */ /* * MRQC: Multiple Receive Queues Command * Set queuing to RSS control, number depends on the device. */ mrqc = E1000_MRQC_ENABLE_RSS_8Q; #ifdef RSS /* XXX ew typecasting */ rss_getkey((uint8_t *) &rss_key); #else arc4rand(&rss_key, sizeof(rss_key), 0); #endif for (i = 0; i < 10; i++) E1000_WRITE_REG_ARRAY(hw, E1000_RSSRK(0), i, rss_key[i]); /* * Configure the RSS fields to hash upon. */ mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 | E1000_MRQC_RSS_FIELD_IPV4_TCP); mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 | E1000_MRQC_RSS_FIELD_IPV6_TCP); mrqc |=( E1000_MRQC_RSS_FIELD_IPV4_UDP | E1000_MRQC_RSS_FIELD_IPV6_UDP); mrqc |=( E1000_MRQC_RSS_FIELD_IPV6_UDP_EX | E1000_MRQC_RSS_FIELD_IPV6_TCP_EX); E1000_WRITE_REG(hw, E1000_MRQC, mrqc); } /********************************************************************* * * Setup networking device structure and register interface media. * **********************************************************************/ static int em_setup_interface(if_ctx_t ctx) { struct ifnet *ifp = iflib_get_ifp(ctx); struct adapter *adapter = iflib_get_softc(ctx); if_softc_ctx_t scctx = adapter->shared; INIT_DEBUGOUT("em_setup_interface: begin"); /* Single Queue */ if (adapter->tx_num_queues == 1) { if_setsendqlen(ifp, scctx->isc_ntxd[0] - 1); if_setsendqready(ifp); } /* * Specify the media types supported by this adapter and register * callbacks to update media and link information */ if ((adapter->hw.phy.media_type == e1000_media_type_fiber) || (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) { u_char fiber_type = IFM_1000_SX; /* default type */ if (adapter->hw.mac.type == e1000_82545) fiber_type = IFM_1000_LX; ifmedia_add(adapter->media, IFM_ETHER | fiber_type | IFM_FDX, 0, NULL); ifmedia_add(adapter->media, IFM_ETHER | fiber_type, 0, NULL); } else { ifmedia_add(adapter->media, IFM_ETHER | IFM_10_T, 0, NULL); ifmedia_add(adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX, 0, NULL); ifmedia_add(adapter->media, IFM_ETHER | IFM_100_TX, 0, NULL); ifmedia_add(adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX, 0, NULL); if (adapter->hw.phy.type != e1000_phy_ife) { ifmedia_add(adapter->media, IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL); ifmedia_add(adapter->media, IFM_ETHER | IFM_1000_T, 0, NULL); } } ifmedia_add(adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(adapter->media, IFM_ETHER | IFM_AUTO); return (0); } static int em_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int ntxqs, int ntxqsets) { struct adapter *adapter = iflib_get_softc(ctx); if_softc_ctx_t scctx = adapter->shared; int error = E1000_SUCCESS; struct em_tx_queue *que; int i, j; MPASS(adapter->tx_num_queues > 0); MPASS(adapter->tx_num_queues == ntxqsets); /* First allocate the top level queue structs */ if (!(adapter->tx_queues = (struct em_tx_queue *) malloc(sizeof(struct em_tx_queue) * adapter->tx_num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { device_printf(iflib_get_dev(ctx), "Unable to allocate queue memory\n"); return(ENOMEM); } for (i = 0, que = adapter->tx_queues; i < adapter->tx_num_queues; i++, que++) { /* Set up some basics */ struct tx_ring *txr = &que->txr; txr->adapter = que->adapter = adapter; que->me = txr->me = i; /* Allocate report status array */ if (!(txr->tx_rsq = (qidx_t *) malloc(sizeof(qidx_t) * scctx->isc_ntxd[0], M_DEVBUF, M_NOWAIT | M_ZERO))) { device_printf(iflib_get_dev(ctx), "failed to allocate rs_idxs memory\n"); error = ENOMEM; goto fail; } for (j = 0; j < scctx->isc_ntxd[0]; j++) txr->tx_rsq[j] = QIDX_INVALID; /* get the virtual and physical address of the hardware queues */ txr->tx_base = (struct e1000_tx_desc *)vaddrs[i*ntxqs]; txr->tx_paddr = paddrs[i*ntxqs]; } if (bootverbose) device_printf(iflib_get_dev(ctx), "allocated for %d tx_queues\n", adapter->tx_num_queues); return (0); fail: em_if_queues_free(ctx); return (error); } static int em_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int nrxqs, int nrxqsets) { struct adapter *adapter = iflib_get_softc(ctx); int error = E1000_SUCCESS; struct em_rx_queue *que; int i; MPASS(adapter->rx_num_queues > 0); MPASS(adapter->rx_num_queues == nrxqsets); /* First allocate the top level queue structs */ if (!(adapter->rx_queues = (struct em_rx_queue *) malloc(sizeof(struct em_rx_queue) * adapter->rx_num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { device_printf(iflib_get_dev(ctx), "Unable to allocate queue memory\n"); error = ENOMEM; goto fail; } for (i = 0, que = adapter->rx_queues; i < nrxqsets; i++, que++) { /* Set up some basics */ struct rx_ring *rxr = &que->rxr; rxr->adapter = que->adapter = adapter; rxr->que = que; que->me = rxr->me = i; /* get the virtual and physical address of the hardware queues */ rxr->rx_base = (union e1000_rx_desc_extended *)vaddrs[i*nrxqs]; rxr->rx_paddr = paddrs[i*nrxqs]; } if (bootverbose) device_printf(iflib_get_dev(ctx), "allocated for %d rx_queues\n", adapter->rx_num_queues); return (0); fail: em_if_queues_free(ctx); return (error); } static void em_if_queues_free(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct em_tx_queue *tx_que = adapter->tx_queues; struct em_rx_queue *rx_que = adapter->rx_queues; if (tx_que != NULL) { for (int i = 0; i < adapter->tx_num_queues; i++, tx_que++) { struct tx_ring *txr = &tx_que->txr; if (txr->tx_rsq == NULL) break; free(txr->tx_rsq, M_DEVBUF); txr->tx_rsq = NULL; } free(adapter->tx_queues, M_DEVBUF); adapter->tx_queues = NULL; } if (rx_que != NULL) { free(adapter->rx_queues, M_DEVBUF); adapter->rx_queues = NULL; } } /********************************************************************* * * Enable transmit unit. * **********************************************************************/ static void em_initialize_transmit_unit(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); if_softc_ctx_t scctx = adapter->shared; struct em_tx_queue *que; struct tx_ring *txr; struct e1000_hw *hw = &adapter->hw; u32 tctl, txdctl = 0, tarc, tipg = 0; INIT_DEBUGOUT("em_initialize_transmit_unit: begin"); for (int i = 0; i < adapter->tx_num_queues; i++, txr++) { u64 bus_addr; caddr_t offp, endp; que = &adapter->tx_queues[i]; txr = &que->txr; bus_addr = txr->tx_paddr; /* Clear checksum offload context. */ offp = (caddr_t)&txr->csum_flags; endp = (caddr_t)(txr + 1); bzero(offp, endp - offp); /* Base and Len of TX Ring */ E1000_WRITE_REG(hw, E1000_TDLEN(i), scctx->isc_ntxd[0] * sizeof(struct e1000_tx_desc)); E1000_WRITE_REG(hw, E1000_TDBAH(i), (u32)(bus_addr >> 32)); E1000_WRITE_REG(hw, E1000_TDBAL(i), (u32)bus_addr); /* Init the HEAD/TAIL indices */ E1000_WRITE_REG(hw, E1000_TDT(i), 0); E1000_WRITE_REG(hw, E1000_TDH(i), 0); HW_DEBUGOUT2("Base = %x, Length = %x\n", E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)), E1000_READ_REG(&adapter->hw, E1000_TDLEN(i))); txdctl = 0; /* clear txdctl */ txdctl |= 0x1f; /* PTHRESH */ txdctl |= 1 << 8; /* HTHRESH */ txdctl |= 1 << 16;/* WTHRESH */ txdctl |= 1 << 22; /* Reserved bit 22 must always be 1 */ txdctl |= E1000_TXDCTL_GRAN; txdctl |= 1 << 25; /* LWTHRESH */ E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl); } /* Set the default values for the Tx Inter Packet Gap timer */ switch (adapter->hw.mac.type) { case e1000_80003es2lan: tipg = DEFAULT_82543_TIPG_IPGR1; tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT; break; case e1000_82542: tipg = DEFAULT_82542_TIPG_IPGT; tipg |= DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT; tipg |= DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT; break; default: if ((adapter->hw.phy.media_type == e1000_media_type_fiber) || (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) tipg = DEFAULT_82543_TIPG_IPGT_FIBER; else tipg = DEFAULT_82543_TIPG_IPGT_COPPER; tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT; tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT; } E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg); E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value); if(adapter->hw.mac.type >= e1000_82540) E1000_WRITE_REG(&adapter->hw, E1000_TADV, adapter->tx_abs_int_delay.value); if ((adapter->hw.mac.type == e1000_82571) || (adapter->hw.mac.type == e1000_82572)) { tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0)); tarc |= TARC_SPEED_MODE_BIT; E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc); } else if (adapter->hw.mac.type == e1000_80003es2lan) { /* errata: program both queues to unweighted RR */ tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0)); tarc |= 1; E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc); tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1)); tarc |= 1; E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc); } else if (adapter->hw.mac.type == e1000_82574) { tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0)); tarc |= TARC_ERRATA_BIT; if ( adapter->tx_num_queues > 1) { tarc |= (TARC_COMPENSATION_MODE | TARC_MQ_FIX); E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc); E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc); } else E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc); } if (adapter->tx_int_delay.value > 0) adapter->txd_cmd |= E1000_TXD_CMD_IDE; /* Program the Transmit Control Register */ tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL); tctl &= ~E1000_TCTL_CT; tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN | (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT)); if (adapter->hw.mac.type >= e1000_82571) tctl |= E1000_TCTL_MULR; /* This write will effectively turn on the transmit unit. */ E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl); /* SPT and KBL errata workarounds */ if (hw->mac.type == e1000_pch_spt) { u32 reg; reg = E1000_READ_REG(hw, E1000_IOSFPC); reg |= E1000_RCTL_RDMTS_HEX; E1000_WRITE_REG(hw, E1000_IOSFPC, reg); /* i218-i219 Specification Update 1.5.4.5 */ reg = E1000_READ_REG(hw, E1000_TARC(0)); reg &= ~E1000_TARC0_CB_MULTIQ_3_REQ; reg |= E1000_TARC0_CB_MULTIQ_2_REQ; E1000_WRITE_REG(hw, E1000_TARC(0), reg); } } /********************************************************************* * * Enable receive unit. * **********************************************************************/ static void em_initialize_receive_unit(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); if_softc_ctx_t scctx = adapter->shared; struct ifnet *ifp = iflib_get_ifp(ctx); struct e1000_hw *hw = &adapter->hw; struct em_rx_queue *que; int i; u32 rctl, rxcsum, rfctl; INIT_DEBUGOUT("em_initialize_receive_units: begin"); /* * Make sure receives are disabled while setting * up the descriptor ring */ rctl = E1000_READ_REG(hw, E1000_RCTL); /* Do not disable if ever enabled on this hardware */ if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583)) E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN); /* Setup the Receive Control Register */ rctl &= ~(3 << E1000_RCTL_MO_SHIFT); rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF | (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT); /* Do not store bad packets */ rctl &= ~E1000_RCTL_SBP; /* Enable Long Packet receive */ if (if_getmtu(ifp) > ETHERMTU) rctl |= E1000_RCTL_LPE; else rctl &= ~E1000_RCTL_LPE; /* Strip the CRC */ if (!em_disable_crc_stripping) rctl |= E1000_RCTL_SECRC; if (adapter->hw.mac.type >= e1000_82540) { E1000_WRITE_REG(&adapter->hw, E1000_RADV, adapter->rx_abs_int_delay.value); /* * Set the interrupt throttling rate. Value is calculated * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns) */ E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR); } E1000_WRITE_REG(&adapter->hw, E1000_RDTR, adapter->rx_int_delay.value); /* Use extended rx descriptor formats */ rfctl = E1000_READ_REG(hw, E1000_RFCTL); rfctl |= E1000_RFCTL_EXTEN; /* * When using MSI-X interrupts we need to throttle * using the EITR register (82574 only) */ if (hw->mac.type == e1000_82574) { for (int i = 0; i < 4; i++) E1000_WRITE_REG(hw, E1000_EITR_82574(i), DEFAULT_ITR); /* Disable accelerated acknowledge */ rfctl |= E1000_RFCTL_ACK_DIS; } E1000_WRITE_REG(hw, E1000_RFCTL, rfctl); rxcsum = E1000_READ_REG(hw, E1000_RXCSUM); if (if_getcapenable(ifp) & IFCAP_RXCSUM && adapter->hw.mac.type >= e1000_82543) { if (adapter->tx_num_queues > 1) { if (adapter->hw.mac.type >= igb_mac_min) { rxcsum |= E1000_RXCSUM_PCSD; if (hw->mac.type != e1000_82575) rxcsum |= E1000_RXCSUM_CRCOFL; } else rxcsum |= E1000_RXCSUM_TUOFL | E1000_RXCSUM_IPOFL | E1000_RXCSUM_PCSD; } else { if (adapter->hw.mac.type >= igb_mac_min) rxcsum |= E1000_RXCSUM_IPPCSE; else rxcsum |= E1000_RXCSUM_TUOFL | E1000_RXCSUM_IPOFL; if (adapter->hw.mac.type > e1000_82575) rxcsum |= E1000_RXCSUM_CRCOFL; } } else rxcsum &= ~E1000_RXCSUM_TUOFL; E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum); if (adapter->rx_num_queues > 1) { if (adapter->hw.mac.type >= igb_mac_min) igb_initialize_rss_mapping(adapter); else em_initialize_rss_mapping(adapter); } /* * XXX TEMPORARY WORKAROUND: on some systems with 82573 * long latencies are observed, like Lenovo X60. This * change eliminates the problem, but since having positive * values in RDTR is a known source of problems on other * platforms another solution is being sought. */ if (hw->mac.type == e1000_82573) E1000_WRITE_REG(hw, E1000_RDTR, 0x20); for (i = 0, que = adapter->rx_queues; i < adapter->rx_num_queues; i++, que++) { struct rx_ring *rxr = &que->rxr; /* Setup the Base and Length of the Rx Descriptor Ring */ u64 bus_addr = rxr->rx_paddr; #if 0 u32 rdt = adapter->rx_num_queues -1; /* default */ #endif E1000_WRITE_REG(hw, E1000_RDLEN(i), scctx->isc_nrxd[0] * sizeof(union e1000_rx_desc_extended)); E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32)); E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr); /* Setup the Head and Tail Descriptor Pointers */ E1000_WRITE_REG(hw, E1000_RDH(i), 0); E1000_WRITE_REG(hw, E1000_RDT(i), 0); } /* * Set PTHRESH for improved jumbo performance * According to 10.2.5.11 of Intel 82574 Datasheet, * RXDCTL(1) is written whenever RXDCTL(0) is written. * Only write to RXDCTL(1) if there is a need for different * settings. */ if (((adapter->hw.mac.type == e1000_ich9lan) || (adapter->hw.mac.type == e1000_pch2lan) || (adapter->hw.mac.type == e1000_ich10lan)) && (if_getmtu(ifp) > ETHERMTU)) { u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0)); E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3); } else if (adapter->hw.mac.type == e1000_82574) { for (int i = 0; i < adapter->rx_num_queues; i++) { u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i)); rxdctl |= 0x20; /* PTHRESH */ rxdctl |= 4 << 8; /* HTHRESH */ rxdctl |= 4 << 16;/* WTHRESH */ rxdctl |= 1 << 24; /* Switch to granularity */ E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl); } } else if (adapter->hw.mac.type >= igb_mac_min) { u32 psize, srrctl = 0; if (if_getmtu(ifp) > ETHERMTU) { /* Set maximum packet len */ if (adapter->rx_mbuf_sz <= 4096) { srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT; rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX; } else if (adapter->rx_mbuf_sz > 4096) { srrctl |= 8192 >> E1000_SRRCTL_BSIZEPKT_SHIFT; rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX; } psize = scctx->isc_max_frame_size; /* are we on a vlan? */ if (ifp->if_vlantrunk != NULL) psize += VLAN_TAG_SIZE; E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize); } else { srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT; rctl |= E1000_RCTL_SZ_2048; } /* * If TX flow control is disabled and there's >1 queue defined, * enable DROP. * * This drops frames rather than hanging the RX MAC for all queues. */ if ((adapter->rx_num_queues > 1) && (adapter->fc == e1000_fc_none || adapter->fc == e1000_fc_rx_pause)) { srrctl |= E1000_SRRCTL_DROP_EN; } /* Setup the Base and Length of the Rx Descriptor Rings */ for (i = 0, que = adapter->rx_queues; i < adapter->rx_num_queues; i++, que++) { struct rx_ring *rxr = &que->rxr; u64 bus_addr = rxr->rx_paddr; u32 rxdctl; #ifdef notyet /* Configure for header split? -- ignore for now */ rxr->hdr_split = igb_header_split; #else srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF; #endif E1000_WRITE_REG(hw, E1000_RDLEN(i), scctx->isc_nrxd[0] * sizeof(struct e1000_rx_desc)); E1000_WRITE_REG(hw, E1000_RDBAH(i), (uint32_t)(bus_addr >> 32)); E1000_WRITE_REG(hw, E1000_RDBAL(i), (uint32_t)bus_addr); E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl); /* Enable this Queue */ rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i)); rxdctl |= E1000_RXDCTL_QUEUE_ENABLE; rxdctl &= 0xFFF00000; rxdctl |= IGB_RX_PTHRESH; rxdctl |= IGB_RX_HTHRESH << 8; rxdctl |= IGB_RX_WTHRESH << 16; E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl); } } else if (adapter->hw.mac.type >= e1000_pch2lan) { if (if_getmtu(ifp) > ETHERMTU) e1000_lv_jumbo_workaround_ich8lan(hw, TRUE); else e1000_lv_jumbo_workaround_ich8lan(hw, FALSE); } /* Make sure VLAN Filters are off */ rctl &= ~E1000_RCTL_VFE; if (adapter->hw.mac.type < igb_mac_min) { if (adapter->rx_mbuf_sz == MCLBYTES) rctl |= E1000_RCTL_SZ_2048; else if (adapter->rx_mbuf_sz == MJUMPAGESIZE) rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX; else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX; /* ensure we clear use DTYPE of 00 here */ rctl &= ~0x00000C00; } /* Write out the settings */ E1000_WRITE_REG(hw, E1000_RCTL, rctl); return; } static void em_if_vlan_register(if_ctx_t ctx, u16 vtag) { struct adapter *adapter = iflib_get_softc(ctx); u32 index, bit; index = (vtag >> 5) & 0x7F; bit = vtag & 0x1F; adapter->shadow_vfta[index] |= (1 << bit); ++adapter->num_vlans; } static void em_if_vlan_unregister(if_ctx_t ctx, u16 vtag) { struct adapter *adapter = iflib_get_softc(ctx); u32 index, bit; index = (vtag >> 5) & 0x7F; bit = vtag & 0x1F; adapter->shadow_vfta[index] &= ~(1 << bit); --adapter->num_vlans; } static void em_setup_vlan_hw_support(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; u32 reg; /* * We get here thru init_locked, meaning * a soft reset, this has already cleared * the VFTA and other state, so if there * have been no vlan's registered do nothing. */ if (adapter->num_vlans == 0) return; /* * A soft reset zero's out the VFTA, so * we need to repopulate it now. */ for (int i = 0; i < EM_VFTA_SIZE; i++) if (adapter->shadow_vfta[i] != 0) E1000_WRITE_REG_ARRAY(hw, E1000_VFTA, i, adapter->shadow_vfta[i]); reg = E1000_READ_REG(hw, E1000_CTRL); reg |= E1000_CTRL_VME; E1000_WRITE_REG(hw, E1000_CTRL, reg); /* Enable the Filter Table */ reg = E1000_READ_REG(hw, E1000_RCTL); reg &= ~E1000_RCTL_CFIEN; reg |= E1000_RCTL_VFE; E1000_WRITE_REG(hw, E1000_RCTL, reg); } static void em_if_intr_enable(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct e1000_hw *hw = &adapter->hw; u32 ims_mask = IMS_ENABLE_MASK; if (hw->mac.type == e1000_82574) { E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK); ims_mask |= adapter->ims; } E1000_WRITE_REG(hw, E1000_IMS, ims_mask); } static void em_if_intr_disable(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct e1000_hw *hw = &adapter->hw; if (hw->mac.type == e1000_82574) E1000_WRITE_REG(hw, EM_EIAC, 0); E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff); } static void igb_if_intr_enable(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct e1000_hw *hw = &adapter->hw; u32 mask; if (__predict_true(adapter->intr_type == IFLIB_INTR_MSIX)) { mask = (adapter->que_mask | adapter->link_mask); E1000_WRITE_REG(hw, E1000_EIAC, mask); E1000_WRITE_REG(hw, E1000_EIAM, mask); E1000_WRITE_REG(hw, E1000_EIMS, mask); E1000_WRITE_REG(hw, E1000_IMS, E1000_IMS_LSC); } else E1000_WRITE_REG(hw, E1000_IMS, IMS_ENABLE_MASK); E1000_WRITE_FLUSH(hw); } static void igb_if_intr_disable(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct e1000_hw *hw = &adapter->hw; if (__predict_true(adapter->intr_type == IFLIB_INTR_MSIX)) { E1000_WRITE_REG(hw, E1000_EIMC, 0xffffffff); E1000_WRITE_REG(hw, E1000_EIAC, 0); } E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff); E1000_WRITE_FLUSH(hw); } /* * Bit of a misnomer, what this really means is * to enable OS management of the system... aka * to disable special hardware management features */ static void em_init_manageability(struct adapter *adapter) { /* A shared code workaround */ #define E1000_82542_MANC2H E1000_MANC2H if (adapter->has_manage) { int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H); int manc = E1000_READ_REG(&adapter->hw, E1000_MANC); /* disable hardware interception of ARP */ manc &= ~(E1000_MANC_ARP_EN); /* enable receiving management packets to the host */ manc |= E1000_MANC_EN_MNG2HOST; #define E1000_MNG2HOST_PORT_623 (1 << 5) #define E1000_MNG2HOST_PORT_664 (1 << 6) manc2h |= E1000_MNG2HOST_PORT_623; manc2h |= E1000_MNG2HOST_PORT_664; E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h); E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc); } } /* * Give control back to hardware management * controller if there is one. */ static void em_release_manageability(struct adapter *adapter) { if (adapter->has_manage) { int manc = E1000_READ_REG(&adapter->hw, E1000_MANC); /* re-enable hardware interception of ARP */ manc |= E1000_MANC_ARP_EN; manc &= ~E1000_MANC_EN_MNG2HOST; E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc); } } /* * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit. * For ASF and Pass Through versions of f/w this means * that the driver is loaded. For AMT version type f/w * this means that the network i/f is open. */ static void em_get_hw_control(struct adapter *adapter) { u32 ctrl_ext, swsm; if (adapter->vf_ifp) return; if (adapter->hw.mac.type == e1000_82573) { swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM); E1000_WRITE_REG(&adapter->hw, E1000_SWSM, swsm | E1000_SWSM_DRV_LOAD); return; } /* else */ ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext | E1000_CTRL_EXT_DRV_LOAD); } /* * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit. * For ASF and Pass Through versions of f/w this means that * the driver is no longer loaded. For AMT versions of the * f/w this means that the network i/f is closed. */ static void em_release_hw_control(struct adapter *adapter) { u32 ctrl_ext, swsm; if (!adapter->has_manage) return; if (adapter->hw.mac.type == e1000_82573) { swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM); E1000_WRITE_REG(&adapter->hw, E1000_SWSM, swsm & ~E1000_SWSM_DRV_LOAD); return; } /* else */ ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD); return; } static int em_is_valid_ether_addr(u8 *addr) { char zero_addr[6] = { 0, 0, 0, 0, 0, 0 }; if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) { return (FALSE); } return (TRUE); } /* ** Parse the interface capabilities with regard ** to both system management and wake-on-lan for ** later use. */ static void em_get_wakeup(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); device_t dev = iflib_get_dev(ctx); u16 eeprom_data = 0, device_id, apme_mask; adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw); apme_mask = EM_EEPROM_APME; switch (adapter->hw.mac.type) { case e1000_82542: case e1000_82543: break; case e1000_82544: e1000_read_nvm(&adapter->hw, NVM_INIT_CONTROL2_REG, 1, &eeprom_data); apme_mask = EM_82544_APME; break; case e1000_82546: case e1000_82546_rev_3: if (adapter->hw.bus.func == 1) { e1000_read_nvm(&adapter->hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data); break; } else e1000_read_nvm(&adapter->hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data); break; case e1000_82573: case e1000_82583: adapter->has_amt = TRUE; /* FALLTHROUGH */ case e1000_82571: case e1000_82572: case e1000_80003es2lan: if (adapter->hw.bus.func == 1) { e1000_read_nvm(&adapter->hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data); break; } else e1000_read_nvm(&adapter->hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data); break; case e1000_ich8lan: case e1000_ich9lan: case e1000_ich10lan: case e1000_pchlan: case e1000_pch2lan: case e1000_pch_lpt: case e1000_pch_spt: case e1000_82575: /* listing all igb devices */ case e1000_82576: case e1000_82580: case e1000_i350: case e1000_i354: case e1000_i210: case e1000_i211: case e1000_vfadapt: case e1000_vfadapt_i350: apme_mask = E1000_WUC_APME; adapter->has_amt = TRUE; eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC); break; default: e1000_read_nvm(&adapter->hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data); break; } if (eeprom_data & apme_mask) adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC); /* * We have the eeprom settings, now apply the special cases * where the eeprom may be wrong or the board won't support * wake on lan on a particular port */ device_id = pci_get_device(dev); switch (device_id) { case E1000_DEV_ID_82546GB_PCIE: adapter->wol = 0; break; case E1000_DEV_ID_82546EB_FIBER: case E1000_DEV_ID_82546GB_FIBER: /* Wake events only supported on port A for dual fiber * regardless of eeprom setting */ if (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_FUNC_1) adapter->wol = 0; break; case E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3: /* if quad port adapter, disable WoL on all but port A */ if (global_quad_port_a != 0) adapter->wol = 0; /* Reset for multiple quad port adapters */ if (++global_quad_port_a == 4) global_quad_port_a = 0; break; case E1000_DEV_ID_82571EB_FIBER: /* Wake events only supported on port A for dual fiber * regardless of eeprom setting */ if (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_FUNC_1) adapter->wol = 0; break; case E1000_DEV_ID_82571EB_QUAD_COPPER: case E1000_DEV_ID_82571EB_QUAD_FIBER: case E1000_DEV_ID_82571EB_QUAD_COPPER_LP: /* if quad port adapter, disable WoL on all but port A */ if (global_quad_port_a != 0) adapter->wol = 0; /* Reset for multiple quad port adapters */ if (++global_quad_port_a == 4) global_quad_port_a = 0; break; } return; } /* * Enable PCI Wake On Lan capability */ static void em_enable_wakeup(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); device_t dev = iflib_get_dev(ctx); if_t ifp = iflib_get_ifp(ctx); int error = 0; u32 pmc, ctrl, ctrl_ext, rctl; u16 status; if (pci_find_cap(dev, PCIY_PMG, &pmc) != 0) return; /* * Determine type of Wakeup: note that wol * is set with all bits on by default. */ if ((if_getcapenable(ifp) & IFCAP_WOL_MAGIC) == 0) adapter->wol &= ~E1000_WUFC_MAG; if ((if_getcapenable(ifp) & IFCAP_WOL_UCAST) == 0) adapter->wol &= ~E1000_WUFC_EX; if ((if_getcapenable(ifp) & IFCAP_WOL_MCAST) == 0) adapter->wol &= ~E1000_WUFC_MC; else { rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); rctl |= E1000_RCTL_MPE; E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl); } if (!(adapter->wol & (E1000_WUFC_EX | E1000_WUFC_MAG | E1000_WUFC_MC))) goto pme; /* Advertise the wakeup capability */ ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL); ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3); E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl); /* Keep the laser running on Fiber adapters */ if (adapter->hw.phy.media_type == e1000_media_type_fiber || adapter->hw.phy.media_type == e1000_media_type_internal_serdes) { ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA; E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext); } if ((adapter->hw.mac.type == e1000_ich8lan) || (adapter->hw.mac.type == e1000_pchlan) || (adapter->hw.mac.type == e1000_ich9lan) || (adapter->hw.mac.type == e1000_ich10lan)) e1000_suspend_workarounds_ich8lan(&adapter->hw); if ( adapter->hw.mac.type >= e1000_pchlan) { error = em_enable_phy_wakeup(adapter); if (error) goto pme; } else { /* Enable wakeup by the MAC */ E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN); E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol); } if (adapter->hw.phy.type == e1000_phy_igp_3) e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw); pme: status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2); status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE); if (!error && (if_getcapenable(ifp) & IFCAP_WOL)) status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE; pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2); return; } /* * WOL in the newer chipset interfaces (pchlan) * require thing to be copied into the phy */ static int em_enable_phy_wakeup(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; u32 mreg, ret = 0; u16 preg; /* copy MAC RARs to PHY RARs */ e1000_copy_rx_addrs_to_phy_ich8lan(hw); /* copy MAC MTA to PHY MTA */ for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) { mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i); e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF)); e1000_write_phy_reg(hw, BM_MTA(i) + 1, (u16)((mreg >> 16) & 0xFFFF)); } /* configure PHY Rx Control register */ e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg); mreg = E1000_READ_REG(hw, E1000_RCTL); if (mreg & E1000_RCTL_UPE) preg |= BM_RCTL_UPE; if (mreg & E1000_RCTL_MPE) preg |= BM_RCTL_MPE; preg &= ~(BM_RCTL_MO_MASK); if (mreg & E1000_RCTL_MO_3) preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT) << BM_RCTL_MO_SHIFT); if (mreg & E1000_RCTL_BAM) preg |= BM_RCTL_BAM; if (mreg & E1000_RCTL_PMCF) preg |= BM_RCTL_PMCF; mreg = E1000_READ_REG(hw, E1000_CTRL); if (mreg & E1000_CTRL_RFCE) preg |= BM_RCTL_RFCE; e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg); /* enable PHY wakeup in MAC register */ E1000_WRITE_REG(hw, E1000_WUC, E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN | E1000_WUC_APME); E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol); /* configure and enable PHY wakeup in PHY registers */ e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol); e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN); /* activate PHY wakeup */ ret = hw->phy.ops.acquire(hw); if (ret) { printf("Could not acquire PHY\n"); return ret; } e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT, (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT)); ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg); if (ret) { printf("Could not read PHY page 769\n"); goto out; } preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT; ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg); if (ret) printf("Could not set PHY Host Wakeup bit\n"); out: hw->phy.ops.release(hw); return ret; } static void em_if_led_func(if_ctx_t ctx, int onoff) { struct adapter *adapter = iflib_get_softc(ctx); if (onoff) { e1000_setup_led(&adapter->hw); e1000_led_on(&adapter->hw); } else { e1000_led_off(&adapter->hw); e1000_cleanup_led(&adapter->hw); } } /* * Disable the L0S and L1 LINK states */ static void em_disable_aspm(struct adapter *adapter) { int base, reg; u16 link_cap,link_ctrl; device_t dev = adapter->dev; switch (adapter->hw.mac.type) { case e1000_82573: case e1000_82574: case e1000_82583: break; default: return; } if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0) return; reg = base + PCIER_LINK_CAP; link_cap = pci_read_config(dev, reg, 2); if ((link_cap & PCIEM_LINK_CAP_ASPM) == 0) return; reg = base + PCIER_LINK_CTL; link_ctrl = pci_read_config(dev, reg, 2); link_ctrl &= ~PCIEM_LINK_CTL_ASPMC; pci_write_config(dev, reg, link_ctrl, 2); return; } /********************************************************************** * * Update the board statistics counters. * **********************************************************************/ static void em_update_stats_counters(struct adapter *adapter) { u64 prev_xoffrxc = adapter->stats.xoffrxc; if(adapter->hw.phy.media_type == e1000_media_type_copper || (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) { adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS); adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC); } adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS); adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC); adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC); adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL); adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC); adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL); adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC); adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC); adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC); adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC); adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC); adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC); /* ** For watchdog management we need to know if we have been ** paused during the last interval, so capture that here. */ if (adapter->stats.xoffrxc != prev_xoffrxc) adapter->shared->isc_pause_frames = 1; adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC); adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC); adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64); adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127); adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255); adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511); adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023); adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522); adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC); adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC); adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC); adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC); /* For the 64-bit byte counters the low dword must be read first. */ /* Both registers clear on the read of the high dword */ adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) + ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32); adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) + ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32); adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC); adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC); adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC); adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC); adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC); adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH); adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH); adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR); adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT); adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64); adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127); adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255); adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511); adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023); adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522); adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC); adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC); /* Interrupt Counts */ adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC); adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC); adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC); adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC); adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC); adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC); adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC); adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC); adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC); if (adapter->hw.mac.type >= e1000_82543) { adapter->stats.algnerrc += E1000_READ_REG(&adapter->hw, E1000_ALGNERRC); adapter->stats.rxerrc += E1000_READ_REG(&adapter->hw, E1000_RXERRC); adapter->stats.tncrs += E1000_READ_REG(&adapter->hw, E1000_TNCRS); adapter->stats.cexterr += E1000_READ_REG(&adapter->hw, E1000_CEXTERR); adapter->stats.tsctc += E1000_READ_REG(&adapter->hw, E1000_TSCTC); adapter->stats.tsctfc += E1000_READ_REG(&adapter->hw, E1000_TSCTFC); } } static uint64_t em_if_get_counter(if_ctx_t ctx, ift_counter cnt) { struct adapter *adapter = iflib_get_softc(ctx); struct ifnet *ifp = iflib_get_ifp(ctx); switch (cnt) { case IFCOUNTER_COLLISIONS: return (adapter->stats.colc); case IFCOUNTER_IERRORS: return (adapter->dropped_pkts + adapter->stats.rxerrc + adapter->stats.crcerrs + adapter->stats.algnerrc + adapter->stats.ruc + adapter->stats.roc + adapter->stats.mpc + adapter->stats.cexterr); case IFCOUNTER_OERRORS: return (adapter->stats.ecol + adapter->stats.latecol + adapter->watchdog_events); default: return (if_get_counter_default(ifp, cnt)); } } /* em_if_needs_restart - Tell iflib when the driver needs to be reinitialized * @ctx: iflib context * @event: event code to check * * Defaults to returning true for unknown events. * * @returns true if iflib needs to reinit the interface */ static bool em_if_needs_restart(if_ctx_t ctx __unused, enum iflib_restart_event event) { switch (event) { case IFLIB_RESTART_VLAN_CONFIG: default: return (true); } } /* Export a single 32-bit register via a read-only sysctl. */ static int em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS) { struct adapter *adapter; u_int val; adapter = oidp->oid_arg1; val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2); return (sysctl_handle_int(oidp, &val, 0, req)); } /* * Add sysctl variables, one per statistic, to the system. */ static void em_add_hw_stats(struct adapter *adapter) { device_t dev = iflib_get_dev(adapter->ctx); struct em_tx_queue *tx_que = adapter->tx_queues; struct em_rx_queue *rx_que = adapter->rx_queues; struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev); struct sysctl_oid *tree = device_get_sysctl_tree(dev); struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree); struct e1000_hw_stats *stats = &adapter->stats; struct sysctl_oid *stat_node, *queue_node, *int_node; struct sysctl_oid_list *stat_list, *queue_list, *int_list; #define QUEUE_NAME_LEN 32 char namebuf[QUEUE_NAME_LEN]; /* Driver Statistics */ SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", CTLFLAG_RD, &adapter->dropped_pkts, "Driver dropped packets"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq", CTLFLAG_RD, &adapter->link_irq, "Link MSI-X IRQ Handled"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns", CTLFLAG_RD, &adapter->rx_overruns, "RX overruns"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts", CTLFLAG_RD, &adapter->watchdog_events, "Watchdog timeouts"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control", CTLTYPE_UINT | CTLFLAG_RD | CTLFLAG_NEEDGIANT, adapter, E1000_CTRL, em_sysctl_reg_handler, "IU", "Device Control Register"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control", CTLTYPE_UINT | CTLFLAG_RD | CTLFLAG_NEEDGIANT, adapter, E1000_RCTL, em_sysctl_reg_handler, "IU", "Receiver Control Register"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water", CTLFLAG_RD, &adapter->hw.fc.high_water, 0, "Flow Control High Watermark"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water", CTLFLAG_RD, &adapter->hw.fc.low_water, 0, "Flow Control Low Watermark"); for (int i = 0; i < adapter->tx_num_queues; i++, tx_que++) { struct tx_ring *txr = &tx_que->txr; snprintf(namebuf, QUEUE_NAME_LEN, "queue_tx_%d", i); queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "TX Queue Name"); queue_list = SYSCTL_CHILDREN(queue_node); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", CTLTYPE_UINT | CTLFLAG_RD | CTLFLAG_NEEDGIANT, adapter, E1000_TDH(txr->me), em_sysctl_reg_handler, "IU", "Transmit Descriptor Head"); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", CTLTYPE_UINT | CTLFLAG_RD | CTLFLAG_NEEDGIANT, adapter, E1000_TDT(txr->me), em_sysctl_reg_handler, "IU", "Transmit Descriptor Tail"); SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq", CTLFLAG_RD, &txr->tx_irq, "Queue MSI-X Transmit Interrupts"); } for (int j = 0; j < adapter->rx_num_queues; j++, rx_que++) { struct rx_ring *rxr = &rx_que->rxr; snprintf(namebuf, QUEUE_NAME_LEN, "queue_rx_%d", j); queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "RX Queue Name"); queue_list = SYSCTL_CHILDREN(queue_node); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", CTLTYPE_UINT | CTLFLAG_RD | CTLFLAG_NEEDGIANT, adapter, E1000_RDH(rxr->me), em_sysctl_reg_handler, "IU", "Receive Descriptor Head"); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", CTLTYPE_UINT | CTLFLAG_RD | CTLFLAG_NEEDGIANT, adapter, E1000_RDT(rxr->me), em_sysctl_reg_handler, "IU", "Receive Descriptor Tail"); SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq", CTLFLAG_RD, &rxr->rx_irq, "Queue MSI-X Receive Interrupts"); } /* MAC stats get their own sub node */ stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Statistics"); stat_list = SYSCTL_CHILDREN(stat_node); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll", CTLFLAG_RD, &stats->ecol, "Excessive collisions"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll", CTLFLAG_RD, &stats->scc, "Single collisions"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll", CTLFLAG_RD, &stats->mcc, "Multiple collisions"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll", CTLFLAG_RD, &stats->latecol, "Late collisions"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count", CTLFLAG_RD, &stats->colc, "Collision Count"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors", CTLFLAG_RD, &adapter->stats.symerrs, "Symbol Errors"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors", CTLFLAG_RD, &adapter->stats.sec, "Sequence Errors"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count", CTLFLAG_RD, &adapter->stats.dc, "Defer Count"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets", CTLFLAG_RD, &adapter->stats.mpc, "Missed Packets"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff", CTLFLAG_RD, &adapter->stats.rnbc, "Receive No Buffers"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize", CTLFLAG_RD, &adapter->stats.ruc, "Receive Undersize"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented", CTLFLAG_RD, &adapter->stats.rfc, "Fragmented Packets Received "); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize", CTLFLAG_RD, &adapter->stats.roc, "Oversized Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber", CTLFLAG_RD, &adapter->stats.rjc, "Recevied Jabber"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs", CTLFLAG_RD, &adapter->stats.rxerrc, "Receive Errors"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs", CTLFLAG_RD, &adapter->stats.crcerrs, "CRC errors"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs", CTLFLAG_RD, &adapter->stats.algnerrc, "Alignment Errors"); /* On 82575 these are collision counts */ SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs", CTLFLAG_RD, &adapter->stats.cexterr, "Collision/Carrier extension errors"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd", CTLFLAG_RD, &adapter->stats.xonrxc, "XON Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd", CTLFLAG_RD, &adapter->stats.xontxc, "XON Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd", CTLFLAG_RD, &adapter->stats.xoffrxc, "XOFF Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd", CTLFLAG_RD, &adapter->stats.xofftxc, "XOFF Transmitted"); /* Packet Reception Stats */ SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd", CTLFLAG_RD, &adapter->stats.tpr, "Total Packets Received "); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd", CTLFLAG_RD, &adapter->stats.gprc, "Good Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd", CTLFLAG_RD, &adapter->stats.bprc, "Broadcast Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd", CTLFLAG_RD, &adapter->stats.mprc, "Multicast Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64", CTLFLAG_RD, &adapter->stats.prc64, "64 byte frames received "); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127", CTLFLAG_RD, &adapter->stats.prc127, "65-127 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255", CTLFLAG_RD, &adapter->stats.prc255, "128-255 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511", CTLFLAG_RD, &adapter->stats.prc511, "256-511 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023", CTLFLAG_RD, &adapter->stats.prc1023, "512-1023 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522", CTLFLAG_RD, &adapter->stats.prc1522, "1023-1522 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd", CTLFLAG_RD, &adapter->stats.gorc, "Good Octets Received"); /* Packet Transmission Stats */ SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd", CTLFLAG_RD, &adapter->stats.gotc, "Good Octets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd", CTLFLAG_RD, &adapter->stats.tpt, "Total Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd", CTLFLAG_RD, &adapter->stats.gptc, "Good Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd", CTLFLAG_RD, &adapter->stats.bptc, "Broadcast Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd", CTLFLAG_RD, &adapter->stats.mptc, "Multicast Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64", CTLFLAG_RD, &adapter->stats.ptc64, "64 byte frames transmitted "); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127", CTLFLAG_RD, &adapter->stats.ptc127, "65-127 byte frames transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255", CTLFLAG_RD, &adapter->stats.ptc255, "128-255 byte frames transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511", CTLFLAG_RD, &adapter->stats.ptc511, "256-511 byte frames transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023", CTLFLAG_RD, &adapter->stats.ptc1023, "512-1023 byte frames transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522", CTLFLAG_RD, &adapter->stats.ptc1522, "1024-1522 byte frames transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd", CTLFLAG_RD, &adapter->stats.tsctc, "TSO Contexts Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail", CTLFLAG_RD, &adapter->stats.tsctfc, "TSO Contexts Failed"); /* Interrupt Stats */ int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Interrupt Statistics"); int_list = SYSCTL_CHILDREN(int_node); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts", CTLFLAG_RD, &adapter->stats.iac, "Interrupt Assertion Count"); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer", CTLFLAG_RD, &adapter->stats.icrxptc, "Interrupt Cause Rx Pkt Timer Expire Count"); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer", CTLFLAG_RD, &adapter->stats.icrxatc, "Interrupt Cause Rx Abs Timer Expire Count"); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer", CTLFLAG_RD, &adapter->stats.ictxptc, "Interrupt Cause Tx Pkt Timer Expire Count"); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer", CTLFLAG_RD, &adapter->stats.ictxatc, "Interrupt Cause Tx Abs Timer Expire Count"); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty", CTLFLAG_RD, &adapter->stats.ictxqec, "Interrupt Cause Tx Queue Empty Count"); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh", CTLFLAG_RD, &adapter->stats.ictxqmtc, "Interrupt Cause Tx Queue Min Thresh Count"); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh", CTLFLAG_RD, &adapter->stats.icrxdmtc, "Interrupt Cause Rx Desc Min Thresh Count"); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun", CTLFLAG_RD, &adapter->stats.icrxoc, "Interrupt Cause Receiver Overrun Count"); } /********************************************************************** * * This routine provides a way to dump out the adapter eeprom, * often a useful debug/service tool. This only dumps the first * 32 words, stuff that matters is in that extent. * **********************************************************************/ static int em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS) { struct adapter *adapter = (struct adapter *)arg1; int error; int result; result = -1; error = sysctl_handle_int(oidp, &result, 0, req); if (error || !req->newptr) return (error); /* * This value will cause a hex dump of the * first 32 16-bit words of the EEPROM to * the screen. */ if (result == 1) em_print_nvm_info(adapter); return (error); } static void em_print_nvm_info(struct adapter *adapter) { u16 eeprom_data; int i, j, row = 0; /* Its a bit crude, but it gets the job done */ printf("\nInterface EEPROM Dump:\n"); printf("Offset\n0x0000 "); for (i = 0, j = 0; i < 32; i++, j++) { if (j == 8) { /* Make the offset block */ j = 0; ++row; printf("\n0x00%x0 ",row); } e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data); printf("%04x ", eeprom_data); } printf("\n"); } static int em_sysctl_int_delay(SYSCTL_HANDLER_ARGS) { struct em_int_delay_info *info; struct adapter *adapter; u32 regval; int error, usecs, ticks; info = (struct em_int_delay_info *) arg1; usecs = info->value; error = sysctl_handle_int(oidp, &usecs, 0, req); if (error != 0 || req->newptr == NULL) return (error); if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535)) return (EINVAL); info->value = usecs; ticks = EM_USECS_TO_TICKS(usecs); if (info->offset == E1000_ITR) /* units are 256ns here */ ticks *= 4; adapter = info->adapter; regval = E1000_READ_OFFSET(&adapter->hw, info->offset); regval = (regval & ~0xffff) | (ticks & 0xffff); /* Handle a few special cases. */ switch (info->offset) { case E1000_RDTR: break; case E1000_TIDV: if (ticks == 0) { adapter->txd_cmd &= ~E1000_TXD_CMD_IDE; /* Don't write 0 into the TIDV register. */ regval++; } else adapter->txd_cmd |= E1000_TXD_CMD_IDE; break; } E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval); return (0); } static void em_add_int_delay_sysctl(struct adapter *adapter, const char *name, const char *description, struct em_int_delay_info *info, int offset, int value) { info->adapter = adapter; info->offset = offset; info->value = value; SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev), SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)), OID_AUTO, name, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, info, 0, em_sysctl_int_delay, "I", description); } /* * Set flow control using sysctl: * Flow control values: * 0 - off * 1 - rx pause * 2 - tx pause * 3 - full */ static int em_set_flowcntl(SYSCTL_HANDLER_ARGS) { int error; static int input = 3; /* default is full */ struct adapter *adapter = (struct adapter *) arg1; error = sysctl_handle_int(oidp, &input, 0, req); if ((error) || (req->newptr == NULL)) return (error); if (input == adapter->fc) /* no change? */ return (error); switch (input) { case e1000_fc_rx_pause: case e1000_fc_tx_pause: case e1000_fc_full: case e1000_fc_none: adapter->hw.fc.requested_mode = input; adapter->fc = input; break; default: /* Do nothing */ return (error); } adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode; e1000_force_mac_fc(&adapter->hw); return (error); } /* * Manage Energy Efficient Ethernet: * Control values: * 0/1 - enabled/disabled */ static int em_sysctl_eee(SYSCTL_HANDLER_ARGS) { struct adapter *adapter = (struct adapter *) arg1; int error, value; value = adapter->hw.dev_spec.ich8lan.eee_disable; error = sysctl_handle_int(oidp, &value, 0, req); if (error || req->newptr == NULL) return (error); adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0); em_if_init(adapter->ctx); return (0); } static int em_sysctl_debug_info(SYSCTL_HANDLER_ARGS) { struct adapter *adapter; int error; int result; result = -1; error = sysctl_handle_int(oidp, &result, 0, req); if (error || !req->newptr) return (error); if (result == 1) { adapter = (struct adapter *) arg1; em_print_debug_info(adapter); } return (error); } static int em_get_rs(SYSCTL_HANDLER_ARGS) { struct adapter *adapter = (struct adapter *) arg1; int error; int result; result = 0; error = sysctl_handle_int(oidp, &result, 0, req); if (error || !req->newptr || result != 1) return (error); em_dump_rs(adapter); return (error); } static void em_if_debug(if_ctx_t ctx) { em_dump_rs(iflib_get_softc(ctx)); } /* * This routine is meant to be fluid, add whatever is * needed for debugging a problem. -jfv */ static void em_print_debug_info(struct adapter *adapter) { device_t dev = iflib_get_dev(adapter->ctx); struct ifnet *ifp = iflib_get_ifp(adapter->ctx); struct tx_ring *txr = &adapter->tx_queues->txr; struct rx_ring *rxr = &adapter->rx_queues->rxr; if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) printf("Interface is RUNNING "); else printf("Interface is NOT RUNNING\n"); if (if_getdrvflags(ifp) & IFF_DRV_OACTIVE) printf("and INACTIVE\n"); else printf("and ACTIVE\n"); for (int i = 0; i < adapter->tx_num_queues; i++, txr++) { device_printf(dev, "TX Queue %d ------\n", i); device_printf(dev, "hw tdh = %d, hw tdt = %d\n", E1000_READ_REG(&adapter->hw, E1000_TDH(i)), E1000_READ_REG(&adapter->hw, E1000_TDT(i))); } for (int j=0; j < adapter->rx_num_queues; j++, rxr++) { device_printf(dev, "RX Queue %d ------\n", j); device_printf(dev, "hw rdh = %d, hw rdt = %d\n", E1000_READ_REG(&adapter->hw, E1000_RDH(j)), E1000_READ_REG(&adapter->hw, E1000_RDT(j))); } } /* * 82574 only: * Write a new value to the EEPROM increasing the number of MSI-X * vectors from 3 to 5, for proper multiqueue support. */ static void em_enable_vectors_82574(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct e1000_hw *hw = &adapter->hw; device_t dev = iflib_get_dev(ctx); u16 edata; e1000_read_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata); if (bootverbose) device_printf(dev, "EM_NVM_PCIE_CTRL = %#06x\n", edata); if (((edata & EM_NVM_MSIX_N_MASK) >> EM_NVM_MSIX_N_SHIFT) != 4) { device_printf(dev, "Writing to eeprom: increasing " "reported MSI-X vectors from 3 to 5...\n"); edata &= ~(EM_NVM_MSIX_N_MASK); edata |= 4 << EM_NVM_MSIX_N_SHIFT; e1000_write_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata); e1000_update_nvm_checksum(hw); device_printf(dev, "Writing to eeprom: done\n"); } } diff --git a/sys/dev/e1000/igb_txrx.c b/sys/dev/e1000/igb_txrx.c index 6c41d440c769..9f1921bf0c7e 100644 --- a/sys/dev/e1000/igb_txrx.c +++ b/sys/dev/e1000/igb_txrx.c @@ -1,580 +1,578 @@ /*- * Copyright (c) 2016 Matthew Macy * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* $FreeBSD$ */ #include "if_em.h" #ifdef RSS #include #include #endif #ifdef VERBOSE_DEBUG #define DPRINTF device_printf #else #define DPRINTF(...) #endif /********************************************************************* * Local Function prototypes *********************************************************************/ static int igb_isc_txd_encap(void *arg, if_pkt_info_t pi); static void igb_isc_txd_flush(void *arg, uint16_t txqid, qidx_t pidx); static int igb_isc_txd_credits_update(void *arg, uint16_t txqid, bool clear); static void igb_isc_rxd_refill(void *arg, if_rxd_update_t iru); static void igb_isc_rxd_flush(void *arg, uint16_t rxqid, uint8_t flid __unused, qidx_t pidx); static int igb_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx, qidx_t budget); static int igb_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri); static int igb_tx_ctx_setup(struct tx_ring *txr, if_pkt_info_t pi, u32 *cmd_type_len, u32 *olinfo_status); static int igb_tso_setup(struct tx_ring *txr, if_pkt_info_t pi, u32 *cmd_type_len, u32 *olinfo_status); static void igb_rx_checksum(u32 staterr, if_rxd_info_t ri, u32 ptype); static int igb_determine_rsstype(u16 pkt_info); extern void igb_if_enable_intr(if_ctx_t ctx); extern int em_intr(void *arg); struct if_txrx igb_txrx = { .ift_txd_encap = igb_isc_txd_encap, .ift_txd_flush = igb_isc_txd_flush, .ift_txd_credits_update = igb_isc_txd_credits_update, .ift_rxd_available = igb_isc_rxd_available, .ift_rxd_pkt_get = igb_isc_rxd_pkt_get, .ift_rxd_refill = igb_isc_rxd_refill, .ift_rxd_flush = igb_isc_rxd_flush, .ift_legacy_intr = em_intr }; -extern if_shared_ctx_t em_sctx; - /********************************************************************** * * Setup work for hardware segmentation offload (TSO) on * adapters using advanced tx descriptors * **********************************************************************/ static int igb_tso_setup(struct tx_ring *txr, if_pkt_info_t pi, u32 *cmd_type_len, u32 *olinfo_status) { struct e1000_adv_tx_context_desc *TXD; struct adapter *adapter = txr->adapter; u32 type_tucmd_mlhl = 0, vlan_macip_lens = 0; u32 mss_l4len_idx = 0; u32 paylen; switch(pi->ipi_etype) { case ETHERTYPE_IPV6: type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6; break; case ETHERTYPE_IP: type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4; /* Tell transmit desc to also do IPv4 checksum. */ *olinfo_status |= E1000_TXD_POPTS_IXSM << 8; break; default: panic("%s: CSUM_TSO but no supported IP version (0x%04x)", __func__, ntohs(pi->ipi_etype)); break; } TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[pi->ipi_pidx]; /* This is used in the transmit desc in encap */ paylen = pi->ipi_len - pi->ipi_ehdrlen - pi->ipi_ip_hlen - pi->ipi_tcp_hlen; /* VLAN MACLEN IPLEN */ if (pi->ipi_mflags & M_VLANTAG) { vlan_macip_lens |= (pi->ipi_vtag << E1000_ADVTXD_VLAN_SHIFT); } vlan_macip_lens |= pi->ipi_ehdrlen << E1000_ADVTXD_MACLEN_SHIFT; vlan_macip_lens |= pi->ipi_ip_hlen; TXD->vlan_macip_lens = htole32(vlan_macip_lens); /* ADV DTYPE TUCMD */ type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT; type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP; TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl); /* MSS L4LEN IDX */ mss_l4len_idx |= (pi->ipi_tso_segsz << E1000_ADVTXD_MSS_SHIFT); mss_l4len_idx |= (pi->ipi_tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT); /* 82575 needs the queue index added */ if (adapter->hw.mac.type == e1000_82575) mss_l4len_idx |= txr->me << 4; TXD->mss_l4len_idx = htole32(mss_l4len_idx); TXD->seqnum_seed = htole32(0); *cmd_type_len |= E1000_ADVTXD_DCMD_TSE; *olinfo_status |= E1000_TXD_POPTS_TXSM << 8; *olinfo_status |= paylen << E1000_ADVTXD_PAYLEN_SHIFT; return (1); } /********************************************************************* * * Advanced Context Descriptor setup for VLAN, CSUM or TSO * **********************************************************************/ static int igb_tx_ctx_setup(struct tx_ring *txr, if_pkt_info_t pi, u32 *cmd_type_len, u32 *olinfo_status) { struct e1000_adv_tx_context_desc *TXD; struct adapter *adapter = txr->adapter; u32 vlan_macip_lens, type_tucmd_mlhl; u32 mss_l4len_idx; mss_l4len_idx = vlan_macip_lens = type_tucmd_mlhl = 0; /* First check if TSO is to be used */ if (pi->ipi_csum_flags & CSUM_TSO) return (igb_tso_setup(txr, pi, cmd_type_len, olinfo_status)); /* Indicate the whole packet as payload when not doing TSO */ *olinfo_status |= pi->ipi_len << E1000_ADVTXD_PAYLEN_SHIFT; /* Now ready a context descriptor */ TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[pi->ipi_pidx]; /* ** In advanced descriptors the vlan tag must ** be placed into the context descriptor. Hence ** we need to make one even if not doing offloads. */ if (pi->ipi_mflags & M_VLANTAG) { vlan_macip_lens |= (pi->ipi_vtag << E1000_ADVTXD_VLAN_SHIFT); } else if ((pi->ipi_csum_flags & IGB_CSUM_OFFLOAD) == 0) { return (0); } /* Set the ether header length */ vlan_macip_lens |= pi->ipi_ehdrlen << E1000_ADVTXD_MACLEN_SHIFT; switch(pi->ipi_etype) { case ETHERTYPE_IP: type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4; break; case ETHERTYPE_IPV6: type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6; break; default: break; } vlan_macip_lens |= pi->ipi_ip_hlen; type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT; switch (pi->ipi_ipproto) { case IPPROTO_TCP: if (pi->ipi_csum_flags & (CSUM_IP_TCP | CSUM_IP6_TCP)) { type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP; *olinfo_status |= E1000_TXD_POPTS_TXSM << 8; } break; case IPPROTO_UDP: if (pi->ipi_csum_flags & (CSUM_IP_UDP | CSUM_IP6_UDP)) { type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP; *olinfo_status |= E1000_TXD_POPTS_TXSM << 8; } break; case IPPROTO_SCTP: if (pi->ipi_csum_flags & (CSUM_IP_SCTP | CSUM_IP6_SCTP)) { type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP; *olinfo_status |= E1000_TXD_POPTS_TXSM << 8; } break; default: break; } /* 82575 needs the queue index added */ if (adapter->hw.mac.type == e1000_82575) mss_l4len_idx = txr->me << 4; /* Now copy bits into descriptor */ TXD->vlan_macip_lens = htole32(vlan_macip_lens); TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl); TXD->seqnum_seed = htole32(0); TXD->mss_l4len_idx = htole32(mss_l4len_idx); return (1); } static int igb_isc_txd_encap(void *arg, if_pkt_info_t pi) { struct adapter *sc = arg; if_softc_ctx_t scctx = sc->shared; struct em_tx_queue *que = &sc->tx_queues[pi->ipi_qsidx]; struct tx_ring *txr = &que->txr; int nsegs = pi->ipi_nsegs; bus_dma_segment_t *segs = pi->ipi_segs; union e1000_adv_tx_desc *txd = NULL; int i, j, pidx_last; u32 olinfo_status, cmd_type_len, txd_flags; qidx_t ntxd; pidx_last = olinfo_status = 0; /* Basic descriptor defines */ cmd_type_len = (E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT); if (pi->ipi_mflags & M_VLANTAG) cmd_type_len |= E1000_ADVTXD_DCMD_VLE; i = pi->ipi_pidx; ntxd = scctx->isc_ntxd[0]; txd_flags = pi->ipi_flags & IPI_TX_INTR ? E1000_ADVTXD_DCMD_RS : 0; /* Consume the first descriptor */ i += igb_tx_ctx_setup(txr, pi, &cmd_type_len, &olinfo_status); if (i == scctx->isc_ntxd[0]) i = 0; /* 82575 needs the queue index added */ if (sc->hw.mac.type == e1000_82575) olinfo_status |= txr->me << 4; for (j = 0; j < nsegs; j++) { bus_size_t seglen; bus_addr_t segaddr; txd = (union e1000_adv_tx_desc *)&txr->tx_base[i]; seglen = segs[j].ds_len; segaddr = htole64(segs[j].ds_addr); txd->read.buffer_addr = segaddr; txd->read.cmd_type_len = htole32(E1000_TXD_CMD_IFCS | cmd_type_len | seglen); txd->read.olinfo_status = htole32(olinfo_status); pidx_last = i; if (++i == scctx->isc_ntxd[0]) { i = 0; } } if (txd_flags) { txr->tx_rsq[txr->tx_rs_pidx] = pidx_last; txr->tx_rs_pidx = (txr->tx_rs_pidx+1) & (ntxd-1); MPASS(txr->tx_rs_pidx != txr->tx_rs_cidx); } txd->read.cmd_type_len |= htole32(E1000_TXD_CMD_EOP | txd_flags); pi->ipi_new_pidx = i; return (0); } static void igb_isc_txd_flush(void *arg, uint16_t txqid, qidx_t pidx) { struct adapter *adapter = arg; struct em_tx_queue *que = &adapter->tx_queues[txqid]; struct tx_ring *txr = &que->txr; E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), pidx); } static int igb_isc_txd_credits_update(void *arg, uint16_t txqid, bool clear) { struct adapter *adapter = arg; if_softc_ctx_t scctx = adapter->shared; struct em_tx_queue *que = &adapter->tx_queues[txqid]; struct tx_ring *txr = &que->txr; qidx_t processed = 0; int updated; qidx_t cur, prev, ntxd, rs_cidx; int32_t delta; uint8_t status; rs_cidx = txr->tx_rs_cidx; if (rs_cidx == txr->tx_rs_pidx) return (0); cur = txr->tx_rsq[rs_cidx]; status = ((union e1000_adv_tx_desc *)&txr->tx_base[cur])->wb.status; updated = !!(status & E1000_TXD_STAT_DD); if (!updated) return (0); /* If clear is false just let caller know that there * are descriptors to reclaim */ if (!clear) return (1); prev = txr->tx_cidx_processed; ntxd = scctx->isc_ntxd[0]; do { MPASS(prev != cur); delta = (int32_t)cur - (int32_t)prev; if (delta < 0) delta += ntxd; MPASS(delta > 0); processed += delta; prev = cur; rs_cidx = (rs_cidx + 1) & (ntxd-1); if (rs_cidx == txr->tx_rs_pidx) break; cur = txr->tx_rsq[rs_cidx]; status = ((union e1000_adv_tx_desc *)&txr->tx_base[cur])->wb.status; } while ((status & E1000_TXD_STAT_DD)); txr->tx_rs_cidx = rs_cidx; txr->tx_cidx_processed = prev; return (processed); } static void igb_isc_rxd_refill(void *arg, if_rxd_update_t iru) { struct adapter *sc = arg; if_softc_ctx_t scctx = sc->shared; uint16_t rxqid = iru->iru_qsidx; struct em_rx_queue *que = &sc->rx_queues[rxqid]; union e1000_adv_rx_desc *rxd; struct rx_ring *rxr = &que->rxr; uint64_t *paddrs; uint32_t next_pidx, pidx; uint16_t count; int i; paddrs = iru->iru_paddrs; pidx = iru->iru_pidx; count = iru->iru_count; for (i = 0, next_pidx = pidx; i < count; i++) { rxd = (union e1000_adv_rx_desc *)&rxr->rx_base[next_pidx]; rxd->read.pkt_addr = htole64(paddrs[i]); if (++next_pidx == scctx->isc_nrxd[0]) next_pidx = 0; } } static void igb_isc_rxd_flush(void *arg, uint16_t rxqid, uint8_t flid __unused, qidx_t pidx) { struct adapter *sc = arg; struct em_rx_queue *que = &sc->rx_queues[rxqid]; struct rx_ring *rxr = &que->rxr; E1000_WRITE_REG(&sc->hw, E1000_RDT(rxr->me), pidx); } static int igb_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx, qidx_t budget) { struct adapter *sc = arg; if_softc_ctx_t scctx = sc->shared; struct em_rx_queue *que = &sc->rx_queues[rxqid]; struct rx_ring *rxr = &que->rxr; union e1000_adv_rx_desc *rxd; u32 staterr = 0; int cnt, i; for (cnt = 0, i = idx; cnt < scctx->isc_nrxd[0] && cnt <= budget;) { rxd = (union e1000_adv_rx_desc *)&rxr->rx_base[i]; staterr = le32toh(rxd->wb.upper.status_error); if ((staterr & E1000_RXD_STAT_DD) == 0) break; if (++i == scctx->isc_nrxd[0]) i = 0; if (staterr & E1000_RXD_STAT_EOP) cnt++; } return (cnt); } /**************************************************************** * Routine sends data which has been dma'ed into host memory * to upper layer. Initialize ri structure. * * Returns 0 upon success, errno on failure ***************************************************************/ static int igb_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri) { struct adapter *adapter = arg; if_softc_ctx_t scctx = adapter->shared; struct em_rx_queue *que = &adapter->rx_queues[ri->iri_qsidx]; struct rx_ring *rxr = &que->rxr; struct ifnet *ifp = iflib_get_ifp(adapter->ctx); union e1000_adv_rx_desc *rxd; u16 pkt_info, len; u16 vtag = 0; u32 ptype; u32 staterr = 0; bool eop; int i = 0; int cidx = ri->iri_cidx; do { rxd = (union e1000_adv_rx_desc *)&rxr->rx_base[cidx]; staterr = le32toh(rxd->wb.upper.status_error); pkt_info = le16toh(rxd->wb.lower.lo_dword.hs_rss.pkt_info); MPASS ((staterr & E1000_RXD_STAT_DD) != 0); len = le16toh(rxd->wb.upper.length); ptype = le32toh(rxd->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK; ri->iri_len += len; rxr->rx_bytes += ri->iri_len; rxd->wb.upper.status_error = 0; eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP); if (((adapter->hw.mac.type == e1000_i350) || (adapter->hw.mac.type == e1000_i354)) && (staterr & E1000_RXDEXT_STATERR_LB)) vtag = be16toh(rxd->wb.upper.vlan); else vtag = le16toh(rxd->wb.upper.vlan); /* Make sure bad packets are discarded */ if (eop && ((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0)) { adapter->dropped_pkts++; ++rxr->rx_discarded; return (EBADMSG); } ri->iri_frags[i].irf_flid = 0; ri->iri_frags[i].irf_idx = cidx; ri->iri_frags[i].irf_len = len; if (++cidx == scctx->isc_nrxd[0]) cidx = 0; #ifdef notyet if (rxr->hdr_split == TRUE) { ri->iri_frags[i].irf_flid = 1; ri->iri_frags[i].irf_idx = cidx; if (++cidx == scctx->isc_nrxd[0]) cidx = 0; } #endif i++; } while (!eop); rxr->rx_packets++; if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) igb_rx_checksum(staterr, ri, ptype); if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 && (staterr & E1000_RXD_STAT_VP) != 0) { ri->iri_vtag = vtag; ri->iri_flags |= M_VLANTAG; } ri->iri_flowid = le32toh(rxd->wb.lower.hi_dword.rss); ri->iri_rsstype = igb_determine_rsstype(pkt_info); ri->iri_nfrags = i; return (0); } /********************************************************************* * * Verify that the hardware indicated that the checksum is valid. * Inform the stack about the status of checksum so that stack * doesn't spend time verifying the checksum. * *********************************************************************/ static void igb_rx_checksum(u32 staterr, if_rxd_info_t ri, u32 ptype) { u16 status = (u16)staterr; u8 errors = (u8) (staterr >> 24); bool sctp = FALSE; /* Ignore Checksum bit is set */ if (status & E1000_RXD_STAT_IXSM) { ri->iri_csum_flags = 0; return; } if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 && (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0) sctp = 1; else sctp = 0; if (status & E1000_RXD_STAT_IPCS) { /* Did it pass? */ if (!(errors & E1000_RXD_ERR_IPE)) { /* IP Checksum Good */ ri->iri_csum_flags = CSUM_IP_CHECKED; ri->iri_csum_flags |= CSUM_IP_VALID; } else ri->iri_csum_flags = 0; } if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) { u64 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); if (sctp) /* reassign */ type = CSUM_SCTP_VALID; /* Did it pass? */ if (!(errors & E1000_RXD_ERR_TCPE)) { ri->iri_csum_flags |= type; if (sctp == 0) ri->iri_csum_data = htons(0xffff); } } return; } /******************************************************************** * * Parse the packet type to determine the appropriate hash * ******************************************************************/ static int igb_determine_rsstype(u16 pkt_info) { switch (pkt_info & E1000_RXDADV_RSSTYPE_MASK) { case E1000_RXDADV_RSSTYPE_IPV4_TCP: return M_HASHTYPE_RSS_TCP_IPV4; case E1000_RXDADV_RSSTYPE_IPV4: return M_HASHTYPE_RSS_IPV4; case E1000_RXDADV_RSSTYPE_IPV6_TCP: return M_HASHTYPE_RSS_TCP_IPV6; case E1000_RXDADV_RSSTYPE_IPV6_EX: return M_HASHTYPE_RSS_IPV6_EX; case E1000_RXDADV_RSSTYPE_IPV6: return M_HASHTYPE_RSS_IPV6; case E1000_RXDADV_RSSTYPE_IPV6_TCP_EX: return M_HASHTYPE_RSS_TCP_IPV6_EX; default: return M_HASHTYPE_OPAQUE; } } diff --git a/sys/dev/ixgbe/if_ix.c b/sys/dev/ixgbe/if_ix.c index 9f3674cdab5d..6e65f6bae55a 100644 --- a/sys/dev/ixgbe/if_ix.c +++ b/sys/dev/ixgbe/if_ix.c @@ -1,4605 +1,4603 @@ /****************************************************************************** Copyright (c) 2001-2017, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************/ /*$FreeBSD$*/ #include "opt_inet.h" #include "opt_inet6.h" #include "opt_rss.h" #include "ixgbe.h" #include "ixgbe_sriov.h" #include "ifdi_if.h" #include #include /************************************************************************ * Driver version ************************************************************************/ char ixgbe_driver_version[] = "4.0.1-k"; /************************************************************************ * PCI Device ID Table * * Used by probe to select devices to load on * Last field stores an index into ixgbe_strings * Last entry must be all 0s * * { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index } ************************************************************************/ static pci_vendor_info_t ixgbe_vendor_info_array[] = { PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_DUAL_PORT, "Intel(R) PRO/10GbE PCI-Express Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_SINGLE_PORT, "Intel(R) PRO/10GbE PCI-Express Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_CX4, "Intel(R) PRO/10GbE PCI-Express Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT, "Intel(R) PRO/10GbE PCI-Express Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT2, "Intel(R) PRO/10GbE PCI-Express Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598, "Intel(R) PRO/10GbE PCI-Express Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_DA_DUAL_PORT, "Intel(R) PRO/10GbE PCI-Express Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_CX4_DUAL_PORT, "Intel(R) PRO/10GbE PCI-Express Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_XF_LR, "Intel(R) PRO/10GbE PCI-Express Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM, "Intel(R) PRO/10GbE PCI-Express Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_SFP_LOM, "Intel(R) PRO/10GbE PCI-Express Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4, "Intel(R) PRO/10GbE PCI-Express Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4_MEZZ, "Intel(R) PRO/10GbE PCI-Express Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP, "Intel(R) PRO/10GbE PCI-Express Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_XAUI_LOM, "Intel(R) PRO/10GbE PCI-Express Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_CX4, "Intel(R) PRO/10GbE PCI-Express Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_T3_LOM, "Intel(R) PRO/10GbE PCI-Express Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_COMBO_BACKPLANE, "Intel(R) PRO/10GbE PCI-Express Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_BACKPLANE_FCOE, "Intel(R) PRO/10GbE PCI-Express Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_SF2, "Intel(R) PRO/10GbE PCI-Express Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_FCOE, "Intel(R) PRO/10GbE PCI-Express Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599EN_SFP, "Intel(R) PRO/10GbE PCI-Express Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_SF_QP, "Intel(R) PRO/10GbE PCI-Express Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_QSFP_SF_QP, "Intel(R) PRO/10GbE PCI-Express Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540T, "Intel(R) PRO/10GbE PCI-Express Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540T1, "Intel(R) PRO/10GbE PCI-Express Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550T, "Intel(R) PRO/10GbE PCI-Express Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550T1, "Intel(R) PRO/10GbE PCI-Express Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_X_KR, "Intel(R) PRO/10GbE PCI-Express Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_X_KX4, "Intel(R) PRO/10GbE PCI-Express Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_X_10G_T, "Intel(R) PRO/10GbE PCI-Express Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_X_1G_T, "Intel(R) PRO/10GbE PCI-Express Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_X_SFP, "Intel(R) PRO/10GbE PCI-Express Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_A_KR, "Intel(R) PRO/10GbE PCI-Express Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_A_KR_L, "Intel(R) PRO/10GbE PCI-Express Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_A_SFP, "Intel(R) PRO/10GbE PCI-Express Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_A_SFP_N, "Intel(R) PRO/10GbE PCI-Express Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_A_SGMII, "Intel(R) PRO/10GbE PCI-Express Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_A_SGMII_L, "Intel(R) PRO/10GbE PCI-Express Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_A_10G_T, "Intel(R) PRO/10GbE PCI-Express Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_A_1G_T, "Intel(R) PRO/10GbE PCI-Express Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_A_1G_T_L, "Intel(R) PRO/10GbE PCI-Express Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540_BYPASS, "Intel(R) PRO/10GbE PCI-Express Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_BYPASS, "Intel(R) PRO/10GbE PCI-Express Network Driver"), /* required last entry */ PVID_END }; static void *ixgbe_register(device_t dev); static int ixgbe_if_attach_pre(if_ctx_t ctx); static int ixgbe_if_attach_post(if_ctx_t ctx); static int ixgbe_if_detach(if_ctx_t ctx); static int ixgbe_if_shutdown(if_ctx_t ctx); static int ixgbe_if_suspend(if_ctx_t ctx); static int ixgbe_if_resume(if_ctx_t ctx); static void ixgbe_if_stop(if_ctx_t ctx); void ixgbe_if_enable_intr(if_ctx_t ctx); static void ixgbe_if_disable_intr(if_ctx_t ctx); static void ixgbe_link_intr_enable(if_ctx_t ctx); static int ixgbe_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t qid); static void ixgbe_if_media_status(if_ctx_t ctx, struct ifmediareq * ifmr); static int ixgbe_if_media_change(if_ctx_t ctx); static int ixgbe_if_msix_intr_assign(if_ctx_t, int); static int ixgbe_if_mtu_set(if_ctx_t ctx, uint32_t mtu); static void ixgbe_if_crcstrip_set(if_ctx_t ctx, int onoff, int strip); static void ixgbe_if_multi_set(if_ctx_t ctx); static int ixgbe_if_promisc_set(if_ctx_t ctx, int flags); static int ixgbe_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int nrxqs, int nrxqsets); static int ixgbe_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int nrxqs, int nrxqsets); static void ixgbe_if_queues_free(if_ctx_t ctx); static void ixgbe_if_timer(if_ctx_t ctx, uint16_t); static void ixgbe_if_update_admin_status(if_ctx_t ctx); static void ixgbe_if_vlan_register(if_ctx_t ctx, u16 vtag); static void ixgbe_if_vlan_unregister(if_ctx_t ctx, u16 vtag); static int ixgbe_if_i2c_req(if_ctx_t ctx, struct ifi2creq *req); static bool ixgbe_if_needs_restart(if_ctx_t ctx, enum iflib_restart_event event); int ixgbe_intr(void *arg); /************************************************************************ * Function prototypes ************************************************************************/ #if __FreeBSD_version >= 1100036 static uint64_t ixgbe_if_get_counter(if_ctx_t, ift_counter); #endif static void ixgbe_enable_queue(struct adapter *adapter, u32 vector); static void ixgbe_disable_queue(struct adapter *adapter, u32 vector); static void ixgbe_add_device_sysctls(if_ctx_t ctx); static int ixgbe_allocate_pci_resources(if_ctx_t ctx); static int ixgbe_setup_low_power_mode(if_ctx_t ctx); static void ixgbe_config_dmac(struct adapter *adapter); static void ixgbe_configure_ivars(struct adapter *adapter); static void ixgbe_set_ivar(struct adapter *adapter, u8 entry, u8 vector, s8 type); static u8 *ixgbe_mc_array_itr(struct ixgbe_hw *, u8 **, u32 *); static bool ixgbe_sfp_probe(if_ctx_t ctx); static void ixgbe_free_pci_resources(if_ctx_t ctx); static int ixgbe_msix_link(void *arg); static int ixgbe_msix_que(void *arg); static void ixgbe_initialize_rss_mapping(struct adapter *adapter); static void ixgbe_initialize_receive_units(if_ctx_t ctx); static void ixgbe_initialize_transmit_units(if_ctx_t ctx); static int ixgbe_setup_interface(if_ctx_t ctx); static void ixgbe_init_device_features(struct adapter *adapter); static void ixgbe_check_fan_failure(struct adapter *, u32, bool); static void ixgbe_add_media_types(if_ctx_t ctx); static void ixgbe_update_stats_counters(struct adapter *adapter); static void ixgbe_config_link(if_ctx_t ctx); static void ixgbe_get_slot_info(struct adapter *); static void ixgbe_check_wol_support(struct adapter *adapter); static void ixgbe_enable_rx_drop(struct adapter *); static void ixgbe_disable_rx_drop(struct adapter *); static void ixgbe_add_hw_stats(struct adapter *adapter); static int ixgbe_set_flowcntl(struct adapter *, int); static int ixgbe_set_advertise(struct adapter *, int); static int ixgbe_get_advertise(struct adapter *); static void ixgbe_setup_vlan_hw_support(if_ctx_t ctx); static void ixgbe_config_gpie(struct adapter *adapter); static void ixgbe_config_delay_values(struct adapter *adapter); /* Sysctl handlers */ static int ixgbe_sysctl_flowcntl(SYSCTL_HANDLER_ARGS); static int ixgbe_sysctl_advertise(SYSCTL_HANDLER_ARGS); static int ixgbe_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS); static int ixgbe_sysctl_dmac(SYSCTL_HANDLER_ARGS); static int ixgbe_sysctl_phy_temp(SYSCTL_HANDLER_ARGS); static int ixgbe_sysctl_phy_overtemp_occurred(SYSCTL_HANDLER_ARGS); #ifdef IXGBE_DEBUG static int ixgbe_sysctl_power_state(SYSCTL_HANDLER_ARGS); static int ixgbe_sysctl_print_rss_config(SYSCTL_HANDLER_ARGS); #endif static int ixgbe_sysctl_rdh_handler(SYSCTL_HANDLER_ARGS); static int ixgbe_sysctl_rdt_handler(SYSCTL_HANDLER_ARGS); static int ixgbe_sysctl_tdt_handler(SYSCTL_HANDLER_ARGS); static int ixgbe_sysctl_tdh_handler(SYSCTL_HANDLER_ARGS); static int ixgbe_sysctl_eee_state(SYSCTL_HANDLER_ARGS); static int ixgbe_sysctl_wol_enable(SYSCTL_HANDLER_ARGS); static int ixgbe_sysctl_wufc(SYSCTL_HANDLER_ARGS); /* Deferred interrupt tasklets */ static void ixgbe_handle_msf(void *); static void ixgbe_handle_mod(void *); static void ixgbe_handle_phy(void *); /************************************************************************ * FreeBSD Device Interface Entry Points ************************************************************************/ static device_method_t ix_methods[] = { /* Device interface */ DEVMETHOD(device_register, ixgbe_register), DEVMETHOD(device_probe, iflib_device_probe), DEVMETHOD(device_attach, iflib_device_attach), DEVMETHOD(device_detach, iflib_device_detach), DEVMETHOD(device_shutdown, iflib_device_shutdown), DEVMETHOD(device_suspend, iflib_device_suspend), DEVMETHOD(device_resume, iflib_device_resume), #ifdef PCI_IOV DEVMETHOD(pci_iov_init, iflib_device_iov_init), DEVMETHOD(pci_iov_uninit, iflib_device_iov_uninit), DEVMETHOD(pci_iov_add_vf, iflib_device_iov_add_vf), #endif /* PCI_IOV */ DEVMETHOD_END }; static driver_t ix_driver = { "ix", ix_methods, sizeof(struct adapter), }; devclass_t ix_devclass; DRIVER_MODULE(ix, pci, ix_driver, ix_devclass, 0, 0); IFLIB_PNP_INFO(pci, ix_driver, ixgbe_vendor_info_array); MODULE_DEPEND(ix, pci, 1, 1, 1); MODULE_DEPEND(ix, ether, 1, 1, 1); MODULE_DEPEND(ix, iflib, 1, 1, 1); static device_method_t ixgbe_if_methods[] = { DEVMETHOD(ifdi_attach_pre, ixgbe_if_attach_pre), DEVMETHOD(ifdi_attach_post, ixgbe_if_attach_post), DEVMETHOD(ifdi_detach, ixgbe_if_detach), DEVMETHOD(ifdi_shutdown, ixgbe_if_shutdown), DEVMETHOD(ifdi_suspend, ixgbe_if_suspend), DEVMETHOD(ifdi_resume, ixgbe_if_resume), DEVMETHOD(ifdi_init, ixgbe_if_init), DEVMETHOD(ifdi_stop, ixgbe_if_stop), DEVMETHOD(ifdi_msix_intr_assign, ixgbe_if_msix_intr_assign), DEVMETHOD(ifdi_intr_enable, ixgbe_if_enable_intr), DEVMETHOD(ifdi_intr_disable, ixgbe_if_disable_intr), DEVMETHOD(ifdi_link_intr_enable, ixgbe_link_intr_enable), DEVMETHOD(ifdi_tx_queue_intr_enable, ixgbe_if_rx_queue_intr_enable), DEVMETHOD(ifdi_rx_queue_intr_enable, ixgbe_if_rx_queue_intr_enable), DEVMETHOD(ifdi_tx_queues_alloc, ixgbe_if_tx_queues_alloc), DEVMETHOD(ifdi_rx_queues_alloc, ixgbe_if_rx_queues_alloc), DEVMETHOD(ifdi_queues_free, ixgbe_if_queues_free), DEVMETHOD(ifdi_update_admin_status, ixgbe_if_update_admin_status), DEVMETHOD(ifdi_multi_set, ixgbe_if_multi_set), DEVMETHOD(ifdi_mtu_set, ixgbe_if_mtu_set), DEVMETHOD(ifdi_crcstrip_set, ixgbe_if_crcstrip_set), DEVMETHOD(ifdi_media_status, ixgbe_if_media_status), DEVMETHOD(ifdi_media_change, ixgbe_if_media_change), DEVMETHOD(ifdi_promisc_set, ixgbe_if_promisc_set), DEVMETHOD(ifdi_timer, ixgbe_if_timer), DEVMETHOD(ifdi_vlan_register, ixgbe_if_vlan_register), DEVMETHOD(ifdi_vlan_unregister, ixgbe_if_vlan_unregister), DEVMETHOD(ifdi_get_counter, ixgbe_if_get_counter), DEVMETHOD(ifdi_i2c_req, ixgbe_if_i2c_req), DEVMETHOD(ifdi_needs_restart, ixgbe_if_needs_restart), #ifdef PCI_IOV DEVMETHOD(ifdi_iov_init, ixgbe_if_iov_init), DEVMETHOD(ifdi_iov_uninit, ixgbe_if_iov_uninit), DEVMETHOD(ifdi_iov_vf_add, ixgbe_if_iov_vf_add), #endif /* PCI_IOV */ DEVMETHOD_END }; /* * TUNEABLE PARAMETERS: */ static SYSCTL_NODE(_hw, OID_AUTO, ix, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "IXGBE driver parameters"); static driver_t ixgbe_if_driver = { "ixgbe_if", ixgbe_if_methods, sizeof(struct adapter) }; static int ixgbe_max_interrupt_rate = (4000000 / IXGBE_LOW_LATENCY); SYSCTL_INT(_hw_ix, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN, &ixgbe_max_interrupt_rate, 0, "Maximum interrupts per second"); /* Flow control setting, default to full */ static int ixgbe_flow_control = ixgbe_fc_full; SYSCTL_INT(_hw_ix, OID_AUTO, flow_control, CTLFLAG_RDTUN, &ixgbe_flow_control, 0, "Default flow control used for all adapters"); /* Advertise Speed, default to 0 (auto) */ static int ixgbe_advertise_speed = 0; SYSCTL_INT(_hw_ix, OID_AUTO, advertise_speed, CTLFLAG_RDTUN, &ixgbe_advertise_speed, 0, "Default advertised speed for all adapters"); /* * Smart speed setting, default to on * this only works as a compile option * right now as its during attach, set * this to 'ixgbe_smart_speed_off' to * disable. */ static int ixgbe_smart_speed = ixgbe_smart_speed_on; /* * MSI-X should be the default for best performance, * but this allows it to be forced off for testing. */ static int ixgbe_enable_msix = 1; SYSCTL_INT(_hw_ix, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &ixgbe_enable_msix, 0, "Enable MSI-X interrupts"); /* * Defining this on will allow the use * of unsupported SFP+ modules, note that * doing so you are on your own :) */ static int allow_unsupported_sfp = FALSE; SYSCTL_INT(_hw_ix, OID_AUTO, unsupported_sfp, CTLFLAG_RDTUN, &allow_unsupported_sfp, 0, "Allow unsupported SFP modules...use at your own risk"); /* * Not sure if Flow Director is fully baked, * so we'll default to turning it off. */ static int ixgbe_enable_fdir = 0; SYSCTL_INT(_hw_ix, OID_AUTO, enable_fdir, CTLFLAG_RDTUN, &ixgbe_enable_fdir, 0, "Enable Flow Director"); /* Receive-Side Scaling */ static int ixgbe_enable_rss = 1; SYSCTL_INT(_hw_ix, OID_AUTO, enable_rss, CTLFLAG_RDTUN, &ixgbe_enable_rss, 0, "Enable Receive-Side Scaling (RSS)"); #if 0 /* Keep running tab on them for sanity check */ static int ixgbe_total_ports; #endif MALLOC_DEFINE(M_IXGBE, "ix", "ix driver allocations"); /* * For Flow Director: this is the number of TX packets we sample * for the filter pool, this means every 20th packet will be probed. * * This feature can be disabled by setting this to 0. */ static int atr_sample_rate = 20; extern struct if_txrx ixgbe_txrx; static struct if_shared_ctx ixgbe_sctx_init = { .isc_magic = IFLIB_MAGIC, .isc_q_align = PAGE_SIZE,/* max(DBA_ALIGN, PAGE_SIZE) */ .isc_tx_maxsize = IXGBE_TSO_SIZE + sizeof(struct ether_vlan_header), .isc_tx_maxsegsize = PAGE_SIZE, .isc_tso_maxsize = IXGBE_TSO_SIZE + sizeof(struct ether_vlan_header), .isc_tso_maxsegsize = PAGE_SIZE, .isc_rx_maxsize = PAGE_SIZE*4, .isc_rx_nsegments = 1, .isc_rx_maxsegsize = PAGE_SIZE*4, .isc_nfl = 1, .isc_ntxqs = 1, .isc_nrxqs = 1, .isc_admin_intrcnt = 1, .isc_vendor_info = ixgbe_vendor_info_array, .isc_driver_version = ixgbe_driver_version, .isc_driver = &ixgbe_if_driver, .isc_flags = IFLIB_TSO_INIT_IP, .isc_nrxd_min = {MIN_RXD}, .isc_ntxd_min = {MIN_TXD}, .isc_nrxd_max = {MAX_RXD}, .isc_ntxd_max = {MAX_TXD}, .isc_nrxd_default = {DEFAULT_RXD}, .isc_ntxd_default = {DEFAULT_TXD}, }; -if_shared_ctx_t ixgbe_sctx = &ixgbe_sctx_init; - /************************************************************************ * ixgbe_if_tx_queues_alloc ************************************************************************/ static int ixgbe_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int ntxqs, int ntxqsets) { struct adapter *adapter = iflib_get_softc(ctx); if_softc_ctx_t scctx = adapter->shared; struct ix_tx_queue *que; int i, j, error; MPASS(adapter->num_tx_queues > 0); MPASS(adapter->num_tx_queues == ntxqsets); MPASS(ntxqs == 1); /* Allocate queue structure memory */ adapter->tx_queues = (struct ix_tx_queue *)malloc(sizeof(struct ix_tx_queue) * ntxqsets, M_IXGBE, M_NOWAIT | M_ZERO); if (!adapter->tx_queues) { device_printf(iflib_get_dev(ctx), "Unable to allocate TX ring memory\n"); return (ENOMEM); } for (i = 0, que = adapter->tx_queues; i < ntxqsets; i++, que++) { struct tx_ring *txr = &que->txr; /* In case SR-IOV is enabled, align the index properly */ txr->me = ixgbe_vf_que_index(adapter->iov_mode, adapter->pool, i); txr->adapter = que->adapter = adapter; /* Allocate report status array */ txr->tx_rsq = (qidx_t *)malloc(sizeof(qidx_t) * scctx->isc_ntxd[0], M_IXGBE, M_NOWAIT | M_ZERO); if (txr->tx_rsq == NULL) { error = ENOMEM; goto fail; } for (j = 0; j < scctx->isc_ntxd[0]; j++) txr->tx_rsq[j] = QIDX_INVALID; /* get the virtual and physical address of the hardware queues */ txr->tail = IXGBE_TDT(txr->me); txr->tx_base = (union ixgbe_adv_tx_desc *)vaddrs[i]; txr->tx_paddr = paddrs[i]; txr->bytes = 0; txr->total_packets = 0; /* Set the rate at which we sample packets */ if (adapter->feat_en & IXGBE_FEATURE_FDIR) txr->atr_sample = atr_sample_rate; } device_printf(iflib_get_dev(ctx), "allocated for %d queues\n", adapter->num_tx_queues); return (0); fail: ixgbe_if_queues_free(ctx); return (error); } /* ixgbe_if_tx_queues_alloc */ /************************************************************************ * ixgbe_if_rx_queues_alloc ************************************************************************/ static int ixgbe_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int nrxqs, int nrxqsets) { struct adapter *adapter = iflib_get_softc(ctx); struct ix_rx_queue *que; int i; MPASS(adapter->num_rx_queues > 0); MPASS(adapter->num_rx_queues == nrxqsets); MPASS(nrxqs == 1); /* Allocate queue structure memory */ adapter->rx_queues = (struct ix_rx_queue *)malloc(sizeof(struct ix_rx_queue)*nrxqsets, M_IXGBE, M_NOWAIT | M_ZERO); if (!adapter->rx_queues) { device_printf(iflib_get_dev(ctx), "Unable to allocate TX ring memory\n"); return (ENOMEM); } for (i = 0, que = adapter->rx_queues; i < nrxqsets; i++, que++) { struct rx_ring *rxr = &que->rxr; /* In case SR-IOV is enabled, align the index properly */ rxr->me = ixgbe_vf_que_index(adapter->iov_mode, adapter->pool, i); rxr->adapter = que->adapter = adapter; /* get the virtual and physical address of the hw queues */ rxr->tail = IXGBE_RDT(rxr->me); rxr->rx_base = (union ixgbe_adv_rx_desc *)vaddrs[i]; rxr->rx_paddr = paddrs[i]; rxr->bytes = 0; rxr->que = que; } device_printf(iflib_get_dev(ctx), "allocated for %d rx queues\n", adapter->num_rx_queues); return (0); } /* ixgbe_if_rx_queues_alloc */ /************************************************************************ * ixgbe_if_queues_free ************************************************************************/ static void ixgbe_if_queues_free(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct ix_tx_queue *tx_que = adapter->tx_queues; struct ix_rx_queue *rx_que = adapter->rx_queues; int i; if (tx_que != NULL) { for (i = 0; i < adapter->num_tx_queues; i++, tx_que++) { struct tx_ring *txr = &tx_que->txr; if (txr->tx_rsq == NULL) break; free(txr->tx_rsq, M_IXGBE); txr->tx_rsq = NULL; } free(adapter->tx_queues, M_IXGBE); adapter->tx_queues = NULL; } if (rx_que != NULL) { free(adapter->rx_queues, M_IXGBE); adapter->rx_queues = NULL; } } /* ixgbe_if_queues_free */ /************************************************************************ * ixgbe_initialize_rss_mapping ************************************************************************/ static void ixgbe_initialize_rss_mapping(struct adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; u32 reta = 0, mrqc, rss_key[10]; int queue_id, table_size, index_mult; int i, j; u32 rss_hash_config; if (adapter->feat_en & IXGBE_FEATURE_RSS) { /* Fetch the configured RSS key */ rss_getkey((uint8_t *)&rss_key); } else { /* set up random bits */ arc4rand(&rss_key, sizeof(rss_key), 0); } /* Set multiplier for RETA setup and table size based on MAC */ index_mult = 0x1; table_size = 128; switch (adapter->hw.mac.type) { case ixgbe_mac_82598EB: index_mult = 0x11; break; case ixgbe_mac_X550: case ixgbe_mac_X550EM_x: case ixgbe_mac_X550EM_a: table_size = 512; break; default: break; } /* Set up the redirection table */ for (i = 0, j = 0; i < table_size; i++, j++) { if (j == adapter->num_rx_queues) j = 0; if (adapter->feat_en & IXGBE_FEATURE_RSS) { /* * Fetch the RSS bucket id for the given indirection * entry. Cap it at the number of configured buckets * (which is num_rx_queues.) */ queue_id = rss_get_indirection_to_bucket(i); queue_id = queue_id % adapter->num_rx_queues; } else queue_id = (j * index_mult); /* * The low 8 bits are for hash value (n+0); * The next 8 bits are for hash value (n+1), etc. */ reta = reta >> 8; reta = reta | (((uint32_t)queue_id) << 24); if ((i & 3) == 3) { if (i < 128) IXGBE_WRITE_REG(hw, IXGBE_RETA(i >> 2), reta); else IXGBE_WRITE_REG(hw, IXGBE_ERETA((i >> 2) - 32), reta); reta = 0; } } /* Now fill our hash function seeds */ for (i = 0; i < 10; i++) IXGBE_WRITE_REG(hw, IXGBE_RSSRK(i), rss_key[i]); /* Perform hash on these packet types */ if (adapter->feat_en & IXGBE_FEATURE_RSS) rss_hash_config = rss_gethashconfig(); else { /* * Disable UDP - IP fragments aren't currently being handled * and so we end up with a mix of 2-tuple and 4-tuple * traffic. */ rss_hash_config = RSS_HASHTYPE_RSS_IPV4 | RSS_HASHTYPE_RSS_TCP_IPV4 | RSS_HASHTYPE_RSS_IPV6 | RSS_HASHTYPE_RSS_TCP_IPV6 | RSS_HASHTYPE_RSS_IPV6_EX | RSS_HASHTYPE_RSS_TCP_IPV6_EX; } mrqc = IXGBE_MRQC_RSSEN; if (rss_hash_config & RSS_HASHTYPE_RSS_IPV4) mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4; if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV4) mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_TCP; if (rss_hash_config & RSS_HASHTYPE_RSS_IPV6) mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6; if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV6) mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_TCP; if (rss_hash_config & RSS_HASHTYPE_RSS_IPV6_EX) mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX; if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV6_EX) mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP; if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV4) mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP; if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV6) mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP; if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV6_EX) mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP; mrqc |= ixgbe_get_mrqc(adapter->iov_mode); IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc); } /* ixgbe_initialize_rss_mapping */ /************************************************************************ * ixgbe_initialize_receive_units - Setup receive registers and features. ************************************************************************/ #define BSIZEPKT_ROUNDUP ((1<shared; struct ixgbe_hw *hw = &adapter->hw; struct ifnet *ifp = iflib_get_ifp(ctx); struct ix_rx_queue *que; int i, j; u32 bufsz, fctrl, srrctl, rxcsum; u32 hlreg; /* * Make sure receives are disabled while * setting up the descriptor ring */ ixgbe_disable_rx(hw); /* Enable broadcasts */ fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL); fctrl |= IXGBE_FCTRL_BAM; if (adapter->hw.mac.type == ixgbe_mac_82598EB) { fctrl |= IXGBE_FCTRL_DPF; fctrl |= IXGBE_FCTRL_PMCF; } IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl); /* Set for Jumbo Frames? */ hlreg = IXGBE_READ_REG(hw, IXGBE_HLREG0); if (ifp->if_mtu > ETHERMTU) hlreg |= IXGBE_HLREG0_JUMBOEN; else hlreg &= ~IXGBE_HLREG0_JUMBOEN; IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg); bufsz = (adapter->rx_mbuf_sz + BSIZEPKT_ROUNDUP) >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; /* Setup the Base and Length of the Rx Descriptor Ring */ for (i = 0, que = adapter->rx_queues; i < adapter->num_rx_queues; i++, que++) { struct rx_ring *rxr = &que->rxr; u64 rdba = rxr->rx_paddr; j = rxr->me; /* Setup the Base and Length of the Rx Descriptor Ring */ IXGBE_WRITE_REG(hw, IXGBE_RDBAL(j), (rdba & 0x00000000ffffffffULL)); IXGBE_WRITE_REG(hw, IXGBE_RDBAH(j), (rdba >> 32)); IXGBE_WRITE_REG(hw, IXGBE_RDLEN(j), scctx->isc_nrxd[0] * sizeof(union ixgbe_adv_rx_desc)); /* Set up the SRRCTL register */ srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(j)); srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK; srrctl &= ~IXGBE_SRRCTL_BSIZEPKT_MASK; srrctl |= bufsz; srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF; /* * Set DROP_EN iff we have no flow control and >1 queue. * Note that srrctl was cleared shortly before during reset, * so we do not need to clear the bit, but do it just in case * this code is moved elsewhere. */ if (adapter->num_rx_queues > 1 && adapter->hw.fc.requested_mode == ixgbe_fc_none) { srrctl |= IXGBE_SRRCTL_DROP_EN; } else { srrctl &= ~IXGBE_SRRCTL_DROP_EN; } IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(j), srrctl); /* Setup the HW Rx Head and Tail Descriptor Pointers */ IXGBE_WRITE_REG(hw, IXGBE_RDH(j), 0); IXGBE_WRITE_REG(hw, IXGBE_RDT(j), 0); /* Set the driver rx tail address */ rxr->tail = IXGBE_RDT(rxr->me); } if (adapter->hw.mac.type != ixgbe_mac_82598EB) { u32 psrtype = IXGBE_PSRTYPE_TCPHDR | IXGBE_PSRTYPE_UDPHDR | IXGBE_PSRTYPE_IPV4HDR | IXGBE_PSRTYPE_IPV6HDR; IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0), psrtype); } rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM); ixgbe_initialize_rss_mapping(adapter); if (adapter->num_rx_queues > 1) { /* RSS and RX IPP Checksum are mutually exclusive */ rxcsum |= IXGBE_RXCSUM_PCSD; } if (ifp->if_capenable & IFCAP_RXCSUM) rxcsum |= IXGBE_RXCSUM_PCSD; /* This is useful for calculating UDP/IP fragment checksums */ if (!(rxcsum & IXGBE_RXCSUM_PCSD)) rxcsum |= IXGBE_RXCSUM_IPPCSE; IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum); } /* ixgbe_initialize_receive_units */ /************************************************************************ * ixgbe_initialize_transmit_units - Enable transmit units. ************************************************************************/ static void ixgbe_initialize_transmit_units(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct ixgbe_hw *hw = &adapter->hw; if_softc_ctx_t scctx = adapter->shared; struct ix_tx_queue *que; int i; /* Setup the Base and Length of the Tx Descriptor Ring */ for (i = 0, que = adapter->tx_queues; i < adapter->num_tx_queues; i++, que++) { struct tx_ring *txr = &que->txr; u64 tdba = txr->tx_paddr; u32 txctrl = 0; int j = txr->me; IXGBE_WRITE_REG(hw, IXGBE_TDBAL(j), (tdba & 0x00000000ffffffffULL)); IXGBE_WRITE_REG(hw, IXGBE_TDBAH(j), (tdba >> 32)); IXGBE_WRITE_REG(hw, IXGBE_TDLEN(j), scctx->isc_ntxd[0] * sizeof(union ixgbe_adv_tx_desc)); /* Setup the HW Tx Head and Tail descriptor pointers */ IXGBE_WRITE_REG(hw, IXGBE_TDH(j), 0); IXGBE_WRITE_REG(hw, IXGBE_TDT(j), 0); /* Cache the tail address */ txr->tail = IXGBE_TDT(txr->me); txr->tx_rs_cidx = txr->tx_rs_pidx; txr->tx_cidx_processed = scctx->isc_ntxd[0] - 1; for (int k = 0; k < scctx->isc_ntxd[0]; k++) txr->tx_rsq[k] = QIDX_INVALID; /* Disable Head Writeback */ /* * Note: for X550 series devices, these registers are actually * prefixed with TPH_ isntead of DCA_, but the addresses and * fields remain the same. */ switch (hw->mac.type) { case ixgbe_mac_82598EB: txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(j)); break; default: txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL_82599(j)); break; } txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN; switch (hw->mac.type) { case ixgbe_mac_82598EB: IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(j), txctrl); break; default: IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(j), txctrl); break; } } if (hw->mac.type != ixgbe_mac_82598EB) { u32 dmatxctl, rttdcs; dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL); dmatxctl |= IXGBE_DMATXCTL_TE; IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl); /* Disable arbiter to set MTQC */ rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS); rttdcs |= IXGBE_RTTDCS_ARBDIS; IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs); IXGBE_WRITE_REG(hw, IXGBE_MTQC, ixgbe_get_mtqc(adapter->iov_mode)); rttdcs &= ~IXGBE_RTTDCS_ARBDIS; IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs); } } /* ixgbe_initialize_transmit_units */ /************************************************************************ * ixgbe_register ************************************************************************/ static void * ixgbe_register(device_t dev) { - return (ixgbe_sctx); + return (&ixgbe_sctx_init); } /* ixgbe_register */ /************************************************************************ * ixgbe_if_attach_pre - Device initialization routine, part 1 * * Called when the driver is being loaded. * Identifies the type of hardware, initializes the hardware, * and initializes iflib structures. * * return 0 on success, positive on failure ************************************************************************/ static int ixgbe_if_attach_pre(if_ctx_t ctx) { struct adapter *adapter; device_t dev; if_softc_ctx_t scctx; struct ixgbe_hw *hw; int error = 0; u32 ctrl_ext; INIT_DEBUGOUT("ixgbe_attach: begin"); /* Allocate, clear, and link in our adapter structure */ dev = iflib_get_dev(ctx); adapter = iflib_get_softc(ctx); adapter->hw.back = adapter; adapter->ctx = ctx; adapter->dev = dev; scctx = adapter->shared = iflib_get_softc_ctx(ctx); adapter->media = iflib_get_media(ctx); hw = &adapter->hw; /* Determine hardware revision */ hw->vendor_id = pci_get_vendor(dev); hw->device_id = pci_get_device(dev); hw->revision_id = pci_get_revid(dev); hw->subsystem_vendor_id = pci_get_subvendor(dev); hw->subsystem_device_id = pci_get_subdevice(dev); /* Do base PCI setup - map BAR0 */ if (ixgbe_allocate_pci_resources(ctx)) { device_printf(dev, "Allocation of PCI resources failed\n"); return (ENXIO); } /* let hardware know driver is loaded */ ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT); ctrl_ext |= IXGBE_CTRL_EXT_DRV_LOAD; IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext); /* * Initialize the shared code */ if (ixgbe_init_shared_code(hw) != 0) { device_printf(dev, "Unable to initialize the shared code\n"); error = ENXIO; goto err_pci; } if (hw->mbx.ops.init_params) hw->mbx.ops.init_params(hw); hw->allow_unsupported_sfp = allow_unsupported_sfp; if (hw->mac.type != ixgbe_mac_82598EB) hw->phy.smart_speed = ixgbe_smart_speed; ixgbe_init_device_features(adapter); /* Enable WoL (if supported) */ ixgbe_check_wol_support(adapter); /* Verify adapter fan is still functional (if applicable) */ if (adapter->feat_en & IXGBE_FEATURE_FAN_FAIL) { u32 esdp = IXGBE_READ_REG(hw, IXGBE_ESDP); ixgbe_check_fan_failure(adapter, esdp, FALSE); } /* Ensure SW/FW semaphore is free */ ixgbe_init_swfw_semaphore(hw); /* Set an initial default flow control value */ hw->fc.requested_mode = ixgbe_flow_control; hw->phy.reset_if_overtemp = TRUE; error = ixgbe_reset_hw(hw); hw->phy.reset_if_overtemp = FALSE; if (error == IXGBE_ERR_SFP_NOT_PRESENT) { /* * No optics in this port, set up * so the timer routine will probe * for later insertion. */ adapter->sfp_probe = TRUE; error = 0; } else if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) { device_printf(dev, "Unsupported SFP+ module detected!\n"); error = EIO; goto err_pci; } else if (error) { device_printf(dev, "Hardware initialization failed\n"); error = EIO; goto err_pci; } /* Make sure we have a good EEPROM before we read from it */ if (ixgbe_validate_eeprom_checksum(&adapter->hw, NULL) < 0) { device_printf(dev, "The EEPROM Checksum Is Not Valid\n"); error = EIO; goto err_pci; } error = ixgbe_start_hw(hw); switch (error) { case IXGBE_ERR_EEPROM_VERSION: device_printf(dev, "This device is a pre-production adapter/LOM. Please be aware there may be issues associated with your hardware.\nIf you are experiencing problems please contact your Intel or hardware representative who provided you with this hardware.\n"); break; case IXGBE_ERR_SFP_NOT_SUPPORTED: device_printf(dev, "Unsupported SFP+ Module\n"); error = EIO; goto err_pci; case IXGBE_ERR_SFP_NOT_PRESENT: device_printf(dev, "No SFP+ Module found\n"); /* falls thru */ default: break; } /* Most of the iflib initialization... */ iflib_set_mac(ctx, hw->mac.addr); switch (adapter->hw.mac.type) { case ixgbe_mac_X550: case ixgbe_mac_X550EM_x: case ixgbe_mac_X550EM_a: scctx->isc_rss_table_size = 512; scctx->isc_ntxqsets_max = scctx->isc_nrxqsets_max = 64; break; default: scctx->isc_rss_table_size = 128; scctx->isc_ntxqsets_max = scctx->isc_nrxqsets_max = 16; } /* Allow legacy interrupts */ ixgbe_txrx.ift_legacy_intr = ixgbe_intr; scctx->isc_txqsizes[0] = roundup2(scctx->isc_ntxd[0] * sizeof(union ixgbe_adv_tx_desc) + sizeof(u32), DBA_ALIGN), scctx->isc_rxqsizes[0] = roundup2(scctx->isc_nrxd[0] * sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN); /* XXX */ scctx->isc_tx_csum_flags = CSUM_IP | CSUM_TCP | CSUM_UDP | CSUM_TSO | CSUM_IP6_TCP | CSUM_IP6_UDP | CSUM_IP6_TSO; if (adapter->hw.mac.type == ixgbe_mac_82598EB) { scctx->isc_tx_nsegments = IXGBE_82598_SCATTER; } else { scctx->isc_tx_csum_flags |= CSUM_SCTP |CSUM_IP6_SCTP; scctx->isc_tx_nsegments = IXGBE_82599_SCATTER; } scctx->isc_msix_bar = pci_msix_table_bar(dev); scctx->isc_tx_tso_segments_max = scctx->isc_tx_nsegments; scctx->isc_tx_tso_size_max = IXGBE_TSO_SIZE; scctx->isc_tx_tso_segsize_max = PAGE_SIZE; scctx->isc_txrx = &ixgbe_txrx; scctx->isc_capabilities = scctx->isc_capenable = IXGBE_CAPS; return (0); err_pci: ctrl_ext = IXGBE_READ_REG(&adapter->hw, IXGBE_CTRL_EXT); ctrl_ext &= ~IXGBE_CTRL_EXT_DRV_LOAD; IXGBE_WRITE_REG(&adapter->hw, IXGBE_CTRL_EXT, ctrl_ext); ixgbe_free_pci_resources(ctx); return (error); } /* ixgbe_if_attach_pre */ /********************************************************************* * ixgbe_if_attach_post - Device initialization routine, part 2 * * Called during driver load, but after interrupts and * resources have been allocated and configured. * Sets up some data structures not relevant to iflib. * * return 0 on success, positive on failure *********************************************************************/ static int ixgbe_if_attach_post(if_ctx_t ctx) { device_t dev; struct adapter *adapter; struct ixgbe_hw *hw; int error = 0; dev = iflib_get_dev(ctx); adapter = iflib_get_softc(ctx); hw = &adapter->hw; if (adapter->intr_type == IFLIB_INTR_LEGACY && (adapter->feat_cap & IXGBE_FEATURE_LEGACY_IRQ) == 0) { device_printf(dev, "Device does not support legacy interrupts"); error = ENXIO; goto err; } /* Allocate multicast array memory. */ adapter->mta = malloc(sizeof(*adapter->mta) * MAX_NUM_MULTICAST_ADDRESSES, M_IXGBE, M_NOWAIT); if (adapter->mta == NULL) { device_printf(dev, "Can not allocate multicast setup array\n"); error = ENOMEM; goto err; } /* hw.ix defaults init */ ixgbe_set_advertise(adapter, ixgbe_advertise_speed); /* Enable the optics for 82599 SFP+ fiber */ ixgbe_enable_tx_laser(hw); /* Enable power to the phy. */ ixgbe_set_phy_power(hw, TRUE); ixgbe_initialize_iov(adapter); error = ixgbe_setup_interface(ctx); if (error) { device_printf(dev, "Interface setup failed: %d\n", error); goto err; } ixgbe_if_update_admin_status(ctx); /* Initialize statistics */ ixgbe_update_stats_counters(adapter); ixgbe_add_hw_stats(adapter); /* Check PCIE slot type/speed/width */ ixgbe_get_slot_info(adapter); /* * Do time init and sysctl init here, but * only on the first port of a bypass adapter. */ ixgbe_bypass_init(adapter); /* Set an initial dmac value */ adapter->dmac = 0; /* Set initial advertised speeds (if applicable) */ adapter->advertise = ixgbe_get_advertise(adapter); if (adapter->feat_cap & IXGBE_FEATURE_SRIOV) ixgbe_define_iov_schemas(dev, &error); /* Add sysctls */ ixgbe_add_device_sysctls(ctx); return (0); err: return (error); } /* ixgbe_if_attach_post */ /************************************************************************ * ixgbe_check_wol_support * * Checks whether the adapter's ports are capable of * Wake On LAN by reading the adapter's NVM. * * Sets each port's hw->wol_enabled value depending * on the value read here. ************************************************************************/ static void ixgbe_check_wol_support(struct adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; u16 dev_caps = 0; /* Find out WoL support for port */ adapter->wol_support = hw->wol_enabled = 0; ixgbe_get_device_caps(hw, &dev_caps); if ((dev_caps & IXGBE_DEVICE_CAPS_WOL_PORT0_1) || ((dev_caps & IXGBE_DEVICE_CAPS_WOL_PORT0) && hw->bus.func == 0)) adapter->wol_support = hw->wol_enabled = 1; /* Save initial wake up filter configuration */ adapter->wufc = IXGBE_READ_REG(hw, IXGBE_WUFC); return; } /* ixgbe_check_wol_support */ /************************************************************************ * ixgbe_setup_interface * * Setup networking device structure and register an interface. ************************************************************************/ static int ixgbe_setup_interface(if_ctx_t ctx) { struct ifnet *ifp = iflib_get_ifp(ctx); struct adapter *adapter = iflib_get_softc(ctx); INIT_DEBUGOUT("ixgbe_setup_interface: begin"); if_setbaudrate(ifp, IF_Gbps(10)); adapter->max_frame_size = ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN; adapter->phy_layer = ixgbe_get_supported_physical_layer(&adapter->hw); ixgbe_add_media_types(ctx); /* Autoselect media by default */ ifmedia_set(adapter->media, IFM_ETHER | IFM_AUTO); return (0); } /* ixgbe_setup_interface */ /************************************************************************ * ixgbe_if_get_counter ************************************************************************/ static uint64_t ixgbe_if_get_counter(if_ctx_t ctx, ift_counter cnt) { struct adapter *adapter = iflib_get_softc(ctx); if_t ifp = iflib_get_ifp(ctx); switch (cnt) { case IFCOUNTER_IPACKETS: return (adapter->ipackets); case IFCOUNTER_OPACKETS: return (adapter->opackets); case IFCOUNTER_IBYTES: return (adapter->ibytes); case IFCOUNTER_OBYTES: return (adapter->obytes); case IFCOUNTER_IMCASTS: return (adapter->imcasts); case IFCOUNTER_OMCASTS: return (adapter->omcasts); case IFCOUNTER_COLLISIONS: return (0); case IFCOUNTER_IQDROPS: return (adapter->iqdrops); case IFCOUNTER_OQDROPS: return (0); case IFCOUNTER_IERRORS: return (adapter->ierrors); default: return (if_get_counter_default(ifp, cnt)); } } /* ixgbe_if_get_counter */ /************************************************************************ * ixgbe_if_i2c_req ************************************************************************/ static int ixgbe_if_i2c_req(if_ctx_t ctx, struct ifi2creq *req) { struct adapter *adapter = iflib_get_softc(ctx); struct ixgbe_hw *hw = &adapter->hw; int i; if (hw->phy.ops.read_i2c_byte == NULL) return (ENXIO); for (i = 0; i < req->len; i++) hw->phy.ops.read_i2c_byte(hw, req->offset + i, req->dev_addr, &req->data[i]); return (0); } /* ixgbe_if_i2c_req */ /* ixgbe_if_needs_restart - Tell iflib when the driver needs to be reinitialized * @ctx: iflib context * @event: event code to check * * Defaults to returning true for unknown events. * * @returns true if iflib needs to reinit the interface */ static bool ixgbe_if_needs_restart(if_ctx_t ctx __unused, enum iflib_restart_event event) { switch (event) { case IFLIB_RESTART_VLAN_CONFIG: return (false); default: return (true); } } /************************************************************************ * ixgbe_add_media_types ************************************************************************/ static void ixgbe_add_media_types(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct ixgbe_hw *hw = &adapter->hw; device_t dev = iflib_get_dev(ctx); u64 layer; layer = adapter->phy_layer = ixgbe_get_supported_physical_layer(hw); /* Media types with matching FreeBSD media defines */ if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_T) ifmedia_add(adapter->media, IFM_ETHER | IFM_10G_T, 0, NULL); if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_T) ifmedia_add(adapter->media, IFM_ETHER | IFM_1000_T, 0, NULL); if (layer & IXGBE_PHYSICAL_LAYER_100BASE_TX) ifmedia_add(adapter->media, IFM_ETHER | IFM_100_TX, 0, NULL); if (layer & IXGBE_PHYSICAL_LAYER_10BASE_T) ifmedia_add(adapter->media, IFM_ETHER | IFM_10_T, 0, NULL); if (layer & IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU || layer & IXGBE_PHYSICAL_LAYER_SFP_ACTIVE_DA) ifmedia_add(adapter->media, IFM_ETHER | IFM_10G_TWINAX, 0, NULL); if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_LR) { ifmedia_add(adapter->media, IFM_ETHER | IFM_10G_LR, 0, NULL); if (hw->phy.multispeed_fiber) ifmedia_add(adapter->media, IFM_ETHER | IFM_1000_LX, 0, NULL); } if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_SR) { ifmedia_add(adapter->media, IFM_ETHER | IFM_10G_SR, 0, NULL); if (hw->phy.multispeed_fiber) ifmedia_add(adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL); } else if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_SX) ifmedia_add(adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL); if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_CX4) ifmedia_add(adapter->media, IFM_ETHER | IFM_10G_CX4, 0, NULL); #ifdef IFM_ETH_XTYPE if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KR) ifmedia_add(adapter->media, IFM_ETHER | IFM_10G_KR, 0, NULL); if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KX4) ifmedia_add( adapter->media, IFM_ETHER | IFM_10G_KX4, 0, NULL); if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_KX) ifmedia_add(adapter->media, IFM_ETHER | IFM_1000_KX, 0, NULL); if (layer & IXGBE_PHYSICAL_LAYER_2500BASE_KX) ifmedia_add(adapter->media, IFM_ETHER | IFM_2500_KX, 0, NULL); #else if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KR) { device_printf(dev, "Media supported: 10GbaseKR\n"); device_printf(dev, "10GbaseKR mapped to 10GbaseSR\n"); ifmedia_add(adapter->media, IFM_ETHER | IFM_10G_SR, 0, NULL); } if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KX4) { device_printf(dev, "Media supported: 10GbaseKX4\n"); device_printf(dev, "10GbaseKX4 mapped to 10GbaseCX4\n"); ifmedia_add(adapter->media, IFM_ETHER | IFM_10G_CX4, 0, NULL); } if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_KX) { device_printf(dev, "Media supported: 1000baseKX\n"); device_printf(dev, "1000baseKX mapped to 1000baseCX\n"); ifmedia_add(adapter->media, IFM_ETHER | IFM_1000_CX, 0, NULL); } if (layer & IXGBE_PHYSICAL_LAYER_2500BASE_KX) { device_printf(dev, "Media supported: 2500baseKX\n"); device_printf(dev, "2500baseKX mapped to 2500baseSX\n"); ifmedia_add(adapter->media, IFM_ETHER | IFM_2500_SX, 0, NULL); } #endif if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_BX) device_printf(dev, "Media supported: 1000baseBX\n"); if (hw->device_id == IXGBE_DEV_ID_82598AT) { ifmedia_add(adapter->media, IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL); ifmedia_add(adapter->media, IFM_ETHER | IFM_1000_T, 0, NULL); } ifmedia_add(adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL); } /* ixgbe_add_media_types */ /************************************************************************ * ixgbe_is_sfp ************************************************************************/ static inline bool ixgbe_is_sfp(struct ixgbe_hw *hw) { switch (hw->mac.type) { case ixgbe_mac_82598EB: if (hw->phy.type == ixgbe_phy_nl) return (TRUE); return (FALSE); case ixgbe_mac_82599EB: switch (hw->mac.ops.get_media_type(hw)) { case ixgbe_media_type_fiber: case ixgbe_media_type_fiber_qsfp: return (TRUE); default: return (FALSE); } case ixgbe_mac_X550EM_x: case ixgbe_mac_X550EM_a: if (hw->mac.ops.get_media_type(hw) == ixgbe_media_type_fiber) return (TRUE); return (FALSE); default: return (FALSE); } } /* ixgbe_is_sfp */ /************************************************************************ * ixgbe_config_link ************************************************************************/ static void ixgbe_config_link(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct ixgbe_hw *hw = &adapter->hw; u32 autoneg, err = 0; bool sfp, negotiate; sfp = ixgbe_is_sfp(hw); if (sfp) { adapter->task_requests |= IXGBE_REQUEST_TASK_MOD; iflib_admin_intr_deferred(ctx); } else { if (hw->mac.ops.check_link) err = ixgbe_check_link(hw, &adapter->link_speed, &adapter->link_up, FALSE); if (err) return; autoneg = hw->phy.autoneg_advertised; if ((!autoneg) && (hw->mac.ops.get_link_capabilities)) err = hw->mac.ops.get_link_capabilities(hw, &autoneg, &negotiate); if (err) return; if (hw->mac.ops.setup_link) err = hw->mac.ops.setup_link(hw, autoneg, adapter->link_up); } } /* ixgbe_config_link */ /************************************************************************ * ixgbe_update_stats_counters - Update board statistics counters. ************************************************************************/ static void ixgbe_update_stats_counters(struct adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; struct ixgbe_hw_stats *stats = &adapter->stats.pf; u32 missed_rx = 0, bprc, lxon, lxoff, total; u32 lxoffrxc; u64 total_missed_rx = 0; stats->crcerrs += IXGBE_READ_REG(hw, IXGBE_CRCERRS); stats->illerrc += IXGBE_READ_REG(hw, IXGBE_ILLERRC); stats->errbc += IXGBE_READ_REG(hw, IXGBE_ERRBC); stats->mspdc += IXGBE_READ_REG(hw, IXGBE_MSPDC); stats->mpc[0] += IXGBE_READ_REG(hw, IXGBE_MPC(0)); for (int i = 0; i < 16; i++) { stats->qprc[i] += IXGBE_READ_REG(hw, IXGBE_QPRC(i)); stats->qptc[i] += IXGBE_READ_REG(hw, IXGBE_QPTC(i)); stats->qprdc[i] += IXGBE_READ_REG(hw, IXGBE_QPRDC(i)); } stats->mlfc += IXGBE_READ_REG(hw, IXGBE_MLFC); stats->mrfc += IXGBE_READ_REG(hw, IXGBE_MRFC); stats->rlec += IXGBE_READ_REG(hw, IXGBE_RLEC); /* Hardware workaround, gprc counts missed packets */ stats->gprc += IXGBE_READ_REG(hw, IXGBE_GPRC); stats->gprc -= missed_rx; if (hw->mac.type != ixgbe_mac_82598EB) { stats->gorc += IXGBE_READ_REG(hw, IXGBE_GORCL) + ((u64)IXGBE_READ_REG(hw, IXGBE_GORCH) << 32); stats->gotc += IXGBE_READ_REG(hw, IXGBE_GOTCL) + ((u64)IXGBE_READ_REG(hw, IXGBE_GOTCH) << 32); stats->tor += IXGBE_READ_REG(hw, IXGBE_TORL) + ((u64)IXGBE_READ_REG(hw, IXGBE_TORH) << 32); stats->lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXCNT); lxoffrxc = IXGBE_READ_REG(hw, IXGBE_LXOFFRXCNT); stats->lxoffrxc += lxoffrxc; } else { stats->lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXC); lxoffrxc = IXGBE_READ_REG(hw, IXGBE_LXOFFRXC); stats->lxoffrxc += lxoffrxc; /* 82598 only has a counter in the high register */ stats->gorc += IXGBE_READ_REG(hw, IXGBE_GORCH); stats->gotc += IXGBE_READ_REG(hw, IXGBE_GOTCH); stats->tor += IXGBE_READ_REG(hw, IXGBE_TORH); } /* * For watchdog management we need to know if we have been paused * during the last interval, so capture that here. */ if (lxoffrxc) adapter->shared->isc_pause_frames = 1; /* * Workaround: mprc hardware is incorrectly counting * broadcasts, so for now we subtract those. */ bprc = IXGBE_READ_REG(hw, IXGBE_BPRC); stats->bprc += bprc; stats->mprc += IXGBE_READ_REG(hw, IXGBE_MPRC); if (hw->mac.type == ixgbe_mac_82598EB) stats->mprc -= bprc; stats->prc64 += IXGBE_READ_REG(hw, IXGBE_PRC64); stats->prc127 += IXGBE_READ_REG(hw, IXGBE_PRC127); stats->prc255 += IXGBE_READ_REG(hw, IXGBE_PRC255); stats->prc511 += IXGBE_READ_REG(hw, IXGBE_PRC511); stats->prc1023 += IXGBE_READ_REG(hw, IXGBE_PRC1023); stats->prc1522 += IXGBE_READ_REG(hw, IXGBE_PRC1522); lxon = IXGBE_READ_REG(hw, IXGBE_LXONTXC); stats->lxontxc += lxon; lxoff = IXGBE_READ_REG(hw, IXGBE_LXOFFTXC); stats->lxofftxc += lxoff; total = lxon + lxoff; stats->gptc += IXGBE_READ_REG(hw, IXGBE_GPTC); stats->mptc += IXGBE_READ_REG(hw, IXGBE_MPTC); stats->ptc64 += IXGBE_READ_REG(hw, IXGBE_PTC64); stats->gptc -= total; stats->mptc -= total; stats->ptc64 -= total; stats->gotc -= total * ETHER_MIN_LEN; stats->ruc += IXGBE_READ_REG(hw, IXGBE_RUC); stats->rfc += IXGBE_READ_REG(hw, IXGBE_RFC); stats->roc += IXGBE_READ_REG(hw, IXGBE_ROC); stats->rjc += IXGBE_READ_REG(hw, IXGBE_RJC); stats->mngprc += IXGBE_READ_REG(hw, IXGBE_MNGPRC); stats->mngpdc += IXGBE_READ_REG(hw, IXGBE_MNGPDC); stats->mngptc += IXGBE_READ_REG(hw, IXGBE_MNGPTC); stats->tpr += IXGBE_READ_REG(hw, IXGBE_TPR); stats->tpt += IXGBE_READ_REG(hw, IXGBE_TPT); stats->ptc127 += IXGBE_READ_REG(hw, IXGBE_PTC127); stats->ptc255 += IXGBE_READ_REG(hw, IXGBE_PTC255); stats->ptc511 += IXGBE_READ_REG(hw, IXGBE_PTC511); stats->ptc1023 += IXGBE_READ_REG(hw, IXGBE_PTC1023); stats->ptc1522 += IXGBE_READ_REG(hw, IXGBE_PTC1522); stats->bptc += IXGBE_READ_REG(hw, IXGBE_BPTC); stats->xec += IXGBE_READ_REG(hw, IXGBE_XEC); stats->fccrc += IXGBE_READ_REG(hw, IXGBE_FCCRC); stats->fclast += IXGBE_READ_REG(hw, IXGBE_FCLAST); /* Only read FCOE on 82599 */ if (hw->mac.type != ixgbe_mac_82598EB) { stats->fcoerpdc += IXGBE_READ_REG(hw, IXGBE_FCOERPDC); stats->fcoeprc += IXGBE_READ_REG(hw, IXGBE_FCOEPRC); stats->fcoeptc += IXGBE_READ_REG(hw, IXGBE_FCOEPTC); stats->fcoedwrc += IXGBE_READ_REG(hw, IXGBE_FCOEDWRC); stats->fcoedwtc += IXGBE_READ_REG(hw, IXGBE_FCOEDWTC); } /* Fill out the OS statistics structure */ IXGBE_SET_IPACKETS(adapter, stats->gprc); IXGBE_SET_OPACKETS(adapter, stats->gptc); IXGBE_SET_IBYTES(adapter, stats->gorc); IXGBE_SET_OBYTES(adapter, stats->gotc); IXGBE_SET_IMCASTS(adapter, stats->mprc); IXGBE_SET_OMCASTS(adapter, stats->mptc); IXGBE_SET_COLLISIONS(adapter, 0); IXGBE_SET_IQDROPS(adapter, total_missed_rx); /* * Aggregate following types of errors as RX errors: * - CRC error count, * - illegal byte error count, * - checksum error count, * - missed packets count, * - length error count, * - undersized packets count, * - fragmented packets count, * - oversized packets count, * - jabber count. */ IXGBE_SET_IERRORS(adapter, stats->crcerrs + stats->illerrc + stats->xec + stats->mpc[0] + stats->rlec + stats->ruc + stats->rfc + stats->roc + stats->rjc); } /* ixgbe_update_stats_counters */ /************************************************************************ * ixgbe_add_hw_stats * * Add sysctl variables, one per statistic, to the system. ************************************************************************/ static void ixgbe_add_hw_stats(struct adapter *adapter) { device_t dev = iflib_get_dev(adapter->ctx); struct ix_rx_queue *rx_que; struct ix_tx_queue *tx_que; struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev); struct sysctl_oid *tree = device_get_sysctl_tree(dev); struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree); struct ixgbe_hw_stats *stats = &adapter->stats.pf; struct sysctl_oid *stat_node, *queue_node; struct sysctl_oid_list *stat_list, *queue_list; int i; #define QUEUE_NAME_LEN 32 char namebuf[QUEUE_NAME_LEN]; /* Driver Statistics */ SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", CTLFLAG_RD, &adapter->dropped_pkts, "Driver dropped packets"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_events", CTLFLAG_RD, &adapter->watchdog_events, "Watchdog timeouts"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq", CTLFLAG_RD, &adapter->link_irq, "Link MSI-X IRQ Handled"); for (i = 0, tx_que = adapter->tx_queues; i < adapter->num_tx_queues; i++, tx_que++) { struct tx_ring *txr = &tx_que->txr; snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i); queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Queue Name"); queue_list = SYSCTL_CHILDREN(queue_node); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", CTLTYPE_UINT | CTLFLAG_RD | CTLFLAG_NEEDGIANT, txr, 0, ixgbe_sysctl_tdh_handler, "IU", "Transmit Descriptor Head"); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", CTLTYPE_UINT | CTLFLAG_RD | CTLFLAG_NEEDGIANT, txr, 0, ixgbe_sysctl_tdt_handler, "IU", "Transmit Descriptor Tail"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tso_tx", CTLFLAG_RD, &txr->tso_tx, "TSO"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_packets", CTLFLAG_RD, &txr->total_packets, "Queue Packets Transmitted"); } for (i = 0, rx_que = adapter->rx_queues; i < adapter->num_rx_queues; i++, rx_que++) { struct rx_ring *rxr = &rx_que->rxr; snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i); queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Queue Name"); queue_list = SYSCTL_CHILDREN(queue_node); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate", CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, &adapter->rx_queues[i], 0, ixgbe_sysctl_interrupt_rate_handler, "IU", "Interrupt Rate"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "irqs", CTLFLAG_RD, &(adapter->rx_queues[i].irqs), "irqs on this queue"); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", CTLTYPE_UINT | CTLFLAG_RD | CTLFLAG_NEEDGIANT, rxr, 0, ixgbe_sysctl_rdh_handler, "IU", "Receive Descriptor Head"); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", CTLTYPE_UINT | CTLFLAG_RD | CTLFLAG_NEEDGIANT, rxr, 0, ixgbe_sysctl_rdt_handler, "IU", "Receive Descriptor Tail"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_packets", CTLFLAG_RD, &rxr->rx_packets, "Queue Packets Received"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_bytes", CTLFLAG_RD, &rxr->rx_bytes, "Queue Bytes Received"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_copies", CTLFLAG_RD, &rxr->rx_copies, "Copied RX Frames"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_discarded", CTLFLAG_RD, &rxr->rx_discarded, "Discarded RX packets"); } /* MAC stats get their own sub node */ stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "MAC Statistics"); stat_list = SYSCTL_CHILDREN(stat_node); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_errs", CTLFLAG_RD, &adapter->ierrors, IXGBE_SYSCTL_DESC_RX_ERRS); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs", CTLFLAG_RD, &stats->crcerrs, "CRC Errors"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "ill_errs", CTLFLAG_RD, &stats->illerrc, "Illegal Byte Errors"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "byte_errs", CTLFLAG_RD, &stats->errbc, "Byte Errors"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "short_discards", CTLFLAG_RD, &stats->mspdc, "MAC Short Packets Discarded"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "local_faults", CTLFLAG_RD, &stats->mlfc, "MAC Local Faults"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "remote_faults", CTLFLAG_RD, &stats->mrfc, "MAC Remote Faults"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rec_len_errs", CTLFLAG_RD, &stats->rlec, "Receive Length Errors"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_missed_packets", CTLFLAG_RD, &stats->mpc[0], "RX Missed Packet Count"); /* Flow Control stats */ SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd", CTLFLAG_RD, &stats->lxontxc, "Link XON Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd", CTLFLAG_RD, &stats->lxonrxc, "Link XON Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd", CTLFLAG_RD, &stats->lxofftxc, "Link XOFF Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd", CTLFLAG_RD, &stats->lxoffrxc, "Link XOFF Received"); /* Packet Reception Stats */ SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_octets_rcvd", CTLFLAG_RD, &stats->tor, "Total Octets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_rcvd", CTLFLAG_RD, &stats->gorc, "Good Octets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_rcvd", CTLFLAG_RD, &stats->tpr, "Total Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_rcvd", CTLFLAG_RD, &stats->gprc, "Good Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_rcvd", CTLFLAG_RD, &stats->mprc, "Multicast Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_rcvd", CTLFLAG_RD, &stats->bprc, "Broadcast Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64", CTLFLAG_RD, &stats->prc64, "64 byte frames received "); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127", CTLFLAG_RD, &stats->prc127, "65-127 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255", CTLFLAG_RD, &stats->prc255, "128-255 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511", CTLFLAG_RD, &stats->prc511, "256-511 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023", CTLFLAG_RD, &stats->prc1023, "512-1023 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522", CTLFLAG_RD, &stats->prc1522, "1023-1522 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersized", CTLFLAG_RD, &stats->ruc, "Receive Undersized"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented", CTLFLAG_RD, &stats->rfc, "Fragmented Packets Received "); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversized", CTLFLAG_RD, &stats->roc, "Oversized Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabberd", CTLFLAG_RD, &stats->rjc, "Received Jabber"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_rcvd", CTLFLAG_RD, &stats->mngprc, "Management Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_drpd", CTLFLAG_RD, &stats->mngptc, "Management Packets Dropped"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "checksum_errs", CTLFLAG_RD, &stats->xec, "Checksum Errors"); /* Packet Transmission Stats */ SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd", CTLFLAG_RD, &stats->gotc, "Good Octets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd", CTLFLAG_RD, &stats->tpt, "Total Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd", CTLFLAG_RD, &stats->gptc, "Good Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd", CTLFLAG_RD, &stats->bptc, "Broadcast Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd", CTLFLAG_RD, &stats->mptc, "Multicast Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_txd", CTLFLAG_RD, &stats->mngptc, "Management Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64", CTLFLAG_RD, &stats->ptc64, "64 byte frames transmitted "); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127", CTLFLAG_RD, &stats->ptc127, "65-127 byte frames transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255", CTLFLAG_RD, &stats->ptc255, "128-255 byte frames transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511", CTLFLAG_RD, &stats->ptc511, "256-511 byte frames transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023", CTLFLAG_RD, &stats->ptc1023, "512-1023 byte frames transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522", CTLFLAG_RD, &stats->ptc1522, "1024-1522 byte frames transmitted"); } /* ixgbe_add_hw_stats */ /************************************************************************ * ixgbe_sysctl_tdh_handler - Transmit Descriptor Head handler function * * Retrieves the TDH value from the hardware ************************************************************************/ static int ixgbe_sysctl_tdh_handler(SYSCTL_HANDLER_ARGS) { struct tx_ring *txr = ((struct tx_ring *)oidp->oid_arg1); int error; unsigned int val; if (!txr) return (0); val = IXGBE_READ_REG(&txr->adapter->hw, IXGBE_TDH(txr->me)); error = sysctl_handle_int(oidp, &val, 0, req); if (error || !req->newptr) return error; return (0); } /* ixgbe_sysctl_tdh_handler */ /************************************************************************ * ixgbe_sysctl_tdt_handler - Transmit Descriptor Tail handler function * * Retrieves the TDT value from the hardware ************************************************************************/ static int ixgbe_sysctl_tdt_handler(SYSCTL_HANDLER_ARGS) { struct tx_ring *txr = ((struct tx_ring *)oidp->oid_arg1); int error; unsigned int val; if (!txr) return (0); val = IXGBE_READ_REG(&txr->adapter->hw, IXGBE_TDT(txr->me)); error = sysctl_handle_int(oidp, &val, 0, req); if (error || !req->newptr) return error; return (0); } /* ixgbe_sysctl_tdt_handler */ /************************************************************************ * ixgbe_sysctl_rdh_handler - Receive Descriptor Head handler function * * Retrieves the RDH value from the hardware ************************************************************************/ static int ixgbe_sysctl_rdh_handler(SYSCTL_HANDLER_ARGS) { struct rx_ring *rxr = ((struct rx_ring *)oidp->oid_arg1); int error; unsigned int val; if (!rxr) return (0); val = IXGBE_READ_REG(&rxr->adapter->hw, IXGBE_RDH(rxr->me)); error = sysctl_handle_int(oidp, &val, 0, req); if (error || !req->newptr) return error; return (0); } /* ixgbe_sysctl_rdh_handler */ /************************************************************************ * ixgbe_sysctl_rdt_handler - Receive Descriptor Tail handler function * * Retrieves the RDT value from the hardware ************************************************************************/ static int ixgbe_sysctl_rdt_handler(SYSCTL_HANDLER_ARGS) { struct rx_ring *rxr = ((struct rx_ring *)oidp->oid_arg1); int error; unsigned int val; if (!rxr) return (0); val = IXGBE_READ_REG(&rxr->adapter->hw, IXGBE_RDT(rxr->me)); error = sysctl_handle_int(oidp, &val, 0, req); if (error || !req->newptr) return error; return (0); } /* ixgbe_sysctl_rdt_handler */ /************************************************************************ * ixgbe_if_vlan_register * * Run via vlan config EVENT, it enables us to use the * HW Filter table since we can get the vlan id. This * just creates the entry in the soft version of the * VFTA, init will repopulate the real table. ************************************************************************/ static void ixgbe_if_vlan_register(if_ctx_t ctx, u16 vtag) { struct adapter *adapter = iflib_get_softc(ctx); u16 index, bit; index = (vtag >> 5) & 0x7F; bit = vtag & 0x1F; adapter->shadow_vfta[index] |= (1 << bit); ++adapter->num_vlans; ixgbe_setup_vlan_hw_support(ctx); } /* ixgbe_if_vlan_register */ /************************************************************************ * ixgbe_if_vlan_unregister * * Run via vlan unconfig EVENT, remove our entry in the soft vfta. ************************************************************************/ static void ixgbe_if_vlan_unregister(if_ctx_t ctx, u16 vtag) { struct adapter *adapter = iflib_get_softc(ctx); u16 index, bit; index = (vtag >> 5) & 0x7F; bit = vtag & 0x1F; adapter->shadow_vfta[index] &= ~(1 << bit); --adapter->num_vlans; /* Re-init to load the changes */ ixgbe_setup_vlan_hw_support(ctx); } /* ixgbe_if_vlan_unregister */ /************************************************************************ * ixgbe_setup_vlan_hw_support ************************************************************************/ static void ixgbe_setup_vlan_hw_support(if_ctx_t ctx) { struct ifnet *ifp = iflib_get_ifp(ctx); struct adapter *adapter = iflib_get_softc(ctx); struct ixgbe_hw *hw = &adapter->hw; struct rx_ring *rxr; int i; u32 ctrl; /* * We get here thru init_locked, meaning * a soft reset, this has already cleared * the VFTA and other state, so if there * have been no vlan's registered do nothing. */ if (adapter->num_vlans == 0) return; /* Setup the queues for vlans */ if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) { for (i = 0; i < adapter->num_rx_queues; i++) { rxr = &adapter->rx_queues[i].rxr; /* On 82599 the VLAN enable is per/queue in RXDCTL */ if (hw->mac.type != ixgbe_mac_82598EB) { ctrl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxr->me)); ctrl |= IXGBE_RXDCTL_VME; IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxr->me), ctrl); } rxr->vtag_strip = TRUE; } } if ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0) return; /* * A soft reset zero's out the VFTA, so * we need to repopulate it now. */ for (i = 0; i < IXGBE_VFTA_SIZE; i++) if (adapter->shadow_vfta[i] != 0) IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), adapter->shadow_vfta[i]); ctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL); /* Enable the Filter Table if enabled */ if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) { ctrl &= ~IXGBE_VLNCTRL_CFIEN; ctrl |= IXGBE_VLNCTRL_VFE; } if (hw->mac.type == ixgbe_mac_82598EB) ctrl |= IXGBE_VLNCTRL_VME; IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, ctrl); } /* ixgbe_setup_vlan_hw_support */ /************************************************************************ * ixgbe_get_slot_info * * Get the width and transaction speed of * the slot this adapter is plugged into. ************************************************************************/ static void ixgbe_get_slot_info(struct adapter *adapter) { device_t dev = iflib_get_dev(adapter->ctx); struct ixgbe_hw *hw = &adapter->hw; int bus_info_valid = TRUE; u32 offset; u16 link; /* Some devices are behind an internal bridge */ switch (hw->device_id) { case IXGBE_DEV_ID_82599_SFP_SF_QP: case IXGBE_DEV_ID_82599_QSFP_SF_QP: goto get_parent_info; default: break; } ixgbe_get_bus_info(hw); /* * Some devices don't use PCI-E, but there is no need * to display "Unknown" for bus speed and width. */ switch (hw->mac.type) { case ixgbe_mac_X550EM_x: case ixgbe_mac_X550EM_a: return; default: goto display; } get_parent_info: /* * For the Quad port adapter we need to parse back * up the PCI tree to find the speed of the expansion * slot into which this adapter is plugged. A bit more work. */ dev = device_get_parent(device_get_parent(dev)); #ifdef IXGBE_DEBUG device_printf(dev, "parent pcib = %x,%x,%x\n", pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev)); #endif dev = device_get_parent(device_get_parent(dev)); #ifdef IXGBE_DEBUG device_printf(dev, "slot pcib = %x,%x,%x\n", pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev)); #endif /* Now get the PCI Express Capabilities offset */ if (pci_find_cap(dev, PCIY_EXPRESS, &offset)) { /* * Hmm...can't get PCI-Express capabilities. * Falling back to default method. */ bus_info_valid = FALSE; ixgbe_get_bus_info(hw); goto display; } /* ...and read the Link Status Register */ link = pci_read_config(dev, offset + PCIER_LINK_STA, 2); ixgbe_set_pci_config_data_generic(hw, link); display: device_printf(dev, "PCI Express Bus: Speed %s %s\n", ((hw->bus.speed == ixgbe_bus_speed_8000) ? "8.0GT/s" : (hw->bus.speed == ixgbe_bus_speed_5000) ? "5.0GT/s" : (hw->bus.speed == ixgbe_bus_speed_2500) ? "2.5GT/s" : "Unknown"), ((hw->bus.width == ixgbe_bus_width_pcie_x8) ? "Width x8" : (hw->bus.width == ixgbe_bus_width_pcie_x4) ? "Width x4" : (hw->bus.width == ixgbe_bus_width_pcie_x1) ? "Width x1" : "Unknown")); if (bus_info_valid) { if ((hw->device_id != IXGBE_DEV_ID_82599_SFP_SF_QP) && ((hw->bus.width <= ixgbe_bus_width_pcie_x4) && (hw->bus.speed == ixgbe_bus_speed_2500))) { device_printf(dev, "PCI-Express bandwidth available for this card\n is not sufficient for optimal performance.\n"); device_printf(dev, "For optimal performance a x8 PCIE, or x4 PCIE Gen2 slot is required.\n"); } if ((hw->device_id == IXGBE_DEV_ID_82599_SFP_SF_QP) && ((hw->bus.width <= ixgbe_bus_width_pcie_x8) && (hw->bus.speed < ixgbe_bus_speed_8000))) { device_printf(dev, "PCI-Express bandwidth available for this card\n is not sufficient for optimal performance.\n"); device_printf(dev, "For optimal performance a x8 PCIE Gen3 slot is required.\n"); } } else device_printf(dev, "Unable to determine slot speed/width. The speed/width reported are that of the internal switch.\n"); return; } /* ixgbe_get_slot_info */ /************************************************************************ * ixgbe_if_msix_intr_assign * * Setup MSI-X Interrupt resources and handlers ************************************************************************/ static int ixgbe_if_msix_intr_assign(if_ctx_t ctx, int msix) { struct adapter *adapter = iflib_get_softc(ctx); struct ix_rx_queue *rx_que = adapter->rx_queues; struct ix_tx_queue *tx_que; int error, rid, vector = 0; int cpu_id = 0; char buf[16]; /* Admin Que is vector 0*/ rid = vector + 1; for (int i = 0; i < adapter->num_rx_queues; i++, vector++, rx_que++) { rid = vector + 1; snprintf(buf, sizeof(buf), "rxq%d", i); error = iflib_irq_alloc_generic(ctx, &rx_que->que_irq, rid, IFLIB_INTR_RXTX, ixgbe_msix_que, rx_que, rx_que->rxr.me, buf); if (error) { device_printf(iflib_get_dev(ctx), "Failed to allocate que int %d err: %d", i, error); adapter->num_rx_queues = i + 1; goto fail; } rx_que->msix = vector; if (adapter->feat_en & IXGBE_FEATURE_RSS) { /* * The queue ID is used as the RSS layer bucket ID. * We look up the queue ID -> RSS CPU ID and select * that. */ cpu_id = rss_getcpu(i % rss_getnumbuckets()); } else { /* * Bind the MSI-X vector, and thus the * rings to the corresponding cpu. * * This just happens to match the default RSS * round-robin bucket -> queue -> CPU allocation. */ if (adapter->num_rx_queues > 1) cpu_id = i; } } for (int i = 0; i < adapter->num_tx_queues; i++) { snprintf(buf, sizeof(buf), "txq%d", i); tx_que = &adapter->tx_queues[i]; tx_que->msix = i % adapter->num_rx_queues; iflib_softirq_alloc_generic(ctx, &adapter->rx_queues[tx_que->msix].que_irq, IFLIB_INTR_TX, tx_que, tx_que->txr.me, buf); } rid = vector + 1; error = iflib_irq_alloc_generic(ctx, &adapter->irq, rid, IFLIB_INTR_ADMIN, ixgbe_msix_link, adapter, 0, "aq"); if (error) { device_printf(iflib_get_dev(ctx), "Failed to register admin handler"); return (error); } adapter->vector = vector; return (0); fail: iflib_irq_free(ctx, &adapter->irq); rx_que = adapter->rx_queues; for (int i = 0; i < adapter->num_rx_queues; i++, rx_que++) iflib_irq_free(ctx, &rx_que->que_irq); return (error); } /* ixgbe_if_msix_intr_assign */ /********************************************************************* * ixgbe_msix_que - MSI-X Queue Interrupt Service routine **********************************************************************/ static int ixgbe_msix_que(void *arg) { struct ix_rx_queue *que = arg; struct adapter *adapter = que->adapter; struct ifnet *ifp = iflib_get_ifp(que->adapter->ctx); /* Protect against spurious interrupts */ if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) return (FILTER_HANDLED); ixgbe_disable_queue(adapter, que->msix); ++que->irqs; return (FILTER_SCHEDULE_THREAD); } /* ixgbe_msix_que */ /************************************************************************ * ixgbe_media_status - Media Ioctl callback * * Called whenever the user queries the status of * the interface using ifconfig. ************************************************************************/ static void ixgbe_if_media_status(if_ctx_t ctx, struct ifmediareq * ifmr) { struct adapter *adapter = iflib_get_softc(ctx); struct ixgbe_hw *hw = &adapter->hw; int layer; INIT_DEBUGOUT("ixgbe_if_media_status: begin"); ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; if (!adapter->link_active) return; ifmr->ifm_status |= IFM_ACTIVE; layer = adapter->phy_layer; if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_T || layer & IXGBE_PHYSICAL_LAYER_1000BASE_T || layer & IXGBE_PHYSICAL_LAYER_100BASE_TX || layer & IXGBE_PHYSICAL_LAYER_10BASE_T) switch (adapter->link_speed) { case IXGBE_LINK_SPEED_10GB_FULL: ifmr->ifm_active |= IFM_10G_T | IFM_FDX; break; case IXGBE_LINK_SPEED_1GB_FULL: ifmr->ifm_active |= IFM_1000_T | IFM_FDX; break; case IXGBE_LINK_SPEED_100_FULL: ifmr->ifm_active |= IFM_100_TX | IFM_FDX; break; case IXGBE_LINK_SPEED_10_FULL: ifmr->ifm_active |= IFM_10_T | IFM_FDX; break; } if (layer & IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU || layer & IXGBE_PHYSICAL_LAYER_SFP_ACTIVE_DA) switch (adapter->link_speed) { case IXGBE_LINK_SPEED_10GB_FULL: ifmr->ifm_active |= IFM_10G_TWINAX | IFM_FDX; break; } if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_LR) switch (adapter->link_speed) { case IXGBE_LINK_SPEED_10GB_FULL: ifmr->ifm_active |= IFM_10G_LR | IFM_FDX; break; case IXGBE_LINK_SPEED_1GB_FULL: ifmr->ifm_active |= IFM_1000_LX | IFM_FDX; break; } if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_LRM) switch (adapter->link_speed) { case IXGBE_LINK_SPEED_10GB_FULL: ifmr->ifm_active |= IFM_10G_LRM | IFM_FDX; break; case IXGBE_LINK_SPEED_1GB_FULL: ifmr->ifm_active |= IFM_1000_LX | IFM_FDX; break; } if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_SR || layer & IXGBE_PHYSICAL_LAYER_1000BASE_SX) switch (adapter->link_speed) { case IXGBE_LINK_SPEED_10GB_FULL: ifmr->ifm_active |= IFM_10G_SR | IFM_FDX; break; case IXGBE_LINK_SPEED_1GB_FULL: ifmr->ifm_active |= IFM_1000_SX | IFM_FDX; break; } if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_CX4) switch (adapter->link_speed) { case IXGBE_LINK_SPEED_10GB_FULL: ifmr->ifm_active |= IFM_10G_CX4 | IFM_FDX; break; } /* * XXX: These need to use the proper media types once * they're added. */ #ifndef IFM_ETH_XTYPE if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KR) switch (adapter->link_speed) { case IXGBE_LINK_SPEED_10GB_FULL: ifmr->ifm_active |= IFM_10G_SR | IFM_FDX; break; case IXGBE_LINK_SPEED_2_5GB_FULL: ifmr->ifm_active |= IFM_2500_SX | IFM_FDX; break; case IXGBE_LINK_SPEED_1GB_FULL: ifmr->ifm_active |= IFM_1000_CX | IFM_FDX; break; } else if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KX4 || layer & IXGBE_PHYSICAL_LAYER_2500BASE_KX || layer & IXGBE_PHYSICAL_LAYER_1000BASE_KX) switch (adapter->link_speed) { case IXGBE_LINK_SPEED_10GB_FULL: ifmr->ifm_active |= IFM_10G_CX4 | IFM_FDX; break; case IXGBE_LINK_SPEED_2_5GB_FULL: ifmr->ifm_active |= IFM_2500_SX | IFM_FDX; break; case IXGBE_LINK_SPEED_1GB_FULL: ifmr->ifm_active |= IFM_1000_CX | IFM_FDX; break; } #else if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KR) switch (adapter->link_speed) { case IXGBE_LINK_SPEED_10GB_FULL: ifmr->ifm_active |= IFM_10G_KR | IFM_FDX; break; case IXGBE_LINK_SPEED_2_5GB_FULL: ifmr->ifm_active |= IFM_2500_KX | IFM_FDX; break; case IXGBE_LINK_SPEED_1GB_FULL: ifmr->ifm_active |= IFM_1000_KX | IFM_FDX; break; } else if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KX4 || layer & IXGBE_PHYSICAL_LAYER_2500BASE_KX || layer & IXGBE_PHYSICAL_LAYER_1000BASE_KX) switch (adapter->link_speed) { case IXGBE_LINK_SPEED_10GB_FULL: ifmr->ifm_active |= IFM_10G_KX4 | IFM_FDX; break; case IXGBE_LINK_SPEED_2_5GB_FULL: ifmr->ifm_active |= IFM_2500_KX | IFM_FDX; break; case IXGBE_LINK_SPEED_1GB_FULL: ifmr->ifm_active |= IFM_1000_KX | IFM_FDX; break; } #endif /* If nothing is recognized... */ if (IFM_SUBTYPE(ifmr->ifm_active) == 0) ifmr->ifm_active |= IFM_UNKNOWN; /* Display current flow control setting used on link */ if (hw->fc.current_mode == ixgbe_fc_rx_pause || hw->fc.current_mode == ixgbe_fc_full) ifmr->ifm_active |= IFM_ETH_RXPAUSE; if (hw->fc.current_mode == ixgbe_fc_tx_pause || hw->fc.current_mode == ixgbe_fc_full) ifmr->ifm_active |= IFM_ETH_TXPAUSE; } /* ixgbe_media_status */ /************************************************************************ * ixgbe_media_change - Media Ioctl callback * * Called when the user changes speed/duplex using * media/mediopt option with ifconfig. ************************************************************************/ static int ixgbe_if_media_change(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct ifmedia *ifm = iflib_get_media(ctx); struct ixgbe_hw *hw = &adapter->hw; ixgbe_link_speed speed = 0; INIT_DEBUGOUT("ixgbe_if_media_change: begin"); if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) return (EINVAL); if (hw->phy.media_type == ixgbe_media_type_backplane) return (EPERM); /* * We don't actually need to check against the supported * media types of the adapter; ifmedia will take care of * that for us. */ switch (IFM_SUBTYPE(ifm->ifm_media)) { case IFM_AUTO: case IFM_10G_T: speed |= IXGBE_LINK_SPEED_100_FULL; speed |= IXGBE_LINK_SPEED_1GB_FULL; speed |= IXGBE_LINK_SPEED_10GB_FULL; break; case IFM_10G_LRM: case IFM_10G_LR: #ifndef IFM_ETH_XTYPE case IFM_10G_SR: /* KR, too */ case IFM_10G_CX4: /* KX4 */ #else case IFM_10G_KR: case IFM_10G_KX4: #endif speed |= IXGBE_LINK_SPEED_1GB_FULL; speed |= IXGBE_LINK_SPEED_10GB_FULL; break; #ifndef IFM_ETH_XTYPE case IFM_1000_CX: /* KX */ #else case IFM_1000_KX: #endif case IFM_1000_LX: case IFM_1000_SX: speed |= IXGBE_LINK_SPEED_1GB_FULL; break; case IFM_1000_T: speed |= IXGBE_LINK_SPEED_100_FULL; speed |= IXGBE_LINK_SPEED_1GB_FULL; break; case IFM_10G_TWINAX: speed |= IXGBE_LINK_SPEED_10GB_FULL; break; case IFM_100_TX: speed |= IXGBE_LINK_SPEED_100_FULL; break; case IFM_10_T: speed |= IXGBE_LINK_SPEED_10_FULL; break; default: goto invalid; } hw->mac.autotry_restart = TRUE; hw->mac.ops.setup_link(hw, speed, TRUE); adapter->advertise = ((speed & IXGBE_LINK_SPEED_10GB_FULL) ? 4 : 0) | ((speed & IXGBE_LINK_SPEED_1GB_FULL) ? 2 : 0) | ((speed & IXGBE_LINK_SPEED_100_FULL) ? 1 : 0) | ((speed & IXGBE_LINK_SPEED_10_FULL) ? 8 : 0); return (0); invalid: device_printf(iflib_get_dev(ctx), "Invalid media type!\n"); return (EINVAL); } /* ixgbe_if_media_change */ /************************************************************************ * ixgbe_set_promisc ************************************************************************/ static int ixgbe_if_promisc_set(if_ctx_t ctx, int flags) { struct adapter *adapter = iflib_get_softc(ctx); struct ifnet *ifp = iflib_get_ifp(ctx); u32 rctl; int mcnt = 0; rctl = IXGBE_READ_REG(&adapter->hw, IXGBE_FCTRL); rctl &= (~IXGBE_FCTRL_UPE); if (ifp->if_flags & IFF_ALLMULTI) mcnt = MAX_NUM_MULTICAST_ADDRESSES; else { mcnt = min(if_llmaddr_count(ifp), MAX_NUM_MULTICAST_ADDRESSES); } if (mcnt < MAX_NUM_MULTICAST_ADDRESSES) rctl &= (~IXGBE_FCTRL_MPE); IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, rctl); if (ifp->if_flags & IFF_PROMISC) { rctl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE); IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, rctl); } else if (ifp->if_flags & IFF_ALLMULTI) { rctl |= IXGBE_FCTRL_MPE; rctl &= ~IXGBE_FCTRL_UPE; IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, rctl); } return (0); } /* ixgbe_if_promisc_set */ /************************************************************************ * ixgbe_msix_link - Link status change ISR (MSI/MSI-X) ************************************************************************/ static int ixgbe_msix_link(void *arg) { struct adapter *adapter = arg; struct ixgbe_hw *hw = &adapter->hw; u32 eicr, eicr_mask; s32 retval; ++adapter->link_irq; /* Pause other interrupts */ IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_EIMC_OTHER); /* First get the cause */ eicr = IXGBE_READ_REG(hw, IXGBE_EICS); /* Be sure the queue bits are not cleared */ eicr &= ~IXGBE_EICR_RTX_QUEUE; /* Clear interrupt with write */ IXGBE_WRITE_REG(hw, IXGBE_EICR, eicr); /* Link status change */ if (eicr & IXGBE_EICR_LSC) { IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_EIMC_LSC); adapter->task_requests |= IXGBE_REQUEST_TASK_LSC; } if (adapter->hw.mac.type != ixgbe_mac_82598EB) { if ((adapter->feat_en & IXGBE_FEATURE_FDIR) && (eicr & IXGBE_EICR_FLOW_DIR)) { /* This is probably overkill :) */ if (!atomic_cmpset_int(&adapter->fdir_reinit, 0, 1)) return (FILTER_HANDLED); /* Disable the interrupt */ IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_EICR_FLOW_DIR); adapter->task_requests |= IXGBE_REQUEST_TASK_FDIR; } else if (eicr & IXGBE_EICR_ECC) { device_printf(iflib_get_dev(adapter->ctx), "\nCRITICAL: ECC ERROR!! Please Reboot!!\n"); IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_ECC); } /* Check for over temp condition */ if (adapter->feat_en & IXGBE_FEATURE_TEMP_SENSOR) { switch (adapter->hw.mac.type) { case ixgbe_mac_X550EM_a: if (!(eicr & IXGBE_EICR_GPI_SDP0_X550EM_a)) break; IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_EICR_GPI_SDP0_X550EM_a); IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP0_X550EM_a); retval = hw->phy.ops.check_overtemp(hw); if (retval != IXGBE_ERR_OVERTEMP) break; device_printf(iflib_get_dev(adapter->ctx), "\nCRITICAL: OVER TEMP!! PHY IS SHUT DOWN!!\n"); device_printf(iflib_get_dev(adapter->ctx), "System shutdown required!\n"); break; default: if (!(eicr & IXGBE_EICR_TS)) break; retval = hw->phy.ops.check_overtemp(hw); if (retval != IXGBE_ERR_OVERTEMP) break; device_printf(iflib_get_dev(adapter->ctx), "\nCRITICAL: OVER TEMP!! PHY IS SHUT DOWN!!\n"); device_printf(iflib_get_dev(adapter->ctx), "System shutdown required!\n"); IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_TS); break; } } /* Check for VF message */ if ((adapter->feat_en & IXGBE_FEATURE_SRIOV) && (eicr & IXGBE_EICR_MAILBOX)) adapter->task_requests |= IXGBE_REQUEST_TASK_MBX; } if (ixgbe_is_sfp(hw)) { /* Pluggable optics-related interrupt */ if (hw->mac.type >= ixgbe_mac_X540) eicr_mask = IXGBE_EICR_GPI_SDP0_X540; else eicr_mask = IXGBE_EICR_GPI_SDP2_BY_MAC(hw); if (eicr & eicr_mask) { IXGBE_WRITE_REG(hw, IXGBE_EICR, eicr_mask); adapter->task_requests |= IXGBE_REQUEST_TASK_MOD; } if ((hw->mac.type == ixgbe_mac_82599EB) && (eicr & IXGBE_EICR_GPI_SDP1_BY_MAC(hw))) { IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1_BY_MAC(hw)); adapter->task_requests |= IXGBE_REQUEST_TASK_MSF; } } /* Check for fan failure */ if (adapter->feat_en & IXGBE_FEATURE_FAN_FAIL) { ixgbe_check_fan_failure(adapter, eicr, TRUE); IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1_BY_MAC(hw)); } /* External PHY interrupt */ if ((hw->phy.type == ixgbe_phy_x550em_ext_t) && (eicr & IXGBE_EICR_GPI_SDP0_X540)) { IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP0_X540); adapter->task_requests |= IXGBE_REQUEST_TASK_PHY; } return (adapter->task_requests != 0) ? FILTER_SCHEDULE_THREAD : FILTER_HANDLED; } /* ixgbe_msix_link */ /************************************************************************ * ixgbe_sysctl_interrupt_rate_handler ************************************************************************/ static int ixgbe_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS) { struct ix_rx_queue *que = ((struct ix_rx_queue *)oidp->oid_arg1); int error; unsigned int reg, usec, rate; reg = IXGBE_READ_REG(&que->adapter->hw, IXGBE_EITR(que->msix)); usec = ((reg & 0x0FF8) >> 3); if (usec > 0) rate = 500000 / usec; else rate = 0; error = sysctl_handle_int(oidp, &rate, 0, req); if (error || !req->newptr) return error; reg &= ~0xfff; /* default, no limitation */ ixgbe_max_interrupt_rate = 0; if (rate > 0 && rate < 500000) { if (rate < 1000) rate = 1000; ixgbe_max_interrupt_rate = rate; reg |= ((4000000/rate) & 0xff8); } IXGBE_WRITE_REG(&que->adapter->hw, IXGBE_EITR(que->msix), reg); return (0); } /* ixgbe_sysctl_interrupt_rate_handler */ /************************************************************************ * ixgbe_add_device_sysctls ************************************************************************/ static void ixgbe_add_device_sysctls(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); device_t dev = iflib_get_dev(ctx); struct ixgbe_hw *hw = &adapter->hw; struct sysctl_oid_list *child; struct sysctl_ctx_list *ctx_list; ctx_list = device_get_sysctl_ctx(dev); child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); /* Sysctls for all devices */ SYSCTL_ADD_PROC(ctx_list, child, OID_AUTO, "fc", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, adapter, 0, ixgbe_sysctl_flowcntl, "I", IXGBE_SYSCTL_DESC_SET_FC); SYSCTL_ADD_PROC(ctx_list, child, OID_AUTO, "advertise_speed", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, adapter, 0, ixgbe_sysctl_advertise, "I", IXGBE_SYSCTL_DESC_ADV_SPEED); #ifdef IXGBE_DEBUG /* testing sysctls (for all devices) */ SYSCTL_ADD_PROC(ctx_list, child, OID_AUTO, "power_state", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, adapter, 0, ixgbe_sysctl_power_state, "I", "PCI Power State"); SYSCTL_ADD_PROC(ctx_list, child, OID_AUTO, "print_rss_config", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NEEDGIANT, adapter, 0, ixgbe_sysctl_print_rss_config, "A", "Prints RSS Configuration"); #endif /* for X550 series devices */ if (hw->mac.type >= ixgbe_mac_X550) SYSCTL_ADD_PROC(ctx_list, child, OID_AUTO, "dmac", CTLTYPE_U16 | CTLFLAG_RW | CTLFLAG_NEEDGIANT, adapter, 0, ixgbe_sysctl_dmac, "I", "DMA Coalesce"); /* for WoL-capable devices */ if (hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T) { SYSCTL_ADD_PROC(ctx_list, child, OID_AUTO, "wol_enable", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, adapter, 0, ixgbe_sysctl_wol_enable, "I", "Enable/Disable Wake on LAN"); SYSCTL_ADD_PROC(ctx_list, child, OID_AUTO, "wufc", CTLTYPE_U32 | CTLFLAG_RW | CTLFLAG_NEEDGIANT, adapter, 0, ixgbe_sysctl_wufc, "I", "Enable/Disable Wake Up Filters"); } /* for X552/X557-AT devices */ if (hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T) { struct sysctl_oid *phy_node; struct sysctl_oid_list *phy_list; phy_node = SYSCTL_ADD_NODE(ctx_list, child, OID_AUTO, "phy", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "External PHY sysctls"); phy_list = SYSCTL_CHILDREN(phy_node); SYSCTL_ADD_PROC(ctx_list, phy_list, OID_AUTO, "temp", CTLTYPE_U16 | CTLFLAG_RD | CTLFLAG_NEEDGIANT, adapter, 0, ixgbe_sysctl_phy_temp, "I", "Current External PHY Temperature (Celsius)"); SYSCTL_ADD_PROC(ctx_list, phy_list, OID_AUTO, "overtemp_occurred", CTLTYPE_U16 | CTLFLAG_RD | CTLFLAG_NEEDGIANT, adapter, 0, ixgbe_sysctl_phy_overtemp_occurred, "I", "External PHY High Temperature Event Occurred"); } if (adapter->feat_cap & IXGBE_FEATURE_EEE) { SYSCTL_ADD_PROC(ctx_list, child, OID_AUTO, "eee_state", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, adapter, 0, ixgbe_sysctl_eee_state, "I", "EEE Power Save State"); } } /* ixgbe_add_device_sysctls */ /************************************************************************ * ixgbe_allocate_pci_resources ************************************************************************/ static int ixgbe_allocate_pci_resources(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); device_t dev = iflib_get_dev(ctx); int rid; rid = PCIR_BAR(0); adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (!(adapter->pci_mem)) { device_printf(dev, "Unable to allocate bus resource: memory\n"); return (ENXIO); } /* Save bus_space values for READ/WRITE_REG macros */ adapter->osdep.mem_bus_space_tag = rman_get_bustag(adapter->pci_mem); adapter->osdep.mem_bus_space_handle = rman_get_bushandle(adapter->pci_mem); /* Set hw values for shared code */ adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle; return (0); } /* ixgbe_allocate_pci_resources */ /************************************************************************ * ixgbe_detach - Device removal routine * * Called when the driver is being removed. * Stops the adapter and deallocates all the resources * that were allocated for driver operation. * * return 0 on success, positive on failure ************************************************************************/ static int ixgbe_if_detach(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); device_t dev = iflib_get_dev(ctx); u32 ctrl_ext; INIT_DEBUGOUT("ixgbe_detach: begin"); if (ixgbe_pci_iov_detach(dev) != 0) { device_printf(dev, "SR-IOV in use; detach first.\n"); return (EBUSY); } ixgbe_setup_low_power_mode(ctx); /* let hardware know driver is unloading */ ctrl_ext = IXGBE_READ_REG(&adapter->hw, IXGBE_CTRL_EXT); ctrl_ext &= ~IXGBE_CTRL_EXT_DRV_LOAD; IXGBE_WRITE_REG(&adapter->hw, IXGBE_CTRL_EXT, ctrl_ext); ixgbe_free_pci_resources(ctx); free(adapter->mta, M_IXGBE); return (0); } /* ixgbe_if_detach */ /************************************************************************ * ixgbe_setup_low_power_mode - LPLU/WoL preparation * * Prepare the adapter/port for LPLU and/or WoL ************************************************************************/ static int ixgbe_setup_low_power_mode(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct ixgbe_hw *hw = &adapter->hw; device_t dev = iflib_get_dev(ctx); s32 error = 0; if (!hw->wol_enabled) ixgbe_set_phy_power(hw, FALSE); /* Limit power management flow to X550EM baseT */ if (hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T && hw->phy.ops.enter_lplu) { /* Turn off support for APM wakeup. (Using ACPI instead) */ IXGBE_WRITE_REG(hw, IXGBE_GRC, IXGBE_READ_REG(hw, IXGBE_GRC) & ~(u32)2); /* * Clear Wake Up Status register to prevent any previous wakeup * events from waking us up immediately after we suspend. */ IXGBE_WRITE_REG(hw, IXGBE_WUS, 0xffffffff); /* * Program the Wakeup Filter Control register with user filter * settings */ IXGBE_WRITE_REG(hw, IXGBE_WUFC, adapter->wufc); /* Enable wakeups and power management in Wakeup Control */ IXGBE_WRITE_REG(hw, IXGBE_WUC, IXGBE_WUC_WKEN | IXGBE_WUC_PME_EN); /* X550EM baseT adapters need a special LPLU flow */ hw->phy.reset_disable = TRUE; ixgbe_if_stop(ctx); error = hw->phy.ops.enter_lplu(hw); if (error) device_printf(dev, "Error entering LPLU: %d\n", error); hw->phy.reset_disable = FALSE; } else { /* Just stop for other adapters */ ixgbe_if_stop(ctx); } return error; } /* ixgbe_setup_low_power_mode */ /************************************************************************ * ixgbe_shutdown - Shutdown entry point ************************************************************************/ static int ixgbe_if_shutdown(if_ctx_t ctx) { int error = 0; INIT_DEBUGOUT("ixgbe_shutdown: begin"); error = ixgbe_setup_low_power_mode(ctx); return (error); } /* ixgbe_if_shutdown */ /************************************************************************ * ixgbe_suspend * * From D0 to D3 ************************************************************************/ static int ixgbe_if_suspend(if_ctx_t ctx) { int error = 0; INIT_DEBUGOUT("ixgbe_suspend: begin"); error = ixgbe_setup_low_power_mode(ctx); return (error); } /* ixgbe_if_suspend */ /************************************************************************ * ixgbe_resume * * From D3 to D0 ************************************************************************/ static int ixgbe_if_resume(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); device_t dev = iflib_get_dev(ctx); struct ifnet *ifp = iflib_get_ifp(ctx); struct ixgbe_hw *hw = &adapter->hw; u32 wus; INIT_DEBUGOUT("ixgbe_resume: begin"); /* Read & clear WUS register */ wus = IXGBE_READ_REG(hw, IXGBE_WUS); if (wus) device_printf(dev, "Woken up by (WUS): %#010x\n", IXGBE_READ_REG(hw, IXGBE_WUS)); IXGBE_WRITE_REG(hw, IXGBE_WUS, 0xffffffff); /* And clear WUFC until next low-power transition */ IXGBE_WRITE_REG(hw, IXGBE_WUFC, 0); /* * Required after D3->D0 transition; * will re-advertise all previous advertised speeds */ if (ifp->if_flags & IFF_UP) ixgbe_if_init(ctx); return (0); } /* ixgbe_if_resume */ /************************************************************************ * ixgbe_if_mtu_set - Ioctl mtu entry point * * Return 0 on success, EINVAL on failure ************************************************************************/ static int ixgbe_if_mtu_set(if_ctx_t ctx, uint32_t mtu) { struct adapter *adapter = iflib_get_softc(ctx); int error = 0; IOCTL_DEBUGOUT("ioctl: SIOCIFMTU (Set Interface MTU)"); if (mtu > IXGBE_MAX_MTU) { error = EINVAL; } else { adapter->max_frame_size = mtu + IXGBE_MTU_HDR; } return error; } /* ixgbe_if_mtu_set */ /************************************************************************ * ixgbe_if_crcstrip_set ************************************************************************/ static void ixgbe_if_crcstrip_set(if_ctx_t ctx, int onoff, int crcstrip) { struct adapter *sc = iflib_get_softc(ctx); struct ixgbe_hw *hw = &sc->hw; /* crc stripping is set in two places: * IXGBE_HLREG0 (modified on init_locked and hw reset) * IXGBE_RDRXCTL (set by the original driver in * ixgbe_setup_hw_rsc() called in init_locked. * We disable the setting when netmap is compiled in). * We update the values here, but also in ixgbe.c because * init_locked sometimes is called outside our control. */ uint32_t hl, rxc; hl = IXGBE_READ_REG(hw, IXGBE_HLREG0); rxc = IXGBE_READ_REG(hw, IXGBE_RDRXCTL); #ifdef NETMAP if (netmap_verbose) D("%s read HLREG 0x%x rxc 0x%x", onoff ? "enter" : "exit", hl, rxc); #endif /* hw requirements ... */ rxc &= ~IXGBE_RDRXCTL_RSCFRSTSIZE; rxc |= IXGBE_RDRXCTL_RSCACKC; if (onoff && !crcstrip) { /* keep the crc. Fast rx */ hl &= ~IXGBE_HLREG0_RXCRCSTRP; rxc &= ~IXGBE_RDRXCTL_CRCSTRIP; } else { /* reset default mode */ hl |= IXGBE_HLREG0_RXCRCSTRP; rxc |= IXGBE_RDRXCTL_CRCSTRIP; } #ifdef NETMAP if (netmap_verbose) D("%s write HLREG 0x%x rxc 0x%x", onoff ? "enter" : "exit", hl, rxc); #endif IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hl); IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rxc); } /* ixgbe_if_crcstrip_set */ /********************************************************************* * ixgbe_if_init - Init entry point * * Used in two ways: It is used by the stack as an init * entry point in network interface structure. It is also * used by the driver as a hw/sw initialization routine to * get to a consistent state. * * Return 0 on success, positive on failure **********************************************************************/ void ixgbe_if_init(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct ifnet *ifp = iflib_get_ifp(ctx); device_t dev = iflib_get_dev(ctx); struct ixgbe_hw *hw = &adapter->hw; struct ix_rx_queue *rx_que; struct ix_tx_queue *tx_que; u32 txdctl, mhadd; u32 rxdctl, rxctrl; u32 ctrl_ext; int i, j, err; INIT_DEBUGOUT("ixgbe_if_init: begin"); /* Queue indices may change with IOV mode */ ixgbe_align_all_queue_indices(adapter); /* reprogram the RAR[0] in case user changed it. */ ixgbe_set_rar(hw, 0, hw->mac.addr, adapter->pool, IXGBE_RAH_AV); /* Get the latest mac address, User can use a LAA */ bcopy(IF_LLADDR(ifp), hw->mac.addr, IXGBE_ETH_LENGTH_OF_ADDRESS); ixgbe_set_rar(hw, 0, hw->mac.addr, adapter->pool, 1); hw->addr_ctrl.rar_used_count = 1; ixgbe_init_hw(hw); ixgbe_initialize_iov(adapter); ixgbe_initialize_transmit_units(ctx); /* Setup Multicast table */ ixgbe_if_multi_set(ctx); /* Determine the correct mbuf pool, based on frame size */ adapter->rx_mbuf_sz = iflib_get_rx_mbuf_sz(ctx); /* Configure RX settings */ ixgbe_initialize_receive_units(ctx); /* * Initialize variable holding task enqueue requests * from MSI-X interrupts */ adapter->task_requests = 0; /* Enable SDP & MSI-X interrupts based on adapter */ ixgbe_config_gpie(adapter); /* Set MTU size */ if (ifp->if_mtu > ETHERMTU) { /* aka IXGBE_MAXFRS on 82599 and newer */ mhadd = IXGBE_READ_REG(hw, IXGBE_MHADD); mhadd &= ~IXGBE_MHADD_MFS_MASK; mhadd |= adapter->max_frame_size << IXGBE_MHADD_MFS_SHIFT; IXGBE_WRITE_REG(hw, IXGBE_MHADD, mhadd); } /* Now enable all the queues */ for (i = 0, tx_que = adapter->tx_queues; i < adapter->num_tx_queues; i++, tx_que++) { struct tx_ring *txr = &tx_que->txr; txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txr->me)); txdctl |= IXGBE_TXDCTL_ENABLE; /* Set WTHRESH to 8, burst writeback */ txdctl |= (8 << 16); /* * When the internal queue falls below PTHRESH (32), * start prefetching as long as there are at least * HTHRESH (1) buffers ready. The values are taken * from the Intel linux driver 3.8.21. * Prefetching enables tx line rate even with 1 queue. */ txdctl |= (32 << 0) | (1 << 8); IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txr->me), txdctl); } for (i = 0, rx_que = adapter->rx_queues; i < adapter->num_rx_queues; i++, rx_que++) { struct rx_ring *rxr = &rx_que->rxr; rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxr->me)); if (hw->mac.type == ixgbe_mac_82598EB) { /* * PTHRESH = 21 * HTHRESH = 4 * WTHRESH = 8 */ rxdctl &= ~0x3FFFFF; rxdctl |= 0x080420; } rxdctl |= IXGBE_RXDCTL_ENABLE; IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxr->me), rxdctl); for (j = 0; j < 10; j++) { if (IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxr->me)) & IXGBE_RXDCTL_ENABLE) break; else msec_delay(1); } wmb(); } /* Enable Receive engine */ rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL); if (hw->mac.type == ixgbe_mac_82598EB) rxctrl |= IXGBE_RXCTRL_DMBYPS; rxctrl |= IXGBE_RXCTRL_RXEN; ixgbe_enable_rx_dma(hw, rxctrl); /* Set up MSI/MSI-X routing */ if (ixgbe_enable_msix) { ixgbe_configure_ivars(adapter); /* Set up auto-mask */ if (hw->mac.type == ixgbe_mac_82598EB) IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE); else { IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(0), 0xFFFFFFFF); IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(1), 0xFFFFFFFF); } } else { /* Simple settings for Legacy/MSI */ ixgbe_set_ivar(adapter, 0, 0, 0); ixgbe_set_ivar(adapter, 0, 0, 1); IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE); } ixgbe_init_fdir(adapter); /* * Check on any SFP devices that * need to be kick-started */ if (hw->phy.type == ixgbe_phy_none) { err = hw->phy.ops.identify(hw); if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) { device_printf(dev, "Unsupported SFP+ module type was detected.\n"); return; } } /* Set moderation on the Link interrupt */ IXGBE_WRITE_REG(hw, IXGBE_EITR(adapter->vector), IXGBE_LINK_ITR); /* Enable power to the phy. */ ixgbe_set_phy_power(hw, TRUE); /* Config/Enable Link */ ixgbe_config_link(ctx); /* Hardware Packet Buffer & Flow Control setup */ ixgbe_config_delay_values(adapter); /* Initialize the FC settings */ ixgbe_start_hw(hw); /* Set up VLAN support and filter */ ixgbe_setup_vlan_hw_support(ctx); /* Setup DMA Coalescing */ ixgbe_config_dmac(adapter); /* And now turn on interrupts */ ixgbe_if_enable_intr(ctx); /* Enable the use of the MBX by the VF's */ if (adapter->feat_en & IXGBE_FEATURE_SRIOV) { ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT); ctrl_ext |= IXGBE_CTRL_EXT_PFRSTD; IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext); } } /* ixgbe_init_locked */ /************************************************************************ * ixgbe_set_ivar * * Setup the correct IVAR register for a particular MSI-X interrupt * (yes this is all very magic and confusing :) * - entry is the register array entry * - vector is the MSI-X vector for this queue * - type is RX/TX/MISC ************************************************************************/ static void ixgbe_set_ivar(struct adapter *adapter, u8 entry, u8 vector, s8 type) { struct ixgbe_hw *hw = &adapter->hw; u32 ivar, index; vector |= IXGBE_IVAR_ALLOC_VAL; switch (hw->mac.type) { case ixgbe_mac_82598EB: if (type == -1) entry = IXGBE_IVAR_OTHER_CAUSES_INDEX; else entry += (type * 64); index = (entry >> 2) & 0x1F; ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index)); ivar &= ~(0xFF << (8 * (entry & 0x3))); ivar |= (vector << (8 * (entry & 0x3))); IXGBE_WRITE_REG(&adapter->hw, IXGBE_IVAR(index), ivar); break; case ixgbe_mac_82599EB: case ixgbe_mac_X540: case ixgbe_mac_X550: case ixgbe_mac_X550EM_x: case ixgbe_mac_X550EM_a: if (type == -1) { /* MISC IVAR */ index = (entry & 1) * 8; ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC); ivar &= ~(0xFF << index); ivar |= (vector << index); IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar); } else { /* RX/TX IVARS */ index = (16 * (entry & 1)) + (8 * type); ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1)); ivar &= ~(0xFF << index); ivar |= (vector << index); IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar); } default: break; } } /* ixgbe_set_ivar */ /************************************************************************ * ixgbe_configure_ivars ************************************************************************/ static void ixgbe_configure_ivars(struct adapter *adapter) { struct ix_rx_queue *rx_que = adapter->rx_queues; struct ix_tx_queue *tx_que = adapter->tx_queues; u32 newitr; if (ixgbe_max_interrupt_rate > 0) newitr = (4000000 / ixgbe_max_interrupt_rate) & 0x0FF8; else { /* * Disable DMA coalescing if interrupt moderation is * disabled. */ adapter->dmac = 0; newitr = 0; } for (int i = 0; i < adapter->num_rx_queues; i++, rx_que++) { struct rx_ring *rxr = &rx_que->rxr; /* First the RX queue entry */ ixgbe_set_ivar(adapter, rxr->me, rx_que->msix, 0); /* Set an Initial EITR value */ IXGBE_WRITE_REG(&adapter->hw, IXGBE_EITR(rx_que->msix), newitr); } for (int i = 0; i < adapter->num_tx_queues; i++, tx_que++) { struct tx_ring *txr = &tx_que->txr; /* ... and the TX */ ixgbe_set_ivar(adapter, txr->me, tx_que->msix, 1); } /* For the Link interrupt */ ixgbe_set_ivar(adapter, 1, adapter->vector, -1); } /* ixgbe_configure_ivars */ /************************************************************************ * ixgbe_config_gpie ************************************************************************/ static void ixgbe_config_gpie(struct adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; u32 gpie; gpie = IXGBE_READ_REG(hw, IXGBE_GPIE); if (adapter->intr_type == IFLIB_INTR_MSIX) { /* Enable Enhanced MSI-X mode */ gpie |= IXGBE_GPIE_MSIX_MODE | IXGBE_GPIE_EIAME | IXGBE_GPIE_PBA_SUPPORT | IXGBE_GPIE_OCD; } /* Fan Failure Interrupt */ if (adapter->feat_en & IXGBE_FEATURE_FAN_FAIL) gpie |= IXGBE_SDP1_GPIEN; /* Thermal Sensor Interrupt */ if (adapter->feat_en & IXGBE_FEATURE_TEMP_SENSOR) gpie |= IXGBE_SDP0_GPIEN_X540; /* Link detection */ switch (hw->mac.type) { case ixgbe_mac_82599EB: gpie |= IXGBE_SDP1_GPIEN | IXGBE_SDP2_GPIEN; break; case ixgbe_mac_X550EM_x: case ixgbe_mac_X550EM_a: gpie |= IXGBE_SDP0_GPIEN_X540; break; default: break; } IXGBE_WRITE_REG(hw, IXGBE_GPIE, gpie); } /* ixgbe_config_gpie */ /************************************************************************ * ixgbe_config_delay_values * * Requires adapter->max_frame_size to be set. ************************************************************************/ static void ixgbe_config_delay_values(struct adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; u32 rxpb, frame, size, tmp; frame = adapter->max_frame_size; /* Calculate High Water */ switch (hw->mac.type) { case ixgbe_mac_X540: case ixgbe_mac_X550: case ixgbe_mac_X550EM_x: case ixgbe_mac_X550EM_a: tmp = IXGBE_DV_X540(frame, frame); break; default: tmp = IXGBE_DV(frame, frame); break; } size = IXGBE_BT2KB(tmp); rxpb = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(0)) >> 10; hw->fc.high_water[0] = rxpb - size; /* Now calculate Low Water */ switch (hw->mac.type) { case ixgbe_mac_X540: case ixgbe_mac_X550: case ixgbe_mac_X550EM_x: case ixgbe_mac_X550EM_a: tmp = IXGBE_LOW_DV_X540(frame); break; default: tmp = IXGBE_LOW_DV(frame); break; } hw->fc.low_water[0] = IXGBE_BT2KB(tmp); hw->fc.pause_time = IXGBE_FC_PAUSE; hw->fc.send_xon = TRUE; } /* ixgbe_config_delay_values */ /************************************************************************ * ixgbe_set_multi - Multicast Update * * Called whenever multicast address list is updated. ************************************************************************/ static u_int ixgbe_mc_filter_apply(void *arg, struct sockaddr_dl *sdl, u_int count) { struct adapter *adapter = arg; struct ixgbe_mc_addr *mta = adapter->mta; if (count == MAX_NUM_MULTICAST_ADDRESSES) return (0); bcopy(LLADDR(sdl), mta[count].addr, IXGBE_ETH_LENGTH_OF_ADDRESS); mta[count].vmdq = adapter->pool; return (1); } /* ixgbe_mc_filter_apply */ static void ixgbe_if_multi_set(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct ixgbe_mc_addr *mta; struct ifnet *ifp = iflib_get_ifp(ctx); u8 *update_ptr; u32 fctrl; u_int mcnt; IOCTL_DEBUGOUT("ixgbe_if_multi_set: begin"); mta = adapter->mta; bzero(mta, sizeof(*mta) * MAX_NUM_MULTICAST_ADDRESSES); mcnt = if_foreach_llmaddr(iflib_get_ifp(ctx), ixgbe_mc_filter_apply, adapter); fctrl = IXGBE_READ_REG(&adapter->hw, IXGBE_FCTRL); fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE); if (ifp->if_flags & IFF_PROMISC) fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE); else if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES || ifp->if_flags & IFF_ALLMULTI) { fctrl |= IXGBE_FCTRL_MPE; fctrl &= ~IXGBE_FCTRL_UPE; } else fctrl &= ~(IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE); IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, fctrl); if (mcnt < MAX_NUM_MULTICAST_ADDRESSES) { update_ptr = (u8 *)mta; ixgbe_update_mc_addr_list(&adapter->hw, update_ptr, mcnt, ixgbe_mc_array_itr, TRUE); } } /* ixgbe_if_multi_set */ /************************************************************************ * ixgbe_mc_array_itr * * An iterator function needed by the multicast shared code. * It feeds the shared code routine the addresses in the * array of ixgbe_set_multi() one by one. ************************************************************************/ static u8 * ixgbe_mc_array_itr(struct ixgbe_hw *hw, u8 **update_ptr, u32 *vmdq) { struct ixgbe_mc_addr *mta; mta = (struct ixgbe_mc_addr *)*update_ptr; *vmdq = mta->vmdq; *update_ptr = (u8*)(mta + 1); return (mta->addr); } /* ixgbe_mc_array_itr */ /************************************************************************ * ixgbe_local_timer - Timer routine * * Checks for link status, updates statistics, * and runs the watchdog check. ************************************************************************/ static void ixgbe_if_timer(if_ctx_t ctx, uint16_t qid) { struct adapter *adapter = iflib_get_softc(ctx); if (qid != 0) return; /* Check for pluggable optics */ if (adapter->sfp_probe) if (!ixgbe_sfp_probe(ctx)) return; /* Nothing to do */ ixgbe_check_link(&adapter->hw, &adapter->link_speed, &adapter->link_up, 0); /* Fire off the adminq task */ iflib_admin_intr_deferred(ctx); } /* ixgbe_if_timer */ /************************************************************************ * ixgbe_sfp_probe * * Determine if a port had optics inserted. ************************************************************************/ static bool ixgbe_sfp_probe(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct ixgbe_hw *hw = &adapter->hw; device_t dev = iflib_get_dev(ctx); bool result = FALSE; if ((hw->phy.type == ixgbe_phy_nl) && (hw->phy.sfp_type == ixgbe_sfp_type_not_present)) { s32 ret = hw->phy.ops.identify_sfp(hw); if (ret) goto out; ret = hw->phy.ops.reset(hw); adapter->sfp_probe = FALSE; if (ret == IXGBE_ERR_SFP_NOT_SUPPORTED) { device_printf(dev, "Unsupported SFP+ module detected!"); device_printf(dev, "Reload driver with supported module.\n"); goto out; } else device_printf(dev, "SFP+ module detected!\n"); /* We now have supported optics */ result = TRUE; } out: return (result); } /* ixgbe_sfp_probe */ /************************************************************************ * ixgbe_handle_mod - Tasklet for SFP module interrupts ************************************************************************/ static void ixgbe_handle_mod(void *context) { if_ctx_t ctx = context; struct adapter *adapter = iflib_get_softc(ctx); struct ixgbe_hw *hw = &adapter->hw; device_t dev = iflib_get_dev(ctx); u32 err, cage_full = 0; if (adapter->hw.need_crosstalk_fix) { switch (hw->mac.type) { case ixgbe_mac_82599EB: cage_full = IXGBE_READ_REG(hw, IXGBE_ESDP) & IXGBE_ESDP_SDP2; break; case ixgbe_mac_X550EM_x: case ixgbe_mac_X550EM_a: cage_full = IXGBE_READ_REG(hw, IXGBE_ESDP) & IXGBE_ESDP_SDP0; break; default: break; } if (!cage_full) goto handle_mod_out; } err = hw->phy.ops.identify_sfp(hw); if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) { device_printf(dev, "Unsupported SFP+ module type was detected.\n"); goto handle_mod_out; } if (hw->mac.type == ixgbe_mac_82598EB) err = hw->phy.ops.reset(hw); else err = hw->mac.ops.setup_sfp(hw); if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) { device_printf(dev, "Setup failure - unsupported SFP+ module type.\n"); goto handle_mod_out; } adapter->task_requests |= IXGBE_REQUEST_TASK_MSF; return; handle_mod_out: adapter->task_requests &= ~(IXGBE_REQUEST_TASK_MSF); } /* ixgbe_handle_mod */ /************************************************************************ * ixgbe_handle_msf - Tasklet for MSF (multispeed fiber) interrupts ************************************************************************/ static void ixgbe_handle_msf(void *context) { if_ctx_t ctx = context; struct adapter *adapter = iflib_get_softc(ctx); struct ixgbe_hw *hw = &adapter->hw; u32 autoneg; bool negotiate; /* get_supported_phy_layer will call hw->phy.ops.identify_sfp() */ adapter->phy_layer = ixgbe_get_supported_physical_layer(hw); autoneg = hw->phy.autoneg_advertised; if ((!autoneg) && (hw->mac.ops.get_link_capabilities)) hw->mac.ops.get_link_capabilities(hw, &autoneg, &negotiate); if (hw->mac.ops.setup_link) hw->mac.ops.setup_link(hw, autoneg, TRUE); /* Adjust media types shown in ifconfig */ ifmedia_removeall(adapter->media); ixgbe_add_media_types(adapter->ctx); ifmedia_set(adapter->media, IFM_ETHER | IFM_AUTO); } /* ixgbe_handle_msf */ /************************************************************************ * ixgbe_handle_phy - Tasklet for external PHY interrupts ************************************************************************/ static void ixgbe_handle_phy(void *context) { if_ctx_t ctx = context; struct adapter *adapter = iflib_get_softc(ctx); struct ixgbe_hw *hw = &adapter->hw; int error; error = hw->phy.ops.handle_lasi(hw); if (error == IXGBE_ERR_OVERTEMP) device_printf(adapter->dev, "CRITICAL: EXTERNAL PHY OVER TEMP!! PHY will downshift to lower power state!\n"); else if (error) device_printf(adapter->dev, "Error handling LASI interrupt: %d\n", error); } /* ixgbe_handle_phy */ /************************************************************************ * ixgbe_if_stop - Stop the hardware * * Disables all traffic on the adapter by issuing a * global reset on the MAC and deallocates TX/RX buffers. ************************************************************************/ static void ixgbe_if_stop(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct ixgbe_hw *hw = &adapter->hw; INIT_DEBUGOUT("ixgbe_if_stop: begin\n"); ixgbe_reset_hw(hw); hw->adapter_stopped = FALSE; ixgbe_stop_adapter(hw); if (hw->mac.type == ixgbe_mac_82599EB) ixgbe_stop_mac_link_on_d3_82599(hw); /* Turn off the laser - noop with no optics */ ixgbe_disable_tx_laser(hw); /* Update the stack */ adapter->link_up = FALSE; ixgbe_if_update_admin_status(ctx); /* reprogram the RAR[0] in case user changed it. */ ixgbe_set_rar(&adapter->hw, 0, adapter->hw.mac.addr, 0, IXGBE_RAH_AV); return; } /* ixgbe_if_stop */ /************************************************************************ * ixgbe_update_link_status - Update OS on link state * * Note: Only updates the OS on the cached link state. * The real check of the hardware only happens with * a link interrupt. ************************************************************************/ static void ixgbe_if_update_admin_status(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); device_t dev = iflib_get_dev(ctx); if (adapter->link_up) { if (adapter->link_active == FALSE) { if (bootverbose) device_printf(dev, "Link is up %d Gbps %s \n", ((adapter->link_speed == 128) ? 10 : 1), "Full Duplex"); adapter->link_active = TRUE; /* Update any Flow Control changes */ ixgbe_fc_enable(&adapter->hw); /* Update DMA coalescing config */ ixgbe_config_dmac(adapter); /* should actually be negotiated value */ iflib_link_state_change(ctx, LINK_STATE_UP, IF_Gbps(10)); if (adapter->feat_en & IXGBE_FEATURE_SRIOV) ixgbe_ping_all_vfs(adapter); } } else { /* Link down */ if (adapter->link_active == TRUE) { if (bootverbose) device_printf(dev, "Link is Down\n"); iflib_link_state_change(ctx, LINK_STATE_DOWN, 0); adapter->link_active = FALSE; if (adapter->feat_en & IXGBE_FEATURE_SRIOV) ixgbe_ping_all_vfs(adapter); } } /* Handle task requests from msix_link() */ if (adapter->task_requests & IXGBE_REQUEST_TASK_MOD) ixgbe_handle_mod(ctx); if (adapter->task_requests & IXGBE_REQUEST_TASK_MSF) ixgbe_handle_msf(ctx); if (adapter->task_requests & IXGBE_REQUEST_TASK_MBX) ixgbe_handle_mbx(ctx); if (adapter->task_requests & IXGBE_REQUEST_TASK_FDIR) ixgbe_reinit_fdir(ctx); if (adapter->task_requests & IXGBE_REQUEST_TASK_PHY) ixgbe_handle_phy(ctx); adapter->task_requests = 0; ixgbe_update_stats_counters(adapter); } /* ixgbe_if_update_admin_status */ /************************************************************************ * ixgbe_config_dmac - Configure DMA Coalescing ************************************************************************/ static void ixgbe_config_dmac(struct adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; struct ixgbe_dmac_config *dcfg = &hw->mac.dmac_config; if (hw->mac.type < ixgbe_mac_X550 || !hw->mac.ops.dmac_config) return; if (dcfg->watchdog_timer ^ adapter->dmac || dcfg->link_speed ^ adapter->link_speed) { dcfg->watchdog_timer = adapter->dmac; dcfg->fcoe_en = FALSE; dcfg->link_speed = adapter->link_speed; dcfg->num_tcs = 1; INIT_DEBUGOUT2("dmac settings: watchdog %d, link speed %d\n", dcfg->watchdog_timer, dcfg->link_speed); hw->mac.ops.dmac_config(hw); } } /* ixgbe_config_dmac */ /************************************************************************ * ixgbe_if_enable_intr ************************************************************************/ void ixgbe_if_enable_intr(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct ixgbe_hw *hw = &adapter->hw; struct ix_rx_queue *que = adapter->rx_queues; u32 mask, fwsm; mask = (IXGBE_EIMS_ENABLE_MASK & ~IXGBE_EIMS_RTX_QUEUE); switch (adapter->hw.mac.type) { case ixgbe_mac_82599EB: mask |= IXGBE_EIMS_ECC; /* Temperature sensor on some adapters */ mask |= IXGBE_EIMS_GPI_SDP0; /* SFP+ (RX_LOS_N & MOD_ABS_N) */ mask |= IXGBE_EIMS_GPI_SDP1; mask |= IXGBE_EIMS_GPI_SDP2; break; case ixgbe_mac_X540: /* Detect if Thermal Sensor is enabled */ fwsm = IXGBE_READ_REG(hw, IXGBE_FWSM); if (fwsm & IXGBE_FWSM_TS_ENABLED) mask |= IXGBE_EIMS_TS; mask |= IXGBE_EIMS_ECC; break; case ixgbe_mac_X550: /* MAC thermal sensor is automatically enabled */ mask |= IXGBE_EIMS_TS; mask |= IXGBE_EIMS_ECC; break; case ixgbe_mac_X550EM_x: case ixgbe_mac_X550EM_a: /* Some devices use SDP0 for important information */ if (hw->device_id == IXGBE_DEV_ID_X550EM_X_SFP || hw->device_id == IXGBE_DEV_ID_X550EM_A_SFP || hw->device_id == IXGBE_DEV_ID_X550EM_A_SFP_N || hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T) mask |= IXGBE_EIMS_GPI_SDP0_BY_MAC(hw); if (hw->phy.type == ixgbe_phy_x550em_ext_t) mask |= IXGBE_EICR_GPI_SDP0_X540; mask |= IXGBE_EIMS_ECC; break; default: break; } /* Enable Fan Failure detection */ if (adapter->feat_en & IXGBE_FEATURE_FAN_FAIL) mask |= IXGBE_EIMS_GPI_SDP1; /* Enable SR-IOV */ if (adapter->feat_en & IXGBE_FEATURE_SRIOV) mask |= IXGBE_EIMS_MAILBOX; /* Enable Flow Director */ if (adapter->feat_en & IXGBE_FEATURE_FDIR) mask |= IXGBE_EIMS_FLOW_DIR; IXGBE_WRITE_REG(hw, IXGBE_EIMS, mask); /* With MSI-X we use auto clear */ if (adapter->intr_type == IFLIB_INTR_MSIX) { mask = IXGBE_EIMS_ENABLE_MASK; /* Don't autoclear Link */ mask &= ~IXGBE_EIMS_OTHER; mask &= ~IXGBE_EIMS_LSC; if (adapter->feat_cap & IXGBE_FEATURE_SRIOV) mask &= ~IXGBE_EIMS_MAILBOX; IXGBE_WRITE_REG(hw, IXGBE_EIAC, mask); } /* * Now enable all queues, this is done separately to * allow for handling the extended (beyond 32) MSI-X * vectors that can be used by 82599 */ for (int i = 0; i < adapter->num_rx_queues; i++, que++) ixgbe_enable_queue(adapter, que->msix); IXGBE_WRITE_FLUSH(hw); } /* ixgbe_if_enable_intr */ /************************************************************************ * ixgbe_disable_intr ************************************************************************/ static void ixgbe_if_disable_intr(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); if (adapter->intr_type == IFLIB_INTR_MSIX) IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIAC, 0); if (adapter->hw.mac.type == ixgbe_mac_82598EB) { IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, ~0); } else { IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, 0xFFFF0000); IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC_EX(0), ~0); IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC_EX(1), ~0); } IXGBE_WRITE_FLUSH(&adapter->hw); } /* ixgbe_if_disable_intr */ /************************************************************************ * ixgbe_link_intr_enable ************************************************************************/ static void ixgbe_link_intr_enable(if_ctx_t ctx) { struct ixgbe_hw *hw = &((struct adapter *)iflib_get_softc(ctx))->hw; /* Re-enable other interrupts */ IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EIMS_OTHER | IXGBE_EIMS_LSC); } /* ixgbe_link_intr_enable */ /************************************************************************ * ixgbe_if_rx_queue_intr_enable ************************************************************************/ static int ixgbe_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid) { struct adapter *adapter = iflib_get_softc(ctx); struct ix_rx_queue *que = &adapter->rx_queues[rxqid]; ixgbe_enable_queue(adapter, que->msix); return (0); } /* ixgbe_if_rx_queue_intr_enable */ /************************************************************************ * ixgbe_enable_queue ************************************************************************/ static void ixgbe_enable_queue(struct adapter *adapter, u32 vector) { struct ixgbe_hw *hw = &adapter->hw; u64 queue = 1ULL << vector; u32 mask; if (hw->mac.type == ixgbe_mac_82598EB) { mask = (IXGBE_EIMS_RTX_QUEUE & queue); IXGBE_WRITE_REG(hw, IXGBE_EIMS, mask); } else { mask = (queue & 0xFFFFFFFF); if (mask) IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(0), mask); mask = (queue >> 32); if (mask) IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(1), mask); } } /* ixgbe_enable_queue */ /************************************************************************ * ixgbe_disable_queue ************************************************************************/ static void ixgbe_disable_queue(struct adapter *adapter, u32 vector) { struct ixgbe_hw *hw = &adapter->hw; u64 queue = 1ULL << vector; u32 mask; if (hw->mac.type == ixgbe_mac_82598EB) { mask = (IXGBE_EIMS_RTX_QUEUE & queue); IXGBE_WRITE_REG(hw, IXGBE_EIMC, mask); } else { mask = (queue & 0xFFFFFFFF); if (mask) IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(0), mask); mask = (queue >> 32); if (mask) IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(1), mask); } } /* ixgbe_disable_queue */ /************************************************************************ * ixgbe_intr - Legacy Interrupt Service Routine ************************************************************************/ int ixgbe_intr(void *arg) { struct adapter *adapter = arg; struct ix_rx_queue *que = adapter->rx_queues; struct ixgbe_hw *hw = &adapter->hw; if_ctx_t ctx = adapter->ctx; u32 eicr, eicr_mask; eicr = IXGBE_READ_REG(hw, IXGBE_EICR); ++que->irqs; if (eicr == 0) { ixgbe_if_enable_intr(ctx); return (FILTER_HANDLED); } /* Check for fan failure */ if ((hw->device_id == IXGBE_DEV_ID_82598AT) && (eicr & IXGBE_EICR_GPI_SDP1)) { device_printf(adapter->dev, "\nCRITICAL: FAN FAILURE!! REPLACE IMMEDIATELY!!\n"); IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EICR_GPI_SDP1_BY_MAC(hw)); } /* Link status change */ if (eicr & IXGBE_EICR_LSC) { IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_EIMC_LSC); iflib_admin_intr_deferred(ctx); } if (ixgbe_is_sfp(hw)) { /* Pluggable optics-related interrupt */ if (hw->mac.type >= ixgbe_mac_X540) eicr_mask = IXGBE_EICR_GPI_SDP0_X540; else eicr_mask = IXGBE_EICR_GPI_SDP2_BY_MAC(hw); if (eicr & eicr_mask) { IXGBE_WRITE_REG(hw, IXGBE_EICR, eicr_mask); adapter->task_requests |= IXGBE_REQUEST_TASK_MOD; } if ((hw->mac.type == ixgbe_mac_82599EB) && (eicr & IXGBE_EICR_GPI_SDP1_BY_MAC(hw))) { IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1_BY_MAC(hw)); adapter->task_requests |= IXGBE_REQUEST_TASK_MSF; } } /* External PHY interrupt */ if ((hw->phy.type == ixgbe_phy_x550em_ext_t) && (eicr & IXGBE_EICR_GPI_SDP0_X540)) adapter->task_requests |= IXGBE_REQUEST_TASK_PHY; return (FILTER_SCHEDULE_THREAD); } /* ixgbe_intr */ /************************************************************************ * ixgbe_free_pci_resources ************************************************************************/ static void ixgbe_free_pci_resources(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct ix_rx_queue *que = adapter->rx_queues; device_t dev = iflib_get_dev(ctx); /* Release all MSI-X queue resources */ if (adapter->intr_type == IFLIB_INTR_MSIX) iflib_irq_free(ctx, &adapter->irq); if (que != NULL) { for (int i = 0; i < adapter->num_rx_queues; i++, que++) { iflib_irq_free(ctx, &que->que_irq); } } if (adapter->pci_mem != NULL) bus_release_resource(dev, SYS_RES_MEMORY, rman_get_rid(adapter->pci_mem), adapter->pci_mem); } /* ixgbe_free_pci_resources */ /************************************************************************ * ixgbe_sysctl_flowcntl * * SYSCTL wrapper around setting Flow Control ************************************************************************/ static int ixgbe_sysctl_flowcntl(SYSCTL_HANDLER_ARGS) { struct adapter *adapter; int error, fc; adapter = (struct adapter *)arg1; fc = adapter->hw.fc.current_mode; error = sysctl_handle_int(oidp, &fc, 0, req); if ((error) || (req->newptr == NULL)) return (error); /* Don't bother if it's not changed */ if (fc == adapter->hw.fc.current_mode) return (0); return ixgbe_set_flowcntl(adapter, fc); } /* ixgbe_sysctl_flowcntl */ /************************************************************************ * ixgbe_set_flowcntl - Set flow control * * Flow control values: * 0 - off * 1 - rx pause * 2 - tx pause * 3 - full ************************************************************************/ static int ixgbe_set_flowcntl(struct adapter *adapter, int fc) { switch (fc) { case ixgbe_fc_rx_pause: case ixgbe_fc_tx_pause: case ixgbe_fc_full: adapter->hw.fc.requested_mode = fc; if (adapter->num_rx_queues > 1) ixgbe_disable_rx_drop(adapter); break; case ixgbe_fc_none: adapter->hw.fc.requested_mode = ixgbe_fc_none; if (adapter->num_rx_queues > 1) ixgbe_enable_rx_drop(adapter); break; default: return (EINVAL); } /* Don't autoneg if forcing a value */ adapter->hw.fc.disable_fc_autoneg = TRUE; ixgbe_fc_enable(&adapter->hw); return (0); } /* ixgbe_set_flowcntl */ /************************************************************************ * ixgbe_enable_rx_drop * * Enable the hardware to drop packets when the buffer is * full. This is useful with multiqueue, so that no single * queue being full stalls the entire RX engine. We only * enable this when Multiqueue is enabled AND Flow Control * is disabled. ************************************************************************/ static void ixgbe_enable_rx_drop(struct adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; struct rx_ring *rxr; u32 srrctl; for (int i = 0; i < adapter->num_rx_queues; i++) { rxr = &adapter->rx_queues[i].rxr; srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(rxr->me)); srrctl |= IXGBE_SRRCTL_DROP_EN; IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxr->me), srrctl); } /* enable drop for each vf */ for (int i = 0; i < adapter->num_vfs; i++) { IXGBE_WRITE_REG(hw, IXGBE_QDE, (IXGBE_QDE_WRITE | (i << IXGBE_QDE_IDX_SHIFT) | IXGBE_QDE_ENABLE)); } } /* ixgbe_enable_rx_drop */ /************************************************************************ * ixgbe_disable_rx_drop ************************************************************************/ static void ixgbe_disable_rx_drop(struct adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; struct rx_ring *rxr; u32 srrctl; for (int i = 0; i < adapter->num_rx_queues; i++) { rxr = &adapter->rx_queues[i].rxr; srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(rxr->me)); srrctl &= ~IXGBE_SRRCTL_DROP_EN; IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxr->me), srrctl); } /* disable drop for each vf */ for (int i = 0; i < adapter->num_vfs; i++) { IXGBE_WRITE_REG(hw, IXGBE_QDE, (IXGBE_QDE_WRITE | (i << IXGBE_QDE_IDX_SHIFT))); } } /* ixgbe_disable_rx_drop */ /************************************************************************ * ixgbe_sysctl_advertise * * SYSCTL wrapper around setting advertised speed ************************************************************************/ static int ixgbe_sysctl_advertise(SYSCTL_HANDLER_ARGS) { struct adapter *adapter; int error, advertise; adapter = (struct adapter *)arg1; advertise = adapter->advertise; error = sysctl_handle_int(oidp, &advertise, 0, req); if ((error) || (req->newptr == NULL)) return (error); return ixgbe_set_advertise(adapter, advertise); } /* ixgbe_sysctl_advertise */ /************************************************************************ * ixgbe_set_advertise - Control advertised link speed * * Flags: * 0x1 - advertise 100 Mb * 0x2 - advertise 1G * 0x4 - advertise 10G * 0x8 - advertise 10 Mb (yes, Mb) ************************************************************************/ static int ixgbe_set_advertise(struct adapter *adapter, int advertise) { device_t dev = iflib_get_dev(adapter->ctx); struct ixgbe_hw *hw; ixgbe_link_speed speed = 0; ixgbe_link_speed link_caps = 0; s32 err = IXGBE_NOT_IMPLEMENTED; bool negotiate = FALSE; /* Checks to validate new value */ if (adapter->advertise == advertise) /* no change */ return (0); hw = &adapter->hw; /* No speed changes for backplane media */ if (hw->phy.media_type == ixgbe_media_type_backplane) return (ENODEV); if (!((hw->phy.media_type == ixgbe_media_type_copper) || (hw->phy.multispeed_fiber))) { device_printf(dev, "Advertised speed can only be set on copper or multispeed fiber media types.\n"); return (EINVAL); } if (advertise < 0x1 || advertise > 0xF) { device_printf(dev, "Invalid advertised speed; valid modes are 0x1 through 0xF\n"); return (EINVAL); } if (hw->mac.ops.get_link_capabilities) { err = hw->mac.ops.get_link_capabilities(hw, &link_caps, &negotiate); if (err != IXGBE_SUCCESS) { device_printf(dev, "Unable to determine supported advertise speeds\n"); return (ENODEV); } } /* Set new value and report new advertised mode */ if (advertise & 0x1) { if (!(link_caps & IXGBE_LINK_SPEED_100_FULL)) { device_printf(dev, "Interface does not support 100Mb advertised speed\n"); return (EINVAL); } speed |= IXGBE_LINK_SPEED_100_FULL; } if (advertise & 0x2) { if (!(link_caps & IXGBE_LINK_SPEED_1GB_FULL)) { device_printf(dev, "Interface does not support 1Gb advertised speed\n"); return (EINVAL); } speed |= IXGBE_LINK_SPEED_1GB_FULL; } if (advertise & 0x4) { if (!(link_caps & IXGBE_LINK_SPEED_10GB_FULL)) { device_printf(dev, "Interface does not support 10Gb advertised speed\n"); return (EINVAL); } speed |= IXGBE_LINK_SPEED_10GB_FULL; } if (advertise & 0x8) { if (!(link_caps & IXGBE_LINK_SPEED_10_FULL)) { device_printf(dev, "Interface does not support 10Mb advertised speed\n"); return (EINVAL); } speed |= IXGBE_LINK_SPEED_10_FULL; } hw->mac.autotry_restart = TRUE; hw->mac.ops.setup_link(hw, speed, TRUE); adapter->advertise = advertise; return (0); } /* ixgbe_set_advertise */ /************************************************************************ * ixgbe_get_advertise - Get current advertised speed settings * * Formatted for sysctl usage. * Flags: * 0x1 - advertise 100 Mb * 0x2 - advertise 1G * 0x4 - advertise 10G * 0x8 - advertise 10 Mb (yes, Mb) ************************************************************************/ static int ixgbe_get_advertise(struct adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; int speed; ixgbe_link_speed link_caps = 0; s32 err; bool negotiate = FALSE; /* * Advertised speed means nothing unless it's copper or * multi-speed fiber */ if (!(hw->phy.media_type == ixgbe_media_type_copper) && !(hw->phy.multispeed_fiber)) return (0); err = hw->mac.ops.get_link_capabilities(hw, &link_caps, &negotiate); if (err != IXGBE_SUCCESS) return (0); speed = ((link_caps & IXGBE_LINK_SPEED_10GB_FULL) ? 4 : 0) | ((link_caps & IXGBE_LINK_SPEED_1GB_FULL) ? 2 : 0) | ((link_caps & IXGBE_LINK_SPEED_100_FULL) ? 1 : 0) | ((link_caps & IXGBE_LINK_SPEED_10_FULL) ? 8 : 0); return speed; } /* ixgbe_get_advertise */ /************************************************************************ * ixgbe_sysctl_dmac - Manage DMA Coalescing * * Control values: * 0/1 - off / on (use default value of 1000) * * Legal timer values are: * 50,100,250,500,1000,2000,5000,10000 * * Turning off interrupt moderation will also turn this off. ************************************************************************/ static int ixgbe_sysctl_dmac(SYSCTL_HANDLER_ARGS) { struct adapter *adapter = (struct adapter *)arg1; struct ifnet *ifp = iflib_get_ifp(adapter->ctx); int error; u16 newval; newval = adapter->dmac; error = sysctl_handle_16(oidp, &newval, 0, req); if ((error) || (req->newptr == NULL)) return (error); switch (newval) { case 0: /* Disabled */ adapter->dmac = 0; break; case 1: /* Enable and use default */ adapter->dmac = 1000; break; case 50: case 100: case 250: case 500: case 1000: case 2000: case 5000: case 10000: /* Legal values - allow */ adapter->dmac = newval; break; default: /* Do nothing, illegal value */ return (EINVAL); } /* Re-initialize hardware if it's already running */ if (ifp->if_drv_flags & IFF_DRV_RUNNING) ifp->if_init(ifp); return (0); } /* ixgbe_sysctl_dmac */ #ifdef IXGBE_DEBUG /************************************************************************ * ixgbe_sysctl_power_state * * Sysctl to test power states * Values: * 0 - set device to D0 * 3 - set device to D3 * (none) - get current device power state ************************************************************************/ static int ixgbe_sysctl_power_state(SYSCTL_HANDLER_ARGS) { struct adapter *adapter = (struct adapter *)arg1; device_t dev = adapter->dev; int curr_ps, new_ps, error = 0; curr_ps = new_ps = pci_get_powerstate(dev); error = sysctl_handle_int(oidp, &new_ps, 0, req); if ((error) || (req->newptr == NULL)) return (error); if (new_ps == curr_ps) return (0); if (new_ps == 3 && curr_ps == 0) error = DEVICE_SUSPEND(dev); else if (new_ps == 0 && curr_ps == 3) error = DEVICE_RESUME(dev); else return (EINVAL); device_printf(dev, "New state: %d\n", pci_get_powerstate(dev)); return (error); } /* ixgbe_sysctl_power_state */ #endif /************************************************************************ * ixgbe_sysctl_wol_enable * * Sysctl to enable/disable the WoL capability, * if supported by the adapter. * * Values: * 0 - disabled * 1 - enabled ************************************************************************/ static int ixgbe_sysctl_wol_enable(SYSCTL_HANDLER_ARGS) { struct adapter *adapter = (struct adapter *)arg1; struct ixgbe_hw *hw = &adapter->hw; int new_wol_enabled; int error = 0; new_wol_enabled = hw->wol_enabled; error = sysctl_handle_int(oidp, &new_wol_enabled, 0, req); if ((error) || (req->newptr == NULL)) return (error); new_wol_enabled = !!(new_wol_enabled); if (new_wol_enabled == hw->wol_enabled) return (0); if (new_wol_enabled > 0 && !adapter->wol_support) return (ENODEV); else hw->wol_enabled = new_wol_enabled; return (0); } /* ixgbe_sysctl_wol_enable */ /************************************************************************ * ixgbe_sysctl_wufc - Wake Up Filter Control * * Sysctl to enable/disable the types of packets that the * adapter will wake up on upon receipt. * Flags: * 0x1 - Link Status Change * 0x2 - Magic Packet * 0x4 - Direct Exact * 0x8 - Directed Multicast * 0x10 - Broadcast * 0x20 - ARP/IPv4 Request Packet * 0x40 - Direct IPv4 Packet * 0x80 - Direct IPv6 Packet * * Settings not listed above will cause the sysctl to return an error. ************************************************************************/ static int ixgbe_sysctl_wufc(SYSCTL_HANDLER_ARGS) { struct adapter *adapter = (struct adapter *)arg1; int error = 0; u32 new_wufc; new_wufc = adapter->wufc; error = sysctl_handle_32(oidp, &new_wufc, 0, req); if ((error) || (req->newptr == NULL)) return (error); if (new_wufc == adapter->wufc) return (0); if (new_wufc & 0xffffff00) return (EINVAL); new_wufc &= 0xff; new_wufc |= (0xffffff & adapter->wufc); adapter->wufc = new_wufc; return (0); } /* ixgbe_sysctl_wufc */ #ifdef IXGBE_DEBUG /************************************************************************ * ixgbe_sysctl_print_rss_config ************************************************************************/ static int ixgbe_sysctl_print_rss_config(SYSCTL_HANDLER_ARGS) { struct adapter *adapter = (struct adapter *)arg1; struct ixgbe_hw *hw = &adapter->hw; device_t dev = adapter->dev; struct sbuf *buf; int error = 0, reta_size; u32 reg; buf = sbuf_new_for_sysctl(NULL, NULL, 128, req); if (!buf) { device_printf(dev, "Could not allocate sbuf for output.\n"); return (ENOMEM); } // TODO: use sbufs to make a string to print out /* Set multiplier for RETA setup and table size based on MAC */ switch (adapter->hw.mac.type) { case ixgbe_mac_X550: case ixgbe_mac_X550EM_x: case ixgbe_mac_X550EM_a: reta_size = 128; break; default: reta_size = 32; break; } /* Print out the redirection table */ sbuf_cat(buf, "\n"); for (int i = 0; i < reta_size; i++) { if (i < 32) { reg = IXGBE_READ_REG(hw, IXGBE_RETA(i)); sbuf_printf(buf, "RETA(%2d): 0x%08x\n", i, reg); } else { reg = IXGBE_READ_REG(hw, IXGBE_ERETA(i - 32)); sbuf_printf(buf, "ERETA(%2d): 0x%08x\n", i - 32, reg); } } // TODO: print more config error = sbuf_finish(buf); if (error) device_printf(dev, "Error finishing sbuf: %d\n", error); sbuf_delete(buf); return (0); } /* ixgbe_sysctl_print_rss_config */ #endif /* IXGBE_DEBUG */ /************************************************************************ * ixgbe_sysctl_phy_temp - Retrieve temperature of PHY * * For X552/X557-AT devices using an external PHY ************************************************************************/ static int ixgbe_sysctl_phy_temp(SYSCTL_HANDLER_ARGS) { struct adapter *adapter = (struct adapter *)arg1; struct ixgbe_hw *hw = &adapter->hw; u16 reg; if (hw->device_id != IXGBE_DEV_ID_X550EM_X_10G_T) { device_printf(iflib_get_dev(adapter->ctx), "Device has no supported external thermal sensor.\n"); return (ENODEV); } if (hw->phy.ops.read_reg(hw, IXGBE_PHY_CURRENT_TEMP, IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, ®)) { device_printf(iflib_get_dev(adapter->ctx), "Error reading from PHY's current temperature register\n"); return (EAGAIN); } /* Shift temp for output */ reg = reg >> 8; return (sysctl_handle_16(oidp, NULL, reg, req)); } /* ixgbe_sysctl_phy_temp */ /************************************************************************ * ixgbe_sysctl_phy_overtemp_occurred * * Reports (directly from the PHY) whether the current PHY * temperature is over the overtemp threshold. ************************************************************************/ static int ixgbe_sysctl_phy_overtemp_occurred(SYSCTL_HANDLER_ARGS) { struct adapter *adapter = (struct adapter *)arg1; struct ixgbe_hw *hw = &adapter->hw; u16 reg; if (hw->device_id != IXGBE_DEV_ID_X550EM_X_10G_T) { device_printf(iflib_get_dev(adapter->ctx), "Device has no supported external thermal sensor.\n"); return (ENODEV); } if (hw->phy.ops.read_reg(hw, IXGBE_PHY_OVERTEMP_STATUS, IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, ®)) { device_printf(iflib_get_dev(adapter->ctx), "Error reading from PHY's temperature status register\n"); return (EAGAIN); } /* Get occurrence bit */ reg = !!(reg & 0x4000); return (sysctl_handle_16(oidp, 0, reg, req)); } /* ixgbe_sysctl_phy_overtemp_occurred */ /************************************************************************ * ixgbe_sysctl_eee_state * * Sysctl to set EEE power saving feature * Values: * 0 - disable EEE * 1 - enable EEE * (none) - get current device EEE state ************************************************************************/ static int ixgbe_sysctl_eee_state(SYSCTL_HANDLER_ARGS) { struct adapter *adapter = (struct adapter *)arg1; device_t dev = adapter->dev; struct ifnet *ifp = iflib_get_ifp(adapter->ctx); int curr_eee, new_eee, error = 0; s32 retval; curr_eee = new_eee = !!(adapter->feat_en & IXGBE_FEATURE_EEE); error = sysctl_handle_int(oidp, &new_eee, 0, req); if ((error) || (req->newptr == NULL)) return (error); /* Nothing to do */ if (new_eee == curr_eee) return (0); /* Not supported */ if (!(adapter->feat_cap & IXGBE_FEATURE_EEE)) return (EINVAL); /* Bounds checking */ if ((new_eee < 0) || (new_eee > 1)) return (EINVAL); retval = ixgbe_setup_eee(&adapter->hw, new_eee); if (retval) { device_printf(dev, "Error in EEE setup: 0x%08X\n", retval); return (EINVAL); } /* Restart auto-neg */ ifp->if_init(ifp); device_printf(dev, "New EEE state: %d\n", new_eee); /* Cache new value */ if (new_eee) adapter->feat_en |= IXGBE_FEATURE_EEE; else adapter->feat_en &= ~IXGBE_FEATURE_EEE; return (error); } /* ixgbe_sysctl_eee_state */ /************************************************************************ * ixgbe_init_device_features ************************************************************************/ static void ixgbe_init_device_features(struct adapter *adapter) { adapter->feat_cap = IXGBE_FEATURE_NETMAP | IXGBE_FEATURE_RSS | IXGBE_FEATURE_MSI | IXGBE_FEATURE_MSIX | IXGBE_FEATURE_LEGACY_IRQ; /* Set capabilities first... */ switch (adapter->hw.mac.type) { case ixgbe_mac_82598EB: if (adapter->hw.device_id == IXGBE_DEV_ID_82598AT) adapter->feat_cap |= IXGBE_FEATURE_FAN_FAIL; break; case ixgbe_mac_X540: adapter->feat_cap |= IXGBE_FEATURE_SRIOV; adapter->feat_cap |= IXGBE_FEATURE_FDIR; if ((adapter->hw.device_id == IXGBE_DEV_ID_X540_BYPASS) && (adapter->hw.bus.func == 0)) adapter->feat_cap |= IXGBE_FEATURE_BYPASS; break; case ixgbe_mac_X550: adapter->feat_cap |= IXGBE_FEATURE_TEMP_SENSOR; adapter->feat_cap |= IXGBE_FEATURE_SRIOV; adapter->feat_cap |= IXGBE_FEATURE_FDIR; break; case ixgbe_mac_X550EM_x: adapter->feat_cap |= IXGBE_FEATURE_SRIOV; adapter->feat_cap |= IXGBE_FEATURE_FDIR; break; case ixgbe_mac_X550EM_a: adapter->feat_cap |= IXGBE_FEATURE_SRIOV; adapter->feat_cap |= IXGBE_FEATURE_FDIR; adapter->feat_cap &= ~IXGBE_FEATURE_LEGACY_IRQ; if ((adapter->hw.device_id == IXGBE_DEV_ID_X550EM_A_1G_T) || (adapter->hw.device_id == IXGBE_DEV_ID_X550EM_A_1G_T_L)) { adapter->feat_cap |= IXGBE_FEATURE_TEMP_SENSOR; adapter->feat_cap |= IXGBE_FEATURE_EEE; } break; case ixgbe_mac_82599EB: adapter->feat_cap |= IXGBE_FEATURE_SRIOV; adapter->feat_cap |= IXGBE_FEATURE_FDIR; if ((adapter->hw.device_id == IXGBE_DEV_ID_82599_BYPASS) && (adapter->hw.bus.func == 0)) adapter->feat_cap |= IXGBE_FEATURE_BYPASS; if (adapter->hw.device_id == IXGBE_DEV_ID_82599_QSFP_SF_QP) adapter->feat_cap &= ~IXGBE_FEATURE_LEGACY_IRQ; break; default: break; } /* Enabled by default... */ /* Fan failure detection */ if (adapter->feat_cap & IXGBE_FEATURE_FAN_FAIL) adapter->feat_en |= IXGBE_FEATURE_FAN_FAIL; /* Netmap */ if (adapter->feat_cap & IXGBE_FEATURE_NETMAP) adapter->feat_en |= IXGBE_FEATURE_NETMAP; /* EEE */ if (adapter->feat_cap & IXGBE_FEATURE_EEE) adapter->feat_en |= IXGBE_FEATURE_EEE; /* Thermal Sensor */ if (adapter->feat_cap & IXGBE_FEATURE_TEMP_SENSOR) adapter->feat_en |= IXGBE_FEATURE_TEMP_SENSOR; /* Enabled via global sysctl... */ /* Flow Director */ if (ixgbe_enable_fdir) { if (adapter->feat_cap & IXGBE_FEATURE_FDIR) adapter->feat_en |= IXGBE_FEATURE_FDIR; else device_printf(adapter->dev, "Device does not support Flow Director. Leaving disabled."); } /* * Message Signal Interrupts - Extended (MSI-X) * Normal MSI is only enabled if MSI-X calls fail. */ if (!ixgbe_enable_msix) adapter->feat_cap &= ~IXGBE_FEATURE_MSIX; /* Receive-Side Scaling (RSS) */ if ((adapter->feat_cap & IXGBE_FEATURE_RSS) && ixgbe_enable_rss) adapter->feat_en |= IXGBE_FEATURE_RSS; /* Disable features with unmet dependencies... */ /* No MSI-X */ if (!(adapter->feat_cap & IXGBE_FEATURE_MSIX)) { adapter->feat_cap &= ~IXGBE_FEATURE_RSS; adapter->feat_cap &= ~IXGBE_FEATURE_SRIOV; adapter->feat_en &= ~IXGBE_FEATURE_RSS; adapter->feat_en &= ~IXGBE_FEATURE_SRIOV; } } /* ixgbe_init_device_features */ /************************************************************************ * ixgbe_check_fan_failure ************************************************************************/ static void ixgbe_check_fan_failure(struct adapter *adapter, u32 reg, bool in_interrupt) { u32 mask; mask = (in_interrupt) ? IXGBE_EICR_GPI_SDP1_BY_MAC(&adapter->hw) : IXGBE_ESDP_SDP1; if (reg & mask) device_printf(adapter->dev, "\nCRITICAL: FAN FAILURE!! REPLACE IMMEDIATELY!!\n"); } /* ixgbe_check_fan_failure */ diff --git a/sys/dev/ixgbe/if_ixv.c b/sys/dev/ixgbe/if_ixv.c index 6bd92d262558..ee139430d42b 100644 --- a/sys/dev/ixgbe/if_ixv.c +++ b/sys/dev/ixgbe/if_ixv.c @@ -1,1952 +1,1950 @@ /****************************************************************************** Copyright (c) 2001-2017, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************/ /*$FreeBSD$*/ #include "opt_inet.h" #include "opt_inet6.h" #include "opt_rss.h" #include "ixgbe.h" #include "ifdi_if.h" #include #include /************************************************************************ * Driver version ************************************************************************/ char ixv_driver_version[] = "2.0.1-k"; /************************************************************************ * PCI Device ID Table * * Used by probe to select devices to load on * Last field stores an index into ixv_strings * Last entry must be all 0s * * { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index } ************************************************************************/ static pci_vendor_info_t ixv_vendor_info_array[] = { PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_VF, "Intel(R) PRO/10GbE Virtual Function Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540_VF, "Intel(R) PRO/10GbE Virtual Function Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550_VF, "Intel(R) PRO/10GbE Virtual Function Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_X_VF, "Intel(R) PRO/10GbE Virtual Function Network Driver"), PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_A_VF, "Intel(R) PRO/10GbE Virtual Function Network Driver"), /* required last entry */ PVID_END }; /************************************************************************ * Function prototypes ************************************************************************/ static void *ixv_register(device_t dev); static int ixv_if_attach_pre(if_ctx_t ctx); static int ixv_if_attach_post(if_ctx_t ctx); static int ixv_if_detach(if_ctx_t ctx); static int ixv_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t qid); static int ixv_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int nqs, int nqsets); static int ixv_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int nqs, int nqsets); static void ixv_if_queues_free(if_ctx_t ctx); static void ixv_identify_hardware(if_ctx_t ctx); static void ixv_init_device_features(struct adapter *); static int ixv_allocate_pci_resources(if_ctx_t ctx); static void ixv_free_pci_resources(if_ctx_t ctx); static int ixv_setup_interface(if_ctx_t ctx); static void ixv_if_media_status(if_ctx_t , struct ifmediareq *); static int ixv_if_media_change(if_ctx_t ctx); static void ixv_if_update_admin_status(if_ctx_t ctx); static int ixv_if_msix_intr_assign(if_ctx_t ctx, int msix); static int ixv_if_mtu_set(if_ctx_t ctx, uint32_t mtu); static void ixv_if_init(if_ctx_t ctx); static void ixv_if_local_timer(if_ctx_t ctx, uint16_t qid); static void ixv_if_stop(if_ctx_t ctx); static int ixv_negotiate_api(struct adapter *); static void ixv_initialize_transmit_units(if_ctx_t ctx); static void ixv_initialize_receive_units(if_ctx_t ctx); static void ixv_initialize_rss_mapping(struct adapter *); static void ixv_setup_vlan_support(if_ctx_t ctx); static void ixv_configure_ivars(struct adapter *); static void ixv_if_enable_intr(if_ctx_t ctx); static void ixv_if_disable_intr(if_ctx_t ctx); static void ixv_if_multi_set(if_ctx_t ctx); static void ixv_if_register_vlan(if_ctx_t, u16); static void ixv_if_unregister_vlan(if_ctx_t, u16); static uint64_t ixv_if_get_counter(if_ctx_t, ift_counter); static bool ixv_if_needs_restart(if_ctx_t, enum iflib_restart_event); static void ixv_save_stats(struct adapter *); static void ixv_init_stats(struct adapter *); static void ixv_update_stats(struct adapter *); static void ixv_add_stats_sysctls(struct adapter *adapter); static int ixv_sysctl_debug(SYSCTL_HANDLER_ARGS); static void ixv_set_ivar(struct adapter *, u8, u8, s8); static u8 *ixv_mc_array_itr(struct ixgbe_hw *, u8 **, u32 *); /* The MSI-X Interrupt handlers */ static int ixv_msix_que(void *); static int ixv_msix_mbx(void *); /************************************************************************ * FreeBSD Device Interface Entry Points ************************************************************************/ static device_method_t ixv_methods[] = { /* Device interface */ DEVMETHOD(device_register, ixv_register), DEVMETHOD(device_probe, iflib_device_probe), DEVMETHOD(device_attach, iflib_device_attach), DEVMETHOD(device_detach, iflib_device_detach), DEVMETHOD(device_shutdown, iflib_device_shutdown), DEVMETHOD_END }; static driver_t ixv_driver = { "ixv", ixv_methods, sizeof(struct adapter), }; devclass_t ixv_devclass; DRIVER_MODULE(ixv, pci, ixv_driver, ixv_devclass, 0, 0); IFLIB_PNP_INFO(pci, ixv_driver, ixv_vendor_info_array); MODULE_DEPEND(ixv, iflib, 1, 1, 1); MODULE_DEPEND(ixv, pci, 1, 1, 1); MODULE_DEPEND(ixv, ether, 1, 1, 1); static device_method_t ixv_if_methods[] = { DEVMETHOD(ifdi_attach_pre, ixv_if_attach_pre), DEVMETHOD(ifdi_attach_post, ixv_if_attach_post), DEVMETHOD(ifdi_detach, ixv_if_detach), DEVMETHOD(ifdi_init, ixv_if_init), DEVMETHOD(ifdi_stop, ixv_if_stop), DEVMETHOD(ifdi_msix_intr_assign, ixv_if_msix_intr_assign), DEVMETHOD(ifdi_intr_enable, ixv_if_enable_intr), DEVMETHOD(ifdi_intr_disable, ixv_if_disable_intr), DEVMETHOD(ifdi_tx_queue_intr_enable, ixv_if_rx_queue_intr_enable), DEVMETHOD(ifdi_rx_queue_intr_enable, ixv_if_rx_queue_intr_enable), DEVMETHOD(ifdi_tx_queues_alloc, ixv_if_tx_queues_alloc), DEVMETHOD(ifdi_rx_queues_alloc, ixv_if_rx_queues_alloc), DEVMETHOD(ifdi_queues_free, ixv_if_queues_free), DEVMETHOD(ifdi_update_admin_status, ixv_if_update_admin_status), DEVMETHOD(ifdi_multi_set, ixv_if_multi_set), DEVMETHOD(ifdi_mtu_set, ixv_if_mtu_set), DEVMETHOD(ifdi_media_status, ixv_if_media_status), DEVMETHOD(ifdi_media_change, ixv_if_media_change), DEVMETHOD(ifdi_timer, ixv_if_local_timer), DEVMETHOD(ifdi_vlan_register, ixv_if_register_vlan), DEVMETHOD(ifdi_vlan_unregister, ixv_if_unregister_vlan), DEVMETHOD(ifdi_get_counter, ixv_if_get_counter), DEVMETHOD(ifdi_needs_restart, ixv_if_needs_restart), DEVMETHOD_END }; static driver_t ixv_if_driver = { "ixv_if", ixv_if_methods, sizeof(struct adapter) }; /* * TUNEABLE PARAMETERS: */ /* Flow control setting, default to full */ static int ixv_flow_control = ixgbe_fc_full; TUNABLE_INT("hw.ixv.flow_control", &ixv_flow_control); /* * Header split: this causes the hardware to DMA * the header into a separate mbuf from the payload, * it can be a performance win in some workloads, but * in others it actually hurts, its off by default. */ static int ixv_header_split = FALSE; TUNABLE_INT("hw.ixv.hdr_split", &ixv_header_split); /* * Shadow VFTA table, this is needed because * the real filter table gets cleared during * a soft reset and we need to repopulate it. */ static u32 ixv_shadow_vfta[IXGBE_VFTA_SIZE]; extern struct if_txrx ixgbe_txrx; static struct if_shared_ctx ixv_sctx_init = { .isc_magic = IFLIB_MAGIC, .isc_q_align = PAGE_SIZE,/* max(DBA_ALIGN, PAGE_SIZE) */ .isc_tx_maxsize = IXGBE_TSO_SIZE + sizeof(struct ether_vlan_header), .isc_tx_maxsegsize = PAGE_SIZE, .isc_tso_maxsize = IXGBE_TSO_SIZE + sizeof(struct ether_vlan_header), .isc_tso_maxsegsize = PAGE_SIZE, .isc_rx_maxsize = MJUM16BYTES, .isc_rx_nsegments = 1, .isc_rx_maxsegsize = MJUM16BYTES, .isc_nfl = 1, .isc_ntxqs = 1, .isc_nrxqs = 1, .isc_admin_intrcnt = 1, .isc_vendor_info = ixv_vendor_info_array, .isc_driver_version = ixv_driver_version, .isc_driver = &ixv_if_driver, .isc_flags = IFLIB_IS_VF | IFLIB_TSO_INIT_IP, .isc_nrxd_min = {MIN_RXD}, .isc_ntxd_min = {MIN_TXD}, .isc_nrxd_max = {MAX_RXD}, .isc_ntxd_max = {MAX_TXD}, .isc_nrxd_default = {DEFAULT_RXD}, .isc_ntxd_default = {DEFAULT_TXD}, }; -if_shared_ctx_t ixv_sctx = &ixv_sctx_init; - static void * ixv_register(device_t dev) { - return (ixv_sctx); + return (&ixv_sctx_init); } /************************************************************************ * ixv_if_tx_queues_alloc ************************************************************************/ static int ixv_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int ntxqs, int ntxqsets) { struct adapter *adapter = iflib_get_softc(ctx); if_softc_ctx_t scctx = adapter->shared; struct ix_tx_queue *que; int i, j, error; MPASS(adapter->num_tx_queues == ntxqsets); MPASS(ntxqs == 1); /* Allocate queue structure memory */ adapter->tx_queues = (struct ix_tx_queue *)malloc(sizeof(struct ix_tx_queue) * ntxqsets, M_DEVBUF, M_NOWAIT | M_ZERO); if (!adapter->tx_queues) { device_printf(iflib_get_dev(ctx), "Unable to allocate TX ring memory\n"); return (ENOMEM); } for (i = 0, que = adapter->tx_queues; i < ntxqsets; i++, que++) { struct tx_ring *txr = &que->txr; txr->me = i; txr->adapter = que->adapter = adapter; /* Allocate report status array */ if (!(txr->tx_rsq = (qidx_t *)malloc(sizeof(qidx_t) * scctx->isc_ntxd[0], M_DEVBUF, M_NOWAIT | M_ZERO))) { error = ENOMEM; goto fail; } for (j = 0; j < scctx->isc_ntxd[0]; j++) txr->tx_rsq[j] = QIDX_INVALID; /* get the virtual and physical address of the hardware queues */ txr->tail = IXGBE_VFTDT(txr->me); txr->tx_base = (union ixgbe_adv_tx_desc *)vaddrs[i*ntxqs]; txr->tx_paddr = paddrs[i*ntxqs]; txr->bytes = 0; txr->total_packets = 0; } device_printf(iflib_get_dev(ctx), "allocated for %d queues\n", adapter->num_tx_queues); return (0); fail: ixv_if_queues_free(ctx); return (error); } /* ixv_if_tx_queues_alloc */ /************************************************************************ * ixv_if_rx_queues_alloc ************************************************************************/ static int ixv_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int nrxqs, int nrxqsets) { struct adapter *adapter = iflib_get_softc(ctx); struct ix_rx_queue *que; int i, error; MPASS(adapter->num_rx_queues == nrxqsets); MPASS(nrxqs == 1); /* Allocate queue structure memory */ adapter->rx_queues = (struct ix_rx_queue *)malloc(sizeof(struct ix_rx_queue) * nrxqsets, M_DEVBUF, M_NOWAIT | M_ZERO); if (!adapter->rx_queues) { device_printf(iflib_get_dev(ctx), "Unable to allocate TX ring memory\n"); error = ENOMEM; goto fail; } for (i = 0, que = adapter->rx_queues; i < nrxqsets; i++, que++) { struct rx_ring *rxr = &que->rxr; rxr->me = i; rxr->adapter = que->adapter = adapter; /* get the virtual and physical address of the hw queues */ rxr->tail = IXGBE_VFRDT(rxr->me); rxr->rx_base = (union ixgbe_adv_rx_desc *)vaddrs[i]; rxr->rx_paddr = paddrs[i*nrxqs]; rxr->bytes = 0; rxr->que = que; } device_printf(iflib_get_dev(ctx), "allocated for %d rx queues\n", adapter->num_rx_queues); return (0); fail: ixv_if_queues_free(ctx); return (error); } /* ixv_if_rx_queues_alloc */ /************************************************************************ * ixv_if_queues_free ************************************************************************/ static void ixv_if_queues_free(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct ix_tx_queue *que = adapter->tx_queues; int i; if (que == NULL) goto free; for (i = 0; i < adapter->num_tx_queues; i++, que++) { struct tx_ring *txr = &que->txr; if (txr->tx_rsq == NULL) break; free(txr->tx_rsq, M_DEVBUF); txr->tx_rsq = NULL; } if (adapter->tx_queues != NULL) free(adapter->tx_queues, M_DEVBUF); free: if (adapter->rx_queues != NULL) free(adapter->rx_queues, M_DEVBUF); adapter->tx_queues = NULL; adapter->rx_queues = NULL; } /* ixv_if_queues_free */ /************************************************************************ * ixv_if_attach_pre - Device initialization routine * * Called when the driver is being loaded. * Identifies the type of hardware, allocates all resources * and initializes the hardware. * * return 0 on success, positive on failure ************************************************************************/ static int ixv_if_attach_pre(if_ctx_t ctx) { struct adapter *adapter; device_t dev; if_softc_ctx_t scctx; struct ixgbe_hw *hw; int error = 0; INIT_DEBUGOUT("ixv_attach: begin"); /* Allocate, clear, and link in our adapter structure */ dev = iflib_get_dev(ctx); adapter = iflib_get_softc(ctx); adapter->dev = dev; adapter->ctx = ctx; adapter->hw.back = adapter; scctx = adapter->shared = iflib_get_softc_ctx(ctx); adapter->media = iflib_get_media(ctx); hw = &adapter->hw; /* Do base PCI setup - map BAR0 */ if (ixv_allocate_pci_resources(ctx)) { device_printf(dev, "ixv_allocate_pci_resources() failed!\n"); error = ENXIO; goto err_out; } /* SYSCTL APIs */ SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "debug", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, adapter, 0, ixv_sysctl_debug, "I", "Debug Info"); /* Determine hardware revision */ ixv_identify_hardware(ctx); ixv_init_device_features(adapter); /* Initialize the shared code */ error = ixgbe_init_ops_vf(hw); if (error) { device_printf(dev, "ixgbe_init_ops_vf() failed!\n"); error = EIO; goto err_out; } /* Setup the mailbox */ ixgbe_init_mbx_params_vf(hw); error = hw->mac.ops.reset_hw(hw); if (error == IXGBE_ERR_RESET_FAILED) device_printf(dev, "...reset_hw() failure: Reset Failed!\n"); else if (error) device_printf(dev, "...reset_hw() failed with error %d\n", error); if (error) { error = EIO; goto err_out; } error = hw->mac.ops.init_hw(hw); if (error) { device_printf(dev, "...init_hw() failed with error %d\n", error); error = EIO; goto err_out; } /* Negotiate mailbox API version */ error = ixv_negotiate_api(adapter); if (error) { device_printf(dev, "Mailbox API negotiation failed during attach!\n"); goto err_out; } /* If no mac address was assigned, make a random one */ if (!ixv_check_ether_addr(hw->mac.addr)) { u8 addr[ETHER_ADDR_LEN]; arc4rand(&addr, sizeof(addr), 0); addr[0] &= 0xFE; addr[0] |= 0x02; bcopy(addr, hw->mac.addr, sizeof(addr)); bcopy(addr, hw->mac.perm_addr, sizeof(addr)); } /* Most of the iflib initialization... */ iflib_set_mac(ctx, hw->mac.addr); switch (adapter->hw.mac.type) { case ixgbe_mac_X550_vf: case ixgbe_mac_X550EM_x_vf: case ixgbe_mac_X550EM_a_vf: scctx->isc_ntxqsets_max = scctx->isc_nrxqsets_max = 2; break; default: scctx->isc_ntxqsets_max = scctx->isc_nrxqsets_max = 1; } scctx->isc_txqsizes[0] = roundup2(scctx->isc_ntxd[0] * sizeof(union ixgbe_adv_tx_desc) + sizeof(u32), DBA_ALIGN); scctx->isc_rxqsizes[0] = roundup2(scctx->isc_nrxd[0] * sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN); /* XXX */ scctx->isc_tx_csum_flags = CSUM_IP | CSUM_TCP | CSUM_UDP | CSUM_TSO | CSUM_IP6_TCP | CSUM_IP6_UDP | CSUM_IP6_TSO; scctx->isc_tx_nsegments = IXGBE_82599_SCATTER; scctx->isc_msix_bar = pci_msix_table_bar(dev); scctx->isc_tx_tso_segments_max = scctx->isc_tx_nsegments; scctx->isc_tx_tso_size_max = IXGBE_TSO_SIZE; scctx->isc_tx_tso_segsize_max = PAGE_SIZE; scctx->isc_txrx = &ixgbe_txrx; /* * Tell the upper layer(s) we support everything the PF * driver does except... * Wake-on-LAN */ scctx->isc_capabilities = IXGBE_CAPS; scctx->isc_capabilities ^= IFCAP_WOL; scctx->isc_capenable = scctx->isc_capabilities; INIT_DEBUGOUT("ixv_if_attach_pre: end"); return (0); err_out: ixv_free_pci_resources(ctx); return (error); } /* ixv_if_attach_pre */ static int ixv_if_attach_post(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); device_t dev = iflib_get_dev(ctx); int error = 0; /* Setup OS specific network interface */ error = ixv_setup_interface(ctx); if (error) { device_printf(dev, "Interface setup failed: %d\n", error); goto end; } /* Do the stats setup */ ixv_save_stats(adapter); ixv_init_stats(adapter); ixv_add_stats_sysctls(adapter); end: return error; } /* ixv_if_attach_post */ /************************************************************************ * ixv_detach - Device removal routine * * Called when the driver is being removed. * Stops the adapter and deallocates all the resources * that were allocated for driver operation. * * return 0 on success, positive on failure ************************************************************************/ static int ixv_if_detach(if_ctx_t ctx) { INIT_DEBUGOUT("ixv_detach: begin"); ixv_free_pci_resources(ctx); return (0); } /* ixv_if_detach */ /************************************************************************ * ixv_if_mtu_set ************************************************************************/ static int ixv_if_mtu_set(if_ctx_t ctx, uint32_t mtu) { struct adapter *adapter = iflib_get_softc(ctx); struct ifnet *ifp = iflib_get_ifp(ctx); int error = 0; IOCTL_DEBUGOUT("ioctl: SIOCSIFMTU (Set Interface MTU)"); if (mtu > IXGBE_MAX_FRAME_SIZE - IXGBE_MTU_HDR) { error = EINVAL; } else { ifp->if_mtu = mtu; adapter->max_frame_size = ifp->if_mtu + IXGBE_MTU_HDR; } return error; } /* ixv_if_mtu_set */ /************************************************************************ * ixv_if_init - Init entry point * * Used in two ways: It is used by the stack as an init entry * point in network interface structure. It is also used * by the driver as a hw/sw initialization routine to get * to a consistent state. * * return 0 on success, positive on failure ************************************************************************/ static void ixv_if_init(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct ifnet *ifp = iflib_get_ifp(ctx); device_t dev = iflib_get_dev(ctx); struct ixgbe_hw *hw = &adapter->hw; int error = 0; INIT_DEBUGOUT("ixv_if_init: begin"); hw->adapter_stopped = FALSE; hw->mac.ops.stop_adapter(hw); /* reprogram the RAR[0] in case user changed it. */ hw->mac.ops.set_rar(hw, 0, hw->mac.addr, 0, IXGBE_RAH_AV); /* Get the latest mac address, User can use a LAA */ bcopy(IF_LLADDR(ifp), hw->mac.addr, IXGBE_ETH_LENGTH_OF_ADDRESS); hw->mac.ops.set_rar(hw, 0, hw->mac.addr, 0, 1); /* Reset VF and renegotiate mailbox API version */ hw->mac.ops.reset_hw(hw); hw->mac.ops.start_hw(hw); error = ixv_negotiate_api(adapter); if (error) { device_printf(dev, "Mailbox API negotiation failed in if_init!\n"); return; } ixv_initialize_transmit_units(ctx); /* Setup Multicast table */ ixv_if_multi_set(ctx); adapter->rx_mbuf_sz = iflib_get_rx_mbuf_sz(ctx); /* Configure RX settings */ ixv_initialize_receive_units(ctx); /* Set up VLAN offload and filter */ ixv_setup_vlan_support(ctx); /* Set up MSI-X routing */ ixv_configure_ivars(adapter); /* Set up auto-mask */ IXGBE_WRITE_REG(hw, IXGBE_VTEIAM, IXGBE_EICS_RTX_QUEUE); /* Set moderation on the Link interrupt */ IXGBE_WRITE_REG(hw, IXGBE_VTEITR(adapter->vector), IXGBE_LINK_ITR); /* Stats init */ ixv_init_stats(adapter); /* Config/Enable Link */ hw->mac.ops.check_link(hw, &adapter->link_speed, &adapter->link_up, FALSE); /* And now turn on interrupts */ ixv_if_enable_intr(ctx); return; } /* ixv_if_init */ /************************************************************************ * ixv_enable_queue ************************************************************************/ static inline void ixv_enable_queue(struct adapter *adapter, u32 vector) { struct ixgbe_hw *hw = &adapter->hw; u32 queue = 1 << vector; u32 mask; mask = (IXGBE_EIMS_RTX_QUEUE & queue); IXGBE_WRITE_REG(hw, IXGBE_VTEIMS, mask); } /* ixv_enable_queue */ /************************************************************************ * ixv_disable_queue ************************************************************************/ static inline void ixv_disable_queue(struct adapter *adapter, u32 vector) { struct ixgbe_hw *hw = &adapter->hw; u64 queue = (u64)(1 << vector); u32 mask; mask = (IXGBE_EIMS_RTX_QUEUE & queue); IXGBE_WRITE_REG(hw, IXGBE_VTEIMC, mask); } /* ixv_disable_queue */ /************************************************************************ * ixv_msix_que - MSI-X Queue Interrupt Service routine ************************************************************************/ static int ixv_msix_que(void *arg) { struct ix_rx_queue *que = arg; struct adapter *adapter = que->adapter; ixv_disable_queue(adapter, que->msix); ++que->irqs; return (FILTER_SCHEDULE_THREAD); } /* ixv_msix_que */ /************************************************************************ * ixv_msix_mbx ************************************************************************/ static int ixv_msix_mbx(void *arg) { struct adapter *adapter = arg; struct ixgbe_hw *hw = &adapter->hw; u32 reg; ++adapter->link_irq; /* First get the cause */ reg = IXGBE_READ_REG(hw, IXGBE_VTEICS); /* Clear interrupt with write */ IXGBE_WRITE_REG(hw, IXGBE_VTEICR, reg); /* Link status change */ if (reg & IXGBE_EICR_LSC) iflib_admin_intr_deferred(adapter->ctx); IXGBE_WRITE_REG(hw, IXGBE_VTEIMS, IXGBE_EIMS_OTHER); return (FILTER_HANDLED); } /* ixv_msix_mbx */ /************************************************************************ * ixv_media_status - Media Ioctl callback * * Called whenever the user queries the status of * the interface using ifconfig. ************************************************************************/ static void ixv_if_media_status(if_ctx_t ctx, struct ifmediareq * ifmr) { struct adapter *adapter = iflib_get_softc(ctx); INIT_DEBUGOUT("ixv_media_status: begin"); iflib_admin_intr_deferred(ctx); ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; if (!adapter->link_active) return; ifmr->ifm_status |= IFM_ACTIVE; switch (adapter->link_speed) { case IXGBE_LINK_SPEED_1GB_FULL: ifmr->ifm_active |= IFM_1000_T | IFM_FDX; break; case IXGBE_LINK_SPEED_10GB_FULL: ifmr->ifm_active |= IFM_10G_T | IFM_FDX; break; case IXGBE_LINK_SPEED_100_FULL: ifmr->ifm_active |= IFM_100_TX | IFM_FDX; break; case IXGBE_LINK_SPEED_10_FULL: ifmr->ifm_active |= IFM_10_T | IFM_FDX; break; } } /* ixv_if_media_status */ /************************************************************************ * ixv_if_media_change - Media Ioctl callback * * Called when the user changes speed/duplex using * media/mediopt option with ifconfig. ************************************************************************/ static int ixv_if_media_change(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct ifmedia *ifm = iflib_get_media(ctx); INIT_DEBUGOUT("ixv_media_change: begin"); if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) return (EINVAL); switch (IFM_SUBTYPE(ifm->ifm_media)) { case IFM_AUTO: break; default: device_printf(adapter->dev, "Only auto media type\n"); return (EINVAL); } return (0); } /* ixv_if_media_change */ /************************************************************************ * ixv_negotiate_api * * Negotiate the Mailbox API with the PF; * start with the most featured API first. ************************************************************************/ static int ixv_negotiate_api(struct adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; int mbx_api[] = { ixgbe_mbox_api_11, ixgbe_mbox_api_10, ixgbe_mbox_api_unknown }; int i = 0; while (mbx_api[i] != ixgbe_mbox_api_unknown) { if (ixgbevf_negotiate_api_version(hw, mbx_api[i]) == 0) return (0); i++; } return (EINVAL); } /* ixv_negotiate_api */ /************************************************************************ * ixv_if_multi_set - Multicast Update * * Called whenever multicast address list is updated. ************************************************************************/ static void ixv_if_multi_set(if_ctx_t ctx) { u8 mta[MAX_NUM_MULTICAST_ADDRESSES * IXGBE_ETH_LENGTH_OF_ADDRESS]; struct adapter *adapter = iflib_get_softc(ctx); u8 *update_ptr; struct ifmultiaddr *ifma; if_t ifp = iflib_get_ifp(ctx); int mcnt = 0; IOCTL_DEBUGOUT("ixv_if_multi_set: begin"); CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr), &mta[mcnt * IXGBE_ETH_LENGTH_OF_ADDRESS], IXGBE_ETH_LENGTH_OF_ADDRESS); mcnt++; } update_ptr = mta; adapter->hw.mac.ops.update_mc_addr_list(&adapter->hw, update_ptr, mcnt, ixv_mc_array_itr, TRUE); } /* ixv_if_multi_set */ /************************************************************************ * ixv_mc_array_itr * * An iterator function needed by the multicast shared code. * It feeds the shared code routine the addresses in the * array of ixv_set_multi() one by one. ************************************************************************/ static u8 * ixv_mc_array_itr(struct ixgbe_hw *hw, u8 **update_ptr, u32 *vmdq) { u8 *addr = *update_ptr; u8 *newptr; *vmdq = 0; newptr = addr + IXGBE_ETH_LENGTH_OF_ADDRESS; *update_ptr = newptr; return addr; } /* ixv_mc_array_itr */ /************************************************************************ * ixv_if_local_timer - Timer routine * * Checks for link status, updates statistics, * and runs the watchdog check. ************************************************************************/ static void ixv_if_local_timer(if_ctx_t ctx, uint16_t qid) { if (qid != 0) return; /* Fire off the adminq task */ iflib_admin_intr_deferred(ctx); } /* ixv_if_local_timer */ /************************************************************************ * ixv_if_update_admin_status - Update OS on link state * * Note: Only updates the OS on the cached link state. * The real check of the hardware only happens with * a link interrupt. ************************************************************************/ static void ixv_if_update_admin_status(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); device_t dev = iflib_get_dev(ctx); s32 status; adapter->hw.mac.get_link_status = TRUE; status = ixgbe_check_link(&adapter->hw, &adapter->link_speed, &adapter->link_up, FALSE); if (status != IXGBE_SUCCESS && adapter->hw.adapter_stopped == FALSE) { /* Mailbox's Clear To Send status is lost or timeout occurred. * We need reinitialization. */ iflib_get_ifp(ctx)->if_init(ctx); } if (adapter->link_up) { if (adapter->link_active == FALSE) { if (bootverbose) device_printf(dev, "Link is up %d Gbps %s \n", ((adapter->link_speed == 128) ? 10 : 1), "Full Duplex"); adapter->link_active = TRUE; iflib_link_state_change(ctx, LINK_STATE_UP, IF_Gbps(10)); } } else { /* Link down */ if (adapter->link_active == TRUE) { if (bootverbose) device_printf(dev, "Link is Down\n"); iflib_link_state_change(ctx, LINK_STATE_DOWN, 0); adapter->link_active = FALSE; } } /* Stats Update */ ixv_update_stats(adapter); } /* ixv_if_update_admin_status */ /************************************************************************ * ixv_if_stop - Stop the hardware * * Disables all traffic on the adapter by issuing a * global reset on the MAC and deallocates TX/RX buffers. ************************************************************************/ static void ixv_if_stop(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct ixgbe_hw *hw = &adapter->hw; INIT_DEBUGOUT("ixv_stop: begin\n"); ixv_if_disable_intr(ctx); hw->mac.ops.reset_hw(hw); adapter->hw.adapter_stopped = FALSE; hw->mac.ops.stop_adapter(hw); /* Update the stack */ adapter->link_up = FALSE; ixv_if_update_admin_status(ctx); /* reprogram the RAR[0] in case user changed it. */ hw->mac.ops.set_rar(hw, 0, hw->mac.addr, 0, IXGBE_RAH_AV); } /* ixv_if_stop */ /************************************************************************ * ixv_identify_hardware - Determine hardware revision. ************************************************************************/ static void ixv_identify_hardware(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); device_t dev = iflib_get_dev(ctx); struct ixgbe_hw *hw = &adapter->hw; /* Save off the information about this board */ hw->vendor_id = pci_get_vendor(dev); hw->device_id = pci_get_device(dev); hw->revision_id = pci_get_revid(dev); hw->subsystem_vendor_id = pci_get_subvendor(dev); hw->subsystem_device_id = pci_get_subdevice(dev); /* A subset of set_mac_type */ switch (hw->device_id) { case IXGBE_DEV_ID_82599_VF: hw->mac.type = ixgbe_mac_82599_vf; break; case IXGBE_DEV_ID_X540_VF: hw->mac.type = ixgbe_mac_X540_vf; break; case IXGBE_DEV_ID_X550_VF: hw->mac.type = ixgbe_mac_X550_vf; break; case IXGBE_DEV_ID_X550EM_X_VF: hw->mac.type = ixgbe_mac_X550EM_x_vf; break; case IXGBE_DEV_ID_X550EM_A_VF: hw->mac.type = ixgbe_mac_X550EM_a_vf; break; default: device_printf(dev, "unknown mac type\n"); hw->mac.type = ixgbe_mac_unknown; break; } } /* ixv_identify_hardware */ /************************************************************************ * ixv_if_msix_intr_assign - Setup MSI-X Interrupt resources and handlers ************************************************************************/ static int ixv_if_msix_intr_assign(if_ctx_t ctx, int msix) { struct adapter *adapter = iflib_get_softc(ctx); device_t dev = iflib_get_dev(ctx); struct ix_rx_queue *rx_que = adapter->rx_queues; struct ix_tx_queue *tx_que; int error, rid, vector = 0; char buf[16]; for (int i = 0; i < adapter->num_rx_queues; i++, vector++, rx_que++) { rid = vector + 1; snprintf(buf, sizeof(buf), "rxq%d", i); error = iflib_irq_alloc_generic(ctx, &rx_que->que_irq, rid, IFLIB_INTR_RXTX, ixv_msix_que, rx_que, rx_que->rxr.me, buf); if (error) { device_printf(iflib_get_dev(ctx), "Failed to allocate que int %d err: %d", i, error); adapter->num_rx_queues = i + 1; goto fail; } rx_que->msix = vector; } for (int i = 0; i < adapter->num_tx_queues; i++) { snprintf(buf, sizeof(buf), "txq%d", i); tx_que = &adapter->tx_queues[i]; tx_que->msix = i % adapter->num_rx_queues; iflib_softirq_alloc_generic(ctx, &adapter->rx_queues[tx_que->msix].que_irq, IFLIB_INTR_TX, tx_que, tx_que->txr.me, buf); } rid = vector + 1; error = iflib_irq_alloc_generic(ctx, &adapter->irq, rid, IFLIB_INTR_ADMIN, ixv_msix_mbx, adapter, 0, "aq"); if (error) { device_printf(iflib_get_dev(ctx), "Failed to register admin handler"); return (error); } adapter->vector = vector; /* * Due to a broken design QEMU will fail to properly * enable the guest for MSIX unless the vectors in * the table are all set up, so we must rewrite the * ENABLE in the MSIX control register again at this * point to cause it to successfully initialize us. */ if (adapter->hw.mac.type == ixgbe_mac_82599_vf) { int msix_ctrl; pci_find_cap(dev, PCIY_MSIX, &rid); rid += PCIR_MSIX_CTRL; msix_ctrl = pci_read_config(dev, rid, 2); msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE; pci_write_config(dev, rid, msix_ctrl, 2); } return (0); fail: iflib_irq_free(ctx, &adapter->irq); rx_que = adapter->rx_queues; for (int i = 0; i < adapter->num_rx_queues; i++, rx_que++) iflib_irq_free(ctx, &rx_que->que_irq); return (error); } /* ixv_if_msix_intr_assign */ /************************************************************************ * ixv_allocate_pci_resources ************************************************************************/ static int ixv_allocate_pci_resources(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); device_t dev = iflib_get_dev(ctx); int rid; rid = PCIR_BAR(0); adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (!(adapter->pci_mem)) { device_printf(dev, "Unable to allocate bus resource: memory\n"); return (ENXIO); } adapter->osdep.mem_bus_space_tag = rman_get_bustag(adapter->pci_mem); adapter->osdep.mem_bus_space_handle = rman_get_bushandle(adapter->pci_mem); adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle; return (0); } /* ixv_allocate_pci_resources */ /************************************************************************ * ixv_free_pci_resources ************************************************************************/ static void ixv_free_pci_resources(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct ix_rx_queue *que = adapter->rx_queues; device_t dev = iflib_get_dev(ctx); /* Release all MSI-X queue resources */ if (adapter->intr_type == IFLIB_INTR_MSIX) iflib_irq_free(ctx, &adapter->irq); if (que != NULL) { for (int i = 0; i < adapter->num_rx_queues; i++, que++) { iflib_irq_free(ctx, &que->que_irq); } } if (adapter->pci_mem != NULL) bus_release_resource(dev, SYS_RES_MEMORY, rman_get_rid(adapter->pci_mem), adapter->pci_mem); } /* ixv_free_pci_resources */ /************************************************************************ * ixv_setup_interface * * Setup networking device structure and register an interface. ************************************************************************/ static int ixv_setup_interface(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); if_softc_ctx_t scctx = adapter->shared; struct ifnet *ifp = iflib_get_ifp(ctx); INIT_DEBUGOUT("ixv_setup_interface: begin"); if_setbaudrate(ifp, IF_Gbps(10)); ifp->if_snd.ifq_maxlen = scctx->isc_ntxd[0] - 2; adapter->max_frame_size = ifp->if_mtu + IXGBE_MTU_HDR; ifmedia_add(adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(adapter->media, IFM_ETHER | IFM_AUTO); return 0; } /* ixv_setup_interface */ /************************************************************************ * ixv_if_get_counter ************************************************************************/ static uint64_t ixv_if_get_counter(if_ctx_t ctx, ift_counter cnt) { struct adapter *adapter = iflib_get_softc(ctx); if_t ifp = iflib_get_ifp(ctx); switch (cnt) { case IFCOUNTER_IPACKETS: return (adapter->ipackets); case IFCOUNTER_OPACKETS: return (adapter->opackets); case IFCOUNTER_IBYTES: return (adapter->ibytes); case IFCOUNTER_OBYTES: return (adapter->obytes); case IFCOUNTER_IMCASTS: return (adapter->imcasts); default: return (if_get_counter_default(ifp, cnt)); } } /* ixv_if_get_counter */ /* ixv_if_needs_restart - Tell iflib when the driver needs to be reinitialized * @ctx: iflib context * @event: event code to check * * Defaults to returning true for every event. * * @returns true if iflib needs to reinit the interface */ static bool ixv_if_needs_restart(if_ctx_t ctx __unused, enum iflib_restart_event event) { switch (event) { case IFLIB_RESTART_VLAN_CONFIG: /* XXX: This may not need to return true */ default: return (true); } } /************************************************************************ * ixv_initialize_transmit_units - Enable transmit unit. ************************************************************************/ static void ixv_initialize_transmit_units(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct ixgbe_hw *hw = &adapter->hw; if_softc_ctx_t scctx = adapter->shared; struct ix_tx_queue *que = adapter->tx_queues; int i; for (i = 0; i < adapter->num_tx_queues; i++, que++) { struct tx_ring *txr = &que->txr; u64 tdba = txr->tx_paddr; u32 txctrl, txdctl; int j = txr->me; /* Set WTHRESH to 8, burst writeback */ txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(j)); txdctl |= (8 << 16); IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(j), txdctl); /* Set the HW Tx Head and Tail indices */ IXGBE_WRITE_REG(&adapter->hw, IXGBE_VFTDH(j), 0); IXGBE_WRITE_REG(&adapter->hw, IXGBE_VFTDT(j), 0); /* Set Tx Tail register */ txr->tail = IXGBE_VFTDT(j); txr->tx_rs_cidx = txr->tx_rs_pidx; /* Initialize the last processed descriptor to be the end of * the ring, rather than the start, so that we avoid an * off-by-one error when calculating how many descriptors are * done in the credits_update function. */ txr->tx_cidx_processed = scctx->isc_ntxd[0] - 1; for (int k = 0; k < scctx->isc_ntxd[0]; k++) txr->tx_rsq[k] = QIDX_INVALID; /* Set Ring parameters */ IXGBE_WRITE_REG(hw, IXGBE_VFTDBAL(j), (tdba & 0x00000000ffffffffULL)); IXGBE_WRITE_REG(hw, IXGBE_VFTDBAH(j), (tdba >> 32)); IXGBE_WRITE_REG(hw, IXGBE_VFTDLEN(j), scctx->isc_ntxd[0] * sizeof(struct ixgbe_legacy_tx_desc)); txctrl = IXGBE_READ_REG(hw, IXGBE_VFDCA_TXCTRL(j)); txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN; IXGBE_WRITE_REG(hw, IXGBE_VFDCA_TXCTRL(j), txctrl); /* Now enable */ txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(j)); txdctl |= IXGBE_TXDCTL_ENABLE; IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(j), txdctl); } return; } /* ixv_initialize_transmit_units */ /************************************************************************ * ixv_initialize_rss_mapping ************************************************************************/ static void ixv_initialize_rss_mapping(struct adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; u32 reta = 0, mrqc, rss_key[10]; int queue_id; int i, j; u32 rss_hash_config; if (adapter->feat_en & IXGBE_FEATURE_RSS) { /* Fetch the configured RSS key */ rss_getkey((uint8_t *)&rss_key); } else { /* set up random bits */ arc4rand(&rss_key, sizeof(rss_key), 0); } /* Now fill out hash function seeds */ for (i = 0; i < 10; i++) IXGBE_WRITE_REG(hw, IXGBE_VFRSSRK(i), rss_key[i]); /* Set up the redirection table */ for (i = 0, j = 0; i < 64; i++, j++) { if (j == adapter->num_rx_queues) j = 0; if (adapter->feat_en & IXGBE_FEATURE_RSS) { /* * Fetch the RSS bucket id for the given indirection * entry. Cap it at the number of configured buckets * (which is num_rx_queues.) */ queue_id = rss_get_indirection_to_bucket(i); queue_id = queue_id % adapter->num_rx_queues; } else queue_id = j; /* * The low 8 bits are for hash value (n+0); * The next 8 bits are for hash value (n+1), etc. */ reta >>= 8; reta |= ((uint32_t)queue_id) << 24; if ((i & 3) == 3) { IXGBE_WRITE_REG(hw, IXGBE_VFRETA(i >> 2), reta); reta = 0; } } /* Perform hash on these packet types */ if (adapter->feat_en & IXGBE_FEATURE_RSS) rss_hash_config = rss_gethashconfig(); else { /* * Disable UDP - IP fragments aren't currently being handled * and so we end up with a mix of 2-tuple and 4-tuple * traffic. */ rss_hash_config = RSS_HASHTYPE_RSS_IPV4 | RSS_HASHTYPE_RSS_TCP_IPV4 | RSS_HASHTYPE_RSS_IPV6 | RSS_HASHTYPE_RSS_TCP_IPV6; } mrqc = IXGBE_MRQC_RSSEN; if (rss_hash_config & RSS_HASHTYPE_RSS_IPV4) mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4; if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV4) mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_TCP; if (rss_hash_config & RSS_HASHTYPE_RSS_IPV6) mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6; if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV6) mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_TCP; if (rss_hash_config & RSS_HASHTYPE_RSS_IPV6_EX) device_printf(adapter->dev, "%s: RSS_HASHTYPE_RSS_IPV6_EX defined, but not supported\n", __func__); if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV6_EX) device_printf(adapter->dev, "%s: RSS_HASHTYPE_RSS_TCP_IPV6_EX defined, but not supported\n", __func__); if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV4) mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP; if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV6) mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP; if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV6_EX) device_printf(adapter->dev, "%s: RSS_HASHTYPE_RSS_UDP_IPV6_EX defined, but not supported\n", __func__); IXGBE_WRITE_REG(hw, IXGBE_VFMRQC, mrqc); } /* ixv_initialize_rss_mapping */ /************************************************************************ * ixv_initialize_receive_units - Setup receive registers and features. ************************************************************************/ static void ixv_initialize_receive_units(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); if_softc_ctx_t scctx; struct ixgbe_hw *hw = &adapter->hw; struct ifnet *ifp = iflib_get_ifp(ctx); struct ix_rx_queue *que = adapter->rx_queues; u32 bufsz, psrtype; if (ifp->if_mtu > ETHERMTU) bufsz = 4096 >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; else bufsz = 2048 >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; psrtype = IXGBE_PSRTYPE_TCPHDR | IXGBE_PSRTYPE_UDPHDR | IXGBE_PSRTYPE_IPV4HDR | IXGBE_PSRTYPE_IPV6HDR | IXGBE_PSRTYPE_L2HDR; if (adapter->num_rx_queues > 1) psrtype |= 1 << 29; IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype); /* Tell PF our max_frame size */ if (ixgbevf_rlpml_set_vf(hw, adapter->max_frame_size) != 0) { device_printf(adapter->dev, "There is a problem with the PF setup. It is likely the receive unit for this VF will not function correctly.\n"); } scctx = adapter->shared; for (int i = 0; i < adapter->num_rx_queues; i++, que++) { struct rx_ring *rxr = &que->rxr; u64 rdba = rxr->rx_paddr; u32 reg, rxdctl; int j = rxr->me; /* Disable the queue */ rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(j)); rxdctl &= ~IXGBE_RXDCTL_ENABLE; IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(j), rxdctl); for (int k = 0; k < 10; k++) { if (IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(j)) & IXGBE_RXDCTL_ENABLE) msec_delay(1); else break; } wmb(); /* Setup the Base and Length of the Rx Descriptor Ring */ IXGBE_WRITE_REG(hw, IXGBE_VFRDBAL(j), (rdba & 0x00000000ffffffffULL)); IXGBE_WRITE_REG(hw, IXGBE_VFRDBAH(j), (rdba >> 32)); IXGBE_WRITE_REG(hw, IXGBE_VFRDLEN(j), scctx->isc_nrxd[0] * sizeof(union ixgbe_adv_rx_desc)); /* Reset the ring indices */ IXGBE_WRITE_REG(hw, IXGBE_VFRDH(rxr->me), 0); IXGBE_WRITE_REG(hw, IXGBE_VFRDT(rxr->me), 0); /* Set up the SRRCTL register */ reg = IXGBE_READ_REG(hw, IXGBE_VFSRRCTL(j)); reg &= ~IXGBE_SRRCTL_BSIZEHDR_MASK; reg &= ~IXGBE_SRRCTL_BSIZEPKT_MASK; reg |= bufsz; reg |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF; IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(j), reg); /* Capture Rx Tail index */ rxr->tail = IXGBE_VFRDT(rxr->me); /* Do the queue enabling last */ rxdctl |= IXGBE_RXDCTL_ENABLE | IXGBE_RXDCTL_VME; IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(j), rxdctl); for (int l = 0; l < 10; l++) { if (IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(j)) & IXGBE_RXDCTL_ENABLE) break; msec_delay(1); } wmb(); /* Set the Tail Pointer */ #ifdef DEV_NETMAP /* * In netmap mode, we must preserve the buffers made * available to userspace before the if_init() * (this is true by default on the TX side, because * init makes all buffers available to userspace). * * netmap_reset() and the device specific routines * (e.g. ixgbe_setup_receive_rings()) map these * buffers at the end of the NIC ring, so here we * must set the RDT (tail) register to make sure * they are not overwritten. * * In this driver the NIC ring starts at RDH = 0, * RDT points to the last slot available for reception (?), * so RDT = num_rx_desc - 1 means the whole ring is available. */ if (ifp->if_capenable & IFCAP_NETMAP) { struct netmap_adapter *na = NA(ifp); struct netmap_kring *kring = na->rx_rings[j]; int t = na->num_rx_desc - 1 - nm_kr_rxspace(kring); IXGBE_WRITE_REG(hw, IXGBE_VFRDT(rxr->me), t); } else #endif /* DEV_NETMAP */ IXGBE_WRITE_REG(hw, IXGBE_VFRDT(rxr->me), scctx->isc_nrxd[0] - 1); } /* * Do not touch RSS and RETA settings for older hardware * as those are shared among PF and all VF. */ if (adapter->hw.mac.type >= ixgbe_mac_X550_vf) ixv_initialize_rss_mapping(adapter); } /* ixv_initialize_receive_units */ /************************************************************************ * ixv_setup_vlan_support ************************************************************************/ static void ixv_setup_vlan_support(if_ctx_t ctx) { struct ifnet *ifp = iflib_get_ifp(ctx); struct adapter *adapter = iflib_get_softc(ctx); struct ixgbe_hw *hw = &adapter->hw; u32 ctrl, vid, vfta, retry; /* * We get here thru if_init, meaning * a soft reset, this has already cleared * the VFTA and other state, so if there * have been no vlan's registered do nothing. */ if (adapter->num_vlans == 0) return; if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) { /* Enable the queues */ for (int i = 0; i < adapter->num_rx_queues; i++) { ctrl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i)); ctrl |= IXGBE_RXDCTL_VME; IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(i), ctrl); /* * Let Rx path know that it needs to store VLAN tag * as part of extra mbuf info. */ adapter->rx_queues[i].rxr.vtag_strip = TRUE; } } /* * If filtering VLAN tags is disabled, * there is no need to fill VLAN Filter Table Array (VFTA). */ if ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0) return; /* * A soft reset zero's out the VFTA, so * we need to repopulate it now. */ for (int i = 0; i < IXGBE_VFTA_SIZE; i++) { if (ixv_shadow_vfta[i] == 0) continue; vfta = ixv_shadow_vfta[i]; /* * Reconstruct the vlan id's * based on the bits set in each * of the array ints. */ for (int j = 0; j < 32; j++) { retry = 0; if ((vfta & (1 << j)) == 0) continue; vid = (i * 32) + j; /* Call the shared code mailbox routine */ while (hw->mac.ops.set_vfta(hw, vid, 0, TRUE, FALSE)) { if (++retry > 5) break; } } } } /* ixv_setup_vlan_support */ /************************************************************************ * ixv_if_register_vlan * * Run via a vlan config EVENT, it enables us to use the * HW Filter table since we can get the vlan id. This just * creates the entry in the soft version of the VFTA, init * will repopulate the real table. ************************************************************************/ static void ixv_if_register_vlan(if_ctx_t ctx, u16 vtag) { struct adapter *adapter = iflib_get_softc(ctx); u16 index, bit; index = (vtag >> 5) & 0x7F; bit = vtag & 0x1F; ixv_shadow_vfta[index] |= (1 << bit); ++adapter->num_vlans; } /* ixv_if_register_vlan */ /************************************************************************ * ixv_if_unregister_vlan * * Run via a vlan unconfig EVENT, remove our entry * in the soft vfta. ************************************************************************/ static void ixv_if_unregister_vlan(if_ctx_t ctx, u16 vtag) { struct adapter *adapter = iflib_get_softc(ctx); u16 index, bit; index = (vtag >> 5) & 0x7F; bit = vtag & 0x1F; ixv_shadow_vfta[index] &= ~(1 << bit); --adapter->num_vlans; } /* ixv_if_unregister_vlan */ /************************************************************************ * ixv_if_enable_intr ************************************************************************/ static void ixv_if_enable_intr(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct ixgbe_hw *hw = &adapter->hw; struct ix_rx_queue *que = adapter->rx_queues; u32 mask = (IXGBE_EIMS_ENABLE_MASK & ~IXGBE_EIMS_RTX_QUEUE); IXGBE_WRITE_REG(hw, IXGBE_VTEIMS, mask); mask = IXGBE_EIMS_ENABLE_MASK; mask &= ~(IXGBE_EIMS_OTHER | IXGBE_EIMS_LSC); IXGBE_WRITE_REG(hw, IXGBE_VTEIAC, mask); for (int i = 0; i < adapter->num_rx_queues; i++, que++) ixv_enable_queue(adapter, que->msix); IXGBE_WRITE_FLUSH(hw); } /* ixv_if_enable_intr */ /************************************************************************ * ixv_if_disable_intr ************************************************************************/ static void ixv_if_disable_intr(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); IXGBE_WRITE_REG(&adapter->hw, IXGBE_VTEIAC, 0); IXGBE_WRITE_REG(&adapter->hw, IXGBE_VTEIMC, ~0); IXGBE_WRITE_FLUSH(&adapter->hw); } /* ixv_if_disable_intr */ /************************************************************************ * ixv_if_rx_queue_intr_enable ************************************************************************/ static int ixv_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid) { struct adapter *adapter = iflib_get_softc(ctx); struct ix_rx_queue *que = &adapter->rx_queues[rxqid]; ixv_enable_queue(adapter, que->rxr.me); return (0); } /* ixv_if_rx_queue_intr_enable */ /************************************************************************ * ixv_set_ivar * * Setup the correct IVAR register for a particular MSI-X interrupt * - entry is the register array entry * - vector is the MSI-X vector for this queue * - type is RX/TX/MISC ************************************************************************/ static void ixv_set_ivar(struct adapter *adapter, u8 entry, u8 vector, s8 type) { struct ixgbe_hw *hw = &adapter->hw; u32 ivar, index; vector |= IXGBE_IVAR_ALLOC_VAL; if (type == -1) { /* MISC IVAR */ ivar = IXGBE_READ_REG(hw, IXGBE_VTIVAR_MISC); ivar &= ~0xFF; ivar |= vector; IXGBE_WRITE_REG(hw, IXGBE_VTIVAR_MISC, ivar); } else { /* RX/TX IVARS */ index = (16 * (entry & 1)) + (8 * type); ivar = IXGBE_READ_REG(hw, IXGBE_VTIVAR(entry >> 1)); ivar &= ~(0xFF << index); ivar |= (vector << index); IXGBE_WRITE_REG(hw, IXGBE_VTIVAR(entry >> 1), ivar); } } /* ixv_set_ivar */ /************************************************************************ * ixv_configure_ivars ************************************************************************/ static void ixv_configure_ivars(struct adapter *adapter) { struct ix_rx_queue *que = adapter->rx_queues; MPASS(adapter->num_rx_queues == adapter->num_tx_queues); for (int i = 0; i < adapter->num_rx_queues; i++, que++) { /* First the RX queue entry */ ixv_set_ivar(adapter, i, que->msix, 0); /* ... and the TX */ ixv_set_ivar(adapter, i, que->msix, 1); /* Set an initial value in EITR */ IXGBE_WRITE_REG(&adapter->hw, IXGBE_VTEITR(que->msix), IXGBE_EITR_DEFAULT); } /* For the mailbox interrupt */ ixv_set_ivar(adapter, 1, adapter->vector, -1); } /* ixv_configure_ivars */ /************************************************************************ * ixv_save_stats * * The VF stats registers never have a truly virgin * starting point, so this routine tries to make an * artificial one, marking ground zero on attach as * it were. ************************************************************************/ static void ixv_save_stats(struct adapter *adapter) { if (adapter->stats.vf.vfgprc || adapter->stats.vf.vfgptc) { adapter->stats.vf.saved_reset_vfgprc += adapter->stats.vf.vfgprc - adapter->stats.vf.base_vfgprc; adapter->stats.vf.saved_reset_vfgptc += adapter->stats.vf.vfgptc - adapter->stats.vf.base_vfgptc; adapter->stats.vf.saved_reset_vfgorc += adapter->stats.vf.vfgorc - adapter->stats.vf.base_vfgorc; adapter->stats.vf.saved_reset_vfgotc += adapter->stats.vf.vfgotc - adapter->stats.vf.base_vfgotc; adapter->stats.vf.saved_reset_vfmprc += adapter->stats.vf.vfmprc - adapter->stats.vf.base_vfmprc; } } /* ixv_save_stats */ /************************************************************************ * ixv_init_stats ************************************************************************/ static void ixv_init_stats(struct adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; adapter->stats.vf.last_vfgprc = IXGBE_READ_REG(hw, IXGBE_VFGPRC); adapter->stats.vf.last_vfgorc = IXGBE_READ_REG(hw, IXGBE_VFGORC_LSB); adapter->stats.vf.last_vfgorc |= (((u64)(IXGBE_READ_REG(hw, IXGBE_VFGORC_MSB))) << 32); adapter->stats.vf.last_vfgptc = IXGBE_READ_REG(hw, IXGBE_VFGPTC); adapter->stats.vf.last_vfgotc = IXGBE_READ_REG(hw, IXGBE_VFGOTC_LSB); adapter->stats.vf.last_vfgotc |= (((u64)(IXGBE_READ_REG(hw, IXGBE_VFGOTC_MSB))) << 32); adapter->stats.vf.last_vfmprc = IXGBE_READ_REG(hw, IXGBE_VFMPRC); adapter->stats.vf.base_vfgprc = adapter->stats.vf.last_vfgprc; adapter->stats.vf.base_vfgorc = adapter->stats.vf.last_vfgorc; adapter->stats.vf.base_vfgptc = adapter->stats.vf.last_vfgptc; adapter->stats.vf.base_vfgotc = adapter->stats.vf.last_vfgotc; adapter->stats.vf.base_vfmprc = adapter->stats.vf.last_vfmprc; } /* ixv_init_stats */ #define UPDATE_STAT_32(reg, last, count) \ { \ u32 current = IXGBE_READ_REG(hw, reg); \ if (current < last) \ count += 0x100000000LL; \ last = current; \ count &= 0xFFFFFFFF00000000LL; \ count |= current; \ } #define UPDATE_STAT_36(lsb, msb, last, count) \ { \ u64 cur_lsb = IXGBE_READ_REG(hw, lsb); \ u64 cur_msb = IXGBE_READ_REG(hw, msb); \ u64 current = ((cur_msb << 32) | cur_lsb); \ if (current < last) \ count += 0x1000000000LL; \ last = current; \ count &= 0xFFFFFFF000000000LL; \ count |= current; \ } /************************************************************************ * ixv_update_stats - Update the board statistics counters. ************************************************************************/ void ixv_update_stats(struct adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; struct ixgbevf_hw_stats *stats = &adapter->stats.vf; UPDATE_STAT_32(IXGBE_VFGPRC, adapter->stats.vf.last_vfgprc, adapter->stats.vf.vfgprc); UPDATE_STAT_32(IXGBE_VFGPTC, adapter->stats.vf.last_vfgptc, adapter->stats.vf.vfgptc); UPDATE_STAT_36(IXGBE_VFGORC_LSB, IXGBE_VFGORC_MSB, adapter->stats.vf.last_vfgorc, adapter->stats.vf.vfgorc); UPDATE_STAT_36(IXGBE_VFGOTC_LSB, IXGBE_VFGOTC_MSB, adapter->stats.vf.last_vfgotc, adapter->stats.vf.vfgotc); UPDATE_STAT_32(IXGBE_VFMPRC, adapter->stats.vf.last_vfmprc, adapter->stats.vf.vfmprc); /* Fill out the OS statistics structure */ IXGBE_SET_IPACKETS(adapter, stats->vfgprc); IXGBE_SET_OPACKETS(adapter, stats->vfgptc); IXGBE_SET_IBYTES(adapter, stats->vfgorc); IXGBE_SET_OBYTES(adapter, stats->vfgotc); IXGBE_SET_IMCASTS(adapter, stats->vfmprc); } /* ixv_update_stats */ /************************************************************************ * ixv_add_stats_sysctls - Add statistic sysctls for the VF. ************************************************************************/ static void ixv_add_stats_sysctls(struct adapter *adapter) { device_t dev = adapter->dev; struct ix_tx_queue *tx_que = adapter->tx_queues; struct ix_rx_queue *rx_que = adapter->rx_queues; struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev); struct sysctl_oid *tree = device_get_sysctl_tree(dev); struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree); struct ixgbevf_hw_stats *stats = &adapter->stats.vf; struct sysctl_oid *stat_node, *queue_node; struct sysctl_oid_list *stat_list, *queue_list; #define QUEUE_NAME_LEN 32 char namebuf[QUEUE_NAME_LEN]; /* Driver Statistics */ SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_events", CTLFLAG_RD, &adapter->watchdog_events, "Watchdog timeouts"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq", CTLFLAG_RD, &adapter->link_irq, "Link MSI-X IRQ Handled"); for (int i = 0; i < adapter->num_tx_queues; i++, tx_que++) { struct tx_ring *txr = &tx_que->txr; snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i); queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Queue Name"); queue_list = SYSCTL_CHILDREN(queue_node); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tso_tx", CTLFLAG_RD, &(txr->tso_tx), "TSO Packets"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_packets", CTLFLAG_RD, &(txr->total_packets), "TX Packets"); } for (int i = 0; i < adapter->num_rx_queues; i++, rx_que++) { struct rx_ring *rxr = &rx_que->rxr; snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i); queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Queue Name"); queue_list = SYSCTL_CHILDREN(queue_node); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "irqs", CTLFLAG_RD, &(rx_que->irqs), "IRQs on queue"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_packets", CTLFLAG_RD, &(rxr->rx_packets), "RX packets"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_bytes", CTLFLAG_RD, &(rxr->rx_bytes), "RX bytes"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_discarded", CTLFLAG_RD, &(rxr->rx_discarded), "Discarded RX packets"); } stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "VF Statistics (read from HW registers)"); stat_list = SYSCTL_CHILDREN(stat_node); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_rcvd", CTLFLAG_RD, &stats->vfgprc, "Good Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_rcvd", CTLFLAG_RD, &stats->vfgorc, "Good Octets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_rcvd", CTLFLAG_RD, &stats->vfmprc, "Multicast Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd", CTLFLAG_RD, &stats->vfgptc, "Good Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd", CTLFLAG_RD, &stats->vfgotc, "Good Octets Transmitted"); } /* ixv_add_stats_sysctls */ /************************************************************************ * ixv_print_debug_info * * Called only when em_display_debug_stats is enabled. * Provides a way to take a look at important statistics * maintained by the driver and hardware. ************************************************************************/ static void ixv_print_debug_info(struct adapter *adapter) { device_t dev = adapter->dev; struct ixgbe_hw *hw = &adapter->hw; device_printf(dev, "Error Byte Count = %u \n", IXGBE_READ_REG(hw, IXGBE_ERRBC)); device_printf(dev, "MBX IRQ Handled: %lu\n", (long)adapter->link_irq); } /* ixv_print_debug_info */ /************************************************************************ * ixv_sysctl_debug ************************************************************************/ static int ixv_sysctl_debug(SYSCTL_HANDLER_ARGS) { struct adapter *adapter; int error, result; result = -1; error = sysctl_handle_int(oidp, &result, 0, req); if (error || !req->newptr) return (error); if (result == 1) { adapter = (struct adapter *)arg1; ixv_print_debug_info(adapter); } return error; } /* ixv_sysctl_debug */ /************************************************************************ * ixv_init_device_features ************************************************************************/ static void ixv_init_device_features(struct adapter *adapter) { adapter->feat_cap = IXGBE_FEATURE_NETMAP | IXGBE_FEATURE_VF | IXGBE_FEATURE_LEGACY_TX; /* A tad short on feature flags for VFs, atm. */ switch (adapter->hw.mac.type) { case ixgbe_mac_82599_vf: break; case ixgbe_mac_X540_vf: break; case ixgbe_mac_X550_vf: case ixgbe_mac_X550EM_x_vf: case ixgbe_mac_X550EM_a_vf: adapter->feat_cap |= IXGBE_FEATURE_NEEDS_CTXD; adapter->feat_cap |= IXGBE_FEATURE_RSS; break; default: break; } /* Enabled by default... */ /* Is a virtual function (VF) */ if (adapter->feat_cap & IXGBE_FEATURE_VF) adapter->feat_en |= IXGBE_FEATURE_VF; /* Netmap */ if (adapter->feat_cap & IXGBE_FEATURE_NETMAP) adapter->feat_en |= IXGBE_FEATURE_NETMAP; /* Receive-Side Scaling (RSS) */ if (adapter->feat_cap & IXGBE_FEATURE_RSS) adapter->feat_en |= IXGBE_FEATURE_RSS; /* Needs advanced context descriptor regardless of offloads req'd */ if (adapter->feat_cap & IXGBE_FEATURE_NEEDS_CTXD) adapter->feat_en |= IXGBE_FEATURE_NEEDS_CTXD; } /* ixv_init_device_features */ diff --git a/sys/dev/ixgbe/ix_txrx.c b/sys/dev/ixgbe/ix_txrx.c index 43e64b0c0df0..9d31e0b1b43e 100644 --- a/sys/dev/ixgbe/ix_txrx.c +++ b/sys/dev/ixgbe/ix_txrx.c @@ -1,551 +1,549 @@ /****************************************************************************** Copyright (c) 2001-2017, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************/ /*$FreeBSD$*/ #ifndef IXGBE_STANDALONE_BUILD #include "opt_inet.h" #include "opt_inet6.h" #include "opt_rss.h" #endif #include "ixgbe.h" /************************************************************************ * Local Function prototypes ************************************************************************/ static int ixgbe_isc_txd_encap(void *arg, if_pkt_info_t pi); static void ixgbe_isc_txd_flush(void *arg, uint16_t txqid, qidx_t pidx); static int ixgbe_isc_txd_credits_update(void *arg, uint16_t txqid, bool clear); static void ixgbe_isc_rxd_refill(void *arg, if_rxd_update_t iru); static void ixgbe_isc_rxd_flush(void *arg, uint16_t qsidx, uint8_t flidx __unused, qidx_t pidx); static int ixgbe_isc_rxd_available(void *arg, uint16_t qsidx, qidx_t pidx, qidx_t budget); static int ixgbe_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri); static void ixgbe_rx_checksum(u32 staterr, if_rxd_info_t ri, u32 ptype); static int ixgbe_tx_ctx_setup(struct ixgbe_adv_tx_context_desc *, if_pkt_info_t); extern void ixgbe_if_enable_intr(if_ctx_t ctx); static int ixgbe_determine_rsstype(u16 pkt_info); struct if_txrx ixgbe_txrx = { .ift_txd_encap = ixgbe_isc_txd_encap, .ift_txd_flush = ixgbe_isc_txd_flush, .ift_txd_credits_update = ixgbe_isc_txd_credits_update, .ift_rxd_available = ixgbe_isc_rxd_available, .ift_rxd_pkt_get = ixgbe_isc_rxd_pkt_get, .ift_rxd_refill = ixgbe_isc_rxd_refill, .ift_rxd_flush = ixgbe_isc_rxd_flush, .ift_legacy_intr = NULL }; -extern if_shared_ctx_t ixgbe_sctx; - /************************************************************************ * ixgbe_tx_ctx_setup * * Advanced Context Descriptor setup for VLAN, CSUM or TSO * ************************************************************************/ static int ixgbe_tx_ctx_setup(struct ixgbe_adv_tx_context_desc *TXD, if_pkt_info_t pi) { u32 vlan_macip_lens, type_tucmd_mlhl; u32 olinfo_status, mss_l4len_idx, pktlen, offload; u8 ehdrlen; offload = TRUE; olinfo_status = mss_l4len_idx = vlan_macip_lens = type_tucmd_mlhl = 0; /* VLAN MACLEN IPLEN */ vlan_macip_lens |= (htole16(pi->ipi_vtag) << IXGBE_ADVTXD_VLAN_SHIFT); /* * Some of our VF devices need a context descriptor for every * packet. That means the ehdrlen needs to be non-zero in order * for the host driver not to flag a malicious event. The stack * will most likely populate this for all other reasons of why * this function was called. */ if (pi->ipi_ehdrlen == 0) { ehdrlen = ETHER_HDR_LEN; ehdrlen += (pi->ipi_vtag != 0) ? ETHER_VLAN_ENCAP_LEN : 0; } else ehdrlen = pi->ipi_ehdrlen; vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT; pktlen = pi->ipi_len; /* First check if TSO is to be used */ if (pi->ipi_csum_flags & CSUM_TSO) { /* This is used in the transmit desc in encap */ pktlen = pi->ipi_len - ehdrlen - pi->ipi_ip_hlen - pi->ipi_tcp_hlen; mss_l4len_idx |= (pi->ipi_tso_segsz << IXGBE_ADVTXD_MSS_SHIFT); mss_l4len_idx |= (pi->ipi_tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT); } olinfo_status |= pktlen << IXGBE_ADVTXD_PAYLEN_SHIFT; if (pi->ipi_flags & IPI_TX_IPV4) { type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4; /* Tell transmit desc to also do IPv4 checksum. */ if (pi->ipi_csum_flags & (CSUM_IP|CSUM_TSO)) olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8; } else if (pi->ipi_flags & IPI_TX_IPV6) type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6; else offload = FALSE; vlan_macip_lens |= pi->ipi_ip_hlen; switch (pi->ipi_ipproto) { case IPPROTO_TCP: if (pi->ipi_csum_flags & (CSUM_IP_TCP | CSUM_IP6_TCP | CSUM_TSO)) type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP; else offload = FALSE; break; case IPPROTO_UDP: if (pi->ipi_csum_flags & (CSUM_IP_UDP | CSUM_IP6_UDP)) type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP; else offload = FALSE; break; case IPPROTO_SCTP: if (pi->ipi_csum_flags & (CSUM_IP_SCTP | CSUM_IP6_SCTP)) type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP; else offload = FALSE; break; default: offload = FALSE; break; } /* Insert L4 checksum into data descriptors */ if (offload) olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8; type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT; /* Now copy bits into descriptor */ TXD->vlan_macip_lens = htole32(vlan_macip_lens); TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl); TXD->seqnum_seed = htole32(0); TXD->mss_l4len_idx = htole32(mss_l4len_idx); return (olinfo_status); } /* ixgbe_tx_ctx_setup */ /************************************************************************ * ixgbe_isc_txd_encap ************************************************************************/ static int ixgbe_isc_txd_encap(void *arg, if_pkt_info_t pi) { struct adapter *sc = arg; if_softc_ctx_t scctx = sc->shared; struct ix_tx_queue *que = &sc->tx_queues[pi->ipi_qsidx]; struct tx_ring *txr = &que->txr; int nsegs = pi->ipi_nsegs; bus_dma_segment_t *segs = pi->ipi_segs; union ixgbe_adv_tx_desc *txd = NULL; struct ixgbe_adv_tx_context_desc *TXD; int i, j, first, pidx_last; u32 olinfo_status, cmd, flags; qidx_t ntxd; cmd = (IXGBE_ADVTXD_DTYP_DATA | IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT); if (pi->ipi_mflags & M_VLANTAG) cmd |= IXGBE_ADVTXD_DCMD_VLE; i = first = pi->ipi_pidx; flags = (pi->ipi_flags & IPI_TX_INTR) ? IXGBE_TXD_CMD_RS : 0; ntxd = scctx->isc_ntxd[0]; TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[first]; if ((pi->ipi_csum_flags & CSUM_OFFLOAD) || (sc->feat_en & IXGBE_FEATURE_NEEDS_CTXD) || pi->ipi_vtag) { /********************************************* * Set up the appropriate offload context * this will consume the first descriptor *********************************************/ olinfo_status = ixgbe_tx_ctx_setup(TXD, pi); if (pi->ipi_csum_flags & CSUM_TSO) { cmd |= IXGBE_ADVTXD_DCMD_TSE; ++txr->tso_tx; } if (++i == scctx->isc_ntxd[0]) i = 0; } else { /* Indicate the whole packet as payload when not doing TSO */ olinfo_status = pi->ipi_len << IXGBE_ADVTXD_PAYLEN_SHIFT; } olinfo_status |= IXGBE_ADVTXD_CC; pidx_last = 0; for (j = 0; j < nsegs; j++) { bus_size_t seglen; txd = &txr->tx_base[i]; seglen = segs[j].ds_len; txd->read.buffer_addr = htole64(segs[j].ds_addr); txd->read.cmd_type_len = htole32(cmd | seglen); txd->read.olinfo_status = htole32(olinfo_status); pidx_last = i; if (++i == scctx->isc_ntxd[0]) { i = 0; } } if (flags) { txr->tx_rsq[txr->tx_rs_pidx] = pidx_last; txr->tx_rs_pidx = (txr->tx_rs_pidx + 1) & (ntxd - 1); } txd->read.cmd_type_len |= htole32(IXGBE_TXD_CMD_EOP | flags); txr->bytes += pi->ipi_len; pi->ipi_new_pidx = i; ++txr->total_packets; return (0); } /* ixgbe_isc_txd_encap */ /************************************************************************ * ixgbe_isc_txd_flush ************************************************************************/ static void ixgbe_isc_txd_flush(void *arg, uint16_t txqid, qidx_t pidx) { struct adapter *sc = arg; struct ix_tx_queue *que = &sc->tx_queues[txqid]; struct tx_ring *txr = &que->txr; IXGBE_WRITE_REG(&sc->hw, txr->tail, pidx); } /* ixgbe_isc_txd_flush */ /************************************************************************ * ixgbe_isc_txd_credits_update ************************************************************************/ static int ixgbe_isc_txd_credits_update(void *arg, uint16_t txqid, bool clear) { struct adapter *sc = arg; if_softc_ctx_t scctx = sc->shared; struct ix_tx_queue *que = &sc->tx_queues[txqid]; struct tx_ring *txr = &que->txr; qidx_t processed = 0; int updated; qidx_t cur, prev, ntxd, rs_cidx; int32_t delta; uint8_t status; rs_cidx = txr->tx_rs_cidx; if (rs_cidx == txr->tx_rs_pidx) return (0); cur = txr->tx_rsq[rs_cidx]; status = txr->tx_base[cur].wb.status; updated = !!(status & IXGBE_TXD_STAT_DD); if (!updated) return (0); /* If clear is false just let caller know that there * are descriptors to reclaim */ if (!clear) return (1); prev = txr->tx_cidx_processed; ntxd = scctx->isc_ntxd[0]; do { MPASS(prev != cur); delta = (int32_t)cur - (int32_t)prev; if (delta < 0) delta += ntxd; MPASS(delta > 0); processed += delta; prev = cur; rs_cidx = (rs_cidx + 1) & (ntxd - 1); if (rs_cidx == txr->tx_rs_pidx) break; cur = txr->tx_rsq[rs_cidx]; status = txr->tx_base[cur].wb.status; } while ((status & IXGBE_TXD_STAT_DD)); txr->tx_rs_cidx = rs_cidx; txr->tx_cidx_processed = prev; return (processed); } /* ixgbe_isc_txd_credits_update */ /************************************************************************ * ixgbe_isc_rxd_refill ************************************************************************/ static void ixgbe_isc_rxd_refill(void *arg, if_rxd_update_t iru) { struct adapter *sc = arg; struct ix_rx_queue *que = &sc->rx_queues[iru->iru_qsidx]; struct rx_ring *rxr = &que->rxr; uint64_t *paddrs; int i; uint32_t next_pidx, pidx; uint16_t count; paddrs = iru->iru_paddrs; pidx = iru->iru_pidx; count = iru->iru_count; for (i = 0, next_pidx = pidx; i < count; i++) { rxr->rx_base[next_pidx].read.pkt_addr = htole64(paddrs[i]); if (++next_pidx == sc->shared->isc_nrxd[0]) next_pidx = 0; } } /* ixgbe_isc_rxd_refill */ /************************************************************************ * ixgbe_isc_rxd_flush ************************************************************************/ static void ixgbe_isc_rxd_flush(void *arg, uint16_t qsidx, uint8_t flidx __unused, qidx_t pidx) { struct adapter *sc = arg; struct ix_rx_queue *que = &sc->rx_queues[qsidx]; struct rx_ring *rxr = &que->rxr; IXGBE_WRITE_REG(&sc->hw, rxr->tail, pidx); } /* ixgbe_isc_rxd_flush */ /************************************************************************ * ixgbe_isc_rxd_available ************************************************************************/ static int ixgbe_isc_rxd_available(void *arg, uint16_t qsidx, qidx_t pidx, qidx_t budget) { struct adapter *sc = arg; struct ix_rx_queue *que = &sc->rx_queues[qsidx]; struct rx_ring *rxr = &que->rxr; union ixgbe_adv_rx_desc *rxd; u32 staterr; int cnt, i, nrxd; nrxd = sc->shared->isc_nrxd[0]; for (cnt = 0, i = pidx; cnt < nrxd && cnt <= budget;) { rxd = &rxr->rx_base[i]; staterr = le32toh(rxd->wb.upper.status_error); if ((staterr & IXGBE_RXD_STAT_DD) == 0) break; if (++i == nrxd) i = 0; if (staterr & IXGBE_RXD_STAT_EOP) cnt++; } return (cnt); } /* ixgbe_isc_rxd_available */ /************************************************************************ * ixgbe_isc_rxd_pkt_get * * Routine sends data which has been dma'ed into host memory * to upper layer. Initialize ri structure. * * Returns 0 upon success, errno on failure ************************************************************************/ static int ixgbe_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri) { struct adapter *adapter = arg; struct ix_rx_queue *que = &adapter->rx_queues[ri->iri_qsidx]; struct rx_ring *rxr = &que->rxr; struct ifnet *ifp = iflib_get_ifp(adapter->ctx); union ixgbe_adv_rx_desc *rxd; u16 pkt_info, len, cidx, i; u16 vtag = 0; u32 ptype; u32 staterr = 0; bool eop; i = 0; cidx = ri->iri_cidx; do { rxd = &rxr->rx_base[cidx]; staterr = le32toh(rxd->wb.upper.status_error); pkt_info = le16toh(rxd->wb.lower.lo_dword.hs_rss.pkt_info); /* Error Checking then decrement count */ MPASS ((staterr & IXGBE_RXD_STAT_DD) != 0); len = le16toh(rxd->wb.upper.length); ptype = le32toh(rxd->wb.lower.lo_dword.data) & IXGBE_RXDADV_PKTTYPE_MASK; ri->iri_len += len; rxr->bytes += len; rxd->wb.upper.status_error = 0; eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0); if ( (rxr->vtag_strip) && (staterr & IXGBE_RXD_STAT_VP) ) { vtag = le16toh(rxd->wb.upper.vlan); } else { vtag = 0; } /* Make sure bad packets are discarded */ if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) { #if __FreeBSD_version >= 1100036 if (adapter->feat_en & IXGBE_FEATURE_VF) if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); #endif rxr->rx_discarded++; return (EBADMSG); } ri->iri_frags[i].irf_flid = 0; ri->iri_frags[i].irf_idx = cidx; ri->iri_frags[i].irf_len = len; if (++cidx == adapter->shared->isc_nrxd[0]) cidx = 0; i++; /* even a 16K packet shouldn't consume more than 8 clusters */ MPASS(i < 9); } while (!eop); rxr->rx_packets++; rxr->packets++; rxr->rx_bytes += ri->iri_len; if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) ixgbe_rx_checksum(staterr, ri, ptype); ri->iri_flowid = le32toh(rxd->wb.lower.hi_dword.rss); ri->iri_rsstype = ixgbe_determine_rsstype(pkt_info); if ((adapter->feat_en & IXGBE_FEATURE_RSS) == 0) { if (ri->iri_rsstype == M_HASHTYPE_OPAQUE) ri->iri_rsstype = M_HASHTYPE_NONE; else ri->iri_rsstype = M_HASHTYPE_OPAQUE_HASH; } ri->iri_vtag = vtag; ri->iri_nfrags = i; if (vtag) ri->iri_flags |= M_VLANTAG; return (0); } /* ixgbe_isc_rxd_pkt_get */ /************************************************************************ * ixgbe_rx_checksum * * Verify that the hardware indicated that the checksum is valid. * Inform the stack about the status of checksum so that stack * doesn't spend time verifying the checksum. ************************************************************************/ static void ixgbe_rx_checksum(u32 staterr, if_rxd_info_t ri, u32 ptype) { u16 status = (u16)staterr; u8 errors = (u8)(staterr >> 24); bool sctp = false; if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 && (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0) sctp = TRUE; /* IPv4 checksum */ if (status & IXGBE_RXD_STAT_IPCS) { if (!(errors & IXGBE_RXD_ERR_IPE)) { /* IP Checksum Good */ ri->iri_csum_flags = CSUM_IP_CHECKED | CSUM_IP_VALID; } else ri->iri_csum_flags = 0; } /* TCP/UDP/SCTP checksum */ if (status & IXGBE_RXD_STAT_L4CS) { u64 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); #if __FreeBSD_version >= 800000 if (sctp) type = CSUM_SCTP_VALID; #endif if (!(errors & IXGBE_RXD_ERR_TCPE)) { ri->iri_csum_flags |= type; if (!sctp) ri->iri_csum_data = htons(0xffff); } } } /* ixgbe_rx_checksum */ /************************************************************************ * ixgbe_determine_rsstype * * Parse the packet type to determine the appropriate hash ************************************************************************/ static int ixgbe_determine_rsstype(u16 pkt_info) { switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) { case IXGBE_RXDADV_RSSTYPE_IPV4_TCP: return M_HASHTYPE_RSS_TCP_IPV4; case IXGBE_RXDADV_RSSTYPE_IPV4: return M_HASHTYPE_RSS_IPV4; case IXGBE_RXDADV_RSSTYPE_IPV6_TCP: return M_HASHTYPE_RSS_TCP_IPV6; case IXGBE_RXDADV_RSSTYPE_IPV6_EX: return M_HASHTYPE_RSS_IPV6_EX; case IXGBE_RXDADV_RSSTYPE_IPV6: return M_HASHTYPE_RSS_IPV6; case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX: return M_HASHTYPE_RSS_TCP_IPV6_EX; case IXGBE_RXDADV_RSSTYPE_IPV4_UDP: return M_HASHTYPE_RSS_UDP_IPV4; case IXGBE_RXDADV_RSSTYPE_IPV6_UDP: return M_HASHTYPE_RSS_UDP_IPV6; case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX: return M_HASHTYPE_RSS_UDP_IPV6_EX; default: return M_HASHTYPE_OPAQUE; } } /* ixgbe_determine_rsstype */ diff --git a/sys/dev/ixl/if_iavf.c b/sys/dev/ixl/if_iavf.c index 394656d27a2f..f6eb91c2a855 100644 --- a/sys/dev/ixl/if_iavf.c +++ b/sys/dev/ixl/if_iavf.c @@ -1,2454 +1,2452 @@ /****************************************************************************** Copyright (c) 2013-2018, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************/ /*$FreeBSD$*/ #include "iavf.h" /********************************************************************* * Driver version *********************************************************************/ #define IAVF_DRIVER_VERSION_MAJOR 2 #define IAVF_DRIVER_VERSION_MINOR 0 #define IAVF_DRIVER_VERSION_BUILD 0 #define IAVF_DRIVER_VERSION_STRING \ __XSTRING(IAVF_DRIVER_VERSION_MAJOR) "." \ __XSTRING(IAVF_DRIVER_VERSION_MINOR) "." \ __XSTRING(IAVF_DRIVER_VERSION_BUILD) "-k" /********************************************************************* * PCI Device ID Table * * Used by probe to select devices to load on * * ( Vendor ID, Device ID, Branding String ) *********************************************************************/ static pci_vendor_info_t iavf_vendor_info_array[] = { PVID(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_VF, "Intel(R) Ethernet Virtual Function 700 Series"), PVID(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_X722_VF, "Intel(R) Ethernet Virtual Function 700 Series (X722)"), PVID(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_ADAPTIVE_VF, "Intel(R) Ethernet Adaptive Virtual Function"), /* required last entry */ PVID_END }; /********************************************************************* * Function prototypes *********************************************************************/ static void *iavf_register(device_t dev); static int iavf_if_attach_pre(if_ctx_t ctx); static int iavf_if_attach_post(if_ctx_t ctx); static int iavf_if_detach(if_ctx_t ctx); static int iavf_if_shutdown(if_ctx_t ctx); static int iavf_if_suspend(if_ctx_t ctx); static int iavf_if_resume(if_ctx_t ctx); static int iavf_if_msix_intr_assign(if_ctx_t ctx, int msix); static void iavf_if_enable_intr(if_ctx_t ctx); static void iavf_if_disable_intr(if_ctx_t ctx); static int iavf_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid); static int iavf_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid); static int iavf_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int ntxqs, int ntxqsets); static int iavf_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int nqs, int nqsets); static void iavf_if_queues_free(if_ctx_t ctx); static void iavf_if_update_admin_status(if_ctx_t ctx); static void iavf_if_multi_set(if_ctx_t ctx); static int iavf_if_mtu_set(if_ctx_t ctx, uint32_t mtu); static void iavf_if_media_status(if_ctx_t ctx, struct ifmediareq *ifmr); static int iavf_if_media_change(if_ctx_t ctx); static int iavf_if_promisc_set(if_ctx_t ctx, int flags); static void iavf_if_timer(if_ctx_t ctx, uint16_t qid); static void iavf_if_vlan_register(if_ctx_t ctx, u16 vtag); static void iavf_if_vlan_unregister(if_ctx_t ctx, u16 vtag); static uint64_t iavf_if_get_counter(if_ctx_t ctx, ift_counter cnt); static void iavf_if_stop(if_ctx_t ctx); static bool iavf_if_needs_restart(if_ctx_t ctx, enum iflib_restart_event event); static int iavf_allocate_pci_resources(struct iavf_sc *); static int iavf_reset_complete(struct i40e_hw *); static int iavf_setup_vc(struct iavf_sc *); static int iavf_reset(struct iavf_sc *); static int iavf_vf_config(struct iavf_sc *); static void iavf_init_filters(struct iavf_sc *); static void iavf_free_pci_resources(struct iavf_sc *); static void iavf_free_filters(struct iavf_sc *); static void iavf_setup_interface(device_t, struct iavf_sc *); static void iavf_add_device_sysctls(struct iavf_sc *); static void iavf_enable_adminq_irq(struct i40e_hw *); static void iavf_disable_adminq_irq(struct i40e_hw *); static void iavf_enable_queue_irq(struct i40e_hw *, int); static void iavf_disable_queue_irq(struct i40e_hw *, int); static void iavf_config_rss(struct iavf_sc *); static void iavf_stop(struct iavf_sc *); static int iavf_add_mac_filter(struct iavf_sc *, u8 *, u16); static int iavf_del_mac_filter(struct iavf_sc *sc, u8 *macaddr); static int iavf_msix_que(void *); static int iavf_msix_adminq(void *); //static void iavf_del_multi(struct iavf_sc *sc); static void iavf_init_multi(struct iavf_sc *sc); static void iavf_configure_itr(struct iavf_sc *sc); static int iavf_sysctl_rx_itr(SYSCTL_HANDLER_ARGS); static int iavf_sysctl_tx_itr(SYSCTL_HANDLER_ARGS); static int iavf_sysctl_current_speed(SYSCTL_HANDLER_ARGS); static int iavf_sysctl_sw_filter_list(SYSCTL_HANDLER_ARGS); static int iavf_sysctl_queue_interrupt_table(SYSCTL_HANDLER_ARGS); static int iavf_sysctl_vf_reset(SYSCTL_HANDLER_ARGS); static int iavf_sysctl_vflr_reset(SYSCTL_HANDLER_ARGS); static void iavf_save_tunables(struct iavf_sc *); static enum i40e_status_code iavf_process_adminq(struct iavf_sc *, u16 *); static int iavf_send_vc_msg(struct iavf_sc *sc, u32 op); static int iavf_send_vc_msg_sleep(struct iavf_sc *sc, u32 op); /********************************************************************* * FreeBSD Device Interface Entry Points *********************************************************************/ static device_method_t iavf_methods[] = { /* Device interface */ DEVMETHOD(device_register, iavf_register), DEVMETHOD(device_probe, iflib_device_probe), DEVMETHOD(device_attach, iflib_device_attach), DEVMETHOD(device_detach, iflib_device_detach), DEVMETHOD(device_shutdown, iflib_device_shutdown), DEVMETHOD_END }; static driver_t iavf_driver = { "iavf", iavf_methods, sizeof(struct iavf_sc), }; devclass_t iavf_devclass; DRIVER_MODULE(iavf, pci, iavf_driver, iavf_devclass, 0, 0); MODULE_PNP_INFO("U32:vendor;U32:device;U32:subvendor;U32:subdevice;U32:revision", pci, iavf, iavf_vendor_info_array, nitems(iavf_vendor_info_array) - 1); MODULE_VERSION(iavf, 1); MODULE_DEPEND(iavf, pci, 1, 1, 1); MODULE_DEPEND(iavf, ether, 1, 1, 1); MODULE_DEPEND(iavf, iflib, 1, 1, 1); MALLOC_DEFINE(M_IAVF, "iavf", "iavf driver allocations"); static device_method_t iavf_if_methods[] = { DEVMETHOD(ifdi_attach_pre, iavf_if_attach_pre), DEVMETHOD(ifdi_attach_post, iavf_if_attach_post), DEVMETHOD(ifdi_detach, iavf_if_detach), DEVMETHOD(ifdi_shutdown, iavf_if_shutdown), DEVMETHOD(ifdi_suspend, iavf_if_suspend), DEVMETHOD(ifdi_resume, iavf_if_resume), DEVMETHOD(ifdi_init, iavf_if_init), DEVMETHOD(ifdi_stop, iavf_if_stop), DEVMETHOD(ifdi_msix_intr_assign, iavf_if_msix_intr_assign), DEVMETHOD(ifdi_intr_enable, iavf_if_enable_intr), DEVMETHOD(ifdi_intr_disable, iavf_if_disable_intr), DEVMETHOD(ifdi_rx_queue_intr_enable, iavf_if_rx_queue_intr_enable), DEVMETHOD(ifdi_tx_queue_intr_enable, iavf_if_tx_queue_intr_enable), DEVMETHOD(ifdi_tx_queues_alloc, iavf_if_tx_queues_alloc), DEVMETHOD(ifdi_rx_queues_alloc, iavf_if_rx_queues_alloc), DEVMETHOD(ifdi_queues_free, iavf_if_queues_free), DEVMETHOD(ifdi_update_admin_status, iavf_if_update_admin_status), DEVMETHOD(ifdi_multi_set, iavf_if_multi_set), DEVMETHOD(ifdi_mtu_set, iavf_if_mtu_set), DEVMETHOD(ifdi_media_status, iavf_if_media_status), DEVMETHOD(ifdi_media_change, iavf_if_media_change), DEVMETHOD(ifdi_promisc_set, iavf_if_promisc_set), DEVMETHOD(ifdi_timer, iavf_if_timer), DEVMETHOD(ifdi_vlan_register, iavf_if_vlan_register), DEVMETHOD(ifdi_vlan_unregister, iavf_if_vlan_unregister), DEVMETHOD(ifdi_get_counter, iavf_if_get_counter), DEVMETHOD(ifdi_needs_restart, iavf_if_needs_restart), DEVMETHOD_END }; static driver_t iavf_if_driver = { "iavf_if", iavf_if_methods, sizeof(struct iavf_sc) }; /* ** TUNEABLE PARAMETERS: */ static SYSCTL_NODE(_hw, OID_AUTO, iavf, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "iavf driver parameters"); /* * Different method for processing TX descriptor * completion. */ static int iavf_enable_head_writeback = 0; TUNABLE_INT("hw.iavf.enable_head_writeback", &iavf_enable_head_writeback); SYSCTL_INT(_hw_iavf, OID_AUTO, enable_head_writeback, CTLFLAG_RDTUN, &iavf_enable_head_writeback, 0, "For detecting last completed TX descriptor by hardware, use value written by HW instead of checking descriptors"); static int iavf_core_debug_mask = 0; TUNABLE_INT("hw.iavf.core_debug_mask", &iavf_core_debug_mask); SYSCTL_INT(_hw_iavf, OID_AUTO, core_debug_mask, CTLFLAG_RDTUN, &iavf_core_debug_mask, 0, "Display debug statements that are printed in non-shared code"); static int iavf_shared_debug_mask = 0; TUNABLE_INT("hw.iavf.shared_debug_mask", &iavf_shared_debug_mask); SYSCTL_INT(_hw_iavf, OID_AUTO, shared_debug_mask, CTLFLAG_RDTUN, &iavf_shared_debug_mask, 0, "Display debug statements that are printed in shared code"); int iavf_rx_itr = IXL_ITR_8K; TUNABLE_INT("hw.iavf.rx_itr", &iavf_rx_itr); SYSCTL_INT(_hw_iavf, OID_AUTO, rx_itr, CTLFLAG_RDTUN, &iavf_rx_itr, 0, "RX Interrupt Rate"); int iavf_tx_itr = IXL_ITR_4K; TUNABLE_INT("hw.iavf.tx_itr", &iavf_tx_itr); SYSCTL_INT(_hw_iavf, OID_AUTO, tx_itr, CTLFLAG_RDTUN, &iavf_tx_itr, 0, "TX Interrupt Rate"); extern struct if_txrx ixl_txrx_hwb; extern struct if_txrx ixl_txrx_dwb; static struct if_shared_ctx iavf_sctx_init = { .isc_magic = IFLIB_MAGIC, .isc_q_align = PAGE_SIZE,/* max(DBA_ALIGN, PAGE_SIZE) */ .isc_tx_maxsize = IXL_TSO_SIZE + sizeof(struct ether_vlan_header), .isc_tx_maxsegsize = IXL_MAX_DMA_SEG_SIZE, .isc_tso_maxsize = IXL_TSO_SIZE + sizeof(struct ether_vlan_header), .isc_tso_maxsegsize = IXL_MAX_DMA_SEG_SIZE, .isc_rx_maxsize = 16384, .isc_rx_nsegments = IXL_MAX_RX_SEGS, .isc_rx_maxsegsize = IXL_MAX_DMA_SEG_SIZE, .isc_nfl = 1, .isc_ntxqs = 1, .isc_nrxqs = 1, .isc_admin_intrcnt = 1, .isc_vendor_info = iavf_vendor_info_array, .isc_driver_version = IAVF_DRIVER_VERSION_STRING, .isc_driver = &iavf_if_driver, .isc_flags = IFLIB_NEED_SCRATCH | IFLIB_NEED_ZERO_CSUM | IFLIB_TSO_INIT_IP | IFLIB_IS_VF, .isc_nrxd_min = {IXL_MIN_RING}, .isc_ntxd_min = {IXL_MIN_RING}, .isc_nrxd_max = {IXL_MAX_RING}, .isc_ntxd_max = {IXL_MAX_RING}, .isc_nrxd_default = {IXL_DEFAULT_RING}, .isc_ntxd_default = {IXL_DEFAULT_RING}, }; -if_shared_ctx_t iavf_sctx = &iavf_sctx_init; - /*** Functions ***/ static void * iavf_register(device_t dev) { - return (iavf_sctx); + return (&iavf_sctx_init); } static int iavf_allocate_pci_resources(struct iavf_sc *sc) { struct i40e_hw *hw = &sc->hw; device_t dev = iflib_get_dev(sc->vsi.ctx); int rid; /* Map BAR0 */ rid = PCIR_BAR(0); sc->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (!(sc->pci_mem)) { device_printf(dev, "Unable to allocate bus resource: PCI memory\n"); return (ENXIO); } /* Save off the PCI information */ hw->vendor_id = pci_get_vendor(dev); hw->device_id = pci_get_device(dev); hw->revision_id = pci_read_config(dev, PCIR_REVID, 1); hw->subsystem_vendor_id = pci_read_config(dev, PCIR_SUBVEND_0, 2); hw->subsystem_device_id = pci_read_config(dev, PCIR_SUBDEV_0, 2); hw->bus.device = pci_get_slot(dev); hw->bus.func = pci_get_function(dev); /* Save off register access information */ sc->osdep.mem_bus_space_tag = rman_get_bustag(sc->pci_mem); sc->osdep.mem_bus_space_handle = rman_get_bushandle(sc->pci_mem); sc->osdep.mem_bus_space_size = rman_get_size(sc->pci_mem); sc->osdep.flush_reg = I40E_VFGEN_RSTAT; sc->osdep.dev = dev; sc->hw.hw_addr = (u8 *) &sc->osdep.mem_bus_space_handle; sc->hw.back = &sc->osdep; return (0); } static int iavf_if_attach_pre(if_ctx_t ctx) { device_t dev; struct iavf_sc *sc; struct i40e_hw *hw; struct ixl_vsi *vsi; if_softc_ctx_t scctx; int error = 0; dev = iflib_get_dev(ctx); sc = iflib_get_softc(ctx); vsi = &sc->vsi; vsi->back = sc; sc->dev = dev; hw = &sc->hw; vsi->dev = dev; vsi->hw = &sc->hw; vsi->num_vlans = 0; vsi->ctx = ctx; vsi->media = iflib_get_media(ctx); vsi->shared = scctx = iflib_get_softc_ctx(ctx); iavf_save_tunables(sc); /* Do PCI setup - map BAR0, etc */ if (iavf_allocate_pci_resources(sc)) { device_printf(dev, "%s: Allocation of PCI resources failed\n", __func__); error = ENXIO; goto err_early; } iavf_dbg_init(sc, "Allocated PCI resources and MSI-X vectors\n"); /* * XXX: This is called by init_shared_code in the PF driver, * but the rest of that function does not support VFs. */ error = i40e_set_mac_type(hw); if (error) { device_printf(dev, "%s: set_mac_type failed: %d\n", __func__, error); goto err_pci_res; } error = iavf_reset_complete(hw); if (error) { device_printf(dev, "%s: Device is still being reset\n", __func__); goto err_pci_res; } iavf_dbg_init(sc, "VF Device is ready for configuration\n"); /* Sets up Admin Queue */ error = iavf_setup_vc(sc); if (error) { device_printf(dev, "%s: Error setting up PF comms, %d\n", __func__, error); goto err_pci_res; } iavf_dbg_init(sc, "PF API version verified\n"); /* Need API version before sending reset message */ error = iavf_reset(sc); if (error) { device_printf(dev, "VF reset failed; reload the driver\n"); goto err_aq; } iavf_dbg_init(sc, "VF reset complete\n"); /* Ask for VF config from PF */ error = iavf_vf_config(sc); if (error) { device_printf(dev, "Error getting configuration from PF: %d\n", error); goto err_aq; } device_printf(dev, "VSIs %d, QPs %d, MSI-X %d, RSS sizes: key %d lut %d\n", sc->vf_res->num_vsis, sc->vf_res->num_queue_pairs, sc->vf_res->max_vectors, sc->vf_res->rss_key_size, sc->vf_res->rss_lut_size); iavf_dbg_info(sc, "Capabilities=%b\n", sc->vf_res->vf_cap_flags, IAVF_PRINTF_VF_OFFLOAD_FLAGS); /* got VF config message back from PF, now we can parse it */ for (int i = 0; i < sc->vf_res->num_vsis; i++) { /* XXX: We only use the first VSI we find */ if (sc->vf_res->vsi_res[i].vsi_type == I40E_VSI_SRIOV) sc->vsi_res = &sc->vf_res->vsi_res[i]; } if (!sc->vsi_res) { device_printf(dev, "%s: no LAN VSI found\n", __func__); error = EIO; goto err_res_buf; } vsi->id = sc->vsi_res->vsi_id; iavf_dbg_init(sc, "Resource Acquisition complete\n"); /* If no mac address was assigned just make a random one */ if (!iavf_check_ether_addr(hw->mac.addr)) { u8 addr[ETHER_ADDR_LEN]; arc4rand(&addr, sizeof(addr), 0); addr[0] &= 0xFE; addr[0] |= 0x02; bcopy(addr, hw->mac.addr, sizeof(addr)); } bcopy(hw->mac.addr, hw->mac.perm_addr, ETHER_ADDR_LEN); iflib_set_mac(ctx, hw->mac.addr); /* Allocate filter lists */ iavf_init_filters(sc); /* Fill out more iflib parameters */ scctx->isc_ntxqsets_max = scctx->isc_nrxqsets_max = sc->vsi_res->num_queue_pairs; if (vsi->enable_head_writeback) { scctx->isc_txqsizes[0] = roundup2(scctx->isc_ntxd[0] * sizeof(struct i40e_tx_desc) + sizeof(u32), DBA_ALIGN); scctx->isc_txrx = &ixl_txrx_hwb; } else { scctx->isc_txqsizes[0] = roundup2(scctx->isc_ntxd[0] * sizeof(struct i40e_tx_desc), DBA_ALIGN); scctx->isc_txrx = &ixl_txrx_dwb; } scctx->isc_rxqsizes[0] = roundup2(scctx->isc_nrxd[0] * sizeof(union i40e_32byte_rx_desc), DBA_ALIGN); scctx->isc_msix_bar = PCIR_BAR(IXL_MSIX_BAR); scctx->isc_tx_nsegments = IXL_MAX_TX_SEGS; scctx->isc_tx_tso_segments_max = IXL_MAX_TSO_SEGS; scctx->isc_tx_tso_size_max = IXL_TSO_SIZE; scctx->isc_tx_tso_segsize_max = IXL_MAX_DMA_SEG_SIZE; scctx->isc_rss_table_size = IXL_RSS_VSI_LUT_SIZE; scctx->isc_tx_csum_flags = CSUM_OFFLOAD; scctx->isc_capabilities = scctx->isc_capenable = IXL_CAPS; return (0); err_res_buf: free(sc->vf_res, M_IAVF); err_aq: i40e_shutdown_adminq(hw); err_pci_res: iavf_free_pci_resources(sc); err_early: return (error); } static int iavf_if_attach_post(if_ctx_t ctx) { device_t dev; struct iavf_sc *sc; struct i40e_hw *hw; struct ixl_vsi *vsi; int error = 0; INIT_DBG_DEV(dev, "begin"); dev = iflib_get_dev(ctx); sc = iflib_get_softc(ctx); vsi = &sc->vsi; vsi->ifp = iflib_get_ifp(ctx); hw = &sc->hw; /* Save off determined number of queues for interface */ vsi->num_rx_queues = vsi->shared->isc_nrxqsets; vsi->num_tx_queues = vsi->shared->isc_ntxqsets; /* Setup the stack interface */ iavf_setup_interface(dev, sc); INIT_DBG_DEV(dev, "Interface setup complete"); /* Initialize statistics & add sysctls */ bzero(&sc->vsi.eth_stats, sizeof(struct i40e_eth_stats)); iavf_add_device_sysctls(sc); sc->init_state = IAVF_INIT_READY; atomic_store_rel_32(&sc->queues_enabled, 0); /* We want AQ enabled early for init */ iavf_enable_adminq_irq(hw); INIT_DBG_DEV(dev, "end"); return (error); } /** * XXX: iflib always ignores the return value of detach() * -> This means that this isn't allowed to fail */ static int iavf_if_detach(if_ctx_t ctx) { struct iavf_sc *sc = iflib_get_softc(ctx); struct ixl_vsi *vsi = &sc->vsi; struct i40e_hw *hw = &sc->hw; device_t dev = sc->dev; enum i40e_status_code status; INIT_DBG_DEV(dev, "begin"); /* Remove all the media and link information */ ifmedia_removeall(vsi->media); iavf_disable_adminq_irq(hw); status = i40e_shutdown_adminq(&sc->hw); if (status != I40E_SUCCESS) { device_printf(dev, "i40e_shutdown_adminq() failed with status %s\n", i40e_stat_str(hw, status)); } free(sc->vf_res, M_IAVF); iavf_free_pci_resources(sc); iavf_free_filters(sc); INIT_DBG_DEV(dev, "end"); return (0); } static int iavf_if_shutdown(if_ctx_t ctx) { return (0); } static int iavf_if_suspend(if_ctx_t ctx) { return (0); } static int iavf_if_resume(if_ctx_t ctx) { return (0); } static int iavf_send_vc_msg_sleep(struct iavf_sc *sc, u32 op) { int error = 0; if_ctx_t ctx = sc->vsi.ctx; error = ixl_vc_send_cmd(sc, op); if (error != 0) { iavf_dbg_vc(sc, "Error sending %b: %d\n", op, IAVF_FLAGS, error); return (error); } /* Don't wait for a response if the device is being detached. */ if (!iflib_in_detach(ctx)) { iavf_dbg_vc(sc, "Sleeping for op %b\n", op, IAVF_FLAGS); error = sx_sleep(ixl_vc_get_op_chan(sc, op), iflib_ctx_lock_get(ctx), PRI_MAX, "iavf_vc", IAVF_AQ_TIMEOUT); if (error == EWOULDBLOCK) device_printf(sc->dev, "%b timed out\n", op, IAVF_FLAGS); } return (error); } static int iavf_send_vc_msg(struct iavf_sc *sc, u32 op) { int error = 0; error = ixl_vc_send_cmd(sc, op); if (error != 0) iavf_dbg_vc(sc, "Error sending %b: %d\n", op, IAVF_FLAGS, error); return (error); } static void iavf_init_queues(struct ixl_vsi *vsi) { struct ixl_tx_queue *tx_que = vsi->tx_queues; struct ixl_rx_queue *rx_que = vsi->rx_queues; struct rx_ring *rxr; for (int i = 0; i < vsi->num_tx_queues; i++, tx_que++) ixl_init_tx_ring(vsi, tx_que); for (int i = 0; i < vsi->num_rx_queues; i++, rx_que++) { rxr = &rx_que->rxr; rxr->mbuf_sz = iflib_get_rx_mbuf_sz(vsi->ctx); wr32(vsi->hw, rxr->tail, 0); } } void iavf_if_init(if_ctx_t ctx) { struct iavf_sc *sc = iflib_get_softc(ctx); struct ixl_vsi *vsi = &sc->vsi; struct i40e_hw *hw = &sc->hw; struct ifnet *ifp = iflib_get_ifp(ctx); u8 tmpaddr[ETHER_ADDR_LEN]; int error = 0; INIT_DBG_IF(ifp, "begin"); MPASS(sx_xlocked(iflib_ctx_lock_get(ctx))); error = iavf_reset_complete(hw); if (error) { device_printf(sc->dev, "%s: VF reset failed\n", __func__); } if (!i40e_check_asq_alive(hw)) { iavf_dbg_info(sc, "ASQ is not alive, re-initializing AQ\n"); pci_enable_busmaster(sc->dev); i40e_shutdown_adminq(hw); i40e_init_adminq(hw); } /* Make sure queues are disabled */ iavf_send_vc_msg(sc, IAVF_FLAG_AQ_DISABLE_QUEUES); bcopy(IF_LLADDR(ifp), tmpaddr, ETHER_ADDR_LEN); if (!ixl_ether_is_equal(hw->mac.addr, tmpaddr) && (i40e_validate_mac_addr(tmpaddr) == I40E_SUCCESS)) { error = iavf_del_mac_filter(sc, hw->mac.addr); if (error == 0) iavf_send_vc_msg(sc, IAVF_FLAG_AQ_DEL_MAC_FILTER); bcopy(tmpaddr, hw->mac.addr, ETH_ALEN); } error = iavf_add_mac_filter(sc, hw->mac.addr, 0); if (!error || error == EEXIST) iavf_send_vc_msg(sc, IAVF_FLAG_AQ_ADD_MAC_FILTER); iflib_set_mac(ctx, hw->mac.addr); /* Prepare the queues for operation */ iavf_init_queues(vsi); /* Set initial ITR values */ iavf_configure_itr(sc); iavf_send_vc_msg(sc, IAVF_FLAG_AQ_CONFIGURE_QUEUES); /* Set up RSS */ iavf_config_rss(sc); /* Map vectors */ iavf_send_vc_msg(sc, IAVF_FLAG_AQ_MAP_VECTORS); /* Init SW TX ring indices */ if (vsi->enable_head_writeback) ixl_init_tx_cidx(vsi); else ixl_init_tx_rsqs(vsi); /* Configure promiscuous mode */ iavf_if_promisc_set(ctx, if_getflags(ifp)); /* Enable queues */ iavf_send_vc_msg_sleep(sc, IAVF_FLAG_AQ_ENABLE_QUEUES); sc->init_state = IAVF_RUNNING; } /* * iavf_attach() helper function; initializes the admin queue * and attempts to establish contact with the PF by * retrying the initial "API version" message several times * or until the PF responds. */ static int iavf_setup_vc(struct iavf_sc *sc) { struct i40e_hw *hw = &sc->hw; device_t dev = sc->dev; int error = 0, ret_error = 0, asq_retries = 0; bool send_api_ver_retried = 0; /* Need to set these AQ paramters before initializing AQ */ hw->aq.num_arq_entries = IXL_AQ_LEN; hw->aq.num_asq_entries = IXL_AQ_LEN; hw->aq.arq_buf_size = IXL_AQ_BUF_SZ; hw->aq.asq_buf_size = IXL_AQ_BUF_SZ; for (int i = 0; i < IAVF_AQ_MAX_ERR; i++) { /* Initialize admin queue */ error = i40e_init_adminq(hw); if (error) { device_printf(dev, "%s: init_adminq failed: %d\n", __func__, error); ret_error = 1; continue; } iavf_dbg_init(sc, "Initialized Admin Queue; starting" " send_api_ver attempt %d", i+1); retry_send: /* Send VF's API version */ error = iavf_send_api_ver(sc); if (error) { i40e_shutdown_adminq(hw); ret_error = 2; device_printf(dev, "%s: unable to send api" " version to PF on attempt %d, error %d\n", __func__, i+1, error); } asq_retries = 0; while (!i40e_asq_done(hw)) { if (++asq_retries > IAVF_AQ_MAX_ERR) { i40e_shutdown_adminq(hw); device_printf(dev, "Admin Queue timeout " "(waiting for send_api_ver), %d more tries...\n", IAVF_AQ_MAX_ERR - (i + 1)); ret_error = 3; break; } i40e_msec_pause(10); } if (asq_retries > IAVF_AQ_MAX_ERR) continue; iavf_dbg_init(sc, "Sent API version message to PF"); /* Verify that the VF accepts the PF's API version */ error = iavf_verify_api_ver(sc); if (error == ETIMEDOUT) { if (!send_api_ver_retried) { /* Resend message, one more time */ send_api_ver_retried = true; device_printf(dev, "%s: Timeout while verifying API version on first" " try!\n", __func__); goto retry_send; } else { device_printf(dev, "%s: Timeout while verifying API version on second" " try!\n", __func__); ret_error = 4; break; } } if (error) { device_printf(dev, "%s: Unable to verify API version," " error %s\n", __func__, i40e_stat_str(hw, error)); ret_error = 5; } break; } if (ret_error >= 4) i40e_shutdown_adminq(hw); return (ret_error); } /* * iavf_attach() helper function; asks the PF for this VF's * configuration, and saves the information if it receives it. */ static int iavf_vf_config(struct iavf_sc *sc) { struct i40e_hw *hw = &sc->hw; device_t dev = sc->dev; int bufsz, error = 0, ret_error = 0; int asq_retries, retried = 0; retry_config: error = iavf_send_vf_config_msg(sc); if (error) { device_printf(dev, "%s: Unable to send VF config request, attempt %d," " error %d\n", __func__, retried + 1, error); ret_error = 2; } asq_retries = 0; while (!i40e_asq_done(hw)) { if (++asq_retries > IAVF_AQ_MAX_ERR) { device_printf(dev, "%s: Admin Queue timeout " "(waiting for send_vf_config_msg), attempt %d\n", __func__, retried + 1); ret_error = 3; goto fail; } i40e_msec_pause(10); } iavf_dbg_init(sc, "Sent VF config message to PF, attempt %d\n", retried + 1); if (!sc->vf_res) { bufsz = sizeof(struct virtchnl_vf_resource) + (I40E_MAX_VF_VSI * sizeof(struct virtchnl_vsi_resource)); sc->vf_res = malloc(bufsz, M_IAVF, M_NOWAIT); if (!sc->vf_res) { device_printf(dev, "%s: Unable to allocate memory for VF configuration" " message from PF on attempt %d\n", __func__, retried + 1); ret_error = 1; goto fail; } } /* Check for VF config response */ error = iavf_get_vf_config(sc); if (error == ETIMEDOUT) { /* The 1st time we timeout, send the configuration message again */ if (!retried) { retried++; goto retry_config; } device_printf(dev, "%s: iavf_get_vf_config() timed out waiting for a response\n", __func__); } if (error) { device_printf(dev, "%s: Unable to get VF configuration from PF after %d tries!\n", __func__, retried + 1); ret_error = 4; } goto done; fail: free(sc->vf_res, M_IAVF); done: return (ret_error); } static int iavf_if_msix_intr_assign(if_ctx_t ctx, int msix) { struct iavf_sc *sc = iflib_get_softc(ctx); struct ixl_vsi *vsi = &sc->vsi; struct ixl_rx_queue *rx_que = vsi->rx_queues; struct ixl_tx_queue *tx_que = vsi->tx_queues; int err, i, rid, vector = 0; char buf[16]; MPASS(vsi->shared->isc_nrxqsets > 0); MPASS(vsi->shared->isc_ntxqsets > 0); /* Admin Que is vector 0*/ rid = vector + 1; err = iflib_irq_alloc_generic(ctx, &vsi->irq, rid, IFLIB_INTR_ADMIN, iavf_msix_adminq, sc, 0, "aq"); if (err) { iflib_irq_free(ctx, &vsi->irq); device_printf(iflib_get_dev(ctx), "Failed to register Admin Que handler"); return (err); } /* Now set up the stations */ for (i = 0, vector = 1; i < vsi->shared->isc_nrxqsets; i++, vector++, rx_que++) { rid = vector + 1; snprintf(buf, sizeof(buf), "rxq%d", i); err = iflib_irq_alloc_generic(ctx, &rx_que->que_irq, rid, IFLIB_INTR_RXTX, iavf_msix_que, rx_que, rx_que->rxr.me, buf); /* XXX: Does the driver work as expected if there are fewer num_rx_queues than * what's expected in the iflib context? */ if (err) { device_printf(iflib_get_dev(ctx), "Failed to allocate queue RX int vector %d, err: %d\n", i, err); vsi->num_rx_queues = i + 1; goto fail; } rx_que->msix = vector; } bzero(buf, sizeof(buf)); for (i = 0; i < vsi->shared->isc_ntxqsets; i++, tx_que++) { snprintf(buf, sizeof(buf), "txq%d", i); iflib_softirq_alloc_generic(ctx, &vsi->rx_queues[i % vsi->shared->isc_nrxqsets].que_irq, IFLIB_INTR_TX, tx_que, tx_que->txr.me, buf); /* TODO: Maybe call a strategy function for this to figure out which * interrupts to map Tx queues to. I don't know if there's an immediately * better way than this other than a user-supplied map, though. */ tx_que->msix = (i % vsi->shared->isc_nrxqsets) + 1; } return (0); fail: iflib_irq_free(ctx, &vsi->irq); rx_que = vsi->rx_queues; for (int i = 0; i < vsi->num_rx_queues; i++, rx_que++) iflib_irq_free(ctx, &rx_que->que_irq); return (err); } /* Enable all interrupts */ static void iavf_if_enable_intr(if_ctx_t ctx) { struct iavf_sc *sc = iflib_get_softc(ctx); struct ixl_vsi *vsi = &sc->vsi; iavf_enable_intr(vsi); } /* Disable all interrupts */ static void iavf_if_disable_intr(if_ctx_t ctx) { struct iavf_sc *sc = iflib_get_softc(ctx); struct ixl_vsi *vsi = &sc->vsi; iavf_disable_intr(vsi); } static int iavf_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid) { struct iavf_sc *sc = iflib_get_softc(ctx); struct ixl_vsi *vsi = &sc->vsi; struct i40e_hw *hw = vsi->hw; struct ixl_rx_queue *rx_que = &vsi->rx_queues[rxqid]; iavf_enable_queue_irq(hw, rx_que->msix - 1); return (0); } static int iavf_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid) { struct iavf_sc *sc = iflib_get_softc(ctx); struct ixl_vsi *vsi = &sc->vsi; struct i40e_hw *hw = vsi->hw; struct ixl_tx_queue *tx_que = &vsi->tx_queues[txqid]; iavf_enable_queue_irq(hw, tx_que->msix - 1); return (0); } static int iavf_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int ntxqs, int ntxqsets) { struct iavf_sc *sc = iflib_get_softc(ctx); struct ixl_vsi *vsi = &sc->vsi; if_softc_ctx_t scctx = vsi->shared; struct ixl_tx_queue *que; int i, j, error = 0; MPASS(scctx->isc_ntxqsets > 0); MPASS(ntxqs == 1); MPASS(scctx->isc_ntxqsets == ntxqsets); /* Allocate queue structure memory */ if (!(vsi->tx_queues = (struct ixl_tx_queue *) malloc(sizeof(struct ixl_tx_queue) *ntxqsets, M_IAVF, M_NOWAIT | M_ZERO))) { device_printf(iflib_get_dev(ctx), "Unable to allocate TX ring memory\n"); return (ENOMEM); } for (i = 0, que = vsi->tx_queues; i < ntxqsets; i++, que++) { struct tx_ring *txr = &que->txr; txr->me = i; que->vsi = vsi; if (!vsi->enable_head_writeback) { /* Allocate report status array */ if (!(txr->tx_rsq = malloc(sizeof(qidx_t) * scctx->isc_ntxd[0], M_IAVF, M_NOWAIT))) { device_printf(iflib_get_dev(ctx), "failed to allocate tx_rsq memory\n"); error = ENOMEM; goto fail; } /* Init report status array */ for (j = 0; j < scctx->isc_ntxd[0]; j++) txr->tx_rsq[j] = QIDX_INVALID; } /* get the virtual and physical address of the hardware queues */ txr->tail = I40E_QTX_TAIL1(txr->me); txr->tx_base = (struct i40e_tx_desc *)vaddrs[i * ntxqs]; txr->tx_paddr = paddrs[i * ntxqs]; txr->que = que; } return (0); fail: iavf_if_queues_free(ctx); return (error); } static int iavf_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int nrxqs, int nrxqsets) { struct iavf_sc *sc = iflib_get_softc(ctx); struct ixl_vsi *vsi = &sc->vsi; struct ixl_rx_queue *que; int i, error = 0; #ifdef INVARIANTS if_softc_ctx_t scctx = vsi->shared; MPASS(scctx->isc_nrxqsets > 0); MPASS(nrxqs == 1); MPASS(scctx->isc_nrxqsets == nrxqsets); #endif /* Allocate queue structure memory */ if (!(vsi->rx_queues = (struct ixl_rx_queue *) malloc(sizeof(struct ixl_rx_queue) * nrxqsets, M_IAVF, M_NOWAIT | M_ZERO))) { device_printf(iflib_get_dev(ctx), "Unable to allocate RX ring memory\n"); error = ENOMEM; goto fail; } for (i = 0, que = vsi->rx_queues; i < nrxqsets; i++, que++) { struct rx_ring *rxr = &que->rxr; rxr->me = i; que->vsi = vsi; /* get the virtual and physical address of the hardware queues */ rxr->tail = I40E_QRX_TAIL1(rxr->me); rxr->rx_base = (union i40e_rx_desc *)vaddrs[i * nrxqs]; rxr->rx_paddr = paddrs[i * nrxqs]; rxr->que = que; } return (0); fail: iavf_if_queues_free(ctx); return (error); } static void iavf_if_queues_free(if_ctx_t ctx) { struct iavf_sc *sc = iflib_get_softc(ctx); struct ixl_vsi *vsi = &sc->vsi; if (!vsi->enable_head_writeback) { struct ixl_tx_queue *que; int i = 0; for (i = 0, que = vsi->tx_queues; i < vsi->shared->isc_ntxqsets; i++, que++) { struct tx_ring *txr = &que->txr; if (txr->tx_rsq != NULL) { free(txr->tx_rsq, M_IAVF); txr->tx_rsq = NULL; } } } if (vsi->tx_queues != NULL) { free(vsi->tx_queues, M_IAVF); vsi->tx_queues = NULL; } if (vsi->rx_queues != NULL) { free(vsi->rx_queues, M_IAVF); vsi->rx_queues = NULL; } } static int iavf_check_aq_errors(struct iavf_sc *sc) { struct i40e_hw *hw = &sc->hw; device_t dev = sc->dev; u32 reg, oldreg; u8 aq_error = false; /* check for Admin queue errors */ oldreg = reg = rd32(hw, hw->aq.arq.len); if (reg & I40E_VF_ARQLEN1_ARQVFE_MASK) { device_printf(dev, "ARQ VF Error detected\n"); reg &= ~I40E_VF_ARQLEN1_ARQVFE_MASK; aq_error = true; } if (reg & I40E_VF_ARQLEN1_ARQOVFL_MASK) { device_printf(dev, "ARQ Overflow Error detected\n"); reg &= ~I40E_VF_ARQLEN1_ARQOVFL_MASK; aq_error = true; } if (reg & I40E_VF_ARQLEN1_ARQCRIT_MASK) { device_printf(dev, "ARQ Critical Error detected\n"); reg &= ~I40E_VF_ARQLEN1_ARQCRIT_MASK; aq_error = true; } if (oldreg != reg) wr32(hw, hw->aq.arq.len, reg); oldreg = reg = rd32(hw, hw->aq.asq.len); if (reg & I40E_VF_ATQLEN1_ATQVFE_MASK) { device_printf(dev, "ASQ VF Error detected\n"); reg &= ~I40E_VF_ATQLEN1_ATQVFE_MASK; aq_error = true; } if (reg & I40E_VF_ATQLEN1_ATQOVFL_MASK) { device_printf(dev, "ASQ Overflow Error detected\n"); reg &= ~I40E_VF_ATQLEN1_ATQOVFL_MASK; aq_error = true; } if (reg & I40E_VF_ATQLEN1_ATQCRIT_MASK) { device_printf(dev, "ASQ Critical Error detected\n"); reg &= ~I40E_VF_ATQLEN1_ATQCRIT_MASK; aq_error = true; } if (oldreg != reg) wr32(hw, hw->aq.asq.len, reg); if (aq_error) { device_printf(dev, "WARNING: Stopping VF!\n"); /* * A VF reset might not be enough to fix a problem here; * a PF reset could be required. */ sc->init_state = IAVF_RESET_REQUIRED; iavf_stop(sc); iavf_request_reset(sc); } return (aq_error ? EIO : 0); } static enum i40e_status_code iavf_process_adminq(struct iavf_sc *sc, u16 *pending) { enum i40e_status_code status = I40E_SUCCESS; struct i40e_arq_event_info event; struct i40e_hw *hw = &sc->hw; struct virtchnl_msg *v_msg; int error = 0, loop = 0; u32 reg; error = iavf_check_aq_errors(sc); if (error) return (I40E_ERR_ADMIN_QUEUE_CRITICAL_ERROR); event.buf_len = IXL_AQ_BUF_SZ; event.msg_buf = sc->aq_buffer; bzero(event.msg_buf, IXL_AQ_BUF_SZ); v_msg = (struct virtchnl_msg *)&event.desc; /* clean and process any events */ do { status = i40e_clean_arq_element(hw, &event, pending); /* * Also covers normal case when i40e_clean_arq_element() * returns "I40E_ERR_ADMIN_QUEUE_NO_WORK" */ if (status) break; iavf_vc_completion(sc, v_msg->v_opcode, v_msg->v_retval, event.msg_buf, event.msg_len); bzero(event.msg_buf, IXL_AQ_BUF_SZ); } while (*pending && (loop++ < IXL_ADM_LIMIT)); /* Re-enable admin queue interrupt cause */ reg = rd32(hw, I40E_VFINT_ICR0_ENA1); reg |= I40E_VFINT_ICR0_ENA1_ADMINQ_MASK; wr32(hw, I40E_VFINT_ICR0_ENA1, reg); return (status); } static void iavf_if_update_admin_status(if_ctx_t ctx) { struct iavf_sc *sc = iflib_get_softc(ctx); struct i40e_hw *hw = &sc->hw; u16 pending; iavf_process_adminq(sc, &pending); iavf_update_link_status(sc); /* * If there are still messages to process, reschedule. * Otherwise, re-enable the Admin Queue interrupt. */ if (pending > 0) iflib_admin_intr_deferred(ctx); else iavf_enable_adminq_irq(hw); } static u_int iavf_mc_filter_apply(void *arg, struct sockaddr_dl *sdl, u_int count __unused) { struct iavf_sc *sc = arg; int error; error = iavf_add_mac_filter(sc, (u8*)LLADDR(sdl), IAVF_FILTER_MC); return (!error); } static void iavf_if_multi_set(if_ctx_t ctx) { struct iavf_sc *sc = iflib_get_softc(ctx); IOCTL_DEBUGOUT("iavf_if_multi_set: begin"); if (__predict_false(if_llmaddr_count(iflib_get_ifp(ctx)) >= MAX_MULTICAST_ADDR)) { /* Delete MC filters and enable mulitcast promisc instead */ iavf_init_multi(sc); sc->promisc_flags |= FLAG_VF_MULTICAST_PROMISC; iavf_send_vc_msg(sc, IAVF_FLAG_AQ_CONFIGURE_PROMISC); return; } /* If there aren't too many filters, delete existing MC filters */ iavf_init_multi(sc); /* And (re-)install filters for all mcast addresses */ if (if_foreach_llmaddr(iflib_get_ifp(ctx), iavf_mc_filter_apply, sc) > 0) iavf_send_vc_msg(sc, IAVF_FLAG_AQ_ADD_MAC_FILTER); } static int iavf_if_mtu_set(if_ctx_t ctx, uint32_t mtu) { struct iavf_sc *sc = iflib_get_softc(ctx); struct ixl_vsi *vsi = &sc->vsi; IOCTL_DEBUGOUT("ioctl: SIOCSIFMTU (Set Interface MTU)"); if (mtu > IXL_MAX_FRAME - ETHER_HDR_LEN - ETHER_CRC_LEN - ETHER_VLAN_ENCAP_LEN) return (EINVAL); vsi->shared->isc_max_frame_size = mtu + ETHER_HDR_LEN + ETHER_CRC_LEN + ETHER_VLAN_ENCAP_LEN; return (0); } static void iavf_if_media_status(if_ctx_t ctx, struct ifmediareq *ifmr) { #ifdef IXL_DEBUG struct ifnet *ifp = iflib_get_ifp(ctx); #endif struct iavf_sc *sc = iflib_get_softc(ctx); INIT_DBG_IF(ifp, "begin"); iavf_update_link_status(sc); ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; if (!sc->link_up) return; ifmr->ifm_status |= IFM_ACTIVE; /* Hardware is always full-duplex */ ifmr->ifm_active |= IFM_FDX; /* Based on the link speed reported by the PF over the AdminQ, choose a * PHY type to report. This isn't 100% correct since we don't really * know the underlying PHY type of the PF, but at least we can report * a valid link speed... */ switch (sc->link_speed) { case VIRTCHNL_LINK_SPEED_100MB: ifmr->ifm_active |= IFM_100_TX; break; case VIRTCHNL_LINK_SPEED_1GB: ifmr->ifm_active |= IFM_1000_T; break; case VIRTCHNL_LINK_SPEED_10GB: ifmr->ifm_active |= IFM_10G_SR; break; case VIRTCHNL_LINK_SPEED_20GB: case VIRTCHNL_LINK_SPEED_25GB: ifmr->ifm_active |= IFM_25G_SR; break; case VIRTCHNL_LINK_SPEED_40GB: ifmr->ifm_active |= IFM_40G_SR4; break; default: ifmr->ifm_active |= IFM_UNKNOWN; break; } INIT_DBG_IF(ifp, "end"); } static int iavf_if_media_change(if_ctx_t ctx) { struct ifmedia *ifm = iflib_get_media(ctx); INIT_DEBUGOUT("ixl_media_change: begin"); if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) return (EINVAL); if_printf(iflib_get_ifp(ctx), "Media change is not supported.\n"); return (ENODEV); } static int iavf_if_promisc_set(if_ctx_t ctx, int flags) { struct iavf_sc *sc = iflib_get_softc(ctx); struct ifnet *ifp = iflib_get_ifp(ctx); sc->promisc_flags = 0; if (flags & IFF_ALLMULTI || if_llmaddr_count(ifp) >= MAX_MULTICAST_ADDR) sc->promisc_flags |= FLAG_VF_MULTICAST_PROMISC; if (flags & IFF_PROMISC) sc->promisc_flags |= FLAG_VF_UNICAST_PROMISC; iavf_send_vc_msg(sc, IAVF_FLAG_AQ_CONFIGURE_PROMISC); return (0); } static void iavf_if_timer(if_ctx_t ctx, uint16_t qid) { struct iavf_sc *sc = iflib_get_softc(ctx); struct i40e_hw *hw = &sc->hw; u32 val; if (qid != 0) return; /* Check for when PF triggers a VF reset */ val = rd32(hw, I40E_VFGEN_RSTAT) & I40E_VFGEN_RSTAT_VFR_STATE_MASK; if (val != VIRTCHNL_VFR_VFACTIVE && val != VIRTCHNL_VFR_COMPLETED) { iavf_dbg_info(sc, "reset in progress! (%d)\n", val); return; } /* Fire off the adminq task */ iflib_admin_intr_deferred(ctx); /* Update stats */ iavf_request_stats(sc); } static void iavf_if_vlan_register(if_ctx_t ctx, u16 vtag) { struct iavf_sc *sc = iflib_get_softc(ctx); struct ixl_vsi *vsi = &sc->vsi; struct iavf_vlan_filter *v; if ((vtag == 0) || (vtag > 4095)) /* Invalid */ return; ++vsi->num_vlans; v = malloc(sizeof(struct iavf_vlan_filter), M_IAVF, M_WAITOK | M_ZERO); SLIST_INSERT_HEAD(sc->vlan_filters, v, next); v->vlan = vtag; v->flags = IAVF_FILTER_ADD; iavf_send_vc_msg(sc, IAVF_FLAG_AQ_ADD_VLAN_FILTER); } static void iavf_if_vlan_unregister(if_ctx_t ctx, u16 vtag) { struct iavf_sc *sc = iflib_get_softc(ctx); struct ixl_vsi *vsi = &sc->vsi; struct iavf_vlan_filter *v; int i = 0; if ((vtag == 0) || (vtag > 4095)) /* Invalid */ return; SLIST_FOREACH(v, sc->vlan_filters, next) { if (v->vlan == vtag) { v->flags = IAVF_FILTER_DEL; ++i; --vsi->num_vlans; } } if (i) iavf_send_vc_msg(sc, IAVF_FLAG_AQ_DEL_VLAN_FILTER); } static uint64_t iavf_if_get_counter(if_ctx_t ctx, ift_counter cnt) { struct iavf_sc *sc = iflib_get_softc(ctx); struct ixl_vsi *vsi = &sc->vsi; if_t ifp = iflib_get_ifp(ctx); switch (cnt) { case IFCOUNTER_IPACKETS: return (vsi->ipackets); case IFCOUNTER_IERRORS: return (vsi->ierrors); case IFCOUNTER_OPACKETS: return (vsi->opackets); case IFCOUNTER_OERRORS: return (vsi->oerrors); case IFCOUNTER_COLLISIONS: /* Collisions are by standard impossible in 40G/10G Ethernet */ return (0); case IFCOUNTER_IBYTES: return (vsi->ibytes); case IFCOUNTER_OBYTES: return (vsi->obytes); case IFCOUNTER_IMCASTS: return (vsi->imcasts); case IFCOUNTER_OMCASTS: return (vsi->omcasts); case IFCOUNTER_IQDROPS: return (vsi->iqdrops); case IFCOUNTER_OQDROPS: return (vsi->oqdrops); case IFCOUNTER_NOPROTO: return (vsi->noproto); default: return (if_get_counter_default(ifp, cnt)); } } /* iavf_if_needs_restart - Tell iflib when the driver needs to be reinitialized * @ctx: iflib context * @event: event code to check * * Defaults to returning true for every event. * * @returns true if iflib needs to reinit the interface */ static bool iavf_if_needs_restart(if_ctx_t ctx __unused, enum iflib_restart_event event) { switch (event) { case IFLIB_RESTART_VLAN_CONFIG: /* This case must return true if VLAN anti-spoof checks are * enabled by the PF driver for the VF. */ default: return (true); } } static void iavf_free_pci_resources(struct iavf_sc *sc) { struct ixl_vsi *vsi = &sc->vsi; struct ixl_rx_queue *rx_que = vsi->rx_queues; device_t dev = sc->dev; /* We may get here before stations are set up */ if (rx_que == NULL) goto early; /* Release all interrupts */ iflib_irq_free(vsi->ctx, &vsi->irq); for (int i = 0; i < vsi->num_rx_queues; i++, rx_que++) iflib_irq_free(vsi->ctx, &rx_que->que_irq); early: if (sc->pci_mem != NULL) bus_release_resource(dev, SYS_RES_MEMORY, rman_get_rid(sc->pci_mem), sc->pci_mem); } /* ** Requests a VF reset from the PF. ** ** Requires the VF's Admin Queue to be initialized. */ static int iavf_reset(struct iavf_sc *sc) { struct i40e_hw *hw = &sc->hw; device_t dev = sc->dev; int error = 0; /* Ask the PF to reset us if we are initiating */ if (sc->init_state != IAVF_RESET_PENDING) iavf_request_reset(sc); i40e_msec_pause(100); error = iavf_reset_complete(hw); if (error) { device_printf(dev, "%s: VF reset failed\n", __func__); return (error); } pci_enable_busmaster(dev); error = i40e_shutdown_adminq(hw); if (error) { device_printf(dev, "%s: shutdown_adminq failed: %d\n", __func__, error); return (error); } error = i40e_init_adminq(hw); if (error) { device_printf(dev, "%s: init_adminq failed: %d\n", __func__, error); return (error); } iavf_enable_adminq_irq(hw); return (0); } static int iavf_reset_complete(struct i40e_hw *hw) { u32 reg; /* Wait up to ~10 seconds */ for (int i = 0; i < 100; i++) { reg = rd32(hw, I40E_VFGEN_RSTAT) & I40E_VFGEN_RSTAT_VFR_STATE_MASK; if ((reg == VIRTCHNL_VFR_VFACTIVE) || (reg == VIRTCHNL_VFR_COMPLETED)) return (0); i40e_msec_pause(100); } return (EBUSY); } static void iavf_setup_interface(device_t dev, struct iavf_sc *sc) { struct ixl_vsi *vsi = &sc->vsi; if_ctx_t ctx = vsi->ctx; struct ifnet *ifp = iflib_get_ifp(ctx); INIT_DBG_DEV(dev, "begin"); vsi->shared->isc_max_frame_size = ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN + ETHER_VLAN_ENCAP_LEN; #if __FreeBSD_version >= 1100000 if_setbaudrate(ifp, IF_Gbps(40)); #else if_initbaudrate(ifp, IF_Gbps(40)); #endif ifmedia_add(vsi->media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(vsi->media, IFM_ETHER | IFM_AUTO); } /* ** Get a new filter and add it to the mac filter list. */ static struct iavf_mac_filter * iavf_get_mac_filter(struct iavf_sc *sc) { struct iavf_mac_filter *f; f = malloc(sizeof(struct iavf_mac_filter), M_IAVF, M_NOWAIT | M_ZERO); if (f) SLIST_INSERT_HEAD(sc->mac_filters, f, next); return (f); } /* ** Find the filter with matching MAC address */ static struct iavf_mac_filter * iavf_find_mac_filter(struct iavf_sc *sc, u8 *macaddr) { struct iavf_mac_filter *f; bool match = FALSE; SLIST_FOREACH(f, sc->mac_filters, next) { if (ixl_ether_is_equal(f->macaddr, macaddr)) { match = TRUE; break; } } if (!match) f = NULL; return (f); } /* ** Admin Queue interrupt handler */ static int iavf_msix_adminq(void *arg) { struct iavf_sc *sc = arg; struct i40e_hw *hw = &sc->hw; u32 reg, mask; bool do_task = FALSE; ++sc->admin_irq; reg = rd32(hw, I40E_VFINT_ICR01); /* * For masking off interrupt causes that need to be handled before * they can be re-enabled */ mask = rd32(hw, I40E_VFINT_ICR0_ENA1); /* Check on the cause */ if (reg & I40E_VFINT_ICR0_ADMINQ_MASK) { mask &= ~I40E_VFINT_ICR0_ENA_ADMINQ_MASK; do_task = TRUE; } wr32(hw, I40E_VFINT_ICR0_ENA1, mask); iavf_enable_adminq_irq(hw); if (do_task) return (FILTER_SCHEDULE_THREAD); else return (FILTER_HANDLED); } void iavf_enable_intr(struct ixl_vsi *vsi) { struct i40e_hw *hw = vsi->hw; struct ixl_rx_queue *que = vsi->rx_queues; iavf_enable_adminq_irq(hw); for (int i = 0; i < vsi->num_rx_queues; i++, que++) iavf_enable_queue_irq(hw, que->rxr.me); } void iavf_disable_intr(struct ixl_vsi *vsi) { struct i40e_hw *hw = vsi->hw; struct ixl_rx_queue *que = vsi->rx_queues; for (int i = 0; i < vsi->num_rx_queues; i++, que++) iavf_disable_queue_irq(hw, que->rxr.me); } static void iavf_disable_adminq_irq(struct i40e_hw *hw) { wr32(hw, I40E_VFINT_DYN_CTL01, 0); wr32(hw, I40E_VFINT_ICR0_ENA1, 0); /* flush */ rd32(hw, I40E_VFGEN_RSTAT); } static void iavf_enable_adminq_irq(struct i40e_hw *hw) { wr32(hw, I40E_VFINT_DYN_CTL01, I40E_VFINT_DYN_CTL01_INTENA_MASK | I40E_VFINT_DYN_CTL01_ITR_INDX_MASK); wr32(hw, I40E_VFINT_ICR0_ENA1, I40E_VFINT_ICR0_ENA1_ADMINQ_MASK); /* flush */ rd32(hw, I40E_VFGEN_RSTAT); } static void iavf_enable_queue_irq(struct i40e_hw *hw, int id) { u32 reg; reg = I40E_VFINT_DYN_CTLN1_INTENA_MASK | I40E_VFINT_DYN_CTLN1_CLEARPBA_MASK | I40E_VFINT_DYN_CTLN1_ITR_INDX_MASK; wr32(hw, I40E_VFINT_DYN_CTLN1(id), reg); } static void iavf_disable_queue_irq(struct i40e_hw *hw, int id) { wr32(hw, I40E_VFINT_DYN_CTLN1(id), I40E_VFINT_DYN_CTLN1_ITR_INDX_MASK); rd32(hw, I40E_VFGEN_RSTAT); } static void iavf_configure_tx_itr(struct iavf_sc *sc) { struct i40e_hw *hw = &sc->hw; struct ixl_vsi *vsi = &sc->vsi; struct ixl_tx_queue *que = vsi->tx_queues; vsi->tx_itr_setting = sc->tx_itr; for (int i = 0; i < vsi->num_tx_queues; i++, que++) { struct tx_ring *txr = &que->txr; wr32(hw, I40E_VFINT_ITRN1(IXL_TX_ITR, i), vsi->tx_itr_setting); txr->itr = vsi->tx_itr_setting; txr->latency = IXL_AVE_LATENCY; } } static void iavf_configure_rx_itr(struct iavf_sc *sc) { struct i40e_hw *hw = &sc->hw; struct ixl_vsi *vsi = &sc->vsi; struct ixl_rx_queue *que = vsi->rx_queues; vsi->rx_itr_setting = sc->rx_itr; for (int i = 0; i < vsi->num_rx_queues; i++, que++) { struct rx_ring *rxr = &que->rxr; wr32(hw, I40E_VFINT_ITRN1(IXL_RX_ITR, i), vsi->rx_itr_setting); rxr->itr = vsi->rx_itr_setting; rxr->latency = IXL_AVE_LATENCY; } } /* * Get initial ITR values from tunable values. */ static void iavf_configure_itr(struct iavf_sc *sc) { iavf_configure_tx_itr(sc); iavf_configure_rx_itr(sc); } /* ** Provide a update to the queue RX ** interrupt moderation value. */ static void iavf_set_queue_rx_itr(struct ixl_rx_queue *que) { struct ixl_vsi *vsi = que->vsi; struct i40e_hw *hw = vsi->hw; struct rx_ring *rxr = &que->rxr; /* Idle, do nothing */ if (rxr->bytes == 0) return; /* Update the hardware if needed */ if (rxr->itr != vsi->rx_itr_setting) { rxr->itr = vsi->rx_itr_setting; wr32(hw, I40E_VFINT_ITRN1(IXL_RX_ITR, que->rxr.me), rxr->itr); } } static int iavf_msix_que(void *arg) { struct ixl_rx_queue *rx_que = arg; ++rx_que->irqs; iavf_set_queue_rx_itr(rx_que); // iavf_set_queue_tx_itr(que); return (FILTER_SCHEDULE_THREAD); } /********************************************************************* * Multicast Initialization * * This routine is called by init to reset a fresh state. * **********************************************************************/ static void iavf_init_multi(struct iavf_sc *sc) { struct iavf_mac_filter *f; int mcnt = 0; /* First clear any multicast filters */ SLIST_FOREACH(f, sc->mac_filters, next) { if ((f->flags & IAVF_FILTER_USED) && (f->flags & IAVF_FILTER_MC)) { f->flags |= IAVF_FILTER_DEL; mcnt++; } } if (mcnt > 0) iavf_send_vc_msg(sc, IAVF_FLAG_AQ_DEL_MAC_FILTER); } /* ** Note: this routine updates the OS on the link state ** the real check of the hardware only happens with ** a link interrupt. */ void iavf_update_link_status(struct iavf_sc *sc) { struct ixl_vsi *vsi = &sc->vsi; u64 baudrate; if (sc->link_up){ if (vsi->link_active == FALSE) { vsi->link_active = TRUE; baudrate = ixl_max_vc_speed_to_value(sc->link_speed); iavf_dbg_info(sc, "baudrate: %lu\n", baudrate); iflib_link_state_change(vsi->ctx, LINK_STATE_UP, baudrate); } } else { /* Link down */ if (vsi->link_active == TRUE) { vsi->link_active = FALSE; iflib_link_state_change(vsi->ctx, LINK_STATE_DOWN, 0); } } } /********************************************************************* * * This routine disables all traffic on the adapter by issuing a * global reset on the MAC and deallocates TX/RX buffers. * **********************************************************************/ static void iavf_stop(struct iavf_sc *sc) { struct ifnet *ifp; ifp = sc->vsi.ifp; iavf_disable_intr(&sc->vsi); if (atomic_load_acq_32(&sc->queues_enabled)) iavf_send_vc_msg_sleep(sc, IAVF_FLAG_AQ_DISABLE_QUEUES); } static void iavf_if_stop(if_ctx_t ctx) { struct iavf_sc *sc = iflib_get_softc(ctx); iavf_stop(sc); } static void iavf_config_rss_reg(struct iavf_sc *sc) { struct i40e_hw *hw = &sc->hw; struct ixl_vsi *vsi = &sc->vsi; u32 lut = 0; u64 set_hena = 0, hena; int i, j, que_id; u32 rss_seed[IXL_RSS_KEY_SIZE_REG]; #ifdef RSS u32 rss_hash_config; #endif /* Don't set up RSS if using a single queue */ if (vsi->num_rx_queues == 1) { wr32(hw, I40E_VFQF_HENA(0), 0); wr32(hw, I40E_VFQF_HENA(1), 0); ixl_flush(hw); return; } #ifdef RSS /* Fetch the configured RSS key */ rss_getkey((uint8_t *) &rss_seed); #else ixl_get_default_rss_key(rss_seed); #endif /* Fill out hash function seed */ for (i = 0; i < IXL_RSS_KEY_SIZE_REG; i++) wr32(hw, I40E_VFQF_HKEY(i), rss_seed[i]); /* Enable PCTYPES for RSS: */ #ifdef RSS rss_hash_config = rss_gethashconfig(); if (rss_hash_config & RSS_HASHTYPE_RSS_IPV4) set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV4_OTHER); if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV4) set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV4_TCP); if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV4) set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV4_UDP); if (rss_hash_config & RSS_HASHTYPE_RSS_IPV6) set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV6_OTHER); if (rss_hash_config & RSS_HASHTYPE_RSS_IPV6_EX) set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_FRAG_IPV6); if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV6) set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV6_TCP); if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV6) set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV6_UDP); #else set_hena = IXL_DEFAULT_RSS_HENA_XL710; #endif hena = (u64)rd32(hw, I40E_VFQF_HENA(0)) | ((u64)rd32(hw, I40E_VFQF_HENA(1)) << 32); hena |= set_hena; wr32(hw, I40E_VFQF_HENA(0), (u32)hena); wr32(hw, I40E_VFQF_HENA(1), (u32)(hena >> 32)); /* Populate the LUT with max no. of queues in round robin fashion */ for (i = 0, j = 0; i < IXL_RSS_VSI_LUT_SIZE; i++, j++) { if (j == vsi->num_rx_queues) j = 0; #ifdef RSS /* * Fetch the RSS bucket id for the given indirection entry. * Cap it at the number of configured buckets (which is * num_rx_queues.) */ que_id = rss_get_indirection_to_bucket(i); que_id = que_id % vsi->num_rx_queues; #else que_id = j; #endif /* lut = 4-byte sliding window of 4 lut entries */ lut = (lut << 8) | (que_id & IXL_RSS_VF_LUT_ENTRY_MASK); /* On i = 3, we have 4 entries in lut; write to the register */ if ((i & 3) == 3) { wr32(hw, I40E_VFQF_HLUT(i >> 2), lut); DDPRINTF(sc->dev, "HLUT(%2d): %#010x", i, lut); } } ixl_flush(hw); } static void iavf_config_rss_pf(struct iavf_sc *sc) { iavf_send_vc_msg(sc, IAVF_FLAG_AQ_CONFIG_RSS_KEY); iavf_send_vc_msg(sc, IAVF_FLAG_AQ_SET_RSS_HENA); iavf_send_vc_msg(sc, IAVF_FLAG_AQ_CONFIG_RSS_LUT); } /* ** iavf_config_rss - setup RSS ** ** RSS keys and table are cleared on VF reset. */ static void iavf_config_rss(struct iavf_sc *sc) { if (sc->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_RSS_REG) { iavf_dbg_info(sc, "Setting up RSS using VF registers..."); iavf_config_rss_reg(sc); } else if (sc->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_RSS_PF) { iavf_dbg_info(sc, "Setting up RSS using messages to PF..."); iavf_config_rss_pf(sc); } else device_printf(sc->dev, "VF does not support RSS capability sent by PF.\n"); } /* ** This routine adds new MAC filters to the sc's list; ** these are later added in hardware by sending a virtual ** channel message. */ static int iavf_add_mac_filter(struct iavf_sc *sc, u8 *macaddr, u16 flags) { struct iavf_mac_filter *f; /* Does one already exist? */ f = iavf_find_mac_filter(sc, macaddr); if (f != NULL) { iavf_dbg_filter(sc, "exists: " MAC_FORMAT "\n", MAC_FORMAT_ARGS(macaddr)); return (EEXIST); } /* If not, get a new empty filter */ f = iavf_get_mac_filter(sc); if (f == NULL) { device_printf(sc->dev, "%s: no filters available!!\n", __func__); return (ENOMEM); } iavf_dbg_filter(sc, "marked: " MAC_FORMAT "\n", MAC_FORMAT_ARGS(macaddr)); bcopy(macaddr, f->macaddr, ETHER_ADDR_LEN); f->flags |= (IAVF_FILTER_ADD | IAVF_FILTER_USED); f->flags |= flags; return (0); } /* ** Marks a MAC filter for deletion. */ static int iavf_del_mac_filter(struct iavf_sc *sc, u8 *macaddr) { struct iavf_mac_filter *f; f = iavf_find_mac_filter(sc, macaddr); if (f == NULL) return (ENOENT); f->flags |= IAVF_FILTER_DEL; return (0); } /* * Re-uses the name from the PF driver. */ static void iavf_add_device_sysctls(struct iavf_sc *sc) { struct ixl_vsi *vsi = &sc->vsi; device_t dev = sc->dev; struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev); struct sysctl_oid_list *ctx_list = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); struct sysctl_oid *debug_node; struct sysctl_oid_list *debug_list; SYSCTL_ADD_PROC(ctx, ctx_list, OID_AUTO, "current_speed", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NEEDGIANT, sc, 0, iavf_sysctl_current_speed, "A", "Current Port Speed"); SYSCTL_ADD_PROC(ctx, ctx_list, OID_AUTO, "tx_itr", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, sc, 0, iavf_sysctl_tx_itr, "I", "Immediately set TX ITR value for all queues"); SYSCTL_ADD_PROC(ctx, ctx_list, OID_AUTO, "rx_itr", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, sc, 0, iavf_sysctl_rx_itr, "I", "Immediately set RX ITR value for all queues"); /* Add sysctls meant to print debug information, but don't list them * in "sysctl -a" output. */ debug_node = SYSCTL_ADD_NODE(ctx, ctx_list, OID_AUTO, "debug", CTLFLAG_RD | CTLFLAG_SKIP | CTLFLAG_MPSAFE, NULL, "Debug Sysctls"); debug_list = SYSCTL_CHILDREN(debug_node); SYSCTL_ADD_UINT(ctx, debug_list, OID_AUTO, "shared_debug_mask", CTLFLAG_RW, &sc->hw.debug_mask, 0, "Shared code debug message level"); SYSCTL_ADD_UINT(ctx, debug_list, OID_AUTO, "core_debug_mask", CTLFLAG_RW, &sc->dbg_mask, 0, "Non-shared code debug message level"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "filter_list", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NEEDGIANT, sc, 0, iavf_sysctl_sw_filter_list, "A", "SW Filter List"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "queue_interrupt_table", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NEEDGIANT, sc, 0, iavf_sysctl_queue_interrupt_table, "A", "View MSI-X indices for TX/RX queues"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "do_vf_reset", CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_NEEDGIANT, sc, 0, iavf_sysctl_vf_reset, "A", "Request a VF reset from PF"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "do_vflr_reset", CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_NEEDGIANT, sc, 0, iavf_sysctl_vflr_reset, "A", "Request a VFLR reset from HW"); /* Add stats sysctls */ ixl_add_vsi_sysctls(dev, vsi, ctx, "vsi"); ixl_vsi_add_queues_stats(vsi, ctx); } static void iavf_init_filters(struct iavf_sc *sc) { sc->mac_filters = malloc(sizeof(struct mac_list), M_IAVF, M_WAITOK | M_ZERO); SLIST_INIT(sc->mac_filters); sc->vlan_filters = malloc(sizeof(struct vlan_list), M_IAVF, M_WAITOK | M_ZERO); SLIST_INIT(sc->vlan_filters); } static void iavf_free_filters(struct iavf_sc *sc) { struct iavf_mac_filter *f; struct iavf_vlan_filter *v; while (!SLIST_EMPTY(sc->mac_filters)) { f = SLIST_FIRST(sc->mac_filters); SLIST_REMOVE_HEAD(sc->mac_filters, next); free(f, M_IAVF); } free(sc->mac_filters, M_IAVF); while (!SLIST_EMPTY(sc->vlan_filters)) { v = SLIST_FIRST(sc->vlan_filters); SLIST_REMOVE_HEAD(sc->vlan_filters, next); free(v, M_IAVF); } free(sc->vlan_filters, M_IAVF); } char * iavf_vc_speed_to_string(enum virtchnl_link_speed link_speed) { int index; char *speeds[] = { "Unknown", "100 Mbps", "1 Gbps", "10 Gbps", "40 Gbps", "20 Gbps", "25 Gbps", }; switch (link_speed) { case VIRTCHNL_LINK_SPEED_100MB: index = 1; break; case VIRTCHNL_LINK_SPEED_1GB: index = 2; break; case VIRTCHNL_LINK_SPEED_10GB: index = 3; break; case VIRTCHNL_LINK_SPEED_40GB: index = 4; break; case VIRTCHNL_LINK_SPEED_20GB: index = 5; break; case VIRTCHNL_LINK_SPEED_25GB: index = 6; break; case VIRTCHNL_LINK_SPEED_UNKNOWN: default: index = 0; break; } return speeds[index]; } static int iavf_sysctl_current_speed(SYSCTL_HANDLER_ARGS) { struct iavf_sc *sc = (struct iavf_sc *)arg1; int error = 0; error = sysctl_handle_string(oidp, iavf_vc_speed_to_string(sc->link_speed), 8, req); return (error); } /* * Sanity check and save off tunable values. */ static void iavf_save_tunables(struct iavf_sc *sc) { device_t dev = sc->dev; /* Save tunable information */ sc->dbg_mask = iavf_core_debug_mask; sc->hw.debug_mask = iavf_shared_debug_mask; sc->vsi.enable_head_writeback = !!(iavf_enable_head_writeback); if (iavf_tx_itr < 0 || iavf_tx_itr > IXL_MAX_ITR) { device_printf(dev, "Invalid tx_itr value of %d set!\n", iavf_tx_itr); device_printf(dev, "tx_itr must be between %d and %d, " "inclusive\n", 0, IXL_MAX_ITR); device_printf(dev, "Using default value of %d instead\n", IXL_ITR_4K); sc->tx_itr = IXL_ITR_4K; } else sc->tx_itr = iavf_tx_itr; if (iavf_rx_itr < 0 || iavf_rx_itr > IXL_MAX_ITR) { device_printf(dev, "Invalid rx_itr value of %d set!\n", iavf_rx_itr); device_printf(dev, "rx_itr must be between %d and %d, " "inclusive\n", 0, IXL_MAX_ITR); device_printf(dev, "Using default value of %d instead\n", IXL_ITR_8K); sc->rx_itr = IXL_ITR_8K; } else sc->rx_itr = iavf_rx_itr; } /* * Used to set the Tx ITR value for all of the VF's queues. * Writes to the ITR registers immediately. */ static int iavf_sysctl_tx_itr(SYSCTL_HANDLER_ARGS) { struct iavf_sc *sc = (struct iavf_sc *)arg1; device_t dev = sc->dev; int requested_tx_itr; int error = 0; requested_tx_itr = sc->tx_itr; error = sysctl_handle_int(oidp, &requested_tx_itr, 0, req); if ((error) || (req->newptr == NULL)) return (error); if (requested_tx_itr < 0 || requested_tx_itr > IXL_MAX_ITR) { device_printf(dev, "Invalid TX itr value; value must be between 0 and %d\n", IXL_MAX_ITR); return (EINVAL); } sc->tx_itr = requested_tx_itr; iavf_configure_tx_itr(sc); return (error); } /* * Used to set the Rx ITR value for all of the VF's queues. * Writes to the ITR registers immediately. */ static int iavf_sysctl_rx_itr(SYSCTL_HANDLER_ARGS) { struct iavf_sc *sc = (struct iavf_sc *)arg1; device_t dev = sc->dev; int requested_rx_itr; int error = 0; requested_rx_itr = sc->rx_itr; error = sysctl_handle_int(oidp, &requested_rx_itr, 0, req); if ((error) || (req->newptr == NULL)) return (error); if (requested_rx_itr < 0 || requested_rx_itr > IXL_MAX_ITR) { device_printf(dev, "Invalid RX itr value; value must be between 0 and %d\n", IXL_MAX_ITR); return (EINVAL); } sc->rx_itr = requested_rx_itr; iavf_configure_rx_itr(sc); return (error); } static int iavf_sysctl_sw_filter_list(SYSCTL_HANDLER_ARGS) { struct iavf_sc *sc = (struct iavf_sc *)arg1; struct iavf_mac_filter *f; struct iavf_vlan_filter *v; device_t dev = sc->dev; int ftl_len, ftl_counter = 0, error = 0; struct sbuf *buf; buf = sbuf_new_for_sysctl(NULL, NULL, 128, req); if (!buf) { device_printf(dev, "Could not allocate sbuf for output.\n"); return (ENOMEM); } sbuf_printf(buf, "\n"); /* Print MAC filters */ sbuf_printf(buf, "MAC Filters:\n"); ftl_len = 0; SLIST_FOREACH(f, sc->mac_filters, next) ftl_len++; if (ftl_len < 1) sbuf_printf(buf, "(none)\n"); else { SLIST_FOREACH(f, sc->mac_filters, next) { sbuf_printf(buf, MAC_FORMAT ", flags %#06x\n", MAC_FORMAT_ARGS(f->macaddr), f->flags); } } /* Print VLAN filters */ sbuf_printf(buf, "VLAN Filters:\n"); ftl_len = 0; SLIST_FOREACH(v, sc->vlan_filters, next) ftl_len++; if (ftl_len < 1) sbuf_printf(buf, "(none)"); else { SLIST_FOREACH(v, sc->vlan_filters, next) { sbuf_printf(buf, "%d, flags %#06x", v->vlan, v->flags); /* don't print '\n' for last entry */ if (++ftl_counter != ftl_len) sbuf_printf(buf, "\n"); } } error = sbuf_finish(buf); if (error) device_printf(dev, "Error finishing sbuf: %d\n", error); sbuf_delete(buf); return (error); } /* * Print out mapping of TX queue indexes and Rx queue indexes * to MSI-X vectors. */ static int iavf_sysctl_queue_interrupt_table(SYSCTL_HANDLER_ARGS) { struct iavf_sc *sc = (struct iavf_sc *)arg1; struct ixl_vsi *vsi = &sc->vsi; device_t dev = sc->dev; struct sbuf *buf; int error = 0; struct ixl_rx_queue *rx_que = vsi->rx_queues; struct ixl_tx_queue *tx_que = vsi->tx_queues; buf = sbuf_new_for_sysctl(NULL, NULL, 128, req); if (!buf) { device_printf(dev, "Could not allocate sbuf for output.\n"); return (ENOMEM); } sbuf_cat(buf, "\n"); for (int i = 0; i < vsi->num_rx_queues; i++) { rx_que = &vsi->rx_queues[i]; sbuf_printf(buf, "(rxq %3d): %d\n", i, rx_que->msix); } for (int i = 0; i < vsi->num_tx_queues; i++) { tx_que = &vsi->tx_queues[i]; sbuf_printf(buf, "(txq %3d): %d\n", i, tx_que->msix); } error = sbuf_finish(buf); if (error) device_printf(dev, "Error finishing sbuf: %d\n", error); sbuf_delete(buf); return (error); } #define CTX_ACTIVE(ctx) ((if_getdrvflags(iflib_get_ifp(ctx)) & IFF_DRV_RUNNING)) static int iavf_sysctl_vf_reset(SYSCTL_HANDLER_ARGS) { struct iavf_sc *sc = (struct iavf_sc *)arg1; int do_reset = 0, error = 0; error = sysctl_handle_int(oidp, &do_reset, 0, req); if ((error) || (req->newptr == NULL)) return (error); if (do_reset == 1) { iavf_reset(sc); if (CTX_ACTIVE(sc->vsi.ctx)) iflib_request_reset(sc->vsi.ctx); } return (error); } static int iavf_sysctl_vflr_reset(SYSCTL_HANDLER_ARGS) { struct iavf_sc *sc = (struct iavf_sc *)arg1; device_t dev = sc->dev; int do_reset = 0, error = 0; error = sysctl_handle_int(oidp, &do_reset, 0, req); if ((error) || (req->newptr == NULL)) return (error); if (do_reset == 1) { if (!pcie_flr(dev, max(pcie_get_max_completion_timeout(dev) / 1000, 10), true)) { device_printf(dev, "PCIE FLR failed\n"); error = EIO; } else if (CTX_ACTIVE(sc->vsi.ctx)) iflib_request_reset(sc->vsi.ctx); } return (error); } #undef CTX_ACTIVE diff --git a/sys/dev/ixl/if_ixl.c b/sys/dev/ixl/if_ixl.c index a79648de274f..c700af889cf1 100644 --- a/sys/dev/ixl/if_ixl.c +++ b/sys/dev/ixl/if_ixl.c @@ -1,1898 +1,1896 @@ /****************************************************************************** Copyright (c) 2013-2018, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************/ /*$FreeBSD$*/ #include "ixl.h" #include "ixl_pf.h" #ifdef IXL_IW #include "ixl_iw.h" #include "ixl_iw_int.h" #endif #ifdef PCI_IOV #include "ixl_pf_iov.h" #endif /********************************************************************* * Driver version *********************************************************************/ #define IXL_DRIVER_VERSION_MAJOR 2 #define IXL_DRIVER_VERSION_MINOR 3 #define IXL_DRIVER_VERSION_BUILD 0 #define IXL_DRIVER_VERSION_STRING \ __XSTRING(IXL_DRIVER_VERSION_MAJOR) "." \ __XSTRING(IXL_DRIVER_VERSION_MINOR) "." \ __XSTRING(IXL_DRIVER_VERSION_BUILD) "-k" /********************************************************************* * PCI Device ID Table * * Used by probe to select devices to load on * * ( Vendor ID, Device ID, Branding String ) *********************************************************************/ static pci_vendor_info_t ixl_vendor_info_array[] = { PVIDV(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_SFP_XL710, "Intel(R) Ethernet Controller X710 for 10GbE SFP+"), PVIDV(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_KX_B, "Intel(R) Ethernet Controller XL710 for 40GbE backplane"), PVIDV(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_KX_C, "Intel(R) Ethernet Controller X710 for 10GbE backplane"), PVIDV(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_QSFP_A, "Intel(R) Ethernet Controller XL710 for 40GbE QSFP+"), PVIDV(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_QSFP_B, "Intel(R) Ethernet Controller XL710 for 40GbE QSFP+"), PVIDV(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_QSFP_C, "Intel(R) Ethernet Controller X710 for 10GbE QSFP+"), PVIDV(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_10G_BASE_T, "Intel(R) Ethernet Controller X710 for 10GBASE-T"), PVIDV(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_10G_BASE_T4, "Intel(R) Ethernet Controller X710/X557-AT 10GBASE-T"), PVIDV(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_KX_X722, "Intel(R) Ethernet Connection X722 for 10GbE backplane"), PVIDV(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_QSFP_X722, "Intel(R) Ethernet Connection X722 for 10GbE QSFP+"), PVIDV(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_SFP_X722, "Intel(R) Ethernet Connection X722 for 10GbE SFP+"), PVIDV(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_1G_BASE_T_X722, "Intel(R) Ethernet Connection X722 for 1GbE"), PVIDV(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_10G_BASE_T_X722, "Intel(R) Ethernet Connection X722 for 10GBASE-T"), PVIDV(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_SFP_I_X722, "Intel(R) Ethernet Connection X722 for 10GbE SFP+"), PVIDV(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_25G_B, "Intel(R) Ethernet Controller XXV710 for 25GbE backplane"), PVIDV(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_25G_SFP28, "Intel(R) Ethernet Controller XXV710 for 25GbE SFP28"), PVIDV(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_10G_BASE_T_BC, "Intel(R) Ethernet Controller X710 for 10GBASE-T"), PVIDV(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_10G_SFP, "Intel(R) Ethernet Controller X710 for 10GbE SFP+"), PVIDV(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_10G_B, "Intel(R) Ethernet Controller X710 for 10GbE backplane"), PVIDV(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_5G_BASE_T_BC, "Intel(R) Ethernet Controller V710 for 5GBASE-T"), /* required last entry */ PVID_END }; /********************************************************************* * Function prototypes *********************************************************************/ /*** IFLIB interface ***/ static void *ixl_register(device_t dev); static int ixl_if_attach_pre(if_ctx_t ctx); static int ixl_if_attach_post(if_ctx_t ctx); static int ixl_if_detach(if_ctx_t ctx); static int ixl_if_shutdown(if_ctx_t ctx); static int ixl_if_suspend(if_ctx_t ctx); static int ixl_if_resume(if_ctx_t ctx); static int ixl_if_msix_intr_assign(if_ctx_t ctx, int msix); static void ixl_if_enable_intr(if_ctx_t ctx); static void ixl_if_disable_intr(if_ctx_t ctx); static int ixl_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid); static int ixl_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid); static int ixl_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int ntxqs, int ntxqsets); static int ixl_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int nqs, int nqsets); static void ixl_if_queues_free(if_ctx_t ctx); static void ixl_if_update_admin_status(if_ctx_t ctx); static void ixl_if_multi_set(if_ctx_t ctx); static int ixl_if_mtu_set(if_ctx_t ctx, uint32_t mtu); static void ixl_if_media_status(if_ctx_t ctx, struct ifmediareq *ifmr); static int ixl_if_media_change(if_ctx_t ctx); static int ixl_if_promisc_set(if_ctx_t ctx, int flags); static void ixl_if_timer(if_ctx_t ctx, uint16_t qid); static void ixl_if_vlan_register(if_ctx_t ctx, u16 vtag); static void ixl_if_vlan_unregister(if_ctx_t ctx, u16 vtag); static uint64_t ixl_if_get_counter(if_ctx_t ctx, ift_counter cnt); static int ixl_if_i2c_req(if_ctx_t ctx, struct ifi2creq *req); static int ixl_if_priv_ioctl(if_ctx_t ctx, u_long command, caddr_t data); static bool ixl_if_needs_restart(if_ctx_t ctx, enum iflib_restart_event event); #ifdef PCI_IOV static void ixl_if_vflr_handle(if_ctx_t ctx); #endif /*** Other ***/ static void ixl_save_pf_tunables(struct ixl_pf *); static int ixl_allocate_pci_resources(struct ixl_pf *); static void ixl_setup_ssctx(struct ixl_pf *pf); static void ixl_admin_timer(void *arg); /********************************************************************* * FreeBSD Device Interface Entry Points *********************************************************************/ static device_method_t ixl_methods[] = { /* Device interface */ DEVMETHOD(device_register, ixl_register), DEVMETHOD(device_probe, iflib_device_probe), DEVMETHOD(device_attach, iflib_device_attach), DEVMETHOD(device_detach, iflib_device_detach), DEVMETHOD(device_shutdown, iflib_device_shutdown), #ifdef PCI_IOV DEVMETHOD(pci_iov_init, iflib_device_iov_init), DEVMETHOD(pci_iov_uninit, iflib_device_iov_uninit), DEVMETHOD(pci_iov_add_vf, iflib_device_iov_add_vf), #endif DEVMETHOD_END }; static driver_t ixl_driver = { "ixl", ixl_methods, sizeof(struct ixl_pf), }; devclass_t ixl_devclass; DRIVER_MODULE(ixl, pci, ixl_driver, ixl_devclass, 0, 0); IFLIB_PNP_INFO(pci, ixl, ixl_vendor_info_array); MODULE_VERSION(ixl, 3); MODULE_DEPEND(ixl, pci, 1, 1, 1); MODULE_DEPEND(ixl, ether, 1, 1, 1); MODULE_DEPEND(ixl, iflib, 1, 1, 1); static device_method_t ixl_if_methods[] = { DEVMETHOD(ifdi_attach_pre, ixl_if_attach_pre), DEVMETHOD(ifdi_attach_post, ixl_if_attach_post), DEVMETHOD(ifdi_detach, ixl_if_detach), DEVMETHOD(ifdi_shutdown, ixl_if_shutdown), DEVMETHOD(ifdi_suspend, ixl_if_suspend), DEVMETHOD(ifdi_resume, ixl_if_resume), DEVMETHOD(ifdi_init, ixl_if_init), DEVMETHOD(ifdi_stop, ixl_if_stop), DEVMETHOD(ifdi_msix_intr_assign, ixl_if_msix_intr_assign), DEVMETHOD(ifdi_intr_enable, ixl_if_enable_intr), DEVMETHOD(ifdi_intr_disable, ixl_if_disable_intr), DEVMETHOD(ifdi_rx_queue_intr_enable, ixl_if_rx_queue_intr_enable), DEVMETHOD(ifdi_tx_queue_intr_enable, ixl_if_tx_queue_intr_enable), DEVMETHOD(ifdi_tx_queues_alloc, ixl_if_tx_queues_alloc), DEVMETHOD(ifdi_rx_queues_alloc, ixl_if_rx_queues_alloc), DEVMETHOD(ifdi_queues_free, ixl_if_queues_free), DEVMETHOD(ifdi_update_admin_status, ixl_if_update_admin_status), DEVMETHOD(ifdi_multi_set, ixl_if_multi_set), DEVMETHOD(ifdi_mtu_set, ixl_if_mtu_set), DEVMETHOD(ifdi_media_status, ixl_if_media_status), DEVMETHOD(ifdi_media_change, ixl_if_media_change), DEVMETHOD(ifdi_promisc_set, ixl_if_promisc_set), DEVMETHOD(ifdi_timer, ixl_if_timer), DEVMETHOD(ifdi_vlan_register, ixl_if_vlan_register), DEVMETHOD(ifdi_vlan_unregister, ixl_if_vlan_unregister), DEVMETHOD(ifdi_get_counter, ixl_if_get_counter), DEVMETHOD(ifdi_i2c_req, ixl_if_i2c_req), DEVMETHOD(ifdi_priv_ioctl, ixl_if_priv_ioctl), DEVMETHOD(ifdi_needs_restart, ixl_if_needs_restart), #ifdef PCI_IOV DEVMETHOD(ifdi_iov_init, ixl_if_iov_init), DEVMETHOD(ifdi_iov_uninit, ixl_if_iov_uninit), DEVMETHOD(ifdi_iov_vf_add, ixl_if_iov_vf_add), DEVMETHOD(ifdi_vflr_handle, ixl_if_vflr_handle), #endif // ifdi_led_func // ifdi_debug DEVMETHOD_END }; static driver_t ixl_if_driver = { "ixl_if", ixl_if_methods, sizeof(struct ixl_pf) }; /* ** TUNEABLE PARAMETERS: */ static SYSCTL_NODE(_hw, OID_AUTO, ixl, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "ixl driver parameters"); #ifdef IXL_DEBUG_FC /* * Leave this on unless you need to send flow control * frames (or other control frames) from software */ static int ixl_enable_tx_fc_filter = 1; TUNABLE_INT("hw.ixl.enable_tx_fc_filter", &ixl_enable_tx_fc_filter); SYSCTL_INT(_hw_ixl, OID_AUTO, enable_tx_fc_filter, CTLFLAG_RDTUN, &ixl_enable_tx_fc_filter, 0, "Filter out packets with Ethertype 0x8808 from being sent out by non-HW sources"); #endif #ifdef IXL_DEBUG static int ixl_debug_recovery_mode = 0; TUNABLE_INT("hw.ixl.debug_recovery_mode", &ixl_debug_recovery_mode); SYSCTL_INT(_hw_ixl, OID_AUTO, debug_recovery_mode, CTLFLAG_RDTUN, &ixl_debug_recovery_mode, 0, "Act like when FW entered recovery mode (for debuging)"); #endif static int ixl_i2c_access_method = 0; TUNABLE_INT("hw.ixl.i2c_access_method", &ixl_i2c_access_method); SYSCTL_INT(_hw_ixl, OID_AUTO, i2c_access_method, CTLFLAG_RDTUN, &ixl_i2c_access_method, 0, IXL_SYSCTL_HELP_I2C_METHOD); static int ixl_enable_vf_loopback = 1; TUNABLE_INT("hw.ixl.enable_vf_loopback", &ixl_enable_vf_loopback); SYSCTL_INT(_hw_ixl, OID_AUTO, enable_vf_loopback, CTLFLAG_RDTUN, &ixl_enable_vf_loopback, 0, IXL_SYSCTL_HELP_VF_LOOPBACK); /* * Different method for processing TX descriptor * completion. */ static int ixl_enable_head_writeback = 1; TUNABLE_INT("hw.ixl.enable_head_writeback", &ixl_enable_head_writeback); SYSCTL_INT(_hw_ixl, OID_AUTO, enable_head_writeback, CTLFLAG_RDTUN, &ixl_enable_head_writeback, 0, "For detecting last completed TX descriptor by hardware, use value written by HW instead of checking descriptors"); static int ixl_core_debug_mask = 0; TUNABLE_INT("hw.ixl.core_debug_mask", &ixl_core_debug_mask); SYSCTL_INT(_hw_ixl, OID_AUTO, core_debug_mask, CTLFLAG_RDTUN, &ixl_core_debug_mask, 0, "Display debug statements that are printed in non-shared code"); static int ixl_shared_debug_mask = 0; TUNABLE_INT("hw.ixl.shared_debug_mask", &ixl_shared_debug_mask); SYSCTL_INT(_hw_ixl, OID_AUTO, shared_debug_mask, CTLFLAG_RDTUN, &ixl_shared_debug_mask, 0, "Display debug statements that are printed in shared code"); #if 0 /* ** Controls for Interrupt Throttling ** - true/false for dynamic adjustment ** - default values for static ITR */ static int ixl_dynamic_rx_itr = 0; TUNABLE_INT("hw.ixl.dynamic_rx_itr", &ixl_dynamic_rx_itr); SYSCTL_INT(_hw_ixl, OID_AUTO, dynamic_rx_itr, CTLFLAG_RDTUN, &ixl_dynamic_rx_itr, 0, "Dynamic RX Interrupt Rate"); static int ixl_dynamic_tx_itr = 0; TUNABLE_INT("hw.ixl.dynamic_tx_itr", &ixl_dynamic_tx_itr); SYSCTL_INT(_hw_ixl, OID_AUTO, dynamic_tx_itr, CTLFLAG_RDTUN, &ixl_dynamic_tx_itr, 0, "Dynamic TX Interrupt Rate"); #endif static int ixl_rx_itr = IXL_ITR_8K; TUNABLE_INT("hw.ixl.rx_itr", &ixl_rx_itr); SYSCTL_INT(_hw_ixl, OID_AUTO, rx_itr, CTLFLAG_RDTUN, &ixl_rx_itr, 0, "RX Interrupt Rate"); static int ixl_tx_itr = IXL_ITR_4K; TUNABLE_INT("hw.ixl.tx_itr", &ixl_tx_itr); SYSCTL_INT(_hw_ixl, OID_AUTO, tx_itr, CTLFLAG_RDTUN, &ixl_tx_itr, 0, "TX Interrupt Rate"); #ifdef IXL_IW int ixl_enable_iwarp = 0; TUNABLE_INT("hw.ixl.enable_iwarp", &ixl_enable_iwarp); SYSCTL_INT(_hw_ixl, OID_AUTO, enable_iwarp, CTLFLAG_RDTUN, &ixl_enable_iwarp, 0, "iWARP enabled"); #if __FreeBSD_version < 1100000 int ixl_limit_iwarp_msix = 1; #else int ixl_limit_iwarp_msix = IXL_IW_MAX_MSIX; #endif TUNABLE_INT("hw.ixl.limit_iwarp_msix", &ixl_limit_iwarp_msix); SYSCTL_INT(_hw_ixl, OID_AUTO, limit_iwarp_msix, CTLFLAG_RDTUN, &ixl_limit_iwarp_msix, 0, "Limit MSI-X vectors assigned to iWARP"); #endif extern struct if_txrx ixl_txrx_hwb; extern struct if_txrx ixl_txrx_dwb; static struct if_shared_ctx ixl_sctx_init = { .isc_magic = IFLIB_MAGIC, .isc_q_align = PAGE_SIZE, .isc_tx_maxsize = IXL_TSO_SIZE + sizeof(struct ether_vlan_header), .isc_tx_maxsegsize = IXL_MAX_DMA_SEG_SIZE, .isc_tso_maxsize = IXL_TSO_SIZE + sizeof(struct ether_vlan_header), .isc_tso_maxsegsize = IXL_MAX_DMA_SEG_SIZE, .isc_rx_maxsize = 16384, .isc_rx_nsegments = IXL_MAX_RX_SEGS, .isc_rx_maxsegsize = IXL_MAX_DMA_SEG_SIZE, .isc_nfl = 1, .isc_ntxqs = 1, .isc_nrxqs = 1, .isc_admin_intrcnt = 1, .isc_vendor_info = ixl_vendor_info_array, .isc_driver_version = IXL_DRIVER_VERSION_STRING, .isc_driver = &ixl_if_driver, .isc_flags = IFLIB_NEED_SCRATCH | IFLIB_NEED_ZERO_CSUM | IFLIB_TSO_INIT_IP | IFLIB_ADMIN_ALWAYS_RUN, .isc_nrxd_min = {IXL_MIN_RING}, .isc_ntxd_min = {IXL_MIN_RING}, .isc_nrxd_max = {IXL_MAX_RING}, .isc_ntxd_max = {IXL_MAX_RING}, .isc_nrxd_default = {IXL_DEFAULT_RING}, .isc_ntxd_default = {IXL_DEFAULT_RING}, }; -if_shared_ctx_t ixl_sctx = &ixl_sctx_init; - /*** Functions ***/ static void * ixl_register(device_t dev) { - return (ixl_sctx); + return (&ixl_sctx_init); } static int ixl_allocate_pci_resources(struct ixl_pf *pf) { device_t dev = iflib_get_dev(pf->vsi.ctx); struct i40e_hw *hw = &pf->hw; int rid; /* Map BAR0 */ rid = PCIR_BAR(0); pf->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (!(pf->pci_mem)) { device_printf(dev, "Unable to allocate bus resource: PCI memory\n"); return (ENXIO); } /* Save off the PCI information */ hw->vendor_id = pci_get_vendor(dev); hw->device_id = pci_get_device(dev); hw->revision_id = pci_read_config(dev, PCIR_REVID, 1); hw->subsystem_vendor_id = pci_read_config(dev, PCIR_SUBVEND_0, 2); hw->subsystem_device_id = pci_read_config(dev, PCIR_SUBDEV_0, 2); hw->bus.device = pci_get_slot(dev); hw->bus.func = pci_get_function(dev); /* Save off register access information */ pf->osdep.mem_bus_space_tag = rman_get_bustag(pf->pci_mem); pf->osdep.mem_bus_space_handle = rman_get_bushandle(pf->pci_mem); pf->osdep.mem_bus_space_size = rman_get_size(pf->pci_mem); pf->osdep.flush_reg = I40E_GLGEN_STAT; pf->osdep.dev = dev; pf->hw.hw_addr = (u8 *) &pf->osdep.mem_bus_space_handle; pf->hw.back = &pf->osdep; return (0); } static void ixl_setup_ssctx(struct ixl_pf *pf) { if_softc_ctx_t scctx = pf->vsi.shared; struct i40e_hw *hw = &pf->hw; if (IXL_PF_IN_RECOVERY_MODE(pf)) { scctx->isc_ntxqsets_max = scctx->isc_nrxqsets_max = 1; scctx->isc_ntxqsets = scctx->isc_nrxqsets = 1; } else if (hw->mac.type == I40E_MAC_X722) scctx->isc_ntxqsets_max = scctx->isc_nrxqsets_max = 128; else scctx->isc_ntxqsets_max = scctx->isc_nrxqsets_max = 64; if (pf->vsi.enable_head_writeback) { scctx->isc_txqsizes[0] = roundup2(scctx->isc_ntxd[0] * sizeof(struct i40e_tx_desc) + sizeof(u32), DBA_ALIGN); scctx->isc_txrx = &ixl_txrx_hwb; } else { scctx->isc_txqsizes[0] = roundup2(scctx->isc_ntxd[0] * sizeof(struct i40e_tx_desc), DBA_ALIGN); scctx->isc_txrx = &ixl_txrx_dwb; } scctx->isc_txrx->ift_legacy_intr = ixl_intr; scctx->isc_rxqsizes[0] = roundup2(scctx->isc_nrxd[0] * sizeof(union i40e_32byte_rx_desc), DBA_ALIGN); scctx->isc_msix_bar = PCIR_BAR(IXL_MSIX_BAR); scctx->isc_tx_nsegments = IXL_MAX_TX_SEGS; scctx->isc_tx_tso_segments_max = IXL_MAX_TSO_SEGS; scctx->isc_tx_tso_size_max = IXL_TSO_SIZE; scctx->isc_tx_tso_segsize_max = IXL_MAX_DMA_SEG_SIZE; scctx->isc_rss_table_size = pf->hw.func_caps.rss_table_size; scctx->isc_tx_csum_flags = CSUM_OFFLOAD; scctx->isc_capabilities = scctx->isc_capenable = IXL_CAPS; } static void ixl_admin_timer(void *arg) { struct ixl_pf *pf = (struct ixl_pf *)arg; /* Fire off the admin task */ iflib_admin_intr_deferred(pf->vsi.ctx); /* Reschedule the admin timer */ callout_schedule(&pf->admin_timer, hz/2); } static int ixl_attach_pre_recovery_mode(struct ixl_pf *pf) { struct ixl_vsi *vsi = &pf->vsi; struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; device_printf(dev, "Firmware recovery mode detected. Limiting functionality. Refer to Intel(R) Ethernet Adapters and Devices User Guide for details on firmware recovery mode.\n"); i40e_get_mac_addr(hw, hw->mac.addr); if (vsi->shared->isc_intr == IFLIB_INTR_MSIX) { ixl_configure_intr0_msix(pf); ixl_enable_intr0(hw); } ixl_setup_ssctx(pf); return (0); } static int ixl_if_attach_pre(if_ctx_t ctx) { device_t dev; struct ixl_pf *pf; struct i40e_hw *hw; struct ixl_vsi *vsi; enum i40e_get_fw_lldp_status_resp lldp_status; struct i40e_filter_control_settings filter; enum i40e_status_code status; int error = 0; dev = iflib_get_dev(ctx); pf = iflib_get_softc(ctx); INIT_DBG_DEV(dev, "begin"); vsi = &pf->vsi; vsi->back = pf; pf->dev = dev; hw = &pf->hw; vsi->dev = dev; vsi->hw = &pf->hw; vsi->id = 0; vsi->num_vlans = 0; vsi->ctx = ctx; vsi->media = iflib_get_media(ctx); vsi->shared = iflib_get_softc_ctx(ctx); snprintf(pf->admin_mtx_name, sizeof(pf->admin_mtx_name), "%s:admin", device_get_nameunit(dev)); mtx_init(&pf->admin_mtx, pf->admin_mtx_name, NULL, MTX_DEF); callout_init_mtx(&pf->admin_timer, &pf->admin_mtx, 0); /* Save tunable values */ ixl_save_pf_tunables(pf); /* Do PCI setup - map BAR0, etc */ if (ixl_allocate_pci_resources(pf)) { device_printf(dev, "Allocation of PCI resources failed\n"); error = ENXIO; goto err_pci_res; } /* Establish a clean starting point */ i40e_clear_hw(hw); i40e_set_mac_type(hw); error = ixl_pf_reset(pf); if (error) goto err_out; /* Initialize the shared code */ status = i40e_init_shared_code(hw); if (status) { device_printf(dev, "Unable to initialize shared code, error %s\n", i40e_stat_str(hw, status)); error = EIO; goto err_out; } /* Set up the admin queue */ hw->aq.num_arq_entries = IXL_AQ_LEN; hw->aq.num_asq_entries = IXL_AQ_LEN; hw->aq.arq_buf_size = IXL_AQ_BUF_SZ; hw->aq.asq_buf_size = IXL_AQ_BUF_SZ; status = i40e_init_adminq(hw); if (status != 0 && status != I40E_ERR_FIRMWARE_API_VERSION) { device_printf(dev, "Unable to initialize Admin Queue, error %s\n", i40e_stat_str(hw, status)); error = EIO; goto err_out; } ixl_print_nvm_version(pf); if (status == I40E_ERR_FIRMWARE_API_VERSION) { device_printf(dev, "The driver for the device stopped " "because the NVM image is newer than expected.\n"); device_printf(dev, "You must install the most recent version of " "the network driver.\n"); error = EIO; goto err_out; } if (hw->aq.api_maj_ver == I40E_FW_API_VERSION_MAJOR && hw->aq.api_min_ver > I40E_FW_MINOR_VERSION(hw)) { device_printf(dev, "The driver for the device detected " "a newer version of the NVM image than expected.\n"); device_printf(dev, "Please install the most recent version " "of the network driver.\n"); } else if (hw->aq.api_maj_ver == 1 && hw->aq.api_min_ver < 4) { device_printf(dev, "The driver for the device detected " "an older version of the NVM image than expected.\n"); device_printf(dev, "Please update the NVM image.\n"); } if (IXL_PF_IN_RECOVERY_MODE(pf)) { error = ixl_attach_pre_recovery_mode(pf); if (error) goto err_out; return (error); } /* Clear PXE mode */ i40e_clear_pxe_mode(hw); /* Get capabilities from the device */ error = ixl_get_hw_capabilities(pf); if (error) { device_printf(dev, "get_hw_capabilities failed: %d\n", error); goto err_get_cap; } /* Set up host memory cache */ error = ixl_setup_hmc(pf); if (error) goto err_mac_hmc; /* Disable LLDP from the firmware for certain NVM versions */ if (((pf->hw.aq.fw_maj_ver == 4) && (pf->hw.aq.fw_min_ver < 3)) || (pf->hw.aq.fw_maj_ver < 4)) { i40e_aq_stop_lldp(hw, true, false, NULL); pf->state |= IXL_PF_STATE_FW_LLDP_DISABLED; } /* Try enabling Energy Efficient Ethernet (EEE) mode */ if (i40e_enable_eee(hw, true) == I40E_SUCCESS) atomic_set_32(&pf->state, IXL_PF_STATE_EEE_ENABLED); else atomic_clear_32(&pf->state, IXL_PF_STATE_EEE_ENABLED); /* Get MAC addresses from hardware */ i40e_get_mac_addr(hw, hw->mac.addr); error = i40e_validate_mac_addr(hw->mac.addr); if (error) { device_printf(dev, "validate_mac_addr failed: %d\n", error); goto err_mac_hmc; } bcopy(hw->mac.addr, hw->mac.perm_addr, ETHER_ADDR_LEN); iflib_set_mac(ctx, hw->mac.addr); i40e_get_port_mac_addr(hw, hw->mac.port_addr); /* Set up the device filtering */ bzero(&filter, sizeof(filter)); filter.enable_ethtype = TRUE; filter.enable_macvlan = TRUE; filter.enable_fdir = FALSE; filter.hash_lut_size = I40E_HASH_LUT_SIZE_512; if (i40e_set_filter_control(hw, &filter)) device_printf(dev, "i40e_set_filter_control() failed\n"); /* Query device FW LLDP status */ if (i40e_get_fw_lldp_status(hw, &lldp_status) == I40E_SUCCESS) { if (lldp_status == I40E_GET_FW_LLDP_STATUS_DISABLED) { atomic_set_32(&pf->state, IXL_PF_STATE_FW_LLDP_DISABLED); } else { atomic_clear_32(&pf->state, IXL_PF_STATE_FW_LLDP_DISABLED); } } /* Tell FW to apply DCB config on link up */ i40e_aq_set_dcb_parameters(hw, true, NULL); /* Fill out iflib parameters */ ixl_setup_ssctx(pf); INIT_DBG_DEV(dev, "end"); return (0); err_mac_hmc: ixl_shutdown_hmc(pf); err_get_cap: i40e_shutdown_adminq(hw); err_out: ixl_free_pci_resources(pf); err_pci_res: mtx_lock(&pf->admin_mtx); callout_stop(&pf->admin_timer); mtx_unlock(&pf->admin_mtx); mtx_destroy(&pf->admin_mtx); return (error); } static int ixl_if_attach_post(if_ctx_t ctx) { device_t dev; struct ixl_pf *pf; struct i40e_hw *hw; struct ixl_vsi *vsi; int error = 0; enum i40e_status_code status; dev = iflib_get_dev(ctx); pf = iflib_get_softc(ctx); INIT_DBG_DEV(dev, "begin"); vsi = &pf->vsi; vsi->ifp = iflib_get_ifp(ctx); hw = &pf->hw; /* Save off determined number of queues for interface */ vsi->num_rx_queues = vsi->shared->isc_nrxqsets; vsi->num_tx_queues = vsi->shared->isc_ntxqsets; /* Setup OS network interface / ifnet */ if (ixl_setup_interface(dev, pf)) { device_printf(dev, "interface setup failed!\n"); error = EIO; goto err; } if (IXL_PF_IN_RECOVERY_MODE(pf)) { /* Keep admin queue interrupts active while driver is loaded */ if (vsi->shared->isc_intr == IFLIB_INTR_MSIX) { ixl_configure_intr0_msix(pf); ixl_enable_intr0(hw); } ixl_add_sysctls_recovery_mode(pf); /* Start the admin timer */ mtx_lock(&pf->admin_mtx); callout_reset(&pf->admin_timer, hz/2, ixl_admin_timer, pf); mtx_unlock(&pf->admin_mtx); return (0); } /* Determine link state */ if (ixl_attach_get_link_status(pf)) { error = EINVAL; goto err; } error = ixl_switch_config(pf); if (error) { device_printf(dev, "Initial ixl_switch_config() failed: %d\n", error); goto err; } /* Add protocol filters to list */ ixl_init_filters(vsi); /* Init queue allocation manager */ error = ixl_pf_qmgr_init(&pf->qmgr, hw->func_caps.num_tx_qp); if (error) { device_printf(dev, "Failed to init queue manager for PF queues, error %d\n", error); goto err; } /* reserve a contiguous allocation for the PF's VSI */ error = ixl_pf_qmgr_alloc_contiguous(&pf->qmgr, max(vsi->num_rx_queues, vsi->num_tx_queues), &pf->qtag); if (error) { device_printf(dev, "Failed to reserve queues for PF LAN VSI, error %d\n", error); goto err; } device_printf(dev, "Allocating %d queues for PF LAN VSI; %d queues active\n", pf->qtag.num_allocated, pf->qtag.num_active); /* Limit PHY interrupts to link, autoneg, and modules failure */ status = i40e_aq_set_phy_int_mask(hw, IXL_DEFAULT_PHY_INT_MASK, NULL); if (status) { device_printf(dev, "i40e_aq_set_phy_mask() failed: err %s," " aq_err %s\n", i40e_stat_str(hw, status), i40e_aq_str(hw, hw->aq.asq_last_status)); goto err; } /* Get the bus configuration and set the shared code */ ixl_get_bus_info(pf); /* Keep admin queue interrupts active while driver is loaded */ if (vsi->shared->isc_intr == IFLIB_INTR_MSIX) { ixl_configure_intr0_msix(pf); ixl_enable_intr0(hw); } /* Set initial advertised speed sysctl value */ ixl_set_initial_advertised_speeds(pf); /* Initialize statistics & add sysctls */ ixl_add_device_sysctls(pf); ixl_pf_reset_stats(pf); ixl_update_stats_counters(pf); ixl_add_hw_stats(pf); /* * Driver may have been reloaded. Ensure that the link state * is consistent with current settings. */ ixl_set_link(pf, (pf->state & IXL_PF_STATE_LINK_ACTIVE_ON_DOWN) != 0); hw->phy.get_link_info = true; i40e_get_link_status(hw, &pf->link_up); ixl_update_link_status(pf); #ifdef PCI_IOV ixl_initialize_sriov(pf); #endif #ifdef IXL_IW if (hw->func_caps.iwarp && ixl_enable_iwarp) { pf->iw_enabled = (pf->iw_msix > 0) ? true : false; if (pf->iw_enabled) { error = ixl_iw_pf_attach(pf); if (error) { device_printf(dev, "interfacing to iWARP driver failed: %d\n", error); goto err; } else device_printf(dev, "iWARP ready\n"); } else device_printf(dev, "iWARP disabled on this device " "(no MSI-X vectors)\n"); } else { pf->iw_enabled = false; device_printf(dev, "The device is not iWARP enabled\n"); } #endif /* Start the admin timer */ mtx_lock(&pf->admin_mtx); callout_reset(&pf->admin_timer, hz/2, ixl_admin_timer, pf); mtx_unlock(&pf->admin_mtx); INIT_DBG_DEV(dev, "end"); return (0); err: INIT_DEBUGOUT("end: error %d", error); /* ixl_if_detach() is called on error from this */ return (error); } /** * XXX: iflib always ignores the return value of detach() * -> This means that this isn't allowed to fail */ static int ixl_if_detach(if_ctx_t ctx) { struct ixl_pf *pf = iflib_get_softc(ctx); struct ixl_vsi *vsi = &pf->vsi; struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; enum i40e_status_code status; #ifdef IXL_IW int error; #endif INIT_DBG_DEV(dev, "begin"); /* Stop the admin timer */ mtx_lock(&pf->admin_mtx); callout_stop(&pf->admin_timer); mtx_unlock(&pf->admin_mtx); mtx_destroy(&pf->admin_mtx); #ifdef IXL_IW if (ixl_enable_iwarp && pf->iw_enabled) { error = ixl_iw_pf_detach(pf); if (error == EBUSY) { device_printf(dev, "iwarp in use; stop it first.\n"); //return (error); } } #endif /* Remove all previously allocated media types */ ifmedia_removeall(vsi->media); /* Shutdown LAN HMC */ ixl_shutdown_hmc(pf); /* Shutdown admin queue */ ixl_disable_intr0(hw); status = i40e_shutdown_adminq(hw); if (status) device_printf(dev, "i40e_shutdown_adminq() failed with status %s\n", i40e_stat_str(hw, status)); ixl_pf_qmgr_destroy(&pf->qmgr); ixl_free_pci_resources(pf); ixl_free_filters(&vsi->ftl); INIT_DBG_DEV(dev, "end"); return (0); } static int ixl_if_shutdown(if_ctx_t ctx) { int error = 0; INIT_DEBUGOUT("ixl_if_shutdown: begin"); /* TODO: Call ixl_if_stop()? */ /* TODO: Then setup low power mode */ return (error); } static int ixl_if_suspend(if_ctx_t ctx) { int error = 0; INIT_DEBUGOUT("ixl_if_suspend: begin"); /* TODO: Call ixl_if_stop()? */ /* TODO: Then setup low power mode */ return (error); } static int ixl_if_resume(if_ctx_t ctx) { struct ifnet *ifp = iflib_get_ifp(ctx); INIT_DEBUGOUT("ixl_if_resume: begin"); /* Read & clear wake-up registers */ /* Required after D3->D0 transition */ if (ifp->if_flags & IFF_UP) ixl_if_init(ctx); return (0); } void ixl_if_init(if_ctx_t ctx) { struct ixl_pf *pf = iflib_get_softc(ctx); struct ixl_vsi *vsi = &pf->vsi; struct i40e_hw *hw = &pf->hw; struct ifnet *ifp = iflib_get_ifp(ctx); device_t dev = iflib_get_dev(ctx); u8 tmpaddr[ETHER_ADDR_LEN]; int ret; if (IXL_PF_IN_RECOVERY_MODE(pf)) return; /* * If the aq is dead here, it probably means something outside of the driver * did something to the adapter, like a PF reset. * So, rebuild the driver's state here if that occurs. */ if (!i40e_check_asq_alive(&pf->hw)) { device_printf(dev, "Admin Queue is down; resetting...\n"); ixl_teardown_hw_structs(pf); ixl_rebuild_hw_structs_after_reset(pf, false); } /* Get the latest mac address... User might use a LAA */ bcopy(IF_LLADDR(vsi->ifp), tmpaddr, ETH_ALEN); if (!ixl_ether_is_equal(hw->mac.addr, tmpaddr) && (i40e_validate_mac_addr(tmpaddr) == I40E_SUCCESS)) { ixl_del_all_vlan_filters(vsi, hw->mac.addr); bcopy(tmpaddr, hw->mac.addr, ETH_ALEN); ret = i40e_aq_mac_address_write(hw, I40E_AQC_WRITE_TYPE_LAA_ONLY, hw->mac.addr, NULL); if (ret) { device_printf(dev, "LLA address change failed!!\n"); return; } /* * New filters are configured by ixl_reconfigure_filters * at the end of ixl_init_locked. */ } iflib_set_mac(ctx, hw->mac.addr); /* Prepare the VSI: rings, hmc contexts, etc... */ if (ixl_initialize_vsi(vsi)) { device_printf(dev, "initialize vsi failed!!\n"); return; } ixl_set_link(pf, true); /* Reconfigure multicast filters in HW */ ixl_if_multi_set(ctx); /* Set up RSS */ ixl_config_rss(pf); /* Set up MSI-X routing and the ITR settings */ if (vsi->shared->isc_intr == IFLIB_INTR_MSIX) { ixl_configure_queue_intr_msix(pf); ixl_configure_itr(pf); } else ixl_configure_legacy(pf); if (vsi->enable_head_writeback) ixl_init_tx_cidx(vsi); else ixl_init_tx_rsqs(vsi); ixl_enable_rings(vsi); i40e_aq_set_default_vsi(hw, vsi->seid, NULL); /* Re-add configure filters to HW */ ixl_reconfigure_filters(vsi); /* Configure promiscuous mode */ ixl_if_promisc_set(ctx, if_getflags(ifp)); #ifdef IXL_IW if (ixl_enable_iwarp && pf->iw_enabled) { ret = ixl_iw_pf_init(pf); if (ret) device_printf(dev, "initialize iwarp failed, code %d\n", ret); } #endif } void ixl_if_stop(if_ctx_t ctx) { struct ixl_pf *pf = iflib_get_softc(ctx); struct ifnet *ifp = iflib_get_ifp(ctx); struct ixl_vsi *vsi = &pf->vsi; INIT_DEBUGOUT("ixl_if_stop: begin\n"); if (IXL_PF_IN_RECOVERY_MODE(pf)) return; // TODO: This may need to be reworked #ifdef IXL_IW /* Stop iWARP device */ if (ixl_enable_iwarp && pf->iw_enabled) ixl_iw_pf_stop(pf); #endif ixl_disable_rings_intr(vsi); ixl_disable_rings(pf, vsi, &pf->qtag); /* * Don't set link state if only reconfiguring * e.g. on MTU change. */ if ((if_getflags(ifp) & IFF_UP) == 0 && (atomic_load_acq_32(&pf->state) & IXL_PF_STATE_LINK_ACTIVE_ON_DOWN) == 0) ixl_set_link(pf, false); } static int ixl_if_msix_intr_assign(if_ctx_t ctx, int msix) { struct ixl_pf *pf = iflib_get_softc(ctx); struct ixl_vsi *vsi = &pf->vsi; struct ixl_rx_queue *rx_que = vsi->rx_queues; struct ixl_tx_queue *tx_que = vsi->tx_queues; int err, i, rid, vector = 0; char buf[16]; MPASS(vsi->shared->isc_nrxqsets > 0); MPASS(vsi->shared->isc_ntxqsets > 0); /* Admin Que must use vector 0*/ rid = vector + 1; err = iflib_irq_alloc_generic(ctx, &vsi->irq, rid, IFLIB_INTR_ADMIN, ixl_msix_adminq, pf, 0, "aq"); if (err) { iflib_irq_free(ctx, &vsi->irq); device_printf(iflib_get_dev(ctx), "Failed to register Admin Que handler"); return (err); } /* Create soft IRQ for handling VFLRs */ iflib_softirq_alloc_generic(ctx, NULL, IFLIB_INTR_IOV, pf, 0, "iov"); /* Now set up the stations */ for (i = 0, vector = 1; i < vsi->shared->isc_nrxqsets; i++, vector++, rx_que++) { rid = vector + 1; snprintf(buf, sizeof(buf), "rxq%d", i); err = iflib_irq_alloc_generic(ctx, &rx_que->que_irq, rid, IFLIB_INTR_RXTX, ixl_msix_que, rx_que, rx_que->rxr.me, buf); /* XXX: Does the driver work as expected if there are fewer num_rx_queues than * what's expected in the iflib context? */ if (err) { device_printf(iflib_get_dev(ctx), "Failed to allocate queue RX int vector %d, err: %d\n", i, err); vsi->num_rx_queues = i + 1; goto fail; } rx_que->msix = vector; } bzero(buf, sizeof(buf)); for (i = 0; i < vsi->shared->isc_ntxqsets; i++, tx_que++) { snprintf(buf, sizeof(buf), "txq%d", i); iflib_softirq_alloc_generic(ctx, &vsi->rx_queues[i % vsi->shared->isc_nrxqsets].que_irq, IFLIB_INTR_TX, tx_que, tx_que->txr.me, buf); /* TODO: Maybe call a strategy function for this to figure out which * interrupts to map Tx queues to. I don't know if there's an immediately * better way than this other than a user-supplied map, though. */ tx_que->msix = (i % vsi->shared->isc_nrxqsets) + 1; } return (0); fail: iflib_irq_free(ctx, &vsi->irq); rx_que = vsi->rx_queues; for (int i = 0; i < vsi->num_rx_queues; i++, rx_que++) iflib_irq_free(ctx, &rx_que->que_irq); return (err); } /* * Enable all interrupts * * Called in: * iflib_init_locked, after ixl_if_init() */ static void ixl_if_enable_intr(if_ctx_t ctx) { struct ixl_pf *pf = iflib_get_softc(ctx); struct ixl_vsi *vsi = &pf->vsi; struct i40e_hw *hw = vsi->hw; struct ixl_rx_queue *que = vsi->rx_queues; ixl_enable_intr0(hw); /* Enable queue interrupts */ for (int i = 0; i < vsi->num_rx_queues; i++, que++) /* TODO: Queue index parameter is probably wrong */ ixl_enable_queue(hw, que->rxr.me); } /* * Disable queue interrupts * * Other interrupt causes need to remain active. */ static void ixl_if_disable_intr(if_ctx_t ctx) { struct ixl_pf *pf = iflib_get_softc(ctx); struct ixl_vsi *vsi = &pf->vsi; struct i40e_hw *hw = vsi->hw; struct ixl_rx_queue *rx_que = vsi->rx_queues; if (vsi->shared->isc_intr == IFLIB_INTR_MSIX) { for (int i = 0; i < vsi->num_rx_queues; i++, rx_que++) ixl_disable_queue(hw, rx_que->msix - 1); } else { // Set PFINT_LNKLST0 FIRSTQ_INDX to 0x7FF // stops queues from triggering interrupts wr32(hw, I40E_PFINT_LNKLST0, 0x7FF); } } static int ixl_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid) { struct ixl_pf *pf = iflib_get_softc(ctx); struct ixl_vsi *vsi = &pf->vsi; struct i40e_hw *hw = vsi->hw; struct ixl_rx_queue *rx_que = &vsi->rx_queues[rxqid]; ixl_enable_queue(hw, rx_que->msix - 1); return (0); } static int ixl_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid) { struct ixl_pf *pf = iflib_get_softc(ctx); struct ixl_vsi *vsi = &pf->vsi; struct i40e_hw *hw = vsi->hw; struct ixl_tx_queue *tx_que = &vsi->tx_queues[txqid]; ixl_enable_queue(hw, tx_que->msix - 1); return (0); } static int ixl_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int ntxqs, int ntxqsets) { struct ixl_pf *pf = iflib_get_softc(ctx); struct ixl_vsi *vsi = &pf->vsi; if_softc_ctx_t scctx = vsi->shared; struct ixl_tx_queue *que; int i, j, error = 0; MPASS(scctx->isc_ntxqsets > 0); MPASS(ntxqs == 1); MPASS(scctx->isc_ntxqsets == ntxqsets); /* Allocate queue structure memory */ if (!(vsi->tx_queues = (struct ixl_tx_queue *) malloc(sizeof(struct ixl_tx_queue) *ntxqsets, M_IXL, M_NOWAIT | M_ZERO))) { device_printf(iflib_get_dev(ctx), "Unable to allocate TX ring memory\n"); return (ENOMEM); } for (i = 0, que = vsi->tx_queues; i < ntxqsets; i++, que++) { struct tx_ring *txr = &que->txr; txr->me = i; que->vsi = vsi; if (!vsi->enable_head_writeback) { /* Allocate report status array */ if (!(txr->tx_rsq = malloc(sizeof(qidx_t) * scctx->isc_ntxd[0], M_IXL, M_NOWAIT))) { device_printf(iflib_get_dev(ctx), "failed to allocate tx_rsq memory\n"); error = ENOMEM; goto fail; } /* Init report status array */ for (j = 0; j < scctx->isc_ntxd[0]; j++) txr->tx_rsq[j] = QIDX_INVALID; } /* get the virtual and physical address of the hardware queues */ txr->tail = I40E_QTX_TAIL(txr->me); txr->tx_base = (struct i40e_tx_desc *)vaddrs[i * ntxqs]; txr->tx_paddr = paddrs[i * ntxqs]; txr->que = que; } return (0); fail: ixl_if_queues_free(ctx); return (error); } static int ixl_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int nrxqs, int nrxqsets) { struct ixl_pf *pf = iflib_get_softc(ctx); struct ixl_vsi *vsi = &pf->vsi; struct ixl_rx_queue *que; int i, error = 0; #ifdef INVARIANTS if_softc_ctx_t scctx = vsi->shared; MPASS(scctx->isc_nrxqsets > 0); MPASS(nrxqs == 1); MPASS(scctx->isc_nrxqsets == nrxqsets); #endif /* Allocate queue structure memory */ if (!(vsi->rx_queues = (struct ixl_rx_queue *) malloc(sizeof(struct ixl_rx_queue) * nrxqsets, M_IXL, M_NOWAIT | M_ZERO))) { device_printf(iflib_get_dev(ctx), "Unable to allocate RX ring memory\n"); error = ENOMEM; goto fail; } for (i = 0, que = vsi->rx_queues; i < nrxqsets; i++, que++) { struct rx_ring *rxr = &que->rxr; rxr->me = i; que->vsi = vsi; /* get the virtual and physical address of the hardware queues */ rxr->tail = I40E_QRX_TAIL(rxr->me); rxr->rx_base = (union i40e_rx_desc *)vaddrs[i * nrxqs]; rxr->rx_paddr = paddrs[i * nrxqs]; rxr->que = que; } return (0); fail: ixl_if_queues_free(ctx); return (error); } static void ixl_if_queues_free(if_ctx_t ctx) { struct ixl_pf *pf = iflib_get_softc(ctx); struct ixl_vsi *vsi = &pf->vsi; if (vsi->tx_queues != NULL && !vsi->enable_head_writeback) { struct ixl_tx_queue *que; int i = 0; for (i = 0, que = vsi->tx_queues; i < vsi->num_tx_queues; i++, que++) { struct tx_ring *txr = &que->txr; if (txr->tx_rsq != NULL) { free(txr->tx_rsq, M_IXL); txr->tx_rsq = NULL; } } } if (vsi->tx_queues != NULL) { free(vsi->tx_queues, M_IXL); vsi->tx_queues = NULL; } if (vsi->rx_queues != NULL) { free(vsi->rx_queues, M_IXL); vsi->rx_queues = NULL; } if (!IXL_PF_IN_RECOVERY_MODE(pf)) sysctl_ctx_free(&vsi->sysctl_ctx); } void ixl_update_link_status(struct ixl_pf *pf) { struct ixl_vsi *vsi = &pf->vsi; struct i40e_hw *hw = &pf->hw; u64 baudrate; if (pf->link_up) { if (vsi->link_active == FALSE) { vsi->link_active = TRUE; baudrate = ixl_max_aq_speed_to_value(hw->phy.link_info.link_speed); iflib_link_state_change(vsi->ctx, LINK_STATE_UP, baudrate); ixl_link_up_msg(pf); #ifdef PCI_IOV ixl_broadcast_link_state(pf); #endif } } else { /* Link down */ if (vsi->link_active == TRUE) { vsi->link_active = FALSE; iflib_link_state_change(vsi->ctx, LINK_STATE_DOWN, 0); #ifdef PCI_IOV ixl_broadcast_link_state(pf); #endif } } } static void ixl_handle_lan_overflow_event(struct ixl_pf *pf, struct i40e_arq_event_info *e) { device_t dev = pf->dev; u32 rxq_idx, qtx_ctl; rxq_idx = (e->desc.params.external.param0 & I40E_PRTDCB_RUPTQ_RXQNUM_MASK) >> I40E_PRTDCB_RUPTQ_RXQNUM_SHIFT; qtx_ctl = e->desc.params.external.param1; device_printf(dev, "LAN overflow event: global rxq_idx %d\n", rxq_idx); device_printf(dev, "LAN overflow event: QTX_CTL 0x%08x\n", qtx_ctl); } static int ixl_process_adminq(struct ixl_pf *pf, u16 *pending) { enum i40e_status_code status = I40E_SUCCESS; struct i40e_arq_event_info event; struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; u16 opcode; u32 loop = 0, reg; event.buf_len = IXL_AQ_BUF_SZ; event.msg_buf = malloc(event.buf_len, M_IXL, M_NOWAIT | M_ZERO); if (!event.msg_buf) { device_printf(dev, "%s: Unable to allocate memory for Admin" " Queue event!\n", __func__); return (ENOMEM); } /* clean and process any events */ do { status = i40e_clean_arq_element(hw, &event, pending); if (status) break; opcode = LE16_TO_CPU(event.desc.opcode); ixl_dbg(pf, IXL_DBG_AQ, "Admin Queue event: %#06x\n", opcode); switch (opcode) { case i40e_aqc_opc_get_link_status: ixl_link_event(pf, &event); break; case i40e_aqc_opc_send_msg_to_pf: #ifdef PCI_IOV ixl_handle_vf_msg(pf, &event); #endif break; /* * This should only occur on no-drop queues, which * aren't currently configured. */ case i40e_aqc_opc_event_lan_overflow: ixl_handle_lan_overflow_event(pf, &event); break; default: break; } } while (*pending && (loop++ < IXL_ADM_LIMIT)); free(event.msg_buf, M_IXL); /* Re-enable admin queue interrupt cause */ reg = rd32(hw, I40E_PFINT_ICR0_ENA); reg |= I40E_PFINT_ICR0_ENA_ADMINQ_MASK; wr32(hw, I40E_PFINT_ICR0_ENA, reg); return (status); } static void ixl_if_update_admin_status(if_ctx_t ctx) { struct ixl_pf *pf = iflib_get_softc(ctx); struct i40e_hw *hw = &pf->hw; u16 pending; if (IXL_PF_IS_RESETTING(pf)) ixl_handle_empr_reset(pf); /* * Admin Queue is shut down while handling reset. * Don't proceed if it hasn't been re-initialized * e.g due to an issue with new FW. */ if (!i40e_check_asq_alive(&pf->hw)) return; if (pf->state & IXL_PF_STATE_MDD_PENDING) ixl_handle_mdd_event(pf); ixl_process_adminq(pf, &pending); ixl_update_link_status(pf); /* * If there are still messages to process, reschedule ourselves. * Otherwise, re-enable our interrupt and go to sleep. */ if (pending > 0) iflib_admin_intr_deferred(ctx); else ixl_enable_intr0(hw); } static void ixl_if_multi_set(if_ctx_t ctx) { struct ixl_pf *pf = iflib_get_softc(ctx); struct ixl_vsi *vsi = &pf->vsi; struct i40e_hw *hw = vsi->hw; int mcnt; IOCTL_DEBUGOUT("ixl_if_multi_set: begin"); /* Delete filters for removed multicast addresses */ ixl_del_multi(vsi, false); mcnt = min(if_llmaddr_count(iflib_get_ifp(ctx)), MAX_MULTICAST_ADDR); if (__predict_false(mcnt == MAX_MULTICAST_ADDR)) { i40e_aq_set_vsi_multicast_promiscuous(hw, vsi->seid, TRUE, NULL); ixl_del_multi(vsi, true); return; } ixl_add_multi(vsi); IOCTL_DEBUGOUT("ixl_if_multi_set: end"); } static int ixl_if_mtu_set(if_ctx_t ctx, uint32_t mtu) { struct ixl_pf *pf = iflib_get_softc(ctx); struct ixl_vsi *vsi = &pf->vsi; IOCTL_DEBUGOUT("ioctl: SIOCSIFMTU (Set Interface MTU)"); if (mtu > IXL_MAX_FRAME - ETHER_HDR_LEN - ETHER_CRC_LEN - ETHER_VLAN_ENCAP_LEN) return (EINVAL); vsi->shared->isc_max_frame_size = mtu + ETHER_HDR_LEN + ETHER_CRC_LEN + ETHER_VLAN_ENCAP_LEN; return (0); } static void ixl_if_media_status(if_ctx_t ctx, struct ifmediareq *ifmr) { struct ixl_pf *pf = iflib_get_softc(ctx); struct i40e_hw *hw = &pf->hw; INIT_DEBUGOUT("ixl_media_status: begin"); ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; if (!pf->link_up) { return; } ifmr->ifm_status |= IFM_ACTIVE; /* Hardware is always full-duplex */ ifmr->ifm_active |= IFM_FDX; switch (hw->phy.link_info.phy_type) { /* 100 M */ case I40E_PHY_TYPE_100BASE_TX: ifmr->ifm_active |= IFM_100_TX; break; /* 1 G */ case I40E_PHY_TYPE_1000BASE_T: ifmr->ifm_active |= IFM_1000_T; break; case I40E_PHY_TYPE_1000BASE_SX: ifmr->ifm_active |= IFM_1000_SX; break; case I40E_PHY_TYPE_1000BASE_LX: ifmr->ifm_active |= IFM_1000_LX; break; case I40E_PHY_TYPE_1000BASE_T_OPTICAL: ifmr->ifm_active |= IFM_1000_T; break; /* 2.5 G */ case I40E_PHY_TYPE_2_5GBASE_T: ifmr->ifm_active |= IFM_2500_T; break; /* 5 G */ case I40E_PHY_TYPE_5GBASE_T: ifmr->ifm_active |= IFM_5000_T; break; /* 10 G */ case I40E_PHY_TYPE_10GBASE_SFPP_CU: ifmr->ifm_active |= IFM_10G_TWINAX; break; case I40E_PHY_TYPE_10GBASE_SR: ifmr->ifm_active |= IFM_10G_SR; break; case I40E_PHY_TYPE_10GBASE_LR: ifmr->ifm_active |= IFM_10G_LR; break; case I40E_PHY_TYPE_10GBASE_T: ifmr->ifm_active |= IFM_10G_T; break; case I40E_PHY_TYPE_XAUI: case I40E_PHY_TYPE_XFI: ifmr->ifm_active |= IFM_10G_TWINAX; break; case I40E_PHY_TYPE_10GBASE_AOC: ifmr->ifm_active |= IFM_10G_AOC; break; /* 25 G */ case I40E_PHY_TYPE_25GBASE_KR: ifmr->ifm_active |= IFM_25G_KR; break; case I40E_PHY_TYPE_25GBASE_CR: ifmr->ifm_active |= IFM_25G_CR; break; case I40E_PHY_TYPE_25GBASE_SR: ifmr->ifm_active |= IFM_25G_SR; break; case I40E_PHY_TYPE_25GBASE_LR: ifmr->ifm_active |= IFM_25G_LR; break; case I40E_PHY_TYPE_25GBASE_AOC: ifmr->ifm_active |= IFM_25G_AOC; break; case I40E_PHY_TYPE_25GBASE_ACC: ifmr->ifm_active |= IFM_25G_ACC; break; /* 40 G */ case I40E_PHY_TYPE_40GBASE_CR4: case I40E_PHY_TYPE_40GBASE_CR4_CU: ifmr->ifm_active |= IFM_40G_CR4; break; case I40E_PHY_TYPE_40GBASE_SR4: ifmr->ifm_active |= IFM_40G_SR4; break; case I40E_PHY_TYPE_40GBASE_LR4: ifmr->ifm_active |= IFM_40G_LR4; break; case I40E_PHY_TYPE_XLAUI: ifmr->ifm_active |= IFM_OTHER; break; case I40E_PHY_TYPE_1000BASE_KX: ifmr->ifm_active |= IFM_1000_KX; break; case I40E_PHY_TYPE_SGMII: ifmr->ifm_active |= IFM_1000_SGMII; break; /* ERJ: What's the difference between these? */ case I40E_PHY_TYPE_10GBASE_CR1_CU: case I40E_PHY_TYPE_10GBASE_CR1: ifmr->ifm_active |= IFM_10G_CR1; break; case I40E_PHY_TYPE_10GBASE_KX4: ifmr->ifm_active |= IFM_10G_KX4; break; case I40E_PHY_TYPE_10GBASE_KR: ifmr->ifm_active |= IFM_10G_KR; break; case I40E_PHY_TYPE_SFI: ifmr->ifm_active |= IFM_10G_SFI; break; /* Our single 20G media type */ case I40E_PHY_TYPE_20GBASE_KR2: ifmr->ifm_active |= IFM_20G_KR2; break; case I40E_PHY_TYPE_40GBASE_KR4: ifmr->ifm_active |= IFM_40G_KR4; break; case I40E_PHY_TYPE_XLPPI: case I40E_PHY_TYPE_40GBASE_AOC: ifmr->ifm_active |= IFM_40G_XLPPI; break; /* Unknown to driver */ default: ifmr->ifm_active |= IFM_UNKNOWN; break; } /* Report flow control status as well */ if (hw->phy.link_info.an_info & I40E_AQ_LINK_PAUSE_TX) ifmr->ifm_active |= IFM_ETH_TXPAUSE; if (hw->phy.link_info.an_info & I40E_AQ_LINK_PAUSE_RX) ifmr->ifm_active |= IFM_ETH_RXPAUSE; } static int ixl_if_media_change(if_ctx_t ctx) { struct ifmedia *ifm = iflib_get_media(ctx); INIT_DEBUGOUT("ixl_media_change: begin"); if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) return (EINVAL); if_printf(iflib_get_ifp(ctx), "Media change is not supported.\n"); return (ENODEV); } static int ixl_if_promisc_set(if_ctx_t ctx, int flags) { struct ixl_pf *pf = iflib_get_softc(ctx); struct ixl_vsi *vsi = &pf->vsi; struct ifnet *ifp = iflib_get_ifp(ctx); struct i40e_hw *hw = vsi->hw; int err; bool uni = FALSE, multi = FALSE; if (flags & IFF_PROMISC) uni = multi = TRUE; else if (flags & IFF_ALLMULTI || if_llmaddr_count(ifp) >= MAX_MULTICAST_ADDR) multi = TRUE; err = i40e_aq_set_vsi_unicast_promiscuous(hw, vsi->seid, uni, NULL, true); if (err) return (err); err = i40e_aq_set_vsi_multicast_promiscuous(hw, vsi->seid, multi, NULL); return (err); } static void ixl_if_timer(if_ctx_t ctx, uint16_t qid) { struct ixl_pf *pf = iflib_get_softc(ctx); if (qid != 0) return; ixl_update_stats_counters(pf); } static void ixl_if_vlan_register(if_ctx_t ctx, u16 vtag) { struct ixl_pf *pf = iflib_get_softc(ctx); struct ixl_vsi *vsi = &pf->vsi; struct i40e_hw *hw = vsi->hw; if_t ifp = iflib_get_ifp(ctx); if ((vtag == 0) || (vtag > 4095)) /* Invalid */ return; /* * Keep track of registered VLANS to know what * filters have to be configured when VLAN_HWFILTER * capability is enabled. */ ++vsi->num_vlans; bit_set(vsi->vlans_map, vtag); if ((if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER) == 0) return; if (vsi->num_vlans < IXL_MAX_VLAN_FILTERS) ixl_add_filter(vsi, hw->mac.addr, vtag); else if (vsi->num_vlans == IXL_MAX_VLAN_FILTERS) { /* * There is not enough HW resources to add filters * for all registered VLANs. Re-configure filtering * to allow reception of all expected traffic. */ device_printf(vsi->dev, "Not enough HW filters for all VLANs. VLAN HW filtering disabled"); ixl_del_all_vlan_filters(vsi, hw->mac.addr); ixl_add_filter(vsi, hw->mac.addr, IXL_VLAN_ANY); } } static void ixl_if_vlan_unregister(if_ctx_t ctx, u16 vtag) { struct ixl_pf *pf = iflib_get_softc(ctx); struct ixl_vsi *vsi = &pf->vsi; struct i40e_hw *hw = vsi->hw; if_t ifp = iflib_get_ifp(ctx); if ((vtag == 0) || (vtag > 4095)) /* Invalid */ return; --vsi->num_vlans; bit_clear(vsi->vlans_map, vtag); if ((if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER) == 0) return; if (vsi->num_vlans < IXL_MAX_VLAN_FILTERS) ixl_del_filter(vsi, hw->mac.addr, vtag); else if (vsi->num_vlans == IXL_MAX_VLAN_FILTERS) { ixl_del_filter(vsi, hw->mac.addr, IXL_VLAN_ANY); ixl_add_vlan_filters(vsi, hw->mac.addr); } } static uint64_t ixl_if_get_counter(if_ctx_t ctx, ift_counter cnt) { struct ixl_pf *pf = iflib_get_softc(ctx); struct ixl_vsi *vsi = &pf->vsi; if_t ifp = iflib_get_ifp(ctx); switch (cnt) { case IFCOUNTER_IPACKETS: return (vsi->ipackets); case IFCOUNTER_IERRORS: return (vsi->ierrors); case IFCOUNTER_OPACKETS: return (vsi->opackets); case IFCOUNTER_OERRORS: return (vsi->oerrors); case IFCOUNTER_COLLISIONS: /* Collisions are by standard impossible in 40G/10G Ethernet */ return (0); case IFCOUNTER_IBYTES: return (vsi->ibytes); case IFCOUNTER_OBYTES: return (vsi->obytes); case IFCOUNTER_IMCASTS: return (vsi->imcasts); case IFCOUNTER_OMCASTS: return (vsi->omcasts); case IFCOUNTER_IQDROPS: return (vsi->iqdrops); case IFCOUNTER_OQDROPS: return (vsi->oqdrops); case IFCOUNTER_NOPROTO: return (vsi->noproto); default: return (if_get_counter_default(ifp, cnt)); } } #ifdef PCI_IOV static void ixl_if_vflr_handle(if_ctx_t ctx) { struct ixl_pf *pf = iflib_get_softc(ctx); ixl_handle_vflr(pf); } #endif static int ixl_if_i2c_req(if_ctx_t ctx, struct ifi2creq *req) { struct ixl_pf *pf = iflib_get_softc(ctx); if (pf->read_i2c_byte == NULL) return (EINVAL); for (int i = 0; i < req->len; i++) if (pf->read_i2c_byte(pf, req->offset + i, req->dev_addr, &req->data[i])) return (EIO); return (0); } static int ixl_if_priv_ioctl(if_ctx_t ctx, u_long command, caddr_t data) { struct ixl_pf *pf = iflib_get_softc(ctx); struct ifdrv *ifd = (struct ifdrv *)data; int error = 0; /* * The iflib_if_ioctl forwards SIOCxDRVSPEC and SIOGPRIVATE_0 without * performing privilege checks. It is important that this function * perform the necessary checks for commands which should only be * executed by privileged threads. */ switch(command) { case SIOCGDRVSPEC: case SIOCSDRVSPEC: /* NVM update command */ if (ifd->ifd_cmd == I40E_NVM_ACCESS) { error = priv_check(curthread, PRIV_DRIVER); if (error) break; error = ixl_handle_nvmupd_cmd(pf, ifd); } else { error = EINVAL; } break; default: error = EOPNOTSUPP; } return (error); } /* ixl_if_needs_restart - Tell iflib when the driver needs to be reinitialized * @ctx: iflib context * @event: event code to check * * Defaults to returning false for every event. * * @returns true if iflib needs to reinit the interface, false otherwise */ static bool ixl_if_needs_restart(if_ctx_t ctx __unused, enum iflib_restart_event event) { switch (event) { case IFLIB_RESTART_VLAN_CONFIG: default: return (false); } } /* * Sanity check and save off tunable values. */ static void ixl_save_pf_tunables(struct ixl_pf *pf) { device_t dev = pf->dev; /* Save tunable information */ #ifdef IXL_DEBUG_FC pf->enable_tx_fc_filter = ixl_enable_tx_fc_filter; #endif #ifdef IXL_DEBUG pf->recovery_mode = ixl_debug_recovery_mode; #endif pf->dbg_mask = ixl_core_debug_mask; pf->hw.debug_mask = ixl_shared_debug_mask; pf->vsi.enable_head_writeback = !!(ixl_enable_head_writeback); pf->enable_vf_loopback = !!(ixl_enable_vf_loopback); #if 0 pf->dynamic_rx_itr = ixl_dynamic_rx_itr; pf->dynamic_tx_itr = ixl_dynamic_tx_itr; #endif if (ixl_i2c_access_method > 3 || ixl_i2c_access_method < 0) pf->i2c_access_method = 0; else pf->i2c_access_method = ixl_i2c_access_method; if (ixl_tx_itr < 0 || ixl_tx_itr > IXL_MAX_ITR) { device_printf(dev, "Invalid tx_itr value of %d set!\n", ixl_tx_itr); device_printf(dev, "tx_itr must be between %d and %d, " "inclusive\n", 0, IXL_MAX_ITR); device_printf(dev, "Using default value of %d instead\n", IXL_ITR_4K); pf->tx_itr = IXL_ITR_4K; } else pf->tx_itr = ixl_tx_itr; if (ixl_rx_itr < 0 || ixl_rx_itr > IXL_MAX_ITR) { device_printf(dev, "Invalid rx_itr value of %d set!\n", ixl_rx_itr); device_printf(dev, "rx_itr must be between %d and %d, " "inclusive\n", 0, IXL_MAX_ITR); device_printf(dev, "Using default value of %d instead\n", IXL_ITR_8K); pf->rx_itr = IXL_ITR_8K; } else pf->rx_itr = ixl_rx_itr; } diff --git a/sys/net/iflib.h b/sys/net/iflib.h index a30740e67b6e..fcf96215d398 100644 --- a/sys/net/iflib.h +++ b/sys/net/iflib.h @@ -1,498 +1,498 @@ /*- * Copyright (c) 2014-2017, Matthew Macy (mmacy@mattmacy.io) * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * 2. Neither the name of Matthew Macy nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #ifndef __IFLIB_H_ #define __IFLIB_H_ #include #include #include #include #include #include struct if_clone; /* * The value type for indexing, limits max descriptors * to 65535 can be conditionally redefined to uint32_t * in the future if the need arises. */ typedef uint16_t qidx_t; #define QIDX_INVALID 0xFFFF struct iflib_ctx; typedef struct iflib_ctx *if_ctx_t; struct if_shared_ctx; -typedef struct if_shared_ctx *if_shared_ctx_t; +typedef const struct if_shared_ctx *if_shared_ctx_t; struct if_int_delay_info; typedef struct if_int_delay_info *if_int_delay_info_t; struct if_pseudo; typedef struct if_pseudo *if_pseudo_t; /* * File organization: * - public structures * - iflib accessors * - iflib utility functions * - iflib core functions */ typedef struct if_rxd_frag { uint8_t irf_flid; qidx_t irf_idx; uint16_t irf_len; } *if_rxd_frag_t; /* bnxt supports 64 with hardware LRO enabled */ #define IFLIB_MAX_RX_SEGS 64 typedef struct if_rxd_info { /* set by iflib */ uint16_t iri_qsidx; /* qset index */ uint16_t iri_vtag; /* vlan tag - if flag set */ /* XXX redundant with the new irf_len field */ uint16_t iri_len; /* packet length */ qidx_t iri_cidx; /* consumer index of cq */ if_t iri_ifp; /* driver may have >1 iface per softc */ /* updated by driver */ if_rxd_frag_t iri_frags; uint32_t iri_flowid; /* RSS hash for packet */ uint32_t iri_csum_flags; /* m_pkthdr csum flags */ uint32_t iri_csum_data; /* m_pkthdr csum data */ uint8_t iri_flags; /* mbuf flags for packet */ uint8_t iri_nfrags; /* number of fragments in packet */ uint8_t iri_rsstype; /* RSS hash type */ uint8_t iri_pad; /* any padding in the received data */ } *if_rxd_info_t; typedef struct if_rxd_update { uint64_t *iru_paddrs; qidx_t *iru_idxs; qidx_t iru_pidx; uint16_t iru_qsidx; uint16_t iru_count; uint16_t iru_buf_size; uint8_t iru_flidx; } *if_rxd_update_t; #define IPI_TX_INTR 0x1 /* send an interrupt when this packet is sent */ #define IPI_TX_IPV4 0x2 /* ethertype IPv4 */ #define IPI_TX_IPV6 0x4 /* ethertype IPv6 */ typedef struct if_pkt_info { bus_dma_segment_t *ipi_segs; /* physical addresses */ uint32_t ipi_len; /* packet length */ uint16_t ipi_qsidx; /* queue set index */ qidx_t ipi_nsegs; /* number of segments */ qidx_t ipi_ndescs; /* number of descriptors used by encap */ uint16_t ipi_flags; /* iflib per-packet flags */ qidx_t ipi_pidx; /* start pidx for encap */ qidx_t ipi_new_pidx; /* next available pidx post-encap */ /* offload handling */ uint8_t ipi_ehdrlen; /* ether header length */ uint8_t ipi_ip_hlen; /* ip header length */ uint8_t ipi_tcp_hlen; /* tcp header length */ uint8_t ipi_ipproto; /* ip protocol */ uint32_t ipi_csum_flags; /* packet checksum flags */ uint16_t ipi_tso_segsz; /* tso segment size */ uint16_t ipi_vtag; /* VLAN tag */ uint16_t ipi_etype; /* ether header type */ uint8_t ipi_tcp_hflags; /* tcp header flags */ uint8_t ipi_mflags; /* packet mbuf flags */ uint32_t ipi_tcp_seq; /* tcp seqno */ uint32_t __spare0__; } *if_pkt_info_t; typedef struct if_irq { struct resource *ii_res; int __spare0__; void *ii_tag; } *if_irq_t; struct if_int_delay_info { if_ctx_t iidi_ctx; /* Back-pointer to the iflib ctx (softc) */ int iidi_offset; /* Register offset to read/write */ int iidi_value; /* Current value in usecs */ struct sysctl_oid *iidi_oidp; struct sysctl_req *iidi_req; }; typedef enum { IFLIB_INTR_LEGACY, IFLIB_INTR_MSI, IFLIB_INTR_MSIX } iflib_intr_mode_t; /* * This really belongs in pciio.h or some place more general * but this is the only consumer for now. */ typedef struct pci_vendor_info { uint32_t pvi_vendor_id; uint32_t pvi_device_id; uint32_t pvi_subvendor_id; uint32_t pvi_subdevice_id; uint32_t pvi_rev_id; uint32_t pvi_class_mask; const char *pvi_name; } pci_vendor_info_t; #define PVID(vendor, devid, name) {vendor, devid, 0, 0, 0, 0, name} #define PVID_OEM(vendor, devid, svid, sdevid, revid, name) {vendor, devid, svid, sdevid, revid, 0, name} #define PVID_END {0, 0, 0, 0, 0, 0, NULL} /* No drivers in tree currently match on anything except vendor:device. */ #define IFLIB_PNP_DESCR "U32:vendor;U32:device;U32:#;U32:#;" \ "U32:#;U32:#;D:#" #define IFLIB_PNP_INFO(b, u, t) \ MODULE_PNP_INFO(IFLIB_PNP_DESCR, b, u, t, nitems(t) - 1) typedef struct if_txrx { int (*ift_txd_encap) (void *, if_pkt_info_t); void (*ift_txd_flush) (void *, uint16_t, qidx_t pidx); int (*ift_txd_credits_update) (void *, uint16_t qsidx, bool clear); int (*ift_rxd_available) (void *, uint16_t qsidx, qidx_t pidx, qidx_t budget); int (*ift_rxd_pkt_get) (void *, if_rxd_info_t ri); void (*ift_rxd_refill) (void * , if_rxd_update_t iru); void (*ift_rxd_flush) (void *, uint16_t qsidx, uint8_t flidx, qidx_t pidx); int (*ift_legacy_intr) (void *); } *if_txrx_t; typedef struct if_softc_ctx { int isc_vectors; int isc_nrxqsets; int isc_ntxqsets; uint16_t __spare0__; uint32_t __spare1__; int isc_msix_bar; /* can be model specific - initialize in attach_pre */ int isc_tx_nsegments; /* can be model specific - initialize in attach_pre */ int isc_ntxd[8]; int isc_nrxd[8]; uint32_t isc_txqsizes[8]; uint32_t isc_rxqsizes[8]; /* is there such thing as a descriptor that is more than 248 bytes ? */ uint8_t isc_txd_size[8]; uint8_t isc_rxd_size[8]; int isc_tx_tso_segments_max; int isc_tx_tso_size_max; int isc_tx_tso_segsize_max; int isc_tx_csum_flags; int isc_capabilities; int isc_capenable; int isc_rss_table_size; int isc_rss_table_mask; int isc_nrxqsets_max; int isc_ntxqsets_max; uint32_t __spare2__; iflib_intr_mode_t isc_intr; uint16_t isc_rxd_buf_size[8]; /* set at init time by driver, 0 means use iflib-calculated size based on isc_max_frame_size */ uint16_t isc_max_frame_size; /* set at init time by driver */ uint16_t isc_min_frame_size; /* set at init time by driver, only used if IFLIB_NEED_ETHER_PAD is set. */ uint32_t isc_pause_frames; /* set by driver for iflib_timer to detect */ uint32_t __spare3__; uint32_t __spare4__; uint32_t __spare5__; uint32_t __spare6__; uint32_t __spare7__; uint32_t __spare8__; caddr_t __spare9__; int isc_disable_msix; if_txrx_t isc_txrx; struct ifmedia *isc_media; } *if_softc_ctx_t; /* * Initialization values for device */ struct if_shared_ctx { unsigned isc_magic; driver_t *isc_driver; bus_size_t isc_q_align; bus_size_t isc_tx_maxsize; bus_size_t isc_tx_maxsegsize; bus_size_t isc_tso_maxsize; bus_size_t isc_tso_maxsegsize; bus_size_t isc_rx_maxsize; bus_size_t isc_rx_maxsegsize; int isc_rx_nsegments; int isc_admin_intrcnt; /* # of admin/link interrupts */ /* fields necessary for probe */ const pci_vendor_info_t *isc_vendor_info; const char *isc_driver_version; /* optional function to transform the read values to match the table*/ void (*isc_parse_devinfo) (uint16_t *device_id, uint16_t *subvendor_id, uint16_t *subdevice_id, uint16_t *rev_id); int isc_nrxd_min[8]; int isc_nrxd_default[8]; int isc_nrxd_max[8]; int isc_ntxd_min[8]; int isc_ntxd_default[8]; int isc_ntxd_max[8]; /* actively used during operation */ int isc_nfl __aligned(CACHE_LINE_SIZE); int isc_ntxqs; /* # of tx queues per tx qset - usually 1 */ int isc_nrxqs; /* # of rx queues per rx qset - intel 1, chelsio 2, broadcom 3 */ int __spare0__; int isc_tx_reclaim_thresh; int isc_flags; const char *isc_name; }; typedef struct iflib_dma_info { bus_addr_t idi_paddr; caddr_t idi_vaddr; bus_dma_tag_t idi_tag; bus_dmamap_t idi_map; uint32_t idi_size; } *iflib_dma_info_t; #define IFLIB_MAGIC 0xCAFEF00D typedef enum { /* Interrupt or softirq handles only receive */ IFLIB_INTR_RX, /* Interrupt or softirq handles only transmit */ IFLIB_INTR_TX, /* * Interrupt will check for both pending receive * and available tx credits and dispatch a task * for one or both depending on the disposition * of the respective queues. */ IFLIB_INTR_RXTX, /* * Other interrupt - typically link status and * or error conditions. */ IFLIB_INTR_ADMIN, /* Softirq (task) for iov handling */ IFLIB_INTR_IOV, } iflib_intr_type_t; /* * Interface has a separate completion queue for RX */ #define IFLIB_HAS_RXCQ 0x01 /* * Driver has already allocated vectors */ #define IFLIB_SKIP_MSIX 0x02 /* * Interface is a virtual function */ #define IFLIB_IS_VF 0x04 /* * Interface has a separate completion queue for TX */ #define IFLIB_HAS_TXCQ 0x08 /* * Interface does checksum in place */ #define IFLIB_NEED_SCRATCH 0x10 /* * Interface doesn't expect in_pseudo for th_sum */ #define IFLIB_TSO_INIT_IP 0x20 /* * Interface doesn't align IP header */ #define IFLIB_DO_RX_FIXUP 0x40 /* * Driver needs csum zeroed for offloading */ #define IFLIB_NEED_ZERO_CSUM 0x80 /* * Driver needs frames padded to some minimum length */ #define IFLIB_NEED_ETHER_PAD 0x100 /* * Packets can be freed immediately after encap */ #define IFLIB_TXD_ENCAP_PIO 0x00200 /* * Use RX completion handler */ #define IFLIB_RX_COMPLETION 0x00400 /* * Skip refilling cluster free lists */ #define IFLIB_SKIP_CLREFILL 0x00800 /* * Don't reset on hang */ #define IFLIB_NO_HANG_RESET 0x01000 /* * Don't need/want most of the niceties of * queue management */ #define IFLIB_PSEUDO 0x02000 /* * No DMA support needed / wanted */ #define IFLIB_VIRTUAL 0x04000 /* * autogenerate a MAC address */ #define IFLIB_GEN_MAC 0x08000 /* * Interface needs admin task to ignore interface up/down status */ #define IFLIB_ADMIN_ALWAYS_RUN 0x10000 /* * Driver will pass the media */ #define IFLIB_DRIVER_MEDIA 0x20000 /* * When using a single hardware interrupt for the interface, only process RX * interrupts instead of doing combined RX/TX processing. */ #define IFLIB_SINGLE_IRQ_RX_ONLY 0x40000 /* * Don't need/want most of the niceties of * emulating ethernet */ #define IFLIB_PSEUDO_ETHER 0x80000 /* * These enum values are used in iflib_needs_restart to indicate to iflib * functions whether or not the interface needs restarting when certain events * happen. */ enum iflib_restart_event { IFLIB_RESTART_VLAN_CONFIG, }; /* * field accessors */ void *iflib_get_softc(if_ctx_t ctx); device_t iflib_get_dev(if_ctx_t ctx); if_t iflib_get_ifp(if_ctx_t ctx); struct ifmedia *iflib_get_media(if_ctx_t ctx); if_softc_ctx_t iflib_get_softc_ctx(if_ctx_t ctx); if_shared_ctx_t iflib_get_sctx(if_ctx_t ctx); void iflib_set_mac(if_ctx_t ctx, uint8_t mac[ETHER_ADDR_LEN]); void iflib_request_reset(if_ctx_t ctx); uint8_t iflib_in_detach(if_ctx_t ctx); uint32_t iflib_get_rx_mbuf_sz(if_ctx_t ctx); /* * If the driver can plug cleanly in to newbus use these */ int iflib_device_probe(device_t); int iflib_device_attach(device_t); int iflib_device_detach(device_t); int iflib_device_suspend(device_t); int iflib_device_resume(device_t); int iflib_device_shutdown(device_t); /* * Use this instead of iflib_device_probe if the driver should report * BUS_PROBE_VENDOR instead of BUS_PROBE_DEFAULT. (For example, an out-of-tree * driver based on iflib). */ int iflib_device_probe_vendor(device_t); int iflib_device_iov_init(device_t, uint16_t, const nvlist_t *); void iflib_device_iov_uninit(device_t); int iflib_device_iov_add_vf(device_t, uint16_t, const nvlist_t *); /* * If the driver can't plug cleanly in to newbus * use these */ int iflib_device_register(device_t dev, void *softc, if_shared_ctx_t sctx, if_ctx_t *ctxp); int iflib_device_deregister(if_ctx_t); int iflib_irq_alloc(if_ctx_t, if_irq_t, int, driver_filter_t, void *filter_arg, driver_intr_t, void *arg, const char *name); int iflib_irq_alloc_generic(if_ctx_t ctx, if_irq_t irq, int rid, iflib_intr_type_t type, driver_filter_t *filter, void *filter_arg, int qid, const char *name); void iflib_softirq_alloc_generic(if_ctx_t ctx, if_irq_t irq, iflib_intr_type_t type, void *arg, int qid, const char *name); void iflib_irq_free(if_ctx_t ctx, if_irq_t irq); void iflib_io_tqg_attach(struct grouptask *gt, void *uniq, int cpu, const char *name); void iflib_config_gtask_init(void *ctx, struct grouptask *gtask, gtask_fn_t *fn, const char *name); void iflib_config_gtask_deinit(struct grouptask *gtask); void iflib_tx_intr_deferred(if_ctx_t ctx, int txqid); void iflib_rx_intr_deferred(if_ctx_t ctx, int rxqid); void iflib_admin_intr_deferred(if_ctx_t ctx); void iflib_iov_intr_deferred(if_ctx_t ctx); void iflib_link_state_change(if_ctx_t ctx, int linkstate, uint64_t baudrate); int iflib_dma_alloc(if_ctx_t ctx, int size, iflib_dma_info_t dma, int mapflags); int iflib_dma_alloc_align(if_ctx_t ctx, int size, int align, iflib_dma_info_t dma, int mapflags); void iflib_dma_free(iflib_dma_info_t dma); int iflib_dma_alloc_multi(if_ctx_t ctx, int *sizes, iflib_dma_info_t *dmalist, int mapflags, int count); void iflib_dma_free_multi(iflib_dma_info_t *dmalist, int count); struct sx *iflib_ctx_lock_get(if_ctx_t); void iflib_led_create(if_ctx_t ctx); void iflib_add_int_delay_sysctl(if_ctx_t, const char *, const char *, if_int_delay_info_t, int, int); /* * Pseudo device support */ if_pseudo_t iflib_clone_register(if_shared_ctx_t); void iflib_clone_deregister(if_pseudo_t); #endif /* __IFLIB_H_ */