Index: head/sys/dev/bnxt/if_bnxt.c =================================================================== --- head/sys/dev/bnxt/if_bnxt.c (revision 323515) +++ head/sys/dev/bnxt/if_bnxt.c (revision 323516) @@ -1,2649 +1,2650 @@ /*- * Broadcom NetXtreme-C/E network driver. * * Copyright (c) 2016 Broadcom, All Rights Reserved. * The term Broadcom refers to Broadcom Limited and/or its subsidiaries * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS' * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "opt_inet.h" #include "opt_inet6.h" #include "opt_rss.h" #include "ifdi_if.h" #include "bnxt.h" #include "bnxt_hwrm.h" #include "bnxt_ioctl.h" #include "bnxt_sysctl.h" #include "hsi_struct_def.h" /* * PCI Device ID Table */ static pci_vendor_info_t bnxt_vendor_info_array[] = { PVID(BROADCOM_VENDOR_ID, BCM57301, "Broadcom BCM57301 NetXtreme-C 10Gb Ethernet Controller"), PVID(BROADCOM_VENDOR_ID, BCM57302, "Broadcom BCM57302 NetXtreme-C 10Gb/25Gb Ethernet Controller"), PVID(BROADCOM_VENDOR_ID, BCM57304, "Broadcom BCM57304 NetXtreme-C 10Gb/25Gb/40Gb/50Gb Ethernet Controller"), PVID(BROADCOM_VENDOR_ID, BCM57311, "Broadcom BCM57311 NetXtreme-C 10Gb Ethernet"), PVID(BROADCOM_VENDOR_ID, BCM57312, "Broadcom BCM57312 NetXtreme-C 10Gb/25Gb Ethernet"), PVID(BROADCOM_VENDOR_ID, BCM57314, "Broadcom BCM57314 NetXtreme-C 10Gb/25Gb/40Gb/50Gb Ethernet"), PVID(BROADCOM_VENDOR_ID, BCM57402, "Broadcom BCM57402 NetXtreme-E 10Gb Ethernet Controller"), PVID(BROADCOM_VENDOR_ID, BCM57402_NPAR, "Broadcom BCM57402 NetXtreme-E Partition"), PVID(BROADCOM_VENDOR_ID, BCM57404, "Broadcom BCM57404 NetXtreme-E 10Gb/25Gb Ethernet Controller"), PVID(BROADCOM_VENDOR_ID, BCM57404_NPAR, "Broadcom BCM57404 NetXtreme-E Partition"), PVID(BROADCOM_VENDOR_ID, BCM57406, "Broadcom BCM57406 NetXtreme-E 10GBase-T Ethernet Controller"), PVID(BROADCOM_VENDOR_ID, BCM57406_NPAR, "Broadcom BCM57406 NetXtreme-E Partition"), PVID(BROADCOM_VENDOR_ID, BCM57407, "Broadcom BCM57407 NetXtreme-E 10GBase-T Ethernet Controller"), PVID(BROADCOM_VENDOR_ID, BCM57407_NPAR, "Broadcom BCM57407 NetXtreme-E Ethernet Partition"), PVID(BROADCOM_VENDOR_ID, BCM57407_SFP, "Broadcom BCM57407 NetXtreme-E 25Gb Ethernet Controller"), PVID(BROADCOM_VENDOR_ID, BCM57412, "Broadcom BCM57412 NetXtreme-E 10Gb Ethernet"), PVID(BROADCOM_VENDOR_ID, BCM57412_NPAR1, "Broadcom BCM57412 NetXtreme-E Ethernet Partition"), PVID(BROADCOM_VENDOR_ID, BCM57412_NPAR2, "Broadcom BCM57412 NetXtreme-E Ethernet Partition"), PVID(BROADCOM_VENDOR_ID, BCM57414, "Broadcom BCM57414 NetXtreme-E 10Gb/25Gb Ethernet"), PVID(BROADCOM_VENDOR_ID, BCM57414_NPAR1, "Broadcom BCM57414 NetXtreme-E Ethernet Partition"), PVID(BROADCOM_VENDOR_ID, BCM57414_NPAR2, "Broadcom BCM57414 NetXtreme-E Ethernet Partition"), PVID(BROADCOM_VENDOR_ID, BCM57416, "Broadcom BCM57416 NetXtreme-E 10GBase-T Ethernet"), PVID(BROADCOM_VENDOR_ID, BCM57416_NPAR1, "Broadcom BCM57416 NetXtreme-E Ethernet Partition"), PVID(BROADCOM_VENDOR_ID, BCM57416_NPAR2, "Broadcom BCM57416 NetXtreme-E Ethernet Partition"), PVID(BROADCOM_VENDOR_ID, BCM57416_SFP, "Broadcom BCM57416 NetXtreme-E 10Gb Ethernet"), PVID(BROADCOM_VENDOR_ID, BCM57417, "Broadcom BCM57417 NetXtreme-E 10GBase-T Ethernet"), PVID(BROADCOM_VENDOR_ID, BCM57417_NPAR1, "Broadcom BCM57417 NetXtreme-E Ethernet Partition"), PVID(BROADCOM_VENDOR_ID, BCM57417_NPAR2, "Broadcom BCM57417 NetXtreme-E Ethernet Partition"), PVID(BROADCOM_VENDOR_ID, BCM57417_SFP, "Broadcom BCM57417 NetXtreme-E 10Gb/25Gb Ethernet"), PVID(BROADCOM_VENDOR_ID, BCM57454, "Broadcom BCM57454 NetXtreme-E 10Gb/25Gb/40Gb/50Gb/100Gb Ethernet"), PVID(BROADCOM_VENDOR_ID, BCM58700, "Broadcom BCM58700 Nitro 1Gb/2.5Gb/10Gb Ethernet"), PVID(BROADCOM_VENDOR_ID, NETXTREME_C_VF1, "Broadcom NetXtreme-C Ethernet Virtual Function"), PVID(BROADCOM_VENDOR_ID, NETXTREME_C_VF2, "Broadcom NetXtreme-C Ethernet Virtual Function"), PVID(BROADCOM_VENDOR_ID, NETXTREME_C_VF3, "Broadcom NetXtreme-C Ethernet Virtual Function"), PVID(BROADCOM_VENDOR_ID, NETXTREME_E_VF1, "Broadcom NetXtreme-E Ethernet Virtual Function"), PVID(BROADCOM_VENDOR_ID, NETXTREME_E_VF2, "Broadcom NetXtreme-E Ethernet Virtual Function"), PVID(BROADCOM_VENDOR_ID, NETXTREME_E_VF3, "Broadcom NetXtreme-E Ethernet Virtual Function"), /* required last entry */ PVID_END }; /* * Function prototypes */ static void *bnxt_register(device_t dev); /* Soft queue setup and teardown */ static int bnxt_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int ntxqs, int ntxqsets); static int bnxt_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int nrxqs, int nrxqsets); static void bnxt_queues_free(if_ctx_t ctx); /* Device setup and teardown */ static int bnxt_attach_pre(if_ctx_t ctx); static int bnxt_attach_post(if_ctx_t ctx); static int bnxt_detach(if_ctx_t ctx); /* Device configuration */ static void bnxt_init(if_ctx_t ctx); static void bnxt_stop(if_ctx_t ctx); static void bnxt_multi_set(if_ctx_t ctx); static int bnxt_mtu_set(if_ctx_t ctx, uint32_t mtu); static void bnxt_media_status(if_ctx_t ctx, struct ifmediareq * ifmr); static int bnxt_media_change(if_ctx_t ctx); static int bnxt_promisc_set(if_ctx_t ctx, int flags); static uint64_t bnxt_get_counter(if_ctx_t, ift_counter); static void bnxt_update_admin_status(if_ctx_t ctx); static void bnxt_if_timer(if_ctx_t ctx, uint16_t qid); /* Interrupt enable / disable */ static void bnxt_intr_enable(if_ctx_t ctx); static int bnxt_rx_queue_intr_enable(if_ctx_t ctx, uint16_t qid); static int bnxt_tx_queue_intr_enable(if_ctx_t ctx, uint16_t qid); static void bnxt_disable_intr(if_ctx_t ctx); static int bnxt_msix_intr_assign(if_ctx_t ctx, int msix); /* vlan support */ static void bnxt_vlan_register(if_ctx_t ctx, uint16_t vtag); static void bnxt_vlan_unregister(if_ctx_t ctx, uint16_t vtag); /* ioctl */ static int bnxt_priv_ioctl(if_ctx_t ctx, u_long command, caddr_t data); static int bnxt_shutdown(if_ctx_t ctx); static int bnxt_suspend(if_ctx_t ctx); static int bnxt_resume(if_ctx_t ctx); /* Internal support functions */ static int bnxt_probe_phy(struct bnxt_softc *softc); static void bnxt_add_media_types(struct bnxt_softc *softc); static int bnxt_pci_mapping(struct bnxt_softc *softc); static void bnxt_pci_mapping_free(struct bnxt_softc *softc); static int bnxt_update_link(struct bnxt_softc *softc, bool chng_link_state); static int bnxt_handle_def_cp(void *arg); static int bnxt_handle_rx_cp(void *arg); static void bnxt_clear_ids(struct bnxt_softc *softc); static void inline bnxt_do_enable_intr(struct bnxt_cp_ring *cpr); static void inline bnxt_do_disable_intr(struct bnxt_cp_ring *cpr); static void bnxt_mark_cpr_invalid(struct bnxt_cp_ring *cpr); static void bnxt_def_cp_task(void *context); static void bnxt_handle_async_event(struct bnxt_softc *softc, struct cmpl_base *cmpl); static uint8_t get_phy_type(struct bnxt_softc *softc); static uint64_t bnxt_get_baudrate(struct bnxt_link_info *link); static void bnxt_get_wol_settings(struct bnxt_softc *softc); static int bnxt_wol_config(if_ctx_t ctx); /* * Device Interface Declaration */ static device_method_t bnxt_methods[] = { /* Device interface */ DEVMETHOD(device_register, bnxt_register), DEVMETHOD(device_probe, iflib_device_probe), DEVMETHOD(device_attach, iflib_device_attach), DEVMETHOD(device_detach, iflib_device_detach), DEVMETHOD(device_shutdown, iflib_device_shutdown), DEVMETHOD(device_suspend, iflib_device_suspend), DEVMETHOD(device_resume, iflib_device_resume), DEVMETHOD_END }; static driver_t bnxt_driver = { "bnxt", bnxt_methods, sizeof(struct bnxt_softc), }; devclass_t bnxt_devclass; DRIVER_MODULE(bnxt, pci, bnxt_driver, bnxt_devclass, 0, 0); MODULE_DEPEND(bnxt, pci, 1, 1, 1); MODULE_DEPEND(bnxt, ether, 1, 1, 1); MODULE_DEPEND(bnxt, iflib, 1, 1, 1); static device_method_t bnxt_iflib_methods[] = { DEVMETHOD(ifdi_tx_queues_alloc, bnxt_tx_queues_alloc), DEVMETHOD(ifdi_rx_queues_alloc, bnxt_rx_queues_alloc), DEVMETHOD(ifdi_queues_free, bnxt_queues_free), DEVMETHOD(ifdi_attach_pre, bnxt_attach_pre), DEVMETHOD(ifdi_attach_post, bnxt_attach_post), DEVMETHOD(ifdi_detach, bnxt_detach), DEVMETHOD(ifdi_init, bnxt_init), DEVMETHOD(ifdi_stop, bnxt_stop), DEVMETHOD(ifdi_multi_set, bnxt_multi_set), DEVMETHOD(ifdi_mtu_set, bnxt_mtu_set), DEVMETHOD(ifdi_media_status, bnxt_media_status), DEVMETHOD(ifdi_media_change, bnxt_media_change), DEVMETHOD(ifdi_promisc_set, bnxt_promisc_set), DEVMETHOD(ifdi_get_counter, bnxt_get_counter), DEVMETHOD(ifdi_update_admin_status, bnxt_update_admin_status), DEVMETHOD(ifdi_timer, bnxt_if_timer), DEVMETHOD(ifdi_intr_enable, bnxt_intr_enable), DEVMETHOD(ifdi_tx_queue_intr_enable, bnxt_tx_queue_intr_enable), DEVMETHOD(ifdi_rx_queue_intr_enable, bnxt_rx_queue_intr_enable), DEVMETHOD(ifdi_intr_disable, bnxt_disable_intr), DEVMETHOD(ifdi_msix_intr_assign, bnxt_msix_intr_assign), DEVMETHOD(ifdi_vlan_register, bnxt_vlan_register), DEVMETHOD(ifdi_vlan_unregister, bnxt_vlan_unregister), DEVMETHOD(ifdi_priv_ioctl, bnxt_priv_ioctl), DEVMETHOD(ifdi_suspend, bnxt_suspend), DEVMETHOD(ifdi_shutdown, bnxt_shutdown), DEVMETHOD(ifdi_resume, bnxt_resume), DEVMETHOD_END }; static driver_t bnxt_iflib_driver = { "bnxt", bnxt_iflib_methods, sizeof(struct bnxt_softc) }; /* * iflib shared context */ #define BNXT_DRIVER_VERSION "1.0.0.2" char bnxt_driver_version[] = BNXT_DRIVER_VERSION; extern struct if_txrx bnxt_txrx; static struct if_shared_ctx bnxt_sctx_init = { .isc_magic = IFLIB_MAGIC, .isc_driver = &bnxt_iflib_driver, .isc_nfl = 2, // Number of Free Lists .isc_flags = IFLIB_HAS_RXCQ | IFLIB_HAS_TXCQ, .isc_q_align = PAGE_SIZE, .isc_tx_maxsize = BNXT_TSO_SIZE, .isc_tx_maxsegsize = BNXT_TSO_SIZE, .isc_rx_maxsize = BNXT_TSO_SIZE, .isc_rx_maxsegsize = BNXT_TSO_SIZE, // Only use a single segment to avoid page size constraints .isc_rx_nsegments = 1, .isc_ntxqs = 2, .isc_nrxqs = 3, .isc_nrxd_min = {16, 16, 16}, .isc_nrxd_default = {PAGE_SIZE / sizeof(struct cmpl_base) * 8, PAGE_SIZE / sizeof(struct rx_prod_pkt_bd), PAGE_SIZE / sizeof(struct rx_prod_pkt_bd)}, .isc_nrxd_max = {INT32_MAX, INT32_MAX, INT32_MAX}, .isc_ntxd_min = {16, 16, 16}, .isc_ntxd_default = {PAGE_SIZE / sizeof(struct cmpl_base) * 2, PAGE_SIZE / sizeof(struct tx_bd_short)}, .isc_ntxd_max = {INT32_MAX, INT32_MAX, INT32_MAX}, .isc_admin_intrcnt = 1, .isc_vendor_info = bnxt_vendor_info_array, .isc_driver_version = bnxt_driver_version, }; if_shared_ctx_t bnxt_sctx = &bnxt_sctx_init; /* * Device Methods */ static void * bnxt_register(device_t dev) { return bnxt_sctx; } /* * Device Dependent Configuration Functions */ /* Soft queue setup and teardown */ static int bnxt_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int ntxqs, int ntxqsets) { struct bnxt_softc *softc; int i; int rc; softc = iflib_get_softc(ctx); softc->tx_cp_rings = malloc(sizeof(struct bnxt_cp_ring) * ntxqsets, M_DEVBUF, M_NOWAIT | M_ZERO); if (!softc->tx_cp_rings) { device_printf(iflib_get_dev(ctx), "unable to allocate TX completion rings\n"); rc = ENOMEM; goto cp_alloc_fail; } softc->tx_rings = malloc(sizeof(struct bnxt_ring) * ntxqsets, M_DEVBUF, M_NOWAIT | M_ZERO); if (!softc->tx_rings) { device_printf(iflib_get_dev(ctx), "unable to allocate TX rings\n"); rc = ENOMEM; goto ring_alloc_fail; } rc = iflib_dma_alloc(ctx, sizeof(struct ctx_hw_stats) * ntxqsets, &softc->tx_stats, 0); if (rc) goto dma_alloc_fail; bus_dmamap_sync(softc->tx_stats.idi_tag, softc->tx_stats.idi_map, BUS_DMASYNC_PREREAD); for (i = 0; i < ntxqsets; i++) { /* Set up the completion ring */ softc->tx_cp_rings[i].stats_ctx_id = HWRM_NA_SIGNATURE; softc->tx_cp_rings[i].ring.phys_id = (uint16_t)HWRM_NA_SIGNATURE; softc->tx_cp_rings[i].ring.softc = softc; softc->tx_cp_rings[i].ring.id = (softc->scctx->isc_nrxqsets * 2) + 1 + i; softc->tx_cp_rings[i].ring.doorbell = softc->tx_cp_rings[i].ring.id * 0x80; softc->tx_cp_rings[i].ring.ring_size = softc->scctx->isc_ntxd[0]; softc->tx_cp_rings[i].ring.vaddr = vaddrs[i * ntxqs]; softc->tx_cp_rings[i].ring.paddr = paddrs[i * ntxqs]; /* Set up the TX ring */ softc->tx_rings[i].phys_id = (uint16_t)HWRM_NA_SIGNATURE; softc->tx_rings[i].softc = softc; softc->tx_rings[i].id = (softc->scctx->isc_nrxqsets * 2) + 1 + i; softc->tx_rings[i].doorbell = softc->tx_rings[i].id * 0x80; softc->tx_rings[i].ring_size = softc->scctx->isc_ntxd[1]; softc->tx_rings[i].vaddr = vaddrs[i * ntxqs + 1]; softc->tx_rings[i].paddr = paddrs[i * ntxqs + 1]; bnxt_create_tx_sysctls(softc, i); } softc->ntxqsets = ntxqsets; return rc; dma_alloc_fail: free(softc->tx_rings, M_DEVBUF); ring_alloc_fail: free(softc->tx_cp_rings, M_DEVBUF); cp_alloc_fail: return rc; } static void bnxt_queues_free(if_ctx_t ctx) { struct bnxt_softc *softc = iflib_get_softc(ctx); // Free TX queues iflib_dma_free(&softc->tx_stats); free(softc->tx_rings, M_DEVBUF); softc->tx_rings = NULL; free(softc->tx_cp_rings, M_DEVBUF); softc->tx_cp_rings = NULL; softc->ntxqsets = 0; // Free RX queues iflib_dma_free(&softc->rx_stats); iflib_dma_free(&softc->hw_tx_port_stats); iflib_dma_free(&softc->hw_rx_port_stats); free(softc->grp_info, M_DEVBUF); free(softc->ag_rings, M_DEVBUF); free(softc->rx_rings, M_DEVBUF); free(softc->rx_cp_rings, M_DEVBUF); } static int bnxt_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int nrxqs, int nrxqsets) { struct bnxt_softc *softc; int i; int rc; softc = iflib_get_softc(ctx); softc->rx_cp_rings = malloc(sizeof(struct bnxt_cp_ring) * nrxqsets, M_DEVBUF, M_NOWAIT | M_ZERO); if (!softc->rx_cp_rings) { device_printf(iflib_get_dev(ctx), "unable to allocate RX completion rings\n"); rc = ENOMEM; goto cp_alloc_fail; } softc->rx_rings = malloc(sizeof(struct bnxt_ring) * nrxqsets, M_DEVBUF, M_NOWAIT | M_ZERO); if (!softc->rx_rings) { device_printf(iflib_get_dev(ctx), "unable to allocate RX rings\n"); rc = ENOMEM; goto ring_alloc_fail; } softc->ag_rings = malloc(sizeof(struct bnxt_ring) * nrxqsets, M_DEVBUF, M_NOWAIT | M_ZERO); if (!softc->ag_rings) { device_printf(iflib_get_dev(ctx), "unable to allocate aggregation rings\n"); rc = ENOMEM; goto ag_alloc_fail; } softc->grp_info = malloc(sizeof(struct bnxt_grp_info) * nrxqsets, M_DEVBUF, M_NOWAIT | M_ZERO); if (!softc->grp_info) { device_printf(iflib_get_dev(ctx), "unable to allocate ring groups\n"); rc = ENOMEM; goto grp_alloc_fail; } rc = iflib_dma_alloc(ctx, sizeof(struct ctx_hw_stats) * nrxqsets, &softc->rx_stats, 0); if (rc) goto hw_stats_alloc_fail; bus_dmamap_sync(softc->rx_stats.idi_tag, softc->rx_stats.idi_map, BUS_DMASYNC_PREREAD); /* * Additional 512 bytes for future expansion. * To prevent corruption when loaded with newer firmwares with added counters. * This can be deleted when there will be no further additions of counters. */ #define BNXT_PORT_STAT_PADDING 512 rc = iflib_dma_alloc(ctx, sizeof(struct rx_port_stats) + BNXT_PORT_STAT_PADDING, &softc->hw_rx_port_stats, 0); if (rc) goto hw_port_rx_stats_alloc_fail; bus_dmamap_sync(softc->hw_rx_port_stats.idi_tag, softc->hw_rx_port_stats.idi_map, BUS_DMASYNC_PREREAD); rc = iflib_dma_alloc(ctx, sizeof(struct tx_port_stats) + BNXT_PORT_STAT_PADDING, &softc->hw_tx_port_stats, 0); if (rc) goto hw_port_tx_stats_alloc_fail; bus_dmamap_sync(softc->hw_tx_port_stats.idi_tag, softc->hw_tx_port_stats.idi_map, BUS_DMASYNC_PREREAD); softc->rx_port_stats = (void *) softc->hw_rx_port_stats.idi_vaddr; softc->tx_port_stats = (void *) softc->hw_tx_port_stats.idi_vaddr; for (i = 0; i < nrxqsets; i++) { /* Allocation the completion ring */ softc->rx_cp_rings[i].stats_ctx_id = HWRM_NA_SIGNATURE; softc->rx_cp_rings[i].ring.phys_id = (uint16_t)HWRM_NA_SIGNATURE; softc->rx_cp_rings[i].ring.softc = softc; softc->rx_cp_rings[i].ring.id = i + 1; softc->rx_cp_rings[i].ring.doorbell = softc->rx_cp_rings[i].ring.id * 0x80; /* * If this ring overflows, RX stops working. */ softc->rx_cp_rings[i].ring.ring_size = softc->scctx->isc_nrxd[0]; softc->rx_cp_rings[i].ring.vaddr = vaddrs[i * nrxqs]; softc->rx_cp_rings[i].ring.paddr = paddrs[i * nrxqs]; /* Allocate the RX ring */ softc->rx_rings[i].phys_id = (uint16_t)HWRM_NA_SIGNATURE; softc->rx_rings[i].softc = softc; softc->rx_rings[i].id = i + 1; softc->rx_rings[i].doorbell = softc->rx_rings[i].id * 0x80; softc->rx_rings[i].ring_size = softc->scctx->isc_nrxd[1]; softc->rx_rings[i].vaddr = vaddrs[i * nrxqs + 1]; softc->rx_rings[i].paddr = paddrs[i * nrxqs + 1]; /* Allocate the TPA start buffer */ softc->rx_rings[i].tpa_start = malloc(sizeof(struct bnxt_full_tpa_start) * (RX_TPA_START_CMPL_AGG_ID_MASK >> RX_TPA_START_CMPL_AGG_ID_SFT), M_DEVBUF, M_NOWAIT | M_ZERO); if (softc->rx_rings[i].tpa_start == NULL) { rc = -ENOMEM; device_printf(softc->dev, "Unable to allocate space for TPA\n"); goto tpa_alloc_fail; } /* Allocate the AG ring */ softc->ag_rings[i].phys_id = (uint16_t)HWRM_NA_SIGNATURE; softc->ag_rings[i].softc = softc; softc->ag_rings[i].id = nrxqsets + i + 1; softc->ag_rings[i].doorbell = softc->ag_rings[i].id * 0x80; softc->ag_rings[i].ring_size = softc->scctx->isc_nrxd[2]; softc->ag_rings[i].vaddr = vaddrs[i * nrxqs + 2]; softc->ag_rings[i].paddr = paddrs[i * nrxqs + 2]; /* Allocate the ring group */ softc->grp_info[i].grp_id = (uint16_t)HWRM_NA_SIGNATURE; softc->grp_info[i].stats_ctx = softc->rx_cp_rings[i].stats_ctx_id; softc->grp_info[i].rx_ring_id = softc->rx_rings[i].phys_id; softc->grp_info[i].ag_ring_id = softc->ag_rings[i].phys_id; softc->grp_info[i].cp_ring_id = softc->rx_cp_rings[i].ring.phys_id; bnxt_create_rx_sysctls(softc, i); } /* * When SR-IOV is enabled, avoid each VF sending PORT_QSTATS * HWRM every sec with which firmware timeouts can happen */ if (BNXT_PF(softc)) bnxt_create_port_stats_sysctls(softc); /* And finally, the VNIC */ softc->vnic_info.id = (uint16_t)HWRM_NA_SIGNATURE; softc->vnic_info.flow_id = (uint16_t)HWRM_NA_SIGNATURE; softc->vnic_info.filter_id = -1; softc->vnic_info.def_ring_grp = (uint16_t)HWRM_NA_SIGNATURE; softc->vnic_info.cos_rule = (uint16_t)HWRM_NA_SIGNATURE; softc->vnic_info.lb_rule = (uint16_t)HWRM_NA_SIGNATURE; softc->vnic_info.rx_mask = HWRM_CFA_L2_SET_RX_MASK_INPUT_MASK_BCAST; softc->vnic_info.mc_list_count = 0; softc->vnic_info.flags = BNXT_VNIC_FLAG_DEFAULT; rc = iflib_dma_alloc(ctx, BNXT_MAX_MC_ADDRS * ETHER_ADDR_LEN, &softc->vnic_info.mc_list, 0); if (rc) goto mc_list_alloc_fail; /* The VNIC RSS Hash Key */ rc = iflib_dma_alloc(ctx, HW_HASH_KEY_SIZE, &softc->vnic_info.rss_hash_key_tbl, 0); if (rc) goto rss_hash_alloc_fail; bus_dmamap_sync(softc->vnic_info.rss_hash_key_tbl.idi_tag, softc->vnic_info.rss_hash_key_tbl.idi_map, BUS_DMASYNC_PREWRITE); memcpy(softc->vnic_info.rss_hash_key_tbl.idi_vaddr, softc->vnic_info.rss_hash_key, HW_HASH_KEY_SIZE); /* Allocate the RSS tables */ rc = iflib_dma_alloc(ctx, HW_HASH_INDEX_SIZE * sizeof(uint16_t), &softc->vnic_info.rss_grp_tbl, 0); if (rc) goto rss_grp_alloc_fail; bus_dmamap_sync(softc->vnic_info.rss_grp_tbl.idi_tag, softc->vnic_info.rss_grp_tbl.idi_map, BUS_DMASYNC_PREWRITE); memset(softc->vnic_info.rss_grp_tbl.idi_vaddr, 0xff, softc->vnic_info.rss_grp_tbl.idi_size); softc->nrxqsets = nrxqsets; return rc; rss_grp_alloc_fail: iflib_dma_free(&softc->vnic_info.rss_hash_key_tbl); rss_hash_alloc_fail: iflib_dma_free(&softc->vnic_info.mc_list); tpa_alloc_fail: mc_list_alloc_fail: for (i = i - 1; i >= 0; i--) free(softc->rx_rings[i].tpa_start, M_DEVBUF); iflib_dma_free(&softc->hw_tx_port_stats); hw_port_tx_stats_alloc_fail: iflib_dma_free(&softc->hw_rx_port_stats); hw_port_rx_stats_alloc_fail: iflib_dma_free(&softc->rx_stats); hw_stats_alloc_fail: free(softc->grp_info, M_DEVBUF); grp_alloc_fail: free(softc->ag_rings, M_DEVBUF); ag_alloc_fail: free(softc->rx_rings, M_DEVBUF); ring_alloc_fail: free(softc->rx_cp_rings, M_DEVBUF); cp_alloc_fail: return rc; } /* Device setup and teardown */ static int bnxt_attach_pre(if_ctx_t ctx) { struct bnxt_softc *softc = iflib_get_softc(ctx); if_softc_ctx_t scctx; int rc = 0; softc->ctx = ctx; softc->dev = iflib_get_dev(ctx); softc->media = iflib_get_media(ctx); softc->scctx = iflib_get_softc_ctx(ctx); softc->sctx = iflib_get_sctx(ctx); scctx = softc->scctx; /* TODO: Better way of detecting NPAR/VF is needed */ switch (softc->sctx->isc_vendor_info->pvi_device_id) { case BCM57402_NPAR: case BCM57404_NPAR: case BCM57406_NPAR: case BCM57407_NPAR: case BCM57412_NPAR1: case BCM57412_NPAR2: case BCM57414_NPAR1: case BCM57414_NPAR2: case BCM57416_NPAR1: case BCM57416_NPAR2: softc->flags |= BNXT_FLAG_NPAR; break; case NETXTREME_C_VF1: case NETXTREME_C_VF2: case NETXTREME_C_VF3: case NETXTREME_E_VF1: case NETXTREME_E_VF2: case NETXTREME_E_VF3: softc->flags |= BNXT_FLAG_VF; break; } pci_enable_busmaster(softc->dev); if (bnxt_pci_mapping(softc)) return (ENXIO); /* HWRM setup/init */ BNXT_HWRM_LOCK_INIT(softc, device_get_nameunit(softc->dev)); rc = bnxt_alloc_hwrm_dma_mem(softc); if (rc) goto dma_fail; /* Get firmware version and compare with driver */ softc->ver_info = malloc(sizeof(struct bnxt_ver_info), M_DEVBUF, M_NOWAIT | M_ZERO); if (softc->ver_info == NULL) { rc = ENOMEM; device_printf(softc->dev, "Unable to allocate space for version info\n"); goto ver_alloc_fail; } /* Default minimum required HWRM version */ softc->ver_info->hwrm_min_major = 1; softc->ver_info->hwrm_min_minor = 2; softc->ver_info->hwrm_min_update = 2; rc = bnxt_hwrm_ver_get(softc); if (rc) { device_printf(softc->dev, "attach: hwrm ver get failed\n"); goto ver_fail; } /* Get NVRAM info */ softc->nvm_info = malloc(sizeof(struct bnxt_nvram_info), M_DEVBUF, M_NOWAIT | M_ZERO); if (softc->nvm_info == NULL) { rc = ENOMEM; device_printf(softc->dev, "Unable to allocate space for NVRAM info\n"); goto nvm_alloc_fail; } rc = bnxt_hwrm_nvm_get_dev_info(softc, &softc->nvm_info->mfg_id, &softc->nvm_info->device_id, &softc->nvm_info->sector_size, &softc->nvm_info->size, &softc->nvm_info->reserved_size, &softc->nvm_info->available_size); /* Register the driver with the FW */ rc = bnxt_hwrm_func_drv_rgtr(softc); if (rc) { device_printf(softc->dev, "attach: hwrm drv rgtr failed\n"); goto drv_rgtr_fail; } rc = bnxt_hwrm_func_rgtr_async_events(softc, NULL, 0); if (rc) { device_printf(softc->dev, "attach: hwrm rgtr async evts failed\n"); goto drv_rgtr_fail; } /* Get the HW capabilities */ rc = bnxt_hwrm_func_qcaps(softc); if (rc) goto failed; /* Get the current configuration of this function */ rc = bnxt_hwrm_func_qcfg(softc); if (rc) { device_printf(softc->dev, "attach: hwrm func qcfg failed\n"); goto failed; } iflib_set_mac(ctx, softc->func.mac_addr); scctx->isc_txrx = &bnxt_txrx; scctx->isc_tx_csum_flags = (CSUM_IP | CSUM_TCP | CSUM_UDP | CSUM_TCP_IPV6 | CSUM_UDP_IPV6 | CSUM_TSO); scctx->isc_capenable = /* These are translated to hwassit bits */ IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6 | IFCAP_TSO4 | IFCAP_TSO6 | /* These are checked by iflib */ IFCAP_LRO | IFCAP_VLAN_HWFILTER | /* These are part of the iflib mask */ IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWTSO | /* These likely get lost... */ IFCAP_VLAN_HWCSUM | IFCAP_JUMBO_MTU; if (bnxt_wol_supported(softc)) scctx->isc_capenable |= IFCAP_WOL_MAGIC; /* Get the queue config */ rc = bnxt_hwrm_queue_qportcfg(softc); if (rc) { device_printf(softc->dev, "attach: hwrm qportcfg failed\n"); goto failed; } bnxt_get_wol_settings(softc); /* Now perform a function reset */ rc = bnxt_hwrm_func_reset(softc); bnxt_clear_ids(softc); if (rc) goto failed; /* Now set up iflib sc */ scctx->isc_tx_nsegments = 31, scctx->isc_tx_tso_segments_max = 31; scctx->isc_tx_tso_size_max = BNXT_TSO_SIZE; scctx->isc_tx_tso_segsize_max = BNXT_TSO_SIZE; scctx->isc_vectors = softc->func.max_cp_rings; scctx->isc_txrx = &bnxt_txrx; if (scctx->isc_nrxd[0] < ((scctx->isc_nrxd[1] * 4) + scctx->isc_nrxd[2])) device_printf(softc->dev, "WARNING: nrxd0 (%d) should be at least 4 * nrxd1 (%d) + nrxd2 (%d). Driver may be unstable\n", scctx->isc_nrxd[0], scctx->isc_nrxd[1], scctx->isc_nrxd[2]); if (scctx->isc_ntxd[0] < scctx->isc_ntxd[1] * 2) device_printf(softc->dev, "WARNING: ntxd0 (%d) should be at least 2 * ntxd1 (%d). Driver may be unstable\n", scctx->isc_ntxd[0], scctx->isc_ntxd[1]); scctx->isc_txqsizes[0] = sizeof(struct cmpl_base) * scctx->isc_ntxd[0]; scctx->isc_txqsizes[1] = sizeof(struct tx_bd_short) * scctx->isc_ntxd[1]; scctx->isc_rxqsizes[0] = sizeof(struct cmpl_base) * scctx->isc_nrxd[0]; scctx->isc_rxqsizes[1] = sizeof(struct rx_prod_pkt_bd) * scctx->isc_nrxd[1]; scctx->isc_rxqsizes[2] = sizeof(struct rx_prod_pkt_bd) * scctx->isc_nrxd[2]; scctx->isc_nrxqsets_max = min(pci_msix_count(softc->dev)-1, softc->fn_qcfg.alloc_completion_rings - 1); scctx->isc_nrxqsets_max = min(scctx->isc_nrxqsets_max, softc->fn_qcfg.alloc_rx_rings); scctx->isc_nrxqsets_max = min(scctx->isc_nrxqsets_max, softc->fn_qcfg.alloc_vnics); scctx->isc_ntxqsets_max = min(softc->fn_qcfg.alloc_tx_rings, softc->fn_qcfg.alloc_completion_rings - scctx->isc_nrxqsets_max - 1); scctx->isc_rss_table_size = HW_HASH_INDEX_SIZE; scctx->isc_rss_table_mask = scctx->isc_rss_table_size - 1; /* iflib will map and release this bar */ scctx->isc_msix_bar = pci_msix_table_bar(softc->dev); /* Allocate the default completion ring */ softc->def_cp_ring.stats_ctx_id = HWRM_NA_SIGNATURE; softc->def_cp_ring.ring.phys_id = (uint16_t)HWRM_NA_SIGNATURE; softc->def_cp_ring.ring.softc = softc; softc->def_cp_ring.ring.id = 0; softc->def_cp_ring.ring.doorbell = softc->def_cp_ring.ring.id * 0x80; softc->def_cp_ring.ring.ring_size = PAGE_SIZE / sizeof(struct cmpl_base); rc = iflib_dma_alloc(ctx, sizeof(struct cmpl_base) * softc->def_cp_ring.ring.ring_size, &softc->def_cp_ring_mem, 0); softc->def_cp_ring.ring.vaddr = softc->def_cp_ring_mem.idi_vaddr; softc->def_cp_ring.ring.paddr = softc->def_cp_ring_mem.idi_paddr; iflib_config_gtask_init(ctx, &softc->def_cp_task, bnxt_def_cp_task, "dflt_cp"); rc = bnxt_init_sysctl_ctx(softc); if (rc) goto init_sysctl_failed; rc = bnxt_create_nvram_sysctls(softc->nvm_info); if (rc) goto failed; arc4rand(softc->vnic_info.rss_hash_key, HW_HASH_KEY_SIZE, 0); softc->vnic_info.rss_hash_type = HWRM_VNIC_RSS_CFG_INPUT_HASH_TYPE_IPV4 | HWRM_VNIC_RSS_CFG_INPUT_HASH_TYPE_TCP_IPV4 | HWRM_VNIC_RSS_CFG_INPUT_HASH_TYPE_UDP_IPV4 | HWRM_VNIC_RSS_CFG_INPUT_HASH_TYPE_IPV6 | HWRM_VNIC_RSS_CFG_INPUT_HASH_TYPE_TCP_IPV6 | HWRM_VNIC_RSS_CFG_INPUT_HASH_TYPE_UDP_IPV6; rc = bnxt_create_config_sysctls_pre(softc); if (rc) goto failed; /* Initialize the vlan list */ SLIST_INIT(&softc->vnic_info.vlan_tags); softc->vnic_info.vlan_tag_list.idi_vaddr = NULL; return (rc); failed: bnxt_free_sysctl_ctx(softc); init_sysctl_failed: bnxt_hwrm_func_drv_unrgtr(softc, false); drv_rgtr_fail: free(softc->nvm_info, M_DEVBUF); nvm_alloc_fail: ver_fail: free(softc->ver_info, M_DEVBUF); ver_alloc_fail: bnxt_free_hwrm_dma_mem(softc); dma_fail: BNXT_HWRM_LOCK_DESTROY(softc); bnxt_pci_mapping_free(softc); pci_disable_busmaster(softc->dev); return (rc); } static int bnxt_attach_post(if_ctx_t ctx) { struct bnxt_softc *softc = iflib_get_softc(ctx); if_t ifp = iflib_get_ifp(ctx); int rc; bnxt_create_config_sysctls_post(softc); /* Update link state etc... */ rc = bnxt_probe_phy(softc); if (rc) goto failed; /* Needs to be done after probing the phy */ bnxt_create_ver_sysctls(softc); bnxt_add_media_types(softc); ifmedia_set(softc->media, IFM_ETHER | IFM_AUTO); softc->scctx->isc_max_frame_size = ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN; failed: return rc; } static int bnxt_detach(if_ctx_t ctx) { struct bnxt_softc *softc = iflib_get_softc(ctx); struct bnxt_vlan_tag *tag; struct bnxt_vlan_tag *tmp; int i; bnxt_wol_config(ctx); bnxt_do_disable_intr(&softc->def_cp_ring); bnxt_free_sysctl_ctx(softc); bnxt_hwrm_func_reset(softc); bnxt_clear_ids(softc); iflib_irq_free(ctx, &softc->def_cp_ring.irq); iflib_config_gtask_deinit(&softc->def_cp_task); /* We need to free() these here... */ for (i = softc->nrxqsets-1; i>=0; i--) { iflib_irq_free(ctx, &softc->rx_cp_rings[i].irq); } iflib_dma_free(&softc->vnic_info.mc_list); iflib_dma_free(&softc->vnic_info.rss_hash_key_tbl); iflib_dma_free(&softc->vnic_info.rss_grp_tbl); if (softc->vnic_info.vlan_tag_list.idi_vaddr) iflib_dma_free(&softc->vnic_info.vlan_tag_list); SLIST_FOREACH_SAFE(tag, &softc->vnic_info.vlan_tags, next, tmp) free(tag, M_DEVBUF); iflib_dma_free(&softc->def_cp_ring_mem); for (i = 0; i < softc->nrxqsets; i++) free(softc->rx_rings[i].tpa_start, M_DEVBUF); free(softc->ver_info, M_DEVBUF); free(softc->nvm_info, M_DEVBUF); bnxt_hwrm_func_drv_unrgtr(softc, false); bnxt_free_hwrm_dma_mem(softc); BNXT_HWRM_LOCK_DESTROY(softc); pci_disable_busmaster(softc->dev); bnxt_pci_mapping_free(softc); return 0; } /* Device configuration */ static void bnxt_init(if_ctx_t ctx) { struct bnxt_softc *softc = iflib_get_softc(ctx); struct ifmediareq ifmr; int i, j; int rc; rc = bnxt_hwrm_func_reset(softc); if (rc) return; bnxt_clear_ids(softc); /* Allocate the default completion ring */ softc->def_cp_ring.cons = UINT32_MAX; softc->def_cp_ring.v_bit = 1; bnxt_mark_cpr_invalid(&softc->def_cp_ring); rc = bnxt_hwrm_ring_alloc(softc, HWRM_RING_ALLOC_INPUT_RING_TYPE_L2_CMPL, &softc->def_cp_ring.ring, (uint16_t)HWRM_NA_SIGNATURE, HWRM_NA_SIGNATURE, true); if (rc) goto fail; /* And now set the default CP ring as the async CP ring */ rc = bnxt_hwrm_func_cfg(softc); if (rc) goto fail; for (i = 0; i < softc->nrxqsets; i++) { /* Allocate the statistics context */ rc = bnxt_hwrm_stat_ctx_alloc(softc, &softc->rx_cp_rings[i], softc->rx_stats.idi_paddr + (sizeof(struct ctx_hw_stats) * i)); if (rc) goto fail; /* Allocate the completion ring */ softc->rx_cp_rings[i].cons = UINT32_MAX; softc->rx_cp_rings[i].v_bit = 1; softc->rx_cp_rings[i].last_idx = UINT32_MAX; bnxt_mark_cpr_invalid(&softc->rx_cp_rings[i]); rc = bnxt_hwrm_ring_alloc(softc, HWRM_RING_ALLOC_INPUT_RING_TYPE_L2_CMPL, &softc->rx_cp_rings[i].ring, (uint16_t)HWRM_NA_SIGNATURE, HWRM_NA_SIGNATURE, true); if (rc) goto fail; /* Allocate the RX ring */ rc = bnxt_hwrm_ring_alloc(softc, HWRM_RING_ALLOC_INPUT_RING_TYPE_RX, &softc->rx_rings[i], (uint16_t)HWRM_NA_SIGNATURE, HWRM_NA_SIGNATURE, false); if (rc) goto fail; BNXT_RX_DB(&softc->rx_rings[i], 0); /* TODO: Cumulus+ doesn't need the double doorbell */ BNXT_RX_DB(&softc->rx_rings[i], 0); /* Allocate the AG ring */ rc = bnxt_hwrm_ring_alloc(softc, HWRM_RING_ALLOC_INPUT_RING_TYPE_RX, &softc->ag_rings[i], (uint16_t)HWRM_NA_SIGNATURE, HWRM_NA_SIGNATURE, false); if (rc) goto fail; BNXT_RX_DB(&softc->rx_rings[i], 0); /* TODO: Cumulus+ doesn't need the double doorbell */ BNXT_RX_DB(&softc->ag_rings[i], 0); /* Allocate the ring group */ softc->grp_info[i].stats_ctx = softc->rx_cp_rings[i].stats_ctx_id; softc->grp_info[i].rx_ring_id = softc->rx_rings[i].phys_id; softc->grp_info[i].ag_ring_id = softc->ag_rings[i].phys_id; softc->grp_info[i].cp_ring_id = softc->rx_cp_rings[i].ring.phys_id; rc = bnxt_hwrm_ring_grp_alloc(softc, &softc->grp_info[i]); if (rc) goto fail; } /* Allocate the VNIC RSS context */ rc = bnxt_hwrm_vnic_ctx_alloc(softc, &softc->vnic_info.rss_id); if (rc) goto fail; /* Allocate the vnic */ softc->vnic_info.def_ring_grp = softc->grp_info[0].grp_id; softc->vnic_info.mru = softc->scctx->isc_max_frame_size; rc = bnxt_hwrm_vnic_alloc(softc, &softc->vnic_info); if (rc) goto fail; rc = bnxt_hwrm_vnic_cfg(softc, &softc->vnic_info); if (rc) goto fail; rc = bnxt_hwrm_set_filter(softc, &softc->vnic_info); if (rc) goto fail; /* Enable RSS on the VNICs */ for (i = 0, j = 0; i < HW_HASH_INDEX_SIZE; i++) { ((uint16_t *) softc->vnic_info.rss_grp_tbl.idi_vaddr)[i] = htole16(softc->grp_info[j].grp_id); if (++j == softc->nrxqsets) j = 0; } rc = bnxt_hwrm_rss_cfg(softc, &softc->vnic_info, softc->vnic_info.rss_hash_type); if (rc) goto fail; /* * Enable LRO/TPA/GRO * TBD: * Enable / Disable HW_LRO based on * ifconfig lro / ifconfig -lro setting */ rc = bnxt_hwrm_vnic_tpa_cfg(softc, &softc->vnic_info, (if_getcapenable(iflib_get_ifp(ctx)) & IFCAP_LRO) ? HWRM_VNIC_TPA_CFG_INPUT_FLAGS_TPA : 0); if (rc) goto fail; for (i = 0; i < softc->ntxqsets; i++) { /* Allocate the statistics context */ rc = bnxt_hwrm_stat_ctx_alloc(softc, &softc->tx_cp_rings[i], softc->tx_stats.idi_paddr + (sizeof(struct ctx_hw_stats) * i)); if (rc) goto fail; /* Allocate the completion ring */ softc->tx_cp_rings[i].cons = UINT32_MAX; softc->tx_cp_rings[i].v_bit = 1; bnxt_mark_cpr_invalid(&softc->tx_cp_rings[i]); rc = bnxt_hwrm_ring_alloc(softc, HWRM_RING_ALLOC_INPUT_RING_TYPE_L2_CMPL, &softc->tx_cp_rings[i].ring, (uint16_t)HWRM_NA_SIGNATURE, HWRM_NA_SIGNATURE, false); if (rc) goto fail; /* Allocate the TX ring */ rc = bnxt_hwrm_ring_alloc(softc, HWRM_RING_ALLOC_INPUT_RING_TYPE_TX, &softc->tx_rings[i], softc->tx_cp_rings[i].ring.phys_id, softc->tx_cp_rings[i].stats_ctx_id, false); if (rc) goto fail; BNXT_TX_DB(&softc->tx_rings[i], 0); /* TODO: Cumulus+ doesn't need the double doorbell */ BNXT_TX_DB(&softc->tx_rings[i], 0); } bnxt_do_enable_intr(&softc->def_cp_ring); bnxt_media_status(softc->ctx, &ifmr); return; fail: bnxt_hwrm_func_reset(softc); bnxt_clear_ids(softc); return; } static void bnxt_stop(if_ctx_t ctx) { struct bnxt_softc *softc = iflib_get_softc(ctx); bnxt_do_disable_intr(&softc->def_cp_ring); bnxt_hwrm_func_reset(softc); bnxt_clear_ids(softc); return; } static void bnxt_multi_set(if_ctx_t ctx) { struct bnxt_softc *softc = iflib_get_softc(ctx); if_t ifp = iflib_get_ifp(ctx); uint8_t *mta; int cnt, mcnt; mcnt = if_multiaddr_count(ifp, -1); if (mcnt > BNXT_MAX_MC_ADDRS) { softc->vnic_info.rx_mask |= HWRM_CFA_L2_SET_RX_MASK_INPUT_MASK_ALL_MCAST; bnxt_hwrm_cfa_l2_set_rx_mask(softc, &softc->vnic_info); } else { softc->vnic_info.rx_mask &= ~HWRM_CFA_L2_SET_RX_MASK_INPUT_MASK_ALL_MCAST; mta = softc->vnic_info.mc_list.idi_vaddr; bzero(mta, softc->vnic_info.mc_list.idi_size); if_multiaddr_array(ifp, mta, &cnt, mcnt); bus_dmamap_sync(softc->vnic_info.mc_list.idi_tag, softc->vnic_info.mc_list.idi_map, BUS_DMASYNC_PREWRITE); softc->vnic_info.mc_list_count = cnt; softc->vnic_info.rx_mask |= HWRM_CFA_L2_SET_RX_MASK_INPUT_MASK_MCAST; if (bnxt_hwrm_cfa_l2_set_rx_mask(softc, &softc->vnic_info)) device_printf(softc->dev, "set_multi: rx_mask set failed\n"); } } static int bnxt_mtu_set(if_ctx_t ctx, uint32_t mtu) { struct bnxt_softc *softc = iflib_get_softc(ctx); if (mtu > BNXT_MAX_MTU) return EINVAL; softc->scctx->isc_max_frame_size = mtu + ETHER_HDR_LEN + ETHER_CRC_LEN; return 0; } static void bnxt_media_status(if_ctx_t ctx, struct ifmediareq * ifmr) { struct bnxt_softc *softc = iflib_get_softc(ctx); struct bnxt_link_info *link_info = &softc->link_info; uint8_t phy_type = get_phy_type(softc); bnxt_update_link(softc, true); ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; if (link_info->link_up) ifmr->ifm_status |= IFM_ACTIVE; else ifmr->ifm_status &= ~IFM_ACTIVE; if (link_info->duplex == HWRM_PORT_PHY_QCFG_OUTPUT_DUPLEX_CFG_FULL) ifmr->ifm_active |= IFM_FDX; else ifmr->ifm_active |= IFM_HDX; switch (link_info->link_speed) { case HWRM_PORT_PHY_QCFG_OUTPUT_LINK_SPEED_100MB: ifmr->ifm_active |= IFM_100_T; break; case HWRM_PORT_PHY_QCFG_OUTPUT_LINK_SPEED_1GB: switch (phy_type) { case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASEKX: ifmr->ifm_active |= IFM_1000_KX; break; case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASET: ifmr->ifm_active |= IFM_1000_T; break; case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_SGMIIEXTPHY: ifmr->ifm_active |= IFM_1000_SGMII; break; default: /* * Workaround: * Don't return IFM_UNKNOWN until * Stratus return proper media_type */ ifmr->ifm_active |= IFM_1000_KX; break; } break; case HWRM_PORT_PHY_QCFG_OUTPUT_LINK_SPEED_2_5GB: switch (phy_type) { case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASEKX: ifmr->ifm_active |= IFM_2500_KX; break; case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASET: ifmr->ifm_active |= IFM_2500_T; break; default: ifmr->ifm_active |= IFM_UNKNOWN; break; } break; case HWRM_PORT_PHY_QCFG_OUTPUT_LINK_SPEED_10GB: switch (phy_type) { case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASECR: ifmr->ifm_active |= IFM_10G_CR1; break; case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASEKR4: case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASEKR2: case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASEKR: ifmr->ifm_active |= IFM_10G_KR; break; case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASELR: ifmr->ifm_active |= IFM_10G_LR; break; case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASESR: ifmr->ifm_active |= IFM_10G_SR; break; case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASEKX: ifmr->ifm_active |= IFM_10G_KX4; break; case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASET: ifmr->ifm_active |= IFM_10G_T; break; default: /* * Workaround: * Don't return IFM_UNKNOWN until * Stratus return proper media_type */ ifmr->ifm_active |= IFM_10G_CR1; break; } break; case HWRM_PORT_PHY_QCFG_OUTPUT_LINK_SPEED_20GB: ifmr->ifm_active |= IFM_20G_KR2; break; case HWRM_PORT_PHY_QCFG_OUTPUT_LINK_SPEED_25GB: switch (phy_type) { case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASECR: ifmr->ifm_active |= IFM_25G_CR; break; case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASEKR4: case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASEKR2: case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASEKR: ifmr->ifm_active |= IFM_25G_KR; break; case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASESR: ifmr->ifm_active |= IFM_25G_SR; break; default: /* * Workaround: * Don't return IFM_UNKNOWN until * Stratus return proper media_type */ ifmr->ifm_active |= IFM_25G_CR; break; } break; case HWRM_PORT_PHY_QCFG_OUTPUT_LINK_SPEED_40GB: switch (phy_type) { case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASECR: ifmr->ifm_active |= IFM_40G_CR4; break; case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASEKR4: case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASEKR2: case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASEKR: ifmr->ifm_active |= IFM_40G_KR4; break; case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASELR: ifmr->ifm_active |= IFM_40G_LR4; break; case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASESR: ifmr->ifm_active |= IFM_40G_SR4; break; default: ifmr->ifm_active |= IFM_UNKNOWN; break; } break; case HWRM_PORT_PHY_QCFG_OUTPUT_LINK_SPEED_50GB: switch (phy_type) { case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASECR: ifmr->ifm_active |= IFM_50G_CR2; break; case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASEKR4: case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASEKR2: case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASEKR: ifmr->ifm_active |= IFM_50G_KR2; break; default: /* * Workaround: * Don't return IFM_UNKNOWN until * Stratus return proper media_type */ ifmr->ifm_active |= IFM_50G_CR2; break; } break; case HWRM_PORT_PHY_QCFG_OUTPUT_LINK_SPEED_100GB: switch (phy_type) { case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASECR: ifmr->ifm_active |= IFM_100G_CR4; break; case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASEKR4: case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASEKR2: case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASEKR: ifmr->ifm_active |= IFM_100G_KR4; break; case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASELR: ifmr->ifm_active |= IFM_100G_LR4; break; case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASESR: ifmr->ifm_active |= IFM_100G_SR4; break; default: /* * Workaround: * Don't return IFM_UNKNOWN until * Stratus return proper media_type */ ifmr->ifm_active |= IFM_100G_CR4; break; } default: return; } if (link_info->pause == (HWRM_PORT_PHY_QCFG_OUTPUT_PAUSE_TX | HWRM_PORT_PHY_QCFG_OUTPUT_PAUSE_RX)) ifmr->ifm_active |= (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE); else if (link_info->pause == HWRM_PORT_PHY_QCFG_OUTPUT_PAUSE_TX) ifmr->ifm_active |= IFM_ETH_TXPAUSE; else if (link_info->pause == HWRM_PORT_PHY_QCFG_OUTPUT_PAUSE_RX) ifmr->ifm_active |= IFM_ETH_RXPAUSE; bnxt_report_link(softc); return; } static int bnxt_media_change(if_ctx_t ctx) { struct bnxt_softc *softc = iflib_get_softc(ctx); struct ifmedia *ifm = iflib_get_media(ctx); struct ifmediareq ifmr; int rc; if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) return EINVAL; switch (IFM_SUBTYPE(ifm->ifm_media)) { case IFM_100_T: softc->link_info.autoneg &= ~BNXT_AUTONEG_SPEED; softc->link_info.req_link_speed = HWRM_PORT_PHY_CFG_INPUT_FORCE_LINK_SPEED_100MB; break; case IFM_1000_KX: case IFM_1000_T: case IFM_1000_SGMII: softc->link_info.autoneg &= ~BNXT_AUTONEG_SPEED; softc->link_info.req_link_speed = HWRM_PORT_PHY_CFG_INPUT_FORCE_LINK_SPEED_1GB; break; case IFM_2500_KX: case IFM_2500_T: softc->link_info.autoneg &= ~BNXT_AUTONEG_SPEED; softc->link_info.req_link_speed = HWRM_PORT_PHY_CFG_INPUT_FORCE_LINK_SPEED_2_5GB; break; case IFM_10G_CR1: case IFM_10G_KR: case IFM_10G_LR: case IFM_10G_SR: case IFM_10G_T: softc->link_info.autoneg &= ~BNXT_AUTONEG_SPEED; softc->link_info.req_link_speed = HWRM_PORT_PHY_CFG_INPUT_FORCE_LINK_SPEED_10GB; break; case IFM_20G_KR2: softc->link_info.autoneg &= ~BNXT_AUTONEG_SPEED; softc->link_info.req_link_speed = HWRM_PORT_PHY_CFG_INPUT_FORCE_LINK_SPEED_20GB; break; case IFM_25G_CR: case IFM_25G_KR: case IFM_25G_SR: softc->link_info.autoneg &= ~BNXT_AUTONEG_SPEED; softc->link_info.req_link_speed = HWRM_PORT_PHY_CFG_INPUT_FORCE_LINK_SPEED_25GB; break; case IFM_40G_CR4: case IFM_40G_KR4: case IFM_40G_LR4: case IFM_40G_SR4: softc->link_info.autoneg &= ~BNXT_AUTONEG_SPEED; softc->link_info.req_link_speed = HWRM_PORT_PHY_CFG_INPUT_FORCE_LINK_SPEED_40GB; break; case IFM_50G_CR2: case IFM_50G_KR2: softc->link_info.autoneg &= ~BNXT_AUTONEG_SPEED; softc->link_info.req_link_speed = HWRM_PORT_PHY_CFG_INPUT_FORCE_LINK_SPEED_50GB; break; case IFM_100G_CR4: case IFM_100G_KR4: case IFM_100G_LR4: case IFM_100G_SR4: softc->link_info.autoneg &= ~BNXT_AUTONEG_SPEED; softc->link_info.req_link_speed = HWRM_PORT_PHY_CFG_INPUT_FORCE_LINK_SPEED_100GB; break; default: device_printf(softc->dev, "Unsupported media type! Using auto\n"); /* Fall-through */ case IFM_AUTO: // Auto softc->link_info.autoneg |= BNXT_AUTONEG_SPEED; break; } rc = bnxt_hwrm_set_link_setting(softc, true, true); bnxt_media_status(softc->ctx, &ifmr); return rc; } static int bnxt_promisc_set(if_ctx_t ctx, int flags) { struct bnxt_softc *softc = iflib_get_softc(ctx); if_t ifp = iflib_get_ifp(ctx); int rc; if (ifp->if_flags & IFF_ALLMULTI || if_multiaddr_count(ifp, -1) > BNXT_MAX_MC_ADDRS) softc->vnic_info.rx_mask |= HWRM_CFA_L2_SET_RX_MASK_INPUT_MASK_ALL_MCAST; else softc->vnic_info.rx_mask &= ~HWRM_CFA_L2_SET_RX_MASK_INPUT_MASK_ALL_MCAST; if (ifp->if_flags & IFF_PROMISC) softc->vnic_info.rx_mask |= HWRM_CFA_L2_SET_RX_MASK_INPUT_MASK_PROMISCUOUS | HWRM_CFA_L2_SET_RX_MASK_INPUT_MASK_ANYVLAN_NONVLAN; else softc->vnic_info.rx_mask &= ~(HWRM_CFA_L2_SET_RX_MASK_INPUT_MASK_PROMISCUOUS | HWRM_CFA_L2_SET_RX_MASK_INPUT_MASK_ANYVLAN_NONVLAN); rc = bnxt_hwrm_cfa_l2_set_rx_mask(softc, &softc->vnic_info); return rc; } static uint64_t bnxt_get_counter(if_ctx_t ctx, ift_counter cnt) { if_t ifp = iflib_get_ifp(ctx); if (cnt < IFCOUNTERS) return if_get_counter_default(ifp, cnt); return 0; } static void bnxt_update_admin_status(if_ctx_t ctx) { struct bnxt_softc *softc = iflib_get_softc(ctx); /* * When SR-IOV is enabled, avoid each VF sending this HWRM * request every sec with which firmware timeouts can happen */ if (BNXT_PF(softc)) { bnxt_hwrm_port_qstats(softc); } return; } static void bnxt_if_timer(if_ctx_t ctx, uint16_t qid) { struct bnxt_softc *softc = iflib_get_softc(ctx); uint64_t ticks_now = ticks; /* Schedule bnxt_update_admin_status() once per sec */ if (ticks_now - softc->admin_ticks >= hz) { softc->admin_ticks = ticks_now; iflib_admin_intr_deferred(ctx); } return; } static void inline bnxt_do_enable_intr(struct bnxt_cp_ring *cpr) { if (cpr->ring.phys_id != (uint16_t)HWRM_NA_SIGNATURE) { /* First time enabling, do not set index */ if (cpr->cons == UINT32_MAX) BNXT_CP_ENABLE_DB(&cpr->ring); else BNXT_CP_IDX_ENABLE_DB(&cpr->ring, cpr->cons); } } static void inline bnxt_do_disable_intr(struct bnxt_cp_ring *cpr) { if (cpr->ring.phys_id != (uint16_t)HWRM_NA_SIGNATURE) BNXT_CP_DISABLE_DB(&cpr->ring); } /* Enable all interrupts */ static void bnxt_intr_enable(if_ctx_t ctx) { struct bnxt_softc *softc = iflib_get_softc(ctx); int i; bnxt_do_enable_intr(&softc->def_cp_ring); for (i = 0; i < softc->nrxqsets; i++) bnxt_do_enable_intr(&softc->rx_cp_rings[i]); return; } /* Enable interrupt for a single queue */ static int bnxt_tx_queue_intr_enable(if_ctx_t ctx, uint16_t qid) { struct bnxt_softc *softc = iflib_get_softc(ctx); bnxt_do_enable_intr(&softc->tx_cp_rings[qid]); return 0; } static int bnxt_rx_queue_intr_enable(if_ctx_t ctx, uint16_t qid) { struct bnxt_softc *softc = iflib_get_softc(ctx); bnxt_do_enable_intr(&softc->rx_cp_rings[qid]); return 0; } /* Disable all interrupts */ static void bnxt_disable_intr(if_ctx_t ctx) { struct bnxt_softc *softc = iflib_get_softc(ctx); int i; /* * NOTE: These TX interrupts should never get enabled, so don't * update the index */ for (i = 0; i < softc->ntxqsets; i++) bnxt_do_disable_intr(&softc->tx_cp_rings[i]); for (i = 0; i < softc->nrxqsets; i++) bnxt_do_disable_intr(&softc->rx_cp_rings[i]); return; } static int bnxt_msix_intr_assign(if_ctx_t ctx, int msix) { struct bnxt_softc *softc = iflib_get_softc(ctx); int rc; int i; char irq_name[16]; rc = iflib_irq_alloc_generic(ctx, &softc->def_cp_ring.irq, softc->def_cp_ring.ring.id + 1, IFLIB_INTR_ADMIN, bnxt_handle_def_cp, softc, 0, "def_cp"); if (rc) { device_printf(iflib_get_dev(ctx), "Failed to register default completion ring handler\n"); return rc; } for (i=0; iscctx->isc_nrxqsets; i++) { snprintf(irq_name, sizeof(irq_name), "rxq%d", i); rc = iflib_irq_alloc_generic(ctx, &softc->rx_cp_rings[i].irq, softc->rx_cp_rings[i].ring.id + 1, IFLIB_INTR_RX, bnxt_handle_rx_cp, &softc->rx_cp_rings[i], i, irq_name); if (rc) { device_printf(iflib_get_dev(ctx), "Failed to register RX completion ring handler\n"); i--; goto fail; } } for (i=0; iscctx->isc_ntxqsets; i++) - iflib_softirq_alloc_generic(ctx, i + 1, IFLIB_INTR_TX, NULL, i, + /* TODO: Benchmark and see if tying to the RX irqs helps */ + iflib_softirq_alloc_generic(ctx, -1, IFLIB_INTR_TX, NULL, i, "tx_cp"); return rc; fail: for (; i>=0; i--) iflib_irq_free(ctx, &softc->rx_cp_rings[i].irq); iflib_irq_free(ctx, &softc->def_cp_ring.irq); return rc; } /* * We're explicitly allowing duplicates here. They will need to be * removed as many times as they are added. */ static void bnxt_vlan_register(if_ctx_t ctx, uint16_t vtag) { struct bnxt_softc *softc = iflib_get_softc(ctx); struct bnxt_vlan_tag *new_tag; new_tag = malloc(sizeof(struct bnxt_vlan_tag), M_DEVBUF, M_NOWAIT); if (new_tag == NULL) return; new_tag->tag = vtag; new_tag->tpid = 8100; SLIST_INSERT_HEAD(&softc->vnic_info.vlan_tags, new_tag, next); }; static void bnxt_vlan_unregister(if_ctx_t ctx, uint16_t vtag) { struct bnxt_softc *softc = iflib_get_softc(ctx); struct bnxt_vlan_tag *vlan_tag; SLIST_FOREACH(vlan_tag, &softc->vnic_info.vlan_tags, next) { if (vlan_tag->tag == vtag) { SLIST_REMOVE(&softc->vnic_info.vlan_tags, vlan_tag, bnxt_vlan_tag, next); free(vlan_tag, M_DEVBUF); break; } } } static int bnxt_wol_config(if_ctx_t ctx) { struct bnxt_softc *softc = iflib_get_softc(ctx); if_t ifp = iflib_get_ifp(ctx); if (!softc) return -EBUSY; if (!bnxt_wol_supported(softc)) return -ENOTSUP; if (if_getcapabilities(ifp) & IFCAP_WOL_MAGIC) { if (!softc->wol) { if (bnxt_hwrm_alloc_wol_fltr(softc)) return -EBUSY; softc->wol = 1; } } else { if (softc->wol) { if (bnxt_hwrm_free_wol_fltr(softc)) return -EBUSY; softc->wol = 0; } } return 0; } static int bnxt_shutdown(if_ctx_t ctx) { bnxt_wol_config(ctx); return 0; } static int bnxt_suspend(if_ctx_t ctx) { bnxt_wol_config(ctx); return 0; } static int bnxt_resume(if_ctx_t ctx) { struct bnxt_softc *softc = iflib_get_softc(ctx); bnxt_get_wol_settings(softc); return 0; } static int bnxt_priv_ioctl(if_ctx_t ctx, u_long command, caddr_t data) { struct bnxt_softc *softc = iflib_get_softc(ctx); struct ifreq *ifr = (struct ifreq *)data; struct ifreq_buffer *ifbuf = &ifr->ifr_ifru.ifru_buffer; struct bnxt_ioctl_header *ioh = (struct bnxt_ioctl_header *)(ifbuf->buffer); int rc = ENOTSUP; struct bnxt_ioctl_data *iod = NULL; switch (command) { case SIOCGPRIVATE_0: if ((rc = priv_check(curthread, PRIV_DRIVER)) != 0) goto exit; iod = malloc(ifbuf->length, M_DEVBUF, M_NOWAIT | M_ZERO); if (!iod) { rc = ENOMEM; goto exit; } copyin(ioh, iod, ifbuf->length); switch (ioh->type) { case BNXT_HWRM_NVM_FIND_DIR_ENTRY: { struct bnxt_ioctl_hwrm_nvm_find_dir_entry *find = &iod->find; rc = bnxt_hwrm_nvm_find_dir_entry(softc, find->type, &find->ordinal, find->ext, &find->index, find->use_index, find->search_opt, &find->data_length, &find->item_length, &find->fw_ver); if (rc) { iod->hdr.rc = rc; copyout(&iod->hdr.rc, &ioh->rc, sizeof(ioh->rc)); } else { iod->hdr.rc = 0; copyout(iod, ioh, ifbuf->length); } rc = 0; goto exit; } case BNXT_HWRM_NVM_READ: { struct bnxt_ioctl_hwrm_nvm_read *rd = &iod->read; struct iflib_dma_info dma_data; size_t offset; size_t remain; size_t csize; /* * Some HWRM versions can't read more than 0x8000 bytes */ rc = iflib_dma_alloc(softc->ctx, min(rd->length, 0x8000), &dma_data, BUS_DMA_NOWAIT); if (rc) break; for (remain = rd->length, offset = 0; remain && offset < rd->length; offset += 0x8000) { csize = min(remain, 0x8000); rc = bnxt_hwrm_nvm_read(softc, rd->index, rd->offset + offset, csize, &dma_data); if (rc) { iod->hdr.rc = rc; copyout(&iod->hdr.rc, &ioh->rc, sizeof(ioh->rc)); break; } else { copyout(dma_data.idi_vaddr, rd->data + offset, csize); iod->hdr.rc = 0; } remain -= csize; } if (iod->hdr.rc == 0) copyout(iod, ioh, ifbuf->length); iflib_dma_free(&dma_data); rc = 0; goto exit; } case BNXT_HWRM_FW_RESET: { struct bnxt_ioctl_hwrm_fw_reset *rst = &iod->reset; rc = bnxt_hwrm_fw_reset(softc, rst->processor, &rst->selfreset); if (rc) { iod->hdr.rc = rc; copyout(&iod->hdr.rc, &ioh->rc, sizeof(ioh->rc)); } else { iod->hdr.rc = 0; copyout(iod, ioh, ifbuf->length); } rc = 0; goto exit; } case BNXT_HWRM_FW_QSTATUS: { struct bnxt_ioctl_hwrm_fw_qstatus *qstat = &iod->status; rc = bnxt_hwrm_fw_qstatus(softc, qstat->processor, &qstat->selfreset); if (rc) { iod->hdr.rc = rc; copyout(&iod->hdr.rc, &ioh->rc, sizeof(ioh->rc)); } else { iod->hdr.rc = 0; copyout(iod, ioh, ifbuf->length); } rc = 0; goto exit; } case BNXT_HWRM_NVM_WRITE: { struct bnxt_ioctl_hwrm_nvm_write *wr = &iod->write; rc = bnxt_hwrm_nvm_write(softc, wr->data, true, wr->type, wr->ordinal, wr->ext, wr->attr, wr->option, wr->data_length, wr->keep, &wr->item_length, &wr->index); if (rc) { iod->hdr.rc = rc; copyout(&iod->hdr.rc, &ioh->rc, sizeof(ioh->rc)); } else { iod->hdr.rc = 0; copyout(iod, ioh, ifbuf->length); } rc = 0; goto exit; } case BNXT_HWRM_NVM_ERASE_DIR_ENTRY: { struct bnxt_ioctl_hwrm_nvm_erase_dir_entry *erase = &iod->erase; rc = bnxt_hwrm_nvm_erase_dir_entry(softc, erase->index); if (rc) { iod->hdr.rc = rc; copyout(&iod->hdr.rc, &ioh->rc, sizeof(ioh->rc)); } else { iod->hdr.rc = 0; copyout(iod, ioh, ifbuf->length); } rc = 0; goto exit; } case BNXT_HWRM_NVM_GET_DIR_INFO: { struct bnxt_ioctl_hwrm_nvm_get_dir_info *info = &iod->dir_info; rc = bnxt_hwrm_nvm_get_dir_info(softc, &info->entries, &info->entry_length); if (rc) { iod->hdr.rc = rc; copyout(&iod->hdr.rc, &ioh->rc, sizeof(ioh->rc)); } else { iod->hdr.rc = 0; copyout(iod, ioh, ifbuf->length); } rc = 0; goto exit; } case BNXT_HWRM_NVM_GET_DIR_ENTRIES: { struct bnxt_ioctl_hwrm_nvm_get_dir_entries *get = &iod->dir_entries; struct iflib_dma_info dma_data; rc = iflib_dma_alloc(softc->ctx, get->max_size, &dma_data, BUS_DMA_NOWAIT); if (rc) break; rc = bnxt_hwrm_nvm_get_dir_entries(softc, &get->entries, &get->entry_length, &dma_data); if (rc) { iod->hdr.rc = rc; copyout(&iod->hdr.rc, &ioh->rc, sizeof(ioh->rc)); } else { copyout(dma_data.idi_vaddr, get->data, get->entry_length * get->entries); iod->hdr.rc = 0; copyout(iod, ioh, ifbuf->length); } iflib_dma_free(&dma_data); rc = 0; goto exit; } case BNXT_HWRM_NVM_VERIFY_UPDATE: { struct bnxt_ioctl_hwrm_nvm_verify_update *vrfy = &iod->verify; rc = bnxt_hwrm_nvm_verify_update(softc, vrfy->type, vrfy->ordinal, vrfy->ext); if (rc) { iod->hdr.rc = rc; copyout(&iod->hdr.rc, &ioh->rc, sizeof(ioh->rc)); } else { iod->hdr.rc = 0; copyout(iod, ioh, ifbuf->length); } rc = 0; goto exit; } case BNXT_HWRM_NVM_INSTALL_UPDATE: { struct bnxt_ioctl_hwrm_nvm_install_update *inst = &iod->install; rc = bnxt_hwrm_nvm_install_update(softc, inst->install_type, &inst->installed_items, &inst->result, &inst->problem_item, &inst->reset_required); if (rc) { iod->hdr.rc = rc; copyout(&iod->hdr.rc, &ioh->rc, sizeof(ioh->rc)); } else { iod->hdr.rc = 0; copyout(iod, ioh, ifbuf->length); } rc = 0; goto exit; } case BNXT_HWRM_NVM_MODIFY: { struct bnxt_ioctl_hwrm_nvm_modify *mod = &iod->modify; rc = bnxt_hwrm_nvm_modify(softc, mod->index, mod->offset, mod->data, true, mod->length); if (rc) { iod->hdr.rc = rc; copyout(&iod->hdr.rc, &ioh->rc, sizeof(ioh->rc)); } else { iod->hdr.rc = 0; copyout(iod, ioh, ifbuf->length); } rc = 0; goto exit; } case BNXT_HWRM_FW_GET_TIME: { struct bnxt_ioctl_hwrm_fw_get_time *gtm = &iod->get_time; rc = bnxt_hwrm_fw_get_time(softc, >m->year, >m->month, >m->day, >m->hour, >m->minute, >m->second, >m->millisecond, >m->zone); if (rc) { iod->hdr.rc = rc; copyout(&iod->hdr.rc, &ioh->rc, sizeof(ioh->rc)); } else { iod->hdr.rc = 0; copyout(iod, ioh, ifbuf->length); } rc = 0; goto exit; } case BNXT_HWRM_FW_SET_TIME: { struct bnxt_ioctl_hwrm_fw_set_time *stm = &iod->set_time; rc = bnxt_hwrm_fw_set_time(softc, stm->year, stm->month, stm->day, stm->hour, stm->minute, stm->second, stm->millisecond, stm->zone); if (rc) { iod->hdr.rc = rc; copyout(&iod->hdr.rc, &ioh->rc, sizeof(ioh->rc)); } else { iod->hdr.rc = 0; copyout(iod, ioh, ifbuf->length); } rc = 0; goto exit; } } break; } exit: if (iod) free(iod, M_DEVBUF); return rc; } /* * Support functions */ static int bnxt_probe_phy(struct bnxt_softc *softc) { struct bnxt_link_info *link_info = &softc->link_info; int rc = 0; rc = bnxt_update_link(softc, false); if (rc) { device_printf(softc->dev, "Probe phy can't update link (rc: %x)\n", rc); return (rc); } /*initialize the ethool setting copy with NVM settings */ if (link_info->auto_mode != HWRM_PORT_PHY_QCFG_OUTPUT_AUTO_MODE_NONE) link_info->autoneg |= BNXT_AUTONEG_SPEED; if (link_info->auto_pause & (HWRM_PORT_PHY_QCFG_OUTPUT_PAUSE_TX | HWRM_PORT_PHY_QCFG_OUTPUT_PAUSE_RX)) { if (link_info->auto_pause == ( HWRM_PORT_PHY_QCFG_OUTPUT_PAUSE_TX | HWRM_PORT_PHY_QCFG_OUTPUT_PAUSE_RX)) link_info->autoneg |= BNXT_AUTONEG_FLOW_CTRL; link_info->req_flow_ctrl = link_info->auto_pause; } else if (link_info->force_pause & ( HWRM_PORT_PHY_QCFG_OUTPUT_PAUSE_TX | HWRM_PORT_PHY_QCFG_OUTPUT_PAUSE_RX)) { link_info->req_flow_ctrl = link_info->force_pause; } link_info->req_duplex = link_info->duplex_setting; if (link_info->autoneg & BNXT_AUTONEG_SPEED) link_info->req_link_speed = link_info->auto_link_speed; else link_info->req_link_speed = link_info->force_link_speed; return (rc); } static void bnxt_add_media_types(struct bnxt_softc *softc) { struct bnxt_link_info *link_info = &softc->link_info; uint16_t supported; uint8_t phy_type = get_phy_type(softc); supported = link_info->support_speeds; /* Auto is always supported */ ifmedia_add(softc->media, IFM_ETHER | IFM_AUTO, 0, NULL); if (softc->flags & BNXT_FLAG_NPAR) return; switch (phy_type) { case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASECR: if (supported & HWRM_PORT_PHY_QCFG_OUTPUT_SUPPORT_SPEEDS_100GB) ifmedia_add(softc->media, IFM_ETHER | IFM_100G_CR4, 0, NULL); if (supported & HWRM_PORT_PHY_QCFG_OUTPUT_SUPPORT_SPEEDS_50GB) ifmedia_add(softc->media, IFM_ETHER | IFM_50G_CR2, 0, NULL); if (supported & HWRM_PORT_PHY_QCFG_OUTPUT_SUPPORT_SPEEDS_40GB) ifmedia_add(softc->media, IFM_ETHER | IFM_40G_CR4, 0, NULL); if (supported & HWRM_PORT_PHY_QCFG_OUTPUT_SUPPORT_SPEEDS_25GB) ifmedia_add(softc->media, IFM_ETHER | IFM_25G_CR, 0, NULL); if (supported & HWRM_PORT_PHY_QCFG_OUTPUT_SUPPORT_SPEEDS_10GB) ifmedia_add(softc->media, IFM_ETHER | IFM_10G_CR1, 0, NULL); break; case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASEKR4: case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASEKR2: case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASEKR: if (supported & HWRM_PORT_PHY_QCFG_OUTPUT_SUPPORT_SPEEDS_100GB) ifmedia_add(softc->media, IFM_ETHER | IFM_100G_KR4, 0, NULL); if (supported & HWRM_PORT_PHY_QCFG_OUTPUT_SUPPORT_SPEEDS_50GB) ifmedia_add(softc->media, IFM_ETHER | IFM_50G_KR2, 0, NULL); if (supported & HWRM_PORT_PHY_QCFG_OUTPUT_SUPPORT_SPEEDS_40GB) ifmedia_add(softc->media, IFM_ETHER | IFM_40G_KR4, 0, NULL); if (supported & HWRM_PORT_PHY_QCFG_OUTPUT_SUPPORT_SPEEDS_25GB) ifmedia_add(softc->media, IFM_ETHER | IFM_25G_KR, 0, NULL); if (supported & HWRM_PORT_PHY_QCFG_OUTPUT_SUPPORT_SPEEDS_20GB) ifmedia_add(softc->media, IFM_ETHER | IFM_20G_KR2, 0, NULL); if (supported & HWRM_PORT_PHY_QCFG_OUTPUT_SUPPORT_SPEEDS_10GB) ifmedia_add(softc->media, IFM_ETHER | IFM_10G_KR, 0, NULL); break; case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASELR: if (supported & HWRM_PORT_PHY_QCFG_OUTPUT_SUPPORT_SPEEDS_100GB) ifmedia_add(softc->media, IFM_ETHER | IFM_100G_LR4, 0, NULL); if (supported & HWRM_PORT_PHY_QCFG_OUTPUT_SUPPORT_SPEEDS_40GB) ifmedia_add(softc->media, IFM_ETHER | IFM_40G_LR4, 0, NULL); if (supported & HWRM_PORT_PHY_QCFG_OUTPUT_SUPPORT_SPEEDS_10GB) ifmedia_add(softc->media, IFM_ETHER | IFM_10G_LR, 0, NULL); break; case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASESR: if (supported & HWRM_PORT_PHY_QCFG_OUTPUT_SUPPORT_SPEEDS_100GB) ifmedia_add(softc->media, IFM_ETHER | IFM_100G_SR4, 0, NULL); if (supported & HWRM_PORT_PHY_QCFG_OUTPUT_SUPPORT_SPEEDS_40GB) ifmedia_add(softc->media, IFM_ETHER | IFM_40G_SR4, 0, NULL); if (supported & HWRM_PORT_PHY_QCFG_OUTPUT_SUPPORT_SPEEDS_25GB) ifmedia_add(softc->media, IFM_ETHER | IFM_25G_SR, 0, NULL); if (supported & HWRM_PORT_PHY_QCFG_OUTPUT_SUPPORT_SPEEDS_10GB) ifmedia_add(softc->media, IFM_ETHER | IFM_10G_SR, 0, NULL); break; case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASEKX: if (supported & HWRM_PORT_PHY_QCFG_OUTPUT_SUPPORT_SPEEDS_10GB) ifmedia_add(softc->media, IFM_ETHER | IFM_10G_KX4, 0, NULL); if (supported & HWRM_PORT_PHY_QCFG_OUTPUT_SUPPORT_SPEEDS_2_5GB) ifmedia_add(softc->media, IFM_ETHER | IFM_2500_KX, 0, NULL); if (supported & HWRM_PORT_PHY_QCFG_OUTPUT_SUPPORT_SPEEDS_1GB) ifmedia_add(softc->media, IFM_ETHER | IFM_1000_KX, 0, NULL); break; case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASET: case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASETE: if (supported & HWRM_PORT_PHY_QCFG_OUTPUT_SUPPORT_SPEEDS_10MB) ifmedia_add(softc->media, IFM_ETHER | IFM_10_T, 0, NULL); if (supported & HWRM_PORT_PHY_QCFG_OUTPUT_SUPPORT_SPEEDS_100MB) ifmedia_add(softc->media, IFM_ETHER | IFM_100_T, 0, NULL); if (supported & HWRM_PORT_PHY_QCFG_OUTPUT_SUPPORT_SPEEDS_1GB) ifmedia_add(softc->media, IFM_ETHER | IFM_1000_T, 0, NULL); if (supported & HWRM_PORT_PHY_QCFG_OUTPUT_SUPPORT_SPEEDS_2_5GB) ifmedia_add(softc->media, IFM_ETHER | IFM_2500_T, 0, NULL); if (supported & HWRM_PORT_PHY_QCFG_OUTPUT_SUPPORT_SPEEDS_10GB) ifmedia_add(softc->media, IFM_ETHER | IFM_10G_T, 0, NULL); break; case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_SGMIIEXTPHY: if (supported & HWRM_PORT_PHY_QCFG_OUTPUT_SUPPORT_SPEEDS_1GB) ifmedia_add(softc->media, IFM_ETHER | IFM_1000_SGMII, 0, NULL); break; case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_UNKNOWN: default: /* * Workaround for Cumulus & Stratus * For Stratus: * media_type is being returned as 0x0 * Return support speeds as 10G, 25G, 50G & 100G * * For Cumulus: * phy_type is being returned as 0x14 (PHY_TYPE_40G_BASECR4) * Return support speeds as 1G, 10G, 25G & 50G */ if (pci_get_device(softc->dev) == BCM57454) { /* For Stratus: 10G, 25G, 50G & 100G */ ifmedia_add(softc->media, IFM_ETHER | IFM_100G_CR4, 0, NULL); ifmedia_add(softc->media, IFM_ETHER | IFM_50G_CR2, 0, NULL); ifmedia_add(softc->media, IFM_ETHER | IFM_25G_CR, 0, NULL); ifmedia_add(softc->media, IFM_ETHER | IFM_10G_CR1, 0, NULL); } else if (pci_get_device(softc->dev) == BCM57414) { /* For Cumulus: 1G, 10G, 25G & 50G */ ifmedia_add(softc->media, IFM_ETHER | IFM_50G_CR2, 0, NULL); ifmedia_add(softc->media, IFM_ETHER | IFM_25G_CR, 0, NULL); ifmedia_add(softc->media, IFM_ETHER | IFM_10G_CR1, 0, NULL); ifmedia_add(softc->media, IFM_ETHER | IFM_1000_T, 0, NULL); } break; } return; } static int bnxt_map_bar(struct bnxt_softc *softc, struct bnxt_bar_info *bar, int bar_num, bool shareable) { uint32_t flag; if (bar->res != NULL) { device_printf(softc->dev, "Bar %d already mapped\n", bar_num); return EDOOFUS; } bar->rid = PCIR_BAR(bar_num); flag = RF_ACTIVE; if (shareable) flag |= RF_SHAREABLE; if ((bar->res = bus_alloc_resource_any(softc->dev, SYS_RES_MEMORY, &bar->rid, flag)) == NULL) { device_printf(softc->dev, "PCI BAR%d mapping failure\n", bar_num); return (ENXIO); } bar->tag = rman_get_bustag(bar->res); bar->handle = rman_get_bushandle(bar->res); bar->size = rman_get_size(bar->res); return 0; } static int bnxt_pci_mapping(struct bnxt_softc *softc) { int rc; rc = bnxt_map_bar(softc, &softc->hwrm_bar, 0, true); if (rc) return rc; rc = bnxt_map_bar(softc, &softc->doorbell_bar, 2, false); return rc; } static void bnxt_pci_mapping_free(struct bnxt_softc *softc) { if (softc->hwrm_bar.res != NULL) bus_release_resource(softc->dev, SYS_RES_MEMORY, softc->hwrm_bar.rid, softc->hwrm_bar.res); softc->hwrm_bar.res = NULL; if (softc->doorbell_bar.res != NULL) bus_release_resource(softc->dev, SYS_RES_MEMORY, softc->doorbell_bar.rid, softc->doorbell_bar.res); softc->doorbell_bar.res = NULL; } static int bnxt_update_link(struct bnxt_softc *softc, bool chng_link_state) { struct bnxt_link_info *link_info = &softc->link_info; uint8_t link_up = link_info->link_up; int rc = 0; rc = bnxt_hwrm_port_phy_qcfg(softc); if (rc) goto exit; /* TODO: need to add more logic to report VF link */ if (chng_link_state) { if (link_info->phy_link_status == HWRM_PORT_PHY_QCFG_OUTPUT_LINK_LINK) link_info->link_up = 1; else link_info->link_up = 0; if (link_up != link_info->link_up) bnxt_report_link(softc); } else { /* always link down if not require to update link state */ link_info->link_up = 0; } exit: return rc; } void bnxt_report_link(struct bnxt_softc *softc) { const char *duplex = NULL, *flow_ctrl = NULL; if (softc->link_info.link_up == softc->link_info.last_link_up) { if (!softc->link_info.link_up) return; if (softc->link_info.pause == softc->link_info.last_pause && softc->link_info.duplex == softc->link_info.last_duplex) return; } if (softc->link_info.link_up) { if (softc->link_info.duplex == HWRM_PORT_PHY_QCFG_OUTPUT_DUPLEX_CFG_FULL) duplex = "full duplex"; else duplex = "half duplex"; if (softc->link_info.pause == ( HWRM_PORT_PHY_QCFG_OUTPUT_PAUSE_TX | HWRM_PORT_PHY_QCFG_OUTPUT_PAUSE_RX)) flow_ctrl = "FC - receive & transmit"; else if (softc->link_info.pause == HWRM_PORT_PHY_QCFG_OUTPUT_PAUSE_TX) flow_ctrl = "FC - transmit"; else if (softc->link_info.pause == HWRM_PORT_PHY_QCFG_OUTPUT_PAUSE_RX) flow_ctrl = "FC - receive"; else flow_ctrl = "FC - none"; iflib_link_state_change(softc->ctx, LINK_STATE_UP, IF_Gbps(100)); device_printf(softc->dev, "Link is UP %s, %s - %d Mbps \n", duplex, flow_ctrl, (softc->link_info.link_speed * 100)); } else { iflib_link_state_change(softc->ctx, LINK_STATE_DOWN, bnxt_get_baudrate(&softc->link_info)); device_printf(softc->dev, "Link is Down\n"); } softc->link_info.last_link_up = softc->link_info.link_up; softc->link_info.last_pause = softc->link_info.pause; softc->link_info.last_duplex = softc->link_info.duplex; } static int bnxt_handle_rx_cp(void *arg) { struct bnxt_cp_ring *cpr = arg; /* Disable further interrupts for this queue */ BNXT_CP_DISABLE_DB(&cpr->ring); return FILTER_SCHEDULE_THREAD; } static int bnxt_handle_def_cp(void *arg) { struct bnxt_softc *softc = arg; BNXT_CP_DISABLE_DB(&softc->def_cp_ring.ring); GROUPTASK_ENQUEUE(&softc->def_cp_task); return FILTER_HANDLED; } static void bnxt_clear_ids(struct bnxt_softc *softc) { int i; softc->def_cp_ring.stats_ctx_id = HWRM_NA_SIGNATURE; softc->def_cp_ring.ring.phys_id = (uint16_t)HWRM_NA_SIGNATURE; for (i = 0; i < softc->ntxqsets; i++) { softc->tx_cp_rings[i].stats_ctx_id = HWRM_NA_SIGNATURE; softc->tx_cp_rings[i].ring.phys_id = (uint16_t)HWRM_NA_SIGNATURE; softc->tx_rings[i].phys_id = (uint16_t)HWRM_NA_SIGNATURE; } for (i = 0; i < softc->nrxqsets; i++) { softc->rx_cp_rings[i].stats_ctx_id = HWRM_NA_SIGNATURE; softc->rx_cp_rings[i].ring.phys_id = (uint16_t)HWRM_NA_SIGNATURE; softc->rx_rings[i].phys_id = (uint16_t)HWRM_NA_SIGNATURE; softc->ag_rings[i].phys_id = (uint16_t)HWRM_NA_SIGNATURE; softc->grp_info[i].grp_id = (uint16_t)HWRM_NA_SIGNATURE; } softc->vnic_info.filter_id = -1; softc->vnic_info.id = (uint16_t)HWRM_NA_SIGNATURE; softc->vnic_info.rss_id = (uint16_t)HWRM_NA_SIGNATURE; memset(softc->vnic_info.rss_grp_tbl.idi_vaddr, 0xff, softc->vnic_info.rss_grp_tbl.idi_size); } static void bnxt_mark_cpr_invalid(struct bnxt_cp_ring *cpr) { struct cmpl_base *cmp = (void *)cpr->ring.vaddr; int i; for (i = 0; i < cpr->ring.ring_size; i++) cmp[i].info3_v = !cpr->v_bit; } static void bnxt_handle_async_event(struct bnxt_softc *softc, struct cmpl_base *cmpl) { struct hwrm_async_event_cmpl *ae = (void *)cmpl; uint16_t async_id = le16toh(ae->event_id); struct ifmediareq ifmr; switch (async_id) { case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_STATUS_CHANGE: case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_SPEED_CHANGE: case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_SPEED_CFG_CHANGE: bnxt_media_status(softc->ctx, &ifmr); break; case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_MTU_CHANGE: case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_DCB_CONFIG_CHANGE: case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PORT_CONN_NOT_ALLOWED: case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_SPEED_CFG_NOT_ALLOWED: case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_FUNC_DRVR_UNLOAD: case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_FUNC_DRVR_LOAD: case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PF_DRVR_UNLOAD: case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PF_DRVR_LOAD: case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_VF_FLR: case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_VF_MAC_ADDR_CHANGE: case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PF_VF_COMM_STATUS_CHANGE: case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_VF_CFG_CHANGE: case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_HWRM_ERROR: device_printf(softc->dev, "Unhandled async completion type %u\n", async_id); break; default: device_printf(softc->dev, "Unknown async completion type %u\n", async_id); break; } } static void bnxt_def_cp_task(void *context) { if_ctx_t ctx = context; struct bnxt_softc *softc = iflib_get_softc(ctx); struct bnxt_cp_ring *cpr = &softc->def_cp_ring; /* Handle completions on the default completion ring */ struct cmpl_base *cmpl; uint32_t cons = cpr->cons; bool v_bit = cpr->v_bit; bool last_v_bit; uint32_t last_cons; uint16_t type; for (;;) { last_cons = cons; last_v_bit = v_bit; NEXT_CP_CONS_V(&cpr->ring, cons, v_bit); cmpl = &((struct cmpl_base *)cpr->ring.vaddr)[cons]; if (!CMP_VALID(cmpl, v_bit)) break; type = le16toh(cmpl->type) & CMPL_BASE_TYPE_MASK; switch (type) { case CMPL_BASE_TYPE_HWRM_ASYNC_EVENT: bnxt_handle_async_event(softc, cmpl); break; case CMPL_BASE_TYPE_TX_L2: case CMPL_BASE_TYPE_RX_L2: case CMPL_BASE_TYPE_RX_AGG: case CMPL_BASE_TYPE_RX_TPA_START: case CMPL_BASE_TYPE_RX_TPA_END: case CMPL_BASE_TYPE_STAT_EJECT: case CMPL_BASE_TYPE_HWRM_DONE: case CMPL_BASE_TYPE_HWRM_FWD_REQ: case CMPL_BASE_TYPE_HWRM_FWD_RESP: case CMPL_BASE_TYPE_CQ_NOTIFICATION: case CMPL_BASE_TYPE_SRQ_EVENT: case CMPL_BASE_TYPE_DBQ_EVENT: case CMPL_BASE_TYPE_QP_EVENT: case CMPL_BASE_TYPE_FUNC_EVENT: device_printf(softc->dev, "Unhandled completion type %u\n", type); break; default: device_printf(softc->dev, "Unknown completion type %u\n", type); break; } } cpr->cons = last_cons; cpr->v_bit = last_v_bit; BNXT_CP_IDX_ENABLE_DB(&cpr->ring, cpr->cons); } static uint8_t get_phy_type(struct bnxt_softc *softc) { struct bnxt_link_info *link_info = &softc->link_info; uint8_t phy_type = link_info->phy_type; uint16_t supported; if (phy_type != HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_UNKNOWN) return phy_type; /* Deduce the phy type from the media type and supported speeds */ supported = link_info->support_speeds; if (link_info->media_type == HWRM_PORT_PHY_QCFG_OUTPUT_MEDIA_TYPE_TP) return HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASET; if (link_info->media_type == HWRM_PORT_PHY_QCFG_OUTPUT_MEDIA_TYPE_DAC) { if (supported & HWRM_PORT_PHY_QCFG_OUTPUT_SUPPORT_SPEEDS_2_5GB) return HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASEKX; if (supported & HWRM_PORT_PHY_QCFG_OUTPUT_SUPPORT_SPEEDS_20GB) return HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASEKR; return HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASECR; } if (link_info->media_type == HWRM_PORT_PHY_QCFG_OUTPUT_MEDIA_TYPE_FIBRE) return HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASESR; return phy_type; } bool bnxt_check_hwrm_version(struct bnxt_softc *softc) { char buf[16]; sprintf(buf, "%hhu.%hhu.%hhu", softc->ver_info->hwrm_min_major, softc->ver_info->hwrm_min_minor, softc->ver_info->hwrm_min_update); if (softc->ver_info->hwrm_min_major > softc->ver_info->hwrm_if_major) { device_printf(softc->dev, "WARNING: HWRM version %s is too old (older than %s)\n", softc->ver_info->hwrm_if_ver, buf); return false; } else if(softc->ver_info->hwrm_min_major == softc->ver_info->hwrm_if_major) { if (softc->ver_info->hwrm_min_minor > softc->ver_info->hwrm_if_minor) { device_printf(softc->dev, "WARNING: HWRM version %s is too old (older than %s)\n", softc->ver_info->hwrm_if_ver, buf); return false; } else if (softc->ver_info->hwrm_min_minor == softc->ver_info->hwrm_if_minor) { if (softc->ver_info->hwrm_min_update > softc->ver_info->hwrm_if_update) { device_printf(softc->dev, "WARNING: HWRM version %s is too old (older than %s)\n", softc->ver_info->hwrm_if_ver, buf); return false; } } } return true; } static uint64_t bnxt_get_baudrate(struct bnxt_link_info *link) { switch (link->link_speed) { case HWRM_PORT_PHY_QCFG_OUTPUT_LINK_SPEED_100MB: return IF_Mbps(100); case HWRM_PORT_PHY_QCFG_OUTPUT_LINK_SPEED_1GB: return IF_Gbps(1); case HWRM_PORT_PHY_QCFG_OUTPUT_LINK_SPEED_2GB: return IF_Gbps(2); case HWRM_PORT_PHY_QCFG_OUTPUT_LINK_SPEED_2_5GB: return IF_Mbps(2500); case HWRM_PORT_PHY_QCFG_OUTPUT_LINK_SPEED_10GB: return IF_Gbps(10); case HWRM_PORT_PHY_QCFG_OUTPUT_LINK_SPEED_20GB: return IF_Gbps(20); case HWRM_PORT_PHY_QCFG_OUTPUT_LINK_SPEED_25GB: return IF_Gbps(25); case HWRM_PORT_PHY_QCFG_OUTPUT_LINK_SPEED_40GB: return IF_Gbps(40); case HWRM_PORT_PHY_QCFG_OUTPUT_LINK_SPEED_50GB: return IF_Gbps(50); case HWRM_PORT_PHY_QCFG_OUTPUT_LINK_SPEED_100GB: return IF_Gbps(100); case HWRM_PORT_PHY_QCFG_OUTPUT_LINK_SPEED_10MB: return IF_Mbps(10); } return IF_Gbps(100); } static void bnxt_get_wol_settings(struct bnxt_softc *softc) { uint16_t wol_handle = 0; if (!bnxt_wol_supported(softc)) return; do { wol_handle = bnxt_hwrm_get_wol_fltrs(softc, wol_handle); } while (wol_handle && wol_handle != BNXT_NO_MORE_WOL_FILTERS); } Index: head/sys/dev/e1000/e1000_80003es2lan.c =================================================================== --- head/sys/dev/e1000/e1000_80003es2lan.c (revision 323515) +++ head/sys/dev/e1000/e1000_80003es2lan.c (revision 323516) @@ -1,1526 +1,1453 @@ /****************************************************************************** Copyright (c) 2001-2015, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************/ /*$FreeBSD$*/ /* 80003ES2LAN Gigabit Ethernet Controller (Copper) * 80003ES2LAN Gigabit Ethernet Controller (Serdes) */ #include "e1000_api.h" static s32 e1000_acquire_phy_80003es2lan(struct e1000_hw *hw); static void e1000_release_phy_80003es2lan(struct e1000_hw *hw); static s32 e1000_acquire_nvm_80003es2lan(struct e1000_hw *hw); static void e1000_release_nvm_80003es2lan(struct e1000_hw *hw); static s32 e1000_read_phy_reg_gg82563_80003es2lan(struct e1000_hw *hw, u32 offset, u16 *data); static s32 e1000_write_phy_reg_gg82563_80003es2lan(struct e1000_hw *hw, u32 offset, u16 data); static s32 e1000_write_nvm_80003es2lan(struct e1000_hw *hw, u16 offset, u16 words, u16 *data); static s32 e1000_get_cfg_done_80003es2lan(struct e1000_hw *hw); static s32 e1000_phy_force_speed_duplex_80003es2lan(struct e1000_hw *hw); static s32 e1000_get_cable_length_80003es2lan(struct e1000_hw *hw); static s32 e1000_get_link_up_info_80003es2lan(struct e1000_hw *hw, u16 *speed, u16 *duplex); static s32 e1000_reset_hw_80003es2lan(struct e1000_hw *hw); static s32 e1000_init_hw_80003es2lan(struct e1000_hw *hw); static s32 e1000_setup_copper_link_80003es2lan(struct e1000_hw *hw); static void e1000_clear_hw_cntrs_80003es2lan(struct e1000_hw *hw); -static s32 e1000_acquire_swfw_sync_80003es2lan(struct e1000_hw *hw, u16 mask); static s32 e1000_cfg_kmrn_10_100_80003es2lan(struct e1000_hw *hw, u16 duplex); static s32 e1000_cfg_kmrn_1000_80003es2lan(struct e1000_hw *hw); static s32 e1000_cfg_on_link_up_80003es2lan(struct e1000_hw *hw); static s32 e1000_read_kmrn_reg_80003es2lan(struct e1000_hw *hw, u32 offset, u16 *data); static s32 e1000_write_kmrn_reg_80003es2lan(struct e1000_hw *hw, u32 offset, u16 data); static void e1000_initialize_hw_bits_80003es2lan(struct e1000_hw *hw); -static void e1000_release_swfw_sync_80003es2lan(struct e1000_hw *hw, u16 mask); static s32 e1000_read_mac_addr_80003es2lan(struct e1000_hw *hw); static void e1000_power_down_phy_copper_80003es2lan(struct e1000_hw *hw); /* A table for the GG82563 cable length where the range is defined * with a lower bound at "index" and the upper bound at * "index + 5". */ static const u16 e1000_gg82563_cable_length_table[] = { 0, 60, 115, 150, 150, 60, 115, 150, 180, 180, 0xFF }; #define GG82563_CABLE_LENGTH_TABLE_SIZE \ (sizeof(e1000_gg82563_cable_length_table) / \ sizeof(e1000_gg82563_cable_length_table[0])) /** * e1000_init_phy_params_80003es2lan - Init ESB2 PHY func ptrs. * @hw: pointer to the HW structure **/ static s32 e1000_init_phy_params_80003es2lan(struct e1000_hw *hw) { struct e1000_phy_info *phy = &hw->phy; s32 ret_val; DEBUGFUNC("e1000_init_phy_params_80003es2lan"); if (hw->phy.media_type != e1000_media_type_copper) { phy->type = e1000_phy_none; return E1000_SUCCESS; } else { phy->ops.power_up = e1000_power_up_phy_copper; phy->ops.power_down = e1000_power_down_phy_copper_80003es2lan; } phy->addr = 1; phy->autoneg_mask = AUTONEG_ADVERTISE_SPEED_DEFAULT; phy->reset_delay_us = 100; phy->type = e1000_phy_gg82563; phy->ops.acquire = e1000_acquire_phy_80003es2lan; phy->ops.check_polarity = e1000_check_polarity_m88; phy->ops.check_reset_block = e1000_check_reset_block_generic; phy->ops.commit = e1000_phy_sw_reset_generic; phy->ops.get_cfg_done = e1000_get_cfg_done_80003es2lan; phy->ops.get_info = e1000_get_phy_info_m88; phy->ops.release = e1000_release_phy_80003es2lan; phy->ops.reset = e1000_phy_hw_reset_generic; phy->ops.set_d3_lplu_state = e1000_set_d3_lplu_state_generic; phy->ops.force_speed_duplex = e1000_phy_force_speed_duplex_80003es2lan; phy->ops.get_cable_length = e1000_get_cable_length_80003es2lan; phy->ops.read_reg = e1000_read_phy_reg_gg82563_80003es2lan; phy->ops.write_reg = e1000_write_phy_reg_gg82563_80003es2lan; phy->ops.cfg_on_link_up = e1000_cfg_on_link_up_80003es2lan; /* This can only be done after all function pointers are setup. */ ret_val = e1000_get_phy_id(hw); /* Verify phy id */ if (phy->id != GG82563_E_PHY_ID) return -E1000_ERR_PHY; return ret_val; } /** * e1000_init_nvm_params_80003es2lan - Init ESB2 NVM func ptrs. * @hw: pointer to the HW structure **/ static s32 e1000_init_nvm_params_80003es2lan(struct e1000_hw *hw) { struct e1000_nvm_info *nvm = &hw->nvm; u32 eecd = E1000_READ_REG(hw, E1000_EECD); u16 size; DEBUGFUNC("e1000_init_nvm_params_80003es2lan"); nvm->opcode_bits = 8; nvm->delay_usec = 1; switch (nvm->override) { case e1000_nvm_override_spi_large: nvm->page_size = 32; nvm->address_bits = 16; break; case e1000_nvm_override_spi_small: nvm->page_size = 8; nvm->address_bits = 8; break; default: nvm->page_size = eecd & E1000_EECD_ADDR_BITS ? 32 : 8; nvm->address_bits = eecd & E1000_EECD_ADDR_BITS ? 16 : 8; break; } nvm->type = e1000_nvm_eeprom_spi; size = (u16)((eecd & E1000_EECD_SIZE_EX_MASK) >> E1000_EECD_SIZE_EX_SHIFT); /* Added to a constant, "size" becomes the left-shift value * for setting word_size. */ size += NVM_WORD_SIZE_BASE_SHIFT; /* EEPROM access above 16k is unsupported */ if (size > 14) size = 14; nvm->word_size = 1 << size; /* Function Pointers */ nvm->ops.acquire = e1000_acquire_nvm_80003es2lan; nvm->ops.read = e1000_read_nvm_eerd; nvm->ops.release = e1000_release_nvm_80003es2lan; nvm->ops.update = e1000_update_nvm_checksum_generic; nvm->ops.valid_led_default = e1000_valid_led_default_generic; nvm->ops.validate = e1000_validate_nvm_checksum_generic; nvm->ops.write = e1000_write_nvm_80003es2lan; return E1000_SUCCESS; } /** * e1000_init_mac_params_80003es2lan - Init ESB2 MAC func ptrs. * @hw: pointer to the HW structure **/ static s32 e1000_init_mac_params_80003es2lan(struct e1000_hw *hw) { struct e1000_mac_info *mac = &hw->mac; DEBUGFUNC("e1000_init_mac_params_80003es2lan"); /* Set media type and media-dependent function pointers */ switch (hw->device_id) { case E1000_DEV_ID_80003ES2LAN_SERDES_DPT: hw->phy.media_type = e1000_media_type_internal_serdes; mac->ops.check_for_link = e1000_check_for_serdes_link_generic; mac->ops.setup_physical_interface = e1000_setup_fiber_serdes_link_generic; break; default: hw->phy.media_type = e1000_media_type_copper; mac->ops.check_for_link = e1000_check_for_copper_link_generic; mac->ops.setup_physical_interface = e1000_setup_copper_link_80003es2lan; break; } /* Set mta register count */ mac->mta_reg_count = 128; /* Set rar entry count */ mac->rar_entry_count = E1000_RAR_ENTRIES; /* Set if part includes ASF firmware */ mac->asf_firmware_present = TRUE; /* FWSM register */ mac->has_fwsm = TRUE; /* ARC supported; valid only if manageability features are enabled. */ mac->arc_subsystem_valid = !!(E1000_READ_REG(hw, E1000_FWSM) & E1000_FWSM_MODE_MASK); /* Adaptive IFS not supported */ mac->adaptive_ifs = FALSE; /* Function pointers */ /* bus type/speed/width */ mac->ops.get_bus_info = e1000_get_bus_info_pcie_generic; /* reset */ mac->ops.reset_hw = e1000_reset_hw_80003es2lan; /* hw initialization */ mac->ops.init_hw = e1000_init_hw_80003es2lan; /* link setup */ mac->ops.setup_link = e1000_setup_link_generic; /* check management mode */ mac->ops.check_mng_mode = e1000_check_mng_mode_generic; /* multicast address update */ mac->ops.update_mc_addr_list = e1000_update_mc_addr_list_generic; /* writing VFTA */ mac->ops.write_vfta = e1000_write_vfta_generic; /* clearing VFTA */ mac->ops.clear_vfta = e1000_clear_vfta_generic; /* read mac address */ mac->ops.read_mac_addr = e1000_read_mac_addr_80003es2lan; /* ID LED init */ mac->ops.id_led_init = e1000_id_led_init_generic; /* blink LED */ mac->ops.blink_led = e1000_blink_led_generic; /* setup LED */ mac->ops.setup_led = e1000_setup_led_generic; /* cleanup LED */ mac->ops.cleanup_led = e1000_cleanup_led_generic; /* turn on/off LED */ mac->ops.led_on = e1000_led_on_generic; mac->ops.led_off = e1000_led_off_generic; /* clear hardware counters */ mac->ops.clear_hw_cntrs = e1000_clear_hw_cntrs_80003es2lan; /* link info */ mac->ops.get_link_up_info = e1000_get_link_up_info_80003es2lan; /* set lan id for port to determine which phy lock to use */ hw->mac.ops.set_lan_id(hw); return E1000_SUCCESS; } /** * e1000_init_function_pointers_80003es2lan - Init ESB2 func ptrs. * @hw: pointer to the HW structure * * Called to initialize all function pointers and parameters. **/ void e1000_init_function_pointers_80003es2lan(struct e1000_hw *hw) { DEBUGFUNC("e1000_init_function_pointers_80003es2lan"); hw->mac.ops.init_params = e1000_init_mac_params_80003es2lan; hw->nvm.ops.init_params = e1000_init_nvm_params_80003es2lan; hw->phy.ops.init_params = e1000_init_phy_params_80003es2lan; } /** * e1000_acquire_phy_80003es2lan - Acquire rights to access PHY * @hw: pointer to the HW structure * * A wrapper to acquire access rights to the correct PHY. **/ static s32 e1000_acquire_phy_80003es2lan(struct e1000_hw *hw) { u16 mask; DEBUGFUNC("e1000_acquire_phy_80003es2lan"); mask = hw->bus.func ? E1000_SWFW_PHY1_SM : E1000_SWFW_PHY0_SM; - return e1000_acquire_swfw_sync_80003es2lan(hw, mask); + return e1000_acquire_swfw_sync(hw, mask); } /** * e1000_release_phy_80003es2lan - Release rights to access PHY * @hw: pointer to the HW structure * * A wrapper to release access rights to the correct PHY. **/ static void e1000_release_phy_80003es2lan(struct e1000_hw *hw) { u16 mask; DEBUGFUNC("e1000_release_phy_80003es2lan"); mask = hw->bus.func ? E1000_SWFW_PHY1_SM : E1000_SWFW_PHY0_SM; - e1000_release_swfw_sync_80003es2lan(hw, mask); + e1000_release_swfw_sync(hw, mask); } /** * e1000_acquire_mac_csr_80003es2lan - Acquire right to access Kumeran register * @hw: pointer to the HW structure * * Acquire the semaphore to access the Kumeran interface. * **/ static s32 e1000_acquire_mac_csr_80003es2lan(struct e1000_hw *hw) { u16 mask; DEBUGFUNC("e1000_acquire_mac_csr_80003es2lan"); mask = E1000_SWFW_CSR_SM; - return e1000_acquire_swfw_sync_80003es2lan(hw, mask); + return e1000_acquire_swfw_sync(hw, mask); } /** * e1000_release_mac_csr_80003es2lan - Release right to access Kumeran Register * @hw: pointer to the HW structure * * Release the semaphore used to access the Kumeran interface **/ static void e1000_release_mac_csr_80003es2lan(struct e1000_hw *hw) { u16 mask; DEBUGFUNC("e1000_release_mac_csr_80003es2lan"); mask = E1000_SWFW_CSR_SM; - e1000_release_swfw_sync_80003es2lan(hw, mask); + e1000_release_swfw_sync(hw, mask); } /** * e1000_acquire_nvm_80003es2lan - Acquire rights to access NVM * @hw: pointer to the HW structure * * Acquire the semaphore to access the EEPROM. **/ static s32 e1000_acquire_nvm_80003es2lan(struct e1000_hw *hw) { s32 ret_val; DEBUGFUNC("e1000_acquire_nvm_80003es2lan"); - ret_val = e1000_acquire_swfw_sync_80003es2lan(hw, E1000_SWFW_EEP_SM); + ret_val = e1000_acquire_swfw_sync(hw, E1000_SWFW_EEP_SM); if (ret_val) return ret_val; ret_val = e1000_acquire_nvm_generic(hw); if (ret_val) - e1000_release_swfw_sync_80003es2lan(hw, E1000_SWFW_EEP_SM); + e1000_release_swfw_sync(hw, E1000_SWFW_EEP_SM); return ret_val; } /** * e1000_release_nvm_80003es2lan - Relinquish rights to access NVM * @hw: pointer to the HW structure * * Release the semaphore used to access the EEPROM. **/ static void e1000_release_nvm_80003es2lan(struct e1000_hw *hw) { DEBUGFUNC("e1000_release_nvm_80003es2lan"); e1000_release_nvm_generic(hw); - e1000_release_swfw_sync_80003es2lan(hw, E1000_SWFW_EEP_SM); -} - -/** - * e1000_acquire_swfw_sync_80003es2lan - Acquire SW/FW semaphore - * @hw: pointer to the HW structure - * @mask: specifies which semaphore to acquire - * - * Acquire the SW/FW semaphore to access the PHY or NVM. The mask - * will also specify which port we're acquiring the lock for. - **/ -static s32 e1000_acquire_swfw_sync_80003es2lan(struct e1000_hw *hw, u16 mask) -{ - u32 swfw_sync; - u32 swmask = mask; - u32 fwmask = mask << 16; - s32 i = 0; - s32 timeout = 50; - - DEBUGFUNC("e1000_acquire_swfw_sync_80003es2lan"); - - while (i < timeout) { - if (e1000_get_hw_semaphore_generic(hw)) - return -E1000_ERR_SWFW_SYNC; - - swfw_sync = E1000_READ_REG(hw, E1000_SW_FW_SYNC); - if (!(swfw_sync & (fwmask | swmask))) - break; - - /* Firmware currently using resource (fwmask) - * or other software thread using resource (swmask) - */ - e1000_put_hw_semaphore_generic(hw); - msec_delay_irq(5); - i++; - } - - if (i == timeout) { - DEBUGOUT("Driver can't access resource, SW_FW_SYNC timeout.\n"); - return -E1000_ERR_SWFW_SYNC; - } - - swfw_sync |= swmask; - E1000_WRITE_REG(hw, E1000_SW_FW_SYNC, swfw_sync); - - e1000_put_hw_semaphore_generic(hw); - - return E1000_SUCCESS; -} - -/** - * e1000_release_swfw_sync_80003es2lan - Release SW/FW semaphore - * @hw: pointer to the HW structure - * @mask: specifies which semaphore to acquire - * - * Release the SW/FW semaphore used to access the PHY or NVM. The mask - * will also specify which port we're releasing the lock for. - **/ -static void e1000_release_swfw_sync_80003es2lan(struct e1000_hw *hw, u16 mask) -{ - u32 swfw_sync; - - DEBUGFUNC("e1000_release_swfw_sync_80003es2lan"); - - while (e1000_get_hw_semaphore_generic(hw) != E1000_SUCCESS) - ; /* Empty */ - - swfw_sync = E1000_READ_REG(hw, E1000_SW_FW_SYNC); - swfw_sync &= ~mask; - E1000_WRITE_REG(hw, E1000_SW_FW_SYNC, swfw_sync); - - e1000_put_hw_semaphore_generic(hw); + e1000_release_swfw_sync(hw, E1000_SWFW_EEP_SM); } /** * e1000_read_phy_reg_gg82563_80003es2lan - Read GG82563 PHY register * @hw: pointer to the HW structure * @offset: offset of the register to read * @data: pointer to the data returned from the operation * * Read the GG82563 PHY register. **/ static s32 e1000_read_phy_reg_gg82563_80003es2lan(struct e1000_hw *hw, u32 offset, u16 *data) { s32 ret_val; u32 page_select; u16 temp; DEBUGFUNC("e1000_read_phy_reg_gg82563_80003es2lan"); ret_val = e1000_acquire_phy_80003es2lan(hw); if (ret_val) return ret_val; /* Select Configuration Page */ if ((offset & MAX_PHY_REG_ADDRESS) < GG82563_MIN_ALT_REG) { page_select = GG82563_PHY_PAGE_SELECT; } else { /* Use Alternative Page Select register to access * registers 30 and 31 */ page_select = GG82563_PHY_PAGE_SELECT_ALT; } temp = (u16)((u16)offset >> GG82563_PAGE_SHIFT); ret_val = e1000_write_phy_reg_mdic(hw, page_select, temp); if (ret_val) { e1000_release_phy_80003es2lan(hw); return ret_val; } if (hw->dev_spec._80003es2lan.mdic_wa_enable) { /* The "ready" bit in the MDIC register may be incorrectly set * before the device has completed the "Page Select" MDI * transaction. So we wait 200us after each MDI command... */ usec_delay(200); /* ...and verify the command was successful. */ ret_val = e1000_read_phy_reg_mdic(hw, page_select, &temp); if (((u16)offset >> GG82563_PAGE_SHIFT) != temp) { e1000_release_phy_80003es2lan(hw); return -E1000_ERR_PHY; } usec_delay(200); ret_val = e1000_read_phy_reg_mdic(hw, MAX_PHY_REG_ADDRESS & offset, data); usec_delay(200); } else { ret_val = e1000_read_phy_reg_mdic(hw, MAX_PHY_REG_ADDRESS & offset, data); } e1000_release_phy_80003es2lan(hw); return ret_val; } /** * e1000_write_phy_reg_gg82563_80003es2lan - Write GG82563 PHY register * @hw: pointer to the HW structure * @offset: offset of the register to read * @data: value to write to the register * * Write to the GG82563 PHY register. **/ static s32 e1000_write_phy_reg_gg82563_80003es2lan(struct e1000_hw *hw, u32 offset, u16 data) { s32 ret_val; u32 page_select; u16 temp; DEBUGFUNC("e1000_write_phy_reg_gg82563_80003es2lan"); ret_val = e1000_acquire_phy_80003es2lan(hw); if (ret_val) return ret_val; /* Select Configuration Page */ if ((offset & MAX_PHY_REG_ADDRESS) < GG82563_MIN_ALT_REG) { page_select = GG82563_PHY_PAGE_SELECT; } else { /* Use Alternative Page Select register to access * registers 30 and 31 */ page_select = GG82563_PHY_PAGE_SELECT_ALT; } temp = (u16)((u16)offset >> GG82563_PAGE_SHIFT); ret_val = e1000_write_phy_reg_mdic(hw, page_select, temp); if (ret_val) { e1000_release_phy_80003es2lan(hw); return ret_val; } if (hw->dev_spec._80003es2lan.mdic_wa_enable) { /* The "ready" bit in the MDIC register may be incorrectly set * before the device has completed the "Page Select" MDI * transaction. So we wait 200us after each MDI command... */ usec_delay(200); /* ...and verify the command was successful. */ ret_val = e1000_read_phy_reg_mdic(hw, page_select, &temp); if (((u16)offset >> GG82563_PAGE_SHIFT) != temp) { e1000_release_phy_80003es2lan(hw); return -E1000_ERR_PHY; } usec_delay(200); ret_val = e1000_write_phy_reg_mdic(hw, MAX_PHY_REG_ADDRESS & offset, data); usec_delay(200); } else { ret_val = e1000_write_phy_reg_mdic(hw, MAX_PHY_REG_ADDRESS & offset, data); } e1000_release_phy_80003es2lan(hw); return ret_val; } /** * e1000_write_nvm_80003es2lan - Write to ESB2 NVM * @hw: pointer to the HW structure * @offset: offset of the register to read * @words: number of words to write * @data: buffer of data to write to the NVM * * Write "words" of data to the ESB2 NVM. **/ static s32 e1000_write_nvm_80003es2lan(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) { DEBUGFUNC("e1000_write_nvm_80003es2lan"); return e1000_write_nvm_spi(hw, offset, words, data); } /** * e1000_get_cfg_done_80003es2lan - Wait for configuration to complete * @hw: pointer to the HW structure * * Wait a specific amount of time for manageability processes to complete. * This is a function pointer entry point called by the phy module. **/ static s32 e1000_get_cfg_done_80003es2lan(struct e1000_hw *hw) { s32 timeout = PHY_CFG_TIMEOUT; u32 mask = E1000_NVM_CFG_DONE_PORT_0; DEBUGFUNC("e1000_get_cfg_done_80003es2lan"); if (hw->bus.func == 1) mask = E1000_NVM_CFG_DONE_PORT_1; while (timeout) { if (E1000_READ_REG(hw, E1000_EEMNGCTL) & mask) break; msec_delay(1); timeout--; } if (!timeout) { DEBUGOUT("MNG configuration cycle has not completed.\n"); return -E1000_ERR_RESET; } return E1000_SUCCESS; } /** * e1000_phy_force_speed_duplex_80003es2lan - Force PHY speed and duplex * @hw: pointer to the HW structure * * Force the speed and duplex settings onto the PHY. This is a * function pointer entry point called by the phy module. **/ static s32 e1000_phy_force_speed_duplex_80003es2lan(struct e1000_hw *hw) { s32 ret_val; u16 phy_data; bool link; DEBUGFUNC("e1000_phy_force_speed_duplex_80003es2lan"); if (!(hw->phy.ops.read_reg)) return E1000_SUCCESS; /* Clear Auto-Crossover to force MDI manually. M88E1000 requires MDI * forced whenever speed and duplex are forced. */ ret_val = hw->phy.ops.read_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data); if (ret_val) return ret_val; phy_data &= ~GG82563_PSCR_CROSSOVER_MODE_AUTO; ret_val = hw->phy.ops.write_reg(hw, GG82563_PHY_SPEC_CTRL, phy_data); if (ret_val) return ret_val; DEBUGOUT1("GG82563 PSCR: %X\n", phy_data); ret_val = hw->phy.ops.read_reg(hw, PHY_CONTROL, &phy_data); if (ret_val) return ret_val; e1000_phy_force_speed_duplex_setup(hw, &phy_data); /* Reset the phy to commit changes. */ phy_data |= MII_CR_RESET; ret_val = hw->phy.ops.write_reg(hw, PHY_CONTROL, phy_data); if (ret_val) return ret_val; usec_delay(1); if (hw->phy.autoneg_wait_to_complete) { DEBUGOUT("Waiting for forced speed/duplex link on GG82563 phy.\n"); ret_val = e1000_phy_has_link_generic(hw, PHY_FORCE_LIMIT, 100000, &link); if (ret_val) return ret_val; if (!link) { /* We didn't get link. * Reset the DSP and cross our fingers. */ ret_val = e1000_phy_reset_dsp_generic(hw); if (ret_val) return ret_val; } /* Try once more */ ret_val = e1000_phy_has_link_generic(hw, PHY_FORCE_LIMIT, 100000, &link); if (ret_val) return ret_val; } ret_val = hw->phy.ops.read_reg(hw, GG82563_PHY_MAC_SPEC_CTRL, &phy_data); if (ret_val) return ret_val; /* Resetting the phy means we need to verify the TX_CLK corresponds * to the link speed. 10Mbps -> 2.5MHz, else 25MHz. */ phy_data &= ~GG82563_MSCR_TX_CLK_MASK; if (hw->mac.forced_speed_duplex & E1000_ALL_10_SPEED) phy_data |= GG82563_MSCR_TX_CLK_10MBPS_2_5; else phy_data |= GG82563_MSCR_TX_CLK_100MBPS_25; /* In addition, we must re-enable CRS on Tx for both half and full * duplex. */ phy_data |= GG82563_MSCR_ASSERT_CRS_ON_TX; ret_val = hw->phy.ops.write_reg(hw, GG82563_PHY_MAC_SPEC_CTRL, phy_data); return ret_val; } /** * e1000_get_cable_length_80003es2lan - Set approximate cable length * @hw: pointer to the HW structure * * Find the approximate cable length as measured by the GG82563 PHY. * This is a function pointer entry point called by the phy module. **/ static s32 e1000_get_cable_length_80003es2lan(struct e1000_hw *hw) { struct e1000_phy_info *phy = &hw->phy; s32 ret_val; u16 phy_data, index; DEBUGFUNC("e1000_get_cable_length_80003es2lan"); if (!(hw->phy.ops.read_reg)) return E1000_SUCCESS; ret_val = hw->phy.ops.read_reg(hw, GG82563_PHY_DSP_DISTANCE, &phy_data); if (ret_val) return ret_val; index = phy_data & GG82563_DSPD_CABLE_LENGTH; if (index >= GG82563_CABLE_LENGTH_TABLE_SIZE - 5) return -E1000_ERR_PHY; phy->min_cable_length = e1000_gg82563_cable_length_table[index]; phy->max_cable_length = e1000_gg82563_cable_length_table[index + 5]; phy->cable_length = (phy->min_cable_length + phy->max_cable_length) / 2; return E1000_SUCCESS; } /** * e1000_get_link_up_info_80003es2lan - Report speed and duplex * @hw: pointer to the HW structure * @speed: pointer to speed buffer * @duplex: pointer to duplex buffer * * Retrieve the current speed and duplex configuration. **/ static s32 e1000_get_link_up_info_80003es2lan(struct e1000_hw *hw, u16 *speed, u16 *duplex) { s32 ret_val; DEBUGFUNC("e1000_get_link_up_info_80003es2lan"); if (hw->phy.media_type == e1000_media_type_copper) { ret_val = e1000_get_speed_and_duplex_copper_generic(hw, speed, duplex); hw->phy.ops.cfg_on_link_up(hw); } else { ret_val = e1000_get_speed_and_duplex_fiber_serdes_generic(hw, speed, duplex); } return ret_val; } /** * e1000_reset_hw_80003es2lan - Reset the ESB2 controller * @hw: pointer to the HW structure * * Perform a global reset to the ESB2 controller. **/ static s32 e1000_reset_hw_80003es2lan(struct e1000_hw *hw) { u32 ctrl; s32 ret_val; u16 kum_reg_data; DEBUGFUNC("e1000_reset_hw_80003es2lan"); /* Prevent the PCI-E bus from sticking if there is no TLP connection * on the last TLP read/write transaction when MAC is reset. */ ret_val = e1000_disable_pcie_master_generic(hw); if (ret_val) DEBUGOUT("PCI-E Master disable polling has failed.\n"); DEBUGOUT("Masking off all interrupts\n"); E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff); E1000_WRITE_REG(hw, E1000_RCTL, 0); E1000_WRITE_REG(hw, E1000_TCTL, E1000_TCTL_PSP); E1000_WRITE_FLUSH(hw); msec_delay(10); ctrl = E1000_READ_REG(hw, E1000_CTRL); ret_val = e1000_acquire_phy_80003es2lan(hw); if (ret_val) return ret_val; DEBUGOUT("Issuing a global reset to MAC\n"); E1000_WRITE_REG(hw, E1000_CTRL, ctrl | E1000_CTRL_RST); e1000_release_phy_80003es2lan(hw); /* Disable IBIST slave mode (far-end loopback) */ ret_val = e1000_read_kmrn_reg_80003es2lan(hw, E1000_KMRNCTRLSTA_INBAND_PARAM, &kum_reg_data); if (!ret_val) { kum_reg_data |= E1000_KMRNCTRLSTA_IBIST_DISABLE; ret_val = e1000_write_kmrn_reg_80003es2lan(hw, E1000_KMRNCTRLSTA_INBAND_PARAM, kum_reg_data); if (ret_val) DEBUGOUT("Error disabling far-end loopback\n"); } else DEBUGOUT("Error disabling far-end loopback\n"); ret_val = e1000_get_auto_rd_done_generic(hw); if (ret_val) /* We don't want to continue accessing MAC registers. */ return ret_val; /* Clear any pending interrupt events. */ E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff); E1000_READ_REG(hw, E1000_ICR); return e1000_check_alt_mac_addr_generic(hw); } /** * e1000_init_hw_80003es2lan - Initialize the ESB2 controller * @hw: pointer to the HW structure * * Initialize the hw bits, LED, VFTA, MTA, link and hw counters. **/ static s32 e1000_init_hw_80003es2lan(struct e1000_hw *hw) { struct e1000_mac_info *mac = &hw->mac; u32 reg_data; s32 ret_val; u16 kum_reg_data; u16 i; DEBUGFUNC("e1000_init_hw_80003es2lan"); e1000_initialize_hw_bits_80003es2lan(hw); /* Initialize identification LED */ ret_val = mac->ops.id_led_init(hw); /* An error is not fatal and we should not stop init due to this */ if (ret_val) DEBUGOUT("Error initializing identification LED\n"); /* Disabling VLAN filtering */ DEBUGOUT("Initializing the IEEE VLAN\n"); mac->ops.clear_vfta(hw); /* Setup the receive address. */ e1000_init_rx_addrs_generic(hw, mac->rar_entry_count); /* Zero out the Multicast HASH table */ DEBUGOUT("Zeroing the MTA\n"); for (i = 0; i < mac->mta_reg_count; i++) E1000_WRITE_REG_ARRAY(hw, E1000_MTA, i, 0); /* Setup link and flow control */ ret_val = mac->ops.setup_link(hw); if (ret_val) return ret_val; /* Disable IBIST slave mode (far-end loopback) */ ret_val = e1000_read_kmrn_reg_80003es2lan(hw, E1000_KMRNCTRLSTA_INBAND_PARAM, &kum_reg_data); if (!ret_val) { kum_reg_data |= E1000_KMRNCTRLSTA_IBIST_DISABLE; ret_val = e1000_write_kmrn_reg_80003es2lan(hw, E1000_KMRNCTRLSTA_INBAND_PARAM, kum_reg_data); if (ret_val) DEBUGOUT("Error disabling far-end loopback\n"); } else DEBUGOUT("Error disabling far-end loopback\n"); /* Set the transmit descriptor write-back policy */ reg_data = E1000_READ_REG(hw, E1000_TXDCTL(0)); reg_data = ((reg_data & ~E1000_TXDCTL_WTHRESH) | E1000_TXDCTL_FULL_TX_DESC_WB | E1000_TXDCTL_COUNT_DESC); E1000_WRITE_REG(hw, E1000_TXDCTL(0), reg_data); /* ...for both queues. */ reg_data = E1000_READ_REG(hw, E1000_TXDCTL(1)); reg_data = ((reg_data & ~E1000_TXDCTL_WTHRESH) | E1000_TXDCTL_FULL_TX_DESC_WB | E1000_TXDCTL_COUNT_DESC); E1000_WRITE_REG(hw, E1000_TXDCTL(1), reg_data); /* Enable retransmit on late collisions */ reg_data = E1000_READ_REG(hw, E1000_TCTL); reg_data |= E1000_TCTL_RTLC; E1000_WRITE_REG(hw, E1000_TCTL, reg_data); /* Configure Gigabit Carry Extend Padding */ reg_data = E1000_READ_REG(hw, E1000_TCTL_EXT); reg_data &= ~E1000_TCTL_EXT_GCEX_MASK; reg_data |= DEFAULT_TCTL_EXT_GCEX_80003ES2LAN; E1000_WRITE_REG(hw, E1000_TCTL_EXT, reg_data); /* Configure Transmit Inter-Packet Gap */ reg_data = E1000_READ_REG(hw, E1000_TIPG); reg_data &= ~E1000_TIPG_IPGT_MASK; reg_data |= DEFAULT_TIPG_IPGT_1000_80003ES2LAN; E1000_WRITE_REG(hw, E1000_TIPG, reg_data); reg_data = E1000_READ_REG_ARRAY(hw, E1000_FFLT, 0x0001); reg_data &= ~0x00100000; E1000_WRITE_REG_ARRAY(hw, E1000_FFLT, 0x0001, reg_data); /* default to TRUE to enable the MDIC W/A */ hw->dev_spec._80003es2lan.mdic_wa_enable = TRUE; ret_val = e1000_read_kmrn_reg_80003es2lan(hw, E1000_KMRNCTRLSTA_OFFSET >> E1000_KMRNCTRLSTA_OFFSET_SHIFT, &i); if (!ret_val) { if ((i & E1000_KMRNCTRLSTA_OPMODE_MASK) == E1000_KMRNCTRLSTA_OPMODE_INBAND_MDIO) hw->dev_spec._80003es2lan.mdic_wa_enable = FALSE; } /* Clear all of the statistics registers (clear on read). It is * important that we do this after we have tried to establish link * because the symbol error count will increment wildly if there * is no link. */ e1000_clear_hw_cntrs_80003es2lan(hw); return ret_val; } /** * e1000_initialize_hw_bits_80003es2lan - Init hw bits of ESB2 * @hw: pointer to the HW structure * * Initializes required hardware-dependent bits needed for normal operation. **/ static void e1000_initialize_hw_bits_80003es2lan(struct e1000_hw *hw) { u32 reg; DEBUGFUNC("e1000_initialize_hw_bits_80003es2lan"); /* Transmit Descriptor Control 0 */ reg = E1000_READ_REG(hw, E1000_TXDCTL(0)); reg |= (1 << 22); E1000_WRITE_REG(hw, E1000_TXDCTL(0), reg); /* Transmit Descriptor Control 1 */ reg = E1000_READ_REG(hw, E1000_TXDCTL(1)); reg |= (1 << 22); E1000_WRITE_REG(hw, E1000_TXDCTL(1), reg); /* Transmit Arbitration Control 0 */ reg = E1000_READ_REG(hw, E1000_TARC(0)); reg &= ~(0xF << 27); /* 30:27 */ if (hw->phy.media_type != e1000_media_type_copper) reg &= ~(1 << 20); E1000_WRITE_REG(hw, E1000_TARC(0), reg); /* Transmit Arbitration Control 1 */ reg = E1000_READ_REG(hw, E1000_TARC(1)); if (E1000_READ_REG(hw, E1000_TCTL) & E1000_TCTL_MULR) reg &= ~(1 << 28); else reg |= (1 << 28); E1000_WRITE_REG(hw, E1000_TARC(1), reg); /* Disable IPv6 extension header parsing because some malformed * IPv6 headers can hang the Rx. */ reg = E1000_READ_REG(hw, E1000_RFCTL); reg |= (E1000_RFCTL_IPV6_EX_DIS | E1000_RFCTL_NEW_IPV6_EXT_DIS); E1000_WRITE_REG(hw, E1000_RFCTL, reg); return; } /** * e1000_copper_link_setup_gg82563_80003es2lan - Configure GG82563 Link * @hw: pointer to the HW structure * * Setup some GG82563 PHY registers for obtaining link **/ static s32 e1000_copper_link_setup_gg82563_80003es2lan(struct e1000_hw *hw) { struct e1000_phy_info *phy = &hw->phy; s32 ret_val; u32 reg; u16 data; DEBUGFUNC("e1000_copper_link_setup_gg82563_80003es2lan"); ret_val = hw->phy.ops.read_reg(hw, GG82563_PHY_MAC_SPEC_CTRL, &data); if (ret_val) return ret_val; data |= GG82563_MSCR_ASSERT_CRS_ON_TX; /* Use 25MHz for both link down and 1000Base-T for Tx clock. */ data |= GG82563_MSCR_TX_CLK_1000MBPS_25; ret_val = hw->phy.ops.write_reg(hw, GG82563_PHY_MAC_SPEC_CTRL, data); if (ret_val) return ret_val; /* Options: * MDI/MDI-X = 0 (default) * 0 - Auto for all speeds * 1 - MDI mode * 2 - MDI-X mode * 3 - Auto for 1000Base-T only (MDI-X for 10/100Base-T modes) */ ret_val = hw->phy.ops.read_reg(hw, GG82563_PHY_SPEC_CTRL, &data); if (ret_val) return ret_val; data &= ~GG82563_PSCR_CROSSOVER_MODE_MASK; switch (phy->mdix) { case 1: data |= GG82563_PSCR_CROSSOVER_MODE_MDI; break; case 2: data |= GG82563_PSCR_CROSSOVER_MODE_MDIX; break; case 0: default: data |= GG82563_PSCR_CROSSOVER_MODE_AUTO; break; } /* Options: * disable_polarity_correction = 0 (default) * Automatic Correction for Reversed Cable Polarity * 0 - Disabled * 1 - Enabled */ data &= ~GG82563_PSCR_POLARITY_REVERSAL_DISABLE; if (phy->disable_polarity_correction) data |= GG82563_PSCR_POLARITY_REVERSAL_DISABLE; ret_val = hw->phy.ops.write_reg(hw, GG82563_PHY_SPEC_CTRL, data); if (ret_val) return ret_val; /* SW Reset the PHY so all changes take effect */ ret_val = hw->phy.ops.commit(hw); if (ret_val) { DEBUGOUT("Error Resetting the PHY\n"); return ret_val; } /* Bypass Rx and Tx FIFO's */ reg = E1000_KMRNCTRLSTA_OFFSET_FIFO_CTRL; data = (E1000_KMRNCTRLSTA_FIFO_CTRL_RX_BYPASS | E1000_KMRNCTRLSTA_FIFO_CTRL_TX_BYPASS); ret_val = e1000_write_kmrn_reg_80003es2lan(hw, reg, data); if (ret_val) return ret_val; reg = E1000_KMRNCTRLSTA_OFFSET_MAC2PHY_OPMODE; ret_val = e1000_read_kmrn_reg_80003es2lan(hw, reg, &data); if (ret_val) return ret_val; data |= E1000_KMRNCTRLSTA_OPMODE_E_IDLE; ret_val = e1000_write_kmrn_reg_80003es2lan(hw, reg, data); if (ret_val) return ret_val; ret_val = hw->phy.ops.read_reg(hw, GG82563_PHY_SPEC_CTRL_2, &data); if (ret_val) return ret_val; data &= ~GG82563_PSCR2_REVERSE_AUTO_NEG; ret_val = hw->phy.ops.write_reg(hw, GG82563_PHY_SPEC_CTRL_2, data); if (ret_val) return ret_val; reg = E1000_READ_REG(hw, E1000_CTRL_EXT); reg &= ~E1000_CTRL_EXT_LINK_MODE_MASK; E1000_WRITE_REG(hw, E1000_CTRL_EXT, reg); ret_val = hw->phy.ops.read_reg(hw, GG82563_PHY_PWR_MGMT_CTRL, &data); if (ret_val) return ret_val; /* Do not init these registers when the HW is in IAMT mode, since the * firmware will have already initialized them. We only initialize * them if the HW is not in IAMT mode. */ if (!hw->mac.ops.check_mng_mode(hw)) { /* Enable Electrical Idle on the PHY */ data |= GG82563_PMCR_ENABLE_ELECTRICAL_IDLE; ret_val = hw->phy.ops.write_reg(hw, GG82563_PHY_PWR_MGMT_CTRL, data); if (ret_val) return ret_val; ret_val = hw->phy.ops.read_reg(hw, GG82563_PHY_KMRN_MODE_CTRL, &data); if (ret_val) return ret_val; data &= ~GG82563_KMCR_PASS_FALSE_CARRIER; ret_val = hw->phy.ops.write_reg(hw, GG82563_PHY_KMRN_MODE_CTRL, data); if (ret_val) return ret_val; } /* Workaround: Disable padding in Kumeran interface in the MAC * and in the PHY to avoid CRC errors. */ ret_val = hw->phy.ops.read_reg(hw, GG82563_PHY_INBAND_CTRL, &data); if (ret_val) return ret_val; data |= GG82563_ICR_DIS_PADDING; ret_val = hw->phy.ops.write_reg(hw, GG82563_PHY_INBAND_CTRL, data); if (ret_val) return ret_val; return E1000_SUCCESS; } /** * e1000_setup_copper_link_80003es2lan - Setup Copper Link for ESB2 * @hw: pointer to the HW structure * * Essentially a wrapper for setting up all things "copper" related. * This is a function pointer entry point called by the mac module. **/ static s32 e1000_setup_copper_link_80003es2lan(struct e1000_hw *hw) { u32 ctrl; s32 ret_val; u16 reg_data; DEBUGFUNC("e1000_setup_copper_link_80003es2lan"); ctrl = E1000_READ_REG(hw, E1000_CTRL); ctrl |= E1000_CTRL_SLU; ctrl &= ~(E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX); E1000_WRITE_REG(hw, E1000_CTRL, ctrl); /* Set the mac to wait the maximum time between each * iteration and increase the max iterations when * polling the phy; this fixes erroneous timeouts at 10Mbps. */ ret_val = e1000_write_kmrn_reg_80003es2lan(hw, GG82563_REG(0x34, 4), 0xFFFF); if (ret_val) return ret_val; ret_val = e1000_read_kmrn_reg_80003es2lan(hw, GG82563_REG(0x34, 9), ®_data); if (ret_val) return ret_val; reg_data |= 0x3F; ret_val = e1000_write_kmrn_reg_80003es2lan(hw, GG82563_REG(0x34, 9), reg_data); if (ret_val) return ret_val; ret_val = e1000_read_kmrn_reg_80003es2lan(hw, E1000_KMRNCTRLSTA_OFFSET_INB_CTRL, ®_data); if (ret_val) return ret_val; reg_data |= E1000_KMRNCTRLSTA_INB_CTRL_DIS_PADDING; ret_val = e1000_write_kmrn_reg_80003es2lan(hw, E1000_KMRNCTRLSTA_OFFSET_INB_CTRL, reg_data); if (ret_val) return ret_val; ret_val = e1000_copper_link_setup_gg82563_80003es2lan(hw); if (ret_val) return ret_val; return e1000_setup_copper_link_generic(hw); } /** * e1000_cfg_on_link_up_80003es2lan - es2 link configuration after link-up * @hw: pointer to the HW structure * @duplex: current duplex setting * * Configure the KMRN interface by applying last minute quirks for * 10/100 operation. **/ static s32 e1000_cfg_on_link_up_80003es2lan(struct e1000_hw *hw) { s32 ret_val = E1000_SUCCESS; u16 speed; u16 duplex; DEBUGFUNC("e1000_configure_on_link_up"); if (hw->phy.media_type == e1000_media_type_copper) { ret_val = e1000_get_speed_and_duplex_copper_generic(hw, &speed, &duplex); if (ret_val) return ret_val; if (speed == SPEED_1000) ret_val = e1000_cfg_kmrn_1000_80003es2lan(hw); else ret_val = e1000_cfg_kmrn_10_100_80003es2lan(hw, duplex); } return ret_val; } /** * e1000_cfg_kmrn_10_100_80003es2lan - Apply "quirks" for 10/100 operation * @hw: pointer to the HW structure * @duplex: current duplex setting * * Configure the KMRN interface by applying last minute quirks for * 10/100 operation. **/ static s32 e1000_cfg_kmrn_10_100_80003es2lan(struct e1000_hw *hw, u16 duplex) { s32 ret_val; u32 tipg; u32 i = 0; u16 reg_data, reg_data2; DEBUGFUNC("e1000_configure_kmrn_for_10_100"); reg_data = E1000_KMRNCTRLSTA_HD_CTRL_10_100_DEFAULT; ret_val = e1000_write_kmrn_reg_80003es2lan(hw, E1000_KMRNCTRLSTA_OFFSET_HD_CTRL, reg_data); if (ret_val) return ret_val; /* Configure Transmit Inter-Packet Gap */ tipg = E1000_READ_REG(hw, E1000_TIPG); tipg &= ~E1000_TIPG_IPGT_MASK; tipg |= DEFAULT_TIPG_IPGT_10_100_80003ES2LAN; E1000_WRITE_REG(hw, E1000_TIPG, tipg); do { ret_val = hw->phy.ops.read_reg(hw, GG82563_PHY_KMRN_MODE_CTRL, ®_data); if (ret_val) return ret_val; ret_val = hw->phy.ops.read_reg(hw, GG82563_PHY_KMRN_MODE_CTRL, ®_data2); if (ret_val) return ret_val; i++; } while ((reg_data != reg_data2) && (i < GG82563_MAX_KMRN_RETRY)); if (duplex == HALF_DUPLEX) reg_data |= GG82563_KMCR_PASS_FALSE_CARRIER; else reg_data &= ~GG82563_KMCR_PASS_FALSE_CARRIER; return hw->phy.ops.write_reg(hw, GG82563_PHY_KMRN_MODE_CTRL, reg_data); } /** * e1000_cfg_kmrn_1000_80003es2lan - Apply "quirks" for gigabit operation * @hw: pointer to the HW structure * * Configure the KMRN interface by applying last minute quirks for * gigabit operation. **/ static s32 e1000_cfg_kmrn_1000_80003es2lan(struct e1000_hw *hw) { s32 ret_val; u16 reg_data, reg_data2; u32 tipg; u32 i = 0; DEBUGFUNC("e1000_configure_kmrn_for_1000"); reg_data = E1000_KMRNCTRLSTA_HD_CTRL_1000_DEFAULT; ret_val = e1000_write_kmrn_reg_80003es2lan(hw, E1000_KMRNCTRLSTA_OFFSET_HD_CTRL, reg_data); if (ret_val) return ret_val; /* Configure Transmit Inter-Packet Gap */ tipg = E1000_READ_REG(hw, E1000_TIPG); tipg &= ~E1000_TIPG_IPGT_MASK; tipg |= DEFAULT_TIPG_IPGT_1000_80003ES2LAN; E1000_WRITE_REG(hw, E1000_TIPG, tipg); do { ret_val = hw->phy.ops.read_reg(hw, GG82563_PHY_KMRN_MODE_CTRL, ®_data); if (ret_val) return ret_val; ret_val = hw->phy.ops.read_reg(hw, GG82563_PHY_KMRN_MODE_CTRL, ®_data2); if (ret_val) return ret_val; i++; } while ((reg_data != reg_data2) && (i < GG82563_MAX_KMRN_RETRY)); reg_data &= ~GG82563_KMCR_PASS_FALSE_CARRIER; return hw->phy.ops.write_reg(hw, GG82563_PHY_KMRN_MODE_CTRL, reg_data); } /** * e1000_read_kmrn_reg_80003es2lan - Read kumeran register * @hw: pointer to the HW structure * @offset: register offset to be read * @data: pointer to the read data * * Acquire semaphore, then read the PHY register at offset * using the kumeran interface. The information retrieved is stored in data. * Release the semaphore before exiting. **/ static s32 e1000_read_kmrn_reg_80003es2lan(struct e1000_hw *hw, u32 offset, u16 *data) { u32 kmrnctrlsta; s32 ret_val; DEBUGFUNC("e1000_read_kmrn_reg_80003es2lan"); ret_val = e1000_acquire_mac_csr_80003es2lan(hw); if (ret_val) return ret_val; kmrnctrlsta = ((offset << E1000_KMRNCTRLSTA_OFFSET_SHIFT) & E1000_KMRNCTRLSTA_OFFSET) | E1000_KMRNCTRLSTA_REN; E1000_WRITE_REG(hw, E1000_KMRNCTRLSTA, kmrnctrlsta); E1000_WRITE_FLUSH(hw); usec_delay(2); kmrnctrlsta = E1000_READ_REG(hw, E1000_KMRNCTRLSTA); *data = (u16)kmrnctrlsta; e1000_release_mac_csr_80003es2lan(hw); return ret_val; } /** * e1000_write_kmrn_reg_80003es2lan - Write kumeran register * @hw: pointer to the HW structure * @offset: register offset to write to * @data: data to write at register offset * * Acquire semaphore, then write the data to PHY register * at the offset using the kumeran interface. Release semaphore * before exiting. **/ static s32 e1000_write_kmrn_reg_80003es2lan(struct e1000_hw *hw, u32 offset, u16 data) { u32 kmrnctrlsta; s32 ret_val; DEBUGFUNC("e1000_write_kmrn_reg_80003es2lan"); ret_val = e1000_acquire_mac_csr_80003es2lan(hw); if (ret_val) return ret_val; kmrnctrlsta = ((offset << E1000_KMRNCTRLSTA_OFFSET_SHIFT) & E1000_KMRNCTRLSTA_OFFSET) | data; E1000_WRITE_REG(hw, E1000_KMRNCTRLSTA, kmrnctrlsta); E1000_WRITE_FLUSH(hw); usec_delay(2); e1000_release_mac_csr_80003es2lan(hw); return ret_val; } /** * e1000_read_mac_addr_80003es2lan - Read device MAC address * @hw: pointer to the HW structure **/ static s32 e1000_read_mac_addr_80003es2lan(struct e1000_hw *hw) { s32 ret_val; DEBUGFUNC("e1000_read_mac_addr_80003es2lan"); /* If there's an alternate MAC address place it in RAR0 * so that it will override the Si installed default perm * address. */ ret_val = e1000_check_alt_mac_addr_generic(hw); if (ret_val) return ret_val; return e1000_read_mac_addr_generic(hw); } /** * e1000_power_down_phy_copper_80003es2lan - Remove link during PHY power down * @hw: pointer to the HW structure * * In the case of a PHY power down to save power, or to turn off link during a * driver unload, or wake on lan is not enabled, remove the link. **/ static void e1000_power_down_phy_copper_80003es2lan(struct e1000_hw *hw) { /* If the management interface is not enabled, then power down */ if (!(hw->mac.ops.check_mng_mode(hw) || hw->phy.ops.check_reset_block(hw))) e1000_power_down_phy_copper(hw); return; } /** * e1000_clear_hw_cntrs_80003es2lan - Clear device specific hardware counters * @hw: pointer to the HW structure * * Clears the hardware counters by reading the counter registers. **/ static void e1000_clear_hw_cntrs_80003es2lan(struct e1000_hw *hw) { DEBUGFUNC("e1000_clear_hw_cntrs_80003es2lan"); e1000_clear_hw_cntrs_base_generic(hw); E1000_READ_REG(hw, E1000_PRC64); E1000_READ_REG(hw, E1000_PRC127); E1000_READ_REG(hw, E1000_PRC255); E1000_READ_REG(hw, E1000_PRC511); E1000_READ_REG(hw, E1000_PRC1023); E1000_READ_REG(hw, E1000_PRC1522); E1000_READ_REG(hw, E1000_PTC64); E1000_READ_REG(hw, E1000_PTC127); E1000_READ_REG(hw, E1000_PTC255); E1000_READ_REG(hw, E1000_PTC511); E1000_READ_REG(hw, E1000_PTC1023); E1000_READ_REG(hw, E1000_PTC1522); E1000_READ_REG(hw, E1000_ALGNERRC); E1000_READ_REG(hw, E1000_RXERRC); E1000_READ_REG(hw, E1000_TNCRS); E1000_READ_REG(hw, E1000_CEXTERR); E1000_READ_REG(hw, E1000_TSCTC); E1000_READ_REG(hw, E1000_TSCTFC); E1000_READ_REG(hw, E1000_MGTPRC); E1000_READ_REG(hw, E1000_MGTPDC); E1000_READ_REG(hw, E1000_MGTPTC); E1000_READ_REG(hw, E1000_IAC); E1000_READ_REG(hw, E1000_ICRXOC); E1000_READ_REG(hw, E1000_ICRXPTC); E1000_READ_REG(hw, E1000_ICRXATC); E1000_READ_REG(hw, E1000_ICTXPTC); E1000_READ_REG(hw, E1000_ICTXATC); E1000_READ_REG(hw, E1000_ICTXQEC); E1000_READ_REG(hw, E1000_ICTXQMTC); E1000_READ_REG(hw, E1000_ICRXDMTC); } Index: head/sys/dev/e1000/e1000_82571.c =================================================================== --- head/sys/dev/e1000/e1000_82571.c (revision 323515) +++ head/sys/dev/e1000/e1000_82571.c (revision 323516) @@ -1,2031 +1,1910 @@ /****************************************************************************** Copyright (c) 2001-2015, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************/ /*$FreeBSD$*/ /* 82571EB Gigabit Ethernet Controller * 82571EB Gigabit Ethernet Controller (Copper) * 82571EB Gigabit Ethernet Controller (Fiber) * 82571EB Dual Port Gigabit Mezzanine Adapter * 82571EB Quad Port Gigabit Mezzanine Adapter * 82571PT Gigabit PT Quad Port Server ExpressModule * 82572EI Gigabit Ethernet Controller (Copper) * 82572EI Gigabit Ethernet Controller (Fiber) * 82572EI Gigabit Ethernet Controller * 82573V Gigabit Ethernet Controller (Copper) * 82573E Gigabit Ethernet Controller (Copper) * 82573L Gigabit Ethernet Controller * 82574L Gigabit Network Connection * 82583V Gigabit Network Connection */ #include "e1000_api.h" static s32 e1000_acquire_nvm_82571(struct e1000_hw *hw); static void e1000_release_nvm_82571(struct e1000_hw *hw); static s32 e1000_write_nvm_82571(struct e1000_hw *hw, u16 offset, u16 words, u16 *data); static s32 e1000_update_nvm_checksum_82571(struct e1000_hw *hw); static s32 e1000_validate_nvm_checksum_82571(struct e1000_hw *hw); static s32 e1000_get_cfg_done_82571(struct e1000_hw *hw); static s32 e1000_set_d0_lplu_state_82571(struct e1000_hw *hw, bool active); static s32 e1000_reset_hw_82571(struct e1000_hw *hw); static s32 e1000_init_hw_82571(struct e1000_hw *hw); static void e1000_clear_vfta_82571(struct e1000_hw *hw); static bool e1000_check_mng_mode_82574(struct e1000_hw *hw); static s32 e1000_led_on_82574(struct e1000_hw *hw); static s32 e1000_setup_link_82571(struct e1000_hw *hw); static s32 e1000_setup_copper_link_82571(struct e1000_hw *hw); static s32 e1000_check_for_serdes_link_82571(struct e1000_hw *hw); static s32 e1000_setup_fiber_serdes_link_82571(struct e1000_hw *hw); static s32 e1000_valid_led_default_82571(struct e1000_hw *hw, u16 *data); static void e1000_clear_hw_cntrs_82571(struct e1000_hw *hw); -static s32 e1000_get_hw_semaphore_82571(struct e1000_hw *hw); static s32 e1000_fix_nvm_checksum_82571(struct e1000_hw *hw); static s32 e1000_get_phy_id_82571(struct e1000_hw *hw); -static void e1000_put_hw_semaphore_82571(struct e1000_hw *hw); -static void e1000_put_hw_semaphore_82573(struct e1000_hw *hw); static s32 e1000_get_hw_semaphore_82574(struct e1000_hw *hw); static void e1000_put_hw_semaphore_82574(struct e1000_hw *hw); static s32 e1000_set_d0_lplu_state_82574(struct e1000_hw *hw, bool active); static s32 e1000_set_d3_lplu_state_82574(struct e1000_hw *hw, bool active); static void e1000_initialize_hw_bits_82571(struct e1000_hw *hw); static s32 e1000_write_nvm_eewr_82571(struct e1000_hw *hw, u16 offset, u16 words, u16 *data); static s32 e1000_read_mac_addr_82571(struct e1000_hw *hw); static void e1000_power_down_phy_copper_82571(struct e1000_hw *hw); /** * e1000_init_phy_params_82571 - Init PHY func ptrs. * @hw: pointer to the HW structure **/ static s32 e1000_init_phy_params_82571(struct e1000_hw *hw) { struct e1000_phy_info *phy = &hw->phy; s32 ret_val; DEBUGFUNC("e1000_init_phy_params_82571"); if (hw->phy.media_type != e1000_media_type_copper) { phy->type = e1000_phy_none; return E1000_SUCCESS; } phy->addr = 1; phy->autoneg_mask = AUTONEG_ADVERTISE_SPEED_DEFAULT; phy->reset_delay_us = 100; phy->ops.check_reset_block = e1000_check_reset_block_generic; phy->ops.reset = e1000_phy_hw_reset_generic; phy->ops.set_d0_lplu_state = e1000_set_d0_lplu_state_82571; phy->ops.set_d3_lplu_state = e1000_set_d3_lplu_state_generic; phy->ops.power_up = e1000_power_up_phy_copper; phy->ops.power_down = e1000_power_down_phy_copper_82571; switch (hw->mac.type) { case e1000_82571: case e1000_82572: phy->type = e1000_phy_igp_2; phy->ops.get_cfg_done = e1000_get_cfg_done_82571; phy->ops.get_info = e1000_get_phy_info_igp; phy->ops.check_polarity = e1000_check_polarity_igp; phy->ops.force_speed_duplex = e1000_phy_force_speed_duplex_igp; phy->ops.get_cable_length = e1000_get_cable_length_igp_2; phy->ops.read_reg = e1000_read_phy_reg_igp; phy->ops.write_reg = e1000_write_phy_reg_igp; - phy->ops.acquire = e1000_get_hw_semaphore_82571; - phy->ops.release = e1000_put_hw_semaphore_82571; + phy->ops.acquire = e1000_get_hw_semaphore; + phy->ops.release = e1000_put_hw_semaphore; break; case e1000_82573: phy->type = e1000_phy_m88; phy->ops.get_cfg_done = e1000_get_cfg_done_generic; phy->ops.get_info = e1000_get_phy_info_m88; phy->ops.check_polarity = e1000_check_polarity_m88; phy->ops.commit = e1000_phy_sw_reset_generic; phy->ops.force_speed_duplex = e1000_phy_force_speed_duplex_m88; phy->ops.get_cable_length = e1000_get_cable_length_m88; phy->ops.read_reg = e1000_read_phy_reg_m88; phy->ops.write_reg = e1000_write_phy_reg_m88; - phy->ops.acquire = e1000_get_hw_semaphore_82571; - phy->ops.release = e1000_put_hw_semaphore_82571; + phy->ops.acquire = e1000_get_hw_semaphore; + phy->ops.release = e1000_put_hw_semaphore; break; case e1000_82574: case e1000_82583: - E1000_MUTEX_INIT(&hw->dev_spec._82571.swflag_mutex); phy->type = e1000_phy_bm; phy->ops.get_cfg_done = e1000_get_cfg_done_generic; phy->ops.get_info = e1000_get_phy_info_m88; phy->ops.check_polarity = e1000_check_polarity_m88; phy->ops.commit = e1000_phy_sw_reset_generic; phy->ops.force_speed_duplex = e1000_phy_force_speed_duplex_m88; phy->ops.get_cable_length = e1000_get_cable_length_m88; phy->ops.read_reg = e1000_read_phy_reg_bm2; phy->ops.write_reg = e1000_write_phy_reg_bm2; phy->ops.acquire = e1000_get_hw_semaphore_82574; phy->ops.release = e1000_put_hw_semaphore_82574; phy->ops.set_d0_lplu_state = e1000_set_d0_lplu_state_82574; phy->ops.set_d3_lplu_state = e1000_set_d3_lplu_state_82574; break; default: return -E1000_ERR_PHY; break; } /* This can only be done after all function pointers are setup. */ ret_val = e1000_get_phy_id_82571(hw); if (ret_val) { DEBUGOUT("Error getting PHY ID\n"); return ret_val; } /* Verify phy id */ switch (hw->mac.type) { case e1000_82571: case e1000_82572: if (phy->id != IGP01E1000_I_PHY_ID) ret_val = -E1000_ERR_PHY; break; case e1000_82573: if (phy->id != M88E1111_I_PHY_ID) ret_val = -E1000_ERR_PHY; break; case e1000_82574: case e1000_82583: if (phy->id != BME1000_E_PHY_ID_R2) ret_val = -E1000_ERR_PHY; break; default: ret_val = -E1000_ERR_PHY; break; } if (ret_val) DEBUGOUT1("PHY ID unknown: type = 0x%08x\n", phy->id); return ret_val; } /** * e1000_init_nvm_params_82571 - Init NVM func ptrs. * @hw: pointer to the HW structure **/ static s32 e1000_init_nvm_params_82571(struct e1000_hw *hw) { struct e1000_nvm_info *nvm = &hw->nvm; u32 eecd = E1000_READ_REG(hw, E1000_EECD); u16 size; DEBUGFUNC("e1000_init_nvm_params_82571"); nvm->opcode_bits = 8; nvm->delay_usec = 1; switch (nvm->override) { case e1000_nvm_override_spi_large: nvm->page_size = 32; nvm->address_bits = 16; break; case e1000_nvm_override_spi_small: nvm->page_size = 8; nvm->address_bits = 8; break; default: nvm->page_size = eecd & E1000_EECD_ADDR_BITS ? 32 : 8; nvm->address_bits = eecd & E1000_EECD_ADDR_BITS ? 16 : 8; break; } switch (hw->mac.type) { case e1000_82573: case e1000_82574: case e1000_82583: if (((eecd >> 15) & 0x3) == 0x3) { nvm->type = e1000_nvm_flash_hw; nvm->word_size = 2048; /* Autonomous Flash update bit must be cleared due * to Flash update issue. */ eecd &= ~E1000_EECD_AUPDEN; E1000_WRITE_REG(hw, E1000_EECD, eecd); break; } /* Fall Through */ default: nvm->type = e1000_nvm_eeprom_spi; size = (u16)((eecd & E1000_EECD_SIZE_EX_MASK) >> E1000_EECD_SIZE_EX_SHIFT); /* Added to a constant, "size" becomes the left-shift value * for setting word_size. */ size += NVM_WORD_SIZE_BASE_SHIFT; /* EEPROM access above 16k is unsupported */ if (size > 14) size = 14; nvm->word_size = 1 << size; break; } /* Function Pointers */ switch (hw->mac.type) { case e1000_82574: case e1000_82583: nvm->ops.acquire = e1000_get_hw_semaphore_82574; nvm->ops.release = e1000_put_hw_semaphore_82574; break; default: nvm->ops.acquire = e1000_acquire_nvm_82571; nvm->ops.release = e1000_release_nvm_82571; break; } nvm->ops.read = e1000_read_nvm_eerd; nvm->ops.update = e1000_update_nvm_checksum_82571; nvm->ops.validate = e1000_validate_nvm_checksum_82571; nvm->ops.valid_led_default = e1000_valid_led_default_82571; nvm->ops.write = e1000_write_nvm_82571; return E1000_SUCCESS; } /** * e1000_init_mac_params_82571 - Init MAC func ptrs. * @hw: pointer to the HW structure **/ static s32 e1000_init_mac_params_82571(struct e1000_hw *hw) { struct e1000_mac_info *mac = &hw->mac; u32 swsm = 0; u32 swsm2 = 0; bool force_clear_smbi = FALSE; DEBUGFUNC("e1000_init_mac_params_82571"); /* Set media type and media-dependent function pointers */ switch (hw->device_id) { case E1000_DEV_ID_82571EB_FIBER: case E1000_DEV_ID_82572EI_FIBER: case E1000_DEV_ID_82571EB_QUAD_FIBER: hw->phy.media_type = e1000_media_type_fiber; mac->ops.setup_physical_interface = e1000_setup_fiber_serdes_link_82571; mac->ops.check_for_link = e1000_check_for_fiber_link_generic; mac->ops.get_link_up_info = e1000_get_speed_and_duplex_fiber_serdes_generic; break; case E1000_DEV_ID_82571EB_SERDES: case E1000_DEV_ID_82571EB_SERDES_DUAL: case E1000_DEV_ID_82571EB_SERDES_QUAD: case E1000_DEV_ID_82572EI_SERDES: hw->phy.media_type = e1000_media_type_internal_serdes; mac->ops.setup_physical_interface = e1000_setup_fiber_serdes_link_82571; mac->ops.check_for_link = e1000_check_for_serdes_link_82571; mac->ops.get_link_up_info = e1000_get_speed_and_duplex_fiber_serdes_generic; break; default: hw->phy.media_type = e1000_media_type_copper; mac->ops.setup_physical_interface = e1000_setup_copper_link_82571; mac->ops.check_for_link = e1000_check_for_copper_link_generic; mac->ops.get_link_up_info = e1000_get_speed_and_duplex_copper_generic; break; } /* Set mta register count */ mac->mta_reg_count = 128; /* Set rar entry count */ mac->rar_entry_count = E1000_RAR_ENTRIES; /* Set if part includes ASF firmware */ mac->asf_firmware_present = TRUE; /* Adaptive IFS supported */ mac->adaptive_ifs = TRUE; /* Function pointers */ /* bus type/speed/width */ mac->ops.get_bus_info = e1000_get_bus_info_pcie_generic; /* reset */ mac->ops.reset_hw = e1000_reset_hw_82571; /* hw initialization */ mac->ops.init_hw = e1000_init_hw_82571; /* link setup */ mac->ops.setup_link = e1000_setup_link_82571; /* multicast address update */ mac->ops.update_mc_addr_list = e1000_update_mc_addr_list_generic; /* writing VFTA */ mac->ops.write_vfta = e1000_write_vfta_generic; /* clearing VFTA */ mac->ops.clear_vfta = e1000_clear_vfta_82571; /* read mac address */ mac->ops.read_mac_addr = e1000_read_mac_addr_82571; /* ID LED init */ mac->ops.id_led_init = e1000_id_led_init_generic; /* setup LED */ mac->ops.setup_led = e1000_setup_led_generic; /* cleanup LED */ mac->ops.cleanup_led = e1000_cleanup_led_generic; /* turn off LED */ mac->ops.led_off = e1000_led_off_generic; /* clear hardware counters */ mac->ops.clear_hw_cntrs = e1000_clear_hw_cntrs_82571; /* MAC-specific function pointers */ switch (hw->mac.type) { case e1000_82573: mac->ops.set_lan_id = e1000_set_lan_id_single_port; mac->ops.check_mng_mode = e1000_check_mng_mode_generic; mac->ops.led_on = e1000_led_on_generic; mac->ops.blink_led = e1000_blink_led_generic; /* FWSM register */ mac->has_fwsm = TRUE; /* ARC supported; valid only if manageability features are * enabled. */ mac->arc_subsystem_valid = !!(E1000_READ_REG(hw, E1000_FWSM) & E1000_FWSM_MODE_MASK); break; case e1000_82574: case e1000_82583: mac->ops.set_lan_id = e1000_set_lan_id_single_port; mac->ops.check_mng_mode = e1000_check_mng_mode_82574; mac->ops.led_on = e1000_led_on_82574; break; default: mac->ops.check_mng_mode = e1000_check_mng_mode_generic; mac->ops.led_on = e1000_led_on_generic; mac->ops.blink_led = e1000_blink_led_generic; /* FWSM register */ mac->has_fwsm = TRUE; break; } /* Ensure that the inter-port SWSM.SMBI lock bit is clear before * first NVM or PHY access. This should be done for single-port * devices, and for one port only on dual-port devices so that * for those devices we can still use the SMBI lock to synchronize * inter-port accesses to the PHY & NVM. */ switch (hw->mac.type) { case e1000_82571: case e1000_82572: swsm2 = E1000_READ_REG(hw, E1000_SWSM2); if (!(swsm2 & E1000_SWSM2_LOCK)) { /* Only do this for the first interface on this card */ E1000_WRITE_REG(hw, E1000_SWSM2, swsm2 | E1000_SWSM2_LOCK); force_clear_smbi = TRUE; } else { force_clear_smbi = FALSE; } break; default: force_clear_smbi = TRUE; break; } if (force_clear_smbi) { /* Make sure SWSM.SMBI is clear */ swsm = E1000_READ_REG(hw, E1000_SWSM); if (swsm & E1000_SWSM_SMBI) { /* This bit should not be set on a first interface, and * indicates that the bootagent or EFI code has * improperly left this bit enabled */ DEBUGOUT("Please update your 82571 Bootagent\n"); } E1000_WRITE_REG(hw, E1000_SWSM, swsm & ~E1000_SWSM_SMBI); } /* Initialze device specific counter of SMBI acquisition timeouts. */ hw->dev_spec._82571.smb_counter = 0; return E1000_SUCCESS; } /** * e1000_init_function_pointers_82571 - Init func ptrs. * @hw: pointer to the HW structure * * Called to initialize all function pointers and parameters. **/ void e1000_init_function_pointers_82571(struct e1000_hw *hw) { DEBUGFUNC("e1000_init_function_pointers_82571"); hw->mac.ops.init_params = e1000_init_mac_params_82571; hw->nvm.ops.init_params = e1000_init_nvm_params_82571; hw->phy.ops.init_params = e1000_init_phy_params_82571; } /** * e1000_get_phy_id_82571 - Retrieve the PHY ID and revision * @hw: pointer to the HW structure * * Reads the PHY registers and stores the PHY ID and possibly the PHY * revision in the hardware structure. **/ static s32 e1000_get_phy_id_82571(struct e1000_hw *hw) { struct e1000_phy_info *phy = &hw->phy; s32 ret_val; u16 phy_id = 0; DEBUGFUNC("e1000_get_phy_id_82571"); switch (hw->mac.type) { case e1000_82571: case e1000_82572: /* The 82571 firmware may still be configuring the PHY. * In this case, we cannot access the PHY until the * configuration is done. So we explicitly set the * PHY ID. */ phy->id = IGP01E1000_I_PHY_ID; break; case e1000_82573: return e1000_get_phy_id(hw); break; case e1000_82574: case e1000_82583: ret_val = phy->ops.read_reg(hw, PHY_ID1, &phy_id); if (ret_val) return ret_val; phy->id = (u32)(phy_id << 16); usec_delay(20); ret_val = phy->ops.read_reg(hw, PHY_ID2, &phy_id); if (ret_val) return ret_val; phy->id |= (u32)(phy_id); phy->revision = (u32)(phy_id & ~PHY_REVISION_MASK); break; default: return -E1000_ERR_PHY; break; } return E1000_SUCCESS; } /** - * e1000_get_hw_semaphore_82571 - Acquire hardware semaphore + * e1000_get_hw_semaphore_82574 - Acquire hardware semaphore * @hw: pointer to the HW structure * - * Acquire the HW semaphore to access the PHY or NVM - **/ -static s32 e1000_get_hw_semaphore_82571(struct e1000_hw *hw) -{ - u32 swsm; - s32 sw_timeout = hw->nvm.word_size + 1; - s32 fw_timeout = hw->nvm.word_size + 1; - s32 i = 0; - - DEBUGFUNC("e1000_get_hw_semaphore_82571"); - - /* If we have timedout 3 times on trying to acquire - * the inter-port SMBI semaphore, there is old code - * operating on the other port, and it is not - * releasing SMBI. Modify the number of times that - * we try for the semaphore to interwork with this - * older code. - */ - if (hw->dev_spec._82571.smb_counter > 2) - sw_timeout = 1; - - /* Get the SW semaphore */ - while (i < sw_timeout) { - swsm = E1000_READ_REG(hw, E1000_SWSM); - if (!(swsm & E1000_SWSM_SMBI)) - break; - - usec_delay(50); - i++; - } - - if (i == sw_timeout) { - DEBUGOUT("Driver can't access device - SMBI bit is set.\n"); - hw->dev_spec._82571.smb_counter++; - } - /* Get the FW semaphore. */ - for (i = 0; i < fw_timeout; i++) { - swsm = E1000_READ_REG(hw, E1000_SWSM); - E1000_WRITE_REG(hw, E1000_SWSM, swsm | E1000_SWSM_SWESMBI); - - /* Semaphore acquired if bit latched */ - if (E1000_READ_REG(hw, E1000_SWSM) & E1000_SWSM_SWESMBI) - break; - - usec_delay(50); - } - - if (i == fw_timeout) { - /* Release semaphores */ - e1000_put_hw_semaphore_82571(hw); - DEBUGOUT("Driver can't access the NVM\n"); - return -E1000_ERR_NVM; - } - - return E1000_SUCCESS; -} - -/** - * e1000_put_hw_semaphore_82571 - Release hardware semaphore - * @hw: pointer to the HW structure - * - * Release hardware semaphore used to access the PHY or NVM - **/ -static void e1000_put_hw_semaphore_82571(struct e1000_hw *hw) -{ - u32 swsm; - - DEBUGFUNC("e1000_put_hw_semaphore_generic"); - - swsm = E1000_READ_REG(hw, E1000_SWSM); - - swsm &= ~(E1000_SWSM_SMBI | E1000_SWSM_SWESMBI); - - E1000_WRITE_REG(hw, E1000_SWSM, swsm); -} - -/** - * e1000_get_hw_semaphore_82573 - Acquire hardware semaphore - * @hw: pointer to the HW structure - * * Acquire the HW semaphore during reset. * **/ -static s32 e1000_get_hw_semaphore_82573(struct e1000_hw *hw) +static s32 +e1000_get_hw_semaphore_82574(struct e1000_hw *hw) { u32 extcnf_ctrl; s32 i = 0; - + /* XXX assert that mutex is held */ DEBUGFUNC("e1000_get_hw_semaphore_82573"); + ASSERT_CTX_LOCK_HELD(hw); extcnf_ctrl = E1000_READ_REG(hw, E1000_EXTCNF_CTRL); do { extcnf_ctrl |= E1000_EXTCNF_CTRL_MDIO_SW_OWNERSHIP; E1000_WRITE_REG(hw, E1000_EXTCNF_CTRL, extcnf_ctrl); extcnf_ctrl = E1000_READ_REG(hw, E1000_EXTCNF_CTRL); if (extcnf_ctrl & E1000_EXTCNF_CTRL_MDIO_SW_OWNERSHIP) break; msec_delay(2); i++; } while (i < MDIO_OWNERSHIP_TIMEOUT); if (i == MDIO_OWNERSHIP_TIMEOUT) { /* Release semaphores */ - e1000_put_hw_semaphore_82573(hw); + e1000_put_hw_semaphore_82574(hw); DEBUGOUT("Driver can't access the PHY\n"); return -E1000_ERR_PHY; } return E1000_SUCCESS; } /** - * e1000_put_hw_semaphore_82573 - Release hardware semaphore + * e1000_put_hw_semaphore_82574 - Release hardware semaphore * @hw: pointer to the HW structure * * Release hardware semaphore used during reset. * **/ -static void e1000_put_hw_semaphore_82573(struct e1000_hw *hw) +static void +e1000_put_hw_semaphore_82574(struct e1000_hw *hw) { u32 extcnf_ctrl; - DEBUGFUNC("e1000_put_hw_semaphore_82573"); + DEBUGFUNC("e1000_put_hw_semaphore_82574"); extcnf_ctrl = E1000_READ_REG(hw, E1000_EXTCNF_CTRL); extcnf_ctrl &= ~E1000_EXTCNF_CTRL_MDIO_SW_OWNERSHIP; E1000_WRITE_REG(hw, E1000_EXTCNF_CTRL, extcnf_ctrl); } /** - * e1000_get_hw_semaphore_82574 - Acquire hardware semaphore - * @hw: pointer to the HW structure - * - * Acquire the HW semaphore to access the PHY or NVM. - * - **/ -static s32 e1000_get_hw_semaphore_82574(struct e1000_hw *hw) -{ - s32 ret_val; - - DEBUGFUNC("e1000_get_hw_semaphore_82574"); - - E1000_MUTEX_LOCK(&hw->dev_spec._82571.swflag_mutex); - ret_val = e1000_get_hw_semaphore_82573(hw); - if (ret_val) - E1000_MUTEX_UNLOCK(&hw->dev_spec._82571.swflag_mutex); - return ret_val; -} - -/** - * e1000_put_hw_semaphore_82574 - Release hardware semaphore - * @hw: pointer to the HW structure - * - * Release hardware semaphore used to access the PHY or NVM - * - **/ -static void e1000_put_hw_semaphore_82574(struct e1000_hw *hw) -{ - DEBUGFUNC("e1000_put_hw_semaphore_82574"); - - e1000_put_hw_semaphore_82573(hw); - E1000_MUTEX_UNLOCK(&hw->dev_spec._82571.swflag_mutex); -} - -/** * e1000_set_d0_lplu_state_82574 - Set Low Power Linkup D0 state * @hw: pointer to the HW structure * @active: TRUE to enable LPLU, FALSE to disable * * Sets the LPLU D0 state according to the active flag. * LPLU will not be activated unless the * device autonegotiation advertisement meets standards of * either 10 or 10/100 or 10/100/1000 at all duplexes. * This is a function pointer entry point only called by * PHY setup routines. **/ static s32 e1000_set_d0_lplu_state_82574(struct e1000_hw *hw, bool active) { u32 data = E1000_READ_REG(hw, E1000_POEMB); DEBUGFUNC("e1000_set_d0_lplu_state_82574"); if (active) data |= E1000_PHY_CTRL_D0A_LPLU; else data &= ~E1000_PHY_CTRL_D0A_LPLU; E1000_WRITE_REG(hw, E1000_POEMB, data); return E1000_SUCCESS; } /** * e1000_set_d3_lplu_state_82574 - Sets low power link up state for D3 * @hw: pointer to the HW structure * @active: boolean used to enable/disable lplu * * The low power link up (lplu) state is set to the power management level D3 * when active is TRUE, else clear lplu for D3. LPLU * is used during Dx states where the power conservation is most important. * During driver activity, SmartSpeed should be enabled so performance is * maintained. **/ static s32 e1000_set_d3_lplu_state_82574(struct e1000_hw *hw, bool active) { u32 data = E1000_READ_REG(hw, E1000_POEMB); DEBUGFUNC("e1000_set_d3_lplu_state_82574"); if (!active) { data &= ~E1000_PHY_CTRL_NOND0A_LPLU; } else if ((hw->phy.autoneg_advertised == E1000_ALL_SPEED_DUPLEX) || (hw->phy.autoneg_advertised == E1000_ALL_NOT_GIG) || (hw->phy.autoneg_advertised == E1000_ALL_10_SPEED)) { data |= E1000_PHY_CTRL_NOND0A_LPLU; } E1000_WRITE_REG(hw, E1000_POEMB, data); return E1000_SUCCESS; } /** * e1000_acquire_nvm_82571 - Request for access to the EEPROM * @hw: pointer to the HW structure * * To gain access to the EEPROM, first we must obtain a hardware semaphore. * Then for non-82573 hardware, set the EEPROM access request bit and wait * for EEPROM access grant bit. If the access grant bit is not set, release * hardware semaphore. **/ static s32 e1000_acquire_nvm_82571(struct e1000_hw *hw) { s32 ret_val; DEBUGFUNC("e1000_acquire_nvm_82571"); - ret_val = e1000_get_hw_semaphore_82571(hw); + ret_val = e1000_get_hw_semaphore(hw); if (ret_val) return ret_val; switch (hw->mac.type) { case e1000_82573: break; default: ret_val = e1000_acquire_nvm_generic(hw); break; } if (ret_val) - e1000_put_hw_semaphore_82571(hw); + e1000_put_hw_semaphore(hw); return ret_val; } /** * e1000_release_nvm_82571 - Release exclusive access to EEPROM * @hw: pointer to the HW structure * * Stop any current commands to the EEPROM and clear the EEPROM request bit. **/ static void e1000_release_nvm_82571(struct e1000_hw *hw) { DEBUGFUNC("e1000_release_nvm_82571"); e1000_release_nvm_generic(hw); - e1000_put_hw_semaphore_82571(hw); + e1000_put_hw_semaphore(hw); } /** * e1000_write_nvm_82571 - Write to EEPROM using appropriate interface * @hw: pointer to the HW structure * @offset: offset within the EEPROM to be written to * @words: number of words to write * @data: 16 bit word(s) to be written to the EEPROM * * For non-82573 silicon, write data to EEPROM at offset using SPI interface. * * If e1000_update_nvm_checksum is not called after this function, the * EEPROM will most likely contain an invalid checksum. **/ static s32 e1000_write_nvm_82571(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) { s32 ret_val; DEBUGFUNC("e1000_write_nvm_82571"); switch (hw->mac.type) { case e1000_82573: case e1000_82574: case e1000_82583: ret_val = e1000_write_nvm_eewr_82571(hw, offset, words, data); break; case e1000_82571: case e1000_82572: ret_val = e1000_write_nvm_spi(hw, offset, words, data); break; default: ret_val = -E1000_ERR_NVM; break; } return ret_val; } /** * e1000_update_nvm_checksum_82571 - Update EEPROM checksum * @hw: pointer to the HW structure * * Updates the EEPROM checksum by reading/adding each word of the EEPROM * up to the checksum. Then calculates the EEPROM checksum and writes the * value to the EEPROM. **/ static s32 e1000_update_nvm_checksum_82571(struct e1000_hw *hw) { u32 eecd; s32 ret_val; u16 i; DEBUGFUNC("e1000_update_nvm_checksum_82571"); ret_val = e1000_update_nvm_checksum_generic(hw); if (ret_val) return ret_val; /* If our nvm is an EEPROM, then we're done * otherwise, commit the checksum to the flash NVM. */ if (hw->nvm.type != e1000_nvm_flash_hw) return E1000_SUCCESS; /* Check for pending operations. */ for (i = 0; i < E1000_FLASH_UPDATES; i++) { msec_delay(1); if (!(E1000_READ_REG(hw, E1000_EECD) & E1000_EECD_FLUPD)) break; } if (i == E1000_FLASH_UPDATES) return -E1000_ERR_NVM; /* Reset the firmware if using STM opcode. */ if ((E1000_READ_REG(hw, E1000_FLOP) & 0xFF00) == E1000_STM_OPCODE) { /* The enabling of and the actual reset must be done * in two write cycles. */ E1000_WRITE_REG(hw, E1000_HICR, E1000_HICR_FW_RESET_ENABLE); E1000_WRITE_FLUSH(hw); E1000_WRITE_REG(hw, E1000_HICR, E1000_HICR_FW_RESET); } /* Commit the write to flash */ eecd = E1000_READ_REG(hw, E1000_EECD) | E1000_EECD_FLUPD; E1000_WRITE_REG(hw, E1000_EECD, eecd); for (i = 0; i < E1000_FLASH_UPDATES; i++) { msec_delay(1); if (!(E1000_READ_REG(hw, E1000_EECD) & E1000_EECD_FLUPD)) break; } if (i == E1000_FLASH_UPDATES) return -E1000_ERR_NVM; return E1000_SUCCESS; } /** * e1000_validate_nvm_checksum_82571 - Validate EEPROM checksum * @hw: pointer to the HW structure * * Calculates the EEPROM checksum by reading/adding each word of the EEPROM * and then verifies that the sum of the EEPROM is equal to 0xBABA. **/ static s32 e1000_validate_nvm_checksum_82571(struct e1000_hw *hw) { DEBUGFUNC("e1000_validate_nvm_checksum_82571"); if (hw->nvm.type == e1000_nvm_flash_hw) e1000_fix_nvm_checksum_82571(hw); return e1000_validate_nvm_checksum_generic(hw); } /** * e1000_write_nvm_eewr_82571 - Write to EEPROM for 82573 silicon * @hw: pointer to the HW structure * @offset: offset within the EEPROM to be written to * @words: number of words to write * @data: 16 bit word(s) to be written to the EEPROM * * After checking for invalid values, poll the EEPROM to ensure the previous * command has completed before trying to write the next word. After write * poll for completion. * * If e1000_update_nvm_checksum is not called after this function, the * EEPROM will most likely contain an invalid checksum. **/ static s32 e1000_write_nvm_eewr_82571(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) { struct e1000_nvm_info *nvm = &hw->nvm; u32 i, eewr = 0; s32 ret_val = E1000_SUCCESS; DEBUGFUNC("e1000_write_nvm_eewr_82571"); /* A check for invalid values: offset too large, too many words, * and not enough words. */ if ((offset >= nvm->word_size) || (words > (nvm->word_size - offset)) || (words == 0)) { DEBUGOUT("nvm parameter(s) out of bounds\n"); return -E1000_ERR_NVM; } for (i = 0; i < words; i++) { eewr = ((data[i] << E1000_NVM_RW_REG_DATA) | ((offset + i) << E1000_NVM_RW_ADDR_SHIFT) | E1000_NVM_RW_REG_START); ret_val = e1000_poll_eerd_eewr_done(hw, E1000_NVM_POLL_WRITE); if (ret_val) break; E1000_WRITE_REG(hw, E1000_EEWR, eewr); ret_val = e1000_poll_eerd_eewr_done(hw, E1000_NVM_POLL_WRITE); if (ret_val) break; } return ret_val; } /** * e1000_get_cfg_done_82571 - Poll for configuration done * @hw: pointer to the HW structure * * Reads the management control register for the config done bit to be set. **/ static s32 e1000_get_cfg_done_82571(struct e1000_hw *hw) { s32 timeout = PHY_CFG_TIMEOUT; DEBUGFUNC("e1000_get_cfg_done_82571"); while (timeout) { if (E1000_READ_REG(hw, E1000_EEMNGCTL) & E1000_NVM_CFG_DONE_PORT_0) break; msec_delay(1); timeout--; } if (!timeout) { DEBUGOUT("MNG configuration cycle has not completed.\n"); return -E1000_ERR_RESET; } return E1000_SUCCESS; } /** * e1000_set_d0_lplu_state_82571 - Set Low Power Linkup D0 state * @hw: pointer to the HW structure * @active: TRUE to enable LPLU, FALSE to disable * * Sets the LPLU D0 state according to the active flag. When activating LPLU * this function also disables smart speed and vice versa. LPLU will not be * activated unless the device autonegotiation advertisement meets standards * of either 10 or 10/100 or 10/100/1000 at all duplexes. This is a function * pointer entry point only called by PHY setup routines. **/ static s32 e1000_set_d0_lplu_state_82571(struct e1000_hw *hw, bool active) { struct e1000_phy_info *phy = &hw->phy; s32 ret_val; u16 data; DEBUGFUNC("e1000_set_d0_lplu_state_82571"); if (!(phy->ops.read_reg)) return E1000_SUCCESS; ret_val = phy->ops.read_reg(hw, IGP02E1000_PHY_POWER_MGMT, &data); if (ret_val) return ret_val; if (active) { data |= IGP02E1000_PM_D0_LPLU; ret_val = phy->ops.write_reg(hw, IGP02E1000_PHY_POWER_MGMT, data); if (ret_val) return ret_val; /* When LPLU is enabled, we should disable SmartSpeed */ ret_val = phy->ops.read_reg(hw, IGP01E1000_PHY_PORT_CONFIG, &data); if (ret_val) return ret_val; data &= ~IGP01E1000_PSCFR_SMART_SPEED; ret_val = phy->ops.write_reg(hw, IGP01E1000_PHY_PORT_CONFIG, data); if (ret_val) return ret_val; } else { data &= ~IGP02E1000_PM_D0_LPLU; ret_val = phy->ops.write_reg(hw, IGP02E1000_PHY_POWER_MGMT, data); /* LPLU and SmartSpeed are mutually exclusive. LPLU is used * during Dx states where the power conservation is most * important. During driver activity we should enable * SmartSpeed, so performance is maintained. */ if (phy->smart_speed == e1000_smart_speed_on) { ret_val = phy->ops.read_reg(hw, IGP01E1000_PHY_PORT_CONFIG, &data); if (ret_val) return ret_val; data |= IGP01E1000_PSCFR_SMART_SPEED; ret_val = phy->ops.write_reg(hw, IGP01E1000_PHY_PORT_CONFIG, data); if (ret_val) return ret_val; } else if (phy->smart_speed == e1000_smart_speed_off) { ret_val = phy->ops.read_reg(hw, IGP01E1000_PHY_PORT_CONFIG, &data); if (ret_val) return ret_val; data &= ~IGP01E1000_PSCFR_SMART_SPEED; ret_val = phy->ops.write_reg(hw, IGP01E1000_PHY_PORT_CONFIG, data); if (ret_val) return ret_val; } } return E1000_SUCCESS; } /** * e1000_reset_hw_82571 - Reset hardware * @hw: pointer to the HW structure * * This resets the hardware into a known state. **/ static s32 e1000_reset_hw_82571(struct e1000_hw *hw) { u32 ctrl, ctrl_ext, eecd, tctl; s32 ret_val; DEBUGFUNC("e1000_reset_hw_82571"); /* Prevent the PCI-E bus from sticking if there is no TLP connection * on the last TLP read/write transaction when MAC is reset. */ ret_val = e1000_disable_pcie_master_generic(hw); if (ret_val) DEBUGOUT("PCI-E Master disable polling has failed.\n"); DEBUGOUT("Masking off all interrupts\n"); E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff); E1000_WRITE_REG(hw, E1000_RCTL, 0); tctl = E1000_READ_REG(hw, E1000_TCTL); tctl &= ~E1000_TCTL_EN; E1000_WRITE_REG(hw, E1000_TCTL, tctl); E1000_WRITE_FLUSH(hw); msec_delay(10); /* Must acquire the MDIO ownership before MAC reset. * Ownership defaults to firmware after a reset. */ switch (hw->mac.type) { case e1000_82573: - ret_val = e1000_get_hw_semaphore_82573(hw); - break; case e1000_82574: case e1000_82583: ret_val = e1000_get_hw_semaphore_82574(hw); break; default: break; } ctrl = E1000_READ_REG(hw, E1000_CTRL); DEBUGOUT("Issuing a global reset to MAC\n"); E1000_WRITE_REG(hw, E1000_CTRL, ctrl | E1000_CTRL_RST); /* Must release MDIO ownership and mutex after MAC reset. */ switch (hw->mac.type) { case e1000_82573: - /* Release mutex only if the hw semaphore is acquired */ - if (!ret_val) - e1000_put_hw_semaphore_82573(hw); - break; case e1000_82574: case e1000_82583: /* Release mutex only if the hw semaphore is acquired */ if (!ret_val) e1000_put_hw_semaphore_82574(hw); break; default: + panic("unknown mac type %x\n", hw->mac.type); break; } if (hw->nvm.type == e1000_nvm_flash_hw) { usec_delay(10); ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT); ctrl_ext |= E1000_CTRL_EXT_EE_RST; E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext); E1000_WRITE_FLUSH(hw); } ret_val = e1000_get_auto_rd_done_generic(hw); if (ret_val) /* We don't want to continue accessing MAC registers. */ return ret_val; /* Phy configuration from NVM just starts after EECD_AUTO_RD is set. * Need to wait for Phy configuration completion before accessing * NVM and Phy. */ switch (hw->mac.type) { case e1000_82571: case e1000_82572: /* REQ and GNT bits need to be cleared when using AUTO_RD * to access the EEPROM. */ eecd = E1000_READ_REG(hw, E1000_EECD); eecd &= ~(E1000_EECD_REQ | E1000_EECD_GNT); E1000_WRITE_REG(hw, E1000_EECD, eecd); break; case e1000_82573: case e1000_82574: case e1000_82583: msec_delay(25); break; default: break; } /* Clear any pending interrupt events. */ E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff); E1000_READ_REG(hw, E1000_ICR); if (hw->mac.type == e1000_82571) { /* Install any alternate MAC address into RAR0 */ ret_val = e1000_check_alt_mac_addr_generic(hw); if (ret_val) return ret_val; e1000_set_laa_state_82571(hw, TRUE); } /* Reinitialize the 82571 serdes link state machine */ if (hw->phy.media_type == e1000_media_type_internal_serdes) hw->mac.serdes_link_state = e1000_serdes_link_down; return E1000_SUCCESS; } /** * e1000_init_hw_82571 - Initialize hardware * @hw: pointer to the HW structure * * This inits the hardware readying it for operation. **/ static s32 e1000_init_hw_82571(struct e1000_hw *hw) { struct e1000_mac_info *mac = &hw->mac; u32 reg_data; s32 ret_val; u16 i, rar_count = mac->rar_entry_count; DEBUGFUNC("e1000_init_hw_82571"); e1000_initialize_hw_bits_82571(hw); /* Initialize identification LED */ ret_val = mac->ops.id_led_init(hw); /* An error is not fatal and we should not stop init due to this */ if (ret_val) DEBUGOUT("Error initializing identification LED\n"); /* Disabling VLAN filtering */ DEBUGOUT("Initializing the IEEE VLAN\n"); mac->ops.clear_vfta(hw); /* Setup the receive address. * If, however, a locally administered address was assigned to the * 82571, we must reserve a RAR for it to work around an issue where * resetting one port will reload the MAC on the other port. */ if (e1000_get_laa_state_82571(hw)) rar_count--; e1000_init_rx_addrs_generic(hw, rar_count); /* Zero out the Multicast HASH table */ DEBUGOUT("Zeroing the MTA\n"); for (i = 0; i < mac->mta_reg_count; i++) E1000_WRITE_REG_ARRAY(hw, E1000_MTA, i, 0); /* Setup link and flow control */ ret_val = mac->ops.setup_link(hw); /* Set the transmit descriptor write-back policy */ reg_data = E1000_READ_REG(hw, E1000_TXDCTL(0)); reg_data = ((reg_data & ~E1000_TXDCTL_WTHRESH) | E1000_TXDCTL_FULL_TX_DESC_WB | E1000_TXDCTL_COUNT_DESC); E1000_WRITE_REG(hw, E1000_TXDCTL(0), reg_data); /* ...for both queues. */ switch (mac->type) { case e1000_82573: e1000_enable_tx_pkt_filtering_generic(hw); /* fall through */ case e1000_82574: case e1000_82583: reg_data = E1000_READ_REG(hw, E1000_GCR); reg_data |= E1000_GCR_L1_ACT_WITHOUT_L0S_RX; E1000_WRITE_REG(hw, E1000_GCR, reg_data); break; default: reg_data = E1000_READ_REG(hw, E1000_TXDCTL(1)); reg_data = ((reg_data & ~E1000_TXDCTL_WTHRESH) | E1000_TXDCTL_FULL_TX_DESC_WB | E1000_TXDCTL_COUNT_DESC); E1000_WRITE_REG(hw, E1000_TXDCTL(1), reg_data); break; } /* Clear all of the statistics registers (clear on read). It is * important that we do this after we have tried to establish link * because the symbol error count will increment wildly if there * is no link. */ e1000_clear_hw_cntrs_82571(hw); return ret_val; } /** * e1000_initialize_hw_bits_82571 - Initialize hardware-dependent bits * @hw: pointer to the HW structure * * Initializes required hardware-dependent bits needed for normal operation. **/ static void e1000_initialize_hw_bits_82571(struct e1000_hw *hw) { u32 reg; DEBUGFUNC("e1000_initialize_hw_bits_82571"); /* Transmit Descriptor Control 0 */ reg = E1000_READ_REG(hw, E1000_TXDCTL(0)); reg |= (1 << 22); E1000_WRITE_REG(hw, E1000_TXDCTL(0), reg); /* Transmit Descriptor Control 1 */ reg = E1000_READ_REG(hw, E1000_TXDCTL(1)); reg |= (1 << 22); E1000_WRITE_REG(hw, E1000_TXDCTL(1), reg); /* Transmit Arbitration Control 0 */ reg = E1000_READ_REG(hw, E1000_TARC(0)); reg &= ~(0xF << 27); /* 30:27 */ switch (hw->mac.type) { case e1000_82571: case e1000_82572: reg |= (1 << 23) | (1 << 24) | (1 << 25) | (1 << 26); break; case e1000_82574: case e1000_82583: reg |= (1 << 26); break; default: break; } E1000_WRITE_REG(hw, E1000_TARC(0), reg); /* Transmit Arbitration Control 1 */ reg = E1000_READ_REG(hw, E1000_TARC(1)); switch (hw->mac.type) { case e1000_82571: case e1000_82572: reg &= ~((1 << 29) | (1 << 30)); reg |= (1 << 22) | (1 << 24) | (1 << 25) | (1 << 26); if (E1000_READ_REG(hw, E1000_TCTL) & E1000_TCTL_MULR) reg &= ~(1 << 28); else reg |= (1 << 28); E1000_WRITE_REG(hw, E1000_TARC(1), reg); break; default: break; } /* Device Control */ switch (hw->mac.type) { case e1000_82573: case e1000_82574: case e1000_82583: reg = E1000_READ_REG(hw, E1000_CTRL); reg &= ~(1 << 29); E1000_WRITE_REG(hw, E1000_CTRL, reg); break; default: break; } /* Extended Device Control */ switch (hw->mac.type) { case e1000_82573: case e1000_82574: case e1000_82583: reg = E1000_READ_REG(hw, E1000_CTRL_EXT); reg &= ~(1 << 23); reg |= (1 << 22); E1000_WRITE_REG(hw, E1000_CTRL_EXT, reg); break; default: break; } if (hw->mac.type == e1000_82571) { reg = E1000_READ_REG(hw, E1000_PBA_ECC); reg |= E1000_PBA_ECC_CORR_EN; E1000_WRITE_REG(hw, E1000_PBA_ECC, reg); } /* Workaround for hardware errata. * Ensure that DMA Dynamic Clock gating is disabled on 82571 and 82572 */ if ((hw->mac.type == e1000_82571) || (hw->mac.type == e1000_82572)) { reg = E1000_READ_REG(hw, E1000_CTRL_EXT); reg &= ~E1000_CTRL_EXT_DMA_DYN_CLK_EN; E1000_WRITE_REG(hw, E1000_CTRL_EXT, reg); } /* Disable IPv6 extension header parsing because some malformed * IPv6 headers can hang the Rx. */ if (hw->mac.type <= e1000_82573) { reg = E1000_READ_REG(hw, E1000_RFCTL); reg |= (E1000_RFCTL_IPV6_EX_DIS | E1000_RFCTL_NEW_IPV6_EXT_DIS); E1000_WRITE_REG(hw, E1000_RFCTL, reg); } /* PCI-Ex Control Registers */ switch (hw->mac.type) { case e1000_82574: case e1000_82583: reg = E1000_READ_REG(hw, E1000_GCR); reg |= (1 << 22); E1000_WRITE_REG(hw, E1000_GCR, reg); /* Workaround for hardware errata. * apply workaround for hardware errata documented in errata * docs Fixes issue where some error prone or unreliable PCIe * completions are occurring, particularly with ASPM enabled. * Without fix, issue can cause Tx timeouts. */ reg = E1000_READ_REG(hw, E1000_GCR2); reg |= 1; E1000_WRITE_REG(hw, E1000_GCR2, reg); break; default: break; } return; } /** * e1000_clear_vfta_82571 - Clear VLAN filter table * @hw: pointer to the HW structure * * Clears the register array which contains the VLAN filter table by * setting all the values to 0. **/ static void e1000_clear_vfta_82571(struct e1000_hw *hw) { u32 offset; u32 vfta_value = 0; u32 vfta_offset = 0; u32 vfta_bit_in_reg = 0; DEBUGFUNC("e1000_clear_vfta_82571"); switch (hw->mac.type) { case e1000_82573: case e1000_82574: case e1000_82583: if (hw->mng_cookie.vlan_id != 0) { /* The VFTA is a 4096b bit-field, each identifying * a single VLAN ID. The following operations * determine which 32b entry (i.e. offset) into the * array we want to set the VLAN ID (i.e. bit) of * the manageability unit. */ vfta_offset = (hw->mng_cookie.vlan_id >> E1000_VFTA_ENTRY_SHIFT) & E1000_VFTA_ENTRY_MASK; vfta_bit_in_reg = 1 << (hw->mng_cookie.vlan_id & E1000_VFTA_ENTRY_BIT_SHIFT_MASK); } break; default: break; } for (offset = 0; offset < E1000_VLAN_FILTER_TBL_SIZE; offset++) { /* If the offset we want to clear is the same offset of the * manageability VLAN ID, then clear all bits except that of * the manageability unit. */ vfta_value = (offset == vfta_offset) ? vfta_bit_in_reg : 0; E1000_WRITE_REG_ARRAY(hw, E1000_VFTA, offset, vfta_value); E1000_WRITE_FLUSH(hw); } } /** * e1000_check_mng_mode_82574 - Check manageability is enabled * @hw: pointer to the HW structure * * Reads the NVM Initialization Control Word 2 and returns TRUE * (>0) if any manageability is enabled, else FALSE (0). **/ static bool e1000_check_mng_mode_82574(struct e1000_hw *hw) { u16 data; s32 ret_val; DEBUGFUNC("e1000_check_mng_mode_82574"); ret_val = hw->nvm.ops.read(hw, NVM_INIT_CONTROL2_REG, 1, &data); if (ret_val) return FALSE; return (data & E1000_NVM_INIT_CTRL2_MNGM) != 0; } /** * e1000_led_on_82574 - Turn LED on * @hw: pointer to the HW structure * * Turn LED on. **/ static s32 e1000_led_on_82574(struct e1000_hw *hw) { u32 ctrl; u32 i; DEBUGFUNC("e1000_led_on_82574"); ctrl = hw->mac.ledctl_mode2; if (!(E1000_STATUS_LU & E1000_READ_REG(hw, E1000_STATUS))) { /* If no link, then turn LED on by setting the invert bit * for each LED that's "on" (0x0E) in ledctl_mode2. */ for (i = 0; i < 4; i++) if (((hw->mac.ledctl_mode2 >> (i * 8)) & 0xFF) == E1000_LEDCTL_MODE_LED_ON) ctrl |= (E1000_LEDCTL_LED0_IVRT << (i * 8)); } E1000_WRITE_REG(hw, E1000_LEDCTL, ctrl); return E1000_SUCCESS; } /** * e1000_check_phy_82574 - check 82574 phy hung state * @hw: pointer to the HW structure * * Returns whether phy is hung or not **/ bool e1000_check_phy_82574(struct e1000_hw *hw) { u16 status_1kbt = 0; u16 receive_errors = 0; s32 ret_val; DEBUGFUNC("e1000_check_phy_82574"); /* Read PHY Receive Error counter first, if its is max - all F's then * read the Base1000T status register If both are max then PHY is hung. */ ret_val = hw->phy.ops.read_reg(hw, E1000_RECEIVE_ERROR_COUNTER, &receive_errors); if (ret_val) return FALSE; if (receive_errors == E1000_RECEIVE_ERROR_MAX) { ret_val = hw->phy.ops.read_reg(hw, E1000_BASE1000T_STATUS, &status_1kbt); if (ret_val) return FALSE; if ((status_1kbt & E1000_IDLE_ERROR_COUNT_MASK) == E1000_IDLE_ERROR_COUNT_MASK) return TRUE; } return FALSE; } /** * e1000_setup_link_82571 - Setup flow control and link settings * @hw: pointer to the HW structure * * Determines which flow control settings to use, then configures flow * control. Calls the appropriate media-specific link configuration * function. Assuming the adapter has a valid link partner, a valid link * should be established. Assumes the hardware has previously been reset * and the transmitter and receiver are not enabled. **/ static s32 e1000_setup_link_82571(struct e1000_hw *hw) { DEBUGFUNC("e1000_setup_link_82571"); /* 82573 does not have a word in the NVM to determine * the default flow control setting, so we explicitly * set it to full. */ switch (hw->mac.type) { case e1000_82573: case e1000_82574: case e1000_82583: if (hw->fc.requested_mode == e1000_fc_default) hw->fc.requested_mode = e1000_fc_full; break; default: break; } return e1000_setup_link_generic(hw); } /** * e1000_setup_copper_link_82571 - Configure copper link settings * @hw: pointer to the HW structure * * Configures the link for auto-neg or forced speed and duplex. Then we check * for link, once link is established calls to configure collision distance * and flow control are called. **/ static s32 e1000_setup_copper_link_82571(struct e1000_hw *hw) { u32 ctrl; s32 ret_val; DEBUGFUNC("e1000_setup_copper_link_82571"); ctrl = E1000_READ_REG(hw, E1000_CTRL); ctrl |= E1000_CTRL_SLU; ctrl &= ~(E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX); E1000_WRITE_REG(hw, E1000_CTRL, ctrl); switch (hw->phy.type) { case e1000_phy_m88: case e1000_phy_bm: ret_val = e1000_copper_link_setup_m88(hw); break; case e1000_phy_igp_2: ret_val = e1000_copper_link_setup_igp(hw); break; default: return -E1000_ERR_PHY; break; } if (ret_val) return ret_val; return e1000_setup_copper_link_generic(hw); } /** * e1000_setup_fiber_serdes_link_82571 - Setup link for fiber/serdes * @hw: pointer to the HW structure * * Configures collision distance and flow control for fiber and serdes links. * Upon successful setup, poll for link. **/ static s32 e1000_setup_fiber_serdes_link_82571(struct e1000_hw *hw) { DEBUGFUNC("e1000_setup_fiber_serdes_link_82571"); switch (hw->mac.type) { case e1000_82571: case e1000_82572: /* If SerDes loopback mode is entered, there is no form * of reset to take the adapter out of that mode. So we * have to explicitly take the adapter out of loopback * mode. This prevents drivers from twiddling their thumbs * if another tool failed to take it out of loopback mode. */ E1000_WRITE_REG(hw, E1000_SCTL, E1000_SCTL_DISABLE_SERDES_LOOPBACK); break; default: break; } return e1000_setup_fiber_serdes_link_generic(hw); } /** * e1000_check_for_serdes_link_82571 - Check for link (Serdes) * @hw: pointer to the HW structure * * Reports the link state as up or down. * * If autonegotiation is supported by the link partner, the link state is * determined by the result of autonegotiation. This is the most likely case. * If autonegotiation is not supported by the link partner, and the link * has a valid signal, force the link up. * * The link state is represented internally here by 4 states: * * 1) down * 2) autoneg_progress * 3) autoneg_complete (the link successfully autonegotiated) * 4) forced_up (the link has been forced up, it did not autonegotiate) * **/ static s32 e1000_check_for_serdes_link_82571(struct e1000_hw *hw) { struct e1000_mac_info *mac = &hw->mac; u32 rxcw; u32 ctrl; u32 status; u32 txcw; u32 i; s32 ret_val = E1000_SUCCESS; DEBUGFUNC("e1000_check_for_serdes_link_82571"); ctrl = E1000_READ_REG(hw, E1000_CTRL); status = E1000_READ_REG(hw, E1000_STATUS); E1000_READ_REG(hw, E1000_RXCW); /* SYNCH bit and IV bit are sticky */ usec_delay(10); rxcw = E1000_READ_REG(hw, E1000_RXCW); if ((rxcw & E1000_RXCW_SYNCH) && !(rxcw & E1000_RXCW_IV)) { /* Receiver is synchronized with no invalid bits. */ switch (mac->serdes_link_state) { case e1000_serdes_link_autoneg_complete: if (!(status & E1000_STATUS_LU)) { /* We have lost link, retry autoneg before * reporting link failure */ mac->serdes_link_state = e1000_serdes_link_autoneg_progress; mac->serdes_has_link = FALSE; DEBUGOUT("AN_UP -> AN_PROG\n"); } else { mac->serdes_has_link = TRUE; } break; case e1000_serdes_link_forced_up: /* If we are receiving /C/ ordered sets, re-enable * auto-negotiation in the TXCW register and disable * forced link in the Device Control register in an * attempt to auto-negotiate with our link partner. */ if (rxcw & E1000_RXCW_C) { /* Enable autoneg, and unforce link up */ E1000_WRITE_REG(hw, E1000_TXCW, mac->txcw); E1000_WRITE_REG(hw, E1000_CTRL, (ctrl & ~E1000_CTRL_SLU)); mac->serdes_link_state = e1000_serdes_link_autoneg_progress; mac->serdes_has_link = FALSE; DEBUGOUT("FORCED_UP -> AN_PROG\n"); } else { mac->serdes_has_link = TRUE; } break; case e1000_serdes_link_autoneg_progress: if (rxcw & E1000_RXCW_C) { /* We received /C/ ordered sets, meaning the * link partner has autonegotiated, and we can * trust the Link Up (LU) status bit. */ if (status & E1000_STATUS_LU) { mac->serdes_link_state = e1000_serdes_link_autoneg_complete; DEBUGOUT("AN_PROG -> AN_UP\n"); mac->serdes_has_link = TRUE; } else { /* Autoneg completed, but failed. */ mac->serdes_link_state = e1000_serdes_link_down; DEBUGOUT("AN_PROG -> DOWN\n"); } } else { /* The link partner did not autoneg. * Force link up and full duplex, and change * state to forced. */ E1000_WRITE_REG(hw, E1000_TXCW, (mac->txcw & ~E1000_TXCW_ANE)); ctrl |= (E1000_CTRL_SLU | E1000_CTRL_FD); E1000_WRITE_REG(hw, E1000_CTRL, ctrl); /* Configure Flow Control after link up. */ ret_val = e1000_config_fc_after_link_up_generic(hw); if (ret_val) { DEBUGOUT("Error config flow control\n"); break; } mac->serdes_link_state = e1000_serdes_link_forced_up; mac->serdes_has_link = TRUE; DEBUGOUT("AN_PROG -> FORCED_UP\n"); } break; case e1000_serdes_link_down: default: /* The link was down but the receiver has now gained * valid sync, so lets see if we can bring the link * up. */ E1000_WRITE_REG(hw, E1000_TXCW, mac->txcw); E1000_WRITE_REG(hw, E1000_CTRL, (ctrl & ~E1000_CTRL_SLU)); mac->serdes_link_state = e1000_serdes_link_autoneg_progress; mac->serdes_has_link = FALSE; DEBUGOUT("DOWN -> AN_PROG\n"); break; } } else { if (!(rxcw & E1000_RXCW_SYNCH)) { mac->serdes_has_link = FALSE; mac->serdes_link_state = e1000_serdes_link_down; DEBUGOUT("ANYSTATE -> DOWN\n"); } else { /* Check several times, if SYNCH bit and CONFIG * bit both are consistently 1 then simply ignore * the IV bit and restart Autoneg */ for (i = 0; i < AN_RETRY_COUNT; i++) { usec_delay(10); rxcw = E1000_READ_REG(hw, E1000_RXCW); if ((rxcw & E1000_RXCW_SYNCH) && (rxcw & E1000_RXCW_C)) continue; if (rxcw & E1000_RXCW_IV) { mac->serdes_has_link = FALSE; mac->serdes_link_state = e1000_serdes_link_down; DEBUGOUT("ANYSTATE -> DOWN\n"); break; } } if (i == AN_RETRY_COUNT) { txcw = E1000_READ_REG(hw, E1000_TXCW); txcw |= E1000_TXCW_ANE; E1000_WRITE_REG(hw, E1000_TXCW, txcw); mac->serdes_link_state = e1000_serdes_link_autoneg_progress; mac->serdes_has_link = FALSE; DEBUGOUT("ANYSTATE -> AN_PROG\n"); } } } return ret_val; } /** * e1000_valid_led_default_82571 - Verify a valid default LED config * @hw: pointer to the HW structure * @data: pointer to the NVM (EEPROM) * * Read the EEPROM for the current default LED configuration. If the * LED configuration is not valid, set to a valid LED configuration. **/ static s32 e1000_valid_led_default_82571(struct e1000_hw *hw, u16 *data) { s32 ret_val; DEBUGFUNC("e1000_valid_led_default_82571"); ret_val = hw->nvm.ops.read(hw, NVM_ID_LED_SETTINGS, 1, data); if (ret_val) { DEBUGOUT("NVM Read Error\n"); return ret_val; } switch (hw->mac.type) { case e1000_82573: case e1000_82574: case e1000_82583: if (*data == ID_LED_RESERVED_F746) *data = ID_LED_DEFAULT_82573; break; default: if (*data == ID_LED_RESERVED_0000 || *data == ID_LED_RESERVED_FFFF) *data = ID_LED_DEFAULT; break; } return E1000_SUCCESS; } /** * e1000_get_laa_state_82571 - Get locally administered address state * @hw: pointer to the HW structure * * Retrieve and return the current locally administered address state. **/ bool e1000_get_laa_state_82571(struct e1000_hw *hw) { DEBUGFUNC("e1000_get_laa_state_82571"); if (hw->mac.type != e1000_82571) return FALSE; return hw->dev_spec._82571.laa_is_present; } /** * e1000_set_laa_state_82571 - Set locally administered address state * @hw: pointer to the HW structure * @state: enable/disable locally administered address * * Enable/Disable the current locally administered address state. **/ void e1000_set_laa_state_82571(struct e1000_hw *hw, bool state) { DEBUGFUNC("e1000_set_laa_state_82571"); if (hw->mac.type != e1000_82571) return; hw->dev_spec._82571.laa_is_present = state; /* If workaround is activated... */ if (state) /* Hold a copy of the LAA in RAR[14] This is done so that * between the time RAR[0] gets clobbered and the time it * gets fixed, the actual LAA is in one of the RARs and no * incoming packets directed to this port are dropped. * Eventually the LAA will be in RAR[0] and RAR[14]. */ hw->mac.ops.rar_set(hw, hw->mac.addr, hw->mac.rar_entry_count - 1); return; } /** * e1000_fix_nvm_checksum_82571 - Fix EEPROM checksum * @hw: pointer to the HW structure * * Verifies that the EEPROM has completed the update. After updating the * EEPROM, we need to check bit 15 in work 0x23 for the checksum fix. If * the checksum fix is not implemented, we need to set the bit and update * the checksum. Otherwise, if bit 15 is set and the checksum is incorrect, * we need to return bad checksum. **/ static s32 e1000_fix_nvm_checksum_82571(struct e1000_hw *hw) { struct e1000_nvm_info *nvm = &hw->nvm; s32 ret_val; u16 data; DEBUGFUNC("e1000_fix_nvm_checksum_82571"); if (nvm->type != e1000_nvm_flash_hw) return E1000_SUCCESS; /* Check bit 4 of word 10h. If it is 0, firmware is done updating * 10h-12h. Checksum may need to be fixed. */ ret_val = nvm->ops.read(hw, 0x10, 1, &data); if (ret_val) return ret_val; if (!(data & 0x10)) { /* Read 0x23 and check bit 15. This bit is a 1 * when the checksum has already been fixed. If * the checksum is still wrong and this bit is a * 1, we need to return bad checksum. Otherwise, * we need to set this bit to a 1 and update the * checksum. */ ret_val = nvm->ops.read(hw, 0x23, 1, &data); if (ret_val) return ret_val; if (!(data & 0x8000)) { data |= 0x8000; ret_val = nvm->ops.write(hw, 0x23, 1, &data); if (ret_val) return ret_val; ret_val = nvm->ops.update(hw); if (ret_val) return ret_val; } } return E1000_SUCCESS; } /** * e1000_read_mac_addr_82571 - Read device MAC address * @hw: pointer to the HW structure **/ static s32 e1000_read_mac_addr_82571(struct e1000_hw *hw) { DEBUGFUNC("e1000_read_mac_addr_82571"); if (hw->mac.type == e1000_82571) { s32 ret_val; /* If there's an alternate MAC address place it in RAR0 * so that it will override the Si installed default perm * address. */ ret_val = e1000_check_alt_mac_addr_generic(hw); if (ret_val) return ret_val; } return e1000_read_mac_addr_generic(hw); } /** * e1000_power_down_phy_copper_82571 - Remove link during PHY power down * @hw: pointer to the HW structure * * In the case of a PHY power down to save power, or to turn off link during a * driver unload, or wake on lan is not enabled, remove the link. **/ static void e1000_power_down_phy_copper_82571(struct e1000_hw *hw) { struct e1000_phy_info *phy = &hw->phy; struct e1000_mac_info *mac = &hw->mac; if (!phy->ops.check_reset_block) return; /* If the management interface is not enabled, then power down */ if (!(mac->ops.check_mng_mode(hw) || phy->ops.check_reset_block(hw))) e1000_power_down_phy_copper(hw); return; } /** * e1000_clear_hw_cntrs_82571 - Clear device specific hardware counters * @hw: pointer to the HW structure * * Clears the hardware counters by reading the counter registers. **/ static void e1000_clear_hw_cntrs_82571(struct e1000_hw *hw) { DEBUGFUNC("e1000_clear_hw_cntrs_82571"); e1000_clear_hw_cntrs_base_generic(hw); E1000_READ_REG(hw, E1000_PRC64); E1000_READ_REG(hw, E1000_PRC127); E1000_READ_REG(hw, E1000_PRC255); E1000_READ_REG(hw, E1000_PRC511); E1000_READ_REG(hw, E1000_PRC1023); E1000_READ_REG(hw, E1000_PRC1522); E1000_READ_REG(hw, E1000_PTC64); E1000_READ_REG(hw, E1000_PTC127); E1000_READ_REG(hw, E1000_PTC255); E1000_READ_REG(hw, E1000_PTC511); E1000_READ_REG(hw, E1000_PTC1023); E1000_READ_REG(hw, E1000_PTC1522); E1000_READ_REG(hw, E1000_ALGNERRC); E1000_READ_REG(hw, E1000_RXERRC); E1000_READ_REG(hw, E1000_TNCRS); E1000_READ_REG(hw, E1000_CEXTERR); E1000_READ_REG(hw, E1000_TSCTC); E1000_READ_REG(hw, E1000_TSCTFC); E1000_READ_REG(hw, E1000_MGTPRC); E1000_READ_REG(hw, E1000_MGTPDC); E1000_READ_REG(hw, E1000_MGTPTC); E1000_READ_REG(hw, E1000_IAC); E1000_READ_REG(hw, E1000_ICRXOC); E1000_READ_REG(hw, E1000_ICRXPTC); E1000_READ_REG(hw, E1000_ICRXATC); E1000_READ_REG(hw, E1000_ICTXPTC); E1000_READ_REG(hw, E1000_ICTXATC); E1000_READ_REG(hw, E1000_ICTXQEC); E1000_READ_REG(hw, E1000_ICTXQMTC); E1000_READ_REG(hw, E1000_ICRXDMTC); } Index: head/sys/dev/e1000/e1000_82575.c =================================================================== --- head/sys/dev/e1000/e1000_82575.c (revision 323515) +++ head/sys/dev/e1000/e1000_82575.c (revision 323516) @@ -1,3778 +1,3696 @@ /****************************************************************************** Copyright (c) 2001-2015, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************/ /*$FreeBSD$*/ /* * 82575EB Gigabit Network Connection * 82575EB Gigabit Backplane Connection * 82575GB Gigabit Network Connection * 82576 Gigabit Network Connection * 82576 Quad Port Gigabit Mezzanine Adapter * 82580 Gigabit Network Connection * I350 Gigabit Network Connection */ #include "e1000_api.h" #include "e1000_i210.h" static s32 e1000_init_phy_params_82575(struct e1000_hw *hw); static s32 e1000_init_mac_params_82575(struct e1000_hw *hw); static s32 e1000_acquire_phy_82575(struct e1000_hw *hw); static void e1000_release_phy_82575(struct e1000_hw *hw); static s32 e1000_acquire_nvm_82575(struct e1000_hw *hw); static void e1000_release_nvm_82575(struct e1000_hw *hw); static s32 e1000_check_for_link_82575(struct e1000_hw *hw); static s32 e1000_check_for_link_media_swap(struct e1000_hw *hw); static s32 e1000_get_cfg_done_82575(struct e1000_hw *hw); static s32 e1000_get_link_up_info_82575(struct e1000_hw *hw, u16 *speed, u16 *duplex); static s32 e1000_phy_hw_reset_sgmii_82575(struct e1000_hw *hw); static s32 e1000_read_phy_reg_sgmii_82575(struct e1000_hw *hw, u32 offset, u16 *data); static s32 e1000_reset_hw_82575(struct e1000_hw *hw); static s32 e1000_reset_hw_82580(struct e1000_hw *hw); static s32 e1000_read_phy_reg_82580(struct e1000_hw *hw, u32 offset, u16 *data); static s32 e1000_write_phy_reg_82580(struct e1000_hw *hw, u32 offset, u16 data); static s32 e1000_set_d0_lplu_state_82580(struct e1000_hw *hw, bool active); static s32 e1000_set_d3_lplu_state_82580(struct e1000_hw *hw, bool active); static s32 e1000_set_d0_lplu_state_82575(struct e1000_hw *hw, bool active); static s32 e1000_setup_copper_link_82575(struct e1000_hw *hw); static s32 e1000_setup_serdes_link_82575(struct e1000_hw *hw); static s32 e1000_get_media_type_82575(struct e1000_hw *hw); static s32 e1000_set_sfp_media_type_82575(struct e1000_hw *hw); static s32 e1000_valid_led_default_82575(struct e1000_hw *hw, u16 *data); static s32 e1000_write_phy_reg_sgmii_82575(struct e1000_hw *hw, u32 offset, u16 data); static void e1000_clear_hw_cntrs_82575(struct e1000_hw *hw); -static s32 e1000_acquire_swfw_sync_82575(struct e1000_hw *hw, u16 mask); static s32 e1000_get_pcs_speed_and_duplex_82575(struct e1000_hw *hw, u16 *speed, u16 *duplex); static s32 e1000_get_phy_id_82575(struct e1000_hw *hw); -static void e1000_release_swfw_sync_82575(struct e1000_hw *hw, u16 mask); static bool e1000_sgmii_active_82575(struct e1000_hw *hw); static s32 e1000_reset_init_script_82575(struct e1000_hw *hw); static s32 e1000_read_mac_addr_82575(struct e1000_hw *hw); static void e1000_config_collision_dist_82575(struct e1000_hw *hw); static void e1000_power_down_phy_copper_82575(struct e1000_hw *hw); static void e1000_shutdown_serdes_link_82575(struct e1000_hw *hw); static void e1000_power_up_serdes_link_82575(struct e1000_hw *hw); static s32 e1000_set_pcie_completion_timeout(struct e1000_hw *hw); static s32 e1000_reset_mdicnfg_82580(struct e1000_hw *hw); static s32 e1000_validate_nvm_checksum_82580(struct e1000_hw *hw); static s32 e1000_update_nvm_checksum_82580(struct e1000_hw *hw); static s32 e1000_update_nvm_checksum_with_offset(struct e1000_hw *hw, u16 offset); static s32 e1000_validate_nvm_checksum_with_offset(struct e1000_hw *hw, u16 offset); static s32 e1000_validate_nvm_checksum_i350(struct e1000_hw *hw); static s32 e1000_update_nvm_checksum_i350(struct e1000_hw *hw); static void e1000_clear_vfta_i350(struct e1000_hw *hw); static void e1000_i2c_start(struct e1000_hw *hw); static void e1000_i2c_stop(struct e1000_hw *hw); static s32 e1000_clock_in_i2c_byte(struct e1000_hw *hw, u8 *data); static s32 e1000_clock_out_i2c_byte(struct e1000_hw *hw, u8 data); static s32 e1000_get_i2c_ack(struct e1000_hw *hw); static s32 e1000_clock_in_i2c_bit(struct e1000_hw *hw, bool *data); static s32 e1000_clock_out_i2c_bit(struct e1000_hw *hw, bool data); static void e1000_raise_i2c_clk(struct e1000_hw *hw, u32 *i2cctl); static void e1000_lower_i2c_clk(struct e1000_hw *hw, u32 *i2cctl); static s32 e1000_set_i2c_data(struct e1000_hw *hw, u32 *i2cctl, bool data); static bool e1000_get_i2c_data(u32 *i2cctl); static const u16 e1000_82580_rxpbs_table[] = { 36, 72, 144, 1, 2, 4, 8, 16, 35, 70, 140 }; #define E1000_82580_RXPBS_TABLE_SIZE \ (sizeof(e1000_82580_rxpbs_table) / \ sizeof(e1000_82580_rxpbs_table[0])) /** * e1000_sgmii_uses_mdio_82575 - Determine if I2C pins are for external MDIO * @hw: pointer to the HW structure * * Called to determine if the I2C pins are being used for I2C or as an * external MDIO interface since the two options are mutually exclusive. **/ static bool e1000_sgmii_uses_mdio_82575(struct e1000_hw *hw) { u32 reg = 0; bool ext_mdio = FALSE; DEBUGFUNC("e1000_sgmii_uses_mdio_82575"); switch (hw->mac.type) { case e1000_82575: case e1000_82576: reg = E1000_READ_REG(hw, E1000_MDIC); ext_mdio = !!(reg & E1000_MDIC_DEST); break; case e1000_82580: case e1000_i350: case e1000_i354: case e1000_i210: case e1000_i211: reg = E1000_READ_REG(hw, E1000_MDICNFG); ext_mdio = !!(reg & E1000_MDICNFG_EXT_MDIO); break; default: break; } return ext_mdio; } /** * e1000_init_phy_params_82575 - Init PHY func ptrs. * @hw: pointer to the HW structure **/ static s32 e1000_init_phy_params_82575(struct e1000_hw *hw) { struct e1000_phy_info *phy = &hw->phy; s32 ret_val = E1000_SUCCESS; u32 ctrl_ext; DEBUGFUNC("e1000_init_phy_params_82575"); phy->ops.read_i2c_byte = e1000_read_i2c_byte_generic; phy->ops.write_i2c_byte = e1000_write_i2c_byte_generic; if (hw->phy.media_type != e1000_media_type_copper) { phy->type = e1000_phy_none; goto out; } phy->ops.power_up = e1000_power_up_phy_copper; phy->ops.power_down = e1000_power_down_phy_copper_82575; phy->autoneg_mask = AUTONEG_ADVERTISE_SPEED_DEFAULT; phy->reset_delay_us = 100; phy->ops.acquire = e1000_acquire_phy_82575; phy->ops.check_reset_block = e1000_check_reset_block_generic; phy->ops.commit = e1000_phy_sw_reset_generic; phy->ops.get_cfg_done = e1000_get_cfg_done_82575; phy->ops.release = e1000_release_phy_82575; ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT); if (e1000_sgmii_active_82575(hw)) { phy->ops.reset = e1000_phy_hw_reset_sgmii_82575; ctrl_ext |= E1000_CTRL_I2C_ENA; } else { phy->ops.reset = e1000_phy_hw_reset_generic; ctrl_ext &= ~E1000_CTRL_I2C_ENA; } E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext); e1000_reset_mdicnfg_82580(hw); if (e1000_sgmii_active_82575(hw) && !e1000_sgmii_uses_mdio_82575(hw)) { phy->ops.read_reg = e1000_read_phy_reg_sgmii_82575; phy->ops.write_reg = e1000_write_phy_reg_sgmii_82575; } else { switch (hw->mac.type) { case e1000_82580: case e1000_i350: case e1000_i354: phy->ops.read_reg = e1000_read_phy_reg_82580; phy->ops.write_reg = e1000_write_phy_reg_82580; break; case e1000_i210: case e1000_i211: phy->ops.read_reg = e1000_read_phy_reg_gs40g; phy->ops.write_reg = e1000_write_phy_reg_gs40g; break; default: phy->ops.read_reg = e1000_read_phy_reg_igp; phy->ops.write_reg = e1000_write_phy_reg_igp; } } /* Set phy->phy_addr and phy->id. */ ret_val = e1000_get_phy_id_82575(hw); /* Verify phy id and set remaining function pointers */ switch (phy->id) { case M88E1543_E_PHY_ID: case M88E1512_E_PHY_ID: case I347AT4_E_PHY_ID: case M88E1112_E_PHY_ID: case M88E1340M_E_PHY_ID: case M88E1111_I_PHY_ID: phy->type = e1000_phy_m88; phy->ops.check_polarity = e1000_check_polarity_m88; phy->ops.get_info = e1000_get_phy_info_m88; if (phy->id == I347AT4_E_PHY_ID || phy->id == M88E1112_E_PHY_ID || phy->id == M88E1340M_E_PHY_ID) phy->ops.get_cable_length = e1000_get_cable_length_m88_gen2; else if (phy->id == M88E1543_E_PHY_ID || phy->id == M88E1512_E_PHY_ID) phy->ops.get_cable_length = e1000_get_cable_length_m88_gen2; else phy->ops.get_cable_length = e1000_get_cable_length_m88; phy->ops.force_speed_duplex = e1000_phy_force_speed_duplex_m88; /* Check if this PHY is confgured for media swap. */ if (phy->id == M88E1112_E_PHY_ID) { u16 data; ret_val = phy->ops.write_reg(hw, E1000_M88E1112_PAGE_ADDR, 2); if (ret_val) goto out; ret_val = phy->ops.read_reg(hw, E1000_M88E1112_MAC_CTRL_1, &data); if (ret_val) goto out; data = (data & E1000_M88E1112_MAC_CTRL_1_MODE_MASK) >> E1000_M88E1112_MAC_CTRL_1_MODE_SHIFT; if (data == E1000_M88E1112_AUTO_COPPER_SGMII || data == E1000_M88E1112_AUTO_COPPER_BASEX) hw->mac.ops.check_for_link = e1000_check_for_link_media_swap; } if (phy->id == M88E1512_E_PHY_ID) { ret_val = e1000_initialize_M88E1512_phy(hw); if (ret_val) goto out; } if (phy->id == M88E1543_E_PHY_ID) { ret_val = e1000_initialize_M88E1543_phy(hw); if (ret_val) goto out; } break; case IGP03E1000_E_PHY_ID: case IGP04E1000_E_PHY_ID: phy->type = e1000_phy_igp_3; phy->ops.check_polarity = e1000_check_polarity_igp; phy->ops.get_info = e1000_get_phy_info_igp; phy->ops.get_cable_length = e1000_get_cable_length_igp_2; phy->ops.force_speed_duplex = e1000_phy_force_speed_duplex_igp; phy->ops.set_d0_lplu_state = e1000_set_d0_lplu_state_82575; phy->ops.set_d3_lplu_state = e1000_set_d3_lplu_state_generic; break; case I82580_I_PHY_ID: case I350_I_PHY_ID: phy->type = e1000_phy_82580; phy->ops.check_polarity = e1000_check_polarity_82577; phy->ops.force_speed_duplex = e1000_phy_force_speed_duplex_82577; phy->ops.get_cable_length = e1000_get_cable_length_82577; phy->ops.get_info = e1000_get_phy_info_82577; phy->ops.set_d0_lplu_state = e1000_set_d0_lplu_state_82580; phy->ops.set_d3_lplu_state = e1000_set_d3_lplu_state_82580; break; case I210_I_PHY_ID: phy->type = e1000_phy_i210; phy->ops.check_polarity = e1000_check_polarity_m88; phy->ops.get_info = e1000_get_phy_info_m88; phy->ops.get_cable_length = e1000_get_cable_length_m88_gen2; phy->ops.set_d0_lplu_state = e1000_set_d0_lplu_state_82580; phy->ops.set_d3_lplu_state = e1000_set_d3_lplu_state_82580; phy->ops.force_speed_duplex = e1000_phy_force_speed_duplex_m88; break; default: ret_val = -E1000_ERR_PHY; goto out; } out: return ret_val; } /** * e1000_init_nvm_params_82575 - Init NVM func ptrs. * @hw: pointer to the HW structure **/ s32 e1000_init_nvm_params_82575(struct e1000_hw *hw) { struct e1000_nvm_info *nvm = &hw->nvm; u32 eecd = E1000_READ_REG(hw, E1000_EECD); u16 size; DEBUGFUNC("e1000_init_nvm_params_82575"); size = (u16)((eecd & E1000_EECD_SIZE_EX_MASK) >> E1000_EECD_SIZE_EX_SHIFT); /* * Added to a constant, "size" becomes the left-shift value * for setting word_size. */ size += NVM_WORD_SIZE_BASE_SHIFT; /* Just in case size is out of range, cap it to the largest * EEPROM size supported */ if (size > 15) size = 15; nvm->word_size = 1 << size; if (hw->mac.type < e1000_i210) { nvm->opcode_bits = 8; nvm->delay_usec = 1; switch (nvm->override) { case e1000_nvm_override_spi_large: nvm->page_size = 32; nvm->address_bits = 16; break; case e1000_nvm_override_spi_small: nvm->page_size = 8; nvm->address_bits = 8; break; default: nvm->page_size = eecd & E1000_EECD_ADDR_BITS ? 32 : 8; nvm->address_bits = eecd & E1000_EECD_ADDR_BITS ? 16 : 8; break; } if (nvm->word_size == (1 << 15)) nvm->page_size = 128; nvm->type = e1000_nvm_eeprom_spi; } else { nvm->type = e1000_nvm_flash_hw; } /* Function Pointers */ nvm->ops.acquire = e1000_acquire_nvm_82575; nvm->ops.release = e1000_release_nvm_82575; if (nvm->word_size < (1 << 15)) nvm->ops.read = e1000_read_nvm_eerd; else nvm->ops.read = e1000_read_nvm_spi; nvm->ops.write = e1000_write_nvm_spi; nvm->ops.validate = e1000_validate_nvm_checksum_generic; nvm->ops.update = e1000_update_nvm_checksum_generic; nvm->ops.valid_led_default = e1000_valid_led_default_82575; /* override generic family function pointers for specific descendants */ switch (hw->mac.type) { case e1000_82580: nvm->ops.validate = e1000_validate_nvm_checksum_82580; nvm->ops.update = e1000_update_nvm_checksum_82580; break; case e1000_i350: case e1000_i354: nvm->ops.validate = e1000_validate_nvm_checksum_i350; nvm->ops.update = e1000_update_nvm_checksum_i350; break; default: break; } return E1000_SUCCESS; } /** * e1000_init_mac_params_82575 - Init MAC func ptrs. * @hw: pointer to the HW structure **/ static s32 e1000_init_mac_params_82575(struct e1000_hw *hw) { struct e1000_mac_info *mac = &hw->mac; struct e1000_dev_spec_82575 *dev_spec = &hw->dev_spec._82575; DEBUGFUNC("e1000_init_mac_params_82575"); /* Derives media type */ e1000_get_media_type_82575(hw); /* Set mta register count */ mac->mta_reg_count = 128; /* Set uta register count */ mac->uta_reg_count = (hw->mac.type == e1000_82575) ? 0 : 128; /* Set rar entry count */ mac->rar_entry_count = E1000_RAR_ENTRIES_82575; if (mac->type == e1000_82576) mac->rar_entry_count = E1000_RAR_ENTRIES_82576; if (mac->type == e1000_82580) mac->rar_entry_count = E1000_RAR_ENTRIES_82580; if (mac->type == e1000_i350 || mac->type == e1000_i354) mac->rar_entry_count = E1000_RAR_ENTRIES_I350; /* Enable EEE default settings for EEE supported devices */ if (mac->type >= e1000_i350) dev_spec->eee_disable = FALSE; /* Allow a single clear of the SW semaphore on I210 and newer */ if (mac->type >= e1000_i210) dev_spec->clear_semaphore_once = TRUE; /* Set if part includes ASF firmware */ mac->asf_firmware_present = TRUE; /* FWSM register */ mac->has_fwsm = TRUE; /* ARC supported; valid only if manageability features are enabled. */ mac->arc_subsystem_valid = !!(E1000_READ_REG(hw, E1000_FWSM) & E1000_FWSM_MODE_MASK); /* Function pointers */ /* bus type/speed/width */ mac->ops.get_bus_info = e1000_get_bus_info_pcie_generic; /* reset */ if (mac->type >= e1000_82580) mac->ops.reset_hw = e1000_reset_hw_82580; else mac->ops.reset_hw = e1000_reset_hw_82575; /* hw initialization */ if ((mac->type == e1000_i210) || (mac->type == e1000_i211)) mac->ops.init_hw = e1000_init_hw_i210; else mac->ops.init_hw = e1000_init_hw_82575; /* link setup */ mac->ops.setup_link = e1000_setup_link_generic; /* physical interface link setup */ mac->ops.setup_physical_interface = (hw->phy.media_type == e1000_media_type_copper) ? e1000_setup_copper_link_82575 : e1000_setup_serdes_link_82575; /* physical interface shutdown */ mac->ops.shutdown_serdes = e1000_shutdown_serdes_link_82575; /* physical interface power up */ mac->ops.power_up_serdes = e1000_power_up_serdes_link_82575; /* check for link */ mac->ops.check_for_link = e1000_check_for_link_82575; /* read mac address */ mac->ops.read_mac_addr = e1000_read_mac_addr_82575; /* configure collision distance */ mac->ops.config_collision_dist = e1000_config_collision_dist_82575; /* multicast address update */ mac->ops.update_mc_addr_list = e1000_update_mc_addr_list_generic; if (hw->mac.type == e1000_i350 || mac->type == e1000_i354) { /* writing VFTA */ mac->ops.write_vfta = e1000_write_vfta_i350; /* clearing VFTA */ mac->ops.clear_vfta = e1000_clear_vfta_i350; } else { /* writing VFTA */ mac->ops.write_vfta = e1000_write_vfta_generic; /* clearing VFTA */ mac->ops.clear_vfta = e1000_clear_vfta_generic; } if (hw->mac.type >= e1000_82580) mac->ops.validate_mdi_setting = e1000_validate_mdi_setting_crossover_generic; /* ID LED init */ mac->ops.id_led_init = e1000_id_led_init_generic; /* blink LED */ mac->ops.blink_led = e1000_blink_led_generic; /* setup LED */ mac->ops.setup_led = e1000_setup_led_generic; /* cleanup LED */ mac->ops.cleanup_led = e1000_cleanup_led_generic; /* turn on/off LED */ mac->ops.led_on = e1000_led_on_generic; mac->ops.led_off = e1000_led_off_generic; /* clear hardware counters */ mac->ops.clear_hw_cntrs = e1000_clear_hw_cntrs_82575; /* link info */ mac->ops.get_link_up_info = e1000_get_link_up_info_82575; /* acquire SW_FW sync */ - mac->ops.acquire_swfw_sync = e1000_acquire_swfw_sync_82575; - mac->ops.release_swfw_sync = e1000_release_swfw_sync_82575; - if (mac->type >= e1000_i210) { - mac->ops.acquire_swfw_sync = e1000_acquire_swfw_sync_i210; - mac->ops.release_swfw_sync = e1000_release_swfw_sync_i210; - } + mac->ops.acquire_swfw_sync = e1000_acquire_swfw_sync; + mac->ops.release_swfw_sync = e1000_release_swfw_sync; /* set lan id for port to determine which phy lock to use */ hw->mac.ops.set_lan_id(hw); return E1000_SUCCESS; } /** * e1000_init_function_pointers_82575 - Init func ptrs. * @hw: pointer to the HW structure * * Called to initialize all function pointers and parameters. **/ void e1000_init_function_pointers_82575(struct e1000_hw *hw) { DEBUGFUNC("e1000_init_function_pointers_82575"); hw->mac.ops.init_params = e1000_init_mac_params_82575; hw->nvm.ops.init_params = e1000_init_nvm_params_82575; hw->phy.ops.init_params = e1000_init_phy_params_82575; hw->mbx.ops.init_params = e1000_init_mbx_params_pf; } /** * e1000_acquire_phy_82575 - Acquire rights to access PHY * @hw: pointer to the HW structure * * Acquire access rights to the correct PHY. **/ static s32 e1000_acquire_phy_82575(struct e1000_hw *hw) { u16 mask = E1000_SWFW_PHY0_SM; DEBUGFUNC("e1000_acquire_phy_82575"); if (hw->bus.func == E1000_FUNC_1) mask = E1000_SWFW_PHY1_SM; else if (hw->bus.func == E1000_FUNC_2) mask = E1000_SWFW_PHY2_SM; else if (hw->bus.func == E1000_FUNC_3) mask = E1000_SWFW_PHY3_SM; return hw->mac.ops.acquire_swfw_sync(hw, mask); } /** * e1000_release_phy_82575 - Release rights to access PHY * @hw: pointer to the HW structure * * A wrapper to release access rights to the correct PHY. **/ static void e1000_release_phy_82575(struct e1000_hw *hw) { u16 mask = E1000_SWFW_PHY0_SM; DEBUGFUNC("e1000_release_phy_82575"); if (hw->bus.func == E1000_FUNC_1) mask = E1000_SWFW_PHY1_SM; else if (hw->bus.func == E1000_FUNC_2) mask = E1000_SWFW_PHY2_SM; else if (hw->bus.func == E1000_FUNC_3) mask = E1000_SWFW_PHY3_SM; hw->mac.ops.release_swfw_sync(hw, mask); } /** * e1000_read_phy_reg_sgmii_82575 - Read PHY register using sgmii * @hw: pointer to the HW structure * @offset: register offset to be read * @data: pointer to the read data * * Reads the PHY register at offset using the serial gigabit media independent * interface and stores the retrieved information in data. **/ static s32 e1000_read_phy_reg_sgmii_82575(struct e1000_hw *hw, u32 offset, u16 *data) { s32 ret_val = -E1000_ERR_PARAM; DEBUGFUNC("e1000_read_phy_reg_sgmii_82575"); if (offset > E1000_MAX_SGMII_PHY_REG_ADDR) { DEBUGOUT1("PHY Address %u is out of range\n", offset); goto out; } ret_val = hw->phy.ops.acquire(hw); if (ret_val) goto out; ret_val = e1000_read_phy_reg_i2c(hw, offset, data); hw->phy.ops.release(hw); out: return ret_val; } /** * e1000_write_phy_reg_sgmii_82575 - Write PHY register using sgmii * @hw: pointer to the HW structure * @offset: register offset to write to * @data: data to write at register offset * * Writes the data to PHY register at the offset using the serial gigabit * media independent interface. **/ static s32 e1000_write_phy_reg_sgmii_82575(struct e1000_hw *hw, u32 offset, u16 data) { s32 ret_val = -E1000_ERR_PARAM; DEBUGFUNC("e1000_write_phy_reg_sgmii_82575"); if (offset > E1000_MAX_SGMII_PHY_REG_ADDR) { DEBUGOUT1("PHY Address %d is out of range\n", offset); goto out; } ret_val = hw->phy.ops.acquire(hw); if (ret_val) goto out; ret_val = e1000_write_phy_reg_i2c(hw, offset, data); hw->phy.ops.release(hw); out: return ret_val; } /** * e1000_get_phy_id_82575 - Retrieve PHY addr and id * @hw: pointer to the HW structure * * Retrieves the PHY address and ID for both PHY's which do and do not use * sgmi interface. **/ static s32 e1000_get_phy_id_82575(struct e1000_hw *hw) { struct e1000_phy_info *phy = &hw->phy; s32 ret_val = E1000_SUCCESS; u16 phy_id; u32 ctrl_ext; u32 mdic; DEBUGFUNC("e1000_get_phy_id_82575"); /* some i354 devices need an extra read for phy id */ if (hw->mac.type == e1000_i354) e1000_get_phy_id(hw); /* * For SGMII PHYs, we try the list of possible addresses until * we find one that works. For non-SGMII PHYs * (e.g. integrated copper PHYs), an address of 1 should * work. The result of this function should mean phy->phy_addr * and phy->id are set correctly. */ if (!e1000_sgmii_active_82575(hw)) { phy->addr = 1; ret_val = e1000_get_phy_id(hw); goto out; } if (e1000_sgmii_uses_mdio_82575(hw)) { switch (hw->mac.type) { case e1000_82575: case e1000_82576: mdic = E1000_READ_REG(hw, E1000_MDIC); mdic &= E1000_MDIC_PHY_MASK; phy->addr = mdic >> E1000_MDIC_PHY_SHIFT; break; case e1000_82580: case e1000_i350: case e1000_i354: case e1000_i210: case e1000_i211: mdic = E1000_READ_REG(hw, E1000_MDICNFG); mdic &= E1000_MDICNFG_PHY_MASK; phy->addr = mdic >> E1000_MDICNFG_PHY_SHIFT; break; default: ret_val = -E1000_ERR_PHY; goto out; break; } ret_val = e1000_get_phy_id(hw); goto out; } /* Power on sgmii phy if it is disabled */ ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT); E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext & ~E1000_CTRL_EXT_SDP3_DATA); E1000_WRITE_FLUSH(hw); msec_delay(300); /* * The address field in the I2CCMD register is 3 bits and 0 is invalid. * Therefore, we need to test 1-7 */ for (phy->addr = 1; phy->addr < 8; phy->addr++) { ret_val = e1000_read_phy_reg_sgmii_82575(hw, PHY_ID1, &phy_id); if (ret_val == E1000_SUCCESS) { DEBUGOUT2("Vendor ID 0x%08X read at address %u\n", phy_id, phy->addr); /* * At the time of this writing, The M88 part is * the only supported SGMII PHY product. */ if (phy_id == M88_VENDOR) break; } else { DEBUGOUT1("PHY address %u was unreadable\n", phy->addr); } } /* A valid PHY type couldn't be found. */ if (phy->addr == 8) { phy->addr = 0; ret_val = -E1000_ERR_PHY; } else { ret_val = e1000_get_phy_id(hw); } /* restore previous sfp cage power state */ E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext); out: return ret_val; } /** * e1000_phy_hw_reset_sgmii_82575 - Performs a PHY reset * @hw: pointer to the HW structure * * Resets the PHY using the serial gigabit media independent interface. **/ static s32 e1000_phy_hw_reset_sgmii_82575(struct e1000_hw *hw) { s32 ret_val = E1000_SUCCESS; struct e1000_phy_info *phy = &hw->phy; DEBUGFUNC("e1000_phy_hw_reset_sgmii_82575"); /* * This isn't a TRUE "hard" reset, but is the only reset * available to us at this time. */ DEBUGOUT("Soft resetting SGMII attached PHY...\n"); if (!(hw->phy.ops.write_reg)) goto out; /* * SFP documentation requires the following to configure the SPF module * to work on SGMII. No further documentation is given. */ ret_val = hw->phy.ops.write_reg(hw, 0x1B, 0x8084); if (ret_val) goto out; ret_val = hw->phy.ops.commit(hw); if (ret_val) goto out; if (phy->id == M88E1512_E_PHY_ID) ret_val = e1000_initialize_M88E1512_phy(hw); out: return ret_val; } /** * e1000_set_d0_lplu_state_82575 - Set Low Power Linkup D0 state * @hw: pointer to the HW structure * @active: TRUE to enable LPLU, FALSE to disable * * Sets the LPLU D0 state according to the active flag. When * activating LPLU this function also disables smart speed * and vice versa. LPLU will not be activated unless the * device autonegotiation advertisement meets standards of * either 10 or 10/100 or 10/100/1000 at all duplexes. * This is a function pointer entry point only called by * PHY setup routines. **/ static s32 e1000_set_d0_lplu_state_82575(struct e1000_hw *hw, bool active) { struct e1000_phy_info *phy = &hw->phy; s32 ret_val = E1000_SUCCESS; u16 data; DEBUGFUNC("e1000_set_d0_lplu_state_82575"); if (!(hw->phy.ops.read_reg)) goto out; ret_val = phy->ops.read_reg(hw, IGP02E1000_PHY_POWER_MGMT, &data); if (ret_val) goto out; if (active) { data |= IGP02E1000_PM_D0_LPLU; ret_val = phy->ops.write_reg(hw, IGP02E1000_PHY_POWER_MGMT, data); if (ret_val) goto out; /* When LPLU is enabled, we should disable SmartSpeed */ ret_val = phy->ops.read_reg(hw, IGP01E1000_PHY_PORT_CONFIG, &data); data &= ~IGP01E1000_PSCFR_SMART_SPEED; ret_val = phy->ops.write_reg(hw, IGP01E1000_PHY_PORT_CONFIG, data); if (ret_val) goto out; } else { data &= ~IGP02E1000_PM_D0_LPLU; ret_val = phy->ops.write_reg(hw, IGP02E1000_PHY_POWER_MGMT, data); /* * LPLU and SmartSpeed are mutually exclusive. LPLU is used * during Dx states where the power conservation is most * important. During driver activity we should enable * SmartSpeed, so performance is maintained. */ if (phy->smart_speed == e1000_smart_speed_on) { ret_val = phy->ops.read_reg(hw, IGP01E1000_PHY_PORT_CONFIG, &data); if (ret_val) goto out; data |= IGP01E1000_PSCFR_SMART_SPEED; ret_val = phy->ops.write_reg(hw, IGP01E1000_PHY_PORT_CONFIG, data); if (ret_val) goto out; } else if (phy->smart_speed == e1000_smart_speed_off) { ret_val = phy->ops.read_reg(hw, IGP01E1000_PHY_PORT_CONFIG, &data); if (ret_val) goto out; data &= ~IGP01E1000_PSCFR_SMART_SPEED; ret_val = phy->ops.write_reg(hw, IGP01E1000_PHY_PORT_CONFIG, data); if (ret_val) goto out; } } out: return ret_val; } /** * e1000_set_d0_lplu_state_82580 - Set Low Power Linkup D0 state * @hw: pointer to the HW structure * @active: TRUE to enable LPLU, FALSE to disable * * Sets the LPLU D0 state according to the active flag. When * activating LPLU this function also disables smart speed * and vice versa. LPLU will not be activated unless the * device autonegotiation advertisement meets standards of * either 10 or 10/100 or 10/100/1000 at all duplexes. * This is a function pointer entry point only called by * PHY setup routines. **/ static s32 e1000_set_d0_lplu_state_82580(struct e1000_hw *hw, bool active) { struct e1000_phy_info *phy = &hw->phy; u32 data; DEBUGFUNC("e1000_set_d0_lplu_state_82580"); data = E1000_READ_REG(hw, E1000_82580_PHY_POWER_MGMT); if (active) { data |= E1000_82580_PM_D0_LPLU; /* When LPLU is enabled, we should disable SmartSpeed */ data &= ~E1000_82580_PM_SPD; } else { data &= ~E1000_82580_PM_D0_LPLU; /* * LPLU and SmartSpeed are mutually exclusive. LPLU is used * during Dx states where the power conservation is most * important. During driver activity we should enable * SmartSpeed, so performance is maintained. */ if (phy->smart_speed == e1000_smart_speed_on) data |= E1000_82580_PM_SPD; else if (phy->smart_speed == e1000_smart_speed_off) data &= ~E1000_82580_PM_SPD; } E1000_WRITE_REG(hw, E1000_82580_PHY_POWER_MGMT, data); return E1000_SUCCESS; } /** * e1000_set_d3_lplu_state_82580 - Sets low power link up state for D3 * @hw: pointer to the HW structure * @active: boolean used to enable/disable lplu * * Success returns 0, Failure returns 1 * * The low power link up (lplu) state is set to the power management level D3 * and SmartSpeed is disabled when active is TRUE, else clear lplu for D3 * and enable Smartspeed. LPLU and Smartspeed are mutually exclusive. LPLU * is used during Dx states where the power conservation is most important. * During driver activity, SmartSpeed should be enabled so performance is * maintained. **/ s32 e1000_set_d3_lplu_state_82580(struct e1000_hw *hw, bool active) { struct e1000_phy_info *phy = &hw->phy; u32 data; DEBUGFUNC("e1000_set_d3_lplu_state_82580"); data = E1000_READ_REG(hw, E1000_82580_PHY_POWER_MGMT); if (!active) { data &= ~E1000_82580_PM_D3_LPLU; /* * LPLU and SmartSpeed are mutually exclusive. LPLU is used * during Dx states where the power conservation is most * important. During driver activity we should enable * SmartSpeed, so performance is maintained. */ if (phy->smart_speed == e1000_smart_speed_on) data |= E1000_82580_PM_SPD; else if (phy->smart_speed == e1000_smart_speed_off) data &= ~E1000_82580_PM_SPD; } else if ((phy->autoneg_advertised == E1000_ALL_SPEED_DUPLEX) || (phy->autoneg_advertised == E1000_ALL_NOT_GIG) || (phy->autoneg_advertised == E1000_ALL_10_SPEED)) { data |= E1000_82580_PM_D3_LPLU; /* When LPLU is enabled, we should disable SmartSpeed */ data &= ~E1000_82580_PM_SPD; } E1000_WRITE_REG(hw, E1000_82580_PHY_POWER_MGMT, data); return E1000_SUCCESS; } /** * e1000_acquire_nvm_82575 - Request for access to EEPROM * @hw: pointer to the HW structure * * Acquire the necessary semaphores for exclusive access to the EEPROM. * Set the EEPROM access request bit and wait for EEPROM access grant bit. * Return successful if access grant bit set, else clear the request for * EEPROM access and return -E1000_ERR_NVM (-1). **/ static s32 e1000_acquire_nvm_82575(struct e1000_hw *hw) { s32 ret_val = E1000_SUCCESS; DEBUGFUNC("e1000_acquire_nvm_82575"); - ret_val = e1000_acquire_swfw_sync_82575(hw, E1000_SWFW_EEP_SM); + ret_val = e1000_acquire_swfw_sync(hw, E1000_SWFW_EEP_SM); if (ret_val) goto out; /* * Check if there is some access * error this access may hook on */ if (hw->mac.type == e1000_i350) { u32 eecd = E1000_READ_REG(hw, E1000_EECD); if (eecd & (E1000_EECD_BLOCKED | E1000_EECD_ABORT | E1000_EECD_TIMEOUT)) { /* Clear all access error flags */ E1000_WRITE_REG(hw, E1000_EECD, eecd | E1000_EECD_ERROR_CLR); DEBUGOUT("Nvm bit banging access error detected and cleared.\n"); } } if (hw->mac.type == e1000_82580) { u32 eecd = E1000_READ_REG(hw, E1000_EECD); if (eecd & E1000_EECD_BLOCKED) { /* Clear access error flag */ E1000_WRITE_REG(hw, E1000_EECD, eecd | E1000_EECD_BLOCKED); DEBUGOUT("Nvm bit banging access error detected and cleared.\n"); } } ret_val = e1000_acquire_nvm_generic(hw); if (ret_val) - e1000_release_swfw_sync_82575(hw, E1000_SWFW_EEP_SM); + e1000_release_swfw_sync(hw, E1000_SWFW_EEP_SM); out: return ret_val; } /** * e1000_release_nvm_82575 - Release exclusive access to EEPROM * @hw: pointer to the HW structure * * Stop any current commands to the EEPROM and clear the EEPROM request bit, * then release the semaphores acquired. **/ static void e1000_release_nvm_82575(struct e1000_hw *hw) { DEBUGFUNC("e1000_release_nvm_82575"); e1000_release_nvm_generic(hw); - e1000_release_swfw_sync_82575(hw, E1000_SWFW_EEP_SM); -} - -/** - * e1000_acquire_swfw_sync_82575 - Acquire SW/FW semaphore - * @hw: pointer to the HW structure - * @mask: specifies which semaphore to acquire - * - * Acquire the SW/FW semaphore to access the PHY or NVM. The mask - * will also specify which port we're acquiring the lock for. - **/ -static s32 e1000_acquire_swfw_sync_82575(struct e1000_hw *hw, u16 mask) -{ - u32 swfw_sync; - u32 swmask = mask; - u32 fwmask = mask << 16; - s32 ret_val = E1000_SUCCESS; - s32 i = 0, timeout = 200; - - DEBUGFUNC("e1000_acquire_swfw_sync_82575"); - - while (i < timeout) { - if (e1000_get_hw_semaphore_generic(hw)) { - ret_val = -E1000_ERR_SWFW_SYNC; - goto out; - } - - swfw_sync = E1000_READ_REG(hw, E1000_SW_FW_SYNC); - if (!(swfw_sync & (fwmask | swmask))) - break; - - /* - * Firmware currently using resource (fwmask) - * or other software thread using resource (swmask) - */ - e1000_put_hw_semaphore_generic(hw); - msec_delay_irq(5); - i++; - } - - if (i == timeout) { - DEBUGOUT("Driver can't access resource, SW_FW_SYNC timeout.\n"); - ret_val = -E1000_ERR_SWFW_SYNC; - goto out; - } - - swfw_sync |= swmask; - E1000_WRITE_REG(hw, E1000_SW_FW_SYNC, swfw_sync); - - e1000_put_hw_semaphore_generic(hw); - -out: - return ret_val; -} - -/** - * e1000_release_swfw_sync_82575 - Release SW/FW semaphore - * @hw: pointer to the HW structure - * @mask: specifies which semaphore to acquire - * - * Release the SW/FW semaphore used to access the PHY or NVM. The mask - * will also specify which port we're releasing the lock for. - **/ -static void e1000_release_swfw_sync_82575(struct e1000_hw *hw, u16 mask) -{ - u32 swfw_sync; - - DEBUGFUNC("e1000_release_swfw_sync_82575"); - - while (e1000_get_hw_semaphore_generic(hw) != E1000_SUCCESS) - ; /* Empty */ - - swfw_sync = E1000_READ_REG(hw, E1000_SW_FW_SYNC); - swfw_sync &= ~mask; - E1000_WRITE_REG(hw, E1000_SW_FW_SYNC, swfw_sync); - - e1000_put_hw_semaphore_generic(hw); + e1000_release_swfw_sync(hw, E1000_SWFW_EEP_SM); } /** * e1000_get_cfg_done_82575 - Read config done bit * @hw: pointer to the HW structure * * Read the management control register for the config done bit for * completion status. NOTE: silicon which is EEPROM-less will fail trying * to read the config done bit, so an error is *ONLY* logged and returns * E1000_SUCCESS. If we were to return with error, EEPROM-less silicon * would not be able to be reset or change link. **/ static s32 e1000_get_cfg_done_82575(struct e1000_hw *hw) { s32 timeout = PHY_CFG_TIMEOUT; u32 mask = E1000_NVM_CFG_DONE_PORT_0; DEBUGFUNC("e1000_get_cfg_done_82575"); if (hw->bus.func == E1000_FUNC_1) mask = E1000_NVM_CFG_DONE_PORT_1; else if (hw->bus.func == E1000_FUNC_2) mask = E1000_NVM_CFG_DONE_PORT_2; else if (hw->bus.func == E1000_FUNC_3) mask = E1000_NVM_CFG_DONE_PORT_3; while (timeout) { if (E1000_READ_REG(hw, E1000_EEMNGCTL) & mask) break; msec_delay(1); timeout--; } if (!timeout) DEBUGOUT("MNG configuration cycle has not completed.\n"); /* If EEPROM is not marked present, init the PHY manually */ if (!(E1000_READ_REG(hw, E1000_EECD) & E1000_EECD_PRES) && (hw->phy.type == e1000_phy_igp_3)) e1000_phy_init_script_igp3(hw); return E1000_SUCCESS; } /** * e1000_get_link_up_info_82575 - Get link speed/duplex info * @hw: pointer to the HW structure * @speed: stores the current speed * @duplex: stores the current duplex * * This is a wrapper function, if using the serial gigabit media independent * interface, use PCS to retrieve the link speed and duplex information. * Otherwise, use the generic function to get the link speed and duplex info. **/ static s32 e1000_get_link_up_info_82575(struct e1000_hw *hw, u16 *speed, u16 *duplex) { s32 ret_val; DEBUGFUNC("e1000_get_link_up_info_82575"); if (hw->phy.media_type != e1000_media_type_copper) ret_val = e1000_get_pcs_speed_and_duplex_82575(hw, speed, duplex); else ret_val = e1000_get_speed_and_duplex_copper_generic(hw, speed, duplex); return ret_val; } /** * e1000_check_for_link_82575 - Check for link * @hw: pointer to the HW structure * * If sgmii is enabled, then use the pcs register to determine link, otherwise * use the generic interface for determining link. **/ static s32 e1000_check_for_link_82575(struct e1000_hw *hw) { s32 ret_val; u16 speed, duplex; DEBUGFUNC("e1000_check_for_link_82575"); if (hw->phy.media_type != e1000_media_type_copper) { ret_val = e1000_get_pcs_speed_and_duplex_82575(hw, &speed, &duplex); /* * Use this flag to determine if link needs to be checked or * not. If we have link clear the flag so that we do not * continue to check for link. */ hw->mac.get_link_status = !hw->mac.serdes_has_link; /* * Configure Flow Control now that Auto-Neg has completed. * First, we need to restore the desired flow control * settings because we may have had to re-autoneg with a * different link partner. */ ret_val = e1000_config_fc_after_link_up_generic(hw); if (ret_val) DEBUGOUT("Error configuring flow control\n"); } else { ret_val = e1000_check_for_copper_link_generic(hw); } return ret_val; } /** * e1000_check_for_link_media_swap - Check which M88E1112 interface linked * @hw: pointer to the HW structure * * Poll the M88E1112 interfaces to see which interface achieved link. */ static s32 e1000_check_for_link_media_swap(struct e1000_hw *hw) { struct e1000_phy_info *phy = &hw->phy; s32 ret_val; u16 data; u8 port = 0; DEBUGFUNC("e1000_check_for_link_media_swap"); /* Check for copper. */ ret_val = phy->ops.write_reg(hw, E1000_M88E1112_PAGE_ADDR, 0); if (ret_val) return ret_val; ret_val = phy->ops.read_reg(hw, E1000_M88E1112_STATUS, &data); if (ret_val) return ret_val; if (data & E1000_M88E1112_STATUS_LINK) port = E1000_MEDIA_PORT_COPPER; /* Check for other. */ ret_val = phy->ops.write_reg(hw, E1000_M88E1112_PAGE_ADDR, 1); if (ret_val) return ret_val; ret_val = phy->ops.read_reg(hw, E1000_M88E1112_STATUS, &data); if (ret_val) return ret_val; if (data & E1000_M88E1112_STATUS_LINK) port = E1000_MEDIA_PORT_OTHER; /* Determine if a swap needs to happen. */ if (port && (hw->dev_spec._82575.media_port != port)) { hw->dev_spec._82575.media_port = port; hw->dev_spec._82575.media_changed = TRUE; } if (port == E1000_MEDIA_PORT_COPPER) { /* reset page to 0 */ ret_val = phy->ops.write_reg(hw, E1000_M88E1112_PAGE_ADDR, 0); if (ret_val) return ret_val; e1000_check_for_link_82575(hw); } else { e1000_check_for_link_82575(hw); /* reset page to 0 */ ret_val = phy->ops.write_reg(hw, E1000_M88E1112_PAGE_ADDR, 0); if (ret_val) return ret_val; } return E1000_SUCCESS; } /** * e1000_power_up_serdes_link_82575 - Power up the serdes link after shutdown * @hw: pointer to the HW structure **/ static void e1000_power_up_serdes_link_82575(struct e1000_hw *hw) { u32 reg; DEBUGFUNC("e1000_power_up_serdes_link_82575"); if ((hw->phy.media_type != e1000_media_type_internal_serdes) && !e1000_sgmii_active_82575(hw)) return; /* Enable PCS to turn on link */ reg = E1000_READ_REG(hw, E1000_PCS_CFG0); reg |= E1000_PCS_CFG_PCS_EN; E1000_WRITE_REG(hw, E1000_PCS_CFG0, reg); /* Power up the laser */ reg = E1000_READ_REG(hw, E1000_CTRL_EXT); reg &= ~E1000_CTRL_EXT_SDP3_DATA; E1000_WRITE_REG(hw, E1000_CTRL_EXT, reg); /* flush the write to verify completion */ E1000_WRITE_FLUSH(hw); msec_delay(1); } /** * e1000_get_pcs_speed_and_duplex_82575 - Retrieve current speed/duplex * @hw: pointer to the HW structure * @speed: stores the current speed * @duplex: stores the current duplex * * Using the physical coding sub-layer (PCS), retrieve the current speed and * duplex, then store the values in the pointers provided. **/ static s32 e1000_get_pcs_speed_and_duplex_82575(struct e1000_hw *hw, u16 *speed, u16 *duplex) { struct e1000_mac_info *mac = &hw->mac; u32 pcs; u32 status; DEBUGFUNC("e1000_get_pcs_speed_and_duplex_82575"); /* * Read the PCS Status register for link state. For non-copper mode, * the status register is not accurate. The PCS status register is * used instead. */ pcs = E1000_READ_REG(hw, E1000_PCS_LSTAT); /* * The link up bit determines when link is up on autoneg. */ if (pcs & E1000_PCS_LSTS_LINK_OK) { mac->serdes_has_link = TRUE; /* Detect and store PCS speed */ if (pcs & E1000_PCS_LSTS_SPEED_1000) *speed = SPEED_1000; else if (pcs & E1000_PCS_LSTS_SPEED_100) *speed = SPEED_100; else *speed = SPEED_10; /* Detect and store PCS duplex */ if (pcs & E1000_PCS_LSTS_DUPLEX_FULL) *duplex = FULL_DUPLEX; else *duplex = HALF_DUPLEX; /* Check if it is an I354 2.5Gb backplane connection. */ if (mac->type == e1000_i354) { status = E1000_READ_REG(hw, E1000_STATUS); if ((status & E1000_STATUS_2P5_SKU) && !(status & E1000_STATUS_2P5_SKU_OVER)) { *speed = SPEED_2500; *duplex = FULL_DUPLEX; DEBUGOUT("2500 Mbs, "); DEBUGOUT("Full Duplex\n"); } } } else { mac->serdes_has_link = FALSE; *speed = 0; *duplex = 0; } return E1000_SUCCESS; } /** * e1000_shutdown_serdes_link_82575 - Remove link during power down * @hw: pointer to the HW structure * * In the case of serdes shut down sfp and PCS on driver unload * when management pass thru is not enabled. **/ void e1000_shutdown_serdes_link_82575(struct e1000_hw *hw) { u32 reg; DEBUGFUNC("e1000_shutdown_serdes_link_82575"); if ((hw->phy.media_type != e1000_media_type_internal_serdes) && !e1000_sgmii_active_82575(hw)) return; if (!e1000_enable_mng_pass_thru(hw)) { /* Disable PCS to turn off link */ reg = E1000_READ_REG(hw, E1000_PCS_CFG0); reg &= ~E1000_PCS_CFG_PCS_EN; E1000_WRITE_REG(hw, E1000_PCS_CFG0, reg); /* shutdown the laser */ reg = E1000_READ_REG(hw, E1000_CTRL_EXT); reg |= E1000_CTRL_EXT_SDP3_DATA; E1000_WRITE_REG(hw, E1000_CTRL_EXT, reg); /* flush the write to verify completion */ E1000_WRITE_FLUSH(hw); msec_delay(1); } return; } /** * e1000_reset_hw_82575 - Reset hardware * @hw: pointer to the HW structure * * This resets the hardware into a known state. **/ static s32 e1000_reset_hw_82575(struct e1000_hw *hw) { u32 ctrl; s32 ret_val; DEBUGFUNC("e1000_reset_hw_82575"); /* * Prevent the PCI-E bus from sticking if there is no TLP connection * on the last TLP read/write transaction when MAC is reset. */ ret_val = e1000_disable_pcie_master_generic(hw); if (ret_val) DEBUGOUT("PCI-E Master disable polling has failed.\n"); /* set the completion timeout for interface */ ret_val = e1000_set_pcie_completion_timeout(hw); if (ret_val) DEBUGOUT("PCI-E Set completion timeout has failed.\n"); DEBUGOUT("Masking off all interrupts\n"); E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff); E1000_WRITE_REG(hw, E1000_RCTL, 0); E1000_WRITE_REG(hw, E1000_TCTL, E1000_TCTL_PSP); E1000_WRITE_FLUSH(hw); msec_delay(10); ctrl = E1000_READ_REG(hw, E1000_CTRL); DEBUGOUT("Issuing a global reset to MAC\n"); E1000_WRITE_REG(hw, E1000_CTRL, ctrl | E1000_CTRL_RST); ret_val = e1000_get_auto_rd_done_generic(hw); if (ret_val) { /* * When auto config read does not complete, do not * return with an error. This can happen in situations * where there is no eeprom and prevents getting link. */ DEBUGOUT("Auto Read Done did not complete\n"); } /* If EEPROM is not present, run manual init scripts */ if (!(E1000_READ_REG(hw, E1000_EECD) & E1000_EECD_PRES)) e1000_reset_init_script_82575(hw); /* Clear any pending interrupt events. */ E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff); E1000_READ_REG(hw, E1000_ICR); /* Install any alternate MAC address into RAR0 */ ret_val = e1000_check_alt_mac_addr_generic(hw); return ret_val; } /** * e1000_init_hw_82575 - Initialize hardware * @hw: pointer to the HW structure * * This inits the hardware readying it for operation. **/ s32 e1000_init_hw_82575(struct e1000_hw *hw) { struct e1000_mac_info *mac = &hw->mac; s32 ret_val; u16 i, rar_count = mac->rar_entry_count; DEBUGFUNC("e1000_init_hw_82575"); /* Initialize identification LED */ ret_val = mac->ops.id_led_init(hw); if (ret_val) { DEBUGOUT("Error initializing identification LED\n"); /* This is not fatal and we should not stop init due to this */ } /* Disabling VLAN filtering */ DEBUGOUT("Initializing the IEEE VLAN\n"); mac->ops.clear_vfta(hw); /* Setup the receive address */ e1000_init_rx_addrs_generic(hw, rar_count); /* Zero out the Multicast HASH table */ DEBUGOUT("Zeroing the MTA\n"); for (i = 0; i < mac->mta_reg_count; i++) E1000_WRITE_REG_ARRAY(hw, E1000_MTA, i, 0); /* Zero out the Unicast HASH table */ DEBUGOUT("Zeroing the UTA\n"); for (i = 0; i < mac->uta_reg_count; i++) E1000_WRITE_REG_ARRAY(hw, E1000_UTA, i, 0); /* Setup link and flow control */ ret_val = mac->ops.setup_link(hw); /* Set the default MTU size */ hw->dev_spec._82575.mtu = 1500; /* * Clear all of the statistics registers (clear on read). It is * important that we do this after we have tried to establish link * because the symbol error count will increment wildly if there * is no link. */ e1000_clear_hw_cntrs_82575(hw); return ret_val; } /** * e1000_setup_copper_link_82575 - Configure copper link settings * @hw: pointer to the HW structure * * Configures the link for auto-neg or forced speed and duplex. Then we check * for link, once link is established calls to configure collision distance * and flow control are called. **/ static s32 e1000_setup_copper_link_82575(struct e1000_hw *hw) { u32 ctrl; s32 ret_val; u32 phpm_reg; DEBUGFUNC("e1000_setup_copper_link_82575"); ctrl = E1000_READ_REG(hw, E1000_CTRL); ctrl |= E1000_CTRL_SLU; ctrl &= ~(E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX); E1000_WRITE_REG(hw, E1000_CTRL, ctrl); /* Clear Go Link Disconnect bit on supported devices */ switch (hw->mac.type) { case e1000_82580: case e1000_i350: case e1000_i210: case e1000_i211: phpm_reg = E1000_READ_REG(hw, E1000_82580_PHY_POWER_MGMT); phpm_reg &= ~E1000_82580_PM_GO_LINKD; E1000_WRITE_REG(hw, E1000_82580_PHY_POWER_MGMT, phpm_reg); break; default: break; } ret_val = e1000_setup_serdes_link_82575(hw); if (ret_val) goto out; if (e1000_sgmii_active_82575(hw)) { /* allow time for SFP cage time to power up phy */ msec_delay(300); ret_val = hw->phy.ops.reset(hw); if (ret_val) { DEBUGOUT("Error resetting the PHY.\n"); goto out; } } switch (hw->phy.type) { case e1000_phy_i210: case e1000_phy_m88: switch (hw->phy.id) { case I347AT4_E_PHY_ID: case M88E1112_E_PHY_ID: case M88E1340M_E_PHY_ID: case M88E1543_E_PHY_ID: case M88E1512_E_PHY_ID: case I210_I_PHY_ID: ret_val = e1000_copper_link_setup_m88_gen2(hw); break; default: ret_val = e1000_copper_link_setup_m88(hw); break; } break; case e1000_phy_igp_3: ret_val = e1000_copper_link_setup_igp(hw); break; case e1000_phy_82580: ret_val = e1000_copper_link_setup_82577(hw); break; default: ret_val = -E1000_ERR_PHY; break; } if (ret_val) goto out; ret_val = e1000_setup_copper_link_generic(hw); out: return ret_val; } /** * e1000_setup_serdes_link_82575 - Setup link for serdes * @hw: pointer to the HW structure * * Configure the physical coding sub-layer (PCS) link. The PCS link is * used on copper connections where the serialized gigabit media independent * interface (sgmii), or serdes fiber is being used. Configures the link * for auto-negotiation or forces speed/duplex. **/ static s32 e1000_setup_serdes_link_82575(struct e1000_hw *hw) { u32 ctrl_ext, ctrl_reg, reg, anadv_reg; bool pcs_autoneg; s32 ret_val = E1000_SUCCESS; u16 data; DEBUGFUNC("e1000_setup_serdes_link_82575"); if ((hw->phy.media_type != e1000_media_type_internal_serdes) && !e1000_sgmii_active_82575(hw)) return ret_val; /* * On the 82575, SerDes loopback mode persists until it is * explicitly turned off or a power cycle is performed. A read to * the register does not indicate its status. Therefore, we ensure * loopback mode is disabled during initialization. */ E1000_WRITE_REG(hw, E1000_SCTL, E1000_SCTL_DISABLE_SERDES_LOOPBACK); /* power on the sfp cage if present */ ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT); ctrl_ext &= ~E1000_CTRL_EXT_SDP3_DATA; E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext); ctrl_reg = E1000_READ_REG(hw, E1000_CTRL); ctrl_reg |= E1000_CTRL_SLU; /* set both sw defined pins on 82575/82576*/ if (hw->mac.type == e1000_82575 || hw->mac.type == e1000_82576) ctrl_reg |= E1000_CTRL_SWDPIN0 | E1000_CTRL_SWDPIN1; reg = E1000_READ_REG(hw, E1000_PCS_LCTL); /* default pcs_autoneg to the same setting as mac autoneg */ pcs_autoneg = hw->mac.autoneg; switch (ctrl_ext & E1000_CTRL_EXT_LINK_MODE_MASK) { case E1000_CTRL_EXT_LINK_MODE_SGMII: /* sgmii mode lets the phy handle forcing speed/duplex */ pcs_autoneg = TRUE; /* autoneg time out should be disabled for SGMII mode */ reg &= ~(E1000_PCS_LCTL_AN_TIMEOUT); break; case E1000_CTRL_EXT_LINK_MODE_1000BASE_KX: /* disable PCS autoneg and support parallel detect only */ pcs_autoneg = FALSE; /* fall through to default case */ default: if (hw->mac.type == e1000_82575 || hw->mac.type == e1000_82576) { ret_val = hw->nvm.ops.read(hw, NVM_COMPAT, 1, &data); if (ret_val) { DEBUGOUT("NVM Read Error\n"); return ret_val; } if (data & E1000_EEPROM_PCS_AUTONEG_DISABLE_BIT) pcs_autoneg = FALSE; } /* * non-SGMII modes only supports a speed of 1000/Full for the * link so it is best to just force the MAC and let the pcs * link either autoneg or be forced to 1000/Full */ ctrl_reg |= E1000_CTRL_SPD_1000 | E1000_CTRL_FRCSPD | E1000_CTRL_FD | E1000_CTRL_FRCDPX; /* set speed of 1000/Full if speed/duplex is forced */ reg |= E1000_PCS_LCTL_FSV_1000 | E1000_PCS_LCTL_FDV_FULL; break; } E1000_WRITE_REG(hw, E1000_CTRL, ctrl_reg); /* * New SerDes mode allows for forcing speed or autonegotiating speed * at 1gb. Autoneg should be default set by most drivers. This is the * mode that will be compatible with older link partners and switches. * However, both are supported by the hardware and some drivers/tools. */ reg &= ~(E1000_PCS_LCTL_AN_ENABLE | E1000_PCS_LCTL_FLV_LINK_UP | E1000_PCS_LCTL_FSD | E1000_PCS_LCTL_FORCE_LINK); if (pcs_autoneg) { /* Set PCS register for autoneg */ reg |= E1000_PCS_LCTL_AN_ENABLE | /* Enable Autoneg */ E1000_PCS_LCTL_AN_RESTART; /* Restart autoneg */ /* Disable force flow control for autoneg */ reg &= ~E1000_PCS_LCTL_FORCE_FCTRL; /* Configure flow control advertisement for autoneg */ anadv_reg = E1000_READ_REG(hw, E1000_PCS_ANADV); anadv_reg &= ~(E1000_TXCW_ASM_DIR | E1000_TXCW_PAUSE); switch (hw->fc.requested_mode) { case e1000_fc_full: case e1000_fc_rx_pause: anadv_reg |= E1000_TXCW_ASM_DIR; anadv_reg |= E1000_TXCW_PAUSE; break; case e1000_fc_tx_pause: anadv_reg |= E1000_TXCW_ASM_DIR; break; default: break; } E1000_WRITE_REG(hw, E1000_PCS_ANADV, anadv_reg); DEBUGOUT1("Configuring Autoneg:PCS_LCTL=0x%08X\n", reg); } else { /* Set PCS register for forced link */ reg |= E1000_PCS_LCTL_FSD; /* Force Speed */ /* Force flow control for forced link */ reg |= E1000_PCS_LCTL_FORCE_FCTRL; DEBUGOUT1("Configuring Forced Link:PCS_LCTL=0x%08X\n", reg); } E1000_WRITE_REG(hw, E1000_PCS_LCTL, reg); if (!pcs_autoneg && !e1000_sgmii_active_82575(hw)) e1000_force_mac_fc_generic(hw); return ret_val; } /** * e1000_get_media_type_82575 - derives current media type. * @hw: pointer to the HW structure * * The media type is chosen reflecting few settings. * The following are taken into account: * - link mode set in the current port Init Control Word #3 * - current link mode settings in CSR register * - MDIO vs. I2C PHY control interface chosen * - SFP module media type **/ static s32 e1000_get_media_type_82575(struct e1000_hw *hw) { struct e1000_dev_spec_82575 *dev_spec = &hw->dev_spec._82575; s32 ret_val = E1000_SUCCESS; u32 ctrl_ext = 0; u32 link_mode = 0; /* Set internal phy as default */ dev_spec->sgmii_active = FALSE; dev_spec->module_plugged = FALSE; /* Get CSR setting */ ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT); /* extract link mode setting */ link_mode = ctrl_ext & E1000_CTRL_EXT_LINK_MODE_MASK; switch (link_mode) { case E1000_CTRL_EXT_LINK_MODE_1000BASE_KX: hw->phy.media_type = e1000_media_type_internal_serdes; break; case E1000_CTRL_EXT_LINK_MODE_GMII: hw->phy.media_type = e1000_media_type_copper; break; case E1000_CTRL_EXT_LINK_MODE_SGMII: /* Get phy control interface type set (MDIO vs. I2C)*/ if (e1000_sgmii_uses_mdio_82575(hw)) { hw->phy.media_type = e1000_media_type_copper; dev_spec->sgmii_active = TRUE; break; } /* fall through for I2C based SGMII */ case E1000_CTRL_EXT_LINK_MODE_PCIE_SERDES: /* read media type from SFP EEPROM */ ret_val = e1000_set_sfp_media_type_82575(hw); if ((ret_val != E1000_SUCCESS) || (hw->phy.media_type == e1000_media_type_unknown)) { /* * If media type was not identified then return media * type defined by the CTRL_EXT settings. */ hw->phy.media_type = e1000_media_type_internal_serdes; if (link_mode == E1000_CTRL_EXT_LINK_MODE_SGMII) { hw->phy.media_type = e1000_media_type_copper; dev_spec->sgmii_active = TRUE; } break; } /* do not change link mode for 100BaseFX */ if (dev_spec->eth_flags.e100_base_fx) break; /* change current link mode setting */ ctrl_ext &= ~E1000_CTRL_EXT_LINK_MODE_MASK; if (hw->phy.media_type == e1000_media_type_copper) ctrl_ext |= E1000_CTRL_EXT_LINK_MODE_SGMII; else ctrl_ext |= E1000_CTRL_EXT_LINK_MODE_PCIE_SERDES; E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext); break; } return ret_val; } /** * e1000_set_sfp_media_type_82575 - derives SFP module media type. * @hw: pointer to the HW structure * * The media type is chosen based on SFP module. * compatibility flags retrieved from SFP ID EEPROM. **/ static s32 e1000_set_sfp_media_type_82575(struct e1000_hw *hw) { s32 ret_val = E1000_ERR_CONFIG; u32 ctrl_ext = 0; struct e1000_dev_spec_82575 *dev_spec = &hw->dev_spec._82575; struct sfp_e1000_flags *eth_flags = &dev_spec->eth_flags; u8 tranceiver_type = 0; s32 timeout = 3; /* Turn I2C interface ON and power on sfp cage */ ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT); ctrl_ext &= ~E1000_CTRL_EXT_SDP3_DATA; E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext | E1000_CTRL_I2C_ENA); E1000_WRITE_FLUSH(hw); /* Read SFP module data */ while (timeout) { ret_val = e1000_read_sfp_data_byte(hw, E1000_I2CCMD_SFP_DATA_ADDR(E1000_SFF_IDENTIFIER_OFFSET), &tranceiver_type); if (ret_val == E1000_SUCCESS) break; msec_delay(100); timeout--; } if (ret_val != E1000_SUCCESS) goto out; ret_val = e1000_read_sfp_data_byte(hw, E1000_I2CCMD_SFP_DATA_ADDR(E1000_SFF_ETH_FLAGS_OFFSET), (u8 *)eth_flags); if (ret_val != E1000_SUCCESS) goto out; /* Check if there is some SFP module plugged and powered */ if ((tranceiver_type == E1000_SFF_IDENTIFIER_SFP) || (tranceiver_type == E1000_SFF_IDENTIFIER_SFF)) { dev_spec->module_plugged = TRUE; if (eth_flags->e1000_base_lx || eth_flags->e1000_base_sx) { hw->phy.media_type = e1000_media_type_internal_serdes; } else if (eth_flags->e100_base_fx) { dev_spec->sgmii_active = TRUE; hw->phy.media_type = e1000_media_type_internal_serdes; } else if (eth_flags->e1000_base_t) { dev_spec->sgmii_active = TRUE; hw->phy.media_type = e1000_media_type_copper; } else { hw->phy.media_type = e1000_media_type_unknown; DEBUGOUT("PHY module has not been recognized\n"); goto out; } } else { hw->phy.media_type = e1000_media_type_unknown; } ret_val = E1000_SUCCESS; out: /* Restore I2C interface setting */ E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext); return ret_val; } /** * e1000_valid_led_default_82575 - Verify a valid default LED config * @hw: pointer to the HW structure * @data: pointer to the NVM (EEPROM) * * Read the EEPROM for the current default LED configuration. If the * LED configuration is not valid, set to a valid LED configuration. **/ static s32 e1000_valid_led_default_82575(struct e1000_hw *hw, u16 *data) { s32 ret_val; DEBUGFUNC("e1000_valid_led_default_82575"); ret_val = hw->nvm.ops.read(hw, NVM_ID_LED_SETTINGS, 1, data); if (ret_val) { DEBUGOUT("NVM Read Error\n"); goto out; } if (*data == ID_LED_RESERVED_0000 || *data == ID_LED_RESERVED_FFFF) { switch (hw->phy.media_type) { case e1000_media_type_internal_serdes: *data = ID_LED_DEFAULT_82575_SERDES; break; case e1000_media_type_copper: default: *data = ID_LED_DEFAULT; break; } } out: return ret_val; } /** * e1000_sgmii_active_82575 - Return sgmii state * @hw: pointer to the HW structure * * 82575 silicon has a serialized gigabit media independent interface (sgmii) * which can be enabled for use in the embedded applications. Simply * return the current state of the sgmii interface. **/ static bool e1000_sgmii_active_82575(struct e1000_hw *hw) { struct e1000_dev_spec_82575 *dev_spec = &hw->dev_spec._82575; return dev_spec->sgmii_active; } /** * e1000_reset_init_script_82575 - Inits HW defaults after reset * @hw: pointer to the HW structure * * Inits recommended HW defaults after a reset when there is no EEPROM * detected. This is only for the 82575. **/ static s32 e1000_reset_init_script_82575(struct e1000_hw *hw) { DEBUGFUNC("e1000_reset_init_script_82575"); if (hw->mac.type == e1000_82575) { DEBUGOUT("Running reset init script for 82575\n"); /* SerDes configuration via SERDESCTRL */ e1000_write_8bit_ctrl_reg_generic(hw, E1000_SCTL, 0x00, 0x0C); e1000_write_8bit_ctrl_reg_generic(hw, E1000_SCTL, 0x01, 0x78); e1000_write_8bit_ctrl_reg_generic(hw, E1000_SCTL, 0x1B, 0x23); e1000_write_8bit_ctrl_reg_generic(hw, E1000_SCTL, 0x23, 0x15); /* CCM configuration via CCMCTL register */ e1000_write_8bit_ctrl_reg_generic(hw, E1000_CCMCTL, 0x14, 0x00); e1000_write_8bit_ctrl_reg_generic(hw, E1000_CCMCTL, 0x10, 0x00); /* PCIe lanes configuration */ e1000_write_8bit_ctrl_reg_generic(hw, E1000_GIOCTL, 0x00, 0xEC); e1000_write_8bit_ctrl_reg_generic(hw, E1000_GIOCTL, 0x61, 0xDF); e1000_write_8bit_ctrl_reg_generic(hw, E1000_GIOCTL, 0x34, 0x05); e1000_write_8bit_ctrl_reg_generic(hw, E1000_GIOCTL, 0x2F, 0x81); /* PCIe PLL Configuration */ e1000_write_8bit_ctrl_reg_generic(hw, E1000_SCCTL, 0x02, 0x47); e1000_write_8bit_ctrl_reg_generic(hw, E1000_SCCTL, 0x14, 0x00); e1000_write_8bit_ctrl_reg_generic(hw, E1000_SCCTL, 0x10, 0x00); } return E1000_SUCCESS; } /** * e1000_read_mac_addr_82575 - Read device MAC address * @hw: pointer to the HW structure **/ static s32 e1000_read_mac_addr_82575(struct e1000_hw *hw) { s32 ret_val; DEBUGFUNC("e1000_read_mac_addr_82575"); /* * If there's an alternate MAC address place it in RAR0 * so that it will override the Si installed default perm * address. */ ret_val = e1000_check_alt_mac_addr_generic(hw); if (ret_val) goto out; ret_val = e1000_read_mac_addr_generic(hw); out: return ret_val; } /** * e1000_config_collision_dist_82575 - Configure collision distance * @hw: pointer to the HW structure * * Configures the collision distance to the default value and is used * during link setup. **/ static void e1000_config_collision_dist_82575(struct e1000_hw *hw) { u32 tctl_ext; DEBUGFUNC("e1000_config_collision_dist_82575"); tctl_ext = E1000_READ_REG(hw, E1000_TCTL_EXT); tctl_ext &= ~E1000_TCTL_EXT_COLD; tctl_ext |= E1000_COLLISION_DISTANCE << E1000_TCTL_EXT_COLD_SHIFT; E1000_WRITE_REG(hw, E1000_TCTL_EXT, tctl_ext); E1000_WRITE_FLUSH(hw); } /** * e1000_power_down_phy_copper_82575 - Remove link during PHY power down * @hw: pointer to the HW structure * * In the case of a PHY power down to save power, or to turn off link during a * driver unload, or wake on lan is not enabled, remove the link. **/ static void e1000_power_down_phy_copper_82575(struct e1000_hw *hw) { struct e1000_phy_info *phy = &hw->phy; if (!(phy->ops.check_reset_block)) return; /* If the management interface is not enabled, then power down */ if (!(e1000_enable_mng_pass_thru(hw) || phy->ops.check_reset_block(hw))) e1000_power_down_phy_copper(hw); return; } /** * e1000_clear_hw_cntrs_82575 - Clear device specific hardware counters * @hw: pointer to the HW structure * * Clears the hardware counters by reading the counter registers. **/ static void e1000_clear_hw_cntrs_82575(struct e1000_hw *hw) { DEBUGFUNC("e1000_clear_hw_cntrs_82575"); e1000_clear_hw_cntrs_base_generic(hw); E1000_READ_REG(hw, E1000_PRC64); E1000_READ_REG(hw, E1000_PRC127); E1000_READ_REG(hw, E1000_PRC255); E1000_READ_REG(hw, E1000_PRC511); E1000_READ_REG(hw, E1000_PRC1023); E1000_READ_REG(hw, E1000_PRC1522); E1000_READ_REG(hw, E1000_PTC64); E1000_READ_REG(hw, E1000_PTC127); E1000_READ_REG(hw, E1000_PTC255); E1000_READ_REG(hw, E1000_PTC511); E1000_READ_REG(hw, E1000_PTC1023); E1000_READ_REG(hw, E1000_PTC1522); E1000_READ_REG(hw, E1000_ALGNERRC); E1000_READ_REG(hw, E1000_RXERRC); E1000_READ_REG(hw, E1000_TNCRS); E1000_READ_REG(hw, E1000_CEXTERR); E1000_READ_REG(hw, E1000_TSCTC); E1000_READ_REG(hw, E1000_TSCTFC); E1000_READ_REG(hw, E1000_MGTPRC); E1000_READ_REG(hw, E1000_MGTPDC); E1000_READ_REG(hw, E1000_MGTPTC); E1000_READ_REG(hw, E1000_IAC); E1000_READ_REG(hw, E1000_ICRXOC); E1000_READ_REG(hw, E1000_ICRXPTC); E1000_READ_REG(hw, E1000_ICRXATC); E1000_READ_REG(hw, E1000_ICTXPTC); E1000_READ_REG(hw, E1000_ICTXATC); E1000_READ_REG(hw, E1000_ICTXQEC); E1000_READ_REG(hw, E1000_ICTXQMTC); E1000_READ_REG(hw, E1000_ICRXDMTC); E1000_READ_REG(hw, E1000_CBTMPC); E1000_READ_REG(hw, E1000_HTDPMC); E1000_READ_REG(hw, E1000_CBRMPC); E1000_READ_REG(hw, E1000_RPTHC); E1000_READ_REG(hw, E1000_HGPTC); E1000_READ_REG(hw, E1000_HTCBDPC); E1000_READ_REG(hw, E1000_HGORCL); E1000_READ_REG(hw, E1000_HGORCH); E1000_READ_REG(hw, E1000_HGOTCL); E1000_READ_REG(hw, E1000_HGOTCH); E1000_READ_REG(hw, E1000_LENERRS); /* This register should not be read in copper configurations */ if ((hw->phy.media_type == e1000_media_type_internal_serdes) || e1000_sgmii_active_82575(hw)) E1000_READ_REG(hw, E1000_SCVPC); } /** * e1000_rx_fifo_flush_82575 - Clean rx fifo after Rx enable * @hw: pointer to the HW structure * * After Rx enable, if manageability is enabled then there is likely some * bad data at the start of the fifo and possibly in the DMA fifo. This * function clears the fifos and flushes any packets that came in as rx was * being enabled. **/ void e1000_rx_fifo_flush_82575(struct e1000_hw *hw) { u32 rctl, rlpml, rxdctl[4], rfctl, temp_rctl, rx_enabled; int i, ms_wait; DEBUGFUNC("e1000_rx_fifo_flush_82575"); /* disable IPv6 options as per hardware errata */ rfctl = E1000_READ_REG(hw, E1000_RFCTL); rfctl |= E1000_RFCTL_IPV6_EX_DIS; E1000_WRITE_REG(hw, E1000_RFCTL, rfctl); if (hw->mac.type != e1000_82575 || !(E1000_READ_REG(hw, E1000_MANC) & E1000_MANC_RCV_TCO_EN)) return; /* Disable all Rx queues */ for (i = 0; i < 4; i++) { rxdctl[i] = E1000_READ_REG(hw, E1000_RXDCTL(i)); E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl[i] & ~E1000_RXDCTL_QUEUE_ENABLE); } /* Poll all queues to verify they have shut down */ for (ms_wait = 0; ms_wait < 10; ms_wait++) { msec_delay(1); rx_enabled = 0; for (i = 0; i < 4; i++) rx_enabled |= E1000_READ_REG(hw, E1000_RXDCTL(i)); if (!(rx_enabled & E1000_RXDCTL_QUEUE_ENABLE)) break; } if (ms_wait == 10) DEBUGOUT("Queue disable timed out after 10ms\n"); /* Clear RLPML, RCTL.SBP, RFCTL.LEF, and set RCTL.LPE so that all * incoming packets are rejected. Set enable and wait 2ms so that * any packet that was coming in as RCTL.EN was set is flushed */ E1000_WRITE_REG(hw, E1000_RFCTL, rfctl & ~E1000_RFCTL_LEF); rlpml = E1000_READ_REG(hw, E1000_RLPML); E1000_WRITE_REG(hw, E1000_RLPML, 0); rctl = E1000_READ_REG(hw, E1000_RCTL); temp_rctl = rctl & ~(E1000_RCTL_EN | E1000_RCTL_SBP); temp_rctl |= E1000_RCTL_LPE; E1000_WRITE_REG(hw, E1000_RCTL, temp_rctl); E1000_WRITE_REG(hw, E1000_RCTL, temp_rctl | E1000_RCTL_EN); E1000_WRITE_FLUSH(hw); msec_delay(2); /* Enable Rx queues that were previously enabled and restore our * previous state */ for (i = 0; i < 4; i++) E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl[i]); E1000_WRITE_REG(hw, E1000_RCTL, rctl); E1000_WRITE_FLUSH(hw); E1000_WRITE_REG(hw, E1000_RLPML, rlpml); E1000_WRITE_REG(hw, E1000_RFCTL, rfctl); /* Flush receive errors generated by workaround */ E1000_READ_REG(hw, E1000_ROC); E1000_READ_REG(hw, E1000_RNBC); E1000_READ_REG(hw, E1000_MPC); } /** * e1000_set_pcie_completion_timeout - set pci-e completion timeout * @hw: pointer to the HW structure * * The defaults for 82575 and 82576 should be in the range of 50us to 50ms, * however the hardware default for these parts is 500us to 1ms which is less * than the 10ms recommended by the pci-e spec. To address this we need to * increase the value to either 10ms to 200ms for capability version 1 config, * or 16ms to 55ms for version 2. **/ static s32 e1000_set_pcie_completion_timeout(struct e1000_hw *hw) { u32 gcr = E1000_READ_REG(hw, E1000_GCR); s32 ret_val = E1000_SUCCESS; u16 pcie_devctl2; /* only take action if timeout value is defaulted to 0 */ if (gcr & E1000_GCR_CMPL_TMOUT_MASK) goto out; /* * if capababilities version is type 1 we can write the * timeout of 10ms to 200ms through the GCR register */ if (!(gcr & E1000_GCR_CAP_VER2)) { gcr |= E1000_GCR_CMPL_TMOUT_10ms; goto out; } /* * for version 2 capabilities we need to write the config space * directly in order to set the completion timeout value for * 16ms to 55ms */ ret_val = e1000_read_pcie_cap_reg(hw, PCIE_DEVICE_CONTROL2, &pcie_devctl2); if (ret_val) goto out; pcie_devctl2 |= PCIE_DEVICE_CONTROL2_16ms; ret_val = e1000_write_pcie_cap_reg(hw, PCIE_DEVICE_CONTROL2, &pcie_devctl2); out: /* disable completion timeout resend */ gcr &= ~E1000_GCR_CMPL_TMOUT_RESEND; E1000_WRITE_REG(hw, E1000_GCR, gcr); return ret_val; } /** * e1000_vmdq_set_anti_spoofing_pf - enable or disable anti-spoofing * @hw: pointer to the hardware struct * @enable: state to enter, either enabled or disabled * @pf: Physical Function pool - do not set anti-spoofing for the PF * * enables/disables L2 switch anti-spoofing functionality. **/ void e1000_vmdq_set_anti_spoofing_pf(struct e1000_hw *hw, bool enable, int pf) { u32 reg_val, reg_offset; switch (hw->mac.type) { case e1000_82576: reg_offset = E1000_DTXSWC; break; case e1000_i350: case e1000_i354: reg_offset = E1000_TXSWC; break; default: return; } reg_val = E1000_READ_REG(hw, reg_offset); if (enable) { reg_val |= (E1000_DTXSWC_MAC_SPOOF_MASK | E1000_DTXSWC_VLAN_SPOOF_MASK); /* The PF can spoof - it has to in order to * support emulation mode NICs */ reg_val ^= (1 << pf | 1 << (pf + MAX_NUM_VFS)); } else { reg_val &= ~(E1000_DTXSWC_MAC_SPOOF_MASK | E1000_DTXSWC_VLAN_SPOOF_MASK); } E1000_WRITE_REG(hw, reg_offset, reg_val); } /** * e1000_vmdq_set_loopback_pf - enable or disable vmdq loopback * @hw: pointer to the hardware struct * @enable: state to enter, either enabled or disabled * * enables/disables L2 switch loopback functionality. **/ void e1000_vmdq_set_loopback_pf(struct e1000_hw *hw, bool enable) { u32 dtxswc; switch (hw->mac.type) { case e1000_82576: dtxswc = E1000_READ_REG(hw, E1000_DTXSWC); if (enable) dtxswc |= E1000_DTXSWC_VMDQ_LOOPBACK_EN; else dtxswc &= ~E1000_DTXSWC_VMDQ_LOOPBACK_EN; E1000_WRITE_REG(hw, E1000_DTXSWC, dtxswc); break; case e1000_i350: case e1000_i354: dtxswc = E1000_READ_REG(hw, E1000_TXSWC); if (enable) dtxswc |= E1000_DTXSWC_VMDQ_LOOPBACK_EN; else dtxswc &= ~E1000_DTXSWC_VMDQ_LOOPBACK_EN; E1000_WRITE_REG(hw, E1000_TXSWC, dtxswc); break; default: /* Currently no other hardware supports loopback */ break; } } /** * e1000_vmdq_set_replication_pf - enable or disable vmdq replication * @hw: pointer to the hardware struct * @enable: state to enter, either enabled or disabled * * enables/disables replication of packets across multiple pools. **/ void e1000_vmdq_set_replication_pf(struct e1000_hw *hw, bool enable) { u32 vt_ctl = E1000_READ_REG(hw, E1000_VT_CTL); if (enable) vt_ctl |= E1000_VT_CTL_VM_REPL_EN; else vt_ctl &= ~E1000_VT_CTL_VM_REPL_EN; E1000_WRITE_REG(hw, E1000_VT_CTL, vt_ctl); } /** * e1000_read_phy_reg_82580 - Read 82580 MDI control register * @hw: pointer to the HW structure * @offset: register offset to be read * @data: pointer to the read data * * Reads the MDI control register in the PHY at offset and stores the * information read to data. **/ static s32 e1000_read_phy_reg_82580(struct e1000_hw *hw, u32 offset, u16 *data) { s32 ret_val; DEBUGFUNC("e1000_read_phy_reg_82580"); ret_val = hw->phy.ops.acquire(hw); if (ret_val) goto out; ret_val = e1000_read_phy_reg_mdic(hw, offset, data); hw->phy.ops.release(hw); out: return ret_val; } /** * e1000_write_phy_reg_82580 - Write 82580 MDI control register * @hw: pointer to the HW structure * @offset: register offset to write to * @data: data to write to register at offset * * Writes data to MDI control register in the PHY at offset. **/ static s32 e1000_write_phy_reg_82580(struct e1000_hw *hw, u32 offset, u16 data) { s32 ret_val; DEBUGFUNC("e1000_write_phy_reg_82580"); ret_val = hw->phy.ops.acquire(hw); if (ret_val) goto out; ret_val = e1000_write_phy_reg_mdic(hw, offset, data); hw->phy.ops.release(hw); out: return ret_val; } /** * e1000_reset_mdicnfg_82580 - Reset MDICNFG destination and com_mdio bits * @hw: pointer to the HW structure * * This resets the MDICNFG.Destination and MDICNFG.Com_MDIO bits based on * the values found in the EEPROM. This addresses an issue in which these * bits are not restored from EEPROM after reset. **/ static s32 e1000_reset_mdicnfg_82580(struct e1000_hw *hw) { s32 ret_val = E1000_SUCCESS; u32 mdicnfg; u16 nvm_data = 0; DEBUGFUNC("e1000_reset_mdicnfg_82580"); if (hw->mac.type != e1000_82580) goto out; if (!e1000_sgmii_active_82575(hw)) goto out; ret_val = hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A + NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1, &nvm_data); if (ret_val) { DEBUGOUT("NVM Read Error\n"); goto out; } mdicnfg = E1000_READ_REG(hw, E1000_MDICNFG); if (nvm_data & NVM_WORD24_EXT_MDIO) mdicnfg |= E1000_MDICNFG_EXT_MDIO; if (nvm_data & NVM_WORD24_COM_MDIO) mdicnfg |= E1000_MDICNFG_COM_MDIO; E1000_WRITE_REG(hw, E1000_MDICNFG, mdicnfg); out: return ret_val; } /** * e1000_reset_hw_82580 - Reset hardware * @hw: pointer to the HW structure * * This resets function or entire device (all ports, etc.) * to a known state. **/ static s32 e1000_reset_hw_82580(struct e1000_hw *hw) { s32 ret_val = E1000_SUCCESS; /* BH SW mailbox bit in SW_FW_SYNC */ u16 swmbsw_mask = E1000_SW_SYNCH_MB; u32 ctrl; bool global_device_reset = hw->dev_spec._82575.global_device_reset; DEBUGFUNC("e1000_reset_hw_82580"); hw->dev_spec._82575.global_device_reset = FALSE; /* 82580 does not reliably do global_device_reset due to hw errata */ if (hw->mac.type == e1000_82580) global_device_reset = FALSE; /* Get current control state. */ ctrl = E1000_READ_REG(hw, E1000_CTRL); /* * Prevent the PCI-E bus from sticking if there is no TLP connection * on the last TLP read/write transaction when MAC is reset. */ ret_val = e1000_disable_pcie_master_generic(hw); if (ret_val) DEBUGOUT("PCI-E Master disable polling has failed.\n"); DEBUGOUT("Masking off all interrupts\n"); E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff); E1000_WRITE_REG(hw, E1000_RCTL, 0); E1000_WRITE_REG(hw, E1000_TCTL, E1000_TCTL_PSP); E1000_WRITE_FLUSH(hw); msec_delay(10); /* Determine whether or not a global dev reset is requested */ if (global_device_reset && hw->mac.ops.acquire_swfw_sync(hw, swmbsw_mask)) global_device_reset = FALSE; if (global_device_reset && !(E1000_READ_REG(hw, E1000_STATUS) & E1000_STAT_DEV_RST_SET)) ctrl |= E1000_CTRL_DEV_RST; else ctrl |= E1000_CTRL_RST; E1000_WRITE_REG(hw, E1000_CTRL, ctrl); switch (hw->device_id) { case E1000_DEV_ID_DH89XXCC_SGMII: break; default: E1000_WRITE_FLUSH(hw); break; } /* Add delay to insure DEV_RST or RST has time to complete */ msec_delay(5); ret_val = e1000_get_auto_rd_done_generic(hw); if (ret_val) { /* * When auto config read does not complete, do not * return with an error. This can happen in situations * where there is no eeprom and prevents getting link. */ DEBUGOUT("Auto Read Done did not complete\n"); } /* clear global device reset status bit */ E1000_WRITE_REG(hw, E1000_STATUS, E1000_STAT_DEV_RST_SET); /* Clear any pending interrupt events. */ E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff); E1000_READ_REG(hw, E1000_ICR); ret_val = e1000_reset_mdicnfg_82580(hw); if (ret_val) DEBUGOUT("Could not reset MDICNFG based on EEPROM\n"); /* Install any alternate MAC address into RAR0 */ ret_val = e1000_check_alt_mac_addr_generic(hw); /* Release semaphore */ if (global_device_reset) hw->mac.ops.release_swfw_sync(hw, swmbsw_mask); return ret_val; } /** * e1000_rxpbs_adjust_82580 - adjust RXPBS value to reflect actual Rx PBA size * @data: data received by reading RXPBS register * * The 82580 uses a table based approach for packet buffer allocation sizes. * This function converts the retrieved value into the correct table value * 0x0 0x1 0x2 0x3 0x4 0x5 0x6 0x7 * 0x0 36 72 144 1 2 4 8 16 * 0x8 35 70 140 rsv rsv rsv rsv rsv */ u16 e1000_rxpbs_adjust_82580(u32 data) { u16 ret_val = 0; if (data < E1000_82580_RXPBS_TABLE_SIZE) ret_val = e1000_82580_rxpbs_table[data]; return ret_val; } /** * e1000_validate_nvm_checksum_with_offset - Validate EEPROM * checksum * @hw: pointer to the HW structure * @offset: offset in words of the checksum protected region * * Calculates the EEPROM checksum by reading/adding each word of the EEPROM * and then verifies that the sum of the EEPROM is equal to 0xBABA. **/ s32 e1000_validate_nvm_checksum_with_offset(struct e1000_hw *hw, u16 offset) { s32 ret_val = E1000_SUCCESS; u16 checksum = 0; u16 i, nvm_data; DEBUGFUNC("e1000_validate_nvm_checksum_with_offset"); for (i = offset; i < ((NVM_CHECKSUM_REG + offset) + 1); i++) { ret_val = hw->nvm.ops.read(hw, i, 1, &nvm_data); if (ret_val) { DEBUGOUT("NVM Read Error\n"); goto out; } checksum += nvm_data; } if (checksum != (u16) NVM_SUM) { DEBUGOUT("NVM Checksum Invalid\n"); ret_val = -E1000_ERR_NVM; goto out; } out: return ret_val; } /** * e1000_update_nvm_checksum_with_offset - Update EEPROM * checksum * @hw: pointer to the HW structure * @offset: offset in words of the checksum protected region * * Updates the EEPROM checksum by reading/adding each word of the EEPROM * up to the checksum. Then calculates the EEPROM checksum and writes the * value to the EEPROM. **/ s32 e1000_update_nvm_checksum_with_offset(struct e1000_hw *hw, u16 offset) { s32 ret_val; u16 checksum = 0; u16 i, nvm_data; DEBUGFUNC("e1000_update_nvm_checksum_with_offset"); for (i = offset; i < (NVM_CHECKSUM_REG + offset); i++) { ret_val = hw->nvm.ops.read(hw, i, 1, &nvm_data); if (ret_val) { DEBUGOUT("NVM Read Error while updating checksum.\n"); goto out; } checksum += nvm_data; } checksum = (u16) NVM_SUM - checksum; ret_val = hw->nvm.ops.write(hw, (NVM_CHECKSUM_REG + offset), 1, &checksum); if (ret_val) DEBUGOUT("NVM Write Error while updating checksum.\n"); out: return ret_val; } /** * e1000_validate_nvm_checksum_82580 - Validate EEPROM checksum * @hw: pointer to the HW structure * * Calculates the EEPROM section checksum by reading/adding each word of * the EEPROM and then verifies that the sum of the EEPROM is * equal to 0xBABA. **/ static s32 e1000_validate_nvm_checksum_82580(struct e1000_hw *hw) { s32 ret_val; u16 eeprom_regions_count = 1; u16 j, nvm_data; u16 nvm_offset; DEBUGFUNC("e1000_validate_nvm_checksum_82580"); ret_val = hw->nvm.ops.read(hw, NVM_COMPATIBILITY_REG_3, 1, &nvm_data); if (ret_val) { DEBUGOUT("NVM Read Error\n"); goto out; } if (nvm_data & NVM_COMPATIBILITY_BIT_MASK) { /* if chekcsums compatibility bit is set validate checksums * for all 4 ports. */ eeprom_regions_count = 4; } for (j = 0; j < eeprom_regions_count; j++) { nvm_offset = NVM_82580_LAN_FUNC_OFFSET(j); ret_val = e1000_validate_nvm_checksum_with_offset(hw, nvm_offset); if (ret_val != E1000_SUCCESS) goto out; } out: return ret_val; } /** * e1000_update_nvm_checksum_82580 - Update EEPROM checksum * @hw: pointer to the HW structure * * Updates the EEPROM section checksums for all 4 ports by reading/adding * each word of the EEPROM up to the checksum. Then calculates the EEPROM * checksum and writes the value to the EEPROM. **/ static s32 e1000_update_nvm_checksum_82580(struct e1000_hw *hw) { s32 ret_val; u16 j, nvm_data; u16 nvm_offset; DEBUGFUNC("e1000_update_nvm_checksum_82580"); ret_val = hw->nvm.ops.read(hw, NVM_COMPATIBILITY_REG_3, 1, &nvm_data); if (ret_val) { DEBUGOUT("NVM Read Error while updating checksum compatibility bit.\n"); goto out; } if (!(nvm_data & NVM_COMPATIBILITY_BIT_MASK)) { /* set compatibility bit to validate checksums appropriately */ nvm_data = nvm_data | NVM_COMPATIBILITY_BIT_MASK; ret_val = hw->nvm.ops.write(hw, NVM_COMPATIBILITY_REG_3, 1, &nvm_data); if (ret_val) { DEBUGOUT("NVM Write Error while updating checksum compatibility bit.\n"); goto out; } } for (j = 0; j < 4; j++) { nvm_offset = NVM_82580_LAN_FUNC_OFFSET(j); ret_val = e1000_update_nvm_checksum_with_offset(hw, nvm_offset); if (ret_val) goto out; } out: return ret_val; } /** * e1000_validate_nvm_checksum_i350 - Validate EEPROM checksum * @hw: pointer to the HW structure * * Calculates the EEPROM section checksum by reading/adding each word of * the EEPROM and then verifies that the sum of the EEPROM is * equal to 0xBABA. **/ static s32 e1000_validate_nvm_checksum_i350(struct e1000_hw *hw) { s32 ret_val = E1000_SUCCESS; u16 j; u16 nvm_offset; DEBUGFUNC("e1000_validate_nvm_checksum_i350"); for (j = 0; j < 4; j++) { nvm_offset = NVM_82580_LAN_FUNC_OFFSET(j); ret_val = e1000_validate_nvm_checksum_with_offset(hw, nvm_offset); if (ret_val != E1000_SUCCESS) goto out; } out: return ret_val; } /** * e1000_update_nvm_checksum_i350 - Update EEPROM checksum * @hw: pointer to the HW structure * * Updates the EEPROM section checksums for all 4 ports by reading/adding * each word of the EEPROM up to the checksum. Then calculates the EEPROM * checksum and writes the value to the EEPROM. **/ static s32 e1000_update_nvm_checksum_i350(struct e1000_hw *hw) { s32 ret_val = E1000_SUCCESS; u16 j; u16 nvm_offset; DEBUGFUNC("e1000_update_nvm_checksum_i350"); for (j = 0; j < 4; j++) { nvm_offset = NVM_82580_LAN_FUNC_OFFSET(j); ret_val = e1000_update_nvm_checksum_with_offset(hw, nvm_offset); if (ret_val != E1000_SUCCESS) goto out; } out: return ret_val; } /** * __e1000_access_emi_reg - Read/write EMI register * @hw: pointer to the HW structure * @addr: EMI address to program * @data: pointer to value to read/write from/to the EMI address * @read: boolean flag to indicate read or write **/ static s32 __e1000_access_emi_reg(struct e1000_hw *hw, u16 address, u16 *data, bool read) { s32 ret_val; DEBUGFUNC("__e1000_access_emi_reg"); ret_val = hw->phy.ops.write_reg(hw, E1000_EMIADD, address); if (ret_val) return ret_val; if (read) ret_val = hw->phy.ops.read_reg(hw, E1000_EMIDATA, data); else ret_val = hw->phy.ops.write_reg(hw, E1000_EMIDATA, *data); return ret_val; } /** * e1000_read_emi_reg - Read Extended Management Interface register * @hw: pointer to the HW structure * @addr: EMI address to program * @data: value to be read from the EMI address **/ s32 e1000_read_emi_reg(struct e1000_hw *hw, u16 addr, u16 *data) { DEBUGFUNC("e1000_read_emi_reg"); return __e1000_access_emi_reg(hw, addr, data, TRUE); } /** * e1000_initialize_M88E1512_phy - Initialize M88E1512 PHY * @hw: pointer to the HW structure * * Initialize Marvell 1512 to work correctly with Avoton. **/ s32 e1000_initialize_M88E1512_phy(struct e1000_hw *hw) { struct e1000_phy_info *phy = &hw->phy; s32 ret_val = E1000_SUCCESS; DEBUGFUNC("e1000_initialize_M88E1512_phy"); /* Check if this is correct PHY. */ if (phy->id != M88E1512_E_PHY_ID) goto out; /* Switch to PHY page 0xFF. */ ret_val = phy->ops.write_reg(hw, E1000_M88E1543_PAGE_ADDR, 0x00FF); if (ret_val) goto out; ret_val = phy->ops.write_reg(hw, E1000_M88E1512_CFG_REG_2, 0x214B); if (ret_val) goto out; ret_val = phy->ops.write_reg(hw, E1000_M88E1512_CFG_REG_1, 0x2144); if (ret_val) goto out; ret_val = phy->ops.write_reg(hw, E1000_M88E1512_CFG_REG_2, 0x0C28); if (ret_val) goto out; ret_val = phy->ops.write_reg(hw, E1000_M88E1512_CFG_REG_1, 0x2146); if (ret_val) goto out; ret_val = phy->ops.write_reg(hw, E1000_M88E1512_CFG_REG_2, 0xB233); if (ret_val) goto out; ret_val = phy->ops.write_reg(hw, E1000_M88E1512_CFG_REG_1, 0x214D); if (ret_val) goto out; ret_val = phy->ops.write_reg(hw, E1000_M88E1512_CFG_REG_2, 0xCC0C); if (ret_val) goto out; ret_val = phy->ops.write_reg(hw, E1000_M88E1512_CFG_REG_1, 0x2159); if (ret_val) goto out; /* Switch to PHY page 0xFB. */ ret_val = phy->ops.write_reg(hw, E1000_M88E1543_PAGE_ADDR, 0x00FB); if (ret_val) goto out; ret_val = phy->ops.write_reg(hw, E1000_M88E1512_CFG_REG_3, 0x000D); if (ret_val) goto out; /* Switch to PHY page 0x12. */ ret_val = phy->ops.write_reg(hw, E1000_M88E1543_PAGE_ADDR, 0x12); if (ret_val) goto out; /* Change mode to SGMII-to-Copper */ ret_val = phy->ops.write_reg(hw, E1000_M88E1512_MODE, 0x8001); if (ret_val) goto out; /* Return the PHY to page 0. */ ret_val = phy->ops.write_reg(hw, E1000_M88E1543_PAGE_ADDR, 0); if (ret_val) goto out; ret_val = phy->ops.commit(hw); if (ret_val) { DEBUGOUT("Error committing the PHY changes\n"); return ret_val; } msec_delay(1000); out: return ret_val; } /** * e1000_initialize_M88E1543_phy - Initialize M88E1543 PHY * @hw: pointer to the HW structure * * Initialize Marvell 1543 to work correctly with Avoton. **/ s32 e1000_initialize_M88E1543_phy(struct e1000_hw *hw) { struct e1000_phy_info *phy = &hw->phy; s32 ret_val = E1000_SUCCESS; DEBUGFUNC("e1000_initialize_M88E1543_phy"); /* Check if this is correct PHY. */ if (phy->id != M88E1543_E_PHY_ID) goto out; /* Switch to PHY page 0xFF. */ ret_val = phy->ops.write_reg(hw, E1000_M88E1543_PAGE_ADDR, 0x00FF); if (ret_val) goto out; ret_val = phy->ops.write_reg(hw, E1000_M88E1512_CFG_REG_2, 0x214B); if (ret_val) goto out; ret_val = phy->ops.write_reg(hw, E1000_M88E1512_CFG_REG_1, 0x2144); if (ret_val) goto out; ret_val = phy->ops.write_reg(hw, E1000_M88E1512_CFG_REG_2, 0x0C28); if (ret_val) goto out; ret_val = phy->ops.write_reg(hw, E1000_M88E1512_CFG_REG_1, 0x2146); if (ret_val) goto out; ret_val = phy->ops.write_reg(hw, E1000_M88E1512_CFG_REG_2, 0xB233); if (ret_val) goto out; ret_val = phy->ops.write_reg(hw, E1000_M88E1512_CFG_REG_1, 0x214D); if (ret_val) goto out; ret_val = phy->ops.write_reg(hw, E1000_M88E1512_CFG_REG_2, 0xDC0C); if (ret_val) goto out; ret_val = phy->ops.write_reg(hw, E1000_M88E1512_CFG_REG_1, 0x2159); if (ret_val) goto out; /* Switch to PHY page 0xFB. */ ret_val = phy->ops.write_reg(hw, E1000_M88E1543_PAGE_ADDR, 0x00FB); if (ret_val) goto out; ret_val = phy->ops.write_reg(hw, E1000_M88E1512_CFG_REG_3, 0xC00D); if (ret_val) goto out; /* Switch to PHY page 0x12. */ ret_val = phy->ops.write_reg(hw, E1000_M88E1543_PAGE_ADDR, 0x12); if (ret_val) goto out; /* Change mode to SGMII-to-Copper */ ret_val = phy->ops.write_reg(hw, E1000_M88E1512_MODE, 0x8001); if (ret_val) goto out; /* Switch to PHY page 1. */ ret_val = phy->ops.write_reg(hw, E1000_M88E1543_PAGE_ADDR, 0x1); if (ret_val) goto out; /* Change mode to 1000BASE-X/SGMII and autoneg enable; reset */ ret_val = phy->ops.write_reg(hw, E1000_M88E1543_FIBER_CTRL, 0x9140); if (ret_val) goto out; /* Return the PHY to page 0. */ ret_val = phy->ops.write_reg(hw, E1000_M88E1543_PAGE_ADDR, 0); if (ret_val) goto out; ret_val = phy->ops.commit(hw); if (ret_val) { DEBUGOUT("Error committing the PHY changes\n"); return ret_val; } msec_delay(1000); out: return ret_val; } /** * e1000_set_eee_i350 - Enable/disable EEE support * @hw: pointer to the HW structure * @adv1g: boolean flag enabling 1G EEE advertisement * @adv100m: boolean flag enabling 100M EEE advertisement * * Enable/disable EEE based on setting in dev_spec structure. * **/ s32 e1000_set_eee_i350(struct e1000_hw *hw, bool adv1G, bool adv100M) { u32 ipcnfg, eeer; DEBUGFUNC("e1000_set_eee_i350"); if ((hw->mac.type < e1000_i350) || (hw->phy.media_type != e1000_media_type_copper)) goto out; ipcnfg = E1000_READ_REG(hw, E1000_IPCNFG); eeer = E1000_READ_REG(hw, E1000_EEER); /* enable or disable per user setting */ if (!(hw->dev_spec._82575.eee_disable)) { u32 eee_su = E1000_READ_REG(hw, E1000_EEE_SU); if (adv100M) ipcnfg |= E1000_IPCNFG_EEE_100M_AN; else ipcnfg &= ~E1000_IPCNFG_EEE_100M_AN; if (adv1G) ipcnfg |= E1000_IPCNFG_EEE_1G_AN; else ipcnfg &= ~E1000_IPCNFG_EEE_1G_AN; eeer |= (E1000_EEER_TX_LPI_EN | E1000_EEER_RX_LPI_EN | E1000_EEER_LPI_FC); /* This bit should not be set in normal operation. */ if (eee_su & E1000_EEE_SU_LPI_CLK_STP) DEBUGOUT("LPI Clock Stop Bit should not be set!\n"); } else { ipcnfg &= ~(E1000_IPCNFG_EEE_1G_AN | E1000_IPCNFG_EEE_100M_AN); eeer &= ~(E1000_EEER_TX_LPI_EN | E1000_EEER_RX_LPI_EN | E1000_EEER_LPI_FC); } E1000_WRITE_REG(hw, E1000_IPCNFG, ipcnfg); E1000_WRITE_REG(hw, E1000_EEER, eeer); E1000_READ_REG(hw, E1000_IPCNFG); E1000_READ_REG(hw, E1000_EEER); out: return E1000_SUCCESS; } /** * e1000_set_eee_i354 - Enable/disable EEE support * @hw: pointer to the HW structure * @adv1g: boolean flag enabling 1G EEE advertisement * @adv100m: boolean flag enabling 100M EEE advertisement * * Enable/disable EEE legacy mode based on setting in dev_spec structure. * **/ s32 e1000_set_eee_i354(struct e1000_hw *hw, bool adv1G, bool adv100M) { struct e1000_phy_info *phy = &hw->phy; s32 ret_val = E1000_SUCCESS; u16 phy_data; DEBUGFUNC("e1000_set_eee_i354"); if ((hw->phy.media_type != e1000_media_type_copper) || ((phy->id != M88E1543_E_PHY_ID) && (phy->id != M88E1512_E_PHY_ID))) goto out; if (!hw->dev_spec._82575.eee_disable) { /* Switch to PHY page 18. */ ret_val = phy->ops.write_reg(hw, E1000_M88E1543_PAGE_ADDR, 18); if (ret_val) goto out; ret_val = phy->ops.read_reg(hw, E1000_M88E1543_EEE_CTRL_1, &phy_data); if (ret_val) goto out; phy_data |= E1000_M88E1543_EEE_CTRL_1_MS; ret_val = phy->ops.write_reg(hw, E1000_M88E1543_EEE_CTRL_1, phy_data); if (ret_val) goto out; /* Return the PHY to page 0. */ ret_val = phy->ops.write_reg(hw, E1000_M88E1543_PAGE_ADDR, 0); if (ret_val) goto out; /* Turn on EEE advertisement. */ ret_val = e1000_read_xmdio_reg(hw, E1000_EEE_ADV_ADDR_I354, E1000_EEE_ADV_DEV_I354, &phy_data); if (ret_val) goto out; if (adv100M) phy_data |= E1000_EEE_ADV_100_SUPPORTED; else phy_data &= ~E1000_EEE_ADV_100_SUPPORTED; if (adv1G) phy_data |= E1000_EEE_ADV_1000_SUPPORTED; else phy_data &= ~E1000_EEE_ADV_1000_SUPPORTED; ret_val = e1000_write_xmdio_reg(hw, E1000_EEE_ADV_ADDR_I354, E1000_EEE_ADV_DEV_I354, phy_data); } else { /* Turn off EEE advertisement. */ ret_val = e1000_read_xmdio_reg(hw, E1000_EEE_ADV_ADDR_I354, E1000_EEE_ADV_DEV_I354, &phy_data); if (ret_val) goto out; phy_data &= ~(E1000_EEE_ADV_100_SUPPORTED | E1000_EEE_ADV_1000_SUPPORTED); ret_val = e1000_write_xmdio_reg(hw, E1000_EEE_ADV_ADDR_I354, E1000_EEE_ADV_DEV_I354, phy_data); } out: return ret_val; } /** * e1000_get_eee_status_i354 - Get EEE status * @hw: pointer to the HW structure * @status: EEE status * * Get EEE status by guessing based on whether Tx or Rx LPI indications have * been received. **/ s32 e1000_get_eee_status_i354(struct e1000_hw *hw, bool *status) { struct e1000_phy_info *phy = &hw->phy; s32 ret_val = E1000_SUCCESS; u16 phy_data; DEBUGFUNC("e1000_get_eee_status_i354"); /* Check if EEE is supported on this device. */ if ((hw->phy.media_type != e1000_media_type_copper) || ((phy->id != M88E1543_E_PHY_ID) && (phy->id != M88E1512_E_PHY_ID))) goto out; ret_val = e1000_read_xmdio_reg(hw, E1000_PCS_STATUS_ADDR_I354, E1000_PCS_STATUS_DEV_I354, &phy_data); if (ret_val) goto out; *status = phy_data & (E1000_PCS_STATUS_TX_LPI_RCVD | E1000_PCS_STATUS_RX_LPI_RCVD) ? TRUE : FALSE; out: return ret_val; } /* Due to a hw errata, if the host tries to configure the VFTA register * while performing queries from the BMC or DMA, then the VFTA in some * cases won't be written. */ /** * e1000_clear_vfta_i350 - Clear VLAN filter table * @hw: pointer to the HW structure * * Clears the register array which contains the VLAN filter table by * setting all the values to 0. **/ void e1000_clear_vfta_i350(struct e1000_hw *hw) { u32 offset; int i; DEBUGFUNC("e1000_clear_vfta_350"); for (offset = 0; offset < E1000_VLAN_FILTER_TBL_SIZE; offset++) { for (i = 0; i < 10; i++) E1000_WRITE_REG_ARRAY(hw, E1000_VFTA, offset, 0); E1000_WRITE_FLUSH(hw); } } /** * e1000_write_vfta_i350 - Write value to VLAN filter table * @hw: pointer to the HW structure * @offset: register offset in VLAN filter table * @value: register value written to VLAN filter table * * Writes value at the given offset in the register array which stores * the VLAN filter table. **/ void e1000_write_vfta_i350(struct e1000_hw *hw, u32 offset, u32 value) { int i; DEBUGFUNC("e1000_write_vfta_350"); for (i = 0; i < 10; i++) E1000_WRITE_REG_ARRAY(hw, E1000_VFTA, offset, value); E1000_WRITE_FLUSH(hw); } /** * e1000_set_i2c_bb - Enable I2C bit-bang * @hw: pointer to the HW structure * * Enable I2C bit-bang interface * **/ s32 e1000_set_i2c_bb(struct e1000_hw *hw) { s32 ret_val = E1000_SUCCESS; u32 ctrl_ext, i2cparams; DEBUGFUNC("e1000_set_i2c_bb"); ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT); ctrl_ext |= E1000_CTRL_I2C_ENA; E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext); E1000_WRITE_FLUSH(hw); i2cparams = E1000_READ_REG(hw, E1000_I2CPARAMS); i2cparams |= E1000_I2CBB_EN; i2cparams |= E1000_I2C_DATA_OE_N; i2cparams |= E1000_I2C_CLK_OE_N; E1000_WRITE_REG(hw, E1000_I2CPARAMS, i2cparams); E1000_WRITE_FLUSH(hw); return ret_val; } /** * e1000_read_i2c_byte_generic - Reads 8 bit word over I2C * @hw: pointer to hardware structure * @byte_offset: byte offset to read * @dev_addr: device address * @data: value read * * Performs byte read operation over I2C interface at * a specified device address. **/ s32 e1000_read_i2c_byte_generic(struct e1000_hw *hw, u8 byte_offset, u8 dev_addr, u8 *data) { s32 status = E1000_SUCCESS; u32 max_retry = 10; u32 retry = 1; u16 swfw_mask = 0; bool nack = TRUE; DEBUGFUNC("e1000_read_i2c_byte_generic"); swfw_mask = E1000_SWFW_PHY0_SM; do { if (hw->mac.ops.acquire_swfw_sync(hw, swfw_mask) != E1000_SUCCESS) { status = E1000_ERR_SWFW_SYNC; goto read_byte_out; } e1000_i2c_start(hw); /* Device Address and write indication */ status = e1000_clock_out_i2c_byte(hw, dev_addr); if (status != E1000_SUCCESS) goto fail; status = e1000_get_i2c_ack(hw); if (status != E1000_SUCCESS) goto fail; status = e1000_clock_out_i2c_byte(hw, byte_offset); if (status != E1000_SUCCESS) goto fail; status = e1000_get_i2c_ack(hw); if (status != E1000_SUCCESS) goto fail; e1000_i2c_start(hw); /* Device Address and read indication */ status = e1000_clock_out_i2c_byte(hw, (dev_addr | 0x1)); if (status != E1000_SUCCESS) goto fail; status = e1000_get_i2c_ack(hw); if (status != E1000_SUCCESS) goto fail; status = e1000_clock_in_i2c_byte(hw, data); if (status != E1000_SUCCESS) goto fail; status = e1000_clock_out_i2c_bit(hw, nack); if (status != E1000_SUCCESS) goto fail; e1000_i2c_stop(hw); break; fail: hw->mac.ops.release_swfw_sync(hw, swfw_mask); msec_delay(100); e1000_i2c_bus_clear(hw); retry++; if (retry < max_retry) DEBUGOUT("I2C byte read error - Retrying.\n"); else DEBUGOUT("I2C byte read error.\n"); } while (retry < max_retry); hw->mac.ops.release_swfw_sync(hw, swfw_mask); read_byte_out: return status; } /** * e1000_write_i2c_byte_generic - Writes 8 bit word over I2C * @hw: pointer to hardware structure * @byte_offset: byte offset to write * @dev_addr: device address * @data: value to write * * Performs byte write operation over I2C interface at * a specified device address. **/ s32 e1000_write_i2c_byte_generic(struct e1000_hw *hw, u8 byte_offset, u8 dev_addr, u8 data) { s32 status = E1000_SUCCESS; u32 max_retry = 1; u32 retry = 0; u16 swfw_mask = 0; DEBUGFUNC("e1000_write_i2c_byte_generic"); swfw_mask = E1000_SWFW_PHY0_SM; if (hw->mac.ops.acquire_swfw_sync(hw, swfw_mask) != E1000_SUCCESS) { status = E1000_ERR_SWFW_SYNC; goto write_byte_out; } do { e1000_i2c_start(hw); status = e1000_clock_out_i2c_byte(hw, dev_addr); if (status != E1000_SUCCESS) goto fail; status = e1000_get_i2c_ack(hw); if (status != E1000_SUCCESS) goto fail; status = e1000_clock_out_i2c_byte(hw, byte_offset); if (status != E1000_SUCCESS) goto fail; status = e1000_get_i2c_ack(hw); if (status != E1000_SUCCESS) goto fail; status = e1000_clock_out_i2c_byte(hw, data); if (status != E1000_SUCCESS) goto fail; status = e1000_get_i2c_ack(hw); if (status != E1000_SUCCESS) goto fail; e1000_i2c_stop(hw); break; fail: e1000_i2c_bus_clear(hw); retry++; if (retry < max_retry) DEBUGOUT("I2C byte write error - Retrying.\n"); else DEBUGOUT("I2C byte write error.\n"); } while (retry < max_retry); hw->mac.ops.release_swfw_sync(hw, swfw_mask); write_byte_out: return status; } /** * e1000_i2c_start - Sets I2C start condition * @hw: pointer to hardware structure * * Sets I2C start condition (High -> Low on SDA while SCL is High) **/ static void e1000_i2c_start(struct e1000_hw *hw) { u32 i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS); DEBUGFUNC("e1000_i2c_start"); /* Start condition must begin with data and clock high */ e1000_set_i2c_data(hw, &i2cctl, 1); e1000_raise_i2c_clk(hw, &i2cctl); /* Setup time for start condition (4.7us) */ usec_delay(E1000_I2C_T_SU_STA); e1000_set_i2c_data(hw, &i2cctl, 0); /* Hold time for start condition (4us) */ usec_delay(E1000_I2C_T_HD_STA); e1000_lower_i2c_clk(hw, &i2cctl); /* Minimum low period of clock is 4.7 us */ usec_delay(E1000_I2C_T_LOW); } /** * e1000_i2c_stop - Sets I2C stop condition * @hw: pointer to hardware structure * * Sets I2C stop condition (Low -> High on SDA while SCL is High) **/ static void e1000_i2c_stop(struct e1000_hw *hw) { u32 i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS); DEBUGFUNC("e1000_i2c_stop"); /* Stop condition must begin with data low and clock high */ e1000_set_i2c_data(hw, &i2cctl, 0); e1000_raise_i2c_clk(hw, &i2cctl); /* Setup time for stop condition (4us) */ usec_delay(E1000_I2C_T_SU_STO); e1000_set_i2c_data(hw, &i2cctl, 1); /* bus free time between stop and start (4.7us)*/ usec_delay(E1000_I2C_T_BUF); } /** * e1000_clock_in_i2c_byte - Clocks in one byte via I2C * @hw: pointer to hardware structure * @data: data byte to clock in * * Clocks in one byte data via I2C data/clock **/ static s32 e1000_clock_in_i2c_byte(struct e1000_hw *hw, u8 *data) { s32 i; bool bit = 0; DEBUGFUNC("e1000_clock_in_i2c_byte"); *data = 0; for (i = 7; i >= 0; i--) { e1000_clock_in_i2c_bit(hw, &bit); *data |= bit << i; } return E1000_SUCCESS; } /** * e1000_clock_out_i2c_byte - Clocks out one byte via I2C * @hw: pointer to hardware structure * @data: data byte clocked out * * Clocks out one byte data via I2C data/clock **/ static s32 e1000_clock_out_i2c_byte(struct e1000_hw *hw, u8 data) { s32 status = E1000_SUCCESS; s32 i; u32 i2cctl; bool bit = 0; DEBUGFUNC("e1000_clock_out_i2c_byte"); for (i = 7; i >= 0; i--) { bit = (data >> i) & 0x1; status = e1000_clock_out_i2c_bit(hw, bit); if (status != E1000_SUCCESS) break; } /* Release SDA line (set high) */ i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS); i2cctl |= E1000_I2C_DATA_OE_N; E1000_WRITE_REG(hw, E1000_I2CPARAMS, i2cctl); E1000_WRITE_FLUSH(hw); return status; } /** * e1000_get_i2c_ack - Polls for I2C ACK * @hw: pointer to hardware structure * * Clocks in/out one bit via I2C data/clock **/ static s32 e1000_get_i2c_ack(struct e1000_hw *hw) { s32 status = E1000_SUCCESS; u32 i = 0; u32 i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS); u32 timeout = 10; bool ack = TRUE; DEBUGFUNC("e1000_get_i2c_ack"); e1000_raise_i2c_clk(hw, &i2cctl); /* Minimum high period of clock is 4us */ usec_delay(E1000_I2C_T_HIGH); /* Wait until SCL returns high */ for (i = 0; i < timeout; i++) { usec_delay(1); i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS); if (i2cctl & E1000_I2C_CLK_IN) break; } if (!(i2cctl & E1000_I2C_CLK_IN)) return E1000_ERR_I2C; ack = e1000_get_i2c_data(&i2cctl); if (ack) { DEBUGOUT("I2C ack was not received.\n"); status = E1000_ERR_I2C; } e1000_lower_i2c_clk(hw, &i2cctl); /* Minimum low period of clock is 4.7 us */ usec_delay(E1000_I2C_T_LOW); return status; } /** * e1000_clock_in_i2c_bit - Clocks in one bit via I2C data/clock * @hw: pointer to hardware structure * @data: read data value * * Clocks in one bit via I2C data/clock **/ static s32 e1000_clock_in_i2c_bit(struct e1000_hw *hw, bool *data) { u32 i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS); DEBUGFUNC("e1000_clock_in_i2c_bit"); e1000_raise_i2c_clk(hw, &i2cctl); /* Minimum high period of clock is 4us */ usec_delay(E1000_I2C_T_HIGH); i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS); *data = e1000_get_i2c_data(&i2cctl); e1000_lower_i2c_clk(hw, &i2cctl); /* Minimum low period of clock is 4.7 us */ usec_delay(E1000_I2C_T_LOW); return E1000_SUCCESS; } /** * e1000_clock_out_i2c_bit - Clocks in/out one bit via I2C data/clock * @hw: pointer to hardware structure * @data: data value to write * * Clocks out one bit via I2C data/clock **/ static s32 e1000_clock_out_i2c_bit(struct e1000_hw *hw, bool data) { s32 status; u32 i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS); DEBUGFUNC("e1000_clock_out_i2c_bit"); status = e1000_set_i2c_data(hw, &i2cctl, data); if (status == E1000_SUCCESS) { e1000_raise_i2c_clk(hw, &i2cctl); /* Minimum high period of clock is 4us */ usec_delay(E1000_I2C_T_HIGH); e1000_lower_i2c_clk(hw, &i2cctl); /* Minimum low period of clock is 4.7 us. * This also takes care of the data hold time. */ usec_delay(E1000_I2C_T_LOW); } else { status = E1000_ERR_I2C; DEBUGOUT1("I2C data was not set to %X\n", data); } return status; } /** * e1000_raise_i2c_clk - Raises the I2C SCL clock * @hw: pointer to hardware structure * @i2cctl: Current value of I2CCTL register * * Raises the I2C clock line '0'->'1' **/ static void e1000_raise_i2c_clk(struct e1000_hw *hw, u32 *i2cctl) { DEBUGFUNC("e1000_raise_i2c_clk"); *i2cctl |= E1000_I2C_CLK_OUT; *i2cctl &= ~E1000_I2C_CLK_OE_N; E1000_WRITE_REG(hw, E1000_I2CPARAMS, *i2cctl); E1000_WRITE_FLUSH(hw); /* SCL rise time (1000ns) */ usec_delay(E1000_I2C_T_RISE); } /** * e1000_lower_i2c_clk - Lowers the I2C SCL clock * @hw: pointer to hardware structure * @i2cctl: Current value of I2CCTL register * * Lowers the I2C clock line '1'->'0' **/ static void e1000_lower_i2c_clk(struct e1000_hw *hw, u32 *i2cctl) { DEBUGFUNC("e1000_lower_i2c_clk"); *i2cctl &= ~E1000_I2C_CLK_OUT; *i2cctl &= ~E1000_I2C_CLK_OE_N; E1000_WRITE_REG(hw, E1000_I2CPARAMS, *i2cctl); E1000_WRITE_FLUSH(hw); /* SCL fall time (300ns) */ usec_delay(E1000_I2C_T_FALL); } /** * e1000_set_i2c_data - Sets the I2C data bit * @hw: pointer to hardware structure * @i2cctl: Current value of I2CCTL register * @data: I2C data value (0 or 1) to set * * Sets the I2C data bit **/ static s32 e1000_set_i2c_data(struct e1000_hw *hw, u32 *i2cctl, bool data) { s32 status = E1000_SUCCESS; DEBUGFUNC("e1000_set_i2c_data"); if (data) *i2cctl |= E1000_I2C_DATA_OUT; else *i2cctl &= ~E1000_I2C_DATA_OUT; *i2cctl &= ~E1000_I2C_DATA_OE_N; *i2cctl |= E1000_I2C_CLK_OE_N; E1000_WRITE_REG(hw, E1000_I2CPARAMS, *i2cctl); E1000_WRITE_FLUSH(hw); /* Data rise/fall (1000ns/300ns) and set-up time (250ns) */ usec_delay(E1000_I2C_T_RISE + E1000_I2C_T_FALL + E1000_I2C_T_SU_DATA); *i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS); if (data != e1000_get_i2c_data(i2cctl)) { status = E1000_ERR_I2C; DEBUGOUT1("Error - I2C data was not set to %X.\n", data); } return status; } /** * e1000_get_i2c_data - Reads the I2C SDA data bit * @hw: pointer to hardware structure * @i2cctl: Current value of I2CCTL register * * Returns the I2C data bit value **/ static bool e1000_get_i2c_data(u32 *i2cctl) { bool data; DEBUGFUNC("e1000_get_i2c_data"); if (*i2cctl & E1000_I2C_DATA_IN) data = 1; else data = 0; return data; } /** * e1000_i2c_bus_clear - Clears the I2C bus * @hw: pointer to hardware structure * * Clears the I2C bus by sending nine clock pulses. * Used when data line is stuck low. **/ void e1000_i2c_bus_clear(struct e1000_hw *hw) { u32 i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS); u32 i; DEBUGFUNC("e1000_i2c_bus_clear"); e1000_i2c_start(hw); e1000_set_i2c_data(hw, &i2cctl, 1); for (i = 0; i < 9; i++) { e1000_raise_i2c_clk(hw, &i2cctl); /* Min high period of clock is 4us */ usec_delay(E1000_I2C_T_HIGH); e1000_lower_i2c_clk(hw, &i2cctl); /* Min low period of clock is 4.7us*/ usec_delay(E1000_I2C_T_LOW); } e1000_i2c_start(hw); /* Put the i2c bus back to default state */ e1000_i2c_stop(hw); } Index: head/sys/dev/e1000/e1000_hw.h =================================================================== --- head/sys/dev/e1000/e1000_hw.h (revision 323515) +++ head/sys/dev/e1000/e1000_hw.h (revision 323516) @@ -1,1040 +1,1037 @@ /****************************************************************************** Copyright (c) 2001-2015, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************/ /*$FreeBSD$*/ #ifndef _E1000_HW_H_ #define _E1000_HW_H_ #include "e1000_osdep.h" #include "e1000_regs.h" #include "e1000_defines.h" struct e1000_hw; #define E1000_DEV_ID_82542 0x1000 #define E1000_DEV_ID_82543GC_FIBER 0x1001 #define E1000_DEV_ID_82543GC_COPPER 0x1004 #define E1000_DEV_ID_82544EI_COPPER 0x1008 #define E1000_DEV_ID_82544EI_FIBER 0x1009 #define E1000_DEV_ID_82544GC_COPPER 0x100C #define E1000_DEV_ID_82544GC_LOM 0x100D #define E1000_DEV_ID_82540EM 0x100E #define E1000_DEV_ID_82540EM_LOM 0x1015 #define E1000_DEV_ID_82540EP_LOM 0x1016 #define E1000_DEV_ID_82540EP 0x1017 #define E1000_DEV_ID_82540EP_LP 0x101E #define E1000_DEV_ID_82545EM_COPPER 0x100F #define E1000_DEV_ID_82545EM_FIBER 0x1011 #define E1000_DEV_ID_82545GM_COPPER 0x1026 #define E1000_DEV_ID_82545GM_FIBER 0x1027 #define E1000_DEV_ID_82545GM_SERDES 0x1028 #define E1000_DEV_ID_82546EB_COPPER 0x1010 #define E1000_DEV_ID_82546EB_FIBER 0x1012 #define E1000_DEV_ID_82546EB_QUAD_COPPER 0x101D #define E1000_DEV_ID_82546GB_COPPER 0x1079 #define E1000_DEV_ID_82546GB_FIBER 0x107A #define E1000_DEV_ID_82546GB_SERDES 0x107B #define E1000_DEV_ID_82546GB_PCIE 0x108A #define E1000_DEV_ID_82546GB_QUAD_COPPER 0x1099 #define E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3 0x10B5 #define E1000_DEV_ID_82541EI 0x1013 #define E1000_DEV_ID_82541EI_MOBILE 0x1018 #define E1000_DEV_ID_82541ER_LOM 0x1014 #define E1000_DEV_ID_82541ER 0x1078 #define E1000_DEV_ID_82541GI 0x1076 #define E1000_DEV_ID_82541GI_LF 0x107C #define E1000_DEV_ID_82541GI_MOBILE 0x1077 #define E1000_DEV_ID_82547EI 0x1019 #define E1000_DEV_ID_82547EI_MOBILE 0x101A #define E1000_DEV_ID_82547GI 0x1075 #define E1000_DEV_ID_82571EB_COPPER 0x105E #define E1000_DEV_ID_82571EB_FIBER 0x105F #define E1000_DEV_ID_82571EB_SERDES 0x1060 #define E1000_DEV_ID_82571EB_SERDES_DUAL 0x10D9 #define E1000_DEV_ID_82571EB_SERDES_QUAD 0x10DA #define E1000_DEV_ID_82571EB_QUAD_COPPER 0x10A4 #define E1000_DEV_ID_82571PT_QUAD_COPPER 0x10D5 #define E1000_DEV_ID_82571EB_QUAD_FIBER 0x10A5 #define E1000_DEV_ID_82571EB_QUAD_COPPER_LP 0x10BC #define E1000_DEV_ID_82572EI_COPPER 0x107D #define E1000_DEV_ID_82572EI_FIBER 0x107E #define E1000_DEV_ID_82572EI_SERDES 0x107F #define E1000_DEV_ID_82572EI 0x10B9 #define E1000_DEV_ID_82573E 0x108B #define E1000_DEV_ID_82573E_IAMT 0x108C #define E1000_DEV_ID_82573L 0x109A #define E1000_DEV_ID_82574L 0x10D3 #define E1000_DEV_ID_82574LA 0x10F6 #define E1000_DEV_ID_82583V 0x150C #define E1000_DEV_ID_80003ES2LAN_COPPER_DPT 0x1096 #define E1000_DEV_ID_80003ES2LAN_SERDES_DPT 0x1098 #define E1000_DEV_ID_80003ES2LAN_COPPER_SPT 0x10BA #define E1000_DEV_ID_80003ES2LAN_SERDES_SPT 0x10BB #define E1000_DEV_ID_ICH8_82567V_3 0x1501 #define E1000_DEV_ID_ICH8_IGP_M_AMT 0x1049 #define E1000_DEV_ID_ICH8_IGP_AMT 0x104A #define E1000_DEV_ID_ICH8_IGP_C 0x104B #define E1000_DEV_ID_ICH8_IFE 0x104C #define E1000_DEV_ID_ICH8_IFE_GT 0x10C4 #define E1000_DEV_ID_ICH8_IFE_G 0x10C5 #define E1000_DEV_ID_ICH8_IGP_M 0x104D #define E1000_DEV_ID_ICH9_IGP_M 0x10BF #define E1000_DEV_ID_ICH9_IGP_M_AMT 0x10F5 #define E1000_DEV_ID_ICH9_IGP_M_V 0x10CB #define E1000_DEV_ID_ICH9_IGP_AMT 0x10BD #define E1000_DEV_ID_ICH9_BM 0x10E5 #define E1000_DEV_ID_ICH9_IGP_C 0x294C #define E1000_DEV_ID_ICH9_IFE 0x10C0 #define E1000_DEV_ID_ICH9_IFE_GT 0x10C3 #define E1000_DEV_ID_ICH9_IFE_G 0x10C2 #define E1000_DEV_ID_ICH10_R_BM_LM 0x10CC #define E1000_DEV_ID_ICH10_R_BM_LF 0x10CD #define E1000_DEV_ID_ICH10_R_BM_V 0x10CE #define E1000_DEV_ID_ICH10_D_BM_LM 0x10DE #define E1000_DEV_ID_ICH10_D_BM_LF 0x10DF #define E1000_DEV_ID_ICH10_D_BM_V 0x1525 #define E1000_DEV_ID_PCH_M_HV_LM 0x10EA #define E1000_DEV_ID_PCH_M_HV_LC 0x10EB #define E1000_DEV_ID_PCH_D_HV_DM 0x10EF #define E1000_DEV_ID_PCH_D_HV_DC 0x10F0 #define E1000_DEV_ID_PCH2_LV_LM 0x1502 #define E1000_DEV_ID_PCH2_LV_V 0x1503 #define E1000_DEV_ID_PCH_LPT_I217_LM 0x153A #define E1000_DEV_ID_PCH_LPT_I217_V 0x153B #define E1000_DEV_ID_PCH_LPTLP_I218_LM 0x155A #define E1000_DEV_ID_PCH_LPTLP_I218_V 0x1559 #define E1000_DEV_ID_PCH_I218_LM2 0x15A0 #define E1000_DEV_ID_PCH_I218_V2 0x15A1 #define E1000_DEV_ID_PCH_I218_LM3 0x15A2 /* Wildcat Point PCH */ #define E1000_DEV_ID_PCH_I218_V3 0x15A3 /* Wildcat Point PCH */ #define E1000_DEV_ID_PCH_SPT_I219_LM 0x156F /* Sunrise Point PCH */ #define E1000_DEV_ID_PCH_SPT_I219_V 0x1570 /* Sunrise Point PCH */ #define E1000_DEV_ID_PCH_SPT_I219_LM2 0x15B7 /* Sunrise Point-H PCH */ #define E1000_DEV_ID_PCH_SPT_I219_V2 0x15B8 /* Sunrise Point-H PCH */ #define E1000_DEV_ID_PCH_LBG_I219_LM3 0x15B9 /* LEWISBURG PCH */ #define E1000_DEV_ID_PCH_SPT_I219_LM4 0x15D7 #define E1000_DEV_ID_PCH_SPT_I219_V4 0x15D8 #define E1000_DEV_ID_PCH_SPT_I219_LM5 0x15E3 #define E1000_DEV_ID_PCH_SPT_I219_V5 0x15D6 #define E1000_DEV_ID_82576 0x10C9 #define E1000_DEV_ID_82576_FIBER 0x10E6 #define E1000_DEV_ID_82576_SERDES 0x10E7 #define E1000_DEV_ID_82576_QUAD_COPPER 0x10E8 #define E1000_DEV_ID_82576_QUAD_COPPER_ET2 0x1526 #define E1000_DEV_ID_82576_NS 0x150A #define E1000_DEV_ID_82576_NS_SERDES 0x1518 #define E1000_DEV_ID_82576_SERDES_QUAD 0x150D #define E1000_DEV_ID_82576_VF 0x10CA #define E1000_DEV_ID_82576_VF_HV 0x152D #define E1000_DEV_ID_I350_VF 0x1520 #define E1000_DEV_ID_I350_VF_HV 0x152F #define E1000_DEV_ID_82575EB_COPPER 0x10A7 #define E1000_DEV_ID_82575EB_FIBER_SERDES 0x10A9 #define E1000_DEV_ID_82575GB_QUAD_COPPER 0x10D6 #define E1000_DEV_ID_82580_COPPER 0x150E #define E1000_DEV_ID_82580_FIBER 0x150F #define E1000_DEV_ID_82580_SERDES 0x1510 #define E1000_DEV_ID_82580_SGMII 0x1511 #define E1000_DEV_ID_82580_COPPER_DUAL 0x1516 #define E1000_DEV_ID_82580_QUAD_FIBER 0x1527 #define E1000_DEV_ID_I350_COPPER 0x1521 #define E1000_DEV_ID_I350_FIBER 0x1522 #define E1000_DEV_ID_I350_SERDES 0x1523 #define E1000_DEV_ID_I350_SGMII 0x1524 #define E1000_DEV_ID_I350_DA4 0x1546 #define E1000_DEV_ID_I210_COPPER 0x1533 #define E1000_DEV_ID_I210_COPPER_OEM1 0x1534 #define E1000_DEV_ID_I210_COPPER_IT 0x1535 #define E1000_DEV_ID_I210_FIBER 0x1536 #define E1000_DEV_ID_I210_SERDES 0x1537 #define E1000_DEV_ID_I210_SGMII 0x1538 #define E1000_DEV_ID_I210_COPPER_FLASHLESS 0x157B #define E1000_DEV_ID_I210_SERDES_FLASHLESS 0x157C #define E1000_DEV_ID_I211_COPPER 0x1539 #define E1000_DEV_ID_I354_BACKPLANE_1GBPS 0x1F40 #define E1000_DEV_ID_I354_SGMII 0x1F41 #define E1000_DEV_ID_I354_BACKPLANE_2_5GBPS 0x1F45 #define E1000_DEV_ID_DH89XXCC_SGMII 0x0438 #define E1000_DEV_ID_DH89XXCC_SERDES 0x043A #define E1000_DEV_ID_DH89XXCC_BACKPLANE 0x043C #define E1000_DEV_ID_DH89XXCC_SFP 0x0440 #define E1000_REVISION_0 0 #define E1000_REVISION_1 1 #define E1000_REVISION_2 2 #define E1000_REVISION_3 3 #define E1000_REVISION_4 4 #define E1000_FUNC_0 0 #define E1000_FUNC_1 1 #define E1000_FUNC_2 2 #define E1000_FUNC_3 3 #define E1000_ALT_MAC_ADDRESS_OFFSET_LAN0 0 #define E1000_ALT_MAC_ADDRESS_OFFSET_LAN1 3 #define E1000_ALT_MAC_ADDRESS_OFFSET_LAN2 6 #define E1000_ALT_MAC_ADDRESS_OFFSET_LAN3 9 enum e1000_mac_type { e1000_undefined = 0, e1000_82542, e1000_82543, e1000_82544, e1000_82540, e1000_82545, e1000_82545_rev_3, e1000_82546, e1000_82546_rev_3, e1000_82541, e1000_82541_rev_2, e1000_82547, e1000_82547_rev_2, e1000_82571, e1000_82572, e1000_82573, e1000_82574, e1000_82583, e1000_80003es2lan, e1000_ich8lan, e1000_ich9lan, e1000_ich10lan, e1000_pchlan, e1000_pch2lan, e1000_pch_lpt, e1000_pch_spt, e1000_82575, e1000_82576, e1000_82580, e1000_i350, e1000_i354, e1000_i210, e1000_i211, e1000_vfadapt, e1000_vfadapt_i350, e1000_num_macs /* List is 1-based, so subtract 1 for TRUE count. */ }; enum e1000_media_type { e1000_media_type_unknown = 0, e1000_media_type_copper = 1, e1000_media_type_fiber = 2, e1000_media_type_internal_serdes = 3, e1000_num_media_types }; enum e1000_nvm_type { e1000_nvm_unknown = 0, e1000_nvm_none, e1000_nvm_eeprom_spi, e1000_nvm_eeprom_microwire, e1000_nvm_flash_hw, e1000_nvm_invm, e1000_nvm_flash_sw }; enum e1000_nvm_override { e1000_nvm_override_none = 0, e1000_nvm_override_spi_small, e1000_nvm_override_spi_large, e1000_nvm_override_microwire_small, e1000_nvm_override_microwire_large }; enum e1000_phy_type { e1000_phy_unknown = 0, e1000_phy_none, e1000_phy_m88, e1000_phy_igp, e1000_phy_igp_2, e1000_phy_gg82563, e1000_phy_igp_3, e1000_phy_ife, e1000_phy_bm, e1000_phy_82578, e1000_phy_82577, e1000_phy_82579, e1000_phy_i217, e1000_phy_82580, e1000_phy_vf, e1000_phy_i210, }; enum e1000_bus_type { e1000_bus_type_unknown = 0, e1000_bus_type_pci, e1000_bus_type_pcix, e1000_bus_type_pci_express, e1000_bus_type_reserved }; enum e1000_bus_speed { e1000_bus_speed_unknown = 0, e1000_bus_speed_33, e1000_bus_speed_66, e1000_bus_speed_100, e1000_bus_speed_120, e1000_bus_speed_133, e1000_bus_speed_2500, e1000_bus_speed_5000, e1000_bus_speed_reserved }; enum e1000_bus_width { e1000_bus_width_unknown = 0, e1000_bus_width_pcie_x1, e1000_bus_width_pcie_x2, e1000_bus_width_pcie_x4 = 4, e1000_bus_width_pcie_x8 = 8, e1000_bus_width_32, e1000_bus_width_64, e1000_bus_width_reserved }; enum e1000_1000t_rx_status { e1000_1000t_rx_status_not_ok = 0, e1000_1000t_rx_status_ok, e1000_1000t_rx_status_undefined = 0xFF }; enum e1000_rev_polarity { e1000_rev_polarity_normal = 0, e1000_rev_polarity_reversed, e1000_rev_polarity_undefined = 0xFF }; enum e1000_fc_mode { e1000_fc_none = 0, e1000_fc_rx_pause, e1000_fc_tx_pause, e1000_fc_full, e1000_fc_default = 0xFF }; enum e1000_ffe_config { e1000_ffe_config_enabled = 0, e1000_ffe_config_active, e1000_ffe_config_blocked }; enum e1000_dsp_config { e1000_dsp_config_disabled = 0, e1000_dsp_config_enabled, e1000_dsp_config_activated, e1000_dsp_config_undefined = 0xFF }; enum e1000_ms_type { e1000_ms_hw_default = 0, e1000_ms_force_master, e1000_ms_force_slave, e1000_ms_auto }; enum e1000_smart_speed { e1000_smart_speed_default = 0, e1000_smart_speed_on, e1000_smart_speed_off }; enum e1000_serdes_link_state { e1000_serdes_link_down = 0, e1000_serdes_link_autoneg_progress, e1000_serdes_link_autoneg_complete, e1000_serdes_link_forced_up }; #define __le16 u16 #define __le32 u32 #define __le64 u64 /* Receive Descriptor */ struct e1000_rx_desc { __le64 buffer_addr; /* Address of the descriptor's data buffer */ __le16 length; /* Length of data DMAed into data buffer */ __le16 csum; /* Packet checksum */ u8 status; /* Descriptor status */ u8 errors; /* Descriptor Errors */ __le16 special; }; /* Receive Descriptor - Extended */ union e1000_rx_desc_extended { struct { __le64 buffer_addr; __le64 reserved; } read; struct { struct { __le32 mrq; /* Multiple Rx Queues */ union { __le32 rss; /* RSS Hash */ struct { __le16 ip_id; /* IP id */ __le16 csum; /* Packet Checksum */ } csum_ip; } hi_dword; } lower; struct { __le32 status_error; /* ext status/error */ __le16 length; __le16 vlan; /* VLAN tag */ } upper; } wb; /* writeback */ }; #define MAX_PS_BUFFERS 4 /* Number of packet split data buffers (not including the header buffer) */ #define PS_PAGE_BUFFERS (MAX_PS_BUFFERS - 1) /* Receive Descriptor - Packet Split */ union e1000_rx_desc_packet_split { struct { /* one buffer for protocol header(s), three data buffers */ __le64 buffer_addr[MAX_PS_BUFFERS]; } read; struct { struct { __le32 mrq; /* Multiple Rx Queues */ union { __le32 rss; /* RSS Hash */ struct { __le16 ip_id; /* IP id */ __le16 csum; /* Packet Checksum */ } csum_ip; } hi_dword; } lower; struct { __le32 status_error; /* ext status/error */ __le16 length0; /* length of buffer 0 */ __le16 vlan; /* VLAN tag */ } middle; struct { __le16 header_status; /* length of buffers 1-3 */ __le16 length[PS_PAGE_BUFFERS]; } upper; __le64 reserved; } wb; /* writeback */ }; /* Transmit Descriptor */ struct e1000_tx_desc { __le64 buffer_addr; /* Address of the descriptor's data buffer */ union { __le32 data; struct { __le16 length; /* Data buffer length */ u8 cso; /* Checksum offset */ u8 cmd; /* Descriptor control */ } flags; } lower; union { __le32 data; struct { u8 status; /* Descriptor status */ u8 css; /* Checksum start */ __le16 special; } fields; } upper; }; /* Offload Context Descriptor */ struct e1000_context_desc { union { __le32 ip_config; struct { u8 ipcss; /* IP checksum start */ u8 ipcso; /* IP checksum offset */ __le16 ipcse; /* IP checksum end */ } ip_fields; } lower_setup; union { __le32 tcp_config; struct { u8 tucss; /* TCP checksum start */ u8 tucso; /* TCP checksum offset */ __le16 tucse; /* TCP checksum end */ } tcp_fields; } upper_setup; __le32 cmd_and_length; union { __le32 data; struct { u8 status; /* Descriptor status */ u8 hdr_len; /* Header length */ __le16 mss; /* Maximum segment size */ } fields; } tcp_seg_setup; }; /* Offload data descriptor */ struct e1000_data_desc { __le64 buffer_addr; /* Address of the descriptor's buffer address */ union { __le32 data; struct { __le16 length; /* Data buffer length */ u8 typ_len_ext; u8 cmd; } flags; } lower; union { __le32 data; struct { u8 status; /* Descriptor status */ u8 popts; /* Packet Options */ __le16 special; } fields; } upper; }; /* Statistics counters collected by the MAC */ struct e1000_hw_stats { u64 crcerrs; u64 algnerrc; u64 symerrs; u64 rxerrc; u64 mpc; u64 scc; u64 ecol; u64 mcc; u64 latecol; u64 colc; u64 dc; u64 tncrs; u64 sec; u64 cexterr; u64 rlec; u64 xonrxc; u64 xontxc; u64 xoffrxc; u64 xofftxc; u64 fcruc; u64 prc64; u64 prc127; u64 prc255; u64 prc511; u64 prc1023; u64 prc1522; u64 gprc; u64 bprc; u64 mprc; u64 gptc; u64 gorc; u64 gotc; u64 rnbc; u64 ruc; u64 rfc; u64 roc; u64 rjc; u64 mgprc; u64 mgpdc; u64 mgptc; u64 tor; u64 tot; u64 tpr; u64 tpt; u64 ptc64; u64 ptc127; u64 ptc255; u64 ptc511; u64 ptc1023; u64 ptc1522; u64 mptc; u64 bptc; u64 tsctc; u64 tsctfc; u64 iac; u64 icrxptc; u64 icrxatc; u64 ictxptc; u64 ictxatc; u64 ictxqec; u64 ictxqmtc; u64 icrxdmtc; u64 icrxoc; u64 cbtmpc; u64 htdpmc; u64 cbrdpc; u64 cbrmpc; u64 rpthc; u64 hgptc; u64 htcbdpc; u64 hgorc; u64 hgotc; u64 lenerrs; u64 scvpc; u64 hrmpc; u64 doosync; u64 o2bgptc; u64 o2bspc; u64 b2ospc; u64 b2ogprc; }; struct e1000_vf_stats { u64 base_gprc; u64 base_gptc; u64 base_gorc; u64 base_gotc; u64 base_mprc; u64 base_gotlbc; u64 base_gptlbc; u64 base_gorlbc; u64 base_gprlbc; u32 last_gprc; u32 last_gptc; u32 last_gorc; u32 last_gotc; u32 last_mprc; u32 last_gotlbc; u32 last_gptlbc; u32 last_gorlbc; u32 last_gprlbc; u64 gprc; u64 gptc; u64 gorc; u64 gotc; u64 mprc; u64 gotlbc; u64 gptlbc; u64 gorlbc; u64 gprlbc; }; struct e1000_phy_stats { u32 idle_errors; u32 receive_errors; }; struct e1000_host_mng_dhcp_cookie { u32 signature; u8 status; u8 reserved0; u16 vlan_id; u32 reserved1; u16 reserved2; u8 reserved3; u8 checksum; }; /* Host Interface "Rev 1" */ struct e1000_host_command_header { u8 command_id; u8 command_length; u8 command_options; u8 checksum; }; #define E1000_HI_MAX_DATA_LENGTH 252 struct e1000_host_command_info { struct e1000_host_command_header command_header; u8 command_data[E1000_HI_MAX_DATA_LENGTH]; }; /* Host Interface "Rev 2" */ struct e1000_host_mng_command_header { u8 command_id; u8 checksum; u16 reserved1; u16 reserved2; u16 command_length; }; #define E1000_HI_MAX_MNG_DATA_LENGTH 0x6F8 struct e1000_host_mng_command_info { struct e1000_host_mng_command_header command_header; u8 command_data[E1000_HI_MAX_MNG_DATA_LENGTH]; }; #include "e1000_mac.h" #include "e1000_phy.h" #include "e1000_nvm.h" #include "e1000_manage.h" #include "e1000_mbx.h" /* Function pointers for the MAC. */ struct e1000_mac_operations { s32 (*init_params)(struct e1000_hw *); s32 (*id_led_init)(struct e1000_hw *); s32 (*blink_led)(struct e1000_hw *); bool (*check_mng_mode)(struct e1000_hw *); s32 (*check_for_link)(struct e1000_hw *); s32 (*cleanup_led)(struct e1000_hw *); void (*clear_hw_cntrs)(struct e1000_hw *); void (*clear_vfta)(struct e1000_hw *); s32 (*get_bus_info)(struct e1000_hw *); void (*set_lan_id)(struct e1000_hw *); s32 (*get_link_up_info)(struct e1000_hw *, u16 *, u16 *); s32 (*led_on)(struct e1000_hw *); s32 (*led_off)(struct e1000_hw *); void (*update_mc_addr_list)(struct e1000_hw *, u8 *, u32); s32 (*reset_hw)(struct e1000_hw *); s32 (*init_hw)(struct e1000_hw *); void (*shutdown_serdes)(struct e1000_hw *); void (*power_up_serdes)(struct e1000_hw *); s32 (*setup_link)(struct e1000_hw *); s32 (*setup_physical_interface)(struct e1000_hw *); s32 (*setup_led)(struct e1000_hw *); void (*write_vfta)(struct e1000_hw *, u32, u32); void (*config_collision_dist)(struct e1000_hw *); int (*rar_set)(struct e1000_hw *, u8*, u32); s32 (*read_mac_addr)(struct e1000_hw *); s32 (*validate_mdi_setting)(struct e1000_hw *); s32 (*set_obff_timer)(struct e1000_hw *, u32); s32 (*acquire_swfw_sync)(struct e1000_hw *, u16); void (*release_swfw_sync)(struct e1000_hw *, u16); }; /* When to use various PHY register access functions: * * Func Caller * Function Does Does When to use * ~~~~~~~~~~~~ ~~~~~ ~~~~~~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * X_reg L,P,A n/a for simple PHY reg accesses * X_reg_locked P,A L for multiple accesses of different regs * on different pages * X_reg_page A L,P for multiple accesses of different regs * on the same page * * Where X=[read|write], L=locking, P=sets page, A=register access * */ struct e1000_phy_operations { s32 (*init_params)(struct e1000_hw *); s32 (*acquire)(struct e1000_hw *); s32 (*cfg_on_link_up)(struct e1000_hw *); s32 (*check_polarity)(struct e1000_hw *); s32 (*check_reset_block)(struct e1000_hw *); s32 (*commit)(struct e1000_hw *); s32 (*force_speed_duplex)(struct e1000_hw *); s32 (*get_cfg_done)(struct e1000_hw *hw); s32 (*get_cable_length)(struct e1000_hw *); s32 (*get_info)(struct e1000_hw *); s32 (*set_page)(struct e1000_hw *, u16); s32 (*read_reg)(struct e1000_hw *, u32, u16 *); s32 (*read_reg_locked)(struct e1000_hw *, u32, u16 *); s32 (*read_reg_page)(struct e1000_hw *, u32, u16 *); void (*release)(struct e1000_hw *); s32 (*reset)(struct e1000_hw *); s32 (*set_d0_lplu_state)(struct e1000_hw *, bool); s32 (*set_d3_lplu_state)(struct e1000_hw *, bool); s32 (*write_reg)(struct e1000_hw *, u32, u16); s32 (*write_reg_locked)(struct e1000_hw *, u32, u16); s32 (*write_reg_page)(struct e1000_hw *, u32, u16); void (*power_up)(struct e1000_hw *); void (*power_down)(struct e1000_hw *); s32 (*read_i2c_byte)(struct e1000_hw *, u8, u8, u8 *); s32 (*write_i2c_byte)(struct e1000_hw *, u8, u8, u8); }; /* Function pointers for the NVM. */ struct e1000_nvm_operations { s32 (*init_params)(struct e1000_hw *); s32 (*acquire)(struct e1000_hw *); s32 (*read)(struct e1000_hw *, u16, u16, u16 *); void (*release)(struct e1000_hw *); void (*reload)(struct e1000_hw *); s32 (*update)(struct e1000_hw *); s32 (*valid_led_default)(struct e1000_hw *, u16 *); s32 (*validate)(struct e1000_hw *); s32 (*write)(struct e1000_hw *, u16, u16, u16 *); }; struct e1000_mac_info { struct e1000_mac_operations ops; u8 addr[ETH_ADDR_LEN]; u8 perm_addr[ETH_ADDR_LEN]; enum e1000_mac_type type; u32 collision_delta; u32 ledctl_default; u32 ledctl_mode1; u32 ledctl_mode2; u32 mc_filter_type; u32 tx_packet_delta; u32 txcw; u16 current_ifs_val; u16 ifs_max_val; u16 ifs_min_val; u16 ifs_ratio; u16 ifs_step_size; u16 mta_reg_count; u16 uta_reg_count; /* Maximum size of the MTA register table in all supported adapters */ #define MAX_MTA_REG 128 u32 mta_shadow[MAX_MTA_REG]; u16 rar_entry_count; u8 forced_speed_duplex; bool adaptive_ifs; bool has_fwsm; bool arc_subsystem_valid; bool asf_firmware_present; bool autoneg; bool autoneg_failed; bool get_link_status; bool in_ifs_mode; bool report_tx_early; enum e1000_serdes_link_state serdes_link_state; bool serdes_has_link; bool tx_pkt_filtering; u32 max_frame_size; }; struct e1000_phy_info { struct e1000_phy_operations ops; enum e1000_phy_type type; enum e1000_1000t_rx_status local_rx; enum e1000_1000t_rx_status remote_rx; enum e1000_ms_type ms_type; enum e1000_ms_type original_ms_type; enum e1000_rev_polarity cable_polarity; enum e1000_smart_speed smart_speed; u32 addr; u32 id; u32 reset_delay_us; /* in usec */ u32 revision; enum e1000_media_type media_type; u16 autoneg_advertised; u16 autoneg_mask; u16 cable_length; u16 max_cable_length; u16 min_cable_length; u8 mdix; bool disable_polarity_correction; bool is_mdix; bool polarity_correction; bool speed_downgraded; bool autoneg_wait_to_complete; }; struct e1000_nvm_info { struct e1000_nvm_operations ops; enum e1000_nvm_type type; enum e1000_nvm_override override; u32 flash_bank_size; u32 flash_base_addr; u16 word_size; u16 delay_usec; u16 address_bits; u16 opcode_bits; u16 page_size; }; struct e1000_bus_info { enum e1000_bus_type type; enum e1000_bus_speed speed; enum e1000_bus_width width; u16 func; u16 pci_cmd_word; }; struct e1000_fc_info { u32 high_water; /* Flow control high-water mark */ u32 low_water; /* Flow control low-water mark */ u16 pause_time; /* Flow control pause timer */ u16 refresh_time; /* Flow control refresh timer */ bool send_xon; /* Flow control send XON */ bool strict_ieee; /* Strict IEEE mode */ enum e1000_fc_mode current_mode; /* FC mode in effect */ enum e1000_fc_mode requested_mode; /* FC mode requested by caller */ }; struct e1000_mbx_operations { s32 (*init_params)(struct e1000_hw *hw); s32 (*read)(struct e1000_hw *, u32 *, u16, u16); s32 (*write)(struct e1000_hw *, u32 *, u16, u16); s32 (*read_posted)(struct e1000_hw *, u32 *, u16, u16); s32 (*write_posted)(struct e1000_hw *, u32 *, u16, u16); s32 (*check_for_msg)(struct e1000_hw *, u16); s32 (*check_for_ack)(struct e1000_hw *, u16); s32 (*check_for_rst)(struct e1000_hw *, u16); }; struct e1000_mbx_stats { u32 msgs_tx; u32 msgs_rx; u32 acks; u32 reqs; u32 rsts; }; struct e1000_mbx_info { struct e1000_mbx_operations ops; struct e1000_mbx_stats stats; u32 timeout; u32 usec_delay; u16 size; }; struct e1000_dev_spec_82541 { enum e1000_dsp_config dsp_config; enum e1000_ffe_config ffe_config; u16 spd_default; bool phy_init_script; }; struct e1000_dev_spec_82542 { bool dma_fairness; }; struct e1000_dev_spec_82543 { u32 tbi_compatibility; bool dma_fairness; bool init_phy_disabled; }; struct e1000_dev_spec_82571 { bool laa_is_present; u32 smb_counter; - E1000_MUTEX swflag_mutex; }; struct e1000_dev_spec_80003es2lan { bool mdic_wa_enable; }; struct e1000_shadow_ram { u16 value; bool modified; }; #define E1000_SHADOW_RAM_WORDS 2048 /* I218 PHY Ultra Low Power (ULP) states */ enum e1000_ulp_state { e1000_ulp_state_unknown, e1000_ulp_state_off, e1000_ulp_state_on, }; struct e1000_dev_spec_ich8lan { bool kmrn_lock_loss_workaround_enabled; struct e1000_shadow_ram shadow_ram[E1000_SHADOW_RAM_WORDS]; - E1000_MUTEX nvm_mutex; - E1000_MUTEX swflag_mutex; bool nvm_k1_enabled; bool disable_k1_off; bool eee_disable; u16 eee_lp_ability; enum e1000_ulp_state ulp_state; bool ulp_capability_disabled; bool during_suspend_flow; bool during_dpg_exit; }; struct e1000_dev_spec_82575 { bool sgmii_active; bool global_device_reset; bool eee_disable; bool module_plugged; bool clear_semaphore_once; u32 mtu; struct sfp_e1000_flags eth_flags; u8 media_port; bool media_changed; }; struct e1000_dev_spec_vf { u32 vf_number; u32 v2p_mailbox; }; struct e1000_hw { void *back; u8 *hw_addr; u8 *flash_address; unsigned long io_base; struct e1000_mac_info mac; struct e1000_fc_info fc; struct e1000_phy_info phy; struct e1000_nvm_info nvm; struct e1000_bus_info bus; struct e1000_mbx_info mbx; struct e1000_host_mng_dhcp_cookie mng_cookie; union { struct e1000_dev_spec_82541 _82541; struct e1000_dev_spec_82542 _82542; struct e1000_dev_spec_82543 _82543; struct e1000_dev_spec_82571 _82571; struct e1000_dev_spec_80003es2lan _80003es2lan; struct e1000_dev_spec_ich8lan ich8lan; struct e1000_dev_spec_82575 _82575; struct e1000_dev_spec_vf vf; } dev_spec; u16 device_id; u16 subsystem_vendor_id; u16 subsystem_device_id; u16 vendor_id; u8 revision_id; }; #include "e1000_82541.h" #include "e1000_82543.h" #include "e1000_82571.h" #include "e1000_80003es2lan.h" #include "e1000_ich8lan.h" #include "e1000_82575.h" #include "e1000_i210.h" /* These functions must be implemented by drivers */ void e1000_pci_clear_mwi(struct e1000_hw *hw); void e1000_pci_set_mwi(struct e1000_hw *hw); s32 e1000_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value); s32 e1000_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value); void e1000_read_pci_cfg(struct e1000_hw *hw, u32 reg, u16 *value); void e1000_write_pci_cfg(struct e1000_hw *hw, u32 reg, u16 *value); #endif Index: head/sys/dev/e1000/e1000_i210.c =================================================================== --- head/sys/dev/e1000/e1000_i210.c (revision 323515) +++ head/sys/dev/e1000/e1000_i210.c (revision 323516) @@ -1,934 +1,788 @@ /****************************************************************************** Copyright (c) 2001-2015, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************/ /*$FreeBSD$*/ #include "e1000_api.h" static s32 e1000_acquire_nvm_i210(struct e1000_hw *hw); static void e1000_release_nvm_i210(struct e1000_hw *hw); -static s32 e1000_get_hw_semaphore_i210(struct e1000_hw *hw); static s32 e1000_write_nvm_srwr(struct e1000_hw *hw, u16 offset, u16 words, u16 *data); static s32 e1000_pool_flash_update_done_i210(struct e1000_hw *hw); static s32 e1000_valid_led_default_i210(struct e1000_hw *hw, u16 *data); /** * e1000_acquire_nvm_i210 - Request for access to EEPROM * @hw: pointer to the HW structure * * Acquire the necessary semaphores for exclusive access to the EEPROM. * Set the EEPROM access request bit and wait for EEPROM access grant bit. * Return successful if access grant bit set, else clear the request for * EEPROM access and return -E1000_ERR_NVM (-1). **/ static s32 e1000_acquire_nvm_i210(struct e1000_hw *hw) { s32 ret_val; DEBUGFUNC("e1000_acquire_nvm_i210"); - ret_val = e1000_acquire_swfw_sync_i210(hw, E1000_SWFW_EEP_SM); + ret_val = e1000_acquire_swfw_sync(hw, E1000_SWFW_EEP_SM); return ret_val; } /** * e1000_release_nvm_i210 - Release exclusive access to EEPROM * @hw: pointer to the HW structure * * Stop any current commands to the EEPROM and clear the EEPROM request bit, * then release the semaphores acquired. **/ static void e1000_release_nvm_i210(struct e1000_hw *hw) { DEBUGFUNC("e1000_release_nvm_i210"); - e1000_release_swfw_sync_i210(hw, E1000_SWFW_EEP_SM); -} - -/** - * e1000_acquire_swfw_sync_i210 - Acquire SW/FW semaphore - * @hw: pointer to the HW structure - * @mask: specifies which semaphore to acquire - * - * Acquire the SW/FW semaphore to access the PHY or NVM. The mask - * will also specify which port we're acquiring the lock for. - **/ -s32 e1000_acquire_swfw_sync_i210(struct e1000_hw *hw, u16 mask) -{ - u32 swfw_sync; - u32 swmask = mask; - u32 fwmask = mask << 16; - s32 ret_val = E1000_SUCCESS; - s32 i = 0, timeout = 200; /* FIXME: find real value to use here */ - - DEBUGFUNC("e1000_acquire_swfw_sync_i210"); - - while (i < timeout) { - if (e1000_get_hw_semaphore_i210(hw)) { - ret_val = -E1000_ERR_SWFW_SYNC; - goto out; - } - - swfw_sync = E1000_READ_REG(hw, E1000_SW_FW_SYNC); - if (!(swfw_sync & (fwmask | swmask))) - break; - - /* - * Firmware currently using resource (fwmask) - * or other software thread using resource (swmask) - */ - e1000_put_hw_semaphore_generic(hw); - msec_delay_irq(5); - i++; - } - - if (i == timeout) { - DEBUGOUT("Driver can't access resource, SW_FW_SYNC timeout.\n"); - ret_val = -E1000_ERR_SWFW_SYNC; - goto out; - } - - swfw_sync |= swmask; - E1000_WRITE_REG(hw, E1000_SW_FW_SYNC, swfw_sync); - - e1000_put_hw_semaphore_generic(hw); - -out: - return ret_val; -} - -/** - * e1000_release_swfw_sync_i210 - Release SW/FW semaphore - * @hw: pointer to the HW structure - * @mask: specifies which semaphore to acquire - * - * Release the SW/FW semaphore used to access the PHY or NVM. The mask - * will also specify which port we're releasing the lock for. - **/ -void e1000_release_swfw_sync_i210(struct e1000_hw *hw, u16 mask) -{ - u32 swfw_sync; - - DEBUGFUNC("e1000_release_swfw_sync_i210"); - - while (e1000_get_hw_semaphore_i210(hw) != E1000_SUCCESS) - ; /* Empty */ - - swfw_sync = E1000_READ_REG(hw, E1000_SW_FW_SYNC); - swfw_sync &= ~mask; - E1000_WRITE_REG(hw, E1000_SW_FW_SYNC, swfw_sync); - - e1000_put_hw_semaphore_generic(hw); -} - -/** - * e1000_get_hw_semaphore_i210 - Acquire hardware semaphore - * @hw: pointer to the HW structure - * - * Acquire the HW semaphore to access the PHY or NVM - **/ -static s32 e1000_get_hw_semaphore_i210(struct e1000_hw *hw) -{ - u32 swsm; - s32 timeout = hw->nvm.word_size + 1; - s32 i = 0; - - DEBUGFUNC("e1000_get_hw_semaphore_i210"); - - /* Get the SW semaphore */ - while (i < timeout) { - swsm = E1000_READ_REG(hw, E1000_SWSM); - if (!(swsm & E1000_SWSM_SMBI)) - break; - - usec_delay(50); - i++; - } - - if (i == timeout) { - /* In rare circumstances, the SW semaphore may already be held - * unintentionally. Clear the semaphore once before giving up. - */ - if (hw->dev_spec._82575.clear_semaphore_once) { - hw->dev_spec._82575.clear_semaphore_once = FALSE; - e1000_put_hw_semaphore_generic(hw); - for (i = 0; i < timeout; i++) { - swsm = E1000_READ_REG(hw, E1000_SWSM); - if (!(swsm & E1000_SWSM_SMBI)) - break; - - usec_delay(50); - } - } - - /* If we do not have the semaphore here, we have to give up. */ - if (i == timeout) { - DEBUGOUT("Driver can't access device - SMBI bit is set.\n"); - return -E1000_ERR_NVM; - } - } - - /* Get the FW semaphore. */ - for (i = 0; i < timeout; i++) { - swsm = E1000_READ_REG(hw, E1000_SWSM); - E1000_WRITE_REG(hw, E1000_SWSM, swsm | E1000_SWSM_SWESMBI); - - /* Semaphore acquired if bit latched */ - if (E1000_READ_REG(hw, E1000_SWSM) & E1000_SWSM_SWESMBI) - break; - - usec_delay(50); - } - - if (i == timeout) { - /* Release semaphores */ - e1000_put_hw_semaphore_generic(hw); - DEBUGOUT("Driver can't access the NVM\n"); - return -E1000_ERR_NVM; - } - - return E1000_SUCCESS; + e1000_release_swfw_sync(hw, E1000_SWFW_EEP_SM); } /** * e1000_read_nvm_srrd_i210 - Reads Shadow Ram using EERD register * @hw: pointer to the HW structure * @offset: offset of word in the Shadow Ram to read * @words: number of words to read * @data: word read from the Shadow Ram * * Reads a 16 bit word from the Shadow Ram using the EERD register. * Uses necessary synchronization semaphores. **/ s32 e1000_read_nvm_srrd_i210(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) { s32 status = E1000_SUCCESS; u16 i, count; DEBUGFUNC("e1000_read_nvm_srrd_i210"); /* We cannot hold synchronization semaphores for too long, * because of forceful takeover procedure. However it is more efficient * to read in bursts than synchronizing access for each word. */ for (i = 0; i < words; i += E1000_EERD_EEWR_MAX_COUNT) { count = (words - i) / E1000_EERD_EEWR_MAX_COUNT > 0 ? E1000_EERD_EEWR_MAX_COUNT : (words - i); if (hw->nvm.ops.acquire(hw) == E1000_SUCCESS) { status = e1000_read_nvm_eerd(hw, offset, count, data + i); hw->nvm.ops.release(hw); } else { status = E1000_ERR_SWFW_SYNC; } if (status != E1000_SUCCESS) break; } return status; } /** * e1000_write_nvm_srwr_i210 - Write to Shadow RAM using EEWR * @hw: pointer to the HW structure * @offset: offset within the Shadow RAM to be written to * @words: number of words to write * @data: 16 bit word(s) to be written to the Shadow RAM * * Writes data to Shadow RAM at offset using EEWR register. * * If e1000_update_nvm_checksum is not called after this function , the * data will not be committed to FLASH and also Shadow RAM will most likely * contain an invalid checksum. * * If error code is returned, data and Shadow RAM may be inconsistent - buffer * partially written. **/ s32 e1000_write_nvm_srwr_i210(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) { s32 status = E1000_SUCCESS; u16 i, count; DEBUGFUNC("e1000_write_nvm_srwr_i210"); /* We cannot hold synchronization semaphores for too long, * because of forceful takeover procedure. However it is more efficient * to write in bursts than synchronizing access for each word. */ for (i = 0; i < words; i += E1000_EERD_EEWR_MAX_COUNT) { count = (words - i) / E1000_EERD_EEWR_MAX_COUNT > 0 ? E1000_EERD_EEWR_MAX_COUNT : (words - i); if (hw->nvm.ops.acquire(hw) == E1000_SUCCESS) { status = e1000_write_nvm_srwr(hw, offset, count, data + i); hw->nvm.ops.release(hw); } else { status = E1000_ERR_SWFW_SYNC; } if (status != E1000_SUCCESS) break; } return status; } /** * e1000_write_nvm_srwr - Write to Shadow Ram using EEWR * @hw: pointer to the HW structure * @offset: offset within the Shadow Ram to be written to * @words: number of words to write * @data: 16 bit word(s) to be written to the Shadow Ram * * Writes data to Shadow Ram at offset using EEWR register. * * If e1000_update_nvm_checksum is not called after this function , the * Shadow Ram will most likely contain an invalid checksum. **/ static s32 e1000_write_nvm_srwr(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) { struct e1000_nvm_info *nvm = &hw->nvm; u32 i, k, eewr = 0; u32 attempts = 100000; s32 ret_val = E1000_SUCCESS; DEBUGFUNC("e1000_write_nvm_srwr"); /* * A check for invalid values: offset too large, too many words, * too many words for the offset, and not enough words. */ if ((offset >= nvm->word_size) || (words > (nvm->word_size - offset)) || (words == 0)) { DEBUGOUT("nvm parameter(s) out of bounds\n"); ret_val = -E1000_ERR_NVM; goto out; } for (i = 0; i < words; i++) { eewr = ((offset+i) << E1000_NVM_RW_ADDR_SHIFT) | (data[i] << E1000_NVM_RW_REG_DATA) | E1000_NVM_RW_REG_START; E1000_WRITE_REG(hw, E1000_SRWR, eewr); for (k = 0; k < attempts; k++) { if (E1000_NVM_RW_REG_DONE & E1000_READ_REG(hw, E1000_SRWR)) { ret_val = E1000_SUCCESS; break; } usec_delay(5); } if (ret_val != E1000_SUCCESS) { DEBUGOUT("Shadow RAM write EEWR timed out\n"); break; } } out: return ret_val; } /** e1000_read_invm_word_i210 - Reads OTP * @hw: pointer to the HW structure * @address: the word address (aka eeprom offset) to read * @data: pointer to the data read * * Reads 16-bit words from the OTP. Return error when the word is not * stored in OTP. **/ static s32 e1000_read_invm_word_i210(struct e1000_hw *hw, u8 address, u16 *data) { s32 status = -E1000_ERR_INVM_VALUE_NOT_FOUND; u32 invm_dword; u16 i; u8 record_type, word_address; DEBUGFUNC("e1000_read_invm_word_i210"); for (i = 0; i < E1000_INVM_SIZE; i++) { invm_dword = E1000_READ_REG(hw, E1000_INVM_DATA_REG(i)); /* Get record type */ record_type = INVM_DWORD_TO_RECORD_TYPE(invm_dword); if (record_type == E1000_INVM_UNINITIALIZED_STRUCTURE) break; if (record_type == E1000_INVM_CSR_AUTOLOAD_STRUCTURE) i += E1000_INVM_CSR_AUTOLOAD_DATA_SIZE_IN_DWORDS; if (record_type == E1000_INVM_RSA_KEY_SHA256_STRUCTURE) i += E1000_INVM_RSA_KEY_SHA256_DATA_SIZE_IN_DWORDS; if (record_type == E1000_INVM_WORD_AUTOLOAD_STRUCTURE) { word_address = INVM_DWORD_TO_WORD_ADDRESS(invm_dword); if (word_address == address) { *data = INVM_DWORD_TO_WORD_DATA(invm_dword); DEBUGOUT2("Read INVM Word 0x%02x = %x", address, *data); status = E1000_SUCCESS; break; } } } if (status != E1000_SUCCESS) DEBUGOUT1("Requested word 0x%02x not found in OTP\n", address); return status; } /** e1000_read_invm_i210 - Read invm wrapper function for I210/I211 * @hw: pointer to the HW structure * @address: the word address (aka eeprom offset) to read * @data: pointer to the data read * * Wrapper function to return data formerly found in the NVM. **/ static s32 e1000_read_invm_i210(struct e1000_hw *hw, u16 offset, u16 E1000_UNUSEDARG words, u16 *data) { s32 ret_val = E1000_SUCCESS; DEBUGFUNC("e1000_read_invm_i210"); /* Only the MAC addr is required to be present in the iNVM */ switch (offset) { case NVM_MAC_ADDR: ret_val = e1000_read_invm_word_i210(hw, (u8)offset, &data[0]); ret_val |= e1000_read_invm_word_i210(hw, (u8)offset+1, &data[1]); ret_val |= e1000_read_invm_word_i210(hw, (u8)offset+2, &data[2]); if (ret_val != E1000_SUCCESS) DEBUGOUT("MAC Addr not found in iNVM\n"); break; case NVM_INIT_CTRL_2: ret_val = e1000_read_invm_word_i210(hw, (u8)offset, data); if (ret_val != E1000_SUCCESS) { *data = NVM_INIT_CTRL_2_DEFAULT_I211; ret_val = E1000_SUCCESS; } break; case NVM_INIT_CTRL_4: ret_val = e1000_read_invm_word_i210(hw, (u8)offset, data); if (ret_val != E1000_SUCCESS) { *data = NVM_INIT_CTRL_4_DEFAULT_I211; ret_val = E1000_SUCCESS; } break; case NVM_LED_1_CFG: ret_val = e1000_read_invm_word_i210(hw, (u8)offset, data); if (ret_val != E1000_SUCCESS) { *data = NVM_LED_1_CFG_DEFAULT_I211; ret_val = E1000_SUCCESS; } break; case NVM_LED_0_2_CFG: ret_val = e1000_read_invm_word_i210(hw, (u8)offset, data); if (ret_val != E1000_SUCCESS) { *data = NVM_LED_0_2_CFG_DEFAULT_I211; ret_val = E1000_SUCCESS; } break; case NVM_ID_LED_SETTINGS: ret_val = e1000_read_invm_word_i210(hw, (u8)offset, data); if (ret_val != E1000_SUCCESS) { *data = ID_LED_RESERVED_FFFF; ret_val = E1000_SUCCESS; } break; case NVM_SUB_DEV_ID: *data = hw->subsystem_device_id; break; case NVM_SUB_VEN_ID: *data = hw->subsystem_vendor_id; break; case NVM_DEV_ID: *data = hw->device_id; break; case NVM_VEN_ID: *data = hw->vendor_id; break; default: DEBUGOUT1("NVM word 0x%02x is not mapped.\n", offset); *data = NVM_RESERVED_WORD; break; } return ret_val; } /** * e1000_validate_nvm_checksum_i210 - Validate EEPROM checksum * @hw: pointer to the HW structure * * Calculates the EEPROM checksum by reading/adding each word of the EEPROM * and then verifies that the sum of the EEPROM is equal to 0xBABA. **/ s32 e1000_validate_nvm_checksum_i210(struct e1000_hw *hw) { s32 status = E1000_SUCCESS; s32 (*read_op_ptr)(struct e1000_hw *, u16, u16, u16 *); DEBUGFUNC("e1000_validate_nvm_checksum_i210"); if (hw->nvm.ops.acquire(hw) == E1000_SUCCESS) { /* * Replace the read function with semaphore grabbing with * the one that skips this for a while. * We have semaphore taken already here. */ read_op_ptr = hw->nvm.ops.read; hw->nvm.ops.read = e1000_read_nvm_eerd; status = e1000_validate_nvm_checksum_generic(hw); /* Revert original read operation. */ hw->nvm.ops.read = read_op_ptr; hw->nvm.ops.release(hw); } else { status = E1000_ERR_SWFW_SYNC; } return status; } /** * e1000_update_nvm_checksum_i210 - Update EEPROM checksum * @hw: pointer to the HW structure * * Updates the EEPROM checksum by reading/adding each word of the EEPROM * up to the checksum. Then calculates the EEPROM checksum and writes the * value to the EEPROM. Next commit EEPROM data onto the Flash. **/ s32 e1000_update_nvm_checksum_i210(struct e1000_hw *hw) { s32 ret_val; u16 checksum = 0; u16 i, nvm_data; DEBUGFUNC("e1000_update_nvm_checksum_i210"); /* * Read the first word from the EEPROM. If this times out or fails, do * not continue or we could be in for a very long wait while every * EEPROM read fails */ ret_val = e1000_read_nvm_eerd(hw, 0, 1, &nvm_data); if (ret_val != E1000_SUCCESS) { DEBUGOUT("EEPROM read failed\n"); goto out; } if (hw->nvm.ops.acquire(hw) == E1000_SUCCESS) { /* * Do not use hw->nvm.ops.write, hw->nvm.ops.read * because we do not want to take the synchronization * semaphores twice here. */ for (i = 0; i < NVM_CHECKSUM_REG; i++) { ret_val = e1000_read_nvm_eerd(hw, i, 1, &nvm_data); if (ret_val) { hw->nvm.ops.release(hw); DEBUGOUT("NVM Read Error while updating checksum.\n"); goto out; } checksum += nvm_data; } checksum = (u16) NVM_SUM - checksum; ret_val = e1000_write_nvm_srwr(hw, NVM_CHECKSUM_REG, 1, &checksum); if (ret_val != E1000_SUCCESS) { hw->nvm.ops.release(hw); DEBUGOUT("NVM Write Error while updating checksum.\n"); goto out; } hw->nvm.ops.release(hw); ret_val = e1000_update_flash_i210(hw); } else { ret_val = E1000_ERR_SWFW_SYNC; } out: return ret_val; } /** * e1000_get_flash_presence_i210 - Check if flash device is detected. * @hw: pointer to the HW structure * **/ bool e1000_get_flash_presence_i210(struct e1000_hw *hw) { u32 eec = 0; bool ret_val = FALSE; DEBUGFUNC("e1000_get_flash_presence_i210"); eec = E1000_READ_REG(hw, E1000_EECD); if (eec & E1000_EECD_FLASH_DETECTED_I210) ret_val = TRUE; return ret_val; } /** * e1000_update_flash_i210 - Commit EEPROM to the flash * @hw: pointer to the HW structure * **/ s32 e1000_update_flash_i210(struct e1000_hw *hw) { s32 ret_val; u32 flup; DEBUGFUNC("e1000_update_flash_i210"); ret_val = e1000_pool_flash_update_done_i210(hw); if (ret_val == -E1000_ERR_NVM) { DEBUGOUT("Flash update time out\n"); goto out; } flup = E1000_READ_REG(hw, E1000_EECD) | E1000_EECD_FLUPD_I210; E1000_WRITE_REG(hw, E1000_EECD, flup); ret_val = e1000_pool_flash_update_done_i210(hw); if (ret_val == E1000_SUCCESS) DEBUGOUT("Flash update complete\n"); else DEBUGOUT("Flash update time out\n"); out: return ret_val; } /** * e1000_pool_flash_update_done_i210 - Pool FLUDONE status. * @hw: pointer to the HW structure * **/ s32 e1000_pool_flash_update_done_i210(struct e1000_hw *hw) { s32 ret_val = -E1000_ERR_NVM; u32 i, reg; DEBUGFUNC("e1000_pool_flash_update_done_i210"); for (i = 0; i < E1000_FLUDONE_ATTEMPTS; i++) { reg = E1000_READ_REG(hw, E1000_EECD); if (reg & E1000_EECD_FLUDONE_I210) { ret_val = E1000_SUCCESS; break; } usec_delay(5); } return ret_val; } /** * e1000_init_nvm_params_i210 - Initialize i210 NVM function pointers * @hw: pointer to the HW structure * * Initialize the i210/i211 NVM parameters and function pointers. **/ static s32 e1000_init_nvm_params_i210(struct e1000_hw *hw) { s32 ret_val; struct e1000_nvm_info *nvm = &hw->nvm; DEBUGFUNC("e1000_init_nvm_params_i210"); ret_val = e1000_init_nvm_params_82575(hw); nvm->ops.acquire = e1000_acquire_nvm_i210; nvm->ops.release = e1000_release_nvm_i210; nvm->ops.valid_led_default = e1000_valid_led_default_i210; if (e1000_get_flash_presence_i210(hw)) { hw->nvm.type = e1000_nvm_flash_hw; nvm->ops.read = e1000_read_nvm_srrd_i210; nvm->ops.write = e1000_write_nvm_srwr_i210; nvm->ops.validate = e1000_validate_nvm_checksum_i210; nvm->ops.update = e1000_update_nvm_checksum_i210; } else { hw->nvm.type = e1000_nvm_invm; nvm->ops.read = e1000_read_invm_i210; nvm->ops.write = e1000_null_write_nvm; nvm->ops.validate = e1000_null_ops_generic; nvm->ops.update = e1000_null_ops_generic; } return ret_val; } /** * e1000_init_function_pointers_i210 - Init func ptrs. * @hw: pointer to the HW structure * * Called to initialize all function pointers and parameters. **/ void e1000_init_function_pointers_i210(struct e1000_hw *hw) { e1000_init_function_pointers_82575(hw); hw->nvm.ops.init_params = e1000_init_nvm_params_i210; return; } /** * e1000_valid_led_default_i210 - Verify a valid default LED config * @hw: pointer to the HW structure * @data: pointer to the NVM (EEPROM) * * Read the EEPROM for the current default LED configuration. If the * LED configuration is not valid, set to a valid LED configuration. **/ static s32 e1000_valid_led_default_i210(struct e1000_hw *hw, u16 *data) { s32 ret_val; DEBUGFUNC("e1000_valid_led_default_i210"); ret_val = hw->nvm.ops.read(hw, NVM_ID_LED_SETTINGS, 1, data); if (ret_val) { DEBUGOUT("NVM Read Error\n"); goto out; } if (*data == ID_LED_RESERVED_0000 || *data == ID_LED_RESERVED_FFFF) { switch (hw->phy.media_type) { case e1000_media_type_internal_serdes: *data = ID_LED_DEFAULT_I210_SERDES; break; case e1000_media_type_copper: default: *data = ID_LED_DEFAULT_I210; break; } } out: return ret_val; } /** * __e1000_access_xmdio_reg - Read/write XMDIO register * @hw: pointer to the HW structure * @address: XMDIO address to program * @dev_addr: device address to program * @data: pointer to value to read/write from/to the XMDIO address * @read: boolean flag to indicate read or write **/ static s32 __e1000_access_xmdio_reg(struct e1000_hw *hw, u16 address, u8 dev_addr, u16 *data, bool read) { s32 ret_val; DEBUGFUNC("__e1000_access_xmdio_reg"); ret_val = hw->phy.ops.write_reg(hw, E1000_MMDAC, dev_addr); if (ret_val) return ret_val; ret_val = hw->phy.ops.write_reg(hw, E1000_MMDAAD, address); if (ret_val) return ret_val; ret_val = hw->phy.ops.write_reg(hw, E1000_MMDAC, E1000_MMDAC_FUNC_DATA | dev_addr); if (ret_val) return ret_val; if (read) ret_val = hw->phy.ops.read_reg(hw, E1000_MMDAAD, data); else ret_val = hw->phy.ops.write_reg(hw, E1000_MMDAAD, *data); if (ret_val) return ret_val; /* Recalibrate the device back to 0 */ ret_val = hw->phy.ops.write_reg(hw, E1000_MMDAC, 0); if (ret_val) return ret_val; return ret_val; } /** * e1000_read_xmdio_reg - Read XMDIO register * @hw: pointer to the HW structure * @addr: XMDIO address to program * @dev_addr: device address to program * @data: value to be read from the EMI address **/ s32 e1000_read_xmdio_reg(struct e1000_hw *hw, u16 addr, u8 dev_addr, u16 *data) { DEBUGFUNC("e1000_read_xmdio_reg"); return __e1000_access_xmdio_reg(hw, addr, dev_addr, data, TRUE); } /** * e1000_write_xmdio_reg - Write XMDIO register * @hw: pointer to the HW structure * @addr: XMDIO address to program * @dev_addr: device address to program * @data: value to be written to the XMDIO address **/ s32 e1000_write_xmdio_reg(struct e1000_hw *hw, u16 addr, u8 dev_addr, u16 data) { DEBUGFUNC("e1000_read_xmdio_reg"); return __e1000_access_xmdio_reg(hw, addr, dev_addr, &data, FALSE); } /** * e1000_pll_workaround_i210 * @hw: pointer to the HW structure * * Works around an errata in the PLL circuit where it occasionally * provides the wrong clock frequency after power up. **/ static s32 e1000_pll_workaround_i210(struct e1000_hw *hw) { s32 ret_val; u32 wuc, mdicnfg, ctrl, ctrl_ext, reg_val; u16 nvm_word, phy_word, pci_word, tmp_nvm; int i; /* Get and set needed register values */ wuc = E1000_READ_REG(hw, E1000_WUC); mdicnfg = E1000_READ_REG(hw, E1000_MDICNFG); reg_val = mdicnfg & ~E1000_MDICNFG_EXT_MDIO; E1000_WRITE_REG(hw, E1000_MDICNFG, reg_val); /* Get data from NVM, or set default */ ret_val = e1000_read_invm_word_i210(hw, E1000_INVM_AUTOLOAD, &nvm_word); if (ret_val != E1000_SUCCESS) nvm_word = E1000_INVM_DEFAULT_AL; tmp_nvm = nvm_word | E1000_INVM_PLL_WO_VAL; for (i = 0; i < E1000_MAX_PLL_TRIES; i++) { /* check current state directly from internal PHY */ e1000_read_phy_reg_gs40g(hw, (E1000_PHY_PLL_FREQ_PAGE | E1000_PHY_PLL_FREQ_REG), &phy_word); if ((phy_word & E1000_PHY_PLL_UNCONF) != E1000_PHY_PLL_UNCONF) { ret_val = E1000_SUCCESS; break; } else { ret_val = -E1000_ERR_PHY; } /* directly reset the internal PHY */ ctrl = E1000_READ_REG(hw, E1000_CTRL); E1000_WRITE_REG(hw, E1000_CTRL, ctrl|E1000_CTRL_PHY_RST); ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT); ctrl_ext |= (E1000_CTRL_EXT_PHYPDEN | E1000_CTRL_EXT_SDLPE); E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext); E1000_WRITE_REG(hw, E1000_WUC, 0); reg_val = (E1000_INVM_AUTOLOAD << 4) | (tmp_nvm << 16); E1000_WRITE_REG(hw, E1000_EEARBC_I210, reg_val); e1000_read_pci_cfg(hw, E1000_PCI_PMCSR, &pci_word); pci_word |= E1000_PCI_PMCSR_D3; e1000_write_pci_cfg(hw, E1000_PCI_PMCSR, &pci_word); msec_delay(1); pci_word &= ~E1000_PCI_PMCSR_D3; e1000_write_pci_cfg(hw, E1000_PCI_PMCSR, &pci_word); reg_val = (E1000_INVM_AUTOLOAD << 4) | (nvm_word << 16); E1000_WRITE_REG(hw, E1000_EEARBC_I210, reg_val); /* restore WUC register */ E1000_WRITE_REG(hw, E1000_WUC, wuc); } /* restore MDICNFG setting */ E1000_WRITE_REG(hw, E1000_MDICNFG, mdicnfg); return ret_val; } /** * e1000_get_cfg_done_i210 - Read config done bit * @hw: pointer to the HW structure * * Read the management control register for the config done bit for * completion status. NOTE: silicon which is EEPROM-less will fail trying * to read the config done bit, so an error is *ONLY* logged and returns * E1000_SUCCESS. If we were to return with error, EEPROM-less silicon * would not be able to be reset or change link. **/ static s32 e1000_get_cfg_done_i210(struct e1000_hw *hw) { s32 timeout = PHY_CFG_TIMEOUT; u32 mask = E1000_NVM_CFG_DONE_PORT_0; DEBUGFUNC("e1000_get_cfg_done_i210"); while (timeout) { if (E1000_READ_REG(hw, E1000_EEMNGCTL_I210) & mask) break; msec_delay(1); timeout--; } if (!timeout) DEBUGOUT("MNG configuration cycle has not completed.\n"); return E1000_SUCCESS; } /** * e1000_init_hw_i210 - Init hw for I210/I211 * @hw: pointer to the HW structure * * Called to initialize hw for i210 hw family. **/ s32 e1000_init_hw_i210(struct e1000_hw *hw) { s32 ret_val; DEBUGFUNC("e1000_init_hw_i210"); if ((hw->mac.type >= e1000_i210) && !(e1000_get_flash_presence_i210(hw))) { ret_val = e1000_pll_workaround_i210(hw); if (ret_val != E1000_SUCCESS) return ret_val; } hw->phy.ops.get_cfg_done = e1000_get_cfg_done_i210; ret_val = e1000_init_hw_82575(hw); return ret_val; } Index: head/sys/dev/e1000/e1000_i210.h =================================================================== --- head/sys/dev/e1000/e1000_i210.h (revision 323515) +++ head/sys/dev/e1000/e1000_i210.h (revision 323516) @@ -1,109 +1,107 @@ /****************************************************************************** Copyright (c) 2001-2015, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************/ /*$FreeBSD$*/ #ifndef _E1000_I210_H_ #define _E1000_I210_H_ bool e1000_get_flash_presence_i210(struct e1000_hw *hw); s32 e1000_update_flash_i210(struct e1000_hw *hw); s32 e1000_update_nvm_checksum_i210(struct e1000_hw *hw); s32 e1000_validate_nvm_checksum_i210(struct e1000_hw *hw); s32 e1000_write_nvm_srwr_i210(struct e1000_hw *hw, u16 offset, u16 words, u16 *data); s32 e1000_read_nvm_srrd_i210(struct e1000_hw *hw, u16 offset, u16 words, u16 *data); -s32 e1000_acquire_swfw_sync_i210(struct e1000_hw *hw, u16 mask); -void e1000_release_swfw_sync_i210(struct e1000_hw *hw, u16 mask); s32 e1000_read_xmdio_reg(struct e1000_hw *hw, u16 addr, u8 dev_addr, u16 *data); s32 e1000_write_xmdio_reg(struct e1000_hw *hw, u16 addr, u8 dev_addr, u16 data); s32 e1000_init_hw_i210(struct e1000_hw *hw); #define E1000_STM_OPCODE 0xDB00 #define E1000_EEPROM_FLASH_SIZE_WORD 0x11 #define INVM_DWORD_TO_RECORD_TYPE(invm_dword) \ (u8)((invm_dword) & 0x7) #define INVM_DWORD_TO_WORD_ADDRESS(invm_dword) \ (u8)(((invm_dword) & 0x0000FE00) >> 9) #define INVM_DWORD_TO_WORD_DATA(invm_dword) \ (u16)(((invm_dword) & 0xFFFF0000) >> 16) enum E1000_INVM_STRUCTURE_TYPE { E1000_INVM_UNINITIALIZED_STRUCTURE = 0x00, E1000_INVM_WORD_AUTOLOAD_STRUCTURE = 0x01, E1000_INVM_CSR_AUTOLOAD_STRUCTURE = 0x02, E1000_INVM_PHY_REGISTER_AUTOLOAD_STRUCTURE = 0x03, E1000_INVM_RSA_KEY_SHA256_STRUCTURE = 0x04, E1000_INVM_INVALIDATED_STRUCTURE = 0x0F, }; #define E1000_INVM_RSA_KEY_SHA256_DATA_SIZE_IN_DWORDS 8 #define E1000_INVM_CSR_AUTOLOAD_DATA_SIZE_IN_DWORDS 1 #define E1000_INVM_ULT_BYTES_SIZE 8 #define E1000_INVM_RECORD_SIZE_IN_BYTES 4 #define E1000_INVM_VER_FIELD_ONE 0x1FF8 #define E1000_INVM_VER_FIELD_TWO 0x7FE000 #define E1000_INVM_IMGTYPE_FIELD 0x1F800000 #define E1000_INVM_MAJOR_MASK 0x3F0 #define E1000_INVM_MINOR_MASK 0xF #define E1000_INVM_MAJOR_SHIFT 4 #define ID_LED_DEFAULT_I210 ((ID_LED_OFF1_ON2 << 8) | \ (ID_LED_DEF1_DEF2 << 4) | \ (ID_LED_OFF1_OFF2)) #define ID_LED_DEFAULT_I210_SERDES ((ID_LED_DEF1_DEF2 << 8) | \ (ID_LED_DEF1_DEF2 << 4) | \ (ID_LED_OFF1_ON2)) /* NVM offset defaults for I211 devices */ #define NVM_INIT_CTRL_2_DEFAULT_I211 0X7243 #define NVM_INIT_CTRL_4_DEFAULT_I211 0x00C1 #define NVM_LED_1_CFG_DEFAULT_I211 0x0184 #define NVM_LED_0_2_CFG_DEFAULT_I211 0x200C /* PLL Defines */ #define E1000_PCI_PMCSR 0x44 #define E1000_PCI_PMCSR_D3 0x03 #define E1000_MAX_PLL_TRIES 5 #define E1000_PHY_PLL_UNCONF 0xFF #define E1000_PHY_PLL_FREQ_PAGE 0xFC0000 #define E1000_PHY_PLL_FREQ_REG 0x000E #define E1000_INVM_DEFAULT_AL 0x202F #define E1000_INVM_AUTOLOAD 0x0A #define E1000_INVM_PLL_WO_VAL 0x0010 #endif Index: head/sys/dev/e1000/e1000_ich8lan.c =================================================================== --- head/sys/dev/e1000/e1000_ich8lan.c (revision 323515) +++ head/sys/dev/e1000/e1000_ich8lan.c (revision 323516) @@ -1,6110 +1,6096 @@ /****************************************************************************** Copyright (c) 2001-2015, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************/ /*$FreeBSD$*/ /* 82562G 10/100 Network Connection * 82562G-2 10/100 Network Connection * 82562GT 10/100 Network Connection * 82562GT-2 10/100 Network Connection * 82562V 10/100 Network Connection * 82562V-2 10/100 Network Connection * 82566DC-2 Gigabit Network Connection * 82566DC Gigabit Network Connection * 82566DM-2 Gigabit Network Connection * 82566DM Gigabit Network Connection * 82566MC Gigabit Network Connection * 82566MM Gigabit Network Connection * 82567LM Gigabit Network Connection * 82567LF Gigabit Network Connection * 82567V Gigabit Network Connection * 82567LM-2 Gigabit Network Connection * 82567LF-2 Gigabit Network Connection * 82567V-2 Gigabit Network Connection * 82567LF-3 Gigabit Network Connection * 82567LM-3 Gigabit Network Connection * 82567LM-4 Gigabit Network Connection * 82577LM Gigabit Network Connection * 82577LC Gigabit Network Connection * 82578DM Gigabit Network Connection * 82578DC Gigabit Network Connection * 82579LM Gigabit Network Connection * 82579V Gigabit Network Connection * Ethernet Connection I217-LM * Ethernet Connection I217-V * Ethernet Connection I218-V * Ethernet Connection I218-LM * Ethernet Connection (2) I218-LM * Ethernet Connection (2) I218-V * Ethernet Connection (3) I218-LM * Ethernet Connection (3) I218-V */ #include "e1000_api.h" static s32 e1000_acquire_swflag_ich8lan(struct e1000_hw *hw); static void e1000_release_swflag_ich8lan(struct e1000_hw *hw); static s32 e1000_acquire_nvm_ich8lan(struct e1000_hw *hw); static void e1000_release_nvm_ich8lan(struct e1000_hw *hw); static bool e1000_check_mng_mode_ich8lan(struct e1000_hw *hw); static bool e1000_check_mng_mode_pchlan(struct e1000_hw *hw); static int e1000_rar_set_pch2lan(struct e1000_hw *hw, u8 *addr, u32 index); static int e1000_rar_set_pch_lpt(struct e1000_hw *hw, u8 *addr, u32 index); static s32 e1000_sw_lcd_config_ich8lan(struct e1000_hw *hw); static void e1000_update_mc_addr_list_pch2lan(struct e1000_hw *hw, u8 *mc_addr_list, u32 mc_addr_count); static s32 e1000_check_reset_block_ich8lan(struct e1000_hw *hw); static s32 e1000_phy_hw_reset_ich8lan(struct e1000_hw *hw); static s32 e1000_set_lplu_state_pchlan(struct e1000_hw *hw, bool active); static s32 e1000_set_d0_lplu_state_ich8lan(struct e1000_hw *hw, bool active); static s32 e1000_set_d3_lplu_state_ich8lan(struct e1000_hw *hw, bool active); static s32 e1000_read_nvm_ich8lan(struct e1000_hw *hw, u16 offset, u16 words, u16 *data); static s32 e1000_read_nvm_spt(struct e1000_hw *hw, u16 offset, u16 words, u16 *data); static s32 e1000_write_nvm_ich8lan(struct e1000_hw *hw, u16 offset, u16 words, u16 *data); static s32 e1000_validate_nvm_checksum_ich8lan(struct e1000_hw *hw); static s32 e1000_update_nvm_checksum_ich8lan(struct e1000_hw *hw); static s32 e1000_update_nvm_checksum_spt(struct e1000_hw *hw); static s32 e1000_valid_led_default_ich8lan(struct e1000_hw *hw, u16 *data); static s32 e1000_id_led_init_pchlan(struct e1000_hw *hw); static s32 e1000_get_bus_info_ich8lan(struct e1000_hw *hw); static s32 e1000_reset_hw_ich8lan(struct e1000_hw *hw); static s32 e1000_init_hw_ich8lan(struct e1000_hw *hw); static s32 e1000_setup_link_ich8lan(struct e1000_hw *hw); static s32 e1000_setup_copper_link_ich8lan(struct e1000_hw *hw); static s32 e1000_setup_copper_link_pch_lpt(struct e1000_hw *hw); static s32 e1000_get_link_up_info_ich8lan(struct e1000_hw *hw, u16 *speed, u16 *duplex); static s32 e1000_cleanup_led_ich8lan(struct e1000_hw *hw); static s32 e1000_led_on_ich8lan(struct e1000_hw *hw); static s32 e1000_led_off_ich8lan(struct e1000_hw *hw); static s32 e1000_k1_gig_workaround_hv(struct e1000_hw *hw, bool link); static s32 e1000_setup_led_pchlan(struct e1000_hw *hw); static s32 e1000_cleanup_led_pchlan(struct e1000_hw *hw); static s32 e1000_led_on_pchlan(struct e1000_hw *hw); static s32 e1000_led_off_pchlan(struct e1000_hw *hw); static void e1000_clear_hw_cntrs_ich8lan(struct e1000_hw *hw); static s32 e1000_erase_flash_bank_ich8lan(struct e1000_hw *hw, u32 bank); static void e1000_initialize_hw_bits_ich8lan(struct e1000_hw *hw); static s32 e1000_kmrn_lock_loss_workaround_ich8lan(struct e1000_hw *hw); static s32 e1000_read_flash_byte_ich8lan(struct e1000_hw *hw, u32 offset, u8 *data); static s32 e1000_read_flash_data_ich8lan(struct e1000_hw *hw, u32 offset, u8 size, u16 *data); static s32 e1000_read_flash_data32_ich8lan(struct e1000_hw *hw, u32 offset, u32 *data); static s32 e1000_read_flash_dword_ich8lan(struct e1000_hw *hw, u32 offset, u32 *data); static s32 e1000_write_flash_data32_ich8lan(struct e1000_hw *hw, u32 offset, u32 data); static s32 e1000_retry_write_flash_dword_ich8lan(struct e1000_hw *hw, u32 offset, u32 dword); static s32 e1000_read_flash_word_ich8lan(struct e1000_hw *hw, u32 offset, u16 *data); static s32 e1000_retry_write_flash_byte_ich8lan(struct e1000_hw *hw, u32 offset, u8 byte); static s32 e1000_get_cfg_done_ich8lan(struct e1000_hw *hw); static void e1000_power_down_phy_copper_ich8lan(struct e1000_hw *hw); static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw); static s32 e1000_set_mdio_slow_mode_hv(struct e1000_hw *hw); static s32 e1000_k1_workaround_lv(struct e1000_hw *hw); static void e1000_gate_hw_phy_config_ich8lan(struct e1000_hw *hw, bool gate); static s32 e1000_set_obff_timer_pch_lpt(struct e1000_hw *hw, u32 itr); /* ICH GbE Flash Hardware Sequencing Flash Status Register bit breakdown */ /* Offset 04h HSFSTS */ union ich8_hws_flash_status { struct ich8_hsfsts { u16 flcdone:1; /* bit 0 Flash Cycle Done */ u16 flcerr:1; /* bit 1 Flash Cycle Error */ u16 dael:1; /* bit 2 Direct Access error Log */ u16 berasesz:2; /* bit 4:3 Sector Erase Size */ u16 flcinprog:1; /* bit 5 flash cycle in Progress */ u16 reserved1:2; /* bit 13:6 Reserved */ u16 reserved2:6; /* bit 13:6 Reserved */ u16 fldesvalid:1; /* bit 14 Flash Descriptor Valid */ u16 flockdn:1; /* bit 15 Flash Config Lock-Down */ } hsf_status; u16 regval; }; /* ICH GbE Flash Hardware Sequencing Flash control Register bit breakdown */ /* Offset 06h FLCTL */ union ich8_hws_flash_ctrl { struct ich8_hsflctl { u16 flcgo:1; /* 0 Flash Cycle Go */ u16 flcycle:2; /* 2:1 Flash Cycle */ u16 reserved:5; /* 7:3 Reserved */ u16 fldbcount:2; /* 9:8 Flash Data Byte Count */ u16 flockdn:6; /* 15:10 Reserved */ } hsf_ctrl; u16 regval; }; /* ICH Flash Region Access Permissions */ union ich8_hws_flash_regacc { struct ich8_flracc { u32 grra:8; /* 0:7 GbE region Read Access */ u32 grwa:8; /* 8:15 GbE region Write Access */ u32 gmrag:8; /* 23:16 GbE Master Read Access Grant */ u32 gmwag:8; /* 31:24 GbE Master Write Access Grant */ } hsf_flregacc; u16 regval; }; /** * e1000_phy_is_accessible_pchlan - Check if able to access PHY registers * @hw: pointer to the HW structure * * Test access to the PHY registers by reading the PHY ID registers. If * the PHY ID is already known (e.g. resume path) compare it with known ID, * otherwise assume the read PHY ID is correct if it is valid. * * Assumes the sw/fw/hw semaphore is already acquired. **/ static bool e1000_phy_is_accessible_pchlan(struct e1000_hw *hw) { u16 phy_reg = 0; u32 phy_id = 0; s32 ret_val = 0; u16 retry_count; u32 mac_reg = 0; for (retry_count = 0; retry_count < 2; retry_count++) { ret_val = hw->phy.ops.read_reg_locked(hw, PHY_ID1, &phy_reg); if (ret_val || (phy_reg == 0xFFFF)) continue; phy_id = (u32)(phy_reg << 16); ret_val = hw->phy.ops.read_reg_locked(hw, PHY_ID2, &phy_reg); if (ret_val || (phy_reg == 0xFFFF)) { phy_id = 0; continue; } phy_id |= (u32)(phy_reg & PHY_REVISION_MASK); break; } if (hw->phy.id) { if (hw->phy.id == phy_id) goto out; } else if (phy_id) { hw->phy.id = phy_id; hw->phy.revision = (u32)(phy_reg & ~PHY_REVISION_MASK); goto out; } /* In case the PHY needs to be in mdio slow mode, * set slow mode and try to get the PHY id again. */ if (hw->mac.type < e1000_pch_lpt) { hw->phy.ops.release(hw); ret_val = e1000_set_mdio_slow_mode_hv(hw); if (!ret_val) ret_val = e1000_get_phy_id(hw); hw->phy.ops.acquire(hw); } if (ret_val) return FALSE; out: if (hw->mac.type >= e1000_pch_lpt) { /* Only unforce SMBus if ME is not active */ if (!(E1000_READ_REG(hw, E1000_FWSM) & E1000_ICH_FWSM_FW_VALID)) { /* Unforce SMBus mode in PHY */ hw->phy.ops.read_reg_locked(hw, CV_SMB_CTRL, &phy_reg); phy_reg &= ~CV_SMB_CTRL_FORCE_SMBUS; hw->phy.ops.write_reg_locked(hw, CV_SMB_CTRL, phy_reg); /* Unforce SMBus mode in MAC */ mac_reg = E1000_READ_REG(hw, E1000_CTRL_EXT); mac_reg &= ~E1000_CTRL_EXT_FORCE_SMBUS; E1000_WRITE_REG(hw, E1000_CTRL_EXT, mac_reg); } } return TRUE; } /** * e1000_toggle_lanphypc_pch_lpt - toggle the LANPHYPC pin value * @hw: pointer to the HW structure * * Toggling the LANPHYPC pin value fully power-cycles the PHY and is * used to reset the PHY to a quiescent state when necessary. **/ static void e1000_toggle_lanphypc_pch_lpt(struct e1000_hw *hw) { u32 mac_reg; DEBUGFUNC("e1000_toggle_lanphypc_pch_lpt"); /* Set Phy Config Counter to 50msec */ mac_reg = E1000_READ_REG(hw, E1000_FEXTNVM3); mac_reg &= ~E1000_FEXTNVM3_PHY_CFG_COUNTER_MASK; mac_reg |= E1000_FEXTNVM3_PHY_CFG_COUNTER_50MSEC; E1000_WRITE_REG(hw, E1000_FEXTNVM3, mac_reg); /* Toggle LANPHYPC Value bit */ mac_reg = E1000_READ_REG(hw, E1000_CTRL); mac_reg |= E1000_CTRL_LANPHYPC_OVERRIDE; mac_reg &= ~E1000_CTRL_LANPHYPC_VALUE; E1000_WRITE_REG(hw, E1000_CTRL, mac_reg); E1000_WRITE_FLUSH(hw); msec_delay(1); mac_reg &= ~E1000_CTRL_LANPHYPC_OVERRIDE; E1000_WRITE_REG(hw, E1000_CTRL, mac_reg); E1000_WRITE_FLUSH(hw); if (hw->mac.type < e1000_pch_lpt) { msec_delay(50); } else { u16 count = 20; do { msec_delay(5); } while (!(E1000_READ_REG(hw, E1000_CTRL_EXT) & E1000_CTRL_EXT_LPCD) && count--); msec_delay(30); } } /** * e1000_init_phy_workarounds_pchlan - PHY initialization workarounds * @hw: pointer to the HW structure * * Workarounds/flow necessary for PHY initialization during driver load * and resume paths. **/ static s32 e1000_init_phy_workarounds_pchlan(struct e1000_hw *hw) { u32 mac_reg, fwsm = E1000_READ_REG(hw, E1000_FWSM); s32 ret_val; DEBUGFUNC("e1000_init_phy_workarounds_pchlan"); /* Gate automatic PHY configuration by hardware on managed and * non-managed 82579 and newer adapters. */ e1000_gate_hw_phy_config_ich8lan(hw, TRUE); /* It is not possible to be certain of the current state of ULP * so forcibly disable it. */ hw->dev_spec.ich8lan.ulp_state = e1000_ulp_state_unknown; e1000_disable_ulp_lpt_lp(hw, TRUE); ret_val = hw->phy.ops.acquire(hw); if (ret_val) { DEBUGOUT("Failed to initialize PHY flow\n"); goto out; } /* The MAC-PHY interconnect may be in SMBus mode. If the PHY is * inaccessible and resetting the PHY is not blocked, toggle the * LANPHYPC Value bit to force the interconnect to PCIe mode. */ switch (hw->mac.type) { case e1000_pch_lpt: case e1000_pch_spt: if (e1000_phy_is_accessible_pchlan(hw)) break; /* Before toggling LANPHYPC, see if PHY is accessible by * forcing MAC to SMBus mode first. */ mac_reg = E1000_READ_REG(hw, E1000_CTRL_EXT); mac_reg |= E1000_CTRL_EXT_FORCE_SMBUS; E1000_WRITE_REG(hw, E1000_CTRL_EXT, mac_reg); /* Wait 50 milliseconds for MAC to finish any retries * that it might be trying to perform from previous * attempts to acknowledge any phy read requests. */ msec_delay(50); /* fall-through */ case e1000_pch2lan: if (e1000_phy_is_accessible_pchlan(hw)) break; /* fall-through */ case e1000_pchlan: if ((hw->mac.type == e1000_pchlan) && (fwsm & E1000_ICH_FWSM_FW_VALID)) break; if (hw->phy.ops.check_reset_block(hw)) { DEBUGOUT("Required LANPHYPC toggle blocked by ME\n"); ret_val = -E1000_ERR_PHY; break; } /* Toggle LANPHYPC Value bit */ e1000_toggle_lanphypc_pch_lpt(hw); if (hw->mac.type >= e1000_pch_lpt) { if (e1000_phy_is_accessible_pchlan(hw)) break; /* Toggling LANPHYPC brings the PHY out of SMBus mode * so ensure that the MAC is also out of SMBus mode */ mac_reg = E1000_READ_REG(hw, E1000_CTRL_EXT); mac_reg &= ~E1000_CTRL_EXT_FORCE_SMBUS; E1000_WRITE_REG(hw, E1000_CTRL_EXT, mac_reg); if (e1000_phy_is_accessible_pchlan(hw)) break; ret_val = -E1000_ERR_PHY; } break; default: break; } hw->phy.ops.release(hw); if (!ret_val) { /* Check to see if able to reset PHY. Print error if not */ if (hw->phy.ops.check_reset_block(hw)) { ERROR_REPORT("Reset blocked by ME\n"); goto out; } /* Reset the PHY before any access to it. Doing so, ensures * that the PHY is in a known good state before we read/write * PHY registers. The generic reset is sufficient here, * because we haven't determined the PHY type yet. */ ret_val = e1000_phy_hw_reset_generic(hw); if (ret_val) goto out; /* On a successful reset, possibly need to wait for the PHY * to quiesce to an accessible state before returning control * to the calling function. If the PHY does not quiesce, then * return E1000E_BLK_PHY_RESET, as this is the condition that * the PHY is in. */ ret_val = hw->phy.ops.check_reset_block(hw); if (ret_val) ERROR_REPORT("ME blocked access to PHY after reset\n"); } out: /* Ungate automatic PHY configuration on non-managed 82579 */ if ((hw->mac.type == e1000_pch2lan) && !(fwsm & E1000_ICH_FWSM_FW_VALID)) { msec_delay(10); e1000_gate_hw_phy_config_ich8lan(hw, FALSE); } return ret_val; } /** * e1000_init_phy_params_pchlan - Initialize PHY function pointers * @hw: pointer to the HW structure * * Initialize family-specific PHY parameters and function pointers. **/ static s32 e1000_init_phy_params_pchlan(struct e1000_hw *hw) { struct e1000_phy_info *phy = &hw->phy; s32 ret_val; DEBUGFUNC("e1000_init_phy_params_pchlan"); phy->addr = 1; phy->reset_delay_us = 100; phy->ops.acquire = e1000_acquire_swflag_ich8lan; phy->ops.check_reset_block = e1000_check_reset_block_ich8lan; phy->ops.get_cfg_done = e1000_get_cfg_done_ich8lan; phy->ops.set_page = e1000_set_page_igp; phy->ops.read_reg = e1000_read_phy_reg_hv; phy->ops.read_reg_locked = e1000_read_phy_reg_hv_locked; phy->ops.read_reg_page = e1000_read_phy_reg_page_hv; phy->ops.release = e1000_release_swflag_ich8lan; phy->ops.reset = e1000_phy_hw_reset_ich8lan; phy->ops.set_d0_lplu_state = e1000_set_lplu_state_pchlan; phy->ops.set_d3_lplu_state = e1000_set_lplu_state_pchlan; phy->ops.write_reg = e1000_write_phy_reg_hv; phy->ops.write_reg_locked = e1000_write_phy_reg_hv_locked; phy->ops.write_reg_page = e1000_write_phy_reg_page_hv; phy->ops.power_up = e1000_power_up_phy_copper; phy->ops.power_down = e1000_power_down_phy_copper_ich8lan; phy->autoneg_mask = AUTONEG_ADVERTISE_SPEED_DEFAULT; phy->id = e1000_phy_unknown; ret_val = e1000_init_phy_workarounds_pchlan(hw); if (ret_val) return ret_val; if (phy->id == e1000_phy_unknown) switch (hw->mac.type) { default: ret_val = e1000_get_phy_id(hw); if (ret_val) return ret_val; if ((phy->id != 0) && (phy->id != PHY_REVISION_MASK)) break; /* fall-through */ case e1000_pch2lan: case e1000_pch_lpt: case e1000_pch_spt: /* In case the PHY needs to be in mdio slow mode, * set slow mode and try to get the PHY id again. */ ret_val = e1000_set_mdio_slow_mode_hv(hw); if (ret_val) return ret_val; ret_val = e1000_get_phy_id(hw); if (ret_val) return ret_val; break; } phy->type = e1000_get_phy_type_from_id(phy->id); switch (phy->type) { case e1000_phy_82577: case e1000_phy_82579: case e1000_phy_i217: phy->ops.check_polarity = e1000_check_polarity_82577; phy->ops.force_speed_duplex = e1000_phy_force_speed_duplex_82577; phy->ops.get_cable_length = e1000_get_cable_length_82577; phy->ops.get_info = e1000_get_phy_info_82577; phy->ops.commit = e1000_phy_sw_reset_generic; break; case e1000_phy_82578: phy->ops.check_polarity = e1000_check_polarity_m88; phy->ops.force_speed_duplex = e1000_phy_force_speed_duplex_m88; phy->ops.get_cable_length = e1000_get_cable_length_m88; phy->ops.get_info = e1000_get_phy_info_m88; break; default: ret_val = -E1000_ERR_PHY; break; } return ret_val; } /** * e1000_init_phy_params_ich8lan - Initialize PHY function pointers * @hw: pointer to the HW structure * * Initialize family-specific PHY parameters and function pointers. **/ static s32 e1000_init_phy_params_ich8lan(struct e1000_hw *hw) { struct e1000_phy_info *phy = &hw->phy; s32 ret_val; u16 i = 0; DEBUGFUNC("e1000_init_phy_params_ich8lan"); phy->addr = 1; phy->reset_delay_us = 100; phy->ops.acquire = e1000_acquire_swflag_ich8lan; phy->ops.check_reset_block = e1000_check_reset_block_ich8lan; phy->ops.get_cable_length = e1000_get_cable_length_igp_2; phy->ops.get_cfg_done = e1000_get_cfg_done_ich8lan; phy->ops.read_reg = e1000_read_phy_reg_igp; phy->ops.release = e1000_release_swflag_ich8lan; phy->ops.reset = e1000_phy_hw_reset_ich8lan; phy->ops.set_d0_lplu_state = e1000_set_d0_lplu_state_ich8lan; phy->ops.set_d3_lplu_state = e1000_set_d3_lplu_state_ich8lan; phy->ops.write_reg = e1000_write_phy_reg_igp; phy->ops.power_up = e1000_power_up_phy_copper; phy->ops.power_down = e1000_power_down_phy_copper_ich8lan; /* We may need to do this twice - once for IGP and if that fails, * we'll set BM func pointers and try again */ ret_val = e1000_determine_phy_address(hw); if (ret_val) { phy->ops.write_reg = e1000_write_phy_reg_bm; phy->ops.read_reg = e1000_read_phy_reg_bm; ret_val = e1000_determine_phy_address(hw); if (ret_val) { DEBUGOUT("Cannot determine PHY addr. Erroring out\n"); return ret_val; } } phy->id = 0; while ((e1000_phy_unknown == e1000_get_phy_type_from_id(phy->id)) && (i++ < 100)) { msec_delay(1); ret_val = e1000_get_phy_id(hw); if (ret_val) return ret_val; } /* Verify phy id */ switch (phy->id) { case IGP03E1000_E_PHY_ID: phy->type = e1000_phy_igp_3; phy->autoneg_mask = AUTONEG_ADVERTISE_SPEED_DEFAULT; phy->ops.read_reg_locked = e1000_read_phy_reg_igp_locked; phy->ops.write_reg_locked = e1000_write_phy_reg_igp_locked; phy->ops.get_info = e1000_get_phy_info_igp; phy->ops.check_polarity = e1000_check_polarity_igp; phy->ops.force_speed_duplex = e1000_phy_force_speed_duplex_igp; break; case IFE_E_PHY_ID: case IFE_PLUS_E_PHY_ID: case IFE_C_E_PHY_ID: phy->type = e1000_phy_ife; phy->autoneg_mask = E1000_ALL_NOT_GIG; phy->ops.get_info = e1000_get_phy_info_ife; phy->ops.check_polarity = e1000_check_polarity_ife; phy->ops.force_speed_duplex = e1000_phy_force_speed_duplex_ife; break; case BME1000_E_PHY_ID: phy->type = e1000_phy_bm; phy->autoneg_mask = AUTONEG_ADVERTISE_SPEED_DEFAULT; phy->ops.read_reg = e1000_read_phy_reg_bm; phy->ops.write_reg = e1000_write_phy_reg_bm; phy->ops.commit = e1000_phy_sw_reset_generic; phy->ops.get_info = e1000_get_phy_info_m88; phy->ops.check_polarity = e1000_check_polarity_m88; phy->ops.force_speed_duplex = e1000_phy_force_speed_duplex_m88; break; default: return -E1000_ERR_PHY; break; } return E1000_SUCCESS; } /** * e1000_init_nvm_params_ich8lan - Initialize NVM function pointers * @hw: pointer to the HW structure * * Initialize family-specific NVM parameters and function * pointers. **/ static s32 e1000_init_nvm_params_ich8lan(struct e1000_hw *hw) { struct e1000_nvm_info *nvm = &hw->nvm; struct e1000_dev_spec_ich8lan *dev_spec = &hw->dev_spec.ich8lan; u32 gfpreg, sector_base_addr, sector_end_addr; u16 i; u32 nvm_size; DEBUGFUNC("e1000_init_nvm_params_ich8lan"); nvm->type = e1000_nvm_flash_sw; if (hw->mac.type >= e1000_pch_spt) { /* in SPT, gfpreg doesn't exist. NVM size is taken from the * STRAP register. This is because in SPT the GbE Flash region * is no longer accessed through the flash registers. Instead, * the mechanism has changed, and the Flash region access * registers are now implemented in GbE memory space. */ nvm->flash_base_addr = 0; nvm_size = (((E1000_READ_REG(hw, E1000_STRAP) >> 1) & 0x1F) + 1) * NVM_SIZE_MULTIPLIER; nvm->flash_bank_size = nvm_size / 2; /* Adjust to word count */ nvm->flash_bank_size /= sizeof(u16); /* Set the base address for flash register access */ hw->flash_address = hw->hw_addr + E1000_FLASH_BASE_ADDR; } else { /* Can't read flash registers if register set isn't mapped. */ if (!hw->flash_address) { DEBUGOUT("ERROR: Flash registers not mapped\n"); return -E1000_ERR_CONFIG; } gfpreg = E1000_READ_FLASH_REG(hw, ICH_FLASH_GFPREG); /* sector_X_addr is a "sector"-aligned address (4096 bytes) * Add 1 to sector_end_addr since this sector is included in * the overall size. */ sector_base_addr = gfpreg & FLASH_GFPREG_BASE_MASK; sector_end_addr = ((gfpreg >> 16) & FLASH_GFPREG_BASE_MASK) + 1; /* flash_base_addr is byte-aligned */ nvm->flash_base_addr = sector_base_addr << FLASH_SECTOR_ADDR_SHIFT; /* find total size of the NVM, then cut in half since the total * size represents two separate NVM banks. */ nvm->flash_bank_size = ((sector_end_addr - sector_base_addr) << FLASH_SECTOR_ADDR_SHIFT); nvm->flash_bank_size /= 2; /* Adjust to word count */ nvm->flash_bank_size /= sizeof(u16); } nvm->word_size = E1000_SHADOW_RAM_WORDS; /* Clear shadow ram */ for (i = 0; i < nvm->word_size; i++) { dev_spec->shadow_ram[i].modified = FALSE; dev_spec->shadow_ram[i].value = 0xFFFF; } - E1000_MUTEX_INIT(&dev_spec->nvm_mutex); - E1000_MUTEX_INIT(&dev_spec->swflag_mutex); - /* Function Pointers */ nvm->ops.acquire = e1000_acquire_nvm_ich8lan; nvm->ops.release = e1000_release_nvm_ich8lan; if (hw->mac.type >= e1000_pch_spt) { nvm->ops.read = e1000_read_nvm_spt; nvm->ops.update = e1000_update_nvm_checksum_spt; } else { nvm->ops.read = e1000_read_nvm_ich8lan; nvm->ops.update = e1000_update_nvm_checksum_ich8lan; } nvm->ops.valid_led_default = e1000_valid_led_default_ich8lan; nvm->ops.validate = e1000_validate_nvm_checksum_ich8lan; nvm->ops.write = e1000_write_nvm_ich8lan; return E1000_SUCCESS; } /** * e1000_init_mac_params_ich8lan - Initialize MAC function pointers * @hw: pointer to the HW structure * * Initialize family-specific MAC parameters and function * pointers. **/ static s32 e1000_init_mac_params_ich8lan(struct e1000_hw *hw) { struct e1000_mac_info *mac = &hw->mac; DEBUGFUNC("e1000_init_mac_params_ich8lan"); /* Set media type function pointer */ hw->phy.media_type = e1000_media_type_copper; /* Set mta register count */ mac->mta_reg_count = 32; /* Set rar entry count */ mac->rar_entry_count = E1000_ICH_RAR_ENTRIES; if (mac->type == e1000_ich8lan) mac->rar_entry_count--; /* Set if part includes ASF firmware */ mac->asf_firmware_present = TRUE; /* FWSM register */ mac->has_fwsm = TRUE; /* ARC subsystem not supported */ mac->arc_subsystem_valid = FALSE; /* Adaptive IFS supported */ mac->adaptive_ifs = TRUE; /* Function pointers */ /* bus type/speed/width */ mac->ops.get_bus_info = e1000_get_bus_info_ich8lan; /* function id */ mac->ops.set_lan_id = e1000_set_lan_id_single_port; /* reset */ mac->ops.reset_hw = e1000_reset_hw_ich8lan; /* hw initialization */ mac->ops.init_hw = e1000_init_hw_ich8lan; /* link setup */ mac->ops.setup_link = e1000_setup_link_ich8lan; /* physical interface setup */ mac->ops.setup_physical_interface = e1000_setup_copper_link_ich8lan; /* check for link */ mac->ops.check_for_link = e1000_check_for_copper_link_ich8lan; /* link info */ mac->ops.get_link_up_info = e1000_get_link_up_info_ich8lan; /* multicast address update */ mac->ops.update_mc_addr_list = e1000_update_mc_addr_list_generic; /* clear hardware counters */ mac->ops.clear_hw_cntrs = e1000_clear_hw_cntrs_ich8lan; /* LED and other operations */ switch (mac->type) { case e1000_ich8lan: case e1000_ich9lan: case e1000_ich10lan: /* check management mode */ mac->ops.check_mng_mode = e1000_check_mng_mode_ich8lan; /* ID LED init */ mac->ops.id_led_init = e1000_id_led_init_generic; /* blink LED */ mac->ops.blink_led = e1000_blink_led_generic; /* setup LED */ mac->ops.setup_led = e1000_setup_led_generic; /* cleanup LED */ mac->ops.cleanup_led = e1000_cleanup_led_ich8lan; /* turn on/off LED */ mac->ops.led_on = e1000_led_on_ich8lan; mac->ops.led_off = e1000_led_off_ich8lan; break; case e1000_pch2lan: mac->rar_entry_count = E1000_PCH2_RAR_ENTRIES; mac->ops.rar_set = e1000_rar_set_pch2lan; /* fall-through */ case e1000_pch_lpt: case e1000_pch_spt: /* multicast address update for pch2 */ mac->ops.update_mc_addr_list = e1000_update_mc_addr_list_pch2lan; /* fall-through */ case e1000_pchlan: /* check management mode */ mac->ops.check_mng_mode = e1000_check_mng_mode_pchlan; /* ID LED init */ mac->ops.id_led_init = e1000_id_led_init_pchlan; /* setup LED */ mac->ops.setup_led = e1000_setup_led_pchlan; /* cleanup LED */ mac->ops.cleanup_led = e1000_cleanup_led_pchlan; /* turn on/off LED */ mac->ops.led_on = e1000_led_on_pchlan; mac->ops.led_off = e1000_led_off_pchlan; break; default: break; } if (mac->type >= e1000_pch_lpt) { mac->rar_entry_count = E1000_PCH_LPT_RAR_ENTRIES; mac->ops.rar_set = e1000_rar_set_pch_lpt; mac->ops.setup_physical_interface = e1000_setup_copper_link_pch_lpt; mac->ops.set_obff_timer = e1000_set_obff_timer_pch_lpt; } /* Enable PCS Lock-loss workaround for ICH8 */ if (mac->type == e1000_ich8lan) e1000_set_kmrn_lock_loss_workaround_ich8lan(hw, TRUE); return E1000_SUCCESS; } /** * __e1000_access_emi_reg_locked - Read/write EMI register * @hw: pointer to the HW structure * @addr: EMI address to program * @data: pointer to value to read/write from/to the EMI address * @read: boolean flag to indicate read or write * * This helper function assumes the SW/FW/HW Semaphore is already acquired. **/ static s32 __e1000_access_emi_reg_locked(struct e1000_hw *hw, u16 address, u16 *data, bool read) { s32 ret_val; DEBUGFUNC("__e1000_access_emi_reg_locked"); ret_val = hw->phy.ops.write_reg_locked(hw, I82579_EMI_ADDR, address); if (ret_val) return ret_val; if (read) ret_val = hw->phy.ops.read_reg_locked(hw, I82579_EMI_DATA, data); else ret_val = hw->phy.ops.write_reg_locked(hw, I82579_EMI_DATA, *data); return ret_val; } /** * e1000_read_emi_reg_locked - Read Extended Management Interface register * @hw: pointer to the HW structure * @addr: EMI address to program * @data: value to be read from the EMI address * * Assumes the SW/FW/HW Semaphore is already acquired. **/ s32 e1000_read_emi_reg_locked(struct e1000_hw *hw, u16 addr, u16 *data) { DEBUGFUNC("e1000_read_emi_reg_locked"); return __e1000_access_emi_reg_locked(hw, addr, data, TRUE); } /** * e1000_write_emi_reg_locked - Write Extended Management Interface register * @hw: pointer to the HW structure * @addr: EMI address to program * @data: value to be written to the EMI address * * Assumes the SW/FW/HW Semaphore is already acquired. **/ s32 e1000_write_emi_reg_locked(struct e1000_hw *hw, u16 addr, u16 data) { DEBUGFUNC("e1000_read_emi_reg_locked"); return __e1000_access_emi_reg_locked(hw, addr, &data, FALSE); } /** * e1000_set_eee_pchlan - Enable/disable EEE support * @hw: pointer to the HW structure * * Enable/disable EEE based on setting in dev_spec structure, the duplex of * the link and the EEE capabilities of the link partner. The LPI Control * register bits will remain set only if/when link is up. * * EEE LPI must not be asserted earlier than one second after link is up. * On 82579, EEE LPI should not be enabled until such time otherwise there * can be link issues with some switches. Other devices can have EEE LPI * enabled immediately upon link up since they have a timer in hardware which * prevents LPI from being asserted too early. **/ s32 e1000_set_eee_pchlan(struct e1000_hw *hw) { struct e1000_dev_spec_ich8lan *dev_spec = &hw->dev_spec.ich8lan; s32 ret_val; u16 lpa, pcs_status, adv, adv_addr, lpi_ctrl, data; DEBUGFUNC("e1000_set_eee_pchlan"); switch (hw->phy.type) { case e1000_phy_82579: lpa = I82579_EEE_LP_ABILITY; pcs_status = I82579_EEE_PCS_STATUS; adv_addr = I82579_EEE_ADVERTISEMENT; break; case e1000_phy_i217: lpa = I217_EEE_LP_ABILITY; pcs_status = I217_EEE_PCS_STATUS; adv_addr = I217_EEE_ADVERTISEMENT; break; default: return E1000_SUCCESS; } ret_val = hw->phy.ops.acquire(hw); if (ret_val) return ret_val; ret_val = hw->phy.ops.read_reg_locked(hw, I82579_LPI_CTRL, &lpi_ctrl); if (ret_val) goto release; /* Clear bits that enable EEE in various speeds */ lpi_ctrl &= ~I82579_LPI_CTRL_ENABLE_MASK; /* Enable EEE if not disabled by user */ if (!dev_spec->eee_disable) { /* Save off link partner's EEE ability */ ret_val = e1000_read_emi_reg_locked(hw, lpa, &dev_spec->eee_lp_ability); if (ret_val) goto release; /* Read EEE advertisement */ ret_val = e1000_read_emi_reg_locked(hw, adv_addr, &adv); if (ret_val) goto release; /* Enable EEE only for speeds in which the link partner is * EEE capable and for which we advertise EEE. */ if (adv & dev_spec->eee_lp_ability & I82579_EEE_1000_SUPPORTED) lpi_ctrl |= I82579_LPI_CTRL_1000_ENABLE; if (adv & dev_spec->eee_lp_ability & I82579_EEE_100_SUPPORTED) { hw->phy.ops.read_reg_locked(hw, PHY_LP_ABILITY, &data); if (data & NWAY_LPAR_100TX_FD_CAPS) lpi_ctrl |= I82579_LPI_CTRL_100_ENABLE; else /* EEE is not supported in 100Half, so ignore * partner's EEE in 100 ability if full-duplex * is not advertised. */ dev_spec->eee_lp_ability &= ~I82579_EEE_100_SUPPORTED; } } if (hw->phy.type == e1000_phy_82579) { ret_val = e1000_read_emi_reg_locked(hw, I82579_LPI_PLL_SHUT, &data); if (ret_val) goto release; data &= ~I82579_LPI_100_PLL_SHUT; ret_val = e1000_write_emi_reg_locked(hw, I82579_LPI_PLL_SHUT, data); } /* R/Clr IEEE MMD 3.1 bits 11:10 - Tx/Rx LPI Received */ ret_val = e1000_read_emi_reg_locked(hw, pcs_status, &data); if (ret_val) goto release; ret_val = hw->phy.ops.write_reg_locked(hw, I82579_LPI_CTRL, lpi_ctrl); release: hw->phy.ops.release(hw); return ret_val; } /** * e1000_k1_workaround_lpt_lp - K1 workaround on Lynxpoint-LP * @hw: pointer to the HW structure * @link: link up bool flag * * When K1 is enabled for 1Gbps, the MAC can miss 2 DMA completion indications * preventing further DMA write requests. Workaround the issue by disabling * the de-assertion of the clock request when in 1Gpbs mode. * Also, set appropriate Tx re-transmission timeouts for 10 and 100Half link * speeds in order to avoid Tx hangs. **/ static s32 e1000_k1_workaround_lpt_lp(struct e1000_hw *hw, bool link) { u32 fextnvm6 = E1000_READ_REG(hw, E1000_FEXTNVM6); u32 status = E1000_READ_REG(hw, E1000_STATUS); s32 ret_val = E1000_SUCCESS; u16 reg; if (link && (status & E1000_STATUS_SPEED_1000)) { ret_val = hw->phy.ops.acquire(hw); if (ret_val) return ret_val; ret_val = e1000_read_kmrn_reg_locked(hw, E1000_KMRNCTRLSTA_K1_CONFIG, ®); if (ret_val) goto release; ret_val = e1000_write_kmrn_reg_locked(hw, E1000_KMRNCTRLSTA_K1_CONFIG, reg & ~E1000_KMRNCTRLSTA_K1_ENABLE); if (ret_val) goto release; usec_delay(10); E1000_WRITE_REG(hw, E1000_FEXTNVM6, fextnvm6 | E1000_FEXTNVM6_REQ_PLL_CLK); ret_val = e1000_write_kmrn_reg_locked(hw, E1000_KMRNCTRLSTA_K1_CONFIG, reg); release: hw->phy.ops.release(hw); } else { /* clear FEXTNVM6 bit 8 on link down or 10/100 */ fextnvm6 &= ~E1000_FEXTNVM6_REQ_PLL_CLK; if ((hw->phy.revision > 5) || !link || ((status & E1000_STATUS_SPEED_100) && (status & E1000_STATUS_FD))) goto update_fextnvm6; ret_val = hw->phy.ops.read_reg(hw, I217_INBAND_CTRL, ®); if (ret_val) return ret_val; /* Clear link status transmit timeout */ reg &= ~I217_INBAND_CTRL_LINK_STAT_TX_TIMEOUT_MASK; if (status & E1000_STATUS_SPEED_100) { /* Set inband Tx timeout to 5x10us for 100Half */ reg |= 5 << I217_INBAND_CTRL_LINK_STAT_TX_TIMEOUT_SHIFT; /* Do not extend the K1 entry latency for 100Half */ fextnvm6 &= ~E1000_FEXTNVM6_ENABLE_K1_ENTRY_CONDITION; } else { /* Set inband Tx timeout to 50x10us for 10Full/Half */ reg |= 50 << I217_INBAND_CTRL_LINK_STAT_TX_TIMEOUT_SHIFT; /* Extend the K1 entry latency for 10 Mbps */ fextnvm6 |= E1000_FEXTNVM6_ENABLE_K1_ENTRY_CONDITION; } ret_val = hw->phy.ops.write_reg(hw, I217_INBAND_CTRL, reg); if (ret_val) return ret_val; update_fextnvm6: E1000_WRITE_REG(hw, E1000_FEXTNVM6, fextnvm6); } return ret_val; } static u64 e1000_ltr2ns(u16 ltr) { u32 value, scale; /* Determine the latency in nsec based on the LTR value & scale */ value = ltr & E1000_LTRV_VALUE_MASK; scale = (ltr & E1000_LTRV_SCALE_MASK) >> E1000_LTRV_SCALE_SHIFT; return value * (1 << (scale * E1000_LTRV_SCALE_FACTOR)); } /** * e1000_platform_pm_pch_lpt - Set platform power management values * @hw: pointer to the HW structure * @link: bool indicating link status * * Set the Latency Tolerance Reporting (LTR) values for the "PCIe-like" * GbE MAC in the Lynx Point PCH based on Rx buffer size and link speed * when link is up (which must not exceed the maximum latency supported * by the platform), otherwise specify there is no LTR requirement. * Unlike TRUE-PCIe devices which set the LTR maximum snoop/no-snoop * latencies in the LTR Extended Capability Structure in the PCIe Extended * Capability register set, on this device LTR is set by writing the * equivalent snoop/no-snoop latencies in the LTRV register in the MAC and * set the SEND bit to send an Intel On-chip System Fabric sideband (IOSF-SB) * message to the PMC. * * Use the LTR value to calculate the Optimized Buffer Flush/Fill (OBFF) * high-water mark. **/ static s32 e1000_platform_pm_pch_lpt(struct e1000_hw *hw, bool link) { u32 reg = link << (E1000_LTRV_REQ_SHIFT + E1000_LTRV_NOSNOOP_SHIFT) | link << E1000_LTRV_REQ_SHIFT | E1000_LTRV_SEND; u16 lat_enc = 0; /* latency encoded */ s32 obff_hwm = 0; DEBUGFUNC("e1000_platform_pm_pch_lpt"); if (link) { u16 speed, duplex, scale = 0; u16 max_snoop, max_nosnoop; u16 max_ltr_enc; /* max LTR latency encoded */ s64 lat_ns; s64 value; u32 rxa; if (!hw->mac.max_frame_size) { DEBUGOUT("max_frame_size not set.\n"); return -E1000_ERR_CONFIG; } hw->mac.ops.get_link_up_info(hw, &speed, &duplex); if (!speed) { DEBUGOUT("Speed not set.\n"); return -E1000_ERR_CONFIG; } /* Rx Packet Buffer Allocation size (KB) */ rxa = E1000_READ_REG(hw, E1000_PBA) & E1000_PBA_RXA_MASK; /* Determine the maximum latency tolerated by the device. * * Per the PCIe spec, the tolerated latencies are encoded as * a 3-bit encoded scale (only 0-5 are valid) multiplied by * a 10-bit value (0-1023) to provide a range from 1 ns to * 2^25*(2^10-1) ns. The scale is encoded as 0=2^0ns, * 1=2^5ns, 2=2^10ns,...5=2^25ns. */ lat_ns = ((s64)rxa * 1024 - (2 * (s64)hw->mac.max_frame_size)) * 8 * 1000; if (lat_ns < 0) lat_ns = 0; else lat_ns /= speed; value = lat_ns; while (value > E1000_LTRV_VALUE_MASK) { scale++; value = E1000_DIVIDE_ROUND_UP(value, (1 << 5)); } if (scale > E1000_LTRV_SCALE_MAX) { DEBUGOUT1("Invalid LTR latency scale %d\n", scale); return -E1000_ERR_CONFIG; } lat_enc = (u16)((scale << E1000_LTRV_SCALE_SHIFT) | value); /* Determine the maximum latency tolerated by the platform */ e1000_read_pci_cfg(hw, E1000_PCI_LTR_CAP_LPT, &max_snoop); e1000_read_pci_cfg(hw, E1000_PCI_LTR_CAP_LPT + 2, &max_nosnoop); max_ltr_enc = E1000_MAX(max_snoop, max_nosnoop); if (lat_enc > max_ltr_enc) { lat_enc = max_ltr_enc; lat_ns = e1000_ltr2ns(max_ltr_enc); } if (lat_ns) { lat_ns *= speed * 1000; lat_ns /= 8; lat_ns /= 1000000000; obff_hwm = (s32)(rxa - lat_ns); } if ((obff_hwm < 0) || (obff_hwm > E1000_SVT_OFF_HWM_MASK)) { DEBUGOUT1("Invalid high water mark %d\n", obff_hwm); return -E1000_ERR_CONFIG; } } /* Set Snoop and No-Snoop latencies the same */ reg |= lat_enc | (lat_enc << E1000_LTRV_NOSNOOP_SHIFT); E1000_WRITE_REG(hw, E1000_LTRV, reg); /* Set OBFF high water mark */ reg = E1000_READ_REG(hw, E1000_SVT) & ~E1000_SVT_OFF_HWM_MASK; reg |= obff_hwm; E1000_WRITE_REG(hw, E1000_SVT, reg); /* Enable OBFF */ reg = E1000_READ_REG(hw, E1000_SVCR); reg |= E1000_SVCR_OFF_EN; /* Always unblock interrupts to the CPU even when the system is * in OBFF mode. This ensures that small round-robin traffic * (like ping) does not get dropped or experience long latency. */ reg |= E1000_SVCR_OFF_MASKINT; E1000_WRITE_REG(hw, E1000_SVCR, reg); return E1000_SUCCESS; } /** * e1000_set_obff_timer_pch_lpt - Update Optimized Buffer Flush/Fill timer * @hw: pointer to the HW structure * @itr: interrupt throttling rate * * Configure OBFF with the updated interrupt rate. **/ static s32 e1000_set_obff_timer_pch_lpt(struct e1000_hw *hw, u32 itr) { u32 svcr; s32 timer; DEBUGFUNC("e1000_set_obff_timer_pch_lpt"); /* Convert ITR value into microseconds for OBFF timer */ timer = itr & E1000_ITR_MASK; timer = (timer * E1000_ITR_MULT) / 1000; if ((timer < 0) || (timer > E1000_ITR_MASK)) { DEBUGOUT1("Invalid OBFF timer %d\n", timer); return -E1000_ERR_CONFIG; } svcr = E1000_READ_REG(hw, E1000_SVCR); svcr &= ~E1000_SVCR_OFF_TIMER_MASK; svcr |= timer << E1000_SVCR_OFF_TIMER_SHIFT; E1000_WRITE_REG(hw, E1000_SVCR, svcr); return E1000_SUCCESS; } /** * e1000_enable_ulp_lpt_lp - configure Ultra Low Power mode for LynxPoint-LP * @hw: pointer to the HW structure * @to_sx: boolean indicating a system power state transition to Sx * * When link is down, configure ULP mode to significantly reduce the power * to the PHY. If on a Manageability Engine (ME) enabled system, tell the * ME firmware to start the ULP configuration. If not on an ME enabled * system, configure the ULP mode by software. */ s32 e1000_enable_ulp_lpt_lp(struct e1000_hw *hw, bool to_sx) { u32 mac_reg; s32 ret_val = E1000_SUCCESS; u16 phy_reg; u16 oem_reg = 0; if ((hw->mac.type < e1000_pch_lpt) || (hw->device_id == E1000_DEV_ID_PCH_LPT_I217_LM) || (hw->device_id == E1000_DEV_ID_PCH_LPT_I217_V) || (hw->device_id == E1000_DEV_ID_PCH_I218_LM2) || (hw->device_id == E1000_DEV_ID_PCH_I218_V2) || (hw->dev_spec.ich8lan.ulp_state == e1000_ulp_state_on)) return 0; if (E1000_READ_REG(hw, E1000_FWSM) & E1000_ICH_FWSM_FW_VALID) { /* Request ME configure ULP mode in the PHY */ mac_reg = E1000_READ_REG(hw, E1000_H2ME); mac_reg |= E1000_H2ME_ULP | E1000_H2ME_ENFORCE_SETTINGS; E1000_WRITE_REG(hw, E1000_H2ME, mac_reg); goto out; } if (!to_sx) { int i = 0; /* Poll up to 5 seconds for Cable Disconnected indication */ while (!(E1000_READ_REG(hw, E1000_FEXT) & E1000_FEXT_PHY_CABLE_DISCONNECTED)) { /* Bail if link is re-acquired */ if (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU) return -E1000_ERR_PHY; if (i++ == 100) break; msec_delay(50); } DEBUGOUT2("CABLE_DISCONNECTED %s set after %dmsec\n", (E1000_READ_REG(hw, E1000_FEXT) & E1000_FEXT_PHY_CABLE_DISCONNECTED) ? "" : "not", i * 50); } ret_val = hw->phy.ops.acquire(hw); if (ret_val) goto out; /* Force SMBus mode in PHY */ ret_val = e1000_read_phy_reg_hv_locked(hw, CV_SMB_CTRL, &phy_reg); if (ret_val) goto release; phy_reg |= CV_SMB_CTRL_FORCE_SMBUS; e1000_write_phy_reg_hv_locked(hw, CV_SMB_CTRL, phy_reg); /* Force SMBus mode in MAC */ mac_reg = E1000_READ_REG(hw, E1000_CTRL_EXT); mac_reg |= E1000_CTRL_EXT_FORCE_SMBUS; E1000_WRITE_REG(hw, E1000_CTRL_EXT, mac_reg); /* Si workaround for ULP entry flow on i127/rev6 h/w. Enable * LPLU and disable Gig speed when entering ULP */ if ((hw->phy.type == e1000_phy_i217) && (hw->phy.revision == 6)) { ret_val = e1000_read_phy_reg_hv_locked(hw, HV_OEM_BITS, &oem_reg); if (ret_val) goto release; phy_reg = oem_reg; phy_reg |= HV_OEM_BITS_LPLU | HV_OEM_BITS_GBE_DIS; ret_val = e1000_write_phy_reg_hv_locked(hw, HV_OEM_BITS, phy_reg); if (ret_val) goto release; } /* Set Inband ULP Exit, Reset to SMBus mode and * Disable SMBus Release on PERST# in PHY */ ret_val = e1000_read_phy_reg_hv_locked(hw, I218_ULP_CONFIG1, &phy_reg); if (ret_val) goto release; phy_reg |= (I218_ULP_CONFIG1_RESET_TO_SMBUS | I218_ULP_CONFIG1_DISABLE_SMB_PERST); if (to_sx) { if (E1000_READ_REG(hw, E1000_WUFC) & E1000_WUFC_LNKC) phy_reg |= I218_ULP_CONFIG1_WOL_HOST; else phy_reg &= ~I218_ULP_CONFIG1_WOL_HOST; phy_reg |= I218_ULP_CONFIG1_STICKY_ULP; phy_reg &= ~I218_ULP_CONFIG1_INBAND_EXIT; } else { phy_reg |= I218_ULP_CONFIG1_INBAND_EXIT; phy_reg &= ~I218_ULP_CONFIG1_STICKY_ULP; phy_reg &= ~I218_ULP_CONFIG1_WOL_HOST; } e1000_write_phy_reg_hv_locked(hw, I218_ULP_CONFIG1, phy_reg); /* Set Disable SMBus Release on PERST# in MAC */ mac_reg = E1000_READ_REG(hw, E1000_FEXTNVM7); mac_reg |= E1000_FEXTNVM7_DISABLE_SMB_PERST; E1000_WRITE_REG(hw, E1000_FEXTNVM7, mac_reg); /* Commit ULP changes in PHY by starting auto ULP configuration */ phy_reg |= I218_ULP_CONFIG1_START; e1000_write_phy_reg_hv_locked(hw, I218_ULP_CONFIG1, phy_reg); if ((hw->phy.type == e1000_phy_i217) && (hw->phy.revision == 6) && to_sx && (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) { ret_val = e1000_write_phy_reg_hv_locked(hw, HV_OEM_BITS, oem_reg); if (ret_val) goto release; } release: hw->phy.ops.release(hw); out: if (ret_val) DEBUGOUT1("Error in ULP enable flow: %d\n", ret_val); else hw->dev_spec.ich8lan.ulp_state = e1000_ulp_state_on; return ret_val; } /** * e1000_disable_ulp_lpt_lp - unconfigure Ultra Low Power mode for LynxPoint-LP * @hw: pointer to the HW structure * @force: boolean indicating whether or not to force disabling ULP * * Un-configure ULP mode when link is up, the system is transitioned from * Sx or the driver is unloaded. If on a Manageability Engine (ME) enabled * system, poll for an indication from ME that ULP has been un-configured. * If not on an ME enabled system, un-configure the ULP mode by software. * * During nominal operation, this function is called when link is acquired * to disable ULP mode (force=FALSE); otherwise, for example when unloading * the driver or during Sx->S0 transitions, this is called with force=TRUE * to forcibly disable ULP. */ s32 e1000_disable_ulp_lpt_lp(struct e1000_hw *hw, bool force) { s32 ret_val = E1000_SUCCESS; u32 mac_reg; u16 phy_reg; int i = 0; if ((hw->mac.type < e1000_pch_lpt) || (hw->device_id == E1000_DEV_ID_PCH_LPT_I217_LM) || (hw->device_id == E1000_DEV_ID_PCH_LPT_I217_V) || (hw->device_id == E1000_DEV_ID_PCH_I218_LM2) || (hw->device_id == E1000_DEV_ID_PCH_I218_V2) || (hw->dev_spec.ich8lan.ulp_state == e1000_ulp_state_off)) return 0; if (E1000_READ_REG(hw, E1000_FWSM) & E1000_ICH_FWSM_FW_VALID) { if (force) { /* Request ME un-configure ULP mode in the PHY */ mac_reg = E1000_READ_REG(hw, E1000_H2ME); mac_reg &= ~E1000_H2ME_ULP; mac_reg |= E1000_H2ME_ENFORCE_SETTINGS; E1000_WRITE_REG(hw, E1000_H2ME, mac_reg); } /* Poll up to 300msec for ME to clear ULP_CFG_DONE. */ while (E1000_READ_REG(hw, E1000_FWSM) & E1000_FWSM_ULP_CFG_DONE) { if (i++ == 30) { ret_val = -E1000_ERR_PHY; goto out; } msec_delay(10); } DEBUGOUT1("ULP_CONFIG_DONE cleared after %dmsec\n", i * 10); if (force) { mac_reg = E1000_READ_REG(hw, E1000_H2ME); mac_reg &= ~E1000_H2ME_ENFORCE_SETTINGS; E1000_WRITE_REG(hw, E1000_H2ME, mac_reg); } else { /* Clear H2ME.ULP after ME ULP configuration */ mac_reg = E1000_READ_REG(hw, E1000_H2ME); mac_reg &= ~E1000_H2ME_ULP; E1000_WRITE_REG(hw, E1000_H2ME, mac_reg); } goto out; } ret_val = hw->phy.ops.acquire(hw); if (ret_val) goto out; if (force) /* Toggle LANPHYPC Value bit */ e1000_toggle_lanphypc_pch_lpt(hw); /* Unforce SMBus mode in PHY */ ret_val = e1000_read_phy_reg_hv_locked(hw, CV_SMB_CTRL, &phy_reg); if (ret_val) { /* The MAC might be in PCIe mode, so temporarily force to * SMBus mode in order to access the PHY. */ mac_reg = E1000_READ_REG(hw, E1000_CTRL_EXT); mac_reg |= E1000_CTRL_EXT_FORCE_SMBUS; E1000_WRITE_REG(hw, E1000_CTRL_EXT, mac_reg); msec_delay(50); ret_val = e1000_read_phy_reg_hv_locked(hw, CV_SMB_CTRL, &phy_reg); if (ret_val) goto release; } phy_reg &= ~CV_SMB_CTRL_FORCE_SMBUS; e1000_write_phy_reg_hv_locked(hw, CV_SMB_CTRL, phy_reg); /* Unforce SMBus mode in MAC */ mac_reg = E1000_READ_REG(hw, E1000_CTRL_EXT); mac_reg &= ~E1000_CTRL_EXT_FORCE_SMBUS; E1000_WRITE_REG(hw, E1000_CTRL_EXT, mac_reg); /* When ULP mode was previously entered, K1 was disabled by the * hardware. Re-Enable K1 in the PHY when exiting ULP. */ ret_val = e1000_read_phy_reg_hv_locked(hw, HV_PM_CTRL, &phy_reg); if (ret_val) goto release; phy_reg |= HV_PM_CTRL_K1_ENABLE; e1000_write_phy_reg_hv_locked(hw, HV_PM_CTRL, phy_reg); /* Clear ULP enabled configuration */ ret_val = e1000_read_phy_reg_hv_locked(hw, I218_ULP_CONFIG1, &phy_reg); if (ret_val) goto release; phy_reg &= ~(I218_ULP_CONFIG1_IND | I218_ULP_CONFIG1_STICKY_ULP | I218_ULP_CONFIG1_RESET_TO_SMBUS | I218_ULP_CONFIG1_WOL_HOST | I218_ULP_CONFIG1_INBAND_EXIT | I218_ULP_CONFIG1_EN_ULP_LANPHYPC | I218_ULP_CONFIG1_DIS_CLR_STICKY_ON_PERST | I218_ULP_CONFIG1_DISABLE_SMB_PERST); e1000_write_phy_reg_hv_locked(hw, I218_ULP_CONFIG1, phy_reg); /* Commit ULP changes by starting auto ULP configuration */ phy_reg |= I218_ULP_CONFIG1_START; e1000_write_phy_reg_hv_locked(hw, I218_ULP_CONFIG1, phy_reg); /* Clear Disable SMBus Release on PERST# in MAC */ mac_reg = E1000_READ_REG(hw, E1000_FEXTNVM7); mac_reg &= ~E1000_FEXTNVM7_DISABLE_SMB_PERST; E1000_WRITE_REG(hw, E1000_FEXTNVM7, mac_reg); release: hw->phy.ops.release(hw); if (force) { hw->phy.ops.reset(hw); msec_delay(50); } out: if (ret_val) DEBUGOUT1("Error in ULP disable flow: %d\n", ret_val); else hw->dev_spec.ich8lan.ulp_state = e1000_ulp_state_off; return ret_val; } /** * e1000_check_for_copper_link_ich8lan - Check for link (Copper) * @hw: pointer to the HW structure * * Checks to see of the link status of the hardware has changed. If a * change in link status has been detected, then we read the PHY registers * to get the current speed/duplex if link exists. **/ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) { struct e1000_mac_info *mac = &hw->mac; s32 ret_val, tipg_reg = 0; u16 emi_addr, emi_val = 0; bool link; u16 phy_reg; DEBUGFUNC("e1000_check_for_copper_link_ich8lan"); /* We only want to go out to the PHY registers to see if Auto-Neg * has completed and/or if our link status has changed. The * get_link_status flag is set upon receiving a Link Status * Change or Rx Sequence Error interrupt. */ if (!mac->get_link_status) return E1000_SUCCESS; /* First we want to see if the MII Status Register reports * link. If so, then we want to get the current speed/duplex * of the PHY. */ ret_val = e1000_phy_has_link_generic(hw, 1, 0, &link); if (ret_val) return ret_val; if (hw->mac.type == e1000_pchlan) { ret_val = e1000_k1_gig_workaround_hv(hw, link); if (ret_val) return ret_val; } /* When connected at 10Mbps half-duplex, some parts are excessively * aggressive resulting in many collisions. To avoid this, increase * the IPG and reduce Rx latency in the PHY. */ if ((hw->mac.type >= e1000_pch2lan) && link) { u16 speed, duplex; e1000_get_speed_and_duplex_copper_generic(hw, &speed, &duplex); tipg_reg = E1000_READ_REG(hw, E1000_TIPG); tipg_reg &= ~E1000_TIPG_IPGT_MASK; if (duplex == HALF_DUPLEX && speed == SPEED_10) { tipg_reg |= 0xFF; /* Reduce Rx latency in analog PHY */ emi_val = 0; } else if (hw->mac.type >= e1000_pch_spt && duplex == FULL_DUPLEX && speed != SPEED_1000) { tipg_reg |= 0xC; emi_val = 1; } else { /* Roll back the default values */ tipg_reg |= 0x08; emi_val = 1; } E1000_WRITE_REG(hw, E1000_TIPG, tipg_reg); ret_val = hw->phy.ops.acquire(hw); if (ret_val) return ret_val; if (hw->mac.type == e1000_pch2lan) emi_addr = I82579_RX_CONFIG; else emi_addr = I217_RX_CONFIG; ret_val = e1000_write_emi_reg_locked(hw, emi_addr, emi_val); if (hw->mac.type >= e1000_pch_lpt) { u16 phy_reg; hw->phy.ops.read_reg_locked(hw, I217_PLL_CLOCK_GATE_REG, &phy_reg); phy_reg &= ~I217_PLL_CLOCK_GATE_MASK; if (speed == SPEED_100 || speed == SPEED_10) phy_reg |= 0x3E8; else phy_reg |= 0xFA; hw->phy.ops.write_reg_locked(hw, I217_PLL_CLOCK_GATE_REG, phy_reg); if (speed == SPEED_1000) { hw->phy.ops.read_reg_locked(hw, HV_PM_CTRL, &phy_reg); phy_reg |= HV_PM_CTRL_K1_CLK_REQ; hw->phy.ops.write_reg_locked(hw, HV_PM_CTRL, phy_reg); } } hw->phy.ops.release(hw); if (ret_val) return ret_val; if (hw->mac.type >= e1000_pch_spt) { u16 data; u16 ptr_gap; if (speed == SPEED_1000) { ret_val = hw->phy.ops.acquire(hw); if (ret_val) return ret_val; ret_val = hw->phy.ops.read_reg_locked(hw, PHY_REG(776, 20), &data); if (ret_val) { hw->phy.ops.release(hw); return ret_val; } ptr_gap = (data & (0x3FF << 2)) >> 2; if (ptr_gap < 0x18) { data &= ~(0x3FF << 2); data |= (0x18 << 2); ret_val = hw->phy.ops.write_reg_locked(hw, PHY_REG(776, 20), data); } hw->phy.ops.release(hw); if (ret_val) return ret_val; } else { ret_val = hw->phy.ops.acquire(hw); if (ret_val) return ret_val; ret_val = hw->phy.ops.write_reg_locked(hw, PHY_REG(776, 20), 0xC023); hw->phy.ops.release(hw); if (ret_val) return ret_val; } } } /* I217 Packet Loss issue: * ensure that FEXTNVM4 Beacon Duration is set correctly * on power up. * Set the Beacon Duration for I217 to 8 usec */ if (hw->mac.type >= e1000_pch_lpt) { u32 mac_reg; mac_reg = E1000_READ_REG(hw, E1000_FEXTNVM4); mac_reg &= ~E1000_FEXTNVM4_BEACON_DURATION_MASK; mac_reg |= E1000_FEXTNVM4_BEACON_DURATION_8USEC; E1000_WRITE_REG(hw, E1000_FEXTNVM4, mac_reg); } /* Work-around I218 hang issue */ if ((hw->device_id == E1000_DEV_ID_PCH_LPTLP_I218_LM) || (hw->device_id == E1000_DEV_ID_PCH_LPTLP_I218_V) || (hw->device_id == E1000_DEV_ID_PCH_I218_LM3) || (hw->device_id == E1000_DEV_ID_PCH_I218_V3)) { ret_val = e1000_k1_workaround_lpt_lp(hw, link); if (ret_val) return ret_val; } if (hw->mac.type >= e1000_pch_lpt) { /* Set platform power management values for * Latency Tolerance Reporting (LTR) * Optimized Buffer Flush/Fill (OBFF) */ ret_val = e1000_platform_pm_pch_lpt(hw, link); if (ret_val) return ret_val; } /* Clear link partner's EEE ability */ hw->dev_spec.ich8lan.eee_lp_ability = 0; if (hw->mac.type >= e1000_pch_lpt) { u32 fextnvm6 = E1000_READ_REG(hw, E1000_FEXTNVM6); if (hw->mac.type == e1000_pch_spt) { /* FEXTNVM6 K1-off workaround - for SPT only */ u32 pcieanacfg = E1000_READ_REG(hw, E1000_PCIEANACFG); if (pcieanacfg & E1000_FEXTNVM6_K1_OFF_ENABLE) fextnvm6 |= E1000_FEXTNVM6_K1_OFF_ENABLE; else fextnvm6 &= ~E1000_FEXTNVM6_K1_OFF_ENABLE; } if (hw->dev_spec.ich8lan.disable_k1_off == TRUE) fextnvm6 &= ~E1000_FEXTNVM6_K1_OFF_ENABLE; E1000_WRITE_REG(hw, E1000_FEXTNVM6, fextnvm6); } if (!link) return E1000_SUCCESS; /* No link detected */ mac->get_link_status = FALSE; switch (hw->mac.type) { case e1000_pch2lan: ret_val = e1000_k1_workaround_lv(hw); if (ret_val) return ret_val; /* fall-thru */ case e1000_pchlan: if (hw->phy.type == e1000_phy_82578) { ret_val = e1000_link_stall_workaround_hv(hw); if (ret_val) return ret_val; } /* Workaround for PCHx parts in half-duplex: * Set the number of preambles removed from the packet * when it is passed from the PHY to the MAC to prevent * the MAC from misinterpreting the packet type. */ hw->phy.ops.read_reg(hw, HV_KMRN_FIFO_CTRLSTA, &phy_reg); phy_reg &= ~HV_KMRN_FIFO_CTRLSTA_PREAMBLE_MASK; if ((E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_FD) != E1000_STATUS_FD) phy_reg |= (1 << HV_KMRN_FIFO_CTRLSTA_PREAMBLE_SHIFT); hw->phy.ops.write_reg(hw, HV_KMRN_FIFO_CTRLSTA, phy_reg); break; default: break; } /* Check if there was DownShift, must be checked * immediately after link-up */ e1000_check_downshift_generic(hw); /* Enable/Disable EEE after link up */ if (hw->phy.type > e1000_phy_82579) { ret_val = e1000_set_eee_pchlan(hw); if (ret_val) return ret_val; } /* If we are forcing speed/duplex, then we simply return since * we have already determined whether we have link or not. */ if (!mac->autoneg) return -E1000_ERR_CONFIG; /* Auto-Neg is enabled. Auto Speed Detection takes care * of MAC speed/duplex configuration. So we only need to * configure Collision Distance in the MAC. */ mac->ops.config_collision_dist(hw); /* Configure Flow Control now that Auto-Neg has completed. * First, we need to restore the desired flow control * settings because we may have had to re-autoneg with a * different link partner. */ ret_val = e1000_config_fc_after_link_up_generic(hw); if (ret_val) DEBUGOUT("Error configuring flow control\n"); return ret_val; } /** * e1000_init_function_pointers_ich8lan - Initialize ICH8 function pointers * @hw: pointer to the HW structure * * Initialize family-specific function pointers for PHY, MAC, and NVM. **/ void e1000_init_function_pointers_ich8lan(struct e1000_hw *hw) { DEBUGFUNC("e1000_init_function_pointers_ich8lan"); hw->mac.ops.init_params = e1000_init_mac_params_ich8lan; hw->nvm.ops.init_params = e1000_init_nvm_params_ich8lan; switch (hw->mac.type) { case e1000_ich8lan: case e1000_ich9lan: case e1000_ich10lan: hw->phy.ops.init_params = e1000_init_phy_params_ich8lan; break; case e1000_pchlan: case e1000_pch2lan: case e1000_pch_lpt: case e1000_pch_spt: hw->phy.ops.init_params = e1000_init_phy_params_pchlan; break; default: break; } } /** * e1000_acquire_nvm_ich8lan - Acquire NVM mutex * @hw: pointer to the HW structure * * Acquires the mutex for performing NVM operations. **/ static s32 e1000_acquire_nvm_ich8lan(struct e1000_hw *hw) { DEBUGFUNC("e1000_acquire_nvm_ich8lan"); - E1000_MUTEX_LOCK(&hw->dev_spec.ich8lan.nvm_mutex); + ASSERT_CTX_LOCK_HELD(hw); return E1000_SUCCESS; } /** * e1000_release_nvm_ich8lan - Release NVM mutex * @hw: pointer to the HW structure * * Releases the mutex used while performing NVM operations. **/ static void e1000_release_nvm_ich8lan(struct e1000_hw *hw) { DEBUGFUNC("e1000_release_nvm_ich8lan"); - E1000_MUTEX_UNLOCK(&hw->dev_spec.ich8lan.nvm_mutex); - - return; + ASSERT_CTX_LOCK_HELD(hw); } /** * e1000_acquire_swflag_ich8lan - Acquire software control flag * @hw: pointer to the HW structure * * Acquires the software control flag for performing PHY and select * MAC CSR accesses. **/ static s32 e1000_acquire_swflag_ich8lan(struct e1000_hw *hw) { u32 extcnf_ctrl, timeout = PHY_CFG_TIMEOUT; s32 ret_val = E1000_SUCCESS; DEBUGFUNC("e1000_acquire_swflag_ich8lan"); - E1000_MUTEX_LOCK(&hw->dev_spec.ich8lan.swflag_mutex); + ASSERT_CTX_LOCK_HELD(hw); while (timeout) { extcnf_ctrl = E1000_READ_REG(hw, E1000_EXTCNF_CTRL); if (!(extcnf_ctrl & E1000_EXTCNF_CTRL_SWFLAG)) break; msec_delay_irq(1); timeout--; } if (!timeout) { DEBUGOUT("SW has already locked the resource.\n"); ret_val = -E1000_ERR_CONFIG; goto out; } timeout = SW_FLAG_TIMEOUT; extcnf_ctrl |= E1000_EXTCNF_CTRL_SWFLAG; E1000_WRITE_REG(hw, E1000_EXTCNF_CTRL, extcnf_ctrl); while (timeout) { extcnf_ctrl = E1000_READ_REG(hw, E1000_EXTCNF_CTRL); if (extcnf_ctrl & E1000_EXTCNF_CTRL_SWFLAG) break; msec_delay_irq(1); timeout--; } if (!timeout) { DEBUGOUT2("Failed to acquire the semaphore, FW or HW has it: FWSM=0x%8.8x EXTCNF_CTRL=0x%8.8x)\n", E1000_READ_REG(hw, E1000_FWSM), extcnf_ctrl); extcnf_ctrl &= ~E1000_EXTCNF_CTRL_SWFLAG; E1000_WRITE_REG(hw, E1000_EXTCNF_CTRL, extcnf_ctrl); ret_val = -E1000_ERR_CONFIG; goto out; } out: - if (ret_val) - E1000_MUTEX_UNLOCK(&hw->dev_spec.ich8lan.swflag_mutex); - return ret_val; } /** * e1000_release_swflag_ich8lan - Release software control flag * @hw: pointer to the HW structure * * Releases the software control flag for performing PHY and select * MAC CSR accesses. **/ static void e1000_release_swflag_ich8lan(struct e1000_hw *hw) { u32 extcnf_ctrl; DEBUGFUNC("e1000_release_swflag_ich8lan"); extcnf_ctrl = E1000_READ_REG(hw, E1000_EXTCNF_CTRL); if (extcnf_ctrl & E1000_EXTCNF_CTRL_SWFLAG) { extcnf_ctrl &= ~E1000_EXTCNF_CTRL_SWFLAG; E1000_WRITE_REG(hw, E1000_EXTCNF_CTRL, extcnf_ctrl); } else { DEBUGOUT("Semaphore unexpectedly released by sw/fw/hw\n"); } - - E1000_MUTEX_UNLOCK(&hw->dev_spec.ich8lan.swflag_mutex); - - return; } /** * e1000_check_mng_mode_ich8lan - Checks management mode * @hw: pointer to the HW structure * * This checks if the adapter has any manageability enabled. * This is a function pointer entry point only called by read/write * routines for the PHY and NVM parts. **/ static bool e1000_check_mng_mode_ich8lan(struct e1000_hw *hw) { u32 fwsm; DEBUGFUNC("e1000_check_mng_mode_ich8lan"); fwsm = E1000_READ_REG(hw, E1000_FWSM); return (fwsm & E1000_ICH_FWSM_FW_VALID) && ((fwsm & E1000_FWSM_MODE_MASK) == (E1000_ICH_MNG_IAMT_MODE << E1000_FWSM_MODE_SHIFT)); } /** * e1000_check_mng_mode_pchlan - Checks management mode * @hw: pointer to the HW structure * * This checks if the adapter has iAMT enabled. * This is a function pointer entry point only called by read/write * routines for the PHY and NVM parts. **/ static bool e1000_check_mng_mode_pchlan(struct e1000_hw *hw) { u32 fwsm; DEBUGFUNC("e1000_check_mng_mode_pchlan"); fwsm = E1000_READ_REG(hw, E1000_FWSM); return (fwsm & E1000_ICH_FWSM_FW_VALID) && (fwsm & (E1000_ICH_MNG_IAMT_MODE << E1000_FWSM_MODE_SHIFT)); } /** * e1000_rar_set_pch2lan - Set receive address register * @hw: pointer to the HW structure * @addr: pointer to the receive address * @index: receive address array register * * Sets the receive address array register at index to the address passed * in by addr. For 82579, RAR[0] is the base address register that is to * contain the MAC address but RAR[1-6] are reserved for manageability (ME). * Use SHRA[0-3] in place of those reserved for ME. **/ static int e1000_rar_set_pch2lan(struct e1000_hw *hw, u8 *addr, u32 index) { u32 rar_low, rar_high; DEBUGFUNC("e1000_rar_set_pch2lan"); /* HW expects these in little endian so we reverse the byte order * from network order (big endian) to little endian */ rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) | ((u32) addr[2] << 16) | ((u32) addr[3] << 24)); rar_high = ((u32) addr[4] | ((u32) addr[5] << 8)); /* If MAC address zero, no need to set the AV bit */ if (rar_low || rar_high) rar_high |= E1000_RAH_AV; if (index == 0) { E1000_WRITE_REG(hw, E1000_RAL(index), rar_low); E1000_WRITE_FLUSH(hw); E1000_WRITE_REG(hw, E1000_RAH(index), rar_high); E1000_WRITE_FLUSH(hw); return E1000_SUCCESS; } /* RAR[1-6] are owned by manageability. Skip those and program the * next address into the SHRA register array. */ if (index < (u32) (hw->mac.rar_entry_count)) { s32 ret_val; ret_val = e1000_acquire_swflag_ich8lan(hw); if (ret_val) goto out; E1000_WRITE_REG(hw, E1000_SHRAL(index - 1), rar_low); E1000_WRITE_FLUSH(hw); E1000_WRITE_REG(hw, E1000_SHRAH(index - 1), rar_high); E1000_WRITE_FLUSH(hw); e1000_release_swflag_ich8lan(hw); /* verify the register updates */ if ((E1000_READ_REG(hw, E1000_SHRAL(index - 1)) == rar_low) && (E1000_READ_REG(hw, E1000_SHRAH(index - 1)) == rar_high)) return E1000_SUCCESS; DEBUGOUT2("SHRA[%d] might be locked by ME - FWSM=0x%8.8x\n", (index - 1), E1000_READ_REG(hw, E1000_FWSM)); } out: DEBUGOUT1("Failed to write receive address at index %d\n", index); return -E1000_ERR_CONFIG; } /** * e1000_rar_set_pch_lpt - Set receive address registers * @hw: pointer to the HW structure * @addr: pointer to the receive address * @index: receive address array register * * Sets the receive address register array at index to the address passed * in by addr. For LPT, RAR[0] is the base address register that is to * contain the MAC address. SHRA[0-10] are the shared receive address * registers that are shared between the Host and manageability engine (ME). **/ static int e1000_rar_set_pch_lpt(struct e1000_hw *hw, u8 *addr, u32 index) { u32 rar_low, rar_high; u32 wlock_mac; DEBUGFUNC("e1000_rar_set_pch_lpt"); /* HW expects these in little endian so we reverse the byte order * from network order (big endian) to little endian */ rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) | ((u32) addr[2] << 16) | ((u32) addr[3] << 24)); rar_high = ((u32) addr[4] | ((u32) addr[5] << 8)); /* If MAC address zero, no need to set the AV bit */ if (rar_low || rar_high) rar_high |= E1000_RAH_AV; if (index == 0) { E1000_WRITE_REG(hw, E1000_RAL(index), rar_low); E1000_WRITE_FLUSH(hw); E1000_WRITE_REG(hw, E1000_RAH(index), rar_high); E1000_WRITE_FLUSH(hw); return E1000_SUCCESS; } /* The manageability engine (ME) can lock certain SHRAR registers that * it is using - those registers are unavailable for use. */ if (index < hw->mac.rar_entry_count) { wlock_mac = E1000_READ_REG(hw, E1000_FWSM) & E1000_FWSM_WLOCK_MAC_MASK; wlock_mac >>= E1000_FWSM_WLOCK_MAC_SHIFT; /* Check if all SHRAR registers are locked */ if (wlock_mac == 1) goto out; if ((wlock_mac == 0) || (index <= wlock_mac)) { s32 ret_val; ret_val = e1000_acquire_swflag_ich8lan(hw); if (ret_val) goto out; E1000_WRITE_REG(hw, E1000_SHRAL_PCH_LPT(index - 1), rar_low); E1000_WRITE_FLUSH(hw); E1000_WRITE_REG(hw, E1000_SHRAH_PCH_LPT(index - 1), rar_high); E1000_WRITE_FLUSH(hw); e1000_release_swflag_ich8lan(hw); /* verify the register updates */ if ((E1000_READ_REG(hw, E1000_SHRAL_PCH_LPT(index - 1)) == rar_low) && (E1000_READ_REG(hw, E1000_SHRAH_PCH_LPT(index - 1)) == rar_high)) return E1000_SUCCESS; } } out: DEBUGOUT1("Failed to write receive address at index %d\n", index); return -E1000_ERR_CONFIG; } /** * e1000_update_mc_addr_list_pch2lan - Update Multicast addresses * @hw: pointer to the HW structure * @mc_addr_list: array of multicast addresses to program * @mc_addr_count: number of multicast addresses to program * * Updates entire Multicast Table Array of the PCH2 MAC and PHY. * The caller must have a packed mc_addr_list of multicast addresses. **/ static void e1000_update_mc_addr_list_pch2lan(struct e1000_hw *hw, u8 *mc_addr_list, u32 mc_addr_count) { u16 phy_reg = 0; int i; s32 ret_val; DEBUGFUNC("e1000_update_mc_addr_list_pch2lan"); e1000_update_mc_addr_list_generic(hw, mc_addr_list, mc_addr_count); ret_val = hw->phy.ops.acquire(hw); if (ret_val) return; ret_val = e1000_enable_phy_wakeup_reg_access_bm(hw, &phy_reg); if (ret_val) goto release; for (i = 0; i < hw->mac.mta_reg_count; i++) { hw->phy.ops.write_reg_page(hw, BM_MTA(i), (u16)(hw->mac.mta_shadow[i] & 0xFFFF)); hw->phy.ops.write_reg_page(hw, (BM_MTA(i) + 1), (u16)((hw->mac.mta_shadow[i] >> 16) & 0xFFFF)); } e1000_disable_phy_wakeup_reg_access_bm(hw, &phy_reg); release: hw->phy.ops.release(hw); } /** * e1000_check_reset_block_ich8lan - Check if PHY reset is blocked * @hw: pointer to the HW structure * * Checks if firmware is blocking the reset of the PHY. * This is a function pointer entry point only called by * reset routines. **/ static s32 e1000_check_reset_block_ich8lan(struct e1000_hw *hw) { u32 fwsm; bool blocked = FALSE; int i = 0; DEBUGFUNC("e1000_check_reset_block_ich8lan"); do { fwsm = E1000_READ_REG(hw, E1000_FWSM); if (!(fwsm & E1000_ICH_FWSM_RSPCIPHY)) { blocked = TRUE; msec_delay(10); continue; } blocked = FALSE; } while (blocked && (i++ < 30)); return blocked ? E1000_BLK_PHY_RESET : E1000_SUCCESS; } /** * e1000_write_smbus_addr - Write SMBus address to PHY needed during Sx states * @hw: pointer to the HW structure * * Assumes semaphore already acquired. * **/ static s32 e1000_write_smbus_addr(struct e1000_hw *hw) { u16 phy_data; u32 strap = E1000_READ_REG(hw, E1000_STRAP); u32 freq = (strap & E1000_STRAP_SMT_FREQ_MASK) >> E1000_STRAP_SMT_FREQ_SHIFT; s32 ret_val; strap &= E1000_STRAP_SMBUS_ADDRESS_MASK; ret_val = e1000_read_phy_reg_hv_locked(hw, HV_SMB_ADDR, &phy_data); if (ret_val) return ret_val; phy_data &= ~HV_SMB_ADDR_MASK; phy_data |= (strap >> E1000_STRAP_SMBUS_ADDRESS_SHIFT); phy_data |= HV_SMB_ADDR_PEC_EN | HV_SMB_ADDR_VALID; if (hw->phy.type == e1000_phy_i217) { /* Restore SMBus frequency */ if (freq--) { phy_data &= ~HV_SMB_ADDR_FREQ_MASK; phy_data |= (freq & (1 << 0)) << HV_SMB_ADDR_FREQ_LOW_SHIFT; phy_data |= (freq & (1 << 1)) << (HV_SMB_ADDR_FREQ_HIGH_SHIFT - 1); } else { DEBUGOUT("Unsupported SMB frequency in PHY\n"); } } return e1000_write_phy_reg_hv_locked(hw, HV_SMB_ADDR, phy_data); } /** * e1000_sw_lcd_config_ich8lan - SW-based LCD Configuration * @hw: pointer to the HW structure * * SW should configure the LCD from the NVM extended configuration region * as a workaround for certain parts. **/ static s32 e1000_sw_lcd_config_ich8lan(struct e1000_hw *hw) { struct e1000_phy_info *phy = &hw->phy; u32 i, data, cnf_size, cnf_base_addr, sw_cfg_mask; s32 ret_val = E1000_SUCCESS; u16 word_addr, reg_data, reg_addr, phy_page = 0; DEBUGFUNC("e1000_sw_lcd_config_ich8lan"); /* Initialize the PHY from the NVM on ICH platforms. This * is needed due to an issue where the NVM configuration is * not properly autoloaded after power transitions. * Therefore, after each PHY reset, we will load the * configuration data out of the NVM manually. */ switch (hw->mac.type) { case e1000_ich8lan: if (phy->type != e1000_phy_igp_3) return ret_val; if ((hw->device_id == E1000_DEV_ID_ICH8_IGP_AMT) || (hw->device_id == E1000_DEV_ID_ICH8_IGP_C)) { sw_cfg_mask = E1000_FEXTNVM_SW_CONFIG; break; } /* Fall-thru */ case e1000_pchlan: case e1000_pch2lan: case e1000_pch_lpt: case e1000_pch_spt: sw_cfg_mask = E1000_FEXTNVM_SW_CONFIG_ICH8M; break; default: return ret_val; } ret_val = hw->phy.ops.acquire(hw); if (ret_val) return ret_val; data = E1000_READ_REG(hw, E1000_FEXTNVM); if (!(data & sw_cfg_mask)) goto release; /* Make sure HW does not configure LCD from PHY * extended configuration before SW configuration */ data = E1000_READ_REG(hw, E1000_EXTCNF_CTRL); if ((hw->mac.type < e1000_pch2lan) && (data & E1000_EXTCNF_CTRL_LCD_WRITE_ENABLE)) goto release; cnf_size = E1000_READ_REG(hw, E1000_EXTCNF_SIZE); cnf_size &= E1000_EXTCNF_SIZE_EXT_PCIE_LENGTH_MASK; cnf_size >>= E1000_EXTCNF_SIZE_EXT_PCIE_LENGTH_SHIFT; if (!cnf_size) goto release; cnf_base_addr = data & E1000_EXTCNF_CTRL_EXT_CNF_POINTER_MASK; cnf_base_addr >>= E1000_EXTCNF_CTRL_EXT_CNF_POINTER_SHIFT; if (((hw->mac.type == e1000_pchlan) && !(data & E1000_EXTCNF_CTRL_OEM_WRITE_ENABLE)) || (hw->mac.type > e1000_pchlan)) { /* HW configures the SMBus address and LEDs when the * OEM and LCD Write Enable bits are set in the NVM. * When both NVM bits are cleared, SW will configure * them instead. */ ret_val = e1000_write_smbus_addr(hw); if (ret_val) goto release; data = E1000_READ_REG(hw, E1000_LEDCTL); ret_val = e1000_write_phy_reg_hv_locked(hw, HV_LED_CONFIG, (u16)data); if (ret_val) goto release; } /* Configure LCD from extended configuration region. */ /* cnf_base_addr is in DWORD */ word_addr = (u16)(cnf_base_addr << 1); for (i = 0; i < cnf_size; i++) { ret_val = hw->nvm.ops.read(hw, (word_addr + i * 2), 1, ®_data); if (ret_val) goto release; ret_val = hw->nvm.ops.read(hw, (word_addr + i * 2 + 1), 1, ®_addr); if (ret_val) goto release; /* Save off the PHY page for future writes. */ if (reg_addr == IGP01E1000_PHY_PAGE_SELECT) { phy_page = reg_data; continue; } reg_addr &= PHY_REG_MASK; reg_addr |= phy_page; ret_val = phy->ops.write_reg_locked(hw, (u32)reg_addr, reg_data); if (ret_val) goto release; } release: hw->phy.ops.release(hw); return ret_val; } /** * e1000_k1_gig_workaround_hv - K1 Si workaround * @hw: pointer to the HW structure * @link: link up bool flag * * If K1 is enabled for 1Gbps, the MAC might stall when transitioning * from a lower speed. This workaround disables K1 whenever link is at 1Gig * If link is down, the function will restore the default K1 setting located * in the NVM. **/ static s32 e1000_k1_gig_workaround_hv(struct e1000_hw *hw, bool link) { s32 ret_val = E1000_SUCCESS; u16 status_reg = 0; bool k1_enable = hw->dev_spec.ich8lan.nvm_k1_enabled; DEBUGFUNC("e1000_k1_gig_workaround_hv"); if (hw->mac.type != e1000_pchlan) return E1000_SUCCESS; /* Wrap the whole flow with the sw flag */ ret_val = hw->phy.ops.acquire(hw); if (ret_val) return ret_val; /* Disable K1 when link is 1Gbps, otherwise use the NVM setting */ if (link) { if (hw->phy.type == e1000_phy_82578) { ret_val = hw->phy.ops.read_reg_locked(hw, BM_CS_STATUS, &status_reg); if (ret_val) goto release; status_reg &= (BM_CS_STATUS_LINK_UP | BM_CS_STATUS_RESOLVED | BM_CS_STATUS_SPEED_MASK); if (status_reg == (BM_CS_STATUS_LINK_UP | BM_CS_STATUS_RESOLVED | BM_CS_STATUS_SPEED_1000)) k1_enable = FALSE; } if (hw->phy.type == e1000_phy_82577) { ret_val = hw->phy.ops.read_reg_locked(hw, HV_M_STATUS, &status_reg); if (ret_val) goto release; status_reg &= (HV_M_STATUS_LINK_UP | HV_M_STATUS_AUTONEG_COMPLETE | HV_M_STATUS_SPEED_MASK); if (status_reg == (HV_M_STATUS_LINK_UP | HV_M_STATUS_AUTONEG_COMPLETE | HV_M_STATUS_SPEED_1000)) k1_enable = FALSE; } /* Link stall fix for link up */ ret_val = hw->phy.ops.write_reg_locked(hw, PHY_REG(770, 19), 0x0100); if (ret_val) goto release; } else { /* Link stall fix for link down */ ret_val = hw->phy.ops.write_reg_locked(hw, PHY_REG(770, 19), 0x4100); if (ret_val) goto release; } ret_val = e1000_configure_k1_ich8lan(hw, k1_enable); release: hw->phy.ops.release(hw); return ret_val; } /** * e1000_configure_k1_ich8lan - Configure K1 power state * @hw: pointer to the HW structure * @enable: K1 state to configure * * Configure the K1 power state based on the provided parameter. * Assumes semaphore already acquired. * * Success returns 0, Failure returns -E1000_ERR_PHY (-2) **/ s32 e1000_configure_k1_ich8lan(struct e1000_hw *hw, bool k1_enable) { s32 ret_val; u32 ctrl_reg = 0; u32 ctrl_ext = 0; u32 reg = 0; u16 kmrn_reg = 0; DEBUGFUNC("e1000_configure_k1_ich8lan"); ret_val = e1000_read_kmrn_reg_locked(hw, E1000_KMRNCTRLSTA_K1_CONFIG, &kmrn_reg); if (ret_val) return ret_val; if (k1_enable) kmrn_reg |= E1000_KMRNCTRLSTA_K1_ENABLE; else kmrn_reg &= ~E1000_KMRNCTRLSTA_K1_ENABLE; ret_val = e1000_write_kmrn_reg_locked(hw, E1000_KMRNCTRLSTA_K1_CONFIG, kmrn_reg); if (ret_val) return ret_val; usec_delay(20); ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT); ctrl_reg = E1000_READ_REG(hw, E1000_CTRL); reg = ctrl_reg & ~(E1000_CTRL_SPD_1000 | E1000_CTRL_SPD_100); reg |= E1000_CTRL_FRCSPD; E1000_WRITE_REG(hw, E1000_CTRL, reg); E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext | E1000_CTRL_EXT_SPD_BYPS); E1000_WRITE_FLUSH(hw); usec_delay(20); E1000_WRITE_REG(hw, E1000_CTRL, ctrl_reg); E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext); E1000_WRITE_FLUSH(hw); usec_delay(20); return E1000_SUCCESS; } /** * e1000_oem_bits_config_ich8lan - SW-based LCD Configuration * @hw: pointer to the HW structure * @d0_state: boolean if entering d0 or d3 device state * * SW will configure Gbe Disable and LPLU based on the NVM. The four bits are * collectively called OEM bits. The OEM Write Enable bit and SW Config bit * in NVM determines whether HW should configure LPLU and Gbe Disable. **/ static s32 e1000_oem_bits_config_ich8lan(struct e1000_hw *hw, bool d0_state) { s32 ret_val = 0; u32 mac_reg; u16 oem_reg; DEBUGFUNC("e1000_oem_bits_config_ich8lan"); if (hw->mac.type < e1000_pchlan) return ret_val; ret_val = hw->phy.ops.acquire(hw); if (ret_val) return ret_val; if (hw->mac.type == e1000_pchlan) { mac_reg = E1000_READ_REG(hw, E1000_EXTCNF_CTRL); if (mac_reg & E1000_EXTCNF_CTRL_OEM_WRITE_ENABLE) goto release; } mac_reg = E1000_READ_REG(hw, E1000_FEXTNVM); if (!(mac_reg & E1000_FEXTNVM_SW_CONFIG_ICH8M)) goto release; mac_reg = E1000_READ_REG(hw, E1000_PHY_CTRL); ret_val = hw->phy.ops.read_reg_locked(hw, HV_OEM_BITS, &oem_reg); if (ret_val) goto release; oem_reg &= ~(HV_OEM_BITS_GBE_DIS | HV_OEM_BITS_LPLU); if (d0_state) { if (mac_reg & E1000_PHY_CTRL_GBE_DISABLE) oem_reg |= HV_OEM_BITS_GBE_DIS; if (mac_reg & E1000_PHY_CTRL_D0A_LPLU) oem_reg |= HV_OEM_BITS_LPLU; } else { if (mac_reg & (E1000_PHY_CTRL_GBE_DISABLE | E1000_PHY_CTRL_NOND0A_GBE_DISABLE)) oem_reg |= HV_OEM_BITS_GBE_DIS; if (mac_reg & (E1000_PHY_CTRL_D0A_LPLU | E1000_PHY_CTRL_NOND0A_LPLU)) oem_reg |= HV_OEM_BITS_LPLU; } /* Set Restart auto-neg to activate the bits */ if ((d0_state || (hw->mac.type != e1000_pchlan)) && !hw->phy.ops.check_reset_block(hw)) oem_reg |= HV_OEM_BITS_RESTART_AN; ret_val = hw->phy.ops.write_reg_locked(hw, HV_OEM_BITS, oem_reg); release: hw->phy.ops.release(hw); return ret_val; } /** * e1000_set_mdio_slow_mode_hv - Set slow MDIO access mode * @hw: pointer to the HW structure **/ static s32 e1000_set_mdio_slow_mode_hv(struct e1000_hw *hw) { s32 ret_val; u16 data; DEBUGFUNC("e1000_set_mdio_slow_mode_hv"); ret_val = hw->phy.ops.read_reg(hw, HV_KMRN_MODE_CTRL, &data); if (ret_val) return ret_val; data |= HV_KMRN_MDIO_SLOW; ret_val = hw->phy.ops.write_reg(hw, HV_KMRN_MODE_CTRL, data); return ret_val; } /** * e1000_hv_phy_workarounds_ich8lan - A series of Phy workarounds to be * done after every PHY reset. **/ static s32 e1000_hv_phy_workarounds_ich8lan(struct e1000_hw *hw) { s32 ret_val = E1000_SUCCESS; u16 phy_data; DEBUGFUNC("e1000_hv_phy_workarounds_ich8lan"); if (hw->mac.type != e1000_pchlan) return E1000_SUCCESS; /* Set MDIO slow mode before any other MDIO access */ if (hw->phy.type == e1000_phy_82577) { ret_val = e1000_set_mdio_slow_mode_hv(hw); if (ret_val) return ret_val; } if (((hw->phy.type == e1000_phy_82577) && ((hw->phy.revision == 1) || (hw->phy.revision == 2))) || ((hw->phy.type == e1000_phy_82578) && (hw->phy.revision == 1))) { /* Disable generation of early preamble */ ret_val = hw->phy.ops.write_reg(hw, PHY_REG(769, 25), 0x4431); if (ret_val) return ret_val; /* Preamble tuning for SSC */ ret_val = hw->phy.ops.write_reg(hw, HV_KMRN_FIFO_CTRLSTA, 0xA204); if (ret_val) return ret_val; } if (hw->phy.type == e1000_phy_82578) { /* Return registers to default by doing a soft reset then * writing 0x3140 to the control register. */ if (hw->phy.revision < 2) { e1000_phy_sw_reset_generic(hw); ret_val = hw->phy.ops.write_reg(hw, PHY_CONTROL, 0x3140); } } /* Select page 0 */ ret_val = hw->phy.ops.acquire(hw); if (ret_val) return ret_val; hw->phy.addr = 1; ret_val = e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT, 0); hw->phy.ops.release(hw); if (ret_val) return ret_val; /* Configure the K1 Si workaround during phy reset assuming there is * link so that it disables K1 if link is in 1Gbps. */ ret_val = e1000_k1_gig_workaround_hv(hw, TRUE); if (ret_val) return ret_val; /* Workaround for link disconnects on a busy hub in half duplex */ ret_val = hw->phy.ops.acquire(hw); if (ret_val) return ret_val; ret_val = hw->phy.ops.read_reg_locked(hw, BM_PORT_GEN_CFG, &phy_data); if (ret_val) goto release; ret_val = hw->phy.ops.write_reg_locked(hw, BM_PORT_GEN_CFG, phy_data & 0x00FF); if (ret_val) goto release; /* set MSE higher to enable link to stay up when noise is high */ ret_val = e1000_write_emi_reg_locked(hw, I82577_MSE_THRESHOLD, 0x0034); release: hw->phy.ops.release(hw); return ret_val; } /** * e1000_copy_rx_addrs_to_phy_ich8lan - Copy Rx addresses from MAC to PHY * @hw: pointer to the HW structure **/ void e1000_copy_rx_addrs_to_phy_ich8lan(struct e1000_hw *hw) { u32 mac_reg; u16 i, phy_reg = 0; s32 ret_val; DEBUGFUNC("e1000_copy_rx_addrs_to_phy_ich8lan"); ret_val = hw->phy.ops.acquire(hw); if (ret_val) return; ret_val = e1000_enable_phy_wakeup_reg_access_bm(hw, &phy_reg); if (ret_val) goto release; /* Copy both RAL/H (rar_entry_count) and SHRAL/H to PHY */ for (i = 0; i < (hw->mac.rar_entry_count); i++) { mac_reg = E1000_READ_REG(hw, E1000_RAL(i)); hw->phy.ops.write_reg_page(hw, BM_RAR_L(i), (u16)(mac_reg & 0xFFFF)); hw->phy.ops.write_reg_page(hw, BM_RAR_M(i), (u16)((mac_reg >> 16) & 0xFFFF)); mac_reg = E1000_READ_REG(hw, E1000_RAH(i)); hw->phy.ops.write_reg_page(hw, BM_RAR_H(i), (u16)(mac_reg & 0xFFFF)); hw->phy.ops.write_reg_page(hw, BM_RAR_CTRL(i), (u16)((mac_reg & E1000_RAH_AV) >> 16)); } e1000_disable_phy_wakeup_reg_access_bm(hw, &phy_reg); release: hw->phy.ops.release(hw); } static u32 e1000_calc_rx_da_crc(u8 mac[]) { u32 poly = 0xEDB88320; /* Polynomial for 802.3 CRC calculation */ u32 i, j, mask, crc; DEBUGFUNC("e1000_calc_rx_da_crc"); crc = 0xffffffff; for (i = 0; i < 6; i++) { crc = crc ^ mac[i]; for (j = 8; j > 0; j--) { mask = (crc & 1) * (-1); crc = (crc >> 1) ^ (poly & mask); } } return ~crc; } /** * e1000_lv_jumbo_workaround_ich8lan - required for jumbo frame operation * with 82579 PHY * @hw: pointer to the HW structure * @enable: flag to enable/disable workaround when enabling/disabling jumbos **/ s32 e1000_lv_jumbo_workaround_ich8lan(struct e1000_hw *hw, bool enable) { s32 ret_val = E1000_SUCCESS; u16 phy_reg, data; u32 mac_reg; u16 i; DEBUGFUNC("e1000_lv_jumbo_workaround_ich8lan"); if (hw->mac.type < e1000_pch2lan) return E1000_SUCCESS; /* disable Rx path while enabling/disabling workaround */ hw->phy.ops.read_reg(hw, PHY_REG(769, 20), &phy_reg); ret_val = hw->phy.ops.write_reg(hw, PHY_REG(769, 20), phy_reg | (1 << 14)); if (ret_val) return ret_val; if (enable) { /* Write Rx addresses (rar_entry_count for RAL/H, and * SHRAL/H) and initial CRC values to the MAC */ for (i = 0; i < hw->mac.rar_entry_count; i++) { u8 mac_addr[ETH_ADDR_LEN] = {0}; u32 addr_high, addr_low; addr_high = E1000_READ_REG(hw, E1000_RAH(i)); if (!(addr_high & E1000_RAH_AV)) continue; addr_low = E1000_READ_REG(hw, E1000_RAL(i)); mac_addr[0] = (addr_low & 0xFF); mac_addr[1] = ((addr_low >> 8) & 0xFF); mac_addr[2] = ((addr_low >> 16) & 0xFF); mac_addr[3] = ((addr_low >> 24) & 0xFF); mac_addr[4] = (addr_high & 0xFF); mac_addr[5] = ((addr_high >> 8) & 0xFF); E1000_WRITE_REG(hw, E1000_PCH_RAICC(i), e1000_calc_rx_da_crc(mac_addr)); } /* Write Rx addresses to the PHY */ e1000_copy_rx_addrs_to_phy_ich8lan(hw); /* Enable jumbo frame workaround in the MAC */ mac_reg = E1000_READ_REG(hw, E1000_FFLT_DBG); mac_reg &= ~(1 << 14); mac_reg |= (7 << 15); E1000_WRITE_REG(hw, E1000_FFLT_DBG, mac_reg); mac_reg = E1000_READ_REG(hw, E1000_RCTL); mac_reg |= E1000_RCTL_SECRC; E1000_WRITE_REG(hw, E1000_RCTL, mac_reg); ret_val = e1000_read_kmrn_reg_generic(hw, E1000_KMRNCTRLSTA_CTRL_OFFSET, &data); if (ret_val) return ret_val; ret_val = e1000_write_kmrn_reg_generic(hw, E1000_KMRNCTRLSTA_CTRL_OFFSET, data | (1 << 0)); if (ret_val) return ret_val; ret_val = e1000_read_kmrn_reg_generic(hw, E1000_KMRNCTRLSTA_HD_CTRL, &data); if (ret_val) return ret_val; data &= ~(0xF << 8); data |= (0xB << 8); ret_val = e1000_write_kmrn_reg_generic(hw, E1000_KMRNCTRLSTA_HD_CTRL, data); if (ret_val) return ret_val; /* Enable jumbo frame workaround in the PHY */ hw->phy.ops.read_reg(hw, PHY_REG(769, 23), &data); data &= ~(0x7F << 5); data |= (0x37 << 5); ret_val = hw->phy.ops.write_reg(hw, PHY_REG(769, 23), data); if (ret_val) return ret_val; hw->phy.ops.read_reg(hw, PHY_REG(769, 16), &data); data &= ~(1 << 13); ret_val = hw->phy.ops.write_reg(hw, PHY_REG(769, 16), data); if (ret_val) return ret_val; hw->phy.ops.read_reg(hw, PHY_REG(776, 20), &data); data &= ~(0x3FF << 2); data |= (E1000_TX_PTR_GAP << 2); ret_val = hw->phy.ops.write_reg(hw, PHY_REG(776, 20), data); if (ret_val) return ret_val; ret_val = hw->phy.ops.write_reg(hw, PHY_REG(776, 23), 0xF100); if (ret_val) return ret_val; hw->phy.ops.read_reg(hw, HV_PM_CTRL, &data); ret_val = hw->phy.ops.write_reg(hw, HV_PM_CTRL, data | (1 << 10)); if (ret_val) return ret_val; } else { /* Write MAC register values back to h/w defaults */ mac_reg = E1000_READ_REG(hw, E1000_FFLT_DBG); mac_reg &= ~(0xF << 14); E1000_WRITE_REG(hw, E1000_FFLT_DBG, mac_reg); mac_reg = E1000_READ_REG(hw, E1000_RCTL); mac_reg &= ~E1000_RCTL_SECRC; E1000_WRITE_REG(hw, E1000_RCTL, mac_reg); ret_val = e1000_read_kmrn_reg_generic(hw, E1000_KMRNCTRLSTA_CTRL_OFFSET, &data); if (ret_val) return ret_val; ret_val = e1000_write_kmrn_reg_generic(hw, E1000_KMRNCTRLSTA_CTRL_OFFSET, data & ~(1 << 0)); if (ret_val) return ret_val; ret_val = e1000_read_kmrn_reg_generic(hw, E1000_KMRNCTRLSTA_HD_CTRL, &data); if (ret_val) return ret_val; data &= ~(0xF << 8); data |= (0xB << 8); ret_val = e1000_write_kmrn_reg_generic(hw, E1000_KMRNCTRLSTA_HD_CTRL, data); if (ret_val) return ret_val; /* Write PHY register values back to h/w defaults */ hw->phy.ops.read_reg(hw, PHY_REG(769, 23), &data); data &= ~(0x7F << 5); ret_val = hw->phy.ops.write_reg(hw, PHY_REG(769, 23), data); if (ret_val) return ret_val; hw->phy.ops.read_reg(hw, PHY_REG(769, 16), &data); data |= (1 << 13); ret_val = hw->phy.ops.write_reg(hw, PHY_REG(769, 16), data); if (ret_val) return ret_val; hw->phy.ops.read_reg(hw, PHY_REG(776, 20), &data); data &= ~(0x3FF << 2); data |= (0x8 << 2); ret_val = hw->phy.ops.write_reg(hw, PHY_REG(776, 20), data); if (ret_val) return ret_val; ret_val = hw->phy.ops.write_reg(hw, PHY_REG(776, 23), 0x7E00); if (ret_val) return ret_val; hw->phy.ops.read_reg(hw, HV_PM_CTRL, &data); ret_val = hw->phy.ops.write_reg(hw, HV_PM_CTRL, data & ~(1 << 10)); if (ret_val) return ret_val; } /* re-enable Rx path after enabling/disabling workaround */ return hw->phy.ops.write_reg(hw, PHY_REG(769, 20), phy_reg & ~(1 << 14)); } /** * e1000_lv_phy_workarounds_ich8lan - A series of Phy workarounds to be * done after every PHY reset. **/ static s32 e1000_lv_phy_workarounds_ich8lan(struct e1000_hw *hw) { s32 ret_val = E1000_SUCCESS; DEBUGFUNC("e1000_lv_phy_workarounds_ich8lan"); if (hw->mac.type != e1000_pch2lan) return E1000_SUCCESS; /* Set MDIO slow mode before any other MDIO access */ ret_val = e1000_set_mdio_slow_mode_hv(hw); if (ret_val) return ret_val; ret_val = hw->phy.ops.acquire(hw); if (ret_val) return ret_val; /* set MSE higher to enable link to stay up when noise is high */ ret_val = e1000_write_emi_reg_locked(hw, I82579_MSE_THRESHOLD, 0x0034); if (ret_val) goto release; /* drop link after 5 times MSE threshold was reached */ ret_val = e1000_write_emi_reg_locked(hw, I82579_MSE_LINK_DOWN, 0x0005); release: hw->phy.ops.release(hw); return ret_val; } /** * e1000_k1_gig_workaround_lv - K1 Si workaround * @hw: pointer to the HW structure * * Workaround to set the K1 beacon duration for 82579 parts in 10Mbps * Disable K1 for 1000 and 100 speeds **/ static s32 e1000_k1_workaround_lv(struct e1000_hw *hw) { s32 ret_val = E1000_SUCCESS; u16 status_reg = 0; DEBUGFUNC("e1000_k1_workaround_lv"); if (hw->mac.type != e1000_pch2lan) return E1000_SUCCESS; /* Set K1 beacon duration based on 10Mbs speed */ ret_val = hw->phy.ops.read_reg(hw, HV_M_STATUS, &status_reg); if (ret_val) return ret_val; if ((status_reg & (HV_M_STATUS_LINK_UP | HV_M_STATUS_AUTONEG_COMPLETE)) == (HV_M_STATUS_LINK_UP | HV_M_STATUS_AUTONEG_COMPLETE)) { if (status_reg & (HV_M_STATUS_SPEED_1000 | HV_M_STATUS_SPEED_100)) { u16 pm_phy_reg; /* LV 1G/100 Packet drop issue wa */ ret_val = hw->phy.ops.read_reg(hw, HV_PM_CTRL, &pm_phy_reg); if (ret_val) return ret_val; pm_phy_reg &= ~HV_PM_CTRL_K1_ENABLE; ret_val = hw->phy.ops.write_reg(hw, HV_PM_CTRL, pm_phy_reg); if (ret_val) return ret_val; } else { u32 mac_reg; mac_reg = E1000_READ_REG(hw, E1000_FEXTNVM4); mac_reg &= ~E1000_FEXTNVM4_BEACON_DURATION_MASK; mac_reg |= E1000_FEXTNVM4_BEACON_DURATION_16USEC; E1000_WRITE_REG(hw, E1000_FEXTNVM4, mac_reg); } } return ret_val; } /** * e1000_gate_hw_phy_config_ich8lan - disable PHY config via hardware * @hw: pointer to the HW structure * @gate: boolean set to TRUE to gate, FALSE to ungate * * Gate/ungate the automatic PHY configuration via hardware; perform * the configuration via software instead. **/ static void e1000_gate_hw_phy_config_ich8lan(struct e1000_hw *hw, bool gate) { u32 extcnf_ctrl; DEBUGFUNC("e1000_gate_hw_phy_config_ich8lan"); if (hw->mac.type < e1000_pch2lan) return; extcnf_ctrl = E1000_READ_REG(hw, E1000_EXTCNF_CTRL); if (gate) extcnf_ctrl |= E1000_EXTCNF_CTRL_GATE_PHY_CFG; else extcnf_ctrl &= ~E1000_EXTCNF_CTRL_GATE_PHY_CFG; E1000_WRITE_REG(hw, E1000_EXTCNF_CTRL, extcnf_ctrl); } /** * e1000_lan_init_done_ich8lan - Check for PHY config completion * @hw: pointer to the HW structure * * Check the appropriate indication the MAC has finished configuring the * PHY after a software reset. **/ static void e1000_lan_init_done_ich8lan(struct e1000_hw *hw) { u32 data, loop = E1000_ICH8_LAN_INIT_TIMEOUT; DEBUGFUNC("e1000_lan_init_done_ich8lan"); /* Wait for basic configuration completes before proceeding */ do { data = E1000_READ_REG(hw, E1000_STATUS); data &= E1000_STATUS_LAN_INIT_DONE; usec_delay(100); } while ((!data) && --loop); /* If basic configuration is incomplete before the above loop * count reaches 0, loading the configuration from NVM will * leave the PHY in a bad state possibly resulting in no link. */ if (loop == 0) DEBUGOUT("LAN_INIT_DONE not set, increase timeout\n"); /* Clear the Init Done bit for the next init event */ data = E1000_READ_REG(hw, E1000_STATUS); data &= ~E1000_STATUS_LAN_INIT_DONE; E1000_WRITE_REG(hw, E1000_STATUS, data); } /** * e1000_post_phy_reset_ich8lan - Perform steps required after a PHY reset * @hw: pointer to the HW structure **/ static s32 e1000_post_phy_reset_ich8lan(struct e1000_hw *hw) { s32 ret_val = E1000_SUCCESS; u16 reg; DEBUGFUNC("e1000_post_phy_reset_ich8lan"); if (hw->phy.ops.check_reset_block(hw)) return E1000_SUCCESS; /* Allow time for h/w to get to quiescent state after reset */ msec_delay(10); /* Perform any necessary post-reset workarounds */ switch (hw->mac.type) { case e1000_pchlan: ret_val = e1000_hv_phy_workarounds_ich8lan(hw); if (ret_val) return ret_val; break; case e1000_pch2lan: ret_val = e1000_lv_phy_workarounds_ich8lan(hw); if (ret_val) return ret_val; break; default: break; } /* Clear the host wakeup bit after lcd reset */ if (hw->mac.type >= e1000_pchlan) { hw->phy.ops.read_reg(hw, BM_PORT_GEN_CFG, ®); reg &= ~BM_WUC_HOST_WU_BIT; hw->phy.ops.write_reg(hw, BM_PORT_GEN_CFG, reg); } /* Configure the LCD with the extended configuration region in NVM */ ret_val = e1000_sw_lcd_config_ich8lan(hw); if (ret_val) return ret_val; /* Configure the LCD with the OEM bits in NVM */ ret_val = e1000_oem_bits_config_ich8lan(hw, TRUE); if (hw->mac.type == e1000_pch2lan) { /* Ungate automatic PHY configuration on non-managed 82579 */ if (!(E1000_READ_REG(hw, E1000_FWSM) & E1000_ICH_FWSM_FW_VALID)) { msec_delay(10); e1000_gate_hw_phy_config_ich8lan(hw, FALSE); } /* Set EEE LPI Update Timer to 200usec */ ret_val = hw->phy.ops.acquire(hw); if (ret_val) return ret_val; ret_val = e1000_write_emi_reg_locked(hw, I82579_LPI_UPDATE_TIMER, 0x1387); hw->phy.ops.release(hw); } return ret_val; } /** * e1000_phy_hw_reset_ich8lan - Performs a PHY reset * @hw: pointer to the HW structure * * Resets the PHY * This is a function pointer entry point called by drivers * or other shared routines. **/ static s32 e1000_phy_hw_reset_ich8lan(struct e1000_hw *hw) { s32 ret_val = E1000_SUCCESS; DEBUGFUNC("e1000_phy_hw_reset_ich8lan"); /* Gate automatic PHY configuration by hardware on non-managed 82579 */ if ((hw->mac.type == e1000_pch2lan) && !(E1000_READ_REG(hw, E1000_FWSM) & E1000_ICH_FWSM_FW_VALID)) e1000_gate_hw_phy_config_ich8lan(hw, TRUE); ret_val = e1000_phy_hw_reset_generic(hw); if (ret_val) return ret_val; return e1000_post_phy_reset_ich8lan(hw); } /** * e1000_set_lplu_state_pchlan - Set Low Power Link Up state * @hw: pointer to the HW structure * @active: TRUE to enable LPLU, FALSE to disable * * Sets the LPLU state according to the active flag. For PCH, if OEM write * bit are disabled in the NVM, writing the LPLU bits in the MAC will not set * the phy speed. This function will manually set the LPLU bit and restart * auto-neg as hw would do. D3 and D0 LPLU will call the same function * since it configures the same bit. **/ static s32 e1000_set_lplu_state_pchlan(struct e1000_hw *hw, bool active) { s32 ret_val; u16 oem_reg; DEBUGFUNC("e1000_set_lplu_state_pchlan"); ret_val = hw->phy.ops.read_reg(hw, HV_OEM_BITS, &oem_reg); if (ret_val) return ret_val; if (active) oem_reg |= HV_OEM_BITS_LPLU; else oem_reg &= ~HV_OEM_BITS_LPLU; if (!hw->phy.ops.check_reset_block(hw)) oem_reg |= HV_OEM_BITS_RESTART_AN; return hw->phy.ops.write_reg(hw, HV_OEM_BITS, oem_reg); } /** * e1000_set_d0_lplu_state_ich8lan - Set Low Power Linkup D0 state * @hw: pointer to the HW structure * @active: TRUE to enable LPLU, FALSE to disable * * Sets the LPLU D0 state according to the active flag. When * activating LPLU this function also disables smart speed * and vice versa. LPLU will not be activated unless the * device autonegotiation advertisement meets standards of * either 10 or 10/100 or 10/100/1000 at all duplexes. * This is a function pointer entry point only called by * PHY setup routines. **/ static s32 e1000_set_d0_lplu_state_ich8lan(struct e1000_hw *hw, bool active) { struct e1000_phy_info *phy = &hw->phy; u32 phy_ctrl; s32 ret_val = E1000_SUCCESS; u16 data; DEBUGFUNC("e1000_set_d0_lplu_state_ich8lan"); if (phy->type == e1000_phy_ife) return E1000_SUCCESS; phy_ctrl = E1000_READ_REG(hw, E1000_PHY_CTRL); if (active) { phy_ctrl |= E1000_PHY_CTRL_D0A_LPLU; E1000_WRITE_REG(hw, E1000_PHY_CTRL, phy_ctrl); if (phy->type != e1000_phy_igp_3) return E1000_SUCCESS; /* Call gig speed drop workaround on LPLU before accessing * any PHY registers */ if (hw->mac.type == e1000_ich8lan) e1000_gig_downshift_workaround_ich8lan(hw); /* When LPLU is enabled, we should disable SmartSpeed */ ret_val = phy->ops.read_reg(hw, IGP01E1000_PHY_PORT_CONFIG, &data); if (ret_val) return ret_val; data &= ~IGP01E1000_PSCFR_SMART_SPEED; ret_val = phy->ops.write_reg(hw, IGP01E1000_PHY_PORT_CONFIG, data); if (ret_val) return ret_val; } else { phy_ctrl &= ~E1000_PHY_CTRL_D0A_LPLU; E1000_WRITE_REG(hw, E1000_PHY_CTRL, phy_ctrl); if (phy->type != e1000_phy_igp_3) return E1000_SUCCESS; /* LPLU and SmartSpeed are mutually exclusive. LPLU is used * during Dx states where the power conservation is most * important. During driver activity we should enable * SmartSpeed, so performance is maintained. */ if (phy->smart_speed == e1000_smart_speed_on) { ret_val = phy->ops.read_reg(hw, IGP01E1000_PHY_PORT_CONFIG, &data); if (ret_val) return ret_val; data |= IGP01E1000_PSCFR_SMART_SPEED; ret_val = phy->ops.write_reg(hw, IGP01E1000_PHY_PORT_CONFIG, data); if (ret_val) return ret_val; } else if (phy->smart_speed == e1000_smart_speed_off) { ret_val = phy->ops.read_reg(hw, IGP01E1000_PHY_PORT_CONFIG, &data); if (ret_val) return ret_val; data &= ~IGP01E1000_PSCFR_SMART_SPEED; ret_val = phy->ops.write_reg(hw, IGP01E1000_PHY_PORT_CONFIG, data); if (ret_val) return ret_val; } } return E1000_SUCCESS; } /** * e1000_set_d3_lplu_state_ich8lan - Set Low Power Linkup D3 state * @hw: pointer to the HW structure * @active: TRUE to enable LPLU, FALSE to disable * * Sets the LPLU D3 state according to the active flag. When * activating LPLU this function also disables smart speed * and vice versa. LPLU will not be activated unless the * device autonegotiation advertisement meets standards of * either 10 or 10/100 or 10/100/1000 at all duplexes. * This is a function pointer entry point only called by * PHY setup routines. **/ static s32 e1000_set_d3_lplu_state_ich8lan(struct e1000_hw *hw, bool active) { struct e1000_phy_info *phy = &hw->phy; u32 phy_ctrl; s32 ret_val = E1000_SUCCESS; u16 data; DEBUGFUNC("e1000_set_d3_lplu_state_ich8lan"); phy_ctrl = E1000_READ_REG(hw, E1000_PHY_CTRL); if (!active) { phy_ctrl &= ~E1000_PHY_CTRL_NOND0A_LPLU; E1000_WRITE_REG(hw, E1000_PHY_CTRL, phy_ctrl); if (phy->type != e1000_phy_igp_3) return E1000_SUCCESS; /* LPLU and SmartSpeed are mutually exclusive. LPLU is used * during Dx states where the power conservation is most * important. During driver activity we should enable * SmartSpeed, so performance is maintained. */ if (phy->smart_speed == e1000_smart_speed_on) { ret_val = phy->ops.read_reg(hw, IGP01E1000_PHY_PORT_CONFIG, &data); if (ret_val) return ret_val; data |= IGP01E1000_PSCFR_SMART_SPEED; ret_val = phy->ops.write_reg(hw, IGP01E1000_PHY_PORT_CONFIG, data); if (ret_val) return ret_val; } else if (phy->smart_speed == e1000_smart_speed_off) { ret_val = phy->ops.read_reg(hw, IGP01E1000_PHY_PORT_CONFIG, &data); if (ret_val) return ret_val; data &= ~IGP01E1000_PSCFR_SMART_SPEED; ret_val = phy->ops.write_reg(hw, IGP01E1000_PHY_PORT_CONFIG, data); if (ret_val) return ret_val; } } else if ((phy->autoneg_advertised == E1000_ALL_SPEED_DUPLEX) || (phy->autoneg_advertised == E1000_ALL_NOT_GIG) || (phy->autoneg_advertised == E1000_ALL_10_SPEED)) { phy_ctrl |= E1000_PHY_CTRL_NOND0A_LPLU; E1000_WRITE_REG(hw, E1000_PHY_CTRL, phy_ctrl); if (phy->type != e1000_phy_igp_3) return E1000_SUCCESS; /* Call gig speed drop workaround on LPLU before accessing * any PHY registers */ if (hw->mac.type == e1000_ich8lan) e1000_gig_downshift_workaround_ich8lan(hw); /* When LPLU is enabled, we should disable SmartSpeed */ ret_val = phy->ops.read_reg(hw, IGP01E1000_PHY_PORT_CONFIG, &data); if (ret_val) return ret_val; data &= ~IGP01E1000_PSCFR_SMART_SPEED; ret_val = phy->ops.write_reg(hw, IGP01E1000_PHY_PORT_CONFIG, data); } return ret_val; } /** * e1000_valid_nvm_bank_detect_ich8lan - finds out the valid bank 0 or 1 * @hw: pointer to the HW structure * @bank: pointer to the variable that returns the active bank * * Reads signature byte from the NVM using the flash access registers. * Word 0x13 bits 15:14 = 10b indicate a valid signature for that bank. **/ static s32 e1000_valid_nvm_bank_detect_ich8lan(struct e1000_hw *hw, u32 *bank) { u32 eecd; struct e1000_nvm_info *nvm = &hw->nvm; u32 bank1_offset = nvm->flash_bank_size * sizeof(u16); u32 act_offset = E1000_ICH_NVM_SIG_WORD * 2 + 1; u32 nvm_dword = 0; u8 sig_byte = 0; s32 ret_val; DEBUGFUNC("e1000_valid_nvm_bank_detect_ich8lan"); switch (hw->mac.type) { case e1000_pch_spt: bank1_offset = nvm->flash_bank_size; act_offset = E1000_ICH_NVM_SIG_WORD; /* set bank to 0 in case flash read fails */ *bank = 0; /* Check bank 0 */ ret_val = e1000_read_flash_dword_ich8lan(hw, act_offset, &nvm_dword); if (ret_val) return ret_val; sig_byte = (u8)((nvm_dword & 0xFF00) >> 8); if ((sig_byte & E1000_ICH_NVM_VALID_SIG_MASK) == E1000_ICH_NVM_SIG_VALUE) { *bank = 0; return E1000_SUCCESS; } /* Check bank 1 */ ret_val = e1000_read_flash_dword_ich8lan(hw, act_offset + bank1_offset, &nvm_dword); if (ret_val) return ret_val; sig_byte = (u8)((nvm_dword & 0xFF00) >> 8); if ((sig_byte & E1000_ICH_NVM_VALID_SIG_MASK) == E1000_ICH_NVM_SIG_VALUE) { *bank = 1; return E1000_SUCCESS; } DEBUGOUT("ERROR: No valid NVM bank present\n"); return -E1000_ERR_NVM; case e1000_ich8lan: case e1000_ich9lan: eecd = E1000_READ_REG(hw, E1000_EECD); if ((eecd & E1000_EECD_SEC1VAL_VALID_MASK) == E1000_EECD_SEC1VAL_VALID_MASK) { if (eecd & E1000_EECD_SEC1VAL) *bank = 1; else *bank = 0; return E1000_SUCCESS; } DEBUGOUT("Unable to determine valid NVM bank via EEC - reading flash signature\n"); /* fall-thru */ default: /* set bank to 0 in case flash read fails */ *bank = 0; /* Check bank 0 */ ret_val = e1000_read_flash_byte_ich8lan(hw, act_offset, &sig_byte); if (ret_val) return ret_val; if ((sig_byte & E1000_ICH_NVM_VALID_SIG_MASK) == E1000_ICH_NVM_SIG_VALUE) { *bank = 0; return E1000_SUCCESS; } /* Check bank 1 */ ret_val = e1000_read_flash_byte_ich8lan(hw, act_offset + bank1_offset, &sig_byte); if (ret_val) return ret_val; if ((sig_byte & E1000_ICH_NVM_VALID_SIG_MASK) == E1000_ICH_NVM_SIG_VALUE) { *bank = 1; return E1000_SUCCESS; } DEBUGOUT("ERROR: No valid NVM bank present\n"); return -E1000_ERR_NVM; } } /** * e1000_read_nvm_spt - NVM access for SPT * @hw: pointer to the HW structure * @offset: The offset (in bytes) of the word(s) to read. * @words: Size of data to read in words. * @data: pointer to the word(s) to read at offset. * * Reads a word(s) from the NVM **/ static s32 e1000_read_nvm_spt(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) { struct e1000_nvm_info *nvm = &hw->nvm; struct e1000_dev_spec_ich8lan *dev_spec = &hw->dev_spec.ich8lan; u32 act_offset; s32 ret_val = E1000_SUCCESS; u32 bank = 0; u32 dword = 0; u16 offset_to_read; u16 i; DEBUGFUNC("e1000_read_nvm_spt"); if ((offset >= nvm->word_size) || (words > nvm->word_size - offset) || (words == 0)) { DEBUGOUT("nvm parameter(s) out of bounds\n"); ret_val = -E1000_ERR_NVM; goto out; } nvm->ops.acquire(hw); ret_val = e1000_valid_nvm_bank_detect_ich8lan(hw, &bank); if (ret_val != E1000_SUCCESS) { DEBUGOUT("Could not detect valid bank, assuming bank 0\n"); bank = 0; } act_offset = (bank) ? nvm->flash_bank_size : 0; act_offset += offset; ret_val = E1000_SUCCESS; for (i = 0; i < words; i += 2) { if (words - i == 1) { if (dev_spec->shadow_ram[offset+i].modified) { data[i] = dev_spec->shadow_ram[offset+i].value; } else { offset_to_read = act_offset + i - ((act_offset + i) % 2); ret_val = e1000_read_flash_dword_ich8lan(hw, offset_to_read, &dword); if (ret_val) break; if ((act_offset + i) % 2 == 0) data[i] = (u16)(dword & 0xFFFF); else data[i] = (u16)((dword >> 16) & 0xFFFF); } } else { offset_to_read = act_offset + i; if (!(dev_spec->shadow_ram[offset+i].modified) || !(dev_spec->shadow_ram[offset+i+1].modified)) { ret_val = e1000_read_flash_dword_ich8lan(hw, offset_to_read, &dword); if (ret_val) break; } if (dev_spec->shadow_ram[offset+i].modified) data[i] = dev_spec->shadow_ram[offset+i].value; else data[i] = (u16) (dword & 0xFFFF); if (dev_spec->shadow_ram[offset+i].modified) data[i+1] = dev_spec->shadow_ram[offset+i+1].value; else data[i+1] = (u16) (dword >> 16 & 0xFFFF); } } nvm->ops.release(hw); out: if (ret_val) DEBUGOUT1("NVM read error: %d\n", ret_val); return ret_val; } /** * e1000_read_nvm_ich8lan - Read word(s) from the NVM * @hw: pointer to the HW structure * @offset: The offset (in bytes) of the word(s) to read. * @words: Size of data to read in words * @data: Pointer to the word(s) to read at offset. * * Reads a word(s) from the NVM using the flash access registers. **/ static s32 e1000_read_nvm_ich8lan(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) { struct e1000_nvm_info *nvm = &hw->nvm; struct e1000_dev_spec_ich8lan *dev_spec = &hw->dev_spec.ich8lan; u32 act_offset; s32 ret_val = E1000_SUCCESS; u32 bank = 0; u16 i, word; DEBUGFUNC("e1000_read_nvm_ich8lan"); if ((offset >= nvm->word_size) || (words > nvm->word_size - offset) || (words == 0)) { DEBUGOUT("nvm parameter(s) out of bounds\n"); ret_val = -E1000_ERR_NVM; goto out; } nvm->ops.acquire(hw); ret_val = e1000_valid_nvm_bank_detect_ich8lan(hw, &bank); if (ret_val != E1000_SUCCESS) { DEBUGOUT("Could not detect valid bank, assuming bank 0\n"); bank = 0; } act_offset = (bank) ? nvm->flash_bank_size : 0; act_offset += offset; ret_val = E1000_SUCCESS; for (i = 0; i < words; i++) { if (dev_spec->shadow_ram[offset+i].modified) { data[i] = dev_spec->shadow_ram[offset+i].value; } else { ret_val = e1000_read_flash_word_ich8lan(hw, act_offset + i, &word); if (ret_val) break; data[i] = word; } } nvm->ops.release(hw); out: if (ret_val) DEBUGOUT1("NVM read error: %d\n", ret_val); return ret_val; } /** * e1000_flash_cycle_init_ich8lan - Initialize flash * @hw: pointer to the HW structure * * This function does initial flash setup so that a new read/write/erase cycle * can be started. **/ static s32 e1000_flash_cycle_init_ich8lan(struct e1000_hw *hw) { union ich8_hws_flash_status hsfsts; s32 ret_val = -E1000_ERR_NVM; DEBUGFUNC("e1000_flash_cycle_init_ich8lan"); hsfsts.regval = E1000_READ_FLASH_REG16(hw, ICH_FLASH_HSFSTS); /* Check if the flash descriptor is valid */ if (!hsfsts.hsf_status.fldesvalid) { DEBUGOUT("Flash descriptor invalid. SW Sequencing must be used.\n"); return -E1000_ERR_NVM; } /* Clear FCERR and DAEL in hw status by writing 1 */ hsfsts.hsf_status.flcerr = 1; hsfsts.hsf_status.dael = 1; if (hw->mac.type >= e1000_pch_spt) E1000_WRITE_FLASH_REG(hw, ICH_FLASH_HSFSTS, hsfsts.regval & 0xFFFF); else E1000_WRITE_FLASH_REG16(hw, ICH_FLASH_HSFSTS, hsfsts.regval); /* Either we should have a hardware SPI cycle in progress * bit to check against, in order to start a new cycle or * FDONE bit should be changed in the hardware so that it * is 1 after hardware reset, which can then be used as an * indication whether a cycle is in progress or has been * completed. */ if (!hsfsts.hsf_status.flcinprog) { /* There is no cycle running at present, * so we can start a cycle. * Begin by setting Flash Cycle Done. */ hsfsts.hsf_status.flcdone = 1; if (hw->mac.type >= e1000_pch_spt) E1000_WRITE_FLASH_REG(hw, ICH_FLASH_HSFSTS, hsfsts.regval & 0xFFFF); else E1000_WRITE_FLASH_REG16(hw, ICH_FLASH_HSFSTS, hsfsts.regval); ret_val = E1000_SUCCESS; } else { s32 i; /* Otherwise poll for sometime so the current * cycle has a chance to end before giving up. */ for (i = 0; i < ICH_FLASH_READ_COMMAND_TIMEOUT; i++) { hsfsts.regval = E1000_READ_FLASH_REG16(hw, ICH_FLASH_HSFSTS); if (!hsfsts.hsf_status.flcinprog) { ret_val = E1000_SUCCESS; break; } usec_delay(1); } if (ret_val == E1000_SUCCESS) { /* Successful in waiting for previous cycle to timeout, * now set the Flash Cycle Done. */ hsfsts.hsf_status.flcdone = 1; if (hw->mac.type >= e1000_pch_spt) E1000_WRITE_FLASH_REG(hw, ICH_FLASH_HSFSTS, hsfsts.regval & 0xFFFF); else E1000_WRITE_FLASH_REG16(hw, ICH_FLASH_HSFSTS, hsfsts.regval); } else { DEBUGOUT("Flash controller busy, cannot get access\n"); } } return ret_val; } /** * e1000_flash_cycle_ich8lan - Starts flash cycle (read/write/erase) * @hw: pointer to the HW structure * @timeout: maximum time to wait for completion * * This function starts a flash cycle and waits for its completion. **/ static s32 e1000_flash_cycle_ich8lan(struct e1000_hw *hw, u32 timeout) { union ich8_hws_flash_ctrl hsflctl; union ich8_hws_flash_status hsfsts; u32 i = 0; DEBUGFUNC("e1000_flash_cycle_ich8lan"); /* Start a cycle by writing 1 in Flash Cycle Go in Hw Flash Control */ if (hw->mac.type >= e1000_pch_spt) hsflctl.regval = E1000_READ_FLASH_REG(hw, ICH_FLASH_HSFSTS)>>16; else hsflctl.regval = E1000_READ_FLASH_REG16(hw, ICH_FLASH_HSFCTL); hsflctl.hsf_ctrl.flcgo = 1; if (hw->mac.type >= e1000_pch_spt) E1000_WRITE_FLASH_REG(hw, ICH_FLASH_HSFSTS, hsflctl.regval << 16); else E1000_WRITE_FLASH_REG16(hw, ICH_FLASH_HSFCTL, hsflctl.regval); /* wait till FDONE bit is set to 1 */ do { hsfsts.regval = E1000_READ_FLASH_REG16(hw, ICH_FLASH_HSFSTS); if (hsfsts.hsf_status.flcdone) break; usec_delay(1); } while (i++ < timeout); if (hsfsts.hsf_status.flcdone && !hsfsts.hsf_status.flcerr) return E1000_SUCCESS; return -E1000_ERR_NVM; } /** * e1000_read_flash_dword_ich8lan - Read dword from flash * @hw: pointer to the HW structure * @offset: offset to data location * @data: pointer to the location for storing the data * * Reads the flash dword at offset into data. Offset is converted * to bytes before read. **/ static s32 e1000_read_flash_dword_ich8lan(struct e1000_hw *hw, u32 offset, u32 *data) { DEBUGFUNC("e1000_read_flash_dword_ich8lan"); if (!data) return -E1000_ERR_NVM; /* Must convert word offset into bytes. */ offset <<= 1; return e1000_read_flash_data32_ich8lan(hw, offset, data); } /** * e1000_read_flash_word_ich8lan - Read word from flash * @hw: pointer to the HW structure * @offset: offset to data location * @data: pointer to the location for storing the data * * Reads the flash word at offset into data. Offset is converted * to bytes before read. **/ static s32 e1000_read_flash_word_ich8lan(struct e1000_hw *hw, u32 offset, u16 *data) { DEBUGFUNC("e1000_read_flash_word_ich8lan"); if (!data) return -E1000_ERR_NVM; /* Must convert offset into bytes. */ offset <<= 1; return e1000_read_flash_data_ich8lan(hw, offset, 2, data); } /** * e1000_read_flash_byte_ich8lan - Read byte from flash * @hw: pointer to the HW structure * @offset: The offset of the byte to read. * @data: Pointer to a byte to store the value read. * * Reads a single byte from the NVM using the flash access registers. **/ static s32 e1000_read_flash_byte_ich8lan(struct e1000_hw *hw, u32 offset, u8 *data) { s32 ret_val; u16 word = 0; /* In SPT, only 32 bits access is supported, * so this function should not be called. */ if (hw->mac.type >= e1000_pch_spt) return -E1000_ERR_NVM; else ret_val = e1000_read_flash_data_ich8lan(hw, offset, 1, &word); if (ret_val) return ret_val; *data = (u8)word; return E1000_SUCCESS; } /** * e1000_read_flash_data_ich8lan - Read byte or word from NVM * @hw: pointer to the HW structure * @offset: The offset (in bytes) of the byte or word to read. * @size: Size of data to read, 1=byte 2=word * @data: Pointer to the word to store the value read. * * Reads a byte or word from the NVM using the flash access registers. **/ static s32 e1000_read_flash_data_ich8lan(struct e1000_hw *hw, u32 offset, u8 size, u16 *data) { union ich8_hws_flash_status hsfsts; union ich8_hws_flash_ctrl hsflctl; u32 flash_linear_addr; u32 flash_data = 0; s32 ret_val = -E1000_ERR_NVM; u8 count = 0; DEBUGFUNC("e1000_read_flash_data_ich8lan"); if (size < 1 || size > 2 || offset > ICH_FLASH_LINEAR_ADDR_MASK) return -E1000_ERR_NVM; flash_linear_addr = ((ICH_FLASH_LINEAR_ADDR_MASK & offset) + hw->nvm.flash_base_addr); do { usec_delay(1); /* Steps */ ret_val = e1000_flash_cycle_init_ich8lan(hw); if (ret_val != E1000_SUCCESS) break; hsflctl.regval = E1000_READ_FLASH_REG16(hw, ICH_FLASH_HSFCTL); /* 0b/1b corresponds to 1 or 2 byte size, respectively. */ hsflctl.hsf_ctrl.fldbcount = size - 1; hsflctl.hsf_ctrl.flcycle = ICH_CYCLE_READ; E1000_WRITE_FLASH_REG16(hw, ICH_FLASH_HSFCTL, hsflctl.regval); E1000_WRITE_FLASH_REG(hw, ICH_FLASH_FADDR, flash_linear_addr); ret_val = e1000_flash_cycle_ich8lan(hw, ICH_FLASH_READ_COMMAND_TIMEOUT); /* Check if FCERR is set to 1, if set to 1, clear it * and try the whole sequence a few more times, else * read in (shift in) the Flash Data0, the order is * least significant byte first msb to lsb */ if (ret_val == E1000_SUCCESS) { flash_data = E1000_READ_FLASH_REG(hw, ICH_FLASH_FDATA0); if (size == 1) *data = (u8)(flash_data & 0x000000FF); else if (size == 2) *data = (u16)(flash_data & 0x0000FFFF); break; } else { /* If we've gotten here, then things are probably * completely hosed, but if the error condition is * detected, it won't hurt to give it another try... * ICH_FLASH_CYCLE_REPEAT_COUNT times. */ hsfsts.regval = E1000_READ_FLASH_REG16(hw, ICH_FLASH_HSFSTS); if (hsfsts.hsf_status.flcerr) { /* Repeat for some time before giving up. */ continue; } else if (!hsfsts.hsf_status.flcdone) { DEBUGOUT("Timeout error - flash cycle did not complete.\n"); break; } } } while (count++ < ICH_FLASH_CYCLE_REPEAT_COUNT); return ret_val; } /** * e1000_read_flash_data32_ich8lan - Read dword from NVM * @hw: pointer to the HW structure * @offset: The offset (in bytes) of the dword to read. * @data: Pointer to the dword to store the value read. * * Reads a byte or word from the NVM using the flash access registers. **/ static s32 e1000_read_flash_data32_ich8lan(struct e1000_hw *hw, u32 offset, u32 *data) { union ich8_hws_flash_status hsfsts; union ich8_hws_flash_ctrl hsflctl; u32 flash_linear_addr; s32 ret_val = -E1000_ERR_NVM; u8 count = 0; DEBUGFUNC("e1000_read_flash_data_ich8lan"); if (offset > ICH_FLASH_LINEAR_ADDR_MASK || hw->mac.type < e1000_pch_spt) return -E1000_ERR_NVM; flash_linear_addr = ((ICH_FLASH_LINEAR_ADDR_MASK & offset) + hw->nvm.flash_base_addr); do { usec_delay(1); /* Steps */ ret_val = e1000_flash_cycle_init_ich8lan(hw); if (ret_val != E1000_SUCCESS) break; /* In SPT, This register is in Lan memory space, not flash. * Therefore, only 32 bit access is supported */ hsflctl.regval = E1000_READ_FLASH_REG(hw, ICH_FLASH_HSFSTS)>>16; /* 0b/1b corresponds to 1 or 2 byte size, respectively. */ hsflctl.hsf_ctrl.fldbcount = sizeof(u32) - 1; hsflctl.hsf_ctrl.flcycle = ICH_CYCLE_READ; /* In SPT, This register is in Lan memory space, not flash. * Therefore, only 32 bit access is supported */ E1000_WRITE_FLASH_REG(hw, ICH_FLASH_HSFSTS, (u32)hsflctl.regval << 16); E1000_WRITE_FLASH_REG(hw, ICH_FLASH_FADDR, flash_linear_addr); ret_val = e1000_flash_cycle_ich8lan(hw, ICH_FLASH_READ_COMMAND_TIMEOUT); /* Check if FCERR is set to 1, if set to 1, clear it * and try the whole sequence a few more times, else * read in (shift in) the Flash Data0, the order is * least significant byte first msb to lsb */ if (ret_val == E1000_SUCCESS) { *data = E1000_READ_FLASH_REG(hw, ICH_FLASH_FDATA0); break; } else { /* If we've gotten here, then things are probably * completely hosed, but if the error condition is * detected, it won't hurt to give it another try... * ICH_FLASH_CYCLE_REPEAT_COUNT times. */ hsfsts.regval = E1000_READ_FLASH_REG16(hw, ICH_FLASH_HSFSTS); if (hsfsts.hsf_status.flcerr) { /* Repeat for some time before giving up. */ continue; } else if (!hsfsts.hsf_status.flcdone) { DEBUGOUT("Timeout error - flash cycle did not complete.\n"); break; } } } while (count++ < ICH_FLASH_CYCLE_REPEAT_COUNT); return ret_val; } /** * e1000_write_nvm_ich8lan - Write word(s) to the NVM * @hw: pointer to the HW structure * @offset: The offset (in bytes) of the word(s) to write. * @words: Size of data to write in words * @data: Pointer to the word(s) to write at offset. * * Writes a byte or word to the NVM using the flash access registers. **/ static s32 e1000_write_nvm_ich8lan(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) { struct e1000_nvm_info *nvm = &hw->nvm; struct e1000_dev_spec_ich8lan *dev_spec = &hw->dev_spec.ich8lan; u16 i; DEBUGFUNC("e1000_write_nvm_ich8lan"); if ((offset >= nvm->word_size) || (words > nvm->word_size - offset) || (words == 0)) { DEBUGOUT("nvm parameter(s) out of bounds\n"); return -E1000_ERR_NVM; } nvm->ops.acquire(hw); for (i = 0; i < words; i++) { dev_spec->shadow_ram[offset+i].modified = TRUE; dev_spec->shadow_ram[offset+i].value = data[i]; } nvm->ops.release(hw); return E1000_SUCCESS; } /** * e1000_update_nvm_checksum_spt - Update the checksum for NVM * @hw: pointer to the HW structure * * The NVM checksum is updated by calling the generic update_nvm_checksum, * which writes the checksum to the shadow ram. The changes in the shadow * ram are then committed to the EEPROM by processing each bank at a time * checking for the modified bit and writing only the pending changes. * After a successful commit, the shadow ram is cleared and is ready for * future writes. **/ static s32 e1000_update_nvm_checksum_spt(struct e1000_hw *hw) { struct e1000_nvm_info *nvm = &hw->nvm; struct e1000_dev_spec_ich8lan *dev_spec = &hw->dev_spec.ich8lan; u32 i, act_offset, new_bank_offset, old_bank_offset, bank; s32 ret_val; u32 dword = 0; DEBUGFUNC("e1000_update_nvm_checksum_spt"); ret_val = e1000_update_nvm_checksum_generic(hw); if (ret_val) goto out; if (nvm->type != e1000_nvm_flash_sw) goto out; nvm->ops.acquire(hw); /* We're writing to the opposite bank so if we're on bank 1, * write to bank 0 etc. We also need to erase the segment that * is going to be written */ ret_val = e1000_valid_nvm_bank_detect_ich8lan(hw, &bank); if (ret_val != E1000_SUCCESS) { DEBUGOUT("Could not detect valid bank, assuming bank 0\n"); bank = 0; } if (bank == 0) { new_bank_offset = nvm->flash_bank_size; old_bank_offset = 0; ret_val = e1000_erase_flash_bank_ich8lan(hw, 1); if (ret_val) goto release; } else { old_bank_offset = nvm->flash_bank_size; new_bank_offset = 0; ret_val = e1000_erase_flash_bank_ich8lan(hw, 0); if (ret_val) goto release; } for (i = 0; i < E1000_SHADOW_RAM_WORDS; i += 2) { /* Determine whether to write the value stored * in the other NVM bank or a modified value stored * in the shadow RAM */ ret_val = e1000_read_flash_dword_ich8lan(hw, i + old_bank_offset, &dword); if (dev_spec->shadow_ram[i].modified) { dword &= 0xffff0000; dword |= (dev_spec->shadow_ram[i].value & 0xffff); } if (dev_spec->shadow_ram[i + 1].modified) { dword &= 0x0000ffff; dword |= ((dev_spec->shadow_ram[i + 1].value & 0xffff) << 16); } if (ret_val) break; /* If the word is 0x13, then make sure the signature bits * (15:14) are 11b until the commit has completed. * This will allow us to write 10b which indicates the * signature is valid. We want to do this after the write * has completed so that we don't mark the segment valid * while the write is still in progress */ if (i == E1000_ICH_NVM_SIG_WORD - 1) dword |= E1000_ICH_NVM_SIG_MASK << 16; /* Convert offset to bytes. */ act_offset = (i + new_bank_offset) << 1; usec_delay(100); /* Write the data to the new bank. Offset in words*/ act_offset = i + new_bank_offset; ret_val = e1000_retry_write_flash_dword_ich8lan(hw, act_offset, dword); if (ret_val) break; } /* Don't bother writing the segment valid bits if sector * programming failed. */ if (ret_val) { DEBUGOUT("Flash commit failed.\n"); goto release; } /* Finally validate the new segment by setting bit 15:14 * to 10b in word 0x13 , this can be done without an * erase as well since these bits are 11 to start with * and we need to change bit 14 to 0b */ act_offset = new_bank_offset + E1000_ICH_NVM_SIG_WORD; /*offset in words but we read dword*/ --act_offset; ret_val = e1000_read_flash_dword_ich8lan(hw, act_offset, &dword); if (ret_val) goto release; dword &= 0xBFFFFFFF; ret_val = e1000_retry_write_flash_dword_ich8lan(hw, act_offset, dword); if (ret_val) goto release; /* And invalidate the previously valid segment by setting * its signature word (0x13) high_byte to 0b. This can be * done without an erase because flash erase sets all bits * to 1's. We can write 1's to 0's without an erase */ act_offset = (old_bank_offset + E1000_ICH_NVM_SIG_WORD) * 2 + 1; /* offset in words but we read dword*/ act_offset = old_bank_offset + E1000_ICH_NVM_SIG_WORD - 1; ret_val = e1000_read_flash_dword_ich8lan(hw, act_offset, &dword); if (ret_val) goto release; dword &= 0x00FFFFFF; ret_val = e1000_retry_write_flash_dword_ich8lan(hw, act_offset, dword); if (ret_val) goto release; /* Great! Everything worked, we can now clear the cached entries. */ for (i = 0; i < E1000_SHADOW_RAM_WORDS; i++) { dev_spec->shadow_ram[i].modified = FALSE; dev_spec->shadow_ram[i].value = 0xFFFF; } release: nvm->ops.release(hw); /* Reload the EEPROM, or else modifications will not appear * until after the next adapter reset. */ if (!ret_val) { nvm->ops.reload(hw); msec_delay(10); } out: if (ret_val) DEBUGOUT1("NVM update error: %d\n", ret_val); return ret_val; } /** * e1000_update_nvm_checksum_ich8lan - Update the checksum for NVM * @hw: pointer to the HW structure * * The NVM checksum is updated by calling the generic update_nvm_checksum, * which writes the checksum to the shadow ram. The changes in the shadow * ram are then committed to the EEPROM by processing each bank at a time * checking for the modified bit and writing only the pending changes. * After a successful commit, the shadow ram is cleared and is ready for * future writes. **/ static s32 e1000_update_nvm_checksum_ich8lan(struct e1000_hw *hw) { struct e1000_nvm_info *nvm = &hw->nvm; struct e1000_dev_spec_ich8lan *dev_spec = &hw->dev_spec.ich8lan; u32 i, act_offset, new_bank_offset, old_bank_offset, bank; s32 ret_val; u16 data = 0; DEBUGFUNC("e1000_update_nvm_checksum_ich8lan"); ret_val = e1000_update_nvm_checksum_generic(hw); if (ret_val) goto out; if (nvm->type != e1000_nvm_flash_sw) goto out; nvm->ops.acquire(hw); /* We're writing to the opposite bank so if we're on bank 1, * write to bank 0 etc. We also need to erase the segment that * is going to be written */ ret_val = e1000_valid_nvm_bank_detect_ich8lan(hw, &bank); if (ret_val != E1000_SUCCESS) { DEBUGOUT("Could not detect valid bank, assuming bank 0\n"); bank = 0; } if (bank == 0) { new_bank_offset = nvm->flash_bank_size; old_bank_offset = 0; ret_val = e1000_erase_flash_bank_ich8lan(hw, 1); if (ret_val) goto release; } else { old_bank_offset = nvm->flash_bank_size; new_bank_offset = 0; ret_val = e1000_erase_flash_bank_ich8lan(hw, 0); if (ret_val) goto release; } for (i = 0; i < E1000_SHADOW_RAM_WORDS; i++) { if (dev_spec->shadow_ram[i].modified) { data = dev_spec->shadow_ram[i].value; } else { ret_val = e1000_read_flash_word_ich8lan(hw, i + old_bank_offset, &data); if (ret_val) break; } /* If the word is 0x13, then make sure the signature bits * (15:14) are 11b until the commit has completed. * This will allow us to write 10b which indicates the * signature is valid. We want to do this after the write * has completed so that we don't mark the segment valid * while the write is still in progress */ if (i == E1000_ICH_NVM_SIG_WORD) data |= E1000_ICH_NVM_SIG_MASK; /* Convert offset to bytes. */ act_offset = (i + new_bank_offset) << 1; usec_delay(100); /* Write the bytes to the new bank. */ ret_val = e1000_retry_write_flash_byte_ich8lan(hw, act_offset, (u8)data); if (ret_val) break; usec_delay(100); ret_val = e1000_retry_write_flash_byte_ich8lan(hw, act_offset + 1, (u8)(data >> 8)); if (ret_val) break; } /* Don't bother writing the segment valid bits if sector * programming failed. */ if (ret_val) { DEBUGOUT("Flash commit failed.\n"); goto release; } /* Finally validate the new segment by setting bit 15:14 * to 10b in word 0x13 , this can be done without an * erase as well since these bits are 11 to start with * and we need to change bit 14 to 0b */ act_offset = new_bank_offset + E1000_ICH_NVM_SIG_WORD; ret_val = e1000_read_flash_word_ich8lan(hw, act_offset, &data); if (ret_val) goto release; data &= 0xBFFF; ret_val = e1000_retry_write_flash_byte_ich8lan(hw, act_offset * 2 + 1, (u8)(data >> 8)); if (ret_val) goto release; /* And invalidate the previously valid segment by setting * its signature word (0x13) high_byte to 0b. This can be * done without an erase because flash erase sets all bits * to 1's. We can write 1's to 0's without an erase */ act_offset = (old_bank_offset + E1000_ICH_NVM_SIG_WORD) * 2 + 1; ret_val = e1000_retry_write_flash_byte_ich8lan(hw, act_offset, 0); if (ret_val) goto release; /* Great! Everything worked, we can now clear the cached entries. */ for (i = 0; i < E1000_SHADOW_RAM_WORDS; i++) { dev_spec->shadow_ram[i].modified = FALSE; dev_spec->shadow_ram[i].value = 0xFFFF; } release: nvm->ops.release(hw); /* Reload the EEPROM, or else modifications will not appear * until after the next adapter reset. */ if (!ret_val) { nvm->ops.reload(hw); msec_delay(10); } out: if (ret_val) DEBUGOUT1("NVM update error: %d\n", ret_val); return ret_val; } /** * e1000_validate_nvm_checksum_ich8lan - Validate EEPROM checksum * @hw: pointer to the HW structure * * Check to see if checksum needs to be fixed by reading bit 6 in word 0x19. * If the bit is 0, that the EEPROM had been modified, but the checksum was not * calculated, in which case we need to calculate the checksum and set bit 6. **/ static s32 e1000_validate_nvm_checksum_ich8lan(struct e1000_hw *hw) { s32 ret_val; u16 data; u16 word; u16 valid_csum_mask; DEBUGFUNC("e1000_validate_nvm_checksum_ich8lan"); /* Read NVM and check Invalid Image CSUM bit. If this bit is 0, * the checksum needs to be fixed. This bit is an indication that * the NVM was prepared by OEM software and did not calculate * the checksum...a likely scenario. */ switch (hw->mac.type) { case e1000_pch_lpt: case e1000_pch_spt: word = NVM_COMPAT; valid_csum_mask = NVM_COMPAT_VALID_CSUM; break; default: word = NVM_FUTURE_INIT_WORD1; valid_csum_mask = NVM_FUTURE_INIT_WORD1_VALID_CSUM; break; } ret_val = hw->nvm.ops.read(hw, word, 1, &data); if (ret_val) return ret_val; if (!(data & valid_csum_mask)) { data |= valid_csum_mask; ret_val = hw->nvm.ops.write(hw, word, 1, &data); if (ret_val) return ret_val; ret_val = hw->nvm.ops.update(hw); if (ret_val) return ret_val; } return e1000_validate_nvm_checksum_generic(hw); } /** * e1000_write_flash_data_ich8lan - Writes bytes to the NVM * @hw: pointer to the HW structure * @offset: The offset (in bytes) of the byte/word to read. * @size: Size of data to read, 1=byte 2=word * @data: The byte(s) to write to the NVM. * * Writes one/two bytes to the NVM using the flash access registers. **/ static s32 e1000_write_flash_data_ich8lan(struct e1000_hw *hw, u32 offset, u8 size, u16 data) { union ich8_hws_flash_status hsfsts; union ich8_hws_flash_ctrl hsflctl; u32 flash_linear_addr; u32 flash_data = 0; s32 ret_val; u8 count = 0; DEBUGFUNC("e1000_write_ich8_data"); if (hw->mac.type >= e1000_pch_spt) { if (size != 4 || offset > ICH_FLASH_LINEAR_ADDR_MASK) return -E1000_ERR_NVM; } else { if (size < 1 || size > 2 || offset > ICH_FLASH_LINEAR_ADDR_MASK) return -E1000_ERR_NVM; } flash_linear_addr = ((ICH_FLASH_LINEAR_ADDR_MASK & offset) + hw->nvm.flash_base_addr); do { usec_delay(1); /* Steps */ ret_val = e1000_flash_cycle_init_ich8lan(hw); if (ret_val != E1000_SUCCESS) break; /* In SPT, This register is in Lan memory space, not * flash. Therefore, only 32 bit access is supported */ if (hw->mac.type >= e1000_pch_spt) hsflctl.regval = E1000_READ_FLASH_REG(hw, ICH_FLASH_HSFSTS)>>16; else hsflctl.regval = E1000_READ_FLASH_REG16(hw, ICH_FLASH_HSFCTL); /* 0b/1b corresponds to 1 or 2 byte size, respectively. */ hsflctl.hsf_ctrl.fldbcount = size - 1; hsflctl.hsf_ctrl.flcycle = ICH_CYCLE_WRITE; /* In SPT, This register is in Lan memory space, * not flash. Therefore, only 32 bit access is * supported */ if (hw->mac.type >= e1000_pch_spt) E1000_WRITE_FLASH_REG(hw, ICH_FLASH_HSFSTS, hsflctl.regval << 16); else E1000_WRITE_FLASH_REG16(hw, ICH_FLASH_HSFCTL, hsflctl.regval); E1000_WRITE_FLASH_REG(hw, ICH_FLASH_FADDR, flash_linear_addr); if (size == 1) flash_data = (u32)data & 0x00FF; else flash_data = (u32)data; E1000_WRITE_FLASH_REG(hw, ICH_FLASH_FDATA0, flash_data); /* check if FCERR is set to 1 , if set to 1, clear it * and try the whole sequence a few more times else done */ ret_val = e1000_flash_cycle_ich8lan(hw, ICH_FLASH_WRITE_COMMAND_TIMEOUT); if (ret_val == E1000_SUCCESS) break; /* If we're here, then things are most likely * completely hosed, but if the error condition * is detected, it won't hurt to give it another * try...ICH_FLASH_CYCLE_REPEAT_COUNT times. */ hsfsts.regval = E1000_READ_FLASH_REG16(hw, ICH_FLASH_HSFSTS); if (hsfsts.hsf_status.flcerr) /* Repeat for some time before giving up. */ continue; if (!hsfsts.hsf_status.flcdone) { DEBUGOUT("Timeout error - flash cycle did not complete.\n"); break; } } while (count++ < ICH_FLASH_CYCLE_REPEAT_COUNT); return ret_val; } /** * e1000_write_flash_data32_ich8lan - Writes 4 bytes to the NVM * @hw: pointer to the HW structure * @offset: The offset (in bytes) of the dwords to read. * @data: The 4 bytes to write to the NVM. * * Writes one/two/four bytes to the NVM using the flash access registers. **/ static s32 e1000_write_flash_data32_ich8lan(struct e1000_hw *hw, u32 offset, u32 data) { union ich8_hws_flash_status hsfsts; union ich8_hws_flash_ctrl hsflctl; u32 flash_linear_addr; s32 ret_val; u8 count = 0; DEBUGFUNC("e1000_write_flash_data32_ich8lan"); if (hw->mac.type >= e1000_pch_spt) { if (offset > ICH_FLASH_LINEAR_ADDR_MASK) return -E1000_ERR_NVM; } flash_linear_addr = ((ICH_FLASH_LINEAR_ADDR_MASK & offset) + hw->nvm.flash_base_addr); do { usec_delay(1); /* Steps */ ret_val = e1000_flash_cycle_init_ich8lan(hw); if (ret_val != E1000_SUCCESS) break; /* In SPT, This register is in Lan memory space, not * flash. Therefore, only 32 bit access is supported */ if (hw->mac.type >= e1000_pch_spt) hsflctl.regval = E1000_READ_FLASH_REG(hw, ICH_FLASH_HSFSTS) >> 16; else hsflctl.regval = E1000_READ_FLASH_REG16(hw, ICH_FLASH_HSFCTL); hsflctl.hsf_ctrl.fldbcount = sizeof(u32) - 1; hsflctl.hsf_ctrl.flcycle = ICH_CYCLE_WRITE; /* In SPT, This register is in Lan memory space, * not flash. Therefore, only 32 bit access is * supported */ if (hw->mac.type >= e1000_pch_spt) E1000_WRITE_FLASH_REG(hw, ICH_FLASH_HSFSTS, hsflctl.regval << 16); else E1000_WRITE_FLASH_REG16(hw, ICH_FLASH_HSFCTL, hsflctl.regval); E1000_WRITE_FLASH_REG(hw, ICH_FLASH_FADDR, flash_linear_addr); E1000_WRITE_FLASH_REG(hw, ICH_FLASH_FDATA0, data); /* check if FCERR is set to 1 , if set to 1, clear it * and try the whole sequence a few more times else done */ ret_val = e1000_flash_cycle_ich8lan(hw, ICH_FLASH_WRITE_COMMAND_TIMEOUT); if (ret_val == E1000_SUCCESS) break; /* If we're here, then things are most likely * completely hosed, but if the error condition * is detected, it won't hurt to give it another * try...ICH_FLASH_CYCLE_REPEAT_COUNT times. */ hsfsts.regval = E1000_READ_FLASH_REG16(hw, ICH_FLASH_HSFSTS); if (hsfsts.hsf_status.flcerr) /* Repeat for some time before giving up. */ continue; if (!hsfsts.hsf_status.flcdone) { DEBUGOUT("Timeout error - flash cycle did not complete.\n"); break; } } while (count++ < ICH_FLASH_CYCLE_REPEAT_COUNT); return ret_val; } /** * e1000_write_flash_byte_ich8lan - Write a single byte to NVM * @hw: pointer to the HW structure * @offset: The index of the byte to read. * @data: The byte to write to the NVM. * * Writes a single byte to the NVM using the flash access registers. **/ static s32 e1000_write_flash_byte_ich8lan(struct e1000_hw *hw, u32 offset, u8 data) { u16 word = (u16)data; DEBUGFUNC("e1000_write_flash_byte_ich8lan"); return e1000_write_flash_data_ich8lan(hw, offset, 1, word); } /** * e1000_retry_write_flash_dword_ich8lan - Writes a dword to NVM * @hw: pointer to the HW structure * @offset: The offset of the word to write. * @dword: The dword to write to the NVM. * * Writes a single dword to the NVM using the flash access registers. * Goes through a retry algorithm before giving up. **/ static s32 e1000_retry_write_flash_dword_ich8lan(struct e1000_hw *hw, u32 offset, u32 dword) { s32 ret_val; u16 program_retries; DEBUGFUNC("e1000_retry_write_flash_dword_ich8lan"); /* Must convert word offset into bytes. */ offset <<= 1; ret_val = e1000_write_flash_data32_ich8lan(hw, offset, dword); if (!ret_val) return ret_val; for (program_retries = 0; program_retries < 100; program_retries++) { DEBUGOUT2("Retrying Byte %8.8X at offset %u\n", dword, offset); usec_delay(100); ret_val = e1000_write_flash_data32_ich8lan(hw, offset, dword); if (ret_val == E1000_SUCCESS) break; } if (program_retries == 100) return -E1000_ERR_NVM; return E1000_SUCCESS; } /** * e1000_retry_write_flash_byte_ich8lan - Writes a single byte to NVM * @hw: pointer to the HW structure * @offset: The offset of the byte to write. * @byte: The byte to write to the NVM. * * Writes a single byte to the NVM using the flash access registers. * Goes through a retry algorithm before giving up. **/ static s32 e1000_retry_write_flash_byte_ich8lan(struct e1000_hw *hw, u32 offset, u8 byte) { s32 ret_val; u16 program_retries; DEBUGFUNC("e1000_retry_write_flash_byte_ich8lan"); ret_val = e1000_write_flash_byte_ich8lan(hw, offset, byte); if (!ret_val) return ret_val; for (program_retries = 0; program_retries < 100; program_retries++) { DEBUGOUT2("Retrying Byte %2.2X at offset %u\n", byte, offset); usec_delay(100); ret_val = e1000_write_flash_byte_ich8lan(hw, offset, byte); if (ret_val == E1000_SUCCESS) break; } if (program_retries == 100) return -E1000_ERR_NVM; return E1000_SUCCESS; } /** * e1000_erase_flash_bank_ich8lan - Erase a bank (4k) from NVM * @hw: pointer to the HW structure * @bank: 0 for first bank, 1 for second bank, etc. * * Erases the bank specified. Each bank is a 4k block. Banks are 0 based. * bank N is 4096 * N + flash_reg_addr. **/ static s32 e1000_erase_flash_bank_ich8lan(struct e1000_hw *hw, u32 bank) { struct e1000_nvm_info *nvm = &hw->nvm; union ich8_hws_flash_status hsfsts; union ich8_hws_flash_ctrl hsflctl; u32 flash_linear_addr; /* bank size is in 16bit words - adjust to bytes */ u32 flash_bank_size = nvm->flash_bank_size * 2; s32 ret_val; s32 count = 0; s32 j, iteration, sector_size; DEBUGFUNC("e1000_erase_flash_bank_ich8lan"); hsfsts.regval = E1000_READ_FLASH_REG16(hw, ICH_FLASH_HSFSTS); /* Determine HW Sector size: Read BERASE bits of hw flash status * register * 00: The Hw sector is 256 bytes, hence we need to erase 16 * consecutive sectors. The start index for the nth Hw sector * can be calculated as = bank * 4096 + n * 256 * 01: The Hw sector is 4K bytes, hence we need to erase 1 sector. * The start index for the nth Hw sector can be calculated * as = bank * 4096 * 10: The Hw sector is 8K bytes, nth sector = bank * 8192 * (ich9 only, otherwise error condition) * 11: The Hw sector is 64K bytes, nth sector = bank * 65536 */ switch (hsfsts.hsf_status.berasesz) { case 0: /* Hw sector size 256 */ sector_size = ICH_FLASH_SEG_SIZE_256; iteration = flash_bank_size / ICH_FLASH_SEG_SIZE_256; break; case 1: sector_size = ICH_FLASH_SEG_SIZE_4K; iteration = 1; break; case 2: sector_size = ICH_FLASH_SEG_SIZE_8K; iteration = 1; break; case 3: sector_size = ICH_FLASH_SEG_SIZE_64K; iteration = 1; break; default: return -E1000_ERR_NVM; } /* Start with the base address, then add the sector offset. */ flash_linear_addr = hw->nvm.flash_base_addr; flash_linear_addr += (bank) ? flash_bank_size : 0; for (j = 0; j < iteration; j++) { do { u32 timeout = ICH_FLASH_ERASE_COMMAND_TIMEOUT; /* Steps */ ret_val = e1000_flash_cycle_init_ich8lan(hw); if (ret_val) return ret_val; /* Write a value 11 (block Erase) in Flash * Cycle field in hw flash control */ if (hw->mac.type >= e1000_pch_spt) hsflctl.regval = E1000_READ_FLASH_REG(hw, ICH_FLASH_HSFSTS)>>16; else hsflctl.regval = E1000_READ_FLASH_REG16(hw, ICH_FLASH_HSFCTL); hsflctl.hsf_ctrl.flcycle = ICH_CYCLE_ERASE; if (hw->mac.type >= e1000_pch_spt) E1000_WRITE_FLASH_REG(hw, ICH_FLASH_HSFSTS, hsflctl.regval << 16); else E1000_WRITE_FLASH_REG16(hw, ICH_FLASH_HSFCTL, hsflctl.regval); /* Write the last 24 bits of an index within the * block into Flash Linear address field in Flash * Address. */ flash_linear_addr += (j * sector_size); E1000_WRITE_FLASH_REG(hw, ICH_FLASH_FADDR, flash_linear_addr); ret_val = e1000_flash_cycle_ich8lan(hw, timeout); if (ret_val == E1000_SUCCESS) break; /* Check if FCERR is set to 1. If 1, * clear it and try the whole sequence * a few more times else Done */ hsfsts.regval = E1000_READ_FLASH_REG16(hw, ICH_FLASH_HSFSTS); if (hsfsts.hsf_status.flcerr) /* repeat for some time before giving up */ continue; else if (!hsfsts.hsf_status.flcdone) return ret_val; } while (++count < ICH_FLASH_CYCLE_REPEAT_COUNT); } return E1000_SUCCESS; } /** * e1000_valid_led_default_ich8lan - Set the default LED settings * @hw: pointer to the HW structure * @data: Pointer to the LED settings * * Reads the LED default settings from the NVM to data. If the NVM LED * settings is all 0's or F's, set the LED default to a valid LED default * setting. **/ static s32 e1000_valid_led_default_ich8lan(struct e1000_hw *hw, u16 *data) { s32 ret_val; DEBUGFUNC("e1000_valid_led_default_ich8lan"); ret_val = hw->nvm.ops.read(hw, NVM_ID_LED_SETTINGS, 1, data); if (ret_val) { DEBUGOUT("NVM Read Error\n"); return ret_val; } if (*data == ID_LED_RESERVED_0000 || *data == ID_LED_RESERVED_FFFF) *data = ID_LED_DEFAULT_ICH8LAN; return E1000_SUCCESS; } /** * e1000_id_led_init_pchlan - store LED configurations * @hw: pointer to the HW structure * * PCH does not control LEDs via the LEDCTL register, rather it uses * the PHY LED configuration register. * * PCH also does not have an "always on" or "always off" mode which * complicates the ID feature. Instead of using the "on" mode to indicate * in ledctl_mode2 the LEDs to use for ID (see e1000_id_led_init_generic()), * use "link_up" mode. The LEDs will still ID on request if there is no * link based on logic in e1000_led_[on|off]_pchlan(). **/ static s32 e1000_id_led_init_pchlan(struct e1000_hw *hw) { struct e1000_mac_info *mac = &hw->mac; s32 ret_val; const u32 ledctl_on = E1000_LEDCTL_MODE_LINK_UP; const u32 ledctl_off = E1000_LEDCTL_MODE_LINK_UP | E1000_PHY_LED0_IVRT; u16 data, i, temp, shift; DEBUGFUNC("e1000_id_led_init_pchlan"); /* Get default ID LED modes */ ret_val = hw->nvm.ops.valid_led_default(hw, &data); if (ret_val) return ret_val; mac->ledctl_default = E1000_READ_REG(hw, E1000_LEDCTL); mac->ledctl_mode1 = mac->ledctl_default; mac->ledctl_mode2 = mac->ledctl_default; for (i = 0; i < 4; i++) { temp = (data >> (i << 2)) & E1000_LEDCTL_LED0_MODE_MASK; shift = (i * 5); switch (temp) { case ID_LED_ON1_DEF2: case ID_LED_ON1_ON2: case ID_LED_ON1_OFF2: mac->ledctl_mode1 &= ~(E1000_PHY_LED0_MASK << shift); mac->ledctl_mode1 |= (ledctl_on << shift); break; case ID_LED_OFF1_DEF2: case ID_LED_OFF1_ON2: case ID_LED_OFF1_OFF2: mac->ledctl_mode1 &= ~(E1000_PHY_LED0_MASK << shift); mac->ledctl_mode1 |= (ledctl_off << shift); break; default: /* Do nothing */ break; } switch (temp) { case ID_LED_DEF1_ON2: case ID_LED_ON1_ON2: case ID_LED_OFF1_ON2: mac->ledctl_mode2 &= ~(E1000_PHY_LED0_MASK << shift); mac->ledctl_mode2 |= (ledctl_on << shift); break; case ID_LED_DEF1_OFF2: case ID_LED_ON1_OFF2: case ID_LED_OFF1_OFF2: mac->ledctl_mode2 &= ~(E1000_PHY_LED0_MASK << shift); mac->ledctl_mode2 |= (ledctl_off << shift); break; default: /* Do nothing */ break; } } return E1000_SUCCESS; } /** * e1000_get_bus_info_ich8lan - Get/Set the bus type and width * @hw: pointer to the HW structure * * ICH8 use the PCI Express bus, but does not contain a PCI Express Capability * register, so the bus width is hard coded. **/ static s32 e1000_get_bus_info_ich8lan(struct e1000_hw *hw) { struct e1000_bus_info *bus = &hw->bus; s32 ret_val; DEBUGFUNC("e1000_get_bus_info_ich8lan"); ret_val = e1000_get_bus_info_pcie_generic(hw); /* ICH devices are "PCI Express"-ish. They have * a configuration space, but do not contain * PCI Express Capability registers, so bus width * must be hardcoded. */ if (bus->width == e1000_bus_width_unknown) bus->width = e1000_bus_width_pcie_x1; return ret_val; } /** * e1000_reset_hw_ich8lan - Reset the hardware * @hw: pointer to the HW structure * * Does a full reset of the hardware which includes a reset of the PHY and * MAC. **/ static s32 e1000_reset_hw_ich8lan(struct e1000_hw *hw) { struct e1000_dev_spec_ich8lan *dev_spec = &hw->dev_spec.ich8lan; u16 kum_cfg; u32 ctrl, reg; s32 ret_val; DEBUGFUNC("e1000_reset_hw_ich8lan"); /* Prevent the PCI-E bus from sticking if there is no TLP connection * on the last TLP read/write transaction when MAC is reset. */ ret_val = e1000_disable_pcie_master_generic(hw); if (ret_val) DEBUGOUT("PCI-E Master disable polling has failed.\n"); DEBUGOUT("Masking off all interrupts\n"); E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff); /* Disable the Transmit and Receive units. Then delay to allow * any pending transactions to complete before we hit the MAC * with the global reset. */ E1000_WRITE_REG(hw, E1000_RCTL, 0); E1000_WRITE_REG(hw, E1000_TCTL, E1000_TCTL_PSP); E1000_WRITE_FLUSH(hw); msec_delay(10); /* Workaround for ICH8 bit corruption issue in FIFO memory */ if (hw->mac.type == e1000_ich8lan) { /* Set Tx and Rx buffer allocation to 8k apiece. */ E1000_WRITE_REG(hw, E1000_PBA, E1000_PBA_8K); /* Set Packet Buffer Size to 16k. */ E1000_WRITE_REG(hw, E1000_PBS, E1000_PBS_16K); } if (hw->mac.type == e1000_pchlan) { /* Save the NVM K1 bit setting*/ ret_val = e1000_read_nvm(hw, E1000_NVM_K1_CONFIG, 1, &kum_cfg); if (ret_val) return ret_val; if (kum_cfg & E1000_NVM_K1_ENABLE) dev_spec->nvm_k1_enabled = TRUE; else dev_spec->nvm_k1_enabled = FALSE; } ctrl = E1000_READ_REG(hw, E1000_CTRL); if (!hw->phy.ops.check_reset_block(hw)) { /* Full-chip reset requires MAC and PHY reset at the same * time to make sure the interface between MAC and the * external PHY is reset. */ ctrl |= E1000_CTRL_PHY_RST; /* Gate automatic PHY configuration by hardware on * non-managed 82579 */ if ((hw->mac.type == e1000_pch2lan) && !(E1000_READ_REG(hw, E1000_FWSM) & E1000_ICH_FWSM_FW_VALID)) e1000_gate_hw_phy_config_ich8lan(hw, TRUE); } ret_val = e1000_acquire_swflag_ich8lan(hw); DEBUGOUT("Issuing a global reset to ich8lan\n"); E1000_WRITE_REG(hw, E1000_CTRL, (ctrl | E1000_CTRL_RST)); /* cannot issue a flush here because it hangs the hardware */ msec_delay(20); /* Set Phy Config Counter to 50msec */ if (hw->mac.type == e1000_pch2lan) { reg = E1000_READ_REG(hw, E1000_FEXTNVM3); reg &= ~E1000_FEXTNVM3_PHY_CFG_COUNTER_MASK; reg |= E1000_FEXTNVM3_PHY_CFG_COUNTER_50MSEC; E1000_WRITE_REG(hw, E1000_FEXTNVM3, reg); } - if (!ret_val) - E1000_MUTEX_UNLOCK(&hw->dev_spec.ich8lan.swflag_mutex); if (ctrl & E1000_CTRL_PHY_RST) { ret_val = hw->phy.ops.get_cfg_done(hw); if (ret_val) return ret_val; ret_val = e1000_post_phy_reset_ich8lan(hw); if (ret_val) return ret_val; } /* For PCH, this write will make sure that any noise * will be detected as a CRC error and be dropped rather than show up * as a bad packet to the DMA engine. */ if (hw->mac.type == e1000_pchlan) E1000_WRITE_REG(hw, E1000_CRC_OFFSET, 0x65656565); E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff); E1000_READ_REG(hw, E1000_ICR); reg = E1000_READ_REG(hw, E1000_KABGTXD); reg |= E1000_KABGTXD_BGSQLBIAS; E1000_WRITE_REG(hw, E1000_KABGTXD, reg); return E1000_SUCCESS; } /** * e1000_init_hw_ich8lan - Initialize the hardware * @hw: pointer to the HW structure * * Prepares the hardware for transmit and receive by doing the following: * - initialize hardware bits * - initialize LED identification * - setup receive address registers * - setup flow control * - setup transmit descriptors * - clear statistics **/ static s32 e1000_init_hw_ich8lan(struct e1000_hw *hw) { struct e1000_mac_info *mac = &hw->mac; u32 ctrl_ext, txdctl, snoop; s32 ret_val; u16 i; DEBUGFUNC("e1000_init_hw_ich8lan"); e1000_initialize_hw_bits_ich8lan(hw); /* Initialize identification LED */ ret_val = mac->ops.id_led_init(hw); /* An error is not fatal and we should not stop init due to this */ if (ret_val) DEBUGOUT("Error initializing identification LED\n"); /* Setup the receive address. */ e1000_init_rx_addrs_generic(hw, mac->rar_entry_count); /* Zero out the Multicast HASH table */ DEBUGOUT("Zeroing the MTA\n"); for (i = 0; i < mac->mta_reg_count; i++) E1000_WRITE_REG_ARRAY(hw, E1000_MTA, i, 0); /* The 82578 Rx buffer will stall if wakeup is enabled in host and * the ME. Disable wakeup by clearing the host wakeup bit. * Reset the phy after disabling host wakeup to reset the Rx buffer. */ if (hw->phy.type == e1000_phy_82578) { hw->phy.ops.read_reg(hw, BM_PORT_GEN_CFG, &i); i &= ~BM_WUC_HOST_WU_BIT; hw->phy.ops.write_reg(hw, BM_PORT_GEN_CFG, i); ret_val = e1000_phy_hw_reset_ich8lan(hw); if (ret_val) return ret_val; } /* Setup link and flow control */ ret_val = mac->ops.setup_link(hw); /* Set the transmit descriptor write-back policy for both queues */ txdctl = E1000_READ_REG(hw, E1000_TXDCTL(0)); txdctl = ((txdctl & ~E1000_TXDCTL_WTHRESH) | E1000_TXDCTL_FULL_TX_DESC_WB); txdctl = ((txdctl & ~E1000_TXDCTL_PTHRESH) | E1000_TXDCTL_MAX_TX_DESC_PREFETCH); E1000_WRITE_REG(hw, E1000_TXDCTL(0), txdctl); txdctl = E1000_READ_REG(hw, E1000_TXDCTL(1)); txdctl = ((txdctl & ~E1000_TXDCTL_WTHRESH) | E1000_TXDCTL_FULL_TX_DESC_WB); txdctl = ((txdctl & ~E1000_TXDCTL_PTHRESH) | E1000_TXDCTL_MAX_TX_DESC_PREFETCH); E1000_WRITE_REG(hw, E1000_TXDCTL(1), txdctl); /* ICH8 has opposite polarity of no_snoop bits. * By default, we should use snoop behavior. */ if (mac->type == e1000_ich8lan) snoop = PCIE_ICH8_SNOOP_ALL; else snoop = (u32) ~(PCIE_NO_SNOOP_ALL); e1000_set_pcie_no_snoop_generic(hw, snoop); ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT); ctrl_ext |= E1000_CTRL_EXT_RO_DIS; E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext); /* Clear all of the statistics registers (clear on read). It is * important that we do this after we have tried to establish link * because the symbol error count will increment wildly if there * is no link. */ e1000_clear_hw_cntrs_ich8lan(hw); return ret_val; } /** * e1000_initialize_hw_bits_ich8lan - Initialize required hardware bits * @hw: pointer to the HW structure * * Sets/Clears required hardware bits necessary for correctly setting up the * hardware for transmit and receive. **/ static void e1000_initialize_hw_bits_ich8lan(struct e1000_hw *hw) { u32 reg; DEBUGFUNC("e1000_initialize_hw_bits_ich8lan"); /* Extended Device Control */ reg = E1000_READ_REG(hw, E1000_CTRL_EXT); reg |= (1 << 22); /* Enable PHY low-power state when MAC is at D3 w/o WoL */ if (hw->mac.type >= e1000_pchlan) reg |= E1000_CTRL_EXT_PHYPDEN; E1000_WRITE_REG(hw, E1000_CTRL_EXT, reg); /* Transmit Descriptor Control 0 */ reg = E1000_READ_REG(hw, E1000_TXDCTL(0)); reg |= (1 << 22); E1000_WRITE_REG(hw, E1000_TXDCTL(0), reg); /* Transmit Descriptor Control 1 */ reg = E1000_READ_REG(hw, E1000_TXDCTL(1)); reg |= (1 << 22); E1000_WRITE_REG(hw, E1000_TXDCTL(1), reg); /* Transmit Arbitration Control 0 */ reg = E1000_READ_REG(hw, E1000_TARC(0)); if (hw->mac.type == e1000_ich8lan) reg |= (1 << 28) | (1 << 29); reg |= (1 << 23) | (1 << 24) | (1 << 26) | (1 << 27); E1000_WRITE_REG(hw, E1000_TARC(0), reg); /* Transmit Arbitration Control 1 */ reg = E1000_READ_REG(hw, E1000_TARC(1)); if (E1000_READ_REG(hw, E1000_TCTL) & E1000_TCTL_MULR) reg &= ~(1 << 28); else reg |= (1 << 28); reg |= (1 << 24) | (1 << 26) | (1 << 30); E1000_WRITE_REG(hw, E1000_TARC(1), reg); /* Device Status */ if (hw->mac.type == e1000_ich8lan) { reg = E1000_READ_REG(hw, E1000_STATUS); reg &= ~(1 << 31); E1000_WRITE_REG(hw, E1000_STATUS, reg); } /* work-around descriptor data corruption issue during nfs v2 udp * traffic, just disable the nfs filtering capability */ reg = E1000_READ_REG(hw, E1000_RFCTL); reg |= (E1000_RFCTL_NFSW_DIS | E1000_RFCTL_NFSR_DIS); /* Disable IPv6 extension header parsing because some malformed * IPv6 headers can hang the Rx. */ if (hw->mac.type == e1000_ich8lan) reg |= (E1000_RFCTL_IPV6_EX_DIS | E1000_RFCTL_NEW_IPV6_EXT_DIS); E1000_WRITE_REG(hw, E1000_RFCTL, reg); /* Enable ECC on Lynxpoint */ if (hw->mac.type >= e1000_pch_lpt) { reg = E1000_READ_REG(hw, E1000_PBECCSTS); reg |= E1000_PBECCSTS_ECC_ENABLE; E1000_WRITE_REG(hw, E1000_PBECCSTS, reg); reg = E1000_READ_REG(hw, E1000_CTRL); reg |= E1000_CTRL_MEHE; E1000_WRITE_REG(hw, E1000_CTRL, reg); } return; } /** * e1000_setup_link_ich8lan - Setup flow control and link settings * @hw: pointer to the HW structure * * Determines which flow control settings to use, then configures flow * control. Calls the appropriate media-specific link configuration * function. Assuming the adapter has a valid link partner, a valid link * should be established. Assumes the hardware has previously been reset * and the transmitter and receiver are not enabled. **/ static s32 e1000_setup_link_ich8lan(struct e1000_hw *hw) { s32 ret_val; DEBUGFUNC("e1000_setup_link_ich8lan"); if (hw->phy.ops.check_reset_block(hw)) return E1000_SUCCESS; /* ICH parts do not have a word in the NVM to determine * the default flow control setting, so we explicitly * set it to full. */ if (hw->fc.requested_mode == e1000_fc_default) hw->fc.requested_mode = e1000_fc_full; /* Save off the requested flow control mode for use later. Depending * on the link partner's capabilities, we may or may not use this mode. */ hw->fc.current_mode = hw->fc.requested_mode; DEBUGOUT1("After fix-ups FlowControl is now = %x\n", hw->fc.current_mode); /* Continue to configure the copper link. */ ret_val = hw->mac.ops.setup_physical_interface(hw); if (ret_val) return ret_val; E1000_WRITE_REG(hw, E1000_FCTTV, hw->fc.pause_time); if ((hw->phy.type == e1000_phy_82578) || (hw->phy.type == e1000_phy_82579) || (hw->phy.type == e1000_phy_i217) || (hw->phy.type == e1000_phy_82577)) { E1000_WRITE_REG(hw, E1000_FCRTV_PCH, hw->fc.refresh_time); ret_val = hw->phy.ops.write_reg(hw, PHY_REG(BM_PORT_CTRL_PAGE, 27), hw->fc.pause_time); if (ret_val) return ret_val; } return e1000_set_fc_watermarks_generic(hw); } /** * e1000_setup_copper_link_ich8lan - Configure MAC/PHY interface * @hw: pointer to the HW structure * * Configures the kumeran interface to the PHY to wait the appropriate time * when polling the PHY, then call the generic setup_copper_link to finish * configuring the copper link. **/ static s32 e1000_setup_copper_link_ich8lan(struct e1000_hw *hw) { u32 ctrl; s32 ret_val; u16 reg_data; DEBUGFUNC("e1000_setup_copper_link_ich8lan"); ctrl = E1000_READ_REG(hw, E1000_CTRL); ctrl |= E1000_CTRL_SLU; ctrl &= ~(E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX); E1000_WRITE_REG(hw, E1000_CTRL, ctrl); /* Set the mac to wait the maximum time between each iteration * and increase the max iterations when polling the phy; * this fixes erroneous timeouts at 10Mbps. */ ret_val = e1000_write_kmrn_reg_generic(hw, E1000_KMRNCTRLSTA_TIMEOUTS, 0xFFFF); if (ret_val) return ret_val; ret_val = e1000_read_kmrn_reg_generic(hw, E1000_KMRNCTRLSTA_INBAND_PARAM, ®_data); if (ret_val) return ret_val; reg_data |= 0x3F; ret_val = e1000_write_kmrn_reg_generic(hw, E1000_KMRNCTRLSTA_INBAND_PARAM, reg_data); if (ret_val) return ret_val; switch (hw->phy.type) { case e1000_phy_igp_3: ret_val = e1000_copper_link_setup_igp(hw); if (ret_val) return ret_val; break; case e1000_phy_bm: case e1000_phy_82578: ret_val = e1000_copper_link_setup_m88(hw); if (ret_val) return ret_val; break; case e1000_phy_82577: case e1000_phy_82579: ret_val = e1000_copper_link_setup_82577(hw); if (ret_val) return ret_val; break; case e1000_phy_ife: ret_val = hw->phy.ops.read_reg(hw, IFE_PHY_MDIX_CONTROL, ®_data); if (ret_val) return ret_val; reg_data &= ~IFE_PMC_AUTO_MDIX; switch (hw->phy.mdix) { case 1: reg_data &= ~IFE_PMC_FORCE_MDIX; break; case 2: reg_data |= IFE_PMC_FORCE_MDIX; break; case 0: default: reg_data |= IFE_PMC_AUTO_MDIX; break; } ret_val = hw->phy.ops.write_reg(hw, IFE_PHY_MDIX_CONTROL, reg_data); if (ret_val) return ret_val; break; default: break; } return e1000_setup_copper_link_generic(hw); } /** * e1000_setup_copper_link_pch_lpt - Configure MAC/PHY interface * @hw: pointer to the HW structure * * Calls the PHY specific link setup function and then calls the * generic setup_copper_link to finish configuring the link for * Lynxpoint PCH devices **/ static s32 e1000_setup_copper_link_pch_lpt(struct e1000_hw *hw) { u32 ctrl; s32 ret_val; DEBUGFUNC("e1000_setup_copper_link_pch_lpt"); ctrl = E1000_READ_REG(hw, E1000_CTRL); ctrl |= E1000_CTRL_SLU; ctrl &= ~(E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX); E1000_WRITE_REG(hw, E1000_CTRL, ctrl); ret_val = e1000_copper_link_setup_82577(hw); if (ret_val) return ret_val; return e1000_setup_copper_link_generic(hw); } /** * e1000_get_link_up_info_ich8lan - Get current link speed and duplex * @hw: pointer to the HW structure * @speed: pointer to store current link speed * @duplex: pointer to store the current link duplex * * Calls the generic get_speed_and_duplex to retrieve the current link * information and then calls the Kumeran lock loss workaround for links at * gigabit speeds. **/ static s32 e1000_get_link_up_info_ich8lan(struct e1000_hw *hw, u16 *speed, u16 *duplex) { s32 ret_val; DEBUGFUNC("e1000_get_link_up_info_ich8lan"); ret_val = e1000_get_speed_and_duplex_copper_generic(hw, speed, duplex); if (ret_val) return ret_val; if ((hw->mac.type == e1000_ich8lan) && (hw->phy.type == e1000_phy_igp_3) && (*speed == SPEED_1000)) { ret_val = e1000_kmrn_lock_loss_workaround_ich8lan(hw); } return ret_val; } /** * e1000_kmrn_lock_loss_workaround_ich8lan - Kumeran workaround * @hw: pointer to the HW structure * * Work-around for 82566 Kumeran PCS lock loss: * On link status change (i.e. PCI reset, speed change) and link is up and * speed is gigabit- * 0) if workaround is optionally disabled do nothing * 1) wait 1ms for Kumeran link to come up * 2) check Kumeran Diagnostic register PCS lock loss bit * 3) if not set the link is locked (all is good), otherwise... * 4) reset the PHY * 5) repeat up to 10 times * Note: this is only called for IGP3 copper when speed is 1gb. **/ static s32 e1000_kmrn_lock_loss_workaround_ich8lan(struct e1000_hw *hw) { struct e1000_dev_spec_ich8lan *dev_spec = &hw->dev_spec.ich8lan; u32 phy_ctrl; s32 ret_val; u16 i, data; bool link; DEBUGFUNC("e1000_kmrn_lock_loss_workaround_ich8lan"); if (!dev_spec->kmrn_lock_loss_workaround_enabled) return E1000_SUCCESS; /* Make sure link is up before proceeding. If not just return. * Attempting this while link is negotiating fouled up link * stability */ ret_val = e1000_phy_has_link_generic(hw, 1, 0, &link); if (!link) return E1000_SUCCESS; for (i = 0; i < 10; i++) { /* read once to clear */ ret_val = hw->phy.ops.read_reg(hw, IGP3_KMRN_DIAG, &data); if (ret_val) return ret_val; /* and again to get new status */ ret_val = hw->phy.ops.read_reg(hw, IGP3_KMRN_DIAG, &data); if (ret_val) return ret_val; /* check for PCS lock */ if (!(data & IGP3_KMRN_DIAG_PCS_LOCK_LOSS)) return E1000_SUCCESS; /* Issue PHY reset */ hw->phy.ops.reset(hw); msec_delay_irq(5); } /* Disable GigE link negotiation */ phy_ctrl = E1000_READ_REG(hw, E1000_PHY_CTRL); phy_ctrl |= (E1000_PHY_CTRL_GBE_DISABLE | E1000_PHY_CTRL_NOND0A_GBE_DISABLE); E1000_WRITE_REG(hw, E1000_PHY_CTRL, phy_ctrl); /* Call gig speed drop workaround on Gig disable before accessing * any PHY registers */ e1000_gig_downshift_workaround_ich8lan(hw); /* unable to acquire PCS lock */ return -E1000_ERR_PHY; } /** * e1000_set_kmrn_lock_loss_workaround_ich8lan - Set Kumeran workaround state * @hw: pointer to the HW structure * @state: boolean value used to set the current Kumeran workaround state * * If ICH8, set the current Kumeran workaround state (enabled - TRUE * /disabled - FALSE). **/ void e1000_set_kmrn_lock_loss_workaround_ich8lan(struct e1000_hw *hw, bool state) { struct e1000_dev_spec_ich8lan *dev_spec = &hw->dev_spec.ich8lan; DEBUGFUNC("e1000_set_kmrn_lock_loss_workaround_ich8lan"); if (hw->mac.type != e1000_ich8lan) { DEBUGOUT("Workaround applies to ICH8 only.\n"); return; } dev_spec->kmrn_lock_loss_workaround_enabled = state; return; } /** * e1000_ipg3_phy_powerdown_workaround_ich8lan - Power down workaround on D3 * @hw: pointer to the HW structure * * Workaround for 82566 power-down on D3 entry: * 1) disable gigabit link * 2) write VR power-down enable * 3) read it back * Continue if successful, else issue LCD reset and repeat **/ void e1000_igp3_phy_powerdown_workaround_ich8lan(struct e1000_hw *hw) { u32 reg; u16 data; u8 retry = 0; DEBUGFUNC("e1000_igp3_phy_powerdown_workaround_ich8lan"); if (hw->phy.type != e1000_phy_igp_3) return; /* Try the workaround twice (if needed) */ do { /* Disable link */ reg = E1000_READ_REG(hw, E1000_PHY_CTRL); reg |= (E1000_PHY_CTRL_GBE_DISABLE | E1000_PHY_CTRL_NOND0A_GBE_DISABLE); E1000_WRITE_REG(hw, E1000_PHY_CTRL, reg); /* Call gig speed drop workaround on Gig disable before * accessing any PHY registers */ if (hw->mac.type == e1000_ich8lan) e1000_gig_downshift_workaround_ich8lan(hw); /* Write VR power-down enable */ hw->phy.ops.read_reg(hw, IGP3_VR_CTRL, &data); data &= ~IGP3_VR_CTRL_DEV_POWERDOWN_MODE_MASK; hw->phy.ops.write_reg(hw, IGP3_VR_CTRL, data | IGP3_VR_CTRL_MODE_SHUTDOWN); /* Read it back and test */ hw->phy.ops.read_reg(hw, IGP3_VR_CTRL, &data); data &= IGP3_VR_CTRL_DEV_POWERDOWN_MODE_MASK; if ((data == IGP3_VR_CTRL_MODE_SHUTDOWN) || retry) break; /* Issue PHY reset and repeat at most one more time */ reg = E1000_READ_REG(hw, E1000_CTRL); E1000_WRITE_REG(hw, E1000_CTRL, reg | E1000_CTRL_PHY_RST); retry++; } while (retry); } /** * e1000_gig_downshift_workaround_ich8lan - WoL from S5 stops working * @hw: pointer to the HW structure * * Steps to take when dropping from 1Gb/s (eg. link cable removal (LSC), * LPLU, Gig disable, MDIC PHY reset): * 1) Set Kumeran Near-end loopback * 2) Clear Kumeran Near-end loopback * Should only be called for ICH8[m] devices with any 1G Phy. **/ void e1000_gig_downshift_workaround_ich8lan(struct e1000_hw *hw) { s32 ret_val; u16 reg_data; DEBUGFUNC("e1000_gig_downshift_workaround_ich8lan"); if ((hw->mac.type != e1000_ich8lan) || (hw->phy.type == e1000_phy_ife)) return; ret_val = e1000_read_kmrn_reg_generic(hw, E1000_KMRNCTRLSTA_DIAG_OFFSET, ®_data); if (ret_val) return; reg_data |= E1000_KMRNCTRLSTA_DIAG_NELPBK; ret_val = e1000_write_kmrn_reg_generic(hw, E1000_KMRNCTRLSTA_DIAG_OFFSET, reg_data); if (ret_val) return; reg_data &= ~E1000_KMRNCTRLSTA_DIAG_NELPBK; e1000_write_kmrn_reg_generic(hw, E1000_KMRNCTRLSTA_DIAG_OFFSET, reg_data); } /** * e1000_suspend_workarounds_ich8lan - workarounds needed during S0->Sx * @hw: pointer to the HW structure * * During S0 to Sx transition, it is possible the link remains at gig * instead of negotiating to a lower speed. Before going to Sx, set * 'Gig Disable' to force link speed negotiation to a lower speed based on * the LPLU setting in the NVM or custom setting. For PCH and newer parts, * the OEM bits PHY register (LED, GbE disable and LPLU configurations) also * needs to be written. * Parts that support (and are linked to a partner which support) EEE in * 100Mbps should disable LPLU since 100Mbps w/ EEE requires less power * than 10Mbps w/o EEE. **/ void e1000_suspend_workarounds_ich8lan(struct e1000_hw *hw) { struct e1000_dev_spec_ich8lan *dev_spec = &hw->dev_spec.ich8lan; u32 phy_ctrl; s32 ret_val; DEBUGFUNC("e1000_suspend_workarounds_ich8lan"); phy_ctrl = E1000_READ_REG(hw, E1000_PHY_CTRL); phy_ctrl |= E1000_PHY_CTRL_GBE_DISABLE; if (hw->phy.type == e1000_phy_i217) { u16 phy_reg, device_id = hw->device_id; if ((device_id == E1000_DEV_ID_PCH_LPTLP_I218_LM) || (device_id == E1000_DEV_ID_PCH_LPTLP_I218_V) || (device_id == E1000_DEV_ID_PCH_I218_LM3) || (device_id == E1000_DEV_ID_PCH_I218_V3) || (hw->mac.type >= e1000_pch_spt)) { u32 fextnvm6 = E1000_READ_REG(hw, E1000_FEXTNVM6); E1000_WRITE_REG(hw, E1000_FEXTNVM6, fextnvm6 & ~E1000_FEXTNVM6_REQ_PLL_CLK); } ret_val = hw->phy.ops.acquire(hw); if (ret_val) goto out; if (!dev_spec->eee_disable) { u16 eee_advert; ret_val = e1000_read_emi_reg_locked(hw, I217_EEE_ADVERTISEMENT, &eee_advert); if (ret_val) goto release; /* Disable LPLU if both link partners support 100BaseT * EEE and 100Full is advertised on both ends of the * link, and enable Auto Enable LPI since there will * be no driver to enable LPI while in Sx. */ if ((eee_advert & I82579_EEE_100_SUPPORTED) && (dev_spec->eee_lp_ability & I82579_EEE_100_SUPPORTED) && (hw->phy.autoneg_advertised & ADVERTISE_100_FULL)) { phy_ctrl &= ~(E1000_PHY_CTRL_D0A_LPLU | E1000_PHY_CTRL_NOND0A_LPLU); /* Set Auto Enable LPI after link up */ hw->phy.ops.read_reg_locked(hw, I217_LPI_GPIO_CTRL, &phy_reg); phy_reg |= I217_LPI_GPIO_CTRL_AUTO_EN_LPI; hw->phy.ops.write_reg_locked(hw, I217_LPI_GPIO_CTRL, phy_reg); } } /* For i217 Intel Rapid Start Technology support, * when the system is going into Sx and no manageability engine * is present, the driver must configure proxy to reset only on * power good. LPI (Low Power Idle) state must also reset only * on power good, as well as the MTA (Multicast table array). * The SMBus release must also be disabled on LCD reset. */ if (!(E1000_READ_REG(hw, E1000_FWSM) & E1000_ICH_FWSM_FW_VALID)) { /* Enable proxy to reset only on power good. */ hw->phy.ops.read_reg_locked(hw, I217_PROXY_CTRL, &phy_reg); phy_reg |= I217_PROXY_CTRL_AUTO_DISABLE; hw->phy.ops.write_reg_locked(hw, I217_PROXY_CTRL, phy_reg); /* Set bit enable LPI (EEE) to reset only on * power good. */ hw->phy.ops.read_reg_locked(hw, I217_SxCTRL, &phy_reg); phy_reg |= I217_SxCTRL_ENABLE_LPI_RESET; hw->phy.ops.write_reg_locked(hw, I217_SxCTRL, phy_reg); /* Disable the SMB release on LCD reset. */ hw->phy.ops.read_reg_locked(hw, I217_MEMPWR, &phy_reg); phy_reg &= ~I217_MEMPWR_DISABLE_SMB_RELEASE; hw->phy.ops.write_reg_locked(hw, I217_MEMPWR, phy_reg); } /* Enable MTA to reset for Intel Rapid Start Technology * Support */ hw->phy.ops.read_reg_locked(hw, I217_CGFREG, &phy_reg); phy_reg |= I217_CGFREG_ENABLE_MTA_RESET; hw->phy.ops.write_reg_locked(hw, I217_CGFREG, phy_reg); release: hw->phy.ops.release(hw); } out: E1000_WRITE_REG(hw, E1000_PHY_CTRL, phy_ctrl); if (hw->mac.type == e1000_ich8lan) e1000_gig_downshift_workaround_ich8lan(hw); if (hw->mac.type >= e1000_pchlan) { e1000_oem_bits_config_ich8lan(hw, FALSE); /* Reset PHY to activate OEM bits on 82577/8 */ if (hw->mac.type == e1000_pchlan) e1000_phy_hw_reset_generic(hw); ret_val = hw->phy.ops.acquire(hw); if (ret_val) return; e1000_write_smbus_addr(hw); hw->phy.ops.release(hw); } return; } /** * e1000_resume_workarounds_pchlan - workarounds needed during Sx->S0 * @hw: pointer to the HW structure * * During Sx to S0 transitions on non-managed devices or managed devices * on which PHY resets are not blocked, if the PHY registers cannot be * accessed properly by the s/w toggle the LANPHYPC value to power cycle * the PHY. * On i217, setup Intel Rapid Start Technology. **/ u32 e1000_resume_workarounds_pchlan(struct e1000_hw *hw) { s32 ret_val; DEBUGFUNC("e1000_resume_workarounds_pchlan"); if (hw->mac.type < e1000_pch2lan) return E1000_SUCCESS; ret_val = e1000_init_phy_workarounds_pchlan(hw); if (ret_val) { DEBUGOUT1("Failed to init PHY flow ret_val=%d\n", ret_val); return ret_val; } /* For i217 Intel Rapid Start Technology support when the system * is transitioning from Sx and no manageability engine is present * configure SMBus to restore on reset, disable proxy, and enable * the reset on MTA (Multicast table array). */ if (hw->phy.type == e1000_phy_i217) { u16 phy_reg; ret_val = hw->phy.ops.acquire(hw); if (ret_val) { DEBUGOUT("Failed to setup iRST\n"); return ret_val; } /* Clear Auto Enable LPI after link up */ hw->phy.ops.read_reg_locked(hw, I217_LPI_GPIO_CTRL, &phy_reg); phy_reg &= ~I217_LPI_GPIO_CTRL_AUTO_EN_LPI; hw->phy.ops.write_reg_locked(hw, I217_LPI_GPIO_CTRL, phy_reg); if (!(E1000_READ_REG(hw, E1000_FWSM) & E1000_ICH_FWSM_FW_VALID)) { /* Restore clear on SMB if no manageability engine * is present */ ret_val = hw->phy.ops.read_reg_locked(hw, I217_MEMPWR, &phy_reg); if (ret_val) goto release; phy_reg |= I217_MEMPWR_DISABLE_SMB_RELEASE; hw->phy.ops.write_reg_locked(hw, I217_MEMPWR, phy_reg); /* Disable Proxy */ hw->phy.ops.write_reg_locked(hw, I217_PROXY_CTRL, 0); } /* Enable reset on MTA */ ret_val = hw->phy.ops.read_reg_locked(hw, I217_CGFREG, &phy_reg); if (ret_val) goto release; phy_reg &= ~I217_CGFREG_ENABLE_MTA_RESET; hw->phy.ops.write_reg_locked(hw, I217_CGFREG, phy_reg); release: if (ret_val) DEBUGOUT1("Error %d in resume workarounds\n", ret_val); hw->phy.ops.release(hw); return ret_val; } return E1000_SUCCESS; } /** * e1000_cleanup_led_ich8lan - Restore the default LED operation * @hw: pointer to the HW structure * * Return the LED back to the default configuration. **/ static s32 e1000_cleanup_led_ich8lan(struct e1000_hw *hw) { DEBUGFUNC("e1000_cleanup_led_ich8lan"); if (hw->phy.type == e1000_phy_ife) return hw->phy.ops.write_reg(hw, IFE_PHY_SPECIAL_CONTROL_LED, 0); E1000_WRITE_REG(hw, E1000_LEDCTL, hw->mac.ledctl_default); return E1000_SUCCESS; } /** * e1000_led_on_ich8lan - Turn LEDs on * @hw: pointer to the HW structure * * Turn on the LEDs. **/ static s32 e1000_led_on_ich8lan(struct e1000_hw *hw) { DEBUGFUNC("e1000_led_on_ich8lan"); if (hw->phy.type == e1000_phy_ife) return hw->phy.ops.write_reg(hw, IFE_PHY_SPECIAL_CONTROL_LED, (IFE_PSCL_PROBE_MODE | IFE_PSCL_PROBE_LEDS_ON)); E1000_WRITE_REG(hw, E1000_LEDCTL, hw->mac.ledctl_mode2); return E1000_SUCCESS; } /** * e1000_led_off_ich8lan - Turn LEDs off * @hw: pointer to the HW structure * * Turn off the LEDs. **/ static s32 e1000_led_off_ich8lan(struct e1000_hw *hw) { DEBUGFUNC("e1000_led_off_ich8lan"); if (hw->phy.type == e1000_phy_ife) return hw->phy.ops.write_reg(hw, IFE_PHY_SPECIAL_CONTROL_LED, (IFE_PSCL_PROBE_MODE | IFE_PSCL_PROBE_LEDS_OFF)); E1000_WRITE_REG(hw, E1000_LEDCTL, hw->mac.ledctl_mode1); return E1000_SUCCESS; } /** * e1000_setup_led_pchlan - Configures SW controllable LED * @hw: pointer to the HW structure * * This prepares the SW controllable LED for use. **/ static s32 e1000_setup_led_pchlan(struct e1000_hw *hw) { DEBUGFUNC("e1000_setup_led_pchlan"); return hw->phy.ops.write_reg(hw, HV_LED_CONFIG, (u16)hw->mac.ledctl_mode1); } /** * e1000_cleanup_led_pchlan - Restore the default LED operation * @hw: pointer to the HW structure * * Return the LED back to the default configuration. **/ static s32 e1000_cleanup_led_pchlan(struct e1000_hw *hw) { DEBUGFUNC("e1000_cleanup_led_pchlan"); return hw->phy.ops.write_reg(hw, HV_LED_CONFIG, (u16)hw->mac.ledctl_default); } /** * e1000_led_on_pchlan - Turn LEDs on * @hw: pointer to the HW structure * * Turn on the LEDs. **/ static s32 e1000_led_on_pchlan(struct e1000_hw *hw) { u16 data = (u16)hw->mac.ledctl_mode2; u32 i, led; DEBUGFUNC("e1000_led_on_pchlan"); /* If no link, then turn LED on by setting the invert bit * for each LED that's mode is "link_up" in ledctl_mode2. */ if (!(E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) { for (i = 0; i < 3; i++) { led = (data >> (i * 5)) & E1000_PHY_LED0_MASK; if ((led & E1000_PHY_LED0_MODE_MASK) != E1000_LEDCTL_MODE_LINK_UP) continue; if (led & E1000_PHY_LED0_IVRT) data &= ~(E1000_PHY_LED0_IVRT << (i * 5)); else data |= (E1000_PHY_LED0_IVRT << (i * 5)); } } return hw->phy.ops.write_reg(hw, HV_LED_CONFIG, data); } /** * e1000_led_off_pchlan - Turn LEDs off * @hw: pointer to the HW structure * * Turn off the LEDs. **/ static s32 e1000_led_off_pchlan(struct e1000_hw *hw) { u16 data = (u16)hw->mac.ledctl_mode1; u32 i, led; DEBUGFUNC("e1000_led_off_pchlan"); /* If no link, then turn LED off by clearing the invert bit * for each LED that's mode is "link_up" in ledctl_mode1. */ if (!(E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) { for (i = 0; i < 3; i++) { led = (data >> (i * 5)) & E1000_PHY_LED0_MASK; if ((led & E1000_PHY_LED0_MODE_MASK) != E1000_LEDCTL_MODE_LINK_UP) continue; if (led & E1000_PHY_LED0_IVRT) data &= ~(E1000_PHY_LED0_IVRT << (i * 5)); else data |= (E1000_PHY_LED0_IVRT << (i * 5)); } } return hw->phy.ops.write_reg(hw, HV_LED_CONFIG, data); } /** * e1000_get_cfg_done_ich8lan - Read config done bit after Full or PHY reset * @hw: pointer to the HW structure * * Read appropriate register for the config done bit for completion status * and configure the PHY through s/w for EEPROM-less parts. * * NOTE: some silicon which is EEPROM-less will fail trying to read the * config done bit, so only an error is logged and continues. If we were * to return with error, EEPROM-less silicon would not be able to be reset * or change link. **/ static s32 e1000_get_cfg_done_ich8lan(struct e1000_hw *hw) { s32 ret_val = E1000_SUCCESS; u32 bank = 0; u32 status; DEBUGFUNC("e1000_get_cfg_done_ich8lan"); e1000_get_cfg_done_generic(hw); /* Wait for indication from h/w that it has completed basic config */ if (hw->mac.type >= e1000_ich10lan) { e1000_lan_init_done_ich8lan(hw); } else { ret_val = e1000_get_auto_rd_done_generic(hw); if (ret_val) { /* When auto config read does not complete, do not * return with an error. This can happen in situations * where there is no eeprom and prevents getting link. */ DEBUGOUT("Auto Read Done did not complete\n"); ret_val = E1000_SUCCESS; } } /* Clear PHY Reset Asserted bit */ status = E1000_READ_REG(hw, E1000_STATUS); if (status & E1000_STATUS_PHYRA) E1000_WRITE_REG(hw, E1000_STATUS, status & ~E1000_STATUS_PHYRA); else DEBUGOUT("PHY Reset Asserted not set - needs delay\n"); /* If EEPROM is not marked present, init the IGP 3 PHY manually */ if (hw->mac.type <= e1000_ich9lan) { if (!(E1000_READ_REG(hw, E1000_EECD) & E1000_EECD_PRES) && (hw->phy.type == e1000_phy_igp_3)) { e1000_phy_init_script_igp3(hw); } } else { if (e1000_valid_nvm_bank_detect_ich8lan(hw, &bank)) { /* Maybe we should do a basic PHY config */ DEBUGOUT("EEPROM not present\n"); ret_val = -E1000_ERR_CONFIG; } } return ret_val; } /** * e1000_power_down_phy_copper_ich8lan - Remove link during PHY power down * @hw: pointer to the HW structure * * In the case of a PHY power down to save power, or to turn off link during a * driver unload, or wake on lan is not enabled, remove the link. **/ static void e1000_power_down_phy_copper_ich8lan(struct e1000_hw *hw) { /* If the management interface is not enabled, then power down */ if (!(hw->mac.ops.check_mng_mode(hw) || hw->phy.ops.check_reset_block(hw))) e1000_power_down_phy_copper(hw); return; } /** * e1000_clear_hw_cntrs_ich8lan - Clear statistical counters * @hw: pointer to the HW structure * * Clears hardware counters specific to the silicon family and calls * clear_hw_cntrs_generic to clear all general purpose counters. **/ static void e1000_clear_hw_cntrs_ich8lan(struct e1000_hw *hw) { u16 phy_data; s32 ret_val; DEBUGFUNC("e1000_clear_hw_cntrs_ich8lan"); e1000_clear_hw_cntrs_base_generic(hw); E1000_READ_REG(hw, E1000_ALGNERRC); E1000_READ_REG(hw, E1000_RXERRC); E1000_READ_REG(hw, E1000_TNCRS); E1000_READ_REG(hw, E1000_CEXTERR); E1000_READ_REG(hw, E1000_TSCTC); E1000_READ_REG(hw, E1000_TSCTFC); E1000_READ_REG(hw, E1000_MGTPRC); E1000_READ_REG(hw, E1000_MGTPDC); E1000_READ_REG(hw, E1000_MGTPTC); E1000_READ_REG(hw, E1000_IAC); E1000_READ_REG(hw, E1000_ICRXOC); /* Clear PHY statistics registers */ if ((hw->phy.type == e1000_phy_82578) || (hw->phy.type == e1000_phy_82579) || (hw->phy.type == e1000_phy_i217) || (hw->phy.type == e1000_phy_82577)) { ret_val = hw->phy.ops.acquire(hw); if (ret_val) return; ret_val = hw->phy.ops.set_page(hw, HV_STATS_PAGE << IGP_PAGE_SHIFT); if (ret_val) goto release; hw->phy.ops.read_reg_page(hw, HV_SCC_UPPER, &phy_data); hw->phy.ops.read_reg_page(hw, HV_SCC_LOWER, &phy_data); hw->phy.ops.read_reg_page(hw, HV_ECOL_UPPER, &phy_data); hw->phy.ops.read_reg_page(hw, HV_ECOL_LOWER, &phy_data); hw->phy.ops.read_reg_page(hw, HV_MCC_UPPER, &phy_data); hw->phy.ops.read_reg_page(hw, HV_MCC_LOWER, &phy_data); hw->phy.ops.read_reg_page(hw, HV_LATECOL_UPPER, &phy_data); hw->phy.ops.read_reg_page(hw, HV_LATECOL_LOWER, &phy_data); hw->phy.ops.read_reg_page(hw, HV_COLC_UPPER, &phy_data); hw->phy.ops.read_reg_page(hw, HV_COLC_LOWER, &phy_data); hw->phy.ops.read_reg_page(hw, HV_DC_UPPER, &phy_data); hw->phy.ops.read_reg_page(hw, HV_DC_LOWER, &phy_data); hw->phy.ops.read_reg_page(hw, HV_TNCRS_UPPER, &phy_data); hw->phy.ops.read_reg_page(hw, HV_TNCRS_LOWER, &phy_data); release: hw->phy.ops.release(hw); } } Index: head/sys/dev/e1000/e1000_mac.c =================================================================== --- head/sys/dev/e1000/e1000_mac.c (revision 323515) +++ head/sys/dev/e1000/e1000_mac.c (revision 323516) @@ -1,2253 +1,2366 @@ /****************************************************************************** Copyright (c) 2001-2015, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************/ /*$FreeBSD$*/ #include "e1000_api.h" static s32 e1000_validate_mdi_setting_generic(struct e1000_hw *hw); static void e1000_set_lan_id_multi_port_pcie(struct e1000_hw *hw); static void e1000_config_collision_dist_generic(struct e1000_hw *hw); static int e1000_rar_set_generic(struct e1000_hw *hw, u8 *addr, u32 index); /** * e1000_init_mac_ops_generic - Initialize MAC function pointers * @hw: pointer to the HW structure * * Setups up the function pointers to no-op functions **/ void e1000_init_mac_ops_generic(struct e1000_hw *hw) { struct e1000_mac_info *mac = &hw->mac; DEBUGFUNC("e1000_init_mac_ops_generic"); /* General Setup */ mac->ops.init_params = e1000_null_ops_generic; mac->ops.init_hw = e1000_null_ops_generic; mac->ops.reset_hw = e1000_null_ops_generic; mac->ops.setup_physical_interface = e1000_null_ops_generic; mac->ops.get_bus_info = e1000_null_ops_generic; mac->ops.set_lan_id = e1000_set_lan_id_multi_port_pcie; mac->ops.read_mac_addr = e1000_read_mac_addr_generic; mac->ops.config_collision_dist = e1000_config_collision_dist_generic; mac->ops.clear_hw_cntrs = e1000_null_mac_generic; /* LED */ mac->ops.cleanup_led = e1000_null_ops_generic; mac->ops.setup_led = e1000_null_ops_generic; mac->ops.blink_led = e1000_null_ops_generic; mac->ops.led_on = e1000_null_ops_generic; mac->ops.led_off = e1000_null_ops_generic; /* LINK */ mac->ops.setup_link = e1000_null_ops_generic; mac->ops.get_link_up_info = e1000_null_link_info; mac->ops.check_for_link = e1000_null_ops_generic; mac->ops.set_obff_timer = e1000_null_set_obff_timer; /* Management */ mac->ops.check_mng_mode = e1000_null_mng_mode; /* VLAN, MC, etc. */ mac->ops.update_mc_addr_list = e1000_null_update_mc; mac->ops.clear_vfta = e1000_null_mac_generic; mac->ops.write_vfta = e1000_null_write_vfta; mac->ops.rar_set = e1000_rar_set_generic; mac->ops.validate_mdi_setting = e1000_validate_mdi_setting_generic; } /** * e1000_null_ops_generic - No-op function, returns 0 * @hw: pointer to the HW structure **/ s32 e1000_null_ops_generic(struct e1000_hw E1000_UNUSEDARG *hw) { DEBUGFUNC("e1000_null_ops_generic"); return E1000_SUCCESS; } /** * e1000_null_mac_generic - No-op function, return void * @hw: pointer to the HW structure **/ void e1000_null_mac_generic(struct e1000_hw E1000_UNUSEDARG *hw) { DEBUGFUNC("e1000_null_mac_generic"); return; } /** * e1000_null_link_info - No-op function, return 0 * @hw: pointer to the HW structure **/ s32 e1000_null_link_info(struct e1000_hw E1000_UNUSEDARG *hw, u16 E1000_UNUSEDARG *s, u16 E1000_UNUSEDARG *d) { DEBUGFUNC("e1000_null_link_info"); return E1000_SUCCESS; } /** * e1000_null_mng_mode - No-op function, return FALSE * @hw: pointer to the HW structure **/ bool e1000_null_mng_mode(struct e1000_hw E1000_UNUSEDARG *hw) { DEBUGFUNC("e1000_null_mng_mode"); return FALSE; } /** * e1000_null_update_mc - No-op function, return void * @hw: pointer to the HW structure **/ void e1000_null_update_mc(struct e1000_hw E1000_UNUSEDARG *hw, u8 E1000_UNUSEDARG *h, u32 E1000_UNUSEDARG a) { DEBUGFUNC("e1000_null_update_mc"); return; } /** * e1000_null_write_vfta - No-op function, return void * @hw: pointer to the HW structure **/ void e1000_null_write_vfta(struct e1000_hw E1000_UNUSEDARG *hw, u32 E1000_UNUSEDARG a, u32 E1000_UNUSEDARG b) { DEBUGFUNC("e1000_null_write_vfta"); return; } /** * e1000_null_rar_set - No-op function, return 0 * @hw: pointer to the HW structure **/ int e1000_null_rar_set(struct e1000_hw E1000_UNUSEDARG *hw, u8 E1000_UNUSEDARG *h, u32 E1000_UNUSEDARG a) { DEBUGFUNC("e1000_null_rar_set"); return E1000_SUCCESS; } /** * e1000_null_set_obff_timer - No-op function, return 0 * @hw: pointer to the HW structure **/ s32 e1000_null_set_obff_timer(struct e1000_hw E1000_UNUSEDARG *hw, u32 E1000_UNUSEDARG a) { DEBUGFUNC("e1000_null_set_obff_timer"); return E1000_SUCCESS; } /** * e1000_get_bus_info_pci_generic - Get PCI(x) bus information * @hw: pointer to the HW structure * * Determines and stores the system bus information for a particular * network interface. The following bus information is determined and stored: * bus speed, bus width, type (PCI/PCIx), and PCI(-x) function. **/ s32 e1000_get_bus_info_pci_generic(struct e1000_hw *hw) { struct e1000_mac_info *mac = &hw->mac; struct e1000_bus_info *bus = &hw->bus; u32 status = E1000_READ_REG(hw, E1000_STATUS); s32 ret_val = E1000_SUCCESS; DEBUGFUNC("e1000_get_bus_info_pci_generic"); /* PCI or PCI-X? */ bus->type = (status & E1000_STATUS_PCIX_MODE) ? e1000_bus_type_pcix : e1000_bus_type_pci; /* Bus speed */ if (bus->type == e1000_bus_type_pci) { bus->speed = (status & E1000_STATUS_PCI66) ? e1000_bus_speed_66 : e1000_bus_speed_33; } else { switch (status & E1000_STATUS_PCIX_SPEED) { case E1000_STATUS_PCIX_SPEED_66: bus->speed = e1000_bus_speed_66; break; case E1000_STATUS_PCIX_SPEED_100: bus->speed = e1000_bus_speed_100; break; case E1000_STATUS_PCIX_SPEED_133: bus->speed = e1000_bus_speed_133; break; default: bus->speed = e1000_bus_speed_reserved; break; } } /* Bus width */ bus->width = (status & E1000_STATUS_BUS64) ? e1000_bus_width_64 : e1000_bus_width_32; /* Which PCI(-X) function? */ mac->ops.set_lan_id(hw); return ret_val; } /** * e1000_get_bus_info_pcie_generic - Get PCIe bus information * @hw: pointer to the HW structure * * Determines and stores the system bus information for a particular * network interface. The following bus information is determined and stored: * bus speed, bus width, type (PCIe), and PCIe function. **/ s32 e1000_get_bus_info_pcie_generic(struct e1000_hw *hw) { struct e1000_mac_info *mac = &hw->mac; struct e1000_bus_info *bus = &hw->bus; s32 ret_val; u16 pcie_link_status; DEBUGFUNC("e1000_get_bus_info_pcie_generic"); bus->type = e1000_bus_type_pci_express; ret_val = e1000_read_pcie_cap_reg(hw, PCIE_LINK_STATUS, &pcie_link_status); if (ret_val) { bus->width = e1000_bus_width_unknown; bus->speed = e1000_bus_speed_unknown; } else { switch (pcie_link_status & PCIE_LINK_SPEED_MASK) { case PCIE_LINK_SPEED_2500: bus->speed = e1000_bus_speed_2500; break; case PCIE_LINK_SPEED_5000: bus->speed = e1000_bus_speed_5000; break; default: bus->speed = e1000_bus_speed_unknown; break; } bus->width = (enum e1000_bus_width)((pcie_link_status & PCIE_LINK_WIDTH_MASK) >> PCIE_LINK_WIDTH_SHIFT); } mac->ops.set_lan_id(hw); return E1000_SUCCESS; } /** * e1000_set_lan_id_multi_port_pcie - Set LAN id for PCIe multiple port devices * * @hw: pointer to the HW structure * * Determines the LAN function id by reading memory-mapped registers * and swaps the port value if requested. **/ static void e1000_set_lan_id_multi_port_pcie(struct e1000_hw *hw) { struct e1000_bus_info *bus = &hw->bus; u32 reg; /* The status register reports the correct function number * for the device regardless of function swap state. */ reg = E1000_READ_REG(hw, E1000_STATUS); bus->func = (reg & E1000_STATUS_FUNC_MASK) >> E1000_STATUS_FUNC_SHIFT; } /** * e1000_set_lan_id_multi_port_pci - Set LAN id for PCI multiple port devices * @hw: pointer to the HW structure * * Determines the LAN function id by reading PCI config space. **/ void e1000_set_lan_id_multi_port_pci(struct e1000_hw *hw) { struct e1000_bus_info *bus = &hw->bus; u16 pci_header_type; u32 status; e1000_read_pci_cfg(hw, PCI_HEADER_TYPE_REGISTER, &pci_header_type); if (pci_header_type & PCI_HEADER_TYPE_MULTIFUNC) { status = E1000_READ_REG(hw, E1000_STATUS); bus->func = (status & E1000_STATUS_FUNC_MASK) >> E1000_STATUS_FUNC_SHIFT; } else { bus->func = 0; } } /** * e1000_set_lan_id_single_port - Set LAN id for a single port device * @hw: pointer to the HW structure * * Sets the LAN function id to zero for a single port device. **/ void e1000_set_lan_id_single_port(struct e1000_hw *hw) { struct e1000_bus_info *bus = &hw->bus; bus->func = 0; } /** * e1000_clear_vfta_generic - Clear VLAN filter table * @hw: pointer to the HW structure * * Clears the register array which contains the VLAN filter table by * setting all the values to 0. **/ void e1000_clear_vfta_generic(struct e1000_hw *hw) { u32 offset; DEBUGFUNC("e1000_clear_vfta_generic"); for (offset = 0; offset < E1000_VLAN_FILTER_TBL_SIZE; offset++) { E1000_WRITE_REG_ARRAY(hw, E1000_VFTA, offset, 0); E1000_WRITE_FLUSH(hw); } } /** * e1000_write_vfta_generic - Write value to VLAN filter table * @hw: pointer to the HW structure * @offset: register offset in VLAN filter table * @value: register value written to VLAN filter table * * Writes value at the given offset in the register array which stores * the VLAN filter table. **/ void e1000_write_vfta_generic(struct e1000_hw *hw, u32 offset, u32 value) { DEBUGFUNC("e1000_write_vfta_generic"); E1000_WRITE_REG_ARRAY(hw, E1000_VFTA, offset, value); E1000_WRITE_FLUSH(hw); } /** * e1000_init_rx_addrs_generic - Initialize receive address's * @hw: pointer to the HW structure * @rar_count: receive address registers * * Setup the receive address registers by setting the base receive address * register to the devices MAC address and clearing all the other receive * address registers to 0. **/ void e1000_init_rx_addrs_generic(struct e1000_hw *hw, u16 rar_count) { u32 i; u8 mac_addr[ETH_ADDR_LEN] = {0}; DEBUGFUNC("e1000_init_rx_addrs_generic"); /* Setup the receive address */ DEBUGOUT("Programming MAC Address into RAR[0]\n"); hw->mac.ops.rar_set(hw, hw->mac.addr, 0); /* Zero out the other (rar_entry_count - 1) receive addresses */ DEBUGOUT1("Clearing RAR[1-%u]\n", rar_count-1); for (i = 1; i < rar_count; i++) hw->mac.ops.rar_set(hw, mac_addr, i); } /** * e1000_check_alt_mac_addr_generic - Check for alternate MAC addr * @hw: pointer to the HW structure * * Checks the nvm for an alternate MAC address. An alternate MAC address * can be setup by pre-boot software and must be treated like a permanent * address and must override the actual permanent MAC address. If an * alternate MAC address is found it is programmed into RAR0, replacing * the permanent address that was installed into RAR0 by the Si on reset. * This function will return SUCCESS unless it encounters an error while * reading the EEPROM. **/ s32 e1000_check_alt_mac_addr_generic(struct e1000_hw *hw) { u32 i; s32 ret_val; u16 offset, nvm_alt_mac_addr_offset, nvm_data; u8 alt_mac_addr[ETH_ADDR_LEN]; DEBUGFUNC("e1000_check_alt_mac_addr_generic"); ret_val = hw->nvm.ops.read(hw, NVM_COMPAT, 1, &nvm_data); if (ret_val) return ret_val; /* not supported on older hardware or 82573 */ if ((hw->mac.type < e1000_82571) || (hw->mac.type == e1000_82573)) return E1000_SUCCESS; /* Alternate MAC address is handled by the option ROM for 82580 * and newer. SW support not required. */ if (hw->mac.type >= e1000_82580) return E1000_SUCCESS; ret_val = hw->nvm.ops.read(hw, NVM_ALT_MAC_ADDR_PTR, 1, &nvm_alt_mac_addr_offset); if (ret_val) { DEBUGOUT("NVM Read Error\n"); return ret_val; } if ((nvm_alt_mac_addr_offset == 0xFFFF) || (nvm_alt_mac_addr_offset == 0x0000)) /* There is no Alternate MAC Address */ return E1000_SUCCESS; if (hw->bus.func == E1000_FUNC_1) nvm_alt_mac_addr_offset += E1000_ALT_MAC_ADDRESS_OFFSET_LAN1; if (hw->bus.func == E1000_FUNC_2) nvm_alt_mac_addr_offset += E1000_ALT_MAC_ADDRESS_OFFSET_LAN2; if (hw->bus.func == E1000_FUNC_3) nvm_alt_mac_addr_offset += E1000_ALT_MAC_ADDRESS_OFFSET_LAN3; for (i = 0; i < ETH_ADDR_LEN; i += 2) { offset = nvm_alt_mac_addr_offset + (i >> 1); ret_val = hw->nvm.ops.read(hw, offset, 1, &nvm_data); if (ret_val) { DEBUGOUT("NVM Read Error\n"); return ret_val; } alt_mac_addr[i] = (u8)(nvm_data & 0xFF); alt_mac_addr[i + 1] = (u8)(nvm_data >> 8); } /* if multicast bit is set, the alternate address will not be used */ if (alt_mac_addr[0] & 0x01) { DEBUGOUT("Ignoring Alternate Mac Address with MC bit set\n"); return E1000_SUCCESS; } /* We have a valid alternate MAC address, and we want to treat it the * same as the normal permanent MAC address stored by the HW into the * RAR. Do this by mapping this address into RAR0. */ hw->mac.ops.rar_set(hw, alt_mac_addr, 0); return E1000_SUCCESS; } /** * e1000_rar_set_generic - Set receive address register * @hw: pointer to the HW structure * @addr: pointer to the receive address * @index: receive address array register * * Sets the receive address array register at index to the address passed * in by addr. **/ static int e1000_rar_set_generic(struct e1000_hw *hw, u8 *addr, u32 index) { u32 rar_low, rar_high; DEBUGFUNC("e1000_rar_set_generic"); /* HW expects these in little endian so we reverse the byte order * from network order (big endian) to little endian */ rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) | ((u32) addr[2] << 16) | ((u32) addr[3] << 24)); rar_high = ((u32) addr[4] | ((u32) addr[5] << 8)); /* If MAC address zero, no need to set the AV bit */ if (rar_low || rar_high) rar_high |= E1000_RAH_AV; /* Some bridges will combine consecutive 32-bit writes into * a single burst write, which will malfunction on some parts. * The flushes avoid this. */ E1000_WRITE_REG(hw, E1000_RAL(index), rar_low); E1000_WRITE_FLUSH(hw); E1000_WRITE_REG(hw, E1000_RAH(index), rar_high); E1000_WRITE_FLUSH(hw); return E1000_SUCCESS; } /** * e1000_hash_mc_addr_generic - Generate a multicast hash value * @hw: pointer to the HW structure * @mc_addr: pointer to a multicast address * * Generates a multicast address hash value which is used to determine * the multicast filter table array address and new table value. **/ u32 e1000_hash_mc_addr_generic(struct e1000_hw *hw, u8 *mc_addr) { u32 hash_value, hash_mask; u8 bit_shift = 0; DEBUGFUNC("e1000_hash_mc_addr_generic"); /* Register count multiplied by bits per register */ hash_mask = (hw->mac.mta_reg_count * 32) - 1; /* For a mc_filter_type of 0, bit_shift is the number of left-shifts * where 0xFF would still fall within the hash mask. */ while (hash_mask >> bit_shift != 0xFF) bit_shift++; /* The portion of the address that is used for the hash table * is determined by the mc_filter_type setting. * The algorithm is such that there is a total of 8 bits of shifting. * The bit_shift for a mc_filter_type of 0 represents the number of * left-shifts where the MSB of mc_addr[5] would still fall within * the hash_mask. Case 0 does this exactly. Since there are a total * of 8 bits of shifting, then mc_addr[4] will shift right the * remaining number of bits. Thus 8 - bit_shift. The rest of the * cases are a variation of this algorithm...essentially raising the * number of bits to shift mc_addr[5] left, while still keeping the * 8-bit shifting total. * * For example, given the following Destination MAC Address and an * mta register count of 128 (thus a 4096-bit vector and 0xFFF mask), * we can see that the bit_shift for case 0 is 4. These are the hash * values resulting from each mc_filter_type... * [0] [1] [2] [3] [4] [5] * 01 AA 00 12 34 56 * LSB MSB * * case 0: hash_value = ((0x34 >> 4) | (0x56 << 4)) & 0xFFF = 0x563 * case 1: hash_value = ((0x34 >> 3) | (0x56 << 5)) & 0xFFF = 0xAC6 * case 2: hash_value = ((0x34 >> 2) | (0x56 << 6)) & 0xFFF = 0x163 * case 3: hash_value = ((0x34 >> 0) | (0x56 << 8)) & 0xFFF = 0x634 */ switch (hw->mac.mc_filter_type) { default: case 0: break; case 1: bit_shift += 1; break; case 2: bit_shift += 2; break; case 3: bit_shift += 4; break; } hash_value = hash_mask & (((mc_addr[4] >> (8 - bit_shift)) | (((u16) mc_addr[5]) << bit_shift))); return hash_value; } /** * e1000_update_mc_addr_list_generic - Update Multicast addresses * @hw: pointer to the HW structure * @mc_addr_list: array of multicast addresses to program * @mc_addr_count: number of multicast addresses to program * * Updates entire Multicast Table Array. * The caller must have a packed mc_addr_list of multicast addresses. **/ void e1000_update_mc_addr_list_generic(struct e1000_hw *hw, u8 *mc_addr_list, u32 mc_addr_count) { u32 hash_value, hash_bit, hash_reg; int i; DEBUGFUNC("e1000_update_mc_addr_list_generic"); /* clear mta_shadow */ memset(&hw->mac.mta_shadow, 0, sizeof(hw->mac.mta_shadow)); /* update mta_shadow from mc_addr_list */ for (i = 0; (u32) i < mc_addr_count; i++) { hash_value = e1000_hash_mc_addr_generic(hw, mc_addr_list); hash_reg = (hash_value >> 5) & (hw->mac.mta_reg_count - 1); hash_bit = hash_value & 0x1F; hw->mac.mta_shadow[hash_reg] |= (1 << hash_bit); mc_addr_list += (ETH_ADDR_LEN); } /* replace the entire MTA table */ for (i = hw->mac.mta_reg_count - 1; i >= 0; i--) E1000_WRITE_REG_ARRAY(hw, E1000_MTA, i, hw->mac.mta_shadow[i]); E1000_WRITE_FLUSH(hw); } /** * e1000_pcix_mmrbc_workaround_generic - Fix incorrect MMRBC value * @hw: pointer to the HW structure * * In certain situations, a system BIOS may report that the PCIx maximum * memory read byte count (MMRBC) value is higher than than the actual * value. We check the PCIx command register with the current PCIx status * register. **/ void e1000_pcix_mmrbc_workaround_generic(struct e1000_hw *hw) { u16 cmd_mmrbc; u16 pcix_cmd; u16 pcix_stat_hi_word; u16 stat_mmrbc; DEBUGFUNC("e1000_pcix_mmrbc_workaround_generic"); /* Workaround for PCI-X issue when BIOS sets MMRBC incorrectly */ if (hw->bus.type != e1000_bus_type_pcix) return; e1000_read_pci_cfg(hw, PCIX_COMMAND_REGISTER, &pcix_cmd); e1000_read_pci_cfg(hw, PCIX_STATUS_REGISTER_HI, &pcix_stat_hi_word); cmd_mmrbc = (pcix_cmd & PCIX_COMMAND_MMRBC_MASK) >> PCIX_COMMAND_MMRBC_SHIFT; stat_mmrbc = (pcix_stat_hi_word & PCIX_STATUS_HI_MMRBC_MASK) >> PCIX_STATUS_HI_MMRBC_SHIFT; if (stat_mmrbc == PCIX_STATUS_HI_MMRBC_4K) stat_mmrbc = PCIX_STATUS_HI_MMRBC_2K; if (cmd_mmrbc > stat_mmrbc) { pcix_cmd &= ~PCIX_COMMAND_MMRBC_MASK; pcix_cmd |= stat_mmrbc << PCIX_COMMAND_MMRBC_SHIFT; e1000_write_pci_cfg(hw, PCIX_COMMAND_REGISTER, &pcix_cmd); } } /** * e1000_clear_hw_cntrs_base_generic - Clear base hardware counters * @hw: pointer to the HW structure * * Clears the base hardware counters by reading the counter registers. **/ void e1000_clear_hw_cntrs_base_generic(struct e1000_hw *hw) { DEBUGFUNC("e1000_clear_hw_cntrs_base_generic"); E1000_READ_REG(hw, E1000_CRCERRS); E1000_READ_REG(hw, E1000_SYMERRS); E1000_READ_REG(hw, E1000_MPC); E1000_READ_REG(hw, E1000_SCC); E1000_READ_REG(hw, E1000_ECOL); E1000_READ_REG(hw, E1000_MCC); E1000_READ_REG(hw, E1000_LATECOL); E1000_READ_REG(hw, E1000_COLC); E1000_READ_REG(hw, E1000_DC); E1000_READ_REG(hw, E1000_SEC); E1000_READ_REG(hw, E1000_RLEC); E1000_READ_REG(hw, E1000_XONRXC); E1000_READ_REG(hw, E1000_XONTXC); E1000_READ_REG(hw, E1000_XOFFRXC); E1000_READ_REG(hw, E1000_XOFFTXC); E1000_READ_REG(hw, E1000_FCRUC); E1000_READ_REG(hw, E1000_GPRC); E1000_READ_REG(hw, E1000_BPRC); E1000_READ_REG(hw, E1000_MPRC); E1000_READ_REG(hw, E1000_GPTC); E1000_READ_REG(hw, E1000_GORCL); E1000_READ_REG(hw, E1000_GORCH); E1000_READ_REG(hw, E1000_GOTCL); E1000_READ_REG(hw, E1000_GOTCH); E1000_READ_REG(hw, E1000_RNBC); E1000_READ_REG(hw, E1000_RUC); E1000_READ_REG(hw, E1000_RFC); E1000_READ_REG(hw, E1000_ROC); E1000_READ_REG(hw, E1000_RJC); E1000_READ_REG(hw, E1000_TORL); E1000_READ_REG(hw, E1000_TORH); E1000_READ_REG(hw, E1000_TOTL); E1000_READ_REG(hw, E1000_TOTH); E1000_READ_REG(hw, E1000_TPR); E1000_READ_REG(hw, E1000_TPT); E1000_READ_REG(hw, E1000_MPTC); E1000_READ_REG(hw, E1000_BPTC); } /** * e1000_check_for_copper_link_generic - Check for link (Copper) * @hw: pointer to the HW structure * * Checks to see of the link status of the hardware has changed. If a * change in link status has been detected, then we read the PHY registers * to get the current speed/duplex if link exists. **/ s32 e1000_check_for_copper_link_generic(struct e1000_hw *hw) { struct e1000_mac_info *mac = &hw->mac; s32 ret_val; bool link; DEBUGFUNC("e1000_check_for_copper_link"); /* We only want to go out to the PHY registers to see if Auto-Neg * has completed and/or if our link status has changed. The * get_link_status flag is set upon receiving a Link Status * Change or Rx Sequence Error interrupt. */ if (!mac->get_link_status) return E1000_SUCCESS; /* First we want to see if the MII Status Register reports * link. If so, then we want to get the current speed/duplex * of the PHY. */ ret_val = e1000_phy_has_link_generic(hw, 1, 0, &link); if (ret_val) return ret_val; if (!link) return E1000_SUCCESS; /* No link detected */ mac->get_link_status = FALSE; /* Check if there was DownShift, must be checked * immediately after link-up */ e1000_check_downshift_generic(hw); /* If we are forcing speed/duplex, then we simply return since * we have already determined whether we have link or not. */ if (!mac->autoneg) return -E1000_ERR_CONFIG; /* Auto-Neg is enabled. Auto Speed Detection takes care * of MAC speed/duplex configuration. So we only need to * configure Collision Distance in the MAC. */ mac->ops.config_collision_dist(hw); /* Configure Flow Control now that Auto-Neg has completed. * First, we need to restore the desired flow control * settings because we may have had to re-autoneg with a * different link partner. */ ret_val = e1000_config_fc_after_link_up_generic(hw); if (ret_val) DEBUGOUT("Error configuring flow control\n"); return ret_val; } /** * e1000_check_for_fiber_link_generic - Check for link (Fiber) * @hw: pointer to the HW structure * * Checks for link up on the hardware. If link is not up and we have * a signal, then we need to force link up. **/ s32 e1000_check_for_fiber_link_generic(struct e1000_hw *hw) { struct e1000_mac_info *mac = &hw->mac; u32 rxcw; u32 ctrl; u32 status; s32 ret_val; DEBUGFUNC("e1000_check_for_fiber_link_generic"); ctrl = E1000_READ_REG(hw, E1000_CTRL); status = E1000_READ_REG(hw, E1000_STATUS); rxcw = E1000_READ_REG(hw, E1000_RXCW); /* If we don't have link (auto-negotiation failed or link partner * cannot auto-negotiate), the cable is plugged in (we have signal), * and our link partner is not trying to auto-negotiate with us (we * are receiving idles or data), we need to force link up. We also * need to give auto-negotiation time to complete, in case the cable * was just plugged in. The autoneg_failed flag does this. */ /* (ctrl & E1000_CTRL_SWDPIN1) == 1 == have signal */ if ((ctrl & E1000_CTRL_SWDPIN1) && !(status & E1000_STATUS_LU) && !(rxcw & E1000_RXCW_C)) { if (!mac->autoneg_failed) { mac->autoneg_failed = TRUE; return E1000_SUCCESS; } DEBUGOUT("NOT Rx'ing /C/, disable AutoNeg and force link.\n"); /* Disable auto-negotiation in the TXCW register */ E1000_WRITE_REG(hw, E1000_TXCW, (mac->txcw & ~E1000_TXCW_ANE)); /* Force link-up and also force full-duplex. */ ctrl = E1000_READ_REG(hw, E1000_CTRL); ctrl |= (E1000_CTRL_SLU | E1000_CTRL_FD); E1000_WRITE_REG(hw, E1000_CTRL, ctrl); /* Configure Flow Control after forcing link up. */ ret_val = e1000_config_fc_after_link_up_generic(hw); if (ret_val) { DEBUGOUT("Error configuring flow control\n"); return ret_val; } } else if ((ctrl & E1000_CTRL_SLU) && (rxcw & E1000_RXCW_C)) { /* If we are forcing link and we are receiving /C/ ordered * sets, re-enable auto-negotiation in the TXCW register * and disable forced link in the Device Control register * in an attempt to auto-negotiate with our link partner. */ DEBUGOUT("Rx'ing /C/, enable AutoNeg and stop forcing link.\n"); E1000_WRITE_REG(hw, E1000_TXCW, mac->txcw); E1000_WRITE_REG(hw, E1000_CTRL, (ctrl & ~E1000_CTRL_SLU)); mac->serdes_has_link = TRUE; } return E1000_SUCCESS; } /** * e1000_check_for_serdes_link_generic - Check for link (Serdes) * @hw: pointer to the HW structure * * Checks for link up on the hardware. If link is not up and we have * a signal, then we need to force link up. **/ s32 e1000_check_for_serdes_link_generic(struct e1000_hw *hw) { struct e1000_mac_info *mac = &hw->mac; u32 rxcw; u32 ctrl; u32 status; s32 ret_val; DEBUGFUNC("e1000_check_for_serdes_link_generic"); ctrl = E1000_READ_REG(hw, E1000_CTRL); status = E1000_READ_REG(hw, E1000_STATUS); rxcw = E1000_READ_REG(hw, E1000_RXCW); /* If we don't have link (auto-negotiation failed or link partner * cannot auto-negotiate), and our link partner is not trying to * auto-negotiate with us (we are receiving idles or data), * we need to force link up. We also need to give auto-negotiation * time to complete. */ /* (ctrl & E1000_CTRL_SWDPIN1) == 1 == have signal */ if (!(status & E1000_STATUS_LU) && !(rxcw & E1000_RXCW_C)) { if (!mac->autoneg_failed) { mac->autoneg_failed = TRUE; return E1000_SUCCESS; } DEBUGOUT("NOT Rx'ing /C/, disable AutoNeg and force link.\n"); /* Disable auto-negotiation in the TXCW register */ E1000_WRITE_REG(hw, E1000_TXCW, (mac->txcw & ~E1000_TXCW_ANE)); /* Force link-up and also force full-duplex. */ ctrl = E1000_READ_REG(hw, E1000_CTRL); ctrl |= (E1000_CTRL_SLU | E1000_CTRL_FD); E1000_WRITE_REG(hw, E1000_CTRL, ctrl); /* Configure Flow Control after forcing link up. */ ret_val = e1000_config_fc_after_link_up_generic(hw); if (ret_val) { DEBUGOUT("Error configuring flow control\n"); return ret_val; } } else if ((ctrl & E1000_CTRL_SLU) && (rxcw & E1000_RXCW_C)) { /* If we are forcing link and we are receiving /C/ ordered * sets, re-enable auto-negotiation in the TXCW register * and disable forced link in the Device Control register * in an attempt to auto-negotiate with our link partner. */ DEBUGOUT("Rx'ing /C/, enable AutoNeg and stop forcing link.\n"); E1000_WRITE_REG(hw, E1000_TXCW, mac->txcw); E1000_WRITE_REG(hw, E1000_CTRL, (ctrl & ~E1000_CTRL_SLU)); mac->serdes_has_link = TRUE; } else if (!(E1000_TXCW_ANE & E1000_READ_REG(hw, E1000_TXCW))) { /* If we force link for non-auto-negotiation switch, check * link status based on MAC synchronization for internal * serdes media type. */ /* SYNCH bit and IV bit are sticky. */ usec_delay(10); rxcw = E1000_READ_REG(hw, E1000_RXCW); if (rxcw & E1000_RXCW_SYNCH) { if (!(rxcw & E1000_RXCW_IV)) { mac->serdes_has_link = TRUE; DEBUGOUT("SERDES: Link up - forced.\n"); } } else { mac->serdes_has_link = FALSE; DEBUGOUT("SERDES: Link down - force failed.\n"); } } if (E1000_TXCW_ANE & E1000_READ_REG(hw, E1000_TXCW)) { status = E1000_READ_REG(hw, E1000_STATUS); if (status & E1000_STATUS_LU) { /* SYNCH bit and IV bit are sticky, so reread rxcw. */ usec_delay(10); rxcw = E1000_READ_REG(hw, E1000_RXCW); if (rxcw & E1000_RXCW_SYNCH) { if (!(rxcw & E1000_RXCW_IV)) { mac->serdes_has_link = TRUE; DEBUGOUT("SERDES: Link up - autoneg completed successfully.\n"); } else { mac->serdes_has_link = FALSE; DEBUGOUT("SERDES: Link down - invalid codewords detected in autoneg.\n"); } } else { mac->serdes_has_link = FALSE; DEBUGOUT("SERDES: Link down - no sync.\n"); } } else { mac->serdes_has_link = FALSE; DEBUGOUT("SERDES: Link down - autoneg failed\n"); } } return E1000_SUCCESS; } /** * e1000_set_default_fc_generic - Set flow control default values * @hw: pointer to the HW structure * * Read the EEPROM for the default values for flow control and store the * values. **/ s32 e1000_set_default_fc_generic(struct e1000_hw *hw) { s32 ret_val; u16 nvm_data; u16 nvm_offset = 0; DEBUGFUNC("e1000_set_default_fc_generic"); /* Read and store word 0x0F of the EEPROM. This word contains bits * that determine the hardware's default PAUSE (flow control) mode, * a bit that determines whether the HW defaults to enabling or * disabling auto-negotiation, and the direction of the * SW defined pins. If there is no SW over-ride of the flow * control setting, then the variable hw->fc will * be initialized based on a value in the EEPROM. */ if (hw->mac.type == e1000_i350) { nvm_offset = NVM_82580_LAN_FUNC_OFFSET(hw->bus.func); ret_val = hw->nvm.ops.read(hw, NVM_INIT_CONTROL2_REG + nvm_offset, 1, &nvm_data); } else { ret_val = hw->nvm.ops.read(hw, NVM_INIT_CONTROL2_REG, 1, &nvm_data); } if (ret_val) { DEBUGOUT("NVM Read Error\n"); return ret_val; } if (!(nvm_data & NVM_WORD0F_PAUSE_MASK)) hw->fc.requested_mode = e1000_fc_none; else if ((nvm_data & NVM_WORD0F_PAUSE_MASK) == NVM_WORD0F_ASM_DIR) hw->fc.requested_mode = e1000_fc_tx_pause; else hw->fc.requested_mode = e1000_fc_full; return E1000_SUCCESS; } /** * e1000_setup_link_generic - Setup flow control and link settings * @hw: pointer to the HW structure * * Determines which flow control settings to use, then configures flow * control. Calls the appropriate media-specific link configuration * function. Assuming the adapter has a valid link partner, a valid link * should be established. Assumes the hardware has previously been reset * and the transmitter and receiver are not enabled. **/ s32 e1000_setup_link_generic(struct e1000_hw *hw) { s32 ret_val; DEBUGFUNC("e1000_setup_link_generic"); /* In the case of the phy reset being blocked, we already have a link. * We do not need to set it up again. */ if (hw->phy.ops.check_reset_block && hw->phy.ops.check_reset_block(hw)) return E1000_SUCCESS; /* If requested flow control is set to default, set flow control * based on the EEPROM flow control settings. */ if (hw->fc.requested_mode == e1000_fc_default) { ret_val = e1000_set_default_fc_generic(hw); if (ret_val) return ret_val; } /* Save off the requested flow control mode for use later. Depending * on the link partner's capabilities, we may or may not use this mode. */ hw->fc.current_mode = hw->fc.requested_mode; DEBUGOUT1("After fix-ups FlowControl is now = %x\n", hw->fc.current_mode); /* Call the necessary media_type subroutine to configure the link. */ ret_val = hw->mac.ops.setup_physical_interface(hw); if (ret_val) return ret_val; /* Initialize the flow control address, type, and PAUSE timer * registers to their default values. This is done even if flow * control is disabled, because it does not hurt anything to * initialize these registers. */ DEBUGOUT("Initializing the Flow Control address, type and timer regs\n"); E1000_WRITE_REG(hw, E1000_FCT, FLOW_CONTROL_TYPE); E1000_WRITE_REG(hw, E1000_FCAH, FLOW_CONTROL_ADDRESS_HIGH); E1000_WRITE_REG(hw, E1000_FCAL, FLOW_CONTROL_ADDRESS_LOW); E1000_WRITE_REG(hw, E1000_FCTTV, hw->fc.pause_time); return e1000_set_fc_watermarks_generic(hw); } /** * e1000_commit_fc_settings_generic - Configure flow control * @hw: pointer to the HW structure * * Write the flow control settings to the Transmit Config Word Register (TXCW) * base on the flow control settings in e1000_mac_info. **/ s32 e1000_commit_fc_settings_generic(struct e1000_hw *hw) { struct e1000_mac_info *mac = &hw->mac; u32 txcw; DEBUGFUNC("e1000_commit_fc_settings_generic"); /* Check for a software override of the flow control settings, and * setup the device accordingly. If auto-negotiation is enabled, then * software will have to set the "PAUSE" bits to the correct value in * the Transmit Config Word Register (TXCW) and re-start auto- * negotiation. However, if auto-negotiation is disabled, then * software will have to manually configure the two flow control enable * bits in the CTRL register. * * The possible values of the "fc" parameter are: * 0: Flow control is completely disabled * 1: Rx flow control is enabled (we can receive pause frames, * but not send pause frames). * 2: Tx flow control is enabled (we can send pause frames but we * do not support receiving pause frames). * 3: Both Rx and Tx flow control (symmetric) are enabled. */ switch (hw->fc.current_mode) { case e1000_fc_none: /* Flow control completely disabled by a software over-ride. */ txcw = (E1000_TXCW_ANE | E1000_TXCW_FD); break; case e1000_fc_rx_pause: /* Rx Flow control is enabled and Tx Flow control is disabled * by a software over-ride. Since there really isn't a way to * advertise that we are capable of Rx Pause ONLY, we will * advertise that we support both symmetric and asymmetric Rx * PAUSE. Later, we will disable the adapter's ability to send * PAUSE frames. */ txcw = (E1000_TXCW_ANE | E1000_TXCW_FD | E1000_TXCW_PAUSE_MASK); break; case e1000_fc_tx_pause: /* Tx Flow control is enabled, and Rx Flow control is disabled, * by a software over-ride. */ txcw = (E1000_TXCW_ANE | E1000_TXCW_FD | E1000_TXCW_ASM_DIR); break; case e1000_fc_full: /* Flow control (both Rx and Tx) is enabled by a software * over-ride. */ txcw = (E1000_TXCW_ANE | E1000_TXCW_FD | E1000_TXCW_PAUSE_MASK); break; default: DEBUGOUT("Flow control param set incorrectly\n"); return -E1000_ERR_CONFIG; break; } E1000_WRITE_REG(hw, E1000_TXCW, txcw); mac->txcw = txcw; return E1000_SUCCESS; } /** * e1000_poll_fiber_serdes_link_generic - Poll for link up * @hw: pointer to the HW structure * * Polls for link up by reading the status register, if link fails to come * up with auto-negotiation, then the link is forced if a signal is detected. **/ s32 e1000_poll_fiber_serdes_link_generic(struct e1000_hw *hw) { struct e1000_mac_info *mac = &hw->mac; u32 i, status; s32 ret_val; DEBUGFUNC("e1000_poll_fiber_serdes_link_generic"); /* If we have a signal (the cable is plugged in, or assumed TRUE for * serdes media) then poll for a "Link-Up" indication in the Device * Status Register. Time-out if a link isn't seen in 500 milliseconds * seconds (Auto-negotiation should complete in less than 500 * milliseconds even if the other end is doing it in SW). */ for (i = 0; i < FIBER_LINK_UP_LIMIT; i++) { msec_delay(10); status = E1000_READ_REG(hw, E1000_STATUS); if (status & E1000_STATUS_LU) break; } if (i == FIBER_LINK_UP_LIMIT) { DEBUGOUT("Never got a valid link from auto-neg!!!\n"); mac->autoneg_failed = TRUE; /* AutoNeg failed to achieve a link, so we'll call * mac->check_for_link. This routine will force the * link up if we detect a signal. This will allow us to * communicate with non-autonegotiating link partners. */ ret_val = mac->ops.check_for_link(hw); if (ret_val) { DEBUGOUT("Error while checking for link\n"); return ret_val; } mac->autoneg_failed = FALSE; } else { mac->autoneg_failed = FALSE; DEBUGOUT("Valid Link Found\n"); } return E1000_SUCCESS; } /** * e1000_setup_fiber_serdes_link_generic - Setup link for fiber/serdes * @hw: pointer to the HW structure * * Configures collision distance and flow control for fiber and serdes * links. Upon successful setup, poll for link. **/ s32 e1000_setup_fiber_serdes_link_generic(struct e1000_hw *hw) { u32 ctrl; s32 ret_val; DEBUGFUNC("e1000_setup_fiber_serdes_link_generic"); ctrl = E1000_READ_REG(hw, E1000_CTRL); /* Take the link out of reset */ ctrl &= ~E1000_CTRL_LRST; hw->mac.ops.config_collision_dist(hw); ret_val = e1000_commit_fc_settings_generic(hw); if (ret_val) return ret_val; /* Since auto-negotiation is enabled, take the link out of reset (the * link will be in reset, because we previously reset the chip). This * will restart auto-negotiation. If auto-negotiation is successful * then the link-up status bit will be set and the flow control enable * bits (RFCE and TFCE) will be set according to their negotiated value. */ DEBUGOUT("Auto-negotiation enabled\n"); E1000_WRITE_REG(hw, E1000_CTRL, ctrl); E1000_WRITE_FLUSH(hw); msec_delay(1); /* For these adapters, the SW definable pin 1 is set when the optics * detect a signal. If we have a signal, then poll for a "Link-Up" * indication. */ if (hw->phy.media_type == e1000_media_type_internal_serdes || (E1000_READ_REG(hw, E1000_CTRL) & E1000_CTRL_SWDPIN1)) { ret_val = e1000_poll_fiber_serdes_link_generic(hw); } else { DEBUGOUT("No signal detected\n"); } return ret_val; } /** * e1000_config_collision_dist_generic - Configure collision distance * @hw: pointer to the HW structure * * Configures the collision distance to the default value and is used * during link setup. **/ static void e1000_config_collision_dist_generic(struct e1000_hw *hw) { u32 tctl; DEBUGFUNC("e1000_config_collision_dist_generic"); tctl = E1000_READ_REG(hw, E1000_TCTL); tctl &= ~E1000_TCTL_COLD; tctl |= E1000_COLLISION_DISTANCE << E1000_COLD_SHIFT; E1000_WRITE_REG(hw, E1000_TCTL, tctl); E1000_WRITE_FLUSH(hw); } /** * e1000_set_fc_watermarks_generic - Set flow control high/low watermarks * @hw: pointer to the HW structure * * Sets the flow control high/low threshold (watermark) registers. If * flow control XON frame transmission is enabled, then set XON frame * transmission as well. **/ s32 e1000_set_fc_watermarks_generic(struct e1000_hw *hw) { u32 fcrtl = 0, fcrth = 0; DEBUGFUNC("e1000_set_fc_watermarks_generic"); /* Set the flow control receive threshold registers. Normally, * these registers will be set to a default threshold that may be * adjusted later by the driver's runtime code. However, if the * ability to transmit pause frames is not enabled, then these * registers will be set to 0. */ if (hw->fc.current_mode & e1000_fc_tx_pause) { /* We need to set up the Receive Threshold high and low water * marks as well as (optionally) enabling the transmission of * XON frames. */ fcrtl = hw->fc.low_water; if (hw->fc.send_xon) fcrtl |= E1000_FCRTL_XONE; fcrth = hw->fc.high_water; } E1000_WRITE_REG(hw, E1000_FCRTL, fcrtl); E1000_WRITE_REG(hw, E1000_FCRTH, fcrth); return E1000_SUCCESS; } /** * e1000_force_mac_fc_generic - Force the MAC's flow control settings * @hw: pointer to the HW structure * * Force the MAC's flow control settings. Sets the TFCE and RFCE bits in the * device control register to reflect the adapter settings. TFCE and RFCE * need to be explicitly set by software when a copper PHY is used because * autonegotiation is managed by the PHY rather than the MAC. Software must * also configure these bits when link is forced on a fiber connection. **/ s32 e1000_force_mac_fc_generic(struct e1000_hw *hw) { u32 ctrl; DEBUGFUNC("e1000_force_mac_fc_generic"); ctrl = E1000_READ_REG(hw, E1000_CTRL); /* Because we didn't get link via the internal auto-negotiation * mechanism (we either forced link or we got link via PHY * auto-neg), we have to manually enable/disable transmit an * receive flow control. * * The "Case" statement below enables/disable flow control * according to the "hw->fc.current_mode" parameter. * * The possible values of the "fc" parameter are: * 0: Flow control is completely disabled * 1: Rx flow control is enabled (we can receive pause * frames but not send pause frames). * 2: Tx flow control is enabled (we can send pause frames * frames but we do not receive pause frames). * 3: Both Rx and Tx flow control (symmetric) is enabled. * other: No other values should be possible at this point. */ DEBUGOUT1("hw->fc.current_mode = %u\n", hw->fc.current_mode); switch (hw->fc.current_mode) { case e1000_fc_none: ctrl &= (~(E1000_CTRL_TFCE | E1000_CTRL_RFCE)); break; case e1000_fc_rx_pause: ctrl &= (~E1000_CTRL_TFCE); ctrl |= E1000_CTRL_RFCE; break; case e1000_fc_tx_pause: ctrl &= (~E1000_CTRL_RFCE); ctrl |= E1000_CTRL_TFCE; break; case e1000_fc_full: ctrl |= (E1000_CTRL_TFCE | E1000_CTRL_RFCE); break; default: DEBUGOUT("Flow control param set incorrectly\n"); return -E1000_ERR_CONFIG; } E1000_WRITE_REG(hw, E1000_CTRL, ctrl); return E1000_SUCCESS; } /** * e1000_config_fc_after_link_up_generic - Configures flow control after link * @hw: pointer to the HW structure * * Checks the status of auto-negotiation after link up to ensure that the * speed and duplex were not forced. If the link needed to be forced, then * flow control needs to be forced also. If auto-negotiation is enabled * and did not fail, then we configure flow control based on our link * partner. **/ s32 e1000_config_fc_after_link_up_generic(struct e1000_hw *hw) { struct e1000_mac_info *mac = &hw->mac; s32 ret_val = E1000_SUCCESS; u32 pcs_status_reg, pcs_adv_reg, pcs_lp_ability_reg, pcs_ctrl_reg; u16 mii_status_reg, mii_nway_adv_reg, mii_nway_lp_ability_reg; u16 speed, duplex; DEBUGFUNC("e1000_config_fc_after_link_up_generic"); /* Check for the case where we have fiber media and auto-neg failed * so we had to force link. In this case, we need to force the * configuration of the MAC to match the "fc" parameter. */ if (mac->autoneg_failed) { if (hw->phy.media_type == e1000_media_type_fiber || hw->phy.media_type == e1000_media_type_internal_serdes) ret_val = e1000_force_mac_fc_generic(hw); } else { if (hw->phy.media_type == e1000_media_type_copper) ret_val = e1000_force_mac_fc_generic(hw); } if (ret_val) { DEBUGOUT("Error forcing flow control settings\n"); return ret_val; } /* Check for the case where we have copper media and auto-neg is * enabled. In this case, we need to check and see if Auto-Neg * has completed, and if so, how the PHY and link partner has * flow control configured. */ if ((hw->phy.media_type == e1000_media_type_copper) && mac->autoneg) { /* Read the MII Status Register and check to see if AutoNeg * has completed. We read this twice because this reg has * some "sticky" (latched) bits. */ ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, &mii_status_reg); if (ret_val) return ret_val; ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, &mii_status_reg); if (ret_val) return ret_val; if (!(mii_status_reg & MII_SR_AUTONEG_COMPLETE)) { DEBUGOUT("Copper PHY and Auto Neg has not completed.\n"); return ret_val; } /* The AutoNeg process has completed, so we now need to * read both the Auto Negotiation Advertisement * Register (Address 4) and the Auto_Negotiation Base * Page Ability Register (Address 5) to determine how * flow control was negotiated. */ ret_val = hw->phy.ops.read_reg(hw, PHY_AUTONEG_ADV, &mii_nway_adv_reg); if (ret_val) return ret_val; ret_val = hw->phy.ops.read_reg(hw, PHY_LP_ABILITY, &mii_nway_lp_ability_reg); if (ret_val) return ret_val; /* Two bits in the Auto Negotiation Advertisement Register * (Address 4) and two bits in the Auto Negotiation Base * Page Ability Register (Address 5) determine flow control * for both the PHY and the link partner. The following * table, taken out of the IEEE 802.3ab/D6.0 dated March 25, * 1999, describes these PAUSE resolution bits and how flow * control is determined based upon these settings. * NOTE: DC = Don't Care * * LOCAL DEVICE | LINK PARTNER * PAUSE | ASM_DIR | PAUSE | ASM_DIR | NIC Resolution *-------|---------|-------|---------|-------------------- * 0 | 0 | DC | DC | e1000_fc_none * 0 | 1 | 0 | DC | e1000_fc_none * 0 | 1 | 1 | 0 | e1000_fc_none * 0 | 1 | 1 | 1 | e1000_fc_tx_pause * 1 | 0 | 0 | DC | e1000_fc_none * 1 | DC | 1 | DC | e1000_fc_full * 1 | 1 | 0 | 0 | e1000_fc_none * 1 | 1 | 0 | 1 | e1000_fc_rx_pause * * Are both PAUSE bits set to 1? If so, this implies * Symmetric Flow Control is enabled at both ends. The * ASM_DIR bits are irrelevant per the spec. * * For Symmetric Flow Control: * * LOCAL DEVICE | LINK PARTNER * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result *-------|---------|-------|---------|-------------------- * 1 | DC | 1 | DC | E1000_fc_full * */ if ((mii_nway_adv_reg & NWAY_AR_PAUSE) && (mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE)) { /* Now we need to check if the user selected Rx ONLY * of pause frames. In this case, we had to advertise * FULL flow control because we could not advertise Rx * ONLY. Hence, we must now check to see if we need to * turn OFF the TRANSMISSION of PAUSE frames. */ if (hw->fc.requested_mode == e1000_fc_full) { hw->fc.current_mode = e1000_fc_full; DEBUGOUT("Flow Control = FULL.\n"); } else { hw->fc.current_mode = e1000_fc_rx_pause; DEBUGOUT("Flow Control = Rx PAUSE frames only.\n"); } } /* For receiving PAUSE frames ONLY. * * LOCAL DEVICE | LINK PARTNER * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result *-------|---------|-------|---------|-------------------- * 0 | 1 | 1 | 1 | e1000_fc_tx_pause */ else if (!(mii_nway_adv_reg & NWAY_AR_PAUSE) && (mii_nway_adv_reg & NWAY_AR_ASM_DIR) && (mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE) && (mii_nway_lp_ability_reg & NWAY_LPAR_ASM_DIR)) { hw->fc.current_mode = e1000_fc_tx_pause; DEBUGOUT("Flow Control = Tx PAUSE frames only.\n"); } /* For transmitting PAUSE frames ONLY. * * LOCAL DEVICE | LINK PARTNER * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result *-------|---------|-------|---------|-------------------- * 1 | 1 | 0 | 1 | e1000_fc_rx_pause */ else if ((mii_nway_adv_reg & NWAY_AR_PAUSE) && (mii_nway_adv_reg & NWAY_AR_ASM_DIR) && !(mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE) && (mii_nway_lp_ability_reg & NWAY_LPAR_ASM_DIR)) { hw->fc.current_mode = e1000_fc_rx_pause; DEBUGOUT("Flow Control = Rx PAUSE frames only.\n"); } else { /* Per the IEEE spec, at this point flow control * should be disabled. */ hw->fc.current_mode = e1000_fc_none; DEBUGOUT("Flow Control = NONE.\n"); } /* Now we need to do one last check... If we auto- * negotiated to HALF DUPLEX, flow control should not be * enabled per IEEE 802.3 spec. */ ret_val = mac->ops.get_link_up_info(hw, &speed, &duplex); if (ret_val) { DEBUGOUT("Error getting link speed and duplex\n"); return ret_val; } if (duplex == HALF_DUPLEX) hw->fc.current_mode = e1000_fc_none; /* Now we call a subroutine to actually force the MAC * controller to use the correct flow control settings. */ ret_val = e1000_force_mac_fc_generic(hw); if (ret_val) { DEBUGOUT("Error forcing flow control settings\n"); return ret_val; } } /* Check for the case where we have SerDes media and auto-neg is * enabled. In this case, we need to check and see if Auto-Neg * has completed, and if so, how the PHY and link partner has * flow control configured. */ if ((hw->phy.media_type == e1000_media_type_internal_serdes) && mac->autoneg) { /* Read the PCS_LSTS and check to see if AutoNeg * has completed. */ pcs_status_reg = E1000_READ_REG(hw, E1000_PCS_LSTAT); if (!(pcs_status_reg & E1000_PCS_LSTS_AN_COMPLETE)) { DEBUGOUT("PCS Auto Neg has not completed.\n"); return ret_val; } /* The AutoNeg process has completed, so we now need to * read both the Auto Negotiation Advertisement * Register (PCS_ANADV) and the Auto_Negotiation Base * Page Ability Register (PCS_LPAB) to determine how * flow control was negotiated. */ pcs_adv_reg = E1000_READ_REG(hw, E1000_PCS_ANADV); pcs_lp_ability_reg = E1000_READ_REG(hw, E1000_PCS_LPAB); /* Two bits in the Auto Negotiation Advertisement Register * (PCS_ANADV) and two bits in the Auto Negotiation Base * Page Ability Register (PCS_LPAB) determine flow control * for both the PHY and the link partner. The following * table, taken out of the IEEE 802.3ab/D6.0 dated March 25, * 1999, describes these PAUSE resolution bits and how flow * control is determined based upon these settings. * NOTE: DC = Don't Care * * LOCAL DEVICE | LINK PARTNER * PAUSE | ASM_DIR | PAUSE | ASM_DIR | NIC Resolution *-------|---------|-------|---------|-------------------- * 0 | 0 | DC | DC | e1000_fc_none * 0 | 1 | 0 | DC | e1000_fc_none * 0 | 1 | 1 | 0 | e1000_fc_none * 0 | 1 | 1 | 1 | e1000_fc_tx_pause * 1 | 0 | 0 | DC | e1000_fc_none * 1 | DC | 1 | DC | e1000_fc_full * 1 | 1 | 0 | 0 | e1000_fc_none * 1 | 1 | 0 | 1 | e1000_fc_rx_pause * * Are both PAUSE bits set to 1? If so, this implies * Symmetric Flow Control is enabled at both ends. The * ASM_DIR bits are irrelevant per the spec. * * For Symmetric Flow Control: * * LOCAL DEVICE | LINK PARTNER * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result *-------|---------|-------|---------|-------------------- * 1 | DC | 1 | DC | e1000_fc_full * */ if ((pcs_adv_reg & E1000_TXCW_PAUSE) && (pcs_lp_ability_reg & E1000_TXCW_PAUSE)) { /* Now we need to check if the user selected Rx ONLY * of pause frames. In this case, we had to advertise * FULL flow control because we could not advertise Rx * ONLY. Hence, we must now check to see if we need to * turn OFF the TRANSMISSION of PAUSE frames. */ if (hw->fc.requested_mode == e1000_fc_full) { hw->fc.current_mode = e1000_fc_full; DEBUGOUT("Flow Control = FULL.\n"); } else { hw->fc.current_mode = e1000_fc_rx_pause; DEBUGOUT("Flow Control = Rx PAUSE frames only.\n"); } } /* For receiving PAUSE frames ONLY. * * LOCAL DEVICE | LINK PARTNER * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result *-------|---------|-------|---------|-------------------- * 0 | 1 | 1 | 1 | e1000_fc_tx_pause */ else if (!(pcs_adv_reg & E1000_TXCW_PAUSE) && (pcs_adv_reg & E1000_TXCW_ASM_DIR) && (pcs_lp_ability_reg & E1000_TXCW_PAUSE) && (pcs_lp_ability_reg & E1000_TXCW_ASM_DIR)) { hw->fc.current_mode = e1000_fc_tx_pause; DEBUGOUT("Flow Control = Tx PAUSE frames only.\n"); } /* For transmitting PAUSE frames ONLY. * * LOCAL DEVICE | LINK PARTNER * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result *-------|---------|-------|---------|-------------------- * 1 | 1 | 0 | 1 | e1000_fc_rx_pause */ else if ((pcs_adv_reg & E1000_TXCW_PAUSE) && (pcs_adv_reg & E1000_TXCW_ASM_DIR) && !(pcs_lp_ability_reg & E1000_TXCW_PAUSE) && (pcs_lp_ability_reg & E1000_TXCW_ASM_DIR)) { hw->fc.current_mode = e1000_fc_rx_pause; DEBUGOUT("Flow Control = Rx PAUSE frames only.\n"); } else { /* Per the IEEE spec, at this point flow control * should be disabled. */ hw->fc.current_mode = e1000_fc_none; DEBUGOUT("Flow Control = NONE.\n"); } /* Now we call a subroutine to actually force the MAC * controller to use the correct flow control settings. */ pcs_ctrl_reg = E1000_READ_REG(hw, E1000_PCS_LCTL); pcs_ctrl_reg |= E1000_PCS_LCTL_FORCE_FCTRL; E1000_WRITE_REG(hw, E1000_PCS_LCTL, pcs_ctrl_reg); ret_val = e1000_force_mac_fc_generic(hw); if (ret_val) { DEBUGOUT("Error forcing flow control settings\n"); return ret_val; } } return E1000_SUCCESS; } /** * e1000_get_speed_and_duplex_copper_generic - Retrieve current speed/duplex * @hw: pointer to the HW structure * @speed: stores the current speed * @duplex: stores the current duplex * * Read the status register for the current speed/duplex and store the current * speed and duplex for copper connections. **/ s32 e1000_get_speed_and_duplex_copper_generic(struct e1000_hw *hw, u16 *speed, u16 *duplex) { u32 status; DEBUGFUNC("e1000_get_speed_and_duplex_copper_generic"); status = E1000_READ_REG(hw, E1000_STATUS); if (status & E1000_STATUS_SPEED_1000) { *speed = SPEED_1000; DEBUGOUT("1000 Mbs, "); } else if (status & E1000_STATUS_SPEED_100) { *speed = SPEED_100; DEBUGOUT("100 Mbs, "); } else { *speed = SPEED_10; DEBUGOUT("10 Mbs, "); } if (status & E1000_STATUS_FD) { *duplex = FULL_DUPLEX; DEBUGOUT("Full Duplex\n"); } else { *duplex = HALF_DUPLEX; DEBUGOUT("Half Duplex\n"); } return E1000_SUCCESS; } /** * e1000_get_speed_and_duplex_fiber_generic - Retrieve current speed/duplex * @hw: pointer to the HW structure * @speed: stores the current speed * @duplex: stores the current duplex * * Sets the speed and duplex to gigabit full duplex (the only possible option) * for fiber/serdes links. **/ s32 e1000_get_speed_and_duplex_fiber_serdes_generic(struct e1000_hw E1000_UNUSEDARG *hw, u16 *speed, u16 *duplex) { DEBUGFUNC("e1000_get_speed_and_duplex_fiber_serdes_generic"); *speed = SPEED_1000; *duplex = FULL_DUPLEX; return E1000_SUCCESS; } /** - * e1000_get_hw_semaphore_generic - Acquire hardware semaphore - * @hw: pointer to the HW structure - * - * Acquire the HW semaphore to access the PHY or NVM - **/ -s32 e1000_get_hw_semaphore_generic(struct e1000_hw *hw) -{ - u32 swsm; - s32 timeout = hw->nvm.word_size + 1; - s32 i = 0; - - DEBUGFUNC("e1000_get_hw_semaphore_generic"); - - /* Get the SW semaphore */ - while (i < timeout) { - swsm = E1000_READ_REG(hw, E1000_SWSM); - if (!(swsm & E1000_SWSM_SMBI)) - break; - - usec_delay(50); - i++; - } - - if (i == timeout) { - DEBUGOUT("Driver can't access device - SMBI bit is set.\n"); - return -E1000_ERR_NVM; - } - - /* Get the FW semaphore. */ - for (i = 0; i < timeout; i++) { - swsm = E1000_READ_REG(hw, E1000_SWSM); - E1000_WRITE_REG(hw, E1000_SWSM, swsm | E1000_SWSM_SWESMBI); - - /* Semaphore acquired if bit latched */ - if (E1000_READ_REG(hw, E1000_SWSM) & E1000_SWSM_SWESMBI) - break; - - usec_delay(50); - } - - if (i == timeout) { - /* Release semaphores */ - e1000_put_hw_semaphore_generic(hw); - DEBUGOUT("Driver can't access the NVM\n"); - return -E1000_ERR_NVM; - } - - return E1000_SUCCESS; -} - -/** - * e1000_put_hw_semaphore_generic - Release hardware semaphore - * @hw: pointer to the HW structure - * - * Release hardware semaphore used to access the PHY or NVM - **/ -void e1000_put_hw_semaphore_generic(struct e1000_hw *hw) -{ - u32 swsm; - - DEBUGFUNC("e1000_put_hw_semaphore_generic"); - - swsm = E1000_READ_REG(hw, E1000_SWSM); - - swsm &= ~(E1000_SWSM_SMBI | E1000_SWSM_SWESMBI); - - E1000_WRITE_REG(hw, E1000_SWSM, swsm); -} - -/** * e1000_get_auto_rd_done_generic - Check for auto read completion * @hw: pointer to the HW structure * * Check EEPROM for Auto Read done bit. **/ s32 e1000_get_auto_rd_done_generic(struct e1000_hw *hw) { s32 i = 0; DEBUGFUNC("e1000_get_auto_rd_done_generic"); while (i < AUTO_READ_DONE_TIMEOUT) { if (E1000_READ_REG(hw, E1000_EECD) & E1000_EECD_AUTO_RD) break; msec_delay(1); i++; } if (i == AUTO_READ_DONE_TIMEOUT) { DEBUGOUT("Auto read by HW from NVM has not completed.\n"); return -E1000_ERR_RESET; } return E1000_SUCCESS; } /** * e1000_valid_led_default_generic - Verify a valid default LED config * @hw: pointer to the HW structure * @data: pointer to the NVM (EEPROM) * * Read the EEPROM for the current default LED configuration. If the * LED configuration is not valid, set to a valid LED configuration. **/ s32 e1000_valid_led_default_generic(struct e1000_hw *hw, u16 *data) { s32 ret_val; DEBUGFUNC("e1000_valid_led_default_generic"); ret_val = hw->nvm.ops.read(hw, NVM_ID_LED_SETTINGS, 1, data); if (ret_val) { DEBUGOUT("NVM Read Error\n"); return ret_val; } if (*data == ID_LED_RESERVED_0000 || *data == ID_LED_RESERVED_FFFF) *data = ID_LED_DEFAULT; return E1000_SUCCESS; } /** * e1000_id_led_init_generic - * @hw: pointer to the HW structure * **/ s32 e1000_id_led_init_generic(struct e1000_hw *hw) { struct e1000_mac_info *mac = &hw->mac; s32 ret_val; const u32 ledctl_mask = 0x000000FF; const u32 ledctl_on = E1000_LEDCTL_MODE_LED_ON; const u32 ledctl_off = E1000_LEDCTL_MODE_LED_OFF; u16 data, i, temp; const u16 led_mask = 0x0F; DEBUGFUNC("e1000_id_led_init_generic"); ret_val = hw->nvm.ops.valid_led_default(hw, &data); if (ret_val) return ret_val; mac->ledctl_default = E1000_READ_REG(hw, E1000_LEDCTL); mac->ledctl_mode1 = mac->ledctl_default; mac->ledctl_mode2 = mac->ledctl_default; for (i = 0; i < 4; i++) { temp = (data >> (i << 2)) & led_mask; switch (temp) { case ID_LED_ON1_DEF2: case ID_LED_ON1_ON2: case ID_LED_ON1_OFF2: mac->ledctl_mode1 &= ~(ledctl_mask << (i << 3)); mac->ledctl_mode1 |= ledctl_on << (i << 3); break; case ID_LED_OFF1_DEF2: case ID_LED_OFF1_ON2: case ID_LED_OFF1_OFF2: mac->ledctl_mode1 &= ~(ledctl_mask << (i << 3)); mac->ledctl_mode1 |= ledctl_off << (i << 3); break; default: /* Do nothing */ break; } switch (temp) { case ID_LED_DEF1_ON2: case ID_LED_ON1_ON2: case ID_LED_OFF1_ON2: mac->ledctl_mode2 &= ~(ledctl_mask << (i << 3)); mac->ledctl_mode2 |= ledctl_on << (i << 3); break; case ID_LED_DEF1_OFF2: case ID_LED_ON1_OFF2: case ID_LED_OFF1_OFF2: mac->ledctl_mode2 &= ~(ledctl_mask << (i << 3)); mac->ledctl_mode2 |= ledctl_off << (i << 3); break; default: /* Do nothing */ break; } } return E1000_SUCCESS; } /** * e1000_setup_led_generic - Configures SW controllable LED * @hw: pointer to the HW structure * * This prepares the SW controllable LED for use and saves the current state * of the LED so it can be later restored. **/ s32 e1000_setup_led_generic(struct e1000_hw *hw) { u32 ledctl; DEBUGFUNC("e1000_setup_led_generic"); if (hw->mac.ops.setup_led != e1000_setup_led_generic) return -E1000_ERR_CONFIG; if (hw->phy.media_type == e1000_media_type_fiber) { ledctl = E1000_READ_REG(hw, E1000_LEDCTL); hw->mac.ledctl_default = ledctl; /* Turn off LED0 */ ledctl &= ~(E1000_LEDCTL_LED0_IVRT | E1000_LEDCTL_LED0_BLINK | E1000_LEDCTL_LED0_MODE_MASK); ledctl |= (E1000_LEDCTL_MODE_LED_OFF << E1000_LEDCTL_LED0_MODE_SHIFT); E1000_WRITE_REG(hw, E1000_LEDCTL, ledctl); } else if (hw->phy.media_type == e1000_media_type_copper) { E1000_WRITE_REG(hw, E1000_LEDCTL, hw->mac.ledctl_mode1); } return E1000_SUCCESS; } /** * e1000_cleanup_led_generic - Set LED config to default operation * @hw: pointer to the HW structure * * Remove the current LED configuration and set the LED configuration * to the default value, saved from the EEPROM. **/ s32 e1000_cleanup_led_generic(struct e1000_hw *hw) { DEBUGFUNC("e1000_cleanup_led_generic"); E1000_WRITE_REG(hw, E1000_LEDCTL, hw->mac.ledctl_default); return E1000_SUCCESS; } /** * e1000_blink_led_generic - Blink LED * @hw: pointer to the HW structure * * Blink the LEDs which are set to be on. **/ s32 e1000_blink_led_generic(struct e1000_hw *hw) { u32 ledctl_blink = 0; u32 i; DEBUGFUNC("e1000_blink_led_generic"); if (hw->phy.media_type == e1000_media_type_fiber) { /* always blink LED0 for PCI-E fiber */ ledctl_blink = E1000_LEDCTL_LED0_BLINK | (E1000_LEDCTL_MODE_LED_ON << E1000_LEDCTL_LED0_MODE_SHIFT); } else { /* Set the blink bit for each LED that's "on" (0x0E) * (or "off" if inverted) in ledctl_mode2. The blink * logic in hardware only works when mode is set to "on" * so it must be changed accordingly when the mode is * "off" and inverted. */ ledctl_blink = hw->mac.ledctl_mode2; for (i = 0; i < 32; i += 8) { u32 mode = (hw->mac.ledctl_mode2 >> i) & E1000_LEDCTL_LED0_MODE_MASK; u32 led_default = hw->mac.ledctl_default >> i; if ((!(led_default & E1000_LEDCTL_LED0_IVRT) && (mode == E1000_LEDCTL_MODE_LED_ON)) || ((led_default & E1000_LEDCTL_LED0_IVRT) && (mode == E1000_LEDCTL_MODE_LED_OFF))) { ledctl_blink &= ~(E1000_LEDCTL_LED0_MODE_MASK << i); ledctl_blink |= (E1000_LEDCTL_LED0_BLINK | E1000_LEDCTL_MODE_LED_ON) << i; } } } E1000_WRITE_REG(hw, E1000_LEDCTL, ledctl_blink); return E1000_SUCCESS; } /** * e1000_led_on_generic - Turn LED on * @hw: pointer to the HW structure * * Turn LED on. **/ s32 e1000_led_on_generic(struct e1000_hw *hw) { u32 ctrl; DEBUGFUNC("e1000_led_on_generic"); switch (hw->phy.media_type) { case e1000_media_type_fiber: ctrl = E1000_READ_REG(hw, E1000_CTRL); ctrl &= ~E1000_CTRL_SWDPIN0; ctrl |= E1000_CTRL_SWDPIO0; E1000_WRITE_REG(hw, E1000_CTRL, ctrl); break; case e1000_media_type_copper: E1000_WRITE_REG(hw, E1000_LEDCTL, hw->mac.ledctl_mode2); break; default: break; } return E1000_SUCCESS; } /** * e1000_led_off_generic - Turn LED off * @hw: pointer to the HW structure * * Turn LED off. **/ s32 e1000_led_off_generic(struct e1000_hw *hw) { u32 ctrl; DEBUGFUNC("e1000_led_off_generic"); switch (hw->phy.media_type) { case e1000_media_type_fiber: ctrl = E1000_READ_REG(hw, E1000_CTRL); ctrl |= E1000_CTRL_SWDPIN0; ctrl |= E1000_CTRL_SWDPIO0; E1000_WRITE_REG(hw, E1000_CTRL, ctrl); break; case e1000_media_type_copper: E1000_WRITE_REG(hw, E1000_LEDCTL, hw->mac.ledctl_mode1); break; default: break; } return E1000_SUCCESS; } /** * e1000_set_pcie_no_snoop_generic - Set PCI-express capabilities * @hw: pointer to the HW structure * @no_snoop: bitmap of snoop events * * Set the PCI-express register to snoop for events enabled in 'no_snoop'. **/ void e1000_set_pcie_no_snoop_generic(struct e1000_hw *hw, u32 no_snoop) { u32 gcr; DEBUGFUNC("e1000_set_pcie_no_snoop_generic"); if (hw->bus.type != e1000_bus_type_pci_express) return; if (no_snoop) { gcr = E1000_READ_REG(hw, E1000_GCR); gcr &= ~(PCIE_NO_SNOOP_ALL); gcr |= no_snoop; E1000_WRITE_REG(hw, E1000_GCR, gcr); } } /** * e1000_disable_pcie_master_generic - Disables PCI-express master access * @hw: pointer to the HW structure * * Returns E1000_SUCCESS if successful, else returns -10 * (-E1000_ERR_MASTER_REQUESTS_PENDING) if master disable bit has not caused * the master requests to be disabled. * * Disables PCI-Express master access and verifies there are no pending * requests. **/ s32 e1000_disable_pcie_master_generic(struct e1000_hw *hw) { u32 ctrl; s32 timeout = MASTER_DISABLE_TIMEOUT; DEBUGFUNC("e1000_disable_pcie_master_generic"); if (hw->bus.type != e1000_bus_type_pci_express) return E1000_SUCCESS; ctrl = E1000_READ_REG(hw, E1000_CTRL); ctrl |= E1000_CTRL_GIO_MASTER_DISABLE; E1000_WRITE_REG(hw, E1000_CTRL, ctrl); while (timeout) { if (!(E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_GIO_MASTER_ENABLE) || E1000_REMOVED(hw->hw_addr)) break; usec_delay(100); timeout--; } if (!timeout) { DEBUGOUT("Master requests are pending.\n"); return -E1000_ERR_MASTER_REQUESTS_PENDING; } return E1000_SUCCESS; } /** * e1000_reset_adaptive_generic - Reset Adaptive Interframe Spacing * @hw: pointer to the HW structure * * Reset the Adaptive Interframe Spacing throttle to default values. **/ void e1000_reset_adaptive_generic(struct e1000_hw *hw) { struct e1000_mac_info *mac = &hw->mac; DEBUGFUNC("e1000_reset_adaptive_generic"); if (!mac->adaptive_ifs) { DEBUGOUT("Not in Adaptive IFS mode!\n"); return; } mac->current_ifs_val = 0; mac->ifs_min_val = IFS_MIN; mac->ifs_max_val = IFS_MAX; mac->ifs_step_size = IFS_STEP; mac->ifs_ratio = IFS_RATIO; mac->in_ifs_mode = FALSE; E1000_WRITE_REG(hw, E1000_AIT, 0); } /** * e1000_update_adaptive_generic - Update Adaptive Interframe Spacing * @hw: pointer to the HW structure * * Update the Adaptive Interframe Spacing Throttle value based on the * time between transmitted packets and time between collisions. **/ void e1000_update_adaptive_generic(struct e1000_hw *hw) { struct e1000_mac_info *mac = &hw->mac; DEBUGFUNC("e1000_update_adaptive_generic"); if (!mac->adaptive_ifs) { DEBUGOUT("Not in Adaptive IFS mode!\n"); return; } if ((mac->collision_delta * mac->ifs_ratio) > mac->tx_packet_delta) { if (mac->tx_packet_delta > MIN_NUM_XMITS) { mac->in_ifs_mode = TRUE; if (mac->current_ifs_val < mac->ifs_max_val) { if (!mac->current_ifs_val) mac->current_ifs_val = mac->ifs_min_val; else mac->current_ifs_val += mac->ifs_step_size; E1000_WRITE_REG(hw, E1000_AIT, mac->current_ifs_val); } } } else { if (mac->in_ifs_mode && (mac->tx_packet_delta <= MIN_NUM_XMITS)) { mac->current_ifs_val = 0; mac->in_ifs_mode = FALSE; E1000_WRITE_REG(hw, E1000_AIT, 0); } } } /** * e1000_validate_mdi_setting_generic - Verify MDI/MDIx settings * @hw: pointer to the HW structure * * Verify that when not using auto-negotiation that MDI/MDIx is correctly * set, which is forced to MDI mode only. **/ static s32 e1000_validate_mdi_setting_generic(struct e1000_hw *hw) { DEBUGFUNC("e1000_validate_mdi_setting_generic"); if (!hw->mac.autoneg && (hw->phy.mdix == 0 || hw->phy.mdix == 3)) { DEBUGOUT("Invalid MDI setting detected\n"); hw->phy.mdix = 1; return -E1000_ERR_CONFIG; } return E1000_SUCCESS; } /** * e1000_validate_mdi_setting_crossover_generic - Verify MDI/MDIx settings * @hw: pointer to the HW structure * * Validate the MDI/MDIx setting, allowing for auto-crossover during forced * operation. **/ s32 e1000_validate_mdi_setting_crossover_generic(struct e1000_hw E1000_UNUSEDARG *hw) { DEBUGFUNC("e1000_validate_mdi_setting_crossover_generic"); return E1000_SUCCESS; } /** * e1000_write_8bit_ctrl_reg_generic - Write a 8bit CTRL register * @hw: pointer to the HW structure * @reg: 32bit register offset such as E1000_SCTL * @offset: register offset to write to * @data: data to write at register offset * * Writes an address/data control type register. There are several of these * and they all have the format address << 8 | data and bit 31 is polled for * completion. **/ s32 e1000_write_8bit_ctrl_reg_generic(struct e1000_hw *hw, u32 reg, u32 offset, u8 data) { u32 i, regvalue = 0; DEBUGFUNC("e1000_write_8bit_ctrl_reg_generic"); /* Set up the address and data */ regvalue = ((u32)data) | (offset << E1000_GEN_CTL_ADDRESS_SHIFT); E1000_WRITE_REG(hw, reg, regvalue); /* Poll the ready bit to see if the MDI read completed */ for (i = 0; i < E1000_GEN_POLL_TIMEOUT; i++) { usec_delay(5); regvalue = E1000_READ_REG(hw, reg); if (regvalue & E1000_GEN_CTL_READY) break; } if (!(regvalue & E1000_GEN_CTL_READY)) { DEBUGOUT1("Reg %08x did not indicate ready\n", reg); return -E1000_ERR_PHY; } return E1000_SUCCESS; } + +/** + * e1000_get_hw_semaphore - Acquire hardware semaphore + * @hw: pointer to the HW structure + * + * Acquire the HW semaphore to access the PHY or NVM + **/ +s32 e1000_get_hw_semaphore(struct e1000_hw *hw) +{ + u32 swsm; + s32 timeout = hw->nvm.word_size + 1; + s32 i = 0; + + DEBUGFUNC("e1000_get_hw_semaphore"); +#ifdef notyet + /* _82571 */ + /* If we have timedout 3 times on trying to acquire + * the inter-port SMBI semaphore, there is old code + * operating on the other port, and it is not + * releasing SMBI. Modify the number of times that + * we try for the semaphore to interwork with this + * older code. + */ + if (hw->dev_spec._82571.smb_counter > 2) + sw_timeout = 1; + +#endif + /* Get the SW semaphore */ + while (i < timeout) { + swsm = E1000_READ_REG(hw, E1000_SWSM); + if (!(swsm & E1000_SWSM_SMBI)) + break; + + usec_delay(50); + i++; + } + + if (i == timeout) { +#ifdef notyet + /* + * XXX This sounds more like a driver bug whereby we either + * recursed accidentally or missed clearing it previously + */ + /* In rare circumstances, the SW semaphore may already be held + * unintentionally. Clear the semaphore once before giving up. + */ + if (hw->dev_spec._82575.clear_semaphore_once) { + hw->dev_spec._82575.clear_semaphore_once = FALSE; + e1000_put_hw_semaphore_generic(hw); + for (i = 0; i < timeout; i++) { + swsm = E1000_READ_REG(hw, E1000_SWSM); + if (!(swsm & E1000_SWSM_SMBI)) + break; + + usec_delay(50); + } + } +#endif + + DEBUGOUT("Driver can't access device - SMBI bit is set.\n"); + return -E1000_ERR_NVM; + } + + /* Get the FW semaphore. */ + for (i = 0; i < timeout; i++) { + swsm = E1000_READ_REG(hw, E1000_SWSM); + E1000_WRITE_REG(hw, E1000_SWSM, swsm | E1000_SWSM_SWESMBI); + + /* Semaphore acquired if bit latched */ + if (E1000_READ_REG(hw, E1000_SWSM) & E1000_SWSM_SWESMBI) + break; + + usec_delay(50); + } + + if (i == timeout) { + /* Release semaphores */ + e1000_put_hw_semaphore(hw); + DEBUGOUT("Driver can't access the NVM\n"); + return -E1000_ERR_NVM; + } + + return E1000_SUCCESS; +} + +/** + * e1000_put_hw_semaphore - Release hardware semaphore + * @hw: pointer to the HW structure + * + * Release hardware semaphore used to access the PHY or NVM + **/ +void e1000_put_hw_semaphore(struct e1000_hw *hw) +{ + u32 swsm; + + DEBUGFUNC("e1000_put_hw_semaphore"); + + swsm = E1000_READ_REG(hw, E1000_SWSM); + + swsm &= ~(E1000_SWSM_SMBI | E1000_SWSM_SWESMBI); + + E1000_WRITE_REG(hw, E1000_SWSM, swsm); +} + + +/** + * e1000_acquire_swfw_sync - Acquire SW/FW semaphore + * @hw: pointer to the HW structure + * @mask: specifies which semaphore to acquire + * + * Acquire the SW/FW semaphore to access the PHY or NVM. The mask + * will also specify which port we're acquiring the lock for. + **/ +s32 +e1000_acquire_swfw_sync(struct e1000_hw *hw, u16 mask) +{ + u32 swfw_sync; + u32 swmask = mask; + u32 fwmask = mask << 16; + s32 ret_val = E1000_SUCCESS; + s32 i = 0, timeout = 200; + + DEBUGFUNC("e1000_acquire_swfw_sync"); + ASSERT_NO_LOCKS(); + while (i < timeout) { + if (e1000_get_hw_semaphore(hw)) { + ret_val = -E1000_ERR_SWFW_SYNC; + goto out; + } + + swfw_sync = E1000_READ_REG(hw, E1000_SW_FW_SYNC); + if (!(swfw_sync & (fwmask | swmask))) + break; + + /* + * Firmware currently using resource (fwmask) + * or other software thread using resource (swmask) + */ + e1000_put_hw_semaphore(hw); + msec_delay_irq(5); + i++; + } + + if (i == timeout) { + DEBUGOUT("Driver can't access resource, SW_FW_SYNC timeout.\n"); + ret_val = -E1000_ERR_SWFW_SYNC; + goto out; + } + + swfw_sync |= swmask; + E1000_WRITE_REG(hw, E1000_SW_FW_SYNC, swfw_sync); + + e1000_put_hw_semaphore(hw); + +out: + return ret_val; +} + +/** + * e1000_release_swfw_sync - Release SW/FW semaphore + * @hw: pointer to the HW structure + * @mask: specifies which semaphore to acquire + * + * Release the SW/FW semaphore used to access the PHY or NVM. The mask + * will also specify which port we're releasing the lock for. + **/ +void +e1000_release_swfw_sync(struct e1000_hw *hw, u16 mask) +{ + u32 swfw_sync; + + DEBUGFUNC("e1000_release_swfw_sync"); + + while (e1000_get_hw_semaphore(hw) != E1000_SUCCESS) + ; /* Empty */ + + swfw_sync = E1000_READ_REG(hw, E1000_SW_FW_SYNC); + swfw_sync &= ~mask; + E1000_WRITE_REG(hw, E1000_SW_FW_SYNC, swfw_sync); + + e1000_put_hw_semaphore(hw); +} + Index: head/sys/dev/e1000/e1000_mac.h =================================================================== --- head/sys/dev/e1000/e1000_mac.h (revision 323515) +++ head/sys/dev/e1000/e1000_mac.h (revision 323516) @@ -1,95 +1,98 @@ /****************************************************************************** Copyright (c) 2001-2015, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************/ /*$FreeBSD$*/ #ifndef _E1000_MAC_H_ #define _E1000_MAC_H_ void e1000_init_mac_ops_generic(struct e1000_hw *hw); #define E1000_REMOVED(a) (0) void e1000_null_mac_generic(struct e1000_hw *hw); s32 e1000_null_ops_generic(struct e1000_hw *hw); s32 e1000_null_link_info(struct e1000_hw *hw, u16 *s, u16 *d); bool e1000_null_mng_mode(struct e1000_hw *hw); void e1000_null_update_mc(struct e1000_hw *hw, u8 *h, u32 a); void e1000_null_write_vfta(struct e1000_hw *hw, u32 a, u32 b); int e1000_null_rar_set(struct e1000_hw *hw, u8 *h, u32 a); s32 e1000_null_set_obff_timer(struct e1000_hw *hw, u32 a); s32 e1000_blink_led_generic(struct e1000_hw *hw); s32 e1000_check_for_copper_link_generic(struct e1000_hw *hw); s32 e1000_check_for_fiber_link_generic(struct e1000_hw *hw); s32 e1000_check_for_serdes_link_generic(struct e1000_hw *hw); s32 e1000_cleanup_led_generic(struct e1000_hw *hw); s32 e1000_commit_fc_settings_generic(struct e1000_hw *hw); s32 e1000_poll_fiber_serdes_link_generic(struct e1000_hw *hw); s32 e1000_config_fc_after_link_up_generic(struct e1000_hw *hw); s32 e1000_disable_pcie_master_generic(struct e1000_hw *hw); s32 e1000_force_mac_fc_generic(struct e1000_hw *hw); s32 e1000_get_auto_rd_done_generic(struct e1000_hw *hw); s32 e1000_get_bus_info_pci_generic(struct e1000_hw *hw); s32 e1000_get_bus_info_pcie_generic(struct e1000_hw *hw); void e1000_set_lan_id_single_port(struct e1000_hw *hw); void e1000_set_lan_id_multi_port_pci(struct e1000_hw *hw); -s32 e1000_get_hw_semaphore_generic(struct e1000_hw *hw); s32 e1000_get_speed_and_duplex_copper_generic(struct e1000_hw *hw, u16 *speed, u16 *duplex); s32 e1000_get_speed_and_duplex_fiber_serdes_generic(struct e1000_hw *hw, u16 *speed, u16 *duplex); s32 e1000_id_led_init_generic(struct e1000_hw *hw); s32 e1000_led_on_generic(struct e1000_hw *hw); s32 e1000_led_off_generic(struct e1000_hw *hw); void e1000_update_mc_addr_list_generic(struct e1000_hw *hw, u8 *mc_addr_list, u32 mc_addr_count); s32 e1000_set_default_fc_generic(struct e1000_hw *hw); s32 e1000_set_fc_watermarks_generic(struct e1000_hw *hw); s32 e1000_setup_fiber_serdes_link_generic(struct e1000_hw *hw); s32 e1000_setup_led_generic(struct e1000_hw *hw); s32 e1000_setup_link_generic(struct e1000_hw *hw); s32 e1000_validate_mdi_setting_crossover_generic(struct e1000_hw *hw); s32 e1000_write_8bit_ctrl_reg_generic(struct e1000_hw *hw, u32 reg, u32 offset, u8 data); u32 e1000_hash_mc_addr_generic(struct e1000_hw *hw, u8 *mc_addr); void e1000_clear_hw_cntrs_base_generic(struct e1000_hw *hw); void e1000_clear_vfta_generic(struct e1000_hw *hw); void e1000_init_rx_addrs_generic(struct e1000_hw *hw, u16 rar_count); void e1000_pcix_mmrbc_workaround_generic(struct e1000_hw *hw); -void e1000_put_hw_semaphore_generic(struct e1000_hw *hw); s32 e1000_check_alt_mac_addr_generic(struct e1000_hw *hw); void e1000_reset_adaptive_generic(struct e1000_hw *hw); void e1000_set_pcie_no_snoop_generic(struct e1000_hw *hw, u32 no_snoop); void e1000_update_adaptive_generic(struct e1000_hw *hw); void e1000_write_vfta_generic(struct e1000_hw *hw, u32 offset, u32 value); + +s32 e1000_get_hw_semaphore(struct e1000_hw *hw); +void e1000_put_hw_semaphore(struct e1000_hw *hw); +s32 e1000_acquire_swfw_sync(struct e1000_hw *hw, u16 mask); +void e1000_release_swfw_sync(struct e1000_hw *hw, u16 mask); #endif Index: head/sys/dev/e1000/e1000_osdep.h =================================================================== --- head/sys/dev/e1000/e1000_osdep.h (revision 323515) +++ head/sys/dev/e1000/e1000_osdep.h (revision 323516) @@ -1,220 +1,273 @@ /****************************************************************************** Copyright (c) 2001-2015, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************/ /*$FreeBSD$*/ #ifndef _FREEBSD_OS_H_ #define _FREEBSD_OS_H_ #include #include #include +#include #include #include #include #include #include #include #include #include + +#include +#include +#include +#include + + + #include #include #include #include #include #include #include #include #define ASSERT(x) if(!(x)) panic("EM: x") +#define us_scale(x) max(1, (x/(1000000/hz))) +static inline int +ms_scale(int x) { + if (hz == 1000) { + return (x); + } else if (hz > 1000) { + return (x*(hz/1000)); + } else { + return (max(1, x/(1000/hz))); + } +} +extern int cold; -#define usec_delay(x) DELAY(x) +static inline void +safe_pause_us(int x) { + if (cold) { + DELAY(x); + } else { + pause("e1000_delay", max(1, x/(1000000/hz))); + } +} + +static inline void +safe_pause_ms(int x) { + if (cold) { + DELAY(x*1000); + } else { + pause("e1000_delay", ms_scale(x)); + } +} + +#define usec_delay(x) safe_pause_us(x) #define usec_delay_irq(x) usec_delay(x) -#define msec_delay(x) DELAY(1000*(x)) -#define msec_delay_irq(x) DELAY(1000*(x)) +#define msec_delay(x) safe_pause_ms(x) +#define msec_delay_irq(x) msec_delay(x) /* Enable/disable debugging statements in shared code */ #define DBG 0 #define DEBUGOUT(...) \ do { if (DBG) printf(__VA_ARGS__); } while (0) #define DEBUGOUT1(...) DEBUGOUT(__VA_ARGS__) #define DEBUGOUT2(...) DEBUGOUT(__VA_ARGS__) #define DEBUGOUT3(...) DEBUGOUT(__VA_ARGS__) #define DEBUGOUT7(...) DEBUGOUT(__VA_ARGS__) #define DEBUGFUNC(F) DEBUGOUT(F "\n") #define STATIC static #define FALSE 0 #define TRUE 1 #define CMD_MEM_WRT_INVALIDATE 0x0010 /* BIT_4 */ #define PCI_COMMAND_REGISTER PCIR_COMMAND -/* Mutex used in the shared code */ -#define E1000_MUTEX struct mtx -#define E1000_MUTEX_INIT(mutex) mtx_init((mutex), #mutex, \ - MTX_NETWORK_LOCK, \ - MTX_DEF | MTX_DUPOK) -#define E1000_MUTEX_DESTROY(mutex) mtx_destroy(mutex) -#define E1000_MUTEX_LOCK(mutex) mtx_lock(mutex) -#define E1000_MUTEX_TRYLOCK(mutex) mtx_trylock(mutex) -#define E1000_MUTEX_UNLOCK(mutex) mtx_unlock(mutex) - typedef uint64_t u64; typedef uint32_t u32; typedef uint16_t u16; typedef uint8_t u8; typedef int64_t s64; typedef int32_t s32; typedef int16_t s16; typedef int8_t s8; #define __le16 u16 #define __le32 u32 #define __le64 u64 #if __FreeBSD_version < 800000 #if defined(__i386__) || defined(__amd64__) #define mb() __asm volatile("mfence" ::: "memory") #define wmb() __asm volatile("sfence" ::: "memory") #define rmb() __asm volatile("lfence" ::: "memory") #else #define mb() #define rmb() #define wmb() #endif #endif /*__FreeBSD_version < 800000 */ +#ifdef INVARIANTS +#define ASSERT_CTX_LOCK_HELD(hw) (sx_assert(iflib_ctx_lock_get(((struct e1000_osdep *)hw->back)->ctx), SX_XLOCKED)) +#else +#define ASSERT_CTX_LOCK_HELD(hw) +#endif + #if defined(__i386__) || defined(__amd64__) static __inline void prefetch(void *x) { __asm volatile("prefetcht0 %0" :: "m" (*(unsigned long *)x)); } #else #define prefetch(x) #endif struct e1000_osdep { bus_space_tag_t mem_bus_space_tag; bus_space_handle_t mem_bus_space_handle; bus_space_tag_t io_bus_space_tag; bus_space_handle_t io_bus_space_handle; bus_space_tag_t flash_bus_space_tag; bus_space_handle_t flash_bus_space_handle; device_t dev; + if_ctx_t ctx; }; #define E1000_REGISTER(hw, reg) (((hw)->mac.type >= e1000_82543) \ ? reg : e1000_translate_register_82542(reg)) #define E1000_WRITE_FLUSH(a) E1000_READ_REG(a, E1000_STATUS) /* Read from an absolute offset in the adapter's memory space */ #define E1000_READ_OFFSET(hw, offset) \ bus_space_read_4(((struct e1000_osdep *)(hw)->back)->mem_bus_space_tag, \ ((struct e1000_osdep *)(hw)->back)->mem_bus_space_handle, offset) /* Write to an absolute offset in the adapter's memory space */ #define E1000_WRITE_OFFSET(hw, offset, value) \ bus_space_write_4(((struct e1000_osdep *)(hw)->back)->mem_bus_space_tag, \ ((struct e1000_osdep *)(hw)->back)->mem_bus_space_handle, offset, value) /* Register READ/WRITE macros */ #define E1000_READ_REG(hw, reg) \ bus_space_read_4(((struct e1000_osdep *)(hw)->back)->mem_bus_space_tag, \ ((struct e1000_osdep *)(hw)->back)->mem_bus_space_handle, \ E1000_REGISTER(hw, reg)) #define E1000_WRITE_REG(hw, reg, value) \ bus_space_write_4(((struct e1000_osdep *)(hw)->back)->mem_bus_space_tag, \ ((struct e1000_osdep *)(hw)->back)->mem_bus_space_handle, \ E1000_REGISTER(hw, reg), value) #define E1000_READ_REG_ARRAY(hw, reg, index) \ bus_space_read_4(((struct e1000_osdep *)(hw)->back)->mem_bus_space_tag, \ ((struct e1000_osdep *)(hw)->back)->mem_bus_space_handle, \ E1000_REGISTER(hw, reg) + ((index)<< 2)) #define E1000_WRITE_REG_ARRAY(hw, reg, index, value) \ bus_space_write_4(((struct e1000_osdep *)(hw)->back)->mem_bus_space_tag, \ ((struct e1000_osdep *)(hw)->back)->mem_bus_space_handle, \ E1000_REGISTER(hw, reg) + ((index)<< 2), value) #define E1000_READ_REG_ARRAY_DWORD E1000_READ_REG_ARRAY #define E1000_WRITE_REG_ARRAY_DWORD E1000_WRITE_REG_ARRAY #define E1000_READ_REG_ARRAY_BYTE(hw, reg, index) \ bus_space_read_1(((struct e1000_osdep *)(hw)->back)->mem_bus_space_tag, \ ((struct e1000_osdep *)(hw)->back)->mem_bus_space_handle, \ E1000_REGISTER(hw, reg) + index) #define E1000_WRITE_REG_ARRAY_BYTE(hw, reg, index, value) \ bus_space_write_1(((struct e1000_osdep *)(hw)->back)->mem_bus_space_tag, \ ((struct e1000_osdep *)(hw)->back)->mem_bus_space_handle, \ E1000_REGISTER(hw, reg) + index, value) #define E1000_WRITE_REG_ARRAY_WORD(hw, reg, index, value) \ bus_space_write_2(((struct e1000_osdep *)(hw)->back)->mem_bus_space_tag, \ ((struct e1000_osdep *)(hw)->back)->mem_bus_space_handle, \ E1000_REGISTER(hw, reg) + (index << 1), value) #define E1000_WRITE_REG_IO(hw, reg, value) do {\ bus_space_write_4(((struct e1000_osdep *)(hw)->back)->io_bus_space_tag, \ ((struct e1000_osdep *)(hw)->back)->io_bus_space_handle, \ (hw)->io_base, reg); \ bus_space_write_4(((struct e1000_osdep *)(hw)->back)->io_bus_space_tag, \ ((struct e1000_osdep *)(hw)->back)->io_bus_space_handle, \ (hw)->io_base + 4, value); } while (0) #define E1000_READ_FLASH_REG(hw, reg) \ bus_space_read_4(((struct e1000_osdep *)(hw)->back)->flash_bus_space_tag, \ ((struct e1000_osdep *)(hw)->back)->flash_bus_space_handle, reg) #define E1000_READ_FLASH_REG16(hw, reg) \ bus_space_read_2(((struct e1000_osdep *)(hw)->back)->flash_bus_space_tag, \ ((struct e1000_osdep *)(hw)->back)->flash_bus_space_handle, reg) #define E1000_WRITE_FLASH_REG(hw, reg, value) \ bus_space_write_4(((struct e1000_osdep *)(hw)->back)->flash_bus_space_tag, \ ((struct e1000_osdep *)(hw)->back)->flash_bus_space_handle, reg, value) #define E1000_WRITE_FLASH_REG16(hw, reg, value) \ bus_space_write_2(((struct e1000_osdep *)(hw)->back)->flash_bus_space_tag, \ ((struct e1000_osdep *)(hw)->back)->flash_bus_space_handle, reg, value) + + +#if defined(INVARIANTS) +#include + +#define ASSERT_NO_LOCKS() \ + do { \ + int unknown_locks = curthread->td_locks - mtx_owned(&Giant); \ + if (unknown_locks > 0) { \ + WITNESS_WARN(WARN_GIANTOK|WARN_SLEEPOK|WARN_PANIC, NULL, "unexpected non-sleepable lock"); \ + } \ + MPASS(curthread->td_rw_rlocks == 0); \ + MPASS(curthread->td_lk_slocks == 0); \ + } while (0) +#else +#define ASSERT_NO_LOCKS() +#endif #endif /* _FREEBSD_OS_H_ */ Index: head/sys/dev/e1000/em_txrx.c =================================================================== --- head/sys/dev/e1000/em_txrx.c (revision 323515) +++ head/sys/dev/e1000/em_txrx.c (revision 323516) @@ -1,815 +1,809 @@ /*- * Copyright (c) 2016-2017 Matt Macy * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* $FreeBSD$ */ #include "if_em.h" #ifdef RSS #include #include #endif #ifdef VERBOSE_DEBUG #define DPRINTF device_printf #else #define DPRINTF(...) #endif /********************************************************************* * Local Function prototypes *********************************************************************/ static int em_tso_setup(struct adapter *adapter, if_pkt_info_t pi, u32 *txd_upper, u32 *txd_lower); static int em_transmit_checksum_setup(struct adapter *adapter, if_pkt_info_t pi, u32 *txd_upper, u32 *txd_lower); static int em_isc_txd_encap(void *arg, if_pkt_info_t pi); static void em_isc_txd_flush(void *arg, uint16_t txqid, qidx_t pidx); static int em_isc_txd_credits_update(void *arg, uint16_t txqid, bool clear); static void em_isc_rxd_refill(void *arg, if_rxd_update_t iru); static void em_isc_rxd_flush(void *arg, uint16_t rxqid, uint8_t flid __unused, qidx_t pidx); static int em_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx, qidx_t budget); static int em_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri); static void lem_isc_rxd_refill(void *arg, if_rxd_update_t iru); static int lem_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx, qidx_t budget); static int lem_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri); static void lem_receive_checksum(int status, int errors, if_rxd_info_t ri); static void em_receive_checksum(uint32_t status, if_rxd_info_t ri); static int em_determine_rsstype(u32 pkt_info); extern int em_intr(void *arg); + struct if_txrx em_txrx = { em_isc_txd_encap, em_isc_txd_flush, em_isc_txd_credits_update, em_isc_rxd_available, em_isc_rxd_pkt_get, em_isc_rxd_refill, em_isc_rxd_flush, - em_intr + em_intr, }; struct if_txrx lem_txrx = { em_isc_txd_encap, em_isc_txd_flush, em_isc_txd_credits_update, lem_isc_rxd_available, lem_isc_rxd_pkt_get, lem_isc_rxd_refill, em_isc_rxd_flush, - em_intr + em_intr, }; extern if_shared_ctx_t em_sctx; void em_dump_rs(struct adapter *adapter) { if_softc_ctx_t scctx = adapter->shared; struct em_tx_queue *que; struct tx_ring *txr; qidx_t i, ntxd, qid, cur; int16_t rs_cidx; uint8_t status; printf("\n"); ntxd = scctx->isc_ntxd[0]; for (qid = 0; qid < adapter->tx_num_queues; qid++) { que = &adapter->tx_queues[qid]; txr = &que->txr; rs_cidx = txr->tx_rs_cidx; if (rs_cidx != txr->tx_rs_pidx) { cur = txr->tx_rsq[rs_cidx]; status = txr->tx_base[cur].upper.fields.status; if (!(status & E1000_TXD_STAT_DD)) printf("qid[%d]->tx_rsq[%d]: %d clear ", qid, rs_cidx, cur); } else { rs_cidx = (rs_cidx-1)&(ntxd-1); cur = txr->tx_rsq[rs_cidx]; printf("qid[%d]->tx_rsq[rs_cidx-1=%d]: %d ", qid, rs_cidx, cur); } printf("cidx_prev=%d rs_pidx=%d ",txr->tx_cidx_processed, txr->tx_rs_pidx); for (i = 0; i < ntxd; i++) { if (txr->tx_base[i].upper.fields.status & E1000_TXD_STAT_DD) printf("%d set ", i); } printf("\n"); } } /********************************************************************** * * Setup work for hardware segmentation offload (TSO) on * adapters using advanced tx descriptors * **********************************************************************/ static int em_tso_setup(struct adapter *adapter, if_pkt_info_t pi, u32 *txd_upper, u32 *txd_lower) { if_softc_ctx_t scctx = adapter->shared; struct em_tx_queue *que = &adapter->tx_queues[pi->ipi_qsidx]; struct tx_ring *txr = &que->txr; struct e1000_context_desc *TXD; int cur, hdr_len; hdr_len = pi->ipi_ehdrlen + pi->ipi_ip_hlen + pi->ipi_tcp_hlen; *txd_lower = (E1000_TXD_CMD_DEXT | /* Extended descr type */ E1000_TXD_DTYP_D | /* Data descr type */ E1000_TXD_CMD_TSE); /* Do TSE on this packet */ /* IP and/or TCP header checksum calculation and insertion. */ *txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8; cur = pi->ipi_pidx; TXD = (struct e1000_context_desc *)&txr->tx_base[cur]; /* * Start offset for header checksum calculation. * End offset for header checksum calculation. * Offset of place put the checksum. */ TXD->lower_setup.ip_fields.ipcss = pi->ipi_ehdrlen; TXD->lower_setup.ip_fields.ipcse = htole16(pi->ipi_ehdrlen + pi->ipi_ip_hlen - 1); TXD->lower_setup.ip_fields.ipcso = pi->ipi_ehdrlen + offsetof(struct ip, ip_sum); /* * Start offset for payload checksum calculation. * End offset for payload checksum calculation. * Offset of place to put the checksum. */ TXD->upper_setup.tcp_fields.tucss = pi->ipi_ehdrlen + pi->ipi_ip_hlen; TXD->upper_setup.tcp_fields.tucse = 0; TXD->upper_setup.tcp_fields.tucso = pi->ipi_ehdrlen + pi->ipi_ip_hlen + offsetof(struct tcphdr, th_sum); /* * Payload size per packet w/o any headers. * Length of all headers up to payload. */ TXD->tcp_seg_setup.fields.mss = htole16(pi->ipi_tso_segsz); TXD->tcp_seg_setup.fields.hdr_len = hdr_len; TXD->cmd_and_length = htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | /* Extended descr */ E1000_TXD_CMD_TSE | /* TSE context */ E1000_TXD_CMD_IP | /* Do IP csum */ E1000_TXD_CMD_TCP | /* Do TCP checksum */ (pi->ipi_len - hdr_len)); /* Total len */ txr->tx_tso = TRUE; if (++cur == scctx->isc_ntxd[0]) { cur = 0; } DPRINTF(iflib_get_dev(adapter->ctx), "%s: pidx: %d cur: %d\n", __FUNCTION__, pi->ipi_pidx, cur); return (cur); } #define TSO_WORKAROUND 4 #define DONT_FORCE_CTX 1 /********************************************************************* * The offload context is protocol specific (TCP/UDP) and thus * only needs to be set when the protocol changes. The occasion * of a context change can be a performance detriment, and * might be better just disabled. The reason arises in the way * in which the controller supports pipelined requests from the * Tx data DMA. Up to four requests can be pipelined, and they may * belong to the same packet or to multiple packets. However all * requests for one packet are issued before a request is issued * for a subsequent packet and if a request for the next packet * requires a context change, that request will be stalled * until the previous request completes. This means setting up * a new context effectively disables pipelined Tx data DMA which * in turn greatly slow down performance to send small sized * frames. **********************************************************************/ static int em_transmit_checksum_setup(struct adapter *adapter, if_pkt_info_t pi, u32 *txd_upper, u32 *txd_lower) { struct e1000_context_desc *TXD = NULL; if_softc_ctx_t scctx = adapter->shared; struct em_tx_queue *que = &adapter->tx_queues[pi->ipi_qsidx]; struct tx_ring *txr = &que->txr; int csum_flags = pi->ipi_csum_flags; int cur, hdr_len; u32 cmd; cur = pi->ipi_pidx; hdr_len = pi->ipi_ehdrlen + pi->ipi_ip_hlen; cmd = adapter->txd_cmd; /* * The 82574L can only remember the *last* context used * regardless of queue that it was use for. We cannot reuse * contexts on this hardware platform and must generate a new * context every time. 82574L hardware spec, section 7.2.6, * second note. */ if (DONT_FORCE_CTX && adapter->tx_num_queues == 1 && txr->csum_lhlen == pi->ipi_ehdrlen && txr->csum_iphlen == pi->ipi_ip_hlen && txr->csum_flags == csum_flags) { /* * Same csum offload context as the previous packets; * just return. */ *txd_upper = txr->csum_txd_upper; *txd_lower = txr->csum_txd_lower; return (cur); } TXD = (struct e1000_context_desc *)&txr->tx_base[cur]; if (csum_flags & CSUM_IP) { *txd_upper |= E1000_TXD_POPTS_IXSM << 8; /* * Start offset for header checksum calculation. * End offset for header checksum calculation. * Offset of place to put the checksum. */ TXD->lower_setup.ip_fields.ipcss = pi->ipi_ehdrlen; TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len); TXD->lower_setup.ip_fields.ipcso = pi->ipi_ehdrlen + offsetof(struct ip, ip_sum); cmd |= E1000_TXD_CMD_IP; } if (csum_flags & (CSUM_TCP|CSUM_UDP)) { uint8_t tucso; *txd_upper |= E1000_TXD_POPTS_TXSM << 8; *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D; if (csum_flags & CSUM_TCP) { tucso = hdr_len + offsetof(struct tcphdr, th_sum); cmd |= E1000_TXD_CMD_TCP; } else tucso = hdr_len + offsetof(struct udphdr, uh_sum); TXD->upper_setup.tcp_fields.tucss = hdr_len; TXD->upper_setup.tcp_fields.tucse = htole16(0); TXD->upper_setup.tcp_fields.tucso = tucso; } txr->csum_lhlen = pi->ipi_ehdrlen; txr->csum_iphlen = pi->ipi_ip_hlen; txr->csum_flags = csum_flags; txr->csum_txd_upper = *txd_upper; txr->csum_txd_lower = *txd_lower; TXD->tcp_seg_setup.data = htole32(0); TXD->cmd_and_length = htole32(E1000_TXD_CMD_IFCS | E1000_TXD_CMD_DEXT | cmd); if (++cur == scctx->isc_ntxd[0]) { cur = 0; } DPRINTF(iflib_get_dev(adapter->ctx), "checksum_setup csum_flags=%x txd_upper=%x txd_lower=%x hdr_len=%d cmd=%x\n", csum_flags, *txd_upper, *txd_lower, hdr_len, cmd); return (cur); } static int em_isc_txd_encap(void *arg, if_pkt_info_t pi) { struct adapter *sc = arg; if_softc_ctx_t scctx = sc->shared; struct em_tx_queue *que = &sc->tx_queues[pi->ipi_qsidx]; struct tx_ring *txr = &que->txr; bus_dma_segment_t *segs = pi->ipi_segs; int nsegs = pi->ipi_nsegs; int csum_flags = pi->ipi_csum_flags; int i, j, first, pidx_last; u32 txd_flags, txd_upper = 0, txd_lower = 0; struct e1000_tx_desc *ctxd = NULL; bool do_tso, tso_desc; qidx_t ntxd; txd_flags = pi->ipi_flags & IPI_TX_INTR ? E1000_TXD_CMD_RS : 0; i = first = pi->ipi_pidx; do_tso = (csum_flags & CSUM_TSO); tso_desc = FALSE; ntxd = scctx->isc_ntxd[0]; /* * TSO Hardware workaround, if this packet is not * TSO, and is only a single descriptor long, and * it follows a TSO burst, then we need to add a * sentinel descriptor to prevent premature writeback. */ if ((!do_tso) && (txr->tx_tso == TRUE)) { if (nsegs == 1) tso_desc = TRUE; txr->tx_tso = FALSE; } /* Do hardware assists */ if (do_tso) { i = em_tso_setup(sc, pi, &txd_upper, &txd_lower); tso_desc = TRUE; } else if (csum_flags & EM_CSUM_OFFLOAD) { i = em_transmit_checksum_setup(sc, pi, &txd_upper, &txd_lower); } if (pi->ipi_mflags & M_VLANTAG) { /* Set the vlan id. */ txd_upper |= htole16(pi->ipi_vtag) << 16; /* Tell hardware to add tag */ txd_lower |= htole32(E1000_TXD_CMD_VLE); } DPRINTF(iflib_get_dev(sc->ctx), "encap: set up tx: nsegs=%d first=%d i=%d\n", nsegs, first, i); /* XXX adapter->pcix_82544 -- lem_fill_descriptors */ /* Set up our transmit descriptors */ for (j = 0; j < nsegs; j++) { bus_size_t seg_len; bus_addr_t seg_addr; uint32_t cmd; ctxd = &txr->tx_base[i]; seg_addr = segs[j].ds_addr; seg_len = segs[j].ds_len; cmd = E1000_TXD_CMD_IFCS | sc->txd_cmd; /* * TSO Workaround: * If this is the last descriptor, we want to * split it so we have a small final sentinel */ if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) { seg_len -= TSO_WORKAROUND; ctxd->buffer_addr = htole64(seg_addr); ctxd->lower.data = htole32(cmd | txd_lower | seg_len); ctxd->upper.data = htole32(txd_upper); if (++i == scctx->isc_ntxd[0]) i = 0; /* Now make the sentinel */ ctxd = &txr->tx_base[i]; ctxd->buffer_addr = htole64(seg_addr + seg_len); ctxd->lower.data = htole32(cmd | txd_lower | TSO_WORKAROUND); ctxd->upper.data = htole32(txd_upper); pidx_last = i; if (++i == scctx->isc_ntxd[0]) i = 0; DPRINTF(iflib_get_dev(sc->ctx), "TSO path pidx_last=%d i=%d ntxd[0]=%d\n", pidx_last, i, scctx->isc_ntxd[0]); } else { ctxd->buffer_addr = htole64(seg_addr); ctxd->lower.data = htole32(cmd | txd_lower | seg_len); ctxd->upper.data = htole32(txd_upper); pidx_last = i; if (++i == scctx->isc_ntxd[0]) i = 0; DPRINTF(iflib_get_dev(sc->ctx), "pidx_last=%d i=%d ntxd[0]=%d\n", pidx_last, i, scctx->isc_ntxd[0]); } } /* * Last Descriptor of Packet * needs End Of Packet (EOP) * and Report Status (RS) */ if (txd_flags) { txr->tx_rsq[txr->tx_rs_pidx] = pidx_last; DPRINTF(iflib_get_dev(sc->ctx), "setting to RS on %d rs_pidx %d first: %d\n", pidx_last, txr->tx_rs_pidx, first); txr->tx_rs_pidx = (txr->tx_rs_pidx+1) & (ntxd-1); MPASS(txr->tx_rs_pidx != txr->tx_rs_cidx); } ctxd->lower.data |= htole32(E1000_TXD_CMD_EOP | txd_flags); DPRINTF(iflib_get_dev(sc->ctx), "tx_buffers[%d]->eop = %d ipi_new_pidx=%d\n", first, pidx_last, i); pi->ipi_new_pidx = i; return (0); } static void em_isc_txd_flush(void *arg, uint16_t txqid, qidx_t pidx) { struct adapter *adapter = arg; struct em_tx_queue *que = &adapter->tx_queues[txqid]; struct tx_ring *txr = &que->txr; E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), pidx); } static int em_isc_txd_credits_update(void *arg, uint16_t txqid, bool clear) { struct adapter *adapter = arg; if_softc_ctx_t scctx = adapter->shared; struct em_tx_queue *que = &adapter->tx_queues[txqid]; struct tx_ring *txr = &que->txr; qidx_t processed = 0; int updated; qidx_t cur, prev, ntxd, rs_cidx; int32_t delta; uint8_t status; rs_cidx = txr->tx_rs_cidx; if (rs_cidx == txr->tx_rs_pidx) return (0); cur = txr->tx_rsq[rs_cidx]; MPASS(cur != QIDX_INVALID); status = txr->tx_base[cur].upper.fields.status; updated = !!(status & E1000_TXD_STAT_DD); if (clear == false || updated == 0) return (updated); prev = txr->tx_cidx_processed; ntxd = scctx->isc_ntxd[0]; do { delta = (int32_t)cur - (int32_t)prev; MPASS(prev == 0 || delta != 0); if (delta < 0) delta += ntxd; DPRINTF(iflib_get_dev(adapter->ctx), "%s: cidx_processed=%u cur=%u clear=%d delta=%d\n", __FUNCTION__, prev, cur, clear, delta); processed += delta; prev = cur; rs_cidx = (rs_cidx + 1) & (ntxd-1); if (rs_cidx == txr->tx_rs_pidx) break; cur = txr->tx_rsq[rs_cidx]; MPASS(cur != QIDX_INVALID); status = txr->tx_base[cur].upper.fields.status; } while ((status & E1000_TXD_STAT_DD)); txr->tx_rs_cidx = rs_cidx; txr->tx_cidx_processed = prev; return(processed); } static void lem_isc_rxd_refill(void *arg, if_rxd_update_t iru) { struct adapter *sc = arg; if_softc_ctx_t scctx = sc->shared; struct em_rx_queue *que = &sc->rx_queues[iru->iru_qsidx]; struct rx_ring *rxr = &que->rxr; struct e1000_rx_desc *rxd; uint64_t *paddrs; uint32_t next_pidx, pidx; uint16_t count; int i; paddrs = iru->iru_paddrs; pidx = iru->iru_pidx; count = iru->iru_count; for (i = 0, next_pidx = pidx; i < count; i++) { rxd = (struct e1000_rx_desc *)&rxr->rx_base[next_pidx]; rxd->buffer_addr = htole64(paddrs[i]); /* status bits must be cleared */ rxd->status = 0; if (++next_pidx == scctx->isc_nrxd[0]) next_pidx = 0; } } static void em_isc_rxd_refill(void *arg, if_rxd_update_t iru) { struct adapter *sc = arg; if_softc_ctx_t scctx = sc->shared; uint16_t rxqid = iru->iru_qsidx; struct em_rx_queue *que = &sc->rx_queues[rxqid]; struct rx_ring *rxr = &que->rxr; union e1000_rx_desc_extended *rxd; uint64_t *paddrs; uint32_t next_pidx, pidx; uint16_t count; int i; paddrs = iru->iru_paddrs; pidx = iru->iru_pidx; count = iru->iru_count; for (i = 0, next_pidx = pidx; i < count; i++) { rxd = &rxr->rx_base[next_pidx]; rxd->read.buffer_addr = htole64(paddrs[i]); - /* DD bits must be cleared */ - rxd->wb.upper.status_error = 0; + /* Zero out rx desc status */ + rxd->wb.upper.status_error &= htole32(~0xFF); if (++next_pidx == scctx->isc_nrxd[0]) next_pidx = 0; } } static void em_isc_rxd_flush(void *arg, uint16_t rxqid, uint8_t flid __unused, qidx_t pidx) { struct adapter *sc = arg; struct em_rx_queue *que = &sc->rx_queues[rxqid]; struct rx_ring *rxr = &que->rxr; E1000_WRITE_REG(&sc->hw, E1000_RDT(rxr->me), pidx); } static int lem_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx, qidx_t budget) { struct adapter *sc = arg; if_softc_ctx_t scctx = sc->shared; struct em_rx_queue *que = &sc->rx_queues[rxqid]; struct rx_ring *rxr = &que->rxr; struct e1000_rx_desc *rxd; u32 staterr = 0; int cnt, i; + budget = min(budget, scctx->isc_nrxd[0]); - if (budget == 1) { - rxd = (struct e1000_rx_desc *)&rxr->rx_base[idx]; - staterr = rxd->status; - return (staterr & E1000_RXD_STAT_DD); - } - - for (cnt = 0, i = idx; cnt < scctx->isc_nrxd[0] && cnt <= budget;) { + for (cnt = 0, i = idx; cnt <= budget;) { rxd = (struct e1000_rx_desc *)&rxr->rx_base[i]; staterr = rxd->status; if ((staterr & E1000_RXD_STAT_DD) == 0) break; if (++i == scctx->isc_nrxd[0]) i = 0; if (staterr & E1000_RXD_STAT_EOP) cnt++; } + MPASS(cnt <= scctx->isc_nrxd[0]); return (cnt); } static int em_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx, qidx_t budget) { struct adapter *sc = arg; if_softc_ctx_t scctx = sc->shared; struct em_rx_queue *que = &sc->rx_queues[rxqid]; struct rx_ring *rxr = &que->rxr; union e1000_rx_desc_extended *rxd; u32 staterr = 0; int cnt, i; + budget = min(budget, scctx->isc_nrxd[0]); - if (budget == 1) { - rxd = &rxr->rx_base[idx]; - staterr = le32toh(rxd->wb.upper.status_error); - return (staterr & E1000_RXD_STAT_DD); - } - - for (cnt = 0, i = idx; cnt < scctx->isc_nrxd[0] && cnt <= budget;) { + for (cnt = 0, i = idx; cnt <= budget;) { rxd = &rxr->rx_base[i]; staterr = le32toh(rxd->wb.upper.status_error); if ((staterr & E1000_RXD_STAT_DD) == 0) break; if (++i == scctx->isc_nrxd[0]) { i = 0; } if (staterr & E1000_RXD_STAT_EOP) cnt++; } + MPASS(cnt <= scctx->isc_nrxd[0]); return (cnt); } static int lem_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri) { struct adapter *adapter = arg; if_softc_ctx_t scctx = adapter->shared; struct em_rx_queue *que = &adapter->rx_queues[ri->iri_qsidx]; struct rx_ring *rxr = &que->rxr; struct e1000_rx_desc *rxd; u16 len; u32 status, errors; bool eop; int i, cidx; status = errors = i = 0; cidx = ri->iri_cidx; do { rxd = (struct e1000_rx_desc *)&rxr->rx_base[cidx]; status = rxd->status; errors = rxd->errors; /* Error Checking then decrement count */ MPASS ((status & E1000_RXD_STAT_DD) != 0); len = le16toh(rxd->length); ri->iri_len += len; eop = (status & E1000_RXD_STAT_EOP) != 0; /* Make sure bad packets are discarded */ if (errors & E1000_RXD_ERR_FRAME_ERR_MASK) { adapter->dropped_pkts++; /* XXX fixup if common */ return (EBADMSG); } ri->iri_frags[i].irf_flid = 0; ri->iri_frags[i].irf_idx = cidx; ri->iri_frags[i].irf_len = len; /* Zero out the receive descriptors status. */ rxd->status = 0; if (++cidx == scctx->isc_nrxd[0]) cidx = 0; i++; } while (!eop); /* XXX add a faster way to look this up */ if (adapter->hw.mac.type >= e1000_82543 && !(status & E1000_RXD_STAT_IXSM)) lem_receive_checksum(status, errors, ri); if (status & E1000_RXD_STAT_VP) { ri->iri_vtag = le16toh(rxd->special); ri->iri_flags |= M_VLANTAG; } ri->iri_nfrags = i; return (0); } static int em_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri) { struct adapter *adapter = arg; if_softc_ctx_t scctx = adapter->shared; struct em_rx_queue *que = &adapter->rx_queues[ri->iri_qsidx]; struct rx_ring *rxr = &que->rxr; union e1000_rx_desc_extended *rxd; u16 len; u32 pkt_info; u32 staterr = 0; bool eop; int i, cidx, vtag; i = vtag = 0; cidx = ri->iri_cidx; do { rxd = &rxr->rx_base[cidx]; staterr = le32toh(rxd->wb.upper.status_error); pkt_info = le32toh(rxd->wb.lower.mrq); /* Error Checking then decrement count */ - MPASS ((staterr & E1000_RXD_STAT_DD) != 0); + KASSERT(staterr & E1000_RXD_STAT_DD, + ("cidx=%d i=%d iri_len=%d", cidx, i, ri->iri_len)); len = le16toh(rxd->wb.upper.length); ri->iri_len += len; eop = (staterr & E1000_RXD_STAT_EOP) != 0; /* Make sure bad packets are discarded */ if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) { adapter->dropped_pkts++; return EBADMSG; } ri->iri_frags[i].irf_flid = 0; ri->iri_frags[i].irf_idx = cidx; ri->iri_frags[i].irf_len = len; /* Zero out the receive descriptors status. */ rxd->wb.upper.status_error &= htole32(~0xFF); if (++cidx == scctx->isc_nrxd[0]) cidx = 0; i++; } while (!eop); /* XXX add a faster way to look this up */ if (adapter->hw.mac.type >= e1000_82543) em_receive_checksum(staterr, ri); if (staterr & E1000_RXD_STAT_VP) { vtag = le16toh(rxd->wb.upper.vlan); } ri->iri_vtag = vtag; if (vtag) ri->iri_flags |= M_VLANTAG; ri->iri_flowid = le32toh(rxd->wb.lower.hi_dword.rss); ri->iri_rsstype = em_determine_rsstype(pkt_info); ri->iri_nfrags = i; return (0); } /********************************************************************* * * Verify that the hardware indicated that the checksum is valid. * Inform the stack about the status of checksum so that stack * doesn't spend time verifying the checksum. * *********************************************************************/ static void lem_receive_checksum(int status, int errors, if_rxd_info_t ri) { /* Did it pass? */ if (status & E1000_RXD_STAT_IPCS && !(errors & E1000_RXD_ERR_IPE)) ri->iri_csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID); if (status & E1000_RXD_STAT_TCPCS) { /* Did it pass? */ if (!(errors & E1000_RXD_ERR_TCPE)) { ri->iri_csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); ri->iri_csum_data = htons(0xffff); } } } /******************************************************************** * * Parse the packet type to determine the appropriate hash * ******************************************************************/ static int em_determine_rsstype(u32 pkt_info) { switch (pkt_info & E1000_RXDADV_RSSTYPE_MASK) { case E1000_RXDADV_RSSTYPE_IPV4_TCP: return M_HASHTYPE_RSS_TCP_IPV4; case E1000_RXDADV_RSSTYPE_IPV4: return M_HASHTYPE_RSS_IPV4; case E1000_RXDADV_RSSTYPE_IPV6_TCP: return M_HASHTYPE_RSS_TCP_IPV6; case E1000_RXDADV_RSSTYPE_IPV6_EX: return M_HASHTYPE_RSS_IPV6_EX; case E1000_RXDADV_RSSTYPE_IPV6: return M_HASHTYPE_RSS_IPV6; case E1000_RXDADV_RSSTYPE_IPV6_TCP_EX: return M_HASHTYPE_RSS_TCP_IPV6_EX; default: return M_HASHTYPE_OPAQUE; } } static void em_receive_checksum(uint32_t status, if_rxd_info_t ri) { ri->iri_csum_flags = 0; /* Ignore Checksum bit is set */ if (status & E1000_RXD_STAT_IXSM) return; /* If the IP checksum exists and there is no IP Checksum error */ if ((status & (E1000_RXD_STAT_IPCS | E1000_RXDEXT_STATERR_IPE)) == E1000_RXD_STAT_IPCS) { ri->iri_csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID); } /* TCP or UDP checksum */ if ((status & (E1000_RXD_STAT_TCPCS | E1000_RXDEXT_STATERR_TCPE)) == E1000_RXD_STAT_TCPCS) { ri->iri_csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); ri->iri_csum_data = htons(0xffff); } if (status & E1000_RXD_STAT_UDPCS) { ri->iri_csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); ri->iri_csum_data = htons(0xffff); } } Index: head/sys/dev/e1000/if_em.c =================================================================== --- head/sys/dev/e1000/if_em.c (revision 323515) +++ head/sys/dev/e1000/if_em.c (revision 323516) @@ -1,4538 +1,4642 @@ /*- * Copyright (c) 2016 Matt Macy * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* $FreeBSD$ */ #include "if_em.h" #include #include #define em_mac_min e1000_82547 #define igb_mac_min e1000_82575 /********************************************************************* * Driver version: *********************************************************************/ char em_driver_version[] = "7.6.1-k"; /********************************************************************* * PCI Device ID Table * * Used by probe to select devices to load on * Last field stores an index into e1000_strings * Last entry must be all 0s * * { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index } *********************************************************************/ static pci_vendor_info_t em_vendor_info_array[] = { /* Intel(R) PRO/1000 Network Connection - Legacy em*/ PVID(0x8086, E1000_DEV_ID_82540EM, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82540EM_LOM, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82540EP, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82540EP_LOM, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82540EP_LP, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82541EI, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82541ER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82541ER_LOM, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82541EI_MOBILE, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82541GI, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82541GI_LF, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82541GI_MOBILE, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82542, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82543GC_FIBER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82543GC_COPPER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82544EI_COPPER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82544EI_FIBER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82544GC_COPPER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82544GC_LOM, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82545EM_COPPER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82545EM_FIBER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82545GM_COPPER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82545GM_FIBER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82545GM_SERDES, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82546EB_COPPER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82546EB_FIBER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82546EB_QUAD_COPPER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82546GB_COPPER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82546GB_FIBER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82546GB_SERDES, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82546GB_PCIE, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82546GB_QUAD_COPPER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82547EI, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82547EI_MOBILE, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82547GI, "Intel(R) PRO/1000 Network Connection"), /* Intel(R) PRO/1000 Network Connection - em */ PVID(0x8086, E1000_DEV_ID_82571EB_COPPER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82571EB_FIBER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82571EB_SERDES, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82572EI, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82572EI_COPPER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82572EI_FIBER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82572EI_SERDES, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82573E, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82573E_IAMT, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82573L, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82583V, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH8_IGP_AMT, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH8_IGP_C, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH8_IFE, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH8_IFE_GT, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH8_IFE_G, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH8_IGP_M, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH8_82567V_3, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH9_IGP_AMT, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH9_IGP_C, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH9_IGP_M, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH9_IGP_M_V, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH9_IFE, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH9_IFE_GT, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH9_IFE_G, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH9_BM, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82574L, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82574LA, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH10_R_BM_LM, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH10_R_BM_LF, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH10_R_BM_V, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH10_D_BM_LM, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH10_D_BM_LF, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH10_D_BM_V, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_M_HV_LM, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_M_HV_LC, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_D_HV_DM, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_D_HV_DC, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH2_LV_LM, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH2_LV_V, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_LPT_I217_LM, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_LPT_I217_V, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_LPTLP_I218_LM, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_LPTLP_I218_V, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_I218_LM2, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_I218_V2, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_I218_LM3, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_I218_V3, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_SPT_I219_LM, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_SPT_I219_V, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_SPT_I219_LM2, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_SPT_I219_V2, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_LBG_I219_LM3, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_SPT_I219_LM4, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_SPT_I219_V4, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_SPT_I219_LM5, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_SPT_I219_V5, "Intel(R) PRO/1000 Network Connection"), /* required last entry */ PVID_END }; static pci_vendor_info_t igb_vendor_info_array[] = { /* Intel(R) PRO/1000 Network Connection - igb */ PVID(0x8086, E1000_DEV_ID_82575EB_COPPER, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_82576, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_82576_NS, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_82576_NS_SERDES, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_82576_FIBER, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_82576_SERDES, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_82576_SERDES_QUAD, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_82576_QUAD_COPPER, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_82576_QUAD_COPPER_ET2, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_82576_VF, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_82580_COPPER, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_82580_FIBER, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_82580_SERDES, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_82580_SGMII, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_82580_COPPER_DUAL, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_82580_QUAD_FIBER, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_DH89XXCC_SERDES, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_DH89XXCC_SGMII, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_DH89XXCC_SFP, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_DH89XXCC_BACKPLANE, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_I350_COPPER, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_I350_FIBER, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_I350_SERDES, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_I350_SGMII, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_I350_VF, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_I210_COPPER, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_I210_COPPER_IT, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_I210_COPPER_OEM1, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_I210_COPPER_FLASHLESS, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_I210_SERDES_FLASHLESS, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_I210_FIBER, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_I210_SERDES, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_I210_SGMII, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_I211_COPPER, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_I354_BACKPLANE_1GBPS, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_I354_BACKPLANE_2_5GBPS, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_I354_SGMII, "Intel(R) PRO/1000 PCI-Express Network Driver"), /* required last entry */ PVID_END }; /********************************************************************* * Function prototypes *********************************************************************/ static void *em_register(device_t dev); static void *igb_register(device_t dev); static int em_if_attach_pre(if_ctx_t ctx); static int em_if_attach_post(if_ctx_t ctx); static int em_if_detach(if_ctx_t ctx); static int em_if_shutdown(if_ctx_t ctx); static int em_if_suspend(if_ctx_t ctx); static int em_if_resume(if_ctx_t ctx); static int em_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int ntxqs, int ntxqsets); static int em_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int nrxqs, int nrxqsets); static void em_if_queues_free(if_ctx_t ctx); static uint64_t em_if_get_counter(if_ctx_t, ift_counter); static void em_if_init(if_ctx_t ctx); static void em_if_stop(if_ctx_t ctx); static void em_if_media_status(if_ctx_t, struct ifmediareq *); static int em_if_media_change(if_ctx_t ctx); static int em_if_mtu_set(if_ctx_t ctx, uint32_t mtu); static void em_if_timer(if_ctx_t ctx, uint16_t qid); static void em_if_vlan_register(if_ctx_t ctx, u16 vtag); static void em_if_vlan_unregister(if_ctx_t ctx, u16 vtag); static void em_identify_hardware(if_ctx_t ctx); static int em_allocate_pci_resources(if_ctx_t ctx); static void em_free_pci_resources(if_ctx_t ctx); static void em_reset(if_ctx_t ctx); static int em_setup_interface(if_ctx_t ctx); static int em_setup_msix(if_ctx_t ctx); static void em_initialize_transmit_unit(if_ctx_t ctx); static void em_initialize_receive_unit(if_ctx_t ctx); static void em_if_enable_intr(if_ctx_t ctx); static void em_if_disable_intr(if_ctx_t ctx); static int em_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid); static int em_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid); static void em_if_multi_set(if_ctx_t ctx); static void em_if_update_admin_status(if_ctx_t ctx); static void em_if_debug(if_ctx_t ctx); static void em_update_stats_counters(struct adapter *); static void em_add_hw_stats(struct adapter *adapter); static int em_if_set_promisc(if_ctx_t ctx, int flags); static void em_setup_vlan_hw_support(struct adapter *); static int em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS); static void em_print_nvm_info(struct adapter *); static int em_sysctl_debug_info(SYSCTL_HANDLER_ARGS); static int em_get_rs(SYSCTL_HANDLER_ARGS); static void em_print_debug_info(struct adapter *); static int em_is_valid_ether_addr(u8 *); static int em_sysctl_int_delay(SYSCTL_HANDLER_ARGS); static void em_add_int_delay_sysctl(struct adapter *, const char *, const char *, struct em_int_delay_info *, int, int); /* Management and WOL Support */ static void em_init_manageability(struct adapter *); static void em_release_manageability(struct adapter *); static void em_get_hw_control(struct adapter *); static void em_release_hw_control(struct adapter *); static void em_get_wakeup(if_ctx_t ctx); static void em_enable_wakeup(if_ctx_t ctx); static int em_enable_phy_wakeup(struct adapter *); static void em_disable_aspm(struct adapter *); int em_intr(void *arg); static void em_disable_promisc(if_ctx_t ctx); /* MSIX handlers */ static int em_if_msix_intr_assign(if_ctx_t, int); static int em_msix_link(void *); static void em_handle_link(void *context); static void em_enable_vectors_82574(if_ctx_t); static int em_set_flowcntl(SYSCTL_HANDLER_ARGS); static int em_sysctl_eee(SYSCTL_HANDLER_ARGS); static void em_if_led_func(if_ctx_t ctx, int onoff); static int em_get_regs(SYSCTL_HANDLER_ARGS); static void lem_smartspeed(struct adapter *adapter); static void igb_configure_queues(struct adapter *adapter); /********************************************************************* * FreeBSD Device Interface Entry Points *********************************************************************/ static device_method_t em_methods[] = { /* Device interface */ DEVMETHOD(device_register, em_register), DEVMETHOD(device_probe, iflib_device_probe), DEVMETHOD(device_attach, iflib_device_attach), DEVMETHOD(device_detach, iflib_device_detach), DEVMETHOD(device_shutdown, iflib_device_shutdown), DEVMETHOD(device_suspend, iflib_device_suspend), DEVMETHOD(device_resume, iflib_device_resume), DEVMETHOD_END }; static device_method_t igb_methods[] = { /* Device interface */ DEVMETHOD(device_register, igb_register), DEVMETHOD(device_probe, iflib_device_probe), DEVMETHOD(device_attach, iflib_device_attach), DEVMETHOD(device_detach, iflib_device_detach), DEVMETHOD(device_shutdown, iflib_device_shutdown), DEVMETHOD(device_suspend, iflib_device_suspend), DEVMETHOD(device_resume, iflib_device_resume), DEVMETHOD_END }; static driver_t em_driver = { "em", em_methods, sizeof(struct adapter), }; static devclass_t em_devclass; DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0); MODULE_DEPEND(em, pci, 1, 1, 1); MODULE_DEPEND(em, ether, 1, 1, 1); MODULE_DEPEND(em, iflib, 1, 1, 1); static driver_t igb_driver = { "igb", igb_methods, sizeof(struct adapter), }; static devclass_t igb_devclass; DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0); MODULE_DEPEND(igb, pci, 1, 1, 1); MODULE_DEPEND(igb, ether, 1, 1, 1); MODULE_DEPEND(igb, iflib, 1, 1, 1); static device_method_t em_if_methods[] = { DEVMETHOD(ifdi_attach_pre, em_if_attach_pre), DEVMETHOD(ifdi_attach_post, em_if_attach_post), DEVMETHOD(ifdi_detach, em_if_detach), DEVMETHOD(ifdi_shutdown, em_if_shutdown), DEVMETHOD(ifdi_suspend, em_if_suspend), DEVMETHOD(ifdi_resume, em_if_resume), DEVMETHOD(ifdi_init, em_if_init), DEVMETHOD(ifdi_stop, em_if_stop), DEVMETHOD(ifdi_msix_intr_assign, em_if_msix_intr_assign), DEVMETHOD(ifdi_intr_enable, em_if_enable_intr), DEVMETHOD(ifdi_intr_disable, em_if_disable_intr), DEVMETHOD(ifdi_tx_queues_alloc, em_if_tx_queues_alloc), DEVMETHOD(ifdi_rx_queues_alloc, em_if_rx_queues_alloc), DEVMETHOD(ifdi_queues_free, em_if_queues_free), DEVMETHOD(ifdi_update_admin_status, em_if_update_admin_status), DEVMETHOD(ifdi_multi_set, em_if_multi_set), DEVMETHOD(ifdi_media_status, em_if_media_status), DEVMETHOD(ifdi_media_change, em_if_media_change), DEVMETHOD(ifdi_mtu_set, em_if_mtu_set), DEVMETHOD(ifdi_promisc_set, em_if_set_promisc), DEVMETHOD(ifdi_timer, em_if_timer), DEVMETHOD(ifdi_vlan_register, em_if_vlan_register), DEVMETHOD(ifdi_vlan_unregister, em_if_vlan_unregister), DEVMETHOD(ifdi_get_counter, em_if_get_counter), DEVMETHOD(ifdi_led_func, em_if_led_func), DEVMETHOD(ifdi_rx_queue_intr_enable, em_if_rx_queue_intr_enable), DEVMETHOD(ifdi_tx_queue_intr_enable, em_if_tx_queue_intr_enable), DEVMETHOD(ifdi_debug, em_if_debug), DEVMETHOD_END }; /* * note that if (adapter->msix_mem) is replaced by: * if (adapter->intr_type == IFLIB_INTR_MSIX) */ static driver_t em_if_driver = { "em_if", em_if_methods, sizeof(struct adapter) }; /********************************************************************* * Tunable default values. *********************************************************************/ #define EM_TICKS_TO_USECS(ticks) ((1024 * (ticks) + 500) / 1000) #define EM_USECS_TO_TICKS(usecs) ((1000 * (usecs) + 512) / 1024) #define M_TSO_LEN 66 #define MAX_INTS_PER_SEC 8000 #define DEFAULT_ITR (1000000000/(MAX_INTS_PER_SEC * 256)) /* Allow common code without TSO */ #ifndef CSUM_TSO #define CSUM_TSO 0 #endif #define TSO_WORKAROUND 4 static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters"); static int em_disable_crc_stripping = 0; SYSCTL_INT(_hw_em, OID_AUTO, disable_crc_stripping, CTLFLAG_RDTUN, &em_disable_crc_stripping, 0, "Disable CRC Stripping"); static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV); static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR); SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt, 0, "Default transmit interrupt delay in usecs"); SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt, 0, "Default receive interrupt delay in usecs"); static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV); static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV); SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN, &em_tx_abs_int_delay_dflt, 0, "Default transmit interrupt delay limit in usecs"); SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN, &em_rx_abs_int_delay_dflt, 0, "Default receive interrupt delay limit in usecs"); static int em_smart_pwr_down = FALSE; SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down, 0, "Set to true to leave smart power down enabled on newer adapters"); /* Controls whether promiscuous also shows bad packets */ static int em_debug_sbp = TRUE; SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0, "Show bad packets in promiscuous mode"); /* How many packets rxeof tries to clean at a time */ static int em_rx_process_limit = 100; SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN, &em_rx_process_limit, 0, "Maximum number of received packets to process " "at a time, -1 means unlimited"); /* Energy efficient ethernet - default to OFF */ static int eee_setting = 1; SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0, "Enable Energy Efficient Ethernet"); /* ** Tuneable Interrupt rate */ static int em_max_interrupt_rate = 8000; SYSCTL_INT(_hw_em, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN, &em_max_interrupt_rate, 0, "Maximum interrupts per second"); /* Global used in WOL setup with multiport cards */ static int global_quad_port_a = 0; extern struct if_txrx igb_txrx; extern struct if_txrx em_txrx; extern struct if_txrx lem_txrx; static struct if_shared_ctx em_sctx_init = { .isc_magic = IFLIB_MAGIC, .isc_q_align = PAGE_SIZE, .isc_tx_maxsize = EM_TSO_SIZE, .isc_tx_maxsegsize = PAGE_SIZE, .isc_rx_maxsize = MJUM9BYTES, .isc_rx_nsegments = 1, .isc_rx_maxsegsize = MJUM9BYTES, .isc_nfl = 1, .isc_nrxqs = 1, .isc_ntxqs = 1, .isc_admin_intrcnt = 1, .isc_vendor_info = em_vendor_info_array, .isc_driver_version = em_driver_version, .isc_driver = &em_if_driver, - .isc_flags = IFLIB_NEED_SCRATCH | IFLIB_TSO_INIT_IP, + .isc_flags = IFLIB_TSO_INIT_IP | IFLIB_NEED_ZERO_CSUM, .isc_nrxd_min = {EM_MIN_RXD}, .isc_ntxd_min = {EM_MIN_TXD}, .isc_nrxd_max = {EM_MAX_RXD}, .isc_ntxd_max = {EM_MAX_TXD}, .isc_nrxd_default = {EM_DEFAULT_RXD}, .isc_ntxd_default = {EM_DEFAULT_TXD}, }; if_shared_ctx_t em_sctx = &em_sctx_init; static struct if_shared_ctx igb_sctx_init = { .isc_magic = IFLIB_MAGIC, .isc_q_align = PAGE_SIZE, .isc_tx_maxsize = EM_TSO_SIZE, .isc_tx_maxsegsize = PAGE_SIZE, .isc_rx_maxsize = MJUM9BYTES, .isc_rx_nsegments = 1, .isc_rx_maxsegsize = MJUM9BYTES, .isc_nfl = 1, .isc_nrxqs = 1, .isc_ntxqs = 1, .isc_admin_intrcnt = 1, .isc_vendor_info = igb_vendor_info_array, .isc_driver_version = em_driver_version, .isc_driver = &em_if_driver, - .isc_flags = IFLIB_NEED_SCRATCH | IFLIB_TSO_INIT_IP, + .isc_flags = IFLIB_TSO_INIT_IP | IFLIB_NEED_ZERO_CSUM, .isc_nrxd_min = {EM_MIN_RXD}, .isc_ntxd_min = {EM_MIN_TXD}, .isc_nrxd_max = {IGB_MAX_RXD}, .isc_ntxd_max = {IGB_MAX_TXD}, .isc_nrxd_default = {EM_DEFAULT_RXD}, .isc_ntxd_default = {EM_DEFAULT_TXD}, }; if_shared_ctx_t igb_sctx = &igb_sctx_init; /***************************************************************** * * Dump Registers * ****************************************************************/ #define IGB_REGS_LEN 739 static int em_get_regs(SYSCTL_HANDLER_ARGS) { struct adapter *adapter = (struct adapter *)arg1; struct e1000_hw *hw = &adapter->hw; struct sbuf *sb; u32 *regs_buff; int rc; regs_buff = malloc(sizeof(u32) * IGB_REGS_LEN, M_DEVBUF, M_WAITOK); memset(regs_buff, 0, IGB_REGS_LEN * sizeof(u32)); rc = sysctl_wire_old_buffer(req, 0); MPASS(rc == 0); if (rc != 0) { free(regs_buff, M_DEVBUF); return (rc); } sb = sbuf_new_for_sysctl(NULL, NULL, 32*400, req); MPASS(sb != NULL); if (sb == NULL) { free(regs_buff, M_DEVBUF); return (ENOMEM); } /* General Registers */ regs_buff[0] = E1000_READ_REG(hw, E1000_CTRL); regs_buff[1] = E1000_READ_REG(hw, E1000_STATUS); regs_buff[2] = E1000_READ_REG(hw, E1000_CTRL_EXT); regs_buff[3] = E1000_READ_REG(hw, E1000_ICR); regs_buff[4] = E1000_READ_REG(hw, E1000_RCTL); regs_buff[5] = E1000_READ_REG(hw, E1000_RDLEN(0)); regs_buff[6] = E1000_READ_REG(hw, E1000_RDH(0)); regs_buff[7] = E1000_READ_REG(hw, E1000_RDT(0)); regs_buff[8] = E1000_READ_REG(hw, E1000_RXDCTL(0)); regs_buff[9] = E1000_READ_REG(hw, E1000_RDBAL(0)); regs_buff[10] = E1000_READ_REG(hw, E1000_RDBAH(0)); regs_buff[11] = E1000_READ_REG(hw, E1000_TCTL); regs_buff[12] = E1000_READ_REG(hw, E1000_TDBAL(0)); regs_buff[13] = E1000_READ_REG(hw, E1000_TDBAH(0)); regs_buff[14] = E1000_READ_REG(hw, E1000_TDLEN(0)); regs_buff[15] = E1000_READ_REG(hw, E1000_TDH(0)); regs_buff[16] = E1000_READ_REG(hw, E1000_TDT(0)); regs_buff[17] = E1000_READ_REG(hw, E1000_TXDCTL(0)); regs_buff[18] = E1000_READ_REG(hw, E1000_TDFH); regs_buff[19] = E1000_READ_REG(hw, E1000_TDFT); regs_buff[20] = E1000_READ_REG(hw, E1000_TDFHS); regs_buff[21] = E1000_READ_REG(hw, E1000_TDFPC); sbuf_printf(sb, "General Registers\n"); sbuf_printf(sb, "\tCTRL\t %08x\n", regs_buff[0]); sbuf_printf(sb, "\tSTATUS\t %08x\n", regs_buff[1]); sbuf_printf(sb, "\tCTRL_EXIT\t %08x\n\n", regs_buff[2]); sbuf_printf(sb, "Interrupt Registers\n"); sbuf_printf(sb, "\tICR\t %08x\n\n", regs_buff[3]); sbuf_printf(sb, "RX Registers\n"); sbuf_printf(sb, "\tRCTL\t %08x\n", regs_buff[4]); sbuf_printf(sb, "\tRDLEN\t %08x\n", regs_buff[5]); sbuf_printf(sb, "\tRDH\t %08x\n", regs_buff[6]); sbuf_printf(sb, "\tRDT\t %08x\n", regs_buff[7]); sbuf_printf(sb, "\tRXDCTL\t %08x\n", regs_buff[8]); sbuf_printf(sb, "\tRDBAL\t %08x\n", regs_buff[9]); sbuf_printf(sb, "\tRDBAH\t %08x\n\n", regs_buff[10]); sbuf_printf(sb, "TX Registers\n"); sbuf_printf(sb, "\tTCTL\t %08x\n", regs_buff[11]); sbuf_printf(sb, "\tTDBAL\t %08x\n", regs_buff[12]); sbuf_printf(sb, "\tTDBAH\t %08x\n", regs_buff[13]); sbuf_printf(sb, "\tTDLEN\t %08x\n", regs_buff[14]); sbuf_printf(sb, "\tTDH\t %08x\n", regs_buff[15]); sbuf_printf(sb, "\tTDT\t %08x\n", regs_buff[16]); sbuf_printf(sb, "\tTXDCTL\t %08x\n", regs_buff[17]); sbuf_printf(sb, "\tTDFH\t %08x\n", regs_buff[18]); sbuf_printf(sb, "\tTDFT\t %08x\n", regs_buff[19]); sbuf_printf(sb, "\tTDFHS\t %08x\n", regs_buff[20]); sbuf_printf(sb, "\tTDFPC\t %08x\n\n", regs_buff[21]); free(regs_buff, M_DEVBUF); #ifdef DUMP_DESCS { if_softc_ctx_t scctx = adapter->shared; struct rx_ring *rxr = &rx_que->rxr; struct tx_ring *txr = &tx_que->txr; int ntxd = scctx->isc_ntxd[0]; int nrxd = scctx->isc_nrxd[0]; int j; for (j = 0; j < nrxd; j++) { u32 staterr = le32toh(rxr->rx_base[j].wb.upper.status_error); u32 length = le32toh(rxr->rx_base[j].wb.upper.length); sbuf_printf(sb, "\tReceive Descriptor Address %d: %08" PRIx64 " Error:%d Length:%d\n", j, rxr->rx_base[j].read.buffer_addr, staterr, length); } for (j = 0; j < min(ntxd, 256); j++) { unsigned int *ptr = (unsigned int *)&txr->tx_base[j]; sbuf_printf(sb, "\tTXD[%03d] [0]: %08x [1]: %08x [2]: %08x [3]: %08x eop: %d DD=%d\n", j, ptr[0], ptr[1], ptr[2], ptr[3], buf->eop, buf->eop != -1 ? txr->tx_base[buf->eop].upper.fields.status & E1000_TXD_STAT_DD : 0); } } #endif rc = sbuf_finish(sb); sbuf_delete(sb); return(rc); } static void * em_register(device_t dev) { return (em_sctx); } static void * igb_register(device_t dev) { return (igb_sctx); } static int em_set_num_queues(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); int maxqueues; /* Sanity check based on HW */ switch (adapter->hw.mac.type) { case e1000_82576: case e1000_82580: case e1000_i350: case e1000_i354: maxqueues = 8; break; case e1000_i210: case e1000_82575: maxqueues = 4; break; case e1000_i211: case e1000_82574: maxqueues = 2; break; default: maxqueues = 1; break; } return (maxqueues); } #define EM_CAPS \ IFCAP_TSO4 | IFCAP_TXCSUM | IFCAP_LRO | IFCAP_RXCSUM | IFCAP_VLAN_HWFILTER | IFCAP_WOL_MAGIC | \ IFCAP_WOL_MCAST | IFCAP_WOL | IFCAP_VLAN_HWTSO | IFCAP_HWCSUM | IFCAP_VLAN_HWTAGGING | \ IFCAP_VLAN_HWCSUM | IFCAP_VLAN_HWTSO | IFCAP_VLAN_MTU; #define IGB_CAPS \ IFCAP_TSO4 | IFCAP_TXCSUM | IFCAP_LRO | IFCAP_RXCSUM | IFCAP_VLAN_HWFILTER | IFCAP_WOL_MAGIC | \ IFCAP_WOL_MCAST | IFCAP_WOL | IFCAP_VLAN_HWTSO | IFCAP_HWCSUM | IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM | \ IFCAP_VLAN_HWTSO | IFCAP_VLAN_MTU | IFCAP_TXCSUM_IPV6 | IFCAP_HWCSUM_IPV6 | IFCAP_JUMBO_MTU; /********************************************************************* * Device initialization routine * * The attach entry point is called when the driver is being loaded. * This routine identifies the type of hardware, allocates all resources * and initializes the hardware. * * return 0 on success, positive on failure *********************************************************************/ static int em_if_attach_pre(if_ctx_t ctx) { struct adapter *adapter; if_softc_ctx_t scctx; device_t dev; struct e1000_hw *hw; int error = 0; INIT_DEBUGOUT("em_if_attach_pre begin"); dev = iflib_get_dev(ctx); adapter = iflib_get_softc(ctx); if (resource_disabled("em", device_get_unit(dev))) { device_printf(dev, "Disabled by device hint\n"); return (ENXIO); } - adapter->ctx = ctx; + adapter->ctx = adapter->osdep.ctx = ctx; adapter->dev = adapter->osdep.dev = dev; scctx = adapter->shared = iflib_get_softc_ctx(ctx); adapter->media = iflib_get_media(ctx); hw = &adapter->hw; adapter->tx_process_limit = scctx->isc_ntxd[0]; /* SYSCTL stuff */ SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0, em_sysctl_nvm_info, "I", "NVM Information"); SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0, em_sysctl_debug_info, "I", "Debug Information"); SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0, em_set_flowcntl, "I", "Flow Control"); SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "reg_dump", CTLTYPE_STRING | CTLFLAG_RD, adapter, 0, em_get_regs, "A", "Dump Registers"); SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "rs_dump", CTLTYPE_INT | CTLFLAG_RW, adapter, 0, em_get_rs, "I", "Dump RS indexes"); /* Determine hardware and mac info */ em_identify_hardware(ctx); /* Set isc_msix_bar */ scctx->isc_msix_bar = PCIR_BAR(EM_MSIX_BAR); scctx->isc_tx_nsegments = EM_MAX_SCATTER; scctx->isc_tx_tso_segments_max = scctx->isc_tx_nsegments; scctx->isc_tx_tso_size_max = EM_TSO_SIZE; scctx->isc_tx_tso_segsize_max = EM_TSO_SEG_SIZE; scctx->isc_nrxqsets_max = scctx->isc_ntxqsets_max = em_set_num_queues(ctx); device_printf(dev, "attach_pre capping queues at %d\n", scctx->isc_ntxqsets_max); scctx->isc_tx_csum_flags = CSUM_TCP | CSUM_UDP | CSUM_IP_TSO; if (adapter->hw.mac.type >= igb_mac_min) { int try_second_bar; scctx->isc_txqsizes[0] = roundup2(scctx->isc_ntxd[0] * sizeof(union e1000_adv_tx_desc), EM_DBA_ALIGN); scctx->isc_rxqsizes[0] = roundup2(scctx->isc_nrxd[0] * sizeof(union e1000_adv_rx_desc), EM_DBA_ALIGN); scctx->isc_txd_size[0] = sizeof(union e1000_adv_tx_desc); scctx->isc_rxd_size[0] = sizeof(union e1000_adv_rx_desc); scctx->isc_txrx = &igb_txrx; scctx->isc_capenable = IGB_CAPS; scctx->isc_tx_csum_flags = CSUM_TCP | CSUM_UDP | CSUM_TSO | CSUM_IP6_TCP \ | CSUM_IP6_UDP | CSUM_IP6_TCP; if (adapter->hw.mac.type != e1000_82575) scctx->isc_tx_csum_flags |= CSUM_SCTP | CSUM_IP6_SCTP; /* ** Some new devices, as with ixgbe, now may ** use a different BAR, so we need to keep ** track of which is used. */ try_second_bar = pci_read_config(dev, scctx->isc_msix_bar, 4); if (try_second_bar == 0) scctx->isc_msix_bar += 4; } else if (adapter->hw.mac.type >= em_mac_min) { scctx->isc_txqsizes[0] = roundup2(scctx->isc_ntxd[0]* sizeof(struct e1000_tx_desc), EM_DBA_ALIGN); scctx->isc_rxqsizes[0] = roundup2(scctx->isc_nrxd[0] * sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN); scctx->isc_txd_size[0] = sizeof(struct e1000_tx_desc); scctx->isc_rxd_size[0] = sizeof(union e1000_rx_desc_extended); scctx->isc_txrx = &em_txrx; scctx->isc_capenable = EM_CAPS; scctx->isc_tx_csum_flags = CSUM_TCP | CSUM_UDP | CSUM_IP_TSO; } else { scctx->isc_txqsizes[0] = roundup2((scctx->isc_ntxd[0] + 1) * sizeof(struct e1000_tx_desc), EM_DBA_ALIGN); scctx->isc_rxqsizes[0] = roundup2((scctx->isc_nrxd[0] + 1) * sizeof(struct e1000_rx_desc), EM_DBA_ALIGN); scctx->isc_txd_size[0] = sizeof(struct e1000_tx_desc); scctx->isc_rxd_size[0] = sizeof(struct e1000_rx_desc); scctx->isc_tx_csum_flags = CSUM_TCP | CSUM_UDP | CSUM_IP_TSO; scctx->isc_txrx = &lem_txrx; scctx->isc_capenable = EM_CAPS; if (adapter->hw.mac.type < e1000_82543) scctx->isc_capenable &= ~(IFCAP_HWCSUM|IFCAP_VLAN_HWCSUM); scctx->isc_tx_csum_flags = CSUM_TCP | CSUM_UDP | CSUM_IP_TSO; scctx->isc_msix_bar = 0; } /* Setup PCI resources */ if (em_allocate_pci_resources(ctx)) { device_printf(dev, "Allocation of PCI resources failed\n"); error = ENXIO; goto err_pci; } /* ** For ICH8 and family we need to ** map the flash memory, and this ** must happen after the MAC is ** identified */ if ((hw->mac.type == e1000_ich8lan) || (hw->mac.type == e1000_ich9lan) || (hw->mac.type == e1000_ich10lan) || (hw->mac.type == e1000_pchlan) || (hw->mac.type == e1000_pch2lan) || (hw->mac.type == e1000_pch_lpt)) { int rid = EM_BAR_TYPE_FLASH; adapter->flash = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (adapter->flash == NULL) { device_printf(dev, "Mapping of Flash failed\n"); error = ENXIO; goto err_pci; } /* This is used in the shared code */ hw->flash_address = (u8 *)adapter->flash; adapter->osdep.flash_bus_space_tag = rman_get_bustag(adapter->flash); adapter->osdep.flash_bus_space_handle = rman_get_bushandle(adapter->flash); } /* ** In the new SPT device flash is not a ** separate BAR, rather it is also in BAR0, ** so use the same tag and an offset handle for the ** FLASH read/write macros in the shared code. */ else if (hw->mac.type == e1000_pch_spt) { adapter->osdep.flash_bus_space_tag = adapter->osdep.mem_bus_space_tag; adapter->osdep.flash_bus_space_handle = adapter->osdep.mem_bus_space_handle + E1000_FLASH_BASE_ADDR; } /* Do Shared Code initialization */ error = e1000_setup_init_funcs(hw, TRUE); if (error) { device_printf(dev, "Setup of Shared code failed, error %d\n", error); error = ENXIO; goto err_pci; } em_setup_msix(ctx); e1000_get_bus_info(hw); /* Set up some sysctls for the tunable interrupt delays */ em_add_int_delay_sysctl(adapter, "rx_int_delay", "receive interrupt delay in usecs", &adapter->rx_int_delay, E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt); em_add_int_delay_sysctl(adapter, "tx_int_delay", "transmit interrupt delay in usecs", &adapter->tx_int_delay, E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt); em_add_int_delay_sysctl(adapter, "rx_abs_int_delay", "receive interrupt delay limit in usecs", &adapter->rx_abs_int_delay, E1000_REGISTER(hw, E1000_RADV), em_rx_abs_int_delay_dflt); em_add_int_delay_sysctl(adapter, "tx_abs_int_delay", "transmit interrupt delay limit in usecs", &adapter->tx_abs_int_delay, E1000_REGISTER(hw, E1000_TADV), em_tx_abs_int_delay_dflt); em_add_int_delay_sysctl(adapter, "itr", "interrupt delay limit in usecs/4", &adapter->tx_itr, E1000_REGISTER(hw, E1000_ITR), DEFAULT_ITR); hw->mac.autoneg = DO_AUTO_NEG; hw->phy.autoneg_wait_to_complete = FALSE; hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT; if (adapter->hw.mac.type < em_mac_min) { e1000_init_script_state_82541(&adapter->hw, TRUE); e1000_set_tbi_compatibility_82543(&adapter->hw, TRUE); } /* Copper options */ if (hw->phy.media_type == e1000_media_type_copper) { hw->phy.mdix = AUTO_ALL_MODES; hw->phy.disable_polarity_correction = FALSE; hw->phy.ms_type = EM_MASTER_SLAVE; } /* * Set the frame limits assuming * standard ethernet sized frames. */ scctx->isc_max_frame_size = adapter->hw.mac.max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE; /* * This controls when hardware reports transmit completion * status. */ hw->mac.report_tx_early = 1; /* Allocate multicast array memory. */ adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT); if (adapter->mta == NULL) { device_printf(dev, "Can not allocate multicast setup array\n"); error = ENOMEM; goto err_late; } /* Check SOL/IDER usage */ if (e1000_check_reset_block(hw)) device_printf(dev, "PHY reset is blocked" " due to SOL/IDER session.\n"); /* Sysctl for setting Energy Efficient Ethernet */ hw->dev_spec.ich8lan.eee_disable = eee_setting; SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "eee_control", CTLTYPE_INT|CTLFLAG_RW, adapter, 0, em_sysctl_eee, "I", "Disable Energy Efficient Ethernet"); /* ** Start from a known state, this is ** important in reading the nvm and ** mac from that. */ e1000_reset_hw(hw); /* Make sure we have a good EEPROM before we read from it */ if (e1000_validate_nvm_checksum(hw) < 0) { /* ** Some PCI-E parts fail the first check due to ** the link being in sleep state, call it again, ** if it fails a second time its a real issue. */ if (e1000_validate_nvm_checksum(hw) < 0) { device_printf(dev, "The EEPROM Checksum Is Not Valid\n"); error = EIO; goto err_late; } } /* Copy the permanent MAC address out of the EEPROM */ if (e1000_read_mac_addr(hw) < 0) { device_printf(dev, "EEPROM read error while reading MAC" " address\n"); error = EIO; goto err_late; } if (!em_is_valid_ether_addr(hw->mac.addr)) { device_printf(dev, "Invalid MAC address\n"); error = EIO; goto err_late; } /* Disable ULP support */ e1000_disable_ulp_lpt_lp(hw, TRUE); /* * Get Wake-on-Lan and Management info for later use */ em_get_wakeup(ctx); iflib_set_mac(ctx, hw->mac.addr); return (0); err_late: em_release_hw_control(adapter); err_pci: em_free_pci_resources(ctx); free(adapter->mta, M_DEVBUF); return (error); } static int em_if_attach_post(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct e1000_hw *hw = &adapter->hw; int error = 0; /* Setup OS specific network interface */ error = em_setup_interface(ctx); if (error != 0) { goto err_late; } em_reset(ctx); /* Initialize statistics */ em_update_stats_counters(adapter); hw->mac.get_link_status = 1; em_if_update_admin_status(ctx); em_add_hw_stats(adapter); /* Non-AMT based hardware can now take control from firmware */ if (adapter->has_manage && !adapter->has_amt) em_get_hw_control(adapter); INIT_DEBUGOUT("em_if_attach_post: end"); return (error); err_late: em_release_hw_control(adapter); em_free_pci_resources(ctx); em_if_queues_free(ctx); free(adapter->mta, M_DEVBUF); return (error); } /********************************************************************* * Device removal routine * * The detach entry point is called when the driver is being removed. * This routine stops the adapter and deallocates all the resources * that were allocated for driver operation. * * return 0 on success, positive on failure *********************************************************************/ static int em_if_detach(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); INIT_DEBUGOUT("em_detach: begin"); e1000_phy_hw_reset(&adapter->hw); em_release_manageability(adapter); em_release_hw_control(adapter); em_free_pci_resources(ctx); return (0); } /********************************************************************* * * Shutdown entry point * **********************************************************************/ static int em_if_shutdown(if_ctx_t ctx) { return em_if_suspend(ctx); } /* * Suspend/resume device methods. */ static int em_if_suspend(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); em_release_manageability(adapter); em_release_hw_control(adapter); em_enable_wakeup(ctx); return (0); } static int em_if_resume(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); if (adapter->hw.mac.type == e1000_pch2lan) e1000_resume_workarounds_pchlan(&adapter->hw); em_if_init(ctx); em_init_manageability(adapter); return(0); } static int em_if_mtu_set(if_ctx_t ctx, uint32_t mtu) { int max_frame_size; struct adapter *adapter = iflib_get_softc(ctx); if_softc_ctx_t scctx = iflib_get_softc_ctx(ctx); IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)"); switch (adapter->hw.mac.type) { case e1000_82571: case e1000_82572: case e1000_ich9lan: case e1000_ich10lan: case e1000_pch2lan: case e1000_pch_lpt: case e1000_pch_spt: case e1000_82574: case e1000_82583: case e1000_80003es2lan: /* 9K Jumbo Frame size */ max_frame_size = 9234; break; case e1000_pchlan: max_frame_size = 4096; break; case e1000_82542: case e1000_ich8lan: /* Adapters that do not support jumbo frames */ max_frame_size = ETHER_MAX_LEN; break; default: if (adapter->hw.mac.type >= igb_mac_min) max_frame_size = 9234; else /* lem */ max_frame_size = MAX_JUMBO_FRAME_SIZE; } if (mtu > max_frame_size - ETHER_HDR_LEN - ETHER_CRC_LEN) { return (EINVAL); } scctx->isc_max_frame_size = adapter->hw.mac.max_frame_size = mtu + ETHER_HDR_LEN + ETHER_CRC_LEN; return (0); } /********************************************************************* * Init entry point * * This routine is used in two ways. It is used by the stack as * init entry point in network interface structure. It is also used * by the driver as a hw/sw initialization routine to get to a * consistent state. * * return 0 on success, positive on failure **********************************************************************/ static void em_if_init(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct ifnet *ifp = iflib_get_ifp(ctx); struct em_tx_queue *tx_que; int i; INIT_DEBUGOUT("em_if_init: begin"); /* Get the latest mac address, User can use a LAA */ bcopy(if_getlladdr(ifp), adapter->hw.mac.addr, ETHER_ADDR_LEN); /* Put the address into the Receive Address Array */ e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0); /* * With the 82571 adapter, RAR[0] may be overwritten * when the other port is reset, we make a duplicate * in RAR[14] for that eventuality, this assures * the interface continues to function. */ if (adapter->hw.mac.type == e1000_82571) { e1000_set_laa_state_82571(&adapter->hw, TRUE); e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, E1000_RAR_ENTRIES - 1); } /* Initialize the hardware */ em_reset(ctx); em_if_update_admin_status(ctx); for (i = 0, tx_que = adapter->tx_queues; i < adapter->tx_num_queues; i++, tx_que++) { struct tx_ring *txr = &tx_que->txr; txr->tx_rs_cidx = txr->tx_rs_pidx = txr->tx_cidx_processed = 0; } /* Setup VLAN support, basic and offload if available */ E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN); /* Clear bad data from Rx FIFOs */ if (adapter->hw.mac.type >= igb_mac_min) e1000_rx_fifo_flush_82575(&adapter->hw); /* Configure for OS presence */ em_init_manageability(adapter); /* Prepare transmit descriptors and buffers */ em_initialize_transmit_unit(ctx); /* Setup Multicast table */ em_if_multi_set(ctx); /* * Figure out the desired mbuf * pool for doing jumbos */ if (adapter->hw.mac.max_frame_size <= 2048) adapter->rx_mbuf_sz = MCLBYTES; #ifndef CONTIGMALLOC_WORKS else adapter->rx_mbuf_sz = MJUMPAGESIZE; #else else if (adapter->hw.mac.max_frame_size <= 4096) adapter->rx_mbuf_sz = MJUMPAGESIZE; else adapter->rx_mbuf_sz = MJUM9BYTES; #endif em_initialize_receive_unit(ctx); /* Use real VLAN Filter support? */ if (if_getcapenable(ifp) & IFCAP_VLAN_HWTAGGING) { if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER) /* Use real VLAN Filter support */ em_setup_vlan_hw_support(adapter); else { u32 ctrl; ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL); ctrl |= E1000_CTRL_VME; E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl); } } /* Don't lose promiscuous settings */ em_if_set_promisc(ctx, IFF_PROMISC); e1000_clear_hw_cntrs_base_generic(&adapter->hw); /* MSI/X configuration for 82574 */ if (adapter->hw.mac.type == e1000_82574) { int tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); tmp |= E1000_CTRL_EXT_PBA_CLR; E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp); /* Set the IVAR - interrupt vector routing. */ E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars); } else if (adapter->intr_type == IFLIB_INTR_MSIX) /* Set up queue routing */ igb_configure_queues(adapter); /* this clears any pending interrupts */ E1000_READ_REG(&adapter->hw, E1000_ICR); E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC); /* AMT based hardware can now take control from firmware */ if (adapter->has_manage && adapter->has_amt) em_get_hw_control(adapter); /* Set Energy Efficient Ethernet */ if (adapter->hw.mac.type >= igb_mac_min && adapter->hw.phy.media_type == e1000_media_type_copper) { if (adapter->hw.mac.type == e1000_i354) e1000_set_eee_i354(&adapter->hw, TRUE, TRUE); else e1000_set_eee_i350(&adapter->hw, TRUE, TRUE); } } /********************************************************************* * * Fast Legacy/MSI Combined Interrupt Service routine * *********************************************************************/ int em_intr(void *arg) { struct adapter *adapter = arg; if_ctx_t ctx = adapter->ctx; u32 reg_icr; reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR); if (adapter->intr_type != IFLIB_INTR_LEGACY) goto skip_stray; /* Hot eject? */ if (reg_icr == 0xffffffff) return FILTER_STRAY; /* Definitely not our interrupt. */ if (reg_icr == 0x0) return FILTER_STRAY; /* * Starting with the 82571 chip, bit 31 should be used to * determine whether the interrupt belongs to us. */ if (adapter->hw.mac.type >= e1000_82571 && (reg_icr & E1000_ICR_INT_ASSERTED) == 0) return FILTER_STRAY; skip_stray: /* Link status change */ if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { adapter->hw.mac.get_link_status = 1; iflib_admin_intr_deferred(ctx); } if (reg_icr & E1000_ICR_RXO) adapter->rx_overruns++; return (FILTER_SCHEDULE_THREAD); } static void igb_rx_enable_queue(struct adapter *adapter, struct em_rx_queue *rxq) { E1000_WRITE_REG(&adapter->hw, E1000_EIMS, rxq->eims); } static void em_rx_enable_queue(struct adapter *adapter, struct em_rx_queue *rxq) { E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxq->eims); } static void igb_tx_enable_queue(struct adapter *adapter, struct em_tx_queue *txq) { E1000_WRITE_REG(&adapter->hw, E1000_EIMS, txq->eims); } static void em_tx_enable_queue(struct adapter *adapter, struct em_tx_queue *txq) { E1000_WRITE_REG(&adapter->hw, E1000_IMS, txq->eims); } static int em_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid) { struct adapter *adapter = iflib_get_softc(ctx); struct em_rx_queue *rxq = &adapter->rx_queues[rxqid]; if (adapter->hw.mac.type >= igb_mac_min) igb_rx_enable_queue(adapter, rxq); else em_rx_enable_queue(adapter, rxq); return (0); } static int em_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid) { struct adapter *adapter = iflib_get_softc(ctx); struct em_tx_queue *txq = &adapter->tx_queues[txqid]; if (adapter->hw.mac.type >= igb_mac_min) igb_tx_enable_queue(adapter, txq); else em_tx_enable_queue(adapter, txq); return (0); } /********************************************************************* * * MSIX RX Interrupt Service routine * **********************************************************************/ static int em_msix_que(void *arg) { struct em_rx_queue *que = arg; ++que->irqs; return (FILTER_SCHEDULE_THREAD); } /********************************************************************* * * MSIX Link Fast Interrupt Service routine * **********************************************************************/ static int em_msix_link(void *arg) { struct adapter *adapter = arg; u32 reg_icr; + int is_igb; + is_igb = (adapter->hw.mac.type >= igb_mac_min); ++adapter->link_irq; MPASS(adapter->hw.back != NULL); reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR); if (reg_icr & E1000_ICR_RXO) adapter->rx_overruns++; - if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { - em_handle_link(adapter->ctx); + if (is_igb) { + if (reg_icr & E1000_ICR_LSC) + em_handle_link(adapter->ctx); + E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC); + E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask); } else { + if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { + em_handle_link(adapter->ctx); + } E1000_WRITE_REG(&adapter->hw, E1000_IMS, - EM_MSIX_LINK | E1000_IMS_LSC); - if (adapter->hw.mac.type >= igb_mac_min) - E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask); - } + EM_MSIX_LINK | E1000_IMS_LSC); - /* - * Because we must read the ICR for this interrupt - * it may clear other causes using autoclear, for - * this reason we simply create a soft interrupt - * for all these vectors. - */ - if (reg_icr && adapter->hw.mac.type < igb_mac_min) { - E1000_WRITE_REG(&adapter->hw, - E1000_ICS, adapter->ims); + /* + * Because we must read the ICR for this interrupt + * it may clear other causes using autoclear, for + * this reason we simply create a soft interrupt + * for all these vectors. + */ + if (reg_icr) { + E1000_WRITE_REG(&adapter->hw, + E1000_ICS, adapter->ims); + } } - return (FILTER_HANDLED); } static void em_handle_link(void *context) { if_ctx_t ctx = context; struct adapter *adapter = iflib_get_softc(ctx); adapter->hw.mac.get_link_status = 1; iflib_admin_intr_deferred(ctx); } /********************************************************************* * * Media Ioctl callback * * This routine is called whenever the user queries the status of * the interface using ifconfig. * **********************************************************************/ static void em_if_media_status(if_ctx_t ctx, struct ifmediareq *ifmr) { struct adapter *adapter = iflib_get_softc(ctx); u_char fiber_type = IFM_1000_SX; INIT_DEBUGOUT("em_if_media_status: begin"); iflib_admin_intr_deferred(ctx); ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; if (!adapter->link_active) { return; } ifmr->ifm_status |= IFM_ACTIVE; if ((adapter->hw.phy.media_type == e1000_media_type_fiber) || (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) { if (adapter->hw.mac.type == e1000_82545) fiber_type = IFM_1000_LX; ifmr->ifm_active |= fiber_type | IFM_FDX; } else { switch (adapter->link_speed) { case 10: ifmr->ifm_active |= IFM_10_T; break; case 100: ifmr->ifm_active |= IFM_100_TX; break; case 1000: ifmr->ifm_active |= IFM_1000_T; break; } if (adapter->link_duplex == FULL_DUPLEX) ifmr->ifm_active |= IFM_FDX; else ifmr->ifm_active |= IFM_HDX; } } /********************************************************************* * * Media Ioctl callback * * This routine is called when the user changes speed/duplex using * media/mediopt option with ifconfig. * **********************************************************************/ static int em_if_media_change(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct ifmedia *ifm = iflib_get_media(ctx); INIT_DEBUGOUT("em_if_media_change: begin"); if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) return (EINVAL); switch (IFM_SUBTYPE(ifm->ifm_media)) { case IFM_AUTO: adapter->hw.mac.autoneg = DO_AUTO_NEG; adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT; break; case IFM_1000_LX: case IFM_1000_SX: case IFM_1000_T: adapter->hw.mac.autoneg = DO_AUTO_NEG; adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL; break; case IFM_100_TX: adapter->hw.mac.autoneg = FALSE; adapter->hw.phy.autoneg_advertised = 0; if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX) adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL; else adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF; break; case IFM_10_T: adapter->hw.mac.autoneg = FALSE; adapter->hw.phy.autoneg_advertised = 0; if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX) adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL; else adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF; break; default: device_printf(adapter->dev, "Unsupported media type\n"); } em_if_init(ctx); return (0); } static int em_if_set_promisc(if_ctx_t ctx, int flags) { struct adapter *adapter = iflib_get_softc(ctx); u32 reg_rctl; em_disable_promisc(ctx); reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); if (flags & IFF_PROMISC) { reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE); /* Turn this on if you want to see bad packets */ if (em_debug_sbp) reg_rctl |= E1000_RCTL_SBP; E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); } else if (flags & IFF_ALLMULTI) { reg_rctl |= E1000_RCTL_MPE; reg_rctl &= ~E1000_RCTL_UPE; E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); } return (0); } static void em_disable_promisc(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct ifnet *ifp = iflib_get_ifp(ctx); u32 reg_rctl; int mcnt = 0; reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); reg_rctl &= (~E1000_RCTL_UPE); if (if_getflags(ifp) & IFF_ALLMULTI) mcnt = MAX_NUM_MULTICAST_ADDRESSES; else mcnt = if_multiaddr_count(ifp, MAX_NUM_MULTICAST_ADDRESSES); /* Don't disable if in MAX groups */ if (mcnt < MAX_NUM_MULTICAST_ADDRESSES) reg_rctl &= (~E1000_RCTL_MPE); reg_rctl &= (~E1000_RCTL_SBP); E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); } /********************************************************************* * Multicast Update * * This routine is called whenever multicast address list is updated. * **********************************************************************/ static void em_if_multi_set(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct ifnet *ifp = iflib_get_ifp(ctx); u32 reg_rctl = 0; u8 *mta; /* Multicast array memory */ int mcnt = 0; IOCTL_DEBUGOUT("em_set_multi: begin"); mta = adapter->mta; bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES); if (adapter->hw.mac.type == e1000_82542 && adapter->hw.revision_id == E1000_REVISION_2) { reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE) e1000_pci_clear_mwi(&adapter->hw); reg_rctl |= E1000_RCTL_RST; E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); msec_delay(5); } if_multiaddr_array(ifp, mta, &mcnt, MAX_NUM_MULTICAST_ADDRESSES); if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) { reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); reg_rctl |= E1000_RCTL_MPE; E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); } else e1000_update_mc_addr_list(&adapter->hw, mta, mcnt); if (adapter->hw.mac.type == e1000_82542 && adapter->hw.revision_id == E1000_REVISION_2) { reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); reg_rctl &= ~E1000_RCTL_RST; E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); msec_delay(5); if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE) e1000_pci_set_mwi(&adapter->hw); } } /********************************************************************* * Timer routine * * This routine checks for link status and updates statistics. * **********************************************************************/ static void em_if_timer(if_ctx_t ctx, uint16_t qid) { struct adapter *adapter = iflib_get_softc(ctx); struct em_rx_queue *que; int i; int trigger = 0; if (qid != 0) return; iflib_admin_intr_deferred(ctx); - /* Reset LAA into RAR[0] on 82571 */ - if ((adapter->hw.mac.type == e1000_82571) && - e1000_get_laa_state_82571(&adapter->hw)) - e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0); - if (adapter->hw.mac.type < em_mac_min) - lem_smartspeed(adapter); - /* Mask to use in the irq trigger */ if (adapter->intr_type == IFLIB_INTR_MSIX) { for (i = 0, que = adapter->rx_queues; i < adapter->rx_num_queues; i++, que++) trigger |= que->eims; } else { trigger = E1000_ICS_RXDMT0; } } static void em_if_update_admin_status(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct e1000_hw *hw = &adapter->hw; struct ifnet *ifp = iflib_get_ifp(ctx); device_t dev = iflib_get_dev(ctx); u32 link_check, thstat, ctrl; link_check = thstat = ctrl = 0; /* Get the cached link value or read phy for real */ switch (hw->phy.media_type) { case e1000_media_type_copper: if (hw->mac.get_link_status) { if (hw->mac.type == e1000_pch_spt) msec_delay(50); /* Do the work to read phy */ e1000_check_for_link(hw); link_check = !hw->mac.get_link_status; if (link_check) /* ESB2 fix */ e1000_cfg_on_link_up(hw); } else { link_check = TRUE; } break; case e1000_media_type_fiber: e1000_check_for_link(hw); link_check = (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU); break; case e1000_media_type_internal_serdes: e1000_check_for_link(hw); link_check = adapter->hw.mac.serdes_has_link; break; /* VF device is type_unknown */ case e1000_media_type_unknown: e1000_check_for_link(hw); link_check = !hw->mac.get_link_status; /* FALLTHROUGH */ default: break; } /* Check for thermal downshift or shutdown */ if (hw->mac.type == e1000_i350) { thstat = E1000_READ_REG(hw, E1000_THSTAT); ctrl = E1000_READ_REG(hw, E1000_CTRL_EXT); } /* Now check for a transition */ if (link_check && (adapter->link_active == 0)) { e1000_get_speed_and_duplex(hw, &adapter->link_speed, &adapter->link_duplex); /* Check if we must disable SPEED_MODE bit on PCI-E */ if ((adapter->link_speed != SPEED_1000) && ((hw->mac.type == e1000_82571) || (hw->mac.type == e1000_82572))) { int tarc0; tarc0 = E1000_READ_REG(hw, E1000_TARC(0)); tarc0 &= ~TARC_SPEED_MODE_BIT; E1000_WRITE_REG(hw, E1000_TARC(0), tarc0); } if (bootverbose) device_printf(dev, "Link is up %d Mbps %s\n", adapter->link_speed, ((adapter->link_duplex == FULL_DUPLEX) ? "Full Duplex" : "Half Duplex")); adapter->link_active = 1; adapter->smartspeed = 0; if_setbaudrate(ifp, adapter->link_speed * 1000000); if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) && (thstat & E1000_THSTAT_LINK_THROTTLE)) device_printf(dev, "Link: thermal downshift\n"); /* Delay Link Up for Phy update */ if (((hw->mac.type == e1000_i210) || (hw->mac.type == e1000_i211)) && (hw->phy.id == I210_I_PHY_ID)) msec_delay(I210_LINK_DELAY); /* Reset if the media type changed. */ if ((hw->dev_spec._82575.media_changed) && (adapter->hw.mac.type >= igb_mac_min)) { hw->dev_spec._82575.media_changed = false; adapter->flags |= IGB_MEDIA_RESET; em_reset(ctx); } iflib_link_state_change(ctx, LINK_STATE_UP, ifp->if_baudrate); printf("Link state changed to up\n"); } else if (!link_check && (adapter->link_active == 1)) { if_setbaudrate(ifp, 0); adapter->link_speed = 0; adapter->link_duplex = 0; if (bootverbose) device_printf(dev, "Link is Down\n"); adapter->link_active = 0; iflib_link_state_change(ctx, LINK_STATE_DOWN, ifp->if_baudrate); printf("link state changed to down\n"); } em_update_stats_counters(adapter); + /* Reset LAA into RAR[0] on 82571 */ + if ((adapter->hw.mac.type == e1000_82571) && + e1000_get_laa_state_82571(&adapter->hw)) + e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0); + + if (adapter->hw.mac.type < em_mac_min) + lem_smartspeed(adapter); + E1000_WRITE_REG(&adapter->hw, E1000_IMS, EM_MSIX_LINK | E1000_IMS_LSC); } /********************************************************************* * * This routine disables all traffic on the adapter by issuing a * global reset on the MAC and deallocates TX/RX buffers. * * This routine should always be called with BOTH the CORE * and TX locks. **********************************************************************/ static void em_if_stop(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); INIT_DEBUGOUT("em_stop: begin"); e1000_reset_hw(&adapter->hw); if (adapter->hw.mac.type >= e1000_82544) E1000_WRITE_REG(&adapter->hw, E1000_WUFC, 0); e1000_led_off(&adapter->hw); e1000_cleanup_led(&adapter->hw); } /********************************************************************* * * Determine hardware revision. * **********************************************************************/ static void em_identify_hardware(if_ctx_t ctx) { device_t dev = iflib_get_dev(ctx); struct adapter *adapter = iflib_get_softc(ctx); /* Make sure our PCI config space has the necessary stuff set */ adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2); /* Save off the information about this board */ adapter->hw.vendor_id = pci_get_vendor(dev); adapter->hw.device_id = pci_get_device(dev); adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1); adapter->hw.subsystem_vendor_id = pci_read_config(dev, PCIR_SUBVEND_0, 2); adapter->hw.subsystem_device_id = pci_read_config(dev, PCIR_SUBDEV_0, 2); /* Do Shared Code Init and Setup */ if (e1000_set_mac_type(&adapter->hw)) { device_printf(dev, "Setup init failure\n"); return; } } static int em_allocate_pci_resources(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); device_t dev = iflib_get_dev(ctx); int rid, val; rid = PCIR_BAR(0); adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (adapter->memory == NULL) { device_printf(dev, "Unable to allocate bus resource: memory\n"); return (ENXIO); } adapter->osdep.mem_bus_space_tag = rman_get_bustag(adapter->memory); adapter->osdep.mem_bus_space_handle = rman_get_bushandle(adapter->memory); adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle; /* Only older adapters use IO mapping */ if (adapter->hw.mac.type < em_mac_min && adapter->hw.mac.type > e1000_82543) { /* Figure our where our IO BAR is ? */ for (rid = PCIR_BAR(0); rid < PCIR_CIS;) { val = pci_read_config(dev, rid, 4); if (EM_BAR_TYPE(val) == EM_BAR_TYPE_IO) { adapter->io_rid = rid; break; } rid += 4; /* check for 64bit BAR */ if (EM_BAR_MEM_TYPE(val) == EM_BAR_MEM_TYPE_64BIT) rid += 4; } if (rid >= PCIR_CIS) { device_printf(dev, "Unable to locate IO BAR\n"); return (ENXIO); } adapter->ioport = bus_alloc_resource_any(dev, SYS_RES_IOPORT, &adapter->io_rid, RF_ACTIVE); if (adapter->ioport == NULL) { device_printf(dev, "Unable to allocate bus resource: " "ioport\n"); return (ENXIO); } adapter->hw.io_base = 0; adapter->osdep.io_bus_space_tag = rman_get_bustag(adapter->ioport); adapter->osdep.io_bus_space_handle = rman_get_bushandle(adapter->ioport); } adapter->hw.back = &adapter->osdep; return (0); } +static int +igb_intr_assign(if_ctx_t ctx, int msix) +{ + struct adapter *adapter = iflib_get_softc(ctx); + struct em_rx_queue *rx_que = adapter->rx_queues; + struct em_tx_queue *tx_que = adapter->tx_queues; + int error, rid, i, vector = 0, rx_vectors; + char buf[16]; + + /* First set up ring resources */ + for (i = 0; i < adapter->rx_num_queues; i++, rx_que++, vector++) { + rid = vector + 1; + snprintf(buf, sizeof(buf), "rxq%d", i); + error = iflib_irq_alloc_generic(ctx, &rx_que->que_irq, rid, IFLIB_INTR_RXTX, + em_msix_que, rx_que, rx_que->me, buf); + if (error) { + device_printf(iflib_get_dev(ctx), "Failed to allocate que int %d err: %d\n", i, error); + adapter->rx_num_queues = i; + goto fail; + } + + rx_que->msix = vector; + + /* + * Set the bit to enable interrupt + * in E1000_IMS -- bits 20 and 21 + * are for RX0 and RX1, note this has + * NOTHING to do with the MSIX vector + */ + if (adapter->hw.mac.type == e1000_82574) { + rx_que->eims = 1 << (20 + i); + adapter->ims |= rx_que->eims; + adapter->ivars |= (8 | rx_que->msix) << (i * 4); + } else if (adapter->hw.mac.type == e1000_82575) + rx_que->eims = E1000_EICR_TX_QUEUE0 << vector; + else + rx_que->eims = 1 << vector; + } + rx_vectors = vector; + + vector = 0; + for (i = 0; i < adapter->tx_num_queues; i++, tx_que++, vector++) { + snprintf(buf, sizeof(buf), "txq%d", i); + tx_que = &adapter->tx_queues[i]; + tx_que->msix = adapter->rx_queues[i % adapter->rx_num_queues].msix; + rid = rman_get_start(adapter->rx_queues[i % adapter->rx_num_queues].que_irq.ii_res); + iflib_softirq_alloc_generic(ctx, rid, IFLIB_INTR_TX, tx_que, tx_que->me, buf); + + if (adapter->hw.mac.type == e1000_82574) { + tx_que->eims = 1 << (22 + i); + adapter->ims |= tx_que->eims; + adapter->ivars |= (8 | tx_que->msix) << (8 + (i * 4)); + } else if (adapter->hw.mac.type == e1000_82575) { + tx_que->eims = E1000_EICR_TX_QUEUE0 << (i % adapter->tx_num_queues); + } else { + tx_que->eims = 1 << (i % adapter->tx_num_queues); + } + } + + /* Link interrupt */ + rid = rx_vectors + 1; + error = iflib_irq_alloc_generic(ctx, &adapter->irq, rid, IFLIB_INTR_ADMIN, em_msix_link, adapter, 0, "aq"); + + if (error) { + device_printf(iflib_get_dev(ctx), "Failed to register admin handler"); + goto fail; + } + adapter->linkvec = rx_vectors; + if (adapter->hw.mac.type < igb_mac_min) { + adapter->ivars |= (8 | rx_vectors) << 16; + adapter->ivars |= 0x80000000; + } + return (0); +fail: + iflib_irq_free(ctx, &adapter->irq); + rx_que = adapter->rx_queues; + for (int i = 0; i < adapter->rx_num_queues; i++, rx_que++) + iflib_irq_free(ctx, &rx_que->que_irq); + return (error); +} + /********************************************************************* * * Setup the MSIX Interrupt handlers * **********************************************************************/ static int em_if_msix_intr_assign(if_ctx_t ctx, int msix) { struct adapter *adapter = iflib_get_softc(ctx); struct em_rx_queue *rx_que = adapter->rx_queues; struct em_tx_queue *tx_que = adapter->tx_queues; - int error, rid, i, vector = 0, rx_vectors; + int error, rid, i, vector = 0; char buf[16]; + if (adapter->hw.mac.type >= igb_mac_min) { + return igb_intr_assign(ctx, msix); + } + /* First set up ring resources */ for (i = 0; i < adapter->rx_num_queues; i++, rx_que++, vector++) { rid = vector + 1; snprintf(buf, sizeof(buf), "rxq%d", i); - error = iflib_irq_alloc_generic(ctx, &rx_que->que_irq, rid, IFLIB_INTR_RXTX, em_msix_que, rx_que, rx_que->me, buf); + error = iflib_irq_alloc_generic(ctx, &rx_que->que_irq, rid, IFLIB_INTR_RX, em_msix_que, rx_que, rx_que->me, buf); if (error) { device_printf(iflib_get_dev(ctx), "Failed to allocate que int %d err: %d", i, error); adapter->rx_num_queues = i + 1; goto fail; } rx_que->msix = vector; /* * Set the bit to enable interrupt * in E1000_IMS -- bits 20 and 21 * are for RX0 and RX1, note this has * NOTHING to do with the MSIX vector */ if (adapter->hw.mac.type == e1000_82574) { rx_que->eims = 1 << (20 + i); adapter->ims |= rx_que->eims; adapter->ivars |= (8 | rx_que->msix) << (i * 4); } else if (adapter->hw.mac.type == e1000_82575) rx_que->eims = E1000_EICR_TX_QUEUE0 << vector; else rx_que->eims = 1 << vector; } - rx_vectors = vector; - vector = 0; for (i = 0; i < adapter->tx_num_queues; i++, tx_que++, vector++) { rid = vector + 1; snprintf(buf, sizeof(buf), "txq%d", i); tx_que = &adapter->tx_queues[i]; - iflib_softirq_alloc_generic(ctx, rid, IFLIB_INTR_TX, tx_que, tx_que->me, buf); - tx_que->msix = (vector % adapter->tx_num_queues); + error = iflib_irq_alloc_generic(ctx, &tx_que->que_irq, rid, IFLIB_INTR_TX, em_msix_que, tx_que, tx_que->me, buf); + if (error) { + device_printf(iflib_get_dev(ctx), "Failed to allocate que int %d err: %d", i, error); + adapter->tx_num_queues = i + 1; + goto fail; + } + tx_que->msix = vector; /* * Set the bit to enable interrupt * in E1000_IMS -- bits 22 and 23 * are for TX0 and TX1, note this has * NOTHING to do with the MSIX vector */ if (adapter->hw.mac.type == e1000_82574) { tx_que->eims = 1 << (22 + i); adapter->ims |= tx_que->eims; adapter->ivars |= (8 | tx_que->msix) << (8 + (i * 4)); } else if (adapter->hw.mac.type == e1000_82575) { - tx_que->eims = E1000_EICR_TX_QUEUE0 << (i % adapter->tx_num_queues); + tx_que->eims = E1000_EICR_TX_QUEUE0 << vector; } else { - tx_que->eims = 1 << (i % adapter->tx_num_queues); + tx_que->eims = 1 << vector; } } /* Link interrupt */ - rid = rx_vectors + 1; + rid = vector + 1; error = iflib_irq_alloc_generic(ctx, &adapter->irq, rid, IFLIB_INTR_ADMIN, em_msix_link, adapter, 0, "aq"); if (error) { device_printf(iflib_get_dev(ctx), "Failed to register admin handler"); goto fail; } - adapter->linkvec = rx_vectors; + + adapter->linkvec = vector; if (adapter->hw.mac.type < igb_mac_min) { - adapter->ivars |= (8 | rx_vectors) << 16; + adapter->ivars |= (8 | vector) << 16; adapter->ivars |= 0x80000000; } return (0); fail: iflib_irq_free(ctx, &adapter->irq); rx_que = adapter->rx_queues; for (int i = 0; i < adapter->rx_num_queues; i++, rx_que++) iflib_irq_free(ctx, &rx_que->que_irq); return (error); } static void igb_configure_queues(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; struct em_rx_queue *rx_que; struct em_tx_queue *tx_que; u32 tmp, ivar = 0, newitr = 0; /* First turn on RSS capability */ if (adapter->hw.mac.type != e1000_82575) E1000_WRITE_REG(hw, E1000_GPIE, E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME | E1000_GPIE_PBA | E1000_GPIE_NSICR); /* Turn on MSIX */ switch (adapter->hw.mac.type) { case e1000_82580: case e1000_i350: case e1000_i354: case e1000_i210: case e1000_i211: case e1000_vfadapt: case e1000_vfadapt_i350: /* RX entries */ for (int i = 0; i < adapter->rx_num_queues; i++) { u32 index = i >> 1; ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); rx_que = &adapter->rx_queues[i]; if (i & 1) { ivar &= 0xFF00FFFF; ivar |= (rx_que->msix | E1000_IVAR_VALID) << 16; } else { ivar &= 0xFFFFFF00; ivar |= rx_que->msix | E1000_IVAR_VALID; } E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); } /* TX entries */ for (int i = 0; i < adapter->tx_num_queues; i++) { u32 index = i >> 1; ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); tx_que = &adapter->tx_queues[i]; if (i & 1) { ivar &= 0x00FFFFFF; ivar |= (tx_que->msix | E1000_IVAR_VALID) << 24; } else { ivar &= 0xFFFF00FF; ivar |= (tx_que->msix | E1000_IVAR_VALID) << 8; } E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); adapter->que_mask |= tx_que->eims; } /* And for the link interrupt */ ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8; adapter->link_mask = 1 << adapter->linkvec; E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar); break; case e1000_82576: /* RX entries */ for (int i = 0; i < adapter->rx_num_queues; i++) { u32 index = i & 0x7; /* Each IVAR has two entries */ ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); rx_que = &adapter->rx_queues[i]; if (i < 8) { ivar &= 0xFFFFFF00; ivar |= rx_que->msix | E1000_IVAR_VALID; } else { ivar &= 0xFF00FFFF; ivar |= (rx_que->msix | E1000_IVAR_VALID) << 16; } E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); adapter->que_mask |= rx_que->eims; } /* TX entries */ for (int i = 0; i < adapter->tx_num_queues; i++) { u32 index = i & 0x7; /* Each IVAR has two entries */ ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); tx_que = &adapter->tx_queues[i]; if (i < 8) { ivar &= 0xFFFF00FF; ivar |= (tx_que->msix | E1000_IVAR_VALID) << 8; } else { ivar &= 0x00FFFFFF; ivar |= (tx_que->msix | E1000_IVAR_VALID) << 24; } E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); adapter->que_mask |= tx_que->eims; } /* And for the link interrupt */ ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8; adapter->link_mask = 1 << adapter->linkvec; E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar); break; case e1000_82575: /* enable MSI-X support*/ tmp = E1000_READ_REG(hw, E1000_CTRL_EXT); tmp |= E1000_CTRL_EXT_PBA_CLR; /* Auto-Mask interrupts upon ICR read. */ tmp |= E1000_CTRL_EXT_EIAME; tmp |= E1000_CTRL_EXT_IRCA; E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp); /* Queues */ for (int i = 0; i < adapter->rx_num_queues; i++) { rx_que = &adapter->rx_queues[i]; tmp = E1000_EICR_RX_QUEUE0 << i; tmp |= E1000_EICR_TX_QUEUE0 << i; rx_que->eims = tmp; E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0), i, rx_que->eims); adapter->que_mask |= rx_que->eims; } /* Link */ E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec), E1000_EIMS_OTHER); adapter->link_mask |= E1000_EIMS_OTHER; default: break; } /* Set the starting interrupt rate */ if (em_max_interrupt_rate > 0) newitr = (4000000 / em_max_interrupt_rate) & 0x7FFC; if (hw->mac.type == e1000_82575) newitr |= newitr << 16; else newitr |= E1000_EITR_CNT_IGNR; for (int i = 0; i < adapter->rx_num_queues; i++) { rx_que = &adapter->rx_queues[i]; E1000_WRITE_REG(hw, E1000_EITR(rx_que->msix), newitr); } return; } static void em_free_pci_resources(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); - struct em_rx_queue *que = adapter->rx_queues; + struct em_rx_queue *rxque = adapter->rx_queues; + struct em_tx_queue *txque = adapter->tx_queues; device_t dev = iflib_get_dev(ctx); + int is_igb; + is_igb = (adapter->hw.mac.type >= igb_mac_min); /* Release all msix queue resources */ if (adapter->intr_type == IFLIB_INTR_MSIX) iflib_irq_free(ctx, &adapter->irq); - for (int i = 0; i < adapter->rx_num_queues; i++, que++) { - iflib_irq_free(ctx, &que->que_irq); + for (int i = 0; i < adapter->rx_num_queues; i++, rxque++) { + iflib_irq_free(ctx, &rxque->que_irq); + } + + if (!is_igb) { + for (int i = 0; i < adapter->tx_num_queues; i++, txque++) { + iflib_irq_free(ctx, &txque->que_irq); + } } /* First release all the interrupt resources */ if (adapter->memory != NULL) { bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(0), adapter->memory); adapter->memory = NULL; } if (adapter->flash != NULL) { bus_release_resource(dev, SYS_RES_MEMORY, EM_FLASH, adapter->flash); adapter->flash = NULL; } if (adapter->ioport != NULL) bus_release_resource(dev, SYS_RES_IOPORT, adapter->io_rid, adapter->ioport); } /* Setup MSI or MSI/X */ static int em_setup_msix(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); if (adapter->hw.mac.type == e1000_82574) { em_enable_vectors_82574(ctx); } return (0); } /********************************************************************* * * Initialize the hardware to a configuration * as specified by the adapter structure. * **********************************************************************/ static void lem_smartspeed(struct adapter *adapter) { u16 phy_tmp; if (adapter->link_active || (adapter->hw.phy.type != e1000_phy_igp) || adapter->hw.mac.autoneg == 0 || (adapter->hw.phy.autoneg_advertised & ADVERTISE_1000_FULL) == 0) return; if (adapter->smartspeed == 0) { /* If Master/Slave config fault is asserted twice, * we assume back-to-back */ e1000_read_phy_reg(&adapter->hw, PHY_1000T_STATUS, &phy_tmp); if (!(phy_tmp & SR_1000T_MS_CONFIG_FAULT)) return; e1000_read_phy_reg(&adapter->hw, PHY_1000T_STATUS, &phy_tmp); if (phy_tmp & SR_1000T_MS_CONFIG_FAULT) { e1000_read_phy_reg(&adapter->hw, PHY_1000T_CTRL, &phy_tmp); if(phy_tmp & CR_1000T_MS_ENABLE) { phy_tmp &= ~CR_1000T_MS_ENABLE; e1000_write_phy_reg(&adapter->hw, PHY_1000T_CTRL, phy_tmp); adapter->smartspeed++; if(adapter->hw.mac.autoneg && !e1000_copper_link_autoneg(&adapter->hw) && !e1000_read_phy_reg(&adapter->hw, PHY_CONTROL, &phy_tmp)) { phy_tmp |= (MII_CR_AUTO_NEG_EN | MII_CR_RESTART_AUTO_NEG); e1000_write_phy_reg(&adapter->hw, PHY_CONTROL, phy_tmp); } } } return; } else if(adapter->smartspeed == EM_SMARTSPEED_DOWNSHIFT) { /* If still no link, perhaps using 2/3 pair cable */ e1000_read_phy_reg(&adapter->hw, PHY_1000T_CTRL, &phy_tmp); phy_tmp |= CR_1000T_MS_ENABLE; e1000_write_phy_reg(&adapter->hw, PHY_1000T_CTRL, phy_tmp); if(adapter->hw.mac.autoneg && !e1000_copper_link_autoneg(&adapter->hw) && !e1000_read_phy_reg(&adapter->hw, PHY_CONTROL, &phy_tmp)) { phy_tmp |= (MII_CR_AUTO_NEG_EN | MII_CR_RESTART_AUTO_NEG); e1000_write_phy_reg(&adapter->hw, PHY_CONTROL, phy_tmp); } } /* Restart process after EM_SMARTSPEED_MAX iterations */ if(adapter->smartspeed++ == EM_SMARTSPEED_MAX) adapter->smartspeed = 0; } /********************************************************************* * * Initialize the DMA Coalescing feature * **********************************************************************/ static void igb_init_dmac(struct adapter *adapter, u32 pba) { device_t dev = adapter->dev; struct e1000_hw *hw = &adapter->hw; u32 dmac, reg = ~E1000_DMACR_DMAC_EN; u16 hwm; u16 max_frame_size; if (hw->mac.type == e1000_i211) return; max_frame_size = adapter->shared->isc_max_frame_size; if (hw->mac.type > e1000_82580) { if (adapter->dmac == 0) { /* Disabling it */ E1000_WRITE_REG(hw, E1000_DMACR, reg); return; } else device_printf(dev, "DMA Coalescing enabled\n"); /* Set starting threshold */ E1000_WRITE_REG(hw, E1000_DMCTXTH, 0); hwm = 64 * pba - max_frame_size / 16; if (hwm < 64 * (pba - 6)) hwm = 64 * (pba - 6); reg = E1000_READ_REG(hw, E1000_FCRTC); reg &= ~E1000_FCRTC_RTH_COAL_MASK; reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT) & E1000_FCRTC_RTH_COAL_MASK); E1000_WRITE_REG(hw, E1000_FCRTC, reg); dmac = pba - max_frame_size / 512; if (dmac < pba - 10) dmac = pba - 10; reg = E1000_READ_REG(hw, E1000_DMACR); reg &= ~E1000_DMACR_DMACTHR_MASK; reg = ((dmac << E1000_DMACR_DMACTHR_SHIFT) & E1000_DMACR_DMACTHR_MASK); /* transition to L0x or L1 if available..*/ reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK); /* Check if status is 2.5Gb backplane connection * before configuration of watchdog timer, which is * in msec values in 12.8usec intervals * watchdog timer= msec values in 32usec intervals * for non 2.5Gb connection */ if (hw->mac.type == e1000_i354) { int status = E1000_READ_REG(hw, E1000_STATUS); if ((status & E1000_STATUS_2P5_SKU) && (!(status & E1000_STATUS_2P5_SKU_OVER))) reg |= ((adapter->dmac * 5) >> 6); else reg |= (adapter->dmac >> 5); } else { reg |= (adapter->dmac >> 5); } E1000_WRITE_REG(hw, E1000_DMACR, reg); E1000_WRITE_REG(hw, E1000_DMCRTRH, 0); /* Set the interval before transition */ reg = E1000_READ_REG(hw, E1000_DMCTLX); if (hw->mac.type == e1000_i350) reg |= IGB_DMCTLX_DCFLUSH_DIS; /* ** in 2.5Gb connection, TTLX unit is 0.4 usec ** which is 0x4*2 = 0xA. But delay is still 4 usec */ if (hw->mac.type == e1000_i354) { int status = E1000_READ_REG(hw, E1000_STATUS); if ((status & E1000_STATUS_2P5_SKU) && (!(status & E1000_STATUS_2P5_SKU_OVER))) reg |= 0xA; else reg |= 0x4; } else { reg |= 0x4; } E1000_WRITE_REG(hw, E1000_DMCTLX, reg); /* free space in tx packet buffer to wake from DMA coal */ E1000_WRITE_REG(hw, E1000_DMCTXTH, (IGB_TXPBSIZE - (2 * max_frame_size)) >> 6); /* make low power state decision controlled by DMA coal */ reg = E1000_READ_REG(hw, E1000_PCIEMISC); reg &= ~E1000_PCIEMISC_LX_DECISION; E1000_WRITE_REG(hw, E1000_PCIEMISC, reg); } else if (hw->mac.type == e1000_82580) { u32 reg = E1000_READ_REG(hw, E1000_PCIEMISC); E1000_WRITE_REG(hw, E1000_PCIEMISC, reg & ~E1000_PCIEMISC_LX_DECISION); E1000_WRITE_REG(hw, E1000_DMACR, 0); } } static void em_reset(if_ctx_t ctx) { device_t dev = iflib_get_dev(ctx); struct adapter *adapter = iflib_get_softc(ctx); struct ifnet *ifp = iflib_get_ifp(ctx); struct e1000_hw *hw = &adapter->hw; u16 rx_buffer_size; u32 pba; INIT_DEBUGOUT("em_reset: begin"); /* Let the firmware know the OS is in control */ em_get_hw_control(adapter); /* Set up smart power down as default off on newer adapters. */ if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 || hw->mac.type == e1000_82572)) { u16 phy_tmp = 0; /* Speed up time to link by disabling smart power down. */ e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp); phy_tmp &= ~IGP02E1000_PM_SPD; e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp); } /* * Packet Buffer Allocation (PBA) * Writing PBA sets the receive portion of the buffer * the remainder is used for the transmit buffer. */ switch (hw->mac.type) { /* Total Packet Buffer on these is 48K */ case e1000_82571: case e1000_82572: case e1000_80003es2lan: pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */ break; case e1000_82573: /* 82573: Total Packet Buffer is 32K */ pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */ break; case e1000_82574: case e1000_82583: pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */ break; case e1000_ich8lan: pba = E1000_PBA_8K; break; case e1000_ich9lan: case e1000_ich10lan: /* Boost Receive side for jumbo frames */ if (adapter->hw.mac.max_frame_size > 4096) pba = E1000_PBA_14K; else pba = E1000_PBA_10K; break; case e1000_pchlan: case e1000_pch2lan: case e1000_pch_lpt: case e1000_pch_spt: pba = E1000_PBA_26K; break; case e1000_82575: pba = E1000_PBA_32K; break; case e1000_82576: case e1000_vfadapt: pba = E1000_READ_REG(hw, E1000_RXPBS); pba &= E1000_RXPBS_SIZE_MASK_82576; break; case e1000_82580: case e1000_i350: case e1000_i354: case e1000_vfadapt_i350: pba = E1000_READ_REG(hw, E1000_RXPBS); pba = e1000_rxpbs_adjust_82580(pba); break; case e1000_i210: case e1000_i211: pba = E1000_PBA_34K; break; default: if (adapter->hw.mac.max_frame_size > 8192) pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */ else pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */ } /* Special needs in case of Jumbo frames */ if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) { u32 tx_space, min_tx, min_rx; pba = E1000_READ_REG(hw, E1000_PBA); tx_space = pba >> 16; pba &= 0xffff; min_tx = (adapter->hw.mac.max_frame_size + sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2; min_tx = roundup2(min_tx, 1024); min_tx >>= 10; min_rx = adapter->hw.mac.max_frame_size; min_rx = roundup2(min_rx, 1024); min_rx >>= 10; if (tx_space < min_tx && ((min_tx - tx_space) < pba)) { pba = pba - (min_tx - tx_space); /* * if short on rx space, rx wins * and must trump tx adjustment */ if (pba < min_rx) pba = min_rx; } E1000_WRITE_REG(hw, E1000_PBA, pba); } if (hw->mac.type < igb_mac_min) E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba); INIT_DEBUGOUT1("em_reset: pba=%dK",pba); /* * These parameters control the automatic generation (Tx) and * response (Rx) to Ethernet PAUSE frames. * - High water mark should allow for at least two frames to be * received after sending an XOFF. * - Low water mark works best when it is very near the high water mark. * This allows the receiver to restart by sending XON when it has * drained a bit. Here we use an arbitrary value of 1500 which will * restart after one full frame is pulled from the buffer. There * could be several smaller frames in the buffer and if so they will * not trigger the XON until their total number reduces the buffer * by 1500. * - The pause time is fairly large at 1000 x 512ns = 512 usec. */ rx_buffer_size = (pba & 0xffff) << 10; hw->fc.high_water = rx_buffer_size - roundup2(adapter->hw.mac.max_frame_size, 1024); hw->fc.low_water = hw->fc.high_water - 1500; if (adapter->fc) /* locally set flow control value? */ hw->fc.requested_mode = adapter->fc; else hw->fc.requested_mode = e1000_fc_full; if (hw->mac.type == e1000_80003es2lan) hw->fc.pause_time = 0xFFFF; else hw->fc.pause_time = EM_FC_PAUSE_TIME; hw->fc.send_xon = TRUE; /* Device specific overrides/settings */ switch (hw->mac.type) { case e1000_pchlan: /* Workaround: no TX flow ctrl for PCH */ hw->fc.requested_mode = e1000_fc_rx_pause; hw->fc.pause_time = 0xFFFF; /* override */ if (if_getmtu(ifp) > ETHERMTU) { hw->fc.high_water = 0x3500; hw->fc.low_water = 0x1500; } else { hw->fc.high_water = 0x5000; hw->fc.low_water = 0x3000; } hw->fc.refresh_time = 0x1000; break; case e1000_pch2lan: case e1000_pch_lpt: case e1000_pch_spt: hw->fc.high_water = 0x5C20; hw->fc.low_water = 0x5048; hw->fc.pause_time = 0x0650; hw->fc.refresh_time = 0x0400; /* Jumbos need adjusted PBA */ if (if_getmtu(ifp) > ETHERMTU) E1000_WRITE_REG(hw, E1000_PBA, 12); else E1000_WRITE_REG(hw, E1000_PBA, 26); break; case e1000_82575: case e1000_82576: /* 8-byte granularity */ hw->fc.low_water = hw->fc.high_water - 8; break; case e1000_82580: case e1000_i350: case e1000_i354: case e1000_i210: case e1000_i211: case e1000_vfadapt: case e1000_vfadapt_i350: /* 16-byte granularity */ hw->fc.low_water = hw->fc.high_water - 16; break; case e1000_ich9lan: case e1000_ich10lan: if (if_getmtu(ifp) > ETHERMTU) { hw->fc.high_water = 0x2800; hw->fc.low_water = hw->fc.high_water - 8; break; } /* FALLTHROUGH */ default: if (hw->mac.type == e1000_80003es2lan) hw->fc.pause_time = 0xFFFF; break; } /* Issue a global reset */ e1000_reset_hw(hw); if (adapter->hw.mac.type >= igb_mac_min) { E1000_WRITE_REG(hw, E1000_WUC, 0); } else { E1000_WRITE_REG(hw, E1000_WUFC, 0); em_disable_aspm(adapter); } if (adapter->flags & IGB_MEDIA_RESET) { e1000_setup_init_funcs(hw, TRUE); e1000_get_bus_info(hw); adapter->flags &= ~IGB_MEDIA_RESET; } /* and a re-init */ if (e1000_init_hw(hw) < 0) { device_printf(dev, "Hardware Initialization Failed\n"); return; } if (adapter->hw.mac.type >= igb_mac_min) igb_init_dmac(adapter, pba); E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN); e1000_get_phy_info(hw); e1000_check_for_link(hw); } #define RSSKEYLEN 10 static void em_initialize_rss_mapping(struct adapter *adapter) { uint8_t rss_key[4 * RSSKEYLEN]; uint32_t reta = 0; struct e1000_hw *hw = &adapter->hw; int i; /* * Configure RSS key */ arc4rand(rss_key, sizeof(rss_key), 0); for (i = 0; i < RSSKEYLEN; ++i) { uint32_t rssrk = 0; rssrk = EM_RSSRK_VAL(rss_key, i); E1000_WRITE_REG(hw,E1000_RSSRK(i), rssrk); } /* * Configure RSS redirect table in following fashion: * (hash & ring_cnt_mask) == rdr_table[(hash & rdr_table_mask)] */ for (i = 0; i < sizeof(reta); ++i) { uint32_t q; q = (i % adapter->rx_num_queues) << 7; reta |= q << (8 * i); } for (i = 0; i < 32; ++i) E1000_WRITE_REG(hw, E1000_RETA(i), reta); E1000_WRITE_REG(hw, E1000_MRQC, E1000_MRQC_RSS_ENABLE_2Q | E1000_MRQC_RSS_FIELD_IPV4_TCP | E1000_MRQC_RSS_FIELD_IPV4 | E1000_MRQC_RSS_FIELD_IPV6_TCP_EX | E1000_MRQC_RSS_FIELD_IPV6_EX | E1000_MRQC_RSS_FIELD_IPV6); } static void igb_initialize_rss_mapping(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; int i; int queue_id; u32 reta; u32 rss_key[10], mrqc, shift = 0; /* XXX? */ if (adapter->hw.mac.type == e1000_82575) shift = 6; /* * The redirection table controls which destination * queue each bucket redirects traffic to. * Each DWORD represents four queues, with the LSB * being the first queue in the DWORD. * * This just allocates buckets to queues using round-robin * allocation. * * NOTE: It Just Happens to line up with the default * RSS allocation method. */ /* Warning FM follows */ reta = 0; for (i = 0; i < 128; i++) { #ifdef RSS queue_id = rss_get_indirection_to_bucket(i); /* * If we have more queues than buckets, we'll * end up mapping buckets to a subset of the * queues. * * If we have more buckets than queues, we'll * end up instead assigning multiple buckets * to queues. * * Both are suboptimal, but we need to handle * the case so we don't go out of bounds * indexing arrays and such. */ queue_id = queue_id % adapter->rx_num_queues; #else queue_id = (i % adapter->rx_num_queues); #endif /* Adjust if required */ queue_id = queue_id << shift; /* * The low 8 bits are for hash value (n+0); * The next 8 bits are for hash value (n+1), etc. */ reta = reta >> 8; reta = reta | ( ((uint32_t) queue_id) << 24); if ((i & 3) == 3) { E1000_WRITE_REG(hw, E1000_RETA(i >> 2), reta); reta = 0; } } /* Now fill in hash table */ /* * MRQC: Multiple Receive Queues Command * Set queuing to RSS control, number depends on the device. */ mrqc = E1000_MRQC_ENABLE_RSS_8Q; #ifdef RSS /* XXX ew typecasting */ rss_getkey((uint8_t *) &rss_key); #else arc4rand(&rss_key, sizeof(rss_key), 0); #endif for (i = 0; i < 10; i++) E1000_WRITE_REG_ARRAY(hw, E1000_RSSRK(0), i, rss_key[i]); /* * Configure the RSS fields to hash upon. */ mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 | E1000_MRQC_RSS_FIELD_IPV4_TCP); mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 | E1000_MRQC_RSS_FIELD_IPV6_TCP); mrqc |=( E1000_MRQC_RSS_FIELD_IPV4_UDP | E1000_MRQC_RSS_FIELD_IPV6_UDP); mrqc |=( E1000_MRQC_RSS_FIELD_IPV6_UDP_EX | E1000_MRQC_RSS_FIELD_IPV6_TCP_EX); E1000_WRITE_REG(hw, E1000_MRQC, mrqc); } /********************************************************************* * * Setup networking device structure and register an interface. * **********************************************************************/ static int em_setup_interface(if_ctx_t ctx) { struct ifnet *ifp = iflib_get_ifp(ctx); struct adapter *adapter = iflib_get_softc(ctx); if_softc_ctx_t scctx = adapter->shared; uint64_t cap = 0; INIT_DEBUGOUT("em_setup_interface: begin"); /* TSO parameters */ if_sethwtsomax(ifp, IP_MAXPACKET); /* Take m_pullup(9)'s in em_xmit() w/ TSO into acount. */ if_sethwtsomaxsegcount(ifp, EM_MAX_SCATTER - 5); if_sethwtsomaxsegsize(ifp, EM_TSO_SEG_SIZE); /* Single Queue */ if (adapter->tx_num_queues == 1) { if_setsendqlen(ifp, scctx->isc_ntxd[0] - 1); if_setsendqready(ifp); } cap = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM | IFCAP_TSO4; cap |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWTSO | IFCAP_VLAN_MTU; /* * Tell the upper layer(s) we * support full VLAN capability */ if_setifheaderlen(ifp, sizeof(struct ether_vlan_header)); if_setcapabilitiesbit(ifp, cap, 0); /* * Don't turn this on by default, if vlans are * created on another pseudo device (eg. lagg) * then vlan events are not passed thru, breaking * operation, but with HW FILTER off it works. If * using vlans directly on the em driver you can * enable this and get full hardware tag filtering. */ if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWFILTER,0); /* Enable only WOL MAGIC by default */ if (adapter->wol) { if_setcapenablebit(ifp, IFCAP_WOL_MAGIC, IFCAP_WOL_MCAST| IFCAP_WOL_UCAST); } else { if_setcapenablebit(ifp, 0, IFCAP_WOL_MAGIC | IFCAP_WOL_MCAST| IFCAP_WOL_UCAST); } /* * Specify the media types supported by this adapter and register * callbacks to update media and link information */ if ((adapter->hw.phy.media_type == e1000_media_type_fiber) || (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) { u_char fiber_type = IFM_1000_SX; /* default type */ if (adapter->hw.mac.type == e1000_82545) fiber_type = IFM_1000_LX; ifmedia_add(adapter->media, IFM_ETHER | fiber_type | IFM_FDX, 0, NULL); ifmedia_add(adapter->media, IFM_ETHER | fiber_type, 0, NULL); } else { ifmedia_add(adapter->media, IFM_ETHER | IFM_10_T, 0, NULL); ifmedia_add(adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX, 0, NULL); ifmedia_add(adapter->media, IFM_ETHER | IFM_100_TX, 0, NULL); ifmedia_add(adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX, 0, NULL); if (adapter->hw.phy.type != e1000_phy_ife) { ifmedia_add(adapter->media, IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL); ifmedia_add(adapter->media, IFM_ETHER | IFM_1000_T, 0, NULL); } } ifmedia_add(adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(adapter->media, IFM_ETHER | IFM_AUTO); return (0); } static int em_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int ntxqs, int ntxqsets) { struct adapter *adapter = iflib_get_softc(ctx); if_softc_ctx_t scctx = adapter->shared; int error = E1000_SUCCESS; struct em_tx_queue *que; int i, j; MPASS(adapter->tx_num_queues > 0); MPASS(adapter->tx_num_queues == ntxqsets); /* First allocate the top level queue structs */ if (!(adapter->tx_queues = (struct em_tx_queue *) malloc(sizeof(struct em_tx_queue) * adapter->tx_num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { device_printf(iflib_get_dev(ctx), "Unable to allocate queue memory\n"); return(ENOMEM); } for (i = 0, que = adapter->tx_queues; i < adapter->tx_num_queues; i++, que++) { /* Set up some basics */ struct tx_ring *txr = &que->txr; txr->adapter = que->adapter = adapter; que->me = txr->me = i; /* Allocate report status array */ if (!(txr->tx_rsq = (qidx_t *) malloc(sizeof(qidx_t) * scctx->isc_ntxd[0], M_DEVBUF, M_NOWAIT | M_ZERO))) { device_printf(iflib_get_dev(ctx), "failed to allocate rs_idxs memory\n"); error = ENOMEM; goto fail; } for (j = 0; j < scctx->isc_ntxd[0]; j++) txr->tx_rsq[j] = QIDX_INVALID; /* get the virtual and physical address of the hardware queues */ txr->tx_base = (struct e1000_tx_desc *)vaddrs[i*ntxqs]; txr->tx_paddr = paddrs[i*ntxqs]; } device_printf(iflib_get_dev(ctx), "allocated for %d tx_queues\n", adapter->tx_num_queues); return (0); fail: em_if_queues_free(ctx); return (error); } static int em_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int nrxqs, int nrxqsets) { struct adapter *adapter = iflib_get_softc(ctx); int error = E1000_SUCCESS; struct em_rx_queue *que; int i; MPASS(adapter->rx_num_queues > 0); MPASS(adapter->rx_num_queues == nrxqsets); /* First allocate the top level queue structs */ if (!(adapter->rx_queues = (struct em_rx_queue *) malloc(sizeof(struct em_rx_queue) * adapter->rx_num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { device_printf(iflib_get_dev(ctx), "Unable to allocate queue memory\n"); error = ENOMEM; goto fail; } for (i = 0, que = adapter->rx_queues; i < nrxqsets; i++, que++) { /* Set up some basics */ struct rx_ring *rxr = &que->rxr; rxr->adapter = que->adapter = adapter; rxr->que = que; que->me = rxr->me = i; /* get the virtual and physical address of the hardware queues */ rxr->rx_base = (union e1000_rx_desc_extended *)vaddrs[i*nrxqs]; rxr->rx_paddr = paddrs[i*nrxqs]; } device_printf(iflib_get_dev(ctx), "allocated for %d rx_queues\n", adapter->rx_num_queues); return (0); fail: em_if_queues_free(ctx); return (error); } static void em_if_queues_free(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct em_tx_queue *tx_que = adapter->tx_queues; struct em_rx_queue *rx_que = adapter->rx_queues; if (tx_que != NULL) { for (int i = 0; i < adapter->tx_num_queues; i++, tx_que++) { struct tx_ring *txr = &tx_que->txr; if (txr->tx_rsq == NULL) break; free(txr->tx_rsq, M_DEVBUF); txr->tx_rsq = NULL; } free(adapter->tx_queues, M_DEVBUF); adapter->tx_queues = NULL; } if (rx_que != NULL) { free(adapter->rx_queues, M_DEVBUF); adapter->rx_queues = NULL; } em_release_hw_control(adapter); if (adapter->mta != NULL) { free(adapter->mta, M_DEVBUF); } } /********************************************************************* * * Enable transmit unit. * **********************************************************************/ static void em_initialize_transmit_unit(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); if_softc_ctx_t scctx = adapter->shared; struct em_tx_queue *que; struct tx_ring *txr; struct e1000_hw *hw = &adapter->hw; u32 tctl, txdctl = 0, tarc, tipg = 0; INIT_DEBUGOUT("em_initialize_transmit_unit: begin"); for (int i = 0; i < adapter->tx_num_queues; i++, txr++) { u64 bus_addr; caddr_t offp, endp; que = &adapter->tx_queues[i]; txr = &que->txr; bus_addr = txr->tx_paddr; /* Clear checksum offload context. */ offp = (caddr_t)&txr->csum_flags; endp = (caddr_t)(txr + 1); bzero(offp, endp - offp); /* Base and Len of TX Ring */ E1000_WRITE_REG(hw, E1000_TDLEN(i), scctx->isc_ntxd[0] * sizeof(struct e1000_tx_desc)); E1000_WRITE_REG(hw, E1000_TDBAH(i), (u32)(bus_addr >> 32)); E1000_WRITE_REG(hw, E1000_TDBAL(i), (u32)bus_addr); /* Init the HEAD/TAIL indices */ E1000_WRITE_REG(hw, E1000_TDT(i), 0); E1000_WRITE_REG(hw, E1000_TDH(i), 0); HW_DEBUGOUT2("Base = %x, Length = %x\n", E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)), E1000_READ_REG(&adapter->hw, E1000_TDLEN(i))); txdctl = 0; /* clear txdctl */ txdctl |= 0x1f; /* PTHRESH */ txdctl |= 1 << 8; /* HTHRESH */ txdctl |= 1 << 16;/* WTHRESH */ txdctl |= 1 << 22; /* Reserved bit 22 must always be 1 */ txdctl |= E1000_TXDCTL_GRAN; txdctl |= 1 << 25; /* LWTHRESH */ E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl); } /* Set the default values for the Tx Inter Packet Gap timer */ switch (adapter->hw.mac.type) { case e1000_80003es2lan: tipg = DEFAULT_82543_TIPG_IPGR1; tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT; break; case e1000_82542: tipg = DEFAULT_82542_TIPG_IPGT; tipg |= DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT; tipg |= DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT; break; default: if ((adapter->hw.phy.media_type == e1000_media_type_fiber) || (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) tipg = DEFAULT_82543_TIPG_IPGT_FIBER; else tipg = DEFAULT_82543_TIPG_IPGT_COPPER; tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT; tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT; } E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg); E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value); if(adapter->hw.mac.type >= e1000_82540) E1000_WRITE_REG(&adapter->hw, E1000_TADV, adapter->tx_abs_int_delay.value); if ((adapter->hw.mac.type == e1000_82571) || (adapter->hw.mac.type == e1000_82572)) { tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0)); tarc |= TARC_SPEED_MODE_BIT; E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc); } else if (adapter->hw.mac.type == e1000_80003es2lan) { /* errata: program both queues to unweighted RR */ tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0)); tarc |= 1; E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc); tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1)); tarc |= 1; E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc); } else if (adapter->hw.mac.type == e1000_82574) { tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0)); tarc |= TARC_ERRATA_BIT; if ( adapter->tx_num_queues > 1) { tarc |= (TARC_COMPENSATION_MODE | TARC_MQ_FIX); E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc); E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc); } else E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc); } if (adapter->tx_int_delay.value > 0) adapter->txd_cmd |= E1000_TXD_CMD_IDE; /* Program the Transmit Control Register */ tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL); tctl &= ~E1000_TCTL_CT; tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN | (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT)); if (adapter->hw.mac.type >= e1000_82571) tctl |= E1000_TCTL_MULR; /* This write will effectively turn on the transmit unit. */ E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl); if (hw->mac.type == e1000_pch_spt) { u32 reg; reg = E1000_READ_REG(hw, E1000_IOSFPC); reg |= E1000_RCTL_RDMTS_HEX; E1000_WRITE_REG(hw, E1000_IOSFPC, reg); reg = E1000_READ_REG(hw, E1000_TARC(0)); reg |= E1000_TARC0_CB_MULTIQ_3_REQ; E1000_WRITE_REG(hw, E1000_TARC(0), reg); } } /********************************************************************* * * Enable receive unit. * **********************************************************************/ static void em_initialize_receive_unit(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); if_softc_ctx_t scctx = adapter->shared; struct ifnet *ifp = iflib_get_ifp(ctx); struct e1000_hw *hw = &adapter->hw; struct em_rx_queue *que; int i; u32 rctl, rxcsum, rfctl; INIT_DEBUGOUT("em_initialize_receive_units: begin"); /* * Make sure receives are disabled while setting * up the descriptor ring */ rctl = E1000_READ_REG(hw, E1000_RCTL); /* Do not disable if ever enabled on this hardware */ if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583)) E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN); /* Setup the Receive Control Register */ rctl &= ~(3 << E1000_RCTL_MO_SHIFT); rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF | (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT); /* Do not store bad packets */ rctl &= ~E1000_RCTL_SBP; /* Enable Long Packet receive */ if (if_getmtu(ifp) > ETHERMTU) rctl |= E1000_RCTL_LPE; else rctl &= ~E1000_RCTL_LPE; /* Strip the CRC */ if (!em_disable_crc_stripping) rctl |= E1000_RCTL_SECRC; if (adapter->hw.mac.type >= e1000_82540) { E1000_WRITE_REG(&adapter->hw, E1000_RADV, adapter->rx_abs_int_delay.value); /* * Set the interrupt throttling rate. Value is calculated * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns) */ E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR); } E1000_WRITE_REG(&adapter->hw, E1000_RDTR, adapter->rx_int_delay.value); /* Use extended rx descriptor formats */ rfctl = E1000_READ_REG(hw, E1000_RFCTL); rfctl |= E1000_RFCTL_EXTEN; /* * When using MSIX interrupts we need to throttle * using the EITR register (82574 only) */ if (hw->mac.type == e1000_82574) { for (int i = 0; i < 4; i++) E1000_WRITE_REG(hw, E1000_EITR_82574(i), DEFAULT_ITR); /* Disable accelerated acknowledge */ rfctl |= E1000_RFCTL_ACK_DIS; } E1000_WRITE_REG(hw, E1000_RFCTL, rfctl); rxcsum = E1000_READ_REG(hw, E1000_RXCSUM); if (if_getcapenable(ifp) & IFCAP_RXCSUM && adapter->hw.mac.type >= e1000_82543) { if (adapter->tx_num_queues > 1) { if (adapter->hw.mac.type >= igb_mac_min) { rxcsum |= E1000_RXCSUM_PCSD; if (hw->mac.type != e1000_82575) rxcsum |= E1000_RXCSUM_CRCOFL; } else rxcsum |= E1000_RXCSUM_TUOFL | E1000_RXCSUM_IPOFL | E1000_RXCSUM_PCSD; } else { if (adapter->hw.mac.type >= igb_mac_min) rxcsum |= E1000_RXCSUM_IPPCSE; else rxcsum |= E1000_RXCSUM_TUOFL | E1000_RXCSUM_IPOFL; if (adapter->hw.mac.type > e1000_82575) rxcsum |= E1000_RXCSUM_CRCOFL; } } else rxcsum &= ~E1000_RXCSUM_TUOFL; E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum); if (adapter->rx_num_queues > 1) { if (adapter->hw.mac.type >= igb_mac_min) igb_initialize_rss_mapping(adapter); else em_initialize_rss_mapping(adapter); } /* * XXX TEMPORARY WORKAROUND: on some systems with 82573 * long latencies are observed, like Lenovo X60. This * change eliminates the problem, but since having positive * values in RDTR is a known source of problems on other * platforms another solution is being sought. */ if (hw->mac.type == e1000_82573) E1000_WRITE_REG(hw, E1000_RDTR, 0x20); for (i = 0, que = adapter->rx_queues; i < adapter->rx_num_queues; i++, que++) { struct rx_ring *rxr = &que->rxr; /* Setup the Base and Length of the Rx Descriptor Ring */ u64 bus_addr = rxr->rx_paddr; #if 0 u32 rdt = adapter->rx_num_queues -1; /* default */ #endif E1000_WRITE_REG(hw, E1000_RDLEN(i), scctx->isc_nrxd[0] * sizeof(union e1000_rx_desc_extended)); E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32)); E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr); /* Setup the Head and Tail Descriptor Pointers */ E1000_WRITE_REG(hw, E1000_RDH(i), 0); E1000_WRITE_REG(hw, E1000_RDT(i), 0); } /* * Set PTHRESH for improved jumbo performance * According to 10.2.5.11 of Intel 82574 Datasheet, * RXDCTL(1) is written whenever RXDCTL(0) is written. * Only write to RXDCTL(1) if there is a need for different * settings. */ if (((adapter->hw.mac.type == e1000_ich9lan) || (adapter->hw.mac.type == e1000_pch2lan) || (adapter->hw.mac.type == e1000_ich10lan)) && (if_getmtu(ifp) > ETHERMTU)) { u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0)); E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3); } else if (adapter->hw.mac.type == e1000_82574) { for (int i = 0; i < adapter->rx_num_queues; i++) { u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i)); rxdctl |= 0x20; /* PTHRESH */ rxdctl |= 4 << 8; /* HTHRESH */ rxdctl |= 4 << 16;/* WTHRESH */ rxdctl |= 1 << 24; /* Switch to granularity */ E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl); } } else if (adapter->hw.mac.type >= igb_mac_min) { u32 psize, srrctl = 0; if (if_getmtu(ifp) > ETHERMTU) { /* Set maximum packet len */ if (adapter->rx_mbuf_sz <= 4096) { srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT; rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX; } else if (adapter->rx_mbuf_sz > 4096) { srrctl |= 8192 >> E1000_SRRCTL_BSIZEPKT_SHIFT; rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX; } psize = scctx->isc_max_frame_size; /* are we on a vlan? */ if (ifp->if_vlantrunk != NULL) psize += VLAN_TAG_SIZE; E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize); } else { srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT; rctl |= E1000_RCTL_SZ_2048; } /* * If TX flow control is disabled and there's >1 queue defined, * enable DROP. * * This drops frames rather than hanging the RX MAC for all queues. */ if ((adapter->rx_num_queues > 1) && (adapter->fc == e1000_fc_none || adapter->fc == e1000_fc_rx_pause)) { srrctl |= E1000_SRRCTL_DROP_EN; } /* Setup the Base and Length of the Rx Descriptor Rings */ for (i = 0, que = adapter->rx_queues; i < adapter->rx_num_queues; i++, que++) { struct rx_ring *rxr = &que->rxr; u64 bus_addr = rxr->rx_paddr; u32 rxdctl; #ifdef notyet /* Configure for header split? -- ignore for now */ rxr->hdr_split = igb_header_split; #else srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF; #endif E1000_WRITE_REG(hw, E1000_RDLEN(i), scctx->isc_nrxd[0] * sizeof(struct e1000_rx_desc)); E1000_WRITE_REG(hw, E1000_RDBAH(i), (uint32_t)(bus_addr >> 32)); E1000_WRITE_REG(hw, E1000_RDBAL(i), (uint32_t)bus_addr); E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl); /* Enable this Queue */ rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i)); rxdctl |= E1000_RXDCTL_QUEUE_ENABLE; rxdctl &= 0xFFF00000; rxdctl |= IGB_RX_PTHRESH; rxdctl |= IGB_RX_HTHRESH << 8; rxdctl |= IGB_RX_WTHRESH << 16; E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl); } } else if (adapter->hw.mac.type >= e1000_pch2lan) { if (if_getmtu(ifp) > ETHERMTU) e1000_lv_jumbo_workaround_ich8lan(hw, TRUE); else e1000_lv_jumbo_workaround_ich8lan(hw, FALSE); } /* Make sure VLAN Filters are off */ rctl &= ~E1000_RCTL_VFE; if (adapter->hw.mac.type < igb_mac_min) { if (adapter->rx_mbuf_sz == MCLBYTES) rctl |= E1000_RCTL_SZ_2048; else if (adapter->rx_mbuf_sz == MJUMPAGESIZE) rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX; else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX; /* ensure we clear use DTYPE of 00 here */ rctl &= ~0x00000C00; } /* Write out the settings */ E1000_WRITE_REG(hw, E1000_RCTL, rctl); return; } static void em_if_vlan_register(if_ctx_t ctx, u16 vtag) { struct adapter *adapter = iflib_get_softc(ctx); u32 index, bit; index = (vtag >> 5) & 0x7F; bit = vtag & 0x1F; adapter->shadow_vfta[index] |= (1 << bit); ++adapter->num_vlans; } static void em_if_vlan_unregister(if_ctx_t ctx, u16 vtag) { struct adapter *adapter = iflib_get_softc(ctx); u32 index, bit; index = (vtag >> 5) & 0x7F; bit = vtag & 0x1F; adapter->shadow_vfta[index] &= ~(1 << bit); --adapter->num_vlans; } static void em_setup_vlan_hw_support(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; u32 reg; /* * We get here thru init_locked, meaning * a soft reset, this has already cleared * the VFTA and other state, so if there * have been no vlan's registered do nothing. */ if (adapter->num_vlans == 0) return; /* * A soft reset zero's out the VFTA, so * we need to repopulate it now. */ for (int i = 0; i < EM_VFTA_SIZE; i++) if (adapter->shadow_vfta[i] != 0) E1000_WRITE_REG_ARRAY(hw, E1000_VFTA, i, adapter->shadow_vfta[i]); reg = E1000_READ_REG(hw, E1000_CTRL); reg |= E1000_CTRL_VME; E1000_WRITE_REG(hw, E1000_CTRL, reg); /* Enable the Filter Table */ reg = E1000_READ_REG(hw, E1000_RCTL); reg &= ~E1000_RCTL_CFIEN; reg |= E1000_RCTL_VFE; E1000_WRITE_REG(hw, E1000_RCTL, reg); } static void em_if_enable_intr(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct e1000_hw *hw = &adapter->hw; u32 ims_mask = IMS_ENABLE_MASK; if (hw->mac.type == e1000_82574) { E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK); ims_mask |= adapter->ims; } else if (adapter->intr_type == IFLIB_INTR_MSIX && hw->mac.type >= igb_mac_min) { u32 mask = (adapter->que_mask | adapter->link_mask); E1000_WRITE_REG(&adapter->hw, E1000_EIAC, mask); E1000_WRITE_REG(&adapter->hw, E1000_EIAM, mask); E1000_WRITE_REG(&adapter->hw, E1000_EIMS, mask); ims_mask = E1000_IMS_LSC; } E1000_WRITE_REG(hw, E1000_IMS, ims_mask); } static void em_if_disable_intr(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct e1000_hw *hw = &adapter->hw; if (adapter->intr_type == IFLIB_INTR_MSIX) { if (hw->mac.type >= igb_mac_min) E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0); E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0); } E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff); } /* * Bit of a misnomer, what this really means is * to enable OS management of the system... aka * to disable special hardware management features */ static void em_init_manageability(struct adapter *adapter) { /* A shared code workaround */ #define E1000_82542_MANC2H E1000_MANC2H if (adapter->has_manage) { int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H); int manc = E1000_READ_REG(&adapter->hw, E1000_MANC); /* disable hardware interception of ARP */ manc &= ~(E1000_MANC_ARP_EN); /* enable receiving management packets to the host */ manc |= E1000_MANC_EN_MNG2HOST; #define E1000_MNG2HOST_PORT_623 (1 << 5) #define E1000_MNG2HOST_PORT_664 (1 << 6) manc2h |= E1000_MNG2HOST_PORT_623; manc2h |= E1000_MNG2HOST_PORT_664; E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h); E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc); } } /* * Give control back to hardware management * controller if there is one. */ static void em_release_manageability(struct adapter *adapter) { if (adapter->has_manage) { int manc = E1000_READ_REG(&adapter->hw, E1000_MANC); /* re-enable hardware interception of ARP */ manc |= E1000_MANC_ARP_EN; manc &= ~E1000_MANC_EN_MNG2HOST; E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc); } } /* * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit. * For ASF and Pass Through versions of f/w this means * that the driver is loaded. For AMT version type f/w * this means that the network i/f is open. */ static void em_get_hw_control(struct adapter *adapter) { u32 ctrl_ext, swsm; if (adapter->vf_ifp) return; if (adapter->hw.mac.type == e1000_82573) { swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM); E1000_WRITE_REG(&adapter->hw, E1000_SWSM, swsm | E1000_SWSM_DRV_LOAD); return; } /* else */ ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext | E1000_CTRL_EXT_DRV_LOAD); } /* * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit. * For ASF and Pass Through versions of f/w this means that * the driver is no longer loaded. For AMT versions of the * f/w this means that the network i/f is closed. */ static void em_release_hw_control(struct adapter *adapter) { u32 ctrl_ext, swsm; if (!adapter->has_manage) return; if (adapter->hw.mac.type == e1000_82573) { swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM); E1000_WRITE_REG(&adapter->hw, E1000_SWSM, swsm & ~E1000_SWSM_DRV_LOAD); return; } /* else */ ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD); return; } static int em_is_valid_ether_addr(u8 *addr) { char zero_addr[6] = { 0, 0, 0, 0, 0, 0 }; if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) { return (FALSE); } return (TRUE); } /* ** Parse the interface capabilities with regard ** to both system management and wake-on-lan for ** later use. */ static void em_get_wakeup(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); device_t dev = iflib_get_dev(ctx); u16 eeprom_data = 0, device_id, apme_mask; adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw); apme_mask = EM_EEPROM_APME; switch (adapter->hw.mac.type) { case e1000_82542: case e1000_82543: break; case e1000_82544: e1000_read_nvm(&adapter->hw, NVM_INIT_CONTROL2_REG, 1, &eeprom_data); apme_mask = EM_82544_APME; break; case e1000_82546: case e1000_82546_rev_3: if (adapter->hw.bus.func == 1) { e1000_read_nvm(&adapter->hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data); break; } else e1000_read_nvm(&adapter->hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data); break; case e1000_82573: case e1000_82583: adapter->has_amt = TRUE; /* FALLTHROUGH */ case e1000_82571: case e1000_82572: case e1000_80003es2lan: if (adapter->hw.bus.func == 1) { e1000_read_nvm(&adapter->hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data); break; } else e1000_read_nvm(&adapter->hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data); break; case e1000_ich8lan: case e1000_ich9lan: case e1000_ich10lan: case e1000_pchlan: case e1000_pch2lan: case e1000_pch_lpt: case e1000_pch_spt: case e1000_82575: /* listing all igb devices */ case e1000_82576: case e1000_82580: case e1000_i350: case e1000_i354: case e1000_i210: case e1000_i211: case e1000_vfadapt: case e1000_vfadapt_i350: apme_mask = E1000_WUC_APME; adapter->has_amt = TRUE; eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC); break; default: e1000_read_nvm(&adapter->hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data); break; } if (eeprom_data & apme_mask) adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC); /* * We have the eeprom settings, now apply the special cases * where the eeprom may be wrong or the board won't support * wake on lan on a particular port */ device_id = pci_get_device(dev); switch (device_id) { case E1000_DEV_ID_82546GB_PCIE: adapter->wol = 0; break; case E1000_DEV_ID_82546EB_FIBER: case E1000_DEV_ID_82546GB_FIBER: /* Wake events only supported on port A for dual fiber * regardless of eeprom setting */ if (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_FUNC_1) adapter->wol = 0; break; case E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3: /* if quad port adapter, disable WoL on all but port A */ if (global_quad_port_a != 0) adapter->wol = 0; /* Reset for multiple quad port adapters */ if (++global_quad_port_a == 4) global_quad_port_a = 0; break; case E1000_DEV_ID_82571EB_FIBER: /* Wake events only supported on port A for dual fiber * regardless of eeprom setting */ if (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_FUNC_1) adapter->wol = 0; break; case E1000_DEV_ID_82571EB_QUAD_COPPER: case E1000_DEV_ID_82571EB_QUAD_FIBER: case E1000_DEV_ID_82571EB_QUAD_COPPER_LP: /* if quad port adapter, disable WoL on all but port A */ if (global_quad_port_a != 0) adapter->wol = 0; /* Reset for multiple quad port adapters */ if (++global_quad_port_a == 4) global_quad_port_a = 0; break; } return; } /* * Enable PCI Wake On Lan capability */ static void em_enable_wakeup(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); device_t dev = iflib_get_dev(ctx); if_t ifp = iflib_get_ifp(ctx); int error = 0; u32 pmc, ctrl, ctrl_ext, rctl; u16 status; if (pci_find_cap(dev, PCIY_PMG, &pmc) != 0) return; /* * Determine type of Wakeup: note that wol * is set with all bits on by default. */ if ((if_getcapenable(ifp) & IFCAP_WOL_MAGIC) == 0) adapter->wol &= ~E1000_WUFC_MAG; if ((if_getcapenable(ifp) & IFCAP_WOL_UCAST) == 0) adapter->wol &= ~E1000_WUFC_EX; if ((if_getcapenable(ifp) & IFCAP_WOL_MCAST) == 0) adapter->wol &= ~E1000_WUFC_MC; else { rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); rctl |= E1000_RCTL_MPE; E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl); } if (!(adapter->wol & (E1000_WUFC_EX | E1000_WUFC_MAG | E1000_WUFC_MC))) goto pme; /* Advertise the wakeup capability */ ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL); ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3); E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl); /* Keep the laser running on Fiber adapters */ if (adapter->hw.phy.media_type == e1000_media_type_fiber || adapter->hw.phy.media_type == e1000_media_type_internal_serdes) { ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA; E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext); } if ((adapter->hw.mac.type == e1000_ich8lan) || (adapter->hw.mac.type == e1000_pchlan) || (adapter->hw.mac.type == e1000_ich9lan) || (adapter->hw.mac.type == e1000_ich10lan)) e1000_suspend_workarounds_ich8lan(&adapter->hw); if ( adapter->hw.mac.type >= e1000_pchlan) { error = em_enable_phy_wakeup(adapter); if (error) goto pme; } else { /* Enable wakeup by the MAC */ E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN); E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol); } if (adapter->hw.phy.type == e1000_phy_igp_3) e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw); pme: status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2); status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE); if (!error && (if_getcapenable(ifp) & IFCAP_WOL)) status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE; pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2); return; } /* * WOL in the newer chipset interfaces (pchlan) * require thing to be copied into the phy */ static int em_enable_phy_wakeup(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; u32 mreg, ret = 0; u16 preg; /* copy MAC RARs to PHY RARs */ e1000_copy_rx_addrs_to_phy_ich8lan(hw); /* copy MAC MTA to PHY MTA */ for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) { mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i); e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF)); e1000_write_phy_reg(hw, BM_MTA(i) + 1, (u16)((mreg >> 16) & 0xFFFF)); } /* configure PHY Rx Control register */ e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg); mreg = E1000_READ_REG(hw, E1000_RCTL); if (mreg & E1000_RCTL_UPE) preg |= BM_RCTL_UPE; if (mreg & E1000_RCTL_MPE) preg |= BM_RCTL_MPE; preg &= ~(BM_RCTL_MO_MASK); if (mreg & E1000_RCTL_MO_3) preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT) << BM_RCTL_MO_SHIFT); if (mreg & E1000_RCTL_BAM) preg |= BM_RCTL_BAM; if (mreg & E1000_RCTL_PMCF) preg |= BM_RCTL_PMCF; mreg = E1000_READ_REG(hw, E1000_CTRL); if (mreg & E1000_CTRL_RFCE) preg |= BM_RCTL_RFCE; e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg); /* enable PHY wakeup in MAC register */ E1000_WRITE_REG(hw, E1000_WUC, E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN | E1000_WUC_APME); E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol); /* configure and enable PHY wakeup in PHY registers */ e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol); e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN); /* activate PHY wakeup */ ret = hw->phy.ops.acquire(hw); if (ret) { printf("Could not acquire PHY\n"); return ret; } e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT, (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT)); ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg); if (ret) { printf("Could not read PHY page 769\n"); goto out; } preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT; ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg); if (ret) printf("Could not set PHY Host Wakeup bit\n"); out: hw->phy.ops.release(hw); return ret; } static void em_if_led_func(if_ctx_t ctx, int onoff) { struct adapter *adapter = iflib_get_softc(ctx); if (onoff) { e1000_setup_led(&adapter->hw); e1000_led_on(&adapter->hw); } else { e1000_led_off(&adapter->hw); e1000_cleanup_led(&adapter->hw); } } /* * Disable the L0S and L1 LINK states */ static void em_disable_aspm(struct adapter *adapter) { int base, reg; u16 link_cap,link_ctrl; device_t dev = adapter->dev; switch (adapter->hw.mac.type) { case e1000_82573: case e1000_82574: case e1000_82583: break; default: return; } if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0) return; reg = base + PCIER_LINK_CAP; link_cap = pci_read_config(dev, reg, 2); if ((link_cap & PCIEM_LINK_CAP_ASPM) == 0) return; reg = base + PCIER_LINK_CTL; link_ctrl = pci_read_config(dev, reg, 2); link_ctrl &= ~PCIEM_LINK_CTL_ASPMC; pci_write_config(dev, reg, link_ctrl, 2); return; } /********************************************************************** * * Update the board statistics counters. * **********************************************************************/ static void em_update_stats_counters(struct adapter *adapter) { if(adapter->hw.phy.media_type == e1000_media_type_copper || (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) { adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS); adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC); } adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS); adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC); adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC); adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL); adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC); adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL); adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC); adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC); adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC); adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC); adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC); adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC); /* ** For watchdog management we need to know if we have been ** paused during the last interval, so capture that here. */ adapter->shared->isc_pause_frames = adapter->stats.xoffrxc; adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC); adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC); adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64); adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127); adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255); adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511); adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023); adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522); adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC); adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC); adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC); adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC); /* For the 64-bit byte counters the low dword must be read first. */ /* Both registers clear on the read of the high dword */ adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) + ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32); adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) + ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32); adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC); adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC); adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC); adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC); adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC); adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH); adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH); adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR); adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT); adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64); adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127); adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255); adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511); adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023); adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522); adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC); adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC); /* Interrupt Counts */ adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC); adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC); adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC); adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC); adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC); adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC); adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC); adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC); adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC); if (adapter->hw.mac.type >= e1000_82543) { adapter->stats.algnerrc += E1000_READ_REG(&adapter->hw, E1000_ALGNERRC); adapter->stats.rxerrc += E1000_READ_REG(&adapter->hw, E1000_RXERRC); adapter->stats.tncrs += E1000_READ_REG(&adapter->hw, E1000_TNCRS); adapter->stats.cexterr += E1000_READ_REG(&adapter->hw, E1000_CEXTERR); adapter->stats.tsctc += E1000_READ_REG(&adapter->hw, E1000_TSCTC); adapter->stats.tsctfc += E1000_READ_REG(&adapter->hw, E1000_TSCTFC); } } static uint64_t em_if_get_counter(if_ctx_t ctx, ift_counter cnt) { struct adapter *adapter = iflib_get_softc(ctx); struct ifnet *ifp = iflib_get_ifp(ctx); switch (cnt) { case IFCOUNTER_COLLISIONS: return (adapter->stats.colc); case IFCOUNTER_IERRORS: return (adapter->dropped_pkts + adapter->stats.rxerrc + adapter->stats.crcerrs + adapter->stats.algnerrc + adapter->stats.ruc + adapter->stats.roc + adapter->stats.mpc + adapter->stats.cexterr); case IFCOUNTER_OERRORS: return (adapter->stats.ecol + adapter->stats.latecol + adapter->watchdog_events); default: return (if_get_counter_default(ifp, cnt)); } } /* Export a single 32-bit register via a read-only sysctl. */ static int em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS) { struct adapter *adapter; u_int val; adapter = oidp->oid_arg1; val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2); return (sysctl_handle_int(oidp, &val, 0, req)); } /* * Add sysctl variables, one per statistic, to the system. */ static void em_add_hw_stats(struct adapter *adapter) { device_t dev = iflib_get_dev(adapter->ctx); struct em_tx_queue *tx_que = adapter->tx_queues; struct em_rx_queue *rx_que = adapter->rx_queues; struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev); struct sysctl_oid *tree = device_get_sysctl_tree(dev); struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree); struct e1000_hw_stats *stats = &adapter->stats; struct sysctl_oid *stat_node, *queue_node, *int_node; struct sysctl_oid_list *stat_list, *queue_list, *int_list; #define QUEUE_NAME_LEN 32 char namebuf[QUEUE_NAME_LEN]; /* Driver Statistics */ SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", CTLFLAG_RD, &adapter->dropped_pkts, "Driver dropped packets"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq", CTLFLAG_RD, &adapter->link_irq, "Link MSIX IRQ Handled"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_fail", CTLFLAG_RD, &adapter->mbuf_defrag_failed, "Defragmenting mbuf chain failed"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail", CTLFLAG_RD, &adapter->no_tx_dma_setup, "Driver tx dma failure in xmit"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns", CTLFLAG_RD, &adapter->rx_overruns, "RX overruns"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts", CTLFLAG_RD, &adapter->watchdog_events, "Watchdog timeouts"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control", CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL, em_sysctl_reg_handler, "IU", "Device Control Register"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control", CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL, em_sysctl_reg_handler, "IU", "Receiver Control Register"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water", CTLFLAG_RD, &adapter->hw.fc.high_water, 0, "Flow Control High Watermark"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water", CTLFLAG_RD, &adapter->hw.fc.low_water, 0, "Flow Control Low Watermark"); for (int i = 0; i < adapter->tx_num_queues; i++, tx_que++) { struct tx_ring *txr = &tx_que->txr; snprintf(namebuf, QUEUE_NAME_LEN, "queue_tx_%d", i); queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "TX Queue Name"); queue_list = SYSCTL_CHILDREN(queue_node); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDH(txr->me), em_sysctl_reg_handler, "IU", "Transmit Descriptor Head"); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDT(txr->me), em_sysctl_reg_handler, "IU", "Transmit Descriptor Tail"); SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq", CTLFLAG_RD, &txr->tx_irq, "Queue MSI-X Transmit Interrupts"); } for (int j = 0; j < adapter->rx_num_queues; j++, rx_que++) { struct rx_ring *rxr = &rx_que->rxr; snprintf(namebuf, QUEUE_NAME_LEN, "queue_rx_%d", j); queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "RX Queue Name"); queue_list = SYSCTL_CHILDREN(queue_node); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDH(rxr->me), em_sysctl_reg_handler, "IU", "Receive Descriptor Head"); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDT(rxr->me), em_sysctl_reg_handler, "IU", "Receive Descriptor Tail"); SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq", CTLFLAG_RD, &rxr->rx_irq, "Queue MSI-X Receive Interrupts"); } /* MAC stats get their own sub node */ stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", CTLFLAG_RD, NULL, "Statistics"); stat_list = SYSCTL_CHILDREN(stat_node); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll", CTLFLAG_RD, &stats->ecol, "Excessive collisions"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll", CTLFLAG_RD, &stats->scc, "Single collisions"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll", CTLFLAG_RD, &stats->mcc, "Multiple collisions"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll", CTLFLAG_RD, &stats->latecol, "Late collisions"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count", CTLFLAG_RD, &stats->colc, "Collision Count"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors", CTLFLAG_RD, &adapter->stats.symerrs, "Symbol Errors"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors", CTLFLAG_RD, &adapter->stats.sec, "Sequence Errors"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count", CTLFLAG_RD, &adapter->stats.dc, "Defer Count"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets", CTLFLAG_RD, &adapter->stats.mpc, "Missed Packets"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff", CTLFLAG_RD, &adapter->stats.rnbc, "Receive No Buffers"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize", CTLFLAG_RD, &adapter->stats.ruc, "Receive Undersize"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented", CTLFLAG_RD, &adapter->stats.rfc, "Fragmented Packets Received "); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize", CTLFLAG_RD, &adapter->stats.roc, "Oversized Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber", CTLFLAG_RD, &adapter->stats.rjc, "Recevied Jabber"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs", CTLFLAG_RD, &adapter->stats.rxerrc, "Receive Errors"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs", CTLFLAG_RD, &adapter->stats.crcerrs, "CRC errors"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs", CTLFLAG_RD, &adapter->stats.algnerrc, "Alignment Errors"); /* On 82575 these are collision counts */ SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs", CTLFLAG_RD, &adapter->stats.cexterr, "Collision/Carrier extension errors"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd", CTLFLAG_RD, &adapter->stats.xonrxc, "XON Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd", CTLFLAG_RD, &adapter->stats.xontxc, "XON Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd", CTLFLAG_RD, &adapter->stats.xoffrxc, "XOFF Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd", CTLFLAG_RD, &adapter->stats.xofftxc, "XOFF Transmitted"); /* Packet Reception Stats */ SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd", CTLFLAG_RD, &adapter->stats.tpr, "Total Packets Received "); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd", CTLFLAG_RD, &adapter->stats.gprc, "Good Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd", CTLFLAG_RD, &adapter->stats.bprc, "Broadcast Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd", CTLFLAG_RD, &adapter->stats.mprc, "Multicast Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64", CTLFLAG_RD, &adapter->stats.prc64, "64 byte frames received "); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127", CTLFLAG_RD, &adapter->stats.prc127, "65-127 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255", CTLFLAG_RD, &adapter->stats.prc255, "128-255 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511", CTLFLAG_RD, &adapter->stats.prc511, "256-511 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023", CTLFLAG_RD, &adapter->stats.prc1023, "512-1023 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522", CTLFLAG_RD, &adapter->stats.prc1522, "1023-1522 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd", CTLFLAG_RD, &adapter->stats.gorc, "Good Octets Received"); /* Packet Transmission Stats */ SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd", CTLFLAG_RD, &adapter->stats.gotc, "Good Octets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd", CTLFLAG_RD, &adapter->stats.tpt, "Total Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd", CTLFLAG_RD, &adapter->stats.gptc, "Good Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd", CTLFLAG_RD, &adapter->stats.bptc, "Broadcast Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd", CTLFLAG_RD, &adapter->stats.mptc, "Multicast Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64", CTLFLAG_RD, &adapter->stats.ptc64, "64 byte frames transmitted "); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127", CTLFLAG_RD, &adapter->stats.ptc127, "65-127 byte frames transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255", CTLFLAG_RD, &adapter->stats.ptc255, "128-255 byte frames transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511", CTLFLAG_RD, &adapter->stats.ptc511, "256-511 byte frames transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023", CTLFLAG_RD, &adapter->stats.ptc1023, "512-1023 byte frames transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522", CTLFLAG_RD, &adapter->stats.ptc1522, "1024-1522 byte frames transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd", CTLFLAG_RD, &adapter->stats.tsctc, "TSO Contexts Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail", CTLFLAG_RD, &adapter->stats.tsctfc, "TSO Contexts Failed"); /* Interrupt Stats */ int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts", CTLFLAG_RD, NULL, "Interrupt Statistics"); int_list = SYSCTL_CHILDREN(int_node); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts", CTLFLAG_RD, &adapter->stats.iac, "Interrupt Assertion Count"); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer", CTLFLAG_RD, &adapter->stats.icrxptc, "Interrupt Cause Rx Pkt Timer Expire Count"); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer", CTLFLAG_RD, &adapter->stats.icrxatc, "Interrupt Cause Rx Abs Timer Expire Count"); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer", CTLFLAG_RD, &adapter->stats.ictxptc, "Interrupt Cause Tx Pkt Timer Expire Count"); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer", CTLFLAG_RD, &adapter->stats.ictxatc, "Interrupt Cause Tx Abs Timer Expire Count"); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty", CTLFLAG_RD, &adapter->stats.ictxqec, "Interrupt Cause Tx Queue Empty Count"); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh", CTLFLAG_RD, &adapter->stats.ictxqmtc, "Interrupt Cause Tx Queue Min Thresh Count"); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh", CTLFLAG_RD, &adapter->stats.icrxdmtc, "Interrupt Cause Rx Desc Min Thresh Count"); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun", CTLFLAG_RD, &adapter->stats.icrxoc, "Interrupt Cause Receiver Overrun Count"); } /********************************************************************** * * This routine provides a way to dump out the adapter eeprom, * often a useful debug/service tool. This only dumps the first * 32 words, stuff that matters is in that extent. * **********************************************************************/ static int em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS) { struct adapter *adapter = (struct adapter *)arg1; int error; int result; result = -1; error = sysctl_handle_int(oidp, &result, 0, req); if (error || !req->newptr) return (error); /* * This value will cause a hex dump of the * first 32 16-bit words of the EEPROM to * the screen. */ if (result == 1) em_print_nvm_info(adapter); return (error); } static void em_print_nvm_info(struct adapter *adapter) { u16 eeprom_data; int i, j, row = 0; /* Its a bit crude, but it gets the job done */ printf("\nInterface EEPROM Dump:\n"); printf("Offset\n0x0000 "); for (i = 0, j = 0; i < 32; i++, j++) { if (j == 8) { /* Make the offset block */ j = 0; ++row; printf("\n0x00%x0 ",row); } e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data); printf("%04x ", eeprom_data); } printf("\n"); } static int em_sysctl_int_delay(SYSCTL_HANDLER_ARGS) { struct em_int_delay_info *info; struct adapter *adapter; u32 regval; int error, usecs, ticks; info = (struct em_int_delay_info *) arg1; usecs = info->value; error = sysctl_handle_int(oidp, &usecs, 0, req); if (error != 0 || req->newptr == NULL) return (error); if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535)) return (EINVAL); info->value = usecs; ticks = EM_USECS_TO_TICKS(usecs); if (info->offset == E1000_ITR) /* units are 256ns here */ ticks *= 4; adapter = info->adapter; regval = E1000_READ_OFFSET(&adapter->hw, info->offset); regval = (regval & ~0xffff) | (ticks & 0xffff); /* Handle a few special cases. */ switch (info->offset) { case E1000_RDTR: break; case E1000_TIDV: if (ticks == 0) { adapter->txd_cmd &= ~E1000_TXD_CMD_IDE; /* Don't write 0 into the TIDV register. */ regval++; } else adapter->txd_cmd |= E1000_TXD_CMD_IDE; break; } E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval); return (0); } static void em_add_int_delay_sysctl(struct adapter *adapter, const char *name, const char *description, struct em_int_delay_info *info, int offset, int value) { info->adapter = adapter; info->offset = offset; info->value = value; SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev), SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)), OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, info, 0, em_sysctl_int_delay, "I", description); } /* * Set flow control using sysctl: * Flow control values: * 0 - off * 1 - rx pause * 2 - tx pause * 3 - full */ static int em_set_flowcntl(SYSCTL_HANDLER_ARGS) { int error; static int input = 3; /* default is full */ struct adapter *adapter = (struct adapter *) arg1; error = sysctl_handle_int(oidp, &input, 0, req); if ((error) || (req->newptr == NULL)) return (error); if (input == adapter->fc) /* no change? */ return (error); switch (input) { case e1000_fc_rx_pause: case e1000_fc_tx_pause: case e1000_fc_full: case e1000_fc_none: adapter->hw.fc.requested_mode = input; adapter->fc = input; break; default: /* Do nothing */ return (error); } adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode; e1000_force_mac_fc(&adapter->hw); return (error); } /* * Manage Energy Efficient Ethernet: * Control values: * 0/1 - enabled/disabled */ static int em_sysctl_eee(SYSCTL_HANDLER_ARGS) { struct adapter *adapter = (struct adapter *) arg1; int error, value; value = adapter->hw.dev_spec.ich8lan.eee_disable; error = sysctl_handle_int(oidp, &value, 0, req); if (error || req->newptr == NULL) return (error); adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0); em_if_init(adapter->ctx); return (0); } static int em_sysctl_debug_info(SYSCTL_HANDLER_ARGS) { struct adapter *adapter; int error; int result; result = -1; error = sysctl_handle_int(oidp, &result, 0, req); if (error || !req->newptr) return (error); if (result == 1) { adapter = (struct adapter *) arg1; em_print_debug_info(adapter); } return (error); } static int em_get_rs(SYSCTL_HANDLER_ARGS) { struct adapter *adapter = (struct adapter *) arg1; int error; int result; result = 0; error = sysctl_handle_int(oidp, &result, 0, req); if (error || !req->newptr || result != 1) return (error); em_dump_rs(adapter); return (error); } static void em_if_debug(if_ctx_t ctx) { em_dump_rs(iflib_get_softc(ctx)); } /* * This routine is meant to be fluid, add whatever is * needed for debugging a problem. -jfv */ static void em_print_debug_info(struct adapter *adapter) { device_t dev = iflib_get_dev(adapter->ctx); struct ifnet *ifp = iflib_get_ifp(adapter->ctx); struct tx_ring *txr = &adapter->tx_queues->txr; struct rx_ring *rxr = &adapter->rx_queues->rxr; if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) printf("Interface is RUNNING "); else printf("Interface is NOT RUNNING\n"); if (if_getdrvflags(ifp) & IFF_DRV_OACTIVE) printf("and INACTIVE\n"); else printf("and ACTIVE\n"); for (int i = 0; i < adapter->tx_num_queues; i++, txr++) { device_printf(dev, "TX Queue %d ------\n", i); device_printf(dev, "hw tdh = %d, hw tdt = %d\n", E1000_READ_REG(&adapter->hw, E1000_TDH(i)), E1000_READ_REG(&adapter->hw, E1000_TDT(i))); } for (int j=0; j < adapter->rx_num_queues; j++, rxr++) { device_printf(dev, "RX Queue %d ------\n", j); device_printf(dev, "hw rdh = %d, hw rdt = %d\n", E1000_READ_REG(&adapter->hw, E1000_RDH(j)), E1000_READ_REG(&adapter->hw, E1000_RDT(j))); } } /* * 82574 only: * Write a new value to the EEPROM increasing the number of MSIX * vectors from 3 to 5, for proper multiqueue support. */ static void em_enable_vectors_82574(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct e1000_hw *hw = &adapter->hw; device_t dev = iflib_get_dev(ctx); u16 edata; e1000_read_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata); printf("Current cap: %#06x\n", edata); if (((edata & EM_NVM_MSIX_N_MASK) >> EM_NVM_MSIX_N_SHIFT) != 4) { device_printf(dev, "Writing to eeprom: increasing " "reported MSIX vectors from 3 to 5...\n"); edata &= ~(EM_NVM_MSIX_N_MASK); edata |= 4 << EM_NVM_MSIX_N_SHIFT; e1000_write_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata); e1000_update_nvm_checksum(hw); device_printf(dev, "Writing to eeprom: done\n"); } } Index: head/sys/dev/e1000/if_em.h =================================================================== --- head/sys/dev/e1000/if_em.h (revision 323515) +++ head/sys/dev/e1000/if_em.h (revision 323516) @@ -1,565 +1,566 @@ /*- * Copyright (c) 2016 Matt Macy * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /*$FreeBSD$*/ #include "opt_ddb.h" #include "opt_inet.h" #include "opt_inet6.h" #ifdef HAVE_KERNEL_OPTION_HEADERS #include "opt_device_polling.h" #endif #include #include #ifdef DDB #include #include #endif #if __FreeBSD_version >= 800000 #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "e1000_api.h" #include "e1000_82571.h" #include "ifdi_if.h" #ifndef _EM_H_DEFINED_ #define _EM_H_DEFINED_ /* Tunables */ /* * EM_MAX_TXD: Maximum number of Transmit Descriptors * Valid Range: 80-256 for 82542 and 82543-based adapters * 80-4096 for others * Default Value: 1024 * This value is the number of transmit descriptors allocated by the driver. * Increasing this value allows the driver to queue more transmits. Each * descriptor is 16 bytes. * Since TDLEN should be multiple of 128bytes, the number of transmit * desscriptors should meet the following condition. * (num_tx_desc * sizeof(struct e1000_tx_desc)) % 128 == 0 */ #define EM_MIN_TXD 128 #define EM_MAX_TXD 4096 #define EM_DEFAULT_TXD 1024 #define EM_DEFAULT_MULTI_TXD 4096 #define IGB_MAX_TXD 4096 /* * EM_MAX_RXD - Maximum number of receive Descriptors * Valid Range: 80-256 for 82542 and 82543-based adapters * 80-4096 for others * Default Value: 1024 * This value is the number of receive descriptors allocated by the driver. * Increasing this value allows the driver to buffer more incoming packets. * Each descriptor is 16 bytes. A receive buffer is also allocated for each * descriptor. The maximum MTU size is 16110. * Since TDLEN should be multiple of 128bytes, the number of transmit * desscriptors should meet the following condition. * (num_tx_desc * sizeof(struct e1000_tx_desc)) % 128 == 0 */ #define EM_MIN_RXD 128 #define EM_MAX_RXD 4096 #define EM_DEFAULT_RXD 1024 #define EM_DEFAULT_MULTI_RXD 4096 #define IGB_MAX_RXD 4096 /* * EM_TIDV - Transmit Interrupt Delay Value * Valid Range: 0-65535 (0=off) * Default Value: 64 * This value delays the generation of transmit interrupts in units of * 1.024 microseconds. Transmit interrupt reduction can improve CPU * efficiency if properly tuned for specific network traffic. If the * system is reporting dropped transmits, this value may be set too high * causing the driver to run out of available transmit descriptors. */ #define EM_TIDV 64 /* * EM_TADV - Transmit Absolute Interrupt Delay Value * (Not valid for 82542/82543/82544) * Valid Range: 0-65535 (0=off) * Default Value: 64 * This value, in units of 1.024 microseconds, limits the delay in which a * transmit interrupt is generated. Useful only if EM_TIDV is non-zero, * this value ensures that an interrupt is generated after the initial * packet is sent on the wire within the set amount of time. Proper tuning, * along with EM_TIDV, may improve traffic throughput in specific * network conditions. */ #define EM_TADV 64 /* * EM_RDTR - Receive Interrupt Delay Timer (Packet Timer) * Valid Range: 0-65535 (0=off) * Default Value: 0 * This value delays the generation of receive interrupts in units of 1.024 * microseconds. Receive interrupt reduction can improve CPU efficiency if * properly tuned for specific network traffic. Increasing this value adds * extra latency to frame reception and can end up decreasing the throughput * of TCP traffic. If the system is reporting dropped receives, this value * may be set too high, causing the driver to run out of available receive * descriptors. * * CAUTION: When setting EM_RDTR to a value other than 0, adapters * may hang (stop transmitting) under certain network conditions. * If this occurs a WATCHDOG message is logged in the system * event log. In addition, the controller is automatically reset, * restoring the network connection. To eliminate the potential * for the hang ensure that EM_RDTR is set to 0. */ #define EM_RDTR 0 /* * Receive Interrupt Absolute Delay Timer (Not valid for 82542/82543/82544) * Valid Range: 0-65535 (0=off) * Default Value: 64 * This value, in units of 1.024 microseconds, limits the delay in which a * receive interrupt is generated. Useful only if EM_RDTR is non-zero, * this value ensures that an interrupt is generated after the initial * packet is received within the set amount of time. Proper tuning, * along with EM_RDTR, may improve traffic throughput in specific network * conditions. */ #define EM_RADV 64 /* * This parameter controls whether or not autonegotation is enabled. * 0 - Disable autonegotiation * 1 - Enable autonegotiation */ #define DO_AUTO_NEG 1 /* * This parameter control whether or not the driver will wait for * autonegotiation to complete. * 1 - Wait for autonegotiation to complete * 0 - Don't wait for autonegotiation to complete */ #define WAIT_FOR_AUTO_NEG_DEFAULT 0 /* Tunables -- End */ #define AUTONEG_ADV_DEFAULT (ADVERTISE_10_HALF | ADVERTISE_10_FULL | \ ADVERTISE_100_HALF | ADVERTISE_100_FULL | \ ADVERTISE_1000_FULL) #define AUTO_ALL_MODES 0 /* PHY master/slave setting */ #define EM_MASTER_SLAVE e1000_ms_hw_default /* * Micellaneous constants */ #define EM_VENDOR_ID 0x8086 #define EM_FLASH 0x0014 #define EM_JUMBO_PBA 0x00000028 #define EM_DEFAULT_PBA 0x00000030 #define EM_SMARTSPEED_DOWNSHIFT 3 #define EM_SMARTSPEED_MAX 15 #define EM_MAX_LOOP 10 #define MAX_NUM_MULTICAST_ADDRESSES 128 #define PCI_ANY_ID (~0U) #define ETHER_ALIGN 2 #define EM_FC_PAUSE_TIME 0x0680 #define EM_EEPROM_APME 0x400; #define EM_82544_APME 0x0004; /* Support AutoMediaDetect for Marvell M88 PHY in i354 */ #define IGB_MEDIA_RESET (1 << 0) /* Define the starting Interrupt rate per Queue */ #define IGB_INTS_PER_SEC 8000 #define IGB_DEFAULT_ITR ((1000000/IGB_INTS_PER_SEC) << 2) #define IGB_LINK_ITR 2000 #define I210_LINK_DELAY 1000 #define IGB_MAX_SCATTER 40 #define IGB_VFTA_SIZE 128 #define IGB_BR_SIZE 4096 /* ring buf size */ #define IGB_TSO_SIZE (65535 + sizeof(struct ether_vlan_header)) #define IGB_TSO_SEG_SIZE 4096 /* Max dma segment size */ #define IGB_TXPBSIZE 20408 #define IGB_HDR_BUF 128 #define IGB_PKTTYPE_MASK 0x0000FFF0 #define IGB_DMCTLX_DCFLUSH_DIS 0x80000000 /* Disable DMA Coalesce Flush */ /* * Driver state logic for the detection of a hung state * in hardware. Set TX_HUNG whenever a TX packet is used * (data is sent) and clear it when txeof() is invoked if * any descriptors from the ring are cleaned/reclaimed. * Increment internal counter if no descriptors are cleaned * and compare to TX_MAXTRIES. When counter > TX_MAXTRIES, * reset adapter. */ #define EM_TX_IDLE 0x00000000 #define EM_TX_BUSY 0x00000001 #define EM_TX_HUNG 0x80000000 #define EM_TX_MAXTRIES 10 #define PCICFG_DESC_RING_STATUS 0xe4 #define FLUSH_DESC_REQUIRED 0x100 #define IGB_RX_PTHRESH ((hw->mac.type == e1000_i354) ? 12 : \ ((hw->mac.type <= e1000_82576) ? 16 : 8)) #define IGB_RX_HTHRESH 8 #define IGB_RX_WTHRESH ((hw->mac.type == e1000_82576 && \ (adapter->intr_type == IFLIB_INTR_MSIX)) ? 1 : 4) #define IGB_TX_PTHRESH ((hw->mac.type == e1000_i354) ? 20 : 8) #define IGB_TX_HTHRESH 1 #define IGB_TX_WTHRESH ((hw->mac.type != e1000_82575 && \ (adapter->intr_type == IFLIB_INTR_MSIX) ? 1 : 16) /* * TDBA/RDBA should be aligned on 16 byte boundary. But TDLEN/RDLEN should be * multiple of 128 bytes. So we align TDBA/RDBA on 128 byte boundary. This will * also optimize cache line size effect. H/W supports up to cache line size 128. */ #define EM_DBA_ALIGN 128 /* * See Intel 82574 Driver Programming Interface Manual, Section 10.2.6.9 */ #define TARC_COMPENSATION_MODE (1 << 7) /* Compensation Mode */ #define TARC_SPEED_MODE_BIT (1 << 21) /* On PCI-E MACs only */ #define TARC_MQ_FIX (1 << 23) | \ (1 << 24) | \ (1 << 25) /* Handle errata in MQ mode */ #define TARC_ERRATA_BIT (1 << 26) /* Note from errata on 82574 */ /* PCI Config defines */ #define EM_BAR_TYPE(v) ((v) & EM_BAR_TYPE_MASK) #define EM_BAR_TYPE_MASK 0x00000001 #define EM_BAR_TYPE_MMEM 0x00000000 #define EM_BAR_TYPE_IO 0x00000001 #define EM_BAR_TYPE_FLASH 0x0014 #define EM_BAR_MEM_TYPE(v) ((v) & EM_BAR_MEM_TYPE_MASK) #define EM_BAR_MEM_TYPE_MASK 0x00000006 #define EM_BAR_MEM_TYPE_32BIT 0x00000000 #define EM_BAR_MEM_TYPE_64BIT 0x00000004 #define EM_MSIX_BAR 3 /* On 82575 */ /* More backward compatibility */ #if __FreeBSD_version < 900000 #define SYSCTL_ADD_UQUAD SYSCTL_ADD_QUAD #endif /* Defines for printing debug information */ #define DEBUG_INIT 0 #define DEBUG_IOCTL 0 #define DEBUG_HW 0 #define INIT_DEBUGOUT(S) if (DEBUG_INIT) printf(S "\n") #define INIT_DEBUGOUT1(S, A) if (DEBUG_INIT) printf(S "\n", A) #define INIT_DEBUGOUT2(S, A, B) if (DEBUG_INIT) printf(S "\n", A, B) #define IOCTL_DEBUGOUT(S) if (DEBUG_IOCTL) printf(S "\n") #define IOCTL_DEBUGOUT1(S, A) if (DEBUG_IOCTL) printf(S "\n", A) #define IOCTL_DEBUGOUT2(S, A, B) if (DEBUG_IOCTL) printf(S "\n", A, B) #define HW_DEBUGOUT(S) if (DEBUG_HW) printf(S "\n") #define HW_DEBUGOUT1(S, A) if (DEBUG_HW) printf(S "\n", A) #define HW_DEBUGOUT2(S, A, B) if (DEBUG_HW) printf(S "\n", A, B) #define EM_MAX_SCATTER 40 #define EM_VFTA_SIZE 128 #define EM_TSO_SIZE (65535 + sizeof(struct ether_vlan_header)) #define EM_TSO_SEG_SIZE 4096 /* Max dma segment size */ #define EM_MSIX_MASK 0x01F00000 /* For 82574 use */ #define EM_MSIX_LINK 0x01000000 /* For 82574 use */ #define ETH_ZLEN 60 #define ETH_ADDR_LEN 6 #define EM_CSUM_OFFLOAD 7 /* Offload bits in mbuf flag */ #define IGB_CSUM_OFFLOAD 0x0E0F /* Offload bits in mbuf flag */ #define IGB_PKTTYPE_MASK 0x0000FFF0 #define IGB_DMCTLX_DCFLUSH_DIS 0x80000000 /* Disable DMA Coalesce Flush */ /* * 82574 has a nonstandard address for EIAC * and since its only used in MSIX, and in * the em driver only 82574 uses MSIX we can * solve it just using this define. */ #define EM_EIAC 0x000DC /* * 82574 only reports 3 MSI-X vectors by default; * defines assisting with making it report 5 are * located here. */ #define EM_NVM_PCIE_CTRL 0x1B #define EM_NVM_MSIX_N_MASK (0x7 << EM_NVM_MSIX_N_SHIFT) #define EM_NVM_MSIX_N_SHIFT 7 struct adapter; struct em_int_delay_info { struct adapter *adapter; /* Back-pointer to the adapter struct */ int offset; /* Register offset to read/write */ int value; /* Current value in usecs */ }; /* * The transmit ring, one per tx queue */ struct tx_ring { struct adapter *adapter; struct e1000_tx_desc *tx_base; uint64_t tx_paddr; qidx_t *tx_rsq; bool tx_tso; /* last tx was tso */ uint8_t me; qidx_t tx_rs_cidx; qidx_t tx_rs_pidx; qidx_t tx_cidx_processed; /* Interrupt resources */ void *tag; struct resource *res; unsigned long tx_irq; /* Saved csum offloading context information */ int csum_flags; int csum_lhlen; int csum_iphlen; int csum_thlen; int csum_mss; int csum_pktlen; uint32_t csum_txd_upper; uint32_t csum_txd_lower; /* last field */ }; /* * The Receive ring, one per rx queue */ struct rx_ring { struct adapter *adapter; struct em_rx_queue *que; u32 me; u32 payload; union e1000_rx_desc_extended *rx_base; uint64_t rx_paddr; /* Interrupt resources */ void *tag; struct resource *res; bool discard; /* Soft stats */ unsigned long rx_irq; unsigned long rx_discarded; unsigned long rx_packets; unsigned long rx_bytes; }; struct em_tx_queue { struct adapter *adapter; u32 msix; u32 eims; /* This queue's EIMS bit */ u32 me; struct tx_ring txr; + struct if_irq que_irq; }; struct em_rx_queue { struct adapter *adapter; u32 me; u32 msix; u32 eims; struct rx_ring rxr; u64 irqs; - struct if_irq que_irq; + struct if_irq que_irq; }; /* Our adapter structure */ struct adapter { struct ifnet *ifp; struct e1000_hw hw; if_softc_ctx_t shared; if_ctx_t ctx; #define tx_num_queues shared->isc_ntxqsets #define rx_num_queues shared->isc_nrxqsets #define intr_type shared->isc_intr /* FreeBSD operating-system-specific structures. */ struct e1000_osdep osdep; device_t dev; struct cdev *led_dev; struct em_tx_queue *tx_queues; struct em_rx_queue *rx_queues; struct if_irq irq; struct resource *memory; struct resource *flash; struct resource *ioport; int io_rid; struct resource *res; void *tag; u32 linkvec; u32 ivars; struct ifmedia *media; int msix; int if_flags; int em_insert_vlan_header; u32 ims; bool in_detach; u32 flags; /* Task for FAST handling */ struct grouptask link_task; u16 num_vlans; u32 txd_cmd; u32 tx_process_limit; u32 rx_process_limit; u32 rx_mbuf_sz; /* Management and WOL features */ u32 wol; bool has_manage; bool has_amt; /* Multicast array memory */ u8 *mta; /* ** Shadow VFTA table, this is needed because ** the real vlan filter table gets cleared during ** a soft reset and the driver needs to be able ** to repopulate it. */ u32 shadow_vfta[EM_VFTA_SIZE]; /* Info about the interface */ u16 link_active; u16 fc; u16 link_speed; u16 link_duplex; u32 smartspeed; u32 dmac; int link_mask; u64 que_mask; struct em_int_delay_info tx_int_delay; struct em_int_delay_info tx_abs_int_delay; struct em_int_delay_info rx_int_delay; struct em_int_delay_info rx_abs_int_delay; struct em_int_delay_info tx_itr; /* Misc stats maintained by the driver */ unsigned long dropped_pkts; unsigned long link_irq; unsigned long mbuf_defrag_failed; unsigned long no_tx_dma_setup; unsigned long no_tx_map_avail; unsigned long rx_overruns; unsigned long watchdog_events; struct e1000_hw_stats stats; u16 vf_ifp; }; /******************************************************************************** * vendor_info_array * * This array contains the list of Subvendor/Subdevice IDs on which the driver * should load. * ********************************************************************************/ typedef struct _em_vendor_info_t { unsigned int vendor_id; unsigned int device_id; unsigned int subvendor_id; unsigned int subdevice_id; unsigned int index; } em_vendor_info_t; void em_dump_rs(struct adapter *); #define EM_RSSRK_SIZE 4 #define EM_RSSRK_VAL(key, i) (key[(i) * EM_RSSRK_SIZE] | \ key[(i) * EM_RSSRK_SIZE + 1] << 8 | \ key[(i) * EM_RSSRK_SIZE + 2] << 16 | \ key[(i) * EM_RSSRK_SIZE + 3] << 24) #endif /* _EM_H_DEFINED_ */ Index: head/sys/kern/subr_gtaskqueue.c =================================================================== --- head/sys/kern/subr_gtaskqueue.c (revision 323515) +++ head/sys/kern/subr_gtaskqueue.c (revision 323516) @@ -1,965 +1,1217 @@ /*- * Copyright (c) 2000 Doug Rabson * Copyright (c) 2014 Jeff Roberson * Copyright (c) 2016 Matthew Macy * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -static MALLOC_DEFINE(M_GTASKQUEUE, "taskqueue", "Task Queues"); +static MALLOC_DEFINE(M_GTASKQUEUE, "gtaskqueue", "Group Task Queues"); static void gtaskqueue_thread_enqueue(void *); static void gtaskqueue_thread_loop(void *arg); +static int _taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride, bool ithread, int pri); +TASKQGROUP_DEFINE(softirq, mp_ncpus, 1, false, PI_SOFT); -TASKQGROUP_DEFINE(softirq, mp_ncpus, 1); - struct gtaskqueue_busy { struct gtask *tb_running; TAILQ_ENTRY(gtaskqueue_busy) tb_link; }; +struct gt_intr_thread { + int git_flags; /* (j) IT_* flags. */ + int git_need; /* Needs service. */ +}; + +/* Interrupt thread flags kept in it_flags */ +#define IT_DEAD 0x000001 /* Thread is waiting to exit. */ +#define IT_WAIT 0x000002 /* Thread is waiting for completion. */ + static struct gtask * const TB_DRAIN_WAITER = (struct gtask *)0x1; struct gtaskqueue { STAILQ_HEAD(, gtask) tq_queue; gtaskqueue_enqueue_fn tq_enqueue; void *tq_context; char *tq_name; TAILQ_HEAD(, gtaskqueue_busy) tq_active; struct mtx tq_mutex; struct thread **tq_threads; + struct gt_intr_thread *tq_gt_intrs; int tq_tcount; int tq_spin; int tq_flags; int tq_callouts; taskqueue_callback_fn tq_callbacks[TASKQUEUE_NUM_CALLBACKS]; void *tq_cb_contexts[TASKQUEUE_NUM_CALLBACKS]; }; #define TQ_FLAGS_ACTIVE (1 << 0) #define TQ_FLAGS_BLOCKED (1 << 1) #define TQ_FLAGS_UNLOCKED_ENQUEUE (1 << 2) +#define TQ_FLAGS_INTR (1 << 3) #define DT_CALLOUT_ARMED (1 << 0) #define TQ_LOCK(tq) \ do { \ if ((tq)->tq_spin) \ mtx_lock_spin(&(tq)->tq_mutex); \ else \ mtx_lock(&(tq)->tq_mutex); \ } while (0) #define TQ_ASSERT_LOCKED(tq) mtx_assert(&(tq)->tq_mutex, MA_OWNED) #define TQ_UNLOCK(tq) \ do { \ if ((tq)->tq_spin) \ mtx_unlock_spin(&(tq)->tq_mutex); \ else \ mtx_unlock(&(tq)->tq_mutex); \ } while (0) #define TQ_ASSERT_UNLOCKED(tq) mtx_assert(&(tq)->tq_mutex, MA_NOTOWNED) #ifdef INVARIANTS static void gtask_dump(struct gtask *gtask) { printf("gtask: %p ta_flags=%x ta_priority=%d ta_func=%p ta_context=%p\n", gtask, gtask->ta_flags, gtask->ta_priority, gtask->ta_func, gtask->ta_context); } #endif static __inline int TQ_SLEEP(struct gtaskqueue *tq, void *p, struct mtx *m, int pri, const char *wm, int t) { if (tq->tq_spin) return (msleep_spin(p, m, wm, t)); return (msleep(p, m, pri, wm, t)); } static struct gtaskqueue * _gtaskqueue_create(const char *name, int mflags, taskqueue_enqueue_fn enqueue, void *context, int mtxflags, const char *mtxname __unused) { struct gtaskqueue *queue; char *tq_name; tq_name = malloc(TASKQUEUE_NAMELEN, M_GTASKQUEUE, mflags | M_ZERO); if (!tq_name) return (NULL); snprintf(tq_name, TASKQUEUE_NAMELEN, "%s", (name) ? name : "taskqueue"); queue = malloc(sizeof(struct gtaskqueue), M_GTASKQUEUE, mflags | M_ZERO); if (!queue) return (NULL); STAILQ_INIT(&queue->tq_queue); TAILQ_INIT(&queue->tq_active); queue->tq_enqueue = enqueue; queue->tq_context = context; queue->tq_name = tq_name; queue->tq_spin = (mtxflags & MTX_SPIN) != 0; queue->tq_flags |= TQ_FLAGS_ACTIVE; if (enqueue == gtaskqueue_thread_enqueue) queue->tq_flags |= TQ_FLAGS_UNLOCKED_ENQUEUE; mtx_init(&queue->tq_mutex, tq_name, NULL, mtxflags); return (queue); } /* * Signal a taskqueue thread to terminate. */ static void gtaskqueue_terminate(struct thread **pp, struct gtaskqueue *tq) { while (tq->tq_tcount > 0 || tq->tq_callouts > 0) { wakeup(tq); TQ_SLEEP(tq, pp, &tq->tq_mutex, PWAIT, "taskqueue_destroy", 0); } } static void gtaskqueue_free(struct gtaskqueue *queue) { TQ_LOCK(queue); queue->tq_flags &= ~TQ_FLAGS_ACTIVE; gtaskqueue_terminate(queue->tq_threads, queue); KASSERT(TAILQ_EMPTY(&queue->tq_active), ("Tasks still running?")); KASSERT(queue->tq_callouts == 0, ("Armed timeout tasks")); mtx_destroy(&queue->tq_mutex); free(queue->tq_threads, M_GTASKQUEUE); free(queue->tq_name, M_GTASKQUEUE); free(queue, M_GTASKQUEUE); } +static void +schedule_ithread(struct gtaskqueue *queue) +{ + struct proc *p; + struct thread *td; + struct gt_intr_thread *git; + + MPASS(queue->tq_tcount == 1); + td = queue->tq_threads[0]; + git = &queue->tq_gt_intrs[0]; + p = td->td_proc; + + atomic_store_rel_int(&git->git_need, 1); + thread_lock(td); + if (TD_AWAITING_INTR(td)) { + CTR3(KTR_INTR, "%s: schedule pid %d (%s)", __func__, p->p_pid, + td->td_name); + TD_CLR_IWAIT(td); + sched_add(td, SRQ_INTR); + } else { + CTR5(KTR_INTR, "%s: pid %d (%s): it_need %d, state %d", + __func__, p->p_pid, td->td_name, git->git_need, td->td_state); + } + thread_unlock(td); +} + int grouptaskqueue_enqueue(struct gtaskqueue *queue, struct gtask *gtask) { #ifdef INVARIANTS if (queue == NULL) { gtask_dump(gtask); panic("queue == NULL"); } #endif TQ_LOCK(queue); if (gtask->ta_flags & TASK_ENQUEUED) { TQ_UNLOCK(queue); return (0); } STAILQ_INSERT_TAIL(&queue->tq_queue, gtask, ta_link); gtask->ta_flags |= TASK_ENQUEUED; TQ_UNLOCK(queue); - if ((queue->tq_flags & TQ_FLAGS_BLOCKED) == 0) - queue->tq_enqueue(queue->tq_context); + if ((queue->tq_flags & TQ_FLAGS_BLOCKED) == 0) { + if (queue->tq_flags & TQ_FLAGS_INTR) { + schedule_ithread(queue); + } else { + queue->tq_enqueue(queue->tq_context); + } + } return (0); } static void gtaskqueue_task_nop_fn(void *context) { } /* * Block until all currently queued tasks in this taskqueue * have begun execution. Tasks queued during execution of * this function are ignored. */ static void gtaskqueue_drain_tq_queue(struct gtaskqueue *queue) { struct gtask t_barrier; if (STAILQ_EMPTY(&queue->tq_queue)) return; /* * Enqueue our barrier after all current tasks, but with * the highest priority so that newly queued tasks cannot * pass it. Because of the high priority, we can not use * taskqueue_enqueue_locked directly (which drops the lock * anyway) so just insert it at tail while we have the * queue lock. */ GTASK_INIT(&t_barrier, 0, USHRT_MAX, gtaskqueue_task_nop_fn, &t_barrier); STAILQ_INSERT_TAIL(&queue->tq_queue, &t_barrier, ta_link); t_barrier.ta_flags |= TASK_ENQUEUED; /* * Once the barrier has executed, all previously queued tasks * have completed or are currently executing. */ while (t_barrier.ta_flags & TASK_ENQUEUED) TQ_SLEEP(queue, &t_barrier, &queue->tq_mutex, PWAIT, "-", 0); } /* * Block until all currently executing tasks for this taskqueue * complete. Tasks that begin execution during the execution * of this function are ignored. */ static void gtaskqueue_drain_tq_active(struct gtaskqueue *queue) { struct gtaskqueue_busy tb_marker, *tb_first; if (TAILQ_EMPTY(&queue->tq_active)) return; /* Block taskq_terminate().*/ queue->tq_callouts++; /* * Wait for all currently executing taskqueue threads * to go idle. */ tb_marker.tb_running = TB_DRAIN_WAITER; TAILQ_INSERT_TAIL(&queue->tq_active, &tb_marker, tb_link); while (TAILQ_FIRST(&queue->tq_active) != &tb_marker) TQ_SLEEP(queue, &tb_marker, &queue->tq_mutex, PWAIT, "-", 0); TAILQ_REMOVE(&queue->tq_active, &tb_marker, tb_link); /* * Wakeup any other drain waiter that happened to queue up * without any intervening active thread. */ tb_first = TAILQ_FIRST(&queue->tq_active); if (tb_first != NULL && tb_first->tb_running == TB_DRAIN_WAITER) wakeup(tb_first); /* Release taskqueue_terminate(). */ queue->tq_callouts--; if ((queue->tq_flags & TQ_FLAGS_ACTIVE) == 0) wakeup_one(queue->tq_threads); } void gtaskqueue_block(struct gtaskqueue *queue) { TQ_LOCK(queue); queue->tq_flags |= TQ_FLAGS_BLOCKED; TQ_UNLOCK(queue); } void gtaskqueue_unblock(struct gtaskqueue *queue) { TQ_LOCK(queue); queue->tq_flags &= ~TQ_FLAGS_BLOCKED; if (!STAILQ_EMPTY(&queue->tq_queue)) queue->tq_enqueue(queue->tq_context); TQ_UNLOCK(queue); } static void gtaskqueue_run_locked(struct gtaskqueue *queue) { struct gtaskqueue_busy tb; struct gtaskqueue_busy *tb_first; struct gtask *gtask; KASSERT(queue != NULL, ("tq is NULL")); TQ_ASSERT_LOCKED(queue); tb.tb_running = NULL; while (STAILQ_FIRST(&queue->tq_queue)) { TAILQ_INSERT_TAIL(&queue->tq_active, &tb, tb_link); /* * Carefully remove the first task from the queue and * clear its TASK_ENQUEUED flag */ gtask = STAILQ_FIRST(&queue->tq_queue); KASSERT(gtask != NULL, ("task is NULL")); STAILQ_REMOVE_HEAD(&queue->tq_queue, ta_link); gtask->ta_flags &= ~TASK_ENQUEUED; tb.tb_running = gtask; TQ_UNLOCK(queue); KASSERT(gtask->ta_func != NULL, ("task->ta_func is NULL")); gtask->ta_func(gtask->ta_context); TQ_LOCK(queue); tb.tb_running = NULL; wakeup(gtask); TAILQ_REMOVE(&queue->tq_active, &tb, tb_link); tb_first = TAILQ_FIRST(&queue->tq_active); if (tb_first != NULL && tb_first->tb_running == TB_DRAIN_WAITER) wakeup(tb_first); } } static int task_is_running(struct gtaskqueue *queue, struct gtask *gtask) { struct gtaskqueue_busy *tb; TQ_ASSERT_LOCKED(queue); TAILQ_FOREACH(tb, &queue->tq_active, tb_link) { if (tb->tb_running == gtask) return (1); } return (0); } static int gtaskqueue_cancel_locked(struct gtaskqueue *queue, struct gtask *gtask) { if (gtask->ta_flags & TASK_ENQUEUED) STAILQ_REMOVE(&queue->tq_queue, gtask, gtask, ta_link); gtask->ta_flags &= ~TASK_ENQUEUED; return (task_is_running(queue, gtask) ? EBUSY : 0); } int gtaskqueue_cancel(struct gtaskqueue *queue, struct gtask *gtask) { int error; TQ_LOCK(queue); error = gtaskqueue_cancel_locked(queue, gtask); TQ_UNLOCK(queue); return (error); } void gtaskqueue_drain(struct gtaskqueue *queue, struct gtask *gtask) { if (!queue->tq_spin) WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, __func__); TQ_LOCK(queue); while ((gtask->ta_flags & TASK_ENQUEUED) || task_is_running(queue, gtask)) TQ_SLEEP(queue, gtask, &queue->tq_mutex, PWAIT, "-", 0); TQ_UNLOCK(queue); } void gtaskqueue_drain_all(struct gtaskqueue *queue) { if (!queue->tq_spin) WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, __func__); TQ_LOCK(queue); gtaskqueue_drain_tq_queue(queue); gtaskqueue_drain_tq_active(queue); TQ_UNLOCK(queue); } static int _gtaskqueue_start_threads(struct gtaskqueue **tqp, int count, int pri, - cpuset_t *mask, const char *name, va_list ap) + cpuset_t *mask, bool intr, const char *name, va_list ap) { char ktname[MAXCOMLEN + 1]; struct thread *td; struct gtaskqueue *tq; int i, error; if (count <= 0) return (EINVAL); vsnprintf(ktname, sizeof(ktname), name, ap); tq = *tqp; tq->tq_threads = malloc(sizeof(struct thread *) * count, M_GTASKQUEUE, M_NOWAIT | M_ZERO); if (tq->tq_threads == NULL) { printf("%s: no memory for %s threads\n", __func__, ktname); return (ENOMEM); } + tq->tq_gt_intrs = malloc(sizeof(struct gt_intr_thread) * count, M_GTASKQUEUE, + M_NOWAIT | M_ZERO); + if (tq->tq_gt_intrs == NULL) { + printf("%s: no memory for %s intr info\n", __func__, ktname); + return (ENOMEM); + } for (i = 0; i < count; i++) { if (count == 1) error = kthread_add(gtaskqueue_thread_loop, tqp, NULL, &tq->tq_threads[i], RFSTOPPED, 0, "%s", ktname); else error = kthread_add(gtaskqueue_thread_loop, tqp, NULL, &tq->tq_threads[i], RFSTOPPED, 0, "%s_%d", ktname, i); if (error) { /* should be ok to continue, taskqueue_free will dtrt */ printf("%s: kthread_add(%s): error %d", __func__, ktname, error); tq->tq_threads[i] = NULL; /* paranoid */ } else tq->tq_tcount++; } + if (intr) + tq->tq_flags |= TQ_FLAGS_INTR; + for (i = 0; i < count; i++) { if (tq->tq_threads[i] == NULL) continue; td = tq->tq_threads[i]; if (mask) { error = cpuset_setthread(td->td_tid, mask); /* * Failing to pin is rarely an actual fatal error; * it'll just affect performance. */ if (error) printf("%s: curthread=%llu: can't pin; " "error=%d\n", __func__, (unsigned long long) td->td_tid, error); } thread_lock(td); sched_prio(td, pri); - sched_add(td, SRQ_BORING); + if (intr) { + /* we need to schedule the thread from the interrupt handler for this to work */ + TD_SET_IWAIT(td); + sched_class(td, PRI_ITHD); + td->td_pflags |= TDP_ITHREAD; + } else { + sched_add(td, SRQ_BORING); + } thread_unlock(td); } return (0); } static int gtaskqueue_start_threads(struct gtaskqueue **tqp, int count, int pri, - const char *name, ...) + bool intr, const char *name, ...) { va_list ap; int error; va_start(ap, name); - error = _gtaskqueue_start_threads(tqp, count, pri, NULL, name, ap); + error = _gtaskqueue_start_threads(tqp, count, pri, NULL, intr, name, ap); va_end(ap); return (error); } static inline void gtaskqueue_run_callback(struct gtaskqueue *tq, enum taskqueue_callback_type cb_type) { taskqueue_callback_fn tq_callback; TQ_ASSERT_UNLOCKED(tq); tq_callback = tq->tq_callbacks[cb_type]; if (tq_callback != NULL) tq_callback(tq->tq_cb_contexts[cb_type]); } static void -gtaskqueue_thread_loop(void *arg) +intr_thread_loop(struct gtaskqueue *tq) { - struct gtaskqueue **tqp, *tq; + struct gt_intr_thread *git; + struct thread *td; - tqp = arg; - tq = *tqp; - gtaskqueue_run_callback(tq, TASKQUEUE_CALLBACK_TYPE_INIT); - TQ_LOCK(tq); + git = &tq->tq_gt_intrs[0]; + td = tq->tq_threads[0]; + MPASS(tq->tq_tcount == 1); + while ((tq->tq_flags & TQ_FLAGS_ACTIVE) != 0) { - /* XXX ? */ + THREAD_NO_SLEEPING(); + while (atomic_cmpset_acq_int(&git->git_need, 1, 0) != 0) { + gtaskqueue_run_locked(tq); + } + THREAD_SLEEPING_OK(); + + /* + * Because taskqueue_run() can drop tq_mutex, we need to + * check if the TQ_FLAGS_ACTIVE flag wasn't removed in the + * meantime, which means we missed a wakeup. + */ + if ((tq->tq_flags & TQ_FLAGS_ACTIVE) == 0) + break; + + TQ_UNLOCK(tq); + WITNESS_WARN(WARN_PANIC, NULL, "suspending ithread"); + mtx_assert(&Giant, MA_NOTOWNED); + thread_lock(td); + if (atomic_load_acq_int(&git->git_need) == 0 && + (git->git_flags & (IT_DEAD | IT_WAIT)) == 0) { + TD_SET_IWAIT(td); + mi_switch(SW_VOL | SWT_IWAIT, NULL); + } +#if 0 + /* XXX is this something we want? */ + if (git->git_flags & IT_WAIT) { + wake = 1; + git->git_flags &= ~IT_WAIT; + } +#endif + thread_unlock(td); + TQ_LOCK(tq); + } + THREAD_NO_SLEEPING(); + gtaskqueue_run_locked(tq); + THREAD_SLEEPING_OK(); +} + +static void +timeshare_thread_loop(struct gtaskqueue *tq) +{ + while ((tq->tq_flags & TQ_FLAGS_ACTIVE) != 0) { gtaskqueue_run_locked(tq); /* * Because taskqueue_run() can drop tq_mutex, we need to * check if the TQ_FLAGS_ACTIVE flag wasn't removed in the * meantime, which means we missed a wakeup. */ if ((tq->tq_flags & TQ_FLAGS_ACTIVE) == 0) break; TQ_SLEEP(tq, tq, &tq->tq_mutex, 0, "-", 0); } gtaskqueue_run_locked(tq); +} + +static void +gtaskqueue_thread_loop(void *arg) +{ + struct gtaskqueue **tqp, *tq; + + tqp = arg; + tq = *tqp; + gtaskqueue_run_callback(tq, TASKQUEUE_CALLBACK_TYPE_INIT); + TQ_LOCK(tq); + if (curthread->td_pflags & TDP_ITHREAD) { + intr_thread_loop(tq); + } else { + timeshare_thread_loop(tq); + } + /* * This thread is on its way out, so just drop the lock temporarily * in order to call the shutdown callback. This allows the callback * to look at the taskqueue, even just before it dies. */ TQ_UNLOCK(tq); gtaskqueue_run_callback(tq, TASKQUEUE_CALLBACK_TYPE_SHUTDOWN); TQ_LOCK(tq); /* rendezvous with thread that asked us to terminate */ tq->tq_tcount--; wakeup_one(tq->tq_threads); TQ_UNLOCK(tq); kthread_exit(); } static void gtaskqueue_thread_enqueue(void *context) { struct gtaskqueue **tqp, *tq; tqp = context; tq = *tqp; wakeup_one(tq); } static struct gtaskqueue * gtaskqueue_create_fast(const char *name, int mflags, taskqueue_enqueue_fn enqueue, void *context) { return _gtaskqueue_create(name, mflags, enqueue, context, MTX_SPIN, "fast_taskqueue"); } struct taskqgroup_cpu { LIST_HEAD(, grouptask) tgc_tasks; struct gtaskqueue *tgc_taskq; int tgc_cnt; int tgc_cpu; }; struct taskqgroup { struct taskqgroup_cpu tqg_queue[MAXCPU]; struct mtx tqg_lock; + void (*adjust_func)(void*); char * tqg_name; int tqg_adjusting; int tqg_stride; int tqg_cnt; + int tqg_pri; + int tqg_flags; + bool tqg_intr; }; +#define TQG_NEED_ADJUST 0x1 +#define TQG_ADJUSTED 0x2 struct taskq_bind_task { struct gtask bt_task; int bt_cpuid; }; static void -taskqgroup_cpu_create(struct taskqgroup *qgroup, int idx, int cpu) +taskqgroup_cpu_create(struct taskqgroup *qgroup, int idx, int cpu, bool intr, int pri) { struct taskqgroup_cpu *qcpu; qcpu = &qgroup->tqg_queue[idx]; LIST_INIT(&qcpu->tgc_tasks); - qcpu->tgc_taskq = gtaskqueue_create_fast(NULL, M_WAITOK, + qcpu->tgc_taskq = gtaskqueue_create_fast(NULL, M_WAITOK | M_ZERO, taskqueue_thread_enqueue, &qcpu->tgc_taskq); - gtaskqueue_start_threads(&qcpu->tgc_taskq, 1, PI_SOFT, - "%s_%d", qgroup->tqg_name, idx); + gtaskqueue_start_threads(&qcpu->tgc_taskq, 1, pri, + intr, "%s_%d", qgroup->tqg_name, idx); qcpu->tgc_cpu = cpu; } static void taskqgroup_cpu_remove(struct taskqgroup *qgroup, int idx) { gtaskqueue_free(qgroup->tqg_queue[idx].tgc_taskq); } /* * Find the taskq with least # of tasks that doesn't currently have any * other queues from the uniq identifier. */ static int taskqgroup_find(struct taskqgroup *qgroup, void *uniq) { struct grouptask *n; int i, idx, mincnt; int strict; mtx_assert(&qgroup->tqg_lock, MA_OWNED); if (qgroup->tqg_cnt == 0) return (0); idx = -1; mincnt = INT_MAX; /* * Two passes; First scan for a queue with the least tasks that * does not already service this uniq id. If that fails simply find * the queue with the least total tasks; */ for (strict = 1; mincnt == INT_MAX; strict = 0) { for (i = 0; i < qgroup->tqg_cnt; i++) { if (qgroup->tqg_queue[i].tgc_cnt > mincnt) continue; if (strict) { LIST_FOREACH(n, &qgroup->tqg_queue[i].tgc_tasks, gt_list) if (n->gt_uniq == uniq) break; if (n != NULL) continue; } mincnt = qgroup->tqg_queue[i].tgc_cnt; idx = i; } } if (idx == -1) panic("taskqgroup_find: Failed to pick a qid."); return (idx); } /* * smp_started is unusable since it is not set for UP kernels or even for * SMP kernels when there is 1 CPU. This is usually handled by adding a * (mp_ncpus == 1) test, but that would be broken here since we need to * to synchronize with the SI_SUB_SMP ordering. Even in the pure SMP case * smp_started only gives a fuzzy ordering relative to SI_SUB_SMP. * * So maintain our own flag. It must be set after all CPUs are started * and before SI_SUB_SMP:SI_ORDER_ANY so that the SYSINIT for delayed * adjustment is properly delayed. SI_ORDER_FOURTH is clearly before * SI_ORDER_ANY and unclearly after the CPUs are started. It would be * simpler for adjustment to pass a flag indicating if it is delayed. */ static int tqg_smp_started; static void tqg_record_smp_started(void *arg) { tqg_smp_started = 1; } SYSINIT(tqg_record_smp_started, SI_SUB_SMP, SI_ORDER_FOURTH, tqg_record_smp_started, NULL); void taskqgroup_attach(struct taskqgroup *qgroup, struct grouptask *gtask, void *uniq, int irq, char *name) { cpuset_t mask; - int qid; + int qid, error; gtask->gt_uniq = uniq; gtask->gt_name = name; gtask->gt_irq = irq; gtask->gt_cpu = -1; + mtx_lock(&qgroup->tqg_lock); + qgroup->tqg_flags |= TQG_NEED_ADJUST; + mtx_unlock(&qgroup->tqg_lock); + + if (tqg_smp_started && !(qgroup->tqg_flags & TQG_ADJUSTED)) + qgroup->adjust_func(NULL); + + mtx_lock(&qgroup->tqg_lock); qid = taskqgroup_find(qgroup, uniq); qgroup->tqg_queue[qid].tgc_cnt++; LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list); gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq; if (irq != -1 && tqg_smp_started) { gtask->gt_cpu = qgroup->tqg_queue[qid].tgc_cpu; CPU_ZERO(&mask); CPU_SET(qgroup->tqg_queue[qid].tgc_cpu, &mask); mtx_unlock(&qgroup->tqg_lock); - intr_setaffinity(irq, CPU_WHICH_IRQ, &mask); + error = intr_setaffinity(irq, CPU_WHICH_INTRHANDLER, &mask); + if (error) + printf("taskqgroup_attach: setaffinity failed: %d\n", error); } else mtx_unlock(&qgroup->tqg_lock); } static void taskqgroup_attach_deferred(struct taskqgroup *qgroup, struct grouptask *gtask) { cpuset_t mask; - int qid, cpu; + int qid, cpu, error; mtx_lock(&qgroup->tqg_lock); qid = taskqgroup_find(qgroup, gtask->gt_uniq); cpu = qgroup->tqg_queue[qid].tgc_cpu; if (gtask->gt_irq != -1) { mtx_unlock(&qgroup->tqg_lock); CPU_ZERO(&mask); CPU_SET(cpu, &mask); - intr_setaffinity(gtask->gt_irq, CPU_WHICH_IRQ, &mask); - + error = intr_setaffinity(gtask->gt_irq, CPU_WHICH_INTRHANDLER, &mask); mtx_lock(&qgroup->tqg_lock); + if (error) + printf("taskqgroup_attach_deferred: setaffinity failed: %d\n", error); } qgroup->tqg_queue[qid].tgc_cnt++; LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list); MPASS(qgroup->tqg_queue[qid].tgc_taskq != NULL); gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq; mtx_unlock(&qgroup->tqg_lock); } +static int +taskqgroup_adjust_deferred(struct taskqgroup *qgroup, int cpu) +{ + int i, error = 0, cpu_max = -1; + + mtx_lock(&qgroup->tqg_lock); + for (i = 0; i < qgroup->tqg_cnt; i++) + if (qgroup->tqg_queue[i].tgc_cpu > cpu_max) + cpu_max = qgroup->tqg_queue[i].tgc_cpu; + if (cpu_max >= cpu) { + mtx_unlock(&qgroup->tqg_lock); + return (0); + } + MPASS(cpu <= mp_maxid); + error = _taskqgroup_adjust(qgroup, cpu + 1, qgroup->tqg_stride, + qgroup->tqg_intr, qgroup->tqg_pri); + if (error) { + printf("%s: _taskqgroup_adjust(%p, %d, %d, %d, %d) => %d\n\n", + __func__, qgroup, cpu + 1, qgroup->tqg_stride, qgroup->tqg_intr, + qgroup->tqg_pri, error); + goto out; + } + for (i = 0; i < qgroup->tqg_cnt; i++) + if (qgroup->tqg_queue[i].tgc_cpu > cpu_max) + cpu_max = qgroup->tqg_queue[i].tgc_cpu; + MPASS(cpu_max >= cpu); +out: + mtx_unlock(&qgroup->tqg_lock); + return (error); +} + int taskqgroup_attach_cpu(struct taskqgroup *qgroup, struct grouptask *gtask, void *uniq, int cpu, int irq, char *name) { cpuset_t mask; - int i, qid; + int i, error, qid; qid = -1; gtask->gt_uniq = uniq; gtask->gt_name = name; gtask->gt_irq = irq; gtask->gt_cpu = cpu; + MPASS(cpu >= 0); + mtx_lock(&qgroup->tqg_lock); + qgroup->tqg_flags |= TQG_NEED_ADJUST; + mtx_unlock(&qgroup->tqg_lock); + + if (tqg_smp_started && !(qgroup->tqg_flags & TQG_ADJUSTED)) { + uintptr_t cpuid = cpu + 1; + qgroup->adjust_func((void *)cpuid); + } + if ((error = taskqgroup_adjust_deferred(qgroup, cpu))) + return (error); + + mtx_lock(&qgroup->tqg_lock); if (tqg_smp_started) { - for (i = 0; i < qgroup->tqg_cnt; i++) + for (i = 0; i < qgroup->tqg_cnt; i++) { if (qgroup->tqg_queue[i].tgc_cpu == cpu) { qid = i; break; } +#ifdef INVARIANTS + else + printf("qgroup->tqg_queue[%d].tgc_cpu=0x%x tgc_cnt=0x%x\n", + i, qgroup->tqg_queue[i].tgc_cpu, qgroup->tqg_queue[i].tgc_cnt); + +#endif + } if (qid == -1) { mtx_unlock(&qgroup->tqg_lock); + printf("%s: qid not found for cpu=%d\n", __func__, cpu); return (EINVAL); } } else qid = 0; qgroup->tqg_queue[qid].tgc_cnt++; LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list); gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq; cpu = qgroup->tqg_queue[qid].tgc_cpu; mtx_unlock(&qgroup->tqg_lock); CPU_ZERO(&mask); CPU_SET(cpu, &mask); - if (irq != -1 && tqg_smp_started) - intr_setaffinity(irq, CPU_WHICH_IRQ, &mask); + if (irq != -1 && tqg_smp_started) { + error = intr_setaffinity(irq, CPU_WHICH_INTRHANDLER, &mask); + if (error) + printf("taskqgroup_attach_cpu: setaffinity failed: %d\n", error); + } return (0); } static int taskqgroup_attach_cpu_deferred(struct taskqgroup *qgroup, struct grouptask *gtask) { cpuset_t mask; - int i, qid, irq, cpu; + int i, qid, irq, cpu, error; qid = -1; irq = gtask->gt_irq; cpu = gtask->gt_cpu; MPASS(tqg_smp_started); + + if ((error = taskqgroup_adjust_deferred(qgroup, cpu))) + return (error); mtx_lock(&qgroup->tqg_lock); + /* adjust as needed */ + MPASS(cpu <= mp_maxid); for (i = 0; i < qgroup->tqg_cnt; i++) if (qgroup->tqg_queue[i].tgc_cpu == cpu) { qid = i; break; } if (qid == -1) { mtx_unlock(&qgroup->tqg_lock); + printf("%s: qid not found for cpu=%d\n", __func__, cpu); return (EINVAL); } qgroup->tqg_queue[qid].tgc_cnt++; LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list); MPASS(qgroup->tqg_queue[qid].tgc_taskq != NULL); gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq; mtx_unlock(&qgroup->tqg_lock); CPU_ZERO(&mask); CPU_SET(cpu, &mask); - if (irq != -1) - intr_setaffinity(irq, CPU_WHICH_IRQ, &mask); + if (irq != -1) { + error = intr_setaffinity(irq, CPU_WHICH_INTRHANDLER, &mask); + if (error) + printf("taskqgroup_attach_cpu: setaffinity failed: %d\n", error); + } return (0); } void taskqgroup_detach(struct taskqgroup *qgroup, struct grouptask *gtask) { int i; mtx_lock(&qgroup->tqg_lock); for (i = 0; i < qgroup->tqg_cnt; i++) if (qgroup->tqg_queue[i].tgc_taskq == gtask->gt_taskqueue) break; if (i == qgroup->tqg_cnt) panic("taskqgroup_detach: task not in group\n"); qgroup->tqg_queue[i].tgc_cnt--; LIST_REMOVE(gtask, gt_list); mtx_unlock(&qgroup->tqg_lock); gtask->gt_taskqueue = NULL; } static void taskqgroup_binder(void *ctx) { struct taskq_bind_task *gtask = (struct taskq_bind_task *)ctx; cpuset_t mask; int error; CPU_ZERO(&mask); CPU_SET(gtask->bt_cpuid, &mask); error = cpuset_setthread(curthread->td_tid, &mask); thread_lock(curthread); sched_bind(curthread, gtask->bt_cpuid); thread_unlock(curthread); if (error) printf("taskqgroup_binder: setaffinity failed: %d\n", error); free(gtask, M_DEVBUF); + } +static void +taskqgroup_ithread_binder(void *ctx) +{ + struct taskq_bind_task *gtask = (struct taskq_bind_task *)ctx; + cpuset_t mask; + int error; + CPU_ZERO(&mask); + CPU_SET(gtask->bt_cpuid, &mask); + error = cpuset_setthread(curthread->td_tid, &mask); + + if (error) + printf("taskqgroup_binder: setaffinity failed: %d\n", + error); + free(gtask, M_DEVBUF); + +} static void taskqgroup_bind(struct taskqgroup *qgroup) { struct taskq_bind_task *gtask; int i; /* * Bind taskqueue threads to specific CPUs, if they have been assigned * one. */ if (qgroup->tqg_cnt == 1) return; for (i = 0; i < qgroup->tqg_cnt; i++) { gtask = malloc(sizeof (*gtask), M_DEVBUF, M_WAITOK); - GTASK_INIT(>ask->bt_task, 0, 0, taskqgroup_binder, gtask); + if (qgroup->tqg_intr) + GTASK_INIT(>ask->bt_task, 0, 0, taskqgroup_ithread_binder, gtask); + else + GTASK_INIT(>ask->bt_task, 0, 0, taskqgroup_binder, gtask); gtask->bt_cpuid = qgroup->tqg_queue[i].tgc_cpu; grouptaskqueue_enqueue(qgroup->tqg_queue[i].tgc_taskq, >ask->bt_task); } } static int -_taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride) +_taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride, bool ithread, int pri) { LIST_HEAD(, grouptask) gtask_head = LIST_HEAD_INITIALIZER(NULL); struct grouptask *gtask; int i, k, old_cnt, old_cpu, cpu; mtx_assert(&qgroup->tqg_lock, MA_OWNED); if (cnt < 1 || cnt * stride > mp_ncpus || !tqg_smp_started) { printf("%s: failed cnt: %d stride: %d " "mp_ncpus: %d tqg_smp_started: %d\n", __func__, cnt, stride, mp_ncpus, tqg_smp_started); return (EINVAL); } if (qgroup->tqg_adjusting) { - printf("taskqgroup_adjust failed: adjusting\n"); + printf("%s: failed: adjusting\n", __func__); return (EBUSY); } + /* No work to be done */ + if (qgroup->tqg_cnt == cnt) + return (0); qgroup->tqg_adjusting = 1; old_cnt = qgroup->tqg_cnt; old_cpu = 0; - if (old_cnt < cnt) - old_cpu = qgroup->tqg_queue[old_cnt].tgc_cpu; + if (old_cnt < cnt) { + int old_max_idx = max(0, old_cnt-1); + old_cpu = qgroup->tqg_queue[old_max_idx].tgc_cpu; + if (old_cnt > 0) + for (k = 0; k < stride; k++) + old_cpu = CPU_NEXT(old_cpu); + } mtx_unlock(&qgroup->tqg_lock); /* * Set up queue for tasks added before boot. */ if (old_cnt == 0) { LIST_SWAP(>ask_head, &qgroup->tqg_queue[0].tgc_tasks, grouptask, gt_list); qgroup->tqg_queue[0].tgc_cnt = 0; } /* * If new taskq threads have been added. */ cpu = old_cpu; for (i = old_cnt; i < cnt; i++) { - taskqgroup_cpu_create(qgroup, i, cpu); + taskqgroup_cpu_create(qgroup, i, cpu, ithread, pri); for (k = 0; k < stride; k++) cpu = CPU_NEXT(cpu); } mtx_lock(&qgroup->tqg_lock); qgroup->tqg_cnt = cnt; qgroup->tqg_stride = stride; + qgroup->tqg_intr = ithread; + qgroup->tqg_pri = pri; /* * Adjust drivers to use new taskqs. */ for (i = 0; i < old_cnt; i++) { while ((gtask = LIST_FIRST(&qgroup->tqg_queue[i].tgc_tasks))) { LIST_REMOVE(gtask, gt_list); qgroup->tqg_queue[i].tgc_cnt--; LIST_INSERT_HEAD(>ask_head, gtask, gt_list); } } mtx_unlock(&qgroup->tqg_lock); while ((gtask = LIST_FIRST(>ask_head))) { LIST_REMOVE(gtask, gt_list); if (gtask->gt_cpu == -1) taskqgroup_attach_deferred(qgroup, gtask); else if (taskqgroup_attach_cpu_deferred(qgroup, gtask)) taskqgroup_attach_deferred(qgroup, gtask); } #ifdef INVARIANTS mtx_lock(&qgroup->tqg_lock); for (i = 0; i < qgroup->tqg_cnt; i++) { MPASS(qgroup->tqg_queue[i].tgc_taskq != NULL); LIST_FOREACH(gtask, &qgroup->tqg_queue[i].tgc_tasks, gt_list) MPASS(gtask->gt_taskqueue != NULL); } mtx_unlock(&qgroup->tqg_lock); #endif /* * If taskq thread count has been reduced. */ for (i = cnt; i < old_cnt; i++) taskqgroup_cpu_remove(qgroup, i); taskqgroup_bind(qgroup); mtx_lock(&qgroup->tqg_lock); qgroup->tqg_adjusting = 0; return (0); } int -taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride) +taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride, bool ithread, int pri) { int error; mtx_lock(&qgroup->tqg_lock); - error = _taskqgroup_adjust(qgroup, cnt, stride); + error = _taskqgroup_adjust(qgroup, cnt, stride, ithread, pri); mtx_unlock(&qgroup->tqg_lock); return (error); } +void +taskqgroup_set_adjust(struct taskqgroup *qgroup, void (*adjust_func)(void*)) +{ + qgroup-> adjust_func = adjust_func; +} + +int +taskqgroup_adjust_once(struct taskqgroup *qgroup, int cnt, int stride, bool ithread, int pri) +{ + int error = 0; + + mtx_lock(&qgroup->tqg_lock); + if ((qgroup->tqg_flags & (TQG_ADJUSTED|TQG_NEED_ADJUST)) == TQG_NEED_ADJUST) { + qgroup->tqg_flags |= TQG_ADJUSTED; + error = _taskqgroup_adjust(qgroup, cnt, stride, ithread, pri); + MPASS(error == 0); + } + mtx_unlock(&qgroup->tqg_lock); + + return (error); +} + struct taskqgroup * taskqgroup_create(char *name) { struct taskqgroup *qgroup; qgroup = malloc(sizeof(*qgroup), M_GTASKQUEUE, M_WAITOK | M_ZERO); mtx_init(&qgroup->tqg_lock, "taskqgroup", NULL, MTX_DEF); qgroup->tqg_name = name; LIST_INIT(&qgroup->tqg_queue[0].tgc_tasks); - + MPASS(qgroup->tqg_queue[0].tgc_cnt == 0); + MPASS(qgroup->tqg_queue[0].tgc_cpu == 0); + MPASS(qgroup->tqg_queue[0].tgc_taskq == 0); return (qgroup); } void taskqgroup_destroy(struct taskqgroup *qgroup) { } Index: head/sys/net/iflib.c =================================================================== --- head/sys/net/iflib.c (revision 323515) +++ head/sys/net/iflib.c (revision 323516) @@ -1,5670 +1,6087 @@ /*- * Copyright (c) 2014-2017, Matthew Macy * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * 2. Neither the name of Matthew Macy nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_acpi.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include - #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "ifdi_if.h" #if defined(__i386__) || defined(__amd64__) #include #include #include #include #include #include #endif #include /* * enable accounting of every mbuf as it comes in to and goes out of * iflib's software descriptor references */ #define MEMORY_LOGGING 0 /* * Enable mbuf vectors for compressing long mbuf chains */ /* * NB: * - Prefetching in tx cleaning should perhaps be a tunable. The distance ahead * we prefetch needs to be determined by the time spent in m_free vis a vis * the cost of a prefetch. This will of course vary based on the workload: * - NFLX's m_free path is dominated by vm-based M_EXT manipulation which * is quite expensive, thus suggesting very little prefetch. * - small packet forwarding which is just returning a single mbuf to * UMA will typically be very fast vis a vis the cost of a memory * access. */ /* * File organization: * - private structures * - iflib private utility functions * - ifnet functions * - vlan registry and other exported functions * - iflib public core functions * * */ static MALLOC_DEFINE(M_IFLIB, "iflib", "ifnet library"); struct iflib_txq; typedef struct iflib_txq *iflib_txq_t; struct iflib_rxq; typedef struct iflib_rxq *iflib_rxq_t; struct iflib_fl; typedef struct iflib_fl *iflib_fl_t; struct iflib_ctx; typedef struct iflib_filter_info { driver_filter_t *ifi_filter; void *ifi_filter_arg; struct grouptask *ifi_task; void *ifi_ctx; } *iflib_filter_info_t; struct iflib_ctx { KOBJ_FIELDS; /* * Pointer to hardware driver's softc */ void *ifc_softc; device_t ifc_dev; if_t ifc_ifp; cpuset_t ifc_cpus; if_shared_ctx_t ifc_sctx; struct if_softc_ctx ifc_softc_ctx; - struct mtx ifc_mtx; + struct sx ifc_sx; uint16_t ifc_nhwtxqs; uint16_t ifc_nhwrxqs; iflib_txq_t ifc_txqs; iflib_rxq_t ifc_rxqs; uint32_t ifc_if_flags; uint32_t ifc_flags; uint32_t ifc_max_fl_buf_size; int ifc_in_detach; int ifc_link_state; int ifc_link_irq; int ifc_watchdog_events; struct cdev *ifc_led_dev; struct resource *ifc_msix_mem; struct if_irq ifc_legacy_irq; struct grouptask ifc_admin_task; struct grouptask ifc_vflr_task; struct iflib_filter_info ifc_filter_info; struct ifmedia ifc_media; struct sysctl_oid *ifc_sysctl_node; uint16_t ifc_sysctl_ntxqs; uint16_t ifc_sysctl_nrxqs; uint16_t ifc_sysctl_qs_eq_override; + uint16_t ifc_cpuid_highest; + uint16_t ifc_sysctl_rx_budget; qidx_t ifc_sysctl_ntxds[8]; qidx_t ifc_sysctl_nrxds[8]; struct if_txrx ifc_txrx; #define isc_txd_encap ifc_txrx.ift_txd_encap #define isc_txd_flush ifc_txrx.ift_txd_flush #define isc_txd_credits_update ifc_txrx.ift_txd_credits_update #define isc_rxd_available ifc_txrx.ift_rxd_available #define isc_rxd_pkt_get ifc_txrx.ift_rxd_pkt_get #define isc_rxd_refill ifc_txrx.ift_rxd_refill #define isc_rxd_flush ifc_txrx.ift_rxd_flush #define isc_rxd_refill ifc_txrx.ift_rxd_refill #define isc_rxd_refill ifc_txrx.ift_rxd_refill #define isc_legacy_intr ifc_txrx.ift_legacy_intr eventhandler_tag ifc_vlan_attach_event; eventhandler_tag ifc_vlan_detach_event; uint8_t ifc_mac[ETHER_ADDR_LEN]; char ifc_mtx_name[16]; + LIST_ENTRY(iflib_ctx) ifc_next; }; +static LIST_HEAD(ctx_head, iflib_ctx) ctx_list; +static struct mtx ctx_list_lock; +TASKQGROUP_DEFINE(if_io, mp_ncpus, 1, true, PI_NET); +TASKQGROUP_DEFINE(if_config, 1, 1, false, PI_SOFT); + +static void +iflib_ctx_apply(void (*fn)(if_ctx_t ctx, void *arg), void *arg) +{ + if_ctx_t ctx; + + mtx_lock(&ctx_list_lock); + LIST_FOREACH(ctx, &ctx_list, ifc_next) { + (fn)(ctx, arg); + } + mtx_unlock(&ctx_list_lock); +} + +static void +_iflib_cpuid_highest(if_ctx_t ctx, void *arg) { + int *cpuid = arg; + + if (*cpuid < ctx->ifc_cpuid_highest) + *cpuid = ctx->ifc_cpuid_highest; +} + +static int +iflib_cpuid_highest(void) +{ + int cpuid = 0; + + iflib_ctx_apply(_iflib_cpuid_highest, &cpuid); + return (cpuid); +} + +static void +iflib_ctx_insert(if_ctx_t ctx) +{ + mtx_lock(&ctx_list_lock); + LIST_INSERT_HEAD(&ctx_list, ctx, ifc_next); + mtx_unlock(&ctx_list_lock); +} + +static void +iflib_ctx_remove(if_ctx_t ctx) +{ + int max_cpuid_prev, max_cpuid_new; + + max_cpuid_prev = iflib_cpuid_highest(); + mtx_lock(&ctx_list_lock); + LIST_REMOVE(ctx, ifc_next); + mtx_unlock(&ctx_list_lock); + max_cpuid_new = max(1, iflib_cpuid_highest()); + if (max_cpuid_new < max_cpuid_prev) { + taskqgroup_adjust(qgroup_if_io, max_cpuid_new, 1, true, PI_NET); + } +} + void * iflib_get_softc(if_ctx_t ctx) { return (ctx->ifc_softc); } device_t iflib_get_dev(if_ctx_t ctx) { return (ctx->ifc_dev); } if_t iflib_get_ifp(if_ctx_t ctx) { return (ctx->ifc_ifp); } struct ifmedia * iflib_get_media(if_ctx_t ctx) { return (&ctx->ifc_media); } void iflib_set_mac(if_ctx_t ctx, uint8_t mac[ETHER_ADDR_LEN]) { bcopy(mac, ctx->ifc_mac, ETHER_ADDR_LEN); } if_softc_ctx_t iflib_get_softc_ctx(if_ctx_t ctx) { return (&ctx->ifc_softc_ctx); } if_shared_ctx_t iflib_get_sctx(if_ctx_t ctx) { return (ctx->ifc_sctx); } #define IP_ALIGNED(m) ((((uintptr_t)(m)->m_data) & 0x3) == 0x2) #define CACHE_PTR_INCREMENT (CACHE_LINE_SIZE/sizeof(void*)) #define CACHE_PTR_NEXT(ptr) ((void *)(((uintptr_t)(ptr)+CACHE_LINE_SIZE-1) & (CACHE_LINE_SIZE-1))) #define LINK_ACTIVE(ctx) ((ctx)->ifc_link_state == LINK_STATE_UP) #define CTX_IS_VF(ctx) ((ctx)->ifc_sctx->isc_flags & IFLIB_IS_VF) #define RX_SW_DESC_MAP_CREATED (1 << 0) -#define TX_SW_DESC_MAP_CREATED (1 << 1) -#define RX_SW_DESC_INUSE (1 << 3) -#define TX_SW_DESC_MAPPED (1 << 4) +#define RX_SW_DESC_INUSE (1 << 1) +#define RX_NETMAP_INUSE (1 << 2) +#define TX_SW_DESC_MAP_CREATED (1 << 0) +#define TX_SW_DESC_MAPPED (1 << 1) + #define M_TOOBIG M_PROTO1 typedef struct iflib_sw_rx_desc_array { bus_dmamap_t *ifsd_map; /* bus_dma maps for packet */ struct mbuf **ifsd_m; /* pkthdr mbufs */ caddr_t *ifsd_cl; /* direct cluster pointer for rx */ uint8_t *ifsd_flags; } iflib_rxsd_array_t; typedef struct iflib_sw_tx_desc_array { bus_dmamap_t *ifsd_map; /* bus_dma maps for packet */ struct mbuf **ifsd_m; /* pkthdr mbufs */ uint8_t *ifsd_flags; } if_txsd_vec_t; /* magic number that should be high enough for any hardware */ #define IFLIB_MAX_TX_SEGS 128 #define IFLIB_MAX_RX_SEGS 32 #define IFLIB_RX_COPY_THRESH 128 #define IFLIB_MAX_RX_REFRESH 32 /* The minimum descriptors per second before we start coalescing */ #define IFLIB_MIN_DESC_SEC 16384 #define IFLIB_DEFAULT_TX_UPDATE_FREQ 16 #define IFLIB_QUEUE_IDLE 0 #define IFLIB_QUEUE_HUNG 1 #define IFLIB_QUEUE_WORKING 2 /* maximum number of txqs that can share an rx interrupt */ #define IFLIB_MAX_TX_SHARED_INTR 4 /* this should really scale with ring size - this is a fairly arbitrary value */ #define TX_BATCH_SIZE 32 #define IFLIB_RESTART_BUDGET 8 #define IFC_LEGACY 0x001 #define IFC_QFLUSH 0x002 #define IFC_MULTISEG 0x004 #define IFC_DMAR 0x008 #define IFC_SC_ALLOCATED 0x010 #define IFC_INIT_DONE 0x020 #define IFC_PREFETCH 0x040 #define IFC_DO_RESET 0x080 #define IFC_CHECK_HUNG 0x100 #define CSUM_OFFLOAD (CSUM_IP_TSO|CSUM_IP6_TSO|CSUM_IP| \ CSUM_IP_UDP|CSUM_IP_TCP|CSUM_IP_SCTP| \ CSUM_IP6_UDP|CSUM_IP6_TCP|CSUM_IP6_SCTP) struct iflib_txq { qidx_t ift_in_use; qidx_t ift_cidx; qidx_t ift_cidx_processed; qidx_t ift_pidx; uint8_t ift_gen; uint8_t ift_br_offset; uint16_t ift_npending; uint16_t ift_db_pending; uint16_t ift_rs_pending; /* implicit pad */ uint8_t ift_txd_size[8]; uint64_t ift_processed; uint64_t ift_cleaned; uint64_t ift_cleaned_prev; #if MEMORY_LOGGING uint64_t ift_enqueued; uint64_t ift_dequeued; #endif uint64_t ift_no_tx_dma_setup; uint64_t ift_no_desc_avail; uint64_t ift_mbuf_defrag_failed; uint64_t ift_mbuf_defrag; uint64_t ift_map_failed; uint64_t ift_txd_encap_efbig; uint64_t ift_pullups; struct mtx ift_mtx; struct mtx ift_db_mtx; /* constant values */ if_ctx_t ift_ctx; struct ifmp_ring *ift_br; struct grouptask ift_task; qidx_t ift_size; uint16_t ift_id; struct callout ift_timer; if_txsd_vec_t ift_sds; uint8_t ift_qstatus; uint8_t ift_closed; uint8_t ift_update_freq; + uint8_t ift_stall_count; struct iflib_filter_info ift_filter_info; bus_dma_tag_t ift_desc_tag; bus_dma_tag_t ift_tso_desc_tag; iflib_dma_info_t ift_ifdi; #define MTX_NAME_LEN 16 char ift_mtx_name[MTX_NAME_LEN]; char ift_db_mtx_name[MTX_NAME_LEN]; bus_dma_segment_t ift_segs[IFLIB_MAX_TX_SEGS] __aligned(CACHE_LINE_SIZE); #ifdef IFLIB_DIAGNOSTICS uint64_t ift_cpu_exec_count[256]; #endif } __aligned(CACHE_LINE_SIZE); struct iflib_fl { qidx_t ifl_cidx; qidx_t ifl_pidx; qidx_t ifl_credits; uint8_t ifl_gen; uint8_t ifl_rxd_size; #if MEMORY_LOGGING uint64_t ifl_m_enqueued; uint64_t ifl_m_dequeued; uint64_t ifl_cl_enqueued; uint64_t ifl_cl_dequeued; #endif /* implicit pad */ bitstr_t *ifl_rx_bitmap; qidx_t ifl_fragidx; /* constant */ qidx_t ifl_size; uint16_t ifl_buf_size; uint16_t ifl_cltype; uma_zone_t ifl_zone; iflib_rxsd_array_t ifl_sds; iflib_rxq_t ifl_rxq; uint8_t ifl_id; bus_dma_tag_t ifl_desc_tag; iflib_dma_info_t ifl_ifdi; uint64_t ifl_bus_addrs[IFLIB_MAX_RX_REFRESH] __aligned(CACHE_LINE_SIZE); caddr_t ifl_vm_addrs[IFLIB_MAX_RX_REFRESH]; qidx_t ifl_rxd_idxs[IFLIB_MAX_RX_REFRESH]; } __aligned(CACHE_LINE_SIZE); static inline qidx_t get_inuse(int size, qidx_t cidx, qidx_t pidx, uint8_t gen) { qidx_t used; if (pidx > cidx) used = pidx - cidx; else if (pidx < cidx) used = size - cidx + pidx; else if (gen == 0 && pidx == cidx) used = 0; else if (gen == 1 && pidx == cidx) used = size; else panic("bad state"); return (used); } #define TXQ_AVAIL(txq) (txq->ift_size - get_inuse(txq->ift_size, txq->ift_cidx, txq->ift_pidx, txq->ift_gen)) #define IDXDIFF(head, tail, wrap) \ ((head) >= (tail) ? (head) - (tail) : (wrap) - (tail) + (head)) struct iflib_rxq { /* If there is a separate completion queue - * these are the cq cidx and pidx. Otherwise * these are unused. */ qidx_t ifr_size; qidx_t ifr_cq_cidx; qidx_t ifr_cq_pidx; uint8_t ifr_cq_gen; uint8_t ifr_fl_offset; if_ctx_t ifr_ctx; iflib_fl_t ifr_fl; uint64_t ifr_rx_irq; uint16_t ifr_id; uint8_t ifr_lro_enabled; uint8_t ifr_nfl; uint8_t ifr_ntxqirq; uint8_t ifr_txqid[IFLIB_MAX_TX_SHARED_INTR]; struct lro_ctrl ifr_lc; struct grouptask ifr_task; struct iflib_filter_info ifr_filter_info; iflib_dma_info_t ifr_ifdi; - + struct if_rxd_info ifr_ri; + struct if_rxd_update ifr_iru; /* dynamically allocate if any drivers need a value substantially larger than this */ struct if_rxd_frag ifr_frags[IFLIB_MAX_RX_SEGS] __aligned(CACHE_LINE_SIZE); + #ifdef IFLIB_DIAGNOSTICS uint64_t ifr_cpu_exec_count[256]; #endif } __aligned(CACHE_LINE_SIZE); typedef struct if_rxsd { caddr_t *ifsd_cl; struct mbuf **ifsd_m; iflib_fl_t ifsd_fl; qidx_t ifsd_cidx; } *if_rxsd_t; /* multiple of word size */ #ifdef __LP64__ -#define PKT_INFO_SIZE 6 +#define PKT_INFO_SIZE 7 #define RXD_INFO_SIZE 5 #define PKT_TYPE uint64_t #else -#define PKT_INFO_SIZE 11 +#define PKT_INFO_SIZE 12 #define RXD_INFO_SIZE 8 #define PKT_TYPE uint32_t #endif #define PKT_LOOP_BOUND ((PKT_INFO_SIZE/3)*3) #define RXD_LOOP_BOUND ((RXD_INFO_SIZE/4)*4) typedef struct if_pkt_info_pad { PKT_TYPE pkt_val[PKT_INFO_SIZE]; } *if_pkt_info_pad_t; typedef struct if_rxd_info_pad { PKT_TYPE rxd_val[RXD_INFO_SIZE]; } *if_rxd_info_pad_t; CTASSERT(sizeof(struct if_pkt_info_pad) == sizeof(struct if_pkt_info)); CTASSERT(sizeof(struct if_rxd_info_pad) == sizeof(struct if_rxd_info)); static inline void pkt_info_zero(if_pkt_info_t pi) { if_pkt_info_pad_t pi_pad; pi_pad = (if_pkt_info_pad_t)pi; pi_pad->pkt_val[0] = 0; pi_pad->pkt_val[1] = 0; pi_pad->pkt_val[2] = 0; pi_pad->pkt_val[3] = 0; pi_pad->pkt_val[4] = 0; pi_pad->pkt_val[5] = 0; + pi_pad->pkt_val[6] = 0; #ifndef __LP64__ - pi_pad->pkt_val[6] = 0; pi_pad->pkt_val[7] = 0; pi_pad->pkt_val[8] = 0; - pi_pad->pkt_val[9] = 0; pi_pad->pkt_val[10] = 0; + pi_pad->pkt_val[7] = 0; pi_pad->pkt_val[8] = 0; pi_pad->pkt_val[9] = 0; + pi_pad->pkt_val[10] = 0; pi_pad->pkt_val[11] = 0; #endif } static inline void rxd_info_zero(if_rxd_info_t ri) { if_rxd_info_pad_t ri_pad; int i; ri_pad = (if_rxd_info_pad_t)ri; for (i = 0; i < RXD_LOOP_BOUND; i += 4) { ri_pad->rxd_val[i] = 0; ri_pad->rxd_val[i+1] = 0; ri_pad->rxd_val[i+2] = 0; ri_pad->rxd_val[i+3] = 0; } #ifdef __LP64__ ri_pad->rxd_val[RXD_INFO_SIZE-1] = 0; #endif } /* * Only allow a single packet to take up most 1/nth of the tx ring */ #define MAX_SINGLE_PACKET_FRACTION 12 #define IF_BAD_DMA (bus_addr_t)-1 -#define CTX_ACTIVE(ctx) ((if_getdrvflags((ctx)->ifc_ifp) & IFF_DRV_RUNNING)) +static SYSCTL_NODE(_net, OID_AUTO, iflib, CTLFLAG_RD, 0, + "iflib driver parameters"); -#define CTX_LOCK_INIT(_sc, _name) mtx_init(&(_sc)->ifc_mtx, _name, "iflib ctx lock", MTX_DEF) +static int iflib_timer_int; +SYSCTL_INT(_net_iflib, OID_AUTO, timer_int, CTLFLAG_RW, &iflib_timer_int, + 0, "interval at which to run per-queue timers (in ticks)"); -#define CTX_LOCK(ctx) mtx_lock(&(ctx)->ifc_mtx) -#define CTX_UNLOCK(ctx) mtx_unlock(&(ctx)->ifc_mtx) -#define CTX_LOCK_DESTROY(ctx) mtx_destroy(&(ctx)->ifc_mtx) +static int force_busdma = 0; +SYSCTL_INT(_net_iflib, OID_AUTO, force_busdma, CTLFLAG_RDTUN, &force_busdma, + 1, "force busdma"); +#define CTX_ACTIVE(ctx) ((if_getdrvflags((ctx)->ifc_ifp) & IFF_DRV_RUNNING)) +#define CTX_LOCK_INIT(_sc, _name) sx_init(&(_sc)->ifc_sx, _name) + +#define CTX_LOCK(ctx) sx_xlock(&(ctx)->ifc_sx) +#define CTX_UNLOCK(ctx) sx_xunlock(&(ctx)->ifc_sx) +#define CTX_LOCK_DESTROY(ctx) sx_destroy(&(ctx)->ifc_sx) + #define CALLOUT_LOCK(txq) mtx_lock(&txq->ift_mtx) #define CALLOUT_UNLOCK(txq) mtx_unlock(&txq->ift_mtx) /* Our boot-time initialization hook */ static int iflib_module_event_handler(module_t, int, void *); static moduledata_t iflib_moduledata = { "iflib", iflib_module_event_handler, NULL }; DECLARE_MODULE(iflib, iflib_moduledata, SI_SUB_INIT_IF, SI_ORDER_ANY); MODULE_VERSION(iflib, 1); MODULE_DEPEND(iflib, pci, 1, 1, 1); MODULE_DEPEND(iflib, ether, 1, 1, 1); -TASKQGROUP_DEFINE(if_io_tqg, mp_ncpus, 1); -TASKQGROUP_DEFINE(if_config_tqg, 1, 1); - #ifndef IFLIB_DEBUG_COUNTERS #ifdef INVARIANTS #define IFLIB_DEBUG_COUNTERS 1 #else #define IFLIB_DEBUG_COUNTERS 0 #endif /* !INVARIANTS */ #endif -static SYSCTL_NODE(_net, OID_AUTO, iflib, CTLFLAG_RD, 0, - "iflib driver parameters"); - /* * XXX need to ensure that this can't accidentally cause the head to be moved backwards */ static int iflib_min_tx_latency = 0; SYSCTL_INT(_net_iflib, OID_AUTO, min_tx_latency, CTLFLAG_RW, &iflib_min_tx_latency, 0, "minimize transmit latency at the possible expense of throughput"); static int iflib_no_tx_batch = 0; SYSCTL_INT(_net_iflib, OID_AUTO, no_tx_batch, CTLFLAG_RW, &iflib_no_tx_batch, 0, "minimize transmit latency at the possible expense of throughput"); #if IFLIB_DEBUG_COUNTERS static int iflib_tx_seen; static int iflib_tx_sent; static int iflib_tx_encap; static int iflib_rx_allocs; static int iflib_fl_refills; static int iflib_fl_refills_large; static int iflib_tx_frees; SYSCTL_INT(_net_iflib, OID_AUTO, tx_seen, CTLFLAG_RD, &iflib_tx_seen, 0, "# tx mbufs seen"); SYSCTL_INT(_net_iflib, OID_AUTO, tx_sent, CTLFLAG_RD, &iflib_tx_sent, 0, "# tx mbufs sent"); SYSCTL_INT(_net_iflib, OID_AUTO, tx_encap, CTLFLAG_RD, &iflib_tx_encap, 0, "# tx mbufs encapped"); SYSCTL_INT(_net_iflib, OID_AUTO, tx_frees, CTLFLAG_RD, &iflib_tx_frees, 0, "# tx frees"); SYSCTL_INT(_net_iflib, OID_AUTO, rx_allocs, CTLFLAG_RD, &iflib_rx_allocs, 0, "# rx allocations"); SYSCTL_INT(_net_iflib, OID_AUTO, fl_refills, CTLFLAG_RD, &iflib_fl_refills, 0, "# refills"); SYSCTL_INT(_net_iflib, OID_AUTO, fl_refills_large, CTLFLAG_RD, &iflib_fl_refills_large, 0, "# large refills"); static int iflib_txq_drain_flushing; static int iflib_txq_drain_oactive; static int iflib_txq_drain_notready; static int iflib_txq_drain_encapfail; SYSCTL_INT(_net_iflib, OID_AUTO, txq_drain_flushing, CTLFLAG_RD, &iflib_txq_drain_flushing, 0, "# drain flushes"); SYSCTL_INT(_net_iflib, OID_AUTO, txq_drain_oactive, CTLFLAG_RD, &iflib_txq_drain_oactive, 0, "# drain oactives"); SYSCTL_INT(_net_iflib, OID_AUTO, txq_drain_notready, CTLFLAG_RD, &iflib_txq_drain_notready, 0, "# drain notready"); SYSCTL_INT(_net_iflib, OID_AUTO, txq_drain_encapfail, CTLFLAG_RD, &iflib_txq_drain_encapfail, 0, "# drain encap fails"); static int iflib_encap_load_mbuf_fail; static int iflib_encap_txq_avail_fail; static int iflib_encap_txd_encap_fail; SYSCTL_INT(_net_iflib, OID_AUTO, encap_load_mbuf_fail, CTLFLAG_RD, &iflib_encap_load_mbuf_fail, 0, "# busdma load failures"); SYSCTL_INT(_net_iflib, OID_AUTO, encap_txq_avail_fail, CTLFLAG_RD, &iflib_encap_txq_avail_fail, 0, "# txq avail failures"); SYSCTL_INT(_net_iflib, OID_AUTO, encap_txd_encap_fail, CTLFLAG_RD, &iflib_encap_txd_encap_fail, 0, "# driver encap failures"); static int iflib_task_fn_rxs; static int iflib_rx_intr_enables; static int iflib_fast_intrs; static int iflib_intr_link; static int iflib_intr_msix; static int iflib_rx_unavail; static int iflib_rx_ctx_inactive; static int iflib_rx_zero_len; static int iflib_rx_if_input; static int iflib_rx_mbuf_null; static int iflib_rxd_flush; static int iflib_verbose_debug; SYSCTL_INT(_net_iflib, OID_AUTO, intr_link, CTLFLAG_RD, &iflib_intr_link, 0, "# intr link calls"); SYSCTL_INT(_net_iflib, OID_AUTO, intr_msix, CTLFLAG_RD, &iflib_intr_msix, 0, "# intr msix calls"); SYSCTL_INT(_net_iflib, OID_AUTO, task_fn_rx, CTLFLAG_RD, &iflib_task_fn_rxs, 0, "# task_fn_rx calls"); SYSCTL_INT(_net_iflib, OID_AUTO, rx_intr_enables, CTLFLAG_RD, &iflib_rx_intr_enables, 0, "# rx intr enables"); SYSCTL_INT(_net_iflib, OID_AUTO, fast_intrs, CTLFLAG_RD, &iflib_fast_intrs, 0, "# fast_intr calls"); SYSCTL_INT(_net_iflib, OID_AUTO, rx_unavail, CTLFLAG_RD, &iflib_rx_unavail, 0, "# times rxeof called with no available data"); SYSCTL_INT(_net_iflib, OID_AUTO, rx_ctx_inactive, CTLFLAG_RD, &iflib_rx_ctx_inactive, 0, "# times rxeof called with inactive context"); SYSCTL_INT(_net_iflib, OID_AUTO, rx_zero_len, CTLFLAG_RD, &iflib_rx_zero_len, 0, "# times rxeof saw zero len mbuf"); SYSCTL_INT(_net_iflib, OID_AUTO, rx_if_input, CTLFLAG_RD, &iflib_rx_if_input, 0, "# times rxeof called if_input"); SYSCTL_INT(_net_iflib, OID_AUTO, rx_mbuf_null, CTLFLAG_RD, &iflib_rx_mbuf_null, 0, "# times rxeof got null mbuf"); SYSCTL_INT(_net_iflib, OID_AUTO, rxd_flush, CTLFLAG_RD, &iflib_rxd_flush, 0, "# times rxd_flush called"); SYSCTL_INT(_net_iflib, OID_AUTO, verbose_debug, CTLFLAG_RW, &iflib_verbose_debug, 0, "enable verbose debugging"); #define DBG_COUNTER_INC(name) atomic_add_int(&(iflib_ ## name), 1) static void iflib_debug_reset(void) { iflib_tx_seen = iflib_tx_sent = iflib_tx_encap = iflib_rx_allocs = iflib_fl_refills = iflib_fl_refills_large = iflib_tx_frees = iflib_txq_drain_flushing = iflib_txq_drain_oactive = iflib_txq_drain_notready = iflib_txq_drain_encapfail = iflib_encap_load_mbuf_fail = iflib_encap_txq_avail_fail = iflib_encap_txd_encap_fail = iflib_task_fn_rxs = iflib_rx_intr_enables = iflib_fast_intrs = iflib_intr_link = iflib_intr_msix = iflib_rx_unavail = iflib_rx_ctx_inactive = iflib_rx_zero_len = iflib_rx_if_input = iflib_rx_mbuf_null = iflib_rxd_flush = 0; } #else #define DBG_COUNTER_INC(name) static void iflib_debug_reset(void) {} #endif +typedef void async_gtask_fn_t(if_ctx_t ctx, void *arg); +struct async_task_arg { + async_gtask_fn_t *ata_fn; + if_ctx_t ata_ctx; + void *ata_arg; + struct grouptask *ata_gtask; +}; #define IFLIB_DEBUG 0 static void iflib_tx_structures_free(if_ctx_t ctx); static void iflib_rx_structures_free(if_ctx_t ctx); static int iflib_queues_alloc(if_ctx_t ctx); static int iflib_tx_credits_update(if_ctx_t ctx, iflib_txq_t txq); static int iflib_rxd_avail(if_ctx_t ctx, iflib_rxq_t rxq, qidx_t cidx, qidx_t budget); static int iflib_qset_structures_setup(if_ctx_t ctx); static int iflib_msix_init(if_ctx_t ctx); static int iflib_legacy_setup(if_ctx_t ctx, driver_filter_t filter, void *filterarg, int *rid, char *str); static void iflib_txq_check_drain(iflib_txq_t txq, int budget); static uint32_t iflib_txq_can_drain(struct ifmp_ring *); static int iflib_register(if_ctx_t); static void iflib_init_locked(if_ctx_t ctx); static void iflib_add_device_sysctl_pre(if_ctx_t ctx); static void iflib_add_device_sysctl_post(if_ctx_t ctx); static void iflib_ifmp_purge(iflib_txq_t txq); static void _iflib_pre_assert(if_softc_ctx_t scctx); static void iflib_stop(if_ctx_t ctx); static void iflib_if_init_locked(if_ctx_t ctx); +static int async_if_ioctl(if_ctx_t ctx, u_long command, caddr_t data); +static int iflib_config_async_gtask_dispatch(if_ctx_t ctx, async_gtask_fn_t *fn, char *name, void *arg); +static void iflib_admin_reset_deferred(if_ctx_t ctx); + + + #ifndef __NO_STRICT_ALIGNMENT static struct mbuf * iflib_fixup_rx(struct mbuf *m); #endif #ifdef DEV_NETMAP #include #include #include MODULE_DEPEND(iflib, netmap, 1, 1, 1); /* * device-specific sysctl variables: * * iflib_crcstrip: 0: keep CRC in rx frames (default), 1: strip it. * During regular operations the CRC is stripped, but on some * hardware reception of frames not multiple of 64 is slower, * so using crcstrip=0 helps in benchmarks. * * iflib_rx_miss, iflib_rx_miss_bufs: * count packets that might be missed due to lost interrupts. */ SYSCTL_DECL(_dev_netmap); /* * The xl driver by default strips CRCs and we do not override it. */ int iflib_crcstrip = 1; SYSCTL_INT(_dev_netmap, OID_AUTO, iflib_crcstrip, CTLFLAG_RW, &iflib_crcstrip, 1, "strip CRC on rx frames"); int iflib_rx_miss, iflib_rx_miss_bufs; SYSCTL_INT(_dev_netmap, OID_AUTO, iflib_rx_miss, CTLFLAG_RW, &iflib_rx_miss, 0, "potentially missed rx intr"); SYSCTL_INT(_dev_netmap, OID_AUTO, iflib_rx_miss_bufs, CTLFLAG_RW, &iflib_rx_miss_bufs, 0, "potentially missed rx intr bufs"); /* * Register/unregister. We are already under netmap lock. * Only called on the first register or the last unregister. */ static int iflib_netmap_register(struct netmap_adapter *na, int onoff) { struct ifnet *ifp = na->ifp; if_ctx_t ctx = ifp->if_softc; int status; CTX_LOCK(ctx); IFDI_INTR_DISABLE(ctx); /* Tell the stack that the interface is no longer active */ ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); if (!CTX_IS_VF(ctx)) IFDI_CRCSTRIP_SET(ctx, onoff, iflib_crcstrip); /* enable or disable flags and callbacks in na and ifp */ if (onoff) { nm_set_native_flags(na); } else { nm_clear_native_flags(na); } iflib_stop(ctx); iflib_init_locked(ctx); IFDI_CRCSTRIP_SET(ctx, onoff, iflib_crcstrip); // XXX why twice ? status = ifp->if_drv_flags & IFF_DRV_RUNNING ? 0 : 1; if (status) nm_clear_native_flags(na); CTX_UNLOCK(ctx); return (status); } +static void +iru_init(if_rxd_update_t iru, iflib_rxq_t rxq, uint8_t flid) +{ + iflib_fl_t fl; + + fl = &rxq->ifr_fl[flid]; + iru->iru_paddrs = fl->ifl_bus_addrs; + iru->iru_vaddrs = &fl->ifl_vm_addrs[0]; + iru->iru_idxs = fl->ifl_rxd_idxs; + iru->iru_qsidx = rxq->ifr_id; + iru->iru_buf_size = fl->ifl_buf_size; + iru->iru_flidx = fl->ifl_id; +} + +static int +netmap_fl_refill(iflib_rxq_t rxq, struct netmap_kring *kring, uint32_t nm_i, bool init) +{ + struct netmap_adapter *na = kring->na; + u_int const lim = kring->nkr_num_slots - 1; + u_int head = kring->rhead; + struct netmap_ring *ring = kring->ring; + bus_dmamap_t *map; + if_rxd_update_t iru; + if_ctx_t ctx = rxq->ifr_ctx; + iflib_fl_t fl = &rxq->ifr_fl[0]; + uint32_t refill_pidx, nic_i; + + iru = &rxq->ifr_iru; + iru_init(iru, rxq, 0 /* flid */); + map = fl->ifl_sds.ifsd_map; + refill_pidx = netmap_idx_k2n(kring, nm_i); + if (init && (nm_i == head)) + head = nm_prev(head, lim); + for (int tmp_pidx = 0; nm_i != head; tmp_pidx++) { + struct netmap_slot *slot = &ring->slot[nm_i]; + void *addr = PNMB(na, slot, &fl->ifl_bus_addrs[tmp_pidx]); + uint32_t nic_i_dma = refill_pidx; + nic_i = netmap_idx_k2n(kring, nm_i); + + MPASS(tmp_pidx < IFLIB_MAX_RX_REFRESH); + + if (addr == NETMAP_BUF_BASE(na)) /* bad buf */ + return netmap_ring_reinit(kring); + + fl->ifl_vm_addrs[tmp_pidx] = addr; + if (__predict_false(init) && map) { + netmap_load_map(na, fl->ifl_ifdi->idi_tag, map[nic_i], addr); + } else if (map && (slot->flags & NS_BUF_CHANGED)) { + /* buffer has changed, reload map */ + netmap_reload_map(na, fl->ifl_ifdi->idi_tag, map[nic_i], addr); + } + slot->flags &= ~NS_BUF_CHANGED; + + nm_i = nm_next(nm_i, lim); + fl->ifl_rxd_idxs[tmp_pidx] = nic_i = nm_next(nic_i, lim); + if (nm_i != head && tmp_pidx < IFLIB_MAX_RX_REFRESH-1) + continue; + + iru->iru_pidx = refill_pidx; + iru->iru_count = tmp_pidx+1; + ctx->isc_rxd_refill(ctx->ifc_softc, iru); + + tmp_pidx = 0; + refill_pidx = nic_i; + if (map == NULL) + continue; + + for (int n = 0; n < iru->iru_count; n++) { + bus_dmamap_sync(fl->ifl_ifdi->idi_tag, map[nic_i_dma], + BUS_DMASYNC_PREREAD); + /* XXX - change this to not use the netmap func*/ + nic_i_dma = nm_next(nic_i_dma, lim); + } + } + kring->nr_hwcur = head; + + if (map) + bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map, + BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); + /* + * IMPORTANT: we must leave one free slot in the ring, + * so move nic_i back by one unit + */ + nic_i = nm_prev(nic_i, lim); + ctx->isc_rxd_flush(ctx->ifc_softc, rxq->ifr_id, fl->ifl_id, nic_i); + return (0); +} + /* * Reconcile kernel and user view of the transmit ring. * * All information is in the kring. * Userspace wants to send packets up to the one before kring->rhead, * kernel knows kring->nr_hwcur is the first unsent packet. * * Here we push packets out (as many as possible), and possibly * reclaim buffers from previously completed transmission. * * The caller (netmap) guarantees that there is only one instance * running at any time. Any interference with other driver * methods should be handled by the individual drivers. */ static int iflib_netmap_txsync(struct netmap_kring *kring, int flags) { struct netmap_adapter *na = kring->na; struct ifnet *ifp = na->ifp; struct netmap_ring *ring = kring->ring; u_int nm_i; /* index into the netmap ring */ u_int nic_i; /* index into the NIC ring */ u_int n; u_int const lim = kring->nkr_num_slots - 1; u_int const head = kring->rhead; struct if_pkt_info pi; /* * interrupts on every tx packet are expensive so request * them every half ring, or where NS_REPORT is set */ u_int report_frequency = kring->nkr_num_slots >> 1; /* device-specific */ if_ctx_t ctx = ifp->if_softc; iflib_txq_t txq = &ctx->ifc_txqs[kring->ring_id]; if (txq->ift_sds.ifsd_map) bus_dmamap_sync(txq->ift_desc_tag, txq->ift_ifdi->idi_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); /* * First part: process new packets to send. * nm_i is the current index in the netmap ring, * nic_i is the corresponding index in the NIC ring. * * If we have packets to send (nm_i != head) * iterate over the netmap ring, fetch length and update * the corresponding slot in the NIC ring. Some drivers also * need to update the buffer's physical address in the NIC slot * even NS_BUF_CHANGED is not set (PNMB computes the addresses). * * The netmap_reload_map() calls is especially expensive, * even when (as in this case) the tag is 0, so do only * when the buffer has actually changed. * * If possible do not set the report/intr bit on all slots, * but only a few times per ring or when NS_REPORT is set. * * Finally, on 10G and faster drivers, it might be useful * to prefetch the next slot and txr entry. */ nm_i = kring->nr_hwcur; pkt_info_zero(&pi); pi.ipi_segs = txq->ift_segs; pi.ipi_qsidx = kring->ring_id; if (nm_i != head) { /* we have new packets to send */ nic_i = netmap_idx_k2n(kring, nm_i); __builtin_prefetch(&ring->slot[nm_i]); __builtin_prefetch(&txq->ift_sds.ifsd_m[nic_i]); if (txq->ift_sds.ifsd_map) __builtin_prefetch(&txq->ift_sds.ifsd_map[nic_i]); for (n = 0; nm_i != head; n++) { struct netmap_slot *slot = &ring->slot[nm_i]; u_int len = slot->len; uint64_t paddr; void *addr = PNMB(na, slot, &paddr); int flags = (slot->flags & NS_REPORT || nic_i == 0 || nic_i == report_frequency) ? IPI_TX_INTR : 0; /* device-specific */ pi.ipi_len = len; pi.ipi_segs[0].ds_addr = paddr; pi.ipi_segs[0].ds_len = len; pi.ipi_nsegs = 1; pi.ipi_ndescs = 0; pi.ipi_pidx = nic_i; pi.ipi_flags = flags; /* Fill the slot in the NIC ring. */ ctx->isc_txd_encap(ctx->ifc_softc, &pi); /* prefetch for next round */ __builtin_prefetch(&ring->slot[nm_i + 1]); __builtin_prefetch(&txq->ift_sds.ifsd_m[nic_i + 1]); if (txq->ift_sds.ifsd_map) { __builtin_prefetch(&txq->ift_sds.ifsd_map[nic_i + 1]); NM_CHECK_ADDR_LEN(na, addr, len); if (slot->flags & NS_BUF_CHANGED) { /* buffer has changed, reload map */ netmap_reload_map(na, txq->ift_desc_tag, txq->ift_sds.ifsd_map[nic_i], addr); } /* make sure changes to the buffer are synced */ bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_sds.ifsd_map[nic_i], BUS_DMASYNC_PREWRITE); } slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED); nm_i = nm_next(nm_i, lim); nic_i = nm_next(nic_i, lim); } kring->nr_hwcur = head; /* synchronize the NIC ring */ if (txq->ift_sds.ifsd_map) bus_dmamap_sync(txq->ift_desc_tag, txq->ift_ifdi->idi_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); /* (re)start the tx unit up to slot nic_i (excluded) */ ctx->isc_txd_flush(ctx->ifc_softc, txq->ift_id, nic_i); } /* * Second part: reclaim buffers for completed transmissions. */ if (iflib_tx_credits_update(ctx, txq)) { /* some tx completed, increment avail */ nic_i = txq->ift_cidx_processed; kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim); } return (0); } /* * Reconcile kernel and user view of the receive ring. * Same as for the txsync, this routine must be efficient. * The caller guarantees a single invocations, but races against * the rest of the driver should be handled here. * * On call, kring->rhead is the first packet that userspace wants * to keep, and kring->rcur is the wakeup point. * The kernel has previously reported packets up to kring->rtail. * * If (flags & NAF_FORCE_READ) also check for incoming packets irrespective * of whether or not we received an interrupt. */ static int iflib_netmap_rxsync(struct netmap_kring *kring, int flags) { struct netmap_adapter *na = kring->na; struct netmap_ring *ring = kring->ring; uint32_t nm_i; /* index into the netmap ring */ - uint32_t nic_i, nic_i_start; /* index into the NIC ring */ + uint32_t nic_i; /* index into the NIC ring */ u_int i, n; u_int const lim = kring->nkr_num_slots - 1; u_int const head = kring->rhead; int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR; - struct if_rxd_info ri; - struct if_rxd_update iru; + struct if_rxd_info *ri; + struct if_rxd_update *iru; struct ifnet *ifp = na->ifp; if_ctx_t ctx = ifp->if_softc; iflib_rxq_t rxq = &ctx->ifc_rxqs[kring->ring_id]; iflib_fl_t fl = rxq->ifr_fl; + ri = &rxq->ifr_ri; + iru = &rxq->ifr_iru; if (head > lim) return netmap_ring_reinit(kring); /* XXX check sync modes */ for (i = 0, fl = rxq->ifr_fl; i < rxq->ifr_nfl; i++, fl++) { if (fl->ifl_sds.ifsd_map == NULL) continue; bus_dmamap_sync(rxq->ifr_fl[i].ifl_desc_tag, fl->ifl_ifdi->idi_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); } /* * First part: import newly received packets. * * nm_i is the index of the next free slot in the netmap ring, * nic_i is the index of the next received packet in the NIC ring, * and they may differ in case if_init() has been called while * in netmap mode. For the receive ring we have * * nic_i = rxr->next_check; * nm_i = kring->nr_hwtail (previous) * and * nm_i == (nic_i + kring->nkr_hwofs) % ring_size * * rxr->next_check is set to 0 on a ring reinit */ if (netmap_no_pendintr || force_update) { int crclen = iflib_crcstrip ? 0 : 4; int error, avail; uint16_t slot_flags = kring->nkr_slot_flags; for (fl = rxq->ifr_fl, i = 0; i < rxq->ifr_nfl; i++, fl++) { nic_i = fl->ifl_cidx; nm_i = netmap_idx_n2k(kring, nic_i); avail = iflib_rxd_avail(ctx, rxq, nic_i, USHRT_MAX); for (n = 0; avail > 0; n++, avail--) { - rxd_info_zero(&ri); - ri.iri_frags = rxq->ifr_frags; - ri.iri_qsidx = kring->ring_id; - ri.iri_ifp = ctx->ifc_ifp; - ri.iri_cidx = nic_i; + rxd_info_zero(ri); + ri->iri_frags = rxq->ifr_frags; + ri->iri_qsidx = kring->ring_id; + ri->iri_ifp = ctx->ifc_ifp; + ri->iri_cidx = nic_i; - error = ctx->isc_rxd_pkt_get(ctx->ifc_softc, &ri); - ring->slot[nm_i].len = error ? 0 : ri.iri_len - crclen; + error = ctx->isc_rxd_pkt_get(ctx->ifc_softc, ri); + ring->slot[nm_i].len = error ? 0 : ri->iri_len - crclen; ring->slot[nm_i].flags = slot_flags; if (fl->ifl_sds.ifsd_map) bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_sds.ifsd_map[nic_i], BUS_DMASYNC_POSTREAD); nm_i = nm_next(nm_i, lim); nic_i = nm_next(nic_i, lim); } if (n) { /* update the state variables */ if (netmap_no_pendintr && !force_update) { /* diagnostics */ iflib_rx_miss ++; iflib_rx_miss_bufs += n; } fl->ifl_cidx = nic_i; kring->nr_hwtail = nm_i; } kring->nr_kflags &= ~NKR_PENDINTR; } } /* * Second part: skip past packets that userspace has released. * (kring->nr_hwcur to head excluded), * and make the buffers available for reception. * As usual nm_i is the index in the netmap ring, * nic_i is the index in the NIC ring, and * nm_i == (nic_i + kring->nkr_hwofs) % ring_size */ /* XXX not sure how this will work with multiple free lists */ nm_i = kring->nr_hwcur; if (nm_i == head) return (0); - iru.iru_paddrs = fl->ifl_bus_addrs; - iru.iru_vaddrs = &fl->ifl_vm_addrs[0]; - iru.iru_idxs = fl->ifl_rxd_idxs; - iru.iru_qsidx = rxq->ifr_id; - iru.iru_buf_size = fl->ifl_buf_size; - iru.iru_flidx = fl->ifl_id; - nic_i_start = nic_i = netmap_idx_k2n(kring, nm_i); - for (i = 0; nm_i != head; i++) { - struct netmap_slot *slot = &ring->slot[nm_i]; - void *addr = PNMB(na, slot, &fl->ifl_bus_addrs[i]); - - if (addr == NETMAP_BUF_BASE(na)) /* bad buf */ - goto ring_reset; - - fl->ifl_vm_addrs[i] = addr; - if (fl->ifl_sds.ifsd_map && (slot->flags & NS_BUF_CHANGED)) { - /* buffer has changed, reload map */ - netmap_reload_map(na, fl->ifl_ifdi->idi_tag, fl->ifl_sds.ifsd_map[nic_i], addr); - } - slot->flags &= ~NS_BUF_CHANGED; - - nm_i = nm_next(nm_i, lim); - fl->ifl_rxd_idxs[i] = nic_i = nm_next(nic_i, lim); - if (nm_i != head && i < IFLIB_MAX_RX_REFRESH) - continue; - - iru.iru_pidx = nic_i_start; - iru.iru_count = i; - i = 0; - ctx->isc_rxd_refill(ctx->ifc_softc, &iru); - if (fl->ifl_sds.ifsd_map == NULL) { - nic_i_start = nic_i; - continue; - } - nic_i = nic_i_start; - for (n = 0; n < iru.iru_count; n++) { - bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_sds.ifsd_map[nic_i], - BUS_DMASYNC_PREREAD); - nic_i = nm_next(nic_i, lim); - } - nic_i_start = nic_i; - } - kring->nr_hwcur = head; - - if (fl->ifl_sds.ifsd_map) - bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map, - BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); - /* - * IMPORTANT: we must leave one free slot in the ring, - * so move nic_i back by one unit - */ - nic_i = nm_prev(nic_i, lim); - ctx->isc_rxd_flush(ctx->ifc_softc, rxq->ifr_id, fl->ifl_id, nic_i); - return 0; - -ring_reset: - return netmap_ring_reinit(kring); + return (netmap_fl_refill(rxq, kring, nm_i, false)); } static void iflib_netmap_intr(struct netmap_adapter *na, int onoff) { struct ifnet *ifp = na->ifp; if_ctx_t ctx = ifp->if_softc; - CTX_LOCK(ctx); + /* XXX - do we need synchronization here?*/ if (onoff) { IFDI_INTR_ENABLE(ctx); } else { IFDI_INTR_DISABLE(ctx); } - CTX_UNLOCK(ctx); } static int iflib_netmap_attach(if_ctx_t ctx) { struct netmap_adapter na; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; bzero(&na, sizeof(na)); na.ifp = ctx->ifc_ifp; na.na_flags = NAF_BDG_MAYSLEEP; MPASS(ctx->ifc_softc_ctx.isc_ntxqsets); MPASS(ctx->ifc_softc_ctx.isc_nrxqsets); na.num_tx_desc = scctx->isc_ntxd[0]; na.num_rx_desc = scctx->isc_nrxd[0]; na.nm_txsync = iflib_netmap_txsync; na.nm_rxsync = iflib_netmap_rxsync; na.nm_register = iflib_netmap_register; na.nm_intr = iflib_netmap_intr; na.num_tx_rings = ctx->ifc_softc_ctx.isc_ntxqsets; na.num_rx_rings = ctx->ifc_softc_ctx.isc_nrxqsets; return (netmap_attach(&na)); } static void iflib_netmap_txq_init(if_ctx_t ctx, iflib_txq_t txq) { struct netmap_adapter *na = NA(ctx->ifc_ifp); struct netmap_slot *slot; slot = netmap_reset(na, NR_TX, txq->ift_id, 0); if (slot == NULL) return; if (txq->ift_sds.ifsd_map == NULL) return; for (int i = 0; i < ctx->ifc_softc_ctx.isc_ntxd[0]; i++) { /* * In netmap mode, set the map for the packet buffer. * NOTE: Some drivers (not this one) also need to set * the physical buffer address in the NIC ring. * netmap_idx_n2k() maps a nic index, i, into the corresponding * netmap slot index, si */ int si = netmap_idx_n2k(&na->tx_rings[txq->ift_id], i); netmap_load_map(na, txq->ift_desc_tag, txq->ift_sds.ifsd_map[i], NMB(na, slot + si)); } } static void iflib_netmap_rxq_init(if_ctx_t ctx, iflib_rxq_t rxq) { struct netmap_adapter *na = NA(ctx->ifc_ifp); + struct netmap_kring *kring = &na->rx_rings[rxq->ifr_id]; struct netmap_slot *slot; - struct if_rxd_update iru; - iflib_fl_t fl; - bus_dmamap_t *map; - int nrxd; - uint32_t i, j, pidx_start; + uint32_t nm_i; slot = netmap_reset(na, NR_RX, rxq->ifr_id, 0); if (slot == NULL) return; - fl = &rxq->ifr_fl[0]; - map = fl->ifl_sds.ifsd_map; - nrxd = ctx->ifc_softc_ctx.isc_nrxd[0]; - iru.iru_paddrs = fl->ifl_bus_addrs; - iru.iru_vaddrs = &fl->ifl_vm_addrs[0]; - iru.iru_idxs = fl->ifl_rxd_idxs; - iru.iru_qsidx = rxq->ifr_id; - iru.iru_buf_size = rxq->ifr_fl[0].ifl_buf_size; - iru.iru_flidx = 0; - - for (pidx_start = i = j = 0; i < nrxd; i++, j++) { - int sj = netmap_idx_n2k(&na->rx_rings[rxq->ifr_id], i); - void *addr; - - fl->ifl_rxd_idxs[j] = i; - addr = fl->ifl_vm_addrs[j] = PNMB(na, slot + sj, &fl->ifl_bus_addrs[j]); - if (map) { - netmap_load_map(na, rxq->ifr_fl[0].ifl_ifdi->idi_tag, *map, addr); - map++; - } - - if (j < IFLIB_MAX_RX_REFRESH && i < nrxd - 1) - continue; - - iru.iru_pidx = pidx_start; - pidx_start = i; - iru.iru_count = j; - j = 0; - MPASS(pidx_start + j <= nrxd); - /* Update descriptors and the cached value */ - ctx->isc_rxd_refill(ctx->ifc_softc, &iru); - } - /* preserve queue */ - if (ctx->ifc_ifp->if_capenable & IFCAP_NETMAP) { - struct netmap_kring *kring = &na->rx_rings[rxq->ifr_id]; - int t = na->num_rx_desc - 1 - nm_kr_rxspace(kring); - ctx->isc_rxd_flush(ctx->ifc_softc, rxq->ifr_id, 0 /* fl_id */, t); - } else - ctx->isc_rxd_flush(ctx->ifc_softc, rxq->ifr_id, 0 /* fl_id */, nrxd-1); + nm_i = netmap_idx_n2k(kring, 0); + netmap_fl_refill(rxq, kring, nm_i, true); } #define iflib_netmap_detach(ifp) netmap_detach(ifp) #else #define iflib_netmap_txq_init(ctx, txq) #define iflib_netmap_rxq_init(ctx, rxq) #define iflib_netmap_detach(ifp) #define iflib_netmap_attach(ctx) (0) #define netmap_rx_irq(ifp, qid, budget) (0) #define netmap_tx_irq(ifp, qid) do {} while (0) #endif #if defined(__i386__) || defined(__amd64__) static __inline void prefetch(void *x) { __asm volatile("prefetcht0 %0" :: "m" (*(unsigned long *)x)); } +static __inline void +prefetch2(void *x) +{ + __asm volatile("prefetcht0 %0" :: "m" (*(unsigned long *)x)); +#if (CACHE_LINE_SIZE < 128) + __asm volatile("prefetcht0 %0" :: "m" (*(((unsigned long *)x)+CACHE_LINE_SIZE/(sizeof(unsigned long))))); +#endif +} #else #define prefetch(x) +#define prefetch2(x) #endif static void _iflib_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int err) { if (err) return; *(bus_addr_t *) arg = segs[0].ds_addr; } int iflib_dma_alloc(if_ctx_t ctx, int size, iflib_dma_info_t dma, int mapflags) { int err; if_shared_ctx_t sctx = ctx->ifc_sctx; device_t dev = ctx->ifc_dev; KASSERT(sctx->isc_q_align != 0, ("alignment value not initialized")); err = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ sctx->isc_q_align, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ size, /* maxsize */ 1, /* nsegments */ size, /* maxsegsize */ BUS_DMA_ALLOCNOW, /* flags */ NULL, /* lockfunc */ NULL, /* lockarg */ &dma->idi_tag); if (err) { device_printf(dev, "%s: bus_dma_tag_create failed: %d\n", __func__, err); goto fail_0; } err = bus_dmamem_alloc(dma->idi_tag, (void**) &dma->idi_vaddr, BUS_DMA_NOWAIT | BUS_DMA_COHERENT | BUS_DMA_ZERO, &dma->idi_map); if (err) { device_printf(dev, "%s: bus_dmamem_alloc(%ju) failed: %d\n", __func__, (uintmax_t)size, err); goto fail_1; } dma->idi_paddr = IF_BAD_DMA; err = bus_dmamap_load(dma->idi_tag, dma->idi_map, dma->idi_vaddr, size, _iflib_dmamap_cb, &dma->idi_paddr, mapflags | BUS_DMA_NOWAIT); if (err || dma->idi_paddr == IF_BAD_DMA) { device_printf(dev, "%s: bus_dmamap_load failed: %d\n", __func__, err); goto fail_2; } dma->idi_size = size; return (0); fail_2: bus_dmamem_free(dma->idi_tag, dma->idi_vaddr, dma->idi_map); fail_1: bus_dma_tag_destroy(dma->idi_tag); fail_0: dma->idi_tag = NULL; return (err); } int iflib_dma_alloc_multi(if_ctx_t ctx, int *sizes, iflib_dma_info_t *dmalist, int mapflags, int count) { int i, err; iflib_dma_info_t *dmaiter; dmaiter = dmalist; for (i = 0; i < count; i++, dmaiter++) { if ((err = iflib_dma_alloc(ctx, sizes[i], *dmaiter, mapflags)) != 0) break; } if (err) iflib_dma_free_multi(dmalist, i); return (err); } void iflib_dma_free(iflib_dma_info_t dma) { if (dma->idi_tag == NULL) return; if (dma->idi_paddr != IF_BAD_DMA) { bus_dmamap_sync(dma->idi_tag, dma->idi_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(dma->idi_tag, dma->idi_map); dma->idi_paddr = IF_BAD_DMA; } if (dma->idi_vaddr != NULL) { bus_dmamem_free(dma->idi_tag, dma->idi_vaddr, dma->idi_map); dma->idi_vaddr = NULL; } bus_dma_tag_destroy(dma->idi_tag); dma->idi_tag = NULL; } void iflib_dma_free_multi(iflib_dma_info_t *dmalist, int count) { int i; iflib_dma_info_t *dmaiter = dmalist; for (i = 0; i < count; i++, dmaiter++) iflib_dma_free(*dmaiter); } +static void +txq_validate(iflib_txq_t txq) { +#ifdef INVARIANTS + uint32_t cidx = txq->ift_cidx; + struct mbuf **ifsd_m = txq->ift_sds.ifsd_m; + if (txq->ift_pidx > cidx) { + int i; + for (i = txq->ift_pidx; i < txq->ift_size; i++) + MPASS(ifsd_m[i] == NULL); + for (i = 0; i < cidx; i++) + MPASS(ifsd_m[i] == NULL); + } else if (txq->ift_pidx < cidx) { + int i; + for (i = txq->ift_pidx; i < cidx; i++) + MPASS(ifsd_m[i] == NULL); + } +#endif +} + #ifdef EARLY_AP_STARTUP static const int iflib_started = 1; #else /* * We used to abuse the smp_started flag to decide if the queues have been * fully initialized (by late taskqgroup_adjust() calls in a SYSINIT()). * That gave bad races, since the SYSINIT() runs strictly after smp_started * is set. Run a SYSINIT() strictly after that to just set a usable * completion flag. */ static int iflib_started; static void iflib_record_started(void *arg) { iflib_started = 1; } SYSINIT(iflib_record_started, SI_SUB_SMP + 1, SI_ORDER_FIRST, iflib_record_started, NULL); #endif static int iflib_fast_intr(void *arg) { iflib_filter_info_t info = arg; struct grouptask *gtask = info->ifi_task; + if (!iflib_started) return (FILTER_HANDLED); DBG_COUNTER_INC(fast_intrs); if (info->ifi_filter != NULL && info->ifi_filter(info->ifi_filter_arg) == FILTER_HANDLED) return (FILTER_HANDLED); GROUPTASK_ENQUEUE(gtask); return (FILTER_HANDLED); } static int +iflib_fast_intr_rx(void *arg) +{ + iflib_filter_info_t info = arg; + struct grouptask *gtask = info->ifi_task; + iflib_rxq_t rxq = (iflib_rxq_t)info->ifi_ctx; + if_ctx_t ctx; + int cidx; + + if (!iflib_started) + return (FILTER_HANDLED); + + DBG_COUNTER_INC(fast_intrs); + if (info->ifi_filter != NULL && info->ifi_filter(info->ifi_filter_arg) == FILTER_HANDLED) + return (FILTER_HANDLED); + + ctx = rxq->ifr_ctx; + if (ctx->ifc_sctx->isc_flags & IFLIB_HAS_RXCQ) + cidx = rxq->ifr_cq_cidx; + else + cidx = rxq->ifr_fl[0].ifl_cidx; + if (iflib_rxd_avail(ctx, rxq, cidx, 1)) + GROUPTASK_ENQUEUE(gtask); + else + IFDI_RX_QUEUE_INTR_ENABLE(ctx, rxq->ifr_id); + return (FILTER_HANDLED); +} + + +static int iflib_fast_intr_rxtx(void *arg) { iflib_filter_info_t info = arg; struct grouptask *gtask = info->ifi_task; iflib_rxq_t rxq = (iflib_rxq_t)info->ifi_ctx; if_ctx_t ctx; int i, cidx; if (!iflib_started) return (FILTER_HANDLED); DBG_COUNTER_INC(fast_intrs); if (info->ifi_filter != NULL && info->ifi_filter(info->ifi_filter_arg) == FILTER_HANDLED) return (FILTER_HANDLED); + ctx = rxq->ifr_ctx; for (i = 0; i < rxq->ifr_ntxqirq; i++) { qidx_t txqid = rxq->ifr_txqid[i]; - ctx = rxq->ifr_ctx; - if (!ctx->isc_txd_credits_update(ctx->ifc_softc, txqid, false)) { IFDI_TX_QUEUE_INTR_ENABLE(ctx, txqid); continue; } GROUPTASK_ENQUEUE(&ctx->ifc_txqs[txqid].ift_task); } if (ctx->ifc_sctx->isc_flags & IFLIB_HAS_RXCQ) cidx = rxq->ifr_cq_cidx; else cidx = rxq->ifr_fl[0].ifl_cidx; if (iflib_rxd_avail(ctx, rxq, cidx, 1)) GROUPTASK_ENQUEUE(gtask); else IFDI_RX_QUEUE_INTR_ENABLE(ctx, rxq->ifr_id); return (FILTER_HANDLED); } static int iflib_fast_intr_ctx(void *arg) { iflib_filter_info_t info = arg; struct grouptask *gtask = info->ifi_task; if (!iflib_started) return (FILTER_HANDLED); DBG_COUNTER_INC(fast_intrs); if (info->ifi_filter != NULL && info->ifi_filter(info->ifi_filter_arg) == FILTER_HANDLED) return (FILTER_HANDLED); GROUPTASK_ENQUEUE(gtask); return (FILTER_HANDLED); } static int _iflib_irq_alloc(if_ctx_t ctx, if_irq_t irq, int rid, driver_filter_t filter, driver_intr_t handler, void *arg, char *name) { int rc, flags; struct resource *res; void *tag = NULL; device_t dev = ctx->ifc_dev; flags = RF_ACTIVE; if (ctx->ifc_flags & IFC_LEGACY) flags |= RF_SHAREABLE; MPASS(rid < 512); irq->ii_rid = rid; res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &irq->ii_rid, flags); if (res == NULL) { device_printf(dev, "failed to allocate IRQ for rid %d, name %s.\n", rid, name); return (ENOMEM); } irq->ii_res = res; KASSERT(filter == NULL || handler == NULL, ("filter and handler can't both be non-NULL")); rc = bus_setup_intr(dev, res, INTR_MPSAFE | INTR_TYPE_NET, filter, handler, arg, &tag); if (rc != 0) { device_printf(dev, "failed to setup interrupt for rid %d, name %s: %d\n", rid, name ? name : "unknown", rc); return (rc); } else if (name) bus_describe_intr(dev, res, tag, "%s", name); irq->ii_tag = tag; return (0); } /********************************************************************* * * Allocate memory for tx_buffer structures. The tx_buffer stores all * the information needed to transmit a packet on the wire. This is * called only once at attach, setup is done every reset. * **********************************************************************/ static int iflib_txsd_alloc(iflib_txq_t txq) { if_ctx_t ctx = txq->ift_ctx; if_shared_ctx_t sctx = ctx->ifc_sctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; device_t dev = ctx->ifc_dev; int err, nsegments, ntsosegments; nsegments = scctx->isc_tx_nsegments; ntsosegments = scctx->isc_tx_tso_segments_max; MPASS(scctx->isc_ntxd[0] > 0); MPASS(scctx->isc_ntxd[txq->ift_br_offset] > 0); MPASS(nsegments > 0); MPASS(ntsosegments > 0); /* * Setup DMA descriptor areas. */ if ((err = bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ sctx->isc_tx_maxsize, /* maxsize */ nsegments, /* nsegments */ sctx->isc_tx_maxsegsize, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &txq->ift_desc_tag))) { device_printf(dev,"Unable to allocate TX DMA tag: %d\n", err); device_printf(dev,"maxsize: %ju nsegments: %d maxsegsize: %ju\n", (uintmax_t)sctx->isc_tx_maxsize, nsegments, (uintmax_t)sctx->isc_tx_maxsegsize); goto fail; } if ((err = bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ scctx->isc_tx_tso_size_max, /* maxsize */ ntsosegments, /* nsegments */ scctx->isc_tx_tso_segsize_max, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &txq->ift_tso_desc_tag))) { device_printf(dev,"Unable to allocate TX TSO DMA tag: %d\n", err); goto fail; } if (!(txq->ift_sds.ifsd_flags = (uint8_t *) malloc(sizeof(uint8_t) * scctx->isc_ntxd[txq->ift_br_offset], M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate tx_buffer memory\n"); err = ENOMEM; goto fail; } if (!(txq->ift_sds.ifsd_m = (struct mbuf **) malloc(sizeof(struct mbuf *) * scctx->isc_ntxd[txq->ift_br_offset], M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate tx_buffer memory\n"); err = ENOMEM; goto fail; } /* Create the descriptor buffer dma maps */ #if defined(ACPI_DMAR) || (! (defined(__i386__) || defined(__amd64__))) if ((ctx->ifc_flags & IFC_DMAR) == 0) return (0); if (!(txq->ift_sds.ifsd_map = (bus_dmamap_t *) malloc(sizeof(bus_dmamap_t) * scctx->isc_ntxd[txq->ift_br_offset], M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate tx_buffer map memory\n"); err = ENOMEM; goto fail; } for (int i = 0; i < scctx->isc_ntxd[txq->ift_br_offset]; i++) { err = bus_dmamap_create(txq->ift_desc_tag, 0, &txq->ift_sds.ifsd_map[i]); if (err != 0) { device_printf(dev, "Unable to create TX DMA map\n"); goto fail; } } #endif return (0); fail: /* We free all, it handles case where we are in the middle */ iflib_tx_structures_free(ctx); return (err); } static void iflib_txsd_destroy(if_ctx_t ctx, iflib_txq_t txq, int i) { bus_dmamap_t map; map = NULL; if (txq->ift_sds.ifsd_map != NULL) map = txq->ift_sds.ifsd_map[i]; if (map != NULL) { bus_dmamap_unload(txq->ift_desc_tag, map); bus_dmamap_destroy(txq->ift_desc_tag, map); txq->ift_sds.ifsd_map[i] = NULL; } } static void iflib_txq_destroy(iflib_txq_t txq) { if_ctx_t ctx = txq->ift_ctx; for (int i = 0; i < txq->ift_size; i++) iflib_txsd_destroy(ctx, txq, i); if (txq->ift_sds.ifsd_map != NULL) { free(txq->ift_sds.ifsd_map, M_IFLIB); txq->ift_sds.ifsd_map = NULL; } if (txq->ift_sds.ifsd_m != NULL) { free(txq->ift_sds.ifsd_m, M_IFLIB); txq->ift_sds.ifsd_m = NULL; } if (txq->ift_sds.ifsd_flags != NULL) { free(txq->ift_sds.ifsd_flags, M_IFLIB); txq->ift_sds.ifsd_flags = NULL; } if (txq->ift_desc_tag != NULL) { bus_dma_tag_destroy(txq->ift_desc_tag); txq->ift_desc_tag = NULL; } if (txq->ift_tso_desc_tag != NULL) { bus_dma_tag_destroy(txq->ift_tso_desc_tag); txq->ift_tso_desc_tag = NULL; } } static void iflib_txsd_free(if_ctx_t ctx, iflib_txq_t txq, int i) { struct mbuf **mp; mp = &txq->ift_sds.ifsd_m[i]; if (*mp == NULL) return; if (txq->ift_sds.ifsd_map != NULL) { bus_dmamap_sync(txq->ift_desc_tag, txq->ift_sds.ifsd_map[i], BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(txq->ift_desc_tag, txq->ift_sds.ifsd_map[i]); } m_free(*mp); DBG_COUNTER_INC(tx_frees); *mp = NULL; } static int iflib_txq_setup(iflib_txq_t txq) { if_ctx_t ctx = txq->ift_ctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; iflib_dma_info_t di; int i; /* Set number of descriptors available */ txq->ift_qstatus = IFLIB_QUEUE_IDLE; /* XXX make configurable */ txq->ift_update_freq = IFLIB_DEFAULT_TX_UPDATE_FREQ; /* Reset indices */ txq->ift_cidx_processed = 0; txq->ift_pidx = txq->ift_cidx = txq->ift_npending = 0; txq->ift_size = scctx->isc_ntxd[txq->ift_br_offset]; for (i = 0, di = txq->ift_ifdi; i < ctx->ifc_nhwtxqs; i++, di++) bzero((void *)di->idi_vaddr, di->idi_size); IFDI_TXQ_SETUP(ctx, txq->ift_id); for (i = 0, di = txq->ift_ifdi; i < ctx->ifc_nhwtxqs; i++, di++) bus_dmamap_sync(di->idi_tag, di->idi_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); return (0); } /********************************************************************* * * Allocate memory for rx_buffer structures. Since we use one * rx_buffer per received packet, the maximum number of rx_buffer's * that we'll need is equal to the number of receive descriptors * that we've allocated. * **********************************************************************/ static int iflib_rxsd_alloc(iflib_rxq_t rxq) { if_ctx_t ctx = rxq->ifr_ctx; if_shared_ctx_t sctx = ctx->ifc_sctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; device_t dev = ctx->ifc_dev; iflib_fl_t fl; int err; MPASS(scctx->isc_nrxd[0] > 0); MPASS(scctx->isc_nrxd[rxq->ifr_fl_offset] > 0); fl = rxq->ifr_fl; for (int i = 0; i < rxq->ifr_nfl; i++, fl++) { fl->ifl_size = scctx->isc_nrxd[rxq->ifr_fl_offset]; /* this isn't necessarily the same */ err = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ 1, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ sctx->isc_rx_maxsize, /* maxsize */ sctx->isc_rx_nsegments, /* nsegments */ sctx->isc_rx_maxsegsize, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockarg */ &fl->ifl_desc_tag); if (err) { device_printf(dev, "%s: bus_dma_tag_create failed %d\n", __func__, err); goto fail; } if (!(fl->ifl_sds.ifsd_flags = (uint8_t *) malloc(sizeof(uint8_t) * scctx->isc_nrxd[rxq->ifr_fl_offset], M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate tx_buffer memory\n"); err = ENOMEM; goto fail; } if (!(fl->ifl_sds.ifsd_m = (struct mbuf **) malloc(sizeof(struct mbuf *) * scctx->isc_nrxd[rxq->ifr_fl_offset], M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate tx_buffer memory\n"); err = ENOMEM; goto fail; } if (!(fl->ifl_sds.ifsd_cl = (caddr_t *) malloc(sizeof(caddr_t) * scctx->isc_nrxd[rxq->ifr_fl_offset], M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate tx_buffer memory\n"); err = ENOMEM; goto fail; } /* Create the descriptor buffer dma maps */ #if defined(ACPI_DMAR) || (! (defined(__i386__) || defined(__amd64__))) if ((ctx->ifc_flags & IFC_DMAR) == 0) continue; if (!(fl->ifl_sds.ifsd_map = (bus_dmamap_t *) malloc(sizeof(bus_dmamap_t) * scctx->isc_nrxd[rxq->ifr_fl_offset], M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate tx_buffer map memory\n"); err = ENOMEM; goto fail; } for (int i = 0; i < scctx->isc_nrxd[rxq->ifr_fl_offset]; i++) { err = bus_dmamap_create(fl->ifl_desc_tag, 0, &fl->ifl_sds.ifsd_map[i]); if (err != 0) { device_printf(dev, "Unable to create RX buffer DMA map\n"); goto fail; } } #endif } return (0); fail: iflib_rx_structures_free(ctx); return (err); } /* * Internal service routines */ struct rxq_refill_cb_arg { int error; bus_dma_segment_t seg; int nseg; }; static void _rxq_refill_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) { struct rxq_refill_cb_arg *cb_arg = arg; cb_arg->error = error; cb_arg->seg = segs[0]; cb_arg->nseg = nseg; } #ifdef ACPI_DMAR #define IS_DMAR(ctx) (ctx->ifc_flags & IFC_DMAR) #else #define IS_DMAR(ctx) (0) #endif /** * rxq_refill - refill an rxq free-buffer list * @ctx: the iflib context * @rxq: the free-list to refill * @n: the number of new buffers to allocate * * (Re)populate an rxq free-buffer list with up to @n new packet buffers. * The caller must assure that @n does not exceed the queue's capacity. */ static void _iflib_fl_refill(if_ctx_t ctx, iflib_fl_t fl, int count) { struct mbuf *m; int idx, frag_idx = fl->ifl_fragidx; int pidx = fl->ifl_pidx; caddr_t cl, *sd_cl; struct mbuf **sd_m; uint8_t *sd_flags; struct if_rxd_update iru; bus_dmamap_t *sd_map; int n, i = 0; uint64_t bus_addr; int err; sd_m = fl->ifl_sds.ifsd_m; sd_map = fl->ifl_sds.ifsd_map; sd_cl = fl->ifl_sds.ifsd_cl; sd_flags = fl->ifl_sds.ifsd_flags; idx = pidx; n = count; MPASS(n > 0); MPASS(fl->ifl_credits + n <= fl->ifl_size); if (pidx < fl->ifl_cidx) MPASS(pidx + n <= fl->ifl_cidx); if (pidx == fl->ifl_cidx && (fl->ifl_credits < fl->ifl_size)) MPASS(fl->ifl_gen == 0); if (pidx > fl->ifl_cidx) MPASS(n <= fl->ifl_size - pidx + fl->ifl_cidx); DBG_COUNTER_INC(fl_refills); if (n > 8) DBG_COUNTER_INC(fl_refills_large); iru.iru_paddrs = fl->ifl_bus_addrs; iru.iru_vaddrs = &fl->ifl_vm_addrs[0]; iru.iru_idxs = fl->ifl_rxd_idxs; iru.iru_qsidx = fl->ifl_rxq->ifr_id; iru.iru_buf_size = fl->ifl_buf_size; iru.iru_flidx = fl->ifl_id; while (n--) { /* * We allocate an uninitialized mbuf + cluster, mbuf is * initialized after rx. * * If the cluster is still set then we know a minimum sized packet was received */ bit_ffc_at(fl->ifl_rx_bitmap, frag_idx, fl->ifl_size, &frag_idx); if ((frag_idx < 0) || (frag_idx >= fl->ifl_size)) bit_ffc(fl->ifl_rx_bitmap, fl->ifl_size, &frag_idx); if ((cl = sd_cl[frag_idx]) == NULL) { if ((cl = sd_cl[frag_idx] = m_cljget(NULL, M_NOWAIT, fl->ifl_buf_size)) == NULL) break; #if MEMORY_LOGGING fl->ifl_cl_enqueued++; #endif } if ((m = m_gethdr(M_NOWAIT, MT_NOINIT)) == NULL) { break; } #if MEMORY_LOGGING fl->ifl_m_enqueued++; #endif DBG_COUNTER_INC(rx_allocs); #if defined(__i386__) || defined(__amd64__) if (!IS_DMAR(ctx)) { bus_addr = pmap_kextract((vm_offset_t)cl); } else #endif { struct rxq_refill_cb_arg cb_arg; iflib_rxq_t q; cb_arg.error = 0; q = fl->ifl_rxq; MPASS(sd_map != NULL); MPASS(sd_map[frag_idx] != NULL); err = bus_dmamap_load(fl->ifl_desc_tag, sd_map[frag_idx], cl, fl->ifl_buf_size, _rxq_refill_cb, &cb_arg, 0); bus_dmamap_sync(fl->ifl_desc_tag, sd_map[frag_idx], BUS_DMASYNC_PREREAD); if (err != 0 || cb_arg.error) { /* * !zone_pack ? */ if (fl->ifl_zone == zone_pack) uma_zfree(fl->ifl_zone, cl); m_free(m); n = 0; goto done; } bus_addr = cb_arg.seg.ds_addr; } bit_set(fl->ifl_rx_bitmap, frag_idx); sd_flags[frag_idx] |= RX_SW_DESC_INUSE; MPASS(sd_m[frag_idx] == NULL); sd_cl[frag_idx] = cl; sd_m[frag_idx] = m; fl->ifl_rxd_idxs[i] = frag_idx; fl->ifl_bus_addrs[i] = bus_addr; fl->ifl_vm_addrs[i] = cl; fl->ifl_credits++; i++; MPASS(fl->ifl_credits <= fl->ifl_size); if (++idx == fl->ifl_size) { fl->ifl_gen = 1; idx = 0; } if (n == 0 || i == IFLIB_MAX_RX_REFRESH) { iru.iru_pidx = pidx; iru.iru_count = i; ctx->isc_rxd_refill(ctx->ifc_softc, &iru); i = 0; pidx = idx; fl->ifl_pidx = idx; } } done: DBG_COUNTER_INC(rxd_flush); if (fl->ifl_pidx == 0) pidx = fl->ifl_size - 1; else pidx = fl->ifl_pidx - 1; if (sd_map) bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); ctx->isc_rxd_flush(ctx->ifc_softc, fl->ifl_rxq->ifr_id, fl->ifl_id, pidx); fl->ifl_fragidx = frag_idx; } static __inline void __iflib_fl_refill_lt(if_ctx_t ctx, iflib_fl_t fl, int max) { /* we avoid allowing pidx to catch up with cidx as it confuses ixl */ int32_t reclaimable = fl->ifl_size - fl->ifl_credits - 1; #ifdef INVARIANTS int32_t delta = fl->ifl_size - get_inuse(fl->ifl_size, fl->ifl_cidx, fl->ifl_pidx, fl->ifl_gen) - 1; #endif MPASS(fl->ifl_credits <= fl->ifl_size); MPASS(reclaimable == delta); if (reclaimable > 0) _iflib_fl_refill(ctx, fl, min(max, reclaimable)); } static void iflib_fl_bufs_free(iflib_fl_t fl) { iflib_dma_info_t idi = fl->ifl_ifdi; uint32_t i; for (i = 0; i < fl->ifl_size; i++) { struct mbuf **sd_m = &fl->ifl_sds.ifsd_m[i]; uint8_t *sd_flags = &fl->ifl_sds.ifsd_flags[i]; caddr_t *sd_cl = &fl->ifl_sds.ifsd_cl[i]; if (*sd_flags & RX_SW_DESC_INUSE) { if (fl->ifl_sds.ifsd_map != NULL) { bus_dmamap_t sd_map = fl->ifl_sds.ifsd_map[i]; bus_dmamap_unload(fl->ifl_desc_tag, sd_map); bus_dmamap_destroy(fl->ifl_desc_tag, sd_map); } if (*sd_m != NULL) { m_init(*sd_m, M_NOWAIT, MT_DATA, 0); uma_zfree(zone_mbuf, *sd_m); } if (*sd_cl != NULL) uma_zfree(fl->ifl_zone, *sd_cl); *sd_flags = 0; + } else if (*sd_flags & RX_NETMAP_INUSE) { + if (fl->ifl_sds.ifsd_map != NULL) { + bus_dmamap_t sd_map = fl->ifl_sds.ifsd_map[i]; + bus_dmamap_unload(fl->ifl_desc_tag, sd_map); + bus_dmamap_destroy(fl->ifl_desc_tag, sd_map); + } + *sd_flags = 0; + MPASS(*sd_cl == NULL); + MPASS(*sd_m == NULL); } else { MPASS(*sd_cl == NULL); MPASS(*sd_m == NULL); } + #if MEMORY_LOGGING - fl->ifl_m_dequeued++; - fl->ifl_cl_dequeued++; + if (*sd_m != NULL) + fl->ifl_m_dequeued++; + if (*sd_cl != NULL) + fl->ifl_cl_dequeued++; #endif *sd_cl = NULL; *sd_m = NULL; } #ifdef INVARIANTS for (i = 0; i < fl->ifl_size; i++) { - MPASS(fl->ifl_sds.ifsd_flags[i] == 0); + KASSERT(fl->ifl_sds.ifsd_flags[i] == 0, ("fl->ifl_sds.ifsd_flags[%d]=0x%x, expected 0", + i, fl->ifl_sds.ifsd_flags[i])); MPASS(fl->ifl_sds.ifsd_cl[i] == NULL); MPASS(fl->ifl_sds.ifsd_m[i] == NULL); } #endif /* * Reset free list values */ fl->ifl_credits = fl->ifl_cidx = fl->ifl_pidx = fl->ifl_gen = fl->ifl_fragidx = 0; bzero(idi->idi_vaddr, idi->idi_size); } /********************************************************************* * * Initialize a receive ring and its buffers. * **********************************************************************/ static int iflib_fl_setup(iflib_fl_t fl) { iflib_rxq_t rxq = fl->ifl_rxq; if_ctx_t ctx = rxq->ifr_ctx; if_softc_ctx_t sctx = &ctx->ifc_softc_ctx; - bit_nclear(fl->ifl_rx_bitmap, 0, fl->ifl_size); + bit_nclear(fl->ifl_rx_bitmap, 0, fl->ifl_size-1); /* ** Free current RX buffer structs and their mbufs */ iflib_fl_bufs_free(fl); /* Now replenish the mbufs */ MPASS(fl->ifl_credits == 0); /* * XXX don't set the max_frame_size to larger * than the hardware can handle */ if (sctx->isc_max_frame_size <= 2048) fl->ifl_buf_size = MCLBYTES; #ifndef CONTIGMALLOC_WORKS else fl->ifl_buf_size = MJUMPAGESIZE; #else else if (sctx->isc_max_frame_size <= 4096) fl->ifl_buf_size = MJUMPAGESIZE; else if (sctx->isc_max_frame_size <= 9216) fl->ifl_buf_size = MJUM9BYTES; else fl->ifl_buf_size = MJUM16BYTES; #endif if (fl->ifl_buf_size > ctx->ifc_max_fl_buf_size) ctx->ifc_max_fl_buf_size = fl->ifl_buf_size; fl->ifl_cltype = m_gettype(fl->ifl_buf_size); fl->ifl_zone = m_getzone(fl->ifl_buf_size); /* avoid pre-allocating zillions of clusters to an idle card * potentially speeding up attach */ _iflib_fl_refill(ctx, fl, min(128, fl->ifl_size)); MPASS(min(128, fl->ifl_size) == fl->ifl_credits); if (min(128, fl->ifl_size) != fl->ifl_credits) return (ENOBUFS); /* * handle failure */ MPASS(rxq != NULL); MPASS(fl->ifl_ifdi != NULL); bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); return (0); } /********************************************************************* * * Free receive ring data structures * **********************************************************************/ static void iflib_rx_sds_free(iflib_rxq_t rxq) { iflib_fl_t fl; int i; if (rxq->ifr_fl != NULL) { for (i = 0; i < rxq->ifr_nfl; i++) { fl = &rxq->ifr_fl[i]; if (fl->ifl_desc_tag != NULL) { bus_dma_tag_destroy(fl->ifl_desc_tag); fl->ifl_desc_tag = NULL; } free(fl->ifl_sds.ifsd_m, M_IFLIB); free(fl->ifl_sds.ifsd_cl, M_IFLIB); /* XXX destroy maps first */ free(fl->ifl_sds.ifsd_map, M_IFLIB); fl->ifl_sds.ifsd_m = NULL; fl->ifl_sds.ifsd_cl = NULL; fl->ifl_sds.ifsd_map = NULL; } free(rxq->ifr_fl, M_IFLIB); rxq->ifr_fl = NULL; rxq->ifr_cq_gen = rxq->ifr_cq_cidx = rxq->ifr_cq_pidx = 0; } } +/* CONFIG context only */ +static void +iflib_handle_hang(if_ctx_t ctx, void *arg __unused) +{ + + CTX_LOCK(ctx); + if_setdrvflagbits(ctx->ifc_ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING); + IFDI_WATCHDOG_RESET(ctx); + ctx->ifc_watchdog_events++; + iflib_if_init_locked(ctx); + CTX_UNLOCK(ctx); +} + /* * MI independent logic * */ static void iflib_timer(void *arg) { - iflib_txq_t txq = arg; + iflib_txq_t txq_i, txq = arg; if_ctx_t ctx = txq->ift_ctx; - if_softc_ctx_t sctx = &ctx->ifc_softc_ctx; if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING)) return; + /* handle any laggards */ + if (txq->ift_db_pending) + GROUPTASK_ENQUEUE(&txq->ift_task); + IFDI_TIMER(ctx, txq->ift_id); + + if (ifmp_ring_is_stalled(txq->ift_br) && + txq->ift_cleaned_prev == txq->ift_cleaned) + txq->ift_stall_count++; + txq->ift_cleaned_prev = txq->ift_cleaned; + if (txq->ift_stall_count > 2) { + txq->ift_qstatus = IFLIB_QUEUE_HUNG; + device_printf(ctx->ifc_dev, "TX(%d) desc avail = %d, pidx = %d\n", + txq->ift_id, TXQ_AVAIL(txq), txq->ift_pidx); + } + if (txq->ift_id != 0) { + if (if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING) + callout_reset_on(&txq->ift_timer, iflib_timer_int, iflib_timer, + txq, txq->ift_timer.c_cpu); + return; + } /* ** Check on the state of the TX queue(s), this ** can be done without the lock because its RO ** and the HUNG state will be static if set. */ - IFDI_TIMER(ctx, txq->ift_id); - if ((txq->ift_qstatus == IFLIB_QUEUE_HUNG) && - ((txq->ift_cleaned_prev == txq->ift_cleaned) || - (sctx->isc_pause_frames == 0))) - goto hung; + txq_i = ctx->ifc_txqs; + for (int i = 0; i < ctx->ifc_softc_ctx.isc_ntxqsets; i++, txq_i++) { + if (txq_i->ift_qstatus == IFLIB_QUEUE_HUNG) { + iflib_config_async_gtask_dispatch(ctx, iflib_handle_hang, "hang handler", txq); + /* init will reset the callout */ + return; + } + } - if (ifmp_ring_is_stalled(txq->ift_br)) - txq->ift_qstatus = IFLIB_QUEUE_HUNG; - txq->ift_cleaned_prev = txq->ift_cleaned; - /* handle any laggards */ - if (txq->ift_db_pending) - GROUPTASK_ENQUEUE(&txq->ift_task); - sctx->isc_pause_frames = 0; if (if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING) - callout_reset_on(&txq->ift_timer, hz/2, iflib_timer, txq, txq->ift_timer.c_cpu); - return; -hung: - CTX_LOCK(ctx); - if_setdrvflagbits(ctx->ifc_ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING); - device_printf(ctx->ifc_dev, "TX(%d) desc avail = %d, pidx = %d\n", - txq->ift_id, TXQ_AVAIL(txq), txq->ift_pidx); - - IFDI_WATCHDOG_RESET(ctx); - ctx->ifc_watchdog_events++; - - ctx->ifc_flags |= IFC_DO_RESET; - iflib_admin_intr_deferred(ctx); - CTX_UNLOCK(ctx); + callout_reset_on(&txq->ift_timer, iflib_timer_int, iflib_timer, + txq, txq->ift_timer.c_cpu); } static void iflib_init_locked(if_ctx_t ctx) { if_softc_ctx_t sctx = &ctx->ifc_softc_ctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; if_t ifp = ctx->ifc_ifp; iflib_fl_t fl; iflib_txq_t txq; iflib_rxq_t rxq; - int i, j, tx_ip_csum_flags, tx_ip6_csum_flags; + int i, j, tx_ip_csum_flags, tx_ip6_csum_flags, running, reset; + running = !!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING); + reset = !!(ctx->ifc_flags & IFC_DO_RESET); if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING); IFDI_INTR_DISABLE(ctx); tx_ip_csum_flags = scctx->isc_tx_csum_flags & (CSUM_IP | CSUM_TCP | CSUM_UDP | CSUM_SCTP); tx_ip6_csum_flags = scctx->isc_tx_csum_flags & (CSUM_IP6_TCP | CSUM_IP6_UDP | CSUM_IP6_SCTP); /* Set hardware offload abilities */ if_clearhwassist(ifp); if (if_getcapenable(ifp) & IFCAP_TXCSUM) if_sethwassistbits(ifp, tx_ip_csum_flags, 0); if (if_getcapenable(ifp) & IFCAP_TXCSUM_IPV6) if_sethwassistbits(ifp, tx_ip6_csum_flags, 0); if (if_getcapenable(ifp) & IFCAP_TSO4) if_sethwassistbits(ifp, CSUM_IP_TSO, 0); if (if_getcapenable(ifp) & IFCAP_TSO6) if_sethwassistbits(ifp, CSUM_IP6_TSO, 0); for (i = 0, txq = ctx->ifc_txqs; i < sctx->isc_ntxqsets; i++, txq++) { CALLOUT_LOCK(txq); callout_stop(&txq->ift_timer); CALLOUT_UNLOCK(txq); iflib_netmap_txq_init(ctx, txq); } - for (i = 0, rxq = ctx->ifc_rxqs; i < sctx->isc_nrxqsets; i++, rxq++) { - MPASS(rxq->ifr_id == i); - iflib_netmap_rxq_init(ctx, rxq); - } #ifdef INVARIANTS i = if_getdrvflags(ifp); #endif IFDI_INIT(ctx); MPASS(if_getdrvflags(ifp) == i); + if (!running && reset) + return; for (i = 0, rxq = ctx->ifc_rxqs; i < sctx->isc_nrxqsets; i++, rxq++) { /* XXX this should really be done on a per-queue basis */ - if (if_getcapenable(ifp) & IFCAP_NETMAP) + if (if_getcapenable(ifp) & IFCAP_NETMAP) { + MPASS(rxq->ifr_id == i); + iflib_netmap_rxq_init(ctx, rxq); continue; + } for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) { if (iflib_fl_setup(fl)) { device_printf(ctx->ifc_dev, "freelist setup failed - check cluster settings\n"); goto done; } } } done: if_setdrvflagbits(ctx->ifc_ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE); IFDI_INTR_ENABLE(ctx); txq = ctx->ifc_txqs; for (i = 0; i < sctx->isc_ntxqsets; i++, txq++) - callout_reset_on(&txq->ift_timer, hz/2, iflib_timer, txq, - txq->ift_timer.c_cpu); + callout_reset_on(&txq->ift_timer, iflib_timer_int, iflib_timer, + txq, txq->ift_timer.c_cpu); } +/* CONFIG context only */ static int iflib_media_change(if_t ifp) { if_ctx_t ctx = if_getsoftc(ifp); int err; CTX_LOCK(ctx); if ((err = IFDI_MEDIA_CHANGE(ctx)) == 0) iflib_init_locked(ctx); CTX_UNLOCK(ctx); return (err); } +/* CONFIG context only */ static void iflib_media_status(if_t ifp, struct ifmediareq *ifmr) { if_ctx_t ctx = if_getsoftc(ifp); + iflib_admin_intr_deferred(ctx); CTX_LOCK(ctx); - IFDI_UPDATE_ADMIN_STATUS(ctx); IFDI_MEDIA_STATUS(ctx, ifmr); CTX_UNLOCK(ctx); } +/* CONFIG context only */ static void iflib_stop(if_ctx_t ctx) { iflib_txq_t txq = ctx->ifc_txqs; iflib_rxq_t rxq = ctx->ifc_rxqs; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; iflib_dma_info_t di; iflib_fl_t fl; int i, j; /* Tell the stack that the interface is no longer active */ if_setdrvflagbits(ctx->ifc_ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING); IFDI_INTR_DISABLE(ctx); - DELAY(1000); IFDI_STOP(ctx); - DELAY(1000); iflib_debug_reset(); /* Wait for current tx queue users to exit to disarm watchdog timer. */ for (i = 0; i < scctx->isc_ntxqsets; i++, txq++) { /* make sure all transmitters have completed before proceeding XXX */ /* clean any enqueued buffers */ iflib_ifmp_purge(txq); /* Free any existing tx buffers. */ for (j = 0; j < txq->ift_size; j++) { iflib_txsd_free(ctx, txq, j); } - txq->ift_processed = txq->ift_cleaned = txq->ift_cidx_processed = 0; - txq->ift_in_use = txq->ift_gen = txq->ift_cidx = txq->ift_pidx = txq->ift_no_desc_avail = 0; + /* XXX please rewrite to simply bzero this range */ + txq->ift_processed = txq->ift_cleaned = txq->ift_cleaned_prev = 0; + txq->ift_stall_count = txq->ift_cidx_processed = 0; + txq->ift_in_use = txq->ift_gen = txq->ift_cidx = txq->ift_pidx = 0; txq->ift_closed = txq->ift_mbuf_defrag = txq->ift_mbuf_defrag_failed = 0; txq->ift_no_tx_dma_setup = txq->ift_txd_encap_efbig = txq->ift_map_failed = 0; - txq->ift_pullups = 0; + txq->ift_no_desc_avail = txq->ift_pullups = 0; ifmp_ring_reset_stats(txq->ift_br); for (j = 0, di = txq->ift_ifdi; j < ctx->ifc_nhwtxqs; j++, di++) bzero((void *)di->idi_vaddr, di->idi_size); } for (i = 0; i < scctx->isc_nrxqsets; i++, rxq++) { /* make sure all transmitters have completed before proceeding XXX */ for (j = 0, di = txq->ift_ifdi; j < ctx->ifc_nhwrxqs; j++, di++) bzero((void *)di->idi_vaddr, di->idi_size); /* also resets the free lists pidx/cidx */ for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) iflib_fl_bufs_free(fl); } } static inline caddr_t calc_next_rxd(iflib_fl_t fl, int cidx) { qidx_t size; int nrxd; caddr_t start, end, cur, next; nrxd = fl->ifl_size; size = fl->ifl_rxd_size; start = fl->ifl_ifdi->idi_vaddr; if (__predict_false(size == 0)) return (start); cur = start + size*cidx; end = start + size*nrxd; next = CACHE_PTR_NEXT(cur); return (next < end ? next : start); } static inline void prefetch_pkts(iflib_fl_t fl, int cidx) { int nextptr; int nrxd = fl->ifl_size; caddr_t next_rxd; nextptr = (cidx + CACHE_PTR_INCREMENT) & (nrxd-1); prefetch(&fl->ifl_sds.ifsd_m[nextptr]); prefetch(&fl->ifl_sds.ifsd_cl[nextptr]); next_rxd = calc_next_rxd(fl, cidx); prefetch(next_rxd); prefetch(fl->ifl_sds.ifsd_m[(cidx + 1) & (nrxd-1)]); prefetch(fl->ifl_sds.ifsd_m[(cidx + 2) & (nrxd-1)]); prefetch(fl->ifl_sds.ifsd_m[(cidx + 3) & (nrxd-1)]); prefetch(fl->ifl_sds.ifsd_m[(cidx + 4) & (nrxd-1)]); prefetch(fl->ifl_sds.ifsd_cl[(cidx + 1) & (nrxd-1)]); prefetch(fl->ifl_sds.ifsd_cl[(cidx + 2) & (nrxd-1)]); prefetch(fl->ifl_sds.ifsd_cl[(cidx + 3) & (nrxd-1)]); prefetch(fl->ifl_sds.ifsd_cl[(cidx + 4) & (nrxd-1)]); } static void rxd_frag_to_sd(iflib_rxq_t rxq, if_rxd_frag_t irf, int unload, if_rxsd_t sd) { int flid, cidx; bus_dmamap_t map; iflib_fl_t fl; iflib_dma_info_t di; int next; map = NULL; flid = irf->irf_flid; cidx = irf->irf_idx; fl = &rxq->ifr_fl[flid]; sd->ifsd_fl = fl; sd->ifsd_cidx = cidx; sd->ifsd_m = &fl->ifl_sds.ifsd_m[cidx]; sd->ifsd_cl = &fl->ifl_sds.ifsd_cl[cidx]; fl->ifl_credits--; #if MEMORY_LOGGING fl->ifl_m_dequeued++; #endif if (rxq->ifr_ctx->ifc_flags & IFC_PREFETCH) prefetch_pkts(fl, cidx); if (fl->ifl_sds.ifsd_map != NULL) { next = (cidx + CACHE_PTR_INCREMENT) & (fl->ifl_size-1); prefetch(&fl->ifl_sds.ifsd_map[next]); map = fl->ifl_sds.ifsd_map[cidx]; di = fl->ifl_ifdi; next = (cidx + CACHE_LINE_SIZE) & (fl->ifl_size-1); prefetch(&fl->ifl_sds.ifsd_flags[next]); bus_dmamap_sync(di->idi_tag, di->idi_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); /* not valid assert if bxe really does SGE from non-contiguous elements */ MPASS(fl->ifl_cidx == cidx); if (unload) bus_dmamap_unload(fl->ifl_desc_tag, map); } fl->ifl_cidx = (fl->ifl_cidx + 1) & (fl->ifl_size-1); if (__predict_false(fl->ifl_cidx == 0)) fl->ifl_gen = 0; if (map != NULL) bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); bit_clear(fl->ifl_rx_bitmap, cidx); } static struct mbuf * assemble_segments(iflib_rxq_t rxq, if_rxd_info_t ri, if_rxsd_t sd) { int i, padlen , flags; struct mbuf *m, *mh, *mt; caddr_t cl; i = 0; mh = NULL; do { rxd_frag_to_sd(rxq, &ri->iri_frags[i], TRUE, sd); MPASS(*sd->ifsd_cl != NULL); MPASS(*sd->ifsd_m != NULL); /* Don't include zero-length frags */ if (ri->iri_frags[i].irf_len == 0) { /* XXX we can save the cluster here, but not the mbuf */ m_init(*sd->ifsd_m, M_NOWAIT, MT_DATA, 0); m_free(*sd->ifsd_m); *sd->ifsd_m = NULL; continue; } m = *sd->ifsd_m; *sd->ifsd_m = NULL; if (mh == NULL) { flags = M_PKTHDR|M_EXT; mh = mt = m; padlen = ri->iri_pad; } else { flags = M_EXT; mt->m_next = m; mt = m; /* assuming padding is only on the first fragment */ padlen = 0; } cl = *sd->ifsd_cl; *sd->ifsd_cl = NULL; +#if MEMORY_LOGGING + sd->ifsd_fl->ifl_cl_dequeued++; +#endif /* Can these two be made one ? */ m_init(m, M_NOWAIT, MT_DATA, flags); m_cljset(m, cl, sd->ifsd_fl->ifl_cltype); /* * These must follow m_init and m_cljset */ m->m_data += padlen; ri->iri_len -= padlen; m->m_len = ri->iri_frags[i].irf_len; } while (++i < ri->iri_nfrags); return (mh); } /* * Process one software descriptor */ static struct mbuf * iflib_rxd_pkt_get(iflib_rxq_t rxq, if_rxd_info_t ri) { struct if_rxsd sd; struct mbuf *m; /* should I merge this back in now that the two paths are basically duplicated? */ if (ri->iri_nfrags == 1 && ri->iri_frags[0].irf_len <= IFLIB_RX_COPY_THRESH) { rxd_frag_to_sd(rxq, &ri->iri_frags[0], FALSE, &sd); m = *sd.ifsd_m; *sd.ifsd_m = NULL; m_init(m, M_NOWAIT, MT_DATA, M_PKTHDR); #ifndef __NO_STRICT_ALIGNMENT if (!IP_ALIGNED(m)) m->m_data += 2; #endif memcpy(m->m_data, *sd.ifsd_cl, ri->iri_len); m->m_len = ri->iri_frags[0].irf_len; } else { m = assemble_segments(rxq, ri, &sd); } m->m_pkthdr.len = ri->iri_len; m->m_pkthdr.rcvif = ri->iri_ifp; m->m_flags |= ri->iri_flags; m->m_pkthdr.ether_vtag = ri->iri_vtag; m->m_pkthdr.flowid = ri->iri_flowid; M_HASHTYPE_SET(m, ri->iri_rsstype); m->m_pkthdr.csum_flags = ri->iri_csum_flags; m->m_pkthdr.csum_data = ri->iri_csum_data; return (m); } static bool iflib_rxeof(iflib_rxq_t rxq, qidx_t budget) { if_ctx_t ctx = rxq->ifr_ctx; if_shared_ctx_t sctx = ctx->ifc_sctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; int avail, i; qidx_t *cidxp; struct if_rxd_info ri; int err, budget_left, rx_bytes, rx_pkts; iflib_fl_t fl; struct ifnet *ifp; int lro_enabled; /* * XXX early demux data packets so that if_input processing only handles * acks in interrupt context */ - struct mbuf *m, *mh, *mt; + struct mbuf *m, *mh, *mt, *mf; ifp = ctx->ifc_ifp; -#ifdef DEV_NETMAP - if (ifp->if_capenable & IFCAP_NETMAP) { - u_int work = 0; - if (netmap_rx_irq(ifp, rxq->ifr_id, &work)) - return (FALSE); - } -#endif - mh = mt = NULL; MPASS(budget > 0); - rx_pkts = rx_bytes = 0; + rx_pkts = rx_bytes = 0; if (sctx->isc_flags & IFLIB_HAS_RXCQ) cidxp = &rxq->ifr_cq_cidx; else cidxp = &rxq->ifr_fl[0].ifl_cidx; if ((avail = iflib_rxd_avail(ctx, rxq, *cidxp, budget)) == 0) { for (i = 0, fl = &rxq->ifr_fl[0]; i < sctx->isc_nfl; i++, fl++) __iflib_fl_refill_lt(ctx, fl, budget + 8); DBG_COUNTER_INC(rx_unavail); return (false); } for (budget_left = budget; (budget_left > 0) && (avail > 0); budget_left--, avail--) { if (__predict_false(!CTX_ACTIVE(ctx))) { DBG_COUNTER_INC(rx_ctx_inactive); break; } /* * Reset client set fields to their default values */ rxd_info_zero(&ri); ri.iri_qsidx = rxq->ifr_id; ri.iri_cidx = *cidxp; ri.iri_ifp = ifp; ri.iri_frags = rxq->ifr_frags; err = ctx->isc_rxd_pkt_get(ctx->ifc_softc, &ri); if (err) goto err; if (sctx->isc_flags & IFLIB_HAS_RXCQ) { *cidxp = ri.iri_cidx; /* Update our consumer index */ /* XXX NB: shurd - check if this is still safe */ while (rxq->ifr_cq_cidx >= scctx->isc_nrxd[0]) { rxq->ifr_cq_cidx -= scctx->isc_nrxd[0]; rxq->ifr_cq_gen = 0; } /* was this only a completion queue message? */ if (__predict_false(ri.iri_nfrags == 0)) continue; } MPASS(ri.iri_nfrags != 0); MPASS(ri.iri_len != 0); /* will advance the cidx on the corresponding free lists */ m = iflib_rxd_pkt_get(rxq, &ri); if (avail == 0 && budget_left) avail = iflib_rxd_avail(ctx, rxq, *cidxp, budget_left); if (__predict_false(m == NULL)) { DBG_COUNTER_INC(rx_mbuf_null); continue; } /* imm_pkt: -- cxgb */ if (mh == NULL) mh = mt = m; else { mt->m_nextpkt = m; mt = m; } } /* make sure that we can refill faster than drain */ for (i = 0, fl = &rxq->ifr_fl[0]; i < sctx->isc_nfl; i++, fl++) - __iflib_fl_refill_lt(ctx, fl, budget + 8); + __iflib_fl_refill_lt(ctx, fl, 2*budget + 8); lro_enabled = (if_getcapenable(ifp) & IFCAP_LRO); + mt = mf = NULL; while (mh != NULL) { m = mh; + if (mf == NULL) + mf = m; mh = mh->m_nextpkt; m->m_nextpkt = NULL; #ifndef __NO_STRICT_ALIGNMENT if (!IP_ALIGNED(m) && (m = iflib_fixup_rx(m)) == NULL) continue; #endif rx_bytes += m->m_pkthdr.len; rx_pkts++; #if defined(INET6) || defined(INET) - if (lro_enabled && tcp_lro_rx(&rxq->ifr_lc, m, 0) == 0) + if (lro_enabled && tcp_lro_rx(&rxq->ifr_lc, m, 0) == 0) { + if (mf == m) + mf = NULL; continue; + } #endif + if (mt != NULL) + mt->m_nextpkt = m; + mt = m; + } + if (mf != NULL) { + ifp->if_input(ifp, mf); DBG_COUNTER_INC(rx_if_input); - ifp->if_input(ifp, m); } - if_inc_counter(ifp, IFCOUNTER_IBYTES, rx_bytes); - if_inc_counter(ifp, IFCOUNTER_IPACKETS, rx_pkts); + if (rx_pkts) { + if_inc_counter(ifp, IFCOUNTER_IBYTES, rx_bytes); + if_inc_counter(ifp, IFCOUNTER_IPACKETS, rx_pkts); + } /* * Flush any outstanding LRO work */ #if defined(INET6) || defined(INET) tcp_lro_flush_all(&rxq->ifr_lc); #endif - if (avail) - return true; - return (iflib_rxd_avail(ctx, rxq, *cidxp, 1)); + return (avail || iflib_rxd_avail(ctx, rxq, *cidxp, 1)); err: - CTX_LOCK(ctx); - ctx->ifc_flags |= IFC_DO_RESET; - iflib_admin_intr_deferred(ctx); - CTX_UNLOCK(ctx); + iflib_admin_reset_deferred(ctx); return (false); } #define TXD_NOTIFY_COUNT(txq) (((txq)->ift_size / (txq)->ift_update_freq)-1) static inline qidx_t txq_max_db_deferred(iflib_txq_t txq, qidx_t in_use) { qidx_t notify_count = TXD_NOTIFY_COUNT(txq); qidx_t minthresh = txq->ift_size / 8; if (in_use > 4*minthresh) return (notify_count); if (in_use > 2*minthresh) return (notify_count >> 1); if (in_use > minthresh) return (notify_count >> 3); return (0); } static inline qidx_t txq_max_rs_deferred(iflib_txq_t txq) { qidx_t notify_count = TXD_NOTIFY_COUNT(txq); qidx_t minthresh = txq->ift_size / 8; if (txq->ift_in_use > 4*minthresh) return (notify_count); if (txq->ift_in_use > 2*minthresh) return (notify_count >> 1); if (txq->ift_in_use > minthresh) return (notify_count >> 2); return (2); } #define M_CSUM_FLAGS(m) ((m)->m_pkthdr.csum_flags) #define M_HAS_VLANTAG(m) (m->m_flags & M_VLANTAG) #define TXQ_MAX_DB_DEFERRED(txq, in_use) txq_max_db_deferred((txq), (in_use)) #define TXQ_MAX_RS_DEFERRED(txq) txq_max_rs_deferred(txq) #define TXQ_MAX_DB_CONSUMED(size) (size >> 4) /* forward compatibility for cxgb */ #define FIRST_QSET(ctx) 0 #define NTXQSETS(ctx) ((ctx)->ifc_softc_ctx.isc_ntxqsets) #define NRXQSETS(ctx) ((ctx)->ifc_softc_ctx.isc_nrxqsets) #define QIDX(ctx, m) ((((m)->m_pkthdr.flowid & ctx->ifc_softc_ctx.isc_rss_table_mask) % NTXQSETS(ctx)) + FIRST_QSET(ctx)) #define DESC_RECLAIMABLE(q) ((int)((q)->ift_processed - (q)->ift_cleaned - (q)->ift_ctx->ifc_softc_ctx.isc_tx_nsegments)) /* XXX we should be setting this to something other than zero */ #define RECLAIM_THRESH(ctx) ((ctx)->ifc_sctx->isc_tx_reclaim_thresh) #define MAX_TX_DESC(ctx) ((ctx)->ifc_softc_ctx.isc_tx_tso_segments_max) static inline bool iflib_txd_db_check(if_ctx_t ctx, iflib_txq_t txq, int ring, qidx_t in_use) { qidx_t dbval, max; bool rang; rang = false; max = TXQ_MAX_DB_DEFERRED(txq, in_use); if (ring || txq->ift_db_pending >= max) { dbval = txq->ift_npending ? txq->ift_npending : txq->ift_pidx; ctx->isc_txd_flush(ctx->ifc_softc, txq->ift_id, dbval); txq->ift_db_pending = txq->ift_npending = 0; rang = true; } return (rang); } #ifdef PKT_DEBUG static void print_pkt(if_pkt_info_t pi) { printf("pi len: %d qsidx: %d nsegs: %d ndescs: %d flags: %x pidx: %d\n", pi->ipi_len, pi->ipi_qsidx, pi->ipi_nsegs, pi->ipi_ndescs, pi->ipi_flags, pi->ipi_pidx); printf("pi new_pidx: %d csum_flags: %lx tso_segsz: %d mflags: %x vtag: %d\n", pi->ipi_new_pidx, pi->ipi_csum_flags, pi->ipi_tso_segsz, pi->ipi_mflags, pi->ipi_vtag); printf("pi etype: %d ehdrlen: %d ip_hlen: %d ipproto: %d\n", pi->ipi_etype, pi->ipi_ehdrlen, pi->ipi_ip_hlen, pi->ipi_ipproto); } #endif #define IS_TSO4(pi) ((pi)->ipi_csum_flags & CSUM_IP_TSO) #define IS_TSO6(pi) ((pi)->ipi_csum_flags & CSUM_IP6_TSO) static int iflib_parse_header(iflib_txq_t txq, if_pkt_info_t pi, struct mbuf **mp) { - if_shared_ctx_t sctx = txq->ift_ctx->ifc_sctx; + if_ctx_t ctx = txq->ift_ctx; +#ifdef INET + if_shared_ctx_t sctx = ctx->ifc_sctx; +#endif + if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; struct ether_vlan_header *eh; struct mbuf *m, *n; + int err; + if (scctx->isc_txrx->ift_txd_errata && + (err = scctx->isc_txrx->ift_txd_errata(ctx->ifc_softc, mp))) + return (err); n = m = *mp; - if ((sctx->isc_flags & IFLIB_NEED_SCRATCH) && - M_WRITABLE(m) == 0) { - if ((m = m_dup(m, M_NOWAIT)) == NULL) { - return (ENOMEM); - } else { - m_freem(*mp); - n = *mp = m; - } - } /* * Determine where frame payload starts. * Jump over vlan headers if already present, * helpful for QinQ too. */ if (__predict_false(m->m_len < sizeof(*eh))) { txq->ift_pullups++; if (__predict_false((m = m_pullup(m, sizeof(*eh))) == NULL)) return (ENOMEM); } eh = mtod(m, struct ether_vlan_header *); if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { pi->ipi_etype = ntohs(eh->evl_proto); pi->ipi_ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; } else { pi->ipi_etype = ntohs(eh->evl_encap_proto); pi->ipi_ehdrlen = ETHER_HDR_LEN; } + if (if_getmtu(txq->ift_ctx->ifc_ifp) >= pi->ipi_len) { + pi->ipi_csum_flags &= ~(CSUM_IP_TSO|CSUM_IP6_TSO); + } + switch (pi->ipi_etype) { #ifdef INET case ETHERTYPE_IP: { struct ip *ip = NULL; struct tcphdr *th = NULL; int minthlen; minthlen = min(m->m_pkthdr.len, pi->ipi_ehdrlen + sizeof(*ip) + sizeof(*th)); if (__predict_false(m->m_len < minthlen)) { /* * if this code bloat is causing too much of a hit * move it to a separate function and mark it noinline */ if (m->m_len == pi->ipi_ehdrlen) { n = m->m_next; MPASS(n); if (n->m_len >= sizeof(*ip)) { ip = (struct ip *)n->m_data; if (n->m_len >= (ip->ip_hl << 2) + sizeof(*th)) th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2)); } else { txq->ift_pullups++; if (__predict_false((m = m_pullup(m, minthlen)) == NULL)) return (ENOMEM); ip = (struct ip *)(m->m_data + pi->ipi_ehdrlen); } } else { txq->ift_pullups++; if (__predict_false((m = m_pullup(m, minthlen)) == NULL)) return (ENOMEM); ip = (struct ip *)(m->m_data + pi->ipi_ehdrlen); if (m->m_len >= (ip->ip_hl << 2) + sizeof(*th)) th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2)); } } else { ip = (struct ip *)(m->m_data + pi->ipi_ehdrlen); if (m->m_len >= (ip->ip_hl << 2) + sizeof(*th)) th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2)); } pi->ipi_ip_hlen = ip->ip_hl << 2; pi->ipi_ipproto = ip->ip_p; pi->ipi_flags |= IPI_TX_IPV4; - if (pi->ipi_csum_flags & CSUM_IP) + if ((sctx->isc_flags & IFLIB_NEED_ZERO_CSUM) && (pi->ipi_csum_flags & CSUM_IP)) ip->ip_sum = 0; - if (pi->ipi_ipproto == IPPROTO_TCP) { - if (__predict_false(th == NULL)) { - txq->ift_pullups++; - if (__predict_false((m = m_pullup(m, (ip->ip_hl << 2) + sizeof(*th))) == NULL)) - return (ENOMEM); - th = (struct tcphdr *)((caddr_t)ip + pi->ipi_ip_hlen); - } - pi->ipi_tcp_hflags = th->th_flags; - pi->ipi_tcp_hlen = th->th_off << 2; - pi->ipi_tcp_seq = th->th_seq; - } if (IS_TSO4(pi)) { + if (pi->ipi_ipproto == IPPROTO_TCP) { + if (__predict_false(th == NULL)) { + txq->ift_pullups++; + if (__predict_false((m = m_pullup(m, (ip->ip_hl << 2) + sizeof(*th))) == NULL)) + return (ENOMEM); + th = (struct tcphdr *)((caddr_t)ip + pi->ipi_ip_hlen); + } + pi->ipi_tcp_hflags = th->th_flags; + pi->ipi_tcp_hlen = th->th_off << 2; + pi->ipi_tcp_seq = th->th_seq; + } if (__predict_false(ip->ip_p != IPPROTO_TCP)) return (ENXIO); th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htons(IPPROTO_TCP)); pi->ipi_tso_segsz = m->m_pkthdr.tso_segsz; if (sctx->isc_flags & IFLIB_TSO_INIT_IP) { ip->ip_sum = 0; ip->ip_len = htons(pi->ipi_ip_hlen + pi->ipi_tcp_hlen + pi->ipi_tso_segsz); } } break; } #endif #ifdef INET6 case ETHERTYPE_IPV6: { struct ip6_hdr *ip6 = (struct ip6_hdr *)(m->m_data + pi->ipi_ehdrlen); struct tcphdr *th; pi->ipi_ip_hlen = sizeof(struct ip6_hdr); if (__predict_false(m->m_len < pi->ipi_ehdrlen + sizeof(struct ip6_hdr))) { if (__predict_false((m = m_pullup(m, pi->ipi_ehdrlen + sizeof(struct ip6_hdr))) == NULL)) return (ENOMEM); } th = (struct tcphdr *)((caddr_t)ip6 + pi->ipi_ip_hlen); /* XXX-BZ this will go badly in case of ext hdrs. */ pi->ipi_ipproto = ip6->ip6_nxt; pi->ipi_flags |= IPI_TX_IPV6; - if (pi->ipi_ipproto == IPPROTO_TCP) { - if (__predict_false(m->m_len < pi->ipi_ehdrlen + sizeof(struct ip6_hdr) + sizeof(struct tcphdr))) { - if (__predict_false((m = m_pullup(m, pi->ipi_ehdrlen + sizeof(struct ip6_hdr) + sizeof(struct tcphdr))) == NULL)) - return (ENOMEM); - } - pi->ipi_tcp_hflags = th->th_flags; - pi->ipi_tcp_hlen = th->th_off << 2; - } if (IS_TSO6(pi)) { + if (pi->ipi_ipproto == IPPROTO_TCP) { + if (__predict_false(m->m_len < pi->ipi_ehdrlen + sizeof(struct ip6_hdr) + sizeof(struct tcphdr))) { + if (__predict_false((m = m_pullup(m, pi->ipi_ehdrlen + sizeof(struct ip6_hdr) + sizeof(struct tcphdr))) == NULL)) + return (ENOMEM); + } + pi->ipi_tcp_hflags = th->th_flags; + pi->ipi_tcp_hlen = th->th_off << 2; + } if (__predict_false(ip6->ip6_nxt != IPPROTO_TCP)) return (ENXIO); /* * The corresponding flag is set by the stack in the IPv4 * TSO case, but not in IPv6 (at least in FreeBSD 10.2). * So, set it here because the rest of the flow requires it. */ pi->ipi_csum_flags |= CSUM_TCP_IPV6; th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0); pi->ipi_tso_segsz = m->m_pkthdr.tso_segsz; } break; } #endif default: pi->ipi_csum_flags &= ~CSUM_OFFLOAD; pi->ipi_ip_hlen = 0; break; } *mp = m; return (0); } static __noinline struct mbuf * collapse_pkthdr(struct mbuf *m0) { struct mbuf *m, *m_next, *tmp; m = m0; m_next = m->m_next; while (m_next != NULL && m_next->m_len == 0) { m = m_next; m->m_next = NULL; m_free(m); m_next = m_next->m_next; } m = m0; m->m_next = m_next; if ((m_next->m_flags & M_EXT) == 0) { m = m_defrag(m, M_NOWAIT); } else { tmp = m_next->m_next; memcpy(m_next, m, MPKTHSIZE); m = m_next; m->m_next = tmp; } return (m); } /* * If dodgy hardware rejects the scatter gather chain we've handed it * we'll need to remove the mbuf chain from ifsg_m[] before we can add the * m_defrag'd mbufs */ static __noinline struct mbuf * iflib_remove_mbuf(iflib_txq_t txq) { int ntxd, i, pidx; struct mbuf *m, *mh, **ifsd_m; pidx = txq->ift_pidx; ifsd_m = txq->ift_sds.ifsd_m; ntxd = txq->ift_size; mh = m = ifsd_m[pidx]; ifsd_m[pidx] = NULL; #if MEMORY_LOGGING txq->ift_dequeued++; #endif i = 1; while (m) { ifsd_m[(pidx + i) & (ntxd -1)] = NULL; #if MEMORY_LOGGING txq->ift_dequeued++; #endif m = m->m_next; i++; } return (mh); } static int iflib_busdma_load_mbuf_sg(iflib_txq_t txq, bus_dma_tag_t tag, bus_dmamap_t map, struct mbuf **m0, bus_dma_segment_t *segs, int *nsegs, int max_segs, int flags) { if_ctx_t ctx; if_shared_ctx_t sctx; if_softc_ctx_t scctx; int i, next, pidx, err, ntxd, count; struct mbuf *m, *tmp, **ifsd_m; m = *m0; /* * Please don't ever do this */ if (__predict_false(m->m_len == 0)) *m0 = m = collapse_pkthdr(m); ctx = txq->ift_ctx; sctx = ctx->ifc_sctx; scctx = &ctx->ifc_softc_ctx; ifsd_m = txq->ift_sds.ifsd_m; ntxd = txq->ift_size; pidx = txq->ift_pidx; - if (map != NULL) { + MPASS(ifsd_m[pidx] == NULL); + if (force_busdma || map != NULL) { uint8_t *ifsd_flags = txq->ift_sds.ifsd_flags; - err = bus_dmamap_load_mbuf_sg(tag, map, *m0, segs, nsegs, BUS_DMA_NOWAIT); if (err) return (err); ifsd_flags[pidx] |= TX_SW_DESC_MAPPED; count = 0; m = *m0; do { if (__predict_false(m->m_len <= 0)) { tmp = m; m = m->m_next; tmp->m_next = NULL; m_free(tmp); continue; } m = m->m_next; count++; } while (m != NULL); if (count > *nsegs) { ifsd_m[pidx] = *m0; ifsd_m[pidx]->m_flags |= M_TOOBIG; return (0); } m = *m0; count = 0; do { next = (pidx + count) & (ntxd-1); MPASS(ifsd_m[next] == NULL); ifsd_m[next] = m; count++; tmp = m; m = m->m_next; } while (m != NULL); } else { int buflen, sgsize, maxsegsz, max_sgsize; vm_offset_t vaddr; vm_paddr_t curaddr; count = i = 0; m = *m0; if (m->m_pkthdr.csum_flags & CSUM_TSO) maxsegsz = scctx->isc_tx_tso_segsize_max; else maxsegsz = sctx->isc_tx_maxsegsize; do { if (__predict_false(m->m_len <= 0)) { tmp = m; m = m->m_next; tmp->m_next = NULL; m_free(tmp); continue; } buflen = m->m_len; vaddr = (vm_offset_t)m->m_data; /* * see if we can't be smarter about physically * contiguous mappings */ next = (pidx + count) & (ntxd-1); MPASS(ifsd_m[next] == NULL); #if MEMORY_LOGGING txq->ift_enqueued++; #endif ifsd_m[next] = m; while (buflen > 0) { if (i >= max_segs) goto err; max_sgsize = MIN(buflen, maxsegsz); curaddr = pmap_kextract(vaddr); sgsize = PAGE_SIZE - (curaddr & PAGE_MASK); sgsize = MIN(sgsize, max_sgsize); segs[i].ds_addr = curaddr; segs[i].ds_len = sgsize; vaddr += sgsize; buflen -= sgsize; i++; } count++; tmp = m; m = m->m_next; } while (m != NULL); *nsegs = i; } return (0); err: *m0 = iflib_remove_mbuf(txq); return (EFBIG); } static inline caddr_t calc_next_txd(iflib_txq_t txq, int cidx, uint8_t qid) { qidx_t size; int ntxd; caddr_t start, end, cur, next; ntxd = txq->ift_size; size = txq->ift_txd_size[qid]; start = txq->ift_ifdi[qid].idi_vaddr; if (__predict_false(size == 0)) return (start); cur = start + size*cidx; end = start + size*ntxd; next = CACHE_PTR_NEXT(cur); return (next < end ? next : start); } static int iflib_encap(iflib_txq_t txq, struct mbuf **m_headp) { if_ctx_t ctx; if_shared_ctx_t sctx; if_softc_ctx_t scctx; bus_dma_segment_t *segs; struct mbuf *m_head; void *next_txd; bus_dmamap_t map; struct if_pkt_info pi; int remap = 0; int err, nsegs, ndesc, max_segs, pidx, cidx, next, ntxd; bus_dma_tag_t desc_tag; segs = txq->ift_segs; ctx = txq->ift_ctx; sctx = ctx->ifc_sctx; scctx = &ctx->ifc_softc_ctx; segs = txq->ift_segs; ntxd = txq->ift_size; m_head = *m_headp; map = NULL; /* * If we're doing TSO the next descriptor to clean may be quite far ahead */ cidx = txq->ift_cidx; pidx = txq->ift_pidx; if (ctx->ifc_flags & IFC_PREFETCH) { next = (cidx + CACHE_PTR_INCREMENT) & (ntxd-1); if (!(ctx->ifc_flags & IFLIB_HAS_TXCQ)) { next_txd = calc_next_txd(txq, cidx, 0); prefetch(next_txd); } /* prefetch the next cache line of mbuf pointers and flags */ prefetch(&txq->ift_sds.ifsd_m[next]); if (txq->ift_sds.ifsd_map != NULL) { prefetch(&txq->ift_sds.ifsd_map[next]); next = (cidx + CACHE_LINE_SIZE) & (ntxd-1); prefetch(&txq->ift_sds.ifsd_flags[next]); } - } else if (txq->ift_sds.ifsd_map != NULL) + } + if (txq->ift_sds.ifsd_map != NULL) map = txq->ift_sds.ifsd_map[pidx]; if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { desc_tag = txq->ift_tso_desc_tag; max_segs = scctx->isc_tx_tso_segments_max; } else { desc_tag = txq->ift_desc_tag; max_segs = scctx->isc_tx_nsegments; } m_head = *m_headp; pkt_info_zero(&pi); - pi.ipi_len = m_head->m_pkthdr.len; pi.ipi_mflags = (m_head->m_flags & (M_VLANTAG|M_BCAST|M_MCAST)); - pi.ipi_csum_flags = m_head->m_pkthdr.csum_flags; - pi.ipi_vtag = (m_head->m_flags & M_VLANTAG) ? m_head->m_pkthdr.ether_vtag : 0; pi.ipi_pidx = pidx; pi.ipi_qsidx = txq->ift_id; + pi.ipi_len = m_head->m_pkthdr.len; + pi.ipi_csum_flags = m_head->m_pkthdr.csum_flags; + pi.ipi_vtag = (m_head->m_flags & M_VLANTAG) ? m_head->m_pkthdr.ether_vtag : 0; /* deliberate bitwise OR to make one condition */ if (__predict_true((pi.ipi_csum_flags | pi.ipi_vtag))) { if (__predict_false((err = iflib_parse_header(txq, &pi, m_headp)) != 0)) return (err); m_head = *m_headp; + pi.ipi_hdr_data = mtod(m_head, caddr_t); } retry: err = iflib_busdma_load_mbuf_sg(txq, desc_tag, map, m_headp, segs, &nsegs, max_segs, BUS_DMA_NOWAIT); defrag: if (__predict_false(err)) { switch (err) { case EFBIG: /* try collapse once and defrag once */ if (remap == 0) m_head = m_collapse(*m_headp, M_NOWAIT, max_segs); if (remap == 1) m_head = m_defrag(*m_headp, M_NOWAIT); remap++; if (__predict_false(m_head == NULL)) goto defrag_failed; txq->ift_mbuf_defrag++; *m_headp = m_head; goto retry; break; case ENOMEM: txq->ift_no_tx_dma_setup++; break; default: txq->ift_no_tx_dma_setup++; m_freem(*m_headp); DBG_COUNTER_INC(tx_frees); *m_headp = NULL; break; } txq->ift_map_failed++; DBG_COUNTER_INC(encap_load_mbuf_fail); return (err); } /* * XXX assumes a 1 to 1 relationship between segments and * descriptors - this does not hold true on all drivers, e.g. * cxgb */ if (__predict_false(nsegs + 2 > TXQ_AVAIL(txq))) { txq->ift_no_desc_avail++; if (map != NULL) bus_dmamap_unload(desc_tag, map); DBG_COUNTER_INC(encap_txq_avail_fail); if ((txq->ift_task.gt_task.ta_flags & TASK_ENQUEUED) == 0) GROUPTASK_ENQUEUE(&txq->ift_task); return (ENOBUFS); } /* * On Intel cards we can greatly reduce the number of TX interrupts * we see by only setting report status on every Nth descriptor. * However, this also means that the driver will need to keep track * of the descriptors that RS was set on to check them for the DD bit. */ txq->ift_rs_pending += nsegs + 1; if (txq->ift_rs_pending > TXQ_MAX_RS_DEFERRED(txq) || iflib_no_tx_batch || (TXQ_AVAIL(txq) - nsegs - 1) <= MAX_TX_DESC(ctx)) { pi.ipi_flags |= IPI_TX_INTR; txq->ift_rs_pending = 0; } pi.ipi_segs = segs; pi.ipi_nsegs = nsegs; MPASS(pidx >= 0 && pidx < txq->ift_size); #ifdef PKT_DEBUG print_pkt(&pi); #endif if (map != NULL) bus_dmamap_sync(desc_tag, map, BUS_DMASYNC_PREWRITE); if ((err = ctx->isc_txd_encap(ctx->ifc_softc, &pi)) == 0) { if (map != NULL) bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_ifdi->idi_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); DBG_COUNTER_INC(tx_encap); MPASS(pi.ipi_new_pidx < txq->ift_size); ndesc = pi.ipi_new_pidx - pi.ipi_pidx; if (pi.ipi_new_pidx < pi.ipi_pidx) { ndesc += txq->ift_size; txq->ift_gen = 1; } /* * drivers can need as many as * two sentinels */ MPASS(ndesc <= pi.ipi_nsegs + 2); MPASS(pi.ipi_new_pidx != pidx); MPASS(ndesc > 0); txq->ift_in_use += ndesc; /* * We update the last software descriptor again here because there may * be a sentinel and/or there may be more mbufs than segments */ txq->ift_pidx = pi.ipi_new_pidx; txq->ift_npending += pi.ipi_ndescs; } else if (__predict_false(err == EFBIG && remap < 2)) { *m_headp = m_head = iflib_remove_mbuf(txq); remap = 1; txq->ift_txd_encap_efbig++; goto defrag; } else DBG_COUNTER_INC(encap_txd_encap_fail); return (err); defrag_failed: txq->ift_mbuf_defrag_failed++; txq->ift_map_failed++; m_freem(*m_headp); DBG_COUNTER_INC(tx_frees); *m_headp = NULL; return (ENOMEM); } static void iflib_tx_desc_free(iflib_txq_t txq, int n) { int hasmap; uint32_t qsize, cidx, mask, gen; struct mbuf *m, **ifsd_m; uint8_t *ifsd_flags; bus_dmamap_t *ifsd_map; bool do_prefetch; cidx = txq->ift_cidx; gen = txq->ift_gen; qsize = txq->ift_size; mask = qsize-1; hasmap = txq->ift_sds.ifsd_map != NULL; ifsd_flags = txq->ift_sds.ifsd_flags; ifsd_m = txq->ift_sds.ifsd_m; ifsd_map = txq->ift_sds.ifsd_map; do_prefetch = (txq->ift_ctx->ifc_flags & IFC_PREFETCH); while (n--) { if (do_prefetch) { prefetch(ifsd_m[(cidx + 3) & mask]); prefetch(ifsd_m[(cidx + 4) & mask]); } if (ifsd_m[cidx] != NULL) { prefetch(&ifsd_m[(cidx + CACHE_PTR_INCREMENT) & mask]); prefetch(&ifsd_flags[(cidx + CACHE_PTR_INCREMENT) & mask]); if (hasmap && (ifsd_flags[cidx] & TX_SW_DESC_MAPPED)) { /* * does it matter if it's not the TSO tag? If so we'll * have to add the type to flags */ bus_dmamap_unload(txq->ift_desc_tag, ifsd_map[cidx]); ifsd_flags[cidx] &= ~TX_SW_DESC_MAPPED; } if ((m = ifsd_m[cidx]) != NULL) { /* XXX we don't support any drivers that batch packets yet */ MPASS(m->m_nextpkt == NULL); /* if the number of clusters exceeds the number of segments * there won't be space on the ring to save a pointer to each * cluster so we simply free the list here */ if (m->m_flags & M_TOOBIG) { m_freem(m); } else { m_free(m); } ifsd_m[cidx] = NULL; #if MEMORY_LOGGING txq->ift_dequeued++; #endif DBG_COUNTER_INC(tx_frees); } } if (__predict_false(++cidx == qsize)) { cidx = 0; gen = 0; } } + txq_validate(txq); txq->ift_cidx = cidx; txq->ift_gen = gen; } static __inline int iflib_completed_tx_reclaim(iflib_txq_t txq, int thresh) { int reclaim; if_ctx_t ctx = txq->ift_ctx; KASSERT(thresh >= 0, ("invalid threshold to reclaim")); MPASS(thresh /*+ MAX_TX_DESC(txq->ift_ctx) */ < txq->ift_size); /* * Need a rate-limiting check so that this isn't called every time */ iflib_tx_credits_update(ctx, txq); reclaim = DESC_RECLAIMABLE(txq); if (reclaim <= thresh /* + MAX_TX_DESC(txq->ift_ctx) */) { #ifdef INVARIANTS if (iflib_verbose_debug) { printf("%s processed=%ju cleaned=%ju tx_nsegments=%d reclaim=%d thresh=%d\n", __FUNCTION__, txq->ift_processed, txq->ift_cleaned, txq->ift_ctx->ifc_softc_ctx.isc_tx_nsegments, reclaim, thresh); } #endif return (0); } iflib_tx_desc_free(txq, reclaim); txq->ift_cleaned += reclaim; txq->ift_in_use -= reclaim; return (reclaim); } static struct mbuf ** _ring_peek_one(struct ifmp_ring *r, int cidx, int offset, int remaining) { int next, size; struct mbuf **items; size = r->size; next = (cidx + CACHE_PTR_INCREMENT) & (size-1); items = __DEVOLATILE(struct mbuf **, &r->items[0]); prefetch(items[(cidx + offset) & (size-1)]); if (remaining > 1) { - prefetch(&items[next]); - prefetch(items[(cidx + offset + 1) & (size-1)]); - prefetch(items[(cidx + offset + 2) & (size-1)]); - prefetch(items[(cidx + offset + 3) & (size-1)]); + prefetch2(&items[next]); + prefetch2(items[(cidx + offset + 1) & (size-1)]); + prefetch2(items[(cidx + offset + 2) & (size-1)]); + prefetch2(items[(cidx + offset + 3) & (size-1)]); } return (__DEVOLATILE(struct mbuf **, &r->items[(cidx + offset) & (size-1)])); } static void iflib_txq_check_drain(iflib_txq_t txq, int budget) { ifmp_ring_check_drainage(txq->ift_br, budget); } static uint32_t iflib_txq_can_drain(struct ifmp_ring *r) { iflib_txq_t txq = r->cookie; if_ctx_t ctx = txq->ift_ctx; return ((TXQ_AVAIL(txq) > MAX_TX_DESC(ctx) + 2) || ctx->isc_txd_credits_update(ctx->ifc_softc, txq->ift_id, false)); } static uint32_t iflib_txq_drain(struct ifmp_ring *r, uint32_t cidx, uint32_t pidx) { iflib_txq_t txq = r->cookie; if_ctx_t ctx = txq->ift_ctx; struct ifnet *ifp = ctx->ifc_ifp; struct mbuf **mp, *m; int i, count, consumed, pkt_sent, bytes_sent, mcast_sent, avail; int reclaimed, err, in_use_prev, desc_used; bool do_prefetch, ring, rang; if (__predict_false(!(if_getdrvflags(ifp) & IFF_DRV_RUNNING) || !LINK_ACTIVE(ctx))) { DBG_COUNTER_INC(txq_drain_notready); return (0); } reclaimed = iflib_completed_tx_reclaim(txq, RECLAIM_THRESH(ctx)); rang = iflib_txd_db_check(ctx, txq, reclaimed, txq->ift_in_use); avail = IDXDIFF(pidx, cidx, r->size); if (__predict_false(ctx->ifc_flags & IFC_QFLUSH)) { DBG_COUNTER_INC(txq_drain_flushing); for (i = 0; i < avail; i++) { m_free(r->items[(cidx + i) & (r->size-1)]); r->items[(cidx + i) & (r->size-1)] = NULL; } return (avail); } if (__predict_false(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_OACTIVE)) { txq->ift_qstatus = IFLIB_QUEUE_IDLE; CALLOUT_LOCK(txq); callout_stop(&txq->ift_timer); CALLOUT_UNLOCK(txq); DBG_COUNTER_INC(txq_drain_oactive); return (0); } if (reclaimed) txq->ift_qstatus = IFLIB_QUEUE_IDLE; consumed = mcast_sent = bytes_sent = pkt_sent = 0; count = MIN(avail, TX_BATCH_SIZE); #ifdef INVARIANTS if (iflib_verbose_debug) printf("%s avail=%d ifc_flags=%x txq_avail=%d ", __FUNCTION__, avail, ctx->ifc_flags, TXQ_AVAIL(txq)); #endif do_prefetch = (ctx->ifc_flags & IFC_PREFETCH); avail = TXQ_AVAIL(txq); for (desc_used = i = 0; i < count && avail > MAX_TX_DESC(ctx) + 2; i++) { int pidx_prev, rem = do_prefetch ? count - i : 0; mp = _ring_peek_one(r, cidx, i, rem); MPASS(mp != NULL && *mp != NULL); if (__predict_false(*mp == (struct mbuf *)txq)) { consumed++; reclaimed++; continue; } in_use_prev = txq->ift_in_use; pidx_prev = txq->ift_pidx; err = iflib_encap(txq, mp); if (__predict_false(err)) { DBG_COUNTER_INC(txq_drain_encapfail); /* no room - bail out */ if (err == ENOBUFS) break; consumed++; DBG_COUNTER_INC(txq_drain_encapfail); /* we can't send this packet - skip it */ continue; } consumed++; pkt_sent++; m = *mp; DBG_COUNTER_INC(tx_sent); bytes_sent += m->m_pkthdr.len; mcast_sent += !!(m->m_flags & M_MCAST); avail = TXQ_AVAIL(txq); txq->ift_db_pending += (txq->ift_in_use - in_use_prev); desc_used += (txq->ift_in_use - in_use_prev); ETHER_BPF_MTAP(ifp, m); if (__predict_false(!(ifp->if_drv_flags & IFF_DRV_RUNNING))) break; rang = iflib_txd_db_check(ctx, txq, false, in_use_prev); } /* deliberate use of bitwise or to avoid gratuitous short-circuit */ ring = rang ? false : (iflib_min_tx_latency | err) || (TXQ_AVAIL(txq) < MAX_TX_DESC(ctx)); iflib_txd_db_check(ctx, txq, ring, txq->ift_in_use); if_inc_counter(ifp, IFCOUNTER_OBYTES, bytes_sent); if_inc_counter(ifp, IFCOUNTER_OPACKETS, pkt_sent); if (mcast_sent) if_inc_counter(ifp, IFCOUNTER_OMCASTS, mcast_sent); #ifdef INVARIANTS if (iflib_verbose_debug) printf("consumed=%d\n", consumed); #endif return (consumed); } static uint32_t iflib_txq_drain_always(struct ifmp_ring *r) { return (1); } static uint32_t iflib_txq_drain_free(struct ifmp_ring *r, uint32_t cidx, uint32_t pidx) { int i, avail; struct mbuf **mp; iflib_txq_t txq; txq = r->cookie; txq->ift_qstatus = IFLIB_QUEUE_IDLE; CALLOUT_LOCK(txq); callout_stop(&txq->ift_timer); CALLOUT_UNLOCK(txq); avail = IDXDIFF(pidx, cidx, r->size); for (i = 0; i < avail; i++) { mp = _ring_peek_one(r, cidx, i, avail - i); if (__predict_false(*mp == (struct mbuf *)txq)) continue; m_freem(*mp); } MPASS(ifmp_ring_is_stalled(r) == 0); return (avail); } static void iflib_ifmp_purge(iflib_txq_t txq) { struct ifmp_ring *r; r = txq->ift_br; r->drain = iflib_txq_drain_free; r->can_drain = iflib_txq_drain_always; ifmp_ring_check_drainage(r, r->size); r->drain = iflib_txq_drain; r->can_drain = iflib_txq_can_drain; } static void _task_fn_tx(void *context) { iflib_txq_t txq = context; if_ctx_t ctx = txq->ift_ctx; struct ifnet *ifp = ctx->ifc_ifp; int rc; #ifdef IFLIB_DIAGNOSTICS txq->ift_cpu_exec_count[curcpu]++; #endif if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING)) return; - if ((ifp->if_capenable & IFCAP_NETMAP)) { + if (if_getcapenable(ifp) & IFCAP_NETMAP) { if (ctx->isc_txd_credits_update(ctx->ifc_softc, txq->ift_id, false)) netmap_tx_irq(ifp, txq->ift_id); IFDI_TX_QUEUE_INTR_ENABLE(ctx, txq->ift_id); return; } if (txq->ift_db_pending) ifmp_ring_enqueue(txq->ift_br, (void **)&txq, 1, TX_BATCH_SIZE); - else - ifmp_ring_check_drainage(txq->ift_br, TX_BATCH_SIZE); + ifmp_ring_check_drainage(txq->ift_br, TX_BATCH_SIZE); if (ctx->ifc_flags & IFC_LEGACY) IFDI_INTR_ENABLE(ctx); else { rc = IFDI_TX_QUEUE_INTR_ENABLE(ctx, txq->ift_id); KASSERT(rc != ENOTSUP, ("MSI-X support requires queue_intr_enable, but not implemented in driver")); } } static void _task_fn_rx(void *context) { iflib_rxq_t rxq = context; if_ctx_t ctx = rxq->ifr_ctx; bool more; int rc; + uint16_t budget; #ifdef IFLIB_DIAGNOSTICS rxq->ifr_cpu_exec_count[curcpu]++; #endif DBG_COUNTER_INC(task_fn_rxs); if (__predict_false(!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING))) return; - if ((more = iflib_rxeof(rxq, 16 /* XXX */)) == false) { + more = true; +#ifdef DEV_NETMAP + if (if_getcapenable(ctx->ifc_ifp) & IFCAP_NETMAP) { + u_int work = 0; + if (netmap_rx_irq(ctx->ifc_ifp, rxq->ifr_id, &work)) { + more = false; + } + } +#endif + budget = ctx->ifc_sysctl_rx_budget; + if (budget == 0) + budget = 16; /* XXX */ + if (more == false || (more = iflib_rxeof(rxq, budget)) == false) { if (ctx->ifc_flags & IFC_LEGACY) IFDI_INTR_ENABLE(ctx); else { DBG_COUNTER_INC(rx_intr_enables); rc = IFDI_RX_QUEUE_INTR_ENABLE(ctx, rxq->ifr_id); KASSERT(rc != ENOTSUP, ("MSI-X support requires queue_intr_enable, but not implemented in driver")); } } if (__predict_false(!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING))) return; if (more) GROUPTASK_ENQUEUE(&rxq->ifr_task); } +/* CONFIG context only */ static void _task_fn_admin(void *context) { if_ctx_t ctx = context; if_softc_ctx_t sctx = &ctx->ifc_softc_ctx; iflib_txq_t txq; - int i; + int i, running; - if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING)) { - if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_OACTIVE)) { - return; - } - } - CTX_LOCK(ctx); + running = !!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING); + for (txq = ctx->ifc_txqs, i = 0; i < sctx->isc_ntxqsets; i++, txq++) { CALLOUT_LOCK(txq); callout_stop(&txq->ift_timer); CALLOUT_UNLOCK(txq); } - IFDI_UPDATE_ADMIN_STATUS(ctx); - for (txq = ctx->ifc_txqs, i = 0; i < sctx->isc_ntxqsets; i++, txq++) - callout_reset_on(&txq->ift_timer, hz/2, iflib_timer, txq, txq->ift_timer.c_cpu); - IFDI_LINK_INTR_ENABLE(ctx); + if (running) { + for (txq = ctx->ifc_txqs, i = 0; i < sctx->isc_ntxqsets; i++, txq++) + callout_reset_on(&txq->ift_timer, iflib_timer_int, iflib_timer, + txq, txq->ift_timer.c_cpu); + IFDI_LINK_INTR_ENABLE(ctx); + } if (ctx->ifc_flags & IFC_DO_RESET) { - ctx->ifc_flags &= ~IFC_DO_RESET; iflib_if_init_locked(ctx); + ctx->ifc_flags &= ~IFC_DO_RESET; } + IFDI_UPDATE_ADMIN_STATUS(ctx); CTX_UNLOCK(ctx); - if (LINK_ACTIVE(ctx) == 0) + if (LINK_ACTIVE(ctx) == 0 || !running) return; for (txq = ctx->ifc_txqs, i = 0; i < sctx->isc_ntxqsets; i++, txq++) iflib_txq_check_drain(txq, IFLIB_RESTART_BUDGET); } +/* CONFIG context only */ static void _task_fn_iov(void *context) { if_ctx_t ctx = context; if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING)) return; CTX_LOCK(ctx); IFDI_VFLR_HANDLE(ctx); CTX_UNLOCK(ctx); } static int iflib_sysctl_int_delay(SYSCTL_HANDLER_ARGS) { int err; if_int_delay_info_t info; if_ctx_t ctx; info = (if_int_delay_info_t)arg1; ctx = info->iidi_ctx; info->iidi_req = req; info->iidi_oidp = oidp; CTX_LOCK(ctx); err = IFDI_SYSCTL_INT_DELAY(ctx, info); CTX_UNLOCK(ctx); return (err); } /********************************************************************* * * IFNET FUNCTIONS * **********************************************************************/ static void iflib_if_init_locked(if_ctx_t ctx) { iflib_stop(ctx); iflib_init_locked(ctx); } static void iflib_if_init(void *arg) { if_ctx_t ctx = arg; CTX_LOCK(ctx); iflib_if_init_locked(ctx); CTX_UNLOCK(ctx); } static int iflib_if_transmit(if_t ifp, struct mbuf *m) { if_ctx_t ctx = if_getsoftc(ifp); iflib_txq_t txq; int err, qidx; if (__predict_false((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || !LINK_ACTIVE(ctx))) { DBG_COUNTER_INC(tx_frees); m_freem(m); return (ENOBUFS); } MPASS(m->m_nextpkt == NULL); qidx = 0; if ((NTXQSETS(ctx) > 1) && M_HASHTYPE_GET(m)) qidx = QIDX(ctx, m); /* * XXX calculate buf_ring based on flowid (divvy up bits?) */ txq = &ctx->ifc_txqs[qidx]; #ifdef DRIVER_BACKPRESSURE if (txq->ift_closed) { while (m != NULL) { next = m->m_nextpkt; m->m_nextpkt = NULL; m_freem(m); m = next; } return (ENOBUFS); } #endif #ifdef notyet qidx = count = 0; mp = marr; next = m; do { count++; next = next->m_nextpkt; } while (next != NULL); if (count > nitems(marr)) if ((mp = malloc(count*sizeof(struct mbuf *), M_IFLIB, M_NOWAIT)) == NULL) { /* XXX check nextpkt */ m_freem(m); /* XXX simplify for now */ DBG_COUNTER_INC(tx_frees); return (ENOBUFS); } for (next = m, i = 0; next != NULL; i++) { mp[i] = next; next = next->m_nextpkt; mp[i]->m_nextpkt = NULL; } #endif DBG_COUNTER_INC(tx_seen); err = ifmp_ring_enqueue(txq->ift_br, (void **)&m, 1, TX_BATCH_SIZE); + GROUPTASK_ENQUEUE(&txq->ift_task); if (err) { - GROUPTASK_ENQUEUE(&txq->ift_task); /* support forthcoming later */ #ifdef DRIVER_BACKPRESSURE txq->ift_closed = TRUE; #endif ifmp_ring_check_drainage(txq->ift_br, TX_BATCH_SIZE); m_freem(m); - } else if (TXQ_AVAIL(txq) < (txq->ift_size >> 1)) { - GROUPTASK_ENQUEUE(&txq->ift_task); } return (err); } +/* CONFIG context only */ static void iflib_if_qflush(if_t ifp) { if_ctx_t ctx = if_getsoftc(ifp); iflib_txq_t txq = ctx->ifc_txqs; int i; CTX_LOCK(ctx); ctx->ifc_flags |= IFC_QFLUSH; CTX_UNLOCK(ctx); for (i = 0; i < NTXQSETS(ctx); i++, txq++) while (!(ifmp_ring_is_idle(txq->ift_br) || ifmp_ring_is_stalled(txq->ift_br))) iflib_txq_check_drain(txq, 0); CTX_LOCK(ctx); ctx->ifc_flags &= ~IFC_QFLUSH; CTX_UNLOCK(ctx); if_qflush(ifp); } #define IFCAP_FLAGS (IFCAP_TXCSUM_IPV6 | IFCAP_RXCSUM_IPV6 | IFCAP_HWCSUM | IFCAP_LRO | \ IFCAP_TSO4 | IFCAP_TSO6 | IFCAP_VLAN_HWTAGGING | IFCAP_HWSTATS | \ IFCAP_VLAN_MTU | IFCAP_VLAN_HWFILTER | IFCAP_VLAN_HWTSO) static int iflib_if_ioctl(if_t ifp, u_long command, caddr_t data) { if_ctx_t ctx = if_getsoftc(ifp); struct ifreq *ifr = (struct ifreq *)data; #if defined(INET) || defined(INET6) struct ifaddr *ifa = (struct ifaddr *)data; #endif bool avoid_reset = FALSE; int err = 0, reinit = 0, bits; switch (command) { case SIOCSIFADDR: #ifdef INET if (ifa->ifa_addr->sa_family == AF_INET) avoid_reset = TRUE; #endif #ifdef INET6 if (ifa->ifa_addr->sa_family == AF_INET6) avoid_reset = TRUE; #endif /* ** Calling init results in link renegotiation, ** so we avoid doing it when possible. */ if (avoid_reset) { if_setflagbits(ifp, IFF_UP,0); if (!(if_getdrvflags(ifp)& IFF_DRV_RUNNING)) reinit = 1; #ifdef INET if (!(if_getflags(ifp) & IFF_NOARP)) arp_ifinit(ifp, ifa); #endif } else err = ether_ioctl(ifp, command, data); break; case SIOCSIFMTU: CTX_LOCK(ctx); if (ifr->ifr_mtu == if_getmtu(ifp)) { CTX_UNLOCK(ctx); break; } bits = if_getdrvflags(ifp); /* stop the driver and free any clusters before proceeding */ iflib_stop(ctx); if ((err = IFDI_MTU_SET(ctx, ifr->ifr_mtu)) == 0) { if (ifr->ifr_mtu > ctx->ifc_max_fl_buf_size) ctx->ifc_flags |= IFC_MULTISEG; else ctx->ifc_flags &= ~IFC_MULTISEG; err = if_setmtu(ifp, ifr->ifr_mtu); } iflib_init_locked(ctx); if_setdrvflags(ifp, bits); CTX_UNLOCK(ctx); break; case SIOCSIFFLAGS: - CTX_LOCK(ctx); - if (if_getflags(ifp) & IFF_UP) { - if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { - if ((if_getflags(ifp) ^ ctx->ifc_if_flags) & - (IFF_PROMISC | IFF_ALLMULTI)) { - err = IFDI_PROMISC_SET(ctx, if_getflags(ifp)); - } - } else - reinit = 1; - } else if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { - iflib_stop(ctx); - } - ctx->ifc_if_flags = if_getflags(ifp); - CTX_UNLOCK(ctx); + err = async_if_ioctl(ctx, command, data); break; case SIOCADDMULTI: case SIOCDELMULTI: if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { - CTX_LOCK(ctx); - IFDI_INTR_DISABLE(ctx); - IFDI_MULTI_SET(ctx); - IFDI_INTR_ENABLE(ctx); - CTX_UNLOCK(ctx); + err = async_if_ioctl(ctx, command, data); } break; case SIOCSIFMEDIA: CTX_LOCK(ctx); IFDI_MEDIA_SET(ctx); CTX_UNLOCK(ctx); /* falls thru */ case SIOCGIFMEDIA: err = ifmedia_ioctl(ifp, ifr, &ctx->ifc_media, command); break; case SIOCGI2C: { struct ifi2creq i2c; err = copyin(ifr->ifr_data, &i2c, sizeof(i2c)); if (err != 0) break; if (i2c.dev_addr != 0xA0 && i2c.dev_addr != 0xA2) { err = EINVAL; break; } if (i2c.len > sizeof(i2c.data)) { err = EINVAL; break; } if ((err = IFDI_I2C_REQ(ctx, &i2c)) == 0) err = copyout(&i2c, ifr->ifr_data, sizeof(i2c)); break; } case SIOCSIFCAP: { int mask, setmask; mask = ifr->ifr_reqcap ^ if_getcapenable(ifp); setmask = 0; #ifdef TCP_OFFLOAD setmask |= mask & (IFCAP_TOE4|IFCAP_TOE6); #endif setmask |= (mask & IFCAP_FLAGS); if (setmask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) setmask |= (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6); if ((mask & IFCAP_WOL) && (if_getcapabilities(ifp) & IFCAP_WOL) != 0) setmask |= (mask & (IFCAP_WOL_MCAST|IFCAP_WOL_MAGIC)); if_vlancap(ifp); /* * want to ensure that traffic has stopped before we change any of the flags */ if (setmask) { CTX_LOCK(ctx); bits = if_getdrvflags(ifp); if (bits & IFF_DRV_RUNNING) iflib_stop(ctx); if_togglecapenable(ifp, setmask); if (bits & IFF_DRV_RUNNING) iflib_init_locked(ctx); if_setdrvflags(ifp, bits); CTX_UNLOCK(ctx); } break; } case SIOCGPRIVATE_0: case SIOCSDRVSPEC: case SIOCGDRVSPEC: CTX_LOCK(ctx); err = IFDI_PRIV_IOCTL(ctx, command, data); CTX_UNLOCK(ctx); break; default: err = ether_ioctl(ifp, command, data); break; } if (reinit) iflib_if_init(ctx); return (err); } static uint64_t iflib_if_get_counter(if_t ifp, ift_counter cnt) { if_ctx_t ctx = if_getsoftc(ifp); return (IFDI_GET_COUNTER(ctx, cnt)); } /********************************************************************* * * OTHER FUNCTIONS EXPORTED TO THE STACK * **********************************************************************/ +/* CONFIG context only */ static void iflib_vlan_register(void *arg, if_t ifp, uint16_t vtag) { if_ctx_t ctx = if_getsoftc(ifp); if ((void *)ctx != arg) return; if ((vtag == 0) || (vtag > 4095)) return; CTX_LOCK(ctx); IFDI_VLAN_REGISTER(ctx, vtag); /* Re-init to load the changes */ if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER) iflib_if_init_locked(ctx); CTX_UNLOCK(ctx); } +/* CONFIG context only */ static void iflib_vlan_unregister(void *arg, if_t ifp, uint16_t vtag) { if_ctx_t ctx = if_getsoftc(ifp); if ((void *)ctx != arg) return; if ((vtag == 0) || (vtag > 4095)) return; CTX_LOCK(ctx); IFDI_VLAN_UNREGISTER(ctx, vtag); /* Re-init to load the changes */ if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER) iflib_if_init_locked(ctx); CTX_UNLOCK(ctx); } +/* CONFIG context only */ static void iflib_led_func(void *arg, int onoff) { if_ctx_t ctx = arg; CTX_LOCK(ctx); IFDI_LED_FUNC(ctx, onoff); CTX_UNLOCK(ctx); } /********************************************************************* * * BUS FUNCTION DEFINITIONS * **********************************************************************/ int iflib_device_probe(device_t dev) { pci_vendor_info_t *ent; uint16_t pci_vendor_id, pci_device_id; uint16_t pci_subvendor_id, pci_subdevice_id; uint16_t pci_rev_id; if_shared_ctx_t sctx; if ((sctx = DEVICE_REGISTER(dev)) == NULL || sctx->isc_magic != IFLIB_MAGIC) return (ENOTSUP); pci_vendor_id = pci_get_vendor(dev); pci_device_id = pci_get_device(dev); pci_subvendor_id = pci_get_subvendor(dev); pci_subdevice_id = pci_get_subdevice(dev); pci_rev_id = pci_get_revid(dev); if (sctx->isc_parse_devinfo != NULL) sctx->isc_parse_devinfo(&pci_device_id, &pci_subvendor_id, &pci_subdevice_id, &pci_rev_id); ent = sctx->isc_vendor_info; while (ent->pvi_vendor_id != 0) { if (pci_vendor_id != ent->pvi_vendor_id) { ent++; continue; } if ((pci_device_id == ent->pvi_device_id) && ((pci_subvendor_id == ent->pvi_subvendor_id) || (ent->pvi_subvendor_id == 0)) && ((pci_subdevice_id == ent->pvi_subdevice_id) || (ent->pvi_subdevice_id == 0)) && ((pci_rev_id == ent->pvi_rev_id) || (ent->pvi_rev_id == 0))) { device_set_desc_copy(dev, ent->pvi_name); /* this needs to be changed to zero if the bus probing code * ever stops re-probing on best match because the sctx * may have its values over written by register calls * in subsequent probes */ return (BUS_PROBE_DEFAULT); } ent++; } return (ENXIO); } int iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ctxp) { int err, rid, msix, msix_bar; if_ctx_t ctx; if_t ifp; if_softc_ctx_t scctx; int i; uint16_t main_txq; uint16_t main_rxq; ctx = malloc(sizeof(* ctx), M_IFLIB, M_WAITOK|M_ZERO); if (sc == NULL) { sc = malloc(sctx->isc_driver->size, M_IFLIB, M_WAITOK|M_ZERO); device_set_softc(dev, ctx); ctx->ifc_flags |= IFC_SC_ALLOCATED; } ctx->ifc_sctx = sctx; ctx->ifc_dev = dev; ctx->ifc_softc = sc; if ((err = iflib_register(ctx)) != 0) { device_printf(dev, "iflib_register failed %d\n", err); return (err); } iflib_add_device_sysctl_pre(ctx); scctx = &ctx->ifc_softc_ctx; ifp = ctx->ifc_ifp; /* * XXX sanity check that ntxd & nrxd are a power of 2 */ if (ctx->ifc_sysctl_ntxqs != 0) scctx->isc_ntxqsets = ctx->ifc_sysctl_ntxqs; if (ctx->ifc_sysctl_nrxqs != 0) scctx->isc_nrxqsets = ctx->ifc_sysctl_nrxqs; for (i = 0; i < sctx->isc_ntxqs; i++) { if (ctx->ifc_sysctl_ntxds[i] != 0) scctx->isc_ntxd[i] = ctx->ifc_sysctl_ntxds[i]; else scctx->isc_ntxd[i] = sctx->isc_ntxd_default[i]; } for (i = 0; i < sctx->isc_nrxqs; i++) { if (ctx->ifc_sysctl_nrxds[i] != 0) scctx->isc_nrxd[i] = ctx->ifc_sysctl_nrxds[i]; else scctx->isc_nrxd[i] = sctx->isc_nrxd_default[i]; } for (i = 0; i < sctx->isc_nrxqs; i++) { if (scctx->isc_nrxd[i] < sctx->isc_nrxd_min[i]) { device_printf(dev, "nrxd%d: %d less than nrxd_min %d - resetting to min\n", i, scctx->isc_nrxd[i], sctx->isc_nrxd_min[i]); scctx->isc_nrxd[i] = sctx->isc_nrxd_min[i]; } if (scctx->isc_nrxd[i] > sctx->isc_nrxd_max[i]) { device_printf(dev, "nrxd%d: %d greater than nrxd_max %d - resetting to max\n", i, scctx->isc_nrxd[i], sctx->isc_nrxd_max[i]); scctx->isc_nrxd[i] = sctx->isc_nrxd_max[i]; } } for (i = 0; i < sctx->isc_ntxqs; i++) { if (scctx->isc_ntxd[i] < sctx->isc_ntxd_min[i]) { device_printf(dev, "ntxd%d: %d less than ntxd_min %d - resetting to min\n", i, scctx->isc_ntxd[i], sctx->isc_ntxd_min[i]); scctx->isc_ntxd[i] = sctx->isc_ntxd_min[i]; } if (scctx->isc_ntxd[i] > sctx->isc_ntxd_max[i]) { device_printf(dev, "ntxd%d: %d greater than ntxd_max %d - resetting to max\n", i, scctx->isc_ntxd[i], sctx->isc_ntxd_max[i]); scctx->isc_ntxd[i] = sctx->isc_ntxd_max[i]; } } - - if ((err = IFDI_ATTACH_PRE(ctx)) != 0) { + CTX_LOCK(ctx); + err = IFDI_ATTACH_PRE(ctx); + CTX_UNLOCK(ctx); + if (err) { device_printf(dev, "IFDI_ATTACH_PRE failed %d\n", err); return (err); } _iflib_pre_assert(scctx); ctx->ifc_txrx = *scctx->isc_txrx; #ifdef INVARIANTS MPASS(scctx->isc_capenable); if (scctx->isc_capenable & IFCAP_TXCSUM) MPASS(scctx->isc_tx_csum_flags); #endif if_setcapabilities(ifp, scctx->isc_capenable | IFCAP_HWSTATS); if_setcapenable(ifp, scctx->isc_capenable | IFCAP_HWSTATS); if (scctx->isc_ntxqsets == 0 || (scctx->isc_ntxqsets_max && scctx->isc_ntxqsets_max < scctx->isc_ntxqsets)) scctx->isc_ntxqsets = scctx->isc_ntxqsets_max; if (scctx->isc_nrxqsets == 0 || (scctx->isc_nrxqsets_max && scctx->isc_nrxqsets_max < scctx->isc_nrxqsets)) scctx->isc_nrxqsets = scctx->isc_nrxqsets_max; #ifdef ACPI_DMAR if (dmar_get_dma_tag(device_get_parent(dev), dev) != NULL) ctx->ifc_flags |= IFC_DMAR; #elif !(defined(__i386__) || defined(__amd64__)) /* set unconditionally for !x86 */ ctx->ifc_flags |= IFC_DMAR; #endif + if (force_busdma) + ctx->ifc_flags |= IFC_DMAR; msix_bar = scctx->isc_msix_bar; main_txq = (sctx->isc_flags & IFLIB_HAS_TXCQ) ? 1 : 0; main_rxq = (sctx->isc_flags & IFLIB_HAS_RXCQ) ? 1 : 0; /* XXX change for per-queue sizes */ device_printf(dev, "using %d tx descriptors and %d rx descriptors\n", scctx->isc_ntxd[main_txq], scctx->isc_nrxd[main_rxq]); for (i = 0; i < sctx->isc_nrxqs; i++) { if (!powerof2(scctx->isc_nrxd[i])) { /* round down instead? */ device_printf(dev, "# rx descriptors must be a power of 2\n"); + err = EINVAL; goto fail; } } for (i = 0; i < sctx->isc_ntxqs; i++) { if (!powerof2(scctx->isc_ntxd[i])) { device_printf(dev, "# tx descriptors must be a power of 2"); err = EINVAL; goto fail; } } if (scctx->isc_tx_nsegments > scctx->isc_ntxd[main_txq] / MAX_SINGLE_PACKET_FRACTION) scctx->isc_tx_nsegments = max(1, scctx->isc_ntxd[main_txq] / MAX_SINGLE_PACKET_FRACTION); if (scctx->isc_tx_tso_segments_max > scctx->isc_ntxd[main_txq] / MAX_SINGLE_PACKET_FRACTION) scctx->isc_tx_tso_segments_max = max(1, scctx->isc_ntxd[main_txq] / MAX_SINGLE_PACKET_FRACTION); /* * Protect the stack against modern hardware */ if (scctx->isc_tx_tso_size_max > FREEBSD_TSO_SIZE_MAX) scctx->isc_tx_tso_size_max = FREEBSD_TSO_SIZE_MAX; /* TSO parameters - dig these out of the data sheet - simply correspond to tag setup */ ifp->if_hw_tsomaxsegcount = scctx->isc_tx_tso_segments_max; ifp->if_hw_tsomax = scctx->isc_tx_tso_size_max; ifp->if_hw_tsomaxsegsize = scctx->isc_tx_tso_segsize_max; if (scctx->isc_rss_table_size == 0) scctx->isc_rss_table_size = 64; scctx->isc_rss_table_mask = scctx->isc_rss_table_size-1; GROUPTASK_INIT(&ctx->ifc_admin_task, 0, _task_fn_admin, ctx); /* XXX format name */ - taskqgroup_attach(qgroup_if_config_tqg, &ctx->ifc_admin_task, ctx, -1, "admin"); + taskqgroup_attach(qgroup_if_config, &ctx->ifc_admin_task, ctx, -1, "admin"); /* ** Now setup MSI or MSI/X, should ** return us the number of supported ** vectors. (Will be 1 for MSI) */ if (sctx->isc_flags & IFLIB_SKIP_MSIX) { msix = scctx->isc_vectors; } else if (scctx->isc_msix_bar != 0) /* * The simple fact that isc_msix_bar is not 0 does not mean we * we have a good value there that is known to work. */ msix = iflib_msix_init(ctx); else { scctx->isc_vectors = 1; scctx->isc_ntxqsets = 1; scctx->isc_nrxqsets = 1; scctx->isc_intr = IFLIB_INTR_LEGACY; msix = 0; } /* Get memory for the station queues */ if ((err = iflib_queues_alloc(ctx))) { device_printf(dev, "Unable to allocate queue memory\n"); goto fail; } if ((err = iflib_qset_structures_setup(ctx))) { device_printf(dev, "qset structure setup failed %d\n", err); goto fail_queues; } /* * Group taskqueues aren't properly set up until SMP is started, * so we disable interrupts until we can handle them post * SI_SUB_SMP. * * XXX: disabling interrupts doesn't actually work, at least for * the non-MSI case. When they occur before SI_SUB_SMP completes, * we do null handling and depend on this not causing too large an * interrupt storm. */ IFDI_INTR_DISABLE(ctx); if (msix > 1 && (err = IFDI_MSIX_INTR_ASSIGN(ctx, msix)) != 0) { device_printf(dev, "IFDI_MSIX_INTR_ASSIGN failed %d\n", err); goto fail_intr_free; } if (msix <= 1) { rid = 0; if (scctx->isc_intr == IFLIB_INTR_MSI) { MPASS(msix == 1); rid = 1; } if ((err = iflib_legacy_setup(ctx, ctx->isc_legacy_intr, ctx->ifc_softc, &rid, "irq0")) != 0) { device_printf(dev, "iflib_legacy_setup failed %d\n", err); goto fail_intr_free; } } ether_ifattach(ctx->ifc_ifp, ctx->ifc_mac); - if ((err = IFDI_ATTACH_POST(ctx)) != 0) { + CTX_LOCK(ctx); + err = IFDI_ATTACH_POST(ctx); + CTX_UNLOCK(ctx); + if (err) { device_printf(dev, "IFDI_ATTACH_POST failed %d\n", err); goto fail_detach; } if ((err = iflib_netmap_attach(ctx))) { device_printf(ctx->ifc_dev, "netmap attach failed: %d\n", err); goto fail_detach; } *ctxp = ctx; if_setgetcounterfn(ctx->ifc_ifp, iflib_if_get_counter); iflib_add_device_sysctl_post(ctx); + iflib_ctx_insert(ctx); ctx->ifc_flags |= IFC_INIT_DONE; return (0); fail_detach: ether_ifdetach(ctx->ifc_ifp); fail_intr_free: if (scctx->isc_intr == IFLIB_INTR_MSIX || scctx->isc_intr == IFLIB_INTR_MSI) pci_release_msi(ctx->ifc_dev); fail_queues: /* XXX free queues */ fail: + CTX_LOCK(ctx); IFDI_DETACH(ctx); + CTX_UNLOCK(ctx); return (err); } int iflib_device_attach(device_t dev) { if_ctx_t ctx; if_shared_ctx_t sctx; if ((sctx = DEVICE_REGISTER(dev)) == NULL || sctx->isc_magic != IFLIB_MAGIC) return (ENOTSUP); pci_enable_busmaster(dev); return (iflib_device_register(dev, NULL, sctx, &ctx)); } int iflib_device_deregister(if_ctx_t ctx) { if_t ifp = ctx->ifc_ifp; iflib_txq_t txq; iflib_rxq_t rxq; device_t dev = ctx->ifc_dev; int i, j; struct taskqgroup *tqg; iflib_fl_t fl; /* Make sure VLANS are not using driver */ if (if_vlantrunkinuse(ifp)) { device_printf(dev,"Vlan in use, detach first\n"); return (EBUSY); } CTX_LOCK(ctx); ctx->ifc_in_detach = 1; iflib_stop(ctx); CTX_UNLOCK(ctx); /* Unregister VLAN events */ if (ctx->ifc_vlan_attach_event != NULL) EVENTHANDLER_DEREGISTER(vlan_config, ctx->ifc_vlan_attach_event); if (ctx->ifc_vlan_detach_event != NULL) EVENTHANDLER_DEREGISTER(vlan_unconfig, ctx->ifc_vlan_detach_event); iflib_netmap_detach(ifp); ether_ifdetach(ifp); - /* ether_ifdetach calls if_qflush - lock must be destroy afterwards*/ - CTX_LOCK_DESTROY(ctx); if (ctx->ifc_led_dev != NULL) led_destroy(ctx->ifc_led_dev); /* XXX drain any dependent tasks */ - tqg = qgroup_if_io_tqg; + tqg = qgroup_if_io; for (txq = ctx->ifc_txqs, i = 0; i < NTXQSETS(ctx); i++, txq++) { callout_drain(&txq->ift_timer); if (txq->ift_task.gt_uniq != NULL) taskqgroup_detach(tqg, &txq->ift_task); } for (i = 0, rxq = ctx->ifc_rxqs; i < NRXQSETS(ctx); i++, rxq++) { if (rxq->ifr_task.gt_uniq != NULL) taskqgroup_detach(tqg, &rxq->ifr_task); for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) free(fl->ifl_rx_bitmap, M_IFLIB); } - tqg = qgroup_if_config_tqg; + tqg = qgroup_if_config; if (ctx->ifc_admin_task.gt_uniq != NULL) taskqgroup_detach(tqg, &ctx->ifc_admin_task); if (ctx->ifc_vflr_task.gt_uniq != NULL) taskqgroup_detach(tqg, &ctx->ifc_vflr_task); + CTX_LOCK(ctx); IFDI_DETACH(ctx); + CTX_UNLOCK(ctx); + CTX_LOCK_DESTROY(ctx); device_set_softc(ctx->ifc_dev, NULL); if (ctx->ifc_softc_ctx.isc_intr != IFLIB_INTR_LEGACY) { pci_release_msi(dev); } if (ctx->ifc_softc_ctx.isc_intr != IFLIB_INTR_MSIX) { iflib_irq_free(ctx, &ctx->ifc_legacy_irq); } if (ctx->ifc_msix_mem != NULL) { bus_release_resource(ctx->ifc_dev, SYS_RES_MEMORY, ctx->ifc_softc_ctx.isc_msix_bar, ctx->ifc_msix_mem); ctx->ifc_msix_mem = NULL; } bus_generic_detach(dev); if_free(ifp); iflib_tx_structures_free(ctx); iflib_rx_structures_free(ctx); if (ctx->ifc_flags & IFC_SC_ALLOCATED) free(ctx->ifc_softc, M_IFLIB); + iflib_ctx_remove(ctx); free(ctx, M_IFLIB); return (0); } int iflib_device_detach(device_t dev) { if_ctx_t ctx = device_get_softc(dev); return (iflib_device_deregister(ctx)); } int iflib_device_suspend(device_t dev) { if_ctx_t ctx = device_get_softc(dev); CTX_LOCK(ctx); IFDI_SUSPEND(ctx); CTX_UNLOCK(ctx); return bus_generic_suspend(dev); } int iflib_device_shutdown(device_t dev) { if_ctx_t ctx = device_get_softc(dev); CTX_LOCK(ctx); IFDI_SHUTDOWN(ctx); CTX_UNLOCK(ctx); return bus_generic_suspend(dev); } int iflib_device_resume(device_t dev) { if_ctx_t ctx = device_get_softc(dev); iflib_txq_t txq = ctx->ifc_txqs; CTX_LOCK(ctx); IFDI_RESUME(ctx); iflib_init_locked(ctx); CTX_UNLOCK(ctx); for (int i = 0; i < NTXQSETS(ctx); i++, txq++) iflib_txq_check_drain(txq, IFLIB_RESTART_BUDGET); return (bus_generic_resume(dev)); } int iflib_device_iov_init(device_t dev, uint16_t num_vfs, const nvlist_t *params) { int error; if_ctx_t ctx = device_get_softc(dev); CTX_LOCK(ctx); error = IFDI_IOV_INIT(ctx, num_vfs, params); CTX_UNLOCK(ctx); return (error); } void iflib_device_iov_uninit(device_t dev) { if_ctx_t ctx = device_get_softc(dev); CTX_LOCK(ctx); IFDI_IOV_UNINIT(ctx); CTX_UNLOCK(ctx); } int iflib_device_iov_add_vf(device_t dev, uint16_t vfnum, const nvlist_t *params) { int error; if_ctx_t ctx = device_get_softc(dev); CTX_LOCK(ctx); error = IFDI_IOV_VF_ADD(ctx, vfnum, params); CTX_UNLOCK(ctx); return (error); } /********************************************************************* * * MODULE FUNCTION DEFINITIONS * **********************************************************************/ -/* - * - Start a fast taskqueue thread for each core - * - Start a taskqueue for control operations - */ static int iflib_module_init(void) { + + iflib_timer_int = hz / 2; + TUNABLE_INT_FETCH("net.iflib.timer_int", &iflib_timer_int); + LIST_INIT(&ctx_list); + mtx_init(&ctx_list_lock, "ctx list", NULL, MTX_DEF); return (0); } static int iflib_module_event_handler(module_t mod, int what, void *arg) { int err; switch (what) { case MOD_LOAD: if ((err = iflib_module_init()) != 0) return (err); break; case MOD_UNLOAD: return (EBUSY); default: return (EOPNOTSUPP); } return (0); } /********************************************************************* * * PUBLIC FUNCTION DEFINITIONS * ordered as in iflib.h * **********************************************************************/ static void _iflib_assert(if_shared_ctx_t sctx) { MPASS(sctx->isc_tx_maxsize); MPASS(sctx->isc_tx_maxsegsize); MPASS(sctx->isc_rx_maxsize); MPASS(sctx->isc_rx_nsegments); MPASS(sctx->isc_rx_maxsegsize); MPASS(sctx->isc_nrxd_min[0]); MPASS(sctx->isc_nrxd_max[0]); MPASS(sctx->isc_nrxd_default[0]); MPASS(sctx->isc_ntxd_min[0]); MPASS(sctx->isc_ntxd_max[0]); MPASS(sctx->isc_ntxd_default[0]); } static void _iflib_pre_assert(if_softc_ctx_t scctx) { MPASS(scctx->isc_txrx->ift_txd_encap); MPASS(scctx->isc_txrx->ift_txd_flush); MPASS(scctx->isc_txrx->ift_txd_credits_update); MPASS(scctx->isc_txrx->ift_rxd_available); MPASS(scctx->isc_txrx->ift_rxd_pkt_get); MPASS(scctx->isc_txrx->ift_rxd_refill); MPASS(scctx->isc_txrx->ift_rxd_flush); } static int iflib_register(if_ctx_t ctx) { if_shared_ctx_t sctx = ctx->ifc_sctx; driver_t *driver = sctx->isc_driver; device_t dev = ctx->ifc_dev; if_t ifp; _iflib_assert(sctx); CTX_LOCK_INIT(ctx, device_get_nameunit(ctx->ifc_dev)); ifp = ctx->ifc_ifp = if_gethandle(IFT_ETHER); if (ifp == NULL) { device_printf(dev, "can not allocate ifnet structure\n"); return (ENOMEM); } /* * Initialize our context's device specific methods */ kobj_init((kobj_t) ctx, (kobj_class_t) driver); kobj_class_compile((kobj_class_t) driver); driver->refs++; if_initname(ifp, device_get_name(dev), device_get_unit(dev)); if_setsoftc(ifp, ctx); if_setdev(ifp, dev); if_setinitfn(ifp, iflib_if_init); if_setioctlfn(ifp, iflib_if_ioctl); if_settransmitfn(ifp, iflib_if_transmit); if_setqflushfn(ifp, iflib_if_qflush); if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST); ctx->ifc_vlan_attach_event = EVENTHANDLER_REGISTER(vlan_config, iflib_vlan_register, ctx, EVENTHANDLER_PRI_FIRST); ctx->ifc_vlan_detach_event = EVENTHANDLER_REGISTER(vlan_unconfig, iflib_vlan_unregister, ctx, EVENTHANDLER_PRI_FIRST); ifmedia_init(&ctx->ifc_media, IFM_IMASK, iflib_media_change, iflib_media_status); return (0); } static int iflib_queues_alloc(if_ctx_t ctx) { if_shared_ctx_t sctx = ctx->ifc_sctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; device_t dev = ctx->ifc_dev; int nrxqsets = scctx->isc_nrxqsets; int ntxqsets = scctx->isc_ntxqsets; iflib_txq_t txq; iflib_rxq_t rxq; iflib_fl_t fl = NULL; int i, j, cpu, err, txconf, rxconf; iflib_dma_info_t ifdip; uint32_t *rxqsizes = scctx->isc_rxqsizes; uint32_t *txqsizes = scctx->isc_txqsizes; uint8_t nrxqs = sctx->isc_nrxqs; uint8_t ntxqs = sctx->isc_ntxqs; int nfree_lists = sctx->isc_nfl ? sctx->isc_nfl : 1; caddr_t *vaddrs; uint64_t *paddrs; struct ifmp_ring **brscp; KASSERT(ntxqs > 0, ("number of queues per qset must be at least 1")); KASSERT(nrxqs > 0, ("number of queues per qset must be at least 1")); brscp = NULL; txq = NULL; rxq = NULL; /* Allocate the TX ring struct memory */ if (!(txq = (iflib_txq_t) malloc(sizeof(struct iflib_txq) * ntxqsets, M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate TX ring memory\n"); err = ENOMEM; goto fail; } /* Now allocate the RX */ if (!(rxq = (iflib_rxq_t) malloc(sizeof(struct iflib_rxq) * nrxqsets, M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate RX ring memory\n"); err = ENOMEM; goto rx_fail; } ctx->ifc_txqs = txq; ctx->ifc_rxqs = rxq; /* * XXX handle allocation failure */ for (txconf = i = 0, cpu = CPU_FIRST(); i < ntxqsets; i++, txconf++, txq++, cpu = CPU_NEXT(cpu)) { /* Set up some basics */ if ((ifdip = malloc(sizeof(struct iflib_dma_info) * ntxqs, M_IFLIB, M_WAITOK|M_ZERO)) == NULL) { device_printf(dev, "failed to allocate iflib_dma_info\n"); err = ENOMEM; goto err_tx_desc; } txq->ift_ifdi = ifdip; for (j = 0; j < ntxqs; j++, ifdip++) { if (iflib_dma_alloc(ctx, txqsizes[j], ifdip, BUS_DMA_NOWAIT)) { device_printf(dev, "Unable to allocate Descriptor memory\n"); err = ENOMEM; goto err_tx_desc; } txq->ift_txd_size[j] = scctx->isc_txd_size[j]; bzero((void *)ifdip->idi_vaddr, txqsizes[j]); } txq->ift_ctx = ctx; txq->ift_id = i; if (sctx->isc_flags & IFLIB_HAS_TXCQ) { txq->ift_br_offset = 1; } else { txq->ift_br_offset = 0; } /* XXX fix this */ txq->ift_timer.c_cpu = cpu; if (iflib_txsd_alloc(txq)) { device_printf(dev, "Critical Failure setting up TX buffers\n"); err = ENOMEM; goto err_tx_desc; } /* Initialize the TX lock */ snprintf(txq->ift_mtx_name, MTX_NAME_LEN, "%s:tx(%d):callout", device_get_nameunit(dev), txq->ift_id); mtx_init(&txq->ift_mtx, txq->ift_mtx_name, NULL, MTX_DEF); callout_init_mtx(&txq->ift_timer, &txq->ift_mtx, 0); snprintf(txq->ift_db_mtx_name, MTX_NAME_LEN, "%s:tx(%d):db", device_get_nameunit(dev), txq->ift_id); err = ifmp_ring_alloc(&txq->ift_br, 2048, txq, iflib_txq_drain, iflib_txq_can_drain, M_IFLIB, M_WAITOK); if (err) { /* XXX free any allocated rings */ device_printf(dev, "Unable to allocate buf_ring\n"); goto err_tx_desc; } } for (rxconf = i = 0; i < nrxqsets; i++, rxconf++, rxq++) { /* Set up some basics */ if ((ifdip = malloc(sizeof(struct iflib_dma_info) * nrxqs, M_IFLIB, M_WAITOK|M_ZERO)) == NULL) { device_printf(dev, "failed to allocate iflib_dma_info\n"); err = ENOMEM; goto err_tx_desc; } rxq->ifr_ifdi = ifdip; /* XXX this needs to be changed if #rx queues != #tx queues */ rxq->ifr_ntxqirq = 1; rxq->ifr_txqid[0] = i; for (j = 0; j < nrxqs; j++, ifdip++) { if (iflib_dma_alloc(ctx, rxqsizes[j], ifdip, BUS_DMA_NOWAIT)) { device_printf(dev, "Unable to allocate Descriptor memory\n"); err = ENOMEM; goto err_tx_desc; } bzero((void *)ifdip->idi_vaddr, rxqsizes[j]); } rxq->ifr_ctx = ctx; rxq->ifr_id = i; if (sctx->isc_flags & IFLIB_HAS_RXCQ) { rxq->ifr_fl_offset = 1; } else { rxq->ifr_fl_offset = 0; } rxq->ifr_nfl = nfree_lists; if (!(fl = (iflib_fl_t) malloc(sizeof(struct iflib_fl) * nfree_lists, M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate free list memory\n"); err = ENOMEM; goto err_tx_desc; } rxq->ifr_fl = fl; for (j = 0; j < nfree_lists; j++) { fl[j].ifl_rxq = rxq; fl[j].ifl_id = j; fl[j].ifl_ifdi = &rxq->ifr_ifdi[j + rxq->ifr_fl_offset]; fl[j].ifl_rxd_size = scctx->isc_rxd_size[j]; } /* Allocate receive buffers for the ring*/ if (iflib_rxsd_alloc(rxq)) { device_printf(dev, "Critical Failure setting up receive buffers\n"); err = ENOMEM; goto err_rx_desc; } for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) fl->ifl_rx_bitmap = bit_alloc(fl->ifl_size, M_IFLIB, M_WAITOK|M_ZERO); } /* TXQs */ vaddrs = malloc(sizeof(caddr_t)*ntxqsets*ntxqs, M_IFLIB, M_WAITOK); paddrs = malloc(sizeof(uint64_t)*ntxqsets*ntxqs, M_IFLIB, M_WAITOK); for (i = 0; i < ntxqsets; i++) { iflib_dma_info_t di = ctx->ifc_txqs[i].ift_ifdi; for (j = 0; j < ntxqs; j++, di++) { vaddrs[i*ntxqs + j] = di->idi_vaddr; paddrs[i*ntxqs + j] = di->idi_paddr; } } if ((err = IFDI_TX_QUEUES_ALLOC(ctx, vaddrs, paddrs, ntxqs, ntxqsets)) != 0) { device_printf(ctx->ifc_dev, "device queue allocation failed\n"); iflib_tx_structures_free(ctx); free(vaddrs, M_IFLIB); free(paddrs, M_IFLIB); goto err_rx_desc; } free(vaddrs, M_IFLIB); free(paddrs, M_IFLIB); /* RXQs */ vaddrs = malloc(sizeof(caddr_t)*nrxqsets*nrxqs, M_IFLIB, M_WAITOK); paddrs = malloc(sizeof(uint64_t)*nrxqsets*nrxqs, M_IFLIB, M_WAITOK); for (i = 0; i < nrxqsets; i++) { iflib_dma_info_t di = ctx->ifc_rxqs[i].ifr_ifdi; for (j = 0; j < nrxqs; j++, di++) { vaddrs[i*nrxqs + j] = di->idi_vaddr; paddrs[i*nrxqs + j] = di->idi_paddr; } } if ((err = IFDI_RX_QUEUES_ALLOC(ctx, vaddrs, paddrs, nrxqs, nrxqsets)) != 0) { device_printf(ctx->ifc_dev, "device queue allocation failed\n"); iflib_tx_structures_free(ctx); free(vaddrs, M_IFLIB); free(paddrs, M_IFLIB); goto err_rx_desc; } free(vaddrs, M_IFLIB); free(paddrs, M_IFLIB); return (0); /* XXX handle allocation failure changes */ err_rx_desc: err_tx_desc: if (ctx->ifc_rxqs != NULL) free(ctx->ifc_rxqs, M_IFLIB); ctx->ifc_rxqs = NULL; if (ctx->ifc_txqs != NULL) free(ctx->ifc_txqs, M_IFLIB); ctx->ifc_txqs = NULL; rx_fail: if (brscp != NULL) free(brscp, M_IFLIB); if (rxq != NULL) free(rxq, M_IFLIB); if (txq != NULL) free(txq, M_IFLIB); fail: return (err); } static int iflib_tx_structures_setup(if_ctx_t ctx) { iflib_txq_t txq = ctx->ifc_txqs; int i; for (i = 0; i < NTXQSETS(ctx); i++, txq++) iflib_txq_setup(txq); return (0); } static void iflib_tx_structures_free(if_ctx_t ctx) { iflib_txq_t txq = ctx->ifc_txqs; int i, j; for (i = 0; i < NTXQSETS(ctx); i++, txq++) { iflib_txq_destroy(txq); for (j = 0; j < ctx->ifc_nhwtxqs; j++) iflib_dma_free(&txq->ift_ifdi[j]); } free(ctx->ifc_txqs, M_IFLIB); ctx->ifc_txqs = NULL; IFDI_QUEUES_FREE(ctx); } /********************************************************************* * * Initialize all receive rings. * **********************************************************************/ static int iflib_rx_structures_setup(if_ctx_t ctx) { iflib_rxq_t rxq = ctx->ifc_rxqs; int q; #if defined(INET6) || defined(INET) int i, err; #endif for (q = 0; q < ctx->ifc_softc_ctx.isc_nrxqsets; q++, rxq++) { #if defined(INET6) || defined(INET) tcp_lro_free(&rxq->ifr_lc); if ((err = tcp_lro_init_args(&rxq->ifr_lc, ctx->ifc_ifp, TCP_LRO_ENTRIES, min(1024, ctx->ifc_softc_ctx.isc_nrxd[rxq->ifr_fl_offset]))) != 0) { device_printf(ctx->ifc_dev, "LRO Initialization failed!\n"); goto fail; } rxq->ifr_lro_enabled = TRUE; #endif IFDI_RXQ_SETUP(ctx, rxq->ifr_id); } return (0); #if defined(INET6) || defined(INET) fail: /* * Free RX software descriptors allocated so far, we will only handle * the rings that completed, the failing case will have * cleaned up for itself. 'q' failed, so its the terminus. */ rxq = ctx->ifc_rxqs; for (i = 0; i < q; ++i, rxq++) { iflib_rx_sds_free(rxq); rxq->ifr_cq_gen = rxq->ifr_cq_cidx = rxq->ifr_cq_pidx = 0; } return (err); #endif } /********************************************************************* * * Free all receive rings. * **********************************************************************/ static void iflib_rx_structures_free(if_ctx_t ctx) { iflib_rxq_t rxq = ctx->ifc_rxqs; for (int i = 0; i < ctx->ifc_softc_ctx.isc_nrxqsets; i++, rxq++) { iflib_rx_sds_free(rxq); } } static int iflib_qset_structures_setup(if_ctx_t ctx) { int err; if ((err = iflib_tx_structures_setup(ctx)) != 0) return (err); if ((err = iflib_rx_structures_setup(ctx)) != 0) { device_printf(ctx->ifc_dev, "iflib_rx_structures_setup failed: %d\n", err); iflib_tx_structures_free(ctx); iflib_rx_structures_free(ctx); } return (err); } int iflib_irq_alloc(if_ctx_t ctx, if_irq_t irq, int rid, driver_filter_t filter, void *filter_arg, driver_intr_t handler, void *arg, char *name) { return (_iflib_irq_alloc(ctx, irq, rid, filter, handler, arg, name)); } +#ifdef SMP static int -find_nth(if_ctx_t ctx, cpuset_t *cpus, int qid) +find_nth(if_ctx_t ctx, int qid) { + cpuset_t cpus; int i, cpuid, eqid, count; - CPU_COPY(&ctx->ifc_cpus, cpus); + CPU_COPY(&ctx->ifc_cpus, &cpus); count = CPU_COUNT(&ctx->ifc_cpus); eqid = qid % count; /* clear up to the qid'th bit */ for (i = 0; i < eqid; i++) { - cpuid = CPU_FFS(cpus); + cpuid = CPU_FFS(&cpus); MPASS(cpuid != 0); - CPU_CLR(cpuid-1, cpus); + CPU_CLR(cpuid-1, &cpus); } - cpuid = CPU_FFS(cpus); + cpuid = CPU_FFS(&cpus); MPASS(cpuid != 0); return (cpuid-1); } +static int +find_child_with_core(int cpu, struct cpu_group *grp) +{ + int i; + + if (grp->cg_children == 0) + return -1; + + MPASS(grp->cg_child); + for (i = 0; i < grp->cg_children; i++) { + if (CPU_ISSET(cpu, &grp->cg_child[i].cg_mask)) + return i; + } + + return -1; +} + +/* + * Find the nth thread on the specified core + */ +static int +find_thread(int cpu, int thread_num) +{ + struct cpu_group *grp; + int i; + cpuset_t cs; + + grp = smp_topo(); + if (grp == NULL) + return cpu; + i = 0; + while ((i = find_child_with_core(cpu, grp)) != -1) { + /* If the child only has one cpu, don't descend */ + if (grp->cg_child[i].cg_count <= 1) + break; + grp = &grp->cg_child[i]; + } + + /* If they don't share at least an L2 cache, use the same CPU */ + if (grp->cg_level > CG_SHARE_L2 || grp->cg_level == CG_SHARE_NONE) + return cpu; + + /* Now pick one */ + CPU_COPY(&grp->cg_mask, &cs); + for (i = thread_num % grp->cg_count; i > 0; i--) { + MPASS(CPU_FFS(&cs)); + CPU_CLR(CPU_FFS(&cs) - 1, &cs); + } + MPASS(CPU_FFS(&cs)); + return CPU_FFS(&cs) - 1; +} + +static int +get_thread_num(if_ctx_t ctx, iflib_intr_type_t type, int qid) +{ + switch (type) { + case IFLIB_INTR_TX: + /* TX queues get threads on the same core as the corresponding RX queue */ + /* XXX handle multiple RX threads per core and more than two threads per core */ + return qid / CPU_COUNT(&ctx->ifc_cpus) + 1; + case IFLIB_INTR_RX: + case IFLIB_INTR_RXTX: + /* RX queues get the first thread on their core */ + return qid / CPU_COUNT(&ctx->ifc_cpus); + default: + return -1; + } +} +#else +#define get_thread_num(ctx, type, qid) 0 +#define find_thread(cpuid, tid) 0 +#define find_nth(ctx, gid) 0 +#endif + +/* Just to avoid copy/paste */ +static inline int +iflib_irq_set_affinity(if_ctx_t ctx, int irq, iflib_intr_type_t type, int qid, + struct grouptask *gtask, struct taskqgroup *tqg, void *uniq, char *name) +{ + int cpuid; + int err, tid; + + cpuid = find_nth(ctx, qid); + tid = get_thread_num(ctx, type, qid); + MPASS(tid >= 0); + cpuid = find_thread(cpuid, tid); + err = taskqgroup_attach_cpu(tqg, gtask, uniq, cpuid, irq, name); + if (err) { + device_printf(ctx->ifc_dev, "taskqgroup_attach_cpu failed %d\n", err); + return (err); + } + if (cpuid > ctx->ifc_cpuid_highest) + ctx->ifc_cpuid_highest = cpuid; + MPASS(gtask->gt_taskqueue != NULL); + return 0; +} + int iflib_irq_alloc_generic(if_ctx_t ctx, if_irq_t irq, int rid, iflib_intr_type_t type, driver_filter_t *filter, void *filter_arg, int qid, char *name) { struct grouptask *gtask; struct taskqgroup *tqg; iflib_filter_info_t info; - cpuset_t cpus; gtask_fn_t *fn; - int tqrid, err, cpuid; + int tqrid, err; driver_filter_t *intr_fast; void *q; info = &ctx->ifc_filter_info; tqrid = rid; switch (type) { /* XXX merge tx/rx for netmap? */ case IFLIB_INTR_TX: q = &ctx->ifc_txqs[qid]; info = &ctx->ifc_txqs[qid].ift_filter_info; gtask = &ctx->ifc_txqs[qid].ift_task; - tqg = qgroup_if_io_tqg; + tqg = qgroup_if_io; fn = _task_fn_tx; intr_fast = iflib_fast_intr; GROUPTASK_INIT(gtask, 0, fn, q); break; case IFLIB_INTR_RX: q = &ctx->ifc_rxqs[qid]; info = &ctx->ifc_rxqs[qid].ifr_filter_info; gtask = &ctx->ifc_rxqs[qid].ifr_task; - tqg = qgroup_if_io_tqg; + tqg = qgroup_if_io; fn = _task_fn_rx; - intr_fast = iflib_fast_intr; + intr_fast = iflib_fast_intr_rx; GROUPTASK_INIT(gtask, 0, fn, q); break; case IFLIB_INTR_RXTX: q = &ctx->ifc_rxqs[qid]; info = &ctx->ifc_rxqs[qid].ifr_filter_info; gtask = &ctx->ifc_rxqs[qid].ifr_task; - tqg = qgroup_if_io_tqg; + tqg = qgroup_if_io; fn = _task_fn_rx; intr_fast = iflib_fast_intr_rxtx; GROUPTASK_INIT(gtask, 0, fn, q); break; case IFLIB_INTR_ADMIN: q = ctx; tqrid = -1; info = &ctx->ifc_filter_info; gtask = &ctx->ifc_admin_task; - tqg = qgroup_if_config_tqg; + tqg = qgroup_if_config; fn = _task_fn_admin; intr_fast = iflib_fast_intr_ctx; break; default: panic("unknown net intr type"); } info->ifi_filter = filter; info->ifi_filter_arg = filter_arg; info->ifi_task = gtask; info->ifi_ctx = q; err = _iflib_irq_alloc(ctx, irq, rid, intr_fast, NULL, info, name); if (err != 0) { device_printf(ctx->ifc_dev, "_iflib_irq_alloc failed %d\n", err); return (err); } if (type == IFLIB_INTR_ADMIN) return (0); if (tqrid != -1) { - cpuid = find_nth(ctx, &cpus, qid); - taskqgroup_attach_cpu(tqg, gtask, q, cpuid, irq->ii_rid, name); + err = iflib_irq_set_affinity(ctx, rman_get_start(irq->ii_res), type, qid, gtask, tqg, q, name); + if (err) + return (err); } else { taskqgroup_attach(tqg, gtask, q, tqrid, name); } return (0); } void iflib_softirq_alloc_generic(if_ctx_t ctx, int rid, iflib_intr_type_t type, void *arg, int qid, char *name) { struct grouptask *gtask; struct taskqgroup *tqg; gtask_fn_t *fn; void *q; + int err; switch (type) { case IFLIB_INTR_TX: q = &ctx->ifc_txqs[qid]; gtask = &ctx->ifc_txqs[qid].ift_task; - tqg = qgroup_if_io_tqg; + tqg = qgroup_if_io; fn = _task_fn_tx; break; case IFLIB_INTR_RX: q = &ctx->ifc_rxqs[qid]; gtask = &ctx->ifc_rxqs[qid].ifr_task; - tqg = qgroup_if_io_tqg; + tqg = qgroup_if_io; fn = _task_fn_rx; break; case IFLIB_INTR_IOV: q = ctx; gtask = &ctx->ifc_vflr_task; - tqg = qgroup_if_config_tqg; + tqg = qgroup_if_config; rid = -1; fn = _task_fn_iov; break; default: panic("unknown net intr type"); } GROUPTASK_INIT(gtask, 0, fn, q); - taskqgroup_attach(tqg, gtask, q, rid, name); + if (rid != -1) { + err = iflib_irq_set_affinity(ctx, rid, type, qid, gtask, tqg, q, name); + if (err) + taskqgroup_attach(tqg, gtask, q, rid, name); + } + else { + taskqgroup_attach(tqg, gtask, q, rid, name); + } } void iflib_irq_free(if_ctx_t ctx, if_irq_t irq) { if (irq->ii_tag) bus_teardown_intr(ctx->ifc_dev, irq->ii_res, irq->ii_tag); if (irq->ii_res) bus_release_resource(ctx->ifc_dev, SYS_RES_IRQ, irq->ii_rid, irq->ii_res); } static int iflib_legacy_setup(if_ctx_t ctx, driver_filter_t filter, void *filter_arg, int *rid, char *name) { iflib_txq_t txq = ctx->ifc_txqs; iflib_rxq_t rxq = ctx->ifc_rxqs; if_irq_t irq = &ctx->ifc_legacy_irq; iflib_filter_info_t info; struct grouptask *gtask; struct taskqgroup *tqg; gtask_fn_t *fn; int tqrid; void *q; int err; q = &ctx->ifc_rxqs[0]; info = &rxq[0].ifr_filter_info; gtask = &rxq[0].ifr_task; - tqg = qgroup_if_io_tqg; + tqg = qgroup_if_io; tqrid = irq->ii_rid = *rid; fn = _task_fn_rx; ctx->ifc_flags |= IFC_LEGACY; info->ifi_filter = filter; info->ifi_filter_arg = filter_arg; info->ifi_task = gtask; info->ifi_ctx = ctx; /* We allocate a single interrupt resource */ if ((err = _iflib_irq_alloc(ctx, irq, tqrid, iflib_fast_intr_ctx, NULL, info, name)) != 0) return (err); GROUPTASK_INIT(gtask, 0, fn, q); taskqgroup_attach(tqg, gtask, q, tqrid, name); GROUPTASK_INIT(&txq->ift_task, 0, _task_fn_tx, txq); - taskqgroup_attach(qgroup_if_io_tqg, &txq->ift_task, txq, tqrid, "tx"); + taskqgroup_attach(qgroup_if_io, &txq->ift_task, txq, tqrid, "tx"); return (0); } void iflib_led_create(if_ctx_t ctx) { ctx->ifc_led_dev = led_create(iflib_led_func, ctx, device_get_nameunit(ctx->ifc_dev)); } void iflib_tx_intr_deferred(if_ctx_t ctx, int txqid) { GROUPTASK_ENQUEUE(&ctx->ifc_txqs[txqid].ift_task); } void iflib_rx_intr_deferred(if_ctx_t ctx, int rxqid) { GROUPTASK_ENQUEUE(&ctx->ifc_rxqs[rxqid].ifr_task); } void iflib_admin_intr_deferred(if_ctx_t ctx) { #ifdef INVARIANTS struct grouptask *gtask; gtask = &ctx->ifc_admin_task; - MPASS(gtask->gt_taskqueue != NULL); + MPASS(gtask != NULL && gtask->gt_taskqueue != NULL); #endif GROUPTASK_ENQUEUE(&ctx->ifc_admin_task); } +/* CONFIG context only */ +static void +iflib_handle_reset(if_ctx_t ctx, void *arg) +{ + CTX_LOCK(ctx); + ctx->ifc_flags |= IFC_DO_RESET; + iflib_admin_intr_deferred(ctx); + CTX_UNLOCK(ctx); +} + +static void +iflib_admin_reset_deferred(if_ctx_t ctx) +{ + iflib_config_async_gtask_dispatch(ctx, iflib_handle_reset, "reset handler", NULL); +} + void iflib_iov_intr_deferred(if_ctx_t ctx) { GROUPTASK_ENQUEUE(&ctx->ifc_vflr_task); } void iflib_io_tqg_attach(struct grouptask *gt, void *uniq, int cpu, char *name) { - taskqgroup_attach_cpu(qgroup_if_io_tqg, gt, uniq, cpu, -1, name); + taskqgroup_attach_cpu(qgroup_if_io, gt, uniq, cpu, -1, name); } void iflib_config_gtask_init(if_ctx_t ctx, struct grouptask *gtask, gtask_fn_t *fn, char *name) { GROUPTASK_INIT(gtask, 0, fn, ctx); - taskqgroup_attach(qgroup_if_config_tqg, gtask, gtask, -1, name); + taskqgroup_attach(qgroup_if_config, gtask, gtask, -1, name); } +static void +iflib_multi_set(if_ctx_t ctx, void *arg) +{ + CTX_LOCK(ctx); + IFDI_INTR_DISABLE(ctx); + IFDI_MULTI_SET(ctx); + IFDI_INTR_ENABLE(ctx); + CTX_UNLOCK(ctx); +} + +static void +iflib_flags_set(if_ctx_t ctx, void *arg) +{ + int reinit, err; + if_t ifp = ctx->ifc_ifp; + + err = reinit = 0; + CTX_LOCK(ctx); + if (if_getflags(ifp) & IFF_UP) { + if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { + if ((if_getflags(ifp) ^ ctx->ifc_if_flags) & + (IFF_PROMISC | IFF_ALLMULTI)) { + err = IFDI_PROMISC_SET(ctx, if_getflags(ifp)); + } + } else + reinit = 1; + } else if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { + iflib_stop(ctx); + } + ctx->ifc_if_flags = if_getflags(ifp); + if (reinit) + iflib_if_init_locked(ctx); + CTX_UNLOCK(ctx); + if (err) + log(LOG_WARNING, "IFDI_PROMISC_SET returned %d\n", err); +} + +static void +async_gtask(void *ctx) +{ + struct async_task_arg *at_arg = ctx; + if_ctx_t if_ctx = at_arg->ata_ctx; + void *arg = at_arg->ata_arg; + + at_arg->ata_fn(if_ctx, arg); + taskqgroup_detach(qgroup_if_config, at_arg->ata_gtask); + free(at_arg->ata_gtask, M_IFLIB); +} + +static int +iflib_config_async_gtask_dispatch(if_ctx_t ctx, async_gtask_fn_t *fn, char *name, void *arg) +{ + struct grouptask *gtask; + struct async_task_arg *at_arg; + + if ((gtask = malloc(sizeof(struct grouptask) + sizeof(struct async_task_arg), M_IFLIB, M_NOWAIT|M_ZERO)) == NULL) + return (ENOMEM); + + at_arg = (struct async_task_arg *)(gtask + 1); + at_arg->ata_fn = fn; + at_arg->ata_ctx = ctx; + at_arg->ata_arg = arg; + at_arg->ata_gtask = gtask; + + GROUPTASK_INIT(gtask, 0, async_gtask, at_arg); + taskqgroup_attach(qgroup_if_config, gtask, gtask, -1, name); + GROUPTASK_ENQUEUE(gtask); + return (0); +} + +static int +async_if_ioctl(if_ctx_t ctx, u_long command, caddr_t data) +{ + int rc; + + switch (command) { + case SIOCADDMULTI: + case SIOCDELMULTI: + rc = iflib_config_async_gtask_dispatch(ctx, iflib_multi_set, "async_if_multi", NULL); + break; + case SIOCSIFFLAGS: + rc = iflib_config_async_gtask_dispatch(ctx, iflib_flags_set, "async_if_flags", NULL); + break; + default: + panic("unknown command %lx", command); + } + return (rc); +} + + void iflib_config_gtask_deinit(struct grouptask *gtask) { - taskqgroup_detach(qgroup_if_config_tqg, gtask); + taskqgroup_detach(qgroup_if_config, gtask); } void iflib_link_state_change(if_ctx_t ctx, int link_state, uint64_t baudrate) { if_t ifp = ctx->ifc_ifp; iflib_txq_t txq = ctx->ifc_txqs; if_setbaudrate(ifp, baudrate); if (baudrate >= IF_Gbps(10)) ctx->ifc_flags |= IFC_PREFETCH; /* If link down, disable watchdog */ if ((ctx->ifc_link_state == LINK_STATE_UP) && (link_state == LINK_STATE_DOWN)) { for (int i = 0; i < ctx->ifc_softc_ctx.isc_ntxqsets; i++, txq++) txq->ift_qstatus = IFLIB_QUEUE_IDLE; } ctx->ifc_link_state = link_state; if_link_state_change(ifp, link_state); } static int iflib_tx_credits_update(if_ctx_t ctx, iflib_txq_t txq) { int credits; #ifdef INVARIANTS int credits_pre = txq->ift_cidx_processed; #endif if (ctx->isc_txd_credits_update == NULL) return (0); if ((credits = ctx->isc_txd_credits_update(ctx->ifc_softc, txq->ift_id, true)) == 0) return (0); txq->ift_processed += credits; txq->ift_cidx_processed += credits; MPASS(credits_pre + credits == txq->ift_cidx_processed); if (txq->ift_cidx_processed >= txq->ift_size) txq->ift_cidx_processed -= txq->ift_size; return (credits); } static int iflib_rxd_avail(if_ctx_t ctx, iflib_rxq_t rxq, qidx_t cidx, qidx_t budget) { return (ctx->isc_rxd_available(ctx->ifc_softc, rxq->ifr_id, cidx, budget)); } void iflib_add_int_delay_sysctl(if_ctx_t ctx, const char *name, const char *description, if_int_delay_info_t info, int offset, int value) { info->iidi_ctx = ctx; info->iidi_offset = offset; info->iidi_value = value; SYSCTL_ADD_PROC(device_get_sysctl_ctx(ctx->ifc_dev), SYSCTL_CHILDREN(device_get_sysctl_tree(ctx->ifc_dev)), OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, info, 0, iflib_sysctl_int_delay, "I", description); } -struct mtx * +struct sx * iflib_ctx_lock_get(if_ctx_t ctx) { - return (&ctx->ifc_mtx); + return (&ctx->ifc_sx); } static int iflib_msix_init(if_ctx_t ctx) { device_t dev = ctx->ifc_dev; if_shared_ctx_t sctx = ctx->ifc_sctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; int vectors, queues, rx_queues, tx_queues, queuemsgs, msgs; int iflib_num_tx_queues, iflib_num_rx_queues; int err, admincnt, bar; iflib_num_tx_queues = scctx->isc_ntxqsets; iflib_num_rx_queues = scctx->isc_nrxqsets; device_printf(dev, "msix_init qsets capped at %d\n", iflib_num_tx_queues); bar = ctx->ifc_softc_ctx.isc_msix_bar; admincnt = sctx->isc_admin_intrcnt; /* Override by tuneable */ if (scctx->isc_disable_msix) goto msi; /* ** When used in a virtualized environment ** PCI BUSMASTER capability may not be set ** so explicity set it here and rewrite ** the ENABLE in the MSIX control register ** at this point to cause the host to ** successfully initialize us. */ { int msix_ctrl, rid; pci_enable_busmaster(dev); rid = 0; if (pci_find_cap(dev, PCIY_MSIX, &rid) == 0 && rid != 0) { rid += PCIR_MSIX_CTRL; msix_ctrl = pci_read_config(dev, rid, 2); msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE; pci_write_config(dev, rid, msix_ctrl, 2); } else { device_printf(dev, "PCIY_MSIX capability not found; " "or rid %d == 0.\n", rid); goto msi; } } /* * bar == -1 => "trust me I know what I'm doing" * Some drivers are for hardware that is so shoddily * documented that no one knows which bars are which * so the developer has to map all bars. This hack * allows shoddy garbage to use msix in this framework. */ if (bar != -1) { ctx->ifc_msix_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &bar, RF_ACTIVE); if (ctx->ifc_msix_mem == NULL) { /* May not be enabled */ device_printf(dev, "Unable to map MSIX table \n"); goto msi; } } /* First try MSI/X */ if ((msgs = pci_msix_count(dev)) == 0) { /* system has msix disabled */ device_printf(dev, "System has MSIX disabled \n"); bus_release_resource(dev, SYS_RES_MEMORY, bar, ctx->ifc_msix_mem); ctx->ifc_msix_mem = NULL; goto msi; } #if IFLIB_DEBUG /* use only 1 qset in debug mode */ queuemsgs = min(msgs - admincnt, 1); #else queuemsgs = msgs - admincnt; #endif if (bus_get_cpus(dev, INTR_CPUS, sizeof(ctx->ifc_cpus), &ctx->ifc_cpus) == 0) { #ifdef RSS queues = imin(queuemsgs, rss_getnumbuckets()); #else queues = queuemsgs; #endif queues = imin(CPU_COUNT(&ctx->ifc_cpus), queues); device_printf(dev, "pxm cpus: %d queue msgs: %d admincnt: %d\n", CPU_COUNT(&ctx->ifc_cpus), queuemsgs, admincnt); } else { device_printf(dev, "Unable to fetch CPU list\n"); /* Figure out a reasonable auto config value */ queues = min(queuemsgs, mp_ncpus); } #ifdef RSS /* If we're doing RSS, clamp at the number of RSS buckets */ if (queues > rss_getnumbuckets()) queues = rss_getnumbuckets(); #endif if (iflib_num_rx_queues > 0 && iflib_num_rx_queues < queuemsgs - admincnt) rx_queues = iflib_num_rx_queues; else rx_queues = queues; /* * We want this to be all logical CPUs by default */ if (iflib_num_tx_queues > 0 && iflib_num_tx_queues < queues) tx_queues = iflib_num_tx_queues; else tx_queues = mp_ncpus; if (ctx->ifc_sysctl_qs_eq_override == 0) { #ifdef INVARIANTS if (tx_queues != rx_queues) device_printf(dev, "queue equality override not set, capping rx_queues at %d and tx_queues at %d\n", min(rx_queues, tx_queues), min(rx_queues, tx_queues)); #endif tx_queues = min(rx_queues, tx_queues); rx_queues = min(rx_queues, tx_queues); } - device_printf(dev, "using %d rx queues %d tx queues \n", rx_queues, tx_queues); + device_printf(dev, "trying %d rx queues %d tx queues \n", rx_queues, tx_queues); - vectors = rx_queues + admincnt; + vectors = tx_queues + rx_queues + admincnt; if ((err = pci_alloc_msix(dev, &vectors)) == 0) { device_printf(dev, "Using MSIX interrupts with %d vectors\n", vectors); scctx->isc_vectors = vectors; + + if (vectors < tx_queues + rx_queues + admincnt) { + vectors -= admincnt; + if (vectors % 2 != 0) + vectors -= 1; + if (rx_queues > vectors / 2) + rx_queues = vectors / 2; + tx_queues = vectors - rx_queues; + } scctx->isc_nrxqsets = rx_queues; scctx->isc_ntxqsets = tx_queues; scctx->isc_intr = IFLIB_INTR_MSIX; return (vectors); } else { device_printf(dev, "failed to allocate %d msix vectors, err: %d - using MSI\n", vectors, err); } msi: vectors = pci_msi_count(dev); scctx->isc_nrxqsets = 1; scctx->isc_ntxqsets = 1; scctx->isc_vectors = vectors; if (vectors == 1 && pci_alloc_msi(dev, &vectors) == 0) { device_printf(dev,"Using an MSI interrupt\n"); scctx->isc_intr = IFLIB_INTR_MSI; } else { device_printf(dev,"Using a Legacy interrupt\n"); scctx->isc_intr = IFLIB_INTR_LEGACY; } return (vectors); } char * ring_states[] = { "IDLE", "BUSY", "STALLED", "ABDICATED" }; static int mp_ring_state_handler(SYSCTL_HANDLER_ARGS) { int rc; uint16_t *state = ((uint16_t *)oidp->oid_arg1); struct sbuf *sb; char *ring_state = "UNKNOWN"; /* XXX needed ? */ rc = sysctl_wire_old_buffer(req, 0); MPASS(rc == 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 80, req); MPASS(sb != NULL); if (sb == NULL) return (ENOMEM); if (state[3] <= 3) ring_state = ring_states[state[3]]; sbuf_printf(sb, "pidx_head: %04hd pidx_tail: %04hd cidx: %04hd state: %s", state[0], state[1], state[2], ring_state); rc = sbuf_finish(sb); sbuf_delete(sb); return(rc); } enum iflib_ndesc_handler { IFLIB_NTXD_HANDLER, IFLIB_NRXD_HANDLER, }; static int mp_ndesc_handler(SYSCTL_HANDLER_ARGS) { if_ctx_t ctx = (void *)arg1; enum iflib_ndesc_handler type = arg2; char buf[256] = {0}; qidx_t *ndesc; char *p, *next; int nqs, rc, i; MPASS(type == IFLIB_NTXD_HANDLER || type == IFLIB_NRXD_HANDLER); nqs = 8; switch(type) { case IFLIB_NTXD_HANDLER: ndesc = ctx->ifc_sysctl_ntxds; if (ctx->ifc_sctx) nqs = ctx->ifc_sctx->isc_ntxqs; break; case IFLIB_NRXD_HANDLER: ndesc = ctx->ifc_sysctl_nrxds; if (ctx->ifc_sctx) nqs = ctx->ifc_sctx->isc_nrxqs; break; } if (nqs == 0) nqs = 8; for (i=0; i<8; i++) { if (i >= nqs) break; if (i) strcat(buf, ","); sprintf(strchr(buf, 0), "%d", ndesc[i]); } rc = sysctl_handle_string(oidp, buf, sizeof(buf), req); if (rc || req->newptr == NULL) return rc; for (i = 0, next = buf, p = strsep(&next, " ,"); i < 8 && p; i++, p = strsep(&next, " ,")) { ndesc[i] = strtoul(p, NULL, 10); } return(rc); } #define NAME_BUFLEN 32 static void iflib_add_device_sysctl_pre(if_ctx_t ctx) { device_t dev = iflib_get_dev(ctx); struct sysctl_oid_list *child, *oid_list; struct sysctl_ctx_list *ctx_list; struct sysctl_oid *node; ctx_list = device_get_sysctl_ctx(dev); child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); ctx->ifc_sysctl_node = node = SYSCTL_ADD_NODE(ctx_list, child, OID_AUTO, "iflib", CTLFLAG_RD, NULL, "IFLIB fields"); oid_list = SYSCTL_CHILDREN(node); SYSCTL_ADD_STRING(ctx_list, oid_list, OID_AUTO, "driver_version", CTLFLAG_RD, ctx->ifc_sctx->isc_driver_version, 0, "driver version"); SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_ntxqs", CTLFLAG_RWTUN, &ctx->ifc_sysctl_ntxqs, 0, "# of txqs to use, 0 => use default #"); SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_nrxqs", CTLFLAG_RWTUN, &ctx->ifc_sysctl_nrxqs, 0, "# of rxqs to use, 0 => use default #"); SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_qs_enable", CTLFLAG_RWTUN, &ctx->ifc_sysctl_qs_eq_override, 0, "permit #txq != #rxq"); - SYSCTL_ADD_INT(ctx_list, oid_list, OID_AUTO, "disable_msix", + SYSCTL_ADD_INT(ctx_list, oid_list, OID_AUTO, "disable_msix", CTLFLAG_RWTUN, &ctx->ifc_softc_ctx.isc_disable_msix, 0, "disable MSIX (default 0)"); + SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "rx_budget", + CTLFLAG_RWTUN, &ctx->ifc_sysctl_rx_budget, 0, + "set the rx budget"); /* XXX change for per-queue sizes */ SYSCTL_ADD_PROC(ctx_list, oid_list, OID_AUTO, "override_ntxds", CTLTYPE_STRING|CTLFLAG_RWTUN, ctx, IFLIB_NTXD_HANDLER, mp_ndesc_handler, "A", "list of # of tx descriptors to use, 0 = use default #"); SYSCTL_ADD_PROC(ctx_list, oid_list, OID_AUTO, "override_nrxds", CTLTYPE_STRING|CTLFLAG_RWTUN, ctx, IFLIB_NRXD_HANDLER, mp_ndesc_handler, "A", "list of # of rx descriptors to use, 0 = use default #"); + + SYSCTL_ADD_INT(ctx_list, oid_list, OID_AUTO, "watchdog_events", + CTLFLAG_RD, &ctx->ifc_watchdog_events, 0, + "Watchdog events seen since load"); } static void iflib_add_device_sysctl_post(if_ctx_t ctx) { if_shared_ctx_t sctx = ctx->ifc_sctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; device_t dev = iflib_get_dev(ctx); struct sysctl_oid_list *child; struct sysctl_ctx_list *ctx_list; iflib_fl_t fl; iflib_txq_t txq; iflib_rxq_t rxq; int i, j; char namebuf[NAME_BUFLEN]; char *qfmt; struct sysctl_oid *queue_node, *fl_node, *node; struct sysctl_oid_list *queue_list, *fl_list; ctx_list = device_get_sysctl_ctx(dev); node = ctx->ifc_sysctl_node; child = SYSCTL_CHILDREN(node); if (scctx->isc_ntxqsets > 100) qfmt = "txq%03d"; else if (scctx->isc_ntxqsets > 10) qfmt = "txq%02d"; else qfmt = "txq%d"; for (i = 0, txq = ctx->ifc_txqs; i < scctx->isc_ntxqsets; i++, txq++) { snprintf(namebuf, NAME_BUFLEN, qfmt, i); queue_node = SYSCTL_ADD_NODE(ctx_list, child, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "Queue Name"); queue_list = SYSCTL_CHILDREN(queue_node); #if MEMORY_LOGGING SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txq_dequeued", CTLFLAG_RD, &txq->ift_dequeued, "total mbufs freed"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txq_enqueued", CTLFLAG_RD, &txq->ift_enqueued, "total mbufs enqueued"); #endif SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "mbuf_defrag", CTLFLAG_RD, &txq->ift_mbuf_defrag, "# of times m_defrag was called"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "m_pullups", CTLFLAG_RD, &txq->ift_pullups, "# of times m_pullup was called"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "mbuf_defrag_failed", CTLFLAG_RD, &txq->ift_mbuf_defrag_failed, "# of times m_defrag failed"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "no_desc_avail", CTLFLAG_RD, &txq->ift_no_desc_avail, "# of times no descriptors were available"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "tx_map_failed", CTLFLAG_RD, &txq->ift_map_failed, "# of times dma map failed"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txd_encap_efbig", CTLFLAG_RD, &txq->ift_txd_encap_efbig, "# of times txd_encap returned EFBIG"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "no_tx_dma_setup", CTLFLAG_RD, &txq->ift_no_tx_dma_setup, "# of times map failed for other than EFBIG"); SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "txq_pidx", CTLFLAG_RD, &txq->ift_pidx, 1, "Producer Index"); SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "txq_cidx", CTLFLAG_RD, &txq->ift_cidx, 1, "Consumer Index"); SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "txq_cidx_processed", CTLFLAG_RD, &txq->ift_cidx_processed, 1, "Consumer Index seen by credit update"); SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "txq_in_use", CTLFLAG_RD, &txq->ift_in_use, 1, "descriptors in use"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txq_processed", CTLFLAG_RD, &txq->ift_processed, "descriptors procesed for clean"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txq_cleaned", CTLFLAG_RD, &txq->ift_cleaned, "total cleaned"); SYSCTL_ADD_PROC(ctx_list, queue_list, OID_AUTO, "ring_state", CTLTYPE_STRING | CTLFLAG_RD, __DEVOLATILE(uint64_t *, &txq->ift_br->state), 0, mp_ring_state_handler, "A", "soft ring state"); SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_enqueues", CTLFLAG_RD, &txq->ift_br->enqueues, "# of enqueues to the mp_ring for this queue"); SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_drops", CTLFLAG_RD, &txq->ift_br->drops, "# of drops in the mp_ring for this queue"); SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_starts", CTLFLAG_RD, &txq->ift_br->starts, "# of normal consumer starts in the mp_ring for this queue"); SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_stalls", CTLFLAG_RD, &txq->ift_br->stalls, "# of consumer stalls in the mp_ring for this queue"); SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_restarts", CTLFLAG_RD, &txq->ift_br->restarts, "# of consumer restarts in the mp_ring for this queue"); SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_abdications", CTLFLAG_RD, &txq->ift_br->abdications, "# of consumer abdications in the mp_ring for this queue"); } if (scctx->isc_nrxqsets > 100) qfmt = "rxq%03d"; else if (scctx->isc_nrxqsets > 10) qfmt = "rxq%02d"; else qfmt = "rxq%d"; for (i = 0, rxq = ctx->ifc_rxqs; i < scctx->isc_nrxqsets; i++, rxq++) { snprintf(namebuf, NAME_BUFLEN, qfmt, i); queue_node = SYSCTL_ADD_NODE(ctx_list, child, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "Queue Name"); queue_list = SYSCTL_CHILDREN(queue_node); if (sctx->isc_flags & IFLIB_HAS_RXCQ) { SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "rxq_cq_pidx", CTLFLAG_RD, &rxq->ifr_cq_pidx, 1, "Producer Index"); SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "rxq_cq_cidx", CTLFLAG_RD, &rxq->ifr_cq_cidx, 1, "Consumer Index"); } for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) { snprintf(namebuf, NAME_BUFLEN, "rxq_fl%d", j); fl_node = SYSCTL_ADD_NODE(ctx_list, queue_list, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "freelist Name"); fl_list = SYSCTL_CHILDREN(fl_node); SYSCTL_ADD_U16(ctx_list, fl_list, OID_AUTO, "pidx", CTLFLAG_RD, &fl->ifl_pidx, 1, "Producer Index"); SYSCTL_ADD_U16(ctx_list, fl_list, OID_AUTO, "cidx", CTLFLAG_RD, &fl->ifl_cidx, 1, "Consumer Index"); SYSCTL_ADD_U16(ctx_list, fl_list, OID_AUTO, "credits", CTLFLAG_RD, &fl->ifl_credits, 1, "credits available"); #if MEMORY_LOGGING SYSCTL_ADD_QUAD(ctx_list, fl_list, OID_AUTO, "fl_m_enqueued", CTLFLAG_RD, &fl->ifl_m_enqueued, "mbufs allocated"); SYSCTL_ADD_QUAD(ctx_list, fl_list, OID_AUTO, "fl_m_dequeued", CTLFLAG_RD, &fl->ifl_m_dequeued, "mbufs freed"); SYSCTL_ADD_QUAD(ctx_list, fl_list, OID_AUTO, "fl_cl_enqueued", CTLFLAG_RD, &fl->ifl_cl_enqueued, "clusters allocated"); SYSCTL_ADD_QUAD(ctx_list, fl_list, OID_AUTO, "fl_cl_dequeued", CTLFLAG_RD, &fl->ifl_cl_dequeued, "clusters freed"); #endif } } } #ifndef __NO_STRICT_ALIGNMENT static struct mbuf * iflib_fixup_rx(struct mbuf *m) { struct mbuf *n; if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) { bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len); m->m_data += ETHER_HDR_LEN; n = m; } else { MGETHDR(n, M_NOWAIT, MT_DATA); if (n == NULL) { m_freem(m); return (NULL); } bcopy(m->m_data, n->m_data, ETHER_HDR_LEN); m->m_data += ETHER_HDR_LEN; m->m_len -= ETHER_HDR_LEN; n->m_len = ETHER_HDR_LEN; M_MOVE_PKTHDR(n, m); n->m_next = m; } return (n); } #endif Index: head/sys/net/iflib.h =================================================================== --- head/sys/net/iflib.h (revision 323515) +++ head/sys/net/iflib.h (revision 323516) @@ -1,392 +1,398 @@ /*- * Copyright (c) 2014-2017, Matthew Macy (mmacy@nextbsd.org) * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * 2. Neither the name of Matthew Macy nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #ifndef __IFLIB_H_ #define __IFLIB_H_ #include #include #include #include #include #include /* * The value type for indexing, limits max descriptors * to 65535 can be conditionally redefined to uint32_t * in the future if the need arises. */ typedef uint16_t qidx_t; #define QIDX_INVALID 0xFFFF /* * Most cards can handle much larger TSO requests * but the FreeBSD TCP stack will break on larger * values */ #define FREEBSD_TSO_SIZE_MAX 65518 struct iflib_ctx; typedef struct iflib_ctx *if_ctx_t; struct if_shared_ctx; typedef struct if_shared_ctx *if_shared_ctx_t; struct if_int_delay_info; typedef struct if_int_delay_info *if_int_delay_info_t; /* * File organization: * - public structures * - iflib accessors * - iflib utility functions * - iflib core functions */ typedef struct if_rxd_frag { uint8_t irf_flid; qidx_t irf_idx; uint16_t irf_len; } *if_rxd_frag_t; typedef struct if_rxd_info { /* set by iflib */ uint16_t iri_qsidx; /* qset index */ uint16_t iri_vtag; /* vlan tag - if flag set */ /* XXX redundant with the new irf_len field */ uint16_t iri_len; /* packet length */ qidx_t iri_cidx; /* consumer index of cq */ struct ifnet *iri_ifp; /* some drivers >1 interface per softc */ /* updated by driver */ if_rxd_frag_t iri_frags; uint32_t iri_flowid; /* RSS hash for packet */ uint32_t iri_csum_flags; /* m_pkthdr csum flags */ uint32_t iri_csum_data; /* m_pkthdr csum data */ uint8_t iri_flags; /* mbuf flags for packet */ uint8_t iri_nfrags; /* number of fragments in packet */ uint8_t iri_rsstype; /* RSS hash type */ uint8_t iri_pad; /* any padding in the received data */ } *if_rxd_info_t; typedef struct if_rxd_update { uint64_t *iru_paddrs; caddr_t *iru_vaddrs; qidx_t *iru_idxs; qidx_t iru_pidx; uint16_t iru_qsidx; uint16_t iru_count; uint16_t iru_buf_size; uint8_t iru_flidx; } *if_rxd_update_t; #define IPI_TX_INTR 0x1 /* send an interrupt when this packet is sent */ #define IPI_TX_IPV4 0x2 /* ethertype IPv4 */ #define IPI_TX_IPV6 0x4 /* ethertype IPv6 */ typedef struct if_pkt_info { bus_dma_segment_t *ipi_segs; /* physical addresses */ uint32_t ipi_len; /* packet length */ uint16_t ipi_qsidx; /* queue set index */ qidx_t ipi_nsegs; /* number of segments */ qidx_t ipi_ndescs; /* number of descriptors used by encap */ uint16_t ipi_flags; /* iflib per-packet flags */ qidx_t ipi_pidx; /* start pidx for encap */ qidx_t ipi_new_pidx; /* next available pidx post-encap */ /* offload handling */ + caddr_t ipi_hdr_data; /* raw header */ uint8_t ipi_ehdrlen; /* ether header length */ uint8_t ipi_ip_hlen; /* ip header length */ uint8_t ipi_tcp_hlen; /* tcp header length */ uint8_t ipi_ipproto; /* ip protocol */ uint32_t ipi_csum_flags; /* packet checksum flags */ uint16_t ipi_tso_segsz; /* tso segment size */ uint16_t ipi_vtag; /* VLAN tag */ uint16_t ipi_etype; /* ether header type */ uint8_t ipi_tcp_hflags; /* tcp header flags */ uint8_t ipi_mflags; /* packet mbuf flags */ uint32_t ipi_tcp_seq; /* tcp seqno */ uint32_t ipi_tcp_sum; /* tcp csum */ } *if_pkt_info_t; typedef struct if_irq { struct resource *ii_res; int ii_rid; void *ii_tag; } *if_irq_t; struct if_int_delay_info { if_ctx_t iidi_ctx; /* Back-pointer to the iflib ctx (softc) */ int iidi_offset; /* Register offset to read/write */ int iidi_value; /* Current value in usecs */ struct sysctl_oid *iidi_oidp; struct sysctl_req *iidi_req; }; typedef enum { IFLIB_INTR_LEGACY, IFLIB_INTR_MSI, IFLIB_INTR_MSIX } iflib_intr_mode_t; /* * This really belongs in pciio.h or some place more general * but this is the only consumer for now. */ typedef struct pci_vendor_info { uint32_t pvi_vendor_id; uint32_t pvi_device_id; uint32_t pvi_subvendor_id; uint32_t pvi_subdevice_id; uint32_t pvi_rev_id; uint32_t pvi_class_mask; caddr_t pvi_name; } pci_vendor_info_t; #define PVID(vendor, devid, name) {vendor, devid, 0, 0, 0, 0, name} #define PVID_OEM(vendor, devid, svid, sdevid, revid, name) {vendor, devid, svid, sdevid, revid, 0, name} #define PVID_END {0, 0, 0, 0, 0, 0, NULL} typedef struct if_txrx { int (*ift_txd_encap) (void *, if_pkt_info_t); void (*ift_txd_flush) (void *, uint16_t, qidx_t pidx); int (*ift_txd_credits_update) (void *, uint16_t qsidx, bool clear); int (*ift_rxd_available) (void *, uint16_t qsidx, qidx_t pidx, qidx_t budget); int (*ift_rxd_pkt_get) (void *, if_rxd_info_t ri); void (*ift_rxd_refill) (void * , if_rxd_update_t iru); void (*ift_rxd_flush) (void *, uint16_t qsidx, uint8_t flidx, qidx_t pidx); int (*ift_legacy_intr) (void *); + int (*ift_txd_errata) (void *, struct mbuf **mp); } *if_txrx_t; typedef struct if_softc_ctx { int isc_vectors; int isc_nrxqsets; int isc_ntxqsets; int isc_msix_bar; /* can be model specific - initialize in attach_pre */ int isc_tx_nsegments; /* can be model specific - initialize in attach_pre */ int isc_ntxd[8]; int isc_nrxd[8]; uint32_t isc_txqsizes[8]; uint32_t isc_rxqsizes[8]; /* is there such thing as a descriptor that is more than 248 bytes ? */ uint8_t isc_txd_size[8]; uint8_t isc_rxd_size[8]; int isc_tx_tso_segments_max; int isc_tx_tso_size_max; int isc_tx_tso_segsize_max; int isc_tx_csum_flags; int isc_capenable; int isc_rss_table_size; int isc_rss_table_mask; int isc_nrxqsets_max; int isc_ntxqsets_max; iflib_intr_mode_t isc_intr; uint16_t isc_max_frame_size; /* set at init time by driver */ uint32_t isc_pause_frames; /* set by driver for iflib_timer to detect */ pci_vendor_info_t isc_vendor_info; /* set by iflib prior to attach_pre */ int isc_disable_msix; if_txrx_t isc_txrx; } *if_softc_ctx_t; /* * Initialization values for device */ struct if_shared_ctx { int isc_magic; driver_t *isc_driver; bus_size_t isc_q_align; bus_size_t isc_tx_maxsize; bus_size_t isc_tx_maxsegsize; bus_size_t isc_rx_maxsize; bus_size_t isc_rx_maxsegsize; int isc_rx_nsegments; int isc_admin_intrcnt; /* # of admin/link interrupts */ /* fields necessary for probe */ pci_vendor_info_t *isc_vendor_info; char *isc_driver_version; /* optional function to transform the read values to match the table*/ void (*isc_parse_devinfo) (uint16_t *device_id, uint16_t *subvendor_id, uint16_t *subdevice_id, uint16_t *rev_id); int isc_nrxd_min[8]; int isc_nrxd_default[8]; int isc_nrxd_max[8]; int isc_ntxd_min[8]; int isc_ntxd_default[8]; int isc_ntxd_max[8]; /* actively used during operation */ int isc_nfl __aligned(CACHE_LINE_SIZE); int isc_ntxqs; /* # of tx queues per tx qset - usually 1 */ int isc_nrxqs; /* # of rx queues per rx qset - intel 1, chelsio 2, broadcom 3 */ int isc_rx_process_limit; int isc_tx_reclaim_thresh; int isc_flags; }; typedef struct iflib_dma_info { bus_addr_t idi_paddr; caddr_t idi_vaddr; bus_dma_tag_t idi_tag; bus_dmamap_t idi_map; uint32_t idi_size; } *iflib_dma_info_t; #define IFLIB_MAGIC 0xCAFEF00D typedef enum { IFLIB_INTR_RX, IFLIB_INTR_TX, IFLIB_INTR_RXTX, IFLIB_INTR_ADMIN, IFLIB_INTR_IOV, } iflib_intr_type_t; #ifndef ETH_ADDR_LEN #define ETH_ADDR_LEN 6 #endif /* * Interface has a separate command queue for RX */ #define IFLIB_HAS_RXCQ 0x01 /* * Driver has already allocated vectors */ #define IFLIB_SKIP_MSIX 0x02 /* * Interface is a virtual function */ #define IFLIB_IS_VF 0x04 /* * Interface has a separate command queue for TX */ #define IFLIB_HAS_TXCQ 0x08 /* - * Interface does checksum in place + * */ -#define IFLIB_NEED_SCRATCH 0x10 +#define IFLIB_UNUSED___0 0x10 /* * Interface doesn't expect in_pseudo for th_sum */ #define IFLIB_TSO_INIT_IP 0x20 /* * Interface doesn't align IP header */ #define IFLIB_DO_RX_FIXUP 0x40 +/* + * Driver needs csum zeroed for offloading + */ +#define IFLIB_NEED_ZERO_CSUM 0x80 /* * field accessors */ void *iflib_get_softc(if_ctx_t ctx); device_t iflib_get_dev(if_ctx_t ctx); if_t iflib_get_ifp(if_ctx_t ctx); struct ifmedia *iflib_get_media(if_ctx_t ctx); if_softc_ctx_t iflib_get_softc_ctx(if_ctx_t ctx); if_shared_ctx_t iflib_get_sctx(if_ctx_t ctx); void iflib_set_mac(if_ctx_t ctx, uint8_t mac[ETHER_ADDR_LEN]); /* * If the driver can plug cleanly in to newbus use these */ int iflib_device_probe(device_t); int iflib_device_attach(device_t); int iflib_device_detach(device_t); int iflib_device_suspend(device_t); int iflib_device_resume(device_t); int iflib_device_shutdown(device_t); int iflib_device_iov_init(device_t, uint16_t, const nvlist_t *); void iflib_device_iov_uninit(device_t); int iflib_device_iov_add_vf(device_t, uint16_t, const nvlist_t *); /* * If the driver can't plug cleanly in to newbus * use these */ int iflib_device_register(device_t dev, void *softc, if_shared_ctx_t sctx, if_ctx_t *ctxp); int iflib_device_deregister(if_ctx_t); int iflib_irq_alloc(if_ctx_t, if_irq_t, int, driver_filter_t, void *filter_arg, driver_intr_t, void *arg, char *name); int iflib_irq_alloc_generic(if_ctx_t ctx, if_irq_t irq, int rid, iflib_intr_type_t type, driver_filter_t *filter, void *filter_arg, int qid, char *name); void iflib_softirq_alloc_generic(if_ctx_t ctx, int rid, iflib_intr_type_t type, void *arg, int qid, char *name); void iflib_irq_free(if_ctx_t ctx, if_irq_t irq); void iflib_io_tqg_attach(struct grouptask *gt, void *uniq, int cpu, char *name); void iflib_config_gtask_init(if_ctx_t ctx, struct grouptask *gtask, gtask_fn_t *fn, char *name); void iflib_config_gtask_deinit(struct grouptask *gtask); void iflib_tx_intr_deferred(if_ctx_t ctx, int txqid); void iflib_rx_intr_deferred(if_ctx_t ctx, int rxqid); void iflib_admin_intr_deferred(if_ctx_t ctx); void iflib_iov_intr_deferred(if_ctx_t ctx); void iflib_link_state_change(if_ctx_t ctx, int linkstate, uint64_t baudrate); int iflib_dma_alloc(if_ctx_t ctx, int size, iflib_dma_info_t dma, int mapflags); void iflib_dma_free(iflib_dma_info_t dma); int iflib_dma_alloc_multi(if_ctx_t ctx, int *sizes, iflib_dma_info_t *dmalist, int mapflags, int count); void iflib_dma_free_multi(iflib_dma_info_t *dmalist, int count); -struct mtx *iflib_ctx_lock_get(if_ctx_t); +struct sx *iflib_ctx_lock_get(if_ctx_t); struct mtx *iflib_qset_lock_get(if_ctx_t, uint16_t); void iflib_led_create(if_ctx_t ctx); void iflib_add_int_delay_sysctl(if_ctx_t, const char *, const char *, if_int_delay_info_t, int, int); #endif /* __IFLIB_H_ */ Index: head/sys/net/mp_ring.c =================================================================== --- head/sys/net/mp_ring.c (revision 323515) +++ head/sys/net/mp_ring.c (revision 323516) @@ -1,544 +1,544 @@ /*- * Copyright (c) 2014 Chelsio Communications, Inc. * All rights reserved. * Written by: Navdeep Parhar * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #if defined(__powerpc__) || defined(__mips__) #define NO_64BIT_ATOMICS #endif #if defined(__i386__) #define atomic_cmpset_acq_64 atomic_cmpset_64 #define atomic_cmpset_rel_64 atomic_cmpset_64 #endif #include union ring_state { struct { uint16_t pidx_head; uint16_t pidx_tail; uint16_t cidx; uint16_t flags; }; uint64_t state; }; enum { IDLE = 0, /* consumer ran to completion, nothing more to do. */ BUSY, /* consumer is running already, or will be shortly. */ STALLED, /* consumer stopped due to lack of resources. */ ABDICATED, /* consumer stopped even though there was work to be done because it wants another thread to take over. */ }; static inline uint16_t space_available(struct ifmp_ring *r, union ring_state s) { uint16_t x = r->size - 1; if (s.cidx == s.pidx_head) return (x); else if (s.cidx > s.pidx_head) return (s.cidx - s.pidx_head - 1); else return (x - s.pidx_head + s.cidx); } static inline uint16_t increment_idx(struct ifmp_ring *r, uint16_t idx, uint16_t n) { int x = r->size - idx; MPASS(x > 0); return (x > n ? idx + n : n - x); } /* Consumer is about to update the ring's state to s */ static inline uint16_t state_to_flags(union ring_state s, int abdicate) { if (s.cidx == s.pidx_tail) return (IDLE); else if (abdicate && s.pidx_tail != s.pidx_head) return (ABDICATED); return (BUSY); } #ifdef NO_64BIT_ATOMICS static void drain_ring_locked(struct ifmp_ring *r, union ring_state os, uint16_t prev, int budget) { union ring_state ns; int n, pending, total; uint16_t cidx = os.cidx; uint16_t pidx = os.pidx_tail; MPASS(os.flags == BUSY); MPASS(cidx != pidx); if (prev == IDLE) counter_u64_add(r->starts, 1); pending = 0; total = 0; while (cidx != pidx) { /* Items from cidx to pidx are available for consumption. */ n = r->drain(r, cidx, pidx); if (n == 0) { os.state = ns.state = r->state; ns.cidx = cidx; ns.flags = STALLED; r->state = ns.state; if (prev != STALLED) counter_u64_add(r->stalls, 1); else if (total > 0) { counter_u64_add(r->restarts, 1); counter_u64_add(r->stalls, 1); } break; } cidx = increment_idx(r, cidx, n); pending += n; total += n; /* * We update the cidx only if we've caught up with the pidx, the * real cidx is getting too far ahead of the one visible to * everyone else, or we have exceeded our budget. */ if (cidx != pidx && pending < 64 && total < budget) continue; os.state = ns.state = r->state; ns.cidx = cidx; ns.flags = state_to_flags(ns, total >= budget); r->state = ns.state; if (ns.flags == ABDICATED) counter_u64_add(r->abdications, 1); if (ns.flags != BUSY) { /* Wrong loop exit if we're going to stall. */ MPASS(ns.flags != STALLED); if (prev == STALLED) { MPASS(total > 0); counter_u64_add(r->restarts, 1); } break; } /* * The acquire style atomic above guarantees visibility of items * associated with any pidx change that we notice here. */ pidx = ns.pidx_tail; pending = 0; } } #else /* * Caller passes in a state, with a guarantee that there is work to do and that * all items up to the pidx_tail in the state are visible. */ static void drain_ring_lockless(struct ifmp_ring *r, union ring_state os, uint16_t prev, int budget) { union ring_state ns; int n, pending, total; uint16_t cidx = os.cidx; uint16_t pidx = os.pidx_tail; MPASS(os.flags == BUSY); MPASS(cidx != pidx); if (prev == IDLE) counter_u64_add(r->starts, 1); pending = 0; total = 0; while (cidx != pidx) { /* Items from cidx to pidx are available for consumption. */ n = r->drain(r, cidx, pidx); if (n == 0) { critical_enter(); do { os.state = ns.state = r->state; ns.cidx = cidx; ns.flags = STALLED; } while (atomic_cmpset_64(&r->state, os.state, ns.state) == 0); critical_exit(); if (prev != STALLED) counter_u64_add(r->stalls, 1); else if (total > 0) { counter_u64_add(r->restarts, 1); counter_u64_add(r->stalls, 1); } break; } cidx = increment_idx(r, cidx, n); pending += n; total += n; /* * We update the cidx only if we've caught up with the pidx, the * real cidx is getting too far ahead of the one visible to * everyone else, or we have exceeded our budget. */ if (cidx != pidx && pending < 64 && total < budget) continue; critical_enter(); - do { + os.state = ns.state = r->state; + ns.cidx = cidx; + ns.flags = state_to_flags(ns, total >= budget); + while (atomic_cmpset_acq_64(&r->state, os.state, ns.state) == 0) { + cpu_spinwait(); os.state = ns.state = r->state; ns.cidx = cidx; ns.flags = state_to_flags(ns, total >= budget); - } while (atomic_cmpset_acq_64(&r->state, os.state, ns.state) == 0); + } critical_exit(); if (ns.flags == ABDICATED) counter_u64_add(r->abdications, 1); if (ns.flags != BUSY) { /* Wrong loop exit if we're going to stall. */ MPASS(ns.flags != STALLED); if (prev == STALLED) { MPASS(total > 0); counter_u64_add(r->restarts, 1); } break; } /* * The acquire style atomic above guarantees visibility of items * associated with any pidx change that we notice here. */ pidx = ns.pidx_tail; pending = 0; } } #endif int ifmp_ring_alloc(struct ifmp_ring **pr, int size, void *cookie, mp_ring_drain_t drain, mp_ring_can_drain_t can_drain, struct malloc_type *mt, int flags) { struct ifmp_ring *r; /* All idx are 16b so size can be 65536 at most */ if (pr == NULL || size < 2 || size > 65536 || drain == NULL || can_drain == NULL) return (EINVAL); *pr = NULL; flags &= M_NOWAIT | M_WAITOK; MPASS(flags != 0); r = malloc(__offsetof(struct ifmp_ring, items[size]), mt, flags | M_ZERO); if (r == NULL) return (ENOMEM); r->size = size; r->cookie = cookie; r->mt = mt; r->drain = drain; r->can_drain = can_drain; r->enqueues = counter_u64_alloc(flags); r->drops = counter_u64_alloc(flags); r->starts = counter_u64_alloc(flags); r->stalls = counter_u64_alloc(flags); r->restarts = counter_u64_alloc(flags); r->abdications = counter_u64_alloc(flags); if (r->enqueues == NULL || r->drops == NULL || r->starts == NULL || r->stalls == NULL || r->restarts == NULL || r->abdications == NULL) { ifmp_ring_free(r); return (ENOMEM); } *pr = r; #ifdef NO_64BIT_ATOMICS mtx_init(&r->lock, "mp_ring lock", NULL, MTX_DEF); #endif return (0); } void ifmp_ring_free(struct ifmp_ring *r) { if (r == NULL) return; if (r->enqueues != NULL) counter_u64_free(r->enqueues); if (r->drops != NULL) counter_u64_free(r->drops); if (r->starts != NULL) counter_u64_free(r->starts); if (r->stalls != NULL) counter_u64_free(r->stalls); if (r->restarts != NULL) counter_u64_free(r->restarts); if (r->abdications != NULL) counter_u64_free(r->abdications); free(r, r->mt); } /* * Enqueue n items and maybe drain the ring for some time. * * Returns an errno. */ #ifdef NO_64BIT_ATOMICS int ifmp_ring_enqueue(struct ifmp_ring *r, void **items, int n, int budget) { union ring_state os, ns; uint16_t pidx_start, pidx_stop; int i; MPASS(items != NULL); MPASS(n > 0); mtx_lock(&r->lock); /* * Reserve room for the new items. Our reservation, if successful, is * from 'pidx_start' to 'pidx_stop'. */ os.state = r->state; if (n >= space_available(r, os)) { counter_u64_add(r->drops, n); MPASS(os.flags != IDLE); if (os.flags == STALLED) ifmp_ring_check_drainage(r, 0); return (ENOBUFS); } ns.state = os.state; ns.pidx_head = increment_idx(r, os.pidx_head, n); r->state = ns.state; pidx_start = os.pidx_head; pidx_stop = ns.pidx_head; /* * Wait for other producers who got in ahead of us to enqueue their * items, one producer at a time. It is our turn when the ring's * pidx_tail reaches the beginning of our reservation (pidx_start). */ while (ns.pidx_tail != pidx_start) { cpu_spinwait(); ns.state = r->state; } /* Now it is our turn to fill up the area we reserved earlier. */ i = pidx_start; do { r->items[i] = *items++; if (__predict_false(++i == r->size)) i = 0; } while (i != pidx_stop); /* * Update the ring's pidx_tail. The release style atomic guarantees * that the items are visible to any thread that sees the updated pidx. */ os.state = ns.state = r->state; ns.pidx_tail = pidx_stop; ns.flags = BUSY; r->state = ns.state; counter_u64_add(r->enqueues, n); /* * Turn into a consumer if some other thread isn't active as a consumer * already. */ if (os.flags != BUSY) drain_ring_locked(r, ns, os.flags, budget); mtx_unlock(&r->lock); return (0); } #else int ifmp_ring_enqueue(struct ifmp_ring *r, void **items, int n, int budget) { union ring_state os, ns; uint16_t pidx_start, pidx_stop; int i; MPASS(items != NULL); MPASS(n > 0); /* * Reserve room for the new items. Our reservation, if successful, is * from 'pidx_start' to 'pidx_stop'. */ for (;;) { os.state = r->state; if (n >= space_available(r, os)) { counter_u64_add(r->drops, n); MPASS(os.flags != IDLE); if (os.flags == STALLED) ifmp_ring_check_drainage(r, 0); return (ENOBUFS); } ns.state = os.state; ns.pidx_head = increment_idx(r, os.pidx_head, n); critical_enter(); if (atomic_cmpset_64(&r->state, os.state, ns.state)) break; critical_exit(); cpu_spinwait(); } pidx_start = os.pidx_head; pidx_stop = ns.pidx_head; /* * Wait for other producers who got in ahead of us to enqueue their * items, one producer at a time. It is our turn when the ring's * pidx_tail reaches the beginning of our reservation (pidx_start). */ while (ns.pidx_tail != pidx_start) { cpu_spinwait(); ns.state = r->state; } /* Now it is our turn to fill up the area we reserved earlier. */ i = pidx_start; do { r->items[i] = *items++; if (__predict_false(++i == r->size)) i = 0; } while (i != pidx_stop); /* * Update the ring's pidx_tail. The release style atomic guarantees * that the items are visible to any thread that sees the updated pidx. */ do { os.state = ns.state = r->state; ns.pidx_tail = pidx_stop; - ns.flags = BUSY; + if (os.flags == IDLE) + ns.flags = ABDICATED; } while (atomic_cmpset_rel_64(&r->state, os.state, ns.state) == 0); critical_exit(); counter_u64_add(r->enqueues, n); - /* - * Turn into a consumer if some other thread isn't active as a consumer - * already. - */ - if (os.flags != BUSY) - drain_ring_lockless(r, ns, os.flags, budget); - return (0); } #endif void ifmp_ring_check_drainage(struct ifmp_ring *r, int budget) { union ring_state os, ns; os.state = r->state; - if (os.flags != STALLED || os.pidx_head != os.pidx_tail || r->can_drain(r) == 0) + if ((os.flags != STALLED && os.flags != ABDICATED) || // Only continue in STALLED and ABDICATED + os.pidx_head != os.pidx_tail || // Require work to be available + (os.flags != ABDICATED && r->can_drain(r) == 0)) // Can either drain, or everyone left return; MPASS(os.cidx != os.pidx_tail); /* implied by STALLED */ ns.state = os.state; ns.flags = BUSY; #ifdef NO_64BIT_ATOMICS mtx_lock(&r->lock); if (r->state != os.state) { mtx_unlock(&r->lock); return; } r->state = ns.state; drain_ring_locked(r, ns, os.flags, budget); mtx_unlock(&r->lock); #else /* * The acquire style atomic guarantees visibility of items associated * with the pidx that we read here. */ if (!atomic_cmpset_acq_64(&r->state, os.state, ns.state)) return; drain_ring_lockless(r, ns, os.flags, budget); #endif } void ifmp_ring_reset_stats(struct ifmp_ring *r) { counter_u64_zero(r->enqueues); counter_u64_zero(r->drops); counter_u64_zero(r->starts); counter_u64_zero(r->stalls); counter_u64_zero(r->restarts); counter_u64_zero(r->abdications); } int ifmp_ring_is_idle(struct ifmp_ring *r) { union ring_state s; s.state = r->state; if (s.pidx_head == s.pidx_tail && s.pidx_tail == s.cidx && s.flags == IDLE) return (1); return (0); } int ifmp_ring_is_stalled(struct ifmp_ring *r) { union ring_state s; s.state = r->state; if (s.pidx_head == s.pidx_tail && s.flags == STALLED) return (1); return (0); } Index: head/sys/sys/gtaskqueue.h =================================================================== --- head/sys/sys/gtaskqueue.h (revision 323515) +++ head/sys/sys/gtaskqueue.h (revision 323516) @@ -1,108 +1,123 @@ /*- * Copyright (c) 2014 Jeffrey Roberson * Copyright (c) 2016 Matthew Macy * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _SYS_GTASKQUEUE_H_ #define _SYS_GTASKQUEUE_H_ #include #ifndef _KERNEL #error "no user-serviceable parts inside" #endif struct gtaskqueue; typedef void (*gtaskqueue_enqueue_fn)(void *context); /* * Taskqueue groups. Manages dynamic thread groups and irq binding for * device and other tasks. */ void gtaskqueue_block(struct gtaskqueue *queue); void gtaskqueue_unblock(struct gtaskqueue *queue); int gtaskqueue_cancel(struct gtaskqueue *queue, struct gtask *gtask); void gtaskqueue_drain(struct gtaskqueue *queue, struct gtask *task); void gtaskqueue_drain_all(struct gtaskqueue *queue); int grouptaskqueue_enqueue(struct gtaskqueue *queue, struct gtask *task); void taskqgroup_attach(struct taskqgroup *qgroup, struct grouptask *grptask, void *uniq, int irq, char *name); int taskqgroup_attach_cpu(struct taskqgroup *qgroup, struct grouptask *grptask, void *uniq, int cpu, int irq, char *name); void taskqgroup_detach(struct taskqgroup *qgroup, struct grouptask *gtask); struct taskqgroup *taskqgroup_create(char *name); void taskqgroup_destroy(struct taskqgroup *qgroup); -int taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride); +int taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride, bool ithread, int pri); +int taskqgroup_adjust_once(struct taskqgroup *qgroup, int cnt, int stride, bool ithread, int pri); +void taskqgroup_set_adjust(struct taskqgroup *qgroup, void (*adjust_func)(void*)); #define TASK_ENQUEUED 0x1 #define TASK_SKIP_WAKEUP 0x2 #define GTASK_INIT(task, flags, priority, func, context) do { \ (task)->ta_flags = flags; \ (task)->ta_priority = (priority); \ (task)->ta_func = (func); \ (task)->ta_context = (context); \ } while (0) #define GROUPTASK_INIT(gtask, priority, func, context) \ GTASK_INIT(&(gtask)->gt_task, TASK_SKIP_WAKEUP, priority, func, context) #define GROUPTASK_ENQUEUE(gtask) \ grouptaskqueue_enqueue((gtask)->gt_taskqueue, &(gtask)->gt_task) #define TASKQGROUP_DECLARE(name) \ extern struct taskqgroup *qgroup_##name -#define TASKQGROUP_DEFINE(name, cnt, stride) \ + +#define TASKQGROUP_DEFINE(name, cnt, stride, intr, pri) \ \ struct taskqgroup *qgroup_##name; \ \ static void \ -taskqgroup_define_##name(void *arg) \ +taskqgroup_adjust_##name(void *arg) \ { \ - qgroup_##name = taskqgroup_create(#name); \ + int max = (intr) ? 1 : (cnt); \ + if (arg != NULL) { \ + uintptr_t maxcpu = (uintptr_t) arg; \ + max = maxcpu; \ + } \ + \ + taskqgroup_adjust_once(qgroup_##name, max, (stride), (intr), (pri)); \ } \ \ -SYSINIT(taskqgroup_##name, SI_SUB_TASKQ, SI_ORDER_FIRST, \ - taskqgroup_define_##name, NULL); \ +SYSINIT(taskqgroup_adj_##name, SI_SUB_SMP, SI_ORDER_ANY, \ + taskqgroup_adjust_##name, NULL); \ \ static void \ -taskqgroup_adjust_##name(void *arg) \ +taskqgroup_define_##name(void *arg) \ { \ - taskqgroup_adjust(qgroup_##name, (cnt), (stride)); \ + qgroup_##name = taskqgroup_create(#name); \ + taskqgroup_set_adjust(qgroup_##name, taskqgroup_adjust_##name); \ } \ - \ -SYSINIT(taskqgroup_adj_##name, SI_SUB_SMP, SI_ORDER_ANY, \ - taskqgroup_adjust_##name, NULL) +SYSINIT(taskqgroup_##name, SI_SUB_TASKQ, SI_ORDER_FIRST, \ + taskqgroup_define_##name, NULL) + + + + + + TASKQGROUP_DECLARE(net); TASKQGROUP_DECLARE(softirq); #endif /* !_SYS_GTASKQUEUE_H_ */