Index: head/sys/dev/cxgb/cxgb_main.c =================================================================== --- head/sys/dev/cxgb/cxgb_main.c (revision 170006) +++ head/sys/dev/cxgb/cxgb_main.c (revision 170007) @@ -1,2232 +1,2237 @@ /************************************************************************** Copyright (c) 2007, Chelsio Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Neither the name of the Chelsio Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ***************************************************************************/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef PRIV_SUPPORTED #include #endif static int cxgb_setup_msix(adapter_t *, int); static void cxgb_init(void *); static void cxgb_init_locked(struct port_info *); static void cxgb_stop_locked(struct port_info *); static void cxgb_set_rxmode(struct port_info *); static int cxgb_ioctl(struct ifnet *, unsigned long, caddr_t); static void cxgb_start(struct ifnet *); static void cxgb_start_proc(void *, int ncount); static int cxgb_media_change(struct ifnet *); static void cxgb_media_status(struct ifnet *, struct ifmediareq *); static int setup_sge_qsets(adapter_t *); static void cxgb_async_intr(void *); static void cxgb_ext_intr_handler(void *, int); static void cxgb_down(struct adapter *sc); static void cxgb_tick(void *); static void setup_rss(adapter_t *sc); /* Attachment glue for the PCI controller end of the device. Each port of * the device is attached separately, as defined later. 
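* In the resulting newbus tree the controller attaches to the PCI bus as
* cxgbcN and each port attaches beneath it as cxgbN (see the two
* DRIVER_MODULE declarations below).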
*/ static int cxgb_controller_probe(device_t); static int cxgb_controller_attach(device_t); static int cxgb_controller_detach(device_t); static void cxgb_free(struct adapter *); static __inline void reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start, unsigned int end); static void cxgb_get_regs(adapter_t *sc, struct ifconf_regs *regs, uint8_t *buf); static int cxgb_get_regs_len(void); static int offload_open(struct port_info *pi); static int offload_close(struct toedev *tdev); static device_method_t cxgb_controller_methods[] = { DEVMETHOD(device_probe, cxgb_controller_probe), DEVMETHOD(device_attach, cxgb_controller_attach), DEVMETHOD(device_detach, cxgb_controller_detach), /* bus interface */ DEVMETHOD(bus_print_child, bus_generic_print_child), DEVMETHOD(bus_driver_added, bus_generic_driver_added), { 0, 0 } }; static driver_t cxgb_controller_driver = { "cxgbc", cxgb_controller_methods, sizeof(struct adapter) }; static devclass_t cxgb_controller_devclass; DRIVER_MODULE(cxgbc, pci, cxgb_controller_driver, cxgb_controller_devclass, 0, 0); /* * Attachment glue for the ports. Attachment is done directly to the * controller device. */ static int cxgb_port_probe(device_t); static int cxgb_port_attach(device_t); static int cxgb_port_detach(device_t); static device_method_t cxgb_port_methods[] = { DEVMETHOD(device_probe, cxgb_port_probe), DEVMETHOD(device_attach, cxgb_port_attach), DEVMETHOD(device_detach, cxgb_port_detach), { 0, 0 } }; static driver_t cxgb_port_driver = { "cxgb", cxgb_port_methods, 0 }; static d_ioctl_t cxgb_extension_ioctl; static devclass_t cxgb_port_devclass; DRIVER_MODULE(cxgb, cxgbc, cxgb_port_driver, cxgb_port_devclass, 0, 0); #define SGE_MSIX_COUNT (SGE_QSETS + 1) extern int collapse_mbufs; /* * The driver uses the best interrupt scheme available on a platform in the * order MSI-X, MSI, legacy pin interrupts. This parameter determines which * of these schemes the driver may consider as follows: * * msi = 2: choose from among all three options * msi = 1 : only consider MSI and pin interrupts * msi = 0: force pin interrupts */ static int msi_allowed = 2; TUNABLE_INT("hw.cxgb.msi_allowed", &msi_allowed); SYSCTL_NODE(_hw, OID_AUTO, cxgb, CTLFLAG_RD, 0, "CXGB driver parameters"); SYSCTL_UINT(_hw_cxgb, OID_AUTO, msi_allowed, CTLFLAG_RDTUN, &msi_allowed, 0, "MSI-X, MSI, INTx selector"); /* * The driver enables offload as a default. * To disable it, use ofld_disable = 1. */ static int ofld_disable = 0; TUNABLE_INT("hw.cxgb.ofld_disable", &ofld_disable); SYSCTL_UINT(_hw_cxgb, OID_AUTO, ofld_disable, CTLFLAG_RDTUN, &ofld_disable, 0, "disable ULP offload"); /* * The driver uses an auto-queue algorithm by default. * To disable it and force a single queue-set per port, use singleq = 1. */ static int singleq = 1; TUNABLE_INT("hw.cxgb.singleq", &singleq); SYSCTL_UINT(_hw_cxgb, OID_AUTO, singleq, CTLFLAG_RDTUN, &singleq, 0, "use a single queue-set per port"); enum { MAX_TXQ_ENTRIES = 16384, MAX_CTRL_TXQ_ENTRIES = 1024, MAX_RSPQ_ENTRIES = 16384, MAX_RX_BUFFERS = 16384, MAX_RX_JUMBO_BUFFERS = 16384, MIN_TXQ_ENTRIES = 4, MIN_CTRL_TXQ_ENTRIES = 4, MIN_RSPQ_ENTRIES = 32, MIN_FL_ENTRIES = 32 }; #define PORT_MASK ((1 << MAX_NPORTS) - 1) /* Table for probing the cards. 
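* Entries are matched on PCI vendor and device ID; the index field selects
* the adapter_info template returned by t3_get_adapter_info().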
The desc field isn't actually used */ struct cxgb_ident { uint16_t vendor; uint16_t device; int index; char *desc; } cxgb_identifiers[] = { {PCI_VENDOR_ID_CHELSIO, 0x0020, 0, "PE9000"}, {PCI_VENDOR_ID_CHELSIO, 0x0021, 1, "T302E"}, {PCI_VENDOR_ID_CHELSIO, 0x0022, 2, "T310E"}, {PCI_VENDOR_ID_CHELSIO, 0x0023, 3, "T320X"}, {PCI_VENDOR_ID_CHELSIO, 0x0024, 1, "T302X"}, {PCI_VENDOR_ID_CHELSIO, 0x0025, 3, "T320E"}, {PCI_VENDOR_ID_CHELSIO, 0x0026, 2, "T310X"}, {PCI_VENDOR_ID_CHELSIO, 0x0030, 2, "T3B10"}, {PCI_VENDOR_ID_CHELSIO, 0x0031, 3, "T3B20"}, {PCI_VENDOR_ID_CHELSIO, 0x0032, 1, "T3B02"}, {0, 0, 0, NULL} }; static struct cxgb_ident * cxgb_get_ident(device_t dev) { struct cxgb_ident *id; for (id = cxgb_identifiers; id->desc != NULL; id++) { if ((id->vendor == pci_get_vendor(dev)) && (id->device == pci_get_device(dev))) { return (id); } } return (NULL); } static const struct adapter_info * cxgb_get_adapter_info(device_t dev) { struct cxgb_ident *id; const struct adapter_info *ai; id = cxgb_get_ident(dev); if (id == NULL) return (NULL); ai = t3_get_adapter_info(id->index); return (ai); } static int cxgb_controller_probe(device_t dev) { const struct adapter_info *ai; char *ports, buf[80]; ai = cxgb_get_adapter_info(dev); if (ai == NULL) return (ENXIO); if (ai->nports == 1) ports = "port"; else ports = "ports"; snprintf(buf, sizeof(buf), "%s RNIC, %d %s", ai->desc, ai->nports, ports); device_set_desc_copy(dev, buf); return (BUS_PROBE_DEFAULT); } static int upgrade_fw(adapter_t *sc) { char buf[32]; #ifdef FIRMWARE_LATEST const struct firmware *fw; #else struct firmware *fw; #endif int status; snprintf(&buf[0], sizeof(buf), "t3fw%d%d%d", FW_VERSION_MAJOR, FW_VERSION_MINOR, FW_VERSION_MICRO); fw = firmware_get(buf); if (fw == NULL) { device_printf(sc->dev, "Could not find firmware image %s\n", buf); return (ENOENT); } status = t3_load_fw(sc, (const uint8_t *)fw->data, fw->datasize); firmware_put(fw, FIRMWARE_UNLOAD); return (status); } static int cxgb_controller_attach(device_t dev) { driver_intr_t *cxgb_intr = NULL; device_t child; const struct adapter_info *ai; struct adapter *sc; int i, reg, msi_needed, error = 0; uint32_t vers; int port_qsets = 1; sc = device_get_softc(dev); sc->dev = dev; sc->msi_count = 0; /* find the PCIe link width and set max read request to 4KB*/ if (pci_find_extcap(dev, PCIY_EXPRESS, ®) == 0) { uint16_t lnk, pectl; lnk = pci_read_config(dev, reg + 0x12, 2); sc->link_width = (lnk >> 4) & 0x3f; pectl = pci_read_config(dev, reg + 0x8, 2); pectl = (pectl & ~0x7000) | (5 << 12); pci_write_config(dev, reg + 0x8, pectl, 2); } if (sc->link_width != 0 && sc->link_width <= 4) { device_printf(sc->dev, "PCIe x%d Link, expect reduced performance\n", sc->link_width); } pci_enable_busmaster(dev); /* * Allocate the registers and make them available to the driver. * The registers that we care about for NIC mode are in BAR 0 */ sc->regs_rid = PCIR_BAR(0); if ((sc->regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &sc->regs_rid, RF_ACTIVE)) == NULL) { device_printf(dev, "Cannot allocate BAR\n"); return (ENXIO); } mtx_init(&sc->sge.reg_lock, "SGE reg lock", NULL, MTX_DEF); mtx_init(&sc->lock, "cxgb controller lock", NULL, MTX_DEF); mtx_init(&sc->mdio_lock, "cxgb mdio", NULL, MTX_DEF); sc->bt = rman_get_bustag(sc->regs_res); sc->bh = rman_get_bushandle(sc->regs_res); sc->mmio_len = rman_get_size(sc->regs_res); ai = cxgb_get_adapter_info(dev); if (t3_prep_adapter(sc, ai, 1) < 0) { error = ENODEV; goto out; } /* Allocate the BAR for doing MSI-X. 
If it succeeds, try to allocate * enough messages for the queue sets. If that fails, try falling * back to MSI. If that fails, then try falling back to the legacy * interrupt pin model. */ #ifdef MSI_SUPPORTED sc->msix_regs_rid = 0x20; if ((msi_allowed >= 2) && (sc->msix_regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &sc->msix_regs_rid, RF_ACTIVE)) != NULL) { msi_needed = sc->msi_count = SGE_MSIX_COUNT; if (((error = pci_alloc_msix(dev, &sc->msi_count)) != 0) || (sc->msi_count != msi_needed)) { device_printf(dev, "msix allocation failed - msi_count = %d" " msi_needed=%d will try msi err=%d\n", sc->msi_count, msi_needed, error); sc->msi_count = 0; pci_release_msi(dev); bus_release_resource(dev, SYS_RES_MEMORY, sc->msix_regs_rid, sc->msix_regs_res); sc->msix_regs_res = NULL; } else { sc->flags |= USING_MSIX; cxgb_intr = t3_intr_msix; } } if ((msi_allowed >= 1) && (sc->msi_count == 0)) { sc->msi_count = 1; if (pci_alloc_msi(dev, &sc->msi_count)) { device_printf(dev, "alloc msi failed - will try INTx\n"); sc->msi_count = 0; pci_release_msi(dev); } else { sc->flags |= USING_MSI; sc->irq_rid = 1; cxgb_intr = t3_intr_msi; } } #endif if (sc->msi_count == 0) { device_printf(dev, "using line interrupts\n"); sc->irq_rid = 0; cxgb_intr = t3b_intr; } /* Create a private taskqueue thread for handling driver events */ #ifdef TASKQUEUE_CURRENT sc->tq = taskqueue_create("cxgb_taskq", M_NOWAIT, taskqueue_thread_enqueue, &sc->tq); #else sc->tq = taskqueue_create_fast("cxgb_taskq", M_NOWAIT, taskqueue_thread_enqueue, &sc->tq); #endif if (sc->tq == NULL) { device_printf(dev, "failed to allocate controller task queue\n"); goto out; } taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq", device_get_nameunit(dev)); TASK_INIT(&sc->ext_intr_task, 0, cxgb_ext_intr_handler, sc); /* Create a periodic callout for checking adapter status */ callout_init_mtx(&sc->cxgb_tick_ch, &sc->lock, CALLOUT_RETURNUNLOCKED); if (t3_check_fw_version(sc) != 0) { /* * Warn user that a firmware update will be attempted in init. */ device_printf(dev, "firmware needs to be updated to version %d.%d.%d\n", FW_VERSION_MAJOR, FW_VERSION_MINOR, FW_VERSION_MICRO); sc->flags &= ~FW_UPTODATE; } else { sc->flags |= FW_UPTODATE; } if ((sc->flags & USING_MSIX) && !singleq) port_qsets = min((SGE_QSETS/(sc)->params.nports), mp_ncpus); /* * Create a child device for each MAC. The ethernet attachment * will be done in these children. 
*/ for (i = 0; i < (sc)->params.nports; i++) { if ((child = device_add_child(dev, "cxgb", -1)) == NULL) { device_printf(dev, "failed to add child port\n"); error = EINVAL; goto out; } sc->portdev[i] = child; sc->port[i].adapter = sc; sc->port[i].nqsets = port_qsets; sc->port[i].first_qset = i*port_qsets; sc->port[i].port = i; device_set_softc(child, &sc->port[i]); } if ((error = bus_generic_attach(dev)) != 0) goto out; /* * XXX need to poll for link status */ sc->params.stats_update_period = 1; /* initialize sge private state */ t3_sge_init_sw(sc); t3_led_ready(sc); cxgb_offload_init(); if (is_offload(sc)) { setbit(&sc->registered_device_map, OFFLOAD_DEVMAP_BIT); cxgb_adapter_ofld(sc); } error = t3_get_fw_version(sc, &vers); if (error) goto out; snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d", G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers), G_FW_VERSION_MICRO(vers)); t3_add_sysctls(sc); out: if (error) cxgb_free(sc); return (error); } static int cxgb_controller_detach(device_t dev) { struct adapter *sc; sc = device_get_softc(dev); cxgb_free(sc); return (0); } static void cxgb_free(struct adapter *sc) { int i; cxgb_down(sc); #ifdef MSI_SUPPORTED if (sc->flags & (USING_MSI | USING_MSIX)) { device_printf(sc->dev, "releasing msi message(s)\n"); pci_release_msi(sc->dev); } else { device_printf(sc->dev, "no msi message to release\n"); } #endif if (sc->msix_regs_res != NULL) { bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->msix_regs_rid, sc->msix_regs_res); } /* * XXX need to drain the ifq by hand until * it is taught about mbuf iovecs */ callout_drain(&sc->cxgb_tick_ch); t3_sge_deinit_sw(sc); if (sc->tq != NULL) { taskqueue_drain(sc->tq, &sc->ext_intr_task); taskqueue_free(sc->tq); } for (i = 0; i < (sc)->params.nports; ++i) { if (sc->portdev[i] != NULL) device_delete_child(sc->dev, sc->portdev[i]); } bus_generic_detach(sc->dev); if (is_offload(sc)) { cxgb_adapter_unofld(sc); if (isset(&sc->open_device_map, OFFLOAD_DEVMAP_BIT)) offload_close(&sc->tdev); } t3_free_sge_resources(sc); t3_sge_free(sc); if (sc->regs_res != NULL) bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->regs_rid, sc->regs_res); mtx_destroy(&sc->mdio_lock); mtx_destroy(&sc->sge.reg_lock); mtx_destroy(&sc->lock); return; } /** * setup_sge_qsets - configure SGE Tx/Rx/response queues * @sc: the controller softc * * Determines how many sets of SGE queues to use and initializes them. * We support multiple queue sets per port if we have MSI-X, otherwise * just one queue set per port. */ static int setup_sge_qsets(adapter_t *sc) { int i, j, err, irq_idx, qset_idx; u_int ntxq = SGE_TXQ_PER_SET; if ((err = t3_sge_alloc(sc)) != 0) { device_printf(sc->dev, "t3_sge_alloc returned %d\n", err); return (err); } if (sc->params.rev > 0 && !(sc->flags & USING_MSI)) irq_idx = -1; else irq_idx = 0; for (qset_idx = 0, i = 0; i < (sc)->params.nports; ++i) { struct port_info *pi = &sc->port[i]; for (j = 0; j < pi->nqsets; ++j, ++qset_idx) { err = t3_sge_alloc_qset(sc, qset_idx, (sc)->params.nports, (sc->flags & USING_MSIX) ? 
qset_idx + 1 : irq_idx, &sc->params.sge.qset[qset_idx], ntxq, pi); if (err) { t3_free_sge_resources(sc); device_printf(sc->dev, "t3_sge_alloc_qset failed with %d\n", err); return (err); } } } return (0); } static int cxgb_setup_msix(adapter_t *sc, int msix_count) { int i, j, k, nqsets, rid; /* The first message indicates link changes and error conditions */ sc->irq_rid = 1; if ((sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &sc->irq_rid, RF_SHAREABLE | RF_ACTIVE)) == NULL) { device_printf(sc->dev, "Cannot allocate msix interrupt\n"); return (EINVAL); } if (bus_setup_intr(sc->dev, sc->irq_res, INTR_MPSAFE|INTR_TYPE_NET, #ifdef INTR_FILTERS NULL, #endif cxgb_async_intr, sc, &sc->intr_tag)) { device_printf(sc->dev, "Cannot set up interrupt\n"); return (EINVAL); } for (i = 0, k = 0; i < (sc)->params.nports; ++i) { nqsets = sc->port[i].nqsets; for (j = 0; j < nqsets; ++j, k++) { struct sge_qset *qs = &sc->sge.qs[k]; rid = k + 2; if (cxgb_debug) printf("rid=%d ", rid); if ((sc->msix_irq_res[k] = bus_alloc_resource_any( sc->dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE)) == NULL) { device_printf(sc->dev, "Cannot allocate " "interrupt for message %d\n", rid); return (EINVAL); } sc->msix_irq_rid[k] = rid; if (bus_setup_intr(sc->dev, sc->msix_irq_res[j], INTR_MPSAFE|INTR_TYPE_NET, #ifdef INTR_FILTERS NULL, #endif t3_intr_msix, qs, &sc->msix_intr_tag[k])) { device_printf(sc->dev, "Cannot set up " "interrupt for message %d\n", rid); return (EINVAL); } } } return (0); } static int cxgb_port_probe(device_t dev) { struct port_info *p; char buf[80]; p = device_get_softc(dev); snprintf(buf, sizeof(buf), "Port %d %s", p->port, p->port_type->desc); device_set_desc_copy(dev, buf); return (0); } static int cxgb_makedev(struct port_info *pi) { struct cdevsw *cxgb_cdevsw; if ((cxgb_cdevsw = malloc(sizeof(struct cdevsw), M_DEVBUF, M_NOWAIT|M_ZERO)) == NULL) return (ENOMEM); cxgb_cdevsw->d_version = D_VERSION; cxgb_cdevsw->d_name = strdup(pi->ifp->if_xname, M_DEVBUF); cxgb_cdevsw->d_ioctl = cxgb_extension_ioctl; pi->port_cdev = make_dev(cxgb_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, pi->ifp->if_xname); if (pi->port_cdev == NULL) return (ENOMEM); pi->port_cdev->si_drv1 = (void *)pi; return (0); } #ifdef TSO_SUPPORTED #define CXGB_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM | IFCAP_TSO | IFCAP_JUMBO_MTU) /* Don't enable TSO6 yet */ #define CXGB_CAP_ENABLE (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM | IFCAP_TSO4 | IFCAP_JUMBO_MTU) #else #define CXGB_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_JUMBO_MTU) /* Don't enable TSO6 yet */ #define CXGB_CAP_ENABLE (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_JUMBO_MTU) #define IFCAP_TSO4 0x0 #define CSUM_TSO 0x0 #endif static int cxgb_port_attach(device_t dev) { struct port_info *p; struct ifnet *ifp; int media_flags; int err; char buf[64]; p = device_get_softc(dev); snprintf(buf, sizeof(buf), "cxgb port %d", p->port); mtx_init(&p->lock, buf, 0, MTX_DEF); /* Allocate an ifnet object and set it up */ ifp = p->ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { device_printf(dev, "Cannot allocate ifnet\n"); return (ENOMEM); } /* * Note that there is currently no watchdog timer. 
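* (if_timer is set to 0 and if_watchdog is left NULL below.)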
*/ if_initname(ifp, device_get_name(dev), device_get_unit(dev)); ifp->if_init = cxgb_init; ifp->if_softc = p; ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = cxgb_ioctl; ifp->if_start = cxgb_start; ifp->if_timer = 0; /* Disable ifnet watchdog */ ifp->if_watchdog = NULL; ifp->if_snd.ifq_drv_maxlen = TX_ETH_Q_SIZE; IFQ_SET_MAXLEN(&ifp->if_snd, ifp->if_snd.ifq_drv_maxlen); IFQ_SET_READY(&ifp->if_snd); ifp->if_hwassist = ifp->if_capabilities = ifp->if_capenable = 0; ifp->if_capabilities |= CXGB_CAP; ifp->if_capenable |= CXGB_CAP_ENABLE; ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO); ifp->if_baudrate = 100000000; ether_ifattach(ifp, p->hw_addr); #ifdef DEFAULT_JUMBO ifp->if_mtu = 9000; #endif if ((err = cxgb_makedev(p)) != 0) { printf("makedev failed %d\n", err); return (err); } ifmedia_init(&p->media, IFM_IMASK, cxgb_media_change, cxgb_media_status); if (!strcmp(p->port_type->desc, "10GBASE-CX4")) media_flags = IFM_ETHER | IFM_10G_CX4; else if (!strcmp(p->port_type->desc, "10GBASE-SR")) media_flags = IFM_ETHER | IFM_10G_SR; else if (!strcmp(p->port_type->desc, "10GBASE-XR")) media_flags = IFM_ETHER | IFM_10G_LR; else { printf("unsupported media type %s\n", p->port_type->desc); return (ENXIO); } ifmedia_add(&p->media, media_flags, 0, NULL); ifmedia_add(&p->media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(&p->media, media_flags); snprintf(buf, sizeof(buf), "cxgb_port_taskq%d", p->port); #ifdef TASKQUEUE_CURRENT /* Create a port for handling TX without starvation */ p->tq = taskqueue_create(buf, M_NOWAIT, taskqueue_thread_enqueue, &p->tq); #else /* Create a port for handling TX without starvation */ p->tq = taskqueue_create_fast(buf, M_NOWAIT, taskqueue_thread_enqueue, &p->tq); #endif if (p->tq == NULL) { device_printf(dev, "failed to allocate port task queue\n"); return (ENOMEM); } taskqueue_start_threads(&p->tq, 1, PI_NET, "%s taskq", device_get_nameunit(dev)); TASK_INIT(&p->start_task, 0, cxgb_start_proc, ifp); return (0); } static int cxgb_port_detach(device_t dev) { struct port_info *p; p = device_get_softc(dev); PORT_LOCK(p); cxgb_stop_locked(p); PORT_UNLOCK(p); mtx_destroy(&p->lock); if (p->tq != NULL) { taskqueue_drain(p->tq, &p->start_task); taskqueue_free(p->tq); p->tq = NULL; } ether_ifdetach(p->ifp); if_free(p->ifp); destroy_dev(p->port_cdev); return (0); } void t3_fatal_err(struct adapter *sc) { u_int fw_status[4]; device_printf(sc->dev,"encountered fatal error, operation suspended\n"); if (!t3_cim_ctl_blk_read(sc, 0xa0, 4, fw_status)) device_printf(sc->dev, "FW_ status: 0x%x, 0x%x, 0x%x, 0x%x\n", fw_status[0], fw_status[1], fw_status[2], fw_status[3]); } int t3_os_find_pci_capability(adapter_t *sc, int cap) { device_t dev; struct pci_devinfo *dinfo; pcicfgregs *cfg; uint32_t status; uint8_t ptr; dev = sc->dev; dinfo = device_get_ivars(dev); cfg = &dinfo->cfg; status = pci_read_config(dev, PCIR_STATUS, 2); if (!(status & PCIM_STATUS_CAPPRESENT)) return (0); switch (cfg->hdrtype & PCIM_HDRTYPE) { case 0: case 1: ptr = PCIR_CAP_PTR; break; case 2: ptr = PCIR_CAP_PTR_2; break; default: return (0); break; } ptr = pci_read_config(dev, ptr, 1); while (ptr != 0) { if (pci_read_config(dev, ptr + PCICAP_ID, 1) == cap) return (ptr); ptr = pci_read_config(dev, ptr + PCICAP_NEXTPTR, 1); } return (0); } int t3_os_pci_save_state(struct adapter *sc) { device_t dev; struct pci_devinfo *dinfo; dev = sc->dev; dinfo = device_get_ivars(dev); pci_cfg_save(dev, dinfo, 0); return (0); } int t3_os_pci_restore_state(struct adapter *sc) { device_t dev; struct 
pci_devinfo *dinfo; dev = sc->dev; dinfo = device_get_ivars(dev); pci_cfg_restore(dev, dinfo); return (0); } /** * t3_os_link_changed - handle link status changes * @adapter: the adapter associated with the link change * @port_id: the port index whose limk status has changed * @link_stat: the new status of the link * @speed: the new speed setting * @duplex: the new duplex setting * @fc: the new flow-control setting * * This is the OS-dependent handler for link status changes. The OS * neutral handler takes care of most of the processing for these events, * then calls this handler for any OS-specific processing. */ void t3_os_link_changed(adapter_t *adapter, int port_id, int link_status, int speed, int duplex, int fc) { struct port_info *pi = &adapter->port[port_id]; struct cmac *mac = &adapter->port[port_id].mac; if ((pi->ifp->if_flags & IFF_UP) == 0) return; if (link_status) { t3_mac_enable(mac, MAC_DIRECTION_RX); if_link_state_change(pi->ifp, LINK_STATE_UP); } else { if_link_state_change(pi->ifp, LINK_STATE_DOWN); pi->phy.ops->power_down(&pi->phy, 1); t3_mac_disable(mac, MAC_DIRECTION_RX); t3_link_start(&pi->phy, mac, &pi->link_config); } } /* * Interrupt-context handler for external (PHY) interrupts. */ void t3_os_ext_intr_handler(adapter_t *sc) { if (cxgb_debug) printf("t3_os_ext_intr_handler\n"); /* * Schedule a task to handle external interrupts as they may be slow * and we use a mutex to protect MDIO registers. We disable PHY * interrupts in the meantime and let the task reenable them when * it's done. */ ADAPTER_LOCK(sc); if (sc->slow_intr_mask) { sc->slow_intr_mask &= ~F_T3DBG; t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask); taskqueue_enqueue(sc->tq, &sc->ext_intr_task); } ADAPTER_UNLOCK(sc); } void t3_os_set_hw_addr(adapter_t *adapter, int port_idx, u8 hw_addr[]) { /* * The ifnet might not be allocated before this gets called, * as this is called early on in attach by t3_prep_adapter * save the address off in the port structure */ if (cxgb_debug) printf("set_hw_addr on idx %d addr %6D\n", port_idx, hw_addr, ":"); bcopy(hw_addr, adapter->port[port_idx].hw_addr, ETHER_ADDR_LEN); } /** * link_start - enable a port * @p: the port to enable * * Performs the MAC and PHY actions needed to enable a port. */ static void cxgb_link_start(struct port_info *p) { struct ifnet *ifp; struct t3_rx_mode rm; struct cmac *mac = &p->mac; ifp = p->ifp; t3_init_rx_mode(&rm, p); t3_mac_reset(mac); t3_mac_set_mtu(mac, ifp->if_mtu + ETHER_HDR_LEN); t3_mac_set_address(mac, 0, p->hw_addr); t3_mac_set_rx_mode(mac, &rm); t3_link_start(&p->phy, mac, &p->link_config); t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX); } /** * setup_rss - configure Receive Side Steering (per-queue connection demux) * @adap: the adapter * * Sets up RSS to distribute packets to multiple receive queues. We * configure the RSS CPU lookup table to distribute to the number of HW * receive queues, and the response queue lookup table to narrow that * down to the response queues actually configured for each port. * We always configure the RSS mapping for two ports since the mapping * table has plenty of entries. 
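* For example, with two queue sets on each of two ports (nq0 = nq1 = 2) the
* first half of the lookup table cycles 0,1,0,1,... and the second half
* cycles 2,3,2,3,..., so each port's traffic is spread only across its own
* queue sets.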
*/ static void setup_rss(adapter_t *adap) { int i; u_int nq0 = adap->port[0].nqsets; u_int nq1 = max((u_int)adap->port[1].nqsets, 1U); uint8_t cpus[SGE_QSETS + 1]; uint16_t rspq_map[RSS_TABLE_SIZE]; for (i = 0; i < SGE_QSETS; ++i) cpus[i] = i; cpus[SGE_QSETS] = 0xff; for (i = 0; i < RSS_TABLE_SIZE / 2; ++i) { rspq_map[i] = i % nq0; rspq_map[i + RSS_TABLE_SIZE / 2] = (i % nq1) + nq0; } t3_config_rss(adap, F_RQFEEDBACKENABLE | F_TNLLKPEN | F_TNLMAPEN | F_TNLPRTEN | F_TNL2TUPEN | F_TNL4TUPEN | V_RRCPLCPUSIZE(6), cpus, rspq_map); } /* * Sends an mbuf to an offload queue driver * after dealing with any active network taps. */ static inline int offload_tx(struct toedev *tdev, struct mbuf *m) { int ret; critical_enter(); ret = t3_offload_tx(tdev, m); critical_exit(); return ret; } static int write_smt_entry(struct adapter *adapter, int idx) { struct port_info *pi = &adapter->port[idx]; struct cpl_smt_write_req *req; struct mbuf *m; if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL) return (ENOMEM); req = mtod(m, struct cpl_smt_write_req *); req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, idx)); req->mtu_idx = NMTUS - 1; /* should be 0 but there's a T3 bug */ req->iff = idx; memset(req->src_mac1, 0, sizeof(req->src_mac1)); memcpy(req->src_mac0, pi->hw_addr, ETHER_ADDR_LEN); m_set_priority(m, 1); offload_tx(&adapter->tdev, m); return (0); } static int init_smt(struct adapter *adapter) { int i; for_each_port(adapter, i) write_smt_entry(adapter, i); return 0; } static void init_port_mtus(adapter_t *adapter) { unsigned int mtus = adapter->port[0].ifp->if_mtu; if (adapter->port[1].ifp) mtus |= adapter->port[1].ifp->if_mtu << 16; t3_write_reg(adapter, A_TP_MTU_PORT_TABLE, mtus); } static void send_pktsched_cmd(struct adapter *adap, int sched, int qidx, int lo, int hi, int port) { struct mbuf *m; struct mngt_pktsched_wr *req; m = m_gethdr(M_NOWAIT, MT_DATA); if (m) { req = mtod(m, struct mngt_pktsched_wr *); req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_MNGT)); req->mngt_opcode = FW_MNGTOPCODE_PKTSCHED_SET; req->sched = sched; req->idx = qidx; req->min = lo; req->max = hi; req->binding = port; m->m_len = m->m_pkthdr.len = sizeof(*req); t3_mgmt_tx(adap, m); } } static void bind_qsets(adapter_t *sc) { int i, j; for (i = 0; i < (sc)->params.nports; ++i) { const struct port_info *pi = adap2pinfo(sc, i); for (j = 0; j < pi->nqsets; ++j) send_pktsched_cmd(sc, 1, pi->first_qset + j, -1, -1, i); } } /** * cxgb_up - enable the adapter * @adap: adapter being enabled * * Called when the first port is enabled, this function performs the * actions necessary to make an adapter operational, such as completing * the initialization of HW modules, and enabling interrupts. 
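* Its counterpart, cxgb_down(), releases these resources once the last port
* and any offload use have been stopped.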
* */ static int cxgb_up(struct adapter *sc) { int err = 0; if ((sc->flags & FULL_INIT_DONE) == 0) { if ((sc->flags & FW_UPTODATE) == 0) err = upgrade_fw(sc); if (err) goto out; err = t3_init_hw(sc, 0); if (err) goto out; t3_write_reg(sc, A_ULPRX_TDDP_PSZ, V_HPZ0(PAGE_SHIFT - 12)); err = setup_sge_qsets(sc); if (err) goto out; setup_rss(sc); sc->flags |= FULL_INIT_DONE; } t3_intr_clear(sc); /* If it's MSI or INTx, allocate a single interrupt for everything */ if ((sc->flags & USING_MSIX) == 0) { if ((sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &sc->irq_rid, RF_SHAREABLE | RF_ACTIVE)) == NULL) { device_printf(sc->dev, "Cannot allocate interrupt rid=%d\n", sc->irq_rid); err = EINVAL; goto out; } device_printf(sc->dev, "allocated irq_res=%p\n", sc->irq_res); if (bus_setup_intr(sc->dev, sc->irq_res, INTR_MPSAFE|INTR_TYPE_NET, #ifdef INTR_FILTERS NULL, #endif sc->cxgb_intr, sc, &sc->intr_tag)) { device_printf(sc->dev, "Cannot set up interrupt\n"); err = EINVAL; goto irq_err; } } else { cxgb_setup_msix(sc, sc->msi_count); } t3_sge_start(sc); t3_intr_enable(sc); if ((sc->flags & (USING_MSIX | QUEUES_BOUND)) == USING_MSIX) bind_qsets(sc); sc->flags |= QUEUES_BOUND; out: return (err); irq_err: CH_ERR(sc, "request_irq failed, err %d\n", err); goto out; } /* * Release resources when all the ports and offloading have been stopped. */ static void cxgb_down(struct adapter *sc) { int i; t3_sge_stop(sc); t3_intr_disable(sc); for (i = 0; i < SGE_QSETS; i++) { if (sc->msix_intr_tag[i] != NULL) { bus_teardown_intr(sc->dev, sc->msix_irq_res[i], sc->msix_intr_tag[i]); sc->msix_intr_tag[i] = NULL; } if (sc->msix_irq_res[i] != NULL) { bus_release_resource(sc->dev, SYS_RES_IRQ, sc->msix_irq_rid[i], sc->msix_irq_res[i]); sc->msix_irq_res[i] = NULL; } } if (sc->intr_tag != NULL) { bus_teardown_intr(sc->dev, sc->irq_res, sc->intr_tag); sc->intr_tag = NULL; } if (sc->irq_res != NULL) { device_printf(sc->dev, "de-allocating interrupt irq_rid=%d irq_res=%p\n", sc->irq_rid, sc->irq_res); bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid, sc->irq_res); sc->irq_res = NULL; } callout_drain(&sc->sge_timer_ch); taskqueue_drain(sc->tq, &sc->slow_intr_task); taskqueue_drain(sc->tq, &sc->timer_reclaim_task); } static int offload_open(struct port_info *pi) { struct adapter *adapter = pi->adapter; struct toedev *tdev = TOEDEV(pi->ifp); int adap_up = adapter->open_device_map & PORT_MASK; int err = 0; if (atomic_cmpset_int(&adapter->open_device_map, (adapter->open_device_map & ~OFFLOAD_DEVMAP_BIT), (adapter->open_device_map | OFFLOAD_DEVMAP_BIT)) == 0) return (0); ADAPTER_LOCK(pi->adapter); if (!adap_up) err = cxgb_up(adapter); ADAPTER_UNLOCK(pi->adapter); if (err < 0) return (err); t3_tp_set_offload_mode(adapter, 1); tdev->lldev = adapter->port[0].ifp; err = cxgb_offload_activate(adapter); if (err) goto out; init_port_mtus(adapter); t3_load_mtus(adapter, adapter->params.mtus, adapter->params.a_wnd, adapter->params.b_wnd, adapter->params.rev == 0 ? 
adapter->port[0].ifp->if_mtu : 0xffff); init_smt(adapter); /* Call back all registered clients */ cxgb_add_clients(tdev); out: /* restore them in case the offload module has changed them */ if (err) { t3_tp_set_offload_mode(adapter, 0); clrbit(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT); cxgb_set_dummy_ops(tdev); } return (err); } static int offload_close(struct toedev *tdev) { struct adapter *adapter = tdev2adap(tdev); if (!isset(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT)) return 0; /* Call back all registered clients */ cxgb_remove_clients(tdev); tdev->lldev = NULL; cxgb_set_dummy_ops(tdev); t3_tp_set_offload_mode(adapter, 0); clrbit(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT); ADAPTER_LOCK(adapter); if (!adapter->open_device_map) cxgb_down(adapter); ADAPTER_UNLOCK(adapter); cxgb_offload_deactivate(adapter); return 0; } static void cxgb_init(void *arg) { struct port_info *p = arg; PORT_LOCK(p); cxgb_init_locked(p); PORT_UNLOCK(p); } static void cxgb_init_locked(struct port_info *p) { struct ifnet *ifp; adapter_t *sc = p->adapter; int err; mtx_assert(&p->lock, MA_OWNED); ifp = p->ifp; ADAPTER_LOCK(p->adapter); if ((sc->open_device_map == 0) && ((err = cxgb_up(sc)) < 0)) { ADAPTER_UNLOCK(p->adapter); cxgb_stop_locked(p); return; } if (p->adapter->open_device_map == 0) t3_intr_clear(sc); setbit(&p->adapter->open_device_map, p->port); ADAPTER_UNLOCK(p->adapter); if (is_offload(sc) && !ofld_disable) { err = offload_open(p); if (err) log(LOG_WARNING, "Could not initialize offload capabilities\n"); } cxgb_link_start(p); t3_port_intr_enable(sc, p->port); callout_reset(&sc->cxgb_tick_ch, sc->params.stats_update_period * hz, cxgb_tick, sc); PORT_LOCK(p); ifp->if_drv_flags |= IFF_DRV_RUNNING; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; PORT_UNLOCK(p); } static void cxgb_set_rxmode(struct port_info *p) { struct t3_rx_mode rm; struct cmac *mac = &p->mac; mtx_assert(&p->lock, MA_OWNED); t3_init_rx_mode(&rm, p); t3_mac_set_rx_mode(mac, &rm); } static void cxgb_stop_locked(struct port_info *p) { struct ifnet *ifp; mtx_assert(&p->lock, MA_OWNED); mtx_assert(&p->adapter->lock, MA_NOTOWNED); ifp = p->ifp; t3_port_intr_disable(p->adapter, p->port); PORT_LOCK(p); ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); PORT_UNLOCK(p); p->phy.ops->power_down(&p->phy, 1); t3_mac_disable(&p->mac, MAC_DIRECTION_TX | MAC_DIRECTION_RX); ADAPTER_LOCK(p->adapter); clrbit(&p->adapter->open_device_map, p->port); /* * XXX cancel check_task */ if (p->adapter->open_device_map == 0) cxgb_down(p->adapter); ADAPTER_UNLOCK(p->adapter); } static int cxgb_ioctl(struct ifnet *ifp, unsigned long command, caddr_t data) { struct port_info *p = ifp->if_softc; struct ifaddr *ifa = (struct ifaddr *)data; struct ifreq *ifr = (struct ifreq *)data; int flags, error = 0; uint32_t mask; /* * XXX need to check that we aren't in the middle of an unload */ switch (command) { case SIOCSIFMTU: if ((ifr->ifr_mtu < ETHERMIN) || (ifr->ifr_mtu > ETHER_MAX_LEN_JUMBO)) error = EINVAL; else if (ifp->if_mtu != ifr->ifr_mtu) { PORT_LOCK(p); ifp->if_mtu = ifr->ifr_mtu; t3_mac_set_mtu(&p->mac, ifp->if_mtu + ETHER_HDR_LEN); PORT_UNLOCK(p); } break; case SIOCSIFADDR: case SIOCGIFADDR: if (ifa->ifa_addr->sa_family == AF_INET) { ifp->if_flags |= IFF_UP; if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { cxgb_init(p); } arp_ifinit(ifp, ifa); } else error = ether_ioctl(ifp, command, data); break; case SIOCSIFFLAGS: if (ifp->if_flags & IFF_UP) { PORT_LOCK(p); if (ifp->if_drv_flags & IFF_DRV_RUNNING) { flags = p->if_flags; if (((ifp->if_flags ^ flags) & 
IFF_PROMISC) || ((ifp->if_flags ^ flags) & IFF_ALLMULTI)) cxgb_set_rxmode(p); } else cxgb_init_locked(p); p->if_flags = ifp->if_flags; PORT_UNLOCK(p); } else { callout_drain(&p->adapter->cxgb_tick_ch); PORT_LOCK(p); if (ifp->if_drv_flags & IFF_DRV_RUNNING) { cxgb_stop_locked(p); } else { adapter_t *sc = p->adapter; callout_reset(&sc->cxgb_tick_ch, sc->params.stats_update_period * hz, cxgb_tick, sc); } PORT_UNLOCK(p); } break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: error = ifmedia_ioctl(ifp, ifr, &p->media, command); break; case SIOCSIFCAP: PORT_LOCK(p); mask = ifr->ifr_reqcap ^ ifp->if_capenable; if (mask & IFCAP_TXCSUM) { if (IFCAP_TXCSUM & ifp->if_capenable) { ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4); ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP | CSUM_TSO); } else { ifp->if_capenable |= IFCAP_TXCSUM; ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP); } } else if (mask & IFCAP_RXCSUM) { if (IFCAP_RXCSUM & ifp->if_capenable) { ifp->if_capenable &= ~IFCAP_RXCSUM; } else { ifp->if_capenable |= IFCAP_RXCSUM; } } if (mask & IFCAP_TSO4) { if (IFCAP_TSO4 & ifp->if_capenable) { ifp->if_capenable &= ~IFCAP_TSO4; ifp->if_hwassist &= ~CSUM_TSO; } else if (IFCAP_TXCSUM & ifp->if_capenable) { ifp->if_capenable |= IFCAP_TSO4; ifp->if_hwassist |= CSUM_TSO; } else { if (cxgb_debug) printf("cxgb requires tx checksum offload" " be enabled to use TSO\n"); error = EINVAL; } } PORT_UNLOCK(p); break; default: error = ether_ioctl(ifp, command, data); break; } return (error); } static int cxgb_start_tx(struct ifnet *ifp, uint32_t txmax) { struct sge_qset *qs; struct sge_txq *txq; struct port_info *p = ifp->if_softc; struct mbuf *m0, *m = NULL; int err, in_use_init; if (!p->link_config.link_ok) return (ENXIO); if (IFQ_DRV_IS_EMPTY(&ifp->if_snd)) return (ENOBUFS); qs = &p->adapter->sge.qs[p->first_qset]; txq = &qs->txq[TXQ_ETH]; err = 0; mtx_lock(&txq->lock); in_use_init = txq->in_use; while ((txq->in_use - in_use_init < txmax) && (txq->size > txq->in_use + TX_MAX_DESC)) { IFQ_DRV_DEQUEUE(&ifp->if_snd, m); if (m == NULL) break; /* * Convert chain to M_IOVEC */ KASSERT((m->m_flags & M_IOVEC) == 0, ("IOVEC set too early")); m0 = m; #ifdef INVARIANTS /* * Clean up after net stack sloppiness * before calling m_sanity */ m0 = m->m_next; while (m0) { m0->m_flags &= ~M_PKTHDR; m0 = m0->m_next; } m_sanity(m0, 0); m0 = m; #endif if (collapse_mbufs && m->m_pkthdr.len > MCLBYTES && m_collapse(m, TX_MAX_SEGS, &m0) == EFBIG) { if ((m0 = m_defrag(m, M_NOWAIT)) != NULL) { m = m0; m_collapse(m, TX_MAX_SEGS, &m0); } else break; } m = m0; if ((err = t3_encap(p, &m)) != 0) break; BPF_MTAP(ifp, m); } mtx_unlock(&txq->lock); if (__predict_false(err)) { if (cxgb_debug) printf("would set OFLAGS\n"); if (err == ENOMEM) { IFQ_LOCK(&ifp->if_snd); IFQ_DRV_PREPEND(&ifp->if_snd, m); IFQ_UNLOCK(&ifp->if_snd); } } if (err == 0 && m == NULL) err = ENOBUFS; + if ((err == 0) && (txq->size <= txq->in_use + TX_MAX_DESC) && + (ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) { + ifp->if_drv_flags |= IFF_DRV_OACTIVE; + err = ENOSPC; + } return (err); } static void cxgb_start_proc(void *arg, int ncount) { struct ifnet *ifp = arg; struct port_info *pi = ifp->if_softc; struct sge_qset *qs; struct sge_txq *txq; int error = 0; qs = &pi->adapter->sge.qs[pi->first_qset]; txq = &qs->txq[TXQ_ETH]; while (error == 0) { if (desc_reclaimable(txq) > TX_CLEAN_MAX_DESC) taskqueue_enqueue(pi->adapter->tq, &pi->adapter->timer_reclaim_task); error = cxgb_start_tx(ifp, TX_START_MAX_DESC); } } static void cxgb_start(struct ifnet *ifp) { struct port_info *pi = ifp->if_softc; struct 
sge_qset *qs; struct sge_txq *txq; int err; qs = &pi->adapter->sge.qs[pi->first_qset]; txq = &qs->txq[TXQ_ETH]; if (desc_reclaimable(txq) > TX_CLEAN_MAX_DESC) taskqueue_enqueue(pi->adapter->tq, &pi->adapter->timer_reclaim_task); err = cxgb_start_tx(ifp, TX_START_MAX_DESC); if (err == 0) taskqueue_enqueue(pi->tq, &pi->start_task); } static int cxgb_media_change(struct ifnet *ifp) { if_printf(ifp, "media change not supported\n"); return (ENXIO); } static void cxgb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) { struct port_info *p = ifp->if_softc; ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; if (!p->link_config.link_ok) return; ifmr->ifm_status |= IFM_ACTIVE; if (p->link_config.duplex) ifmr->ifm_active |= IFM_FDX; else ifmr->ifm_active |= IFM_HDX; } static void cxgb_async_intr(void *data) { adapter_t *sc = data; if (cxgb_debug) device_printf(sc->dev, "cxgb_async_intr\n"); t3_slow_intr_handler(sc); } static void cxgb_ext_intr_handler(void *arg, int count) { adapter_t *sc = (adapter_t *)arg; if (cxgb_debug) printf("cxgb_ext_intr_handler\n"); t3_phy_intr_handler(sc); /* Now reenable external interrupts */ ADAPTER_LOCK(sc); if (sc->slow_intr_mask) { sc->slow_intr_mask |= F_T3DBG; t3_write_reg(sc, A_PL_INT_CAUSE0, F_T3DBG); t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask); } ADAPTER_UNLOCK(sc); } static void check_link_status(adapter_t *sc) { int i; for (i = 0; i < (sc)->params.nports; ++i) { struct port_info *p = &sc->port[i]; if (!(p->port_type->caps & SUPPORTED_IRQ)) t3_link_changed(sc, i); } } static void check_t3b2_mac(struct adapter *adapter) { int i; for_each_port(adapter, i) { struct port_info *p = &adapter->port[i]; struct ifnet *ifp = p->ifp; int status; if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) continue; status = 0; PORT_LOCK(p); if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) status = t3b2_mac_watchdog_task(&p->mac); if (status == 1) p->mac.stats.num_toggled++; else if (status == 2) { struct cmac *mac = &p->mac; t3_mac_set_mtu(mac, ifp->if_mtu + ETHER_HDR_LEN); t3_mac_set_address(mac, 0, p->hw_addr); cxgb_set_rxmode(p); t3_link_start(&p->phy, mac, &p->link_config); t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX); t3_port_intr_enable(adapter, p->port); p->mac.stats.num_resets++; } PORT_UNLOCK(p); } } static void cxgb_tick(void *arg) { adapter_t *sc = (adapter_t *)arg; const struct adapter_params *p = &sc->params; if (p->linkpoll_period) check_link_status(sc); callout_reset(&sc->cxgb_tick_ch, sc->params.stats_update_period * hz, cxgb_tick, sc); /* * adapter lock can currently only be acquire after the * port lock */ ADAPTER_UNLOCK(sc); if (p->rev == T3_REV_B2) check_t3b2_mac(sc); } static int in_range(int val, int lo, int hi) { return val < 0 || (val <= hi && val >= lo); } static int cxgb_extension_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data, int fflag, struct thread *td) { int mmd, error = 0; struct port_info *pi = dev->si_drv1; adapter_t *sc = pi->adapter; #ifdef PRIV_SUPPORTED if (priv_check(td, PRIV_DRIVER)) { if (cxgb_debug) printf("user does not have access to privileged ioctls\n"); return (EPERM); } #else if (suser(td)) { if (cxgb_debug) printf("user does not have access to privileged ioctls\n"); return (EPERM); } #endif switch (cmd) { case SIOCGMIIREG: { uint32_t val; struct cphy *phy = &pi->phy; struct mii_data *mid = (struct mii_data *)data; if (!phy->mdio_read) return (EOPNOTSUPP); if (is_10G(sc)) { mmd = mid->phy_id >> 8; if (!mmd) mmd = MDIO_DEV_PCS; else if (mmd > MDIO_DEV_XGXS) return -EINVAL; error = 
phy->mdio_read(sc, mid->phy_id & 0x1f, mmd, mid->reg_num, &val); } else error = phy->mdio_read(sc, mid->phy_id & 0x1f, 0, mid->reg_num & 0x1f, &val); if (error == 0) mid->val_out = val; break; } case SIOCSMIIREG: { struct cphy *phy = &pi->phy; struct mii_data *mid = (struct mii_data *)data; if (!phy->mdio_write) return (EOPNOTSUPP); if (is_10G(sc)) { mmd = mid->phy_id >> 8; if (!mmd) mmd = MDIO_DEV_PCS; else if (mmd > MDIO_DEV_XGXS) return (EINVAL); error = phy->mdio_write(sc, mid->phy_id & 0x1f, mmd, mid->reg_num, mid->val_in); } else error = phy->mdio_write(sc, mid->phy_id & 0x1f, 0, mid->reg_num & 0x1f, mid->val_in); break; } case CHELSIO_SETREG: { struct ch_reg *edata = (struct ch_reg *)data; if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len) return (EFAULT); t3_write_reg(sc, edata->addr, edata->val); break; } case CHELSIO_GETREG: { struct ch_reg *edata = (struct ch_reg *)data; if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len) return (EFAULT); edata->val = t3_read_reg(sc, edata->addr); break; } case CHELSIO_GET_SGE_CONTEXT: { struct ch_cntxt *ecntxt = (struct ch_cntxt *)data; mtx_lock(&sc->sge.reg_lock); switch (ecntxt->cntxt_type) { case CNTXT_TYPE_EGRESS: error = t3_sge_read_ecntxt(sc, ecntxt->cntxt_id, ecntxt->data); break; case CNTXT_TYPE_FL: error = t3_sge_read_fl(sc, ecntxt->cntxt_id, ecntxt->data); break; case CNTXT_TYPE_RSP: error = t3_sge_read_rspq(sc, ecntxt->cntxt_id, ecntxt->data); break; case CNTXT_TYPE_CQ: error = t3_sge_read_cq(sc, ecntxt->cntxt_id, ecntxt->data); break; default: error = EINVAL; break; } mtx_unlock(&sc->sge.reg_lock); break; } case CHELSIO_GET_SGE_DESC: { struct ch_desc *edesc = (struct ch_desc *)data; int ret; if (edesc->queue_num >= SGE_QSETS * 6) return (EINVAL); ret = t3_get_desc(&sc->sge.qs[edesc->queue_num / 6], edesc->queue_num % 6, edesc->idx, edesc->data); if (ret < 0) return (EINVAL); edesc->size = ret; break; } case CHELSIO_SET_QSET_PARAMS: { struct qset_params *q; struct ch_qset_params *t = (struct ch_qset_params *)data; if (t->qset_idx >= SGE_QSETS) return -EINVAL; if (!in_range(t->intr_lat, 0, M_NEWTIMER) || !in_range(t->cong_thres, 0, 255) || !in_range(t->txq_size[0], MIN_TXQ_ENTRIES, MAX_TXQ_ENTRIES) || !in_range(t->txq_size[1], MIN_TXQ_ENTRIES, MAX_TXQ_ENTRIES) || !in_range(t->txq_size[2], MIN_CTRL_TXQ_ENTRIES, MAX_CTRL_TXQ_ENTRIES) || !in_range(t->fl_size[0], MIN_FL_ENTRIES, MAX_RX_BUFFERS) || !in_range(t->fl_size[1], MIN_FL_ENTRIES, MAX_RX_JUMBO_BUFFERS) || !in_range(t->rspq_size, MIN_RSPQ_ENTRIES, MAX_RSPQ_ENTRIES)) return -EINVAL; if ((sc->flags & FULL_INIT_DONE) && (t->rspq_size >= 0 || t->fl_size[0] >= 0 || t->fl_size[1] >= 0 || t->txq_size[0] >= 0 || t->txq_size[1] >= 0 || t->txq_size[2] >= 0 || t->polling >= 0 || t->cong_thres >= 0)) return -EBUSY; q = &sc->params.sge.qset[t->qset_idx]; if (t->rspq_size >= 0) q->rspq_size = t->rspq_size; if (t->fl_size[0] >= 0) q->fl_size = t->fl_size[0]; if (t->fl_size[1] >= 0) q->jumbo_size = t->fl_size[1]; if (t->txq_size[0] >= 0) q->txq_size[0] = t->txq_size[0]; if (t->txq_size[1] >= 0) q->txq_size[1] = t->txq_size[1]; if (t->txq_size[2] >= 0) q->txq_size[2] = t->txq_size[2]; if (t->cong_thres >= 0) q->cong_thres = t->cong_thres; if (t->intr_lat >= 0) { struct sge_qset *qs = &sc->sge.qs[t->qset_idx]; q->coalesce_nsecs = t->intr_lat*1000; t3_update_qset_coalesce(qs, q); } break; } case CHELSIO_GET_QSET_PARAMS: { struct qset_params *q; struct ch_qset_params *t = (struct ch_qset_params *)data; if (t->qset_idx >= SGE_QSETS) return (EINVAL); q = 
&(sc)->params.sge.qset[t->qset_idx]; t->rspq_size = q->rspq_size; t->txq_size[0] = q->txq_size[0]; t->txq_size[1] = q->txq_size[1]; t->txq_size[2] = q->txq_size[2]; t->fl_size[0] = q->fl_size; t->fl_size[1] = q->jumbo_size; t->polling = q->polling; t->intr_lat = q->coalesce_nsecs / 1000; t->cong_thres = q->cong_thres; break; } case CHELSIO_SET_QSET_NUM: { struct ch_reg *edata = (struct ch_reg *)data; unsigned int port_idx = pi->port; if (sc->flags & FULL_INIT_DONE) return (EBUSY); if (edata->val < 1 || (edata->val > 1 && !(sc->flags & USING_MSIX))) return (EINVAL); if (edata->val + sc->port[!port_idx].nqsets > SGE_QSETS) return (EINVAL); sc->port[port_idx].nqsets = edata->val; sc->port[0].first_qset = 0; /* * XXX hardcode ourselves to 2 ports just like LEEENUX */ sc->port[1].first_qset = sc->port[0].nqsets; break; } case CHELSIO_GET_QSET_NUM: { struct ch_reg *edata = (struct ch_reg *)data; edata->val = pi->nqsets; break; } #ifdef notyet case CHELSIO_LOAD_FW: case CHELSIO_GET_PM: case CHELSIO_SET_PM: return (EOPNOTSUPP); break; #endif case CHELSIO_SETMTUTAB: { struct ch_mtus *m = (struct ch_mtus *)data; int i; if (!is_offload(sc)) return (EOPNOTSUPP); if (offload_running(sc)) return (EBUSY); if (m->nmtus != NMTUS) return (EINVAL); if (m->mtus[0] < 81) /* accommodate SACK */ return (EINVAL); /* * MTUs must be in ascending order */ for (i = 1; i < NMTUS; ++i) if (m->mtus[i] < m->mtus[i - 1]) return (EINVAL); memcpy(sc->params.mtus, m->mtus, sizeof(sc->params.mtus)); break; } case CHELSIO_GETMTUTAB: { struct ch_mtus *m = (struct ch_mtus *)data; if (!is_offload(sc)) return (EOPNOTSUPP); memcpy(m->mtus, sc->params.mtus, sizeof(m->mtus)); m->nmtus = NMTUS; break; } case CHELSIO_DEVUP: if (!is_offload(sc)) return (EOPNOTSUPP); return offload_open(pi); break; case CHELSIO_GET_MEM: { struct ch_mem_range *t = (struct ch_mem_range *)data; struct mc7 *mem; uint8_t *useraddr; u64 buf[32]; if (!is_offload(sc)) return (EOPNOTSUPP); if (!(sc->flags & FULL_INIT_DONE)) return (EIO); /* need the memory controllers */ if ((t->addr & 0x7) || (t->len & 0x7)) return (EINVAL); if (t->mem_id == MEM_CM) mem = &sc->cm; else if (t->mem_id == MEM_PMRX) mem = &sc->pmrx; else if (t->mem_id == MEM_PMTX) mem = &sc->pmtx; else return (EINVAL); /* * Version scheme: * bits 0..9: chip version * bits 10..15: chip revision */ t->version = 3 | (sc->params.rev << 10); /* * Read 256 bytes at a time as len can be large and we don't * want to use huge intermediate buffers. 
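* (The on-stack buffer used below holds 32 64-bit words, i.e. 256 bytes per
* iteration.)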
*/ useraddr = (uint8_t *)(t + 1); /* advance to start of buffer */ while (t->len) { unsigned int chunk = min(t->len, sizeof(buf)); error = t3_mc7_bd_read(mem, t->addr / 8, chunk / 8, buf); if (error) return (-error); if (copyout(buf, useraddr, chunk)) return (EFAULT); useraddr += chunk; t->addr += chunk; t->len -= chunk; } break; } case CHELSIO_READ_TCAM_WORD: { struct ch_tcam_word *t = (struct ch_tcam_word *)data; if (!is_offload(sc)) return (EOPNOTSUPP); return -t3_read_mc5_range(&sc->mc5, t->addr, 1, t->buf); break; } case CHELSIO_SET_TRACE_FILTER: { struct ch_trace *t = (struct ch_trace *)data; const struct trace_params *tp; tp = (const struct trace_params *)&t->sip; if (t->config_tx) t3_config_trace_filter(sc, tp, 0, t->invert_match, t->trace_tx); if (t->config_rx) t3_config_trace_filter(sc, tp, 1, t->invert_match, t->trace_rx); break; } case CHELSIO_SET_PKTSCHED: { struct ch_pktsched_params *p = (struct ch_pktsched_params *)data; if (sc->open_device_map == 0) return (EAGAIN); send_pktsched_cmd(sc, p->sched, p->idx, p->min, p->max, p->binding); break; } case CHELSIO_IFCONF_GETREGS: { struct ifconf_regs *regs = (struct ifconf_regs *)data; int reglen = cxgb_get_regs_len(); uint8_t *buf = malloc(REGDUMP_SIZE, M_DEVBUF, M_NOWAIT); if (buf == NULL) { return (ENOMEM); } if (regs->len > reglen) regs->len = reglen; else if (regs->len < reglen) { error = E2BIG; goto done; } cxgb_get_regs(sc, regs, buf); error = copyout(buf, regs->data, reglen); done: free(buf, M_DEVBUF); break; } case CHELSIO_SET_HW_SCHED: { struct ch_hw_sched *t = (struct ch_hw_sched *)data; unsigned int ticks_per_usec = core_ticks_per_usec(sc); if ((sc->flags & FULL_INIT_DONE) == 0) return (EAGAIN); /* need TP to be initialized */ if (t->sched >= NTX_SCHED || !in_range(t->mode, 0, 1) || !in_range(t->channel, 0, 1) || !in_range(t->kbps, 0, 10000000) || !in_range(t->class_ipg, 0, 10000 * 65535 / ticks_per_usec) || !in_range(t->flow_ipg, 0, dack_ticks_to_usec(sc, 0x7ff))) return (EINVAL); if (t->kbps >= 0) { error = t3_config_sched(sc, t->kbps, t->sched); if (error < 0) return (-error); } if (t->class_ipg >= 0) t3_set_sched_ipg(sc, t->sched, t->class_ipg); if (t->flow_ipg >= 0) { t->flow_ipg *= 1000; /* us -> ns */ t3_set_pace_tbl(sc, &t->flow_ipg, t->sched, 1); } if (t->mode >= 0) { int bit = 1 << (S_TX_MOD_TIMER_MODE + t->sched); t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP, bit, t->mode ? bit : 0); } if (t->channel >= 0) t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP, 1 << t->sched, t->channel << t->sched); break; } default: return (EOPNOTSUPP); break; } return (error); } static __inline void reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start, unsigned int end) { uint32_t *p = (uint32_t *)buf + start; for ( ; start <= end; start += sizeof(uint32_t)) *p++ = t3_read_reg(ap, start); } #define T3_REGMAP_SIZE (3 * 1024) static int cxgb_get_regs_len(void) { return T3_REGMAP_SIZE; } #undef T3_REGMAP_SIZE static void cxgb_get_regs(adapter_t *sc, struct ifconf_regs *regs, uint8_t *buf) { /* * Version scheme: * bits 0..9: chip version * bits 10..15: chip revision * bit 31: set for PCIe cards */ regs->version = 3 | (sc->params.rev << 10) | (is_pcie(sc) << 31); /* * We skip the MAC statistics registers because they are clear-on-read. * Also reading multi-register stats would need to synchronize with the * periodic mac stats accumulation. Hard to justify the complexity. 
*/ memset(buf, 0, REGDUMP_SIZE); reg_block_dump(sc, buf, 0, A_SG_RSPQ_CREDIT_RETURN); reg_block_dump(sc, buf, A_SG_HI_DRB_HI_THRSH, A_ULPRX_PBL_ULIMIT); reg_block_dump(sc, buf, A_ULPTX_CONFIG, A_MPS_INT_CAUSE); reg_block_dump(sc, buf, A_CPL_SWITCH_CNTRL, A_CPL_MAP_TBL_DATA); reg_block_dump(sc, buf, A_SMB_GLOBAL_TIME_CFG, A_XGM_SERDES_STAT3); reg_block_dump(sc, buf, A_XGM_SERDES_STATUS0, XGM_REG(A_XGM_SERDES_STAT3, 1)); reg_block_dump(sc, buf, XGM_REG(A_XGM_SERDES_STATUS0, 1), XGM_REG(A_XGM_RX_SPI4_SOP_EOP_CNT, 1)); } Index: head/sys/dev/cxgb/cxgb_sge.c =================================================================== --- head/sys/dev/cxgb/cxgb_sge.c (revision 170006) +++ head/sys/dev/cxgb/cxgb_sge.c (revision 170007) @@ -1,2665 +1,2670 @@ /************************************************************************** Copyright (c) 2007, Chelsio Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Neither the name of the Chelsio Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ***************************************************************************/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include uint32_t collapse_free = 0; uint32_t mb_free_vec_free = 0; int collapse_mbufs = 0; static int recycle_enable = 1; /* * XXX GC */ #define NET_XMIT_CN 2 #define NET_XMIT_SUCCESS 0 #define USE_GTS 0 #define SGE_RX_SM_BUF_SIZE 1536 #define SGE_RX_DROP_THRES 16 #define SGE_RX_COPY_THRES 128 /* * Period of the Tx buffer reclaim timer. This timer does not need to run * frequently as Tx buffers are usually reclaimed by new Tx packets. 
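* The period below is hz / 4 ticks, i.e. one reclaim pass roughly every
* quarter of a second.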
*/ #define TX_RECLAIM_PERIOD (hz >> 2) /* * work request size in bytes */ #define WR_LEN (WR_FLITS * 8) /* * Values for sge_txq.flags */ enum { TXQ_RUNNING = 1 << 0, /* fetch engine is running */ TXQ_LAST_PKT_DB = 1 << 1, /* last packet rang the doorbell */ }; struct tx_desc { uint64_t flit[TX_DESC_FLITS]; } __packed; struct rx_desc { uint32_t addr_lo; uint32_t len_gen; uint32_t gen2; uint32_t addr_hi; } __packed;; struct rsp_desc { /* response queue descriptor */ struct rss_header rss_hdr; uint32_t flags; uint32_t len_cq; uint8_t imm_data[47]; uint8_t intr_gen; } __packed; #define RX_SW_DESC_MAP_CREATED (1 << 0) #define TX_SW_DESC_MAP_CREATED (1 << 1) #define RX_SW_DESC_INUSE (1 << 3) #define TX_SW_DESC_MAPPED (1 << 4) #define RSPQ_NSOP_NEOP G_RSPD_SOP_EOP(0) #define RSPQ_EOP G_RSPD_SOP_EOP(F_RSPD_EOP) #define RSPQ_SOP G_RSPD_SOP_EOP(F_RSPD_SOP) #define RSPQ_SOP_EOP G_RSPD_SOP_EOP(F_RSPD_SOP|F_RSPD_EOP) struct tx_sw_desc { /* SW state per Tx descriptor */ struct mbuf *m; bus_dmamap_t map; int flags; }; struct rx_sw_desc { /* SW state per Rx descriptor */ void *cl; bus_dmamap_t map; int flags; }; struct txq_state { unsigned int compl; unsigned int gen; unsigned int pidx; }; struct refill_fl_cb_arg { int error; bus_dma_segment_t seg; int nseg; }; /* * Maps a number of flits to the number of Tx descriptors that can hold them. * The formula is * * desc = 1 + (flits - 2) / (WR_FLITS - 1). * * HW allows up to 4 descriptors to be combined into a WR. */ static uint8_t flit_desc_map[] = { 0, #if SGE_NUM_GENBITS == 1 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 #elif SGE_NUM_GENBITS == 2 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, #else # error "SGE_NUM_GENBITS must be 1 or 2" #endif }; static int lro_default = 0; int cxgb_debug = 0; static void t3_free_qset(adapter_t *sc, struct sge_qset *q); static void sge_timer_cb(void *arg); static void sge_timer_reclaim(void *arg, int ncount); static int free_tx_desc(adapter_t *sc, struct sge_txq *q, int n, struct mbuf **m_vec); /** * reclaim_completed_tx - reclaims completed Tx descriptors * @adapter: the adapter * @q: the Tx queue to reclaim completed descriptors from * * Reclaims Tx descriptors that the SGE has indicated it has processed, * and frees the associated buffers if possible. Called with the Tx * queue's lock held. */ static __inline int reclaim_completed_tx(adapter_t *adapter, struct sge_txq *q, int nbufs, struct mbuf **mvec) { int reclaimed, reclaim = desc_reclaimable(q); int n = 0; mtx_assert(&q->lock, MA_OWNED); if (reclaim > 0) { n = free_tx_desc(adapter, q, min(reclaim, nbufs), mvec); reclaimed = min(reclaim, nbufs); q->cleaned += reclaimed; q->in_use -= reclaimed; } return (n); } /** * should_restart_tx - are there enough resources to restart a Tx queue? * @q: the Tx queue * * Checks if there are enough descriptors to restart a suspended Tx queue. */ static __inline int should_restart_tx(const struct sge_txq *q) { unsigned int r = q->processed - q->cleaned; return q->in_use - r < (q->size >> 1); } /** * t3_sge_init - initialize SGE * @adap: the adapter * @p: the SGE parameters * * Performs SGE initialization needed every time after a chip reset. * We do not initialize any of the queue sets here, instead the driver * top-level must request those individually. 
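* (In this driver that is done by setup_sge_qsets(), called from cxgb_up().)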
We also do not enable DMA * here, that should be done after the queues have been set up. */ void t3_sge_init(adapter_t *adap, struct sge_params *p) { u_int ctrl, ups; ups = 0; /* = ffs(pci_resource_len(adap->pdev, 2) >> 12); */ ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL | F_CQCRDTCTRL | V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS | V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING; #if SGE_NUM_GENBITS == 1 ctrl |= F_EGRGENCTRL; #endif if (adap->params.rev > 0) { if (!(adap->flags & (USING_MSIX | USING_MSI))) ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ; ctrl |= F_CQCRDTCTRL | F_AVOIDCQOVFL; } t3_write_reg(adap, A_SG_CONTROL, ctrl); t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) | V_LORCQDRBTHRSH(512)); t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10); t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) | V_TIMEOUT(200 * core_ticks_per_usec(adap))); t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH, 1000); t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256); t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000); t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256); t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff)); t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024); } /** * sgl_len - calculates the size of an SGL of the given capacity * @n: the number of SGL entries * * Calculates the number of flits needed for a scatter/gather list that * can hold the given number of entries. */ static __inline unsigned int sgl_len(unsigned int n) { return ((3 * n) / 2 + (n & 1)); } /** * get_imm_packet - return the next ingress packet buffer from a response * @resp: the response descriptor containing the packet data * * Return a packet containing the immediate data of the given response. */ static __inline void get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct mbuf *m, void *cl) { int len; uint32_t flags = ntohl(resp->flags); uint8_t sopeop = G_RSPD_SOP_EOP(flags); /* * would be a firmware bug */ if (sopeop == RSPQ_NSOP_NEOP || sopeop == RSPQ_SOP) return; len = G_RSPD_LEN(ntohl(resp->len_cq)); switch (sopeop) { case RSPQ_SOP_EOP: m->m_len = m->m_pkthdr.len = len; memcpy(m->m_data, resp->imm_data, len); break; case RSPQ_EOP: memcpy(cl, resp->imm_data, len); m_iovappend(m, cl, MSIZE, len, 0); break; } } static __inline u_int flits_to_desc(u_int n) { return (flit_desc_map[n]); } void t3_sge_err_intr_handler(adapter_t *adapter) { unsigned int v, status; status = t3_read_reg(adapter, A_SG_INT_CAUSE); if (status & F_RSPQCREDITOVERFOW) CH_ALERT(adapter, "SGE response queue credit overflow\n"); if (status & F_RSPQDISABLED) { v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS); CH_ALERT(adapter, "packet delivered to disabled response queue (0x%x)\n", (v >> S_RSPQ0DISABLED) & 0xff); } t3_write_reg(adapter, A_SG_INT_CAUSE, status); if (status & (F_RSPQCREDITOVERFOW | F_RSPQDISABLED)) t3_fatal_err(adapter); } void t3_sge_prep(adapter_t *adap, struct sge_params *p) { int i; /* XXX Does ETHER_ALIGN need to be accounted for here? */ p->max_pkt_size = MJUM16BYTES - sizeof(struct cpl_rx_data); for (i = 0; i < SGE_QSETS; ++i) { struct qset_params *q = p->qset + i; q->polling = adap->params.rev > 0; if (adap->flags & USING_MSIX) q->coalesce_nsecs = 6000; else q->coalesce_nsecs = 3500; q->rspq_size = RSPQ_Q_SIZE; q->fl_size = FL_Q_SIZE; q->jumbo_size = JUMBO_Q_SIZE; q->txq_size[TXQ_ETH] = TX_ETH_Q_SIZE; q->txq_size[TXQ_OFLD] = 1024; q->txq_size[TXQ_CTRL] = 256; q->cong_thres = 0; } } int t3_sge_alloc(adapter_t *sc) { /* The parent tag. 
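 * The RX, jumbo RX, and TX tags below are all created as children of this
 * parent tag.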
*/ if (bus_dma_tag_create( NULL, /* parent */ 1, 0, /* algnmnt, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ BUS_SPACE_MAXSIZE_32BIT,/* maxsize */ BUS_SPACE_UNRESTRICTED, /* nsegments */ BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */ 0, /* flags */ NULL, NULL, /* lock, lockarg */ &sc->parent_dmat)) { device_printf(sc->dev, "Cannot allocate parent DMA tag\n"); return (ENOMEM); } /* * DMA tag for normal sized RX frames */ if (bus_dma_tag_create(sc->parent_dmat, MCLBYTES, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1, MCLBYTES, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_dmat)) { device_printf(sc->dev, "Cannot allocate RX DMA tag\n"); return (ENOMEM); } /* * DMA tag for jumbo sized RX frames. */ if (bus_dma_tag_create(sc->parent_dmat, MJUMPAGESIZE, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, MJUMPAGESIZE, 1, MJUMPAGESIZE, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_jumbo_dmat)) { device_printf(sc->dev, "Cannot allocate RX jumbo DMA tag\n"); return (ENOMEM); } /* * DMA tag for TX frames. */ if (bus_dma_tag_create(sc->parent_dmat, 1, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS, TX_MAX_SIZE, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->tx_dmat)) { device_printf(sc->dev, "Cannot allocate TX DMA tag\n"); return (ENOMEM); } return (0); } int t3_sge_free(struct adapter * sc) { if (sc->tx_dmat != NULL) bus_dma_tag_destroy(sc->tx_dmat); if (sc->rx_jumbo_dmat != NULL) bus_dma_tag_destroy(sc->rx_jumbo_dmat); if (sc->rx_dmat != NULL) bus_dma_tag_destroy(sc->rx_dmat); if (sc->parent_dmat != NULL) bus_dma_tag_destroy(sc->parent_dmat); return (0); } void t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p) { qs->rspq.holdoff_tmr = max(p->coalesce_nsecs/100, 1U); qs->rspq.polling = 0 /* p->polling */; } static void refill_fl_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) { struct refill_fl_cb_arg *cb_arg = arg; cb_arg->error = error; cb_arg->seg = segs[0]; cb_arg->nseg = nseg; } /** * refill_fl - refill an SGE free-buffer list * @sc: the controller softc * @q: the free-list to refill * @n: the number of new buffers to allocate * * (Re)populate an SGE free-buffer list with up to @n new packet buffers. * The caller must assure that @n does not exceed the queue's capacity. 
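 * Each new buffer is a bare cluster (the mbuf header is attached later, at
 * receive time); a DMA map is created on first use, and the loaded bus
 * address is written into the descriptor as two 32-bit halves together with
 * the current generation bits.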
*/ static void refill_fl(adapter_t *sc, struct sge_fl *q, int n) { struct rx_sw_desc *sd = &q->sdesc[q->pidx]; struct rx_desc *d = &q->desc[q->pidx]; struct refill_fl_cb_arg cb_arg; void *cl; int err; cb_arg.error = 0; while (n--) { /* * We only allocate a cluster, mbuf allocation happens after rx */ if ((cl = m_cljget(NULL, M_DONTWAIT, q->buf_size)) == NULL) { log(LOG_WARNING, "Failed to allocate cluster\n"); goto done; } if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0) { if ((err = bus_dmamap_create(q->entry_tag, 0, &sd->map))) { log(LOG_WARNING, "bus_dmamap_create failed %d\n", err); uma_zfree(q->zone, cl); goto done; } sd->flags |= RX_SW_DESC_MAP_CREATED; } err = bus_dmamap_load(q->entry_tag, sd->map, cl, q->buf_size, refill_fl_cb, &cb_arg, 0); if (err != 0 || cb_arg.error) { log(LOG_WARNING, "failure in refill_fl %d\n", cb_arg.error); /* * XXX free cluster */ return; } sd->flags |= RX_SW_DESC_INUSE; sd->cl = cl; d->addr_lo = htobe32(cb_arg.seg.ds_addr & 0xffffffff); d->addr_hi = htobe32(((uint64_t)cb_arg.seg.ds_addr >>32) & 0xffffffff); d->len_gen = htobe32(V_FLD_GEN1(q->gen)); d->gen2 = htobe32(V_FLD_GEN2(q->gen)); d++; sd++; if (++q->pidx == q->size) { q->pidx = 0; q->gen ^= 1; sd = q->sdesc; d = q->desc; } q->credits++; } done: t3_write_reg(sc, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id)); } /** * free_rx_bufs - free the Rx buffers on an SGE free list * @sc: the controle softc * @q: the SGE free list to clean up * * Release the buffers on an SGE free-buffer Rx queue. HW fetching from * this queue should be stopped before calling this function. */ static void free_rx_bufs(adapter_t *sc, struct sge_fl *q) { u_int cidx = q->cidx; while (q->credits--) { struct rx_sw_desc *d = &q->sdesc[cidx]; if (d->flags & RX_SW_DESC_INUSE) { bus_dmamap_unload(q->entry_tag, d->map); bus_dmamap_destroy(q->entry_tag, d->map); uma_zfree(q->zone, d->cl); } d->cl = NULL; if (++cidx == q->size) cidx = 0; } } static __inline void __refill_fl(adapter_t *adap, struct sge_fl *fl) { refill_fl(adap, fl, min(16U, fl->size - fl->credits)); } /** * recycle_rx_buf - recycle a receive buffer * @adapter: the adapter * @q: the SGE free list * @idx: index of buffer to recycle * * Recycles the specified buffer on the given free list by adding it at * the next available slot on the list. 
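 * The DMA address words are copied over unchanged (they are already big
 * endian); only the generation words are rewritten for the producer's
 * current generation before the free-list doorbell is rung.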
*/ static void recycle_rx_buf(adapter_t *adap, struct sge_fl *q, unsigned int idx) { struct rx_desc *from = &q->desc[idx]; struct rx_desc *to = &q->desc[q->pidx]; q->sdesc[q->pidx] = q->sdesc[idx]; to->addr_lo = from->addr_lo; // already big endian to->addr_hi = from->addr_hi; // likewise wmb(); to->len_gen = htobe32(V_FLD_GEN1(q->gen)); to->gen2 = htobe32(V_FLD_GEN2(q->gen)); q->credits++; if (++q->pidx == q->size) { q->pidx = 0; q->gen ^= 1; } t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id)); } static void alloc_ring_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error) { uint32_t *addr; addr = arg; *addr = segs[0].ds_addr; } static int alloc_ring(adapter_t *sc, size_t nelem, size_t elem_size, size_t sw_size, bus_addr_t *phys, void *desc, void *sdesc, bus_dma_tag_t *tag, bus_dmamap_t *map, bus_dma_tag_t parent_entry_tag, bus_dma_tag_t *entry_tag) { size_t len = nelem * elem_size; void *s = NULL; void *p = NULL; int err; if ((err = bus_dma_tag_create(sc->parent_dmat, PAGE_SIZE, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, len, 1, len, 0, NULL, NULL, tag)) != 0) { device_printf(sc->dev, "Cannot allocate descriptor tag\n"); return (ENOMEM); } if ((err = bus_dmamem_alloc(*tag, (void **)&p, BUS_DMA_NOWAIT, map)) != 0) { device_printf(sc->dev, "Cannot allocate descriptor memory\n"); return (ENOMEM); } bus_dmamap_load(*tag, *map, p, len, alloc_ring_cb, phys, 0); bzero(p, len); *(void **)desc = p; if (sw_size) { len = nelem * sw_size; s = malloc(len, M_DEVBUF, M_WAITOK); bzero(s, len); *(void **)sdesc = s; } if (parent_entry_tag == NULL) return (0); if ((err = bus_dma_tag_create(parent_entry_tag, 1, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS, TX_MAX_SIZE, BUS_DMA_ALLOCNOW, NULL, NULL, entry_tag)) != 0) { device_printf(sc->dev, "Cannot allocate descriptor entry tag\n"); return (ENOMEM); } return (0); } static void sge_slow_intr_handler(void *arg, int ncount) { adapter_t *sc = arg; t3_slow_intr_handler(sc); } static void sge_timer_cb(void *arg) { adapter_t *sc = arg; struct sge_qset *qs; struct sge_txq *txq; int i, j; int reclaim_eth, reclaim_ofl, refill_rx; for (i = 0; i < sc->params.nports; i++) for (j = 0; j < sc->port[i].nqsets; j++) { qs = &sc->sge.qs[i + j]; txq = &qs->txq[0]; reclaim_eth = txq[TXQ_ETH].processed - txq[TXQ_ETH].cleaned; reclaim_ofl = txq[TXQ_OFLD].processed - txq[TXQ_OFLD].cleaned; refill_rx = ((qs->fl[0].credits < qs->fl[0].size) || (qs->fl[1].credits < qs->fl[1].size)); if (reclaim_eth || reclaim_ofl || refill_rx) { taskqueue_enqueue(sc->tq, &sc->timer_reclaim_task); goto done; } } done: callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc); } /* * This is meant to be a catch-all function to keep sge state private * to sge.c * */ int t3_sge_init_sw(adapter_t *sc) { callout_init(&sc->sge_timer_ch, CALLOUT_MPSAFE); callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc); TASK_INIT(&sc->timer_reclaim_task, 0, sge_timer_reclaim, sc); TASK_INIT(&sc->slow_intr_task, 0, sge_slow_intr_handler, sc); return (0); } void t3_sge_deinit_sw(adapter_t *sc) { callout_drain(&sc->sge_timer_ch); if (sc->tq) { taskqueue_drain(sc->tq, &sc->timer_reclaim_task); taskqueue_drain(sc->tq, &sc->slow_intr_task); } } /** * refill_rspq - replenish an SGE response queue * @adapter: the adapter * @q: the response queue to replenish * @credits: how many new responses to make available * * Replenishes a response queue by making the supplied number of responses * available to HW. 
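 * This is a single register write: the queue's context id and the credit
 * count are packed into A_SG_RSPQ_CREDIT_RETURN.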
*/ static __inline void refill_rspq(adapter_t *sc, const struct sge_rspq *q, u_int credits) { /* mbufs are allocated on demand when a rspq entry is processed. */ t3_write_reg(sc, A_SG_RSPQ_CREDIT_RETURN, V_RSPQ(q->cntxt_id) | V_CREDITS(credits)); } static void sge_timer_reclaim(void *arg, int ncount) { adapter_t *sc = arg; int i, nqsets = 0; struct sge_qset *qs; struct sge_txq *txq; struct mtx *lock; struct mbuf *m_vec[TX_CLEAN_MAX_DESC]; int n, reclaimable; /* * XXX assuming these quantities are allowed to change during operation */ for (i = 0; i < sc->params.nports; i++) nqsets += sc->port[i].nqsets; for (i = 0; i < nqsets; i++) { qs = &sc->sge.qs[i]; txq = &qs->txq[TXQ_ETH]; reclaimable = desc_reclaimable(txq); if (reclaimable > 0) { mtx_lock(&txq->lock); n = reclaim_completed_tx(sc, txq, TX_CLEAN_MAX_DESC, m_vec); mtx_unlock(&txq->lock); for (i = 0; i < n; i++) { m_freem_vec(m_vec[i]); } + if (qs->port->ifp->if_drv_flags & IFF_DRV_OACTIVE && + txq->size - txq->in_use >= TX_START_MAX_DESC) { + qs->port->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; + taskqueue_enqueue(qs->port->tq, &qs->port->start_task); + } } txq = &qs->txq[TXQ_OFLD]; reclaimable = desc_reclaimable(txq); if (reclaimable > 0) { mtx_lock(&txq->lock); n = reclaim_completed_tx(sc, txq, TX_CLEAN_MAX_DESC, m_vec); mtx_unlock(&txq->lock); for (i = 0; i < n; i++) { m_freem_vec(m_vec[i]); } } lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock : &sc->sge.qs[0].rspq.lock; if (mtx_trylock(lock)) { /* XXX currently assume that we are *NOT* polling */ uint32_t status = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS); if (qs->fl[0].credits < qs->fl[0].size - 16) __refill_fl(sc, &qs->fl[0]); if (qs->fl[1].credits < qs->fl[1].size - 16) __refill_fl(sc, &qs->fl[1]); if (status & (1 << qs->rspq.cntxt_id)) { if (qs->rspq.credits) { refill_rspq(sc, &qs->rspq, 1); qs->rspq.credits--; t3_write_reg(sc, A_SG_RSPQ_FL_STATUS, 1 << qs->rspq.cntxt_id); } } mtx_unlock(lock); } } } /** * init_qset_cntxt - initialize an SGE queue set context info * @qs: the queue set * @id: the queue set id * * Initializes the TIDs and context ids for the queues of a queue set. */ static void init_qset_cntxt(struct sge_qset *qs, u_int id) { qs->rspq.cntxt_id = id; qs->fl[0].cntxt_id = 2 * id; qs->fl[1].cntxt_id = 2 * id + 1; qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id; qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id; qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id; qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id; qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id; } static void txq_prod(struct sge_txq *txq, unsigned int ndesc, struct txq_state *txqs) { txq->in_use += ndesc; /* * XXX we don't handle stopping of queue * presumably start handles this when we bump against the end */ txqs->gen = txq->gen; txq->unacked += ndesc; txqs->compl = (txq->unacked & 8) << (S_WR_COMPL - 3); txq->unacked &= 7; txqs->pidx = txq->pidx; txq->pidx += ndesc; if (txq->pidx >= txq->size) { txq->pidx -= txq->size; txq->gen ^= 1; } } /** * calc_tx_descs - calculate the number of Tx descriptors for a packet * @m: the packet mbufs * @nsegs: the number of segments * * Returns the number of Tx descriptors needed for the given Ethernet * packet. Ethernet packets require addition of WR and CPL headers. 
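 * A packet that fits as immediate data (pkthdr.len no larger than
 * WR_LEN - sizeof(struct cpl_tx_pkt)) always takes exactly one descriptor.
 * Otherwise, for example, a non-TSO packet mapping to 3 DMA segments needs
 * sgl_len(3) = (3 * 3) / 2 + 1 = 5 SGL flits plus 2 header flits, and
 * flit_desc_map[7] shows that 7 flits still fit in a single descriptor.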
*/ static __inline unsigned int calc_tx_descs(const struct mbuf *m, int nsegs) { unsigned int flits; if (m->m_pkthdr.len <= WR_LEN - sizeof(struct cpl_tx_pkt)) return 1; flits = sgl_len(nsegs) + 2; #ifdef TSO_SUPPORTED if (m->m_pkthdr.csum_flags & (CSUM_TSO)) flits++; #endif return flits_to_desc(flits); } static unsigned int busdma_map_mbufs(struct mbuf **m, struct sge_txq *txq, struct tx_sw_desc *stx, bus_dma_segment_t *segs, int *nsegs) { struct mbuf *m0; int err, pktlen; m0 = *m; pktlen = m0->m_pkthdr.len; err = bus_dmamap_load_mvec_sg(txq->entry_tag, stx->map, m0, segs, nsegs, 0); #ifdef DEBUG if (err) { int n = 0; struct mbuf *mtmp = m0; while(mtmp) { n++; mtmp = mtmp->m_next; } printf("map_mbufs: bus_dmamap_load_mbuf_sg failed with %d - pkthdr.len==%d nmbufs=%d\n", err, m0->m_pkthdr.len, n); } #endif if (err == EFBIG) { /* Too many segments, try to defrag */ m0 = m_defrag(m0, M_NOWAIT); if (m0 == NULL) { m_freem(*m); *m = NULL; return (ENOBUFS); } *m = m0; err = bus_dmamap_load_mbuf_sg(txq->entry_tag, stx->map, m0, segs, nsegs, 0); } if (err == ENOMEM) { return (err); } if (err) { if (cxgb_debug) printf("map failure err=%d pktlen=%d\n", err, pktlen); m_freem_vec(m0); *m = NULL; return (err); } bus_dmamap_sync(txq->entry_tag, stx->map, BUS_DMASYNC_PREWRITE); stx->flags |= TX_SW_DESC_MAPPED; return (0); } /** * make_sgl - populate a scatter/gather list for a packet * @sgp: the SGL to populate * @segs: the packet dma segments * @nsegs: the number of segments * * Generates a scatter/gather list for the buffers that make up a packet * and returns the SGL size in 8-byte words. The caller must size the SGL * appropriately. */ static __inline void make_sgl(struct sg_ent *sgp, bus_dma_segment_t *segs, int nsegs) { int i, idx; for (idx = 0, i = 0; i < nsegs; i++, idx ^= 1) { if (i && idx == 0) ++sgp; sgp->len[idx] = htobe32(segs[i].ds_len); sgp->addr[idx] = htobe64(segs[i].ds_addr); } if (idx) sgp->len[idx] = 0; } /** * check_ring_tx_db - check and potentially ring a Tx queue's doorbell * @adap: the adapter * @q: the Tx queue * * Ring the doorbel if a Tx queue is asleep. There is a natural race, * where the HW is going to sleep just after we checked, however, * then the interrupt handler will detect the outstanding TX packet * and ring the doorbell for us. * * When GTS is disabled we unconditionally ring the doorbell. 
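 * In this driver USE_GTS is defined to 0, so the GTS path below is compiled
 * out: every call issues a write barrier and then writes F_SELEGRCNTX |
 * V_EGRCNTX(q->cntxt_id) to A_SG_KDOORBELL.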
*/ static __inline void check_ring_tx_db(adapter_t *adap, struct sge_txq *q) { #if USE_GTS clear_bit(TXQ_LAST_PKT_DB, &q->flags); if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) { set_bit(TXQ_LAST_PKT_DB, &q->flags); #ifdef T3_TRACE T3_TRACE1(adap->tb[q->cntxt_id & 7], "doorbell Tx, cntxt %d", q->cntxt_id); #endif t3_write_reg(adap, A_SG_KDOORBELL, F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); } #else wmb(); /* write descriptors before telling HW */ t3_write_reg(adap, A_SG_KDOORBELL, F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); #endif } static __inline void wr_gen2(struct tx_desc *d, unsigned int gen) { #if SGE_NUM_GENBITS == 2 d->flit[TX_DESC_FLITS - 1] = htobe64(gen); #endif } /** * write_wr_hdr_sgl - write a WR header and, optionally, SGL * @ndesc: number of Tx descriptors spanned by the SGL * @txd: first Tx descriptor to be written * @txqs: txq state (generation and producer index) * @txq: the SGE Tx queue * @sgl: the SGL * @flits: number of flits to the start of the SGL in the first descriptor * @sgl_flits: the SGL size in flits * @wr_hi: top 32 bits of WR header based on WR type (big endian) * @wr_lo: low 32 bits of WR header based on WR type (big endian) * * Write a work request header and an associated SGL. If the SGL is * small enough to fit into one Tx descriptor it has already been written * and we just need to write the WR header. Otherwise we distribute the * SGL across the number of descriptors it spans. */ static void write_wr_hdr_sgl(unsigned int ndesc, struct tx_desc *txd, struct txq_state *txqs, const struct sge_txq *txq, const struct sg_ent *sgl, unsigned int flits, unsigned int sgl_flits, unsigned int wr_hi, unsigned int wr_lo) { struct work_request_hdr *wrp = (struct work_request_hdr *)txd; struct tx_sw_desc *txsd = &txq->sdesc[txqs->pidx]; if (__predict_true(ndesc == 1)) { wrp->wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) | V_WR_SGLSFLT(flits)) | wr_hi; wmb(); wrp->wr_lo = htonl(V_WR_LEN(flits + sgl_flits) | V_WR_GEN(txqs->gen)) | wr_lo; /* XXX gen? 
*/ wr_gen2(txd, txqs->gen); } else { unsigned int ogen = txqs->gen; const uint64_t *fp = (const uint64_t *)sgl; struct work_request_hdr *wp = wrp; wrp->wr_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) | V_WR_SGLSFLT(flits)) | wr_hi; while (sgl_flits) { unsigned int avail = WR_FLITS - flits; if (avail > sgl_flits) avail = sgl_flits; memcpy(&txd->flit[flits], fp, avail * sizeof(*fp)); sgl_flits -= avail; ndesc--; if (!sgl_flits) break; fp += avail; txd++; txsd++; if (++txqs->pidx == txq->size) { txqs->pidx = 0; txqs->gen ^= 1; txd = txq->desc; txsd = txq->sdesc; } /* * when the head of the mbuf chain * is freed all clusters will be freed * with it */ txsd->m = NULL; wrp = (struct work_request_hdr *)txd; wrp->wr_hi = htonl(V_WR_DATATYPE(1) | V_WR_SGLSFLT(1)) | wr_hi; wrp->wr_lo = htonl(V_WR_LEN(min(WR_FLITS, sgl_flits + 1)) | V_WR_GEN(txqs->gen)) | wr_lo; wr_gen2(txd, txqs->gen); flits = 1; } wrp->wr_hi |= htonl(F_WR_EOP); wmb(); wp->wr_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo; wr_gen2((struct tx_desc *)wp, ogen); } } /* sizeof(*eh) + sizeof(*vhdr) + sizeof(*ip) + sizeof(*tcp) */ #define TCPPKTHDRSIZE (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + 20 + 20) int t3_encap(struct port_info *p, struct mbuf **m) { adapter_t *sc; struct mbuf *m0; struct sge_qset *qs; struct sge_txq *txq; struct tx_sw_desc *stx; struct txq_state txqs; unsigned int nsegs, ndesc, flits, cntrl, mlen; int err, tso_info = 0; struct work_request_hdr *wrp; struct tx_sw_desc *txsd; struct sg_ent *sgp, sgl[TX_MAX_SEGS / 2 + 1]; bus_dma_segment_t segs[TX_MAX_SEGS]; uint32_t wr_hi, wr_lo, sgl_flits; struct tx_desc *txd; struct cpl_tx_pkt *cpl; DPRINTF("t3_encap "); m0 = *m; sc = p->adapter; qs = &sc->sge.qs[p->first_qset]; txq = &qs->txq[TXQ_ETH]; stx = &txq->sdesc[txq->pidx]; txd = &txq->desc[txq->pidx]; cpl = (struct cpl_tx_pkt *)txd; mlen = m0->m_pkthdr.len; cpl->len = htonl(mlen | 0x80000000); DPRINTF("mlen=%d\n", mlen); /* * XXX handle checksum, TSO, and VLAN here * */ cntrl = V_TXPKT_INTF(p->port); /* * XXX need to add VLAN support for 6.x */ #ifdef VLAN_SUPPORTED if (m0->m_flags & M_VLANTAG) cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(m0->m_pkthdr.ether_vtag); if (m0->m_pkthdr.csum_flags & (CSUM_TSO)) tso_info = V_LSO_MSS(m0->m_pkthdr.tso_segsz); #endif if (tso_info) { int eth_type; struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *) cpl; struct ip *ip; struct tcphdr *tcp; uint8_t *pkthdr, tmp[TCPPKTHDRSIZE]; /* is this too large for the stack? 
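 * (TCPPKTHDRSIZE is ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + 20 + 20, i.e.
 * 58 bytes, so the on-stack copy buffer is small.)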
*/ txd->flit[2] = 0; cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO); hdr->cntrl = htonl(cntrl); if (__predict_false(m0->m_len < TCPPKTHDRSIZE)) { pkthdr = &tmp[0]; m_copydata(m0, 0, TCPPKTHDRSIZE, pkthdr); } else { pkthdr = m0->m_data; } if (__predict_false(m0->m_flags & M_VLANTAG)) { eth_type = CPL_ETH_II_VLAN; ip = (struct ip *)(pkthdr + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN); } else { eth_type = CPL_ETH_II; ip = (struct ip *)(pkthdr + ETHER_HDR_LEN); } tcp = (struct tcphdr *)((uint8_t *)ip + sizeof(*ip)); tso_info |= V_LSO_ETH_TYPE(eth_type) | V_LSO_IPHDR_WORDS(ip->ip_hl) | V_LSO_TCPHDR_WORDS(tcp->th_off); hdr->lso_info = htonl(tso_info); flits = 3; } else { cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT); cpl->cntrl = htonl(cntrl); if (mlen <= WR_LEN - sizeof(*cpl)) { txq_prod(txq, 1, &txqs); txq->sdesc[txqs.pidx].m = m0; m_set_priority(m0, txqs.pidx); if (m0->m_len == m0->m_pkthdr.len) memcpy(&txd->flit[2], m0->m_data, mlen); else m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[2]); flits = (mlen + 7) / 8 + 2; cpl->wr.wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) | V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | F_WR_SOP | F_WR_EOP | txqs.compl); wmb(); cpl->wr.wr_lo = htonl(V_WR_LEN(flits) | V_WR_GEN(txqs.gen) | V_WR_TID(txq->token)); wr_gen2(txd, txqs.gen); check_ring_tx_db(sc, txq); return (0); } flits = 2; } wrp = (struct work_request_hdr *)txd; if ((err = busdma_map_mbufs(m, txq, stx, segs, &nsegs)) != 0) { return (err); } m0 = *m; ndesc = calc_tx_descs(m0, nsegs); sgp = (ndesc == 1) ? (struct sg_ent *)&txd->flit[flits] : sgl; make_sgl(sgp, segs, nsegs); sgl_flits = sgl_len(nsegs); DPRINTF("make_sgl success nsegs==%d ndesc==%d\n", nsegs, ndesc); txq_prod(txq, ndesc, &txqs); txsd = &txq->sdesc[txqs.pidx]; wr_hi = htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl); wr_lo = htonl(V_WR_TID(txq->token)); txsd->m = m0; m_set_priority(m0, txqs.pidx); write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits, sgl_flits, wr_hi, wr_lo); check_ring_tx_db(p->adapter, txq); return (0); } /** * write_imm - write a packet into a Tx descriptor as immediate data * @d: the Tx descriptor to write * @m: the packet * @len: the length of packet data to write as immediate data * @gen: the generation bit value to write * * Writes a packet as immediate data into a Tx descriptor. The packet * contains a work request at its beginning. We must write the packet * carefully so the SGE doesn't read accidentally before it's written in * its entirety. */ static __inline void write_imm(struct tx_desc *d, struct mbuf *m, unsigned int len, unsigned int gen) { struct work_request_hdr *from = mtod(m, struct work_request_hdr *); struct work_request_hdr *to = (struct work_request_hdr *)d; memcpy(&to[1], &from[1], len - sizeof(*from)); to->wr_hi = from->wr_hi | htonl(F_WR_SOP | F_WR_EOP | V_WR_BCNTLFLT(len & 7)); wmb(); to->wr_lo = from->wr_lo | htonl(V_WR_GEN(gen) | V_WR_LEN((len + 7) / 8)); wr_gen2(d, gen); m_freem(m); } /** * check_desc_avail - check descriptor availability on a send queue * @adap: the adapter * @q: the TX queue * @m: the packet needing the descriptors * @ndesc: the number of Tx descriptors needed * @qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL) * * Checks if the requested number of Tx descriptors is available on an * SGE send queue. If the queue is already suspended or not enough * descriptors are available the packet is queued for later transmission. * Must be called with the Tx queue locked. 
* * Returns 0 if enough descriptors are available, 1 if there aren't * enough descriptors and the packet has been queued, and 2 if the caller * needs to retry because there weren't enough descriptors at the * beginning of the call but some freed up in the mean time. */ static __inline int check_desc_avail(adapter_t *adap, struct sge_txq *q, struct mbuf *m, unsigned int ndesc, unsigned int qid) { /* * XXX We currently only use this for checking the control queue * the control queue is only used for binding qsets which happens * at init time so we are guaranteed enough descriptors */ if (__predict_false(!mbufq_empty(&q->sendq))) { addq_exit: mbufq_tail(&q->sendq, m); return 1; } if (__predict_false(q->size - q->in_use < ndesc)) { struct sge_qset *qs = txq_to_qset(q, qid); setbit(&qs->txq_stopped, qid); smp_mb(); if (should_restart_tx(q) && test_and_clear_bit(qid, &qs->txq_stopped)) return 2; q->stops++; goto addq_exit; } return 0; } /** * reclaim_completed_tx_imm - reclaim completed control-queue Tx descs * @q: the SGE control Tx queue * * This is a variant of reclaim_completed_tx() that is used for Tx queues * that send only immediate data (presently just the control queues) and * thus do not have any mbufs */ static __inline void reclaim_completed_tx_imm(struct sge_txq *q) { unsigned int reclaim = q->processed - q->cleaned; mtx_assert(&q->lock, MA_OWNED); q->in_use -= reclaim; q->cleaned += reclaim; } static __inline int immediate(const struct mbuf *m) { return m->m_len <= WR_LEN && m->m_pkthdr.len <= WR_LEN ; } /** * ctrl_xmit - send a packet through an SGE control Tx queue * @adap: the adapter * @q: the control queue * @m: the packet * * Send a packet through an SGE control Tx queue. Packets sent through * a control queue must fit entirely as immediate data in a single Tx * descriptor and have no page fragments. */ static int ctrl_xmit(adapter_t *adap, struct sge_txq *q, struct mbuf *m) { int ret; struct work_request_hdr *wrp = (struct work_request_hdr *)m->m_data; if (__predict_false(!immediate(m))) { m_freem(m); return 0; } wrp->wr_hi |= htonl(F_WR_SOP | F_WR_EOP); wrp->wr_lo = htonl(V_WR_TID(q->token)); mtx_lock(&q->lock); again: reclaim_completed_tx_imm(q); ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL); if (__predict_false(ret)) { if (ret == 1) { mtx_unlock(&q->lock); return (-1); } goto again; } write_imm(&q->desc[q->pidx], m, m->m_len, q->gen); q->in_use++; if (++q->pidx >= q->size) { q->pidx = 0; q->gen ^= 1; } mtx_unlock(&q->lock); wmb(); t3_write_reg(adap, A_SG_KDOORBELL, F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); return (0); } /** * restart_ctrlq - restart a suspended control queue * @qs: the queue set cotaining the control queue * * Resumes transmission on a suspended Tx control queue. 
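 * Control WRs are written as immediate data straight from the sendq while
 * descriptors remain; if the queue fills up again it is re-marked stopped,
 * and if space freed up in the meantime the drain loop is retried before
 * the doorbell is finally rung.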
*/ static void restart_ctrlq(void *data, int npending) { struct mbuf *m; struct sge_qset *qs = (struct sge_qset *)data; struct sge_txq *q = &qs->txq[TXQ_CTRL]; adapter_t *adap = qs->port->adapter; mtx_lock(&q->lock); again: reclaim_completed_tx_imm(q); while (q->in_use < q->size && (m = mbufq_dequeue(&q->sendq)) != NULL) { write_imm(&q->desc[q->pidx], m, m->m_len, q->gen); if (++q->pidx >= q->size) { q->pidx = 0; q->gen ^= 1; } q->in_use++; } if (!mbufq_empty(&q->sendq)) { setbit(&qs->txq_stopped, TXQ_CTRL); smp_mb(); if (should_restart_tx(q) && test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) goto again; q->stops++; } mtx_unlock(&q->lock); t3_write_reg(adap, A_SG_KDOORBELL, F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); } /* * Send a management message through control queue 0 */ int t3_mgmt_tx(struct adapter *adap, struct mbuf *m) { return ctrl_xmit(adap, &adap->sge.qs[0].txq[TXQ_CTRL], m); } /** * free_qset - free the resources of an SGE queue set * @sc: the controller owning the queue set * @q: the queue set * * Release the HW and SW resources associated with an SGE queue set, such * as HW contexts, packet buffers, and descriptor rings. Traffic to the * queue set must be quiesced prior to calling this. */ static void t3_free_qset(adapter_t *sc, struct sge_qset *q) { int i; for (i = 0; i < SGE_RXQ_PER_SET; ++i) { if (q->fl[i].desc) { mtx_lock(&sc->sge.reg_lock); t3_sge_disable_fl(sc, q->fl[i].cntxt_id); mtx_unlock(&sc->sge.reg_lock); bus_dmamap_unload(q->fl[i].desc_tag, q->fl[i].desc_map); bus_dmamem_free(q->fl[i].desc_tag, q->fl[i].desc, q->fl[i].desc_map); bus_dma_tag_destroy(q->fl[i].desc_tag); bus_dma_tag_destroy(q->fl[i].entry_tag); } if (q->fl[i].sdesc) { free_rx_bufs(sc, &q->fl[i]); free(q->fl[i].sdesc, M_DEVBUF); } } for (i = 0; i < SGE_TXQ_PER_SET; ++i) { if (q->txq[i].desc) { mtx_lock(&sc->sge.reg_lock); t3_sge_enable_ecntxt(sc, q->txq[i].cntxt_id, 0); mtx_unlock(&sc->sge.reg_lock); bus_dmamap_unload(q->txq[i].desc_tag, q->txq[i].desc_map); bus_dmamem_free(q->txq[i].desc_tag, q->txq[i].desc, q->txq[i].desc_map); bus_dma_tag_destroy(q->txq[i].desc_tag); bus_dma_tag_destroy(q->txq[i].entry_tag); } if (q->txq[i].sdesc) { free(q->txq[i].sdesc, M_DEVBUF); } if (mtx_initialized(&q->txq[i].lock)) { mtx_destroy(&q->txq[i].lock); } } if (q->rspq.desc) { mtx_lock(&sc->sge.reg_lock); t3_sge_disable_rspcntxt(sc, q->rspq.cntxt_id); mtx_unlock(&sc->sge.reg_lock); bus_dmamap_unload(q->rspq.desc_tag, q->rspq.desc_map); bus_dmamem_free(q->rspq.desc_tag, q->rspq.desc, q->rspq.desc_map); bus_dma_tag_destroy(q->rspq.desc_tag); } if (mtx_initialized(&q->rspq.lock)) mtx_destroy(&q->rspq.lock); bzero(q, sizeof(*q)); } /** * t3_free_sge_resources - free SGE resources * @sc: the adapter softc * * Frees resources used by the SGE queue sets. */ void t3_free_sge_resources(adapter_t *sc) { int i; for (i = 0; i < SGE_QSETS; ++i) t3_free_qset(sc, &sc->sge.qs[i]); } /** * t3_sge_start - enable SGE * @sc: the controller softc * * Enables the SGE for DMAs. This is the last step in starting packet * transfers. */ void t3_sge_start(adapter_t *sc) { t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE); } /** * t3_sge_stop - disable SGE operation * @sc: the adapter * * Disables the DMA engine. This can be called in emeregencies (e.g., * from error interrupts) or from normal process context. In the latter * case it also disables any pending queue restart tasklets. 
Note that * if it is called in interrupt context it cannot disable the restart * tasklets as it cannot wait, however the tasklets will have no effect * since the doorbells are disabled and the driver will call this again * later from process context, at which time the tasklets will be stopped * if they are still running. */ void t3_sge_stop(adapter_t *sc) { int i; t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, 0); for (i = 0; i < SGE_QSETS; ++i) { struct sge_qset *qs = &sc->sge.qs[i]; taskqueue_drain(sc->tq, &qs->txq[TXQ_OFLD].qresume_tsk); taskqueue_drain(sc->tq, &qs->txq[TXQ_CTRL].qresume_tsk); } } /** * free_tx_desc - reclaims Tx descriptors and their buffers * @adapter: the adapter * @q: the Tx queue to reclaim descriptors from * @n: the number of descriptors to reclaim * * Reclaims Tx descriptors from an SGE Tx queue and frees the associated * Tx buffers. Called with the Tx queue lock held. */ int free_tx_desc(adapter_t *sc, struct sge_txq *q, int n, struct mbuf **m_vec) { struct tx_sw_desc *d; unsigned int cidx = q->cidx; int nbufs = 0; #ifdef T3_TRACE T3_TRACE2(sc->tb[q->cntxt_id & 7], "reclaiming %u Tx descriptors at cidx %u", n, cidx); #endif d = &q->sdesc[cidx]; while (n-- > 0) { DPRINTF("cidx=%d d=%p\n", cidx, d); if (d->m) { if (d->flags & TX_SW_DESC_MAPPED) { bus_dmamap_unload(q->entry_tag, d->map); bus_dmamap_destroy(q->entry_tag, d->map); d->flags &= ~TX_SW_DESC_MAPPED; } if (m_get_priority(d->m) == cidx) { m_vec[nbufs] = d->m; d->m = NULL; nbufs++; } else { printf("pri=%d cidx=%d\n", m_get_priority(d->m), cidx); } } ++d; if (++cidx == q->size) { cidx = 0; d = q->sdesc; } } q->cidx = cidx; return (nbufs); } /** * is_new_response - check if a response is newly written * @r: the response descriptor * @q: the response queue * * Returns true if a response descriptor contains a yet unprocessed * response. */ static __inline int is_new_response(const struct rsp_desc *r, const struct sge_rspq *q) { return (r->intr_gen & F_RSPD_GEN2) == q->gen; } #define RSPD_GTS_MASK (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS) #define RSPD_CTRL_MASK (RSPD_GTS_MASK | \ V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \ V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \ V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR)) /* How long to delay the next interrupt in case of memory shortage, in 0.1us. */ #define NOMEM_INTR_DELAY 2500 /** * write_ofld_wr - write an offload work request * @adap: the adapter * @m: the packet to send * @q: the Tx queue * @pidx: index of the first Tx descriptor to write * @gen: the generation value to use * @ndesc: number of descriptors the packet will occupy * * Write an offload work request to send the supplied packet. The packet * data already carry the work request with most fields populated. */ static void write_ofld_wr(adapter_t *adap, struct mbuf *m, struct sge_txq *q, unsigned int pidx, unsigned int gen, unsigned int ndesc, bus_dma_segment_t *segs, unsigned int nsegs) { unsigned int sgl_flits, flits; struct work_request_hdr *from; struct sg_ent *sgp, sgl[TX_MAX_SEGS / 2 + 1]; struct tx_desc *d = &q->desc[pidx]; struct txq_state txqs; if (immediate(m)) { q->sdesc[pidx].m = NULL; write_imm(d, m, m->m_len, gen); return; } /* Only TX_DATA builds SGLs */ from = mtod(m, struct work_request_hdr *); memcpy(&d->flit[1], &from[1], (uint8_t *)m->m_pkthdr.header - mtod(m, uint8_t *) - sizeof(*from)); flits = ((uint8_t *)m->m_pkthdr.header - mtod(m, uint8_t *)) / 8; sgp = (ndesc == 1) ? 
(struct sg_ent *)&d->flit[flits] : sgl; make_sgl(sgp, segs, nsegs); sgl_flits = sgl_len(nsegs); txqs.gen = q->gen; txqs.pidx = q->pidx; txqs.compl = (q->unacked & 8) << (S_WR_COMPL - 3); write_wr_hdr_sgl(ndesc, d, &txqs, q, sgl, flits, sgl_flits, from->wr_hi, from->wr_lo); } /** * calc_tx_descs_ofld - calculate # of Tx descriptors for an offload packet * @m: the packet * * Returns the number of Tx descriptors needed for the given offload * packet. These packets are already fully constructed. */ static __inline unsigned int calc_tx_descs_ofld(struct mbuf *m, unsigned int nsegs) { unsigned int flits, cnt = 0; if (m->m_len <= WR_LEN) return 1; /* packet fits as immediate data */ if (m->m_flags & M_IOVEC) cnt = mtomv(m)->mv_count; flits = ((uint8_t *)m->m_pkthdr.header - mtod(m, uint8_t *)) / 8; /* headers */ return flits_to_desc(flits + sgl_len(cnt)); } /** * ofld_xmit - send a packet through an offload queue * @adap: the adapter * @q: the Tx offload queue * @m: the packet * * Send an offload packet through an SGE offload queue. */ static int ofld_xmit(adapter_t *adap, struct sge_txq *q, struct mbuf *m) { int ret; unsigned int pidx, gen, nsegs; unsigned int ndesc; struct mbuf *m_vec[TX_CLEAN_MAX_DESC]; bus_dma_segment_t segs[TX_MAX_SEGS]; int i, cleaned; struct tx_sw_desc *stx = &q->sdesc[q->pidx]; mtx_lock(&q->lock); if ((ret = busdma_map_mbufs(&m, q, stx, segs, &nsegs)) != 0) { mtx_unlock(&q->lock); return (ret); } ndesc = calc_tx_descs_ofld(m, nsegs); again: cleaned = reclaim_completed_tx(adap, q, TX_CLEAN_MAX_DESC, m_vec); ret = check_desc_avail(adap, q, m, ndesc, TXQ_OFLD); if (__predict_false(ret)) { if (ret == 1) { m_set_priority(m, ndesc); /* save for restart */ mtx_unlock(&q->lock); return NET_XMIT_CN; } goto again; } gen = q->gen; q->in_use += ndesc; pidx = q->pidx; q->pidx += ndesc; if (q->pidx >= q->size) { q->pidx -= q->size; q->gen ^= 1; } #ifdef T3_TRACE T3_TRACE5(adap->tb[q->cntxt_id & 7], "ofld_xmit: ndesc %u, pidx %u, len %u, main %u, frags %u", ndesc, pidx, skb->len, skb->len - skb->data_len, skb_shinfo(skb)->nr_frags); #endif mtx_unlock(&q->lock); write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs); check_ring_tx_db(adap, q); for (i = 0; i < cleaned; i++) { m_freem_vec(m_vec[i]); } return NET_XMIT_SUCCESS; } /** * restart_offloadq - restart a suspended offload queue * @qs: the queue set cotaining the offload queue * * Resumes transmission on a suspended Tx offload queue. 
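 * The descriptor count for each queued packet was stashed in its priority
 * field by ofld_xmit(), so it is recovered here with m_get_priority(); each
 * mbuf is then DMA-mapped again with busdma_map_mbufs() before its WR is
 * written out.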
*/ static void restart_offloadq(void *data, int npending) { struct mbuf *m; struct sge_qset *qs = data; struct sge_txq *q = &qs->txq[TXQ_OFLD]; adapter_t *adap = qs->port->adapter; struct mbuf *m_vec[TX_CLEAN_MAX_DESC]; bus_dma_segment_t segs[TX_MAX_SEGS]; int nsegs, i, cleaned; struct tx_sw_desc *stx = &q->sdesc[q->pidx]; mtx_lock(&q->lock); again: cleaned = reclaim_completed_tx(adap, q, TX_CLEAN_MAX_DESC, m_vec); while ((m = mbufq_peek(&q->sendq)) != NULL) { unsigned int gen, pidx; unsigned int ndesc = m_get_priority(m); if (__predict_false(q->size - q->in_use < ndesc)) { setbit(&qs->txq_stopped, TXQ_OFLD); smp_mb(); if (should_restart_tx(q) && test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) goto again; q->stops++; break; } gen = q->gen; q->in_use += ndesc; pidx = q->pidx; q->pidx += ndesc; if (q->pidx >= q->size) { q->pidx -= q->size; q->gen ^= 1; } (void)mbufq_dequeue(&q->sendq); busdma_map_mbufs(&m, q, stx, segs, &nsegs); mtx_unlock(&q->lock); write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs); mtx_lock(&q->lock); } mtx_unlock(&q->lock); #if USE_GTS set_bit(TXQ_RUNNING, &q->flags); set_bit(TXQ_LAST_PKT_DB, &q->flags); #endif t3_write_reg(adap, A_SG_KDOORBELL, F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); for (i = 0; i < cleaned; i++) { m_freem_vec(m_vec[i]); } } /** * queue_set - return the queue set a packet should use * @m: the packet * * Maps a packet to the SGE queue set it should use. The desired queue * set is carried in bits 1-3 in the packet's priority. */ static __inline int queue_set(const struct mbuf *m) { return m_get_priority(m) >> 1; } /** * is_ctrl_pkt - return whether an offload packet is a control packet * @m: the packet * * Determines whether an offload packet should use an OFLD or a CTRL * Tx queue. This is indicated by bit 0 in the packet's priority. */ static __inline int is_ctrl_pkt(const struct mbuf *m) { return m_get_priority(m) & 1; } /** * t3_offload_tx - send an offload packet * @tdev: the offload device to send to * @m: the packet * * Sends an offload packet. We use the packet priority to select the * appropriate Tx queue as follows: bit 0 indicates whether the packet * should be sent as regular or control, bits 1-3 select the queue set. */ int t3_offload_tx(struct toedev *tdev, struct mbuf *m) { adapter_t *adap = tdev2adap(tdev); struct sge_qset *qs = &adap->sge.qs[queue_set(m)]; if (__predict_false(is_ctrl_pkt(m))) return ctrl_xmit(adap, &qs->txq[TXQ_CTRL], m); return ofld_xmit(adap, &qs->txq[TXQ_OFLD], m); } /** * deliver_partial_bundle - deliver a (partial) bundle of Rx offload pkts * @tdev: the offload device that will be receiving the packets * @q: the SGE response queue that assembled the bundle * @m: the partial bundle * @n: the number of packets in the bundle * * Delivers a (partial) bundle of Rx offload packets to an offload device. 
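 * The rx_offload() path batches packets up to RX_BUNDLE_SIZE before handing
 * them to cxgb_ofld_recv(); this routine flushes whatever remains at the end
 * of a response-processing pass.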
*/ static __inline void deliver_partial_bundle(struct toedev *tdev, struct sge_rspq *q, struct mbuf *mbufs[], int n) { if (n) { q->offload_bundles++; cxgb_ofld_recv(tdev, mbufs, n); } } static __inline int rx_offload(struct toedev *tdev, struct sge_rspq *rq, struct mbuf *m, struct mbuf *rx_gather[], unsigned int gather_idx) { rq->offload_pkts++; m->m_pkthdr.header = mtod(m, void *); rx_gather[gather_idx++] = m; if (gather_idx == RX_BUNDLE_SIZE) { cxgb_ofld_recv(tdev, rx_gather, RX_BUNDLE_SIZE); gather_idx = 0; rq->offload_bundles++; } return (gather_idx); } static void restart_tx(struct sge_qset *qs) { struct adapter *sc = qs->port->adapter; if (isset(&qs->txq_stopped, TXQ_OFLD) && should_restart_tx(&qs->txq[TXQ_OFLD]) && test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) { qs->txq[TXQ_OFLD].restarts++; taskqueue_enqueue(sc->tq, &qs->txq[TXQ_OFLD].qresume_tsk); } if (isset(&qs->txq_stopped, TXQ_CTRL) && should_restart_tx(&qs->txq[TXQ_CTRL]) && test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) { qs->txq[TXQ_CTRL].restarts++; taskqueue_enqueue(sc->tq, &qs->txq[TXQ_CTRL].qresume_tsk); } } /** * t3_sge_alloc_qset - initialize an SGE queue set * @sc: the controller softc * @id: the queue set id * @nports: how many Ethernet ports will be using this queue set * @irq_vec_idx: the IRQ vector index for response queue interrupts * @p: configuration parameters for this queue set * @ntxq: number of Tx queues for the queue set * @pi: port info for queue set * * Allocate resources and initialize an SGE queue set. A queue set * comprises a response queue, two Rx free-buffer queues, and up to 3 * Tx queues. The Tx queues are assigned roles in the order Ethernet * queue, offload queue, and control queue. */ int t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx, const struct qset_params *p, int ntxq, struct port_info *pi) { struct sge_qset *q = &sc->sge.qs[id]; int i, ret = 0; init_qset_cntxt(q, id); if ((ret = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc), sizeof(struct rx_sw_desc), &q->fl[0].phys_addr, &q->fl[0].desc, &q->fl[0].sdesc, &q->fl[0].desc_tag, &q->fl[0].desc_map, sc->rx_dmat, &q->fl[0].entry_tag)) != 0) { printf("error %d from alloc ring fl0\n", ret); goto err; } if ((ret = alloc_ring(sc, p->jumbo_size, sizeof(struct rx_desc), sizeof(struct rx_sw_desc), &q->fl[1].phys_addr, &q->fl[1].desc, &q->fl[1].sdesc, &q->fl[1].desc_tag, &q->fl[1].desc_map, sc->rx_jumbo_dmat, &q->fl[1].entry_tag)) != 0) { printf("error %d from alloc ring fl1\n", ret); goto err; } if ((ret = alloc_ring(sc, p->rspq_size, sizeof(struct rsp_desc), 0, &q->rspq.phys_addr, &q->rspq.desc, NULL, &q->rspq.desc_tag, &q->rspq.desc_map, NULL, NULL)) != 0) { printf("error %d from alloc ring rspq\n", ret); goto err; } for (i = 0; i < ntxq; ++i) { /* * The control queue always uses immediate data so does not * need to keep track of any mbufs. * XXX Placeholder for future TOE support. */ size_t sz = i == TXQ_CTRL ? 
0 : sizeof(struct tx_sw_desc); if ((ret = alloc_ring(sc, p->txq_size[i], sizeof(struct tx_desc), sz, &q->txq[i].phys_addr, &q->txq[i].desc, &q->txq[i].sdesc, &q->txq[i].desc_tag, &q->txq[i].desc_map, sc->tx_dmat, &q->txq[i].entry_tag)) != 0) { printf("error %d from alloc ring tx %i\n", ret, i); goto err; } mbufq_init(&q->txq[i].sendq); q->txq[i].gen = 1; q->txq[i].size = p->txq_size[i]; mtx_init(&q->txq[i].lock, "t3 txq lock", NULL, MTX_DEF); } TASK_INIT(&q->txq[TXQ_OFLD].qresume_tsk, 0, restart_offloadq, q); TASK_INIT(&q->txq[TXQ_CTRL].qresume_tsk, 0, restart_ctrlq, q); q->fl[0].gen = q->fl[1].gen = 1; q->fl[0].size = p->fl_size; q->fl[1].size = p->jumbo_size; q->rspq.gen = 1; q->rspq.size = p->rspq_size; mtx_init(&q->rspq.lock, "t3 rspq lock", NULL, MTX_DEF); q->txq[TXQ_ETH].stop_thres = nports * flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3); q->fl[0].buf_size = MCLBYTES; q->fl[0].zone = zone_clust; q->fl[0].type = EXT_CLUSTER; q->fl[1].buf_size = MJUMPAGESIZE; q->fl[1].zone = zone_jumbop; q->fl[1].type = EXT_JUMBOP; q->lro.enabled = lro_default; mtx_lock(&sc->sge.reg_lock); ret = -t3_sge_init_rspcntxt(sc, q->rspq.cntxt_id, irq_vec_idx, q->rspq.phys_addr, q->rspq.size, q->fl[0].buf_size, 1, 0); if (ret) { printf("error %d from t3_sge_init_rspcntxt\n", ret); goto err_unlock; } for (i = 0; i < SGE_RXQ_PER_SET; ++i) { ret = -t3_sge_init_flcntxt(sc, q->fl[i].cntxt_id, 0, q->fl[i].phys_addr, q->fl[i].size, q->fl[i].buf_size, p->cong_thres, 1, 0); if (ret) { printf("error %d from t3_sge_init_flcntxt for index i=%d\n", ret, i); goto err_unlock; } } ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_ETH].cntxt_id, USE_GTS, SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr, q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token, 1, 0); if (ret) { printf("error %d from t3_sge_init_ecntxt\n", ret); goto err_unlock; } if (ntxq > 1) { ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_OFLD].cntxt_id, USE_GTS, SGE_CNTXT_OFLD, id, q->txq[TXQ_OFLD].phys_addr, q->txq[TXQ_OFLD].size, 0, 1, 0); if (ret) { printf("error %d from t3_sge_init_ecntxt\n", ret); goto err_unlock; } } if (ntxq > 2) { ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_CTRL].cntxt_id, 0, SGE_CNTXT_CTRL, id, q->txq[TXQ_CTRL].phys_addr, q->txq[TXQ_CTRL].size, q->txq[TXQ_CTRL].token, 1, 0); if (ret) { printf("error %d from t3_sge_init_ecntxt\n", ret); goto err_unlock; } } mtx_unlock(&sc->sge.reg_lock); t3_update_qset_coalesce(q, p); q->port = pi; refill_fl(sc, &q->fl[0], q->fl[0].size); refill_fl(sc, &q->fl[1], q->fl[1].size); refill_rspq(sc, &q->rspq, q->rspq.size - 1); t3_write_reg(sc, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) | V_NEWTIMER(q->rspq.holdoff_tmr)); return (0); err_unlock: mtx_unlock(&sc->sge.reg_lock); err: t3_free_qset(sc, q); return (ret); } void t3_rx_eth(struct port_info *pi, struct sge_rspq *rq, struct mbuf *m, int ethpad) { struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(m->m_data + ethpad); struct ifnet *ifp = pi->ifp; DPRINTF("rx_eth m=%p m->m_data=%p p->iff=%d\n", m, m->m_data, cpl->iff); if (&pi->adapter->port[cpl->iff] != pi) panic("bad port index %d m->m_data=%p\n", cpl->iff, m->m_data); if ((ifp->if_capenable & IFCAP_RXCSUM) && !cpl->fragment && cpl->csum_valid && cpl->csum == 0xffff) { m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID); rspq_to_qset(rq)->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++; m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID|CSUM_DATA_VALID|CSUM_PSEUDO_HDR); m->m_pkthdr.csum_data = 0xffff; } /* * XXX need to add VLAN support for 6.x */ #ifdef VLAN_SUPPORTED if (__predict_false(cpl->vlan_valid)) { m->m_pkthdr.ether_vtag = 
ntohs(cpl->vlan); m->m_flags |= M_VLANTAG; } #endif m->m_pkthdr.rcvif = ifp; m->m_pkthdr.header = m->m_data + sizeof(*cpl) + ethpad; m_explode(m); /* * adjust after conversion to mbuf chain */ m_adj(m, sizeof(*cpl) + ethpad); (*ifp->if_input)(ifp, m); } /** * get_packet - return the next ingress packet buffer from a free list * @adap: the adapter that received the packet * @drop_thres: # of remaining buffers before we start dropping packets * @qs: the qset that the SGE free list holding the packet belongs to * @mh: the mbuf header, contains a pointer to the head and tail of the mbuf chain * @r: response descriptor * * Get the next packet from a free list and complete setup of the * sk_buff. If the packet is small we make a copy and recycle the * original buffer, otherwise we use the original buffer itself. If a * positive drop threshold is supplied packets are dropped and their * buffers recycled if (a) the number of remaining buffers is under the * threshold and the packet is too big to copy, or (b) the packet should * be copied but there is no memory for the copy. */ static int get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs, struct mbuf *m, struct rsp_desc *r) { unsigned int len_cq = ntohl(r->len_cq); struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0]; struct rx_sw_desc *sd = &fl->sdesc[fl->cidx]; uint32_t len = G_RSPD_LEN(len_cq); uint32_t flags = ntohl(r->flags); uint8_t sopeop = G_RSPD_SOP_EOP(flags); void *cl; int ret = 0; prefetch(sd->cl); fl->credits--; bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD); if (recycle_enable && len <= SGE_RX_COPY_THRES && sopeop == RSPQ_SOP_EOP) { cl = mtod(m, void *); memcpy(cl, sd->cl, len); recycle_rx_buf(adap, fl, fl->cidx); } else { cl = sd->cl; bus_dmamap_unload(fl->entry_tag, sd->map); } switch(sopeop) { case RSPQ_SOP_EOP: DBG(DBG_RX, ("get_packet: SOP-EOP m %p\n", m)); if (cl == sd->cl) m_cljset(m, cl, fl->type); m->m_len = m->m_pkthdr.len = len; ret = 1; goto done; break; case RSPQ_NSOP_NEOP: DBG(DBG_RX, ("get_packet: NO_SOP-NO_EOP m %p\n", m)); ret = 0; break; case RSPQ_SOP: DBG(DBG_RX, ("get_packet: SOP m %p\n", m)); m_iovinit(m); ret = 0; break; case RSPQ_EOP: DBG(DBG_RX, ("get_packet: EOP m %p\n", m)); ret = 1; break; } m_iovappend(m, cl, fl->buf_size, len, 0); done: if (++fl->cidx == fl->size) fl->cidx = 0; return (ret); } /** * handle_rsp_cntrl_info - handles control information in a response * @qs: the queue set corresponding to the response * @flags: the response control flags * * Handles the control information of an SGE response, such as GTS * indications and completion credits for the queue set's Tx queues. * HW coalesces credits, we don't do any extra SW coalescing. 
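 * Completion credits are simply added to each Tx queue's 'processed' count;
 * for the Ethernet queue, once more than TX_START_MAX_DESC descriptors
 * become reclaimable the timer reclaim task is also kicked to free them.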
*/ static __inline void handle_rsp_cntrl_info(struct sge_qset *qs, uint32_t flags) { unsigned int credits; #if USE_GTS if (flags & F_RSPD_TXQ0_GTS) clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags); #endif credits = G_RSPD_TXQ0_CR(flags); if (credits) { qs->txq[TXQ_ETH].processed += credits; if (desc_reclaimable(&qs->txq[TXQ_ETH]) > TX_START_MAX_DESC) taskqueue_enqueue(qs->port->adapter->tq, &qs->port->adapter->timer_reclaim_task); } credits = G_RSPD_TXQ2_CR(flags); if (credits) qs->txq[TXQ_CTRL].processed += credits; # if USE_GTS if (flags & F_RSPD_TXQ1_GTS) clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags); # endif credits = G_RSPD_TXQ1_CR(flags); if (credits) qs->txq[TXQ_OFLD].processed += credits; } static void check_ring_db(adapter_t *adap, struct sge_qset *qs, unsigned int sleeping) { ; } /** * process_responses - process responses from an SGE response queue * @adap: the adapter * @qs: the queue set to which the response queue belongs * @budget: how many responses can be processed in this round * * Process responses from an SGE response queue up to the supplied budget. * Responses include received packets as well as credits and other events * for the queues that belong to the response queue's queue set. * A negative budget is effectively unlimited. * * Additionally choose the interrupt holdoff time for the next interrupt * on this queue. If the system is under memory shortage use a fairly * long delay to help recovery. */ static int process_responses(adapter_t *adap, struct sge_qset *qs, int budget) { struct sge_rspq *rspq = &qs->rspq; struct rsp_desc *r = &rspq->desc[rspq->cidx]; int budget_left = budget; unsigned int sleeping = 0; int lro = qs->lro.enabled; struct mbuf *offload_mbufs[RX_BUNDLE_SIZE]; int ngathered = 0; #ifdef DEBUG static int last_holdoff = 0; if (rspq->holdoff_tmr != last_holdoff) { printf("next_holdoff=%d\n", rspq->holdoff_tmr); last_holdoff = rspq->holdoff_tmr; } #endif rspq->next_holdoff = rspq->holdoff_tmr; while (__predict_true(budget_left && is_new_response(r, rspq))) { int eth, eop = 0, ethpad = 0; uint32_t flags = ntohl(r->flags); uint32_t rss_csum = *(const uint32_t *)r; uint32_t rss_hash = r->rss_hdr.rss_hash_val; eth = (r->rss_hdr.opcode == CPL_RX_PKT); if (__predict_false(flags & F_RSPD_ASYNC_NOTIF)) { /* XXX */ printf("async notification\n"); } else if (flags & F_RSPD_IMM_DATA_VALID) { struct mbuf *m = NULL; if (cxgb_debug) printf("IMM DATA VALID\n"); if (rspq->m == NULL) rspq->m = m_gethdr(M_NOWAIT, MT_DATA); else m = m_gethdr(M_NOWAIT, MT_DATA); if (rspq->m == NULL || m == NULL) { rspq->next_holdoff = NOMEM_INTR_DELAY; budget_left--; break; } get_imm_packet(adap, r, rspq->m, m); eop = 1; rspq->imm_data++; } else if (r->len_cq) { int drop_thresh = eth ? 
SGE_RX_DROP_THRES : 0; if (rspq->m == NULL) rspq->m = m_gethdr(M_NOWAIT, MT_DATA); if (rspq->m == NULL) { log(LOG_WARNING, "failed to get mbuf for packet\n"); break; } ethpad = 2; eop = get_packet(adap, drop_thresh, qs, rspq->m, r); } else { DPRINTF("pure response\n"); rspq->pure_rsps++; } if (flags & RSPD_CTRL_MASK) { sleeping |= flags & RSPD_GTS_MASK; handle_rsp_cntrl_info(qs, flags); } r++; if (__predict_false(++rspq->cidx == rspq->size)) { rspq->cidx = 0; rspq->gen ^= 1; r = rspq->desc; } prefetch(r); if (++rspq->credits >= (rspq->size / 4)) { refill_rspq(adap, rspq, rspq->credits); rspq->credits = 0; } if (eop) { prefetch(rspq->m->m_data); prefetch(rspq->m->m_data + L1_CACHE_BYTES); if (eth) { t3_rx_eth_lro(adap, rspq, rspq->m, ethpad, rss_hash, rss_csum, lro); rspq->m = NULL; } else { rspq->m->m_pkthdr.csum_data = rss_csum; /* * XXX size mismatch */ m_set_priority(rspq->m, rss_hash); ngathered = rx_offload(&adap->tdev, rspq, rspq->m, offload_mbufs, ngathered); } #ifdef notyet taskqueue_enqueue(adap->tq, &adap->timer_reclaim_task); #else __refill_fl(adap, &qs->fl[0]); __refill_fl(adap, &qs->fl[1]); #endif } --budget_left; } deliver_partial_bundle(&adap->tdev, rspq, offload_mbufs, ngathered); t3_lro_flush(adap, qs, &qs->lro); if (sleeping) check_ring_db(adap, qs, sleeping); smp_mb(); /* commit Tx queue processed updates */ if (__predict_false(qs->txq_stopped != 0)) restart_tx(qs); budget -= budget_left; return (budget); } /* * A helper function that processes responses and issues GTS. */ static __inline int process_responses_gts(adapter_t *adap, struct sge_rspq *rq) { int work; static int last_holdoff = 0; work = process_responses(adap, rspq_to_qset(rq), -1); if (cxgb_debug && (rq->next_holdoff != last_holdoff)) { printf("next_holdoff=%d\n", rq->next_holdoff); last_holdoff = rq->next_holdoff; } t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) | V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx)); return work; } /* * Interrupt handler for legacy INTx interrupts for T3B-based cards. * Handles data events from SGE response queues as well as error and other * async events as they all use the same interrupt pin. We use one SGE * response queue per port in this mode and protect all response queues with * queue 0's lock. */ void t3b_intr(void *data) { uint32_t map; adapter_t *adap = data; struct sge_rspq *q0 = &adap->sge.qs[0].rspq; struct sge_rspq *q1 = &adap->sge.qs[1].rspq; t3_write_reg(adap, A_PL_CLI, 0); map = t3_read_reg(adap, A_SG_DATA_INTR); if (!map) return; if (__predict_false(map & F_ERRINTR)) taskqueue_enqueue(adap->tq, &adap->slow_intr_task); mtx_lock(&q0->lock); if (__predict_true(map & 1)) process_responses_gts(adap, q0); if (map & 2) process_responses_gts(adap, q1); mtx_unlock(&q0->lock); } /* * The MSI interrupt handler. This needs to handle data events from SGE * response queues as well as error and other async events as they all use * the same MSI vector. We use one SGE response queue per port in this mode * and protect all response queues with queue 0's lock. 
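 * If neither response queue yields any work the interrupt is treated as an
 * error or async event and the slow interrupt task is scheduled instead.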
*/ void t3_intr_msi(void *data) { adapter_t *adap = data; struct sge_rspq *q0 = &adap->sge.qs[0].rspq; struct sge_rspq *q1 = &adap->sge.qs[1].rspq; int new_packets = 0; mtx_lock(&q0->lock); if (process_responses_gts(adap, q0)) { new_packets = 1; } if (adap->params.nports == 2 && process_responses_gts(adap, q1)) { new_packets = 1; } mtx_unlock(&q0->lock); if (new_packets == 0) taskqueue_enqueue(adap->tq, &adap->slow_intr_task); } void t3_intr_msix(void *data) { struct sge_qset *qs = data; adapter_t *adap = qs->port->adapter; struct sge_rspq *rspq = &qs->rspq; mtx_lock(&rspq->lock); if (process_responses_gts(adap, rspq) == 0) rspq->unhandled_irqs++; mtx_unlock(&rspq->lock); } /* * broken by recent mbuf changes */ static int t3_lro_enable(SYSCTL_HANDLER_ARGS) { adapter_t *sc; int i, j, enabled, err, nqsets = 0; #ifndef LRO_WORKING return (0); #endif sc = arg1; enabled = sc->sge.qs[0].lro.enabled; err = sysctl_handle_int(oidp, &enabled, arg2, req); if (err != 0) { return (err); } if (enabled == sc->sge.qs[0].lro.enabled) return (0); for (i = 0; i < sc->params.nports; i++) for (j = 0; j < sc->port[i].nqsets; j++) nqsets++; for (i = 0; i < nqsets; i++) { sc->sge.qs[i].lro.enabled = enabled; } return (0); } static int t3_set_coalesce_nsecs(SYSCTL_HANDLER_ARGS) { adapter_t *sc = arg1; struct qset_params *qsp = &sc->params.sge.qset[0]; int coalesce_nsecs; struct sge_qset *qs; int i, j, err, nqsets = 0; struct mtx *lock; coalesce_nsecs = qsp->coalesce_nsecs; err = sysctl_handle_int(oidp, &coalesce_nsecs, arg2, req); if (err != 0) { return (err); } if (coalesce_nsecs == qsp->coalesce_nsecs) return (0); for (i = 0; i < sc->params.nports; i++) for (j = 0; j < sc->port[i].nqsets; j++) nqsets++; coalesce_nsecs = max(100, coalesce_nsecs); for (i = 0; i < nqsets; i++) { qs = &sc->sge.qs[i]; qsp = &sc->params.sge.qset[i]; qsp->coalesce_nsecs = coalesce_nsecs; lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock : &sc->sge.qs[0].rspq.lock; mtx_lock(lock); t3_update_qset_coalesce(qs, qsp); t3_write_reg(sc, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) | V_NEWTIMER(qs->rspq.holdoff_tmr)); mtx_unlock(lock); } return (0); } void t3_add_sysctls(adapter_t *sc) { struct sysctl_ctx_list *ctx; struct sysctl_oid_list *children; ctx = device_get_sysctl_ctx(sc->dev); children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); /* random information */ SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "firmware_version", CTLFLAG_RD, &sc->fw_version, 0, "firmware version"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "enable_lro", CTLTYPE_INT|CTLFLAG_RW, sc, 0, t3_lro_enable, "I", "enable large receive offload"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "intr_coal", CTLTYPE_INT|CTLFLAG_RW, sc, 0, t3_set_coalesce_nsecs, "I", "interrupt coalescing timer (ns)"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "enable_debug", CTLFLAG_RW, &cxgb_debug, 0, "enable verbose debugging output"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "collapse_free", CTLFLAG_RD, &collapse_free, 0, "frees during collapse"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "mb_free_vec_free", CTLFLAG_RD, &mb_free_vec_free, 0, "frees during mb_free_vec"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "collapse_mbufs", CTLFLAG_RW, &collapse_mbufs, 0, "collapse mbuf chains into iovecs"); } /** * t3_get_desc - dump an SGE descriptor for debugging purposes * @qs: the queue set * @qnum: identifies the specific queue (0..2: Tx, 3:response, 4..5: Rx) * @idx: the descriptor index in the queue * @data: where to dump the descriptor contents * * Dumps the contents of a HW descriptor of an SGE queue. 
Returns the * size of the descriptor. */ int t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx, unsigned char *data) { if (qnum >= 6) return (EINVAL); if (qnum < 3) { if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size) return (EINVAL); memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc)); return sizeof(struct tx_desc); } if (qnum == 3) { if (!qs->rspq.desc || idx >= qs->rspq.size) return (EINVAL); memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc)); return sizeof(struct rsp_desc); } qnum -= 4; if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size) return (EINVAL); memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc)); return sizeof(struct rx_desc); }