diff --git a/sys/dev/cxgbe/adapter.h b/sys/dev/cxgbe/adapter.h index ddc4cf3337aa..58ff04e46021 100644 --- a/sys/dev/cxgbe/adapter.h +++ b/sys/dev/cxgbe/adapter.h @@ -1,613 +1,616 @@ /*- * Copyright (c) 2011 Chelsio Communications, Inc. * All rights reserved. * Written by: Navdeep Parhar * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ * */ #ifndef __T4_ADAPTER_H__ #define __T4_ADAPTER_H__ #include #include #include #include #include #include #include #include #include #include #include #include #include #include "offload.h" #include "common/t4fw_interface.h" #define T4_FWNAME "t4fw" MALLOC_DECLARE(M_CXGBE); #define CXGBE_UNIMPLEMENTED(s) \ panic("%s (%s, line %d) not implemented yet.", s, __FILE__, __LINE__) #if defined(__i386__) || defined(__amd64__) static __inline void prefetch(void *x) { __asm volatile("prefetcht0 %0" :: "m" (*(unsigned long *)x)); } #else #define prefetch(x) #endif #ifdef __amd64__ /* XXX: need systemwide bus_space_read_8/bus_space_write_8 */ static __inline uint64_t t4_bus_space_read_8(bus_space_tag_t tag, bus_space_handle_t handle, bus_size_t offset) { KASSERT(tag == X86_BUS_SPACE_MEM, ("%s: can only handle mem space", __func__)); return (*(volatile uint64_t *)(handle + offset)); } static __inline void t4_bus_space_write_8(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, uint64_t value) { KASSERT(tag == X86_BUS_SPACE_MEM, ("%s: can only handle mem space", __func__)); *(volatile uint64_t *)(bsh + offset) = value; } #else static __inline uint64_t t4_bus_space_read_8(bus_space_tag_t tag, bus_space_handle_t handle, bus_size_t offset) { return (uint64_t)bus_space_read_4(tag, handle, offset) + ((uint64_t)bus_space_read_4(tag, handle, offset + 4) << 32); } static __inline void t4_bus_space_write_8(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, uint64_t value) { bus_space_write_4(tag, bsh, offset, value); bus_space_write_4(tag, bsh, offset + 4, value >> 32); } #endif struct adapter; typedef struct adapter adapter_t; enum { FW_IQ_QSIZE = 256, FW_IQ_ESIZE = 64, /* At least 64 mandated by the firmware spec */ + INTR_IQ_QSIZE = 64, + INTR_IQ_ESIZE = 64, /* Handles some CPLs too, do not reduce */ + CTRL_EQ_QSIZE = 128, CTRL_EQ_ESIZE = 64, RX_IQ_QSIZE = 1024, RX_IQ_ESIZE = 64, /* At least 64 so CPL_RX_PKT will fit */ RX_FL_ESIZE = 64, /* 8 
64bit addresses */ #if MJUMPAGESIZE != MCLBYTES FL_BUF_SIZES = 4, /* cluster, jumbop, jumbo9k, jumbo16k */ #else FL_BUF_SIZES = 3, /* cluster, jumbo9k, jumbo16k */ #endif TX_EQ_QSIZE = 1024, TX_EQ_ESIZE = 64, TX_SGL_SEGS = 36, TX_WR_FLITS = SGE_MAX_WR_LEN / 8 }; enum { /* adapter intr_type */ INTR_INTX = (1 << 0), INTR_MSI = (1 << 1), INTR_MSIX = (1 << 2) }; enum { /* adapter flags */ FULL_INIT_DONE = (1 << 0), FW_OK = (1 << 1), - INTR_FWD = (1 << 2), + INTR_SHARED = (1 << 2), /* one set of intrq's for all ports */ CXGBE_BUSY = (1 << 9), /* port flags */ DOOMED = (1 << 0), VI_ENABLED = (1 << 1), }; #define IS_DOOMED(pi) (pi->flags & DOOMED) #define SET_DOOMED(pi) do {pi->flags |= DOOMED;} while (0) #define IS_BUSY(sc) (sc->flags & CXGBE_BUSY) #define SET_BUSY(sc) do {sc->flags |= CXGBE_BUSY;} while (0) #define CLR_BUSY(sc) do {sc->flags &= ~CXGBE_BUSY;} while (0) struct port_info { device_t dev; struct adapter *adapter; struct ifnet *ifp; struct ifmedia media; struct mtx pi_lock; char lockname[16]; unsigned long flags; int if_flags; uint16_t viid; int16_t xact_addr_filt;/* index of exact MAC address filter */ uint16_t rss_size; /* size of VI's RSS table slice */ uint8_t lport; /* associated offload logical port */ int8_t mdio_addr; uint8_t port_type; uint8_t mod_type; uint8_t port_id; uint8_t tx_chan; /* These need to be int as they are used in sysctl */ int ntxq; /* # of tx queues */ int first_txq; /* index of first tx queue */ int nrxq; /* # of rx queues */ int first_rxq; /* index of first rx queue */ int tmr_idx; int pktc_idx; int qsize_rxq; int qsize_txq; struct link_config link_cfg; struct port_stats stats; struct taskqueue *tq; struct callout tick; struct sysctl_ctx_list ctx; /* lives from ifconfig up to down */ struct sysctl_oid *oid_rxq; struct sysctl_oid *oid_txq; uint8_t hw_addr[ETHER_ADDR_LEN]; /* factory MAC address, won't change */ }; struct fl_sdesc { struct mbuf *m; bus_dmamap_t map; caddr_t cl; uint8_t tag_idx; /* the sc->fl_tag this map comes from 
*/ #ifdef INVARIANTS __be64 ba_tag; #endif }; struct tx_desc { __be64 flit[8]; }; struct tx_map { struct mbuf *m; bus_dmamap_t map; }; struct tx_sdesc { uint8_t desc_used; /* # of hardware descriptors used by the WR */ uint8_t credits; /* NIC txq: # of frames sent out in the WR */ }; typedef void (iq_intr_handler_t)(void *); enum { /* iq flags */ IQ_ALLOCATED = (1 << 1), /* firmware resources allocated */ IQ_STARTED = (1 << 2), /* started */ /* iq state */ IQS_DISABLED = 0, IQS_BUSY = 1, IQS_IDLE = 2, }; /* * Ingress Queue: T4 is producer, driver is consumer. */ struct sge_iq { bus_dma_tag_t desc_tag; bus_dmamap_t desc_map; bus_addr_t ba; /* bus address of descriptor ring */ char lockname[16]; uint32_t flags; uint16_t abs_id; /* absolute SGE id for the iq */ int8_t intr_pktc_idx; /* packet count threshold index */ int8_t pad0; iq_intr_handler_t *handler; __be64 *desc; /* KVA of descriptor ring */ volatile uint32_t state; struct adapter *adapter; const __be64 *cdesc; /* current descriptor */ uint8_t gen; /* generation bit */ uint8_t intr_params; /* interrupt holdoff parameters */ uint8_t intr_next; /* holdoff for next interrupt */ uint8_t esize; /* size (bytes) of each entry in the queue */ uint16_t qsize; /* size (# of entries) of the queue */ uint16_t cidx; /* consumer index */ uint16_t cntxt_id; /* SGE context id for the iq */ }; enum { /* eq flags */ EQ_ALLOCATED = (1 << 1), /* firmware resources allocated */ EQ_STARTED = (1 << 2), /* started */ EQ_CRFLUSHED = (1 << 3), /* expecting an update from SGE */ }; /* * Egress Queue: driver is producer, T4 is consumer. * * Note: A free list is an egress queue (driver produces the buffers and T4 * consumes them) but it's special enough to have its own struct (see sge_fl). 
*/ struct sge_eq { bus_dma_tag_t desc_tag; bus_dmamap_t desc_map; char lockname[16]; unsigned int flags; struct mtx eq_lock; struct tx_desc *desc; /* KVA of descriptor ring */ bus_addr_t ba; /* bus address of descriptor ring */ struct sge_qstat *spg; /* status page, for convenience */ uint16_t cap; /* max # of desc, for convenience */ uint16_t avail; /* available descriptors, for convenience */ uint16_t qsize; /* size (# of entries) of the queue */ uint16_t cidx; /* consumer idx (desc idx) */ uint16_t pidx; /* producer idx (desc idx) */ uint16_t pending; /* # of descriptors used since last doorbell */ uint16_t iqid; /* iq that gets egr_update for the eq */ uint32_t cntxt_id; /* SGE context id for the eq */ }; struct sge_fl { bus_dma_tag_t desc_tag; bus_dmamap_t desc_map; bus_dma_tag_t tag[FL_BUF_SIZES]; uint8_t tag_idx; struct mtx fl_lock; char lockname[16]; __be64 *desc; /* KVA of descriptor ring, ptr to addresses */ bus_addr_t ba; /* bus address of descriptor ring */ struct fl_sdesc *sdesc; /* KVA of software descriptor ring */ uint32_t cap; /* max # of buffers, for convenience */ uint16_t qsize; /* size (# of entries) of the queue */ uint16_t cntxt_id; /* SGE context id for the freelist */ uint32_t cidx; /* consumer idx (buffer idx, NOT hw desc idx) */ uint32_t pidx; /* producer idx (buffer idx, NOT hw desc idx) */ uint32_t needed; /* # of buffers needed to fill up fl. 
*/ uint32_t pending; /* # of bufs allocated since last doorbell */ unsigned int dmamap_failed; }; /* txq: SGE egress queue + what's needed for Ethernet NIC */ struct sge_txq { struct sge_eq eq; /* MUST be first */ struct ifnet *ifp; /* the interface this txq belongs to */ bus_dma_tag_t tx_tag; /* tag for transmit buffers */ struct buf_ring *br; /* tx buffer ring */ struct tx_sdesc *sdesc; /* KVA of software descriptor ring */ struct mbuf *m; /* held up due to temporary resource shortage */ struct task resume_tx; /* DMA maps used for tx */ struct tx_map *maps; uint32_t map_total; /* # of DMA maps */ uint32_t map_pidx; /* next map to be used */ uint32_t map_cidx; /* reclaimed up to this index */ uint32_t map_avail; /* # of available maps */ /* stats for common events first */ uint64_t txcsum; /* # of times hardware assisted with checksum */ uint64_t tso_wrs; /* # of IPv4 TSO work requests */ uint64_t vlan_insertion;/* # of times VLAN tag was inserted */ uint64_t imm_wrs; /* # of work requests with immediate data */ uint64_t sgl_wrs; /* # of work requests with direct SGL */ uint64_t txpkt_wrs; /* # of txpkt work requests (not coalesced) */ uint64_t txpkts_wrs; /* # of coalesced tx work requests */ uint64_t txpkts_pkts; /* # of frames in coalesced tx work requests */ /* stats for not-that-common events */ uint32_t no_dmamap; /* no DMA map to load the mbuf */ uint32_t no_desc; /* out of hardware descriptors */ uint32_t egr_update; /* # of SGE_EGR_UPDATE notifications for txq */ } __aligned(CACHE_LINE_SIZE); enum { RXQ_LRO_ENABLED = (1 << 0) }; /* rxq: SGE ingress queue + SGE free list + miscellaneous items */ struct sge_rxq { struct sge_iq iq; /* MUST be first */ struct sge_fl fl; struct ifnet *ifp; /* the interface this rxq belongs to */ unsigned int flags; #ifdef INET struct lro_ctrl lro; /* LRO state */ #endif /* stats for common events first */ uint64_t rxcsum; /* # of times hardware assisted with checksum */ uint64_t vlan_extraction;/* # of times VLAN tag was 
extracted */ /* stats for not-that-common events */ } __aligned(CACHE_LINE_SIZE); /* ctrlq: SGE egress queue + stats for control queue */ struct sge_ctrlq { struct sge_eq eq; /* MUST be first */ /* stats for common events first */ - uint64_t total_wrs; /* # of work requests sent down this queue */ /* stats for not-that-common events */ uint32_t no_desc; /* out of hardware descriptors */ - uint32_t too_long; /* WR longer than hardware max */ } __aligned(CACHE_LINE_SIZE); struct sge { uint16_t timer_val[SGE_NTIMERS]; uint8_t counter_val[SGE_NCOUNTERS]; int nrxq; /* total rx queues (all ports and the rest) */ int ntxq; /* total tx queues (all ports and the rest) */ int niq; /* total ingress queues */ int neq; /* total egress queues */ struct sge_iq fwq; /* Firmware event queue */ struct sge_ctrlq *ctrlq;/* Control queues */ - struct sge_iq *fiq; /* Forwarded interrupt queues (INTR_FWD) */ + struct sge_iq *intrq; /* Interrupt queues */ struct sge_txq *txq; /* NIC tx queues */ struct sge_rxq *rxq; /* NIC rx queues */ uint16_t iq_start; int eq_start; struct sge_iq **iqmap; /* iq->cntxt_id to iq mapping */ struct sge_eq **eqmap; /* eq->cntxt_id to eq mapping */ }; struct adapter { device_t dev; struct cdev *cdev; /* PCIe register resources */ int regs_rid; struct resource *regs_res; int msix_rid; struct resource *msix_res; bus_space_handle_t bh; bus_space_tag_t bt; bus_size_t mmio_len; unsigned int pf; unsigned int mbox; /* Interrupt information */ int intr_type; int intr_count; struct irq { struct resource *res; int rid; void *tag; } *irq; bus_dma_tag_t dmat; /* Parent DMA tag */ struct sge sge; struct port_info *port[MAX_NPORTS]; uint8_t chan_map[NCHAN]; struct l2t_data *l2t; /* L2 table */ struct tid_info tids; int registered_device_map; int open_device_map; int flags; char fw_version[32]; struct adapter_params params; struct t4_virt_res vres; struct sysctl_ctx_list ctx; /* from first_port_up to last_port_down */ + struct sysctl_oid *oid_fwq; struct sysctl_oid 
*oid_ctrlq; + struct sysctl_oid *oid_intrq; struct mtx sc_lock; char lockname[16]; }; #define ADAPTER_LOCK(sc) mtx_lock(&(sc)->sc_lock) #define ADAPTER_UNLOCK(sc) mtx_unlock(&(sc)->sc_lock) #define ADAPTER_LOCK_ASSERT_OWNED(sc) mtx_assert(&(sc)->sc_lock, MA_OWNED) #define ADAPTER_LOCK_ASSERT_NOTOWNED(sc) mtx_assert(&(sc)->sc_lock, MA_NOTOWNED) #define PORT_LOCK(pi) mtx_lock(&(pi)->pi_lock) #define PORT_UNLOCK(pi) mtx_unlock(&(pi)->pi_lock) #define PORT_LOCK_ASSERT_OWNED(pi) mtx_assert(&(pi)->pi_lock, MA_OWNED) #define PORT_LOCK_ASSERT_NOTOWNED(pi) mtx_assert(&(pi)->pi_lock, MA_NOTOWNED) #define FL_LOCK(fl) mtx_lock(&(fl)->fl_lock) #define FL_TRYLOCK(fl) mtx_trylock(&(fl)->fl_lock) #define FL_UNLOCK(fl) mtx_unlock(&(fl)->fl_lock) #define FL_LOCK_ASSERT_OWNED(fl) mtx_assert(&(fl)->fl_lock, MA_OWNED) #define FL_LOCK_ASSERT_NOTOWNED(fl) mtx_assert(&(fl)->fl_lock, MA_NOTOWNED) #define RXQ_FL_LOCK(rxq) FL_LOCK(&(rxq)->fl) #define RXQ_FL_UNLOCK(rxq) FL_UNLOCK(&(rxq)->fl) #define RXQ_FL_LOCK_ASSERT_OWNED(rxq) FL_LOCK_ASSERT_OWNED(&(rxq)->fl) #define RXQ_FL_LOCK_ASSERT_NOTOWNED(rxq) FL_LOCK_ASSERT_NOTOWNED(&(rxq)->fl) #define EQ_LOCK(eq) mtx_lock(&(eq)->eq_lock) #define EQ_TRYLOCK(eq) mtx_trylock(&(eq)->eq_lock) #define EQ_UNLOCK(eq) mtx_unlock(&(eq)->eq_lock) #define EQ_LOCK_ASSERT_OWNED(eq) mtx_assert(&(eq)->eq_lock, MA_OWNED) #define EQ_LOCK_ASSERT_NOTOWNED(eq) mtx_assert(&(eq)->eq_lock, MA_NOTOWNED) #define TXQ_LOCK(txq) EQ_LOCK(&(txq)->eq) #define TXQ_TRYLOCK(txq) EQ_TRYLOCK(&(txq)->eq) #define TXQ_UNLOCK(txq) EQ_UNLOCK(&(txq)->eq) #define TXQ_LOCK_ASSERT_OWNED(txq) EQ_LOCK_ASSERT_OWNED(&(txq)->eq) #define TXQ_LOCK_ASSERT_NOTOWNED(txq) EQ_LOCK_ASSERT_NOTOWNED(&(txq)->eq) #define for_each_txq(pi, iter, txq) \ txq = &pi->adapter->sge.txq[pi->first_txq]; \ for (iter = 0; iter < pi->ntxq; ++iter, ++txq) #define for_each_rxq(pi, iter, rxq) \ rxq = &pi->adapter->sge.rxq[pi->first_rxq]; \ for (iter = 0; iter < pi->nrxq; ++iter, ++rxq) -#define NFIQ(sc) ((sc)->intr_count > 1 ? 
(sc)->intr_count - 1 : 1) +/* One for errors, one for firmware events */ +#define T4_EXTRA_INTR 2 +#define NINTRQ(sc) ((sc)->intr_count > T4_EXTRA_INTR ? \ + (sc)->intr_count - T4_EXTRA_INTR : 1) static inline uint32_t t4_read_reg(struct adapter *sc, uint32_t reg) { return bus_space_read_4(sc->bt, sc->bh, reg); } static inline void t4_write_reg(struct adapter *sc, uint32_t reg, uint32_t val) { bus_space_write_4(sc->bt, sc->bh, reg, val); } static inline uint64_t t4_read_reg64(struct adapter *sc, uint32_t reg) { return t4_bus_space_read_8(sc->bt, sc->bh, reg); } static inline void t4_write_reg64(struct adapter *sc, uint32_t reg, uint64_t val) { t4_bus_space_write_8(sc->bt, sc->bh, reg, val); } static inline void t4_os_pci_read_cfg1(struct adapter *sc, int reg, uint8_t *val) { *val = pci_read_config(sc->dev, reg, 1); } static inline void t4_os_pci_write_cfg1(struct adapter *sc, int reg, uint8_t val) { pci_write_config(sc->dev, reg, val, 1); } static inline void t4_os_pci_read_cfg2(struct adapter *sc, int reg, uint16_t *val) { *val = pci_read_config(sc->dev, reg, 2); } static inline void t4_os_pci_write_cfg2(struct adapter *sc, int reg, uint16_t val) { pci_write_config(sc->dev, reg, val, 2); } static inline void t4_os_pci_read_cfg4(struct adapter *sc, int reg, uint32_t *val) { *val = pci_read_config(sc->dev, reg, 4); } static inline void t4_os_pci_write_cfg4(struct adapter *sc, int reg, uint32_t val) { pci_write_config(sc->dev, reg, val, 4); } static inline struct port_info * adap2pinfo(struct adapter *sc, int idx) { return (sc->port[idx]); } static inline void t4_os_set_hw_addr(struct adapter *sc, int idx, uint8_t hw_addr[]) { bcopy(hw_addr, sc->port[idx]->hw_addr, ETHER_ADDR_LEN); } static inline bool is_10G_port(const struct port_info *pi) { return ((pi->link_cfg.supported & FW_PORT_CAP_SPEED_10G) != 0); } /* t4_main.c */ void cxgbe_txq_start(void *, int); int t4_os_find_pci_capability(struct adapter *, int); int t4_os_pci_save_state(struct adapter *); int 
t4_os_pci_restore_state(struct adapter *); void t4_os_portmod_changed(const struct adapter *, int); void t4_os_link_changed(struct adapter *, int, int); /* t4_sge.c */ void t4_sge_modload(void); void t4_sge_init(struct adapter *); int t4_create_dma_tag(struct adapter *); int t4_destroy_dma_tag(struct adapter *); int t4_setup_adapter_queues(struct adapter *); int t4_teardown_adapter_queues(struct adapter *); int t4_setup_eth_queues(struct port_info *); int t4_teardown_eth_queues(struct port_info *); void t4_intr_all(void *); -void t4_intr_fwd(void *); +void t4_intr(void *); void t4_intr_err(void *); void t4_intr_evt(void *); -void t4_intr_data(void *); -void t4_evt_rx(void *); -void t4_eth_rx(void *); int t4_mgmt_tx(struct adapter *, struct mbuf *); int t4_eth_tx(struct ifnet *, struct sge_txq *, struct mbuf *); void t4_update_fl_bufsize(struct ifnet *); #endif diff --git a/sys/dev/cxgbe/t4_main.c b/sys/dev/cxgbe/t4_main.c index cff4ef2bf8a6..ec72d04cdb9e 100644 --- a/sys/dev/cxgbe/t4_main.c +++ b/sys/dev/cxgbe/t4_main.c @@ -1,3422 +1,3436 @@ /*- * Copyright (c) 2011 Chelsio Communications, Inc. * All rights reserved. * Written by: Navdeep Parhar * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "common/t4_hw.h" #include "common/common.h" #include "common/t4_msg.h" #include "common/t4_regs.h" #include "common/t4_regs_values.h" #include "common/t4fw_interface.h" #include "t4_ioctl.h" #include "t4_l2t.h" /* T4 bus driver interface */ static int t4_probe(device_t); static int t4_attach(device_t); static int t4_detach(device_t); static device_method_t t4_methods[] = { DEVMETHOD(device_probe, t4_probe), DEVMETHOD(device_attach, t4_attach), DEVMETHOD(device_detach, t4_detach), /* bus interface */ DEVMETHOD(bus_print_child, bus_generic_print_child), DEVMETHOD(bus_driver_added, bus_generic_driver_added), { 0, 0 } }; static driver_t t4_driver = { "t4nex", t4_methods, sizeof(struct adapter) }; /* T4 port (cxgbe) interface */ static int cxgbe_probe(device_t); static int cxgbe_attach(device_t); static int cxgbe_detach(device_t); static device_method_t cxgbe_methods[] = { DEVMETHOD(device_probe, cxgbe_probe), DEVMETHOD(device_attach, cxgbe_attach), DEVMETHOD(device_detach, cxgbe_detach), { 0, 0 } }; static driver_t cxgbe_driver = { "cxgbe", cxgbe_methods, sizeof(struct port_info) }; static d_ioctl_t t4_ioctl; static d_open_t t4_open; static d_close_t t4_close; static struct cdevsw 
t4_cdevsw = { .d_version = D_VERSION, .d_flags = 0, .d_open = t4_open, .d_close = t4_close, .d_ioctl = t4_ioctl, .d_name = "t4nex", }; /* ifnet + media interface */ static void cxgbe_init(void *); static int cxgbe_ioctl(struct ifnet *, unsigned long, caddr_t); static void cxgbe_start(struct ifnet *); static int cxgbe_transmit(struct ifnet *, struct mbuf *); static void cxgbe_qflush(struct ifnet *); static int cxgbe_media_change(struct ifnet *); static void cxgbe_media_status(struct ifnet *, struct ifmediareq *); MALLOC_DEFINE(M_CXGBE, "cxgbe", "Chelsio T4 Ethernet driver and services"); /* * Tunables. */ SYSCTL_NODE(_hw, OID_AUTO, cxgbe, CTLFLAG_RD, 0, "cxgbe driver parameters"); static int force_firmware_install = 0; TUNABLE_INT("hw.cxgbe.force_firmware_install", &force_firmware_install); SYSCTL_UINT(_hw_cxgbe, OID_AUTO, force_firmware_install, CTLFLAG_RDTUN, &force_firmware_install, 0, "install firmware on every attach."); /* * Holdoff timer and packet counter values. */ static unsigned int intr_timer[SGE_NTIMERS] = {1, 5, 10, 50, 100, 200}; static unsigned int intr_pktcount[SGE_NCOUNTERS] = {1, 8, 16, 32}; /* 63 max */ /* * Max # of tx and rx queues to use for each 10G and 1G port. 
*/ static unsigned int max_ntxq_10g = 8; TUNABLE_INT("hw.cxgbe.max_ntxq_10G_port", &max_ntxq_10g); SYSCTL_UINT(_hw_cxgbe, OID_AUTO, max_ntxq_10G_port, CTLFLAG_RDTUN, &max_ntxq_10g, 0, "maximum number of tx queues per 10G port."); static unsigned int max_nrxq_10g = 8; TUNABLE_INT("hw.cxgbe.max_nrxq_10G_port", &max_nrxq_10g); SYSCTL_UINT(_hw_cxgbe, OID_AUTO, max_nrxq_10G_port, CTLFLAG_RDTUN, &max_nrxq_10g, 0, "maximum number of rxq's (per 10G port)."); static unsigned int max_ntxq_1g = 2; TUNABLE_INT("hw.cxgbe.max_ntxq_1G_port", &max_ntxq_1g); SYSCTL_UINT(_hw_cxgbe, OID_AUTO, max_ntxq_1G_port, CTLFLAG_RDTUN, &max_ntxq_1g, 0, "maximum number of tx queues per 1G port."); static unsigned int max_nrxq_1g = 2; TUNABLE_INT("hw.cxgbe.max_nrxq_1G_port", &max_nrxq_1g); SYSCTL_UINT(_hw_cxgbe, OID_AUTO, max_nrxq_1G_port, CTLFLAG_RDTUN, &max_nrxq_1g, 0, "maximum number of rxq's (per 1G port)."); /* * Holdoff parameters for 10G and 1G ports. */ static unsigned int tmr_idx_10g = 1; TUNABLE_INT("hw.cxgbe.holdoff_timer_idx_10G", &tmr_idx_10g); SYSCTL_UINT(_hw_cxgbe, OID_AUTO, holdoff_timer_idx_10G, CTLFLAG_RDTUN, &tmr_idx_10g, 0, "default timer index for interrupt holdoff (10G ports)."); static int pktc_idx_10g = 2; TUNABLE_INT("hw.cxgbe.holdoff_pktc_idx_10G", &pktc_idx_10g); SYSCTL_UINT(_hw_cxgbe, OID_AUTO, holdoff_pktc_idx_10G, CTLFLAG_RDTUN, &pktc_idx_10g, 0, "default pkt counter index for interrupt holdoff (10G ports)."); static unsigned int tmr_idx_1g = 1; TUNABLE_INT("hw.cxgbe.holdoff_timer_idx_1G", &tmr_idx_1g); SYSCTL_UINT(_hw_cxgbe, OID_AUTO, holdoff_timer_idx_1G, CTLFLAG_RDTUN, &tmr_idx_1g, 0, "default timer index for interrupt holdoff (1G ports)."); static int pktc_idx_1g = 2; TUNABLE_INT("hw.cxgbe.holdoff_pktc_idx_1G", &pktc_idx_1g); SYSCTL_UINT(_hw_cxgbe, OID_AUTO, holdoff_pktc_idx_1G, CTLFLAG_RDTUN, &pktc_idx_1g, 0, "default pkt counter index for interrupt holdoff (1G ports)."); /* * Size (# of entries) of each tx and rx queue. 
*/ static unsigned int qsize_txq = TX_EQ_QSIZE; TUNABLE_INT("hw.cxgbe.qsize_txq", &qsize_txq); SYSCTL_UINT(_hw_cxgbe, OID_AUTO, qsize_txq, CTLFLAG_RDTUN, &qsize_txq, 0, "default queue size of NIC tx queues."); static unsigned int qsize_rxq = RX_IQ_QSIZE; TUNABLE_INT("hw.cxgbe.qsize_rxq", &qsize_rxq); SYSCTL_UINT(_hw_cxgbe, OID_AUTO, qsize_rxq, CTLFLAG_RDTUN, &qsize_rxq, 0, "default queue size of NIC rx queues."); /* * Interrupt types allowed. */ static int intr_types = INTR_MSIX | INTR_MSI | INTR_INTX; TUNABLE_INT("hw.cxgbe.interrupt_types", &intr_types); SYSCTL_UINT(_hw_cxgbe, OID_AUTO, interrupt_types, CTLFLAG_RDTUN, &intr_types, 0, "interrupt types allowed (bits 0, 1, 2 = INTx, MSI, MSI-X respectively)"); /* - * Force the driver to use interrupt forwarding. + * Force the driver to use the same set of interrupts for all ports. */ -static int intr_fwd = 0; -TUNABLE_INT("hw.cxgbe.interrupt_forwarding", &intr_fwd); -SYSCTL_UINT(_hw_cxgbe, OID_AUTO, interrupt_forwarding, CTLFLAG_RDTUN, - &intr_fwd, 0, "always use forwarded interrupts"); +static int intr_shared = 0; +TUNABLE_INT("hw.cxgbe.interrupts_shared", &intr_shared); +SYSCTL_UINT(_hw_cxgbe, OID_AUTO, interrupts_shared, CTLFLAG_RDTUN, + &intr_shared, 0, "interrupts shared between all ports"); static unsigned int filter_mode = HW_TPL_FR_MT_PR_IV_P_FC; TUNABLE_INT("hw.cxgbe.filter_mode", &filter_mode); SYSCTL_UINT(_hw_cxgbe, OID_AUTO, filter_mode, CTLFLAG_RDTUN, &filter_mode, 0, "default global filter mode."); struct intrs_and_queues { int intr_type; /* INTx, MSI, or MSI-X */ int nirq; /* Number of vectors */ - int intr_fwd; /* Interrupts forwarded */ + int intr_shared; /* Interrupts shared between all ports */ int ntxq10g; /* # of NIC txq's for each 10G port */ int nrxq10g; /* # of NIC rxq's for each 10G port */ int ntxq1g; /* # of NIC txq's for each 1G port */ int nrxq1g; /* # of NIC rxq's for each 1G port */ }; struct filter_entry { uint32_t valid:1; /* filter allocated and valid */ uint32_t locked:1; /* filter 
is administratively locked */ uint32_t pending:1; /* filter action is pending firmware reply */ uint32_t smtidx:8; /* Source MAC Table index for smac */ struct l2t_entry *l2t; /* Layer Two Table entry for dmac */ struct t4_filter_specification fs; }; enum { MEMWIN0_APERTURE = 2048, MEMWIN0_BASE = 0x1b800, MEMWIN1_APERTURE = 32768, MEMWIN1_BASE = 0x28000, MEMWIN2_APERTURE = 65536, MEMWIN2_BASE = 0x30000, }; enum { XGMAC_MTU = (1 << 0), XGMAC_PROMISC = (1 << 1), XGMAC_ALLMULTI = (1 << 2), XGMAC_VLANEX = (1 << 3), XGMAC_UCADDR = (1 << 4), XGMAC_MCADDRS = (1 << 5), XGMAC_ALL = 0xffff }; static int map_bars(struct adapter *); static void setup_memwin(struct adapter *); static int cfg_itype_and_nqueues(struct adapter *, int, int, struct intrs_and_queues *); static int prep_firmware(struct adapter *); static int get_capabilities(struct adapter *, struct fw_caps_config_cmd *); static int get_params(struct adapter *, struct fw_caps_config_cmd *); static void t4_set_desc(struct adapter *); static void build_medialist(struct port_info *); static int update_mac_settings(struct port_info *, int); static int cxgbe_init_locked(struct port_info *); static int cxgbe_init_synchronized(struct port_info *); static int cxgbe_uninit_locked(struct port_info *); static int cxgbe_uninit_synchronized(struct port_info *); static int first_port_up(struct adapter *); static int last_port_down(struct adapter *); static int t4_alloc_irq(struct adapter *, struct irq *, int rid, iq_intr_handler_t *, void *, char *); static int t4_free_irq(struct adapter *, struct irq *); static void reg_block_dump(struct adapter *, uint8_t *, unsigned int, unsigned int); static void t4_get_regs(struct adapter *, struct t4_regdump *, uint8_t *); static void cxgbe_tick(void *); static int t4_sysctls(struct adapter *); static int cxgbe_sysctls(struct port_info *); static int sysctl_int_array(SYSCTL_HANDLER_ARGS); static int sysctl_holdoff_tmr_idx(SYSCTL_HANDLER_ARGS); static int 
sysctl_holdoff_pktc_idx(SYSCTL_HANDLER_ARGS); static int sysctl_qsize_rxq(SYSCTL_HANDLER_ARGS); static int sysctl_qsize_txq(SYSCTL_HANDLER_ARGS); static int sysctl_handle_t4_reg64(SYSCTL_HANDLER_ARGS); static inline void txq_start(struct ifnet *, struct sge_txq *); static uint32_t fconf_to_mode(uint32_t); static uint32_t mode_to_fconf(uint32_t); static uint32_t fspec_to_fconf(struct t4_filter_specification *); static int get_filter_mode(struct adapter *, uint32_t *); static int set_filter_mode(struct adapter *, uint32_t); static int get_filter(struct adapter *, struct t4_filter *); static int set_filter(struct adapter *, struct t4_filter *); static int del_filter(struct adapter *, struct t4_filter *); static void clear_filter(struct filter_entry *); static int set_filter_wr(struct adapter *, int); static int del_filter_wr(struct adapter *, int); void filter_rpl(struct adapter *, const struct cpl_set_tcb_rpl *); static int t4_mod_event(module_t, int, void *); struct t4_pciids { uint16_t device; uint8_t mpf; char *desc; } t4_pciids[] = { {0xa000, 0, "Chelsio Terminator 4 FPGA"}, {0x4400, 4, "Chelsio T440-dbg"}, {0x4401, 4, "Chelsio T420-CR"}, {0x4402, 4, "Chelsio T422-CR"}, {0x4403, 4, "Chelsio T440-CR"}, {0x4404, 4, "Chelsio T420-BCH"}, {0x4405, 4, "Chelsio T440-BCH"}, {0x4406, 4, "Chelsio T440-CH"}, {0x4407, 4, "Chelsio T420-SO"}, {0x4408, 4, "Chelsio T420-CX"}, {0x4409, 4, "Chelsio T420-BT"}, {0x440a, 4, "Chelsio T404-BT"}, }; static int t4_probe(device_t dev) { int i; uint16_t v = pci_get_vendor(dev); uint16_t d = pci_get_device(dev); if (v != PCI_VENDOR_ID_CHELSIO) return (ENXIO); for (i = 0; i < ARRAY_SIZE(t4_pciids); i++) { if (d == t4_pciids[i].device && pci_get_function(dev) == t4_pciids[i].mpf) { device_set_desc(dev, t4_pciids[i].desc); return (BUS_PROBE_DEFAULT); } } return (ENXIO); } static int t4_attach(device_t dev) { struct adapter *sc; int rc = 0, i, n10g, n1g, rqidx, tqidx; struct fw_caps_config_cmd caps; uint32_t p, v; struct intrs_and_queues iaq; 
struct sge *s; sc = device_get_softc(dev); sc->dev = dev; sc->pf = pci_get_function(dev); sc->mbox = sc->pf; pci_enable_busmaster(dev); if (pci_find_cap(dev, PCIY_EXPRESS, &i) == 0) { pci_set_max_read_req(dev, 4096); v = pci_read_config(dev, i + PCIR_EXPRESS_DEVICE_CTL, 2); v |= PCIM_EXP_CTL_RELAXED_ORD_ENABLE; pci_write_config(dev, i + PCIR_EXPRESS_DEVICE_CTL, v, 2); } snprintf(sc->lockname, sizeof(sc->lockname), "%s", device_get_nameunit(dev)); mtx_init(&sc->sc_lock, sc->lockname, 0, MTX_DEF); rc = map_bars(sc); if (rc != 0) goto done; /* error message displayed already */ memset(sc->chan_map, 0xff, sizeof(sc->chan_map)); /* Prepare the adapter for operation */ rc = -t4_prep_adapter(sc); if (rc != 0) { device_printf(dev, "failed to prepare adapter: %d.\n", rc); goto done; } /* Do this really early */ sc->cdev = make_dev(&t4_cdevsw, device_get_unit(dev), UID_ROOT, GID_WHEEL, 0600, "%s", device_get_nameunit(dev)); sc->cdev->si_drv1 = sc; /* Prepare the firmware for operation */ rc = prep_firmware(sc); if (rc != 0) goto done; /* error message displayed already */ /* Get device capabilities and select which ones we'll use */ rc = get_capabilities(sc, &caps); if (rc != 0) { device_printf(dev, "failed to initialize adapter capabilities: %d.\n", rc); goto done; } /* Choose the global RSS mode. 
*/ rc = -t4_config_glbl_rss(sc, sc->mbox, FW_RSS_GLB_CONFIG_CMD_MODE_BASICVIRTUAL, F_FW_RSS_GLB_CONFIG_CMD_TNLMAPEN | F_FW_RSS_GLB_CONFIG_CMD_HASHTOEPLITZ | F_FW_RSS_GLB_CONFIG_CMD_TNLALLLKP); if (rc != 0) { device_printf(dev, "failed to select global RSS mode: %d.\n", rc); goto done; } /* These are total (sum of all ports) limits for a bus driver */ rc = -t4_cfg_pfvf(sc, sc->mbox, sc->pf, 0, 128, /* max # of egress queues */ 64, /* max # of egress Ethernet or control queues */ 64, /* max # of ingress queues with fl/interrupt */ 0, /* max # of ingress queues without interrupt */ 0, /* PCIe traffic class */ 4, /* max # of virtual interfaces */ M_FW_PFVF_CMD_CMASK, M_FW_PFVF_CMD_PMASK, 16, FW_CMD_CAP_PF, FW_CMD_CAP_PF); if (rc != 0) { device_printf(dev, "failed to configure pf/vf resources: %d.\n", rc); goto done; } /* Need this before sge_init */ for (i = 0; i < SGE_NTIMERS; i++) sc->sge.timer_val[i] = min(intr_timer[i], 200U); for (i = 0; i < SGE_NCOUNTERS; i++) sc->sge.counter_val[i] = min(intr_pktcount[i], M_THRESHOLD_0); /* Also need the cooked value of cclk before sge_init */ p = (V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) | V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_CCLK)); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, &p, &v); if (rc != 0) { device_printf(sc->dev, "failed to obtain core clock value: %d.\n", rc); goto done; } sc->params.vpd.cclk = v; t4_sge_init(sc); t4_set_filter_mode(sc, filter_mode); t4_set_reg_field(sc, A_TP_GLOBAL_CONFIG, V_FIVETUPLELOOKUP(M_FIVETUPLELOOKUP), V_FIVETUPLELOOKUP(M_FIVETUPLELOOKUP)); t4_tp_wr_bits_indirect(sc, A_TP_INGRESS_CONFIG, F_CSUM_HAS_PSEUDO_HDR, F_LOOKUPEVERYPKT); /* get basic stuff going */ rc = -t4_early_init(sc, sc->mbox); if (rc != 0) { device_printf(dev, "early init failed: %d.\n", rc); goto done; } rc = get_params(sc, &caps); if (rc != 0) goto done; /* error message displayed already */ /* These are finalized by FW initialization, load their values now */ v = t4_read_reg(sc, A_TP_TIMER_RESOLUTION); sc->params.tp.tre 
= G_TIMERRESOLUTION(v); sc->params.tp.dack_re = G_DELAYEDACKRESOLUTION(v); t4_read_mtu_tbl(sc, sc->params.mtus, NULL); /* tweak some settings */ t4_write_reg(sc, A_TP_SHIFT_CNT, V_SYNSHIFTMAX(6) | V_RXTSHIFTMAXR1(4) | V_RXTSHIFTMAXR2(15) | V_PERSHIFTBACKOFFMAX(8) | V_PERSHIFTMAX(8) | V_KEEPALIVEMAXR1(4) | V_KEEPALIVEMAXR2(9)); t4_write_reg(sc, A_ULP_RX_TDDP_PSZ, V_HPZ0(PAGE_SHIFT - 12)); setup_memwin(sc); rc = t4_create_dma_tag(sc); if (rc != 0) goto done; /* error message displayed already */ /* * First pass over all the ports - allocate VIs and initialize some * basic parameters like mac address, port type, etc. We also figure * out whether a port is 10G or 1G and use that information when * calculating how many interrupts to attempt to allocate. */ n10g = n1g = 0; for_each_port(sc, i) { struct port_info *pi; pi = malloc(sizeof(*pi), M_CXGBE, M_ZERO | M_WAITOK); sc->port[i] = pi; /* These must be set before t4_port_init */ pi->adapter = sc; pi->port_id = i; /* Allocate the vi and initialize parameters like mac addr */ rc = -t4_port_init(pi, sc->mbox, sc->pf, 0); if (rc != 0) { device_printf(dev, "unable to initialize port %d: %d\n", i, rc); free(pi, M_CXGBE); - sc->port[i] = NULL; /* indicates init failed */ - continue; + sc->port[i] = NULL; + goto done; } snprintf(pi->lockname, sizeof(pi->lockname), "%sp%d", device_get_nameunit(dev), i); mtx_init(&pi->pi_lock, pi->lockname, 0, MTX_DEF); if (is_10G_port(pi)) { n10g++; pi->tmr_idx = tmr_idx_10g; pi->pktc_idx = pktc_idx_10g; } else { n1g++; pi->tmr_idx = tmr_idx_1g; pi->pktc_idx = pktc_idx_1g; } pi->xact_addr_filt = -1; pi->qsize_rxq = max(qsize_rxq, 128); while (pi->qsize_rxq & 7) pi->qsize_rxq++; pi->qsize_txq = max(qsize_txq, 128); if (pi->qsize_rxq != qsize_rxq) { device_printf(dev, "using %d instead of %d as the rx queue size.\n", pi->qsize_rxq, qsize_rxq); } if (pi->qsize_txq != qsize_txq) { device_printf(dev, "using %d instead of %d as the tx queue size.\n", pi->qsize_txq, qsize_txq); } pi->dev = 
device_add_child(dev, "cxgbe", -1); if (pi->dev == NULL) { device_printf(dev, "failed to add device for port %d.\n", i); rc = ENXIO; goto done; } device_set_softc(pi->dev, pi); setbit(&sc->registered_device_map, i); } if (sc->registered_device_map == 0) { device_printf(dev, "no usable ports\n"); rc = ENXIO; goto done; } /* * Interrupt type, # of interrupts, # of rx/tx queues, etc. */ rc = cfg_itype_and_nqueues(sc, n10g, n1g, &iaq); if (rc != 0) goto done; /* error message displayed already */ sc->intr_type = iaq.intr_type; sc->intr_count = iaq.nirq; s = &sc->sge; s->nrxq = n10g * iaq.nrxq10g + n1g * iaq.nrxq1g; s->ntxq = n10g * iaq.ntxq10g + n1g * iaq.ntxq1g; s->neq = s->ntxq + s->nrxq; /* the free list in an rxq is an eq */ - s->neq += NCHAN; /* control queues, 1 per hw channel */ + s->neq += sc->params.nports; /* control queues, 1 per port */ s->niq = s->nrxq + 1; /* 1 extra for firmware event queue */ - if (iaq.intr_fwd) { - sc->flags |= INTR_FWD; - s->niq += NFIQ(sc); /* forwarded interrupt queues */ - s->fiq = malloc(NFIQ(sc) * sizeof(struct sge_iq), M_CXGBE, - M_ZERO | M_WAITOK); - } - s->ctrlq = malloc(NCHAN * sizeof(struct sge_ctrlq), M_CXGBE, + if (iaq.intr_shared) + sc->flags |= INTR_SHARED; + s->niq += NINTRQ(sc); /* interrupt queues */ + + s->intrq = malloc(NINTRQ(sc) * sizeof(struct sge_iq), M_CXGBE, + M_ZERO | M_WAITOK); + s->ctrlq = malloc(sc->params.nports * sizeof(struct sge_ctrlq), M_CXGBE, M_ZERO | M_WAITOK); s->rxq = malloc(s->nrxq * sizeof(struct sge_rxq), M_CXGBE, M_ZERO | M_WAITOK); s->txq = malloc(s->ntxq * sizeof(struct sge_txq), M_CXGBE, M_ZERO | M_WAITOK); s->iqmap = malloc(s->niq * sizeof(struct sge_iq *), M_CXGBE, M_ZERO | M_WAITOK); s->eqmap = malloc(s->neq * sizeof(struct sge_eq *), M_CXGBE, M_ZERO | M_WAITOK); sc->irq = malloc(sc->intr_count * sizeof(struct irq), M_CXGBE, M_ZERO | M_WAITOK); sc->l2t = t4_init_l2t(M_WAITOK); t4_sysctls(sc); /* * Second pass over the ports. 
This time we know the number of rx and * tx queues that each port should get. */ rqidx = tqidx = 0; for_each_port(sc, i) { struct port_info *pi = sc->port[i]; if (pi == NULL) continue; pi->first_rxq = rqidx; pi->nrxq = is_10G_port(pi) ? iaq.nrxq10g : iaq.nrxq1g; pi->first_txq = tqidx; pi->ntxq = is_10G_port(pi) ? iaq.ntxq10g : iaq.ntxq1g; rqidx += pi->nrxq; tqidx += pi->ntxq; } rc = bus_generic_attach(dev); if (rc != 0) { device_printf(dev, "failed to attach all child ports: %d\n", rc); goto done; } #ifdef INVARIANTS device_printf(dev, "%p, %d ports (0x%x), %d intr_type, %d intr_count\n", sc, sc->params.nports, sc->params.portvec, sc->intr_type, sc->intr_count); #endif t4_set_desc(sc); done: if (rc != 0) t4_detach(dev); return (rc); } /* * Idempotent */ static int t4_detach(device_t dev) { struct adapter *sc; struct port_info *pi; int i; sc = device_get_softc(dev); if (sc->cdev) destroy_dev(sc->cdev); bus_generic_detach(dev); for (i = 0; i < MAX_NPORTS; i++) { pi = sc->port[i]; if (pi) { t4_free_vi(pi->adapter, sc->mbox, sc->pf, 0, pi->viid); if (pi->dev) device_delete_child(dev, pi->dev); mtx_destroy(&pi->pi_lock); free(pi, M_CXGBE); } } if (sc->flags & FW_OK) t4_fw_bye(sc, sc->mbox); if (sc->intr_type == INTR_MSI || sc->intr_type == INTR_MSIX) pci_release_msi(dev); if (sc->regs_res) bus_release_resource(dev, SYS_RES_MEMORY, sc->regs_rid, sc->regs_res); if (sc->msix_res) bus_release_resource(dev, SYS_RES_MEMORY, sc->msix_rid, sc->msix_res); if (sc->l2t) t4_free_l2t(sc->l2t); free(sc->irq, M_CXGBE); free(sc->sge.rxq, M_CXGBE); free(sc->sge.txq, M_CXGBE); free(sc->sge.ctrlq, M_CXGBE); - free(sc->sge.fiq, M_CXGBE); + free(sc->sge.intrq, M_CXGBE); free(sc->sge.iqmap, M_CXGBE); free(sc->sge.eqmap, M_CXGBE); free(sc->tids.ftid_tab, M_CXGBE); t4_destroy_dma_tag(sc); mtx_destroy(&sc->sc_lock); bzero(sc, sizeof(*sc)); return (0); } static int cxgbe_probe(device_t dev) { char buf[128]; struct port_info *pi = device_get_softc(dev); snprintf(buf, sizeof(buf), "Port %d", 
pi->port_id); device_set_desc_copy(dev, buf); return (BUS_PROBE_DEFAULT); } #define T4_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | \ IFCAP_VLAN_HWCSUM | IFCAP_TSO | IFCAP_JUMBO_MTU | IFCAP_LRO | \ IFCAP_VLAN_HWTSO) #define T4_CAP_ENABLE (T4_CAP & ~IFCAP_TSO6) static int cxgbe_attach(device_t dev) { struct port_info *pi = device_get_softc(dev); struct ifnet *ifp; /* Allocate an ifnet and set it up */ ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { device_printf(dev, "Cannot allocate ifnet\n"); return (ENOMEM); } pi->ifp = ifp; ifp->if_softc = pi; callout_init(&pi->tick, CALLOUT_MPSAFE); pi->tq = taskqueue_create("cxgbe_taskq", M_NOWAIT, taskqueue_thread_enqueue, &pi->tq); if (pi->tq == NULL) { device_printf(dev, "failed to allocate port task queue\n"); if_free(pi->ifp); return (ENOMEM); } taskqueue_start_threads(&pi->tq, 1, PI_NET, "%s taskq", device_get_nameunit(dev)); if_initname(ifp, device_get_name(dev), device_get_unit(dev)); ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_init = cxgbe_init; ifp->if_ioctl = cxgbe_ioctl; ifp->if_start = cxgbe_start; ifp->if_transmit = cxgbe_transmit; ifp->if_qflush = cxgbe_qflush; ifp->if_snd.ifq_drv_maxlen = 1024; IFQ_SET_MAXLEN(&ifp->if_snd, ifp->if_snd.ifq_drv_maxlen); IFQ_SET_READY(&ifp->if_snd); ifp->if_capabilities = T4_CAP; ifp->if_capenable = T4_CAP_ENABLE; ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO; /* Initialize ifmedia for this port */ ifmedia_init(&pi->media, IFM_IMASK, cxgbe_media_change, cxgbe_media_status); build_medialist(pi); ether_ifattach(ifp, pi->hw_addr); #ifdef INVARIANTS device_printf(dev, "%p, %d txq, %d rxq\n", pi, pi->ntxq, pi->nrxq); #endif cxgbe_sysctls(pi); return (0); } static int cxgbe_detach(device_t dev) { struct port_info *pi = device_get_softc(dev); struct adapter *sc = pi->adapter; int rc; /* Tell if_ioctl and if_init that the port is going away */ ADAPTER_LOCK(sc); SET_DOOMED(pi); wakeup(&sc->flags); while (IS_BUSY(sc)) mtx_sleep(&sc->flags, 
&sc->sc_lock, 0, "t4detach", 0); SET_BUSY(sc); ADAPTER_UNLOCK(sc); rc = cxgbe_uninit_synchronized(pi); if (rc != 0) device_printf(dev, "port uninit failed: %d.\n", rc); taskqueue_free(pi->tq); ifmedia_removeall(&pi->media); ether_ifdetach(pi->ifp); if_free(pi->ifp); ADAPTER_LOCK(sc); CLR_BUSY(sc); wakeup_one(&sc->flags); ADAPTER_UNLOCK(sc); return (0); } static void cxgbe_init(void *arg) { struct port_info *pi = arg; struct adapter *sc = pi->adapter; ADAPTER_LOCK(sc); cxgbe_init_locked(pi); /* releases adapter lock */ ADAPTER_LOCK_ASSERT_NOTOWNED(sc); } static int cxgbe_ioctl(struct ifnet *ifp, unsigned long cmd, caddr_t data) { int rc = 0, mtu, flags; struct port_info *pi = ifp->if_softc; struct adapter *sc = pi->adapter; struct ifreq *ifr = (struct ifreq *)data; uint32_t mask; switch (cmd) { case SIOCSIFMTU: ADAPTER_LOCK(sc); rc = IS_DOOMED(pi) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0); if (rc) { fail: ADAPTER_UNLOCK(sc); return (rc); } mtu = ifr->ifr_mtu; if ((mtu < ETHERMIN) || (mtu > ETHERMTU_JUMBO)) { rc = EINVAL; } else { ifp->if_mtu = mtu; if (ifp->if_drv_flags & IFF_DRV_RUNNING) { t4_update_fl_bufsize(ifp); PORT_LOCK(pi); rc = update_mac_settings(pi, XGMAC_MTU); PORT_UNLOCK(pi); } } ADAPTER_UNLOCK(sc); break; case SIOCSIFFLAGS: ADAPTER_LOCK(sc); if (IS_DOOMED(pi)) { rc = ENXIO; goto fail; } if (ifp->if_flags & IFF_UP) { if (ifp->if_drv_flags & IFF_DRV_RUNNING) { flags = pi->if_flags; if ((ifp->if_flags ^ flags) & (IFF_PROMISC | IFF_ALLMULTI)) { if (IS_BUSY(sc)) { rc = EBUSY; goto fail; } PORT_LOCK(pi); rc = update_mac_settings(pi, XGMAC_PROMISC | XGMAC_ALLMULTI); PORT_UNLOCK(pi); } ADAPTER_UNLOCK(sc); } else rc = cxgbe_init_locked(pi); pi->if_flags = ifp->if_flags; } else if (ifp->if_drv_flags & IFF_DRV_RUNNING) rc = cxgbe_uninit_locked(pi); else ADAPTER_UNLOCK(sc); ADAPTER_LOCK_ASSERT_NOTOWNED(sc); break; case SIOCADDMULTI: case SIOCDELMULTI: /* these two can be called with a mutex held :-( */ ADAPTER_LOCK(sc); rc = IS_DOOMED(pi) ? ENXIO : (IS_BUSY(sc) ? 
EBUSY : 0); if (rc) goto fail; if (ifp->if_drv_flags & IFF_DRV_RUNNING) { PORT_LOCK(pi); rc = update_mac_settings(pi, XGMAC_MCADDRS); PORT_UNLOCK(pi); } ADAPTER_UNLOCK(sc); break; case SIOCSIFCAP: ADAPTER_LOCK(sc); rc = IS_DOOMED(pi) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0); if (rc) goto fail; mask = ifr->ifr_reqcap ^ ifp->if_capenable; if (mask & IFCAP_TXCSUM) { ifp->if_capenable ^= IFCAP_TXCSUM; ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP); if (IFCAP_TSO & ifp->if_capenable && !(IFCAP_TXCSUM & ifp->if_capenable)) { ifp->if_capenable &= ~IFCAP_TSO; ifp->if_hwassist &= ~CSUM_TSO; if_printf(ifp, "tso disabled due to -txcsum.\n"); } } if (mask & IFCAP_RXCSUM) ifp->if_capenable ^= IFCAP_RXCSUM; if (mask & IFCAP_TSO4) { ifp->if_capenable ^= IFCAP_TSO4; if (IFCAP_TSO & ifp->if_capenable) { if (IFCAP_TXCSUM & ifp->if_capenable) ifp->if_hwassist |= CSUM_TSO; else { ifp->if_capenable &= ~IFCAP_TSO; ifp->if_hwassist &= ~CSUM_TSO; if_printf(ifp, "enable txcsum first.\n"); rc = EAGAIN; } } else ifp->if_hwassist &= ~CSUM_TSO; } if (mask & IFCAP_LRO) { #ifdef INET int i; struct sge_rxq *rxq; ifp->if_capenable ^= IFCAP_LRO; for_each_rxq(pi, i, rxq) { if (ifp->if_capenable & IFCAP_LRO) rxq->flags |= RXQ_LRO_ENABLED; else rxq->flags &= ~RXQ_LRO_ENABLED; } #endif } #ifndef TCP_OFFLOAD_DISABLE if (mask & IFCAP_TOE4) { rc = EOPNOTSUPP; } #endif if (mask & IFCAP_VLAN_HWTAGGING) { ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; if (ifp->if_drv_flags & IFF_DRV_RUNNING) { PORT_LOCK(pi); rc = update_mac_settings(pi, XGMAC_VLANEX); PORT_UNLOCK(pi); } } if (mask & IFCAP_VLAN_MTU) { ifp->if_capenable ^= IFCAP_VLAN_MTU; /* Need to find out how to disable auto-mtu-inflation */ } if (mask & IFCAP_VLAN_HWTSO) ifp->if_capenable ^= IFCAP_VLAN_HWTSO; if (mask & IFCAP_VLAN_HWCSUM) ifp->if_capenable ^= IFCAP_VLAN_HWCSUM; #ifdef VLAN_CAPABILITIES VLAN_CAPABILITIES(ifp); #endif ADAPTER_UNLOCK(sc); break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: ifmedia_ioctl(ifp, ifr, &pi->media, cmd); break; default: rc = 
ether_ioctl(ifp, cmd, data); } return (rc); } static void cxgbe_start(struct ifnet *ifp) { struct port_info *pi = ifp->if_softc; struct sge_txq *txq; int i; for_each_txq(pi, i, txq) { if (TXQ_TRYLOCK(txq)) { txq_start(ifp, txq); TXQ_UNLOCK(txq); } } } static int cxgbe_transmit(struct ifnet *ifp, struct mbuf *m) { struct port_info *pi = ifp->if_softc; struct adapter *sc = pi->adapter; struct sge_txq *txq = &sc->sge.txq[pi->first_txq]; struct buf_ring *br; int rc; M_ASSERTPKTHDR(m); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { m_freem(m); return (0); } if (m->m_flags & M_FLOWID) txq += (m->m_pkthdr.flowid % pi->ntxq); br = txq->br; if (TXQ_TRYLOCK(txq) == 0) { /* * XXX: make sure that this packet really is sent out. There is * a small race where t4_eth_tx may stop draining the drbr and * goes away, just before we enqueued this mbuf. */ return (drbr_enqueue(ifp, br, m)); } /* * txq->m is the mbuf that is held up due to a temporary shortage of * resources and it should be put on the wire first. Then what's in * drbr and finally the mbuf that was just passed in to us. * * Return code should indicate the fate of the mbuf that was passed in * this time. */ TXQ_LOCK_ASSERT_OWNED(txq); if (drbr_needs_enqueue(ifp, br) || txq->m) { /* Queued for transmission. */ rc = drbr_enqueue(ifp, br, m); m = txq->m ? txq->m : drbr_dequeue(ifp, br); (void) t4_eth_tx(ifp, txq, m); TXQ_UNLOCK(txq); return (rc); } /* Direct transmission. */ rc = t4_eth_tx(ifp, txq, m); if (rc != 0 && txq->m) rc = 0; /* held, will be transmitted soon (hopefully) */ TXQ_UNLOCK(txq); return (rc); } static void cxgbe_qflush(struct ifnet *ifp) { struct port_info *pi = ifp->if_softc; struct sge_txq *txq; int i; struct mbuf *m; /* queues do not exist if !IFF_DRV_RUNNING. 
*/ if (ifp->if_drv_flags & IFF_DRV_RUNNING) { for_each_txq(pi, i, txq) { TXQ_LOCK(txq); m_freem(txq->m); while ((m = buf_ring_dequeue_sc(txq->br)) != NULL) m_freem(m); TXQ_UNLOCK(txq); } } if_qflush(ifp); } static int cxgbe_media_change(struct ifnet *ifp) { struct port_info *pi = ifp->if_softc; device_printf(pi->dev, "%s unimplemented.\n", __func__); return (EOPNOTSUPP); } static void cxgbe_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) { struct port_info *pi = ifp->if_softc; struct ifmedia_entry *cur = pi->media.ifm_cur; int speed = pi->link_cfg.speed; int data = (pi->port_type << 8) | pi->mod_type; if (cur->ifm_data != data) { build_medialist(pi); cur = pi->media.ifm_cur; } ifmr->ifm_status = IFM_AVALID; if (!pi->link_cfg.link_ok) return; ifmr->ifm_status |= IFM_ACTIVE; /* active and current will differ iff current media is autoselect. */ if (IFM_SUBTYPE(cur->ifm_media) != IFM_AUTO) return; ifmr->ifm_active = IFM_ETHER | IFM_FDX; if (speed == SPEED_10000) ifmr->ifm_active |= IFM_10G_T; else if (speed == SPEED_1000) ifmr->ifm_active |= IFM_1000_T; else if (speed == SPEED_100) ifmr->ifm_active |= IFM_100_TX; else if (speed == SPEED_10) ifmr->ifm_active |= IFM_10_T; else KASSERT(0, ("%s: link up but speed unknown (%u)", __func__, speed)); } void t4_fatal_err(struct adapter *sc) { t4_set_reg_field(sc, A_SGE_CONTROL, F_GLOBALENABLE, 0); t4_intr_disable(sc); log(LOG_EMERG, "%s: encountered fatal error, adapter stopped.\n", device_get_nameunit(sc->dev)); } static int map_bars(struct adapter *sc) { sc->regs_rid = PCIR_BAR(0); sc->regs_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY, &sc->regs_rid, RF_ACTIVE); if (sc->regs_res == NULL) { device_printf(sc->dev, "cannot map registers.\n"); return (ENXIO); } sc->bt = rman_get_bustag(sc->regs_res); sc->bh = rman_get_bushandle(sc->regs_res); sc->mmio_len = rman_get_size(sc->regs_res); sc->msix_rid = PCIR_BAR(4); sc->msix_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY, &sc->msix_rid, RF_ACTIVE); if 
(sc->msix_res == NULL) { device_printf(sc->dev, "cannot map MSI-X BAR.\n"); return (ENXIO); } return (0); } static void setup_memwin(struct adapter *sc) { u_long bar0; bar0 = rman_get_start(sc->regs_res); t4_write_reg(sc, PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_BASE_WIN, 0), (bar0 + MEMWIN0_BASE) | V_BIR(0) | V_WINDOW(ilog2(MEMWIN0_APERTURE) - 10)); t4_write_reg(sc, PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_BASE_WIN, 1), (bar0 + MEMWIN1_BASE) | V_BIR(0) | V_WINDOW(ilog2(MEMWIN1_APERTURE) - 10)); t4_write_reg(sc, PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_BASE_WIN, 2), (bar0 + MEMWIN2_BASE) | V_BIR(0) | V_WINDOW(ilog2(MEMWIN2_APERTURE) - 10)); } static int cfg_itype_and_nqueues(struct adapter *sc, int n10g, int n1g, struct intrs_and_queues *iaq) { int rc, itype, navail, nc, nrxq10g, nrxq1g; bzero(iaq, sizeof(*iaq)); nc = mp_ncpus; /* our snapshot of the number of CPUs */ for (itype = INTR_MSIX; itype; itype >>= 1) { if ((itype & intr_types) == 0) continue; /* not allowed */ if (itype == INTR_MSIX) navail = pci_msix_count(sc->dev); else if (itype == INTR_MSI) navail = pci_msi_count(sc->dev); else navail = 1; if (navail == 0) continue; iaq->intr_type = itype; iaq->ntxq10g = min(nc, max_ntxq_10g); iaq->ntxq1g = min(nc, max_ntxq_1g); nrxq10g = min(nc, max_nrxq_10g); nrxq1g = min(nc, max_nrxq_1g); - /* Extra 2 is for a) error interrupt b) firmware event */ - iaq->nirq = n10g * nrxq10g + n1g * nrxq1g + 2; - if (iaq->nirq <= navail && intr_fwd == 0) { + iaq->nirq = n10g * nrxq10g + n1g * nrxq1g + T4_EXTRA_INTR; + if (iaq->nirq <= navail && intr_shared == 0) { if (itype == INTR_MSI && !powerof2(iaq->nirq)) - goto fwd; + goto share; /* One for err, one for fwq, and one for each rxq */ - iaq->intr_fwd = 0; + iaq->intr_shared = 0; iaq->nrxq10g = nrxq10g; iaq->nrxq1g = nrxq1g; } else { -fwd: - iaq->intr_fwd = 1; +share: + iaq->intr_shared = 1; - if (navail > nc) { + if (navail >= nc + T4_EXTRA_INTR) { if (itype == INTR_MSIX) - navail = nc + 1; + navail = nc + T4_EXTRA_INTR; /* navail is 
and must remain a pow2 for MSI */ if (itype == INTR_MSI) { KASSERT(powerof2(navail), ("%d not power of 2", navail)); - while (navail / 2 > nc) + while (navail / 2 >= nc + T4_EXTRA_INTR) navail /= 2; } } iaq->nirq = navail; /* total # of interrupts */ /* * If we have multiple vectors available reserve one * exclusively for errors. The rest will be shared by * the fwq and data. */ if (navail > 1) navail--; iaq->nrxq10g = min(nrxq10g, navail); iaq->nrxq1g = min(nrxq1g, navail); } navail = iaq->nirq; rc = 0; if (itype == INTR_MSIX) rc = pci_alloc_msix(sc->dev, &navail); else if (itype == INTR_MSI) rc = pci_alloc_msi(sc->dev, &navail); if (rc == 0) { if (navail == iaq->nirq) return (0); /* * Didn't get the number requested. Use whatever number * the kernel is willing to allocate (it's in navail). */ pci_release_msi(sc->dev); - goto fwd; + goto share; } device_printf(sc->dev, "failed to allocate vectors:%d, type=%d, req=%d, rcvd=%d\n", itype, rc, iaq->nirq, navail); } device_printf(sc->dev, "failed to find a usable interrupt type. " "allowed=%d, msi-x=%d, msi=%d, intx=1", intr_types, pci_msix_count(sc->dev), pci_msi_count(sc->dev)); return (ENXIO); } /* * Install a compatible firmware (if required), establish contact with it, * become the master, and reset the device. */ static int prep_firmware(struct adapter *sc) { const struct firmware *fw; int rc; enum dev_state state; /* Check firmware version and install a different one if necessary */ rc = t4_check_fw_version(sc); if (rc != 0 || force_firmware_install) { uint32_t v = 0; fw = firmware_get(T4_FWNAME); if (fw != NULL) { const struct fw_hdr *hdr = (const void *)fw->data; v = ntohl(hdr->fw_ver); /* * The firmware module will not be used if it isn't the * same major version as what the driver was compiled * with. This check trumps force_firmware_install. 
*/ if (G_FW_HDR_FW_VER_MAJOR(v) != FW_VERSION_MAJOR) { device_printf(sc->dev, "Found firmware image but version %d " "can not be used with this driver (%d)\n", G_FW_HDR_FW_VER_MAJOR(v), FW_VERSION_MAJOR); firmware_put(fw, FIRMWARE_UNLOAD); fw = NULL; } } if (fw == NULL && (rc < 0 || force_firmware_install)) { device_printf(sc->dev, "No usable firmware. " "card has %d.%d.%d, driver compiled with %d.%d.%d, " "force_firmware_install%s set", G_FW_HDR_FW_VER_MAJOR(sc->params.fw_vers), G_FW_HDR_FW_VER_MINOR(sc->params.fw_vers), G_FW_HDR_FW_VER_MICRO(sc->params.fw_vers), FW_VERSION_MAJOR, FW_VERSION_MINOR, FW_VERSION_MICRO, force_firmware_install ? "" : " not"); return (EAGAIN); } /* * Always upgrade, even for minor/micro/build mismatches. * Downgrade only for a major version mismatch or if * force_firmware_install was specified. */ if (fw != NULL && (rc < 0 || force_firmware_install || v > sc->params.fw_vers)) { device_printf(sc->dev, "installing firmware %d.%d.%d.%d on card.\n", G_FW_HDR_FW_VER_MAJOR(v), G_FW_HDR_FW_VER_MINOR(v), G_FW_HDR_FW_VER_MICRO(v), G_FW_HDR_FW_VER_BUILD(v)); rc = -t4_load_fw(sc, fw->data, fw->datasize); if (rc != 0) { device_printf(sc->dev, "failed to install firmware: %d\n", rc); firmware_put(fw, FIRMWARE_UNLOAD); return (rc); } else { /* refresh */ (void) t4_check_fw_version(sc); } } if (fw != NULL) firmware_put(fw, FIRMWARE_UNLOAD); } /* Contact firmware, request master */ rc = t4_fw_hello(sc, sc->mbox, sc->mbox, MASTER_MUST, &state); if (rc < 0) { rc = -rc; device_printf(sc->dev, "failed to connect to the firmware: %d.\n", rc); return (rc); } /* Reset device */ rc = -t4_fw_reset(sc, sc->mbox, F_PIORSTMODE | F_PIORST); if (rc != 0) { device_printf(sc->dev, "firmware reset failed: %d.\n", rc); if (rc != ETIMEDOUT && rc != EIO) t4_fw_bye(sc, sc->mbox); return (rc); } snprintf(sc->fw_version, sizeof(sc->fw_version), "%u.%u.%u.%u", G_FW_HDR_FW_VER_MAJOR(sc->params.fw_vers), G_FW_HDR_FW_VER_MINOR(sc->params.fw_vers), 
G_FW_HDR_FW_VER_MICRO(sc->params.fw_vers), G_FW_HDR_FW_VER_BUILD(sc->params.fw_vers)); sc->flags |= FW_OK; return (0); } static int get_capabilities(struct adapter *sc, struct fw_caps_config_cmd *caps) { int rc; bzero(caps, sizeof(*caps)); caps->op_to_write = htobe32(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) | F_FW_CMD_REQUEST | F_FW_CMD_READ); caps->retval_len16 = htobe32(FW_LEN16(*caps)); rc = -t4_wr_mbox(sc, sc->mbox, caps, sizeof(*caps), caps); if (rc != 0) return (rc); if (caps->niccaps & htobe16(FW_CAPS_CONFIG_NIC_VM)) caps->niccaps ^= htobe16(FW_CAPS_CONFIG_NIC_VM); caps->op_to_write = htobe32(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) | F_FW_CMD_REQUEST | F_FW_CMD_WRITE); rc = -t4_wr_mbox(sc, sc->mbox, caps, sizeof(*caps), NULL); return (rc); } static int get_params(struct adapter *sc, struct fw_caps_config_cmd *caps) { int rc; uint32_t params[7], val[7]; #define FW_PARAM_DEV(param) \ (V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) | \ V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_##param)) #define FW_PARAM_PFVF(param) \ (V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_PFVF) | \ V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_PFVF_##param)) params[0] = FW_PARAM_DEV(PORTVEC); params[1] = FW_PARAM_PFVF(IQFLINT_START); params[2] = FW_PARAM_PFVF(EQ_START); params[3] = FW_PARAM_PFVF(FILTER_START); params[4] = FW_PARAM_PFVF(FILTER_END); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 5, params, val); if (rc != 0) { device_printf(sc->dev, "failed to query parameters: %d.\n", rc); goto done; } sc->params.portvec = val[0]; sc->params.nports = 0; while (val[0]) { sc->params.nports++; val[0] &= val[0] - 1; } sc->sge.iq_start = val[1]; sc->sge.eq_start = val[2]; sc->tids.ftid_base = val[3]; sc->tids.nftids = val[4] - val[3] + 1; if (caps->toecaps) { /* query offload-related parameters */ params[0] = FW_PARAM_DEV(NTID); params[1] = FW_PARAM_PFVF(SERVER_START); params[2] = FW_PARAM_PFVF(SERVER_END); params[3] = FW_PARAM_PFVF(TDDP_START); params[4] = FW_PARAM_PFVF(TDDP_END); params[5] = FW_PARAM_DEV(FLOWC_BUFFIFO_SZ); rc = 
-t4_query_params(sc, sc->mbox, sc->pf, 0, 6, params, val); if (rc != 0) { device_printf(sc->dev, "failed to query TOE parameters: %d.\n", rc); goto done; } sc->tids.ntids = val[0]; sc->tids.natids = min(sc->tids.ntids / 2, MAX_ATIDS); sc->tids.stid_base = val[1]; sc->tids.nstids = val[2] - val[1] + 1; sc->vres.ddp.start = val[3]; sc->vres.ddp.size = val[4] - val[3] + 1; sc->params.ofldq_wr_cred = val[5]; sc->params.offload = 1; } if (caps->rdmacaps) { params[0] = FW_PARAM_PFVF(STAG_START); params[1] = FW_PARAM_PFVF(STAG_END); params[2] = FW_PARAM_PFVF(RQ_START); params[3] = FW_PARAM_PFVF(RQ_END); params[4] = FW_PARAM_PFVF(PBL_START); params[5] = FW_PARAM_PFVF(PBL_END); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, params, val); if (rc != 0) { device_printf(sc->dev, "failed to query RDMA parameters: %d.\n", rc); goto done; } sc->vres.stag.start = val[0]; sc->vres.stag.size = val[1] - val[0] + 1; sc->vres.rq.start = val[2]; sc->vres.rq.size = val[3] - val[2] + 1; sc->vres.pbl.start = val[4]; sc->vres.pbl.size = val[5] - val[4] + 1; } if (caps->iscsicaps) { params[0] = FW_PARAM_PFVF(ISCSI_START); params[1] = FW_PARAM_PFVF(ISCSI_END); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 2, params, val); if (rc != 0) { device_printf(sc->dev, "failed to query iSCSI parameters: %d.\n", rc); goto done; } sc->vres.iscsi.start = val[0]; sc->vres.iscsi.size = val[1] - val[0] + 1; } #undef FW_PARAM_PFVF #undef FW_PARAM_DEV done: return (rc); } static void t4_set_desc(struct adapter *sc) { char buf[128]; struct adapter_params *p = &sc->params; snprintf(buf, sizeof(buf), "Chelsio %s (rev %d) %d port %sNIC PCIe-x%d %d %s, S/N:%s, E/C:%s", p->vpd.id, p->rev, p->nports, is_offload(sc) ? "R" : "", p->pci.width, sc->intr_count, sc->intr_type == INTR_MSIX ? "MSI-X" : (sc->intr_type == INTR_MSI ? 
"MSI" : "INTx"), p->vpd.sn, p->vpd.ec); device_set_desc_copy(sc->dev, buf); } static void build_medialist(struct port_info *pi) { struct ifmedia *media = &pi->media; int data, m; PORT_LOCK(pi); ifmedia_removeall(media); m = IFM_ETHER | IFM_FDX; data = (pi->port_type << 8) | pi->mod_type; switch(pi->port_type) { case FW_PORT_TYPE_BT_XFI: ifmedia_add(media, m | IFM_10G_T, data, NULL); break; case FW_PORT_TYPE_BT_XAUI: ifmedia_add(media, m | IFM_10G_T, data, NULL); /* fall through */ case FW_PORT_TYPE_BT_SGMII: ifmedia_add(media, m | IFM_1000_T, data, NULL); ifmedia_add(media, m | IFM_100_TX, data, NULL); ifmedia_add(media, IFM_ETHER | IFM_AUTO, data, NULL); ifmedia_set(media, IFM_ETHER | IFM_AUTO); break; case FW_PORT_TYPE_CX4: ifmedia_add(media, m | IFM_10G_CX4, data, NULL); ifmedia_set(media, m | IFM_10G_CX4); break; case FW_PORT_TYPE_SFP: case FW_PORT_TYPE_FIBER_XFI: case FW_PORT_TYPE_FIBER_XAUI: switch (pi->mod_type) { case FW_PORT_MOD_TYPE_LR: ifmedia_add(media, m | IFM_10G_LR, data, NULL); ifmedia_set(media, m | IFM_10G_LR); break; case FW_PORT_MOD_TYPE_SR: ifmedia_add(media, m | IFM_10G_SR, data, NULL); ifmedia_set(media, m | IFM_10G_SR); break; case FW_PORT_MOD_TYPE_LRM: ifmedia_add(media, m | IFM_10G_LRM, data, NULL); ifmedia_set(media, m | IFM_10G_LRM); break; case FW_PORT_MOD_TYPE_TWINAX_PASSIVE: case FW_PORT_MOD_TYPE_TWINAX_ACTIVE: ifmedia_add(media, m | IFM_10G_TWINAX, data, NULL); ifmedia_set(media, m | IFM_10G_TWINAX); break; case FW_PORT_MOD_TYPE_NONE: m &= ~IFM_FDX; ifmedia_add(media, m | IFM_NONE, data, NULL); ifmedia_set(media, m | IFM_NONE); break; case FW_PORT_MOD_TYPE_NA: case FW_PORT_MOD_TYPE_ER: default: ifmedia_add(media, m | IFM_UNKNOWN, data, NULL); ifmedia_set(media, m | IFM_UNKNOWN); break; } break; case FW_PORT_TYPE_KX4: case FW_PORT_TYPE_KX: case FW_PORT_TYPE_KR: default: ifmedia_add(media, m | IFM_UNKNOWN, data, NULL); ifmedia_set(media, m | IFM_UNKNOWN); break; } PORT_UNLOCK(pi); } /* * Program the port's XGMAC based on parameters in 
ifnet. The caller also * indicates which parameters should be programmed (the rest are left alone). */ static int update_mac_settings(struct port_info *pi, int flags) { int rc; struct ifnet *ifp = pi->ifp; struct adapter *sc = pi->adapter; int mtu = -1, promisc = -1, allmulti = -1, vlanex = -1; PORT_LOCK_ASSERT_OWNED(pi); KASSERT(flags, ("%s: not told what to update.", __func__)); if (flags & XGMAC_MTU) mtu = ifp->if_mtu; if (flags & XGMAC_PROMISC) promisc = ifp->if_flags & IFF_PROMISC ? 1 : 0; if (flags & XGMAC_ALLMULTI) allmulti = ifp->if_flags & IFF_ALLMULTI ? 1 : 0; if (flags & XGMAC_VLANEX) vlanex = ifp->if_capenable & IFCAP_VLAN_HWTAGGING ? 1 : 0; rc = -t4_set_rxmode(sc, sc->mbox, pi->viid, mtu, promisc, allmulti, 1, vlanex, false); if (rc) { if_printf(ifp, "set_rxmode (%x) failed: %d\n", flags, rc); return (rc); } if (flags & XGMAC_UCADDR) { uint8_t ucaddr[ETHER_ADDR_LEN]; bcopy(IF_LLADDR(ifp), ucaddr, sizeof(ucaddr)); rc = t4_change_mac(sc, sc->mbox, pi->viid, pi->xact_addr_filt, ucaddr, true, true); if (rc < 0) { rc = -rc; if_printf(ifp, "change_mac failed: %d\n", rc); return (rc); } else { pi->xact_addr_filt = rc; rc = 0; } } if (flags & XGMAC_MCADDRS) { const uint8_t *mcaddr; int del = 1; uint64_t hash = 0; struct ifmultiaddr *ifma; if_maddr_rlock(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; mcaddr = LLADDR((struct sockaddr_dl *)ifma->ifma_addr); rc = t4_alloc_mac_filt(sc, sc->mbox, pi->viid, del, 1, &mcaddr, NULL, &hash, 0); if (rc < 0) { rc = -rc; if_printf(ifp, "failed to add mc address" " %02x:%02x:%02x:%02x:%02x:%02x rc=%d\n", mcaddr[0], mcaddr[1], mcaddr[2], mcaddr[3], mcaddr[4], mcaddr[5], rc); goto mcfail; } del = 0; } rc = -t4_set_addr_hash(sc, sc->mbox, pi->viid, 0, hash, 0); if (rc != 0) if_printf(ifp, "failed to set mc address hash: %d", rc); mcfail: if_maddr_runlock(ifp); } return (rc); } static int cxgbe_init_locked(struct port_info *pi) { struct adapter *sc = pi->adapter; 
int rc = 0; ADAPTER_LOCK_ASSERT_OWNED(sc); while (!IS_DOOMED(pi) && IS_BUSY(sc)) { if (mtx_sleep(&sc->flags, &sc->sc_lock, PCATCH, "t4init", 0)) { rc = EINTR; goto done; } } if (IS_DOOMED(pi)) { rc = ENXIO; goto done; } KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__)); /* Give up the adapter lock, port init code can sleep. */ SET_BUSY(sc); ADAPTER_UNLOCK(sc); rc = cxgbe_init_synchronized(pi); done: ADAPTER_LOCK(sc); KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__)); CLR_BUSY(sc); wakeup_one(&sc->flags); ADAPTER_UNLOCK(sc); return (rc); } static int cxgbe_init_synchronized(struct port_info *pi) { struct adapter *sc = pi->adapter; struct ifnet *ifp = pi->ifp; int rc = 0, i; uint16_t *rss; struct sge_rxq *rxq; ADAPTER_LOCK_ASSERT_NOTOWNED(sc); if (isset(&sc->open_device_map, pi->port_id)) { KASSERT(ifp->if_drv_flags & IFF_DRV_RUNNING, ("mismatch between open_device_map and if_drv_flags")); return (0); /* already running */ } if (sc->open_device_map == 0 && ((rc = first_port_up(sc)) != 0)) return (rc); /* error message displayed already */ /* * Allocate tx/rx/fl queues for this port. */ rc = t4_setup_eth_queues(pi); if (rc != 0) goto done; /* error message displayed already */ /* * Setup RSS for this port. 
*/
	/* One entry per rx queue, filled with that queue's iq.abs_id. */
	rss = malloc(pi->nrxq * sizeof (*rss), M_CXGBE, M_ZERO | M_WAITOK);
	for_each_rxq(pi, i, rxq) {
		rss[i] = rxq->iq.abs_id;
	}
	rc = -t4_config_rss_range(sc, sc->mbox, pi->viid, 0, pi->rss_size, rss,
	    pi->nrxq);
	/* The temporary table is freed whether or not the call succeeded. */
	free(rss, M_CXGBE);
	if (rc != 0) {
		if_printf(ifp, "rss_config failed: %d\n", rc);
		goto done;
	}

	/* update_mac_settings asserts that the port lock is held. */
	PORT_LOCK(pi);
	rc = update_mac_settings(pi, XGMAC_ALL);
	PORT_UNLOCK(pi);
	if (rc)
		goto done;	/* error message displayed already */

	rc = -t4_link_start(sc, sc->mbox, pi->tx_chan, &pi->link_cfg);
	if (rc != 0) {
		if_printf(ifp, "start_link failed: %d\n", rc);
		goto done;
	}

	rc = -t4_enable_vi(sc, sc->mbox, pi->viid, true, true);
	if (rc != 0) {
		if_printf(ifp, "enable_vi failed: %d\n", rc);
		goto done;
	}
	/* Remembered so that the uninit path knows the VI must be disabled. */
	pi->flags |= VI_ENABLED;

	/* all ok */
	setbit(&sc->open_device_map, pi->port_id);
	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
	/* Arm the per-port callout: cxgbe_tick(pi) after hz ticks. */
	callout_reset(&pi->tick, hz, cxgbe_tick, pi);
done:
	/* Any failure after the early returns undoes the partial bring-up. */
	if (rc != 0)
		cxgbe_uninit_synchronized(pi);

	return (rc);
}

/*
 * Take the port down.  Entered with the adapter lock held (asserted below)
 * and returns with it released on every path.  Sleeps, interruptibly, while
 * the adapter is marked busy; then marks it busy, drops the lock around
 * cxgbe_uninit_synchronized, and clears the busy flag again afterwards.
 *
 * Returns EINTR if the sleep was interrupted, ENXIO if the port is doomed,
 * and otherwise the result of cxgbe_uninit_synchronized.
 */
static int
cxgbe_uninit_locked(struct port_info *pi)
{
	struct adapter *sc = pi->adapter;
	int rc;

	ADAPTER_LOCK_ASSERT_OWNED(sc);

	while (!IS_DOOMED(pi) && IS_BUSY(sc)) {
		if (mtx_sleep(&sc->flags, &sc->sc_lock, PCATCH, "t4uninit", 0)) {
			rc = EINTR;
			goto done;
		}
	}
	if (IS_DOOMED(pi)) {
		rc = ENXIO;
		goto done;
	}
	KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__));
	SET_BUSY(sc);
	ADAPTER_UNLOCK(sc);

	rc = cxgbe_uninit_synchronized(pi);

	ADAPTER_LOCK(sc);
	KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__));
	CLR_BUSY(sc);
	/* Wake one thread sleeping on &sc->flags (the wait channel used above). */
	wakeup_one(&sc->flags);
done:
	/* Early exits jump here with the lock held and BUSY untouched. */
	ADAPTER_UNLOCK(sc);
	return (rc);
}

/*
 * Idempotent.
 */
static int
cxgbe_uninit_synchronized(struct port_info *pi)
{
	struct adapter *sc = pi->adapter;
	struct ifnet *ifp = pi->ifp;
	int rc;

	/*
	 * taskqueue_drain may cause a deadlock if the adapter lock is held.
	 */
	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);

	/*
	 * Clear this port's bit from the open device map, and then drain
	 * tasks and callouts.
*/ clrbit(&sc->open_device_map, pi->port_id); PORT_LOCK(pi); ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); callout_stop(&pi->tick); PORT_UNLOCK(pi); callout_drain(&pi->tick); /* * Stop and then free the queues' resources, including the queues * themselves. * * XXX: we could just stop the queues here (on ifconfig down) and free * them later (on port detach), but having up/down go through the entire * allocate/activate/deactivate/free sequence is a good way to find * leaks and bugs. */ rc = t4_teardown_eth_queues(pi); if (rc != 0) if_printf(ifp, "teardown failed: %d\n", rc); if (pi->flags & VI_ENABLED) { rc = -t4_enable_vi(sc, sc->mbox, pi->viid, false, false); if (rc) if_printf(ifp, "disable_vi failed: %d\n", rc); else pi->flags &= ~VI_ENABLED; } pi->link_cfg.link_ok = 0; pi->link_cfg.speed = 0; t4_os_link_changed(sc, pi->port_id, 0); if (sc->open_device_map == 0) last_port_down(sc); return (0); } -#define T4_ALLOC_IRQ(sc, irqid, rid, handler, arg, name) do { \ - rc = t4_alloc_irq(sc, &sc->irq[irqid], rid, handler, arg, name); \ +#define T4_ALLOC_IRQ(sc, irq, rid, handler, arg, name) do { \ + rc = t4_alloc_irq(sc, irq, rid, handler, arg, name); \ if (rc != 0) \ goto done; \ } while (0) static int first_port_up(struct adapter *sc) { - int rc, i; - char name[8]; + int rc, i, rid, p, q; + char s[8]; + struct irq *irq; + struct sge_iq *intrq; ADAPTER_LOCK_ASSERT_NOTOWNED(sc); /* * queues that belong to the adapter (not any particular port). */ rc = t4_setup_adapter_queues(sc); if (rc != 0) goto done; /* * Setup interrupts. */ + irq = &sc->irq[0]; + rid = sc->intr_type == INTR_INTX ? 0 : 1; if (sc->intr_count == 1) { - KASSERT(sc->flags & INTR_FWD, - ("%s: single interrupt but not forwarded?", __func__)); - T4_ALLOC_IRQ(sc, 0, 0, t4_intr_all, sc, "all"); + KASSERT(sc->flags & INTR_SHARED, + ("%s: single interrupt but not shared?", __func__)); + + T4_ALLOC_IRQ(sc, irq, rid, t4_intr_all, sc, "all"); } else { /* Multiple interrupts. 
The first one is always error intr */ - T4_ALLOC_IRQ(sc, 0, 1, t4_intr_err, sc, "err"); - - if (sc->flags & INTR_FWD) { - /* The rest are shared by the fwq and all data intr */ - for (i = 1; i < sc->intr_count; i++) { - snprintf(name, sizeof(name), "mux%d", i - 1); - T4_ALLOC_IRQ(sc, i, i + 1, t4_intr_fwd, - &sc->sge.fiq[i - 1], name); + T4_ALLOC_IRQ(sc, irq, rid, t4_intr_err, sc, "err"); + irq++; + rid++; + + /* Firmware event queue normally has an interrupt of its own */ + if (sc->intr_count > T4_EXTRA_INTR) { + T4_ALLOC_IRQ(sc, irq, rid, t4_intr_evt, &sc->sge.fwq, + "evt"); + irq++; + rid++; + } + + intrq = &sc->sge.intrq[0]; + if (sc->flags & INTR_SHARED) { + + /* All ports share these interrupt queues */ + + for (i = 0; i < NINTRQ(sc); i++) { + snprintf(s, sizeof(s), "*.%d", i); + T4_ALLOC_IRQ(sc, irq, rid, t4_intr, intrq, s); + irq++; + rid++; + intrq++; } } else { - struct port_info *pi; - int p, q; - T4_ALLOC_IRQ(sc, 1, 2, t4_intr_evt, &sc->sge.fwq, - "evt"); + /* Each port has its own set of interrupt queues */ - p = q = 0; - pi = sc->port[p]; - for (i = 2; i < sc->intr_count; i++) { - snprintf(name, sizeof(name), "p%dq%d", p, q); - if (++q >= pi->nrxq) { - p++; - q = 0; - pi = sc->port[p]; + for (p = 0; p < sc->params.nports; p++) { + for (q = 0; q < sc->port[p]->nrxq; q++) { + snprintf(s, sizeof(s), "%d.%d", p, q); + T4_ALLOC_IRQ(sc, irq, rid, t4_intr, + intrq, s); + irq++; + rid++; + intrq++; } - T4_ALLOC_IRQ(sc, i, i + 1, t4_intr_data, - &sc->sge.rxq[i - 2], name); } } } t4_intr_enable(sc); sc->flags |= FULL_INIT_DONE; done: if (rc != 0) last_port_down(sc); return (rc); } #undef T4_ALLOC_IRQ /* * Idempotent. 
*/ static int last_port_down(struct adapter *sc) { int i; ADAPTER_LOCK_ASSERT_NOTOWNED(sc); t4_intr_disable(sc); t4_teardown_adapter_queues(sc); for (i = 0; i < sc->intr_count; i++) t4_free_irq(sc, &sc->irq[i]); sc->flags &= ~FULL_INIT_DONE; return (0); } static int t4_alloc_irq(struct adapter *sc, struct irq *irq, int rid, iq_intr_handler_t *handler, void *arg, char *name) { int rc; irq->rid = rid; irq->res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &irq->rid, RF_SHAREABLE | RF_ACTIVE); if (irq->res == NULL) { device_printf(sc->dev, "failed to allocate IRQ for rid %d, name %s.\n", rid, name); return (ENOMEM); } rc = bus_setup_intr(sc->dev, irq->res, INTR_MPSAFE | INTR_TYPE_NET, NULL, handler, arg, &irq->tag); if (rc != 0) { device_printf(sc->dev, "failed to setup interrupt for rid %d, name %s: %d\n", rid, name, rc); } else if (name) bus_describe_intr(sc->dev, irq->res, irq->tag, name); return (rc); } static int t4_free_irq(struct adapter *sc, struct irq *irq) { if (irq->tag) bus_teardown_intr(sc->dev, irq->res, irq->tag); if (irq->res) bus_release_resource(sc->dev, SYS_RES_IRQ, irq->rid, irq->res); bzero(irq, sizeof(*irq)); return (0); } static void reg_block_dump(struct adapter *sc, uint8_t *buf, unsigned int start, unsigned int end) { uint32_t *p = (uint32_t *)(buf + start); for ( ; start <= end; start += sizeof(uint32_t)) *p++ = t4_read_reg(sc, start); } static void t4_get_regs(struct adapter *sc, struct t4_regdump *regs, uint8_t *buf) { int i; static const unsigned int reg_ranges[] = { 0x1008, 0x1108, 0x1180, 0x11b4, 0x11fc, 0x123c, 0x1300, 0x173c, 0x1800, 0x18fc, 0x3000, 0x30d8, 0x30e0, 0x5924, 0x5960, 0x59d4, 0x5a00, 0x5af8, 0x6000, 0x6098, 0x6100, 0x6150, 0x6200, 0x6208, 0x6240, 0x6248, 0x6280, 0x6338, 0x6370, 0x638c, 0x6400, 0x643c, 0x6500, 0x6524, 0x6a00, 0x6a38, 0x6a60, 0x6a78, 0x6b00, 0x6b84, 0x6bf0, 0x6c84, 0x6cf0, 0x6d84, 0x6df0, 0x6e84, 0x6ef0, 0x6f84, 0x6ff0, 0x7084, 0x70f0, 0x7184, 0x71f0, 0x7284, 0x72f0, 0x7384, 0x73f0, 0x7450, 0x7500, 0x7530, 
0x7600, 0x761c, 0x7680, 0x76cc, 0x7700, 0x7798, 0x77c0, 0x77fc, 0x7900, 0x79fc, 0x7b00, 0x7c38, 0x7d00, 0x7efc, 0x8dc0, 0x8e1c, 0x8e30, 0x8e78, 0x8ea0, 0x8f6c, 0x8fc0, 0x9074, 0x90fc, 0x90fc, 0x9400, 0x9458, 0x9600, 0x96bc, 0x9800, 0x9808, 0x9820, 0x983c, 0x9850, 0x9864, 0x9c00, 0x9c6c, 0x9c80, 0x9cec, 0x9d00, 0x9d6c, 0x9d80, 0x9dec, 0x9e00, 0x9e6c, 0x9e80, 0x9eec, 0x9f00, 0x9f6c, 0x9f80, 0x9fec, 0xd004, 0xd03c, 0xdfc0, 0xdfe0, 0xe000, 0xea7c, 0xf000, 0x11190, 0x19040, 0x19124, 0x19150, 0x191b0, 0x191d0, 0x191e8, 0x19238, 0x1924c, 0x193f8, 0x19474, 0x19490, 0x194f8, 0x19800, 0x19f30, 0x1a000, 0x1a06c, 0x1a0b0, 0x1a120, 0x1a128, 0x1a138, 0x1a190, 0x1a1c4, 0x1a1fc, 0x1a1fc, 0x1e040, 0x1e04c, 0x1e240, 0x1e28c, 0x1e2c0, 0x1e2c0, 0x1e2e0, 0x1e2e0, 0x1e300, 0x1e384, 0x1e3c0, 0x1e3c8, 0x1e440, 0x1e44c, 0x1e640, 0x1e68c, 0x1e6c0, 0x1e6c0, 0x1e6e0, 0x1e6e0, 0x1e700, 0x1e784, 0x1e7c0, 0x1e7c8, 0x1e840, 0x1e84c, 0x1ea40, 0x1ea8c, 0x1eac0, 0x1eac0, 0x1eae0, 0x1eae0, 0x1eb00, 0x1eb84, 0x1ebc0, 0x1ebc8, 0x1ec40, 0x1ec4c, 0x1ee40, 0x1ee8c, 0x1eec0, 0x1eec0, 0x1eee0, 0x1eee0, 0x1ef00, 0x1ef84, 0x1efc0, 0x1efc8, 0x1f040, 0x1f04c, 0x1f240, 0x1f28c, 0x1f2c0, 0x1f2c0, 0x1f2e0, 0x1f2e0, 0x1f300, 0x1f384, 0x1f3c0, 0x1f3c8, 0x1f440, 0x1f44c, 0x1f640, 0x1f68c, 0x1f6c0, 0x1f6c0, 0x1f6e0, 0x1f6e0, 0x1f700, 0x1f784, 0x1f7c0, 0x1f7c8, 0x1f840, 0x1f84c, 0x1fa40, 0x1fa8c, 0x1fac0, 0x1fac0, 0x1fae0, 0x1fae0, 0x1fb00, 0x1fb84, 0x1fbc0, 0x1fbc8, 0x1fc40, 0x1fc4c, 0x1fe40, 0x1fe8c, 0x1fec0, 0x1fec0, 0x1fee0, 0x1fee0, 0x1ff00, 0x1ff84, 0x1ffc0, 0x1ffc8, 0x20000, 0x2002c, 0x20100, 0x2013c, 0x20190, 0x201c8, 0x20200, 0x20318, 0x20400, 0x20528, 0x20540, 0x20614, 0x21000, 0x21040, 0x2104c, 0x21060, 0x210c0, 0x210ec, 0x21200, 0x21268, 0x21270, 0x21284, 0x212fc, 0x21388, 0x21400, 0x21404, 0x21500, 0x21518, 0x2152c, 0x2153c, 0x21550, 0x21554, 0x21600, 0x21600, 0x21608, 0x21628, 0x21630, 0x2163c, 0x21700, 0x2171c, 0x21780, 0x2178c, 0x21800, 0x21c38, 0x21c80, 0x21d7c, 0x21e00, 0x21e04, 0x22000, 0x2202c, 
0x22100, 0x2213c, 0x22190, 0x221c8, 0x22200, 0x22318, 0x22400, 0x22528, 0x22540, 0x22614, 0x23000, 0x23040, 0x2304c, 0x23060, 0x230c0, 0x230ec, 0x23200, 0x23268, 0x23270, 0x23284, 0x232fc, 0x23388, 0x23400, 0x23404, 0x23500, 0x23518, 0x2352c, 0x2353c, 0x23550, 0x23554, 0x23600, 0x23600, 0x23608, 0x23628, 0x23630, 0x2363c, 0x23700, 0x2371c, 0x23780, 0x2378c, 0x23800, 0x23c38, 0x23c80, 0x23d7c, 0x23e00, 0x23e04, 0x24000, 0x2402c, 0x24100, 0x2413c, 0x24190, 0x241c8, 0x24200, 0x24318, 0x24400, 0x24528, 0x24540, 0x24614, 0x25000, 0x25040, 0x2504c, 0x25060, 0x250c0, 0x250ec, 0x25200, 0x25268, 0x25270, 0x25284, 0x252fc, 0x25388, 0x25400, 0x25404, 0x25500, 0x25518, 0x2552c, 0x2553c, 0x25550, 0x25554, 0x25600, 0x25600, 0x25608, 0x25628, 0x25630, 0x2563c, 0x25700, 0x2571c, 0x25780, 0x2578c, 0x25800, 0x25c38, 0x25c80, 0x25d7c, 0x25e00, 0x25e04, 0x26000, 0x2602c, 0x26100, 0x2613c, 0x26190, 0x261c8, 0x26200, 0x26318, 0x26400, 0x26528, 0x26540, 0x26614, 0x27000, 0x27040, 0x2704c, 0x27060, 0x270c0, 0x270ec, 0x27200, 0x27268, 0x27270, 0x27284, 0x272fc, 0x27388, 0x27400, 0x27404, 0x27500, 0x27518, 0x2752c, 0x2753c, 0x27550, 0x27554, 0x27600, 0x27600, 0x27608, 0x27628, 0x27630, 0x2763c, 0x27700, 0x2771c, 0x27780, 0x2778c, 0x27800, 0x27c38, 0x27c80, 0x27d7c, 0x27e00, 0x27e04 }; regs->version = 4 | (sc->params.rev << 10); for (i = 0; i < ARRAY_SIZE(reg_ranges); i += 2) reg_block_dump(sc, buf, reg_ranges[i], reg_ranges[i + 1]); } static void cxgbe_tick(void *arg) { struct port_info *pi = arg; struct ifnet *ifp = pi->ifp; struct sge_txq *txq; int i, drops; struct port_stats *s = &pi->stats; PORT_LOCK(pi); if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { PORT_UNLOCK(pi); return; /* without scheduling another callout */ } t4_get_port_stats(pi->adapter, pi->tx_chan, s); ifp->if_opackets = s->tx_frames; ifp->if_ipackets = s->rx_frames; ifp->if_obytes = s->tx_octets; ifp->if_ibytes = s->rx_octets; ifp->if_omcasts = s->tx_mcast_frames; ifp->if_imcasts = s->rx_mcast_frames; ifp->if_iqdrops = 
s->rx_ovflow0 + s->rx_ovflow1 + s->rx_ovflow2 +
	    s->rx_ovflow3;

	/* Output drops: the port's tx_drop plus every tx queue's buf_ring drops. */
	drops = s->tx_drop;
	for_each_txq(pi, i, txq)
		drops += txq->br->br_drops;
	ifp->if_snd.ifq_drops = drops;

	ifp->if_oerrors = s->tx_error_frames;
	ifp->if_ierrors = s->rx_jabber + s->rx_runt + s->rx_too_long +
	    s->rx_fcs_err + s->rx_len_err;

	/* Re-arm the callout so this runs again after another hz ticks. */
	callout_schedule(&pi->tick, hz);
	PORT_UNLOCK(pi);
}

/*
 * Register the adapter-wide sysctl nodes under the sysctl tree of sc->dev.
 * All of them are read-only (CTLFLAG_RD).  The two holdoff nodes are
 * rendered by sysctl_int_array from the intr_timer and intr_pktcount arrays.
 * Always returns 0.
 */
static int
t4_sysctls(struct adapter *sc)
{
	struct sysctl_ctx_list *ctx;
	struct sysctl_oid *oid;
	struct sysctl_oid_list *children;

	ctx = device_get_sysctl_ctx(sc->dev);
	oid = device_get_sysctl_tree(sc->dev);
	children = SYSCTL_CHILDREN(oid);

	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nports", CTLFLAG_RD,
	    &sc->params.nports, 0, "# of ports");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "hw_revision", CTLFLAG_RD,
	    &sc->params.rev, 0, "chip hardware revision");

	SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "firmware_version",
	    CTLFLAG_RD, &sc->fw_version, 0, "firmware version");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "TOE", CTLFLAG_RD,
	    &sc->params.offload, 0, "hardware is capable of TCP offload");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "core_clock", CTLFLAG_RD,
	    &sc->params.vpd.cclk, 0, "core clock frequency (in KHz)");

	/* arg1 is the array, arg2 its size in bytes (see sysctl_int_array). */
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_timers",
	    CTLTYPE_STRING | CTLFLAG_RD, &intr_timer, sizeof(intr_timer),
	    sysctl_int_array, "A", "interrupt holdoff timer values (us)");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_pkt_counts",
	    CTLTYPE_STRING | CTLFLAG_RD, &intr_pktcount, sizeof(intr_pktcount),
	    sysctl_int_array, "A", "interrupt holdoff packet counter values");

	return (0);
}

static int
cxgbe_sysctls(struct port_info *pi)
{
	struct sysctl_ctx_list *ctx;
	struct sysctl_oid *oid;
	struct sysctl_oid_list *children;

	ctx = device_get_sysctl_ctx(pi->dev);

	/*
	 * dev.cxgbe.X.
*/ oid = device_get_sysctl_tree(pi->dev); children = SYSCTL_CHILDREN(oid); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nrxq", CTLFLAG_RD, &pi->nrxq, 0, "# of rx queues"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "ntxq", CTLFLAG_RD, &pi->ntxq, 0, "# of tx queues"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_rxq", CTLFLAG_RD, &pi->first_rxq, 0, "index of first rx queue"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_txq", CTLFLAG_RD, &pi->first_txq, 0, "index of first tx queue"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_tmr_idx", CTLTYPE_INT | CTLFLAG_RW, pi, 0, sysctl_holdoff_tmr_idx, "I", "holdoff timer index"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_pktc_idx", CTLTYPE_INT | CTLFLAG_RW, pi, 0, sysctl_holdoff_pktc_idx, "I", "holdoff packet counter index"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "qsize_rxq", CTLTYPE_INT | CTLFLAG_RW, pi, 0, sysctl_qsize_rxq, "I", "rx queue size"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "qsize_txq", CTLTYPE_INT | CTLFLAG_RW, pi, 0, sysctl_qsize_txq, "I", "tx queue size"); /* * dev.cxgbe.X.stats. 
*/ oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "stats", CTLFLAG_RD, NULL, "port statistics"); children = SYSCTL_CHILDREN(oid); #define SYSCTL_ADD_T4_REG64(pi, name, desc, reg) \ SYSCTL_ADD_OID(ctx, children, OID_AUTO, name, \ CTLTYPE_U64 | CTLFLAG_RD, pi->adapter, reg, \ sysctl_handle_t4_reg64, "QU", desc) SYSCTL_ADD_T4_REG64(pi, "tx_octets", "# of octets in good frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_BYTES_L)); SYSCTL_ADD_T4_REG64(pi, "tx_frames", "total # of good frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_FRAMES_L)); SYSCTL_ADD_T4_REG64(pi, "tx_bcast_frames", "# of broadcast frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_BCAST_L)); SYSCTL_ADD_T4_REG64(pi, "tx_mcast_frames", "# of multicast frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_MCAST_L)); SYSCTL_ADD_T4_REG64(pi, "tx_ucast_frames", "# of unicast frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_UCAST_L)); SYSCTL_ADD_T4_REG64(pi, "tx_error_frames", "# of error frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_ERROR_L)); SYSCTL_ADD_T4_REG64(pi, "tx_frames_64", "# of tx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_64B_L)); SYSCTL_ADD_T4_REG64(pi, "tx_frames_65_127", "# of tx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_65B_127B_L)); SYSCTL_ADD_T4_REG64(pi, "tx_frames_128_255", "# of tx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_128B_255B_L)); SYSCTL_ADD_T4_REG64(pi, "tx_frames_256_511", "# of tx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_256B_511B_L)); SYSCTL_ADD_T4_REG64(pi, "tx_frames_512_1023", "# of tx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_512B_1023B_L)); SYSCTL_ADD_T4_REG64(pi, "tx_frames_1024_1518", "# of tx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_1024B_1518B_L)); SYSCTL_ADD_T4_REG64(pi, "tx_frames_1519_max", "# of tx frames in this range", PORT_REG(pi->tx_chan, 
A_MPS_PORT_STAT_TX_PORT_1519B_MAX_L)); SYSCTL_ADD_T4_REG64(pi, "tx_drop", "# of dropped tx frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_DROP_L)); SYSCTL_ADD_T4_REG64(pi, "tx_pause", "# of pause frames transmitted", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PAUSE_L)); SYSCTL_ADD_T4_REG64(pi, "tx_ppp0", "# of PPP prio 0 frames transmitted", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP0_L)); SYSCTL_ADD_T4_REG64(pi, "tx_ppp1", "# of PPP prio 1 frames transmitted", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP1_L)); SYSCTL_ADD_T4_REG64(pi, "tx_ppp2", "# of PPP prio 2 frames transmitted", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP2_L)); SYSCTL_ADD_T4_REG64(pi, "tx_ppp3", "# of PPP prio 3 frames transmitted", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP3_L)); SYSCTL_ADD_T4_REG64(pi, "tx_ppp4", "# of PPP prio 4 frames transmitted", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP4_L)); SYSCTL_ADD_T4_REG64(pi, "tx_ppp5", "# of PPP prio 5 frames transmitted", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP5_L)); SYSCTL_ADD_T4_REG64(pi, "tx_ppp6", "# of PPP prio 6 frames transmitted", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP6_L)); SYSCTL_ADD_T4_REG64(pi, "tx_ppp7", "# of PPP prio 7 frames transmitted", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP7_L)); SYSCTL_ADD_T4_REG64(pi, "rx_octets", "# of octets in good frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_BYTES_L)); SYSCTL_ADD_T4_REG64(pi, "rx_frames", "total # of good frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_FRAMES_L)); SYSCTL_ADD_T4_REG64(pi, "rx_bcast_frames", "# of broadcast frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_BCAST_L)); SYSCTL_ADD_T4_REG64(pi, "rx_mcast_frames", "# of multicast frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_MCAST_L)); SYSCTL_ADD_T4_REG64(pi, "rx_ucast_frames", "# of unicast frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_UCAST_L)); SYSCTL_ADD_T4_REG64(pi, "rx_too_long", "# of frames exceeding MTU", 
PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_MTU_ERROR_L)); SYSCTL_ADD_T4_REG64(pi, "rx_jabber", "# of jabber frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_MTU_CRC_ERROR_L)); SYSCTL_ADD_T4_REG64(pi, "rx_fcs_err", "# of frames received with bad FCS", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_CRC_ERROR_L)); SYSCTL_ADD_T4_REG64(pi, "rx_len_err", "# of frames received with length error", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_LEN_ERROR_L)); SYSCTL_ADD_T4_REG64(pi, "rx_symbol_err", "symbol errors", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_SYM_ERROR_L)); SYSCTL_ADD_T4_REG64(pi, "rx_runt", "# of short frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_LESS_64B_L)); SYSCTL_ADD_T4_REG64(pi, "rx_frames_64", "# of rx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_64B_L)); SYSCTL_ADD_T4_REG64(pi, "rx_frames_65_127", "# of rx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_65B_127B_L)); SYSCTL_ADD_T4_REG64(pi, "rx_frames_128_255", "# of rx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_128B_255B_L)); SYSCTL_ADD_T4_REG64(pi, "rx_frames_256_511", "# of rx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_256B_511B_L)); SYSCTL_ADD_T4_REG64(pi, "rx_frames_512_1023", "# of rx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_512B_1023B_L)); SYSCTL_ADD_T4_REG64(pi, "rx_frames_1024_1518", "# of rx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_1024B_1518B_L)); SYSCTL_ADD_T4_REG64(pi, "rx_frames_1519_max", "# of rx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_1519B_MAX_L)); SYSCTL_ADD_T4_REG64(pi, "rx_pause", "# of pause frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PAUSE_L)); SYSCTL_ADD_T4_REG64(pi, "rx_ppp0", "# of PPP prio 0 frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP0_L)); SYSCTL_ADD_T4_REG64(pi, "rx_ppp1", "# of PPP prio 1 frames received", 
PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP1_L)); SYSCTL_ADD_T4_REG64(pi, "rx_ppp2", "# of PPP prio 2 frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP2_L)); SYSCTL_ADD_T4_REG64(pi, "rx_ppp3", "# of PPP prio 3 frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP3_L)); SYSCTL_ADD_T4_REG64(pi, "rx_ppp4", "# of PPP prio 4 frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP4_L)); SYSCTL_ADD_T4_REG64(pi, "rx_ppp5", "# of PPP prio 5 frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP5_L)); SYSCTL_ADD_T4_REG64(pi, "rx_ppp6", "# of PPP prio 6 frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP6_L)); SYSCTL_ADD_T4_REG64(pi, "rx_ppp7", "# of PPP prio 7 frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP7_L)); #undef SYSCTL_ADD_T4_REG64 #define SYSCTL_ADD_T4_PORTSTAT(name, desc) \ SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, #name, CTLFLAG_RD, \ &pi->stats.name, desc) /* We get these from port_stats and they may be stale by upto 1s */ SYSCTL_ADD_T4_PORTSTAT(rx_ovflow0, "# drops due to buffer-group 0 overflows"); SYSCTL_ADD_T4_PORTSTAT(rx_ovflow1, "# drops due to buffer-group 1 overflows"); SYSCTL_ADD_T4_PORTSTAT(rx_ovflow2, "# drops due to buffer-group 2 overflows"); SYSCTL_ADD_T4_PORTSTAT(rx_ovflow3, "# drops due to buffer-group 3 overflows"); SYSCTL_ADD_T4_PORTSTAT(rx_trunc0, "# of buffer-group 0 truncated packets"); SYSCTL_ADD_T4_PORTSTAT(rx_trunc1, "# of buffer-group 1 truncated packets"); SYSCTL_ADD_T4_PORTSTAT(rx_trunc2, "# of buffer-group 2 truncated packets"); SYSCTL_ADD_T4_PORTSTAT(rx_trunc3, "# of buffer-group 3 truncated packets"); #undef SYSCTL_ADD_T4_PORTSTAT return (0); } static int sysctl_int_array(SYSCTL_HANDLER_ARGS) { int rc, *i; struct sbuf sb; sbuf_new(&sb, NULL, 32, SBUF_AUTOEXTEND); for (i = arg1; arg2; arg2 -= sizeof(int), i++) sbuf_printf(&sb, "%d ", *i); sbuf_trim(&sb); sbuf_finish(&sb); rc = sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), 
req); sbuf_delete(&sb); return (rc); } static int sysctl_holdoff_tmr_idx(SYSCTL_HANDLER_ARGS) { struct port_info *pi = arg1; struct adapter *sc = pi->adapter; struct sge_rxq *rxq; int idx, rc, i; idx = pi->tmr_idx; rc = sysctl_handle_int(oidp, &idx, 0, req); if (rc != 0 || req->newptr == NULL) return (rc); if (idx < 0 || idx >= SGE_NTIMERS) return (EINVAL); ADAPTER_LOCK(sc); rc = IS_DOOMED(pi) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0); if (rc == 0) { for_each_rxq(pi, i, rxq) { rxq->iq.intr_params = V_QINTR_TIMER_IDX(idx) | V_QINTR_CNT_EN(pi->pktc_idx != -1); } pi->tmr_idx = idx; } ADAPTER_UNLOCK(sc); return (rc); } static int sysctl_holdoff_pktc_idx(SYSCTL_HANDLER_ARGS) { struct port_info *pi = arg1; struct adapter *sc = pi->adapter; int idx, rc; idx = pi->pktc_idx; rc = sysctl_handle_int(oidp, &idx, 0, req); if (rc != 0 || req->newptr == NULL) return (rc); if (idx < -1 || idx >= SGE_NCOUNTERS) return (EINVAL); ADAPTER_LOCK(sc); rc = IS_DOOMED(pi) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0); if (rc == 0 && pi->ifp->if_drv_flags & IFF_DRV_RUNNING) rc = EBUSY; /* can be changed only when port is down */ if (rc == 0) pi->pktc_idx = idx; ADAPTER_UNLOCK(sc); return (rc); } static int sysctl_qsize_rxq(SYSCTL_HANDLER_ARGS) { struct port_info *pi = arg1; struct adapter *sc = pi->adapter; int qsize, rc; qsize = pi->qsize_rxq; rc = sysctl_handle_int(oidp, &qsize, 0, req); if (rc != 0 || req->newptr == NULL) return (rc); if (qsize < 128 || (qsize & 7)) return (EINVAL); ADAPTER_LOCK(sc); rc = IS_DOOMED(pi) ? ENXIO : (IS_BUSY(sc) ? 
EBUSY : 0); if (rc == 0 && pi->ifp->if_drv_flags & IFF_DRV_RUNNING) rc = EBUSY; /* can be changed only when port is down */ if (rc == 0) pi->qsize_rxq = qsize; ADAPTER_UNLOCK(sc); return (rc); } static int sysctl_qsize_txq(SYSCTL_HANDLER_ARGS) { struct port_info *pi = arg1; struct adapter *sc = pi->adapter; int qsize, rc; qsize = pi->qsize_txq; rc = sysctl_handle_int(oidp, &qsize, 0, req); if (rc != 0 || req->newptr == NULL) return (rc); if (qsize < 128) return (EINVAL); ADAPTER_LOCK(sc); rc = IS_DOOMED(pi) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0); if (rc == 0 && pi->ifp->if_drv_flags & IFF_DRV_RUNNING) rc = EBUSY; /* can be changed only when port is down */ if (rc == 0) pi->qsize_txq = qsize; ADAPTER_UNLOCK(sc); return (rc); } static int sysctl_handle_t4_reg64(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; int reg = arg2; uint64_t val; val = t4_read_reg64(sc, reg); return (sysctl_handle_64(oidp, &val, 0, req)); } static inline void txq_start(struct ifnet *ifp, struct sge_txq *txq) { struct buf_ring *br; struct mbuf *m; TXQ_LOCK_ASSERT_OWNED(txq); br = txq->br; m = txq->m ? 
txq->m : drbr_dequeue(ifp, br); if (m) t4_eth_tx(ifp, txq, m); } void cxgbe_txq_start(void *arg, int count) { struct sge_txq *txq = arg; TXQ_LOCK(txq); if (txq->eq.flags & EQ_CRFLUSHED) { txq->eq.flags &= ~EQ_CRFLUSHED; txq_start(txq->ifp, txq); } else wakeup_one(txq); /* txq is going away, wakeup free_txq */ TXQ_UNLOCK(txq); } static uint32_t fconf_to_mode(uint32_t fconf) { uint32_t mode; mode = T4_FILTER_IPv4 | T4_FILTER_IPv6 | T4_FILTER_IP_SADDR | T4_FILTER_IP_DADDR | T4_FILTER_IP_SPORT | T4_FILTER_IP_DPORT; if (fconf & F_FRAGMENTATION) mode |= T4_FILTER_IP_FRAGMENT; if (fconf & F_MPSHITTYPE) mode |= T4_FILTER_MPS_HIT_TYPE; if (fconf & F_MACMATCH) mode |= T4_FILTER_MAC_IDX; if (fconf & F_ETHERTYPE) mode |= T4_FILTER_ETH_TYPE; if (fconf & F_PROTOCOL) mode |= T4_FILTER_IP_PROTO; if (fconf & F_TOS) mode |= T4_FILTER_IP_TOS; if (fconf & F_VLAN) mode |= T4_FILTER_IVLAN; if (fconf & F_VNIC_ID) mode |= T4_FILTER_OVLAN; if (fconf & F_PORT) mode |= T4_FILTER_PORT; if (fconf & F_FCOE) mode |= T4_FILTER_FCoE; return (mode); } static uint32_t mode_to_fconf(uint32_t mode) { uint32_t fconf = 0; if (mode & T4_FILTER_IP_FRAGMENT) fconf |= F_FRAGMENTATION; if (mode & T4_FILTER_MPS_HIT_TYPE) fconf |= F_MPSHITTYPE; if (mode & T4_FILTER_MAC_IDX) fconf |= F_MACMATCH; if (mode & T4_FILTER_ETH_TYPE) fconf |= F_ETHERTYPE; if (mode & T4_FILTER_IP_PROTO) fconf |= F_PROTOCOL; if (mode & T4_FILTER_IP_TOS) fconf |= F_TOS; if (mode & T4_FILTER_IVLAN) fconf |= F_VLAN; if (mode & T4_FILTER_OVLAN) fconf |= F_VNIC_ID; if (mode & T4_FILTER_PORT) fconf |= F_PORT; if (mode & T4_FILTER_FCoE) fconf |= F_FCOE; return (fconf); } static uint32_t fspec_to_fconf(struct t4_filter_specification *fs) { uint32_t fconf = 0; if (fs->val.frag || fs->mask.frag) fconf |= F_FRAGMENTATION; if (fs->val.matchtype || fs->mask.matchtype) fconf |= F_MPSHITTYPE; if (fs->val.macidx || fs->mask.macidx) fconf |= F_MACMATCH; if (fs->val.ethtype || fs->mask.ethtype) fconf |= F_ETHERTYPE; if (fs->val.proto || fs->mask.proto) 
fconf |= F_PROTOCOL; if (fs->val.tos || fs->mask.tos) fconf |= F_TOS; if (fs->val.ivlan_vld || fs->mask.ivlan_vld) fconf |= F_VLAN; if (fs->val.ovlan_vld || fs->mask.ovlan_vld) fconf |= F_VNIC_ID; if (fs->val.iport || fs->mask.iport) fconf |= F_PORT; if (fs->val.fcoe || fs->mask.fcoe) fconf |= F_FCOE; return (fconf); } static int get_filter_mode(struct adapter *sc, uint32_t *mode) { uint32_t fconf; t4_read_indirect(sc, A_TP_PIO_ADDR, A_TP_PIO_DATA, &fconf, 1, A_TP_VLAN_PRI_MAP); *mode = fconf_to_mode(fconf); return (0); } static int set_filter_mode(struct adapter *sc, uint32_t mode) { uint32_t fconf; int rc; fconf = mode_to_fconf(mode); ADAPTER_LOCK(sc); if (IS_BUSY(sc)) { rc = EAGAIN; goto done; } if (sc->tids.ftids_in_use > 0) { rc = EBUSY; goto done; } rc = -t4_set_filter_mode(sc, fconf); done: ADAPTER_UNLOCK(sc); return (rc); } static int get_filter(struct adapter *sc, struct t4_filter *t) { int i, nfilters = sc->tids.nftids; struct filter_entry *f; ADAPTER_LOCK_ASSERT_OWNED(sc); if (IS_BUSY(sc)) return (EAGAIN); if (sc->tids.ftids_in_use == 0 || sc->tids.ftid_tab == NULL || t->idx >= nfilters) { t->idx = 0xffffffff; return (0); } f = &sc->tids.ftid_tab[t->idx]; for (i = t->idx; i < nfilters; i++, f++) { if (f->valid) { t->idx = i; t->l2tidx = f->l2t ? 
f->l2t->idx : 0;
			t->smtidx = f->smtidx;
			t->hits = 0; /* XXX implement */
			t->fs = f->fs;

			return (0);
		}
	}

	/* No valid filter at or after the requested index. */
	t->idx = 0xffffffff;
	return (0);
}

/*
 * Validate a filter request and start installing it at index t->idx.  The
 * adapter lock must be held (asserted below).  The filter table is allocated
 * on first use.  The checks run in the order written, so the first one that
 * fails decides the error:
 *
 *   ENOTSUP  no filter tids are available
 *   EAGAIN   adapter not fully initialised yet
 *   EINVAL   index or port out of range, an iq given without dirsteer, or a
 *            misplaced type 1 (IPv6) index
 *   E2BIG    the specification uses a field the global filter mode lacks
 *   ENOMEM   the filter table could not be allocated
 *   EBUSY    a slot that would be used is already pending or valid
 *   EPERM    a slot that would be used is locked
 *
 * On success the specification is copied into the table entry and the
 * result of set_filter_wr is returned.
 *
 * NOTE(review): get_filter and del_filter refuse to run while IS_BUSY(sc)
 * but this function has no such check -- confirm that is intentional.
 */
static int
set_filter(struct adapter *sc, struct t4_filter *t)
{
	uint32_t fconf;
	unsigned int nfilters, nports;
	struct filter_entry *f;
	int i;

	ADAPTER_LOCK_ASSERT_OWNED(sc);

	nfilters = sc->tids.nftids;
	nports = sc->params.nports;

	if (nfilters == 0)
		return (ENOTSUP);

	if (!(sc->flags & FULL_INIT_DONE))
		return (EAGAIN);

	if (t->idx >= nfilters)
		return (EINVAL);

	/* Validate against the global filter mode */
	t4_read_indirect(sc, A_TP_PIO_ADDR, A_TP_PIO_DATA, &fconf, 1,
	    A_TP_VLAN_PRI_MAP);
	/* Any bit the spec needs that the hardware mode lacks is an error. */
	if ((fconf | fspec_to_fconf(&t->fs)) != fconf)
		return (E2BIG);

	if (t->fs.action == FILTER_SWITCH && t->fs.eport >= nports)
		return (EINVAL);

	if (t->fs.val.iport >= nports)
		return (EINVAL);

	/* Can't specify an iq if not steering to it */
	if (!t->fs.dirsteer && t->fs.iq)
		return (EINVAL);

	/*
	 * IPv6 filter idx must be 4 aligned
	 *
	 * NOTE(review): the loop below touches idx..idx+3, so ">=" also
	 * rejects the final aligned group (idx + 4 == nfilters); confirm
	 * whether ">" was intended.
	 */
	if (t->fs.type == 1 &&
	    ((t->idx & 0x3) || t->idx + 4 >= nfilters))
		return (EINVAL);

	if (sc->tids.ftid_tab == NULL) {
		KASSERT(sc->tids.ftids_in_use == 0,
		    ("%s: no memory allocated but filters_in_use > 0",
		    __func__));

		sc->tids.ftid_tab = malloc(sizeof (struct filter_entry) *
		    nfilters, M_CXGBE, M_NOWAIT | M_ZERO);
		if (sc->tids.ftid_tab == NULL)
			return (ENOMEM);
	}

	/* Type 0 filters check one slot; others check four consecutive slots. */
	for (i = 0; i < 4; i++) {
		f = &sc->tids.ftid_tab[t->idx + i];

		if (f->pending || f->valid)
			return (EBUSY);
		if (f->locked)
			return (EPERM);

		if (t->fs.type == 0)
			break;
	}

	f = &sc->tids.ftid_tab[t->idx];
	f->fs = t->fs;

	return set_filter_wr(sc, t->idx);
}

static int
del_filter(struct adapter *sc, struct t4_filter *t)
{
	unsigned int nfilters;
	struct filter_entry *f;

	ADAPTER_LOCK_ASSERT_OWNED(sc);

	if (IS_BUSY(sc))
		return (EAGAIN);

	nfilters = sc->tids.nftids;

	if (nfilters == 0)
		return (ENOTSUP);

	if (sc->tids.ftid_tab == NULL || sc->tids.ftids_in_use == 0 ||
	    t->idx >= nfilters)
		return (EINVAL);

	if (!(sc->flags & FULL_INIT_DONE))
		return (EAGAIN);

	f = &sc->tids.ftid_tab[t->idx];

	if
(f->pending) return (EBUSY); if (f->locked) return (EPERM); if (f->valid) { t->fs = f->fs; /* extra info for the caller */ return del_filter_wr(sc, t->idx); } return (0); } static void clear_filter(struct filter_entry *f) { if (f->l2t) t4_l2t_release(f->l2t); bzero(f, sizeof (*f)); } static int set_filter_wr(struct adapter *sc, int fidx) { int rc; struct filter_entry *f = &sc->tids.ftid_tab[fidx]; struct mbuf *m; struct fw_filter_wr *fwr; unsigned int ftid; ADAPTER_LOCK_ASSERT_OWNED(sc); if (f->fs.newdmac || f->fs.newvlan) { /* This filter needs an L2T entry; allocate one. */ f->l2t = t4_l2t_alloc_switching(sc->l2t); if (f->l2t == NULL) return (EAGAIN); if (t4_l2t_set_switching(sc, f->l2t, f->fs.vlan, f->fs.eport, f->fs.dmac)) { t4_l2t_release(f->l2t); f->l2t = NULL; return (ENOMEM); } } ftid = sc->tids.ftid_base + fidx; m = m_gethdr(M_NOWAIT, MT_DATA); if (m == NULL) return (ENOMEM); fwr = mtod(m, struct fw_filter_wr *); m->m_len = m->m_pkthdr.len = sizeof(*fwr); bzero(fwr, sizeof (*fwr)); fwr->op_pkd = htobe32(V_FW_WR_OP(FW_FILTER_WR)); fwr->len16_pkd = htobe32(FW_LEN16(*fwr)); fwr->tid_to_iq = htobe32(V_FW_FILTER_WR_TID(ftid) | V_FW_FILTER_WR_RQTYPE(f->fs.type) | V_FW_FILTER_WR_NOREPLY(0) | V_FW_FILTER_WR_IQ(f->fs.iq)); fwr->del_filter_to_l2tix = htobe32(V_FW_FILTER_WR_RPTTID(f->fs.rpttid) | V_FW_FILTER_WR_DROP(f->fs.action == FILTER_DROP) | V_FW_FILTER_WR_DIRSTEER(f->fs.dirsteer) | V_FW_FILTER_WR_MASKHASH(f->fs.maskhash) | V_FW_FILTER_WR_DIRSTEERHASH(f->fs.dirsteerhash) | V_FW_FILTER_WR_LPBK(f->fs.action == FILTER_SWITCH) | V_FW_FILTER_WR_DMAC(f->fs.newdmac) | V_FW_FILTER_WR_SMAC(f->fs.newsmac) | V_FW_FILTER_WR_INSVLAN(f->fs.newvlan == VLAN_INSERT || f->fs.newvlan == VLAN_REWRITE) | V_FW_FILTER_WR_RMVLAN(f->fs.newvlan == VLAN_REMOVE || f->fs.newvlan == VLAN_REWRITE) | V_FW_FILTER_WR_HITCNTS(f->fs.hitcnts) | V_FW_FILTER_WR_TXCHAN(f->fs.eport) | V_FW_FILTER_WR_PRIO(f->fs.prio) | V_FW_FILTER_WR_L2TIX(f->l2t ? 
f->l2t->idx : 0)); fwr->ethtype = htobe16(f->fs.val.ethtype); fwr->ethtypem = htobe16(f->fs.mask.ethtype); fwr->frag_to_ovlan_vldm = (V_FW_FILTER_WR_FRAG(f->fs.val.frag) | V_FW_FILTER_WR_FRAGM(f->fs.mask.frag) | V_FW_FILTER_WR_IVLAN_VLD(f->fs.val.ivlan_vld) | V_FW_FILTER_WR_OVLAN_VLD(f->fs.val.ovlan_vld) | V_FW_FILTER_WR_IVLAN_VLDM(f->fs.mask.ivlan_vld) | V_FW_FILTER_WR_OVLAN_VLDM(f->fs.mask.ovlan_vld)); fwr->smac_sel = 0; fwr->rx_chan_rx_rpl_iq = htobe16(V_FW_FILTER_WR_RX_CHAN(0) | - V_FW_FILTER_WR_RX_RPL_IQ(sc->sge.fwq.abs_id)); + V_FW_FILTER_WR_RX_RPL_IQ(sc->sge.intrq[0].abs_id)); fwr->maci_to_matchtypem = htobe32(V_FW_FILTER_WR_MACI(f->fs.val.macidx) | V_FW_FILTER_WR_MACIM(f->fs.mask.macidx) | V_FW_FILTER_WR_FCOE(f->fs.val.fcoe) | V_FW_FILTER_WR_FCOEM(f->fs.mask.fcoe) | V_FW_FILTER_WR_PORT(f->fs.val.iport) | V_FW_FILTER_WR_PORTM(f->fs.mask.iport) | V_FW_FILTER_WR_MATCHTYPE(f->fs.val.matchtype) | V_FW_FILTER_WR_MATCHTYPEM(f->fs.mask.matchtype)); fwr->ptcl = f->fs.val.proto; fwr->ptclm = f->fs.mask.proto; fwr->ttyp = f->fs.val.tos; fwr->ttypm = f->fs.mask.tos; fwr->ivlan = htobe16(f->fs.val.ivlan); fwr->ivlanm = htobe16(f->fs.mask.ivlan); fwr->ovlan = htobe16(f->fs.val.ovlan); fwr->ovlanm = htobe16(f->fs.mask.ovlan); bcopy(f->fs.val.dip, fwr->lip, sizeof (fwr->lip)); bcopy(f->fs.mask.dip, fwr->lipm, sizeof (fwr->lipm)); bcopy(f->fs.val.sip, fwr->fip, sizeof (fwr->fip)); bcopy(f->fs.mask.sip, fwr->fipm, sizeof (fwr->fipm)); fwr->lp = htobe16(f->fs.val.dport); fwr->lpm = htobe16(f->fs.mask.dport); fwr->fp = htobe16(f->fs.val.sport); fwr->fpm = htobe16(f->fs.mask.sport); if (f->fs.newsmac) bcopy(f->fs.smac, fwr->sma, sizeof (fwr->sma)); f->pending = 1; sc->tids.ftids_in_use++; rc = t4_mgmt_tx(sc, m); if (rc != 0) { sc->tids.ftids_in_use--; m_freem(m); clear_filter(f); } return (rc); } static int del_filter_wr(struct adapter *sc, int fidx) { struct filter_entry *f = &sc->tids.ftid_tab[fidx]; struct mbuf *m; struct fw_filter_wr *fwr; unsigned int rc, ftid; 
ADAPTER_LOCK_ASSERT_OWNED(sc); ftid = sc->tids.ftid_base + fidx; m = m_gethdr(M_NOWAIT, MT_DATA); if (m == NULL) return (ENOMEM); fwr = mtod(m, struct fw_filter_wr *); m->m_len = m->m_pkthdr.len = sizeof(*fwr); bzero(fwr, sizeof (*fwr)); - t4_mk_filtdelwr(ftid, fwr, sc->sge.fwq.abs_id); + t4_mk_filtdelwr(ftid, fwr, sc->sge.intrq[0].abs_id); f->pending = 1; rc = t4_mgmt_tx(sc, m); if (rc != 0) { f->pending = 0; m_freem(m); } return (rc); } /* XXX move intr handlers to main.c and make this static */ void filter_rpl(struct adapter *sc, const struct cpl_set_tcb_rpl *rpl) { unsigned int idx = GET_TID(rpl); if (idx >= sc->tids.ftid_base && (idx -= sc->tids.ftid_base) < sc->tids.nftids) { unsigned int rc = G_COOKIE(rpl->cookie); struct filter_entry *f = &sc->tids.ftid_tab[idx]; if (rc == FW_FILTER_WR_FLT_DELETED) { /* * Clear the filter when we get confirmation from the * hardware that the filter has been deleted. */ clear_filter(f); sc->tids.ftids_in_use--; } else if (rc == FW_FILTER_WR_SMT_TBL_FULL) { device_printf(sc->dev, "filter %u setup failed due to full SMT\n", idx); clear_filter(f); sc->tids.ftids_in_use--; } else if (rc == FW_FILTER_WR_FLT_ADDED) { f->smtidx = (be64toh(rpl->oldval) >> 24) & 0xff; f->pending = 0; /* asynchronous setup completed */ f->valid = 1; } else { /* * Something went wrong. Issue a warning about the * problem and clear everything out. */ device_printf(sc->dev, "filter %u setup failed with error %u\n", idx, rc); clear_filter(f); sc->tids.ftids_in_use--; } } } int t4_os_find_pci_capability(struct adapter *sc, int cap) { int i; return (pci_find_cap(sc->dev, cap, &i) == 0 ? 
i : 0); } int t4_os_pci_save_state(struct adapter *sc) { device_t dev; struct pci_devinfo *dinfo; dev = sc->dev; dinfo = device_get_ivars(dev); pci_cfg_save(dev, dinfo, 0); return (0); } int t4_os_pci_restore_state(struct adapter *sc) { device_t dev; struct pci_devinfo *dinfo; dev = sc->dev; dinfo = device_get_ivars(dev); pci_cfg_restore(dev, dinfo); return (0); } void t4_os_portmod_changed(const struct adapter *sc, int idx) { struct port_info *pi = sc->port[idx]; static const char *mod_str[] = { NULL, "LR", "SR", "ER", "TWINAX", "active TWINAX", "LRM" }; if (pi->mod_type == FW_PORT_MOD_TYPE_NONE) if_printf(pi->ifp, "transceiver unplugged.\n"); else if (pi->mod_type == FW_PORT_MOD_TYPE_UNKNOWN) if_printf(pi->ifp, "unknown transceiver inserted.\n"); else if (pi->mod_type == FW_PORT_MOD_TYPE_NOTSUPPORTED) if_printf(pi->ifp, "unsupported transceiver inserted.\n"); else if (pi->mod_type > 0 && pi->mod_type < ARRAY_SIZE(mod_str)) { if_printf(pi->ifp, "%s transceiver inserted.\n", mod_str[pi->mod_type]); } else { if_printf(pi->ifp, "transceiver (type %d) inserted.\n", pi->mod_type); } } void t4_os_link_changed(struct adapter *sc, int idx, int link_stat) { struct port_info *pi = sc->port[idx]; struct ifnet *ifp = pi->ifp; if (link_stat) { ifp->if_baudrate = IF_Mbps(pi->link_cfg.speed); if_link_state_change(ifp, LINK_STATE_UP); } else if_link_state_change(ifp, LINK_STATE_DOWN); } static int t4_open(struct cdev *dev, int flags, int type, struct thread *td) { return (0); } static int t4_close(struct cdev *dev, int flags, int type, struct thread *td) { return (0); } static int t4_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data, int fflag, struct thread *td) { int rc; struct adapter *sc = dev->si_drv1; rc = priv_check(td, PRIV_DRIVER); if (rc != 0) return (rc); switch (cmd) { case CHELSIO_T4_GETREG: { struct t4_reg *edata = (struct t4_reg *)data; if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len) return (EFAULT); if (edata->size == 4) edata->val = 
t4_read_reg(sc, edata->addr); else if (edata->size == 8) edata->val = t4_read_reg64(sc, edata->addr); else return (EINVAL); break; } case CHELSIO_T4_SETREG: { struct t4_reg *edata = (struct t4_reg *)data; if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len) return (EFAULT); if (edata->size == 4) { if (edata->val & 0xffffffff00000000) return (EINVAL); t4_write_reg(sc, edata->addr, (uint32_t) edata->val); } else if (edata->size == 8) t4_write_reg64(sc, edata->addr, edata->val); else return (EINVAL); break; } case CHELSIO_T4_REGDUMP: { struct t4_regdump *regs = (struct t4_regdump *)data; int reglen = T4_REGDUMP_SIZE; uint8_t *buf; if (regs->len < reglen) { regs->len = reglen; /* hint to the caller */ return (ENOBUFS); } regs->len = reglen; buf = malloc(reglen, M_CXGBE, M_WAITOK | M_ZERO); t4_get_regs(sc, regs, buf); rc = copyout(buf, regs->data, reglen); free(buf, M_CXGBE); break; } case CHELSIO_T4_GET_FILTER_MODE: rc = get_filter_mode(sc, (uint32_t *)data); break; case CHELSIO_T4_SET_FILTER_MODE: rc = set_filter_mode(sc, *(uint32_t *)data); break; case CHELSIO_T4_GET_FILTER: ADAPTER_LOCK(sc); rc = get_filter(sc, (struct t4_filter *)data); ADAPTER_UNLOCK(sc); break; case CHELSIO_T4_SET_FILTER: ADAPTER_LOCK(sc); rc = set_filter(sc, (struct t4_filter *)data); ADAPTER_UNLOCK(sc); break; case CHELSIO_T4_DEL_FILTER: ADAPTER_LOCK(sc); rc = del_filter(sc, (struct t4_filter *)data); ADAPTER_UNLOCK(sc); break; default: rc = EINVAL; } return (rc); } static int t4_mod_event(module_t mod, int cmd, void *arg) { if (cmd == MOD_LOAD) t4_sge_modload(); return (0); } static devclass_t t4_devclass; static devclass_t cxgbe_devclass; DRIVER_MODULE(t4nex, pci, t4_driver, t4_devclass, t4_mod_event, 0); MODULE_VERSION(t4nex, 1); DRIVER_MODULE(cxgbe, t4nex, cxgbe_driver, cxgbe_devclass, 0, 0); MODULE_VERSION(cxgbe, 1); diff --git a/sys/dev/cxgbe/t4_sge.c b/sys/dev/cxgbe/t4_sge.c index a0ef17226eb4..cdcedf38855b 100644 --- a/sys/dev/cxgbe/t4_sge.c +++ b/sys/dev/cxgbe/t4_sge.c @@ 
-1,2758 +1,2825 @@ /*- * Copyright (c) 2011 Chelsio Communications, Inc. * All rights reserved. * Written by: Navdeep Parhar * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "common/common.h" #include "common/t4_regs.h" #include "common/t4_regs_values.h" #include "common/t4_msg.h" #include "common/t4fw_interface.h" struct fl_buf_info { int size; int type; uma_zone_t zone; }; /* Filled up by t4_sge_modload */ static struct fl_buf_info fl_buf_info[FL_BUF_SIZES]; #define FL_BUF_SIZE(x) (fl_buf_info[x].size) #define FL_BUF_TYPE(x) (fl_buf_info[x].type) #define FL_BUF_ZONE(x) (fl_buf_info[x].zone) enum { FL_PKTSHIFT = 2 }; #define FL_ALIGN min(CACHE_LINE_SIZE, 32) #if CACHE_LINE_SIZE > 64 #define SPG_LEN 128 #else #define SPG_LEN 64 #endif /* Used to track coalesced tx work request */ struct txpkts { uint64_t *flitp; /* ptr to flit where next pkt should start */ uint8_t npkt; /* # of packets in this work request */ uint8_t nflits; /* # of flits used by this work request */ uint16_t plen; /* total payload (sum of all packets) */ }; /* A packet's SGL. This + m_pkthdr has all info needed for tx */ struct sgl { int nsegs; /* # of segments in the SGL, 0 means imm. 
tx */ int nflits; /* # of flits needed for the SGL */ bus_dma_segment_t seg[TX_SGL_SEGS]; }; +static void t4_evt_rx(void *); +static void t4_eth_rx(void *); static inline void init_iq(struct sge_iq *, struct adapter *, int, int, int, int, iq_intr_handler_t *, char *); static inline void init_fl(struct sge_fl *, int, char *); static inline void init_eq(struct sge_eq *, int, char *); static int alloc_ring(struct adapter *, size_t, bus_dma_tag_t *, bus_dmamap_t *, bus_addr_t *, void **); static int free_ring(struct adapter *, bus_dma_tag_t, bus_dmamap_t, bus_addr_t, void *); static int alloc_iq_fl(struct port_info *, struct sge_iq *, struct sge_fl *, int, int); static int free_iq_fl(struct port_info *, struct sge_iq *, struct sge_fl *); -static int alloc_iq(struct sge_iq *, int); -static int free_iq(struct sge_iq *); +static int alloc_intrq(struct adapter *, int, int, int); +static int free_intrq(struct sge_iq *); +static int alloc_fwq(struct adapter *, int); +static int free_fwq(struct sge_iq *); static int alloc_rxq(struct port_info *, struct sge_rxq *, int, int); static int free_rxq(struct port_info *, struct sge_rxq *); static int alloc_ctrlq(struct adapter *, struct sge_ctrlq *, int); static int free_ctrlq(struct adapter *, struct sge_ctrlq *); static int alloc_txq(struct port_info *, struct sge_txq *, int); static int free_txq(struct port_info *, struct sge_txq *); static void oneseg_dma_callback(void *, bus_dma_segment_t *, int, int); static inline bool is_new_response(const struct sge_iq *, struct rsp_ctrl **); static inline void iq_next(struct sge_iq *); static inline void ring_fl_db(struct adapter *, struct sge_fl *); static void refill_fl(struct adapter *, struct sge_fl *, int, int); static int alloc_fl_sdesc(struct sge_fl *); static void free_fl_sdesc(struct sge_fl *); static int alloc_tx_maps(struct sge_txq *); static void free_tx_maps(struct sge_txq *); static void set_fl_tag_idx(struct sge_fl *, int); static int get_pkt_sgl(struct sge_txq *, struct mbuf 
**, struct sgl *, int); static int free_pkt_sgl(struct sge_txq *, struct sgl *); static int write_txpkt_wr(struct port_info *, struct sge_txq *, struct mbuf *, struct sgl *); static int add_to_txpkts(struct port_info *, struct sge_txq *, struct txpkts *, struct mbuf *, struct sgl *); static void write_txpkts_wr(struct sge_txq *, struct txpkts *); static inline void write_ulp_cpl_sgl(struct port_info *, struct sge_txq *, struct txpkts *, struct mbuf *, struct sgl *); static int write_sgl_to_txd(struct sge_eq *, struct sgl *, caddr_t *); static inline void copy_to_txd(struct sge_eq *, caddr_t, caddr_t *, int); static inline void ring_eq_db(struct adapter *, struct sge_eq *); static inline int reclaimable(struct sge_eq *); static int reclaim_tx_descs(struct sge_txq *, int, int); static void write_eqflush_wr(struct sge_eq *); static __be64 get_flit(bus_dma_segment_t *, int, int); static int handle_sge_egr_update(struct adapter *, const struct cpl_sge_egr_update *); +static void handle_cpl(struct adapter *, struct sge_iq *); static int ctrl_tx(struct adapter *, struct sge_ctrlq *, struct mbuf *); -static int sysctl_abs_id(SYSCTL_HANDLER_ARGS); +static int sysctl_uint16(SYSCTL_HANDLER_ARGS); extern void filter_rpl(struct adapter *, const struct cpl_set_tcb_rpl *); /* * Called on MOD_LOAD and fills up fl_buf_info[]. */ void t4_sge_modload(void) { int i; int bufsize[FL_BUF_SIZES] = { MCLBYTES, #if MJUMPAGESIZE != MCLBYTES MJUMPAGESIZE, #endif MJUM9BYTES, MJUM16BYTES }; for (i = 0; i < FL_BUF_SIZES; i++) { FL_BUF_SIZE(i) = bufsize[i]; FL_BUF_TYPE(i) = m_gettype(bufsize[i]); FL_BUF_ZONE(i) = m_getzone(bufsize[i]); } } /** * t4_sge_init - initialize SGE * @sc: the adapter * * Performs SGE initialization needed every time after a chip reset. * We do not initialize any of the queues here, instead the driver * top-level must request them individually. 
*/ void t4_sge_init(struct adapter *sc) { struct sge *s = &sc->sge; int i; t4_set_reg_field(sc, A_SGE_CONTROL, V_PKTSHIFT(M_PKTSHIFT) | V_INGPADBOUNDARY(M_INGPADBOUNDARY) | F_EGRSTATUSPAGESIZE, V_INGPADBOUNDARY(ilog2(FL_ALIGN) - 5) | V_PKTSHIFT(FL_PKTSHIFT) | F_RXPKTCPLMODE | V_EGRSTATUSPAGESIZE(SPG_LEN == 128)); t4_set_reg_field(sc, A_SGE_HOST_PAGE_SIZE, V_HOSTPAGESIZEPF0(M_HOSTPAGESIZEPF0), V_HOSTPAGESIZEPF0(PAGE_SHIFT - 10)); for (i = 0; i < FL_BUF_SIZES; i++) { t4_write_reg(sc, A_SGE_FL_BUFFER_SIZE0 + (4 * i), FL_BUF_SIZE(i)); } t4_write_reg(sc, A_SGE_INGRESS_RX_THRESHOLD, V_THRESHOLD_0(s->counter_val[0]) | V_THRESHOLD_1(s->counter_val[1]) | V_THRESHOLD_2(s->counter_val[2]) | V_THRESHOLD_3(s->counter_val[3])); t4_write_reg(sc, A_SGE_TIMER_VALUE_0_AND_1, V_TIMERVALUE0(us_to_core_ticks(sc, s->timer_val[0])) | V_TIMERVALUE1(us_to_core_ticks(sc, s->timer_val[1]))); t4_write_reg(sc, A_SGE_TIMER_VALUE_2_AND_3, V_TIMERVALUE2(us_to_core_ticks(sc, s->timer_val[2])) | V_TIMERVALUE3(us_to_core_ticks(sc, s->timer_val[3]))); t4_write_reg(sc, A_SGE_TIMER_VALUE_4_AND_5, V_TIMERVALUE4(us_to_core_ticks(sc, s->timer_val[4])) | V_TIMERVALUE5(us_to_core_ticks(sc, s->timer_val[5]))); } int t4_create_dma_tag(struct adapter *sc) { int rc; rc = bus_dma_tag_create(bus_get_dma_tag(sc->dev), 1, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, BUS_SPACE_MAXSIZE, BUS_SPACE_UNRESTRICTED, BUS_SPACE_MAXSIZE, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->dmat); if (rc != 0) { device_printf(sc->dev, "failed to create main DMA tag: %d\n", rc); } return (rc); } int t4_destroy_dma_tag(struct adapter *sc) { if (sc->dmat) bus_dma_tag_destroy(sc->dmat); return (0); } /* * Allocate and initialize the firmware event queue, control queues, and the - * forwarded interrupt queues (if any). The adapter owns all these queues as - * they are not associated with any particular port. + * interrupt queues. The adapter owns all of these queues. * * Returns errno on failure. 
Resources allocated up to that point may still be * allocated. Caller is responsible for cleanup in case this function fails. */ int t4_setup_adapter_queues(struct adapter *sc) { - int i, rc; - struct sge_iq *iq, *fwq; + int i, j, rc, intr_idx, qsize; + struct sge_iq *iq; struct sge_ctrlq *ctrlq; iq_intr_handler_t *handler; char name[16]; ADAPTER_LOCK_ASSERT_NOTOWNED(sc); if (sysctl_ctx_init(&sc->ctx) == 0) { struct sysctl_oid *oid = device_get_sysctl_tree(sc->dev); struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); + sc->oid_fwq = SYSCTL_ADD_NODE(&sc->ctx, children, OID_AUTO, + "fwq", CTLFLAG_RD, NULL, "firmware event queue"); sc->oid_ctrlq = SYSCTL_ADD_NODE(&sc->ctx, children, OID_AUTO, "ctrlq", CTLFLAG_RD, NULL, "ctrl queues"); + sc->oid_intrq = SYSCTL_ADD_NODE(&sc->ctx, children, OID_AUTO, + "intrq", CTLFLAG_RD, NULL, "interrupt queues"); } - fwq = &sc->sge.fwq; - if (sc->flags & INTR_FWD) { - iq = &sc->sge.fiq[0]; - - /* - * Forwarded interrupt queues - allocate 1 if there's only 1 - * vector available, one less than the number of vectors - * otherwise (the first vector is reserved for the error - * interrupt in that case). - */ - i = sc->intr_count > 1 ? 
1 : 0; - for (; i < sc->intr_count; i++, iq++) { - - snprintf(name, sizeof(name), "%s fiq%d", + /* + * Interrupt queues + */ + intr_idx = sc->intr_count - NINTRQ(sc); + if (sc->flags & INTR_SHARED) { + qsize = max((sc->sge.nrxq + 1) * 2, INTR_IQ_QSIZE); + for (i = 0; i < NINTRQ(sc); i++, intr_idx++) { + snprintf(name, sizeof(name), "%s intrq%d", device_get_nameunit(sc->dev), i); - init_iq(iq, sc, 0, 0, (sc->sge.nrxq + 1) * 2, 16, NULL, - name); - rc = alloc_iq(iq, i); + iq = &sc->sge.intrq[i]; + init_iq(iq, sc, 0, 0, qsize, INTR_IQ_ESIZE, NULL, name); + rc = alloc_intrq(sc, i % sc->params.nports, i, + intr_idx); + if (rc != 0) { device_printf(sc->dev, - "failed to create fwd intr queue %d: %d\n", - i, rc); + "failed to create %s: %d\n", name, rc); return (rc); } } - - handler = t4_evt_rx; - i = 0; /* forward fwq's interrupt to the first fiq */ } else { - handler = NULL; - i = 1; /* fwq should use vector 1 (0 is used by error) */ + int qidx = 0; + struct port_info *pi; + + for (i = 0; i < sc->params.nports; i++) { + pi = sc->port[i]; + qsize = max((pi->nrxq + 1) * 2, INTR_IQ_QSIZE); + for (j = 0; j < pi->nrxq; j++, qidx++, intr_idx++) { + snprintf(name, sizeof(name), "%s intrq%d", + device_get_nameunit(pi->dev), j); + + iq = &sc->sge.intrq[qidx]; + init_iq(iq, sc, 0, 0, qsize, INTR_IQ_ESIZE, + NULL, name); + rc = alloc_intrq(sc, i, qidx, intr_idx); + + if (rc != 0) { + device_printf(sc->dev, + "failed to create %s: %d\n", + name, rc); + return (rc); + } + } + } } + /* + * Firmware event queue + */ snprintf(name, sizeof(name), "%s fwq", device_get_nameunit(sc->dev)); - init_iq(fwq, sc, 0, 0, FW_IQ_QSIZE, FW_IQ_ESIZE, handler, name); - rc = alloc_iq(fwq, i); + if (sc->intr_count > T4_EXTRA_INTR) { + handler = NULL; + intr_idx = 1; + } else { + handler = t4_evt_rx; + intr_idx = 0; + } + + iq = &sc->sge.fwq; + init_iq(iq, sc, 0, 0, FW_IQ_QSIZE, FW_IQ_ESIZE, handler, name); + rc = alloc_fwq(sc, intr_idx); if (rc != 0) { device_printf(sc->dev, "failed to create firmware 
event queue: %d\n", rc); return (rc); } /* - * Control queues - one per hardware channel. + * Control queues - one per port. */ ctrlq = &sc->sge.ctrlq[0]; - for (i = 0; i < NCHAN; i++, ctrlq++) { + for (i = 0; i < sc->params.nports; i++, ctrlq++) { snprintf(name, sizeof(name), "%s ctrlq%d", device_get_nameunit(sc->dev), i); init_eq(&ctrlq->eq, CTRL_EQ_QSIZE, name); rc = alloc_ctrlq(sc, ctrlq, i); if (rc != 0) { device_printf(sc->dev, "failed to create control queue %d: %d\n", i, rc); return (rc); } } return (rc); } /* * Idempotent */ int t4_teardown_adapter_queues(struct adapter *sc) { int i; struct sge_iq *iq; ADAPTER_LOCK_ASSERT_NOTOWNED(sc); /* Do this before freeing the queues */ - if (sc->oid_ctrlq) { + if (sc->oid_fwq || sc->oid_ctrlq || sc->oid_intrq) { sysctl_ctx_free(&sc->ctx); + sc->oid_fwq = NULL; sc->oid_ctrlq = NULL; + sc->oid_intrq = NULL; } - for (i = 0; i < NCHAN; i++) + for (i = 0; i < sc->params.nports; i++) free_ctrlq(sc, &sc->sge.ctrlq[i]); iq = &sc->sge.fwq; - free_iq(iq); - if (sc->flags & INTR_FWD) { - for (i = 0; i < NFIQ(sc); i++) { - iq = &sc->sge.fiq[i]; - free_iq(iq); - } + free_fwq(iq); + + for (i = 0; i < NINTRQ(sc); i++) { + iq = &sc->sge.intrq[i]; + free_intrq(iq); } return (0); } int t4_setup_eth_queues(struct port_info *pi) { int rc = 0, i, intr_idx; struct sge_rxq *rxq; struct sge_txq *txq; char name[16]; struct adapter *sc = pi->adapter; if (sysctl_ctx_init(&pi->ctx) == 0) { struct sysctl_oid *oid = device_get_sysctl_tree(pi->dev); struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); pi->oid_rxq = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "rxq", CTLFLAG_RD, NULL, "rx queues"); pi->oid_txq = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "txq", CTLFLAG_RD, NULL, "tx queues"); } for_each_rxq(pi, i, rxq) { snprintf(name, sizeof(name), "%s rxq%d-iq", device_get_nameunit(pi->dev), i); init_iq(&rxq->iq, sc, pi->tmr_idx, pi->pktc_idx, - pi->qsize_rxq, RX_IQ_ESIZE, - sc->flags & INTR_FWD ? 
t4_eth_rx : NULL, name); + pi->qsize_rxq, RX_IQ_ESIZE, t4_eth_rx, name); snprintf(name, sizeof(name), "%s rxq%d-fl", device_get_nameunit(pi->dev), i); init_fl(&rxq->fl, pi->qsize_rxq / 8, name); - if (sc->flags & INTR_FWD) - intr_idx = (pi->first_rxq + i) % NFIQ(sc); - else - intr_idx = pi->first_rxq + i + 2; + intr_idx = pi->first_rxq + i; + if (sc->flags & INTR_SHARED) + intr_idx %= NINTRQ(sc); rc = alloc_rxq(pi, rxq, intr_idx, i); if (rc != 0) goto done; - - intr_idx++; } for_each_txq(pi, i, txq) { snprintf(name, sizeof(name), "%s txq%d", device_get_nameunit(pi->dev), i); init_eq(&txq->eq, pi->qsize_txq, name); rc = alloc_txq(pi, txq, i); if (rc != 0) goto done; } done: if (rc) t4_teardown_eth_queues(pi); return (rc); } /* * Idempotent */ int t4_teardown_eth_queues(struct port_info *pi) { int i; struct sge_rxq *rxq; struct sge_txq *txq; /* Do this before freeing the queues */ if (pi->oid_txq || pi->oid_rxq) { sysctl_ctx_free(&pi->ctx); pi->oid_txq = pi->oid_rxq = NULL; } for_each_txq(pi, i, txq) { free_txq(pi, txq); } for_each_rxq(pi, i, rxq) { free_rxq(pi, rxq); } return (0); } -/* Deals with errors and forwarded interrupts */ +/* Deals with errors and the first (and only) interrupt queue */ void t4_intr_all(void *arg) { struct adapter *sc = arg; t4_intr_err(arg); - t4_intr_fwd(&sc->sge.fiq[0]); + t4_intr(&sc->sge.intrq[0]); } -/* Deals with forwarded interrupts on the given ingress queue */ +/* Deals with interrupts, and a few CPLs, on the given interrupt queue */ void -t4_intr_fwd(void *arg) +t4_intr(void *arg) { struct sge_iq *iq = arg, *q; struct adapter *sc = iq->adapter; struct rsp_ctrl *ctrl; + const struct rss_header *rss; int ndesc_pending = 0, ndesc_total = 0; - int qid; + int qid, rsp_type; if (!atomic_cmpset_32(&iq->state, IQS_IDLE, IQS_BUSY)) return; while (is_new_response(iq, &ctrl)) { rmb(); - /* Only interrupt muxing expected on this queue */ - KASSERT(G_RSPD_TYPE(ctrl->u.type_gen) == X_RSPD_TYPE_INTR, - ("unexpected event on forwarded interrupt 
queue: %x", - G_RSPD_TYPE(ctrl->u.type_gen))); + rss = (const void *)iq->cdesc; + rsp_type = G_RSPD_TYPE(ctrl->u.type_gen); + + if (__predict_false(rsp_type == X_RSPD_TYPE_CPL)) { + handle_cpl(sc, iq); + goto nextdesc; + } qid = ntohl(ctrl->pldbuflen_qid) - sc->sge.iq_start; q = sc->sge.iqmap[qid]; - q->handler(q); + if (atomic_cmpset_32(&q->state, IQS_IDLE, IQS_BUSY)) { + q->handler(q); + atomic_cmpset_32(&q->state, IQS_BUSY, IQS_IDLE); + } - ndesc_total++; +nextdesc: ndesc_total++; if (++ndesc_pending >= iq->qsize / 4) { t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS), V_CIDXINC(ndesc_pending) | V_INGRESSQID(iq->cntxt_id) | V_SEINTARM( V_QINTR_TIMER_IDX(X_TIMERREG_UPDATE_CIDX))); ndesc_pending = 0; } iq_next(iq); } t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS), V_CIDXINC(ndesc_pending) | V_INGRESSQID((u32)iq->cntxt_id) | V_SEINTARM(iq->intr_params)); atomic_cmpset_32(&iq->state, IQS_BUSY, IQS_IDLE); } /* Deals with error interrupts */ void t4_intr_err(void *arg) { struct adapter *sc = arg; - if (sc->intr_type == INTR_INTX) - t4_write_reg(sc, MYPF_REG(A_PCIE_PF_CLI), 0); - + t4_write_reg(sc, MYPF_REG(A_PCIE_PF_CLI), 0); t4_slow_intr_handler(sc); } /* Deals with the firmware event queue */ void t4_intr_evt(void *arg) { struct sge_iq *iq = arg; - if (!atomic_cmpset_32(&iq->state, IQS_IDLE, IQS_BUSY)) - return; - - t4_evt_rx(arg); - - atomic_cmpset_32(&iq->state, IQS_BUSY, IQS_IDLE); -} - -void -t4_intr_data(void *arg) -{ - struct sge_iq *iq = arg; - - if (!atomic_cmpset_32(&iq->state, IQS_IDLE, IQS_BUSY)) - return; - - t4_eth_rx(arg); - - atomic_cmpset_32(&iq->state, IQS_BUSY, IQS_IDLE); + if (atomic_cmpset_32(&iq->state, IQS_IDLE, IQS_BUSY)) { + t4_evt_rx(arg); + atomic_cmpset_32(&iq->state, IQS_BUSY, IQS_IDLE); + } } -void +static void t4_evt_rx(void *arg) { struct sge_iq *iq = arg; struct adapter *sc = iq->adapter; struct rsp_ctrl *ctrl; - const struct rss_header *rss; int ndesc_pending = 0, ndesc_total = 0; KASSERT(iq == &sc->sge.fwq, ("%s: unexpected ingress queue", 
__func__)); while (is_new_response(iq, &ctrl)) { + int rsp_type; rmb(); - rss = (const void *)iq->cdesc; - - /* Should only get CPL on this queue */ - KASSERT(G_RSPD_TYPE(ctrl->u.type_gen) == X_RSPD_TYPE_CPL, - ("%s: unexpected type %d", __func__, - G_RSPD_TYPE(ctrl->u.type_gen))); + rsp_type = G_RSPD_TYPE(ctrl->u.type_gen); + if (__predict_false(rsp_type != X_RSPD_TYPE_CPL)) + panic("%s: unexpected rsp_type %d", __func__, rsp_type); - switch (rss->opcode) { - case CPL_FW4_MSG: - case CPL_FW6_MSG: { - const struct cpl_fw6_msg *cpl; - - cpl = (const void *)(rss + 1); - if (cpl->type == FW6_TYPE_CMD_RPL) - t4_handle_fw_rpl(sc, cpl->data); - - break; - } - case CPL_SGE_EGR_UPDATE: - handle_sge_egr_update(sc, (const void *)(rss + 1)); - break; - case CPL_SET_TCB_RPL: - filter_rpl(sc, (const void *) (rss + 1)); - break; - default: - device_printf(sc->dev, - "can't handle CPL opcode %d.", rss->opcode); - } + handle_cpl(sc, iq); ndesc_total++; if (++ndesc_pending >= iq->qsize / 4) { t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS), V_CIDXINC(ndesc_pending) | V_INGRESSQID(iq->cntxt_id) | V_SEINTARM( V_QINTR_TIMER_IDX(X_TIMERREG_UPDATE_CIDX))); ndesc_pending = 0; } + iq_next(iq); } t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS), V_CIDXINC(ndesc_pending) | V_INGRESSQID(iq->cntxt_id) | V_SEINTARM(iq->intr_params)); } #ifdef T4_PKT_TIMESTAMP #define RX_COPY_THRESHOLD (MINCLSIZE - 8) #else #define RX_COPY_THRESHOLD MINCLSIZE #endif -void +static void t4_eth_rx(void *arg) { struct sge_rxq *rxq = arg; struct sge_iq *iq = arg; struct adapter *sc = iq->adapter; struct rsp_ctrl *ctrl; struct ifnet *ifp = rxq->ifp; struct sge_fl *fl = &rxq->fl; struct fl_sdesc *sd = &fl->sdesc[fl->cidx], *sd_next; const struct rss_header *rss; const struct cpl_rx_pkt *cpl; uint32_t len; int ndescs = 0, i; struct mbuf *m0, *m; #ifdef INET struct lro_ctrl *lro = &rxq->lro; struct lro_entry *l; #endif prefetch(sd->m); prefetch(sd->cl); iq->intr_next = iq->intr_params; while (is_new_response(iq, &ctrl)) { rmb(); rss = 
(const void *)iq->cdesc; i = G_RSPD_TYPE(ctrl->u.type_gen); - if (__predict_false(i == X_RSPD_TYPE_CPL)) { - - /* Can't be anything except an egress update */ - KASSERT(rss->opcode == CPL_SGE_EGR_UPDATE, - ("%s: unexpected CPL %x", __func__, rss->opcode)); - - handle_sge_egr_update(sc, (const void *)(rss + 1)); - goto nextdesc; - } KASSERT(i == X_RSPD_TYPE_FLBUF && rss->opcode == CPL_RX_PKT, - ("%s: unexpected CPL %x rsp %d", __func__, rss->opcode, i)); + ("%s: unexpected type %d CPL opcode 0x%x", + __func__, i, rss->opcode)); sd_next = sd + 1; if (__predict_false(fl->cidx + 1 == fl->cap)) sd_next = fl->sdesc; prefetch(sd_next->m); prefetch(sd_next->cl); cpl = (const void *)(rss + 1); m0 = sd->m; sd->m = NULL; /* consumed */ len = be32toh(ctrl->pldbuflen_qid); if (__predict_false((len & F_RSPD_NEWBUF) == 0)) panic("%s: cannot handle packed frames", __func__); len = G_RSPD_LEN(len); bus_dmamap_sync(fl->tag[sd->tag_idx], sd->map, BUS_DMASYNC_POSTREAD); m_init(m0, NULL, 0, M_NOWAIT, MT_DATA, M_PKTHDR); #ifdef T4_PKT_TIMESTAMP *mtod(m0, uint64_t *) = be64toh(ctrl->u.last_flit & 0xfffffffffffffff); m0->m_data += 8; /* * 60 bit timestamp value is *(uint64_t *)m0->m_pktdat. Note * that it is in the leading free-space (see M_LEADINGSPACE) in * the mbuf. The kernel can clobber it during a pullup, * m_copymdata, etc. You need to make sure that the mbuf * reaches you unmolested if you care about the timestamp. 
*/ #endif if (len < RX_COPY_THRESHOLD) { /* copy data to mbuf, buffer will be recycled */ bcopy(sd->cl, mtod(m0, caddr_t), len); m0->m_len = len; } else { bus_dmamap_unload(fl->tag[sd->tag_idx], sd->map); m_cljset(m0, sd->cl, FL_BUF_TYPE(sd->tag_idx)); sd->cl = NULL; /* consumed */ m0->m_len = min(len, FL_BUF_SIZE(sd->tag_idx)); } len -= FL_PKTSHIFT; m0->m_len -= FL_PKTSHIFT; m0->m_data += FL_PKTSHIFT; m0->m_pkthdr.len = len; m0->m_pkthdr.rcvif = ifp; m0->m_flags |= M_FLOWID; m0->m_pkthdr.flowid = rss->hash_val; if (cpl->csum_calc && !cpl->err_vec && ifp->if_capenable & IFCAP_RXCSUM) { m0->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR); if (cpl->ip_frag) m0->m_pkthdr.csum_data = be16toh(cpl->csum); else m0->m_pkthdr.csum_data = 0xffff; rxq->rxcsum++; } if (cpl->vlan_ex) { m0->m_pkthdr.ether_vtag = be16toh(cpl->vlan); m0->m_flags |= M_VLANTAG; rxq->vlan_extraction++; } i = 1; /* # of fl sdesc used */ sd = sd_next; if (__predict_false(++fl->cidx == fl->cap)) fl->cidx = 0; len -= m0->m_len; m = m0; while (len) { i++; sd_next = sd + 1; if (__predict_false(fl->cidx + 1 == fl->cap)) sd_next = fl->sdesc; prefetch(sd_next->m); prefetch(sd_next->cl); m->m_next = sd->m; sd->m = NULL; /* consumed */ m = m->m_next; bus_dmamap_sync(fl->tag[sd->tag_idx], sd->map, BUS_DMASYNC_POSTREAD); m_init(m, NULL, 0, M_NOWAIT, MT_DATA, 0); if (len <= MLEN) { bcopy(sd->cl, mtod(m, caddr_t), len); m->m_len = len; } else { bus_dmamap_unload(fl->tag[sd->tag_idx], sd->map); m_cljset(m, sd->cl, FL_BUF_TYPE(sd->tag_idx)); sd->cl = NULL; /* consumed */ m->m_len = min(len, FL_BUF_SIZE(sd->tag_idx)); } i++; sd = sd_next; if (__predict_false(++fl->cidx == fl->cap)) fl->cidx = 0; len -= m->m_len; } #ifdef INET if (cpl->l2info & htobe32(F_RXF_LRO) && rxq->flags & RXQ_LRO_ENABLED && tcp_lro_rx(lro, m0, 0) == 0) { /* queued for LRO */ } else #endif ifp->if_input(ifp, m0); FL_LOCK(fl); fl->needed += i; if (fl->needed >= 32) refill_fl(sc, fl, 64, 32); 
FL_UNLOCK(fl); -nextdesc: ndescs++; - iq_next(iq); - - if (ndescs > 32) { + if (++ndescs > 32) { t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS), V_CIDXINC(ndescs) | V_INGRESSQID((u32)iq->cntxt_id) | V_SEINTARM(V_QINTR_TIMER_IDX(X_TIMERREG_UPDATE_CIDX))); ndescs = 0; } + + iq_next(iq); } #ifdef INET while (!SLIST_EMPTY(&lro->lro_active)) { l = SLIST_FIRST(&lro->lro_active); SLIST_REMOVE_HEAD(&lro->lro_active, next); tcp_lro_flush(lro, l); } #endif t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS), V_CIDXINC(ndescs) | V_INGRESSQID((u32)iq->cntxt_id) | V_SEINTARM(iq->intr_next)); FL_LOCK(fl); if (fl->needed >= 32) refill_fl(sc, fl, 128, 8); FL_UNLOCK(fl); } int t4_mgmt_tx(struct adapter *sc, struct mbuf *m) { return ctrl_tx(sc, &sc->sge.ctrlq[0], m); } /* Per-packet header in a coalesced tx WR, before the SGL starts (in flits) */ #define TXPKTS_PKT_HDR ((\ sizeof(struct ulp_txpkt) + \ sizeof(struct ulptx_idata) + \ sizeof(struct cpl_tx_pkt_core) \ ) / 8) /* Header of a coalesced tx WR, before SGL of first packet (in flits) */ #define TXPKTS_WR_HDR (\ sizeof(struct fw_eth_tx_pkts_wr) / 8 + \ TXPKTS_PKT_HDR) /* Header of a tx WR, before SGL of first packet (in flits) */ #define TXPKT_WR_HDR ((\ sizeof(struct fw_eth_tx_pkt_wr) + \ sizeof(struct cpl_tx_pkt_core) \ ) / 8 ) /* Header of a tx LSO WR, before SGL of first packet (in flits) */ #define TXPKT_LSO_WR_HDR ((\ sizeof(struct fw_eth_tx_pkt_wr) + \ sizeof(struct cpl_tx_pkt_lso) + \ sizeof(struct cpl_tx_pkt_core) \ ) / 8 ) int t4_eth_tx(struct ifnet *ifp, struct sge_txq *txq, struct mbuf *m) { struct port_info *pi = (void *)ifp->if_softc; struct adapter *sc = pi->adapter; struct sge_eq *eq = &txq->eq; struct buf_ring *br = txq->br; struct mbuf *next; int rc, coalescing, can_reclaim; struct txpkts txpkts; struct sgl sgl; TXQ_LOCK_ASSERT_OWNED(txq); KASSERT(m, ("%s: called with nothing to do.", __func__)); prefetch(&eq->desc[eq->pidx]); prefetch(&txq->sdesc[eq->pidx]); txpkts.npkt = 0;/* indicates there's nothing in txpkts */ coalescing = 
0; if (eq->avail < 8) reclaim_tx_descs(txq, 0, 8); for (; m; m = next ? next : drbr_dequeue(ifp, br)) { if (eq->avail < 8) break; next = m->m_nextpkt; m->m_nextpkt = NULL; if (next || buf_ring_peek(br)) coalescing = 1; rc = get_pkt_sgl(txq, &m, &sgl, coalescing); if (rc != 0) { if (rc == ENOMEM) { /* Short of resources, suspend tx */ m->m_nextpkt = next; break; } /* * Unrecoverable error for this packet, throw it away * and move on to the next. get_pkt_sgl may already * have freed m (it will be NULL in that case and the * m_freem here is still safe). */ m_freem(m); continue; } if (coalescing && add_to_txpkts(pi, txq, &txpkts, m, &sgl) == 0) { /* Successfully absorbed into txpkts */ write_ulp_cpl_sgl(pi, txq, &txpkts, m, &sgl); goto doorbell; } /* * We weren't coalescing to begin with, or current frame could * not be coalesced (add_to_txpkts flushes txpkts if a frame * given to it can't be coalesced). Either way there should be * nothing in txpkts. */ KASSERT(txpkts.npkt == 0, ("%s: txpkts not empty: %d", __func__, txpkts.npkt)); /* We're sending out individual packets now */ coalescing = 0; if (eq->avail < 8) reclaim_tx_descs(txq, 0, 8); rc = write_txpkt_wr(pi, txq, m, &sgl); if (rc != 0) { /* Short of hardware descriptors, suspend tx */ /* * This is an unlikely but expensive failure. We've * done all the hard work (DMA mappings etc.) and now we * can't send out the packet. What's worse, we have to * spend even more time freeing up everything in sgl. */ txq->no_desc++; free_pkt_sgl(txq, &sgl); m->m_nextpkt = next; break; } ETHER_BPF_MTAP(ifp, m); if (sgl.nsegs == 0) m_freem(m); doorbell: /* Fewer and fewer doorbells as the queue fills up */ if (eq->pending >= (1 << (fls(eq->qsize - eq->avail) / 2))) ring_eq_db(sc, eq); can_reclaim = reclaimable(eq); if (can_reclaim >= 32) reclaim_tx_descs(txq, can_reclaim, 32); } if (txpkts.npkt > 0) write_txpkts_wr(txq, &txpkts); /* * m not NULL means there was an error but we haven't thrown it away. 
* This can happen when we're short of tx descriptors (no_desc) or maybe * even DMA maps (no_dmamap). Either way, a credit flush and reclaim * will get things going again. * * If eq->avail is already 0 we know a credit flush was requested in the * WR that reduced it to 0 so we don't need another flush (we don't have * any descriptor for a flush WR anyway, duh). */ if (m && eq->avail > 0 && !(eq->flags & EQ_CRFLUSHED)) { struct tx_sdesc *txsd = &txq->sdesc[eq->pidx]; txsd->desc_used = 1; txsd->credits = 0; write_eqflush_wr(eq); } txq->m = m; if (eq->pending) ring_eq_db(sc, eq); can_reclaim = reclaimable(eq); if (can_reclaim >= 32) reclaim_tx_descs(txq, can_reclaim, 128); return (0); } void t4_update_fl_bufsize(struct ifnet *ifp) { struct port_info *pi = ifp->if_softc; struct sge_rxq *rxq; struct sge_fl *fl; int i; for_each_rxq(pi, i, rxq) { fl = &rxq->fl; FL_LOCK(fl); set_fl_tag_idx(fl, ifp->if_mtu); FL_UNLOCK(fl); } } /* * A non-NULL handler indicates this iq will not receive direct interrupts, the - * handler will be invoked by a forwarded interrupt queue. + * handler will be invoked by an interrupt queue. 
*/ static inline void init_iq(struct sge_iq *iq, struct adapter *sc, int tmr_idx, int pktc_idx, int qsize, int esize, iq_intr_handler_t *handler, char *name) { KASSERT(tmr_idx >= 0 && tmr_idx < SGE_NTIMERS, ("%s: bad tmr_idx %d", __func__, tmr_idx)); KASSERT(pktc_idx < SGE_NCOUNTERS, /* -ve is ok, means don't use */ ("%s: bad pktc_idx %d", __func__, pktc_idx)); iq->flags = 0; iq->adapter = sc; iq->intr_params = V_QINTR_TIMER_IDX(tmr_idx) | V_QINTR_CNT_EN(pktc_idx >= 0); iq->intr_pktc_idx = pktc_idx; iq->qsize = roundup(qsize, 16); /* See FW_IQ_CMD/iqsize */ iq->esize = max(esize, 16); /* See FW_IQ_CMD/iqesize */ iq->handler = handler; strlcpy(iq->lockname, name, sizeof(iq->lockname)); } static inline void init_fl(struct sge_fl *fl, int qsize, char *name) { fl->qsize = qsize; strlcpy(fl->lockname, name, sizeof(fl->lockname)); } static inline void init_eq(struct sge_eq *eq, int qsize, char *name) { eq->qsize = qsize; strlcpy(eq->lockname, name, sizeof(eq->lockname)); } static int alloc_ring(struct adapter *sc, size_t len, bus_dma_tag_t *tag, bus_dmamap_t *map, bus_addr_t *pa, void **va) { int rc; rc = bus_dma_tag_create(sc->dmat, 512, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, len, 1, len, 0, NULL, NULL, tag); if (rc != 0) { device_printf(sc->dev, "cannot allocate DMA tag: %d\n", rc); goto done; } rc = bus_dmamem_alloc(*tag, va, BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO, map); if (rc != 0) { device_printf(sc->dev, "cannot allocate DMA memory: %d\n", rc); goto done; } rc = bus_dmamap_load(*tag, *map, *va, len, oneseg_dma_callback, pa, 0); if (rc != 0) { device_printf(sc->dev, "cannot load DMA map: %d\n", rc); goto done; } done: if (rc) free_ring(sc, *tag, *map, *pa, *va); return (rc); } static int free_ring(struct adapter *sc, bus_dma_tag_t tag, bus_dmamap_t map, bus_addr_t pa, void *va) { if (pa) bus_dmamap_unload(tag, map); if (va) bus_dmamem_free(tag, va, map); if (tag) bus_dma_tag_destroy(tag); return (0); } /* * Allocates the ring for an ingress 
queue and an optional freelist. If the * freelist is specified it will be allocated and then associated with the * ingress queue. * * Returns errno on failure. Resources allocated up to that point may still be * allocated. Caller is responsible for cleanup in case this function fails. * * If the ingress queue will take interrupts directly (iq->handler == NULL) then * the intr_idx specifies the vector, starting from 0. Otherwise it specifies - * the index of the queue to which its interrupts will be forwarded. + * the index of the interrupt queue to which its interrupts will be forwarded. */ static int alloc_iq_fl(struct port_info *pi, struct sge_iq *iq, struct sge_fl *fl, int intr_idx, int cong) { int rc, i, cntxt_id; size_t len; struct fw_iq_cmd c; struct adapter *sc = iq->adapter; __be32 v = 0; - /* The adapter queues are nominally allocated in port[0]'s name */ - if (pi == NULL) - pi = sc->port[0]; - len = iq->qsize * iq->esize; rc = alloc_ring(sc, len, &iq->desc_tag, &iq->desc_map, &iq->ba, (void **)&iq->desc); if (rc != 0) return (rc); bzero(&c, sizeof(c)); c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_IQ_CMD) | F_FW_CMD_REQUEST | F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_IQ_CMD_PFN(sc->pf) | V_FW_IQ_CMD_VFN(0)); c.alloc_to_len16 = htobe32(F_FW_IQ_CMD_ALLOC | F_FW_IQ_CMD_IQSTART | FW_LEN16(c)); /* Special handling for firmware event queue */ if (iq == &sc->sge.fwq) v |= F_FW_IQ_CMD_IQASYNCH; if (iq->handler) { - KASSERT(intr_idx < NFIQ(sc), + KASSERT(intr_idx < NINTRQ(sc), ("%s: invalid indirect intr_idx %d", __func__, intr_idx)); v |= F_FW_IQ_CMD_IQANDST; - v |= V_FW_IQ_CMD_IQANDSTINDEX(sc->sge.fiq[intr_idx].abs_id); + v |= V_FW_IQ_CMD_IQANDSTINDEX(sc->sge.intrq[intr_idx].abs_id); } else { KASSERT(intr_idx < sc->intr_count, ("%s: invalid direct intr_idx %d", __func__, intr_idx)); v |= V_FW_IQ_CMD_IQANDSTINDEX(intr_idx); } c.type_to_iqandstindex = htobe32(v | V_FW_IQ_CMD_TYPE(FW_IQ_TYPE_FL_INT_CAP) | V_FW_IQ_CMD_VIID(pi->viid) | 
V_FW_IQ_CMD_IQANUD(X_UPDATEDELIVERY_INTERRUPT)); c.iqdroprss_to_iqesize = htobe16(V_FW_IQ_CMD_IQPCIECH(pi->tx_chan) | F_FW_IQ_CMD_IQGTSMODE | V_FW_IQ_CMD_IQINTCNTTHRESH(iq->intr_pktc_idx) | V_FW_IQ_CMD_IQESIZE(ilog2(iq->esize) - 4)); c.iqsize = htobe16(iq->qsize); c.iqaddr = htobe64(iq->ba); if (cong >= 0) c.iqns_to_fl0congen = htobe32(F_FW_IQ_CMD_IQFLINTCONGEN); if (fl) { mtx_init(&fl->fl_lock, fl->lockname, NULL, MTX_DEF); for (i = 0; i < FL_BUF_SIZES; i++) { /* * A freelist buffer must be 16 byte aligned as the SGE * uses the low 4 bits of the bus addr to figure out the * buffer size. */ rc = bus_dma_tag_create(sc->dmat, 16, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, FL_BUF_SIZE(i), 1, FL_BUF_SIZE(i), BUS_DMA_ALLOCNOW, NULL, NULL, &fl->tag[i]); if (rc != 0) { device_printf(sc->dev, "failed to create fl DMA tag[%d]: %d\n", i, rc); return (rc); } } len = fl->qsize * RX_FL_ESIZE; rc = alloc_ring(sc, len, &fl->desc_tag, &fl->desc_map, &fl->ba, (void **)&fl->desc); if (rc) return (rc); /* Allocate space for one software descriptor per buffer. 
*/ fl->cap = (fl->qsize - SPG_LEN / RX_FL_ESIZE) * 8; FL_LOCK(fl); set_fl_tag_idx(fl, pi->ifp->if_mtu); rc = alloc_fl_sdesc(fl); FL_UNLOCK(fl); if (rc != 0) { device_printf(sc->dev, "failed to setup fl software descriptors: %d\n", rc); return (rc); } fl->needed = fl->cap; c.iqns_to_fl0congen = htobe32(V_FW_IQ_CMD_FL0HOSTFCMODE(X_HOSTFCMODE_NONE) | F_FW_IQ_CMD_FL0FETCHRO | F_FW_IQ_CMD_FL0DATARO | F_FW_IQ_CMD_FL0PADEN); if (cong >= 0) { c.iqns_to_fl0congen |= htobe32(V_FW_IQ_CMD_FL0CNGCHMAP(cong) | F_FW_IQ_CMD_FL0CONGCIF | F_FW_IQ_CMD_FL0CONGEN); } c.fl0dcaen_to_fl0cidxfthresh = htobe16(V_FW_IQ_CMD_FL0FBMIN(X_FETCHBURSTMIN_64B) | V_FW_IQ_CMD_FL0FBMAX(X_FETCHBURSTMAX_512B)); c.fl0size = htobe16(fl->qsize); c.fl0addr = htobe64(fl->ba); } rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c); if (rc != 0) { device_printf(sc->dev, "failed to create ingress queue: %d\n", rc); return (rc); } iq->cdesc = iq->desc; iq->cidx = 0; iq->gen = 1; iq->intr_next = iq->intr_params; iq->cntxt_id = be16toh(c.iqid); iq->abs_id = be16toh(c.physiqid); iq->flags |= (IQ_ALLOCATED | IQ_STARTED); cntxt_id = iq->cntxt_id - sc->sge.iq_start; KASSERT(cntxt_id < sc->sge.niq, ("%s: iq->cntxt_id (%d) more than the max (%d)", __func__, cntxt_id, sc->sge.niq - 1)); sc->sge.iqmap[cntxt_id] = iq; if (fl) { fl->cntxt_id = be16toh(c.fl0id); fl->pidx = fl->cidx = 0; cntxt_id = fl->cntxt_id - sc->sge.eq_start; KASSERT(cntxt_id < sc->sge.neq, ("%s: fl->cntxt_id (%d) more than the max (%d)", __func__, cntxt_id, sc->sge.neq - 1)); sc->sge.eqmap[cntxt_id] = (void *)fl; FL_LOCK(fl); refill_fl(sc, fl, -1, 8); FL_UNLOCK(fl); } /* Enable IQ interrupts */ atomic_store_rel_32(&iq->state, IQS_IDLE); t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS), V_SEINTARM(iq->intr_params) | V_INGRESSQID(iq->cntxt_id)); return (0); } /* * This can be called with the iq/fl in any state - fully allocated and * functional, partially allocated, even all-zeroed out. 
*/ static int free_iq_fl(struct port_info *pi, struct sge_iq *iq, struct sge_fl *fl) { int i, rc; struct adapter *sc = iq->adapter; device_t dev; if (sc == NULL) return (0); /* nothing to do */ dev = pi ? pi->dev : sc->dev; if (iq->flags & IQ_STARTED) { rc = -t4_iq_start_stop(sc, sc->mbox, 0, sc->pf, 0, iq->cntxt_id, fl ? fl->cntxt_id : 0xffff, 0xffff); if (rc != 0) { device_printf(dev, "failed to stop queue %p: %d\n", iq, rc); return (rc); } iq->flags &= ~IQ_STARTED; /* Synchronize with the interrupt handler */ while (!atomic_cmpset_32(&iq->state, IQS_IDLE, IQS_DISABLED)) pause("iqfree", hz / 1000); } if (iq->flags & IQ_ALLOCATED) { rc = -t4_iq_free(sc, sc->mbox, sc->pf, 0, FW_IQ_TYPE_FL_INT_CAP, iq->cntxt_id, fl ? fl->cntxt_id : 0xffff, 0xffff); if (rc != 0) { device_printf(dev, "failed to free queue %p: %d\n", iq, rc); return (rc); } iq->flags &= ~IQ_ALLOCATED; } free_ring(sc, iq->desc_tag, iq->desc_map, iq->ba, iq->desc); bzero(iq, sizeof(*iq)); if (fl) { free_ring(sc, fl->desc_tag, fl->desc_map, fl->ba, fl->desc); if (fl->sdesc) { FL_LOCK(fl); free_fl_sdesc(fl); FL_UNLOCK(fl); } if (mtx_initialized(&fl->fl_lock)) mtx_destroy(&fl->fl_lock); for (i = 0; i < FL_BUF_SIZES; i++) { if (fl->tag[i]) bus_dma_tag_destroy(fl->tag[i]); } bzero(fl, sizeof(*fl)); } return (0); } static int -alloc_iq(struct sge_iq *iq, int intr_idx) +alloc_intrq(struct adapter *sc, int port_idx, int intrq_idx, int intr_idx) +{ + int rc; + struct sysctl_oid *oid; + struct sysctl_oid_list *children; + char name[16]; + struct sge_iq *intrq = &sc->sge.intrq[intrq_idx]; + + rc = alloc_iq_fl(sc->port[port_idx], intrq, NULL, intr_idx, -1); + if (rc != 0) + return (rc); + + children = SYSCTL_CHILDREN(sc->oid_intrq); + + snprintf(name, sizeof(name), "%d", intrq_idx); + oid = SYSCTL_ADD_NODE(&sc->ctx, children, OID_AUTO, name, CTLFLAG_RD, + NULL, "interrupt queue"); + children = SYSCTL_CHILDREN(oid); + + SYSCTL_ADD_PROC(&sc->ctx, children, OID_AUTO, "cidx", + CTLTYPE_INT | CTLFLAG_RD, &intrq->cidx, 0, 
sysctl_uint16, "I", + "consumer index"); + + return (rc); +} + +static int +free_intrq(struct sge_iq *iq) { - return alloc_iq_fl(NULL, iq, NULL, intr_idx, -1); + return free_iq_fl(NULL, iq, NULL); + +} + +static int +alloc_fwq(struct adapter *sc, int intr_idx) +{ + int rc; + struct sysctl_oid_list *children; + struct sge_iq *fwq = &sc->sge.fwq; + + rc = alloc_iq_fl(sc->port[0], fwq, NULL, intr_idx, -1); + if (rc != 0) + return (rc); + + children = SYSCTL_CHILDREN(sc->oid_fwq); + + SYSCTL_ADD_PROC(&sc->ctx, children, OID_AUTO, "cidx", + CTLTYPE_INT | CTLFLAG_RD, &fwq->cidx, 0, sysctl_uint16, "I", + "consumer index"); + + return (rc); } static int -free_iq(struct sge_iq *iq) +free_fwq(struct sge_iq *iq) { return free_iq_fl(NULL, iq, NULL); } static int alloc_rxq(struct port_info *pi, struct sge_rxq *rxq, int intr_idx, int idx) { int rc; struct sysctl_oid *oid; struct sysctl_oid_list *children; char name[16]; rc = alloc_iq_fl(pi, &rxq->iq, &rxq->fl, intr_idx, 1 << pi->tx_chan); if (rc != 0) return (rc); #ifdef INET rc = tcp_lro_init(&rxq->lro); if (rc != 0) return (rc); rxq->lro.ifp = pi->ifp; /* also indicates LRO init'ed */ if (pi->ifp->if_capenable & IFCAP_LRO) rxq->flags |= RXQ_LRO_ENABLED; #endif rxq->ifp = pi->ifp; children = SYSCTL_CHILDREN(pi->oid_rxq); snprintf(name, sizeof(name), "%d", idx); oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, name, CTLFLAG_RD, NULL, "rx queue"); children = SYSCTL_CHILDREN(oid); SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "abs_id", - CTLTYPE_INT | CTLFLAG_RD, &rxq->iq.abs_id, 0, sysctl_abs_id, "I", + CTLTYPE_INT | CTLFLAG_RD, &rxq->iq.abs_id, 0, sysctl_uint16, "I", "absolute id of the queue"); #ifdef INET SYSCTL_ADD_INT(&pi->ctx, children, OID_AUTO, "lro_queued", CTLFLAG_RD, &rxq->lro.lro_queued, 0, NULL); SYSCTL_ADD_INT(&pi->ctx, children, OID_AUTO, "lro_flushed", CTLFLAG_RD, &rxq->lro.lro_flushed, 0, NULL); #endif SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "rxcsum", CTLFLAG_RD, &rxq->rxcsum, "# of times hardware 
assisted with checksum"); SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "vlan_extraction", CTLFLAG_RD, &rxq->vlan_extraction, "# of times hardware extracted 802.1Q tag"); return (rc); } static int free_rxq(struct port_info *pi, struct sge_rxq *rxq) { int rc; #ifdef INET if (rxq->lro.ifp) { tcp_lro_free(&rxq->lro); rxq->lro.ifp = NULL; } #endif rc = free_iq_fl(pi, &rxq->iq, &rxq->fl); if (rc == 0) bzero(rxq, sizeof(*rxq)); return (rc); } static int alloc_ctrlq(struct adapter *sc, struct sge_ctrlq *ctrlq, int idx) { int rc, cntxt_id; size_t len; struct fw_eq_ctrl_cmd c; struct sge_eq *eq = &ctrlq->eq; char name[16]; struct sysctl_oid *oid; struct sysctl_oid_list *children; mtx_init(&eq->eq_lock, eq->lockname, NULL, MTX_DEF); len = eq->qsize * CTRL_EQ_ESIZE; rc = alloc_ring(sc, len, &eq->desc_tag, &eq->desc_map, &eq->ba, (void **)&eq->desc); if (rc) return (rc); eq->cap = eq->qsize - SPG_LEN / CTRL_EQ_ESIZE; eq->spg = (void *)&eq->desc[eq->cap]; eq->avail = eq->cap - 1; /* one less to avoid cidx = pidx */ - eq->iqid = sc->sge.fwq.cntxt_id; + if (sc->flags & INTR_SHARED) + eq->iqid = sc->sge.intrq[idx % NINTRQ(sc)].cntxt_id; + else + eq->iqid = sc->sge.intrq[sc->port[idx]->first_rxq].cntxt_id; bzero(&c, sizeof(c)); c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_EQ_CTRL_CMD) | F_FW_CMD_REQUEST | F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_CTRL_CMD_PFN(sc->pf) | V_FW_EQ_CTRL_CMD_VFN(0)); c.alloc_to_len16 = htobe32(F_FW_EQ_CTRL_CMD_ALLOC | F_FW_EQ_CTRL_CMD_EQSTART | FW_LEN16(c)); c.cmpliqid_eqid = htonl(V_FW_EQ_CTRL_CMD_CMPLIQID(eq->iqid)); /* XXX */ c.physeqid_pkd = htobe32(0); c.fetchszm_to_iqid = htobe32(V_FW_EQ_CTRL_CMD_HOSTFCMODE(X_HOSTFCMODE_STATUS_PAGE) | - V_FW_EQ_CTRL_CMD_PCIECHN(idx) | F_FW_EQ_CTRL_CMD_FETCHRO | - V_FW_EQ_CTRL_CMD_IQID(eq->iqid)); + V_FW_EQ_CTRL_CMD_PCIECHN(sc->port[idx]->tx_chan) | + F_FW_EQ_CTRL_CMD_FETCHRO | V_FW_EQ_CTRL_CMD_IQID(eq->iqid)); c.dcaen_to_eqsize = htobe32(V_FW_EQ_CTRL_CMD_FBMIN(X_FETCHBURSTMIN_64B) | 
V_FW_EQ_CTRL_CMD_FBMAX(X_FETCHBURSTMAX_512B) | V_FW_EQ_CTRL_CMD_CIDXFTHRESH(X_CIDXFLUSHTHRESH_32) | V_FW_EQ_CTRL_CMD_EQSIZE(eq->qsize)); c.eqaddr = htobe64(eq->ba); rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c); if (rc != 0) { device_printf(sc->dev, "failed to create control queue %d: %d\n", idx, rc); return (rc); } eq->pidx = eq->cidx = 0; eq->cntxt_id = G_FW_EQ_CTRL_CMD_EQID(be32toh(c.cmpliqid_eqid)); eq->flags |= (EQ_ALLOCATED | EQ_STARTED); cntxt_id = eq->cntxt_id - sc->sge.eq_start; KASSERT(cntxt_id < sc->sge.neq, ("%s: eq->cntxt_id (%d) more than the max (%d)", __func__, cntxt_id, sc->sge.neq - 1)); sc->sge.eqmap[cntxt_id] = eq; children = SYSCTL_CHILDREN(sc->oid_ctrlq); snprintf(name, sizeof(name), "%d", idx); oid = SYSCTL_ADD_NODE(&sc->ctx, children, OID_AUTO, name, CTLFLAG_RD, NULL, "ctrl queue"); children = SYSCTL_CHILDREN(oid); - SYSCTL_ADD_UQUAD(&sc->ctx, children, OID_AUTO, "total_wrs", CTLFLAG_RD, - &ctrlq->total_wrs, "total # of work requests"); + SYSCTL_ADD_PROC(&sc->ctx, children, OID_AUTO, "pidx", + CTLTYPE_INT | CTLFLAG_RD, &ctrlq->eq.pidx, 0, sysctl_uint16, "I", + "producer index"); SYSCTL_ADD_UINT(&sc->ctx, children, OID_AUTO, "no_desc", CTLFLAG_RD, &ctrlq->no_desc, 0, "# of times ctrlq ran out of hardware descriptors"); - SYSCTL_ADD_UINT(&sc->ctx, children, OID_AUTO, "too_long", CTLFLAG_RD, - &ctrlq->too_long, 0, "# of oversized work requests"); return (rc); } static int free_ctrlq(struct adapter *sc, struct sge_ctrlq *ctrlq) { int rc; struct sge_eq *eq = &ctrlq->eq; if (eq->flags & (EQ_ALLOCATED | EQ_STARTED)) { rc = -t4_ctrl_eq_free(sc, sc->mbox, sc->pf, 0, eq->cntxt_id); if (rc != 0) { device_printf(sc->dev, "failed to free ctrl queue %p: %d\n", eq, rc); return (rc); } eq->flags &= ~(EQ_ALLOCATED | EQ_STARTED); } free_ring(sc, eq->desc_tag, eq->desc_map, eq->ba, eq->desc); if (mtx_initialized(&eq->eq_lock)) mtx_destroy(&eq->eq_lock); bzero(ctrlq, sizeof(*ctrlq)); return (0); } static int alloc_txq(struct port_info *pi, struct sge_txq 
*txq, int idx) { int rc, cntxt_id; size_t len; struct adapter *sc = pi->adapter; struct fw_eq_eth_cmd c; struct sge_eq *eq = &txq->eq; char name[16]; struct sysctl_oid *oid; struct sysctl_oid_list *children; + struct sge_iq *intrq; txq->ifp = pi->ifp; TASK_INIT(&txq->resume_tx, 0, cxgbe_txq_start, txq); mtx_init(&eq->eq_lock, eq->lockname, NULL, MTX_DEF); len = eq->qsize * TX_EQ_ESIZE; rc = alloc_ring(sc, len, &eq->desc_tag, &eq->desc_map, &eq->ba, (void **)&eq->desc); if (rc) return (rc); eq->cap = eq->qsize - SPG_LEN / TX_EQ_ESIZE; eq->spg = (void *)&eq->desc[eq->cap]; eq->avail = eq->cap - 1; /* one less to avoid cidx = pidx */ txq->sdesc = malloc(eq->cap * sizeof(struct tx_sdesc), M_CXGBE, M_ZERO | M_WAITOK); txq->br = buf_ring_alloc(eq->qsize, M_CXGBE, M_WAITOK, &eq->eq_lock); - eq->iqid = sc->sge.rxq[pi->first_rxq].iq.cntxt_id; + + intrq = &sc->sge.intrq[0]; + if (sc->flags & INTR_SHARED) + eq->iqid = intrq[(pi->first_txq + idx) % NINTRQ(sc)].cntxt_id; + else + eq->iqid = intrq[pi->first_rxq + (idx % pi->nrxq)].cntxt_id; rc = bus_dma_tag_create(sc->dmat, 1, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, 64 * 1024, TX_SGL_SEGS, BUS_SPACE_MAXSIZE, BUS_DMA_ALLOCNOW, NULL, NULL, &txq->tx_tag); if (rc != 0) { device_printf(sc->dev, "failed to create tx DMA tag: %d\n", rc); return (rc); } rc = alloc_tx_maps(txq); if (rc != 0) { device_printf(sc->dev, "failed to setup tx DMA maps: %d\n", rc); return (rc); } bzero(&c, sizeof(c)); c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_EQ_ETH_CMD) | F_FW_CMD_REQUEST | F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_ETH_CMD_PFN(sc->pf) | V_FW_EQ_ETH_CMD_VFN(0)); c.alloc_to_len16 = htobe32(F_FW_EQ_ETH_CMD_ALLOC | F_FW_EQ_ETH_CMD_EQSTART | FW_LEN16(c)); c.viid_pkd = htobe32(V_FW_EQ_ETH_CMD_VIID(pi->viid)); c.fetchszm_to_iqid = htobe32(V_FW_EQ_ETH_CMD_HOSTFCMODE(X_HOSTFCMODE_STATUS_PAGE) | V_FW_EQ_ETH_CMD_PCIECHN(pi->tx_chan) | F_FW_EQ_ETH_CMD_FETCHRO | V_FW_EQ_ETH_CMD_IQID(eq->iqid)); c.dcaen_to_eqsize = 
htobe32(V_FW_EQ_ETH_CMD_FBMIN(X_FETCHBURSTMIN_64B) | V_FW_EQ_ETH_CMD_FBMAX(X_FETCHBURSTMAX_512B) | V_FW_EQ_ETH_CMD_CIDXFTHRESH(X_CIDXFLUSHTHRESH_32) | V_FW_EQ_ETH_CMD_EQSIZE(eq->qsize)); c.eqaddr = htobe64(eq->ba); rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c); if (rc != 0) { device_printf(pi->dev, "failed to create egress queue: %d\n", rc); return (rc); } eq->pidx = eq->cidx = 0; eq->cntxt_id = G_FW_EQ_ETH_CMD_EQID(be32toh(c.eqid_pkd)); eq->flags |= (EQ_ALLOCATED | EQ_STARTED); cntxt_id = eq->cntxt_id - sc->sge.eq_start; KASSERT(cntxt_id < sc->sge.neq, ("%s: eq->cntxt_id (%d) more than the max (%d)", __func__, cntxt_id, sc->sge.neq - 1)); sc->sge.eqmap[cntxt_id] = eq; children = SYSCTL_CHILDREN(pi->oid_txq); snprintf(name, sizeof(name), "%d", idx); oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, name, CTLFLAG_RD, NULL, "tx queue"); children = SYSCTL_CHILDREN(oid); SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "txcsum", CTLFLAG_RD, &txq->txcsum, "# of times hardware assisted with checksum"); SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "vlan_insertion", CTLFLAG_RD, &txq->vlan_insertion, "# of times hardware inserted 802.1Q tag"); SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "tso_wrs", CTLFLAG_RD, &txq->tso_wrs, "# of IPv4 TSO work requests"); SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "imm_wrs", CTLFLAG_RD, &txq->imm_wrs, "# of work requests with immediate data"); SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "sgl_wrs", CTLFLAG_RD, &txq->sgl_wrs, "# of work requests with direct SGL"); SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "txpkt_wrs", CTLFLAG_RD, &txq->txpkt_wrs, "# of txpkt work requests (one pkt/WR)"); SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "txpkts_wrs", CTLFLAG_RD, &txq->txpkts_wrs, "# of txpkts work requests (multiple pkts/WR)"); SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "txpkts_pkts", CTLFLAG_RD, &txq->txpkts_pkts, "# of frames tx'd using txpkts work requests"); SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "no_dmamap", 
CTLFLAG_RD, &txq->no_dmamap, 0, "# of times txq ran out of DMA maps"); SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "no_desc", CTLFLAG_RD, &txq->no_desc, 0, "# of times txq ran out of hardware descriptors"); SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "egr_update", CTLFLAG_RD, &txq->egr_update, 0, "egress update notifications from the SGE"); return (rc); } static int free_txq(struct port_info *pi, struct sge_txq *txq) { int rc; struct adapter *sc = pi->adapter; struct sge_eq *eq = &txq->eq; if (eq->flags & (EQ_ALLOCATED | EQ_STARTED)) { /* * Wait for the response to a credit flush if there's one * pending. Clearing the flag tells handle_sge_egr_update or * cxgbe_txq_start (depending on how far the response has made * it) that they should ignore the response and wake up free_txq * instead. * * The interface has been marked down by the time we get here * (both IFF_UP and IFF_DRV_RUNNING cleared). qflush has * emptied the tx buf_rings and we know nothing new is being * queued for tx so we don't have to worry about a new credit * flush request. */ TXQ_LOCK(txq); if (eq->flags & EQ_CRFLUSHED) { eq->flags &= ~EQ_CRFLUSHED; msleep(txq, &eq->eq_lock, 0, "crflush", 0); } TXQ_UNLOCK(txq); rc = -t4_eth_eq_free(sc, sc->mbox, sc->pf, 0, eq->cntxt_id); if (rc != 0) { device_printf(pi->dev, "failed to free egress queue %p: %d\n", eq, rc); return (rc); } eq->flags &= ~(EQ_ALLOCATED | EQ_STARTED); } free_ring(sc, eq->desc_tag, eq->desc_map, eq->ba, eq->desc); free(txq->sdesc, M_CXGBE); if (txq->maps) free_tx_maps(txq); buf_ring_free(txq->br, M_CXGBE); if (txq->tx_tag) bus_dma_tag_destroy(txq->tx_tag); if (mtx_initialized(&eq->eq_lock)) mtx_destroy(&eq->eq_lock); bzero(txq, sizeof(*txq)); return (0); } static void oneseg_dma_callback(void *arg, bus_dma_segment_t *segs, int nseg, int error) { bus_addr_t *ba = arg; KASSERT(nseg == 1, ("%s meant for single segment mappings only.", __func__)); *ba = error ? 
0 : segs->ds_addr; } static inline bool is_new_response(const struct sge_iq *iq, struct rsp_ctrl **ctrl) { *ctrl = (void *)((uintptr_t)iq->cdesc + (iq->esize - sizeof(struct rsp_ctrl))); return (((*ctrl)->u.type_gen >> S_RSPD_GEN) == iq->gen); } static inline void iq_next(struct sge_iq *iq) { iq->cdesc = (void *) ((uintptr_t)iq->cdesc + iq->esize); if (__predict_false(++iq->cidx == iq->qsize - 1)) { iq->cidx = 0; iq->gen ^= 1; iq->cdesc = iq->desc; } } #define FL_HW_IDX(x) ((x) >> 3) static inline void ring_fl_db(struct adapter *sc, struct sge_fl *fl) { int ndesc = fl->pending / 8; if (FL_HW_IDX(fl->pidx) == FL_HW_IDX(fl->cidx)) ndesc--; /* hold back one credit */ if (ndesc <= 0) return; /* nothing to do */ wmb(); t4_write_reg(sc, MYPF_REG(A_SGE_PF_KDOORBELL), F_DBPRIO | V_QID(fl->cntxt_id) | V_PIDX(ndesc)); fl->pending -= ndesc * 8; } /* * Fill up the freelist by upto nbufs and ring its doorbell if the number of * buffers ready to be handed to the hardware >= dbthresh. */ static void refill_fl(struct adapter *sc, struct sge_fl *fl, int nbufs, int dbthresh) { __be64 *d = &fl->desc[fl->pidx]; struct fl_sdesc *sd = &fl->sdesc[fl->pidx]; bus_dma_tag_t tag; bus_addr_t pa; caddr_t cl; int rc; FL_LOCK_ASSERT_OWNED(fl); if (nbufs < 0 || nbufs > fl->needed) nbufs = fl->needed; while (nbufs--) { if (sd->cl != NULL) { /* * This happens when a frame small enough to fit * entirely in an mbuf was received in cl last time. * We'd held on to cl and can reuse it now. Note that * we reuse a cluster of the old size if fl->tag_idx is * no longer the same as sd->tag_idx. */ KASSERT(*d == sd->ba_tag, ("%s: recyling problem at pidx %d", __func__, fl->pidx)); d++; goto recycled; } if (fl->tag_idx != sd->tag_idx) { bus_dmamap_t map; bus_dma_tag_t newtag = fl->tag[fl->tag_idx]; bus_dma_tag_t oldtag = fl->tag[sd->tag_idx]; /* * An MTU change can get us here. Discard the old map * which was created with the old tag, but only if * we're able to get a new one. 
*/ rc = bus_dmamap_create(newtag, 0, &map); if (rc == 0) { bus_dmamap_destroy(oldtag, sd->map); sd->map = map; sd->tag_idx = fl->tag_idx; } } tag = fl->tag[sd->tag_idx]; cl = m_cljget(NULL, M_NOWAIT, FL_BUF_SIZE(sd->tag_idx)); if (cl == NULL) break; rc = bus_dmamap_load(tag, sd->map, cl, FL_BUF_SIZE(sd->tag_idx), oneseg_dma_callback, &pa, 0); if (rc != 0 || pa == 0) { fl->dmamap_failed++; uma_zfree(FL_BUF_ZONE(sd->tag_idx), cl); break; } sd->cl = cl; *d++ = htobe64(pa | sd->tag_idx); #ifdef INVARIANTS sd->ba_tag = htobe64(pa | sd->tag_idx); #endif recycled: /* sd->m is never recycled, should always be NULL */ KASSERT(sd->m == NULL, ("%s: stray mbuf", __func__)); sd->m = m_gethdr(M_NOWAIT, MT_NOINIT); if (sd->m == NULL) break; fl->pending++; fl->needed--; sd++; if (++fl->pidx == fl->cap) { fl->pidx = 0; sd = fl->sdesc; d = fl->desc; } } if (fl->pending >= dbthresh) ring_fl_db(sc, fl); } static int alloc_fl_sdesc(struct sge_fl *fl) { struct fl_sdesc *sd; bus_dma_tag_t tag; int i, rc; FL_LOCK_ASSERT_OWNED(fl); fl->sdesc = malloc(fl->cap * sizeof(struct fl_sdesc), M_CXGBE, M_ZERO | M_WAITOK); tag = fl->tag[fl->tag_idx]; sd = fl->sdesc; for (i = 0; i < fl->cap; i++, sd++) { sd->tag_idx = fl->tag_idx; rc = bus_dmamap_create(tag, 0, &sd->map); if (rc != 0) goto failed; } return (0); failed: while (--i >= 0) { sd--; bus_dmamap_destroy(tag, sd->map); if (sd->m) { m_init(sd->m, NULL, 0, M_NOWAIT, MT_DATA, 0); m_free(sd->m); sd->m = NULL; } } KASSERT(sd == fl->sdesc, ("%s: EDOOFUS", __func__)); free(fl->sdesc, M_CXGBE); fl->sdesc = NULL; return (rc); } static void free_fl_sdesc(struct sge_fl *fl) { struct fl_sdesc *sd; int i; FL_LOCK_ASSERT_OWNED(fl); sd = fl->sdesc; for (i = 0; i < fl->cap; i++, sd++) { if (sd->m) { m_init(sd->m, NULL, 0, M_NOWAIT, MT_DATA, 0); m_free(sd->m); sd->m = NULL; } if (sd->cl) { bus_dmamap_unload(fl->tag[sd->tag_idx], sd->map); uma_zfree(FL_BUF_ZONE(sd->tag_idx), sd->cl); sd->cl = NULL; } bus_dmamap_destroy(fl->tag[sd->tag_idx], sd->map); } 
free(fl->sdesc, M_CXGBE); fl->sdesc = NULL; } static int alloc_tx_maps(struct sge_txq *txq) { struct tx_map *txm; int i, rc, count; /* * We can stuff ~10 frames in an 8-descriptor txpkts WR (8 is the SGE * limit for any WR). txq->no_dmamap events shouldn't occur if maps is * sized for the worst case. */ count = txq->eq.qsize * 10 / 8; txq->map_total = txq->map_avail = count; txq->map_cidx = txq->map_pidx = 0; txq->maps = malloc(count * sizeof(struct tx_map), M_CXGBE, M_ZERO | M_WAITOK); txm = txq->maps; for (i = 0; i < count; i++, txm++) { rc = bus_dmamap_create(txq->tx_tag, 0, &txm->map); if (rc != 0) goto failed; } return (0); failed: while (--i >= 0) { txm--; bus_dmamap_destroy(txq->tx_tag, txm->map); } KASSERT(txm == txq->maps, ("%s: EDOOFUS", __func__)); free(txq->maps, M_CXGBE); txq->maps = NULL; return (rc); } static void free_tx_maps(struct sge_txq *txq) { struct tx_map *txm; int i; txm = txq->maps; for (i = 0; i < txq->map_total; i++, txm++) { if (txm->m) { bus_dmamap_unload(txq->tx_tag, txm->map); m_freem(txm->m); txm->m = NULL; } bus_dmamap_destroy(txq->tx_tag, txm->map); } free(txq->maps, M_CXGBE); txq->maps = NULL; } /* * We'll do immediate data tx for non-TSO, but only when not coalescing. We're * willing to use upto 2 hardware descriptors which means a maximum of 96 bytes * of immediate data. */ #define IMM_LEN ( \ 2 * TX_EQ_ESIZE \ - sizeof(struct fw_eth_tx_pkt_wr) \ - sizeof(struct cpl_tx_pkt_core)) /* * Returns non-zero on failure, no need to cleanup anything in that case. * * Note 1: We always try to defrag the mbuf if required and return EFBIG only * if the resulting chain still won't fit in a tx descriptor. * * Note 2: We'll pullup the mbuf chain if TSO is requested and the first mbuf * does not have the TCP header in it. 
*/
static int
get_pkt_sgl(struct sge_txq *txq, struct mbuf **fp, struct sgl *sgl,
    int sgl_only)
{
	struct mbuf *pkt = *fp;
	struct tx_map *slot;
	int error, extra_segs;
	int tried_defrag = 0;

	TXQ_LOCK_ASSERT_OWNED(txq);

	/* LSO frames always go out as an SGL, never as immediate data. */
	if (pkt->m_pkthdr.tso_segsz != 0)
		sgl_only = 1;

	/*
	 * At most two passes: the second one happens only if the first DMA
	 * load reported EFBIG and the chain was successfully defragmented.
	 */
	for (;;) {
		sgl->nsegs = 0;

		/* Returning 0 with nsegs == 0 tells the caller to use imm. tx */
		if (!sgl_only && pkt->m_pkthdr.len <= IMM_LEN)
			return (0);

		if (txq->map_avail == 0) {
			txq->no_dmamap++;
			return (ENOMEM);
		}
		slot = &txq->maps[txq->map_pidx];

		/*
		 * For TSO make sure the first mbuf holds at least 50 bytes.
		 * NOTE(review): 50 presumably covers the headers that the LSO
		 * work request is built from; confirm against write_txpkt_wr.
		 */
		if (pkt->m_pkthdr.tso_segsz != 0 && pkt->m_len < 50) {
			pkt = m_pullup(pkt, 50);
			*fp = pkt;
			if (pkt == NULL)
				return (ENOBUFS);
		}

		error = bus_dmamap_load_mbuf_sg(txq->tx_tag, slot->map, pkt,
		    sgl->seg, &sgl->nsegs, BUS_DMA_NOWAIT);
		if (error != EFBIG || tried_defrag)
			break;

		/* Too many segments; compact the chain and try once more. */
		pkt = m_defrag(pkt, M_DONTWAIT);
		if (pkt == NULL)
			return (EFBIG);
		tried_defrag = 1;
		*fp = pkt;
	}
	if (error != 0)
		return (error);

	/* The map is in use now; remember the mbuf and advance the ring. */
	slot->m = pkt;
	txq->map_avail--;
	txq->map_pidx++;
	if (txq->map_pidx == txq->map_total)
		txq->map_pidx = 0;

	KASSERT(sgl->nsegs > 0 && sgl->nsegs <= TX_SGL_SEGS,
	    ("%s: bad DMA mapping (%d segments)", __func__, sgl->nsegs));

	/*
	 * Work out how many flits this frame's SGL occupies.  The first
	 * segment costs 2 flits (ULPTX header + len0, then addr0).  The
	 * remaining segments are packed in pairs as (len + len, addr, addr),
	 * i.e. 3 flits per pair, and an unpaired last segment still needs a
	 * full length flit plus its address flit (its partner len is 0).
	 */
	extra_segs = sgl->nsegs - 1;
	sgl->nflits = (3 * extra_segs) / 2 + (extra_segs & 1) + 2;

	return (0);
}

/*
 * Releases all the txq resources used up in the specified sgl.
*/
static int
free_pkt_sgl(struct sge_txq *txq, struct sgl *sgl)
{
	struct tx_map *txm;

	TXQ_LOCK_ASSERT_OWNED(txq);

	/* nsegs == 0 is the immediate-data case: no DMA map was consumed. */
	if (sgl->nsegs == 0)
		return (0);	/* didn't use any map */

	/* 1 pkt uses exactly 1 map, back it out */

	/*
	 * Step the map producer index back by one (with wraparound) so that
	 * it points at the map that was handed out last.
	 */
	txq->map_avail++;
	if (txq->map_pidx > 0)
		txq->map_pidx--;
	else
		txq->map_pidx = txq->map_total - 1;

	/*
	 * Only the DMA mapping is undone here.  The mbuf itself is not freed,
	 * the map merely stops referring to it.
	 */
	txm = &txq->maps[txq->map_pidx];
	bus_dmamap_unload(txq->tx_tag, txm->map);
	txm->m = NULL;

	return (0);
}

/*
 * Writes a work request carrying the single frame m at the egress queue's
 * current producer index and advances the queue's software state (pidx,
 * avail, pending) by the number of descriptors used.
 *
 * sgl->nsegs > 0 means the payload is described by the SGL, nsegs == 0 means
 * the frame's bytes are copied into the work request as immediate data.
 *
 * Returns ENOMEM, before anything has been written or modified, if the work
 * request needs more descriptors than eq->avail.  Returns 0 otherwise.
 */
static int
write_txpkt_wr(struct port_info *pi, struct sge_txq *txq, struct mbuf *m,
    struct sgl *sgl)
{
	struct sge_eq *eq = &txq->eq;
	struct fw_eth_tx_pkt_wr *wr;
	struct cpl_tx_pkt_core *cpl;
	uint32_t ctrl;	/* used in many unrelated places */
	uint64_t ctrl1;
	int nflits, ndesc, pktlen;
	struct tx_sdesc *txsd;
	caddr_t dst;

	TXQ_LOCK_ASSERT_OWNED(txq);

	pktlen = m->m_pkthdr.len;

	/*
	 * Do we have enough flits to send this frame out?
	 */
	/*
	 * ctrl accumulates the immediate data length for the WR header (the
	 * CPL(s), plus the frame itself when it is sent as immediate data)
	 * while nflits accumulates the total size of the WR in 8 byte flits.
	 */
	ctrl = sizeof(struct cpl_tx_pkt_core);
	if (m->m_pkthdr.tso_segsz) {
		nflits = TXPKT_LSO_WR_HDR;
		ctrl += sizeof(struct cpl_tx_pkt_lso);
	} else
		nflits = TXPKT_WR_HDR;
	if (sgl->nsegs > 0)
		nflits += sgl->nflits;
	else {
		nflits += howmany(pktlen, 8);
		ctrl += pktlen;
	}
	/* 8 flits per hardware descriptor. */
	ndesc = howmany(nflits, 8);
	if (ndesc > eq->avail)
		return (ENOMEM);

	/* Firmware work request header */
	wr = (void *)&eq->desc[eq->pidx];
	wr->op_immdlen = htobe32(V_FW_WR_OP(FW_ETH_TX_PKT_WR) |
	    V_FW_WR_IMMDLEN(ctrl));
	ctrl = V_FW_WR_LEN16(howmany(nflits, 2));
	/*
	 * This WR uses up every descriptor that is left.  Ask for an egress
	 * update (unless one is already outstanding) and note that in the
	 * queue's flags.
	 */
	if (eq->avail == ndesc && !(eq->flags & EQ_CRFLUSHED)) {
		ctrl |= F_FW_WR_EQUEQ | F_FW_WR_EQUIQ;
		eq->flags |= EQ_CRFLUSHED;
	}

	wr->equiq_to_len16 = htobe32(ctrl);
	wr->r3 = 0;

	if (m->m_pkthdr.tso_segsz) {
		/* The LSO CPL sits between the WR header and the pkt CPL. */
		struct cpl_tx_pkt_lso *lso = (void *)(wr + 1);
		struct ether_header *eh;
		struct ip *ip;
		struct tcphdr *tcp;

		ctrl = V_LSO_OPCODE(CPL_TX_PKT_LSO) | F_LSO_FIRST_SLICE |
		    F_LSO_LAST_SLICE;

		/*
		 * The headers are read straight out of the first mbuf.
		 * NOTE(review): this relies on the Ethernet, IP and TCP
		 * headers being contiguous there - confirm that the caller
		 * guarantees enough bytes for the VLAN/IP options cases.
		 */
		eh = mtod(m, struct ether_header *);
		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
			ctrl |= V_LSO_ETHHDR_LEN(1);
			ip = (void *)((struct ether_vlan_header *)eh + 1);
		} else
			ip = (void *)(eh + 1);

		tcp = (void *)((uintptr_t)ip + ip->ip_hl * 4);
		ctrl |= V_LSO_IPHDR_LEN(ip->ip_hl) |
		    V_LSO_TCPHDR_LEN(tcp->th_off);

		lso->lso_ctrl = htobe32(ctrl);
		lso->ipid_ofst = htobe16(0);
		lso->mss = htobe16(m->m_pkthdr.tso_segsz);
		lso->seqno_offset = htobe32(0);
		lso->len = htobe32(pktlen);

		cpl = (void *)(lso + 1);

		txq->tso_wrs++;
	} else
		cpl = (void *)(wr + 1);

	/* Checksum offload */
	/* The control bits disable offloads, so set them when not requested. */
	ctrl1 = 0;
	if (!(m->m_pkthdr.csum_flags & CSUM_IP))
		ctrl1 |= F_TXPKT_IPCSUM_DIS;
	if (!(m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP)))
		ctrl1 |= F_TXPKT_L4CSUM_DIS;
	if (m->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TCP | CSUM_UDP))
		txq->txcsum++;	/* some hardware assistance provided */

	/* VLAN tag insertion */
	if (m->m_flags & M_VLANTAG) {
		ctrl1 |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(m->m_pkthdr.ether_vtag);
		txq->vlan_insertion++;
	}

	/* CPL header */
	cpl->ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT) |
	    V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_PF(pi->adapter->pf));
	cpl->pack = 0;
	cpl->len = htobe16(pktlen);
	cpl->ctrl1 = htobe64(ctrl1);

	/* Software descriptor */
	/* It is the one at the WR's first descriptor (pidx before advancing). */
	txsd = &txq->sdesc[eq->pidx];
	txsd->desc_used = ndesc;

	/* Account for the descriptors; pidx wraps around at eq->cap. */
	eq->pending += ndesc;
	eq->avail -= ndesc;
	eq->pidx += ndesc;
	if (eq->pidx >= eq->cap)
		eq->pidx -= eq->cap;

	/* SGL */
	/* The payload (SGL or the frame's bytes) starts right after the CPL. */
	dst = (void *)(cpl + 1);
	if (sgl->nsegs > 0) {
		txsd->credits = 1;
		txq->sgl_wrs++;
		write_sgl_to_txd(eq, sgl, &dst);
	} else {
		txsd->credits = 0;
		txq->imm_wrs++;
		/* Copy every mbuf of the chain into the work request. */
		for (; m; m = m->m_next) {
			copy_to_txd(eq, mtod(m, caddr_t), &dst, m->m_len);
#ifdef INVARIANTS
			pktlen -= m->m_len;
#endif
		}
#ifdef INVARIANTS
		/* The chain must add up to exactly the packet header's length. */
		KASSERT(pktlen == 0, ("%s: %d bytes left.", __func__, pktlen));
#endif

	}

	txq->txpkt_wrs++;
	return (0);
}

/*
 * Returns 0 to indicate that m has been accepted into a coalesced tx work
 * request. It has either been folded into txpkts or txpkts was flushed and m
 * has started a new coalesced work request (as the first frame in a fresh
 * txpkts).
 *
 * Returns non-zero to indicate a failure - caller is responsible for
 * transmitting m, if there was anything in txpkts it has been flushed.
*/
static int
add_to_txpkts(struct port_info *pi, struct sge_txq *txq, struct txpkts *txpkts,
    struct mbuf *m, struct sgl *sgl)
{
	struct sge_eq *eq = &txq->eq;
	int need;	/* flits this frame would add */

	TXQ_LOCK_ASSERT_OWNED(txq);

	/* Try to append to the work request that is already being built. */
	if (txpkts->npkt > 0) {
		need = TXPKTS_PKT_HDR + sgl->nflits;

		if (m->m_pkthdr.tso_segsz == 0 &&
		    txpkts->nflits + need <= TX_WR_FLITS &&
		    txpkts->nflits + need <= eq->avail * 8 &&
		    txpkts->plen + m->m_pkthdr.len < 65536) {
			txpkts->npkt++;
			txpkts->nflits += need;
			txpkts->plen += m->m_pkthdr.len;

			/* One more DMA map is owed by this WR's descriptor. */
			txq->sdesc[eq->pidx].credits++;

			return (0);
		}

		/*
		 * m does not fit.  Push the pending work request out first
		 * (this moves eq->pidx), then see whether m can begin a new
		 * one.
		 */
		write_txpkts_wr(txq, txpkts);
	}

	/* From here on txpkts is empty and m would be its first frame. */
	KASSERT(txpkts->npkt == 0, ("%s: txpkts not empty", __func__));

	need = TXPKTS_WR_HDR + sgl->nflits;

	/* TSO frames, and frames too large for the ring or a WR, are refused. */
	if (m->m_pkthdr.tso_segsz != 0 || need > eq->avail * 8 ||
	    need > TX_WR_FLITS)
		return (EINVAL);

	/* Open a new coalesced work request holding just m. */
	txpkts->plen = m->m_pkthdr.len;
	txpkts->flitp = &eq->desc[eq->pidx].flit[2];
	txpkts->nflits = need;
	txpkts->npkt = 1;

	txq->sdesc[eq->pidx].credits = 1;

	return (0);
}

/*
 * Note that write_txpkts_wr can never run out of hardware descriptors (but
 * write_txpkt_wr can).  add_to_txpkts ensures that a frame is accepted for
 * coalescing only if sufficient hardware descriptors are available.
*/
static void
write_txpkts_wr(struct sge_txq *txq, struct txpkts *txpkts)
{
	struct sge_eq *eq = &txq->eq;
	struct fw_eth_tx_pkts_wr *wr;
	struct tx_sdesc *txsd;
	uint32_t ctrl;
	int ndesc;

	TXQ_LOCK_ASSERT_OWNED(txq);

	ndesc = howmany(txpkts->nflits, 8);	/* 8 flits per descriptor */

	/* Only the WR header at the start of the first descriptor is written here. */
	wr = (void *)&eq->desc[eq->pidx];
	wr->op_immdlen = htobe32(V_FW_WR_OP(FW_ETH_TX_PKTS_WR) |
	    V_FW_WR_IMMDLEN(0)); /* immdlen does not matter in this WR */
	ctrl = V_FW_WR_LEN16(howmany(txpkts->nflits, 2));
	/*
	 * Same rule as in write_txpkt_wr: when this request uses up every
	 * available descriptor, set EQUEQ/EQUIQ once and remember it in
	 * EQ_CRFLUSHED.
	 */
	if (eq->avail == ndesc && !(eq->flags & EQ_CRFLUSHED)) {
		ctrl |= F_FW_WR_EQUEQ | F_FW_WR_EQUIQ;
		eq->flags |= EQ_CRFLUSHED;
	}
	wr->equiq_to_len16 = htobe32(ctrl);
	wr->plen = htobe16(txpkts->plen);
	wr->npkt = txpkts->npkt;
	wr->r3 = wr->r4 = 0;

	/* Everything else already written */

	/* Record the descriptor count against the WR's first descriptor. */
	txsd = &txq->sdesc[eq->pidx];
	txsd->desc_used = ndesc;

	KASSERT(eq->avail >= ndesc, ("%s: out of descriptors", __func__));

	eq->pending += ndesc;
	eq->avail -= ndesc;
	eq->pidx += ndesc;
	if (eq->pidx >= eq->cap)
		eq->pidx -= eq->cap;

	txq->txpkts_pkts += txpkts->npkt;
	txq->txpkts_wrs++;
	txpkts->npkt = 0;	/* emptied */
}

/*
 * Appends one frame to the coalesced work request being built in txpkts.  A
 * ULPTX master command, a ULPTX immediate-data subcommand, a cpl_tx_pkt_core
 * and the frame's SGL are written starting at txpkts->flitp, wrapping from the
 * status page (eq->spg) back to the start of the ring (eq->desc) as needed.
 *
 * On return txpkts->flitp points just past the SGL and txpkts->nflits has been
 * increased by the return value of write_sgl_to_txd.
 */
static inline void
write_ulp_cpl_sgl(struct port_info *pi, struct sge_txq *txq,
    struct txpkts *txpkts, struct mbuf *m, struct sgl *sgl)
{
	struct ulp_txpkt *ulpmc;
	struct ulptx_idata *ulpsc;
	struct cpl_tx_pkt_core *cpl;
	struct sge_eq *eq = &txq->eq;
	uintptr_t flitp, start, end;
	uint64_t ctrl;
	caddr_t dst;

	KASSERT(txpkts->npkt > 0, ("%s: txpkts is empty", __func__));

	/* Ring bounds as integers, for the wraparound checks below. */
	start = (uintptr_t)eq->desc;
	end = (uintptr_t)eq->spg;

	/* Checksum offload */
	ctrl = 0;
	if (!(m->m_pkthdr.csum_flags & CSUM_IP))
		ctrl |= F_TXPKT_IPCSUM_DIS;
	if (!(m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP)))
		ctrl |= F_TXPKT_L4CSUM_DIS;
	if (m->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TCP | CSUM_UDP))
		txq->txcsum++;	/* some hardware assistance provided */

	/* VLAN tag insertion */
	if (m->m_flags & M_VLANTAG) {
		ctrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(m->m_pkthdr.ether_vtag);
		txq->vlan_insertion++;
	}

	/*
	 * The previous packet's SGL must have ended at a 16 byte boundary (this
	 * is required by the firmware/hardware).  It follows that flitp cannot
	 * wrap around between the ULPTX master command and ULPTX subcommand (8
	 * bytes each), and that it can not wrap around in the middle of the
	 * cpl_tx_pkt_core either.
	 */
	flitp = (uintptr_t)txpkts->flitp;
	KASSERT((flitp & 0xf) == 0,
	    ("%s: last SGL did not end at 16 byte boundary: %p",
	    __func__, txpkts->flitp));

	/* ULP master command */
	ulpmc = (void *)flitp;
	ulpmc->cmd_dest = htonl(V_ULPTX_CMD(ULP_TX_PKT) | V_ULP_TXPKT_DEST(0) |
	    V_ULP_TXPKT_FID(eq->iqid));
	/* Length of everything written for this frame, in 16 byte units. */
	ulpmc->len = htonl(howmany(sizeof(*ulpmc) + sizeof(*ulpsc) +
	    sizeof(*cpl) + 8 * sgl->nflits, 16));

	/* ULP subcommand */
	ulpsc = (void *)(ulpmc + 1);
	ulpsc->cmd_more = htobe32(V_ULPTX_CMD((u32)ULP_TX_SC_IMM) |
	    F_ULP_TX_SC_MORE);
	ulpsc->len = htobe32(sizeof(struct cpl_tx_pkt_core));

	/* Only an exact hit on the status page is possible here; wrap then. */
	flitp += sizeof(*ulpmc) + sizeof(*ulpsc);
	if (flitp == end)
		flitp = start;

	/* CPL_TX_PKT */
	cpl = (void *)flitp;
	cpl->ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT) |
	    V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_PF(pi->adapter->pf));
	cpl->pack = 0;
	cpl->len = htobe16(m->m_pkthdr.len);
	cpl->ctrl1 = htobe64(ctrl);

	flitp += sizeof(*cpl);
	if (flitp == end)
		flitp = start;

	/* SGL for this frame */
	dst = (caddr_t)flitp;
	/* The return value is 1 when a padding flit was added, 0 otherwise. */
	txpkts->nflits += write_sgl_to_txd(eq, sgl, &dst);
	txpkts->flitp = (void *)dst;

	KASSERT(((uintptr_t)dst & 0xf) == 0,
	    ("%s: SGL ends at %p (not a 16 byte boundary)", __func__, dst));
}

/*
 * If the SGL ends on an address that is not 16 byte aligned, this function will
 * add a 0 filled flit at the end.  It returns 1 in that case.
*/
static int
write_sgl_to_txd(struct sge_eq *eq, struct sgl *sgl, caddr_t *to)
{
	__be64 *flitp, *end;
	struct ulptx_sgl *usgl;
	bus_dma_segment_t *seg;
	int i, padded;

	KASSERT(sgl->nsegs > 0 && sgl->nflits > 0,
	    ("%s: bad SGL - nsegs=%d, nflits=%d",
	    __func__, sgl->nsegs, sgl->nflits));

	KASSERT(((uintptr_t)(*to) & 0xf) == 0,
	    ("%s: SGL must start at a 16 byte boundary: %p", __func__, *to));

	flitp = (__be64 *)(*to);
	end = flitp + sgl->nflits;	/* where the SGL ends if it doesn't wrap */
	seg = &sgl->seg[0];
	usgl = (void *)flitp;

	/*
	 * We start at a 16 byte boundary somewhere inside the tx descriptor
	 * ring, so we're at least 16 bytes away from the status page.  There is
	 * no chance of a wrap around in the middle of usgl (which is 16 bytes).
	 */

	/* The first segment lives in the ulptx_sgl header itself. */
	usgl->cmd_nsge = htobe32(V_ULPTX_CMD(ULP_TX_SC_DSGL) |
	    V_ULPTX_NSGE(sgl->nsegs));
	usgl->len0 = htobe32(seg->ds_len);
	usgl->addr0 = htobe64(seg->ds_addr);
	seg++;

	if ((uintptr_t)end <= (uintptr_t)eq->spg) {

		/* Won't wrap around at all */

		/* Remaining segments are stored in pairs, two per sge[] entry. */
		for (i = 0; i < sgl->nsegs - 1; i++, seg++) {
			usgl->sge[i / 2].len[i & 1] = htobe32(seg->ds_len);
			usgl->sge[i / 2].addr[i & 1] = htobe64(seg->ds_addr);
		}
		/* Odd number of remaining segments: zero the unused length slot. */
		if (i & 1)
			usgl->sge[i / 2].len[1] = htobe32(0);
	} else {

		/* Will wrap somewhere in the rest of the SGL */

		/* 2 flits already written, write the rest flit by flit */
		flitp = (void *)(usgl + 1);
		for (i = 0; i < sgl->nflits - 2; i++) {
			if ((uintptr_t)flitp == (uintptr_t)eq->spg)
				flitp = (void *)eq->desc;
			*flitp++ = get_flit(seg, sgl->nsegs - 1, i);
		}
		end = flitp;
	}

	/* Pad with a zero flit so that the SGL ends on a 16 byte boundary. */
	if ((uintptr_t)end & 0xf) {
		*(uint64_t *)end = 0;
		end++;
		padded = 1;
	} else
		padded = 0;

	/* Hand back the next write position, wrapped to the start if needed. */
	if ((uintptr_t)end == (uintptr_t)eq->spg)
		*to = (void *)eq->desc;
	else
		*to = (void *)end;

	return (padded);
}

/*
 * Copies len bytes from "from" into the descriptor ring at *to and advances
 * *to past the copied data.  A copy that would run past the status page
 * (eq->spg) is split in two, with the remainder placed at the start of the
 * ring (eq->desc).
 *
 * A copy that ends exactly at eq->spg leaves *to equal to eq->spg; the next
 * call then takes the split path with an empty first portion.
 */
static inline void
copy_to_txd(struct sge_eq *eq, caddr_t from, caddr_t *to, int len)
{
	if ((uintptr_t)(*to) + len <= (uintptr_t)eq->spg) {
		bcopy(from, *to, len);
		(*to) += len;
	} else {
		/* Bytes that still fit before the status page. */
		int portion = (uintptr_t)eq->spg - (uintptr_t)(*to);

		bcopy(from, *to, portion);
		from += portion;
		portion = len - portion;	/* remaining */
		bcopy(from, (void *)eq->desc, portion);
		(*to) = (caddr_t)eq->desc + portion;
	}
}

/*
 * Issues a write barrier, then writes the queue's context id and its count of
 * pending descriptors to the SGE_PF_KDOORBELL register, and resets the pending
 * count.
 */
static inline void
ring_eq_db(struct adapter *sc, struct sge_eq *eq)
{
	wmb();
	t4_write_reg(sc, MYPF_REG(A_SGE_PF_KDOORBELL),
	    V_QID(eq->cntxt_id) | V_PIDX(eq->pending));
	eq->pending = 0;
}

/*
 * Returns the distance from the driver's consumer index (eq->cidx) to the
 * big-endian cidx read from the status page, allowing for wraparound at
 * eq->cap.
 */
static inline int
reclaimable(struct sge_eq *eq)
{
	unsigned int cidx;

	cidx = eq->spg->cidx;	/* stable snapshot */
	cidx = be16_to_cpu(cidx);

	if (cidx >= eq->cidx)
		return (cidx - eq->cidx);
	else
		return (cidx + eq->cap - eq->cidx);
}

/*
 * There are "can_reclaim" tx descriptors ready to be reclaimed.  Reclaim as
 * many as possible but stop when there are around "n" mbufs to free.
 *
 * The actual number reclaimed is provided as the return value.
 */
static int
reclaim_tx_descs(struct sge_txq *txq, int can_reclaim, int n)
{
	struct tx_sdesc *txsd;
	struct tx_map *txm;
	unsigned int reclaimed, maps;
	struct sge_eq *eq = &txq->eq;

	EQ_LOCK_ASSERT_OWNED(eq);

	/* A count of 0 from the caller means: work it out from the status page. */
	if (can_reclaim == 0)
		can_reclaim = reclaimable(eq);

	/*
	 * First pass: walk whole work requests, one software descriptor each,
	 * adding up descriptors and the DMA maps they own.
	 * NOTE(review): maps is unsigned and n is int, so a negative n would be
	 * compared as a very large value.
	 */
	maps = reclaimed = 0;
	while (can_reclaim && maps < n) {
		int ndesc;

		txsd = &txq->sdesc[eq->cidx];
		ndesc = txsd->desc_used;

		/* Firmware doesn't return "partial" credits. */
		KASSERT(can_reclaim >= ndesc,
		    ("%s: unexpected number of credits: %d, %d",
		    __func__, can_reclaim, ndesc));

		maps += txsd->credits;

		reclaimed += ndesc;
		can_reclaim -= ndesc;

		eq->cidx += ndesc;
		if (__predict_false(eq->cidx >= eq->cap))
			eq->cidx -= eq->cap;
	}

	txm = &txq->maps[txq->map_cidx];
	if (maps)
		prefetch(txm->m);

	eq->avail += reclaimed;
	KASSERT(eq->avail < eq->cap,	/* avail tops out at (cap - 1) */
	    ("%s: too many descriptors available", __func__));

	txq->map_avail += maps;
	KASSERT(txq->map_avail <= txq->map_total,
	    ("%s: too many maps available", __func__));

	/* Second pass: unload each DMA map and free the mbuf it tracked. */
	while (maps--) {
		struct tx_map *next;

		/* Prefetch the following map's mbuf, wrapping at the array end. */
		next = txm + 1;
		if (__predict_false(txq->map_cidx + 1 == txq->map_total))
			next = txq->maps;
		prefetch(next->m);

		bus_dmamap_unload(txq->tx_tag, txm->map);
		m_freem(txm->m);
		txm->m = NULL;

		txm = next;
		if (__predict_false(++txq->map_cidx == txq->map_total))
			txq->map_cidx = 0;
	}

	return (reclaimed);
}

/*
 * Writes a zeroed FW_EQ_FLUSH_WR with EQUEQ and EQUIQ set at eq->pidx, marks
 * the queue EQ_CRFLUSHED and consumes one descriptor.  The eq lock must be
 * held and at least one descriptor must be available (both are asserted).
 */
static void
write_eqflush_wr(struct sge_eq *eq)
{
	struct fw_eq_flush_wr *wr;

	EQ_LOCK_ASSERT_OWNED(eq);
	KASSERT(eq->avail > 0, ("%s: no descriptors left.", __func__));

	wr = (void *)&eq->desc[eq->pidx];
	bzero(wr, sizeof(*wr));
	wr->opcode = FW_EQ_FLUSH_WR;
	wr->equiq_to_len16 = htobe32(V_FW_WR_LEN16(sizeof(*wr) / 16) |
	    F_FW_WR_EQUEQ | F_FW_WR_EQUIQ);

	eq->flags |= EQ_CRFLUSHED;
	eq->pending++;
	eq->avail--;
	if (++eq->pidx == eq->cap)
		eq->pidx = 0;
}

/*
 * Returns flit number idx of the part of an SGL described by the segment array
 * sgl, which has nsegs entries.  Flits come in groups of three for each pair
 * of segments: one flit holding both lengths (the first in the low 32 bits,
 * the second shifted into the high 32 bits, or 0 if there is no second
 * segment), then the first segment's address, then the second segment's
 * address.
 */
static __be64
get_flit(bus_dma_segment_t *sgl, int nsegs, int idx)
{
	int i = (idx / 3) * 2;	/* index of the pair's first segment */

	switch (idx % 3) {
	case 0: {
		__be64 rc;

		rc = htobe32(sgl[i].ds_len);
		if (i + 1 < nsegs)
			rc |= (uint64_t)htobe32(sgl[i + 1].ds_len) << 32;

		return (rc);
	}
	case 1:
		return htobe64(sgl[i].ds_addr);
	case 2:
		return htobe64(sgl[i + 1].ds_addr);
	}

	return (0);
}

/*
 * Stores in fl->tag_idx the first index i for which FL_BUF_SIZE(i) is at least
 * mtu + FL_PKTSHIFT.  If no size is large enough the last index
 * (FL_BUF_SIZES - 1) is used.  The fl lock must be held.
 */
static void
set_fl_tag_idx(struct sge_fl *fl, int mtu)
{
	int i;

	FL_LOCK_ASSERT_OWNED(fl);

	for (i = 0; i < FL_BUF_SIZES - 1; i++) {
		if (FL_BUF_SIZE(i) >= (mtu + FL_PKTSHIFT))
			break;
	}

	fl->tag_idx = i;
}

/*
 * Handles an egress update for the tx queue named by the qid in cpl.  If the
 * queue has EQ_CRFLUSHED set, its resume_tx task is enqueued on the port's
 * taskqueue; otherwise one thread sleeping on the txq is woken up.  Always
 * returns 0.
 */
static int
handle_sge_egr_update(struct adapter *sc, const struct cpl_sge_egr_update *cpl)
{
	unsigned int qid = G_EGR_QID(ntohl(cpl->opcode_qid));
	struct sge *s = &sc->sge;
	struct sge_txq *txq;
	struct port_info *pi;

	/* eqmap is indexed by qid relative to the adapter's first egress queue. */
	txq = (void *)s->eqmap[qid - s->eq_start];
	TXQ_LOCK(txq);
	if (txq->eq.flags & EQ_CRFLUSHED) {
		pi = txq->ifp->if_softc;
		taskqueue_enqueue(pi->tq, &txq->resume_tx);
		txq->egr_update++;
	} else
		wakeup_one(txq);	/* txq is going away, wakeup free_txq */
	TXQ_UNLOCK(txq);

	return (0);
}

/*
 * handle_cpl: dispatches the CPL message that follows the rss_header at
 * iq->cdesc to its handler, keyed on rss->opcode.  Panics on an opcode it does
 * not know about.
 */
+static void
+handle_cpl(struct adapter *sc, struct sge_iq *iq)
+{
+	const struct rss_header *rss = (const void *)iq->cdesc;
+	const struct cpl_fw6_msg *cpl = (const void *)(rss + 1);
+
+	switch (rss->opcode) {
+	case CPL_FW4_MSG:
+	case CPL_FW6_MSG:
+		if (cpl->type == FW6_TYPE_CMD_RPL)
+			t4_handle_fw_rpl(sc, cpl->data);
+		break;
+
+	case CPL_SGE_EGR_UPDATE:
+		handle_sge_egr_update(sc, (const void *)cpl);
+		break;
+
+	case CPL_SET_TCB_RPL:
+		filter_rpl(sc, (const void *)cpl);
+		break;
+
+	default:
+		panic("%s: unexpected CPL opcode 0x%x", __func__, rss->opcode);
+	}
+}
+
/*
 * m0 is freed on successful transmission.
*/ static int ctrl_tx(struct adapter *sc, struct sge_ctrlq *ctrlq, struct mbuf *m0) { struct sge_eq *eq = &ctrlq->eq; int rc = 0, ndesc; int can_reclaim; caddr_t dst; struct mbuf *m; M_ASSERTPKTHDR(m0); if (m0->m_pkthdr.len > SGE_MAX_WR_LEN) { - ctrlq->too_long++; + log(LOG_ERR, "%s: %s work request too long (%d)", + device_get_nameunit(sc->dev), __func__, m0->m_pkthdr.len); return (EMSGSIZE); } ndesc = howmany(m0->m_pkthdr.len, CTRL_EQ_ESIZE); EQ_LOCK(eq); can_reclaim = reclaimable(eq); eq->cidx += can_reclaim; eq->avail += can_reclaim; if (__predict_false(eq->cidx >= eq->cap)) eq->cidx -= eq->cap; if (eq->avail < ndesc) { rc = EAGAIN; ctrlq->no_desc++; goto failed; } dst = (void *)&eq->desc[eq->pidx]; for (m = m0; m; m = m->m_next) copy_to_txd(eq, mtod(m, caddr_t), &dst, m->m_len); eq->pidx += ndesc; if (__predict_false(eq->pidx >= eq->cap)) eq->pidx -= eq->cap; eq->pending += ndesc; - ctrlq->total_wrs++; ring_eq_db(sc, eq); failed: EQ_UNLOCK(eq); if (rc == 0) m_freem(m0); return (rc); } static int -sysctl_abs_id(SYSCTL_HANDLER_ARGS) +sysctl_uint16(SYSCTL_HANDLER_ARGS) { uint16_t *id = arg1; int i = *id; return sysctl_handle_int(oidp, &i, 0, req); }