Index: head/sys/dev/cxgb/cxgb_adapter.h =================================================================== --- head/sys/dev/cxgb/cxgb_adapter.h (revision 333287) +++ head/sys/dev/cxgb/cxgb_adapter.h (revision 333288) @@ -1,579 +1,586 @@ /************************************************************************** SPDX-License-Identifier: BSD-2-Clause-FreeBSD Copyright (c) 2007-2009, Chelsio Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Neither the name of the Chelsio Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. $FreeBSD$ ***************************************************************************/ #ifndef _CXGB_ADAPTER_H_ #define _CXGB_ADAPTER_H_ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include struct adapter; struct sge_qset; extern int cxgb_debug; #ifdef DEBUG_LOCKING #define MTX_INIT(lock, lockname, class, flags) \ do { \ printf("initializing %s at %s:%d\n", lockname, __FILE__, __LINE__); \ mtx_init((lock), lockname, class, flags); \ } while (0) #define MTX_DESTROY(lock) \ do { \ printf("destroying %s at %s:%d\n", (lock)->lock_object.lo_name, __FILE__, __LINE__); \ mtx_destroy((lock)); \ } while (0) #else #define MTX_INIT mtx_init #define MTX_DESTROY mtx_destroy #endif enum { LF_NO = 0, LF_MAYBE, LF_YES }; struct port_info { struct adapter *adapter; struct ifnet *ifp; int if_flags; int flags; const struct port_type_info *port_type; struct cphy phy; struct cmac mac; struct timeval last_refreshed; struct link_config link_config; struct ifmedia media; struct mtx lock; uint32_t port_id; uint32_t tx_chan; uint32_t txpkt_intf; uint32_t first_qset; uint32_t nqsets; int link_fault; uint8_t hw_addr[ETHER_ADDR_LEN]; struct callout link_check_ch; struct task link_check_task; struct task timer_reclaim_task; struct cdev *port_cdev; #define PORT_LOCK_NAME_LEN 32 #define PORT_NAME_LEN 32 char lockbuf[PORT_LOCK_NAME_LEN]; char namebuf[PORT_NAME_LEN]; } __aligned(L1_CACHE_BYTES); enum { /* adapter flags */ FULL_INIT_DONE = (1 << 0), USING_MSI = (1 << 1), USING_MSIX = (1 << 2), QUEUES_BOUND = (1 << 3), FW_UPTODATE = (1 << 4), TPS_UPTODATE = (1 << 5), CXGB_SHUTDOWN = (1 << 6), CXGB_OFLD_INIT = (1 << 7), TP_PARITY_INIT = (1 << 8), CXGB_BUSY = (1 << 9), TOM_INIT_DONE = (1 << 10), /* port flags */ DOOMED = (1 << 0), }; #define IS_DOOMED(p) (p->flags & DOOMED) #define SET_DOOMED(p) do {p->flags |= DOOMED;} while (0) #define IS_BUSY(sc) (sc->flags & CXGB_BUSY) #define SET_BUSY(sc) do {sc->flags |= CXGB_BUSY;} while (0) #define CLR_BUSY(sc) do {sc->flags &= ~CXGB_BUSY;} while (0) #define FL_Q_SIZE 4096 #define JUMBO_Q_SIZE 1024 #define RSPQ_Q_SIZE 2048 #define TX_ETH_Q_SIZE 1024 #define TX_OFLD_Q_SIZE 1024 #define TX_CTRL_Q_SIZE 256 enum { TXQ_ETH = 0, TXQ_OFLD = 1, TXQ_CTRL = 2, }; /* * work request size in bytes */ #define WR_LEN (WR_FLITS * 8) #define PIO_LEN (WR_LEN - sizeof(struct cpl_tx_pkt_lso)) struct lro_state { unsigned short enabled; struct lro_ctrl ctrl; }; #define RX_BUNDLE_SIZE 8 struct rsp_desc; struct sge_rspq { uint32_t credits; uint32_t size; uint32_t cidx; uint32_t gen; uint32_t polling; uint32_t holdoff_tmr; uint32_t next_holdoff; uint32_t imm_data; uint32_t async_notif; uint32_t cntxt_id; uint32_t offload_pkts; uint32_t pure_rsps; uint32_t unhandled_irqs; uint32_t starved; bus_addr_t phys_addr; bus_dma_tag_t desc_tag; bus_dmamap_t desc_map; struct t3_mbuf_hdr rspq_mh; struct rsp_desc *desc; struct mtx lock; #define RSPQ_NAME_LEN 32 char lockbuf[RSPQ_NAME_LEN]; uint32_t rspq_dump_start; uint32_t rspq_dump_count; }; struct rx_desc; struct rx_sw_desc; struct sge_fl { uint32_t buf_size; uint32_t credits; uint32_t size; uint32_t cidx; uint32_t pidx; uint32_t gen; uint32_t db_pending; bus_addr_t phys_addr; uint32_t cntxt_id; uint32_t empty; bus_dma_tag_t desc_tag; bus_dmamap_t desc_map; bus_dma_tag_t entry_tag; uma_zone_t zone; struct rx_desc *desc; struct rx_sw_desc *sdesc; int type; }; struct tx_desc; struct tx_sw_desc; #define TXQ_TRANSMITTING 0x1 struct sge_txq { uint64_t flags; uint32_t in_use; uint32_t size; uint32_t processed; uint32_t cleaned; uint32_t stop_thres; uint32_t cidx; uint32_t pidx; uint32_t gen; uint32_t unacked; uint32_t db_pending; struct tx_desc *desc; struct tx_sw_desc *sdesc; uint32_t token; bus_addr_t phys_addr; struct task qresume_task; struct task qreclaim_task; uint32_t cntxt_id; uint64_t stops; uint64_t restarts; bus_dma_tag_t desc_tag; bus_dmamap_t desc_map; bus_dma_tag_t entry_tag; struct mbufq sendq; struct buf_ring *txq_mr; struct ifaltq *txq_ifq; struct callout txq_timer; struct callout txq_watchdog; uint64_t txq_coalesced; uint32_t txq_skipped; uint32_t txq_enqueued; uint32_t txq_dump_start; uint32_t txq_dump_count; uint64_t txq_direct_packets; uint64_t txq_direct_bytes; uint64_t txq_frees; struct sg_ent txq_sgl[TX_MAX_SEGS / 2 + 1]; }; #define SGE_PSTAT_MAX (SGE_PSTAT_VLANINS+1) #define QS_EXITING 0x1 #define QS_RUNNING 0x2 #define QS_BOUND 0x4 #define QS_FLUSHING 0x8 #define QS_TIMEOUT 0x10 struct sge_qset { struct sge_rspq rspq; struct sge_fl fl[SGE_RXQ_PER_SET]; struct lro_state lro; struct sge_txq txq[SGE_TXQ_PER_SET]; uint32_t txq_stopped; /* which Tx queues are stopped */ struct port_info *port; struct adapter *adap; int idx; /* qset # */ int qs_flags; int coalescing; struct cv qs_cv; struct mtx lock; #define QS_NAME_LEN 32 char namebuf[QS_NAME_LEN]; }; struct sge { struct sge_qset qs[SGE_QSETS]; struct mtx reg_lock; }; struct filter_info; typedef int (*cpl_handler_t)(struct sge_qset *, struct rsp_desc *, struct mbuf *); struct adapter { SLIST_ENTRY(adapter) link; device_t dev; int flags; /* PCI register resources */ int regs_rid; struct resource *regs_res; int udbs_rid; struct resource *udbs_res; bus_space_handle_t bh; bus_space_tag_t bt; bus_size_t mmio_len; uint32_t link_width; /* DMA resources */ bus_dma_tag_t parent_dmat; bus_dma_tag_t rx_dmat; bus_dma_tag_t rx_jumbo_dmat; bus_dma_tag_t tx_dmat; /* Interrupt resources */ struct resource *irq_res; int irq_rid; void *intr_tag; uint32_t msix_regs_rid; struct resource *msix_regs_res; struct resource *msix_irq_res[SGE_QSETS]; int msix_irq_rid[SGE_QSETS]; void *msix_intr_tag[SGE_QSETS]; uint8_t rxpkt_map[8]; /* maps RX_PKT interface values to port ids */ uint8_t rrss_map[SGE_QSETS]; /* revers RSS map table */ uint16_t rspq_map[RSS_TABLE_SIZE]; /* maps 7-bit cookie to qidx */ union { uint8_t fill[SGE_QSETS]; uint64_t coalesce; } u; #define tunq_fill u.fill #define tunq_coalesce u.coalesce struct filter_info *filters; /* Tasks */ struct task slow_intr_task; struct task tick_task; struct taskqueue *tq; struct callout cxgb_tick_ch; struct callout sge_timer_ch; /* Register lock for use by the hardware layer */ struct mtx mdio_lock; struct mtx elmer_lock; /* Bookkeeping for the hardware layer */ struct adapter_params params; unsigned int slow_intr_mask; unsigned long irq_stats[IRQ_NUM_STATS]; struct sge sge; struct mc7 pmrx; struct mc7 pmtx; struct mc7 cm; struct mc5 mc5; struct port_info port[MAX_NPORTS]; device_t portdev[MAX_NPORTS]; #ifdef TCP_OFFLOAD void *tom_softc; void *iwarp_softc; #endif char fw_version[64]; char port_types[MAX_NPORTS + 1]; uint32_t open_device_map; #ifdef TCP_OFFLOAD int offload_map; #endif struct mtx lock; driver_intr_t *cxgb_intr; int msi_count; #define ADAPTER_LOCK_NAME_LEN 32 char lockbuf[ADAPTER_LOCK_NAME_LEN]; char reglockbuf[ADAPTER_LOCK_NAME_LEN]; char mdiolockbuf[ADAPTER_LOCK_NAME_LEN]; char elmerlockbuf[ADAPTER_LOCK_NAME_LEN]; int timestamp; #ifdef TCP_OFFLOAD #define NUM_CPL_HANDLERS 0xa7 cpl_handler_t cpl_handler[NUM_CPL_HANDLERS] __aligned(CACHE_LINE_SIZE); #endif }; struct t3_rx_mode { uint32_t idx; struct port_info *port; }; #define MDIO_LOCK(adapter) mtx_lock(&(adapter)->mdio_lock) #define MDIO_UNLOCK(adapter) mtx_unlock(&(adapter)->mdio_lock) #define ELMR_LOCK(adapter) mtx_lock(&(adapter)->elmer_lock) #define ELMR_UNLOCK(adapter) mtx_unlock(&(adapter)->elmer_lock) #define PORT_LOCK(port) mtx_lock(&(port)->lock); #define PORT_UNLOCK(port) mtx_unlock(&(port)->lock); #define PORT_LOCK_INIT(port, name) mtx_init(&(port)->lock, name, 0, MTX_DEF) #define PORT_LOCK_DEINIT(port) mtx_destroy(&(port)->lock) #define PORT_LOCK_ASSERT_NOTOWNED(port) mtx_assert(&(port)->lock, MA_NOTOWNED) #define PORT_LOCK_ASSERT_OWNED(port) mtx_assert(&(port)->lock, MA_OWNED) #define ADAPTER_LOCK(adap) mtx_lock(&(adap)->lock); #define ADAPTER_UNLOCK(adap) mtx_unlock(&(adap)->lock); #define ADAPTER_LOCK_INIT(adap, name) mtx_init(&(adap)->lock, name, 0, MTX_DEF) #define ADAPTER_LOCK_DEINIT(adap) mtx_destroy(&(adap)->lock) #define ADAPTER_LOCK_ASSERT_NOTOWNED(adap) mtx_assert(&(adap)->lock, MA_NOTOWNED) #define ADAPTER_LOCK_ASSERT_OWNED(adap) mtx_assert(&(adap)->lock, MA_OWNED) static __inline uint32_t t3_read_reg(adapter_t *adapter, uint32_t reg_addr) { return (bus_space_read_4(adapter->bt, adapter->bh, reg_addr)); } static __inline void t3_write_reg(adapter_t *adapter, uint32_t reg_addr, uint32_t val) { bus_space_write_4(adapter->bt, adapter->bh, reg_addr, val); } static __inline void t3_os_pci_read_config_4(adapter_t *adapter, int reg, uint32_t *val) { *val = pci_read_config(adapter->dev, reg, 4); } static __inline void t3_os_pci_write_config_4(adapter_t *adapter, int reg, uint32_t val) { pci_write_config(adapter->dev, reg, val, 4); } static __inline void t3_os_pci_read_config_2(adapter_t *adapter, int reg, uint16_t *val) { *val = pci_read_config(adapter->dev, reg, 2); } static __inline void t3_os_pci_write_config_2(adapter_t *adapter, int reg, uint16_t val) { pci_write_config(adapter->dev, reg, val, 2); } static __inline uint8_t * t3_get_next_mcaddr(struct t3_rx_mode *rm) { uint8_t *macaddr = NULL; struct ifnet *ifp = rm->port->ifp; struct ifmultiaddr *ifma; int i = 0; if_maddr_rlock(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; if (i == rm->idx) { macaddr = LLADDR((struct sockaddr_dl *)ifma->ifma_addr); break; } i++; } if_maddr_runlock(ifp); rm->idx++; return (macaddr); } static __inline void t3_init_rx_mode(struct t3_rx_mode *rm, struct port_info *port) { rm->idx = 0; rm->port = port; } static __inline struct port_info * adap2pinfo(struct adapter *adap, int idx) { return &adap->port[idx]; } int t3_os_find_pci_capability(adapter_t *adapter, int cap); int t3_os_pci_save_state(struct adapter *adapter); int t3_os_pci_restore_state(struct adapter *adapter); void t3_os_link_intr(struct port_info *); void t3_os_link_changed(adapter_t *adapter, int port_id, int link_status, int speed, int duplex, int fc, int mac_was_reset); void t3_os_phymod_changed(struct adapter *adap, int port_id); void t3_sge_err_intr_handler(adapter_t *adapter); #ifdef TCP_OFFLOAD int t3_offload_tx(struct adapter *, struct mbuf *); #endif void t3_os_set_hw_addr(adapter_t *adapter, int port_idx, u8 hw_addr[]); int t3_mgmt_tx(adapter_t *adap, struct mbuf *m); int t3_register_cpl_handler(struct adapter *, int, cpl_handler_t); int t3_sge_alloc(struct adapter *); int t3_sge_free(struct adapter *); int t3_sge_alloc_qset(adapter_t *, uint32_t, int, int, const struct qset_params *, int, struct port_info *); void t3_free_sge_resources(adapter_t *, int); void t3_sge_start(adapter_t *); void t3_sge_stop(adapter_t *); void t3b_intr(void *data); void t3_intr_msi(void *data); void t3_intr_msix(void *data); int t3_sge_init_adapter(adapter_t *); int t3_sge_reset_adapter(adapter_t *); int t3_sge_init_port(struct port_info *); void t3_free_tx_desc(struct sge_qset *qs, int n, int qid); void t3_rx_eth(struct adapter *adap, struct mbuf *m, int ethpad); void t3_add_attach_sysctls(adapter_t *sc); void t3_add_configured_sysctls(adapter_t *sc); int t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx, unsigned char *data); void t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p); /* * XXX figure out how we can return this to being private to sge */ #define desc_reclaimable(q) ((int)((q)->processed - (q)->cleaned - TX_MAX_DESC)) #define container_of(p, stype, field) ((stype *)(((uint8_t *)(p)) - offsetof(stype, field))) static __inline struct sge_qset * fl_to_qset(struct sge_fl *q, int qidx) { return container_of(q, struct sge_qset, fl[qidx]); } static __inline struct sge_qset * rspq_to_qset(struct sge_rspq *q) { return container_of(q, struct sge_qset, rspq); } static __inline struct sge_qset * txq_to_qset(struct sge_txq *q, int qidx) { return container_of(q, struct sge_qset, txq[qidx]); } #undef container_of #define OFFLOAD_DEVMAP_BIT (1 << MAX_NPORTS) static inline int offload_running(adapter_t *adapter) { return isset(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT); } void cxgb_tx_watchdog(void *arg); int cxgb_transmit(struct ifnet *ifp, struct mbuf *m); void cxgb_qflush(struct ifnet *ifp); void t3_iterate(void (*)(struct adapter *, void *), void *); void cxgb_refresh_stats(struct port_info *); + +#ifdef NETDUMP +int cxgb_netdump_encap(struct sge_qset *qs, struct mbuf **m); +int cxgb_netdump_poll_rx(adapter_t *adap, struct sge_qset *qs); +int cxgb_netdump_poll_tx(struct sge_qset *qs); +#endif + #endif Index: head/sys/dev/cxgb/cxgb_main.c =================================================================== --- head/sys/dev/cxgb/cxgb_main.c (revision 333287) +++ head/sys/dev/cxgb/cxgb_main.c (revision 333288) @@ -1,3580 +1,3655 @@ /************************************************************************** SPDX-License-Identifier: BSD-2-Clause-FreeBSD Copyright (c) 2007-2009, Chelsio Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Neither the name of the Chelsio Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ***************************************************************************/ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include #include #include #include #ifdef PRIV_SUPPORTED #include #endif static int cxgb_setup_interrupts(adapter_t *); static void cxgb_teardown_interrupts(adapter_t *); static void cxgb_init(void *); static int cxgb_init_locked(struct port_info *); static int cxgb_uninit_locked(struct port_info *); static int cxgb_uninit_synchronized(struct port_info *); static int cxgb_ioctl(struct ifnet *, unsigned long, caddr_t); static int cxgb_media_change(struct ifnet *); static int cxgb_ifm_type(int); static void cxgb_build_medialist(struct port_info *); static void cxgb_media_status(struct ifnet *, struct ifmediareq *); static uint64_t cxgb_get_counter(struct ifnet *, ift_counter); static int setup_sge_qsets(adapter_t *); static void cxgb_async_intr(void *); static void cxgb_tick_handler(void *, int); static void cxgb_tick(void *); static void link_check_callout(void *); static void check_link_status(void *, int); static void setup_rss(adapter_t *sc); static int alloc_filters(struct adapter *); static int setup_hw_filters(struct adapter *); static int set_filter(struct adapter *, int, const struct filter_info *); static inline void mk_set_tcb_field(struct cpl_set_tcb_field *, unsigned int, unsigned int, u64, u64); static inline void set_tcb_field_ulp(struct cpl_set_tcb_field *, unsigned int, unsigned int, u64, u64); #ifdef TCP_OFFLOAD static int cpl_not_handled(struct sge_qset *, struct rsp_desc *, struct mbuf *); #endif /* Attachment glue for the PCI controller end of the device. Each port of * the device is attached separately, as defined later. */ static int cxgb_controller_probe(device_t); static int cxgb_controller_attach(device_t); static int cxgb_controller_detach(device_t); static void cxgb_free(struct adapter *); static __inline void reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start, unsigned int end); static void cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf); static int cxgb_get_regs_len(void); static void touch_bars(device_t dev); static void cxgb_update_mac_settings(struct port_info *p); #ifdef TCP_OFFLOAD static int toe_capability(struct port_info *, int); #endif static device_method_t cxgb_controller_methods[] = { DEVMETHOD(device_probe, cxgb_controller_probe), DEVMETHOD(device_attach, cxgb_controller_attach), DEVMETHOD(device_detach, cxgb_controller_detach), DEVMETHOD_END }; static driver_t cxgb_controller_driver = { "cxgbc", cxgb_controller_methods, sizeof(struct adapter) }; static int cxgbc_mod_event(module_t, int, void *); static devclass_t cxgb_controller_devclass; DRIVER_MODULE(cxgbc, pci, cxgb_controller_driver, cxgb_controller_devclass, cxgbc_mod_event, 0); MODULE_VERSION(cxgbc, 1); MODULE_DEPEND(cxgbc, firmware, 1, 1, 1); /* * Attachment glue for the ports. Attachment is done directly to the * controller device. */ static int cxgb_port_probe(device_t); static int cxgb_port_attach(device_t); static int cxgb_port_detach(device_t); static device_method_t cxgb_port_methods[] = { DEVMETHOD(device_probe, cxgb_port_probe), DEVMETHOD(device_attach, cxgb_port_attach), DEVMETHOD(device_detach, cxgb_port_detach), { 0, 0 } }; static driver_t cxgb_port_driver = { "cxgb", cxgb_port_methods, 0 }; static d_ioctl_t cxgb_extension_ioctl; static d_open_t cxgb_extension_open; static d_close_t cxgb_extension_close; static struct cdevsw cxgb_cdevsw = { .d_version = D_VERSION, .d_flags = 0, .d_open = cxgb_extension_open, .d_close = cxgb_extension_close, .d_ioctl = cxgb_extension_ioctl, .d_name = "cxgb", }; static devclass_t cxgb_port_devclass; DRIVER_MODULE(cxgb, cxgbc, cxgb_port_driver, cxgb_port_devclass, 0, 0); MODULE_VERSION(cxgb, 1); +NETDUMP_DEFINE(cxgb); + static struct mtx t3_list_lock; static SLIST_HEAD(, adapter) t3_list; #ifdef TCP_OFFLOAD static struct mtx t3_uld_list_lock; static SLIST_HEAD(, uld_info) t3_uld_list; #endif /* * The driver uses the best interrupt scheme available on a platform in the * order MSI-X, MSI, legacy pin interrupts. This parameter determines which * of these schemes the driver may consider as follows: * * msi = 2: choose from among all three options * msi = 1 : only consider MSI and pin interrupts * msi = 0: force pin interrupts */ static int msi_allowed = 2; SYSCTL_NODE(_hw, OID_AUTO, cxgb, CTLFLAG_RD, 0, "CXGB driver parameters"); SYSCTL_INT(_hw_cxgb, OID_AUTO, msi_allowed, CTLFLAG_RDTUN, &msi_allowed, 0, "MSI-X, MSI, INTx selector"); /* * The driver uses an auto-queue algorithm by default. * To disable it and force a single queue-set per port, use multiq = 0 */ static int multiq = 1; SYSCTL_INT(_hw_cxgb, OID_AUTO, multiq, CTLFLAG_RDTUN, &multiq, 0, "use min(ncpus/ports, 8) queue-sets per port"); /* * By default the driver will not update the firmware unless * it was compiled against a newer version * */ static int force_fw_update = 0; SYSCTL_INT(_hw_cxgb, OID_AUTO, force_fw_update, CTLFLAG_RDTUN, &force_fw_update, 0, "update firmware even if up to date"); int cxgb_use_16k_clusters = -1; SYSCTL_INT(_hw_cxgb, OID_AUTO, use_16k_clusters, CTLFLAG_RDTUN, &cxgb_use_16k_clusters, 0, "use 16kB clusters for the jumbo queue "); static int nfilters = -1; SYSCTL_INT(_hw_cxgb, OID_AUTO, nfilters, CTLFLAG_RDTUN, &nfilters, 0, "max number of entries in the filter table"); enum { MAX_TXQ_ENTRIES = 16384, MAX_CTRL_TXQ_ENTRIES = 1024, MAX_RSPQ_ENTRIES = 16384, MAX_RX_BUFFERS = 16384, MAX_RX_JUMBO_BUFFERS = 16384, MIN_TXQ_ENTRIES = 4, MIN_CTRL_TXQ_ENTRIES = 4, MIN_RSPQ_ENTRIES = 32, MIN_FL_ENTRIES = 32, MIN_FL_JUMBO_ENTRIES = 32 }; struct filter_info { u32 sip; u32 sip_mask; u32 dip; u16 sport; u16 dport; u32 vlan:12; u32 vlan_prio:3; u32 mac_hit:1; u32 mac_idx:4; u32 mac_vld:1; u32 pkt_type:2; u32 report_filter_id:1; u32 pass:1; u32 rss:1; u32 qset:3; u32 locked:1; u32 valid:1; }; enum { FILTER_NO_VLAN_PRI = 7 }; #define EEPROM_MAGIC 0x38E2F10C #define PORT_MASK ((1 << MAX_NPORTS) - 1) /* Table for probing the cards. The desc field isn't actually used */ struct cxgb_ident { uint16_t vendor; uint16_t device; int index; char *desc; } cxgb_identifiers[] = { {PCI_VENDOR_ID_CHELSIO, 0x0020, 0, "PE9000"}, {PCI_VENDOR_ID_CHELSIO, 0x0021, 1, "T302E"}, {PCI_VENDOR_ID_CHELSIO, 0x0022, 2, "T310E"}, {PCI_VENDOR_ID_CHELSIO, 0x0023, 3, "T320X"}, {PCI_VENDOR_ID_CHELSIO, 0x0024, 1, "T302X"}, {PCI_VENDOR_ID_CHELSIO, 0x0025, 3, "T320E"}, {PCI_VENDOR_ID_CHELSIO, 0x0026, 2, "T310X"}, {PCI_VENDOR_ID_CHELSIO, 0x0030, 2, "T3B10"}, {PCI_VENDOR_ID_CHELSIO, 0x0031, 3, "T3B20"}, {PCI_VENDOR_ID_CHELSIO, 0x0032, 1, "T3B02"}, {PCI_VENDOR_ID_CHELSIO, 0x0033, 4, "T3B04"}, {PCI_VENDOR_ID_CHELSIO, 0x0035, 6, "T3C10"}, {PCI_VENDOR_ID_CHELSIO, 0x0036, 3, "S320E-CR"}, {PCI_VENDOR_ID_CHELSIO, 0x0037, 7, "N320E-G2"}, {0, 0, 0, NULL} }; static int set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset); static __inline char t3rev2char(struct adapter *adapter) { char rev = 'z'; switch(adapter->params.rev) { case T3_REV_A: rev = 'a'; break; case T3_REV_B: case T3_REV_B2: rev = 'b'; break; case T3_REV_C: rev = 'c'; break; } return rev; } static struct cxgb_ident * cxgb_get_ident(device_t dev) { struct cxgb_ident *id; for (id = cxgb_identifiers; id->desc != NULL; id++) { if ((id->vendor == pci_get_vendor(dev)) && (id->device == pci_get_device(dev))) { return (id); } } return (NULL); } static const struct adapter_info * cxgb_get_adapter_info(device_t dev) { struct cxgb_ident *id; const struct adapter_info *ai; id = cxgb_get_ident(dev); if (id == NULL) return (NULL); ai = t3_get_adapter_info(id->index); return (ai); } static int cxgb_controller_probe(device_t dev) { const struct adapter_info *ai; char *ports, buf[80]; int nports; ai = cxgb_get_adapter_info(dev); if (ai == NULL) return (ENXIO); nports = ai->nports0 + ai->nports1; if (nports == 1) ports = "port"; else ports = "ports"; snprintf(buf, sizeof(buf), "%s, %d %s", ai->desc, nports, ports); device_set_desc_copy(dev, buf); return (BUS_PROBE_DEFAULT); } #define FW_FNAME "cxgb_t3fw" #define TPEEPROM_NAME "cxgb_t3%c_tp_eeprom" #define TPSRAM_NAME "cxgb_t3%c_protocol_sram" static int upgrade_fw(adapter_t *sc) { const struct firmware *fw; int status; u32 vers; if ((fw = firmware_get(FW_FNAME)) == NULL) { device_printf(sc->dev, "Could not find firmware image %s\n", FW_FNAME); return (ENOENT); } else device_printf(sc->dev, "installing firmware on card\n"); status = t3_load_fw(sc, (const uint8_t *)fw->data, fw->datasize); if (status != 0) { device_printf(sc->dev, "failed to install firmware: %d\n", status); } else { t3_get_fw_version(sc, &vers); snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d", G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers), G_FW_VERSION_MICRO(vers)); } firmware_put(fw, FIRMWARE_UNLOAD); return (status); } /* * The cxgb_controller_attach function is responsible for the initial * bringup of the device. Its responsibilities include: * * 1. Determine if the device supports MSI or MSI-X. * 2. Allocate bus resources so that we can access the Base Address Register * 3. Create and initialize mutexes for the controller and its control * logic such as SGE and MDIO. * 4. Call hardware specific setup routine for the adapter as a whole. * 5. Allocate the BAR for doing MSI-X. * 6. Setup the line interrupt iff MSI-X is not supported. * 7. Create the driver's taskq. * 8. Start one task queue service thread. * 9. Check if the firmware and SRAM are up-to-date. They will be * auto-updated later (before FULL_INIT_DONE), if required. * 10. Create a child device for each MAC (port) * 11. Initialize T3 private state. * 12. Trigger the LED * 13. Setup offload iff supported. * 14. Reset/restart the tick callout. * 15. Attach sysctls * * NOTE: Any modification or deviation from this list MUST be reflected in * the above comment. Failure to do so will result in problems on various * error conditions including link flapping. */ static int cxgb_controller_attach(device_t dev) { device_t child; const struct adapter_info *ai; struct adapter *sc; int i, error = 0; uint32_t vers; int port_qsets = 1; int msi_needed, reg; char buf[80]; sc = device_get_softc(dev); sc->dev = dev; sc->msi_count = 0; ai = cxgb_get_adapter_info(dev); snprintf(sc->lockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb controller lock %d", device_get_unit(dev)); ADAPTER_LOCK_INIT(sc, sc->lockbuf); snprintf(sc->reglockbuf, ADAPTER_LOCK_NAME_LEN, "SGE reg lock %d", device_get_unit(dev)); snprintf(sc->mdiolockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb mdio lock %d", device_get_unit(dev)); snprintf(sc->elmerlockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb elmer lock %d", device_get_unit(dev)); MTX_INIT(&sc->sge.reg_lock, sc->reglockbuf, NULL, MTX_SPIN); MTX_INIT(&sc->mdio_lock, sc->mdiolockbuf, NULL, MTX_DEF); MTX_INIT(&sc->elmer_lock, sc->elmerlockbuf, NULL, MTX_DEF); mtx_lock(&t3_list_lock); SLIST_INSERT_HEAD(&t3_list, sc, link); mtx_unlock(&t3_list_lock); /* find the PCIe link width and set max read request to 4KB*/ if (pci_find_cap(dev, PCIY_EXPRESS, ®) == 0) { uint16_t lnk; lnk = pci_read_config(dev, reg + PCIER_LINK_STA, 2); sc->link_width = (lnk & PCIEM_LINK_STA_WIDTH) >> 4; if (sc->link_width < 8 && (ai->caps & SUPPORTED_10000baseT_Full)) { device_printf(sc->dev, "PCIe x%d Link, expect reduced performance\n", sc->link_width); } pci_set_max_read_req(dev, 4096); } touch_bars(dev); pci_enable_busmaster(dev); /* * Allocate the registers and make them available to the driver. * The registers that we care about for NIC mode are in BAR 0 */ sc->regs_rid = PCIR_BAR(0); if ((sc->regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &sc->regs_rid, RF_ACTIVE)) == NULL) { device_printf(dev, "Cannot allocate BAR region 0\n"); error = ENXIO; goto out; } sc->bt = rman_get_bustag(sc->regs_res); sc->bh = rman_get_bushandle(sc->regs_res); sc->mmio_len = rman_get_size(sc->regs_res); for (i = 0; i < MAX_NPORTS; i++) sc->port[i].adapter = sc; if (t3_prep_adapter(sc, ai, 1) < 0) { printf("prep adapter failed\n"); error = ENODEV; goto out; } sc->udbs_rid = PCIR_BAR(2); sc->udbs_res = NULL; if (is_offload(sc) && ((sc->udbs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &sc->udbs_rid, RF_ACTIVE)) == NULL)) { device_printf(dev, "Cannot allocate BAR region 1\n"); error = ENXIO; goto out; } /* Allocate the BAR for doing MSI-X. If it succeeds, try to allocate * enough messages for the queue sets. If that fails, try falling * back to MSI. If that fails, then try falling back to the legacy * interrupt pin model. */ sc->msix_regs_rid = 0x20; if ((msi_allowed >= 2) && (sc->msix_regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &sc->msix_regs_rid, RF_ACTIVE)) != NULL) { if (multiq) port_qsets = min(SGE_QSETS/sc->params.nports, mp_ncpus); msi_needed = sc->msi_count = sc->params.nports * port_qsets + 1; if (pci_msix_count(dev) == 0 || (error = pci_alloc_msix(dev, &sc->msi_count)) != 0 || sc->msi_count != msi_needed) { device_printf(dev, "alloc msix failed - " "msi_count=%d, msi_needed=%d, err=%d; " "will try MSI\n", sc->msi_count, msi_needed, error); sc->msi_count = 0; port_qsets = 1; pci_release_msi(dev); bus_release_resource(dev, SYS_RES_MEMORY, sc->msix_regs_rid, sc->msix_regs_res); sc->msix_regs_res = NULL; } else { sc->flags |= USING_MSIX; sc->cxgb_intr = cxgb_async_intr; device_printf(dev, "using MSI-X interrupts (%u vectors)\n", sc->msi_count); } } if ((msi_allowed >= 1) && (sc->msi_count == 0)) { sc->msi_count = 1; if ((error = pci_alloc_msi(dev, &sc->msi_count)) != 0) { device_printf(dev, "alloc msi failed - " "err=%d; will try INTx\n", error); sc->msi_count = 0; port_qsets = 1; pci_release_msi(dev); } else { sc->flags |= USING_MSI; sc->cxgb_intr = t3_intr_msi; device_printf(dev, "using MSI interrupts\n"); } } if (sc->msi_count == 0) { device_printf(dev, "using line interrupts\n"); sc->cxgb_intr = t3b_intr; } /* Create a private taskqueue thread for handling driver events */ sc->tq = taskqueue_create("cxgb_taskq", M_NOWAIT, taskqueue_thread_enqueue, &sc->tq); if (sc->tq == NULL) { device_printf(dev, "failed to allocate controller task queue\n"); goto out; } taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq", device_get_nameunit(dev)); TASK_INIT(&sc->tick_task, 0, cxgb_tick_handler, sc); /* Create a periodic callout for checking adapter status */ callout_init(&sc->cxgb_tick_ch, 1); if (t3_check_fw_version(sc) < 0 || force_fw_update) { /* * Warn user that a firmware update will be attempted in init. */ device_printf(dev, "firmware needs to be updated to version %d.%d.%d\n", FW_VERSION_MAJOR, FW_VERSION_MINOR, FW_VERSION_MICRO); sc->flags &= ~FW_UPTODATE; } else { sc->flags |= FW_UPTODATE; } if (t3_check_tpsram_version(sc) < 0) { /* * Warn user that a firmware update will be attempted in init. */ device_printf(dev, "SRAM needs to be updated to version %c-%d.%d.%d\n", t3rev2char(sc), TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO); sc->flags &= ~TPS_UPTODATE; } else { sc->flags |= TPS_UPTODATE; } /* * Create a child device for each MAC. The ethernet attachment * will be done in these children. */ for (i = 0; i < (sc)->params.nports; i++) { struct port_info *pi; if ((child = device_add_child(dev, "cxgb", -1)) == NULL) { device_printf(dev, "failed to add child port\n"); error = EINVAL; goto out; } pi = &sc->port[i]; pi->adapter = sc; pi->nqsets = port_qsets; pi->first_qset = i*port_qsets; pi->port_id = i; pi->tx_chan = i >= ai->nports0; pi->txpkt_intf = pi->tx_chan ? 2 * (i - ai->nports0) + 1 : 2 * i; sc->rxpkt_map[pi->txpkt_intf] = i; sc->port[i].tx_chan = i >= ai->nports0; sc->portdev[i] = child; device_set_softc(child, pi); } if ((error = bus_generic_attach(dev)) != 0) goto out; /* initialize sge private state */ t3_sge_init_adapter(sc); t3_led_ready(sc); error = t3_get_fw_version(sc, &vers); if (error) goto out; snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d", G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers), G_FW_VERSION_MICRO(vers)); snprintf(buf, sizeof(buf), "%s %sNIC\t E/C: %s S/N: %s", ai->desc, is_offload(sc) ? "R" : "", sc->params.vpd.ec, sc->params.vpd.sn); device_set_desc_copy(dev, buf); snprintf(&sc->port_types[0], sizeof(sc->port_types), "%x%x%x%x", sc->params.vpd.port_type[0], sc->params.vpd.port_type[1], sc->params.vpd.port_type[2], sc->params.vpd.port_type[3]); device_printf(sc->dev, "Firmware Version %s\n", &sc->fw_version[0]); callout_reset(&sc->cxgb_tick_ch, hz, cxgb_tick, sc); t3_add_attach_sysctls(sc); #ifdef TCP_OFFLOAD for (i = 0; i < NUM_CPL_HANDLERS; i++) sc->cpl_handler[i] = cpl_not_handled; #endif t3_intr_clear(sc); error = cxgb_setup_interrupts(sc); out: if (error) cxgb_free(sc); return (error); } /* * The cxgb_controller_detach routine is called with the device is * unloaded from the system. */ static int cxgb_controller_detach(device_t dev) { struct adapter *sc; sc = device_get_softc(dev); cxgb_free(sc); return (0); } /* * The cxgb_free() is called by the cxgb_controller_detach() routine * to tear down the structures that were built up in * cxgb_controller_attach(), and should be the final piece of work * done when fully unloading the driver. * * * 1. Shutting down the threads started by the cxgb_controller_attach() * routine. * 2. Stopping the lower level device and all callouts (cxgb_down_locked()). * 3. Detaching all of the port devices created during the * cxgb_controller_attach() routine. * 4. Removing the device children created via cxgb_controller_attach(). * 5. Releasing PCI resources associated with the device. * 6. Turning off the offload support, iff it was turned on. * 7. Destroying the mutexes created in cxgb_controller_attach(). * */ static void cxgb_free(struct adapter *sc) { int i, nqsets = 0; ADAPTER_LOCK(sc); sc->flags |= CXGB_SHUTDOWN; ADAPTER_UNLOCK(sc); /* * Make sure all child devices are gone. */ bus_generic_detach(sc->dev); for (i = 0; i < (sc)->params.nports; i++) { if (sc->portdev[i] && device_delete_child(sc->dev, sc->portdev[i]) != 0) device_printf(sc->dev, "failed to delete child port\n"); nqsets += sc->port[i].nqsets; } /* * At this point, it is as if cxgb_port_detach has run on all ports, and * cxgb_down has run on the adapter. All interrupts have been silenced, * all open devices have been closed. */ KASSERT(sc->open_device_map == 0, ("%s: device(s) still open (%x)", __func__, sc->open_device_map)); for (i = 0; i < sc->params.nports; i++) { KASSERT(sc->port[i].ifp == NULL, ("%s: port %i undead!", __func__, i)); } /* * Finish off the adapter's callouts. */ callout_drain(&sc->cxgb_tick_ch); callout_drain(&sc->sge_timer_ch); /* * Release resources grabbed under FULL_INIT_DONE by cxgb_up. The * sysctls are cleaned up by the kernel linker. */ if (sc->flags & FULL_INIT_DONE) { t3_free_sge_resources(sc, nqsets); sc->flags &= ~FULL_INIT_DONE; } /* * Release all interrupt resources. */ cxgb_teardown_interrupts(sc); if (sc->flags & (USING_MSI | USING_MSIX)) { device_printf(sc->dev, "releasing msi message(s)\n"); pci_release_msi(sc->dev); } else { device_printf(sc->dev, "no msi message to release\n"); } if (sc->msix_regs_res != NULL) { bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->msix_regs_rid, sc->msix_regs_res); } /* * Free the adapter's taskqueue. */ if (sc->tq != NULL) { taskqueue_free(sc->tq); sc->tq = NULL; } free(sc->filters, M_DEVBUF); t3_sge_free(sc); if (sc->udbs_res != NULL) bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->udbs_rid, sc->udbs_res); if (sc->regs_res != NULL) bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->regs_rid, sc->regs_res); MTX_DESTROY(&sc->mdio_lock); MTX_DESTROY(&sc->sge.reg_lock); MTX_DESTROY(&sc->elmer_lock); mtx_lock(&t3_list_lock); SLIST_REMOVE(&t3_list, sc, adapter, link); mtx_unlock(&t3_list_lock); ADAPTER_LOCK_DEINIT(sc); } /** * setup_sge_qsets - configure SGE Tx/Rx/response queues * @sc: the controller softc * * Determines how many sets of SGE queues to use and initializes them. * We support multiple queue sets per port if we have MSI-X, otherwise * just one queue set per port. */ static int setup_sge_qsets(adapter_t *sc) { int i, j, err, irq_idx = 0, qset_idx = 0; u_int ntxq = SGE_TXQ_PER_SET; if ((err = t3_sge_alloc(sc)) != 0) { device_printf(sc->dev, "t3_sge_alloc returned %d\n", err); return (err); } if (sc->params.rev > 0 && !(sc->flags & USING_MSI)) irq_idx = -1; for (i = 0; i < (sc)->params.nports; i++) { struct port_info *pi = &sc->port[i]; for (j = 0; j < pi->nqsets; j++, qset_idx++) { err = t3_sge_alloc_qset(sc, qset_idx, (sc)->params.nports, (sc->flags & USING_MSIX) ? qset_idx + 1 : irq_idx, &sc->params.sge.qset[qset_idx], ntxq, pi); if (err) { t3_free_sge_resources(sc, qset_idx); device_printf(sc->dev, "t3_sge_alloc_qset failed with %d\n", err); return (err); } } } return (0); } static void cxgb_teardown_interrupts(adapter_t *sc) { int i; for (i = 0; i < SGE_QSETS; i++) { if (sc->msix_intr_tag[i] == NULL) { /* Should have been setup fully or not at all */ KASSERT(sc->msix_irq_res[i] == NULL && sc->msix_irq_rid[i] == 0, ("%s: half-done interrupt (%d).", __func__, i)); continue; } bus_teardown_intr(sc->dev, sc->msix_irq_res[i], sc->msix_intr_tag[i]); bus_release_resource(sc->dev, SYS_RES_IRQ, sc->msix_irq_rid[i], sc->msix_irq_res[i]); sc->msix_irq_res[i] = sc->msix_intr_tag[i] = NULL; sc->msix_irq_rid[i] = 0; } if (sc->intr_tag) { KASSERT(sc->irq_res != NULL, ("%s: half-done interrupt.", __func__)); bus_teardown_intr(sc->dev, sc->irq_res, sc->intr_tag); bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid, sc->irq_res); sc->irq_res = sc->intr_tag = NULL; sc->irq_rid = 0; } } static int cxgb_setup_interrupts(adapter_t *sc) { struct resource *res; void *tag; int i, rid, err, intr_flag = sc->flags & (USING_MSI | USING_MSIX); sc->irq_rid = intr_flag ? 1 : 0; sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &sc->irq_rid, RF_SHAREABLE | RF_ACTIVE); if (sc->irq_res == NULL) { device_printf(sc->dev, "Cannot allocate interrupt (%x, %u)\n", intr_flag, sc->irq_rid); err = EINVAL; sc->irq_rid = 0; } else { err = bus_setup_intr(sc->dev, sc->irq_res, INTR_MPSAFE | INTR_TYPE_NET, NULL, sc->cxgb_intr, sc, &sc->intr_tag); if (err) { device_printf(sc->dev, "Cannot set up interrupt (%x, %u, %d)\n", intr_flag, sc->irq_rid, err); bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid, sc->irq_res); sc->irq_res = sc->intr_tag = NULL; sc->irq_rid = 0; } } /* That's all for INTx or MSI */ if (!(intr_flag & USING_MSIX) || err) return (err); bus_describe_intr(sc->dev, sc->irq_res, sc->intr_tag, "err"); for (i = 0; i < sc->msi_count - 1; i++) { rid = i + 2; res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); if (res == NULL) { device_printf(sc->dev, "Cannot allocate interrupt " "for message %d\n", rid); err = EINVAL; break; } err = bus_setup_intr(sc->dev, res, INTR_MPSAFE | INTR_TYPE_NET, NULL, t3_intr_msix, &sc->sge.qs[i], &tag); if (err) { device_printf(sc->dev, "Cannot set up interrupt " "for message %d (%d)\n", rid, err); bus_release_resource(sc->dev, SYS_RES_IRQ, rid, res); break; } sc->msix_irq_rid[i] = rid; sc->msix_irq_res[i] = res; sc->msix_intr_tag[i] = tag; bus_describe_intr(sc->dev, res, tag, "qs%d", i); } if (err) cxgb_teardown_interrupts(sc); return (err); } static int cxgb_port_probe(device_t dev) { struct port_info *p; char buf[80]; const char *desc; p = device_get_softc(dev); desc = p->phy.desc; snprintf(buf, sizeof(buf), "Port %d %s", p->port_id, desc); device_set_desc_copy(dev, buf); return (0); } static int cxgb_makedev(struct port_info *pi) { pi->port_cdev = make_dev(&cxgb_cdevsw, pi->ifp->if_dunit, UID_ROOT, GID_WHEEL, 0600, "%s", if_name(pi->ifp)); if (pi->port_cdev == NULL) return (ENOMEM); pi->port_cdev->si_drv1 = (void *)pi; return (0); } #define CXGB_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | \ IFCAP_VLAN_HWCSUM | IFCAP_TSO | IFCAP_JUMBO_MTU | IFCAP_LRO | \ IFCAP_VLAN_HWTSO | IFCAP_LINKSTATE | IFCAP_HWCSUM_IPV6) #define CXGB_CAP_ENABLE CXGB_CAP static int cxgb_port_attach(device_t dev) { struct port_info *p; struct ifnet *ifp; int err; struct adapter *sc; p = device_get_softc(dev); sc = p->adapter; snprintf(p->lockbuf, PORT_NAME_LEN, "cxgb port lock %d:%d", device_get_unit(device_get_parent(dev)), p->port_id); PORT_LOCK_INIT(p, p->lockbuf); callout_init(&p->link_check_ch, 1); TASK_INIT(&p->link_check_task, 0, check_link_status, p); /* Allocate an ifnet object and set it up */ ifp = p->ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { device_printf(dev, "Cannot allocate ifnet\n"); return (ENOMEM); } if_initname(ifp, device_get_name(dev), device_get_unit(dev)); ifp->if_init = cxgb_init; ifp->if_softc = p; ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = cxgb_ioctl; ifp->if_transmit = cxgb_transmit; ifp->if_qflush = cxgb_qflush; ifp->if_get_counter = cxgb_get_counter; ifp->if_capabilities = CXGB_CAP; #ifdef TCP_OFFLOAD if (is_offload(sc)) ifp->if_capabilities |= IFCAP_TOE4; #endif ifp->if_capenable = CXGB_CAP_ENABLE; ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO | CSUM_UDP_IPV6 | CSUM_TCP_IPV6; /* * Disable TSO on 4-port - it isn't supported by the firmware. */ if (sc->params.nports > 2) { ifp->if_capabilities &= ~(IFCAP_TSO | IFCAP_VLAN_HWTSO); ifp->if_capenable &= ~(IFCAP_TSO | IFCAP_VLAN_HWTSO); ifp->if_hwassist &= ~CSUM_TSO; } ether_ifattach(ifp, p->hw_addr); + /* Attach driver netdump methods. */ + NETDUMP_SET(ifp, cxgb); + #ifdef DEFAULT_JUMBO if (sc->params.nports <= 2) ifp->if_mtu = ETHERMTU_JUMBO; #endif if ((err = cxgb_makedev(p)) != 0) { printf("makedev failed %d\n", err); return (err); } /* Create a list of media supported by this port */ ifmedia_init(&p->media, IFM_IMASK, cxgb_media_change, cxgb_media_status); cxgb_build_medialist(p); t3_sge_init_port(p); return (err); } /* * cxgb_port_detach() is called via the device_detach methods when * cxgb_free() calls the bus_generic_detach. It is responsible for * removing the device from the view of the kernel, i.e. from all * interfaces lists etc. This routine is only called when the driver is * being unloaded, not when the link goes down. */ static int cxgb_port_detach(device_t dev) { struct port_info *p; struct adapter *sc; int i; p = device_get_softc(dev); sc = p->adapter; /* Tell cxgb_ioctl and if_init that the port is going away */ ADAPTER_LOCK(sc); SET_DOOMED(p); wakeup(&sc->flags); while (IS_BUSY(sc)) mtx_sleep(&sc->flags, &sc->lock, 0, "cxgbdtch", 0); SET_BUSY(sc); ADAPTER_UNLOCK(sc); if (p->port_cdev != NULL) destroy_dev(p->port_cdev); cxgb_uninit_synchronized(p); ether_ifdetach(p->ifp); for (i = p->first_qset; i < p->first_qset + p->nqsets; i++) { struct sge_qset *qs = &sc->sge.qs[i]; struct sge_txq *txq = &qs->txq[TXQ_ETH]; callout_drain(&txq->txq_watchdog); callout_drain(&txq->txq_timer); } PORT_LOCK_DEINIT(p); if_free(p->ifp); p->ifp = NULL; ADAPTER_LOCK(sc); CLR_BUSY(sc); wakeup_one(&sc->flags); ADAPTER_UNLOCK(sc); return (0); } void t3_fatal_err(struct adapter *sc) { u_int fw_status[4]; if (sc->flags & FULL_INIT_DONE) { t3_sge_stop(sc); t3_write_reg(sc, A_XGM_TX_CTRL, 0); t3_write_reg(sc, A_XGM_RX_CTRL, 0); t3_write_reg(sc, XGM_REG(A_XGM_TX_CTRL, 1), 0); t3_write_reg(sc, XGM_REG(A_XGM_RX_CTRL, 1), 0); t3_intr_disable(sc); } device_printf(sc->dev,"encountered fatal error, operation suspended\n"); if (!t3_cim_ctl_blk_read(sc, 0xa0, 4, fw_status)) device_printf(sc->dev, "FW_ status: 0x%x, 0x%x, 0x%x, 0x%x\n", fw_status[0], fw_status[1], fw_status[2], fw_status[3]); } int t3_os_find_pci_capability(adapter_t *sc, int cap) { device_t dev; struct pci_devinfo *dinfo; pcicfgregs *cfg; uint32_t status; uint8_t ptr; dev = sc->dev; dinfo = device_get_ivars(dev); cfg = &dinfo->cfg; status = pci_read_config(dev, PCIR_STATUS, 2); if (!(status & PCIM_STATUS_CAPPRESENT)) return (0); switch (cfg->hdrtype & PCIM_HDRTYPE) { case 0: case 1: ptr = PCIR_CAP_PTR; break; case 2: ptr = PCIR_CAP_PTR_2; break; default: return (0); break; } ptr = pci_read_config(dev, ptr, 1); while (ptr != 0) { if (pci_read_config(dev, ptr + PCICAP_ID, 1) == cap) return (ptr); ptr = pci_read_config(dev, ptr + PCICAP_NEXTPTR, 1); } return (0); } int t3_os_pci_save_state(struct adapter *sc) { device_t dev; struct pci_devinfo *dinfo; dev = sc->dev; dinfo = device_get_ivars(dev); pci_cfg_save(dev, dinfo, 0); return (0); } int t3_os_pci_restore_state(struct adapter *sc) { device_t dev; struct pci_devinfo *dinfo; dev = sc->dev; dinfo = device_get_ivars(dev); pci_cfg_restore(dev, dinfo); return (0); } /** * t3_os_link_changed - handle link status changes * @sc: the adapter associated with the link change * @port_id: the port index whose link status has changed * @link_status: the new status of the link * @speed: the new speed setting * @duplex: the new duplex setting * @fc: the new flow-control setting * * This is the OS-dependent handler for link status changes. The OS * neutral handler takes care of most of the processing for these events, * then calls this handler for any OS-specific processing. */ void t3_os_link_changed(adapter_t *adapter, int port_id, int link_status, int speed, int duplex, int fc, int mac_was_reset) { struct port_info *pi = &adapter->port[port_id]; struct ifnet *ifp = pi->ifp; /* no race with detach, so ifp should always be good */ KASSERT(ifp, ("%s: if detached.", __func__)); /* Reapply mac settings if they were lost due to a reset */ if (mac_was_reset) { PORT_LOCK(pi); cxgb_update_mac_settings(pi); PORT_UNLOCK(pi); } if (link_status) { ifp->if_baudrate = IF_Mbps(speed); if_link_state_change(ifp, LINK_STATE_UP); } else if_link_state_change(ifp, LINK_STATE_DOWN); } /** * t3_os_phymod_changed - handle PHY module changes * @phy: the PHY reporting the module change * @mod_type: new module type * * This is the OS-dependent handler for PHY module changes. It is * invoked when a PHY module is removed or inserted for any OS-specific * processing. */ void t3_os_phymod_changed(struct adapter *adap, int port_id) { static const char *mod_str[] = { NULL, "SR", "LR", "LRM", "TWINAX", "TWINAX-L", "unknown" }; struct port_info *pi = &adap->port[port_id]; int mod = pi->phy.modtype; if (mod != pi->media.ifm_cur->ifm_data) cxgb_build_medialist(pi); if (mod == phy_modtype_none) if_printf(pi->ifp, "PHY module unplugged\n"); else { KASSERT(mod < ARRAY_SIZE(mod_str), ("invalid PHY module type %d", mod)); if_printf(pi->ifp, "%s PHY module inserted\n", mod_str[mod]); } } void t3_os_set_hw_addr(adapter_t *adapter, int port_idx, u8 hw_addr[]) { /* * The ifnet might not be allocated before this gets called, * as this is called early on in attach by t3_prep_adapter * save the address off in the port structure */ if (cxgb_debug) printf("set_hw_addr on idx %d addr %6D\n", port_idx, hw_addr, ":"); bcopy(hw_addr, adapter->port[port_idx].hw_addr, ETHER_ADDR_LEN); } /* * Programs the XGMAC based on the settings in the ifnet. These settings * include MTU, MAC address, mcast addresses, etc. */ static void cxgb_update_mac_settings(struct port_info *p) { struct ifnet *ifp = p->ifp; struct t3_rx_mode rm; struct cmac *mac = &p->mac; int mtu, hwtagging; PORT_LOCK_ASSERT_OWNED(p); bcopy(IF_LLADDR(ifp), p->hw_addr, ETHER_ADDR_LEN); mtu = ifp->if_mtu; if (ifp->if_capenable & IFCAP_VLAN_MTU) mtu += ETHER_VLAN_ENCAP_LEN; hwtagging = (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0; t3_mac_set_mtu(mac, mtu); t3_set_vlan_accel(p->adapter, 1 << p->tx_chan, hwtagging); t3_mac_set_address(mac, 0, p->hw_addr); t3_init_rx_mode(&rm, p); t3_mac_set_rx_mode(mac, &rm); } static int await_mgmt_replies(struct adapter *adap, unsigned long init_cnt, unsigned long n) { int attempts = 5; while (adap->sge.qs[0].rspq.offload_pkts < init_cnt + n) { if (!--attempts) return (ETIMEDOUT); t3_os_sleep(10); } return 0; } static int init_tp_parity(struct adapter *adap) { int i; struct mbuf *m; struct cpl_set_tcb_field *greq; unsigned long cnt = adap->sge.qs[0].rspq.offload_pkts; t3_tp_set_offload_mode(adap, 1); for (i = 0; i < 16; i++) { struct cpl_smt_write_req *req; m = m_gethdr(M_WAITOK, MT_DATA); req = mtod(m, struct cpl_smt_write_req *); m->m_len = m->m_pkthdr.len = sizeof(*req); memset(req, 0, sizeof(*req)); req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, i)); req->iff = i; t3_mgmt_tx(adap, m); } for (i = 0; i < 2048; i++) { struct cpl_l2t_write_req *req; m = m_gethdr(M_WAITOK, MT_DATA); req = mtod(m, struct cpl_l2t_write_req *); m->m_len = m->m_pkthdr.len = sizeof(*req); memset(req, 0, sizeof(*req)); req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, i)); req->params = htonl(V_L2T_W_IDX(i)); t3_mgmt_tx(adap, m); } for (i = 0; i < 2048; i++) { struct cpl_rte_write_req *req; m = m_gethdr(M_WAITOK, MT_DATA); req = mtod(m, struct cpl_rte_write_req *); m->m_len = m->m_pkthdr.len = sizeof(*req); memset(req, 0, sizeof(*req)); req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RTE_WRITE_REQ, i)); req->l2t_idx = htonl(V_L2T_W_IDX(i)); t3_mgmt_tx(adap, m); } m = m_gethdr(M_WAITOK, MT_DATA); greq = mtod(m, struct cpl_set_tcb_field *); m->m_len = m->m_pkthdr.len = sizeof(*greq); memset(greq, 0, sizeof(*greq)); greq->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); OPCODE_TID(greq) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, 0)); greq->mask = htobe64(1); t3_mgmt_tx(adap, m); i = await_mgmt_replies(adap, cnt, 16 + 2048 + 2048 + 1); t3_tp_set_offload_mode(adap, 0); return (i); } /** * setup_rss - configure Receive Side Steering (per-queue connection demux) * @adap: the adapter * * Sets up RSS to distribute packets to multiple receive queues. We * configure the RSS CPU lookup table to distribute to the number of HW * receive queues, and the response queue lookup table to narrow that * down to the response queues actually configured for each port. * We always configure the RSS mapping for two ports since the mapping * table has plenty of entries. */ static void setup_rss(adapter_t *adap) { int i; u_int nq[2]; uint8_t cpus[SGE_QSETS + 1]; uint16_t rspq_map[RSS_TABLE_SIZE]; for (i = 0; i < SGE_QSETS; ++i) cpus[i] = i; cpus[SGE_QSETS] = 0xff; nq[0] = nq[1] = 0; for_each_port(adap, i) { const struct port_info *pi = adap2pinfo(adap, i); nq[pi->tx_chan] += pi->nqsets; } for (i = 0; i < RSS_TABLE_SIZE / 2; ++i) { rspq_map[i] = nq[0] ? i % nq[0] : 0; rspq_map[i + RSS_TABLE_SIZE / 2] = nq[1] ? i % nq[1] + nq[0] : 0; } /* Calculate the reverse RSS map table */ for (i = 0; i < SGE_QSETS; ++i) adap->rrss_map[i] = 0xff; for (i = 0; i < RSS_TABLE_SIZE; ++i) if (adap->rrss_map[rspq_map[i]] == 0xff) adap->rrss_map[rspq_map[i]] = i; t3_config_rss(adap, F_RQFEEDBACKENABLE | F_TNLLKPEN | F_TNLMAPEN | F_TNLPRTEN | F_TNL2TUPEN | F_TNL4TUPEN | F_OFDMAPEN | F_RRCPLMAPEN | V_RRCPLCPUSIZE(6) | F_HASHTOEPLITZ, cpus, rspq_map); } static void send_pktsched_cmd(struct adapter *adap, int sched, int qidx, int lo, int hi, int port) { struct mbuf *m; struct mngt_pktsched_wr *req; m = m_gethdr(M_NOWAIT, MT_DATA); if (m) { req = mtod(m, struct mngt_pktsched_wr *); req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_MNGT)); req->mngt_opcode = FW_MNGTOPCODE_PKTSCHED_SET; req->sched = sched; req->idx = qidx; req->min = lo; req->max = hi; req->binding = port; m->m_len = m->m_pkthdr.len = sizeof(*req); t3_mgmt_tx(adap, m); } } static void bind_qsets(adapter_t *sc) { int i, j; for (i = 0; i < (sc)->params.nports; ++i) { const struct port_info *pi = adap2pinfo(sc, i); for (j = 0; j < pi->nqsets; ++j) { send_pktsched_cmd(sc, 1, pi->first_qset + j, -1, -1, pi->tx_chan); } } } static void update_tpeeprom(struct adapter *adap) { const struct firmware *tpeeprom; uint32_t version; unsigned int major, minor; int ret, len; char rev, name[32]; t3_seeprom_read(adap, TP_SRAM_OFFSET, &version); major = G_TP_VERSION_MAJOR(version); minor = G_TP_VERSION_MINOR(version); if (major == TP_VERSION_MAJOR && minor == TP_VERSION_MINOR) return; rev = t3rev2char(adap); snprintf(name, sizeof(name), TPEEPROM_NAME, rev); tpeeprom = firmware_get(name); if (tpeeprom == NULL) { device_printf(adap->dev, "could not load TP EEPROM: unable to load %s\n", name); return; } len = tpeeprom->datasize - 4; ret = t3_check_tpsram(adap, tpeeprom->data, tpeeprom->datasize); if (ret) goto release_tpeeprom; if (len != TP_SRAM_LEN) { device_printf(adap->dev, "%s length is wrong len=%d expected=%d\n", name, len, TP_SRAM_LEN); return; } ret = set_eeprom(&adap->port[0], tpeeprom->data, tpeeprom->datasize, TP_SRAM_OFFSET); if (!ret) { device_printf(adap->dev, "Protocol SRAM image updated in EEPROM to %d.%d.%d\n", TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO); } else device_printf(adap->dev, "Protocol SRAM image update in EEPROM failed\n"); release_tpeeprom: firmware_put(tpeeprom, FIRMWARE_UNLOAD); return; } static int update_tpsram(struct adapter *adap) { const struct firmware *tpsram; int ret; char rev, name[32]; rev = t3rev2char(adap); snprintf(name, sizeof(name), TPSRAM_NAME, rev); update_tpeeprom(adap); tpsram = firmware_get(name); if (tpsram == NULL){ device_printf(adap->dev, "could not load TP SRAM\n"); return (EINVAL); } else device_printf(adap->dev, "updating TP SRAM\n"); ret = t3_check_tpsram(adap, tpsram->data, tpsram->datasize); if (ret) goto release_tpsram; ret = t3_set_proto_sram(adap, tpsram->data); if (ret) device_printf(adap->dev, "loading protocol SRAM failed\n"); release_tpsram: firmware_put(tpsram, FIRMWARE_UNLOAD); return ret; } /** * cxgb_up - enable the adapter * @adap: adapter being enabled * * Called when the first port is enabled, this function performs the * actions necessary to make an adapter operational, such as completing * the initialization of HW modules, and enabling interrupts. */ static int cxgb_up(struct adapter *sc) { int err = 0; unsigned int mxf = t3_mc5_size(&sc->mc5) - MC5_MIN_TIDS; KASSERT(sc->open_device_map == 0, ("%s: device(s) already open (%x)", __func__, sc->open_device_map)); if ((sc->flags & FULL_INIT_DONE) == 0) { ADAPTER_LOCK_ASSERT_NOTOWNED(sc); if ((sc->flags & FW_UPTODATE) == 0) if ((err = upgrade_fw(sc))) goto out; if ((sc->flags & TPS_UPTODATE) == 0) if ((err = update_tpsram(sc))) goto out; if (is_offload(sc) && nfilters != 0) { sc->params.mc5.nservers = 0; if (nfilters < 0) sc->params.mc5.nfilters = mxf; else sc->params.mc5.nfilters = min(nfilters, mxf); } err = t3_init_hw(sc, 0); if (err) goto out; t3_set_reg_field(sc, A_TP_PARA_REG5, 0, F_RXDDPOFFINIT); t3_write_reg(sc, A_ULPRX_TDDP_PSZ, V_HPZ0(PAGE_SHIFT - 12)); err = setup_sge_qsets(sc); if (err) goto out; alloc_filters(sc); setup_rss(sc); t3_add_configured_sysctls(sc); sc->flags |= FULL_INIT_DONE; } t3_intr_clear(sc); t3_sge_start(sc); t3_intr_enable(sc); if (sc->params.rev >= T3_REV_C && !(sc->flags & TP_PARITY_INIT) && is_offload(sc) && init_tp_parity(sc) == 0) sc->flags |= TP_PARITY_INIT; if (sc->flags & TP_PARITY_INIT) { t3_write_reg(sc, A_TP_INT_CAUSE, F_CMCACHEPERR | F_ARPLUTPERR); t3_write_reg(sc, A_TP_INT_ENABLE, 0x7fbfffff); } if (!(sc->flags & QUEUES_BOUND)) { bind_qsets(sc); setup_hw_filters(sc); sc->flags |= QUEUES_BOUND; } t3_sge_reset_adapter(sc); out: return (err); } /* * Called when the last open device is closed. Does NOT undo all of cxgb_up's * work. Specifically, the resources grabbed under FULL_INIT_DONE are released * during controller_detach, not here. */ static void cxgb_down(struct adapter *sc) { t3_sge_stop(sc); t3_intr_disable(sc); } /* * if_init for cxgb ports. */ static void cxgb_init(void *arg) { struct port_info *p = arg; struct adapter *sc = p->adapter; ADAPTER_LOCK(sc); cxgb_init_locked(p); /* releases adapter lock */ ADAPTER_LOCK_ASSERT_NOTOWNED(sc); } static int cxgb_init_locked(struct port_info *p) { struct adapter *sc = p->adapter; struct ifnet *ifp = p->ifp; struct cmac *mac = &p->mac; int i, rc = 0, may_sleep = 0, gave_up_lock = 0; ADAPTER_LOCK_ASSERT_OWNED(sc); while (!IS_DOOMED(p) && IS_BUSY(sc)) { gave_up_lock = 1; if (mtx_sleep(&sc->flags, &sc->lock, PCATCH, "cxgbinit", 0)) { rc = EINTR; goto done; } } if (IS_DOOMED(p)) { rc = ENXIO; goto done; } KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__)); /* * The code that runs during one-time adapter initialization can sleep * so it's important not to hold any locks across it. */ may_sleep = sc->flags & FULL_INIT_DONE ? 0 : 1; if (may_sleep) { SET_BUSY(sc); gave_up_lock = 1; ADAPTER_UNLOCK(sc); } if (sc->open_device_map == 0 && ((rc = cxgb_up(sc)) != 0)) goto done; PORT_LOCK(p); if (isset(&sc->open_device_map, p->port_id) && (ifp->if_drv_flags & IFF_DRV_RUNNING)) { PORT_UNLOCK(p); goto done; } t3_port_intr_enable(sc, p->port_id); if (!mac->multiport) t3_mac_init(mac); cxgb_update_mac_settings(p); t3_link_start(&p->phy, mac, &p->link_config); t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX); ifp->if_drv_flags |= IFF_DRV_RUNNING; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; PORT_UNLOCK(p); for (i = p->first_qset; i < p->first_qset + p->nqsets; i++) { struct sge_qset *qs = &sc->sge.qs[i]; struct sge_txq *txq = &qs->txq[TXQ_ETH]; callout_reset_on(&txq->txq_watchdog, hz, cxgb_tx_watchdog, qs, txq->txq_watchdog.c_cpu); } /* all ok */ setbit(&sc->open_device_map, p->port_id); callout_reset(&p->link_check_ch, p->phy.caps & SUPPORTED_LINK_IRQ ? hz * 3 : hz / 4, link_check_callout, p); done: if (may_sleep) { ADAPTER_LOCK(sc); KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__)); CLR_BUSY(sc); } if (gave_up_lock) wakeup_one(&sc->flags); ADAPTER_UNLOCK(sc); return (rc); } static int cxgb_uninit_locked(struct port_info *p) { struct adapter *sc = p->adapter; int rc; ADAPTER_LOCK_ASSERT_OWNED(sc); while (!IS_DOOMED(p) && IS_BUSY(sc)) { if (mtx_sleep(&sc->flags, &sc->lock, PCATCH, "cxgbunin", 0)) { rc = EINTR; goto done; } } if (IS_DOOMED(p)) { rc = ENXIO; goto done; } KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__)); SET_BUSY(sc); ADAPTER_UNLOCK(sc); rc = cxgb_uninit_synchronized(p); ADAPTER_LOCK(sc); KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__)); CLR_BUSY(sc); wakeup_one(&sc->flags); done: ADAPTER_UNLOCK(sc); return (rc); } /* * Called on "ifconfig down", and from port_detach */ static int cxgb_uninit_synchronized(struct port_info *pi) { struct adapter *sc = pi->adapter; struct ifnet *ifp = pi->ifp; /* * taskqueue_drain may cause a deadlock if the adapter lock is held. */ ADAPTER_LOCK_ASSERT_NOTOWNED(sc); /* * Clear this port's bit from the open device map, and then drain all * the tasks that can access/manipulate this port's port_info or ifp. * We disable this port's interrupts here and so the slow/ext * interrupt tasks won't be enqueued. The tick task will continue to * be enqueued every second but the runs after this drain will not see * this port in the open device map. * * A well behaved task must take open_device_map into account and ignore * ports that are not open. */ clrbit(&sc->open_device_map, pi->port_id); t3_port_intr_disable(sc, pi->port_id); taskqueue_drain(sc->tq, &sc->slow_intr_task); taskqueue_drain(sc->tq, &sc->tick_task); callout_drain(&pi->link_check_ch); taskqueue_drain(sc->tq, &pi->link_check_task); PORT_LOCK(pi); ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); /* disable pause frames */ t3_set_reg_field(sc, A_XGM_TX_CFG + pi->mac.offset, F_TXPAUSEEN, 0); /* Reset RX FIFO HWM */ t3_set_reg_field(sc, A_XGM_RXFIFO_CFG + pi->mac.offset, V_RXFIFOPAUSEHWM(M_RXFIFOPAUSEHWM), 0); DELAY(100 * 1000); /* Wait for TXFIFO empty */ t3_wait_op_done(sc, A_XGM_TXFIFO_CFG + pi->mac.offset, F_TXFIFO_EMPTY, 1, 20, 5); DELAY(100 * 1000); t3_mac_disable(&pi->mac, MAC_DIRECTION_RX); pi->phy.ops->power_down(&pi->phy, 1); PORT_UNLOCK(pi); pi->link_config.link_ok = 0; t3_os_link_changed(sc, pi->port_id, 0, 0, 0, 0, 0); if (sc->open_device_map == 0) cxgb_down(pi->adapter); return (0); } /* * Mark lro enabled or disabled in all qsets for this port */ static int cxgb_set_lro(struct port_info *p, int enabled) { int i; struct adapter *adp = p->adapter; struct sge_qset *q; for (i = 0; i < p->nqsets; i++) { q = &adp->sge.qs[p->first_qset + i]; q->lro.enabled = (enabled != 0); } return (0); } static int cxgb_ioctl(struct ifnet *ifp, unsigned long command, caddr_t data) { struct port_info *p = ifp->if_softc; struct adapter *sc = p->adapter; struct ifreq *ifr = (struct ifreq *)data; int flags, error = 0, mtu; uint32_t mask; switch (command) { case SIOCSIFMTU: ADAPTER_LOCK(sc); error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0); if (error) { fail: ADAPTER_UNLOCK(sc); return (error); } mtu = ifr->ifr_mtu; if ((mtu < ETHERMIN) || (mtu > ETHERMTU_JUMBO)) { error = EINVAL; } else { ifp->if_mtu = mtu; PORT_LOCK(p); cxgb_update_mac_settings(p); PORT_UNLOCK(p); } ADAPTER_UNLOCK(sc); break; case SIOCSIFFLAGS: ADAPTER_LOCK(sc); if (IS_DOOMED(p)) { error = ENXIO; goto fail; } if (ifp->if_flags & IFF_UP) { if (ifp->if_drv_flags & IFF_DRV_RUNNING) { flags = p->if_flags; if (((ifp->if_flags ^ flags) & IFF_PROMISC) || ((ifp->if_flags ^ flags) & IFF_ALLMULTI)) { if (IS_BUSY(sc)) { error = EBUSY; goto fail; } PORT_LOCK(p); cxgb_update_mac_settings(p); PORT_UNLOCK(p); } ADAPTER_UNLOCK(sc); } else error = cxgb_init_locked(p); p->if_flags = ifp->if_flags; } else if (ifp->if_drv_flags & IFF_DRV_RUNNING) error = cxgb_uninit_locked(p); else ADAPTER_UNLOCK(sc); ADAPTER_LOCK_ASSERT_NOTOWNED(sc); break; case SIOCADDMULTI: case SIOCDELMULTI: ADAPTER_LOCK(sc); error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0); if (error) goto fail; if (ifp->if_drv_flags & IFF_DRV_RUNNING) { PORT_LOCK(p); cxgb_update_mac_settings(p); PORT_UNLOCK(p); } ADAPTER_UNLOCK(sc); break; case SIOCSIFCAP: ADAPTER_LOCK(sc); error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0); if (error) goto fail; mask = ifr->ifr_reqcap ^ ifp->if_capenable; if (mask & IFCAP_TXCSUM) { ifp->if_capenable ^= IFCAP_TXCSUM; ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP); if (IFCAP_TSO4 & ifp->if_capenable && !(IFCAP_TXCSUM & ifp->if_capenable)) { ifp->if_capenable &= ~IFCAP_TSO4; if_printf(ifp, "tso4 disabled due to -txcsum.\n"); } } if (mask & IFCAP_TXCSUM_IPV6) { ifp->if_capenable ^= IFCAP_TXCSUM_IPV6; ifp->if_hwassist ^= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6); if (IFCAP_TSO6 & ifp->if_capenable && !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) { ifp->if_capenable &= ~IFCAP_TSO6; if_printf(ifp, "tso6 disabled due to -txcsum6.\n"); } } if (mask & IFCAP_RXCSUM) ifp->if_capenable ^= IFCAP_RXCSUM; if (mask & IFCAP_RXCSUM_IPV6) ifp->if_capenable ^= IFCAP_RXCSUM_IPV6; /* * Note that we leave CSUM_TSO alone (it is always set). The * kernel takes both IFCAP_TSOx and CSUM_TSO into account before * sending a TSO request our way, so it's sufficient to toggle * IFCAP_TSOx only. */ if (mask & IFCAP_TSO4) { if (!(IFCAP_TSO4 & ifp->if_capenable) && !(IFCAP_TXCSUM & ifp->if_capenable)) { if_printf(ifp, "enable txcsum first.\n"); error = EAGAIN; goto fail; } ifp->if_capenable ^= IFCAP_TSO4; } if (mask & IFCAP_TSO6) { if (!(IFCAP_TSO6 & ifp->if_capenable) && !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) { if_printf(ifp, "enable txcsum6 first.\n"); error = EAGAIN; goto fail; } ifp->if_capenable ^= IFCAP_TSO6; } if (mask & IFCAP_LRO) { ifp->if_capenable ^= IFCAP_LRO; /* Safe to do this even if cxgb_up not called yet */ cxgb_set_lro(p, ifp->if_capenable & IFCAP_LRO); } #ifdef TCP_OFFLOAD if (mask & IFCAP_TOE4) { int enable = (ifp->if_capenable ^ mask) & IFCAP_TOE4; error = toe_capability(p, enable); if (error == 0) ifp->if_capenable ^= mask; } #endif if (mask & IFCAP_VLAN_HWTAGGING) { ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; if (ifp->if_drv_flags & IFF_DRV_RUNNING) { PORT_LOCK(p); cxgb_update_mac_settings(p); PORT_UNLOCK(p); } } if (mask & IFCAP_VLAN_MTU) { ifp->if_capenable ^= IFCAP_VLAN_MTU; if (ifp->if_drv_flags & IFF_DRV_RUNNING) { PORT_LOCK(p); cxgb_update_mac_settings(p); PORT_UNLOCK(p); } } if (mask & IFCAP_VLAN_HWTSO) ifp->if_capenable ^= IFCAP_VLAN_HWTSO; if (mask & IFCAP_VLAN_HWCSUM) ifp->if_capenable ^= IFCAP_VLAN_HWCSUM; #ifdef VLAN_CAPABILITIES VLAN_CAPABILITIES(ifp); #endif ADAPTER_UNLOCK(sc); break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: error = ifmedia_ioctl(ifp, ifr, &p->media, command); break; default: error = ether_ioctl(ifp, command, data); } return (error); } static int cxgb_media_change(struct ifnet *ifp) { return (EOPNOTSUPP); } /* * Translates phy->modtype to the correct Ethernet media subtype. */ static int cxgb_ifm_type(int mod) { switch (mod) { case phy_modtype_sr: return (IFM_10G_SR); case phy_modtype_lr: return (IFM_10G_LR); case phy_modtype_lrm: return (IFM_10G_LRM); case phy_modtype_twinax: return (IFM_10G_TWINAX); case phy_modtype_twinax_long: return (IFM_10G_TWINAX_LONG); case phy_modtype_none: return (IFM_NONE); case phy_modtype_unknown: return (IFM_UNKNOWN); } KASSERT(0, ("%s: modtype %d unknown", __func__, mod)); return (IFM_UNKNOWN); } /* * Rebuilds the ifmedia list for this port, and sets the current media. */ static void cxgb_build_medialist(struct port_info *p) { struct cphy *phy = &p->phy; struct ifmedia *media = &p->media; int mod = phy->modtype; int m = IFM_ETHER | IFM_FDX; PORT_LOCK(p); ifmedia_removeall(media); if (phy->caps & SUPPORTED_TP && phy->caps & SUPPORTED_Autoneg) { /* Copper (RJ45) */ if (phy->caps & SUPPORTED_10000baseT_Full) ifmedia_add(media, m | IFM_10G_T, mod, NULL); if (phy->caps & SUPPORTED_1000baseT_Full) ifmedia_add(media, m | IFM_1000_T, mod, NULL); if (phy->caps & SUPPORTED_100baseT_Full) ifmedia_add(media, m | IFM_100_TX, mod, NULL); if (phy->caps & SUPPORTED_10baseT_Full) ifmedia_add(media, m | IFM_10_T, mod, NULL); ifmedia_add(media, IFM_ETHER | IFM_AUTO, mod, NULL); ifmedia_set(media, IFM_ETHER | IFM_AUTO); } else if (phy->caps & SUPPORTED_TP) { /* Copper (CX4) */ KASSERT(phy->caps & SUPPORTED_10000baseT_Full, ("%s: unexpected cap 0x%x", __func__, phy->caps)); ifmedia_add(media, m | IFM_10G_CX4, mod, NULL); ifmedia_set(media, m | IFM_10G_CX4); } else if (phy->caps & SUPPORTED_FIBRE && phy->caps & SUPPORTED_10000baseT_Full) { /* 10G optical (but includes SFP+ twinax) */ m |= cxgb_ifm_type(mod); if (IFM_SUBTYPE(m) == IFM_NONE) m &= ~IFM_FDX; ifmedia_add(media, m, mod, NULL); ifmedia_set(media, m); } else if (phy->caps & SUPPORTED_FIBRE && phy->caps & SUPPORTED_1000baseT_Full) { /* 1G optical */ /* XXX: Lie and claim to be SX, could actually be any 1G-X */ ifmedia_add(media, m | IFM_1000_SX, mod, NULL); ifmedia_set(media, m | IFM_1000_SX); } else { KASSERT(0, ("%s: don't know how to handle 0x%x.", __func__, phy->caps)); } PORT_UNLOCK(p); } static void cxgb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) { struct port_info *p = ifp->if_softc; struct ifmedia_entry *cur = p->media.ifm_cur; int speed = p->link_config.speed; if (cur->ifm_data != p->phy.modtype) { cxgb_build_medialist(p); cur = p->media.ifm_cur; } ifmr->ifm_status = IFM_AVALID; if (!p->link_config.link_ok) return; ifmr->ifm_status |= IFM_ACTIVE; /* * active and current will differ iff current media is autoselect. That * can happen only for copper RJ45. */ if (IFM_SUBTYPE(cur->ifm_media) != IFM_AUTO) return; KASSERT(p->phy.caps & SUPPORTED_TP && p->phy.caps & SUPPORTED_Autoneg, ("%s: unexpected PHY caps 0x%x", __func__, p->phy.caps)); ifmr->ifm_active = IFM_ETHER | IFM_FDX; if (speed == SPEED_10000) ifmr->ifm_active |= IFM_10G_T; else if (speed == SPEED_1000) ifmr->ifm_active |= IFM_1000_T; else if (speed == SPEED_100) ifmr->ifm_active |= IFM_100_TX; else if (speed == SPEED_10) ifmr->ifm_active |= IFM_10_T; else KASSERT(0, ("%s: link up but speed unknown (%u)", __func__, speed)); } static uint64_t cxgb_get_counter(struct ifnet *ifp, ift_counter c) { struct port_info *pi = ifp->if_softc; struct adapter *sc = pi->adapter; struct cmac *mac = &pi->mac; struct mac_stats *mstats = &mac->stats; cxgb_refresh_stats(pi); switch (c) { case IFCOUNTER_IPACKETS: return (mstats->rx_frames); case IFCOUNTER_IERRORS: return (mstats->rx_jabber + mstats->rx_data_errs + mstats->rx_sequence_errs + mstats->rx_runt + mstats->rx_too_long + mstats->rx_mac_internal_errs + mstats->rx_short + mstats->rx_fcs_errs); case IFCOUNTER_OPACKETS: return (mstats->tx_frames); case IFCOUNTER_OERRORS: return (mstats->tx_excess_collisions + mstats->tx_underrun + mstats->tx_len_errs + mstats->tx_mac_internal_errs + mstats->tx_excess_deferral + mstats->tx_fcs_errs); case IFCOUNTER_COLLISIONS: return (mstats->tx_total_collisions); case IFCOUNTER_IBYTES: return (mstats->rx_octets); case IFCOUNTER_OBYTES: return (mstats->tx_octets); case IFCOUNTER_IMCASTS: return (mstats->rx_mcast_frames); case IFCOUNTER_OMCASTS: return (mstats->tx_mcast_frames); case IFCOUNTER_IQDROPS: return (mstats->rx_cong_drops); case IFCOUNTER_OQDROPS: { int i; uint64_t drops; drops = 0; if (sc->flags & FULL_INIT_DONE) { for (i = pi->first_qset; i < pi->first_qset + pi->nqsets; i++) drops += sc->sge.qs[i].txq[TXQ_ETH].txq_mr->br_drops; } return (drops); } default: return (if_get_counter_default(ifp, c)); } } static void cxgb_async_intr(void *data) { adapter_t *sc = data; t3_write_reg(sc, A_PL_INT_ENABLE0, 0); (void) t3_read_reg(sc, A_PL_INT_ENABLE0); taskqueue_enqueue(sc->tq, &sc->slow_intr_task); } static void link_check_callout(void *arg) { struct port_info *pi = arg; struct adapter *sc = pi->adapter; if (!isset(&sc->open_device_map, pi->port_id)) return; taskqueue_enqueue(sc->tq, &pi->link_check_task); } static void check_link_status(void *arg, int pending) { struct port_info *pi = arg; struct adapter *sc = pi->adapter; if (!isset(&sc->open_device_map, pi->port_id)) return; t3_link_changed(sc, pi->port_id); if (pi->link_fault || !(pi->phy.caps & SUPPORTED_LINK_IRQ) || pi->link_config.link_ok == 0) callout_reset(&pi->link_check_ch, hz, link_check_callout, pi); } void t3_os_link_intr(struct port_info *pi) { /* * Schedule a link check in the near future. If the link is flapping * rapidly we'll keep resetting the callout and delaying the check until * things stabilize a bit. */ callout_reset(&pi->link_check_ch, hz / 4, link_check_callout, pi); } static void check_t3b2_mac(struct adapter *sc) { int i; if (sc->flags & CXGB_SHUTDOWN) return; for_each_port(sc, i) { struct port_info *p = &sc->port[i]; int status; #ifdef INVARIANTS struct ifnet *ifp = p->ifp; #endif if (!isset(&sc->open_device_map, p->port_id) || p->link_fault || !p->link_config.link_ok) continue; KASSERT(ifp->if_drv_flags & IFF_DRV_RUNNING, ("%s: state mismatch (drv_flags %x, device_map %x)", __func__, ifp->if_drv_flags, sc->open_device_map)); PORT_LOCK(p); status = t3b2_mac_watchdog_task(&p->mac); if (status == 1) p->mac.stats.num_toggled++; else if (status == 2) { struct cmac *mac = &p->mac; cxgb_update_mac_settings(p); t3_link_start(&p->phy, mac, &p->link_config); t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX); t3_port_intr_enable(sc, p->port_id); p->mac.stats.num_resets++; } PORT_UNLOCK(p); } } static void cxgb_tick(void *arg) { adapter_t *sc = (adapter_t *)arg; if (sc->flags & CXGB_SHUTDOWN) return; taskqueue_enqueue(sc->tq, &sc->tick_task); callout_reset(&sc->cxgb_tick_ch, hz, cxgb_tick, sc); } void cxgb_refresh_stats(struct port_info *pi) { struct timeval tv; const struct timeval interval = {0, 250000}; /* 250ms */ getmicrotime(&tv); timevalsub(&tv, &interval); if (timevalcmp(&tv, &pi->last_refreshed, <)) return; PORT_LOCK(pi); t3_mac_update_stats(&pi->mac); PORT_UNLOCK(pi); getmicrotime(&pi->last_refreshed); } static void cxgb_tick_handler(void *arg, int count) { adapter_t *sc = (adapter_t *)arg; const struct adapter_params *p = &sc->params; int i; uint32_t cause, reset; if (sc->flags & CXGB_SHUTDOWN || !(sc->flags & FULL_INIT_DONE)) return; if (p->rev == T3_REV_B2 && p->nports < 4 && sc->open_device_map) check_t3b2_mac(sc); cause = t3_read_reg(sc, A_SG_INT_CAUSE) & (F_RSPQSTARVE | F_FLEMPTY); if (cause) { struct sge_qset *qs = &sc->sge.qs[0]; uint32_t mask, v; v = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS) & ~0xff00; mask = 1; for (i = 0; i < SGE_QSETS; i++) { if (v & mask) qs[i].rspq.starved++; mask <<= 1; } mask <<= SGE_QSETS; /* skip RSPQXDISABLED */ for (i = 0; i < SGE_QSETS * 2; i++) { if (v & mask) { qs[i / 2].fl[i % 2].empty++; } mask <<= 1; } /* clear */ t3_write_reg(sc, A_SG_RSPQ_FL_STATUS, v); t3_write_reg(sc, A_SG_INT_CAUSE, cause); } for (i = 0; i < sc->params.nports; i++) { struct port_info *pi = &sc->port[i]; struct cmac *mac = &pi->mac; if (!isset(&sc->open_device_map, pi->port_id)) continue; cxgb_refresh_stats(pi); if (mac->multiport) continue; /* Count rx fifo overflows, once per second */ cause = t3_read_reg(sc, A_XGM_INT_CAUSE + mac->offset); reset = 0; if (cause & F_RXFIFO_OVERFLOW) { mac->stats.rx_fifo_ovfl++; reset |= F_RXFIFO_OVERFLOW; } t3_write_reg(sc, A_XGM_INT_CAUSE + mac->offset, reset); } } static void touch_bars(device_t dev) { /* * Don't enable yet */ #if !defined(__LP64__) && 0 u32 v; pci_read_config_dword(pdev, PCI_BASE_ADDRESS_1, &v); pci_write_config_dword(pdev, PCI_BASE_ADDRESS_1, v); pci_read_config_dword(pdev, PCI_BASE_ADDRESS_3, &v); pci_write_config_dword(pdev, PCI_BASE_ADDRESS_3, v); pci_read_config_dword(pdev, PCI_BASE_ADDRESS_5, &v); pci_write_config_dword(pdev, PCI_BASE_ADDRESS_5, v); #endif } static int set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset) { uint8_t *buf; int err = 0; u32 aligned_offset, aligned_len, *p; struct adapter *adapter = pi->adapter; aligned_offset = offset & ~3; aligned_len = (len + (offset & 3) + 3) & ~3; if (aligned_offset != offset || aligned_len != len) { buf = malloc(aligned_len, M_DEVBUF, M_WAITOK|M_ZERO); if (!buf) return (ENOMEM); err = t3_seeprom_read(adapter, aligned_offset, (u32 *)buf); if (!err && aligned_len > 4) err = t3_seeprom_read(adapter, aligned_offset + aligned_len - 4, (u32 *)&buf[aligned_len - 4]); if (err) goto out; memcpy(buf + (offset & 3), data, len); } else buf = (uint8_t *)(uintptr_t)data; err = t3_seeprom_wp(adapter, 0); if (err) goto out; for (p = (u32 *)buf; !err && aligned_len; aligned_len -= 4, p++) { err = t3_seeprom_write(adapter, aligned_offset, *p); aligned_offset += 4; } if (!err) err = t3_seeprom_wp(adapter, 1); out: if (buf != data) free(buf, M_DEVBUF); return err; } static int in_range(int val, int lo, int hi) { return val < 0 || (val <= hi && val >= lo); } static int cxgb_extension_open(struct cdev *dev, int flags, int fmp, struct thread *td) { return (0); } static int cxgb_extension_close(struct cdev *dev, int flags, int fmt, struct thread *td) { return (0); } static int cxgb_extension_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data, int fflag, struct thread *td) { int mmd, error = 0; struct port_info *pi = dev->si_drv1; adapter_t *sc = pi->adapter; #ifdef PRIV_SUPPORTED if (priv_check(td, PRIV_DRIVER)) { if (cxgb_debug) printf("user does not have access to privileged ioctls\n"); return (EPERM); } #else if (suser(td)) { if (cxgb_debug) printf("user does not have access to privileged ioctls\n"); return (EPERM); } #endif switch (cmd) { case CHELSIO_GET_MIIREG: { uint32_t val; struct cphy *phy = &pi->phy; struct ch_mii_data *mid = (struct ch_mii_data *)data; if (!phy->mdio_read) return (EOPNOTSUPP); if (is_10G(sc)) { mmd = mid->phy_id >> 8; if (!mmd) mmd = MDIO_DEV_PCS; else if (mmd > MDIO_DEV_VEND2) return (EINVAL); error = phy->mdio_read(sc, mid->phy_id & 0x1f, mmd, mid->reg_num, &val); } else error = phy->mdio_read(sc, mid->phy_id & 0x1f, 0, mid->reg_num & 0x1f, &val); if (error == 0) mid->val_out = val; break; } case CHELSIO_SET_MIIREG: { struct cphy *phy = &pi->phy; struct ch_mii_data *mid = (struct ch_mii_data *)data; if (!phy->mdio_write) return (EOPNOTSUPP); if (is_10G(sc)) { mmd = mid->phy_id >> 8; if (!mmd) mmd = MDIO_DEV_PCS; else if (mmd > MDIO_DEV_VEND2) return (EINVAL); error = phy->mdio_write(sc, mid->phy_id & 0x1f, mmd, mid->reg_num, mid->val_in); } else error = phy->mdio_write(sc, mid->phy_id & 0x1f, 0, mid->reg_num & 0x1f, mid->val_in); break; } case CHELSIO_SETREG: { struct ch_reg *edata = (struct ch_reg *)data; if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len) return (EFAULT); t3_write_reg(sc, edata->addr, edata->val); break; } case CHELSIO_GETREG: { struct ch_reg *edata = (struct ch_reg *)data; if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len) return (EFAULT); edata->val = t3_read_reg(sc, edata->addr); break; } case CHELSIO_GET_SGE_CONTEXT: { struct ch_cntxt *ecntxt = (struct ch_cntxt *)data; mtx_lock_spin(&sc->sge.reg_lock); switch (ecntxt->cntxt_type) { case CNTXT_TYPE_EGRESS: error = -t3_sge_read_ecntxt(sc, ecntxt->cntxt_id, ecntxt->data); break; case CNTXT_TYPE_FL: error = -t3_sge_read_fl(sc, ecntxt->cntxt_id, ecntxt->data); break; case CNTXT_TYPE_RSP: error = -t3_sge_read_rspq(sc, ecntxt->cntxt_id, ecntxt->data); break; case CNTXT_TYPE_CQ: error = -t3_sge_read_cq(sc, ecntxt->cntxt_id, ecntxt->data); break; default: error = EINVAL; break; } mtx_unlock_spin(&sc->sge.reg_lock); break; } case CHELSIO_GET_SGE_DESC: { struct ch_desc *edesc = (struct ch_desc *)data; int ret; if (edesc->queue_num >= SGE_QSETS * 6) return (EINVAL); ret = t3_get_desc(&sc->sge.qs[edesc->queue_num / 6], edesc->queue_num % 6, edesc->idx, edesc->data); if (ret < 0) return (EINVAL); edesc->size = ret; break; } case CHELSIO_GET_QSET_PARAMS: { struct qset_params *q; struct ch_qset_params *t = (struct ch_qset_params *)data; int q1 = pi->first_qset; int nqsets = pi->nqsets; int i; if (t->qset_idx >= nqsets) return EINVAL; i = q1 + t->qset_idx; q = &sc->params.sge.qset[i]; t->rspq_size = q->rspq_size; t->txq_size[0] = q->txq_size[0]; t->txq_size[1] = q->txq_size[1]; t->txq_size[2] = q->txq_size[2]; t->fl_size[0] = q->fl_size; t->fl_size[1] = q->jumbo_size; t->polling = q->polling; t->lro = q->lro; t->intr_lat = q->coalesce_usecs; t->cong_thres = q->cong_thres; t->qnum = i; if ((sc->flags & FULL_INIT_DONE) == 0) t->vector = 0; else if (sc->flags & USING_MSIX) t->vector = rman_get_start(sc->msix_irq_res[i]); else t->vector = rman_get_start(sc->irq_res); break; } case CHELSIO_GET_QSET_NUM: { struct ch_reg *edata = (struct ch_reg *)data; edata->val = pi->nqsets; break; } case CHELSIO_LOAD_FW: { uint8_t *fw_data; uint32_t vers; struct ch_mem_range *t = (struct ch_mem_range *)data; /* * You're allowed to load a firmware only before FULL_INIT_DONE * * FW_UPTODATE is also set so the rest of the initialization * will not overwrite what was loaded here. This gives you the * flexibility to load any firmware (and maybe shoot yourself in * the foot). */ ADAPTER_LOCK(sc); if (sc->open_device_map || sc->flags & FULL_INIT_DONE) { ADAPTER_UNLOCK(sc); return (EBUSY); } fw_data = malloc(t->len, M_DEVBUF, M_NOWAIT); if (!fw_data) error = ENOMEM; else error = copyin(t->buf, fw_data, t->len); if (!error) error = -t3_load_fw(sc, fw_data, t->len); if (t3_get_fw_version(sc, &vers) == 0) { snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d", G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers), G_FW_VERSION_MICRO(vers)); } if (!error) sc->flags |= FW_UPTODATE; free(fw_data, M_DEVBUF); ADAPTER_UNLOCK(sc); break; } case CHELSIO_LOAD_BOOT: { uint8_t *boot_data; struct ch_mem_range *t = (struct ch_mem_range *)data; boot_data = malloc(t->len, M_DEVBUF, M_NOWAIT); if (!boot_data) return ENOMEM; error = copyin(t->buf, boot_data, t->len); if (!error) error = -t3_load_boot(sc, boot_data, t->len); free(boot_data, M_DEVBUF); break; } case CHELSIO_GET_PM: { struct ch_pm *m = (struct ch_pm *)data; struct tp_params *p = &sc->params.tp; if (!is_offload(sc)) return (EOPNOTSUPP); m->tx_pg_sz = p->tx_pg_size; m->tx_num_pg = p->tx_num_pgs; m->rx_pg_sz = p->rx_pg_size; m->rx_num_pg = p->rx_num_pgs; m->pm_total = p->pmtx_size + p->chan_rx_size * p->nchan; break; } case CHELSIO_SET_PM: { struct ch_pm *m = (struct ch_pm *)data; struct tp_params *p = &sc->params.tp; if (!is_offload(sc)) return (EOPNOTSUPP); if (sc->flags & FULL_INIT_DONE) return (EBUSY); if (!m->rx_pg_sz || (m->rx_pg_sz & (m->rx_pg_sz - 1)) || !m->tx_pg_sz || (m->tx_pg_sz & (m->tx_pg_sz - 1))) return (EINVAL); /* not power of 2 */ if (!(m->rx_pg_sz & 0x14000)) return (EINVAL); /* not 16KB or 64KB */ if (!(m->tx_pg_sz & 0x1554000)) return (EINVAL); if (m->tx_num_pg == -1) m->tx_num_pg = p->tx_num_pgs; if (m->rx_num_pg == -1) m->rx_num_pg = p->rx_num_pgs; if (m->tx_num_pg % 24 || m->rx_num_pg % 24) return (EINVAL); if (m->rx_num_pg * m->rx_pg_sz > p->chan_rx_size || m->tx_num_pg * m->tx_pg_sz > p->chan_tx_size) return (EINVAL); p->rx_pg_size = m->rx_pg_sz; p->tx_pg_size = m->tx_pg_sz; p->rx_num_pgs = m->rx_num_pg; p->tx_num_pgs = m->tx_num_pg; break; } case CHELSIO_SETMTUTAB: { struct ch_mtus *m = (struct ch_mtus *)data; int i; if (!is_offload(sc)) return (EOPNOTSUPP); if (offload_running(sc)) return (EBUSY); if (m->nmtus != NMTUS) return (EINVAL); if (m->mtus[0] < 81) /* accommodate SACK */ return (EINVAL); /* * MTUs must be in ascending order */ for (i = 1; i < NMTUS; ++i) if (m->mtus[i] < m->mtus[i - 1]) return (EINVAL); memcpy(sc->params.mtus, m->mtus, sizeof(sc->params.mtus)); break; } case CHELSIO_GETMTUTAB: { struct ch_mtus *m = (struct ch_mtus *)data; if (!is_offload(sc)) return (EOPNOTSUPP); memcpy(m->mtus, sc->params.mtus, sizeof(m->mtus)); m->nmtus = NMTUS; break; } case CHELSIO_GET_MEM: { struct ch_mem_range *t = (struct ch_mem_range *)data; struct mc7 *mem; uint8_t *useraddr; u64 buf[32]; /* * Use these to avoid modifying len/addr in the return * struct */ uint32_t len = t->len, addr = t->addr; if (!is_offload(sc)) return (EOPNOTSUPP); if (!(sc->flags & FULL_INIT_DONE)) return (EIO); /* need the memory controllers */ if ((addr & 0x7) || (len & 0x7)) return (EINVAL); if (t->mem_id == MEM_CM) mem = &sc->cm; else if (t->mem_id == MEM_PMRX) mem = &sc->pmrx; else if (t->mem_id == MEM_PMTX) mem = &sc->pmtx; else return (EINVAL); /* * Version scheme: * bits 0..9: chip version * bits 10..15: chip revision */ t->version = 3 | (sc->params.rev << 10); /* * Read 256 bytes at a time as len can be large and we don't * want to use huge intermediate buffers. */ useraddr = (uint8_t *)t->buf; while (len) { unsigned int chunk = min(len, sizeof(buf)); error = t3_mc7_bd_read(mem, addr / 8, chunk / 8, buf); if (error) return (-error); if (copyout(buf, useraddr, chunk)) return (EFAULT); useraddr += chunk; addr += chunk; len -= chunk; } break; } case CHELSIO_READ_TCAM_WORD: { struct ch_tcam_word *t = (struct ch_tcam_word *)data; if (!is_offload(sc)) return (EOPNOTSUPP); if (!(sc->flags & FULL_INIT_DONE)) return (EIO); /* need MC5 */ return -t3_read_mc5_range(&sc->mc5, t->addr, 1, t->buf); break; } case CHELSIO_SET_TRACE_FILTER: { struct ch_trace *t = (struct ch_trace *)data; const struct trace_params *tp; tp = (const struct trace_params *)&t->sip; if (t->config_tx) t3_config_trace_filter(sc, tp, 0, t->invert_match, t->trace_tx); if (t->config_rx) t3_config_trace_filter(sc, tp, 1, t->invert_match, t->trace_rx); break; } case CHELSIO_SET_PKTSCHED: { struct ch_pktsched_params *p = (struct ch_pktsched_params *)data; if (sc->open_device_map == 0) return (EAGAIN); send_pktsched_cmd(sc, p->sched, p->idx, p->min, p->max, p->binding); break; } case CHELSIO_IFCONF_GETREGS: { struct ch_ifconf_regs *regs = (struct ch_ifconf_regs *)data; int reglen = cxgb_get_regs_len(); uint8_t *buf = malloc(reglen, M_DEVBUF, M_NOWAIT); if (buf == NULL) { return (ENOMEM); } if (regs->len > reglen) regs->len = reglen; else if (regs->len < reglen) error = ENOBUFS; if (!error) { cxgb_get_regs(sc, regs, buf); error = copyout(buf, regs->data, reglen); } free(buf, M_DEVBUF); break; } case CHELSIO_SET_HW_SCHED: { struct ch_hw_sched *t = (struct ch_hw_sched *)data; unsigned int ticks_per_usec = core_ticks_per_usec(sc); if ((sc->flags & FULL_INIT_DONE) == 0) return (EAGAIN); /* need TP to be initialized */ if (t->sched >= NTX_SCHED || !in_range(t->mode, 0, 1) || !in_range(t->channel, 0, 1) || !in_range(t->kbps, 0, 10000000) || !in_range(t->class_ipg, 0, 10000 * 65535 / ticks_per_usec) || !in_range(t->flow_ipg, 0, dack_ticks_to_usec(sc, 0x7ff))) return (EINVAL); if (t->kbps >= 0) { error = t3_config_sched(sc, t->kbps, t->sched); if (error < 0) return (-error); } if (t->class_ipg >= 0) t3_set_sched_ipg(sc, t->sched, t->class_ipg); if (t->flow_ipg >= 0) { t->flow_ipg *= 1000; /* us -> ns */ t3_set_pace_tbl(sc, &t->flow_ipg, t->sched, 1); } if (t->mode >= 0) { int bit = 1 << (S_TX_MOD_TIMER_MODE + t->sched); t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP, bit, t->mode ? bit : 0); } if (t->channel >= 0) t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP, 1 << t->sched, t->channel << t->sched); break; } case CHELSIO_GET_EEPROM: { int i; struct ch_eeprom *e = (struct ch_eeprom *)data; uint8_t *buf; if (e->offset & 3 || e->offset >= EEPROMSIZE || e->len > EEPROMSIZE || e->offset + e->len > EEPROMSIZE) { return (EINVAL); } buf = malloc(EEPROMSIZE, M_DEVBUF, M_NOWAIT); if (buf == NULL) { return (ENOMEM); } e->magic = EEPROM_MAGIC; for (i = e->offset & ~3; !error && i < e->offset + e->len; i += 4) error = -t3_seeprom_read(sc, i, (uint32_t *)&buf[i]); if (!error) error = copyout(buf + e->offset, e->data, e->len); free(buf, M_DEVBUF); break; } case CHELSIO_CLEAR_STATS: { if (!(sc->flags & FULL_INIT_DONE)) return EAGAIN; PORT_LOCK(pi); t3_mac_update_stats(&pi->mac); memset(&pi->mac.stats, 0, sizeof(pi->mac.stats)); PORT_UNLOCK(pi); break; } case CHELSIO_GET_UP_LA: { struct ch_up_la *la = (struct ch_up_la *)data; uint8_t *buf = malloc(LA_BUFSIZE, M_DEVBUF, M_NOWAIT); if (buf == NULL) { return (ENOMEM); } if (la->bufsize < LA_BUFSIZE) error = ENOBUFS; if (!error) error = -t3_get_up_la(sc, &la->stopped, &la->idx, &la->bufsize, buf); if (!error) error = copyout(buf, la->data, la->bufsize); free(buf, M_DEVBUF); break; } case CHELSIO_GET_UP_IOQS: { struct ch_up_ioqs *ioqs = (struct ch_up_ioqs *)data; uint8_t *buf = malloc(IOQS_BUFSIZE, M_DEVBUF, M_NOWAIT); uint32_t *v; if (buf == NULL) { return (ENOMEM); } if (ioqs->bufsize < IOQS_BUFSIZE) error = ENOBUFS; if (!error) error = -t3_get_up_ioqs(sc, &ioqs->bufsize, buf); if (!error) { v = (uint32_t *)buf; ioqs->ioq_rx_enable = *v++; ioqs->ioq_tx_enable = *v++; ioqs->ioq_rx_status = *v++; ioqs->ioq_tx_status = *v++; error = copyout(v, ioqs->data, ioqs->bufsize); } free(buf, M_DEVBUF); break; } case CHELSIO_SET_FILTER: { struct ch_filter *f = (struct ch_filter *)data; struct filter_info *p; unsigned int nfilters = sc->params.mc5.nfilters; if (!is_offload(sc)) return (EOPNOTSUPP); /* No TCAM */ if (!(sc->flags & FULL_INIT_DONE)) return (EAGAIN); /* mc5 not setup yet */ if (nfilters == 0) return (EBUSY); /* TOE will use TCAM */ /* sanity checks */ if (f->filter_id >= nfilters || (f->val.dip && f->mask.dip != 0xffffffff) || (f->val.sport && f->mask.sport != 0xffff) || (f->val.dport && f->mask.dport != 0xffff) || (f->val.vlan && f->mask.vlan != 0xfff) || (f->val.vlan_prio && f->mask.vlan_prio != FILTER_NO_VLAN_PRI) || (f->mac_addr_idx != 0xffff && f->mac_addr_idx > 15) || f->qset >= SGE_QSETS || sc->rrss_map[f->qset] >= RSS_TABLE_SIZE) return (EINVAL); /* Was allocated with M_WAITOK */ KASSERT(sc->filters, ("filter table NULL\n")); p = &sc->filters[f->filter_id]; if (p->locked) return (EPERM); bzero(p, sizeof(*p)); p->sip = f->val.sip; p->sip_mask = f->mask.sip; p->dip = f->val.dip; p->sport = f->val.sport; p->dport = f->val.dport; p->vlan = f->mask.vlan ? f->val.vlan : 0xfff; p->vlan_prio = f->mask.vlan_prio ? (f->val.vlan_prio & 6) : FILTER_NO_VLAN_PRI; p->mac_hit = f->mac_hit; p->mac_vld = f->mac_addr_idx != 0xffff; p->mac_idx = f->mac_addr_idx; p->pkt_type = f->proto; p->report_filter_id = f->want_filter_id; p->pass = f->pass; p->rss = f->rss; p->qset = f->qset; error = set_filter(sc, f->filter_id, p); if (error == 0) p->valid = 1; break; } case CHELSIO_DEL_FILTER: { struct ch_filter *f = (struct ch_filter *)data; struct filter_info *p; unsigned int nfilters = sc->params.mc5.nfilters; if (!is_offload(sc)) return (EOPNOTSUPP); if (!(sc->flags & FULL_INIT_DONE)) return (EAGAIN); if (nfilters == 0 || sc->filters == NULL) return (EINVAL); if (f->filter_id >= nfilters) return (EINVAL); p = &sc->filters[f->filter_id]; if (p->locked) return (EPERM); if (!p->valid) return (EFAULT); /* Read "Bad address" as "Bad index" */ bzero(p, sizeof(*p)); p->sip = p->sip_mask = 0xffffffff; p->vlan = 0xfff; p->vlan_prio = FILTER_NO_VLAN_PRI; p->pkt_type = 1; error = set_filter(sc, f->filter_id, p); break; } case CHELSIO_GET_FILTER: { struct ch_filter *f = (struct ch_filter *)data; struct filter_info *p; unsigned int i, nfilters = sc->params.mc5.nfilters; if (!is_offload(sc)) return (EOPNOTSUPP); if (!(sc->flags & FULL_INIT_DONE)) return (EAGAIN); if (nfilters == 0 || sc->filters == NULL) return (EINVAL); i = f->filter_id == 0xffffffff ? 0 : f->filter_id + 1; for (; i < nfilters; i++) { p = &sc->filters[i]; if (!p->valid) continue; bzero(f, sizeof(*f)); f->filter_id = i; f->val.sip = p->sip; f->mask.sip = p->sip_mask; f->val.dip = p->dip; f->mask.dip = p->dip ? 0xffffffff : 0; f->val.sport = p->sport; f->mask.sport = p->sport ? 0xffff : 0; f->val.dport = p->dport; f->mask.dport = p->dport ? 0xffff : 0; f->val.vlan = p->vlan == 0xfff ? 0 : p->vlan; f->mask.vlan = p->vlan == 0xfff ? 0 : 0xfff; f->val.vlan_prio = p->vlan_prio == FILTER_NO_VLAN_PRI ? 0 : p->vlan_prio; f->mask.vlan_prio = p->vlan_prio == FILTER_NO_VLAN_PRI ? 0 : FILTER_NO_VLAN_PRI; f->mac_hit = p->mac_hit; f->mac_addr_idx = p->mac_vld ? p->mac_idx : 0xffff; f->proto = p->pkt_type; f->want_filter_id = p->report_filter_id; f->pass = p->pass; f->rss = p->rss; f->qset = p->qset; break; } if (i == nfilters) f->filter_id = 0xffffffff; break; } default: return (EOPNOTSUPP); break; } return (error); } static __inline void reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start, unsigned int end) { uint32_t *p = (uint32_t *)(buf + start); for ( ; start <= end; start += sizeof(uint32_t)) *p++ = t3_read_reg(ap, start); } #define T3_REGMAP_SIZE (3 * 1024) static int cxgb_get_regs_len(void) { return T3_REGMAP_SIZE; } static void cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf) { /* * Version scheme: * bits 0..9: chip version * bits 10..15: chip revision * bit 31: set for PCIe cards */ regs->version = 3 | (sc->params.rev << 10) | (is_pcie(sc) << 31); /* * We skip the MAC statistics registers because they are clear-on-read. * Also reading multi-register stats would need to synchronize with the * periodic mac stats accumulation. Hard to justify the complexity. */ memset(buf, 0, cxgb_get_regs_len()); reg_block_dump(sc, buf, 0, A_SG_RSPQ_CREDIT_RETURN); reg_block_dump(sc, buf, A_SG_HI_DRB_HI_THRSH, A_ULPRX_PBL_ULIMIT); reg_block_dump(sc, buf, A_ULPTX_CONFIG, A_MPS_INT_CAUSE); reg_block_dump(sc, buf, A_CPL_SWITCH_CNTRL, A_CPL_MAP_TBL_DATA); reg_block_dump(sc, buf, A_SMB_GLOBAL_TIME_CFG, A_XGM_SERDES_STAT3); reg_block_dump(sc, buf, A_XGM_SERDES_STATUS0, XGM_REG(A_XGM_SERDES_STAT3, 1)); reg_block_dump(sc, buf, XGM_REG(A_XGM_SERDES_STATUS0, 1), XGM_REG(A_XGM_RX_SPI4_SOP_EOP_CNT, 1)); } static int alloc_filters(struct adapter *sc) { struct filter_info *p; unsigned int nfilters = sc->params.mc5.nfilters; if (nfilters == 0) return (0); p = malloc(sizeof(*p) * nfilters, M_DEVBUF, M_WAITOK | M_ZERO); sc->filters = p; p = &sc->filters[nfilters - 1]; p->vlan = 0xfff; p->vlan_prio = FILTER_NO_VLAN_PRI; p->pass = p->rss = p->valid = p->locked = 1; return (0); } static int setup_hw_filters(struct adapter *sc) { int i, rc; unsigned int nfilters = sc->params.mc5.nfilters; if (!sc->filters) return (0); t3_enable_filters(sc); for (i = rc = 0; i < nfilters && !rc; i++) { if (sc->filters[i].locked) rc = set_filter(sc, i, &sc->filters[i]); } return (rc); } static int set_filter(struct adapter *sc, int id, const struct filter_info *f) { int len; struct mbuf *m; struct ulp_txpkt *txpkt; struct work_request_hdr *wr; struct cpl_pass_open_req *oreq; struct cpl_set_tcb_field *sreq; len = sizeof(*wr) + sizeof(*oreq) + 2 * sizeof(*sreq); KASSERT(len <= MHLEN, ("filter request too big for an mbuf")); id += t3_mc5_size(&sc->mc5) - sc->params.mc5.nroutes - sc->params.mc5.nfilters; m = m_gethdr(M_WAITOK, MT_DATA); m->m_len = m->m_pkthdr.len = len; bzero(mtod(m, char *), len); wr = mtod(m, struct work_request_hdr *); wr->wrh_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS) | F_WR_ATOMIC); oreq = (struct cpl_pass_open_req *)(wr + 1); txpkt = (struct ulp_txpkt *)oreq; txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT)); txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*oreq) / 8)); OPCODE_TID(oreq) = htonl(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, id)); oreq->local_port = htons(f->dport); oreq->peer_port = htons(f->sport); oreq->local_ip = htonl(f->dip); oreq->peer_ip = htonl(f->sip); oreq->peer_netmask = htonl(f->sip_mask); oreq->opt0h = 0; oreq->opt0l = htonl(F_NO_OFFLOAD); oreq->opt1 = htonl(V_MAC_MATCH_VALID(f->mac_vld) | V_CONN_POLICY(CPL_CONN_POLICY_FILTER) | V_VLAN_PRI(f->vlan_prio >> 1) | V_VLAN_PRI_VALID(f->vlan_prio != FILTER_NO_VLAN_PRI) | V_PKT_TYPE(f->pkt_type) | V_OPT1_VLAN(f->vlan) | V_MAC_MATCH(f->mac_idx | (f->mac_hit << 4))); sreq = (struct cpl_set_tcb_field *)(oreq + 1); set_tcb_field_ulp(sreq, id, 1, 0x1800808000ULL, (f->report_filter_id << 15) | (1 << 23) | ((u64)f->pass << 35) | ((u64)!f->rss << 36)); set_tcb_field_ulp(sreq + 1, id, 0, 0xffffffff, (2 << 19) | 1); t3_mgmt_tx(sc, m); if (f->pass && !f->rss) { len = sizeof(*sreq); m = m_gethdr(M_WAITOK, MT_DATA); m->m_len = m->m_pkthdr.len = len; bzero(mtod(m, char *), len); sreq = mtod(m, struct cpl_set_tcb_field *); sreq->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); mk_set_tcb_field(sreq, id, 25, 0x3f80000, (u64)sc->rrss_map[f->qset] << 19); t3_mgmt_tx(sc, m); } return 0; } static inline void mk_set_tcb_field(struct cpl_set_tcb_field *req, unsigned int tid, unsigned int word, u64 mask, u64 val) { OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, tid)); req->reply = V_NO_REPLY(1); req->cpu_idx = 0; req->word = htons(word); req->mask = htobe64(mask); req->val = htobe64(val); } static inline void set_tcb_field_ulp(struct cpl_set_tcb_field *req, unsigned int tid, unsigned int word, u64 mask, u64 val) { struct ulp_txpkt *txpkt = (struct ulp_txpkt *)req; txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT)); txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*req) / 8)); mk_set_tcb_field(req, tid, word, mask, val); } void t3_iterate(void (*func)(struct adapter *, void *), void *arg) { struct adapter *sc; mtx_lock(&t3_list_lock); SLIST_FOREACH(sc, &t3_list, link) { /* * func should not make any assumptions about what state sc is * in - the only guarantee is that sc->sc_lock is a valid lock. */ func(sc, arg); } mtx_unlock(&t3_list_lock); } #ifdef TCP_OFFLOAD static int toe_capability(struct port_info *pi, int enable) { int rc; struct adapter *sc = pi->adapter; ADAPTER_LOCK_ASSERT_OWNED(sc); if (!is_offload(sc)) return (ENODEV); if (enable) { if (!(sc->flags & FULL_INIT_DONE)) { log(LOG_WARNING, "You must enable a cxgb interface first\n"); return (EAGAIN); } if (isset(&sc->offload_map, pi->port_id)) return (0); if (!(sc->flags & TOM_INIT_DONE)) { rc = t3_activate_uld(sc, ULD_TOM); if (rc == EAGAIN) { log(LOG_WARNING, "You must kldload t3_tom.ko before trying " "to enable TOE on a cxgb interface.\n"); } if (rc != 0) return (rc); KASSERT(sc->tom_softc != NULL, ("%s: TOM activated but softc NULL", __func__)); KASSERT(sc->flags & TOM_INIT_DONE, ("%s: TOM activated but flag not set", __func__)); } setbit(&sc->offload_map, pi->port_id); /* * XXX: Temporary code to allow iWARP to be enabled when TOE is * enabled on any port. Need to figure out how to enable, * disable, load, and unload iWARP cleanly. */ if (!isset(&sc->offload_map, MAX_NPORTS) && t3_activate_uld(sc, ULD_IWARP) == 0) setbit(&sc->offload_map, MAX_NPORTS); } else { if (!isset(&sc->offload_map, pi->port_id)) return (0); KASSERT(sc->flags & TOM_INIT_DONE, ("%s: TOM never initialized?", __func__)); clrbit(&sc->offload_map, pi->port_id); } return (0); } /* * Add an upper layer driver to the global list. */ int t3_register_uld(struct uld_info *ui) { int rc = 0; struct uld_info *u; mtx_lock(&t3_uld_list_lock); SLIST_FOREACH(u, &t3_uld_list, link) { if (u->uld_id == ui->uld_id) { rc = EEXIST; goto done; } } SLIST_INSERT_HEAD(&t3_uld_list, ui, link); ui->refcount = 0; done: mtx_unlock(&t3_uld_list_lock); return (rc); } int t3_unregister_uld(struct uld_info *ui) { int rc = EINVAL; struct uld_info *u; mtx_lock(&t3_uld_list_lock); SLIST_FOREACH(u, &t3_uld_list, link) { if (u == ui) { if (ui->refcount > 0) { rc = EBUSY; goto done; } SLIST_REMOVE(&t3_uld_list, ui, uld_info, link); rc = 0; goto done; } } done: mtx_unlock(&t3_uld_list_lock); return (rc); } int t3_activate_uld(struct adapter *sc, int id) { int rc = EAGAIN; struct uld_info *ui; mtx_lock(&t3_uld_list_lock); SLIST_FOREACH(ui, &t3_uld_list, link) { if (ui->uld_id == id) { rc = ui->activate(sc); if (rc == 0) ui->refcount++; goto done; } } done: mtx_unlock(&t3_uld_list_lock); return (rc); } int t3_deactivate_uld(struct adapter *sc, int id) { int rc = EINVAL; struct uld_info *ui; mtx_lock(&t3_uld_list_lock); SLIST_FOREACH(ui, &t3_uld_list, link) { if (ui->uld_id == id) { rc = ui->deactivate(sc); if (rc == 0) ui->refcount--; goto done; } } done: mtx_unlock(&t3_uld_list_lock); return (rc); } static int cpl_not_handled(struct sge_qset *qs __unused, struct rsp_desc *r __unused, struct mbuf *m) { m_freem(m); return (EDOOFUS); } int t3_register_cpl_handler(struct adapter *sc, int opcode, cpl_handler_t h) { uintptr_t *loc, new; if (opcode >= NUM_CPL_HANDLERS) return (EINVAL); new = h ? (uintptr_t)h : (uintptr_t)cpl_not_handled; loc = (uintptr_t *) &sc->cpl_handler[opcode]; atomic_store_rel_ptr(loc, new); return (0); } #endif static int cxgbc_mod_event(module_t mod, int cmd, void *arg) { int rc = 0; switch (cmd) { case MOD_LOAD: mtx_init(&t3_list_lock, "T3 adapters", 0, MTX_DEF); SLIST_INIT(&t3_list); #ifdef TCP_OFFLOAD mtx_init(&t3_uld_list_lock, "T3 ULDs", 0, MTX_DEF); SLIST_INIT(&t3_uld_list); #endif break; case MOD_UNLOAD: #ifdef TCP_OFFLOAD mtx_lock(&t3_uld_list_lock); if (!SLIST_EMPTY(&t3_uld_list)) { rc = EBUSY; mtx_unlock(&t3_uld_list_lock); break; } mtx_unlock(&t3_uld_list_lock); mtx_destroy(&t3_uld_list_lock); #endif mtx_lock(&t3_list_lock); if (!SLIST_EMPTY(&t3_list)) { rc = EBUSY; mtx_unlock(&t3_list_lock); break; } mtx_unlock(&t3_list_lock); mtx_destroy(&t3_list_lock); break; } return (rc); } + +#ifdef NETDUMP +static void +cxgb_netdump_init(struct ifnet *ifp, int *nrxr, int *ncl, int *clsize) +{ + struct port_info *pi; + adapter_t *adap; + + pi = if_getsoftc(ifp); + adap = pi->adapter; + ADAPTER_LOCK(adap); + *nrxr = SGE_QSETS; + *ncl = adap->sge.qs[0].fl[1].size; + *clsize = adap->sge.qs[0].fl[1].buf_size; + ADAPTER_UNLOCK(adap); +} + +static void +cxgb_netdump_event(struct ifnet *ifp, enum netdump_ev event) +{ + struct port_info *pi; + struct sge_qset *qs; + int i; + + pi = if_getsoftc(ifp); + if (event == NETDUMP_START) + for (i = 0; i < SGE_QSETS; i++) { + qs = &pi->adapter->sge.qs[i]; + + /* Need to reinit after netdump_mbuf_dump(). */ + qs->fl[0].zone = zone_pack; + qs->fl[1].zone = zone_clust; + qs->lro.enabled = 0; + } +} + +static int +cxgb_netdump_transmit(struct ifnet *ifp, struct mbuf *m) +{ + struct port_info *pi; + struct sge_qset *qs; + + pi = if_getsoftc(ifp); + if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != + IFF_DRV_RUNNING) + return (ENOENT); + + qs = &pi->adapter->sge.qs[pi->first_qset]; + return (cxgb_netdump_encap(qs, &m)); +} + +static int +cxgb_netdump_poll(struct ifnet *ifp, int count) +{ + struct port_info *pi; + adapter_t *adap; + int i; + + pi = if_getsoftc(ifp); + if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) + return (ENOENT); + + adap = pi->adapter; + for (i = 0; i < SGE_QSETS; i++) + (void)cxgb_netdump_poll_rx(adap, &adap->sge.qs[i]); + (void)cxgb_netdump_poll_tx(&adap->sge.qs[pi->first_qset]); + return (0); +} +#endif /* NETDUMP */ Index: head/sys/dev/cxgb/cxgb_sge.c =================================================================== --- head/sys/dev/cxgb/cxgb_sge.c (revision 333287) +++ head/sys/dev/cxgb/cxgb_sge.c (revision 333288) @@ -1,3707 +1,3741 @@ /************************************************************************** SPDX-License-Identifier: BSD-2-Clause-FreeBSD Copyright (c) 2007-2009, Chelsio Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Neither the name of the Chelsio Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ***************************************************************************/ #include __FBSDID("$FreeBSD$"); #include "opt_inet6.h" #include "opt_inet.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include int txq_fills = 0; int multiq_tx_enable = 1; #ifdef TCP_OFFLOAD CTASSERT(NUM_CPL_HANDLERS >= NUM_CPL_CMDS); #endif extern struct sysctl_oid_list sysctl__hw_cxgb_children; int cxgb_txq_buf_ring_size = TX_ETH_Q_SIZE; SYSCTL_INT(_hw_cxgb, OID_AUTO, txq_mr_size, CTLFLAG_RDTUN, &cxgb_txq_buf_ring_size, 0, "size of per-queue mbuf ring"); static int cxgb_tx_coalesce_force = 0; SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_coalesce_force, CTLFLAG_RWTUN, &cxgb_tx_coalesce_force, 0, "coalesce small packets into a single work request regardless of ring state"); #define COALESCE_START_DEFAULT TX_ETH_Q_SIZE>>1 #define COALESCE_START_MAX (TX_ETH_Q_SIZE-(TX_ETH_Q_SIZE>>3)) #define COALESCE_STOP_DEFAULT TX_ETH_Q_SIZE>>2 #define COALESCE_STOP_MIN TX_ETH_Q_SIZE>>5 #define TX_RECLAIM_DEFAULT TX_ETH_Q_SIZE>>5 #define TX_RECLAIM_MAX TX_ETH_Q_SIZE>>2 #define TX_RECLAIM_MIN TX_ETH_Q_SIZE>>6 static int cxgb_tx_coalesce_enable_start = COALESCE_START_DEFAULT; SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_coalesce_enable_start, CTLFLAG_RWTUN, &cxgb_tx_coalesce_enable_start, 0, "coalesce enable threshold"); static int cxgb_tx_coalesce_enable_stop = COALESCE_STOP_DEFAULT; SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_coalesce_enable_stop, CTLFLAG_RWTUN, &cxgb_tx_coalesce_enable_stop, 0, "coalesce disable threshold"); static int cxgb_tx_reclaim_threshold = TX_RECLAIM_DEFAULT; SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_reclaim_threshold, CTLFLAG_RWTUN, &cxgb_tx_reclaim_threshold, 0, "tx cleaning minimum threshold"); /* * XXX don't re-enable this until TOE stops assuming * we have an m_ext */ static int recycle_enable = 0; extern int cxgb_use_16k_clusters; extern int nmbjumbop; extern int nmbjumbo9; extern int nmbjumbo16; #define USE_GTS 0 #define SGE_RX_SM_BUF_SIZE 1536 #define SGE_RX_DROP_THRES 16 #define SGE_RX_COPY_THRES 128 /* * Period of the Tx buffer reclaim timer. This timer does not need to run * frequently as Tx buffers are usually reclaimed by new Tx packets. */ #define TX_RECLAIM_PERIOD (hz >> 1) /* * Values for sge_txq.flags */ enum { TXQ_RUNNING = 1 << 0, /* fetch engine is running */ TXQ_LAST_PKT_DB = 1 << 1, /* last packet rang the doorbell */ }; struct tx_desc { uint64_t flit[TX_DESC_FLITS]; } __packed; struct rx_desc { uint32_t addr_lo; uint32_t len_gen; uint32_t gen2; uint32_t addr_hi; } __packed; struct rsp_desc { /* response queue descriptor */ struct rss_header rss_hdr; uint32_t flags; uint32_t len_cq; uint8_t imm_data[47]; uint8_t intr_gen; } __packed; #define RX_SW_DESC_MAP_CREATED (1 << 0) #define TX_SW_DESC_MAP_CREATED (1 << 1) #define RX_SW_DESC_INUSE (1 << 3) #define TX_SW_DESC_MAPPED (1 << 4) #define RSPQ_NSOP_NEOP G_RSPD_SOP_EOP(0) #define RSPQ_EOP G_RSPD_SOP_EOP(F_RSPD_EOP) #define RSPQ_SOP G_RSPD_SOP_EOP(F_RSPD_SOP) #define RSPQ_SOP_EOP G_RSPD_SOP_EOP(F_RSPD_SOP|F_RSPD_EOP) struct tx_sw_desc { /* SW state per Tx descriptor */ struct mbuf *m; bus_dmamap_t map; int flags; }; struct rx_sw_desc { /* SW state per Rx descriptor */ caddr_t rxsd_cl; struct mbuf *m; bus_dmamap_t map; int flags; }; struct txq_state { unsigned int compl; unsigned int gen; unsigned int pidx; }; struct refill_fl_cb_arg { int error; bus_dma_segment_t seg; int nseg; }; /* * Maps a number of flits to the number of Tx descriptors that can hold them. * The formula is * * desc = 1 + (flits - 2) / (WR_FLITS - 1). * * HW allows up to 4 descriptors to be combined into a WR. */ static uint8_t flit_desc_map[] = { 0, #if SGE_NUM_GENBITS == 1 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 #elif SGE_NUM_GENBITS == 2 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, #else # error "SGE_NUM_GENBITS must be 1 or 2" #endif }; #define TXQ_LOCK_ASSERT(qs) mtx_assert(&(qs)->lock, MA_OWNED) #define TXQ_TRYLOCK(qs) mtx_trylock(&(qs)->lock) #define TXQ_LOCK(qs) mtx_lock(&(qs)->lock) #define TXQ_UNLOCK(qs) mtx_unlock(&(qs)->lock) #define TXQ_RING_EMPTY(qs) drbr_empty((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr) #define TXQ_RING_NEEDS_ENQUEUE(qs) \ drbr_needs_enqueue((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr) #define TXQ_RING_FLUSH(qs) drbr_flush((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr) #define TXQ_RING_DEQUEUE_COND(qs, func, arg) \ drbr_dequeue_cond((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr, func, arg) #define TXQ_RING_DEQUEUE(qs) \ drbr_dequeue((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr) int cxgb_debug = 0; static void sge_timer_cb(void *arg); static void sge_timer_reclaim(void *arg, int ncount); static void sge_txq_reclaim_handler(void *arg, int ncount); static void cxgb_start_locked(struct sge_qset *qs); /* * XXX need to cope with bursty scheduling by looking at a wider * window than we are now for determining the need for coalescing * */ static __inline uint64_t check_pkt_coalesce(struct sge_qset *qs) { struct adapter *sc; struct sge_txq *txq; uint8_t *fill; if (__predict_false(cxgb_tx_coalesce_force)) return (1); txq = &qs->txq[TXQ_ETH]; sc = qs->port->adapter; fill = &sc->tunq_fill[qs->idx]; if (cxgb_tx_coalesce_enable_start > COALESCE_START_MAX) cxgb_tx_coalesce_enable_start = COALESCE_START_MAX; if (cxgb_tx_coalesce_enable_stop < COALESCE_STOP_MIN) cxgb_tx_coalesce_enable_start = COALESCE_STOP_MIN; /* * if the hardware transmit queue is more than 1/8 full * we mark it as coalescing - we drop back from coalescing * when we go below 1/32 full and there are no packets enqueued, * this provides us with some degree of hysteresis */ if (*fill != 0 && (txq->in_use <= cxgb_tx_coalesce_enable_stop) && TXQ_RING_EMPTY(qs) && (qs->coalescing == 0)) *fill = 0; else if (*fill == 0 && (txq->in_use >= cxgb_tx_coalesce_enable_start)) *fill = 1; return (sc->tunq_coalesce); } #ifdef __LP64__ static void set_wr_hdr(struct work_request_hdr *wrp, uint32_t wr_hi, uint32_t wr_lo) { uint64_t wr_hilo; #if _BYTE_ORDER == _LITTLE_ENDIAN wr_hilo = wr_hi; wr_hilo |= (((uint64_t)wr_lo)<<32); #else wr_hilo = wr_lo; wr_hilo |= (((uint64_t)wr_hi)<<32); #endif wrp->wrh_hilo = wr_hilo; } #else static void set_wr_hdr(struct work_request_hdr *wrp, uint32_t wr_hi, uint32_t wr_lo) { wrp->wrh_hi = wr_hi; wmb(); wrp->wrh_lo = wr_lo; } #endif struct coalesce_info { int count; int nbytes; }; static int coalesce_check(struct mbuf *m, void *arg) { struct coalesce_info *ci = arg; int *count = &ci->count; int *nbytes = &ci->nbytes; if ((*nbytes == 0) || ((*nbytes + m->m_len <= 10500) && (*count < 7) && (m->m_next == NULL))) { *count += 1; *nbytes += m->m_len; return (1); } return (0); } static struct mbuf * cxgb_dequeue(struct sge_qset *qs) { struct mbuf *m, *m_head, *m_tail; struct coalesce_info ci; if (check_pkt_coalesce(qs) == 0) return TXQ_RING_DEQUEUE(qs); m_head = m_tail = NULL; ci.count = ci.nbytes = 0; do { m = TXQ_RING_DEQUEUE_COND(qs, coalesce_check, &ci); if (m_head == NULL) { m_tail = m_head = m; } else if (m != NULL) { m_tail->m_nextpkt = m; m_tail = m; } } while (m != NULL); if (ci.count > 7) panic("trying to coalesce %d packets in to one WR", ci.count); return (m_head); } /** * reclaim_completed_tx - reclaims completed Tx descriptors * @adapter: the adapter * @q: the Tx queue to reclaim completed descriptors from * * Reclaims Tx descriptors that the SGE has indicated it has processed, * and frees the associated buffers if possible. Called with the Tx * queue's lock held. */ static __inline int reclaim_completed_tx(struct sge_qset *qs, int reclaim_min, int queue) { struct sge_txq *q = &qs->txq[queue]; int reclaim = desc_reclaimable(q); if ((cxgb_tx_reclaim_threshold > TX_RECLAIM_MAX) || (cxgb_tx_reclaim_threshold < TX_RECLAIM_MIN)) cxgb_tx_reclaim_threshold = TX_RECLAIM_DEFAULT; if (reclaim < reclaim_min) return (0); mtx_assert(&qs->lock, MA_OWNED); if (reclaim > 0) { t3_free_tx_desc(qs, reclaim, queue); q->cleaned += reclaim; q->in_use -= reclaim; } if (isset(&qs->txq_stopped, TXQ_ETH)) clrbit(&qs->txq_stopped, TXQ_ETH); return (reclaim); } +#ifdef NETDUMP +int +cxgb_netdump_poll_tx(struct sge_qset *qs) +{ + + return (reclaim_completed_tx(qs, TX_RECLAIM_MAX, TXQ_ETH)); +} +#endif + /** * should_restart_tx - are there enough resources to restart a Tx queue? * @q: the Tx queue * * Checks if there are enough descriptors to restart a suspended Tx queue. */ static __inline int should_restart_tx(const struct sge_txq *q) { unsigned int r = q->processed - q->cleaned; return q->in_use - r < (q->size >> 1); } /** * t3_sge_init - initialize SGE * @adap: the adapter * @p: the SGE parameters * * Performs SGE initialization needed every time after a chip reset. * We do not initialize any of the queue sets here, instead the driver * top-level must request those individually. We also do not enable DMA * here, that should be done after the queues have been set up. */ void t3_sge_init(adapter_t *adap, struct sge_params *p) { u_int ctrl, ups; ups = 0; /* = ffs(pci_resource_len(adap->pdev, 2) >> 12); */ ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL | F_CQCRDTCTRL | F_CONGMODE | F_TNLFLMODE | F_FATLPERREN | V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS | V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING; #if SGE_NUM_GENBITS == 1 ctrl |= F_EGRGENCTRL; #endif if (adap->params.rev > 0) { if (!(adap->flags & (USING_MSIX | USING_MSI))) ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ; } t3_write_reg(adap, A_SG_CONTROL, ctrl); t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) | V_LORCQDRBTHRSH(512)); t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10); t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) | V_TIMEOUT(200 * core_ticks_per_usec(adap))); t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH, adap->params.rev < T3_REV_C ? 1000 : 500); t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256); t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000); t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256); t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff)); t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024); } /** * sgl_len - calculates the size of an SGL of the given capacity * @n: the number of SGL entries * * Calculates the number of flits needed for a scatter/gather list that * can hold the given number of entries. */ static __inline unsigned int sgl_len(unsigned int n) { return ((3 * n) / 2 + (n & 1)); } /** * get_imm_packet - return the next ingress packet buffer from a response * @resp: the response descriptor containing the packet data * * Return a packet containing the immediate data of the given response. */ static int get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct mbuf *m) { if (resp->rss_hdr.opcode == CPL_RX_DATA) { const struct cpl_rx_data *cpl = (const void *)&resp->imm_data[0]; m->m_len = sizeof(*cpl) + ntohs(cpl->len); } else if (resp->rss_hdr.opcode == CPL_RX_PKT) { const struct cpl_rx_pkt *cpl = (const void *)&resp->imm_data[0]; m->m_len = sizeof(*cpl) + ntohs(cpl->len); } else m->m_len = IMMED_PKT_SIZE; m->m_ext.ext_buf = NULL; m->m_ext.ext_type = 0; memcpy(mtod(m, uint8_t *), resp->imm_data, m->m_len); return (0); } static __inline u_int flits_to_desc(u_int n) { return (flit_desc_map[n]); } #define SGE_PARERR (F_CPPARITYERROR | F_OCPARITYERROR | F_RCPARITYERROR | \ F_IRPARITYERROR | V_ITPARITYERROR(M_ITPARITYERROR) | \ V_FLPARITYERROR(M_FLPARITYERROR) | F_LODRBPARITYERROR | \ F_HIDRBPARITYERROR | F_LORCQPARITYERROR | \ F_HIRCQPARITYERROR) #define SGE_FRAMINGERR (F_UC_REQ_FRAMINGERROR | F_R_REQ_FRAMINGERROR) #define SGE_FATALERR (SGE_PARERR | SGE_FRAMINGERR | F_RSPQCREDITOVERFOW | \ F_RSPQDISABLED) /** * t3_sge_err_intr_handler - SGE async event interrupt handler * @adapter: the adapter * * Interrupt handler for SGE asynchronous (non-data) events. */ void t3_sge_err_intr_handler(adapter_t *adapter) { unsigned int v, status; status = t3_read_reg(adapter, A_SG_INT_CAUSE); if (status & SGE_PARERR) CH_ALERT(adapter, "SGE parity error (0x%x)\n", status & SGE_PARERR); if (status & SGE_FRAMINGERR) CH_ALERT(adapter, "SGE framing error (0x%x)\n", status & SGE_FRAMINGERR); if (status & F_RSPQCREDITOVERFOW) CH_ALERT(adapter, "SGE response queue credit overflow\n"); if (status & F_RSPQDISABLED) { v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS); CH_ALERT(adapter, "packet delivered to disabled response queue (0x%x)\n", (v >> S_RSPQ0DISABLED) & 0xff); } t3_write_reg(adapter, A_SG_INT_CAUSE, status); if (status & SGE_FATALERR) t3_fatal_err(adapter); } void t3_sge_prep(adapter_t *adap, struct sge_params *p) { int i, nqsets, fl_q_size, jumbo_q_size, use_16k, jumbo_buf_size; nqsets = min(SGE_QSETS / adap->params.nports, mp_ncpus); nqsets *= adap->params.nports; fl_q_size = min(nmbclusters/(3*nqsets), FL_Q_SIZE); while (!powerof2(fl_q_size)) fl_q_size--; use_16k = cxgb_use_16k_clusters != -1 ? cxgb_use_16k_clusters : is_offload(adap); #if __FreeBSD_version >= 700111 if (use_16k) { jumbo_q_size = min(nmbjumbo16/(3*nqsets), JUMBO_Q_SIZE); jumbo_buf_size = MJUM16BYTES; } else { jumbo_q_size = min(nmbjumbo9/(3*nqsets), JUMBO_Q_SIZE); jumbo_buf_size = MJUM9BYTES; } #else jumbo_q_size = min(nmbjumbop/(3*nqsets), JUMBO_Q_SIZE); jumbo_buf_size = MJUMPAGESIZE; #endif while (!powerof2(jumbo_q_size)) jumbo_q_size--; if (fl_q_size < (FL_Q_SIZE / 4) || jumbo_q_size < (JUMBO_Q_SIZE / 2)) device_printf(adap->dev, "Insufficient clusters and/or jumbo buffers.\n"); p->max_pkt_size = jumbo_buf_size - sizeof(struct cpl_rx_data); for (i = 0; i < SGE_QSETS; ++i) { struct qset_params *q = p->qset + i; if (adap->params.nports > 2) { q->coalesce_usecs = 50; } else { #ifdef INVARIANTS q->coalesce_usecs = 10; #else q->coalesce_usecs = 5; #endif } q->polling = 0; q->rspq_size = RSPQ_Q_SIZE; q->fl_size = fl_q_size; q->jumbo_size = jumbo_q_size; q->jumbo_buf_size = jumbo_buf_size; q->txq_size[TXQ_ETH] = TX_ETH_Q_SIZE; q->txq_size[TXQ_OFLD] = is_offload(adap) ? TX_OFLD_Q_SIZE : 16; q->txq_size[TXQ_CTRL] = TX_CTRL_Q_SIZE; q->cong_thres = 0; } } int t3_sge_alloc(adapter_t *sc) { /* The parent tag. */ if (bus_dma_tag_create( bus_get_dma_tag(sc->dev),/* PCI parent */ 1, 0, /* algnmnt, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ BUS_SPACE_MAXSIZE_32BIT,/* maxsize */ BUS_SPACE_UNRESTRICTED, /* nsegments */ BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */ 0, /* flags */ NULL, NULL, /* lock, lockarg */ &sc->parent_dmat)) { device_printf(sc->dev, "Cannot allocate parent DMA tag\n"); return (ENOMEM); } /* * DMA tag for normal sized RX frames */ if (bus_dma_tag_create(sc->parent_dmat, MCLBYTES, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1, MCLBYTES, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_dmat)) { device_printf(sc->dev, "Cannot allocate RX DMA tag\n"); return (ENOMEM); } /* * DMA tag for jumbo sized RX frames. */ if (bus_dma_tag_create(sc->parent_dmat, MJUM16BYTES, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, MJUM16BYTES, 1, MJUM16BYTES, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_jumbo_dmat)) { device_printf(sc->dev, "Cannot allocate RX jumbo DMA tag\n"); return (ENOMEM); } /* * DMA tag for TX frames. */ if (bus_dma_tag_create(sc->parent_dmat, 1, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS, TX_MAX_SIZE, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->tx_dmat)) { device_printf(sc->dev, "Cannot allocate TX DMA tag\n"); return (ENOMEM); } return (0); } int t3_sge_free(struct adapter * sc) { if (sc->tx_dmat != NULL) bus_dma_tag_destroy(sc->tx_dmat); if (sc->rx_jumbo_dmat != NULL) bus_dma_tag_destroy(sc->rx_jumbo_dmat); if (sc->rx_dmat != NULL) bus_dma_tag_destroy(sc->rx_dmat); if (sc->parent_dmat != NULL) bus_dma_tag_destroy(sc->parent_dmat); return (0); } void t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p) { qs->rspq.holdoff_tmr = max(p->coalesce_usecs * 10, 1U); qs->rspq.polling = 0 /* p->polling */; } #if !defined(__i386__) && !defined(__amd64__) static void refill_fl_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) { struct refill_fl_cb_arg *cb_arg = arg; cb_arg->error = error; cb_arg->seg = segs[0]; cb_arg->nseg = nseg; } #endif /** * refill_fl - refill an SGE free-buffer list * @sc: the controller softc * @q: the free-list to refill * @n: the number of new buffers to allocate * * (Re)populate an SGE free-buffer list with up to @n new packet buffers. * The caller must assure that @n does not exceed the queue's capacity. */ static void refill_fl(adapter_t *sc, struct sge_fl *q, int n) { struct rx_sw_desc *sd = &q->sdesc[q->pidx]; struct rx_desc *d = &q->desc[q->pidx]; struct refill_fl_cb_arg cb_arg; struct mbuf *m; caddr_t cl; int err; cb_arg.error = 0; while (n--) { /* * We allocate an uninitialized mbuf + cluster, mbuf is * initialized after rx. */ if (q->zone == zone_pack) { if ((m = m_getcl(M_NOWAIT, MT_NOINIT, M_PKTHDR)) == NULL) break; cl = m->m_ext.ext_buf; } else { if ((cl = m_cljget(NULL, M_NOWAIT, q->buf_size)) == NULL) break; if ((m = m_gethdr(M_NOWAIT, MT_NOINIT)) == NULL) { uma_zfree(q->zone, cl); break; } } if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0) { if ((err = bus_dmamap_create(q->entry_tag, 0, &sd->map))) { log(LOG_WARNING, "bus_dmamap_create failed %d\n", err); uma_zfree(q->zone, cl); goto done; } sd->flags |= RX_SW_DESC_MAP_CREATED; } #if !defined(__i386__) && !defined(__amd64__) err = bus_dmamap_load(q->entry_tag, sd->map, cl, q->buf_size, refill_fl_cb, &cb_arg, 0); if (err != 0 || cb_arg.error) { if (q->zone != zone_pack) uma_zfree(q->zone, cl); m_free(m); goto done; } #else cb_arg.seg.ds_addr = pmap_kextract((vm_offset_t)cl); #endif sd->flags |= RX_SW_DESC_INUSE; sd->rxsd_cl = cl; sd->m = m; d->addr_lo = htobe32(cb_arg.seg.ds_addr & 0xffffffff); d->addr_hi = htobe32(((uint64_t)cb_arg.seg.ds_addr >>32) & 0xffffffff); d->len_gen = htobe32(V_FLD_GEN1(q->gen)); d->gen2 = htobe32(V_FLD_GEN2(q->gen)); d++; sd++; if (++q->pidx == q->size) { q->pidx = 0; q->gen ^= 1; sd = q->sdesc; d = q->desc; } q->credits++; q->db_pending++; } done: if (q->db_pending >= 32) { q->db_pending = 0; t3_write_reg(sc, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id)); } } /** * free_rx_bufs - free the Rx buffers on an SGE free list * @sc: the controle softc * @q: the SGE free list to clean up * * Release the buffers on an SGE free-buffer Rx queue. HW fetching from * this queue should be stopped before calling this function. */ static void free_rx_bufs(adapter_t *sc, struct sge_fl *q) { u_int cidx = q->cidx; while (q->credits--) { struct rx_sw_desc *d = &q->sdesc[cidx]; if (d->flags & RX_SW_DESC_INUSE) { bus_dmamap_unload(q->entry_tag, d->map); bus_dmamap_destroy(q->entry_tag, d->map); if (q->zone == zone_pack) { m_init(d->m, M_NOWAIT, MT_DATA, M_EXT); uma_zfree(zone_pack, d->m); } else { m_init(d->m, M_NOWAIT, MT_DATA, 0); uma_zfree(zone_mbuf, d->m); uma_zfree(q->zone, d->rxsd_cl); } } d->rxsd_cl = NULL; d->m = NULL; if (++cidx == q->size) cidx = 0; } } static __inline void __refill_fl(adapter_t *adap, struct sge_fl *fl) { refill_fl(adap, fl, min(16U, fl->size - fl->credits)); } static __inline void __refill_fl_lt(adapter_t *adap, struct sge_fl *fl, int max) { uint32_t reclaimable = fl->size - fl->credits; if (reclaimable > 0) refill_fl(adap, fl, min(max, reclaimable)); } /** * recycle_rx_buf - recycle a receive buffer * @adapter: the adapter * @q: the SGE free list * @idx: index of buffer to recycle * * Recycles the specified buffer on the given free list by adding it at * the next available slot on the list. */ static void recycle_rx_buf(adapter_t *adap, struct sge_fl *q, unsigned int idx) { struct rx_desc *from = &q->desc[idx]; struct rx_desc *to = &q->desc[q->pidx]; q->sdesc[q->pidx] = q->sdesc[idx]; to->addr_lo = from->addr_lo; // already big endian to->addr_hi = from->addr_hi; // likewise wmb(); /* necessary ? */ to->len_gen = htobe32(V_FLD_GEN1(q->gen)); to->gen2 = htobe32(V_FLD_GEN2(q->gen)); q->credits++; if (++q->pidx == q->size) { q->pidx = 0; q->gen ^= 1; } t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id)); } static void alloc_ring_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error) { uint32_t *addr; addr = arg; *addr = segs[0].ds_addr; } static int alloc_ring(adapter_t *sc, size_t nelem, size_t elem_size, size_t sw_size, bus_addr_t *phys, void *desc, void *sdesc, bus_dma_tag_t *tag, bus_dmamap_t *map, bus_dma_tag_t parent_entry_tag, bus_dma_tag_t *entry_tag) { size_t len = nelem * elem_size; void *s = NULL; void *p = NULL; int err; if ((err = bus_dma_tag_create(sc->parent_dmat, PAGE_SIZE, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, len, 1, len, 0, NULL, NULL, tag)) != 0) { device_printf(sc->dev, "Cannot allocate descriptor tag\n"); return (ENOMEM); } if ((err = bus_dmamem_alloc(*tag, (void **)&p, BUS_DMA_NOWAIT, map)) != 0) { device_printf(sc->dev, "Cannot allocate descriptor memory\n"); return (ENOMEM); } bus_dmamap_load(*tag, *map, p, len, alloc_ring_cb, phys, 0); bzero(p, len); *(void **)desc = p; if (sw_size) { len = nelem * sw_size; s = malloc(len, M_DEVBUF, M_WAITOK|M_ZERO); *(void **)sdesc = s; } if (parent_entry_tag == NULL) return (0); if ((err = bus_dma_tag_create(parent_entry_tag, 1, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS, TX_MAX_SIZE, BUS_DMA_ALLOCNOW, NULL, NULL, entry_tag)) != 0) { device_printf(sc->dev, "Cannot allocate descriptor entry tag\n"); return (ENOMEM); } return (0); } static void sge_slow_intr_handler(void *arg, int ncount) { adapter_t *sc = arg; t3_slow_intr_handler(sc); t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask); (void) t3_read_reg(sc, A_PL_INT_ENABLE0); } /** * sge_timer_cb - perform periodic maintenance of an SGE qset * @data: the SGE queue set to maintain * * Runs periodically from a timer to perform maintenance of an SGE queue * set. It performs two tasks: * * a) Cleans up any completed Tx descriptors that may still be pending. * Normal descriptor cleanup happens when new packets are added to a Tx * queue so this timer is relatively infrequent and does any cleanup only * if the Tx queue has not seen any new packets in a while. We make a * best effort attempt to reclaim descriptors, in that we don't wait * around if we cannot get a queue's lock (which most likely is because * someone else is queueing new packets and so will also handle the clean * up). Since control queues use immediate data exclusively we don't * bother cleaning them up here. * * b) Replenishes Rx queues that have run out due to memory shortage. * Normally new Rx buffers are added when existing ones are consumed but * when out of memory a queue can become empty. We try to add only a few * buffers here, the queue will be replenished fully as these new buffers * are used up if memory shortage has subsided. * * c) Return coalesced response queue credits in case a response queue is * starved. * * d) Ring doorbells for T304 tunnel queues since we have seen doorbell * fifo overflows and the FW doesn't implement any recovery scheme yet. */ static void sge_timer_cb(void *arg) { adapter_t *sc = arg; if ((sc->flags & USING_MSIX) == 0) { struct port_info *pi; struct sge_qset *qs; struct sge_txq *txq; int i, j; int reclaim_ofl, refill_rx; if (sc->open_device_map == 0) return; for (i = 0; i < sc->params.nports; i++) { pi = &sc->port[i]; for (j = 0; j < pi->nqsets; j++) { qs = &sc->sge.qs[pi->first_qset + j]; txq = &qs->txq[0]; reclaim_ofl = txq[TXQ_OFLD].processed - txq[TXQ_OFLD].cleaned; refill_rx = ((qs->fl[0].credits < qs->fl[0].size) || (qs->fl[1].credits < qs->fl[1].size)); if (reclaim_ofl || refill_rx) { taskqueue_enqueue(sc->tq, &pi->timer_reclaim_task); break; } } } } if (sc->params.nports > 2) { int i; for_each_port(sc, i) { struct port_info *pi = &sc->port[i]; t3_write_reg(sc, A_SG_KDOORBELL, F_SELEGRCNTX | (FW_TUNNEL_SGEEC_START + pi->first_qset)); } } if (((sc->flags & USING_MSIX) == 0 || sc->params.nports > 2) && sc->open_device_map != 0) callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc); } /* * This is meant to be a catch-all function to keep sge state private * to sge.c * */ int t3_sge_init_adapter(adapter_t *sc) { callout_init(&sc->sge_timer_ch, 1); callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc); TASK_INIT(&sc->slow_intr_task, 0, sge_slow_intr_handler, sc); return (0); } int t3_sge_reset_adapter(adapter_t *sc) { callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc); return (0); } int t3_sge_init_port(struct port_info *pi) { TASK_INIT(&pi->timer_reclaim_task, 0, sge_timer_reclaim, pi); return (0); } /** * refill_rspq - replenish an SGE response queue * @adapter: the adapter * @q: the response queue to replenish * @credits: how many new responses to make available * * Replenishes a response queue by making the supplied number of responses * available to HW. */ static __inline void refill_rspq(adapter_t *sc, const struct sge_rspq *q, u_int credits) { /* mbufs are allocated on demand when a rspq entry is processed. */ t3_write_reg(sc, A_SG_RSPQ_CREDIT_RETURN, V_RSPQ(q->cntxt_id) | V_CREDITS(credits)); } static void sge_txq_reclaim_handler(void *arg, int ncount) { struct sge_qset *qs = arg; int i; for (i = 0; i < 3; i++) reclaim_completed_tx(qs, 16, i); } static void sge_timer_reclaim(void *arg, int ncount) { struct port_info *pi = arg; int i, nqsets = pi->nqsets; adapter_t *sc = pi->adapter; struct sge_qset *qs; struct mtx *lock; KASSERT((sc->flags & USING_MSIX) == 0, ("can't call timer reclaim for msi-x")); for (i = 0; i < nqsets; i++) { qs = &sc->sge.qs[pi->first_qset + i]; reclaim_completed_tx(qs, 16, TXQ_OFLD); lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock : &sc->sge.qs[0].rspq.lock; if (mtx_trylock(lock)) { /* XXX currently assume that we are *NOT* polling */ uint32_t status = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS); if (qs->fl[0].credits < qs->fl[0].size - 16) __refill_fl(sc, &qs->fl[0]); if (qs->fl[1].credits < qs->fl[1].size - 16) __refill_fl(sc, &qs->fl[1]); if (status & (1 << qs->rspq.cntxt_id)) { if (qs->rspq.credits) { refill_rspq(sc, &qs->rspq, 1); qs->rspq.credits--; t3_write_reg(sc, A_SG_RSPQ_FL_STATUS, 1 << qs->rspq.cntxt_id); } } mtx_unlock(lock); } } } /** * init_qset_cntxt - initialize an SGE queue set context info * @qs: the queue set * @id: the queue set id * * Initializes the TIDs and context ids for the queues of a queue set. */ static void init_qset_cntxt(struct sge_qset *qs, u_int id) { qs->rspq.cntxt_id = id; qs->fl[0].cntxt_id = 2 * id; qs->fl[1].cntxt_id = 2 * id + 1; qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id; qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id; qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id; qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id; qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id; /* XXX: a sane limit is needed instead of INT_MAX */ mbufq_init(&qs->txq[TXQ_ETH].sendq, INT_MAX); mbufq_init(&qs->txq[TXQ_OFLD].sendq, INT_MAX); mbufq_init(&qs->txq[TXQ_CTRL].sendq, INT_MAX); } static void txq_prod(struct sge_txq *txq, unsigned int ndesc, struct txq_state *txqs) { txq->in_use += ndesc; /* * XXX we don't handle stopping of queue * presumably start handles this when we bump against the end */ txqs->gen = txq->gen; txq->unacked += ndesc; txqs->compl = (txq->unacked & 32) << (S_WR_COMPL - 5); txq->unacked &= 31; txqs->pidx = txq->pidx; txq->pidx += ndesc; #ifdef INVARIANTS if (((txqs->pidx > txq->cidx) && (txq->pidx < txqs->pidx) && (txq->pidx >= txq->cidx)) || ((txqs->pidx < txq->cidx) && (txq->pidx >= txq-> cidx)) || ((txqs->pidx < txq->cidx) && (txq->cidx < txqs->pidx))) panic("txqs->pidx=%d txq->pidx=%d txq->cidx=%d", txqs->pidx, txq->pidx, txq->cidx); #endif if (txq->pidx >= txq->size) { txq->pidx -= txq->size; txq->gen ^= 1; } } /** * calc_tx_descs - calculate the number of Tx descriptors for a packet * @m: the packet mbufs * @nsegs: the number of segments * * Returns the number of Tx descriptors needed for the given Ethernet * packet. Ethernet packets require addition of WR and CPL headers. */ static __inline unsigned int calc_tx_descs(const struct mbuf *m, int nsegs) { unsigned int flits; if (m->m_pkthdr.len <= PIO_LEN) return 1; flits = sgl_len(nsegs) + 2; if (m->m_pkthdr.csum_flags & CSUM_TSO) flits++; return flits_to_desc(flits); } /** * make_sgl - populate a scatter/gather list for a packet * @sgp: the SGL to populate * @segs: the packet dma segments * @nsegs: the number of segments * * Generates a scatter/gather list for the buffers that make up a packet * and returns the SGL size in 8-byte words. The caller must size the SGL * appropriately. */ static __inline void make_sgl(struct sg_ent *sgp, bus_dma_segment_t *segs, int nsegs) { int i, idx; for (idx = 0, i = 0; i < nsegs; i++) { /* * firmware doesn't like empty segments */ if (segs[i].ds_len == 0) continue; if (i && idx == 0) ++sgp; sgp->len[idx] = htobe32(segs[i].ds_len); sgp->addr[idx] = htobe64(segs[i].ds_addr); idx ^= 1; } if (idx) { sgp->len[idx] = 0; sgp->addr[idx] = 0; } } /** * check_ring_tx_db - check and potentially ring a Tx queue's doorbell * @adap: the adapter * @q: the Tx queue * * Ring the doorbell if a Tx queue is asleep. There is a natural race, * where the HW is going to sleep just after we checked, however, * then the interrupt handler will detect the outstanding TX packet * and ring the doorbell for us. * * When GTS is disabled we unconditionally ring the doorbell. */ static __inline void check_ring_tx_db(adapter_t *adap, struct sge_txq *q, int mustring) { #if USE_GTS clear_bit(TXQ_LAST_PKT_DB, &q->flags); if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) { set_bit(TXQ_LAST_PKT_DB, &q->flags); #ifdef T3_TRACE T3_TRACE1(adap->tb[q->cntxt_id & 7], "doorbell Tx, cntxt %d", q->cntxt_id); #endif t3_write_reg(adap, A_SG_KDOORBELL, F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); } #else if (mustring || ++q->db_pending >= 32) { wmb(); /* write descriptors before telling HW */ t3_write_reg(adap, A_SG_KDOORBELL, F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); q->db_pending = 0; } #endif } static __inline void wr_gen2(struct tx_desc *d, unsigned int gen) { #if SGE_NUM_GENBITS == 2 d->flit[TX_DESC_FLITS - 1] = htobe64(gen); #endif } /** * write_wr_hdr_sgl - write a WR header and, optionally, SGL * @ndesc: number of Tx descriptors spanned by the SGL * @txd: first Tx descriptor to be written * @txqs: txq state (generation and producer index) * @txq: the SGE Tx queue * @sgl: the SGL * @flits: number of flits to the start of the SGL in the first descriptor * @sgl_flits: the SGL size in flits * @wr_hi: top 32 bits of WR header based on WR type (big endian) * @wr_lo: low 32 bits of WR header based on WR type (big endian) * * Write a work request header and an associated SGL. If the SGL is * small enough to fit into one Tx descriptor it has already been written * and we just need to write the WR header. Otherwise we distribute the * SGL across the number of descriptors it spans. */ static void write_wr_hdr_sgl(unsigned int ndesc, struct tx_desc *txd, struct txq_state *txqs, const struct sge_txq *txq, const struct sg_ent *sgl, unsigned int flits, unsigned int sgl_flits, unsigned int wr_hi, unsigned int wr_lo) { struct work_request_hdr *wrp = (struct work_request_hdr *)txd; struct tx_sw_desc *txsd = &txq->sdesc[txqs->pidx]; if (__predict_true(ndesc == 1)) { set_wr_hdr(wrp, htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) | V_WR_SGLSFLT(flits)) | wr_hi, htonl(V_WR_LEN(flits + sgl_flits) | V_WR_GEN(txqs->gen)) | wr_lo); wr_gen2(txd, txqs->gen); } else { unsigned int ogen = txqs->gen; const uint64_t *fp = (const uint64_t *)sgl; struct work_request_hdr *wp = wrp; wrp->wrh_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) | V_WR_SGLSFLT(flits)) | wr_hi; while (sgl_flits) { unsigned int avail = WR_FLITS - flits; if (avail > sgl_flits) avail = sgl_flits; memcpy(&txd->flit[flits], fp, avail * sizeof(*fp)); sgl_flits -= avail; ndesc--; if (!sgl_flits) break; fp += avail; txd++; txsd++; if (++txqs->pidx == txq->size) { txqs->pidx = 0; txqs->gen ^= 1; txd = txq->desc; txsd = txq->sdesc; } /* * when the head of the mbuf chain * is freed all clusters will be freed * with it */ wrp = (struct work_request_hdr *)txd; wrp->wrh_hi = htonl(V_WR_DATATYPE(1) | V_WR_SGLSFLT(1)) | wr_hi; wrp->wrh_lo = htonl(V_WR_LEN(min(WR_FLITS, sgl_flits + 1)) | V_WR_GEN(txqs->gen)) | wr_lo; wr_gen2(txd, txqs->gen); flits = 1; } wrp->wrh_hi |= htonl(F_WR_EOP); wmb(); wp->wrh_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo; wr_gen2((struct tx_desc *)wp, ogen); } } /* sizeof(*eh) + sizeof(*ip) + sizeof(*tcp) */ #define TCPPKTHDRSIZE (ETHER_HDR_LEN + 20 + 20) #define GET_VTAG(cntrl, m) \ do { \ if ((m)->m_flags & M_VLANTAG) \ cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN((m)->m_pkthdr.ether_vtag); \ } while (0) static int t3_encap(struct sge_qset *qs, struct mbuf **m) { adapter_t *sc; struct mbuf *m0; struct sge_txq *txq; struct txq_state txqs; struct port_info *pi; unsigned int ndesc, flits, cntrl, mlen; int err, nsegs, tso_info = 0; struct work_request_hdr *wrp; struct tx_sw_desc *txsd; struct sg_ent *sgp, *sgl; uint32_t wr_hi, wr_lo, sgl_flits; bus_dma_segment_t segs[TX_MAX_SEGS]; struct tx_desc *txd; pi = qs->port; sc = pi->adapter; txq = &qs->txq[TXQ_ETH]; txd = &txq->desc[txq->pidx]; txsd = &txq->sdesc[txq->pidx]; sgl = txq->txq_sgl; prefetch(txd); m0 = *m; mtx_assert(&qs->lock, MA_OWNED); cntrl = V_TXPKT_INTF(pi->txpkt_intf); KASSERT(m0->m_flags & M_PKTHDR, ("not packet header\n")); if (m0->m_nextpkt == NULL && m0->m_next != NULL && m0->m_pkthdr.csum_flags & (CSUM_TSO)) tso_info = V_LSO_MSS(m0->m_pkthdr.tso_segsz); if (m0->m_nextpkt != NULL) { busdma_map_sg_vec(txq->entry_tag, txsd->map, m0, segs, &nsegs); ndesc = 1; mlen = 0; } else { if ((err = busdma_map_sg_collapse(txq->entry_tag, txsd->map, &m0, segs, &nsegs))) { if (cxgb_debug) printf("failed ... err=%d\n", err); return (err); } mlen = m0->m_pkthdr.len; ndesc = calc_tx_descs(m0, nsegs); } txq_prod(txq, ndesc, &txqs); KASSERT(m0->m_pkthdr.len, ("empty packet nsegs=%d", nsegs)); txsd->m = m0; if (m0->m_nextpkt != NULL) { struct cpl_tx_pkt_batch *cpl_batch = (struct cpl_tx_pkt_batch *)txd; int i, fidx; if (nsegs > 7) panic("trying to coalesce %d packets in to one WR", nsegs); txq->txq_coalesced += nsegs; wrp = (struct work_request_hdr *)txd; flits = nsegs*2 + 1; for (fidx = 1, i = 0; i < nsegs; i++, fidx += 2) { struct cpl_tx_pkt_batch_entry *cbe; uint64_t flit; uint32_t *hflit = (uint32_t *)&flit; int cflags = m0->m_pkthdr.csum_flags; cntrl = V_TXPKT_INTF(pi->txpkt_intf); GET_VTAG(cntrl, m0); cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT); if (__predict_false(!(cflags & CSUM_IP))) cntrl |= F_TXPKT_IPCSUM_DIS; if (__predict_false(!(cflags & (CSUM_TCP | CSUM_UDP | CSUM_UDP_IPV6 | CSUM_TCP_IPV6)))) cntrl |= F_TXPKT_L4CSUM_DIS; hflit[0] = htonl(cntrl); hflit[1] = htonl(segs[i].ds_len | 0x80000000); flit |= htobe64(1 << 24); cbe = &cpl_batch->pkt_entry[i]; cbe->cntrl = hflit[0]; cbe->len = hflit[1]; cbe->addr = htobe64(segs[i].ds_addr); } wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) | V_WR_SGLSFLT(flits)) | htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl); wr_lo = htonl(V_WR_LEN(flits) | V_WR_GEN(txqs.gen)) | htonl(V_WR_TID(txq->token)); set_wr_hdr(wrp, wr_hi, wr_lo); wmb(); ETHER_BPF_MTAP(pi->ifp, m0); wr_gen2(txd, txqs.gen); check_ring_tx_db(sc, txq, 0); return (0); } else if (tso_info) { uint16_t eth_type; struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *)txd; struct ether_header *eh; void *l3hdr; struct tcphdr *tcp; txd->flit[2] = 0; GET_VTAG(cntrl, m0); cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO); hdr->cntrl = htonl(cntrl); hdr->len = htonl(mlen | 0x80000000); if (__predict_false(mlen < TCPPKTHDRSIZE)) { printf("mbuf=%p,len=%d,tso_segsz=%d,csum_flags=%b,flags=%#x", m0, mlen, m0->m_pkthdr.tso_segsz, (int)m0->m_pkthdr.csum_flags, CSUM_BITS, m0->m_flags); panic("tx tso packet too small"); } /* Make sure that ether, ip, tcp headers are all in m0 */ if (__predict_false(m0->m_len < TCPPKTHDRSIZE)) { m0 = m_pullup(m0, TCPPKTHDRSIZE); if (__predict_false(m0 == NULL)) { /* XXX panic probably an overreaction */ panic("couldn't fit header into mbuf"); } } eh = mtod(m0, struct ether_header *); eth_type = eh->ether_type; if (eth_type == htons(ETHERTYPE_VLAN)) { struct ether_vlan_header *evh = (void *)eh; tso_info |= V_LSO_ETH_TYPE(CPL_ETH_II_VLAN); l3hdr = evh + 1; eth_type = evh->evl_proto; } else { tso_info |= V_LSO_ETH_TYPE(CPL_ETH_II); l3hdr = eh + 1; } if (eth_type == htons(ETHERTYPE_IP)) { struct ip *ip = l3hdr; tso_info |= V_LSO_IPHDR_WORDS(ip->ip_hl); tcp = (struct tcphdr *)(ip + 1); } else if (eth_type == htons(ETHERTYPE_IPV6)) { struct ip6_hdr *ip6 = l3hdr; KASSERT(ip6->ip6_nxt == IPPROTO_TCP, ("%s: CSUM_TSO with ip6_nxt %d", __func__, ip6->ip6_nxt)); tso_info |= F_LSO_IPV6; tso_info |= V_LSO_IPHDR_WORDS(sizeof(*ip6) >> 2); tcp = (struct tcphdr *)(ip6 + 1); } else panic("%s: CSUM_TSO but neither ip nor ip6", __func__); tso_info |= V_LSO_TCPHDR_WORDS(tcp->th_off); hdr->lso_info = htonl(tso_info); if (__predict_false(mlen <= PIO_LEN)) { /* * pkt not undersized but fits in PIO_LEN * Indicates a TSO bug at the higher levels. */ txsd->m = NULL; m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[3]); flits = (mlen + 7) / 8 + 3; wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) | V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | F_WR_SOP | F_WR_EOP | txqs.compl); wr_lo = htonl(V_WR_LEN(flits) | V_WR_GEN(txqs.gen) | V_WR_TID(txq->token)); set_wr_hdr(&hdr->wr, wr_hi, wr_lo); wmb(); ETHER_BPF_MTAP(pi->ifp, m0); wr_gen2(txd, txqs.gen); check_ring_tx_db(sc, txq, 0); m_freem(m0); return (0); } flits = 3; } else { struct cpl_tx_pkt *cpl = (struct cpl_tx_pkt *)txd; GET_VTAG(cntrl, m0); cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT); if (__predict_false(!(m0->m_pkthdr.csum_flags & CSUM_IP))) cntrl |= F_TXPKT_IPCSUM_DIS; if (__predict_false(!(m0->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP | CSUM_UDP_IPV6 | CSUM_TCP_IPV6)))) cntrl |= F_TXPKT_L4CSUM_DIS; cpl->cntrl = htonl(cntrl); cpl->len = htonl(mlen | 0x80000000); if (mlen <= PIO_LEN) { txsd->m = NULL; m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[2]); flits = (mlen + 7) / 8 + 2; wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) | V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | F_WR_SOP | F_WR_EOP | txqs.compl); wr_lo = htonl(V_WR_LEN(flits) | V_WR_GEN(txqs.gen) | V_WR_TID(txq->token)); set_wr_hdr(&cpl->wr, wr_hi, wr_lo); wmb(); ETHER_BPF_MTAP(pi->ifp, m0); wr_gen2(txd, txqs.gen); check_ring_tx_db(sc, txq, 0); m_freem(m0); return (0); } flits = 2; } wrp = (struct work_request_hdr *)txd; sgp = (ndesc == 1) ? (struct sg_ent *)&txd->flit[flits] : sgl; make_sgl(sgp, segs, nsegs); sgl_flits = sgl_len(nsegs); ETHER_BPF_MTAP(pi->ifp, m0); KASSERT(ndesc <= 4, ("ndesc too large %d", ndesc)); wr_hi = htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl); wr_lo = htonl(V_WR_TID(txq->token)); write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits, sgl_flits, wr_hi, wr_lo); check_ring_tx_db(sc, txq, 0); return (0); } +#ifdef NETDUMP +int +cxgb_netdump_encap(struct sge_qset *qs, struct mbuf **m) +{ + int error; + + error = t3_encap(qs, m); + if (error == 0) + check_ring_tx_db(qs->port->adapter, &qs->txq[TXQ_ETH], 1); + else if (*m != NULL) { + m_freem(*m); + *m = NULL; + } + return (error); +} +#endif + void cxgb_tx_watchdog(void *arg) { struct sge_qset *qs = arg; struct sge_txq *txq = &qs->txq[TXQ_ETH]; if (qs->coalescing != 0 && (txq->in_use <= cxgb_tx_coalesce_enable_stop) && TXQ_RING_EMPTY(qs)) qs->coalescing = 0; else if (qs->coalescing == 0 && (txq->in_use >= cxgb_tx_coalesce_enable_start)) qs->coalescing = 1; if (TXQ_TRYLOCK(qs)) { qs->qs_flags |= QS_FLUSHING; cxgb_start_locked(qs); qs->qs_flags &= ~QS_FLUSHING; TXQ_UNLOCK(qs); } if (qs->port->ifp->if_drv_flags & IFF_DRV_RUNNING) callout_reset_on(&txq->txq_watchdog, hz/4, cxgb_tx_watchdog, qs, txq->txq_watchdog.c_cpu); } static void cxgb_tx_timeout(void *arg) { struct sge_qset *qs = arg; struct sge_txq *txq = &qs->txq[TXQ_ETH]; if (qs->coalescing == 0 && (txq->in_use >= (txq->size>>3))) qs->coalescing = 1; if (TXQ_TRYLOCK(qs)) { qs->qs_flags |= QS_TIMEOUT; cxgb_start_locked(qs); qs->qs_flags &= ~QS_TIMEOUT; TXQ_UNLOCK(qs); } } static void cxgb_start_locked(struct sge_qset *qs) { struct mbuf *m_head = NULL; struct sge_txq *txq = &qs->txq[TXQ_ETH]; struct port_info *pi = qs->port; struct ifnet *ifp = pi->ifp; if (qs->qs_flags & (QS_FLUSHING|QS_TIMEOUT)) reclaim_completed_tx(qs, 0, TXQ_ETH); if (!pi->link_config.link_ok) { TXQ_RING_FLUSH(qs); return; } TXQ_LOCK_ASSERT(qs); while (!TXQ_RING_EMPTY(qs) && (ifp->if_drv_flags & IFF_DRV_RUNNING) && pi->link_config.link_ok) { reclaim_completed_tx(qs, cxgb_tx_reclaim_threshold, TXQ_ETH); if (txq->size - txq->in_use <= TX_MAX_DESC) break; if ((m_head = cxgb_dequeue(qs)) == NULL) break; /* * Encapsulation can modify our pointer, and or make it * NULL on failure. In that event, we can't requeue. */ if (t3_encap(qs, &m_head) || m_head == NULL) break; m_head = NULL; } if (txq->db_pending) check_ring_tx_db(pi->adapter, txq, 1); if (!TXQ_RING_EMPTY(qs) && callout_pending(&txq->txq_timer) == 0 && pi->link_config.link_ok) callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout, qs, txq->txq_timer.c_cpu); if (m_head != NULL) m_freem(m_head); } static int cxgb_transmit_locked(struct ifnet *ifp, struct sge_qset *qs, struct mbuf *m) { struct port_info *pi = qs->port; struct sge_txq *txq = &qs->txq[TXQ_ETH]; struct buf_ring *br = txq->txq_mr; int error, avail; avail = txq->size - txq->in_use; TXQ_LOCK_ASSERT(qs); /* * We can only do a direct transmit if the following are true: * - we aren't coalescing (ring < 3/4 full) * - the link is up -- checked in caller * - there are no packets enqueued already * - there is space in hardware transmit queue */ if (check_pkt_coalesce(qs) == 0 && !TXQ_RING_NEEDS_ENQUEUE(qs) && avail > TX_MAX_DESC) { if (t3_encap(qs, &m)) { if (m != NULL && (error = drbr_enqueue(ifp, br, m)) != 0) return (error); } else { if (txq->db_pending) check_ring_tx_db(pi->adapter, txq, 1); /* * We've bypassed the buf ring so we need to update * the stats directly */ txq->txq_direct_packets++; txq->txq_direct_bytes += m->m_pkthdr.len; } } else if ((error = drbr_enqueue(ifp, br, m)) != 0) return (error); reclaim_completed_tx(qs, cxgb_tx_reclaim_threshold, TXQ_ETH); if (!TXQ_RING_EMPTY(qs) && pi->link_config.link_ok && (!check_pkt_coalesce(qs) || (drbr_inuse(ifp, br) >= 7))) cxgb_start_locked(qs); else if (!TXQ_RING_EMPTY(qs) && !callout_pending(&txq->txq_timer)) callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout, qs, txq->txq_timer.c_cpu); return (0); } int cxgb_transmit(struct ifnet *ifp, struct mbuf *m) { struct sge_qset *qs; struct port_info *pi = ifp->if_softc; int error, qidx = pi->first_qset; if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||(!pi->link_config.link_ok)) { m_freem(m); return (0); } /* check if flowid is set */ if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) qidx = (m->m_pkthdr.flowid % pi->nqsets) + pi->first_qset; qs = &pi->adapter->sge.qs[qidx]; if (TXQ_TRYLOCK(qs)) { /* XXX running */ error = cxgb_transmit_locked(ifp, qs, m); TXQ_UNLOCK(qs); } else error = drbr_enqueue(ifp, qs->txq[TXQ_ETH].txq_mr, m); return (error); } void cxgb_qflush(struct ifnet *ifp) { /* * flush any enqueued mbufs in the buf_rings * and in the transmit queues * no-op for now */ return; } /** * write_imm - write a packet into a Tx descriptor as immediate data * @d: the Tx descriptor to write * @m: the packet * @len: the length of packet data to write as immediate data * @gen: the generation bit value to write * * Writes a packet as immediate data into a Tx descriptor. The packet * contains a work request at its beginning. We must write the packet * carefully so the SGE doesn't read accidentally before it's written in * its entirety. */ static __inline void write_imm(struct tx_desc *d, caddr_t src, unsigned int len, unsigned int gen) { struct work_request_hdr *from = (struct work_request_hdr *)src; struct work_request_hdr *to = (struct work_request_hdr *)d; uint32_t wr_hi, wr_lo; KASSERT(len <= WR_LEN && len >= sizeof(*from), ("%s: invalid len %d", __func__, len)); memcpy(&to[1], &from[1], len - sizeof(*from)); wr_hi = from->wrh_hi | htonl(F_WR_SOP | F_WR_EOP | V_WR_BCNTLFLT(len & 7)); wr_lo = from->wrh_lo | htonl(V_WR_GEN(gen) | V_WR_LEN((len + 7) / 8)); set_wr_hdr(to, wr_hi, wr_lo); wmb(); wr_gen2(d, gen); } /** * check_desc_avail - check descriptor availability on a send queue * @adap: the adapter * @q: the TX queue * @m: the packet needing the descriptors * @ndesc: the number of Tx descriptors needed * @qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL) * * Checks if the requested number of Tx descriptors is available on an * SGE send queue. If the queue is already suspended or not enough * descriptors are available the packet is queued for later transmission. * Must be called with the Tx queue locked. * * Returns 0 if enough descriptors are available, 1 if there aren't * enough descriptors and the packet has been queued, and 2 if the caller * needs to retry because there weren't enough descriptors at the * beginning of the call but some freed up in the mean time. */ static __inline int check_desc_avail(adapter_t *adap, struct sge_txq *q, struct mbuf *m, unsigned int ndesc, unsigned int qid) { /* * XXX We currently only use this for checking the control queue * the control queue is only used for binding qsets which happens * at init time so we are guaranteed enough descriptors */ if (__predict_false(mbufq_len(&q->sendq))) { addq_exit: (void )mbufq_enqueue(&q->sendq, m); return 1; } if (__predict_false(q->size - q->in_use < ndesc)) { struct sge_qset *qs = txq_to_qset(q, qid); setbit(&qs->txq_stopped, qid); if (should_restart_tx(q) && test_and_clear_bit(qid, &qs->txq_stopped)) return 2; q->stops++; goto addq_exit; } return 0; } /** * reclaim_completed_tx_imm - reclaim completed control-queue Tx descs * @q: the SGE control Tx queue * * This is a variant of reclaim_completed_tx() that is used for Tx queues * that send only immediate data (presently just the control queues) and * thus do not have any mbufs */ static __inline void reclaim_completed_tx_imm(struct sge_txq *q) { unsigned int reclaim = q->processed - q->cleaned; q->in_use -= reclaim; q->cleaned += reclaim; } /** * ctrl_xmit - send a packet through an SGE control Tx queue * @adap: the adapter * @q: the control queue * @m: the packet * * Send a packet through an SGE control Tx queue. Packets sent through * a control queue must fit entirely as immediate data in a single Tx * descriptor and have no page fragments. */ static int ctrl_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m) { int ret; struct work_request_hdr *wrp = mtod(m, struct work_request_hdr *); struct sge_txq *q = &qs->txq[TXQ_CTRL]; KASSERT(m->m_len <= WR_LEN, ("%s: bad tx data", __func__)); wrp->wrh_hi |= htonl(F_WR_SOP | F_WR_EOP); wrp->wrh_lo = htonl(V_WR_TID(q->token)); TXQ_LOCK(qs); again: reclaim_completed_tx_imm(q); ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL); if (__predict_false(ret)) { if (ret == 1) { TXQ_UNLOCK(qs); return (ENOSPC); } goto again; } write_imm(&q->desc[q->pidx], m->m_data, m->m_len, q->gen); q->in_use++; if (++q->pidx >= q->size) { q->pidx = 0; q->gen ^= 1; } TXQ_UNLOCK(qs); wmb(); t3_write_reg(adap, A_SG_KDOORBELL, F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); m_free(m); return (0); } /** * restart_ctrlq - restart a suspended control queue * @qs: the queue set cotaining the control queue * * Resumes transmission on a suspended Tx control queue. */ static void restart_ctrlq(void *data, int npending) { struct mbuf *m; struct sge_qset *qs = (struct sge_qset *)data; struct sge_txq *q = &qs->txq[TXQ_CTRL]; adapter_t *adap = qs->port->adapter; TXQ_LOCK(qs); again: reclaim_completed_tx_imm(q); while (q->in_use < q->size && (m = mbufq_dequeue(&q->sendq)) != NULL) { write_imm(&q->desc[q->pidx], m->m_data, m->m_len, q->gen); m_free(m); if (++q->pidx >= q->size) { q->pidx = 0; q->gen ^= 1; } q->in_use++; } if (mbufq_len(&q->sendq)) { setbit(&qs->txq_stopped, TXQ_CTRL); if (should_restart_tx(q) && test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) goto again; q->stops++; } TXQ_UNLOCK(qs); t3_write_reg(adap, A_SG_KDOORBELL, F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); } /* * Send a management message through control queue 0 */ int t3_mgmt_tx(struct adapter *adap, struct mbuf *m) { return ctrl_xmit(adap, &adap->sge.qs[0], m); } /** * free_qset - free the resources of an SGE queue set * @sc: the controller owning the queue set * @q: the queue set * * Release the HW and SW resources associated with an SGE queue set, such * as HW contexts, packet buffers, and descriptor rings. Traffic to the * queue set must be quiesced prior to calling this. */ static void t3_free_qset(adapter_t *sc, struct sge_qset *q) { int i; reclaim_completed_tx(q, 0, TXQ_ETH); if (q->txq[TXQ_ETH].txq_mr != NULL) buf_ring_free(q->txq[TXQ_ETH].txq_mr, M_DEVBUF); if (q->txq[TXQ_ETH].txq_ifq != NULL) { ifq_delete(q->txq[TXQ_ETH].txq_ifq); free(q->txq[TXQ_ETH].txq_ifq, M_DEVBUF); } for (i = 0; i < SGE_RXQ_PER_SET; ++i) { if (q->fl[i].desc) { mtx_lock_spin(&sc->sge.reg_lock); t3_sge_disable_fl(sc, q->fl[i].cntxt_id); mtx_unlock_spin(&sc->sge.reg_lock); bus_dmamap_unload(q->fl[i].desc_tag, q->fl[i].desc_map); bus_dmamem_free(q->fl[i].desc_tag, q->fl[i].desc, q->fl[i].desc_map); bus_dma_tag_destroy(q->fl[i].desc_tag); bus_dma_tag_destroy(q->fl[i].entry_tag); } if (q->fl[i].sdesc) { free_rx_bufs(sc, &q->fl[i]); free(q->fl[i].sdesc, M_DEVBUF); } } mtx_unlock(&q->lock); MTX_DESTROY(&q->lock); for (i = 0; i < SGE_TXQ_PER_SET; i++) { if (q->txq[i].desc) { mtx_lock_spin(&sc->sge.reg_lock); t3_sge_enable_ecntxt(sc, q->txq[i].cntxt_id, 0); mtx_unlock_spin(&sc->sge.reg_lock); bus_dmamap_unload(q->txq[i].desc_tag, q->txq[i].desc_map); bus_dmamem_free(q->txq[i].desc_tag, q->txq[i].desc, q->txq[i].desc_map); bus_dma_tag_destroy(q->txq[i].desc_tag); bus_dma_tag_destroy(q->txq[i].entry_tag); } if (q->txq[i].sdesc) { free(q->txq[i].sdesc, M_DEVBUF); } } if (q->rspq.desc) { mtx_lock_spin(&sc->sge.reg_lock); t3_sge_disable_rspcntxt(sc, q->rspq.cntxt_id); mtx_unlock_spin(&sc->sge.reg_lock); bus_dmamap_unload(q->rspq.desc_tag, q->rspq.desc_map); bus_dmamem_free(q->rspq.desc_tag, q->rspq.desc, q->rspq.desc_map); bus_dma_tag_destroy(q->rspq.desc_tag); MTX_DESTROY(&q->rspq.lock); } #if defined(INET6) || defined(INET) tcp_lro_free(&q->lro.ctrl); #endif bzero(q, sizeof(*q)); } /** * t3_free_sge_resources - free SGE resources * @sc: the adapter softc * * Frees resources used by the SGE queue sets. */ void t3_free_sge_resources(adapter_t *sc, int nqsets) { int i; for (i = 0; i < nqsets; ++i) { TXQ_LOCK(&sc->sge.qs[i]); t3_free_qset(sc, &sc->sge.qs[i]); } } /** * t3_sge_start - enable SGE * @sc: the controller softc * * Enables the SGE for DMAs. This is the last step in starting packet * transfers. */ void t3_sge_start(adapter_t *sc) { t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE); } /** * t3_sge_stop - disable SGE operation * @sc: the adapter * * Disables the DMA engine. This can be called in emeregencies (e.g., * from error interrupts) or from normal process context. In the latter * case it also disables any pending queue restart tasklets. Note that * if it is called in interrupt context it cannot disable the restart * tasklets as it cannot wait, however the tasklets will have no effect * since the doorbells are disabled and the driver will call this again * later from process context, at which time the tasklets will be stopped * if they are still running. */ void t3_sge_stop(adapter_t *sc) { int i, nqsets; t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, 0); if (sc->tq == NULL) return; for (nqsets = i = 0; i < (sc)->params.nports; i++) nqsets += sc->port[i].nqsets; #ifdef notyet /* * * XXX */ for (i = 0; i < nqsets; ++i) { struct sge_qset *qs = &sc->sge.qs[i]; taskqueue_drain(sc->tq, &qs->txq[TXQ_OFLD].qresume_task); taskqueue_drain(sc->tq, &qs->txq[TXQ_CTRL].qresume_task); } #endif } /** * t3_free_tx_desc - reclaims Tx descriptors and their buffers * @adapter: the adapter * @q: the Tx queue to reclaim descriptors from * @reclaimable: the number of descriptors to reclaim * @m_vec_size: maximum number of buffers to reclaim * @desc_reclaimed: returns the number of descriptors reclaimed * * Reclaims Tx descriptors from an SGE Tx queue and frees the associated * Tx buffers. Called with the Tx queue lock held. * * Returns number of buffers of reclaimed */ void t3_free_tx_desc(struct sge_qset *qs, int reclaimable, int queue) { struct tx_sw_desc *txsd; unsigned int cidx, mask; struct sge_txq *q = &qs->txq[queue]; #ifdef T3_TRACE T3_TRACE2(sc->tb[q->cntxt_id & 7], "reclaiming %u Tx descriptors at cidx %u", reclaimable, cidx); #endif cidx = q->cidx; mask = q->size - 1; txsd = &q->sdesc[cidx]; mtx_assert(&qs->lock, MA_OWNED); while (reclaimable--) { prefetch(q->sdesc[(cidx + 1) & mask].m); prefetch(q->sdesc[(cidx + 2) & mask].m); if (txsd->m != NULL) { if (txsd->flags & TX_SW_DESC_MAPPED) { bus_dmamap_unload(q->entry_tag, txsd->map); txsd->flags &= ~TX_SW_DESC_MAPPED; } m_freem_list(txsd->m); txsd->m = NULL; } else q->txq_skipped++; ++txsd; if (++cidx == q->size) { cidx = 0; txsd = q->sdesc; } } q->cidx = cidx; } /** * is_new_response - check if a response is newly written * @r: the response descriptor * @q: the response queue * * Returns true if a response descriptor contains a yet unprocessed * response. */ static __inline int is_new_response(const struct rsp_desc *r, const struct sge_rspq *q) { return (r->intr_gen & F_RSPD_GEN2) == q->gen; } #define RSPD_GTS_MASK (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS) #define RSPD_CTRL_MASK (RSPD_GTS_MASK | \ V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \ V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \ V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR)) /* How long to delay the next interrupt in case of memory shortage, in 0.1us. */ #define NOMEM_INTR_DELAY 2500 #ifdef TCP_OFFLOAD /** * write_ofld_wr - write an offload work request * @adap: the adapter * @m: the packet to send * @q: the Tx queue * @pidx: index of the first Tx descriptor to write * @gen: the generation value to use * @ndesc: number of descriptors the packet will occupy * * Write an offload work request to send the supplied packet. The packet * data already carry the work request with most fields populated. */ static void write_ofld_wr(adapter_t *adap, struct mbuf *m, struct sge_txq *q, unsigned int pidx, unsigned int gen, unsigned int ndesc) { unsigned int sgl_flits, flits; int i, idx, nsegs, wrlen; struct work_request_hdr *from; struct sg_ent *sgp, t3sgl[TX_MAX_SEGS / 2 + 1]; struct tx_desc *d = &q->desc[pidx]; struct txq_state txqs; struct sglist_seg *segs; struct ofld_hdr *oh = mtod(m, struct ofld_hdr *); struct sglist *sgl; from = (void *)(oh + 1); /* Start of WR within mbuf */ wrlen = m->m_len - sizeof(*oh); if (!(oh->flags & F_HDR_SGL)) { write_imm(d, (caddr_t)from, wrlen, gen); /* * mbuf with "real" immediate tx data will be enqueue_wr'd by * t3_push_frames and freed in wr_ack. Others, like those sent * down by close_conn, t3_send_reset, etc. should be freed here. */ if (!(oh->flags & F_HDR_DF)) m_free(m); return; } memcpy(&d->flit[1], &from[1], wrlen - sizeof(*from)); sgl = oh->sgl; flits = wrlen / 8; sgp = (ndesc == 1) ? (struct sg_ent *)&d->flit[flits] : t3sgl; nsegs = sgl->sg_nseg; segs = sgl->sg_segs; for (idx = 0, i = 0; i < nsegs; i++) { KASSERT(segs[i].ss_len, ("%s: 0 len in sgl", __func__)); if (i && idx == 0) ++sgp; sgp->len[idx] = htobe32(segs[i].ss_len); sgp->addr[idx] = htobe64(segs[i].ss_paddr); idx ^= 1; } if (idx) { sgp->len[idx] = 0; sgp->addr[idx] = 0; } sgl_flits = sgl_len(nsegs); txqs.gen = gen; txqs.pidx = pidx; txqs.compl = 0; write_wr_hdr_sgl(ndesc, d, &txqs, q, t3sgl, flits, sgl_flits, from->wrh_hi, from->wrh_lo); } /** * ofld_xmit - send a packet through an offload queue * @adap: the adapter * @q: the Tx offload queue * @m: the packet * * Send an offload packet through an SGE offload queue. */ static int ofld_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m) { int ret; unsigned int ndesc; unsigned int pidx, gen; struct sge_txq *q = &qs->txq[TXQ_OFLD]; struct ofld_hdr *oh = mtod(m, struct ofld_hdr *); ndesc = G_HDR_NDESC(oh->flags); TXQ_LOCK(qs); again: reclaim_completed_tx(qs, 16, TXQ_OFLD); ret = check_desc_avail(adap, q, m, ndesc, TXQ_OFLD); if (__predict_false(ret)) { if (ret == 1) { TXQ_UNLOCK(qs); return (EINTR); } goto again; } gen = q->gen; q->in_use += ndesc; pidx = q->pidx; q->pidx += ndesc; if (q->pidx >= q->size) { q->pidx -= q->size; q->gen ^= 1; } write_ofld_wr(adap, m, q, pidx, gen, ndesc); check_ring_tx_db(adap, q, 1); TXQ_UNLOCK(qs); return (0); } /** * restart_offloadq - restart a suspended offload queue * @qs: the queue set cotaining the offload queue * * Resumes transmission on a suspended Tx offload queue. */ static void restart_offloadq(void *data, int npending) { struct mbuf *m; struct sge_qset *qs = data; struct sge_txq *q = &qs->txq[TXQ_OFLD]; adapter_t *adap = qs->port->adapter; int cleaned; TXQ_LOCK(qs); again: cleaned = reclaim_completed_tx(qs, 16, TXQ_OFLD); while ((m = mbufq_first(&q->sendq)) != NULL) { unsigned int gen, pidx; struct ofld_hdr *oh = mtod(m, struct ofld_hdr *); unsigned int ndesc = G_HDR_NDESC(oh->flags); if (__predict_false(q->size - q->in_use < ndesc)) { setbit(&qs->txq_stopped, TXQ_OFLD); if (should_restart_tx(q) && test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) goto again; q->stops++; break; } gen = q->gen; q->in_use += ndesc; pidx = q->pidx; q->pidx += ndesc; if (q->pidx >= q->size) { q->pidx -= q->size; q->gen ^= 1; } (void)mbufq_dequeue(&q->sendq); TXQ_UNLOCK(qs); write_ofld_wr(adap, m, q, pidx, gen, ndesc); TXQ_LOCK(qs); } #if USE_GTS set_bit(TXQ_RUNNING, &q->flags); set_bit(TXQ_LAST_PKT_DB, &q->flags); #endif TXQ_UNLOCK(qs); wmb(); t3_write_reg(adap, A_SG_KDOORBELL, F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); } /** * t3_offload_tx - send an offload packet * @m: the packet * * Sends an offload packet. We use the packet priority to select the * appropriate Tx queue as follows: bit 0 indicates whether the packet * should be sent as regular or control, bits 1-3 select the queue set. */ int t3_offload_tx(struct adapter *sc, struct mbuf *m) { struct ofld_hdr *oh = mtod(m, struct ofld_hdr *); struct sge_qset *qs = &sc->sge.qs[G_HDR_QSET(oh->flags)]; if (oh->flags & F_HDR_CTRL) { m_adj(m, sizeof (*oh)); /* trim ofld_hdr off */ return (ctrl_xmit(sc, qs, m)); } else return (ofld_xmit(sc, qs, m)); } #endif static void restart_tx(struct sge_qset *qs) { struct adapter *sc = qs->port->adapter; if (isset(&qs->txq_stopped, TXQ_OFLD) && should_restart_tx(&qs->txq[TXQ_OFLD]) && test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) { qs->txq[TXQ_OFLD].restarts++; taskqueue_enqueue(sc->tq, &qs->txq[TXQ_OFLD].qresume_task); } if (isset(&qs->txq_stopped, TXQ_CTRL) && should_restart_tx(&qs->txq[TXQ_CTRL]) && test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) { qs->txq[TXQ_CTRL].restarts++; taskqueue_enqueue(sc->tq, &qs->txq[TXQ_CTRL].qresume_task); } } /** * t3_sge_alloc_qset - initialize an SGE queue set * @sc: the controller softc * @id: the queue set id * @nports: how many Ethernet ports will be using this queue set * @irq_vec_idx: the IRQ vector index for response queue interrupts * @p: configuration parameters for this queue set * @ntxq: number of Tx queues for the queue set * @pi: port info for queue set * * Allocate resources and initialize an SGE queue set. A queue set * comprises a response queue, two Rx free-buffer queues, and up to 3 * Tx queues. The Tx queues are assigned roles in the order Ethernet * queue, offload queue, and control queue. */ int t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx, const struct qset_params *p, int ntxq, struct port_info *pi) { struct sge_qset *q = &sc->sge.qs[id]; int i, ret = 0; MTX_INIT(&q->lock, q->namebuf, NULL, MTX_DEF); q->port = pi; q->adap = sc; if ((q->txq[TXQ_ETH].txq_mr = buf_ring_alloc(cxgb_txq_buf_ring_size, M_DEVBUF, M_WAITOK, &q->lock)) == NULL) { device_printf(sc->dev, "failed to allocate mbuf ring\n"); goto err; } if ((q->txq[TXQ_ETH].txq_ifq = malloc(sizeof(struct ifaltq), M_DEVBUF, M_NOWAIT | M_ZERO)) == NULL) { device_printf(sc->dev, "failed to allocate ifq\n"); goto err; } ifq_init(q->txq[TXQ_ETH].txq_ifq, pi->ifp); callout_init(&q->txq[TXQ_ETH].txq_timer, 1); callout_init(&q->txq[TXQ_ETH].txq_watchdog, 1); q->txq[TXQ_ETH].txq_timer.c_cpu = id % mp_ncpus; q->txq[TXQ_ETH].txq_watchdog.c_cpu = id % mp_ncpus; init_qset_cntxt(q, id); q->idx = id; if ((ret = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc), sizeof(struct rx_sw_desc), &q->fl[0].phys_addr, &q->fl[0].desc, &q->fl[0].sdesc, &q->fl[0].desc_tag, &q->fl[0].desc_map, sc->rx_dmat, &q->fl[0].entry_tag)) != 0) { printf("error %d from alloc ring fl0\n", ret); goto err; } if ((ret = alloc_ring(sc, p->jumbo_size, sizeof(struct rx_desc), sizeof(struct rx_sw_desc), &q->fl[1].phys_addr, &q->fl[1].desc, &q->fl[1].sdesc, &q->fl[1].desc_tag, &q->fl[1].desc_map, sc->rx_jumbo_dmat, &q->fl[1].entry_tag)) != 0) { printf("error %d from alloc ring fl1\n", ret); goto err; } if ((ret = alloc_ring(sc, p->rspq_size, sizeof(struct rsp_desc), 0, &q->rspq.phys_addr, &q->rspq.desc, NULL, &q->rspq.desc_tag, &q->rspq.desc_map, NULL, NULL)) != 0) { printf("error %d from alloc ring rspq\n", ret); goto err; } snprintf(q->rspq.lockbuf, RSPQ_NAME_LEN, "t3 rspq lock %d:%d", device_get_unit(sc->dev), irq_vec_idx); MTX_INIT(&q->rspq.lock, q->rspq.lockbuf, NULL, MTX_DEF); for (i = 0; i < ntxq; ++i) { size_t sz = i == TXQ_CTRL ? 0 : sizeof(struct tx_sw_desc); if ((ret = alloc_ring(sc, p->txq_size[i], sizeof(struct tx_desc), sz, &q->txq[i].phys_addr, &q->txq[i].desc, &q->txq[i].sdesc, &q->txq[i].desc_tag, &q->txq[i].desc_map, sc->tx_dmat, &q->txq[i].entry_tag)) != 0) { printf("error %d from alloc ring tx %i\n", ret, i); goto err; } mbufq_init(&q->txq[i].sendq, INT_MAX); q->txq[i].gen = 1; q->txq[i].size = p->txq_size[i]; } #ifdef TCP_OFFLOAD TASK_INIT(&q->txq[TXQ_OFLD].qresume_task, 0, restart_offloadq, q); #endif TASK_INIT(&q->txq[TXQ_CTRL].qresume_task, 0, restart_ctrlq, q); TASK_INIT(&q->txq[TXQ_ETH].qreclaim_task, 0, sge_txq_reclaim_handler, q); TASK_INIT(&q->txq[TXQ_OFLD].qreclaim_task, 0, sge_txq_reclaim_handler, q); q->fl[0].gen = q->fl[1].gen = 1; q->fl[0].size = p->fl_size; q->fl[1].size = p->jumbo_size; q->rspq.gen = 1; q->rspq.cidx = 0; q->rspq.size = p->rspq_size; q->txq[TXQ_ETH].stop_thres = nports * flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3); q->fl[0].buf_size = MCLBYTES; q->fl[0].zone = zone_pack; q->fl[0].type = EXT_PACKET; if (p->jumbo_buf_size == MJUM16BYTES) { q->fl[1].zone = zone_jumbo16; q->fl[1].type = EXT_JUMBO16; } else if (p->jumbo_buf_size == MJUM9BYTES) { q->fl[1].zone = zone_jumbo9; q->fl[1].type = EXT_JUMBO9; } else if (p->jumbo_buf_size == MJUMPAGESIZE) { q->fl[1].zone = zone_jumbop; q->fl[1].type = EXT_JUMBOP; } else { KASSERT(0, ("can't deal with jumbo_buf_size %d.", p->jumbo_buf_size)); ret = EDOOFUS; goto err; } q->fl[1].buf_size = p->jumbo_buf_size; /* Allocate and setup the lro_ctrl structure */ q->lro.enabled = !!(pi->ifp->if_capenable & IFCAP_LRO); #if defined(INET6) || defined(INET) ret = tcp_lro_init(&q->lro.ctrl); if (ret) { printf("error %d from tcp_lro_init\n", ret); goto err; } #endif q->lro.ctrl.ifp = pi->ifp; mtx_lock_spin(&sc->sge.reg_lock); ret = -t3_sge_init_rspcntxt(sc, q->rspq.cntxt_id, irq_vec_idx, q->rspq.phys_addr, q->rspq.size, q->fl[0].buf_size, 1, 0); if (ret) { printf("error %d from t3_sge_init_rspcntxt\n", ret); goto err_unlock; } for (i = 0; i < SGE_RXQ_PER_SET; ++i) { ret = -t3_sge_init_flcntxt(sc, q->fl[i].cntxt_id, 0, q->fl[i].phys_addr, q->fl[i].size, q->fl[i].buf_size, p->cong_thres, 1, 0); if (ret) { printf("error %d from t3_sge_init_flcntxt for index i=%d\n", ret, i); goto err_unlock; } } ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_ETH].cntxt_id, USE_GTS, SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr, q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token, 1, 0); if (ret) { printf("error %d from t3_sge_init_ecntxt\n", ret); goto err_unlock; } if (ntxq > 1) { ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_OFLD].cntxt_id, USE_GTS, SGE_CNTXT_OFLD, id, q->txq[TXQ_OFLD].phys_addr, q->txq[TXQ_OFLD].size, 0, 1, 0); if (ret) { printf("error %d from t3_sge_init_ecntxt\n", ret); goto err_unlock; } } if (ntxq > 2) { ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_CTRL].cntxt_id, 0, SGE_CNTXT_CTRL, id, q->txq[TXQ_CTRL].phys_addr, q->txq[TXQ_CTRL].size, q->txq[TXQ_CTRL].token, 1, 0); if (ret) { printf("error %d from t3_sge_init_ecntxt\n", ret); goto err_unlock; } } mtx_unlock_spin(&sc->sge.reg_lock); t3_update_qset_coalesce(q, p); refill_fl(sc, &q->fl[0], q->fl[0].size); refill_fl(sc, &q->fl[1], q->fl[1].size); refill_rspq(sc, &q->rspq, q->rspq.size - 1); t3_write_reg(sc, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) | V_NEWTIMER(q->rspq.holdoff_tmr)); return (0); err_unlock: mtx_unlock_spin(&sc->sge.reg_lock); err: TXQ_LOCK(q); t3_free_qset(sc, q); return (ret); } /* * Remove CPL_RX_PKT headers from the mbuf and reduce it to a regular mbuf with * ethernet data. Hardware assistance with various checksums and any vlan tag * will also be taken into account here. */ void t3_rx_eth(struct adapter *adap, struct mbuf *m, int ethpad) { struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(mtod(m, uint8_t *) + ethpad); struct port_info *pi = &adap->port[adap->rxpkt_map[cpl->iff]]; struct ifnet *ifp = pi->ifp; if (cpl->vlan_valid) { m->m_pkthdr.ether_vtag = ntohs(cpl->vlan); m->m_flags |= M_VLANTAG; } m->m_pkthdr.rcvif = ifp; /* * adjust after conversion to mbuf chain */ m->m_pkthdr.len -= (sizeof(*cpl) + ethpad); m->m_len -= (sizeof(*cpl) + ethpad); m->m_data += (sizeof(*cpl) + ethpad); if (!cpl->fragment && cpl->csum_valid && cpl->csum == 0xffff) { struct ether_header *eh = mtod(m, void *); uint16_t eh_type; if (eh->ether_type == htons(ETHERTYPE_VLAN)) { struct ether_vlan_header *evh = mtod(m, void *); eh_type = evh->evl_proto; } else eh_type = eh->ether_type; if (ifp->if_capenable & IFCAP_RXCSUM && eh_type == htons(ETHERTYPE_IP)) { m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR); m->m_pkthdr.csum_data = 0xffff; } else if (ifp->if_capenable & IFCAP_RXCSUM_IPV6 && eh_type == htons(ETHERTYPE_IPV6)) { m->m_pkthdr.csum_flags = (CSUM_DATA_VALID_IPV6 | CSUM_PSEUDO_HDR); m->m_pkthdr.csum_data = 0xffff; } } } /** * get_packet - return the next ingress packet buffer from a free list * @adap: the adapter that received the packet * @drop_thres: # of remaining buffers before we start dropping packets * @qs: the qset that the SGE free list holding the packet belongs to * @mh: the mbuf header, contains a pointer to the head and tail of the mbuf chain * @r: response descriptor * * Get the next packet from a free list and complete setup of the * sk_buff. If the packet is small we make a copy and recycle the * original buffer, otherwise we use the original buffer itself. If a * positive drop threshold is supplied packets are dropped and their * buffers recycled if (a) the number of remaining buffers is under the * threshold and the packet is too big to copy, or (b) the packet should * be copied but there is no memory for the copy. */ static int get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs, struct t3_mbuf_hdr *mh, struct rsp_desc *r) { unsigned int len_cq = ntohl(r->len_cq); struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0]; int mask, cidx = fl->cidx; struct rx_sw_desc *sd = &fl->sdesc[cidx]; uint32_t len = G_RSPD_LEN(len_cq); uint32_t flags = M_EXT; uint8_t sopeop = G_RSPD_SOP_EOP(ntohl(r->flags)); caddr_t cl; struct mbuf *m; int ret = 0; mask = fl->size - 1; prefetch(fl->sdesc[(cidx + 1) & mask].m); prefetch(fl->sdesc[(cidx + 2) & mask].m); prefetch(fl->sdesc[(cidx + 1) & mask].rxsd_cl); prefetch(fl->sdesc[(cidx + 2) & mask].rxsd_cl); fl->credits--; bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD); if (recycle_enable && len <= SGE_RX_COPY_THRES && sopeop == RSPQ_SOP_EOP) { if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL) goto skip_recycle; cl = mtod(m, void *); memcpy(cl, sd->rxsd_cl, len); recycle_rx_buf(adap, fl, fl->cidx); m->m_pkthdr.len = m->m_len = len; m->m_flags = 0; mh->mh_head = mh->mh_tail = m; ret = 1; goto done; } else { skip_recycle: bus_dmamap_unload(fl->entry_tag, sd->map); cl = sd->rxsd_cl; m = sd->m; if ((sopeop == RSPQ_SOP_EOP) || (sopeop == RSPQ_SOP)) flags |= M_PKTHDR; m_init(m, M_NOWAIT, MT_DATA, flags); if (fl->zone == zone_pack) { /* * restore clobbered data pointer */ m->m_data = m->m_ext.ext_buf; } else { m_cljset(m, cl, fl->type); } m->m_len = len; } switch(sopeop) { case RSPQ_SOP_EOP: ret = 1; /* FALLTHROUGH */ case RSPQ_SOP: mh->mh_head = mh->mh_tail = m; m->m_pkthdr.len = len; break; case RSPQ_EOP: ret = 1; /* FALLTHROUGH */ case RSPQ_NSOP_NEOP: if (mh->mh_tail == NULL) { log(LOG_ERR, "discarding intermediate descriptor entry\n"); m_freem(m); break; } mh->mh_tail->m_next = m; mh->mh_tail = m; mh->mh_head->m_pkthdr.len += len; break; } if (cxgb_debug) printf("len=%d pktlen=%d\n", m->m_len, m->m_pkthdr.len); done: if (++fl->cidx == fl->size) fl->cidx = 0; return (ret); } /** * handle_rsp_cntrl_info - handles control information in a response * @qs: the queue set corresponding to the response * @flags: the response control flags * * Handles the control information of an SGE response, such as GTS * indications and completion credits for the queue set's Tx queues. * HW coalesces credits, we don't do any extra SW coalescing. */ static __inline void handle_rsp_cntrl_info(struct sge_qset *qs, uint32_t flags) { unsigned int credits; #if USE_GTS if (flags & F_RSPD_TXQ0_GTS) clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags); #endif credits = G_RSPD_TXQ0_CR(flags); if (credits) qs->txq[TXQ_ETH].processed += credits; credits = G_RSPD_TXQ2_CR(flags); if (credits) qs->txq[TXQ_CTRL].processed += credits; # if USE_GTS if (flags & F_RSPD_TXQ1_GTS) clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags); # endif credits = G_RSPD_TXQ1_CR(flags); if (credits) qs->txq[TXQ_OFLD].processed += credits; } static void check_ring_db(adapter_t *adap, struct sge_qset *qs, unsigned int sleeping) { ; } /** * process_responses - process responses from an SGE response queue * @adap: the adapter * @qs: the queue set to which the response queue belongs * @budget: how many responses can be processed in this round * * Process responses from an SGE response queue up to the supplied budget. * Responses include received packets as well as credits and other events * for the queues that belong to the response queue's queue set. * A negative budget is effectively unlimited. * * Additionally choose the interrupt holdoff time for the next interrupt * on this queue. If the system is under memory shortage use a fairly * long delay to help recovery. */ static int process_responses(adapter_t *adap, struct sge_qset *qs, int budget) { struct sge_rspq *rspq = &qs->rspq; struct rsp_desc *r = &rspq->desc[rspq->cidx]; int budget_left = budget; unsigned int sleeping = 0; #if defined(INET6) || defined(INET) int lro_enabled = qs->lro.enabled; int skip_lro; struct lro_ctrl *lro_ctrl = &qs->lro.ctrl; #endif struct t3_mbuf_hdr *mh = &rspq->rspq_mh; #ifdef DEBUG static int last_holdoff = 0; if (cxgb_debug && rspq->holdoff_tmr != last_holdoff) { printf("next_holdoff=%d\n", rspq->holdoff_tmr); last_holdoff = rspq->holdoff_tmr; } #endif rspq->next_holdoff = rspq->holdoff_tmr; while (__predict_true(budget_left && is_new_response(r, rspq))) { int eth, eop = 0, ethpad = 0; uint32_t flags = ntohl(r->flags); uint32_t rss_hash = be32toh(r->rss_hdr.rss_hash_val); uint8_t opcode = r->rss_hdr.opcode; eth = (opcode == CPL_RX_PKT); if (__predict_false(flags & F_RSPD_ASYNC_NOTIF)) { struct mbuf *m; if (cxgb_debug) printf("async notification\n"); if (mh->mh_head == NULL) { mh->mh_head = m_gethdr(M_NOWAIT, MT_DATA); m = mh->mh_head; } else { m = m_gethdr(M_NOWAIT, MT_DATA); } if (m == NULL) goto no_mem; memcpy(mtod(m, char *), r, AN_PKT_SIZE); m->m_len = m->m_pkthdr.len = AN_PKT_SIZE; *mtod(m, uint8_t *) = CPL_ASYNC_NOTIF; opcode = CPL_ASYNC_NOTIF; eop = 1; rspq->async_notif++; goto skip; } else if (flags & F_RSPD_IMM_DATA_VALID) { struct mbuf *m = m_gethdr(M_NOWAIT, MT_DATA); if (m == NULL) { no_mem: rspq->next_holdoff = NOMEM_INTR_DELAY; budget_left--; break; } if (mh->mh_head == NULL) mh->mh_head = m; else mh->mh_tail->m_next = m; mh->mh_tail = m; get_imm_packet(adap, r, m); mh->mh_head->m_pkthdr.len += m->m_len; eop = 1; rspq->imm_data++; } else if (r->len_cq) { int drop_thresh = eth ? SGE_RX_DROP_THRES : 0; eop = get_packet(adap, drop_thresh, qs, mh, r); if (eop) { if (r->rss_hdr.hash_type && !adap->timestamp) { M_HASHTYPE_SET(mh->mh_head, M_HASHTYPE_OPAQUE_HASH); mh->mh_head->m_pkthdr.flowid = rss_hash; } } ethpad = 2; } else { rspq->pure_rsps++; } skip: if (flags & RSPD_CTRL_MASK) { sleeping |= flags & RSPD_GTS_MASK; handle_rsp_cntrl_info(qs, flags); } if (!eth && eop) { rspq->offload_pkts++; #ifdef TCP_OFFLOAD adap->cpl_handler[opcode](qs, r, mh->mh_head); #else m_freem(mh->mh_head); #endif mh->mh_head = NULL; } else if (eth && eop) { struct mbuf *m = mh->mh_head; t3_rx_eth(adap, m, ethpad); /* * The T304 sends incoming packets on any qset. If LRO * is also enabled, we could end up sending packet up * lro_ctrl->ifp's input. That is incorrect. * * The mbuf's rcvif was derived from the cpl header and * is accurate. Skip LRO and just use that. */ #if defined(INET6) || defined(INET) skip_lro = __predict_false(qs->port->ifp != m->m_pkthdr.rcvif); if (lro_enabled && lro_ctrl->lro_cnt && !skip_lro && (tcp_lro_rx(lro_ctrl, m, 0) == 0) ) { /* successfully queue'd for LRO */ } else #endif { /* * LRO not enabled, packet unsuitable for LRO, * or unable to queue. Pass it up right now in * either case. */ struct ifnet *ifp = m->m_pkthdr.rcvif; (*ifp->if_input)(ifp, m); } mh->mh_head = NULL; } r++; if (__predict_false(++rspq->cidx == rspq->size)) { rspq->cidx = 0; rspq->gen ^= 1; r = rspq->desc; } if (++rspq->credits >= 64) { refill_rspq(adap, rspq, rspq->credits); rspq->credits = 0; } __refill_fl_lt(adap, &qs->fl[0], 32); __refill_fl_lt(adap, &qs->fl[1], 32); --budget_left; } #if defined(INET6) || defined(INET) /* Flush LRO */ tcp_lro_flush_all(lro_ctrl); #endif if (sleeping) check_ring_db(adap, qs, sleeping); mb(); /* commit Tx queue processed updates */ if (__predict_false(qs->txq_stopped > 1)) restart_tx(qs); __refill_fl_lt(adap, &qs->fl[0], 512); __refill_fl_lt(adap, &qs->fl[1], 512); budget -= budget_left; return (budget); } /* * A helper function that processes responses and issues GTS. */ static __inline int process_responses_gts(adapter_t *adap, struct sge_rspq *rq) { int work; static int last_holdoff = 0; work = process_responses(adap, rspq_to_qset(rq), -1); if (cxgb_debug && (rq->next_holdoff != last_holdoff)) { printf("next_holdoff=%d\n", rq->next_holdoff); last_holdoff = rq->next_holdoff; } t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) | V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx)); return (work); } +#ifdef NETDUMP +int +cxgb_netdump_poll_rx(adapter_t *adap, struct sge_qset *qs) +{ + + return (process_responses_gts(adap, &qs->rspq)); +} +#endif /* * Interrupt handler for legacy INTx interrupts for T3B-based cards. * Handles data events from SGE response queues as well as error and other * async events as they all use the same interrupt pin. We use one SGE * response queue per port in this mode and protect all response queues with * queue 0's lock. */ void t3b_intr(void *data) { uint32_t i, map; adapter_t *adap = data; struct sge_rspq *q0 = &adap->sge.qs[0].rspq; t3_write_reg(adap, A_PL_CLI, 0); map = t3_read_reg(adap, A_SG_DATA_INTR); if (!map) return; if (__predict_false(map & F_ERRINTR)) { t3_write_reg(adap, A_PL_INT_ENABLE0, 0); (void) t3_read_reg(adap, A_PL_INT_ENABLE0); taskqueue_enqueue(adap->tq, &adap->slow_intr_task); } mtx_lock(&q0->lock); for_each_port(adap, i) if (map & (1 << i)) process_responses_gts(adap, &adap->sge.qs[i].rspq); mtx_unlock(&q0->lock); } /* * The MSI interrupt handler. This needs to handle data events from SGE * response queues as well as error and other async events as they all use * the same MSI vector. We use one SGE response queue per port in this mode * and protect all response queues with queue 0's lock. */ void t3_intr_msi(void *data) { adapter_t *adap = data; struct sge_rspq *q0 = &adap->sge.qs[0].rspq; int i, new_packets = 0; mtx_lock(&q0->lock); for_each_port(adap, i) if (process_responses_gts(adap, &adap->sge.qs[i].rspq)) new_packets = 1; mtx_unlock(&q0->lock); if (new_packets == 0) { t3_write_reg(adap, A_PL_INT_ENABLE0, 0); (void) t3_read_reg(adap, A_PL_INT_ENABLE0); taskqueue_enqueue(adap->tq, &adap->slow_intr_task); } } void t3_intr_msix(void *data) { struct sge_qset *qs = data; adapter_t *adap = qs->port->adapter; struct sge_rspq *rspq = &qs->rspq; if (process_responses_gts(adap, rspq) == 0) rspq->unhandled_irqs++; } #define QDUMP_SBUF_SIZE 32 * 400 static int t3_dump_rspq(SYSCTL_HANDLER_ARGS) { struct sge_rspq *rspq; struct sge_qset *qs; int i, err, dump_end, idx; struct sbuf *sb; struct rsp_desc *rspd; uint32_t data[4]; rspq = arg1; qs = rspq_to_qset(rspq); if (rspq->rspq_dump_count == 0) return (0); if (rspq->rspq_dump_count > RSPQ_Q_SIZE) { log(LOG_WARNING, "dump count is too large %d\n", rspq->rspq_dump_count); rspq->rspq_dump_count = 0; return (EINVAL); } if (rspq->rspq_dump_start > (RSPQ_Q_SIZE-1)) { log(LOG_WARNING, "dump start of %d is greater than queue size\n", rspq->rspq_dump_start); rspq->rspq_dump_start = 0; return (EINVAL); } err = t3_sge_read_rspq(qs->port->adapter, rspq->cntxt_id, data); if (err) return (err); err = sysctl_wire_old_buffer(req, 0); if (err) return (err); sb = sbuf_new_for_sysctl(NULL, NULL, QDUMP_SBUF_SIZE, req); sbuf_printf(sb, " \n index=%u size=%u MSI-X/RspQ=%u intr enable=%u intr armed=%u\n", (data[0] & 0xffff), data[0] >> 16, ((data[2] >> 20) & 0x3f), ((data[2] >> 26) & 1), ((data[2] >> 27) & 1)); sbuf_printf(sb, " generation=%u CQ mode=%u FL threshold=%u\n", ((data[2] >> 28) & 1), ((data[2] >> 31) & 1), data[3]); sbuf_printf(sb, " start=%d -> end=%d\n", rspq->rspq_dump_start, (rspq->rspq_dump_start + rspq->rspq_dump_count) & (RSPQ_Q_SIZE-1)); dump_end = rspq->rspq_dump_start + rspq->rspq_dump_count; for (i = rspq->rspq_dump_start; i < dump_end; i++) { idx = i & (RSPQ_Q_SIZE-1); rspd = &rspq->desc[idx]; sbuf_printf(sb, "\tidx=%04d opcode=%02x cpu_idx=%x hash_type=%x cq_idx=%x\n", idx, rspd->rss_hdr.opcode, rspd->rss_hdr.cpu_idx, rspd->rss_hdr.hash_type, be16toh(rspd->rss_hdr.cq_idx)); sbuf_printf(sb, "\trss_hash_val=%x flags=%08x len_cq=%x intr_gen=%x\n", rspd->rss_hdr.rss_hash_val, be32toh(rspd->flags), be32toh(rspd->len_cq), rspd->intr_gen); } err = sbuf_finish(sb); sbuf_delete(sb); return (err); } static int t3_dump_txq_eth(SYSCTL_HANDLER_ARGS) { struct sge_txq *txq; struct sge_qset *qs; int i, j, err, dump_end; struct sbuf *sb; struct tx_desc *txd; uint32_t *WR, wr_hi, wr_lo, gen; uint32_t data[4]; txq = arg1; qs = txq_to_qset(txq, TXQ_ETH); if (txq->txq_dump_count == 0) { return (0); } if (txq->txq_dump_count > TX_ETH_Q_SIZE) { log(LOG_WARNING, "dump count is too large %d\n", txq->txq_dump_count); txq->txq_dump_count = 1; return (EINVAL); } if (txq->txq_dump_start > (TX_ETH_Q_SIZE-1)) { log(LOG_WARNING, "dump start of %d is greater than queue size\n", txq->txq_dump_start); txq->txq_dump_start = 0; return (EINVAL); } err = t3_sge_read_ecntxt(qs->port->adapter, qs->rspq.cntxt_id, data); if (err) return (err); err = sysctl_wire_old_buffer(req, 0); if (err) return (err); sb = sbuf_new_for_sysctl(NULL, NULL, QDUMP_SBUF_SIZE, req); sbuf_printf(sb, " \n credits=%u GTS=%u index=%u size=%u rspq#=%u cmdq#=%u\n", (data[0] & 0x7fff), ((data[0] >> 15) & 1), (data[0] >> 16), (data[1] & 0xffff), ((data[3] >> 4) & 7), ((data[3] >> 7) & 1)); sbuf_printf(sb, " TUN=%u TOE=%u generation%u uP token=%u valid=%u\n", ((data[3] >> 8) & 1), ((data[3] >> 9) & 1), ((data[3] >> 10) & 1), ((data[3] >> 11) & 0xfffff), ((data[3] >> 31) & 1)); sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx, txq->txq_dump_start, (txq->txq_dump_start + txq->txq_dump_count) & (TX_ETH_Q_SIZE-1)); dump_end = txq->txq_dump_start + txq->txq_dump_count; for (i = txq->txq_dump_start; i < dump_end; i++) { txd = &txq->desc[i & (TX_ETH_Q_SIZE-1)]; WR = (uint32_t *)txd->flit; wr_hi = ntohl(WR[0]); wr_lo = ntohl(WR[1]); gen = G_WR_GEN(wr_lo); sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n", wr_hi, wr_lo, gen); for (j = 2; j < 30; j += 4) sbuf_printf(sb, "\t%08x %08x %08x %08x \n", WR[j], WR[j + 1], WR[j + 2], WR[j + 3]); } err = sbuf_finish(sb); sbuf_delete(sb); return (err); } static int t3_dump_txq_ctrl(SYSCTL_HANDLER_ARGS) { struct sge_txq *txq; struct sge_qset *qs; int i, j, err, dump_end; struct sbuf *sb; struct tx_desc *txd; uint32_t *WR, wr_hi, wr_lo, gen; txq = arg1; qs = txq_to_qset(txq, TXQ_CTRL); if (txq->txq_dump_count == 0) { return (0); } if (txq->txq_dump_count > 256) { log(LOG_WARNING, "dump count is too large %d\n", txq->txq_dump_count); txq->txq_dump_count = 1; return (EINVAL); } if (txq->txq_dump_start > 255) { log(LOG_WARNING, "dump start of %d is greater than queue size\n", txq->txq_dump_start); txq->txq_dump_start = 0; return (EINVAL); } err = sysctl_wire_old_buffer(req, 0); if (err != 0) return (err); sb = sbuf_new_for_sysctl(NULL, NULL, QDUMP_SBUF_SIZE, req); sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx, txq->txq_dump_start, (txq->txq_dump_start + txq->txq_dump_count) & 255); dump_end = txq->txq_dump_start + txq->txq_dump_count; for (i = txq->txq_dump_start; i < dump_end; i++) { txd = &txq->desc[i & (255)]; WR = (uint32_t *)txd->flit; wr_hi = ntohl(WR[0]); wr_lo = ntohl(WR[1]); gen = G_WR_GEN(wr_lo); sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n", wr_hi, wr_lo, gen); for (j = 2; j < 30; j += 4) sbuf_printf(sb, "\t%08x %08x %08x %08x \n", WR[j], WR[j + 1], WR[j + 2], WR[j + 3]); } err = sbuf_finish(sb); sbuf_delete(sb); return (err); } static int t3_set_coalesce_usecs(SYSCTL_HANDLER_ARGS) { adapter_t *sc = arg1; struct qset_params *qsp = &sc->params.sge.qset[0]; int coalesce_usecs; struct sge_qset *qs; int i, j, err, nqsets = 0; struct mtx *lock; if ((sc->flags & FULL_INIT_DONE) == 0) return (ENXIO); coalesce_usecs = qsp->coalesce_usecs; err = sysctl_handle_int(oidp, &coalesce_usecs, arg2, req); if (err != 0) { return (err); } if (coalesce_usecs == qsp->coalesce_usecs) return (0); for (i = 0; i < sc->params.nports; i++) for (j = 0; j < sc->port[i].nqsets; j++) nqsets++; coalesce_usecs = max(1, coalesce_usecs); for (i = 0; i < nqsets; i++) { qs = &sc->sge.qs[i]; qsp = &sc->params.sge.qset[i]; qsp->coalesce_usecs = coalesce_usecs; lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock : &sc->sge.qs[0].rspq.lock; mtx_lock(lock); t3_update_qset_coalesce(qs, qsp); t3_write_reg(sc, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) | V_NEWTIMER(qs->rspq.holdoff_tmr)); mtx_unlock(lock); } return (0); } static int t3_pkt_timestamp(SYSCTL_HANDLER_ARGS) { adapter_t *sc = arg1; int rc, timestamp; if ((sc->flags & FULL_INIT_DONE) == 0) return (ENXIO); timestamp = sc->timestamp; rc = sysctl_handle_int(oidp, ×tamp, arg2, req); if (rc != 0) return (rc); if (timestamp != sc->timestamp) { t3_set_reg_field(sc, A_TP_PC_CONFIG2, F_ENABLERXPKTTMSTPRSS, timestamp ? F_ENABLERXPKTTMSTPRSS : 0); sc->timestamp = timestamp; } return (0); } void t3_add_attach_sysctls(adapter_t *sc) { struct sysctl_ctx_list *ctx; struct sysctl_oid_list *children; ctx = device_get_sysctl_ctx(sc->dev); children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); /* random information */ SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "firmware_version", CTLFLAG_RD, sc->fw_version, 0, "firmware version"); SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "hw_revision", CTLFLAG_RD, &sc->params.rev, 0, "chip model"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "port_types", CTLFLAG_RD, sc->port_types, 0, "type of ports"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "enable_debug", CTLFLAG_RW, &cxgb_debug, 0, "enable verbose debugging output"); SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "tunq_coalesce", CTLFLAG_RD, &sc->tunq_coalesce, "#tunneled packets freed"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "txq_overrun", CTLFLAG_RD, &txq_fills, 0, "#times txq overrun"); SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "core_clock", CTLFLAG_RD, &sc->params.vpd.cclk, 0, "core clock frequency (in KHz)"); } static const char *rspq_name = "rspq"; static const char *txq_names[] = { "txq_eth", "txq_ofld", "txq_ctrl" }; static int sysctl_handle_macstat(SYSCTL_HANDLER_ARGS) { struct port_info *p = arg1; uint64_t *parg; if (!p) return (EINVAL); cxgb_refresh_stats(p); parg = (uint64_t *) ((uint8_t *)&p->mac.stats + arg2); return (sysctl_handle_64(oidp, parg, 0, req)); } void t3_add_configured_sysctls(adapter_t *sc) { struct sysctl_ctx_list *ctx; struct sysctl_oid_list *children; int i, j; ctx = device_get_sysctl_ctx(sc->dev); children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "intr_coal", CTLTYPE_INT|CTLFLAG_RW, sc, 0, t3_set_coalesce_usecs, "I", "interrupt coalescing timer (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "pkt_timestamp", CTLTYPE_INT | CTLFLAG_RW, sc, 0, t3_pkt_timestamp, "I", "provide packet timestamp instead of connection hash"); for (i = 0; i < sc->params.nports; i++) { struct port_info *pi = &sc->port[i]; struct sysctl_oid *poid; struct sysctl_oid_list *poidlist; struct mac_stats *mstats = &pi->mac.stats; snprintf(pi->namebuf, PORT_NAME_LEN, "port%d", i); poid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, pi->namebuf, CTLFLAG_RD, NULL, "port statistics"); poidlist = SYSCTL_CHILDREN(poid); SYSCTL_ADD_UINT(ctx, poidlist, OID_AUTO, "nqsets", CTLFLAG_RD, &pi->nqsets, 0, "#queue sets"); for (j = 0; j < pi->nqsets; j++) { struct sge_qset *qs = &sc->sge.qs[pi->first_qset + j]; struct sysctl_oid *qspoid, *rspqpoid, *txqpoid, *ctrlqpoid, *lropoid; struct sysctl_oid_list *qspoidlist, *rspqpoidlist, *txqpoidlist, *ctrlqpoidlist, *lropoidlist; struct sge_txq *txq = &qs->txq[TXQ_ETH]; snprintf(qs->namebuf, QS_NAME_LEN, "qs%d", j); qspoid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO, qs->namebuf, CTLFLAG_RD, NULL, "qset statistics"); qspoidlist = SYSCTL_CHILDREN(qspoid); SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl0_empty", CTLFLAG_RD, &qs->fl[0].empty, 0, "freelist #0 empty"); SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl1_empty", CTLFLAG_RD, &qs->fl[1].empty, 0, "freelist #1 empty"); rspqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, rspq_name, CTLFLAG_RD, NULL, "rspq statistics"); rspqpoidlist = SYSCTL_CHILDREN(rspqpoid); txqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, txq_names[0], CTLFLAG_RD, NULL, "txq statistics"); txqpoidlist = SYSCTL_CHILDREN(txqpoid); ctrlqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, txq_names[2], CTLFLAG_RD, NULL, "ctrlq statistics"); ctrlqpoidlist = SYSCTL_CHILDREN(ctrlqpoid); lropoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, "lro_stats", CTLFLAG_RD, NULL, "LRO statistics"); lropoidlist = SYSCTL_CHILDREN(lropoid); SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "size", CTLFLAG_RD, &qs->rspq.size, 0, "#entries in response queue"); SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "cidx", CTLFLAG_RD, &qs->rspq.cidx, 0, "consumer index"); SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "credits", CTLFLAG_RD, &qs->rspq.credits, 0, "#credits"); SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "starved", CTLFLAG_RD, &qs->rspq.starved, 0, "#times starved"); SYSCTL_ADD_UAUTO(ctx, rspqpoidlist, OID_AUTO, "phys_addr", CTLFLAG_RD, &qs->rspq.phys_addr, "physical_address_of the queue"); SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_start", CTLFLAG_RW, &qs->rspq.rspq_dump_start, 0, "start rspq dump entry"); SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_count", CTLFLAG_RW, &qs->rspq.rspq_dump_count, 0, "#rspq entries to dump"); SYSCTL_ADD_PROC(ctx, rspqpoidlist, OID_AUTO, "qdump", CTLTYPE_STRING | CTLFLAG_RD, &qs->rspq, 0, t3_dump_rspq, "A", "dump of the response queue"); SYSCTL_ADD_UQUAD(ctx, txqpoidlist, OID_AUTO, "dropped", CTLFLAG_RD, &qs->txq[TXQ_ETH].txq_mr->br_drops, "#tunneled packets dropped"); SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "sendqlen", CTLFLAG_RD, &qs->txq[TXQ_ETH].sendq.mq_len, 0, "#tunneled packets waiting to be sent"); #if 0 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_pidx", CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_prod, 0, "#tunneled packets queue producer index"); SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_cidx", CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_cons, 0, "#tunneled packets queue consumer index"); #endif SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "processed", CTLFLAG_RD, &qs->txq[TXQ_ETH].processed, 0, "#tunneled packets processed by the card"); SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "cleaned", CTLFLAG_RD, &txq->cleaned, 0, "#tunneled packets cleaned"); SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "in_use", CTLFLAG_RD, &txq->in_use, 0, "#tunneled packet slots in use"); SYSCTL_ADD_UQUAD(ctx, txqpoidlist, OID_AUTO, "frees", CTLFLAG_RD, &txq->txq_frees, "#tunneled packets freed"); SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "skipped", CTLFLAG_RD, &txq->txq_skipped, 0, "#tunneled packet descriptors skipped"); SYSCTL_ADD_UQUAD(ctx, txqpoidlist, OID_AUTO, "coalesced", CTLFLAG_RD, &txq->txq_coalesced, "#tunneled packets coalesced"); SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "enqueued", CTLFLAG_RD, &txq->txq_enqueued, 0, "#tunneled packets enqueued to hardware"); SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "stopped_flags", CTLFLAG_RD, &qs->txq_stopped, 0, "tx queues stopped"); SYSCTL_ADD_UAUTO(ctx, txqpoidlist, OID_AUTO, "phys_addr", CTLFLAG_RD, &txq->phys_addr, "physical_address_of the queue"); SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "qgen", CTLFLAG_RW, &qs->txq[TXQ_ETH].gen, 0, "txq generation"); SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_cidx", CTLFLAG_RD, &txq->cidx, 0, "hardware queue cidx"); SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_pidx", CTLFLAG_RD, &txq->pidx, 0, "hardware queue pidx"); SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_start", CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_start, 0, "txq start idx for dump"); SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_count", CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_count, 0, "txq #entries to dump"); SYSCTL_ADD_PROC(ctx, txqpoidlist, OID_AUTO, "qdump", CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_ETH], 0, t3_dump_txq_eth, "A", "dump of the transmit queue"); SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_start", CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_start, 0, "ctrlq start idx for dump"); SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_count", CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_count, 0, "ctrl #entries to dump"); SYSCTL_ADD_PROC(ctx, ctrlqpoidlist, OID_AUTO, "qdump", CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_CTRL], 0, t3_dump_txq_ctrl, "A", "dump of the transmit queue"); SYSCTL_ADD_U64(ctx, lropoidlist, OID_AUTO, "lro_queued", CTLFLAG_RD, &qs->lro.ctrl.lro_queued, 0, NULL); SYSCTL_ADD_U64(ctx, lropoidlist, OID_AUTO, "lro_flushed", CTLFLAG_RD, &qs->lro.ctrl.lro_flushed, 0, NULL); SYSCTL_ADD_U64(ctx, lropoidlist, OID_AUTO, "lro_bad_csum", CTLFLAG_RD, &qs->lro.ctrl.lro_bad_csum, 0, NULL); SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_cnt", CTLFLAG_RD, &qs->lro.ctrl.lro_cnt, 0, NULL); } /* Now add a node for mac stats. */ poid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO, "mac_stats", CTLFLAG_RD, NULL, "MAC statistics"); poidlist = SYSCTL_CHILDREN(poid); /* * We (ab)use the length argument (arg2) to pass on the offset * of the data that we are interested in. This is only required * for the quad counters that are updated from the hardware (we * make sure that we return the latest value). * sysctl_handle_macstat first updates *all* the counters from * the hardware, and then returns the latest value of the * requested counter. Best would be to update only the * requested counter from hardware, but t3_mac_update_stats() * hides all the register details and we don't want to dive into * all that here. */ #define CXGB_SYSCTL_ADD_QUAD(a) SYSCTL_ADD_OID(ctx, poidlist, OID_AUTO, #a, \ (CTLTYPE_U64 | CTLFLAG_RD), pi, offsetof(struct mac_stats, a), \ sysctl_handle_macstat, "QU", 0) CXGB_SYSCTL_ADD_QUAD(tx_octets); CXGB_SYSCTL_ADD_QUAD(tx_octets_bad); CXGB_SYSCTL_ADD_QUAD(tx_frames); CXGB_SYSCTL_ADD_QUAD(tx_mcast_frames); CXGB_SYSCTL_ADD_QUAD(tx_bcast_frames); CXGB_SYSCTL_ADD_QUAD(tx_pause); CXGB_SYSCTL_ADD_QUAD(tx_deferred); CXGB_SYSCTL_ADD_QUAD(tx_late_collisions); CXGB_SYSCTL_ADD_QUAD(tx_total_collisions); CXGB_SYSCTL_ADD_QUAD(tx_excess_collisions); CXGB_SYSCTL_ADD_QUAD(tx_underrun); CXGB_SYSCTL_ADD_QUAD(tx_len_errs); CXGB_SYSCTL_ADD_QUAD(tx_mac_internal_errs); CXGB_SYSCTL_ADD_QUAD(tx_excess_deferral); CXGB_SYSCTL_ADD_QUAD(tx_fcs_errs); CXGB_SYSCTL_ADD_QUAD(tx_frames_64); CXGB_SYSCTL_ADD_QUAD(tx_frames_65_127); CXGB_SYSCTL_ADD_QUAD(tx_frames_128_255); CXGB_SYSCTL_ADD_QUAD(tx_frames_256_511); CXGB_SYSCTL_ADD_QUAD(tx_frames_512_1023); CXGB_SYSCTL_ADD_QUAD(tx_frames_1024_1518); CXGB_SYSCTL_ADD_QUAD(tx_frames_1519_max); CXGB_SYSCTL_ADD_QUAD(rx_octets); CXGB_SYSCTL_ADD_QUAD(rx_octets_bad); CXGB_SYSCTL_ADD_QUAD(rx_frames); CXGB_SYSCTL_ADD_QUAD(rx_mcast_frames); CXGB_SYSCTL_ADD_QUAD(rx_bcast_frames); CXGB_SYSCTL_ADD_QUAD(rx_pause); CXGB_SYSCTL_ADD_QUAD(rx_fcs_errs); CXGB_SYSCTL_ADD_QUAD(rx_align_errs); CXGB_SYSCTL_ADD_QUAD(rx_symbol_errs); CXGB_SYSCTL_ADD_QUAD(rx_data_errs); CXGB_SYSCTL_ADD_QUAD(rx_sequence_errs); CXGB_SYSCTL_ADD_QUAD(rx_runt); CXGB_SYSCTL_ADD_QUAD(rx_jabber); CXGB_SYSCTL_ADD_QUAD(rx_short); CXGB_SYSCTL_ADD_QUAD(rx_too_long); CXGB_SYSCTL_ADD_QUAD(rx_mac_internal_errs); CXGB_SYSCTL_ADD_QUAD(rx_cong_drops); CXGB_SYSCTL_ADD_QUAD(rx_frames_64); CXGB_SYSCTL_ADD_QUAD(rx_frames_65_127); CXGB_SYSCTL_ADD_QUAD(rx_frames_128_255); CXGB_SYSCTL_ADD_QUAD(rx_frames_256_511); CXGB_SYSCTL_ADD_QUAD(rx_frames_512_1023); CXGB_SYSCTL_ADD_QUAD(rx_frames_1024_1518); CXGB_SYSCTL_ADD_QUAD(rx_frames_1519_max); #undef CXGB_SYSCTL_ADD_QUAD #define CXGB_SYSCTL_ADD_ULONG(a) SYSCTL_ADD_ULONG(ctx, poidlist, OID_AUTO, #a, \ CTLFLAG_RD, &mstats->a, 0) CXGB_SYSCTL_ADD_ULONG(tx_fifo_parity_err); CXGB_SYSCTL_ADD_ULONG(rx_fifo_parity_err); CXGB_SYSCTL_ADD_ULONG(tx_fifo_urun); CXGB_SYSCTL_ADD_ULONG(rx_fifo_ovfl); CXGB_SYSCTL_ADD_ULONG(serdes_signal_loss); CXGB_SYSCTL_ADD_ULONG(xaui_pcs_ctc_err); CXGB_SYSCTL_ADD_ULONG(xaui_pcs_align_change); CXGB_SYSCTL_ADD_ULONG(num_toggled); CXGB_SYSCTL_ADD_ULONG(num_resets); CXGB_SYSCTL_ADD_ULONG(link_faults); #undef CXGB_SYSCTL_ADD_ULONG } } /** * t3_get_desc - dump an SGE descriptor for debugging purposes * @qs: the queue set * @qnum: identifies the specific queue (0..2: Tx, 3:response, 4..5: Rx) * @idx: the descriptor index in the queue * @data: where to dump the descriptor contents * * Dumps the contents of a HW descriptor of an SGE queue. Returns the * size of the descriptor. */ int t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx, unsigned char *data) { if (qnum >= 6) return (EINVAL); if (qnum < 3) { if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size) return -EINVAL; memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc)); return sizeof(struct tx_desc); } if (qnum == 3) { if (!qs->rspq.desc || idx >= qs->rspq.size) return (EINVAL); memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc)); return sizeof(struct rsp_desc); } qnum -= 4; if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size) return (EINVAL); memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc)); return sizeof(struct rx_desc); }