Index: head/sys/ofed/drivers/net/mlx4/en_rx.c =================================================================== --- head/sys/ofed/drivers/net/mlx4/en_rx.c (revision 297966) +++ head/sys/ofed/drivers/net/mlx4/en_rx.c (revision 297967) @@ -1,918 +1,925 @@ /* * Copyright (c) 2007, 2014 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * */ #include "opt_inet.h" #include #include #include #include #include #include #include #ifdef CONFIG_NET_RX_BUSY_POLL #include #endif #include "mlx4_en.h" static void mlx4_en_init_rx_desc(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring *ring, int index) { struct mlx4_en_rx_desc *rx_desc = (struct mlx4_en_rx_desc *) (ring->buf + (ring->stride * index)); int possible_frags; int i; /* Set size and memtype fields */ - rx_desc->data[0].byte_count = cpu_to_be32(priv->rx_mb_size); + rx_desc->data[0].byte_count = cpu_to_be32(priv->rx_mb_size - MLX4_NET_IP_ALIGN); rx_desc->data[0].lkey = cpu_to_be32(priv->mdev->mr.key); /* * If the number of used fragments does not fill up the ring * stride, remaining (unused) fragments must be padded with * null address/size and a special memory key: */ possible_frags = (ring->stride - sizeof(struct mlx4_en_rx_desc)) / DS_SIZE; for (i = 1; i < possible_frags; i++) { rx_desc->data[i].byte_count = 0; rx_desc->data[i].lkey = cpu_to_be32(MLX4_EN_MEMTYPE_PAD); rx_desc->data[i].addr = 0; } } static int mlx4_en_alloc_buf(struct mlx4_en_rx_ring *ring, __be64 *pdma, struct mlx4_en_rx_mbuf *mb_list) { bus_dma_segment_t segs[1]; bus_dmamap_t map; struct mbuf *mb; int nsegs; int err; /* try to allocate a new spare mbuf */ if (unlikely(ring->spare.mbuf == NULL)) { mb = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, ring->rx_mb_size); if (unlikely(mb == NULL)) return (-ENOMEM); /* setup correct length */ - mb->m_len = ring->rx_mb_size; + mb->m_pkthdr.len = mb->m_len = ring->rx_mb_size; + /* make sure IP header gets aligned */ + m_adj(mb, MLX4_NET_IP_ALIGN); + /* load spare mbuf into BUSDMA */ err = -bus_dmamap_load_mbuf_sg(ring->dma_tag, ring->spare.dma_map, mb, segs, &nsegs, BUS_DMA_NOWAIT); if (unlikely(err != 0)) { m_freem(mb); return (err); } /* store spare info */ ring->spare.mbuf = mb; ring->spare.paddr_be = cpu_to_be64(segs[0].ds_addr); bus_dmamap_sync(ring->dma_tag, ring->spare.dma_map, BUS_DMASYNC_PREREAD); } /* synchronize and unload the current mbuf, if any */ if (likely(mb_list->mbuf != NULL)) { bus_dmamap_sync(ring->dma_tag, mb_list->dma_map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(ring->dma_tag, mb_list->dma_map); } mb = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, ring->rx_mb_size); if (unlikely(mb == NULL)) goto use_spare; /* setup correct length */ - mb->m_len = ring->rx_mb_size; + mb->m_pkthdr.len = mb->m_len = ring->rx_mb_size; + /* make sure IP header gets aligned */ + m_adj(mb, MLX4_NET_IP_ALIGN); + err = -bus_dmamap_load_mbuf_sg(ring->dma_tag, mb_list->dma_map, mb, segs, &nsegs, BUS_DMA_NOWAIT); if (unlikely(err != 0)) { m_freem(mb); goto use_spare; } *pdma = cpu_to_be64(segs[0].ds_addr); mb_list->mbuf = mb; bus_dmamap_sync(ring->dma_tag, mb_list->dma_map, BUS_DMASYNC_PREREAD); return (0); use_spare: /* swap DMA maps */ map = mb_list->dma_map; mb_list->dma_map = ring->spare.dma_map; ring->spare.dma_map = map; /* swap MBUFs */ mb_list->mbuf = ring->spare.mbuf; ring->spare.mbuf = NULL; /* store physical address */ *pdma = ring->spare.paddr_be; return (0); } static void mlx4_en_free_buf(struct mlx4_en_rx_ring *ring, struct mlx4_en_rx_mbuf *mb_list) { bus_dmamap_t map = mb_list->dma_map; bus_dmamap_sync(ring->dma_tag, map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(ring->dma_tag, map); m_freem(mb_list->mbuf); mb_list->mbuf = NULL; /* safety clearing */ } static int mlx4_en_prepare_rx_desc(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring *ring, int index) { struct mlx4_en_rx_desc *rx_desc = (struct mlx4_en_rx_desc *) (ring->buf + (index * ring->stride)); struct mlx4_en_rx_mbuf *mb_list = ring->mbuf + index; mb_list->mbuf = NULL; if (mlx4_en_alloc_buf(ring, &rx_desc->data[0].addr, mb_list)) { priv->port_stats.rx_alloc_failed++; return (-ENOMEM); } return (0); } static inline void mlx4_en_update_rx_prod_db(struct mlx4_en_rx_ring *ring) { *ring->wqres.db.db = cpu_to_be32(ring->prod & 0xffff); } static int mlx4_en_fill_rx_buffers(struct mlx4_en_priv *priv) { struct mlx4_en_rx_ring *ring; int ring_ind; int buf_ind; int new_size; int err; for (buf_ind = 0; buf_ind < priv->prof->rx_ring_size; buf_ind++) { for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) { ring = priv->rx_ring[ring_ind]; err = mlx4_en_prepare_rx_desc(priv, ring, ring->actual_size); if (err) { if (ring->actual_size == 0) { en_err(priv, "Failed to allocate " "enough rx buffers\n"); return -ENOMEM; } else { new_size = rounddown_pow_of_two(ring->actual_size); en_warn(priv, "Only %d buffers allocated " "reducing ring size to %d\n", ring->actual_size, new_size); goto reduce_rings; } } ring->actual_size++; ring->prod++; } } return 0; reduce_rings: for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) { ring = priv->rx_ring[ring_ind]; while (ring->actual_size > new_size) { ring->actual_size--; ring->prod--; mlx4_en_free_buf(ring, ring->mbuf + ring->actual_size); } } return 0; } static void mlx4_en_free_rx_buf(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring *ring) { int index; en_dbg(DRV, priv, "Freeing Rx buf - cons:%d prod:%d\n", ring->cons, ring->prod); /* Unmap and free Rx buffers */ BUG_ON((u32) (ring->prod - ring->cons) > ring->actual_size); while (ring->cons != ring->prod) { index = ring->cons & ring->size_mask; en_dbg(DRV, priv, "Processing descriptor:%d\n", index); mlx4_en_free_buf(ring, ring->mbuf + index); ++ring->cons; } } void mlx4_en_calc_rx_buf(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); - int eff_mtu = dev->if_mtu + ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN; + int eff_mtu = dev->if_mtu + ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN + + MLX4_NET_IP_ALIGN; if (eff_mtu > MJUM16BYTES) { en_err(priv, "MTU(%d) is too big\n", dev->if_mtu); eff_mtu = MJUM16BYTES; } else if (eff_mtu > MJUM9BYTES) { eff_mtu = MJUM16BYTES; } else if (eff_mtu > MJUMPAGESIZE) { eff_mtu = MJUM9BYTES; } else if (eff_mtu > MCLBYTES) { eff_mtu = MJUMPAGESIZE; } else { eff_mtu = MCLBYTES; } priv->rx_mb_size = eff_mtu; en_dbg(DRV, priv, "Effective RX MTU: %d bytes\n", eff_mtu); } int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring **pring, u32 size, int node) { struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_en_rx_ring *ring; int err; int tmp; uint32_t x; ring = kzalloc(sizeof(struct mlx4_en_rx_ring), GFP_KERNEL); if (!ring) { en_err(priv, "Failed to allocate RX ring structure\n"); return -ENOMEM; } /* Create DMA descriptor TAG */ if ((err = -bus_dma_tag_create( bus_get_dma_tag(mdev->pdev->dev.bsddev), 1, /* any alignment */ 0, /* no boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ MJUM16BYTES, /* maxsize */ 1, /* nsegments */ MJUM16BYTES, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockfuncarg */ &ring->dma_tag))) { en_err(priv, "Failed to create DMA tag\n"); goto err_ring; } ring->prod = 0; ring->cons = 0; ring->size = size; ring->size_mask = size - 1; ring->stride = roundup_pow_of_two( sizeof(struct mlx4_en_rx_desc) + DS_SIZE); ring->log_stride = ffs(ring->stride) - 1; ring->buf_size = ring->size * ring->stride + TXBB_SIZE; tmp = size * sizeof(struct mlx4_en_rx_mbuf); ring->mbuf = kzalloc(tmp, GFP_KERNEL); if (ring->mbuf == NULL) { err = -ENOMEM; goto err_dma_tag; } err = -bus_dmamap_create(ring->dma_tag, 0, &ring->spare.dma_map); if (err != 0) goto err_info; for (x = 0; x != size; x++) { err = -bus_dmamap_create(ring->dma_tag, 0, &ring->mbuf[x].dma_map); if (err != 0) { while (x--) bus_dmamap_destroy(ring->dma_tag, ring->mbuf[x].dma_map); goto err_info; } } en_dbg(DRV, priv, "Allocated MBUF ring at addr:%p size:%d\n", ring->mbuf, tmp); err = mlx4_alloc_hwq_res(mdev->dev, &ring->wqres, ring->buf_size, 2 * PAGE_SIZE); if (err) goto err_dma_map; err = mlx4_en_map_buffer(&ring->wqres.buf); if (err) { en_err(priv, "Failed to map RX buffer\n"); goto err_hwq; } ring->buf = ring->wqres.buf.direct.buf; *pring = ring; return 0; err_hwq: mlx4_free_hwq_res(mdev->dev, &ring->wqres, ring->buf_size); err_dma_map: for (x = 0; x != size; x++) { bus_dmamap_destroy(ring->dma_tag, ring->mbuf[x].dma_map); } bus_dmamap_destroy(ring->dma_tag, ring->spare.dma_map); err_info: vfree(ring->mbuf); err_dma_tag: bus_dma_tag_destroy(ring->dma_tag); err_ring: kfree(ring); return (err); } int mlx4_en_activate_rx_rings(struct mlx4_en_priv *priv) { struct mlx4_en_rx_ring *ring; int i; int ring_ind; int err; int stride = roundup_pow_of_two( sizeof(struct mlx4_en_rx_desc) + DS_SIZE); for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) { ring = priv->rx_ring[ring_ind]; ring->prod = 0; ring->cons = 0; ring->actual_size = 0; ring->cqn = priv->rx_cq[ring_ind]->mcq.cqn; ring->rx_alloc_order = priv->rx_alloc_order; ring->rx_alloc_size = priv->rx_alloc_size; ring->rx_buf_size = priv->rx_buf_size; ring->rx_mb_size = priv->rx_mb_size; ring->stride = stride; if (ring->stride <= TXBB_SIZE) ring->buf += TXBB_SIZE; ring->log_stride = ffs(ring->stride) - 1; ring->buf_size = ring->size * ring->stride; memset(ring->buf, 0, ring->buf_size); mlx4_en_update_rx_prod_db(ring); /* Initialize all descriptors */ for (i = 0; i < ring->size; i++) mlx4_en_init_rx_desc(priv, ring, i); #ifdef INET /* Configure lro mngr */ if (priv->dev->if_capenable & IFCAP_LRO) { if (tcp_lro_init(&ring->lro)) priv->dev->if_capenable &= ~IFCAP_LRO; else ring->lro.ifp = priv->dev; } #endif } err = mlx4_en_fill_rx_buffers(priv); if (err) goto err_buffers; for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) { ring = priv->rx_ring[ring_ind]; ring->size_mask = ring->actual_size - 1; mlx4_en_update_rx_prod_db(ring); } return 0; err_buffers: for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) mlx4_en_free_rx_buf(priv, priv->rx_ring[ring_ind]); ring_ind = priv->rx_ring_num - 1; while (ring_ind >= 0) { ring = priv->rx_ring[ring_ind]; if (ring->stride <= TXBB_SIZE) ring->buf -= TXBB_SIZE; ring_ind--; } return err; } void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring **pring, u32 size, u16 stride) { struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_en_rx_ring *ring = *pring; uint32_t x; mlx4_en_unmap_buffer(&ring->wqres.buf); mlx4_free_hwq_res(mdev->dev, &ring->wqres, size * stride + TXBB_SIZE); for (x = 0; x != size; x++) bus_dmamap_destroy(ring->dma_tag, ring->mbuf[x].dma_map); /* free spare mbuf, if any */ if (ring->spare.mbuf != NULL) { bus_dmamap_sync(ring->dma_tag, ring->spare.dma_map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(ring->dma_tag, ring->spare.dma_map); m_freem(ring->spare.mbuf); } bus_dmamap_destroy(ring->dma_tag, ring->spare.dma_map); vfree(ring->mbuf); bus_dma_tag_destroy(ring->dma_tag); kfree(ring); *pring = NULL; #ifdef CONFIG_RFS_ACCEL mlx4_en_cleanup_filters(priv, ring); #endif } void mlx4_en_deactivate_rx_ring(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring *ring) { #ifdef INET tcp_lro_free(&ring->lro); #endif mlx4_en_free_rx_buf(priv, ring); if (ring->stride <= TXBB_SIZE) ring->buf -= TXBB_SIZE; } static void validate_loopback(struct mlx4_en_priv *priv, struct mbuf *mb) { int i; int offset = ETHER_HDR_LEN; for (i = 0; i < MLX4_LOOPBACK_TEST_PAYLOAD; i++, offset++) { if (*(mb->m_data + offset) != (unsigned char) (i & 0xff)) goto out_loopback; } /* Loopback found */ priv->loopback_ok = 1; out_loopback: m_freem(mb); } static inline int invalid_cqe(struct mlx4_en_priv *priv, struct mlx4_cqe *cqe) { /* Drop packet on bad receive or bad checksum */ if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == MLX4_CQE_OPCODE_ERROR)) { en_err(priv, "CQE completed in error - vendor syndrom:%d syndrom:%d\n", ((struct mlx4_err_cqe *)cqe)->vendor_err_syndrome, ((struct mlx4_err_cqe *)cqe)->syndrome); return 1; } if (unlikely(cqe->badfcs_enc & MLX4_CQE_BAD_FCS)) { en_dbg(RX_ERR, priv, "Accepted frame with bad FCS\n"); return 1; } return 0; } static struct mbuf * mlx4_en_rx_mb(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring *ring, struct mlx4_en_rx_desc *rx_desc, struct mlx4_en_rx_mbuf *mb_list, int length) { struct mbuf *mb; /* get mbuf */ mb = mb_list->mbuf; /* collect used fragment while atomically replacing it */ if (mlx4_en_alloc_buf(ring, &rx_desc->data[0].addr, mb_list)) return (NULL); /* range check hardware computed value */ if (unlikely(length > mb->m_len)) length = mb->m_len; /* update total packet length in packet header */ mb->m_len = mb->m_pkthdr.len = length; return (mb); } /* For cpu arch with cache line of 64B the performance is better when cqe size==64B * To enlarge cqe size from 32B to 64B --> 32B of garbage (i.e. 0xccccccc) * was added in the beginning of each cqe (the real data is in the corresponding 32B). * The following calc ensures that when factor==1, it means we are alligned to 64B * and we get the real cqe data*/ #define CQE_FACTOR_INDEX(index, factor) ((index << factor) + factor) int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int budget) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_cqe *cqe; struct mlx4_en_rx_ring *ring = priv->rx_ring[cq->ring]; struct mlx4_en_rx_mbuf *mb_list; struct mlx4_en_rx_desc *rx_desc; struct mbuf *mb; struct mlx4_cq *mcq = &cq->mcq; struct mlx4_cqe *buf = cq->buf; int index; unsigned int length; int polled = 0; u32 cons_index = mcq->cons_index; u32 size_mask = ring->size_mask; int size = cq->size; int factor = priv->cqe_factor; if (!priv->port_up) return 0; /* We assume a 1:1 mapping between CQEs and Rx descriptors, so Rx * descriptor offset can be deducted from the CQE index instead of * reading 'cqe->index' */ index = cons_index & size_mask; cqe = &buf[CQE_FACTOR_INDEX(index, factor)]; /* Process all completed CQEs */ while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK, cons_index & size)) { mb_list = ring->mbuf + index; rx_desc = (struct mlx4_en_rx_desc *) (ring->buf + (index << ring->log_stride)); /* * make sure we read the CQE after we read the ownership bit */ rmb(); if (invalid_cqe(priv, cqe)) { goto next; } /* * Packet is OK - process it. */ length = be32_to_cpu(cqe->byte_cnt); length -= ring->fcs_del; mb = mlx4_en_rx_mb(priv, ring, rx_desc, mb_list, length); if (unlikely(!mb)) { ring->errors++; goto next; } ring->bytes += length; ring->packets++; if (unlikely(priv->validate_loopback)) { validate_loopback(priv, mb); goto next; } /* forward Toeplitz compatible hash value */ mb->m_pkthdr.flowid = be32_to_cpu(cqe->immed_rss_invalid); M_HASHTYPE_SET(mb, M_HASHTYPE_OPAQUE); mb->m_pkthdr.rcvif = dev; if (be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_VLAN_PRESENT_MASK) { mb->m_pkthdr.ether_vtag = be16_to_cpu(cqe->sl_vid); mb->m_flags |= M_VLANTAG; } if (likely(dev->if_capenable & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) && (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPOK)) && (cqe->checksum == cpu_to_be16(0xffff))) { priv->port_stats.rx_chksum_good++; mb->m_pkthdr.csum_flags = CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR; mb->m_pkthdr.csum_data = htons(0xffff); /* This packet is eligible for LRO if it is: * - DIX Ethernet (type interpretation) * - TCP/IP (v4) * - without IP options * - not an IP fragment */ #ifdef INET if (mlx4_en_can_lro(cqe->status) && (dev->if_capenable & IFCAP_LRO)) { if (ring->lro.lro_cnt != 0 && tcp_lro_rx(&ring->lro, mb, 0) == 0) goto next; } #endif /* LRO not possible, complete processing here */ INC_PERF_COUNTER(priv->pstats.lro_misses); } else { mb->m_pkthdr.csum_flags = 0; priv->port_stats.rx_chksum_none++; } /* Push it up the stack */ dev->if_input(dev, mb); next: ++cons_index; index = cons_index & size_mask; cqe = &buf[CQE_FACTOR_INDEX(index, factor)]; if (++polled == budget) goto out; } /* Flush all pending IP reassembly sessions */ out: #ifdef INET tcp_lro_flush_all(&ring->lro); #endif AVG_PERF_COUNTER(priv->pstats.rx_coal_avg, polled); mcq->cons_index = cons_index; mlx4_cq_set_ci(mcq); wmb(); /* ensure HW sees CQ consumer before we post new buffers */ ring->cons = mcq->cons_index; ring->prod += polled; /* Polled descriptors were realocated in place */ mlx4_en_update_rx_prod_db(ring); return polled; } /* Rx CQ polling - called by NAPI */ static int mlx4_en_poll_rx_cq(struct mlx4_en_cq *cq, int budget) { struct net_device *dev = cq->dev; int done; done = mlx4_en_process_rx_cq(dev, cq, budget); cq->tot_rx += done; return done; } void mlx4_en_rx_irq(struct mlx4_cq *mcq) { struct mlx4_en_cq *cq = container_of(mcq, struct mlx4_en_cq, mcq); struct mlx4_en_priv *priv = netdev_priv(cq->dev); int done; // Shoot one within the irq context // Because there is no NAPI in freeBSD done = mlx4_en_poll_rx_cq(cq, MLX4_EN_RX_BUDGET); if (priv->port_up && (done == MLX4_EN_RX_BUDGET) ) { cq->curr_poll_rx_cpu_id = curcpu; taskqueue_enqueue(cq->tq, &cq->cq_task); } else { mlx4_en_arm_cq(priv, cq); } } void mlx4_en_rx_que(void *context, int pending) { struct mlx4_en_cq *cq; struct thread *td; cq = context; td = curthread; thread_lock(td); sched_bind(td, cq->curr_poll_rx_cpu_id); thread_unlock(td); while (mlx4_en_poll_rx_cq(cq, MLX4_EN_RX_BUDGET) == MLX4_EN_RX_BUDGET); mlx4_en_arm_cq(cq->dev->if_softc, cq); } /* RSS related functions */ static int mlx4_en_config_rss_qp(struct mlx4_en_priv *priv, int qpn, struct mlx4_en_rx_ring *ring, enum mlx4_qp_state *state, struct mlx4_qp *qp) { struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_qp_context *context; int err = 0; context = kmalloc(sizeof *context , GFP_KERNEL); if (!context) { en_err(priv, "Failed to allocate qp context\n"); return -ENOMEM; } err = mlx4_qp_alloc(mdev->dev, qpn, qp); if (err) { en_err(priv, "Failed to allocate qp #%x\n", qpn); goto out; } qp->event = mlx4_en_sqp_event; memset(context, 0, sizeof *context); mlx4_en_fill_qp_context(priv, ring->actual_size, ring->stride, 0, 0, qpn, ring->cqn, -1, context); context->db_rec_addr = cpu_to_be64(ring->wqres.db.dma); /* Cancel FCS removal if FW allows */ if (mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_FCS_KEEP) { context->param3 |= cpu_to_be32(1 << 29); ring->fcs_del = ETH_FCS_LEN; } else ring->fcs_del = 0; err = mlx4_qp_to_ready(mdev->dev, &ring->wqres.mtt, context, qp, state); if (err) { mlx4_qp_remove(mdev->dev, qp); mlx4_qp_free(mdev->dev, qp); } mlx4_en_update_rx_prod_db(ring); out: kfree(context); return err; } int mlx4_en_create_drop_qp(struct mlx4_en_priv *priv) { int err; u32 qpn; err = mlx4_qp_reserve_range(priv->mdev->dev, 1, 1, &qpn, 0); if (err) { en_err(priv, "Failed reserving drop qpn\n"); return err; } err = mlx4_qp_alloc(priv->mdev->dev, qpn, &priv->drop_qp); if (err) { en_err(priv, "Failed allocating drop qp\n"); mlx4_qp_release_range(priv->mdev->dev, qpn, 1); return err; } return 0; } void mlx4_en_destroy_drop_qp(struct mlx4_en_priv *priv) { u32 qpn; qpn = priv->drop_qp.qpn; mlx4_qp_remove(priv->mdev->dev, &priv->drop_qp); mlx4_qp_free(priv->mdev->dev, &priv->drop_qp); mlx4_qp_release_range(priv->mdev->dev, qpn, 1); } /* Allocate rx qp's and configure them according to rss map */ int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv) { struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_en_rss_map *rss_map = &priv->rss_map; struct mlx4_qp_context context; struct mlx4_rss_context *rss_context; int rss_rings; void *ptr; u8 rss_mask = (MLX4_RSS_IPV4 | MLX4_RSS_TCP_IPV4 | MLX4_RSS_IPV6 | MLX4_RSS_TCP_IPV6); int i; int err = 0; int good_qps = 0; static const u32 rsskey[10] = { 0xD181C62C, 0xF7F4DB5B, 0x1983A2FC, 0x943E1ADB, 0xD9389E6B, 0xD1039C2C, 0xA74499AD, 0x593D56D9, 0xF3253C06, 0x2ADC1FFC}; en_dbg(DRV, priv, "Configuring rss steering\n"); err = mlx4_qp_reserve_range(mdev->dev, priv->rx_ring_num, priv->rx_ring_num, &rss_map->base_qpn, 0); if (err) { en_err(priv, "Failed reserving %d qps\n", priv->rx_ring_num); return err; } for (i = 0; i < priv->rx_ring_num; i++) { priv->rx_ring[i]->qpn = rss_map->base_qpn + i; err = mlx4_en_config_rss_qp(priv, priv->rx_ring[i]->qpn, priv->rx_ring[i], &rss_map->state[i], &rss_map->qps[i]); if (err) goto rss_err; ++good_qps; } /* Configure RSS indirection qp */ err = mlx4_qp_alloc(mdev->dev, priv->base_qpn, &rss_map->indir_qp); if (err) { en_err(priv, "Failed to allocate RSS indirection QP\n"); goto rss_err; } rss_map->indir_qp.event = mlx4_en_sqp_event; mlx4_en_fill_qp_context(priv, 0, 0, 0, 1, priv->base_qpn, priv->rx_ring[0]->cqn, -1, &context); if (!priv->prof->rss_rings || priv->prof->rss_rings > priv->rx_ring_num) rss_rings = priv->rx_ring_num; else rss_rings = priv->prof->rss_rings; ptr = ((u8 *)&context) + offsetof(struct mlx4_qp_context, pri_path) + MLX4_RSS_OFFSET_IN_QPC_PRI_PATH; rss_context = ptr; rss_context->base_qpn = cpu_to_be32(ilog2(rss_rings) << 24 | (rss_map->base_qpn)); rss_context->default_qpn = cpu_to_be32(rss_map->base_qpn); if (priv->mdev->profile.udp_rss) { rss_mask |= MLX4_RSS_UDP_IPV4 | MLX4_RSS_UDP_IPV6; rss_context->base_qpn_udp = rss_context->default_qpn; } rss_context->flags = rss_mask; rss_context->hash_fn = MLX4_RSS_HASH_TOP; for (i = 0; i < 10; i++) rss_context->rss_key[i] = cpu_to_be32(rsskey[i]); err = mlx4_qp_to_ready(mdev->dev, &priv->res.mtt, &context, &rss_map->indir_qp, &rss_map->indir_state); if (err) goto indir_err; return 0; indir_err: mlx4_qp_modify(mdev->dev, NULL, rss_map->indir_state, MLX4_QP_STATE_RST, NULL, 0, 0, &rss_map->indir_qp); mlx4_qp_remove(mdev->dev, &rss_map->indir_qp); mlx4_qp_free(mdev->dev, &rss_map->indir_qp); rss_err: for (i = 0; i < good_qps; i++) { mlx4_qp_modify(mdev->dev, NULL, rss_map->state[i], MLX4_QP_STATE_RST, NULL, 0, 0, &rss_map->qps[i]); mlx4_qp_remove(mdev->dev, &rss_map->qps[i]); mlx4_qp_free(mdev->dev, &rss_map->qps[i]); } mlx4_qp_release_range(mdev->dev, rss_map->base_qpn, priv->rx_ring_num); return err; } void mlx4_en_release_rss_steer(struct mlx4_en_priv *priv) { struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_en_rss_map *rss_map = &priv->rss_map; int i; mlx4_qp_modify(mdev->dev, NULL, rss_map->indir_state, MLX4_QP_STATE_RST, NULL, 0, 0, &rss_map->indir_qp); mlx4_qp_remove(mdev->dev, &rss_map->indir_qp); mlx4_qp_free(mdev->dev, &rss_map->indir_qp); for (i = 0; i < priv->rx_ring_num; i++) { mlx4_qp_modify(mdev->dev, NULL, rss_map->state[i], MLX4_QP_STATE_RST, NULL, 0, 0, &rss_map->qps[i]); mlx4_qp_remove(mdev->dev, &rss_map->qps[i]); mlx4_qp_free(mdev->dev, &rss_map->qps[i]); } mlx4_qp_release_range(mdev->dev, rss_map->base_qpn, priv->rx_ring_num); } Index: head/sys/ofed/drivers/net/mlx4/mlx4_en.h =================================================================== --- head/sys/ofed/drivers/net/mlx4/mlx4_en.h (revision 297966) +++ head/sys/ofed/drivers/net/mlx4/mlx4_en.h (revision 297967) @@ -1,917 +1,918 @@ /* * Copyright (c) 2007, 2014 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * */ #ifndef _MLX4_EN_H_ #define _MLX4_EN_H_ #include #include #include #include #include #include #include #include #ifdef CONFIG_MLX4_EN_DCB #include #endif #include #include #include #include #include #include #include #include "en_port.h" #include "mlx4_stats.h" #define DRV_NAME "mlx4_en" #define MLX4_EN_MSG_LEVEL (NETIF_MSG_LINK | NETIF_MSG_IFDOWN) /* * Device constants */ #define MLX4_EN_PAGE_SHIFT 12 #define MLX4_EN_PAGE_SIZE (1 << MLX4_EN_PAGE_SHIFT) +#define MLX4_NET_IP_ALIGN 2 /* bytes */ #define DEF_RX_RINGS 16 #define MAX_RX_RINGS 128 #define MIN_RX_RINGS 4 #define TXBB_SIZE 64 #define HEADROOM (2048 / TXBB_SIZE + 1) #define STAMP_STRIDE 64 #define STAMP_DWORDS (STAMP_STRIDE / 4) #define STAMP_SHIFT 31 #define STAMP_VAL 0x7fffffff #define STATS_DELAY (HZ / 4) #define SERVICE_TASK_DELAY (HZ / 4) #define MAX_NUM_OF_FS_RULES 256 #define MLX4_EN_FILTER_HASH_SHIFT 4 #define MLX4_EN_FILTER_EXPIRY_QUOTA 60 #ifdef CONFIG_NET_RX_BUSY_POLL #define LL_EXTENDED_STATS #endif /* vlan valid range */ #define VLAN_MIN_VALUE 1 #define VLAN_MAX_VALUE 4094 /* * OS related constants and tunables */ #define MLX4_EN_WATCHDOG_TIMEOUT (15 * HZ) #define MLX4_EN_ALLOC_SIZE PAGE_ALIGN(PAGE_SIZE) #define MLX4_EN_ALLOC_ORDER get_order(MLX4_EN_ALLOC_SIZE) enum mlx4_en_alloc_type { MLX4_EN_ALLOC_NEW = 0, MLX4_EN_ALLOC_REPLACEMENT = 1, }; /* Maximum ring sizes */ #define MLX4_EN_DEF_TX_QUEUE_SIZE 4096 /* Minimum packet number till arming the CQ */ #define MLX4_EN_MIN_RX_ARM 2048 #define MLX4_EN_MIN_TX_ARM 2048 /* Maximum ring sizes */ #define MLX4_EN_MAX_TX_SIZE 8192 #define MLX4_EN_MAX_RX_SIZE 8192 /* Minimum ring sizes */ #define MLX4_EN_MIN_RX_SIZE (4096 / TXBB_SIZE) #define MLX4_EN_MIN_TX_SIZE (4096 / TXBB_SIZE) #define MLX4_EN_SMALL_PKT_SIZE 64 #define MLX4_EN_MAX_TX_RING_P_UP 32 #define MLX4_EN_NUM_UP 1 #define MAX_TX_RINGS (MLX4_EN_MAX_TX_RING_P_UP * \ MLX4_EN_NUM_UP) #define MLX4_EN_DEF_TX_RING_SIZE 1024 #define MLX4_EN_DEF_RX_RING_SIZE 1024 /* Target number of bytes to coalesce with interrupt moderation */ #define MLX4_EN_RX_COAL_TARGET 0x20000 #define MLX4_EN_RX_COAL_TIME 0x10 #define MLX4_EN_TX_COAL_PKTS 64 #define MLX4_EN_TX_COAL_TIME 64 #define MLX4_EN_RX_RATE_LOW 400000 #define MLX4_EN_RX_COAL_TIME_LOW 0 #define MLX4_EN_RX_RATE_HIGH 450000 #define MLX4_EN_RX_COAL_TIME_HIGH 128 #define MLX4_EN_RX_SIZE_THRESH 1024 #define MLX4_EN_RX_RATE_THRESH (1000000 / MLX4_EN_RX_COAL_TIME_HIGH) #define MLX4_EN_SAMPLE_INTERVAL 0 #define MLX4_EN_AVG_PKT_SMALL 256 #define MLX4_EN_AUTO_CONF 0xffff #define MLX4_EN_DEF_RX_PAUSE 1 #define MLX4_EN_DEF_TX_PAUSE 1 /* Interval between successive polls in the Tx routine when polling is used instead of interrupts (in per-core Tx rings) - should be power of 2 */ #define MLX4_EN_TX_POLL_MODER 16 #define MLX4_EN_TX_POLL_TIMEOUT (HZ / 4) #define MLX4_EN_64_ALIGN (64 - NET_SKB_PAD) #define SMALL_PACKET_SIZE (256 - NET_IP_ALIGN) #define HEADER_COPY_SIZE (128) #define MLX4_LOOPBACK_TEST_PAYLOAD (HEADER_COPY_SIZE - ETHER_HDR_LEN) #define MLX4_EN_MIN_MTU 46 #define ETH_BCAST 0xffffffffffffULL #define MLX4_EN_LOOPBACK_RETRIES 5 #define MLX4_EN_LOOPBACK_TIMEOUT 100 #ifdef MLX4_EN_PERF_STAT /* Number of samples to 'average' */ #define AVG_SIZE 128 #define AVG_FACTOR 1024 #define INC_PERF_COUNTER(cnt) (++(cnt)) #define ADD_PERF_COUNTER(cnt, add) ((cnt) += (add)) #define AVG_PERF_COUNTER(cnt, sample) \ ((cnt) = ((cnt) * (AVG_SIZE - 1) + (sample) * AVG_FACTOR) / AVG_SIZE) #define GET_PERF_COUNTER(cnt) (cnt) #define GET_AVG_PERF_COUNTER(cnt) ((cnt) / AVG_FACTOR) #else #define INC_PERF_COUNTER(cnt) do {} while (0) #define ADD_PERF_COUNTER(cnt, add) do {} while (0) #define AVG_PERF_COUNTER(cnt, sample) do {} while (0) #define GET_PERF_COUNTER(cnt) (0) #define GET_AVG_PERF_COUNTER(cnt) (0) #endif /* MLX4_EN_PERF_STAT */ /* * Configurables */ enum cq_type { RX = 0, TX = 1, }; /* * Useful macros */ #define ROUNDUP_LOG2(x) ilog2(roundup_pow_of_two(x)) #define XNOR(x, y) (!(x) == !(y)) #define ILLEGAL_MAC(addr) (addr == 0xffffffffffffULL || addr == 0x0) struct mlx4_en_tx_info { bus_dmamap_t dma_map; struct mbuf *mb; u32 nr_txbb; u32 nr_bytes; }; #define MLX4_EN_BIT_DESC_OWN 0x80000000 #define CTRL_SIZE sizeof(struct mlx4_wqe_ctrl_seg) #define MLX4_EN_MEMTYPE_PAD 0x100 #define DS_SIZE sizeof(struct mlx4_wqe_data_seg) struct mlx4_en_tx_desc { struct mlx4_wqe_ctrl_seg ctrl; union { struct mlx4_wqe_data_seg data; /* at least one data segment */ struct mlx4_wqe_lso_seg lso; struct mlx4_wqe_inline_seg inl; }; }; #define MLX4_EN_USE_SRQ 0x01000000 #define MLX4_EN_RX_BUDGET 64 #define MLX4_EN_TX_MAX_DESC_SIZE 512 /* bytes */ #define MLX4_EN_TX_MAX_MBUF_SIZE 65536 /* bytes */ #define MLX4_EN_TX_MAX_PAYLOAD_SIZE 65536 /* bytes */ #define MLX4_EN_TX_MAX_MBUF_FRAGS \ ((MLX4_EN_TX_MAX_DESC_SIZE - 128) / DS_SIZE_ALIGNMENT) /* units */ #define MLX4_EN_TX_WQE_MAX_WQEBBS \ (MLX4_EN_TX_MAX_DESC_SIZE / TXBB_SIZE) /* units */ #define MLX4_EN_CX3_LOW_ID 0x1000 #define MLX4_EN_CX3_HIGH_ID 0x1005 struct mlx4_en_tx_ring { spinlock_t tx_lock; bus_dma_tag_t dma_tag; struct mlx4_hwq_resources wqres; u32 size ; /* number of TXBBs */ u32 size_mask; u16 stride; u16 cqn; /* index of port CQ associated with this ring */ u32 prod; u32 cons; u32 buf_size; u32 doorbell_qpn; u8 *buf; u16 poll_cnt; int blocked; struct mlx4_en_tx_info *tx_info; u8 queue_index; cpuset_t affinity_mask; struct buf_ring *br; u32 last_nr_txbb; struct mlx4_qp qp; struct mlx4_qp_context context; int qpn; enum mlx4_qp_state qp_state; struct mlx4_srq dummy; unsigned long bytes; unsigned long packets; unsigned long tx_csum; unsigned long queue_stopped; unsigned long oversized_packets; unsigned long wake_queue; struct mlx4_bf bf; bool bf_enabled; int hwtstamp_tx_type; spinlock_t comp_lock; int inline_thold; u64 watchdog_time; }; struct mlx4_en_rx_desc { /* actual number of entries depends on rx ring stride */ struct mlx4_wqe_data_seg data[0]; }; struct mlx4_en_rx_mbuf { bus_dmamap_t dma_map; struct mbuf *mbuf; }; struct mlx4_en_rx_spare { bus_dmamap_t dma_map; struct mbuf *mbuf; u64 paddr_be; }; struct mlx4_en_rx_ring { struct mlx4_hwq_resources wqres; bus_dma_tag_t dma_tag; struct mlx4_en_rx_spare spare; u32 size ; /* number of Rx descs*/ u32 actual_size; u32 size_mask; u16 stride; u16 log_stride; u16 cqn; /* index of port CQ associated with this ring */ u32 prod; u32 cons; u32 buf_size; u8 fcs_del; u16 rx_alloc_order; u32 rx_alloc_size; u32 rx_buf_size; u32 rx_mb_size; int qpn; u8 *buf; struct mlx4_en_rx_mbuf *mbuf; unsigned long errors; unsigned long bytes; unsigned long packets; #ifdef LL_EXTENDED_STATS unsigned long yields; unsigned long misses; unsigned long cleaned; #endif unsigned long csum_ok; unsigned long csum_none; int hwtstamp_rx_filter; int numa_node; struct lro_ctrl lro; }; static inline int mlx4_en_can_lro(__be16 status) { const __be16 status_all = cpu_to_be16( MLX4_CQE_STATUS_IPV4 | MLX4_CQE_STATUS_IPV4F | MLX4_CQE_STATUS_IPV6 | MLX4_CQE_STATUS_IPV4OPT | MLX4_CQE_STATUS_TCP | MLX4_CQE_STATUS_UDP | MLX4_CQE_STATUS_IPOK); const __be16 status_ipv4_ipok_tcp = cpu_to_be16( MLX4_CQE_STATUS_IPV4 | MLX4_CQE_STATUS_IPOK | MLX4_CQE_STATUS_TCP); const __be16 status_ipv6_ipok_tcp = cpu_to_be16( MLX4_CQE_STATUS_IPV6 | MLX4_CQE_STATUS_IPOK | MLX4_CQE_STATUS_TCP); status &= status_all; return (status == status_ipv4_ipok_tcp || status == status_ipv6_ipok_tcp); } struct mlx4_en_cq { struct mlx4_cq mcq; struct mlx4_hwq_resources wqres; int ring; spinlock_t lock; struct net_device *dev; /* Per-core Tx cq processing support */ struct timer_list timer; int size; int buf_size; unsigned vector; enum cq_type is_tx; u16 moder_time; u16 moder_cnt; struct mlx4_cqe *buf; struct task cq_task; struct taskqueue *tq; #define MLX4_EN_OPCODE_ERROR 0x1e u32 tot_rx; u32 tot_tx; u32 curr_poll_rx_cpu_id; #ifdef CONFIG_NET_RX_BUSY_POLL unsigned int state; #define MLX4_EN_CQ_STATEIDLE 0 #define MLX4_EN_CQ_STATENAPI 1 /* NAPI owns this CQ */ #define MLX4_EN_CQ_STATEPOLL 2 /* poll owns this CQ */ #define MLX4_CQ_LOCKED (MLX4_EN_CQ_STATENAPI | MLX4_EN_CQ_STATEPOLL) #define MLX4_EN_CQ_STATENAPI_YIELD 4 /* NAPI yielded this CQ */ #define MLX4_EN_CQ_STATEPOLL_YIELD 8 /* poll yielded this CQ */ #define CQ_YIELD (MLX4_EN_CQ_STATENAPI_YIELD | MLX4_EN_CQ_STATEPOLL_YIELD) #define CQ_USER_PEND (MLX4_EN_CQ_STATEPOLL | MLX4_EN_CQ_STATEPOLL_YIELD) spinlock_t poll_lock; /* protects from LLS/napi conflicts */ #endif /* CONFIG_NET_RX_BUSY_POLL */ }; struct mlx4_en_port_profile { u32 flags; u32 tx_ring_num; u32 rx_ring_num; u32 tx_ring_size; u32 rx_ring_size; u8 rx_pause; u8 rx_ppp; u8 tx_pause; u8 tx_ppp; int rss_rings; }; struct mlx4_en_profile { int rss_xor; int udp_rss; u8 rss_mask; u32 active_ports; u32 small_pkt_int; u8 no_reset; u8 num_tx_rings_p_up; struct mlx4_en_port_profile prof[MLX4_MAX_PORTS + 1]; }; struct mlx4_en_dev { struct mlx4_dev *dev; struct pci_dev *pdev; struct mutex state_lock; struct net_device *pndev[MLX4_MAX_PORTS + 1]; u32 port_cnt; bool device_up; struct mlx4_en_profile profile; u32 LSO_support; struct workqueue_struct *workqueue; struct device *dma_device; void __iomem *uar_map; struct mlx4_uar priv_uar; struct mlx4_mr mr; u32 priv_pdn; spinlock_t uar_lock; u8 mac_removed[MLX4_MAX_PORTS + 1]; unsigned long last_overflow_check; unsigned long overflow_period; }; struct mlx4_en_rss_map { int base_qpn; struct mlx4_qp qps[MAX_RX_RINGS]; enum mlx4_qp_state state[MAX_RX_RINGS]; struct mlx4_qp indir_qp; enum mlx4_qp_state indir_state; }; struct mlx4_en_port_state { int link_state; int link_speed; int transciver; int autoneg; }; enum mlx4_en_mclist_act { MCLIST_NONE, MCLIST_REM, MCLIST_ADD, }; struct mlx4_en_mc_list { struct list_head list; enum mlx4_en_mclist_act action; u8 addr[ETH_ALEN]; u64 reg_id; }; #ifdef CONFIG_MLX4_EN_DCB /* Minimal TC BW - setting to 0 will block traffic */ #define MLX4_EN_BW_MIN 1 #define MLX4_EN_BW_MAX 100 /* Utilize 100% of the line */ #define MLX4_EN_TC_ETS 7 #endif enum { MLX4_EN_FLAG_PROMISC = (1 << 0), MLX4_EN_FLAG_MC_PROMISC = (1 << 1), /* whether we need to enable hardware loopback by putting dmac * in Tx WQE */ MLX4_EN_FLAG_ENABLE_HW_LOOPBACK = (1 << 2), /* whether we need to drop packets that hardware loopback-ed */ MLX4_EN_FLAG_RX_FILTER_NEEDED = (1 << 3), MLX4_EN_FLAG_FORCE_PROMISC = (1 << 4), #ifdef CONFIG_MLX4_EN_DCB MLX4_EN_FLAG_DCB_ENABLED = (1 << 5) #endif }; #define MLX4_EN_MAC_HASH_SIZE (1 << BITS_PER_BYTE) #define MLX4_EN_MAC_HASH_IDX 5 struct en_port { struct kobject kobj; struct mlx4_dev *dev; u8 port_num; u8 vport_num; }; struct mlx4_en_priv { struct mlx4_en_dev *mdev; struct mlx4_en_port_profile *prof; struct net_device *dev; unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; struct mlx4_en_port_state port_state; spinlock_t stats_lock; /* To allow rules removal while port is going down */ struct list_head ethtool_list; unsigned long last_moder_packets[MAX_RX_RINGS]; unsigned long last_moder_tx_packets; unsigned long last_moder_bytes[MAX_RX_RINGS]; unsigned long last_moder_jiffies; int last_moder_time[MAX_RX_RINGS]; u16 rx_usecs; u16 rx_frames; u16 tx_usecs; u16 tx_frames; u32 pkt_rate_low; u32 rx_usecs_low; u32 pkt_rate_high; u32 rx_usecs_high; u32 sample_interval; u32 adaptive_rx_coal; u32 msg_enable; u32 loopback_ok; u32 validate_loopback; struct mlx4_hwq_resources res; int link_state; int last_link_state; bool port_up; int port; int registered; int allocated; int stride; unsigned char current_mac[ETH_ALEN + 2]; u64 mac; int mac_index; unsigned max_mtu; int base_qpn; int cqe_factor; struct mlx4_en_rss_map rss_map; u32 flags; u8 num_tx_rings_p_up; u32 tx_ring_num; u32 rx_ring_num; u32 rx_mb_size; u16 rx_alloc_order; u32 rx_alloc_size; u32 rx_buf_size; struct mlx4_en_tx_ring **tx_ring; struct mlx4_en_rx_ring *rx_ring[MAX_RX_RINGS]; struct mlx4_en_cq **tx_cq; struct mlx4_en_cq *rx_cq[MAX_RX_RINGS]; struct mlx4_qp drop_qp; struct work_struct rx_mode_task; struct work_struct watchdog_task; struct work_struct linkstate_task; struct delayed_work stats_task; struct delayed_work service_task; struct mlx4_en_perf_stats pstats; struct mlx4_en_pkt_stats pkstats; struct mlx4_en_pkt_stats pkstats_last; struct mlx4_en_flow_stats flowstats[MLX4_NUM_PRIORITIES]; struct mlx4_en_port_stats port_stats; struct mlx4_en_vport_stats vport_stats; struct mlx4_en_vf_stats vf_stats; DECLARE_BITMAP(stats_bitmap, NUM_ALL_STATS); struct list_head mc_list; struct list_head curr_list; u64 broadcast_id; struct mlx4_en_stat_out_mbox hw_stats; int vids[128]; bool wol; struct device *ddev; struct dentry *dev_root; u32 counter_index; eventhandler_tag vlan_attach; eventhandler_tag vlan_detach; struct callout watchdog_timer; struct ifmedia media; volatile int blocked; struct sysctl_oid *sysctl; struct sysctl_ctx_list conf_ctx; struct sysctl_ctx_list stat_ctx; #define MLX4_EN_MAC_HASH_IDX 5 struct hlist_head mac_hash[MLX4_EN_MAC_HASH_SIZE]; #ifdef CONFIG_MLX4_EN_DCB struct ieee_ets ets; u16 maxrate[IEEE_8021QAZ_MAX_TCS]; u8 dcbx_cap; #endif #ifdef CONFIG_RFS_ACCEL spinlock_t filters_lock; int last_filter_id; struct list_head filters; struct hlist_head filter_hash[1 << MLX4_EN_FILTER_HASH_SHIFT]; #endif struct en_port *vf_ports[MLX4_MAX_NUM_VF]; unsigned long last_ifq_jiffies; u64 if_counters_rx_errors; u64 if_counters_rx_no_buffer; }; enum mlx4_en_wol { MLX4_EN_WOL_MAGIC = (1ULL << 61), MLX4_EN_WOL_ENABLED = (1ULL << 62), }; struct mlx4_mac_entry { struct hlist_node hlist; unsigned char mac[ETH_ALEN + 2]; u64 reg_id; }; #ifdef CONFIG_NET_RX_BUSY_POLL static inline void mlx4_en_cq_init_lock(struct mlx4_en_cq *cq) { spin_lock_init(&cq->poll_lock); cq->state = MLX4_EN_CQ_STATEIDLE; } /* called from the device poll rutine to get ownership of a cq */ static inline bool mlx4_en_cq_lock_napi(struct mlx4_en_cq *cq) { int rc = true; spin_lock(&cq->poll_lock); if (cq->state & MLX4_CQ_LOCKED) { WARN_ON(cq->state & MLX4_EN_CQ_STATENAPI); cq->state |= MLX4_EN_CQ_STATENAPI_YIELD; rc = false; } else /* we don't care if someone yielded */ cq->state = MLX4_EN_CQ_STATENAPI; spin_unlock(&cq->poll_lock); return rc; } /* returns true is someone tried to get the cq while napi had it */ static inline bool mlx4_en_cq_unlock_napi(struct mlx4_en_cq *cq) { int rc = false; spin_lock(&cq->poll_lock); WARN_ON(cq->state & (MLX4_EN_CQ_STATEPOLL | MLX4_EN_CQ_STATENAPI_YIELD)); if (cq->state & MLX4_EN_CQ_STATEPOLL_YIELD) rc = true; cq->state = MLX4_EN_CQ_STATEIDLE; spin_unlock(&cq->poll_lock); return rc; } /* called from mlx4_en_low_latency_poll() */ static inline bool mlx4_en_cq_lock_poll(struct mlx4_en_cq *cq) { int rc = true; spin_lock_bh(&cq->poll_lock); if ((cq->state & MLX4_CQ_LOCKED)) { struct net_device *dev = cq->dev; struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_rx_ring *rx_ring = priv->rx_ring[cq->ring]; cq->state |= MLX4_EN_CQ_STATEPOLL_YIELD; rc = false; #ifdef LL_EXTENDED_STATS rx_ring->yields++; #endif } else /* preserve yield marks */ cq->state |= MLX4_EN_CQ_STATEPOLL; spin_unlock_bh(&cq->poll_lock); return rc; } /* returns true if someone tried to get the cq while it was locked */ static inline bool mlx4_en_cq_unlock_poll(struct mlx4_en_cq *cq) { int rc = false; spin_lock_bh(&cq->poll_lock); WARN_ON(cq->state & (MLX4_EN_CQ_STATENAPI)); if (cq->state & MLX4_EN_CQ_STATEPOLL_YIELD) rc = true; cq->state = MLX4_EN_CQ_STATEIDLE; spin_unlock_bh(&cq->poll_lock); return rc; } /* true if a socket is polling, even if it did not get the lock */ static inline bool mlx4_en_cq_ll_polling(struct mlx4_en_cq *cq) { WARN_ON(!(cq->state & MLX4_CQ_LOCKED)); return cq->state & CQ_USER_PEND; } #else static inline void mlx4_en_cq_init_lock(struct mlx4_en_cq *cq) { } static inline bool mlx4_en_cq_lock_napi(struct mlx4_en_cq *cq) { return true; } static inline bool mlx4_en_cq_unlock_napi(struct mlx4_en_cq *cq) { return false; } static inline bool mlx4_en_cq_lock_poll(struct mlx4_en_cq *cq) { return false; } static inline bool mlx4_en_cq_unlock_poll(struct mlx4_en_cq *cq) { return false; } static inline bool mlx4_en_cq_ll_polling(struct mlx4_en_cq *cq) { return false; } #endif /* CONFIG_NET_RX_BUSY_POLL */ #define MLX4_EN_WOL_DO_MODIFY (1ULL << 63) void mlx4_en_destroy_netdev(struct net_device *dev); int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, struct mlx4_en_port_profile *prof); int mlx4_en_start_port(struct net_device *dev); void mlx4_en_stop_port(struct net_device *dev); void mlx4_en_free_resources(struct mlx4_en_priv *priv); int mlx4_en_alloc_resources(struct mlx4_en_priv *priv); int mlx4_en_pre_config(struct mlx4_en_priv *priv); int mlx4_en_create_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq **pcq, int entries, int ring, enum cq_type mode, int node); void mlx4_en_destroy_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq **pcq); int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq, int cq_idx); void mlx4_en_deactivate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq); int mlx4_en_set_cq_moder(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq); int mlx4_en_arm_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq); void mlx4_en_tx_irq(struct mlx4_cq *mcq); u16 mlx4_en_select_queue(struct net_device *dev, struct mbuf *mb); int mlx4_en_transmit(struct ifnet *dev, struct mbuf *m); int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring **pring, u32 size, u16 stride, int node, int queue_idx); void mlx4_en_destroy_tx_ring(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring **pring); int mlx4_en_activate_tx_ring(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring *ring, int cq, int user_prio); void mlx4_en_deactivate_tx_ring(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring *ring); void mlx4_en_qflush(struct ifnet *dev); int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring **pring, u32 size, int node); void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring **pring, u32 size, u16 stride); void mlx4_en_tx_que(void *context, int pending); void mlx4_en_rx_que(void *context, int pending); int mlx4_en_activate_rx_rings(struct mlx4_en_priv *priv); void mlx4_en_deactivate_rx_ring(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring *ring); int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int budget); void mlx4_en_poll_tx_cq(unsigned long data); void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride, int is_tx, int rss, int qpn, int cqn, int user_prio, struct mlx4_qp_context *context); void mlx4_en_sqp_event(struct mlx4_qp *qp, enum mlx4_event event); int mlx4_en_map_buffer(struct mlx4_buf *buf); void mlx4_en_unmap_buffer(struct mlx4_buf *buf); void mlx4_en_calc_rx_buf(struct net_device *dev); int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv); void mlx4_en_release_rss_steer(struct mlx4_en_priv *priv); int mlx4_en_create_drop_qp(struct mlx4_en_priv *priv); void mlx4_en_destroy_drop_qp(struct mlx4_en_priv *priv); int mlx4_en_free_tx_buf(struct net_device *dev, struct mlx4_en_tx_ring *ring); void mlx4_en_rx_irq(struct mlx4_cq *mcq); int mlx4_SET_MCAST_FLTR(struct mlx4_dev *dev, u8 port, u64 mac, u64 clear, u8 mode); int mlx4_SET_VLAN_FLTR(struct mlx4_dev *dev, struct mlx4_en_priv *priv); int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset); int mlx4_en_QUERY_PORT(struct mlx4_en_dev *mdev, u8 port); int mlx4_en_get_vport_stats(struct mlx4_en_dev *mdev, u8 port); void mlx4_en_create_debug_files(struct mlx4_en_priv *priv); void mlx4_en_delete_debug_files(struct mlx4_en_priv *priv); int mlx4_en_register_debugfs(void); void mlx4_en_unregister_debugfs(void); #ifdef CONFIG_MLX4_EN_DCB extern const struct dcbnl_rtnl_ops mlx4_en_dcbnl_ops; extern const struct dcbnl_rtnl_ops mlx4_en_dcbnl_pfc_ops; #endif int mlx4_en_setup_tc(struct net_device *dev, u8 up); #ifdef CONFIG_RFS_ACCEL void mlx4_en_cleanup_filters(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring *rx_ring); #endif #define MLX4_EN_NUM_SELF_TEST 5 void mlx4_en_ex_selftest(struct net_device *dev, u32 *flags, u64 *buf); void mlx4_en_ptp_overflow_check(struct mlx4_en_dev *mdev); /* * Functions for time stamping */ #define SKBTX_HW_TSTAMP (1 << 0) #define SKBTX_IN_PROGRESS (1 << 2) u64 mlx4_en_get_cqe_ts(struct mlx4_cqe *cqe); /* Functions for caching and restoring statistics */ int mlx4_en_get_sset_count(struct net_device *dev, int sset); void mlx4_en_restore_ethtool_stats(struct mlx4_en_priv *priv, u64 *data); /* * Globals */ extern const struct ethtool_ops mlx4_en_ethtool_ops; /* * Defines for link speed - needed by selftest */ #define MLX4_EN_LINK_SPEED_1G 1000 #define MLX4_EN_LINK_SPEED_10G 10000 #define MLX4_EN_LINK_SPEED_40G 40000 enum { NETIF_MSG_DRV = 0x0001, NETIF_MSG_PROBE = 0x0002, NETIF_MSG_LINK = 0x0004, NETIF_MSG_TIMER = 0x0008, NETIF_MSG_IFDOWN = 0x0010, NETIF_MSG_IFUP = 0x0020, NETIF_MSG_RX_ERR = 0x0040, NETIF_MSG_TX_ERR = 0x0080, NETIF_MSG_TX_QUEUED = 0x0100, NETIF_MSG_INTR = 0x0200, NETIF_MSG_TX_DONE = 0x0400, NETIF_MSG_RX_STATUS = 0x0800, NETIF_MSG_PKTDATA = 0x1000, NETIF_MSG_HW = 0x2000, NETIF_MSG_WOL = 0x4000, }; /* * printk / logging functions */ #define en_print(level, priv, format, arg...) \ { \ if ((priv)->registered) \ printk(level "%s: %s: " format, DRV_NAME, \ (priv->dev)->if_xname, ## arg); \ else \ printk(level "%s: %s: Port %d: " format, \ DRV_NAME, dev_name(&priv->mdev->pdev->dev), \ (priv)->port, ## arg); \ } #define en_dbg(mlevel, priv, format, arg...) \ do { \ if (NETIF_MSG_##mlevel & priv->msg_enable) \ en_print(KERN_DEBUG, priv, format, ##arg); \ } while (0) #define en_warn(priv, format, arg...) \ en_print(KERN_WARNING, priv, format, ##arg) #define en_err(priv, format, arg...) \ en_print(KERN_ERR, priv, format, ##arg) #define en_info(priv, format, arg...) \ en_print(KERN_INFO, priv, format, ## arg) #define mlx4_err(mdev, format, arg...) \ pr_err("%s %s: " format, DRV_NAME, \ dev_name(&mdev->pdev->dev), ##arg) #define mlx4_info(mdev, format, arg...) \ pr_info("%s %s: " format, DRV_NAME, \ dev_name(&mdev->pdev->dev), ##arg) #define mlx4_warn(mdev, format, arg...) \ pr_warning("%s %s: " format, DRV_NAME, \ dev_name(&mdev->pdev->dev), ##arg) #endif