Index: head/sys/dev/mlx4/cq.h
===================================================================
--- head/sys/dev/mlx4/cq.h	(revision 327863)
+++ head/sys/dev/mlx4/cq.h	(revision 327864)
@@ -1,178 +1,178 @@
/*
 * Copyright (c) 2007 Cisco Systems, Inc.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#ifndef MLX4_CQ_H
#define MLX4_CQ_H

#include
#include
#include

struct mlx4_cqe {
	__be32			vlan_my_qpn;
	__be32			immed_rss_invalid;
	__be32			g_mlpath_rqpn;
	__be16			sl_vid;
	union {
		struct {
			__be16	rlid;
			__be16	status;
			u8	ipv6_ext_mask;
			u8	badfcs_enc;
		};
		u8 smac[ETH_ALEN];
	};
	__be32			byte_cnt;
	__be16			wqe_index;
	__be16			checksum;
	u8			reserved[3];
	u8			owner_sr_opcode;
};

struct mlx4_err_cqe {
	__be32			my_qpn;
	u32			reserved1[5];
	__be16			wqe_index;
	u8			vendor_err_syndrome;
	u8			syndrome;
	u8			reserved2[3];
	u8			owner_sr_opcode;
};

struct mlx4_ts_cqe {
	__be32			vlan_my_qpn;
	__be32			immed_rss_invalid;
	__be32			g_mlpath_rqpn;
	__be32			timestamp_hi;
	__be16			status;
	u8			ipv6_ext_mask;
	u8			badfcs_enc;
	__be32			byte_cnt;
	__be16			wqe_index;
	__be16			checksum;
	u8			reserved;
	__be16			timestamp_lo;
	u8			owner_sr_opcode;
} __packed;

enum {
-	MLX4_CQE_L2_TUNNEL_IPOK		= 1 << 31,
+	MLX4_CQE_L2_TUNNEL_IPOK		= 1U << 31,
	MLX4_CQE_CVLAN_PRESENT_MASK	= 1 << 29,
	MLX4_CQE_SVLAN_PRESENT_MASK	= 1 << 30,
	MLX4_CQE_L2_TUNNEL		= 1 << 27,
	MLX4_CQE_L2_TUNNEL_CSUM		= 1 << 26,
	MLX4_CQE_L2_TUNNEL_IPV4		= 1 << 25,

	MLX4_CQE_QPN_MASK		= 0xffffff,
	MLX4_CQE_VID_MASK		= 0xfff,
};

enum {
	MLX4_CQE_OWNER_MASK	= 0x80,
	MLX4_CQE_IS_SEND_MASK	= 0x40,
	MLX4_CQE_OPCODE_MASK	= 0x1f
};

enum {
	MLX4_CQE_SYNDROME_LOCAL_LENGTH_ERR		= 0x01,
	MLX4_CQE_SYNDROME_LOCAL_QP_OP_ERR		= 0x02,
	MLX4_CQE_SYNDROME_LOCAL_PROT_ERR		= 0x04,
	MLX4_CQE_SYNDROME_WR_FLUSH_ERR			= 0x05,
	MLX4_CQE_SYNDROME_MW_BIND_ERR			= 0x06,
	MLX4_CQE_SYNDROME_BAD_RESP_ERR			= 0x10,
	MLX4_CQE_SYNDROME_LOCAL_ACCESS_ERR		= 0x11,
	MLX4_CQE_SYNDROME_REMOTE_INVAL_REQ_ERR		= 0x12,
	MLX4_CQE_SYNDROME_REMOTE_ACCESS_ERR		= 0x13,
	MLX4_CQE_SYNDROME_REMOTE_OP_ERR			= 0x14,
	MLX4_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR	= 0x15,
	MLX4_CQE_SYNDROME_RNR_RETRY_EXC_ERR		= 0x16,
	MLX4_CQE_SYNDROME_REMOTE_ABORTED_ERR		= 0x22,
};

enum {
	MLX4_CQE_STATUS_IPV4		= 1 << 6,
	MLX4_CQE_STATUS_IPV4F		= 1 << 7,
	MLX4_CQE_STATUS_IPV6		= 1 << 8,
	MLX4_CQE_STATUS_IPV4OPT		= 1 << 9,
	MLX4_CQE_STATUS_TCP		= 1 << 10,
	MLX4_CQE_STATUS_UDP		= 1 << 11,
	MLX4_CQE_STATUS_IPOK		= 1 << 12,
};

enum {
	MLX4_CQE_LLC			= 1,
	MLX4_CQE_SNAP			= 1 << 1,
	MLX4_CQE_BAD_FCS		= 1 << 4,
};

static inline void mlx4_cq_arm(struct mlx4_cq *cq, u32 cmd,
			       u8 __iomem *uar_page,
			       spinlock_t *doorbell_lock)
{
	__be32 doorbell[2];
	u32 sn;
	u32 ci;

	sn = cq->arm_sn & 3;
	ci = cq->cons_index & 0xffffff;

	*cq->arm_db = cpu_to_be32(sn << 28 | cmd | ci);

	/*
	 * Make sure that the doorbell record in host memory is
	 * written before ringing the doorbell via PCI MMIO.
	 */
	wmb();

	doorbell[0] = cpu_to_be32(sn << 28 | cmd | cq->cqn);
	doorbell[1] = cpu_to_be32(ci);

	mlx4_write64(doorbell, uar_page + MLX4_CQ_DOORBELL, doorbell_lock);
}

static inline void mlx4_cq_set_ci(struct mlx4_cq *cq)
{
	*cq->set_ci_db = cpu_to_be32(cq->cons_index & 0xffffff);
}

enum {
	MLX4_CQ_DB_REQ_NOT_SOL		= 1 << 24,
	MLX4_CQ_DB_REQ_NOT		= 2 << 24
};

int mlx4_cq_modify(struct mlx4_dev *dev, struct mlx4_cq *cq,
		   u16 count, u16 period);
int mlx4_cq_resize(struct mlx4_dev *dev, struct mlx4_cq *cq,
		   int entries, struct mlx4_mtt *mtt);

#endif /* MLX4_CQ_H */

Index: head/sys/dev/mlx4/mlx4_core/mlx4_fw.c
===================================================================
--- head/sys/dev/mlx4/mlx4_core/mlx4_fw.c	(revision 327863)
+++ head/sys/dev/mlx4/mlx4_core/mlx4_fw.c	(revision 327864)
@@ -1,3053 +1,3053 @@
/*
 * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
 * Copyright (c) 2005, 2006, 2007, 2008, 2014 Mellanox Technologies. All rights reserved.
 * Copyright (c) 2005, 2006, 2007 Cisco Systems, Inc.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
*/ #define LINUXKPI_PARAM_PREFIX mlx4_ #include #include #include #include #include #include "fw.h" #include "icm.h" enum { MLX4_COMMAND_INTERFACE_MIN_REV = 2, MLX4_COMMAND_INTERFACE_MAX_REV = 3, MLX4_COMMAND_INTERFACE_NEW_PORT_CMDS = 3, }; extern void __buggy_use_of_MLX4_GET(void); extern void __buggy_use_of_MLX4_PUT(void); static bool enable_qos; module_param(enable_qos, bool, 0444); MODULE_PARM_DESC(enable_qos, "Enable Enhanced QoS support (default: off)"); #define MLX4_GET(dest, source, offset) \ do { \ void *__p = (char *) (source) + (offset); \ typedef struct { u64 value; } __packed u64_p_t; \ u64 val; \ switch (sizeof (dest)) { \ case 1: (dest) = *(u8 *) __p; break; \ case 2: (dest) = be16_to_cpup(__p); break; \ case 4: (dest) = be32_to_cpup(__p); break; \ case 8: val = ((u64_p_t *)__p)->value; \ (dest) = be64_to_cpu(val); break; \ default: __buggy_use_of_MLX4_GET(); \ } \ } while (0) #define MLX4_PUT(dest, source, offset) \ do { \ void *__d = ((char *) (dest) + (offset)); \ switch (sizeof(source)) { \ case 1: *(u8 *) __d = (source); break; \ case 2: *(__be16 *) __d = cpu_to_be16(source); break; \ case 4: *(__be32 *) __d = cpu_to_be32(source); break; \ case 8: *(__be64 *) __d = cpu_to_be64(source); break; \ default: __buggy_use_of_MLX4_PUT(); \ } \ } while (0) static void dump_dev_cap_flags(struct mlx4_dev *dev, u64 flags) { static const char *fname[] = { [ 0] = "RC transport", [ 1] = "UC transport", [ 2] = "UD transport", [ 3] = "XRC transport", [ 6] = "SRQ support", [ 7] = "IPoIB checksum offload", [ 8] = "P_Key violation counter", [ 9] = "Q_Key violation counter", [12] = "Dual Port Different Protocol (DPDP) support", [15] = "Big LSO headers", [16] = "MW support", [17] = "APM support", [18] = "Atomic ops support", [19] = "Raw multicast support", [20] = "Address vector port checking support", [21] = "UD multicast support", [30] = "IBoE support", [32] = "Unicast loopback support", [34] = "FCS header control", [37] = "Wake On LAN (port1) support", [38] = "Wake On LAN (port2) support", [40] = "UDP RSS support", [41] = "Unicast VEP steering support", [42] = "Multicast VEP steering support", [48] = "Counters support", [52] = "RSS IP fragments support", [53] = "Port ETS Scheduler support", [55] = "Port link type sensing support", [59] = "Port management change event support", [61] = "64 byte EQE support", [62] = "64 byte CQE support", }; int i; mlx4_dbg(dev, "DEV_CAP flags:\n"); for (i = 0; i < ARRAY_SIZE(fname); ++i) if (fname[i] && (flags & (1LL << i))) mlx4_dbg(dev, " %s\n", fname[i]); } static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags) { static const char * const fname[] = { [0] = "RSS support", [1] = "RSS Toeplitz Hash Function support", [2] = "RSS XOR Hash Function support", [3] = "Device managed flow steering support", [4] = "Automatic MAC reassignment support", [5] = "Time stamping support", [6] = "VST (control vlan insertion/stripping) support", [7] = "FSM (MAC anti-spoofing) support", [8] = "Dynamic QP updates support", [9] = "Device managed flow steering IPoIB support", [10] = "TCP/IP offloads/flow-steering for VXLAN support", [11] = "MAD DEMUX (Secure-Host) support", [12] = "Large cache line (>64B) CQE stride support", [13] = "Large cache line (>64B) EQE stride support", [14] = "Ethernet protocol control support", [15] = "Ethernet Backplane autoneg support", [16] = "CONFIG DEV support", [17] = "Asymmetric EQs support", [18] = "More than 80 VFs support", [19] = "Performance optimized for limited rule configuration flow steering support", [20] = "Recoverable 
error events support", [21] = "Port Remap support", [22] = "QCN support", [23] = "QP rate limiting support", [24] = "Ethernet Flow control statistics support", [25] = "Granular QoS per VF support", [26] = "Port ETS Scheduler support", [27] = "Port beacon support", [28] = "RX-ALL support", [29] = "802.1ad offload support", [31] = "Modifying loopback source checks using UPDATE_QP support", [32] = "Loopback source checks support", [33] = "RoCEv2 support", [34] = "DMFS Sniffer support (UC & MC)", [35] = "QinQ VST mode support", [36] = "sl to vl mapping table change event support" }; int i; for (i = 0; i < ARRAY_SIZE(fname); ++i) if (fname[i] && (flags & (1LL << i))) mlx4_dbg(dev, " %s\n", fname[i]); } int mlx4_MOD_STAT_CFG(struct mlx4_dev *dev, struct mlx4_mod_stat_cfg *cfg) { struct mlx4_cmd_mailbox *mailbox; u32 *inbox; int err = 0; #define MOD_STAT_CFG_IN_SIZE 0x100 #define MOD_STAT_CFG_PG_SZ_M_OFFSET 0x002 #define MOD_STAT_CFG_PG_SZ_OFFSET 0x003 mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) return PTR_ERR(mailbox); inbox = mailbox->buf; MLX4_PUT(inbox, cfg->log_pg_sz, MOD_STAT_CFG_PG_SZ_OFFSET); MLX4_PUT(inbox, cfg->log_pg_sz_m, MOD_STAT_CFG_PG_SZ_M_OFFSET); err = mlx4_cmd(dev, mailbox->dma, 0, 0, MLX4_CMD_MOD_STAT_CFG, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); mlx4_free_cmd_mailbox(dev, mailbox); return err; } int mlx4_QUERY_FUNC(struct mlx4_dev *dev, struct mlx4_func *func, int slave) { struct mlx4_cmd_mailbox *mailbox; u32 *outbox; u8 in_modifier; u8 field; u16 field16; int err; #define QUERY_FUNC_BUS_OFFSET 0x00 #define QUERY_FUNC_DEVICE_OFFSET 0x01 #define QUERY_FUNC_FUNCTION_OFFSET 0x01 #define QUERY_FUNC_PHYSICAL_FUNCTION_OFFSET 0x03 #define QUERY_FUNC_RSVD_EQS_OFFSET 0x04 #define QUERY_FUNC_MAX_EQ_OFFSET 0x06 #define QUERY_FUNC_RSVD_UARS_OFFSET 0x0b mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) return PTR_ERR(mailbox); outbox = mailbox->buf; in_modifier = slave; err = mlx4_cmd_box(dev, 0, mailbox->dma, in_modifier, 0, MLX4_CMD_QUERY_FUNC, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); if (err) goto out; MLX4_GET(field, outbox, QUERY_FUNC_BUS_OFFSET); func->bus = field & 0xf; MLX4_GET(field, outbox, QUERY_FUNC_DEVICE_OFFSET); func->device = field & 0xf1; MLX4_GET(field, outbox, QUERY_FUNC_FUNCTION_OFFSET); func->function = field & 0x7; MLX4_GET(field, outbox, QUERY_FUNC_PHYSICAL_FUNCTION_OFFSET); func->physical_function = field & 0xf; MLX4_GET(field16, outbox, QUERY_FUNC_RSVD_EQS_OFFSET); func->rsvd_eqs = field16 & 0xffff; MLX4_GET(field16, outbox, QUERY_FUNC_MAX_EQ_OFFSET); func->max_eq = field16 & 0xffff; MLX4_GET(field, outbox, QUERY_FUNC_RSVD_UARS_OFFSET); func->rsvd_uars = field & 0x0f; mlx4_dbg(dev, "Bus: %d, Device: %d, Function: %d, Physical function: %d, Max EQs: %d, Reserved EQs: %d, Reserved UARs: %d\n", func->bus, func->device, func->function, func->physical_function, func->max_eq, func->rsvd_eqs, func->rsvd_uars); out: mlx4_free_cmd_mailbox(dev, mailbox); return err; } static int mlx4_activate_vst_qinq(struct mlx4_priv *priv, int slave, int port) { struct mlx4_vport_oper_state *vp_oper; struct mlx4_vport_state *vp_admin; int err; vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port]; vp_admin = &priv->mfunc.master.vf_admin[slave].vport[port]; if (vp_admin->default_vlan != vp_oper->state.default_vlan) { err = __mlx4_register_vlan(&priv->dev, port, vp_admin->default_vlan, &vp_oper->vlan_idx); if (err) { vp_oper->vlan_idx = NO_INDX; mlx4_warn(&priv->dev, "No vlan resources slave %d, port %d\n", slave, port); return err; } mlx4_dbg(&priv->dev, "alloc 
vlan %d idx %d slave %d port %d\n", (int)(vp_oper->state.default_vlan), vp_oper->vlan_idx, slave, port); } vp_oper->state.vlan_proto = vp_admin->vlan_proto; vp_oper->state.default_vlan = vp_admin->default_vlan; vp_oper->state.default_qos = vp_admin->default_qos; return 0; } static int mlx4_handle_vst_qinq(struct mlx4_priv *priv, int slave, int port) { struct mlx4_vport_oper_state *vp_oper; struct mlx4_slave_state *slave_state; struct mlx4_vport_state *vp_admin; int err; vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port]; vp_admin = &priv->mfunc.master.vf_admin[slave].vport[port]; slave_state = &priv->mfunc.master.slave_state[slave]; if ((vp_admin->vlan_proto != htons(ETH_P_8021AD)) || (!slave_state->active)) return 0; if (vp_oper->state.vlan_proto == vp_admin->vlan_proto && vp_oper->state.default_vlan == vp_admin->default_vlan && vp_oper->state.default_qos == vp_admin->default_qos) return 0; if (!slave_state->vst_qinq_supported) { /* Warn and revert the request to set vst QinQ mode */ vp_admin->vlan_proto = vp_oper->state.vlan_proto; vp_admin->default_vlan = vp_oper->state.default_vlan; vp_admin->default_qos = vp_oper->state.default_qos; mlx4_warn(&priv->dev, "Slave %d does not support VST QinQ mode\n", slave); return 0; } err = mlx4_activate_vst_qinq(priv, slave, port); return err; } int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { struct mlx4_priv *priv = mlx4_priv(dev); u8 field, port; u32 size, proxy_qp, qkey; int err = 0; struct mlx4_func func; #define QUERY_FUNC_CAP_FLAGS_OFFSET 0x0 #define QUERY_FUNC_CAP_NUM_PORTS_OFFSET 0x1 #define QUERY_FUNC_CAP_PF_BHVR_OFFSET 0x4 #define QUERY_FUNC_CAP_FMR_OFFSET 0x8 #define QUERY_FUNC_CAP_QP_QUOTA_OFFSET_DEP 0x10 #define QUERY_FUNC_CAP_CQ_QUOTA_OFFSET_DEP 0x14 #define QUERY_FUNC_CAP_SRQ_QUOTA_OFFSET_DEP 0x18 #define QUERY_FUNC_CAP_MPT_QUOTA_OFFSET_DEP 0x20 #define QUERY_FUNC_CAP_MTT_QUOTA_OFFSET_DEP 0x24 #define QUERY_FUNC_CAP_MCG_QUOTA_OFFSET_DEP 0x28 #define QUERY_FUNC_CAP_MAX_EQ_OFFSET 0x2c #define QUERY_FUNC_CAP_RESERVED_EQ_OFFSET 0x30 #define QUERY_FUNC_CAP_QP_RESD_LKEY_OFFSET 0x48 #define QUERY_FUNC_CAP_QP_QUOTA_OFFSET 0x50 #define QUERY_FUNC_CAP_CQ_QUOTA_OFFSET 0x54 #define QUERY_FUNC_CAP_SRQ_QUOTA_OFFSET 0x58 #define QUERY_FUNC_CAP_MPT_QUOTA_OFFSET 0x60 #define QUERY_FUNC_CAP_MTT_QUOTA_OFFSET 0x64 #define QUERY_FUNC_CAP_MCG_QUOTA_OFFSET 0x68 #define QUERY_FUNC_CAP_EXTRA_FLAGS_OFFSET 0x6c #define QUERY_FUNC_CAP_FMR_FLAG 0x80 #define QUERY_FUNC_CAP_FLAG_RDMA 0x40 #define QUERY_FUNC_CAP_FLAG_ETH 0x80 #define QUERY_FUNC_CAP_FLAG_QUOTAS 0x10 #define QUERY_FUNC_CAP_FLAG_RESD_LKEY 0x08 #define QUERY_FUNC_CAP_FLAG_VALID_MAILBOX 0x04 #define QUERY_FUNC_CAP_EXTRA_FLAGS_BF_QP_ALLOC_FLAG (1UL << 31) #define QUERY_FUNC_CAP_EXTRA_FLAGS_A0_QP_ALLOC_FLAG (1UL << 30) /* when opcode modifier = 1 */ #define QUERY_FUNC_CAP_PHYS_PORT_OFFSET 0x3 #define QUERY_FUNC_CAP_PRIV_VF_QKEY_OFFSET 0x4 #define QUERY_FUNC_CAP_FLAGS0_OFFSET 0x8 #define QUERY_FUNC_CAP_FLAGS1_OFFSET 0xc #define QUERY_FUNC_CAP_QP0_TUNNEL 0x10 #define QUERY_FUNC_CAP_QP0_PROXY 0x14 #define QUERY_FUNC_CAP_QP1_TUNNEL 0x18 #define QUERY_FUNC_CAP_QP1_PROXY 0x1c #define QUERY_FUNC_CAP_PHYS_PORT_ID 0x28 #define QUERY_FUNC_CAP_FLAGS1_FORCE_MAC 0x40 #define QUERY_FUNC_CAP_FLAGS1_FORCE_VLAN 0x80 #define QUERY_FUNC_CAP_FLAGS1_NIC_INFO 0x10 #define QUERY_FUNC_CAP_VF_ENABLE_QP0 0x08 #define QUERY_FUNC_CAP_FLAGS0_FORCE_PHY_WQE_GID 0x80 #define QUERY_FUNC_CAP_PHV_BIT 
0x40 #define QUERY_FUNC_CAP_VLAN_OFFLOAD_DISABLE 0x20 #define QUERY_FUNC_CAP_SUPPORTS_VST_QINQ BIT(30) #define QUERY_FUNC_CAP_SUPPORTS_NON_POWER_OF_2_NUM_EQS BIT(31) if (vhcr->op_modifier == 1) { struct mlx4_active_ports actv_ports = mlx4_get_active_ports(dev, slave); int converted_port = mlx4_slave_convert_port( dev, slave, vhcr->in_modifier); struct mlx4_vport_oper_state *vp_oper; if (converted_port < 0) return -EINVAL; vhcr->in_modifier = converted_port; /* phys-port = logical-port */ field = vhcr->in_modifier - find_first_bit(actv_ports.ports, dev->caps.num_ports); MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_PHYS_PORT_OFFSET); port = vhcr->in_modifier; proxy_qp = dev->phys_caps.base_proxy_sqpn + 8 * slave + port - 1; /* Set nic_info bit to mark new fields support */ field = QUERY_FUNC_CAP_FLAGS1_NIC_INFO; if (mlx4_vf_smi_enabled(dev, slave, port) && !mlx4_get_parav_qkey(dev, proxy_qp, &qkey)) { field |= QUERY_FUNC_CAP_VF_ENABLE_QP0; MLX4_PUT(outbox->buf, qkey, QUERY_FUNC_CAP_PRIV_VF_QKEY_OFFSET); } MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_FLAGS1_OFFSET); /* size is now the QP number */ size = dev->phys_caps.base_tunnel_sqpn + 8 * slave + port - 1; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP0_TUNNEL); size += 2; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP1_TUNNEL); MLX4_PUT(outbox->buf, proxy_qp, QUERY_FUNC_CAP_QP0_PROXY); proxy_qp += 2; MLX4_PUT(outbox->buf, proxy_qp, QUERY_FUNC_CAP_QP1_PROXY); MLX4_PUT(outbox->buf, dev->caps.phys_port_id[vhcr->in_modifier], QUERY_FUNC_CAP_PHYS_PORT_ID); vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port]; err = mlx4_handle_vst_qinq(priv, slave, port); if (err) return err; field = 0; if (dev->caps.phv_bit[port]) field |= QUERY_FUNC_CAP_PHV_BIT; if (vp_oper->state.vlan_proto == htons(ETH_P_8021AD)) field |= QUERY_FUNC_CAP_VLAN_OFFLOAD_DISABLE; MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_FLAGS0_OFFSET); } else if (vhcr->op_modifier == 0) { struct mlx4_active_ports actv_ports = mlx4_get_active_ports(dev, slave); struct mlx4_slave_state *slave_state = &priv->mfunc.master.slave_state[slave]; /* enable rdma and ethernet interfaces, new quota locations, * and reserved lkey */ field = (QUERY_FUNC_CAP_FLAG_ETH | QUERY_FUNC_CAP_FLAG_RDMA | QUERY_FUNC_CAP_FLAG_QUOTAS | QUERY_FUNC_CAP_FLAG_VALID_MAILBOX | QUERY_FUNC_CAP_FLAG_RESD_LKEY); MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_FLAGS_OFFSET); field = min( bitmap_weight(actv_ports.ports, dev->caps.num_ports), dev->caps.num_ports); MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_NUM_PORTS_OFFSET); size = dev->caps.function_caps; /* set PF behaviours */ MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_PF_BHVR_OFFSET); field = 0; /* protected FMR support not available as yet */ MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_FMR_OFFSET); size = priv->mfunc.master.res_tracker.res_alloc[RES_QP].quota[slave]; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP_QUOTA_OFFSET); size = dev->caps.num_qps; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP_QUOTA_OFFSET_DEP); size = priv->mfunc.master.res_tracker.res_alloc[RES_SRQ].quota[slave]; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_SRQ_QUOTA_OFFSET); size = dev->caps.num_srqs; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_SRQ_QUOTA_OFFSET_DEP); size = priv->mfunc.master.res_tracker.res_alloc[RES_CQ].quota[slave]; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_CQ_QUOTA_OFFSET); size = dev->caps.num_cqs; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_CQ_QUOTA_OFFSET_DEP); if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS) || mlx4_QUERY_FUNC(dev, &func, slave)) { size = vhcr->in_modifier & 
QUERY_FUNC_CAP_SUPPORTS_NON_POWER_OF_2_NUM_EQS ? dev->caps.num_eqs : rounddown_pow_of_two(dev->caps.num_eqs); MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MAX_EQ_OFFSET); size = dev->caps.reserved_eqs; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_RESERVED_EQ_OFFSET); } else { size = vhcr->in_modifier & QUERY_FUNC_CAP_SUPPORTS_NON_POWER_OF_2_NUM_EQS ? func.max_eq : rounddown_pow_of_two(func.max_eq); MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MAX_EQ_OFFSET); size = func.rsvd_eqs; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_RESERVED_EQ_OFFSET); } size = priv->mfunc.master.res_tracker.res_alloc[RES_MPT].quota[slave]; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MPT_QUOTA_OFFSET); size = dev->caps.num_mpts; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MPT_QUOTA_OFFSET_DEP); size = priv->mfunc.master.res_tracker.res_alloc[RES_MTT].quota[slave]; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MTT_QUOTA_OFFSET); size = dev->caps.num_mtts; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MTT_QUOTA_OFFSET_DEP); size = dev->caps.num_mgms + dev->caps.num_amgms; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MCG_QUOTA_OFFSET); MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MCG_QUOTA_OFFSET_DEP); size = QUERY_FUNC_CAP_EXTRA_FLAGS_BF_QP_ALLOC_FLAG | QUERY_FUNC_CAP_EXTRA_FLAGS_A0_QP_ALLOC_FLAG; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_EXTRA_FLAGS_OFFSET); size = dev->caps.reserved_lkey + ((slave << 8) & 0xFF00); MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP_RESD_LKEY_OFFSET); if (vhcr->in_modifier & QUERY_FUNC_CAP_SUPPORTS_VST_QINQ) slave_state->vst_qinq_supported = true; } else err = -EINVAL; return err; } int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u8 gen_or_port, struct mlx4_func_cap *func_cap) { struct mlx4_cmd_mailbox *mailbox; u32 *outbox; u8 field, op_modifier; u32 size, qkey; int err = 0, quotas = 0; u32 in_modifier; u32 slave_caps; op_modifier = !!gen_or_port; /* 0 = general, 1 = logical port */ slave_caps = QUERY_FUNC_CAP_SUPPORTS_VST_QINQ | QUERY_FUNC_CAP_SUPPORTS_NON_POWER_OF_2_NUM_EQS; in_modifier = op_modifier ? 
gen_or_port : slave_caps; mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) return PTR_ERR(mailbox); err = mlx4_cmd_box(dev, 0, mailbox->dma, in_modifier, op_modifier, MLX4_CMD_QUERY_FUNC_CAP, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); if (err) goto out; outbox = mailbox->buf; if (!op_modifier) { MLX4_GET(field, outbox, QUERY_FUNC_CAP_FLAGS_OFFSET); if (!(field & (QUERY_FUNC_CAP_FLAG_ETH | QUERY_FUNC_CAP_FLAG_RDMA))) { mlx4_err(dev, "The host supports neither eth nor rdma interfaces\n"); err = -EPROTONOSUPPORT; goto out; } func_cap->flags = field; quotas = !!(func_cap->flags & QUERY_FUNC_CAP_FLAG_QUOTAS); MLX4_GET(field, outbox, QUERY_FUNC_CAP_NUM_PORTS_OFFSET); func_cap->num_ports = field; MLX4_GET(size, outbox, QUERY_FUNC_CAP_PF_BHVR_OFFSET); func_cap->pf_context_behaviour = size; if (quotas) { MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP_QUOTA_OFFSET); func_cap->qp_quota = size & 0xFFFFFF; MLX4_GET(size, outbox, QUERY_FUNC_CAP_SRQ_QUOTA_OFFSET); func_cap->srq_quota = size & 0xFFFFFF; MLX4_GET(size, outbox, QUERY_FUNC_CAP_CQ_QUOTA_OFFSET); func_cap->cq_quota = size & 0xFFFFFF; MLX4_GET(size, outbox, QUERY_FUNC_CAP_MPT_QUOTA_OFFSET); func_cap->mpt_quota = size & 0xFFFFFF; MLX4_GET(size, outbox, QUERY_FUNC_CAP_MTT_QUOTA_OFFSET); func_cap->mtt_quota = size & 0xFFFFFF; MLX4_GET(size, outbox, QUERY_FUNC_CAP_MCG_QUOTA_OFFSET); func_cap->mcg_quota = size & 0xFFFFFF; } else { MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP_QUOTA_OFFSET_DEP); func_cap->qp_quota = size & 0xFFFFFF; MLX4_GET(size, outbox, QUERY_FUNC_CAP_SRQ_QUOTA_OFFSET_DEP); func_cap->srq_quota = size & 0xFFFFFF; MLX4_GET(size, outbox, QUERY_FUNC_CAP_CQ_QUOTA_OFFSET_DEP); func_cap->cq_quota = size & 0xFFFFFF; MLX4_GET(size, outbox, QUERY_FUNC_CAP_MPT_QUOTA_OFFSET_DEP); func_cap->mpt_quota = size & 0xFFFFFF; MLX4_GET(size, outbox, QUERY_FUNC_CAP_MTT_QUOTA_OFFSET_DEP); func_cap->mtt_quota = size & 0xFFFFFF; MLX4_GET(size, outbox, QUERY_FUNC_CAP_MCG_QUOTA_OFFSET_DEP); func_cap->mcg_quota = size & 0xFFFFFF; } MLX4_GET(size, outbox, QUERY_FUNC_CAP_MAX_EQ_OFFSET); func_cap->max_eq = size & 0xFFFFFF; MLX4_GET(size, outbox, QUERY_FUNC_CAP_RESERVED_EQ_OFFSET); func_cap->reserved_eq = size & 0xFFFFFF; if (func_cap->flags & QUERY_FUNC_CAP_FLAG_RESD_LKEY) { MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP_RESD_LKEY_OFFSET); func_cap->reserved_lkey = size; } else { func_cap->reserved_lkey = 0; } func_cap->extra_flags = 0; /* Mailbox data from 0x6c and onward should only be treated if * QUERY_FUNC_CAP_FLAG_VALID_MAILBOX is set in func_cap->flags */ if (func_cap->flags & QUERY_FUNC_CAP_FLAG_VALID_MAILBOX) { MLX4_GET(size, outbox, QUERY_FUNC_CAP_EXTRA_FLAGS_OFFSET); if (size & QUERY_FUNC_CAP_EXTRA_FLAGS_BF_QP_ALLOC_FLAG) func_cap->extra_flags |= MLX4_QUERY_FUNC_FLAGS_BF_RES_QP; if (size & QUERY_FUNC_CAP_EXTRA_FLAGS_A0_QP_ALLOC_FLAG) func_cap->extra_flags |= MLX4_QUERY_FUNC_FLAGS_A0_RES_QP; } goto out; } /* logical port query */ if (gen_or_port > dev->caps.num_ports) { err = -EINVAL; goto out; } MLX4_GET(func_cap->flags1, outbox, QUERY_FUNC_CAP_FLAGS1_OFFSET); if (dev->caps.port_type[gen_or_port] == MLX4_PORT_TYPE_ETH) { if (func_cap->flags1 & QUERY_FUNC_CAP_FLAGS1_FORCE_VLAN) { mlx4_err(dev, "VLAN is enforced on this port\n"); err = -EPROTONOSUPPORT; goto out; } if (func_cap->flags1 & QUERY_FUNC_CAP_FLAGS1_FORCE_MAC) { mlx4_err(dev, "Force mac is enabled on this port\n"); err = -EPROTONOSUPPORT; goto out; } } else if (dev->caps.port_type[gen_or_port] == MLX4_PORT_TYPE_IB) { MLX4_GET(field, outbox, QUERY_FUNC_CAP_FLAGS0_OFFSET); if (field & 
QUERY_FUNC_CAP_FLAGS0_FORCE_PHY_WQE_GID) { mlx4_err(dev, "phy_wqe_gid is enforced on this ib port\n"); err = -EPROTONOSUPPORT; goto out; } } MLX4_GET(field, outbox, QUERY_FUNC_CAP_PHYS_PORT_OFFSET); func_cap->physical_port = field; if (func_cap->physical_port != gen_or_port) { err = -ENOSYS; goto out; } if (func_cap->flags1 & QUERY_FUNC_CAP_VF_ENABLE_QP0) { MLX4_GET(qkey, outbox, QUERY_FUNC_CAP_PRIV_VF_QKEY_OFFSET); func_cap->qp0_qkey = qkey; } else { func_cap->qp0_qkey = 0; } MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP0_TUNNEL); func_cap->qp0_tunnel_qpn = size & 0xFFFFFF; MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP0_PROXY); func_cap->qp0_proxy_qpn = size & 0xFFFFFF; MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP1_TUNNEL); func_cap->qp1_tunnel_qpn = size & 0xFFFFFF; MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP1_PROXY); func_cap->qp1_proxy_qpn = size & 0xFFFFFF; if (func_cap->flags1 & QUERY_FUNC_CAP_FLAGS1_NIC_INFO) MLX4_GET(func_cap->phys_port_id, outbox, QUERY_FUNC_CAP_PHYS_PORT_ID); MLX4_GET(func_cap->flags0, outbox, QUERY_FUNC_CAP_FLAGS0_OFFSET); /* All other resources are allocated by the master, but we still report * 'num' and 'reserved' capabilities as follows: * - num remains the maximum resource index * - 'num - reserved' is the total available objects of a resource, but * resource indices may be less than 'reserved' * TODO: set per-resource quotas */ out: mlx4_free_cmd_mailbox(dev, mailbox); return err; } static void disable_unsupported_roce_caps(void *buf); int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) { struct mlx4_cmd_mailbox *mailbox; u32 *outbox; u8 field; u32 field32, flags, ext_flags; u16 size; u16 stat_rate; int err; int i; #define QUERY_DEV_CAP_OUT_SIZE 0x100 #define QUERY_DEV_CAP_MAX_SRQ_SZ_OFFSET 0x10 #define QUERY_DEV_CAP_MAX_QP_SZ_OFFSET 0x11 #define QUERY_DEV_CAP_RSVD_QP_OFFSET 0x12 #define QUERY_DEV_CAP_MAX_QP_OFFSET 0x13 #define QUERY_DEV_CAP_RSVD_SRQ_OFFSET 0x14 #define QUERY_DEV_CAP_MAX_SRQ_OFFSET 0x15 #define QUERY_DEV_CAP_RSVD_EEC_OFFSET 0x16 #define QUERY_DEV_CAP_MAX_EEC_OFFSET 0x17 #define QUERY_DEV_CAP_MAX_CQ_SZ_OFFSET 0x19 #define QUERY_DEV_CAP_RSVD_CQ_OFFSET 0x1a #define QUERY_DEV_CAP_MAX_CQ_OFFSET 0x1b #define QUERY_DEV_CAP_MAX_MPT_OFFSET 0x1d #define QUERY_DEV_CAP_RSVD_EQ_OFFSET 0x1e #define QUERY_DEV_CAP_MAX_EQ_OFFSET 0x1f #define QUERY_DEV_CAP_RSVD_MTT_OFFSET 0x20 #define QUERY_DEV_CAP_MAX_MRW_SZ_OFFSET 0x21 #define QUERY_DEV_CAP_RSVD_MRW_OFFSET 0x22 #define QUERY_DEV_CAP_MAX_MTT_SEG_OFFSET 0x23 #define QUERY_DEV_CAP_NUM_SYS_EQ_OFFSET 0x26 #define QUERY_DEV_CAP_MAX_AV_OFFSET 0x27 #define QUERY_DEV_CAP_MAX_REQ_QP_OFFSET 0x29 #define QUERY_DEV_CAP_MAX_RES_QP_OFFSET 0x2b #define QUERY_DEV_CAP_MAX_GSO_OFFSET 0x2d #define QUERY_DEV_CAP_RSS_OFFSET 0x2e #define QUERY_DEV_CAP_MAX_RDMA_OFFSET 0x2f #define QUERY_DEV_CAP_RSZ_SRQ_OFFSET 0x33 #define QUERY_DEV_CAP_PORT_BEACON_OFFSET 0x34 #define QUERY_DEV_CAP_ACK_DELAY_OFFSET 0x35 #define QUERY_DEV_CAP_MTU_WIDTH_OFFSET 0x36 #define QUERY_DEV_CAP_VL_PORT_OFFSET 0x37 #define QUERY_DEV_CAP_MAX_MSG_SZ_OFFSET 0x38 #define QUERY_DEV_CAP_MAX_GID_OFFSET 0x3b #define QUERY_DEV_CAP_RATE_SUPPORT_OFFSET 0x3c #define QUERY_DEV_CAP_CQ_TS_SUPPORT_OFFSET 0x3e #define QUERY_DEV_CAP_MAX_PKEY_OFFSET 0x3f #define QUERY_DEV_CAP_EXT_FLAGS_OFFSET 0x40 #define QUERY_DEV_CAP_FLAGS_OFFSET 0x44 #define QUERY_DEV_CAP_RSVD_UAR_OFFSET 0x48 #define QUERY_DEV_CAP_UAR_SZ_OFFSET 0x49 #define QUERY_DEV_CAP_PAGE_SZ_OFFSET 0x4b #define QUERY_DEV_CAP_BF_OFFSET 0x4c #define QUERY_DEV_CAP_LOG_BF_REG_SZ_OFFSET 0x4d #define 
QUERY_DEV_CAP_LOG_MAX_BF_REGS_PER_PAGE_OFFSET 0x4e #define QUERY_DEV_CAP_LOG_MAX_BF_PAGES_OFFSET 0x4f #define QUERY_DEV_CAP_MAX_SG_SQ_OFFSET 0x51 #define QUERY_DEV_CAP_MAX_DESC_SZ_SQ_OFFSET 0x52 #define QUERY_DEV_CAP_MAX_SG_RQ_OFFSET 0x55 #define QUERY_DEV_CAP_MAX_DESC_SZ_RQ_OFFSET 0x56 #define QUERY_DEV_CAP_SVLAN_BY_QP_OFFSET 0x5D #define QUERY_DEV_CAP_MAX_QP_MCG_OFFSET 0x61 #define QUERY_DEV_CAP_RSVD_MCG_OFFSET 0x62 #define QUERY_DEV_CAP_MAX_MCG_OFFSET 0x63 #define QUERY_DEV_CAP_RSVD_PD_OFFSET 0x64 #define QUERY_DEV_CAP_MAX_PD_OFFSET 0x65 #define QUERY_DEV_CAP_RSVD_XRC_OFFSET 0x66 #define QUERY_DEV_CAP_MAX_XRC_OFFSET 0x67 #define QUERY_DEV_CAP_MAX_COUNTERS_OFFSET 0x68 #define QUERY_DEV_CAP_PORT_FLOWSTATS_COUNTERS_OFFSET 0x70 #define QUERY_DEV_CAP_EXT_2_FLAGS_OFFSET 0x70 #define QUERY_DEV_CAP_FLOW_STEERING_IPOIB_OFFSET 0x74 #define QUERY_DEV_CAP_FLOW_STEERING_RANGE_EN_OFFSET 0x76 #define QUERY_DEV_CAP_FLOW_STEERING_MAX_QP_OFFSET 0x77 #define QUERY_DEV_CAP_SL2VL_EVENT_OFFSET 0x78 #define QUERY_DEV_CAP_CQ_EQ_CACHE_LINE_STRIDE 0x7a #define QUERY_DEV_CAP_ECN_QCN_VER_OFFSET 0x7b #define QUERY_DEV_CAP_RDMARC_ENTRY_SZ_OFFSET 0x80 #define QUERY_DEV_CAP_QPC_ENTRY_SZ_OFFSET 0x82 #define QUERY_DEV_CAP_AUX_ENTRY_SZ_OFFSET 0x84 #define QUERY_DEV_CAP_ALTC_ENTRY_SZ_OFFSET 0x86 #define QUERY_DEV_CAP_EQC_ENTRY_SZ_OFFSET 0x88 #define QUERY_DEV_CAP_CQC_ENTRY_SZ_OFFSET 0x8a #define QUERY_DEV_CAP_SRQ_ENTRY_SZ_OFFSET 0x8c #define QUERY_DEV_CAP_C_MPT_ENTRY_SZ_OFFSET 0x8e #define QUERY_DEV_CAP_MTT_ENTRY_SZ_OFFSET 0x90 #define QUERY_DEV_CAP_D_MPT_ENTRY_SZ_OFFSET 0x92 #define QUERY_DEV_CAP_BMME_FLAGS_OFFSET 0x94 #define QUERY_DEV_CAP_CONFIG_DEV_OFFSET 0x94 #define QUERY_DEV_CAP_PHV_EN_OFFSET 0x96 #define QUERY_DEV_CAP_RSVD_LKEY_OFFSET 0x98 #define QUERY_DEV_CAP_MAX_ICM_SZ_OFFSET 0xa0 #define QUERY_DEV_CAP_ETH_BACKPL_OFFSET 0x9c #define QUERY_DEV_CAP_DIAG_RPRT_PER_PORT 0x9c #define QUERY_DEV_CAP_FW_REASSIGN_MAC 0x9d #define QUERY_DEV_CAP_VXLAN 0x9e #define QUERY_DEV_CAP_MAD_DEMUX_OFFSET 0xb0 #define QUERY_DEV_CAP_DMFS_HIGH_RATE_QPN_BASE_OFFSET 0xa8 #define QUERY_DEV_CAP_DMFS_HIGH_RATE_QPN_RANGE_OFFSET 0xac #define QUERY_DEV_CAP_QP_RATE_LIMIT_NUM_OFFSET 0xcc #define QUERY_DEV_CAP_QP_RATE_LIMIT_MAX_OFFSET 0xd0 #define QUERY_DEV_CAP_QP_RATE_LIMIT_MIN_OFFSET 0xd2 dev_cap->flags2 = 0; mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) return PTR_ERR(mailbox); outbox = mailbox->buf; err = mlx4_cmd_box(dev, 0, mailbox->dma, 0, 0, MLX4_CMD_QUERY_DEV_CAP, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); if (err) goto out; if (mlx4_is_mfunc(dev)) disable_unsupported_roce_caps(outbox); MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_QP_OFFSET); dev_cap->reserved_qps = 1 << (field & 0xf); MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_QP_OFFSET); dev_cap->max_qps = 1 << (field & 0x1f); MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_SRQ_OFFSET); dev_cap->reserved_srqs = 1 << (field >> 4); MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_SRQ_OFFSET); dev_cap->max_srqs = 1 << (field & 0x1f); MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_CQ_SZ_OFFSET); dev_cap->max_cq_sz = 1 << field; MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_CQ_OFFSET); dev_cap->reserved_cqs = 1 << (field & 0xf); MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_CQ_OFFSET); dev_cap->max_cqs = 1 << (field & 0x1f); MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_MPT_OFFSET); dev_cap->max_mpts = 1 << (field & 0x3f); MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_EQ_OFFSET); dev_cap->reserved_eqs = 1 << (field & 0xf); MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_EQ_OFFSET); dev_cap->max_eqs = 1 << (field & 
0xf); MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_MTT_OFFSET); dev_cap->reserved_mtts = 1 << (field >> 4); MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_MRW_OFFSET); dev_cap->reserved_mrws = 1 << (field & 0xf); MLX4_GET(size, outbox, QUERY_DEV_CAP_NUM_SYS_EQ_OFFSET); dev_cap->num_sys_eqs = size & 0xfff; MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_REQ_QP_OFFSET); dev_cap->max_requester_per_qp = 1 << (field & 0x3f); MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_RES_QP_OFFSET); dev_cap->max_responder_per_qp = 1 << (field & 0x3f); MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_GSO_OFFSET); field &= 0x1f; if (!field) dev_cap->max_gso_sz = 0; else dev_cap->max_gso_sz = 1 << field; MLX4_GET(field, outbox, QUERY_DEV_CAP_RSS_OFFSET); if (field & 0x20) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_RSS_XOR; if (field & 0x10) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_RSS_TOP; field &= 0xf; if (field) { dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_RSS; dev_cap->max_rss_tbl_sz = 1 << field; } else dev_cap->max_rss_tbl_sz = 0; MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_RDMA_OFFSET); dev_cap->max_rdma_global = 1 << (field & 0x3f); MLX4_GET(field, outbox, QUERY_DEV_CAP_ACK_DELAY_OFFSET); dev_cap->local_ca_ack_delay = field & 0x1f; MLX4_GET(field, outbox, QUERY_DEV_CAP_VL_PORT_OFFSET); dev_cap->num_ports = field & 0xf; MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_MSG_SZ_OFFSET); dev_cap->max_msg_sz = 1 << (field & 0x1f); MLX4_GET(field, outbox, QUERY_DEV_CAP_PORT_FLOWSTATS_COUNTERS_OFFSET); if (field & 0x10) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_FLOWSTATS_EN; MLX4_GET(field, outbox, QUERY_DEV_CAP_FLOW_STEERING_RANGE_EN_OFFSET); if (field & 0x80) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_FS_EN; dev_cap->fs_log_max_ucast_qp_range_size = field & 0x1f; if (field & 0x20) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_DMFS_UC_MC_SNIFFER; MLX4_GET(field, outbox, QUERY_DEV_CAP_PORT_BEACON_OFFSET); if (field & 0x80) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_PORT_BEACON; MLX4_GET(field, outbox, QUERY_DEV_CAP_FLOW_STEERING_IPOIB_OFFSET); if (field & 0x80) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_DMFS_IPOIB; MLX4_GET(field, outbox, QUERY_DEV_CAP_FLOW_STEERING_MAX_QP_OFFSET); dev_cap->fs_max_num_qp_per_entry = field; MLX4_GET(field, outbox, QUERY_DEV_CAP_SL2VL_EVENT_OFFSET); if (field & (1 << 5)) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_SL_TO_VL_CHANGE_EVENT; MLX4_GET(field, outbox, QUERY_DEV_CAP_ECN_QCN_VER_OFFSET); if (field & 0x1) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_QCN; MLX4_GET(stat_rate, outbox, QUERY_DEV_CAP_RATE_SUPPORT_OFFSET); dev_cap->stat_rate_support = stat_rate; MLX4_GET(field, outbox, QUERY_DEV_CAP_CQ_TS_SUPPORT_OFFSET); if (field & 0x80) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_TS; MLX4_GET(ext_flags, outbox, QUERY_DEV_CAP_EXT_FLAGS_OFFSET); MLX4_GET(flags, outbox, QUERY_DEV_CAP_FLAGS_OFFSET); dev_cap->flags = flags | (u64)ext_flags << 32; MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_UAR_OFFSET); dev_cap->reserved_uars = field >> 4; MLX4_GET(field, outbox, QUERY_DEV_CAP_UAR_SZ_OFFSET); dev_cap->uar_size = 1 << ((field & 0x3f) + 20); MLX4_GET(field, outbox, QUERY_DEV_CAP_PAGE_SZ_OFFSET); dev_cap->min_page_sz = 1 << field; MLX4_GET(field, outbox, QUERY_DEV_CAP_BF_OFFSET); if (field & 0x80) { MLX4_GET(field, outbox, QUERY_DEV_CAP_LOG_BF_REG_SZ_OFFSET); dev_cap->bf_reg_size = 1 << (field & 0x1f); MLX4_GET(field, outbox, QUERY_DEV_CAP_LOG_MAX_BF_REGS_PER_PAGE_OFFSET); if ((1 << (field & 0x3f)) > (PAGE_SIZE / dev_cap->bf_reg_size)) field = 3; dev_cap->bf_regs_per_page = 1 << (field & 0x3f); } else { dev_cap->bf_reg_size = 0; } MLX4_GET(field, outbox, 
QUERY_DEV_CAP_MAX_SG_SQ_OFFSET); dev_cap->max_sq_sg = field; MLX4_GET(size, outbox, QUERY_DEV_CAP_MAX_DESC_SZ_SQ_OFFSET); dev_cap->max_sq_desc_sz = size; MLX4_GET(field, outbox, QUERY_DEV_CAP_SVLAN_BY_QP_OFFSET); if (field & 0x1) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_SVLAN_BY_QP; MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_QP_MCG_OFFSET); dev_cap->max_qp_per_mcg = 1 << field; MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_MCG_OFFSET); dev_cap->reserved_mgms = field & 0xf; MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_MCG_OFFSET); dev_cap->max_mcgs = 1 << field; MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_PD_OFFSET); dev_cap->reserved_pds = field >> 4; MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_PD_OFFSET); dev_cap->max_pds = 1 << (field & 0x3f); MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_XRC_OFFSET); dev_cap->reserved_xrcds = field >> 4; MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_XRC_OFFSET); dev_cap->max_xrcds = 1 << (field & 0x1f); MLX4_GET(size, outbox, QUERY_DEV_CAP_RDMARC_ENTRY_SZ_OFFSET); dev_cap->rdmarc_entry_sz = size; MLX4_GET(size, outbox, QUERY_DEV_CAP_QPC_ENTRY_SZ_OFFSET); dev_cap->qpc_entry_sz = size; MLX4_GET(size, outbox, QUERY_DEV_CAP_AUX_ENTRY_SZ_OFFSET); dev_cap->aux_entry_sz = size; MLX4_GET(size, outbox, QUERY_DEV_CAP_ALTC_ENTRY_SZ_OFFSET); dev_cap->altc_entry_sz = size; MLX4_GET(size, outbox, QUERY_DEV_CAP_EQC_ENTRY_SZ_OFFSET); dev_cap->eqc_entry_sz = size; MLX4_GET(size, outbox, QUERY_DEV_CAP_CQC_ENTRY_SZ_OFFSET); dev_cap->cqc_entry_sz = size; MLX4_GET(size, outbox, QUERY_DEV_CAP_SRQ_ENTRY_SZ_OFFSET); dev_cap->srq_entry_sz = size; MLX4_GET(size, outbox, QUERY_DEV_CAP_C_MPT_ENTRY_SZ_OFFSET); dev_cap->cmpt_entry_sz = size; MLX4_GET(size, outbox, QUERY_DEV_CAP_MTT_ENTRY_SZ_OFFSET); dev_cap->mtt_entry_sz = size; MLX4_GET(size, outbox, QUERY_DEV_CAP_D_MPT_ENTRY_SZ_OFFSET); dev_cap->dmpt_entry_sz = size; MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_SRQ_SZ_OFFSET); dev_cap->max_srq_sz = 1 << field; MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_QP_SZ_OFFSET); dev_cap->max_qp_sz = 1 << field; MLX4_GET(field, outbox, QUERY_DEV_CAP_RSZ_SRQ_OFFSET); dev_cap->resize_srq = field & 1; MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_SG_RQ_OFFSET); dev_cap->max_rq_sg = field; MLX4_GET(size, outbox, QUERY_DEV_CAP_MAX_DESC_SZ_RQ_OFFSET); dev_cap->max_rq_desc_sz = size; MLX4_GET(field, outbox, QUERY_DEV_CAP_CQ_EQ_CACHE_LINE_STRIDE); if (field & (1 << 4)) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_QOS_VPP; if (field & (1 << 5)) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_ETH_PROT_CTRL; if (field & (1 << 6)) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_CQE_STRIDE; if (field & (1 << 7)) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_EQE_STRIDE; MLX4_GET(dev_cap->bmme_flags, outbox, QUERY_DEV_CAP_BMME_FLAGS_OFFSET); if (dev_cap->bmme_flags & MLX4_FLAG_ROCE_V1_V2) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_ROCE_V1_V2; if (dev_cap->bmme_flags & MLX4_FLAG_PORT_REMAP) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_PORT_REMAP; MLX4_GET(field, outbox, QUERY_DEV_CAP_CONFIG_DEV_OFFSET); if (field & 0x20) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_CONFIG_DEV; if (field & (1 << 2)) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_IGNORE_FCS; MLX4_GET(field, outbox, QUERY_DEV_CAP_PHV_EN_OFFSET); if (field & 0x80) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_PHV_EN; if (field & 0x40) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_SKIP_OUTER_VLAN; MLX4_GET(dev_cap->reserved_lkey, outbox, QUERY_DEV_CAP_RSVD_LKEY_OFFSET); MLX4_GET(field32, outbox, QUERY_DEV_CAP_ETH_BACKPL_OFFSET); if (field32 & (1 << 0)) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_ETH_BACKPL_AN_REP; if (field32 & (1 << 7)) dev_cap->flags2 |= 
MLX4_DEV_CAP_FLAG2_RECOVERABLE_ERROR_EVENT; MLX4_GET(field32, outbox, QUERY_DEV_CAP_DIAG_RPRT_PER_PORT); if (field32 & (1 << 17)) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT; MLX4_GET(field, outbox, QUERY_DEV_CAP_FW_REASSIGN_MAC); if (field & (1 << 6)) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_REASSIGN_MAC_EN; MLX4_GET(field, outbox, QUERY_DEV_CAP_VXLAN); if (field & (1 << 3)) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_VXLAN_OFFLOADS; if (field & (1 << 5)) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_ETS_CFG; MLX4_GET(dev_cap->max_icm_sz, outbox, QUERY_DEV_CAP_MAX_ICM_SZ_OFFSET); if (dev_cap->flags & MLX4_DEV_CAP_FLAG_COUNTERS) MLX4_GET(dev_cap->max_counters, outbox, QUERY_DEV_CAP_MAX_COUNTERS_OFFSET); MLX4_GET(field32, outbox, QUERY_DEV_CAP_MAD_DEMUX_OFFSET); if (field32 & (1 << 0)) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_MAD_DEMUX; MLX4_GET(dev_cap->dmfs_high_rate_qpn_base, outbox, QUERY_DEV_CAP_DMFS_HIGH_RATE_QPN_BASE_OFFSET); dev_cap->dmfs_high_rate_qpn_base &= MGM_QPN_MASK; MLX4_GET(dev_cap->dmfs_high_rate_qpn_range, outbox, QUERY_DEV_CAP_DMFS_HIGH_RATE_QPN_RANGE_OFFSET); dev_cap->dmfs_high_rate_qpn_range &= MGM_QPN_MASK; MLX4_GET(size, outbox, QUERY_DEV_CAP_QP_RATE_LIMIT_NUM_OFFSET); dev_cap->rl_caps.num_rates = size; if (dev_cap->rl_caps.num_rates) { dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_QP_RATE_LIMIT; MLX4_GET(size, outbox, QUERY_DEV_CAP_QP_RATE_LIMIT_MAX_OFFSET); dev_cap->rl_caps.max_val = size & 0xfff; dev_cap->rl_caps.max_unit = size >> 14; MLX4_GET(size, outbox, QUERY_DEV_CAP_QP_RATE_LIMIT_MIN_OFFSET); dev_cap->rl_caps.min_val = size & 0xfff; dev_cap->rl_caps.min_unit = size >> 14; } MLX4_GET(field32, outbox, QUERY_DEV_CAP_EXT_2_FLAGS_OFFSET); if (field32 & (1 << 16)) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_UPDATE_QP; if (field32 & (1 << 18)) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_UPDATE_QP_SRC_CHECK_LB; if (field32 & (1 << 19)) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_LB_SRC_CHK; if (field32 & (1 << 26)) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_VLAN_CONTROL; if (field32 & (1 << 20)) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_FSM; if (field32 & (1 << 21)) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_80_VFS; for (i = 1; i <= dev_cap->num_ports; i++) { err = mlx4_QUERY_PORT(dev, i, dev_cap->port_cap + i); if (err) goto out; } /* * Each UAR has 4 EQ doorbells; so if a UAR is reserved, then * we can't use any EQs whose doorbell falls on that page, * even if the EQ itself isn't reserved. 
*/ if (dev_cap->num_sys_eqs == 0) dev_cap->reserved_eqs = max(dev_cap->reserved_uars * 4, dev_cap->reserved_eqs); else dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_SYS_EQS; out: mlx4_free_cmd_mailbox(dev, mailbox); return err; } void mlx4_dev_cap_dump(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) { if (dev_cap->bf_reg_size > 0) mlx4_dbg(dev, "BlueFlame available (reg size %d, regs/page %d)\n", dev_cap->bf_reg_size, dev_cap->bf_regs_per_page); else mlx4_dbg(dev, "BlueFlame not available\n"); mlx4_dbg(dev, "Base MM extensions: flags %08x, rsvd L_Key %08x\n", dev_cap->bmme_flags, dev_cap->reserved_lkey); mlx4_dbg(dev, "Max ICM size %lld MB\n", (unsigned long long) dev_cap->max_icm_sz >> 20); mlx4_dbg(dev, "Max QPs: %d, reserved QPs: %d, entry size: %d\n", dev_cap->max_qps, dev_cap->reserved_qps, dev_cap->qpc_entry_sz); mlx4_dbg(dev, "Max SRQs: %d, reserved SRQs: %d, entry size: %d\n", dev_cap->max_srqs, dev_cap->reserved_srqs, dev_cap->srq_entry_sz); mlx4_dbg(dev, "Max CQs: %d, reserved CQs: %d, entry size: %d\n", dev_cap->max_cqs, dev_cap->reserved_cqs, dev_cap->cqc_entry_sz); mlx4_dbg(dev, "Num sys EQs: %d, max EQs: %d, reserved EQs: %d, entry size: %d\n", dev_cap->num_sys_eqs, dev_cap->max_eqs, dev_cap->reserved_eqs, dev_cap->eqc_entry_sz); mlx4_dbg(dev, "reserved MPTs: %d, reserved MTTs: %d\n", dev_cap->reserved_mrws, dev_cap->reserved_mtts); mlx4_dbg(dev, "Max PDs: %d, reserved PDs: %d, reserved UARs: %d\n", dev_cap->max_pds, dev_cap->reserved_pds, dev_cap->reserved_uars); mlx4_dbg(dev, "Max QP/MCG: %d, reserved MGMs: %d\n", dev_cap->max_pds, dev_cap->reserved_mgms); mlx4_dbg(dev, "Max CQEs: %d, max WQEs: %d, max SRQ WQEs: %d\n", dev_cap->max_cq_sz, dev_cap->max_qp_sz, dev_cap->max_srq_sz); mlx4_dbg(dev, "Local CA ACK delay: %d, max MTU: %d, port width cap: %d\n", dev_cap->local_ca_ack_delay, 128 << dev_cap->port_cap[1].ib_mtu, dev_cap->port_cap[1].max_port_width); mlx4_dbg(dev, "Max SQ desc size: %d, max SQ S/G: %d\n", dev_cap->max_sq_desc_sz, dev_cap->max_sq_sg); mlx4_dbg(dev, "Max RQ desc size: %d, max RQ S/G: %d\n", dev_cap->max_rq_desc_sz, dev_cap->max_rq_sg); mlx4_dbg(dev, "Max GSO size: %d\n", dev_cap->max_gso_sz); mlx4_dbg(dev, "Max counters: %d\n", dev_cap->max_counters); mlx4_dbg(dev, "Max RSS Table size: %d\n", dev_cap->max_rss_tbl_sz); mlx4_dbg(dev, "DMFS high rate steer QPn base: %d\n", dev_cap->dmfs_high_rate_qpn_base); mlx4_dbg(dev, "DMFS high rate steer QPn range: %d\n", dev_cap->dmfs_high_rate_qpn_range); if (dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_QP_RATE_LIMIT) { struct mlx4_rate_limit_caps *rl_caps = &dev_cap->rl_caps; mlx4_dbg(dev, "QP Rate-Limit: #rates %d, unit/val max %d/%d, min %d/%d\n", rl_caps->num_rates, rl_caps->max_unit, rl_caps->max_val, rl_caps->min_unit, rl_caps->min_val); } dump_dev_cap_flags(dev, dev_cap->flags); dump_dev_cap_flags2(dev, dev_cap->flags2); } int mlx4_QUERY_PORT(struct mlx4_dev *dev, int port, struct mlx4_port_cap *port_cap) { struct mlx4_cmd_mailbox *mailbox; u32 *outbox; u8 field; u32 field32; int err; mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) return PTR_ERR(mailbox); outbox = mailbox->buf; if (dev->flags & MLX4_FLAG_OLD_PORT_CMDS) { err = mlx4_cmd_box(dev, 0, mailbox->dma, 0, 0, MLX4_CMD_QUERY_DEV_CAP, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); if (err) goto out; MLX4_GET(field, outbox, QUERY_DEV_CAP_VL_PORT_OFFSET); port_cap->max_vl = field >> 4; MLX4_GET(field, outbox, QUERY_DEV_CAP_MTU_WIDTH_OFFSET); port_cap->ib_mtu = field >> 4; port_cap->max_port_width = field & 0xf; MLX4_GET(field, outbox, 
QUERY_DEV_CAP_MAX_GID_OFFSET); port_cap->max_gids = 1 << (field & 0xf); MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_PKEY_OFFSET); port_cap->max_pkeys = 1 << (field & 0xf); } else { #define QUERY_PORT_SUPPORTED_TYPE_OFFSET 0x00 #define QUERY_PORT_MTU_OFFSET 0x01 #define QUERY_PORT_ETH_MTU_OFFSET 0x02 #define QUERY_PORT_WIDTH_OFFSET 0x06 #define QUERY_PORT_MAX_GID_PKEY_OFFSET 0x07 #define QUERY_PORT_MAX_MACVLAN_OFFSET 0x0a #define QUERY_PORT_MAX_VL_OFFSET 0x0b #define QUERY_PORT_MAC_OFFSET 0x10 #define QUERY_PORT_TRANS_VENDOR_OFFSET 0x18 #define QUERY_PORT_WAVELENGTH_OFFSET 0x1c #define QUERY_PORT_TRANS_CODE_OFFSET 0x20 err = mlx4_cmd_box(dev, 0, mailbox->dma, port, 0, MLX4_CMD_QUERY_PORT, MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); if (err) goto out; MLX4_GET(field, outbox, QUERY_PORT_SUPPORTED_TYPE_OFFSET); port_cap->link_state = (field & 0x80) >> 7; port_cap->supported_port_types = field & 3; port_cap->suggested_type = (field >> 3) & 1; port_cap->default_sense = (field >> 4) & 1; port_cap->dmfs_optimized_state = (field >> 5) & 1; MLX4_GET(field, outbox, QUERY_PORT_MTU_OFFSET); port_cap->ib_mtu = field & 0xf; MLX4_GET(field, outbox, QUERY_PORT_WIDTH_OFFSET); port_cap->max_port_width = field & 0xf; MLX4_GET(field, outbox, QUERY_PORT_MAX_GID_PKEY_OFFSET); port_cap->max_gids = 1 << (field >> 4); port_cap->max_pkeys = 1 << (field & 0xf); MLX4_GET(field, outbox, QUERY_PORT_MAX_VL_OFFSET); port_cap->max_vl = field & 0xf; port_cap->max_tc_eth = field >> 4; MLX4_GET(field, outbox, QUERY_PORT_MAX_MACVLAN_OFFSET); port_cap->log_max_macs = field & 0xf; port_cap->log_max_vlans = field >> 4; MLX4_GET(port_cap->eth_mtu, outbox, QUERY_PORT_ETH_MTU_OFFSET); MLX4_GET(port_cap->def_mac, outbox, QUERY_PORT_MAC_OFFSET); MLX4_GET(field32, outbox, QUERY_PORT_TRANS_VENDOR_OFFSET); port_cap->trans_type = field32 >> 24; port_cap->vendor_oui = field32 & 0xffffff; MLX4_GET(port_cap->wavelength, outbox, QUERY_PORT_WAVELENGTH_OFFSET); MLX4_GET(port_cap->trans_code, outbox, QUERY_PORT_TRANS_CODE_OFFSET); } out: mlx4_free_cmd_mailbox(dev, mailbox); return err; } #define DEV_CAP_EXT_2_FLAG_PFC_COUNTERS (1 << 28) #define DEV_CAP_EXT_2_FLAG_VLAN_CONTROL (1 << 26) #define DEV_CAP_EXT_2_FLAG_80_VFS (1 << 21) #define DEV_CAP_EXT_2_FLAG_FSM (1 << 20) int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { u64 flags; int err = 0; u8 field; u16 field16; u32 bmme_flags, field32; int real_port; int slave_port; int first_port; struct mlx4_active_ports actv_ports; err = mlx4_cmd_box(dev, 0, outbox->dma, 0, 0, MLX4_CMD_QUERY_DEV_CAP, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); if (err) return err; disable_unsupported_roce_caps(outbox->buf); /* add port mng change event capability and disable mw type 1 * unconditionally to slaves */ MLX4_GET(flags, outbox->buf, QUERY_DEV_CAP_EXT_FLAGS_OFFSET); flags |= MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV; flags &= ~MLX4_DEV_CAP_FLAG_MEM_WINDOW; actv_ports = mlx4_get_active_ports(dev, slave); first_port = find_first_bit(actv_ports.ports, dev->caps.num_ports); for (slave_port = 0, real_port = first_port; real_port < first_port + bitmap_weight(actv_ports.ports, dev->caps.num_ports); ++real_port, ++slave_port) { if (flags & (MLX4_DEV_CAP_FLAG_WOL_PORT1 << real_port)) flags |= MLX4_DEV_CAP_FLAG_WOL_PORT1 << slave_port; else flags &= ~(MLX4_DEV_CAP_FLAG_WOL_PORT1 << slave_port); } for (; slave_port < dev->caps.num_ports; ++slave_port) flags &= ~(MLX4_DEV_CAP_FLAG_WOL_PORT1 << slave_port); /* 
Not exposing RSS IP fragments to guests */ flags &= ~MLX4_DEV_CAP_FLAG_RSS_IP_FRAG; MLX4_PUT(outbox->buf, flags, QUERY_DEV_CAP_EXT_FLAGS_OFFSET); MLX4_GET(field, outbox->buf, QUERY_DEV_CAP_VL_PORT_OFFSET); field &= ~0x0F; field |= bitmap_weight(actv_ports.ports, dev->caps.num_ports) & 0x0F; MLX4_PUT(outbox->buf, field, QUERY_DEV_CAP_VL_PORT_OFFSET); /* For guests, disable timestamp */ MLX4_GET(field, outbox->buf, QUERY_DEV_CAP_CQ_TS_SUPPORT_OFFSET); field &= 0x7f; MLX4_PUT(outbox->buf, field, QUERY_DEV_CAP_CQ_TS_SUPPORT_OFFSET); /* For guests, disable vxlan tunneling and QoS support */ MLX4_GET(field, outbox->buf, QUERY_DEV_CAP_VXLAN); field &= 0xd7; MLX4_PUT(outbox->buf, field, QUERY_DEV_CAP_VXLAN); /* For guests, disable port BEACON */ MLX4_GET(field, outbox->buf, QUERY_DEV_CAP_PORT_BEACON_OFFSET); field &= 0x7f; MLX4_PUT(outbox->buf, field, QUERY_DEV_CAP_PORT_BEACON_OFFSET); /* For guests, report Blueflame disabled */ MLX4_GET(field, outbox->buf, QUERY_DEV_CAP_BF_OFFSET); field &= 0x7f; MLX4_PUT(outbox->buf, field, QUERY_DEV_CAP_BF_OFFSET); /* For guests, disable mw type 2 and port remap*/ MLX4_GET(bmme_flags, outbox->buf, QUERY_DEV_CAP_BMME_FLAGS_OFFSET); bmme_flags &= ~MLX4_BMME_FLAG_TYPE_2_WIN; bmme_flags &= ~MLX4_FLAG_PORT_REMAP; MLX4_PUT(outbox->buf, bmme_flags, QUERY_DEV_CAP_BMME_FLAGS_OFFSET); /* turn off device-managed steering capability if not enabled */ if (dev->caps.steering_mode != MLX4_STEERING_MODE_DEVICE_MANAGED) { MLX4_GET(field, outbox->buf, QUERY_DEV_CAP_FLOW_STEERING_RANGE_EN_OFFSET); field &= 0x7f; MLX4_PUT(outbox->buf, field, QUERY_DEV_CAP_FLOW_STEERING_RANGE_EN_OFFSET); } /* turn off ipoib managed steering for guests */ MLX4_GET(field, outbox->buf, QUERY_DEV_CAP_FLOW_STEERING_IPOIB_OFFSET); field &= ~0x80; MLX4_PUT(outbox->buf, field, QUERY_DEV_CAP_FLOW_STEERING_IPOIB_OFFSET); /* turn off host side virt features (VST, FSM, etc) for guests */ MLX4_GET(field32, outbox->buf, QUERY_DEV_CAP_EXT_2_FLAGS_OFFSET); field32 &= ~(DEV_CAP_EXT_2_FLAG_VLAN_CONTROL | DEV_CAP_EXT_2_FLAG_80_VFS | DEV_CAP_EXT_2_FLAG_FSM | DEV_CAP_EXT_2_FLAG_PFC_COUNTERS); MLX4_PUT(outbox->buf, field32, QUERY_DEV_CAP_EXT_2_FLAGS_OFFSET); /* turn off QCN for guests */ MLX4_GET(field, outbox->buf, QUERY_DEV_CAP_ECN_QCN_VER_OFFSET); field &= 0xfe; MLX4_PUT(outbox->buf, field, QUERY_DEV_CAP_ECN_QCN_VER_OFFSET); /* turn off QP max-rate limiting for guests */ field16 = 0; MLX4_PUT(outbox->buf, field16, QUERY_DEV_CAP_QP_RATE_LIMIT_NUM_OFFSET); /* turn off QoS per VF support for guests */ MLX4_GET(field, outbox->buf, QUERY_DEV_CAP_CQ_EQ_CACHE_LINE_STRIDE); field &= 0xef; MLX4_PUT(outbox->buf, field, QUERY_DEV_CAP_CQ_EQ_CACHE_LINE_STRIDE); /* turn off ignore FCS feature for guests */ MLX4_GET(field, outbox->buf, QUERY_DEV_CAP_CONFIG_DEV_OFFSET); field &= 0xfb; MLX4_PUT(outbox->buf, field, QUERY_DEV_CAP_CONFIG_DEV_OFFSET); return 0; } static void disable_unsupported_roce_caps(void *buf) { u32 flags; MLX4_GET(flags, buf, QUERY_DEV_CAP_EXT_FLAGS_OFFSET); flags &= ~(1UL << 31); MLX4_PUT(buf, flags, QUERY_DEV_CAP_EXT_FLAGS_OFFSET); MLX4_GET(flags, buf, QUERY_DEV_CAP_EXT_2_FLAGS_OFFSET); flags &= ~(1UL << 24); MLX4_PUT(buf, flags, QUERY_DEV_CAP_EXT_2_FLAGS_OFFSET); MLX4_GET(flags, buf, QUERY_DEV_CAP_BMME_FLAGS_OFFSET); flags &= ~(MLX4_FLAG_ROCE_V1_V2); MLX4_PUT(buf, flags, QUERY_DEV_CAP_BMME_FLAGS_OFFSET); } int mlx4_QUERY_PORT_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { struct mlx4_priv *priv = 
mlx4_priv(dev); u64 def_mac; u8 port_type; u16 short_field; int err; int port = mlx4_slave_convert_port(dev, slave, vhcr->in_modifier & 0xFF); #define MLX4_VF_PORT_NO_LINK_SENSE_MASK 0xE0 #define MLX4_PORT_LINK_UP_MASK 0x80 #define QUERY_PORT_CUR_MAX_PKEY_OFFSET 0x0c #define QUERY_PORT_CUR_MAX_GID_OFFSET 0x0e if (port < 0) return -EINVAL; /* Protect against untrusted guests: enforce that this is the * QUERY_PORT general query. */ if (vhcr->op_modifier || vhcr->in_modifier & ~0xFF) return -EINVAL; vhcr->in_modifier = port; err = mlx4_cmd_box(dev, 0, outbox->dma, vhcr->in_modifier, 0, MLX4_CMD_QUERY_PORT, MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); if (!err && dev->caps.function != slave) { def_mac = priv->mfunc.master.vf_oper[slave].vport[vhcr->in_modifier].state.mac; MLX4_PUT(outbox->buf, def_mac, QUERY_PORT_MAC_OFFSET); /* get port type - currently only eth is enabled */ MLX4_GET(port_type, outbox->buf, QUERY_PORT_SUPPORTED_TYPE_OFFSET); /* No link sensing allowed */ port_type &= MLX4_VF_PORT_NO_LINK_SENSE_MASK; /* set port type to currently operating port type */ port_type |= (dev->caps.port_type[vhcr->in_modifier] & 0x3); if (0 /* IFLA_VF_LINK_STATE_ENABLE == admin_link_state */) port_type |= MLX4_PORT_LINK_UP_MASK; else if (1 /* IFLA_VF_LINK_STATE_DISABLE == admin_link_state */) port_type &= ~MLX4_PORT_LINK_UP_MASK; else if (0 /* IFLA_VF_LINK_STATE_AUTO == admin_link_state && mlx4_is_bonded(dev) */) { int other_port = (port == 1) ? 2 : 1; struct mlx4_port_cap port_cap; err = mlx4_QUERY_PORT(dev, other_port, &port_cap); if (err) goto out; port_type |= (port_cap.link_state << 7); } MLX4_PUT(outbox->buf, port_type, QUERY_PORT_SUPPORTED_TYPE_OFFSET); if (dev->caps.port_type[vhcr->in_modifier] == MLX4_PORT_TYPE_ETH) short_field = mlx4_get_slave_num_gids(dev, slave, port); else short_field = 1; /* slave max gids */ MLX4_PUT(outbox->buf, short_field, QUERY_PORT_CUR_MAX_GID_OFFSET); short_field = dev->caps.pkey_table_len[vhcr->in_modifier]; MLX4_PUT(outbox->buf, short_field, QUERY_PORT_CUR_MAX_PKEY_OFFSET); } out: return err; } int mlx4_get_slave_pkey_gid_tbl_len(struct mlx4_dev *dev, u8 port, int *gid_tbl_len, int *pkey_tbl_len) { struct mlx4_cmd_mailbox *mailbox; u32 *outbox; u16 field; int err; mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) return PTR_ERR(mailbox); err = mlx4_cmd_box(dev, 0, mailbox->dma, port, 0, MLX4_CMD_QUERY_PORT, MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED); if (err) goto out; outbox = mailbox->buf; MLX4_GET(field, outbox, QUERY_PORT_CUR_MAX_GID_OFFSET); *gid_tbl_len = field; MLX4_GET(field, outbox, QUERY_PORT_CUR_MAX_PKEY_OFFSET); *pkey_tbl_len = field; out: mlx4_free_cmd_mailbox(dev, mailbox); return err; } EXPORT_SYMBOL(mlx4_get_slave_pkey_gid_tbl_len); int mlx4_map_cmd(struct mlx4_dev *dev, u16 op, struct mlx4_icm *icm, u64 virt) { struct mlx4_cmd_mailbox *mailbox; struct mlx4_icm_iter iter; __be64 *pages; int lg; int nent = 0; int i; int err = 0; int ts = 0, tc = 0; mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) return PTR_ERR(mailbox); pages = mailbox->buf; for (mlx4_icm_first(icm, &iter); !mlx4_icm_last(&iter); mlx4_icm_next(&iter)) { /* * We have to pass pages that are aligned to their * size, so find the least significant 1 in the * address or size and use that as our log2 size. 
*/ lg = ffs(mlx4_icm_addr(&iter) | mlx4_icm_size(&iter)) - 1; if (lg < MLX4_ICM_PAGE_SHIFT) { mlx4_warn(dev, "Got FW area not aligned to %d (%llx/%lx)\n", MLX4_ICM_PAGE_SIZE, (unsigned long long) mlx4_icm_addr(&iter), mlx4_icm_size(&iter)); err = -EINVAL; goto out; } for (i = 0; i < mlx4_icm_size(&iter) >> lg; ++i) { if (virt != -1) { pages[nent * 2] = cpu_to_be64(virt); virt += 1 << lg; } pages[nent * 2 + 1] = cpu_to_be64((mlx4_icm_addr(&iter) + (i << lg)) | (lg - MLX4_ICM_PAGE_SHIFT)); ts += 1 << (lg - 10); ++tc; if (++nent == MLX4_MAILBOX_SIZE / 16) { err = mlx4_cmd(dev, mailbox->dma, nent, 0, op, MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); if (err) goto out; nent = 0; } } } if (nent) err = mlx4_cmd(dev, mailbox->dma, nent, 0, op, MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); if (err) goto out; switch (op) { case MLX4_CMD_MAP_FA: mlx4_dbg(dev, "Mapped %d chunks/%d KB for FW\n", tc, ts); break; case MLX4_CMD_MAP_ICM_AUX: mlx4_dbg(dev, "Mapped %d chunks/%d KB for ICM aux\n", tc, ts); break; case MLX4_CMD_MAP_ICM: mlx4_dbg(dev, "Mapped %d chunks/%d KB at %llx for ICM\n", tc, ts, (unsigned long long) virt - (ts << 10)); break; } out: mlx4_free_cmd_mailbox(dev, mailbox); return err; } int mlx4_MAP_FA(struct mlx4_dev *dev, struct mlx4_icm *icm) { return mlx4_map_cmd(dev, MLX4_CMD_MAP_FA, icm, -1); } int mlx4_UNMAP_FA(struct mlx4_dev *dev) { return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_UNMAP_FA, MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); } int mlx4_RUN_FW(struct mlx4_dev *dev) { return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_RUN_FW, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); } int mlx4_QUERY_FW(struct mlx4_dev *dev) { struct mlx4_fw *fw = &mlx4_priv(dev)->fw; struct mlx4_cmd *cmd = &mlx4_priv(dev)->cmd; struct mlx4_cmd_mailbox *mailbox; u32 *outbox; int err = 0; u64 fw_ver; u16 cmd_if_rev; u8 lg; #define QUERY_FW_OUT_SIZE 0x100 #define QUERY_FW_VER_OFFSET 0x00 #define QUERY_FW_PPF_ID 0x09 #define QUERY_FW_CMD_IF_REV_OFFSET 0x0a #define QUERY_FW_MAX_CMD_OFFSET 0x0f #define QUERY_FW_ERR_START_OFFSET 0x30 #define QUERY_FW_ERR_SIZE_OFFSET 0x38 #define QUERY_FW_ERR_BAR_OFFSET 0x3c #define QUERY_FW_SIZE_OFFSET 0x00 #define QUERY_FW_CLR_INT_BASE_OFFSET 0x20 #define QUERY_FW_CLR_INT_BAR_OFFSET 0x28 #define QUERY_FW_COMM_BASE_OFFSET 0x40 #define QUERY_FW_COMM_BAR_OFFSET 0x48 #define QUERY_FW_CLOCK_OFFSET 0x50 #define QUERY_FW_CLOCK_BAR 0x58 mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) return PTR_ERR(mailbox); outbox = mailbox->buf; err = mlx4_cmd_box(dev, 0, mailbox->dma, 0, 0, MLX4_CMD_QUERY_FW, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); if (err) goto out; MLX4_GET(fw_ver, outbox, QUERY_FW_VER_OFFSET); /* * FW subminor version is at more significant bits than minor * version, so swap here. 
*/ dev->caps.fw_ver = (fw_ver & 0xffff00000000ull) | ((fw_ver & 0xffff0000ull) >> 16) | ((fw_ver & 0x0000ffffull) << 16); MLX4_GET(lg, outbox, QUERY_FW_PPF_ID); dev->caps.function = lg; if (mlx4_is_slave(dev)) goto out; MLX4_GET(cmd_if_rev, outbox, QUERY_FW_CMD_IF_REV_OFFSET); if (cmd_if_rev < MLX4_COMMAND_INTERFACE_MIN_REV || cmd_if_rev > MLX4_COMMAND_INTERFACE_MAX_REV) { mlx4_err(dev, "Installed FW has unsupported command interface revision %d\n", cmd_if_rev); mlx4_err(dev, "(Installed FW version is %d.%d.%03d)\n", (int) (dev->caps.fw_ver >> 32), (int) (dev->caps.fw_ver >> 16) & 0xffff, (int) dev->caps.fw_ver & 0xffff); mlx4_err(dev, "This driver version supports only revisions %d to %d\n", MLX4_COMMAND_INTERFACE_MIN_REV, MLX4_COMMAND_INTERFACE_MAX_REV); err = -ENODEV; goto out; } if (cmd_if_rev < MLX4_COMMAND_INTERFACE_NEW_PORT_CMDS) dev->flags |= MLX4_FLAG_OLD_PORT_CMDS; MLX4_GET(lg, outbox, QUERY_FW_MAX_CMD_OFFSET); cmd->max_cmds = 1 << lg; mlx4_dbg(dev, "FW version %d.%d.%03d (cmd intf rev %d), max commands %d\n", (int) (dev->caps.fw_ver >> 32), (int) (dev->caps.fw_ver >> 16) & 0xffff, (int) dev->caps.fw_ver & 0xffff, cmd_if_rev, cmd->max_cmds); MLX4_GET(fw->catas_offset, outbox, QUERY_FW_ERR_START_OFFSET); MLX4_GET(fw->catas_size, outbox, QUERY_FW_ERR_SIZE_OFFSET); MLX4_GET(fw->catas_bar, outbox, QUERY_FW_ERR_BAR_OFFSET); fw->catas_bar = (fw->catas_bar >> 6) * 2; mlx4_dbg(dev, "Catastrophic error buffer at 0x%llx, size 0x%x, BAR %d\n", (unsigned long long) fw->catas_offset, fw->catas_size, fw->catas_bar); MLX4_GET(fw->fw_pages, outbox, QUERY_FW_SIZE_OFFSET); MLX4_GET(fw->clr_int_base, outbox, QUERY_FW_CLR_INT_BASE_OFFSET); MLX4_GET(fw->clr_int_bar, outbox, QUERY_FW_CLR_INT_BAR_OFFSET); fw->clr_int_bar = (fw->clr_int_bar >> 6) * 2; MLX4_GET(fw->comm_base, outbox, QUERY_FW_COMM_BASE_OFFSET); MLX4_GET(fw->comm_bar, outbox, QUERY_FW_COMM_BAR_OFFSET); fw->comm_bar = (fw->comm_bar >> 6) * 2; mlx4_dbg(dev, "Communication vector bar:%d offset:0x%llx\n", fw->comm_bar, (unsigned long long)fw->comm_base); mlx4_dbg(dev, "FW size %d KB\n", fw->fw_pages >> 2); MLX4_GET(fw->clock_offset, outbox, QUERY_FW_CLOCK_OFFSET); MLX4_GET(fw->clock_bar, outbox, QUERY_FW_CLOCK_BAR); fw->clock_bar = (fw->clock_bar >> 6) * 2; mlx4_dbg(dev, "Internal clock bar:%d offset:0x%llx\n", fw->clock_bar, (unsigned long long)fw->clock_offset); /* * Round up number of system pages needed in case * MLX4_ICM_PAGE_SIZE < PAGE_SIZE. 
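 *
 * For example (illustrative sizes): with 4 KB ICM pages and 16 KB
 * system pages, a firmware image of 5 ICM pages is rounded up to 8
 * and then shifted down by PAGE_SHIFT - MLX4_ICM_PAGE_SHIFT = 2,
 * i.e. 2 system pages are reserved for it.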
*/ fw->fw_pages = ALIGN(fw->fw_pages, PAGE_SIZE / MLX4_ICM_PAGE_SIZE) >> (PAGE_SHIFT - MLX4_ICM_PAGE_SHIFT); mlx4_dbg(dev, "Clear int @ %llx, BAR %d\n", (unsigned long long) fw->clr_int_base, fw->clr_int_bar); out: mlx4_free_cmd_mailbox(dev, mailbox); return err; } int mlx4_QUERY_FW_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { u8 *outbuf; int err; outbuf = outbox->buf; err = mlx4_cmd_box(dev, 0, outbox->dma, 0, 0, MLX4_CMD_QUERY_FW, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); if (err) return err; /* for slaves, set pci PPF ID to invalid and zero out everything * else except FW version */ outbuf[0] = outbuf[1] = 0; memset(&outbuf[8], 0, QUERY_FW_OUT_SIZE - 8); outbuf[QUERY_FW_PPF_ID] = MLX4_INVALID_SLAVE_ID; return 0; } static void get_board_id(void *vsd, char *board_id) { int i; #define VSD_OFFSET_SIG1 0x00 #define VSD_OFFSET_SIG2 0xde #define VSD_OFFSET_MLX_BOARD_ID 0xd0 #define VSD_OFFSET_TS_BOARD_ID 0x20 #define VSD_SIGNATURE_TOPSPIN 0x5ad memset(board_id, 0, MLX4_BOARD_ID_LEN); if (be16_to_cpup(vsd + VSD_OFFSET_SIG1) == VSD_SIGNATURE_TOPSPIN && be16_to_cpup(vsd + VSD_OFFSET_SIG2) == VSD_SIGNATURE_TOPSPIN) { strlcpy(board_id, vsd + VSD_OFFSET_TS_BOARD_ID, MLX4_BOARD_ID_LEN); } else { /* * The board ID is a string but the firmware byte * swaps each 4-byte word before passing it back to * us. Therefore we need to swab it before printing. */ u32 *bid_u32 = (u32 *)board_id; for (i = 0; i < 4; ++i) { typedef struct { u32 value; } __packed u64_p_t; u32 *addr; u32 val; addr = (u32 *) (vsd + VSD_OFFSET_MLX_BOARD_ID + i * 4); val = ((u64_p_t *)addr)->value; val = swab32(val); ((u64_p_t *)&bid_u32[i])->value = val; } } } int mlx4_QUERY_ADAPTER(struct mlx4_dev *dev, struct mlx4_adapter *adapter) { struct mlx4_cmd_mailbox *mailbox; u32 *outbox; int err; #define QUERY_ADAPTER_OUT_SIZE 0x100 #define QUERY_ADAPTER_INTA_PIN_OFFSET 0x10 #define QUERY_ADAPTER_VSD_OFFSET 0x20 mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) return PTR_ERR(mailbox); outbox = mailbox->buf; err = mlx4_cmd_box(dev, 0, mailbox->dma, 0, 0, MLX4_CMD_QUERY_ADAPTER, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); if (err) goto out; MLX4_GET(adapter->inta_pin, outbox, QUERY_ADAPTER_INTA_PIN_OFFSET); get_board_id(outbox + QUERY_ADAPTER_VSD_OFFSET / 4, adapter->board_id); out: mlx4_free_cmd_mailbox(dev, mailbox); return err; } int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param) { struct mlx4_cmd_mailbox *mailbox; __be32 *inbox; int err; static const u8 a0_dmfs_hw_steering[] = { [MLX4_STEERING_DMFS_A0_DEFAULT] = 0, [MLX4_STEERING_DMFS_A0_DYNAMIC] = 1, [MLX4_STEERING_DMFS_A0_STATIC] = 2, [MLX4_STEERING_DMFS_A0_DISABLE] = 3 }; #define INIT_HCA_IN_SIZE 0x200 #define INIT_HCA_VERSION_OFFSET 0x000 #define INIT_HCA_VERSION 2 #define INIT_HCA_VXLAN_OFFSET 0x0c #define INIT_HCA_CACHELINE_SZ_OFFSET 0x0e #define INIT_HCA_FLAGS_OFFSET 0x014 #define INIT_HCA_RECOVERABLE_ERROR_EVENT_OFFSET 0x018 #define INIT_HCA_QPC_OFFSET 0x020 #define INIT_HCA_QPC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x10) #define INIT_HCA_LOG_QP_OFFSET (INIT_HCA_QPC_OFFSET + 0x17) #define INIT_HCA_SRQC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x28) #define INIT_HCA_LOG_SRQ_OFFSET (INIT_HCA_QPC_OFFSET + 0x2f) #define INIT_HCA_CQC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x30) #define INIT_HCA_LOG_CQ_OFFSET (INIT_HCA_QPC_OFFSET + 0x37) #define INIT_HCA_EQE_CQE_OFFSETS (INIT_HCA_QPC_OFFSET + 0x38) #define INIT_HCA_EQE_CQE_STRIDE_OFFSET (INIT_HCA_QPC_OFFSET + 
0x3b) #define INIT_HCA_ALTC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x40) #define INIT_HCA_AUXC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x50) #define INIT_HCA_EQC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x60) #define INIT_HCA_LOG_EQ_OFFSET (INIT_HCA_QPC_OFFSET + 0x67) #define INIT_HCA_NUM_SYS_EQS_OFFSET (INIT_HCA_QPC_OFFSET + 0x6a) #define INIT_HCA_RDMARC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x70) #define INIT_HCA_LOG_RD_OFFSET (INIT_HCA_QPC_OFFSET + 0x77) #define INIT_HCA_MCAST_OFFSET 0x0c0 #define INIT_HCA_MC_BASE_OFFSET (INIT_HCA_MCAST_OFFSET + 0x00) #define INIT_HCA_LOG_MC_ENTRY_SZ_OFFSET (INIT_HCA_MCAST_OFFSET + 0x12) #define INIT_HCA_LOG_MC_HASH_SZ_OFFSET (INIT_HCA_MCAST_OFFSET + 0x16) #define INIT_HCA_UC_STEERING_OFFSET (INIT_HCA_MCAST_OFFSET + 0x18) #define INIT_HCA_LOG_MC_TABLE_SZ_OFFSET (INIT_HCA_MCAST_OFFSET + 0x1b) #define INIT_HCA_DEVICE_MANAGED_FLOW_STEERING_EN 0x6 #define INIT_HCA_FS_PARAM_OFFSET 0x1d0 #define INIT_HCA_FS_BASE_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x00) #define INIT_HCA_FS_LOG_ENTRY_SZ_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x12) #define INIT_HCA_FS_A0_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x18) #define INIT_HCA_FS_LOG_TABLE_SZ_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x1b) #define INIT_HCA_FS_ETH_BITS_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x21) #define INIT_HCA_FS_ETH_NUM_ADDRS_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x22) #define INIT_HCA_FS_IB_BITS_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x25) #define INIT_HCA_FS_IB_NUM_ADDRS_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x26) #define INIT_HCA_TPT_OFFSET 0x0f0 #define INIT_HCA_DMPT_BASE_OFFSET (INIT_HCA_TPT_OFFSET + 0x00) #define INIT_HCA_TPT_MW_OFFSET (INIT_HCA_TPT_OFFSET + 0x08) #define INIT_HCA_LOG_MPT_SZ_OFFSET (INIT_HCA_TPT_OFFSET + 0x0b) #define INIT_HCA_MTT_BASE_OFFSET (INIT_HCA_TPT_OFFSET + 0x10) #define INIT_HCA_CMPT_BASE_OFFSET (INIT_HCA_TPT_OFFSET + 0x18) #define INIT_HCA_UAR_OFFSET 0x120 #define INIT_HCA_LOG_UAR_SZ_OFFSET (INIT_HCA_UAR_OFFSET + 0x0a) #define INIT_HCA_UAR_PAGE_SZ_OFFSET (INIT_HCA_UAR_OFFSET + 0x0b) mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) return PTR_ERR(mailbox); inbox = mailbox->buf; *((u8 *) mailbox->buf + INIT_HCA_VERSION_OFFSET) = INIT_HCA_VERSION; *((u8 *) mailbox->buf + INIT_HCA_CACHELINE_SZ_OFFSET) = (ilog2(cache_line_size()) - 4) << 5; #if defined(__LITTLE_ENDIAN) *(inbox + INIT_HCA_FLAGS_OFFSET / 4) &= ~cpu_to_be32(1 << 1); #elif defined(__BIG_ENDIAN) *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1 << 1); #else #error Host endianness not defined #endif /* Check port for UD address vector: */ *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1); /* Enable IPoIB checksumming if we can: */ if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IPOIB_CSUM) *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1 << 3); /* Enable QoS support if module parameter set */ if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ETS_CFG && enable_qos) *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1 << 2); /* enable counters */ if (dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS) *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1 << 4); /* Enable RSS spread to fragmented IP packets when supported */ if (dev->caps.flags & MLX4_DEV_CAP_FLAG_RSS_IP_FRAG) *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1 << 13); /* CX3 is capable of extending CQEs/EQEs from 32 to 64 bytes */ if (dev->caps.flags & MLX4_DEV_CAP_FLAG_64B_EQE) { *(inbox + INIT_HCA_EQE_CQE_OFFSETS / 4) |= cpu_to_be32(1 << 29); dev->caps.eqe_size = 64; dev->caps.eqe_factor = 1; } else { dev->caps.eqe_size = 32; dev->caps.eqe_factor = 0; } if (dev->caps.flags & 
	    MLX4_DEV_CAP_FLAG_64B_CQE) {
		*(inbox + INIT_HCA_EQE_CQE_OFFSETS / 4) |= cpu_to_be32(1 << 30);
		dev->caps.cqe_size = 64;
		dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_LARGE_CQE;
	} else {
		dev->caps.cqe_size = 32;
	}

#if 0 /* XXX not currently supported by the FreeBSD's mlxen */
	/* CX3 is capable of extending CQEs\EQEs to strides larger than 64B */
	if ((dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_EQE_STRIDE) &&
	    (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_CQE_STRIDE)) {
		dev->caps.eqe_size = cache_line_size();
		dev->caps.cqe_size = cache_line_size();
		dev->caps.eqe_factor = 0;
		MLX4_PUT(inbox, (u8)((ilog2(dev->caps.eqe_size) - 5) << 4 |
				     (ilog2(dev->caps.eqe_size) - 5)),
			 INIT_HCA_EQE_CQE_STRIDE_OFFSET);

		/* User still need to know to support CQE > 32B */
		dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_LARGE_CQE;
	}
#endif

	if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RECOVERABLE_ERROR_EVENT)
-		*(inbox + INIT_HCA_RECOVERABLE_ERROR_EVENT_OFFSET / 4) |= cpu_to_be32(1 << 31);
+		*(inbox + INIT_HCA_RECOVERABLE_ERROR_EVENT_OFFSET / 4) |= cpu_to_be32(1U << 31);

	/* QPC/EEC/CQC/EQC/RDMARC attributes */

	MLX4_PUT(inbox, param->qpc_base,      INIT_HCA_QPC_BASE_OFFSET);
	MLX4_PUT(inbox, param->log_num_qps,   INIT_HCA_LOG_QP_OFFSET);
	MLX4_PUT(inbox, param->srqc_base,     INIT_HCA_SRQC_BASE_OFFSET);
	MLX4_PUT(inbox, param->log_num_srqs,  INIT_HCA_LOG_SRQ_OFFSET);
	MLX4_PUT(inbox, param->cqc_base,      INIT_HCA_CQC_BASE_OFFSET);
	MLX4_PUT(inbox, param->log_num_cqs,   INIT_HCA_LOG_CQ_OFFSET);
	MLX4_PUT(inbox, param->altc_base,     INIT_HCA_ALTC_BASE_OFFSET);
	MLX4_PUT(inbox, param->auxc_base,     INIT_HCA_AUXC_BASE_OFFSET);
	MLX4_PUT(inbox, param->eqc_base,      INIT_HCA_EQC_BASE_OFFSET);
	MLX4_PUT(inbox, param->log_num_eqs,   INIT_HCA_LOG_EQ_OFFSET);
	MLX4_PUT(inbox, param->num_sys_eqs,   INIT_HCA_NUM_SYS_EQS_OFFSET);
	MLX4_PUT(inbox, param->rdmarc_base,   INIT_HCA_RDMARC_BASE_OFFSET);
	MLX4_PUT(inbox, param->log_rd_per_qp, INIT_HCA_LOG_RD_OFFSET);

	/* steering attributes */
	if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) {
		*(inbox + INIT_HCA_FLAGS_OFFSET / 4) |=
			cpu_to_be32(1 << INIT_HCA_DEVICE_MANAGED_FLOW_STEERING_EN);

		MLX4_PUT(inbox, param->mc_base, INIT_HCA_FS_BASE_OFFSET);
		MLX4_PUT(inbox, param->log_mc_entry_sz,
			 INIT_HCA_FS_LOG_ENTRY_SZ_OFFSET);
		MLX4_PUT(inbox, param->log_mc_table_sz,
			 INIT_HCA_FS_LOG_TABLE_SZ_OFFSET);
		/* Enable Ethernet flow steering
		 * with udp unicast and tcp unicast
		 */
		if (dev->caps.dmfs_high_steer_mode !=
		    MLX4_STEERING_DMFS_A0_STATIC)
			MLX4_PUT(inbox,
				 (u8)(MLX4_FS_UDP_UC_EN | MLX4_FS_TCP_UC_EN),
				 INIT_HCA_FS_ETH_BITS_OFFSET);
		MLX4_PUT(inbox, (u16) MLX4_FS_NUM_OF_L2_ADDR,
			 INIT_HCA_FS_ETH_NUM_ADDRS_OFFSET);
		/* Enable IPoIB flow steering
		 * with udp unicast and tcp unicast
		 */
		MLX4_PUT(inbox, (u8) (MLX4_FS_UDP_UC_EN | MLX4_FS_TCP_UC_EN),
			 INIT_HCA_FS_IB_BITS_OFFSET);
		MLX4_PUT(inbox, (u16) MLX4_FS_NUM_OF_L2_ADDR,
			 INIT_HCA_FS_IB_NUM_ADDRS_OFFSET);

		if (dev->caps.dmfs_high_steer_mode !=
		    MLX4_STEERING_DMFS_A0_NOT_SUPPORTED)
			MLX4_PUT(inbox,
				 ((u8)(a0_dmfs_hw_steering[dev->caps.dmfs_high_steer_mode] << 6)),
				 INIT_HCA_FS_A0_OFFSET);
	} else {
		MLX4_PUT(inbox, param->mc_base, INIT_HCA_MC_BASE_OFFSET);
		MLX4_PUT(inbox, param->log_mc_entry_sz,
			 INIT_HCA_LOG_MC_ENTRY_SZ_OFFSET);
		MLX4_PUT(inbox, param->log_mc_hash_sz,
			 INIT_HCA_LOG_MC_HASH_SZ_OFFSET);
		MLX4_PUT(inbox, param->log_mc_table_sz,
			 INIT_HCA_LOG_MC_TABLE_SZ_OFFSET);
		if (dev->caps.steering_mode == MLX4_STEERING_MODE_B0)
			MLX4_PUT(inbox, (u8) (1 << 3),
				 INIT_HCA_UC_STEERING_OFFSET);
	}

	/* TPT attributes */

	MLX4_PUT(inbox, param->dmpt_base,  INIT_HCA_DMPT_BASE_OFFSET);
	MLX4_PUT(inbox, param->mw_enabled,
INIT_HCA_TPT_MW_OFFSET); MLX4_PUT(inbox, param->log_mpt_sz, INIT_HCA_LOG_MPT_SZ_OFFSET); MLX4_PUT(inbox, param->mtt_base, INIT_HCA_MTT_BASE_OFFSET); MLX4_PUT(inbox, param->cmpt_base, INIT_HCA_CMPT_BASE_OFFSET); /* UAR attributes */ MLX4_PUT(inbox, param->uar_page_sz, INIT_HCA_UAR_PAGE_SZ_OFFSET); MLX4_PUT(inbox, param->log_uar_sz, INIT_HCA_LOG_UAR_SZ_OFFSET); /* set parser VXLAN attributes */ if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_VXLAN_OFFLOADS) { u8 parser_params = 0; MLX4_PUT(inbox, parser_params, INIT_HCA_VXLAN_OFFSET); } err = mlx4_cmd(dev, mailbox->dma, 0, 0, MLX4_CMD_INIT_HCA, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE); if (err) mlx4_err(dev, "INIT_HCA returns %d\n", err); mlx4_free_cmd_mailbox(dev, mailbox); return err; } int mlx4_QUERY_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param) { struct mlx4_cmd_mailbox *mailbox; __be32 *outbox; u32 dword_field; int err; u8 byte_field; static const u8 a0_dmfs_query_hw_steering[] = { [0] = MLX4_STEERING_DMFS_A0_DEFAULT, [1] = MLX4_STEERING_DMFS_A0_DYNAMIC, [2] = MLX4_STEERING_DMFS_A0_STATIC, [3] = MLX4_STEERING_DMFS_A0_DISABLE }; #define QUERY_HCA_GLOBAL_CAPS_OFFSET 0x04 #define QUERY_HCA_CORE_CLOCK_OFFSET 0x0c mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) return PTR_ERR(mailbox); outbox = mailbox->buf; err = mlx4_cmd_box(dev, 0, mailbox->dma, 0, 0, MLX4_CMD_QUERY_HCA, MLX4_CMD_TIME_CLASS_B, !mlx4_is_slave(dev)); if (err) goto out; MLX4_GET(param->global_caps, outbox, QUERY_HCA_GLOBAL_CAPS_OFFSET); MLX4_GET(param->hca_core_clock, outbox, QUERY_HCA_CORE_CLOCK_OFFSET); /* QPC/EEC/CQC/EQC/RDMARC attributes */ MLX4_GET(param->qpc_base, outbox, INIT_HCA_QPC_BASE_OFFSET); MLX4_GET(param->log_num_qps, outbox, INIT_HCA_LOG_QP_OFFSET); MLX4_GET(param->srqc_base, outbox, INIT_HCA_SRQC_BASE_OFFSET); MLX4_GET(param->log_num_srqs, outbox, INIT_HCA_LOG_SRQ_OFFSET); MLX4_GET(param->cqc_base, outbox, INIT_HCA_CQC_BASE_OFFSET); MLX4_GET(param->log_num_cqs, outbox, INIT_HCA_LOG_CQ_OFFSET); MLX4_GET(param->altc_base, outbox, INIT_HCA_ALTC_BASE_OFFSET); MLX4_GET(param->auxc_base, outbox, INIT_HCA_AUXC_BASE_OFFSET); MLX4_GET(param->eqc_base, outbox, INIT_HCA_EQC_BASE_OFFSET); MLX4_GET(param->log_num_eqs, outbox, INIT_HCA_LOG_EQ_OFFSET); MLX4_GET(param->num_sys_eqs, outbox, INIT_HCA_NUM_SYS_EQS_OFFSET); MLX4_GET(param->rdmarc_base, outbox, INIT_HCA_RDMARC_BASE_OFFSET); MLX4_GET(param->log_rd_per_qp, outbox, INIT_HCA_LOG_RD_OFFSET); MLX4_GET(dword_field, outbox, INIT_HCA_FLAGS_OFFSET); if (dword_field & (1 << INIT_HCA_DEVICE_MANAGED_FLOW_STEERING_EN)) { param->steering_mode = MLX4_STEERING_MODE_DEVICE_MANAGED; } else { MLX4_GET(byte_field, outbox, INIT_HCA_UC_STEERING_OFFSET); if (byte_field & 0x8) param->steering_mode = MLX4_STEERING_MODE_B0; else param->steering_mode = MLX4_STEERING_MODE_A0; } if (dword_field & (1 << 13)) param->rss_ip_frags = 1; /* steering attributes */ if (param->steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) { MLX4_GET(param->mc_base, outbox, INIT_HCA_FS_BASE_OFFSET); MLX4_GET(param->log_mc_entry_sz, outbox, INIT_HCA_FS_LOG_ENTRY_SZ_OFFSET); MLX4_GET(param->log_mc_table_sz, outbox, INIT_HCA_FS_LOG_TABLE_SZ_OFFSET); MLX4_GET(byte_field, outbox, INIT_HCA_FS_A0_OFFSET); param->dmfs_high_steer_mode = a0_dmfs_query_hw_steering[(byte_field >> 6) & 3]; } else { MLX4_GET(param->mc_base, outbox, INIT_HCA_MC_BASE_OFFSET); MLX4_GET(param->log_mc_entry_sz, outbox, INIT_HCA_LOG_MC_ENTRY_SZ_OFFSET); MLX4_GET(param->log_mc_hash_sz, outbox, INIT_HCA_LOG_MC_HASH_SZ_OFFSET); MLX4_GET(param->log_mc_table_sz, outbox, 
INIT_HCA_LOG_MC_TABLE_SZ_OFFSET); } /* CX3 is capable of extending CQEs/EQEs from 32 to 64 bytes */ MLX4_GET(byte_field, outbox, INIT_HCA_EQE_CQE_OFFSETS); if (byte_field & 0x20) /* 64-bytes eqe enabled */ param->dev_cap_enabled |= MLX4_DEV_CAP_64B_EQE_ENABLED; if (byte_field & 0x40) /* 64-bytes cqe enabled */ param->dev_cap_enabled |= MLX4_DEV_CAP_64B_CQE_ENABLED; /* CX3 is capable of extending CQEs\EQEs to strides larger than 64B */ MLX4_GET(byte_field, outbox, INIT_HCA_EQE_CQE_STRIDE_OFFSET); if (byte_field) { param->dev_cap_enabled |= MLX4_DEV_CAP_EQE_STRIDE_ENABLED; param->dev_cap_enabled |= MLX4_DEV_CAP_CQE_STRIDE_ENABLED; param->cqe_size = 1 << ((byte_field & MLX4_CQE_SIZE_MASK_STRIDE) + 5); param->eqe_size = 1 << (((byte_field & MLX4_EQE_SIZE_MASK_STRIDE) >> 4) + 5); } /* TPT attributes */ MLX4_GET(param->dmpt_base, outbox, INIT_HCA_DMPT_BASE_OFFSET); MLX4_GET(param->mw_enabled, outbox, INIT_HCA_TPT_MW_OFFSET); MLX4_GET(param->log_mpt_sz, outbox, INIT_HCA_LOG_MPT_SZ_OFFSET); MLX4_GET(param->mtt_base, outbox, INIT_HCA_MTT_BASE_OFFSET); MLX4_GET(param->cmpt_base, outbox, INIT_HCA_CMPT_BASE_OFFSET); /* UAR attributes */ MLX4_GET(param->uar_page_sz, outbox, INIT_HCA_UAR_PAGE_SZ_OFFSET); MLX4_GET(param->log_uar_sz, outbox, INIT_HCA_LOG_UAR_SZ_OFFSET); /* phv_check enable */ MLX4_GET(byte_field, outbox, INIT_HCA_CACHELINE_SZ_OFFSET); if (byte_field & 0x2) param->phv_check_en = 1; out: mlx4_free_cmd_mailbox(dev, mailbox); return err; } static int mlx4_hca_core_clock_update(struct mlx4_dev *dev) { struct mlx4_cmd_mailbox *mailbox; __be32 *outbox; int err; mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) { mlx4_warn(dev, "hca_core_clock mailbox allocation failed\n"); return PTR_ERR(mailbox); } outbox = mailbox->buf; err = mlx4_cmd_box(dev, 0, mailbox->dma, 0, 0, MLX4_CMD_QUERY_HCA, MLX4_CMD_TIME_CLASS_B, !mlx4_is_slave(dev)); if (err) { mlx4_warn(dev, "hca_core_clock update failed\n"); goto out; } MLX4_GET(dev->caps.hca_core_clock, outbox, QUERY_HCA_CORE_CLOCK_OFFSET); out: mlx4_free_cmd_mailbox(dev, mailbox); return err; } /* for IB-type ports only in SRIOV mode. 
Checks that both proxy QP0 * and real QP0 are active, so that the paravirtualized QP0 is ready * to operate */ static int check_qp0_state(struct mlx4_dev *dev, int function, int port) { struct mlx4_priv *priv = mlx4_priv(dev); /* irrelevant if not infiniband */ if (priv->mfunc.master.qp0_state[port].proxy_qp0_active && priv->mfunc.master.qp0_state[port].qp0_active) return 1; return 0; } int mlx4_INIT_PORT_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { struct mlx4_priv *priv = mlx4_priv(dev); int port = mlx4_slave_convert_port(dev, slave, vhcr->in_modifier); int err; if (port < 0) return -EINVAL; if (priv->mfunc.master.slave_state[slave].init_port_mask & (1 << port)) return 0; if (dev->caps.port_mask[port] != MLX4_PORT_TYPE_IB) { /* Enable port only if it was previously disabled */ if (!priv->mfunc.master.init_port_ref[port]) { err = mlx4_cmd(dev, 0, port, 0, MLX4_CMD_INIT_PORT, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); if (err) return err; } priv->mfunc.master.slave_state[slave].init_port_mask |= (1 << port); } else { if (slave == mlx4_master_func_num(dev)) { if (check_qp0_state(dev, slave, port) && !priv->mfunc.master.qp0_state[port].port_active) { err = mlx4_cmd(dev, 0, port, 0, MLX4_CMD_INIT_PORT, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); if (err) return err; priv->mfunc.master.qp0_state[port].port_active = 1; priv->mfunc.master.slave_state[slave].init_port_mask |= (1 << port); } } else priv->mfunc.master.slave_state[slave].init_port_mask |= (1 << port); } ++priv->mfunc.master.init_port_ref[port]; return 0; } int mlx4_INIT_PORT(struct mlx4_dev *dev, int port) { struct mlx4_cmd_mailbox *mailbox; u32 *inbox; int err; u32 flags; u16 field; if (dev->flags & MLX4_FLAG_OLD_PORT_CMDS) { #define INIT_PORT_IN_SIZE 256 #define INIT_PORT_FLAGS_OFFSET 0x00 #define INIT_PORT_FLAG_SIG (1 << 18) #define INIT_PORT_FLAG_NG (1 << 17) #define INIT_PORT_FLAG_G0 (1 << 16) #define INIT_PORT_VL_SHIFT 4 #define INIT_PORT_PORT_WIDTH_SHIFT 8 #define INIT_PORT_MTU_OFFSET 0x04 #define INIT_PORT_MAX_GID_OFFSET 0x06 #define INIT_PORT_MAX_PKEY_OFFSET 0x0a #define INIT_PORT_GUID0_OFFSET 0x10 #define INIT_PORT_NODE_GUID_OFFSET 0x18 #define INIT_PORT_SI_GUID_OFFSET 0x20 mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) return PTR_ERR(mailbox); inbox = mailbox->buf; flags = 0; flags |= (dev->caps.vl_cap[port] & 0xf) << INIT_PORT_VL_SHIFT; flags |= (dev->caps.port_width_cap[port] & 0xf) << INIT_PORT_PORT_WIDTH_SHIFT; MLX4_PUT(inbox, flags, INIT_PORT_FLAGS_OFFSET); field = 128 << dev->caps.ib_mtu_cap[port]; MLX4_PUT(inbox, field, INIT_PORT_MTU_OFFSET); field = dev->caps.gid_table_len[port]; MLX4_PUT(inbox, field, INIT_PORT_MAX_GID_OFFSET); field = dev->caps.pkey_table_len[port]; MLX4_PUT(inbox, field, INIT_PORT_MAX_PKEY_OFFSET); err = mlx4_cmd(dev, mailbox->dma, port, 0, MLX4_CMD_INIT_PORT, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); mlx4_free_cmd_mailbox(dev, mailbox); } else err = mlx4_cmd(dev, 0, port, 0, MLX4_CMD_INIT_PORT, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); if (!err) mlx4_hca_core_clock_update(dev); return err; } EXPORT_SYMBOL_GPL(mlx4_INIT_PORT); int mlx4_CLOSE_PORT_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { struct mlx4_priv *priv = mlx4_priv(dev); int port = mlx4_slave_convert_port(dev, slave, vhcr->in_modifier); int err; if (port < 0) return -EINVAL; if 
(!(priv->mfunc.master.slave_state[slave].init_port_mask & (1 << port))) return 0; if (dev->caps.port_mask[port] != MLX4_PORT_TYPE_IB) { if (priv->mfunc.master.init_port_ref[port] == 1) { err = mlx4_cmd(dev, 0, port, 0, MLX4_CMD_CLOSE_PORT, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); if (err) return err; } priv->mfunc.master.slave_state[slave].init_port_mask &= ~(1 << port); } else { /* infiniband port */ if (slave == mlx4_master_func_num(dev)) { if (!priv->mfunc.master.qp0_state[port].qp0_active && priv->mfunc.master.qp0_state[port].port_active) { err = mlx4_cmd(dev, 0, port, 0, MLX4_CMD_CLOSE_PORT, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); if (err) return err; priv->mfunc.master.slave_state[slave].init_port_mask &= ~(1 << port); priv->mfunc.master.qp0_state[port].port_active = 0; } } else priv->mfunc.master.slave_state[slave].init_port_mask &= ~(1 << port); } --priv->mfunc.master.init_port_ref[port]; return 0; } int mlx4_CLOSE_PORT(struct mlx4_dev *dev, int port) { return mlx4_cmd(dev, 0, port, 0, MLX4_CMD_CLOSE_PORT, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); } EXPORT_SYMBOL_GPL(mlx4_CLOSE_PORT); int mlx4_CLOSE_HCA(struct mlx4_dev *dev, int panic) { return mlx4_cmd(dev, 0, 0, panic, MLX4_CMD_CLOSE_HCA, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE); } struct mlx4_config_dev { __be32 update_flags; __be32 rsvd1[3]; __be16 vxlan_udp_dport; __be16 rsvd2; __be16 roce_v2_entropy; __be16 roce_v2_udp_dport; __be32 roce_flags; __be32 rsvd4[25]; __be16 rsvd5; u8 rsvd6; u8 rx_checksum_val; }; #define MLX4_VXLAN_UDP_DPORT (1 << 0) #define MLX4_ROCE_V2_UDP_DPORT BIT(3) #define MLX4_DISABLE_RX_PORT BIT(18) static int mlx4_CONFIG_DEV_set(struct mlx4_dev *dev, struct mlx4_config_dev *config_dev) { int err; struct mlx4_cmd_mailbox *mailbox; mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) return PTR_ERR(mailbox); memcpy(mailbox->buf, config_dev, sizeof(*config_dev)); err = mlx4_cmd(dev, mailbox->dma, 0, 0, MLX4_CMD_CONFIG_DEV, MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); mlx4_free_cmd_mailbox(dev, mailbox); return err; } static int mlx4_CONFIG_DEV_get(struct mlx4_dev *dev, struct mlx4_config_dev *config_dev) { int err; struct mlx4_cmd_mailbox *mailbox; mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) return PTR_ERR(mailbox); err = mlx4_cmd_box(dev, 0, mailbox->dma, 0, 1, MLX4_CMD_CONFIG_DEV, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); if (!err) memcpy(config_dev, mailbox->buf, sizeof(*config_dev)); mlx4_free_cmd_mailbox(dev, mailbox); return err; } /* Conversion between the HW values and the actual functionality. * The value represented by the array index, * and the functionality determined by the flags. 
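 *
 * For example, a hardware value of 2 selects entry [2] below, i.e.
 * MLX4_RX_CSUM_MODE_VAL_NON_TCP_UDP together with
 * MLX4_RX_CSUM_MODE_L4, while a value of 0 enables none of the modes.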
*/ static const u8 config_dev_csum_flags[] = { [0] = 0, [1] = MLX4_RX_CSUM_MODE_VAL_NON_TCP_UDP, [2] = MLX4_RX_CSUM_MODE_VAL_NON_TCP_UDP | MLX4_RX_CSUM_MODE_L4, [3] = MLX4_RX_CSUM_MODE_L4 | MLX4_RX_CSUM_MODE_IP_OK_IP_NON_TCP_UDP | MLX4_RX_CSUM_MODE_MULTI_VLAN }; int mlx4_config_dev_retrieval(struct mlx4_dev *dev, struct mlx4_config_dev_params *params) { struct mlx4_config_dev config_dev = {0}; int err; u8 csum_mask; #define CONFIG_DEV_RX_CSUM_MODE_MASK 0x7 #define CONFIG_DEV_RX_CSUM_MODE_PORT1_BIT_OFFSET 0 #define CONFIG_DEV_RX_CSUM_MODE_PORT2_BIT_OFFSET 4 if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_CONFIG_DEV)) return -ENOTSUPP; err = mlx4_CONFIG_DEV_get(dev, &config_dev); if (err) return err; csum_mask = (config_dev.rx_checksum_val >> CONFIG_DEV_RX_CSUM_MODE_PORT1_BIT_OFFSET) & CONFIG_DEV_RX_CSUM_MODE_MASK; if (csum_mask >= sizeof(config_dev_csum_flags)/sizeof(config_dev_csum_flags[0])) return -EINVAL; params->rx_csum_flags_port_1 = config_dev_csum_flags[csum_mask]; csum_mask = (config_dev.rx_checksum_val >> CONFIG_DEV_RX_CSUM_MODE_PORT2_BIT_OFFSET) & CONFIG_DEV_RX_CSUM_MODE_MASK; if (csum_mask >= sizeof(config_dev_csum_flags)/sizeof(config_dev_csum_flags[0])) return -EINVAL; params->rx_csum_flags_port_2 = config_dev_csum_flags[csum_mask]; params->vxlan_udp_dport = be16_to_cpu(config_dev.vxlan_udp_dport); return 0; } EXPORT_SYMBOL_GPL(mlx4_config_dev_retrieval); int mlx4_config_vxlan_port(struct mlx4_dev *dev, __be16 udp_port) { struct mlx4_config_dev config_dev; memset(&config_dev, 0, sizeof(config_dev)); config_dev.update_flags = cpu_to_be32(MLX4_VXLAN_UDP_DPORT); config_dev.vxlan_udp_dport = udp_port; return mlx4_CONFIG_DEV_set(dev, &config_dev); } EXPORT_SYMBOL_GPL(mlx4_config_vxlan_port); #define CONFIG_DISABLE_RX_PORT BIT(15) int mlx4_disable_rx_port_check(struct mlx4_dev *dev, bool dis) { struct mlx4_config_dev config_dev; memset(&config_dev, 0, sizeof(config_dev)); config_dev.update_flags = cpu_to_be32(MLX4_DISABLE_RX_PORT); if (dis) config_dev.roce_flags = cpu_to_be32(CONFIG_DISABLE_RX_PORT); return mlx4_CONFIG_DEV_set(dev, &config_dev); } int mlx4_config_roce_v2_port(struct mlx4_dev *dev, u16 udp_port) { struct mlx4_config_dev config_dev; memset(&config_dev, 0, sizeof(config_dev)); config_dev.update_flags = cpu_to_be32(MLX4_ROCE_V2_UDP_DPORT); config_dev.roce_v2_udp_dport = cpu_to_be16(udp_port); return mlx4_CONFIG_DEV_set(dev, &config_dev); } EXPORT_SYMBOL_GPL(mlx4_config_roce_v2_port); int mlx4_virt2phy_port_map(struct mlx4_dev *dev, u32 port1, u32 port2) { struct mlx4_cmd_mailbox *mailbox; struct { __be32 v_port1; __be32 v_port2; } *v2p; int err; mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) return -ENOMEM; v2p = mailbox->buf; v2p->v_port1 = cpu_to_be32(port1); v2p->v_port2 = cpu_to_be32(port2); err = mlx4_cmd(dev, mailbox->dma, 0, MLX4_SET_PORT_VIRT2PHY, MLX4_CMD_VIRT_PORT_MAP, MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); mlx4_free_cmd_mailbox(dev, mailbox); return err; } int mlx4_SET_ICM_SIZE(struct mlx4_dev *dev, u64 icm_size, u64 *aux_pages) { int ret = mlx4_cmd_imm(dev, icm_size, aux_pages, 0, 0, MLX4_CMD_SET_ICM_SIZE, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); if (ret) return ret; /* * Round up number of system pages needed in case * MLX4_ICM_PAGE_SIZE < PAGE_SIZE. */ *aux_pages = ALIGN(*aux_pages, PAGE_SIZE / MLX4_ICM_PAGE_SIZE) >> (PAGE_SHIFT - MLX4_ICM_PAGE_SHIFT); return 0; } int mlx4_NOP(struct mlx4_dev *dev) { /* Input modifier of 0x1f means "finish as soon as possible." 
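 *
 * (The NOP command has no effect on the device; it is used, for
 * example, to check that command completions and EQ interrupts are
 * being delivered at all.)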
*/ return mlx4_cmd(dev, 0, 0x1f, 0, MLX4_CMD_NOP, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); } int mlx4_query_diag_counters(struct mlx4_dev *dev, u8 op_modifier, const u32 offset[], u32 value[], size_t array_len, u8 port) { struct mlx4_cmd_mailbox *mailbox; u32 *outbox; size_t i; int ret; mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) return PTR_ERR(mailbox); outbox = mailbox->buf; ret = mlx4_cmd_box(dev, 0, mailbox->dma, port, op_modifier, MLX4_CMD_DIAG_RPRT, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); if (ret) goto out; for (i = 0; i < array_len; i++) { if (offset[i] > MLX4_MAILBOX_SIZE) { ret = -EINVAL; goto out; } MLX4_GET(value[i], outbox, offset[i]); } out: mlx4_free_cmd_mailbox(dev, mailbox); return ret; } EXPORT_SYMBOL(mlx4_query_diag_counters); int mlx4_get_phys_port_id(struct mlx4_dev *dev) { u8 port; u32 *outbox; struct mlx4_cmd_mailbox *mailbox; u32 in_mod; u32 guid_hi, guid_lo; int err, ret = 0; #define MOD_STAT_CFG_PORT_OFFSET 8 #define MOD_STAT_CFG_GUID_H 0X14 #define MOD_STAT_CFG_GUID_L 0X1c mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) return PTR_ERR(mailbox); outbox = mailbox->buf; for (port = 1; port <= dev->caps.num_ports; port++) { in_mod = port << MOD_STAT_CFG_PORT_OFFSET; err = mlx4_cmd_box(dev, 0, mailbox->dma, in_mod, 0x2, MLX4_CMD_MOD_STAT_CFG, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); if (err) { mlx4_err(dev, "Fail to get port %d uplink guid\n", port); ret = err; } else { MLX4_GET(guid_hi, outbox, MOD_STAT_CFG_GUID_H); MLX4_GET(guid_lo, outbox, MOD_STAT_CFG_GUID_L); dev->caps.phys_port_id[port] = (u64)guid_lo | (u64)guid_hi << 32; } } mlx4_free_cmd_mailbox(dev, mailbox); return ret; } #define MLX4_WOL_SETUP_MODE (5 << 28) int mlx4_wol_read(struct mlx4_dev *dev, u64 *config, int port) { u32 in_mod = MLX4_WOL_SETUP_MODE | port << 8; return mlx4_cmd_imm(dev, 0, config, in_mod, 0x3, MLX4_CMD_MOD_STAT_CFG, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); } EXPORT_SYMBOL_GPL(mlx4_wol_read); int mlx4_wol_write(struct mlx4_dev *dev, u64 config, int port) { u32 in_mod = MLX4_WOL_SETUP_MODE | port << 8; return mlx4_cmd(dev, config, in_mod, 0x1, MLX4_CMD_MOD_STAT_CFG, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); } EXPORT_SYMBOL_GPL(mlx4_wol_write); enum { ADD_TO_MCG = 0x26, }; void mlx4_opreq_action(struct work_struct *work) { struct mlx4_priv *priv = container_of(work, struct mlx4_priv, opreq_task); struct mlx4_dev *dev = &priv->dev; int num_tasks = atomic_read(&priv->opreq_count); struct mlx4_cmd_mailbox *mailbox; struct mlx4_mgm *mgm; u32 *outbox; u32 modifier; u16 token; u16 type; int err; u32 num_qps; struct mlx4_qp qp; int i; u8 rem_mcg; u8 prot; #define GET_OP_REQ_MODIFIER_OFFSET 0x08 #define GET_OP_REQ_TOKEN_OFFSET 0x14 #define GET_OP_REQ_TYPE_OFFSET 0x1a #define GET_OP_REQ_DATA_OFFSET 0x20 mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) { mlx4_err(dev, "Failed to allocate mailbox for GET_OP_REQ\n"); return; } outbox = mailbox->buf; while (num_tasks) { err = mlx4_cmd_box(dev, 0, mailbox->dma, 0, 0, MLX4_CMD_GET_OP_REQ, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); if (err) { mlx4_err(dev, "Failed to retrieve required operation: %d\n", err); return; } MLX4_GET(modifier, outbox, GET_OP_REQ_MODIFIER_OFFSET); MLX4_GET(token, outbox, GET_OP_REQ_TOKEN_OFFSET); MLX4_GET(type, outbox, GET_OP_REQ_TYPE_OFFSET); type &= 0xfff; switch (type) { case ADD_TO_MCG: if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) { mlx4_warn(dev, "ADD MCG operation is not supported in DEVICE_MANAGED steering mode\n"); err = EPERM; break; } mgm = (struct mlx4_mgm 
*)((u8 *)(outbox) + GET_OP_REQ_DATA_OFFSET); num_qps = be32_to_cpu(mgm->members_count) & MGM_QPN_MASK; rem_mcg = ((u8 *)(&mgm->members_count))[0] & 1; prot = ((u8 *)(&mgm->members_count))[0] >> 6; for (i = 0; i < num_qps; i++) { qp.qpn = be32_to_cpu(mgm->qp[i]); if (rem_mcg) err = mlx4_multicast_detach(dev, &qp, mgm->gid, prot, 0); else err = mlx4_multicast_attach(dev, &qp, mgm->gid, mgm->gid[5] , 0, prot, NULL); if (err) break; } break; default: mlx4_warn(dev, "Bad type for required operation\n"); err = EINVAL; break; } err = mlx4_cmd(dev, 0, ((u32) err | (__force u32)cpu_to_be32(token) << 16), 1, MLX4_CMD_GET_OP_REQ, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); if (err) { mlx4_err(dev, "Failed to acknowledge required request: %d\n", err); goto out; } memset(outbox, 0, 0xffc); num_tasks = atomic_dec_return(&priv->opreq_count); } out: mlx4_free_cmd_mailbox(dev, mailbox); } static int mlx4_check_smp_firewall_active(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox) { #define MLX4_CMD_MAD_DEMUX_SET_ATTR_OFFSET 0x10 #define MLX4_CMD_MAD_DEMUX_GETRESP_ATTR_OFFSET 0x20 #define MLX4_CMD_MAD_DEMUX_TRAP_ATTR_OFFSET 0x40 #define MLX4_CMD_MAD_DEMUX_TRAP_REPRESS_ATTR_OFFSET 0x70 u32 set_attr_mask, getresp_attr_mask; u32 trap_attr_mask, traprepress_attr_mask; MLX4_GET(set_attr_mask, mailbox->buf, MLX4_CMD_MAD_DEMUX_SET_ATTR_OFFSET); mlx4_dbg(dev, "SMP firewall set_attribute_mask = 0x%x\n", set_attr_mask); MLX4_GET(getresp_attr_mask, mailbox->buf, MLX4_CMD_MAD_DEMUX_GETRESP_ATTR_OFFSET); mlx4_dbg(dev, "SMP firewall getresp_attribute_mask = 0x%x\n", getresp_attr_mask); MLX4_GET(trap_attr_mask, mailbox->buf, MLX4_CMD_MAD_DEMUX_TRAP_ATTR_OFFSET); mlx4_dbg(dev, "SMP firewall trap_attribute_mask = 0x%x\n", trap_attr_mask); MLX4_GET(traprepress_attr_mask, mailbox->buf, MLX4_CMD_MAD_DEMUX_TRAP_REPRESS_ATTR_OFFSET); mlx4_dbg(dev, "SMP firewall traprepress_attribute_mask = 0x%x\n", traprepress_attr_mask); if (set_attr_mask && getresp_attr_mask && trap_attr_mask && traprepress_attr_mask) return 1; return 0; } int mlx4_config_mad_demux(struct mlx4_dev *dev) { struct mlx4_cmd_mailbox *mailbox; int err; /* Check if mad_demux is supported */ if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_MAD_DEMUX)) return 0; mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) { mlx4_warn(dev, "Failed to allocate mailbox for cmd MAD_DEMUX"); return -ENOMEM; } /* Query mad_demux to find out which MADs are handled by internal sma */ err = mlx4_cmd_box(dev, 0, mailbox->dma, 0x01 /* subn mgmt class */, MLX4_CMD_MAD_DEMUX_QUERY_RESTR, MLX4_CMD_MAD_DEMUX, MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); if (err) { mlx4_warn(dev, "MLX4_CMD_MAD_DEMUX: query restrictions failed (%d)\n", err); goto out; } if (mlx4_check_smp_firewall_active(dev, mailbox)) dev->flags |= MLX4_FLAG_SECURE_HOST; /* Config mad_demux to handle all MADs returned by the query above */ err = mlx4_cmd(dev, mailbox->dma, 0x01 /* subn mgmt class */, MLX4_CMD_MAD_DEMUX_CONFIG, MLX4_CMD_MAD_DEMUX, MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); if (err) { mlx4_warn(dev, "MLX4_CMD_MAD_DEMUX: configure failed (%d)\n", err); goto out; } if (dev->flags & MLX4_FLAG_SECURE_HOST) mlx4_warn(dev, "HCA operating in secure-host mode. 
SMP firewall activated.\n"); out: mlx4_free_cmd_mailbox(dev, mailbox); return err; } /* Access Reg commands */ enum mlx4_access_reg_masks { MLX4_ACCESS_REG_STATUS_MASK = 0x7f, MLX4_ACCESS_REG_METHOD_MASK = 0x7f, MLX4_ACCESS_REG_LEN_MASK = 0x7ff }; struct mlx4_access_reg { __be16 constant1; u8 status; u8 resrvd1; __be16 reg_id; u8 method; u8 constant2; __be32 resrvd2[2]; __be16 len_const; __be16 resrvd3; #define MLX4_ACCESS_REG_HEADER_SIZE (20) u8 reg_data[MLX4_MAILBOX_SIZE-MLX4_ACCESS_REG_HEADER_SIZE]; } __attribute__((__packed__)); /** * mlx4_ACCESS_REG - Generic access reg command. * @dev: mlx4_dev. * @reg_id: register ID to access. * @method: Access method Read/Write. * @reg_len: register length to Read/Write in bytes. * @reg_data: reg_data pointer to Read/Write From/To. * * Access ConnectX registers FW command. * Returns 0 on success and copies outbox mlx4_access_reg data * field into reg_data or a negative error code. */ static int mlx4_ACCESS_REG(struct mlx4_dev *dev, u16 reg_id, enum mlx4_access_reg_method method, u16 reg_len, void *reg_data) { struct mlx4_cmd_mailbox *inbox, *outbox; struct mlx4_access_reg *inbuf, *outbuf; int err; inbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(inbox)) return PTR_ERR(inbox); outbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(outbox)) { mlx4_free_cmd_mailbox(dev, inbox); return PTR_ERR(outbox); } inbuf = inbox->buf; outbuf = outbox->buf; inbuf->constant1 = cpu_to_be16(0x1<<11 | 0x4); inbuf->constant2 = 0x1; inbuf->reg_id = cpu_to_be16(reg_id); inbuf->method = method & MLX4_ACCESS_REG_METHOD_MASK; reg_len = min(reg_len, (u16)(sizeof(inbuf->reg_data))); inbuf->len_const = cpu_to_be16(((reg_len/4 + 1) & MLX4_ACCESS_REG_LEN_MASK) | ((0x3) << 12)); memcpy(inbuf->reg_data, reg_data, reg_len); err = mlx4_cmd_box(dev, inbox->dma, outbox->dma, 0, 0, MLX4_CMD_ACCESS_REG, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_WRAPPED); if (err) goto out; if (outbuf->status & MLX4_ACCESS_REG_STATUS_MASK) { err = outbuf->status & MLX4_ACCESS_REG_STATUS_MASK; mlx4_err(dev, "MLX4_CMD_ACCESS_REG(%x) returned REG status (%x)\n", reg_id, err); goto out; } memcpy(reg_data, outbuf->reg_data, reg_len); out: mlx4_free_cmd_mailbox(dev, inbox); mlx4_free_cmd_mailbox(dev, outbox); return err; } /* ConnectX registers IDs */ enum mlx4_reg_id { MLX4_REG_ID_PTYS = 0x5004, }; /** * mlx4_ACCESS_PTYS_REG - Access PTYs (Port Type and Speed) * register * @dev: mlx4_dev. * @method: Access method Read/Write. * @ptys_reg: PTYS register data pointer. * * Access ConnectX PTYS register, to Read/Write Port Type/Speed * configuration * Returns 0 on success or a negative error code. 
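 *
 * A typical query might look like (illustrative sketch):
 *
 *	struct mlx4_ptys_reg ptys_reg = { };
 *
 *	ptys_reg.local_port = port;
 *	err = mlx4_ACCESS_PTYS_REG(dev, MLX4_ACCESS_REG_QUERY, &ptys_reg);
 *
 * followed by inspecting the returned capability/admin fields. Writes
 * use MLX4_ACCESS_REG_WRITE and, for slave functions, are rejected by
 * mlx4_ACCESS_REG_wrapper below.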
*/ int mlx4_ACCESS_PTYS_REG(struct mlx4_dev *dev, enum mlx4_access_reg_method method, struct mlx4_ptys_reg *ptys_reg) { return mlx4_ACCESS_REG(dev, MLX4_REG_ID_PTYS, method, sizeof(*ptys_reg), ptys_reg); } EXPORT_SYMBOL_GPL(mlx4_ACCESS_PTYS_REG); int mlx4_ACCESS_REG_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { struct mlx4_access_reg *inbuf = inbox->buf; u8 method = inbuf->method & MLX4_ACCESS_REG_METHOD_MASK; u16 reg_id = be16_to_cpu(inbuf->reg_id); if (slave != mlx4_master_func_num(dev) && method == MLX4_ACCESS_REG_WRITE) return -EPERM; if (reg_id == MLX4_REG_ID_PTYS) { struct mlx4_ptys_reg *ptys_reg = (struct mlx4_ptys_reg *)inbuf->reg_data; ptys_reg->local_port = mlx4_slave_convert_port(dev, slave, ptys_reg->local_port); } return mlx4_cmd_box(dev, inbox->dma, outbox->dma, vhcr->in_modifier, 0, MLX4_CMD_ACCESS_REG, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE); } static int mlx4_SET_PORT_phv_bit(struct mlx4_dev *dev, u8 port, u8 phv_bit) { #define SET_PORT_GEN_PHV_VALID 0x10 #define SET_PORT_GEN_PHV_EN 0x80 struct mlx4_cmd_mailbox *mailbox; struct mlx4_set_port_general_context *context; u32 in_mod; int err; mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) return PTR_ERR(mailbox); context = mailbox->buf; context->v_ignore_fcs |= SET_PORT_GEN_PHV_VALID; if (phv_bit) context->phv_en |= SET_PORT_GEN_PHV_EN; in_mod = MLX4_SET_PORT_GENERAL << 8 | port; err = mlx4_cmd(dev, mailbox->dma, in_mod, MLX4_SET_PORT_ETH_OPCODE, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); mlx4_free_cmd_mailbox(dev, mailbox); return err; } int get_phv_bit(struct mlx4_dev *dev, u8 port, int *phv) { int err; struct mlx4_func_cap func_cap; memset(&func_cap, 0, sizeof(func_cap)); err = mlx4_QUERY_FUNC_CAP(dev, port, &func_cap); if (!err) *phv = func_cap.flags0 & QUERY_FUNC_CAP_PHV_BIT; return err; } EXPORT_SYMBOL(get_phv_bit); int set_phv_bit(struct mlx4_dev *dev, u8 port, int new_val) { int ret; if (mlx4_is_slave(dev)) return -EPERM; if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_PHV_EN && !(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_SKIP_OUTER_VLAN)) { ret = mlx4_SET_PORT_phv_bit(dev, port, new_val); if (!ret) dev->caps.phv_bit[port] = new_val; return ret; } return -EOPNOTSUPP; } EXPORT_SYMBOL(set_phv_bit); void mlx4_replace_zero_macs(struct mlx4_dev *dev) { int i; u8 mac_addr[ETH_ALEN]; dev->port_random_macs = 0; for (i = 1; i <= dev->caps.num_ports; ++i) if (!dev->caps.def_mac[i] && dev->caps.port_type[i] == MLX4_PORT_TYPE_ETH) { random_ether_addr(mac_addr); dev->port_random_macs |= 1 << i; dev->caps.def_mac[i] = mlx4_mac_to_u64(mac_addr); } } EXPORT_SYMBOL_GPL(mlx4_replace_zero_macs); Index: head/sys/dev/mlx4/mlx4_core/mlx4_mcg.c =================================================================== --- head/sys/dev/mlx4/mlx4_core/mlx4_mcg.c (revision 327863) +++ head/sys/dev/mlx4/mlx4_core/mlx4_mcg.c (revision 327864) @@ -1,1648 +1,1648 @@ /* * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2007, 2008, 2014 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. 
You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include #include #include #include #include #include "mlx4.h" int mlx4_get_mgm_entry_size(struct mlx4_dev *dev) { return 1 << dev->oper_log_mgm_entry_size; } int mlx4_get_qp_per_mgm(struct mlx4_dev *dev) { return 4 * (mlx4_get_mgm_entry_size(dev) / 16 - 2); } static int mlx4_QP_FLOW_STEERING_ATTACH(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox, u32 size, u64 *reg_id) { u64 imm; int err = 0; err = mlx4_cmd_imm(dev, mailbox->dma, &imm, size, 0, MLX4_QP_FLOW_STEERING_ATTACH, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); if (err) return err; *reg_id = imm; return err; } static int mlx4_QP_FLOW_STEERING_DETACH(struct mlx4_dev *dev, u64 regid) { int err = 0; err = mlx4_cmd(dev, regid, 0, 0, MLX4_QP_FLOW_STEERING_DETACH, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); return err; } static int mlx4_READ_ENTRY(struct mlx4_dev *dev, int index, struct mlx4_cmd_mailbox *mailbox) { return mlx4_cmd_box(dev, 0, mailbox->dma, index, 0, MLX4_CMD_READ_MCG, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); } static int mlx4_WRITE_ENTRY(struct mlx4_dev *dev, int index, struct mlx4_cmd_mailbox *mailbox) { return mlx4_cmd(dev, mailbox->dma, index, 0, MLX4_CMD_WRITE_MCG, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); } static int mlx4_WRITE_PROMISC(struct mlx4_dev *dev, u8 port, u8 steer, struct mlx4_cmd_mailbox *mailbox) { u32 in_mod; in_mod = (u32) port << 16 | steer << 1; return mlx4_cmd(dev, mailbox->dma, in_mod, 0x1, MLX4_CMD_WRITE_MCG, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); } static int mlx4_GID_HASH(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox, u16 *hash, u8 op_mod) { u64 imm; int err; err = mlx4_cmd_imm(dev, mailbox->dma, &imm, 0, op_mod, MLX4_CMD_MGID_HASH, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); if (!err) *hash = imm; return err; } static struct mlx4_promisc_qp *get_promisc_qp(struct mlx4_dev *dev, u8 port, enum mlx4_steer_type steer, u32 qpn) { struct mlx4_steer *s_steer; struct mlx4_promisc_qp *pqp; if (port < 1 || port > dev->caps.num_ports) return NULL; s_steer = &mlx4_priv(dev)->steer[port - 1]; list_for_each_entry(pqp, &s_steer->promisc_qps[steer], list) { if (pqp->qpn == qpn) return pqp; } /* not found */ return NULL; } /* * Add new entry to steering data structure. 
* All promisc QPs should be added as well */ static int new_steering_entry(struct mlx4_dev *dev, u8 port, enum mlx4_steer_type steer, unsigned int index, u32 qpn) { struct mlx4_steer *s_steer; struct mlx4_cmd_mailbox *mailbox; struct mlx4_mgm *mgm; u32 members_count; struct mlx4_steer_index *new_entry; struct mlx4_promisc_qp *pqp; struct mlx4_promisc_qp *dqp = NULL; u32 prot; int err; if (port < 1 || port > dev->caps.num_ports) return -EINVAL; s_steer = &mlx4_priv(dev)->steer[port - 1]; new_entry = kzalloc(sizeof *new_entry, GFP_KERNEL); if (!new_entry) return -ENOMEM; INIT_LIST_HEAD(&new_entry->duplicates); new_entry->index = index; list_add_tail(&new_entry->list, &s_steer->steer_entries[steer]); /* If the given qpn is also a promisc qp, * it should be inserted to duplicates list */ pqp = get_promisc_qp(dev, port, steer, qpn); if (pqp) { dqp = kmalloc(sizeof *dqp, GFP_KERNEL); if (!dqp) { err = -ENOMEM; goto out_alloc; } dqp->qpn = qpn; list_add_tail(&dqp->list, &new_entry->duplicates); } /* if no promisc qps for this vep, we are done */ if (list_empty(&s_steer->promisc_qps[steer])) return 0; /* now need to add all the promisc qps to the new * steering entry, as they should also receive the packets * destined to this address */ mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) { err = -ENOMEM; goto out_alloc; } mgm = mailbox->buf; err = mlx4_READ_ENTRY(dev, index, mailbox); if (err) goto out_mailbox; members_count = be32_to_cpu(mgm->members_count) & 0xffffff; prot = be32_to_cpu(mgm->members_count) >> 30; list_for_each_entry(pqp, &s_steer->promisc_qps[steer], list) { /* don't add already existing qpn */ if (pqp->qpn == qpn) continue; if (members_count == dev->caps.num_qp_per_mgm) { /* out of space */ err = -ENOMEM; goto out_mailbox; } /* add the qpn */ mgm->qp[members_count++] = cpu_to_be32(pqp->qpn & MGM_QPN_MASK); } /* update the qps count and update the entry with all the promisc qps*/ mgm->members_count = cpu_to_be32(members_count | (prot << 30)); err = mlx4_WRITE_ENTRY(dev, index, mailbox); out_mailbox: mlx4_free_cmd_mailbox(dev, mailbox); if (!err) return 0; out_alloc: if (dqp) { list_del(&dqp->list); kfree(dqp); } list_del(&new_entry->list); kfree(new_entry); return err; } /* update the data structures with existing steering entry */ static int existing_steering_entry(struct mlx4_dev *dev, u8 port, enum mlx4_steer_type steer, unsigned int index, u32 qpn) { struct mlx4_steer *s_steer; struct mlx4_steer_index *tmp_entry, *entry = NULL; struct mlx4_promisc_qp *pqp; struct mlx4_promisc_qp *dqp; if (port < 1 || port > dev->caps.num_ports) return -EINVAL; s_steer = &mlx4_priv(dev)->steer[port - 1]; pqp = get_promisc_qp(dev, port, steer, qpn); if (!pqp) return 0; /* nothing to do */ list_for_each_entry(tmp_entry, &s_steer->steer_entries[steer], list) { if (tmp_entry->index == index) { entry = tmp_entry; break; } } if (unlikely(!entry)) { mlx4_warn(dev, "Steering entry at index %x is not registered\n", index); return -EINVAL; } /* the given qpn is listed as a promisc qpn * we need to add it as a duplicate to this entry * for future references */ list_for_each_entry(dqp, &entry->duplicates, list) { if (qpn == dqp->qpn) return 0; /* qp is already duplicated */ } /* add the qp as a duplicate on this index */ dqp = kmalloc(sizeof *dqp, GFP_KERNEL); if (!dqp) return -ENOMEM; dqp->qpn = qpn; list_add_tail(&dqp->list, &entry->duplicates); return 0; } /* Check whether a qpn is a duplicate on steering entry * If so, it should not be removed from mgm */ static bool 
check_duplicate_entry(struct mlx4_dev *dev, u8 port, enum mlx4_steer_type steer, unsigned int index, u32 qpn) { struct mlx4_steer *s_steer; struct mlx4_steer_index *tmp_entry, *entry = NULL; struct mlx4_promisc_qp *dqp, *tmp_dqp; if (port < 1 || port > dev->caps.num_ports) return NULL; s_steer = &mlx4_priv(dev)->steer[port - 1]; /* if qp is not promisc, it cannot be duplicated */ if (!get_promisc_qp(dev, port, steer, qpn)) return false; /* The qp is promisc qp so it is a duplicate on this index * Find the index entry, and remove the duplicate */ list_for_each_entry(tmp_entry, &s_steer->steer_entries[steer], list) { if (tmp_entry->index == index) { entry = tmp_entry; break; } } if (unlikely(!entry)) { mlx4_warn(dev, "Steering entry for index %x is not registered\n", index); return false; } list_for_each_entry_safe(dqp, tmp_dqp, &entry->duplicates, list) { if (dqp->qpn == qpn) { list_del(&dqp->list); kfree(dqp); } } return true; } /* Returns true if all the QPs != tqpn contained in this entry * are Promisc QPs. Returns false otherwise. */ static bool promisc_steering_entry(struct mlx4_dev *dev, u8 port, enum mlx4_steer_type steer, unsigned int index, u32 tqpn, u32 *members_count) { struct mlx4_cmd_mailbox *mailbox; struct mlx4_mgm *mgm; u32 m_count; bool ret = false; int i; if (port < 1 || port > dev->caps.num_ports) return false; mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) return false; mgm = mailbox->buf; if (mlx4_READ_ENTRY(dev, index, mailbox)) goto out; m_count = be32_to_cpu(mgm->members_count) & 0xffffff; if (members_count) *members_count = m_count; for (i = 0; i < m_count; i++) { u32 qpn = be32_to_cpu(mgm->qp[i]) & MGM_QPN_MASK; if (!get_promisc_qp(dev, port, steer, qpn) && qpn != tqpn) { /* the qp is not promisc, the entry can't be removed */ goto out; } } ret = true; out: mlx4_free_cmd_mailbox(dev, mailbox); return ret; } /* IF a steering entry contains only promisc QPs, it can be removed. */ static bool can_remove_steering_entry(struct mlx4_dev *dev, u8 port, enum mlx4_steer_type steer, unsigned int index, u32 tqpn) { struct mlx4_steer *s_steer; struct mlx4_steer_index *entry = NULL, *tmp_entry; u32 members_count; bool ret = false; if (port < 1 || port > dev->caps.num_ports) return NULL; s_steer = &mlx4_priv(dev)->steer[port - 1]; if (!promisc_steering_entry(dev, port, steer, index, tqpn, &members_count)) goto out; /* All the qps currently registered for this entry are promiscuous, * Checking for duplicates */ ret = true; list_for_each_entry_safe(entry, tmp_entry, &s_steer->steer_entries[steer], list) { if (entry->index == index) { if (list_empty(&entry->duplicates) || members_count == 1) { struct mlx4_promisc_qp *pqp, *tmp_pqp; /* If there is only 1 entry in duplicates then * this is the QP we want to delete, going over * the list and deleting the entry. 
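 *
 * In other words: the entry can be removed when its duplicates list is
 * empty or the MGM holds a single QP; if duplicates remain and the MGM
 * still holds more than one QP, the entry must be kept.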
*/ list_del(&entry->list); list_for_each_entry_safe(pqp, tmp_pqp, &entry->duplicates, list) { list_del(&pqp->list); kfree(pqp); } kfree(entry); } else { /* This entry contains duplicates so it shouldn't be removed */ ret = false; goto out; } } } out: return ret; } static int add_promisc_qp(struct mlx4_dev *dev, u8 port, enum mlx4_steer_type steer, u32 qpn) { struct mlx4_steer *s_steer; struct mlx4_cmd_mailbox *mailbox; struct mlx4_mgm *mgm; struct mlx4_steer_index *entry; struct mlx4_promisc_qp *pqp; struct mlx4_promisc_qp *dqp; u32 members_count; u32 prot; int i; bool found; int err; struct mlx4_priv *priv = mlx4_priv(dev); if (port < 1 || port > dev->caps.num_ports) return -EINVAL; s_steer = &mlx4_priv(dev)->steer[port - 1]; mutex_lock(&priv->mcg_table.mutex); if (get_promisc_qp(dev, port, steer, qpn)) { err = 0; /* Noting to do, already exists */ goto out_mutex; } pqp = kmalloc(sizeof *pqp, GFP_KERNEL); if (!pqp) { err = -ENOMEM; goto out_mutex; } pqp->qpn = qpn; mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) { err = -ENOMEM; goto out_alloc; } mgm = mailbox->buf; if (!(mlx4_is_mfunc(dev) && steer == MLX4_UC_STEER)) { /* The promisc QP needs to be added for each one of the steering * entries. If it already exists, needs to be added as * a duplicate for this entry. */ list_for_each_entry(entry, &s_steer->steer_entries[steer], list) { err = mlx4_READ_ENTRY(dev, entry->index, mailbox); if (err) goto out_mailbox; members_count = be32_to_cpu(mgm->members_count) & 0xffffff; prot = be32_to_cpu(mgm->members_count) >> 30; found = false; for (i = 0; i < members_count; i++) { if ((be32_to_cpu(mgm->qp[i]) & MGM_QPN_MASK) == qpn) { /* Entry already exists. * Add to duplicates. */ dqp = kmalloc(sizeof(*dqp), GFP_KERNEL); if (!dqp) { err = -ENOMEM; goto out_mailbox; } dqp->qpn = qpn; list_add_tail(&dqp->list, &entry->duplicates); found = true; } } if (!found) { /* Need to add the qpn to mgm */ if (members_count == dev->caps.num_qp_per_mgm) { /* entry is full */ err = -ENOMEM; goto out_mailbox; } mgm->qp[members_count++] = cpu_to_be32(qpn & MGM_QPN_MASK); mgm->members_count = cpu_to_be32(members_count | (prot << 30)); err = mlx4_WRITE_ENTRY(dev, entry->index, mailbox); if (err) goto out_mailbox; } } } /* add the new qpn to list of promisc qps */ list_add_tail(&pqp->list, &s_steer->promisc_qps[steer]); /* now need to add all the promisc qps to default entry */ memset(mgm, 0, sizeof *mgm); members_count = 0; list_for_each_entry(dqp, &s_steer->promisc_qps[steer], list) { if (members_count == dev->caps.num_qp_per_mgm) { /* entry is full */ err = -ENOMEM; goto out_list; } mgm->qp[members_count++] = cpu_to_be32(dqp->qpn & MGM_QPN_MASK); } mgm->members_count = cpu_to_be32(members_count | MLX4_PROT_ETH << 30); err = mlx4_WRITE_PROMISC(dev, port, steer, mailbox); if (err) goto out_list; mlx4_free_cmd_mailbox(dev, mailbox); mutex_unlock(&priv->mcg_table.mutex); return 0; out_list: list_del(&pqp->list); out_mailbox: mlx4_free_cmd_mailbox(dev, mailbox); out_alloc: kfree(pqp); out_mutex: mutex_unlock(&priv->mcg_table.mutex); return err; } static int remove_promisc_qp(struct mlx4_dev *dev, u8 port, enum mlx4_steer_type steer, u32 qpn) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_steer *s_steer; struct mlx4_cmd_mailbox *mailbox; struct mlx4_mgm *mgm; struct mlx4_steer_index *entry, *tmp_entry; struct mlx4_promisc_qp *pqp; struct mlx4_promisc_qp *dqp; u32 members_count; bool found; bool back_to_list = false; int i; int err; if (port < 1 || port > dev->caps.num_ports) return -EINVAL; s_steer = 
&mlx4_priv(dev)->steer[port - 1]; mutex_lock(&priv->mcg_table.mutex); pqp = get_promisc_qp(dev, port, steer, qpn); if (unlikely(!pqp)) { mlx4_warn(dev, "QP %x is not promiscuous QP\n", qpn); /* nothing to do */ err = 0; goto out_mutex; } /*remove from list of promisc qps */ list_del(&pqp->list); /* set the default entry not to include the removed one */ mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) { err = -ENOMEM; back_to_list = true; goto out_list; } mgm = mailbox->buf; members_count = 0; list_for_each_entry(dqp, &s_steer->promisc_qps[steer], list) mgm->qp[members_count++] = cpu_to_be32(dqp->qpn & MGM_QPN_MASK); mgm->members_count = cpu_to_be32(members_count | MLX4_PROT_ETH << 30); err = mlx4_WRITE_PROMISC(dev, port, steer, mailbox); if (err) goto out_mailbox; if (!(mlx4_is_mfunc(dev) && steer == MLX4_UC_STEER)) { /* Remove the QP from all the steering entries */ list_for_each_entry_safe(entry, tmp_entry, &s_steer->steer_entries[steer], list) { found = false; list_for_each_entry(dqp, &entry->duplicates, list) { if (dqp->qpn == qpn) { found = true; break; } } if (found) { /* A duplicate, no need to change the MGM, * only update the duplicates list */ list_del(&dqp->list); kfree(dqp); } else { int loc = -1; err = mlx4_READ_ENTRY(dev, entry->index, mailbox); if (err) goto out_mailbox; members_count = be32_to_cpu(mgm->members_count) & 0xffffff; if (!members_count) { mlx4_warn(dev, "QP %06x wasn't found in entry %x mcount=0. deleting entry...\n", qpn, entry->index); list_del(&entry->list); kfree(entry); continue; } for (i = 0; i < members_count; ++i) if ((be32_to_cpu(mgm->qp[i]) & MGM_QPN_MASK) == qpn) { loc = i; break; } if (loc < 0) { mlx4_err(dev, "QP %06x wasn't found in entry %d\n", qpn, entry->index); err = -EINVAL; goto out_mailbox; } /* Copy the last QP in this MGM * over removed QP */ mgm->qp[loc] = mgm->qp[members_count - 1]; mgm->qp[members_count - 1] = 0; mgm->members_count = cpu_to_be32(--members_count | (MLX4_PROT_ETH << 30)); err = mlx4_WRITE_ENTRY(dev, entry->index, mailbox); if (err) goto out_mailbox; } } } out_mailbox: mlx4_free_cmd_mailbox(dev, mailbox); out_list: if (back_to_list) list_add_tail(&pqp->list, &s_steer->promisc_qps[steer]); else kfree(pqp); out_mutex: mutex_unlock(&priv->mcg_table.mutex); return err; } /* * Caller must hold MCG table semaphore. gid and mgm parameters must * be properly aligned for command interface. * * Returns 0 unless a firmware command error occurs. * * If GID is found in MGM or MGM is empty, *index = *hash, *prev = -1 * and *mgm holds MGM entry. * * if GID is found in AMGM, *index = index in AMGM, *prev = index of * previous entry in hash chain and *mgm holds AMGM entry. * * If no AMGM exists for given gid, *index = -1, *prev = index of last * entry in hash chain and *mgm holds end of hash chain. */ static int find_entry(struct mlx4_dev *dev, u8 port, u8 *gid, enum mlx4_protocol prot, struct mlx4_cmd_mailbox *mgm_mailbox, int *prev, int *index) { struct mlx4_cmd_mailbox *mailbox; struct mlx4_mgm *mgm = mgm_mailbox->buf; u8 *mgid; int err; u16 hash; u8 op_mod = (prot == MLX4_PROT_ETH) ? 
!!(dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER) : 0; mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) return -ENOMEM; mgid = mailbox->buf; memcpy(mgid, gid, 16); err = mlx4_GID_HASH(dev, mailbox, &hash, op_mod); mlx4_free_cmd_mailbox(dev, mailbox); if (err) return err; if (0) { mlx4_dbg(dev, "Hash for "GID_PRINT_FMT" is %04x\n", GID_PRINT_ARGS(gid), hash); } *index = hash; *prev = -1; do { err = mlx4_READ_ENTRY(dev, *index, mgm_mailbox); if (err) return err; if (!(be32_to_cpu(mgm->members_count) & 0xffffff)) { if (*index != hash) { mlx4_err(dev, "Found zero MGID in AMGM\n"); err = -EINVAL; } return err; } if (!memcmp(mgm->gid, gid, 16) && be32_to_cpu(mgm->members_count) >> 30 == prot) return err; *prev = *index; *index = be32_to_cpu(mgm->next_gid_index) >> 6; } while (*index); *index = -1; return err; } static const u8 __promisc_mode[] = { [MLX4_FS_REGULAR] = 0x0, [MLX4_FS_ALL_DEFAULT] = 0x1, [MLX4_FS_MC_DEFAULT] = 0x3, [MLX4_FS_MIRROR_RX_PORT] = 0x4, [MLX4_FS_MIRROR_SX_PORT] = 0x5, [MLX4_FS_UC_SNIFFER] = 0x6, [MLX4_FS_MC_SNIFFER] = 0x7, }; int mlx4_map_sw_to_hw_steering_mode(struct mlx4_dev *dev, enum mlx4_net_trans_promisc_mode flow_type) { if (flow_type >= MLX4_FS_MODE_NUM) { mlx4_err(dev, "Invalid flow type. type = %d\n", flow_type); return -EINVAL; } return __promisc_mode[flow_type]; } EXPORT_SYMBOL_GPL(mlx4_map_sw_to_hw_steering_mode); static void trans_rule_ctrl_to_hw(struct mlx4_net_trans_rule *ctrl, struct mlx4_net_trans_rule_hw_ctrl *hw) { u8 flags = 0; flags = ctrl->queue_mode == MLX4_NET_TRANS_Q_LIFO ? 1 : 0; flags |= ctrl->exclusive ? (1 << 2) : 0; flags |= ctrl->allow_loopback ? (1 << 3) : 0; hw->flags = flags; hw->type = __promisc_mode[ctrl->promisc_mode]; hw->prio = cpu_to_be16(ctrl->priority); hw->port = ctrl->port; hw->qpn = cpu_to_be32(ctrl->qpn); } const u16 __sw_id_hw[] = { [MLX4_NET_TRANS_RULE_ID_ETH] = 0xE001, [MLX4_NET_TRANS_RULE_ID_IB] = 0xE005, [MLX4_NET_TRANS_RULE_ID_IPV6] = 0xE003, [MLX4_NET_TRANS_RULE_ID_IPV4] = 0xE002, [MLX4_NET_TRANS_RULE_ID_TCP] = 0xE004, [MLX4_NET_TRANS_RULE_ID_UDP] = 0xE006, [MLX4_NET_TRANS_RULE_ID_VXLAN] = 0xE008 }; int mlx4_map_sw_to_hw_steering_id(struct mlx4_dev *dev, enum mlx4_net_trans_rule_id id) { if (id >= MLX4_NET_TRANS_RULE_NUM) { mlx4_err(dev, "Invalid network rule id. id = %d\n", id); return -EINVAL; } return __sw_id_hw[id]; } EXPORT_SYMBOL_GPL(mlx4_map_sw_to_hw_steering_id); static const int __rule_hw_sz[] = { [MLX4_NET_TRANS_RULE_ID_ETH] = sizeof(struct mlx4_net_trans_rule_hw_eth), [MLX4_NET_TRANS_RULE_ID_IB] = sizeof(struct mlx4_net_trans_rule_hw_ib), [MLX4_NET_TRANS_RULE_ID_IPV6] = 0, [MLX4_NET_TRANS_RULE_ID_IPV4] = sizeof(struct mlx4_net_trans_rule_hw_ipv4), [MLX4_NET_TRANS_RULE_ID_TCP] = sizeof(struct mlx4_net_trans_rule_hw_tcp_udp), [MLX4_NET_TRANS_RULE_ID_UDP] = sizeof(struct mlx4_net_trans_rule_hw_tcp_udp), [MLX4_NET_TRANS_RULE_ID_VXLAN] = sizeof(struct mlx4_net_trans_rule_hw_vxlan) }; int mlx4_hw_rule_sz(struct mlx4_dev *dev, enum mlx4_net_trans_rule_id id) { if (id >= MLX4_NET_TRANS_RULE_NUM) { mlx4_err(dev, "Invalid network rule id. 
id = %d\n", id); return -EINVAL; } return __rule_hw_sz[id]; } EXPORT_SYMBOL_GPL(mlx4_hw_rule_sz); static int parse_trans_rule(struct mlx4_dev *dev, struct mlx4_spec_list *spec, struct _rule_hw *rule_hw) { if (mlx4_hw_rule_sz(dev, spec->id) < 0) return -EINVAL; memset(rule_hw, 0, mlx4_hw_rule_sz(dev, spec->id)); rule_hw->id = cpu_to_be16(__sw_id_hw[spec->id]); rule_hw->size = mlx4_hw_rule_sz(dev, spec->id) >> 2; switch (spec->id) { case MLX4_NET_TRANS_RULE_ID_ETH: memcpy(rule_hw->eth.dst_mac, spec->eth.dst_mac, ETH_ALEN); memcpy(rule_hw->eth.dst_mac_msk, spec->eth.dst_mac_msk, ETH_ALEN); memcpy(rule_hw->eth.src_mac, spec->eth.src_mac, ETH_ALEN); memcpy(rule_hw->eth.src_mac_msk, spec->eth.src_mac_msk, ETH_ALEN); if (spec->eth.ether_type_enable) { rule_hw->eth.ether_type_enable = 1; rule_hw->eth.ether_type = spec->eth.ether_type; } rule_hw->eth.vlan_tag = spec->eth.vlan_id; rule_hw->eth.vlan_tag_msk = spec->eth.vlan_id_msk; break; case MLX4_NET_TRANS_RULE_ID_IB: rule_hw->ib.l3_qpn = spec->ib.l3_qpn; rule_hw->ib.qpn_mask = spec->ib.qpn_msk; memcpy(&rule_hw->ib.dst_gid, &spec->ib.dst_gid, 16); memcpy(&rule_hw->ib.dst_gid_msk, &spec->ib.dst_gid_msk, 16); break; case MLX4_NET_TRANS_RULE_ID_IPV6: return -EOPNOTSUPP; case MLX4_NET_TRANS_RULE_ID_IPV4: rule_hw->ipv4.src_ip = spec->ipv4.src_ip; rule_hw->ipv4.src_ip_msk = spec->ipv4.src_ip_msk; rule_hw->ipv4.dst_ip = spec->ipv4.dst_ip; rule_hw->ipv4.dst_ip_msk = spec->ipv4.dst_ip_msk; break; case MLX4_NET_TRANS_RULE_ID_TCP: case MLX4_NET_TRANS_RULE_ID_UDP: rule_hw->tcp_udp.dst_port = spec->tcp_udp.dst_port; rule_hw->tcp_udp.dst_port_msk = spec->tcp_udp.dst_port_msk; rule_hw->tcp_udp.src_port = spec->tcp_udp.src_port; rule_hw->tcp_udp.src_port_msk = spec->tcp_udp.src_port_msk; break; case MLX4_NET_TRANS_RULE_ID_VXLAN: rule_hw->vxlan.vni = cpu_to_be32(be32_to_cpu(spec->vxlan.vni) << 8); rule_hw->vxlan.vni_mask = cpu_to_be32(be32_to_cpu(spec->vxlan.vni_mask) << 8); break; default: return -EINVAL; } return __rule_hw_sz[spec->id]; } static void mlx4_err_rule(struct mlx4_dev *dev, char *str, struct mlx4_net_trans_rule *rule) { #define BUF_SIZE 256 struct mlx4_spec_list *cur; char buf[BUF_SIZE]; int len = 0; mlx4_err(dev, "%s", str); len += snprintf(buf + len, BUF_SIZE - len, "port = %d prio = 0x%x qp = 0x%x ", rule->port, rule->priority, rule->qpn); list_for_each_entry(cur, &rule->list, list) { switch (cur->id) { case MLX4_NET_TRANS_RULE_ID_ETH: len += snprintf(buf + len, BUF_SIZE - len, "dmac = 0x%02x%02x%02x%02x%02x%02x ", cur->eth.dst_mac[0], cur->eth.dst_mac[1], cur->eth.dst_mac[2], cur->eth.dst_mac[3], cur->eth.dst_mac[4], cur->eth.dst_mac[5]); if (cur->eth.ether_type) len += snprintf(buf + len, BUF_SIZE - len, "ethertype = 0x%x ", be16_to_cpu(cur->eth.ether_type)); if (cur->eth.vlan_id) len += snprintf(buf + len, BUF_SIZE - len, "vlan-id = %d ", be16_to_cpu(cur->eth.vlan_id)); break; case MLX4_NET_TRANS_RULE_ID_IPV4: if (cur->ipv4.src_ip) len += snprintf(buf + len, BUF_SIZE - len, "src-ip = %pI4 ", &cur->ipv4.src_ip); if (cur->ipv4.dst_ip) len += snprintf(buf + len, BUF_SIZE - len, "dst-ip = %pI4 ", &cur->ipv4.dst_ip); break; case MLX4_NET_TRANS_RULE_ID_TCP: case MLX4_NET_TRANS_RULE_ID_UDP: if (cur->tcp_udp.src_port) len += snprintf(buf + len, BUF_SIZE - len, "src-port = %d ", be16_to_cpu(cur->tcp_udp.src_port)); if (cur->tcp_udp.dst_port) len += snprintf(buf + len, BUF_SIZE - len, "dst-port = %d ", be16_to_cpu(cur->tcp_udp.dst_port)); break; case MLX4_NET_TRANS_RULE_ID_IB: len += snprintf(buf + len, BUF_SIZE - len, "dst-gid = "GID_PRINT_FMT"\n", 
GID_PRINT_ARGS(cur->ib.dst_gid)); len += snprintf(buf + len, BUF_SIZE - len, "dst-gid-mask = "GID_PRINT_FMT"\n", GID_PRINT_ARGS(cur->ib.dst_gid_msk)); break; case MLX4_NET_TRANS_RULE_ID_VXLAN: len += snprintf(buf + len, BUF_SIZE - len, "VNID = %d ", be32_to_cpu(cur->vxlan.vni)); break; case MLX4_NET_TRANS_RULE_ID_IPV6: break; default: break; } } len += snprintf(buf + len, BUF_SIZE - len, "\n"); mlx4_err(dev, "%s", buf); if (len >= BUF_SIZE) mlx4_err(dev, "Network rule error message was truncated, print buffer is too small\n"); } int mlx4_flow_attach(struct mlx4_dev *dev, struct mlx4_net_trans_rule *rule, u64 *reg_id) { struct mlx4_cmd_mailbox *mailbox; struct mlx4_spec_list *cur; u32 size = 0; int ret; mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) return PTR_ERR(mailbox); trans_rule_ctrl_to_hw(rule, mailbox->buf); size += sizeof(struct mlx4_net_trans_rule_hw_ctrl); list_for_each_entry(cur, &rule->list, list) { ret = parse_trans_rule(dev, cur, mailbox->buf + size); if (ret < 0) { mlx4_free_cmd_mailbox(dev, mailbox); return ret; } size += ret; } ret = mlx4_QP_FLOW_STEERING_ATTACH(dev, mailbox, size >> 2, reg_id); if (ret == -ENOMEM) { mlx4_err_rule(dev, "mcg table is full. Fail to register network rule\n", rule); } else if (ret) { if (ret == -ENXIO) { if (dev->caps.steering_mode != MLX4_STEERING_MODE_DEVICE_MANAGED) mlx4_err_rule(dev, "DMFS is not enabled, " "failed to register network rule.\n", rule); else mlx4_err_rule(dev, "Rule exceeds the dmfs_high_rate_mode limitations, " "failed to register network rule.\n", rule); } else { mlx4_err_rule(dev, "Fail to register network rule.\n", rule); } } mlx4_free_cmd_mailbox(dev, mailbox); return ret; } EXPORT_SYMBOL_GPL(mlx4_flow_attach); int mlx4_flow_detach(struct mlx4_dev *dev, u64 reg_id) { int err; err = mlx4_QP_FLOW_STEERING_DETACH(dev, reg_id); if (err) mlx4_err(dev, "Fail to detach network rule. 
registration id = 0x%llx\n", (unsigned long long)reg_id); return err; } EXPORT_SYMBOL_GPL(mlx4_flow_detach); int mlx4_tunnel_steer_add(struct mlx4_dev *dev, unsigned char *addr, int port, int qpn, u16 prio, u64 *reg_id) { int err; struct mlx4_spec_list spec_eth_outer = { {NULL} }; struct mlx4_spec_list spec_vxlan = { {NULL} }; struct mlx4_spec_list spec_eth_inner = { {NULL} }; struct mlx4_net_trans_rule rule = { .queue_mode = MLX4_NET_TRANS_Q_FIFO, .exclusive = 0, .allow_loopback = 1, .promisc_mode = MLX4_FS_REGULAR, }; __be64 mac_mask = cpu_to_be64(MLX4_MAC_MASK << 16); rule.port = port; rule.qpn = qpn; rule.priority = prio; INIT_LIST_HEAD(&rule.list); spec_eth_outer.id = MLX4_NET_TRANS_RULE_ID_ETH; memcpy(spec_eth_outer.eth.dst_mac, addr, ETH_ALEN); memcpy(spec_eth_outer.eth.dst_mac_msk, &mac_mask, ETH_ALEN); spec_vxlan.id = MLX4_NET_TRANS_RULE_ID_VXLAN; /* any vxlan header */ spec_eth_inner.id = MLX4_NET_TRANS_RULE_ID_ETH; /* any inner eth header */ list_add_tail(&spec_eth_outer.list, &rule.list); list_add_tail(&spec_vxlan.list, &rule.list); list_add_tail(&spec_eth_inner.list, &rule.list); err = mlx4_flow_attach(dev, &rule, reg_id); return err; } EXPORT_SYMBOL(mlx4_tunnel_steer_add); int mlx4_FLOW_STEERING_IB_UC_QP_RANGE(struct mlx4_dev *dev, u32 min_range_qpn, u32 max_range_qpn) { int err; u64 in_param; in_param = ((u64) min_range_qpn) << 32; in_param |= ((u64) max_range_qpn) & 0xFFFFFFFF; err = mlx4_cmd(dev, in_param, 0, 0, MLX4_FLOW_STEERING_IB_UC_QP_RANGE, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); return err; } EXPORT_SYMBOL_GPL(mlx4_FLOW_STEERING_IB_UC_QP_RANGE); int mlx4_qp_attach_common(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], int block_mcast_loopback, enum mlx4_protocol prot, enum mlx4_steer_type steer) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_cmd_mailbox *mailbox; struct mlx4_mgm *mgm; u32 members_count; int index = -1, prev; int link = 0; int i; int err; u8 port = gid[5]; u8 new_entry = 0; mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) return PTR_ERR(mailbox); mgm = mailbox->buf; mutex_lock(&priv->mcg_table.mutex); err = find_entry(dev, port, gid, prot, mailbox, &prev, &index); if (err) goto out; if (index != -1) { if (!(be32_to_cpu(mgm->members_count) & 0xffffff)) { new_entry = 1; memcpy(mgm->gid, gid, 16); } } else { link = 1; index = mlx4_bitmap_alloc(&priv->mcg_table.bitmap); if (index == -1) { mlx4_err(dev, "No AMGM entries left\n"); err = -ENOMEM; goto out; } index += dev->caps.num_mgms; new_entry = 1; memset(mgm, 0, sizeof *mgm); memcpy(mgm->gid, gid, 16); } members_count = be32_to_cpu(mgm->members_count) & 0xffffff; if (members_count == dev->caps.num_qp_per_mgm) { mlx4_err(dev, "MGM at index %x is full\n", index); err = -ENOMEM; goto out; } for (i = 0; i < members_count; ++i) if ((be32_to_cpu(mgm->qp[i]) & MGM_QPN_MASK) == qp->qpn) { mlx4_dbg(dev, "QP %06x already a member of MGM\n", qp->qpn); err = 0; goto out; } if (block_mcast_loopback) mgm->qp[members_count++] = cpu_to_be32((qp->qpn & MGM_QPN_MASK) | (1U << MGM_BLCK_LB_BIT)); else mgm->qp[members_count++] = cpu_to_be32(qp->qpn & MGM_QPN_MASK); mgm->members_count = cpu_to_be32(members_count | (u32) prot << 30); err = mlx4_WRITE_ENTRY(dev, index, mailbox); if (err) goto out; if (!link) goto out; err = mlx4_READ_ENTRY(dev, prev, mailbox); if (err) goto out; mgm->next_gid_index = cpu_to_be32(index << 6); err = mlx4_WRITE_ENTRY(dev, prev, mailbox); if (err) goto out; out: if (prot == MLX4_PROT_ETH && index != -1) { /* manage the steering entry for promisc mode */ if (new_entry) err 
= new_steering_entry(dev, port, steer, index, qp->qpn); else err = existing_steering_entry(dev, port, steer, index, qp->qpn); } if (err && link && index != -1) { if (index < dev->caps.num_mgms) mlx4_warn(dev, "Got AMGM index %d < %d\n", index, dev->caps.num_mgms); else mlx4_bitmap_free(&priv->mcg_table.bitmap, index - dev->caps.num_mgms, MLX4_USE_RR); } mutex_unlock(&priv->mcg_table.mutex); mlx4_free_cmd_mailbox(dev, mailbox); return err; } int mlx4_qp_detach_common(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], enum mlx4_protocol prot, enum mlx4_steer_type steer) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_cmd_mailbox *mailbox; struct mlx4_mgm *mgm; u32 members_count; int prev, index; int i, loc = -1; int err; u8 port = gid[5]; bool removed_entry = false; mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) return PTR_ERR(mailbox); mgm = mailbox->buf; mutex_lock(&priv->mcg_table.mutex); err = find_entry(dev, port, gid, prot, mailbox, &prev, &index); if (err) goto out; if (index == -1) { mlx4_err(dev, "MGID "GID_PRINT_FMT" not found\n", GID_PRINT_ARGS(gid)); err = -EINVAL; goto out; } /* If this QP is also a promisc QP, it shouldn't be removed only if * at least one none promisc QP is also attached to this MCG */ if (prot == MLX4_PROT_ETH && check_duplicate_entry(dev, port, steer, index, qp->qpn) && !promisc_steering_entry(dev, port, steer, index, qp->qpn, NULL)) goto out; members_count = be32_to_cpu(mgm->members_count) & 0xffffff; for (i = 0; i < members_count; ++i) if ((be32_to_cpu(mgm->qp[i]) & MGM_QPN_MASK) == qp->qpn) { loc = i; break; } if (loc == -1) { mlx4_err(dev, "QP %06x not found in MGM\n", qp->qpn); err = -EINVAL; goto out; } /* copy the last QP in this MGM over removed QP */ mgm->qp[loc] = mgm->qp[members_count - 1]; mgm->qp[members_count - 1] = 0; mgm->members_count = cpu_to_be32(--members_count | (u32) prot << 30); if (prot == MLX4_PROT_ETH) removed_entry = can_remove_steering_entry(dev, port, steer, index, qp->qpn); if (members_count && (prot != MLX4_PROT_ETH || !removed_entry)) { err = mlx4_WRITE_ENTRY(dev, index, mailbox); goto out; } /* We are going to delete the entry, members count should be 0 */ mgm->members_count = cpu_to_be32((u32) prot << 30); if (prev == -1) { /* Remove entry from MGM */ int amgm_index = be32_to_cpu(mgm->next_gid_index) >> 6; if (amgm_index) { err = mlx4_READ_ENTRY(dev, amgm_index, mailbox); if (err) goto out; } else memset(mgm->gid, 0, 16); err = mlx4_WRITE_ENTRY(dev, index, mailbox); if (err) goto out; if (amgm_index) { if (amgm_index < dev->caps.num_mgms) mlx4_warn(dev, "MGM entry %d had AMGM index %d < %d\n", index, amgm_index, dev->caps.num_mgms); else mlx4_bitmap_free(&priv->mcg_table.bitmap, amgm_index - dev->caps.num_mgms, MLX4_USE_RR); } } else { /* Remove entry from AMGM */ int cur_next_index = be32_to_cpu(mgm->next_gid_index) >> 6; err = mlx4_READ_ENTRY(dev, prev, mailbox); if (err) goto out; mgm->next_gid_index = cpu_to_be32(cur_next_index << 6); err = mlx4_WRITE_ENTRY(dev, prev, mailbox); if (err) goto out; if (index < dev->caps.num_mgms) mlx4_warn(dev, "entry %d had next AMGM index %d < %d\n", prev, index, dev->caps.num_mgms); else mlx4_bitmap_free(&priv->mcg_table.bitmap, index - dev->caps.num_mgms, MLX4_USE_RR); } out: mutex_unlock(&priv->mcg_table.mutex); mlx4_free_cmd_mailbox(dev, mailbox); if (err && dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) /* In case device is under an error, return success as a closing command */ err = 0; return err; } static int mlx4_QP_ATTACH(struct mlx4_dev *dev, 
struct mlx4_qp *qp, u8 gid[16], u8 attach, u8 block_loopback, enum mlx4_protocol prot) { struct mlx4_cmd_mailbox *mailbox; int err = 0; int qpn; if (!mlx4_is_mfunc(dev)) return -EBADF; mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) return PTR_ERR(mailbox); memcpy(mailbox->buf, gid, 16); qpn = qp->qpn; qpn |= (prot << 28); if (attach && block_loopback) - qpn |= (1 << 31); + qpn |= (1U << 31); err = mlx4_cmd(dev, mailbox->dma, qpn, attach, MLX4_CMD_QP_ATTACH, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); mlx4_free_cmd_mailbox(dev, mailbox); if (err && !attach && dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) err = 0; return err; } int mlx4_trans_to_dmfs_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], u8 port, int block_mcast_loopback, enum mlx4_protocol prot, u64 *reg_id) { struct mlx4_spec_list spec = { {NULL} }; __be64 mac_mask = cpu_to_be64(MLX4_MAC_MASK << 16); struct mlx4_net_trans_rule rule = { .queue_mode = MLX4_NET_TRANS_Q_FIFO, .exclusive = 0, .promisc_mode = MLX4_FS_REGULAR, .priority = MLX4_DOMAIN_NIC, }; rule.allow_loopback = !block_mcast_loopback; rule.port = port; rule.qpn = qp->qpn; INIT_LIST_HEAD(&rule.list); switch (prot) { case MLX4_PROT_ETH: spec.id = MLX4_NET_TRANS_RULE_ID_ETH; memcpy(spec.eth.dst_mac, &gid[10], ETH_ALEN); memcpy(spec.eth.dst_mac_msk, &mac_mask, ETH_ALEN); break; case MLX4_PROT_IB_IPV6: spec.id = MLX4_NET_TRANS_RULE_ID_IB; memcpy(spec.ib.dst_gid, gid, 16); memset(&spec.ib.dst_gid_msk, 0xff, 16); break; default: return -EINVAL; } list_add_tail(&spec.list, &rule.list); return mlx4_flow_attach(dev, &rule, reg_id); } int mlx4_multicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], u8 port, int block_mcast_loopback, enum mlx4_protocol prot, u64 *reg_id) { switch (dev->caps.steering_mode) { case MLX4_STEERING_MODE_A0: if (prot == MLX4_PROT_ETH) return 0; case MLX4_STEERING_MODE_B0: if (prot == MLX4_PROT_ETH) gid[7] |= (MLX4_MC_STEER << 1); if (mlx4_is_mfunc(dev)) return mlx4_QP_ATTACH(dev, qp, gid, 1, block_mcast_loopback, prot); return mlx4_qp_attach_common(dev, qp, gid, block_mcast_loopback, prot, MLX4_MC_STEER); case MLX4_STEERING_MODE_DEVICE_MANAGED: return mlx4_trans_to_dmfs_attach(dev, qp, gid, port, block_mcast_loopback, prot, reg_id); default: return -EINVAL; } } EXPORT_SYMBOL_GPL(mlx4_multicast_attach); int mlx4_multicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], enum mlx4_protocol prot, u64 reg_id) { switch (dev->caps.steering_mode) { case MLX4_STEERING_MODE_A0: if (prot == MLX4_PROT_ETH) return 0; case MLX4_STEERING_MODE_B0: if (prot == MLX4_PROT_ETH) gid[7] |= (MLX4_MC_STEER << 1); if (mlx4_is_mfunc(dev)) return mlx4_QP_ATTACH(dev, qp, gid, 0, 0, prot); return mlx4_qp_detach_common(dev, qp, gid, prot, MLX4_MC_STEER); case MLX4_STEERING_MODE_DEVICE_MANAGED: return mlx4_flow_detach(dev, reg_id); default: return -EINVAL; } } EXPORT_SYMBOL_GPL(mlx4_multicast_detach); int mlx4_flow_steer_promisc_add(struct mlx4_dev *dev, u8 port, u32 qpn, enum mlx4_net_trans_promisc_mode mode) { struct mlx4_net_trans_rule rule = { .queue_mode = MLX4_NET_TRANS_Q_FIFO, .exclusive = 0, .allow_loopback = 1, }; u64 *regid_p; switch (mode) { case MLX4_FS_ALL_DEFAULT: regid_p = &dev->regid_promisc_array[port]; break; case MLX4_FS_MC_DEFAULT: regid_p = &dev->regid_allmulti_array[port]; break; default: return -1; } if (*regid_p != 0) return -1; rule.promisc_mode = mode; rule.port = port; rule.qpn = qpn; INIT_LIST_HEAD(&rule.list); mlx4_err(dev, "going promisc on %x\n", port); return mlx4_flow_attach(dev, &rule, 
regid_p); } EXPORT_SYMBOL_GPL(mlx4_flow_steer_promisc_add); int mlx4_flow_steer_promisc_remove(struct mlx4_dev *dev, u8 port, enum mlx4_net_trans_promisc_mode mode) { int ret; u64 *regid_p; switch (mode) { case MLX4_FS_ALL_DEFAULT: regid_p = &dev->regid_promisc_array[port]; break; case MLX4_FS_MC_DEFAULT: regid_p = &dev->regid_allmulti_array[port]; break; default: return -1; } if (*regid_p == 0) return -1; ret = mlx4_flow_detach(dev, *regid_p); if (ret == 0) *regid_p = 0; return ret; } EXPORT_SYMBOL_GPL(mlx4_flow_steer_promisc_remove); int mlx4_unicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], int block_mcast_loopback, enum mlx4_protocol prot) { if (prot == MLX4_PROT_ETH) gid[7] |= (MLX4_UC_STEER << 1); if (mlx4_is_mfunc(dev)) return mlx4_QP_ATTACH(dev, qp, gid, 1, block_mcast_loopback, prot); return mlx4_qp_attach_common(dev, qp, gid, block_mcast_loopback, prot, MLX4_UC_STEER); } EXPORT_SYMBOL_GPL(mlx4_unicast_attach); int mlx4_unicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], enum mlx4_protocol prot) { if (prot == MLX4_PROT_ETH) gid[7] |= (MLX4_UC_STEER << 1); if (mlx4_is_mfunc(dev)) return mlx4_QP_ATTACH(dev, qp, gid, 0, 0, prot); return mlx4_qp_detach_common(dev, qp, gid, prot, MLX4_UC_STEER); } EXPORT_SYMBOL_GPL(mlx4_unicast_detach); int mlx4_PROMISC_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { u32 qpn = (u32) vhcr->in_param & 0xffffffff; int port = mlx4_slave_convert_port(dev, slave, vhcr->in_param >> 62); enum mlx4_steer_type steer = vhcr->in_modifier; if (port < 0) return -EINVAL; /* Promiscuous unicast is not allowed in mfunc */ if (mlx4_is_mfunc(dev) && steer == MLX4_UC_STEER) return 0; if (vhcr->op_modifier) return add_promisc_qp(dev, port, steer, qpn); else return remove_promisc_qp(dev, port, steer, qpn); } static int mlx4_PROMISC(struct mlx4_dev *dev, u32 qpn, enum mlx4_steer_type steer, u8 add, u8 port) { return mlx4_cmd(dev, (u64) qpn | (u64) port << 62, (u32) steer, add, MLX4_CMD_PROMISC, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); } int mlx4_multicast_promisc_add(struct mlx4_dev *dev, u32 qpn, u8 port) { if (mlx4_is_mfunc(dev)) return mlx4_PROMISC(dev, qpn, MLX4_MC_STEER, 1, port); return add_promisc_qp(dev, port, MLX4_MC_STEER, qpn); } EXPORT_SYMBOL_GPL(mlx4_multicast_promisc_add); int mlx4_multicast_promisc_remove(struct mlx4_dev *dev, u32 qpn, u8 port) { if (mlx4_is_mfunc(dev)) return mlx4_PROMISC(dev, qpn, MLX4_MC_STEER, 0, port); return remove_promisc_qp(dev, port, MLX4_MC_STEER, qpn); } EXPORT_SYMBOL_GPL(mlx4_multicast_promisc_remove); int mlx4_unicast_promisc_add(struct mlx4_dev *dev, u32 qpn, u8 port) { if (mlx4_is_mfunc(dev)) return mlx4_PROMISC(dev, qpn, MLX4_UC_STEER, 1, port); return add_promisc_qp(dev, port, MLX4_UC_STEER, qpn); } EXPORT_SYMBOL_GPL(mlx4_unicast_promisc_add); int mlx4_unicast_promisc_remove(struct mlx4_dev *dev, u32 qpn, u8 port) { if (mlx4_is_mfunc(dev)) return mlx4_PROMISC(dev, qpn, MLX4_UC_STEER, 0, port); return remove_promisc_qp(dev, port, MLX4_UC_STEER, qpn); } EXPORT_SYMBOL_GPL(mlx4_unicast_promisc_remove); int mlx4_init_mcg_table(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); int err; /* No need for mcg_table when fw managed the mcg table*/ if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) return 0; err = mlx4_bitmap_init(&priv->mcg_table.bitmap, dev->caps.num_amgms, dev->caps.num_amgms - 1, 0, 0); if (err) return err; mutex_init(&priv->mcg_table.mutex); 
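	/*
	 * Note that this bitmap covers only the AMGM (chained) entries: the
	 * attach path above offsets an allocated index by dev->caps.num_mgms
	 * before linking it into the hash chain, and the detach path
	 * subtracts the same offset again before freeing it.
	 */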
return 0; } void mlx4_cleanup_mcg_table(struct mlx4_dev *dev) { if (dev->caps.steering_mode != MLX4_STEERING_MODE_DEVICE_MANAGED) mlx4_bitmap_cleanup(&mlx4_priv(dev)->mcg_table.bitmap); } Index: head/sys/dev/mlx4/mlx4_en/mlx4_en_tx.c =================================================================== --- head/sys/dev/mlx4/mlx4_en/mlx4_en_tx.c (revision 327863) +++ head/sys/dev/mlx4/mlx4_en/mlx4_en_tx.c (revision 327864) @@ -1,1089 +1,1089 @@ /* * Copyright (c) 2007, 2014 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* */ #define LINUXKPI_PARAM_PREFIX mlx4_ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "en.h" int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring **pring, u32 size, u16 stride, int node, int queue_idx) { struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_en_tx_ring *ring; uint32_t x; int tmp; int err; ring = kzalloc_node(sizeof(struct mlx4_en_tx_ring), GFP_KERNEL, node); if (!ring) { ring = kzalloc(sizeof(struct mlx4_en_tx_ring), GFP_KERNEL); if (!ring) { en_err(priv, "Failed allocating TX ring\n"); return -ENOMEM; } } /* Create DMA descriptor TAG */ if ((err = -bus_dma_tag_create( bus_get_dma_tag(mdev->pdev->dev.bsddev), 1, /* any alignment */ 0, /* no boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ MLX4_EN_TX_MAX_PAYLOAD_SIZE, /* maxsize */ MLX4_EN_TX_MAX_MBUF_FRAGS, /* nsegments */ MLX4_EN_TX_MAX_MBUF_SIZE, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockfuncarg */ &ring->dma_tag))) goto done; ring->size = size; ring->size_mask = size - 1; ring->stride = stride; ring->inline_thold = MAX(MIN_PKT_LEN, MIN(priv->prof->inline_thold, MAX_INLINE)); mtx_init(&ring->tx_lock.m, "mlx4 tx", NULL, MTX_DEF); mtx_init(&ring->comp_lock.m, "mlx4 comp", NULL, MTX_DEF); /* Allocate the buf ring */ ring->br = buf_ring_alloc(MLX4_EN_DEF_TX_QUEUE_SIZE, M_DEVBUF, M_WAITOK, &ring->tx_lock.m); if (ring->br == NULL) { en_err(priv, "Failed allocating tx_info ring\n"); err = -ENOMEM; goto err_free_dma_tag; } tmp = size * sizeof(struct mlx4_en_tx_info); ring->tx_info = kzalloc_node(tmp, GFP_KERNEL, node); if (!ring->tx_info) { ring->tx_info = kzalloc(tmp, GFP_KERNEL); if (!ring->tx_info) { err = -ENOMEM; goto err_ring; } } /* Create DMA descriptor MAPs */ for (x = 0; x != size; x++) { err = -bus_dmamap_create(ring->dma_tag, 0, &ring->tx_info[x].dma_map); if (err != 0) { while (x--) { bus_dmamap_destroy(ring->dma_tag, ring->tx_info[x].dma_map); } goto err_info; } } en_dbg(DRV, priv, "Allocated tx_info ring at addr:%p size:%d\n", ring->tx_info, tmp); ring->buf_size = ALIGN(size * ring->stride, MLX4_EN_PAGE_SIZE); /* Allocate HW buffers on provided NUMA node */ err = mlx4_alloc_hwq_res(mdev->dev, &ring->wqres, ring->buf_size, 2 * PAGE_SIZE); if (err) { en_err(priv, "Failed allocating hwq resources\n"); goto err_dma_map; } err = mlx4_en_map_buffer(&ring->wqres.buf); if (err) { en_err(priv, "Failed to map TX buffer\n"); goto err_hwq_res; } ring->buf = ring->wqres.buf.direct.buf; en_dbg(DRV, priv, "Allocated TX ring (addr:%p) - buf:%p size:%d " "buf_size:%d dma:%llx\n", ring, ring->buf, ring->size, ring->buf_size, (unsigned long long) ring->wqres.buf.direct.map); err = mlx4_qp_reserve_range(mdev->dev, 1, 1, &ring->qpn, MLX4_RESERVE_ETH_BF_QP); if (err) { en_err(priv, "failed reserving qp for TX ring\n"); goto err_map; } err = mlx4_qp_alloc(mdev->dev, ring->qpn, &ring->qp, GFP_KERNEL); if (err) { en_err(priv, "Failed allocating qp %d\n", ring->qpn); goto err_reserve; } ring->qp.event = mlx4_en_sqp_event; err = mlx4_bf_alloc(mdev->dev, &ring->bf, node); if (err) { en_dbg(DRV, priv, "working without blueflame (%d)", err); ring->bf.uar = &mdev->priv_uar; ring->bf.uar->map = mdev->uar_map; ring->bf_enabled = false; } else ring->bf_enabled = true; ring->queue_index = queue_idx; *pring = ring; return 0; err_reserve: mlx4_qp_release_range(mdev->dev, ring->qpn, 1); err_map: mlx4_en_unmap_buffer(&ring->wqres.buf); err_hwq_res: 
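	/*
	 * From here down the teardown runs in reverse order of allocation:
	 * HW queue resources, DMA maps, the tx_info array, the buf ring and
	 * finally the DMA tag, before the ring structure itself is freed.
	 */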
mlx4_free_hwq_res(mdev->dev, &ring->wqres, ring->buf_size); err_dma_map: for (x = 0; x != size; x++) bus_dmamap_destroy(ring->dma_tag, ring->tx_info[x].dma_map); err_info: vfree(ring->tx_info); err_ring: buf_ring_free(ring->br, M_DEVBUF); err_free_dma_tag: bus_dma_tag_destroy(ring->dma_tag); done: kfree(ring); return err; } void mlx4_en_destroy_tx_ring(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring **pring) { struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_en_tx_ring *ring = *pring; uint32_t x; en_dbg(DRV, priv, "Destroying tx ring, qpn: %d\n", ring->qpn); buf_ring_free(ring->br, M_DEVBUF); if (ring->bf_enabled) mlx4_bf_free(mdev->dev, &ring->bf); mlx4_qp_remove(mdev->dev, &ring->qp); mlx4_qp_free(mdev->dev, &ring->qp); mlx4_qp_release_range(priv->mdev->dev, ring->qpn, 1); mlx4_en_unmap_buffer(&ring->wqres.buf); mlx4_free_hwq_res(mdev->dev, &ring->wqres, ring->buf_size); for (x = 0; x != ring->size; x++) bus_dmamap_destroy(ring->dma_tag, ring->tx_info[x].dma_map); vfree(ring->tx_info); mtx_destroy(&ring->tx_lock.m); mtx_destroy(&ring->comp_lock.m); bus_dma_tag_destroy(ring->dma_tag); kfree(ring); *pring = NULL; } int mlx4_en_activate_tx_ring(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring *ring, int cq, int user_prio) { struct mlx4_en_dev *mdev = priv->mdev; int err; ring->cqn = cq; ring->prod = 0; ring->cons = 0xffffffff; ring->last_nr_txbb = 1; ring->poll_cnt = 0; ring->blocked = 0; memset(ring->buf, 0, ring->buf_size); ring->qp_state = MLX4_QP_STATE_RST; ring->doorbell_qpn = ring->qp.qpn << 8; mlx4_en_fill_qp_context(priv, ring->size, ring->stride, 1, 0, ring->qpn, ring->cqn, user_prio, &ring->context); if (ring->bf_enabled) ring->context.usr_page = cpu_to_be32(ring->bf.uar->index); err = mlx4_qp_to_ready(mdev->dev, &ring->wqres.mtt, &ring->context, &ring->qp, &ring->qp_state); return err; } void mlx4_en_deactivate_tx_ring(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring *ring) { struct mlx4_en_dev *mdev = priv->mdev; mlx4_qp_modify(mdev->dev, NULL, ring->qp_state, MLX4_QP_STATE_RST, NULL, 0, 0, &ring->qp); } static volatile struct mlx4_wqe_data_seg * mlx4_en_store_inline_lso_data(volatile struct mlx4_wqe_data_seg *dseg, struct mbuf *mb, int len, __be32 owner_bit) { uint8_t *inl = __DEVOLATILE(uint8_t *, dseg); /* copy data into place */ m_copydata(mb, 0, len, inl + 4); dseg += DIV_ROUND_UP(4 + len, DS_SIZE_ALIGNMENT); return (dseg); } static void mlx4_en_store_inline_lso_header(volatile struct mlx4_wqe_data_seg *dseg, int len, __be32 owner_bit) { } static void mlx4_en_stamp_wqe(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring *ring, u32 index, u8 owner) { struct mlx4_en_tx_info *tx_info = &ring->tx_info[index]; struct mlx4_en_tx_desc *tx_desc = (struct mlx4_en_tx_desc *) (ring->buf + (index * TXBB_SIZE)); volatile __be32 *ptr = (__be32 *)tx_desc; const __be32 stamp = cpu_to_be32(STAMP_VAL | ((u32)owner << STAMP_SHIFT)); u32 i; /* Stamp the freed descriptor */ for (i = 0; i < tx_info->nr_txbb * TXBB_SIZE; i += STAMP_STRIDE) { *ptr = stamp; ptr += STAMP_DWORDS; } } static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring *ring, u32 index) { struct mlx4_en_tx_info *tx_info; struct mbuf *mb; tx_info = &ring->tx_info[index]; mb = tx_info->mb; if (mb == NULL) goto done; bus_dmamap_sync(ring->dma_tag, tx_info->dma_map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(ring->dma_tag, tx_info->dma_map); m_freem(mb); done: return (tx_info->nr_txbb); } int mlx4_en_free_tx_buf(struct net_device *dev, struct mlx4_en_tx_ring *ring) { struct mlx4_en_priv *priv = 
netdev_priv(dev); int cnt = 0; /* Skip last polled descriptor */ ring->cons += ring->last_nr_txbb; en_dbg(DRV, priv, "Freeing Tx buf - cons:0x%x prod:0x%x\n", ring->cons, ring->prod); if ((u32) (ring->prod - ring->cons) > ring->size) { en_warn(priv, "Tx consumer passed producer!\n"); return 0; } while (ring->cons != ring->prod) { ring->last_nr_txbb = mlx4_en_free_tx_desc(priv, ring, ring->cons & ring->size_mask); ring->cons += ring->last_nr_txbb; cnt++; } if (cnt) en_dbg(DRV, priv, "Freed %d uncompleted tx descriptors\n", cnt); return cnt; } static bool mlx4_en_tx_ring_is_full(struct mlx4_en_tx_ring *ring) { int wqs; wqs = ring->size - (ring->prod - ring->cons); return (wqs < (HEADROOM + (2 * MLX4_EN_TX_WQE_MAX_WQEBBS))); } static int mlx4_en_process_tx_cq(struct net_device *dev, struct mlx4_en_cq *cq) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_cq *mcq = &cq->mcq; struct mlx4_en_tx_ring *ring = priv->tx_ring[cq->ring]; struct mlx4_cqe *cqe; u16 index; u16 new_index, ring_index, stamp_index; u32 txbbs_skipped = 0; u32 txbbs_stamp = 0; u32 cons_index = mcq->cons_index; int size = cq->size; u32 size_mask = ring->size_mask; struct mlx4_cqe *buf = cq->buf; int factor = priv->cqe_factor; if (!priv->port_up) return 0; index = cons_index & size_mask; cqe = &buf[(index << factor) + factor]; ring_index = ring->cons & size_mask; stamp_index = ring_index; /* Process all completed CQEs */ while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK, cons_index & size)) { /* * make sure we read the CQE after we read the * ownership bit */ rmb(); if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == MLX4_CQE_OPCODE_ERROR)) { en_err(priv, "CQE completed in error - vendor syndrom: 0x%x syndrom: 0x%x\n", ((struct mlx4_err_cqe *)cqe)-> vendor_err_syndrome, ((struct mlx4_err_cqe *)cqe)->syndrome); } /* Skip over last polled CQE */ new_index = be16_to_cpu(cqe->wqe_index) & size_mask; do { txbbs_skipped += ring->last_nr_txbb; ring_index = (ring_index + ring->last_nr_txbb) & size_mask; /* free next descriptor */ ring->last_nr_txbb = mlx4_en_free_tx_desc( priv, ring, ring_index); mlx4_en_stamp_wqe(priv, ring, stamp_index, !!((ring->cons + txbbs_stamp) & ring->size)); stamp_index = ring_index; txbbs_stamp = txbbs_skipped; } while (ring_index != new_index); ++cons_index; index = cons_index & size_mask; cqe = &buf[(index << factor) + factor]; } /* * To prevent CQ overflow we first update CQ consumer and only then * the ring consumer. 
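 * Publishing the new consumer index through mlx4_cq_set_ci() lets the
 * hardware reuse those CQEs; only then is ring->cons advanced, which is
 * what allows the send path to post new work (and generate further
 * completions) against the freed descriptors.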
*/ mcq->cons_index = cons_index; mlx4_cq_set_ci(mcq); wmb(); ring->cons += txbbs_skipped; /* Wakeup Tx queue if it was stopped and ring is not full */ if (unlikely(ring->blocked) && !mlx4_en_tx_ring_is_full(ring)) { ring->blocked = 0; if (atomic_fetchadd_int(&priv->blocked, -1) == 1) atomic_clear_int(&dev->if_drv_flags ,IFF_DRV_OACTIVE); priv->port_stats.wake_queue++; ring->wake_queue++; } return (0); } void mlx4_en_tx_irq(struct mlx4_cq *mcq) { struct mlx4_en_cq *cq = container_of(mcq, struct mlx4_en_cq, mcq); struct mlx4_en_priv *priv = netdev_priv(cq->dev); struct mlx4_en_tx_ring *ring = priv->tx_ring[cq->ring]; if (priv->port_up == 0 || !spin_trylock(&ring->comp_lock)) return; mlx4_en_process_tx_cq(cq->dev, cq); mod_timer(&cq->timer, jiffies + 1); spin_unlock(&ring->comp_lock); } void mlx4_en_poll_tx_cq(unsigned long data) { struct mlx4_en_cq *cq = (struct mlx4_en_cq *) data; struct mlx4_en_priv *priv = netdev_priv(cq->dev); struct mlx4_en_tx_ring *ring = priv->tx_ring[cq->ring]; u32 inflight; INC_PERF_COUNTER(priv->pstats.tx_poll); if (priv->port_up == 0) return; if (!spin_trylock(&ring->comp_lock)) { mod_timer(&cq->timer, jiffies + MLX4_EN_TX_POLL_TIMEOUT); return; } mlx4_en_process_tx_cq(cq->dev, cq); inflight = (u32) (ring->prod - ring->cons - ring->last_nr_txbb); /* If there are still packets in flight and the timer has not already * been scheduled by the Tx routine then schedule it here to guarantee * completion processing of these packets */ if (inflight && priv->port_up) mod_timer(&cq->timer, jiffies + MLX4_EN_TX_POLL_TIMEOUT); spin_unlock(&ring->comp_lock); } static inline void mlx4_en_xmit_poll(struct mlx4_en_priv *priv, int tx_ind) { struct mlx4_en_cq *cq = priv->tx_cq[tx_ind]; struct mlx4_en_tx_ring *ring = priv->tx_ring[tx_ind]; if (priv->port_up == 0) return; /* If we don't have a pending timer, set one up to catch our recent post in case the interface becomes idle */ if (!timer_pending(&cq->timer)) mod_timer(&cq->timer, jiffies + MLX4_EN_TX_POLL_TIMEOUT); /* Poll the CQ every mlx4_en_TX_MODER_POLL packets */ if ((++ring->poll_cnt & (MLX4_EN_TX_POLL_MODER - 1)) == 0) if (spin_trylock(&ring->comp_lock)) { mlx4_en_process_tx_cq(priv->dev, cq); spin_unlock(&ring->comp_lock); } } static u16 mlx4_en_get_inline_hdr_size(struct mlx4_en_tx_ring *ring, struct mbuf *mb) { u16 retval; /* only copy from first fragment, if possible */ retval = MIN(ring->inline_thold, mb->m_len); /* check for too little data */ if (unlikely(retval < MIN_PKT_LEN)) retval = MIN(ring->inline_thold, mb->m_pkthdr.len); return (retval); } static int mlx4_en_get_header_size(struct mbuf *mb) { struct ether_vlan_header *eh; struct tcphdr *th; struct ip *ip; int ip_hlen, tcp_hlen; struct ip6_hdr *ip6; uint16_t eth_type; int eth_hdr_len; eh = mtod(mb, struct ether_vlan_header *); if (mb->m_len < ETHER_HDR_LEN) return (0); if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { eth_type = ntohs(eh->evl_proto); eth_hdr_len = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; } else { eth_type = ntohs(eh->evl_encap_proto); eth_hdr_len = ETHER_HDR_LEN; } if (mb->m_len < eth_hdr_len) return (0); switch (eth_type) { case ETHERTYPE_IP: ip = (struct ip *)(mb->m_data + eth_hdr_len); if (mb->m_len < eth_hdr_len + sizeof(*ip)) return (0); if (ip->ip_p != IPPROTO_TCP) return (0); ip_hlen = ip->ip_hl << 2; eth_hdr_len += ip_hlen; break; case ETHERTYPE_IPV6: ip6 = (struct ip6_hdr *)(mb->m_data + eth_hdr_len); if (mb->m_len < eth_hdr_len + sizeof(*ip6)) return (0); if (ip6->ip6_nxt != IPPROTO_TCP) return (0); eth_hdr_len += sizeof(*ip6); break; 
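	/* Any other ethertype is not a TCP/IP frame; report a zero header size. */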
default: return (0); } if (mb->m_len < eth_hdr_len + sizeof(*th)) return (0); th = (struct tcphdr *)(mb->m_data + eth_hdr_len); tcp_hlen = th->th_off << 2; eth_hdr_len += tcp_hlen; if (mb->m_len < eth_hdr_len) return (0); return (eth_hdr_len); } static volatile struct mlx4_wqe_data_seg * mlx4_en_store_inline_data(volatile struct mlx4_wqe_data_seg *dseg, struct mbuf *mb, int len, __be32 owner_bit) { uint8_t *inl = __DEVOLATILE(uint8_t *, dseg); const int spc = MLX4_INLINE_ALIGN - CTRL_SIZE - 4; if (unlikely(len < MIN_PKT_LEN)) { m_copydata(mb, 0, len, inl + 4); memset(inl + 4 + len, 0, MIN_PKT_LEN - len); dseg += DIV_ROUND_UP(4 + MIN_PKT_LEN, DS_SIZE_ALIGNMENT); } else if (len <= spc) { m_copydata(mb, 0, len, inl + 4); dseg += DIV_ROUND_UP(4 + len, DS_SIZE_ALIGNMENT); } else { m_copydata(mb, 0, spc, inl + 4); m_copydata(mb, spc, len - spc, inl + 8 + spc); dseg += DIV_ROUND_UP(8 + len, DS_SIZE_ALIGNMENT); } return (dseg); } static void mlx4_en_store_inline_header(volatile struct mlx4_wqe_data_seg *dseg, int len, __be32 owner_bit) { uint8_t *inl = __DEVOLATILE(uint8_t *, dseg); const int spc = MLX4_INLINE_ALIGN - CTRL_SIZE - 4; if (unlikely(len < MIN_PKT_LEN)) { *(volatile uint32_t *)inl = - SET_BYTE_COUNT((1 << 31) | MIN_PKT_LEN); + SET_BYTE_COUNT((1U << 31) | MIN_PKT_LEN); } else if (len <= spc) { *(volatile uint32_t *)inl = - SET_BYTE_COUNT((1 << 31) | len); + SET_BYTE_COUNT((1U << 31) | len); } else { *(volatile uint32_t *)(inl + 4 + spc) = - SET_BYTE_COUNT((1 << 31) | (len - spc)); + SET_BYTE_COUNT((1U << 31) | (len - spc)); wmb(); *(volatile uint32_t *)inl = - SET_BYTE_COUNT((1 << 31) | spc); + SET_BYTE_COUNT((1U << 31) | spc); } } static uint32_t hashrandom; static void hashrandom_init(void *arg) { /* * It is assumed that the random subsystem has been * initialized when this function is called: */ hashrandom = m_ether_tcpip_hash_init(); } SYSINIT(hashrandom_init, SI_SUB_RANDOM, SI_ORDER_ANY, &hashrandom_init, NULL); u16 mlx4_en_select_queue(struct net_device *dev, struct mbuf *mb) { struct mlx4_en_priv *priv = netdev_priv(dev); u32 rings_p_up = priv->num_tx_rings_p_up; u32 up = 0; u32 queue_index; #if (MLX4_EN_NUM_UP > 1) /* Obtain VLAN information if present */ if (mb->m_flags & M_VLANTAG) { u32 vlan_tag = mb->m_pkthdr.ether_vtag; up = (vlan_tag >> 13) % MLX4_EN_NUM_UP; } #endif queue_index = m_ether_tcpip_hash(MBUF_HASHFLAG_L3 | MBUF_HASHFLAG_L4, mb, hashrandom); return ((queue_index % rings_p_up) + (up * rings_p_up)); } static void mlx4_bf_copy(void __iomem *dst, volatile unsigned long *src, unsigned bytecnt) { __iowrite64_copy(dst, __DEVOLATILE(void *, src), bytecnt / 8); } static int mlx4_en_xmit(struct mlx4_en_priv *priv, int tx_ind, struct mbuf **mbp) { enum { DS_FACT = TXBB_SIZE / DS_SIZE_ALIGNMENT, CTRL_FLAGS = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE | MLX4_WQE_CTRL_SOLICITED), }; bus_dma_segment_t segs[MLX4_EN_TX_MAX_MBUF_FRAGS]; volatile struct mlx4_wqe_data_seg *dseg; volatile struct mlx4_wqe_data_seg *dseg_inline; volatile struct mlx4_en_tx_desc *tx_desc; struct mlx4_en_tx_ring *ring = priv->tx_ring[tx_ind]; struct ifnet *ifp = priv->dev; struct mlx4_en_tx_info *tx_info; struct mbuf *mb = *mbp; struct mbuf *m; __be32 owner_bit; int nr_segs; int pad; int err; u32 bf_size; u32 bf_prod; u32 opcode; u16 index; u16 ds_cnt; u16 ihs; if (unlikely(!priv->port_up)) { err = EINVAL; goto tx_drop; } /* check if TX ring is full */ if (unlikely(mlx4_en_tx_ring_is_full(ring))) { /* every full native Tx ring stops queue */ if (ring->blocked == 0) atomic_add_int(&priv->blocked, 1); /* Set 
HW-queue-is-full flag */ atomic_set_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); priv->port_stats.queue_stopped++; ring->blocked = 1; ring->queue_stopped++; /* Use interrupts to find out when queue opened */ mlx4_en_arm_cq(priv, priv->tx_cq[tx_ind]); return (ENOBUFS); } /* sanity check we are not wrapping around */ KASSERT(((~ring->prod) & ring->size_mask) >= (MLX4_EN_TX_WQE_MAX_WQEBBS - 1), ("Wrapping around TX ring")); /* Track current inflight packets for performance analysis */ AVG_PERF_COUNTER(priv->pstats.inflight_avg, (u32) (ring->prod - ring->cons - 1)); /* Track current mbuf packet header length */ AVG_PERF_COUNTER(priv->pstats.tx_pktsz_avg, mb->m_pkthdr.len); /* Grab an index and try to transmit packet */ owner_bit = (ring->prod & ring->size) ? cpu_to_be32(MLX4_EN_BIT_DESC_OWN) : 0; index = ring->prod & ring->size_mask; tx_desc = (volatile struct mlx4_en_tx_desc *) (ring->buf + index * TXBB_SIZE); tx_info = &ring->tx_info[index]; dseg = &tx_desc->data; /* send a copy of the frame to the BPF listener, if any */ if (ifp != NULL && ifp->if_bpf != NULL) ETHER_BPF_MTAP(ifp, mb); /* get default flags */ tx_desc->ctrl.srcrb_flags = CTRL_FLAGS; if (mb->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TSO)) tx_desc->ctrl.srcrb_flags |= cpu_to_be32(MLX4_WQE_CTRL_IP_CSUM); if (mb->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP | CSUM_UDP_IPV6 | CSUM_TCP_IPV6 | CSUM_TSO)) tx_desc->ctrl.srcrb_flags |= cpu_to_be32(MLX4_WQE_CTRL_TCP_UDP_CSUM); /* do statistics */ if (likely(tx_desc->ctrl.srcrb_flags != CTRL_FLAGS)) { priv->port_stats.tx_chksum_offload++; ring->tx_csum++; } /* check for VLAN tag */ if (mb->m_flags & M_VLANTAG) { tx_desc->ctrl.vlan_tag = cpu_to_be16(mb->m_pkthdr.ether_vtag); tx_desc->ctrl.ins_vlan = MLX4_WQE_CTRL_INS_CVLAN; } else { tx_desc->ctrl.vlan_tag = 0; tx_desc->ctrl.ins_vlan = 0; } if (unlikely(mlx4_is_mfunc(priv->mdev->dev) || priv->validate_loopback)) { /* * Copy destination MAC address to WQE. 
This allows * loopback in eSwitch, so that VFs and PF can * communicate with each other: */ m_copydata(mb, 0, 2, __DEVOLATILE(void *, &tx_desc->ctrl.srcrb_flags16[0])); m_copydata(mb, 2, 4, __DEVOLATILE(void *, &tx_desc->ctrl.imm)); } else { /* clear immediate field */ tx_desc->ctrl.imm = 0; } /* Handle LSO (TSO) packets */ if (mb->m_pkthdr.csum_flags & CSUM_TSO) { u32 payload_len; u32 mss = mb->m_pkthdr.tso_segsz; u32 num_pkts; opcode = cpu_to_be32(MLX4_OPCODE_LSO | MLX4_WQE_CTRL_RR) | owner_bit; ihs = mlx4_en_get_header_size(mb); if (unlikely(ihs > MAX_INLINE)) { ring->oversized_packets++; err = EINVAL; goto tx_drop; } tx_desc->lso.mss_hdr_size = cpu_to_be32((mss << 16) | ihs); payload_len = mb->m_pkthdr.len - ihs; if (unlikely(payload_len == 0)) num_pkts = 1; else num_pkts = DIV_ROUND_UP(payload_len, mss); ring->bytes += payload_len + (num_pkts * ihs); ring->packets += num_pkts; ring->tso_packets++; /* store pointer to inline header */ dseg_inline = dseg; /* copy data inline */ dseg = mlx4_en_store_inline_lso_data(dseg, mb, ihs, owner_bit); } else { opcode = cpu_to_be32(MLX4_OPCODE_SEND) | owner_bit; ihs = mlx4_en_get_inline_hdr_size(ring, mb); ring->bytes += max_t (unsigned int, mb->m_pkthdr.len, ETHER_MIN_LEN - ETHER_CRC_LEN); ring->packets++; /* store pointer to inline header */ dseg_inline = dseg; /* copy data inline */ dseg = mlx4_en_store_inline_data(dseg, mb, ihs, owner_bit); } m_adj(mb, ihs); err = bus_dmamap_load_mbuf_sg(ring->dma_tag, tx_info->dma_map, mb, segs, &nr_segs, BUS_DMA_NOWAIT); if (unlikely(err == EFBIG)) { /* Too many mbuf fragments */ ring->defrag_attempts++; m = m_defrag(mb, M_NOWAIT); if (m == NULL) { ring->oversized_packets++; goto tx_drop; } mb = m; /* Try again */ err = bus_dmamap_load_mbuf_sg(ring->dma_tag, tx_info->dma_map, mb, segs, &nr_segs, BUS_DMA_NOWAIT); } /* catch errors */ if (unlikely(err != 0)) { ring->oversized_packets++; goto tx_drop; } /* If there were no errors and we didn't load anything, don't sync. */ if (nr_segs != 0) { /* make sure all mbuf data is written to RAM */ bus_dmamap_sync(ring->dma_tag, tx_info->dma_map, BUS_DMASYNC_PREWRITE); } else { /* All data was inlined, free the mbuf. */ bus_dmamap_unload(ring->dma_tag, tx_info->dma_map); m_freem(mb); mb = NULL; } /* compute number of DS needed */ ds_cnt = (dseg - ((volatile struct mlx4_wqe_data_seg *)tx_desc)) + nr_segs; /* * Check if the next request can wrap around and fill the end * of the current request with zero immediate data: */ pad = DIV_ROUND_UP(ds_cnt, DS_FACT); pad = (~(ring->prod + pad)) & ring->size_mask; if (unlikely(pad < (MLX4_EN_TX_WQE_MAX_WQEBBS - 1))) { /* * Compute the least number of DS blocks we need to * pad in order to achieve a TX ring wraparound: */ pad = (DS_FACT * (pad + 1)); } else { /* * The hardware will automatically jump to the next * TXBB. No need for padding. */ pad = 0; } /* compute total number of DS blocks */ ds_cnt += pad; /* * When modifying this code, please ensure that the following * computation is always less than or equal to 0x3F: * * ((MLX4_EN_TX_WQE_MAX_WQEBBS - 1) * DS_FACT) + * (MLX4_EN_TX_WQE_MAX_WQEBBS * DS_FACT) * * Else the "ds_cnt" variable can become too big. 
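 *
 * As a worked example, assuming TXBB_SIZE = 64 and DS_SIZE_ALIGNMENT = 16
 * (so DS_FACT = 4) and MLX4_EN_TX_WQE_MAX_WQEBBS = 8, the expression above
 * evaluates to (8 - 1) * 4 + 8 * 4 = 60, which still fits the 6-bit
 * fence_size field (0x3F). These constants are assumptions for
 * illustration only; the authoritative values come from the driver
 * headers included above.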
*/ tx_desc->ctrl.fence_size = (ds_cnt & 0x3f); /* store pointer to mbuf */ tx_info->mb = mb; tx_info->nr_txbb = DIV_ROUND_UP(ds_cnt, DS_FACT); bf_size = ds_cnt * DS_SIZE_ALIGNMENT; bf_prod = ring->prod; /* compute end of "dseg" array */ dseg += nr_segs + pad; /* pad using zero immediate dseg */ while (pad--) { dseg--; dseg->addr = 0; dseg->lkey = 0; wmb(); - dseg->byte_count = SET_BYTE_COUNT((1 << 31)|0); + dseg->byte_count = SET_BYTE_COUNT((1U << 31)|0); } /* fill segment list */ while (nr_segs--) { if (unlikely(segs[nr_segs].ds_len == 0)) { dseg--; dseg->addr = 0; dseg->lkey = 0; wmb(); - dseg->byte_count = SET_BYTE_COUNT((1 << 31)|0); + dseg->byte_count = SET_BYTE_COUNT((1U << 31)|0); } else { dseg--; dseg->addr = cpu_to_be64((uint64_t)segs[nr_segs].ds_addr); dseg->lkey = cpu_to_be32(priv->mdev->mr.key); wmb(); dseg->byte_count = SET_BYTE_COUNT((uint32_t)segs[nr_segs].ds_len); } } wmb(); /* write owner bits in reverse order */ if ((opcode & cpu_to_be32(0x1F)) == cpu_to_be32(MLX4_OPCODE_LSO)) mlx4_en_store_inline_lso_header(dseg_inline, ihs, owner_bit); else mlx4_en_store_inline_header(dseg_inline, ihs, owner_bit); /* update producer counter */ ring->prod += tx_info->nr_txbb; if (ring->bf_enabled && bf_size <= MAX_BF && (tx_desc->ctrl.ins_vlan != MLX4_WQE_CTRL_INS_CVLAN)) { /* store doorbell number */ *(volatile __be32 *) (&tx_desc->ctrl.vlan_tag) |= cpu_to_be32(ring->doorbell_qpn); /* or in producer number for this WQE */ opcode |= cpu_to_be32((bf_prod & 0xffff) << 8); /* * Ensure the new descriptor hits memory before * setting ownership of this descriptor to HW: */ wmb(); tx_desc->ctrl.owner_opcode = opcode; wmb(); mlx4_bf_copy(((u8 *)ring->bf.reg) + ring->bf.offset, (volatile unsigned long *) &tx_desc->ctrl, bf_size); wmb(); ring->bf.offset ^= ring->bf.buf_size; } else { /* * Ensure the new descriptor hits memory before * setting ownership of this descriptor to HW: */ wmb(); tx_desc->ctrl.owner_opcode = opcode; wmb(); writel(cpu_to_be32(ring->doorbell_qpn), ((u8 *)ring->bf.uar->map) + MLX4_SEND_DOORBELL); } return (0); tx_drop: *mbp = NULL; m_freem(mb); return (err); } static int mlx4_en_transmit_locked(struct ifnet *dev, int tx_ind, struct mbuf *m) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_tx_ring *ring; struct mbuf *next; int enqueued, err = 0; ring = priv->tx_ring[tx_ind]; if ((dev->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING || priv->port_up == 0) { if (m != NULL) err = drbr_enqueue(dev, ring->br, m); return (err); } enqueued = 0; if (m != NULL) /* * If we can't insert mbuf into drbr, try to xmit anyway. * We keep the error we got so we could return that after xmit. 
*/ err = drbr_enqueue(dev, ring->br, m); /* Process the queue */ while ((next = drbr_peek(dev, ring->br)) != NULL) { if (mlx4_en_xmit(priv, tx_ind, &next) != 0) { if (next == NULL) { drbr_advance(dev, ring->br); } else { drbr_putback(dev, ring->br, next); } break; } drbr_advance(dev, ring->br); enqueued++; if ((dev->if_drv_flags & IFF_DRV_RUNNING) == 0) break; } if (enqueued > 0) ring->watchdog_time = ticks; return (err); } void mlx4_en_tx_que(void *context, int pending) { struct mlx4_en_tx_ring *ring; struct mlx4_en_priv *priv; struct net_device *dev; struct mlx4_en_cq *cq; int tx_ind; cq = context; dev = cq->dev; priv = dev->if_softc; tx_ind = cq->ring; ring = priv->tx_ring[tx_ind]; if (priv->port_up != 0 && (dev->if_drv_flags & IFF_DRV_RUNNING) != 0) { mlx4_en_xmit_poll(priv, tx_ind); spin_lock(&ring->tx_lock); if (!drbr_empty(dev, ring->br)) mlx4_en_transmit_locked(dev, tx_ind, NULL); spin_unlock(&ring->tx_lock); } } int mlx4_en_transmit(struct ifnet *dev, struct mbuf *m) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_tx_ring *ring; struct mlx4_en_cq *cq; int i, err = 0; if (priv->port_up == 0) { m_freem(m); return (ENETDOWN); } /* Compute which queue to use */ if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) { i = (m->m_pkthdr.flowid % 128) % priv->tx_ring_num; } else { i = mlx4_en_select_queue(dev, m); } ring = priv->tx_ring[i]; if (spin_trylock(&ring->tx_lock)) { err = mlx4_en_transmit_locked(dev, i, m); spin_unlock(&ring->tx_lock); /* Poll CQ here */ mlx4_en_xmit_poll(priv, i); } else { err = drbr_enqueue(dev, ring->br, m); cq = priv->tx_cq[i]; taskqueue_enqueue(cq->tq, &cq->cq_task); } #if __FreeBSD_version >= 1100000 if (unlikely(err != 0)) if_inc_counter(dev, IFCOUNTER_IQDROPS, 1); #endif return (err); } /* * Flush ring buffers. */ void mlx4_en_qflush(struct ifnet *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_tx_ring *ring; struct mbuf *m; if (priv->port_up == 0) return; for (int i = 0; i < priv->tx_ring_num; i++) { ring = priv->tx_ring[i]; spin_lock(&ring->tx_lock); while ((m = buf_ring_dequeue_sc(ring->br)) != NULL) m_freem(m); spin_unlock(&ring->tx_lock); } if_qflush(dev); } Index: head/sys/dev/mlx4/mlx4_ib/mlx4_ib.h =================================================================== --- head/sys/dev/mlx4/mlx4_ib/mlx4_ib.h (revision 327863) +++ head/sys/dev/mlx4/mlx4_ib/mlx4_ib.h (revision 327864) @@ -1,898 +1,898 @@ /* * Copyright (c) 2006, 2007 Cisco Systems. All rights reserved. * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #ifndef MLX4_IB_H #define MLX4_IB_H #include #include #include #include #include #include #include #include #include #include #include #include #include #define MLX4_IB_DRV_NAME "mlx4_ib" #ifdef pr_fmt #undef pr_fmt #endif #define pr_fmt(fmt) "<" MLX4_IB_DRV_NAME "> %s: " fmt, __func__ #define mlx4_ib_warn(ibdev, format, arg...) \ dev_warn((ibdev)->dma_device, MLX4_IB_DRV_NAME ": " format, ## arg) enum { MLX4_IB_SQ_MIN_WQE_SHIFT = 6, MLX4_IB_MAX_HEADROOM = 2048 }; #define MLX4_IB_SQ_HEADROOM(shift) ((MLX4_IB_MAX_HEADROOM >> (shift)) + 1) #define MLX4_IB_SQ_MAX_SPARE (MLX4_IB_SQ_HEADROOM(MLX4_IB_SQ_MIN_WQE_SHIFT)) /*module param to indicate if SM assigns the alias_GUID*/ extern int mlx4_ib_sm_guid_assign; extern struct proc_dir_entry *mlx4_mrs_dir_entry; #define MLX4_IB_UC_STEER_QPN_ALIGN 1 #define MLX4_IB_UC_MAX_NUM_QPS 256 enum hw_bar_type { HW_BAR_BF, HW_BAR_DB, HW_BAR_CLOCK, HW_BAR_COUNT }; struct mlx4_ib_vma_private_data { struct vm_area_struct *vma; }; struct mlx4_ib_ucontext { struct ib_ucontext ibucontext; struct mlx4_uar uar; struct list_head db_page_list; struct mutex db_page_mutex; struct mlx4_ib_vma_private_data hw_bar_info[HW_BAR_COUNT]; }; struct mlx4_ib_pd { struct ib_pd ibpd; u32 pdn; }; struct mlx4_ib_xrcd { struct ib_xrcd ibxrcd; u32 xrcdn; struct ib_pd *pd; struct ib_cq *cq; }; struct mlx4_ib_cq_buf { struct mlx4_buf buf; struct mlx4_mtt mtt; int entry_size; }; struct mlx4_ib_cq_resize { struct mlx4_ib_cq_buf buf; int cqe; }; struct mlx4_ib_cq { struct ib_cq ibcq; struct mlx4_cq mcq; struct mlx4_ib_cq_buf buf; struct mlx4_ib_cq_resize *resize_buf; struct mlx4_db db; spinlock_t lock; struct mutex resize_mutex; struct ib_umem *umem; struct ib_umem *resize_umem; int create_flags; /* List of qps that it serves.*/ struct list_head send_qp_list; struct list_head recv_qp_list; }; #define MLX4_MR_PAGES_ALIGN 0x40 struct mlx4_ib_mr { struct ib_mr ibmr; __be64 *pages; dma_addr_t page_map; u32 npages; u32 max_pages; struct mlx4_mr mmr; struct ib_umem *umem; size_t page_map_size; }; struct mlx4_ib_mw { struct ib_mw ibmw; struct mlx4_mw mmw; }; struct mlx4_ib_fmr { struct ib_fmr ibfmr; struct mlx4_fmr mfmr; }; #define MAX_REGS_PER_FLOW 2 struct mlx4_flow_reg_id { u64 id; u64 mirror; }; struct mlx4_ib_flow { struct ib_flow ibflow; /* translating DMFS verbs sniffer rule to FW API requires two reg IDs */ struct mlx4_flow_reg_id reg_id[MAX_REGS_PER_FLOW]; }; struct mlx4_ib_wq { u64 *wrid; spinlock_t lock; int wqe_cnt; int max_post; int max_gs; int offset; int wqe_shift; unsigned head; unsigned tail; }; enum { MLX4_IB_QP_CREATE_ROCE_V2_GSI = IB_QP_CREATE_RESERVED_START }; enum mlx4_ib_qp_flags { MLX4_IB_QP_LSO = IB_QP_CREATE_IPOIB_UD_LSO, MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK = IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK, MLX4_IB_QP_NETIF = IB_QP_CREATE_NETIF_QP, MLX4_IB_QP_CREATE_USE_GFP_NOIO = IB_QP_CREATE_USE_GFP_NOIO, /* Mellanox specific flags start from IB_QP_CREATE_RESERVED_START */ MLX4_IB_ROCE_V2_GSI_QP = MLX4_IB_QP_CREATE_ROCE_V2_GSI, MLX4_IB_SRIOV_TUNNEL_QP = 1 << 30, - MLX4_IB_SRIOV_SQP = 1 << 31, + MLX4_IB_SRIOV_SQP = 1U << 31, }; struct mlx4_ib_gid_entry { struct list_head list; union ib_gid gid; int added; u8 port; }; enum mlx4_ib_qp_type { /* * IB_QPT_SMI and IB_QPT_GSI have to be the first two entries * here (and in 
that order) since the MAD layer uses them as * indices into a 2-entry table. */ MLX4_IB_QPT_SMI = IB_QPT_SMI, MLX4_IB_QPT_GSI = IB_QPT_GSI, MLX4_IB_QPT_RC = IB_QPT_RC, MLX4_IB_QPT_UC = IB_QPT_UC, MLX4_IB_QPT_UD = IB_QPT_UD, MLX4_IB_QPT_RAW_IPV6 = IB_QPT_RAW_IPV6, MLX4_IB_QPT_RAW_ETHERTYPE = IB_QPT_RAW_ETHERTYPE, MLX4_IB_QPT_RAW_PACKET = IB_QPT_RAW_PACKET, MLX4_IB_QPT_XRC_INI = IB_QPT_XRC_INI, MLX4_IB_QPT_XRC_TGT = IB_QPT_XRC_TGT, MLX4_IB_QPT_PROXY_SMI_OWNER = 1 << 16, MLX4_IB_QPT_PROXY_SMI = 1 << 17, MLX4_IB_QPT_PROXY_GSI = 1 << 18, MLX4_IB_QPT_TUN_SMI_OWNER = 1 << 19, MLX4_IB_QPT_TUN_SMI = 1 << 20, MLX4_IB_QPT_TUN_GSI = 1 << 21, }; #define MLX4_IB_QPT_ANY_SRIOV (MLX4_IB_QPT_PROXY_SMI_OWNER | \ MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER | \ MLX4_IB_QPT_TUN_SMI | MLX4_IB_QPT_TUN_GSI) enum mlx4_ib_mad_ifc_flags { MLX4_MAD_IFC_IGNORE_MKEY = 1, MLX4_MAD_IFC_IGNORE_BKEY = 2, MLX4_MAD_IFC_IGNORE_KEYS = (MLX4_MAD_IFC_IGNORE_MKEY | MLX4_MAD_IFC_IGNORE_BKEY), MLX4_MAD_IFC_NET_VIEW = 4, }; enum { MLX4_NUM_TUNNEL_BUFS = 256, }; struct mlx4_ib_tunnel_header { struct mlx4_av av; __be32 remote_qpn; __be32 qkey; __be16 vlan; u8 mac[6]; __be16 pkey_index; u8 reserved[6]; }; struct mlx4_ib_buf { void *addr; dma_addr_t map; }; struct mlx4_rcv_tunnel_hdr { __be32 flags_src_qp; /* flags[6:5] is defined for VLANs: * 0x0 - no vlan was in the packet * 0x01 - C-VLAN was in the packet */ u8 g_ml_path; /* gid bit stands for ipv6/4 header in RoCE */ u8 reserved; __be16 pkey_index; __be16 sl_vid; __be16 slid_mac_47_32; __be32 mac_31_0; }; struct mlx4_ib_proxy_sqp_hdr { struct ib_grh grh; struct mlx4_rcv_tunnel_hdr tun; } __packed; struct mlx4_roce_smac_vlan_info { u64 smac; int smac_index; int smac_port; u64 candidate_smac; int candidate_smac_index; int candidate_smac_port; u16 vid; int vlan_index; int vlan_port; u16 candidate_vid; int candidate_vlan_index; int candidate_vlan_port; int update_vid; }; struct mlx4_ib_qp { struct ib_qp ibqp; struct mlx4_qp mqp; struct mlx4_buf buf; struct mlx4_db db; struct mlx4_ib_wq rq; u32 doorbell_qpn; __be32 sq_signal_bits; unsigned sq_next_wqe; int sq_max_wqes_per_wr; int sq_spare_wqes; struct mlx4_ib_wq sq; enum mlx4_ib_qp_type mlx4_ib_qp_type; struct ib_umem *umem; struct mlx4_mtt mtt; int buf_size; struct mutex mutex; u16 xrcdn; u32 flags; u8 port; u8 alt_port; u8 atomic_rd_en; u8 resp_depth; u8 sq_no_prefetch; u8 state; int mlx_type; struct list_head gid_list; struct list_head steering_rules; struct mlx4_ib_buf *sqp_proxy_rcv; struct mlx4_roce_smac_vlan_info pri; struct mlx4_roce_smac_vlan_info alt; u64 reg_id; struct list_head qps_list; struct list_head cq_recv_list; struct list_head cq_send_list; struct counter_index *counter_index; }; struct mlx4_ib_srq { struct ib_srq ibsrq; struct mlx4_srq msrq; struct mlx4_buf buf; struct mlx4_db db; u64 *wrid; spinlock_t lock; int head; int tail; u16 wqe_ctr; struct ib_umem *umem; struct mlx4_mtt mtt; struct mutex mutex; }; struct mlx4_ib_ah { struct ib_ah ibah; union mlx4_ext_av av; }; /****************************************/ /* alias guid support */ /****************************************/ #define NUM_PORT_ALIAS_GUID 2 #define NUM_ALIAS_GUID_IN_REC 8 #define NUM_ALIAS_GUID_REC_IN_PORT 16 #define GUID_REC_SIZE 8 #define NUM_ALIAS_GUID_PER_PORT 128 #define MLX4_NOT_SET_GUID (0x00LL) #define MLX4_GUID_FOR_DELETE_VAL (~(0x00LL)) enum mlx4_guid_alias_rec_status { MLX4_GUID_INFO_STATUS_IDLE, MLX4_GUID_INFO_STATUS_SET, }; #define GUID_STATE_NEED_PORT_INIT 0x01 enum mlx4_guid_alias_rec_method { 
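	/*
	 * The record methods reuse the IB MAD method codes directly
	 * (management SET and SA DELETE) instead of defining private values.
	 */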
MLX4_GUID_INFO_RECORD_SET = IB_MGMT_METHOD_SET, MLX4_GUID_INFO_RECORD_DELETE = IB_SA_METHOD_DELETE, }; struct mlx4_sriov_alias_guid_info_rec_det { u8 all_recs[GUID_REC_SIZE * NUM_ALIAS_GUID_IN_REC]; ib_sa_comp_mask guid_indexes; /*indicates what from the 8 records are valid*/ enum mlx4_guid_alias_rec_status status; /*indicates the administraively status of the record.*/ unsigned int guids_retry_schedule[NUM_ALIAS_GUID_IN_REC]; u64 time_to_run; }; struct mlx4_sriov_alias_guid_port_rec_det { struct mlx4_sriov_alias_guid_info_rec_det all_rec_per_port[NUM_ALIAS_GUID_REC_IN_PORT]; struct workqueue_struct *wq; struct delayed_work alias_guid_work; u8 port; u32 state_flags; struct mlx4_sriov_alias_guid *parent; struct list_head cb_list; }; struct mlx4_sriov_alias_guid { struct mlx4_sriov_alias_guid_port_rec_det ports_guid[MLX4_MAX_PORTS]; spinlock_t ag_work_lock; struct ib_sa_client *sa_client; }; struct mlx4_ib_demux_work { struct work_struct work; struct mlx4_ib_dev *dev; int slave; int do_init; u8 port; }; struct mlx4_ib_tun_tx_buf { struct mlx4_ib_buf buf; struct ib_ah *ah; }; struct mlx4_ib_demux_pv_qp { struct ib_qp *qp; enum ib_qp_type proxy_qpt; struct mlx4_ib_buf *ring; struct mlx4_ib_tun_tx_buf *tx_ring; spinlock_t tx_lock; unsigned tx_ix_head; unsigned tx_ix_tail; }; enum mlx4_ib_demux_pv_state { DEMUX_PV_STATE_DOWN, DEMUX_PV_STATE_STARTING, DEMUX_PV_STATE_ACTIVE, DEMUX_PV_STATE_DOWNING, }; struct mlx4_ib_demux_pv_ctx { int port; int slave; enum mlx4_ib_demux_pv_state state; int has_smi; struct ib_device *ib_dev; struct ib_cq *cq; struct ib_pd *pd; struct work_struct work; struct workqueue_struct *wq; struct mlx4_ib_demux_pv_qp qp[2]; }; struct mlx4_ib_demux_ctx { struct ib_device *ib_dev; int port; struct workqueue_struct *wq; struct workqueue_struct *ud_wq; spinlock_t ud_lock; atomic64_t subnet_prefix; __be64 guid_cache[128]; struct mlx4_ib_dev *dev; /* the following lock protects both mcg_table and mcg_mgid0_list */ struct mutex mcg_table_lock; struct rb_root mcg_table; struct list_head mcg_mgid0_list; struct workqueue_struct *mcg_wq; struct mlx4_ib_demux_pv_ctx **tun; atomic_t tid; int flushing; /* flushing the work queue */ }; struct mlx4_ib_sriov { struct mlx4_ib_demux_ctx demux[MLX4_MAX_PORTS]; struct mlx4_ib_demux_pv_ctx *sqps[MLX4_MAX_PORTS]; /* when using this spinlock you should use "irq" because * it may be called from interrupt context.*/ spinlock_t going_down_lock; int is_going_down; struct mlx4_sriov_alias_guid alias_guid; /* CM paravirtualization fields */ struct list_head cm_list; spinlock_t id_map_lock; struct rb_root sl_id_map; struct idr pv_id_table; }; struct gid_cache_context { int real_index; int refcount; }; struct gid_entry { union ib_gid gid; enum ib_gid_type gid_type; struct gid_cache_context *ctx; }; struct mlx4_port_gid_table { struct gid_entry gids[MLX4_MAX_PORT_GIDS]; }; struct mlx4_ib_iboe { spinlock_t lock; struct net_device *netdevs[MLX4_MAX_PORTS]; atomic64_t mac[MLX4_MAX_PORTS]; struct notifier_block nb; struct mlx4_port_gid_table gids[MLX4_MAX_PORTS]; }; struct pkey_mgt { u8 virt2phys_pkey[MLX4_MFUNC_MAX][MLX4_MAX_PORTS][MLX4_MAX_PORT_PKEYS]; u16 phys_pkey_cache[MLX4_MAX_PORTS][MLX4_MAX_PORT_PKEYS]; struct list_head pkey_port_list[MLX4_MFUNC_MAX]; struct kobject *device_parent[MLX4_MFUNC_MAX]; }; struct mlx4_ib_iov_sysfs_attr { void *ctx; struct kobject *kobj; unsigned long data; u32 entry_num; char name[15]; struct device_attribute dentry; struct device *dev; }; struct mlx4_ib_iov_sysfs_attr_ar { struct mlx4_ib_iov_sysfs_attr dentries[3 * 
NUM_ALIAS_GUID_PER_PORT + 1]; }; struct mlx4_ib_iov_port { char name[100]; u8 num; struct mlx4_ib_dev *dev; struct list_head list; struct mlx4_ib_iov_sysfs_attr_ar *dentr_ar; struct ib_port_attr attr; struct kobject *cur_port; struct kobject *admin_alias_parent; struct kobject *gids_parent; struct kobject *pkeys_parent; struct kobject *mcgs_parent; struct mlx4_ib_iov_sysfs_attr mcg_dentry; }; struct counter_index { struct list_head list; u32 index; u8 allocated; }; struct mlx4_ib_counters { struct list_head counters_list; struct mutex mutex; /* mutex for accessing counters list */ u32 default_counter; }; #define MLX4_DIAG_COUNTERS_TYPES 2 struct mlx4_ib_diag_counters { const char **name; u32 *offset; u32 num_counters; }; struct mlx4_ib_dev { struct ib_device ib_dev; struct mlx4_dev *dev; int num_ports; void __iomem *uar_map; struct mlx4_uar priv_uar; u32 priv_pdn; MLX4_DECLARE_DOORBELL_LOCK(uar_lock); struct ib_mad_agent *send_agent[MLX4_MAX_PORTS][2]; struct ib_ah *sm_ah[MLX4_MAX_PORTS]; spinlock_t sm_lock; atomic64_t sl2vl[MLX4_MAX_PORTS]; struct mlx4_ib_sriov sriov; struct mutex cap_mask_mutex; bool ib_active; struct mlx4_ib_iboe iboe; struct mlx4_ib_counters counters_table[MLX4_MAX_PORTS]; int *eq_table; struct kobject *iov_parent; struct kobject *ports_parent; struct kobject *dev_ports_parent[MLX4_MFUNC_MAX]; struct mlx4_ib_iov_port iov_ports[MLX4_MAX_PORTS]; struct pkey_mgt pkeys; unsigned long *ib_uc_qpns_bitmap; int steer_qpn_count; int steer_qpn_base; int steering_support; struct mlx4_ib_qp *qp1_proxy[MLX4_MAX_PORTS]; /* lock when destroying qp1_proxy and getting netdev events */ struct mutex qp1_proxy_lock[MLX4_MAX_PORTS]; u8 bond_next_port; /* protect resources needed as part of reset flow */ spinlock_t reset_flow_resource_lock; struct list_head qp_list; struct mlx4_ib_diag_counters diag_counters[MLX4_DIAG_COUNTERS_TYPES]; }; struct ib_event_work { struct work_struct work; struct mlx4_ib_dev *ib_dev; struct mlx4_eqe ib_eqe; int port; }; struct mlx4_ib_qp_tunnel_init_attr { struct ib_qp_init_attr init_attr; int slave; enum ib_qp_type proxy_qp_type; u8 port; }; struct mlx4_uverbs_ex_query_device { __u32 comp_mask; __u32 reserved; }; enum query_device_resp_mask { QUERY_DEVICE_RESP_MASK_TIMESTAMP = 1UL << 0, }; struct mlx4_uverbs_ex_query_device_resp { __u32 comp_mask; __u32 response_length; __u64 hca_core_clock_offset; }; static inline struct mlx4_ib_dev *to_mdev(struct ib_device *ibdev) { return container_of(ibdev, struct mlx4_ib_dev, ib_dev); } static inline struct mlx4_ib_ucontext *to_mucontext(struct ib_ucontext *ibucontext) { return container_of(ibucontext, struct mlx4_ib_ucontext, ibucontext); } static inline struct mlx4_ib_pd *to_mpd(struct ib_pd *ibpd) { return container_of(ibpd, struct mlx4_ib_pd, ibpd); } static inline struct mlx4_ib_xrcd *to_mxrcd(struct ib_xrcd *ibxrcd) { return container_of(ibxrcd, struct mlx4_ib_xrcd, ibxrcd); } static inline struct mlx4_ib_cq *to_mcq(struct ib_cq *ibcq) { return container_of(ibcq, struct mlx4_ib_cq, ibcq); } static inline struct mlx4_ib_cq *to_mibcq(struct mlx4_cq *mcq) { return container_of(mcq, struct mlx4_ib_cq, mcq); } static inline struct mlx4_ib_mr *to_mmr(struct ib_mr *ibmr) { return container_of(ibmr, struct mlx4_ib_mr, ibmr); } static inline struct mlx4_ib_mw *to_mmw(struct ib_mw *ibmw) { return container_of(ibmw, struct mlx4_ib_mw, ibmw); } static inline struct mlx4_ib_fmr *to_mfmr(struct ib_fmr *ibfmr) { return container_of(ibfmr, struct mlx4_ib_fmr, ibfmr); } static inline struct mlx4_ib_flow *to_mflow(struct ib_flow 
*ibflow) { return container_of(ibflow, struct mlx4_ib_flow, ibflow); } static inline struct mlx4_ib_qp *to_mqp(struct ib_qp *ibqp) { return container_of(ibqp, struct mlx4_ib_qp, ibqp); } static inline struct mlx4_ib_qp *to_mibqp(struct mlx4_qp *mqp) { return container_of(mqp, struct mlx4_ib_qp, mqp); } static inline struct mlx4_ib_srq *to_msrq(struct ib_srq *ibsrq) { return container_of(ibsrq, struct mlx4_ib_srq, ibsrq); } static inline struct mlx4_ib_srq *to_mibsrq(struct mlx4_srq *msrq) { return container_of(msrq, struct mlx4_ib_srq, msrq); } static inline struct mlx4_ib_ah *to_mah(struct ib_ah *ibah) { return container_of(ibah, struct mlx4_ib_ah, ibah); } static inline u8 mlx4_ib_bond_next_port(struct mlx4_ib_dev *dev) { dev->bond_next_port = (dev->bond_next_port + 1) % dev->num_ports; return dev->bond_next_port + 1; } int mlx4_ib_init_sriov(struct mlx4_ib_dev *dev); void mlx4_ib_close_sriov(struct mlx4_ib_dev *dev); int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, unsigned long virt, struct mlx4_db *db); void mlx4_ib_db_unmap_user(struct mlx4_ib_ucontext *context, struct mlx4_db *db); struct ib_mr *mlx4_ib_get_dma_mr(struct ib_pd *pd, int acc); int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt, struct ib_umem *umem); struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_udata *udata); int mlx4_ib_dereg_mr(struct ib_mr *mr); struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, struct ib_udata *udata); int mlx4_ib_dealloc_mw(struct ib_mw *mw); struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_num_sg); int mlx4_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset); int mlx4_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period); int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata); struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, struct ib_ucontext *context, struct ib_udata *udata); int mlx4_ib_destroy_cq(struct ib_cq *cq); int mlx4_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); int mlx4_ib_arm_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags); void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq); void mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq); struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr); int mlx4_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr); int mlx4_ib_destroy_ah(struct ib_ah *ah); struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd, struct ib_srq_init_attr *init_attr, struct ib_udata *udata); int mlx4_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, enum ib_srq_attr_mask attr_mask, struct ib_udata *udata); int mlx4_ib_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr); int mlx4_ib_destroy_srq(struct ib_srq *srq); void mlx4_ib_free_srq_wqe(struct mlx4_ib_srq *srq, int wqe_index); int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, struct ib_recv_wr **bad_wr); struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata); int mlx4_ib_destroy_qp(struct ib_qp *qp); int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata); int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr); int 
mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, struct ib_send_wr **bad_wr); int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, struct ib_recv_wr **bad_wr); int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int mad_ifc_flags, int port, const struct ib_wc *in_wc, const struct ib_grh *in_grh, const void *in_mad, void *response_mad); int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, const struct ib_wc *in_wc, const struct ib_grh *in_grh, const struct ib_mad_hdr *in, size_t in_mad_size, struct ib_mad_hdr *out, size_t *out_mad_size, u16 *out_mad_pkey_index); int mlx4_ib_mad_init(struct mlx4_ib_dev *dev); void mlx4_ib_mad_cleanup(struct mlx4_ib_dev *dev); struct ib_fmr *mlx4_ib_fmr_alloc(struct ib_pd *pd, int mr_access_flags, struct ib_fmr_attr *fmr_attr); int mlx4_ib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, int npages, u64 iova); int mlx4_ib_unmap_fmr(struct list_head *fmr_list); int mlx4_ib_fmr_dealloc(struct ib_fmr *fmr); int __mlx4_ib_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props, int netw_view); int __mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey, int netw_view); int __mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index, union ib_gid *gid, int netw_view); static inline bool mlx4_ib_ah_grh_present(struct mlx4_ib_ah *ah) { u8 port = be32_to_cpu(ah->av.ib.port_pd) >> 24 & 3; if (rdma_port_get_link_layer(ah->ibah.device, port) == IB_LINK_LAYER_ETHERNET) return true; return !!(ah->av.ib.g_slid & 0x80); } int mlx4_ib_mcg_port_init(struct mlx4_ib_demux_ctx *ctx); void mlx4_ib_mcg_port_cleanup(struct mlx4_ib_demux_ctx *ctx, int destroy_wq); void clean_vf_mcast(struct mlx4_ib_demux_ctx *ctx, int slave); int mlx4_ib_mcg_init(void); void mlx4_ib_mcg_destroy(void); int mlx4_ib_find_real_gid(struct ib_device *ibdev, u8 port, __be64 guid); int mlx4_ib_mcg_multiplex_handler(struct ib_device *ibdev, int port, int slave, struct ib_sa_mad *sa_mad); int mlx4_ib_mcg_demux_handler(struct ib_device *ibdev, int port, int slave, struct ib_sa_mad *mad); int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp, union ib_gid *gid); void mlx4_ib_dispatch_event(struct mlx4_ib_dev *dev, u8 port_num, enum ib_event_type type); void mlx4_ib_tunnels_update_work(struct work_struct *work); int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port, enum ib_qp_type qpt, struct ib_wc *wc, struct ib_grh *grh, struct ib_mad *mad); int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port, enum ib_qp_type dest_qpt, u16 pkey_index, u32 remote_qpn, u32 qkey, struct ib_ah_attr *attr, u8 *s_mac, u16 vlan_id, struct ib_mad *mad); __be64 mlx4_ib_get_new_demux_tid(struct mlx4_ib_demux_ctx *ctx); int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave, struct ib_mad *mad); int mlx4_ib_multiplex_cm_handler(struct ib_device *ibdev, int port, int slave_id, struct ib_mad *mad); void mlx4_ib_cm_paravirt_init(struct mlx4_ib_dev *dev); void mlx4_ib_cm_paravirt_clean(struct mlx4_ib_dev *dev, int slave_id); /* alias guid support */ void mlx4_ib_init_alias_guid_work(struct mlx4_ib_dev *dev, int port); int mlx4_ib_init_alias_guid_service(struct mlx4_ib_dev *dev); void mlx4_ib_destroy_alias_guid_service(struct mlx4_ib_dev *dev); void mlx4_ib_invalidate_all_guid_record(struct mlx4_ib_dev *dev, int port); void mlx4_ib_notify_slaves_on_guid_change(struct mlx4_ib_dev *dev, int block_num, u8 port_num, u8 *p_data); void mlx4_ib_update_cache_on_guid_change(struct mlx4_ib_dev *dev, int 
block_num, u8 port_num, u8 *p_data); int add_sysfs_port_mcg_attr(struct mlx4_ib_dev *device, int port_num, struct attribute *attr); void del_sysfs_port_mcg_attr(struct mlx4_ib_dev *device, int port_num, struct attribute *attr); ib_sa_comp_mask mlx4_ib_get_aguid_comp_mask_from_ix(int index); void mlx4_ib_slave_alias_guid_event(struct mlx4_ib_dev *dev, int slave, int port, int slave_init); int mlx4_ib_device_register_sysfs(struct mlx4_ib_dev *device) ; void mlx4_ib_device_unregister_sysfs(struct mlx4_ib_dev *device); __be64 mlx4_ib_gen_node_guid(void); int mlx4_ib_steer_qp_alloc(struct mlx4_ib_dev *dev, int count, int *qpn); void mlx4_ib_steer_qp_free(struct mlx4_ib_dev *dev, u32 qpn, int count); int mlx4_ib_steer_qp_reg(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp, int is_attach); int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags, u64 start, u64 length, u64 virt_addr, int mr_access_flags, struct ib_pd *pd, struct ib_udata *udata); int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev, u8 port_num, int index); void mlx4_sched_ib_sl2vl_update_work(struct mlx4_ib_dev *ibdev, int port); void mlx4_ib_sl2vl_update(struct mlx4_ib_dev *mdev, int port); #endif /* MLX4_IB_H */ Index: head/sys/dev/mlx4/mlx4_ib/mlx4_ib_qp.c =================================================================== --- head/sys/dev/mlx4/mlx4_ib/mlx4_ib_qp.c (revision 327863) +++ head/sys/dev/mlx4/mlx4_ib/mlx4_ib_qp.c (revision 327864) @@ -1,3530 +1,3530 @@ /* * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "mlx4_ib.h" #include static void mlx4_ib_lock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *recv_cq); static void mlx4_ib_unlock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *recv_cq); enum { MLX4_IB_ACK_REQ_FREQ = 8, }; enum { MLX4_IB_DEFAULT_SCHED_QUEUE = 0x83, MLX4_IB_DEFAULT_QP0_SCHED_QUEUE = 0x3f, MLX4_IB_LINK_TYPE_IB = 0, MLX4_IB_LINK_TYPE_ETH = 1 }; enum { /* * Largest possible UD header: send with GRH and immediate * data plus 18 bytes for an Ethernet header with VLAN/802.1Q * tag. 
(LRH would only use 8 bytes, so Ethernet is the * biggest case) */ MLX4_IB_UD_HEADER_SIZE = 82, MLX4_IB_LSO_HEADER_SPARE = 128, }; enum { MLX4_IB_IBOE_ETHERTYPE = 0x8915 }; struct mlx4_ib_sqp { struct mlx4_ib_qp qp; int pkey_index; u32 qkey; u32 send_psn; struct ib_ud_header ud_header; u8 header_buf[MLX4_IB_UD_HEADER_SIZE]; struct ib_qp *roce_v2_gsi; }; enum { MLX4_IB_MIN_SQ_STRIDE = 6, MLX4_IB_CACHE_LINE_SIZE = 64, }; enum { MLX4_RAW_QP_MTU = 7, MLX4_RAW_QP_MSGMAX = 31, }; #ifndef ETH_ALEN #define ETH_ALEN 6 #endif static const __be32 mlx4_ib_opcode[] = { [IB_WR_SEND] = cpu_to_be32(MLX4_OPCODE_SEND), [IB_WR_LSO] = cpu_to_be32(MLX4_OPCODE_LSO), [IB_WR_SEND_WITH_IMM] = cpu_to_be32(MLX4_OPCODE_SEND_IMM), [IB_WR_RDMA_WRITE] = cpu_to_be32(MLX4_OPCODE_RDMA_WRITE), [IB_WR_RDMA_WRITE_WITH_IMM] = cpu_to_be32(MLX4_OPCODE_RDMA_WRITE_IMM), [IB_WR_RDMA_READ] = cpu_to_be32(MLX4_OPCODE_RDMA_READ), [IB_WR_ATOMIC_CMP_AND_SWP] = cpu_to_be32(MLX4_OPCODE_ATOMIC_CS), [IB_WR_ATOMIC_FETCH_AND_ADD] = cpu_to_be32(MLX4_OPCODE_ATOMIC_FA), [IB_WR_SEND_WITH_INV] = cpu_to_be32(MLX4_OPCODE_SEND_INVAL), [IB_WR_LOCAL_INV] = cpu_to_be32(MLX4_OPCODE_LOCAL_INVAL), [IB_WR_REG_MR] = cpu_to_be32(MLX4_OPCODE_FMR), [IB_WR_MASKED_ATOMIC_CMP_AND_SWP] = cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_CS), [IB_WR_MASKED_ATOMIC_FETCH_AND_ADD] = cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_FA), }; static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp) { return container_of(mqp, struct mlx4_ib_sqp, qp); } static int is_tunnel_qp(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp) { if (!mlx4_is_master(dev->dev)) return 0; return qp->mqp.qpn >= dev->dev->phys_caps.base_tunnel_sqpn && qp->mqp.qpn < dev->dev->phys_caps.base_tunnel_sqpn + 8 * MLX4_MFUNC_MAX; } static int is_sqp(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp) { int proxy_sqp = 0; int real_sqp = 0; int i; /* PPF or Native -- real SQP */ real_sqp = ((mlx4_is_master(dev->dev) || !mlx4_is_mfunc(dev->dev)) && qp->mqp.qpn >= dev->dev->phys_caps.base_sqpn && qp->mqp.qpn <= dev->dev->phys_caps.base_sqpn + 3); if (real_sqp) return 1; /* VF or PF -- proxy SQP */ if (mlx4_is_mfunc(dev->dev)) { for (i = 0; i < dev->dev->caps.num_ports; i++) { if (qp->mqp.qpn == dev->dev->caps.qp0_proxy[i] || qp->mqp.qpn == dev->dev->caps.qp1_proxy[i]) { proxy_sqp = 1; break; } } } if (proxy_sqp) return 1; return !!(qp->flags & MLX4_IB_ROCE_V2_GSI_QP); } /* used for INIT/CLOSE port logic */ static int is_qp0(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp) { int proxy_qp0 = 0; int real_qp0 = 0; int i; /* PPF or Native -- real QP0 */ real_qp0 = ((mlx4_is_master(dev->dev) || !mlx4_is_mfunc(dev->dev)) && qp->mqp.qpn >= dev->dev->phys_caps.base_sqpn && qp->mqp.qpn <= dev->dev->phys_caps.base_sqpn + 1); if (real_qp0) return 1; /* VF or PF -- proxy QP0 */ if (mlx4_is_mfunc(dev->dev)) { for (i = 0; i < dev->dev->caps.num_ports; i++) { if (qp->mqp.qpn == dev->dev->caps.qp0_proxy[i]) { proxy_qp0 = 1; break; } } } return proxy_qp0; } static void *get_wqe(struct mlx4_ib_qp *qp, int offset) { return mlx4_buf_offset(&qp->buf, offset); } static void *get_recv_wqe(struct mlx4_ib_qp *qp, int n) { return get_wqe(qp, qp->rq.offset + (n << qp->rq.wqe_shift)); } static void *get_send_wqe(struct mlx4_ib_qp *qp, int n) { return get_wqe(qp, qp->sq.offset + (n << qp->sq.wqe_shift)); } /* * Stamp a SQ WQE so that it is invalid if prefetched by marking the * first four bytes of every 64 byte chunk with * 0x7FFFFFF | (invalid_ownership_value << 31). 
* * When the max work request size is less than or equal to the WQE * basic block size, as an optimization, we can stamp all WQEs with * 0xffffffff, and skip the very first chunk of each WQE. */ static void stamp_send_wqe(struct mlx4_ib_qp *qp, int n, int size) { __be32 *wqe; int i; int s; int ind; void *buf; __be32 stamp; struct mlx4_wqe_ctrl_seg *ctrl; if (qp->sq_max_wqes_per_wr > 1) { s = roundup(size, 1U << qp->sq.wqe_shift); for (i = 0; i < s; i += 64) { ind = (i >> qp->sq.wqe_shift) + n; stamp = ind & qp->sq.wqe_cnt ? cpu_to_be32(0x7fffffff) : cpu_to_be32(0xffffffff); buf = get_send_wqe(qp, ind & (qp->sq.wqe_cnt - 1)); wqe = buf + (i & ((1 << qp->sq.wqe_shift) - 1)); *wqe = stamp; } } else { ctrl = buf = get_send_wqe(qp, n & (qp->sq.wqe_cnt - 1)); s = (ctrl->fence_size & 0x3f) << 4; for (i = 64; i < s; i += 64) { wqe = buf + i; *wqe = cpu_to_be32(0xffffffff); } } } static void post_nop_wqe(struct mlx4_ib_qp *qp, int n, int size) { struct mlx4_wqe_ctrl_seg *ctrl; struct mlx4_wqe_inline_seg *inl; void *wqe; int s; ctrl = wqe = get_send_wqe(qp, n & (qp->sq.wqe_cnt - 1)); s = sizeof(struct mlx4_wqe_ctrl_seg); if (qp->ibqp.qp_type == IB_QPT_UD) { struct mlx4_wqe_datagram_seg *dgram = wqe + sizeof *ctrl; struct mlx4_av *av = (struct mlx4_av *)dgram->av; memset(dgram, 0, sizeof *dgram); av->port_pd = cpu_to_be32((qp->port << 24) | to_mpd(qp->ibqp.pd)->pdn); s += sizeof(struct mlx4_wqe_datagram_seg); } /* Pad the remainder of the WQE with an inline data segment. */ if (size > s) { inl = wqe + s; - inl->byte_count = cpu_to_be32(1 << 31 | (size - s - sizeof *inl)); + inl->byte_count = cpu_to_be32(1U << 31 | (size - s - sizeof *inl)); } ctrl->srcrb_flags = 0; ctrl->fence_size = size / 16; /* * Make sure descriptor is fully written before setting ownership bit * (because HW can start executing as soon as we do). */ wmb(); ctrl->owner_opcode = cpu_to_be32(MLX4_OPCODE_NOP | MLX4_WQE_CTRL_NEC) | - (n & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0); + (n & qp->sq.wqe_cnt ? 
cpu_to_be32(1U << 31) : 0); stamp_send_wqe(qp, n + qp->sq_spare_wqes, size); } /* Post NOP WQE to prevent wrap-around in the middle of WR */ static inline unsigned pad_wraparound(struct mlx4_ib_qp *qp, int ind) { unsigned s = qp->sq.wqe_cnt - (ind & (qp->sq.wqe_cnt - 1)); if (unlikely(s < qp->sq_max_wqes_per_wr)) { post_nop_wqe(qp, ind, s << qp->sq.wqe_shift); ind += s; } return ind; } static void mlx4_ib_qp_event(struct mlx4_qp *qp, enum mlx4_event type) { struct ib_event event; struct ib_qp *ibqp = &to_mibqp(qp)->ibqp; if (type == MLX4_EVENT_TYPE_PATH_MIG) to_mibqp(qp)->port = to_mibqp(qp)->alt_port; if (ibqp->event_handler) { event.device = ibqp->device; event.element.qp = ibqp; switch (type) { case MLX4_EVENT_TYPE_PATH_MIG: event.event = IB_EVENT_PATH_MIG; break; case MLX4_EVENT_TYPE_COMM_EST: event.event = IB_EVENT_COMM_EST; break; case MLX4_EVENT_TYPE_SQ_DRAINED: event.event = IB_EVENT_SQ_DRAINED; break; case MLX4_EVENT_TYPE_SRQ_QP_LAST_WQE: event.event = IB_EVENT_QP_LAST_WQE_REACHED; break; case MLX4_EVENT_TYPE_WQ_CATAS_ERROR: event.event = IB_EVENT_QP_FATAL; break; case MLX4_EVENT_TYPE_PATH_MIG_FAILED: event.event = IB_EVENT_PATH_MIG_ERR; break; case MLX4_EVENT_TYPE_WQ_INVAL_REQ_ERROR: event.event = IB_EVENT_QP_REQ_ERR; break; case MLX4_EVENT_TYPE_WQ_ACCESS_ERROR: event.event = IB_EVENT_QP_ACCESS_ERR; break; default: pr_warn("Unexpected event type %d " "on QP %06x\n", type, qp->qpn); return; } ibqp->event_handler(&event, ibqp->qp_context); } } static int send_wqe_overhead(enum mlx4_ib_qp_type type, u32 flags) { /* * UD WQEs must have a datagram segment. * RC and UC WQEs might have a remote address segment. * MLX WQEs need two extra inline data segments (for the UD * header and space for the ICRC). */ switch (type) { case MLX4_IB_QPT_UD: return sizeof (struct mlx4_wqe_ctrl_seg) + sizeof (struct mlx4_wqe_datagram_seg) + ((flags & MLX4_IB_QP_LSO) ? 
MLX4_IB_LSO_HEADER_SPARE : 0); case MLX4_IB_QPT_PROXY_SMI_OWNER: case MLX4_IB_QPT_PROXY_SMI: case MLX4_IB_QPT_PROXY_GSI: return sizeof (struct mlx4_wqe_ctrl_seg) + sizeof (struct mlx4_wqe_datagram_seg) + 64; case MLX4_IB_QPT_TUN_SMI_OWNER: case MLX4_IB_QPT_TUN_GSI: return sizeof (struct mlx4_wqe_ctrl_seg) + sizeof (struct mlx4_wqe_datagram_seg); case MLX4_IB_QPT_UC: return sizeof (struct mlx4_wqe_ctrl_seg) + sizeof (struct mlx4_wqe_raddr_seg); case MLX4_IB_QPT_RC: return sizeof (struct mlx4_wqe_ctrl_seg) + sizeof (struct mlx4_wqe_masked_atomic_seg) + sizeof (struct mlx4_wqe_raddr_seg); case MLX4_IB_QPT_SMI: case MLX4_IB_QPT_GSI: return sizeof (struct mlx4_wqe_ctrl_seg) + ALIGN(MLX4_IB_UD_HEADER_SIZE + DIV_ROUND_UP(MLX4_IB_UD_HEADER_SIZE, MLX4_INLINE_ALIGN) * sizeof (struct mlx4_wqe_inline_seg), sizeof (struct mlx4_wqe_data_seg)) + ALIGN(4 + sizeof (struct mlx4_wqe_inline_seg), sizeof (struct mlx4_wqe_data_seg)); default: return sizeof (struct mlx4_wqe_ctrl_seg); } } static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, int is_user, int has_rq, struct mlx4_ib_qp *qp) { /* Sanity check RQ size before proceeding */ if (cap->max_recv_wr > dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE || cap->max_recv_sge > min(dev->dev->caps.max_sq_sg, dev->dev->caps.max_rq_sg)) return -EINVAL; if (!has_rq) { if (cap->max_recv_wr) return -EINVAL; qp->rq.wqe_cnt = qp->rq.max_gs = 0; } else { /* HW requires >= 1 RQ entry with >= 1 gather entry */ if (is_user && (!cap->max_recv_wr || !cap->max_recv_sge)) return -EINVAL; qp->rq.wqe_cnt = roundup_pow_of_two(max(1U, cap->max_recv_wr)); qp->rq.max_gs = roundup_pow_of_two(max(1U, cap->max_recv_sge)); qp->rq.wqe_shift = ilog2(qp->rq.max_gs * sizeof (struct mlx4_wqe_data_seg)); } /* leave userspace return values as they were, so as not to break ABI */ if (is_user) { cap->max_recv_wr = qp->rq.max_post = qp->rq.wqe_cnt; cap->max_recv_sge = qp->rq.max_gs; } else { cap->max_recv_wr = qp->rq.max_post = min(dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE, qp->rq.wqe_cnt); cap->max_recv_sge = min(qp->rq.max_gs, min(dev->dev->caps.max_sq_sg, dev->dev->caps.max_rq_sg)); } return 0; } static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, enum mlx4_ib_qp_type type, struct mlx4_ib_qp *qp, bool shrink_wqe) { int s; /* Sanity check SQ size before proceeding */ if (cap->max_send_wr > (dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE) || cap->max_send_sge > min(dev->dev->caps.max_sq_sg, dev->dev->caps.max_rq_sg) || cap->max_inline_data + send_wqe_overhead(type, qp->flags) + sizeof (struct mlx4_wqe_inline_seg) > dev->dev->caps.max_sq_desc_sz) return -EINVAL; /* * For MLX transport we need 2 extra S/G entries: * one for the header and one for the checksum at the end */ if ((type == MLX4_IB_QPT_SMI || type == MLX4_IB_QPT_GSI || type & (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_TUN_SMI_OWNER)) && cap->max_send_sge + 2 > dev->dev->caps.max_sq_sg) return -EINVAL; s = max(cap->max_send_sge * sizeof (struct mlx4_wqe_data_seg), cap->max_inline_data + sizeof (struct mlx4_wqe_inline_seg)) + send_wqe_overhead(type, qp->flags); if (s > dev->dev->caps.max_sq_desc_sz) return -EINVAL; /* * Hermon supports shrinking WQEs, such that a single work * request can include multiple units of 1 << wqe_shift. This * way, work requests can differ in size, and do not have to * be a power of 2 in size, saving memory and speeding up send * WR posting. 
Unfortunately, if we do this then the * wqe_index field in CQEs can't be used to look up the WR ID * anymore, so we do this only if selective signaling is off. * * Further, on 32-bit platforms, we can't use vmap() to make * the QP buffer virtually contiguous. Thus we have to use * constant-sized WRs to make sure a WR is always fully within * a single page-sized chunk. * * Finally, we use NOP work requests to pad the end of the * work queue, to avoid wrap-around in the middle of WR. We * set NEC bit to avoid getting completions with error for * these NOP WRs, but since NEC is only supported starting * with firmware 2.2.232, we use constant-sized WRs for older * firmware. * * And, since MLX QPs only support SEND, we use constant-sized * WRs in this case. * * We look for the smallest value of wqe_shift such that the * resulting number of wqes does not exceed device * capabilities. * * We set WQE size to at least 64 bytes, this way stamping * invalidates each WQE. */ if (shrink_wqe && dev->dev->caps.fw_ver >= MLX4_FW_VER_WQE_CTRL_NEC && qp->sq_signal_bits && BITS_PER_LONG == 64 && type != MLX4_IB_QPT_SMI && type != MLX4_IB_QPT_GSI && !(type & (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER))) qp->sq.wqe_shift = ilog2(64); else qp->sq.wqe_shift = ilog2(roundup_pow_of_two(s)); for (;;) { qp->sq_max_wqes_per_wr = DIV_ROUND_UP(s, 1U << qp->sq.wqe_shift); /* * We need to leave 2 KB + 1 WR of headroom in the SQ to * allow HW to prefetch. */ qp->sq_spare_wqes = (2048 >> qp->sq.wqe_shift) + qp->sq_max_wqes_per_wr; qp->sq.wqe_cnt = roundup_pow_of_two(cap->max_send_wr * qp->sq_max_wqes_per_wr + qp->sq_spare_wqes); if (qp->sq.wqe_cnt <= dev->dev->caps.max_wqes) break; if (qp->sq_max_wqes_per_wr <= 1) return -EINVAL; ++qp->sq.wqe_shift; } qp->sq.max_gs = (min(dev->dev->caps.max_sq_desc_sz, (qp->sq_max_wqes_per_wr << qp->sq.wqe_shift)) - send_wqe_overhead(type, qp->flags)) / sizeof (struct mlx4_wqe_data_seg); qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) + (qp->sq.wqe_cnt << qp->sq.wqe_shift); if (qp->rq.wqe_shift > qp->sq.wqe_shift) { qp->rq.offset = 0; qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift; } else { qp->rq.offset = qp->sq.wqe_cnt << qp->sq.wqe_shift; qp->sq.offset = 0; } cap->max_send_wr = qp->sq.max_post = (qp->sq.wqe_cnt - qp->sq_spare_wqes) / qp->sq_max_wqes_per_wr; cap->max_send_sge = min(qp->sq.max_gs, min(dev->dev->caps.max_sq_sg, dev->dev->caps.max_rq_sg)); /* We don't support inline sends for kernel QPs (yet) */ cap->max_inline_data = 0; return 0; } static int set_user_sq_size(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, struct mlx4_ib_create_qp *ucmd) { /* Sanity check SQ size before proceeding */ if ((1 << ucmd->log_sq_bb_count) > dev->dev->caps.max_wqes || ucmd->log_sq_stride > ilog2(roundup_pow_of_two(dev->dev->caps.max_sq_desc_sz)) || ucmd->log_sq_stride < MLX4_IB_MIN_SQ_STRIDE) return -EINVAL; qp->sq.wqe_cnt = 1 << ucmd->log_sq_bb_count; qp->sq.wqe_shift = ucmd->log_sq_stride; qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) + (qp->sq.wqe_cnt << qp->sq.wqe_shift); return 0; } static int alloc_proxy_bufs(struct ib_device *dev, struct mlx4_ib_qp *qp) { int i; qp->sqp_proxy_rcv = kmalloc(sizeof (struct mlx4_ib_buf) * qp->rq.wqe_cnt, GFP_KERNEL); if (!qp->sqp_proxy_rcv) return -ENOMEM; for (i = 0; i < qp->rq.wqe_cnt; i++) { qp->sqp_proxy_rcv[i].addr = kmalloc(sizeof (struct mlx4_ib_proxy_sqp_hdr), GFP_KERNEL); if (!qp->sqp_proxy_rcv[i].addr) goto err; qp->sqp_proxy_rcv[i].map = ib_dma_map_single(dev, 
qp->sqp_proxy_rcv[i].addr, sizeof (struct mlx4_ib_proxy_sqp_hdr), DMA_FROM_DEVICE); if (ib_dma_mapping_error(dev, qp->sqp_proxy_rcv[i].map)) { kfree(qp->sqp_proxy_rcv[i].addr); goto err; } } return 0; err: while (i > 0) { --i; ib_dma_unmap_single(dev, qp->sqp_proxy_rcv[i].map, sizeof (struct mlx4_ib_proxy_sqp_hdr), DMA_FROM_DEVICE); kfree(qp->sqp_proxy_rcv[i].addr); } kfree(qp->sqp_proxy_rcv); qp->sqp_proxy_rcv = NULL; return -ENOMEM; } static void free_proxy_bufs(struct ib_device *dev, struct mlx4_ib_qp *qp) { int i; for (i = 0; i < qp->rq.wqe_cnt; i++) { ib_dma_unmap_single(dev, qp->sqp_proxy_rcv[i].map, sizeof (struct mlx4_ib_proxy_sqp_hdr), DMA_FROM_DEVICE); kfree(qp->sqp_proxy_rcv[i].addr); } kfree(qp->sqp_proxy_rcv); } static int qp_has_rq(struct ib_qp_init_attr *attr) { if (attr->qp_type == IB_QPT_XRC_INI || attr->qp_type == IB_QPT_XRC_TGT) return 0; return !attr->srq; } static int qp0_enabled_vf(struct mlx4_dev *dev, int qpn) { int i; for (i = 0; i < dev->caps.num_ports; i++) { if (qpn == dev->caps.qp0_proxy[i]) return !!dev->caps.qp0_qkey[i]; } return 0; } static void mlx4_ib_free_qp_counter(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp) { mutex_lock(&dev->counters_table[qp->port - 1].mutex); mlx4_counter_free(dev->dev, qp->counter_index->index); list_del(&qp->counter_index->list); mutex_unlock(&dev->counters_table[qp->port - 1].mutex); kfree(qp->counter_index); qp->counter_index = NULL; } static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata, int sqpn, struct mlx4_ib_qp **caller_qp, gfp_t gfp) { int qpn; int err; struct ib_qp_cap backup_cap; struct mlx4_ib_sqp *sqp; struct mlx4_ib_qp *qp; enum mlx4_ib_qp_type qp_type = (enum mlx4_ib_qp_type) init_attr->qp_type; struct mlx4_ib_cq *mcq; unsigned long flags; /* When tunneling special qps, we use a plain UD qp */ if (sqpn) { if (mlx4_is_mfunc(dev->dev) && (!mlx4_is_master(dev->dev) || !(init_attr->create_flags & MLX4_IB_SRIOV_SQP))) { if (init_attr->qp_type == IB_QPT_GSI) qp_type = MLX4_IB_QPT_PROXY_GSI; else { if (mlx4_is_master(dev->dev) || qp0_enabled_vf(dev->dev, sqpn)) qp_type = MLX4_IB_QPT_PROXY_SMI_OWNER; else qp_type = MLX4_IB_QPT_PROXY_SMI; } } qpn = sqpn; /* add extra sg entry for tunneling */ init_attr->cap.max_recv_sge++; } else if (init_attr->create_flags & MLX4_IB_SRIOV_TUNNEL_QP) { struct mlx4_ib_qp_tunnel_init_attr *tnl_init = container_of(init_attr, struct mlx4_ib_qp_tunnel_init_attr, init_attr); if ((tnl_init->proxy_qp_type != IB_QPT_SMI && tnl_init->proxy_qp_type != IB_QPT_GSI) || !mlx4_is_master(dev->dev)) return -EINVAL; if (tnl_init->proxy_qp_type == IB_QPT_GSI) qp_type = MLX4_IB_QPT_TUN_GSI; else if (tnl_init->slave == mlx4_master_func_num(dev->dev) || mlx4_vf_smi_enabled(dev->dev, tnl_init->slave, tnl_init->port)) qp_type = MLX4_IB_QPT_TUN_SMI_OWNER; else qp_type = MLX4_IB_QPT_TUN_SMI; /* we are definitely in the PPF here, since we are creating * tunnel QPs. base_tunnel_sqpn is therefore valid. 
*/ qpn = dev->dev->phys_caps.base_tunnel_sqpn + 8 * tnl_init->slave + tnl_init->proxy_qp_type * 2 + tnl_init->port - 1; sqpn = qpn; } if (!*caller_qp) { if (qp_type == MLX4_IB_QPT_SMI || qp_type == MLX4_IB_QPT_GSI || (qp_type & (MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER))) { sqp = kzalloc(sizeof (struct mlx4_ib_sqp), gfp); if (!sqp) return -ENOMEM; qp = &sqp->qp; qp->pri.vid = 0xFFFF; qp->alt.vid = 0xFFFF; } else { qp = kzalloc(sizeof (struct mlx4_ib_qp), gfp); if (!qp) return -ENOMEM; qp->pri.vid = 0xFFFF; qp->alt.vid = 0xFFFF; } } else qp = *caller_qp; qp->mlx4_ib_qp_type = qp_type; mutex_init(&qp->mutex); spin_lock_init(&qp->sq.lock); spin_lock_init(&qp->rq.lock); INIT_LIST_HEAD(&qp->gid_list); INIT_LIST_HEAD(&qp->steering_rules); qp->state = IB_QPS_RESET; if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) qp->sq_signal_bits = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE); err = set_rq_size(dev, &init_attr->cap, !!pd->uobject, qp_has_rq(init_attr), qp); if (err) goto err; if (pd->uobject) { struct mlx4_ib_create_qp ucmd; if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) { err = -EFAULT; goto err; } qp->sq_no_prefetch = ucmd.sq_no_prefetch; err = set_user_sq_size(dev, qp, &ucmd); if (err) goto err; qp->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr, qp->buf_size, 0, 0); if (IS_ERR(qp->umem)) { err = PTR_ERR(qp->umem); goto err; } err = mlx4_mtt_init(dev->dev, ib_umem_page_count(qp->umem), ilog2(qp->umem->page_size), &qp->mtt); if (err) goto err_buf; err = mlx4_ib_umem_write_mtt(dev, &qp->mtt, qp->umem); if (err) goto err_mtt; if (qp_has_rq(init_attr)) { err = mlx4_ib_db_map_user(to_mucontext(pd->uobject->context), ucmd.db_addr, &qp->db); if (err) goto err_mtt; } } else { qp->sq_no_prefetch = 0; if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO) qp->flags |= MLX4_IB_QP_LSO; if (init_attr->create_flags & IB_QP_CREATE_NETIF_QP) { if (dev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED) qp->flags |= MLX4_IB_QP_NETIF; else goto err; } memcpy(&backup_cap, &init_attr->cap, sizeof(backup_cap)); err = set_kernel_sq_size(dev, &init_attr->cap, qp_type, qp, true); if (err) goto err; if (qp_has_rq(init_attr)) { err = mlx4_db_alloc(dev->dev, &qp->db, 0, gfp); if (err) goto err; *qp->db.db = 0; } if (mlx4_buf_alloc(dev->dev, qp->buf_size, qp->buf_size, &qp->buf, gfp)) { memcpy(&init_attr->cap, &backup_cap, sizeof(backup_cap)); err = set_kernel_sq_size(dev, &init_attr->cap, qp_type, qp, false); if (err) goto err_db; if (mlx4_buf_alloc(dev->dev, qp->buf_size, PAGE_SIZE * 2, &qp->buf, gfp)) { err = -ENOMEM; goto err_db; } } err = mlx4_mtt_init(dev->dev, qp->buf.npages, qp->buf.page_shift, &qp->mtt); if (err) goto err_buf; err = mlx4_buf_write_mtt(dev->dev, &qp->mtt, &qp->buf, gfp); if (err) goto err_mtt; qp->sq.wrid = kmalloc_array(qp->sq.wqe_cnt, sizeof(u64), gfp | __GFP_NOWARN); if (!qp->sq.wrid) qp->sq.wrid = __vmalloc(qp->sq.wqe_cnt * sizeof(u64), gfp, 0 /*PAGE_KERNEL*/); qp->rq.wrid = kmalloc_array(qp->rq.wqe_cnt, sizeof(u64), gfp | __GFP_NOWARN); if (!qp->rq.wrid) qp->rq.wrid = __vmalloc(qp->rq.wqe_cnt * sizeof(u64), gfp, 0 /*PAGE_KERNEL*/); if (!qp->sq.wrid || !qp->rq.wrid) { err = -ENOMEM; goto err_wrid; } } if (sqpn) { if (qp->mlx4_ib_qp_type & (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI)) { if (alloc_proxy_bufs(pd->device, qp)) { err = -ENOMEM; goto err_wrid; } } } else { /* Raw packet QPNs may not have bits 6,7 set in their qp_num; * otherwise, the WQE BlueFlame setup flow wrongly causes * 
VLAN insertion. */ if (init_attr->qp_type == IB_QPT_RAW_PACKET) err = mlx4_qp_reserve_range(dev->dev, 1, 1, &qpn, (init_attr->cap.max_send_wr ? MLX4_RESERVE_ETH_BF_QP : 0) | (init_attr->cap.max_recv_wr ? MLX4_RESERVE_A0_QP : 0)); else if (qp->flags & MLX4_IB_QP_NETIF) err = mlx4_ib_steer_qp_alloc(dev, 1, &qpn); else err = mlx4_qp_reserve_range(dev->dev, 1, 1, &qpn, 0); if (err) goto err_proxy; } if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) qp->flags |= MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK; err = mlx4_qp_alloc(dev->dev, qpn, &qp->mqp, gfp); if (err) goto err_qpn; if (init_attr->qp_type == IB_QPT_XRC_TGT) qp->mqp.qpn |= (1 << 23); /* * Hardware wants QPN written in big-endian order (after * shifting) for send doorbell. Precompute this value to save * a little bit when posting sends. */ qp->doorbell_qpn = swab32(qp->mqp.qpn << 8); qp->mqp.event = mlx4_ib_qp_event; if (!*caller_qp) *caller_qp = qp; spin_lock_irqsave(&dev->reset_flow_resource_lock, flags); mlx4_ib_lock_cqs(to_mcq(init_attr->send_cq), to_mcq(init_attr->recv_cq)); /* Maintain device to QPs access, needed for further handling * via reset flow */ list_add_tail(&qp->qps_list, &dev->qp_list); /* Maintain CQ to QPs access, needed for further handling * via reset flow */ mcq = to_mcq(init_attr->send_cq); list_add_tail(&qp->cq_send_list, &mcq->send_qp_list); mcq = to_mcq(init_attr->recv_cq); list_add_tail(&qp->cq_recv_list, &mcq->recv_qp_list); mlx4_ib_unlock_cqs(to_mcq(init_attr->send_cq), to_mcq(init_attr->recv_cq)); spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags); return 0; err_qpn: if (!sqpn) { if (qp->flags & MLX4_IB_QP_NETIF) mlx4_ib_steer_qp_free(dev, qpn, 1); else mlx4_qp_release_range(dev->dev, qpn, 1); } err_proxy: if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_GSI) free_proxy_bufs(pd->device, qp); err_wrid: if (pd->uobject) { if (qp_has_rq(init_attr)) mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &qp->db); } else { kvfree(qp->sq.wrid); kvfree(qp->rq.wrid); } err_mtt: mlx4_mtt_cleanup(dev->dev, &qp->mtt); err_buf: if (pd->uobject) ib_umem_release(qp->umem); else mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf); err_db: if (!pd->uobject && qp_has_rq(init_attr)) mlx4_db_free(dev->dev, &qp->db); err: if (!*caller_qp) kfree(qp); return err; } static enum mlx4_qp_state to_mlx4_state(enum ib_qp_state state) { switch (state) { case IB_QPS_RESET: return MLX4_QP_STATE_RST; case IB_QPS_INIT: return MLX4_QP_STATE_INIT; case IB_QPS_RTR: return MLX4_QP_STATE_RTR; case IB_QPS_RTS: return MLX4_QP_STATE_RTS; case IB_QPS_SQD: return MLX4_QP_STATE_SQD; case IB_QPS_SQE: return MLX4_QP_STATE_SQER; case IB_QPS_ERR: return MLX4_QP_STATE_ERR; default: return -1; } } static void mlx4_ib_lock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *recv_cq) __acquires(&send_cq->lock) __acquires(&recv_cq->lock) { if (send_cq == recv_cq) { spin_lock(&send_cq->lock); __acquire(&recv_cq->lock); } else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) { spin_lock(&send_cq->lock); spin_lock_nested(&recv_cq->lock, SINGLE_DEPTH_NESTING); } else { spin_lock(&recv_cq->lock); spin_lock_nested(&send_cq->lock, SINGLE_DEPTH_NESTING); } } static void mlx4_ib_unlock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *recv_cq) __releases(&send_cq->lock) __releases(&recv_cq->lock) { if (send_cq == recv_cq) { __release(&recv_cq->lock); spin_unlock(&send_cq->lock); } else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) { spin_unlock(&recv_cq->lock); spin_unlock(&send_cq->lock); } else { spin_unlock(&send_cq->lock); spin_unlock(&recv_cq->lock); } } 
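mlx4_ib_lock_cqs() above takes the send and receive CQ spinlocks in one fixed global order keyed on mcq.cqn, and takes the lock only once when both CQs are the same object; mlx4_ib_unlock_cqs() releases them in the reverse order. That fixed ordering is what prevents an ABBA deadlock when two code paths lock the same pair of CQs from opposite directions. The standalone sketch below shows the same ordering discipline outside the driver; the struct, its id field, and the pthread mutexes are hypothetical stand-ins used only for illustration, not part of the mlx4 code.

#include <pthread.h>

struct locked_obj {
        unsigned int id;                /* stable ordering key, analogous to mcq.cqn */
        pthread_mutex_t lock;
};

/* Take both locks in one global order so two callers can never deadlock. */
static void lock_pair(struct locked_obj *a, struct locked_obj *b)
{
        if (a == b) {
                pthread_mutex_lock(&a->lock);   /* same object: take its lock once */
        } else if (a->id < b->id) {
                pthread_mutex_lock(&a->lock);   /* lower id always first */
                pthread_mutex_lock(&b->lock);
        } else {
                pthread_mutex_lock(&b->lock);
                pthread_mutex_lock(&a->lock);
        }
}

/* Release in the opposite order, mirroring mlx4_ib_unlock_cqs(). */
static void unlock_pair(struct locked_obj *a, struct locked_obj *b)
{
        if (a == b) {
                pthread_mutex_unlock(&a->lock);
        } else if (a->id < b->id) {
                pthread_mutex_unlock(&b->lock);
                pthread_mutex_unlock(&a->lock);
        } else {
                pthread_mutex_unlock(&a->lock);
                pthread_mutex_unlock(&b->lock);
        }
}

Because every path orders the pair by the same key, two threads locking the same two objects can never each hold one lock while waiting for the other.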
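The changes visible in this part of the diff replace 1 << 31 with 1U << 31 (the MLX4_IB_SRIOV_SQP flag, and the byte_count and owner_opcode values built in post_nop_wqe()). Shifting the signed constant 1 into the sign bit of a 32-bit int overflows the signed type, which standard C leaves undefined and which compilers and static analyzers flag; the unsigned literal keeps the shift in unsigned arithmetic and yields exactly the intended 0x80000000 bit pattern. The small self-contained program below illustrates the general C rule; it is a sketch, not code from the driver, and the precise motivation for the commit is inferred since the log message is not part of this diff.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        /*
         * With a 32-bit int, 1 << 31 shifts a set bit into the sign
         * position; the result does not fit in int, so the expression
         * has undefined behavior in standard C.  The unsigned literal
         * keeps the shift well defined, with the value 0x80000000.
         */
        uint32_t flag = 1U << 31;

        /*
         * The unsigned form also stays non-negative if it is ever
         * widened, so no sign extension can creep into the upper bits.
         */
        uint64_t widened = (uint64_t)(1U << 31);

        /* prints: flag=0x80000000 widened=0x80000000 */
        printf("flag=%#x widened=%#llx\n", (unsigned)flag,
            (unsigned long long)widened);
        return 0;
}

The distinction matters in exactly the places touched here: mask and flag constants with bit 31 set are later combined and handed to helpers such as cpu_to_be32(), and an unsigned constant guarantees the same bit pattern without relying on implementation-specific signed-overflow behavior.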
static void del_gid_entries(struct mlx4_ib_qp *qp) { struct mlx4_ib_gid_entry *ge, *tmp; list_for_each_entry_safe(ge, tmp, &qp->gid_list, list) { list_del(&ge->list); kfree(ge); } } static struct mlx4_ib_pd *get_pd(struct mlx4_ib_qp *qp) { if (qp->ibqp.qp_type == IB_QPT_XRC_TGT) return to_mpd(to_mxrcd(qp->ibqp.xrcd)->pd); else return to_mpd(qp->ibqp.pd); } static void get_cqs(struct mlx4_ib_qp *qp, struct mlx4_ib_cq **send_cq, struct mlx4_ib_cq **recv_cq) { switch (qp->ibqp.qp_type) { case IB_QPT_XRC_TGT: *send_cq = to_mcq(to_mxrcd(qp->ibqp.xrcd)->cq); *recv_cq = *send_cq; break; case IB_QPT_XRC_INI: *send_cq = to_mcq(qp->ibqp.send_cq); *recv_cq = *send_cq; break; default: *send_cq = to_mcq(qp->ibqp.send_cq); *recv_cq = to_mcq(qp->ibqp.recv_cq); break; } } static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, int is_user) { struct mlx4_ib_cq *send_cq, *recv_cq; unsigned long flags; if (qp->state != IB_QPS_RESET) { if (mlx4_qp_modify(dev->dev, NULL, to_mlx4_state(qp->state), MLX4_QP_STATE_RST, NULL, 0, 0, &qp->mqp)) pr_warn("modify QP %06x to RESET failed.\n", qp->mqp.qpn); if (qp->pri.smac || (!qp->pri.smac && qp->pri.smac_port)) { mlx4_unregister_mac(dev->dev, qp->pri.smac_port, qp->pri.smac); qp->pri.smac = 0; qp->pri.smac_port = 0; } if (qp->alt.smac) { mlx4_unregister_mac(dev->dev, qp->alt.smac_port, qp->alt.smac); qp->alt.smac = 0; } if (qp->pri.vid < 0x1000) { mlx4_unregister_vlan(dev->dev, qp->pri.vlan_port, qp->pri.vid); qp->pri.vid = 0xFFFF; qp->pri.candidate_vid = 0xFFFF; qp->pri.update_vid = 0; } if (qp->alt.vid < 0x1000) { mlx4_unregister_vlan(dev->dev, qp->alt.vlan_port, qp->alt.vid); qp->alt.vid = 0xFFFF; qp->alt.candidate_vid = 0xFFFF; qp->alt.update_vid = 0; } } get_cqs(qp, &send_cq, &recv_cq); spin_lock_irqsave(&dev->reset_flow_resource_lock, flags); mlx4_ib_lock_cqs(send_cq, recv_cq); /* del from lists under both locks above to protect reset flow paths */ list_del(&qp->qps_list); list_del(&qp->cq_send_list); list_del(&qp->cq_recv_list); if (!is_user) { __mlx4_ib_cq_clean(recv_cq, qp->mqp.qpn, qp->ibqp.srq ? to_msrq(qp->ibqp.srq): NULL); if (send_cq != recv_cq) __mlx4_ib_cq_clean(send_cq, qp->mqp.qpn, NULL); } mlx4_qp_remove(dev->dev, &qp->mqp); mlx4_ib_unlock_cqs(send_cq, recv_cq); spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags); mlx4_qp_free(dev->dev, &qp->mqp); if (!is_sqp(dev, qp) && !is_tunnel_qp(dev, qp)) { if (qp->flags & MLX4_IB_QP_NETIF) mlx4_ib_steer_qp_free(dev, qp->mqp.qpn, 1); else mlx4_qp_release_range(dev->dev, qp->mqp.qpn, 1); } mlx4_mtt_cleanup(dev->dev, &qp->mtt); if (is_user) { if (qp->rq.wqe_cnt) mlx4_ib_db_unmap_user(to_mucontext(qp->ibqp.uobject->context), &qp->db); ib_umem_release(qp->umem); } else { kvfree(qp->sq.wrid); kvfree(qp->rq.wrid); if (qp->mlx4_ib_qp_type & (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI)) free_proxy_bufs(&dev->ib_dev, qp); mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf); if (qp->rq.wqe_cnt) mlx4_db_free(dev->dev, &qp->db); } del_gid_entries(qp); } static u32 get_sqp_num(struct mlx4_ib_dev *dev, struct ib_qp_init_attr *attr) { /* Native or PPF */ if (!mlx4_is_mfunc(dev->dev) || (mlx4_is_master(dev->dev) && attr->create_flags & MLX4_IB_SRIOV_SQP)) { return dev->dev->phys_caps.base_sqpn + (attr->qp_type == IB_QPT_SMI ? 
0 : 2) + attr->port_num - 1; } /* PF or VF -- creating proxies */ if (attr->qp_type == IB_QPT_SMI) return dev->dev->caps.qp0_proxy[attr->port_num - 1]; else return dev->dev->caps.qp1_proxy[attr->port_num - 1]; } static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata) { struct mlx4_ib_qp *qp = NULL; int err; int sup_u_create_flags = MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK; u16 xrcdn = 0; gfp_t gfp; gfp = (init_attr->create_flags & MLX4_IB_QP_CREATE_USE_GFP_NOIO) ? GFP_NOIO : GFP_KERNEL; /* * We only support LSO, vendor flag1, and multicast loopback blocking, * and only for kernel UD QPs. */ if (init_attr->create_flags & ~(MLX4_IB_QP_LSO | MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK | MLX4_IB_SRIOV_TUNNEL_QP | MLX4_IB_SRIOV_SQP | MLX4_IB_QP_NETIF | MLX4_IB_QP_CREATE_ROCE_V2_GSI | MLX4_IB_QP_CREATE_USE_GFP_NOIO)) return ERR_PTR(-EINVAL); if (init_attr->create_flags & IB_QP_CREATE_NETIF_QP) { if (init_attr->qp_type != IB_QPT_UD) return ERR_PTR(-EINVAL); } if (init_attr->create_flags) { if (udata && init_attr->create_flags & ~(sup_u_create_flags)) return ERR_PTR(-EINVAL); if ((init_attr->create_flags & ~(MLX4_IB_SRIOV_SQP | MLX4_IB_QP_CREATE_USE_GFP_NOIO | MLX4_IB_QP_CREATE_ROCE_V2_GSI | MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK) && init_attr->qp_type != IB_QPT_UD) || (init_attr->create_flags & MLX4_IB_SRIOV_SQP && init_attr->qp_type > IB_QPT_GSI) || (init_attr->create_flags & MLX4_IB_QP_CREATE_ROCE_V2_GSI && init_attr->qp_type != IB_QPT_GSI)) return ERR_PTR(-EINVAL); } switch (init_attr->qp_type) { case IB_QPT_XRC_TGT: pd = to_mxrcd(init_attr->xrcd)->pd; xrcdn = to_mxrcd(init_attr->xrcd)->xrcdn; init_attr->send_cq = to_mxrcd(init_attr->xrcd)->cq; /* fall through */ case IB_QPT_XRC_INI: if (!(to_mdev(pd->device)->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC)) return ERR_PTR(-ENOSYS); init_attr->recv_cq = init_attr->send_cq; /* fall through */ case IB_QPT_RC: case IB_QPT_UC: case IB_QPT_RAW_PACKET: qp = kzalloc(sizeof *qp, gfp); if (!qp) return ERR_PTR(-ENOMEM); qp->pri.vid = 0xFFFF; qp->alt.vid = 0xFFFF; /* fall through */ case IB_QPT_UD: { err = create_qp_common(to_mdev(pd->device), pd, init_attr, udata, 0, &qp, gfp); if (err) { kfree(qp); return ERR_PTR(err); } qp->ibqp.qp_num = qp->mqp.qpn; qp->xrcdn = xrcdn; break; } case IB_QPT_SMI: case IB_QPT_GSI: { int sqpn; /* Userspace is not allowed to create special QPs: */ if (udata) return ERR_PTR(-EINVAL); if (init_attr->create_flags & MLX4_IB_QP_CREATE_ROCE_V2_GSI) { int res = mlx4_qp_reserve_range(to_mdev(pd->device)->dev, 1, 1, &sqpn, 0); if (res) return ERR_PTR(res); } else { sqpn = get_sqp_num(to_mdev(pd->device), init_attr); } err = create_qp_common(to_mdev(pd->device), pd, init_attr, udata, sqpn, &qp, gfp); if (err) return ERR_PTR(err); qp->port = init_attr->port_num; qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 : init_attr->create_flags & MLX4_IB_QP_CREATE_ROCE_V2_GSI ? sqpn : 1; break; } default: /* Don't support raw QPs */ return ERR_PTR(-EINVAL); } return &qp->ibqp; } struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata) { struct ib_device *device = pd ? 
pd->device : init_attr->xrcd->device; struct ib_qp *ibqp; struct mlx4_ib_dev *dev = to_mdev(device); ibqp = _mlx4_ib_create_qp(pd, init_attr, udata); if (!IS_ERR(ibqp) && (init_attr->qp_type == IB_QPT_GSI) && !(init_attr->create_flags & MLX4_IB_QP_CREATE_ROCE_V2_GSI)) { struct mlx4_ib_sqp *sqp = to_msqp((to_mqp(ibqp))); int is_eth = rdma_cap_eth_ah(&dev->ib_dev, init_attr->port_num); if (is_eth && dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2) { init_attr->create_flags |= MLX4_IB_QP_CREATE_ROCE_V2_GSI; sqp->roce_v2_gsi = ib_create_qp(pd, init_attr); if (IS_ERR(sqp->roce_v2_gsi)) { pr_err("Failed to create GSI QP for RoCEv2 (%ld)\n", PTR_ERR(sqp->roce_v2_gsi)); sqp->roce_v2_gsi = NULL; } else { sqp = to_msqp(to_mqp(sqp->roce_v2_gsi)); sqp->qp.flags |= MLX4_IB_ROCE_V2_GSI_QP; } init_attr->create_flags &= ~MLX4_IB_QP_CREATE_ROCE_V2_GSI; } } return ibqp; } static int _mlx4_ib_destroy_qp(struct ib_qp *qp) { struct mlx4_ib_dev *dev = to_mdev(qp->device); struct mlx4_ib_qp *mqp = to_mqp(qp); struct mlx4_ib_pd *pd; if (is_qp0(dev, mqp)) mlx4_CLOSE_PORT(dev->dev, mqp->port); if (dev->qp1_proxy[mqp->port - 1] == mqp) { mutex_lock(&dev->qp1_proxy_lock[mqp->port - 1]); dev->qp1_proxy[mqp->port - 1] = NULL; mutex_unlock(&dev->qp1_proxy_lock[mqp->port - 1]); } if (mqp->counter_index) mlx4_ib_free_qp_counter(dev, mqp); pd = get_pd(mqp); destroy_qp_common(dev, mqp, !!pd->ibpd.uobject); if (is_sqp(dev, mqp)) kfree(to_msqp(mqp)); else kfree(mqp); return 0; } int mlx4_ib_destroy_qp(struct ib_qp *qp) { struct mlx4_ib_qp *mqp = to_mqp(qp); if (mqp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI) { struct mlx4_ib_sqp *sqp = to_msqp(mqp); if (sqp->roce_v2_gsi) ib_destroy_qp(sqp->roce_v2_gsi); } return _mlx4_ib_destroy_qp(qp); } static int to_mlx4_st(struct mlx4_ib_dev *dev, enum mlx4_ib_qp_type type) { switch (type) { case MLX4_IB_QPT_RC: return MLX4_QP_ST_RC; case MLX4_IB_QPT_UC: return MLX4_QP_ST_UC; case MLX4_IB_QPT_UD: return MLX4_QP_ST_UD; case MLX4_IB_QPT_XRC_INI: case MLX4_IB_QPT_XRC_TGT: return MLX4_QP_ST_XRC; case MLX4_IB_QPT_SMI: case MLX4_IB_QPT_GSI: case MLX4_IB_QPT_RAW_PACKET: return MLX4_QP_ST_MLX; case MLX4_IB_QPT_PROXY_SMI_OWNER: case MLX4_IB_QPT_TUN_SMI_OWNER: return (mlx4_is_mfunc(dev->dev) ? MLX4_QP_ST_MLX : -1); case MLX4_IB_QPT_PROXY_SMI: case MLX4_IB_QPT_TUN_SMI: case MLX4_IB_QPT_PROXY_GSI: case MLX4_IB_QPT_TUN_GSI: return (mlx4_is_mfunc(dev->dev) ? 
MLX4_QP_ST_UD : -1); default: return -1; } } static __be32 to_mlx4_access_flags(struct mlx4_ib_qp *qp, const struct ib_qp_attr *attr, int attr_mask) { u8 dest_rd_atomic; u32 access_flags; u32 hw_access_flags = 0; if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) dest_rd_atomic = attr->max_dest_rd_atomic; else dest_rd_atomic = qp->resp_depth; if (attr_mask & IB_QP_ACCESS_FLAGS) access_flags = attr->qp_access_flags; else access_flags = qp->atomic_rd_en; if (!dest_rd_atomic) access_flags &= IB_ACCESS_REMOTE_WRITE; if (access_flags & IB_ACCESS_REMOTE_READ) hw_access_flags |= MLX4_QP_BIT_RRE; if (access_flags & IB_ACCESS_REMOTE_ATOMIC) hw_access_flags |= MLX4_QP_BIT_RAE; if (access_flags & IB_ACCESS_REMOTE_WRITE) hw_access_flags |= MLX4_QP_BIT_RWE; return cpu_to_be32(hw_access_flags); } static void store_sqp_attrs(struct mlx4_ib_sqp *sqp, const struct ib_qp_attr *attr, int attr_mask) { if (attr_mask & IB_QP_PKEY_INDEX) sqp->pkey_index = attr->pkey_index; if (attr_mask & IB_QP_QKEY) sqp->qkey = attr->qkey; if (attr_mask & IB_QP_SQ_PSN) sqp->send_psn = attr->sq_psn; } static void mlx4_set_sched(struct mlx4_qp_path *path, u8 port) { path->sched_queue = (path->sched_queue & 0xbf) | ((port - 1) << 6); } static int _mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah, u64 smac, u16 vlan_tag, struct mlx4_qp_path *path, struct mlx4_roce_smac_vlan_info *smac_info, u8 port) { int is_eth = rdma_port_get_link_layer(&dev->ib_dev, port) == IB_LINK_LAYER_ETHERNET; int vidx; int smac_index; int err; path->grh_mylmc = ah->src_path_bits & 0x7f; path->rlid = cpu_to_be16(ah->dlid); if (ah->static_rate) { path->static_rate = ah->static_rate + MLX4_STAT_RATE_OFFSET; while (path->static_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET && !(1 << path->static_rate & dev->dev->caps.stat_rate_support)) --path->static_rate; } else path->static_rate = 0; if (ah->ah_flags & IB_AH_GRH) { int real_sgid_index = mlx4_ib_gid_index_to_real_index(dev, port, ah->grh.sgid_index); if (real_sgid_index >= dev->dev->caps.gid_table_len[port]) { pr_err("sgid_index (%u) too large. max is %d\n", real_sgid_index, dev->dev->caps.gid_table_len[port] - 1); return -1; } path->grh_mylmc |= 1 << 7; path->mgid_index = real_sgid_index; path->hop_limit = ah->grh.hop_limit; path->tclass_flowlabel = cpu_to_be32((ah->grh.traffic_class << 20) | (ah->grh.flow_label)); memcpy(path->rgid, ah->grh.dgid.raw, 16); } if (is_eth) { if (!(ah->ah_flags & IB_AH_GRH)) return -1; path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE | ((port - 1) << 6) | ((ah->sl & 7) << 3); path->feup |= MLX4_FEUP_FORCE_ETH_UP; if (vlan_tag < 0x1000) { if (smac_info->vid < 0x1000) { /* both valid vlan ids */ if (smac_info->vid != vlan_tag) { /* different VIDs. unreg old and reg new */ err = mlx4_register_vlan(dev->dev, port, vlan_tag, &vidx); if (err) return err; smac_info->candidate_vid = vlan_tag; smac_info->candidate_vlan_index = vidx; smac_info->candidate_vlan_port = port; smac_info->update_vid = 1; path->vlan_index = vidx; } else { path->vlan_index = smac_info->vlan_index; } } else { /* no current vlan tag in qp */ err = mlx4_register_vlan(dev->dev, port, vlan_tag, &vidx); if (err) return err; smac_info->candidate_vid = vlan_tag; smac_info->candidate_vlan_index = vidx; smac_info->candidate_vlan_port = port; smac_info->update_vid = 1; path->vlan_index = vidx; } path->feup |= MLX4_FVL_FORCE_ETH_VLAN; path->fl = 1 << 6; } else { /* have current vlan tag. 
unregister it at modify-qp success */ if (smac_info->vid < 0x1000) { smac_info->candidate_vid = 0xFFFF; smac_info->update_vid = 1; } } /* get smac_index for RoCE use. * If no smac was yet assigned, register one. * If one was already assigned, but the new mac differs, * unregister the old one and register the new one. */ if ((!smac_info->smac && !smac_info->smac_port) || smac_info->smac != smac) { /* register candidate now, unreg if needed, after success */ smac_index = mlx4_register_mac(dev->dev, port, smac); if (smac_index >= 0) { smac_info->candidate_smac_index = smac_index; smac_info->candidate_smac = smac; smac_info->candidate_smac_port = port; } else { return -EINVAL; } } else { smac_index = smac_info->smac_index; } memcpy(path->dmac, ah->dmac, 6); path->ackto = MLX4_IB_LINK_TYPE_ETH; /* put MAC table smac index for IBoE */ path->grh_mylmc = (u8) (smac_index) | 0x80; } else { path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE | ((port - 1) << 6) | ((ah->sl & 0xf) << 2); } return 0; } static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_qp_attr *qp, enum ib_qp_attr_mask qp_attr_mask, struct mlx4_ib_qp *mqp, struct mlx4_qp_path *path, u8 port, u16 vlan_id, u8 *smac) { return _mlx4_set_path(dev, &qp->ah_attr, mlx4_mac_to_u64(smac), vlan_id, path, &mqp->pri, port); } static int mlx4_set_alt_path(struct mlx4_ib_dev *dev, const struct ib_qp_attr *qp, enum ib_qp_attr_mask qp_attr_mask, struct mlx4_ib_qp *mqp, struct mlx4_qp_path *path, u8 port) { return _mlx4_set_path(dev, &qp->alt_ah_attr, 0, 0xffff, path, &mqp->alt, port); } static void update_mcg_macs(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp) { struct mlx4_ib_gid_entry *ge, *tmp; list_for_each_entry_safe(ge, tmp, &qp->gid_list, list) { if (!ge->added && mlx4_ib_add_mc(dev, qp, &ge->gid)) { ge->added = 1; ge->port = qp->port; } } } static int handle_eth_ud_smac_index(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, struct mlx4_qp_context *context) { u64 u64_mac; int smac_index; u64_mac = atomic64_read(&dev->iboe.mac[qp->port - 1]); context->pri_path.sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE | ((qp->port - 1) << 6); if (!qp->pri.smac && !qp->pri.smac_port) { smac_index = mlx4_register_mac(dev->dev, qp->port, u64_mac); if (smac_index >= 0) { qp->pri.candidate_smac_index = smac_index; qp->pri.candidate_smac = u64_mac; qp->pri.candidate_smac_port = qp->port; context->pri_path.grh_mylmc = 0x80 | (u8) smac_index; } else { return -ENOENT; } } return 0; } static int create_qp_lb_counter(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp) { struct counter_index *new_counter_index; int err; u32 tmp_idx; if (rdma_port_get_link_layer(&dev->ib_dev, qp->port) != IB_LINK_LAYER_ETHERNET || !(qp->flags & MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK) || !(dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_LB_SRC_CHK)) return 0; err = mlx4_counter_alloc(dev->dev, &tmp_idx); if (err) return err; new_counter_index = kmalloc(sizeof(*new_counter_index), GFP_KERNEL); if (!new_counter_index) { mlx4_counter_free(dev->dev, tmp_idx); return -ENOMEM; } new_counter_index->index = tmp_idx; new_counter_index->allocated = 1; qp->counter_index = new_counter_index; mutex_lock(&dev->counters_table[qp->port - 1].mutex); list_add_tail(&new_counter_index->list, &dev->counters_table[qp->port - 1].counters_list); mutex_unlock(&dev->counters_table[qp->port - 1].mutex); return 0; } enum { MLX4_QPC_ROCE_MODE_1 = 0, MLX4_QPC_ROCE_MODE_2 = 2, MLX4_QPC_ROCE_MODE_UNDEFINED = 0xff }; static u8 gid_type_to_qpc(enum ib_gid_type gid_type) { switch (gid_type) { case IB_GID_TYPE_ROCE: return 
MLX4_QPC_ROCE_MODE_1; case IB_GID_TYPE_ROCE_UDP_ENCAP: return MLX4_QPC_ROCE_MODE_2; default: return MLX4_QPC_ROCE_MODE_UNDEFINED; } } static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, int attr_mask, enum ib_qp_state cur_state, enum ib_qp_state new_state) { struct mlx4_ib_dev *dev = to_mdev(ibqp->device); struct mlx4_ib_qp *qp = to_mqp(ibqp); struct mlx4_ib_pd *pd; struct mlx4_ib_cq *send_cq, *recv_cq; struct mlx4_qp_context *context; enum mlx4_qp_optpar optpar = 0; int sqd_event; int steer_qp = 0; int err = -EINVAL; int counter_index; /* APM is not supported under RoCE */ if (attr_mask & IB_QP_ALT_PATH && rdma_port_get_link_layer(&dev->ib_dev, qp->port) == IB_LINK_LAYER_ETHERNET) return -ENOTSUPP; context = kzalloc(sizeof *context, GFP_KERNEL); if (!context) return -ENOMEM; context->flags = cpu_to_be32((to_mlx4_state(new_state) << 28) | (to_mlx4_st(dev, qp->mlx4_ib_qp_type) << 16)); if (!(attr_mask & IB_QP_PATH_MIG_STATE)) context->flags |= cpu_to_be32(MLX4_QP_PM_MIGRATED << 11); else { optpar |= MLX4_QP_OPTPAR_PM_STATE; switch (attr->path_mig_state) { case IB_MIG_MIGRATED: context->flags |= cpu_to_be32(MLX4_QP_PM_MIGRATED << 11); break; case IB_MIG_REARM: context->flags |= cpu_to_be32(MLX4_QP_PM_REARM << 11); break; case IB_MIG_ARMED: context->flags |= cpu_to_be32(MLX4_QP_PM_ARMED << 11); break; } } if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI) context->mtu_msgmax = (IB_MTU_4096 << 5) | 11; else if (ibqp->qp_type == IB_QPT_RAW_PACKET) context->mtu_msgmax = (MLX4_RAW_QP_MTU << 5) | MLX4_RAW_QP_MSGMAX; else if (ibqp->qp_type == IB_QPT_UD) { if (qp->flags & MLX4_IB_QP_LSO) context->mtu_msgmax = (IB_MTU_4096 << 5) | ilog2(dev->dev->caps.max_gso_sz); else context->mtu_msgmax = (IB_MTU_4096 << 5) | 12; } else if (attr_mask & IB_QP_PATH_MTU) { if (attr->path_mtu < IB_MTU_256 || attr->path_mtu > IB_MTU_4096) { pr_err("path MTU (%u) is invalid\n", attr->path_mtu); goto out; } context->mtu_msgmax = (attr->path_mtu << 5) | ilog2(dev->dev->caps.max_msg_sz); } if (qp->rq.wqe_cnt) context->rq_size_stride = ilog2(qp->rq.wqe_cnt) << 3; context->rq_size_stride |= qp->rq.wqe_shift - 4; if (qp->sq.wqe_cnt) context->sq_size_stride = ilog2(qp->sq.wqe_cnt) << 3; context->sq_size_stride |= qp->sq.wqe_shift - 4; if (new_state == IB_QPS_RESET && qp->counter_index) mlx4_ib_free_qp_counter(dev, qp); if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { context->sq_size_stride |= !!qp->sq_no_prefetch << 7; context->xrcd = cpu_to_be32((u32) qp->xrcdn); if (ibqp->qp_type == IB_QPT_RAW_PACKET) context->param3 |= cpu_to_be32(1 << 30); } if (qp->ibqp.uobject) context->usr_page = cpu_to_be32( mlx4_to_hw_uar_index(dev->dev, to_mucontext(ibqp->uobject->context)->uar.index)); else context->usr_page = cpu_to_be32( mlx4_to_hw_uar_index(dev->dev, dev->priv_uar.index)); if (attr_mask & IB_QP_DEST_QPN) context->remote_qpn = cpu_to_be32(attr->dest_qp_num); if (attr_mask & IB_QP_PORT) { if (cur_state == IB_QPS_SQD && new_state == IB_QPS_SQD && !(attr_mask & IB_QP_AV)) { mlx4_set_sched(&context->pri_path, attr->port_num); optpar |= MLX4_QP_OPTPAR_SCHED_QUEUE; } } if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) { err = create_qp_lb_counter(dev, qp); if (err) goto out; counter_index = dev->counters_table[qp->port - 1].default_counter; if (qp->counter_index) counter_index = qp->counter_index->index; if (counter_index != -1) { context->pri_path.counter_index = counter_index; optpar |= MLX4_QP_OPTPAR_COUNTER_INDEX; if (qp->counter_index) { context->pri_path.fl |= 
MLX4_FL_ETH_SRC_CHECK_MC_LB; context->pri_path.vlan_control |= MLX4_CTRL_ETH_SRC_CHECK_IF_COUNTER; } } else context->pri_path.counter_index = MLX4_SINK_COUNTER_INDEX(dev->dev); if (qp->flags & MLX4_IB_QP_NETIF) { mlx4_ib_steer_qp_reg(dev, qp, 1); steer_qp = 1; } if (ibqp->qp_type == IB_QPT_GSI) { enum ib_gid_type gid_type = qp->flags & MLX4_IB_ROCE_V2_GSI_QP ? IB_GID_TYPE_ROCE_UDP_ENCAP : IB_GID_TYPE_ROCE; u8 qpc_roce_mode = gid_type_to_qpc(gid_type); context->rlkey_roce_mode |= (qpc_roce_mode << 6); } } if (attr_mask & IB_QP_PKEY_INDEX) { if (qp->mlx4_ib_qp_type & MLX4_IB_QPT_ANY_SRIOV) context->pri_path.disable_pkey_check = 0x40; context->pri_path.pkey_index = attr->pkey_index; optpar |= MLX4_QP_OPTPAR_PKEY_INDEX; } if (attr_mask & IB_QP_AV) { u8 port_num = mlx4_is_bonded(to_mdev(ibqp->device)->dev) ? 1 : attr_mask & IB_QP_PORT ? attr->port_num : qp->port; union ib_gid gid; struct ib_gid_attr gid_attr; u16 vlan = 0xffff; u8 smac[ETH_ALEN]; int status = 0; int is_eth = rdma_cap_eth_ah(&dev->ib_dev, port_num) && attr->ah_attr.ah_flags & IB_AH_GRH; if (is_eth) { int index = attr->ah_attr.grh.sgid_index; status = ib_get_cached_gid(ibqp->device, port_num, index, &gid, &gid_attr); if (!status && !memcmp(&gid, &zgid, sizeof(gid))) status = -ENOENT; if (!status && gid_attr.ndev) { vlan = rdma_vlan_dev_vlan_id(gid_attr.ndev); memcpy(smac, IF_LLADDR(gid_attr.ndev), ETH_ALEN); dev_put(gid_attr.ndev); } } if (status) goto out; if (mlx4_set_path(dev, attr, attr_mask, qp, &context->pri_path, port_num, vlan, smac)) goto out; optpar |= (MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH | MLX4_QP_OPTPAR_SCHED_QUEUE); if (is_eth && (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR)) { u8 qpc_roce_mode = gid_type_to_qpc(gid_attr.gid_type); if (qpc_roce_mode == MLX4_QPC_ROCE_MODE_UNDEFINED) { err = -EINVAL; goto out; } context->rlkey_roce_mode |= (qpc_roce_mode << 6); } } if (attr_mask & IB_QP_TIMEOUT) { context->pri_path.ackto |= attr->timeout << 3; optpar |= MLX4_QP_OPTPAR_ACK_TIMEOUT; } if (attr_mask & IB_QP_ALT_PATH) { if (attr->alt_port_num == 0 || attr->alt_port_num > dev->dev->caps.num_ports) goto out; if (attr->alt_pkey_index >= dev->dev->caps.pkey_table_len[attr->alt_port_num]) goto out; if (mlx4_set_alt_path(dev, attr, attr_mask, qp, &context->alt_path, attr->alt_port_num)) goto out; context->alt_path.pkey_index = attr->alt_pkey_index; context->alt_path.ackto = attr->alt_timeout << 3; optpar |= MLX4_QP_OPTPAR_ALT_ADDR_PATH; } pd = get_pd(qp); get_cqs(qp, &send_cq, &recv_cq); context->pd = cpu_to_be32(pd->pdn); context->cqn_send = cpu_to_be32(send_cq->mcq.cqn); context->cqn_recv = cpu_to_be32(recv_cq->mcq.cqn); context->params1 = cpu_to_be32(MLX4_IB_ACK_REQ_FREQ << 28); /* Set "fast registration enabled" for all kernel QPs */ if (!qp->ibqp.uobject) context->params1 |= cpu_to_be32(1 << 11); if (attr_mask & IB_QP_RNR_RETRY) { context->params1 |= cpu_to_be32(attr->rnr_retry << 13); optpar |= MLX4_QP_OPTPAR_RNR_RETRY; } if (attr_mask & IB_QP_RETRY_CNT) { context->params1 |= cpu_to_be32(attr->retry_cnt << 16); optpar |= MLX4_QP_OPTPAR_RETRY_COUNT; } if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) { if (attr->max_rd_atomic) context->params1 |= cpu_to_be32(fls(attr->max_rd_atomic - 1) << 21); optpar |= MLX4_QP_OPTPAR_SRA_MAX; } if (attr_mask & IB_QP_SQ_PSN) context->next_send_psn = cpu_to_be32(attr->sq_psn); if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) { if (attr->max_dest_rd_atomic) context->params2 |= cpu_to_be32(fls(attr->max_dest_rd_atomic - 1) << 21); optpar |= MLX4_QP_OPTPAR_RRA_MAX; } if (attr_mask & (IB_QP_ACCESS_FLAGS | 
IB_QP_MAX_DEST_RD_ATOMIC)) { context->params2 |= to_mlx4_access_flags(qp, attr, attr_mask); optpar |= MLX4_QP_OPTPAR_RWE | MLX4_QP_OPTPAR_RRE | MLX4_QP_OPTPAR_RAE; } if (ibqp->srq) context->params2 |= cpu_to_be32(MLX4_QP_BIT_RIC); if (attr_mask & IB_QP_MIN_RNR_TIMER) { context->rnr_nextrecvpsn |= cpu_to_be32(attr->min_rnr_timer << 24); optpar |= MLX4_QP_OPTPAR_RNR_TIMEOUT; } if (attr_mask & IB_QP_RQ_PSN) context->rnr_nextrecvpsn |= cpu_to_be32(attr->rq_psn); /* proxy and tunnel qp qkeys will be changed in modify-qp wrappers */ if (attr_mask & IB_QP_QKEY) { if (qp->mlx4_ib_qp_type & (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_TUN_SMI_OWNER)) context->qkey = cpu_to_be32(IB_QP_SET_QKEY); else { if (mlx4_is_mfunc(dev->dev) && !(qp->mlx4_ib_qp_type & MLX4_IB_QPT_ANY_SRIOV) && (attr->qkey & MLX4_RESERVED_QKEY_MASK) == MLX4_RESERVED_QKEY_BASE) { pr_err("Cannot use reserved QKEY" " 0x%x (range 0xffff0000..0xffffffff" " is reserved)\n", attr->qkey); err = -EINVAL; goto out; } context->qkey = cpu_to_be32(attr->qkey); } optpar |= MLX4_QP_OPTPAR_Q_KEY; } if (ibqp->srq) context->srqn = cpu_to_be32(1 << 24 | to_msrq(ibqp->srq)->msrq.srqn); if (qp->rq.wqe_cnt && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) context->db_rec_addr = cpu_to_be64(qp->db.dma); if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR && (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI || ibqp->qp_type == IB_QPT_UD || ibqp->qp_type == IB_QPT_RAW_PACKET)) { context->pri_path.sched_queue = (qp->port - 1) << 6; if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_SMI || qp->mlx4_ib_qp_type & (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_TUN_SMI_OWNER)) { context->pri_path.sched_queue |= MLX4_IB_DEFAULT_QP0_SCHED_QUEUE; if (qp->mlx4_ib_qp_type != MLX4_IB_QPT_SMI) context->pri_path.fl = 0x80; } else { if (qp->mlx4_ib_qp_type & MLX4_IB_QPT_ANY_SRIOV) context->pri_path.fl = 0x80; context->pri_path.sched_queue |= MLX4_IB_DEFAULT_SCHED_QUEUE; } if (rdma_port_get_link_layer(&dev->ib_dev, qp->port) == IB_LINK_LAYER_ETHERNET) { if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_TUN_GSI || qp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI) context->pri_path.feup = 1 << 7; /* don't fsm */ /* handle smac_index */ if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_UD || qp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_GSI || qp->mlx4_ib_qp_type == MLX4_IB_QPT_TUN_GSI) { err = handle_eth_ud_smac_index(dev, qp, context); if (err) { err = -EINVAL; goto out; } if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_GSI) dev->qp1_proxy[qp->port - 1] = qp; } } } if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) { context->pri_path.ackto = (context->pri_path.ackto & 0xf8) | MLX4_IB_LINK_TYPE_ETH; if (dev->dev->caps.tunnel_offload_mode == MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) { /* set QP to receive both tunneled & non-tunneled packets */ if (!(context->flags & cpu_to_be32(1 << MLX4_RSS_QPC_FLAG_OFFSET))) context->srqn = cpu_to_be32(7 << 28); } } if (ibqp->qp_type == IB_QPT_UD && (new_state == IB_QPS_RTR)) { int is_eth = rdma_port_get_link_layer( &dev->ib_dev, qp->port) == IB_LINK_LAYER_ETHERNET; if (is_eth) { context->pri_path.ackto = MLX4_IB_LINK_TYPE_ETH; optpar |= MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH; } } if (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD && attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY && attr->en_sqd_async_notify) sqd_event = 1; else sqd_event = 0; if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) context->rlkey_roce_mode |= (1 << 4); /* * Before passing a kernel QP to the HW, make sure that the * ownership bits of the send queue are set and the SQ * headroom is stamped so 
that the hardware doesn't start * processing stale work requests. */ if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { struct mlx4_wqe_ctrl_seg *ctrl; int i; for (i = 0; i < qp->sq.wqe_cnt; ++i) { ctrl = get_send_wqe(qp, i); - ctrl->owner_opcode = cpu_to_be32(1 << 31); + ctrl->owner_opcode = cpu_to_be32(1U << 31); if (qp->sq_max_wqes_per_wr == 1) ctrl->fence_size = 1 << (qp->sq.wqe_shift - 4); stamp_send_wqe(qp, i, 1 << qp->sq.wqe_shift); } } err = mlx4_qp_modify(dev->dev, &qp->mtt, to_mlx4_state(cur_state), to_mlx4_state(new_state), context, optpar, sqd_event, &qp->mqp); if (err) goto out; qp->state = new_state; if (attr_mask & IB_QP_ACCESS_FLAGS) qp->atomic_rd_en = attr->qp_access_flags; if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) qp->resp_depth = attr->max_dest_rd_atomic; if (attr_mask & IB_QP_PORT) { qp->port = attr->port_num; update_mcg_macs(dev, qp); } if (attr_mask & IB_QP_ALT_PATH) qp->alt_port = attr->alt_port_num; if (is_sqp(dev, qp)) store_sqp_attrs(to_msqp(qp), attr, attr_mask); /* * If we moved QP0 to RTR, bring the IB link up; if we moved * QP0 to RESET or ERROR, bring the link back down. */ if (is_qp0(dev, qp)) { if (cur_state != IB_QPS_RTR && new_state == IB_QPS_RTR) if (mlx4_INIT_PORT(dev->dev, qp->port)) pr_warn("INIT_PORT failed for port %d\n", qp->port); if (cur_state != IB_QPS_RESET && cur_state != IB_QPS_ERR && (new_state == IB_QPS_RESET || new_state == IB_QPS_ERR)) mlx4_CLOSE_PORT(dev->dev, qp->port); } /* * If we moved a kernel QP to RESET, clean up all old CQ * entries and reinitialize the QP. */ if (new_state == IB_QPS_RESET) { if (!ibqp->uobject) { mlx4_ib_cq_clean(recv_cq, qp->mqp.qpn, ibqp->srq ? to_msrq(ibqp->srq) : NULL); if (send_cq != recv_cq) mlx4_ib_cq_clean(send_cq, qp->mqp.qpn, NULL); qp->rq.head = 0; qp->rq.tail = 0; qp->sq.head = 0; qp->sq.tail = 0; qp->sq_next_wqe = 0; if (qp->rq.wqe_cnt) *qp->db.db = 0; if (qp->flags & MLX4_IB_QP_NETIF) mlx4_ib_steer_qp_reg(dev, qp, 0); } if (qp->pri.smac || (!qp->pri.smac && qp->pri.smac_port)) { mlx4_unregister_mac(dev->dev, qp->pri.smac_port, qp->pri.smac); qp->pri.smac = 0; qp->pri.smac_port = 0; } if (qp->alt.smac) { mlx4_unregister_mac(dev->dev, qp->alt.smac_port, qp->alt.smac); qp->alt.smac = 0; } if (qp->pri.vid < 0x1000) { mlx4_unregister_vlan(dev->dev, qp->pri.vlan_port, qp->pri.vid); qp->pri.vid = 0xFFFF; qp->pri.candidate_vid = 0xFFFF; qp->pri.update_vid = 0; } if (qp->alt.vid < 0x1000) { mlx4_unregister_vlan(dev->dev, qp->alt.vlan_port, qp->alt.vid); qp->alt.vid = 0xFFFF; qp->alt.candidate_vid = 0xFFFF; qp->alt.update_vid = 0; } } out: if (err && qp->counter_index) mlx4_ib_free_qp_counter(dev, qp); if (err && steer_qp) mlx4_ib_steer_qp_reg(dev, qp, 0); kfree(context); if (qp->pri.candidate_smac || (!qp->pri.candidate_smac && qp->pri.candidate_smac_port)) { if (err) { mlx4_unregister_mac(dev->dev, qp->pri.candidate_smac_port, qp->pri.candidate_smac); } else { if (qp->pri.smac || (!qp->pri.smac && qp->pri.smac_port)) mlx4_unregister_mac(dev->dev, qp->pri.smac_port, qp->pri.smac); qp->pri.smac = qp->pri.candidate_smac; qp->pri.smac_index = qp->pri.candidate_smac_index; qp->pri.smac_port = qp->pri.candidate_smac_port; } qp->pri.candidate_smac = 0; qp->pri.candidate_smac_index = 0; qp->pri.candidate_smac_port = 0; } if (qp->alt.candidate_smac) { if (err) { mlx4_unregister_mac(dev->dev, qp->alt.candidate_smac_port, qp->alt.candidate_smac); } else { if (qp->alt.smac) mlx4_unregister_mac(dev->dev, qp->alt.smac_port, qp->alt.smac); qp->alt.smac = qp->alt.candidate_smac; 
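The owner_opcode hunk above is the pattern this revision fixes throughout: with a 32-bit int, 1 << 31 shifts a one into the sign bit, which is undefined behaviour in C, so the constant is written as 1U << 31 before cpu_to_be32() byte-swaps it. A minimal standalone sketch of the difference (illustrative only; the opcode value is made up):

    #include <inttypes.h>
    #include <stdio.h>

    int main(void)
    {
        uint32_t owner  = 1U << 31;   /* well defined: 0x80000000 */
        uint32_t opcode = 0x0a;       /* hypothetical opcode value */

        /* 1 << 31 would shift into the sign bit of a signed int: undefined */
        printf("owner_opcode = 0x%08" PRIx32 "\n", owner | opcode);
        return 0;
    }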
qp->alt.smac_index = qp->alt.candidate_smac_index; qp->alt.smac_port = qp->alt.candidate_smac_port; } qp->alt.candidate_smac = 0; qp->alt.candidate_smac_index = 0; qp->alt.candidate_smac_port = 0; } if (qp->pri.update_vid) { if (err) { if (qp->pri.candidate_vid < 0x1000) mlx4_unregister_vlan(dev->dev, qp->pri.candidate_vlan_port, qp->pri.candidate_vid); } else { if (qp->pri.vid < 0x1000) mlx4_unregister_vlan(dev->dev, qp->pri.vlan_port, qp->pri.vid); qp->pri.vid = qp->pri.candidate_vid; qp->pri.vlan_port = qp->pri.candidate_vlan_port; qp->pri.vlan_index = qp->pri.candidate_vlan_index; } qp->pri.candidate_vid = 0xFFFF; qp->pri.update_vid = 0; } if (qp->alt.update_vid) { if (err) { if (qp->alt.candidate_vid < 0x1000) mlx4_unregister_vlan(dev->dev, qp->alt.candidate_vlan_port, qp->alt.candidate_vid); } else { if (qp->alt.vid < 0x1000) mlx4_unregister_vlan(dev->dev, qp->alt.vlan_port, qp->alt.vid); qp->alt.vid = qp->alt.candidate_vid; qp->alt.vlan_port = qp->alt.candidate_vlan_port; qp->alt.vlan_index = qp->alt.candidate_vlan_index; } qp->alt.candidate_vid = 0xFFFF; qp->alt.update_vid = 0; } return err; } static int _mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata) { struct mlx4_ib_dev *dev = to_mdev(ibqp->device); struct mlx4_ib_qp *qp = to_mqp(ibqp); enum ib_qp_state cur_state, new_state; int err = -EINVAL; int ll; mutex_lock(&qp->mutex); cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state; new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state; if (cur_state == new_state && cur_state == IB_QPS_RESET) { ll = IB_LINK_LAYER_UNSPECIFIED; } else { int port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port; ll = rdma_port_get_link_layer(&dev->ib_dev, port); } if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask, ll)) { pr_debug("qpn 0x%x: invalid attribute mask specified " "for transition %d to %d. qp_type %d," " attr_mask 0x%x\n", ibqp->qp_num, cur_state, new_state, ibqp->qp_type, attr_mask); goto out; } if (mlx4_is_bonded(dev->dev) && (attr_mask & IB_QP_PORT)) { if ((cur_state == IB_QPS_RESET) && (new_state == IB_QPS_INIT)) { if ((ibqp->qp_type == IB_QPT_RC) || (ibqp->qp_type == IB_QPT_UD) || (ibqp->qp_type == IB_QPT_UC) || (ibqp->qp_type == IB_QPT_RAW_PACKET) || (ibqp->qp_type == IB_QPT_XRC_INI)) { attr->port_num = mlx4_ib_bond_next_port(dev); } } else { /* no sense in changing port_num * when ports are bonded */ attr_mask &= ~IB_QP_PORT; } } if ((attr_mask & IB_QP_PORT) && (attr->port_num == 0 || attr->port_num > dev->num_ports)) { pr_debug("qpn 0x%x: invalid port number (%d) specified " "for transition %d to %d. qp_type %d\n", ibqp->qp_num, attr->port_num, cur_state, new_state, ibqp->qp_type); goto out; } if ((attr_mask & IB_QP_PORT) && (ibqp->qp_type == IB_QPT_RAW_PACKET) && (rdma_port_get_link_layer(&dev->ib_dev, attr->port_num) != IB_LINK_LAYER_ETHERNET)) goto out; if (attr_mask & IB_QP_PKEY_INDEX) { int p = attr_mask & IB_QP_PORT ? attr->port_num : qp->port; if (attr->pkey_index >= dev->dev->caps.pkey_table_len[p]) { pr_debug("qpn 0x%x: invalid pkey index (%d) specified " "for transition %d to %d. qp_type %d\n", ibqp->qp_num, attr->pkey_index, cur_state, new_state, ibqp->qp_type); goto out; } } if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC && attr->max_rd_atomic > dev->dev->caps.max_qp_init_rdma) { pr_debug("qpn 0x%x: max_rd_atomic (%d) too large. " "Transition %d to %d. 
qp_type %d\n", ibqp->qp_num, attr->max_rd_atomic, cur_state, new_state, ibqp->qp_type); goto out; } if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC && attr->max_dest_rd_atomic > dev->dev->caps.max_qp_dest_rdma) { pr_debug("qpn 0x%x: max_dest_rd_atomic (%d) too large. " "Transition %d to %d. qp_type %d\n", ibqp->qp_num, attr->max_dest_rd_atomic, cur_state, new_state, ibqp->qp_type); goto out; } if (cur_state == new_state && cur_state == IB_QPS_RESET) { err = 0; goto out; } err = __mlx4_ib_modify_qp(ibqp, attr, attr_mask, cur_state, new_state); if (mlx4_is_bonded(dev->dev) && (attr_mask & IB_QP_PORT)) attr->port_num = 1; out: mutex_unlock(&qp->mutex); return err; } int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata) { struct mlx4_ib_qp *mqp = to_mqp(ibqp); int ret; ret = _mlx4_ib_modify_qp(ibqp, attr, attr_mask, udata); if (mqp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI) { struct mlx4_ib_sqp *sqp = to_msqp(mqp); int err = 0; if (sqp->roce_v2_gsi) err = ib_modify_qp(sqp->roce_v2_gsi, attr, attr_mask); if (err) pr_err("Failed to modify GSI QP for RoCEv2 (%d)\n", err); } return ret; } static int vf_get_qp0_qkey(struct mlx4_dev *dev, int qpn, u32 *qkey) { int i; for (i = 0; i < dev->caps.num_ports; i++) { if (qpn == dev->caps.qp0_proxy[i] || qpn == dev->caps.qp0_tunnel[i]) { *qkey = dev->caps.qp0_qkey[i]; return 0; } } return -EINVAL; } static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr, void *wqe, unsigned *mlx_seg_len) { struct mlx4_ib_dev *mdev = to_mdev(sqp->qp.ibqp.device); struct ib_device *ib_dev = &mdev->ib_dev; struct mlx4_wqe_mlx_seg *mlx = wqe; struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx; struct mlx4_ib_ah *ah = to_mah(wr->ah); u16 pkey; u32 qkey; int send_size; int header_size; int spc; int i; if (wr->wr.opcode != IB_WR_SEND) return -EINVAL; send_size = 0; for (i = 0; i < wr->wr.num_sge; ++i) send_size += wr->wr.sg_list[i].length; /* for proxy-qp0 sends, need to add in size of tunnel header */ /* for tunnel-qp0 sends, tunnel header is already in s/g list */ if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER) send_size += sizeof (struct mlx4_ib_tunnel_header); ib_ud_header_init(send_size, 1, 0, 0, 0, 0, 0, 0, &sqp->ud_header); if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER) { sqp->ud_header.lrh.service_level = be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 28; sqp->ud_header.lrh.destination_lid = cpu_to_be16(ah->av.ib.g_slid & 0x7f); sqp->ud_header.lrh.source_lid = cpu_to_be16(ah->av.ib.g_slid & 0x7f); } mlx->flags &= cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE); /* force loopback */ mlx->flags |= cpu_to_be32(MLX4_WQE_MLX_VL15 | 0x1 | MLX4_WQE_MLX_SLR); mlx->rlid = sqp->ud_header.lrh.destination_lid; sqp->ud_header.lrh.virtual_lane = 0; sqp->ud_header.bth.solicited_event = !!(wr->wr.send_flags & IB_SEND_SOLICITED); ib_get_cached_pkey(ib_dev, sqp->qp.port, 0, &pkey); sqp->ud_header.bth.pkey = cpu_to_be16(pkey); if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_TUN_SMI_OWNER) sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->remote_qpn); else sqp->ud_header.bth.destination_qpn = cpu_to_be32(mdev->dev->caps.qp0_tunnel[sqp->qp.port - 1]); sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1)); if (mlx4_is_master(mdev->dev)) { if (mlx4_get_parav_qkey(mdev->dev, sqp->qp.mqp.qpn, &qkey)) return -EINVAL; } else { if (vf_get_qp0_qkey(mdev->dev, sqp->qp.mqp.qpn, &qkey)) return -EINVAL; } sqp->ud_header.deth.qkey = cpu_to_be32(qkey); sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.mqp.qpn); 
sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY; sqp->ud_header.immediate_present = 0; header_size = ib_ud_header_pack(&sqp->ud_header, sqp->header_buf); /* * Inline data segments may not cross a 64 byte boundary. If * our UD header is bigger than the space available up to the * next 64 byte boundary in the WQE, use two inline data * segments to hold the UD header. */ spc = MLX4_INLINE_ALIGN - ((unsigned long) (inl + 1) & (MLX4_INLINE_ALIGN - 1)); if (header_size <= spc) { inl->byte_count = cpu_to_be32((1U << 31) | header_size); memcpy(inl + 1, sqp->header_buf, header_size); i = 1; } else { inl->byte_count = cpu_to_be32((1U << 31) | spc); memcpy(inl + 1, sqp->header_buf, spc); inl = (void *) (inl + 1) + spc; memcpy(inl + 1, sqp->header_buf + spc, header_size - spc); /* * Need a barrier here to make sure all the data is * visible before the byte_count field is set. * Otherwise the HCA prefetcher could grab the 64-byte * chunk with this inline segment and get a valid (!= * 0xffffffff) byte count but stale data, and end up * generating a packet with bad headers. * * The first inline segment's byte_count field doesn't * need a barrier, because it comes after a * control/MLX segment and therefore is at an offset * of 16 mod 64. */ wmb(); inl->byte_count = cpu_to_be32((1U << 31) | (header_size - spc)); i = 2; } *mlx_seg_len = ALIGN(i * sizeof (struct mlx4_wqe_inline_seg) + header_size, 16); return 0; } static u8 sl_to_vl(struct mlx4_ib_dev *dev, u8 sl, int port_num) { union sl2vl_tbl_to_u64 tmp_vltab; u8 vl; if (sl > 15) return 0xf; tmp_vltab.sl64 = atomic64_read(&dev->sl2vl[port_num - 1]); vl = tmp_vltab.sl8[sl >> 1]; if (sl & 1) vl &= 0x0f; else vl >>= 4; return vl; } #define MLX4_ROCEV2_QP1_SPORT 0xC000 static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr, void *wqe, unsigned *mlx_seg_len) { struct ib_device *ib_dev = sqp->qp.ibqp.device; struct mlx4_wqe_mlx_seg *mlx = wqe; struct mlx4_wqe_ctrl_seg *ctrl = wqe; struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx; struct mlx4_ib_ah *ah = to_mah(wr->ah); union ib_gid sgid; u16 pkey; int send_size; int header_size; int spc; int i; int err = 0; u16 vlan = 0xffff; bool is_eth; bool is_vlan = false; bool is_grh; bool is_udp = false; int ip_version = 0; send_size = 0; for (i = 0; i < wr->wr.num_sge; ++i) send_size += wr->wr.sg_list[i].length; is_eth = rdma_port_get_link_layer(sqp->qp.ibqp.device, sqp->qp.port) == IB_LINK_LAYER_ETHERNET; is_grh = mlx4_ib_ah_grh_present(ah); if (is_eth) { struct ib_gid_attr gid_attr; if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) { /* When multi-function is enabled, the ib_core gid * indexes don't necessarily match the hw ones, so * we must use our own cache */ err = mlx4_get_roce_gid_from_slave(to_mdev(ib_dev)->dev, be32_to_cpu(ah->av.ib.port_pd) >> 24, ah->av.ib.gid_index, &sgid.raw[0]); if (err) return err; } else { err = ib_get_cached_gid(ib_dev, be32_to_cpu(ah->av.ib.port_pd) >> 24, ah->av.ib.gid_index, &sgid, &gid_attr); if (!err) { if (gid_attr.ndev) dev_put(gid_attr.ndev); if (!memcmp(&sgid, &zgid, sizeof(sgid))) err = -ENOENT; } if (!err) { is_udp = gid_attr.gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP; if (is_udp) { if (ipv6_addr_v4mapped((struct in6_addr *)&sgid)) ip_version = 4; else ip_version = 6; is_grh = false; } } else { return err; } } if (ah->av.eth.vlan != cpu_to_be16(0xffff)) { vlan = be16_to_cpu(ah->av.eth.vlan) & 0x0fff; is_vlan = 1; } } err = ib_ud_header_init(send_size, !is_eth, is_eth, is_vlan, is_grh, ip_version, is_udp, 0, &sqp->ud_header); if (err) return err; if (!is_eth) { 
sqp->ud_header.lrh.service_level = be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 28; sqp->ud_header.lrh.destination_lid = ah->av.ib.dlid; sqp->ud_header.lrh.source_lid = cpu_to_be16(ah->av.ib.g_slid & 0x7f); } if (is_grh || (ip_version == 6)) { sqp->ud_header.grh.traffic_class = (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 20) & 0xff; sqp->ud_header.grh.flow_label = ah->av.ib.sl_tclass_flowlabel & cpu_to_be32(0xfffff); sqp->ud_header.grh.hop_limit = ah->av.ib.hop_limit; if (is_eth) { memcpy(sqp->ud_header.grh.source_gid.raw, sgid.raw, 16); } else { if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) { /* When multi-function is enabled, the ib_core gid * indexes don't necessarily match the hw ones, so * we must use our own cache */ sqp->ud_header.grh.source_gid.global.subnet_prefix = cpu_to_be64(atomic64_read(&(to_mdev(ib_dev)->sriov. demux[sqp->qp.port - 1]. subnet_prefix))); sqp->ud_header.grh.source_gid.global.interface_id = to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1]. guid_cache[ah->av.ib.gid_index]; } else { ib_get_cached_gid(ib_dev, be32_to_cpu(ah->av.ib.port_pd) >> 24, ah->av.ib.gid_index, &sqp->ud_header.grh.source_gid, NULL); } } memcpy(sqp->ud_header.grh.destination_gid.raw, ah->av.ib.dgid, 16); } if (ip_version == 4) { sqp->ud_header.ip4.tos = (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 20) & 0xff; sqp->ud_header.ip4.id = 0; sqp->ud_header.ip4.frag_off = htons(IP_DF); sqp->ud_header.ip4.ttl = ah->av.eth.hop_limit; memcpy(&sqp->ud_header.ip4.saddr, sgid.raw + 12, 4); memcpy(&sqp->ud_header.ip4.daddr, ah->av.ib.dgid + 12, 4); sqp->ud_header.ip4.check = ib_ud_ip4_csum(&sqp->ud_header); } if (is_udp) { sqp->ud_header.udp.dport = htons(ROCE_V2_UDP_DPORT); sqp->ud_header.udp.sport = htons(MLX4_ROCEV2_QP1_SPORT); sqp->ud_header.udp.csum = 0; } mlx->flags &= cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE); if (!is_eth) { mlx->flags |= cpu_to_be32((!sqp->qp.ibqp.qp_num ? MLX4_WQE_MLX_VL15 : 0) | (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE ? MLX4_WQE_MLX_SLR : 0) | (sqp->ud_header.lrh.service_level << 8)); if (ah->av.ib.port_pd & cpu_to_be32(0x80000000)) mlx->flags |= cpu_to_be32(0x1); /* force loopback */ mlx->rlid = sqp->ud_header.lrh.destination_lid; } switch (wr->wr.opcode) { case IB_WR_SEND: sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY; sqp->ud_header.immediate_present = 0; break; case IB_WR_SEND_WITH_IMM: sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE; sqp->ud_header.immediate_present = 1; sqp->ud_header.immediate_data = wr->wr.ex.imm_data; break; default: return -EINVAL; } if (is_eth) { struct in6_addr in6; u16 ether_type; u16 pcp = (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 29) << 13; ether_type = (!is_udp) ? MLX4_IB_IBOE_ETHERTYPE : (ip_version == 4 ? ETHERTYPE_IP : ETHERTYPE_IPV6); mlx->sched_prio = cpu_to_be16(pcp); ether_addr_copy(sqp->ud_header.eth.smac_h, ah->av.eth.s_mac); memcpy(sqp->ud_header.eth.dmac_h, ah->av.eth.mac, 6); memcpy(&ctrl->srcrb_flags16[0], ah->av.eth.mac, 2); memcpy(&ctrl->imm, ah->av.eth.mac + 2, 4); memcpy(&in6, sgid.raw, sizeof(in6)); if (!memcmp(sqp->ud_header.eth.smac_h, sqp->ud_header.eth.dmac_h, 6)) mlx->flags |= cpu_to_be32(MLX4_WQE_CTRL_FORCE_LOOPBACK); if (!is_vlan) { sqp->ud_header.eth.type = cpu_to_be16(ether_type); } else { sqp->ud_header.vlan.type = cpu_to_be16(ether_type); sqp->ud_header.vlan.tag = cpu_to_be16(vlan | pcp); } } else { sqp->ud_header.lrh.virtual_lane = !sqp->qp.ibqp.qp_num ? 
15 : sl_to_vl(to_mdev(ib_dev), sqp->ud_header.lrh.service_level, sqp->qp.port); if (sqp->qp.ibqp.qp_num && sqp->ud_header.lrh.virtual_lane == 15) return -EINVAL; if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE) sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE; } sqp->ud_header.bth.solicited_event = !!(wr->wr.send_flags & IB_SEND_SOLICITED); if (!sqp->qp.ibqp.qp_num) ib_get_cached_pkey(ib_dev, sqp->qp.port, sqp->pkey_index, &pkey); else ib_get_cached_pkey(ib_dev, sqp->qp.port, wr->pkey_index, &pkey); sqp->ud_header.bth.pkey = cpu_to_be16(pkey); sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->remote_qpn); sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1)); sqp->ud_header.deth.qkey = cpu_to_be32(wr->remote_qkey & 0x80000000 ? sqp->qkey : wr->remote_qkey); sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.ibqp.qp_num); header_size = ib_ud_header_pack(&sqp->ud_header, sqp->header_buf); if (0) { pr_err("built UD header of size %d:\n", header_size); for (i = 0; i < header_size / 4; ++i) { if (i % 8 == 0) pr_err(" [%02x] ", i * 4); pr_cont(" %08x", be32_to_cpu(((__be32 *) sqp->header_buf)[i])); if ((i + 1) % 8 == 0) pr_cont("\n"); } pr_err("\n"); } /* * Inline data segments may not cross a 64 byte boundary. If * our UD header is bigger than the space available up to the * next 64 byte boundary in the WQE, use two inline data * segments to hold the UD header. */ spc = MLX4_INLINE_ALIGN - ((unsigned long) (inl + 1) & (MLX4_INLINE_ALIGN - 1)); if (header_size <= spc) { - inl->byte_count = cpu_to_be32(1 << 31 | header_size); + inl->byte_count = cpu_to_be32(1U << 31 | header_size); memcpy(inl + 1, sqp->header_buf, header_size); i = 1; } else { - inl->byte_count = cpu_to_be32(1 << 31 | spc); + inl->byte_count = cpu_to_be32(1U << 31 | spc); memcpy(inl + 1, sqp->header_buf, spc); inl = (void *) (inl + 1) + spc; memcpy(inl + 1, sqp->header_buf + spc, header_size - spc); /* * Need a barrier here to make sure all the data is * visible before the byte_count field is set. * Otherwise the HCA prefetcher could grab the 64-byte * chunk with this inline segment and get a valid (!= * 0xffffffff) byte count but stale data, and end up * generating a packet with bad headers. * * The first inline segment's byte_count field doesn't * need a barrier, because it comes after a * control/MLX segment and therefore is at an offset * of 16 mod 64. */ wmb(); - inl->byte_count = cpu_to_be32(1 << 31 | (header_size - spc)); + inl->byte_count = cpu_to_be32(1U << 31 | (header_size - spc)); i = 2; } *mlx_seg_len = ALIGN(i * sizeof (struct mlx4_wqe_inline_seg) + header_size, 16); return 0; } static int mlx4_wq_overflow(struct mlx4_ib_wq *wq, int nreq, struct ib_cq *ib_cq) { unsigned cur; struct mlx4_ib_cq *cq; cur = wq->head - wq->tail; if (likely(cur + nreq < wq->max_post)) return 0; cq = to_mcq(ib_cq); spin_lock(&cq->lock); cur = wq->head - wq->tail; spin_unlock(&cq->lock); return cur + nreq >= wq->max_post; } static __be32 convert_access(int acc) { return (acc & IB_ACCESS_REMOTE_ATOMIC ? cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_ATOMIC) : 0) | (acc & IB_ACCESS_REMOTE_WRITE ? cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_WRITE) : 0) | (acc & IB_ACCESS_REMOTE_READ ? cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_READ) : 0) | (acc & IB_ACCESS_LOCAL_WRITE ? 
cpu_to_be32(MLX4_WQE_FMR_PERM_LOCAL_WRITE) : 0) | cpu_to_be32(MLX4_WQE_FMR_PERM_LOCAL_READ); } static void set_reg_seg(struct mlx4_wqe_fmr_seg *fseg, struct ib_reg_wr *wr) { struct mlx4_ib_mr *mr = to_mmr(wr->mr); fseg->flags = convert_access(wr->access); fseg->mem_key = cpu_to_be32(wr->key); fseg->buf_list = cpu_to_be64(mr->page_map); fseg->start_addr = cpu_to_be64(mr->ibmr.iova); fseg->reg_len = cpu_to_be64(mr->ibmr.length); fseg->offset = 0; /* XXX -- is this just for ZBVA? */ fseg->page_size = cpu_to_be32(ilog2(mr->ibmr.page_size)); fseg->reserved[0] = 0; fseg->reserved[1] = 0; } static void set_local_inv_seg(struct mlx4_wqe_local_inval_seg *iseg, u32 rkey) { memset(iseg, 0, sizeof(*iseg)); iseg->mem_key = cpu_to_be32(rkey); } static __always_inline void set_raddr_seg(struct mlx4_wqe_raddr_seg *rseg, u64 remote_addr, u32 rkey) { rseg->raddr = cpu_to_be64(remote_addr); rseg->rkey = cpu_to_be32(rkey); rseg->reserved = 0; } static void set_atomic_seg(struct mlx4_wqe_atomic_seg *aseg, struct ib_atomic_wr *wr) { if (wr->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) { aseg->swap_add = cpu_to_be64(wr->swap); aseg->compare = cpu_to_be64(wr->compare_add); } else if (wr->wr.opcode == IB_WR_MASKED_ATOMIC_FETCH_AND_ADD) { aseg->swap_add = cpu_to_be64(wr->compare_add); aseg->compare = cpu_to_be64(wr->compare_add_mask); } else { aseg->swap_add = cpu_to_be64(wr->compare_add); aseg->compare = 0; } } static void set_masked_atomic_seg(struct mlx4_wqe_masked_atomic_seg *aseg, struct ib_atomic_wr *wr) { aseg->swap_add = cpu_to_be64(wr->swap); aseg->swap_add_mask = cpu_to_be64(wr->swap_mask); aseg->compare = cpu_to_be64(wr->compare_add); aseg->compare_mask = cpu_to_be64(wr->compare_add_mask); } static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg, struct ib_ud_wr *wr) { memcpy(dseg->av, &to_mah(wr->ah)->av, sizeof (struct mlx4_av)); dseg->dqpn = cpu_to_be32(wr->remote_qpn); dseg->qkey = cpu_to_be32(wr->remote_qkey); dseg->vlan = to_mah(wr->ah)->av.eth.vlan; memcpy(dseg->mac, to_mah(wr->ah)->av.eth.mac, 6); } static void set_tunnel_datagram_seg(struct mlx4_ib_dev *dev, struct mlx4_wqe_datagram_seg *dseg, struct ib_ud_wr *wr, enum mlx4_ib_qp_type qpt) { union mlx4_ext_av *av = &to_mah(wr->ah)->av; struct mlx4_av sqp_av = {0}; int port = *((u8 *) &av->ib.port_pd) & 0x3; /* force loopback */ sqp_av.port_pd = av->ib.port_pd | cpu_to_be32(0x80000000); sqp_av.g_slid = av->ib.g_slid & 0x7f; /* no GRH */ sqp_av.sl_tclass_flowlabel = av->ib.sl_tclass_flowlabel & cpu_to_be32(0xf0000000); memcpy(dseg->av, &sqp_av, sizeof (struct mlx4_av)); if (qpt == MLX4_IB_QPT_PROXY_GSI) dseg->dqpn = cpu_to_be32(dev->dev->caps.qp1_tunnel[port - 1]); else dseg->dqpn = cpu_to_be32(dev->dev->caps.qp0_tunnel[port - 1]); /* Use QKEY from the QP context, which is set by master */ dseg->qkey = cpu_to_be32(IB_QP_SET_QKEY); } static void build_tunnel_header(struct ib_ud_wr *wr, void *wqe, unsigned *mlx_seg_len) { struct mlx4_wqe_inline_seg *inl = wqe; struct mlx4_ib_tunnel_header hdr; struct mlx4_ib_ah *ah = to_mah(wr->ah); int spc; int i; memcpy(&hdr.av, &ah->av, sizeof hdr.av); hdr.remote_qpn = cpu_to_be32(wr->remote_qpn); hdr.pkey_index = cpu_to_be16(wr->pkey_index); hdr.qkey = cpu_to_be32(wr->remote_qkey); memcpy(hdr.mac, ah->av.eth.mac, 6); hdr.vlan = ah->av.eth.vlan; spc = MLX4_INLINE_ALIGN - ((unsigned long) (inl + 1) & (MLX4_INLINE_ALIGN - 1)); if (sizeof (hdr) <= spc) { memcpy(inl + 1, &hdr, sizeof (hdr)); wmb(); inl->byte_count = cpu_to_be32((1U << 31) | (u32)sizeof(hdr)); i = 1; } else { memcpy(inl + 1, &hdr, spc); wmb(); 
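build_tunnel_header() follows the same inline-segment rules as the UD header paths above: inline data may not cross a 64-byte boundary, and each segment's byte_count carries bit 31 (MLX4_INLINE_SEG in mlx4/qp.h) plus the length. A small illustrative sketch of that arithmetic (the helper name and sample address are mine):

    #include <stdint.h>
    #include <stdio.h>

    #define MLX4_INLINE_ALIGN 64
    #define MLX4_INLINE_SEG   (1U << 31)  /* bit 31 of an inline byte_count */

    /* bytes left before the next 64-byte boundary, given the data address */
    static unsigned int inline_space(uintptr_t data)
    {
        return MLX4_INLINE_ALIGN - (data & (MLX4_INLINE_ALIGN - 1));
    }

    int main(void)
    {
        /* e.g. data starting 16 bytes into a 64-byte chunk leaves 48 bytes */
        printf("space = %u, byte_count = 0x%08x\n",
            inline_space(0x1010), (unsigned)(MLX4_INLINE_SEG | 48));
        return 0;
    }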
inl->byte_count = cpu_to_be32((1U << 31) | spc); inl = (void *) (inl + 1) + spc; memcpy(inl + 1, (void *) &hdr + spc, sizeof (hdr) - spc); wmb(); inl->byte_count = cpu_to_be32((1U << 31) | (u32)(sizeof (hdr) - spc)); i = 2; } *mlx_seg_len = ALIGN(i * sizeof (struct mlx4_wqe_inline_seg) + sizeof (hdr), 16); } static void set_mlx_icrc_seg(void *dseg) { u32 *t = dseg; struct mlx4_wqe_inline_seg *iseg = dseg; t[1] = 0; /* * Need a barrier here before writing the byte_count field to * make sure that all the data is visible before the * byte_count field is set. Otherwise, if the segment begins * a new cacheline, the HCA prefetcher could grab the 64-byte * chunk and get a valid (!= * 0xffffffff) byte count but * stale data, and end up sending the wrong data. */ wmb(); iseg->byte_count = cpu_to_be32((1U << 31) | 4); } static void set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ib_sge *sg) { dseg->lkey = cpu_to_be32(sg->lkey); dseg->addr = cpu_to_be64(sg->addr); /* * Need a barrier here before writing the byte_count field to * make sure that all the data is visible before the * byte_count field is set. Otherwise, if the segment begins * a new cacheline, the HCA prefetcher could grab the 64-byte * chunk and get a valid (!= * 0xffffffff) byte count but * stale data, and end up sending the wrong data. */ wmb(); dseg->byte_count = cpu_to_be32(sg->length); } static void __set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ib_sge *sg) { dseg->byte_count = cpu_to_be32(sg->length); dseg->lkey = cpu_to_be32(sg->lkey); dseg->addr = cpu_to_be64(sg->addr); } static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_ud_wr *wr, struct mlx4_ib_qp *qp, unsigned *lso_seg_len, __be32 *lso_hdr_sz, __be32 *blh) { unsigned halign = ALIGN(sizeof *wqe + wr->hlen, 16); if (unlikely(halign > MLX4_IB_CACHE_LINE_SIZE)) *blh = cpu_to_be32(1 << 6); if (unlikely(!(qp->flags & MLX4_IB_QP_LSO) && wr->wr.num_sge > qp->sq.max_gs - (halign >> 4))) return -EINVAL; memcpy(wqe->header, wr->header, wr->hlen); *lso_hdr_sz = cpu_to_be32(wr->mss << 16 | wr->hlen); *lso_seg_len = halign; return 0; } static __be32 send_ieth(struct ib_send_wr *wr) { switch (wr->opcode) { case IB_WR_SEND_WITH_IMM: case IB_WR_RDMA_WRITE_WITH_IMM: return wr->ex.imm_data; case IB_WR_SEND_WITH_INV: return cpu_to_be32(wr->ex.invalidate_rkey); default: return 0; } } static void add_zero_len_inline(void *wqe) { struct mlx4_wqe_inline_seg *inl = wqe; memset(wqe, 0, 16); inl->byte_count = cpu_to_be32(1U << 31); } int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, struct ib_send_wr **bad_wr) { struct mlx4_ib_qp *qp = to_mqp(ibqp); void *wqe; struct mlx4_wqe_ctrl_seg *ctrl; struct mlx4_wqe_data_seg *dseg; unsigned long flags; int nreq; int err = 0; unsigned ind; int uninitialized_var(stamp); int uninitialized_var(size); unsigned uninitialized_var(seglen); __be32 dummy; __be32 *lso_wqe; __be32 lso_hdr_sz = 0; __be32 blh; int i; struct mlx4_ib_dev *mdev = to_mdev(ibqp->device); if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI) { struct mlx4_ib_sqp *sqp = to_msqp(qp); if (sqp->roce_v2_gsi) { struct mlx4_ib_ah *ah = to_mah(ud_wr(wr)->ah); struct ib_gid_attr gid_attr; union ib_gid gid; if (!ib_get_cached_gid(ibqp->device, be32_to_cpu(ah->av.ib.port_pd) >> 24, ah->av.ib.gid_index, &gid, &gid_attr)) { if (gid_attr.ndev) dev_put(gid_attr.ndev); qp = (gid_attr.gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) ? to_mqp(sqp->roce_v2_gsi) : qp; } else { pr_err("Failed to get gid at index %d. 
RoCEv2 will not work properly\n", ah->av.ib.gid_index); } } } spin_lock_irqsave(&qp->sq.lock, flags); if (mdev->dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) { err = -EIO; *bad_wr = wr; nreq = 0; goto out; } ind = qp->sq_next_wqe; for (nreq = 0; wr; ++nreq, wr = wr->next) { lso_wqe = &dummy; blh = 0; if (mlx4_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) { err = -ENOMEM; *bad_wr = wr; goto out; } if (unlikely(wr->num_sge > qp->sq.max_gs)) { err = -EINVAL; *bad_wr = wr; goto out; } ctrl = wqe = get_send_wqe(qp, ind & (qp->sq.wqe_cnt - 1)); qp->sq.wrid[(qp->sq.head + nreq) & (qp->sq.wqe_cnt - 1)] = wr->wr_id; ctrl->srcrb_flags = (wr->send_flags & IB_SEND_SIGNALED ? cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE) : 0) | (wr->send_flags & IB_SEND_SOLICITED ? cpu_to_be32(MLX4_WQE_CTRL_SOLICITED) : 0) | ((wr->send_flags & IB_SEND_IP_CSUM) ? cpu_to_be32(MLX4_WQE_CTRL_IP_CSUM | MLX4_WQE_CTRL_TCP_UDP_CSUM) : 0) | qp->sq_signal_bits; ctrl->imm = send_ieth(wr); wqe += sizeof *ctrl; size = sizeof *ctrl / 16; switch (qp->mlx4_ib_qp_type) { case MLX4_IB_QPT_RC: case MLX4_IB_QPT_UC: switch (wr->opcode) { case IB_WR_ATOMIC_CMP_AND_SWP: case IB_WR_ATOMIC_FETCH_AND_ADD: case IB_WR_MASKED_ATOMIC_FETCH_AND_ADD: set_raddr_seg(wqe, atomic_wr(wr)->remote_addr, atomic_wr(wr)->rkey); wqe += sizeof (struct mlx4_wqe_raddr_seg); set_atomic_seg(wqe, atomic_wr(wr)); wqe += sizeof (struct mlx4_wqe_atomic_seg); size += (sizeof (struct mlx4_wqe_raddr_seg) + sizeof (struct mlx4_wqe_atomic_seg)) / 16; break; case IB_WR_MASKED_ATOMIC_CMP_AND_SWP: set_raddr_seg(wqe, atomic_wr(wr)->remote_addr, atomic_wr(wr)->rkey); wqe += sizeof (struct mlx4_wqe_raddr_seg); set_masked_atomic_seg(wqe, atomic_wr(wr)); wqe += sizeof (struct mlx4_wqe_masked_atomic_seg); size += (sizeof (struct mlx4_wqe_raddr_seg) + sizeof (struct mlx4_wqe_masked_atomic_seg)) / 16; break; case IB_WR_RDMA_READ: case IB_WR_RDMA_WRITE: case IB_WR_RDMA_WRITE_WITH_IMM: set_raddr_seg(wqe, rdma_wr(wr)->remote_addr, rdma_wr(wr)->rkey); wqe += sizeof (struct mlx4_wqe_raddr_seg); size += sizeof (struct mlx4_wqe_raddr_seg) / 16; break; case IB_WR_LOCAL_INV: ctrl->srcrb_flags |= cpu_to_be32(MLX4_WQE_CTRL_STRONG_ORDER); set_local_inv_seg(wqe, wr->ex.invalidate_rkey); wqe += sizeof (struct mlx4_wqe_local_inval_seg); size += sizeof (struct mlx4_wqe_local_inval_seg) / 16; break; case IB_WR_REG_MR: ctrl->srcrb_flags |= cpu_to_be32(MLX4_WQE_CTRL_STRONG_ORDER); set_reg_seg(wqe, reg_wr(wr)); wqe += sizeof(struct mlx4_wqe_fmr_seg); size += sizeof(struct mlx4_wqe_fmr_seg) / 16; break; default: /* No extra segments required for sends */ break; } break; case MLX4_IB_QPT_TUN_SMI_OWNER: err = build_sriov_qp0_header(to_msqp(qp), ud_wr(wr), ctrl, &seglen); if (unlikely(err)) { *bad_wr = wr; goto out; } wqe += seglen; size += seglen / 16; break; case MLX4_IB_QPT_TUN_SMI: case MLX4_IB_QPT_TUN_GSI: /* this is a UD qp used in MAD responses to slaves. 
*/ set_datagram_seg(wqe, ud_wr(wr)); /* set the forced-loopback bit in the data seg av */ *(__be32 *) wqe |= cpu_to_be32(0x80000000); wqe += sizeof (struct mlx4_wqe_datagram_seg); size += sizeof (struct mlx4_wqe_datagram_seg) / 16; break; case MLX4_IB_QPT_UD: set_datagram_seg(wqe, ud_wr(wr)); wqe += sizeof (struct mlx4_wqe_datagram_seg); size += sizeof (struct mlx4_wqe_datagram_seg) / 16; if (wr->opcode == IB_WR_LSO) { err = build_lso_seg(wqe, ud_wr(wr), qp, &seglen, &lso_hdr_sz, &blh); if (unlikely(err)) { *bad_wr = wr; goto out; } lso_wqe = (__be32 *) wqe; wqe += seglen; size += seglen / 16; } break; case MLX4_IB_QPT_PROXY_SMI_OWNER: err = build_sriov_qp0_header(to_msqp(qp), ud_wr(wr), ctrl, &seglen); if (unlikely(err)) { *bad_wr = wr; goto out; } wqe += seglen; size += seglen / 16; /* to start tunnel header on a cache-line boundary */ add_zero_len_inline(wqe); wqe += 16; size++; build_tunnel_header(ud_wr(wr), wqe, &seglen); wqe += seglen; size += seglen / 16; break; case MLX4_IB_QPT_PROXY_SMI: case MLX4_IB_QPT_PROXY_GSI: /* If we are tunneling special qps, this is a UD qp. * In this case we first add a UD segment targeting * the tunnel qp, and then add a header with address * information */ set_tunnel_datagram_seg(to_mdev(ibqp->device), wqe, ud_wr(wr), qp->mlx4_ib_qp_type); wqe += sizeof (struct mlx4_wqe_datagram_seg); size += sizeof (struct mlx4_wqe_datagram_seg) / 16; build_tunnel_header(ud_wr(wr), wqe, &seglen); wqe += seglen; size += seglen / 16; break; case MLX4_IB_QPT_SMI: case MLX4_IB_QPT_GSI: err = build_mlx_header(to_msqp(qp), ud_wr(wr), ctrl, &seglen); if (unlikely(err)) { *bad_wr = wr; goto out; } wqe += seglen; size += seglen / 16; break; default: break; } /* * Write data segments in reverse order, so as to * overwrite cacheline stamp last within each * cacheline. This avoids issues with WQE * prefetching. */ dseg = wqe; dseg += wr->num_sge - 1; size += wr->num_sge * (sizeof (struct mlx4_wqe_data_seg) / 16); /* Add one more inline data segment for ICRC for MLX sends */ if (unlikely(qp->mlx4_ib_qp_type == MLX4_IB_QPT_SMI || qp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI || qp->mlx4_ib_qp_type & (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_TUN_SMI_OWNER))) { set_mlx_icrc_seg(dseg + 1); size += sizeof (struct mlx4_wqe_data_seg) / 16; } for (i = wr->num_sge - 1; i >= 0; --i, --dseg) set_data_seg(dseg, wr->sg_list + i); /* * Possibly overwrite stamping in cacheline with LSO * segment only after making sure all data segments * are written. */ wmb(); *lso_wqe = lso_hdr_sz; ctrl->fence_size = (wr->send_flags & IB_SEND_FENCE ? MLX4_WQE_CTRL_FENCE : 0) | size; /* * Make sure descriptor is fully written before * setting ownership bit (because HW can start * executing as soon as we do). */ wmb(); if (wr->opcode < 0 || wr->opcode >= ARRAY_SIZE(mlx4_ib_opcode)) { *bad_wr = wr; err = -EINVAL; goto out; } ctrl->owner_opcode = mlx4_ib_opcode[wr->opcode] | - (ind & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0) | blh; + (ind & qp->sq.wqe_cnt ? cpu_to_be32(1U << 31) : 0) | blh; stamp = ind + qp->sq_spare_wqes; ind += DIV_ROUND_UP(size * 16, 1U << qp->sq.wqe_shift); /* * We can improve latency by not stamping the last * send queue WQE until after ringing the doorbell, so * only stamp here if there are still more WQEs to post. * * Same optimization applies to padding with NOP wqe * in case of WQE shrinking (used to prevent wrap-around * in the middle of WR). 
*/ if (wr->next) { stamp_send_wqe(qp, stamp, size * 16); ind = pad_wraparound(qp, ind); } } out: if (likely(nreq)) { qp->sq.head += nreq; /* * Make sure that descriptors are written before * doorbell record. */ wmb(); writel(qp->doorbell_qpn, to_mdev(ibqp->device)->uar_map + MLX4_SEND_DOORBELL); /* * Make sure doorbells don't leak out of SQ spinlock * and reach the HCA out of order. */ mmiowb(); stamp_send_wqe(qp, stamp, size * 16); ind = pad_wraparound(qp, ind); qp->sq_next_wqe = ind; } spin_unlock_irqrestore(&qp->sq.lock, flags); return err; } int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, struct ib_recv_wr **bad_wr) { struct mlx4_ib_qp *qp = to_mqp(ibqp); struct mlx4_wqe_data_seg *scat; unsigned long flags; int err = 0; int nreq; int ind; int max_gs; int i; struct mlx4_ib_dev *mdev = to_mdev(ibqp->device); max_gs = qp->rq.max_gs; spin_lock_irqsave(&qp->rq.lock, flags); if (mdev->dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) { err = -EIO; *bad_wr = wr; nreq = 0; goto out; } ind = qp->rq.head & (qp->rq.wqe_cnt - 1); for (nreq = 0; wr; ++nreq, wr = wr->next) { if (mlx4_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) { err = -ENOMEM; *bad_wr = wr; goto out; } if (unlikely(wr->num_sge > qp->rq.max_gs)) { err = -EINVAL; *bad_wr = wr; goto out; } scat = get_recv_wqe(qp, ind); if (qp->mlx4_ib_qp_type & (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI)) { ib_dma_sync_single_for_device(ibqp->device, qp->sqp_proxy_rcv[ind].map, sizeof (struct mlx4_ib_proxy_sqp_hdr), DMA_FROM_DEVICE); scat->byte_count = cpu_to_be32(sizeof (struct mlx4_ib_proxy_sqp_hdr)); /* use dma lkey from upper layer entry */ scat->lkey = cpu_to_be32(wr->sg_list->lkey); scat->addr = cpu_to_be64(qp->sqp_proxy_rcv[ind].map); scat++; max_gs--; } for (i = 0; i < wr->num_sge; ++i) __set_data_seg(scat + i, wr->sg_list + i); if (i < max_gs) { scat[i].byte_count = 0; scat[i].lkey = cpu_to_be32(MLX4_INVALID_LKEY); scat[i].addr = 0; } qp->rq.wrid[ind] = wr->wr_id; ind = (ind + 1) & (qp->rq.wqe_cnt - 1); } out: if (likely(nreq)) { qp->rq.head += nreq; /* * Make sure that descriptors are written before * doorbell record. */ wmb(); *qp->db.db = cpu_to_be32(qp->rq.head & 0xffff); } spin_unlock_irqrestore(&qp->rq.lock, flags); return err; } static inline enum ib_qp_state to_ib_qp_state(enum mlx4_qp_state mlx4_state) { switch (mlx4_state) { case MLX4_QP_STATE_RST: return IB_QPS_RESET; case MLX4_QP_STATE_INIT: return IB_QPS_INIT; case MLX4_QP_STATE_RTR: return IB_QPS_RTR; case MLX4_QP_STATE_RTS: return IB_QPS_RTS; case MLX4_QP_STATE_SQ_DRAINING: case MLX4_QP_STATE_SQD: return IB_QPS_SQD; case MLX4_QP_STATE_SQER: return IB_QPS_SQE; case MLX4_QP_STATE_ERR: return IB_QPS_ERR; default: return -1; } } static inline enum ib_mig_state to_ib_mig_state(int mlx4_mig_state) { switch (mlx4_mig_state) { case MLX4_QP_PM_ARMED: return IB_MIG_ARMED; case MLX4_QP_PM_REARM: return IB_MIG_REARM; case MLX4_QP_PM_MIGRATED: return IB_MIG_MIGRATED; default: return -1; } } static int to_ib_qp_access_flags(int mlx4_flags) { int ib_flags = 0; if (mlx4_flags & MLX4_QP_BIT_RRE) ib_flags |= IB_ACCESS_REMOTE_READ; if (mlx4_flags & MLX4_QP_BIT_RWE) ib_flags |= IB_ACCESS_REMOTE_WRITE; if (mlx4_flags & MLX4_QP_BIT_RAE) ib_flags |= IB_ACCESS_REMOTE_ATOMIC; return ib_flags; } static void to_ib_ah_attr(struct mlx4_ib_dev *ibdev, struct ib_ah_attr *ib_ah_attr, struct mlx4_qp_path *path) { struct mlx4_dev *dev = ibdev->dev; int is_eth; memset(ib_ah_attr, 0, sizeof *ib_ah_attr); ib_ah_attr->port_num = path->sched_queue & 0x40 ? 
2 : 1; if (ib_ah_attr->port_num == 0 || ib_ah_attr->port_num > dev->caps.num_ports) return; is_eth = rdma_port_get_link_layer(&ibdev->ib_dev, ib_ah_attr->port_num) == IB_LINK_LAYER_ETHERNET; if (is_eth) ib_ah_attr->sl = ((path->sched_queue >> 3) & 0x7) | ((path->sched_queue & 4) << 1); else ib_ah_attr->sl = (path->sched_queue >> 2) & 0xf; ib_ah_attr->dlid = be16_to_cpu(path->rlid); ib_ah_attr->src_path_bits = path->grh_mylmc & 0x7f; ib_ah_attr->static_rate = path->static_rate ? path->static_rate - 5 : 0; ib_ah_attr->ah_flags = (path->grh_mylmc & (1 << 7)) ? IB_AH_GRH : 0; if (ib_ah_attr->ah_flags) { ib_ah_attr->grh.sgid_index = path->mgid_index; ib_ah_attr->grh.hop_limit = path->hop_limit; ib_ah_attr->grh.traffic_class = (be32_to_cpu(path->tclass_flowlabel) >> 20) & 0xff; ib_ah_attr->grh.flow_label = be32_to_cpu(path->tclass_flowlabel) & 0xfffff; memcpy(ib_ah_attr->grh.dgid.raw, path->rgid, sizeof ib_ah_attr->grh.dgid.raw); } } int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr) { struct mlx4_ib_dev *dev = to_mdev(ibqp->device); struct mlx4_ib_qp *qp = to_mqp(ibqp); struct mlx4_qp_context context; int mlx4_state; int err = 0; mutex_lock(&qp->mutex); if (qp->state == IB_QPS_RESET) { qp_attr->qp_state = IB_QPS_RESET; goto done; } err = mlx4_qp_query(dev->dev, &qp->mqp, &context); if (err) { err = -EINVAL; goto out; } mlx4_state = be32_to_cpu(context.flags) >> 28; qp->state = to_ib_qp_state(mlx4_state); qp_attr->qp_state = qp->state; qp_attr->path_mtu = context.mtu_msgmax >> 5; qp_attr->path_mig_state = to_ib_mig_state((be32_to_cpu(context.flags) >> 11) & 0x3); qp_attr->qkey = be32_to_cpu(context.qkey); qp_attr->rq_psn = be32_to_cpu(context.rnr_nextrecvpsn) & 0xffffff; qp_attr->sq_psn = be32_to_cpu(context.next_send_psn) & 0xffffff; qp_attr->dest_qp_num = be32_to_cpu(context.remote_qpn) & 0xffffff; qp_attr->qp_access_flags = to_ib_qp_access_flags(be32_to_cpu(context.params2)); if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) { to_ib_ah_attr(dev, &qp_attr->ah_attr, &context.pri_path); to_ib_ah_attr(dev, &qp_attr->alt_ah_attr, &context.alt_path); qp_attr->alt_pkey_index = context.alt_path.pkey_index & 0x7f; qp_attr->alt_port_num = qp_attr->alt_ah_attr.port_num; } qp_attr->pkey_index = context.pri_path.pkey_index & 0x7f; if (qp_attr->qp_state == IB_QPS_INIT) qp_attr->port_num = qp->port; else qp_attr->port_num = context.pri_path.sched_queue & 0x40 ? 2 : 1; /* qp_attr->en_sqd_async_notify is only applicable in modify qp */ qp_attr->sq_draining = mlx4_state == MLX4_QP_STATE_SQ_DRAINING; qp_attr->max_rd_atomic = 1 << ((be32_to_cpu(context.params1) >> 21) & 0x7); qp_attr->max_dest_rd_atomic = 1 << ((be32_to_cpu(context.params2) >> 21) & 0x7); qp_attr->min_rnr_timer = (be32_to_cpu(context.rnr_nextrecvpsn) >> 24) & 0x1f; qp_attr->timeout = context.pri_path.ackto >> 3; qp_attr->retry_cnt = (be32_to_cpu(context.params1) >> 16) & 0x7; qp_attr->rnr_retry = (be32_to_cpu(context.params1) >> 13) & 0x7; qp_attr->alt_timeout = context.alt_path.ackto >> 3; done: qp_attr->cur_qp_state = qp_attr->qp_state; qp_attr->cap.max_recv_wr = qp->rq.wqe_cnt; qp_attr->cap.max_recv_sge = qp->rq.max_gs; if (!ibqp->uobject) { qp_attr->cap.max_send_wr = qp->sq.wqe_cnt; qp_attr->cap.max_send_sge = qp->sq.max_gs; } else { qp_attr->cap.max_send_wr = 0; qp_attr->cap.max_send_sge = 0; } /* * We don't support inline sends for kernel QPs (yet), and we * don't know what userspace's value should be. 
*/ qp_attr->cap.max_inline_data = 0; qp_init_attr->cap = qp_attr->cap; qp_init_attr->create_flags = 0; if (qp->flags & MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK) qp_init_attr->create_flags |= IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK; if (qp->flags & MLX4_IB_QP_LSO) qp_init_attr->create_flags |= IB_QP_CREATE_IPOIB_UD_LSO; if (qp->flags & MLX4_IB_QP_NETIF) qp_init_attr->create_flags |= IB_QP_CREATE_NETIF_QP; qp_init_attr->sq_sig_type = qp->sq_signal_bits == cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE) ? IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR; out: mutex_unlock(&qp->mutex); return err; } Index: head/sys/dev/mlx4/qp.h =================================================================== --- head/sys/dev/mlx4/qp.h (revision 327863) +++ head/sys/dev/mlx4/qp.h (revision 327864) @@ -1,509 +1,509 @@ /* * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ #ifndef MLX4_QP_H #define MLX4_QP_H #include #include #define MLX4_INVALID_LKEY 0x100 #define DS_SIZE_ALIGNMENT 16 #define SET_BYTE_COUNT(byte_count) cpu_to_be32(byte_count) #define SET_LSO_MSS(mss_hdr_size) cpu_to_be32(mss_hdr_size) #define DS_BYTE_COUNT_MASK cpu_to_be32(0x7fffffff) enum mlx4_qp_optpar { MLX4_QP_OPTPAR_ALT_ADDR_PATH = 1 << 0, MLX4_QP_OPTPAR_RRE = 1 << 1, MLX4_QP_OPTPAR_RAE = 1 << 2, MLX4_QP_OPTPAR_RWE = 1 << 3, MLX4_QP_OPTPAR_PKEY_INDEX = 1 << 4, MLX4_QP_OPTPAR_Q_KEY = 1 << 5, MLX4_QP_OPTPAR_RNR_TIMEOUT = 1 << 6, MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH = 1 << 7, MLX4_QP_OPTPAR_SRA_MAX = 1 << 8, MLX4_QP_OPTPAR_RRA_MAX = 1 << 9, MLX4_QP_OPTPAR_PM_STATE = 1 << 10, MLX4_QP_OPTPAR_RETRY_COUNT = 1 << 12, MLX4_QP_OPTPAR_RNR_RETRY = 1 << 13, MLX4_QP_OPTPAR_ACK_TIMEOUT = 1 << 14, MLX4_QP_OPTPAR_SCHED_QUEUE = 1 << 16, MLX4_QP_OPTPAR_COUNTER_INDEX = 1 << 20, MLX4_QP_OPTPAR_VLAN_STRIPPING = 1 << 21, }; enum mlx4_qp_state { MLX4_QP_STATE_RST = 0, MLX4_QP_STATE_INIT = 1, MLX4_QP_STATE_RTR = 2, MLX4_QP_STATE_RTS = 3, MLX4_QP_STATE_SQER = 4, MLX4_QP_STATE_SQD = 5, MLX4_QP_STATE_ERR = 6, MLX4_QP_STATE_SQ_DRAINING = 7, MLX4_QP_NUM_STATE }; enum { MLX4_QP_ST_RC = 0x0, MLX4_QP_ST_UC = 0x1, MLX4_QP_ST_RD = 0x2, MLX4_QP_ST_UD = 0x3, MLX4_QP_ST_XRC = 0x6, MLX4_QP_ST_MLX = 0x7 }; enum { MLX4_QP_PM_MIGRATED = 0x3, MLX4_QP_PM_ARMED = 0x0, MLX4_QP_PM_REARM = 0x1 }; enum { /* params1 */ MLX4_QP_BIT_SRE = 1 << 15, MLX4_QP_BIT_SWE = 1 << 14, MLX4_QP_BIT_SAE = 1 << 13, /* params2 */ MLX4_QP_BIT_RRE = 1 << 15, MLX4_QP_BIT_RWE = 1 << 14, MLX4_QP_BIT_RAE = 1 << 13, MLX4_QP_BIT_FPP = 1 << 3, MLX4_QP_BIT_RIC = 1 << 4, }; enum { MLX4_RSS_HASH_XOR = 0, MLX4_RSS_HASH_TOP = 1, MLX4_RSS_UDP_IPV6 = 1 << 0, MLX4_RSS_UDP_IPV4 = 1 << 1, MLX4_RSS_TCP_IPV6 = 1 << 2, MLX4_RSS_IPV6 = 1 << 3, MLX4_RSS_TCP_IPV4 = 1 << 4, MLX4_RSS_IPV4 = 1 << 5, MLX4_RSS_BY_OUTER_HEADERS = 0 << 6, MLX4_RSS_BY_INNER_HEADERS = 2 << 6, MLX4_RSS_BY_INNER_HEADERS_IPONLY = 3 << 6, /* offset of mlx4_rss_context within mlx4_qp_context.pri_path */ MLX4_RSS_OFFSET_IN_QPC_PRI_PATH = 0x24, /* offset of being RSS indirection QP within mlx4_qp_context.flags */ MLX4_RSS_QPC_FLAG_OFFSET = 13, }; #define MLX4_EN_RSS_KEY_SIZE 40 struct mlx4_rss_context { __be32 base_qpn; __be32 default_qpn; u16 reserved; u8 hash_fn; u8 flags; __be32 rss_key[MLX4_EN_RSS_KEY_SIZE / sizeof(__be32)]; __be32 base_qpn_udp; }; struct mlx4_qp_path { u8 fl; union { u8 vlan_control; u8 control; }; u8 disable_pkey_check; u8 pkey_index; u8 counter_index; u8 grh_mylmc; __be16 rlid; u8 ackto; u8 mgid_index; u8 static_rate; u8 hop_limit; __be32 tclass_flowlabel; u8 rgid[16]; u8 sched_queue; u8 vlan_index; u8 feup; u8 fvl_rx; u8 reserved4[2]; u8 dmac[ETH_ALEN]; }; enum { /* fl */ MLX4_FL_CV = 1 << 6, MLX4_FL_SV = 1 << 5, MLX4_FL_ETH_HIDE_CQE_VLAN = 1 << 2, MLX4_FL_ETH_SRC_CHECK_MC_LB = 1 << 1, MLX4_FL_ETH_SRC_CHECK_UC_LB = 1 << 0, }; enum { /* control */ MLX4_CTRL_ETH_SRC_CHECK_IF_COUNTER = 1 << 7, }; enum { /* vlan_control */ MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED = 1 << 6, MLX4_VLAN_CTRL_ETH_TX_BLOCK_PRIO_TAGGED = 1 << 5, /* 802.1p priority tag */ MLX4_VLAN_CTRL_ETH_TX_BLOCK_UNTAGGED = 1 << 4, MLX4_VLAN_CTRL_ETH_RX_BLOCK_TAGGED = 1 << 2, MLX4_VLAN_CTRL_ETH_RX_BLOCK_PRIO_TAGGED = 1 << 1, /* 802.1p priority tag */ MLX4_VLAN_CTRL_ETH_RX_BLOCK_UNTAGGED = 1 << 0 }; enum { /* feup */ MLX4_FEUP_FORCE_ETH_UP = 1 << 6, /* force Eth UP */ MLX4_FSM_FORCE_ETH_SRC_MAC = 1 << 5, /* force Source MAC */ MLX4_FVL_FORCE_ETH_VLAN = 1 << 3 /* force Eth vlan */ }; enum { /* fvl_rx */ MLX4_FVL_RX_FORCE_ETH_VLAN = 1 
<< 0 /* enforce Eth rx vlan */ }; struct mlx4_qp_context { __be32 flags; __be32 pd; u8 mtu_msgmax; u8 rq_size_stride; u8 sq_size_stride; u8 rlkey_roce_mode; __be32 usr_page; __be32 local_qpn; __be32 remote_qpn; struct mlx4_qp_path pri_path; struct mlx4_qp_path alt_path; __be32 params1; u32 reserved1; __be32 next_send_psn; __be32 cqn_send; __be16 roce_entropy; __be16 reserved2[3]; __be32 last_acked_psn; __be32 ssn; __be32 params2; __be32 rnr_nextrecvpsn; __be32 xrcd; __be32 cqn_recv; __be64 db_rec_addr; __be32 qkey; __be32 srqn; __be32 msn; __be16 rq_wqe_counter; __be16 sq_wqe_counter; u32 reserved3; __be16 rate_limit_params; u8 reserved4; u8 qos_vport; __be32 param3; __be32 nummmcpeers_basemkey; u8 log_page_size; u8 reserved5[2]; u8 mtt_base_addr_h; __be32 mtt_base_addr_l; u32 reserved6[10]; }; struct mlx4_update_qp_context { __be64 qp_mask; __be64 primary_addr_path_mask; __be64 secondary_addr_path_mask; u64 reserved1; struct mlx4_qp_context qp_context; u64 reserved2[58]; }; enum { MLX4_UPD_QP_MASK_PM_STATE = 32, MLX4_UPD_QP_MASK_VSD = 33, MLX4_UPD_QP_MASK_QOS_VPP = 34, MLX4_UPD_QP_MASK_RATE_LIMIT = 35, }; enum { MLX4_UPD_QP_PATH_MASK_PKEY_INDEX = 0 + 32, MLX4_UPD_QP_PATH_MASK_FSM = 1 + 32, MLX4_UPD_QP_PATH_MASK_MAC_INDEX = 2 + 32, MLX4_UPD_QP_PATH_MASK_FVL = 3 + 32, MLX4_UPD_QP_PATH_MASK_CV = 4 + 32, MLX4_UPD_QP_PATH_MASK_VLAN_INDEX = 5 + 32, MLX4_UPD_QP_PATH_MASK_ETH_HIDE_CQE_VLAN = 6 + 32, MLX4_UPD_QP_PATH_MASK_ETH_TX_BLOCK_UNTAGGED = 7 + 32, MLX4_UPD_QP_PATH_MASK_ETH_TX_BLOCK_1P = 8 + 32, MLX4_UPD_QP_PATH_MASK_ETH_TX_BLOCK_TAGGED = 9 + 32, MLX4_UPD_QP_PATH_MASK_ETH_RX_BLOCK_UNTAGGED = 10 + 32, MLX4_UPD_QP_PATH_MASK_ETH_RX_BLOCK_1P = 11 + 32, MLX4_UPD_QP_PATH_MASK_ETH_RX_BLOCK_TAGGED = 12 + 32, MLX4_UPD_QP_PATH_MASK_FEUP = 13 + 32, MLX4_UPD_QP_PATH_MASK_SCHED_QUEUE = 14 + 32, MLX4_UPD_QP_PATH_MASK_IF_COUNTER_INDEX = 15 + 32, MLX4_UPD_QP_PATH_MASK_FVL_RX = 16 + 32, MLX4_UPD_QP_PATH_MASK_ETH_SRC_CHECK_UC_LB = 18 + 32, MLX4_UPD_QP_PATH_MASK_ETH_SRC_CHECK_MC_LB = 19 + 32, MLX4_UPD_QP_PATH_MASK_SV = 22 + 32, }; enum { /* param3 */ MLX4_STRIP_VLAN = 1 << 30 }; /* Which firmware version adds support for NEC (NoErrorCompletion) bit */ #define MLX4_FW_VER_WQE_CTRL_NEC mlx4_fw_ver(2, 2, 232) enum { - MLX4_WQE_CTRL_OWN = 1 << 31, + MLX4_WQE_CTRL_OWN = 1U << 31, MLX4_WQE_CTRL_NEC = 1 << 29, MLX4_WQE_CTRL_RR = 1 << 6, MLX4_WQE_CTRL_IIP = 1 << 28, MLX4_WQE_CTRL_ILP = 1 << 27, MLX4_WQE_CTRL_FENCE = 1 << 6, MLX4_WQE_CTRL_CQ_UPDATE = 3 << 2, MLX4_WQE_CTRL_SOLICITED = 1 << 1, MLX4_WQE_CTRL_IP_CSUM = 1 << 4, MLX4_WQE_CTRL_TCP_UDP_CSUM = 1 << 5, MLX4_WQE_CTRL_INS_CVLAN = 1 << 6, MLX4_WQE_CTRL_INS_SVLAN = 1 << 7, MLX4_WQE_CTRL_STRONG_ORDER = 1 << 7, MLX4_WQE_CTRL_FORCE_LOOPBACK = 1 << 0, }; struct mlx4_wqe_ctrl_seg { __be32 owner_opcode; union { struct { __be16 vlan_tag; u8 ins_vlan; u8 fence_size; }; __be32 bf_qpn; }; /* * High 24 bits are SRC remote buffer; low 8 bits are flags: * [7] SO (strong ordering) * [5] TCP/UDP checksum * [4] IP checksum * [3:2] C (generate completion queue entry) * [1] SE (solicited event) * [0] FL (force loopback) */ union { __be32 srcrb_flags; __be16 srcrb_flags16[2]; }; /* * imm is immediate data for send/RDMA write w/ immediate; * also invalidation key for send with invalidate; input * modifier for WQEs on CCQs. 
*/ __be32 imm; }; enum { MLX4_WQE_MLX_VL15 = 1 << 17, MLX4_WQE_MLX_SLR = 1 << 16 }; struct mlx4_wqe_mlx_seg { u8 owner; u8 reserved1[2]; u8 opcode; __be16 sched_prio; u8 reserved2; u8 size; /* * [17] VL15 * [16] SLR * [15:12] static rate * [11:8] SL * [4] ICRC * [3:2] C * [0] FL (force loopback) */ __be32 flags; __be16 rlid; u16 reserved3; }; struct mlx4_wqe_datagram_seg { __be32 av[8]; __be32 dqpn; __be32 qkey; __be16 vlan; u8 mac[ETH_ALEN]; }; struct mlx4_wqe_lso_seg { __be32 mss_hdr_size; __be32 header[0]; }; enum mlx4_wqe_bind_seg_flags2 { MLX4_WQE_BIND_ZERO_BASED = (1 << 30), - MLX4_WQE_BIND_TYPE_2 = (1 << 31), + MLX4_WQE_BIND_TYPE_2 = (1U << 31), }; struct mlx4_wqe_bind_seg { __be32 flags1; __be32 flags2; __be32 new_rkey; __be32 lkey; __be64 addr; __be64 length; }; enum { MLX4_WQE_FMR_PERM_LOCAL_READ = 1 << 27, MLX4_WQE_FMR_PERM_LOCAL_WRITE = 1 << 28, MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_READ = 1 << 29, MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_WRITE = 1 << 30, - MLX4_WQE_FMR_AND_BIND_PERM_ATOMIC = 1 << 31 + MLX4_WQE_FMR_AND_BIND_PERM_ATOMIC = 1U << 31 }; struct mlx4_wqe_fmr_seg { __be32 flags; __be32 mem_key; __be64 buf_list; __be64 start_addr; __be64 reg_len; __be32 offset; __be32 page_size; u32 reserved[2]; }; struct mlx4_wqe_fmr_ext_seg { u8 flags; u8 reserved; __be16 app_mask; __be16 wire_app_tag; __be16 mem_app_tag; __be32 wire_ref_tag_base; __be32 mem_ref_tag_base; }; struct mlx4_wqe_local_inval_seg { u64 reserved1; __be32 mem_key; u32 reserved2; u64 reserved3[2]; }; struct mlx4_wqe_raddr_seg { __be64 raddr; __be32 rkey; u32 reserved; }; struct mlx4_wqe_atomic_seg { __be64 swap_add; __be64 compare; }; struct mlx4_wqe_masked_atomic_seg { __be64 swap_add; __be64 compare; __be64 swap_add_mask; __be64 compare_mask; }; struct mlx4_wqe_data_seg { __be32 byte_count; __be32 lkey; __be64 addr; }; enum { MLX4_INLINE_ALIGN = 64, - MLX4_INLINE_SEG = 1 << 31, + MLX4_INLINE_SEG = 1U << 31, }; struct mlx4_wqe_inline_seg { __be32 byte_count; }; enum mlx4_update_qp_attr { MLX4_UPDATE_QP_SMAC = 1 << 0, MLX4_UPDATE_QP_VSD = 1 << 1, MLX4_UPDATE_QP_RATE_LIMIT = 1 << 2, MLX4_UPDATE_QP_QOS_VPORT = 1 << 3, MLX4_UPDATE_QP_ETH_SRC_CHECK_MC_LB = 1 << 4, MLX4_UPDATE_QP_SUPPORTED_ATTRS = (1 << 5) - 1 }; enum mlx4_update_qp_params_flags { MLX4_UPDATE_QP_PARAMS_FLAGS_ETH_CHECK_MC_LB = 1 << 0, MLX4_UPDATE_QP_PARAMS_FLAGS_VSD_ENABLE = 1 << 1, }; struct mlx4_update_qp_params { u8 smac_index; u8 qos_vport; u32 flags; u16 rate_unit; u16 rate_val; }; int mlx4_update_qp(struct mlx4_dev *dev, u32 qpn, enum mlx4_update_qp_attr attr, struct mlx4_update_qp_params *params); int mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt, enum mlx4_qp_state cur_state, enum mlx4_qp_state new_state, struct mlx4_qp_context *context, enum mlx4_qp_optpar optpar, int sqd_event, struct mlx4_qp *qp); int mlx4_qp_query(struct mlx4_dev *dev, struct mlx4_qp *qp, struct mlx4_qp_context *context); int mlx4_qp_to_ready(struct mlx4_dev *dev, struct mlx4_mtt *mtt, struct mlx4_qp_context *context, struct mlx4_qp *qp, enum mlx4_qp_state *qp_state); static inline struct mlx4_qp *__mlx4_qp_lookup(struct mlx4_dev *dev, u32 qpn) { return radix_tree_lookup(&dev->qp_table_tree, qpn & (dev->caps.num_qps - 1)); } void mlx4_qp_remove(struct mlx4_dev *dev, struct mlx4_qp *qp); static inline u16 folded_qp(u32 q) { u16 res; res = ((q & 0xff) ^ ((q & 0xff0000) >> 16)) | (q & 0xff00); return res; } u16 mlx4_qp_roce_entropy(struct mlx4_dev *dev, u32 qpn); #endif /* MLX4_QP_H */ Index: head/sys/dev/mlx5/device.h 
=================================================================== --- head/sys/dev/mlx5/device.h (revision 327863) +++ head/sys/dev/mlx5/device.h (revision 327864) @@ -1,1446 +1,1446 @@ /*- * Copyright (c) 2013-2017, Mellanox Technologies, Ltd. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef MLX5_DEVICE_H #define MLX5_DEVICE_H #include #include #include #define FW_INIT_TIMEOUT_MILI 2000 #define FW_INIT_WAIT_MS 2 #if defined(__LITTLE_ENDIAN) #define MLX5_SET_HOST_ENDIANNESS 0 #elif defined(__BIG_ENDIAN) #define MLX5_SET_HOST_ENDIANNESS 0x80 #else #error Host endianness not defined #endif /* helper macros */ #define __mlx5_nullp(typ) ((struct mlx5_ifc_##typ##_bits *)0) #define __mlx5_bit_sz(typ, fld) sizeof(__mlx5_nullp(typ)->fld) #define __mlx5_bit_off(typ, fld) __offsetof(struct mlx5_ifc_##typ##_bits, fld) #define __mlx5_dw_off(typ, fld) (__mlx5_bit_off(typ, fld) / 32) #define __mlx5_64_off(typ, fld) (__mlx5_bit_off(typ, fld) / 64) #define __mlx5_dw_bit_off(typ, fld) (32 - __mlx5_bit_sz(typ, fld) - (__mlx5_bit_off(typ, fld) & 0x1f)) #define __mlx5_mask(typ, fld) ((u32)((1ull << __mlx5_bit_sz(typ, fld)) - 1)) #define __mlx5_dw_mask(typ, fld) (__mlx5_mask(typ, fld) << __mlx5_dw_bit_off(typ, fld)) #define __mlx5_st_sz_bits(typ) sizeof(struct mlx5_ifc_##typ##_bits) #define MLX5_FLD_SZ_BYTES(typ, fld) (__mlx5_bit_sz(typ, fld) / 8) #define MLX5_ST_SZ_BYTES(typ) (sizeof(struct mlx5_ifc_##typ##_bits) / 8) #define MLX5_ST_SZ_DW(typ) (sizeof(struct mlx5_ifc_##typ##_bits) / 32) #define MLX5_ST_SZ_QW(typ) (sizeof(struct mlx5_ifc_##typ##_bits) / 64) #define MLX5_UN_SZ_BYTES(typ) (sizeof(union mlx5_ifc_##typ##_bits) / 8) #define MLX5_UN_SZ_DW(typ) (sizeof(union mlx5_ifc_##typ##_bits) / 32) #define MLX5_BYTE_OFF(typ, fld) (__mlx5_bit_off(typ, fld) / 8) #define MLX5_ADDR_OF(typ, p, fld) ((char *)(p) + MLX5_BYTE_OFF(typ, fld)) /* insert a value to a struct */ #define MLX5_SET(typ, p, fld, v) do { \ BUILD_BUG_ON(__mlx5_st_sz_bits(typ) % 32); \ BUILD_BUG_ON(__mlx5_bit_sz(typ, fld) > 32); \ *((__be32 *)(p) + __mlx5_dw_off(typ, fld)) = \ cpu_to_be32((be32_to_cpu(*((__be32 *)(p) + __mlx5_dw_off(typ, fld))) & \ (~__mlx5_dw_mask(typ, fld))) | (((v) & __mlx5_mask(typ, fld)) \ << __mlx5_dw_bit_off(typ, fld))); \ } while (0) #define MLX5_SET_TO_ONES(typ, p, fld) do { \ 
BUILD_BUG_ON(__mlx5_st_sz_bits(typ) % 32); \ BUILD_BUG_ON(__mlx5_bit_sz(typ, fld) > 32); \ *((__be32 *)(p) + __mlx5_dw_off(typ, fld)) = \ cpu_to_be32((be32_to_cpu(*((__be32 *)(p) + __mlx5_dw_off(typ, fld))) & \ (~__mlx5_dw_mask(typ, fld))) | ((__mlx5_mask(typ, fld)) \ << __mlx5_dw_bit_off(typ, fld))); \ } while (0) #define MLX5_GET(typ, p, fld) ((be32_to_cpu(*((__be32 *)(p) +\ __mlx5_dw_off(typ, fld))) >> __mlx5_dw_bit_off(typ, fld)) & \ __mlx5_mask(typ, fld)) #define MLX5_GET_PR(typ, p, fld) ({ \ u32 ___t = MLX5_GET(typ, p, fld); \ pr_debug(#fld " = 0x%x\n", ___t); \ ___t; \ }) #define MLX5_SET64(typ, p, fld, v) do { \ BUILD_BUG_ON(__mlx5_bit_sz(typ, fld) != 64); \ BUILD_BUG_ON(__mlx5_bit_off(typ, fld) % 64); \ *((__be64 *)(p) + __mlx5_64_off(typ, fld)) = cpu_to_be64(v); \ } while (0) #define MLX5_GET64(typ, p, fld) be64_to_cpu(*((__be64 *)(p) + __mlx5_64_off(typ, fld))) #define MLX5_GET64_BE(typ, p, fld) (*((__be64 *)(p) +\ __mlx5_64_off(typ, fld))) #define MLX5_GET_BE(type_t, typ, p, fld) ({ \ type_t tmp; \ switch (sizeof(tmp)) { \ case sizeof(u8): \ tmp = (__force type_t)MLX5_GET(typ, p, fld); \ break; \ case sizeof(u16): \ tmp = (__force type_t)cpu_to_be16(MLX5_GET(typ, p, fld)); \ break; \ case sizeof(u32): \ tmp = (__force type_t)cpu_to_be32(MLX5_GET(typ, p, fld)); \ break; \ case sizeof(u64): \ tmp = (__force type_t)MLX5_GET64_BE(typ, p, fld); \ break; \ } \ tmp; \ }) #define MLX5_BY_PASS_NUM_REGULAR_PRIOS 8 #define MLX5_BY_PASS_NUM_DONT_TRAP_PRIOS 8 #define MLX5_BY_PASS_NUM_MULTICAST_PRIOS 1 #define MLX5_BY_PASS_NUM_PRIOS (MLX5_BY_PASS_NUM_REGULAR_PRIOS +\ MLX5_BY_PASS_NUM_DONT_TRAP_PRIOS +\ MLX5_BY_PASS_NUM_MULTICAST_PRIOS) enum { MLX5_MAX_COMMANDS = 32, MLX5_CMD_DATA_BLOCK_SIZE = 512, MLX5_CMD_MBOX_SIZE = 1024, MLX5_PCI_CMD_XPORT = 7, MLX5_MKEY_BSF_OCTO_SIZE = 4, MLX5_MAX_PSVS = 4, }; enum { MLX5_EXTENDED_UD_AV = 0x80000000, }; enum { MLX5_CQ_FLAGS_OI = 2, }; enum { MLX5_STAT_RATE_OFFSET = 5, }; enum { MLX5_INLINE_SEG = 0x80000000, }; enum { MLX5_HW_START_PADDING = MLX5_INLINE_SEG, }; enum { MLX5_MIN_PKEY_TABLE_SIZE = 128, MLX5_MAX_LOG_PKEY_TABLE = 5, }; enum { - MLX5_MKEY_INBOX_PG_ACCESS = 1 << 31 + MLX5_MKEY_INBOX_PG_ACCESS = 1U << 31 }; enum { MLX5_PERM_LOCAL_READ = 1 << 2, MLX5_PERM_LOCAL_WRITE = 1 << 3, MLX5_PERM_REMOTE_READ = 1 << 4, MLX5_PERM_REMOTE_WRITE = 1 << 5, MLX5_PERM_ATOMIC = 1 << 6, MLX5_PERM_UMR_EN = 1 << 7, }; enum { MLX5_PCIE_CTRL_SMALL_FENCE = 1 << 0, MLX5_PCIE_CTRL_RELAXED_ORDERING = 1 << 2, MLX5_PCIE_CTRL_NO_SNOOP = 1 << 3, MLX5_PCIE_CTRL_TLP_PROCE_EN = 1 << 6, MLX5_PCIE_CTRL_TPH_MASK = 3 << 4, }; enum { MLX5_MKEY_REMOTE_INVAL = 1 << 24, MLX5_MKEY_FLAG_SYNC_UMR = 1 << 29, MLX5_MKEY_BSF_EN = 1 << 30, - MLX5_MKEY_LEN64 = 1 << 31, + MLX5_MKEY_LEN64 = 1U << 31, }; enum { MLX5_EN_RD = (u64)1, MLX5_EN_WR = (u64)2 }; enum { MLX5_BF_REGS_PER_PAGE = 4, MLX5_MAX_UAR_PAGES = 1 << 8, MLX5_NON_FP_BF_REGS_PER_PAGE = 2, MLX5_MAX_UUARS = MLX5_MAX_UAR_PAGES * MLX5_NON_FP_BF_REGS_PER_PAGE, }; enum { MLX5_MKEY_MASK_LEN = 1ull << 0, MLX5_MKEY_MASK_PAGE_SIZE = 1ull << 1, MLX5_MKEY_MASK_START_ADDR = 1ull << 6, MLX5_MKEY_MASK_PD = 1ull << 7, MLX5_MKEY_MASK_EN_RINVAL = 1ull << 8, MLX5_MKEY_MASK_EN_SIGERR = 1ull << 9, MLX5_MKEY_MASK_BSF_EN = 1ull << 12, MLX5_MKEY_MASK_KEY = 1ull << 13, MLX5_MKEY_MASK_QPN = 1ull << 14, MLX5_MKEY_MASK_LR = 1ull << 17, MLX5_MKEY_MASK_LW = 1ull << 18, MLX5_MKEY_MASK_RR = 1ull << 19, MLX5_MKEY_MASK_RW = 1ull << 20, MLX5_MKEY_MASK_A = 1ull << 21, MLX5_MKEY_MASK_SMALL_FENCE = 1ull << 23, MLX5_MKEY_MASK_FREE = 1ull << 29, }; enum { 
MLX5_UMR_TRANSLATION_OFFSET_EN = (1 << 4), MLX5_UMR_CHECK_NOT_FREE = (1 << 5), MLX5_UMR_CHECK_FREE = (2 << 5), MLX5_UMR_INLINE = (1 << 7), }; #define MLX5_UMR_MTT_ALIGNMENT 0x40 #define MLX5_UMR_MTT_MASK (MLX5_UMR_MTT_ALIGNMENT - 1) #define MLX5_UMR_MTT_MIN_CHUNK_SIZE MLX5_UMR_MTT_ALIGNMENT enum { MLX5_EVENT_QUEUE_TYPE_QP = 0, MLX5_EVENT_QUEUE_TYPE_RQ = 1, MLX5_EVENT_QUEUE_TYPE_SQ = 2, }; enum { MLX5_PORT_CHANGE_SUBTYPE_DOWN = 1, MLX5_PORT_CHANGE_SUBTYPE_ACTIVE = 4, MLX5_PORT_CHANGE_SUBTYPE_INITIALIZED = 5, MLX5_PORT_CHANGE_SUBTYPE_LID = 6, MLX5_PORT_CHANGE_SUBTYPE_PKEY = 7, MLX5_PORT_CHANGE_SUBTYPE_GUID = 8, MLX5_PORT_CHANGE_SUBTYPE_CLIENT_REREG = 9, }; enum { MLX5_DCBX_EVENT_SUBTYPE_ERROR_STATE_DCBX = 1, MLX5_DCBX_EVENT_SUBTYPE_REMOTE_CONFIG_CHANGE, MLX5_DCBX_EVENT_SUBTYPE_LOCAL_OPER_CHANGE, MLX5_DCBX_EVENT_SUBTYPE_REMOTE_CONFIG_APP_PRIORITY_CHANGE, MLX5_MAX_INLINE_RECEIVE_SIZE = 64 }; enum { MLX5_DEV_CAP_FLAG_XRC = 1LL << 3, MLX5_DEV_CAP_FLAG_BAD_PKEY_CNTR = 1LL << 8, MLX5_DEV_CAP_FLAG_BAD_QKEY_CNTR = 1LL << 9, MLX5_DEV_CAP_FLAG_APM = 1LL << 17, MLX5_DEV_CAP_FLAG_SCQE_BRK_MOD = 1LL << 21, MLX5_DEV_CAP_FLAG_BLOCK_MCAST = 1LL << 23, MLX5_DEV_CAP_FLAG_CQ_MODER = 1LL << 29, MLX5_DEV_CAP_FLAG_RESIZE_CQ = 1LL << 30, MLX5_DEV_CAP_FLAG_ATOMIC = 1LL << 33, MLX5_DEV_CAP_FLAG_ROCE = 1LL << 34, MLX5_DEV_CAP_FLAG_DCT = 1LL << 37, MLX5_DEV_CAP_FLAG_SIG_HAND_OVER = 1LL << 40, MLX5_DEV_CAP_FLAG_CMDIF_CSUM = 3LL << 46, MLX5_DEV_CAP_FLAG_DRAIN_SIGERR = 1LL << 48, }; enum { MLX5_ROCE_VERSION_1 = 0, MLX5_ROCE_VERSION_1_5 = 1, MLX5_ROCE_VERSION_2 = 2, }; enum { MLX5_ROCE_VERSION_1_CAP = 1 << MLX5_ROCE_VERSION_1, MLX5_ROCE_VERSION_1_5_CAP = 1 << MLX5_ROCE_VERSION_1_5, MLX5_ROCE_VERSION_2_CAP = 1 << MLX5_ROCE_VERSION_2, }; enum { MLX5_ROCE_L3_TYPE_IPV4 = 0, MLX5_ROCE_L3_TYPE_IPV6 = 1, }; enum { MLX5_ROCE_L3_TYPE_IPV4_CAP = 1 << 1, MLX5_ROCE_L3_TYPE_IPV6_CAP = 1 << 2, }; enum { MLX5_OPCODE_NOP = 0x00, MLX5_OPCODE_SEND_INVAL = 0x01, MLX5_OPCODE_RDMA_WRITE = 0x08, MLX5_OPCODE_RDMA_WRITE_IMM = 0x09, MLX5_OPCODE_SEND = 0x0a, MLX5_OPCODE_SEND_IMM = 0x0b, MLX5_OPCODE_LSO = 0x0e, MLX5_OPCODE_RDMA_READ = 0x10, MLX5_OPCODE_ATOMIC_CS = 0x11, MLX5_OPCODE_ATOMIC_FA = 0x12, MLX5_OPCODE_ATOMIC_MASKED_CS = 0x14, MLX5_OPCODE_ATOMIC_MASKED_FA = 0x15, MLX5_OPCODE_BIND_MW = 0x18, MLX5_OPCODE_CONFIG_CMD = 0x1f, MLX5_RECV_OPCODE_RDMA_WRITE_IMM = 0x00, MLX5_RECV_OPCODE_SEND = 0x01, MLX5_RECV_OPCODE_SEND_IMM = 0x02, MLX5_RECV_OPCODE_SEND_INVAL = 0x03, MLX5_CQE_OPCODE_ERROR = 0x1e, MLX5_CQE_OPCODE_RESIZE = 0x16, MLX5_OPCODE_SET_PSV = 0x20, MLX5_OPCODE_GET_PSV = 0x21, MLX5_OPCODE_CHECK_PSV = 0x22, MLX5_OPCODE_RGET_PSV = 0x26, MLX5_OPCODE_RCHECK_PSV = 0x27, MLX5_OPCODE_UMR = 0x25, MLX5_OPCODE_SIGNATURE_CANCELED = (1 << 15), }; enum { MLX5_SET_PORT_RESET_QKEY = 0, MLX5_SET_PORT_GUID0 = 16, MLX5_SET_PORT_NODE_GUID = 17, MLX5_SET_PORT_SYS_GUID = 18, MLX5_SET_PORT_GID_TABLE = 19, MLX5_SET_PORT_PKEY_TABLE = 20, }; enum { MLX5_MAX_PAGE_SHIFT = 31 }; enum { MLX5_ADAPTER_PAGE_SHIFT = 12, MLX5_ADAPTER_PAGE_SIZE = 1 << MLX5_ADAPTER_PAGE_SHIFT, }; enum { MLX5_CAP_OFF_CMDIF_CSUM = 46, }; enum { /* * Max wqe size for rdma read is 512 bytes, so this * limits our max_sge_rd as the wqe needs to fit: * - ctrl segment (16 bytes) * - rdma segment (16 bytes) * - scatter elements (16 bytes each) */ MLX5_MAX_SGE_RD = (512 - 16 - 16) / 16 }; struct mlx5_inbox_hdr { __be16 opcode; u8 rsvd[4]; __be16 opmod; }; struct mlx5_outbox_hdr { u8 status; u8 rsvd[3]; __be32 syndrome; }; struct mlx5_cmd_set_dc_cnak_mbox_in { struct mlx5_inbox_hdr hdr; u8 enable; u8 
reserved[47]; __be64 pa; }; struct mlx5_cmd_set_dc_cnak_mbox_out { struct mlx5_outbox_hdr hdr; u8 rsvd[8]; }; struct mlx5_cmd_layout { u8 type; u8 rsvd0[3]; __be32 inlen; __be64 in_ptr; __be32 in[4]; __be32 out[4]; __be64 out_ptr; __be32 outlen; u8 token; u8 sig; u8 rsvd1; u8 status_own; }; struct mlx5_health_buffer { __be32 assert_var[5]; __be32 rsvd0[3]; __be32 assert_exit_ptr; __be32 assert_callra; __be32 rsvd1[2]; __be32 fw_ver; __be32 hw_id; __be32 rsvd2; u8 irisc_index; u8 synd; __be16 ext_sync; }; struct mlx5_init_seg { __be32 fw_rev; __be32 cmdif_rev_fw_sub; __be32 rsvd0[2]; __be32 cmdq_addr_h; __be32 cmdq_addr_l_sz; __be32 cmd_dbell; __be32 rsvd1[120]; __be32 initializing; struct mlx5_health_buffer health; __be32 rsvd2[880]; __be32 internal_timer_h; __be32 internal_timer_l; __be32 rsvd3[2]; __be32 health_counter; __be32 rsvd4[1019]; __be64 ieee1588_clk; __be32 ieee1588_clk_type; __be32 clr_intx; }; struct mlx5_eqe_comp { __be32 reserved[6]; __be32 cqn; }; struct mlx5_eqe_qp_srq { __be32 reserved[6]; __be32 qp_srq_n; }; struct mlx5_eqe_cq_err { __be32 cqn; u8 reserved1[7]; u8 syndrome; }; struct mlx5_eqe_port_state { u8 reserved0[8]; u8 port; }; struct mlx5_eqe_gpio { __be32 reserved0[2]; __be64 gpio_event; }; struct mlx5_eqe_congestion { u8 type; u8 rsvd0; u8 congestion_level; }; struct mlx5_eqe_stall_vl { u8 rsvd0[3]; u8 port_vl; }; struct mlx5_eqe_cmd { __be32 vector; __be32 rsvd[6]; }; struct mlx5_eqe_page_req { u8 rsvd0[2]; __be16 func_id; __be32 num_pages; __be32 rsvd1[5]; }; struct mlx5_eqe_vport_change { u8 rsvd0[2]; __be16 vport_num; __be32 rsvd1[6]; }; #define PORT_MODULE_EVENT_MODULE_STATUS_MASK 0xF #define PORT_MODULE_EVENT_ERROR_TYPE_MASK 0xF enum { MLX5_MODULE_STATUS_PLUGGED = 0x1, MLX5_MODULE_STATUS_UNPLUGGED = 0x2, MLX5_MODULE_STATUS_ERROR = 0x3, }; enum { MLX5_MODULE_EVENT_ERROR_POWER_BUDGET_EXCEEDED = 0x0, MLX5_MODULE_EVENT_ERROR_LONG_RANGE_FOR_NON_MLNX_CABLE_MODULE = 0x1, MLX5_MODULE_EVENT_ERROR_BUS_STUCK = 0x2, MLX5_MODULE_EVENT_ERROR_NO_EEPROM_RETRY_TIMEOUT = 0x3, MLX5_MODULE_EVENT_ERROR_ENFORCE_PART_NUMBER_LIST = 0x4, MLX5_MODULE_EVENT_ERROR_UNKNOWN_IDENTIFIER = 0x5, MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE = 0x6, MLX5_MODULE_EVENT_ERROR_CABLE_IS_SHORTED = 0x7, }; struct mlx5_eqe_port_module_event { u8 rsvd0; u8 module; u8 rsvd1; u8 module_status; u8 rsvd2[2]; u8 error_type; }; struct mlx5_eqe_general_notification_event { u32 rq_user_index_delay_drop; u32 rsvd0[6]; }; union ev_data { __be32 raw[7]; struct mlx5_eqe_cmd cmd; struct mlx5_eqe_comp comp; struct mlx5_eqe_qp_srq qp_srq; struct mlx5_eqe_cq_err cq_err; struct mlx5_eqe_port_state port; struct mlx5_eqe_gpio gpio; struct mlx5_eqe_congestion cong; struct mlx5_eqe_stall_vl stall_vl; struct mlx5_eqe_page_req req_pages; struct mlx5_eqe_port_module_event port_module_event; struct mlx5_eqe_vport_change vport_change; struct mlx5_eqe_general_notification_event general_notifications; } __packed; struct mlx5_eqe { u8 rsvd0; u8 type; u8 rsvd1; u8 sub_type; __be32 rsvd2[7]; union ev_data data; __be16 rsvd3; u8 signature; u8 owner; } __packed; struct mlx5_cmd_prot_block { u8 data[MLX5_CMD_DATA_BLOCK_SIZE]; u8 rsvd0[48]; __be64 next; __be32 block_num; u8 rsvd1; u8 token; u8 ctrl_sig; u8 sig; }; #define MLX5_NUM_CMDS_IN_ADAPTER_PAGE \ (MLX5_ADAPTER_PAGE_SIZE / MLX5_CMD_MBOX_SIZE) CTASSERT(MLX5_CMD_MBOX_SIZE >= sizeof(struct mlx5_cmd_prot_block)); CTASSERT(MLX5_CMD_MBOX_SIZE <= MLX5_ADAPTER_PAGE_SIZE); enum { MLX5_CQE_SYND_FLUSHED_IN_ERROR = 5, }; struct mlx5_err_cqe { u8 rsvd0[32]; __be32 srqn; u8 rsvd1[18]; u8 
vendor_err_synd; u8 syndrome; __be32 s_wqe_opcode_qpn; __be16 wqe_counter; u8 signature; u8 op_own; }; struct mlx5_cqe64 { u8 tunneled_etc; u8 rsvd0[3]; u8 lro_tcppsh_abort_dupack; u8 lro_min_ttl; __be16 lro_tcp_win; __be32 lro_ack_seq_num; __be32 rss_hash_result; u8 rss_hash_type; u8 ml_path; u8 rsvd20[2]; __be16 check_sum; __be16 slid; __be32 flags_rqpn; u8 hds_ip_ext; u8 l4_hdr_type_etc; __be16 vlan_info; __be32 srqn; /* [31:24]: lro_num_seg, [23:0]: srqn */ __be32 imm_inval_pkey; u8 rsvd40[4]; __be32 byte_cnt; __be64 timestamp; __be32 sop_drop_qpn; __be16 wqe_counter; u8 signature; u8 op_own; }; #define MLX5_CQE_TSTMP_PTP (1ULL << 63) static inline bool get_cqe_lro_timestamp_valid(struct mlx5_cqe64 *cqe) { return (cqe->lro_tcppsh_abort_dupack >> 7) & 1; } static inline bool get_cqe_lro_tcppsh(struct mlx5_cqe64 *cqe) { return (cqe->lro_tcppsh_abort_dupack >> 6) & 1; } static inline u8 get_cqe_l4_hdr_type(struct mlx5_cqe64 *cqe) { return (cqe->l4_hdr_type_etc >> 4) & 0x7; } static inline u16 get_cqe_vlan(struct mlx5_cqe64 *cqe) { return be16_to_cpu(cqe->vlan_info) & 0xfff; } static inline void get_cqe_smac(struct mlx5_cqe64 *cqe, u8 *smac) { memcpy(smac, &cqe->rss_hash_type , 4); memcpy(smac + 4, &cqe->slid , 2); } static inline bool cqe_has_vlan(struct mlx5_cqe64 *cqe) { return cqe->l4_hdr_type_etc & 0x1; } static inline bool cqe_is_tunneled(struct mlx5_cqe64 *cqe) { return cqe->tunneled_etc & 0x1; } enum { CQE_L4_HDR_TYPE_NONE = 0x0, CQE_L4_HDR_TYPE_TCP_NO_ACK = 0x1, CQE_L4_HDR_TYPE_UDP = 0x2, CQE_L4_HDR_TYPE_TCP_ACK_NO_DATA = 0x3, CQE_L4_HDR_TYPE_TCP_ACK_AND_DATA = 0x4, }; enum { /* source L3 hash types */ CQE_RSS_SRC_HTYPE_IP = 0x3 << 0, CQE_RSS_SRC_HTYPE_IPV4 = 0x1 << 0, CQE_RSS_SRC_HTYPE_IPV6 = 0x2 << 0, /* destination L3 hash types */ CQE_RSS_DST_HTYPE_IP = 0x3 << 2, CQE_RSS_DST_HTYPE_IPV4 = 0x1 << 2, CQE_RSS_DST_HTYPE_IPV6 = 0x2 << 2, /* source L4 hash types */ CQE_RSS_SRC_HTYPE_L4 = 0x3 << 4, CQE_RSS_SRC_HTYPE_TCP = 0x1 << 4, CQE_RSS_SRC_HTYPE_UDP = 0x2 << 4, CQE_RSS_SRC_HTYPE_IPSEC = 0x3 << 4, /* destination L4 hash types */ CQE_RSS_DST_HTYPE_L4 = 0x3 << 6, CQE_RSS_DST_HTYPE_TCP = 0x1 << 6, CQE_RSS_DST_HTYPE_UDP = 0x2 << 6, CQE_RSS_DST_HTYPE_IPSEC = 0x3 << 6, }; enum { MLX5_CQE_ROCE_L3_HEADER_TYPE_GRH = 0x0, MLX5_CQE_ROCE_L3_HEADER_TYPE_IPV6 = 0x1, MLX5_CQE_ROCE_L3_HEADER_TYPE_IPV4 = 0x2, }; enum { CQE_L2_OK = 1 << 0, CQE_L3_OK = 1 << 1, CQE_L4_OK = 1 << 2, }; struct mlx5_sig_err_cqe { u8 rsvd0[16]; __be32 expected_trans_sig; __be32 actual_trans_sig; __be32 expected_reftag; __be32 actual_reftag; __be16 syndrome; u8 rsvd22[2]; __be32 mkey; __be64 err_offset; u8 rsvd30[8]; __be32 qpn; u8 rsvd38[2]; u8 signature; u8 op_own; }; struct mlx5_wqe_srq_next_seg { u8 rsvd0[2]; __be16 next_wqe_index; u8 signature; u8 rsvd1[11]; }; union mlx5_ext_cqe { struct ib_grh grh; u8 inl[64]; }; struct mlx5_cqe128 { union mlx5_ext_cqe inl_grh; struct mlx5_cqe64 cqe64; }; struct mlx5_srq_ctx { u8 state_log_sz; u8 rsvd0[3]; __be32 flags_xrcd; __be32 pgoff_cqn; u8 rsvd1[4]; u8 log_pg_sz; u8 rsvd2[7]; __be32 pd; __be16 lwm; __be16 wqe_cnt; u8 rsvd3[8]; __be64 db_record; }; struct mlx5_create_srq_mbox_in { struct mlx5_inbox_hdr hdr; __be32 input_srqn; u8 rsvd0[4]; struct mlx5_srq_ctx ctx; u8 rsvd1[208]; __be64 pas[0]; }; struct mlx5_create_srq_mbox_out { struct mlx5_outbox_hdr hdr; __be32 srqn; u8 rsvd[4]; }; struct mlx5_destroy_srq_mbox_in { struct mlx5_inbox_hdr hdr; __be32 srqn; u8 rsvd[4]; }; struct mlx5_destroy_srq_mbox_out { struct mlx5_outbox_hdr hdr; u8 rsvd[8]; }; struct mlx5_query_srq_mbox_in { 
struct mlx5_inbox_hdr hdr; __be32 srqn; u8 rsvd0[4]; }; struct mlx5_query_srq_mbox_out { struct mlx5_outbox_hdr hdr; u8 rsvd0[8]; struct mlx5_srq_ctx ctx; u8 rsvd1[32]; __be64 pas[0]; }; struct mlx5_arm_srq_mbox_in { struct mlx5_inbox_hdr hdr; __be32 srqn; __be16 rsvd; __be16 lwm; }; struct mlx5_arm_srq_mbox_out { struct mlx5_outbox_hdr hdr; u8 rsvd[8]; }; struct mlx5_cq_context { u8 status; u8 cqe_sz_flags; u8 st; u8 rsvd3; u8 rsvd4[6]; __be16 page_offset; __be32 log_sz_usr_page; __be16 cq_period; __be16 cq_max_count; __be16 rsvd20; __be16 c_eqn; u8 log_pg_sz; u8 rsvd25[7]; __be32 last_notified_index; __be32 solicit_producer_index; __be32 consumer_counter; __be32 producer_counter; u8 rsvd48[8]; __be64 db_record_addr; }; struct mlx5_create_cq_mbox_in { struct mlx5_inbox_hdr hdr; __be32 input_cqn; u8 rsvdx[4]; struct mlx5_cq_context ctx; u8 rsvd6[192]; __be64 pas[0]; }; struct mlx5_create_cq_mbox_out { struct mlx5_outbox_hdr hdr; __be32 cqn; u8 rsvd0[4]; }; struct mlx5_destroy_cq_mbox_in { struct mlx5_inbox_hdr hdr; __be32 cqn; u8 rsvd0[4]; }; struct mlx5_destroy_cq_mbox_out { struct mlx5_outbox_hdr hdr; u8 rsvd0[8]; }; struct mlx5_query_cq_mbox_in { struct mlx5_inbox_hdr hdr; __be32 cqn; u8 rsvd0[4]; }; struct mlx5_query_cq_mbox_out { struct mlx5_outbox_hdr hdr; u8 rsvd0[8]; struct mlx5_cq_context ctx; u8 rsvd6[16]; __be64 pas[0]; }; struct mlx5_modify_cq_mbox_in { struct mlx5_inbox_hdr hdr; __be32 cqn; __be32 field_select; struct mlx5_cq_context ctx; u8 rsvd[192]; __be64 pas[0]; }; struct mlx5_modify_cq_mbox_out { struct mlx5_outbox_hdr hdr; u8 rsvd[8]; }; struct mlx5_eq_context { u8 status; u8 ec_oi; u8 st; u8 rsvd2[7]; __be16 page_pffset; __be32 log_sz_usr_page; u8 rsvd3[7]; u8 intr; u8 log_page_size; u8 rsvd4[15]; __be32 consumer_counter; __be32 produser_counter; u8 rsvd5[16]; }; struct mlx5_create_eq_mbox_in { struct mlx5_inbox_hdr hdr; u8 rsvd0[3]; u8 input_eqn; u8 rsvd1[4]; struct mlx5_eq_context ctx; u8 rsvd2[8]; __be64 events_mask; u8 rsvd3[176]; __be64 pas[0]; }; struct mlx5_create_eq_mbox_out { struct mlx5_outbox_hdr hdr; u8 rsvd0[3]; u8 eq_number; u8 rsvd1[4]; }; struct mlx5_map_eq_mbox_in { struct mlx5_inbox_hdr hdr; __be64 mask; u8 mu; u8 rsvd0[2]; u8 eqn; u8 rsvd1[24]; }; struct mlx5_map_eq_mbox_out { struct mlx5_outbox_hdr hdr; u8 rsvd[8]; }; struct mlx5_query_eq_mbox_in { struct mlx5_inbox_hdr hdr; u8 rsvd0[3]; u8 eqn; u8 rsvd1[4]; }; struct mlx5_query_eq_mbox_out { struct mlx5_outbox_hdr hdr; u8 rsvd[8]; struct mlx5_eq_context ctx; }; enum { MLX5_MKEY_STATUS_FREE = 1 << 6, }; struct mlx5_mkey_seg { /* This is a two bit field occupying bits 31-30. 
* bit 31 is always 0, * bit 30 is zero for regular MRs and 1 (e.g. free) for UMRs that do not have translation */ u8 status; u8 pcie_control; u8 flags; u8 version; __be32 qpn_mkey7_0; u8 rsvd1[4]; __be32 flags_pd; __be64 start_addr; __be64 len; __be32 bsfs_octo_size; u8 rsvd2[16]; __be32 xlt_oct_size; u8 rsvd3[3]; u8 log2_page_size; u8 rsvd4[4]; }; struct mlx5_query_special_ctxs_mbox_in { struct mlx5_inbox_hdr hdr; u8 rsvd[8]; }; struct mlx5_query_special_ctxs_mbox_out { struct mlx5_outbox_hdr hdr; __be32 dump_fill_mkey; __be32 reserved_lkey; }; struct mlx5_create_mkey_mbox_in { struct mlx5_inbox_hdr hdr; __be32 input_mkey_index; __be32 flags; struct mlx5_mkey_seg seg; u8 rsvd1[16]; __be32 xlat_oct_act_size; __be32 rsvd2; u8 rsvd3[168]; __be64 pas[0]; }; struct mlx5_create_mkey_mbox_out { struct mlx5_outbox_hdr hdr; __be32 mkey; u8 rsvd[4]; }; struct mlx5_query_mkey_mbox_in { struct mlx5_inbox_hdr hdr; __be32 mkey; }; struct mlx5_query_mkey_mbox_out { struct mlx5_outbox_hdr hdr; __be64 pas[0]; }; struct mlx5_modify_mkey_mbox_in { struct mlx5_inbox_hdr hdr; __be32 mkey; __be64 pas[0]; }; struct mlx5_modify_mkey_mbox_out { struct mlx5_outbox_hdr hdr; u8 rsvd[8]; }; struct mlx5_dump_mkey_mbox_in { struct mlx5_inbox_hdr hdr; }; struct mlx5_dump_mkey_mbox_out { struct mlx5_outbox_hdr hdr; __be32 mkey; }; struct mlx5_mad_ifc_mbox_in { struct mlx5_inbox_hdr hdr; __be16 remote_lid; u8 rsvd0; u8 port; u8 rsvd1[4]; u8 data[256]; }; struct mlx5_mad_ifc_mbox_out { struct mlx5_outbox_hdr hdr; u8 rsvd[8]; u8 data[256]; }; struct mlx5_access_reg_mbox_in { struct mlx5_inbox_hdr hdr; u8 rsvd0[2]; __be16 register_id; __be32 arg; __be32 data[0]; }; struct mlx5_access_reg_mbox_out { struct mlx5_outbox_hdr hdr; u8 rsvd[8]; __be32 data[0]; }; #define MLX5_ATTR_EXTENDED_PORT_INFO cpu_to_be16(0xff90) enum { MLX_EXT_PORT_CAP_FLAG_EXTENDED_PORT_INFO = 1 << 0 }; struct mlx5_allocate_psv_in { struct mlx5_inbox_hdr hdr; __be32 npsv_pd; __be32 rsvd_psv0; }; struct mlx5_allocate_psv_out { struct mlx5_outbox_hdr hdr; u8 rsvd[8]; __be32 psv_idx[4]; }; struct mlx5_destroy_psv_in { struct mlx5_inbox_hdr hdr; __be32 psv_number; u8 rsvd[4]; }; struct mlx5_destroy_psv_out { struct mlx5_outbox_hdr hdr; u8 rsvd[8]; }; static inline int mlx5_host_is_le(void) { #if defined(__LITTLE_ENDIAN) return 1; #elif defined(__BIG_ENDIAN) return 0; #else #error Host endianness not defined #endif } #define MLX5_CMD_OP_MAX 0x939 enum { VPORT_STATE_DOWN = 0x0, VPORT_STATE_UP = 0x1, }; enum { MLX5_L3_PROT_TYPE_IPV4 = 0, MLX5_L3_PROT_TYPE_IPV6 = 1, }; enum { MLX5_L4_PROT_TYPE_TCP = 0, MLX5_L4_PROT_TYPE_UDP = 1, }; enum { MLX5_HASH_FIELD_SEL_SRC_IP = 1 << 0, MLX5_HASH_FIELD_SEL_DST_IP = 1 << 1, MLX5_HASH_FIELD_SEL_L4_SPORT = 1 << 2, MLX5_HASH_FIELD_SEL_L4_DPORT = 1 << 3, MLX5_HASH_FIELD_SEL_IPSEC_SPI = 1 << 4, }; enum { MLX5_MATCH_OUTER_HEADERS = 1 << 0, MLX5_MATCH_MISC_PARAMETERS = 1 << 1, MLX5_MATCH_INNER_HEADERS = 1 << 2, }; enum { MLX5_FLOW_TABLE_TYPE_NIC_RCV = 0, MLX5_FLOW_TABLE_TYPE_EGRESS_ACL = 2, MLX5_FLOW_TABLE_TYPE_INGRESS_ACL = 3, MLX5_FLOW_TABLE_TYPE_ESWITCH = 4, MLX5_FLOW_TABLE_TYPE_SNIFFER_RX = 5, MLX5_FLOW_TABLE_TYPE_SNIFFER_TX = 6, MLX5_FLOW_TABLE_TYPE_NIC_RX_RDMA = 7, }; enum { MLX5_MODIFY_ESW_VPORT_CONTEXT_CVLAN_INSERT_NONE = 0, MLX5_MODIFY_ESW_VPORT_CONTEXT_CVLAN_INSERT_IF_NO_VLAN = 1, MLX5_MODIFY_ESW_VPORT_CONTEXT_CVLAN_INSERT_OVERWRITE = 2 }; enum { MLX5_MODIFY_ESW_VPORT_CONTEXT_FIELD_SELECT_SVLAN_STRIP = 1 << 0, MLX5_MODIFY_ESW_VPORT_CONTEXT_FIELD_SELECT_CVLAN_STRIP = 1 << 1,
MLX5_MODIFY_ESW_VPORT_CONTEXT_FIELD_SELECT_SVLAN_INSERT = 1 << 2, MLX5_MODIFY_ESW_VPORT_CONTEXT_FIELD_SELECT_CVLAN_INSERT = 1 << 3 }; enum { MLX5_UC_ADDR_CHANGE = (1 << 0), MLX5_MC_ADDR_CHANGE = (1 << 1), MLX5_VLAN_CHANGE = (1 << 2), MLX5_PROMISC_CHANGE = (1 << 3), MLX5_MTU_CHANGE = (1 << 4), }; enum mlx5_list_type { MLX5_NIC_VPORT_LIST_TYPE_UC = 0x0, MLX5_NIC_VPORT_LIST_TYPE_MC = 0x1, MLX5_NIC_VPORT_LIST_TYPE_VLAN = 0x2, }; enum { MLX5_ESW_VPORT_ADMIN_STATE_DOWN = 0x0, MLX5_ESW_VPORT_ADMIN_STATE_UP = 0x1, MLX5_ESW_VPORT_ADMIN_STATE_AUTO = 0x2, }; /* MLX5 DEV CAPs */ /* TODO: EAT.ME */ enum mlx5_cap_mode { HCA_CAP_OPMOD_GET_MAX = 0, HCA_CAP_OPMOD_GET_CUR = 1, }; enum mlx5_cap_type { MLX5_CAP_GENERAL = 0, MLX5_CAP_ETHERNET_OFFLOADS, MLX5_CAP_ODP, MLX5_CAP_ATOMIC, MLX5_CAP_ROCE, MLX5_CAP_IPOIB_OFFLOADS, MLX5_CAP_EOIB_OFFLOADS, MLX5_CAP_FLOW_TABLE, MLX5_CAP_ESWITCH_FLOW_TABLE, MLX5_CAP_ESWITCH, MLX5_CAP_SNAPSHOT, MLX5_CAP_VECTOR_CALC, MLX5_CAP_QOS, MLX5_CAP_DEBUG, /* NUM OF CAP Types */ MLX5_CAP_NUM }; /* GET Dev Caps macros */ #define MLX5_CAP_GEN(mdev, cap) \ MLX5_GET(cmd_hca_cap, mdev->hca_caps_cur[MLX5_CAP_GENERAL], cap) #define MLX5_CAP_GEN_MAX(mdev, cap) \ MLX5_GET(cmd_hca_cap, mdev->hca_caps_max[MLX5_CAP_GENERAL], cap) #define MLX5_CAP_ETH(mdev, cap) \ MLX5_GET(per_protocol_networking_offload_caps,\ mdev->hca_caps_cur[MLX5_CAP_ETHERNET_OFFLOADS], cap) #define MLX5_CAP_ETH_MAX(mdev, cap) \ MLX5_GET(per_protocol_networking_offload_caps,\ mdev->hca_caps_max[MLX5_CAP_ETHERNET_OFFLOADS], cap) #define MLX5_CAP_ROCE(mdev, cap) \ MLX5_GET(roce_cap, mdev->hca_caps_cur[MLX5_CAP_ROCE], cap) #define MLX5_CAP_ROCE_MAX(mdev, cap) \ MLX5_GET(roce_cap, mdev->hca_caps_max[MLX5_CAP_ROCE], cap) #define MLX5_CAP_ATOMIC(mdev, cap) \ MLX5_GET(atomic_caps, mdev->hca_caps_cur[MLX5_CAP_ATOMIC], cap) #define MLX5_CAP_ATOMIC_MAX(mdev, cap) \ MLX5_GET(atomic_caps, mdev->hca_caps_max[MLX5_CAP_ATOMIC], cap) #define MLX5_CAP_FLOWTABLE(mdev, cap) \ MLX5_GET(flow_table_nic_cap, mdev->hca_caps_cur[MLX5_CAP_FLOW_TABLE], cap) #define MLX5_CAP_FLOWTABLE_MAX(mdev, cap) \ MLX5_GET(flow_table_nic_cap, mdev->hca_caps_max[MLX5_CAP_FLOW_TABLE], cap) #define MLX5_CAP_ESW_FLOWTABLE(mdev, cap) \ MLX5_GET(flow_table_eswitch_cap, \ mdev->hca_caps_cur[MLX5_CAP_ESWITCH_FLOW_TABLE], cap) #define MLX5_CAP_ESW_FLOWTABLE_MAX(mdev, cap) \ MLX5_GET(flow_table_eswitch_cap, \ mdev->hca_caps_max[MLX5_CAP_ESWITCH_FLOW_TABLE], cap) #define MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, cap) \ MLX5_CAP_ESW_FLOWTABLE(mdev, flow_table_properties_nic_esw_fdb.cap) #define MLX5_CAP_ESW_FLOWTABLE_FDB_MAX(mdev, cap) \ MLX5_CAP_ESW_FLOWTABLE_MAX(mdev, flow_table_properties_nic_esw_fdb.cap) #define MLX5_CAP_ESW_EGRESS_ACL(mdev, cap) \ MLX5_CAP_ESW_FLOWTABLE(mdev, flow_table_properties_esw_acl_egress.cap) #define MLX5_CAP_ESW_EGRESS_ACL_MAX(mdev, cap) \ MLX5_CAP_ESW_FLOWTABLE_MAX(mdev, flow_table_properties_esw_acl_egress.cap) #define MLX5_CAP_ESW_INGRESS_ACL(mdev, cap) \ MLX5_CAP_ESW_FLOWTABLE(mdev, flow_table_properties_esw_acl_ingress.cap) #define MLX5_CAP_ESW_INGRESS_ACL_MAX(mdev, cap) \ MLX5_CAP_ESW_FLOWTABLE_MAX(mdev, flow_table_properties_esw_acl_ingress.cap) #define MLX5_CAP_ESW(mdev, cap) \ MLX5_GET(e_switch_cap, \ mdev->hca_caps_cur[MLX5_CAP_ESWITCH], cap) #define MLX5_CAP_ESW_MAX(mdev, cap) \ MLX5_GET(e_switch_cap, \ mdev->hca_caps_max[MLX5_CAP_ESWITCH], cap) #define MLX5_CAP_ODP(mdev, cap)\ MLX5_GET(odp_cap, mdev->hca_caps_cur[MLX5_CAP_ODP], cap) #define MLX5_CAP_ODP_MAX(mdev, cap)\ MLX5_GET(odp_cap, mdev->hca_caps_max[MLX5_CAP_ODP], cap) #define 
MLX5_CAP_SNAPSHOT(mdev, cap) \ MLX5_GET(snapshot_cap, \ mdev->hca_caps_cur[MLX5_CAP_SNAPSHOT], cap) #define MLX5_CAP_SNAPSHOT_MAX(mdev, cap) \ MLX5_GET(snapshot_cap, \ mdev->hca_caps_max[MLX5_CAP_SNAPSHOT], cap) #define MLX5_CAP_EOIB_OFFLOADS(mdev, cap) \ MLX5_GET(per_protocol_networking_offload_caps,\ mdev->hca_caps_cur[MLX5_CAP_EOIB_OFFLOADS], cap) #define MLX5_CAP_EOIB_OFFLOADS_MAX(mdev, cap) \ MLX5_GET(per_protocol_networking_offload_caps,\ mdev->hca_caps_max[MLX5_CAP_EOIB_OFFLOADS], cap) #define MLX5_CAP_DEBUG(mdev, cap) \ MLX5_GET(debug_cap, \ mdev->hca_caps_cur[MLX5_CAP_DEBUG], cap) #define MLX5_CAP_DEBUG_MAX(mdev, cap) \ MLX5_GET(debug_cap, \ mdev->hca_caps_max[MLX5_CAP_DEBUG], cap) #define MLX5_CAP_QOS(mdev, cap) \ MLX5_GET(qos_cap,\ mdev->hca_caps_cur[MLX5_CAP_QOS], cap) #define MLX5_CAP_QOS_MAX(mdev, cap) \ MLX5_GET(qos_cap,\ mdev->hca_caps_max[MLX5_CAP_QOS], cap) enum { MLX5_CMD_STAT_OK = 0x0, MLX5_CMD_STAT_INT_ERR = 0x1, MLX5_CMD_STAT_BAD_OP_ERR = 0x2, MLX5_CMD_STAT_BAD_PARAM_ERR = 0x3, MLX5_CMD_STAT_BAD_SYS_STATE_ERR = 0x4, MLX5_CMD_STAT_BAD_RES_ERR = 0x5, MLX5_CMD_STAT_RES_BUSY = 0x6, MLX5_CMD_STAT_LIM_ERR = 0x8, MLX5_CMD_STAT_BAD_RES_STATE_ERR = 0x9, MLX5_CMD_STAT_IX_ERR = 0xa, MLX5_CMD_STAT_NO_RES_ERR = 0xf, MLX5_CMD_STAT_BAD_INP_LEN_ERR = 0x50, MLX5_CMD_STAT_BAD_OUTP_LEN_ERR = 0x51, MLX5_CMD_STAT_BAD_QP_STATE_ERR = 0x10, MLX5_CMD_STAT_BAD_PKT_ERR = 0x30, MLX5_CMD_STAT_BAD_SIZE_OUTS_CQES_ERR = 0x40, }; enum { MLX5_IEEE_802_3_COUNTERS_GROUP = 0x0, MLX5_RFC_2863_COUNTERS_GROUP = 0x1, MLX5_RFC_2819_COUNTERS_GROUP = 0x2, MLX5_RFC_3635_COUNTERS_GROUP = 0x3, MLX5_ETHERNET_EXTENDED_COUNTERS_GROUP = 0x5, MLX5_ETHERNET_DISCARD_COUNTERS_GROUP = 0x6, MLX5_PER_PRIORITY_COUNTERS_GROUP = 0x10, MLX5_PER_TRAFFIC_CLASS_COUNTERS_GROUP = 0x11, MLX5_PHYSICAL_LAYER_COUNTERS_GROUP = 0x12, MLX5_PHYSICAL_LAYER_STATISTICAL_GROUP = 0x16, MLX5_INFINIBAND_PORT_COUNTERS_GROUP = 0x20, }; enum { MLX5_PCIE_PERFORMANCE_COUNTERS_GROUP = 0x0, MLX5_PCIE_LANE_COUNTERS_GROUP = 0x1, MLX5_PCIE_TIMERS_AND_STATES_COUNTERS_GROUP = 0x2, }; enum { MLX5_NUM_UUARS_PER_PAGE = MLX5_NON_FP_BF_REGS_PER_PAGE, MLX5_DEF_TOT_UUARS = 8 * MLX5_NUM_UUARS_PER_PAGE, }; enum { NUM_DRIVER_UARS = 4, NUM_LOW_LAT_UUARS = 4, }; enum { MLX5_CAP_PORT_TYPE_IB = 0x0, MLX5_CAP_PORT_TYPE_ETH = 0x1, }; enum { MLX5_CMD_HCA_CAP_MIN_WQE_INLINE_MODE_L2 = 0x0, MLX5_CMD_HCA_CAP_MIN_WQE_INLINE_MODE_VPORT_CONFIG = 0x1, MLX5_CMD_HCA_CAP_MIN_WQE_INLINE_MODE_NOT_REQUIRED = 0x2 }; enum { MLX5_QUERY_VPORT_STATE_OUT_STATE_FOLLOW = 0x2, }; static inline u16 mlx5_to_sw_pkey_sz(int pkey_sz) { if (pkey_sz > MLX5_MAX_LOG_PKEY_TABLE) return 0; return MLX5_MIN_PKEY_TABLE_SIZE << pkey_sz; } struct mlx5_ifc_mcia_reg_bits { u8 l[0x1]; u8 reserved_0[0x7]; u8 module[0x8]; u8 reserved_1[0x8]; u8 status[0x8]; u8 i2c_device_address[0x8]; u8 page_number[0x8]; u8 device_address[0x10]; u8 reserved_2[0x10]; u8 size[0x10]; u8 reserved_3[0x20]; u8 dword_0[0x20]; u8 dword_1[0x20]; u8 dword_2[0x20]; u8 dword_3[0x20]; u8 dword_4[0x20]; u8 dword_5[0x20]; u8 dword_6[0x20]; u8 dword_7[0x20]; u8 dword_8[0x20]; u8 dword_9[0x20]; u8 dword_10[0x20]; u8 dword_11[0x20]; }; #define MLX5_CMD_OP_QUERY_EEPROM 0x93c struct mlx5_mini_cqe8 { union { __be32 rx_hash_result; __be16 checksum; __be16 rsvd; struct { __be16 wqe_counter; u8 s_wqe_opcode; u8 reserved; } s_wqe_info; }; __be32 byte_cnt; }; enum { MLX5_NO_INLINE_DATA, MLX5_INLINE_DATA32_SEG, MLX5_INLINE_DATA64_SEG, MLX5_COMPRESSED, }; enum mlx5_exp_cqe_zip_recv_type { MLX5_CQE_FORMAT_HASH, MLX5_CQE_FORMAT_CSUM, }; #define 
MLX5E_CQE_FORMAT_MASK 0xc static inline int mlx5_get_cqe_format(const struct mlx5_cqe64 *cqe) { return (cqe->op_own & MLX5E_CQE_FORMAT_MASK) >> 2; } enum { MLX5_GEN_EVENT_SUBTYPE_DELAY_DROP_TIMEOUT = 0x1, }; /* 8 regular priorities + 1 for multicast */ #define MLX5_NUM_BYPASS_FTS 9 #endif /* MLX5_DEVICE_H */ Index: head/sys/dev/mlx5/qp.h =================================================================== --- head/sys/dev/mlx5/qp.h (revision 327863) +++ head/sys/dev/mlx5/qp.h (revision 327864) @@ -1,784 +1,784 @@ /*- * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #ifndef MLX5_QP_H #define MLX5_QP_H #include #define MLX5_INVALID_LKEY 0x100 #define MLX5_SIG_WQE_SIZE (MLX5_SEND_WQE_BB * 5) #define MLX5_DIF_SIZE 8 #define MLX5_STRIDE_BLOCK_OP 0x400 #define MLX5_CPY_GRD_MASK 0xc0 #define MLX5_CPY_APP_MASK 0x30 #define MLX5_CPY_REF_MASK 0x0f #define MLX5_BSF_INC_REFTAG (1 << 6) #define MLX5_BSF_INL_VALID (1 << 15) #define MLX5_BSF_REFRESH_DIF (1 << 14) #define MLX5_BSF_REPEAT_BLOCK (1 << 7) #define MLX5_BSF_APPTAG_ESCAPE 0x1 #define MLX5_BSF_APPREF_ESCAPE 0x2 #define MLX5_WQE_DS_UNITS 16 enum mlx5_qp_optpar { MLX5_QP_OPTPAR_ALT_ADDR_PATH = 1 << 0, MLX5_QP_OPTPAR_RRE = 1 << 1, MLX5_QP_OPTPAR_RAE = 1 << 2, MLX5_QP_OPTPAR_RWE = 1 << 3, MLX5_QP_OPTPAR_PKEY_INDEX = 1 << 4, MLX5_QP_OPTPAR_Q_KEY = 1 << 5, MLX5_QP_OPTPAR_RNR_TIMEOUT = 1 << 6, MLX5_QP_OPTPAR_PRIMARY_ADDR_PATH = 1 << 7, MLX5_QP_OPTPAR_SRA_MAX = 1 << 8, MLX5_QP_OPTPAR_RRA_MAX = 1 << 9, MLX5_QP_OPTPAR_PM_STATE = 1 << 10, MLX5_QP_OPTPAR_RETRY_COUNT = 1 << 12, MLX5_QP_OPTPAR_RNR_RETRY = 1 << 13, MLX5_QP_OPTPAR_ACK_TIMEOUT = 1 << 14, MLX5_QP_OPTPAR_PRI_PORT = 1 << 16, MLX5_QP_OPTPAR_SRQN = 1 << 18, MLX5_QP_OPTPAR_CQN_RCV = 1 << 19, MLX5_QP_OPTPAR_DC_HS = 1 << 20, MLX5_QP_OPTPAR_DC_KEY = 1 << 21, }; enum mlx5_qp_state { MLX5_QP_STATE_RST = 0, MLX5_QP_STATE_INIT = 1, MLX5_QP_STATE_RTR = 2, MLX5_QP_STATE_RTS = 3, MLX5_QP_STATE_SQER = 4, MLX5_QP_STATE_SQD = 5, MLX5_QP_STATE_ERR = 6, MLX5_QP_STATE_SQ_DRAINING = 7, MLX5_QP_STATE_SUSPENDED = 9, MLX5_QP_NUM_STATE, MLX5_QP_STATE, MLX5_QP_STATE_BAD, }; enum { MLX5_SQ_STATE_NA = MLX5_SQC_STATE_ERR + 1, MLX5_SQ_NUM_STATE = MLX5_SQ_STATE_NA + 1, MLX5_RQ_STATE_NA = MLX5_RQC_STATE_ERR + 1, MLX5_RQ_NUM_STATE = MLX5_RQ_STATE_NA + 1, }; enum { MLX5_QP_ST_RC = 0x0, MLX5_QP_ST_UC = 0x1, MLX5_QP_ST_UD = 0x2, MLX5_QP_ST_XRC = 0x3, MLX5_QP_ST_MLX = 0x4, MLX5_QP_ST_DCI = 0x5, MLX5_QP_ST_DCT = 0x6, MLX5_QP_ST_QP0 = 0x7, MLX5_QP_ST_QP1 = 0x8, MLX5_QP_ST_RAW_ETHERTYPE = 0x9, MLX5_QP_ST_RAW_IPV6 = 0xa, MLX5_QP_ST_SNIFFER = 0xb, MLX5_QP_ST_SYNC_UMR = 0xe, MLX5_QP_ST_PTP_1588 = 0xd, MLX5_QP_ST_REG_UMR = 0xc, MLX5_QP_ST_SW_CNAK = 0x10, MLX5_QP_ST_MAX }; enum { MLX5_NON_ZERO_RQ = 0x0, MLX5_SRQ_RQ = 0x1, MLX5_CRQ_RQ = 0x2, MLX5_ZERO_LEN_RQ = 0x3 }; enum { /* params1 */ MLX5_QP_BIT_SRE = 1 << 15, MLX5_QP_BIT_SWE = 1 << 14, MLX5_QP_BIT_SAE = 1 << 13, /* params2 */ MLX5_QP_BIT_RRE = 1 << 15, MLX5_QP_BIT_RWE = 1 << 14, MLX5_QP_BIT_RAE = 1 << 13, MLX5_QP_BIT_RIC = 1 << 4, MLX5_QP_BIT_COLL_SYNC_RQ = 1 << 2, MLX5_QP_BIT_COLL_SYNC_SQ = 1 << 1, MLX5_QP_BIT_COLL_MASTER = 1 << 0 }; enum { MLX5_DCT_BIT_RRE = 1 << 19, MLX5_DCT_BIT_RWE = 1 << 18, MLX5_DCT_BIT_RAE = 1 << 17, }; enum { MLX5_WQE_CTRL_CQ_UPDATE = 2 << 2, MLX5_WQE_CTRL_CQ_UPDATE_AND_EQE = 3 << 2, MLX5_WQE_CTRL_SOLICITED = 1 << 1, }; enum { MLX5_SEND_WQE_DS = 16, MLX5_SEND_WQE_BB = 64, }; #define MLX5_SEND_WQEBB_NUM_DS (MLX5_SEND_WQE_BB / MLX5_SEND_WQE_DS) enum { MLX5_SEND_WQE_MAX_WQEBBS = 16, }; enum { MLX5_WQE_FMR_PERM_LOCAL_READ = 1 << 27, MLX5_WQE_FMR_PERM_LOCAL_WRITE = 1 << 28, MLX5_WQE_FMR_PERM_REMOTE_READ = 1 << 29, MLX5_WQE_FMR_PERM_REMOTE_WRITE = 1 << 30, - MLX5_WQE_FMR_PERM_ATOMIC = 1 << 31 + MLX5_WQE_FMR_PERM_ATOMIC = 1U << 31 }; enum { MLX5_FENCE_MODE_NONE = 0 << 5, MLX5_FENCE_MODE_INITIATOR_SMALL = 1 << 5, MLX5_FENCE_MODE_FENCE = 2 << 5, MLX5_FENCE_MODE_STRONG_ORDERING = 3 << 5, MLX5_FENCE_MODE_SMALL_AND_FENCE = 4 << 5, }; enum { MLX5_QP_DRAIN_SIGERR = 1 << 26, MLX5_QP_LAT_SENSITIVE = 1 << 28, MLX5_QP_BLOCK_MCAST = 1 << 30, - MLX5_QP_ENABLE_SIG = 1 << 31, + MLX5_QP_ENABLE_SIG = 1U << 31, }; enum { 
MLX5_RCV_DBR = 0, MLX5_SND_DBR = 1, }; enum { MLX5_FLAGS_INLINE = 1<<7, MLX5_FLAGS_CHECK_FREE = 1<<5, }; struct mlx5_wqe_fmr_seg { __be32 flags; __be32 mem_key; __be64 buf_list; __be64 start_addr; __be64 reg_len; __be32 offset; __be32 page_size; u32 reserved[2]; }; struct mlx5_wqe_ctrl_seg { __be32 opmod_idx_opcode; __be32 qpn_ds; u8 signature; u8 rsvd[2]; u8 fm_ce_se; __be32 imm; }; #define MLX5_WQE_CTRL_DS_MASK 0x3f enum { MLX5_MLX_FLAG_MASK_VL15 = 0x40, MLX5_MLX_FLAG_MASK_SLR = 0x20, MLX5_MLX_FLAG_MASK_ICRC = 0x8, MLX5_MLX_FLAG_MASK_FL = 4 }; struct mlx5_mlx_seg { __be32 rsvd0; u8 flags; u8 stat_rate_sl; u8 rsvd1[8]; __be16 dlid; }; enum { MLX5_ETH_WQE_L3_INNER_CSUM = 1 << 4, MLX5_ETH_WQE_L4_INNER_CSUM = 1 << 5, MLX5_ETH_WQE_L3_CSUM = 1 << 6, MLX5_ETH_WQE_L4_CSUM = 1 << 7, }; enum { MLX5_ETH_WQE_SWP_INNER_L3_TYPE = 1 << 0, MLX5_ETH_WQE_SWP_INNER_L4_TYPE = 1 << 1, MLX5_ETH_WQE_SWP_OUTER_L3_TYPE = 1 << 4, MLX5_ETH_WQE_SWP_OUTER_L4_TYPE = 1 << 5, }; struct mlx5_wqe_eth_seg { u8 swp_outer_l4_offset; u8 swp_outer_l3_offset; u8 swp_inner_l4_offset; u8 swp_inner_l3_offset; u8 cs_flags; u8 swp_flags; __be16 mss; __be32 rsvd2; __be16 inline_hdr_sz; u8 inline_hdr_start[2]; }; struct mlx5_wqe_xrc_seg { __be32 xrc_srqn; u8 rsvd[12]; }; struct mlx5_wqe_masked_atomic_seg { __be64 swap_add; __be64 compare; __be64 swap_add_mask; __be64 compare_mask; }; struct mlx5_av { union { struct { __be32 qkey; __be32 reserved; } qkey; __be64 dc_key; } key; __be32 dqp_dct; u8 stat_rate_sl; u8 fl_mlid; union { __be16 rlid; __be16 udp_sport; }; u8 reserved0[4]; u8 rmac[6]; u8 tclass; u8 hop_limit; __be32 grh_gid_fl; u8 rgid[16]; }; struct mlx5_wqe_datagram_seg { struct mlx5_av av; }; struct mlx5_wqe_raddr_seg { __be64 raddr; __be32 rkey; u32 reserved; }; struct mlx5_wqe_atomic_seg { __be64 swap_add; __be64 compare; }; struct mlx5_wqe_data_seg { __be32 byte_count; __be32 lkey; __be64 addr; }; struct mlx5_wqe_umr_ctrl_seg { u8 flags; u8 rsvd0[3]; __be16 klm_octowords; __be16 bsf_octowords; __be64 mkey_mask; u8 rsvd1[32]; }; struct mlx5_seg_set_psv { __be32 psv_num; __be16 syndrome; __be16 status; __be32 transient_sig; __be32 ref_tag; }; struct mlx5_seg_get_psv { u8 rsvd[19]; u8 num_psv; __be32 l_key; __be64 va; __be32 psv_index[4]; }; struct mlx5_seg_check_psv { u8 rsvd0[2]; __be16 err_coalescing_op; u8 rsvd1[2]; __be16 xport_err_op; u8 rsvd2[2]; __be16 xport_err_mask; u8 rsvd3[7]; u8 num_psv; __be32 l_key; __be64 va; __be32 psv_index[4]; }; struct mlx5_rwqe_sig { u8 rsvd0[4]; u8 signature; u8 rsvd1[11]; }; struct mlx5_wqe_signature_seg { u8 rsvd0[4]; u8 signature; u8 rsvd1[11]; }; struct mlx5_wqe_inline_seg { __be32 byte_count; }; enum mlx5_sig_type { MLX5_DIF_CRC = 0x1, MLX5_DIF_IPCS = 0x2, }; struct mlx5_bsf_inl { __be16 vld_refresh; __be16 dif_apptag; __be32 dif_reftag; u8 sig_type; u8 rp_inv_seed; u8 rsvd[3]; u8 dif_inc_ref_guard_check; __be16 dif_app_bitmask_check; }; struct mlx5_bsf { struct mlx5_bsf_basic { u8 bsf_size_sbs; u8 check_byte_mask; union { u8 copy_byte_mask; u8 bs_selector; u8 rsvd_wflags; } wire; union { u8 bs_selector; u8 rsvd_mflags; } mem; __be32 raw_data_size; __be32 w_bfs_psv; __be32 m_bfs_psv; } basic; struct mlx5_bsf_ext { __be32 t_init_gen_pro_size; __be32 rsvd_epi_size; __be32 w_tfs_psv; __be32 m_tfs_psv; } ext; struct mlx5_bsf_inl w_inl; struct mlx5_bsf_inl m_inl; }; struct mlx5_klm { __be32 bcount; __be32 key; __be64 va; }; struct mlx5_stride_block_entry { __be16 stride; __be16 bcount; __be32 key; __be64 va; }; struct mlx5_stride_block_ctrl_seg { __be32 bcount_per_cycle; __be32 op; 
__be32 repeat_count; u16 rsvd; __be16 num_entries; }; enum mlx5_pagefault_flags { MLX5_PFAULT_REQUESTOR = 1 << 0, MLX5_PFAULT_WRITE = 1 << 1, MLX5_PFAULT_RDMA = 1 << 2, }; /* Contains the details of a pagefault. */ struct mlx5_pagefault { u32 bytes_committed; u8 event_subtype; enum mlx5_pagefault_flags flags; union { /* Initiator or send message responder pagefault details. */ struct { /* Received packet size, only valid for responders. */ u32 packet_size; /* * WQE index. Refers to either the send queue or * receive queue, according to event_subtype. */ u16 wqe_index; } wqe; /* RDMA responder pagefault details */ struct { u32 r_key; /* * Received packet size, minimal size page fault * resolution required for forward progress. */ u32 packet_size; u32 rdma_op_len; u64 rdma_va; } rdma; }; }; struct mlx5_core_qp { struct mlx5_core_rsc_common common; /* must be first */ void (*event) (struct mlx5_core_qp *, int); int qpn; struct mlx5_rsc_debug *dbg; int pid; }; struct mlx5_qp_path { u8 fl_free_ar; u8 rsvd3; __be16 pkey_index; u8 rsvd0; u8 grh_mlid; __be16 rlid; u8 ackto_lt; u8 mgid_index; u8 static_rate; u8 hop_limit; __be32 tclass_flowlabel; union { u8 rgid[16]; u8 rip[16]; }; u8 f_dscp_ecn_prio; u8 ecn_dscp; __be16 udp_sport; u8 dci_cfi_prio_sl; u8 port; u8 rmac[6]; }; struct mlx5_qp_context { __be32 flags; __be32 flags_pd; u8 mtu_msgmax; u8 rq_size_stride; __be16 sq_crq_size; __be32 qp_counter_set_usr_page; __be32 wire_qpn; __be32 log_pg_sz_remote_qpn; struct mlx5_qp_path pri_path; struct mlx5_qp_path alt_path; __be32 params1; u8 reserved2[4]; __be32 next_send_psn; __be32 cqn_send; __be32 deth_sqpn; u8 reserved3[4]; __be32 last_acked_psn; __be32 ssn; __be32 params2; __be32 rnr_nextrecvpsn; __be32 xrcd; __be32 cqn_recv; __be64 db_rec_addr; __be32 qkey; __be32 rq_type_srqn; __be32 rmsn; __be16 hw_sq_wqe_counter; __be16 sw_sq_wqe_counter; __be16 hw_rcyclic_byte_counter; __be16 hw_rq_counter; __be16 sw_rcyclic_byte_counter; __be16 sw_rq_counter; u8 rsvd0[5]; u8 cgs; u8 cs_req; u8 cs_res; __be64 dc_access_key; u8 rsvd1[24]; }; struct mlx5_create_qp_mbox_in { struct mlx5_inbox_hdr hdr; __be32 input_qpn; u8 rsvd0[4]; __be32 opt_param_mask; u8 rsvd1[4]; struct mlx5_qp_context ctx; u8 rsvd3[16]; __be64 pas[0]; }; struct mlx5_dct_context { u8 state; u8 rsvd0[7]; __be32 cqn; __be32 flags; u8 rsvd1; u8 cs_res; u8 min_rnr; u8 rsvd2; __be32 srqn; __be32 pdn; __be32 tclass_flow_label; __be64 access_key; u8 mtu; u8 port; __be16 pkey_index; u8 rsvd4; u8 mgid_index; u8 rsvd5; u8 hop_limit; __be32 access_violations; u8 rsvd[12]; }; struct mlx5_create_dct_mbox_in { struct mlx5_inbox_hdr hdr; u8 rsvd0[8]; struct mlx5_dct_context context; u8 rsvd[48]; }; struct mlx5_create_dct_mbox_out { struct mlx5_outbox_hdr hdr; __be32 dctn; u8 rsvd0[4]; }; struct mlx5_destroy_dct_mbox_in { struct mlx5_inbox_hdr hdr; __be32 dctn; u8 rsvd0[4]; }; struct mlx5_destroy_dct_mbox_out { struct mlx5_outbox_hdr hdr; u8 rsvd0[8]; }; struct mlx5_drain_dct_mbox_in { struct mlx5_inbox_hdr hdr; __be32 dctn; u8 rsvd0[4]; }; struct mlx5_drain_dct_mbox_out { struct mlx5_outbox_hdr hdr; u8 rsvd0[8]; }; struct mlx5_create_qp_mbox_out { struct mlx5_outbox_hdr hdr; __be32 qpn; u8 rsvd0[4]; }; struct mlx5_destroy_qp_mbox_in { struct mlx5_inbox_hdr hdr; __be32 qpn; u8 rsvd0[4]; }; struct mlx5_destroy_qp_mbox_out { struct mlx5_outbox_hdr hdr; u8 rsvd0[8]; }; struct mlx5_modify_qp_mbox_in { struct mlx5_inbox_hdr hdr; __be32 qpn; u8 rsvd1[4]; __be32 optparam; u8 rsvd0[4]; struct mlx5_qp_context ctx; u8 rsvd2[16]; }; struct mlx5_modify_qp_mbox_out { 
struct mlx5_outbox_hdr hdr; u8 rsvd0[8]; }; struct mlx5_query_qp_mbox_in { struct mlx5_inbox_hdr hdr; __be32 qpn; u8 rsvd[4]; }; struct mlx5_query_qp_mbox_out { struct mlx5_outbox_hdr hdr; u8 rsvd1[8]; __be32 optparam; u8 rsvd0[4]; struct mlx5_qp_context ctx; u8 rsvd2[16]; __be64 pas[0]; }; struct mlx5_query_dct_mbox_in { struct mlx5_inbox_hdr hdr; __be32 dctn; u8 rsvd[4]; }; struct mlx5_query_dct_mbox_out { struct mlx5_outbox_hdr hdr; u8 rsvd0[8]; struct mlx5_dct_context ctx; u8 rsvd1[48]; }; struct mlx5_arm_dct_mbox_in { struct mlx5_inbox_hdr hdr; __be32 dctn; u8 rsvd[4]; }; struct mlx5_arm_dct_mbox_out { struct mlx5_outbox_hdr hdr; u8 rsvd0[8]; }; struct mlx5_conf_sqp_mbox_in { struct mlx5_inbox_hdr hdr; __be32 qpn; u8 rsvd[3]; u8 type; }; struct mlx5_conf_sqp_mbox_out { struct mlx5_outbox_hdr hdr; u8 rsvd[8]; }; static inline struct mlx5_core_qp *__mlx5_qp_lookup(struct mlx5_core_dev *dev, u32 qpn) { return radix_tree_lookup(&dev->priv.qp_table.tree, qpn); } static inline struct mlx5_core_mr *__mlx5_mr_lookup(struct mlx5_core_dev *dev, u32 key) { return radix_tree_lookup(&dev->priv.mr_table.tree, key); } int mlx5_core_create_qp(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp, struct mlx5_create_qp_mbox_in *in, int inlen); int mlx5_core_qp_modify(struct mlx5_core_dev *dev, u16 operation, struct mlx5_modify_qp_mbox_in *in, int sqd_event, struct mlx5_core_qp *qp); int mlx5_core_destroy_qp(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp); int mlx5_core_qp_query(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp, struct mlx5_query_qp_mbox_out *out, int outlen); int mlx5_core_dct_query(struct mlx5_core_dev *dev, struct mlx5_core_dct *dct, struct mlx5_query_dct_mbox_out *out); int mlx5_core_arm_dct(struct mlx5_core_dev *dev, struct mlx5_core_dct *dct); int mlx5_core_xrcd_alloc(struct mlx5_core_dev *dev, u32 *xrcdn); int mlx5_core_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn); int mlx5_core_create_dct(struct mlx5_core_dev *dev, struct mlx5_core_dct *dct, struct mlx5_create_dct_mbox_in *in); int mlx5_core_destroy_dct(struct mlx5_core_dev *dev, struct mlx5_core_dct *dct); int mlx5_core_create_rq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen, struct mlx5_core_qp *rq); void mlx5_core_destroy_rq_tracked(struct mlx5_core_dev *dev, struct mlx5_core_qp *rq); int mlx5_core_create_sq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen, struct mlx5_core_qp *sq); void mlx5_core_destroy_sq_tracked(struct mlx5_core_dev *dev, struct mlx5_core_qp *sq); void mlx5_init_qp_table(struct mlx5_core_dev *dev); void mlx5_cleanup_qp_table(struct mlx5_core_dev *dev); int mlx5_debug_qp_add(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp); void mlx5_debug_qp_remove(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp); static inline const char *mlx5_qp_type_str(int type) { switch (type) { case MLX5_QP_ST_RC: return "RC"; case MLX5_QP_ST_UC: return "C"; case MLX5_QP_ST_UD: return "UD"; case MLX5_QP_ST_XRC: return "XRC"; case MLX5_QP_ST_MLX: return "MLX"; case MLX5_QP_ST_DCI: return "DCI"; case MLX5_QP_ST_QP0: return "QP0"; case MLX5_QP_ST_QP1: return "QP1"; case MLX5_QP_ST_RAW_ETHERTYPE: return "RAW_ETHERTYPE"; case MLX5_QP_ST_RAW_IPV6: return "RAW_IPV6"; case MLX5_QP_ST_SNIFFER: return "SNIFFER"; case MLX5_QP_ST_SYNC_UMR: return "SYNC_UMR"; case MLX5_QP_ST_PTP_1588: return "PTP_1588"; case MLX5_QP_ST_REG_UMR: return "REG_UMR"; case MLX5_QP_ST_SW_CNAK: return "DC_CNAK"; default: return "Invalid transport type"; } } static inline const char *mlx5_qp_state_str(int state) { switch (state) { case 
MLX5_QP_STATE_RST: return "RST"; case MLX5_QP_STATE_INIT: return "INIT"; case MLX5_QP_STATE_RTR: return "RTR"; case MLX5_QP_STATE_RTS: return "RTS"; case MLX5_QP_STATE_SQER: return "SQER"; case MLX5_QP_STATE_SQD: return "SQD"; case MLX5_QP_STATE_ERR: return "ERR"; case MLX5_QP_STATE_SQ_DRAINING: return "SQ_DRAINING"; case MLX5_QP_STATE_SUSPENDED: return "SUSPENDED"; default: return "Invalid QP state"; } } #endif /* MLX5_QP_H */
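
The recurring change in this revision is the `1U` suffix on the bit-31 flag definitions (MLX4_WQE_CTRL_OWN, MLX5_MKEY_LEN64, MLX5_QP_ENABLE_SIG, and the rest). A minimal, self-contained sketch of why the suffix matters, illustrative only and not part of the patch: with a 32-bit int, 1 << 31 shifts a bit into the sign position, which is undefined behaviour in C and on common two's-complement targets produces a negative value that sign-extends when widened; 1U << 31 is well defined and stays 0x80000000.

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	/* Undefined behaviour; on typical targets this sign-extends. */
	uint64_t signed_form = (uint64_t)(1 << 31);	/* often 0xffffffff80000000 */
	/* Well defined; the value survives widening unchanged. */
	uint64_t unsigned_form = (uint64_t)(1U << 31);	/* always 0x0000000080000000 */

	printf("%#jx %#jx\n", (uintmax_t)signed_form, (uintmax_t)unsigned_form);
	return (0);
}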
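
The srcrb_flags bit-layout comment in struct mlx4_wqe_ctrl_seg (mlx4/qp.h above) pairs with the MLX4_WQE_CTRL_* flags. A hedged sketch of how a transmit path would typically compose them; the helper name is hypothetical and the flag choice is only an example, not taken from this diff.

static void
mlx4_fill_ctrl_flags(struct mlx4_wqe_ctrl_seg *ctrl)
{
	/*
	 * Illustrative only: request a CQE for this WQE and enable IP and
	 * TCP/UDP checksum offload, following the srcrb_flags bit layout
	 * documented in struct mlx4_wqe_ctrl_seg.
	 */
	ctrl->srcrb_flags = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE |
	    MLX4_WQE_CTRL_IP_CSUM | MLX4_WQE_CTRL_TCP_UDP_CSUM);
}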
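
The MLX5_SET()/MLX5_GET() helpers in mlx5/device.h read and write fields of the big-endian mlx5_ifc_*_bits command layouts in place. A hedged usage sketch: the query_hca_cap_in layout and MLX5_CMD_OP_QUERY_HCA_CAP opcode are assumed from mlx5_ifc.h (they are not part of this diff), and the function name is hypothetical.

static u16
mlx5_query_hca_cap_opcode_example(u32 *in)
{
	/* 'in' must hold MLX5_ST_SZ_DW(query_hca_cap_in) zeroed dwords. */
	MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);

	/* MLX5_GET() converts back from the device's big-endian layout. */
	return (MLX5_GET(query_hca_cap_in, in, opcode));
}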
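
Likewise, the MLX5_CAP_GEN()/MLX5_CAP_ETH()/... capability accessors wrap MLX5_GET() over the cached hca_caps_cur/hca_caps_max pages. A sketch of the usual query pattern; struct mlx5_core_dev and the port_type field are assumed from driver.h and mlx5_ifc.h, and the helper name is hypothetical.

static bool
mlx5_port_is_eth_example(struct mlx5_core_dev *mdev)
{
	/* Compare the cached current capability against the enum above. */
	return (MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH);
}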