Index: head/sys/ofed/drivers/infiniband/hw/mlx4/main.c =================================================================== --- head/sys/ofed/drivers/infiniband/hw/mlx4/main.c (revision 299178) +++ head/sys/ofed/drivers/infiniband/hw/mlx4/main.c (revision 299179) @@ -1,2894 +1,2894 @@ /* * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "mlx4_ib.h" #include "mlx4_exp.h" #include "user.h" #include "wc.h" #define DRV_NAME MLX4_IB_DRV_NAME #define DRV_VERSION "1.0" #define DRV_RELDATE __DATE__ #define MLX4_IB_DRIVER_PROC_DIR_NAME "driver/mlx4_ib" #define MLX4_IB_MRS_PROC_DIR_NAME "mrs" #define MLX4_IB_FLOW_MAX_PRIO 0xFFF #define MLX4_IB_FLOW_QPN_MASK 0xFFFFFF MODULE_AUTHOR("Roland Dreier"); MODULE_DESCRIPTION("Mellanox ConnectX HCA InfiniBand driver"); MODULE_LICENSE("Dual BSD/GPL"); #ifdef __linux__ MODULE_VERSION(DRV_VERSION); #endif int mlx4_ib_sm_guid_assign = 1; module_param_named(sm_guid_assign, mlx4_ib_sm_guid_assign, int, 0444); MODULE_PARM_DESC(sm_guid_assign, "Enable SM alias_GUID assignment if sm_guid_assign > 0 (Default: 1)"); enum { MAX_NUM_STR_BITMAP = 1 << 15, DEFAULT_TBL_VAL = -1 }; static struct mlx4_dbdf2val_lst dev_assign_str = { .name = "dev_assign_str param", .num_vals = 1, .def_val = {DEFAULT_TBL_VAL}, .range = {0, MAX_NUM_STR_BITMAP - 1} }; module_param_string(dev_assign_str, dev_assign_str.str, sizeof(dev_assign_str.str), 0444); MODULE_PARM_DESC(dev_assign_str, "Map device function numbers to IB device numbers (e.g. '0000:04:00.0-0,002b:1c:0b.a-1,...').\n" "\t\tHexadecimal digits for the device function (e.g. 002b:1c:0b.a) and decimal for IB device numbers (e.g. 1).\n" "\t\tMax supported devices - 32"); static unsigned long *dev_num_str_bitmap; static spinlock_t dev_num_str_lock; static const char mlx4_ib_version[] = DRV_NAME ": Mellanox ConnectX InfiniBand driver v" DRV_VERSION " (" DRV_RELDATE ")\n"; struct update_gid_work { struct work_struct work; union ib_gid gids[128]; struct mlx4_ib_dev *dev; int port; }; struct dev_rec { int bus; int dev; int func; int nr; }; static int dr_active; static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init); static void mlx4_ib_scan_netdevs(struct mlx4_ib_dev *ibdev, struct net_device*, unsigned long); static u8 mlx4_ib_get_dev_port(struct net_device *dev, struct mlx4_ib_dev *ibdev); static struct workqueue_struct *wq; static void init_query_mad(struct ib_smp *mad) { mad->base_version = 1; mad->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED; mad->class_version = 1; mad->method = IB_MGMT_METHOD_GET; } static union ib_gid zgid; static int check_flow_steering_support(struct mlx4_dev *dev) { int eth_num_ports = 0; int ib_num_ports = 0; int dmfs = dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED; if (dmfs) { int i; mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_ETH) eth_num_ports++; mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB) ib_num_ports++; dmfs &= (!ib_num_ports || (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DMFS_IPOIB)) && (!eth_num_ports || (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FS_EN)); if (ib_num_ports && mlx4_is_mfunc(dev)) { dmfs = 0; } } return dmfs; } int mlx4_ib_query_device(struct ib_device *ibdev, struct ib_device_attr *props) { struct mlx4_ib_dev *dev = to_mdev(ibdev); struct ib_smp *in_mad = NULL; struct ib_smp *out_mad = NULL; int err = -ENOMEM; in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); if (!in_mad || !out_mad) goto out; init_query_mad(in_mad); in_mad->attr_id = IB_SMP_ATTR_NODE_INFO; err = mlx4_MAD_IFC(to_mdev(ibdev), MLX4_MAD_IFC_IGNORE_KEYS, 1, NULL, NULL, in_mad, out_mad); if (err) goto out; memset(props, 0, sizeof *props); props->fw_ver = dev->dev->caps.fw_ver; props->device_cap_flags = IB_DEVICE_CHANGE_PHY_PORT | IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN | IB_DEVICE_BLOCK_MULTICAST_LOOPBACK | IB_DEVICE_SHARED_MR; if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_PKEY_CNTR) props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR; if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_QKEY_CNTR) props->device_cap_flags |= IB_DEVICE_BAD_QKEY_CNTR; if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_APM) props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG; if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_UD_AV_PORT) props->device_cap_flags |= IB_DEVICE_UD_AV_PORT_ENFORCE; if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IPOIB_CSUM) props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM; if (dev->dev->caps.max_gso_sz && dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BLH) props->device_cap_flags |= IB_DEVICE_UD_TSO; if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_RESERVED_LKEY) props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY; if ((dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_LOCAL_INV) && (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_REMOTE_INV) && (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_FAST_REG_WR)) props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) props->device_cap_flags |= IB_DEVICE_XRC; if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_CROSS_CHANNEL) props->device_cap_flags |= IB_DEVICE_CROSS_CHANNEL; if (check_flow_steering_support(dev->dev)) props->device_cap_flags |= IB_DEVICE_MANAGED_FLOW_STEERING; props->device_cap_flags |= IB_DEVICE_QPG; if (dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS) { props->device_cap_flags |= IB_DEVICE_UD_RSS; props->max_rss_tbl_sz = dev->dev->caps.max_rss_tbl_sz; } if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW) props->device_cap_flags |= IB_DEVICE_MEM_WINDOW; if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN) { if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_WIN_TYPE_2B) props->device_cap_flags |= IB_DEVICE_MEM_WINDOW_TYPE_2B; else props->device_cap_flags |= IB_DEVICE_MEM_WINDOW_TYPE_2A; } props->vendor_id = be32_to_cpup((__be32 *) (out_mad->data + 36)) & 0xffffff; props->vendor_part_id = dev->dev->pdev->device; props->hw_ver = be32_to_cpup((__be32 *) (out_mad->data + 32)); memcpy(&props->sys_image_guid, out_mad->data + 4, 8); props->max_mr_size = ~0ull; props->page_size_cap = dev->dev->caps.page_size_cap; props->max_qp = dev->dev->quotas.qp; props->max_qp_wr = dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE; props->max_sge = min(dev->dev->caps.max_sq_sg, dev->dev->caps.max_rq_sg); props->max_cq = dev->dev->quotas.cq; props->max_cqe = dev->dev->caps.max_cqes; props->max_mr = dev->dev->quotas.mpt; props->max_pd = dev->dev->caps.num_pds - dev->dev->caps.reserved_pds; props->max_qp_rd_atom = dev->dev->caps.max_qp_dest_rdma; props->max_qp_init_rd_atom = dev->dev->caps.max_qp_init_rdma; props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp; props->max_srq = dev->dev->quotas.srq; props->max_srq_wr = dev->dev->caps.max_srq_wqes - 1; props->max_srq_sge = dev->dev->caps.max_srq_sge; props->max_fast_reg_page_list_len = MLX4_MAX_FAST_REG_PAGES; props->local_ca_ack_delay = dev->dev->caps.local_ca_ack_delay; props->atomic_cap = dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_ATOMIC ? IB_ATOMIC_HCA : IB_ATOMIC_NONE; props->masked_atomic_cap = props->atomic_cap; props->max_pkeys = dev->dev->caps.pkey_table_len[1]; props->max_mcast_grp = dev->dev->caps.num_mgms + dev->dev->caps.num_amgms; props->max_mcast_qp_attach = dev->dev->caps.num_qp_per_mgm; props->max_total_mcast_qp_attach = props->max_mcast_qp_attach * props->max_mcast_grp; props->max_map_per_fmr = dev->dev->caps.max_fmr_maps; props->hca_core_clock = dev->dev->caps.hca_core_clock; if (dev->dev->caps.hca_core_clock > 0) props->comp_mask |= IB_DEVICE_ATTR_WITH_HCA_CORE_CLOCK; if (dev->dev->caps.cq_timestamp) { props->timestamp_mask = 0xFFFFFFFFFFFF; props->comp_mask |= IB_DEVICE_ATTR_WITH_TIMESTAMP_MASK; } out: kfree(in_mad); kfree(out_mad); return err; } static enum rdma_link_layer mlx4_ib_port_link_layer(struct ib_device *device, u8 port_num) { struct mlx4_dev *dev = to_mdev(device)->dev; return dev->caps.port_mask[port_num] == MLX4_PORT_TYPE_IB ? IB_LINK_LAYER_INFINIBAND : IB_LINK_LAYER_ETHERNET; } static int ib_link_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props, int netw_view) { struct ib_smp *in_mad = NULL; struct ib_smp *out_mad = NULL; int ext_active_speed; int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS; int err = -ENOMEM; in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); if (!in_mad || !out_mad) goto out; init_query_mad(in_mad); in_mad->attr_id = IB_SMP_ATTR_PORT_INFO; in_mad->attr_mod = cpu_to_be32(port); if (mlx4_is_mfunc(to_mdev(ibdev)->dev) && netw_view) mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW; err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port, NULL, NULL, in_mad, out_mad); if (err) goto out; props->lid = be16_to_cpup((__be16 *) (out_mad->data + 16)); props->lmc = out_mad->data[34] & 0x7; props->sm_lid = be16_to_cpup((__be16 *) (out_mad->data + 18)); props->sm_sl = out_mad->data[36] & 0xf; props->state = out_mad->data[32] & 0xf; props->phys_state = out_mad->data[33] >> 4; props->port_cap_flags = be32_to_cpup((__be32 *) (out_mad->data + 20)); if (netw_view) props->gid_tbl_len = out_mad->data[50]; else props->gid_tbl_len = to_mdev(ibdev)->dev->caps.gid_table_len[port]; props->max_msg_sz = to_mdev(ibdev)->dev->caps.max_msg_sz; props->pkey_tbl_len = to_mdev(ibdev)->dev->caps.pkey_table_len[port]; props->bad_pkey_cntr = be16_to_cpup((__be16 *) (out_mad->data + 46)); props->qkey_viol_cntr = be16_to_cpup((__be16 *) (out_mad->data + 48)); props->active_width = out_mad->data[31] & 0xf; props->active_speed = out_mad->data[35] >> 4; props->max_mtu = out_mad->data[41] & 0xf; props->active_mtu = out_mad->data[36] >> 4; props->subnet_timeout = out_mad->data[51] & 0x1f; props->max_vl_num = out_mad->data[37] >> 4; props->init_type_reply = out_mad->data[41] >> 4; /* Check if extended speeds (EDR/FDR/...) are supported */ if (props->port_cap_flags & IB_PORT_EXTENDED_SPEEDS_SUP) { ext_active_speed = out_mad->data[62] >> 4; switch (ext_active_speed) { case 1: props->active_speed = IB_SPEED_FDR; break; case 2: props->active_speed = IB_SPEED_EDR; break; } } /* If reported active speed is QDR, check if is FDR-10 */ if (props->active_speed == IB_SPEED_QDR) { init_query_mad(in_mad); in_mad->attr_id = MLX4_ATTR_EXTENDED_PORT_INFO; in_mad->attr_mod = cpu_to_be32(port); err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port, NULL, NULL, in_mad, out_mad); if (err) goto out; /* Checking LinkSpeedActive for FDR-10 */ if (out_mad->data[15] & 0x1) props->active_speed = IB_SPEED_FDR10; } /* Avoid wrong speed value returned by FW if the IB link is down. */ if (props->state == IB_PORT_DOWN) props->active_speed = IB_SPEED_SDR; out: kfree(in_mad); kfree(out_mad); return err; } static u8 state_to_phys_state(enum ib_port_state state) { return state == IB_PORT_ACTIVE ? 5 : 3; } static int eth_link_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props, int netw_view) { struct mlx4_ib_dev *mdev = to_mdev(ibdev); struct mlx4_ib_iboe *iboe = &mdev->iboe; struct net_device *ndev; enum ib_mtu tmp; struct mlx4_cmd_mailbox *mailbox; unsigned long flags; int err = 0; mailbox = mlx4_alloc_cmd_mailbox(mdev->dev); if (IS_ERR(mailbox)) return PTR_ERR(mailbox); err = mlx4_cmd_box(mdev->dev, 0, mailbox->dma, port, 0, MLX4_CMD_QUERY_PORT, MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED); if (err) goto out; props->active_width = (((u8 *)mailbox->buf)[5] == 0x40) ? IB_WIDTH_4X : IB_WIDTH_1X; props->active_speed = IB_SPEED_QDR; props->port_cap_flags = IB_PORT_CM_SUP; if (netw_view) props->gid_tbl_len = MLX4_ROCE_MAX_GIDS; else props->gid_tbl_len = mdev->dev->caps.gid_table_len[port]; props->max_msg_sz = mdev->dev->caps.max_msg_sz; props->pkey_tbl_len = 1; props->max_mtu = IB_MTU_4096; props->max_vl_num = 2; props->state = IB_PORT_DOWN; props->phys_state = state_to_phys_state(props->state); props->active_mtu = IB_MTU_256; spin_lock_irqsave(&iboe->lock, flags); ndev = iboe->netdevs[port - 1]; if (!ndev) goto out_unlock; tmp = iboe_get_mtu(ndev->if_mtu); props->active_mtu = tmp ? min(props->max_mtu, tmp) : IB_MTU_256; props->state = (netif_running(ndev) && netif_carrier_ok(ndev)) ? IB_PORT_ACTIVE : IB_PORT_DOWN; props->phys_state = state_to_phys_state(props->state); out_unlock: spin_unlock_irqrestore(&iboe->lock, flags); out: mlx4_free_cmd_mailbox(mdev->dev, mailbox); return err; } int __mlx4_ib_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props, int netw_view) { int err; memset(props, 0, sizeof *props); err = mlx4_ib_port_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND ? ib_link_query_port(ibdev, port, props, netw_view) : eth_link_query_port(ibdev, port, props, netw_view); return err; } static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props) { /* returns host view */ return __mlx4_ib_query_port(ibdev, port, props, 0); } int __mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index, union ib_gid *gid, int netw_view) { struct ib_smp *in_mad = NULL; struct ib_smp *out_mad = NULL; int err = -ENOMEM; struct mlx4_ib_dev *dev = to_mdev(ibdev); int clear = 0; int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS; in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); if (!in_mad || !out_mad) goto out; init_query_mad(in_mad); in_mad->attr_id = IB_SMP_ATTR_PORT_INFO; in_mad->attr_mod = cpu_to_be32(port); if (mlx4_is_mfunc(dev->dev) && netw_view) mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW; err = mlx4_MAD_IFC(dev, mad_ifc_flags, port, NULL, NULL, in_mad, out_mad); if (err) goto out; memcpy(gid->raw, out_mad->data + 8, 8); if (mlx4_is_mfunc(dev->dev) && !netw_view) { if (index) { /* For any index > 0, return the null guid */ err = 0; clear = 1; goto out; } } init_query_mad(in_mad); in_mad->attr_id = IB_SMP_ATTR_GUID_INFO; in_mad->attr_mod = cpu_to_be32(index / 8); err = mlx4_MAD_IFC(dev, mad_ifc_flags, port, NULL, NULL, in_mad, out_mad); if (err) goto out; memcpy(gid->raw + 8, out_mad->data + (index % 8) * 8, 8); out: if (clear) memset(gid->raw + 8, 0, 8); kfree(in_mad); kfree(out_mad); return err; } static int iboe_query_gid(struct ib_device *ibdev, u8 port, int index, union ib_gid *gid) { struct mlx4_ib_dev *dev = to_mdev(ibdev); *gid = dev->iboe.gid_table[port - 1][index]; return 0; } static int mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index, union ib_gid *gid) { if (rdma_port_get_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND) return __mlx4_ib_query_gid(ibdev, port, index, gid, 0); else return iboe_query_gid(ibdev, port, index, gid); } int __mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey, int netw_view) { struct ib_smp *in_mad = NULL; struct ib_smp *out_mad = NULL; int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS; int err = -ENOMEM; in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); if (!in_mad || !out_mad) goto out; init_query_mad(in_mad); in_mad->attr_id = IB_SMP_ATTR_PKEY_TABLE; in_mad->attr_mod = cpu_to_be32(index / 32); if (mlx4_is_mfunc(to_mdev(ibdev)->dev) && netw_view) mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW; err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port, NULL, NULL, in_mad, out_mad); if (err) goto out; *pkey = be16_to_cpu(((__be16 *) out_mad->data)[index % 32]); out: kfree(in_mad); kfree(out_mad); return err; } static int mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey) { return __mlx4_ib_query_pkey(ibdev, port, index, pkey, 0); } static int mlx4_ib_modify_device(struct ib_device *ibdev, int mask, struct ib_device_modify *props) { struct mlx4_cmd_mailbox *mailbox; unsigned long flags; if (mask & ~IB_DEVICE_MODIFY_NODE_DESC) return -EOPNOTSUPP; if (!(mask & IB_DEVICE_MODIFY_NODE_DESC)) return 0; if (mlx4_is_slave(to_mdev(ibdev)->dev)) return -EOPNOTSUPP; spin_lock_irqsave(&to_mdev(ibdev)->sm_lock, flags); memcpy(ibdev->node_desc, props->node_desc, 64); spin_unlock_irqrestore(&to_mdev(ibdev)->sm_lock, flags); /* * If possible, pass node desc to FW, so it can generate * a 144 trap. If cmd fails, just ignore. */ mailbox = mlx4_alloc_cmd_mailbox(to_mdev(ibdev)->dev); if (IS_ERR(mailbox)) return 0; memset(mailbox->buf, 0, 256); memcpy(mailbox->buf, props->node_desc, 64); mlx4_cmd(to_mdev(ibdev)->dev, mailbox->dma, 1, 0, MLX4_CMD_SET_NODE, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); mlx4_free_cmd_mailbox(to_mdev(ibdev)->dev, mailbox); return 0; } static int mlx4_SET_PORT(struct mlx4_ib_dev *dev, u8 port, int reset_qkey_viols, u32 cap_mask) { struct mlx4_cmd_mailbox *mailbox; int err; u8 is_eth = dev->dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH; mailbox = mlx4_alloc_cmd_mailbox(dev->dev); if (IS_ERR(mailbox)) return PTR_ERR(mailbox); memset(mailbox->buf, 0, 256); if (dev->dev->flags & MLX4_FLAG_OLD_PORT_CMDS) { *(u8 *) mailbox->buf = !!reset_qkey_viols << 6; ((__be32 *) mailbox->buf)[2] = cpu_to_be32(cap_mask); } else { ((u8 *) mailbox->buf)[3] = !!reset_qkey_viols; ((__be32 *) mailbox->buf)[1] = cpu_to_be32(cap_mask); } err = mlx4_cmd(dev->dev, mailbox->dma, port, is_eth, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); mlx4_free_cmd_mailbox(dev->dev, mailbox); return err; } static int mlx4_ib_modify_port(struct ib_device *ibdev, u8 port, int mask, struct ib_port_modify *props) { struct ib_port_attr attr; u32 cap_mask; int err; mutex_lock(&to_mdev(ibdev)->cap_mask_mutex); err = mlx4_ib_query_port(ibdev, port, &attr); if (err) goto out; cap_mask = (attr.port_cap_flags | props->set_port_cap_mask) & ~props->clr_port_cap_mask; err = mlx4_SET_PORT(to_mdev(ibdev), port, !!(mask & IB_PORT_RESET_QKEY_CNTR), cap_mask); out: mutex_unlock(&to_mdev(ibdev)->cap_mask_mutex); return err; } static struct ib_ucontext *mlx4_ib_alloc_ucontext(struct ib_device *ibdev, struct ib_udata *udata) { struct mlx4_ib_dev *dev = to_mdev(ibdev); struct mlx4_ib_ucontext *context; struct mlx4_ib_alloc_ucontext_resp_v3 resp_v3; struct mlx4_ib_alloc_ucontext_resp resp; int err; if (!dev->ib_active) return ERR_PTR(-EAGAIN); if (ibdev->uverbs_abi_ver == MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION) { resp_v3.qp_tab_size = dev->dev->caps.num_qps; if (mlx4_wc_enabled()) { resp_v3.bf_reg_size = dev->dev->caps.bf_reg_size; resp_v3.bf_regs_per_page = dev->dev->caps.bf_regs_per_page; } else { resp_v3.bf_reg_size = 0; resp_v3.bf_regs_per_page = 0; } } else { resp.dev_caps = dev->dev->caps.userspace_caps; resp.qp_tab_size = dev->dev->caps.num_qps; if (mlx4_wc_enabled()) { resp.bf_reg_size = dev->dev->caps.bf_reg_size; resp.bf_regs_per_page = dev->dev->caps.bf_regs_per_page; } else { resp.bf_reg_size = 0; resp.bf_regs_per_page = 0; } resp.cqe_size = dev->dev->caps.cqe_size; } context = kmalloc(sizeof *context, GFP_KERNEL); if (!context) return ERR_PTR(-ENOMEM); err = mlx4_uar_alloc(to_mdev(ibdev)->dev, &context->uar); if (err) { kfree(context); return ERR_PTR(err); } INIT_LIST_HEAD(&context->db_page_list); mutex_init(&context->db_page_mutex); if (ibdev->uverbs_abi_ver == MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION) err = ib_copy_to_udata(udata, &resp_v3, sizeof(resp_v3)); else err = ib_copy_to_udata(udata, &resp, sizeof(resp)); if (err) { mlx4_uar_free(to_mdev(ibdev)->dev, &context->uar); kfree(context); return ERR_PTR(-EFAULT); } return &context->ibucontext; } static int mlx4_ib_dealloc_ucontext(struct ib_ucontext *ibcontext) { struct mlx4_ib_ucontext *context = to_mucontext(ibcontext); mlx4_uar_free(to_mdev(ibcontext->device)->dev, &context->uar); kfree(context); return 0; } /* XXX FBSD has no support for get_unmapped_area function */ #if 0 static unsigned long mlx4_ib_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) { struct mm_struct *mm; struct vm_area_struct *vma; unsigned long start_addr; unsigned long page_size_order; unsigned long command; mm = current->mm; if (addr) return current->mm->get_unmapped_area(file, addr, len, pgoff, flags); /* Last 8 bits hold the command others are data per that command */ command = pgoff & MLX4_IB_MMAP_CMD_MASK; if (command != MLX4_IB_MMAP_GET_CONTIGUOUS_PAGES) return current->mm->get_unmapped_area(file, addr, len, pgoff, flags); page_size_order = pgoff >> MLX4_IB_MMAP_CMD_BITS; /* code is based on the huge-pages get_unmapped_area code */ start_addr = mm->free_area_cache; if (len <= mm->cached_hole_size) start_addr = TASK_UNMAPPED_BASE; full_search: addr = ALIGN(start_addr, 1 << page_size_order); for (vma = find_vma(mm, addr); ; vma = vma->vm_next) { /* At this point: (!vma || addr < vma->vm_end). */ if (TASK_SIZE - len < addr) { /* * Start a new search - just in case we missed * some holes. */ if (start_addr != TASK_UNMAPPED_BASE) { start_addr = TASK_UNMAPPED_BASE; goto full_search; } return -ENOMEM; } if (!vma || addr + len <= vma->vm_start) return addr; addr = ALIGN(vma->vm_end, 1 << page_size_order); } } #endif static int mlx4_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) { struct mlx4_ib_dev *dev = to_mdev(context->device); /* Last 8 bits hold the command others are data per that command */ unsigned long command = vma->vm_pgoff & MLX4_IB_MMAP_CMD_MASK; if (command < MLX4_IB_MMAP_GET_CONTIGUOUS_PAGES) { - /* compatability handling for commands 0 & 1*/ + /* compatibility handling for commands 0 & 1*/ if (vma->vm_end - vma->vm_start != PAGE_SIZE) return -EINVAL; } if (command == MLX4_IB_MMAP_UAR_PAGE) { vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); if (io_remap_pfn_range(vma, vma->vm_start, to_mucontext(context)->uar.pfn, PAGE_SIZE, vma->vm_page_prot)) return -EAGAIN; } else if (command == MLX4_IB_MMAP_BLUE_FLAME_PAGE && dev->dev->caps.bf_reg_size != 0) { vma->vm_page_prot = pgprot_wc(vma->vm_page_prot); if (io_remap_pfn_range(vma, vma->vm_start, to_mucontext(context)->uar.pfn + dev->dev->caps.num_uars, PAGE_SIZE, vma->vm_page_prot)) return -EAGAIN; } else if (command == MLX4_IB_MMAP_GET_HW_CLOCK) { struct mlx4_clock_params params; int ret; ret = mlx4_get_internal_clock_params(dev->dev, ¶ms); if (ret) return ret; vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); if (io_remap_pfn_range(vma, vma->vm_start, (pci_resource_start(dev->dev->pdev, params.bar) + params.offset) >> PAGE_SHIFT, PAGE_SIZE, vma->vm_page_prot)) return -EAGAIN; } else return -EINVAL; return 0; } static int mlx4_ib_ioctl(struct ib_ucontext *context, unsigned int cmd, unsigned long arg) { struct mlx4_ib_dev *dev = to_mdev(context->device); int ret; int offset; switch (cmd) { case MLX4_IOCHWCLOCKOFFSET: { struct mlx4_clock_params params; int ret; ret = mlx4_get_internal_clock_params(dev->dev, ¶ms); if (!ret) { offset = params.offset % PAGE_SIZE; ret = put_user(offset, (int *)arg); return sizeof(int); } else { return ret; } } default: { pr_err("mlx4_ib: invalid ioctl %u command with arg %lX\n", cmd, arg); return -ENOTTY; } } return ret; } static int mlx4_ib_query_values(struct ib_device *device, int q_values, struct ib_device_values *values) { struct mlx4_ib_dev *dev = to_mdev(device); cycle_t cycles; values->values_mask = 0; if (q_values & IBV_VALUES_HW_CLOCK) { cycles = mlx4_read_clock(dev->dev); if (cycles < 0) { values->hwclock = cycles & CORE_CLOCK_MASK; values->values_mask |= IBV_VALUES_HW_CLOCK; } q_values &= ~IBV_VALUES_HW_CLOCK; } if (q_values) return -ENOTTY; return 0; } static struct ib_pd *mlx4_ib_alloc_pd(struct ib_device *ibdev, struct ib_ucontext *context, struct ib_udata *udata) { struct mlx4_ib_pd *pd; int err; pd = kmalloc(sizeof *pd, GFP_KERNEL); if (!pd) return ERR_PTR(-ENOMEM); err = mlx4_pd_alloc(to_mdev(ibdev)->dev, &pd->pdn); if (err) { kfree(pd); return ERR_PTR(err); } if (context) if (ib_copy_to_udata(udata, &pd->pdn, sizeof (__u32))) { mlx4_pd_free(to_mdev(ibdev)->dev, pd->pdn); kfree(pd); return ERR_PTR(-EFAULT); } return &pd->ibpd; } static int mlx4_ib_dealloc_pd(struct ib_pd *pd) { mlx4_pd_free(to_mdev(pd->device)->dev, to_mpd(pd)->pdn); kfree(pd); return 0; } static struct ib_xrcd *mlx4_ib_alloc_xrcd(struct ib_device *ibdev, struct ib_ucontext *context, struct ib_udata *udata) { struct mlx4_ib_xrcd *xrcd; int err; if (!(to_mdev(ibdev)->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC)) return ERR_PTR(-ENOSYS); xrcd = kmalloc(sizeof *xrcd, GFP_KERNEL); if (!xrcd) return ERR_PTR(-ENOMEM); err = mlx4_xrcd_alloc(to_mdev(ibdev)->dev, &xrcd->xrcdn); if (err) goto err1; xrcd->pd = ib_alloc_pd(ibdev); if (IS_ERR(xrcd->pd)) { err = PTR_ERR(xrcd->pd); goto err2; } xrcd->cq = ib_create_cq(ibdev, NULL, NULL, xrcd, 1, 0); if (IS_ERR(xrcd->cq)) { err = PTR_ERR(xrcd->cq); goto err3; } return &xrcd->ibxrcd; err3: ib_dealloc_pd(xrcd->pd); err2: mlx4_xrcd_free(to_mdev(ibdev)->dev, xrcd->xrcdn); err1: kfree(xrcd); return ERR_PTR(err); } static int mlx4_ib_dealloc_xrcd(struct ib_xrcd *xrcd) { ib_destroy_cq(to_mxrcd(xrcd)->cq); ib_dealloc_pd(to_mxrcd(xrcd)->pd); mlx4_xrcd_free(to_mdev(xrcd->device)->dev, to_mxrcd(xrcd)->xrcdn); kfree(xrcd); return 0; } static int add_gid_entry(struct ib_qp *ibqp, union ib_gid *gid) { struct mlx4_ib_qp *mqp = to_mqp(ibqp); struct mlx4_ib_dev *mdev = to_mdev(ibqp->device); struct mlx4_ib_gid_entry *ge; ge = kzalloc(sizeof *ge, GFP_KERNEL); if (!ge) return -ENOMEM; ge->gid = *gid; if (mlx4_ib_add_mc(mdev, mqp, gid)) { ge->port = mqp->port; ge->added = 1; } mutex_lock(&mqp->mutex); list_add_tail(&ge->list, &mqp->gid_list); mutex_unlock(&mqp->mutex); return 0; } int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp, union ib_gid *gid) { u8 mac[6]; struct net_device *ndev; int ret = 0; if (!mqp->port) return 0; spin_lock(&mdev->iboe.lock); ndev = mdev->iboe.netdevs[mqp->port - 1]; if (ndev) dev_hold(ndev); spin_unlock(&mdev->iboe.lock); if (ndev) { rdma_get_mcast_mac((struct in6_addr *)gid, mac); rtnl_lock(); dev_mc_add(mdev->iboe.netdevs[mqp->port - 1], mac, 6, 0); ret = 1; rtnl_unlock(); dev_put(ndev); } return ret; } struct mlx4_ib_steering { struct list_head list; u64 reg_id; union ib_gid gid; }; static int parse_flow_attr(struct mlx4_dev *dev, union ib_flow_spec *ib_spec, struct _rule_hw *mlx4_spec) { enum mlx4_net_trans_rule_id type; switch (ib_spec->type) { case IB_FLOW_SPEC_ETH: type = MLX4_NET_TRANS_RULE_ID_ETH; memcpy(mlx4_spec->eth.dst_mac, ib_spec->eth.val.dst_mac, ETH_ALEN); memcpy(mlx4_spec->eth.dst_mac_msk, ib_spec->eth.mask.dst_mac, ETH_ALEN); mlx4_spec->eth.vlan_tag = ib_spec->eth.val.vlan_tag; mlx4_spec->eth.vlan_tag_msk = ib_spec->eth.mask.vlan_tag; break; case IB_FLOW_SPEC_IB: type = MLX4_NET_TRANS_RULE_ID_IB; mlx4_spec->ib.l3_qpn = ib_spec->ib.val.l3_type_qpn; mlx4_spec->ib.qpn_mask = ib_spec->ib.mask.l3_type_qpn; memcpy(&mlx4_spec->ib.dst_gid, ib_spec->ib.val.dst_gid, 16); memcpy(&mlx4_spec->ib.dst_gid_msk, ib_spec->ib.mask.dst_gid, 16); break; case IB_FLOW_SPEC_IPV4: type = MLX4_NET_TRANS_RULE_ID_IPV4; mlx4_spec->ipv4.src_ip = ib_spec->ipv4.val.src_ip; mlx4_spec->ipv4.src_ip_msk = ib_spec->ipv4.mask.src_ip; mlx4_spec->ipv4.dst_ip = ib_spec->ipv4.val.dst_ip; mlx4_spec->ipv4.dst_ip_msk = ib_spec->ipv4.mask.dst_ip; break; case IB_FLOW_SPEC_TCP: case IB_FLOW_SPEC_UDP: type = ib_spec->type == IB_FLOW_SPEC_TCP ? MLX4_NET_TRANS_RULE_ID_TCP : MLX4_NET_TRANS_RULE_ID_UDP; mlx4_spec->tcp_udp.dst_port = ib_spec->tcp_udp.val.dst_port; mlx4_spec->tcp_udp.dst_port_msk = ib_spec->tcp_udp.mask.dst_port; mlx4_spec->tcp_udp.src_port = ib_spec->tcp_udp.val.src_port; mlx4_spec->tcp_udp.src_port_msk = ib_spec->tcp_udp.mask.src_port; break; default: return -EINVAL; } if (map_sw_to_hw_steering_id(dev, type) < 0 || hw_rule_sz(dev, type) < 0) return -EINVAL; mlx4_spec->id = cpu_to_be16(map_sw_to_hw_steering_id(dev, type)); mlx4_spec->size = hw_rule_sz(dev, type) >> 2; return hw_rule_sz(dev, type); } static int __mlx4_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_attr, int domain, enum mlx4_net_trans_promisc_mode flow_type, u64 *reg_id) { int ret, i; int size = 0; void *ib_flow; struct mlx4_ib_dev *mdev = to_mdev(qp->device); struct mlx4_cmd_mailbox *mailbox; struct mlx4_net_trans_rule_hw_ctrl *ctrl; size_t rule_size = sizeof(struct mlx4_net_trans_rule_hw_ctrl) + (sizeof(struct _rule_hw) * flow_attr->num_of_specs); static const u16 __mlx4_domain[] = { [IB_FLOW_DOMAIN_USER] = MLX4_DOMAIN_UVERBS, [IB_FLOW_DOMAIN_ETHTOOL] = MLX4_DOMAIN_ETHTOOL, [IB_FLOW_DOMAIN_RFS] = MLX4_DOMAIN_RFS, [IB_FLOW_DOMAIN_NIC] = MLX4_DOMAIN_NIC, }; if (flow_attr->priority > MLX4_IB_FLOW_MAX_PRIO) { pr_err("Invalid priority value.\n"); return -EINVAL; } if (domain >= IB_FLOW_DOMAIN_NUM) { pr_err("Invalid domain value.\n"); return -EINVAL; } if (map_sw_to_hw_steering_mode(mdev->dev, flow_type) < 0) return -EINVAL; mailbox = mlx4_alloc_cmd_mailbox(mdev->dev); if (IS_ERR(mailbox)) return PTR_ERR(mailbox); memset(mailbox->buf, 0, rule_size); ctrl = mailbox->buf; ctrl->prio = cpu_to_be16(__mlx4_domain[domain] | flow_attr->priority); ctrl->type = map_sw_to_hw_steering_mode(mdev->dev, flow_type); ctrl->port = flow_attr->port; ctrl->qpn = cpu_to_be32(qp->qp_num); if (flow_attr->flags & IB_FLOW_ATTR_FLAGS_ALLOW_LOOP_BACK) ctrl->flags = (1 << 3); ib_flow = flow_attr + 1; size += sizeof(struct mlx4_net_trans_rule_hw_ctrl); for (i = 0; i < flow_attr->num_of_specs; i++) { ret = parse_flow_attr(mdev->dev, ib_flow, mailbox->buf + size); if (ret < 0) { mlx4_free_cmd_mailbox(mdev->dev, mailbox); return -EINVAL; } ib_flow += ((union ib_flow_spec *)ib_flow)->size; size += ret; } ret = mlx4_cmd_imm(mdev->dev, mailbox->dma, reg_id, size >> 2, 0, MLX4_QP_FLOW_STEERING_ATTACH, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); if (ret == -ENOMEM) pr_err("mcg table is full. Fail to register network rule.\n"); else if (ret == -ENXIO) pr_err("Device managed flow steering is disabled. Fail to register network rule.\n"); else if (ret) pr_err("Invalid argumant. Fail to register network rule.\n"); mlx4_free_cmd_mailbox(mdev->dev, mailbox); return ret; } static int __mlx4_ib_destroy_flow(struct mlx4_dev *dev, u64 reg_id) { int err; err = mlx4_cmd(dev, reg_id, 0, 0, MLX4_QP_FLOW_STEERING_DETACH, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); if (err) pr_err("Fail to detach network rule. registration id = 0x%llx\n", (unsigned long long)reg_id); return err; } static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_attr, int domain) { int err = 0, i = 0; struct mlx4_ib_flow *mflow; enum mlx4_net_trans_promisc_mode type[2]; memset(type, 0, sizeof(type)); mflow = kzalloc(sizeof(struct mlx4_ib_flow), GFP_KERNEL); if (!mflow) { err = -ENOMEM; goto err_free; } switch (flow_attr->type) { case IB_FLOW_ATTR_NORMAL: type[0] = MLX4_FS_REGULAR; break; case IB_FLOW_ATTR_ALL_DEFAULT: type[0] = MLX4_FS_ALL_DEFAULT; break; case IB_FLOW_ATTR_MC_DEFAULT: type[0] = MLX4_FS_MC_DEFAULT; break; case IB_FLOW_ATTR_SNIFFER: type[0] = MLX4_FS_UC_SNIFFER; type[1] = MLX4_FS_MC_SNIFFER; break; default: err = -EINVAL; goto err_free; } while (i < ARRAY_SIZE(type) && type[i]) { err = __mlx4_ib_create_flow(qp, flow_attr, domain, type[i], &mflow->reg_id[i]); if (err) goto err_free; i++; } return &mflow->ibflow; err_free: kfree(mflow); return ERR_PTR(err); } static int mlx4_ib_destroy_flow(struct ib_flow *flow_id) { int err, ret = 0; int i = 0; struct mlx4_ib_dev *mdev = to_mdev(flow_id->qp->device); struct mlx4_ib_flow *mflow = to_mflow(flow_id); while (i < ARRAY_SIZE(mflow->reg_id) && mflow->reg_id[i]) { err = __mlx4_ib_destroy_flow(mdev->dev, mflow->reg_id[i]); if (err) ret = err; i++; } kfree(mflow); return ret; } static struct mlx4_ib_gid_entry *find_gid_entry(struct mlx4_ib_qp *qp, u8 *raw) { struct mlx4_ib_gid_entry *ge; struct mlx4_ib_gid_entry *tmp; struct mlx4_ib_gid_entry *ret = NULL; list_for_each_entry_safe(ge, tmp, &qp->gid_list, list) { if (!memcmp(raw, ge->gid.raw, 16)) { ret = ge; break; } } return ret; } static int del_gid_entry(struct ib_qp *ibqp, union ib_gid *gid) { struct mlx4_ib_dev *mdev = to_mdev(ibqp->device); struct mlx4_ib_qp *mqp = to_mqp(ibqp); struct mlx4_ib_gid_entry *ge; struct net_device *ndev; u8 mac[6]; mutex_lock(&mqp->mutex); ge = find_gid_entry(mqp, gid->raw); if (ge) { spin_lock(&mdev->iboe.lock); ndev = ge->added ? mdev->iboe.netdevs[ge->port - 1] : NULL; if (ndev) dev_hold(ndev); spin_unlock(&mdev->iboe.lock); rdma_get_mcast_mac((struct in6_addr *)gid, mac); if (ndev) { rtnl_lock(); dev_mc_delete(mdev->iboe.netdevs[ge->port - 1], mac, 6, 0); rtnl_unlock(); dev_put(ndev); } list_del(&ge->list); kfree(ge); } else pr_warn("could not find mgid entry\n"); mutex_unlock(&mqp->mutex); return ge != NULL ? 0 : -EINVAL; } static int _mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid, int count) { int err; struct mlx4_ib_dev *mdev = to_mdev(ibqp->device); struct mlx4_ib_qp *mqp = to_mqp(ibqp); u64 reg_id = 0; int record_err = 0; if (mdev->dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) { struct mlx4_ib_steering *ib_steering; struct mlx4_ib_steering *tmp; LIST_HEAD(temp); mutex_lock(&mqp->mutex); list_for_each_entry_safe(ib_steering, tmp, &mqp->steering_rules, list) { if (memcmp(ib_steering->gid.raw, gid->raw, 16)) continue; if (--count < 0) break; list_del(&ib_steering->list); list_add(&ib_steering->list, &temp); } mutex_unlock(&mqp->mutex); list_for_each_entry_safe(ib_steering, tmp, &temp, list) { reg_id = ib_steering->reg_id; err = mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw, (ibqp->qp_type == IB_QPT_RAW_PACKET) ? MLX4_PROT_ETH : MLX4_PROT_IB_IPV6, reg_id); if (err) { record_err = record_err ?: err; continue; } err = del_gid_entry(ibqp, gid); if (err) { record_err = record_err ?: err; continue; } list_del(&ib_steering->list); kfree(ib_steering); } mutex_lock(&mqp->mutex); list_for_each_entry(ib_steering, &temp, list) { list_add(&ib_steering->list, &mqp->steering_rules); } mutex_unlock(&mqp->mutex); if (count) { pr_warn("Couldn't release all reg_ids for mgid. Steering rule is left attached\n"); return -EINVAL; } } else { if (mdev->dev->caps.steering_mode == MLX4_STEERING_MODE_B0 && ibqp->qp_type == IB_QPT_RAW_PACKET) gid->raw[5] = mqp->port; err = mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw, (ibqp->qp_type == IB_QPT_RAW_PACKET) ? MLX4_PROT_ETH : MLX4_PROT_IB_IPV6, reg_id); if (err) return err; err = del_gid_entry(ibqp, gid); if (err) return err; } return record_err; } static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) { struct mlx4_ib_dev *mdev = to_mdev(ibqp->device); int count = (mdev->dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) ? mdev->dev->caps.num_ports : 1; return _mlx4_ib_mcg_detach(ibqp, gid, lid, count); } static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) { int err = -ENODEV; struct mlx4_ib_dev *mdev = to_mdev(ibqp->device); struct mlx4_ib_qp *mqp = to_mqp(ibqp); DECLARE_BITMAP(ports, MLX4_MAX_PORTS); int i = 0; if (mdev->dev->caps.steering_mode == MLX4_STEERING_MODE_B0 && ibqp->qp_type == IB_QPT_RAW_PACKET) gid->raw[5] = mqp->port; if (mdev->dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) { bitmap_fill(ports, mdev->dev->caps.num_ports); } else { if (mqp->port <= mdev->dev->caps.num_ports) { bitmap_zero(ports, mdev->dev->caps.num_ports); set_bit(0, ports); } else { return -EINVAL; } } for (; i < mdev->dev->caps.num_ports; i++) { u64 reg_id; struct mlx4_ib_steering *ib_steering = NULL; if (!test_bit(i, ports)) continue; if (mdev->dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) { ib_steering = kmalloc(sizeof(*ib_steering), GFP_KERNEL); if (!ib_steering) goto err_add; } err = mlx4_multicast_attach(mdev->dev, &mqp->mqp, gid->raw, i + 1, !!(mqp->flags & MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK), (ibqp->qp_type == IB_QPT_RAW_PACKET) ? MLX4_PROT_ETH : MLX4_PROT_IB_IPV6, ®_id); if (err) { kfree(ib_steering); goto err_add; } err = add_gid_entry(ibqp, gid); if (err) { mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw, MLX4_PROT_IB_IPV6, reg_id); kfree(ib_steering); goto err_add; } if (ib_steering) { memcpy(ib_steering->gid.raw, gid->raw, 16); mutex_lock(&mqp->mutex); list_add(&ib_steering->list, &mqp->steering_rules); mutex_unlock(&mqp->mutex); ib_steering->reg_id = reg_id; } } return 0; err_add: if (i > 0) _mlx4_ib_mcg_detach(ibqp, gid, lid, i); return err; } static int init_node_data(struct mlx4_ib_dev *dev) { struct ib_smp *in_mad = NULL; struct ib_smp *out_mad = NULL; int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS; int err = -ENOMEM; in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); if (!in_mad || !out_mad) goto out; init_query_mad(in_mad); in_mad->attr_id = IB_SMP_ATTR_NODE_DESC; if (mlx4_is_master(dev->dev)) mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW; err = mlx4_MAD_IFC(dev, mad_ifc_flags, 1, NULL, NULL, in_mad, out_mad); if (err) goto out; memcpy(dev->ib_dev.node_desc, out_mad->data, 64); in_mad->attr_id = IB_SMP_ATTR_NODE_INFO; err = mlx4_MAD_IFC(dev, mad_ifc_flags, 1, NULL, NULL, in_mad, out_mad); if (err) goto out; dev->dev->rev_id = be32_to_cpup((__be32 *) (out_mad->data + 32)); memcpy(&dev->ib_dev.node_guid, out_mad->data + 12, 8); out: kfree(in_mad); kfree(out_mad); return err; } static ssize_t show_hca(struct device *device, struct device_attribute *attr, char *buf) { struct mlx4_ib_dev *dev = container_of(device, struct mlx4_ib_dev, ib_dev.dev); return sprintf(buf, "MT%d\n", dev->dev->pdev->device); } static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr, char *buf) { struct mlx4_ib_dev *dev = container_of(device, struct mlx4_ib_dev, ib_dev.dev); return sprintf(buf, "%d.%d.%d\n", (int) (dev->dev->caps.fw_ver >> 32), (int) (dev->dev->caps.fw_ver >> 16) & 0xffff, (int) dev->dev->caps.fw_ver & 0xffff); } static ssize_t show_rev(struct device *device, struct device_attribute *attr, char *buf) { struct mlx4_ib_dev *dev = container_of(device, struct mlx4_ib_dev, ib_dev.dev); return sprintf(buf, "%x\n", dev->dev->rev_id); } static ssize_t show_board(struct device *device, struct device_attribute *attr, char *buf) { struct mlx4_ib_dev *dev = container_of(device, struct mlx4_ib_dev, ib_dev.dev); return sprintf(buf, "%.*s\n", MLX4_BOARD_ID_LEN, dev->dev->board_id); } static ssize_t show_vsd(struct device *device, struct device_attribute *attr, char *buf) { struct mlx4_ib_dev *dev = container_of(device, struct mlx4_ib_dev, ib_dev.dev); ssize_t len = MLX4_VSD_LEN; if (dev->dev->vsd_vendor_id == PCI_VENDOR_ID_MELLANOX) len = sprintf(buf, "%.*s\n", MLX4_VSD_LEN, dev->dev->vsd); else memcpy(buf, dev->dev->vsd, MLX4_VSD_LEN); return len; } static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL); static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL); static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL); static DEVICE_ATTR(vsd, S_IRUGO, show_vsd, NULL); static struct device_attribute *mlx4_class_attributes[] = { &dev_attr_hw_rev, &dev_attr_fw_ver, &dev_attr_hca_type, &dev_attr_board_id, &dev_attr_vsd }; static void mlx4_addrconf_ifid_eui48(u8 *eui, u16 vlan_id, struct net_device *dev, u8 port) { memcpy(eui, IF_LLADDR(dev), 3); memcpy(eui + 5, IF_LLADDR(dev) + 3, 3); if (vlan_id < 0x1000) { eui[3] = vlan_id >> 8; eui[4] = vlan_id & 0xff; } else { eui[3] = 0xff; eui[4] = 0xfe; } eui[0] ^= 2; } static void update_gids_task(struct work_struct *work) { struct update_gid_work *gw = container_of(work, struct update_gid_work, work); struct mlx4_cmd_mailbox *mailbox; union ib_gid *gids; int err; struct mlx4_dev *dev = gw->dev->dev; mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) { pr_warn("update gid table failed %ld\n", PTR_ERR(mailbox)); goto free; } gids = mailbox->buf; memcpy(gids, gw->gids, sizeof gw->gids); if (mlx4_ib_port_link_layer(&gw->dev->ib_dev, gw->port) == IB_LINK_LAYER_ETHERNET) { err = mlx4_cmd(dev, mailbox->dma, MLX4_SET_PORT_GID_TABLE << 8 | gw->port, 1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED); if (err) pr_warn("set port command failed\n"); else mlx4_ib_dispatch_event(gw->dev, gw->port, IB_EVENT_GID_CHANGE); } mlx4_free_cmd_mailbox(dev, mailbox); free: kfree(gw); } static struct net_device *mlx4_ib_get_netdev(struct ib_device *device, u8 port_num) { struct mlx4_ib_dev *ibdev = to_mdev(device); return mlx4_get_protocol_dev(ibdev->dev, MLX4_PROT_ETH, port_num); } static void reset_gids_task(struct work_struct *work) { struct update_gid_work *gw = container_of(work, struct update_gid_work, work); struct mlx4_cmd_mailbox *mailbox; union ib_gid *gids; int err; struct mlx4_dev *dev = gw->dev->dev; mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) { pr_warn("reset gid table failed\n"); goto free; } gids = mailbox->buf; memcpy(gids, gw->gids, sizeof(gw->gids)); if (mlx4_ib_port_link_layer(&gw->dev->ib_dev, 1) == IB_LINK_LAYER_ETHERNET && dev->caps.num_ports > 0) { err = mlx4_cmd(dev, mailbox->dma, MLX4_SET_PORT_GID_TABLE << 8 | 1, 1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED); if (err) pr_warn("set port 1 command failed\n"); } if (mlx4_ib_port_link_layer(&gw->dev->ib_dev, 2) == IB_LINK_LAYER_ETHERNET && dev->caps.num_ports > 1) { err = mlx4_cmd(dev, mailbox->dma, MLX4_SET_PORT_GID_TABLE << 8 | 2, 1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED); if (err) pr_warn("set port 2 command failed\n"); } mlx4_free_cmd_mailbox(dev, mailbox); free: kfree(gw); } static int update_gid_table(struct mlx4_ib_dev *dev, int port, union ib_gid *gid, int clear, int default_gid) { struct update_gid_work *work; int i; int need_update = 0; int free = -1; int found = -1; int max_gids; int start_index = !default_gid; max_gids = dev->dev->caps.gid_table_len[port]; for (i = start_index; i < max_gids; ++i) { if (!memcmp(&dev->iboe.gid_table[port - 1][i], gid, sizeof(*gid))) found = i; if (clear) { if (found >= 0) { need_update = 1; dev->iboe.gid_table[port - 1][found] = zgid; break; } } else { if (found >= 0) break; if (free < 0 && !memcmp(&dev->iboe.gid_table[port - 1][i], &zgid, sizeof(*gid))) free = i; } } if (found == -1 && !clear && free < 0) { pr_err("GID table of port %d is full. Can't add "GID_PRINT_FMT"\n", port, GID_PRINT_ARGS(gid)); return -ENOMEM; } if (found == -1 && clear) { pr_err(GID_PRINT_FMT" is not in GID table of port %d\n", GID_PRINT_ARGS(gid), port); return -EINVAL; } if (found == -1 && !clear && free >= 0) { dev->iboe.gid_table[port - 1][free] = *gid; need_update = 1; } if (!need_update) return 0; work = kzalloc(sizeof *work, GFP_ATOMIC); if (!work) return -ENOMEM; memcpy(work->gids, dev->iboe.gid_table[port - 1], sizeof(work->gids)); INIT_WORK(&work->work, update_gids_task); work->port = port; work->dev = dev; queue_work(wq, &work->work); return 0; } static int reset_gid_table(struct mlx4_ib_dev *dev) { struct update_gid_work *work; work = kzalloc(sizeof(*work), GFP_ATOMIC); if (!work) return -ENOMEM; memset(dev->iboe.gid_table, 0, sizeof(dev->iboe.gid_table)); memset(work->gids, 0, sizeof(work->gids)); INIT_WORK(&work->work, reset_gids_task); work->dev = dev; queue_work(wq, &work->work); return 0; } /* XXX BOND Related - stub (no support for these flags in FBSD)*/ static inline int netif_is_bond_master(struct net_device *dev) { #if 0 return (dev->flags & IFF_MASTER) && (dev->priv_flags & IFF_BONDING); #endif return 0; } static void mlx4_make_default_gid(struct net_device *dev, union ib_gid *gid, u8 port) { gid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL); mlx4_addrconf_ifid_eui48(&gid->raw[8], 0xffff, dev, port); } static u8 mlx4_ib_get_dev_port(struct net_device *dev, struct mlx4_ib_dev *ibdev) { u8 port = 0; struct mlx4_ib_iboe *iboe; struct net_device *real_dev = rdma_vlan_dev_real_dev(dev) ? rdma_vlan_dev_real_dev(dev) : dev; iboe = &ibdev->iboe; for (port = 1; port <= MLX4_MAX_PORTS; ++port) if ((netif_is_bond_master(real_dev) && (real_dev == iboe->masters[port - 1])) || (!netif_is_bond_master(real_dev) && (real_dev == iboe->netdevs[port - 1]))) break; return port > MLX4_MAX_PORTS ? 0 : port; } static void mlx4_ib_get_dev_addr(struct net_device *dev, struct mlx4_ib_dev *ibdev, u8 port) { struct ifaddr *ifa; #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) struct inet6_dev *in6_dev; union ib_gid *pgid; struct inet6_ifaddr *ifp; #endif union ib_gid gid; if ((port == 0) || (port > MLX4_MAX_PORTS)) return; /* IPv4 gids */ TAILQ_FOREACH(ifa, &dev->if_addrhead, ifa_link) { if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET){ ipv6_addr_set_v4mapped( ((struct sockaddr_in *) ifa->ifa_addr)->sin_addr.s_addr, (struct in6_addr *)&gid); update_gid_table(ibdev, port, &gid, 0, 0); } } #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) /* IPv6 gids */ in6_dev = in6_dev_get(dev); if (in6_dev) { read_lock_bh(&in6_dev->lock); list_for_each_entry(ifp, &in6_dev->addr_list, if_list) { pgid = (union ib_gid *)&ifp->addr; update_gid_table(ibdev, port, pgid, 0, 0); } read_unlock_bh(&in6_dev->lock); in6_dev_put(in6_dev); } #endif } static void mlx4_set_default_gid(struct mlx4_ib_dev *ibdev, struct net_device *dev, u8 port) { union ib_gid gid; mlx4_make_default_gid(dev, &gid, port); update_gid_table(ibdev, port, &gid, 0, 1); } static int mlx4_ib_init_gid_table(struct mlx4_ib_dev *ibdev) { struct net_device *dev; if (reset_gid_table(ibdev)) return -1; IFNET_RLOCK_NOSLEEP(); TAILQ_FOREACH(dev, &V_ifnet, if_link) { u8 port = mlx4_ib_get_dev_port(dev, ibdev); if (port) { if (!rdma_vlan_dev_real_dev(dev) && !netif_is_bond_master(dev)) mlx4_set_default_gid(ibdev, dev, port); mlx4_ib_get_dev_addr(dev, ibdev, port); } } IFNET_RUNLOCK_NOSLEEP(); return 0; } static void mlx4_ib_scan_netdevs(struct mlx4_ib_dev *ibdev, struct net_device *dev, unsigned long event) { struct mlx4_ib_iboe *iboe; int port; int init = 0; unsigned long flags; iboe = &ibdev->iboe; spin_lock_irqsave(&iboe->lock, flags); mlx4_foreach_ib_transport_port(port, ibdev->dev) { struct net_device *old_netdev = iboe->netdevs[port - 1]; /* XXX BOND related */ #if 0 struct net_device *old_master = iboe->masters[port - 1]; #endif iboe->masters[port - 1] = NULL; iboe->netdevs[port - 1] = mlx4_get_protocol_dev(ibdev->dev, MLX4_PROT_ETH, port); if (old_netdev != iboe->netdevs[port - 1]) init = 1; if (dev == iboe->netdevs[port - 1] && event == NETDEV_CHANGEADDR) init = 1; /* XXX BOND related */ #if 0 if (iboe->netdevs[port - 1] && netif_is_bond_slave(iboe->netdevs[port - 1])) iboe->masters[port - 1] = iboe->netdevs[port - 1]->master; /* if bonding is used it is possible that we add it to masters only after IP address is assigned to the net bonding interface */ if (old_master != iboe->masters[port - 1]) init = 1; #endif } spin_unlock_irqrestore(&iboe->lock, flags); if (init) if (mlx4_ib_init_gid_table(ibdev)) pr_warn("Fail to reset gid table\n"); } static int mlx4_ib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = ptr; struct mlx4_ib_dev *ibdev; ibdev = container_of(this, struct mlx4_ib_dev, iboe.nb); mlx4_ib_scan_netdevs(ibdev, dev, event); return NOTIFY_DONE; } /* This function initializes the gid table only if the event_netdev real device is an iboe * device, will be invoked by the inet/inet6 events */ static int mlx4_ib_inet_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *event_netdev = ptr; struct mlx4_ib_dev *ibdev; struct mlx4_ib_iboe *ibdev_iboe; int port = 0; ibdev = container_of(this, struct mlx4_ib_dev, iboe.nb_inet); struct net_device *real_dev = rdma_vlan_dev_real_dev(event_netdev) ? rdma_vlan_dev_real_dev(event_netdev) : event_netdev; ibdev_iboe = &ibdev->iboe; port = mlx4_ib_get_dev_port(real_dev, ibdev); /* Perform init_gid_table if the event real_dev is the net_device which represents this port, * otherwise this event is not related and would be ignored.*/ if(port && (real_dev == ibdev_iboe->netdevs[port - 1])) if (mlx4_ib_init_gid_table(ibdev)) pr_warn("Fail to reset gid table\n"); return NOTIFY_DONE; } static void init_pkeys(struct mlx4_ib_dev *ibdev) { int port; int slave; int i; if (mlx4_is_master(ibdev->dev)) { for (slave = 0; slave <= ibdev->dev->num_vfs; ++slave) { for (port = 1; port <= ibdev->dev->caps.num_ports; ++port) { for (i = 0; i < ibdev->dev->phys_caps.pkey_phys_table_len[port]; ++i) { ibdev->pkeys.virt2phys_pkey[slave][port - 1][i] = /* master has the identity virt2phys pkey mapping */ (slave == mlx4_master_func_num(ibdev->dev) || !i) ? i : ibdev->dev->phys_caps.pkey_phys_table_len[port] - 1; mlx4_sync_pkey_table(ibdev->dev, slave, port, i, ibdev->pkeys.virt2phys_pkey[slave][port - 1][i]); } } } /* initialize pkey cache */ for (port = 1; port <= ibdev->dev->caps.num_ports; ++port) { for (i = 0; i < ibdev->dev->phys_caps.pkey_phys_table_len[port]; ++i) ibdev->pkeys.phys_pkey_cache[port-1][i] = (i) ? 0 : 0xFFFF; } } } static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev) { char name[32]; int eq_per_port = 0; int added_eqs = 0; int total_eqs = 0; int i, j, eq; /* Legacy mode or comp_pool is not large enough */ if (dev->caps.comp_pool == 0 || dev->caps.num_ports > dev->caps.comp_pool) return; eq_per_port = rounddown_pow_of_two(dev->caps.comp_pool/ dev->caps.num_ports); /* Init eq table */ added_eqs = 0; mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB) added_eqs += eq_per_port; total_eqs = dev->caps.num_comp_vectors + added_eqs; ibdev->eq_table = kzalloc(total_eqs * sizeof(int), GFP_KERNEL); if (!ibdev->eq_table) return; ibdev->eq_added = added_eqs; eq = 0; mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB) { for (j = 0; j < eq_per_port; j++) { sprintf(name, "mlx4-ib-%d-%d@%d:%d:%d:%d", i, j, pci_get_domain(dev->pdev->dev.bsddev), pci_get_bus(dev->pdev->dev.bsddev), PCI_SLOT(dev->pdev->devfn), PCI_FUNC(dev->pdev->devfn)); /* Set IRQ for specific name (per ring) */ if (mlx4_assign_eq(dev, name, &ibdev->eq_table[eq])) { /* Use legacy (same as mlx4_en driver) */ pr_warn("Can't allocate EQ %d; reverting to legacy\n", eq); ibdev->eq_table[eq] = (eq % dev->caps.num_comp_vectors); } eq++; } } /* Fill the reset of the vector with legacy EQ */ for (i = 0, eq = added_eqs; i < dev->caps.num_comp_vectors; i++) ibdev->eq_table[eq++] = i; /* Advertise the new number of EQs to clients */ ibdev->ib_dev.num_comp_vectors = total_eqs; } static void mlx4_ib_free_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev) { int i; /* no additional eqs were added */ if (!ibdev->eq_table) return; /* Reset the advertised EQ number */ ibdev->ib_dev.num_comp_vectors = dev->caps.num_comp_vectors; /* Free only the added eqs */ for (i = 0; i < ibdev->eq_added; i++) { /* Don't free legacy eqs if used */ if (ibdev->eq_table[i] <= dev->caps.num_comp_vectors) continue; mlx4_release_eq(dev, ibdev->eq_table[i]); } kfree(ibdev->eq_table); } /* * create show function and a device_attribute struct pointing to * the function for _name */ #define DEVICE_DIAG_RPRT_ATTR(_name, _offset, _op_mod) \ static ssize_t show_rprt_##_name(struct device *dev, \ struct device_attribute *attr, \ char *buf){ \ return show_diag_rprt(dev, buf, _offset, _op_mod); \ } \ static DEVICE_ATTR(_name, S_IRUGO, show_rprt_##_name, NULL); #define MLX4_DIAG_RPRT_CLEAR_DIAGS 3 static size_t show_diag_rprt(struct device *device, char *buf, u32 offset, u8 op_modifier) { size_t ret; u32 counter_offset = offset; u32 diag_counter = 0; struct mlx4_ib_dev *dev = container_of(device, struct mlx4_ib_dev, ib_dev.dev); ret = mlx4_query_diag_counters(dev->dev, 1, op_modifier, &counter_offset, &diag_counter); if (ret) return ret; return sprintf(buf, "%d\n", diag_counter); } static ssize_t clear_diag_counters(struct device *device, struct device_attribute *attr, const char *buf, size_t length) { size_t ret; struct mlx4_ib_dev *dev = container_of(device, struct mlx4_ib_dev, ib_dev.dev); ret = mlx4_query_diag_counters(dev->dev, 0, MLX4_DIAG_RPRT_CLEAR_DIAGS, NULL, NULL); if (ret) return ret; return length; } DEVICE_DIAG_RPRT_ATTR(rq_num_lle , 0x00, 2); DEVICE_DIAG_RPRT_ATTR(sq_num_lle , 0x04, 2); DEVICE_DIAG_RPRT_ATTR(rq_num_lqpoe , 0x08, 2); DEVICE_DIAG_RPRT_ATTR(sq_num_lqpoe , 0x0C, 2); DEVICE_DIAG_RPRT_ATTR(rq_num_lpe , 0x18, 2); DEVICE_DIAG_RPRT_ATTR(sq_num_lpe , 0x1C, 2); DEVICE_DIAG_RPRT_ATTR(rq_num_wrfe , 0x20, 2); DEVICE_DIAG_RPRT_ATTR(sq_num_wrfe , 0x24, 2); DEVICE_DIAG_RPRT_ATTR(sq_num_mwbe , 0x2C, 2); DEVICE_DIAG_RPRT_ATTR(sq_num_bre , 0x34, 2); DEVICE_DIAG_RPRT_ATTR(rq_num_lae , 0x38, 2); DEVICE_DIAG_RPRT_ATTR(sq_num_rire , 0x44, 2); DEVICE_DIAG_RPRT_ATTR(rq_num_rire , 0x48, 2); DEVICE_DIAG_RPRT_ATTR(sq_num_rae , 0x4C, 2); DEVICE_DIAG_RPRT_ATTR(rq_num_rae , 0x50, 2); DEVICE_DIAG_RPRT_ATTR(sq_num_roe , 0x54, 2); DEVICE_DIAG_RPRT_ATTR(sq_num_tree , 0x5C, 2); DEVICE_DIAG_RPRT_ATTR(sq_num_rree , 0x64, 2); DEVICE_DIAG_RPRT_ATTR(rq_num_rnr , 0x68, 2); DEVICE_DIAG_RPRT_ATTR(sq_num_rnr , 0x6C, 2); DEVICE_DIAG_RPRT_ATTR(rq_num_oos , 0x100, 2); DEVICE_DIAG_RPRT_ATTR(sq_num_oos , 0x104, 2); DEVICE_DIAG_RPRT_ATTR(rq_num_mce , 0x108, 2); DEVICE_DIAG_RPRT_ATTR(rq_num_udsdprd , 0x118, 2); DEVICE_DIAG_RPRT_ATTR(rq_num_ucsdprd , 0x120, 2); DEVICE_DIAG_RPRT_ATTR(num_cqovf , 0x1A0, 2); DEVICE_DIAG_RPRT_ATTR(num_eqovf , 0x1A4, 2); DEVICE_DIAG_RPRT_ATTR(num_baddb , 0x1A8, 2); static DEVICE_ATTR(clear_diag, S_IWUSR, NULL, clear_diag_counters); static struct attribute *diag_rprt_attrs[] = { &dev_attr_rq_num_lle.attr, &dev_attr_sq_num_lle.attr, &dev_attr_rq_num_lqpoe.attr, &dev_attr_sq_num_lqpoe.attr, &dev_attr_rq_num_lpe.attr, &dev_attr_sq_num_lpe.attr, &dev_attr_rq_num_wrfe.attr, &dev_attr_sq_num_wrfe.attr, &dev_attr_sq_num_mwbe.attr, &dev_attr_sq_num_bre.attr, &dev_attr_rq_num_lae.attr, &dev_attr_sq_num_rire.attr, &dev_attr_rq_num_rire.attr, &dev_attr_sq_num_rae.attr, &dev_attr_rq_num_rae.attr, &dev_attr_sq_num_roe.attr, &dev_attr_sq_num_tree.attr, &dev_attr_sq_num_rree.attr, &dev_attr_rq_num_rnr.attr, &dev_attr_sq_num_rnr.attr, &dev_attr_rq_num_oos.attr, &dev_attr_sq_num_oos.attr, &dev_attr_rq_num_mce.attr, &dev_attr_rq_num_udsdprd.attr, &dev_attr_rq_num_ucsdprd.attr, &dev_attr_num_cqovf.attr, &dev_attr_num_eqovf.attr, &dev_attr_num_baddb.attr, &dev_attr_clear_diag.attr, NULL }; static struct attribute_group diag_counters_group = { .name = "diag_counters", .attrs = diag_rprt_attrs }; static void init_dev_assign(void) { int i = 1; spin_lock_init(&dev_num_str_lock); if (mlx4_fill_dbdf2val_tbl(&dev_assign_str)) return; dev_num_str_bitmap = kmalloc(BITS_TO_LONGS(MAX_NUM_STR_BITMAP) * sizeof(long), GFP_KERNEL); if (!dev_num_str_bitmap) { pr_warn("bitmap alloc failed -- cannot apply dev_assign_str parameter\n"); return; } bitmap_zero(dev_num_str_bitmap, MAX_NUM_STR_BITMAP); while ((i < MLX4_DEVS_TBL_SIZE) && (dev_assign_str.tbl[i].dbdf != MLX4_ENDOF_TBL)) { if (bitmap_allocate_region(dev_num_str_bitmap, dev_assign_str.tbl[i].val[0], 0)) goto err; i++; } dr_active = 1; return; err: kfree(dev_num_str_bitmap); dev_num_str_bitmap = NULL; pr_warn("mlx4_ib: The value of 'dev_assign_str' parameter " "is incorrect. The parameter value is discarded!"); } static int mlx4_ib_dev_idx(struct mlx4_dev *dev) { int i, val; if (!dr_active) return -1; if (!dev) return -1; if (mlx4_get_val(dev_assign_str.tbl, dev->pdev, 0, &val)) return -1; if (val != DEFAULT_TBL_VAL) { dev->flags |= MLX4_FLAG_DEV_NUM_STR; return val; } spin_lock(&dev_num_str_lock); i = bitmap_find_free_region(dev_num_str_bitmap, MAX_NUM_STR_BITMAP, 0); spin_unlock(&dev_num_str_lock); if (i >= 0) return i; return -1; } static void *mlx4_ib_add(struct mlx4_dev *dev) { struct mlx4_ib_dev *ibdev; int num_ports = 0; int i, j; int err; struct mlx4_ib_iboe *iboe; int dev_idx; pr_info_once("%s", mlx4_ib_version); mlx4_foreach_ib_transport_port(i, dev) num_ports++; /* No point in registering a device with no ports... */ if (num_ports == 0) return NULL; ibdev = (struct mlx4_ib_dev *) ib_alloc_device(sizeof *ibdev); if (!ibdev) { dev_err(&dev->pdev->dev, "Device struct alloc failed\n"); return NULL; } iboe = &ibdev->iboe; if (mlx4_pd_alloc(dev, &ibdev->priv_pdn)) goto err_dealloc; if (mlx4_uar_alloc(dev, &ibdev->priv_uar)) goto err_pd; ibdev->priv_uar.map = ioremap(ibdev->priv_uar.pfn << PAGE_SHIFT, PAGE_SIZE); if (!ibdev->priv_uar.map) goto err_uar; MLX4_INIT_DOORBELL_LOCK(&ibdev->uar_lock); ibdev->dev = dev; dev_idx = mlx4_ib_dev_idx(dev); if (dev_idx >= 0) sprintf(ibdev->ib_dev.name, "mlx4_%d", dev_idx); else strlcpy(ibdev->ib_dev.name, "mlx4_%d", IB_DEVICE_NAME_MAX); ibdev->ib_dev.owner = THIS_MODULE; ibdev->ib_dev.node_type = RDMA_NODE_IB_CA; ibdev->ib_dev.local_dma_lkey = dev->caps.reserved_lkey; ibdev->num_ports = num_ports; ibdev->ib_dev.phys_port_cnt = ibdev->num_ports; ibdev->ib_dev.num_comp_vectors = dev->caps.num_comp_vectors; ibdev->ib_dev.dma_device = &dev->pdev->dev; if (dev->caps.userspace_caps) ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_ABI_VERSION; else ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION; ibdev->ib_dev.uverbs_cmd_mask = (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | (1ull << IB_USER_VERBS_CMD_REG_MR) | (1ull << IB_USER_VERBS_CMD_DEREG_MR) | (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) | (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | (1ull << IB_USER_VERBS_CMD_CREATE_QP) | (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | (1ull << IB_USER_VERBS_CMD_QUERY_QP) | (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) | (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) | (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) | (1ull << IB_USER_VERBS_CMD_CREATE_XSRQ) | (1ull << IB_USER_VERBS_CMD_OPEN_QP); ibdev->ib_dev.query_device = mlx4_ib_query_device; ibdev->ib_dev.query_port = mlx4_ib_query_port; ibdev->ib_dev.get_link_layer = mlx4_ib_port_link_layer; ibdev->ib_dev.query_gid = mlx4_ib_query_gid; ibdev->ib_dev.query_pkey = mlx4_ib_query_pkey; ibdev->ib_dev.modify_device = mlx4_ib_modify_device; ibdev->ib_dev.modify_port = mlx4_ib_modify_port; ibdev->ib_dev.alloc_ucontext = mlx4_ib_alloc_ucontext; ibdev->ib_dev.dealloc_ucontext = mlx4_ib_dealloc_ucontext; ibdev->ib_dev.mmap = mlx4_ib_mmap; /* XXX FBSD has no support for get_unmapped_area function */ #if 0 ibdev->ib_dev.get_unmapped_area = mlx4_ib_get_unmapped_area; #endif ibdev->ib_dev.alloc_pd = mlx4_ib_alloc_pd; ibdev->ib_dev.dealloc_pd = mlx4_ib_dealloc_pd; ibdev->ib_dev.create_ah = mlx4_ib_create_ah; ibdev->ib_dev.query_ah = mlx4_ib_query_ah; ibdev->ib_dev.destroy_ah = mlx4_ib_destroy_ah; ibdev->ib_dev.create_srq = mlx4_ib_create_srq; ibdev->ib_dev.modify_srq = mlx4_ib_modify_srq; ibdev->ib_dev.query_srq = mlx4_ib_query_srq; ibdev->ib_dev.destroy_srq = mlx4_ib_destroy_srq; ibdev->ib_dev.post_srq_recv = mlx4_ib_post_srq_recv; ibdev->ib_dev.create_qp = mlx4_ib_create_qp; ibdev->ib_dev.modify_qp = mlx4_ib_modify_qp; ibdev->ib_dev.query_qp = mlx4_ib_query_qp; ibdev->ib_dev.destroy_qp = mlx4_ib_destroy_qp; ibdev->ib_dev.post_send = mlx4_ib_post_send; ibdev->ib_dev.post_recv = mlx4_ib_post_recv; ibdev->ib_dev.create_cq = mlx4_ib_create_cq; ibdev->ib_dev.modify_cq = mlx4_ib_modify_cq; ibdev->ib_dev.resize_cq = mlx4_ib_resize_cq; ibdev->ib_dev.destroy_cq = mlx4_ib_destroy_cq; ibdev->ib_dev.poll_cq = mlx4_ib_poll_cq; ibdev->ib_dev.req_notify_cq = mlx4_ib_arm_cq; ibdev->ib_dev.get_dma_mr = mlx4_ib_get_dma_mr; ibdev->ib_dev.reg_user_mr = mlx4_ib_reg_user_mr; ibdev->ib_dev.dereg_mr = mlx4_ib_dereg_mr; ibdev->ib_dev.alloc_fast_reg_mr = mlx4_ib_alloc_fast_reg_mr; ibdev->ib_dev.alloc_fast_reg_page_list = mlx4_ib_alloc_fast_reg_page_list; ibdev->ib_dev.free_fast_reg_page_list = mlx4_ib_free_fast_reg_page_list; ibdev->ib_dev.attach_mcast = mlx4_ib_mcg_attach; ibdev->ib_dev.detach_mcast = mlx4_ib_mcg_detach; ibdev->ib_dev.process_mad = mlx4_ib_process_mad; ibdev->ib_dev.get_netdev = mlx4_ib_get_netdev; ibdev->ib_dev.ioctl = mlx4_ib_ioctl; ibdev->ib_dev.query_values = mlx4_ib_query_values; if (!mlx4_is_slave(ibdev->dev)) { ibdev->ib_dev.alloc_fmr = mlx4_ib_fmr_alloc; ibdev->ib_dev.map_phys_fmr = mlx4_ib_map_phys_fmr; ibdev->ib_dev.unmap_fmr = mlx4_ib_unmap_fmr; ibdev->ib_dev.dealloc_fmr = mlx4_ib_fmr_dealloc; } if (dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW) { ibdev->ib_dev.alloc_mw = mlx4_ib_alloc_mw; ibdev->ib_dev.bind_mw = mlx4_ib_bind_mw; ibdev->ib_dev.dealloc_mw = mlx4_ib_dealloc_mw; ibdev->ib_dev.uverbs_cmd_mask |= (1ull << IB_USER_VERBS_CMD_ALLOC_MW) | (1ull << IB_USER_VERBS_CMD_DEALLOC_MW); } if (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) { ibdev->ib_dev.alloc_xrcd = mlx4_ib_alloc_xrcd; ibdev->ib_dev.dealloc_xrcd = mlx4_ib_dealloc_xrcd; ibdev->ib_dev.uverbs_cmd_mask |= (1ull << IB_USER_VERBS_CMD_OPEN_XRCD) | (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD); } /* * Set experimental data */ ibdev->ib_dev.uverbs_exp_cmd_mask = (1ull << IB_USER_VERBS_EXP_CMD_CREATE_QP) | (1ull << IB_USER_VERBS_EXP_CMD_MODIFY_CQ) | (1ull << IB_USER_VERBS_EXP_CMD_QUERY_DEVICE) | (1ull << IB_USER_VERBS_EXP_CMD_CREATE_CQ); ibdev->ib_dev.exp_create_qp = mlx4_ib_exp_create_qp; ibdev->ib_dev.exp_query_device = mlx4_ib_exp_query_device; if (check_flow_steering_support(dev)) { ibdev->ib_dev.uverbs_ex_cmd_mask |= (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) | (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW); ibdev->ib_dev.create_flow = mlx4_ib_create_flow; ibdev->ib_dev.destroy_flow = mlx4_ib_destroy_flow; } else { pr_debug("Device managed flow steering is unavailable for this configuration.\n"); } /* * End of experimental data */ mlx4_ib_alloc_eqs(dev, ibdev); spin_lock_init(&iboe->lock); if (init_node_data(ibdev)) goto err_map; for (i = 0; i < ibdev->num_ports; ++i) { if (mlx4_ib_port_link_layer(&ibdev->ib_dev, i + 1) == IB_LINK_LAYER_ETHERNET) { if (mlx4_is_slave(dev)) { ibdev->counters[i].status = mlx4_counter_alloc(ibdev->dev, i + 1, &ibdev->counters[i].counter_index); } else {/* allocating the PF IB default counter indices reserved in mlx4_init_counters_table */ ibdev->counters[i].counter_index = ((i + 1) << 1) - 1; ibdev->counters[i].status = 0; } dev_info(&dev->pdev->dev, "%s: allocated counter index %d for port %d\n", __func__, ibdev->counters[i].counter_index, i+1); } else { ibdev->counters[i].counter_index = MLX4_SINK_COUNTER_INDEX; ibdev->counters[i].status = -ENOSPC; } } spin_lock_init(&ibdev->sm_lock); mutex_init(&ibdev->cap_mask_mutex); if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED && !mlx4_is_mfunc(dev)) { ibdev->steer_qpn_count = MLX4_IB_UC_MAX_NUM_QPS; err = mlx4_qp_reserve_range(dev, ibdev->steer_qpn_count, MLX4_IB_UC_STEER_QPN_ALIGN, &ibdev->steer_qpn_base, 0); if (err) goto err_counter; ibdev->ib_uc_qpns_bitmap = kmalloc(BITS_TO_LONGS(ibdev->steer_qpn_count) * sizeof(long), GFP_KERNEL); if (!ibdev->ib_uc_qpns_bitmap) { dev_err(&dev->pdev->dev, "bit map alloc failed\n"); goto err_steer_qp_release; } bitmap_zero(ibdev->ib_uc_qpns_bitmap, ibdev->steer_qpn_count); err = mlx4_FLOW_STEERING_IB_UC_QP_RANGE(dev, ibdev->steer_qpn_base, ibdev->steer_qpn_base + ibdev->steer_qpn_count - 1); if (err) goto err_steer_free_bitmap; } if (ib_register_device(&ibdev->ib_dev, NULL)) goto err_steer_free_bitmap; if (mlx4_ib_mad_init(ibdev)) goto err_reg; if (mlx4_ib_init_sriov(ibdev)) goto err_mad; if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE) { if (!iboe->nb.notifier_call) { iboe->nb.notifier_call = mlx4_ib_netdev_event; err = register_netdevice_notifier(&iboe->nb); if (err) { iboe->nb.notifier_call = NULL; goto err_notify; } } if (!iboe->nb_inet.notifier_call) { iboe->nb_inet.notifier_call = mlx4_ib_inet_event; err = register_inetaddr_notifier(&iboe->nb_inet); if (err) { iboe->nb_inet.notifier_call = NULL; goto err_notify; } } mlx4_ib_scan_netdevs(ibdev, NULL, 0); } for (j = 0; j < ARRAY_SIZE(mlx4_class_attributes); ++j) { if (device_create_file(&ibdev->ib_dev.dev, mlx4_class_attributes[j])) goto err_notify; } if (sysfs_create_group(&ibdev->ib_dev.dev.kobj, &diag_counters_group)) goto err_notify; ibdev->ib_active = true; if (mlx4_is_mfunc(ibdev->dev)) init_pkeys(ibdev); /* create paravirt contexts for any VFs which are active */ if (mlx4_is_master(ibdev->dev)) { for (j = 0; j < MLX4_MFUNC_MAX; j++) { if (j == mlx4_master_func_num(ibdev->dev)) continue; if (mlx4_is_slave_active(ibdev->dev, j)) do_slave_init(ibdev, j, 1); } } return ibdev; err_notify: for (j = 0; j < ARRAY_SIZE(mlx4_class_attributes); ++j) { device_remove_file(&ibdev->ib_dev.dev, mlx4_class_attributes[j]); } if (ibdev->iboe.nb.notifier_call) { if (unregister_netdevice_notifier(&ibdev->iboe.nb)) pr_warn("failure unregistering notifier\n"); ibdev->iboe.nb.notifier_call = NULL; } if (ibdev->iboe.nb_inet.notifier_call) { if (unregister_inetaddr_notifier(&ibdev->iboe.nb_inet)) pr_warn("failure unregistering notifier\n"); ibdev->iboe.nb_inet.notifier_call = NULL; } flush_workqueue(wq); mlx4_ib_close_sriov(ibdev); err_mad: mlx4_ib_mad_cleanup(ibdev); err_reg: ib_unregister_device(&ibdev->ib_dev); err_steer_free_bitmap: kfree(ibdev->ib_uc_qpns_bitmap); err_steer_qp_release: if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) mlx4_qp_release_range(dev, ibdev->steer_qpn_base, ibdev->steer_qpn_count); err_counter: for (; i; --i) { if (mlx4_ib_port_link_layer(&ibdev->ib_dev, i) == IB_LINK_LAYER_ETHERNET) { mlx4_counter_free(ibdev->dev, i, ibdev->counters[i - 1].counter_index); } } err_map: iounmap(ibdev->priv_uar.map); mlx4_ib_free_eqs(dev, ibdev); err_uar: mlx4_uar_free(dev, &ibdev->priv_uar); err_pd: mlx4_pd_free(dev, ibdev->priv_pdn); err_dealloc: ib_dealloc_device(&ibdev->ib_dev); return NULL; } int mlx4_ib_steer_qp_alloc(struct mlx4_ib_dev *dev, int count, int *qpn) { int offset; WARN_ON(!dev->ib_uc_qpns_bitmap); offset = bitmap_find_free_region(dev->ib_uc_qpns_bitmap, dev->steer_qpn_count, get_count_order(count)); if (offset < 0) return offset; *qpn = dev->steer_qpn_base + offset; return 0; } void mlx4_ib_steer_qp_free(struct mlx4_ib_dev *dev, u32 qpn, int count) { if (!qpn || dev->dev->caps.steering_mode != MLX4_STEERING_MODE_DEVICE_MANAGED) return; BUG_ON(qpn < dev->steer_qpn_base); bitmap_release_region(dev->ib_uc_qpns_bitmap, qpn - dev->steer_qpn_base, get_count_order(count)); } int mlx4_ib_steer_qp_reg(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp, int is_attach) { int err; size_t flow_size; struct ib_flow_attr *flow = NULL; struct ib_flow_spec_ib *ib_spec; if (is_attach) { flow_size = sizeof(struct ib_flow_attr) + sizeof(struct ib_flow_spec_ib); flow = kzalloc(flow_size, GFP_KERNEL); if (!flow) return -ENOMEM; flow->port = mqp->port; flow->num_of_specs = 1; flow->size = flow_size; ib_spec = (struct ib_flow_spec_ib *)(flow + 1); ib_spec->type = IB_FLOW_SPEC_IB; ib_spec->size = sizeof(struct ib_flow_spec_ib); ib_spec->val.l3_type_qpn = mqp->ibqp.qp_num; ib_spec->mask.l3_type_qpn = MLX4_IB_FLOW_QPN_MASK; err = __mlx4_ib_create_flow(&mqp->ibqp, flow, IB_FLOW_DOMAIN_NIC, MLX4_FS_REGULAR, &mqp->reg_id); } else { err = __mlx4_ib_destroy_flow(mdev->dev, mqp->reg_id); } kfree(flow); return err; } static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr) { struct mlx4_ib_dev *ibdev = ibdev_ptr; int p, j; int dev_idx, ret; if (ibdev->iboe.nb_inet.notifier_call) { if (unregister_inetaddr_notifier(&ibdev->iboe.nb_inet)) pr_warn("failure unregistering notifier\n"); ibdev->iboe.nb_inet.notifier_call = NULL; } mlx4_ib_close_sriov(ibdev); sysfs_remove_group(&ibdev->ib_dev.dev.kobj, &diag_counters_group); mlx4_ib_mad_cleanup(ibdev); for (j = 0; j < ARRAY_SIZE(mlx4_class_attributes); ++j) { device_remove_file(&ibdev->ib_dev.dev, mlx4_class_attributes[j]); } dev_idx = -1; if (dr_active && !(ibdev->dev->flags & MLX4_FLAG_DEV_NUM_STR)) { ret = sscanf(ibdev->ib_dev.name, "mlx4_%d", &dev_idx); if (ret != 1) dev_idx = -1; } ib_unregister_device(&ibdev->ib_dev); if (dev_idx >= 0) { spin_lock(&dev_num_str_lock); bitmap_release_region(dev_num_str_bitmap, dev_idx, 0); spin_unlock(&dev_num_str_lock); } if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) { mlx4_qp_release_range(dev, ibdev->steer_qpn_base, ibdev->steer_qpn_count); kfree(ibdev->ib_uc_qpns_bitmap); } if (ibdev->iboe.nb.notifier_call) { if (unregister_netdevice_notifier(&ibdev->iboe.nb)) pr_warn("failure unregistering notifier\n"); ibdev->iboe.nb.notifier_call = NULL; } iounmap(ibdev->priv_uar.map); for (p = 0; p < ibdev->num_ports; ++p) { if (mlx4_ib_port_link_layer(&ibdev->ib_dev, p + 1) == IB_LINK_LAYER_ETHERNET) { mlx4_counter_free(ibdev->dev, p + 1, ibdev->counters[p].counter_index); } } mlx4_foreach_port(p, dev, MLX4_PORT_TYPE_IB) mlx4_CLOSE_PORT(dev, p); mlx4_ib_free_eqs(dev, ibdev); mlx4_uar_free(dev, &ibdev->priv_uar); mlx4_pd_free(dev, ibdev->priv_pdn); ib_dealloc_device(&ibdev->ib_dev); } static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init) { struct mlx4_ib_demux_work **dm = NULL; struct mlx4_dev *dev = ibdev->dev; int i; unsigned long flags; if (!mlx4_is_master(dev)) return; dm = kcalloc(dev->caps.num_ports, sizeof *dm, GFP_ATOMIC); if (!dm) { pr_err("failed to allocate memory for tunneling qp update\n"); goto out; } for (i = 0; i < dev->caps.num_ports; i++) { dm[i] = kmalloc(sizeof (struct mlx4_ib_demux_work), GFP_ATOMIC); if (!dm[i]) { pr_err("failed to allocate memory for tunneling qp update work struct\n"); for (i = 0; i < dev->caps.num_ports; i++) { if (dm[i]) kfree(dm[i]); } goto out; } } /* initialize or tear down tunnel QPs for the slave */ for (i = 0; i < dev->caps.num_ports; i++) { INIT_WORK(&dm[i]->work, mlx4_ib_tunnels_update_work); dm[i]->port = i + 1; dm[i]->slave = slave; dm[i]->do_init = do_init; dm[i]->dev = ibdev; spin_lock_irqsave(&ibdev->sriov.going_down_lock, flags); if (!ibdev->sriov.is_going_down) queue_work(ibdev->sriov.demux[i].ud_wq, &dm[i]->work); spin_unlock_irqrestore(&ibdev->sriov.going_down_lock, flags); } out: if (dm) kfree(dm); return; } static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr, enum mlx4_dev_event event, unsigned long param) { struct ib_event ibev; struct mlx4_ib_dev *ibdev = to_mdev((struct ib_device *) ibdev_ptr); struct mlx4_eqe *eqe = NULL; struct ib_event_work *ew; int p = 0; if (event == MLX4_DEV_EVENT_PORT_MGMT_CHANGE) eqe = (struct mlx4_eqe *)param; else p = (int) param; switch (event) { case MLX4_DEV_EVENT_PORT_UP: if (p > ibdev->num_ports) return; if (mlx4_is_master(dev) && rdma_port_get_link_layer(&ibdev->ib_dev, p) == IB_LINK_LAYER_INFINIBAND) { mlx4_ib_invalidate_all_guid_record(ibdev, p); } mlx4_ib_info((struct ib_device *) ibdev_ptr, "Port %d logical link is up\n", p); ibev.event = IB_EVENT_PORT_ACTIVE; break; case MLX4_DEV_EVENT_PORT_DOWN: if (p > ibdev->num_ports) return; mlx4_ib_info((struct ib_device *) ibdev_ptr, "Port %d logical link is down\n", p); ibev.event = IB_EVENT_PORT_ERR; break; case MLX4_DEV_EVENT_CATASTROPHIC_ERROR: ibdev->ib_active = false; ibev.event = IB_EVENT_DEVICE_FATAL; break; case MLX4_DEV_EVENT_PORT_MGMT_CHANGE: ew = kmalloc(sizeof *ew, GFP_ATOMIC); if (!ew) { pr_err("failed to allocate memory for events work\n"); break; } INIT_WORK(&ew->work, handle_port_mgmt_change_event); memcpy(&ew->ib_eqe, eqe, sizeof *eqe); ew->ib_dev = ibdev; /* need to queue only for port owner, which uses GEN_EQE */ if (mlx4_is_master(dev)) queue_work(wq, &ew->work); else handle_port_mgmt_change_event(&ew->work); return; case MLX4_DEV_EVENT_SLAVE_INIT: /* here, p is the slave id */ do_slave_init(ibdev, p, 1); return; case MLX4_DEV_EVENT_SLAVE_SHUTDOWN: /* here, p is the slave id */ do_slave_init(ibdev, p, 0); return; default: return; } ibev.device = ibdev_ptr; ibev.element.port_num = (u8) p; ib_dispatch_event(&ibev); } static struct mlx4_interface mlx4_ib_interface = { .add = mlx4_ib_add, .remove = mlx4_ib_remove, .event = mlx4_ib_event, .protocol = MLX4_PROT_IB_IPV6 }; static int __init mlx4_ib_init(void) { int err; wq = create_singlethread_workqueue("mlx4_ib"); if (!wq) return -ENOMEM; err = mlx4_ib_mcg_init(); if (err) goto clean_proc; init_dev_assign(); err = mlx4_register_interface(&mlx4_ib_interface); if (err) goto clean_mcg; return 0; clean_mcg: mlx4_ib_mcg_destroy(); clean_proc: destroy_workqueue(wq); return err; } static void __exit mlx4_ib_cleanup(void) { mlx4_unregister_interface(&mlx4_ib_interface); mlx4_ib_mcg_destroy(); destroy_workqueue(wq); kfree(dev_num_str_bitmap); } module_init_order(mlx4_ib_init, SI_ORDER_MIDDLE); module_exit(mlx4_ib_cleanup); static int mlx4ib_evhand(module_t mod, int event, void *arg) { return (0); } static moduledata_t mlx4ib_mod = { .name = "mlx4ib", .evhand = mlx4ib_evhand, }; DECLARE_MODULE(mlx4ib, mlx4ib_mod, SI_SUB_LAST, SI_ORDER_ANY); MODULE_DEPEND(mlx4ib, mlx4, 1, 1, 1); MODULE_DEPEND(mlx4ib, ibcore, 1, 1, 1); MODULE_DEPEND(mlx4ib, linuxkpi, 1, 1, 1); Index: head/sys/ofed/drivers/infiniband/hw/mlx4/mcg.c =================================================================== --- head/sys/ofed/drivers/infiniband/hw/mlx4/mcg.c (revision 299178) +++ head/sys/ofed/drivers/infiniband/hw/mlx4/mcg.c (revision 299179) @@ -1,1260 +1,1260 @@ /* * Copyright (c) 2012 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include #include #include #include #include #include #include #include "mlx4_ib.h" #define MAX_VFS 80 #define MAX_PEND_REQS_PER_FUNC 4 #define MAD_TIMEOUT_MS 2000 #define mcg_warn(fmt, arg...) pr_warn("MCG WARNING: " fmt, ##arg) #define mcg_error(fmt, arg...) pr_err(fmt, ##arg) #define mcg_warn_group(group, format, arg...) \ pr_warn("%s-%d: %16s (port %d): WARNING: " format, __func__, __LINE__,\ (group)->name, group->demux->port, ## arg) #define mcg_error_group(group, format, arg...) \ pr_err(" %16s: " format, (group)->name, ## arg) static union ib_gid mgid0; static struct workqueue_struct *clean_wq; enum mcast_state { MCAST_NOT_MEMBER = 0, MCAST_MEMBER, }; enum mcast_group_state { MCAST_IDLE, MCAST_JOIN_SENT, MCAST_LEAVE_SENT, MCAST_RESP_READY }; struct mcast_member { enum mcast_state state; uint8_t join_state; int num_pend_reqs; struct list_head pending; }; struct ib_sa_mcmember_data { union ib_gid mgid; union ib_gid port_gid; __be32 qkey; __be16 mlid; u8 mtusel_mtu; u8 tclass; __be16 pkey; u8 ratesel_rate; u8 lifetmsel_lifetm; __be32 sl_flowlabel_hoplimit; u8 scope_join_state; u8 proxy_join; u8 reserved[2]; }; struct mcast_group { struct ib_sa_mcmember_data rec; struct rb_node node; struct list_head mgid0_list; struct mlx4_ib_demux_ctx *demux; struct mcast_member func[MAX_VFS]; struct mutex lock; struct work_struct work; struct list_head pending_list; int members[3]; enum mcast_group_state state; enum mcast_group_state prev_state; struct ib_sa_mad response_sa_mad; __be64 last_req_tid; char name[33]; /* MGID string */ struct device_attribute dentry; /* refcount is the reference count for the following: 1. Each queued request 2. Each invocation of the worker thread 3. Membership of the port at the SA */ atomic_t refcount; /* delayed work to clean pending SM request */ struct delayed_work timeout_work; struct list_head cleanup_list; }; struct mcast_req { int func; struct ib_sa_mad sa_mad; struct list_head group_list; struct list_head func_list; struct mcast_group *group; int clean; }; #define safe_atomic_dec(ref) \ do {\ if (atomic_dec_and_test(ref)) \ mcg_warn_group(group, "did not expect to reach zero\n"); \ } while (0) static const char *get_state_string(enum mcast_group_state state) { switch (state) { case MCAST_IDLE: return "MCAST_IDLE"; case MCAST_JOIN_SENT: return "MCAST_JOIN_SENT"; case MCAST_LEAVE_SENT: return "MCAST_LEAVE_SENT"; case MCAST_RESP_READY: return "MCAST_RESP_READY"; } return "Invalid State"; } static struct mcast_group *mcast_find(struct mlx4_ib_demux_ctx *ctx, union ib_gid *mgid) { struct rb_node *node = ctx->mcg_table.rb_node; struct mcast_group *group; int ret; while (node) { group = rb_entry(node, struct mcast_group, node); ret = memcmp(mgid->raw, group->rec.mgid.raw, sizeof *mgid); if (!ret) return group; if (ret < 0) node = node->rb_left; else node = node->rb_right; } return NULL; } static struct mcast_group *mcast_insert(struct mlx4_ib_demux_ctx *ctx, struct mcast_group *group) { struct rb_node **link = &ctx->mcg_table.rb_node; struct rb_node *parent = NULL; struct mcast_group *cur_group; int ret; while (*link) { parent = *link; cur_group = rb_entry(parent, struct mcast_group, node); ret = memcmp(group->rec.mgid.raw, cur_group->rec.mgid.raw, sizeof group->rec.mgid); if (ret < 0) link = &(*link)->rb_left; else if (ret > 0) link = &(*link)->rb_right; else return cur_group; } rb_link_node(&group->node, parent, link); rb_insert_color(&group->node, &ctx->mcg_table); return NULL; } static int send_mad_to_wire(struct mlx4_ib_demux_ctx *ctx, struct ib_mad *mad) { struct mlx4_ib_dev *dev = ctx->dev; struct ib_ah_attr ah_attr; spin_lock(&dev->sm_lock); if (!dev->sm_ah[ctx->port - 1]) { /* port is not yet Active, sm_ah not ready */ spin_unlock(&dev->sm_lock); return -EAGAIN; } mlx4_ib_query_ah(dev->sm_ah[ctx->port - 1], &ah_attr); spin_unlock(&dev->sm_lock); return mlx4_ib_send_to_wire(dev, mlx4_master_func_num(dev->dev), ctx->port, IB_QPT_GSI, 0, 1, IB_QP1_QKEY, &ah_attr, 0, mad); } static int send_mad_to_slave(int slave, struct mlx4_ib_demux_ctx *ctx, struct ib_mad *mad) { struct mlx4_ib_dev *dev = ctx->dev; struct ib_mad_agent *agent = dev->send_agent[ctx->port - 1][1]; struct ib_wc wc; struct ib_ah_attr ah_attr; /* Our agent might not yet be registered when mads start to arrive */ if (!agent) return -EAGAIN; ib_query_ah(dev->sm_ah[ctx->port - 1], &ah_attr); if (ib_find_cached_pkey(&dev->ib_dev, ctx->port, IB_DEFAULT_PKEY_FULL, &wc.pkey_index)) return -EINVAL; wc.sl = 0; wc.dlid_path_bits = 0; wc.port_num = ctx->port; wc.slid = ah_attr.dlid; /* opensm lid */ wc.src_qp = 1; return mlx4_ib_send_to_slave(dev, slave, ctx->port, IB_QPT_GSI, &wc, NULL, mad); } static int send_join_to_wire(struct mcast_group *group, struct ib_sa_mad *sa_mad) { struct ib_sa_mad mad; struct ib_sa_mcmember_data *sa_mad_data = (struct ib_sa_mcmember_data *)&mad.data; int ret; /* we rely on a mad request as arrived from a VF */ memcpy(&mad, sa_mad, sizeof mad); /* fix port GID to be the real one (slave 0) */ sa_mad_data->port_gid.global.interface_id = group->demux->guid_cache[0]; /* assign our own TID */ mad.mad_hdr.tid = mlx4_ib_get_new_demux_tid(group->demux); group->last_req_tid = mad.mad_hdr.tid; /* keep it for later validation */ ret = send_mad_to_wire(group->demux, (struct ib_mad *)&mad); /* set timeout handler */ if (!ret) { /* calls mlx4_ib_mcg_timeout_handler */ queue_delayed_work(group->demux->mcg_wq, &group->timeout_work, msecs_to_jiffies(MAD_TIMEOUT_MS)); } return ret; } static int send_leave_to_wire(struct mcast_group *group, u8 join_state) { struct ib_sa_mad mad; struct ib_sa_mcmember_data *sa_data = (struct ib_sa_mcmember_data *)&mad.data; int ret; memset(&mad, 0, sizeof mad); mad.mad_hdr.base_version = 1; mad.mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM; mad.mad_hdr.class_version = 2; mad.mad_hdr.method = IB_SA_METHOD_DELETE; mad.mad_hdr.status = cpu_to_be16(0); mad.mad_hdr.class_specific = cpu_to_be16(0); mad.mad_hdr.tid = mlx4_ib_get_new_demux_tid(group->demux); group->last_req_tid = mad.mad_hdr.tid; /* keep it for later validation */ mad.mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_MC_MEMBER_REC); mad.mad_hdr.attr_mod = cpu_to_be32(0); mad.sa_hdr.sm_key = 0x0; mad.sa_hdr.attr_offset = cpu_to_be16(7); mad.sa_hdr.comp_mask = IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID | IB_SA_MCMEMBER_REC_JOIN_STATE; *sa_data = group->rec; sa_data->scope_join_state = join_state; ret = send_mad_to_wire(group->demux, (struct ib_mad *)&mad); if (ret) group->state = MCAST_IDLE; /* set timeout handler */ if (!ret) { /* calls mlx4_ib_mcg_timeout_handler */ queue_delayed_work(group->demux->mcg_wq, &group->timeout_work, msecs_to_jiffies(MAD_TIMEOUT_MS)); } return ret; } static int send_reply_to_slave(int slave, struct mcast_group *group, struct ib_sa_mad *req_sa_mad, u16 status) { struct ib_sa_mad mad; struct ib_sa_mcmember_data *sa_data = (struct ib_sa_mcmember_data *)&mad.data; struct ib_sa_mcmember_data *req_sa_data = (struct ib_sa_mcmember_data *)&req_sa_mad->data; int ret; memset(&mad, 0, sizeof mad); mad.mad_hdr.base_version = 1; mad.mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM; mad.mad_hdr.class_version = 2; mad.mad_hdr.method = IB_MGMT_METHOD_GET_RESP; mad.mad_hdr.status = cpu_to_be16(status); mad.mad_hdr.class_specific = cpu_to_be16(0); mad.mad_hdr.tid = req_sa_mad->mad_hdr.tid; *(u8 *)&mad.mad_hdr.tid = 0; /* resetting tid to 0 */ mad.mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_MC_MEMBER_REC); mad.mad_hdr.attr_mod = cpu_to_be32(0); mad.sa_hdr.sm_key = req_sa_mad->sa_hdr.sm_key; mad.sa_hdr.attr_offset = cpu_to_be16(7); mad.sa_hdr.comp_mask = 0; /* ignored on responses, see IBTA spec */ *sa_data = group->rec; /* reconstruct VF's requested join_state and port_gid */ sa_data->scope_join_state &= 0xf0; sa_data->scope_join_state |= (group->func[slave].join_state & 0x0f); memcpy(&sa_data->port_gid, &req_sa_data->port_gid, sizeof req_sa_data->port_gid); ret = send_mad_to_slave(slave, group->demux, (struct ib_mad *)&mad); return ret; } static int check_selector(ib_sa_comp_mask comp_mask, ib_sa_comp_mask selector_mask, ib_sa_comp_mask value_mask, u8 src_value, u8 dst_value) { int err; u8 selector = dst_value >> 6; dst_value &= 0x3f; src_value &= 0x3f; if (!(comp_mask & selector_mask) || !(comp_mask & value_mask)) return 0; switch (selector) { case IB_SA_GT: err = (src_value <= dst_value); break; case IB_SA_LT: err = (src_value >= dst_value); break; case IB_SA_EQ: err = (src_value != dst_value); break; default: err = 0; break; } return err; } static u16 cmp_rec(struct ib_sa_mcmember_data *src, struct ib_sa_mcmember_data *dst, ib_sa_comp_mask comp_mask) { /* src is group record, dst is request record */ /* MGID must already match */ /* Port_GID we always replace to our Port_GID, so it is a match */ #define MAD_STATUS_REQ_INVALID 0x0200 if (comp_mask & IB_SA_MCMEMBER_REC_QKEY && src->qkey != dst->qkey) return MAD_STATUS_REQ_INVALID; if (comp_mask & IB_SA_MCMEMBER_REC_MLID && src->mlid != dst->mlid) return MAD_STATUS_REQ_INVALID; if (check_selector(comp_mask, IB_SA_MCMEMBER_REC_MTU_SELECTOR, IB_SA_MCMEMBER_REC_MTU, src->mtusel_mtu, dst->mtusel_mtu)) return MAD_STATUS_REQ_INVALID; if (comp_mask & IB_SA_MCMEMBER_REC_TRAFFIC_CLASS && src->tclass != dst->tclass) return MAD_STATUS_REQ_INVALID; if (comp_mask & IB_SA_MCMEMBER_REC_PKEY && src->pkey != dst->pkey) return MAD_STATUS_REQ_INVALID; if (check_selector(comp_mask, IB_SA_MCMEMBER_REC_RATE_SELECTOR, IB_SA_MCMEMBER_REC_RATE, src->ratesel_rate, dst->ratesel_rate)) return MAD_STATUS_REQ_INVALID; if (check_selector(comp_mask, IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME_SELECTOR, IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME, src->lifetmsel_lifetm, dst->lifetmsel_lifetm)) return MAD_STATUS_REQ_INVALID; if (comp_mask & IB_SA_MCMEMBER_REC_SL && (be32_to_cpu(src->sl_flowlabel_hoplimit) & 0xf0000000) != (be32_to_cpu(dst->sl_flowlabel_hoplimit) & 0xf0000000)) return MAD_STATUS_REQ_INVALID; if (comp_mask & IB_SA_MCMEMBER_REC_FLOW_LABEL && (be32_to_cpu(src->sl_flowlabel_hoplimit) & 0x0fffff00) != (be32_to_cpu(dst->sl_flowlabel_hoplimit) & 0x0fffff00)) return MAD_STATUS_REQ_INVALID; if (comp_mask & IB_SA_MCMEMBER_REC_HOP_LIMIT && (be32_to_cpu(src->sl_flowlabel_hoplimit) & 0x000000ff) != (be32_to_cpu(dst->sl_flowlabel_hoplimit) & 0x000000ff)) return MAD_STATUS_REQ_INVALID; if (comp_mask & IB_SA_MCMEMBER_REC_SCOPE && (src->scope_join_state & 0xf0) != (dst->scope_join_state & 0xf0)) return MAD_STATUS_REQ_INVALID; /* join_state checked separately, proxy_join ignored */ return 0; } /* release group, return 1 if this was last release and group is destroyed * timout work is canceled sync */ static int release_group(struct mcast_group *group, int from_timeout_handler) { struct mlx4_ib_demux_ctx *ctx = group->demux; int nzgroup; mutex_lock(&ctx->mcg_table_lock); mutex_lock(&group->lock); if (atomic_dec_and_test(&group->refcount)) { if (!from_timeout_handler) { if (group->state != MCAST_IDLE && !cancel_delayed_work(&group->timeout_work)) { atomic_inc(&group->refcount); mutex_unlock(&group->lock); mutex_unlock(&ctx->mcg_table_lock); return 0; } } nzgroup = memcmp(&group->rec.mgid, &mgid0, sizeof mgid0); if (nzgroup) del_sysfs_port_mcg_attr(ctx->dev, ctx->port, &group->dentry.attr); if (!list_empty(&group->pending_list)) mcg_warn_group(group, "releasing a group with non empty pending list\n"); if (nzgroup) rb_erase(&group->node, &ctx->mcg_table); list_del_init(&group->mgid0_list); mutex_unlock(&group->lock); mutex_unlock(&ctx->mcg_table_lock); kfree(group); return 1; } else { mutex_unlock(&group->lock); mutex_unlock(&ctx->mcg_table_lock); } return 0; } static void adjust_membership(struct mcast_group *group, u8 join_state, int inc) { int i; for (i = 0; i < 3; i++, join_state >>= 1) if (join_state & 0x1) group->members[i] += inc; } static u8 get_leave_state(struct mcast_group *group) { u8 leave_state = 0; int i; for (i = 0; i < 3; i++) if (!group->members[i]) leave_state |= (1 << i); return leave_state & (group->rec.scope_join_state & 7); } static int join_group(struct mcast_group *group, int slave, u8 join_mask) { int ret = 0; u8 join_state; /* remove bits that slave is already member of, and adjust */ join_state = join_mask & (~group->func[slave].join_state); adjust_membership(group, join_state, 1); group->func[slave].join_state |= join_state; if (group->func[slave].state != MCAST_MEMBER && join_state) { group->func[slave].state = MCAST_MEMBER; ret = 1; } return ret; } static int leave_group(struct mcast_group *group, int slave, u8 leave_state) { int ret = 0; adjust_membership(group, leave_state, -1); group->func[slave].join_state &= ~leave_state; if (!group->func[slave].join_state) { group->func[slave].state = MCAST_NOT_MEMBER; ret = 1; } return ret; } static int check_leave(struct mcast_group *group, int slave, u8 leave_mask) { if (group->func[slave].state != MCAST_MEMBER) return MAD_STATUS_REQ_INVALID; /* make sure we're not deleting unset bits */ if (~group->func[slave].join_state & leave_mask) return MAD_STATUS_REQ_INVALID; if (!leave_mask) return MAD_STATUS_REQ_INVALID; return 0; } static void mlx4_ib_mcg_timeout_handler(struct work_struct *work) { struct delayed_work *delay = to_delayed_work(work); struct mcast_group *group; struct mcast_req *req = NULL; group = container_of(delay, typeof(*group), timeout_work); mutex_lock(&group->lock); if (group->state == MCAST_JOIN_SENT) { if (!list_empty(&group->pending_list)) { req = list_first_entry(&group->pending_list, struct mcast_req, group_list); list_del(&req->group_list); list_del(&req->func_list); --group->func[req->func].num_pend_reqs; mutex_unlock(&group->lock); kfree(req); if (memcmp(&group->rec.mgid, &mgid0, sizeof mgid0)) { if (release_group(group, 1)) return; } else { kfree(group); return; } mutex_lock(&group->lock); } else mcg_warn_group(group, "DRIVER BUG\n"); } else if (group->state == MCAST_LEAVE_SENT) { if (group->rec.scope_join_state & 7) group->rec.scope_join_state &= 0xf8; group->state = MCAST_IDLE; mutex_unlock(&group->lock); if (release_group(group, 1)) return; mutex_lock(&group->lock); } else mcg_warn_group(group, "invalid state %s\n", get_state_string(group->state)); group->state = MCAST_IDLE; atomic_inc(&group->refcount); if (!queue_work(group->demux->mcg_wq, &group->work)) safe_atomic_dec(&group->refcount); mutex_unlock(&group->lock); } static int handle_leave_req(struct mcast_group *group, u8 leave_mask, struct mcast_req *req) { u16 status; if (req->clean) leave_mask = group->func[req->func].join_state; status = check_leave(group, req->func, leave_mask); if (!status) leave_group(group, req->func, leave_mask); if (!req->clean) send_reply_to_slave(req->func, group, &req->sa_mad, status); --group->func[req->func].num_pend_reqs; list_del(&req->group_list); list_del(&req->func_list); kfree(req); return 1; } static int handle_join_req(struct mcast_group *group, u8 join_mask, struct mcast_req *req) { u8 group_join_state = group->rec.scope_join_state & 7; int ref = 0; u16 status; struct ib_sa_mcmember_data *sa_data = (struct ib_sa_mcmember_data *)req->sa_mad.data; if (join_mask == (group_join_state & join_mask)) { /* port's membership need not change */ status = cmp_rec(&group->rec, sa_data, req->sa_mad.sa_hdr.comp_mask); if (!status) join_group(group, req->func, join_mask); --group->func[req->func].num_pend_reqs; send_reply_to_slave(req->func, group, &req->sa_mad, status); list_del(&req->group_list); list_del(&req->func_list); kfree(req); ++ref; } else { /* port's membership needs to be updated */ group->prev_state = group->state; if (send_join_to_wire(group, &req->sa_mad)) { --group->func[req->func].num_pend_reqs; list_del(&req->group_list); list_del(&req->func_list); kfree(req); ref = 1; group->state = group->prev_state; } else group->state = MCAST_JOIN_SENT; } return ref; } static void mlx4_ib_mcg_work_handler(struct work_struct *work) { struct mcast_group *group; struct mcast_req *req = NULL; struct ib_sa_mcmember_data *sa_data; u8 req_join_state; int rc = 1; /* release_count - this is for the scheduled work */ u16 status; u8 method; group = container_of(work, typeof(*group), work); mutex_lock(&group->lock); /* First, let's see if a response from SM is waiting regarding this group. * If so, we need to update the group's REC. If this is a bad response, we * may need to send a bad response to a VF waiting for it. If VF is waiting * and this is a good response, the VF will be answered later in this func. */ if (group->state == MCAST_RESP_READY) { /* cancels mlx4_ib_mcg_timeout_handler */ cancel_delayed_work(&group->timeout_work); status = be16_to_cpu(group->response_sa_mad.mad_hdr.status); method = group->response_sa_mad.mad_hdr.method; if (group->last_req_tid != group->response_sa_mad.mad_hdr.tid) { mcg_warn_group(group, "Got MAD response to existing MGID but wrong TID, dropping. Resp TID=%llx, group TID=%llx\n", (long long)be64_to_cpu( group->response_sa_mad.mad_hdr.tid), (long long)be64_to_cpu(group->last_req_tid)); group->state = group->prev_state; goto process_requests; } if (status) { if (!list_empty(&group->pending_list)) req = list_first_entry(&group->pending_list, struct mcast_req, group_list); if (method == IB_MGMT_METHOD_GET_RESP) { if (req) { send_reply_to_slave(req->func, group, &req->sa_mad, status); --group->func[req->func].num_pend_reqs; list_del(&req->group_list); list_del(&req->func_list); kfree(req); ++rc; } else mcg_warn_group(group, "no request for failed join\n"); } else if (method == IB_SA_METHOD_DELETE_RESP && group->demux->flushing) ++rc; } else { u8 resp_join_state; u8 cur_join_state; resp_join_state = ((struct ib_sa_mcmember_data *) group->response_sa_mad.data)->scope_join_state & 7; cur_join_state = group->rec.scope_join_state & 7; if (method == IB_MGMT_METHOD_GET_RESP) { - /* successfull join */ + /* successful join */ if (!cur_join_state && resp_join_state) --rc; } else if (!resp_join_state) ++rc; memcpy(&group->rec, group->response_sa_mad.data, sizeof group->rec); } group->state = MCAST_IDLE; } process_requests: /* We should now go over pending join/leave requests, as long as we are idle. */ while (!list_empty(&group->pending_list) && group->state == MCAST_IDLE) { req = list_first_entry(&group->pending_list, struct mcast_req, group_list); sa_data = (struct ib_sa_mcmember_data *)req->sa_mad.data; req_join_state = sa_data->scope_join_state & 0x7; /* For a leave request, we will immediately answer the VF, and * update our internal counters. The actual leave will be sent * to SM later, if at all needed. We dequeue the request now. */ if (req->sa_mad.mad_hdr.method == IB_SA_METHOD_DELETE) rc += handle_leave_req(group, req_join_state, req); else rc += handle_join_req(group, req_join_state, req); } /* Handle leaves */ if (group->state == MCAST_IDLE) { req_join_state = get_leave_state(group); if (req_join_state) { group->rec.scope_join_state &= ~req_join_state; group->prev_state = group->state; if (send_leave_to_wire(group, req_join_state)) { group->state = group->prev_state; ++rc; } else group->state = MCAST_LEAVE_SENT; } } if (!list_empty(&group->pending_list) && group->state == MCAST_IDLE) goto process_requests; mutex_unlock(&group->lock); while (rc--) release_group(group, 0); } static struct mcast_group *search_relocate_mgid0_group(struct mlx4_ib_demux_ctx *ctx, __be64 tid, union ib_gid *new_mgid) { struct mcast_group *group = NULL, *cur_group; struct mcast_req *req; struct list_head *pos; struct list_head *n; mutex_lock(&ctx->mcg_table_lock); list_for_each_safe(pos, n, &ctx->mcg_mgid0_list) { group = list_entry(pos, struct mcast_group, mgid0_list); mutex_lock(&group->lock); if (group->last_req_tid == tid) { if (memcmp(new_mgid, &mgid0, sizeof mgid0)) { group->rec.mgid = *new_mgid; sprintf(group->name, "%016llx%016llx", (long long)be64_to_cpu(group->rec.mgid.global.subnet_prefix), (long long)be64_to_cpu(group->rec.mgid.global.interface_id)); list_del_init(&group->mgid0_list); cur_group = mcast_insert(ctx, group); if (cur_group) { /* A race between our code and SM. Silently cleaning the new one */ req = list_first_entry(&group->pending_list, struct mcast_req, group_list); --group->func[req->func].num_pend_reqs; list_del(&req->group_list); list_del(&req->func_list); kfree(req); mutex_unlock(&group->lock); mutex_unlock(&ctx->mcg_table_lock); release_group(group, 0); return NULL; } atomic_inc(&group->refcount); add_sysfs_port_mcg_attr(ctx->dev, ctx->port, &group->dentry.attr); mutex_unlock(&group->lock); mutex_unlock(&ctx->mcg_table_lock); return group; } else { struct mcast_req *tmp1, *tmp2; list_del(&group->mgid0_list); if (!list_empty(&group->pending_list) && group->state != MCAST_IDLE) cancel_delayed_work_sync(&group->timeout_work); list_for_each_entry_safe(tmp1, tmp2, &group->pending_list, group_list) { list_del(&tmp1->group_list); kfree(tmp1); } mutex_unlock(&group->lock); mutex_unlock(&ctx->mcg_table_lock); kfree(group); return NULL; } } mutex_unlock(&group->lock); } mutex_unlock(&ctx->mcg_table_lock); return NULL; } static ssize_t sysfs_show_group(struct device *dev, struct device_attribute *attr, char *buf); static struct mcast_group *acquire_group(struct mlx4_ib_demux_ctx *ctx, union ib_gid *mgid, int create, gfp_t gfp_mask) { struct mcast_group *group, *cur_group; int is_mgid0; int i; is_mgid0 = !memcmp(&mgid0, mgid, sizeof mgid0); if (!is_mgid0) { group = mcast_find(ctx, mgid); if (group) goto found; } if (!create) return ERR_PTR(-ENOENT); group = kzalloc(sizeof *group, gfp_mask); if (!group) return ERR_PTR(-ENOMEM); group->demux = ctx; group->rec.mgid = *mgid; INIT_LIST_HEAD(&group->pending_list); INIT_LIST_HEAD(&group->mgid0_list); for (i = 0; i < MAX_VFS; ++i) INIT_LIST_HEAD(&group->func[i].pending); INIT_WORK(&group->work, mlx4_ib_mcg_work_handler); INIT_DELAYED_WORK(&group->timeout_work, mlx4_ib_mcg_timeout_handler); mutex_init(&group->lock); sprintf(group->name, "%016llx%016llx", (long long)be64_to_cpu( group->rec.mgid.global.subnet_prefix), (long long)be64_to_cpu( group->rec.mgid.global.interface_id)); sysfs_attr_init(&group->dentry.attr); group->dentry.show = sysfs_show_group; group->dentry.store = NULL; group->dentry.attr.name = group->name; group->dentry.attr.mode = 0400; group->state = MCAST_IDLE; if (is_mgid0) { list_add(&group->mgid0_list, &ctx->mcg_mgid0_list); goto found; } cur_group = mcast_insert(ctx, group); if (cur_group) { mcg_warn("group just showed up %s - confused\n", cur_group->name); kfree(group); return ERR_PTR(-EINVAL); } add_sysfs_port_mcg_attr(ctx->dev, ctx->port, &group->dentry.attr); found: atomic_inc(&group->refcount); return group; } static void queue_req(struct mcast_req *req) { struct mcast_group *group = req->group; atomic_inc(&group->refcount); /* for the request */ atomic_inc(&group->refcount); /* for scheduling the work */ list_add_tail(&req->group_list, &group->pending_list); list_add_tail(&req->func_list, &group->func[req->func].pending); /* calls mlx4_ib_mcg_work_handler */ if (!queue_work(group->demux->mcg_wq, &group->work)) safe_atomic_dec(&group->refcount); } int mlx4_ib_mcg_demux_handler(struct ib_device *ibdev, int port, int slave, struct ib_sa_mad *mad) { struct mlx4_ib_dev *dev = to_mdev(ibdev); struct ib_sa_mcmember_data *rec = (struct ib_sa_mcmember_data *)mad->data; struct mlx4_ib_demux_ctx *ctx = &dev->sriov.demux[port - 1]; struct mcast_group *group; switch (mad->mad_hdr.method) { case IB_MGMT_METHOD_GET_RESP: case IB_SA_METHOD_DELETE_RESP: mutex_lock(&ctx->mcg_table_lock); group = acquire_group(ctx, &rec->mgid, 0, GFP_KERNEL); mutex_unlock(&ctx->mcg_table_lock); if (IS_ERR(group)) { if (mad->mad_hdr.method == IB_MGMT_METHOD_GET_RESP) { __be64 tid = mad->mad_hdr.tid; *(u8 *)(&tid) = (u8)slave; /* in group we kept the modified TID */ group = search_relocate_mgid0_group(ctx, tid, &rec->mgid); } else group = NULL; } if (!group) return 1; mutex_lock(&group->lock); group->response_sa_mad = *mad; group->prev_state = group->state; group->state = MCAST_RESP_READY; /* calls mlx4_ib_mcg_work_handler */ atomic_inc(&group->refcount); if (!queue_work(ctx->mcg_wq, &group->work)) safe_atomic_dec(&group->refcount); mutex_unlock(&group->lock); release_group(group, 0); return 1; /* consumed */ case IB_MGMT_METHOD_SET: case IB_SA_METHOD_GET_TABLE: case IB_SA_METHOD_GET_TABLE_RESP: case IB_SA_METHOD_DELETE: return 0; /* not consumed, pass-through to guest over tunnel */ default: mcg_warn("In demux, port %d: unexpected MCMember method: 0x%x, dropping\n", port, mad->mad_hdr.method); return 1; /* consumed */ } } int mlx4_ib_mcg_multiplex_handler(struct ib_device *ibdev, int port, int slave, struct ib_sa_mad *sa_mad) { struct mlx4_ib_dev *dev = to_mdev(ibdev); struct ib_sa_mcmember_data *rec = (struct ib_sa_mcmember_data *)sa_mad->data; struct mlx4_ib_demux_ctx *ctx = &dev->sriov.demux[port - 1]; struct mcast_group *group; struct mcast_req *req; int may_create = 0; if (ctx->flushing) return -EAGAIN; switch (sa_mad->mad_hdr.method) { case IB_MGMT_METHOD_SET: may_create = 1; case IB_SA_METHOD_DELETE: req = kzalloc(sizeof *req, GFP_KERNEL); if (!req) return -ENOMEM; req->func = slave; req->sa_mad = *sa_mad; mutex_lock(&ctx->mcg_table_lock); group = acquire_group(ctx, &rec->mgid, may_create, GFP_KERNEL); mutex_unlock(&ctx->mcg_table_lock); if (IS_ERR(group)) { kfree(req); return PTR_ERR(group); } mutex_lock(&group->lock); if (group->func[slave].num_pend_reqs > MAX_PEND_REQS_PER_FUNC) { mutex_unlock(&group->lock); mcg_warn_group(group, "Port %d, Func %d has too many pending requests (%d), dropping\n", port, slave, MAX_PEND_REQS_PER_FUNC); release_group(group, 0); kfree(req); return -ENOMEM; } ++group->func[slave].num_pend_reqs; req->group = group; queue_req(req); mutex_unlock(&group->lock); release_group(group, 0); return 1; /* consumed */ case IB_SA_METHOD_GET_TABLE: case IB_MGMT_METHOD_GET_RESP: case IB_SA_METHOD_GET_TABLE_RESP: case IB_SA_METHOD_DELETE_RESP: return 0; /* not consumed, pass-through */ default: mcg_warn("In multiplex, port %d, func %d: unexpected MCMember method: 0x%x, dropping\n", port, slave, sa_mad->mad_hdr.method); return 1; /* consumed */ } } static ssize_t sysfs_show_group(struct device *dev, struct device_attribute *attr, char *buf) { struct mcast_group *group = container_of(attr, struct mcast_group, dentry); struct mcast_req *req = NULL; char pending_str[40]; char state_str[40]; ssize_t len = 0; int f; if (group->state == MCAST_IDLE) sprintf(state_str, "%s", get_state_string(group->state)); else sprintf(state_str, "%s(TID=0x%llx)", get_state_string(group->state), (long long)be64_to_cpu(group->last_req_tid)); if (list_empty(&group->pending_list)) { sprintf(pending_str, "No"); } else { req = list_first_entry(&group->pending_list, struct mcast_req, group_list); sprintf(pending_str, "Yes(TID=0x%llx)", (long long)be64_to_cpu( req->sa_mad.mad_hdr.tid)); } len += sprintf(buf + len, "%1d [%02d,%02d,%02d] %4d %4s %5s ", group->rec.scope_join_state & 0xf, group->members[2], group->members[1], group->members[0], atomic_read(&group->refcount), pending_str, state_str); for (f = 0; f < MAX_VFS; ++f) if (group->func[f].state == MCAST_MEMBER) len += sprintf(buf + len, "%d[%1x] ", f, group->func[f].join_state); len += sprintf(buf + len, "\t\t(%4hx %4x %2x %2x %2x %2x %2x " "%4x %4x %2x %2x)\n", be16_to_cpu(group->rec.pkey), be32_to_cpu(group->rec.qkey), (group->rec.mtusel_mtu & 0xc0) >> 6, group->rec.mtusel_mtu & 0x3f, group->rec.tclass, (group->rec.ratesel_rate & 0xc0) >> 6, group->rec.ratesel_rate & 0x3f, (be32_to_cpu(group->rec.sl_flowlabel_hoplimit) & 0xf0000000) >> 28, (be32_to_cpu(group->rec.sl_flowlabel_hoplimit) & 0x0fffff00) >> 8, be32_to_cpu(group->rec.sl_flowlabel_hoplimit) & 0x000000ff, group->rec.proxy_join); return len; } int mlx4_ib_mcg_port_init(struct mlx4_ib_demux_ctx *ctx) { char name[20]; atomic_set(&ctx->tid, 0); sprintf(name, "mlx4_ib_mcg%d", ctx->port); ctx->mcg_wq = create_singlethread_workqueue(name); if (!ctx->mcg_wq) return -ENOMEM; mutex_init(&ctx->mcg_table_lock); ctx->mcg_table = RB_ROOT; INIT_LIST_HEAD(&ctx->mcg_mgid0_list); ctx->flushing = 0; return 0; } static void force_clean_group(struct mcast_group *group) { struct mcast_req *req, *tmp ; list_for_each_entry_safe(req, tmp, &group->pending_list, group_list) { list_del(&req->group_list); kfree(req); } del_sysfs_port_mcg_attr(group->demux->dev, group->demux->port, &group->dentry.attr); rb_erase(&group->node, &group->demux->mcg_table); kfree(group); } static void _mlx4_ib_mcg_port_cleanup(struct mlx4_ib_demux_ctx *ctx, int destroy_wq) { int i; struct rb_node *p; struct mcast_group *group; unsigned long end; int count; for (i = 0; i < MAX_VFS; ++i) clean_vf_mcast(ctx, i); end = jiffies + msecs_to_jiffies(MAD_TIMEOUT_MS + 3000); do { count = 0; mutex_lock(&ctx->mcg_table_lock); for (p = rb_first(&ctx->mcg_table); p; p = rb_next(p)) ++count; mutex_unlock(&ctx->mcg_table_lock); if (!count) break; msleep(1); } while (time_after(end, jiffies)); flush_workqueue(ctx->mcg_wq); if (destroy_wq) destroy_workqueue(ctx->mcg_wq); mutex_lock(&ctx->mcg_table_lock); while ((p = rb_first(&ctx->mcg_table)) != NULL) { group = rb_entry(p, struct mcast_group, node); if (atomic_read(&group->refcount)) mcg_warn_group(group, "group refcount %d!!! (pointer %p)\n", atomic_read(&group->refcount), group); force_clean_group(group); } mutex_unlock(&ctx->mcg_table_lock); } struct clean_work { struct work_struct work; struct mlx4_ib_demux_ctx *ctx; int destroy_wq; }; static void mcg_clean_task(struct work_struct *work) { struct clean_work *cw = container_of(work, struct clean_work, work); _mlx4_ib_mcg_port_cleanup(cw->ctx, cw->destroy_wq); cw->ctx->flushing = 0; kfree(cw); } void mlx4_ib_mcg_port_cleanup(struct mlx4_ib_demux_ctx *ctx, int destroy_wq) { struct clean_work *work; if (ctx->flushing) return; ctx->flushing = 1; if (destroy_wq) { _mlx4_ib_mcg_port_cleanup(ctx, destroy_wq); ctx->flushing = 0; return; } work = kmalloc(sizeof *work, GFP_KERNEL); if (!work) { ctx->flushing = 0; mcg_warn("failed allocating work for cleanup\n"); return; } work->ctx = ctx; work->destroy_wq = destroy_wq; INIT_WORK(&work->work, mcg_clean_task); queue_work(clean_wq, &work->work); } static void build_leave_mad(struct mcast_req *req) { struct ib_sa_mad *mad = &req->sa_mad; mad->mad_hdr.method = IB_SA_METHOD_DELETE; } static void clear_pending_reqs(struct mcast_group *group, int vf) { struct mcast_req *req, *tmp, *group_first = NULL; int clear; int pend = 0; if (!list_empty(&group->pending_list)) group_first = list_first_entry(&group->pending_list, struct mcast_req, group_list); list_for_each_entry_safe(req, tmp, &group->func[vf].pending, func_list) { clear = 1; if (group_first == req && (group->state == MCAST_JOIN_SENT || group->state == MCAST_LEAVE_SENT)) { clear = cancel_delayed_work(&group->timeout_work); pend = !clear; group->state = MCAST_IDLE; } if (clear) { --group->func[vf].num_pend_reqs; list_del(&req->group_list); list_del(&req->func_list); kfree(req); atomic_dec(&group->refcount); } } if (!pend && (!list_empty(&group->func[vf].pending) || group->func[vf].num_pend_reqs)) { mcg_warn_group(group, "DRIVER BUG: list_empty %d, num_pend_reqs %d\n", list_empty(&group->func[vf].pending), group->func[vf].num_pend_reqs); } } static int push_deleteing_req(struct mcast_group *group, int slave) { struct mcast_req *req; struct mcast_req *pend_req; if (!group->func[slave].join_state) return 0; req = kzalloc(sizeof *req, GFP_KERNEL); if (!req) { mcg_warn_group(group, "failed allocation - may leave stall groups\n"); return -ENOMEM; } if (!list_empty(&group->func[slave].pending)) { pend_req = list_entry(group->func[slave].pending.prev, struct mcast_req, group_list); if (pend_req->clean) { kfree(req); return 0; } } req->clean = 1; req->func = slave; req->group = group; ++group->func[slave].num_pend_reqs; build_leave_mad(req); queue_req(req); return 0; } void clean_vf_mcast(struct mlx4_ib_demux_ctx *ctx, int slave) { struct mcast_group *group; struct rb_node *p; mutex_lock(&ctx->mcg_table_lock); for (p = rb_first(&ctx->mcg_table); p; p = rb_next(p)) { group = rb_entry(p, struct mcast_group, node); mutex_lock(&group->lock); if (atomic_read(&group->refcount)) { /* clear pending requests of this VF */ clear_pending_reqs(group, slave); push_deleteing_req(group, slave); } mutex_unlock(&group->lock); } mutex_unlock(&ctx->mcg_table_lock); } int mlx4_ib_mcg_init(void) { clean_wq = create_singlethread_workqueue("mlx4_ib_mcg"); if (!clean_wq) return -ENOMEM; return 0; } void mlx4_ib_mcg_destroy(void) { destroy_workqueue(clean_wq); } Index: head/sys/ofed/drivers/infiniband/hw/mlx4/mr.c =================================================================== --- head/sys/ofed/drivers/infiniband/hw/mlx4/mr.c (revision 299178) +++ head/sys/ofed/drivers/infiniband/hw/mlx4/mr.c (revision 299179) @@ -1,885 +1,885 @@ /* * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include #include #include #include "mlx4_ib.h" static u32 convert_access(int acc) { return (acc & IB_ACCESS_REMOTE_ATOMIC ? MLX4_PERM_ATOMIC : 0) | (acc & IB_ACCESS_REMOTE_WRITE ? MLX4_PERM_REMOTE_WRITE : 0) | (acc & IB_ACCESS_REMOTE_READ ? MLX4_PERM_REMOTE_READ : 0) | (acc & IB_ACCESS_LOCAL_WRITE ? MLX4_PERM_LOCAL_WRITE : 0) | (acc & IB_ACCESS_MW_BIND ? MLX4_PERM_BIND_MW : 0) | MLX4_PERM_LOCAL_READ; } /* No suuport for Shared MR feature */ #if 0 static ssize_t shared_mr_proc_read(struct file *file, char __user *buffer, size_t len, loff_t *offset) { return -ENOSYS; } static ssize_t shared_mr_proc_write(struct file *file, const char __user *buffer, size_t len, loff_t *offset) { return -ENOSYS; } static int shared_mr_mmap(struct file *filep, struct vm_area_struct *vma) { struct proc_dir_entry *pde = PDE(filep->f_path.dentry->d_inode); struct mlx4_shared_mr_info *smr_info = (struct mlx4_shared_mr_info *)pde->data; /* Prevent any mapping not on start of area */ if (vma->vm_pgoff != 0) return -EINVAL; return ib_umem_map_to_vma(smr_info->umem, vma); } static const struct file_operations shared_mr_proc_ops = { .owner = THIS_MODULE, .read = shared_mr_proc_read, .write = shared_mr_proc_write, .mmap = shared_mr_mmap }; static mode_t convert_shared_access(int acc) { return (acc & IB_ACCESS_SHARED_MR_USER_READ ? S_IRUSR : 0) | (acc & IB_ACCESS_SHARED_MR_USER_WRITE ? S_IWUSR : 0) | (acc & IB_ACCESS_SHARED_MR_GROUP_READ ? S_IRGRP : 0) | (acc & IB_ACCESS_SHARED_MR_GROUP_WRITE ? S_IWGRP : 0) | (acc & IB_ACCESS_SHARED_MR_OTHER_READ ? S_IROTH : 0) | (acc & IB_ACCESS_SHARED_MR_OTHER_WRITE ? S_IWOTH : 0); } #endif struct ib_mr *mlx4_ib_get_dma_mr(struct ib_pd *pd, int acc) { struct mlx4_ib_mr *mr; int err; mr = kzalloc(sizeof *mr, GFP_KERNEL); if (!mr) return ERR_PTR(-ENOMEM); err = mlx4_mr_alloc(to_mdev(pd->device)->dev, to_mpd(pd)->pdn, 0, ~0ull, convert_access(acc), 0, 0, &mr->mmr); if (err) goto err_free; err = mlx4_mr_enable(to_mdev(pd->device)->dev, &mr->mmr); if (err) goto err_mr; mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key; mr->umem = NULL; return &mr->ibmr; err_mr: (void) mlx4_mr_free(to_mdev(pd->device)->dev, &mr->mmr); err_free: kfree(mr); return ERR_PTR(err); } static int mlx4_ib_umem_write_mtt_block(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt, u64 mtt_size, u64 mtt_shift, u64 len, u64 cur_start_addr, u64 *pages, int *start_index, int *npages) { int k; int err = 0; u64 mtt_entries; u64 cur_end_addr = cur_start_addr + len; u64 cur_end_addr_aligned = 0; len += (cur_start_addr & (mtt_size-1ULL)); cur_end_addr_aligned = round_up(cur_end_addr, mtt_size); len += (cur_end_addr_aligned - cur_end_addr); if (len & (mtt_size-1ULL)) { WARN(1 , "write_block: len %llx is not aligned to mtt_size %llx\n", (unsigned long long)len, (unsigned long long)mtt_size); return -EINVAL; } mtt_entries = (len >> mtt_shift); /* Align the MTT start address to the mtt_size. Required to handle cases when the MR starts in the middle of an MTT record. Was not required in old code since the physical addresses provided by the dma subsystem were page aligned, which was also the MTT size. */ cur_start_addr = round_down(cur_start_addr, mtt_size); /* A new block is started ...*/ for (k = 0; k < mtt_entries; ++k) { pages[*npages] = cur_start_addr + (mtt_size * k); (*npages)++; /* * Be friendly to mlx4_write_mtt() and * pass it chunks of appropriate size. */ if (*npages == PAGE_SIZE / sizeof(u64)) { err = mlx4_write_mtt(dev->dev, mtt, *start_index, *npages, pages); if (err) return err; (*start_index) += *npages; *npages = 0; } } return 0; } int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt, struct ib_umem *umem) { u64 *pages; u64 len = 0; int err = 0; u64 mtt_size; u64 cur_start_addr = 0; u64 mtt_shift; int start_index = 0; int npages = 0; struct scatterlist *sg; int i; pages = (u64 *) __get_free_page(GFP_KERNEL); if (!pages) return -ENOMEM; mtt_shift = mtt->page_shift; mtt_size = 1ULL << mtt_shift; for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i) { if (cur_start_addr + len == sg_dma_address(sg)) { /* still the same block */ len += sg_dma_len(sg); continue; } /* A new block is started ...*/ /* If len is malaligned, write an extra mtt entry to cover the misaligned area (round up the division) */ err = mlx4_ib_umem_write_mtt_block(dev, mtt, mtt_size, mtt_shift, len, cur_start_addr, pages, &start_index, &npages); if (err) goto out; cur_start_addr = sg_dma_address(sg); len = sg_dma_len(sg); } /* Handle the last block */ if (len > 0) { /* If len is malaligned, write an extra mtt entry to cover the misaligned area (round up the division) */ err = mlx4_ib_umem_write_mtt_block(dev, mtt, mtt_size, mtt_shift, len, cur_start_addr, pages, &start_index, &npages); if (err) goto out; } if (npages) err = mlx4_write_mtt(dev->dev, mtt, start_index, npages, pages); out: free_page((unsigned long) pages); return err; } static inline u64 alignment_of(u64 ptr) { return ilog2(ptr & (~(ptr-1))); } static int mlx4_ib_umem_calc_block_mtt(u64 next_block_start, u64 current_block_end, u64 block_shift) { /* Check whether the alignment of the new block is aligned as well as the previous block. Block address must start with zeros till size of entity_size. */ if ((next_block_start & ((1ULL << block_shift) - 1ULL)) != 0) /* It is not as well aligned as the previous block-reduce the mtt size accordingly. Here we take the last right bit which is 1. */ block_shift = alignment_of(next_block_start); /* Check whether the alignment of the end of previous block - is it aligned as well as the start of the block */ if (((current_block_end) & ((1ULL << block_shift) - 1ULL)) != 0) /* It is not as well aligned as the start of the block - reduce the mtt size accordingly. */ block_shift = alignment_of(current_block_end); return block_shift; } /* Calculate optimal mtt size based on contiguous pages. * Function will return also the number of pages that are not aligned to the calculated mtt_size to be added to total number of pages. For that we should check the first chunk length & last chunk length and if not aligned to mtt_size we should increment the non_aligned_pages number. All chunks in the middle already handled as part of mtt shift calculation for both their start & end addresses. */ int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem, u64 start_va, int *num_of_mtts) { u64 block_shift = MLX4_MAX_MTT_SHIFT; u64 current_block_len = 0; u64 current_block_start = 0; u64 misalignment_bits; u64 first_block_start = 0; u64 last_block_end = 0; u64 total_len = 0; u64 last_block_aligned_end = 0; u64 min_shift = ilog2(umem->page_size); struct scatterlist *sg; int i; u64 next_block_start; u64 current_block_end; for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i) { /* Initialization - save the first chunk start as the current_block_start - block means contiguous pages. */ if (current_block_len == 0 && current_block_start == 0) { first_block_start = current_block_start = sg_dma_address(sg); /* Find the bits that are different between the physical address and the virtual address for the start of the MR. */ /* umem_get aligned the start_va to a page - boundry. Therefore, we need to align the - start va to the same boundry */ + boundary. Therefore, we need to align the + start va to the same boundary */ /* misalignment_bits is needed to handle the case of a single memory region. In this case, the rest of the logic will not reduce the block size. If we use a block size which is bigger than the alignment of the misalignment bits, we might use the virtual page number instead of the physical page number, resulting in access to the wrong data. */ misalignment_bits = (start_va & (~(((u64)(umem->page_size))-1ULL))) ^ current_block_start; block_shift = min(alignment_of(misalignment_bits) , block_shift); } /* Go over the scatter entries and check if they continue the previous scatter entry. */ next_block_start = sg_dma_address(sg); current_block_end = current_block_start + current_block_len; /* If we have a split (non-contig.) between two block*/ if (current_block_end != next_block_start) { block_shift = mlx4_ib_umem_calc_block_mtt( next_block_start, current_block_end, block_shift); /* If we reached the minimum shift for 4k page we stop the loop. */ if (block_shift <= min_shift) goto end; /* If not saved yet we are in first block - we save the length of first block to calculate the non_aligned_pages number at * the end. */ total_len += current_block_len; /* Start a new block */ current_block_start = next_block_start; current_block_len = sg_dma_len(sg); continue; } /* The scatter entry is another part of the current block, increase the block size * An entry in the scatter can be larger than 4k (page) as of dma mapping which merge some blocks together. */ current_block_len += sg_dma_len(sg); } /* Account for the last block in the total len */ total_len += current_block_len; /* Add to the first block the misalignment that it suffers from.*/ total_len += (first_block_start & ((1ULL<> block_shift; end: if (block_shift < min_shift) { /* If shift is less than the min we set a WARN and return the min shift. */ WARN(1, "mlx4_ib_umem_calc_optimal_mtt_size - unexpected shift %lld\n", (unsigned long long)block_shift); block_shift = min_shift; } return block_shift; } /* No suuport for Shared MR */ #if 0 static int prepare_shared_mr(struct mlx4_ib_mr *mr, int access_flags, int mr_id) { struct proc_dir_entry *mr_proc_entry; mode_t mode = S_IFREG; char name_buff[16]; mode |= convert_shared_access(access_flags); sprintf(name_buff, "%X", mr_id); mr->smr_info = kmalloc(sizeof(struct mlx4_shared_mr_info), GFP_KERNEL); mr->smr_info->mr_id = mr_id; mr->smr_info->umem = mr->umem; mr_proc_entry = proc_create_data(name_buff, mode, mlx4_mrs_dir_entry, &shared_mr_proc_ops, mr->smr_info); if (!mr_proc_entry) { pr_err("prepare_shared_mr failed via proc\n"); kfree(mr->smr_info); return -ENODEV; } current_uid_gid(&(mr_proc_entry->uid), &(mr_proc_entry->gid)); mr_proc_entry->size = mr->umem->length; return 0; } static int is_shared_mr(int access_flags) { /* We should check whether IB_ACCESS_SHARED_MR_USER_READ or other shared bits were turned on. */ return !!(access_flags & (IB_ACCESS_SHARED_MR_USER_READ | IB_ACCESS_SHARED_MR_USER_WRITE | IB_ACCESS_SHARED_MR_GROUP_READ | IB_ACCESS_SHARED_MR_GROUP_WRITE | IB_ACCESS_SHARED_MR_OTHER_READ | IB_ACCESS_SHARED_MR_OTHER_WRITE)); } static void free_smr_info(struct mlx4_ib_mr *mr) { /* When master/parent shared mr is dereged there is no ability to share this mr any more - its mr_id will be returned to the kernel as part of ib_uverbs_dereg_mr and may be allocated again as part of other reg_mr. */ char name_buff[16]; sprintf(name_buff, "%X", mr->smr_info->mr_id); /* Remove proc entry is checking internally that no operation was strated on that proc fs file and if in the middle current process will wait till end of operation. That's why no sync mechanism is needed when we release below the shared umem. */ remove_proc_entry(name_buff, mlx4_mrs_dir_entry); kfree(mr->smr_info); mr->smr_info = NULL; } #endif static void mlx4_invalidate_umem(void *invalidation_cookie, struct ib_umem *umem, unsigned long addr, size_t size) { struct mlx4_ib_mr *mr = (struct mlx4_ib_mr *)invalidation_cookie; /* This function is called under client peer lock so its resources are race protected */ if (atomic_inc_return(&mr->invalidated) > 1) { umem->invalidation_ctx->inflight_invalidation = 1; goto end; } umem->invalidation_ctx->peer_callback = 1; mlx4_mr_free(to_mdev(mr->ibmr.device)->dev, &mr->mmr); ib_umem_release(umem); complete(&mr->invalidation_comp); end: return; } struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_udata *udata, int mr_id) { struct mlx4_ib_dev *dev = to_mdev(pd->device); struct mlx4_ib_mr *mr; int shift; int err; int n; struct ib_peer_memory_client *ib_peer_mem; mr = kzalloc(sizeof *mr, GFP_KERNEL); if (!mr) return ERR_PTR(-ENOMEM); mr->umem = ib_umem_get_ex(pd->uobject->context, start, length, access_flags, 0, 1); if (IS_ERR(mr->umem)) { err = PTR_ERR(mr->umem); goto err_free; } ib_peer_mem = mr->umem->ib_peer_mem; n = ib_umem_page_count(mr->umem); shift = mlx4_ib_umem_calc_optimal_mtt_size(mr->umem, start, &n); err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, virt_addr, length, convert_access(access_flags), n, shift, &mr->mmr); if (err) goto err_umem; err = mlx4_ib_umem_write_mtt(dev, &mr->mmr.mtt, mr->umem); if (err) goto err_mr; err = mlx4_mr_enable(dev->dev, &mr->mmr); if (err) goto err_mr; mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key; /* No suuport for Shared MR */ #if 0 /* Check whether MR should be shared */ if (is_shared_mr(access_flags)) { /* start address and length must be aligned to page size in order to map a full page and preventing leakage of data */ if (mr->umem->offset || (length & ~PAGE_MASK)) { err = -EINVAL; goto err_mr; } err = prepare_shared_mr(mr, access_flags, mr_id); if (err) goto err_mr; } #endif if (ib_peer_mem) { if (access_flags & IB_ACCESS_MW_BIND) { /* Prevent binding MW on peer clients. * mlx4_invalidate_umem must be void, * therefore, mlx4_mr_free should not fail * when using peer clients. */ err = -ENOSYS; pr_err("MW is not supported with peer memory client"); goto err_smr; } init_completion(&mr->invalidation_comp); ib_umem_activate_invalidation_notifier(mr->umem, mlx4_invalidate_umem, mr); } atomic_set(&mr->invalidated, 0); return &mr->ibmr; err_smr: /* No suuport for Shared MR */ #if 0 if (mr->smr_info) free_smr_info(mr); #endif err_mr: (void) mlx4_mr_free(to_mdev(pd->device)->dev, &mr->mmr); err_umem: ib_umem_release(mr->umem); err_free: kfree(mr); return ERR_PTR(err); } int mlx4_ib_dereg_mr(struct ib_mr *ibmr) { struct mlx4_ib_mr *mr = to_mmr(ibmr); struct ib_umem *umem = mr->umem; int ret; /* No suuport for Shared MR */ #if 0 if (mr->smr_info) free_smr_info(mr); #endif if (atomic_inc_return(&mr->invalidated) > 1) { wait_for_completion(&mr->invalidation_comp); goto end; } ret = mlx4_mr_free(to_mdev(ibmr->device)->dev, &mr->mmr); if (ret) { /* Error is not expected here, except when memory windows * are bound to MR which is not supported with * peer memory clients */ atomic_set(&mr->invalidated, 0); return ret; } if (!umem) goto end; ib_umem_release(mr->umem); end: kfree(mr); return 0; } struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type) { struct mlx4_ib_dev *dev = to_mdev(pd->device); struct mlx4_ib_mw *mw; int err; mw = kmalloc(sizeof(*mw), GFP_KERNEL); if (!mw) return ERR_PTR(-ENOMEM); err = mlx4_mw_alloc(dev->dev, to_mpd(pd)->pdn, (enum mlx4_mw_type)type, &mw->mmw); if (err) goto err_free; err = mlx4_mw_enable(dev->dev, &mw->mmw); if (err) goto err_mw; mw->ibmw.rkey = mw->mmw.key; return &mw->ibmw; err_mw: mlx4_mw_free(dev->dev, &mw->mmw); err_free: kfree(mw); return ERR_PTR(err); } int mlx4_ib_bind_mw(struct ib_qp *qp, struct ib_mw *mw, struct ib_mw_bind *mw_bind) { struct ib_send_wr wr; struct ib_send_wr *bad_wr; int ret; memset(&wr, 0, sizeof(wr)); wr.opcode = IB_WR_BIND_MW; wr.wr_id = mw_bind->wr_id; wr.send_flags = mw_bind->send_flags; wr.wr.bind_mw.mw = mw; wr.wr.bind_mw.bind_info = mw_bind->bind_info; wr.wr.bind_mw.rkey = ib_inc_rkey(mw->rkey); ret = mlx4_ib_post_send(qp, &wr, &bad_wr); if (!ret) mw->rkey = wr.wr.bind_mw.rkey; return ret; } int mlx4_ib_dealloc_mw(struct ib_mw *ibmw) { struct mlx4_ib_mw *mw = to_mmw(ibmw); mlx4_mw_free(to_mdev(ibmw->device)->dev, &mw->mmw); kfree(mw); return 0; } struct ib_mr *mlx4_ib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len) { struct mlx4_ib_dev *dev = to_mdev(pd->device); struct mlx4_ib_mr *mr; int err; mr = kzalloc(sizeof *mr, GFP_KERNEL); if (!mr) return ERR_PTR(-ENOMEM); err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, 0, 0, 0, max_page_list_len, 0, &mr->mmr); if (err) goto err_free; err = mlx4_mr_enable(dev->dev, &mr->mmr); if (err) goto err_mr; mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key; mr->umem = NULL; return &mr->ibmr; err_mr: (void) mlx4_mr_free(dev->dev, &mr->mmr); err_free: kfree(mr); return ERR_PTR(err); } struct ib_fast_reg_page_list *mlx4_ib_alloc_fast_reg_page_list(struct ib_device *ibdev, int page_list_len) { struct mlx4_ib_dev *dev = to_mdev(ibdev); struct mlx4_ib_fast_reg_page_list *mfrpl; int size = page_list_len * sizeof (u64); if (page_list_len > MLX4_MAX_FAST_REG_PAGES) return ERR_PTR(-EINVAL); mfrpl = kmalloc(sizeof *mfrpl, GFP_KERNEL); if (!mfrpl) return ERR_PTR(-ENOMEM); mfrpl->ibfrpl.page_list = kmalloc(size, GFP_KERNEL); if (!mfrpl->ibfrpl.page_list) goto err_free; mfrpl->mapped_page_list = dma_alloc_coherent(&dev->dev->pdev->dev, size, &mfrpl->map, GFP_KERNEL); if (!mfrpl->mapped_page_list) goto err_free; WARN_ON(mfrpl->map & 0x3f); return &mfrpl->ibfrpl; err_free: kfree(mfrpl->ibfrpl.page_list); kfree(mfrpl); return ERR_PTR(-ENOMEM); } void mlx4_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list) { struct mlx4_ib_dev *dev = to_mdev(page_list->device); struct mlx4_ib_fast_reg_page_list *mfrpl = to_mfrpl(page_list); int size = page_list->max_page_list_len * sizeof (u64); dma_free_coherent(&dev->dev->pdev->dev, size, mfrpl->mapped_page_list, mfrpl->map); kfree(mfrpl->ibfrpl.page_list); kfree(mfrpl); } struct ib_fmr *mlx4_ib_fmr_alloc(struct ib_pd *pd, int acc, struct ib_fmr_attr *fmr_attr) { struct mlx4_ib_dev *dev = to_mdev(pd->device); struct mlx4_ib_fmr *fmr; int err = -ENOMEM; fmr = kmalloc(sizeof *fmr, GFP_KERNEL); if (!fmr) return ERR_PTR(-ENOMEM); err = mlx4_fmr_alloc(dev->dev, to_mpd(pd)->pdn, convert_access(acc), fmr_attr->max_pages, fmr_attr->max_maps, fmr_attr->page_shift, &fmr->mfmr); if (err) goto err_free; err = mlx4_fmr_enable(to_mdev(pd->device)->dev, &fmr->mfmr); if (err) goto err_mr; fmr->ibfmr.rkey = fmr->ibfmr.lkey = fmr->mfmr.mr.key; return &fmr->ibfmr; err_mr: (void) mlx4_mr_free(to_mdev(pd->device)->dev, &fmr->mfmr.mr); err_free: kfree(fmr); return ERR_PTR(err); } int mlx4_ib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, int npages, u64 iova) { struct mlx4_ib_fmr *ifmr = to_mfmr(ibfmr); struct mlx4_ib_dev *dev = to_mdev(ifmr->ibfmr.device); return mlx4_map_phys_fmr(dev->dev, &ifmr->mfmr, page_list, npages, iova, &ifmr->ibfmr.lkey, &ifmr->ibfmr.rkey); } int mlx4_ib_unmap_fmr(struct list_head *fmr_list) { struct ib_fmr *ibfmr; int err; struct mlx4_dev *mdev = NULL; list_for_each_entry(ibfmr, fmr_list, list) { if (mdev && to_mdev(ibfmr->device)->dev != mdev) return -EINVAL; mdev = to_mdev(ibfmr->device)->dev; } if (!mdev) return 0; list_for_each_entry(ibfmr, fmr_list, list) { struct mlx4_ib_fmr *ifmr = to_mfmr(ibfmr); mlx4_fmr_unmap(mdev, &ifmr->mfmr, &ifmr->ibfmr.lkey, &ifmr->ibfmr.rkey); } /* * Make sure all MPT status updates are visible before issuing * SYNC_TPT firmware command. */ wmb(); err = mlx4_SYNC_TPT(mdev); if (err) pr_warn("SYNC_TPT error %d when " "unmapping FMRs\n", err); return 0; } int mlx4_ib_fmr_dealloc(struct ib_fmr *ibfmr) { struct mlx4_ib_fmr *ifmr = to_mfmr(ibfmr); struct mlx4_ib_dev *dev = to_mdev(ibfmr->device); int err; err = mlx4_fmr_free(dev->dev, &ifmr->mfmr); if (!err) kfree(ifmr); return err; } Index: head/sys/ofed/drivers/infiniband/ulp/sdp/sdp.h =================================================================== --- head/sys/ofed/drivers/infiniband/ulp/sdp/sdp.h (revision 299178) +++ head/sys/ofed/drivers/infiniband/ulp/sdp/sdp.h (revision 299179) @@ -1,723 +1,723 @@ #ifndef _SDP_H_ #define _SDP_H_ #include "opt_ddb.h" #include "opt_inet.h" #include "opt_ofed.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef SDP_DEBUG #define CONFIG_INFINIBAND_SDP_DEBUG #endif #include "sdp_dbg.h" #undef LIST_HEAD /* From sys/queue.h */ #define LIST_HEAD(name, type) \ struct name { \ struct type *lh_first; /* first element */ \ } -/* Interval between sucessive polls in the Tx routine when polling is used +/* Interval between successive polls in the Tx routine when polling is used instead of interrupts (in per-core Tx rings) - should be power of 2 */ #define SDP_TX_POLL_MODER 16 #define SDP_TX_POLL_TIMEOUT (HZ / 20) #define SDP_NAGLE_TIMEOUT (HZ / 10) #define SDP_SRCAVAIL_CANCEL_TIMEOUT (HZ * 5) #define SDP_SRCAVAIL_ADV_TIMEOUT (1 * HZ) #define SDP_SRCAVAIL_PAYLOAD_LEN 1 #define SDP_RESOLVE_TIMEOUT 1000 #define SDP_ROUTE_TIMEOUT 1000 #define SDP_RETRY_COUNT 5 #define SDP_KEEPALIVE_TIME (120 * 60 * HZ) #define SDP_FIN_WAIT_TIMEOUT (60 * HZ) /* like TCP_FIN_TIMEOUT */ #define SDP_TX_SIZE 0x40 #define SDP_RX_SIZE 0x40 #define SDP_FMR_SIZE (MIN(0x1000, PAGE_SIZE) / sizeof(u64)) #define SDP_FMR_POOL_SIZE 1024 #define SDP_FMR_DIRTY_SIZE ( SDP_FMR_POOL_SIZE / 4 ) #define SDP_MAX_RDMA_READ_LEN (PAGE_SIZE * (SDP_FMR_SIZE - 2)) /* mb inlined data len - rest will be rx'ed into frags */ #define SDP_HEAD_SIZE (sizeof(struct sdp_bsdh)) /* limit tx payload len, if the sink supports bigger buffers than the source * can handle. * or rx fragment size (limited by sge->length size) */ #define SDP_MAX_PACKET (1 << 16) #define SDP_MAX_PAYLOAD (SDP_MAX_PACKET - SDP_HEAD_SIZE) #define SDP_MAX_RECV_SGES (SDP_MAX_PACKET / MCLBYTES) #define SDP_MAX_SEND_SGES (SDP_MAX_PACKET / MCLBYTES) + 2 #define SDP_NUM_WC 4 #define SDP_DEF_ZCOPY_THRESH 64*1024 #define SDP_MIN_ZCOPY_THRESH PAGE_SIZE #define SDP_MAX_ZCOPY_THRESH 1048576 #define SDP_OP_RECV 0x800000000LL #define SDP_OP_SEND 0x400000000LL #define SDP_OP_RDMA 0x200000000LL #define SDP_OP_NOP 0x100000000LL /* how long (in jiffies) to block sender till tx completion*/ #define SDP_BZCOPY_POLL_TIMEOUT (HZ / 10) #define SDP_AUTO_CONF 0xffff #define AUTO_MOD_DELAY (HZ / 4) struct sdp_mb_cb { __u32 seq; /* Starting sequence number */ struct bzcopy_state *bz; struct rx_srcavail_state *rx_sa; struct tx_srcavail_state *tx_sa; }; #define M_PUSH M_PROTO1 /* Do a 'push'. */ #define M_URG M_PROTO2 /* Mark as urgent (oob). */ #define SDP_SKB_CB(__mb) ((struct sdp_mb_cb *)&((__mb)->cb[0])) #define BZCOPY_STATE(mb) (SDP_SKB_CB(mb)->bz) #define RX_SRCAVAIL_STATE(mb) (SDP_SKB_CB(mb)->rx_sa) #define TX_SRCAVAIL_STATE(mb) (SDP_SKB_CB(mb)->tx_sa) #ifndef MIN #define MIN(a, b) (a < b ? a : b) #endif #define ring_head(ring) (atomic_read(&(ring).head)) #define ring_tail(ring) (atomic_read(&(ring).tail)) #define ring_posted(ring) (ring_head(ring) - ring_tail(ring)) #define rx_ring_posted(ssk) ring_posted(ssk->rx_ring) #ifdef SDP_ZCOPY #define tx_ring_posted(ssk) (ring_posted(ssk->tx_ring) + \ (ssk->tx_ring.rdma_inflight ? ssk->tx_ring.rdma_inflight->busy : 0)) #else #define tx_ring_posted(ssk) ring_posted(ssk->tx_ring) #endif extern int sdp_zcopy_thresh; extern int rcvbuf_initial_size; extern struct workqueue_struct *rx_comp_wq; extern struct ib_client sdp_client; enum sdp_mid { SDP_MID_HELLO = 0x0, SDP_MID_HELLO_ACK = 0x1, SDP_MID_DISCONN = 0x2, SDP_MID_ABORT = 0x3, SDP_MID_SENDSM = 0x4, SDP_MID_RDMARDCOMPL = 0x6, SDP_MID_SRCAVAIL_CANCEL = 0x8, SDP_MID_CHRCVBUF = 0xB, SDP_MID_CHRCVBUF_ACK = 0xC, SDP_MID_SINKAVAIL = 0xFD, SDP_MID_SRCAVAIL = 0xFE, SDP_MID_DATA = 0xFF, }; enum sdp_flags { SDP_OOB_PRES = 1 << 0, SDP_OOB_PEND = 1 << 1, }; enum { SDP_MIN_TX_CREDITS = 2 }; enum { SDP_ERR_ERROR = -4, SDP_ERR_FAULT = -3, SDP_NEW_SEG = -2, SDP_DO_WAIT_MEM = -1 }; struct sdp_bsdh { u8 mid; u8 flags; __u16 bufs; __u32 len; __u32 mseq; __u32 mseq_ack; } __attribute__((__packed__)); union cma_ip_addr { struct in6_addr ip6; struct { __u32 pad[3]; __u32 addr; } ip4; } __attribute__((__packed__)); /* TODO: too much? Can I avoid having the src/dst and port here? */ struct sdp_hh { struct sdp_bsdh bsdh; u8 majv_minv; u8 ipv_cap; u8 rsvd1; u8 max_adverts; __u32 desremrcvsz; __u32 localrcvsz; __u16 port; __u16 rsvd2; union cma_ip_addr src_addr; union cma_ip_addr dst_addr; u8 rsvd3[IB_CM_REQ_PRIVATE_DATA_SIZE - sizeof(struct sdp_bsdh) - 48]; } __attribute__((__packed__)); struct sdp_hah { struct sdp_bsdh bsdh; u8 majv_minv; u8 ipv_cap; u8 rsvd1; u8 ext_max_adverts; __u32 actrcvsz; u8 rsvd2[IB_CM_REP_PRIVATE_DATA_SIZE - sizeof(struct sdp_bsdh) - 8]; } __attribute__((__packed__)); struct sdp_rrch { __u32 len; } __attribute__((__packed__)); struct sdp_srcah { __u32 len; __u32 rkey; __u64 vaddr; } __attribute__((__packed__)); struct sdp_buf { struct mbuf *mb; u64 mapping[SDP_MAX_SEND_SGES]; } __attribute__((__packed__)); struct sdp_chrecvbuf { u32 size; } __attribute__((__packed__)); /* Context used for synchronous zero copy bcopy (BZCOPY) */ struct bzcopy_state { unsigned char __user *u_base; int u_len; int left; int page_cnt; int cur_page; int cur_offset; int busy; struct sdp_sock *ssk; struct page **pages; }; enum rx_sa_flag { RX_SA_ABORTED = 2, }; enum tx_sa_flag { TX_SA_SENDSM = 0x01, TX_SA_CROSS_SEND = 0x02, TX_SA_INTRRUPTED = 0x04, TX_SA_TIMEDOUT = 0x08, TX_SA_ERROR = 0x10, }; struct rx_srcavail_state { /* Advertised buffer stuff */ u32 mseq; u32 used; u32 reported; u32 len; u32 rkey; u64 vaddr; /* Dest buff info */ struct ib_umem *umem; struct ib_pool_fmr *fmr; /* Utility */ u8 busy; enum rx_sa_flag flags; }; struct tx_srcavail_state { /* Data below 'busy' will be reset */ u8 busy; struct ib_umem *umem; struct ib_pool_fmr *fmr; u32 bytes_sent; u32 bytes_acked; enum tx_sa_flag abort_flags; u8 posted; u32 mseq; }; struct sdp_tx_ring { #ifdef SDP_ZCOPY struct rx_srcavail_state *rdma_inflight; #endif struct sdp_buf *buffer; atomic_t head; atomic_t tail; struct ib_cq *cq; atomic_t credits; #define tx_credits(ssk) (atomic_read(&ssk->tx_ring.credits)) struct callout timer; u16 poll_cnt; }; struct sdp_rx_ring { struct sdp_buf *buffer; atomic_t head; atomic_t tail; struct ib_cq *cq; int destroyed; struct rwlock destroyed_lock; }; struct sdp_device { struct ib_pd *pd; struct ib_mr *mr; struct ib_fmr_pool *fmr_pool; }; struct sdp_moderation { unsigned long last_moder_packets; unsigned long last_moder_tx_packets; unsigned long last_moder_bytes; unsigned long last_moder_jiffies; int last_moder_time; u16 rx_usecs; u16 rx_frames; u16 tx_usecs; u32 pkt_rate_low; u16 rx_usecs_low; u32 pkt_rate_high; u16 rx_usecs_high; u16 sample_interval; u16 adaptive_rx_coal; u32 msg_enable; int moder_cnt; int moder_time; }; /* These are flags fields. */ #define SDP_TIMEWAIT 0x0001 /* In ssk timewait state. */ #define SDP_DROPPED 0x0002 /* Socket has been dropped. */ #define SDP_SOCKREF 0x0004 /* Holding a sockref for close. */ #define SDP_NODELAY 0x0008 /* Disble nagle. */ #define SDP_NEEDFIN 0x0010 /* Send a fin on the next tx. */ #define SDP_DREQWAIT 0x0020 /* Waiting on DREQ. */ #define SDP_DESTROY 0x0040 /* Being destroyed. */ #define SDP_DISCON 0x0080 /* rdma_disconnect is owed. */ /* These are oobflags */ #define SDP_HADOOB 0x0001 /* Had OOB data. */ #define SDP_HAVEOOB 0x0002 /* Have OOB data. */ struct sdp_sock { LIST_ENTRY(sdp_sock) list; struct socket *socket; struct rdma_cm_id *id; struct ib_device *ib_device; struct sdp_device *sdp_dev; struct ib_qp *qp; struct ucred *cred; struct callout keep2msl; /* 2msl and keepalive timer. */ struct callout nagle_timer; /* timeout waiting for ack */ struct ib_ucontext context; in_port_t lport; in_addr_t laddr; in_port_t fport; in_addr_t faddr; int flags; int oobflags; /* protected by rx lock. */ int state; int softerror; int recv_bytes; /* Bytes per recv. buf including header */ int xmit_size_goal; char iobc; struct sdp_rx_ring rx_ring; struct sdp_tx_ring tx_ring; struct rwlock lock; struct mbuf *rx_ctl_q; struct mbuf *rx_ctl_tail; int qp_active; /* XXX Flag. */ int max_sge; struct work_struct rx_comp_work; #define rcv_nxt(ssk) atomic_read(&(ssk->rcv_nxt)) atomic_t rcv_nxt; /* SDP specific */ atomic_t mseq_ack; #define mseq_ack(ssk) (atomic_read(&ssk->mseq_ack)) unsigned max_bufs; /* Initial buffers offered by other side */ unsigned min_bufs; /* Low water mark to wake senders */ unsigned long nagle_last_unacked; /* mseq of lastest unacked packet */ atomic_t remote_credits; #define remote_credits(ssk) (atomic_read(&ssk->remote_credits)) int poll_cq; /* SDP slow start */ int recv_request_head; /* mark the rx_head when the resize request - was recieved */ - int recv_request; /* XXX flag if request to resize was recieved */ + was received */ + int recv_request; /* XXX flag if request to resize was received */ unsigned long tx_packets; unsigned long rx_packets; unsigned long tx_bytes; unsigned long rx_bytes; struct sdp_moderation auto_mod; struct task shutdown_task; #ifdef SDP_ZCOPY struct tx_srcavail_state *tx_sa; struct rx_srcavail_state *rx_sa; spinlock_t tx_sa_lock; struct delayed_work srcavail_cancel_work; int srcavail_cancel_mseq; /* ZCOPY data: -1:use global; 0:disable zcopy; >0: zcopy threshold */ int zcopy_thresh; #endif }; #define sdp_sk(so) ((struct sdp_sock *)(so->so_pcb)) #define SDP_RLOCK(ssk) rw_rlock(&(ssk)->lock) #define SDP_WLOCK(ssk) rw_wlock(&(ssk)->lock) #define SDP_RUNLOCK(ssk) rw_runlock(&(ssk)->lock) #define SDP_WUNLOCK(ssk) rw_wunlock(&(ssk)->lock) #define SDP_WLOCK_ASSERT(ssk) rw_assert(&(ssk)->lock, RA_WLOCKED) #define SDP_RLOCK_ASSERT(ssk) rw_assert(&(ssk)->lock, RA_RLOCKED) #define SDP_LOCK_ASSERT(ssk) rw_assert(&(ssk)->lock, RA_LOCKED) static inline void tx_sa_reset(struct tx_srcavail_state *tx_sa) { memset((void *)&tx_sa->busy, 0, sizeof(*tx_sa) - offsetof(typeof(*tx_sa), busy)); } static inline void rx_ring_unlock(struct sdp_rx_ring *rx_ring) { rw_runlock(&rx_ring->destroyed_lock); } static inline int rx_ring_trylock(struct sdp_rx_ring *rx_ring) { rw_rlock(&rx_ring->destroyed_lock); if (rx_ring->destroyed) { rx_ring_unlock(rx_ring); return 0; } return 1; } static inline void rx_ring_destroy_lock(struct sdp_rx_ring *rx_ring) { rw_wlock(&rx_ring->destroyed_lock); rx_ring->destroyed = 1; rw_wunlock(&rx_ring->destroyed_lock); } static inline void sdp_arm_rx_cq(struct sdp_sock *ssk) { sdp_prf(ssk->socket, NULL, "Arming RX cq"); sdp_dbg_data(ssk->socket, "Arming RX cq\n"); ib_req_notify_cq(ssk->rx_ring.cq, IB_CQ_NEXT_COMP); } static inline void sdp_arm_tx_cq(struct sdp_sock *ssk) { sdp_prf(ssk->socket, NULL, "Arming TX cq"); sdp_dbg_data(ssk->socket, "Arming TX cq. credits: %d, posted: %d\n", tx_credits(ssk), tx_ring_posted(ssk)); ib_req_notify_cq(ssk->tx_ring.cq, IB_CQ_NEXT_COMP); } /* return the min of: * - tx credits * - free slots in tx_ring (not including SDP_MIN_TX_CREDITS */ static inline int tx_slots_free(struct sdp_sock *ssk) { int min_free; min_free = MIN(tx_credits(ssk), SDP_TX_SIZE - tx_ring_posted(ssk)); if (min_free < SDP_MIN_TX_CREDITS) return 0; return min_free - SDP_MIN_TX_CREDITS; }; /* utilities */ static inline char *mid2str(int mid) { #define ENUM2STR(e) [e] = #e static char *mid2str[] = { ENUM2STR(SDP_MID_HELLO), ENUM2STR(SDP_MID_HELLO_ACK), ENUM2STR(SDP_MID_ABORT), ENUM2STR(SDP_MID_DISCONN), ENUM2STR(SDP_MID_SENDSM), ENUM2STR(SDP_MID_RDMARDCOMPL), ENUM2STR(SDP_MID_SRCAVAIL_CANCEL), ENUM2STR(SDP_MID_CHRCVBUF), ENUM2STR(SDP_MID_CHRCVBUF_ACK), ENUM2STR(SDP_MID_DATA), ENUM2STR(SDP_MID_SRCAVAIL), ENUM2STR(SDP_MID_SINKAVAIL), }; if (mid >= ARRAY_SIZE(mid2str)) return NULL; return mid2str[mid]; } static inline struct mbuf * sdp_alloc_mb(struct socket *sk, u8 mid, int size, int wait) { struct sdp_bsdh *h; struct mbuf *mb; MGETHDR(mb, wait, MT_DATA); if (mb == NULL) return (NULL); mb->m_pkthdr.len = mb->m_len = sizeof(struct sdp_bsdh); h = mtod(mb, struct sdp_bsdh *); h->mid = mid; return mb; } static inline struct mbuf * sdp_alloc_mb_data(struct socket *sk, int wait) { return sdp_alloc_mb(sk, SDP_MID_DATA, 0, wait); } static inline struct mbuf * sdp_alloc_mb_disconnect(struct socket *sk, int wait) { return sdp_alloc_mb(sk, SDP_MID_DISCONN, 0, wait); } static inline void * mb_put(struct mbuf *mb, int len) { uint8_t *data; data = mb->m_data; data += mb->m_len; mb->m_len += len; return (void *)data; } static inline struct mbuf * sdp_alloc_mb_chrcvbuf_ack(struct socket *sk, int size, int wait) { struct mbuf *mb; struct sdp_chrecvbuf *resp_size; mb = sdp_alloc_mb(sk, SDP_MID_CHRCVBUF_ACK, sizeof(*resp_size), wait); if (mb == NULL) return (NULL); resp_size = (struct sdp_chrecvbuf *)mb_put(mb, sizeof *resp_size); resp_size->size = htonl(size); return mb; } static inline struct mbuf * sdp_alloc_mb_srcavail(struct socket *sk, u32 len, u32 rkey, u64 vaddr, int wait) { struct mbuf *mb; struct sdp_srcah *srcah; mb = sdp_alloc_mb(sk, SDP_MID_SRCAVAIL, sizeof(*srcah), wait); if (mb == NULL) return (NULL); srcah = (struct sdp_srcah *)mb_put(mb, sizeof(*srcah)); srcah->len = htonl(len); srcah->rkey = htonl(rkey); srcah->vaddr = cpu_to_be64(vaddr); return mb; } static inline struct mbuf * sdp_alloc_mb_srcavail_cancel(struct socket *sk, int wait) { return sdp_alloc_mb(sk, SDP_MID_SRCAVAIL_CANCEL, 0, wait); } static inline struct mbuf * sdp_alloc_mb_rdmardcompl(struct socket *sk, u32 len, int wait) { struct mbuf *mb; struct sdp_rrch *rrch; mb = sdp_alloc_mb(sk, SDP_MID_RDMARDCOMPL, sizeof(*rrch), wait); if (mb == NULL) return (NULL); rrch = (struct sdp_rrch *)mb_put(mb, sizeof(*rrch)); rrch->len = htonl(len); return mb; } static inline struct mbuf * sdp_alloc_mb_sendsm(struct socket *sk, int wait) { return sdp_alloc_mb(sk, SDP_MID_SENDSM, 0, wait); } static inline int sdp_tx_ring_slots_left(struct sdp_sock *ssk) { return SDP_TX_SIZE - tx_ring_posted(ssk); } static inline int credit_update_needed(struct sdp_sock *ssk) { int c; c = remote_credits(ssk); if (likely(c > SDP_MIN_TX_CREDITS)) c += c/2; return unlikely(c < rx_ring_posted(ssk)) && likely(tx_credits(ssk) > 0) && likely(sdp_tx_ring_slots_left(ssk)); } #define SDPSTATS_COUNTER_INC(stat) #define SDPSTATS_COUNTER_ADD(stat, val) #define SDPSTATS_COUNTER_MID_INC(stat, mid) #define SDPSTATS_HIST_LINEAR(stat, size) #define SDPSTATS_HIST(stat, size) static inline void sdp_cleanup_sdp_buf(struct sdp_sock *ssk, struct sdp_buf *sbuf, enum dma_data_direction dir) { struct ib_device *dev; struct mbuf *mb; int i; dev = ssk->ib_device; for (i = 0, mb = sbuf->mb; mb != NULL; mb = mb->m_next, i++) ib_dma_unmap_single(dev, sbuf->mapping[i], mb->m_len, dir); } /* sdp_main.c */ void sdp_set_default_moderation(struct sdp_sock *ssk); void sdp_start_keepalive_timer(struct socket *sk); void sdp_urg(struct sdp_sock *ssk, struct mbuf *mb); void sdp_cancel_dreq_wait_timeout(struct sdp_sock *ssk); void sdp_abort(struct socket *sk); struct sdp_sock *sdp_notify(struct sdp_sock *ssk, int error); /* sdp_cma.c */ int sdp_cma_handler(struct rdma_cm_id *, struct rdma_cm_event *); /* sdp_tx.c */ int sdp_tx_ring_create(struct sdp_sock *ssk, struct ib_device *device); void sdp_tx_ring_destroy(struct sdp_sock *ssk); int sdp_xmit_poll(struct sdp_sock *ssk, int force); void sdp_post_send(struct sdp_sock *ssk, struct mbuf *mb); void sdp_post_sends(struct sdp_sock *ssk, int wait); void sdp_post_keepalive(struct sdp_sock *ssk); /* sdp_rx.c */ void sdp_rx_ring_init(struct sdp_sock *ssk); int sdp_rx_ring_create(struct sdp_sock *ssk, struct ib_device *device); void sdp_rx_ring_destroy(struct sdp_sock *ssk); int sdp_resize_buffers(struct sdp_sock *ssk, u32 new_size); int sdp_init_buffers(struct sdp_sock *ssk, u32 new_size); void sdp_do_posts(struct sdp_sock *ssk); void sdp_rx_comp_full(struct sdp_sock *ssk); /* sdp_zcopy.c */ struct kiocb; int sdp_sendmsg_zcopy(struct kiocb *iocb, struct socket *sk, struct iovec *iov); int sdp_handle_srcavail(struct sdp_sock *ssk, struct sdp_srcah *srcah); void sdp_handle_sendsm(struct sdp_sock *ssk, u32 mseq_ack); void sdp_handle_rdma_read_compl(struct sdp_sock *ssk, u32 mseq_ack, u32 bytes_completed); int sdp_handle_rdma_read_cqe(struct sdp_sock *ssk); int sdp_rdma_to_iovec(struct socket *sk, struct iovec *iov, struct mbuf *mb, unsigned long *used); int sdp_post_rdma_rd_compl(struct sdp_sock *ssk, struct rx_srcavail_state *rx_sa); int sdp_post_sendsm(struct socket *sk); void srcavail_cancel_timeout(struct work_struct *work); void sdp_abort_srcavail(struct socket *sk); void sdp_abort_rdma_read(struct socket *sk); int sdp_process_rx(struct sdp_sock *ssk); #endif Index: head/sys/ofed/drivers/infiniband/ulp/sdp/sdp_tx.c =================================================================== --- head/sys/ofed/drivers/infiniband/ulp/sdp/sdp_tx.c (revision 299178) +++ head/sys/ofed/drivers/infiniband/ulp/sdp/sdp_tx.c (revision 299179) @@ -1,490 +1,490 @@ /* * Copyright (c) 2009 Mellanox Technologies Ltd. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include "sdp.h" #define sdp_cnt(var) do { (var)++; } while (0) SDP_MODPARAM_SINT(sdp_keepalive_probes_sent, 0, "Total number of keepalive probes sent."); static int sdp_process_tx_cq(struct sdp_sock *ssk); static void sdp_poll_tx_timeout(void *data); int sdp_xmit_poll(struct sdp_sock *ssk, int force) { int wc_processed = 0; SDP_WLOCK_ASSERT(ssk); sdp_prf(ssk->socket, NULL, "%s", __func__); /* If we don't have a pending timer, set one up to catch our recent post in case the interface becomes idle */ if (!callout_pending(&ssk->tx_ring.timer)) callout_reset(&ssk->tx_ring.timer, SDP_TX_POLL_TIMEOUT, sdp_poll_tx_timeout, ssk); /* Poll the CQ every SDP_TX_POLL_MODER packets */ if (force || (++ssk->tx_ring.poll_cnt & (SDP_TX_POLL_MODER - 1)) == 0) wc_processed = sdp_process_tx_cq(ssk); return wc_processed; } void sdp_post_send(struct sdp_sock *ssk, struct mbuf *mb) { struct sdp_buf *tx_req; struct sdp_bsdh *h; unsigned long mseq; struct ib_device *dev; struct ib_send_wr *bad_wr; struct ib_sge ibsge[SDP_MAX_SEND_SGES]; struct ib_sge *sge; struct ib_send_wr tx_wr = { NULL }; int i, rc; u64 addr; SDPSTATS_COUNTER_MID_INC(post_send, h->mid); SDPSTATS_HIST(send_size, mb->len); if (!ssk->qp_active) { m_freem(mb); return; } mseq = ring_head(ssk->tx_ring); h = mtod(mb, struct sdp_bsdh *); ssk->tx_packets++; ssk->tx_bytes += mb->m_pkthdr.len; #ifdef SDP_ZCOPY if (unlikely(h->mid == SDP_MID_SRCAVAIL)) { struct tx_srcavail_state *tx_sa = TX_SRCAVAIL_STATE(mb); if (ssk->tx_sa != tx_sa) { sdp_dbg_data(ssk->socket, "SrcAvail cancelled " "before being sent!\n"); WARN_ON(1); m_freem(mb); return; } TX_SRCAVAIL_STATE(mb)->mseq = mseq; } #endif if (unlikely(mb->m_flags & M_URG)) h->flags = SDP_OOB_PRES | SDP_OOB_PEND; else h->flags = 0; mb->m_flags |= M_RDONLY; /* Don't allow compression once sent. */ h->bufs = htons(rx_ring_posted(ssk)); h->len = htonl(mb->m_pkthdr.len); h->mseq = htonl(mseq); h->mseq_ack = htonl(mseq_ack(ssk)); sdp_prf1(ssk->socket, mb, "TX: %s bufs: %d mseq:%ld ack:%d", mid2str(h->mid), rx_ring_posted(ssk), mseq, ntohl(h->mseq_ack)); SDP_DUMP_PACKET(ssk->socket, "TX", mb, h); tx_req = &ssk->tx_ring.buffer[mseq & (SDP_TX_SIZE - 1)]; tx_req->mb = mb; dev = ssk->ib_device; sge = &ibsge[0]; for (i = 0; mb != NULL; i++, mb = mb->m_next, sge++) { addr = ib_dma_map_single(dev, mb->m_data, mb->m_len, DMA_TO_DEVICE); /* TODO: proper error handling */ BUG_ON(ib_dma_mapping_error(dev, addr)); BUG_ON(i >= SDP_MAX_SEND_SGES); tx_req->mapping[i] = addr; sge->addr = addr; sge->length = mb->m_len; sge->lkey = ssk->sdp_dev->mr->lkey; } tx_wr.next = NULL; tx_wr.wr_id = mseq | SDP_OP_SEND; tx_wr.sg_list = ibsge; tx_wr.num_sge = i; tx_wr.opcode = IB_WR_SEND; tx_wr.send_flags = IB_SEND_SIGNALED; if (unlikely(tx_req->mb->m_flags & M_URG)) tx_wr.send_flags |= IB_SEND_SOLICITED; rc = ib_post_send(ssk->qp, &tx_wr, &bad_wr); if (unlikely(rc)) { sdp_dbg(ssk->socket, "ib_post_send failed with status %d.\n", rc); sdp_cleanup_sdp_buf(ssk, tx_req, DMA_TO_DEVICE); sdp_notify(ssk, ECONNRESET); m_freem(tx_req->mb); return; } atomic_inc(&ssk->tx_ring.head); atomic_dec(&ssk->tx_ring.credits); atomic_set(&ssk->remote_credits, rx_ring_posted(ssk)); return; } static struct mbuf * sdp_send_completion(struct sdp_sock *ssk, int mseq) { struct ib_device *dev; struct sdp_buf *tx_req; struct mbuf *mb = NULL; struct sdp_tx_ring *tx_ring = &ssk->tx_ring; if (unlikely(mseq != ring_tail(*tx_ring))) { printk(KERN_WARNING "Bogus send completion id %d tail %d\n", mseq, ring_tail(*tx_ring)); goto out; } dev = ssk->ib_device; tx_req = &tx_ring->buffer[mseq & (SDP_TX_SIZE - 1)]; mb = tx_req->mb; sdp_cleanup_sdp_buf(ssk, tx_req, DMA_TO_DEVICE); #ifdef SDP_ZCOPY /* TODO: AIO and real zcopy code; add their context support here */ if (BZCOPY_STATE(mb)) BZCOPY_STATE(mb)->busy--; #endif atomic_inc(&tx_ring->tail); out: return mb; } static int sdp_handle_send_comp(struct sdp_sock *ssk, struct ib_wc *wc) { struct mbuf *mb = NULL; struct sdp_bsdh *h; if (unlikely(wc->status)) { if (wc->status != IB_WC_WR_FLUSH_ERR) { sdp_prf(ssk->socket, mb, "Send completion with error. " "Status %d", wc->status); sdp_dbg_data(ssk->socket, "Send completion with error. " "Status %d\n", wc->status); sdp_notify(ssk, ECONNRESET); } } mb = sdp_send_completion(ssk, wc->wr_id); if (unlikely(!mb)) return -1; h = mtod(mb, struct sdp_bsdh *); sdp_prf1(ssk->socket, mb, "tx completion. mseq:%d", ntohl(h->mseq)); sdp_dbg(ssk->socket, "tx completion. %p %d mseq:%d", mb, mb->m_pkthdr.len, ntohl(h->mseq)); m_freem(mb); return 0; } static inline void sdp_process_tx_wc(struct sdp_sock *ssk, struct ib_wc *wc) { if (likely(wc->wr_id & SDP_OP_SEND)) { sdp_handle_send_comp(ssk, wc); return; } #ifdef SDP_ZCOPY if (wc->wr_id & SDP_OP_RDMA) { /* TODO: handle failed RDMA read cqe */ sdp_dbg_data(ssk->socket, "TX comp: RDMA read. status: %d\n", wc->status); sdp_prf1(sk, NULL, "TX comp: RDMA read"); if (!ssk->tx_ring.rdma_inflight) { sdp_warn(ssk->socket, "ERROR: unexpected RDMA read\n"); return; } if (!ssk->tx_ring.rdma_inflight->busy) { sdp_warn(ssk->socket, "ERROR: too many RDMA read completions\n"); return; } /* Only last RDMA read WR is signalled. Order is guaranteed - * therefore if Last RDMA read WR is completed - all other * have, too */ ssk->tx_ring.rdma_inflight->busy = 0; sowwakeup(ssk->socket); sdp_dbg_data(ssk->socket, "woke up sleepers\n"); return; } #endif /* Keepalive probe sent cleanup */ sdp_cnt(sdp_keepalive_probes_sent); if (likely(!wc->status)) return; sdp_dbg(ssk->socket, " %s consumes KEEPALIVE status %d\n", __func__, wc->status); if (wc->status == IB_WC_WR_FLUSH_ERR) return; sdp_notify(ssk, ECONNRESET); } static int sdp_process_tx_cq(struct sdp_sock *ssk) { struct ib_wc ibwc[SDP_NUM_WC]; int n, i; int wc_processed = 0; SDP_WLOCK_ASSERT(ssk); if (!ssk->tx_ring.cq) { sdp_dbg(ssk->socket, "tx irq on destroyed tx_cq\n"); return 0; } do { n = ib_poll_cq(ssk->tx_ring.cq, SDP_NUM_WC, ibwc); for (i = 0; i < n; ++i) { sdp_process_tx_wc(ssk, ibwc + i); wc_processed++; } } while (n == SDP_NUM_WC); if (wc_processed) { sdp_post_sends(ssk, M_NOWAIT); sdp_prf1(sk, NULL, "Waking sendmsg. inflight=%d", (u32) tx_ring_posted(ssk)); sowwakeup(ssk->socket); } return wc_processed; } static void sdp_poll_tx(struct sdp_sock *ssk) { struct socket *sk = ssk->socket; u32 inflight, wc_processed; sdp_prf1(ssk->socket, NULL, "TX timeout: inflight=%d, head=%d tail=%d", (u32) tx_ring_posted(ssk), ring_head(ssk->tx_ring), ring_tail(ssk->tx_ring)); if (unlikely(ssk->state == TCPS_CLOSED)) { sdp_warn(sk, "Socket is closed\n"); goto out; } wc_processed = sdp_process_tx_cq(ssk); if (!wc_processed) SDPSTATS_COUNTER_INC(tx_poll_miss); else SDPSTATS_COUNTER_INC(tx_poll_hit); inflight = (u32) tx_ring_posted(ssk); - sdp_prf1(ssk->socket, NULL, "finished tx proccessing. inflight = %d", + sdp_prf1(ssk->socket, NULL, "finished tx processing. inflight = %d", inflight); /* If there are still packets in flight and the timer has not already * been scheduled by the Tx routine then schedule it here to guarantee * completion processing of these packets */ if (inflight) callout_reset(&ssk->tx_ring.timer, SDP_TX_POLL_TIMEOUT, sdp_poll_tx_timeout, ssk); out: #ifdef SDP_ZCOPY if (ssk->tx_ring.rdma_inflight && ssk->tx_ring.rdma_inflight->busy) { sdp_prf1(sk, NULL, "RDMA is inflight - arming irq"); sdp_arm_tx_cq(ssk); } #endif return; } static void sdp_poll_tx_timeout(void *data) { struct sdp_sock *ssk = (struct sdp_sock *)data; if (!callout_active(&ssk->tx_ring.timer)) return; callout_deactivate(&ssk->tx_ring.timer); sdp_poll_tx(ssk); } static void sdp_tx_irq(struct ib_cq *cq, void *cq_context) { struct sdp_sock *ssk; ssk = cq_context; sdp_prf1(ssk->socket, NULL, "tx irq"); sdp_dbg_data(ssk->socket, "Got tx comp interrupt\n"); SDPSTATS_COUNTER_INC(tx_int_count); SDP_WLOCK(ssk); sdp_poll_tx(ssk); SDP_WUNLOCK(ssk); } static void sdp_tx_ring_purge(struct sdp_sock *ssk) { while (tx_ring_posted(ssk)) { struct mbuf *mb; mb = sdp_send_completion(ssk, ring_tail(ssk->tx_ring)); if (!mb) break; m_freem(mb); } } void sdp_post_keepalive(struct sdp_sock *ssk) { int rc; struct ib_send_wr wr, *bad_wr; sdp_dbg(ssk->socket, "%s\n", __func__); memset(&wr, 0, sizeof(wr)); wr.next = NULL; wr.wr_id = 0; wr.sg_list = NULL; wr.num_sge = 0; wr.opcode = IB_WR_RDMA_WRITE; rc = ib_post_send(ssk->qp, &wr, &bad_wr); if (rc) { sdp_dbg(ssk->socket, "ib_post_keepalive failed with status %d.\n", rc); sdp_notify(ssk, ECONNRESET); } sdp_cnt(sdp_keepalive_probes_sent); } static void sdp_tx_cq_event_handler(struct ib_event *event, void *data) { } int sdp_tx_ring_create(struct sdp_sock *ssk, struct ib_device *device) { struct ib_cq *tx_cq; int rc = 0; sdp_dbg(ssk->socket, "tx ring create\n"); callout_init_rw(&ssk->tx_ring.timer, &ssk->lock, 0); callout_init_rw(&ssk->nagle_timer, &ssk->lock, 0); atomic_set(&ssk->tx_ring.head, 1); atomic_set(&ssk->tx_ring.tail, 1); ssk->tx_ring.buffer = kzalloc( sizeof *ssk->tx_ring.buffer * SDP_TX_SIZE, GFP_KERNEL); if (!ssk->tx_ring.buffer) { rc = -ENOMEM; sdp_warn(ssk->socket, "Can't allocate TX Ring size %zd.\n", sizeof(*ssk->tx_ring.buffer) * SDP_TX_SIZE); goto out; } tx_cq = ib_create_cq(device, sdp_tx_irq, sdp_tx_cq_event_handler, ssk, SDP_TX_SIZE, 0); if (IS_ERR(tx_cq)) { rc = PTR_ERR(tx_cq); sdp_warn(ssk->socket, "Unable to allocate TX CQ: %d.\n", rc); goto err_cq; } ssk->tx_ring.cq = tx_cq; ssk->tx_ring.poll_cnt = 0; sdp_arm_tx_cq(ssk); return 0; err_cq: kfree(ssk->tx_ring.buffer); ssk->tx_ring.buffer = NULL; out: return rc; } void sdp_tx_ring_destroy(struct sdp_sock *ssk) { sdp_dbg(ssk->socket, "tx ring destroy\n"); SDP_WLOCK(ssk); callout_stop(&ssk->tx_ring.timer); callout_stop(&ssk->nagle_timer); SDP_WUNLOCK(ssk); callout_drain(&ssk->tx_ring.timer); callout_drain(&ssk->nagle_timer); if (ssk->tx_ring.buffer) { sdp_tx_ring_purge(ssk); kfree(ssk->tx_ring.buffer); ssk->tx_ring.buffer = NULL; } if (ssk->tx_ring.cq) { if (ib_destroy_cq(ssk->tx_ring.cq)) { sdp_warn(ssk->socket, "destroy cq(%p) failed\n", ssk->tx_ring.cq); } else { ssk->tx_ring.cq = NULL; } } WARN_ON(ring_head(ssk->tx_ring) != ring_tail(ssk->tx_ring)); } Index: head/sys/ofed/drivers/net/mlx4/en_rx.c =================================================================== --- head/sys/ofed/drivers/net/mlx4/en_rx.c (revision 299178) +++ head/sys/ofed/drivers/net/mlx4/en_rx.c (revision 299179) @@ -1,922 +1,922 @@ /* * Copyright (c) 2007, 2014 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * */ #include "opt_inet.h" #include #include #include #include #include #include #include #ifdef CONFIG_NET_RX_BUSY_POLL #include #endif #include "mlx4_en.h" static void mlx4_en_init_rx_desc(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring *ring, int index) { struct mlx4_en_rx_desc *rx_desc = (struct mlx4_en_rx_desc *) (ring->buf + (ring->stride * index)); int possible_frags; int i; /* Set size and memtype fields */ rx_desc->data[0].byte_count = cpu_to_be32(priv->rx_mb_size - MLX4_NET_IP_ALIGN); rx_desc->data[0].lkey = cpu_to_be32(priv->mdev->mr.key); /* * If the number of used fragments does not fill up the ring * stride, remaining (unused) fragments must be padded with * null address/size and a special memory key: */ possible_frags = (ring->stride - sizeof(struct mlx4_en_rx_desc)) / DS_SIZE; for (i = 1; i < possible_frags; i++) { rx_desc->data[i].byte_count = 0; rx_desc->data[i].lkey = cpu_to_be32(MLX4_EN_MEMTYPE_PAD); rx_desc->data[i].addr = 0; } } static int mlx4_en_alloc_buf(struct mlx4_en_rx_ring *ring, __be64 *pdma, struct mlx4_en_rx_mbuf *mb_list) { bus_dma_segment_t segs[1]; bus_dmamap_t map; struct mbuf *mb; int nsegs; int err; /* try to allocate a new spare mbuf */ if (unlikely(ring->spare.mbuf == NULL)) { mb = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, ring->rx_mb_size); if (unlikely(mb == NULL)) return (-ENOMEM); /* setup correct length */ mb->m_pkthdr.len = mb->m_len = ring->rx_mb_size; /* make sure IP header gets aligned */ m_adj(mb, MLX4_NET_IP_ALIGN); /* load spare mbuf into BUSDMA */ err = -bus_dmamap_load_mbuf_sg(ring->dma_tag, ring->spare.dma_map, mb, segs, &nsegs, BUS_DMA_NOWAIT); if (unlikely(err != 0)) { m_freem(mb); return (err); } /* store spare info */ ring->spare.mbuf = mb; ring->spare.paddr_be = cpu_to_be64(segs[0].ds_addr); bus_dmamap_sync(ring->dma_tag, ring->spare.dma_map, BUS_DMASYNC_PREREAD); } /* synchronize and unload the current mbuf, if any */ if (likely(mb_list->mbuf != NULL)) { bus_dmamap_sync(ring->dma_tag, mb_list->dma_map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(ring->dma_tag, mb_list->dma_map); } mb = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, ring->rx_mb_size); if (unlikely(mb == NULL)) goto use_spare; /* setup correct length */ mb->m_pkthdr.len = mb->m_len = ring->rx_mb_size; /* make sure IP header gets aligned */ m_adj(mb, MLX4_NET_IP_ALIGN); err = -bus_dmamap_load_mbuf_sg(ring->dma_tag, mb_list->dma_map, mb, segs, &nsegs, BUS_DMA_NOWAIT); if (unlikely(err != 0)) { m_freem(mb); goto use_spare; } *pdma = cpu_to_be64(segs[0].ds_addr); mb_list->mbuf = mb; bus_dmamap_sync(ring->dma_tag, mb_list->dma_map, BUS_DMASYNC_PREREAD); return (0); use_spare: /* swap DMA maps */ map = mb_list->dma_map; mb_list->dma_map = ring->spare.dma_map; ring->spare.dma_map = map; /* swap MBUFs */ mb_list->mbuf = ring->spare.mbuf; ring->spare.mbuf = NULL; /* store physical address */ *pdma = ring->spare.paddr_be; return (0); } static void mlx4_en_free_buf(struct mlx4_en_rx_ring *ring, struct mlx4_en_rx_mbuf *mb_list) { bus_dmamap_t map = mb_list->dma_map; bus_dmamap_sync(ring->dma_tag, map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(ring->dma_tag, map); m_freem(mb_list->mbuf); mb_list->mbuf = NULL; /* safety clearing */ } static int mlx4_en_prepare_rx_desc(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring *ring, int index) { struct mlx4_en_rx_desc *rx_desc = (struct mlx4_en_rx_desc *) (ring->buf + (index * ring->stride)); struct mlx4_en_rx_mbuf *mb_list = ring->mbuf + index; mb_list->mbuf = NULL; if (mlx4_en_alloc_buf(ring, &rx_desc->data[0].addr, mb_list)) { priv->port_stats.rx_alloc_failed++; return (-ENOMEM); } return (0); } static inline void mlx4_en_update_rx_prod_db(struct mlx4_en_rx_ring *ring) { *ring->wqres.db.db = cpu_to_be32(ring->prod & 0xffff); } static int mlx4_en_fill_rx_buffers(struct mlx4_en_priv *priv) { struct mlx4_en_rx_ring *ring; int ring_ind; int buf_ind; int new_size; int err; for (buf_ind = 0; buf_ind < priv->prof->rx_ring_size; buf_ind++) { for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) { ring = priv->rx_ring[ring_ind]; err = mlx4_en_prepare_rx_desc(priv, ring, ring->actual_size); if (err) { if (ring->actual_size == 0) { en_err(priv, "Failed to allocate " "enough rx buffers\n"); return -ENOMEM; } else { new_size = rounddown_pow_of_two(ring->actual_size); en_warn(priv, "Only %d buffers allocated " "reducing ring size to %d\n", ring->actual_size, new_size); goto reduce_rings; } } ring->actual_size++; ring->prod++; } } return 0; reduce_rings: for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) { ring = priv->rx_ring[ring_ind]; while (ring->actual_size > new_size) { ring->actual_size--; ring->prod--; mlx4_en_free_buf(ring, ring->mbuf + ring->actual_size); } } return 0; } static void mlx4_en_free_rx_buf(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring *ring) { int index; en_dbg(DRV, priv, "Freeing Rx buf - cons:%d prod:%d\n", ring->cons, ring->prod); /* Unmap and free Rx buffers */ BUG_ON((u32) (ring->prod - ring->cons) > ring->actual_size); while (ring->cons != ring->prod) { index = ring->cons & ring->size_mask; en_dbg(DRV, priv, "Processing descriptor:%d\n", index); mlx4_en_free_buf(ring, ring->mbuf + index); ++ring->cons; } } void mlx4_en_calc_rx_buf(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); int eff_mtu = dev->if_mtu + ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN + MLX4_NET_IP_ALIGN; if (eff_mtu > MJUM16BYTES) { en_err(priv, "MTU(%d) is too big\n", dev->if_mtu); eff_mtu = MJUM16BYTES; } else if (eff_mtu > MJUM9BYTES) { eff_mtu = MJUM16BYTES; } else if (eff_mtu > MJUMPAGESIZE) { eff_mtu = MJUM9BYTES; } else if (eff_mtu > MCLBYTES) { eff_mtu = MJUMPAGESIZE; } else { eff_mtu = MCLBYTES; } priv->rx_mb_size = eff_mtu; en_dbg(DRV, priv, "Effective RX MTU: %d bytes\n", eff_mtu); } int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring **pring, u32 size, int node) { struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_en_rx_ring *ring; int err; int tmp; uint32_t x; ring = kzalloc(sizeof(struct mlx4_en_rx_ring), GFP_KERNEL); if (!ring) { en_err(priv, "Failed to allocate RX ring structure\n"); return -ENOMEM; } /* Create DMA descriptor TAG */ if ((err = -bus_dma_tag_create( bus_get_dma_tag(mdev->pdev->dev.bsddev), 1, /* any alignment */ 0, /* no boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ MJUM16BYTES, /* maxsize */ 1, /* nsegments */ MJUM16BYTES, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockfuncarg */ &ring->dma_tag))) { en_err(priv, "Failed to create DMA tag\n"); goto err_ring; } ring->prod = 0; ring->cons = 0; ring->size = size; ring->size_mask = size - 1; ring->stride = roundup_pow_of_two( sizeof(struct mlx4_en_rx_desc) + DS_SIZE); ring->log_stride = ffs(ring->stride) - 1; ring->buf_size = ring->size * ring->stride + TXBB_SIZE; tmp = size * sizeof(struct mlx4_en_rx_mbuf); ring->mbuf = kzalloc(tmp, GFP_KERNEL); if (ring->mbuf == NULL) { err = -ENOMEM; goto err_dma_tag; } err = -bus_dmamap_create(ring->dma_tag, 0, &ring->spare.dma_map); if (err != 0) goto err_info; for (x = 0; x != size; x++) { err = -bus_dmamap_create(ring->dma_tag, 0, &ring->mbuf[x].dma_map); if (err != 0) { while (x--) bus_dmamap_destroy(ring->dma_tag, ring->mbuf[x].dma_map); goto err_info; } } en_dbg(DRV, priv, "Allocated MBUF ring at addr:%p size:%d\n", ring->mbuf, tmp); err = mlx4_alloc_hwq_res(mdev->dev, &ring->wqres, ring->buf_size, 2 * PAGE_SIZE); if (err) goto err_dma_map; err = mlx4_en_map_buffer(&ring->wqres.buf); if (err) { en_err(priv, "Failed to map RX buffer\n"); goto err_hwq; } ring->buf = ring->wqres.buf.direct.buf; *pring = ring; return 0; err_hwq: mlx4_free_hwq_res(mdev->dev, &ring->wqres, ring->buf_size); err_dma_map: for (x = 0; x != size; x++) { bus_dmamap_destroy(ring->dma_tag, ring->mbuf[x].dma_map); } bus_dmamap_destroy(ring->dma_tag, ring->spare.dma_map); err_info: vfree(ring->mbuf); err_dma_tag: bus_dma_tag_destroy(ring->dma_tag); err_ring: kfree(ring); return (err); } int mlx4_en_activate_rx_rings(struct mlx4_en_priv *priv) { struct mlx4_en_rx_ring *ring; int i; int ring_ind; int err; int stride = roundup_pow_of_two( sizeof(struct mlx4_en_rx_desc) + DS_SIZE); for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) { ring = priv->rx_ring[ring_ind]; ring->prod = 0; ring->cons = 0; ring->actual_size = 0; ring->cqn = priv->rx_cq[ring_ind]->mcq.cqn; ring->rx_mb_size = priv->rx_mb_size; ring->stride = stride; if (ring->stride <= TXBB_SIZE) ring->buf += TXBB_SIZE; ring->log_stride = ffs(ring->stride) - 1; ring->buf_size = ring->size * ring->stride; memset(ring->buf, 0, ring->buf_size); mlx4_en_update_rx_prod_db(ring); /* Initialize all descriptors */ for (i = 0; i < ring->size; i++) mlx4_en_init_rx_desc(priv, ring, i); #ifdef INET /* Configure lro mngr */ if (priv->dev->if_capenable & IFCAP_LRO) { if (tcp_lro_init(&ring->lro)) priv->dev->if_capenable &= ~IFCAP_LRO; else ring->lro.ifp = priv->dev; } #endif } err = mlx4_en_fill_rx_buffers(priv); if (err) goto err_buffers; for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) { ring = priv->rx_ring[ring_ind]; ring->size_mask = ring->actual_size - 1; mlx4_en_update_rx_prod_db(ring); } return 0; err_buffers: for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) mlx4_en_free_rx_buf(priv, priv->rx_ring[ring_ind]); ring_ind = priv->rx_ring_num - 1; while (ring_ind >= 0) { ring = priv->rx_ring[ring_ind]; if (ring->stride <= TXBB_SIZE) ring->buf -= TXBB_SIZE; ring_ind--; } return err; } void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring **pring, u32 size, u16 stride) { struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_en_rx_ring *ring = *pring; uint32_t x; mlx4_en_unmap_buffer(&ring->wqres.buf); mlx4_free_hwq_res(mdev->dev, &ring->wqres, size * stride + TXBB_SIZE); for (x = 0; x != size; x++) bus_dmamap_destroy(ring->dma_tag, ring->mbuf[x].dma_map); /* free spare mbuf, if any */ if (ring->spare.mbuf != NULL) { bus_dmamap_sync(ring->dma_tag, ring->spare.dma_map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(ring->dma_tag, ring->spare.dma_map); m_freem(ring->spare.mbuf); } bus_dmamap_destroy(ring->dma_tag, ring->spare.dma_map); vfree(ring->mbuf); bus_dma_tag_destroy(ring->dma_tag); kfree(ring); *pring = NULL; #ifdef CONFIG_RFS_ACCEL mlx4_en_cleanup_filters(priv, ring); #endif } void mlx4_en_deactivate_rx_ring(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring *ring) { #ifdef INET tcp_lro_free(&ring->lro); #endif mlx4_en_free_rx_buf(priv, ring); if (ring->stride <= TXBB_SIZE) ring->buf -= TXBB_SIZE; } static void validate_loopback(struct mlx4_en_priv *priv, struct mbuf *mb) { int i; int offset = ETHER_HDR_LEN; for (i = 0; i < MLX4_LOOPBACK_TEST_PAYLOAD; i++, offset++) { if (*(mb->m_data + offset) != (unsigned char) (i & 0xff)) goto out_loopback; } /* Loopback found */ priv->loopback_ok = 1; out_loopback: m_freem(mb); } static inline int invalid_cqe(struct mlx4_en_priv *priv, struct mlx4_cqe *cqe) { /* Drop packet on bad receive or bad checksum */ if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == MLX4_CQE_OPCODE_ERROR)) { en_err(priv, "CQE completed in error - vendor syndrom:%d syndrom:%d\n", ((struct mlx4_err_cqe *)cqe)->vendor_err_syndrome, ((struct mlx4_err_cqe *)cqe)->syndrome); return 1; } if (unlikely(cqe->badfcs_enc & MLX4_CQE_BAD_FCS)) { en_dbg(RX_ERR, priv, "Accepted frame with bad FCS\n"); return 1; } return 0; } static struct mbuf * mlx4_en_rx_mb(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring *ring, struct mlx4_en_rx_desc *rx_desc, struct mlx4_en_rx_mbuf *mb_list, int length) { struct mbuf *mb; /* get mbuf */ mb = mb_list->mbuf; /* collect used fragment while atomically replacing it */ if (mlx4_en_alloc_buf(ring, &rx_desc->data[0].addr, mb_list)) return (NULL); /* range check hardware computed value */ if (unlikely(length > mb->m_len)) length = mb->m_len; /* update total packet length in packet header */ mb->m_len = mb->m_pkthdr.len = length; return (mb); } /* For cpu arch with cache line of 64B the performance is better when cqe size==64B * To enlarge cqe size from 32B to 64B --> 32B of garbage (i.e. 0xccccccc) * was added in the beginning of each cqe (the real data is in the corresponding 32B). - * The following calc ensures that when factor==1, it means we are alligned to 64B + * The following calc ensures that when factor==1, it means we are aligned to 64B * and we get the real cqe data*/ #define CQE_FACTOR_INDEX(index, factor) ((index << factor) + factor) int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int budget) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_cqe *cqe; struct mlx4_en_rx_ring *ring = priv->rx_ring[cq->ring]; struct mlx4_en_rx_mbuf *mb_list; struct mlx4_en_rx_desc *rx_desc; struct mbuf *mb; struct mlx4_cq *mcq = &cq->mcq; struct mlx4_cqe *buf = cq->buf; int index; unsigned int length; int polled = 0; u32 cons_index = mcq->cons_index; u32 size_mask = ring->size_mask; int size = cq->size; int factor = priv->cqe_factor; if (!priv->port_up) return 0; /* We assume a 1:1 mapping between CQEs and Rx descriptors, so Rx * descriptor offset can be deducted from the CQE index instead of * reading 'cqe->index' */ index = cons_index & size_mask; cqe = &buf[CQE_FACTOR_INDEX(index, factor)]; /* Process all completed CQEs */ while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK, cons_index & size)) { mb_list = ring->mbuf + index; rx_desc = (struct mlx4_en_rx_desc *) (ring->buf + (index << ring->log_stride)); /* * make sure we read the CQE after we read the ownership bit */ rmb(); if (invalid_cqe(priv, cqe)) { goto next; } /* * Packet is OK - process it. */ length = be32_to_cpu(cqe->byte_cnt); length -= ring->fcs_del; mb = mlx4_en_rx_mb(priv, ring, rx_desc, mb_list, length); if (unlikely(!mb)) { ring->errors++; goto next; } ring->bytes += length; ring->packets++; if (unlikely(priv->validate_loopback)) { validate_loopback(priv, mb); goto next; } /* forward Toeplitz compatible hash value */ mb->m_pkthdr.flowid = be32_to_cpu(cqe->immed_rss_invalid); M_HASHTYPE_SET(mb, M_HASHTYPE_OPAQUE); mb->m_pkthdr.rcvif = dev; if (be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_VLAN_PRESENT_MASK) { mb->m_pkthdr.ether_vtag = be16_to_cpu(cqe->sl_vid); mb->m_flags |= M_VLANTAG; } if (likely(dev->if_capenable & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) && (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPOK)) && (cqe->checksum == cpu_to_be16(0xffff))) { priv->port_stats.rx_chksum_good++; mb->m_pkthdr.csum_flags = CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR; mb->m_pkthdr.csum_data = htons(0xffff); /* This packet is eligible for LRO if it is: * - DIX Ethernet (type interpretation) * - TCP/IP (v4) * - without IP options * - not an IP fragment */ #ifdef INET if (mlx4_en_can_lro(cqe->status) && (dev->if_capenable & IFCAP_LRO)) { if (ring->lro.lro_cnt != 0 && tcp_lro_rx(&ring->lro, mb, 0) == 0) goto next; } #endif /* LRO not possible, complete processing here */ INC_PERF_COUNTER(priv->pstats.lro_misses); } else { mb->m_pkthdr.csum_flags = 0; priv->port_stats.rx_chksum_none++; } /* Push it up the stack */ dev->if_input(dev, mb); next: ++cons_index; index = cons_index & size_mask; cqe = &buf[CQE_FACTOR_INDEX(index, factor)]; if (++polled == budget) goto out; } /* Flush all pending IP reassembly sessions */ out: #ifdef INET tcp_lro_flush_all(&ring->lro); #endif AVG_PERF_COUNTER(priv->pstats.rx_coal_avg, polled); mcq->cons_index = cons_index; mlx4_cq_set_ci(mcq); wmb(); /* ensure HW sees CQ consumer before we post new buffers */ ring->cons = mcq->cons_index; ring->prod += polled; /* Polled descriptors were realocated in place */ mlx4_en_update_rx_prod_db(ring); return polled; } /* Rx CQ polling - called by NAPI */ static int mlx4_en_poll_rx_cq(struct mlx4_en_cq *cq, int budget) { struct net_device *dev = cq->dev; int done; done = mlx4_en_process_rx_cq(dev, cq, budget); cq->tot_rx += done; return done; } void mlx4_en_rx_irq(struct mlx4_cq *mcq) { struct mlx4_en_cq *cq = container_of(mcq, struct mlx4_en_cq, mcq); struct mlx4_en_priv *priv = netdev_priv(cq->dev); int done; // Shoot one within the irq context // Because there is no NAPI in freeBSD done = mlx4_en_poll_rx_cq(cq, MLX4_EN_RX_BUDGET); if (priv->port_up && (done == MLX4_EN_RX_BUDGET) ) { cq->curr_poll_rx_cpu_id = curcpu; taskqueue_enqueue(cq->tq, &cq->cq_task); } else { mlx4_en_arm_cq(priv, cq); } } void mlx4_en_rx_que(void *context, int pending) { struct mlx4_en_cq *cq; struct thread *td; cq = context; td = curthread; thread_lock(td); sched_bind(td, cq->curr_poll_rx_cpu_id); thread_unlock(td); while (mlx4_en_poll_rx_cq(cq, MLX4_EN_RX_BUDGET) == MLX4_EN_RX_BUDGET); mlx4_en_arm_cq(cq->dev->if_softc, cq); } /* RSS related functions */ static int mlx4_en_config_rss_qp(struct mlx4_en_priv *priv, int qpn, struct mlx4_en_rx_ring *ring, enum mlx4_qp_state *state, struct mlx4_qp *qp) { struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_qp_context *context; int err = 0; context = kmalloc(sizeof *context , GFP_KERNEL); if (!context) { en_err(priv, "Failed to allocate qp context\n"); return -ENOMEM; } err = mlx4_qp_alloc(mdev->dev, qpn, qp); if (err) { en_err(priv, "Failed to allocate qp #%x\n", qpn); goto out; } qp->event = mlx4_en_sqp_event; memset(context, 0, sizeof *context); mlx4_en_fill_qp_context(priv, ring->actual_size, ring->stride, 0, 0, qpn, ring->cqn, -1, context); context->db_rec_addr = cpu_to_be64(ring->wqres.db.dma); /* Cancel FCS removal if FW allows */ if (mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_FCS_KEEP) { context->param3 |= cpu_to_be32(1 << 29); ring->fcs_del = ETH_FCS_LEN; } else ring->fcs_del = 0; err = mlx4_qp_to_ready(mdev->dev, &ring->wqres.mtt, context, qp, state); if (err) { mlx4_qp_remove(mdev->dev, qp); mlx4_qp_free(mdev->dev, qp); } mlx4_en_update_rx_prod_db(ring); out: kfree(context); return err; } int mlx4_en_create_drop_qp(struct mlx4_en_priv *priv) { int err; u32 qpn; err = mlx4_qp_reserve_range(priv->mdev->dev, 1, 1, &qpn, 0); if (err) { en_err(priv, "Failed reserving drop qpn\n"); return err; } err = mlx4_qp_alloc(priv->mdev->dev, qpn, &priv->drop_qp); if (err) { en_err(priv, "Failed allocating drop qp\n"); mlx4_qp_release_range(priv->mdev->dev, qpn, 1); return err; } return 0; } void mlx4_en_destroy_drop_qp(struct mlx4_en_priv *priv) { u32 qpn; qpn = priv->drop_qp.qpn; mlx4_qp_remove(priv->mdev->dev, &priv->drop_qp); mlx4_qp_free(priv->mdev->dev, &priv->drop_qp); mlx4_qp_release_range(priv->mdev->dev, qpn, 1); } /* Allocate rx qp's and configure them according to rss map */ int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv) { struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_en_rss_map *rss_map = &priv->rss_map; struct mlx4_qp_context context; struct mlx4_rss_context *rss_context; int rss_rings; void *ptr; u8 rss_mask = (MLX4_RSS_IPV4 | MLX4_RSS_TCP_IPV4 | MLX4_RSS_IPV6 | MLX4_RSS_TCP_IPV6); int i; int err = 0; int good_qps = 0; static const u32 rsskey[10] = { 0xD181C62C, 0xF7F4DB5B, 0x1983A2FC, 0x943E1ADB, 0xD9389E6B, 0xD1039C2C, 0xA74499AD, 0x593D56D9, 0xF3253C06, 0x2ADC1FFC}; en_dbg(DRV, priv, "Configuring rss steering\n"); err = mlx4_qp_reserve_range(mdev->dev, priv->rx_ring_num, priv->rx_ring_num, &rss_map->base_qpn, 0); if (err) { en_err(priv, "Failed reserving %d qps\n", priv->rx_ring_num); return err; } for (i = 0; i < priv->rx_ring_num; i++) { priv->rx_ring[i]->qpn = rss_map->base_qpn + i; err = mlx4_en_config_rss_qp(priv, priv->rx_ring[i]->qpn, priv->rx_ring[i], &rss_map->state[i], &rss_map->qps[i]); if (err) goto rss_err; ++good_qps; } /* Configure RSS indirection qp */ err = mlx4_qp_alloc(mdev->dev, priv->base_qpn, &rss_map->indir_qp); if (err) { en_err(priv, "Failed to allocate RSS indirection QP\n"); goto rss_err; } rss_map->indir_qp.event = mlx4_en_sqp_event; mlx4_en_fill_qp_context(priv, 0, 0, 0, 1, priv->base_qpn, priv->rx_ring[0]->cqn, -1, &context); if (!priv->prof->rss_rings || priv->prof->rss_rings > priv->rx_ring_num) rss_rings = priv->rx_ring_num; else rss_rings = priv->prof->rss_rings; ptr = ((u8 *)&context) + offsetof(struct mlx4_qp_context, pri_path) + MLX4_RSS_OFFSET_IN_QPC_PRI_PATH; rss_context = ptr; rss_context->base_qpn = cpu_to_be32(ilog2(rss_rings) << 24 | (rss_map->base_qpn)); rss_context->default_qpn = cpu_to_be32(rss_map->base_qpn); if (priv->mdev->profile.udp_rss) { rss_mask |= MLX4_RSS_UDP_IPV4 | MLX4_RSS_UDP_IPV6; rss_context->base_qpn_udp = rss_context->default_qpn; } rss_context->flags = rss_mask; rss_context->hash_fn = MLX4_RSS_HASH_TOP; for (i = 0; i < 10; i++) rss_context->rss_key[i] = cpu_to_be32(rsskey[i]); err = mlx4_qp_to_ready(mdev->dev, &priv->res.mtt, &context, &rss_map->indir_qp, &rss_map->indir_state); if (err) goto indir_err; return 0; indir_err: mlx4_qp_modify(mdev->dev, NULL, rss_map->indir_state, MLX4_QP_STATE_RST, NULL, 0, 0, &rss_map->indir_qp); mlx4_qp_remove(mdev->dev, &rss_map->indir_qp); mlx4_qp_free(mdev->dev, &rss_map->indir_qp); rss_err: for (i = 0; i < good_qps; i++) { mlx4_qp_modify(mdev->dev, NULL, rss_map->state[i], MLX4_QP_STATE_RST, NULL, 0, 0, &rss_map->qps[i]); mlx4_qp_remove(mdev->dev, &rss_map->qps[i]); mlx4_qp_free(mdev->dev, &rss_map->qps[i]); } mlx4_qp_release_range(mdev->dev, rss_map->base_qpn, priv->rx_ring_num); return err; } void mlx4_en_release_rss_steer(struct mlx4_en_priv *priv) { struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_en_rss_map *rss_map = &priv->rss_map; int i; mlx4_qp_modify(mdev->dev, NULL, rss_map->indir_state, MLX4_QP_STATE_RST, NULL, 0, 0, &rss_map->indir_qp); mlx4_qp_remove(mdev->dev, &rss_map->indir_qp); mlx4_qp_free(mdev->dev, &rss_map->indir_qp); for (i = 0; i < priv->rx_ring_num; i++) { mlx4_qp_modify(mdev->dev, NULL, rss_map->state[i], MLX4_QP_STATE_RST, NULL, 0, 0, &rss_map->qps[i]); mlx4_qp_remove(mdev->dev, &rss_map->qps[i]); mlx4_qp_free(mdev->dev, &rss_map->qps[i]); } mlx4_qp_release_range(mdev->dev, rss_map->base_qpn, priv->rx_ring_num); } Index: head/sys/ofed/drivers/net/mlx4/fw.c =================================================================== --- head/sys/ofed/drivers/net/mlx4/fw.c (revision 299178) +++ head/sys/ofed/drivers/net/mlx4/fw.c (revision 299179) @@ -1,1954 +1,1954 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005, 2006, 2007, 2008, 2014 Mellanox Technologies. All rights reserved. * Copyright (c) 2005, 2006, 2007 Cisco Systems, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include #include #include #include #include "fw.h" #include "icm.h" enum { MLX4_COMMAND_INTERFACE_MIN_REV = 2, MLX4_COMMAND_INTERFACE_MAX_REV = 3, MLX4_COMMAND_INTERFACE_NEW_PORT_CMDS = 3, }; extern void __buggy_use_of_MLX4_GET(void); extern void __buggy_use_of_MLX4_PUT(void); static bool enable_qos; module_param(enable_qos, bool, 0444); MODULE_PARM_DESC(enable_qos, "Enable Quality of Service support in the HCA (default: off)"); #define MLX4_GET(dest, source, offset) \ do { \ void *__p = (char *) (source) + (offset); \ switch (sizeof (dest)) { \ case 1: (dest) = *(u8 *) __p; break; \ case 2: (dest) = be16_to_cpup(__p); break; \ case 4: (dest) = be32_to_cpup(__p); break; \ case 8: (dest) = be64_to_cpup(__p); break; \ default: __buggy_use_of_MLX4_GET(); \ } \ } while (0) #define MLX4_PUT(dest, source, offset) \ do { \ void *__d = ((char *) (dest) + (offset)); \ switch (sizeof(source)) { \ case 1: *(u8 *) __d = (source); break; \ case 2: *(__be16 *) __d = cpu_to_be16(source); break; \ case 4: *(__be32 *) __d = cpu_to_be32(source); break; \ case 8: *(__be64 *) __d = cpu_to_be64(source); break; \ default: __buggy_use_of_MLX4_PUT(); \ } \ } while (0) static void dump_dev_cap_flags(struct mlx4_dev *dev, u64 flags) { static const char *fname[] = { [ 0] = "RC transport", [ 1] = "UC transport", [ 2] = "UD transport", [ 3] = "XRC transport", [ 4] = "reliable multicast", [ 5] = "FCoIB support", [ 6] = "SRQ support", [ 7] = "IPoIB checksum offload", [ 8] = "P_Key violation counter", [ 9] = "Q_Key violation counter", [10] = "VMM", [12] = "DPDP", [15] = "Big LSO headers", [16] = "MW support", [17] = "APM support", [18] = "Atomic ops support", [19] = "Raw multicast support", [20] = "Address vector port checking support", [21] = "UD multicast support", [24] = "Demand paging support", [25] = "Router support", [30] = "IBoE support", [32] = "Unicast loopback support", [34] = "FCS header control", [38] = "Wake On LAN support", [40] = "UDP RSS support", [41] = "Unicast VEP steering support", [42] = "Multicast VEP steering support", [44] = "Cross-channel (sync_qp) operations support", [48] = "Counters support", [59] = "Port management change event support", [60] = "eSwitch support", [61] = "64 byte EQE support", [62] = "64 byte CQE support", }; int i; mlx4_dbg(dev, "DEV_CAP flags:\n"); for (i = 0; i < ARRAY_SIZE(fname); ++i) if (fname[i] && (flags & (1LL << i))) mlx4_dbg(dev, " %s\n", fname[i]); } static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags) { static const char * const fname[] = { [0] = "RSS support", [1] = "RSS Toeplitz Hash Function support", [2] = "RSS XOR Hash Function support", [3] = "Device manage flow steering support", [4] = "FSM (MAC unti-spoofing) support", [5] = "VST (control vlan insertion/stripping) support", [6] = "Dynamic QP updates support", [7] = "Loopback source checks support", [8] = "Device managed flow steering IPoIB support", [9] = "ETS configuration support", [10] = "ETH backplane autoneg report", [11] = "Ethernet Flow control statistics support", [12] = "Recoverable error events support", [13] = "Time stamping support", [14] = "Report driver version to FW support" }; int i; for (i = 0; i < ARRAY_SIZE(fname); ++i) if (fname[i] && (flags & (1LL << i))) mlx4_dbg(dev, " %s\n", fname[i]); } int mlx4_MOD_STAT_CFG(struct mlx4_dev *dev, struct mlx4_mod_stat_cfg *cfg) { struct mlx4_cmd_mailbox *mailbox; u32 *inbox; int err = 0; #define MOD_STAT_CFG_IN_SIZE 0x100 #define MOD_STAT_CFG_PG_SZ_M_OFFSET 0x002 #define MOD_STAT_CFG_PG_SZ_OFFSET 0x003 mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) return PTR_ERR(mailbox); inbox = mailbox->buf; memset(inbox, 0, MOD_STAT_CFG_IN_SIZE); MLX4_PUT(inbox, cfg->log_pg_sz, MOD_STAT_CFG_PG_SZ_OFFSET); MLX4_PUT(inbox, cfg->log_pg_sz_m, MOD_STAT_CFG_PG_SZ_M_OFFSET); err = mlx4_cmd(dev, mailbox->dma, 0, 0, MLX4_CMD_MOD_STAT_CFG, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); mlx4_free_cmd_mailbox(dev, mailbox); return err; } int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { struct mlx4_priv *priv = mlx4_priv(dev); u8 field, port; u32 size; int err = 0; #define QUERY_FUNC_CAP_FLAGS_OFFSET 0x0 #define QUERY_FUNC_CAP_NUM_PORTS_OFFSET 0x1 #define QUERY_FUNC_CAP_PF_BHVR_OFFSET 0x4 #define QUERY_FUNC_CAP_FMR_OFFSET 0x8 #define QUERY_FUNC_CAP_QP_QUOTA_OFFSET_DEP 0x10 #define QUERY_FUNC_CAP_CQ_QUOTA_OFFSET_DEP 0x14 #define QUERY_FUNC_CAP_SRQ_QUOTA_OFFSET_DEP 0x18 #define QUERY_FUNC_CAP_MPT_QUOTA_OFFSET_DEP 0x20 #define QUERY_FUNC_CAP_MTT_QUOTA_OFFSET_DEP 0x24 #define QUERY_FUNC_CAP_MCG_QUOTA_OFFSET_DEP 0x28 #define QUERY_FUNC_CAP_MAX_EQ_OFFSET 0x2c #define QUERY_FUNC_CAP_RESERVED_EQ_OFFSET 0x30 #define QUERY_FUNC_CAP_QP_QUOTA_OFFSET 0x50 #define QUERY_FUNC_CAP_CQ_QUOTA_OFFSET 0x54 #define QUERY_FUNC_CAP_SRQ_QUOTA_OFFSET 0x58 #define QUERY_FUNC_CAP_MPT_QUOTA_OFFSET 0x60 #define QUERY_FUNC_CAP_MTT_QUOTA_OFFSET 0x64 #define QUERY_FUNC_CAP_MCG_QUOTA_OFFSET 0x68 #define QUERY_FUNC_CAP_FMR_FLAG 0x80 #define QUERY_FUNC_CAP_FLAG_RDMA 0x40 #define QUERY_FUNC_CAP_FLAG_ETH 0x80 #define QUERY_FUNC_CAP_FLAG_QUOTAS 0x10 /* when opcode modifier = 1 */ #define QUERY_FUNC_CAP_PHYS_PORT_OFFSET 0x3 #define QUERY_FUNC_CAP_FLAGS0_OFFSET 0x8 #define QUERY_FUNC_CAP_FLAGS1_OFFSET 0xc #define QUERY_FUNC_CAP_COUNTER_INDEX_OFFSET 0xd #define QUERY_FUNC_CAP_QP0_TUNNEL 0x10 #define QUERY_FUNC_CAP_QP0_PROXY 0x14 #define QUERY_FUNC_CAP_QP1_TUNNEL 0x18 #define QUERY_FUNC_CAP_QP1_PROXY 0x1c #define QUERY_FUNC_CAP_ETH_PROPS_FORCE_MAC 0x40 #define QUERY_FUNC_CAP_ETH_PROPS_FORCE_VLAN 0x80 #define QUERY_FUNC_CAP_PROPS_DEF_COUNTER 0x20 #define QUERY_FUNC_CAP_RDMA_PROPS_FORCE_PHY_WQE_GID 0x80 if (vhcr->op_modifier == 1) { port = vhcr->in_modifier; /* phys-port = logical-port */ MLX4_PUT(outbox->buf, port, QUERY_FUNC_CAP_PHYS_PORT_OFFSET); field = 0; /* ensure that phy_wqe_gid bit is not set */ MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_FLAGS0_OFFSET); /* ensure force vlan and force mac bits are not set * and that default counter bit is set */ field = QUERY_FUNC_CAP_PROPS_DEF_COUNTER; /* def counter */ MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_FLAGS1_OFFSET); /* There is always default counter legal or sink counter */ field = mlx4_get_default_counter_index(dev, slave, vhcr->in_modifier); MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_COUNTER_INDEX_OFFSET); /* size is now the QP number */ size = dev->phys_caps.base_tunnel_sqpn + 8 * slave + port - 1; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP0_TUNNEL); size += 2; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP1_TUNNEL); size = dev->phys_caps.base_proxy_sqpn + 8 * slave + port - 1; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP0_PROXY); size += 2; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP1_PROXY); } else if (vhcr->op_modifier == 0) { /* enable rdma and ethernet interfaces, and new quota locations */ field = (QUERY_FUNC_CAP_FLAG_ETH | QUERY_FUNC_CAP_FLAG_RDMA | QUERY_FUNC_CAP_FLAG_QUOTAS); MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_FLAGS_OFFSET); field = dev->caps.num_ports; MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_NUM_PORTS_OFFSET); size = dev->caps.function_caps; /* set PF behaviours */ MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_PF_BHVR_OFFSET); field = 0; /* protected FMR support not available as yet */ MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_FMR_OFFSET); size = priv->mfunc.master.res_tracker.res_alloc[RES_QP].quota[slave]; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP_QUOTA_OFFSET); size = dev->caps.num_qps; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP_QUOTA_OFFSET_DEP); size = priv->mfunc.master.res_tracker.res_alloc[RES_SRQ].quota[slave]; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_SRQ_QUOTA_OFFSET); size = dev->caps.num_srqs; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_SRQ_QUOTA_OFFSET_DEP); size = priv->mfunc.master.res_tracker.res_alloc[RES_CQ].quota[slave]; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_CQ_QUOTA_OFFSET); size = dev->caps.num_cqs; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_CQ_QUOTA_OFFSET_DEP); size = dev->caps.num_eqs; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MAX_EQ_OFFSET); size = dev->caps.reserved_eqs; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_RESERVED_EQ_OFFSET); size = priv->mfunc.master.res_tracker.res_alloc[RES_MPT].quota[slave]; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MPT_QUOTA_OFFSET); size = dev->caps.num_mpts; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MPT_QUOTA_OFFSET_DEP); size = priv->mfunc.master.res_tracker.res_alloc[RES_MTT].quota[slave]; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MTT_QUOTA_OFFSET); size = dev->caps.num_mtts; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MTT_QUOTA_OFFSET_DEP); size = dev->caps.num_mgms + dev->caps.num_amgms; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MCG_QUOTA_OFFSET); MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MCG_QUOTA_OFFSET_DEP); } else err = -EINVAL; return err; } int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u32 gen_or_port, struct mlx4_func_cap *func_cap) { struct mlx4_cmd_mailbox *mailbox; u32 *outbox; u8 field, op_modifier; u32 size; int err = 0, quotas = 0; op_modifier = !!gen_or_port; /* 0 = general, 1 = logical port */ mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) return PTR_ERR(mailbox); err = mlx4_cmd_box(dev, 0, mailbox->dma, gen_or_port, op_modifier, MLX4_CMD_QUERY_FUNC_CAP, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); if (err) goto out; outbox = mailbox->buf; if (!op_modifier) { MLX4_GET(field, outbox, QUERY_FUNC_CAP_FLAGS_OFFSET); if (!(field & (QUERY_FUNC_CAP_FLAG_ETH | QUERY_FUNC_CAP_FLAG_RDMA))) { mlx4_err(dev, "The host supports neither eth nor rdma interfaces\n"); err = -EPROTONOSUPPORT; goto out; } func_cap->flags = field; quotas = !!(func_cap->flags & QUERY_FUNC_CAP_FLAG_QUOTAS); MLX4_GET(field, outbox, QUERY_FUNC_CAP_NUM_PORTS_OFFSET); func_cap->num_ports = field; MLX4_GET(size, outbox, QUERY_FUNC_CAP_PF_BHVR_OFFSET); func_cap->pf_context_behaviour = size; if (quotas) { MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP_QUOTA_OFFSET); func_cap->qp_quota = size & 0xFFFFFF; MLX4_GET(size, outbox, QUERY_FUNC_CAP_SRQ_QUOTA_OFFSET); func_cap->srq_quota = size & 0xFFFFFF; MLX4_GET(size, outbox, QUERY_FUNC_CAP_CQ_QUOTA_OFFSET); func_cap->cq_quota = size & 0xFFFFFF; MLX4_GET(size, outbox, QUERY_FUNC_CAP_MPT_QUOTA_OFFSET); func_cap->mpt_quota = size & 0xFFFFFF; MLX4_GET(size, outbox, QUERY_FUNC_CAP_MTT_QUOTA_OFFSET); func_cap->mtt_quota = size & 0xFFFFFF; MLX4_GET(size, outbox, QUERY_FUNC_CAP_MCG_QUOTA_OFFSET); func_cap->mcg_quota = size & 0xFFFFFF; } else { MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP_QUOTA_OFFSET_DEP); func_cap->qp_quota = size & 0xFFFFFF; MLX4_GET(size, outbox, QUERY_FUNC_CAP_SRQ_QUOTA_OFFSET_DEP); func_cap->srq_quota = size & 0xFFFFFF; MLX4_GET(size, outbox, QUERY_FUNC_CAP_CQ_QUOTA_OFFSET_DEP); func_cap->cq_quota = size & 0xFFFFFF; MLX4_GET(size, outbox, QUERY_FUNC_CAP_MPT_QUOTA_OFFSET_DEP); func_cap->mpt_quota = size & 0xFFFFFF; MLX4_GET(size, outbox, QUERY_FUNC_CAP_MTT_QUOTA_OFFSET_DEP); func_cap->mtt_quota = size & 0xFFFFFF; MLX4_GET(size, outbox, QUERY_FUNC_CAP_MCG_QUOTA_OFFSET_DEP); func_cap->mcg_quota = size & 0xFFFFFF; } MLX4_GET(size, outbox, QUERY_FUNC_CAP_MAX_EQ_OFFSET); func_cap->max_eq = size & 0xFFFFFF; MLX4_GET(size, outbox, QUERY_FUNC_CAP_RESERVED_EQ_OFFSET); func_cap->reserved_eq = size & 0xFFFFFF; goto out; } /* logical port query */ if (gen_or_port > dev->caps.num_ports) { err = -EINVAL; goto out; } if (dev->caps.port_type[gen_or_port] == MLX4_PORT_TYPE_ETH) { MLX4_GET(field, outbox, QUERY_FUNC_CAP_FLAGS1_OFFSET); if (field & QUERY_FUNC_CAP_ETH_PROPS_FORCE_VLAN) { mlx4_err(dev, "VLAN is enforced on this port\n"); err = -EPROTONOSUPPORT; goto out; } if (field & QUERY_FUNC_CAP_ETH_PROPS_FORCE_MAC) { mlx4_err(dev, "Force mac is enabled on this port\n"); err = -EPROTONOSUPPORT; goto out; } } else if (dev->caps.port_type[gen_or_port] == MLX4_PORT_TYPE_IB) { MLX4_GET(field, outbox, QUERY_FUNC_CAP_FLAGS0_OFFSET); if (field & QUERY_FUNC_CAP_RDMA_PROPS_FORCE_PHY_WQE_GID) { mlx4_err(dev, "phy_wqe_gid is " "enforced on this ib port\n"); err = -EPROTONOSUPPORT; goto out; } } MLX4_GET(field, outbox, QUERY_FUNC_CAP_PHYS_PORT_OFFSET); func_cap->physical_port = field; if (func_cap->physical_port != gen_or_port) { err = -ENOSYS; goto out; } MLX4_GET(field, outbox, QUERY_FUNC_CAP_FLAGS1_OFFSET); if (field & QUERY_FUNC_CAP_PROPS_DEF_COUNTER) { MLX4_GET(field, outbox, QUERY_FUNC_CAP_COUNTER_INDEX_OFFSET); func_cap->def_counter_index = field; } else { func_cap->def_counter_index = MLX4_SINK_COUNTER_INDEX; } MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP0_TUNNEL); func_cap->qp0_tunnel_qpn = size & 0xFFFFFF; MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP0_PROXY); func_cap->qp0_proxy_qpn = size & 0xFFFFFF; MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP1_TUNNEL); func_cap->qp1_tunnel_qpn = size & 0xFFFFFF; MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP1_PROXY); func_cap->qp1_proxy_qpn = size & 0xFFFFFF; /* All other resources are allocated by the master, but we still report * 'num' and 'reserved' capabilities as follows: * - num remains the maximum resource index * - 'num - reserved' is the total available objects of a resource, but * resource indices may be less than 'reserved' * TODO: set per-resource quotas */ out: mlx4_free_cmd_mailbox(dev, mailbox); return err; } int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) { struct mlx4_cmd_mailbox *mailbox; u32 *outbox; u8 field; u32 field32, flags, ext_flags; u16 size; u16 stat_rate; int err; int i; #define QUERY_DEV_CAP_OUT_SIZE 0x100 #define QUERY_DEV_CAP_MAX_SRQ_SZ_OFFSET 0x10 #define QUERY_DEV_CAP_MAX_QP_SZ_OFFSET 0x11 #define QUERY_DEV_CAP_RSVD_QP_OFFSET 0x12 #define QUERY_DEV_CAP_MAX_QP_OFFSET 0x13 #define QUERY_DEV_CAP_RSVD_SRQ_OFFSET 0x14 #define QUERY_DEV_CAP_MAX_SRQ_OFFSET 0x15 #define QUERY_DEV_CAP_RSVD_EEC_OFFSET 0x16 #define QUERY_DEV_CAP_MAX_EEC_OFFSET 0x17 #define QUERY_DEV_CAP_MAX_CQ_SZ_OFFSET 0x19 #define QUERY_DEV_CAP_RSVD_CQ_OFFSET 0x1a #define QUERY_DEV_CAP_MAX_CQ_OFFSET 0x1b #define QUERY_DEV_CAP_MAX_MPT_OFFSET 0x1d #define QUERY_DEV_CAP_RSVD_EQ_OFFSET 0x1e #define QUERY_DEV_CAP_MAX_EQ_OFFSET 0x1f #define QUERY_DEV_CAP_RSVD_MTT_OFFSET 0x20 #define QUERY_DEV_CAP_MAX_MRW_SZ_OFFSET 0x21 #define QUERY_DEV_CAP_RSVD_MRW_OFFSET 0x22 #define QUERY_DEV_CAP_MAX_MTT_SEG_OFFSET 0x23 #define QUERY_DEV_CAP_MAX_AV_OFFSET 0x27 #define QUERY_DEV_CAP_MAX_REQ_QP_OFFSET 0x29 #define QUERY_DEV_CAP_MAX_RES_QP_OFFSET 0x2b #define QUERY_DEV_CAP_MAX_GSO_OFFSET 0x2d #define QUERY_DEV_CAP_RSS_OFFSET 0x2e #define QUERY_DEV_CAP_MAX_RDMA_OFFSET 0x2f #define QUERY_DEV_CAP_RSZ_SRQ_OFFSET 0x33 #define QUERY_DEV_CAP_ACK_DELAY_OFFSET 0x35 #define QUERY_DEV_CAP_MTU_WIDTH_OFFSET 0x36 #define QUERY_DEV_CAP_VL_PORT_OFFSET 0x37 #define QUERY_DEV_CAP_MAX_MSG_SZ_OFFSET 0x38 #define QUERY_DEV_CAP_MAX_GID_OFFSET 0x3b #define QUERY_DEV_CAP_RATE_SUPPORT_OFFSET 0x3c #define QUERY_DEV_CAP_CQ_TS_SUPPORT_OFFSET 0x3e #define QUERY_DEV_CAP_MAX_PKEY_OFFSET 0x3f #define QUERY_DEV_CAP_EXT_FLAGS_OFFSET 0x40 #define QUERY_DEV_CAP_SYNC_QP_OFFSET 0x42 #define QUERY_DEV_CAP_FLAGS_OFFSET 0x44 #define QUERY_DEV_CAP_RSVD_UAR_OFFSET 0x48 #define QUERY_DEV_CAP_UAR_SZ_OFFSET 0x49 #define QUERY_DEV_CAP_PAGE_SZ_OFFSET 0x4b #define QUERY_DEV_CAP_BF_OFFSET 0x4c #define QUERY_DEV_CAP_LOG_BF_REG_SZ_OFFSET 0x4d #define QUERY_DEV_CAP_LOG_MAX_BF_REGS_PER_PAGE_OFFSET 0x4e #define QUERY_DEV_CAP_LOG_MAX_BF_PAGES_OFFSET 0x4f #define QUERY_DEV_CAP_MAX_SG_SQ_OFFSET 0x51 #define QUERY_DEV_CAP_MAX_DESC_SZ_SQ_OFFSET 0x52 #define QUERY_DEV_CAP_MAX_SG_RQ_OFFSET 0x55 #define QUERY_DEV_CAP_MAX_DESC_SZ_RQ_OFFSET 0x56 #define QUERY_DEV_CAP_MAX_QP_MCG_OFFSET 0x61 #define QUERY_DEV_CAP_RSVD_MCG_OFFSET 0x62 #define QUERY_DEV_CAP_MAX_MCG_OFFSET 0x63 #define QUERY_DEV_CAP_RSVD_PD_OFFSET 0x64 #define QUERY_DEV_CAP_MAX_PD_OFFSET 0x65 #define QUERY_DEV_CAP_RSVD_XRC_OFFSET 0x66 #define QUERY_DEV_CAP_MAX_XRC_OFFSET 0x67 #define QUERY_DEV_CAP_MAX_BASIC_COUNTERS_OFFSET 0x68 #define QUERY_DEV_CAP_MAX_EXTENDED_COUNTERS_OFFSET 0x6c #define QUERY_DEV_CAP_PORT_FLOWSTATS_COUNTERS_OFFSET 0x70 #define QUERY_DEV_CAP_FLOW_STEERING_RANGE_EN_OFFSET 0x76 #define QUERY_DEV_CAP_EXT_2_FLAGS_OFFSET 0x70 #define QUERY_DEV_CAP_FLOW_STEERING_IPOIB_OFFSET 0x74 #define QUERY_DEV_CAP_FLOW_STEERING_MAX_QP_OFFSET 0x77 #define QUERY_DEV_CAP_RDMARC_ENTRY_SZ_OFFSET 0x80 #define QUERY_DEV_CAP_QPC_ENTRY_SZ_OFFSET 0x82 #define QUERY_DEV_CAP_AUX_ENTRY_SZ_OFFSET 0x84 #define QUERY_DEV_CAP_ALTC_ENTRY_SZ_OFFSET 0x86 #define QUERY_DEV_CAP_EQC_ENTRY_SZ_OFFSET 0x88 #define QUERY_DEV_CAP_CQC_ENTRY_SZ_OFFSET 0x8a #define QUERY_DEV_CAP_SRQ_ENTRY_SZ_OFFSET 0x8c #define QUERY_DEV_CAP_C_MPT_ENTRY_SZ_OFFSET 0x8e #define QUERY_DEV_CAP_MTT_ENTRY_SZ_OFFSET 0x90 #define QUERY_DEV_CAP_D_MPT_ENTRY_SZ_OFFSET 0x92 #define QUERY_DEV_CAP_BMME_FLAGS_OFFSET 0x94 #define QUERY_DEV_CAP_RSVD_LKEY_OFFSET 0x98 #define QUERY_DEV_CAP_ETS_CFG_OFFSET 0x9c #define QUERY_DEV_CAP_MAX_ICM_SZ_OFFSET 0xa0 dev_cap->flags2 = 0; mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) return PTR_ERR(mailbox); outbox = mailbox->buf; err = mlx4_cmd_box(dev, 0, mailbox->dma, 0, 0, MLX4_CMD_QUERY_DEV_CAP, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); if (err) goto out; MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_QP_OFFSET); dev_cap->reserved_qps = 1 << (field & 0xf); MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_QP_OFFSET); dev_cap->max_qps = 1 << (field & 0x1f); MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_SRQ_OFFSET); dev_cap->reserved_srqs = 1 << (field >> 4); MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_SRQ_OFFSET); dev_cap->max_srqs = 1 << (field & 0x1f); MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_CQ_SZ_OFFSET); dev_cap->max_cq_sz = 1 << field; MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_CQ_OFFSET); dev_cap->reserved_cqs = 1 << (field & 0xf); MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_CQ_OFFSET); dev_cap->max_cqs = 1 << (field & 0x1f); MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_MPT_OFFSET); dev_cap->max_mpts = 1 << (field & 0x3f); MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_EQ_OFFSET); dev_cap->reserved_eqs = field & 0xf; MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_EQ_OFFSET); dev_cap->max_eqs = 1 << (field & 0xf); MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_MTT_OFFSET); dev_cap->reserved_mtts = 1 << (field >> 4); MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_MRW_SZ_OFFSET); dev_cap->max_mrw_sz = 1 << field; MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_MRW_OFFSET); dev_cap->reserved_mrws = 1 << (field & 0xf); MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_MTT_SEG_OFFSET); dev_cap->max_mtt_seg = 1 << (field & 0x3f); MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_REQ_QP_OFFSET); dev_cap->max_requester_per_qp = 1 << (field & 0x3f); MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_RES_QP_OFFSET); dev_cap->max_responder_per_qp = 1 << (field & 0x3f); MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_GSO_OFFSET); field &= 0x1f; if (!field) dev_cap->max_gso_sz = 0; else dev_cap->max_gso_sz = 1 << field; MLX4_GET(field, outbox, QUERY_DEV_CAP_RSS_OFFSET); if (field & 0x20) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_RSS_XOR; if (field & 0x10) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_RSS_TOP; field &= 0xf; if (field) { dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_RSS; dev_cap->max_rss_tbl_sz = 1 << field; } else dev_cap->max_rss_tbl_sz = 0; MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_RDMA_OFFSET); dev_cap->max_rdma_global = 1 << (field & 0x3f); MLX4_GET(field, outbox, QUERY_DEV_CAP_ACK_DELAY_OFFSET); dev_cap->local_ca_ack_delay = field & 0x1f; MLX4_GET(field, outbox, QUERY_DEV_CAP_VL_PORT_OFFSET); dev_cap->num_ports = field & 0xf; MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_MSG_SZ_OFFSET); dev_cap->max_msg_sz = 1 << (field & 0x1f); MLX4_GET(field, outbox, QUERY_DEV_CAP_PORT_FLOWSTATS_COUNTERS_OFFSET); if (field & 0x10) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_FLOWSTATS_EN; MLX4_GET(field, outbox, QUERY_DEV_CAP_FLOW_STEERING_RANGE_EN_OFFSET); if (field & 0x80) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_FS_EN; MLX4_GET(field, outbox, QUERY_DEV_CAP_FLOW_STEERING_IPOIB_OFFSET); if (field & 0x80) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_DMFS_IPOIB; dev_cap->fs_log_max_ucast_qp_range_size = field & 0x1f; MLX4_GET(field, outbox, QUERY_DEV_CAP_FLOW_STEERING_MAX_QP_OFFSET); dev_cap->fs_max_num_qp_per_entry = field; MLX4_GET(stat_rate, outbox, QUERY_DEV_CAP_RATE_SUPPORT_OFFSET); dev_cap->stat_rate_support = stat_rate; MLX4_GET(field, outbox, QUERY_DEV_CAP_CQ_TS_SUPPORT_OFFSET); if (field & 0x80) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_TS; MLX4_GET(ext_flags, outbox, QUERY_DEV_CAP_EXT_FLAGS_OFFSET); MLX4_GET(flags, outbox, QUERY_DEV_CAP_FLAGS_OFFSET); dev_cap->flags = flags | (u64)ext_flags << 32; MLX4_GET(field, outbox, QUERY_DEV_CAP_SYNC_QP_OFFSET); dev_cap->sync_qp = field & 0x10; MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_UAR_OFFSET); dev_cap->reserved_uars = field >> 4; MLX4_GET(field, outbox, QUERY_DEV_CAP_UAR_SZ_OFFSET); dev_cap->uar_size = 1 << ((field & 0x3f) + 20); MLX4_GET(field, outbox, QUERY_DEV_CAP_PAGE_SZ_OFFSET); dev_cap->min_page_sz = 1 << field; MLX4_GET(field, outbox, QUERY_DEV_CAP_BF_OFFSET); if (field & 0x80) { MLX4_GET(field, outbox, QUERY_DEV_CAP_LOG_BF_REG_SZ_OFFSET); dev_cap->bf_reg_size = 1 << (field & 0x1f); MLX4_GET(field, outbox, QUERY_DEV_CAP_LOG_MAX_BF_REGS_PER_PAGE_OFFSET); if ((1 << (field & 0x3f)) > (PAGE_SIZE / dev_cap->bf_reg_size)) field = 3; dev_cap->bf_regs_per_page = 1 << (field & 0x3f); mlx4_dbg(dev, "BlueFlame available (reg size %d, regs/page %d)\n", dev_cap->bf_reg_size, dev_cap->bf_regs_per_page); } else { dev_cap->bf_reg_size = 0; mlx4_dbg(dev, "BlueFlame not available\n"); } MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_SG_SQ_OFFSET); dev_cap->max_sq_sg = field; MLX4_GET(size, outbox, QUERY_DEV_CAP_MAX_DESC_SZ_SQ_OFFSET); dev_cap->max_sq_desc_sz = size; MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_QP_MCG_OFFSET); dev_cap->max_qp_per_mcg = 1 << field; MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_MCG_OFFSET); dev_cap->reserved_mgms = field & 0xf; MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_MCG_OFFSET); dev_cap->max_mcgs = 1 << field; MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_PD_OFFSET); dev_cap->reserved_pds = field >> 4; MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_PD_OFFSET); dev_cap->max_pds = 1 << (field & 0x3f); MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_XRC_OFFSET); dev_cap->reserved_xrcds = field >> 4; MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_XRC_OFFSET); dev_cap->max_xrcds = 1 << (field & 0x1f); MLX4_GET(size, outbox, QUERY_DEV_CAP_RDMARC_ENTRY_SZ_OFFSET); dev_cap->rdmarc_entry_sz = size; MLX4_GET(size, outbox, QUERY_DEV_CAP_QPC_ENTRY_SZ_OFFSET); dev_cap->qpc_entry_sz = size; MLX4_GET(size, outbox, QUERY_DEV_CAP_AUX_ENTRY_SZ_OFFSET); dev_cap->aux_entry_sz = size; MLX4_GET(size, outbox, QUERY_DEV_CAP_ALTC_ENTRY_SZ_OFFSET); dev_cap->altc_entry_sz = size; MLX4_GET(size, outbox, QUERY_DEV_CAP_EQC_ENTRY_SZ_OFFSET); dev_cap->eqc_entry_sz = size; MLX4_GET(size, outbox, QUERY_DEV_CAP_CQC_ENTRY_SZ_OFFSET); dev_cap->cqc_entry_sz = size; MLX4_GET(size, outbox, QUERY_DEV_CAP_SRQ_ENTRY_SZ_OFFSET); dev_cap->srq_entry_sz = size; MLX4_GET(size, outbox, QUERY_DEV_CAP_C_MPT_ENTRY_SZ_OFFSET); dev_cap->cmpt_entry_sz = size; MLX4_GET(size, outbox, QUERY_DEV_CAP_MTT_ENTRY_SZ_OFFSET); dev_cap->mtt_entry_sz = size; MLX4_GET(size, outbox, QUERY_DEV_CAP_D_MPT_ENTRY_SZ_OFFSET); dev_cap->dmpt_entry_sz = size; MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_SRQ_SZ_OFFSET); dev_cap->max_srq_sz = 1 << field; MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_QP_SZ_OFFSET); dev_cap->max_qp_sz = 1 << field; MLX4_GET(field, outbox, QUERY_DEV_CAP_RSZ_SRQ_OFFSET); dev_cap->resize_srq = field & 1; MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_SG_RQ_OFFSET); dev_cap->max_rq_sg = field; MLX4_GET(size, outbox, QUERY_DEV_CAP_MAX_DESC_SZ_RQ_OFFSET); dev_cap->max_rq_desc_sz = size; MLX4_GET(dev_cap->bmme_flags, outbox, QUERY_DEV_CAP_BMME_FLAGS_OFFSET); MLX4_GET(dev_cap->reserved_lkey, outbox, QUERY_DEV_CAP_RSVD_LKEY_OFFSET); MLX4_GET(field32, outbox, QUERY_DEV_CAP_ETS_CFG_OFFSET); if (field32 & (1 << 0)) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_ETH_BACKPL_AN_REP; if (field32 & (1 << 7)) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_RECOVERABLE_ERROR_EVENT; if (field32 & (1 << 8)) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_DRIVER_VERSION_TO_FW; if (field32 & (1 << 13)) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_ETS_CFG; MLX4_GET(dev_cap->max_icm_sz, outbox, QUERY_DEV_CAP_MAX_ICM_SZ_OFFSET); if (dev_cap->flags & MLX4_DEV_CAP_FLAG_COUNTERS) MLX4_GET(dev_cap->max_basic_counters, outbox, QUERY_DEV_CAP_MAX_BASIC_COUNTERS_OFFSET); /* FW reports 256 however real value is 255 */ dev_cap->max_basic_counters = min_t(u32, dev_cap->max_basic_counters, 255); if (dev_cap->flags & MLX4_DEV_CAP_FLAG_COUNTERS_EXT) MLX4_GET(dev_cap->max_extended_counters, outbox, QUERY_DEV_CAP_MAX_EXTENDED_COUNTERS_OFFSET); MLX4_GET(field32, outbox, QUERY_DEV_CAP_EXT_2_FLAGS_OFFSET); if (field32 & (1 << 16)) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_UPDATE_QP; if (field32 & (1 << 19)) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_LB_SRC_CHK; if (field32 & (1 << 20)) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_FSM; if (field32 & (1 << 26)) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_VLAN_CONTROL; if (dev->flags & MLX4_FLAG_OLD_PORT_CMDS) { for (i = 1; i <= dev_cap->num_ports; ++i) { MLX4_GET(field, outbox, QUERY_DEV_CAP_VL_PORT_OFFSET); dev_cap->max_vl[i] = field >> 4; MLX4_GET(field, outbox, QUERY_DEV_CAP_MTU_WIDTH_OFFSET); dev_cap->ib_mtu[i] = field >> 4; dev_cap->max_port_width[i] = field & 0xf; MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_GID_OFFSET); dev_cap->max_gids[i] = 1 << (field & 0xf); MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_PKEY_OFFSET); dev_cap->max_pkeys[i] = 1 << (field & 0xf); } } else { #define QUERY_PORT_SUPPORTED_TYPE_OFFSET 0x00 #define QUERY_PORT_MTU_OFFSET 0x01 #define QUERY_PORT_ETH_MTU_OFFSET 0x02 #define QUERY_PORT_WIDTH_OFFSET 0x06 #define QUERY_PORT_MAX_GID_PKEY_OFFSET 0x07 #define QUERY_PORT_MAX_MACVLAN_OFFSET 0x0a #define QUERY_PORT_MAX_VL_OFFSET 0x0b #define QUERY_PORT_MAC_OFFSET 0x10 #define QUERY_PORT_TRANS_VENDOR_OFFSET 0x18 #define QUERY_PORT_WAVELENGTH_OFFSET 0x1c #define QUERY_PORT_TRANS_CODE_OFFSET 0x20 for (i = 1; i <= dev_cap->num_ports; ++i) { err = mlx4_cmd_box(dev, 0, mailbox->dma, i, 0, MLX4_CMD_QUERY_PORT, MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); if (err) goto out; MLX4_GET(field, outbox, QUERY_PORT_SUPPORTED_TYPE_OFFSET); dev_cap->supported_port_types[i] = field & 3; dev_cap->suggested_type[i] = (field >> 3) & 1; dev_cap->default_sense[i] = (field >> 4) & 1; MLX4_GET(field, outbox, QUERY_PORT_MTU_OFFSET); dev_cap->ib_mtu[i] = field & 0xf; MLX4_GET(field, outbox, QUERY_PORT_WIDTH_OFFSET); dev_cap->max_port_width[i] = field & 0xf; MLX4_GET(field, outbox, QUERY_PORT_MAX_GID_PKEY_OFFSET); dev_cap->max_gids[i] = 1 << (field >> 4); dev_cap->max_pkeys[i] = 1 << (field & 0xf); MLX4_GET(field, outbox, QUERY_PORT_MAX_VL_OFFSET); dev_cap->max_vl[i] = field & 0xf; MLX4_GET(field, outbox, QUERY_PORT_MAX_MACVLAN_OFFSET); dev_cap->log_max_macs[i] = field & 0xf; dev_cap->log_max_vlans[i] = field >> 4; MLX4_GET(dev_cap->eth_mtu[i], outbox, QUERY_PORT_ETH_MTU_OFFSET); MLX4_GET(dev_cap->def_mac[i], outbox, QUERY_PORT_MAC_OFFSET); MLX4_GET(field32, outbox, QUERY_PORT_TRANS_VENDOR_OFFSET); dev_cap->trans_type[i] = field32 >> 24; dev_cap->vendor_oui[i] = field32 & 0xffffff; MLX4_GET(dev_cap->wavelength[i], outbox, QUERY_PORT_WAVELENGTH_OFFSET); MLX4_GET(dev_cap->trans_code[i], outbox, QUERY_PORT_TRANS_CODE_OFFSET); } } mlx4_dbg(dev, "Base MM extensions: flags %08x, rsvd L_Key %08x\n", dev_cap->bmme_flags, dev_cap->reserved_lkey); /* * Each UAR has 4 EQ doorbells; so if a UAR is reserved, then * we can't use any EQs whose doorbell falls on that page, * even if the EQ itself isn't reserved. */ dev_cap->reserved_eqs = max(dev_cap->reserved_uars * 4, dev_cap->reserved_eqs); mlx4_dbg(dev, "Max ICM size %lld MB\n", (unsigned long long) dev_cap->max_icm_sz >> 20); mlx4_dbg(dev, "Max QPs: %d, reserved QPs: %d, entry size: %d\n", dev_cap->max_qps, dev_cap->reserved_qps, dev_cap->qpc_entry_sz); mlx4_dbg(dev, "Max SRQs: %d, reserved SRQs: %d, entry size: %d\n", dev_cap->max_srqs, dev_cap->reserved_srqs, dev_cap->srq_entry_sz); mlx4_dbg(dev, "Max CQs: %d, reserved CQs: %d, entry size: %d\n", dev_cap->max_cqs, dev_cap->reserved_cqs, dev_cap->cqc_entry_sz); mlx4_dbg(dev, "Max EQs: %d, reserved EQs: %d, entry size: %d\n", dev_cap->max_eqs, dev_cap->reserved_eqs, dev_cap->eqc_entry_sz); mlx4_dbg(dev, "reserved MPTs: %d, reserved MTTs: %d\n", dev_cap->reserved_mrws, dev_cap->reserved_mtts); mlx4_dbg(dev, "Max PDs: %d, reserved PDs: %d, reserved UARs: %d\n", dev_cap->max_pds, dev_cap->reserved_pds, dev_cap->reserved_uars); mlx4_dbg(dev, "Max QP/MCG: %d, reserved MGMs: %d\n", dev_cap->max_pds, dev_cap->reserved_mgms); mlx4_dbg(dev, "Max CQEs: %d, max WQEs: %d, max SRQ WQEs: %d\n", dev_cap->max_cq_sz, dev_cap->max_qp_sz, dev_cap->max_srq_sz); mlx4_dbg(dev, "Local CA ACK delay: %d, max MTU: %d, port width cap: %d\n", dev_cap->local_ca_ack_delay, 128 << dev_cap->ib_mtu[1], dev_cap->max_port_width[1]); mlx4_dbg(dev, "Max SQ desc size: %d, max SQ S/G: %d\n", dev_cap->max_sq_desc_sz, dev_cap->max_sq_sg); mlx4_dbg(dev, "Max RQ desc size: %d, max RQ S/G: %d\n", dev_cap->max_rq_desc_sz, dev_cap->max_rq_sg); mlx4_dbg(dev, "Max GSO size: %d\n", dev_cap->max_gso_sz); mlx4_dbg(dev, "Max basic counters: %d\n", dev_cap->max_basic_counters); mlx4_dbg(dev, "Max extended counters: %d\n", dev_cap->max_extended_counters); mlx4_dbg(dev, "Max RSS Table size: %d\n", dev_cap->max_rss_tbl_sz); dump_dev_cap_flags(dev, dev_cap->flags); dump_dev_cap_flags2(dev, dev_cap->flags2); out: mlx4_free_cmd_mailbox(dev, mailbox); return err; } int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { u64 flags; int err = 0; u8 field; err = mlx4_cmd_box(dev, 0, outbox->dma, 0, 0, MLX4_CMD_QUERY_DEV_CAP, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); if (err) return err; /* add port mng change event capability unconditionally to slaves */ MLX4_GET(flags, outbox->buf, QUERY_DEV_CAP_EXT_FLAGS_OFFSET); flags |= MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV; MLX4_PUT(outbox->buf, flags, QUERY_DEV_CAP_EXT_FLAGS_OFFSET); /* For guests, report Blueflame disabled */ MLX4_GET(field, outbox->buf, QUERY_DEV_CAP_BF_OFFSET); field &= 0x7f; MLX4_PUT(outbox->buf, field, QUERY_DEV_CAP_BF_OFFSET); /* turn off device-managed steering capability if not enabled */ if (dev->caps.steering_mode != MLX4_STEERING_MODE_DEVICE_MANAGED) { MLX4_GET(field, outbox->buf, QUERY_DEV_CAP_FLOW_STEERING_RANGE_EN_OFFSET); field &= 0x7f; MLX4_PUT(outbox->buf, field, QUERY_DEV_CAP_FLOW_STEERING_RANGE_EN_OFFSET); } return 0; } int mlx4_QUERY_PORT_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { struct mlx4_priv *priv = mlx4_priv(dev); u64 def_mac; u8 port_type; u16 short_field; int err; int admin_link_state; #define MLX4_VF_PORT_NO_LINK_SENSE_MASK 0xE0 #define MLX4_PORT_LINK_UP_MASK 0x80 #define QUERY_PORT_CUR_MAX_PKEY_OFFSET 0x0c #define QUERY_PORT_CUR_MAX_GID_OFFSET 0x0e err = mlx4_cmd_box(dev, 0, outbox->dma, vhcr->in_modifier, 0, MLX4_CMD_QUERY_PORT, MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); if (!err && dev->caps.function != slave) { /* set slave default_mac address to be zero MAC */ def_mac = priv->mfunc.master.vf_oper[slave].vport[vhcr->in_modifier].state.mac; MLX4_PUT(outbox->buf, def_mac, QUERY_PORT_MAC_OFFSET); /* get port type - currently only eth is enabled */ MLX4_GET(port_type, outbox->buf, QUERY_PORT_SUPPORTED_TYPE_OFFSET); /* No link sensing allowed */ port_type &= MLX4_VF_PORT_NO_LINK_SENSE_MASK; /* set port type to currently operating port type */ port_type |= (dev->caps.port_type[vhcr->in_modifier] & 0x3); admin_link_state = priv->mfunc.master.vf_oper[slave].vport[vhcr->in_modifier].state.link_state; if (IFLA_VF_LINK_STATE_ENABLE == admin_link_state) port_type |= MLX4_PORT_LINK_UP_MASK; else if (IFLA_VF_LINK_STATE_DISABLE == admin_link_state) port_type &= ~MLX4_PORT_LINK_UP_MASK; MLX4_PUT(outbox->buf, port_type, QUERY_PORT_SUPPORTED_TYPE_OFFSET); if (dev->caps.port_type[vhcr->in_modifier] == MLX4_PORT_TYPE_ETH) short_field = mlx4_get_slave_num_gids(dev, slave); else short_field = 1; /* slave max gids */ MLX4_PUT(outbox->buf, short_field, QUERY_PORT_CUR_MAX_GID_OFFSET); short_field = dev->caps.pkey_table_len[vhcr->in_modifier]; MLX4_PUT(outbox->buf, short_field, QUERY_PORT_CUR_MAX_PKEY_OFFSET); } return err; } int mlx4_get_slave_pkey_gid_tbl_len(struct mlx4_dev *dev, u8 port, int *gid_tbl_len, int *pkey_tbl_len) { struct mlx4_cmd_mailbox *mailbox; u32 *outbox; u16 field; int err; mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) return PTR_ERR(mailbox); err = mlx4_cmd_box(dev, 0, mailbox->dma, port, 0, MLX4_CMD_QUERY_PORT, MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED); if (err) goto out; outbox = mailbox->buf; MLX4_GET(field, outbox, QUERY_PORT_CUR_MAX_GID_OFFSET); *gid_tbl_len = field; MLX4_GET(field, outbox, QUERY_PORT_CUR_MAX_PKEY_OFFSET); *pkey_tbl_len = field; out: mlx4_free_cmd_mailbox(dev, mailbox); return err; } EXPORT_SYMBOL(mlx4_get_slave_pkey_gid_tbl_len); int mlx4_map_cmd(struct mlx4_dev *dev, u16 op, struct mlx4_icm *icm, u64 virt) { struct mlx4_cmd_mailbox *mailbox; struct mlx4_icm_iter iter; __be64 *pages; int lg; int nent = 0; int i; int err = 0; int ts = 0, tc = 0; mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) return PTR_ERR(mailbox); memset(mailbox->buf, 0, MLX4_MAILBOX_SIZE); pages = mailbox->buf; for (mlx4_icm_first(icm, &iter); !mlx4_icm_last(&iter); mlx4_icm_next(&iter)) { /* * We have to pass pages that are aligned to their * size, so find the least significant 1 in the * address or size and use that as our log2 size. */ lg = ffs(mlx4_icm_addr(&iter) | mlx4_icm_size(&iter)) - 1; if (lg < MLX4_ICM_PAGE_SHIFT) { mlx4_warn(dev, "Got FW area not aligned to %d (%llx/%lx).\n", MLX4_ICM_PAGE_SIZE, (unsigned long long) mlx4_icm_addr(&iter), mlx4_icm_size(&iter)); err = -EINVAL; goto out; } for (i = 0; i < mlx4_icm_size(&iter) >> lg; ++i) { if (virt != -1) { pages[nent * 2] = cpu_to_be64(virt); virt += 1 << lg; } pages[nent * 2 + 1] = cpu_to_be64((mlx4_icm_addr(&iter) + (i << lg)) | (lg - MLX4_ICM_PAGE_SHIFT)); ts += 1 << (lg - 10); ++tc; if (++nent == MLX4_MAILBOX_SIZE / 16) { err = mlx4_cmd(dev, mailbox->dma, nent, 0, op, MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); if (err) goto out; nent = 0; } } } if (nent) err = mlx4_cmd(dev, mailbox->dma, nent, 0, op, MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); if (err) goto out; switch (op) { case MLX4_CMD_MAP_FA: mlx4_dbg(dev, "Mapped %d chunks/%d KB for FW.\n", tc, ts); break; case MLX4_CMD_MAP_ICM_AUX: mlx4_dbg(dev, "Mapped %d chunks/%d KB for ICM aux.\n", tc, ts); break; case MLX4_CMD_MAP_ICM: mlx4_dbg(dev, "Mapped %d chunks/%d KB at %llx for ICM.\n", tc, ts, (unsigned long long) virt - (ts << 10)); break; } out: mlx4_free_cmd_mailbox(dev, mailbox); return err; } int mlx4_MAP_FA(struct mlx4_dev *dev, struct mlx4_icm *icm) { return mlx4_map_cmd(dev, MLX4_CMD_MAP_FA, icm, -1); } int mlx4_UNMAP_FA(struct mlx4_dev *dev) { return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_UNMAP_FA, MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); } int mlx4_RUN_FW(struct mlx4_dev *dev) { return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_RUN_FW, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); } int mlx4_QUERY_FW(struct mlx4_dev *dev) { struct mlx4_fw *fw = &mlx4_priv(dev)->fw; struct mlx4_cmd *cmd = &mlx4_priv(dev)->cmd; struct mlx4_cmd_mailbox *mailbox; u32 *outbox; int err = 0; u64 fw_ver; u16 cmd_if_rev; u8 lg; #define QUERY_FW_OUT_SIZE 0x100 #define QUERY_FW_VER_OFFSET 0x00 #define QUERY_FW_PPF_ID 0x09 #define QUERY_FW_CMD_IF_REV_OFFSET 0x0a #define QUERY_FW_MAX_CMD_OFFSET 0x0f #define QUERY_FW_ERR_START_OFFSET 0x30 #define QUERY_FW_ERR_SIZE_OFFSET 0x38 #define QUERY_FW_ERR_BAR_OFFSET 0x3c #define QUERY_FW_SIZE_OFFSET 0x00 #define QUERY_FW_CLR_INT_BASE_OFFSET 0x20 #define QUERY_FW_CLR_INT_BAR_OFFSET 0x28 #define QUERY_FW_COMM_BASE_OFFSET 0x40 #define QUERY_FW_COMM_BAR_OFFSET 0x48 #define QUERY_FW_CLOCK_OFFSET 0x50 #define QUERY_FW_CLOCK_BAR 0x58 mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) return PTR_ERR(mailbox); outbox = mailbox->buf; err = mlx4_cmd_box(dev, 0, mailbox->dma, 0, 0, MLX4_CMD_QUERY_FW, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); if (err) goto out; MLX4_GET(fw_ver, outbox, QUERY_FW_VER_OFFSET); /* * FW subminor version is at more significant bits than minor * version, so swap here. */ dev->caps.fw_ver = (fw_ver & 0xffff00000000ull) | ((fw_ver & 0xffff0000ull) >> 16) | ((fw_ver & 0x0000ffffull) << 16); MLX4_GET(lg, outbox, QUERY_FW_PPF_ID); dev->caps.function = lg; if (mlx4_is_slave(dev)) goto out; MLX4_GET(cmd_if_rev, outbox, QUERY_FW_CMD_IF_REV_OFFSET); if (cmd_if_rev < MLX4_COMMAND_INTERFACE_MIN_REV || cmd_if_rev > MLX4_COMMAND_INTERFACE_MAX_REV) { mlx4_err(dev, "Installed FW has unsupported " "command interface revision %d.\n", cmd_if_rev); mlx4_err(dev, "(Installed FW version is %d.%d.%03d)\n", (int) (dev->caps.fw_ver >> 32), (int) (dev->caps.fw_ver >> 16) & 0xffff, (int) dev->caps.fw_ver & 0xffff); mlx4_err(dev, "This driver version supports only revisions %d to %d.\n", MLX4_COMMAND_INTERFACE_MIN_REV, MLX4_COMMAND_INTERFACE_MAX_REV); err = -ENODEV; goto out; } if (cmd_if_rev < MLX4_COMMAND_INTERFACE_NEW_PORT_CMDS) dev->flags |= MLX4_FLAG_OLD_PORT_CMDS; MLX4_GET(lg, outbox, QUERY_FW_MAX_CMD_OFFSET); cmd->max_cmds = 1 << lg; mlx4_dbg(dev, "FW version %d.%d.%03d (cmd intf rev %d), max commands %d\n", (int) (dev->caps.fw_ver >> 32), (int) (dev->caps.fw_ver >> 16) & 0xffff, (int) dev->caps.fw_ver & 0xffff, cmd_if_rev, cmd->max_cmds); MLX4_GET(fw->catas_offset, outbox, QUERY_FW_ERR_START_OFFSET); MLX4_GET(fw->catas_size, outbox, QUERY_FW_ERR_SIZE_OFFSET); MLX4_GET(fw->catas_bar, outbox, QUERY_FW_ERR_BAR_OFFSET); fw->catas_bar = (fw->catas_bar >> 6) * 2; mlx4_dbg(dev, "Catastrophic error buffer at 0x%llx, size 0x%x, BAR %d\n", (unsigned long long) fw->catas_offset, fw->catas_size, fw->catas_bar); MLX4_GET(fw->fw_pages, outbox, QUERY_FW_SIZE_OFFSET); MLX4_GET(fw->clr_int_base, outbox, QUERY_FW_CLR_INT_BASE_OFFSET); MLX4_GET(fw->clr_int_bar, outbox, QUERY_FW_CLR_INT_BAR_OFFSET); fw->clr_int_bar = (fw->clr_int_bar >> 6) * 2; MLX4_GET(fw->comm_base, outbox, QUERY_FW_COMM_BASE_OFFSET); MLX4_GET(fw->comm_bar, outbox, QUERY_FW_COMM_BAR_OFFSET); fw->comm_bar = (fw->comm_bar >> 6) * 2; mlx4_dbg(dev, "Communication vector bar:%d offset:0x%llx\n", fw->comm_bar, (unsigned long long)fw->comm_base); mlx4_dbg(dev, "FW size %d KB\n", fw->fw_pages >> 2); MLX4_GET(fw->clock_offset, outbox, QUERY_FW_CLOCK_OFFSET); MLX4_GET(fw->clock_bar, outbox, QUERY_FW_CLOCK_BAR); fw->clock_bar = (fw->clock_bar >> 6) * 2; mlx4_dbg(dev, "Internal clock bar:%d offset:0x%llx\n", fw->comm_bar, (unsigned long long)fw->comm_base); /* * Round up number of system pages needed in case * MLX4_ICM_PAGE_SIZE < PAGE_SIZE. */ fw->fw_pages = ALIGN(fw->fw_pages, PAGE_SIZE / MLX4_ICM_PAGE_SIZE) >> (PAGE_SHIFT - MLX4_ICM_PAGE_SHIFT); mlx4_dbg(dev, "Clear int @ %llx, BAR %d\n", (unsigned long long) fw->clr_int_base, fw->clr_int_bar); out: mlx4_free_cmd_mailbox(dev, mailbox); return err; } int mlx4_QUERY_FW_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { u8 *outbuf; int err; outbuf = outbox->buf; err = mlx4_cmd_box(dev, 0, outbox->dma, 0, 0, MLX4_CMD_QUERY_FW, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); if (err) return err; /* for slaves, set pci PPF ID to invalid and zero out everything * else except FW version */ outbuf[0] = outbuf[1] = 0; memset(&outbuf[8], 0, QUERY_FW_OUT_SIZE - 8); outbuf[QUERY_FW_PPF_ID] = MLX4_INVALID_SLAVE_ID; return 0; } static void get_board_id(void *vsd, char *board_id, char *vsdstr) { int i; #define VSD_OFFSET_SIG1 0x00 #define VSD_OFFSET_SIG2 0xde #define VSD_OFFSET_MLX_BOARD_ID 0xd0 #define VSD_OFFSET_TS_BOARD_ID 0x20 #define VSD_LEN 0xd0 #define VSD_SIGNATURE_TOPSPIN 0x5ad memset(vsdstr, 0, MLX4_VSD_LEN); for (i = 0; i < VSD_LEN / 4; i++) ((u32 *)vsdstr)[i] = swab32(*(u32 *)(vsd + i * 4)); memset(board_id, 0, MLX4_BOARD_ID_LEN); if (be16_to_cpup(vsd + VSD_OFFSET_SIG1) == VSD_SIGNATURE_TOPSPIN && be16_to_cpup(vsd + VSD_OFFSET_SIG2) == VSD_SIGNATURE_TOPSPIN) { strlcpy(board_id, vsd + VSD_OFFSET_TS_BOARD_ID, MLX4_BOARD_ID_LEN); } else { /* * The board ID is a string but the firmware byte * swaps each 4-byte word before passing it back to * us. Therefore we need to swab it before printing. */ for (i = 0; i < 4; ++i) ((u32 *) board_id)[i] = swab32(*(u32 *) (vsd + VSD_OFFSET_MLX_BOARD_ID + i * 4)); } } int mlx4_QUERY_ADAPTER(struct mlx4_dev *dev, struct mlx4_adapter *adapter) { struct mlx4_cmd_mailbox *mailbox; u32 *outbox; int err; #define QUERY_ADAPTER_OUT_SIZE 0x100 #define QUERY_ADAPTER_INTA_PIN_OFFSET 0x10 #define QUERY_ADAPTER_VSD_OFFSET 0x20 #define QUERY_ADAPTER_VSD_VENDOR_ID_OFFSET 0x1e mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) return PTR_ERR(mailbox); outbox = mailbox->buf; err = mlx4_cmd_box(dev, 0, mailbox->dma, 0, 0, MLX4_CMD_QUERY_ADAPTER, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); if (err) goto out; MLX4_GET(adapter->inta_pin, outbox, QUERY_ADAPTER_INTA_PIN_OFFSET); adapter->vsd_vendor_id = be16_to_cpup((u16 *)outbox + QUERY_ADAPTER_VSD_VENDOR_ID_OFFSET / 2); get_board_id(outbox + QUERY_ADAPTER_VSD_OFFSET / 4, adapter->board_id, adapter->vsd); out: mlx4_free_cmd_mailbox(dev, mailbox); return err; } int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param) { struct mlx4_cmd_mailbox *mailbox; __be32 *inbox; u32 mw_enable; int err; #define INIT_HCA_IN_SIZE 0x200 #define INIT_HCA_DRV_NAME_FOR_FW_MAX_SIZE 64 #define INIT_HCA_VERSION_OFFSET 0x000 #define INIT_HCA_VERSION 2 #define INIT_HCA_CACHELINE_SZ_OFFSET 0x0e #define INIT_HCA_FLAGS_OFFSET 0x014 #define INIT_HCA_RECOVERABLE_ERROR_EVENT_OFFSET 0x018 #define INIT_HCA_QPC_OFFSET 0x020 #define INIT_HCA_QPC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x10) #define INIT_HCA_LOG_QP_OFFSET (INIT_HCA_QPC_OFFSET + 0x17) #define INIT_HCA_SRQC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x28) #define INIT_HCA_LOG_SRQ_OFFSET (INIT_HCA_QPC_OFFSET + 0x2f) #define INIT_HCA_CQC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x30) #define INIT_HCA_LOG_CQ_OFFSET (INIT_HCA_QPC_OFFSET + 0x37) #define INIT_HCA_EQE_CQE_OFFSETS (INIT_HCA_QPC_OFFSET + 0x38) #define INIT_HCA_ALTC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x40) #define INIT_HCA_AUXC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x50) #define INIT_HCA_EQC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x60) #define INIT_HCA_LOG_EQ_OFFSET (INIT_HCA_QPC_OFFSET + 0x67) #define INIT_HCA_RDMARC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x70) #define INIT_HCA_LOG_RD_OFFSET (INIT_HCA_QPC_OFFSET + 0x77) #define INIT_HCA_MCAST_OFFSET 0x0c0 #define INIT_HCA_MC_BASE_OFFSET (INIT_HCA_MCAST_OFFSET + 0x00) #define INIT_HCA_LOG_MC_ENTRY_SZ_OFFSET (INIT_HCA_MCAST_OFFSET + 0x12) #define INIT_HCA_LOG_MC_HASH_SZ_OFFSET (INIT_HCA_MCAST_OFFSET + 0x16) #define INIT_HCA_UC_STEERING_OFFSET (INIT_HCA_MCAST_OFFSET + 0x18) #define INIT_HCA_LOG_MC_TABLE_SZ_OFFSET (INIT_HCA_MCAST_OFFSET + 0x1b) #define INIT_HCA_DEVICE_MANAGED_FLOW_STEERING_EN 0x6 #define INIT_HCA_DRIVER_VERSION_OFFSET 0x140 #define INIT_HCA_FS_PARAM_OFFSET 0x1d0 #define INIT_HCA_FS_BASE_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x00) #define INIT_HCA_FS_LOG_ENTRY_SZ_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x12) #define INIT_HCA_FS_LOG_TABLE_SZ_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x1b) #define INIT_HCA_FS_ETH_BITS_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x21) #define INIT_HCA_FS_ETH_NUM_ADDRS_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x22) #define INIT_HCA_FS_IB_BITS_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x25) #define INIT_HCA_FS_IB_NUM_ADDRS_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x26) #define INIT_HCA_TPT_OFFSET 0x0f0 #define INIT_HCA_DMPT_BASE_OFFSET (INIT_HCA_TPT_OFFSET + 0x00) #define INIT_HCA_TPT_MW_OFFSET (INIT_HCA_TPT_OFFSET + 0x08) #define INIT_HCA_TPT_MW_ENABLE (1 << 31) #define INIT_HCA_LOG_MPT_SZ_OFFSET (INIT_HCA_TPT_OFFSET + 0x0b) #define INIT_HCA_MTT_BASE_OFFSET (INIT_HCA_TPT_OFFSET + 0x10) #define INIT_HCA_CMPT_BASE_OFFSET (INIT_HCA_TPT_OFFSET + 0x18) #define INIT_HCA_UAR_OFFSET 0x120 #define INIT_HCA_LOG_UAR_SZ_OFFSET (INIT_HCA_UAR_OFFSET + 0x0a) #define INIT_HCA_UAR_PAGE_SZ_OFFSET (INIT_HCA_UAR_OFFSET + 0x0b) mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) return PTR_ERR(mailbox); inbox = mailbox->buf; memset(inbox, 0, INIT_HCA_IN_SIZE); *((u8 *) mailbox->buf + INIT_HCA_VERSION_OFFSET) = INIT_HCA_VERSION; *((u8 *) mailbox->buf + INIT_HCA_CACHELINE_SZ_OFFSET) = ((ilog2(cache_line_size()) - 4) << 5) | (1 << 4); #if defined(__LITTLE_ENDIAN) *(inbox + INIT_HCA_FLAGS_OFFSET / 4) &= ~cpu_to_be32(1 << 1); #elif defined(__BIG_ENDIAN) *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1 << 1); #else #error Host endianness not defined #endif /* Check port for UD address vector: */ *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1); /* Enable IPoIB checksumming if we can: */ if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IPOIB_CSUM) *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1 << 3); /* Enable QoS support if module parameter set */ if (enable_qos) *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1 << 2); /* Enable fast drop performance optimization */ if (dev->caps.fast_drop) *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1 << 7); /* enable counters */ if (dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS) *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1 << 4); /* CX3 is capable of extending CQEs\EQEs from 32 to 64 bytes */ if (dev->caps.flags & MLX4_DEV_CAP_FLAG_64B_EQE) { *(inbox + INIT_HCA_EQE_CQE_OFFSETS / 4) |= cpu_to_be32(1 << 29); dev->caps.eqe_size = 64; dev->caps.eqe_factor = 1; } else { dev->caps.eqe_size = 32; dev->caps.eqe_factor = 0; } if (dev->caps.flags & MLX4_DEV_CAP_FLAG_64B_CQE) { *(inbox + INIT_HCA_EQE_CQE_OFFSETS / 4) |= cpu_to_be32(1 << 30); dev->caps.cqe_size = 64; dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_64B_CQE; } else { dev->caps.cqe_size = 32; } if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RECOVERABLE_ERROR_EVENT) *(inbox + INIT_HCA_RECOVERABLE_ERROR_EVENT_OFFSET / 4) |= cpu_to_be32(1 << 31); if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DRIVER_VERSION_TO_FW) { strncpy((u8 *)mailbox->buf + INIT_HCA_DRIVER_VERSION_OFFSET, DRV_NAME_FOR_FW, INIT_HCA_DRV_NAME_FOR_FW_MAX_SIZE - 1); mlx4_dbg(dev, "Reporting Driver Version to FW: %s\n", (u8 *)mailbox->buf + INIT_HCA_DRIVER_VERSION_OFFSET); } /* QPC/EEC/CQC/EQC/RDMARC attributes */ MLX4_PUT(inbox, param->qpc_base, INIT_HCA_QPC_BASE_OFFSET); MLX4_PUT(inbox, param->log_num_qps, INIT_HCA_LOG_QP_OFFSET); MLX4_PUT(inbox, param->srqc_base, INIT_HCA_SRQC_BASE_OFFSET); MLX4_PUT(inbox, param->log_num_srqs, INIT_HCA_LOG_SRQ_OFFSET); MLX4_PUT(inbox, param->cqc_base, INIT_HCA_CQC_BASE_OFFSET); MLX4_PUT(inbox, param->log_num_cqs, INIT_HCA_LOG_CQ_OFFSET); MLX4_PUT(inbox, param->altc_base, INIT_HCA_ALTC_BASE_OFFSET); MLX4_PUT(inbox, param->auxc_base, INIT_HCA_AUXC_BASE_OFFSET); MLX4_PUT(inbox, param->eqc_base, INIT_HCA_EQC_BASE_OFFSET); MLX4_PUT(inbox, param->log_num_eqs, INIT_HCA_LOG_EQ_OFFSET); MLX4_PUT(inbox, param->rdmarc_base, INIT_HCA_RDMARC_BASE_OFFSET); MLX4_PUT(inbox, param->log_rd_per_qp, INIT_HCA_LOG_RD_OFFSET); /* steering attributes */ if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) { *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1 << INIT_HCA_DEVICE_MANAGED_FLOW_STEERING_EN); MLX4_PUT(inbox, param->mc_base, INIT_HCA_FS_BASE_OFFSET); MLX4_PUT(inbox, param->log_mc_entry_sz, INIT_HCA_FS_LOG_ENTRY_SZ_OFFSET); MLX4_PUT(inbox, param->log_mc_table_sz, INIT_HCA_FS_LOG_TABLE_SZ_OFFSET); /* Enable Ethernet flow steering * with udp unicast and tcp unicast */ MLX4_PUT(inbox, (u8) (MLX4_FS_UDP_UC_EN | MLX4_FS_TCP_UC_EN), INIT_HCA_FS_ETH_BITS_OFFSET); MLX4_PUT(inbox, (u16) MLX4_FS_NUM_OF_L2_ADDR, INIT_HCA_FS_ETH_NUM_ADDRS_OFFSET); /* Enable IPoIB flow steering * with udp unicast and tcp unicast */ MLX4_PUT(inbox, (u8) (MLX4_FS_UDP_UC_EN | MLX4_FS_TCP_UC_EN), INIT_HCA_FS_IB_BITS_OFFSET); MLX4_PUT(inbox, (u16) MLX4_FS_NUM_OF_L2_ADDR, INIT_HCA_FS_IB_NUM_ADDRS_OFFSET); } else { MLX4_PUT(inbox, param->mc_base, INIT_HCA_MC_BASE_OFFSET); MLX4_PUT(inbox, param->log_mc_entry_sz, INIT_HCA_LOG_MC_ENTRY_SZ_OFFSET); MLX4_PUT(inbox, param->log_mc_hash_sz, INIT_HCA_LOG_MC_HASH_SZ_OFFSET); MLX4_PUT(inbox, param->log_mc_table_sz, INIT_HCA_LOG_MC_TABLE_SZ_OFFSET); if (dev->caps.steering_mode == MLX4_STEERING_MODE_B0) MLX4_PUT(inbox, (u8) (1 << 3), INIT_HCA_UC_STEERING_OFFSET); } /* TPT attributes */ MLX4_PUT(inbox, param->dmpt_base, INIT_HCA_DMPT_BASE_OFFSET); mw_enable = param->mw_enable ? INIT_HCA_TPT_MW_ENABLE : 0; MLX4_PUT(inbox, mw_enable, INIT_HCA_TPT_MW_OFFSET); MLX4_PUT(inbox, param->log_mpt_sz, INIT_HCA_LOG_MPT_SZ_OFFSET); MLX4_PUT(inbox, param->mtt_base, INIT_HCA_MTT_BASE_OFFSET); MLX4_PUT(inbox, param->cmpt_base, INIT_HCA_CMPT_BASE_OFFSET); /* UAR attributes */ MLX4_PUT(inbox, param->uar_page_sz, INIT_HCA_UAR_PAGE_SZ_OFFSET); MLX4_PUT(inbox, param->log_uar_sz, INIT_HCA_LOG_UAR_SZ_OFFSET); err = mlx4_cmd(dev, mailbox->dma, 0, 0, MLX4_CMD_INIT_HCA, 10000, MLX4_CMD_NATIVE); if (err) mlx4_err(dev, "INIT_HCA returns %d\n", err); mlx4_free_cmd_mailbox(dev, mailbox); return err; } int mlx4_QUERY_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param) { struct mlx4_cmd_mailbox *mailbox; __be32 *outbox; u32 dword_field; u32 mw_enable; int err; u8 byte_field; #define QUERY_HCA_GLOBAL_CAPS_OFFSET 0x04 #define QUERY_HCA_CORE_CLOCK_OFFSET 0x0c mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) return PTR_ERR(mailbox); outbox = mailbox->buf; err = mlx4_cmd_box(dev, 0, mailbox->dma, 0, 0, MLX4_CMD_QUERY_HCA, MLX4_CMD_TIME_CLASS_B, !mlx4_is_slave(dev)); if (err) goto out; MLX4_GET(param->global_caps, outbox, QUERY_HCA_GLOBAL_CAPS_OFFSET); MLX4_GET(param->hca_core_clock, outbox, QUERY_HCA_CORE_CLOCK_OFFSET); /* QPC/EEC/CQC/EQC/RDMARC attributes */ MLX4_GET(param->qpc_base, outbox, INIT_HCA_QPC_BASE_OFFSET); MLX4_GET(param->log_num_qps, outbox, INIT_HCA_LOG_QP_OFFSET); MLX4_GET(param->srqc_base, outbox, INIT_HCA_SRQC_BASE_OFFSET); MLX4_GET(param->log_num_srqs, outbox, INIT_HCA_LOG_SRQ_OFFSET); MLX4_GET(param->cqc_base, outbox, INIT_HCA_CQC_BASE_OFFSET); MLX4_GET(param->log_num_cqs, outbox, INIT_HCA_LOG_CQ_OFFSET); MLX4_GET(param->altc_base, outbox, INIT_HCA_ALTC_BASE_OFFSET); MLX4_GET(param->auxc_base, outbox, INIT_HCA_AUXC_BASE_OFFSET); MLX4_GET(param->eqc_base, outbox, INIT_HCA_EQC_BASE_OFFSET); MLX4_GET(param->log_num_eqs, outbox, INIT_HCA_LOG_EQ_OFFSET); MLX4_GET(param->rdmarc_base, outbox, INIT_HCA_RDMARC_BASE_OFFSET); MLX4_GET(param->log_rd_per_qp, outbox, INIT_HCA_LOG_RD_OFFSET); MLX4_GET(dword_field, outbox, INIT_HCA_FLAGS_OFFSET); if (dword_field & (1 << INIT_HCA_DEVICE_MANAGED_FLOW_STEERING_EN)) { param->steering_mode = MLX4_STEERING_MODE_DEVICE_MANAGED; } else { MLX4_GET(byte_field, outbox, INIT_HCA_UC_STEERING_OFFSET); if (byte_field & 0x8) param->steering_mode = MLX4_STEERING_MODE_B0; else param->steering_mode = MLX4_STEERING_MODE_A0; } /* steering attributes */ if (param->steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) { MLX4_GET(param->mc_base, outbox, INIT_HCA_FS_BASE_OFFSET); MLX4_GET(param->log_mc_entry_sz, outbox, INIT_HCA_FS_LOG_ENTRY_SZ_OFFSET); MLX4_GET(param->log_mc_table_sz, outbox, INIT_HCA_FS_LOG_TABLE_SZ_OFFSET); } else { MLX4_GET(param->mc_base, outbox, INIT_HCA_MC_BASE_OFFSET); MLX4_GET(param->log_mc_entry_sz, outbox, INIT_HCA_LOG_MC_ENTRY_SZ_OFFSET); MLX4_GET(param->log_mc_hash_sz, outbox, INIT_HCA_LOG_MC_HASH_SZ_OFFSET); MLX4_GET(param->log_mc_table_sz, outbox, INIT_HCA_LOG_MC_TABLE_SZ_OFFSET); } /* CX3 is capable of extending CQEs\EQEs from 32 to 64 bytes */ MLX4_GET(byte_field, outbox, INIT_HCA_EQE_CQE_OFFSETS); if (byte_field & 0x20) /* 64-bytes eqe enabled */ param->dev_cap_enabled |= MLX4_DEV_CAP_64B_EQE_ENABLED; if (byte_field & 0x40) /* 64-bytes cqe enabled */ param->dev_cap_enabled |= MLX4_DEV_CAP_64B_CQE_ENABLED; /* TPT attributes */ MLX4_GET(param->dmpt_base, outbox, INIT_HCA_DMPT_BASE_OFFSET); MLX4_GET(mw_enable, outbox, INIT_HCA_TPT_MW_OFFSET); param->mw_enable = (mw_enable & INIT_HCA_TPT_MW_ENABLE) == INIT_HCA_TPT_MW_ENABLE; MLX4_GET(param->log_mpt_sz, outbox, INIT_HCA_LOG_MPT_SZ_OFFSET); MLX4_GET(param->mtt_base, outbox, INIT_HCA_MTT_BASE_OFFSET); MLX4_GET(param->cmpt_base, outbox, INIT_HCA_CMPT_BASE_OFFSET); /* UAR attributes */ MLX4_GET(param->uar_page_sz, outbox, INIT_HCA_UAR_PAGE_SZ_OFFSET); MLX4_GET(param->log_uar_sz, outbox, INIT_HCA_LOG_UAR_SZ_OFFSET); out: mlx4_free_cmd_mailbox(dev, mailbox); return err; } /* for IB-type ports only in SRIOV mode. Checks that both proxy QP0 * and real QP0 are active, so that the paravirtualized QP0 is ready * to operate */ static int check_qp0_state(struct mlx4_dev *dev, int function, int port) { struct mlx4_priv *priv = mlx4_priv(dev); /* irrelevant if not infiniband */ if (priv->mfunc.master.qp0_state[port].proxy_qp0_active && priv->mfunc.master.qp0_state[port].qp0_active) return 1; return 0; } int mlx4_INIT_PORT_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { struct mlx4_priv *priv = mlx4_priv(dev); int port = vhcr->in_modifier; int err; if (priv->mfunc.master.slave_state[slave].init_port_mask & (1 << port)) return 0; if (dev->caps.port_mask[port] != MLX4_PORT_TYPE_IB) { /* Enable port only if it was previously disabled */ if (!priv->mfunc.master.init_port_ref[port]) { err = mlx4_cmd(dev, 0, port, 0, MLX4_CMD_INIT_PORT, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); if (err) return err; } priv->mfunc.master.slave_state[slave].init_port_mask |= (1 << port); } else { if (slave == mlx4_master_func_num(dev)) { if (check_qp0_state(dev, slave, port) && !priv->mfunc.master.qp0_state[port].port_active) { err = mlx4_cmd(dev, 0, port, 0, MLX4_CMD_INIT_PORT, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); if (err) return err; priv->mfunc.master.qp0_state[port].port_active = 1; priv->mfunc.master.slave_state[slave].init_port_mask |= (1 << port); } } else priv->mfunc.master.slave_state[slave].init_port_mask |= (1 << port); } ++priv->mfunc.master.init_port_ref[port]; return 0; } int mlx4_INIT_PORT(struct mlx4_dev *dev, int port) { struct mlx4_cmd_mailbox *mailbox; u32 *inbox; int err; u32 flags; u16 field; if (dev->flags & MLX4_FLAG_OLD_PORT_CMDS) { #define INIT_PORT_IN_SIZE 256 #define INIT_PORT_FLAGS_OFFSET 0x00 #define INIT_PORT_FLAG_SIG (1 << 18) #define INIT_PORT_FLAG_NG (1 << 17) #define INIT_PORT_FLAG_G0 (1 << 16) #define INIT_PORT_VL_SHIFT 4 #define INIT_PORT_PORT_WIDTH_SHIFT 8 #define INIT_PORT_MTU_OFFSET 0x04 #define INIT_PORT_MAX_GID_OFFSET 0x06 #define INIT_PORT_MAX_PKEY_OFFSET 0x0a #define INIT_PORT_GUID0_OFFSET 0x10 #define INIT_PORT_NODE_GUID_OFFSET 0x18 #define INIT_PORT_SI_GUID_OFFSET 0x20 mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) return PTR_ERR(mailbox); inbox = mailbox->buf; memset(inbox, 0, INIT_PORT_IN_SIZE); flags = 0; flags |= (dev->caps.vl_cap[port] & 0xf) << INIT_PORT_VL_SHIFT; flags |= (dev->caps.port_width_cap[port] & 0xf) << INIT_PORT_PORT_WIDTH_SHIFT; MLX4_PUT(inbox, flags, INIT_PORT_FLAGS_OFFSET); field = 128 << dev->caps.ib_mtu_cap[port]; MLX4_PUT(inbox, field, INIT_PORT_MTU_OFFSET); field = dev->caps.gid_table_len[port]; MLX4_PUT(inbox, field, INIT_PORT_MAX_GID_OFFSET); field = dev->caps.pkey_table_len[port]; MLX4_PUT(inbox, field, INIT_PORT_MAX_PKEY_OFFSET); err = mlx4_cmd(dev, mailbox->dma, port, 0, MLX4_CMD_INIT_PORT, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); mlx4_free_cmd_mailbox(dev, mailbox); } else err = mlx4_cmd(dev, 0, port, 0, MLX4_CMD_INIT_PORT, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); return err; } EXPORT_SYMBOL_GPL(mlx4_INIT_PORT); int mlx4_CLOSE_PORT_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { struct mlx4_priv *priv = mlx4_priv(dev); int port = vhcr->in_modifier; int err; if (!(priv->mfunc.master.slave_state[slave].init_port_mask & (1 << port))) return 0; if (dev->caps.port_mask[port] != MLX4_PORT_TYPE_IB) { if (priv->mfunc.master.init_port_ref[port] == 1) { err = mlx4_cmd(dev, 0, port, 0, MLX4_CMD_CLOSE_PORT, 1000, MLX4_CMD_NATIVE); if (err) return err; } priv->mfunc.master.slave_state[slave].init_port_mask &= ~(1 << port); } else { /* infiniband port */ if (slave == mlx4_master_func_num(dev)) { if (!priv->mfunc.master.qp0_state[port].qp0_active && priv->mfunc.master.qp0_state[port].port_active) { err = mlx4_cmd(dev, 0, port, 0, MLX4_CMD_CLOSE_PORT, 1000, MLX4_CMD_NATIVE); if (err) return err; priv->mfunc.master.slave_state[slave].init_port_mask &= ~(1 << port); priv->mfunc.master.qp0_state[port].port_active = 0; } } else priv->mfunc.master.slave_state[slave].init_port_mask &= ~(1 << port); } --priv->mfunc.master.init_port_ref[port]; return 0; } int mlx4_CLOSE_PORT(struct mlx4_dev *dev, int port) { return mlx4_cmd(dev, 0, port, 0, MLX4_CMD_CLOSE_PORT, 1000, MLX4_CMD_WRAPPED); } EXPORT_SYMBOL_GPL(mlx4_CLOSE_PORT); int mlx4_CLOSE_HCA(struct mlx4_dev *dev, int panic) { return mlx4_cmd(dev, 0, 0, panic, MLX4_CMD_CLOSE_HCA, 1000, MLX4_CMD_NATIVE); } int mlx4_SET_ICM_SIZE(struct mlx4_dev *dev, u64 icm_size, u64 *aux_pages) { int ret = mlx4_cmd_imm(dev, icm_size, aux_pages, 0, 0, MLX4_CMD_SET_ICM_SIZE, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); if (ret) return ret; /* * Round up number of system pages needed in case * MLX4_ICM_PAGE_SIZE < PAGE_SIZE. */ *aux_pages = ALIGN(*aux_pages, PAGE_SIZE / MLX4_ICM_PAGE_SIZE) >> (PAGE_SHIFT - MLX4_ICM_PAGE_SHIFT); return 0; } int mlx4_NOP(struct mlx4_dev *dev) { /* Input modifier of 0x1f means "finish as soon as possible." */ return mlx4_cmd(dev, 0, 0x1f, 0, MLX4_CMD_NOP, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); } int mlx4_query_diag_counters(struct mlx4_dev *dev, int array_length, u8 op_modifier, u32 in_offset[], u32 counter_out[]) { struct mlx4_cmd_mailbox *mailbox; u32 *outbox; int ret; int i; mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) return PTR_ERR(mailbox); outbox = mailbox->buf; ret = mlx4_cmd_box(dev, 0, mailbox->dma, 0, op_modifier, MLX4_CMD_DIAG_RPRT, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); if (ret) goto out; for (i = 0; i < array_length; i++) { if (in_offset[i] > MLX4_MAILBOX_SIZE) { ret = -EINVAL; goto out; } MLX4_GET(counter_out[i], outbox, in_offset[i]); } out: mlx4_free_cmd_mailbox(dev, mailbox); return ret; } EXPORT_SYMBOL_GPL(mlx4_query_diag_counters); int mlx4_MOD_STAT_CFG_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { return -EPERM; } #define MLX4_WOL_SETUP_MODE (5 << 28) int mlx4_wol_read(struct mlx4_dev *dev, u64 *config, int port) { u32 in_mod = MLX4_WOL_SETUP_MODE | port << 8; return mlx4_cmd_imm(dev, 0, config, in_mod, 0x3, MLX4_CMD_MOD_STAT_CFG, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); } EXPORT_SYMBOL_GPL(mlx4_wol_read); int mlx4_wol_write(struct mlx4_dev *dev, u64 config, int port) { u32 in_mod = MLX4_WOL_SETUP_MODE | port << 8; return mlx4_cmd(dev, config, in_mod, 0x1, MLX4_CMD_MOD_STAT_CFG, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); } EXPORT_SYMBOL_GPL(mlx4_wol_write); enum { ADD_TO_MCG = 0x26, }; void mlx4_opreq_action(struct work_struct *work) { struct mlx4_priv *priv = container_of(work, struct mlx4_priv, opreq_task); struct mlx4_dev *dev = &priv->dev; int num_tasks = atomic_read(&priv->opreq_count); struct mlx4_cmd_mailbox *mailbox; struct mlx4_mgm *mgm; u32 *outbox; u32 modifier; u16 token; u16 type_m; u16 type; int err; u32 num_qps; struct mlx4_qp qp; int i; u8 rem_mcg; u8 prot; #define GET_OP_REQ_MODIFIER_OFFSET 0x08 #define GET_OP_REQ_TOKEN_OFFSET 0x14 #define GET_OP_REQ_TYPE_OFFSET 0x1a #define GET_OP_REQ_DATA_OFFSET 0x20 mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) { mlx4_err(dev, "Failed to allocate mailbox for GET_OP_REQ\n"); return; } outbox = mailbox->buf; while (num_tasks) { err = mlx4_cmd_box(dev, 0, mailbox->dma, 0, 0, MLX4_CMD_GET_OP_REQ, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); if (err) { - mlx4_err(dev, "Failed to retreive required operation: %d\n", err); + mlx4_err(dev, "Failed to retrieve required operation: %d\n", err); return; } MLX4_GET(modifier, outbox, GET_OP_REQ_MODIFIER_OFFSET); MLX4_GET(token, outbox, GET_OP_REQ_TOKEN_OFFSET); MLX4_GET(type, outbox, GET_OP_REQ_TYPE_OFFSET); type_m = type >> 12; type &= 0xfff; switch (type) { case ADD_TO_MCG: if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) { mlx4_warn(dev, "ADD MCG operation is not supported in " "DEVICE_MANAGED steerign mode\n"); err = EPERM; break; } mgm = (struct mlx4_mgm *) ((u8 *) (outbox) + GET_OP_REQ_DATA_OFFSET); num_qps = be32_to_cpu(mgm->members_count) & MGM_QPN_MASK; rem_mcg = ((u8 *) (&mgm->members_count))[0] & 1; prot = ((u8 *) (&mgm->members_count))[0] >> 6; for (i = 0; i < num_qps; i++) { qp.qpn = be32_to_cpu(mgm->qp[i]); if (rem_mcg) err = mlx4_multicast_detach(dev, &qp, mgm->gid, prot, 0); else err = mlx4_multicast_attach(dev, &qp, mgm->gid, mgm->gid[5] ,0, prot, NULL); if (err) break; } break; default: mlx4_warn(dev, "Bad type for required operation\n"); err = EINVAL; break; } err = mlx4_cmd(dev, 0, ((u32) err | cpu_to_be32(token) << 16), 1, MLX4_CMD_GET_OP_REQ, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); if (err) { mlx4_err(dev, "Failed to acknowledge required request: %d\n", err); goto out; } memset(outbox, 0, 0xffc); num_tasks = atomic_dec_return(&priv->opreq_count); } out: mlx4_free_cmd_mailbox(dev, mailbox); } Index: head/sys/ofed/drivers/net/mlx4/main.c =================================================================== --- head/sys/ofed/drivers/net/mlx4/main.c (revision 299178) +++ head/sys/ofed/drivers/net/mlx4/main.c (revision 299179) @@ -1,3881 +1,3881 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2005, 2006, 2007, 2008, 2014 Mellanox Technologies. All rights reserved. * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include "mlx4.h" #include "fw.h" #include "icm.h" #include "mlx4_stats.h" /* Mellanox ConnectX HCA low-level driver */ struct workqueue_struct *mlx4_wq; #ifdef CONFIG_MLX4_DEBUG int mlx4_debug_level = 0; module_param_named(debug_level, mlx4_debug_level, int, 0644); MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0"); #endif /* CONFIG_MLX4_DEBUG */ #ifdef CONFIG_PCI_MSI static int msi_x = 1; module_param(msi_x, int, 0444); MODULE_PARM_DESC(msi_x, "0 - don't use MSI-X, 1 - use MSI-X, >1 - limit number of MSI-X irqs to msi_x (non-SRIOV only)"); #else /* CONFIG_PCI_MSI */ #define msi_x (0) #endif /* CONFIG_PCI_MSI */ static int enable_sys_tune = 0; module_param(enable_sys_tune, int, 0444); MODULE_PARM_DESC(enable_sys_tune, "Tune the cpu's for better performance (default 0)"); int mlx4_blck_lb = 1; module_param_named(block_loopback, mlx4_blck_lb, int, 0644); MODULE_PARM_DESC(block_loopback, "Block multicast loopback packets if > 0 " "(default: 1)"); enum { DEFAULT_DOMAIN = 0, BDF_STR_SIZE = 8, /* bb:dd.f- */ DBDF_STR_SIZE = 13 /* mmmm:bb:dd.f- */ }; enum { NUM_VFS, PROBE_VF, PORT_TYPE_ARRAY }; enum { VALID_DATA, INVALID_DATA, INVALID_STR }; struct param_data { int id; struct mlx4_dbdf2val_lst dbdf2val; }; static struct param_data num_vfs = { .id = NUM_VFS, .dbdf2val = { .name = "num_vfs param", .num_vals = 1, .def_val = {0}, .range = {0, MLX4_MAX_NUM_VF} } }; module_param_string(num_vfs, num_vfs.dbdf2val.str, sizeof(num_vfs.dbdf2val.str), 0444); MODULE_PARM_DESC(num_vfs, "Either single value (e.g. '5') to define uniform num_vfs value for all devices functions\n" "\t\tor a string to map device function numbers to their num_vfs values (e.g. '0000:04:00.0-5,002b:1c:0b.a-15').\n" "\t\tHexadecimal digits for the device function (e.g. 002b:1c:0b.a) and decimal for num_vfs value (e.g. 15)."); static struct param_data probe_vf = { .id = PROBE_VF, .dbdf2val = { .name = "probe_vf param", .num_vals = 1, .def_val = {0}, .range = {0, MLX4_MAX_NUM_VF} } }; module_param_string(probe_vf, probe_vf.dbdf2val.str, sizeof(probe_vf.dbdf2val.str), 0444); MODULE_PARM_DESC(probe_vf, "Either single value (e.g. '3') to define uniform number of VFs to probe by the pf driver for all devices functions\n" "\t\tor a string to map device function numbers to their probe_vf values (e.g. '0000:04:00.0-3,002b:1c:0b.a-13').\n" "\t\tHexadecimal digits for the device function (e.g. 002b:1c:0b.a) and decimal for probe_vf value (e.g. 13)."); int mlx4_log_num_mgm_entry_size = MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE; module_param_named(log_num_mgm_entry_size, mlx4_log_num_mgm_entry_size, int, 0444); MODULE_PARM_DESC(log_num_mgm_entry_size, "log mgm size, that defines the num" " of qp per mcg, for example:" " 10 gives 248.range: 7 <=" " log_num_mgm_entry_size <= 12." " To activate device managed" " flow steering when available, set to -1"); static int high_rate_steer; module_param(high_rate_steer, int, 0444); MODULE_PARM_DESC(high_rate_steer, "Enable steering mode for higher packet rate" " (default off)"); static int fast_drop; module_param_named(fast_drop, fast_drop, int, 0444); MODULE_PARM_DESC(fast_drop, - "Enable fast packet drop when no recieve WQEs are posted"); + "Enable fast packet drop when no receive WQEs are posted"); int mlx4_enable_64b_cqe_eqe = 1; module_param_named(enable_64b_cqe_eqe, mlx4_enable_64b_cqe_eqe, int, 0644); MODULE_PARM_DESC(enable_64b_cqe_eqe, "Enable 64 byte CQEs/EQEs when the the FW supports this if non-zero (default: 1)"); #define HCA_GLOBAL_CAP_MASK 0 #define PF_CONTEXT_BEHAVIOUR_MASK MLX4_FUNC_CAP_64B_EQE_CQE static char mlx4_version[] __devinitdata = DRV_NAME ": Mellanox ConnectX VPI driver v" DRV_VERSION " (" DRV_RELDATE ")\n"; static int log_num_mac = 7; module_param_named(log_num_mac, log_num_mac, int, 0444); MODULE_PARM_DESC(log_num_mac, "Log2 max number of MACs per ETH port (1-7)"); static int log_num_vlan; module_param_named(log_num_vlan, log_num_vlan, int, 0444); MODULE_PARM_DESC(log_num_vlan, "(Obsolete) Log2 max number of VLANs per ETH port (0-7)"); /* Log2 max number of VLANs per ETH port (0-7) */ #define MLX4_LOG_NUM_VLANS 7 int log_mtts_per_seg = ilog2(1); module_param_named(log_mtts_per_seg, log_mtts_per_seg, int, 0444); MODULE_PARM_DESC(log_mtts_per_seg, "Log2 number of MTT entries per segment " "(0-7) (default: 0)"); static struct param_data port_type_array = { .id = PORT_TYPE_ARRAY, .dbdf2val = { .name = "port_type_array param", .num_vals = 2, .def_val = {MLX4_PORT_TYPE_ETH, MLX4_PORT_TYPE_ETH}, .range = {MLX4_PORT_TYPE_IB, MLX4_PORT_TYPE_NA} } }; module_param_string(port_type_array, port_type_array.dbdf2val.str, sizeof(port_type_array.dbdf2val.str), 0444); MODULE_PARM_DESC(port_type_array, "Either pair of values (e.g. '1,2') to define uniform port1/port2 types configuration for all devices functions\n" "\t\tor a string to map device function numbers to their pair of port types values (e.g. '0000:04:00.0-1;2,002b:1c:0b.a-1;1').\n" "\t\tValid port types: 1-ib, 2-eth, 3-auto, 4-N/A\n" "\t\tIn case that only one port is available use the N/A port type for port2 (e.g '1,4')."); struct mlx4_port_config { struct list_head list; enum mlx4_port_type port_type[MLX4_MAX_PORTS + 1]; struct pci_dev *pdev; }; #define MLX4_LOG_NUM_MTT 20 /* We limit to 30 as of a bit map issue which uses int and not uint. see mlx4_buddy_init -> bitmap_zero which gets int. */ #define MLX4_MAX_LOG_NUM_MTT 30 static struct mlx4_profile mod_param_profile = { .num_qp = 19, .num_srq = 16, .rdmarc_per_qp = 4, .num_cq = 16, .num_mcg = 13, .num_mpt = 19, .num_mtt_segs = 0, /* max(20, 2*MTTs for host memory)) */ }; module_param_named(log_num_qp, mod_param_profile.num_qp, int, 0444); MODULE_PARM_DESC(log_num_qp, "log maximum number of QPs per HCA (default: 19)"); module_param_named(log_num_srq, mod_param_profile.num_srq, int, 0444); MODULE_PARM_DESC(log_num_srq, "log maximum number of SRQs per HCA " "(default: 16)"); module_param_named(log_rdmarc_per_qp, mod_param_profile.rdmarc_per_qp, int, 0444); MODULE_PARM_DESC(log_rdmarc_per_qp, "log number of RDMARC buffers per QP " "(default: 4)"); module_param_named(log_num_cq, mod_param_profile.num_cq, int, 0444); MODULE_PARM_DESC(log_num_cq, "log maximum number of CQs per HCA (default: 16)"); module_param_named(log_num_mcg, mod_param_profile.num_mcg, int, 0444); MODULE_PARM_DESC(log_num_mcg, "log maximum number of multicast groups per HCA " "(default: 13)"); module_param_named(log_num_mpt, mod_param_profile.num_mpt, int, 0444); MODULE_PARM_DESC(log_num_mpt, "log maximum number of memory protection table entries per " "HCA (default: 19)"); module_param_named(log_num_mtt, mod_param_profile.num_mtt_segs, int, 0444); MODULE_PARM_DESC(log_num_mtt, "log maximum number of memory translation table segments per " "HCA (default: max(20, 2*MTTs for register all of the host memory limited to 30))"); enum { MLX4_IF_STATE_BASIC, MLX4_IF_STATE_EXTENDED }; static inline u64 dbdf_to_u64(int domain, int bus, int dev, int fn) { return (domain << 20) | (bus << 12) | (dev << 4) | fn; } static inline void pr_bdf_err(const char *dbdf, const char *pname) { pr_warn("mlx4_core: '%s' is not valid bdf in '%s'\n", dbdf, pname); } static inline void pr_val_err(const char *dbdf, const char *pname, const char *val) { pr_warn("mlx4_core: value '%s' of bdf '%s' in '%s' is not valid\n" , val, dbdf, pname); } static inline void pr_out_of_range_bdf(const char *dbdf, int val, struct mlx4_dbdf2val_lst *dbdf2val) { pr_warn("mlx4_core: value %d in bdf '%s' of '%s' is out of its valid range (%d,%d)\n" , val, dbdf, dbdf2val->name , dbdf2val->range.min, dbdf2val->range.max); } static inline void pr_out_of_range(struct mlx4_dbdf2val_lst *dbdf2val) { pr_warn("mlx4_core: value of '%s' is out of its valid range (%d,%d)\n" , dbdf2val->name , dbdf2val->range.min, dbdf2val->range.max); } static inline int is_in_range(int val, struct mlx4_range *r) { return (val >= r->min && val <= r->max); } static int update_defaults(struct param_data *pdata) { long int val[MLX4_MAX_BDF_VALS]; int ret; char *t, *p = pdata->dbdf2val.str; char sval[32]; int val_len; if (!strlen(p) || strchr(p, ':') || strchr(p, '.') || strchr(p, ';')) return INVALID_STR; switch (pdata->id) { case PORT_TYPE_ARRAY: t = strchr(p, ','); if (!t || t == p || (t - p) > sizeof(sval)) return INVALID_STR; val_len = t - p; strncpy(sval, p, val_len); sval[val_len] = 0; ret = kstrtol(sval, 0, &val[0]); if (ret == -EINVAL) return INVALID_STR; if (ret || !is_in_range(val[0], &pdata->dbdf2val.range)) { pr_out_of_range(&pdata->dbdf2val); return INVALID_DATA; } ret = kstrtol(t + 1, 0, &val[1]); if (ret == -EINVAL) return INVALID_STR; if (ret || !is_in_range(val[1], &pdata->dbdf2val.range)) { pr_out_of_range(&pdata->dbdf2val); return INVALID_DATA; } pdata->dbdf2val.tbl[0].val[0] = val[0]; pdata->dbdf2val.tbl[0].val[1] = val[1]; break; case NUM_VFS: case PROBE_VF: ret = kstrtol(p, 0, &val[0]); if (ret == -EINVAL) return INVALID_STR; if (ret || !is_in_range(val[0], &pdata->dbdf2val.range)) { pr_out_of_range(&pdata->dbdf2val); return INVALID_DATA; } pdata->dbdf2val.tbl[0].val[0] = val[0]; break; } pdata->dbdf2val.tbl[1].dbdf = MLX4_ENDOF_TBL; return VALID_DATA; } int mlx4_fill_dbdf2val_tbl(struct mlx4_dbdf2val_lst *dbdf2val_lst) { int domain, bus, dev, fn; u64 dbdf; char *p, *t, *v; char tmp[32]; char sbdf[32]; char sep = ','; int j, k, str_size, i = 1; int prfx_size; p = dbdf2val_lst->str; for (j = 0; j < dbdf2val_lst->num_vals; j++) dbdf2val_lst->tbl[0].val[j] = dbdf2val_lst->def_val[j]; dbdf2val_lst->tbl[1].dbdf = MLX4_ENDOF_TBL; str_size = strlen(dbdf2val_lst->str); if (str_size == 0) return 0; while (strlen(p)) { prfx_size = BDF_STR_SIZE; sbdf[prfx_size] = 0; strncpy(sbdf, p, prfx_size); domain = DEFAULT_DOMAIN; if (sscanf(sbdf, "%02x:%02x.%x-", &bus, &dev, &fn) != 3) { prfx_size = DBDF_STR_SIZE; sbdf[prfx_size] = 0; strncpy(sbdf, p, prfx_size); if (sscanf(sbdf, "%04x:%02x:%02x.%x-", &domain, &bus, &dev, &fn) != 4) { pr_bdf_err(sbdf, dbdf2val_lst->name); goto err; } sprintf(tmp, "%04x:%02x:%02x.%x-", domain, bus, dev, fn); } else { sprintf(tmp, "%02x:%02x.%x-", bus, dev, fn); } if (strnicmp(sbdf, tmp, sizeof(tmp))) { pr_bdf_err(sbdf, dbdf2val_lst->name); goto err; } dbdf = dbdf_to_u64(domain, bus, dev, fn); for (j = 1; j < i; j++) if (dbdf2val_lst->tbl[j].dbdf == dbdf) { pr_warn("mlx4_core: in '%s', %s appears multiple times\n" , dbdf2val_lst->name, sbdf); goto err; } if (i >= MLX4_DEVS_TBL_SIZE) { pr_warn("mlx4_core: Too many devices in '%s'\n" , dbdf2val_lst->name); goto err; } p += prfx_size; t = strchr(p, sep); t = t ? t : p + strlen(p); if (p >= t) { pr_val_err(sbdf, dbdf2val_lst->name, ""); goto err; } for (k = 0; k < dbdf2val_lst->num_vals; k++) { char sval[32]; long int val; int ret, val_len; char vsep = ';'; v = (k == dbdf2val_lst->num_vals - 1) ? t : strchr(p, vsep); if (!v || v > t || v == p || (v - p) > sizeof(sval)) { pr_val_err(sbdf, dbdf2val_lst->name, p); goto err; } val_len = v - p; strncpy(sval, p, val_len); sval[val_len] = 0; ret = kstrtol(sval, 0, &val); if (ret) { if (strchr(p, vsep)) pr_warn("mlx4_core: too many vals in bdf '%s' of '%s'\n" , sbdf, dbdf2val_lst->name); else pr_val_err(sbdf, dbdf2val_lst->name, sval); goto err; } if (!is_in_range(val, &dbdf2val_lst->range)) { pr_out_of_range_bdf(sbdf, val, dbdf2val_lst); goto err; } dbdf2val_lst->tbl[i].val[k] = val; p = v; if (p[0] == vsep) p++; } dbdf2val_lst->tbl[i].dbdf = dbdf; if (strlen(p)) { if (p[0] != sep) { pr_warn("mlx4_core: expect separator '%c' before '%s' in '%s'\n" , sep, p, dbdf2val_lst->name); goto err; } p++; } i++; if (i < MLX4_DEVS_TBL_SIZE) dbdf2val_lst->tbl[i].dbdf = MLX4_ENDOF_TBL; } return 0; err: dbdf2val_lst->tbl[1].dbdf = MLX4_ENDOF_TBL; pr_warn("mlx4_core: The value of '%s' is incorrect. The value is discarded!\n" , dbdf2val_lst->name); return -EINVAL; } EXPORT_SYMBOL(mlx4_fill_dbdf2val_tbl); int mlx4_get_val(struct mlx4_dbdf2val *tbl, struct pci_dev *pdev, int idx, int *val) { u64 dbdf; int i = 1; *val = tbl[0].val[idx]; if (!pdev) return -EINVAL; dbdf = dbdf_to_u64(pci_get_domain(pdev->dev.bsddev), pci_get_bus(pdev->dev.bsddev), PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); while ((i < MLX4_DEVS_TBL_SIZE) && (tbl[i].dbdf != MLX4_ENDOF_TBL)) { if (tbl[i].dbdf == dbdf) { *val = tbl[i].val[idx]; return 0; } i++; } return 0; } EXPORT_SYMBOL(mlx4_get_val); static void process_mod_param_profile(struct mlx4_profile *profile) { vm_size_t hwphyssz; hwphyssz = 0; TUNABLE_ULONG_FETCH("hw.realmem", (u_long *) &hwphyssz); profile->num_qp = 1 << mod_param_profile.num_qp; profile->num_srq = 1 << mod_param_profile.num_srq; profile->rdmarc_per_qp = 1 << mod_param_profile.rdmarc_per_qp; profile->num_cq = 1 << mod_param_profile.num_cq; profile->num_mcg = 1 << mod_param_profile.num_mcg; profile->num_mpt = 1 << mod_param_profile.num_mpt; /* * We want to scale the number of MTTs with the size of the * system memory, since it makes sense to register a lot of * memory on a system with a lot of memory. As a heuristic, * make sure we have enough MTTs to register twice the system * memory (with PAGE_SIZE entries). * * This number has to be a power of two and fit into 32 bits * due to device limitations. We cap this at 2^30 as of bit map * limitation to work with int instead of uint (mlx4_buddy_init -> bitmap_zero) * That limits us to 4TB of memory registration per HCA with * 4KB pages, which is probably OK for the next few months. */ if (mod_param_profile.num_mtt_segs) profile->num_mtt_segs = 1 << mod_param_profile.num_mtt_segs; else { profile->num_mtt_segs = roundup_pow_of_two(max_t(unsigned, 1 << (MLX4_LOG_NUM_MTT - log_mtts_per_seg), min(1UL << (MLX4_MAX_LOG_NUM_MTT - log_mtts_per_seg), (hwphyssz << 1) >> log_mtts_per_seg))); /* set the actual value, so it will be reflected to the user using the sysfs */ mod_param_profile.num_mtt_segs = ilog2(profile->num_mtt_segs); } } int mlx4_check_port_params(struct mlx4_dev *dev, enum mlx4_port_type *port_type) { int i; for (i = 0; i < dev->caps.num_ports - 1; i++) { if (port_type[i] != port_type[i + 1]) { if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP)) { mlx4_err(dev, "Only same port types supported " "on this HCA, aborting.\n"); return -EINVAL; } } } for (i = 0; i < dev->caps.num_ports; i++) { if (!(port_type[i] & dev->caps.supported_type[i+1])) { mlx4_err(dev, "Requested port type for port %d is not " "supported on this HCA\n", i + 1); return -EINVAL; } } return 0; } static void mlx4_set_port_mask(struct mlx4_dev *dev) { int i; for (i = 1; i <= dev->caps.num_ports; ++i) dev->caps.port_mask[i] = dev->caps.port_type[i]; } static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) { int err; int i; err = mlx4_QUERY_DEV_CAP(dev, dev_cap); if (err) { mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n"); return err; } if (dev_cap->min_page_sz > PAGE_SIZE) { mlx4_err(dev, "HCA minimum page size of %d bigger than " "kernel PAGE_SIZE of %d, aborting.\n", dev_cap->min_page_sz, (int)PAGE_SIZE); return -ENODEV; } if (dev_cap->num_ports > MLX4_MAX_PORTS) { mlx4_err(dev, "HCA has %d ports, but we only support %d, " "aborting.\n", dev_cap->num_ports, MLX4_MAX_PORTS); return -ENODEV; } if (dev_cap->uar_size > pci_resource_len(dev->pdev, 2)) { mlx4_err(dev, "HCA reported UAR size of 0x%x bigger than " "PCI resource 2 size of 0x%llx, aborting.\n", dev_cap->uar_size, (unsigned long long) pci_resource_len(dev->pdev, 2)); return -ENODEV; } dev->caps.num_ports = dev_cap->num_ports; dev->phys_caps.num_phys_eqs = MLX4_MAX_EQ_NUM; for (i = 1; i <= dev->caps.num_ports; ++i) { dev->caps.vl_cap[i] = dev_cap->max_vl[i]; dev->caps.ib_mtu_cap[i] = dev_cap->ib_mtu[i]; dev->phys_caps.gid_phys_table_len[i] = dev_cap->max_gids[i]; dev->phys_caps.pkey_phys_table_len[i] = dev_cap->max_pkeys[i]; /* set gid and pkey table operating lengths by default * to non-sriov values */ dev->caps.gid_table_len[i] = dev_cap->max_gids[i]; dev->caps.pkey_table_len[i] = dev_cap->max_pkeys[i]; dev->caps.port_width_cap[i] = dev_cap->max_port_width[i]; dev->caps.eth_mtu_cap[i] = dev_cap->eth_mtu[i]; dev->caps.def_mac[i] = dev_cap->def_mac[i]; dev->caps.supported_type[i] = dev_cap->supported_port_types[i]; dev->caps.suggested_type[i] = dev_cap->suggested_type[i]; dev->caps.default_sense[i] = dev_cap->default_sense[i]; dev->caps.trans_type[i] = dev_cap->trans_type[i]; dev->caps.vendor_oui[i] = dev_cap->vendor_oui[i]; dev->caps.wavelength[i] = dev_cap->wavelength[i]; dev->caps.trans_code[i] = dev_cap->trans_code[i]; } dev->caps.uar_page_size = PAGE_SIZE; dev->caps.num_uars = dev_cap->uar_size / PAGE_SIZE; dev->caps.local_ca_ack_delay = dev_cap->local_ca_ack_delay; dev->caps.bf_reg_size = dev_cap->bf_reg_size; dev->caps.bf_regs_per_page = dev_cap->bf_regs_per_page; dev->caps.max_sq_sg = dev_cap->max_sq_sg; dev->caps.max_rq_sg = dev_cap->max_rq_sg; dev->caps.max_wqes = dev_cap->max_qp_sz; dev->caps.max_qp_init_rdma = dev_cap->max_requester_per_qp; dev->caps.max_srq_wqes = dev_cap->max_srq_sz; dev->caps.max_srq_sge = dev_cap->max_rq_sg - 1; dev->caps.reserved_srqs = dev_cap->reserved_srqs; dev->caps.max_sq_desc_sz = dev_cap->max_sq_desc_sz; dev->caps.max_rq_desc_sz = dev_cap->max_rq_desc_sz; /* * Subtract 1 from the limit because we need to allocate a * spare CQE to enable resizing the CQ */ dev->caps.max_cqes = dev_cap->max_cq_sz - 1; dev->caps.reserved_cqs = dev_cap->reserved_cqs; dev->caps.reserved_eqs = dev_cap->reserved_eqs; dev->caps.reserved_mtts = dev_cap->reserved_mtts; dev->caps.reserved_mrws = dev_cap->reserved_mrws; /* The first 128 UARs are used for EQ doorbells */ dev->caps.reserved_uars = max_t(int, 128, dev_cap->reserved_uars); dev->caps.reserved_pds = dev_cap->reserved_pds; dev->caps.reserved_xrcds = (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) ? dev_cap->reserved_xrcds : 0; dev->caps.max_xrcds = (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) ? dev_cap->max_xrcds : 0; dev->caps.mtt_entry_sz = dev_cap->mtt_entry_sz; dev->caps.max_msg_sz = dev_cap->max_msg_sz; dev->caps.page_size_cap = ~(u32) (dev_cap->min_page_sz - 1); dev->caps.flags = dev_cap->flags; dev->caps.flags2 = dev_cap->flags2; dev->caps.bmme_flags = dev_cap->bmme_flags; dev->caps.reserved_lkey = dev_cap->reserved_lkey; dev->caps.stat_rate_support = dev_cap->stat_rate_support; dev->caps.cq_timestamp = dev_cap->timestamp_support; dev->caps.max_gso_sz = dev_cap->max_gso_sz; dev->caps.max_rss_tbl_sz = dev_cap->max_rss_tbl_sz; /* Sense port always allowed on supported devices for ConnectX-1 and -2 */ if (mlx4_priv(dev)->pci_dev_data & MLX4_PCI_DEV_FORCE_SENSE_PORT) dev->caps.flags |= MLX4_DEV_CAP_FLAG_SENSE_SUPPORT; /* Don't do sense port on multifunction devices (for now at least) */ if (mlx4_is_mfunc(dev)) dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_SENSE_SUPPORT; dev->caps.log_num_macs = log_num_mac; dev->caps.log_num_vlans = MLX4_LOG_NUM_VLANS; dev->caps.fast_drop = fast_drop ? !!(dev->caps.flags & MLX4_DEV_CAP_FLAG_FAST_DROP) : 0; for (i = 1; i <= dev->caps.num_ports; ++i) { dev->caps.port_type[i] = MLX4_PORT_TYPE_NONE; if (dev->caps.supported_type[i]) { /* if only ETH is supported - assign ETH */ if (dev->caps.supported_type[i] == MLX4_PORT_TYPE_ETH) dev->caps.port_type[i] = MLX4_PORT_TYPE_ETH; /* if only IB is supported, assign IB */ else if (dev->caps.supported_type[i] == MLX4_PORT_TYPE_IB) dev->caps.port_type[i] = MLX4_PORT_TYPE_IB; else { /* * if IB and ETH are supported, we set the port * type according to user selection of port type; * if there is no user selection, take the FW hint */ int pta; mlx4_get_val(port_type_array.dbdf2val.tbl, pci_physfn(dev->pdev), i - 1, &pta); if (pta == MLX4_PORT_TYPE_NONE) { dev->caps.port_type[i] = dev->caps.suggested_type[i] ? MLX4_PORT_TYPE_ETH : MLX4_PORT_TYPE_IB; } else if (pta == MLX4_PORT_TYPE_NA) { mlx4_err(dev, "Port %d is valid port. " "It is not allowed to configure its type to N/A(%d)\n", i, MLX4_PORT_TYPE_NA); return -EINVAL; } else { dev->caps.port_type[i] = pta; } } } /* * Link sensing is allowed on the port if 3 conditions are true: * 1. Both protocols are supported on the port. * 2. Different types are supported on the port * 3. FW declared that it supports link sensing */ mlx4_priv(dev)->sense.sense_allowed[i] = ((dev->caps.supported_type[i] == MLX4_PORT_TYPE_AUTO) && (dev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP) && (dev->caps.flags & MLX4_DEV_CAP_FLAG_SENSE_SUPPORT)); /* Disablling auto sense for default Eth ports support */ mlx4_priv(dev)->sense.sense_allowed[i] = 0; /* * If "default_sense" bit is set, we move the port to "AUTO" mode * and perform sense_port FW command to try and set the correct * port type from beginning */ if (mlx4_priv(dev)->sense.sense_allowed[i] && dev->caps.default_sense[i]) { enum mlx4_port_type sensed_port = MLX4_PORT_TYPE_NONE; dev->caps.possible_type[i] = MLX4_PORT_TYPE_AUTO; mlx4_SENSE_PORT(dev, i, &sensed_port); if (sensed_port != MLX4_PORT_TYPE_NONE) dev->caps.port_type[i] = sensed_port; } else { dev->caps.possible_type[i] = dev->caps.port_type[i]; } if (dev->caps.log_num_macs > dev_cap->log_max_macs[i]) { dev->caps.log_num_macs = dev_cap->log_max_macs[i]; mlx4_warn(dev, "Requested number of MACs is too much " "for port %d, reducing to %d.\n", i, 1 << dev->caps.log_num_macs); } if (dev->caps.log_num_vlans > dev_cap->log_max_vlans[i]) { dev->caps.log_num_vlans = dev_cap->log_max_vlans[i]; mlx4_warn(dev, "Requested number of VLANs is too much " "for port %d, reducing to %d.\n", i, 1 << dev->caps.log_num_vlans); } } dev->caps.max_basic_counters = dev_cap->max_basic_counters; dev->caps.max_extended_counters = dev_cap->max_extended_counters; /* support extended counters if available */ if (dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS_EXT) dev->caps.max_counters = dev->caps.max_extended_counters; else dev->caps.max_counters = dev->caps.max_basic_counters; dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] = dev_cap->reserved_qps; dev->caps.reserved_qps_cnt[MLX4_QP_REGION_ETH_ADDR] = dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] = (1 << dev->caps.log_num_macs) * (1 << dev->caps.log_num_vlans) * dev->caps.num_ports; dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH] = MLX4_NUM_FEXCH; dev->caps.reserved_qps = dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] + dev->caps.reserved_qps_cnt[MLX4_QP_REGION_ETH_ADDR] + dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] + dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH]; dev->caps.sync_qp = dev_cap->sync_qp; if (dev->pdev->device == 0x1003) dev->caps.cq_flags |= MLX4_DEV_CAP_CQ_FLAG_IO; dev->caps.sqp_demux = (mlx4_is_master(dev)) ? MLX4_MAX_NUM_SLAVES : 0; if (!mlx4_enable_64b_cqe_eqe && !mlx4_is_slave(dev)) { if (dev_cap->flags & (MLX4_DEV_CAP_FLAG_64B_CQE | MLX4_DEV_CAP_FLAG_64B_EQE)) { mlx4_warn(dev, "64B EQEs/CQEs supported by the device but not enabled\n"); dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_64B_CQE; dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_64B_EQE; } } if ((dev->caps.flags & (MLX4_DEV_CAP_FLAG_64B_CQE | MLX4_DEV_CAP_FLAG_64B_EQE)) && mlx4_is_master(dev)) dev->caps.function_caps |= MLX4_FUNC_CAP_64B_EQE_CQE; if (!mlx4_is_slave(dev)) { for (i = 0; i < dev->caps.num_ports; ++i) dev->caps.def_counter_index[i] = i << 1; } return 0; } /*The function checks if there are live vf, return the num of them*/ static int mlx4_how_many_lives_vf(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_slave_state *s_state; int i; int ret = 0; for (i = 1/*the ppf is 0*/; i < dev->num_slaves; ++i) { s_state = &priv->mfunc.master.slave_state[i]; if (s_state->active && s_state->last_cmd != MLX4_COMM_CMD_RESET) { mlx4_warn(dev, "%s: slave: %d is still active\n", __func__, i); ret++; } } return ret; } int mlx4_get_parav_qkey(struct mlx4_dev *dev, u32 qpn, u32 *qkey) { u32 qk = MLX4_RESERVED_QKEY_BASE; if (qpn >= dev->phys_caps.base_tunnel_sqpn + 8 * MLX4_MFUNC_MAX || qpn < dev->phys_caps.base_proxy_sqpn) return -EINVAL; if (qpn >= dev->phys_caps.base_tunnel_sqpn) /* tunnel qp */ qk += qpn - dev->phys_caps.base_tunnel_sqpn; else qk += qpn - dev->phys_caps.base_proxy_sqpn; *qkey = qk; return 0; } EXPORT_SYMBOL(mlx4_get_parav_qkey); void mlx4_sync_pkey_table(struct mlx4_dev *dev, int slave, int port, int i, int val) { struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev); if (!mlx4_is_master(dev)) return; priv->virt2phys_pkey[slave][port - 1][i] = val; } EXPORT_SYMBOL(mlx4_sync_pkey_table); void mlx4_put_slave_node_guid(struct mlx4_dev *dev, int slave, __be64 guid) { struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev); if (!mlx4_is_master(dev)) return; priv->slave_node_guids[slave] = guid; } EXPORT_SYMBOL(mlx4_put_slave_node_guid); __be64 mlx4_get_slave_node_guid(struct mlx4_dev *dev, int slave) { struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev); if (!mlx4_is_master(dev)) return 0; return priv->slave_node_guids[slave]; } EXPORT_SYMBOL(mlx4_get_slave_node_guid); int mlx4_is_slave_active(struct mlx4_dev *dev, int slave) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_slave_state *s_slave; if (!mlx4_is_master(dev)) return 0; s_slave = &priv->mfunc.master.slave_state[slave]; return !!s_slave->active; } EXPORT_SYMBOL(mlx4_is_slave_active); static void slave_adjust_steering_mode(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap, struct mlx4_init_hca_param *hca_param) { dev->caps.steering_mode = hca_param->steering_mode; if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) dev->caps.num_qp_per_mgm = dev_cap->fs_max_num_qp_per_entry; else dev->caps.num_qp_per_mgm = 4 * ((1 << hca_param->log_mc_entry_sz)/16 - 2); mlx4_dbg(dev, "Steering mode is: %s\n", mlx4_steering_mode_str(dev->caps.steering_mode)); } static int mlx4_slave_cap(struct mlx4_dev *dev) { int err; u32 page_size; struct mlx4_dev_cap dev_cap; struct mlx4_func_cap func_cap; struct mlx4_init_hca_param hca_param; int i; memset(&hca_param, 0, sizeof(hca_param)); err = mlx4_QUERY_HCA(dev, &hca_param); if (err) { mlx4_err(dev, "QUERY_HCA command failed, aborting.\n"); return err; } /*fail if the hca has an unknown capability */ if ((hca_param.global_caps | HCA_GLOBAL_CAP_MASK) != HCA_GLOBAL_CAP_MASK) { mlx4_err(dev, "Unknown hca global capabilities\n"); return -ENOSYS; } mlx4_log_num_mgm_entry_size = hca_param.log_mc_entry_sz; dev->caps.hca_core_clock = hca_param.hca_core_clock; memset(&dev_cap, 0, sizeof(dev_cap)); dev->caps.max_qp_dest_rdma = 1 << hca_param.log_rd_per_qp; err = mlx4_dev_cap(dev, &dev_cap); if (err) { mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n"); return err; } err = mlx4_QUERY_FW(dev); if (err) mlx4_err(dev, "QUERY_FW command failed: could not get FW version.\n"); if (!hca_param.mw_enable) { dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_MEM_WINDOW; dev->caps.bmme_flags &= ~MLX4_BMME_FLAG_TYPE_2_WIN; } page_size = ~dev->caps.page_size_cap + 1; mlx4_warn(dev, "HCA minimum page size:%d\n", page_size); if (page_size > PAGE_SIZE) { mlx4_err(dev, "HCA minimum page size of %d bigger than " "kernel PAGE_SIZE of %d, aborting.\n", page_size, (int)PAGE_SIZE); return -ENODEV; } /* slave gets uar page size from QUERY_HCA fw command */ dev->caps.uar_page_size = 1 << (hca_param.uar_page_sz + 12); /* TODO: relax this assumption */ if (dev->caps.uar_page_size != PAGE_SIZE) { mlx4_err(dev, "UAR size:%d != kernel PAGE_SIZE of %d\n", dev->caps.uar_page_size, (int)PAGE_SIZE); return -ENODEV; } memset(&func_cap, 0, sizeof(func_cap)); err = mlx4_QUERY_FUNC_CAP(dev, 0, &func_cap); if (err) { mlx4_err(dev, "QUERY_FUNC_CAP general command failed, aborting (%d).\n", err); return err; } if ((func_cap.pf_context_behaviour | PF_CONTEXT_BEHAVIOUR_MASK) != PF_CONTEXT_BEHAVIOUR_MASK) { mlx4_err(dev, "Unknown pf context behaviour\n"); return -ENOSYS; } dev->caps.num_ports = func_cap.num_ports; dev->quotas.qp = func_cap.qp_quota; dev->quotas.srq = func_cap.srq_quota; dev->quotas.cq = func_cap.cq_quota; dev->quotas.mpt = func_cap.mpt_quota; dev->quotas.mtt = func_cap.mtt_quota; dev->caps.num_qps = 1 << hca_param.log_num_qps; dev->caps.num_srqs = 1 << hca_param.log_num_srqs; dev->caps.num_cqs = 1 << hca_param.log_num_cqs; dev->caps.num_mpts = 1 << hca_param.log_mpt_sz; dev->caps.num_eqs = func_cap.max_eq; dev->caps.reserved_eqs = func_cap.reserved_eq; dev->caps.num_pds = MLX4_NUM_PDS; dev->caps.num_mgms = 0; dev->caps.num_amgms = 0; if (dev->caps.num_ports > MLX4_MAX_PORTS) { mlx4_err(dev, "HCA has %d ports, but we only support %d, " "aborting.\n", dev->caps.num_ports, MLX4_MAX_PORTS); return -ENODEV; } dev->caps.qp0_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); dev->caps.qp0_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); dev->caps.qp1_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); dev->caps.qp1_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); if (!dev->caps.qp0_tunnel || !dev->caps.qp0_proxy || !dev->caps.qp1_tunnel || !dev->caps.qp1_proxy) { err = -ENOMEM; goto err_mem; } for (i = 1; i <= dev->caps.num_ports; ++i) { err = mlx4_QUERY_FUNC_CAP(dev, (u32) i, &func_cap); if (err) { mlx4_err(dev, "QUERY_FUNC_CAP port command failed for" " port %d, aborting (%d).\n", i, err); goto err_mem; } dev->caps.qp0_tunnel[i - 1] = func_cap.qp0_tunnel_qpn; dev->caps.qp0_proxy[i - 1] = func_cap.qp0_proxy_qpn; dev->caps.qp1_tunnel[i - 1] = func_cap.qp1_tunnel_qpn; dev->caps.qp1_proxy[i - 1] = func_cap.qp1_proxy_qpn; dev->caps.def_counter_index[i - 1] = func_cap.def_counter_index; dev->caps.port_mask[i] = dev->caps.port_type[i]; err = mlx4_get_slave_pkey_gid_tbl_len(dev, i, &dev->caps.gid_table_len[i], &dev->caps.pkey_table_len[i]); if (err) goto err_mem; } if (dev->caps.uar_page_size * (dev->caps.num_uars - dev->caps.reserved_uars) > pci_resource_len(dev->pdev, 2)) { mlx4_err(dev, "HCA reported UAR region size of 0x%x bigger than " "PCI resource 2 size of 0x%llx, aborting.\n", dev->caps.uar_page_size * dev->caps.num_uars, (unsigned long long) pci_resource_len(dev->pdev, 2)); err = -ENOMEM; goto err_mem; } if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_64B_EQE_ENABLED) { dev->caps.eqe_size = 64; dev->caps.eqe_factor = 1; } else { dev->caps.eqe_size = 32; dev->caps.eqe_factor = 0; } if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_64B_CQE_ENABLED) { dev->caps.cqe_size = 64; dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_64B_CQE; } else { dev->caps.cqe_size = 32; } dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS; mlx4_warn(dev, "Timestamping is not supported in slave mode.\n"); slave_adjust_steering_mode(dev, &dev_cap, &hca_param); return 0; err_mem: kfree(dev->caps.qp0_tunnel); kfree(dev->caps.qp0_proxy); kfree(dev->caps.qp1_tunnel); kfree(dev->caps.qp1_proxy); dev->caps.qp0_tunnel = dev->caps.qp0_proxy = dev->caps.qp1_tunnel = dev->caps.qp1_proxy = NULL; return err; } static void mlx4_request_modules(struct mlx4_dev *dev) { int port; int has_ib_port = false; int has_eth_port = false; #define EN_DRV_NAME "mlx4_en" #define IB_DRV_NAME "mlx4_ib" for (port = 1; port <= dev->caps.num_ports; port++) { if (dev->caps.port_type[port] == MLX4_PORT_TYPE_IB) has_ib_port = true; else if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH) has_eth_port = true; } if (has_ib_port) request_module_nowait(IB_DRV_NAME); if (has_eth_port) request_module_nowait(EN_DRV_NAME); } /* * Change the port configuration of the device. * Every user of this function must hold the port mutex. */ int mlx4_change_port_types(struct mlx4_dev *dev, enum mlx4_port_type *port_types) { int err = 0; int change = 0; int port; for (port = 0; port < dev->caps.num_ports; port++) { /* Change the port type only if the new type is different * from the current, and not set to Auto */ if (port_types[port] != dev->caps.port_type[port + 1]) change = 1; } if (change) { mlx4_unregister_device(dev); for (port = 1; port <= dev->caps.num_ports; port++) { mlx4_CLOSE_PORT(dev, port); dev->caps.port_type[port] = port_types[port - 1]; err = mlx4_SET_PORT(dev, port, -1); if (err) { mlx4_err(dev, "Failed to set port %d, " "aborting\n", port); goto out; } } mlx4_set_port_mask(dev); err = mlx4_register_device(dev); if (err) { mlx4_err(dev, "Failed to register device\n"); goto out; } mlx4_request_modules(dev); } out: return err; } static ssize_t show_port_type(struct device *dev, struct device_attribute *attr, char *buf) { struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info, port_attr); struct mlx4_dev *mdev = info->dev; char type[8]; sprintf(type, "%s", (mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_IB) ? "ib" : "eth"); if (mdev->caps.possible_type[info->port] == MLX4_PORT_TYPE_AUTO) sprintf(buf, "auto (%s)\n", type); else sprintf(buf, "%s\n", type); return strlen(buf); } static ssize_t set_port_type(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info, port_attr); struct mlx4_dev *mdev = info->dev; struct mlx4_priv *priv = mlx4_priv(mdev); enum mlx4_port_type types[MLX4_MAX_PORTS]; enum mlx4_port_type new_types[MLX4_MAX_PORTS]; int i; int err = 0; if (!strcmp(buf, "ib\n")) info->tmp_type = MLX4_PORT_TYPE_IB; else if (!strcmp(buf, "eth\n")) info->tmp_type = MLX4_PORT_TYPE_ETH; else if (!strcmp(buf, "auto\n")) info->tmp_type = MLX4_PORT_TYPE_AUTO; else { mlx4_err(mdev, "%s is not supported port type\n", buf); return -EINVAL; } if ((info->tmp_type & mdev->caps.supported_type[info->port]) != info->tmp_type) { mlx4_err(mdev, "Requested port type for port %d is not supported on this HCA\n", info->port); return -EINVAL; } mlx4_stop_sense(mdev); mutex_lock(&priv->port_mutex); /* Possible type is always the one that was delivered */ mdev->caps.possible_type[info->port] = info->tmp_type; for (i = 0; i < mdev->caps.num_ports; i++) { types[i] = priv->port[i+1].tmp_type ? priv->port[i+1].tmp_type : mdev->caps.possible_type[i+1]; if (types[i] == MLX4_PORT_TYPE_AUTO) types[i] = mdev->caps.port_type[i+1]; } if (!(mdev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP) && !(mdev->caps.flags & MLX4_DEV_CAP_FLAG_SENSE_SUPPORT)) { for (i = 1; i <= mdev->caps.num_ports; i++) { if (mdev->caps.possible_type[i] == MLX4_PORT_TYPE_AUTO) { mdev->caps.possible_type[i] = mdev->caps.port_type[i]; err = -EINVAL; } } } if (err) { mlx4_err(mdev, "Auto sensing is not supported on this HCA. " "Set only 'eth' or 'ib' for both ports " "(should be the same)\n"); goto out; } mlx4_do_sense_ports(mdev, new_types, types); err = mlx4_check_port_params(mdev, new_types); if (err) goto out; /* We are about to apply the changes after the configuration * was verified, no need to remember the temporary types * any more */ for (i = 0; i < mdev->caps.num_ports; i++) priv->port[i + 1].tmp_type = 0; err = mlx4_change_port_types(mdev, new_types); out: mlx4_start_sense(mdev); mutex_unlock(&priv->port_mutex); return err ? err : count; } enum ibta_mtu { IB_MTU_256 = 1, IB_MTU_512 = 2, IB_MTU_1024 = 3, IB_MTU_2048 = 4, IB_MTU_4096 = 5 }; static inline int int_to_ibta_mtu(int mtu) { switch (mtu) { case 256: return IB_MTU_256; case 512: return IB_MTU_512; case 1024: return IB_MTU_1024; case 2048: return IB_MTU_2048; case 4096: return IB_MTU_4096; default: return -1; } } static inline int ibta_mtu_to_int(enum ibta_mtu mtu) { switch (mtu) { case IB_MTU_256: return 256; case IB_MTU_512: return 512; case IB_MTU_1024: return 1024; case IB_MTU_2048: return 2048; case IB_MTU_4096: return 4096; default: return -1; } } static ssize_t show_board(struct device *device, struct device_attribute *attr, char *buf) { struct mlx4_hca_info *info = container_of(attr, struct mlx4_hca_info, board_attr); struct mlx4_dev *mdev = info->dev; return sprintf(buf, "%.*s\n", MLX4_BOARD_ID_LEN, mdev->board_id); } static ssize_t show_hca(struct device *device, struct device_attribute *attr, char *buf) { struct mlx4_hca_info *info = container_of(attr, struct mlx4_hca_info, hca_attr); struct mlx4_dev *mdev = info->dev; return sprintf(buf, "MT%d\n", mdev->pdev->device); } static ssize_t show_firmware_version(struct device *dev, struct device_attribute *attr, char *buf) { struct mlx4_hca_info *info = container_of(attr, struct mlx4_hca_info, firmware_attr); struct mlx4_dev *mdev = info->dev; return sprintf(buf, "%d.%d.%d\n", (int)(mdev->caps.fw_ver >> 32), (int)(mdev->caps.fw_ver >> 16) & 0xffff, (int)mdev->caps.fw_ver & 0xffff); } static ssize_t show_port_ib_mtu(struct device *dev, struct device_attribute *attr, char *buf) { struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info, port_mtu_attr); struct mlx4_dev *mdev = info->dev; /* When port type is eth, port mtu value isn't used. */ if (mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_ETH) return -EINVAL; sprintf(buf, "%d\n", ibta_mtu_to_int(mdev->caps.port_ib_mtu[info->port])); return strlen(buf); } static ssize_t set_port_ib_mtu(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info, port_mtu_attr); struct mlx4_dev *mdev = info->dev; struct mlx4_priv *priv = mlx4_priv(mdev); int err, port, mtu, ibta_mtu = -1; if (mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_ETH) { mlx4_warn(mdev, "port level mtu is only used for IB ports\n"); return -EINVAL; } mtu = (int) simple_strtol(buf, NULL, 0); ibta_mtu = int_to_ibta_mtu(mtu); if (ibta_mtu < 0) { mlx4_err(mdev, "%s is invalid IBTA mtu\n", buf); return -EINVAL; } mdev->caps.port_ib_mtu[info->port] = ibta_mtu; mlx4_stop_sense(mdev); mutex_lock(&priv->port_mutex); mlx4_unregister_device(mdev); for (port = 1; port <= mdev->caps.num_ports; port++) { mlx4_CLOSE_PORT(mdev, port); err = mlx4_SET_PORT(mdev, port, -1); if (err) { mlx4_err(mdev, "Failed to set port %d, " "aborting\n", port); goto err_set_port; } } err = mlx4_register_device(mdev); err_set_port: mutex_unlock(&priv->port_mutex); mlx4_start_sense(mdev); return err ? err : count; } static int mlx4_load_fw(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); int err, unmap_flag = 0; priv->fw.fw_icm = mlx4_alloc_icm(dev, priv->fw.fw_pages, GFP_HIGHUSER | __GFP_NOWARN, 0); if (!priv->fw.fw_icm) { mlx4_err(dev, "Couldn't allocate FW area, aborting.\n"); return -ENOMEM; } err = mlx4_MAP_FA(dev, priv->fw.fw_icm); if (err) { mlx4_err(dev, "MAP_FA command failed, aborting.\n"); goto err_free; } err = mlx4_RUN_FW(dev); if (err) { mlx4_err(dev, "RUN_FW command failed, aborting.\n"); goto err_unmap_fa; } return 0; err_unmap_fa: unmap_flag = mlx4_UNMAP_FA(dev); if (unmap_flag) pr_warn("mlx4_core: mlx4_UNMAP_FA failed.\n"); err_free: if (!unmap_flag) mlx4_free_icm(dev, priv->fw.fw_icm, 0); return err; } static int mlx4_init_cmpt_table(struct mlx4_dev *dev, u64 cmpt_base, int cmpt_entry_sz) { struct mlx4_priv *priv = mlx4_priv(dev); int err; int num_eqs; err = mlx4_init_icm_table(dev, &priv->qp_table.cmpt_table, cmpt_base + ((u64) (MLX4_CMPT_TYPE_QP * cmpt_entry_sz) << MLX4_CMPT_SHIFT), cmpt_entry_sz, dev->caps.num_qps, dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], 0, 0); if (err) goto err; err = mlx4_init_icm_table(dev, &priv->srq_table.cmpt_table, cmpt_base + ((u64) (MLX4_CMPT_TYPE_SRQ * cmpt_entry_sz) << MLX4_CMPT_SHIFT), cmpt_entry_sz, dev->caps.num_srqs, dev->caps.reserved_srqs, 0, 0); if (err) goto err_qp; err = mlx4_init_icm_table(dev, &priv->cq_table.cmpt_table, cmpt_base + ((u64) (MLX4_CMPT_TYPE_CQ * cmpt_entry_sz) << MLX4_CMPT_SHIFT), cmpt_entry_sz, dev->caps.num_cqs, dev->caps.reserved_cqs, 0, 0); if (err) goto err_srq; num_eqs = (mlx4_is_master(dev)) ? dev->phys_caps.num_phys_eqs : dev->caps.num_eqs; err = mlx4_init_icm_table(dev, &priv->eq_table.cmpt_table, cmpt_base + ((u64) (MLX4_CMPT_TYPE_EQ * cmpt_entry_sz) << MLX4_CMPT_SHIFT), cmpt_entry_sz, num_eqs, num_eqs, 0, 0); if (err) goto err_cq; return 0; err_cq: mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table); err_srq: mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table); err_qp: mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table); err: return err; } static int mlx4_init_icm(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap, struct mlx4_init_hca_param *init_hca, u64 icm_size) { struct mlx4_priv *priv = mlx4_priv(dev); u64 aux_pages; int num_eqs; int err, unmap_flag = 0; err = mlx4_SET_ICM_SIZE(dev, icm_size, &aux_pages); if (err) { mlx4_err(dev, "SET_ICM_SIZE command failed, aborting.\n"); return err; } mlx4_dbg(dev, "%lld KB of HCA context requires %lld KB aux memory.\n", (unsigned long long) icm_size >> 10, (unsigned long long) aux_pages << 2); priv->fw.aux_icm = mlx4_alloc_icm(dev, aux_pages, GFP_HIGHUSER | __GFP_NOWARN, 0); if (!priv->fw.aux_icm) { mlx4_err(dev, "Couldn't allocate aux memory, aborting.\n"); return -ENOMEM; } err = mlx4_MAP_ICM_AUX(dev, priv->fw.aux_icm); if (err) { mlx4_err(dev, "MAP_ICM_AUX command failed, aborting.\n"); goto err_free_aux; } err = mlx4_init_cmpt_table(dev, init_hca->cmpt_base, dev_cap->cmpt_entry_sz); if (err) { mlx4_err(dev, "Failed to map cMPT context memory, aborting.\n"); goto err_unmap_aux; } num_eqs = (mlx4_is_master(dev)) ? dev->phys_caps.num_phys_eqs : dev->caps.num_eqs; err = mlx4_init_icm_table(dev, &priv->eq_table.table, init_hca->eqc_base, dev_cap->eqc_entry_sz, num_eqs, num_eqs, 0, 0); if (err) { mlx4_err(dev, "Failed to map EQ context memory, aborting.\n"); goto err_unmap_cmpt; } /* * Reserved MTT entries must be aligned up to a cacheline * boundary, since the FW will write to them, while the driver * writes to all other MTT entries. (The variable * dev->caps.mtt_entry_sz below is really the MTT segment * size, not the raw entry size) */ dev->caps.reserved_mtts = ALIGN(dev->caps.reserved_mtts * dev->caps.mtt_entry_sz, dma_get_cache_alignment()) / dev->caps.mtt_entry_sz; err = mlx4_init_icm_table(dev, &priv->mr_table.mtt_table, init_hca->mtt_base, dev->caps.mtt_entry_sz, dev->caps.num_mtts, dev->caps.reserved_mtts, 1, 0); if (err) { mlx4_err(dev, "Failed to map MTT context memory, aborting.\n"); goto err_unmap_eq; } err = mlx4_init_icm_table(dev, &priv->mr_table.dmpt_table, init_hca->dmpt_base, dev_cap->dmpt_entry_sz, dev->caps.num_mpts, dev->caps.reserved_mrws, 1, 1); if (err) { mlx4_err(dev, "Failed to map dMPT context memory, aborting.\n"); goto err_unmap_mtt; } err = mlx4_init_icm_table(dev, &priv->qp_table.qp_table, init_hca->qpc_base, dev_cap->qpc_entry_sz, dev->caps.num_qps, dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], 0, 0); if (err) { mlx4_err(dev, "Failed to map QP context memory, aborting.\n"); goto err_unmap_dmpt; } err = mlx4_init_icm_table(dev, &priv->qp_table.auxc_table, init_hca->auxc_base, dev_cap->aux_entry_sz, dev->caps.num_qps, dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], 0, 0); if (err) { mlx4_err(dev, "Failed to map AUXC context memory, aborting.\n"); goto err_unmap_qp; } err = mlx4_init_icm_table(dev, &priv->qp_table.altc_table, init_hca->altc_base, dev_cap->altc_entry_sz, dev->caps.num_qps, dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], 0, 0); if (err) { mlx4_err(dev, "Failed to map ALTC context memory, aborting.\n"); goto err_unmap_auxc; } err = mlx4_init_icm_table(dev, &priv->qp_table.rdmarc_table, init_hca->rdmarc_base, dev_cap->rdmarc_entry_sz << priv->qp_table.rdmarc_shift, dev->caps.num_qps, dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], 0, 0); if (err) { mlx4_err(dev, "Failed to map RDMARC context memory, aborting\n"); goto err_unmap_altc; } err = mlx4_init_icm_table(dev, &priv->cq_table.table, init_hca->cqc_base, dev_cap->cqc_entry_sz, dev->caps.num_cqs, dev->caps.reserved_cqs, 0, 0); if (err) { mlx4_err(dev, "Failed to map CQ context memory, aborting.\n"); goto err_unmap_rdmarc; } err = mlx4_init_icm_table(dev, &priv->srq_table.table, init_hca->srqc_base, dev_cap->srq_entry_sz, dev->caps.num_srqs, dev->caps.reserved_srqs, 0, 0); if (err) { mlx4_err(dev, "Failed to map SRQ context memory, aborting.\n"); goto err_unmap_cq; } /* * For flow steering device managed mode it is required to use * mlx4_init_icm_table. For B0 steering mode it's not strictly * required, but for simplicity just map the whole multicast * group table now. The table isn't very big and it's a lot * easier than trying to track ref counts. */ err = mlx4_init_icm_table(dev, &priv->mcg_table.table, init_hca->mc_base, mlx4_get_mgm_entry_size(dev), dev->caps.num_mgms + dev->caps.num_amgms, dev->caps.num_mgms + dev->caps.num_amgms, 0, 0); if (err) { mlx4_err(dev, "Failed to map MCG context memory, aborting.\n"); goto err_unmap_srq; } return 0; err_unmap_srq: mlx4_cleanup_icm_table(dev, &priv->srq_table.table); err_unmap_cq: mlx4_cleanup_icm_table(dev, &priv->cq_table.table); err_unmap_rdmarc: mlx4_cleanup_icm_table(dev, &priv->qp_table.rdmarc_table); err_unmap_altc: mlx4_cleanup_icm_table(dev, &priv->qp_table.altc_table); err_unmap_auxc: mlx4_cleanup_icm_table(dev, &priv->qp_table.auxc_table); err_unmap_qp: mlx4_cleanup_icm_table(dev, &priv->qp_table.qp_table); err_unmap_dmpt: mlx4_cleanup_icm_table(dev, &priv->mr_table.dmpt_table); err_unmap_mtt: mlx4_cleanup_icm_table(dev, &priv->mr_table.mtt_table); err_unmap_eq: mlx4_cleanup_icm_table(dev, &priv->eq_table.table); err_unmap_cmpt: mlx4_cleanup_icm_table(dev, &priv->eq_table.cmpt_table); mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table); mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table); mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table); err_unmap_aux: unmap_flag = mlx4_UNMAP_ICM_AUX(dev); if (unmap_flag) pr_warn("mlx4_core: mlx4_UNMAP_ICM_AUX failed.\n"); err_free_aux: if (!unmap_flag) mlx4_free_icm(dev, priv->fw.aux_icm, 0); return err; } static void mlx4_free_icms(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); mlx4_cleanup_icm_table(dev, &priv->mcg_table.table); mlx4_cleanup_icm_table(dev, &priv->srq_table.table); mlx4_cleanup_icm_table(dev, &priv->cq_table.table); mlx4_cleanup_icm_table(dev, &priv->qp_table.rdmarc_table); mlx4_cleanup_icm_table(dev, &priv->qp_table.altc_table); mlx4_cleanup_icm_table(dev, &priv->qp_table.auxc_table); mlx4_cleanup_icm_table(dev, &priv->qp_table.qp_table); mlx4_cleanup_icm_table(dev, &priv->mr_table.dmpt_table); mlx4_cleanup_icm_table(dev, &priv->mr_table.mtt_table); mlx4_cleanup_icm_table(dev, &priv->eq_table.table); mlx4_cleanup_icm_table(dev, &priv->eq_table.cmpt_table); mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table); mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table); mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table); if (!mlx4_UNMAP_ICM_AUX(dev)) mlx4_free_icm(dev, priv->fw.aux_icm, 0); else pr_warn("mlx4_core: mlx4_UNMAP_ICM_AUX failed.\n"); } static void mlx4_slave_exit(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); mutex_lock(&priv->cmd.slave_cmd_mutex); if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, MLX4_COMM_TIME)) mlx4_warn(dev, "Failed to close slave function.\n"); mutex_unlock(&priv->cmd.slave_cmd_mutex); } static int map_bf_area(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); resource_size_t bf_start; resource_size_t bf_len; int err = 0; if (!dev->caps.bf_reg_size) return -ENXIO; bf_start = pci_resource_start(dev->pdev, 2) + (dev->caps.num_uars << PAGE_SHIFT); bf_len = pci_resource_len(dev->pdev, 2) - (dev->caps.num_uars << PAGE_SHIFT); priv->bf_mapping = io_mapping_create_wc(bf_start, bf_len); if (!priv->bf_mapping) err = -ENOMEM; return err; } static void unmap_bf_area(struct mlx4_dev *dev) { if (mlx4_priv(dev)->bf_mapping) io_mapping_free(mlx4_priv(dev)->bf_mapping); } int mlx4_read_clock(struct mlx4_dev *dev) { u32 clockhi, clocklo, clockhi1; cycle_t cycles; int i; struct mlx4_priv *priv = mlx4_priv(dev); if (!priv->clock_mapping) return -ENOTSUPP; for (i = 0; i < 10; i++) { clockhi = swab32(readl(priv->clock_mapping)); clocklo = swab32(readl(priv->clock_mapping + 4)); clockhi1 = swab32(readl(priv->clock_mapping)); if (clockhi == clockhi1) break; } cycles = (u64) clockhi << 32 | (u64) clocklo; return cycles; } EXPORT_SYMBOL_GPL(mlx4_read_clock); static int map_internal_clock(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); priv->clock_mapping = ioremap(pci_resource_start(dev->pdev, priv->fw.clock_bar) + priv->fw.clock_offset, MLX4_CLOCK_SIZE); if (!priv->clock_mapping) return -ENOMEM; return 0; } int mlx4_get_internal_clock_params(struct mlx4_dev *dev, struct mlx4_clock_params *params) { struct mlx4_priv *priv = mlx4_priv(dev); if (mlx4_is_slave(dev)) return -ENOTSUPP; if (!params) return -EINVAL; params->bar = priv->fw.clock_bar; params->offset = priv->fw.clock_offset; params->size = MLX4_CLOCK_SIZE; return 0; } EXPORT_SYMBOL_GPL(mlx4_get_internal_clock_params); static void unmap_internal_clock(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); if (priv->clock_mapping) iounmap(priv->clock_mapping); } static void mlx4_close_hca(struct mlx4_dev *dev) { unmap_internal_clock(dev); unmap_bf_area(dev); if (mlx4_is_slave(dev)) { mlx4_slave_exit(dev); } else { mlx4_CLOSE_HCA(dev, 0); mlx4_free_icms(dev); if (!mlx4_UNMAP_FA(dev)) mlx4_free_icm(dev, mlx4_priv(dev)->fw.fw_icm, 0); else pr_warn("mlx4_core: mlx4_UNMAP_FA failed.\n"); } } static int mlx4_init_slave(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); u64 dma = (u64) priv->mfunc.vhcr_dma; int num_of_reset_retries = NUM_OF_RESET_RETRIES; int ret_from_reset = 0; u32 slave_read; u32 cmd_channel_ver; mutex_lock(&priv->cmd.slave_cmd_mutex); priv->cmd.max_cmds = 1; mlx4_warn(dev, "Sending reset\n"); ret_from_reset = mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, MLX4_COMM_TIME); /* if we are in the middle of flr the slave will try * NUM_OF_RESET_RETRIES times before leaving.*/ if (ret_from_reset) { if (MLX4_DELAY_RESET_SLAVE == ret_from_reset) { msleep(SLEEP_TIME_IN_RESET); while (ret_from_reset && num_of_reset_retries) { mlx4_warn(dev, "slave is currently in the" "middle of FLR. retrying..." "(try num:%d)\n", (NUM_OF_RESET_RETRIES - num_of_reset_retries + 1)); ret_from_reset = mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, MLX4_COMM_TIME); num_of_reset_retries = num_of_reset_retries - 1; } } else goto err; } /* check the driver version - the slave I/F revision * must match the master's */ slave_read = swab32(readl(&priv->mfunc.comm->slave_read)); cmd_channel_ver = mlx4_comm_get_version(); if (MLX4_COMM_GET_IF_REV(cmd_channel_ver) != MLX4_COMM_GET_IF_REV(slave_read)) { mlx4_err(dev, "slave driver version is not supported" " by the master\n"); goto err; } mlx4_warn(dev, "Sending vhcr0\n"); if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR0, dma >> 48, MLX4_COMM_TIME)) goto err; if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR1, dma >> 32, MLX4_COMM_TIME)) goto err; if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR2, dma >> 16, MLX4_COMM_TIME)) goto err; if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR_EN, dma, MLX4_COMM_TIME)) goto err; mutex_unlock(&priv->cmd.slave_cmd_mutex); return 0; err: mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, 0); mutex_unlock(&priv->cmd.slave_cmd_mutex); return -EIO; } static void mlx4_parav_master_pf_caps(struct mlx4_dev *dev) { int i; for (i = 1; i <= dev->caps.num_ports; i++) { if (dev->caps.port_type[i] == MLX4_PORT_TYPE_ETH) dev->caps.gid_table_len[i] = mlx4_get_slave_num_gids(dev, 0); else dev->caps.gid_table_len[i] = 1; dev->caps.pkey_table_len[i] = dev->phys_caps.pkey_phys_table_len[i] - 1; } } static int choose_log_fs_mgm_entry_size(int qp_per_entry) { int i = MLX4_MIN_MGM_LOG_ENTRY_SIZE; for (i = MLX4_MIN_MGM_LOG_ENTRY_SIZE; i <= MLX4_MAX_MGM_LOG_ENTRY_SIZE; i++) { if (qp_per_entry <= 4 * ((1 << i) / 16 - 2)) break; } return (i <= MLX4_MAX_MGM_LOG_ENTRY_SIZE) ? i : -1; } static void choose_steering_mode(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) { int nvfs; mlx4_get_val(num_vfs.dbdf2val.tbl, pci_physfn(dev->pdev), 0, &nvfs); if (high_rate_steer && !mlx4_is_mfunc(dev)) { dev->caps.flags &= ~(MLX4_DEV_CAP_FLAG_VEP_MC_STEER | MLX4_DEV_CAP_FLAG_VEP_UC_STEER); dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_FS_EN; } if (mlx4_log_num_mgm_entry_size == -1 && dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_FS_EN && (!mlx4_is_mfunc(dev) || (dev_cap->fs_max_num_qp_per_entry >= (nvfs + 1))) && choose_log_fs_mgm_entry_size(dev_cap->fs_max_num_qp_per_entry) >= MLX4_MIN_MGM_LOG_ENTRY_SIZE) { dev->oper_log_mgm_entry_size = choose_log_fs_mgm_entry_size(dev_cap->fs_max_num_qp_per_entry); dev->caps.steering_mode = MLX4_STEERING_MODE_DEVICE_MANAGED; dev->caps.num_qp_per_mgm = dev_cap->fs_max_num_qp_per_entry; } else { if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER && dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER) dev->caps.steering_mode = MLX4_STEERING_MODE_B0; else { dev->caps.steering_mode = MLX4_STEERING_MODE_A0; if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER || dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER) mlx4_warn(dev, "Must have both UC_STEER and MC_STEER flags " "set to use B0 steering. Falling back to A0 steering mode.\n"); } dev->oper_log_mgm_entry_size = mlx4_log_num_mgm_entry_size > 0 ? mlx4_log_num_mgm_entry_size : MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE; dev->caps.num_qp_per_mgm = mlx4_get_qp_per_mgm(dev); } mlx4_dbg(dev, "Steering mode is: %s, oper_log_mgm_entry_size = %d, " "log_num_mgm_entry_size = %d\n", mlx4_steering_mode_str(dev->caps.steering_mode), dev->oper_log_mgm_entry_size, mlx4_log_num_mgm_entry_size); } static int mlx4_init_hca(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_dev_cap *dev_cap = NULL; struct mlx4_adapter adapter; struct mlx4_mod_stat_cfg mlx4_cfg; struct mlx4_profile profile; struct mlx4_init_hca_param init_hca; u64 icm_size; int err; if (!mlx4_is_slave(dev)) { err = mlx4_QUERY_FW(dev); if (err) { if (err == -EACCES) mlx4_info(dev, "non-primary physical function, skipping.\n"); else mlx4_err(dev, "QUERY_FW command failed, aborting.\n"); return err; } err = mlx4_load_fw(dev); if (err) { mlx4_err(dev, "Failed to start FW, aborting.\n"); return err; } mlx4_cfg.log_pg_sz_m = 1; mlx4_cfg.log_pg_sz = 0; err = mlx4_MOD_STAT_CFG(dev, &mlx4_cfg); if (err) mlx4_warn(dev, "Failed to override log_pg_sz parameter\n"); dev_cap = kzalloc(sizeof *dev_cap, GFP_KERNEL); if (!dev_cap) { mlx4_err(dev, "Failed to allocate memory for dev_cap\n"); err = -ENOMEM; goto err_stop_fw; } err = mlx4_dev_cap(dev, dev_cap); if (err) { mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n"); goto err_stop_fw; } choose_steering_mode(dev, dev_cap); if (mlx4_is_master(dev)) mlx4_parav_master_pf_caps(dev); process_mod_param_profile(&profile); if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) profile.num_mcg = MLX4_FS_NUM_MCG; icm_size = mlx4_make_profile(dev, &profile, dev_cap, &init_hca); if ((long long) icm_size < 0) { err = icm_size; goto err_stop_fw; } dev->caps.max_fmr_maps = (1 << (32 - ilog2(dev->caps.num_mpts))) - 1; init_hca.log_uar_sz = ilog2(dev->caps.num_uars); init_hca.uar_page_sz = PAGE_SHIFT - 12; err = mlx4_init_icm(dev, dev_cap, &init_hca, icm_size); if (err) goto err_stop_fw; init_hca.mw_enable = 1; err = mlx4_INIT_HCA(dev, &init_hca); if (err) { mlx4_err(dev, "INIT_HCA command failed, aborting.\n"); goto err_free_icm; } /* * Read HCA frequency by QUERY_HCA command */ if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_TS) { memset(&init_hca, 0, sizeof(init_hca)); err = mlx4_QUERY_HCA(dev, &init_hca); if (err) { mlx4_err(dev, "QUERY_HCA command failed, disable timestamp.\n"); dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS; } else { dev->caps.hca_core_clock = init_hca.hca_core_clock; } /* In case we got HCA frequency 0 - disable timestamping * to avoid dividing by zero */ if (!dev->caps.hca_core_clock) { dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS; mlx4_err(dev, "HCA frequency is 0. Timestamping is not supported."); } else if (map_internal_clock(dev)) { /* Map internal clock, * in case of failure disable timestamping */ dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS; mlx4_err(dev, "Failed to map internal clock. Timestamping is not supported.\n"); } } } else { err = mlx4_init_slave(dev); if (err) { mlx4_err(dev, "Failed to initialize slave\n"); return err; } err = mlx4_slave_cap(dev); if (err) { mlx4_err(dev, "Failed to obtain slave caps\n"); goto err_close; } } if (map_bf_area(dev)) mlx4_dbg(dev, "Failed to map blue flame area\n"); /* Only the master set the ports, all the rest got it from it.*/ if (!mlx4_is_slave(dev)) mlx4_set_port_mask(dev); err = mlx4_QUERY_ADAPTER(dev, &adapter); if (err) { mlx4_err(dev, "QUERY_ADAPTER command failed, aborting.\n"); goto unmap_bf; } priv->eq_table.inta_pin = adapter.inta_pin; memcpy(dev->board_id, adapter.board_id, sizeof dev->board_id); memcpy(dev->vsd, adapter.vsd, sizeof(dev->vsd)); dev->vsd_vendor_id = adapter.vsd_vendor_id; if (!mlx4_is_slave(dev)) kfree(dev_cap); return 0; unmap_bf: if (!mlx4_is_slave(dev)) unmap_internal_clock(dev); unmap_bf_area(dev); if (mlx4_is_slave(dev)) { kfree(dev->caps.qp0_tunnel); kfree(dev->caps.qp0_proxy); kfree(dev->caps.qp1_tunnel); kfree(dev->caps.qp1_proxy); } err_close: if (mlx4_is_slave(dev)) mlx4_slave_exit(dev); else mlx4_CLOSE_HCA(dev, 0); err_free_icm: if (!mlx4_is_slave(dev)) mlx4_free_icms(dev); err_stop_fw: if (!mlx4_is_slave(dev)) { if (!mlx4_UNMAP_FA(dev)) mlx4_free_icm(dev, priv->fw.fw_icm, 0); else pr_warn("mlx4_core: mlx4_UNMAP_FA failed.\n"); kfree(dev_cap); } return err; } static int mlx4_init_counters_table(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); int nent_pow2, port_indx, vf_index, num_counters; int res, index = 0; struct counter_index *new_counter_index; if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS)) return -ENOENT; if (!mlx4_is_slave(dev) && dev->caps.max_counters == dev->caps.max_extended_counters) { res = mlx4_cmd(dev, MLX4_IF_STATE_EXTENDED, 0, 0, MLX4_CMD_SET_IF_STAT, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); if (res) { mlx4_err(dev, "Failed to set extended counters (err=%d)\n", res); return res; } } mutex_init(&priv->counters_table.mutex); if (mlx4_is_slave(dev)) { for (port_indx = 0; port_indx < dev->caps.num_ports; port_indx++) { INIT_LIST_HEAD(&priv->counters_table.global_port_list[port_indx]); if (dev->caps.def_counter_index[port_indx] != 0xFF) { new_counter_index = kmalloc(sizeof(struct counter_index), GFP_KERNEL); if (!new_counter_index) return -ENOMEM; new_counter_index->index = dev->caps.def_counter_index[port_indx]; list_add_tail(&new_counter_index->list, &priv->counters_table.global_port_list[port_indx]); } } mlx4_dbg(dev, "%s: slave allocated %d counters for %d ports\n", __func__, dev->caps.num_ports, dev->caps.num_ports); return 0; } nent_pow2 = roundup_pow_of_two(dev->caps.max_counters); for (port_indx = 0; port_indx < dev->caps.num_ports; port_indx++) { INIT_LIST_HEAD(&priv->counters_table.global_port_list[port_indx]); /* allocating 2 counters per port for PFs */ /* For the PF, the ETH default counters are 0,2; */ /* and the RoCE default counters are 1,3 */ for (num_counters = 0; num_counters < 2; num_counters++, index++) { new_counter_index = kmalloc(sizeof(struct counter_index), GFP_KERNEL); if (!new_counter_index) return -ENOMEM; new_counter_index->index = index; list_add_tail(&new_counter_index->list, &priv->counters_table.global_port_list[port_indx]); } } if (mlx4_is_master(dev)) { for (vf_index = 0; vf_index < dev->num_vfs; vf_index++) { for (port_indx = 0; port_indx < dev->caps.num_ports; port_indx++) { INIT_LIST_HEAD(&priv->counters_table.vf_list[vf_index][port_indx]); new_counter_index = kmalloc(sizeof(struct counter_index), GFP_KERNEL); if (!new_counter_index) return -ENOMEM; if (index < nent_pow2 - 2) { new_counter_index->index = index; index++; } else { new_counter_index->index = MLX4_SINK_COUNTER_INDEX; } list_add_tail(&new_counter_index->list, &priv->counters_table.vf_list[vf_index][port_indx]); } } res = mlx4_bitmap_init(&priv->counters_table.bitmap, nent_pow2, nent_pow2 - 1, index, 1); mlx4_dbg(dev, "%s: master allocated %d counters for %d VFs\n", __func__, index, dev->num_vfs); } else { res = mlx4_bitmap_init(&priv->counters_table.bitmap, nent_pow2, nent_pow2 - 1, index, 1); mlx4_dbg(dev, "%s: native allocated %d counters for %d ports\n", __func__, index, dev->caps.num_ports); } return 0; } static void mlx4_cleanup_counters_table(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); int i, j; struct counter_index *port, *tmp_port; struct counter_index *vf, *tmp_vf; mutex_lock(&priv->counters_table.mutex); if (dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS) { for (i = 0; i < dev->caps.num_ports; i++) { list_for_each_entry_safe(port, tmp_port, &priv->counters_table.global_port_list[i], list) { list_del(&port->list); kfree(port); } } if (!mlx4_is_slave(dev)) { for (i = 0; i < dev->num_vfs; i++) { for (j = 0; j < dev->caps.num_ports; j++) { list_for_each_entry_safe(vf, tmp_vf, &priv->counters_table.vf_list[i][j], list) { /* clear the counter statistic */ if (__mlx4_clear_if_stat(dev, vf->index)) mlx4_dbg(dev, "%s: reset counter %d failed\n", __func__, vf->index); list_del(&vf->list); kfree(vf); } } } mlx4_bitmap_cleanup(&priv->counters_table.bitmap); } } mutex_unlock(&priv->counters_table.mutex); } int __mlx4_slave_counters_free(struct mlx4_dev *dev, int slave) { struct mlx4_priv *priv = mlx4_priv(dev); int i, first; struct counter_index *vf, *tmp_vf; /* clean VF's counters for the next useg */ if (slave > 0 && slave <= dev->num_vfs) { mlx4_dbg(dev, "%s: free counters of slave(%d)\n" , __func__, slave); mutex_lock(&priv->counters_table.mutex); for (i = 0; i < dev->caps.num_ports; i++) { first = 0; list_for_each_entry_safe(vf, tmp_vf, &priv->counters_table.vf_list[slave - 1][i], list) { /* clear the counter statistic */ if (__mlx4_clear_if_stat(dev, vf->index)) mlx4_dbg(dev, "%s: reset counter %d failed\n", __func__, vf->index); if (first++ && vf->index != MLX4_SINK_COUNTER_INDEX) { mlx4_dbg(dev, "%s: delete counter index %d for slave %d and port %d\n" , __func__, vf->index, slave, i + 1); mlx4_bitmap_free(&priv->counters_table.bitmap, vf->index, MLX4_USE_RR); list_del(&vf->list); kfree(vf); } else { mlx4_dbg(dev, "%s: can't delete default counter index %d for slave %d and port %d\n" , __func__, vf->index, slave, i + 1); } } } mutex_unlock(&priv->counters_table.mutex); } return 0; } int __mlx4_counter_alloc(struct mlx4_dev *dev, int slave, int port, u32 *idx) { struct mlx4_priv *priv = mlx4_priv(dev); struct counter_index *new_counter_index; if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS)) return -ENOENT; if ((slave > MLX4_MAX_NUM_VF) || (slave < 0) || (port < 0) || (port > MLX4_MAX_PORTS)) { mlx4_dbg(dev, "%s: invalid slave(%d) or port(%d) index\n", __func__, slave, port); return -EINVAL; } /* handle old guest request does not support request by port index */ if (port == 0) { *idx = MLX4_SINK_COUNTER_INDEX; mlx4_dbg(dev, "%s: allocated default counter index %d for slave %d port %d\n" , __func__, *idx, slave, port); return 0; } mutex_lock(&priv->counters_table.mutex); *idx = mlx4_bitmap_alloc(&priv->counters_table.bitmap); /* if no resources return the default counter of the slave and port */ if (*idx == -1) { if (slave == 0) { /* its the ethernet counter ?????? */ new_counter_index = list_entry(priv->counters_table.global_port_list[port - 1].next, struct counter_index, list); } else { new_counter_index = list_entry(priv->counters_table.vf_list[slave - 1][port - 1].next, struct counter_index, list); } *idx = new_counter_index->index; mlx4_dbg(dev, "%s: allocated defualt counter index %d for slave %d port %d\n" , __func__, *idx, slave, port); goto out; } if (slave == 0) { /* native or master */ new_counter_index = kmalloc(sizeof(struct counter_index), GFP_KERNEL); if (!new_counter_index) goto no_mem; new_counter_index->index = *idx; list_add_tail(&new_counter_index->list, &priv->counters_table.global_port_list[port - 1]); } else { new_counter_index = kmalloc(sizeof(struct counter_index), GFP_KERNEL); if (!new_counter_index) goto no_mem; new_counter_index->index = *idx; list_add_tail(&new_counter_index->list, &priv->counters_table.vf_list[slave - 1][port - 1]); } mlx4_dbg(dev, "%s: allocated counter index %d for slave %d port %d\n" , __func__, *idx, slave, port); out: mutex_unlock(&priv->counters_table.mutex); return 0; no_mem: mlx4_bitmap_free(&priv->counters_table.bitmap, *idx, MLX4_USE_RR); mutex_unlock(&priv->counters_table.mutex); *idx = MLX4_SINK_COUNTER_INDEX; mlx4_dbg(dev, "%s: failed err (%d)\n" , __func__, -ENOMEM); return -ENOMEM; } int mlx4_counter_alloc(struct mlx4_dev *dev, u8 port, u32 *idx) { u64 out_param; int err; struct mlx4_priv *priv = mlx4_priv(dev); struct counter_index *new_counter_index, *c_index; if (mlx4_is_mfunc(dev)) { err = mlx4_cmd_imm(dev, 0, &out_param, ((u32) port) << 8 | (u32) RES_COUNTER, RES_OP_RESERVE, MLX4_CMD_ALLOC_RES, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); if (!err) { *idx = get_param_l(&out_param); if (*idx == MLX4_SINK_COUNTER_INDEX) return -ENOSPC; mutex_lock(&priv->counters_table.mutex); c_index = list_entry(priv->counters_table.global_port_list[port - 1].next, struct counter_index, list); mutex_unlock(&priv->counters_table.mutex); if (c_index->index == *idx) return -EEXIST; if (mlx4_is_slave(dev)) { new_counter_index = kmalloc(sizeof(struct counter_index), GFP_KERNEL); if (!new_counter_index) { mlx4_counter_free(dev, port, *idx); return -ENOMEM; } new_counter_index->index = *idx; mutex_lock(&priv->counters_table.mutex); list_add_tail(&new_counter_index->list, &priv->counters_table.global_port_list[port - 1]); mutex_unlock(&priv->counters_table.mutex); mlx4_dbg(dev, "%s: allocated counter index %d for port %d\n" , __func__, *idx, port); } } return err; } return __mlx4_counter_alloc(dev, 0, port, idx); } EXPORT_SYMBOL_GPL(mlx4_counter_alloc); void __mlx4_counter_free(struct mlx4_dev *dev, int slave, int port, u32 idx) { - /* check if native or slave and deletes acordingly */ + /* check if native or slave and deletes accordingly */ struct mlx4_priv *priv = mlx4_priv(dev); struct counter_index *pf, *tmp_pf; struct counter_index *vf, *tmp_vf; int first; if (idx == MLX4_SINK_COUNTER_INDEX) { mlx4_dbg(dev, "%s: try to delete default counter index %d for port %d\n" , __func__, idx, port); return; } if ((slave > MLX4_MAX_NUM_VF) || (slave < 0) || (port < 0) || (port > MLX4_MAX_PORTS)) { mlx4_warn(dev, "%s: deletion failed due to invalid slave(%d) or port(%d) index\n" , __func__, slave, idx); return; } mutex_lock(&priv->counters_table.mutex); if (slave == 0) { first = 0; list_for_each_entry_safe(pf, tmp_pf, &priv->counters_table.global_port_list[port - 1], list) { /* the first 2 counters are reserved */ if (pf->index == idx) { /* clear the counter statistic */ if (__mlx4_clear_if_stat(dev, pf->index)) mlx4_dbg(dev, "%s: reset counter %d failed\n", __func__, pf->index); if (1 < first && idx != MLX4_SINK_COUNTER_INDEX) { list_del(&pf->list); kfree(pf); mlx4_dbg(dev, "%s: delete counter index %d for native device (%d) port %d\n" , __func__, idx, slave, port); mlx4_bitmap_free(&priv->counters_table.bitmap, idx, MLX4_USE_RR); goto out; } else { mlx4_dbg(dev, "%s: can't delete default counter index %d for native device (%d) port %d\n" , __func__, idx, slave, port); goto out; } } first++; } mlx4_dbg(dev, "%s: can't delete counter index %d for native device (%d) port %d\n" , __func__, idx, slave, port); } else { first = 0; list_for_each_entry_safe(vf, tmp_vf, &priv->counters_table.vf_list[slave - 1][port - 1], list) { /* the first element is reserved */ if (vf->index == idx) { /* clear the counter statistic */ if (__mlx4_clear_if_stat(dev, vf->index)) mlx4_dbg(dev, "%s: reset counter %d failed\n", __func__, vf->index); if (first) { list_del(&vf->list); kfree(vf); mlx4_dbg(dev, "%s: delete counter index %d for slave %d port %d\n", __func__, idx, slave, port); mlx4_bitmap_free(&priv->counters_table.bitmap, idx, MLX4_USE_RR); goto out; } else { mlx4_dbg(dev, "%s: can't delete default slave (%d) counter index %d for port %d\n" , __func__, slave, idx, port); goto out; } } first++; } mlx4_dbg(dev, "%s: can't delete slave (%d) counter index %d for port %d\n" , __func__, slave, idx, port); } out: mutex_unlock(&priv->counters_table.mutex); } void mlx4_counter_free(struct mlx4_dev *dev, u8 port, u32 idx) { u64 in_param = 0; struct mlx4_priv *priv = mlx4_priv(dev); struct counter_index *counter, *tmp_counter; int first = 0; if (mlx4_is_mfunc(dev)) { set_param_l(&in_param, idx); mlx4_cmd(dev, in_param, ((u32) port) << 8 | (u32) RES_COUNTER, RES_OP_RESERVE, MLX4_CMD_FREE_RES, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); if (mlx4_is_slave(dev) && idx != MLX4_SINK_COUNTER_INDEX) { mutex_lock(&priv->counters_table.mutex); list_for_each_entry_safe(counter, tmp_counter, &priv->counters_table.global_port_list[port - 1], list) { if (counter->index == idx && first++) { list_del(&counter->list); kfree(counter); mlx4_dbg(dev, "%s: delete counter index %d for port %d\n" , __func__, idx, port); mutex_unlock(&priv->counters_table.mutex); return; } } mutex_unlock(&priv->counters_table.mutex); } return; } __mlx4_counter_free(dev, 0, port, idx); } EXPORT_SYMBOL_GPL(mlx4_counter_free); int __mlx4_clear_if_stat(struct mlx4_dev *dev, u8 counter_index) { struct mlx4_cmd_mailbox *if_stat_mailbox = NULL; int err = 0; u32 if_stat_in_mod = (counter_index & 0xff) | (1 << 31); if (counter_index == MLX4_SINK_COUNTER_INDEX) return -EINVAL; if (mlx4_is_slave(dev)) return 0; if_stat_mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(if_stat_mailbox)) { err = PTR_ERR(if_stat_mailbox); return err; } err = mlx4_cmd_box(dev, 0, if_stat_mailbox->dma, if_stat_in_mod, 0, MLX4_CMD_QUERY_IF_STAT, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE); mlx4_free_cmd_mailbox(dev, if_stat_mailbox); return err; } u8 mlx4_get_default_counter_index(struct mlx4_dev *dev, int slave, int port) { struct mlx4_priv *priv = mlx4_priv(dev); struct counter_index *new_counter_index; if (dev->caps.port_type[port] == MLX4_PORT_TYPE_IB) { mlx4_dbg(dev, "%s: return counter index %d for slave %d port (MLX4_PORT_TYPE_IB) %d\n", __func__, MLX4_SINK_COUNTER_INDEX, slave, port); return (u8)MLX4_SINK_COUNTER_INDEX; } mutex_lock(&priv->counters_table.mutex); if (slave == 0) { new_counter_index = list_entry(priv->counters_table.global_port_list[port - 1].next, struct counter_index, list); } else { new_counter_index = list_entry(priv->counters_table.vf_list[slave - 1][port - 1].next, struct counter_index, list); } mutex_unlock(&priv->counters_table.mutex); mlx4_dbg(dev, "%s: return counter index %d for slave %d port %d\n", __func__, new_counter_index->index, slave, port); return (u8)new_counter_index->index; } int mlx4_get_vport_ethtool_stats(struct mlx4_dev *dev, int port, struct mlx4_en_vport_stats *vport_stats, int reset) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_cmd_mailbox *if_stat_mailbox = NULL; union mlx4_counter *counter; int err = 0; u32 if_stat_in_mod; struct counter_index *vport, *tmp_vport; if (!vport_stats) return -EINVAL; if_stat_mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(if_stat_mailbox)) { err = PTR_ERR(if_stat_mailbox); return err; } mutex_lock(&priv->counters_table.mutex); list_for_each_entry_safe(vport, tmp_vport, &priv->counters_table.global_port_list[port - 1], list) { if (vport->index == MLX4_SINK_COUNTER_INDEX) continue; memset(if_stat_mailbox->buf, 0, sizeof(union mlx4_counter)); if_stat_in_mod = (vport->index & 0xff) | ((reset & 1) << 31); err = mlx4_cmd_box(dev, 0, if_stat_mailbox->dma, if_stat_in_mod, 0, MLX4_CMD_QUERY_IF_STAT, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE); if (err) { mlx4_dbg(dev, "%s: failed to read statistics for counter index %d\n", __func__, vport->index); goto if_stat_out; } counter = (union mlx4_counter *)if_stat_mailbox->buf; if ((counter->control.cnt_mode & 0xf) == 1) { vport_stats->rx_broadcast_packets += be64_to_cpu(counter->ext.counters[0].IfRxBroadcastFrames); vport_stats->rx_unicast_packets += be64_to_cpu(counter->ext.counters[0].IfRxUnicastFrames); vport_stats->rx_multicast_packets += be64_to_cpu(counter->ext.counters[0].IfRxMulticastFrames); vport_stats->tx_broadcast_packets += be64_to_cpu(counter->ext.counters[0].IfTxBroadcastFrames); vport_stats->tx_unicast_packets += be64_to_cpu(counter->ext.counters[0].IfTxUnicastFrames); vport_stats->tx_multicast_packets += be64_to_cpu(counter->ext.counters[0].IfTxMulticastFrames); vport_stats->rx_broadcast_bytes += be64_to_cpu(counter->ext.counters[0].IfRxBroadcastOctets); vport_stats->rx_unicast_bytes += be64_to_cpu(counter->ext.counters[0].IfRxUnicastOctets); vport_stats->rx_multicast_bytes += be64_to_cpu(counter->ext.counters[0].IfRxMulticastOctets); vport_stats->tx_broadcast_bytes += be64_to_cpu(counter->ext.counters[0].IfTxBroadcastOctets); vport_stats->tx_unicast_bytes += be64_to_cpu(counter->ext.counters[0].IfTxUnicastOctets); vport_stats->tx_multicast_bytes += be64_to_cpu(counter->ext.counters[0].IfTxMulticastOctets); vport_stats->rx_errors += be64_to_cpu(counter->ext.counters[0].IfRxErrorFrames); vport_stats->rx_dropped += be64_to_cpu(counter->ext.counters[0].IfRxNoBufferFrames); vport_stats->tx_errors += be64_to_cpu(counter->ext.counters[0].IfTxDroppedFrames); } } if_stat_out: mutex_unlock(&priv->counters_table.mutex); mlx4_free_cmd_mailbox(dev, if_stat_mailbox); return err; } EXPORT_SYMBOL_GPL(mlx4_get_vport_ethtool_stats); static int mlx4_setup_hca(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); int err; int port; __be32 ib_port_default_caps; err = mlx4_init_uar_table(dev); if (err) { mlx4_err(dev, "Failed to initialize " "user access region table (err=%d), aborting.\n", err); return err; } err = mlx4_uar_alloc(dev, &priv->driver_uar); if (err) { mlx4_err(dev, "Failed to allocate driver access region " "(err=%d), aborting.\n", err); goto err_uar_table_free; } priv->kar = ioremap((phys_addr_t) priv->driver_uar.pfn << PAGE_SHIFT, PAGE_SIZE); if (!priv->kar) { mlx4_err(dev, "Couldn't map kernel access region, " "aborting.\n"); err = -ENOMEM; goto err_uar_free; } err = mlx4_init_pd_table(dev); if (err) { mlx4_err(dev, "Failed to initialize " "protection domain table (err=%d), aborting.\n", err); goto err_kar_unmap; } err = mlx4_init_xrcd_table(dev); if (err) { mlx4_err(dev, "Failed to initialize " "reliable connection domain table (err=%d), " "aborting.\n", err); goto err_pd_table_free; } err = mlx4_init_mr_table(dev); if (err) { mlx4_err(dev, "Failed to initialize " "memory region table (err=%d), aborting.\n", err); goto err_xrcd_table_free; } if (!mlx4_is_slave(dev)) { err = mlx4_init_mcg_table(dev); if (err) { mlx4_err(dev, "Failed to initialize " "multicast group table (err=%d), aborting.\n", err); goto err_mr_table_free; } } err = mlx4_init_eq_table(dev); if (err) { mlx4_err(dev, "Failed to initialize " "event queue table (err=%d), aborting.\n", err); goto err_mcg_table_free; } err = mlx4_cmd_use_events(dev); if (err) { mlx4_err(dev, "Failed to switch to event-driven " "firmware commands (err=%d), aborting.\n", err); goto err_eq_table_free; } err = mlx4_NOP(dev); if (err) { if (dev->flags & MLX4_FLAG_MSI_X) { mlx4_warn(dev, "NOP command failed to generate MSI-X " "interrupt IRQ %d).\n", priv->eq_table.eq[dev->caps.num_comp_vectors].irq); mlx4_warn(dev, "Trying again without MSI-X.\n"); } else { mlx4_err(dev, "NOP command failed to generate interrupt " "(IRQ %d), aborting.\n", priv->eq_table.eq[dev->caps.num_comp_vectors].irq); mlx4_err(dev, "BIOS or ACPI interrupt routing problem?\n"); } goto err_cmd_poll; } mlx4_dbg(dev, "NOP command IRQ test passed\n"); err = mlx4_init_cq_table(dev); if (err) { mlx4_err(dev, "Failed to initialize " "completion queue table (err=%d), aborting.\n", err); goto err_cmd_poll; } err = mlx4_init_srq_table(dev); if (err) { mlx4_err(dev, "Failed to initialize " "shared receive queue table (err=%d), aborting.\n", err); goto err_cq_table_free; } err = mlx4_init_qp_table(dev); if (err) { mlx4_err(dev, "Failed to initialize " "queue pair table (err=%d), aborting.\n", err); goto err_srq_table_free; } err = mlx4_init_counters_table(dev); if (err && err != -ENOENT) { mlx4_err(dev, "Failed to initialize counters table (err=%d), " "aborting.\n", err); goto err_qp_table_free; } if (!mlx4_is_slave(dev)) { for (port = 1; port <= dev->caps.num_ports; port++) { ib_port_default_caps = 0; err = mlx4_get_port_ib_caps(dev, port, &ib_port_default_caps); if (err) mlx4_warn(dev, "failed to get port %d default " "ib capabilities (%d). Continuing " "with caps = 0\n", port, err); dev->caps.ib_port_def_cap[port] = ib_port_default_caps; /* initialize per-slave default ib port capabilities */ if (mlx4_is_master(dev)) { int i; for (i = 0; i < dev->num_slaves; i++) { if (i == mlx4_master_func_num(dev)) continue; priv->mfunc.master.slave_state[i].ib_cap_mask[port] = ib_port_default_caps; } } dev->caps.port_ib_mtu[port] = IB_MTU_4096; err = mlx4_SET_PORT(dev, port, mlx4_is_master(dev) ? dev->caps.pkey_table_len[port] : -1); if (err) { mlx4_err(dev, "Failed to set port %d (err=%d), " "aborting\n", port, err); goto err_counters_table_free; } } } return 0; err_counters_table_free: mlx4_cleanup_counters_table(dev); err_qp_table_free: mlx4_cleanup_qp_table(dev); err_srq_table_free: mlx4_cleanup_srq_table(dev); err_cq_table_free: mlx4_cleanup_cq_table(dev); err_cmd_poll: mlx4_cmd_use_polling(dev); err_eq_table_free: mlx4_cleanup_eq_table(dev); err_mcg_table_free: if (!mlx4_is_slave(dev)) mlx4_cleanup_mcg_table(dev); err_mr_table_free: mlx4_cleanup_mr_table(dev); err_xrcd_table_free: mlx4_cleanup_xrcd_table(dev); err_pd_table_free: mlx4_cleanup_pd_table(dev); err_kar_unmap: iounmap(priv->kar); err_uar_free: mlx4_uar_free(dev, &priv->driver_uar); err_uar_table_free: mlx4_cleanup_uar_table(dev); return err; } static void mlx4_enable_msi_x(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); struct msix_entry *entries; int nreq = min_t(int, dev->caps.num_ports * min_t(int, num_possible_cpus() + 1, MAX_MSIX_P_PORT) + MSIX_LEGACY_SZ, MAX_MSIX); int err; int i; if (msi_x) { nreq = min_t(int, dev->caps.num_eqs - dev->caps.reserved_eqs, nreq); if (msi_x > 1 && !mlx4_is_mfunc(dev)) nreq = min_t(int, nreq, msi_x); entries = kcalloc(nreq, sizeof *entries, GFP_KERNEL); if (!entries) goto no_msi; for (i = 0; i < nreq; ++i) entries[i].entry = i; retry: err = pci_enable_msix(dev->pdev, entries, nreq); if (err) { /* Try again if at least 2 vectors are available */ if (err > 1) { mlx4_info(dev, "Requested %d vectors, " "but only %d MSI-X vectors available, " "trying again\n", nreq, err); nreq = err; goto retry; } kfree(entries); /* if error, or can't alloc even 1 IRQ */ if (err < 0) { mlx4_err(dev, "No IRQs left, device can't " "be started.\n"); goto no_irq; } goto no_msi; } if (nreq < MSIX_LEGACY_SZ + dev->caps.num_ports * MIN_MSIX_P_PORT) { /*Working in legacy mode , all EQ's shared*/ dev->caps.comp_pool = 0; dev->caps.num_comp_vectors = nreq - 1; } else { dev->caps.comp_pool = nreq - MSIX_LEGACY_SZ; dev->caps.num_comp_vectors = MSIX_LEGACY_SZ - 1; } for (i = 0; i < nreq; ++i) priv->eq_table.eq[i].irq = entries[i].vector; dev->flags |= MLX4_FLAG_MSI_X; kfree(entries); return; } no_msi: dev->caps.num_comp_vectors = 1; dev->caps.comp_pool = 0; for (i = 0; i < 2; ++i) priv->eq_table.eq[i].irq = dev->pdev->irq; return; no_irq: dev->caps.num_comp_vectors = 0; dev->caps.comp_pool = 0; return; } static void mlx4_init_hca_info(struct mlx4_dev *dev) { struct mlx4_hca_info *info = &mlx4_priv(dev)->hca_info; info->dev = dev; info->firmware_attr = (struct device_attribute)__ATTR(fw_ver, S_IRUGO, show_firmware_version, NULL); if (device_create_file(&dev->pdev->dev, &info->firmware_attr)) mlx4_err(dev, "Failed to add file firmware version"); info->hca_attr = (struct device_attribute)__ATTR(hca, S_IRUGO, show_hca, NULL); if (device_create_file(&dev->pdev->dev, &info->hca_attr)) mlx4_err(dev, "Failed to add file hca type"); info->board_attr = (struct device_attribute)__ATTR(board_id, S_IRUGO, show_board, NULL); if (device_create_file(&dev->pdev->dev, &info->board_attr)) mlx4_err(dev, "Failed to add file board id type"); } static int mlx4_init_port_info(struct mlx4_dev *dev, int port) { struct mlx4_port_info *info = &mlx4_priv(dev)->port[port]; int err = 0; info->dev = dev; info->port = port; if (!mlx4_is_slave(dev)) { mlx4_init_mac_table(dev, &info->mac_table); mlx4_init_vlan_table(dev, &info->vlan_table); info->base_qpn = mlx4_get_base_qpn(dev, port); } sprintf(info->dev_name, "mlx4_port%d", port); info->port_attr.attr.name = info->dev_name; if (mlx4_is_mfunc(dev)) info->port_attr.attr.mode = S_IRUGO; else { info->port_attr.attr.mode = S_IRUGO | S_IWUSR; info->port_attr.store = set_port_type; } info->port_attr.show = show_port_type; sysfs_attr_init(&info->port_attr.attr); err = device_create_file(&dev->pdev->dev, &info->port_attr); if (err) { mlx4_err(dev, "Failed to create file for port %d\n", port); info->port = -1; } sprintf(info->dev_mtu_name, "mlx4_port%d_mtu", port); info->port_mtu_attr.attr.name = info->dev_mtu_name; if (mlx4_is_mfunc(dev)) info->port_mtu_attr.attr.mode = S_IRUGO; else { info->port_mtu_attr.attr.mode = S_IRUGO | S_IWUSR; info->port_mtu_attr.store = set_port_ib_mtu; } info->port_mtu_attr.show = show_port_ib_mtu; sysfs_attr_init(&info->port_mtu_attr.attr); err = device_create_file(&dev->pdev->dev, &info->port_mtu_attr); if (err) { mlx4_err(dev, "Failed to create mtu file for port %d\n", port); device_remove_file(&info->dev->pdev->dev, &info->port_attr); info->port = -1; } return err; } static void mlx4_cleanup_hca_info(struct mlx4_hca_info *info) { device_remove_file(&info->dev->pdev->dev, &info->firmware_attr); device_remove_file(&info->dev->pdev->dev, &info->board_attr); device_remove_file(&info->dev->pdev->dev, &info->hca_attr); } static void mlx4_cleanup_port_info(struct mlx4_port_info *info) { if (info->port < 0) return; device_remove_file(&info->dev->pdev->dev, &info->port_attr); device_remove_file(&info->dev->pdev->dev, &info->port_mtu_attr); } static int mlx4_init_steering(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); int num_entries = dev->caps.num_ports; int i, j; priv->steer = kzalloc(sizeof(struct mlx4_steer) * num_entries, GFP_KERNEL); if (!priv->steer) return -ENOMEM; for (i = 0; i < num_entries; i++) for (j = 0; j < MLX4_NUM_STEERS; j++) { INIT_LIST_HEAD(&priv->steer[i].promisc_qps[j]); INIT_LIST_HEAD(&priv->steer[i].steer_entries[j]); } return 0; } static void mlx4_clear_steering(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_steer_index *entry, *tmp_entry; struct mlx4_promisc_qp *pqp, *tmp_pqp; int num_entries = dev->caps.num_ports; int i, j; for (i = 0; i < num_entries; i++) { for (j = 0; j < MLX4_NUM_STEERS; j++) { list_for_each_entry_safe(pqp, tmp_pqp, &priv->steer[i].promisc_qps[j], list) { list_del(&pqp->list); kfree(pqp); } list_for_each_entry_safe(entry, tmp_entry, &priv->steer[i].steer_entries[j], list) { list_del(&entry->list); list_for_each_entry_safe(pqp, tmp_pqp, &entry->duplicates, list) { list_del(&pqp->list); kfree(pqp); } kfree(entry); } } } kfree(priv->steer); } static int extended_func_num(struct pci_dev *pdev) { return PCI_SLOT(pdev->devfn) * 8 + PCI_FUNC(pdev->devfn); } #define MLX4_OWNER_BASE 0x8069c #define MLX4_OWNER_SIZE 4 static int mlx4_get_ownership(struct mlx4_dev *dev) { void __iomem *owner; u32 ret; if (pci_channel_offline(dev->pdev)) return -EIO; owner = ioremap(pci_resource_start(dev->pdev, 0) + MLX4_OWNER_BASE, MLX4_OWNER_SIZE); if (!owner) { mlx4_err(dev, "Failed to obtain ownership bit\n"); return -ENOMEM; } ret = readl(owner); iounmap(owner); return (int) !!ret; } static void mlx4_free_ownership(struct mlx4_dev *dev) { void __iomem *owner; if (pci_channel_offline(dev->pdev)) return; owner = ioremap(pci_resource_start(dev->pdev, 0) + MLX4_OWNER_BASE, MLX4_OWNER_SIZE); if (!owner) { mlx4_err(dev, "Failed to obtain ownership bit\n"); return; } writel(0, owner); msleep(1000); iounmap(owner); } static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data) { struct mlx4_priv *priv; struct mlx4_dev *dev; int err; int port; int nvfs, prb_vf; pr_info(DRV_NAME ": Initializing %s\n", pci_name(pdev)); err = pci_enable_device(pdev); if (err) { dev_err(&pdev->dev, "Cannot enable PCI device, " "aborting.\n"); return err; } mlx4_get_val(num_vfs.dbdf2val.tbl, pci_physfn(pdev), 0, &nvfs); mlx4_get_val(probe_vf.dbdf2val.tbl, pci_physfn(pdev), 0, &prb_vf); if (nvfs > MLX4_MAX_NUM_VF) { dev_err(&pdev->dev, "There are more VF's (%d) than allowed(%d)\n", nvfs, MLX4_MAX_NUM_VF); return -EINVAL; } if (nvfs < 0) { dev_err(&pdev->dev, "num_vfs module parameter cannot be negative\n"); return -EINVAL; } /* * Check for BARs. */ if (!(pci_dev_data & MLX4_PCI_DEV_IS_VF) && !(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) { dev_err(&pdev->dev, "Missing DCS, aborting." "(driver_data: 0x%x, pci_resource_flags(pdev, 0):0x%x)\n", pci_dev_data, pci_resource_flags(pdev, 0)); err = -ENODEV; goto err_disable_pdev; } if (!(pci_resource_flags(pdev, 2) & IORESOURCE_MEM)) { dev_err(&pdev->dev, "Missing UAR, aborting.\n"); err = -ENODEV; goto err_disable_pdev; } err = pci_request_regions(pdev, DRV_NAME); if (err) { dev_err(&pdev->dev, "Couldn't get PCI resources, aborting\n"); goto err_disable_pdev; } pci_set_master(pdev); err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); if (err) { dev_warn(&pdev->dev, "Warning: couldn't set 64-bit PCI DMA mask.\n"); err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); if (err) { dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting.\n"); goto err_release_regions; } } err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); if (err) { dev_warn(&pdev->dev, "Warning: couldn't set 64-bit " "consistent PCI DMA mask.\n"); err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); if (err) { dev_err(&pdev->dev, "Can't set consistent PCI DMA mask, " "aborting.\n"); goto err_release_regions; } } /* Allow large DMA segments, up to the firmware limit of 1 GB */ dma_set_max_seg_size(&pdev->dev, 1024 * 1024 * 1024); priv = kzalloc(sizeof *priv, GFP_KERNEL); if (!priv) { dev_err(&pdev->dev, "Device struct alloc failed, " "aborting.\n"); err = -ENOMEM; goto err_release_regions; } dev = &priv->dev; dev->pdev = pdev; INIT_LIST_HEAD(&priv->dev_list); INIT_LIST_HEAD(&priv->ctx_list); spin_lock_init(&priv->ctx_lock); mutex_init(&priv->port_mutex); INIT_LIST_HEAD(&priv->pgdir_list); mutex_init(&priv->pgdir_mutex); INIT_LIST_HEAD(&priv->bf_list); mutex_init(&priv->bf_mutex); dev->rev_id = pdev->revision; dev->numa_node = dev_to_node(&pdev->dev); /* Detect if this device is a virtual function */ if (pci_dev_data & MLX4_PCI_DEV_IS_VF) { /* When acting as pf, we normally skip vfs unless explicitly * requested to probe them. */ if (nvfs && extended_func_num(pdev) > prb_vf) { mlx4_warn(dev, "Skipping virtual function:%d\n", extended_func_num(pdev)); err = -ENODEV; goto err_free_dev; } mlx4_warn(dev, "Detected virtual function - running in slave mode\n"); dev->flags |= MLX4_FLAG_SLAVE; } else { /* We reset the device and enable SRIOV only for physical * devices. Try to claim ownership on the device; * if already taken, skip -- do not allow multiple PFs */ err = mlx4_get_ownership(dev); if (err) { if (err < 0) goto err_free_dev; else { mlx4_warn(dev, "Multiple PFs not yet supported." " Skipping PF.\n"); err = -EINVAL; goto err_free_dev; } } if (nvfs) { mlx4_warn(dev, "Enabling SR-IOV with %d VFs\n", nvfs); err = pci_enable_sriov(pdev, nvfs); if (err) { mlx4_err(dev, "Failed to enable SR-IOV, continuing without SR-IOV (err = %d).\n", err); err = 0; } else { mlx4_warn(dev, "Running in master mode\n"); dev->flags |= MLX4_FLAG_SRIOV | MLX4_FLAG_MASTER; dev->num_vfs = nvfs; } } atomic_set(&priv->opreq_count, 0); INIT_WORK(&priv->opreq_task, mlx4_opreq_action); /* * Now reset the HCA before we touch the PCI capabilities or * attempt a firmware command, since a boot ROM may have left * the HCA in an undefined state. */ err = mlx4_reset(dev); if (err) { mlx4_err(dev, "Failed to reset HCA, aborting.\n"); goto err_sriov; } } slave_start: err = mlx4_cmd_init(dev); if (err) { mlx4_err(dev, "Failed to init command interface, aborting.\n"); goto err_sriov; } /* In slave functions, the communication channel must be initialized * before posting commands. Also, init num_slaves before calling * mlx4_init_hca */ if (mlx4_is_mfunc(dev)) { if (mlx4_is_master(dev)) dev->num_slaves = MLX4_MAX_NUM_SLAVES; else { dev->num_slaves = 0; err = mlx4_multi_func_init(dev); if (err) { mlx4_err(dev, "Failed to init slave mfunc" " interface, aborting.\n"); goto err_cmd; } } } err = mlx4_init_hca(dev); if (err) { if (err == -EACCES) { /* Not primary Physical function * Running in slave mode */ mlx4_cmd_cleanup(dev); dev->flags |= MLX4_FLAG_SLAVE; dev->flags &= ~MLX4_FLAG_MASTER; goto slave_start; } else goto err_mfunc; } /* In master functions, the communication channel must be initialized * after obtaining its address from fw */ if (mlx4_is_master(dev)) { err = mlx4_multi_func_init(dev); if (err) { mlx4_err(dev, "Failed to init master mfunc" "interface, aborting.\n"); goto err_close; } } err = mlx4_alloc_eq_table(dev); if (err) goto err_master_mfunc; priv->msix_ctl.pool_bm = 0; mutex_init(&priv->msix_ctl.pool_lock); mlx4_enable_msi_x(dev); /* no MSIX and no shared IRQ */ if (!dev->caps.num_comp_vectors && !dev->caps.comp_pool) { err = -ENOSPC; goto err_free_eq; } if ((mlx4_is_mfunc(dev)) && !(dev->flags & MLX4_FLAG_MSI_X)) { err = -ENOSYS; mlx4_err(dev, "INTx is not supported in multi-function mode." " aborting.\n"); goto err_free_eq; } if (!mlx4_is_slave(dev)) { err = mlx4_init_steering(dev); if (err) goto err_free_eq; } err = mlx4_setup_hca(dev); if (err == -EBUSY && (dev->flags & MLX4_FLAG_MSI_X) && !mlx4_is_mfunc(dev)) { dev->flags &= ~MLX4_FLAG_MSI_X; dev->caps.num_comp_vectors = 1; dev->caps.comp_pool = 0; pci_disable_msix(pdev); err = mlx4_setup_hca(dev); } if (err) goto err_steer; mlx4_init_quotas(dev); mlx4_init_hca_info(dev); for (port = 1; port <= dev->caps.num_ports; port++) { err = mlx4_init_port_info(dev, port); if (err) goto err_port; } err = mlx4_register_device(dev); if (err) goto err_port; mlx4_request_modules(dev); mlx4_sense_init(dev); mlx4_start_sense(dev); priv->pci_dev_data = pci_dev_data; pci_set_drvdata(pdev, dev); return 0; err_port: for (--port; port >= 1; --port) mlx4_cleanup_port_info(&priv->port[port]); mlx4_cleanup_counters_table(dev); mlx4_cleanup_qp_table(dev); mlx4_cleanup_srq_table(dev); mlx4_cleanup_cq_table(dev); mlx4_cmd_use_polling(dev); mlx4_cleanup_eq_table(dev); mlx4_cleanup_mcg_table(dev); mlx4_cleanup_mr_table(dev); mlx4_cleanup_xrcd_table(dev); mlx4_cleanup_pd_table(dev); mlx4_cleanup_uar_table(dev); err_steer: if (!mlx4_is_slave(dev)) mlx4_clear_steering(dev); err_free_eq: mlx4_free_eq_table(dev); err_master_mfunc: if (mlx4_is_master(dev)) { mlx4_free_resource_tracker(dev, RES_TR_FREE_STRUCTS_ONLY); mlx4_multi_func_cleanup(dev); } if (mlx4_is_slave(dev)) { kfree(dev->caps.qp0_tunnel); kfree(dev->caps.qp0_proxy); kfree(dev->caps.qp1_tunnel); kfree(dev->caps.qp1_proxy); } err_close: if (dev->flags & MLX4_FLAG_MSI_X) pci_disable_msix(pdev); mlx4_close_hca(dev); err_mfunc: if (mlx4_is_slave(dev)) mlx4_multi_func_cleanup(dev); err_cmd: mlx4_cmd_cleanup(dev); err_sriov: if (dev->flags & MLX4_FLAG_SRIOV) pci_disable_sriov(pdev); if (!mlx4_is_slave(dev)) mlx4_free_ownership(dev); err_free_dev: kfree(priv); err_release_regions: pci_release_regions(pdev); err_disable_pdev: pci_disable_device(pdev); pci_set_drvdata(pdev, NULL); return err; } static int __devinit mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id) { device_set_desc(pdev->dev.bsddev, mlx4_version); return __mlx4_init_one(pdev, id->driver_data); } static void mlx4_remove_one(struct pci_dev *pdev) { struct mlx4_dev *dev = pci_get_drvdata(pdev); struct mlx4_priv *priv = mlx4_priv(dev); int p; if (dev) { /* in SRIOV it is not allowed to unload the pf's * driver while there are alive vf's */ if (mlx4_is_master(dev)) { if (mlx4_how_many_lives_vf(dev)) mlx4_err(dev, "Removing PF when there are assigned VF's !!!\n"); } mlx4_stop_sense(dev); mlx4_unregister_device(dev); mlx4_cleanup_hca_info(&priv->hca_info); for (p = 1; p <= dev->caps.num_ports; p++) { mlx4_cleanup_port_info(&priv->port[p]); mlx4_CLOSE_PORT(dev, p); } if (mlx4_is_master(dev)) mlx4_free_resource_tracker(dev, RES_TR_FREE_SLAVES_ONLY); mlx4_cleanup_counters_table(dev); mlx4_cleanup_qp_table(dev); mlx4_cleanup_srq_table(dev); mlx4_cleanup_cq_table(dev); mlx4_cmd_use_polling(dev); mlx4_cleanup_eq_table(dev); mlx4_cleanup_mcg_table(dev); mlx4_cleanup_mr_table(dev); mlx4_cleanup_xrcd_table(dev); mlx4_cleanup_pd_table(dev); if (mlx4_is_master(dev)) mlx4_free_resource_tracker(dev, RES_TR_FREE_STRUCTS_ONLY); iounmap(priv->kar); mlx4_uar_free(dev, &priv->driver_uar); mlx4_cleanup_uar_table(dev); if (!mlx4_is_slave(dev)) mlx4_clear_steering(dev); mlx4_free_eq_table(dev); if (mlx4_is_master(dev)) mlx4_multi_func_cleanup(dev); mlx4_close_hca(dev); if (mlx4_is_slave(dev)) mlx4_multi_func_cleanup(dev); mlx4_cmd_cleanup(dev); if (dev->flags & MLX4_FLAG_MSI_X) pci_disable_msix(pdev); if (dev->flags & MLX4_FLAG_SRIOV) { mlx4_warn(dev, "Disabling SR-IOV\n"); pci_disable_sriov(pdev); } if (!mlx4_is_slave(dev)) mlx4_free_ownership(dev); kfree(dev->caps.qp0_tunnel); kfree(dev->caps.qp0_proxy); kfree(dev->caps.qp1_tunnel); kfree(dev->caps.qp1_proxy); kfree(priv); pci_release_regions(pdev); pci_disable_device(pdev); pci_set_drvdata(pdev, NULL); } } static int restore_current_port_types(struct mlx4_dev *dev, enum mlx4_port_type *types, enum mlx4_port_type *poss_types) { struct mlx4_priv *priv = mlx4_priv(dev); int err, i; mlx4_stop_sense(dev); mutex_lock(&priv->port_mutex); for (i = 0; i < dev->caps.num_ports; i++) dev->caps.possible_type[i + 1] = poss_types[i]; err = mlx4_change_port_types(dev, types); mlx4_start_sense(dev); mutex_unlock(&priv->port_mutex); return err; } int mlx4_restart_one(struct pci_dev *pdev) { struct mlx4_dev *dev = pci_get_drvdata(pdev); struct mlx4_priv *priv = mlx4_priv(dev); enum mlx4_port_type curr_type[MLX4_MAX_PORTS]; enum mlx4_port_type poss_type[MLX4_MAX_PORTS]; int pci_dev_data, err, i; pci_dev_data = priv->pci_dev_data; for (i = 0; i < dev->caps.num_ports; i++) { curr_type[i] = dev->caps.port_type[i + 1]; poss_type[i] = dev->caps.possible_type[i + 1]; } mlx4_remove_one(pdev); err = __mlx4_init_one(pdev, pci_dev_data); if (err) return err; dev = pci_get_drvdata(pdev); err = restore_current_port_types(dev, curr_type, poss_type); if (err) mlx4_err(dev, "mlx4_restart_one: could not restore original port types (%d)\n", err); return 0; } static DEFINE_PCI_DEVICE_TABLE(mlx4_pci_table) = { /* MT25408 "Hermon" SDR */ { PCI_VDEVICE(MELLANOX, 0x6340), MLX4_PCI_DEV_FORCE_SENSE_PORT }, /* MT25408 "Hermon" DDR */ { PCI_VDEVICE(MELLANOX, 0x634a), MLX4_PCI_DEV_FORCE_SENSE_PORT }, /* MT25408 "Hermon" QDR */ { PCI_VDEVICE(MELLANOX, 0x6354), MLX4_PCI_DEV_FORCE_SENSE_PORT }, /* MT25408 "Hermon" DDR PCIe gen2 */ { PCI_VDEVICE(MELLANOX, 0x6732), MLX4_PCI_DEV_FORCE_SENSE_PORT }, /* MT25408 "Hermon" QDR PCIe gen2 */ { PCI_VDEVICE(MELLANOX, 0x673c), MLX4_PCI_DEV_FORCE_SENSE_PORT }, /* MT25408 "Hermon" EN 10GigE */ { PCI_VDEVICE(MELLANOX, 0x6368), MLX4_PCI_DEV_FORCE_SENSE_PORT }, /* MT25408 "Hermon" EN 10GigE PCIe gen2 */ { PCI_VDEVICE(MELLANOX, 0x6750), MLX4_PCI_DEV_FORCE_SENSE_PORT }, /* MT25458 ConnectX EN 10GBASE-T 10GigE */ { PCI_VDEVICE(MELLANOX, 0x6372), MLX4_PCI_DEV_FORCE_SENSE_PORT }, /* MT25458 ConnectX EN 10GBASE-T+Gen2 10GigE */ { PCI_VDEVICE(MELLANOX, 0x675a), MLX4_PCI_DEV_FORCE_SENSE_PORT }, /* MT26468 ConnectX EN 10GigE PCIe gen2*/ { PCI_VDEVICE(MELLANOX, 0x6764), MLX4_PCI_DEV_FORCE_SENSE_PORT }, /* MT26438 ConnectX EN 40GigE PCIe gen2 5GT/s */ { PCI_VDEVICE(MELLANOX, 0x6746), MLX4_PCI_DEV_FORCE_SENSE_PORT }, /* MT26478 ConnectX2 40GigE PCIe gen2 */ { PCI_VDEVICE(MELLANOX, 0x676e), MLX4_PCI_DEV_FORCE_SENSE_PORT }, /* MT25400 Family [ConnectX-2 Virtual Function] */ { PCI_VDEVICE(MELLANOX, 0x1002), MLX4_PCI_DEV_IS_VF }, /* MT27500 Family [ConnectX-3] */ { PCI_VDEVICE(MELLANOX, 0x1003), 0 }, /* MT27500 Family [ConnectX-3 Virtual Function] */ { PCI_VDEVICE(MELLANOX, 0x1004), MLX4_PCI_DEV_IS_VF }, { PCI_VDEVICE(MELLANOX, 0x1005), 0 }, /* MT27510 Family */ { PCI_VDEVICE(MELLANOX, 0x1006), 0 }, /* MT27511 Family */ { PCI_VDEVICE(MELLANOX, 0x1007), 0 }, /* MT27520 Family */ { PCI_VDEVICE(MELLANOX, 0x1008), 0 }, /* MT27521 Family */ { PCI_VDEVICE(MELLANOX, 0x1009), 0 }, /* MT27530 Family */ { PCI_VDEVICE(MELLANOX, 0x100a), 0 }, /* MT27531 Family */ { PCI_VDEVICE(MELLANOX, 0x100b), 0 }, /* MT27540 Family */ { PCI_VDEVICE(MELLANOX, 0x100c), 0 }, /* MT27541 Family */ { PCI_VDEVICE(MELLANOX, 0x100d), 0 }, /* MT27550 Family */ { PCI_VDEVICE(MELLANOX, 0x100e), 0 }, /* MT27551 Family */ { PCI_VDEVICE(MELLANOX, 0x100f), 0 }, /* MT27560 Family */ { PCI_VDEVICE(MELLANOX, 0x1010), 0 }, /* MT27561 Family */ { 0, } }; MODULE_DEVICE_TABLE(pci, mlx4_pci_table); static pci_ers_result_t mlx4_pci_err_detected(struct pci_dev *pdev, pci_channel_state_t state) { mlx4_remove_one(pdev); return state == pci_channel_io_perm_failure ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET; } static pci_ers_result_t mlx4_pci_slot_reset(struct pci_dev *pdev) { int ret = __mlx4_init_one(pdev, 0); return ret ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED; } static const struct pci_error_handlers mlx4_err_handler = { .error_detected = mlx4_pci_err_detected, .slot_reset = mlx4_pci_slot_reset, }; static int suspend(struct pci_dev *pdev, pm_message_t state) { mlx4_remove_one(pdev); return 0; } static int resume(struct pci_dev *pdev) { return __mlx4_init_one(pdev, 0); } static struct pci_driver mlx4_driver = { .name = DRV_NAME, .id_table = mlx4_pci_table, .probe = mlx4_init_one, .remove = __devexit_p(mlx4_remove_one), .suspend = suspend, .resume = resume, .err_handler = &mlx4_err_handler, }; static int __init mlx4_verify_params(void) { int status; status = update_defaults(&port_type_array); if (status == INVALID_STR) { if (mlx4_fill_dbdf2val_tbl(&port_type_array.dbdf2val)) return -1; } else if (status == INVALID_DATA) { return -1; } status = update_defaults(&num_vfs); if (status == INVALID_STR) { if (mlx4_fill_dbdf2val_tbl(&num_vfs.dbdf2val)) return -1; } else if (status == INVALID_DATA) { return -1; } status = update_defaults(&probe_vf); if (status == INVALID_STR) { if (mlx4_fill_dbdf2val_tbl(&probe_vf.dbdf2val)) return -1; } else if (status == INVALID_DATA) { return -1; } if (msi_x < 0) { pr_warn("mlx4_core: bad msi_x: %d\n", msi_x); return -1; } if ((log_num_mac < 0) || (log_num_mac > 7)) { pr_warning("mlx4_core: bad num_mac: %d\n", log_num_mac); return -1; } if (log_num_vlan != 0) pr_warning("mlx4_core: log_num_vlan - obsolete module param, using %d\n", MLX4_LOG_NUM_VLANS); if (mlx4_set_4k_mtu != -1) pr_warning("mlx4_core: set_4k_mtu - obsolete module param\n"); if ((log_mtts_per_seg < 0) || (log_mtts_per_seg > 7)) { pr_warning("mlx4_core: bad log_mtts_per_seg: %d\n", log_mtts_per_seg); return -1; } if (mlx4_log_num_mgm_entry_size != -1 && (mlx4_log_num_mgm_entry_size < MLX4_MIN_MGM_LOG_ENTRY_SIZE || mlx4_log_num_mgm_entry_size > MLX4_MAX_MGM_LOG_ENTRY_SIZE)) { pr_warning("mlx4_core: mlx4_log_num_mgm_entry_size (%d) not " "in legal range (-1 or %d..%d)\n", mlx4_log_num_mgm_entry_size, MLX4_MIN_MGM_LOG_ENTRY_SIZE, MLX4_MAX_MGM_LOG_ENTRY_SIZE); return -1; } if (mod_param_profile.num_qp < 18 || mod_param_profile.num_qp > 23) { pr_warning("mlx4_core: bad log_num_qp: %d\n", mod_param_profile.num_qp); return -1; } if (mod_param_profile.num_srq < 10) { pr_warning("mlx4_core: too low log_num_srq: %d\n", mod_param_profile.num_srq); return -1; } if (mod_param_profile.num_cq < 10) { pr_warning("mlx4_core: too low log_num_cq: %d\n", mod_param_profile.num_cq); return -1; } if (mod_param_profile.num_mpt < 10) { pr_warning("mlx4_core: too low log_num_mpt: %d\n", mod_param_profile.num_mpt); return -1; } if (mod_param_profile.num_mtt_segs && mod_param_profile.num_mtt_segs < 15) { pr_warning("mlx4_core: too low log_num_mtt: %d\n", mod_param_profile.num_mtt_segs); return -1; } if (mod_param_profile.num_mtt_segs > MLX4_MAX_LOG_NUM_MTT) { pr_warning("mlx4_core: too high log_num_mtt: %d\n", mod_param_profile.num_mtt_segs); return -1; } return 0; } static int __init mlx4_init(void) { int ret; if (mlx4_verify_params()) return -EINVAL; mlx4_catas_init(); mlx4_wq = create_singlethread_workqueue("mlx4"); if (!mlx4_wq) return -ENOMEM; if (enable_sys_tune) sys_tune_init(); ret = pci_register_driver(&mlx4_driver); if (ret < 0) goto err; return 0; err: if (enable_sys_tune) sys_tune_fini(); destroy_workqueue(mlx4_wq); return ret; } static void __exit mlx4_cleanup(void) { if (enable_sys_tune) sys_tune_fini(); pci_unregister_driver(&mlx4_driver); destroy_workqueue(mlx4_wq); } module_init_order(mlx4_init, SI_ORDER_MIDDLE); module_exit(mlx4_cleanup); static int mlx4_evhand(module_t mod, int event, void *arg) { return (0); } static moduledata_t mlx4_mod = { .name = "mlx4", .evhand = mlx4_evhand, }; MODULE_VERSION(mlx4, 1); DECLARE_MODULE(mlx4, mlx4_mod, SI_SUB_OFED_PREINIT, SI_ORDER_ANY); MODULE_DEPEND(mlx4, linuxkpi, 1, 1, 1); Index: head/sys/ofed/drivers/net/mlx4/mlx4.h =================================================================== --- head/sys/ofed/drivers/net/mlx4/mlx4.h (revision 299178) +++ head/sys/ofed/drivers/net/mlx4/mlx4.h (revision 299179) @@ -1,1328 +1,1328 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2005, 2006, 2007 Cisco Systems. All rights reserved. * Copyright (c) 2005, 2006, 2007, 2008, 2014 Mellanox Technologies. All rights reserved. * Copyright (c) 2004 Voltaire, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #ifndef MLX4_H #define MLX4_H #include #include #include #include #include #include #include #include #include #include #include #define DRV_NAME "mlx4_core" #define PFX DRV_NAME ": " #define DRV_VERSION "2.1.6" #define DRV_RELDATE __DATE__ #define DRV_STACK_NAME "Linux-MLNX_OFED" #define DRV_STACK_VERSION "2.1" #define DRV_NAME_FOR_FW DRV_STACK_NAME","DRV_STACK_VERSION #define MLX4_FS_UDP_UC_EN (1 << 1) #define MLX4_FS_TCP_UC_EN (1 << 2) #define MLX4_FS_NUM_OF_L2_ADDR 8 #define MLX4_FS_MGM_LOG_ENTRY_SIZE 7 #define MLX4_FS_NUM_MCG (1 << 17) struct mlx4_set_port_prio2tc_context { u8 prio2tc[4]; }; struct mlx4_port_scheduler_tc_cfg_be { __be16 pg; __be16 bw_precentage; __be16 max_bw_units; /* 3-100Mbps, 4-1Gbps, other values - reserved */ __be16 max_bw_value; }; struct mlx4_set_port_scheduler_context { struct mlx4_port_scheduler_tc_cfg_be tc[MLX4_NUM_TC]; }; enum { MLX4_HCR_BASE = 0x80680, MLX4_HCR_SIZE = 0x0001c, MLX4_CLR_INT_SIZE = 0x00008, MLX4_SLAVE_COMM_BASE = 0x0, MLX4_COMM_PAGESIZE = 0x1000, MLX4_CLOCK_SIZE = 0x00008 }; enum { MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE = 10, MLX4_MIN_MGM_LOG_ENTRY_SIZE = 7, MLX4_MAX_MGM_LOG_ENTRY_SIZE = 12, MLX4_MAX_QP_PER_MGM = 4 * ((1 << MLX4_MAX_MGM_LOG_ENTRY_SIZE)/16 - 2), }; enum { MLX4_NUM_PDS = 1 << 15 }; enum { MLX4_CMPT_TYPE_QP = 0, MLX4_CMPT_TYPE_SRQ = 1, MLX4_CMPT_TYPE_CQ = 2, MLX4_CMPT_TYPE_EQ = 3, MLX4_CMPT_NUM_TYPE }; enum { MLX4_CMPT_SHIFT = 24, MLX4_NUM_CMPTS = MLX4_CMPT_NUM_TYPE << MLX4_CMPT_SHIFT }; enum mlx4_mpt_state { MLX4_MPT_DISABLED = 0, MLX4_MPT_EN_HW, MLX4_MPT_EN_SW }; #define MLX4_COMM_TIME 10000 enum { MLX4_COMM_CMD_RESET, MLX4_COMM_CMD_VHCR0, MLX4_COMM_CMD_VHCR1, MLX4_COMM_CMD_VHCR2, MLX4_COMM_CMD_VHCR_EN, MLX4_COMM_CMD_VHCR_POST, MLX4_COMM_CMD_FLR = 254 }; /*The flag indicates that the slave should delay the RESET cmd*/ #define MLX4_DELAY_RESET_SLAVE 0xbbbbbbb /*indicates how many retries will be done if we are in the middle of FLR*/ #define NUM_OF_RESET_RETRIES 10 #define SLEEP_TIME_IN_RESET (2 * 1000) enum mlx4_resource { RES_QP, RES_CQ, RES_SRQ, RES_XRCD, RES_MPT, RES_MTT, RES_MAC, RES_VLAN, RES_NPORT_ID, RES_COUNTER, RES_FS_RULE, RES_EQ, MLX4_NUM_OF_RESOURCE_TYPE }; enum mlx4_alloc_mode { RES_OP_RESERVE, RES_OP_RESERVE_AND_MAP, RES_OP_MAP_ICM, }; enum mlx4_res_tracker_free_type { RES_TR_FREE_ALL, RES_TR_FREE_SLAVES_ONLY, RES_TR_FREE_STRUCTS_ONLY, }; /* *Virtual HCR structures. - * mlx4_vhcr is the sw representation, in machine endianess + * mlx4_vhcr is the sw representation, in machine endianness * * mlx4_vhcr_cmd is the formalized structure, the one that is passed * to FW to go through communication channel. * It is big endian, and has the same structure as the physical HCR * used by command interface */ struct mlx4_vhcr { u64 in_param; u64 out_param; u32 in_modifier; u32 errno; u16 op; u16 token; u8 op_modifier; u8 e_bit; }; struct mlx4_vhcr_cmd { __be64 in_param; __be32 in_modifier; u32 reserved1; __be64 out_param; __be16 token; u16 reserved; u8 status; u8 flags; __be16 opcode; } __packed; struct mlx4_cmd_info { u16 opcode; bool has_inbox; bool has_outbox; bool out_is_imm; bool encode_slave_id; bool skip_err_print; int (*verify)(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox); int (*wrapper)(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd); }; enum { MLX4_DEBUG_MASK_CMD_TIME = 0x100, }; #ifdef CONFIG_MLX4_DEBUG extern int mlx4_debug_level; #else /* CONFIG_MLX4_DEBUG */ #define mlx4_debug_level (0) #endif /* CONFIG_MLX4_DEBUG */ #define mlx4_dbg(mdev, format, arg...) \ do { \ if (mlx4_debug_level) \ dev_printk(KERN_DEBUG, &mdev->pdev->dev, format, ##arg); \ } while (0) #define mlx4_err(mdev, format, arg...) \ dev_err(&mdev->pdev->dev, format, ##arg) #define mlx4_info(mdev, format, arg...) \ dev_info(&mdev->pdev->dev, format, ##arg) #define mlx4_warn(mdev, format, arg...) \ dev_warn(&mdev->pdev->dev, format, ##arg) extern int mlx4_log_num_mgm_entry_size; extern int log_mtts_per_seg; extern int mlx4_blck_lb; extern int mlx4_set_4k_mtu; #define MLX4_MAX_NUM_SLAVES (MLX4_MAX_NUM_PF + MLX4_MAX_NUM_VF) #define ALL_SLAVES 0xff struct mlx4_bitmap { u32 last; u32 top; u32 max; u32 reserved_top; u32 mask; u32 avail; spinlock_t lock; unsigned long *table; }; struct mlx4_buddy { unsigned long **bits; unsigned int *num_free; u32 max_order; spinlock_t lock; }; struct mlx4_icm; struct mlx4_icm_table { u64 virt; int num_icm; u32 num_obj; int obj_size; int lowmem; int coherent; struct mutex mutex; struct mlx4_icm **icm; }; #define MLX4_MPT_FLAG_SW_OWNS (0xfUL << 28) #define MLX4_MPT_FLAG_FREE (0x3UL << 28) #define MLX4_MPT_FLAG_MIO (1 << 17) #define MLX4_MPT_FLAG_BIND_ENABLE (1 << 15) #define MLX4_MPT_FLAG_PHYSICAL (1 << 9) #define MLX4_MPT_FLAG_REGION (1 << 8) #define MLX4_MPT_PD_FLAG_FAST_REG (1 << 27) #define MLX4_MPT_PD_FLAG_RAE (1 << 28) #define MLX4_MPT_PD_FLAG_EN_INV (3 << 24) #define MLX4_MPT_QP_FLAG_BOUND_QP (1 << 7) #define MLX4_MPT_STATUS_SW 0xF0 #define MLX4_MPT_STATUS_HW 0x00 /* * Must be packed because mtt_seg is 64 bits but only aligned to 32 bits. */ struct mlx4_mpt_entry { __be32 flags; __be32 qpn; __be32 key; __be32 pd_flags; __be64 start; __be64 length; __be32 lkey; __be32 win_cnt; u8 reserved1[3]; u8 mtt_rep; __be64 mtt_addr; __be32 mtt_sz; __be32 entity_size; __be32 first_byte_offset; } __packed; /* * Must be packed because start is 64 bits but only aligned to 32 bits. */ struct mlx4_eq_context { __be32 flags; u16 reserved1[3]; __be16 page_offset; u8 log_eq_size; u8 reserved2[4]; u8 eq_period; u8 reserved3; u8 eq_max_count; u8 reserved4[3]; u8 intr; u8 log_page_size; u8 reserved5[2]; u8 mtt_base_addr_h; __be32 mtt_base_addr_l; u32 reserved6[2]; __be32 consumer_index; __be32 producer_index; u32 reserved7[4]; }; struct mlx4_cq_context { __be32 flags; u16 reserved1[3]; __be16 page_offset; __be32 logsize_usrpage; __be16 cq_period; __be16 cq_max_count; u8 reserved2[3]; u8 comp_eqn; u8 log_page_size; u8 reserved3[2]; u8 mtt_base_addr_h; __be32 mtt_base_addr_l; __be32 last_notified_index; __be32 solicit_producer_index; __be32 consumer_index; __be32 producer_index; u32 reserved4[2]; __be64 db_rec_addr; }; struct mlx4_srq_context { __be32 state_logsize_srqn; u8 logstride; u8 reserved1; __be16 xrcd; __be32 pg_offset_cqn; u32 reserved2; u8 log_page_size; u8 reserved3[2]; u8 mtt_base_addr_h; __be32 mtt_base_addr_l; __be32 pd; __be16 limit_watermark; __be16 wqe_cnt; u16 reserved4; __be16 wqe_counter; u32 reserved5; __be64 db_rec_addr; }; struct mlx4_eq { struct mlx4_dev *dev; void __iomem *doorbell; int eqn; u32 cons_index; u16 irq; u16 have_irq; int nent; struct mlx4_buf_list *page_list; struct mlx4_mtt mtt; }; struct mlx4_slave_eqe { u8 type; u8 port; u32 param; }; struct mlx4_slave_event_eq_info { int eqn; u16 token; }; struct mlx4_profile { int num_qp; int rdmarc_per_qp; int num_srq; int num_cq; int num_mcg; int num_mpt; unsigned num_mtt_segs; }; struct mlx4_fw { u64 clr_int_base; u64 catas_offset; u64 comm_base; u64 clock_offset; struct mlx4_icm *fw_icm; struct mlx4_icm *aux_icm; u32 catas_size; u16 fw_pages; u8 clr_int_bar; u8 catas_bar; u8 comm_bar; u8 clock_bar; }; struct mlx4_comm { u32 slave_write; u32 slave_read; }; enum { MLX4_MCAST_CONFIG = 0, MLX4_MCAST_DISABLE = 1, MLX4_MCAST_ENABLE = 2, }; #define VLAN_FLTR_SIZE 128 struct mlx4_vlan_fltr { __be32 entry[VLAN_FLTR_SIZE]; }; struct mlx4_mcast_entry { struct list_head list; u64 addr; }; struct mlx4_promisc_qp { struct list_head list; u32 qpn; }; struct mlx4_steer_index { struct list_head list; unsigned int index; struct list_head duplicates; }; #define MLX4_EVENT_TYPES_NUM 64 struct mlx4_slave_state { u8 comm_toggle; u8 last_cmd; u8 init_port_mask; bool active; bool old_vlan_api; u8 function; dma_addr_t vhcr_dma; u16 mtu[MLX4_MAX_PORTS + 1]; __be32 ib_cap_mask[MLX4_MAX_PORTS + 1]; struct mlx4_slave_eqe eq[MLX4_MFUNC_MAX_EQES]; struct list_head mcast_filters[MLX4_MAX_PORTS + 1]; struct mlx4_vlan_fltr *vlan_filter[MLX4_MAX_PORTS + 1]; /* event type to eq number lookup */ struct mlx4_slave_event_eq_info event_eq[MLX4_EVENT_TYPES_NUM]; u16 eq_pi; u16 eq_ci; spinlock_t lock; /*initialized via the kzalloc*/ u8 is_slave_going_down; u32 cookie; enum slave_port_state port_state[MLX4_MAX_PORTS + 1]; }; #define MLX4_VGT 4095 #define NO_INDX (-1) struct mlx4_vport_state { u64 mac; u16 default_vlan; u8 default_qos; u32 tx_rate; bool spoofchk; u32 link_state; }; struct mlx4_vf_admin_state { struct mlx4_vport_state vport[MLX4_MAX_PORTS + 1]; }; struct mlx4_vport_oper_state { struct mlx4_vport_state state; int mac_idx; int vlan_idx; }; struct mlx4_vf_oper_state { struct mlx4_vport_oper_state vport[MLX4_MAX_PORTS + 1]; }; struct slave_list { struct mutex mutex; struct list_head res_list[MLX4_NUM_OF_RESOURCE_TYPE]; }; struct resource_allocator { spinlock_t alloc_lock; union { int res_reserved; int res_port_rsvd[MLX4_MAX_PORTS]; }; union { int res_free; int res_port_free[MLX4_MAX_PORTS]; }; int *quota; int *allocated; int *guaranteed; }; struct mlx4_resource_tracker { spinlock_t lock; /* tree for each resources */ struct rb_root res_tree[MLX4_NUM_OF_RESOURCE_TYPE]; /* num_of_slave's lists, one per slave */ struct slave_list *slave_list; struct resource_allocator res_alloc[MLX4_NUM_OF_RESOURCE_TYPE]; }; #define SLAVE_EVENT_EQ_SIZE 128 struct mlx4_slave_event_eq { u32 eqn; u32 cons; u32 prod; spinlock_t event_lock; struct mlx4_eqe event_eqe[SLAVE_EVENT_EQ_SIZE]; }; struct mlx4_master_qp0_state { int proxy_qp0_active; int qp0_active; int port_active; }; struct mlx4_mfunc_master_ctx { struct mlx4_slave_state *slave_state; struct mlx4_vf_admin_state *vf_admin; struct mlx4_vf_oper_state *vf_oper; struct mlx4_master_qp0_state qp0_state[MLX4_MAX_PORTS + 1]; int init_port_ref[MLX4_MAX_PORTS + 1]; u16 max_mtu[MLX4_MAX_PORTS + 1]; int disable_mcast_ref[MLX4_MAX_PORTS + 1]; struct mlx4_resource_tracker res_tracker; struct workqueue_struct *comm_wq; struct work_struct comm_work; struct work_struct arm_comm_work; struct work_struct slave_event_work; struct work_struct slave_flr_event_work; spinlock_t slave_state_lock; __be32 comm_arm_bit_vector[4]; struct mlx4_eqe cmd_eqe; struct mlx4_slave_event_eq slave_eq; struct mutex gen_eqe_mutex[MLX4_MFUNC_MAX]; }; struct mlx4_mfunc { struct mlx4_comm __iomem *comm; struct mlx4_vhcr_cmd *vhcr; dma_addr_t vhcr_dma; struct mlx4_mfunc_master_ctx master; }; #define MGM_QPN_MASK 0x00FFFFFF #define MGM_BLCK_LB_BIT 30 struct mlx4_mgm { __be32 next_gid_index; __be32 members_count; u32 reserved[2]; u8 gid[16]; __be32 qp[MLX4_MAX_QP_PER_MGM]; }; struct mlx4_cmd { struct pci_pool *pool; void __iomem *hcr; struct mutex hcr_mutex; struct mutex slave_cmd_mutex; struct semaphore poll_sem; struct semaphore event_sem; int max_cmds; spinlock_t context_lock; int free_head; struct mlx4_cmd_context *context; u16 token_mask; u8 use_events; u8 toggle; u8 comm_toggle; }; enum { MLX4_VF_IMMED_VLAN_FLAG_VLAN = 1 << 0, MLX4_VF_IMMED_VLAN_FLAG_QOS = 1 << 1, }; struct mlx4_vf_immed_vlan_work { struct work_struct work; struct mlx4_priv *priv; int flags; int slave; int vlan_ix; int orig_vlan_ix; u8 port; u8 qos; u16 vlan_id; u16 orig_vlan_id; }; struct mlx4_uar_table { struct mlx4_bitmap bitmap; }; struct mlx4_mr_table { struct mlx4_bitmap mpt_bitmap; struct mlx4_buddy mtt_buddy; u64 mtt_base; u64 mpt_base; struct mlx4_icm_table mtt_table; struct mlx4_icm_table dmpt_table; }; struct mlx4_cq_table { struct mlx4_bitmap bitmap; spinlock_t lock; rwlock_t cq_table_lock; struct radix_tree_root tree; struct mlx4_icm_table table; struct mlx4_icm_table cmpt_table; }; struct mlx4_eq_table { struct mlx4_bitmap bitmap; char *irq_names; void __iomem *clr_int; void __iomem **uar_map; u32 clr_mask; struct mlx4_eq *eq; struct mlx4_icm_table table; struct mlx4_icm_table cmpt_table; int have_irq; u8 inta_pin; }; struct mlx4_srq_table { struct mlx4_bitmap bitmap; spinlock_t lock; struct radix_tree_root tree; struct mlx4_icm_table table; struct mlx4_icm_table cmpt_table; }; struct mlx4_qp_table { struct mlx4_bitmap bitmap; u32 rdmarc_base; int rdmarc_shift; spinlock_t lock; struct mlx4_icm_table qp_table; struct mlx4_icm_table auxc_table; struct mlx4_icm_table altc_table; struct mlx4_icm_table rdmarc_table; struct mlx4_icm_table cmpt_table; }; struct mlx4_mcg_table { struct mutex mutex; struct mlx4_bitmap bitmap; struct mlx4_icm_table table; }; struct mlx4_catas_err { u32 __iomem *map; struct timer_list timer; struct list_head list; }; #define MLX4_MAX_MAC_NUM 128 #define MLX4_MAC_TABLE_SIZE (MLX4_MAX_MAC_NUM << 3) struct mlx4_mac_table { __be64 entries[MLX4_MAX_MAC_NUM]; int refs[MLX4_MAX_MAC_NUM]; struct mutex mutex; int total; int max; }; #define MLX4_MAX_VLAN_NUM 128 #define MLX4_VLAN_TABLE_SIZE (MLX4_MAX_VLAN_NUM << 2) struct mlx4_vlan_table { __be32 entries[MLX4_MAX_VLAN_NUM]; int refs[MLX4_MAX_VLAN_NUM]; struct mutex mutex; int total; int max; }; #define SET_PORT_GEN_ALL_VALID 0x7 #define SET_PORT_PROMISC_SHIFT 31 #define SET_PORT_MC_PROMISC_SHIFT 30 enum { MCAST_DIRECT_ONLY = 0, MCAST_DIRECT = 1, MCAST_DEFAULT = 2 }; struct mlx4_set_port_general_context { u8 reserved[3]; u8 flags; u16 reserved2; __be16 mtu; u8 pptx; u8 pfctx; u16 reserved3; u8 pprx; u8 pfcrx; u16 reserved4; }; struct mlx4_set_port_rqp_calc_context { __be32 base_qpn; u8 rererved; u8 n_mac; u8 n_vlan; u8 n_prio; u8 reserved2[3]; u8 mac_miss; u8 intra_no_vlan; u8 no_vlan; u8 intra_vlan_miss; u8 vlan_miss; u8 reserved3[3]; u8 no_vlan_prio; __be32 promisc; __be32 mcast; }; struct mlx4_hca_info { struct mlx4_dev *dev; struct device_attribute firmware_attr; struct device_attribute hca_attr; struct device_attribute board_attr; }; struct mlx4_port_info { struct mlx4_dev *dev; int port; char dev_name[16]; struct device_attribute port_attr; enum mlx4_port_type tmp_type; char dev_mtu_name[16]; struct device_attribute port_mtu_attr; struct mlx4_mac_table mac_table; struct mlx4_vlan_table vlan_table; int base_qpn; }; struct mlx4_sense { struct mlx4_dev *dev; u8 do_sense_port[MLX4_MAX_PORTS + 1]; u8 sense_allowed[MLX4_MAX_PORTS + 1]; struct delayed_work sense_poll; }; struct mlx4_msix_ctl { u64 pool_bm; struct mutex pool_lock; }; struct mlx4_steer { struct list_head promisc_qps[MLX4_NUM_STEERS]; struct list_head steer_entries[MLX4_NUM_STEERS]; }; enum { MLX4_PCI_DEV_IS_VF = 1 << 0, MLX4_PCI_DEV_FORCE_SENSE_PORT = 1 << 1, }; struct mlx4_roce_gid_entry { u8 raw[16]; }; struct counter_index { struct list_head list; u32 index; }; struct mlx4_counters { struct mlx4_bitmap bitmap; struct list_head global_port_list[MLX4_MAX_PORTS]; struct list_head vf_list[MLX4_MAX_NUM_VF][MLX4_MAX_PORTS]; struct mutex mutex; }; enum { MLX4_NO_RR = 0, MLX4_USE_RR = 1, }; struct mlx4_priv { struct mlx4_dev dev; struct list_head dev_list; struct list_head ctx_list; spinlock_t ctx_lock; int pci_dev_data; struct list_head pgdir_list; struct mutex pgdir_mutex; struct mlx4_fw fw; struct mlx4_cmd cmd; struct mlx4_mfunc mfunc; struct mlx4_bitmap pd_bitmap; struct mlx4_bitmap xrcd_bitmap; struct mlx4_uar_table uar_table; struct mlx4_mr_table mr_table; struct mlx4_cq_table cq_table; struct mlx4_eq_table eq_table; struct mlx4_srq_table srq_table; struct mlx4_qp_table qp_table; struct mlx4_mcg_table mcg_table; struct mlx4_counters counters_table; struct mlx4_catas_err catas_err; void __iomem *clr_base; struct mlx4_uar driver_uar; void __iomem *kar; struct mlx4_port_info port[MLX4_MAX_PORTS + 1]; struct mlx4_hca_info hca_info; struct mlx4_sense sense; struct mutex port_mutex; struct mlx4_msix_ctl msix_ctl; struct mlx4_steer *steer; struct list_head bf_list; struct mutex bf_mutex; struct io_mapping *bf_mapping; void __iomem *clock_mapping; int reserved_mtts; int fs_hash_mode; u8 virt2phys_pkey[MLX4_MFUNC_MAX][MLX4_MAX_PORTS][MLX4_MAX_PORT_PKEYS]; __be64 slave_node_guids[MLX4_MFUNC_MAX]; struct mlx4_roce_gid_entry roce_gids[MLX4_MAX_PORTS][128]; atomic_t opreq_count; struct work_struct opreq_task; }; static inline struct mlx4_priv *mlx4_priv(struct mlx4_dev *dev) { return container_of(dev, struct mlx4_priv, dev); } #define MLX4_SENSE_RANGE (HZ * 3) extern struct workqueue_struct *mlx4_wq; u32 mlx4_bitmap_alloc(struct mlx4_bitmap *bitmap); void mlx4_bitmap_free(struct mlx4_bitmap *bitmap, u32 obj, int use_rr); u32 mlx4_bitmap_alloc_range(struct mlx4_bitmap *bitmap, int cnt, int align, u32 skip_mask); void mlx4_bitmap_free_range(struct mlx4_bitmap *bitmap, u32 obj, int cnt, int use_rr); u32 mlx4_bitmap_avail(struct mlx4_bitmap *bitmap); int mlx4_bitmap_init(struct mlx4_bitmap *bitmap, u32 num, u32 mask, u32 reserved_bot, u32 resetrved_top); void mlx4_bitmap_cleanup(struct mlx4_bitmap *bitmap); int mlx4_reset(struct mlx4_dev *dev); int mlx4_alloc_eq_table(struct mlx4_dev *dev); void mlx4_free_eq_table(struct mlx4_dev *dev); int mlx4_init_pd_table(struct mlx4_dev *dev); int mlx4_init_xrcd_table(struct mlx4_dev *dev); int mlx4_init_uar_table(struct mlx4_dev *dev); int mlx4_init_mr_table(struct mlx4_dev *dev); int mlx4_init_eq_table(struct mlx4_dev *dev); int mlx4_init_cq_table(struct mlx4_dev *dev); int mlx4_init_qp_table(struct mlx4_dev *dev); int mlx4_init_srq_table(struct mlx4_dev *dev); int mlx4_init_mcg_table(struct mlx4_dev *dev); void mlx4_cleanup_pd_table(struct mlx4_dev *dev); void mlx4_cleanup_xrcd_table(struct mlx4_dev *dev); void mlx4_cleanup_uar_table(struct mlx4_dev *dev); void mlx4_cleanup_mr_table(struct mlx4_dev *dev); void mlx4_cleanup_eq_table(struct mlx4_dev *dev); void mlx4_cleanup_cq_table(struct mlx4_dev *dev); void mlx4_cleanup_qp_table(struct mlx4_dev *dev); void mlx4_cleanup_srq_table(struct mlx4_dev *dev); void mlx4_cleanup_mcg_table(struct mlx4_dev *dev); int __mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn); void __mlx4_qp_free_icm(struct mlx4_dev *dev, int qpn); int __mlx4_cq_alloc_icm(struct mlx4_dev *dev, int *cqn); void __mlx4_cq_free_icm(struct mlx4_dev *dev, int cqn); int __mlx4_srq_alloc_icm(struct mlx4_dev *dev, int *srqn); void __mlx4_srq_free_icm(struct mlx4_dev *dev, int srqn); int __mlx4_mpt_reserve(struct mlx4_dev *dev); void __mlx4_mpt_release(struct mlx4_dev *dev, u32 index); int __mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index); void __mlx4_mpt_free_icm(struct mlx4_dev *dev, u32 index); u32 __mlx4_alloc_mtt_range(struct mlx4_dev *dev, int order); void __mlx4_free_mtt_range(struct mlx4_dev *dev, u32 first_seg, int order); int mlx4_WRITE_MTT_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd); int mlx4_SYNC_TPT_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd); int mlx4_SW2HW_MPT_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd); int mlx4_HW2SW_MPT_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd); int mlx4_QUERY_MPT_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd); int mlx4_SW2HW_EQ_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd); int mlx4_DMA_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd); int __mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align, int *base, u8 flags); void __mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt); int __mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac); void __mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, u64 mac); int __mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt, int start_index, int npages, u64 *page_list); int __mlx4_counter_alloc(struct mlx4_dev *dev, int slave, int port, u32 *idx); void __mlx4_counter_free(struct mlx4_dev *dev, int slave, int port, u32 idx); int __mlx4_slave_counters_free(struct mlx4_dev *dev, int slave); int __mlx4_clear_if_stat(struct mlx4_dev *dev, u8 counter_index); u8 mlx4_get_default_counter_index(struct mlx4_dev *dev, int slave, int port); int __mlx4_xrcd_alloc(struct mlx4_dev *dev, u32 *xrcdn); void __mlx4_xrcd_free(struct mlx4_dev *dev, u32 xrcdn); void mlx4_start_catas_poll(struct mlx4_dev *dev); void mlx4_stop_catas_poll(struct mlx4_dev *dev); void mlx4_catas_init(void); int mlx4_restart_one(struct pci_dev *pdev); int mlx4_register_device(struct mlx4_dev *dev); void mlx4_unregister_device(struct mlx4_dev *dev); void mlx4_dispatch_event(struct mlx4_dev *dev, enum mlx4_dev_event type, unsigned long param); struct mlx4_dev_cap; struct mlx4_init_hca_param; u64 mlx4_make_profile(struct mlx4_dev *dev, struct mlx4_profile *request, struct mlx4_dev_cap *dev_cap, struct mlx4_init_hca_param *init_hca); void mlx4_master_comm_channel(struct work_struct *work); void mlx4_master_arm_comm_channel(struct work_struct *work); void mlx4_gen_slave_eqe(struct work_struct *work); void mlx4_master_handle_slave_flr(struct work_struct *work); int mlx4_ALLOC_RES_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd); int mlx4_FREE_RES_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd); int mlx4_MAP_EQ_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd); int mlx4_COMM_INT_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd); int mlx4_HW2SW_EQ_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd); int mlx4_QUERY_EQ_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd); int mlx4_SW2HW_CQ_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd); int mlx4_HW2SW_CQ_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd); int mlx4_QUERY_CQ_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd); int mlx4_MODIFY_CQ_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd); int mlx4_SW2HW_SRQ_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd); int mlx4_HW2SW_SRQ_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd); int mlx4_QUERY_SRQ_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd); int mlx4_ARM_SRQ_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd); int mlx4_GEN_QP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd); int mlx4_RST2INIT_QP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd); int mlx4_INIT2INIT_QP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd); int mlx4_INIT2RTR_QP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd); int mlx4_RTR2RTS_QP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd); int mlx4_RTS2RTS_QP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd); int mlx4_SQERR2RTS_QP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd); int mlx4_2ERR_QP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd); int mlx4_RTS2SQD_QP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd); int mlx4_SQD2SQD_QP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd); int mlx4_SQD2RTS_QP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd); int mlx4_2RST_QP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd); int mlx4_QUERY_QP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd); int mlx4_GEN_EQE(struct mlx4_dev *dev, int slave, struct mlx4_eqe *eqe); int mlx4_cmd_init(struct mlx4_dev *dev); void mlx4_cmd_cleanup(struct mlx4_dev *dev); int mlx4_multi_func_init(struct mlx4_dev *dev); void mlx4_multi_func_cleanup(struct mlx4_dev *dev); void mlx4_cmd_event(struct mlx4_dev *dev, u16 token, u8 status, u64 out_param); int mlx4_cmd_use_events(struct mlx4_dev *dev); void mlx4_cmd_use_polling(struct mlx4_dev *dev); int mlx4_comm_cmd(struct mlx4_dev *dev, u8 cmd, u16 param, unsigned long timeout); void mlx4_cq_completion(struct mlx4_dev *dev, u32 cqn); void mlx4_cq_event(struct mlx4_dev *dev, u32 cqn, int event_type); void mlx4_qp_event(struct mlx4_dev *dev, u32 qpn, int event_type); void mlx4_srq_event(struct mlx4_dev *dev, u32 srqn, int event_type); void mlx4_handle_catas_err(struct mlx4_dev *dev); int mlx4_SENSE_PORT(struct mlx4_dev *dev, int port, enum mlx4_port_type *type); void mlx4_do_sense_ports(struct mlx4_dev *dev, enum mlx4_port_type *stype, enum mlx4_port_type *defaults); void mlx4_start_sense(struct mlx4_dev *dev); void mlx4_stop_sense(struct mlx4_dev *dev); void mlx4_sense_init(struct mlx4_dev *dev); int mlx4_check_port_params(struct mlx4_dev *dev, enum mlx4_port_type *port_type); int mlx4_change_port_types(struct mlx4_dev *dev, enum mlx4_port_type *port_types); void mlx4_init_mac_table(struct mlx4_dev *dev, struct mlx4_mac_table *table); void mlx4_init_vlan_table(struct mlx4_dev *dev, struct mlx4_vlan_table *table); void __mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, u16 vlan); int __mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index); int mlx4_SET_PORT(struct mlx4_dev *dev, u8 port, int pkey_tbl_sz); /* resource tracker functions*/ int mlx4_get_slave_from_resource_id(struct mlx4_dev *dev, enum mlx4_resource resource_type, u64 resource_id, int *slave); void mlx4_delete_all_resources_for_slave(struct mlx4_dev *dev, int slave_id); int mlx4_init_resource_tracker(struct mlx4_dev *dev); void mlx4_free_resource_tracker(struct mlx4_dev *dev, enum mlx4_res_tracker_free_type type); int mlx4_QUERY_FW_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd); int mlx4_SET_PORT_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd); int mlx4_INIT_PORT_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd); int mlx4_CLOSE_PORT_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd); int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd); int mlx4_QUERY_PORT_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd); int mlx4_get_port_ib_caps(struct mlx4_dev *dev, u8 port, __be32 *caps); int mlx4_get_slave_pkey_gid_tbl_len(struct mlx4_dev *dev, u8 port, int *gid_tbl_len, int *pkey_tbl_len); int mlx4_QP_ATTACH_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd); int mlx4_PROMISC_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd); int mlx4_qp_detach_common(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], enum mlx4_protocol prot, enum mlx4_steer_type steer); int mlx4_qp_attach_common(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], int block_mcast_loopback, enum mlx4_protocol prot, enum mlx4_steer_type steer); int mlx4_trans_to_dmfs_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], u8 port, int block_mcast_loopback, enum mlx4_protocol prot, u64 *reg_id); int mlx4_SET_MCAST_FLTR(struct mlx4_dev *dev, u8 port, u64 mac, u64 clear, u8 mode); int mlx4_SET_MCAST_FLTR_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd); int mlx4_SET_VLAN_FLTR_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd); int mlx4_common_set_vlan_fltr(struct mlx4_dev *dev, int function, int port, void *buf); int mlx4_DUMP_ETH_STATS_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd); int mlx4_PKEY_TABLE_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd); int mlx4_QUERY_IF_STAT_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd); int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd); int mlx4_QP_FLOW_STEERING_DETACH_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd); int mlx4_MOD_STAT_CFG_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd); int mlx4_get_mgm_entry_size(struct mlx4_dev *dev); int mlx4_get_qp_per_mgm(struct mlx4_dev *dev); static inline void set_param_l(u64 *arg, u32 val) { *arg = (*arg & 0xffffffff00000000ULL) | (u64) val; } static inline void set_param_h(u64 *arg, u32 val) { *arg = (*arg & 0xffffffff) | ((u64) val << 32); } static inline u32 get_param_l(u64 *arg) { return (u32) (*arg & 0xffffffff); } static inline u32 get_param_h(u64 *arg) { return (u32)(*arg >> 32); } static inline spinlock_t *mlx4_tlock(struct mlx4_dev *dev) { return &mlx4_priv(dev)->mfunc.master.res_tracker.lock; } #define NOT_MASKED_PD_BITS 17 void sys_tune_init(void); void sys_tune_fini(void); void mlx4_init_quotas(struct mlx4_dev *dev); int mlx4_get_slave_num_gids(struct mlx4_dev *dev, int slave); int mlx4_get_base_gid_ix(struct mlx4_dev *dev, int slave); void mlx4_vf_immed_vlan_work_handler(struct work_struct *_work); #endif /* MLX4_H */ Index: head/sys/ofed/drivers/net/mlx4/resource_tracker.c =================================================================== --- head/sys/ofed/drivers/net/mlx4/resource_tracker.c (revision 299178) +++ head/sys/ofed/drivers/net/mlx4/resource_tracker.c (revision 299179) @@ -1,4686 +1,4686 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005, 2006, 2007, 2008, 2014 Mellanox Technologies. * All rights reserved. * Copyright (c) 2005, 2006, 2007 Cisco Systems, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include #include #include #include #include #include #include #include #include #include #include "mlx4.h" #include "fw.h" #define MLX4_MAC_VALID (1ull << 63) struct mac_res { struct list_head list; u64 mac; int ref_count; u8 smac_index; u8 port; }; struct vlan_res { struct list_head list; u16 vlan; int ref_count; int vlan_index; u8 port; }; struct res_common { struct list_head list; struct rb_node node; u64 res_id; int owner; int state; int from_state; int to_state; int removing; }; enum { RES_ANY_BUSY = 1 }; struct res_gid { struct list_head list; u8 gid[16]; enum mlx4_protocol prot; enum mlx4_steer_type steer; u64 reg_id; }; enum res_qp_states { RES_QP_BUSY = RES_ANY_BUSY, /* QP number was allocated */ RES_QP_RESERVED, /* ICM memory for QP context was mapped */ RES_QP_MAPPED, /* QP is in hw ownership */ RES_QP_HW }; struct res_qp { struct res_common com; struct res_mtt *mtt; struct res_cq *rcq; struct res_cq *scq; struct res_srq *srq; struct list_head mcg_list; spinlock_t mcg_spl; int local_qpn; atomic_t ref_count; u32 qpc_flags; /* saved qp params before VST enforcement in order to restore on VGT */ u8 sched_queue; __be32 param3; u8 vlan_control; u8 fvl_rx; u8 pri_path_fl; u8 vlan_index; u8 feup; }; enum res_mtt_states { RES_MTT_BUSY = RES_ANY_BUSY, RES_MTT_ALLOCATED, }; static inline const char *mtt_states_str(enum res_mtt_states state) { switch (state) { case RES_MTT_BUSY: return "RES_MTT_BUSY"; case RES_MTT_ALLOCATED: return "RES_MTT_ALLOCATED"; default: return "Unknown"; } } struct res_mtt { struct res_common com; int order; atomic_t ref_count; }; enum res_mpt_states { RES_MPT_BUSY = RES_ANY_BUSY, RES_MPT_RESERVED, RES_MPT_MAPPED, RES_MPT_HW, }; struct res_mpt { struct res_common com; struct res_mtt *mtt; int key; }; enum res_eq_states { RES_EQ_BUSY = RES_ANY_BUSY, RES_EQ_RESERVED, RES_EQ_HW, }; struct res_eq { struct res_common com; struct res_mtt *mtt; }; enum res_cq_states { RES_CQ_BUSY = RES_ANY_BUSY, RES_CQ_ALLOCATED, RES_CQ_HW, }; struct res_cq { struct res_common com; struct res_mtt *mtt; atomic_t ref_count; }; enum res_srq_states { RES_SRQ_BUSY = RES_ANY_BUSY, RES_SRQ_ALLOCATED, RES_SRQ_HW, }; struct res_srq { struct res_common com; struct res_mtt *mtt; struct res_cq *cq; atomic_t ref_count; }; enum res_counter_states { RES_COUNTER_BUSY = RES_ANY_BUSY, RES_COUNTER_ALLOCATED, }; struct res_counter { struct res_common com; int port; }; enum res_xrcdn_states { RES_XRCD_BUSY = RES_ANY_BUSY, RES_XRCD_ALLOCATED, }; struct res_xrcdn { struct res_common com; int port; }; enum res_fs_rule_states { RES_FS_RULE_BUSY = RES_ANY_BUSY, RES_FS_RULE_ALLOCATED, }; struct res_fs_rule { struct res_common com; int qpn; }; static int mlx4_is_eth(struct mlx4_dev *dev, int port) { return dev->caps.port_mask[port] == MLX4_PORT_TYPE_IB ? 0 : 1; } static void *res_tracker_lookup(struct rb_root *root, u64 res_id) { struct rb_node *node = root->rb_node; while (node) { struct res_common *res = container_of(node, struct res_common, node); if (res_id < res->res_id) node = node->rb_left; else if (res_id > res->res_id) node = node->rb_right; else return res; } return NULL; } static int res_tracker_insert(struct rb_root *root, struct res_common *res) { struct rb_node **new = &(root->rb_node), *parent = NULL; /* Figure out where to put new node */ while (*new) { struct res_common *this = container_of(*new, struct res_common, node); parent = *new; if (res->res_id < this->res_id) new = &((*new)->rb_left); else if (res->res_id > this->res_id) new = &((*new)->rb_right); else return -EEXIST; } /* Add new node and rebalance tree. */ rb_link_node(&res->node, parent, new); rb_insert_color(&res->node, root); return 0; } enum qp_transition { QP_TRANS_INIT2RTR, QP_TRANS_RTR2RTS, QP_TRANS_RTS2RTS, QP_TRANS_SQERR2RTS, QP_TRANS_SQD2SQD, QP_TRANS_SQD2RTS }; /* For Debug uses */ static const char *ResourceType(enum mlx4_resource rt) { switch (rt) { case RES_QP: return "RES_QP"; case RES_CQ: return "RES_CQ"; case RES_SRQ: return "RES_SRQ"; case RES_MPT: return "RES_MPT"; case RES_MTT: return "RES_MTT"; case RES_MAC: return "RES_MAC"; case RES_VLAN: return "RES_VLAN"; case RES_EQ: return "RES_EQ"; case RES_COUNTER: return "RES_COUNTER"; case RES_FS_RULE: return "RES_FS_RULE"; case RES_XRCD: return "RES_XRCD"; default: return "Unknown resource type !!!"; }; } static void rem_slave_vlans(struct mlx4_dev *dev, int slave); static inline int mlx4_grant_resource(struct mlx4_dev *dev, int slave, enum mlx4_resource res_type, int count, int port) { struct mlx4_priv *priv = mlx4_priv(dev); struct resource_allocator *res_alloc = &priv->mfunc.master.res_tracker.res_alloc[res_type]; int err = -EINVAL; int allocated, free, reserved, guaranteed, from_free; spin_lock(&res_alloc->alloc_lock); allocated = (port > 0) ? res_alloc->allocated[(port - 1) * (dev->num_vfs + 1) + slave] : res_alloc->allocated[slave]; free = (port > 0) ? res_alloc->res_port_free[port - 1] : res_alloc->res_free; reserved = (port > 0) ? res_alloc->res_port_rsvd[port - 1] : res_alloc->res_reserved; guaranteed = res_alloc->guaranteed[slave]; if (allocated + count > res_alloc->quota[slave]) goto out; if (allocated + count <= guaranteed) { err = 0; } else { /* portion may need to be obtained from free area */ if (guaranteed - allocated > 0) from_free = count - (guaranteed - allocated); else from_free = count; if (free - from_free > reserved) err = 0; } if (!err) { /* grant the request */ if (port > 0) { res_alloc->allocated[(port - 1) * (dev->num_vfs + 1) + slave] += count; res_alloc->res_port_free[port - 1] -= count; } else { res_alloc->allocated[slave] += count; res_alloc->res_free -= count; } } out: spin_unlock(&res_alloc->alloc_lock); return err; } static inline void mlx4_release_resource(struct mlx4_dev *dev, int slave, enum mlx4_resource res_type, int count, int port) { struct mlx4_priv *priv = mlx4_priv(dev); struct resource_allocator *res_alloc = &priv->mfunc.master.res_tracker.res_alloc[res_type]; spin_lock(&res_alloc->alloc_lock); if (port > 0) { res_alloc->allocated[(port - 1) * (dev->num_vfs + 1) + slave] -= count; res_alloc->res_port_free[port - 1] += count; } else { res_alloc->allocated[slave] -= count; res_alloc->res_free += count; } spin_unlock(&res_alloc->alloc_lock); return; } static inline void initialize_res_quotas(struct mlx4_dev *dev, struct resource_allocator *res_alloc, enum mlx4_resource res_type, int vf, int num_instances) { res_alloc->guaranteed[vf] = num_instances / (2 * (dev->num_vfs + 1)); res_alloc->quota[vf] = (num_instances / 2) + res_alloc->guaranteed[vf]; if (vf == mlx4_master_func_num(dev)) { res_alloc->res_free = num_instances; if (res_type == RES_MTT) { /* reserved mtts will be taken out of the PF allocation */ res_alloc->res_free += dev->caps.reserved_mtts; res_alloc->guaranteed[vf] += dev->caps.reserved_mtts; res_alloc->quota[vf] += dev->caps.reserved_mtts; } } } void mlx4_init_quotas(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); int pf; /* quotas for VFs are initialized in mlx4_slave_cap */ if (mlx4_is_slave(dev)) return; if (!mlx4_is_mfunc(dev)) { dev->quotas.qp = dev->caps.num_qps - dev->caps.reserved_qps - mlx4_num_reserved_sqps(dev); dev->quotas.cq = dev->caps.num_cqs - dev->caps.reserved_cqs; dev->quotas.srq = dev->caps.num_srqs - dev->caps.reserved_srqs; dev->quotas.mtt = dev->caps.num_mtts - dev->caps.reserved_mtts; dev->quotas.mpt = dev->caps.num_mpts - dev->caps.reserved_mrws; return; } pf = mlx4_master_func_num(dev); dev->quotas.qp = priv->mfunc.master.res_tracker.res_alloc[RES_QP].quota[pf]; dev->quotas.cq = priv->mfunc.master.res_tracker.res_alloc[RES_CQ].quota[pf]; dev->quotas.srq = priv->mfunc.master.res_tracker.res_alloc[RES_SRQ].quota[pf]; dev->quotas.mtt = priv->mfunc.master.res_tracker.res_alloc[RES_MTT].quota[pf]; dev->quotas.mpt = priv->mfunc.master.res_tracker.res_alloc[RES_MPT].quota[pf]; } int mlx4_init_resource_tracker(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); int i, j; int t; priv->mfunc.master.res_tracker.slave_list = kzalloc(dev->num_slaves * sizeof(struct slave_list), GFP_KERNEL); if (!priv->mfunc.master.res_tracker.slave_list) return -ENOMEM; for (i = 0 ; i < dev->num_slaves; i++) { for (t = 0; t < MLX4_NUM_OF_RESOURCE_TYPE; ++t) INIT_LIST_HEAD(&priv->mfunc.master.res_tracker. slave_list[i].res_list[t]); mutex_init(&priv->mfunc.master.res_tracker.slave_list[i].mutex); } mlx4_dbg(dev, "Started init_resource_tracker: %ld slaves\n", dev->num_slaves); for (i = 0 ; i < MLX4_NUM_OF_RESOURCE_TYPE; i++) priv->mfunc.master.res_tracker.res_tree[i] = RB_ROOT; for (i = 0; i < MLX4_NUM_OF_RESOURCE_TYPE; i++) { struct resource_allocator *res_alloc = &priv->mfunc.master.res_tracker.res_alloc[i]; res_alloc->quota = kmalloc((dev->num_vfs + 1) * sizeof(int), GFP_KERNEL); res_alloc->guaranteed = kmalloc((dev->num_vfs + 1) * sizeof(int), GFP_KERNEL); if (i == RES_MAC || i == RES_VLAN) res_alloc->allocated = kzalloc(MLX4_MAX_PORTS * (dev->num_vfs + 1) * sizeof(int), GFP_KERNEL); else res_alloc->allocated = kzalloc((dev->num_vfs + 1) * sizeof(int), GFP_KERNEL); if (!res_alloc->quota || !res_alloc->guaranteed || !res_alloc->allocated) goto no_mem_err; spin_lock_init(&res_alloc->alloc_lock); for (t = 0; t < dev->num_vfs + 1; t++) { switch (i) { case RES_QP: initialize_res_quotas(dev, res_alloc, RES_QP, t, dev->caps.num_qps - dev->caps.reserved_qps - mlx4_num_reserved_sqps(dev)); break; case RES_CQ: initialize_res_quotas(dev, res_alloc, RES_CQ, t, dev->caps.num_cqs - dev->caps.reserved_cqs); break; case RES_SRQ: initialize_res_quotas(dev, res_alloc, RES_SRQ, t, dev->caps.num_srqs - dev->caps.reserved_srqs); break; case RES_MPT: initialize_res_quotas(dev, res_alloc, RES_MPT, t, dev->caps.num_mpts - dev->caps.reserved_mrws); break; case RES_MTT: initialize_res_quotas(dev, res_alloc, RES_MTT, t, dev->caps.num_mtts - dev->caps.reserved_mtts); break; case RES_MAC: if (t == mlx4_master_func_num(dev)) { res_alloc->quota[t] = MLX4_MAX_MAC_NUM - 2 * dev->num_vfs; res_alloc->guaranteed[t] = res_alloc->quota[t]; for (j = 0; j < MLX4_MAX_PORTS; j++) res_alloc->res_port_free[j] = MLX4_MAX_MAC_NUM; } else { res_alloc->quota[t] = 2; res_alloc->guaranteed[t] = 2; } break; case RES_VLAN: if (t == mlx4_master_func_num(dev)) { res_alloc->quota[t] = MLX4_MAX_VLAN_NUM; res_alloc->guaranteed[t] = MLX4_MAX_VLAN_NUM / 2; for (j = 0; j < MLX4_MAX_PORTS; j++) res_alloc->res_port_free[j] = res_alloc->quota[t]; } else { res_alloc->quota[t] = MLX4_MAX_VLAN_NUM / 2; res_alloc->guaranteed[t] = 0; } break; case RES_COUNTER: res_alloc->quota[t] = dev->caps.max_counters; res_alloc->guaranteed[t] = 0; if (t == mlx4_master_func_num(dev)) res_alloc->res_free = res_alloc->quota[t]; break; default: break; } if (i == RES_MAC || i == RES_VLAN) { for (j = 0; j < MLX4_MAX_PORTS; j++) res_alloc->res_port_rsvd[j] += res_alloc->guaranteed[t]; } else { res_alloc->res_reserved += res_alloc->guaranteed[t]; } } } spin_lock_init(&priv->mfunc.master.res_tracker.lock); return 0; no_mem_err: for (i = 0; i < MLX4_NUM_OF_RESOURCE_TYPE; i++) { kfree(priv->mfunc.master.res_tracker.res_alloc[i].allocated); priv->mfunc.master.res_tracker.res_alloc[i].allocated = NULL; kfree(priv->mfunc.master.res_tracker.res_alloc[i].guaranteed); priv->mfunc.master.res_tracker.res_alloc[i].guaranteed = NULL; kfree(priv->mfunc.master.res_tracker.res_alloc[i].quota); priv->mfunc.master.res_tracker.res_alloc[i].quota = NULL; } return -ENOMEM; } void mlx4_free_resource_tracker(struct mlx4_dev *dev, enum mlx4_res_tracker_free_type type) { struct mlx4_priv *priv = mlx4_priv(dev); int i; if (priv->mfunc.master.res_tracker.slave_list) { if (type != RES_TR_FREE_STRUCTS_ONLY) { for (i = 0; i < dev->num_slaves; i++) { if (type == RES_TR_FREE_ALL || dev->caps.function != i) mlx4_delete_all_resources_for_slave(dev, i); } /* free master's vlans */ i = dev->caps.function; mutex_lock(&priv->mfunc.master.res_tracker.slave_list[i].mutex); rem_slave_vlans(dev, i); mutex_unlock(&priv->mfunc.master.res_tracker.slave_list[i].mutex); } if (type != RES_TR_FREE_SLAVES_ONLY) { for (i = 0; i < MLX4_NUM_OF_RESOURCE_TYPE; i++) { kfree(priv->mfunc.master.res_tracker.res_alloc[i].allocated); priv->mfunc.master.res_tracker.res_alloc[i].allocated = NULL; kfree(priv->mfunc.master.res_tracker.res_alloc[i].guaranteed); priv->mfunc.master.res_tracker.res_alloc[i].guaranteed = NULL; kfree(priv->mfunc.master.res_tracker.res_alloc[i].quota); priv->mfunc.master.res_tracker.res_alloc[i].quota = NULL; } kfree(priv->mfunc.master.res_tracker.slave_list); priv->mfunc.master.res_tracker.slave_list = NULL; } } } static void update_pkey_index(struct mlx4_dev *dev, int slave, struct mlx4_cmd_mailbox *inbox) { u8 sched = *(u8 *)(inbox->buf + 64); u8 orig_index = *(u8 *)(inbox->buf + 35); u8 new_index; struct mlx4_priv *priv = mlx4_priv(dev); int port; port = (sched >> 6 & 1) + 1; new_index = priv->virt2phys_pkey[slave][port - 1][orig_index]; *(u8 *)(inbox->buf + 35) = new_index; } static void update_gid(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *inbox, u8 slave) { struct mlx4_qp_context *qp_ctx = inbox->buf + 8; enum mlx4_qp_optpar optpar = be32_to_cpu(*(__be32 *) inbox->buf); u32 ts = (be32_to_cpu(qp_ctx->flags) >> 16) & 0xff; int port; if (MLX4_QP_ST_UD == ts) { port = (qp_ctx->pri_path.sched_queue >> 6 & 1) + 1; if (mlx4_is_eth(dev, port)) qp_ctx->pri_path.mgid_index = mlx4_get_base_gid_ix(dev, slave) | 0x80; else qp_ctx->pri_path.mgid_index = 0x80 | slave; } else if (MLX4_QP_ST_RC == ts || MLX4_QP_ST_UC == ts) { if (optpar & MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH) { port = (qp_ctx->pri_path.sched_queue >> 6 & 1) + 1; if (mlx4_is_eth(dev, port)) { qp_ctx->pri_path.mgid_index += mlx4_get_base_gid_ix(dev, slave); qp_ctx->pri_path.mgid_index &= 0x7f; } else { qp_ctx->pri_path.mgid_index = slave & 0x7F; } } if (optpar & MLX4_QP_OPTPAR_ALT_ADDR_PATH) { port = (qp_ctx->alt_path.sched_queue >> 6 & 1) + 1; if (mlx4_is_eth(dev, port)) { qp_ctx->alt_path.mgid_index += mlx4_get_base_gid_ix(dev, slave); qp_ctx->alt_path.mgid_index &= 0x7f; } else { qp_ctx->alt_path.mgid_index = slave & 0x7F; } } } } static int check_counter_index_validity(struct mlx4_dev *dev, int slave, int port, int idx) { struct mlx4_priv *priv = mlx4_priv(dev); struct counter_index *counter, *tmp_counter; if (slave == 0) { list_for_each_entry_safe(counter, tmp_counter, &priv->counters_table.global_port_list[port - 1], list) { if (counter->index == idx) return 0; } return -EINVAL; } else { list_for_each_entry_safe(counter, tmp_counter, &priv->counters_table.vf_list[slave - 1][port - 1], list) { if (counter->index == idx) return 0; } return -EINVAL; } } static int update_vport_qp_param(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *inbox, u8 slave, u32 qpn) { struct mlx4_qp_context *qpc = inbox->buf + 8; struct mlx4_vport_oper_state *vp_oper; struct mlx4_priv *priv; u32 qp_type; int port; port = (qpc->pri_path.sched_queue & 0x40) ? 2 : 1; priv = mlx4_priv(dev); vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port]; qp_type = (be32_to_cpu(qpc->flags) >> 16) & 0xff; if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH && qpc->pri_path.counter_index != MLX4_SINK_COUNTER_INDEX) { if (check_counter_index_validity(dev, slave, port, qpc->pri_path.counter_index)) return -EINVAL; } mlx4_dbg(dev, "%s: QP counter_index %d for slave %d port %d\n", __func__, qpc->pri_path.counter_index, slave, port); if ((dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_LB_SRC_CHK) && dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH && !mlx4_is_qp_reserved(dev, qpn) && qp_type == MLX4_QP_ST_MLX && qpc->pri_path.counter_index != 0xFF) { /* disable multicast loopback to qp with same counter */ qpc->pri_path.fl |= MLX4_FL_ETH_SRC_CHECK_MC_LB; qpc->pri_path.vlan_control |= MLX4_VLAN_CTRL_ETH_SRC_CHECK_IF_COUNTER; } if (MLX4_VGT != vp_oper->state.default_vlan) { /* the reserved QPs (special, proxy, tunnel) * do not operate over vlans */ if (mlx4_is_qp_reserved(dev, qpn)) return 0; /* force strip vlan by clear vsd */ qpc->param3 &= ~cpu_to_be32(MLX4_STRIP_VLAN); /* preserve IF_COUNTER flag */ qpc->pri_path.vlan_control &= MLX4_VLAN_CTRL_ETH_SRC_CHECK_IF_COUNTER; if (MLX4_QP_ST_RC != qp_type) { if (0 != vp_oper->state.default_vlan) { qpc->pri_path.vlan_control |= MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED | MLX4_VLAN_CTRL_ETH_RX_BLOCK_PRIO_TAGGED | MLX4_VLAN_CTRL_ETH_RX_BLOCK_UNTAGGED; } else { /* priority tagged */ qpc->pri_path.vlan_control |= MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED | MLX4_VLAN_CTRL_ETH_RX_BLOCK_TAGGED; } } qpc->pri_path.fvl_rx |= MLX4_FVL_RX_FORCE_ETH_VLAN; qpc->pri_path.vlan_index = vp_oper->vlan_idx; qpc->pri_path.fl |= MLX4_FL_CV | MLX4_FL_ETH_HIDE_CQE_VLAN; qpc->pri_path.feup |= MLX4_FEUP_FORCE_ETH_UP | MLX4_FVL_FORCE_ETH_VLAN; qpc->pri_path.sched_queue &= 0xC7; qpc->pri_path.sched_queue |= (vp_oper->state.default_qos) << 3; } if (vp_oper->state.spoofchk) { qpc->pri_path.feup |= MLX4_FSM_FORCE_ETH_SRC_MAC; qpc->pri_path.grh_mylmc = (0x80 & qpc->pri_path.grh_mylmc) + vp_oper->mac_idx; } return 0; } static int mpt_mask(struct mlx4_dev *dev) { return dev->caps.num_mpts - 1; } static void *find_res(struct mlx4_dev *dev, u64 res_id, enum mlx4_resource type) { struct mlx4_priv *priv = mlx4_priv(dev); return res_tracker_lookup(&priv->mfunc.master.res_tracker.res_tree[type], res_id); } static int get_res(struct mlx4_dev *dev, int slave, u64 res_id, enum mlx4_resource type, void *res) { struct res_common *r; int err = 0; spin_lock_irq(mlx4_tlock(dev)); r = find_res(dev, res_id, type); if (!r) { err = -ENONET; goto exit; } if (r->state == RES_ANY_BUSY) { err = -EBUSY; goto exit; } if (r->owner != slave) { err = -EPERM; goto exit; } r->from_state = r->state; r->state = RES_ANY_BUSY; if (res) *((struct res_common **)res) = r; exit: spin_unlock_irq(mlx4_tlock(dev)); return err; } int mlx4_get_slave_from_resource_id(struct mlx4_dev *dev, enum mlx4_resource type, u64 res_id, int *slave) { struct res_common *r; int err = -ENOENT; int id = res_id; if (type == RES_QP) id &= 0x7fffff; spin_lock(mlx4_tlock(dev)); r = find_res(dev, id, type); if (r) { *slave = r->owner; err = 0; } spin_unlock(mlx4_tlock(dev)); return err; } static void put_res(struct mlx4_dev *dev, int slave, u64 res_id, enum mlx4_resource type) { struct res_common *r; spin_lock_irq(mlx4_tlock(dev)); r = find_res(dev, res_id, type); if (r) r->state = r->from_state; spin_unlock_irq(mlx4_tlock(dev)); } static struct res_common *alloc_qp_tr(int id) { struct res_qp *ret; ret = kzalloc(sizeof *ret, GFP_KERNEL); if (!ret) return NULL; ret->com.res_id = id; ret->com.state = RES_QP_RESERVED; ret->local_qpn = id; INIT_LIST_HEAD(&ret->mcg_list); spin_lock_init(&ret->mcg_spl); atomic_set(&ret->ref_count, 0); return &ret->com; } static struct res_common *alloc_mtt_tr(int id, int order) { struct res_mtt *ret; ret = kzalloc(sizeof *ret, GFP_KERNEL); if (!ret) return NULL; ret->com.res_id = id; ret->order = order; ret->com.state = RES_MTT_ALLOCATED; atomic_set(&ret->ref_count, 0); return &ret->com; } static struct res_common *alloc_mpt_tr(int id, int key) { struct res_mpt *ret; ret = kzalloc(sizeof *ret, GFP_KERNEL); if (!ret) return NULL; ret->com.res_id = id; ret->com.state = RES_MPT_RESERVED; ret->key = key; return &ret->com; } static struct res_common *alloc_eq_tr(int id) { struct res_eq *ret; ret = kzalloc(sizeof *ret, GFP_KERNEL); if (!ret) return NULL; ret->com.res_id = id; ret->com.state = RES_EQ_RESERVED; return &ret->com; } static struct res_common *alloc_cq_tr(int id) { struct res_cq *ret; ret = kzalloc(sizeof *ret, GFP_KERNEL); if (!ret) return NULL; ret->com.res_id = id; ret->com.state = RES_CQ_ALLOCATED; atomic_set(&ret->ref_count, 0); return &ret->com; } static struct res_common *alloc_srq_tr(int id) { struct res_srq *ret; ret = kzalloc(sizeof *ret, GFP_KERNEL); if (!ret) return NULL; ret->com.res_id = id; ret->com.state = RES_SRQ_ALLOCATED; atomic_set(&ret->ref_count, 0); return &ret->com; } static struct res_common *alloc_counter_tr(int id) { struct res_counter *ret; ret = kzalloc(sizeof *ret, GFP_KERNEL); if (!ret) return NULL; ret->com.res_id = id; ret->com.state = RES_COUNTER_ALLOCATED; return &ret->com; } static struct res_common *alloc_xrcdn_tr(int id) { struct res_xrcdn *ret; ret = kzalloc(sizeof *ret, GFP_KERNEL); if (!ret) return NULL; ret->com.res_id = id; ret->com.state = RES_XRCD_ALLOCATED; return &ret->com; } static struct res_common *alloc_fs_rule_tr(u64 id, int qpn) { struct res_fs_rule *ret; ret = kzalloc(sizeof *ret, GFP_KERNEL); if (!ret) return NULL; ret->com.res_id = id; ret->com.state = RES_FS_RULE_ALLOCATED; ret->qpn = qpn; return &ret->com; } static struct res_common *alloc_tr(u64 id, enum mlx4_resource type, int slave, int extra) { struct res_common *ret; switch (type) { case RES_QP: ret = alloc_qp_tr(id); break; case RES_MPT: ret = alloc_mpt_tr(id, extra); break; case RES_MTT: ret = alloc_mtt_tr(id, extra); break; case RES_EQ: ret = alloc_eq_tr(id); break; case RES_CQ: ret = alloc_cq_tr(id); break; case RES_SRQ: ret = alloc_srq_tr(id); break; case RES_MAC: printk(KERN_ERR "implementation missing\n"); return NULL; case RES_COUNTER: ret = alloc_counter_tr(id); break; case RES_XRCD: ret = alloc_xrcdn_tr(id); break; case RES_FS_RULE: ret = alloc_fs_rule_tr(id, extra); break; default: return NULL; } if (ret) ret->owner = slave; return ret; } static int add_res_range(struct mlx4_dev *dev, int slave, u64 base, int count, enum mlx4_resource type, int extra) { int i; int err; struct mlx4_priv *priv = mlx4_priv(dev); struct res_common **res_arr; struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; struct rb_root *root = &tracker->res_tree[type]; res_arr = kzalloc(count * sizeof *res_arr, GFP_KERNEL); if (!res_arr) return -ENOMEM; for (i = 0; i < count; ++i) { res_arr[i] = alloc_tr(base + i, type, slave, extra); if (!res_arr[i]) { for (--i; i >= 0; --i) kfree(res_arr[i]); kfree(res_arr); return -ENOMEM; } } spin_lock_irq(mlx4_tlock(dev)); for (i = 0; i < count; ++i) { if (find_res(dev, base + i, type)) { err = -EEXIST; goto undo; } err = res_tracker_insert(root, res_arr[i]); if (err) goto undo; list_add_tail(&res_arr[i]->list, &tracker->slave_list[slave].res_list[type]); } spin_unlock_irq(mlx4_tlock(dev)); kfree(res_arr); return 0; undo: for (--i; i >= 0; --i) { rb_erase(&res_arr[i]->node, root); list_del_init(&res_arr[i]->list); } spin_unlock_irq(mlx4_tlock(dev)); for (i = 0; i < count; ++i) kfree(res_arr[i]); kfree(res_arr); return err; } static int remove_qp_ok(struct res_qp *res) { if (res->com.state == RES_QP_BUSY || atomic_read(&res->ref_count) || !list_empty(&res->mcg_list)) { pr_err("resource tracker: fail to remove qp, state %d, ref_count %d\n", res->com.state, atomic_read(&res->ref_count)); return -EBUSY; } else if (res->com.state != RES_QP_RESERVED) { return -EPERM; } return 0; } static int remove_mtt_ok(struct res_mtt *res, int order) { if (res->com.state == RES_MTT_BUSY || atomic_read(&res->ref_count)) { printk(KERN_DEBUG "%s-%d: state %s, ref_count %d\n", __func__, __LINE__, mtt_states_str(res->com.state), atomic_read(&res->ref_count)); return -EBUSY; } else if (res->com.state != RES_MTT_ALLOCATED) return -EPERM; else if (res->order != order) return -EINVAL; return 0; } static int remove_mpt_ok(struct res_mpt *res) { if (res->com.state == RES_MPT_BUSY) return -EBUSY; else if (res->com.state != RES_MPT_RESERVED) return -EPERM; return 0; } static int remove_eq_ok(struct res_eq *res) { if (res->com.state == RES_MPT_BUSY) return -EBUSY; else if (res->com.state != RES_MPT_RESERVED) return -EPERM; return 0; } static int remove_counter_ok(struct res_counter *res) { if (res->com.state == RES_COUNTER_BUSY) return -EBUSY; else if (res->com.state != RES_COUNTER_ALLOCATED) return -EPERM; return 0; } static int remove_xrcdn_ok(struct res_xrcdn *res) { if (res->com.state == RES_XRCD_BUSY) return -EBUSY; else if (res->com.state != RES_XRCD_ALLOCATED) return -EPERM; return 0; } static int remove_fs_rule_ok(struct res_fs_rule *res) { if (res->com.state == RES_FS_RULE_BUSY) return -EBUSY; else if (res->com.state != RES_FS_RULE_ALLOCATED) return -EPERM; return 0; } static int remove_cq_ok(struct res_cq *res) { if (res->com.state == RES_CQ_BUSY) return -EBUSY; else if (res->com.state != RES_CQ_ALLOCATED) return -EPERM; return 0; } static int remove_srq_ok(struct res_srq *res) { if (res->com.state == RES_SRQ_BUSY) return -EBUSY; else if (res->com.state != RES_SRQ_ALLOCATED) return -EPERM; return 0; } static int remove_ok(struct res_common *res, enum mlx4_resource type, int extra) { switch (type) { case RES_QP: return remove_qp_ok((struct res_qp *)res); case RES_CQ: return remove_cq_ok((struct res_cq *)res); case RES_SRQ: return remove_srq_ok((struct res_srq *)res); case RES_MPT: return remove_mpt_ok((struct res_mpt *)res); case RES_MTT: return remove_mtt_ok((struct res_mtt *)res, extra); case RES_MAC: return -ENOSYS; case RES_EQ: return remove_eq_ok((struct res_eq *)res); case RES_COUNTER: return remove_counter_ok((struct res_counter *)res); case RES_XRCD: return remove_xrcdn_ok((struct res_xrcdn *)res); case RES_FS_RULE: return remove_fs_rule_ok((struct res_fs_rule *)res); default: return -EINVAL; } } static int rem_res_range(struct mlx4_dev *dev, int slave, u64 base, int count, enum mlx4_resource type, int extra) { u64 i; int err; struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; struct res_common *r; spin_lock_irq(mlx4_tlock(dev)); for (i = base; i < base + count; ++i) { r = res_tracker_lookup(&tracker->res_tree[type], i); if (!r) { err = -ENOENT; goto out; } if (r->owner != slave) { err = -EPERM; goto out; } err = remove_ok(r, type, extra); if (err) goto out; } for (i = base; i < base + count; ++i) { r = res_tracker_lookup(&tracker->res_tree[type], i); rb_erase(&r->node, &tracker->res_tree[type]); list_del(&r->list); kfree(r); } err = 0; out: spin_unlock_irq(mlx4_tlock(dev)); return err; } static int qp_res_start_move_to(struct mlx4_dev *dev, int slave, int qpn, enum res_qp_states state, struct res_qp **qp, int alloc) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; struct res_qp *r; int err = 0; spin_lock_irq(mlx4_tlock(dev)); r = res_tracker_lookup(&tracker->res_tree[RES_QP], qpn); if (!r) err = -ENOENT; else if (r->com.owner != slave) err = -EPERM; else { switch (state) { case RES_QP_BUSY: mlx4_dbg(dev, "%s: failed RES_QP, 0x%llx\n", __func__, (unsigned long long)r->com.res_id); err = -EBUSY; break; case RES_QP_RESERVED: if (r->com.state == RES_QP_MAPPED && !alloc) break; mlx4_dbg(dev, "failed RES_QP, 0x%llx\n", (unsigned long long)r->com.res_id); err = -EINVAL; break; case RES_QP_MAPPED: if ((r->com.state == RES_QP_RESERVED && alloc) || r->com.state == RES_QP_HW) break; else { mlx4_dbg(dev, "failed RES_QP, 0x%llx\n", (unsigned long long)r->com.res_id); err = -EINVAL; } break; case RES_QP_HW: if (r->com.state != RES_QP_MAPPED) err = -EINVAL; break; default: err = -EINVAL; } if (!err) { r->com.from_state = r->com.state; r->com.to_state = state; r->com.state = RES_QP_BUSY; if (qp) *qp = r; } } spin_unlock_irq(mlx4_tlock(dev)); return err; } static int mr_res_start_move_to(struct mlx4_dev *dev, int slave, int index, enum res_mpt_states state, struct res_mpt **mpt) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; struct res_mpt *r; int err = 0; spin_lock_irq(mlx4_tlock(dev)); r = res_tracker_lookup(&tracker->res_tree[RES_MPT], index); if (!r) err = -ENOENT; else if (r->com.owner != slave) err = -EPERM; else { switch (state) { case RES_MPT_BUSY: err = -EINVAL; break; case RES_MPT_RESERVED: if (r->com.state != RES_MPT_MAPPED) err = -EINVAL; break; case RES_MPT_MAPPED: if (r->com.state != RES_MPT_RESERVED && r->com.state != RES_MPT_HW) err = -EINVAL; break; case RES_MPT_HW: if (r->com.state != RES_MPT_MAPPED) err = -EINVAL; break; default: err = -EINVAL; } if (!err) { r->com.from_state = r->com.state; r->com.to_state = state; r->com.state = RES_MPT_BUSY; if (mpt) *mpt = r; } } spin_unlock_irq(mlx4_tlock(dev)); return err; } static int eq_res_start_move_to(struct mlx4_dev *dev, int slave, int index, enum res_eq_states state, struct res_eq **eq) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; struct res_eq *r; int err = 0; spin_lock_irq(mlx4_tlock(dev)); r = res_tracker_lookup(&tracker->res_tree[RES_EQ], index); if (!r) err = -ENOENT; else if (r->com.owner != slave) err = -EPERM; else { switch (state) { case RES_EQ_BUSY: err = -EINVAL; break; case RES_EQ_RESERVED: if (r->com.state != RES_EQ_HW) err = -EINVAL; break; case RES_EQ_HW: if (r->com.state != RES_EQ_RESERVED) err = -EINVAL; break; default: err = -EINVAL; } if (!err) { r->com.from_state = r->com.state; r->com.to_state = state; r->com.state = RES_EQ_BUSY; if (eq) *eq = r; } } spin_unlock_irq(mlx4_tlock(dev)); return err; } static int cq_res_start_move_to(struct mlx4_dev *dev, int slave, int cqn, enum res_cq_states state, struct res_cq **cq) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; struct res_cq *r; int err; spin_lock_irq(mlx4_tlock(dev)); r = res_tracker_lookup(&tracker->res_tree[RES_CQ], cqn); if (!r) err = -ENOENT; else if (r->com.owner != slave) err = -EPERM; else { switch (state) { case RES_CQ_BUSY: err = -EBUSY; break; case RES_CQ_ALLOCATED: if (r->com.state != RES_CQ_HW) err = -EINVAL; else if (atomic_read(&r->ref_count)) err = -EBUSY; else err = 0; break; case RES_CQ_HW: if (r->com.state != RES_CQ_ALLOCATED) err = -EINVAL; else err = 0; break; default: err = -EINVAL; } if (!err) { r->com.from_state = r->com.state; r->com.to_state = state; r->com.state = RES_CQ_BUSY; if (cq) *cq = r; } } spin_unlock_irq(mlx4_tlock(dev)); return err; } static int srq_res_start_move_to(struct mlx4_dev *dev, int slave, int index, enum res_srq_states state, struct res_srq **srq) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; struct res_srq *r; int err = 0; spin_lock_irq(mlx4_tlock(dev)); r = res_tracker_lookup(&tracker->res_tree[RES_SRQ], index); if (!r) err = -ENOENT; else if (r->com.owner != slave) err = -EPERM; else { switch (state) { case RES_SRQ_BUSY: err = -EINVAL; break; case RES_SRQ_ALLOCATED: if (r->com.state != RES_SRQ_HW) err = -EINVAL; else if (atomic_read(&r->ref_count)) err = -EBUSY; break; case RES_SRQ_HW: if (r->com.state != RES_SRQ_ALLOCATED) err = -EINVAL; break; default: err = -EINVAL; } if (!err) { r->com.from_state = r->com.state; r->com.to_state = state; r->com.state = RES_SRQ_BUSY; if (srq) *srq = r; } } spin_unlock_irq(mlx4_tlock(dev)); return err; } static void res_abort_move(struct mlx4_dev *dev, int slave, enum mlx4_resource type, int id) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; struct res_common *r; spin_lock_irq(mlx4_tlock(dev)); r = res_tracker_lookup(&tracker->res_tree[type], id); if (r && (r->owner == slave)) r->state = r->from_state; spin_unlock_irq(mlx4_tlock(dev)); } static void res_end_move(struct mlx4_dev *dev, int slave, enum mlx4_resource type, int id) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; struct res_common *r; spin_lock_irq(mlx4_tlock(dev)); r = res_tracker_lookup(&tracker->res_tree[type], id); if (r && (r->owner == slave)) r->state = r->to_state; spin_unlock_irq(mlx4_tlock(dev)); } static int valid_reserved(struct mlx4_dev *dev, int slave, int qpn) { return mlx4_is_qp_reserved(dev, qpn) && (mlx4_is_master(dev) || mlx4_is_guest_proxy(dev, slave, qpn)); } static int fw_reserved(struct mlx4_dev *dev, int qpn) { return qpn < dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW]; } static int qp_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, u64 in_param, u64 *out_param) { int err; int count; int align; int base; int qpn; u8 flags; switch (op) { case RES_OP_RESERVE: count = get_param_l(&in_param) & 0xffffff; flags = get_param_l(&in_param) >> 24; align = get_param_h(&in_param); err = mlx4_grant_resource(dev, slave, RES_QP, count, 0); if (err) return err; err = __mlx4_qp_reserve_range(dev, count, align, &base, flags); if (err) { mlx4_release_resource(dev, slave, RES_QP, count, 0); return err; } err = add_res_range(dev, slave, base, count, RES_QP, 0); if (err) { mlx4_release_resource(dev, slave, RES_QP, count, 0); __mlx4_qp_release_range(dev, base, count); return err; } set_param_l(out_param, base); break; case RES_OP_MAP_ICM: qpn = get_param_l(&in_param) & 0x7fffff; if (valid_reserved(dev, slave, qpn)) { err = add_res_range(dev, slave, qpn, 1, RES_QP, 0); if (err) return err; } err = qp_res_start_move_to(dev, slave, qpn, RES_QP_MAPPED, NULL, 1); if (err) return err; if (!fw_reserved(dev, qpn)) { err = __mlx4_qp_alloc_icm(dev, qpn); if (err) { res_abort_move(dev, slave, RES_QP, qpn); return err; } } res_end_move(dev, slave, RES_QP, qpn); break; default: err = -EINVAL; break; } return err; } static int mtt_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, u64 in_param, u64 *out_param) { int err = -EINVAL; int base; int order; if (op != RES_OP_RESERVE_AND_MAP) return err; order = get_param_l(&in_param); err = mlx4_grant_resource(dev, slave, RES_MTT, 1 << order, 0); if (err) return err; base = __mlx4_alloc_mtt_range(dev, order); if (base == -1) { mlx4_release_resource(dev, slave, RES_MTT, 1 << order, 0); return -ENOMEM; } err = add_res_range(dev, slave, base, 1, RES_MTT, order); if (err) { mlx4_release_resource(dev, slave, RES_MTT, 1 << order, 0); __mlx4_free_mtt_range(dev, base, order); } else set_param_l(out_param, base); return err; } static int mpt_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, u64 in_param, u64 *out_param) { int err = -EINVAL; int index; int id; struct res_mpt *mpt; switch (op) { case RES_OP_RESERVE: err = mlx4_grant_resource(dev, slave, RES_MPT, 1, 0); if (err) break; index = __mlx4_mpt_reserve(dev); if (index == -1) { mlx4_release_resource(dev, slave, RES_MPT, 1, 0); break; } id = index & mpt_mask(dev); err = add_res_range(dev, slave, id, 1, RES_MPT, index); if (err) { mlx4_release_resource(dev, slave, RES_MPT, 1, 0); __mlx4_mpt_release(dev, index); break; } set_param_l(out_param, index); break; case RES_OP_MAP_ICM: index = get_param_l(&in_param); id = index & mpt_mask(dev); err = mr_res_start_move_to(dev, slave, id, RES_MPT_MAPPED, &mpt); if (err) return err; err = __mlx4_mpt_alloc_icm(dev, mpt->key); if (err) { res_abort_move(dev, slave, RES_MPT, id); return err; } res_end_move(dev, slave, RES_MPT, id); break; } return err; } static int cq_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, u64 in_param, u64 *out_param) { int cqn; int err; switch (op) { case RES_OP_RESERVE_AND_MAP: err = mlx4_grant_resource(dev, slave, RES_CQ, 1, 0); if (err) break; err = __mlx4_cq_alloc_icm(dev, &cqn); if (err) { mlx4_release_resource(dev, slave, RES_CQ, 1, 0); break; } err = add_res_range(dev, slave, cqn, 1, RES_CQ, 0); if (err) { mlx4_release_resource(dev, slave, RES_CQ, 1, 0); __mlx4_cq_free_icm(dev, cqn); break; } set_param_l(out_param, cqn); break; default: err = -EINVAL; } return err; } static int srq_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, u64 in_param, u64 *out_param) { int srqn; int err; switch (op) { case RES_OP_RESERVE_AND_MAP: err = mlx4_grant_resource(dev, slave, RES_SRQ, 1, 0); if (err) break; err = __mlx4_srq_alloc_icm(dev, &srqn); if (err) { mlx4_release_resource(dev, slave, RES_SRQ, 1, 0); break; } err = add_res_range(dev, slave, srqn, 1, RES_SRQ, 0); if (err) { mlx4_release_resource(dev, slave, RES_SRQ, 1, 0); __mlx4_srq_free_icm(dev, srqn); break; } set_param_l(out_param, srqn); break; default: err = -EINVAL; } return err; } static int mac_find_smac_ix_in_slave(struct mlx4_dev *dev, int slave, int port, u8 smac_index, u64 *mac) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; struct list_head *mac_list = &tracker->slave_list[slave].res_list[RES_MAC]; struct mac_res *res, *tmp; list_for_each_entry_safe(res, tmp, mac_list, list) { if (res->smac_index == smac_index && res->port == (u8) port) { *mac = res->mac; return 0; } } return -ENOENT; } static int mac_add_to_slave(struct mlx4_dev *dev, int slave, u64 mac, int port, u8 smac_index) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; struct list_head *mac_list = &tracker->slave_list[slave].res_list[RES_MAC]; struct mac_res *res, *tmp; list_for_each_entry_safe(res, tmp, mac_list, list) { if (res->mac == mac && res->port == (u8) port) { /* mac found. update ref count */ ++res->ref_count; return 0; } } if (mlx4_grant_resource(dev, slave, RES_MAC, 1, port)) return -EINVAL; res = kzalloc(sizeof *res, GFP_KERNEL); if (!res) { mlx4_release_resource(dev, slave, RES_MAC, 1, port); return -ENOMEM; } res->mac = mac; res->port = (u8) port; res->smac_index = smac_index; res->ref_count = 1; list_add_tail(&res->list, &tracker->slave_list[slave].res_list[RES_MAC]); return 0; } static void mac_del_from_slave(struct mlx4_dev *dev, int slave, u64 mac, int port) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; struct list_head *mac_list = &tracker->slave_list[slave].res_list[RES_MAC]; struct mac_res *res, *tmp; list_for_each_entry_safe(res, tmp, mac_list, list) { if (res->mac == mac && res->port == (u8) port) { if (!--res->ref_count) { list_del(&res->list); mlx4_release_resource(dev, slave, RES_MAC, 1, port); kfree(res); } break; } } } static void rem_slave_macs(struct mlx4_dev *dev, int slave) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; struct list_head *mac_list = &tracker->slave_list[slave].res_list[RES_MAC]; struct mac_res *res, *tmp; int i; list_for_each_entry_safe(res, tmp, mac_list, list) { list_del(&res->list); /* dereference the mac the num times the slave referenced it */ for (i = 0; i < res->ref_count; i++) __mlx4_unregister_mac(dev, res->port, res->mac); mlx4_release_resource(dev, slave, RES_MAC, 1, res->port); kfree(res); } } static int mac_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, u64 in_param, u64 *out_param, int in_port) { int err = -EINVAL; int port; u64 mac; u8 smac_index = 0; if (op != RES_OP_RESERVE_AND_MAP) return err; port = !in_port ? get_param_l(out_param) : in_port; mac = in_param; err = __mlx4_register_mac(dev, port, mac); if (err >= 0) { smac_index = err; set_param_l(out_param, err); err = 0; } if (!err) { err = mac_add_to_slave(dev, slave, mac, port, smac_index); if (err) __mlx4_unregister_mac(dev, port, mac); } return err; } static int vlan_add_to_slave(struct mlx4_dev *dev, int slave, u16 vlan, int port, int vlan_index) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; struct list_head *vlan_list = &tracker->slave_list[slave].res_list[RES_VLAN]; struct vlan_res *res, *tmp; list_for_each_entry_safe(res, tmp, vlan_list, list) { if (res->vlan == vlan && res->port == (u8) port) { /* vlan found. update ref count */ ++res->ref_count; return 0; } } if (mlx4_grant_resource(dev, slave, RES_VLAN, 1, port)) return -EINVAL; res = kzalloc(sizeof(*res), GFP_KERNEL); if (!res) { mlx4_release_resource(dev, slave, RES_VLAN, 1, port); return -ENOMEM; } res->vlan = vlan; res->port = (u8) port; res->vlan_index = vlan_index; res->ref_count = 1; list_add_tail(&res->list, &tracker->slave_list[slave].res_list[RES_VLAN]); return 0; } static void vlan_del_from_slave(struct mlx4_dev *dev, int slave, u16 vlan, int port) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; struct list_head *vlan_list = &tracker->slave_list[slave].res_list[RES_VLAN]; struct vlan_res *res, *tmp; list_for_each_entry_safe(res, tmp, vlan_list, list) { if (res->vlan == vlan && res->port == (u8) port) { if (!--res->ref_count) { list_del(&res->list); mlx4_release_resource(dev, slave, RES_VLAN, 1, port); kfree(res); } break; } } } static void rem_slave_vlans(struct mlx4_dev *dev, int slave) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; struct list_head *vlan_list = &tracker->slave_list[slave].res_list[RES_VLAN]; struct vlan_res *res, *tmp; int i; list_for_each_entry_safe(res, tmp, vlan_list, list) { list_del(&res->list); /* dereference the vlan the num times the slave referenced it */ for (i = 0; i < res->ref_count; i++) __mlx4_unregister_vlan(dev, res->port, res->vlan); mlx4_release_resource(dev, slave, RES_VLAN, 1, res->port); kfree(res); } } static int vlan_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, u64 in_param, u64 *out_param, int in_port) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_slave_state *slave_state = priv->mfunc.master.slave_state; int err = -EINVAL; u16 vlan; int vlan_index; int port; port = !in_port ? get_param_l(out_param) : in_port; if (!port) return err; if (op != RES_OP_RESERVE_AND_MAP) return err; /* upstream kernels had NOP for reg/unreg vlan. Continue this. */ if (!in_port && port > 0 && port <= dev->caps.num_ports) { slave_state[slave].old_vlan_api = true; return 0; } vlan = (u16) in_param; err = __mlx4_register_vlan(dev, port, vlan, &vlan_index); if (!err) { set_param_l(out_param, (u32) vlan_index); err = vlan_add_to_slave(dev, slave, vlan, port, vlan_index); if (err) __mlx4_unregister_vlan(dev, port, vlan); } return err; } static int counter_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, u64 in_param, u64 *out_param, int port) { u32 index; int err; if (op != RES_OP_RESERVE) return -EINVAL; err = __mlx4_counter_alloc(dev, slave, port, &index); if (!err) set_param_l(out_param, index); return err; } static int xrcdn_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, u64 in_param, u64 *out_param) { u32 xrcdn; int err; if (op != RES_OP_RESERVE) return -EINVAL; err = __mlx4_xrcd_alloc(dev, &xrcdn); if (err) return err; err = add_res_range(dev, slave, xrcdn, 1, RES_XRCD, 0); if (err) __mlx4_xrcd_free(dev, xrcdn); else set_param_l(out_param, xrcdn); return err; } int mlx4_ALLOC_RES_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { int err; int alop = vhcr->op_modifier; switch (vhcr->in_modifier & 0xFF) { case RES_QP: err = qp_alloc_res(dev, slave, vhcr->op_modifier, alop, vhcr->in_param, &vhcr->out_param); break; case RES_MTT: err = mtt_alloc_res(dev, slave, vhcr->op_modifier, alop, vhcr->in_param, &vhcr->out_param); break; case RES_MPT: err = mpt_alloc_res(dev, slave, vhcr->op_modifier, alop, vhcr->in_param, &vhcr->out_param); break; case RES_CQ: err = cq_alloc_res(dev, slave, vhcr->op_modifier, alop, vhcr->in_param, &vhcr->out_param); break; case RES_SRQ: err = srq_alloc_res(dev, slave, vhcr->op_modifier, alop, vhcr->in_param, &vhcr->out_param); break; case RES_MAC: err = mac_alloc_res(dev, slave, vhcr->op_modifier, alop, vhcr->in_param, &vhcr->out_param, (vhcr->in_modifier >> 8) & 0xFF); break; case RES_VLAN: err = vlan_alloc_res(dev, slave, vhcr->op_modifier, alop, vhcr->in_param, &vhcr->out_param, (vhcr->in_modifier >> 8) & 0xFF); break; case RES_COUNTER: err = counter_alloc_res(dev, slave, vhcr->op_modifier, alop, vhcr->in_param, &vhcr->out_param, (vhcr->in_modifier >> 8) & 0xFF); break; case RES_XRCD: err = xrcdn_alloc_res(dev, slave, vhcr->op_modifier, alop, vhcr->in_param, &vhcr->out_param); break; default: err = -EINVAL; break; } return err; } static int qp_free_res(struct mlx4_dev *dev, int slave, int op, int cmd, u64 in_param) { int err; int count; int base; int qpn; switch (op) { case RES_OP_RESERVE: base = get_param_l(&in_param) & 0x7fffff; count = get_param_h(&in_param); err = rem_res_range(dev, slave, base, count, RES_QP, 0); if (err) break; mlx4_release_resource(dev, slave, RES_QP, count, 0); __mlx4_qp_release_range(dev, base, count); break; case RES_OP_MAP_ICM: qpn = get_param_l(&in_param) & 0x7fffff; err = qp_res_start_move_to(dev, slave, qpn, RES_QP_RESERVED, NULL, 0); if (err) return err; if (!fw_reserved(dev, qpn)) __mlx4_qp_free_icm(dev, qpn); res_end_move(dev, slave, RES_QP, qpn); if (valid_reserved(dev, slave, qpn)) err = rem_res_range(dev, slave, qpn, 1, RES_QP, 0); break; default: err = -EINVAL; break; } return err; } static int mtt_free_res(struct mlx4_dev *dev, int slave, int op, int cmd, u64 in_param, u64 *out_param) { int err = -EINVAL; int base; int order; if (op != RES_OP_RESERVE_AND_MAP) return err; base = get_param_l(&in_param); order = get_param_h(&in_param); err = rem_res_range(dev, slave, base, 1, RES_MTT, order); if (!err) { mlx4_release_resource(dev, slave, RES_MTT, 1 << order, 0); __mlx4_free_mtt_range(dev, base, order); } return err; } static int mpt_free_res(struct mlx4_dev *dev, int slave, int op, int cmd, u64 in_param) { int err = -EINVAL; int index; int id; struct res_mpt *mpt; switch (op) { case RES_OP_RESERVE: index = get_param_l(&in_param); id = index & mpt_mask(dev); err = get_res(dev, slave, id, RES_MPT, &mpt); if (err) break; index = mpt->key; put_res(dev, slave, id, RES_MPT); err = rem_res_range(dev, slave, id, 1, RES_MPT, 0); if (err) break; mlx4_release_resource(dev, slave, RES_MPT, 1, 0); __mlx4_mpt_release(dev, index); break; case RES_OP_MAP_ICM: index = get_param_l(&in_param); id = index & mpt_mask(dev); err = mr_res_start_move_to(dev, slave, id, RES_MPT_RESERVED, &mpt); if (err) return err; __mlx4_mpt_free_icm(dev, mpt->key); res_end_move(dev, slave, RES_MPT, id); return err; break; default: err = -EINVAL; break; } return err; } static int cq_free_res(struct mlx4_dev *dev, int slave, int op, int cmd, u64 in_param, u64 *out_param) { int cqn; int err; switch (op) { case RES_OP_RESERVE_AND_MAP: cqn = get_param_l(&in_param); err = rem_res_range(dev, slave, cqn, 1, RES_CQ, 0); if (err) break; mlx4_release_resource(dev, slave, RES_CQ, 1, 0); __mlx4_cq_free_icm(dev, cqn); break; default: err = -EINVAL; break; } return err; } static int srq_free_res(struct mlx4_dev *dev, int slave, int op, int cmd, u64 in_param, u64 *out_param) { int srqn; int err; switch (op) { case RES_OP_RESERVE_AND_MAP: srqn = get_param_l(&in_param); err = rem_res_range(dev, slave, srqn, 1, RES_SRQ, 0); if (err) break; mlx4_release_resource(dev, slave, RES_SRQ, 1, 0); __mlx4_srq_free_icm(dev, srqn); break; default: err = -EINVAL; break; } return err; } static int mac_free_res(struct mlx4_dev *dev, int slave, int op, int cmd, u64 in_param, u64 *out_param, int in_port) { int port; int err = 0; switch (op) { case RES_OP_RESERVE_AND_MAP: port = !in_port ? get_param_l(out_param) : in_port; mac_del_from_slave(dev, slave, in_param, port); __mlx4_unregister_mac(dev, port, in_param); break; default: err = -EINVAL; break; } return err; } static int vlan_free_res(struct mlx4_dev *dev, int slave, int op, int cmd, u64 in_param, u64 *out_param, int port) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_slave_state *slave_state = priv->mfunc.master.slave_state; int err = 0; switch (op) { case RES_OP_RESERVE_AND_MAP: if (slave_state[slave].old_vlan_api == true) return 0; if (!port) return -EINVAL; vlan_del_from_slave(dev, slave, in_param, port); __mlx4_unregister_vlan(dev, port, in_param); break; default: err = -EINVAL; break; } return err; } static int counter_free_res(struct mlx4_dev *dev, int slave, int op, int cmd, u64 in_param, u64 *out_param, int port) { int index; if (op != RES_OP_RESERVE) return -EINVAL; index = get_param_l(&in_param); __mlx4_counter_free(dev, slave, port, index); return 0; } static int xrcdn_free_res(struct mlx4_dev *dev, int slave, int op, int cmd, u64 in_param, u64 *out_param) { int xrcdn; int err; if (op != RES_OP_RESERVE) return -EINVAL; xrcdn = get_param_l(&in_param); err = rem_res_range(dev, slave, xrcdn, 1, RES_XRCD, 0); if (err) return err; __mlx4_xrcd_free(dev, xrcdn); return err; } int mlx4_FREE_RES_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { int err = -EINVAL; int alop = vhcr->op_modifier; switch (vhcr->in_modifier & 0xFF) { case RES_QP: err = qp_free_res(dev, slave, vhcr->op_modifier, alop, vhcr->in_param); break; case RES_MTT: err = mtt_free_res(dev, slave, vhcr->op_modifier, alop, vhcr->in_param, &vhcr->out_param); break; case RES_MPT: err = mpt_free_res(dev, slave, vhcr->op_modifier, alop, vhcr->in_param); break; case RES_CQ: err = cq_free_res(dev, slave, vhcr->op_modifier, alop, vhcr->in_param, &vhcr->out_param); break; case RES_SRQ: err = srq_free_res(dev, slave, vhcr->op_modifier, alop, vhcr->in_param, &vhcr->out_param); break; case RES_MAC: err = mac_free_res(dev, slave, vhcr->op_modifier, alop, vhcr->in_param, &vhcr->out_param, (vhcr->in_modifier >> 8) & 0xFF); break; case RES_VLAN: err = vlan_free_res(dev, slave, vhcr->op_modifier, alop, vhcr->in_param, &vhcr->out_param, (vhcr->in_modifier >> 8) & 0xFF); break; case RES_COUNTER: err = counter_free_res(dev, slave, vhcr->op_modifier, alop, vhcr->in_param, &vhcr->out_param, (vhcr->in_modifier >> 8) & 0xFF); break; case RES_XRCD: err = xrcdn_free_res(dev, slave, vhcr->op_modifier, alop, vhcr->in_param, &vhcr->out_param); default: break; } return err; } /* ugly but other choices are uglier */ static int mr_phys_mpt(struct mlx4_mpt_entry *mpt) { return (be32_to_cpu(mpt->flags) >> 9) & 1; } static int mr_get_mtt_addr(struct mlx4_mpt_entry *mpt) { return (int)be64_to_cpu(mpt->mtt_addr) & 0xfffffff8; } static int mr_get_mtt_size(struct mlx4_mpt_entry *mpt) { return be32_to_cpu(mpt->mtt_sz); } static u32 mr_get_pd(struct mlx4_mpt_entry *mpt) { return be32_to_cpu(mpt->pd_flags) & 0x00ffffff; } static int mr_is_fmr(struct mlx4_mpt_entry *mpt) { return be32_to_cpu(mpt->pd_flags) & MLX4_MPT_PD_FLAG_FAST_REG; } static int mr_is_bind_enabled(struct mlx4_mpt_entry *mpt) { return be32_to_cpu(mpt->flags) & MLX4_MPT_FLAG_BIND_ENABLE; } static int mr_is_region(struct mlx4_mpt_entry *mpt) { return be32_to_cpu(mpt->flags) & MLX4_MPT_FLAG_REGION; } static int qp_get_mtt_addr(struct mlx4_qp_context *qpc) { return be32_to_cpu(qpc->mtt_base_addr_l) & 0xfffffff8; } static int srq_get_mtt_addr(struct mlx4_srq_context *srqc) { return be32_to_cpu(srqc->mtt_base_addr_l) & 0xfffffff8; } static int qp_get_mtt_size(struct mlx4_qp_context *qpc) { int page_shift = (qpc->log_page_size & 0x3f) + 12; int log_sq_size = (qpc->sq_size_stride >> 3) & 0xf; int log_sq_sride = qpc->sq_size_stride & 7; int log_rq_size = (qpc->rq_size_stride >> 3) & 0xf; int log_rq_stride = qpc->rq_size_stride & 7; int srq = (be32_to_cpu(qpc->srqn) >> 24) & 1; int rss = (be32_to_cpu(qpc->flags) >> 13) & 1; u32 ts = (be32_to_cpu(qpc->flags) >> 16) & 0xff; int xrc = (ts == MLX4_QP_ST_XRC) ? 1 : 0; int sq_size; int rq_size; int total_pages; int total_mem; int page_offset = (be32_to_cpu(qpc->params2) >> 6) & 0x3f; sq_size = 1 << (log_sq_size + log_sq_sride + 4); rq_size = (srq|rss|xrc) ? 0 : (1 << (log_rq_size + log_rq_stride + 4)); total_mem = sq_size + rq_size; total_pages = roundup_pow_of_two((total_mem + (page_offset << 6)) >> page_shift); return total_pages; } static int check_mtt_range(struct mlx4_dev *dev, int slave, int start, int size, struct res_mtt *mtt) { int res_start = mtt->com.res_id; int res_size = (1 << mtt->order); if (start < res_start || start + size > res_start + res_size) return -EPERM; return 0; } int mlx4_SW2HW_MPT_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { int err; int index = vhcr->in_modifier; struct res_mtt *mtt; struct res_mpt *mpt; int mtt_base = mr_get_mtt_addr(inbox->buf) / dev->caps.mtt_entry_sz; int phys; int id; u32 pd; int pd_slave; id = index & mpt_mask(dev); err = mr_res_start_move_to(dev, slave, id, RES_MPT_HW, &mpt); if (err) return err; /* Currently disable memory windows since this feature isn't tested yet * under virtualization. */ if (!mr_is_region(inbox->buf)) { err = -ENOSYS; goto ex_abort; } /* Make sure that the PD bits related to the slave id are zeros. */ pd = mr_get_pd(inbox->buf); pd_slave = (pd >> 17) & 0x7f; if (pd_slave != 0 && pd_slave != slave) { err = -EPERM; goto ex_abort; } if (mr_is_fmr(inbox->buf)) { /* FMR and Bind Enable are forbidden in slave devices. */ if (mr_is_bind_enabled(inbox->buf)) { err = -EPERM; goto ex_abort; } /* FMR and Memory Windows are also forbidden. */ if (!mr_is_region(inbox->buf)) { err = -EPERM; goto ex_abort; } } phys = mr_phys_mpt(inbox->buf); if (!phys) { err = get_res(dev, slave, mtt_base, RES_MTT, &mtt); if (err) goto ex_abort; err = check_mtt_range(dev, slave, mtt_base, mr_get_mtt_size(inbox->buf), mtt); if (err) goto ex_put; mpt->mtt = mtt; } err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd); if (err) goto ex_put; if (!phys) { atomic_inc(&mtt->ref_count); put_res(dev, slave, mtt->com.res_id, RES_MTT); } res_end_move(dev, slave, RES_MPT, id); return 0; ex_put: if (!phys) put_res(dev, slave, mtt->com.res_id, RES_MTT); ex_abort: res_abort_move(dev, slave, RES_MPT, id); return err; } int mlx4_HW2SW_MPT_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { int err; int index = vhcr->in_modifier; struct res_mpt *mpt; int id; id = index & mpt_mask(dev); err = mr_res_start_move_to(dev, slave, id, RES_MPT_MAPPED, &mpt); if (err) return err; err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd); if (err) goto ex_abort; if (mpt->mtt) atomic_dec(&mpt->mtt->ref_count); res_end_move(dev, slave, RES_MPT, id); return 0; ex_abort: res_abort_move(dev, slave, RES_MPT, id); return err; } int mlx4_QUERY_MPT_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { int err; int index = vhcr->in_modifier; struct res_mpt *mpt; int id; id = index & mpt_mask(dev); err = get_res(dev, slave, id, RES_MPT, &mpt); if (err) return err; if (mpt->com.from_state != RES_MPT_HW) { err = -EBUSY; goto out; } err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd); out: put_res(dev, slave, id, RES_MPT); return err; } static int qp_get_rcqn(struct mlx4_qp_context *qpc) { return be32_to_cpu(qpc->cqn_recv) & 0xffffff; } static int qp_get_scqn(struct mlx4_qp_context *qpc) { return be32_to_cpu(qpc->cqn_send) & 0xffffff; } static u32 qp_get_srqn(struct mlx4_qp_context *qpc) { return be32_to_cpu(qpc->srqn) & 0x1ffffff; } static void adjust_proxy_tun_qkey(struct mlx4_dev *dev, struct mlx4_vhcr *vhcr, struct mlx4_qp_context *context) { u32 qpn = vhcr->in_modifier & 0xffffff; u32 qkey = 0; if (mlx4_get_parav_qkey(dev, qpn, &qkey)) return; /* adjust qkey in qp context */ context->qkey = cpu_to_be32(qkey); } int mlx4_RST2INIT_QP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { int err; int qpn = vhcr->in_modifier & 0x7fffff; struct res_mtt *mtt; struct res_qp *qp; struct mlx4_qp_context *qpc = inbox->buf + 8; int mtt_base = qp_get_mtt_addr(qpc) / dev->caps.mtt_entry_sz; int mtt_size = qp_get_mtt_size(qpc); struct res_cq *rcq; struct res_cq *scq; int rcqn = qp_get_rcqn(qpc); int scqn = qp_get_scqn(qpc); u32 srqn = qp_get_srqn(qpc) & 0xffffff; int use_srq = (qp_get_srqn(qpc) >> 24) & 1; struct res_srq *srq; int local_qpn = be32_to_cpu(qpc->local_qpn) & 0xffffff; err = qp_res_start_move_to(dev, slave, qpn, RES_QP_HW, &qp, 0); if (err) return err; qp->local_qpn = local_qpn; qp->sched_queue = 0; qp->param3 = 0; qp->vlan_control = 0; qp->fvl_rx = 0; qp->pri_path_fl = 0; qp->vlan_index = 0; qp->feup = 0; qp->qpc_flags = be32_to_cpu(qpc->flags); err = get_res(dev, slave, mtt_base, RES_MTT, &mtt); if (err) goto ex_abort; err = check_mtt_range(dev, slave, mtt_base, mtt_size, mtt); if (err) goto ex_put_mtt; err = get_res(dev, slave, rcqn, RES_CQ, &rcq); if (err) goto ex_put_mtt; if (scqn != rcqn) { err = get_res(dev, slave, scqn, RES_CQ, &scq); if (err) goto ex_put_rcq; } else scq = rcq; if (use_srq) { err = get_res(dev, slave, srqn, RES_SRQ, &srq); if (err) goto ex_put_scq; } adjust_proxy_tun_qkey(dev, vhcr, qpc); update_pkey_index(dev, slave, inbox); err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd); if (err) goto ex_put_srq; atomic_inc(&mtt->ref_count); qp->mtt = mtt; atomic_inc(&rcq->ref_count); qp->rcq = rcq; atomic_inc(&scq->ref_count); qp->scq = scq; if (scqn != rcqn) put_res(dev, slave, scqn, RES_CQ); if (use_srq) { atomic_inc(&srq->ref_count); put_res(dev, slave, srqn, RES_SRQ); qp->srq = srq; } put_res(dev, slave, rcqn, RES_CQ); put_res(dev, slave, mtt_base, RES_MTT); res_end_move(dev, slave, RES_QP, qpn); return 0; ex_put_srq: if (use_srq) put_res(dev, slave, srqn, RES_SRQ); ex_put_scq: if (scqn != rcqn) put_res(dev, slave, scqn, RES_CQ); ex_put_rcq: put_res(dev, slave, rcqn, RES_CQ); ex_put_mtt: put_res(dev, slave, mtt_base, RES_MTT); ex_abort: res_abort_move(dev, slave, RES_QP, qpn); return err; } static int eq_get_mtt_addr(struct mlx4_eq_context *eqc) { return be32_to_cpu(eqc->mtt_base_addr_l) & 0xfffffff8; } static int eq_get_mtt_size(struct mlx4_eq_context *eqc) { int log_eq_size = eqc->log_eq_size & 0x1f; int page_shift = (eqc->log_page_size & 0x3f) + 12; if (log_eq_size + 5 < page_shift) return 1; return 1 << (log_eq_size + 5 - page_shift); } static int cq_get_mtt_addr(struct mlx4_cq_context *cqc) { return be32_to_cpu(cqc->mtt_base_addr_l) & 0xfffffff8; } static int cq_get_mtt_size(struct mlx4_cq_context *cqc) { int log_cq_size = (be32_to_cpu(cqc->logsize_usrpage) >> 24) & 0x1f; int page_shift = (cqc->log_page_size & 0x3f) + 12; if (log_cq_size + 5 < page_shift) return 1; return 1 << (log_cq_size + 5 - page_shift); } int mlx4_SW2HW_EQ_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { int err; int eqn = vhcr->in_modifier; int res_id = (slave << 8) | eqn; struct mlx4_eq_context *eqc = inbox->buf; int mtt_base = eq_get_mtt_addr(eqc) / dev->caps.mtt_entry_sz; int mtt_size = eq_get_mtt_size(eqc); struct res_eq *eq; struct res_mtt *mtt; err = add_res_range(dev, slave, res_id, 1, RES_EQ, 0); if (err) return err; err = eq_res_start_move_to(dev, slave, res_id, RES_EQ_HW, &eq); if (err) goto out_add; err = get_res(dev, slave, mtt_base, RES_MTT, &mtt); if (err) goto out_move; err = check_mtt_range(dev, slave, mtt_base, mtt_size, mtt); if (err) goto out_put; err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd); if (err) goto out_put; atomic_inc(&mtt->ref_count); eq->mtt = mtt; put_res(dev, slave, mtt->com.res_id, RES_MTT); res_end_move(dev, slave, RES_EQ, res_id); return 0; out_put: put_res(dev, slave, mtt->com.res_id, RES_MTT); out_move: res_abort_move(dev, slave, RES_EQ, res_id); out_add: rem_res_range(dev, slave, res_id, 1, RES_EQ, 0); return err; } static int get_containing_mtt(struct mlx4_dev *dev, int slave, int start, int len, struct res_mtt **res) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; struct res_mtt *mtt; int err = -EINVAL; spin_lock_irq(mlx4_tlock(dev)); list_for_each_entry(mtt, &tracker->slave_list[slave].res_list[RES_MTT], com.list) { if (!check_mtt_range(dev, slave, start, len, mtt)) { *res = mtt; mtt->com.from_state = mtt->com.state; mtt->com.state = RES_MTT_BUSY; err = 0; break; } } spin_unlock_irq(mlx4_tlock(dev)); return err; } static int verify_qp_parameters(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *inbox, enum qp_transition transition, u8 slave) { u32 qp_type; struct mlx4_qp_context *qp_ctx; enum mlx4_qp_optpar optpar; int port; int num_gids; qp_ctx = inbox->buf + 8; qp_type = (be32_to_cpu(qp_ctx->flags) >> 16) & 0xff; optpar = be32_to_cpu(*(__be32 *) inbox->buf); switch (qp_type) { case MLX4_QP_ST_RC: case MLX4_QP_ST_UC: switch (transition) { case QP_TRANS_INIT2RTR: case QP_TRANS_RTR2RTS: case QP_TRANS_RTS2RTS: case QP_TRANS_SQD2SQD: case QP_TRANS_SQD2RTS: if (slave != mlx4_master_func_num(dev)) if (optpar & MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH) { port = (qp_ctx->pri_path.sched_queue >> 6 & 1) + 1; if (dev->caps.port_mask[port] != MLX4_PORT_TYPE_IB) num_gids = mlx4_get_slave_num_gids(dev, slave); else num_gids = 1; if (qp_ctx->pri_path.mgid_index >= num_gids) return -EINVAL; } if (optpar & MLX4_QP_OPTPAR_ALT_ADDR_PATH) { port = (qp_ctx->alt_path.sched_queue >> 6 & 1) + 1; if (dev->caps.port_mask[port] != MLX4_PORT_TYPE_IB) num_gids = mlx4_get_slave_num_gids(dev, slave); else num_gids = 1; if (qp_ctx->alt_path.mgid_index >= num_gids) return -EINVAL; } break; default: break; } break; default: break; } return 0; } int mlx4_WRITE_MTT_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { struct mlx4_mtt mtt; __be64 *page_list = inbox->buf; u64 *pg_list = (u64 *)page_list; int i; struct res_mtt *rmtt = NULL; int start = be64_to_cpu(page_list[0]); int npages = vhcr->in_modifier; int err; err = get_containing_mtt(dev, slave, start, npages, &rmtt); if (err) return err; /* Call the SW implementation of write_mtt: * - Prepare a dummy mtt struct - * - Translate inbox contents to simple addresses in host endianess */ + * - Translate inbox contents to simple addresses in host endianness */ mtt.offset = 0; /* TBD this is broken but I don't handle it since we don't really use it */ mtt.order = 0; mtt.page_shift = 0; for (i = 0; i < npages; ++i) pg_list[i + 2] = (be64_to_cpu(page_list[i + 2]) & ~1ULL); err = __mlx4_write_mtt(dev, &mtt, be64_to_cpu(page_list[0]), npages, ((u64 *)page_list + 2)); if (rmtt) put_res(dev, slave, rmtt->com.res_id, RES_MTT); return err; } int mlx4_HW2SW_EQ_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { int eqn = vhcr->in_modifier; int res_id = eqn | (slave << 8); struct res_eq *eq; int err; err = eq_res_start_move_to(dev, slave, res_id, RES_EQ_RESERVED, &eq); if (err) return err; err = get_res(dev, slave, eq->mtt->com.res_id, RES_MTT, NULL); if (err) goto ex_abort; err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd); if (err) goto ex_put; atomic_dec(&eq->mtt->ref_count); put_res(dev, slave, eq->mtt->com.res_id, RES_MTT); res_end_move(dev, slave, RES_EQ, res_id); rem_res_range(dev, slave, res_id, 1, RES_EQ, 0); return 0; ex_put: put_res(dev, slave, eq->mtt->com.res_id, RES_MTT); ex_abort: res_abort_move(dev, slave, RES_EQ, res_id); return err; } int mlx4_GEN_EQE(struct mlx4_dev *dev, int slave, struct mlx4_eqe *eqe) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_slave_event_eq_info *event_eq; struct mlx4_cmd_mailbox *mailbox; u32 in_modifier = 0; int err; int res_id; struct res_eq *req; if (!priv->mfunc.master.slave_state) return -EINVAL; /* check for slave valid, slave not PF, and slave active */ if (slave < 0 || slave >= dev->num_slaves || slave == dev->caps.function || !priv->mfunc.master.slave_state[slave].active) return 0; event_eq = &priv->mfunc.master.slave_state[slave].event_eq[eqe->type]; /* Create the event only if the slave is registered */ if (event_eq->eqn < 0) return 0; mutex_lock(&priv->mfunc.master.gen_eqe_mutex[slave]); res_id = (slave << 8) | event_eq->eqn; err = get_res(dev, slave, res_id, RES_EQ, &req); if (err) goto unlock; if (req->com.from_state != RES_EQ_HW) { err = -EINVAL; goto put; } mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) { err = PTR_ERR(mailbox); goto put; } if (eqe->type == MLX4_EVENT_TYPE_CMD) { ++event_eq->token; eqe->event.cmd.token = cpu_to_be16(event_eq->token); } memcpy(mailbox->buf, (u8 *) eqe, 28); in_modifier = (slave & 0xff) | ((event_eq->eqn & 0xff) << 16); err = mlx4_cmd(dev, mailbox->dma, in_modifier, 0, MLX4_CMD_GEN_EQE, MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); put_res(dev, slave, res_id, RES_EQ); mutex_unlock(&priv->mfunc.master.gen_eqe_mutex[slave]); mlx4_free_cmd_mailbox(dev, mailbox); return err; put: put_res(dev, slave, res_id, RES_EQ); unlock: mutex_unlock(&priv->mfunc.master.gen_eqe_mutex[slave]); return err; } int mlx4_QUERY_EQ_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { int eqn = vhcr->in_modifier; int res_id = eqn | (slave << 8); struct res_eq *eq; int err; err = get_res(dev, slave, res_id, RES_EQ, &eq); if (err) return err; if (eq->com.from_state != RES_EQ_HW) { err = -EINVAL; goto ex_put; } err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd); ex_put: put_res(dev, slave, res_id, RES_EQ); return err; } int mlx4_SW2HW_CQ_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { int err; int cqn = vhcr->in_modifier; struct mlx4_cq_context *cqc = inbox->buf; int mtt_base = cq_get_mtt_addr(cqc) / dev->caps.mtt_entry_sz; struct res_cq *cq; struct res_mtt *mtt; err = cq_res_start_move_to(dev, slave, cqn, RES_CQ_HW, &cq); if (err) return err; err = get_res(dev, slave, mtt_base, RES_MTT, &mtt); if (err) goto out_move; err = check_mtt_range(dev, slave, mtt_base, cq_get_mtt_size(cqc), mtt); if (err) goto out_put; err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd); if (err) goto out_put; atomic_inc(&mtt->ref_count); cq->mtt = mtt; put_res(dev, slave, mtt->com.res_id, RES_MTT); res_end_move(dev, slave, RES_CQ, cqn); return 0; out_put: put_res(dev, slave, mtt->com.res_id, RES_MTT); out_move: res_abort_move(dev, slave, RES_CQ, cqn); return err; } int mlx4_HW2SW_CQ_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { int err; int cqn = vhcr->in_modifier; struct res_cq *cq; err = cq_res_start_move_to(dev, slave, cqn, RES_CQ_ALLOCATED, &cq); if (err) return err; err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd); if (err) goto out_move; atomic_dec(&cq->mtt->ref_count); res_end_move(dev, slave, RES_CQ, cqn); return 0; out_move: res_abort_move(dev, slave, RES_CQ, cqn); return err; } int mlx4_QUERY_CQ_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { int cqn = vhcr->in_modifier; struct res_cq *cq; int err; err = get_res(dev, slave, cqn, RES_CQ, &cq); if (err) return err; if (cq->com.from_state != RES_CQ_HW) goto ex_put; err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd); ex_put: put_res(dev, slave, cqn, RES_CQ); return err; } static int handle_resize(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd, struct res_cq *cq) { int err; struct res_mtt *orig_mtt; struct res_mtt *mtt; struct mlx4_cq_context *cqc = inbox->buf; int mtt_base = cq_get_mtt_addr(cqc) / dev->caps.mtt_entry_sz; err = get_res(dev, slave, cq->mtt->com.res_id, RES_MTT, &orig_mtt); if (err) return err; if (orig_mtt != cq->mtt) { err = -EINVAL; goto ex_put; } err = get_res(dev, slave, mtt_base, RES_MTT, &mtt); if (err) goto ex_put; err = check_mtt_range(dev, slave, mtt_base, cq_get_mtt_size(cqc), mtt); if (err) goto ex_put1; err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd); if (err) goto ex_put1; atomic_dec(&orig_mtt->ref_count); put_res(dev, slave, orig_mtt->com.res_id, RES_MTT); atomic_inc(&mtt->ref_count); cq->mtt = mtt; put_res(dev, slave, mtt->com.res_id, RES_MTT); return 0; ex_put1: put_res(dev, slave, mtt->com.res_id, RES_MTT); ex_put: put_res(dev, slave, orig_mtt->com.res_id, RES_MTT); return err; } int mlx4_MODIFY_CQ_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { int cqn = vhcr->in_modifier; struct res_cq *cq; int err; err = get_res(dev, slave, cqn, RES_CQ, &cq); if (err) return err; if (cq->com.from_state != RES_CQ_HW) goto ex_put; if (vhcr->op_modifier == 0) { err = handle_resize(dev, slave, vhcr, inbox, outbox, cmd, cq); goto ex_put; } err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd); ex_put: put_res(dev, slave, cqn, RES_CQ); return err; } static int srq_get_mtt_size(struct mlx4_srq_context *srqc) { int log_srq_size = (be32_to_cpu(srqc->state_logsize_srqn) >> 24) & 0xf; int log_rq_stride = srqc->logstride & 7; int page_shift = (srqc->log_page_size & 0x3f) + 12; if (log_srq_size + log_rq_stride + 4 < page_shift) return 1; return 1 << (log_srq_size + log_rq_stride + 4 - page_shift); } int mlx4_SW2HW_SRQ_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { int err; int srqn = vhcr->in_modifier; struct res_mtt *mtt; struct res_srq *srq; struct mlx4_srq_context *srqc = inbox->buf; int mtt_base = srq_get_mtt_addr(srqc) / dev->caps.mtt_entry_sz; if (srqn != (be32_to_cpu(srqc->state_logsize_srqn) & 0xffffff)) return -EINVAL; err = srq_res_start_move_to(dev, slave, srqn, RES_SRQ_HW, &srq); if (err) return err; err = get_res(dev, slave, mtt_base, RES_MTT, &mtt); if (err) goto ex_abort; err = check_mtt_range(dev, slave, mtt_base, srq_get_mtt_size(srqc), mtt); if (err) goto ex_put_mtt; err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd); if (err) goto ex_put_mtt; atomic_inc(&mtt->ref_count); srq->mtt = mtt; put_res(dev, slave, mtt->com.res_id, RES_MTT); res_end_move(dev, slave, RES_SRQ, srqn); return 0; ex_put_mtt: put_res(dev, slave, mtt->com.res_id, RES_MTT); ex_abort: res_abort_move(dev, slave, RES_SRQ, srqn); return err; } int mlx4_HW2SW_SRQ_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { int err; int srqn = vhcr->in_modifier; struct res_srq *srq; err = srq_res_start_move_to(dev, slave, srqn, RES_SRQ_ALLOCATED, &srq); if (err) return err; err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd); if (err) goto ex_abort; atomic_dec(&srq->mtt->ref_count); if (srq->cq) atomic_dec(&srq->cq->ref_count); res_end_move(dev, slave, RES_SRQ, srqn); return 0; ex_abort: res_abort_move(dev, slave, RES_SRQ, srqn); return err; } int mlx4_QUERY_SRQ_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { int err; int srqn = vhcr->in_modifier; struct res_srq *srq; err = get_res(dev, slave, srqn, RES_SRQ, &srq); if (err) return err; if (srq->com.from_state != RES_SRQ_HW) { err = -EBUSY; goto out; } err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd); out: put_res(dev, slave, srqn, RES_SRQ); return err; } int mlx4_ARM_SRQ_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { int err; int srqn = vhcr->in_modifier; struct res_srq *srq; err = get_res(dev, slave, srqn, RES_SRQ, &srq); if (err) return err; if (srq->com.from_state != RES_SRQ_HW) { err = -EBUSY; goto out; } err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd); out: put_res(dev, slave, srqn, RES_SRQ); return err; } int mlx4_GEN_QP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { int err; int qpn = vhcr->in_modifier & 0x7fffff; struct res_qp *qp; err = get_res(dev, slave, qpn, RES_QP, &qp); if (err) return err; if (qp->com.from_state != RES_QP_HW) { err = -EBUSY; goto out; } err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd); out: put_res(dev, slave, qpn, RES_QP); return err; } int mlx4_INIT2INIT_QP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { struct mlx4_qp_context *context = inbox->buf + 8; adjust_proxy_tun_qkey(dev, vhcr, context); update_pkey_index(dev, slave, inbox); return mlx4_GEN_QP_wrapper(dev, slave, vhcr, inbox, outbox, cmd); } static int roce_verify_mac(struct mlx4_dev *dev, int slave, struct mlx4_qp_context *qpc, struct mlx4_cmd_mailbox *inbox) { u64 mac; int port; u32 ts = (be32_to_cpu(qpc->flags) >> 16) & 0xff; u8 sched = *(u8 *)(inbox->buf + 64); u8 smac_ix; port = (sched >> 6 & 1) + 1; if (mlx4_is_eth(dev, port) && (ts != MLX4_QP_ST_MLX)) { smac_ix = qpc->pri_path.grh_mylmc & 0x7f; if (mac_find_smac_ix_in_slave(dev, slave, port, smac_ix, &mac)) return -ENOENT; } return 0; } int mlx4_INIT2RTR_QP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { int err; struct mlx4_qp_context *qpc = inbox->buf + 8; int qpn = vhcr->in_modifier & 0x7fffff; struct res_qp *qp; u8 orig_sched_queue; __be32 orig_param3 = qpc->param3; u8 orig_vlan_control = qpc->pri_path.vlan_control; u8 orig_fvl_rx = qpc->pri_path.fvl_rx; u8 orig_pri_path_fl = qpc->pri_path.fl; u8 orig_vlan_index = qpc->pri_path.vlan_index; u8 orig_feup = qpc->pri_path.feup; err = verify_qp_parameters(dev, inbox, QP_TRANS_INIT2RTR, slave); if (err) return err; if (roce_verify_mac(dev, slave, qpc, inbox)) return -EINVAL; update_pkey_index(dev, slave, inbox); update_gid(dev, inbox, (u8)slave); adjust_proxy_tun_qkey(dev, vhcr, qpc); orig_sched_queue = qpc->pri_path.sched_queue; err = get_res(dev, slave, qpn, RES_QP, &qp); if (err) return err; if (qp->com.from_state != RES_QP_HW) { err = -EBUSY; goto out; } /* do not modify vport QP params for RSS QPs */ if (!(qp->qpc_flags & (1 << MLX4_RSS_QPC_FLAG_OFFSET))) { err = update_vport_qp_param(dev, inbox, slave, qpn); if (err) goto out; } err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd); out: /* if no error, save sched queue value passed in by VF. This is * essentially the QOS value provided by the VF. This will be useful * if we allow dynamic changes from VST back to VGT */ if (!err) { qp->sched_queue = orig_sched_queue; qp->param3 = orig_param3; qp->vlan_control = orig_vlan_control; qp->fvl_rx = orig_fvl_rx; qp->pri_path_fl = orig_pri_path_fl; qp->vlan_index = orig_vlan_index; qp->feup = orig_feup; } put_res(dev, slave, qpn, RES_QP); return err; } int mlx4_RTR2RTS_QP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { int err; struct mlx4_qp_context *context = inbox->buf + 8; err = verify_qp_parameters(dev, inbox, QP_TRANS_RTR2RTS, slave); if (err) return err; update_pkey_index(dev, slave, inbox); update_gid(dev, inbox, (u8)slave); adjust_proxy_tun_qkey(dev, vhcr, context); return mlx4_GEN_QP_wrapper(dev, slave, vhcr, inbox, outbox, cmd); } int mlx4_RTS2RTS_QP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { int err; struct mlx4_qp_context *context = inbox->buf + 8; err = verify_qp_parameters(dev, inbox, QP_TRANS_RTS2RTS, slave); if (err) return err; update_pkey_index(dev, slave, inbox); update_gid(dev, inbox, (u8)slave); adjust_proxy_tun_qkey(dev, vhcr, context); return mlx4_GEN_QP_wrapper(dev, slave, vhcr, inbox, outbox, cmd); } int mlx4_SQERR2RTS_QP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { struct mlx4_qp_context *context = inbox->buf + 8; adjust_proxy_tun_qkey(dev, vhcr, context); return mlx4_GEN_QP_wrapper(dev, slave, vhcr, inbox, outbox, cmd); } int mlx4_SQD2SQD_QP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { int err; struct mlx4_qp_context *context = inbox->buf + 8; err = verify_qp_parameters(dev, inbox, QP_TRANS_SQD2SQD, slave); if (err) return err; adjust_proxy_tun_qkey(dev, vhcr, context); update_gid(dev, inbox, (u8)slave); update_pkey_index(dev, slave, inbox); return mlx4_GEN_QP_wrapper(dev, slave, vhcr, inbox, outbox, cmd); } int mlx4_SQD2RTS_QP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { int err; struct mlx4_qp_context *context = inbox->buf + 8; err = verify_qp_parameters(dev, inbox, QP_TRANS_SQD2RTS, slave); if (err) return err; adjust_proxy_tun_qkey(dev, vhcr, context); update_gid(dev, inbox, (u8)slave); update_pkey_index(dev, slave, inbox); return mlx4_GEN_QP_wrapper(dev, slave, vhcr, inbox, outbox, cmd); } int mlx4_2RST_QP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { int err; int qpn = vhcr->in_modifier & 0x7fffff; struct res_qp *qp; err = qp_res_start_move_to(dev, slave, qpn, RES_QP_MAPPED, &qp, 0); if (err) return err; err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd); if (err) goto ex_abort; atomic_dec(&qp->mtt->ref_count); atomic_dec(&qp->rcq->ref_count); atomic_dec(&qp->scq->ref_count); if (qp->srq) atomic_dec(&qp->srq->ref_count); res_end_move(dev, slave, RES_QP, qpn); return 0; ex_abort: res_abort_move(dev, slave, RES_QP, qpn); return err; } static struct res_gid *find_gid(struct mlx4_dev *dev, int slave, struct res_qp *rqp, u8 *gid) { struct res_gid *res; list_for_each_entry(res, &rqp->mcg_list, list) { if (!memcmp(res->gid, gid, 16)) return res; } return NULL; } static int add_mcg_res(struct mlx4_dev *dev, int slave, struct res_qp *rqp, u8 *gid, enum mlx4_protocol prot, enum mlx4_steer_type steer, u64 reg_id) { struct res_gid *res; int err; res = kzalloc(sizeof *res, GFP_KERNEL); if (!res) return -ENOMEM; spin_lock_irq(&rqp->mcg_spl); if (find_gid(dev, slave, rqp, gid)) { kfree(res); err = -EEXIST; } else { memcpy(res->gid, gid, 16); res->prot = prot; res->steer = steer; res->reg_id = reg_id; list_add_tail(&res->list, &rqp->mcg_list); err = 0; } spin_unlock_irq(&rqp->mcg_spl); return err; } static int rem_mcg_res(struct mlx4_dev *dev, int slave, struct res_qp *rqp, u8 *gid, enum mlx4_protocol prot, enum mlx4_steer_type steer, u64 *reg_id) { struct res_gid *res; int err; spin_lock_irq(&rqp->mcg_spl); res = find_gid(dev, slave, rqp, gid); if (!res || res->prot != prot || res->steer != steer) err = -EINVAL; else { *reg_id = res->reg_id; list_del(&res->list); kfree(res); err = 0; } spin_unlock_irq(&rqp->mcg_spl); return err; } static int qp_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], int block_loopback, enum mlx4_protocol prot, enum mlx4_steer_type type, u64 *reg_id) { switch (dev->caps.steering_mode) { case MLX4_STEERING_MODE_DEVICE_MANAGED: return mlx4_trans_to_dmfs_attach(dev, qp, gid, gid[5], block_loopback, prot, reg_id); case MLX4_STEERING_MODE_B0: return mlx4_qp_attach_common(dev, qp, gid, block_loopback, prot, type); default: return -EINVAL; } } static int qp_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], enum mlx4_protocol prot, enum mlx4_steer_type type, u64 reg_id) { switch (dev->caps.steering_mode) { case MLX4_STEERING_MODE_DEVICE_MANAGED: return mlx4_flow_detach(dev, reg_id); case MLX4_STEERING_MODE_B0: return mlx4_qp_detach_common(dev, qp, gid, prot, type); default: return -EINVAL; } } int mlx4_QP_ATTACH_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { struct mlx4_qp qp; /* dummy for calling attach/detach */ u8 *gid = inbox->buf; enum mlx4_protocol prot = (vhcr->in_modifier >> 28) & 0x7; int err; int qpn; struct res_qp *rqp; u64 reg_id = 0; int attach = vhcr->op_modifier; int block_loopback = vhcr->in_modifier >> 31; u8 steer_type_mask = 2; enum mlx4_steer_type type = (gid[7] & steer_type_mask) >> 1; qpn = vhcr->in_modifier & 0xffffff; err = get_res(dev, slave, qpn, RES_QP, &rqp); if (err) return err; qp.qpn = qpn; if (attach) { err = qp_attach(dev, &qp, gid, block_loopback, prot, type, ®_id); if (err) { pr_err("Fail to attach rule to qp 0x%x\n", qpn); goto ex_put; } err = add_mcg_res(dev, slave, rqp, gid, prot, type, reg_id); if (err) goto ex_detach; } else { err = rem_mcg_res(dev, slave, rqp, gid, prot, type, ®_id); if (err) goto ex_put; err = qp_detach(dev, &qp, gid, prot, type, reg_id); if (err) pr_err("Fail to detach rule from qp 0x%x reg_id = 0x%llx\n", qpn, (unsigned long long)reg_id); } put_res(dev, slave, qpn, RES_QP); return err; ex_detach: qp_detach(dev, &qp, gid, prot, type, reg_id); ex_put: put_res(dev, slave, qpn, RES_QP); return err; } /* * MAC validation for Flow Steering rules. * VF can attach rules only with a mac address which is assigned to it. */ static int validate_eth_header_mac(int slave, struct _rule_hw *eth_header, struct list_head *rlist) { struct mac_res *res, *tmp; __be64 be_mac; /* make sure it isn't multicast or broadcast mac*/ if (!is_multicast_ether_addr(eth_header->eth.dst_mac) && !is_broadcast_ether_addr(eth_header->eth.dst_mac)) { list_for_each_entry_safe(res, tmp, rlist, list) { be_mac = cpu_to_be64(res->mac << 16); if (!memcmp(&be_mac, eth_header->eth.dst_mac, ETH_ALEN)) return 0; } pr_err("MAC %pM doesn't belong to VF %d, Steering rule rejected\n", eth_header->eth.dst_mac, slave); return -EINVAL; } return 0; } /* * In case of missing eth header, append eth header with a MAC address * assigned to the VF. */ static int add_eth_header(struct mlx4_dev *dev, int slave, struct mlx4_cmd_mailbox *inbox, struct list_head *rlist, int header_id) { struct mac_res *res, *tmp; u8 port; struct mlx4_net_trans_rule_hw_ctrl *ctrl; struct mlx4_net_trans_rule_hw_eth *eth_header; struct mlx4_net_trans_rule_hw_ipv4 *ip_header; struct mlx4_net_trans_rule_hw_tcp_udp *l4_header; __be64 be_mac = 0; __be64 mac_msk = cpu_to_be64(MLX4_MAC_MASK << 16); ctrl = (struct mlx4_net_trans_rule_hw_ctrl *)inbox->buf; port = ctrl->port; eth_header = (struct mlx4_net_trans_rule_hw_eth *)(ctrl + 1); /* Clear a space in the inbox for eth header */ switch (header_id) { case MLX4_NET_TRANS_RULE_ID_IPV4: ip_header = (struct mlx4_net_trans_rule_hw_ipv4 *)(eth_header + 1); memmove(ip_header, eth_header, sizeof(*ip_header) + sizeof(*l4_header)); break; case MLX4_NET_TRANS_RULE_ID_TCP: case MLX4_NET_TRANS_RULE_ID_UDP: l4_header = (struct mlx4_net_trans_rule_hw_tcp_udp *) (eth_header + 1); memmove(l4_header, eth_header, sizeof(*l4_header)); break; default: return -EINVAL; } list_for_each_entry_safe(res, tmp, rlist, list) { if (port == res->port) { be_mac = cpu_to_be64(res->mac << 16); break; } } if (!be_mac) { pr_err("Failed adding eth header to FS rule, Can't find matching MAC for port %d .\n", port); return -EINVAL; } memset(eth_header, 0, sizeof(*eth_header)); eth_header->size = sizeof(*eth_header) >> 2; eth_header->id = cpu_to_be16(__sw_id_hw[MLX4_NET_TRANS_RULE_ID_ETH]); memcpy(eth_header->dst_mac, &be_mac, ETH_ALEN); memcpy(eth_header->dst_mac_msk, &mac_msk, ETH_ALEN); return 0; } int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; struct list_head *rlist = &tracker->slave_list[slave].res_list[RES_MAC]; int err; int qpn; struct res_qp *rqp; struct mlx4_net_trans_rule_hw_ctrl *ctrl; struct _rule_hw *rule_header; int header_id; if (dev->caps.steering_mode != MLX4_STEERING_MODE_DEVICE_MANAGED) return -EOPNOTSUPP; ctrl = (struct mlx4_net_trans_rule_hw_ctrl *)inbox->buf; qpn = be32_to_cpu(ctrl->qpn) & 0xffffff; err = get_res(dev, slave, qpn, RES_QP, &rqp); if (err) { pr_err("Steering rule with qpn 0x%x rejected.\n", qpn); return err; } rule_header = (struct _rule_hw *)(ctrl + 1); header_id = map_hw_to_sw_id(be16_to_cpu(rule_header->id)); switch (header_id) { case MLX4_NET_TRANS_RULE_ID_ETH: if (validate_eth_header_mac(slave, rule_header, rlist)) { err = -EINVAL; goto err_put; } break; case MLX4_NET_TRANS_RULE_ID_IB: break; case MLX4_NET_TRANS_RULE_ID_IPV4: case MLX4_NET_TRANS_RULE_ID_TCP: case MLX4_NET_TRANS_RULE_ID_UDP: pr_warn("Can't attach FS rule without L2 headers, adding L2 header.\n"); if (add_eth_header(dev, slave, inbox, rlist, header_id)) { err = -EINVAL; goto err_put; } vhcr->in_modifier += sizeof(struct mlx4_net_trans_rule_hw_eth) >> 2; break; default: pr_err("Corrupted mailbox.\n"); err = -EINVAL; goto err_put; } err = mlx4_cmd_imm(dev, inbox->dma, &vhcr->out_param, vhcr->in_modifier, 0, MLX4_QP_FLOW_STEERING_ATTACH, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); if (err) goto err_put; err = add_res_range(dev, slave, vhcr->out_param, 1, RES_FS_RULE, qpn); if (err) { mlx4_err(dev, "Fail to add flow steering resources.\n "); /* detach rule*/ mlx4_cmd(dev, vhcr->out_param, 0, 0, MLX4_QP_FLOW_STEERING_DETACH, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); goto err_put; } atomic_inc(&rqp->ref_count); err_put: put_res(dev, slave, qpn, RES_QP); return err; } int mlx4_QP_FLOW_STEERING_DETACH_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { int err; struct res_qp *rqp; struct res_fs_rule *rrule; if (dev->caps.steering_mode != MLX4_STEERING_MODE_DEVICE_MANAGED) return -EOPNOTSUPP; err = get_res(dev, slave, vhcr->in_param, RES_FS_RULE, &rrule); if (err) return err; /* Release the rule form busy state before removal */ put_res(dev, slave, vhcr->in_param, RES_FS_RULE); err = get_res(dev, slave, rrule->qpn, RES_QP, &rqp); if (err) return err; err = mlx4_cmd(dev, vhcr->in_param, 0, 0, MLX4_QP_FLOW_STEERING_DETACH, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); if (!err) { err = rem_res_range(dev, slave, vhcr->in_param, 1, RES_FS_RULE, 0); atomic_dec(&rqp->ref_count); if (err) { mlx4_err(dev, "Fail to remove flow steering resources.\n "); goto out; } } out: put_res(dev, slave, rrule->qpn, RES_QP); return err; } enum { BUSY_MAX_RETRIES = 10 }; int mlx4_QUERY_IF_STAT_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { int err; err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd); return err; } static void detach_qp(struct mlx4_dev *dev, int slave, struct res_qp *rqp) { struct res_gid *rgid; struct res_gid *tmp; struct mlx4_qp qp; /* dummy for calling attach/detach */ list_for_each_entry_safe(rgid, tmp, &rqp->mcg_list, list) { switch (dev->caps.steering_mode) { case MLX4_STEERING_MODE_DEVICE_MANAGED: mlx4_flow_detach(dev, rgid->reg_id); break; case MLX4_STEERING_MODE_B0: qp.qpn = rqp->local_qpn; (void) mlx4_qp_detach_common(dev, &qp, rgid->gid, rgid->prot, rgid->steer); break; } list_del(&rgid->list); kfree(rgid); } } static int _move_all_busy(struct mlx4_dev *dev, int slave, enum mlx4_resource type, int print) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; struct list_head *rlist = &tracker->slave_list[slave].res_list[type]; struct res_common *r; struct res_common *tmp; int busy; busy = 0; spin_lock_irq(mlx4_tlock(dev)); list_for_each_entry_safe(r, tmp, rlist, list) { if (r->owner == slave) { if (!r->removing) { if (r->state == RES_ANY_BUSY) { if (print) mlx4_dbg(dev, "%s id 0x%llx is busy\n", ResourceType(type), (unsigned long long)r->res_id); ++busy; } else { r->from_state = r->state; r->state = RES_ANY_BUSY; r->removing = 1; } } } } spin_unlock_irq(mlx4_tlock(dev)); return busy; } static int move_all_busy(struct mlx4_dev *dev, int slave, enum mlx4_resource type) { unsigned long begin; int busy; begin = jiffies; do { busy = _move_all_busy(dev, slave, type, 0); if (time_after(jiffies, begin + 5 * HZ)) break; if (busy) cond_resched(); } while (busy); if (busy) busy = _move_all_busy(dev, slave, type, 1); return busy; } static void rem_slave_qps(struct mlx4_dev *dev, int slave) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; struct list_head *qp_list = &tracker->slave_list[slave].res_list[RES_QP]; struct res_qp *qp; struct res_qp *tmp; int state; u64 in_param; int qpn; int err; err = move_all_busy(dev, slave, RES_QP); if (err) mlx4_warn(dev, "rem_slave_qps: Could not move all qps to busy" "for slave %d\n", slave); spin_lock_irq(mlx4_tlock(dev)); list_for_each_entry_safe(qp, tmp, qp_list, com.list) { spin_unlock_irq(mlx4_tlock(dev)); if (qp->com.owner == slave) { qpn = qp->com.res_id; detach_qp(dev, slave, qp); state = qp->com.from_state; while (state != 0) { switch (state) { case RES_QP_RESERVED: spin_lock_irq(mlx4_tlock(dev)); rb_erase(&qp->com.node, &tracker->res_tree[RES_QP]); list_del(&qp->com.list); spin_unlock_irq(mlx4_tlock(dev)); if (!valid_reserved(dev, slave, qpn)) { __mlx4_qp_release_range(dev, qpn, 1); mlx4_release_resource(dev, slave, RES_QP, 1, 0); } kfree(qp); state = 0; break; case RES_QP_MAPPED: if (!valid_reserved(dev, slave, qpn)) __mlx4_qp_free_icm(dev, qpn); state = RES_QP_RESERVED; break; case RES_QP_HW: in_param = slave; err = mlx4_cmd(dev, in_param, qp->local_qpn, 2, MLX4_CMD_2RST_QP, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); if (err) mlx4_dbg(dev, "rem_slave_qps: failed" " to move slave %d qpn %d to" " reset\n", slave, qp->local_qpn); atomic_dec(&qp->rcq->ref_count); atomic_dec(&qp->scq->ref_count); atomic_dec(&qp->mtt->ref_count); if (qp->srq) atomic_dec(&qp->srq->ref_count); state = RES_QP_MAPPED; break; default: state = 0; } } } spin_lock_irq(mlx4_tlock(dev)); } spin_unlock_irq(mlx4_tlock(dev)); } static void rem_slave_srqs(struct mlx4_dev *dev, int slave) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; struct list_head *srq_list = &tracker->slave_list[slave].res_list[RES_SRQ]; struct res_srq *srq; struct res_srq *tmp; int state; u64 in_param; LIST_HEAD(tlist); int srqn; int err; err = move_all_busy(dev, slave, RES_SRQ); if (err) mlx4_warn(dev, "rem_slave_srqs: Could not move all srqs to " "busy for slave %d\n", slave); spin_lock_irq(mlx4_tlock(dev)); list_for_each_entry_safe(srq, tmp, srq_list, com.list) { spin_unlock_irq(mlx4_tlock(dev)); if (srq->com.owner == slave) { srqn = srq->com.res_id; state = srq->com.from_state; while (state != 0) { switch (state) { case RES_SRQ_ALLOCATED: __mlx4_srq_free_icm(dev, srqn); spin_lock_irq(mlx4_tlock(dev)); rb_erase(&srq->com.node, &tracker->res_tree[RES_SRQ]); list_del(&srq->com.list); spin_unlock_irq(mlx4_tlock(dev)); mlx4_release_resource(dev, slave, RES_SRQ, 1, 0); kfree(srq); state = 0; break; case RES_SRQ_HW: in_param = slave; err = mlx4_cmd(dev, in_param, srqn, 1, MLX4_CMD_HW2SW_SRQ, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); if (err) mlx4_dbg(dev, "rem_slave_srqs: failed" " to move slave %d srq %d to" " SW ownership\n", slave, srqn); atomic_dec(&srq->mtt->ref_count); if (srq->cq) atomic_dec(&srq->cq->ref_count); state = RES_SRQ_ALLOCATED; break; default: state = 0; } } } spin_lock_irq(mlx4_tlock(dev)); } spin_unlock_irq(mlx4_tlock(dev)); } static void rem_slave_cqs(struct mlx4_dev *dev, int slave) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; struct list_head *cq_list = &tracker->slave_list[slave].res_list[RES_CQ]; struct res_cq *cq; struct res_cq *tmp; int state; u64 in_param; LIST_HEAD(tlist); int cqn; int err; err = move_all_busy(dev, slave, RES_CQ); if (err) mlx4_warn(dev, "rem_slave_cqs: Could not move all cqs to " "busy for slave %d\n", slave); spin_lock_irq(mlx4_tlock(dev)); list_for_each_entry_safe(cq, tmp, cq_list, com.list) { spin_unlock_irq(mlx4_tlock(dev)); if (cq->com.owner == slave && !atomic_read(&cq->ref_count)) { cqn = cq->com.res_id; state = cq->com.from_state; while (state != 0) { switch (state) { case RES_CQ_ALLOCATED: __mlx4_cq_free_icm(dev, cqn); spin_lock_irq(mlx4_tlock(dev)); rb_erase(&cq->com.node, &tracker->res_tree[RES_CQ]); list_del(&cq->com.list); spin_unlock_irq(mlx4_tlock(dev)); mlx4_release_resource(dev, slave, RES_CQ, 1, 0); kfree(cq); state = 0; break; case RES_CQ_HW: in_param = slave; err = mlx4_cmd(dev, in_param, cqn, 1, MLX4_CMD_HW2SW_CQ, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); if (err) mlx4_dbg(dev, "rem_slave_cqs: failed" " to move slave %d cq %d to" " SW ownership\n", slave, cqn); atomic_dec(&cq->mtt->ref_count); state = RES_CQ_ALLOCATED; break; default: state = 0; } } } spin_lock_irq(mlx4_tlock(dev)); } spin_unlock_irq(mlx4_tlock(dev)); } static void rem_slave_mrs(struct mlx4_dev *dev, int slave) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; struct list_head *mpt_list = &tracker->slave_list[slave].res_list[RES_MPT]; struct res_mpt *mpt; struct res_mpt *tmp; int state; u64 in_param; LIST_HEAD(tlist); int mptn; int err; err = move_all_busy(dev, slave, RES_MPT); if (err) mlx4_warn(dev, "rem_slave_mrs: Could not move all mpts to " "busy for slave %d\n", slave); spin_lock_irq(mlx4_tlock(dev)); list_for_each_entry_safe(mpt, tmp, mpt_list, com.list) { spin_unlock_irq(mlx4_tlock(dev)); if (mpt->com.owner == slave) { mptn = mpt->com.res_id; state = mpt->com.from_state; while (state != 0) { switch (state) { case RES_MPT_RESERVED: __mlx4_mpt_release(dev, mpt->key); spin_lock_irq(mlx4_tlock(dev)); rb_erase(&mpt->com.node, &tracker->res_tree[RES_MPT]); list_del(&mpt->com.list); spin_unlock_irq(mlx4_tlock(dev)); mlx4_release_resource(dev, slave, RES_MPT, 1, 0); kfree(mpt); state = 0; break; case RES_MPT_MAPPED: __mlx4_mpt_free_icm(dev, mpt->key); state = RES_MPT_RESERVED; break; case RES_MPT_HW: in_param = slave; err = mlx4_cmd(dev, in_param, mptn, 0, MLX4_CMD_HW2SW_MPT, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); if (err) mlx4_dbg(dev, "rem_slave_mrs: failed" " to move slave %d mpt %d to" " SW ownership\n", slave, mptn); if (mpt->mtt) atomic_dec(&mpt->mtt->ref_count); state = RES_MPT_MAPPED; break; default: state = 0; } } } spin_lock_irq(mlx4_tlock(dev)); } spin_unlock_irq(mlx4_tlock(dev)); } static void rem_slave_mtts(struct mlx4_dev *dev, int slave) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; struct list_head *mtt_list = &tracker->slave_list[slave].res_list[RES_MTT]; struct res_mtt *mtt; struct res_mtt *tmp; int state; LIST_HEAD(tlist); int base; int err; err = move_all_busy(dev, slave, RES_MTT); if (err) mlx4_warn(dev, "rem_slave_mtts: Could not move all mtts to " "busy for slave %d\n", slave); spin_lock_irq(mlx4_tlock(dev)); list_for_each_entry_safe(mtt, tmp, mtt_list, com.list) { spin_unlock_irq(mlx4_tlock(dev)); if (mtt->com.owner == slave) { base = mtt->com.res_id; state = mtt->com.from_state; while (state != 0) { switch (state) { case RES_MTT_ALLOCATED: __mlx4_free_mtt_range(dev, base, mtt->order); spin_lock_irq(mlx4_tlock(dev)); rb_erase(&mtt->com.node, &tracker->res_tree[RES_MTT]); list_del(&mtt->com.list); spin_unlock_irq(mlx4_tlock(dev)); mlx4_release_resource(dev, slave, RES_MTT, 1 << mtt->order, 0); kfree(mtt); state = 0; break; default: state = 0; } } } spin_lock_irq(mlx4_tlock(dev)); } spin_unlock_irq(mlx4_tlock(dev)); } static void rem_slave_fs_rule(struct mlx4_dev *dev, int slave) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; struct list_head *fs_rule_list = &tracker->slave_list[slave].res_list[RES_FS_RULE]; struct res_fs_rule *fs_rule; struct res_fs_rule *tmp; int state; u64 base; int err; err = move_all_busy(dev, slave, RES_FS_RULE); if (err) mlx4_warn(dev, "rem_slave_fs_rule: Could not move all mtts to busy for slave %d\n", slave); spin_lock_irq(mlx4_tlock(dev)); list_for_each_entry_safe(fs_rule, tmp, fs_rule_list, com.list) { spin_unlock_irq(mlx4_tlock(dev)); if (fs_rule->com.owner == slave) { base = fs_rule->com.res_id; state = fs_rule->com.from_state; while (state != 0) { switch (state) { case RES_FS_RULE_ALLOCATED: /* detach rule */ err = mlx4_cmd(dev, base, 0, 0, MLX4_QP_FLOW_STEERING_DETACH, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); spin_lock_irq(mlx4_tlock(dev)); rb_erase(&fs_rule->com.node, &tracker->res_tree[RES_FS_RULE]); list_del(&fs_rule->com.list); spin_unlock_irq(mlx4_tlock(dev)); kfree(fs_rule); state = 0; break; default: state = 0; } } } spin_lock_irq(mlx4_tlock(dev)); } spin_unlock_irq(mlx4_tlock(dev)); } static void rem_slave_eqs(struct mlx4_dev *dev, int slave) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; struct list_head *eq_list = &tracker->slave_list[slave].res_list[RES_EQ]; struct res_eq *eq; struct res_eq *tmp; int err; int state; LIST_HEAD(tlist); int eqn; struct mlx4_cmd_mailbox *mailbox; err = move_all_busy(dev, slave, RES_EQ); if (err) mlx4_warn(dev, "rem_slave_eqs: Could not move all eqs to " "busy for slave %d\n", slave); spin_lock_irq(mlx4_tlock(dev)); list_for_each_entry_safe(eq, tmp, eq_list, com.list) { spin_unlock_irq(mlx4_tlock(dev)); if (eq->com.owner == slave) { eqn = eq->com.res_id; state = eq->com.from_state; while (state != 0) { switch (state) { case RES_EQ_RESERVED: spin_lock_irq(mlx4_tlock(dev)); rb_erase(&eq->com.node, &tracker->res_tree[RES_EQ]); list_del(&eq->com.list); spin_unlock_irq(mlx4_tlock(dev)); kfree(eq); state = 0; break; case RES_EQ_HW: mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) { cond_resched(); continue; } err = mlx4_cmd_box(dev, slave, 0, eqn & 0xff, 0, MLX4_CMD_HW2SW_EQ, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); if (err) mlx4_dbg(dev, "rem_slave_eqs: failed" " to move slave %d eqs %d to" " SW ownership\n", slave, eqn); mlx4_free_cmd_mailbox(dev, mailbox); atomic_dec(&eq->mtt->ref_count); state = RES_EQ_RESERVED; break; default: state = 0; } } } spin_lock_irq(mlx4_tlock(dev)); } spin_unlock_irq(mlx4_tlock(dev)); } static void rem_slave_counters(struct mlx4_dev *dev, int slave) { __mlx4_slave_counters_free(dev, slave); } static void rem_slave_xrcdns(struct mlx4_dev *dev, int slave) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; struct list_head *xrcdn_list = &tracker->slave_list[slave].res_list[RES_XRCD]; struct res_xrcdn *xrcd; struct res_xrcdn *tmp; int err; int xrcdn; err = move_all_busy(dev, slave, RES_XRCD); if (err) mlx4_warn(dev, "rem_slave_xrcdns: Could not move all xrcdns to " "busy for slave %d\n", slave); spin_lock_irq(mlx4_tlock(dev)); list_for_each_entry_safe(xrcd, tmp, xrcdn_list, com.list) { if (xrcd->com.owner == slave) { xrcdn = xrcd->com.res_id; rb_erase(&xrcd->com.node, &tracker->res_tree[RES_XRCD]); list_del(&xrcd->com.list); kfree(xrcd); __mlx4_xrcd_free(dev, xrcdn); } } spin_unlock_irq(mlx4_tlock(dev)); } void mlx4_delete_all_resources_for_slave(struct mlx4_dev *dev, int slave) { struct mlx4_priv *priv = mlx4_priv(dev); mutex_lock(&priv->mfunc.master.res_tracker.slave_list[slave].mutex); rem_slave_macs(dev, slave); rem_slave_vlans(dev, slave); rem_slave_fs_rule(dev, slave); rem_slave_qps(dev, slave); rem_slave_srqs(dev, slave); rem_slave_cqs(dev, slave); rem_slave_mrs(dev, slave); rem_slave_eqs(dev, slave); rem_slave_mtts(dev, slave); rem_slave_counters(dev, slave); rem_slave_xrcdns(dev, slave); mutex_unlock(&priv->mfunc.master.res_tracker.slave_list[slave].mutex); } void mlx4_vf_immed_vlan_work_handler(struct work_struct *_work) { struct mlx4_vf_immed_vlan_work *work = container_of(_work, struct mlx4_vf_immed_vlan_work, work); struct mlx4_cmd_mailbox *mailbox; struct mlx4_update_qp_context *upd_context; struct mlx4_dev *dev = &work->priv->dev; struct mlx4_resource_tracker *tracker = &work->priv->mfunc.master.res_tracker; struct list_head *qp_list = &tracker->slave_list[work->slave].res_list[RES_QP]; struct res_qp *qp; struct res_qp *tmp; u64 qp_path_mask_vlan_ctrl = ((1ULL << MLX4_UPD_QP_PATH_MASK_ETH_TX_BLOCK_UNTAGGED) | (1ULL << MLX4_UPD_QP_PATH_MASK_ETH_TX_BLOCK_1P) | (1ULL << MLX4_UPD_QP_PATH_MASK_ETH_TX_BLOCK_TAGGED) | (1ULL << MLX4_UPD_QP_PATH_MASK_ETH_RX_BLOCK_UNTAGGED) | (1ULL << MLX4_UPD_QP_PATH_MASK_ETH_RX_BLOCK_1P) | (1ULL << MLX4_UPD_QP_PATH_MASK_ETH_RX_BLOCK_TAGGED)); u64 qp_path_mask = ((1ULL << MLX4_UPD_QP_PATH_MASK_VLAN_INDEX) | (1ULL << MLX4_UPD_QP_PATH_MASK_FVL) | (1ULL << MLX4_UPD_QP_PATH_MASK_CV) | (1ULL << MLX4_UPD_QP_PATH_MASK_ETH_HIDE_CQE_VLAN) | (1ULL << MLX4_UPD_QP_PATH_MASK_FEUP) | (1ULL << MLX4_UPD_QP_PATH_MASK_FVL_RX) | (1ULL << MLX4_UPD_QP_PATH_MASK_SCHED_QUEUE)); int err; int port, errors = 0; u8 vlan_control; if (mlx4_is_slave(dev)) { mlx4_warn(dev, "Trying to update-qp in slave %d\n", work->slave); goto out; } mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) goto out; if (!work->vlan_id) vlan_control = MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED | MLX4_VLAN_CTRL_ETH_RX_BLOCK_TAGGED; else vlan_control = MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED | MLX4_VLAN_CTRL_ETH_RX_BLOCK_PRIO_TAGGED | MLX4_VLAN_CTRL_ETH_RX_BLOCK_UNTAGGED; upd_context = mailbox->buf; upd_context->qp_mask = cpu_to_be64(MLX4_UPD_QP_MASK_VSD); spin_lock_irq(mlx4_tlock(dev)); list_for_each_entry_safe(qp, tmp, qp_list, com.list) { spin_unlock_irq(mlx4_tlock(dev)); if (qp->com.owner == work->slave) { if (qp->com.from_state != RES_QP_HW || !qp->sched_queue || /* no INIT2RTR trans yet */ mlx4_is_qp_reserved(dev, qp->local_qpn) || qp->qpc_flags & (1 << MLX4_RSS_QPC_FLAG_OFFSET)) { spin_lock_irq(mlx4_tlock(dev)); continue; } port = (qp->sched_queue >> 6 & 1) + 1; if (port != work->port) { spin_lock_irq(mlx4_tlock(dev)); continue; } if (MLX4_QP_ST_RC == ((qp->qpc_flags >> 16) & 0xff)) upd_context->primary_addr_path_mask = cpu_to_be64(qp_path_mask); else upd_context->primary_addr_path_mask = cpu_to_be64(qp_path_mask | qp_path_mask_vlan_ctrl); if (work->vlan_id == MLX4_VGT) { upd_context->qp_context.param3 = qp->param3; upd_context->qp_context.pri_path.vlan_control = qp->vlan_control; upd_context->qp_context.pri_path.fvl_rx = qp->fvl_rx; upd_context->qp_context.pri_path.vlan_index = qp->vlan_index; upd_context->qp_context.pri_path.fl = qp->pri_path_fl; upd_context->qp_context.pri_path.feup = qp->feup; upd_context->qp_context.pri_path.sched_queue = qp->sched_queue; } else { upd_context->qp_context.param3 = qp->param3 & ~cpu_to_be32(MLX4_STRIP_VLAN); upd_context->qp_context.pri_path.vlan_control = vlan_control; upd_context->qp_context.pri_path.vlan_index = work->vlan_ix; upd_context->qp_context.pri_path.fvl_rx = qp->fvl_rx | MLX4_FVL_RX_FORCE_ETH_VLAN; upd_context->qp_context.pri_path.fl = qp->pri_path_fl | MLX4_FL_CV | MLX4_FL_ETH_HIDE_CQE_VLAN; upd_context->qp_context.pri_path.feup = qp->feup | MLX4_FEUP_FORCE_ETH_UP | MLX4_FVL_FORCE_ETH_VLAN; upd_context->qp_context.pri_path.sched_queue = qp->sched_queue & 0xC7; upd_context->qp_context.pri_path.sched_queue |= ((work->qos & 0x7) << 3); } err = mlx4_cmd(dev, mailbox->dma, qp->local_qpn & 0xffffff, 0, MLX4_CMD_UPDATE_QP, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE); if (err) { mlx4_info(dev, "UPDATE_QP failed for slave %d, " "port %d, qpn %d (%d)\n", work->slave, port, qp->local_qpn, err); errors++; } } spin_lock_irq(mlx4_tlock(dev)); } spin_unlock_irq(mlx4_tlock(dev)); mlx4_free_cmd_mailbox(dev, mailbox); if (errors) mlx4_err(dev, "%d UPDATE_QP failures for slave %d, port %d\n", errors, work->slave, work->port); /* unregister previous vlan_id if needed and we had no errors * while updating the QPs */ if (work->flags & MLX4_VF_IMMED_VLAN_FLAG_VLAN && !errors && NO_INDX != work->orig_vlan_ix) __mlx4_unregister_vlan(&work->priv->dev, work->port, work->orig_vlan_id); out: kfree(work); return; }