Index: stable/9/sys/ofed/drivers/infiniband/hw/mlx4/main.c =================================================================== --- stable/9/sys/ofed/drivers/infiniband/hw/mlx4/main.c (revision 277139) +++ stable/9/sys/ofed/drivers/infiniband/hw/mlx4/main.c (revision 277140) @@ -1,2417 +1,2421 @@ /* * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include #ifdef __linux__ #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "mlx4_ib.h" #include "user.h" #include "wc.h" #define DRV_NAME MLX4_IB_DRV_NAME #define DRV_VERSION "1.0" #define DRV_RELDATE "April 4, 2008" #define MLX4_IB_DRIVER_PROC_DIR_NAME "driver/mlx4_ib" #define MLX4_IB_MRS_PROC_DIR_NAME "mrs" MODULE_AUTHOR("Roland Dreier"); MODULE_DESCRIPTION("Mellanox ConnectX HCA InfiniBand driver"); MODULE_LICENSE("Dual BSD/GPL"); MODULE_VERSION(DRV_VERSION); int mlx4_ib_sm_guid_assign = 1; #ifdef __linux__ struct proc_dir_entry *mlx4_mrs_dir_entry; static struct proc_dir_entry *mlx4_ib_driver_dir_entry; #endif module_param_named(sm_guid_assign, mlx4_ib_sm_guid_assign, int, 0444); MODULE_PARM_DESC(sm_guid_assign, "Enable SM alias_GUID assignment if sm_guid_assign > 0 (Default: 1)"); static char dev_assign_str[512]; //module_param_string(dev_assign_str, dev_assign_str, sizeof(dev_assign_str), 0644); MODULE_PARM_DESC(dev_assign_str, "Map all device function numbers to " "IB device numbers following the pattern: " "bb:dd.f-0,bb:dd.f-1,... (all numbers are hexadecimals)." " Max supported devices - 32"); static const char mlx4_ib_version[] = DRV_NAME ": Mellanox ConnectX InfiniBand driver v" DRV_VERSION " (" DRV_RELDATE ")\n"; struct update_gid_work { struct work_struct work; union ib_gid gids[128]; struct mlx4_ib_dev *dev; int port; }; struct dev_rec { int bus; int dev; int func; int nr; }; #define MAX_DR 32 static struct dev_rec dr[MAX_DR]; static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init); static struct workqueue_struct *wq; static void init_query_mad(struct ib_smp *mad) { mad->base_version = 1; mad->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED; mad->class_version = 1; mad->method = IB_MGMT_METHOD_GET; } static union ib_gid zgid; static int mlx4_ib_query_device(struct ib_device *ibdev, struct ib_device_attr *props) { struct mlx4_ib_dev *dev = to_mdev(ibdev); struct ib_smp *in_mad = NULL; struct ib_smp *out_mad = NULL; int err = -ENOMEM; in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); if (!in_mad || !out_mad) goto out; init_query_mad(in_mad); in_mad->attr_id = IB_SMP_ATTR_NODE_INFO; err = mlx4_MAD_IFC(to_mdev(ibdev), MLX4_MAD_IFC_IGNORE_KEYS, 1, NULL, NULL, in_mad, out_mad); if (err) goto out; memset(props, 0, sizeof *props); props->fw_ver = dev->dev->caps.fw_ver; props->device_cap_flags = IB_DEVICE_CHANGE_PHY_PORT | IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN | IB_DEVICE_BLOCK_MULTICAST_LOOPBACK | IB_DEVICE_SHARED_MR; if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_PKEY_CNTR) props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR; if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_QKEY_CNTR) props->device_cap_flags |= IB_DEVICE_BAD_QKEY_CNTR; if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_APM) props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG; if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_UD_AV_PORT) props->device_cap_flags |= IB_DEVICE_UD_AV_PORT_ENFORCE; if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IPOIB_CSUM) props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM; if (dev->dev->caps.max_gso_sz && dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BLH) props->device_cap_flags |= IB_DEVICE_UD_TSO; if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_RESERVED_LKEY) props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY; if ((dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_LOCAL_INV) && (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_REMOTE_INV) && (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_FAST_REG_WR)) props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) props->device_cap_flags |= IB_DEVICE_XRC; props->device_cap_flags |= IB_DEVICE_QPG; if (dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS) { props->device_cap_flags |= IB_DEVICE_UD_RSS; props->max_rss_tbl_sz = dev->dev->caps.max_rss_tbl_sz; } props->vendor_id = be32_to_cpup((__be32 *) (out_mad->data + 36)) & 0xffffff; props->vendor_part_id = dev->dev->pdev->device; props->hw_ver = be32_to_cpup((__be32 *) (out_mad->data + 32)); memcpy(&props->sys_image_guid, out_mad->data + 4, 8); props->max_mr_size = ~0ull; props->page_size_cap = dev->dev->caps.page_size_cap; props->max_qp = dev->dev->quotas.qp; props->max_qp_wr = dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE; props->max_sge = min(dev->dev->caps.max_sq_sg, dev->dev->caps.max_rq_sg); props->max_cq = dev->dev->quotas.cq; props->max_cqe = dev->dev->caps.max_cqes; props->max_mr = dev->dev->quotas.mpt; props->max_pd = dev->dev->caps.num_pds - dev->dev->caps.reserved_pds; props->max_qp_rd_atom = dev->dev->caps.max_qp_dest_rdma; props->max_qp_init_rd_atom = dev->dev->caps.max_qp_init_rdma; props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp; props->max_srq = dev->dev->quotas.srq; props->max_srq_wr = dev->dev->caps.max_srq_wqes - 1; props->max_srq_sge = dev->dev->caps.max_srq_sge; props->max_fast_reg_page_list_len = MLX4_MAX_FAST_REG_PAGES; props->local_ca_ack_delay = dev->dev->caps.local_ca_ack_delay; props->atomic_cap = dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_ATOMIC ? IB_ATOMIC_HCA : IB_ATOMIC_NONE; props->masked_atomic_cap = props->atomic_cap; props->max_pkeys = dev->dev->caps.pkey_table_len[1]; props->max_mcast_grp = dev->dev->caps.num_mgms + dev->dev->caps.num_amgms; props->max_mcast_qp_attach = dev->dev->caps.num_qp_per_mgm; props->max_total_mcast_qp_attach = props->max_mcast_qp_attach * props->max_mcast_grp; props->max_map_per_fmr = dev->dev->caps.max_fmr_maps; out: kfree(in_mad); kfree(out_mad); return err; } static enum rdma_link_layer mlx4_ib_port_link_layer(struct ib_device *device, u8 port_num) { struct mlx4_dev *dev = to_mdev(device)->dev; return dev->caps.port_mask[port_num] == MLX4_PORT_TYPE_IB ? IB_LINK_LAYER_INFINIBAND : IB_LINK_LAYER_ETHERNET; } static int ib_link_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props, int netw_view) { struct ib_smp *in_mad = NULL; struct ib_smp *out_mad = NULL; int ext_active_speed; int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS; int err = -ENOMEM; in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); if (!in_mad || !out_mad) goto out; init_query_mad(in_mad); in_mad->attr_id = IB_SMP_ATTR_PORT_INFO; in_mad->attr_mod = cpu_to_be32(port); if (mlx4_is_mfunc(to_mdev(ibdev)->dev) && netw_view) mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW; err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port, NULL, NULL, in_mad, out_mad); if (err) goto out; props->lid = be16_to_cpup((__be16 *) (out_mad->data + 16)); props->lmc = out_mad->data[34] & 0x7; props->sm_lid = be16_to_cpup((__be16 *) (out_mad->data + 18)); props->sm_sl = out_mad->data[36] & 0xf; props->state = out_mad->data[32] & 0xf; props->phys_state = out_mad->data[33] >> 4; props->port_cap_flags = be32_to_cpup((__be32 *) (out_mad->data + 20)); if (netw_view) props->gid_tbl_len = out_mad->data[50]; else props->gid_tbl_len = to_mdev(ibdev)->dev->caps.gid_table_len[port]; props->max_msg_sz = to_mdev(ibdev)->dev->caps.max_msg_sz; props->pkey_tbl_len = to_mdev(ibdev)->dev->caps.pkey_table_len[port]; props->bad_pkey_cntr = be16_to_cpup((__be16 *) (out_mad->data + 46)); props->qkey_viol_cntr = be16_to_cpup((__be16 *) (out_mad->data + 48)); props->active_width = out_mad->data[31] & 0xf; props->active_speed = out_mad->data[35] >> 4; props->max_mtu = out_mad->data[41] & 0xf; props->active_mtu = out_mad->data[36] >> 4; props->subnet_timeout = out_mad->data[51] & 0x1f; props->max_vl_num = out_mad->data[37] >> 4; props->init_type_reply = out_mad->data[41] >> 4; /* Check if extended speeds (EDR/FDR/...) are supported */ if (props->port_cap_flags & IB_PORT_EXTENDED_SPEEDS_SUP) { ext_active_speed = out_mad->data[62] >> 4; switch (ext_active_speed) { case 1: props->active_speed = IB_SPEED_FDR; break; case 2: props->active_speed = IB_SPEED_EDR; break; } } /* If reported active speed is QDR, check if is FDR-10 */ if (props->active_speed == IB_SPEED_QDR) { init_query_mad(in_mad); in_mad->attr_id = MLX4_ATTR_EXTENDED_PORT_INFO; in_mad->attr_mod = cpu_to_be32(port); err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port, NULL, NULL, in_mad, out_mad); if (err) goto out; /* Checking LinkSpeedActive for FDR-10 */ if (out_mad->data[15] & 0x1) props->active_speed = IB_SPEED_FDR10; } /* Avoid wrong speed value returned by FW if the IB link is down. */ if (props->state == IB_PORT_DOWN) props->active_speed = IB_SPEED_SDR; out: kfree(in_mad); kfree(out_mad); return err; } static u8 state_to_phys_state(enum ib_port_state state) { return state == IB_PORT_ACTIVE ? 5 : 3; } static int eth_link_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props, int netw_view) { struct mlx4_ib_dev *mdev = to_mdev(ibdev); struct mlx4_ib_iboe *iboe = &mdev->iboe; struct net_device *ndev; enum ib_mtu tmp; struct mlx4_cmd_mailbox *mailbox; int err = 0; mailbox = mlx4_alloc_cmd_mailbox(mdev->dev); if (IS_ERR(mailbox)) return PTR_ERR(mailbox); err = mlx4_cmd_box(mdev->dev, 0, mailbox->dma, port, 0, MLX4_CMD_QUERY_PORT, MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED); if (err) goto out; props->active_width = (((u8 *)mailbox->buf)[5] == 0x40) ? IB_WIDTH_4X : IB_WIDTH_1X; props->active_speed = IB_SPEED_QDR; props->port_cap_flags = IB_PORT_CM_SUP; if (netw_view) props->gid_tbl_len = MLX4_ROCE_MAX_GIDS; else props->gid_tbl_len = mdev->dev->caps.gid_table_len[port]; props->max_msg_sz = mdev->dev->caps.max_msg_sz; props->pkey_tbl_len = 1; props->max_mtu = IB_MTU_4096; props->max_vl_num = 2; props->state = IB_PORT_DOWN; props->phys_state = state_to_phys_state(props->state); props->active_mtu = IB_MTU_256; spin_lock(&iboe->lock); ndev = iboe->netdevs[port - 1]; if (!ndev) goto out_unlock; tmp = iboe_get_mtu(ndev->if_mtu); props->active_mtu = tmp ? min(props->max_mtu, tmp) : IB_MTU_256; props->state = (netif_running(ndev) && netif_carrier_ok(ndev)) ? IB_PORT_ACTIVE : IB_PORT_DOWN; props->phys_state = state_to_phys_state(props->state); out_unlock: spin_unlock(&iboe->lock); out: mlx4_free_cmd_mailbox(mdev->dev, mailbox); return err; } int __mlx4_ib_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props, int netw_view) { int err; memset(props, 0, sizeof *props); err = mlx4_ib_port_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND ? ib_link_query_port(ibdev, port, props, netw_view) : eth_link_query_port(ibdev, port, props, netw_view); return err; } static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props) { /* returns host view */ return __mlx4_ib_query_port(ibdev, port, props, 0); } int __mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index, union ib_gid *gid, int netw_view) { struct ib_smp *in_mad = NULL; struct ib_smp *out_mad = NULL; int err = -ENOMEM; struct mlx4_ib_dev *dev = to_mdev(ibdev); int clear = 0; int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS; in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); if (!in_mad || !out_mad) goto out; init_query_mad(in_mad); in_mad->attr_id = IB_SMP_ATTR_PORT_INFO; in_mad->attr_mod = cpu_to_be32(port); if (mlx4_is_mfunc(dev->dev) && netw_view) mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW; err = mlx4_MAD_IFC(dev, mad_ifc_flags, port, NULL, NULL, in_mad, out_mad); if (err) goto out; memcpy(gid->raw, out_mad->data + 8, 8); if (mlx4_is_mfunc(dev->dev) && !netw_view) { if (index) { /* For any index > 0, return the null guid */ err = 0; clear = 1; goto out; } } init_query_mad(in_mad); in_mad->attr_id = IB_SMP_ATTR_GUID_INFO; in_mad->attr_mod = cpu_to_be32(index / 8); err = mlx4_MAD_IFC(dev, mad_ifc_flags, port, NULL, NULL, in_mad, out_mad); if (err) goto out; memcpy(gid->raw + 8, out_mad->data + (index % 8) * 8, 8); out: if (clear) memset(gid->raw + 8, 0, 8); kfree(in_mad); kfree(out_mad); return err; } static int iboe_query_gid(struct ib_device *ibdev, u8 port, int index, union ib_gid *gid) { struct mlx4_ib_dev *dev = to_mdev(ibdev); *gid = dev->iboe.gid_table[port - 1][index]; return 0; } static int mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index, union ib_gid *gid) { if (rdma_port_get_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND) return __mlx4_ib_query_gid(ibdev, port, index, gid, 0); else return iboe_query_gid(ibdev, port, index, gid); } int __mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey, int netw_view) { struct ib_smp *in_mad = NULL; struct ib_smp *out_mad = NULL; int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS; int err = -ENOMEM; in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); if (!in_mad || !out_mad) goto out; init_query_mad(in_mad); in_mad->attr_id = IB_SMP_ATTR_PKEY_TABLE; in_mad->attr_mod = cpu_to_be32(index / 32); if (mlx4_is_mfunc(to_mdev(ibdev)->dev) && netw_view) mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW; err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port, NULL, NULL, in_mad, out_mad); if (err) goto out; *pkey = be16_to_cpu(((__be16 *) out_mad->data)[index % 32]); out: kfree(in_mad); kfree(out_mad); return err; } static int mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey) { return __mlx4_ib_query_pkey(ibdev, port, index, pkey, 0); } static int mlx4_ib_modify_device(struct ib_device *ibdev, int mask, struct ib_device_modify *props) { struct mlx4_cmd_mailbox *mailbox; unsigned long flags; if (mask & ~IB_DEVICE_MODIFY_NODE_DESC) return -EOPNOTSUPP; if (!(mask & IB_DEVICE_MODIFY_NODE_DESC)) return 0; if (mlx4_is_slave(to_mdev(ibdev)->dev)) return -EOPNOTSUPP; spin_lock_irqsave(&to_mdev(ibdev)->sm_lock, flags); memcpy(ibdev->node_desc, props->node_desc, 64); spin_unlock_irqrestore(&to_mdev(ibdev)->sm_lock, flags); /* * If possible, pass node desc to FW, so it can generate * a 144 trap. If cmd fails, just ignore. */ mailbox = mlx4_alloc_cmd_mailbox(to_mdev(ibdev)->dev); if (IS_ERR(mailbox)) return 0; memset(mailbox->buf, 0, 256); memcpy(mailbox->buf, props->node_desc, 64); mlx4_cmd(to_mdev(ibdev)->dev, mailbox->dma, 1, 0, MLX4_CMD_SET_NODE, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); mlx4_free_cmd_mailbox(to_mdev(ibdev)->dev, mailbox); return 0; } static int mlx4_SET_PORT(struct mlx4_ib_dev *dev, u8 port, int reset_qkey_viols, u32 cap_mask) { struct mlx4_cmd_mailbox *mailbox; int err; u8 is_eth = dev->dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH; mailbox = mlx4_alloc_cmd_mailbox(dev->dev); if (IS_ERR(mailbox)) return PTR_ERR(mailbox); memset(mailbox->buf, 0, 256); if (dev->dev->flags & MLX4_FLAG_OLD_PORT_CMDS) { *(u8 *) mailbox->buf = !!reset_qkey_viols << 6; ((__be32 *) mailbox->buf)[2] = cpu_to_be32(cap_mask); } else { ((u8 *) mailbox->buf)[3] = !!reset_qkey_viols; ((__be32 *) mailbox->buf)[1] = cpu_to_be32(cap_mask); } err = mlx4_cmd(dev->dev, mailbox->dma, port, is_eth, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); mlx4_free_cmd_mailbox(dev->dev, mailbox); return err; } static int mlx4_ib_modify_port(struct ib_device *ibdev, u8 port, int mask, struct ib_port_modify *props) { struct ib_port_attr attr; u32 cap_mask; int err; mutex_lock(&to_mdev(ibdev)->cap_mask_mutex); err = mlx4_ib_query_port(ibdev, port, &attr); if (err) goto out; cap_mask = (attr.port_cap_flags | props->set_port_cap_mask) & ~props->clr_port_cap_mask; err = mlx4_SET_PORT(to_mdev(ibdev), port, !!(mask & IB_PORT_RESET_QKEY_CNTR), cap_mask); out: mutex_unlock(&to_mdev(ibdev)->cap_mask_mutex); return err; } static struct ib_ucontext *mlx4_ib_alloc_ucontext(struct ib_device *ibdev, struct ib_udata *udata) { struct mlx4_ib_dev *dev = to_mdev(ibdev); struct mlx4_ib_ucontext *context; struct mlx4_ib_alloc_ucontext_resp_v3 resp_v3; struct mlx4_ib_alloc_ucontext_resp resp; int err; if (!dev->ib_active) return ERR_PTR(-EAGAIN); if (ibdev->uverbs_abi_ver == MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION) { resp_v3.qp_tab_size = dev->dev->caps.num_qps; if (mlx4_wc_enabled()) { resp_v3.bf_reg_size = dev->dev->caps.bf_reg_size; resp_v3.bf_regs_per_page = dev->dev->caps.bf_regs_per_page; } else { resp_v3.bf_reg_size = 0; resp_v3.bf_regs_per_page = 0; } } else { resp.dev_caps = dev->dev->caps.userspace_caps; resp.qp_tab_size = dev->dev->caps.num_qps; if (mlx4_wc_enabled()) { resp.bf_reg_size = dev->dev->caps.bf_reg_size; resp.bf_regs_per_page = dev->dev->caps.bf_regs_per_page; } else { resp.bf_reg_size = 0; resp.bf_regs_per_page = 0; } resp.cqe_size = dev->dev->caps.cqe_size; } context = kmalloc(sizeof *context, GFP_KERNEL); if (!context) return ERR_PTR(-ENOMEM); err = mlx4_uar_alloc(to_mdev(ibdev)->dev, &context->uar); if (err) { kfree(context); return ERR_PTR(err); } INIT_LIST_HEAD(&context->db_page_list); mutex_init(&context->db_page_mutex); if (ibdev->uverbs_abi_ver == MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION) err = ib_copy_to_udata(udata, &resp_v3, sizeof(resp_v3)); else err = ib_copy_to_udata(udata, &resp, sizeof(resp)); if (err) { mlx4_uar_free(to_mdev(ibdev)->dev, &context->uar); kfree(context); return ERR_PTR(-EFAULT); } return &context->ibucontext; } static int mlx4_ib_dealloc_ucontext(struct ib_ucontext *ibcontext) { struct mlx4_ib_ucontext *context = to_mucontext(ibcontext); mlx4_uar_free(to_mdev(ibcontext->device)->dev, &context->uar); kfree(context); return 0; } #ifdef __linux__ static unsigned long mlx4_ib_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) { struct mm_struct *mm; struct vm_area_struct *vma; unsigned long start_addr; unsigned long page_size_order; unsigned long command; mm = current->mm; if (addr) return current->mm->get_unmapped_area(file, addr, len, pgoff, flags); /* Last 8 bits hold the command others are data per that command */ command = pgoff & MLX4_IB_MMAP_CMD_MASK; if (command != MLX4_IB_MMAP_GET_CONTIGUOUS_PAGES) return current->mm->get_unmapped_area(file, addr, len, pgoff, flags); page_size_order = pgoff >> MLX4_IB_MMAP_CMD_BITS; /* code is based on the huge-pages get_unmapped_area code */ start_addr = mm->free_area_cache; if (len <= mm->cached_hole_size) start_addr = TASK_UNMAPPED_BASE; full_search: addr = ALIGN(start_addr, 1 << page_size_order); for (vma = find_vma(mm, addr); ; vma = vma->vm_next) { /* At this point: (!vma || addr < vma->vm_end). */ if (TASK_SIZE - len < addr) { /* * Start a new search - just in case we missed * some holes. */ if (start_addr != TASK_UNMAPPED_BASE) { start_addr = TASK_UNMAPPED_BASE; goto full_search; } return -ENOMEM; } if (!vma || addr + len <= vma->vm_start) return addr; addr = ALIGN(vma->vm_end, 1 << page_size_order); } } #endif static int mlx4_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) { struct mlx4_ib_dev *dev = to_mdev(context->device); int err; /* Last 8 bits hold the command others are data per that command */ unsigned long command = vma->vm_pgoff & MLX4_IB_MMAP_CMD_MASK; if (command < MLX4_IB_MMAP_GET_CONTIGUOUS_PAGES) { /* compatability handling for commands 0 & 1*/ if (vma->vm_end - vma->vm_start != PAGE_SIZE) return -EINVAL; } if (command == MLX4_IB_MMAP_UAR_PAGE) { vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); if (io_remap_pfn_range(vma, vma->vm_start, to_mucontext(context)->uar.pfn, PAGE_SIZE, vma->vm_page_prot)) return -EAGAIN; } else if (command == MLX4_IB_MMAP_BLUE_FLAME_PAGE && dev->dev->caps.bf_reg_size != 0) { vma->vm_page_prot = pgprot_wc(vma->vm_page_prot); if (io_remap_pfn_range(vma, vma->vm_start, to_mucontext(context)->uar.pfn + dev->dev->caps.num_uars, PAGE_SIZE, vma->vm_page_prot)) return -EAGAIN; } else if (command == MLX4_IB_MMAP_GET_CONTIGUOUS_PAGES) { /* Getting contiguous physical pages */ unsigned long total_size = vma->vm_end - vma->vm_start; unsigned long page_size_order = (vma->vm_pgoff) >> MLX4_IB_MMAP_CMD_BITS; struct ib_cmem *ib_cmem; ib_cmem = ib_cmem_alloc_contiguous_pages(context, total_size, page_size_order); if (IS_ERR(ib_cmem)) { err = PTR_ERR(ib_cmem); return err; } err = ib_cmem_map_contiguous_pages_to_vma(ib_cmem, vma); if (err) { ib_cmem_release_contiguous_pages(ib_cmem); return err; } return 0; } else return -EINVAL; return 0; } static struct ib_pd *mlx4_ib_alloc_pd(struct ib_device *ibdev, struct ib_ucontext *context, struct ib_udata *udata) { struct mlx4_ib_pd *pd; int err; pd = kmalloc(sizeof *pd, GFP_KERNEL); if (!pd) return ERR_PTR(-ENOMEM); err = mlx4_pd_alloc(to_mdev(ibdev)->dev, &pd->pdn); if (err) { kfree(pd); return ERR_PTR(err); } if (context) if (ib_copy_to_udata(udata, &pd->pdn, sizeof (__u32))) { mlx4_pd_free(to_mdev(ibdev)->dev, pd->pdn); kfree(pd); return ERR_PTR(-EFAULT); } return &pd->ibpd; } static int mlx4_ib_dealloc_pd(struct ib_pd *pd) { mlx4_pd_free(to_mdev(pd->device)->dev, to_mpd(pd)->pdn); kfree(pd); return 0; } static struct ib_xrcd *mlx4_ib_alloc_xrcd(struct ib_device *ibdev, struct ib_ucontext *context, struct ib_udata *udata) { struct mlx4_ib_xrcd *xrcd; int err; if (!(to_mdev(ibdev)->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC)) return ERR_PTR(-ENOSYS); xrcd = kmalloc(sizeof *xrcd, GFP_KERNEL); if (!xrcd) return ERR_PTR(-ENOMEM); err = mlx4_xrcd_alloc(to_mdev(ibdev)->dev, &xrcd->xrcdn); if (err) goto err1; xrcd->pd = ib_alloc_pd(ibdev); if (IS_ERR(xrcd->pd)) { err = PTR_ERR(xrcd->pd); goto err2; } xrcd->cq = ib_create_cq(ibdev, NULL, NULL, xrcd, 1, 0); if (IS_ERR(xrcd->cq)) { err = PTR_ERR(xrcd->cq); goto err3; } return &xrcd->ibxrcd; err3: ib_dealloc_pd(xrcd->pd); err2: mlx4_xrcd_free(to_mdev(ibdev)->dev, xrcd->xrcdn); err1: kfree(xrcd); return ERR_PTR(err); } static int mlx4_ib_dealloc_xrcd(struct ib_xrcd *xrcd) { ib_destroy_cq(to_mxrcd(xrcd)->cq); ib_dealloc_pd(to_mxrcd(xrcd)->pd); mlx4_xrcd_free(to_mdev(xrcd->device)->dev, to_mxrcd(xrcd)->xrcdn); kfree(xrcd); return 0; } static int add_gid_entry(struct ib_qp *ibqp, union ib_gid *gid) { struct mlx4_ib_qp *mqp = to_mqp(ibqp); struct mlx4_ib_dev *mdev = to_mdev(ibqp->device); struct mlx4_ib_gid_entry *ge; ge = kzalloc(sizeof *ge, GFP_KERNEL); if (!ge) return -ENOMEM; ge->gid = *gid; if (mlx4_ib_add_mc(mdev, mqp, gid)) { ge->port = mqp->port; ge->added = 1; } mutex_lock(&mqp->mutex); list_add_tail(&ge->list, &mqp->gid_list); mutex_unlock(&mqp->mutex); return 0; } int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp, union ib_gid *gid) { u8 mac[6]; struct net_device *ndev; int ret = 0; if (!mqp->port) return 0; spin_lock(&mdev->iboe.lock); ndev = mdev->iboe.netdevs[mqp->port - 1]; if (ndev) dev_hold(ndev); spin_unlock(&mdev->iboe.lock); if (ndev) { rdma_get_mcast_mac((struct in6_addr *)gid, mac); rtnl_lock(); dev_mc_add(mdev->iboe.netdevs[mqp->port - 1], mac, 6, 0); ret = 1; rtnl_unlock(); dev_put(ndev); } return ret; } struct mlx4_ib_steering { struct list_head list; u64 reg_id; union ib_gid gid; }; static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) { int err; struct mlx4_ib_dev *mdev = to_mdev(ibqp->device); struct mlx4_ib_qp *mqp = to_mqp(ibqp); u64 reg_id; struct mlx4_ib_steering *ib_steering = NULL; if (mdev->dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) { ib_steering = kmalloc(sizeof(*ib_steering), GFP_KERNEL); if (!ib_steering) return -ENOMEM; } err = mlx4_multicast_attach(mdev->dev, &mqp->mqp, gid->raw, mqp->port, !!(mqp->flags & MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK), MLX4_PROT_IB_IPV6, ®_id); if (err) goto err_malloc; err = add_gid_entry(ibqp, gid); if (err) goto err_add; if (ib_steering) { memcpy(ib_steering->gid.raw, gid->raw, 16); ib_steering->reg_id = reg_id; mutex_lock(&mqp->mutex); list_add(&ib_steering->list, &mqp->steering_rules); mutex_unlock(&mqp->mutex); } return 0; err_add: mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw, MLX4_PROT_IB_IPV6, reg_id); err_malloc: kfree(ib_steering); return err; } enum { IBV_FLOW_L4_NONE = 0, IBV_FLOW_L4_OTHER = 3, IBV_FLOW_L4_UDP = 5, IBV_FLOW_L4_TCP = 6 }; struct mlx4_cm_steering { struct list_head list; u64 reg_id; struct ib_flow_spec spec; }; static int flow_spec_to_net_rule(struct ib_device *dev, struct ib_flow_spec *flow_spec, struct list_head *rule_list_h) { struct mlx4_spec_list *spec_l2, *spec_l3, *spec_l4; u64 mac_msk = cpu_to_be64(MLX4_MAC_MASK << 16); spec_l2 = kzalloc(sizeof *spec_l2, GFP_KERNEL); if (!spec_l2) return -ENOMEM; switch (flow_spec->type) { case IB_FLOW_ETH: spec_l2->id = MLX4_NET_TRANS_RULE_ID_ETH; memcpy(spec_l2->eth.dst_mac, flow_spec->l2_id.eth.mac, ETH_ALEN); memcpy(spec_l2->eth.dst_mac_msk, &mac_msk, ETH_ALEN); spec_l2->eth.ether_type = flow_spec->l2_id.eth.ethertype; if (flow_spec->l2_id.eth.vlan_present) { spec_l2->eth.vlan_id = flow_spec->l2_id.eth.vlan; spec_l2->eth.vlan_id_msk = cpu_to_be16(0x0fff); } break; case IB_FLOW_IB_UC: spec_l2->id = MLX4_NET_TRANS_RULE_ID_IB; if(flow_spec->l2_id.ib_uc.qpn) { spec_l2->ib.l3_qpn = cpu_to_be32(flow_spec->l2_id.ib_uc.qpn); spec_l2->ib.qpn_msk = cpu_to_be32(0xffffff); } break; case IB_FLOW_IB_MC_IPV4: case IB_FLOW_IB_MC_IPV6: spec_l2->id = MLX4_NET_TRANS_RULE_ID_IB; memcpy(spec_l2->ib.dst_gid, flow_spec->l2_id.ib_mc.mgid, 16); memset(spec_l2->ib.dst_gid_msk, 0xff, 16); break; } list_add_tail(&spec_l2->list, rule_list_h); if (flow_spec->l2_id.eth.ethertype == cpu_to_be16(ETH_P_IP) || flow_spec->type != IB_FLOW_ETH) { spec_l3 = kzalloc(sizeof *spec_l3, GFP_KERNEL); if (!spec_l3) return -ENOMEM; spec_l3->id = MLX4_NET_TRANS_RULE_ID_IPV4; spec_l3->ipv4.src_ip = flow_spec->src_ip; if (flow_spec->type != IB_FLOW_IB_MC_IPV4 && flow_spec->type != IB_FLOW_IB_MC_IPV6) spec_l3->ipv4.dst_ip = flow_spec->dst_ip; if (spec_l3->ipv4.src_ip) spec_l3->ipv4.src_ip_msk = MLX4_BE_WORD_MASK; if (spec_l3->ipv4.dst_ip) spec_l3->ipv4.dst_ip_msk = MLX4_BE_WORD_MASK; list_add_tail(&spec_l3->list, rule_list_h); } if (flow_spec->l4_protocol) { spec_l4 = kzalloc(sizeof(*spec_l4), GFP_KERNEL); if (!spec_l4) return -ENOMEM; spec_l4->tcp_udp.src_port = flow_spec->src_port; spec_l4->tcp_udp.dst_port = flow_spec->dst_port; if (spec_l4->tcp_udp.src_port) spec_l4->tcp_udp.src_port_msk = MLX4_BE_SHORT_MASK; if (spec_l4->tcp_udp.dst_port) spec_l4->tcp_udp.dst_port_msk = MLX4_BE_SHORT_MASK; switch (flow_spec->l4_protocol) { case IBV_FLOW_L4_UDP: spec_l4->id = MLX4_NET_TRANS_RULE_ID_UDP; break; case IBV_FLOW_L4_TCP: spec_l4->id = MLX4_NET_TRANS_RULE_ID_TCP; break; default: dev_err(dev->dma_device, "Unsupported l4 protocol.\n"); kfree(spec_l4); return -EPROTONOSUPPORT; } list_add_tail(&spec_l4->list, rule_list_h); } return 0; } static int __mlx4_ib_flow_attach(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp, struct ib_flow_spec *flow_spec, int priority, int lock_qp) { u64 reg_id = 0; int err = 0; struct mlx4_cm_steering *cm_flow; struct mlx4_spec_list *spec, *tmp_spec; struct mlx4_net_trans_rule rule = { .queue_mode = MLX4_NET_TRANS_Q_FIFO, .exclusive = 0, }; rule.promisc_mode = flow_spec->rule_type; rule.port = mqp->port; rule.qpn = mqp->mqp.qpn; INIT_LIST_HEAD(&rule.list); cm_flow = kmalloc(sizeof(*cm_flow), GFP_KERNEL); if (!cm_flow) return -ENOMEM; if (rule.promisc_mode == MLX4_FS_REGULAR) { rule.allow_loopback = !flow_spec->block_mc_loopback; rule.priority = MLX4_DOMAIN_UVERBS | priority; err = flow_spec_to_net_rule(&mdev->ib_dev, flow_spec, &rule.list); if (err) goto free_list; } err = mlx4_flow_attach(mdev->dev, &rule, ®_id); if (err) goto free_list; memcpy(&cm_flow->spec, flow_spec, sizeof(*flow_spec)); cm_flow->reg_id = reg_id; if (lock_qp) mutex_lock(&mqp->mutex); list_add(&cm_flow->list, &mqp->rules_list); if (lock_qp) mutex_unlock(&mqp->mutex); free_list: list_for_each_entry_safe(spec, tmp_spec, &rule.list, list) { list_del(&spec->list); kfree(spec); } if (err) { kfree(cm_flow); dev_err(mdev->ib_dev.dma_device, "Fail to attach flow steering rule\n"); } return err; } static int __mlx4_ib_flow_detach(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp, struct ib_flow_spec *spec, int priority, int lock_qp) { struct mlx4_cm_steering *cm_flow; int ret; if (lock_qp) mutex_lock(&mqp->mutex); list_for_each_entry(cm_flow, &mqp->rules_list, list) { if (!memcmp(&cm_flow->spec, spec, sizeof(*spec))) { list_del(&cm_flow->list); break; } } if (lock_qp) mutex_unlock(&mqp->mutex); if (&cm_flow->list == &mqp->rules_list) { dev_err(mdev->ib_dev.dma_device, "Couldn't find reg_id for flow spec. " "Steering rule is left attached\n"); return -EINVAL; } ret = mlx4_flow_detach(mdev->dev, cm_flow->reg_id); kfree(cm_flow); return ret; } static int mlx4_ib_flow_attach(struct ib_qp *qp, struct ib_flow_spec *flow_spec, int priority) { return __mlx4_ib_flow_attach(to_mdev(qp->device), to_mqp(qp), flow_spec, priority, 1); } static int mlx4_ib_flow_detach(struct ib_qp *qp, struct ib_flow_spec *spec, int priority) { return __mlx4_ib_flow_detach(to_mdev(qp->device), to_mqp(qp), spec, priority, 1); } static struct mlx4_ib_gid_entry *find_gid_entry(struct mlx4_ib_qp *qp, u8 *raw) { struct mlx4_ib_gid_entry *ge; struct mlx4_ib_gid_entry *tmp; struct mlx4_ib_gid_entry *ret = NULL; list_for_each_entry_safe(ge, tmp, &qp->gid_list, list) { if (!memcmp(raw, ge->gid.raw, 16)) { ret = ge; break; } } return ret; } static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) { int err; struct mlx4_ib_dev *mdev = to_mdev(ibqp->device); struct mlx4_ib_qp *mqp = to_mqp(ibqp); u8 mac[6]; struct net_device *ndev; struct mlx4_ib_gid_entry *ge; u64 reg_id = 0; if (mdev->dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) { struct mlx4_ib_steering *ib_steering; mutex_lock(&mqp->mutex); list_for_each_entry(ib_steering, &mqp->steering_rules, list) { if (!memcmp(ib_steering->gid.raw, gid->raw, 16)) { list_del(&ib_steering->list); break; } } mutex_unlock(&mqp->mutex); if (&ib_steering->list == &mqp->steering_rules) { pr_err("Couldn't find reg_id for mgid. Steering rule is left attached\n"); return -EINVAL; } reg_id = ib_steering->reg_id; kfree(ib_steering); } err = mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw, MLX4_PROT_IB_IPV6, reg_id); if (err) return err; mutex_lock(&mqp->mutex); ge = find_gid_entry(mqp, gid->raw); if (ge) { spin_lock(&mdev->iboe.lock); ndev = ge->added ? mdev->iboe.netdevs[ge->port - 1] : NULL; if (ndev) dev_hold(ndev); spin_unlock(&mdev->iboe.lock); rdma_get_mcast_mac((struct in6_addr *)gid, mac); if (ndev) { rtnl_lock(); dev_mc_delete(mdev->iboe.netdevs[ge->port - 1], mac, 6, 0); rtnl_unlock(); dev_put(ndev); } list_del(&ge->list); kfree(ge); } else pr_warn("could not find mgid entry\n"); mutex_unlock(&mqp->mutex); return 0; } static int init_node_data(struct mlx4_ib_dev *dev) { struct ib_smp *in_mad = NULL; struct ib_smp *out_mad = NULL; int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS; int err = -ENOMEM; in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); if (!in_mad || !out_mad) goto out; init_query_mad(in_mad); in_mad->attr_id = IB_SMP_ATTR_NODE_DESC; if (mlx4_is_master(dev->dev)) mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW; err = mlx4_MAD_IFC(dev, mad_ifc_flags, 1, NULL, NULL, in_mad, out_mad); if (err) goto out; memcpy(dev->ib_dev.node_desc, out_mad->data, 64); in_mad->attr_id = IB_SMP_ATTR_NODE_INFO; err = mlx4_MAD_IFC(dev, mad_ifc_flags, 1, NULL, NULL, in_mad, out_mad); if (err) goto out; dev->dev->rev_id = be32_to_cpup((__be32 *) (out_mad->data + 32)); memcpy(&dev->ib_dev.node_guid, out_mad->data + 12, 8); out: kfree(in_mad); kfree(out_mad); return err; } static ssize_t show_hca(struct device *device, struct device_attribute *attr, char *buf) { struct mlx4_ib_dev *dev = container_of(device, struct mlx4_ib_dev, ib_dev.dev); return sprintf(buf, "MT%d\n", dev->dev->pdev->device); } static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr, char *buf) { struct mlx4_ib_dev *dev = container_of(device, struct mlx4_ib_dev, ib_dev.dev); return sprintf(buf, "%d.%d.%d\n", (int) (dev->dev->caps.fw_ver >> 32), (int) (dev->dev->caps.fw_ver >> 16) & 0xffff, (int) dev->dev->caps.fw_ver & 0xffff); } static ssize_t show_rev(struct device *device, struct device_attribute *attr, char *buf) { struct mlx4_ib_dev *dev = container_of(device, struct mlx4_ib_dev, ib_dev.dev); return sprintf(buf, "%x\n", dev->dev->rev_id); } static ssize_t show_board(struct device *device, struct device_attribute *attr, char *buf) { struct mlx4_ib_dev *dev = container_of(device, struct mlx4_ib_dev, ib_dev.dev); return sprintf(buf, "%.*s\n", MLX4_BOARD_ID_LEN, dev->dev->board_id); } static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL); static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL); static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL); static struct device_attribute *mlx4_class_attributes[] = { &dev_attr_hw_rev, &dev_attr_fw_ver, &dev_attr_hca_type, &dev_attr_board_id }; static void mlx4_addrconf_ifid_eui48(u8 *eui, u16 vlan_id, struct net_device *dev) { #ifdef __linux__ memcpy(eui, dev->dev_addr, 3); memcpy(eui + 5, dev->dev_addr + 3, 3); #else memcpy(eui, IF_LLADDR(dev), 3); memcpy(eui + 5, IF_LLADDR(dev) + 3, 3); #endif if (vlan_id < 0x1000) { eui[3] = vlan_id >> 8; eui[4] = vlan_id & 0xff; } else { eui[3] = 0xff; eui[4] = 0xfe; } eui[0] ^= 2; } static void update_gids_task(struct work_struct *work) { struct update_gid_work *gw = container_of(work, struct update_gid_work, work); struct mlx4_cmd_mailbox *mailbox; union ib_gid *gids; int err; struct mlx4_dev *dev = gw->dev->dev; mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) { pr_warn("update gid table failed %ld\n", PTR_ERR(mailbox)); return; } gids = mailbox->buf; memcpy(gids, gw->gids, sizeof gw->gids); err = mlx4_cmd(dev, mailbox->dma, MLX4_SET_PORT_GID_TABLE << 8 | gw->port, 1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED); if (err) pr_warn("set port command failed\n"); else { memcpy(gw->dev->iboe.gid_table[gw->port - 1], gw->gids, sizeof gw->gids); mlx4_ib_dispatch_event(gw->dev, gw->port, IB_EVENT_GID_CHANGE); } mlx4_free_cmd_mailbox(dev, mailbox); kfree(gw); } static int update_ipv6_gids(struct mlx4_ib_dev *dev, int port, int clear) { struct net_device *ndev = dev->iboe.netdevs[port - 1]; struct update_gid_work *work; struct net_device *tmp; int i; u8 *hits; union ib_gid gid; int index_free; int found; int need_update = 0; int max_gids; u16 vid; work = kzalloc(sizeof *work, GFP_ATOMIC); if (!work) return -ENOMEM; hits = kzalloc(128, GFP_ATOMIC); if (!hits) { kfree(work); return -ENOMEM; } max_gids = dev->dev->caps.gid_table_len[port]; #ifdef __linux__ rcu_read_lock(); for_each_netdev_rcu(&init_net, tmp) { #else IFNET_RLOCK(); TAILQ_FOREACH(tmp, &V_ifnet, if_link) { #endif if (ndev && (tmp == ndev || rdma_vlan_dev_real_dev(tmp) == ndev)) { gid.global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL); vid = rdma_vlan_dev_vlan_id(tmp); mlx4_addrconf_ifid_eui48(&gid.raw[8], vid, ndev); found = 0; index_free = -1; for (i = 0; i < max_gids; ++i) { if (index_free < 0 && !memcmp(&dev->iboe.gid_table[port - 1][i], &zgid, sizeof zgid)) index_free = i; if (!memcmp(&dev->iboe.gid_table[port - 1][i], &gid, sizeof gid)) { hits[i] = 1; found = 1; break; } } if (!found) { if (tmp == ndev && (memcmp(&dev->iboe.gid_table[port - 1][0], &gid, sizeof gid) || !memcmp(&dev->iboe.gid_table[port - 1][0], &zgid, sizeof gid))) { dev->iboe.gid_table[port - 1][0] = gid; ++need_update; hits[0] = 1; } else if (index_free >= 0) { dev->iboe.gid_table[port - 1][index_free] = gid; hits[index_free] = 1; ++need_update; } } } #ifdef __linux__ } rcu_read_unlock(); #else } IFNET_RUNLOCK(); #endif for (i = 0; i < max_gids; ++i) if (!hits[i]) { if (memcmp(&dev->iboe.gid_table[port - 1][i], &zgid, sizeof zgid)) ++need_update; dev->iboe.gid_table[port - 1][i] = zgid; } if (need_update) { memcpy(work->gids, dev->iboe.gid_table[port - 1], sizeof work->gids); INIT_WORK(&work->work, update_gids_task); work->port = port; work->dev = dev; queue_work(wq, &work->work); } else kfree(work); kfree(hits); return 0; } static void handle_en_event(struct mlx4_ib_dev *dev, int port, unsigned long event) { switch (event) { case NETDEV_UP: #ifdef __linux__ case NETDEV_CHANGEADDR: #endif update_ipv6_gids(dev, port, 0); break; case NETDEV_DOWN: update_ipv6_gids(dev, port, 1); dev->iboe.netdevs[port - 1] = NULL; } } static void netdev_added(struct mlx4_ib_dev *dev, int port) { update_ipv6_gids(dev, port, 0); } static void netdev_removed(struct mlx4_ib_dev *dev, int port) { update_ipv6_gids(dev, port, 1); } static int mlx4_ib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = ptr; struct mlx4_ib_dev *ibdev; struct net_device *oldnd; struct mlx4_ib_iboe *iboe; int port; #ifdef __linux__ if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; #endif ibdev = container_of(this, struct mlx4_ib_dev, iboe.nb); iboe = &ibdev->iboe; spin_lock(&iboe->lock); mlx4_foreach_ib_transport_port(port, ibdev->dev) { oldnd = iboe->netdevs[port - 1]; iboe->netdevs[port - 1] = mlx4_get_protocol_dev(ibdev->dev, MLX4_PROT_ETH, port); if (oldnd != iboe->netdevs[port - 1]) { if (iboe->netdevs[port - 1]) netdev_added(ibdev, port); else netdev_removed(ibdev, port); } } if (dev == iboe->netdevs[0] || (iboe->netdevs[0] && rdma_vlan_dev_real_dev(dev) == iboe->netdevs[0])) handle_en_event(ibdev, 1, event); else if (dev == iboe->netdevs[1] || (iboe->netdevs[1] && rdma_vlan_dev_real_dev(dev) == iboe->netdevs[1])) handle_en_event(ibdev, 2, event); spin_unlock(&iboe->lock); return NOTIFY_DONE; } static void init_pkeys(struct mlx4_ib_dev *ibdev) { int port; int slave; int i; if (mlx4_is_master(ibdev->dev)) { for (slave = 0; slave <= ibdev->dev->num_vfs; ++slave) { for (port = 1; port <= ibdev->dev->caps.num_ports; ++port) { for (i = 0; i < ibdev->dev->phys_caps.pkey_phys_table_len[port]; ++i) { ibdev->pkeys.virt2phys_pkey[slave][port - 1][i] = /* master has the identity virt2phys pkey mapping */ (slave == mlx4_master_func_num(ibdev->dev) || !i) ? i : ibdev->dev->phys_caps.pkey_phys_table_len[port] - 1; mlx4_sync_pkey_table(ibdev->dev, slave, port, i, ibdev->pkeys.virt2phys_pkey[slave][port - 1][i]); } } } /* initialize pkey cache */ for (port = 1; port <= ibdev->dev->caps.num_ports; ++port) { for (i = 0; i < ibdev->dev->phys_caps.pkey_phys_table_len[port]; ++i) ibdev->pkeys.phys_pkey_cache[port-1][i] = (i) ? 0 : 0xFFFF; } } } static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev) { char name[32]; int eq_per_port = 0; int added_eqs = 0; int total_eqs = 0; int i, j, eq; /* Legacy mode or comp_pool is not large enough */ if (dev->caps.comp_pool == 0 || dev->caps.num_ports > dev->caps.comp_pool) return; eq_per_port = rounddown_pow_of_two(dev->caps.comp_pool/ dev->caps.num_ports); /* Init eq table */ added_eqs = 0; mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB) added_eqs += eq_per_port; total_eqs = dev->caps.num_comp_vectors + added_eqs; ibdev->eq_table = kzalloc(total_eqs * sizeof(int), GFP_KERNEL); if (!ibdev->eq_table) return; ibdev->eq_added = added_eqs; eq = 0; mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB) { for (j = 0; j < eq_per_port; j++) { - //sprintf(name, "mlx4-ib-%d-%d@%s", - // i, j, dev->pdev->bus->conf.pd_name); + snprintf(name, sizeof(name), "mlx4-ib-%d-%d@%d:%d:%d:%d", i, j, + pci_get_domain(dev->pdev->dev.bsddev), + pci_get_bus(dev->pdev->dev.bsddev), + PCI_SLOT(dev->pdev->devfn), + PCI_FUNC(dev->pdev->devfn)); + /* Set IRQ for specific name (per ring) */ if (mlx4_assign_eq(dev, name, &ibdev->eq_table[eq])) { /* Use legacy (same as mlx4_en driver) */ pr_warn("Can't allocate EQ %d; reverting to legacy\n", eq); ibdev->eq_table[eq] = (eq % dev->caps.num_comp_vectors); } eq++; } } /* Fill the reset of the vector with legacy EQ */ for (i = 0, eq = added_eqs; i < dev->caps.num_comp_vectors; i++) ibdev->eq_table[eq++] = i; /* Advertise the new number of EQs to clients */ ibdev->ib_dev.num_comp_vectors = total_eqs; } static void mlx4_ib_free_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev) { int i; /* no additional eqs were added */ if (!ibdev->eq_table) return; /* Reset the advertised EQ number */ ibdev->ib_dev.num_comp_vectors = dev->caps.num_comp_vectors; /* Free only the added eqs */ for (i = 0; i < ibdev->eq_added; i++) { /* Don't free legacy eqs if used */ if (ibdev->eq_table[i] <= dev->caps.num_comp_vectors) continue; mlx4_release_eq(dev, ibdev->eq_table[i]); } kfree(ibdev->eq_table); } /* * create show function and a device_attribute struct pointing to * the function for _name */ #define DEVICE_DIAG_RPRT_ATTR(_name, _offset, _op_mod) \ static ssize_t show_rprt_##_name(struct device *dev, \ struct device_attribute *attr, \ char *buf){ \ return show_diag_rprt(dev, buf, _offset, _op_mod); \ } \ static DEVICE_ATTR(_name, S_IRUGO, show_rprt_##_name, NULL); #define MLX4_DIAG_RPRT_CLEAR_DIAGS 3 static size_t show_diag_rprt(struct device *device, char *buf, u32 offset, u8 op_modifier) { size_t ret; u32 counter_offset = offset; u32 diag_counter = 0; struct mlx4_ib_dev *dev = container_of(device, struct mlx4_ib_dev, ib_dev.dev); ret = mlx4_query_diag_counters(dev->dev, 1, op_modifier, &counter_offset, &diag_counter); if (ret) return ret; return sprintf(buf, "%d\n", diag_counter); } static ssize_t clear_diag_counters(struct device *device, struct device_attribute *attr, const char *buf, size_t length) { size_t ret; struct mlx4_ib_dev *dev = container_of(device, struct mlx4_ib_dev, ib_dev.dev); ret = mlx4_query_diag_counters(dev->dev, 0, MLX4_DIAG_RPRT_CLEAR_DIAGS, NULL, NULL); if (ret) return ret; return length; } DEVICE_DIAG_RPRT_ATTR(rq_num_lle , 0x00, 2); DEVICE_DIAG_RPRT_ATTR(sq_num_lle , 0x04, 2); DEVICE_DIAG_RPRT_ATTR(rq_num_lqpoe , 0x08, 2); DEVICE_DIAG_RPRT_ATTR(sq_num_lqpoe , 0x0C, 2); DEVICE_DIAG_RPRT_ATTR(rq_num_lpe , 0x18, 2); DEVICE_DIAG_RPRT_ATTR(sq_num_lpe , 0x1C, 2); DEVICE_DIAG_RPRT_ATTR(rq_num_wrfe , 0x20, 2); DEVICE_DIAG_RPRT_ATTR(sq_num_wrfe , 0x24, 2); DEVICE_DIAG_RPRT_ATTR(sq_num_mwbe , 0x2C, 2); DEVICE_DIAG_RPRT_ATTR(sq_num_bre , 0x34, 2); DEVICE_DIAG_RPRT_ATTR(rq_num_lae , 0x38, 2); DEVICE_DIAG_RPRT_ATTR(sq_num_rire , 0x44, 2); DEVICE_DIAG_RPRT_ATTR(rq_num_rire , 0x48, 2); DEVICE_DIAG_RPRT_ATTR(sq_num_rae , 0x4C, 2); DEVICE_DIAG_RPRT_ATTR(rq_num_rae , 0x50, 2); DEVICE_DIAG_RPRT_ATTR(sq_num_roe , 0x54, 2); DEVICE_DIAG_RPRT_ATTR(sq_num_tree , 0x5C, 2); DEVICE_DIAG_RPRT_ATTR(sq_num_rree , 0x64, 2); DEVICE_DIAG_RPRT_ATTR(rq_num_rnr , 0x68, 2); DEVICE_DIAG_RPRT_ATTR(sq_num_rnr , 0x6C, 2); DEVICE_DIAG_RPRT_ATTR(rq_num_oos , 0x100, 2); DEVICE_DIAG_RPRT_ATTR(sq_num_oos , 0x104, 2); DEVICE_DIAG_RPRT_ATTR(rq_num_mce , 0x108, 2); DEVICE_DIAG_RPRT_ATTR(rq_num_udsdprd , 0x118, 2); DEVICE_DIAG_RPRT_ATTR(rq_num_ucsdprd , 0x120, 2); DEVICE_DIAG_RPRT_ATTR(num_cqovf , 0x1A0, 2); DEVICE_DIAG_RPRT_ATTR(num_eqovf , 0x1A4, 2); DEVICE_DIAG_RPRT_ATTR(num_baddb , 0x1A8, 2); static DEVICE_ATTR(clear_diag, S_IWUSR, NULL, clear_diag_counters); static struct attribute *diag_rprt_attrs[] = { &dev_attr_rq_num_lle.attr, &dev_attr_sq_num_lle.attr, &dev_attr_rq_num_lqpoe.attr, &dev_attr_sq_num_lqpoe.attr, &dev_attr_rq_num_lpe.attr, &dev_attr_sq_num_lpe.attr, &dev_attr_rq_num_wrfe.attr, &dev_attr_sq_num_wrfe.attr, &dev_attr_sq_num_mwbe.attr, &dev_attr_sq_num_bre.attr, &dev_attr_rq_num_lae.attr, &dev_attr_sq_num_rire.attr, &dev_attr_rq_num_rire.attr, &dev_attr_sq_num_rae.attr, &dev_attr_rq_num_rae.attr, &dev_attr_sq_num_roe.attr, &dev_attr_sq_num_tree.attr, &dev_attr_sq_num_rree.attr, &dev_attr_rq_num_rnr.attr, &dev_attr_sq_num_rnr.attr, &dev_attr_rq_num_oos.attr, &dev_attr_sq_num_oos.attr, &dev_attr_rq_num_mce.attr, &dev_attr_rq_num_udsdprd.attr, &dev_attr_rq_num_ucsdprd.attr, &dev_attr_num_cqovf.attr, &dev_attr_num_eqovf.attr, &dev_attr_num_baddb.attr, &dev_attr_clear_diag.attr, NULL }; static struct attribute_group diag_counters_group = { .name = "diag_counters", .attrs = diag_rprt_attrs }; #ifdef __linux__ static int mlx4_ib_proc_init(void) { /* Creating procfs directories /proc/drivers/mlx4_ib/ && /proc/drivers/mlx4_ib/mrs for further use by the driver. */ int err; mlx4_ib_driver_dir_entry = proc_mkdir(MLX4_IB_DRIVER_PROC_DIR_NAME, NULL); if (!mlx4_ib_driver_dir_entry) { pr_err("mlx4_ib_proc_init has failed for %s\n", MLX4_IB_DRIVER_PROC_DIR_NAME); err = -ENODEV; goto error; } mlx4_mrs_dir_entry = proc_mkdir(MLX4_IB_MRS_PROC_DIR_NAME, mlx4_ib_driver_dir_entry); if (!mlx4_mrs_dir_entry) { pr_err("mlx4_ib_proc_init has failed for %s\n", MLX4_IB_MRS_PROC_DIR_NAME); err = -ENODEV; goto remove_entry; } return 0; remove_entry: remove_proc_entry(MLX4_IB_DRIVER_PROC_DIR_NAME, NULL); error: return err; } #endif static void init_dev_assign(void) { int bus, slot, fn, ib_idx; char *p = dev_assign_str, *t; char curr_val[32] = {0}; int ret; int j, i = 0; memset(dr, 0, sizeof dr); if (dev_assign_str[0] == 0) return; while (strlen(p)) { ret = sscanf(p, "%02x:%02x.%x-%x", &bus, &slot, &fn, &ib_idx); if (ret != 4 || ib_idx < 0) goto err; for (j = 0; j < i; j++) if (dr[j].nr == ib_idx) goto err; dr[i].bus = bus; dr[i].dev = slot; dr[i].func = fn; dr[i].nr = ib_idx; t = strchr(p, ','); sprintf(curr_val, "%02x:%02x.%x-%x", bus, slot, fn, ib_idx); if ((!t) && strlen(p) == strlen(curr_val)) return; if (!t || (t + 1) >= dev_assign_str + sizeof dev_assign_str) goto err; ++i; if (i >= MAX_DR) goto err; p = t + 1; } return; err: memset(dr, 0, sizeof dr); printk(KERN_WARNING "mlx4_ib: The value of 'dev_assign_str' parameter " "is incorrect. The parameter value is discarded!"); } static void *mlx4_ib_add(struct mlx4_dev *dev) { struct mlx4_ib_dev *ibdev; int num_ports = 0; int i, j; int err; struct mlx4_ib_iboe *iboe; printk(KERN_INFO "%s", mlx4_ib_version); mlx4_foreach_ib_transport_port(i, dev) num_ports++; /* No point in registering a device with no ports... */ if (num_ports == 0) return NULL; ibdev = (struct mlx4_ib_dev *) ib_alloc_device(sizeof *ibdev); if (!ibdev) { dev_err(&dev->pdev->dev, "Device struct alloc failed\n"); return NULL; } iboe = &ibdev->iboe; if (mlx4_pd_alloc(dev, &ibdev->priv_pdn)) goto err_dealloc; if (mlx4_uar_alloc(dev, &ibdev->priv_uar)) goto err_pd; ibdev->priv_uar.map = ioremap(ibdev->priv_uar.pfn << PAGE_SHIFT, PAGE_SIZE); if (!ibdev->priv_uar.map) goto err_uar; MLX4_INIT_DOORBELL_LOCK(&ibdev->uar_lock); ibdev->dev = dev; strlcpy(ibdev->ib_dev.name, "mlx4_%d", IB_DEVICE_NAME_MAX); ibdev->ib_dev.owner = THIS_MODULE; ibdev->ib_dev.node_type = RDMA_NODE_IB_CA; ibdev->ib_dev.local_dma_lkey = dev->caps.reserved_lkey; ibdev->num_ports = num_ports; ibdev->ib_dev.phys_port_cnt = ibdev->num_ports; ibdev->ib_dev.num_comp_vectors = dev->caps.num_comp_vectors; ibdev->ib_dev.dma_device = &dev->pdev->dev; if (dev->caps.userspace_caps) ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_ABI_VERSION; else ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION; ibdev->ib_dev.uverbs_cmd_mask = (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | (1ull << IB_USER_VERBS_CMD_REG_MR) | (1ull << IB_USER_VERBS_CMD_DEREG_MR) | (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) | (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | (1ull << IB_USER_VERBS_CMD_CREATE_QP) | (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | (1ull << IB_USER_VERBS_CMD_QUERY_QP) | (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) | (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) | (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) | (1ull << IB_USER_VERBS_CMD_CREATE_XSRQ) | (1ull << IB_USER_VERBS_CMD_OPEN_QP) | (1ull << IB_USER_VERBS_CMD_ATTACH_FLOW) | (1ull << IB_USER_VERBS_CMD_DETACH_FLOW) | (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ); ibdev->ib_dev.query_device = mlx4_ib_query_device; ibdev->ib_dev.query_port = mlx4_ib_query_port; ibdev->ib_dev.get_link_layer = mlx4_ib_port_link_layer; ibdev->ib_dev.query_gid = mlx4_ib_query_gid; ibdev->ib_dev.query_pkey = mlx4_ib_query_pkey; ibdev->ib_dev.modify_device = mlx4_ib_modify_device; ibdev->ib_dev.modify_port = mlx4_ib_modify_port; ibdev->ib_dev.alloc_ucontext = mlx4_ib_alloc_ucontext; ibdev->ib_dev.dealloc_ucontext = mlx4_ib_dealloc_ucontext; ibdev->ib_dev.mmap = mlx4_ib_mmap; #ifdef __linux__ ibdev->ib_dev.get_unmapped_area = mlx4_ib_get_unmapped_area; #endif ibdev->ib_dev.alloc_pd = mlx4_ib_alloc_pd; ibdev->ib_dev.dealloc_pd = mlx4_ib_dealloc_pd; ibdev->ib_dev.create_ah = mlx4_ib_create_ah; ibdev->ib_dev.query_ah = mlx4_ib_query_ah; ibdev->ib_dev.destroy_ah = mlx4_ib_destroy_ah; ibdev->ib_dev.create_srq = mlx4_ib_create_srq; ibdev->ib_dev.modify_srq = mlx4_ib_modify_srq; ibdev->ib_dev.query_srq = mlx4_ib_query_srq; ibdev->ib_dev.destroy_srq = mlx4_ib_destroy_srq; ibdev->ib_dev.post_srq_recv = mlx4_ib_post_srq_recv; ibdev->ib_dev.create_qp = mlx4_ib_create_qp; ibdev->ib_dev.modify_qp = mlx4_ib_modify_qp; ibdev->ib_dev.query_qp = mlx4_ib_query_qp; ibdev->ib_dev.destroy_qp = mlx4_ib_destroy_qp; ibdev->ib_dev.post_send = mlx4_ib_post_send; ibdev->ib_dev.post_recv = mlx4_ib_post_recv; ibdev->ib_dev.create_cq = mlx4_ib_create_cq; ibdev->ib_dev.modify_cq = mlx4_ib_modify_cq; ibdev->ib_dev.resize_cq = mlx4_ib_resize_cq; ibdev->ib_dev.destroy_cq = mlx4_ib_destroy_cq; ibdev->ib_dev.poll_cq = mlx4_ib_poll_cq; ibdev->ib_dev.req_notify_cq = mlx4_ib_arm_cq; ibdev->ib_dev.get_dma_mr = mlx4_ib_get_dma_mr; ibdev->ib_dev.reg_user_mr = mlx4_ib_reg_user_mr; ibdev->ib_dev.dereg_mr = mlx4_ib_dereg_mr; ibdev->ib_dev.alloc_fast_reg_mr = mlx4_ib_alloc_fast_reg_mr; ibdev->ib_dev.alloc_fast_reg_page_list = mlx4_ib_alloc_fast_reg_page_list; ibdev->ib_dev.free_fast_reg_page_list = mlx4_ib_free_fast_reg_page_list; ibdev->ib_dev.attach_mcast = mlx4_ib_mcg_attach; ibdev->ib_dev.detach_mcast = mlx4_ib_mcg_detach; ibdev->ib_dev.attach_flow = mlx4_ib_flow_attach; ibdev->ib_dev.detach_flow = mlx4_ib_flow_detach; ibdev->ib_dev.process_mad = mlx4_ib_process_mad; if (!mlx4_is_slave(ibdev->dev)) { ibdev->ib_dev.alloc_fmr = mlx4_ib_fmr_alloc; ibdev->ib_dev.map_phys_fmr = mlx4_ib_map_phys_fmr; ibdev->ib_dev.unmap_fmr = mlx4_ib_unmap_fmr; ibdev->ib_dev.dealloc_fmr = mlx4_ib_fmr_dealloc; } if (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) { ibdev->ib_dev.alloc_xrcd = mlx4_ib_alloc_xrcd; ibdev->ib_dev.dealloc_xrcd = mlx4_ib_dealloc_xrcd; ibdev->ib_dev.uverbs_cmd_mask |= (1ull << IB_USER_VERBS_CMD_OPEN_XRCD) | (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD); } mlx4_ib_alloc_eqs(dev, ibdev); spin_lock_init(&iboe->lock); if (init_node_data(ibdev)) goto err_map; for (i = 0; i < ibdev->num_ports; ++i) { if (mlx4_ib_port_link_layer(&ibdev->ib_dev, i + 1) == IB_LINK_LAYER_ETHERNET) { err = mlx4_counter_alloc(ibdev->dev, i + 1, &ibdev->counters[i]); if (err) ibdev->counters[i] = -1; } else ibdev->counters[i] = -1; } spin_lock_init(&ibdev->sm_lock); mutex_init(&ibdev->cap_mask_mutex); if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED && !mlx4_is_slave(dev)) { ibdev->steer_qpn_count = MLX4_IB_UC_MAX_NUM_QPS; err = mlx4_qp_reserve_range(dev, ibdev->steer_qpn_count, MLX4_IB_UC_STEER_QPN_ALIGN, &ibdev->steer_qpn_base, 0); if (err) goto err_counter; ibdev->ib_uc_qpns_bitmap = kmalloc(BITS_TO_LONGS(ibdev->steer_qpn_count) * sizeof(long), GFP_KERNEL); if (!ibdev->ib_uc_qpns_bitmap) { dev_err(&dev->pdev->dev, "bit map alloc failed\n"); goto err_steer_qp_release; } bitmap_zero(ibdev->ib_uc_qpns_bitmap, ibdev->steer_qpn_count); err = mlx4_FLOW_STEERING_IB_UC_QP_RANGE(dev, ibdev->steer_qpn_base, ibdev->steer_qpn_base + ibdev->steer_qpn_count - 1); if (err) goto err_steer_free_bitmap; } if (ib_register_device(&ibdev->ib_dev, NULL)) goto err_steer_free_bitmap; if (mlx4_ib_mad_init(ibdev)) goto err_reg; if (mlx4_ib_init_sriov(ibdev)) goto err_mad; if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE && !iboe->nb.notifier_call) { iboe->nb.notifier_call = mlx4_ib_netdev_event; err = register_netdevice_notifier(&iboe->nb); if (err) goto err_sriov; } for (j = 0; j < ARRAY_SIZE(mlx4_class_attributes); ++j) { if (device_create_file(&ibdev->ib_dev.dev, mlx4_class_attributes[j])) goto err_notif; } if (sysfs_create_group(&ibdev->ib_dev.dev.kobj, &diag_counters_group)) goto err_notif; ibdev->ib_active = true; if (mlx4_is_mfunc(ibdev->dev)) init_pkeys(ibdev); /* create paravirt contexts for any VFs which are active */ if (mlx4_is_master(ibdev->dev)) { for (j = 0; j < MLX4_MFUNC_MAX; j++) { if (j == mlx4_master_func_num(ibdev->dev)) continue; if (mlx4_is_slave_active(ibdev->dev, j)) do_slave_init(ibdev, j, 1); } } return ibdev; err_notif: if (unregister_netdevice_notifier(&ibdev->iboe.nb)) pr_warn("failure unregistering notifier\n"); flush_workqueue(wq); err_sriov: mlx4_ib_close_sriov(ibdev); err_mad: mlx4_ib_mad_cleanup(ibdev); err_reg: ib_unregister_device(&ibdev->ib_dev); err_steer_free_bitmap: kfree(ibdev->ib_uc_qpns_bitmap); err_steer_qp_release: if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) mlx4_qp_release_range(dev, ibdev->steer_qpn_base, ibdev->steer_qpn_count); err_counter: for (; i; --i) if (ibdev->counters[i - 1] != -1) mlx4_counter_free(ibdev->dev, i, ibdev->counters[i - 1]); err_map: iounmap(ibdev->priv_uar.map); mlx4_ib_free_eqs(dev, ibdev); err_uar: mlx4_uar_free(dev, &ibdev->priv_uar); err_pd: mlx4_pd_free(dev, ibdev->priv_pdn); err_dealloc: ib_dealloc_device(&ibdev->ib_dev); return NULL; } int mlx4_ib_steer_qp_alloc(struct mlx4_ib_dev *dev, int count, int *qpn) { int offset; WARN_ON(!dev->ib_uc_qpns_bitmap); offset = bitmap_find_free_region(dev->ib_uc_qpns_bitmap, dev->steer_qpn_count, get_count_order(count)); if (offset < 0) return offset; *qpn = dev->steer_qpn_base + offset; return 0; } void mlx4_ib_steer_qp_free(struct mlx4_ib_dev *dev, u32 qpn, int count) { if (!qpn || dev->dev->caps.steering_mode != MLX4_STEERING_MODE_DEVICE_MANAGED) return; BUG_ON(qpn < dev->steer_qpn_base); bitmap_release_region(dev->ib_uc_qpns_bitmap, qpn - dev->steer_qpn_base, get_count_order(count)); } int mlx4_ib_steer_qp_reg(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp, int is_attach) { struct ib_flow_spec spec = { .type = IB_FLOW_IB_UC, .l2_id.ib_uc.qpn = mqp->ibqp.qp_num, }; return is_attach ? __mlx4_ib_flow_attach(mdev, mqp, &spec, MLX4_DOMAIN_NIC, 0) : __mlx4_ib_flow_detach(mdev, mqp, &spec, MLX4_DOMAIN_NIC, 0); } static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr) { struct mlx4_ib_dev *ibdev = ibdev_ptr; int p,j; mlx4_ib_close_sriov(ibdev); sysfs_remove_group(&ibdev->ib_dev.dev.kobj, &diag_counters_group); mlx4_ib_mad_cleanup(ibdev); for (j = 0; j < ARRAY_SIZE(mlx4_class_attributes); ++j) { device_remove_file(&ibdev->ib_dev.dev, mlx4_class_attributes[j]); } ib_unregister_device(&ibdev->ib_dev); if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) { mlx4_qp_release_range(dev, ibdev->steer_qpn_base, ibdev->steer_qpn_count); kfree(ibdev->ib_uc_qpns_bitmap); } if (ibdev->iboe.nb.notifier_call) { if (unregister_netdevice_notifier(&ibdev->iboe.nb)) pr_warn("failure unregistering notifier\n"); ibdev->iboe.nb.notifier_call = NULL; } iounmap(ibdev->priv_uar.map); for (p = 0; p < ibdev->num_ports; ++p) if (ibdev->counters[p] != -1) mlx4_counter_free(ibdev->dev, p + 1, ibdev->counters[p]); mlx4_foreach_port(p, dev, MLX4_PORT_TYPE_IB) mlx4_CLOSE_PORT(dev, p); mlx4_ib_free_eqs(dev, ibdev); mlx4_uar_free(dev, &ibdev->priv_uar); mlx4_pd_free(dev, ibdev->priv_pdn); ib_dealloc_device(&ibdev->ib_dev); } static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init) { struct mlx4_ib_demux_work **dm = NULL; struct mlx4_dev *dev = ibdev->dev; int i; unsigned long flags; if (!mlx4_is_master(dev)) return; dm = kcalloc(dev->caps.num_ports, sizeof *dm, GFP_ATOMIC); if (!dm) { pr_err("failed to allocate memory for tunneling qp update\n"); goto out; } for (i = 0; i < dev->caps.num_ports; i++) { dm[i] = kmalloc(sizeof (struct mlx4_ib_demux_work), GFP_ATOMIC); if (!dm[i]) { pr_err("failed to allocate memory for tunneling qp update work struct\n"); for (i = 0; i < dev->caps.num_ports; i++) { if (dm[i]) kfree(dm[i]); } goto out; } } /* initialize or tear down tunnel QPs for the slave */ for (i = 0; i < dev->caps.num_ports; i++) { INIT_WORK(&dm[i]->work, mlx4_ib_tunnels_update_work); dm[i]->port = i + 1; dm[i]->slave = slave; dm[i]->do_init = do_init; dm[i]->dev = ibdev; spin_lock_irqsave(&ibdev->sriov.going_down_lock, flags); if (!ibdev->sriov.is_going_down) queue_work(ibdev->sriov.demux[i].ud_wq, &dm[i]->work); spin_unlock_irqrestore(&ibdev->sriov.going_down_lock, flags); } out: if (dm) kfree(dm); return; } static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr, enum mlx4_dev_event event, unsigned long param) { struct ib_event ibev; struct mlx4_ib_dev *ibdev = to_mdev((struct ib_device *) ibdev_ptr); struct mlx4_eqe *eqe = NULL; struct ib_event_work *ew; int p = 0; if (event == MLX4_DEV_EVENT_PORT_MGMT_CHANGE) eqe = (struct mlx4_eqe *)param; else p = (int) param; switch (event) { case MLX4_DEV_EVENT_PORT_UP: if (p > ibdev->num_ports) return; if (mlx4_is_master(dev) && rdma_port_get_link_layer(&ibdev->ib_dev, p) == IB_LINK_LAYER_INFINIBAND) { mlx4_ib_invalidate_all_guid_record(ibdev, p); } mlx4_ib_info((struct ib_device *) ibdev_ptr, "Port %d logical link is up\n", p); ibev.event = IB_EVENT_PORT_ACTIVE; break; case MLX4_DEV_EVENT_PORT_DOWN: if (p > ibdev->num_ports) return; mlx4_ib_info((struct ib_device *) ibdev_ptr, "Port %d logical link is down\n", p); ibev.event = IB_EVENT_PORT_ERR; break; case MLX4_DEV_EVENT_CATASTROPHIC_ERROR: ibdev->ib_active = false; ibev.event = IB_EVENT_DEVICE_FATAL; break; case MLX4_DEV_EVENT_PORT_MGMT_CHANGE: ew = kmalloc(sizeof *ew, GFP_ATOMIC); if (!ew) { pr_err("failed to allocate memory for events work\n"); break; } INIT_WORK(&ew->work, handle_port_mgmt_change_event); memcpy(&ew->ib_eqe, eqe, sizeof *eqe); ew->ib_dev = ibdev; /* need to queue only for port owner, which uses GEN_EQE */ if (mlx4_is_master(dev)) queue_work(wq, &ew->work); else handle_port_mgmt_change_event(&ew->work); return; case MLX4_DEV_EVENT_SLAVE_INIT: /* here, p is the slave id */ do_slave_init(ibdev, p, 1); return; case MLX4_DEV_EVENT_SLAVE_SHUTDOWN: /* here, p is the slave id */ do_slave_init(ibdev, p, 0); return; default: return; } ibev.device = ibdev_ptr; ibev.element.port_num = (u8) p; ib_dispatch_event(&ibev); } static struct mlx4_interface mlx4_ib_interface = { .add = mlx4_ib_add, .remove = mlx4_ib_remove, .event = mlx4_ib_event, .protocol = MLX4_PROT_IB_IPV6 }; static int __init mlx4_ib_init(void) { int err; wq = create_singlethread_workqueue("mlx4_ib"); if (!wq) return -ENOMEM; #ifdef __linux__ err = mlx4_ib_proc_init(); if (err) goto clean_wq; #endif err = mlx4_ib_mcg_init(); if (err) goto clean_proc; init_dev_assign(); err = mlx4_register_interface(&mlx4_ib_interface); if (err) goto clean_mcg; return 0; clean_mcg: mlx4_ib_mcg_destroy(); clean_proc: #ifdef __linux__ remove_proc_entry(MLX4_IB_MRS_PROC_DIR_NAME, mlx4_ib_driver_dir_entry); remove_proc_entry(MLX4_IB_DRIVER_PROC_DIR_NAME, NULL); clean_wq: #endif destroy_workqueue(wq); return err; } static void __exit mlx4_ib_cleanup(void) { mlx4_unregister_interface(&mlx4_ib_interface); mlx4_ib_mcg_destroy(); destroy_workqueue(wq); /* Remove proc entries */ #ifdef __linux__ remove_proc_entry(MLX4_IB_MRS_PROC_DIR_NAME, mlx4_ib_driver_dir_entry); remove_proc_entry(MLX4_IB_DRIVER_PROC_DIR_NAME, NULL); #endif } module_init_order(mlx4_ib_init, SI_ORDER_MIDDLE); module_exit(mlx4_ib_cleanup); #undef MODULE_VERSION #include static int mlx4ib_evhand(module_t mod, int event, void *arg) { return (0); } static moduledata_t mlx4ib_mod = { .name = "mlx4ib", .evhand = mlx4ib_evhand, }; DECLARE_MODULE(mlx4ib, mlx4ib_mod, SI_SUB_OFED_PREINIT, SI_ORDER_ANY); MODULE_DEPEND(mlx4ib, mlx4, 1, 1, 1); MODULE_DEPEND(mlx4ib, ibcore, 1, 1, 1); Index: stable/9/sys/ofed/drivers/net/mlx4/main.c =================================================================== --- stable/9/sys/ofed/drivers/net/mlx4/main.c (revision 277139) +++ stable/9/sys/ofed/drivers/net/mlx4/main.c (revision 277140) @@ -1,3802 +1,3798 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2005, 2006, 2007, 2008, 2014 Mellanox Technologies. All rights reserved. * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include /* * kmod.h must be included before module.h since it includes (indirectly) sys/module.h * To use the FBSD macro sys/module.h should define MODULE_VERSION before linux/module does. */ #include #include #include #include #include #include #include #include #include #include #include #include #include "mlx4.h" #include "fw.h" #include "icm.h" #include "mlx4_stats.h" MODULE_AUTHOR("Roland Dreier"); MODULE_DESCRIPTION("Mellanox ConnectX HCA low-level driver"); MODULE_LICENSE("Dual BSD/GPL"); struct workqueue_struct *mlx4_wq; #ifdef CONFIG_MLX4_DEBUG int mlx4_debug_level = 0; module_param_named(debug_level, mlx4_debug_level, int, 0644); MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0"); #endif /* CONFIG_MLX4_DEBUG */ #ifdef CONFIG_PCI_MSI static int msi_x = 1; module_param(msi_x, int, 0444); MODULE_PARM_DESC(msi_x, "0 - don't use MSI-X, 1 - use MSI-X, >1 - limit number of MSI-X irqs to msi_x (non-SRIOV only)"); #else /* CONFIG_PCI_MSI */ #define msi_x (0) #endif /* CONFIG_PCI_MSI */ static int enable_sys_tune = 0; module_param(enable_sys_tune, int, 0444); MODULE_PARM_DESC(enable_sys_tune, "Tune the cpu's for better performance (default 0)"); int mlx4_blck_lb = 1; module_param_named(block_loopback, mlx4_blck_lb, int, 0644); MODULE_PARM_DESC(block_loopback, "Block multicast loopback packets if > 0 " "(default: 1)"); enum { DEFAULT_DOMAIN = 0, BDF_STR_SIZE = 8, /* bb:dd.f- */ DBDF_STR_SIZE = 13 /* mmmm:bb:dd.f- */ }; enum { NUM_VFS, PROBE_VF, PORT_TYPE_ARRAY }; enum { VALID_DATA, INVALID_DATA, INVALID_STR }; struct param_data { int id; struct mlx4_dbdf2val_lst dbdf2val; }; static struct param_data num_vfs = { .id = NUM_VFS, .dbdf2val = { .name = "num_vfs param", .num_vals = 1, .def_val = {0}, .range = {0, MLX4_MAX_NUM_VF} } }; module_param_string(num_vfs, num_vfs.dbdf2val.str, sizeof(num_vfs.dbdf2val.str), 0444); MODULE_PARM_DESC(num_vfs, "Either single value (e.g. '5') to define uniform num_vfs value for all devices functions\n" "\t\tor a string to map device function numbers to their num_vfs values (e.g. '0000:04:00.0-5,002b:1c:0b.a-15').\n" "\t\tHexadecimal digits for the device function (e.g. 002b:1c:0b.a) and decimal for num_vfs value (e.g. 15)."); static struct param_data probe_vf = { .id = PROBE_VF, .dbdf2val = { .name = "probe_vf param", .num_vals = 1, .def_val = {0}, .range = {0, MLX4_MAX_NUM_VF} } }; module_param_string(probe_vf, probe_vf.dbdf2val.str, sizeof(probe_vf.dbdf2val.str), 0444); MODULE_PARM_DESC(probe_vf, "Either single value (e.g. '3') to define uniform number of VFs to probe by the pf driver for all devices functions\n" "\t\tor a string to map device function numbers to their probe_vf values (e.g. '0000:04:00.0-3,002b:1c:0b.a-13').\n" "\t\tHexadecimal digits for the device function (e.g. 002b:1c:0b.a) and decimal for probe_vf value (e.g. 13)."); int mlx4_log_num_mgm_entry_size = MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE; module_param_named(log_num_mgm_entry_size, mlx4_log_num_mgm_entry_size, int, 0444); MODULE_PARM_DESC(log_num_mgm_entry_size, "log mgm size, that defines the num" " of qp per mcg, for example:" " 10 gives 248.range: 7 <=" " log_num_mgm_entry_size <= 12." " To activate device managed" " flow steering when available, set to -1"); static int high_rate_steer; module_param(high_rate_steer, int, 0444); MODULE_PARM_DESC(high_rate_steer, "Enable steering mode for higher packet rate" " (default off)"); static int fast_drop; module_param_named(fast_drop, fast_drop, int, 0444); MODULE_PARM_DESC(fast_drop, "Enable fast packet drop when no recieve WQEs are posted"); int mlx4_enable_64b_cqe_eqe = 1; module_param_named(enable_64b_cqe_eqe, mlx4_enable_64b_cqe_eqe, int, 0644); MODULE_PARM_DESC(enable_64b_cqe_eqe, "Enable 64 byte CQEs/EQEs when the the FW supports this if non-zero (default: 1)"); #define HCA_GLOBAL_CAP_MASK 0 #define PF_CONTEXT_BEHAVIOUR_MASK MLX4_FUNC_CAP_64B_EQE_CQE static char mlx4_version[] __devinitdata = DRV_NAME ": Mellanox ConnectX core driver v" DRV_VERSION " (" DRV_RELDATE ")\n"; static int log_num_mac = 7; module_param_named(log_num_mac, log_num_mac, int, 0444); MODULE_PARM_DESC(log_num_mac, "Log2 max number of MACs per ETH port (1-7)"); static int log_num_vlan; module_param_named(log_num_vlan, log_num_vlan, int, 0444); MODULE_PARM_DESC(log_num_vlan, "(Obsolete) Log2 max number of VLANs per ETH port (0-7)"); /* Log2 max number of VLANs per ETH port (0-7) */ #define MLX4_LOG_NUM_VLANS 7 int log_mtts_per_seg = ilog2(1); module_param_named(log_mtts_per_seg, log_mtts_per_seg, int, 0444); MODULE_PARM_DESC(log_mtts_per_seg, "Log2 number of MTT entries per segment " "(0-7) (default: 0)"); static struct param_data port_type_array = { .id = PORT_TYPE_ARRAY, .dbdf2val = { .name = "port_type_array param", .num_vals = 2, .def_val = {MLX4_PORT_TYPE_ETH, MLX4_PORT_TYPE_ETH}, .range = {MLX4_PORT_TYPE_IB, MLX4_PORT_TYPE_NA} } }; module_param_string(port_type_array, port_type_array.dbdf2val.str, sizeof(port_type_array.dbdf2val.str), 0444); MODULE_PARM_DESC(port_type_array, "Either pair of values (e.g. '1,2') to define uniform port1/port2 types configuration for all devices functions\n" "\t\tor a string to map device function numbers to their pair of port types values (e.g. '0000:04:00.0-1;2,002b:1c:0b.a-1;1').\n" "\t\tValid port types: 1-ib, 2-eth, 3-auto, 4-N/A\n" "\t\tIn case that only one port is available use the N/A port type for port2 (e.g '1,4')."); struct mlx4_port_config { struct list_head list; enum mlx4_port_type port_type[MLX4_MAX_PORTS + 1]; struct pci_dev *pdev; }; #define MLX4_LOG_NUM_MTT 20 /* We limit to 30 as of a bit map issue which uses int and not uint. see mlx4_buddy_init -> bitmap_zero which gets int. */ #define MLX4_MAX_LOG_NUM_MTT 30 static struct mlx4_profile mod_param_profile = { .num_qp = 19, .num_srq = 16, .rdmarc_per_qp = 4, .num_cq = 16, .num_mcg = 13, .num_mpt = 19, .num_mtt_segs = 0, /* max(20, 2*MTTs for host memory)) */ }; module_param_named(log_num_qp, mod_param_profile.num_qp, int, 0444); MODULE_PARM_DESC(log_num_qp, "log maximum number of QPs per HCA (default: 19)"); module_param_named(log_num_srq, mod_param_profile.num_srq, int, 0444); MODULE_PARM_DESC(log_num_srq, "log maximum number of SRQs per HCA " "(default: 16)"); module_param_named(log_rdmarc_per_qp, mod_param_profile.rdmarc_per_qp, int, 0444); MODULE_PARM_DESC(log_rdmarc_per_qp, "log number of RDMARC buffers per QP " "(default: 4)"); module_param_named(log_num_cq, mod_param_profile.num_cq, int, 0444); MODULE_PARM_DESC(log_num_cq, "log maximum number of CQs per HCA (default: 16)"); module_param_named(log_num_mcg, mod_param_profile.num_mcg, int, 0444); MODULE_PARM_DESC(log_num_mcg, "log maximum number of multicast groups per HCA " "(default: 13)"); module_param_named(log_num_mpt, mod_param_profile.num_mpt, int, 0444); MODULE_PARM_DESC(log_num_mpt, "log maximum number of memory protection table entries per " "HCA (default: 19)"); module_param_named(log_num_mtt, mod_param_profile.num_mtt_segs, int, 0444); MODULE_PARM_DESC(log_num_mtt, "log maximum number of memory translation table segments per " "HCA (default: max(20, 2*MTTs for register all of the host memory limited to 30))"); enum { MLX4_IF_STATE_BASIC, MLX4_IF_STATE_EXTENDED }; static inline u64 dbdf_to_u64(int domain, int bus, int dev, int fn) { return (domain << 20) | (bus << 12) | (dev << 4) | fn; } static inline void pr_bdf_err(const char *dbdf, const char *pname) { pr_warn("mlx4_core: '%s' is not valid bdf in '%s'\n", dbdf, pname); } static inline void pr_val_err(const char *dbdf, const char *pname, const char *val) { pr_warn("mlx4_core: value '%s' of bdf '%s' in '%s' is not valid\n" , val, dbdf, pname); } static inline void pr_out_of_range_bdf(const char *dbdf, int val, struct mlx4_dbdf2val_lst *dbdf2val) { pr_warn("mlx4_core: value %d in bdf '%s' of '%s' is out of its valid range (%d,%d)\n" , val, dbdf, dbdf2val->name , dbdf2val->range.min, dbdf2val->range.max); } static inline void pr_out_of_range(struct mlx4_dbdf2val_lst *dbdf2val) { pr_warn("mlx4_core: value of '%s' is out of its valid range (%d,%d)\n" , dbdf2val->name , dbdf2val->range.min, dbdf2val->range.max); } static inline int is_in_range(int val, struct mlx4_range *r) { return (val >= r->min && val <= r->max); } static int update_defaults(struct param_data *pdata) { long int val[MLX4_MAX_BDF_VALS]; int ret; char *t, *p = pdata->dbdf2val.str; char sval[32]; int val_len; if (!strlen(p) || strchr(p, ':') || strchr(p, '.') || strchr(p, ';')) return INVALID_STR; switch (pdata->id) { case PORT_TYPE_ARRAY: t = strchr(p, ','); if (!t || t == p || (t - p) > sizeof(sval)) return INVALID_STR; val_len = t - p; strncpy(sval, p, val_len); sval[val_len] = 0; ret = kstrtol(sval, 0, &val[0]); if (ret == -EINVAL) return INVALID_STR; if (ret || !is_in_range(val[0], &pdata->dbdf2val.range)) { pr_out_of_range(&pdata->dbdf2val); return INVALID_DATA; } ret = kstrtol(t + 1, 0, &val[1]); if (ret == -EINVAL) return INVALID_STR; if (ret || !is_in_range(val[1], &pdata->dbdf2val.range)) { pr_out_of_range(&pdata->dbdf2val); return INVALID_DATA; } pdata->dbdf2val.tbl[0].val[0] = val[0]; pdata->dbdf2val.tbl[0].val[1] = val[1]; break; case NUM_VFS: case PROBE_VF: ret = kstrtol(p, 0, &val[0]); if (ret == -EINVAL) return INVALID_STR; if (ret || !is_in_range(val[0], &pdata->dbdf2val.range)) { pr_out_of_range(&pdata->dbdf2val); return INVALID_DATA; } pdata->dbdf2val.tbl[0].val[0] = val[0]; break; } pdata->dbdf2val.tbl[1].dbdf = MLX4_ENDOF_TBL; return VALID_DATA; } int mlx4_fill_dbdf2val_tbl(struct mlx4_dbdf2val_lst *dbdf2val_lst) { int domain, bus, dev, fn; u64 dbdf; char *p, *t, *v; char tmp[32]; char sbdf[32]; char sep = ','; int j, k, str_size, i = 1; int prfx_size; p = dbdf2val_lst->str; for (j = 0; j < dbdf2val_lst->num_vals; j++) dbdf2val_lst->tbl[0].val[j] = dbdf2val_lst->def_val[j]; dbdf2val_lst->tbl[1].dbdf = MLX4_ENDOF_TBL; str_size = strlen(dbdf2val_lst->str); if (str_size == 0) return 0; while (strlen(p)) { prfx_size = BDF_STR_SIZE; sbdf[prfx_size] = 0; strncpy(sbdf, p, prfx_size); domain = DEFAULT_DOMAIN; if (sscanf(sbdf, "%02x:%02x.%x-", &bus, &dev, &fn) != 3) { prfx_size = DBDF_STR_SIZE; sbdf[prfx_size] = 0; strncpy(sbdf, p, prfx_size); if (sscanf(sbdf, "%04x:%02x:%02x.%x-", &domain, &bus, &dev, &fn) != 4) { pr_bdf_err(sbdf, dbdf2val_lst->name); goto err; } sprintf(tmp, "%04x:%02x:%02x.%x-", domain, bus, dev, fn); } else { sprintf(tmp, "%02x:%02x.%x-", bus, dev, fn); } if (strnicmp(sbdf, tmp, sizeof(tmp))) { pr_bdf_err(sbdf, dbdf2val_lst->name); goto err; } dbdf = dbdf_to_u64(domain, bus, dev, fn); for (j = 1; j < i; j++) if (dbdf2val_lst->tbl[j].dbdf == dbdf) { pr_warn("mlx4_core: in '%s', %s appears multiple times\n" , dbdf2val_lst->name, sbdf); goto err; } if (i >= MLX4_DEVS_TBL_SIZE) { pr_warn("mlx4_core: Too many devices in '%s'\n" , dbdf2val_lst->name); goto err; } p += prfx_size; t = strchr(p, sep); t = t ? t : p + strlen(p); if (p >= t) { pr_val_err(sbdf, dbdf2val_lst->name, ""); goto err; } for (k = 0; k < dbdf2val_lst->num_vals; k++) { char sval[32]; long int val; int ret, val_len; char vsep = ';'; v = (k == dbdf2val_lst->num_vals - 1) ? t : strchr(p, vsep); if (!v || v > t || v == p || (v - p) > sizeof(sval)) { pr_val_err(sbdf, dbdf2val_lst->name, p); goto err; } val_len = v - p; strncpy(sval, p, val_len); sval[val_len] = 0; ret = kstrtol(sval, 0, &val); if (ret) { if (strchr(p, vsep)) pr_warn("mlx4_core: too many vals in bdf '%s' of '%s'\n" , sbdf, dbdf2val_lst->name); else pr_val_err(sbdf, dbdf2val_lst->name, sval); goto err; } if (!is_in_range(val, &dbdf2val_lst->range)) { pr_out_of_range_bdf(sbdf, val, dbdf2val_lst); goto err; } dbdf2val_lst->tbl[i].val[k] = val; p = v; if (p[0] == vsep) p++; } dbdf2val_lst->tbl[i].dbdf = dbdf; if (strlen(p)) { if (p[0] != sep) { pr_warn("mlx4_core: expect separator '%c' before '%s' in '%s'\n" , sep, p, dbdf2val_lst->name); goto err; } p++; } i++; if (i < MLX4_DEVS_TBL_SIZE) dbdf2val_lst->tbl[i].dbdf = MLX4_ENDOF_TBL; } return 0; err: dbdf2val_lst->tbl[1].dbdf = MLX4_ENDOF_TBL; pr_warn("mlx4_core: The value of '%s' is incorrect. The value is discarded!\n" , dbdf2val_lst->name); return -EINVAL; } EXPORT_SYMBOL(mlx4_fill_dbdf2val_tbl); int mlx4_get_val(struct mlx4_dbdf2val *tbl, struct pci_dev *pdev, int idx, int *val) { u64 dbdf; int i = 1; *val = tbl[0].val[idx]; if (!pdev) return -EINVAL; - if (!pdev->bus) { - return -EINVAL; - } - dbdf = dbdf_to_u64(pci_get_domain(pdev->dev.bsddev), pci_get_bus(pdev->dev.bsddev), PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); while ((i < MLX4_DEVS_TBL_SIZE) && (tbl[i].dbdf != MLX4_ENDOF_TBL)) { if (tbl[i].dbdf == dbdf) { *val = tbl[i].val[idx]; return 0; } i++; } return 0; } EXPORT_SYMBOL(mlx4_get_val); static void process_mod_param_profile(struct mlx4_profile *profile) { vm_size_t hwphyssz; hwphyssz = 0; TUNABLE_ULONG_FETCH("hw.realmem", (u_long *) &hwphyssz); profile->num_qp = 1 << mod_param_profile.num_qp; profile->num_srq = 1 << mod_param_profile.num_srq; profile->rdmarc_per_qp = 1 << mod_param_profile.rdmarc_per_qp; profile->num_cq = 1 << mod_param_profile.num_cq; profile->num_mcg = 1 << mod_param_profile.num_mcg; profile->num_mpt = 1 << mod_param_profile.num_mpt; /* * We want to scale the number of MTTs with the size of the * system memory, since it makes sense to register a lot of * memory on a system with a lot of memory. As a heuristic, * make sure we have enough MTTs to register twice the system * memory (with PAGE_SIZE entries). * * This number has to be a power of two and fit into 32 bits * due to device limitations. We cap this at 2^30 as of bit map * limitation to work with int instead of uint (mlx4_buddy_init -> bitmap_zero) * That limits us to 4TB of memory registration per HCA with * 4KB pages, which is probably OK for the next few months. */ if (mod_param_profile.num_mtt_segs) profile->num_mtt_segs = 1 << mod_param_profile.num_mtt_segs; else { profile->num_mtt_segs = roundup_pow_of_two(max_t(unsigned, 1 << (MLX4_LOG_NUM_MTT - log_mtts_per_seg), min(1UL << (MLX4_MAX_LOG_NUM_MTT - log_mtts_per_seg), (hwphyssz << 1) >> log_mtts_per_seg))); /* set the actual value, so it will be reflected to the user using the sysfs */ mod_param_profile.num_mtt_segs = ilog2(profile->num_mtt_segs); } } int mlx4_check_port_params(struct mlx4_dev *dev, enum mlx4_port_type *port_type) { int i; for (i = 0; i < dev->caps.num_ports - 1; i++) { if (port_type[i] != port_type[i + 1]) { if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP)) { mlx4_err(dev, "Only same port types supported " "on this HCA, aborting.\n"); return -EINVAL; } } } for (i = 0; i < dev->caps.num_ports; i++) { if (!(port_type[i] & dev->caps.supported_type[i+1])) { mlx4_err(dev, "Requested port type for port %d is not " "supported on this HCA\n", i + 1); return -EINVAL; } } return 0; } static void mlx4_set_port_mask(struct mlx4_dev *dev) { int i; for (i = 1; i <= dev->caps.num_ports; ++i) dev->caps.port_mask[i] = dev->caps.port_type[i]; } static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) { int err; int i; err = mlx4_QUERY_DEV_CAP(dev, dev_cap); if (err) { mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n"); return err; } if (dev_cap->min_page_sz > PAGE_SIZE) { mlx4_err(dev, "HCA minimum page size of %d bigger than " "kernel PAGE_SIZE of %d, aborting.\n", dev_cap->min_page_sz, PAGE_SIZE); return -ENODEV; } if (dev_cap->num_ports > MLX4_MAX_PORTS) { mlx4_err(dev, "HCA has %d ports, but we only support %d, " "aborting.\n", dev_cap->num_ports, MLX4_MAX_PORTS); return -ENODEV; } if (dev_cap->uar_size > pci_resource_len(dev->pdev, 2)) { mlx4_err(dev, "HCA reported UAR size of 0x%x bigger than " "PCI resource 2 size of 0x%llx, aborting.\n", dev_cap->uar_size, (unsigned long long) pci_resource_len(dev->pdev, 2)); return -ENODEV; } dev->caps.num_ports = dev_cap->num_ports; dev->phys_caps.num_phys_eqs = MLX4_MAX_EQ_NUM; for (i = 1; i <= dev->caps.num_ports; ++i) { dev->caps.vl_cap[i] = dev_cap->max_vl[i]; dev->caps.ib_mtu_cap[i] = dev_cap->ib_mtu[i]; dev->phys_caps.gid_phys_table_len[i] = dev_cap->max_gids[i]; dev->phys_caps.pkey_phys_table_len[i] = dev_cap->max_pkeys[i]; /* set gid and pkey table operating lengths by default * to non-sriov values */ dev->caps.gid_table_len[i] = dev_cap->max_gids[i]; dev->caps.pkey_table_len[i] = dev_cap->max_pkeys[i]; dev->caps.port_width_cap[i] = dev_cap->max_port_width[i]; dev->caps.eth_mtu_cap[i] = dev_cap->eth_mtu[i]; dev->caps.def_mac[i] = dev_cap->def_mac[i]; dev->caps.supported_type[i] = dev_cap->supported_port_types[i]; dev->caps.suggested_type[i] = dev_cap->suggested_type[i]; dev->caps.default_sense[i] = dev_cap->default_sense[i]; dev->caps.trans_type[i] = dev_cap->trans_type[i]; dev->caps.vendor_oui[i] = dev_cap->vendor_oui[i]; dev->caps.wavelength[i] = dev_cap->wavelength[i]; dev->caps.trans_code[i] = dev_cap->trans_code[i]; } dev->caps.uar_page_size = PAGE_SIZE; dev->caps.num_uars = dev_cap->uar_size / PAGE_SIZE; dev->caps.local_ca_ack_delay = dev_cap->local_ca_ack_delay; dev->caps.bf_reg_size = dev_cap->bf_reg_size; dev->caps.bf_regs_per_page = dev_cap->bf_regs_per_page; dev->caps.max_sq_sg = dev_cap->max_sq_sg; dev->caps.max_rq_sg = dev_cap->max_rq_sg; dev->caps.max_wqes = dev_cap->max_qp_sz; dev->caps.max_qp_init_rdma = dev_cap->max_requester_per_qp; dev->caps.max_srq_wqes = dev_cap->max_srq_sz; dev->caps.max_srq_sge = dev_cap->max_rq_sg - 1; dev->caps.reserved_srqs = dev_cap->reserved_srqs; dev->caps.max_sq_desc_sz = dev_cap->max_sq_desc_sz; dev->caps.max_rq_desc_sz = dev_cap->max_rq_desc_sz; /* * Subtract 1 from the limit because we need to allocate a * spare CQE to enable resizing the CQ */ dev->caps.max_cqes = dev_cap->max_cq_sz - 1; dev->caps.reserved_cqs = dev_cap->reserved_cqs; dev->caps.reserved_eqs = dev_cap->reserved_eqs; dev->caps.reserved_mtts = dev_cap->reserved_mtts; dev->caps.reserved_mrws = dev_cap->reserved_mrws; /* The first 128 UARs are used for EQ doorbells */ dev->caps.reserved_uars = max_t(int, 128, dev_cap->reserved_uars); dev->caps.reserved_pds = dev_cap->reserved_pds; dev->caps.reserved_xrcds = (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) ? dev_cap->reserved_xrcds : 0; dev->caps.max_xrcds = (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) ? dev_cap->max_xrcds : 0; dev->caps.mtt_entry_sz = dev_cap->mtt_entry_sz; dev->caps.max_msg_sz = dev_cap->max_msg_sz; dev->caps.page_size_cap = ~(u32) (dev_cap->min_page_sz - 1); dev->caps.flags = dev_cap->flags; dev->caps.flags2 = dev_cap->flags2; dev->caps.bmme_flags = dev_cap->bmme_flags; dev->caps.reserved_lkey = dev_cap->reserved_lkey; dev->caps.stat_rate_support = dev_cap->stat_rate_support; dev->caps.cq_timestamp = dev_cap->timestamp_support; dev->caps.max_gso_sz = dev_cap->max_gso_sz; dev->caps.max_rss_tbl_sz = dev_cap->max_rss_tbl_sz; /* Sense port always allowed on supported devices for ConnectX-1 and -2 */ if (mlx4_priv(dev)->pci_dev_data & MLX4_PCI_DEV_FORCE_SENSE_PORT) dev->caps.flags |= MLX4_DEV_CAP_FLAG_SENSE_SUPPORT; /* Don't do sense port on multifunction devices (for now at least) */ if (mlx4_is_mfunc(dev)) dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_SENSE_SUPPORT; dev->caps.log_num_macs = log_num_mac; dev->caps.log_num_vlans = MLX4_LOG_NUM_VLANS; dev->caps.fast_drop = fast_drop ? !!(dev->caps.flags & MLX4_DEV_CAP_FLAG_FAST_DROP) : 0; for (i = 1; i <= dev->caps.num_ports; ++i) { dev->caps.port_type[i] = MLX4_PORT_TYPE_NONE; if (dev->caps.supported_type[i]) { /* if only ETH is supported - assign ETH */ if (dev->caps.supported_type[i] == MLX4_PORT_TYPE_ETH) dev->caps.port_type[i] = MLX4_PORT_TYPE_ETH; /* if only IB is supported, assign IB */ else if (dev->caps.supported_type[i] == MLX4_PORT_TYPE_IB) dev->caps.port_type[i] = MLX4_PORT_TYPE_IB; else { /* * if IB and ETH are supported, we set the port * type according to user selection of port type; * if there is no user selection, take the FW hint */ int pta; mlx4_get_val(port_type_array.dbdf2val.tbl, pci_physfn(dev->pdev), i - 1, &pta); if (pta == MLX4_PORT_TYPE_NONE) { dev->caps.port_type[i] = dev->caps.suggested_type[i] ? MLX4_PORT_TYPE_ETH : MLX4_PORT_TYPE_IB; } else if (pta == MLX4_PORT_TYPE_NA) { mlx4_err(dev, "Port %d is valid port. " "It is not allowed to configure its type to N/A(%d)\n", i, MLX4_PORT_TYPE_NA); return -EINVAL; } else { dev->caps.port_type[i] = pta; } } } /* * Link sensing is allowed on the port if 3 conditions are true: * 1. Both protocols are supported on the port. * 2. Different types are supported on the port * 3. FW declared that it supports link sensing */ mlx4_priv(dev)->sense.sense_allowed[i] = ((dev->caps.supported_type[i] == MLX4_PORT_TYPE_AUTO) && (dev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP) && (dev->caps.flags & MLX4_DEV_CAP_FLAG_SENSE_SUPPORT)); /* Disablling auto sense for default Eth ports support */ mlx4_priv(dev)->sense.sense_allowed[i] = 0; /* * If "default_sense" bit is set, we move the port to "AUTO" mode * and perform sense_port FW command to try and set the correct * port type from beginning */ if (mlx4_priv(dev)->sense.sense_allowed[i] && dev->caps.default_sense[i]) { enum mlx4_port_type sensed_port = MLX4_PORT_TYPE_NONE; dev->caps.possible_type[i] = MLX4_PORT_TYPE_AUTO; mlx4_SENSE_PORT(dev, i, &sensed_port); if (sensed_port != MLX4_PORT_TYPE_NONE) dev->caps.port_type[i] = sensed_port; } else { dev->caps.possible_type[i] = dev->caps.port_type[i]; } if (dev->caps.log_num_macs > dev_cap->log_max_macs[i]) { dev->caps.log_num_macs = dev_cap->log_max_macs[i]; mlx4_warn(dev, "Requested number of MACs is too much " "for port %d, reducing to %d.\n", i, 1 << dev->caps.log_num_macs); } if (dev->caps.log_num_vlans > dev_cap->log_max_vlans[i]) { dev->caps.log_num_vlans = dev_cap->log_max_vlans[i]; mlx4_warn(dev, "Requested number of VLANs is too much " "for port %d, reducing to %d.\n", i, 1 << dev->caps.log_num_vlans); } } dev->caps.max_basic_counters = dev_cap->max_basic_counters; dev->caps.max_extended_counters = dev_cap->max_extended_counters; /* support extended counters if available */ if (dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS_EXT) dev->caps.max_counters = dev->caps.max_extended_counters; else dev->caps.max_counters = dev->caps.max_basic_counters; dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] = dev_cap->reserved_qps; dev->caps.reserved_qps_cnt[MLX4_QP_REGION_ETH_ADDR] = dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] = (1 << dev->caps.log_num_macs) * (1 << dev->caps.log_num_vlans) * dev->caps.num_ports; dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH] = MLX4_NUM_FEXCH; dev->caps.reserved_qps = dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] + dev->caps.reserved_qps_cnt[MLX4_QP_REGION_ETH_ADDR] + dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] + dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH]; dev->caps.sync_qp = dev_cap->sync_qp; if (dev->pdev->device == 0x1003) dev->caps.cq_flags |= MLX4_DEV_CAP_CQ_FLAG_IO; dev->caps.sqp_demux = (mlx4_is_master(dev)) ? MLX4_MAX_NUM_SLAVES : 0; if (!mlx4_enable_64b_cqe_eqe && !mlx4_is_slave(dev)) { if (dev_cap->flags & (MLX4_DEV_CAP_FLAG_64B_CQE | MLX4_DEV_CAP_FLAG_64B_EQE)) { mlx4_warn(dev, "64B EQEs/CQEs supported by the device but not enabled\n"); dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_64B_CQE; dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_64B_EQE; } } if ((dev->caps.flags & (MLX4_DEV_CAP_FLAG_64B_CQE | MLX4_DEV_CAP_FLAG_64B_EQE)) && mlx4_is_master(dev)) dev->caps.function_caps |= MLX4_FUNC_CAP_64B_EQE_CQE; if (!mlx4_is_slave(dev)) { for (i = 0; i < dev->caps.num_ports; ++i) dev->caps.def_counter_index[i] = i << 1; } return 0; } /*The function checks if there are live vf, return the num of them*/ static int mlx4_how_many_lives_vf(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_slave_state *s_state; int i; int ret = 0; for (i = 1/*the ppf is 0*/; i < dev->num_slaves; ++i) { s_state = &priv->mfunc.master.slave_state[i]; if (s_state->active && s_state->last_cmd != MLX4_COMM_CMD_RESET) { mlx4_warn(dev, "%s: slave: %d is still active\n", __func__, i); ret++; } } return ret; } int mlx4_get_parav_qkey(struct mlx4_dev *dev, u32 qpn, u32 *qkey) { u32 qk = MLX4_RESERVED_QKEY_BASE; if (qpn >= dev->phys_caps.base_tunnel_sqpn + 8 * MLX4_MFUNC_MAX || qpn < dev->phys_caps.base_proxy_sqpn) return -EINVAL; if (qpn >= dev->phys_caps.base_tunnel_sqpn) /* tunnel qp */ qk += qpn - dev->phys_caps.base_tunnel_sqpn; else qk += qpn - dev->phys_caps.base_proxy_sqpn; *qkey = qk; return 0; } EXPORT_SYMBOL(mlx4_get_parav_qkey); void mlx4_sync_pkey_table(struct mlx4_dev *dev, int slave, int port, int i, int val) { struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev); if (!mlx4_is_master(dev)) return; priv->virt2phys_pkey[slave][port - 1][i] = val; } EXPORT_SYMBOL(mlx4_sync_pkey_table); void mlx4_put_slave_node_guid(struct mlx4_dev *dev, int slave, __be64 guid) { struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev); if (!mlx4_is_master(dev)) return; priv->slave_node_guids[slave] = guid; } EXPORT_SYMBOL(mlx4_put_slave_node_guid); __be64 mlx4_get_slave_node_guid(struct mlx4_dev *dev, int slave) { struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev); if (!mlx4_is_master(dev)) return 0; return priv->slave_node_guids[slave]; } EXPORT_SYMBOL(mlx4_get_slave_node_guid); int mlx4_is_slave_active(struct mlx4_dev *dev, int slave) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_slave_state *s_slave; if (!mlx4_is_master(dev)) return 0; s_slave = &priv->mfunc.master.slave_state[slave]; return !!s_slave->active; } EXPORT_SYMBOL(mlx4_is_slave_active); static void slave_adjust_steering_mode(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap, struct mlx4_init_hca_param *hca_param) { dev->caps.steering_mode = hca_param->steering_mode; if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) dev->caps.num_qp_per_mgm = dev_cap->fs_max_num_qp_per_entry; else dev->caps.num_qp_per_mgm = 4 * ((1 << hca_param->log_mc_entry_sz)/16 - 2); mlx4_dbg(dev, "Steering mode is: %s\n", mlx4_steering_mode_str(dev->caps.steering_mode)); } static int mlx4_slave_cap(struct mlx4_dev *dev) { int err; u32 page_size; struct mlx4_dev_cap dev_cap; struct mlx4_func_cap func_cap; struct mlx4_init_hca_param hca_param; int i; memset(&hca_param, 0, sizeof(hca_param)); err = mlx4_QUERY_HCA(dev, &hca_param); if (err) { mlx4_err(dev, "QUERY_HCA command failed, aborting.\n"); return err; } /*fail if the hca has an unknown capability */ if ((hca_param.global_caps | HCA_GLOBAL_CAP_MASK) != HCA_GLOBAL_CAP_MASK) { mlx4_err(dev, "Unknown hca global capabilities\n"); return -ENOSYS; } mlx4_log_num_mgm_entry_size = hca_param.log_mc_entry_sz; dev->caps.hca_core_clock = hca_param.hca_core_clock; memset(&dev_cap, 0, sizeof(dev_cap)); dev->caps.max_qp_dest_rdma = 1 << hca_param.log_rd_per_qp; err = mlx4_dev_cap(dev, &dev_cap); if (err) { mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n"); return err; } err = mlx4_QUERY_FW(dev); if (err) mlx4_err(dev, "QUERY_FW command failed: could not get FW version.\n"); if (!hca_param.mw_enable) { dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_MEM_WINDOW; dev->caps.bmme_flags &= ~MLX4_BMME_FLAG_TYPE_2_WIN; } page_size = ~dev->caps.page_size_cap + 1; mlx4_warn(dev, "HCA minimum page size:%d\n", page_size); if (page_size > PAGE_SIZE) { mlx4_err(dev, "HCA minimum page size of %d bigger than " "kernel PAGE_SIZE of %d, aborting.\n", page_size, PAGE_SIZE); return -ENODEV; } /* slave gets uar page size from QUERY_HCA fw command */ dev->caps.uar_page_size = 1 << (hca_param.uar_page_sz + 12); /* TODO: relax this assumption */ if (dev->caps.uar_page_size != PAGE_SIZE) { mlx4_err(dev, "UAR size:%d != kernel PAGE_SIZE of %d\n", dev->caps.uar_page_size, PAGE_SIZE); return -ENODEV; } memset(&func_cap, 0, sizeof(func_cap)); err = mlx4_QUERY_FUNC_CAP(dev, 0, &func_cap); if (err) { mlx4_err(dev, "QUERY_FUNC_CAP general command failed, aborting (%d).\n", err); return err; } if ((func_cap.pf_context_behaviour | PF_CONTEXT_BEHAVIOUR_MASK) != PF_CONTEXT_BEHAVIOUR_MASK) { mlx4_err(dev, "Unknown pf context behaviour\n"); return -ENOSYS; } dev->caps.num_ports = func_cap.num_ports; dev->quotas.qp = func_cap.qp_quota; dev->quotas.srq = func_cap.srq_quota; dev->quotas.cq = func_cap.cq_quota; dev->quotas.mpt = func_cap.mpt_quota; dev->quotas.mtt = func_cap.mtt_quota; dev->caps.num_qps = 1 << hca_param.log_num_qps; dev->caps.num_srqs = 1 << hca_param.log_num_srqs; dev->caps.num_cqs = 1 << hca_param.log_num_cqs; dev->caps.num_mpts = 1 << hca_param.log_mpt_sz; dev->caps.num_eqs = func_cap.max_eq; dev->caps.reserved_eqs = func_cap.reserved_eq; dev->caps.num_pds = MLX4_NUM_PDS; dev->caps.num_mgms = 0; dev->caps.num_amgms = 0; if (dev->caps.num_ports > MLX4_MAX_PORTS) { mlx4_err(dev, "HCA has %d ports, but we only support %d, " "aborting.\n", dev->caps.num_ports, MLX4_MAX_PORTS); return -ENODEV; } dev->caps.qp0_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); dev->caps.qp0_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); dev->caps.qp1_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); dev->caps.qp1_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); if (!dev->caps.qp0_tunnel || !dev->caps.qp0_proxy || !dev->caps.qp1_tunnel || !dev->caps.qp1_proxy) { err = -ENOMEM; goto err_mem; } for (i = 1; i <= dev->caps.num_ports; ++i) { err = mlx4_QUERY_FUNC_CAP(dev, (u32) i, &func_cap); if (err) { mlx4_err(dev, "QUERY_FUNC_CAP port command failed for" " port %d, aborting (%d).\n", i, err); goto err_mem; } dev->caps.qp0_tunnel[i - 1] = func_cap.qp0_tunnel_qpn; dev->caps.qp0_proxy[i - 1] = func_cap.qp0_proxy_qpn; dev->caps.qp1_tunnel[i - 1] = func_cap.qp1_tunnel_qpn; dev->caps.qp1_proxy[i - 1] = func_cap.qp1_proxy_qpn; dev->caps.def_counter_index[i - 1] = func_cap.def_counter_index; dev->caps.port_mask[i] = dev->caps.port_type[i]; err = mlx4_get_slave_pkey_gid_tbl_len(dev, i, &dev->caps.gid_table_len[i], &dev->caps.pkey_table_len[i]); if (err) goto err_mem; } if (dev->caps.uar_page_size * (dev->caps.num_uars - dev->caps.reserved_uars) > pci_resource_len(dev->pdev, 2)) { mlx4_err(dev, "HCA reported UAR region size of 0x%x bigger than " "PCI resource 2 size of 0x%llx, aborting.\n", dev->caps.uar_page_size * dev->caps.num_uars, (unsigned long long) pci_resource_len(dev->pdev, 2)); err = -ENOMEM; goto err_mem; } if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_64B_EQE_ENABLED) { dev->caps.eqe_size = 64; dev->caps.eqe_factor = 1; } else { dev->caps.eqe_size = 32; dev->caps.eqe_factor = 0; } if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_64B_CQE_ENABLED) { dev->caps.cqe_size = 64; dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_64B_CQE; } else { dev->caps.cqe_size = 32; } dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS; mlx4_warn(dev, "Timestamping is not supported in slave mode.\n"); slave_adjust_steering_mode(dev, &dev_cap, &hca_param); return 0; err_mem: kfree(dev->caps.qp0_tunnel); kfree(dev->caps.qp0_proxy); kfree(dev->caps.qp1_tunnel); kfree(dev->caps.qp1_proxy); dev->caps.qp0_tunnel = dev->caps.qp0_proxy = dev->caps.qp1_tunnel = dev->caps.qp1_proxy = NULL; return err; } static void mlx4_request_modules(struct mlx4_dev *dev) { int port; int has_ib_port = false; int has_eth_port = false; #define EN_DRV_NAME "mlx4_en" #define IB_DRV_NAME "mlx4_ib" for (port = 1; port <= dev->caps.num_ports; port++) { if (dev->caps.port_type[port] == MLX4_PORT_TYPE_IB) has_ib_port = true; else if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH) has_eth_port = true; } if (has_ib_port) request_module_nowait(IB_DRV_NAME); if (has_eth_port) request_module_nowait(EN_DRV_NAME); } /* * Change the port configuration of the device. * Every user of this function must hold the port mutex. */ int mlx4_change_port_types(struct mlx4_dev *dev, enum mlx4_port_type *port_types) { int err = 0; int change = 0; int port; for (port = 0; port < dev->caps.num_ports; port++) { /* Change the port type only if the new type is different * from the current, and not set to Auto */ if (port_types[port] != dev->caps.port_type[port + 1]) change = 1; } if (change) { mlx4_unregister_device(dev); for (port = 1; port <= dev->caps.num_ports; port++) { mlx4_CLOSE_PORT(dev, port); dev->caps.port_type[port] = port_types[port - 1]; err = mlx4_SET_PORT(dev, port, -1); if (err) { mlx4_err(dev, "Failed to set port %d, " "aborting\n", port); goto out; } } mlx4_set_port_mask(dev); err = mlx4_register_device(dev); if (err) { mlx4_err(dev, "Failed to register device\n"); goto out; } mlx4_request_modules(dev); } out: return err; } static ssize_t show_port_type(struct device *dev, struct device_attribute *attr, char *buf) { struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info, port_attr); struct mlx4_dev *mdev = info->dev; char type[8]; sprintf(type, "%s", (mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_IB) ? "ib" : "eth"); if (mdev->caps.possible_type[info->port] == MLX4_PORT_TYPE_AUTO) sprintf(buf, "auto (%s)\n", type); else sprintf(buf, "%s\n", type); return strlen(buf); } static ssize_t set_port_type(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info, port_attr); struct mlx4_dev *mdev = info->dev; struct mlx4_priv *priv = mlx4_priv(mdev); enum mlx4_port_type types[MLX4_MAX_PORTS]; enum mlx4_port_type new_types[MLX4_MAX_PORTS]; int i; int err = 0; if (!strcmp(buf, "ib\n")) info->tmp_type = MLX4_PORT_TYPE_IB; else if (!strcmp(buf, "eth\n")) info->tmp_type = MLX4_PORT_TYPE_ETH; else if (!strcmp(buf, "auto\n")) info->tmp_type = MLX4_PORT_TYPE_AUTO; else { mlx4_err(mdev, "%s is not supported port type\n", buf); return -EINVAL; } if ((info->tmp_type & mdev->caps.supported_type[info->port]) != info->tmp_type) { mlx4_err(mdev, "Requested port type for port %d is not supported on this HCA\n", info->port); return -EINVAL; } mlx4_stop_sense(mdev); mutex_lock(&priv->port_mutex); /* Possible type is always the one that was delivered */ mdev->caps.possible_type[info->port] = info->tmp_type; for (i = 0; i < mdev->caps.num_ports; i++) { types[i] = priv->port[i+1].tmp_type ? priv->port[i+1].tmp_type : mdev->caps.possible_type[i+1]; if (types[i] == MLX4_PORT_TYPE_AUTO) types[i] = mdev->caps.port_type[i+1]; } if (!(mdev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP) && !(mdev->caps.flags & MLX4_DEV_CAP_FLAG_SENSE_SUPPORT)) { for (i = 1; i <= mdev->caps.num_ports; i++) { if (mdev->caps.possible_type[i] == MLX4_PORT_TYPE_AUTO) { mdev->caps.possible_type[i] = mdev->caps.port_type[i]; err = -EINVAL; } } } if (err) { mlx4_err(mdev, "Auto sensing is not supported on this HCA. " "Set only 'eth' or 'ib' for both ports " "(should be the same)\n"); goto out; } mlx4_do_sense_ports(mdev, new_types, types); err = mlx4_check_port_params(mdev, new_types); if (err) goto out; /* We are about to apply the changes after the configuration * was verified, no need to remember the temporary types * any more */ for (i = 0; i < mdev->caps.num_ports; i++) priv->port[i + 1].tmp_type = 0; err = mlx4_change_port_types(mdev, new_types); out: mlx4_start_sense(mdev); mutex_unlock(&priv->port_mutex); return err ? err : count; } enum ibta_mtu { IB_MTU_256 = 1, IB_MTU_512 = 2, IB_MTU_1024 = 3, IB_MTU_2048 = 4, IB_MTU_4096 = 5 }; static inline int int_to_ibta_mtu(int mtu) { switch (mtu) { case 256: return IB_MTU_256; case 512: return IB_MTU_512; case 1024: return IB_MTU_1024; case 2048: return IB_MTU_2048; case 4096: return IB_MTU_4096; default: return -1; } } static inline int ibta_mtu_to_int(enum ibta_mtu mtu) { switch (mtu) { case IB_MTU_256: return 256; case IB_MTU_512: return 512; case IB_MTU_1024: return 1024; case IB_MTU_2048: return 2048; case IB_MTU_4096: return 4096; default: return -1; } } static ssize_t show_port_ib_mtu(struct device *dev, struct device_attribute *attr, char *buf) { struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info, port_mtu_attr); struct mlx4_dev *mdev = info->dev; if (mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_ETH) mlx4_warn(mdev, "port level mtu is only used for IB ports\n"); sprintf(buf, "%d\n", ibta_mtu_to_int(mdev->caps.port_ib_mtu[info->port])); return strlen(buf); } static ssize_t set_port_ib_mtu(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info, port_mtu_attr); struct mlx4_dev *mdev = info->dev; struct mlx4_priv *priv = mlx4_priv(mdev); int err, port, mtu, ibta_mtu = -1; if (mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_ETH) { mlx4_warn(mdev, "port level mtu is only used for IB ports\n"); return -EINVAL; } mtu = (int) simple_strtol(buf, NULL, 0); ibta_mtu = int_to_ibta_mtu(mtu); if (ibta_mtu < 0) { mlx4_err(mdev, "%s is invalid IBTA mtu\n", buf); return -EINVAL; } mdev->caps.port_ib_mtu[info->port] = ibta_mtu; mlx4_stop_sense(mdev); mutex_lock(&priv->port_mutex); mlx4_unregister_device(mdev); for (port = 1; port <= mdev->caps.num_ports; port++) { mlx4_CLOSE_PORT(mdev, port); err = mlx4_SET_PORT(mdev, port, -1); if (err) { mlx4_err(mdev, "Failed to set port %d, " "aborting\n", port); goto err_set_port; } } err = mlx4_register_device(mdev); err_set_port: mutex_unlock(&priv->port_mutex); mlx4_start_sense(mdev); return err ? err : count; } static int mlx4_load_fw(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); int err, unmap_flag = 0; priv->fw.fw_icm = mlx4_alloc_icm(dev, priv->fw.fw_pages, GFP_HIGHUSER | __GFP_NOWARN, 0); if (!priv->fw.fw_icm) { mlx4_err(dev, "Couldn't allocate FW area, aborting.\n"); return -ENOMEM; } err = mlx4_MAP_FA(dev, priv->fw.fw_icm); if (err) { mlx4_err(dev, "MAP_FA command failed, aborting.\n"); goto err_free; } err = mlx4_RUN_FW(dev); if (err) { mlx4_err(dev, "RUN_FW command failed, aborting.\n"); goto err_unmap_fa; } return 0; err_unmap_fa: unmap_flag = mlx4_UNMAP_FA(dev); if (unmap_flag) pr_warn("mlx4_core: mlx4_UNMAP_FA failed.\n"); err_free: if (!unmap_flag) mlx4_free_icm(dev, priv->fw.fw_icm, 0); return err; } static int mlx4_init_cmpt_table(struct mlx4_dev *dev, u64 cmpt_base, int cmpt_entry_sz) { struct mlx4_priv *priv = mlx4_priv(dev); int err; int num_eqs; err = mlx4_init_icm_table(dev, &priv->qp_table.cmpt_table, cmpt_base + ((u64) (MLX4_CMPT_TYPE_QP * cmpt_entry_sz) << MLX4_CMPT_SHIFT), cmpt_entry_sz, dev->caps.num_qps, dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], 0, 0); if (err) goto err; err = mlx4_init_icm_table(dev, &priv->srq_table.cmpt_table, cmpt_base + ((u64) (MLX4_CMPT_TYPE_SRQ * cmpt_entry_sz) << MLX4_CMPT_SHIFT), cmpt_entry_sz, dev->caps.num_srqs, dev->caps.reserved_srqs, 0, 0); if (err) goto err_qp; err = mlx4_init_icm_table(dev, &priv->cq_table.cmpt_table, cmpt_base + ((u64) (MLX4_CMPT_TYPE_CQ * cmpt_entry_sz) << MLX4_CMPT_SHIFT), cmpt_entry_sz, dev->caps.num_cqs, dev->caps.reserved_cqs, 0, 0); if (err) goto err_srq; num_eqs = (mlx4_is_master(dev)) ? dev->phys_caps.num_phys_eqs : dev->caps.num_eqs; err = mlx4_init_icm_table(dev, &priv->eq_table.cmpt_table, cmpt_base + ((u64) (MLX4_CMPT_TYPE_EQ * cmpt_entry_sz) << MLX4_CMPT_SHIFT), cmpt_entry_sz, num_eqs, num_eqs, 0, 0); if (err) goto err_cq; return 0; err_cq: mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table); err_srq: mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table); err_qp: mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table); err: return err; } static int mlx4_init_icm(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap, struct mlx4_init_hca_param *init_hca, u64 icm_size) { struct mlx4_priv *priv = mlx4_priv(dev); u64 aux_pages; int num_eqs; int err, unmap_flag = 0; err = mlx4_SET_ICM_SIZE(dev, icm_size, &aux_pages); if (err) { mlx4_err(dev, "SET_ICM_SIZE command failed, aborting.\n"); return err; } mlx4_dbg(dev, "%lld KB of HCA context requires %lld KB aux memory.\n", (unsigned long long) icm_size >> 10, (unsigned long long) aux_pages << 2); priv->fw.aux_icm = mlx4_alloc_icm(dev, aux_pages, GFP_HIGHUSER | __GFP_NOWARN, 0); if (!priv->fw.aux_icm) { mlx4_err(dev, "Couldn't allocate aux memory, aborting.\n"); return -ENOMEM; } err = mlx4_MAP_ICM_AUX(dev, priv->fw.aux_icm); if (err) { mlx4_err(dev, "MAP_ICM_AUX command failed, aborting.\n"); goto err_free_aux; } err = mlx4_init_cmpt_table(dev, init_hca->cmpt_base, dev_cap->cmpt_entry_sz); if (err) { mlx4_err(dev, "Failed to map cMPT context memory, aborting.\n"); goto err_unmap_aux; } num_eqs = (mlx4_is_master(dev)) ? dev->phys_caps.num_phys_eqs : dev->caps.num_eqs; err = mlx4_init_icm_table(dev, &priv->eq_table.table, init_hca->eqc_base, dev_cap->eqc_entry_sz, num_eqs, num_eqs, 0, 0); if (err) { mlx4_err(dev, "Failed to map EQ context memory, aborting.\n"); goto err_unmap_cmpt; } /* * Reserved MTT entries must be aligned up to a cacheline * boundary, since the FW will write to them, while the driver * writes to all other MTT entries. (The variable * dev->caps.mtt_entry_sz below is really the MTT segment * size, not the raw entry size) */ dev->caps.reserved_mtts = ALIGN(dev->caps.reserved_mtts * dev->caps.mtt_entry_sz, dma_get_cache_alignment()) / dev->caps.mtt_entry_sz; err = mlx4_init_icm_table(dev, &priv->mr_table.mtt_table, init_hca->mtt_base, dev->caps.mtt_entry_sz, dev->caps.num_mtts, dev->caps.reserved_mtts, 1, 0); if (err) { mlx4_err(dev, "Failed to map MTT context memory, aborting.\n"); goto err_unmap_eq; } err = mlx4_init_icm_table(dev, &priv->mr_table.dmpt_table, init_hca->dmpt_base, dev_cap->dmpt_entry_sz, dev->caps.num_mpts, dev->caps.reserved_mrws, 1, 1); if (err) { mlx4_err(dev, "Failed to map dMPT context memory, aborting.\n"); goto err_unmap_mtt; } err = mlx4_init_icm_table(dev, &priv->qp_table.qp_table, init_hca->qpc_base, dev_cap->qpc_entry_sz, dev->caps.num_qps, dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], 0, 0); if (err) { mlx4_err(dev, "Failed to map QP context memory, aborting.\n"); goto err_unmap_dmpt; } err = mlx4_init_icm_table(dev, &priv->qp_table.auxc_table, init_hca->auxc_base, dev_cap->aux_entry_sz, dev->caps.num_qps, dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], 0, 0); if (err) { mlx4_err(dev, "Failed to map AUXC context memory, aborting.\n"); goto err_unmap_qp; } err = mlx4_init_icm_table(dev, &priv->qp_table.altc_table, init_hca->altc_base, dev_cap->altc_entry_sz, dev->caps.num_qps, dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], 0, 0); if (err) { mlx4_err(dev, "Failed to map ALTC context memory, aborting.\n"); goto err_unmap_auxc; } err = mlx4_init_icm_table(dev, &priv->qp_table.rdmarc_table, init_hca->rdmarc_base, dev_cap->rdmarc_entry_sz << priv->qp_table.rdmarc_shift, dev->caps.num_qps, dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], 0, 0); if (err) { mlx4_err(dev, "Failed to map RDMARC context memory, aborting\n"); goto err_unmap_altc; } err = mlx4_init_icm_table(dev, &priv->cq_table.table, init_hca->cqc_base, dev_cap->cqc_entry_sz, dev->caps.num_cqs, dev->caps.reserved_cqs, 0, 0); if (err) { mlx4_err(dev, "Failed to map CQ context memory, aborting.\n"); goto err_unmap_rdmarc; } err = mlx4_init_icm_table(dev, &priv->srq_table.table, init_hca->srqc_base, dev_cap->srq_entry_sz, dev->caps.num_srqs, dev->caps.reserved_srqs, 0, 0); if (err) { mlx4_err(dev, "Failed to map SRQ context memory, aborting.\n"); goto err_unmap_cq; } /* * For flow steering device managed mode it is required to use * mlx4_init_icm_table. For B0 steering mode it's not strictly * required, but for simplicity just map the whole multicast * group table now. The table isn't very big and it's a lot * easier than trying to track ref counts. */ err = mlx4_init_icm_table(dev, &priv->mcg_table.table, init_hca->mc_base, mlx4_get_mgm_entry_size(dev), dev->caps.num_mgms + dev->caps.num_amgms, dev->caps.num_mgms + dev->caps.num_amgms, 0, 0); if (err) { mlx4_err(dev, "Failed to map MCG context memory, aborting.\n"); goto err_unmap_srq; } return 0; err_unmap_srq: mlx4_cleanup_icm_table(dev, &priv->srq_table.table); err_unmap_cq: mlx4_cleanup_icm_table(dev, &priv->cq_table.table); err_unmap_rdmarc: mlx4_cleanup_icm_table(dev, &priv->qp_table.rdmarc_table); err_unmap_altc: mlx4_cleanup_icm_table(dev, &priv->qp_table.altc_table); err_unmap_auxc: mlx4_cleanup_icm_table(dev, &priv->qp_table.auxc_table); err_unmap_qp: mlx4_cleanup_icm_table(dev, &priv->qp_table.qp_table); err_unmap_dmpt: mlx4_cleanup_icm_table(dev, &priv->mr_table.dmpt_table); err_unmap_mtt: mlx4_cleanup_icm_table(dev, &priv->mr_table.mtt_table); err_unmap_eq: mlx4_cleanup_icm_table(dev, &priv->eq_table.table); err_unmap_cmpt: mlx4_cleanup_icm_table(dev, &priv->eq_table.cmpt_table); mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table); mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table); mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table); err_unmap_aux: unmap_flag = mlx4_UNMAP_ICM_AUX(dev); if (unmap_flag) pr_warn("mlx4_core: mlx4_UNMAP_ICM_AUX failed.\n"); err_free_aux: if (!unmap_flag) mlx4_free_icm(dev, priv->fw.aux_icm, 0); return err; } static void mlx4_free_icms(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); mlx4_cleanup_icm_table(dev, &priv->mcg_table.table); mlx4_cleanup_icm_table(dev, &priv->srq_table.table); mlx4_cleanup_icm_table(dev, &priv->cq_table.table); mlx4_cleanup_icm_table(dev, &priv->qp_table.rdmarc_table); mlx4_cleanup_icm_table(dev, &priv->qp_table.altc_table); mlx4_cleanup_icm_table(dev, &priv->qp_table.auxc_table); mlx4_cleanup_icm_table(dev, &priv->qp_table.qp_table); mlx4_cleanup_icm_table(dev, &priv->mr_table.dmpt_table); mlx4_cleanup_icm_table(dev, &priv->mr_table.mtt_table); mlx4_cleanup_icm_table(dev, &priv->eq_table.table); mlx4_cleanup_icm_table(dev, &priv->eq_table.cmpt_table); mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table); mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table); mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table); if (!mlx4_UNMAP_ICM_AUX(dev)) mlx4_free_icm(dev, priv->fw.aux_icm, 0); else pr_warn("mlx4_core: mlx4_UNMAP_ICM_AUX failed.\n"); } static void mlx4_slave_exit(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); mutex_lock(&priv->cmd.slave_cmd_mutex); if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, MLX4_COMM_TIME)) mlx4_warn(dev, "Failed to close slave function.\n"); mutex_unlock(&priv->cmd.slave_cmd_mutex); } static int map_bf_area(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); resource_size_t bf_start; resource_size_t bf_len; int err = 0; if (!dev->caps.bf_reg_size) return -ENXIO; bf_start = pci_resource_start(dev->pdev, 2) + (dev->caps.num_uars << PAGE_SHIFT); bf_len = pci_resource_len(dev->pdev, 2) - (dev->caps.num_uars << PAGE_SHIFT); priv->bf_mapping = io_mapping_create_wc(bf_start, bf_len); if (!priv->bf_mapping) err = -ENOMEM; return err; } static void unmap_bf_area(struct mlx4_dev *dev) { if (mlx4_priv(dev)->bf_mapping) io_mapping_free(mlx4_priv(dev)->bf_mapping); } int mlx4_read_clock(struct mlx4_dev *dev) { u32 clockhi, clocklo, clockhi1; cycle_t cycles; int i; struct mlx4_priv *priv = mlx4_priv(dev); if (!priv->clock_mapping) return -ENOTSUPP; for (i = 0; i < 10; i++) { clockhi = swab32(readl(priv->clock_mapping)); clocklo = swab32(readl(priv->clock_mapping + 4)); clockhi1 = swab32(readl(priv->clock_mapping)); if (clockhi == clockhi1) break; } cycles = (u64) clockhi << 32 | (u64) clocklo; return cycles; } EXPORT_SYMBOL_GPL(mlx4_read_clock); static int map_internal_clock(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); priv->clock_mapping = ioremap(pci_resource_start(dev->pdev, priv->fw.clock_bar) + priv->fw.clock_offset, MLX4_CLOCK_SIZE); if (!priv->clock_mapping) return -ENOMEM; return 0; } int mlx4_get_internal_clock_params(struct mlx4_dev *dev, struct mlx4_clock_params *params) { struct mlx4_priv *priv = mlx4_priv(dev); if (mlx4_is_slave(dev)) return -ENOTSUPP; if (!params) return -EINVAL; params->bar = priv->fw.clock_bar; params->offset = priv->fw.clock_offset; params->size = MLX4_CLOCK_SIZE; return 0; } EXPORT_SYMBOL_GPL(mlx4_get_internal_clock_params); static void unmap_internal_clock(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); if (priv->clock_mapping) iounmap(priv->clock_mapping); } static void mlx4_close_hca(struct mlx4_dev *dev) { unmap_internal_clock(dev); unmap_bf_area(dev); if (mlx4_is_slave(dev)) { mlx4_slave_exit(dev); } else { mlx4_CLOSE_HCA(dev, 0); mlx4_free_icms(dev); if (!mlx4_UNMAP_FA(dev)) mlx4_free_icm(dev, mlx4_priv(dev)->fw.fw_icm, 0); else pr_warn("mlx4_core: mlx4_UNMAP_FA failed.\n"); } } static int mlx4_init_slave(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); u64 dma = (u64) priv->mfunc.vhcr_dma; int num_of_reset_retries = NUM_OF_RESET_RETRIES; int ret_from_reset = 0; u32 slave_read; u32 cmd_channel_ver; mutex_lock(&priv->cmd.slave_cmd_mutex); priv->cmd.max_cmds = 1; mlx4_warn(dev, "Sending reset\n"); ret_from_reset = mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, MLX4_COMM_TIME); /* if we are in the middle of flr the slave will try * NUM_OF_RESET_RETRIES times before leaving.*/ if (ret_from_reset) { if (MLX4_DELAY_RESET_SLAVE == ret_from_reset) { msleep(SLEEP_TIME_IN_RESET); while (ret_from_reset && num_of_reset_retries) { mlx4_warn(dev, "slave is currently in the" "middle of FLR. retrying..." "(try num:%d)\n", (NUM_OF_RESET_RETRIES - num_of_reset_retries + 1)); ret_from_reset = mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, MLX4_COMM_TIME); num_of_reset_retries = num_of_reset_retries - 1; } } else goto err; } /* check the driver version - the slave I/F revision * must match the master's */ slave_read = swab32(readl(&priv->mfunc.comm->slave_read)); cmd_channel_ver = mlx4_comm_get_version(); if (MLX4_COMM_GET_IF_REV(cmd_channel_ver) != MLX4_COMM_GET_IF_REV(slave_read)) { mlx4_err(dev, "slave driver version is not supported" " by the master\n"); goto err; } mlx4_warn(dev, "Sending vhcr0\n"); if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR0, dma >> 48, MLX4_COMM_TIME)) goto err; if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR1, dma >> 32, MLX4_COMM_TIME)) goto err; if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR2, dma >> 16, MLX4_COMM_TIME)) goto err; if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR_EN, dma, MLX4_COMM_TIME)) goto err; mutex_unlock(&priv->cmd.slave_cmd_mutex); return 0; err: mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, 0); mutex_unlock(&priv->cmd.slave_cmd_mutex); return -EIO; } static void mlx4_parav_master_pf_caps(struct mlx4_dev *dev) { int i; for (i = 1; i <= dev->caps.num_ports; i++) { if (dev->caps.port_type[i] == MLX4_PORT_TYPE_ETH) dev->caps.gid_table_len[i] = mlx4_get_slave_num_gids(dev, 0); else dev->caps.gid_table_len[i] = 1; dev->caps.pkey_table_len[i] = dev->phys_caps.pkey_phys_table_len[i] - 1; } } static int choose_log_fs_mgm_entry_size(int qp_per_entry) { int i = MLX4_MIN_MGM_LOG_ENTRY_SIZE; for (i = MLX4_MIN_MGM_LOG_ENTRY_SIZE; i <= MLX4_MAX_MGM_LOG_ENTRY_SIZE; i++) { if (qp_per_entry <= 4 * ((1 << i) / 16 - 2)) break; } return (i <= MLX4_MAX_MGM_LOG_ENTRY_SIZE) ? i : -1; } static void choose_steering_mode(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) { int nvfs; mlx4_get_val(num_vfs.dbdf2val.tbl, pci_physfn(dev->pdev), 0, &nvfs); if (high_rate_steer && !mlx4_is_mfunc(dev)) { dev->caps.flags &= ~(MLX4_DEV_CAP_FLAG_VEP_MC_STEER | MLX4_DEV_CAP_FLAG_VEP_UC_STEER); dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_FS_EN; } if (mlx4_log_num_mgm_entry_size == -1 && dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_FS_EN && (!mlx4_is_mfunc(dev) || (dev_cap->fs_max_num_qp_per_entry >= (nvfs + 1))) && choose_log_fs_mgm_entry_size(dev_cap->fs_max_num_qp_per_entry) >= MLX4_MIN_MGM_LOG_ENTRY_SIZE) { dev->oper_log_mgm_entry_size = choose_log_fs_mgm_entry_size(dev_cap->fs_max_num_qp_per_entry); dev->caps.steering_mode = MLX4_STEERING_MODE_DEVICE_MANAGED; dev->caps.num_qp_per_mgm = dev_cap->fs_max_num_qp_per_entry; } else { if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER && dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER) dev->caps.steering_mode = MLX4_STEERING_MODE_B0; else { dev->caps.steering_mode = MLX4_STEERING_MODE_A0; if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER || dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER) mlx4_warn(dev, "Must have both UC_STEER and MC_STEER flags " "set to use B0 steering. Falling back to A0 steering mode.\n"); } dev->oper_log_mgm_entry_size = mlx4_log_num_mgm_entry_size > 0 ? mlx4_log_num_mgm_entry_size : MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE; dev->caps.num_qp_per_mgm = mlx4_get_qp_per_mgm(dev); } mlx4_dbg(dev, "Steering mode is: %s, oper_log_mgm_entry_size = %d, " "log_num_mgm_entry_size = %d\n", mlx4_steering_mode_str(dev->caps.steering_mode), dev->oper_log_mgm_entry_size, mlx4_log_num_mgm_entry_size); } static int mlx4_init_hca(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_dev_cap *dev_cap = NULL; struct mlx4_adapter adapter; struct mlx4_mod_stat_cfg mlx4_cfg; struct mlx4_profile profile; struct mlx4_init_hca_param init_hca; u64 icm_size; int err; if (!mlx4_is_slave(dev)) { err = mlx4_QUERY_FW(dev); if (err) { if (err == -EACCES) mlx4_info(dev, "non-primary physical function, skipping.\n"); else mlx4_err(dev, "QUERY_FW command failed, aborting.\n"); return err; } err = mlx4_load_fw(dev); if (err) { mlx4_err(dev, "Failed to start FW, aborting.\n"); return err; } mlx4_cfg.log_pg_sz_m = 1; mlx4_cfg.log_pg_sz = 0; err = mlx4_MOD_STAT_CFG(dev, &mlx4_cfg); if (err) mlx4_warn(dev, "Failed to override log_pg_sz parameter\n"); dev_cap = kzalloc(sizeof *dev_cap, GFP_KERNEL); if (!dev_cap) { mlx4_err(dev, "Failed to allocate memory for dev_cap\n"); err = -ENOMEM; goto err_stop_fw; } err = mlx4_dev_cap(dev, dev_cap); if (err) { mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n"); goto err_stop_fw; } choose_steering_mode(dev, dev_cap); if (mlx4_is_master(dev)) mlx4_parav_master_pf_caps(dev); process_mod_param_profile(&profile); if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) profile.num_mcg = MLX4_FS_NUM_MCG; icm_size = mlx4_make_profile(dev, &profile, dev_cap, &init_hca); if ((long long) icm_size < 0) { err = icm_size; goto err_stop_fw; } dev->caps.max_fmr_maps = (1 << (32 - ilog2(dev->caps.num_mpts))) - 1; init_hca.log_uar_sz = ilog2(dev->caps.num_uars); init_hca.uar_page_sz = PAGE_SHIFT - 12; err = mlx4_init_icm(dev, dev_cap, &init_hca, icm_size); if (err) goto err_stop_fw; init_hca.mw_enable = 1; err = mlx4_INIT_HCA(dev, &init_hca); if (err) { mlx4_err(dev, "INIT_HCA command failed, aborting.\n"); goto err_free_icm; } /* * Read HCA frequency by QUERY_HCA command */ if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_TS) { memset(&init_hca, 0, sizeof(init_hca)); err = mlx4_QUERY_HCA(dev, &init_hca); if (err) { mlx4_err(dev, "QUERY_HCA command failed, disable timestamp.\n"); dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS; } else { dev->caps.hca_core_clock = init_hca.hca_core_clock; } /* In case we got HCA frequency 0 - disable timestamping * to avoid dividing by zero */ if (!dev->caps.hca_core_clock) { dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS; mlx4_err(dev, "HCA frequency is 0. Timestamping is not supported."); } else if (map_internal_clock(dev)) { /* Map internal clock, * in case of failure disable timestamping */ dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS; mlx4_err(dev, "Failed to map internal clock. Timestamping is not supported.\n"); } } } else { err = mlx4_init_slave(dev); if (err) { mlx4_err(dev, "Failed to initialize slave\n"); return err; } err = mlx4_slave_cap(dev); if (err) { mlx4_err(dev, "Failed to obtain slave caps\n"); goto err_close; } } if (map_bf_area(dev)) mlx4_dbg(dev, "Failed to map blue flame area\n"); /* Only the master set the ports, all the rest got it from it.*/ if (!mlx4_is_slave(dev)) mlx4_set_port_mask(dev); err = mlx4_QUERY_ADAPTER(dev, &adapter); if (err) { mlx4_err(dev, "QUERY_ADAPTER command failed, aborting.\n"); goto unmap_bf; } priv->eq_table.inta_pin = adapter.inta_pin; memcpy(dev->board_id, adapter.board_id, sizeof dev->board_id); memcpy(dev->vsd, adapter.vsd, sizeof(dev->vsd)); dev->vsd_vendor_id = adapter.vsd_vendor_id; if (!mlx4_is_slave(dev)) kfree(dev_cap); return 0; unmap_bf: if (!mlx4_is_slave(dev)) unmap_internal_clock(dev); unmap_bf_area(dev); if (mlx4_is_slave(dev)) { kfree(dev->caps.qp0_tunnel); kfree(dev->caps.qp0_proxy); kfree(dev->caps.qp1_tunnel); kfree(dev->caps.qp1_proxy); } err_close: if (mlx4_is_slave(dev)) mlx4_slave_exit(dev); else mlx4_CLOSE_HCA(dev, 0); err_free_icm: if (!mlx4_is_slave(dev)) mlx4_free_icms(dev); err_stop_fw: if (!mlx4_is_slave(dev)) { if (!mlx4_UNMAP_FA(dev)) mlx4_free_icm(dev, priv->fw.fw_icm, 0); else pr_warn("mlx4_core: mlx4_UNMAP_FA failed.\n"); kfree(dev_cap); } return err; } static int mlx4_init_counters_table(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); int nent_pow2, port_indx, vf_index, num_counters; int res, index = 0; struct counter_index *new_counter_index; if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS)) return -ENOENT; if (!mlx4_is_slave(dev) && dev->caps.max_counters == dev->caps.max_extended_counters) { res = mlx4_cmd(dev, MLX4_IF_STATE_EXTENDED, 0, 0, MLX4_CMD_SET_IF_STAT, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); if (res) { mlx4_err(dev, "Failed to set extended counters (err=%d)\n", res); return res; } } mutex_init(&priv->counters_table.mutex); if (mlx4_is_slave(dev)) { for (port_indx = 0; port_indx < dev->caps.num_ports; port_indx++) { INIT_LIST_HEAD(&priv->counters_table.global_port_list[port_indx]); if (dev->caps.def_counter_index[port_indx] != 0xFF) { new_counter_index = kmalloc(sizeof(struct counter_index), GFP_KERNEL); if (!new_counter_index) return -ENOMEM; new_counter_index->index = dev->caps.def_counter_index[port_indx]; list_add_tail(&new_counter_index->list, &priv->counters_table.global_port_list[port_indx]); } } mlx4_dbg(dev, "%s: slave allocated %d counters for %d ports\n", __func__, dev->caps.num_ports, dev->caps.num_ports); return 0; } nent_pow2 = roundup_pow_of_two(dev->caps.max_counters); for (port_indx = 0; port_indx < dev->caps.num_ports; port_indx++) { INIT_LIST_HEAD(&priv->counters_table.global_port_list[port_indx]); /* allocating 2 counters per port for PFs */ /* For the PF, the ETH default counters are 0,2; */ /* and the RoCE default counters are 1,3 */ for (num_counters = 0; num_counters < 2; num_counters++, index++) { new_counter_index = kmalloc(sizeof(struct counter_index), GFP_KERNEL); if (!new_counter_index) return -ENOMEM; new_counter_index->index = index; list_add_tail(&new_counter_index->list, &priv->counters_table.global_port_list[port_indx]); } } if (mlx4_is_master(dev)) { for (vf_index = 0; vf_index < dev->num_vfs; vf_index++) { for (port_indx = 0; port_indx < dev->caps.num_ports; port_indx++) { INIT_LIST_HEAD(&priv->counters_table.vf_list[vf_index][port_indx]); new_counter_index = kmalloc(sizeof(struct counter_index), GFP_KERNEL); if (!new_counter_index) return -ENOMEM; if (index < nent_pow2 - 2) { new_counter_index->index = index; index++; } else { new_counter_index->index = MLX4_SINK_COUNTER_INDEX; } list_add_tail(&new_counter_index->list, &priv->counters_table.vf_list[vf_index][port_indx]); } } res = mlx4_bitmap_init(&priv->counters_table.bitmap, nent_pow2, nent_pow2 - 1, index, 1); mlx4_dbg(dev, "%s: master allocated %d counters for %d VFs\n", __func__, index, dev->num_vfs); } else { res = mlx4_bitmap_init(&priv->counters_table.bitmap, nent_pow2, nent_pow2 - 1, index, 1); mlx4_dbg(dev, "%s: native allocated %d counters for %d ports\n", __func__, index, dev->caps.num_ports); } return 0; } static void mlx4_cleanup_counters_table(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); int i, j; struct counter_index *port, *tmp_port; struct counter_index *vf, *tmp_vf; mutex_lock(&priv->counters_table.mutex); if (dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS) { for (i = 0; i < dev->caps.num_ports; i++) { list_for_each_entry_safe(port, tmp_port, &priv->counters_table.global_port_list[i], list) { list_del(&port->list); kfree(port); } } if (!mlx4_is_slave(dev)) { for (i = 0; i < dev->num_vfs; i++) { for (j = 0; j < dev->caps.num_ports; j++) { list_for_each_entry_safe(vf, tmp_vf, &priv->counters_table.vf_list[i][j], list) { /* clear the counter statistic */ if (__mlx4_clear_if_stat(dev, vf->index)) mlx4_dbg(dev, "%s: reset counter %d failed\n", __func__, vf->index); list_del(&vf->list); kfree(vf); } } } mlx4_bitmap_cleanup(&priv->counters_table.bitmap); } } mutex_unlock(&priv->counters_table.mutex); } int __mlx4_slave_counters_free(struct mlx4_dev *dev, int slave) { struct mlx4_priv *priv = mlx4_priv(dev); int i, first; struct counter_index *vf, *tmp_vf; /* clean VF's counters for the next useg */ if (slave > 0 && slave <= dev->num_vfs) { mlx4_dbg(dev, "%s: free counters of slave(%d)\n" , __func__, slave); mutex_lock(&priv->counters_table.mutex); for (i = 0; i < dev->caps.num_ports; i++) { first = 0; list_for_each_entry_safe(vf, tmp_vf, &priv->counters_table.vf_list[slave - 1][i], list) { /* clear the counter statistic */ if (__mlx4_clear_if_stat(dev, vf->index)) mlx4_dbg(dev, "%s: reset counter %d failed\n", __func__, vf->index); if (first++ && vf->index != MLX4_SINK_COUNTER_INDEX) { mlx4_dbg(dev, "%s: delete counter index %d for slave %d and port %d\n" , __func__, vf->index, slave, i + 1); mlx4_bitmap_free(&priv->counters_table.bitmap, vf->index, MLX4_USE_RR); list_del(&vf->list); kfree(vf); } else { mlx4_dbg(dev, "%s: can't delete default counter index %d for slave %d and port %d\n" , __func__, vf->index, slave, i + 1); } } } mutex_unlock(&priv->counters_table.mutex); } return 0; } int __mlx4_counter_alloc(struct mlx4_dev *dev, int slave, int port, u32 *idx) { struct mlx4_priv *priv = mlx4_priv(dev); struct counter_index *new_counter_index; if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS)) return -ENOENT; if ((slave > MLX4_MAX_NUM_VF) || (slave < 0) || (port < 0) || (port > MLX4_MAX_PORTS)) { mlx4_dbg(dev, "%s: invalid slave(%d) or port(%d) index\n", __func__, slave, port); return -EINVAL; } /* handle old guest request does not support request by port index */ if (port == 0) { *idx = MLX4_SINK_COUNTER_INDEX; mlx4_dbg(dev, "%s: allocated default counter index %d for slave %d port %d\n" , __func__, *idx, slave, port); return 0; } mutex_lock(&priv->counters_table.mutex); *idx = mlx4_bitmap_alloc(&priv->counters_table.bitmap); /* if no resources return the default counter of the slave and port */ if (*idx == -1) { if (slave == 0) { /* its the ethernet counter ?????? */ new_counter_index = list_entry(priv->counters_table.global_port_list[port - 1].next, struct counter_index, list); } else { new_counter_index = list_entry(priv->counters_table.vf_list[slave - 1][port - 1].next, struct counter_index, list); } *idx = new_counter_index->index; mlx4_dbg(dev, "%s: allocated defualt counter index %d for slave %d port %d\n" , __func__, *idx, slave, port); goto out; } if (slave == 0) { /* native or master */ new_counter_index = kmalloc(sizeof(struct counter_index), GFP_KERNEL); if (!new_counter_index) goto no_mem; new_counter_index->index = *idx; list_add_tail(&new_counter_index->list, &priv->counters_table.global_port_list[port - 1]); } else { new_counter_index = kmalloc(sizeof(struct counter_index), GFP_KERNEL); if (!new_counter_index) goto no_mem; new_counter_index->index = *idx; list_add_tail(&new_counter_index->list, &priv->counters_table.vf_list[slave - 1][port - 1]); } mlx4_dbg(dev, "%s: allocated counter index %d for slave %d port %d\n" , __func__, *idx, slave, port); out: mutex_unlock(&priv->counters_table.mutex); return 0; no_mem: mlx4_bitmap_free(&priv->counters_table.bitmap, *idx, MLX4_USE_RR); mutex_unlock(&priv->counters_table.mutex); *idx = MLX4_SINK_COUNTER_INDEX; mlx4_dbg(dev, "%s: failed err (%d)\n" , __func__, -ENOMEM); return -ENOMEM; } int mlx4_counter_alloc(struct mlx4_dev *dev, u8 port, u32 *idx) { u64 out_param; int err; struct mlx4_priv *priv = mlx4_priv(dev); struct counter_index *new_counter_index, *c_index; if (mlx4_is_mfunc(dev)) { err = mlx4_cmd_imm(dev, 0, &out_param, ((u32) port) << 8 | (u32) RES_COUNTER, RES_OP_RESERVE, MLX4_CMD_ALLOC_RES, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); if (!err) { *idx = get_param_l(&out_param); if (*idx == MLX4_SINK_COUNTER_INDEX) return -ENOSPC; mutex_lock(&priv->counters_table.mutex); c_index = list_entry(priv->counters_table.global_port_list[port - 1].next, struct counter_index, list); mutex_unlock(&priv->counters_table.mutex); if (c_index->index == *idx) return -EEXIST; if (mlx4_is_slave(dev)) { new_counter_index = kmalloc(sizeof(struct counter_index), GFP_KERNEL); if (!new_counter_index) { mlx4_counter_free(dev, port, *idx); return -ENOMEM; } new_counter_index->index = *idx; mutex_lock(&priv->counters_table.mutex); list_add_tail(&new_counter_index->list, &priv->counters_table.global_port_list[port - 1]); mutex_unlock(&priv->counters_table.mutex); mlx4_dbg(dev, "%s: allocated counter index %d for port %d\n" , __func__, *idx, port); } } return err; } return __mlx4_counter_alloc(dev, 0, port, idx); } EXPORT_SYMBOL_GPL(mlx4_counter_alloc); void __mlx4_counter_free(struct mlx4_dev *dev, int slave, int port, u32 idx) { /* check if native or slave and deletes acordingly */ struct mlx4_priv *priv = mlx4_priv(dev); struct counter_index *pf, *tmp_pf; struct counter_index *vf, *tmp_vf; int first; if (idx == MLX4_SINK_COUNTER_INDEX) { mlx4_dbg(dev, "%s: try to delete default counter index %d for port %d\n" , __func__, idx, port); return; } if ((slave > MLX4_MAX_NUM_VF) || (slave < 0) || (port < 0) || (port > MLX4_MAX_PORTS)) { mlx4_warn(dev, "%s: deletion failed due to invalid slave(%d) or port(%d) index\n" , __func__, slave, idx); return; } mutex_lock(&priv->counters_table.mutex); if (slave == 0) { first = 0; list_for_each_entry_safe(pf, tmp_pf, &priv->counters_table.global_port_list[port - 1], list) { /* the first 2 counters are reserved */ if (pf->index == idx) { /* clear the counter statistic */ if (__mlx4_clear_if_stat(dev, pf->index)) mlx4_dbg(dev, "%s: reset counter %d failed\n", __func__, pf->index); if (1 < first && idx != MLX4_SINK_COUNTER_INDEX) { list_del(&pf->list); kfree(pf); mlx4_dbg(dev, "%s: delete counter index %d for native device (%d) port %d\n" , __func__, idx, slave, port); mlx4_bitmap_free(&priv->counters_table.bitmap, idx, MLX4_USE_RR); goto out; } else { mlx4_dbg(dev, "%s: can't delete default counter index %d for native device (%d) port %d\n" , __func__, idx, slave, port); goto out; } } first++; } mlx4_dbg(dev, "%s: can't delete counter index %d for native device (%d) port %d\n" , __func__, idx, slave, port); } else { first = 0; list_for_each_entry_safe(vf, tmp_vf, &priv->counters_table.vf_list[slave - 1][port - 1], list) { /* the first element is reserved */ if (vf->index == idx) { /* clear the counter statistic */ if (__mlx4_clear_if_stat(dev, vf->index)) mlx4_dbg(dev, "%s: reset counter %d failed\n", __func__, vf->index); if (first) { list_del(&vf->list); kfree(vf); mlx4_dbg(dev, "%s: delete counter index %d for slave %d port %d\n", __func__, idx, slave, port); mlx4_bitmap_free(&priv->counters_table.bitmap, idx, MLX4_USE_RR); goto out; } else { mlx4_dbg(dev, "%s: can't delete default slave (%d) counter index %d for port %d\n" , __func__, slave, idx, port); goto out; } } first++; } mlx4_dbg(dev, "%s: can't delete slave (%d) counter index %d for port %d\n" , __func__, slave, idx, port); } out: mutex_unlock(&priv->counters_table.mutex); } void mlx4_counter_free(struct mlx4_dev *dev, u8 port, u32 idx) { u64 in_param = 0; struct mlx4_priv *priv = mlx4_priv(dev); struct counter_index *counter, *tmp_counter; int first = 0; if (mlx4_is_mfunc(dev)) { set_param_l(&in_param, idx); mlx4_cmd(dev, in_param, ((u32) port) << 8 | (u32) RES_COUNTER, RES_OP_RESERVE, MLX4_CMD_FREE_RES, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); if (mlx4_is_slave(dev) && idx != MLX4_SINK_COUNTER_INDEX) { mutex_lock(&priv->counters_table.mutex); list_for_each_entry_safe(counter, tmp_counter, &priv->counters_table.global_port_list[port - 1], list) { if (counter->index == idx && first++) { list_del(&counter->list); kfree(counter); mlx4_dbg(dev, "%s: delete counter index %d for port %d\n" , __func__, idx, port); mutex_unlock(&priv->counters_table.mutex); return; } } mutex_unlock(&priv->counters_table.mutex); } return; } __mlx4_counter_free(dev, 0, port, idx); } EXPORT_SYMBOL_GPL(mlx4_counter_free); int __mlx4_clear_if_stat(struct mlx4_dev *dev, u8 counter_index) { struct mlx4_cmd_mailbox *if_stat_mailbox = NULL; int err = 0; u32 if_stat_in_mod = (counter_index & 0xff) | (1 << 31); if (counter_index == MLX4_SINK_COUNTER_INDEX) return -EINVAL; if (mlx4_is_slave(dev)) return 0; if_stat_mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(if_stat_mailbox)) { err = PTR_ERR(if_stat_mailbox); return err; } err = mlx4_cmd_box(dev, 0, if_stat_mailbox->dma, if_stat_in_mod, 0, MLX4_CMD_QUERY_IF_STAT, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE); mlx4_free_cmd_mailbox(dev, if_stat_mailbox); return err; } u8 mlx4_get_default_counter_index(struct mlx4_dev *dev, int slave, int port) { struct mlx4_priv *priv = mlx4_priv(dev); struct counter_index *new_counter_index; if (dev->caps.port_type[port] == MLX4_PORT_TYPE_IB) { mlx4_dbg(dev, "%s: return counter index %d for slave %d port (MLX4_PORT_TYPE_IB) %d\n", __func__, MLX4_SINK_COUNTER_INDEX, slave, port); return (u8)MLX4_SINK_COUNTER_INDEX; } mutex_lock(&priv->counters_table.mutex); if (slave == 0) { new_counter_index = list_entry(priv->counters_table.global_port_list[port - 1].next, struct counter_index, list); } else { new_counter_index = list_entry(priv->counters_table.vf_list[slave - 1][port - 1].next, struct counter_index, list); } mutex_unlock(&priv->counters_table.mutex); mlx4_dbg(dev, "%s: return counter index %d for slave %d port %d\n", __func__, new_counter_index->index, slave, port); return (u8)new_counter_index->index; } int mlx4_get_vport_ethtool_stats(struct mlx4_dev *dev, int port, struct mlx4_en_vport_stats *vport_stats, int reset) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_cmd_mailbox *if_stat_mailbox = NULL; union mlx4_counter *counter; int err = 0; u32 if_stat_in_mod; struct counter_index *vport, *tmp_vport; if (!vport_stats) return -EINVAL; if_stat_mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(if_stat_mailbox)) { err = PTR_ERR(if_stat_mailbox); return err; } mutex_lock(&priv->counters_table.mutex); list_for_each_entry_safe(vport, tmp_vport, &priv->counters_table.global_port_list[port - 1], list) { if (vport->index == MLX4_SINK_COUNTER_INDEX) continue; memset(if_stat_mailbox->buf, 0, sizeof(union mlx4_counter)); if_stat_in_mod = (vport->index & 0xff) | ((reset & 1) << 31); err = mlx4_cmd_box(dev, 0, if_stat_mailbox->dma, if_stat_in_mod, 0, MLX4_CMD_QUERY_IF_STAT, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE); if (err) { mlx4_dbg(dev, "%s: failed to read statistics for counter index %d\n", __func__, vport->index); goto if_stat_out; } counter = (union mlx4_counter *)if_stat_mailbox->buf; if ((counter->control.cnt_mode & 0xf) == 1) { vport_stats->rx_broadcast_packets += be64_to_cpu(counter->ext.counters[0].IfRxBroadcastFrames); vport_stats->rx_unicast_packets += be64_to_cpu(counter->ext.counters[0].IfRxUnicastFrames); vport_stats->rx_multicast_packets += be64_to_cpu(counter->ext.counters[0].IfRxMulticastFrames); vport_stats->tx_broadcast_packets += be64_to_cpu(counter->ext.counters[0].IfTxBroadcastFrames); vport_stats->tx_unicast_packets += be64_to_cpu(counter->ext.counters[0].IfTxUnicastFrames); vport_stats->tx_multicast_packets += be64_to_cpu(counter->ext.counters[0].IfTxMulticastFrames); vport_stats->rx_broadcast_bytes += be64_to_cpu(counter->ext.counters[0].IfRxBroadcastOctets); vport_stats->rx_unicast_bytes += be64_to_cpu(counter->ext.counters[0].IfRxUnicastOctets); vport_stats->rx_multicast_bytes += be64_to_cpu(counter->ext.counters[0].IfRxMulticastOctets); vport_stats->tx_broadcast_bytes += be64_to_cpu(counter->ext.counters[0].IfTxBroadcastOctets); vport_stats->tx_unicast_bytes += be64_to_cpu(counter->ext.counters[0].IfTxUnicastOctets); vport_stats->tx_multicast_bytes += be64_to_cpu(counter->ext.counters[0].IfTxMulticastOctets); vport_stats->rx_errors += be64_to_cpu(counter->ext.counters[0].IfRxErrorFrames); vport_stats->rx_dropped += be64_to_cpu(counter->ext.counters[0].IfRxNoBufferFrames); vport_stats->tx_errors += be64_to_cpu(counter->ext.counters[0].IfTxDroppedFrames); } } if_stat_out: mutex_unlock(&priv->counters_table.mutex); mlx4_free_cmd_mailbox(dev, if_stat_mailbox); return err; } EXPORT_SYMBOL_GPL(mlx4_get_vport_ethtool_stats); static int mlx4_setup_hca(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); int err; int port; __be32 ib_port_default_caps; err = mlx4_init_uar_table(dev); if (err) { mlx4_err(dev, "Failed to initialize " "user access region table (err=%d), aborting.\n", err); return err; } err = mlx4_uar_alloc(dev, &priv->driver_uar); if (err) { mlx4_err(dev, "Failed to allocate driver access region " "(err=%d), aborting.\n", err); goto err_uar_table_free; } priv->kar = ioremap((phys_addr_t) priv->driver_uar.pfn << PAGE_SHIFT, PAGE_SIZE); if (!priv->kar) { mlx4_err(dev, "Couldn't map kernel access region, " "aborting.\n"); err = -ENOMEM; goto err_uar_free; } err = mlx4_init_pd_table(dev); if (err) { mlx4_err(dev, "Failed to initialize " "protection domain table (err=%d), aborting.\n", err); goto err_kar_unmap; } err = mlx4_init_xrcd_table(dev); if (err) { mlx4_err(dev, "Failed to initialize " "reliable connection domain table (err=%d), " "aborting.\n", err); goto err_pd_table_free; } err = mlx4_init_mr_table(dev); if (err) { mlx4_err(dev, "Failed to initialize " "memory region table (err=%d), aborting.\n", err); goto err_xrcd_table_free; } if (!mlx4_is_slave(dev)) { err = mlx4_init_mcg_table(dev); if (err) { mlx4_err(dev, "Failed to initialize " "multicast group table (err=%d), aborting.\n", err); goto err_mr_table_free; } } err = mlx4_init_eq_table(dev); if (err) { mlx4_err(dev, "Failed to initialize " "event queue table (err=%d), aborting.\n", err); goto err_mcg_table_free; } err = mlx4_cmd_use_events(dev); if (err) { mlx4_err(dev, "Failed to switch to event-driven " "firmware commands (err=%d), aborting.\n", err); goto err_eq_table_free; } err = mlx4_NOP(dev); if (err) { if (dev->flags & MLX4_FLAG_MSI_X) { mlx4_warn(dev, "NOP command failed to generate MSI-X " "interrupt IRQ %d).\n", priv->eq_table.eq[dev->caps.num_comp_vectors].irq); mlx4_warn(dev, "Trying again without MSI-X.\n"); } else { mlx4_err(dev, "NOP command failed to generate interrupt " "(IRQ %d), aborting.\n", priv->eq_table.eq[dev->caps.num_comp_vectors].irq); mlx4_err(dev, "BIOS or ACPI interrupt routing problem?\n"); } goto err_cmd_poll; } mlx4_dbg(dev, "NOP command IRQ test passed\n"); err = mlx4_init_cq_table(dev); if (err) { mlx4_err(dev, "Failed to initialize " "completion queue table (err=%d), aborting.\n", err); goto err_cmd_poll; } err = mlx4_init_srq_table(dev); if (err) { mlx4_err(dev, "Failed to initialize " "shared receive queue table (err=%d), aborting.\n", err); goto err_cq_table_free; } err = mlx4_init_qp_table(dev); if (err) { mlx4_err(dev, "Failed to initialize " "queue pair table (err=%d), aborting.\n", err); goto err_srq_table_free; } err = mlx4_init_counters_table(dev); if (err && err != -ENOENT) { mlx4_err(dev, "Failed to initialize counters table (err=%d), " "aborting.\n", err); goto err_qp_table_free; } if (!mlx4_is_slave(dev)) { for (port = 1; port <= dev->caps.num_ports; port++) { ib_port_default_caps = 0; err = mlx4_get_port_ib_caps(dev, port, &ib_port_default_caps); if (err) mlx4_warn(dev, "failed to get port %d default " "ib capabilities (%d). Continuing " "with caps = 0\n", port, err); dev->caps.ib_port_def_cap[port] = ib_port_default_caps; /* initialize per-slave default ib port capabilities */ if (mlx4_is_master(dev)) { int i; for (i = 0; i < dev->num_slaves; i++) { if (i == mlx4_master_func_num(dev)) continue; priv->mfunc.master.slave_state[i].ib_cap_mask[port] = ib_port_default_caps; } } dev->caps.port_ib_mtu[port] = IB_MTU_4096; err = mlx4_SET_PORT(dev, port, mlx4_is_master(dev) ? dev->caps.pkey_table_len[port] : -1); if (err) { mlx4_err(dev, "Failed to set port %d (err=%d), " "aborting\n", port, err); goto err_counters_table_free; } } } return 0; err_counters_table_free: mlx4_cleanup_counters_table(dev); err_qp_table_free: mlx4_cleanup_qp_table(dev); err_srq_table_free: mlx4_cleanup_srq_table(dev); err_cq_table_free: mlx4_cleanup_cq_table(dev); err_cmd_poll: mlx4_cmd_use_polling(dev); err_eq_table_free: mlx4_cleanup_eq_table(dev); err_mcg_table_free: if (!mlx4_is_slave(dev)) mlx4_cleanup_mcg_table(dev); err_mr_table_free: mlx4_cleanup_mr_table(dev); err_xrcd_table_free: mlx4_cleanup_xrcd_table(dev); err_pd_table_free: mlx4_cleanup_pd_table(dev); err_kar_unmap: iounmap(priv->kar); err_uar_free: mlx4_uar_free(dev, &priv->driver_uar); err_uar_table_free: mlx4_cleanup_uar_table(dev); return err; } static void mlx4_enable_msi_x(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); struct msix_entry *entries; int nreq = min_t(int, dev->caps.num_ports * min_t(int, num_possible_cpus() + 1, MAX_MSIX_P_PORT) + MSIX_LEGACY_SZ, MAX_MSIX); int err; int i; if (msi_x) { nreq = min_t(int, dev->caps.num_eqs - dev->caps.reserved_eqs, nreq); if (msi_x > 1 && !mlx4_is_mfunc(dev)) nreq = min_t(int, nreq, msi_x); entries = kcalloc(nreq, sizeof *entries, GFP_KERNEL); if (!entries) goto no_msi; for (i = 0; i < nreq; ++i) entries[i].entry = i; retry: err = pci_enable_msix(dev->pdev, entries, nreq); if (err) { /* Try again if at least 2 vectors are available */ if (err > 1) { mlx4_info(dev, "Requested %d vectors, " "but only %d MSI-X vectors available, " "trying again\n", nreq, err); nreq = err; goto retry; } kfree(entries); goto no_msi; } if (nreq < MSIX_LEGACY_SZ + dev->caps.num_ports * MIN_MSIX_P_PORT) { /*Working in legacy mode , all EQ's shared*/ dev->caps.comp_pool = 0; dev->caps.num_comp_vectors = nreq - 1; } else { dev->caps.comp_pool = nreq - MSIX_LEGACY_SZ; dev->caps.num_comp_vectors = MSIX_LEGACY_SZ - 1; } for (i = 0; i < nreq; ++i) priv->eq_table.eq[i].irq = entries[i].vector; dev->flags |= MLX4_FLAG_MSI_X; kfree(entries); return; } no_msi: dev->caps.num_comp_vectors = 1; dev->caps.comp_pool = 0; for (i = 0; i < 2; ++i) priv->eq_table.eq[i].irq = dev->pdev->irq; } static int mlx4_init_port_info(struct mlx4_dev *dev, int port) { struct mlx4_port_info *info = &mlx4_priv(dev)->port[port]; int err = 0; info->dev = dev; info->port = port; if (!mlx4_is_slave(dev)) { mlx4_init_mac_table(dev, &info->mac_table); mlx4_init_vlan_table(dev, &info->vlan_table); info->base_qpn = mlx4_get_base_qpn(dev, port); } sprintf(info->dev_name, "mlx4_port%d", port); info->port_attr.attr.name = info->dev_name; if (mlx4_is_mfunc(dev)) info->port_attr.attr.mode = S_IRUGO; else { info->port_attr.attr.mode = S_IRUGO | S_IWUSR; info->port_attr.store = set_port_type; } info->port_attr.show = show_port_type; sysfs_attr_init(&info->port_attr.attr); err = device_create_file(&dev->pdev->dev, &info->port_attr); if (err) { mlx4_err(dev, "Failed to create file for port %d\n", port); info->port = -1; } sprintf(info->dev_mtu_name, "mlx4_port%d_mtu", port); info->port_mtu_attr.attr.name = info->dev_mtu_name; if (mlx4_is_mfunc(dev)) info->port_mtu_attr.attr.mode = S_IRUGO; else { info->port_mtu_attr.attr.mode = S_IRUGO | S_IWUSR; info->port_mtu_attr.store = set_port_ib_mtu; } info->port_mtu_attr.show = show_port_ib_mtu; sysfs_attr_init(&info->port_mtu_attr.attr); err = device_create_file(&dev->pdev->dev, &info->port_mtu_attr); if (err) { mlx4_err(dev, "Failed to create mtu file for port %d\n", port); device_remove_file(&info->dev->pdev->dev, &info->port_attr); info->port = -1; } return err; } static void mlx4_cleanup_port_info(struct mlx4_port_info *info) { if (info->port < 0) return; device_remove_file(&info->dev->pdev->dev, &info->port_attr); device_remove_file(&info->dev->pdev->dev, &info->port_mtu_attr); } static int mlx4_init_steering(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); int num_entries = dev->caps.num_ports; int i, j; priv->steer = kzalloc(sizeof(struct mlx4_steer) * num_entries, GFP_KERNEL); if (!priv->steer) return -ENOMEM; for (i = 0; i < num_entries; i++) for (j = 0; j < MLX4_NUM_STEERS; j++) { INIT_LIST_HEAD(&priv->steer[i].promisc_qps[j]); INIT_LIST_HEAD(&priv->steer[i].steer_entries[j]); } return 0; } static void mlx4_clear_steering(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_steer_index *entry, *tmp_entry; struct mlx4_promisc_qp *pqp, *tmp_pqp; int num_entries = dev->caps.num_ports; int i, j; for (i = 0; i < num_entries; i++) { for (j = 0; j < MLX4_NUM_STEERS; j++) { list_for_each_entry_safe(pqp, tmp_pqp, &priv->steer[i].promisc_qps[j], list) { list_del(&pqp->list); kfree(pqp); } list_for_each_entry_safe(entry, tmp_entry, &priv->steer[i].steer_entries[j], list) { list_del(&entry->list); list_for_each_entry_safe(pqp, tmp_pqp, &entry->duplicates, list) { list_del(&pqp->list); kfree(pqp); } kfree(entry); } } } kfree(priv->steer); } static int extended_func_num(struct pci_dev *pdev) { return PCI_SLOT(pdev->devfn) * 8 + PCI_FUNC(pdev->devfn); } #define MLX4_OWNER_BASE 0x8069c #define MLX4_OWNER_SIZE 4 static int mlx4_get_ownership(struct mlx4_dev *dev) { void __iomem *owner; u32 ret; if (pci_channel_offline(dev->pdev)) return -EIO; owner = ioremap(pci_resource_start(dev->pdev, 0) + MLX4_OWNER_BASE, MLX4_OWNER_SIZE); if (!owner) { mlx4_err(dev, "Failed to obtain ownership bit\n"); return -ENOMEM; } ret = readl(owner); iounmap(owner); return (int) !!ret; } static void mlx4_free_ownership(struct mlx4_dev *dev) { void __iomem *owner; if (pci_channel_offline(dev->pdev)) return; owner = ioremap(pci_resource_start(dev->pdev, 0) + MLX4_OWNER_BASE, MLX4_OWNER_SIZE); if (!owner) { mlx4_err(dev, "Failed to obtain ownership bit\n"); return; } writel(0, owner); msleep(1000); iounmap(owner); } static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data) { struct mlx4_priv *priv; struct mlx4_dev *dev; int err; int port; int nvfs, prb_vf; pr_info(DRV_NAME ": Initializing %s\n", pci_name(pdev)); err = pci_enable_device(pdev); if (err) { dev_err(&pdev->dev, "Cannot enable PCI device, " "aborting.\n"); return err; } mlx4_get_val(num_vfs.dbdf2val.tbl, pci_physfn(pdev), 0, &nvfs); mlx4_get_val(probe_vf.dbdf2val.tbl, pci_physfn(pdev), 0, &prb_vf); if (nvfs > MLX4_MAX_NUM_VF) { dev_err(&pdev->dev, "There are more VF's (%d) than allowed(%d)\n", nvfs, MLX4_MAX_NUM_VF); return -EINVAL; } if (nvfs < 0) { dev_err(&pdev->dev, "num_vfs module parameter cannot be negative\n"); return -EINVAL; } /* * Check for BARs. */ if (!(pci_dev_data & MLX4_PCI_DEV_IS_VF) && !(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) { dev_err(&pdev->dev, "Missing DCS, aborting." "(driver_data: 0x%x, pci_resource_flags(pdev, 0):0x%x)\n", pci_dev_data, pci_resource_flags(pdev, 0)); err = -ENODEV; goto err_disable_pdev; } if (!(pci_resource_flags(pdev, 2) & IORESOURCE_MEM)) { dev_err(&pdev->dev, "Missing UAR, aborting.\n"); err = -ENODEV; goto err_disable_pdev; } err = pci_request_regions(pdev, DRV_NAME); if (err) { dev_err(&pdev->dev, "Couldn't get PCI resources, aborting\n"); goto err_disable_pdev; } pci_set_master(pdev); err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); if (err) { dev_warn(&pdev->dev, "Warning: couldn't set 64-bit PCI DMA mask.\n"); err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); if (err) { dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting.\n"); goto err_release_regions; } } err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); if (err) { dev_warn(&pdev->dev, "Warning: couldn't set 64-bit " "consistent PCI DMA mask.\n"); err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); if (err) { dev_err(&pdev->dev, "Can't set consistent PCI DMA mask, " "aborting.\n"); goto err_release_regions; } } /* Allow large DMA segments, up to the firmware limit of 1 GB */ dma_set_max_seg_size(&pdev->dev, 1024 * 1024 * 1024); priv = kzalloc(sizeof *priv, GFP_KERNEL); if (!priv) { dev_err(&pdev->dev, "Device struct alloc failed, " "aborting.\n"); err = -ENOMEM; goto err_release_regions; } dev = &priv->dev; dev->pdev = pdev; INIT_LIST_HEAD(&priv->dev_list); INIT_LIST_HEAD(&priv->ctx_list); spin_lock_init(&priv->ctx_lock); mutex_init(&priv->port_mutex); INIT_LIST_HEAD(&priv->pgdir_list); mutex_init(&priv->pgdir_mutex); INIT_LIST_HEAD(&priv->bf_list); mutex_init(&priv->bf_mutex); dev->rev_id = pdev->revision; dev->numa_node = dev_to_node(&pdev->dev); /* Detect if this device is a virtual function */ if (pci_dev_data & MLX4_PCI_DEV_IS_VF) { /* When acting as pf, we normally skip vfs unless explicitly * requested to probe them. */ if (nvfs && extended_func_num(pdev) > prb_vf) { mlx4_warn(dev, "Skipping virtual function:%d\n", extended_func_num(pdev)); err = -ENODEV; goto err_free_dev; } mlx4_warn(dev, "Detected virtual function - running in slave mode\n"); dev->flags |= MLX4_FLAG_SLAVE; } else { /* We reset the device and enable SRIOV only for physical * devices. Try to claim ownership on the device; * if already taken, skip -- do not allow multiple PFs */ err = mlx4_get_ownership(dev); if (err) { if (err < 0) goto err_free_dev; else { mlx4_warn(dev, "Multiple PFs not yet supported." " Skipping PF.\n"); err = -EINVAL; goto err_free_dev; } } if (nvfs) { mlx4_warn(dev, "Enabling SR-IOV with %d VFs\n", nvfs); err = pci_enable_sriov(pdev, nvfs); if (err) { mlx4_err(dev, "Failed to enable SR-IOV, continuing without SR-IOV (err = %d).\n", err); err = 0; } else { mlx4_warn(dev, "Running in master mode\n"); dev->flags |= MLX4_FLAG_SRIOV | MLX4_FLAG_MASTER; dev->num_vfs = nvfs; } } atomic_set(&priv->opreq_count, 0); INIT_WORK(&priv->opreq_task, mlx4_opreq_action); /* * Now reset the HCA before we touch the PCI capabilities or * attempt a firmware command, since a boot ROM may have left * the HCA in an undefined state. */ err = mlx4_reset(dev); if (err) { mlx4_err(dev, "Failed to reset HCA, aborting.\n"); goto err_sriov; } } slave_start: err = mlx4_cmd_init(dev); if (err) { mlx4_err(dev, "Failed to init command interface, aborting.\n"); goto err_sriov; } /* In slave functions, the communication channel must be initialized * before posting commands. Also, init num_slaves before calling * mlx4_init_hca */ if (mlx4_is_mfunc(dev)) { if (mlx4_is_master(dev)) dev->num_slaves = MLX4_MAX_NUM_SLAVES; else { dev->num_slaves = 0; err = mlx4_multi_func_init(dev); if (err) { mlx4_err(dev, "Failed to init slave mfunc" " interface, aborting.\n"); goto err_cmd; } } } err = mlx4_init_hca(dev); if (err) { if (err == -EACCES) { /* Not primary Physical function * Running in slave mode */ mlx4_cmd_cleanup(dev); dev->flags |= MLX4_FLAG_SLAVE; dev->flags &= ~MLX4_FLAG_MASTER; goto slave_start; } else goto err_mfunc; } /* In master functions, the communication channel must be initialized * after obtaining its address from fw */ if (mlx4_is_master(dev)) { err = mlx4_multi_func_init(dev); if (err) { mlx4_err(dev, "Failed to init master mfunc" "interface, aborting.\n"); goto err_close; } } err = mlx4_alloc_eq_table(dev); if (err) goto err_master_mfunc; priv->msix_ctl.pool_bm = 0; mutex_init(&priv->msix_ctl.pool_lock); mlx4_enable_msi_x(dev); if ((mlx4_is_mfunc(dev)) && !(dev->flags & MLX4_FLAG_MSI_X)) { err = -ENOSYS; mlx4_err(dev, "INTx is not supported in multi-function mode." " aborting.\n"); goto err_free_eq; } if (!mlx4_is_slave(dev)) { err = mlx4_init_steering(dev); if (err) goto err_free_eq; } err = mlx4_setup_hca(dev); if (err == -EBUSY && (dev->flags & MLX4_FLAG_MSI_X) && !mlx4_is_mfunc(dev)) { dev->flags &= ~MLX4_FLAG_MSI_X; dev->caps.num_comp_vectors = 1; dev->caps.comp_pool = 0; pci_disable_msix(pdev); err = mlx4_setup_hca(dev); } if (err) goto err_steer; mlx4_init_quotas(dev); for (port = 1; port <= dev->caps.num_ports; port++) { err = mlx4_init_port_info(dev, port); if (err) goto err_port; } err = mlx4_register_device(dev); if (err) goto err_port; mlx4_request_modules(dev); mlx4_sense_init(dev); mlx4_start_sense(dev); priv->pci_dev_data = pci_dev_data; pci_set_drvdata(pdev, dev); return 0; err_port: for (--port; port >= 1; --port) mlx4_cleanup_port_info(&priv->port[port]); mlx4_cleanup_counters_table(dev); mlx4_cleanup_qp_table(dev); mlx4_cleanup_srq_table(dev); mlx4_cleanup_cq_table(dev); mlx4_cmd_use_polling(dev); mlx4_cleanup_eq_table(dev); mlx4_cleanup_mcg_table(dev); mlx4_cleanup_mr_table(dev); mlx4_cleanup_xrcd_table(dev); mlx4_cleanup_pd_table(dev); mlx4_cleanup_uar_table(dev); err_steer: if (!mlx4_is_slave(dev)) mlx4_clear_steering(dev); err_free_eq: mlx4_free_eq_table(dev); err_master_mfunc: if (mlx4_is_master(dev)) { mlx4_free_resource_tracker(dev, RES_TR_FREE_STRUCTS_ONLY); mlx4_multi_func_cleanup(dev); } if (mlx4_is_slave(dev)) { kfree(dev->caps.qp0_tunnel); kfree(dev->caps.qp0_proxy); kfree(dev->caps.qp1_tunnel); kfree(dev->caps.qp1_proxy); } err_close: if (dev->flags & MLX4_FLAG_MSI_X) pci_disable_msix(pdev); mlx4_close_hca(dev); err_mfunc: if (mlx4_is_slave(dev)) mlx4_multi_func_cleanup(dev); err_cmd: mlx4_cmd_cleanup(dev); err_sriov: if (dev->flags & MLX4_FLAG_SRIOV) pci_disable_sriov(pdev); if (!mlx4_is_slave(dev)) mlx4_free_ownership(dev); err_free_dev: kfree(priv); err_release_regions: pci_release_regions(pdev); err_disable_pdev: pci_disable_device(pdev); pci_set_drvdata(pdev, NULL); return err; } static int __devinit mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id) { printk_once(KERN_INFO "%s", mlx4_version); return __mlx4_init_one(pdev, id->driver_data); } static void mlx4_remove_one(struct pci_dev *pdev) { struct mlx4_dev *dev = pci_get_drvdata(pdev); struct mlx4_priv *priv = mlx4_priv(dev); int p; if (dev) { /* in SRIOV it is not allowed to unload the pf's * driver while there are alive vf's */ if (mlx4_is_master(dev)) { if (mlx4_how_many_lives_vf(dev)) mlx4_err(dev, "Removing PF when there are assigned VF's !!!\n"); } mlx4_stop_sense(dev); mlx4_unregister_device(dev); for (p = 1; p <= dev->caps.num_ports; p++) { mlx4_cleanup_port_info(&priv->port[p]); mlx4_CLOSE_PORT(dev, p); } if (mlx4_is_master(dev)) mlx4_free_resource_tracker(dev, RES_TR_FREE_SLAVES_ONLY); mlx4_cleanup_counters_table(dev); mlx4_cleanup_qp_table(dev); mlx4_cleanup_srq_table(dev); mlx4_cleanup_cq_table(dev); mlx4_cmd_use_polling(dev); mlx4_cleanup_eq_table(dev); mlx4_cleanup_mcg_table(dev); mlx4_cleanup_mr_table(dev); mlx4_cleanup_xrcd_table(dev); mlx4_cleanup_pd_table(dev); if (mlx4_is_master(dev)) mlx4_free_resource_tracker(dev, RES_TR_FREE_STRUCTS_ONLY); iounmap(priv->kar); mlx4_uar_free(dev, &priv->driver_uar); mlx4_cleanup_uar_table(dev); if (!mlx4_is_slave(dev)) mlx4_clear_steering(dev); mlx4_free_eq_table(dev); if (mlx4_is_master(dev)) mlx4_multi_func_cleanup(dev); mlx4_close_hca(dev); if (mlx4_is_slave(dev)) mlx4_multi_func_cleanup(dev); mlx4_cmd_cleanup(dev); if (dev->flags & MLX4_FLAG_MSI_X) pci_disable_msix(pdev); if (dev->flags & MLX4_FLAG_SRIOV) { mlx4_warn(dev, "Disabling SR-IOV\n"); pci_disable_sriov(pdev); } if (!mlx4_is_slave(dev)) mlx4_free_ownership(dev); kfree(dev->caps.qp0_tunnel); kfree(dev->caps.qp0_proxy); kfree(dev->caps.qp1_tunnel); kfree(dev->caps.qp1_proxy); kfree(priv); pci_release_regions(pdev); pci_disable_device(pdev); pci_set_drvdata(pdev, NULL); } } static int restore_current_port_types(struct mlx4_dev *dev, enum mlx4_port_type *types, enum mlx4_port_type *poss_types) { struct mlx4_priv *priv = mlx4_priv(dev); int err, i; mlx4_stop_sense(dev); mutex_lock(&priv->port_mutex); for (i = 0; i < dev->caps.num_ports; i++) dev->caps.possible_type[i + 1] = poss_types[i]; err = mlx4_change_port_types(dev, types); mlx4_start_sense(dev); mutex_unlock(&priv->port_mutex); return err; } int mlx4_restart_one(struct pci_dev *pdev) { struct mlx4_dev *dev = pci_get_drvdata(pdev); struct mlx4_priv *priv = mlx4_priv(dev); enum mlx4_port_type curr_type[MLX4_MAX_PORTS]; enum mlx4_port_type poss_type[MLX4_MAX_PORTS]; int pci_dev_data, err, i; pci_dev_data = priv->pci_dev_data; for (i = 0; i < dev->caps.num_ports; i++) { curr_type[i] = dev->caps.port_type[i + 1]; poss_type[i] = dev->caps.possible_type[i + 1]; } mlx4_remove_one(pdev); err = __mlx4_init_one(pdev, pci_dev_data); if (err) return err; dev = pci_get_drvdata(pdev); err = restore_current_port_types(dev, curr_type, poss_type); if (err) mlx4_err(dev, "mlx4_restart_one: could not restore original port types (%d)\n", err); return 0; } static DEFINE_PCI_DEVICE_TABLE(mlx4_pci_table) = { /* MT25408 "Hermon" SDR */ { PCI_VDEVICE(MELLANOX, 0x6340), MLX4_PCI_DEV_FORCE_SENSE_PORT }, /* MT25408 "Hermon" DDR */ { PCI_VDEVICE(MELLANOX, 0x634a), MLX4_PCI_DEV_FORCE_SENSE_PORT }, /* MT25408 "Hermon" QDR */ { PCI_VDEVICE(MELLANOX, 0x6354), MLX4_PCI_DEV_FORCE_SENSE_PORT }, /* MT25408 "Hermon" DDR PCIe gen2 */ { PCI_VDEVICE(MELLANOX, 0x6732), MLX4_PCI_DEV_FORCE_SENSE_PORT }, /* MT25408 "Hermon" QDR PCIe gen2 */ { PCI_VDEVICE(MELLANOX, 0x673c), MLX4_PCI_DEV_FORCE_SENSE_PORT }, /* MT25408 "Hermon" EN 10GigE */ { PCI_VDEVICE(MELLANOX, 0x6368), MLX4_PCI_DEV_FORCE_SENSE_PORT }, /* MT25408 "Hermon" EN 10GigE PCIe gen2 */ { PCI_VDEVICE(MELLANOX, 0x6750), MLX4_PCI_DEV_FORCE_SENSE_PORT }, /* MT25458 ConnectX EN 10GBASE-T 10GigE */ { PCI_VDEVICE(MELLANOX, 0x6372), MLX4_PCI_DEV_FORCE_SENSE_PORT }, /* MT25458 ConnectX EN 10GBASE-T+Gen2 10GigE */ { PCI_VDEVICE(MELLANOX, 0x675a), MLX4_PCI_DEV_FORCE_SENSE_PORT }, /* MT26468 ConnectX EN 10GigE PCIe gen2*/ { PCI_VDEVICE(MELLANOX, 0x6764), MLX4_PCI_DEV_FORCE_SENSE_PORT }, /* MT26438 ConnectX EN 40GigE PCIe gen2 5GT/s */ { PCI_VDEVICE(MELLANOX, 0x6746), MLX4_PCI_DEV_FORCE_SENSE_PORT }, /* MT26478 ConnectX2 40GigE PCIe gen2 */ { PCI_VDEVICE(MELLANOX, 0x676e), MLX4_PCI_DEV_FORCE_SENSE_PORT }, /* MT25400 Family [ConnectX-2 Virtual Function] */ { PCI_VDEVICE(MELLANOX, 0x1002), MLX4_PCI_DEV_IS_VF }, /* MT27500 Family [ConnectX-3] */ { PCI_VDEVICE(MELLANOX, 0x1003), 0 }, /* MT27500 Family [ConnectX-3 Virtual Function] */ { PCI_VDEVICE(MELLANOX, 0x1004), MLX4_PCI_DEV_IS_VF }, { PCI_VDEVICE(MELLANOX, 0x1005), 0 }, /* MT27510 Family */ { PCI_VDEVICE(MELLANOX, 0x1006), 0 }, /* MT27511 Family */ { PCI_VDEVICE(MELLANOX, 0x1007), 0 }, /* MT27520 Family */ { PCI_VDEVICE(MELLANOX, 0x1008), 0 }, /* MT27521 Family */ { PCI_VDEVICE(MELLANOX, 0x1009), 0 }, /* MT27530 Family */ { PCI_VDEVICE(MELLANOX, 0x100a), 0 }, /* MT27531 Family */ { PCI_VDEVICE(MELLANOX, 0x100b), 0 }, /* MT27540 Family */ { PCI_VDEVICE(MELLANOX, 0x100c), 0 }, /* MT27541 Family */ { PCI_VDEVICE(MELLANOX, 0x100d), 0 }, /* MT27550 Family */ { PCI_VDEVICE(MELLANOX, 0x100e), 0 }, /* MT27551 Family */ { PCI_VDEVICE(MELLANOX, 0x100f), 0 }, /* MT27560 Family */ { PCI_VDEVICE(MELLANOX, 0x1010), 0 }, /* MT27561 Family */ { 0, } }; MODULE_DEVICE_TABLE(pci, mlx4_pci_table); static pci_ers_result_t mlx4_pci_err_detected(struct pci_dev *pdev, pci_channel_state_t state) { mlx4_remove_one(pdev); return state == pci_channel_io_perm_failure ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET; } static pci_ers_result_t mlx4_pci_slot_reset(struct pci_dev *pdev) { int ret = __mlx4_init_one(pdev, 0); return ret ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED; } static const struct pci_error_handlers mlx4_err_handler = { .error_detected = mlx4_pci_err_detected, .slot_reset = mlx4_pci_slot_reset, }; static int suspend(struct pci_dev *pdev, pm_message_t state) { mlx4_remove_one(pdev); return 0; } static int resume(struct pci_dev *pdev) { return __mlx4_init_one(pdev, 0); } static struct pci_driver mlx4_driver = { .name = DRV_NAME, .id_table = mlx4_pci_table, .probe = mlx4_init_one, .remove = __devexit_p(mlx4_remove_one), .suspend = suspend, .resume = resume, .err_handler = &mlx4_err_handler, }; static int __init mlx4_verify_params(void) { int status; status = update_defaults(&port_type_array); if (status == INVALID_STR) { if (mlx4_fill_dbdf2val_tbl(&port_type_array.dbdf2val)) return -1; } else if (status == INVALID_DATA) { return -1; } status = update_defaults(&num_vfs); if (status == INVALID_STR) { if (mlx4_fill_dbdf2val_tbl(&num_vfs.dbdf2val)) return -1; } else if (status == INVALID_DATA) { return -1; } status = update_defaults(&probe_vf); if (status == INVALID_STR) { if (mlx4_fill_dbdf2val_tbl(&probe_vf.dbdf2val)) return -1; } else if (status == INVALID_DATA) { return -1; } if (msi_x < 0) { pr_warn("mlx4_core: bad msi_x: %d\n", msi_x); return -1; } if ((log_num_mac < 0) || (log_num_mac > 7)) { pr_warning("mlx4_core: bad num_mac: %d\n", log_num_mac); return -1; } if (log_num_vlan != 0) pr_warning("mlx4_core: log_num_vlan - obsolete module param, using %d\n", MLX4_LOG_NUM_VLANS); if (mlx4_set_4k_mtu != -1) pr_warning("mlx4_core: set_4k_mtu - obsolete module param\n"); if ((log_mtts_per_seg < 0) || (log_mtts_per_seg > 7)) { pr_warning("mlx4_core: bad log_mtts_per_seg: %d\n", log_mtts_per_seg); return -1; } if (mlx4_log_num_mgm_entry_size != -1 && (mlx4_log_num_mgm_entry_size < MLX4_MIN_MGM_LOG_ENTRY_SIZE || mlx4_log_num_mgm_entry_size > MLX4_MAX_MGM_LOG_ENTRY_SIZE)) { pr_warning("mlx4_core: mlx4_log_num_mgm_entry_size (%d) not " "in legal range (-1 or %d..%d)\n", mlx4_log_num_mgm_entry_size, MLX4_MIN_MGM_LOG_ENTRY_SIZE, MLX4_MAX_MGM_LOG_ENTRY_SIZE); return -1; } if (mod_param_profile.num_qp < 18 || mod_param_profile.num_qp > 23) { pr_warning("mlx4_core: bad log_num_qp: %d\n", mod_param_profile.num_qp); return -1; } if (mod_param_profile.num_srq < 10) { pr_warning("mlx4_core: too low log_num_srq: %d\n", mod_param_profile.num_srq); return -1; } if (mod_param_profile.num_cq < 10) { pr_warning("mlx4_core: too low log_num_cq: %d\n", mod_param_profile.num_cq); return -1; } if (mod_param_profile.num_mpt < 10) { pr_warning("mlx4_core: too low log_num_mpt: %d\n", mod_param_profile.num_mpt); return -1; } if (mod_param_profile.num_mtt_segs && mod_param_profile.num_mtt_segs < 15) { pr_warning("mlx4_core: too low log_num_mtt: %d\n", mod_param_profile.num_mtt_segs); return -1; } if (mod_param_profile.num_mtt_segs > MLX4_MAX_LOG_NUM_MTT) { pr_warning("mlx4_core: too high log_num_mtt: %d\n", mod_param_profile.num_mtt_segs); return -1; } return 0; } static int __init mlx4_init(void) { int ret; if (mlx4_verify_params()) return -EINVAL; mlx4_catas_init(); mlx4_wq = create_singlethread_workqueue("mlx4"); if (!mlx4_wq) return -ENOMEM; if (enable_sys_tune) sys_tune_init(); ret = pci_register_driver(&mlx4_driver); if (ret < 0) goto err; return 0; err: if (enable_sys_tune) sys_tune_fini(); destroy_workqueue(mlx4_wq); return ret; } static void __exit mlx4_cleanup(void) { if (enable_sys_tune) sys_tune_fini(); pci_unregister_driver(&mlx4_driver); destroy_workqueue(mlx4_wq); } module_init_order(mlx4_init, SI_ORDER_MIDDLE); module_exit(mlx4_cleanup); #include static int mlx4_evhand(module_t mod, int event, void *arg) { return (0); } static moduledata_t mlx4_mod = { .name = "mlx4", .evhand = mlx4_evhand, }; MODULE_VERSION(mlx4, 1); DECLARE_MODULE(mlx4, mlx4_mod, SI_SUB_OFED_PREINIT, SI_ORDER_ANY); Index: stable/9/sys/ofed/include/linux/file.h =================================================================== --- stable/9/sys/ofed/include/linux/file.h (revision 277139) +++ stable/9/sys/ofed/include/linux/file.h (revision 277140) @@ -1,134 +1,147 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. * Copyright (c) 2013 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _LINUX_FILE_H_ #define _LINUX_FILE_H_ #include #include #include #include #include #include struct linux_file; #undef file extern struct fileops linuxfileops; static inline struct linux_file * linux_fget(unsigned int fd) { struct file *file; file = fget_unlocked(curthread->td_proc->p_fd, fd); return (struct linux_file *)file->f_data; } static inline void fput(struct linux_file *filp) { if (filp->_file == NULL) { kfree(filp); return; } if (refcount_release(&filp->_file->f_count)) { _fdrop(filp->_file, curthread); kfree(filp); } } static inline void put_unused_fd(unsigned int fd) { struct file *file; file = fget_unlocked(curthread->td_proc->p_fd, fd); if (file == NULL) return; + /* + * NOTE: We should only get here when the "fd" has not been + * installed, so no need to free the associated Linux file + * structure. + */ fdclose(curthread->td_proc->p_fd, file, fd, curthread); + + /* drop extra reference */ + fdrop(file, curthread); } static inline void fd_install(unsigned int fd, struct linux_file *filp) { struct file *file; file = fget_unlocked(curthread->td_proc->p_fd, fd); filp->_file = file; - finit(file, filp->f_mode, DTYPE_DEV, filp, &linuxfileops); + finit(file, filp->f_mode, DTYPE_DEV, filp, &linuxfileops); + + /* drop the extra reference */ + fput(filp); } static inline int get_unused_fd(void) { struct file *file; int error; int fd; error = falloc(curthread, &file, &fd, 0); if (error) return -error; + /* drop the extra reference */ + fdrop(file, curthread); return fd; } static inline struct linux_file * alloc_file(int mode, const struct file_operations *fops) { struct linux_file *filp; filp = kzalloc(sizeof(*filp), GFP_KERNEL); if (filp == NULL) return (NULL); filp->f_op = fops; filp->f_mode = mode; return filp; } struct fd { struct linux_file *linux_file; }; static inline void fdput(struct fd fd) { fput(fd.linux_file); } static inline struct fd fdget(unsigned int fd) { struct linux_file *f = linux_fget(fd); return (struct fd){f}; } #define file linux_file #define fget linux_fget #endif /* _LINUX_FILE_H_ */ Index: stable/9/sys/ofed/include/linux/kernel.h =================================================================== --- stable/9/sys/ofed/include/linux/kernel.h (revision 277139) +++ stable/9/sys/ofed/include/linux/kernel.h (revision 277140) @@ -1,160 +1,178 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _LINUX_KERNEL_H_ #define _LINUX_KERNEL_H_ #include #include #include #include #include #include +#include #include #include #include #include #include #include #include #include #include #define KERN_CONT "" #define KERN_EMERG "<0>" #define KERN_ALERT "<1>" #define KERN_CRIT "<2>" #define KERN_ERR "<3>" #define KERN_WARNING "<4>" #define KERN_NOTICE "<5>" #define KERN_INFO "<6>" #define KERN_DEBUG "<7>" #define BUG() panic("BUG") #define BUG_ON(condition) do { if (condition) BUG(); } while(0) #define WARN_ON BUG_ON #undef ALIGN #define ALIGN(x, y) roundup2((x), (y)) #define DIV_ROUND_UP howmany #define printk(X...) printf(X) -#define pr_debug(fmt, ...) printk(KERN_DEBUG # fmt, ##__VA_ARGS__) + +/* + * The "pr_debug()" and "pr_devel()" macros should produce zero code + * unless DEBUG is defined: + */ +#ifdef DEBUG +#define pr_debug(fmt, ...) \ + log(LOG_DEBUG, fmt, ##__VA_ARGS__) +#define pr_devel(fmt, ...) \ + log(LOG_DEBUG, pr_fmt(fmt), ##__VA_ARGS__) +#else +#define pr_debug(fmt, ...) \ + ({ if (0) log(LOG_DEBUG, fmt, ##__VA_ARGS__); 0; }) +#define pr_devel(fmt, ...) \ + ({ if (0) log(LOG_DEBUG, pr_fmt(fmt), ##__VA_ARGS__); 0; }) +#endif + #define udelay(t) DELAY(t) #ifndef pr_fmt #define pr_fmt(fmt) fmt #endif /* * Print a one-time message (analogous to WARN_ONCE() et al): */ -#define printk_once(x...) ({ \ - static bool __print_once; \ - \ - if (!__print_once) { \ - __print_once = true; \ - printk(x); \ - } \ -}) +#define printk_once(...) do { \ + static bool __print_once; \ + \ + if (!__print_once) { \ + __print_once = true; \ + printk(__VA_ARGS__); \ + } \ +} while (0) +/* + * Log a one-time message (analogous to WARN_ONCE() et al): + */ +#define log_once(level,...) do { \ + static bool __log_once; \ + \ + if (!__log_once) { \ + __log_once = true; \ + log(level, __VA_ARGS__); \ + } \ +} while (0) - #define pr_emerg(fmt, ...) \ - printk(KERN_EMERG pr_fmt(fmt), ##__VA_ARGS__) + log(LOG_EMERG, pr_fmt(fmt), ##__VA_ARGS__) #define pr_alert(fmt, ...) \ - printk(KERN_ALERT pr_fmt(fmt), ##__VA_ARGS__) + log(LOG_ALERT, pr_fmt(fmt), ##__VA_ARGS__) #define pr_crit(fmt, ...) \ - printk(KERN_CRIT pr_fmt(fmt), ##__VA_ARGS__) + log(LOG_CRIT, pr_fmt(fmt), ##__VA_ARGS__) #define pr_err(fmt, ...) \ - printk(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__) + log(LOG_ERR, pr_fmt(fmt), ##__VA_ARGS__) #define pr_warning(fmt, ...) \ - printk(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__) + log(LOG_WARNING, pr_fmt(fmt), ##__VA_ARGS__) #define pr_warn pr_warning #define pr_notice(fmt, ...) \ - printk(KERN_NOTICE pr_fmt(fmt), ##__VA_ARGS__) + log(LOG_NOTICE, pr_fmt(fmt), ##__VA_ARGS__) #define pr_info(fmt, ...) \ - printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) + log(LOG_INFO, pr_fmt(fmt), ##__VA_ARGS__) #define pr_info_once(fmt, ...) \ - printk_once(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) + log_once(LOG_INFO, pr_fmt(fmt), ##__VA_ARGS__) #define pr_cont(fmt, ...) \ - printk(KERN_CONT fmt, ##__VA_ARGS__) + printk(KERN_CONT fmt, ##__VA_ARGS__) -/* pr_devel() should produce zero code unless DEBUG is defined */ -#ifdef DEBUG -#define pr_devel(fmt, ...) \ - printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) -#else -#define pr_devel(fmt, ...) \ - ({ if (0) printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); 0; }) -#endif - #ifndef WARN #define WARN(condition, format...) ({ \ int __ret_warn_on = !!(condition); \ if (unlikely(__ret_warn_on)) \ pr_warning(format); \ unlikely(__ret_warn_on); \ }) #endif #define container_of(ptr, type, member) \ ({ \ __typeof(((type *)0)->member) *_p = (ptr); \ (type *)((char *)_p - offsetof(type, member)); \ }) #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) #define simple_strtoul strtoul #define simple_strtol strtol #define kstrtol(a,b,c) ({*(c) = strtol(a,0,b);}) -#define min(x, y) (x < y ? x : y) -#define max(x, y) (x > y ? x : y) -#define min_t(type, _x, _y) (type)(_x) < (type)(_y) ? (type)(_x) : (_y) -#define max_t(type, _x, _y) (type)(_x) > (type)(_y) ? (type)(_x) : (_y) +#define min(x, y) ((x) < (y) ? (x) : (y)) +#define max(x, y) ((x) > (y) ? (x) : (y)) +#define min_t(type, _x, _y) ((type)(_x) < (type)(_y) ? (type)(_x) : (type)(_y)) +#define max_t(type, _x, _y) ((type)(_x) > (type)(_y) ? (type)(_x) : (type)(_y)) /* * This looks more complex than it should be. But we need to * get the type for the ~ right in round_down (it needs to be * as wide as the result!), and we want to evaluate the macro * arguments just once each. */ #define __round_mask(x, y) ((__typeof__(x))((y)-1)) #define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1) #define round_down(x, y) ((x) & ~__round_mask(x, y)) #define num_possible_cpus() mp_ncpus typedef struct pm_message { int event; } pm_message_t; #endif /* _LINUX_KERNEL_H_ */ Index: stable/9/sys/ofed/include/linux/linux_compat.c =================================================================== --- stable/9/sys/ofed/include/linux/linux_compat.c (revision 277139) +++ stable/9/sys/ofed/include/linux/linux_compat.c (revision 277140) @@ -1,718 +1,726 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include MALLOC_DEFINE(M_KMALLOC, "linux", "Linux kmalloc compat"); #include /* Undo Linux compat changes. */ #undef RB_ROOT #undef file #undef cdev #define RB_ROOT(head) (head)->rbh_root #undef LIST_HEAD /* From sys/queue.h */ #define LIST_HEAD(name, type) \ struct name { \ struct type *lh_first; /* first element */ \ } struct kobject class_root; struct device linux_rootdev; struct class miscclass; struct list_head pci_drivers; struct list_head pci_devices; spinlock_t pci_lock; int panic_cmp(struct rb_node *one, struct rb_node *two) { panic("no cmp"); } RB_GENERATE(linux_root, rb_node, __entry, panic_cmp); int kobject_set_name(struct kobject *kobj, const char *fmt, ...) { va_list args; int error; va_start(args, fmt); error = kobject_set_name_vargs(kobj, fmt, args); va_end(args); return (error); } static inline int kobject_add_complete(struct kobject *kobj, struct kobject *parent) { struct kobj_type *t; int error; kobj->parent = kobject_get(parent); error = sysfs_create_dir(kobj); if (error == 0 && kobj->ktype && kobj->ktype->default_attrs) { struct attribute **attr; t = kobj->ktype; for (attr = t->default_attrs; *attr != NULL; attr++) { error = sysfs_create_file(kobj, *attr); if (error) break; } if (error) sysfs_remove_dir(kobj); } return (error); } int kobject_add(struct kobject *kobj, struct kobject *parent, const char *fmt, ...) { va_list args; int error; va_start(args, fmt); error = kobject_set_name_vargs(kobj, fmt, args); va_end(args); if (error) return (error); return kobject_add_complete(kobj, parent); } void kobject_release(struct kref *kref) { struct kobject *kobj; char *name; kobj = container_of(kref, struct kobject, kref); sysfs_remove_dir(kobj); if (kobj->parent) kobject_put(kobj->parent); kobj->parent = NULL; name = kobj->name; if (kobj->ktype && kobj->ktype->release) kobj->ktype->release(kobj); kfree(name); } static void kobject_kfree(struct kobject *kobj) { kfree(kobj); } static void kobject_kfree_name(struct kobject *kobj) { if (kobj) { kfree(kobj->name); } } struct kobj_type kfree_type = { .release = kobject_kfree }; +static void +dev_release(struct device *dev) +{ + pr_debug("dev_release: %s\n", dev_name(dev)); + kfree(dev); +} + struct device * device_create(struct class *class, struct device *parent, dev_t devt, void *drvdata, const char *fmt, ...) { struct device *dev; va_list args; dev = kzalloc(sizeof(*dev), M_WAITOK); dev->parent = parent; dev->class = class; dev->devt = devt; dev->driver_data = drvdata; + dev->release = dev_release; va_start(args, fmt); kobject_set_name_vargs(&dev->kobj, fmt, args); va_end(args); device_register(dev); return (dev); } int kobject_init_and_add(struct kobject *kobj, struct kobj_type *ktype, struct kobject *parent, const char *fmt, ...) { va_list args; int error; kobject_init(kobj, ktype); kobj->ktype = ktype; kobj->parent = parent; kobj->name = NULL; va_start(args, fmt); error = kobject_set_name_vargs(kobj, fmt, args); va_end(args); if (error) return (error); return kobject_add_complete(kobj, parent); } static void linux_file_dtor(void *cdp) { struct linux_file *filp; filp = cdp; filp->f_op->release(filp->f_vnode, filp); vdrop(filp->f_vnode); kfree(filp); } static int linux_dev_open(struct cdev *dev, int oflags, int devtype, struct thread *td) { struct linux_cdev *ldev; struct linux_file *filp; struct file *file; int error; file = curthread->td_fpop; ldev = dev->si_drv1; if (ldev == NULL) return (ENODEV); filp = kzalloc(sizeof(*filp), GFP_KERNEL); filp->f_dentry = &filp->f_dentry_store; filp->f_op = ldev->ops; filp->f_flags = file->f_flag; vhold(file->f_vnode); filp->f_vnode = file->f_vnode; if (filp->f_op->open) { error = -filp->f_op->open(file->f_vnode, filp); if (error) { kfree(filp); return (error); } } error = devfs_set_cdevpriv(filp, linux_file_dtor); if (error) { filp->f_op->release(file->f_vnode, filp); kfree(filp); return (error); } return 0; } static int linux_dev_close(struct cdev *dev, int fflag, int devtype, struct thread *td) { struct linux_cdev *ldev; struct linux_file *filp; struct file *file; int error; file = curthread->td_fpop; ldev = dev->si_drv1; if (ldev == NULL) return (0); if ((error = devfs_get_cdevpriv((void **)&filp)) != 0) return (error); filp->f_flags = file->f_flag; devfs_clear_cdevpriv(); return (0); } static int linux_dev_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag, struct thread *td) { struct linux_cdev *ldev; struct linux_file *filp; struct file *file; int error; file = curthread->td_fpop; ldev = dev->si_drv1; if (ldev == NULL) return (0); if ((error = devfs_get_cdevpriv((void **)&filp)) != 0) return (error); filp->f_flags = file->f_flag; /* * Linux does not have a generic ioctl copyin/copyout layer. All * linux ioctls must be converted to void ioctls which pass a * pointer to the address of the data. We want the actual user * address so we dereference here. */ data = *(void **)data; if (filp->f_op->unlocked_ioctl) error = -filp->f_op->unlocked_ioctl(filp, cmd, (u_long)data); else error = ENOTTY; return (error); } static int linux_dev_read(struct cdev *dev, struct uio *uio, int ioflag) { struct linux_cdev *ldev; struct linux_file *filp; struct file *file; ssize_t bytes; int error; file = curthread->td_fpop; ldev = dev->si_drv1; if (ldev == NULL) return (0); if ((error = devfs_get_cdevpriv((void **)&filp)) != 0) return (error); filp->f_flags = file->f_flag; if (uio->uio_iovcnt != 1) panic("linux_dev_read: uio %p iovcnt %d", uio, uio->uio_iovcnt); if (filp->f_op->read) { bytes = filp->f_op->read(filp, uio->uio_iov->iov_base, uio->uio_iov->iov_len, &uio->uio_offset); if (bytes >= 0) { uio->uio_iov->iov_base += bytes; uio->uio_iov->iov_len -= bytes; uio->uio_resid -= bytes; } else error = -bytes; } else error = ENXIO; return (error); } static int linux_dev_write(struct cdev *dev, struct uio *uio, int ioflag) { struct linux_cdev *ldev; struct linux_file *filp; struct file *file; ssize_t bytes; int error; file = curthread->td_fpop; ldev = dev->si_drv1; if (ldev == NULL) return (0); if ((error = devfs_get_cdevpriv((void **)&filp)) != 0) return (error); filp->f_flags = file->f_flag; if (uio->uio_iovcnt != 1) panic("linux_dev_write: uio %p iovcnt %d", uio, uio->uio_iovcnt); if (filp->f_op->write) { bytes = filp->f_op->write(filp, uio->uio_iov->iov_base, uio->uio_iov->iov_len, &uio->uio_offset); if (bytes >= 0) { uio->uio_iov->iov_base += bytes; uio->uio_iov->iov_len -= bytes; uio->uio_resid -= bytes; } else error = -bytes; } else error = ENXIO; return (error); } static int linux_dev_poll(struct cdev *dev, int events, struct thread *td) { struct linux_cdev *ldev; struct linux_file *filp; struct file *file; int revents; int error; file = curthread->td_fpop; ldev = dev->si_drv1; if (ldev == NULL) return (0); if ((error = devfs_get_cdevpriv((void **)&filp)) != 0) return (error); filp->f_flags = file->f_flag; if (filp->f_op->poll) revents = filp->f_op->poll(filp, NULL) & events; else revents = 0; return (revents); } static int linux_dev_mmap(struct cdev *dev, vm_ooffset_t offset, vm_paddr_t *paddr, int nprot, vm_memattr_t *memattr) { /* XXX memattr not honored. */ *paddr = offset; return (0); } static int linux_dev_mmap_single(struct cdev *dev, vm_ooffset_t *offset, vm_size_t size, struct vm_object **object, int nprot) { struct linux_cdev *ldev; struct linux_file *filp; struct file *file; struct vm_area_struct vma; vm_paddr_t paddr; vm_page_t m; int error; file = curthread->td_fpop; ldev = dev->si_drv1; if (ldev == NULL) return (ENODEV); if (size != PAGE_SIZE) return (EINVAL); if ((error = devfs_get_cdevpriv((void **)&filp)) != 0) return (error); filp->f_flags = file->f_flag; vma.vm_start = 0; vma.vm_end = PAGE_SIZE; vma.vm_pgoff = *offset / PAGE_SIZE; vma.vm_pfn = 0; vma.vm_page_prot = 0; if (filp->f_op->mmap) { error = -filp->f_op->mmap(filp, &vma); if (error == 0) { paddr = (vm_paddr_t)vma.vm_pfn << PAGE_SHIFT; *offset = paddr; m = PHYS_TO_VM_PAGE(paddr); *object = vm_pager_allocate(OBJT_DEVICE, dev, PAGE_SIZE, nprot, *offset, curthread->td_ucred); if (*object == NULL) return (EINVAL); if (vma.vm_page_prot != VM_MEMATTR_DEFAULT) pmap_page_set_memattr(m, vma.vm_page_prot); } } else error = ENODEV; return (error); } struct cdevsw linuxcdevsw = { .d_version = D_VERSION, .d_flags = D_TRACKCLOSE, .d_open = linux_dev_open, .d_close = linux_dev_close, .d_read = linux_dev_read, .d_write = linux_dev_write, .d_ioctl = linux_dev_ioctl, .d_mmap_single = linux_dev_mmap_single, .d_mmap = linux_dev_mmap, .d_poll = linux_dev_poll, }; static int linux_file_read(struct file *file, struct uio *uio, struct ucred *active_cred, int flags, struct thread *td) { struct linux_file *filp; ssize_t bytes; int error; error = 0; filp = (struct linux_file *)file->f_data; filp->f_flags = file->f_flag; if (uio->uio_iovcnt != 1) panic("linux_file_read: uio %p iovcnt %d", uio, uio->uio_iovcnt); if (filp->f_op->read) { bytes = filp->f_op->read(filp, uio->uio_iov->iov_base, uio->uio_iov->iov_len, &uio->uio_offset); if (bytes >= 0) { uio->uio_iov->iov_base += bytes; uio->uio_iov->iov_len -= bytes; uio->uio_resid -= bytes; } else error = -bytes; } else error = ENXIO; return (error); } static int linux_file_poll(struct file *file, int events, struct ucred *active_cred, struct thread *td) { struct linux_file *filp; int revents; filp = (struct linux_file *)file->f_data; filp->f_flags = file->f_flag; if (filp->f_op->poll) revents = filp->f_op->poll(filp, NULL) & events; else revents = 0; return (0); } static int linux_file_close(struct file *file, struct thread *td) { struct linux_file *filp; int error; filp = (struct linux_file *)file->f_data; filp->f_flags = file->f_flag; error = -filp->f_op->release(NULL, filp); funsetown(&filp->f_sigio); kfree(filp); return (error); } static int linux_file_ioctl(struct file *fp, u_long cmd, void *data, struct ucred *cred, struct thread *td) { struct linux_file *filp; int error; filp = (struct linux_file *)fp->f_data; filp->f_flags = fp->f_flag; error = 0; switch (cmd) { case FIONBIO: break; case FIOASYNC: if (filp->f_op->fasync == NULL) break; error = filp->f_op->fasync(0, filp, fp->f_flag & FASYNC); break; case FIOSETOWN: error = fsetown(*(int *)data, &filp->f_sigio); if (error == 0) error = filp->f_op->fasync(0, filp, fp->f_flag & FASYNC); break; case FIOGETOWN: *(int *)data = fgetown(&filp->f_sigio); break; default: error = ENOTTY; break; } return (error); } struct fileops linuxfileops = { .fo_read = linux_file_read, .fo_poll = linux_file_poll, .fo_close = linux_file_close, .fo_ioctl = linux_file_ioctl, .fo_chmod = invfo_chmod, .fo_chown = invfo_chown, }; /* * Hash of vmmap addresses. This is infrequently accessed and does not * need to be particularly large. This is done because we must store the * caller's idea of the map size to properly unmap. */ struct vmmap { LIST_ENTRY(vmmap) vm_next; void *vm_addr; unsigned long vm_size; }; LIST_HEAD(vmmaphd, vmmap); #define VMMAP_HASH_SIZE 64 #define VMMAP_HASH_MASK (VMMAP_HASH_SIZE - 1) #define VM_HASH(addr) ((uintptr_t)(addr) >> PAGE_SHIFT) & VMMAP_HASH_MASK static struct vmmaphd vmmaphead[VMMAP_HASH_SIZE]; static struct mtx vmmaplock; static void vmmap_add(void *addr, unsigned long size) { struct vmmap *vmmap; vmmap = kmalloc(sizeof(*vmmap), GFP_KERNEL); mtx_lock(&vmmaplock); vmmap->vm_size = size; vmmap->vm_addr = addr; LIST_INSERT_HEAD(&vmmaphead[VM_HASH(addr)], vmmap, vm_next); mtx_unlock(&vmmaplock); } static struct vmmap * vmmap_remove(void *addr) { struct vmmap *vmmap; mtx_lock(&vmmaplock); LIST_FOREACH(vmmap, &vmmaphead[VM_HASH(addr)], vm_next) if (vmmap->vm_addr == addr) break; if (vmmap) LIST_REMOVE(vmmap, vm_next); mtx_unlock(&vmmaplock); return (vmmap); } void * _ioremap_attr(vm_paddr_t phys_addr, unsigned long size, int attr) { void *addr; addr = pmap_mapdev_attr(phys_addr, size, attr); if (addr == NULL) return (NULL); vmmap_add(addr, size); return (addr); } void iounmap(void *addr) { struct vmmap *vmmap; vmmap = vmmap_remove(addr); if (vmmap == NULL) return; pmap_unmapdev((vm_offset_t)addr, vmmap->vm_size); kfree(vmmap); } void * vmap(struct page **pages, unsigned int count, unsigned long flags, int prot) { vm_offset_t off; size_t size; size = count * PAGE_SIZE; off = kmem_alloc_nofault(kernel_map, size); if (off == 0) return (NULL); vmmap_add((void *)off, size); pmap_qenter(off, pages, count); return ((void *)off); } void vunmap(void *addr) { struct vmmap *vmmap; vmmap = vmmap_remove(addr); if (vmmap == NULL) return; pmap_qremove((vm_offset_t)addr, vmmap->vm_size / PAGE_SIZE); kmem_free(kernel_map, (vm_offset_t)addr, vmmap->vm_size); kfree(vmmap); } static void linux_compat_init(void) { struct sysctl_oid *rootoid; int i; rootoid = SYSCTL_ADD_NODE(NULL, SYSCTL_STATIC_CHILDREN(), OID_AUTO, "sys", CTLFLAG_RD|CTLFLAG_MPSAFE, NULL, "sys"); kobject_init(&class_root, &class_ktype); kobject_set_name(&class_root, "class"); class_root.oidp = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(rootoid), OID_AUTO, "class", CTLFLAG_RD|CTLFLAG_MPSAFE, NULL, "class"); kobject_init(&linux_rootdev.kobj, &dev_ktype); kobject_set_name(&linux_rootdev.kobj, "device"); linux_rootdev.kobj.oidp = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(rootoid), OID_AUTO, "device", CTLFLAG_RD, NULL, "device"); linux_rootdev.bsddev = root_bus; miscclass.name = "misc"; class_register(&miscclass); INIT_LIST_HEAD(&pci_drivers); INIT_LIST_HEAD(&pci_devices); spin_lock_init(&pci_lock); mtx_init(&vmmaplock, "IO Map lock", NULL, MTX_DEF); for (i = 0; i < VMMAP_HASH_SIZE; i++) LIST_INIT(&vmmaphead[i]); } SYSINIT(linux_compat, SI_SUB_DRIVERS, SI_ORDER_SECOND, linux_compat_init, NULL); static void linux_compat_uninit(void) { kobject_kfree_name(&class_root); kobject_kfree_name(&linux_rootdev.kobj); kobject_kfree_name(&miscclass.kobj); } SYSUNINIT(linux_compat, SI_SUB_DRIVERS, SI_ORDER_SECOND, linux_compat_uninit, NULL); Index: stable/9/sys/ofed/include/linux/linux_idr.c =================================================================== --- stable/9/sys/ofed/include/linux/linux_idr.c (revision 277139) +++ stable/9/sys/ofed/include/linux/linux_idr.c (revision 277140) @@ -1,448 +1,449 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include /* * IDR Implementation. * * This is quick and dirty and not as re-entrant as the linux version * however it should be fairly fast. It is basically a radix tree with * a builtin bitmap for allocation. */ static MALLOC_DEFINE(M_IDR, "idr", "Linux IDR compat"); static inline int idr_max(struct idr *idr) { return (1 << (idr->layers * IDR_BITS)) - 1; } static inline int idr_pos(int id, int layer) { return (id >> (IDR_BITS * layer)) & IDR_MASK; } void idr_init(struct idr *idr) { bzero(idr, sizeof(*idr)); mtx_init(&idr->lock, "idr", NULL, MTX_DEF); } /* Only frees cached pages. */ void idr_destroy(struct idr *idr) { struct idr_layer *il, *iln; + idr_remove_all(idr); mtx_lock(&idr->lock); for (il = idr->free; il != NULL; il = iln) { iln = il->ary[0]; free(il, M_IDR); } mtx_unlock(&idr->lock); } static void idr_remove_layer(struct idr_layer *il, int layer) { int i; if (il == NULL) return; if (layer == 0) { free(il, M_IDR); return; } for (i = 0; i < IDR_SIZE; i++) if (il->ary[i]) idr_remove_layer(il->ary[i], layer - 1); } void idr_remove_all(struct idr *idr) { mtx_lock(&idr->lock); idr_remove_layer(idr->top, idr->layers - 1); idr->top = NULL; idr->layers = 0; mtx_unlock(&idr->lock); } void idr_remove(struct idr *idr, int id) { struct idr_layer *il; int layer; int idx; id &= MAX_ID_MASK; mtx_lock(&idr->lock); il = idr->top; layer = idr->layers - 1; if (il == NULL || id > idr_max(idr)) { mtx_unlock(&idr->lock); return; } /* * Walk down the tree to this item setting bitmaps along the way * as we know at least one item will be free along this path. */ while (layer && il) { idx = idr_pos(id, layer); il->bitmap |= 1 << idx; il = il->ary[idx]; layer--; } idx = id & IDR_MASK; /* * At this point we've set free space bitmaps up the whole tree. * We could make this non-fatal and unwind but linux dumps a stack * and a warning so I don't think it's necessary. */ if (il == NULL || (il->bitmap & (1 << idx)) != 0) panic("idr_remove: Item %d not allocated (%p, %p)\n", id, idr, il); il->ary[idx] = NULL; il->bitmap |= 1 << idx; mtx_unlock(&idr->lock); return; } void * idr_replace(struct idr *idr, void *ptr, int id) { struct idr_layer *il; void *res; int layer; int idx; res = ERR_PTR(-EINVAL); id &= MAX_ID_MASK; mtx_lock(&idr->lock); il = idr->top; layer = idr->layers - 1; if (il == NULL || id > idr_max(idr)) goto out; while (layer && il) { il = il->ary[idr_pos(id, layer)]; layer--; } idx = id & IDR_MASK; /* * Replace still returns an error if the item was not allocated. */ if (il != NULL && (il->bitmap & (1 << idx)) != 0) { res = il->ary[idx]; il->ary[idx] = ptr; } out: mtx_unlock(&idr->lock); return (res); } void * idr_find(struct idr *idr, int id) { struct idr_layer *il; void *res; int layer; res = NULL; id &= MAX_ID_MASK; mtx_lock(&idr->lock); il = idr->top; layer = idr->layers - 1; if (il == NULL || id > idr_max(idr)) goto out; while (layer && il) { il = il->ary[idr_pos(id, layer)]; layer--; } if (il != NULL) res = il->ary[id & IDR_MASK]; out: mtx_unlock(&idr->lock); return (res); } int idr_pre_get(struct idr *idr, gfp_t gfp_mask) { struct idr_layer *il, *iln; struct idr_layer *head; int need; mtx_lock(&idr->lock); for (;;) { need = idr->layers + 1; for (il = idr->free; il != NULL; il = il->ary[0]) need--; mtx_unlock(&idr->lock); if (need == 0) break; for (head = NULL; need; need--) { iln = malloc(sizeof(*il), M_IDR, M_ZERO | gfp_mask); if (iln == NULL) break; bitmap_fill(&iln->bitmap, IDR_SIZE); if (head != NULL) { il->ary[0] = iln; il = iln; } else head = il = iln; } if (head == NULL) return (0); mtx_lock(&idr->lock); il->ary[0] = idr->free; idr->free = head; } return (1); } static inline struct idr_layer * idr_get(struct idr *idr) { struct idr_layer *il; il = idr->free; if (il) { idr->free = il->ary[0]; il->ary[0] = NULL; return (il); } il = malloc(sizeof(*il), M_IDR, M_ZERO | M_NOWAIT); bitmap_fill(&il->bitmap, IDR_SIZE); return (il); } /* * Could be implemented as get_new_above(idr, ptr, 0, idp) but written * first for simplicity sake. */ int idr_get_new(struct idr *idr, void *ptr, int *idp) { struct idr_layer *stack[MAX_LEVEL]; struct idr_layer *il; int error; int layer; int idx; int id; error = -EAGAIN; mtx_lock(&idr->lock); /* * Expand the tree until there is free space. */ if (idr->top == NULL || idr->top->bitmap == 0) { if (idr->layers == MAX_LEVEL + 1) { error = -ENOSPC; goto out; } il = idr_get(idr); if (il == NULL) goto out; il->ary[0] = idr->top; if (idr->top) il->bitmap &= ~1; idr->top = il; idr->layers++; } il = idr->top; id = 0; /* * Walk the tree following free bitmaps, record our path. */ for (layer = idr->layers - 1;; layer--) { stack[layer] = il; idx = ffsl(il->bitmap); if (idx == 0) panic("idr_get_new: Invalid leaf state (%p, %p)\n", idr, il); idx--; id |= idx << (layer * IDR_BITS); if (layer == 0) break; if (il->ary[idx] == NULL) { il->ary[idx] = idr_get(idr); if (il->ary[idx] == NULL) goto out; } il = il->ary[idx]; } /* * Allocate the leaf to the consumer. */ il->bitmap &= ~(1 << idx); il->ary[idx] = ptr; *idp = id; /* * Clear bitmaps potentially up to the root. */ while (il->bitmap == 0 && ++layer < idr->layers) { il = stack[layer]; il->bitmap &= ~(1 << idr_pos(id, layer)); } error = 0; out: mtx_unlock(&idr->lock); #ifdef INVARIANTS if (error == 0 && idr_find(idr, id) != ptr) { panic("idr_get_new: Failed for idr %p, id %d, ptr %p\n", idr, id, ptr); } #endif return (error); } int idr_get_new_above(struct idr *idr, void *ptr, int starting_id, int *idp) { struct idr_layer *stack[MAX_LEVEL]; struct idr_layer *il; int error; int layer; int idx, sidx; int id; error = -EAGAIN; mtx_lock(&idr->lock); /* * Compute the layers required to support starting_id and the mask * at the top layer. */ restart: idx = starting_id; layer = 0; while (idx & ~IDR_MASK) { layer++; idx >>= IDR_BITS; } if (layer == MAX_LEVEL + 1) { error = -ENOSPC; goto out; } /* * Expand the tree until there is free space at or beyond starting_id. */ while (idr->layers <= layer || idr->top->bitmap < (1 << idr_pos(starting_id, idr->layers - 1))) { if (idr->layers == MAX_LEVEL + 1) { error = -ENOSPC; goto out; } il = idr_get(idr); if (il == NULL) goto out; il->ary[0] = idr->top; if (idr->top && idr->top->bitmap == 0) il->bitmap &= ~1; idr->top = il; idr->layers++; } il = idr->top; id = 0; /* * Walk the tree following free bitmaps, record our path. */ for (layer = idr->layers - 1;; layer--) { stack[layer] = il; sidx = idr_pos(starting_id, layer); /* Returns index numbered from 0 or size if none exists. */ idx = find_next_bit(&il->bitmap, IDR_SIZE, sidx); if (idx == IDR_SIZE && sidx == 0) panic("idr_get_new: Invalid leaf state (%p, %p)\n", idr, il); /* * We may have walked a path where there was a free bit but * it was lower than what we wanted. Restart the search with * a larger starting id. id contains the progress we made so * far. Search the leaf one above this level. This may * restart as many as MAX_LEVEL times but that is expected * to be rare. */ if (idx == IDR_SIZE) { starting_id = id + (1 << (layer+1 * IDR_BITS)); goto restart; } if (idx > sidx) starting_id = 0; /* Search the whole subtree. */ id |= idx << (layer * IDR_BITS); if (layer == 0) break; if (il->ary[idx] == NULL) { il->ary[idx] = idr_get(idr); if (il->ary[idx] == NULL) goto out; } il = il->ary[idx]; } /* * Allocate the leaf to the consumer. */ il->bitmap &= ~(1 << idx); il->ary[idx] = ptr; *idp = id; /* * Clear bitmaps potentially up to the root. */ while (il->bitmap == 0 && ++layer < idr->layers) { il = stack[layer]; il->bitmap &= ~(1 << idr_pos(id, layer)); } error = 0; out: mtx_unlock(&idr->lock); #ifdef INVARIANTS if (error == 0 && idr_find(idr, id) != ptr) { panic("idr_get_new_above: Failed for idr %p, id %d, ptr %p\n", idr, id, ptr); } #endif return (error); } Index: stable/9/sys/ofed/include/linux/list.h =================================================================== --- stable/9/sys/ofed/include/linux/list.h (revision 277139) +++ stable/9/sys/ofed/include/linux/list.h (revision 277140) @@ -1,399 +1,400 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _LINUX_LIST_H_ #define _LINUX_LIST_H_ /* * Since LIST_HEAD conflicts with the linux definition we must include any * FreeBSD header which requires it here so it is resolved with the correct * definition prior to the undef. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include #include #include #include #define prefetch(x) struct list_head { struct list_head *next; struct list_head *prev; }; static inline void INIT_LIST_HEAD(struct list_head *list) { list->next = list->prev = list; } static inline int list_empty(const struct list_head *head) { return (head->next == head); } static inline void list_del(struct list_head *entry) { entry->next->prev = entry->prev; entry->prev->next = entry->next; } static inline void _list_add(struct list_head *new, struct list_head *prev, struct list_head *next) { next->prev = new; new->next = next; new->prev = prev; prev->next = new; } static inline void list_del_init(struct list_head *entry) { list_del(entry); INIT_LIST_HEAD(entry); } #define list_entry(ptr, type, field) container_of(ptr, type, field) #define list_first_entry(ptr, type, member) \ list_entry((ptr)->next, type, member) #define list_for_each(p, head) \ for (p = (head)->next; p != (head); p = p->next) #define list_for_each_safe(p, n, head) \ for (p = (head)->next, n = p->next; p != (head); p = n, n = p->next) #define list_for_each_entry(p, h, field) \ for (p = list_entry((h)->next, typeof(*p), field); &p->field != (h); \ p = list_entry(p->field.next, typeof(*p), field)) #define list_for_each_entry_safe(p, n, h, field) \ for (p = list_entry((h)->next, typeof(*p), field), \ n = list_entry(p->field.next, typeof(*p), field); &p->field != (h);\ p = n, n = list_entry(n->field.next, typeof(*n), field)) #define list_for_each_entry_reverse(p, h, field) \ for (p = list_entry((h)->prev, typeof(*p), field); &p->field != (h); \ p = list_entry(p->field.prev, typeof(*p), field)) #define list_for_each_prev(p, h) for (p = (h)->prev; p != (h); p = p->prev) static inline void list_add(struct list_head *new, struct list_head *head) { _list_add(new, head, head->next); } static inline void list_add_tail(struct list_head *new, struct list_head *head) { _list_add(new, head->prev, head); } static inline void list_move(struct list_head *list, struct list_head *head) { list_del(list); list_add(list, head); } static inline void list_move_tail(struct list_head *entry, struct list_head *head) { list_del(entry); list_add_tail(entry, head); } static inline void _list_splice(const struct list_head *list, struct list_head *prev, struct list_head *next) { struct list_head *first; struct list_head *last; if (list_empty(list)) return; first = list->next; last = list->prev; first->prev = prev; prev->next = first; last->next = next; next->prev = last; } static inline void list_splice(const struct list_head *list, struct list_head *head) { _list_splice(list, head, head->next); } static inline void list_splice_tail(struct list_head *list, struct list_head *head) { _list_splice(list, head->prev, head); } static inline void list_splice_init(struct list_head *list, struct list_head *head) { _list_splice(list, head, head->next); INIT_LIST_HEAD(list); } static inline void list_splice_tail_init(struct list_head *list, struct list_head *head) { _list_splice(list, head->prev, head); INIT_LIST_HEAD(list); } #undef LIST_HEAD #define LIST_HEAD(name) struct list_head name = { &(name), &(name) } struct hlist_head { struct hlist_node *first; }; struct hlist_node { struct hlist_node *next, **pprev; }; #define HLIST_HEAD_INIT { } #define HLIST_HEAD(name) struct hlist_head name = HLIST_HEAD_INIT #define INIT_HLIST_HEAD(head) (head)->first = NULL #define INIT_HLIST_NODE(node) \ do { \ (node)->next = NULL; \ (node)->pprev = NULL; \ } while (0) static inline int hlist_unhashed(const struct hlist_node *h) { return !h->pprev; } static inline int hlist_empty(const struct hlist_head *h) { return !h->first; } static inline void hlist_del(struct hlist_node *n) { if (n->next) n->next->pprev = n->pprev; *n->pprev = n->next; } static inline void hlist_del_init(struct hlist_node *n) { if (hlist_unhashed(n)) return; hlist_del(n); INIT_HLIST_NODE(n); } static inline void hlist_add_head(struct hlist_node *n, struct hlist_head *h) { n->next = h->first; if (h->first) h->first->pprev = &n->next; h->first = n; n->pprev = &h->first; } static inline void hlist_add_before(struct hlist_node *n, struct hlist_node *next) { n->pprev = next->pprev; n->next = next; next->pprev = &n->next; *(n->pprev) = n; } static inline void hlist_add_after(struct hlist_node *n, struct hlist_node *next) { next->next = n->next; n->next = next; next->pprev = &n->next; if (next->next) next->next->pprev = &next->next; } static inline void hlist_move_list(struct hlist_head *old, struct hlist_head *new) { new->first = old->first; if (new->first) new->first->pprev = &new->first; old->first = NULL; } /** * list_is_singular - tests whether a list has just one entry. * @head: the list to test. */ static inline int list_is_singular(const struct list_head *head) { return !list_empty(head) && (head->next == head->prev); } static inline void __list_cut_position(struct list_head *list, struct list_head *head, struct list_head *entry) { struct list_head *new_first = entry->next; list->next = head->next; list->next->prev = list; list->prev = entry; entry->next = list; head->next = new_first; new_first->prev = head; } /** * list_cut_position - cut a list into two * @list: a new list to add all removed entries * @head: a list with entries * @entry: an entry within head, could be the head itself * and if so we won't cut the list * * This helper moves the initial part of @head, up to and * including @entry, from @head to @list. You should * pass on @entry an element you know is on @head. @list * should be an empty list or a list you do not care about * losing its data. * */ static inline void list_cut_position(struct list_head *list, struct list_head *head, struct list_head *entry) { if (list_empty(head)) return; if (list_is_singular(head) && (head->next != entry && head != entry)) return; if (entry == head) INIT_LIST_HEAD(list); else __list_cut_position(list, head, entry); } /** * list_is_last - tests whether @list is the last entry in list @head * @list: the entry to test * @head: the head of the list */ static inline int list_is_last(const struct list_head *list, const struct list_head *head) { return list->next == head; } #define hlist_entry(ptr, type, field) container_of(ptr, type, field) #define hlist_for_each(p, head) \ for (p = (head)->first; p; p = p->next) #define hlist_for_each_safe(p, n, head) \ for (p = (head)->first; p && ({ n = p->next; 1; }); p = n) #define hlist_for_each_entry(tp, p, head, field) \ for (p = (head)->first; \ p ? (tp = hlist_entry(p, typeof(*tp), field)): NULL; p = p->next) #define hlist_for_each_entry_continue(tp, p, field) \ for (p = (p)->next; \ p ? (tp = hlist_entry(p, typeof(*tp), field)): NULL; p = p->next) #define hlist_for_each_entry_from(tp, p, field) \ for (; p ? (tp = hlist_entry(p, typeof(*tp), field)): NULL; p = p->next) #define hlist_for_each_entry_safe(tpos, pos, n, head, member) \ for (pos = (head)->first; \ (pos) != 0 && ({ n = (pos)->next; \ tpos = hlist_entry((pos), typeof(*(tpos)), member); 1;}); \ pos = (n)) #endif /* _LINUX_LIST_H_ */ Index: stable/9/sys/ofed/include/linux/pci.h =================================================================== --- stable/9/sys/ofed/include/linux/pci.h (revision 277139) +++ stable/9/sys/ofed/include/linux/pci.h (revision 277140) @@ -1,830 +1,837 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _LINUX_PCI_H_ #define _LINUX_PCI_H_ #define CONFIG_PCI_MSI #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include struct pci_device_id { uint32_t vendor; uint32_t device; uint32_t subvendor; uint32_t subdevice; uint32_t class_mask; uintptr_t driver_data; }; #define MODULE_DEVICE_TABLE(bus, table) #define PCI_ANY_ID (-1) #define PCI_VENDOR_ID_MELLANOX 0x15b3 #define PCI_VENDOR_ID_TOPSPIN 0x1867 #define PCI_DEVICE_ID_MELLANOX_TAVOR 0x5a44 #define PCI_DEVICE_ID_MELLANOX_TAVOR_BRIDGE 0x5a46 #define PCI_DEVICE_ID_MELLANOX_ARBEL_COMPAT 0x6278 #define PCI_DEVICE_ID_MELLANOX_ARBEL 0x6282 #define PCI_DEVICE_ID_MELLANOX_SINAI_OLD 0x5e8c #define PCI_DEVICE_ID_MELLANOX_SINAI 0x6274 #define PCI_DEVFN(slot, func) ((((slot) & 0x1f) << 3) | ((func) & 0x07)) #define PCI_SLOT(devfn) (((devfn) >> 3) & 0x1f) #define PCI_FUNC(devfn) ((devfn) & 0x07) #define PCI_VDEVICE(_vendor, _device) \ .vendor = PCI_VENDOR_ID_##_vendor, .device = (_device), \ .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID #define PCI_DEVICE(_vendor, _device) \ .vendor = (_vendor), .device = (_device), \ .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID #define to_pci_dev(n) container_of(n, struct pci_dev, dev) #define PCI_VENDOR_ID PCIR_DEVVENDOR #define PCI_COMMAND PCIR_COMMAND #define PCI_EXP_DEVCTL PCIER_DEVICE_CTL /* Device Control */ #define PCI_EXP_LNKCTL PCIER_LINK_CTL /* Link Control */ #define PCI_EXP_FLAGS_TYPE PCIEM_FLAGS_TYPE /* Device/Port type */ #define PCI_EXP_DEVCAP PCIER_DEVICE_CAP /* Device capabilities */ #define PCI_EXP_DEVSTA PCIER_DEVICE_STA /* Device Status */ #define PCI_EXP_LNKCAP PCIER_LINK_CAP /* Link Capabilities */ #define PCI_EXP_LNKSTA PCIER_LINK_STA /* Link Status */ #define PCI_EXP_SLTCAP PCIER_SLOT_CAP /* Slot Capabilities */ #define PCI_EXP_SLTCTL PCIER_SLOT_CTL /* Slot Control */ #define PCI_EXP_SLTSTA PCIER_SLOT_STA /* Slot Status */ #define PCI_EXP_RTCTL PCIER_ROOT_CTL /* Root Control */ #define PCI_EXP_RTCAP PCIER_ROOT_CAP /* Root Capabilities */ #define PCI_EXP_RTSTA PCIER_ROOT_STA /* Root Status */ #define PCI_EXP_DEVCAP2 PCIER_DEVICE_CAP2 /* Device Capabilities 2 */ #define PCI_EXP_DEVCTL2 PCIER_DEVICE_CTL2 /* Device Control 2 */ #define PCI_EXP_LNKCAP2 PCIER_LINK_CAP2 /* Link Capabilities 2 */ #define PCI_EXP_LNKCTL2 PCIER_LINK_CTL2 /* Link Control 2 */ #define PCI_EXP_LNKSTA2 PCIER_LINK_STA2 /* Link Status 2 */ #define PCI_EXP_FLAGS PCIER_FLAGS /* Capabilities register */ #define PCI_EXP_FLAGS_VERS PCIEM_FLAGS_VERSION /* Capability version */ #define PCI_EXP_TYPE_ROOT_PORT PCIEM_TYPE_ROOT_PORT /* Root Port */ #define PCI_EXP_TYPE_ENDPOINT PCIEM_TYPE_ENDPOINT /* Express Endpoint */ #define PCI_EXP_TYPE_LEG_END PCIEM_TYPE_LEGACY_ENDPOINT /* Legacy Endpoint */ #define PCI_EXP_TYPE_DOWNSTREAM PCIEM_TYPE_DOWNSTREAM_PORT /* Downstream Port */ #define PCI_EXP_FLAGS_SLOT PCIEM_FLAGS_SLOT /* Slot implemented */ #define PCI_EXP_TYPE_RC_EC PCIEM_TYPE_ROOT_EC /* Root Complex Event Collector */ #define IORESOURCE_MEM SYS_RES_MEMORY #define IORESOURCE_IO SYS_RES_IOPORT #define IORESOURCE_IRQ SYS_RES_IRQ struct pci_dev; struct pci_driver { struct list_head links; char *name; const struct pci_device_id *id_table; int (*probe)(struct pci_dev *dev, const struct pci_device_id *id); void (*remove)(struct pci_dev *dev); int (*suspend) (struct pci_dev *dev, pm_message_t state); /* Device suspended */ int (*resume) (struct pci_dev *dev); /* Device woken up */ driver_t driver; devclass_t bsdclass; const struct pci_error_handlers *err_handler; }; extern struct list_head pci_drivers; extern struct list_head pci_devices; extern spinlock_t pci_lock; #define __devexit_p(x) x struct pci_dev { struct device dev; struct list_head links; struct pci_driver *pdrv; uint64_t dma_mask; uint16_t device; uint16_t vendor; unsigned int irq; - unsigned int devfn; - u8 revision; - struct pci_devinfo *bus; /* bus this device is on, equivalent to linux struct pci_bus */ + unsigned int devfn; + u8 revision; }; static inline struct resource_list_entry * _pci_get_rle(struct pci_dev *pdev, int type, int rid) { struct pci_devinfo *dinfo; struct resource_list *rl; dinfo = device_get_ivars(pdev->dev.bsddev); rl = &dinfo->resources; return resource_list_find(rl, type, rid); } static inline struct resource_list_entry * _pci_get_bar(struct pci_dev *pdev, int bar) { struct resource_list_entry *rle; bar = PCIR_BAR(bar); if ((rle = _pci_get_rle(pdev, SYS_RES_MEMORY, bar)) == NULL) rle = _pci_get_rle(pdev, SYS_RES_IOPORT, bar); return (rle); } static inline struct device * _pci_find_irq_dev(unsigned int irq) { struct pci_dev *pdev; spin_lock(&pci_lock); list_for_each_entry(pdev, &pci_devices, links) { if (irq == pdev->dev.irq) break; if (irq >= pdev->dev.msix && irq < pdev->dev.msix_max) break; } spin_unlock(&pci_lock); if (pdev) return &pdev->dev; return (NULL); } static inline unsigned long pci_resource_start(struct pci_dev *pdev, int bar) { struct resource_list_entry *rle; if ((rle = _pci_get_bar(pdev, bar)) == NULL) return (0); return rle->start; } static inline unsigned long pci_resource_len(struct pci_dev *pdev, int bar) { struct resource_list_entry *rle; if ((rle = _pci_get_bar(pdev, bar)) == NULL) return (0); return rle->count; } /* * All drivers just seem to want to inspect the type not flags. */ static inline int pci_resource_flags(struct pci_dev *pdev, int bar) { struct resource_list_entry *rle; if ((rle = _pci_get_bar(pdev, bar)) == NULL) return (0); return rle->type; } static inline const char * pci_name(struct pci_dev *d) { return device_get_desc(d->dev.bsddev); } static inline void * pci_get_drvdata(struct pci_dev *pdev) { return dev_get_drvdata(&pdev->dev); } static inline void pci_set_drvdata(struct pci_dev *pdev, void *data) { dev_set_drvdata(&pdev->dev, data); } static inline int pci_enable_device(struct pci_dev *pdev) { pci_enable_io(pdev->dev.bsddev, SYS_RES_IOPORT); pci_enable_io(pdev->dev.bsddev, SYS_RES_MEMORY); return (0); } static inline void pci_disable_device(struct pci_dev *pdev) { } static inline int pci_set_master(struct pci_dev *pdev) { pci_enable_busmaster(pdev->dev.bsddev); return (0); } static inline int pci_request_region(struct pci_dev *pdev, int bar, const char *res_name) { int rid; int type; type = pci_resource_flags(pdev, bar); if (type == 0) return (-ENODEV); rid = PCIR_BAR(bar); if (bus_alloc_resource_any(pdev->dev.bsddev, type, &rid, RF_ACTIVE) == NULL) return (-EINVAL); return (0); } static inline void pci_release_region(struct pci_dev *pdev, int bar) { struct resource_list_entry *rle; if ((rle = _pci_get_bar(pdev, bar)) == NULL) return; bus_release_resource(pdev->dev.bsddev, rle->type, rle->rid, rle->res); } static inline void pci_release_regions(struct pci_dev *pdev) { int i; for (i = 0; i <= PCIR_MAX_BAR_0; i++) pci_release_region(pdev, i); } static inline int pci_request_regions(struct pci_dev *pdev, const char *res_name) { int error; int i; for (i = 0; i <= PCIR_MAX_BAR_0; i++) { error = pci_request_region(pdev, i, res_name); if (error && error != -ENODEV) { pci_release_regions(pdev); return (error); } } return (0); } static inline void pci_disable_msix(struct pci_dev *pdev) { pci_release_msi(pdev->dev.bsddev); } #define PCI_CAP_ID_EXP PCIY_EXPRESS #define PCI_CAP_ID_PCIX PCIY_PCIX static inline int pci_find_capability(struct pci_dev *pdev, int capid) { int reg; if (pci_find_cap(pdev->dev.bsddev, capid, ®)) return (0); return (reg); } /** * pci_pcie_cap - get the saved PCIe capability offset * @dev: PCI device * * PCIe capability offset is calculated at PCI device initialization * time and saved in the data structure. This function returns saved * PCIe capability offset. Using this instead of pci_find_capability() * reduces unnecessary search in the PCI configuration space. If you * need to calculate PCIe capability offset from raw device for some * reasons, please use pci_find_capability() instead. */ static inline int pci_pcie_cap(struct pci_dev *dev) { return pci_find_capability(dev, PCI_CAP_ID_EXP); } static inline int pci_read_config_byte(struct pci_dev *pdev, int where, u8 *val) { *val = (u8)pci_read_config(pdev->dev.bsddev, where, 1); return (0); } static inline int pci_read_config_word(struct pci_dev *pdev, int where, u16 *val) { *val = (u16)pci_read_config(pdev->dev.bsddev, where, 2); return (0); } static inline int pci_read_config_dword(struct pci_dev *pdev, int where, u32 *val) { *val = (u32)pci_read_config(pdev->dev.bsddev, where, 4); return (0); } static inline int pci_write_config_byte(struct pci_dev *pdev, int where, u8 val) { pci_write_config(pdev->dev.bsddev, where, val, 1); return (0); } static inline int pci_write_config_word(struct pci_dev *pdev, int where, u16 val) { pci_write_config(pdev->dev.bsddev, where, val, 2); return (0); } static inline int pci_write_config_dword(struct pci_dev *pdev, int where, u32 val) { pci_write_config(pdev->dev.bsddev, where, val, 4); return (0); } static struct pci_driver * linux_pci_find(device_t dev, const struct pci_device_id **idp) { const struct pci_device_id *id; struct pci_driver *pdrv; uint16_t vendor; uint16_t device; vendor = pci_get_vendor(dev); device = pci_get_device(dev); spin_lock(&pci_lock); list_for_each_entry(pdrv, &pci_drivers, links) { for (id = pdrv->id_table; id->vendor != 0; id++) { if (vendor == id->vendor && device == id->device) { *idp = id; spin_unlock(&pci_lock); return (pdrv); } } } spin_unlock(&pci_lock); return (NULL); } static inline int linux_pci_probe(device_t dev) { const struct pci_device_id *id; struct pci_driver *pdrv; if ((pdrv = linux_pci_find(dev, &id)) == NULL) return (ENXIO); if (device_get_driver(dev) != &pdrv->driver) return (ENXIO); device_set_desc(dev, pdrv->name); return (0); } static inline int linux_pci_attach(device_t dev) { struct resource_list_entry *rle; struct pci_dev *pdev; struct pci_driver *pdrv; const struct pci_device_id *id; int error; pdrv = linux_pci_find(dev, &id); pdev = device_get_softc(dev); pdev->dev.parent = &linux_rootdev; pdev->dev.bsddev = dev; INIT_LIST_HEAD(&pdev->dev.irqents); pdev->device = id->device; pdev->vendor = id->vendor; pdev->dev.dma_mask = &pdev->dma_mask; pdev->pdrv = pdrv; kobject_init(&pdev->dev.kobj, &dev_ktype); kobject_set_name(&pdev->dev.kobj, device_get_nameunit(dev)); kobject_add(&pdev->dev.kobj, &linux_rootdev.kobj, kobject_name(&pdev->dev.kobj)); rle = _pci_get_rle(pdev, SYS_RES_IRQ, 0); if (rle) pdev->dev.irq = rle->start; else pdev->dev.irq = 0; pdev->irq = pdev->dev.irq; mtx_unlock(&Giant); spin_lock(&pci_lock); list_add(&pdev->links, &pci_devices); spin_unlock(&pci_lock); error = pdrv->probe(pdev, id); mtx_lock(&Giant); if (error) { spin_lock(&pci_lock); list_del(&pdev->links); spin_unlock(&pci_lock); put_device(&pdev->dev); return (-error); } return (0); } static inline int linux_pci_detach(device_t dev) { struct pci_dev *pdev; pdev = device_get_softc(dev); mtx_unlock(&Giant); pdev->pdrv->remove(pdev); mtx_lock(&Giant); spin_lock(&pci_lock); list_del(&pdev->links); spin_unlock(&pci_lock); put_device(&pdev->dev); return (0); } static device_method_t pci_methods[] = { DEVMETHOD(device_probe, linux_pci_probe), DEVMETHOD(device_attach, linux_pci_attach), DEVMETHOD(device_detach, linux_pci_detach), {0, 0} }; static inline int pci_register_driver(struct pci_driver *pdrv) { devclass_t bus; int error; spin_lock(&pci_lock); list_add(&pdrv->links, &pci_drivers); spin_unlock(&pci_lock); bus = devclass_find("pci"); pdrv->driver.name = pdrv->name; pdrv->driver.methods = pci_methods; pdrv->driver.size = sizeof(struct pci_dev); mtx_lock(&Giant); error = devclass_add_driver(bus, &pdrv->driver, BUS_PASS_DEFAULT, &pdrv->bsdclass); mtx_unlock(&Giant); if (error) return (-error); return (0); } static inline void pci_unregister_driver(struct pci_driver *pdrv) { devclass_t bus; list_del(&pdrv->links); bus = devclass_find("pci"); mtx_lock(&Giant); devclass_delete_driver(bus, &pdrv->driver); mtx_unlock(&Giant); } struct msix_entry { int entry; int vector; }; /* * Enable msix, positive errors indicate actual number of available * vectors. Negative errors are failures. */ static inline int pci_enable_msix(struct pci_dev *pdev, struct msix_entry *entries, int nreq) { struct resource_list_entry *rle; int error; int avail; int i; avail = pci_msix_count(pdev->dev.bsddev); if (avail < nreq) { if (avail == 0) return -EINVAL; return avail; } avail = nreq; if ((error = -pci_alloc_msix(pdev->dev.bsddev, &avail)) != 0) return error; + /* + * Handle case where "pci_alloc_msix()" may allocate less + * interrupts than available and return with no error: + */ + if (avail < nreq) { + pci_release_msi(pdev->dev.bsddev); + return avail; + } rle = _pci_get_rle(pdev, SYS_RES_IRQ, 1); pdev->dev.msix = rle->start; pdev->dev.msix_max = rle->start + avail; for (i = 0; i < nreq; i++) entries[i].vector = pdev->dev.msix + i; return (0); } static inline int pci_channel_offline(struct pci_dev *pdev) { return false; } static inline int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn) { return -ENODEV; } static inline void pci_disable_sriov(struct pci_dev *dev) { } /** * DEFINE_PCI_DEVICE_TABLE - macro used to describe a pci device table * @_table: device table name * * This macro is used to create a struct pci_device_id array (a device table) * in a generic manner. */ #define DEFINE_PCI_DEVICE_TABLE(_table) \ const struct pci_device_id _table[] __devinitdata /* XXX This should not be necessary. */ #define pcix_set_mmrbc(d, v) 0 #define pcix_get_max_mmrbc(d) 0 #define pcie_set_readrq(d, v) 0 #define PCI_DMA_BIDIRECTIONAL 0 #define PCI_DMA_TODEVICE 1 #define PCI_DMA_FROMDEVICE 2 #define PCI_DMA_NONE 3 #define pci_pool dma_pool #define pci_pool_destroy dma_pool_destroy #define pci_pool_alloc dma_pool_alloc #define pci_pool_free dma_pool_free #define pci_pool_create(_name, _pdev, _size, _align, _alloc) \ dma_pool_create(_name, &(_pdev)->dev, _size, _align, _alloc) #define pci_free_consistent(_hwdev, _size, _vaddr, _dma_handle) \ dma_free_coherent((_hwdev) == NULL ? NULL : &(_hwdev)->dev, \ _size, _vaddr, _dma_handle) #define pci_map_sg(_hwdev, _sg, _nents, _dir) \ dma_map_sg((_hwdev) == NULL ? NULL : &(_hwdev->dev), \ _sg, _nents, (enum dma_data_direction)_dir) #define pci_map_single(_hwdev, _ptr, _size, _dir) \ dma_map_single((_hwdev) == NULL ? NULL : &(_hwdev->dev), \ (_ptr), (_size), (enum dma_data_direction)_dir) #define pci_unmap_single(_hwdev, _addr, _size, _dir) \ dma_unmap_single((_hwdev) == NULL ? NULL : &(_hwdev)->dev, \ _addr, _size, (enum dma_data_direction)_dir) #define pci_unmap_sg(_hwdev, _sg, _nents, _dir) \ dma_unmap_sg((_hwdev) == NULL ? NULL : &(_hwdev)->dev, \ _sg, _nents, (enum dma_data_direction)_dir) #define pci_map_page(_hwdev, _page, _offset, _size, _dir) \ dma_map_page((_hwdev) == NULL ? NULL : &(_hwdev)->dev, _page,\ _offset, _size, (enum dma_data_direction)_dir) #define pci_unmap_page(_hwdev, _dma_address, _size, _dir) \ dma_unmap_page((_hwdev) == NULL ? NULL : &(_hwdev)->dev, \ _dma_address, _size, (enum dma_data_direction)_dir) #define pci_set_dma_mask(_pdev, mask) dma_set_mask(&(_pdev)->dev, (mask)) #define pci_dma_mapping_error(_pdev, _dma_addr) \ dma_mapping_error(&(_pdev)->dev, _dma_addr) #define pci_set_consistent_dma_mask(_pdev, _mask) \ dma_set_coherent_mask(&(_pdev)->dev, (_mask)) #define DECLARE_PCI_UNMAP_ADDR(x) DEFINE_DMA_UNMAP_ADDR(x); #define DECLARE_PCI_UNMAP_LEN(x) DEFINE_DMA_UNMAP_LEN(x); #define pci_unmap_addr dma_unmap_addr #define pci_unmap_addr_set dma_unmap_addr_set #define pci_unmap_len dma_unmap_len #define pci_unmap_len_set dma_unmap_len_set typedef unsigned int __bitwise pci_channel_state_t; typedef unsigned int __bitwise pci_ers_result_t; enum pci_channel_state { /* I/O channel is in normal state */ pci_channel_io_normal = (__force pci_channel_state_t) 1, /* I/O to channel is blocked */ pci_channel_io_frozen = (__force pci_channel_state_t) 2, /* PCI card is dead */ pci_channel_io_perm_failure = (__force pci_channel_state_t) 3, }; enum pci_ers_result { /* no result/none/not supported in device driver */ PCI_ERS_RESULT_NONE = (__force pci_ers_result_t) 1, /* Device driver can recover without slot reset */ PCI_ERS_RESULT_CAN_RECOVER = (__force pci_ers_result_t) 2, /* Device driver wants slot to be reset. */ PCI_ERS_RESULT_NEED_RESET = (__force pci_ers_result_t) 3, /* Device has completely failed, is unrecoverable */ PCI_ERS_RESULT_DISCONNECT = (__force pci_ers_result_t) 4, /* Device driver is fully recovered and operational */ PCI_ERS_RESULT_RECOVERED = (__force pci_ers_result_t) 5, }; /* PCI bus error event callbacks */ struct pci_error_handlers { /* PCI bus error detected on this device */ pci_ers_result_t (*error_detected)(struct pci_dev *dev, enum pci_channel_state error); /* MMIO has been re-enabled, but not DMA */ pci_ers_result_t (*mmio_enabled)(struct pci_dev *dev); /* PCI Express link has been reset */ pci_ers_result_t (*link_reset)(struct pci_dev *dev); /* PCI slot has been reset */ pci_ers_result_t (*slot_reset)(struct pci_dev *dev); /* Device driver may resume normal operations */ void (*resume)(struct pci_dev *dev); }; /* freeBSD does not support SRIOV - yet */ static inline struct pci_dev *pci_physfn(struct pci_dev *dev) { return dev; } static inline bool pci_is_pcie(struct pci_dev *dev) { return !!pci_pcie_cap(dev); } static inline u16 pcie_flags_reg(struct pci_dev *dev) { int pos; u16 reg16; pos = pci_find_capability(dev, PCI_CAP_ID_EXP); if (!pos) return 0; pci_read_config_word(dev, pos + PCI_EXP_FLAGS, ®16); return reg16; } static inline int pci_pcie_type(struct pci_dev *dev) { return (pcie_flags_reg(dev) & PCI_EXP_FLAGS_TYPE) >> 4; } static inline int pcie_cap_version(struct pci_dev *dev) { return pcie_flags_reg(dev) & PCI_EXP_FLAGS_VERS; } static inline bool pcie_cap_has_lnkctl(struct pci_dev *dev) { int type = pci_pcie_type(dev); return pcie_cap_version(dev) > 1 || type == PCI_EXP_TYPE_ROOT_PORT || type == PCI_EXP_TYPE_ENDPOINT || type == PCI_EXP_TYPE_LEG_END; } static inline bool pcie_cap_has_devctl(const struct pci_dev *dev) { return true; } static inline bool pcie_cap_has_sltctl(struct pci_dev *dev) { int type = pci_pcie_type(dev); return pcie_cap_version(dev) > 1 || type == PCI_EXP_TYPE_ROOT_PORT || (type == PCI_EXP_TYPE_DOWNSTREAM && pcie_flags_reg(dev) & PCI_EXP_FLAGS_SLOT); } static inline bool pcie_cap_has_rtctl(struct pci_dev *dev) { int type = pci_pcie_type(dev); return pcie_cap_version(dev) > 1 || type == PCI_EXP_TYPE_ROOT_PORT || type == PCI_EXP_TYPE_RC_EC; } static bool pcie_capability_reg_implemented(struct pci_dev *dev, int pos) { if (!pci_is_pcie(dev)) return false; switch (pos) { case PCI_EXP_FLAGS_TYPE: return true; case PCI_EXP_DEVCAP: case PCI_EXP_DEVCTL: case PCI_EXP_DEVSTA: return pcie_cap_has_devctl(dev); case PCI_EXP_LNKCAP: case PCI_EXP_LNKCTL: case PCI_EXP_LNKSTA: return pcie_cap_has_lnkctl(dev); case PCI_EXP_SLTCAP: case PCI_EXP_SLTCTL: case PCI_EXP_SLTSTA: return pcie_cap_has_sltctl(dev); case PCI_EXP_RTCTL: case PCI_EXP_RTCAP: case PCI_EXP_RTSTA: return pcie_cap_has_rtctl(dev); case PCI_EXP_DEVCAP2: case PCI_EXP_DEVCTL2: case PCI_EXP_LNKCAP2: case PCI_EXP_LNKCTL2: case PCI_EXP_LNKSTA2: return pcie_cap_version(dev) > 1; default: return false; } } static inline int pcie_capability_write_word(struct pci_dev *dev, int pos, u16 val) { if (pos & 1) return -EINVAL; if (!pcie_capability_reg_implemented(dev, pos)) return 0; return pci_write_config_word(dev, pci_pcie_cap(dev) + pos, val); } #endif /* _LINUX_PCI_H_ */ Index: stable/9/sys =================================================================== --- stable/9/sys (revision 277139) +++ stable/9/sys (revision 277140) Property changes on: stable/9/sys ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/sys:r276749