Index: sys/dev/mlx4/device.h =================================================================== --- sys/dev/mlx4/device.h +++ sys/dev/mlx4/device.h @@ -199,6 +199,7 @@ MLX4_DEV_CAP_FLAG2_EQE_STRIDE = 1LL << 23, MLX4_DEV_CAP_FLAG2_UPDATE_QP_SRC_CHECK_LB = 1LL << 24, MLX4_DEV_CAP_FLAG2_RX_CSUM_MODE = 1LL << 25, + MLX4_DEV_CAP_FLAG2_SYS_EQS = 1LL << 26, }; /* bit enums for an 8-bit flags field indicating special use @@ -473,6 +474,7 @@ int num_cqs; int max_cqes; int reserved_cqs; + int num_sys_eqs; int num_eqs; int reserved_eqs; int num_comp_vectors; Index: sys/dev/mlx4/mlx4_core/fw.h =================================================================== --- sys/dev/mlx4/mlx4_core/fw.h +++ sys/dev/mlx4/mlx4_core/fw.h @@ -56,6 +56,7 @@ int max_mpts; int reserved_eqs; int max_eqs; + int num_sys_eqs; int reserved_mtts; int max_mrw_sz; int reserved_mrws; @@ -146,6 +147,16 @@ u8 def_counter_index; }; +struct mlx4_func { + int bus; + int device; + int function; + int physical_function; + int rsvd_eqs; + int max_eq; + int rsvd_uars; +}; + struct mlx4_adapter { u16 vsd_vendor_id; char board_id[MLX4_BOARD_ID_LEN]; @@ -173,6 +184,7 @@ u8 log_num_srqs; u8 log_num_cqs; u8 log_num_eqs; + u16 num_sys_eqs; u8 log_rd_per_qp; u8 log_mc_table_sz; u8 log_mpt_sz; @@ -225,6 +237,7 @@ int mlx4_SET_ICM_SIZE(struct mlx4_dev *dev, u64 icm_size, u64 *aux_pages); int mlx4_NOP(struct mlx4_dev *dev); int mlx4_MOD_STAT_CFG(struct mlx4_dev *dev, struct mlx4_mod_stat_cfg *cfg); +int mlx4_QUERY_FUNC(struct mlx4_dev *dev, struct mlx4_func *func, int slave); void mlx4_opreq_action(struct work_struct *work); #endif /* MLX4_FW_H */ Index: sys/dev/mlx4/mlx4_core/mlx4_eq.c =================================================================== --- sys/dev/mlx4/mlx4_core/mlx4_eq.c +++ sys/dev/mlx4/mlx4_core/mlx4_eq.c @@ -1136,8 +1136,12 @@ goto err_out_free; } - err = mlx4_bitmap_init(&priv->eq_table.bitmap, dev->caps.num_eqs, - dev->caps.num_eqs - 1, dev->caps.reserved_eqs, 0); + err = mlx4_bitmap_init(&priv->eq_table.bitmap, + roundup_pow_of_two(dev->caps.num_eqs), + dev->caps.num_eqs - 1, + dev->caps.reserved_eqs, + roundup_pow_of_two(dev->caps.num_eqs) - + dev->caps.num_eqs); if (err) goto err_out_free; Index: sys/dev/mlx4/mlx4_core/mlx4_fw.c =================================================================== --- sys/dev/mlx4/mlx4_core/mlx4_fw.c +++ sys/dev/mlx4/mlx4_core/mlx4_fw.c @@ -179,6 +179,60 @@ return err; } +int mlx4_QUERY_FUNC(struct mlx4_dev *dev, struct mlx4_func *func, int slave) +{ + struct mlx4_cmd_mailbox *mailbox; + u32 *outbox; + u8 in_modifier; + u8 field; + u16 field16; + int err; + +#define QUERY_FUNC_BUS_OFFSET 0x00 +#define QUERY_FUNC_DEVICE_OFFSET 0x01 +#define QUERY_FUNC_FUNCTION_OFFSET 0x01 +#define QUERY_FUNC_PHYSICAL_FUNCTION_OFFSET 0x03 +#define QUERY_FUNC_RSVD_EQS_OFFSET 0x04 +#define QUERY_FUNC_MAX_EQ_OFFSET 0x06 +#define QUERY_FUNC_RSVD_UARS_OFFSET 0x0b + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + outbox = mailbox->buf; + + in_modifier = slave; + + err = mlx4_cmd_box(dev, 0, mailbox->dma, in_modifier, 0, + MLX4_CMD_QUERY_FUNC, + MLX4_CMD_TIME_CLASS_A, + MLX4_CMD_NATIVE); + if (err) + goto out; + + MLX4_GET(field, outbox, QUERY_FUNC_BUS_OFFSET); + func->bus = field & 0xf; + MLX4_GET(field, outbox, QUERY_FUNC_DEVICE_OFFSET); + func->device = field & 0xf1; + MLX4_GET(field, outbox, QUERY_FUNC_FUNCTION_OFFSET); + func->function = field & 0x7; + MLX4_GET(field, outbox, QUERY_FUNC_PHYSICAL_FUNCTION_OFFSET); + func->physical_function = field & 0xf; + MLX4_GET(field16, outbox, QUERY_FUNC_RSVD_EQS_OFFSET); + func->rsvd_eqs = field16 & 0xffff; + MLX4_GET(field16, outbox, QUERY_FUNC_MAX_EQ_OFFSET); + func->max_eq = field16 & 0xffff; + MLX4_GET(field, outbox, QUERY_FUNC_RSVD_UARS_OFFSET); + func->rsvd_uars = field & 0x0f; + + mlx4_dbg(dev, "Bus: %d, Device: %d, Function: %d, Physical function: %d, Max EQs: %d, Reserved EQs: %d, Reserved UARs: %d\n", + func->bus, func->device, func->function, func->physical_function, + func->max_eq, func->rsvd_eqs, func->rsvd_uars); +out: + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} + int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, @@ -189,6 +243,7 @@ u8 field, port; u32 size; int err = 0; + struct mlx4_func func; #define QUERY_FUNC_CAP_FLAGS_OFFSET 0x0 #define QUERY_FUNC_CAP_NUM_PORTS_OFFSET 0x1 @@ -231,6 +286,7 @@ #define QUERY_FUNC_CAP_PROPS_DEF_COUNTER 0x20 #define QUERY_FUNC_CAP_RDMA_PROPS_FORCE_PHY_WQE_GID 0x80 +#define QUERY_FUNC_CAP_SUPPORTS_NON_POWER_OF_2_NUM_EQS (1 << 31) if (vhcr->op_modifier == 1) { port = vhcr->in_modifier; /* phys-port = logical-port */ @@ -293,11 +349,24 @@ size = dev->caps.num_cqs; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_CQ_QUOTA_OFFSET_DEP); - size = dev->caps.num_eqs; - MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MAX_EQ_OFFSET); - - size = dev->caps.reserved_eqs; - MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_RESERVED_EQ_OFFSET); + if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS) || + mlx4_QUERY_FUNC(dev, &func, slave)) { + size = vhcr->in_modifier & + QUERY_FUNC_CAP_SUPPORTS_NON_POWER_OF_2_NUM_EQS ? + dev->caps.num_eqs : + rounddown_pow_of_two(dev->caps.num_eqs); + MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MAX_EQ_OFFSET); + size = dev->caps.reserved_eqs; + MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_RESERVED_EQ_OFFSET); + } else { + size = vhcr->in_modifier & + QUERY_FUNC_CAP_SUPPORTS_NON_POWER_OF_2_NUM_EQS ? + func.max_eq : + rounddown_pow_of_two(func.max_eq); + MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MAX_EQ_OFFSET); + size = func.rsvd_eqs; + MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_RESERVED_EQ_OFFSET); + } size = priv->mfunc.master.res_tracker.res_alloc[RES_MPT].quota[slave]; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MPT_QUOTA_OFFSET); @@ -327,14 +396,17 @@ u8 field, op_modifier; u32 size; int err = 0, quotas = 0; + u32 in_modifier; op_modifier = !!gen_or_port; /* 0 = general, 1 = logical port */ + in_modifier = op_modifier ? gen_or_port : + QUERY_FUNC_CAP_SUPPORTS_NON_POWER_OF_2_NUM_EQS; mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) return PTR_ERR(mailbox); - err = mlx4_cmd_box(dev, 0, mailbox->dma, gen_or_port, op_modifier, + err = mlx4_cmd_box(dev, 0, mailbox->dma, in_modifier, op_modifier, MLX4_CMD_QUERY_FUNC_CAP, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); if (err) @@ -504,6 +576,7 @@ #define QUERY_DEV_CAP_MAX_MRW_SZ_OFFSET 0x21 #define QUERY_DEV_CAP_RSVD_MRW_OFFSET 0x22 #define QUERY_DEV_CAP_MAX_MTT_SEG_OFFSET 0x23 +#define QUERY_DEV_CAP_NUM_SYS_EQ_OFFSET 0x26 #define QUERY_DEV_CAP_MAX_AV_OFFSET 0x27 #define QUERY_DEV_CAP_MAX_REQ_QP_OFFSET 0x29 #define QUERY_DEV_CAP_MAX_RES_QP_OFFSET 0x2b @@ -601,6 +674,8 @@ dev_cap->reserved_mrws = 1 << (field & 0xf); MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_MTT_SEG_OFFSET); dev_cap->max_mtt_seg = 1 << (field & 0x3f); + MLX4_GET(size, outbox, QUERY_DEV_CAP_NUM_SYS_EQ_OFFSET); + dev_cap->num_sys_eqs = size & 0xfff; MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_REQ_QP_OFFSET); dev_cap->max_requester_per_qp = 1 << (field & 0x3f); MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_RES_QP_OFFSET); @@ -827,8 +902,11 @@ * we can't use any EQs whose doorbell falls on that page, * even if the EQ itself isn't reserved. */ - dev_cap->reserved_eqs = max(dev_cap->reserved_uars * 4, - dev_cap->reserved_eqs); + if (dev_cap->num_sys_eqs == 0) + dev_cap->reserved_eqs = max(dev_cap->reserved_uars * 4, + dev_cap->reserved_eqs); + else + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_SYS_EQS; mlx4_dbg(dev, "Max ICM size %lld MB\n", (unsigned long long) dev_cap->max_icm_sz >> 20); @@ -838,8 +916,9 @@ dev_cap->max_srqs, dev_cap->reserved_srqs, dev_cap->srq_entry_sz); mlx4_dbg(dev, "Max CQs: %d, reserved CQs: %d, entry size: %d\n", dev_cap->max_cqs, dev_cap->reserved_cqs, dev_cap->cqc_entry_sz); - mlx4_dbg(dev, "Max EQs: %d, reserved EQs: %d, entry size: %d\n", - dev_cap->max_eqs, dev_cap->reserved_eqs, dev_cap->eqc_entry_sz); + mlx4_dbg(dev, "Num sys EQs: %d, max EQs: %d, reserved EQs: %d, entry size: %d\n", + dev_cap->num_sys_eqs, dev_cap->max_eqs, dev_cap->reserved_eqs, + dev_cap->eqc_entry_sz); mlx4_dbg(dev, "reserved MPTs: %d, reserved MTTs: %d\n", dev_cap->reserved_mrws, dev_cap->reserved_mtts); mlx4_dbg(dev, "Max PDs: %d, reserved PDs: %d, reserved UARs: %d\n", @@ -1341,6 +1420,7 @@ #define INIT_HCA_AUXC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x50) #define INIT_HCA_EQC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x60) #define INIT_HCA_LOG_EQ_OFFSET (INIT_HCA_QPC_OFFSET + 0x67) +#define INIT_HCA_NUM_SYS_EQS_OFFSET (INIT_HCA_QPC_OFFSET + 0x6a) #define INIT_HCA_RDMARC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x70) #define INIT_HCA_LOG_RD_OFFSET (INIT_HCA_QPC_OFFSET + 0x77) #define INIT_HCA_MCAST_OFFSET 0x0c0 @@ -1449,6 +1529,7 @@ MLX4_PUT(inbox, param->auxc_base, INIT_HCA_AUXC_BASE_OFFSET); MLX4_PUT(inbox, param->eqc_base, INIT_HCA_EQC_BASE_OFFSET); MLX4_PUT(inbox, param->log_num_eqs, INIT_HCA_LOG_EQ_OFFSET); + MLX4_PUT(inbox, param->num_sys_eqs, INIT_HCA_NUM_SYS_EQS_OFFSET); MLX4_PUT(inbox, param->rdmarc_base, INIT_HCA_RDMARC_BASE_OFFSET); MLX4_PUT(inbox, param->log_rd_per_qp, INIT_HCA_LOG_RD_OFFSET); @@ -1555,6 +1636,7 @@ MLX4_GET(param->auxc_base, outbox, INIT_HCA_AUXC_BASE_OFFSET); MLX4_GET(param->eqc_base, outbox, INIT_HCA_EQC_BASE_OFFSET); MLX4_GET(param->log_num_eqs, outbox, INIT_HCA_LOG_EQ_OFFSET); + MLX4_GET(param->num_sys_eqs, outbox, INIT_HCA_NUM_SYS_EQS_OFFSET); MLX4_GET(param->rdmarc_base, outbox, INIT_HCA_RDMARC_BASE_OFFSET); MLX4_GET(param->log_rd_per_qp, outbox, INIT_HCA_LOG_RD_OFFSET); Index: sys/dev/mlx4/mlx4_core/mlx4_main.c =================================================================== --- sys/dev/mlx4/mlx4_core/mlx4_main.c +++ sys/dev/mlx4/mlx4_core/mlx4_main.c @@ -590,6 +590,29 @@ dev->caps.port_mask[i] = dev->caps.port_type[i]; } +enum { + MLX4_QUERY_FUNC_NUM_SYS_EQS = 1 << 0, +}; + +static int mlx4_query_func(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) +{ + int err = 0; + struct mlx4_func func; + + if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS) { + err = mlx4_QUERY_FUNC(dev, &func, 0); + if (err) { + mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n"); + return err; + } + dev_cap->max_eqs = func.max_eq; + dev_cap->reserved_eqs = func.rsvd_eqs; + dev_cap->reserved_uars = func.rsvd_uars; + err |= MLX4_QUERY_FUNC_NUM_SYS_EQS; + } + return err; +} + static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) { int err; @@ -623,7 +646,10 @@ } dev->caps.num_ports = dev_cap->num_ports; - dev->phys_caps.num_phys_eqs = MLX4_MAX_EQ_NUM; + dev->caps.num_sys_eqs = dev_cap->num_sys_eqs; + dev->phys_caps.num_phys_eqs = dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS ? + dev->caps.num_sys_eqs : + MLX4_MAX_EQ_NUM; for (i = 1; i <= dev->caps.num_ports; ++i) { dev->caps.vl_cap[i] = dev_cap->max_vl[i]; dev->caps.ib_mtu_cap[i] = dev_cap->ib_mtu[i]; @@ -1465,8 +1491,7 @@ if (err) goto err_srq; - num_eqs = (mlx4_is_master(dev)) ? dev->phys_caps.num_phys_eqs : - dev->caps.num_eqs; + num_eqs = dev->phys_caps.num_phys_eqs; err = mlx4_init_icm_table(dev, &priv->eq_table.cmpt_table, cmpt_base + ((u64) (MLX4_CMPT_TYPE_EQ * @@ -1528,8 +1553,7 @@ } - num_eqs = (mlx4_is_master(dev)) ? dev->phys_caps.num_phys_eqs : - dev->caps.num_eqs; + num_eqs = dev->phys_caps.num_phys_eqs; err = mlx4_init_icm_table(dev, &priv->eq_table.table, init_hca->eqc_base, dev_cap->eqc_entry_sz, num_eqs, num_eqs, 0, 0); @@ -2065,6 +2089,18 @@ goto err_free_icm; } + if (dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS) { + err = mlx4_query_func(dev, dev_cap); + if (err < 0) { + mlx4_err(dev, "QUERY_FUNC command failed, aborting.\n"); + goto err_stop_fw; + } else if (err & MLX4_QUERY_FUNC_NUM_SYS_EQS) { + dev->caps.num_eqs = dev_cap->max_eqs; + dev->caps.reserved_eqs = dev_cap->reserved_eqs; + dev->caps.reserved_uars = dev_cap->reserved_uars; + } + } + /* * Read HCA frequency by QUERY_HCA command */ @@ -2905,13 +2941,12 @@ { struct mlx4_priv *priv = mlx4_priv(dev); struct msix_entry *entries; - int nreq = min_t(int, dev->caps.num_ports * - min_t(int, num_possible_cpus() + 1, MAX_MSIX_P_PORT) - + MSIX_LEGACY_SZ, MAX_MSIX); int err; int i; if (msi_x) { + int nreq = dev->caps.num_ports * num_online_cpus() + MSIX_LEGACY_SZ; + nreq = min_t(int, dev->caps.num_eqs - dev->caps.reserved_eqs, nreq); Index: sys/dev/mlx4/mlx4_core/mlx4_profile.c =================================================================== --- sys/dev/mlx4/mlx4_core/mlx4_profile.c +++ sys/dev/mlx4/mlx4_core/mlx4_profile.c @@ -199,10 +199,17 @@ init_hca->log_num_cqs = profile[i].log_num; break; case MLX4_RES_EQ: - dev->caps.num_eqs = roundup_pow_of_two(min_t(unsigned, dev_cap->max_eqs, - MAX_MSIX)); - init_hca->eqc_base = profile[i].start; - init_hca->log_num_eqs = ilog2(dev->caps.num_eqs); + if (dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS) { + init_hca->log_num_eqs = 0x1f; + init_hca->eqc_base = profile[i].start; + init_hca->num_sys_eqs = dev_cap->num_sys_eqs; + } else { + dev->caps.num_eqs = roundup_pow_of_two( + min_t(unsigned, + dev_cap->max_eqs, MAX_MSIX)); + init_hca->eqc_base = profile[i].start; + init_hca->log_num_eqs = ilog2(dev->caps.num_eqs); + } break; case MLX4_RES_DMPT: dev->caps.num_mpts = profile[i].num;