diff --git a/contrib/ofed/libibverbs/cmd.c b/contrib/ofed/libibverbs/cmd.c --- a/contrib/ofed/libibverbs/cmd.c +++ b/contrib/ofed/libibverbs/cmd.c @@ -238,6 +238,23 @@ attr->raw_packet_caps = resp->raw_packet_caps; } + if (attr_size >= offsetof(struct ibv_device_attr_ex, tm_caps) + + sizeof(attr->tm_caps)) { + if (resp->response_length >= + offsetof(struct ibv_query_device_resp_ex, tm_caps) + + sizeof(resp->tm_caps)) { + attr->tm_caps.max_rndv_hdr_size = + resp->tm_caps.max_rndv_hdr_size; + attr->tm_caps.max_num_tags = + resp->tm_caps.max_num_tags; + attr->tm_caps.flags = resp->tm_caps.flags; + attr->tm_caps.max_ops = + resp->tm_caps.max_ops; + attr->tm_caps.max_sge = + resp->tm_caps.max_sge; + } + } + return 0; } @@ -703,6 +720,17 @@ vxrcd = container_of(attr_ex->xrcd, struct verbs_xrcd, xrcd); cmd->xrcd_handle = vxrcd->handle; cmd->cq_handle = attr_ex->cq->handle; + } else if (attr_ex->comp_mask & IBV_SRQ_INIT_ATTR_TM) { + if (cmd->srq_type != IBV_SRQT_TM) + return EINVAL; + if (!(attr_ex->comp_mask & IBV_SRQ_INIT_ATTR_CQ) || + !attr_ex->tm_cap.max_num_tags) + return EINVAL; + + cmd->cq_handle = attr_ex->cq->handle; + cmd->max_num_tags = attr_ex->tm_cap.max_num_tags; + } else if (cmd->srq_type != IBV_SRQT_BASIC) { + return EINVAL; } if (write(context->cmd_fd, cmd, cmd_size) != cmd_size) diff --git a/contrib/ofed/libibverbs/examples/devinfo.c b/contrib/ofed/libibverbs/examples/devinfo.c --- a/contrib/ofed/libibverbs/examples/devinfo.c +++ b/contrib/ofed/libibverbs/examples/devinfo.c @@ -342,6 +342,22 @@ ex_flags & unknown_flags); } +static void print_tm_caps(const struct ibv_tm_caps *caps) +{ + if (caps->max_num_tags) { + printf("\tmax_rndv_hdr_size:\t\t%u\n", + caps->max_rndv_hdr_size); + printf("\tmax_num_tags:\t\t\t%u\n", caps->max_num_tags); + printf("\tmax_ops:\t\t\t%u\n", caps->max_ops); + printf("\tmax_sge:\t\t\t%u\n", caps->max_sge); + printf("\tflags:\n"); + if (caps->flags & IBV_TM_CAP_RC) + printf("\t\t\t\t\tIBV_TM_CAP_RC\n"); + } else { + printf("\ttag matching not supported\n"); + } +} + static void print_tso_caps(const struct ibv_tso_caps *caps) { uint32_t unknown_general_caps = ~(1 << IBV_QPT_RAW_PACKET | @@ -521,6 +537,7 @@ print_rss_caps(&device_attr.rss_caps); printf("\tmax_wq_type_rq:\t\t\t%u\n", device_attr.max_wq_type_rq); print_packet_pacing_caps(&device_attr.packet_pacing_caps); + print_tm_caps(&device_attr.tm_caps); } for (port = 1; port <= device_attr.orig_attr.phys_port_cnt; ++port) { diff --git a/contrib/ofed/libibverbs/kern-abi.h b/contrib/ofed/libibverbs/kern-abi.h --- a/contrib/ofed/libibverbs/kern-abi.h +++ b/contrib/ofed/libibverbs/kern-abi.h @@ -280,6 +280,15 @@ __u32 reserved; }; +struct ibv_tm_caps_resp { + __u32 max_rndv_hdr_size; + __u32 max_num_tags; + __u32 flags; + __u32 max_ops; + __u32 max_sge; + __u32 reserved; +}; + struct ibv_query_device_resp_ex { struct ibv_query_device_resp base; __u32 comp_mask; @@ -291,6 +300,7 @@ struct ibv_rss_caps_resp rss_caps; __u32 max_wq_type_rq; __u32 raw_packet_caps; + struct ibv_tm_caps_resp tm_caps; }; struct ibv_query_port { @@ -1116,7 +1126,7 @@ __u32 max_wr; __u32 max_sge; __u32 srq_limit; - __u32 reserved; + __u32 max_num_tags; __u32 xrcd_handle; __u32 cq_handle; __u64 driver_data[0]; diff --git a/contrib/ofed/libibverbs/man/ibv_create_srq_ex.3 b/contrib/ofed/libibverbs/man/ibv_create_srq_ex.3 --- a/contrib/ofed/libibverbs/man/ibv_create_srq_ex.3 +++ b/contrib/ofed/libibverbs/man/ibv_create_srq_ex.3 @@ -26,10 +26,11 @@ void *srq_context; /* Associated context of the SRQ */ struct ibv_srq_attr attr; /* SRQ attributes */ uint32_t comp_mask; /* Identifies valid fields */ -enum ibv_srq_type srq_type; /* Basic / XRC */ +enum ibv_srq_type srq_type; /* Basic / XRC / tag matching */ struct ibv_pd *pd; /* PD associated with the SRQ */ struct ibv_xrcd *xrcd; /* XRC domain to associate with the SRQ */ struct ibv_cq *cq; /* CQ to associate with the SRQ for XRC mode */ +struct ibv_tm_cap tm_cap; /* Tag matching attributes */ .in -8 }; .sp @@ -41,6 +42,16 @@ uint32_t srq_limit; /* The limit value of the SRQ */ .in -8 }; +.sp +.nf +struct ibv_tm_cap { +.in +8 +uint32_t max_num_tags; /* Tag matching list size */ +uint32_t max_ops; /* Number of outstanding tag list operations */ +.in -8 +}; +.sp +.nf .fi .PP The function diff --git a/contrib/ofed/libibverbs/man/ibv_query_device_ex.3 b/contrib/ofed/libibverbs/man/ibv_query_device_ex.3 --- a/contrib/ofed/libibverbs/man/ibv_query_device_ex.3 +++ b/contrib/ofed/libibverbs/man/ibv_query_device_ex.3 @@ -33,6 +33,7 @@ uint32_t max_wq_type_rq; /* Max Work Queue from type RQ */ struct ibv_packet_pacing_caps packet_pacing_caps; /* Packet pacing capabilities */ uint32_t raw_packet_caps; /* Raw packet capabilities, use enum ibv_raw_packet_caps */ +struct ibv_tm_caps tm_caps; /* Tag matching capabilities */ .in -8 }; @@ -84,6 +85,22 @@ .in -8 }; +enum ibv_tm_cap_flags { +.in +8 +IBV_TM_CAP_RC = 1 << 0, /* Support tag matching on RC transport */ +.in -8 +}; + +struct ibv_tm_caps { +.in +8 +uint32_t max_rndv_hdr_size; /* Max size of rendezvous request header */ +uint32_t max_num_tags; /* Max number of tagged buffers in a TM-SRQ matching list */ +uint32_t flags; /* From enum ibv_tm_cap_flags */ +uint32_t max_ops; /* Max number of outstanding list operations */ +uint32_t max_sge; /* Max number of SGEs in a tagged buffer */ +.in -8 +}; + .fi .SH "RETURN VALUE" .B ibv_query_device_ex() diff --git a/contrib/ofed/libibverbs/verbs.h b/contrib/ofed/libibverbs/verbs.h --- a/contrib/ofed/libibverbs/verbs.h +++ b/contrib/ofed/libibverbs/verbs.h @@ -259,6 +259,23 @@ IBV_RAW_PACKET_CAP_IP_CSUM = 1 << 2, }; +enum ibv_tm_cap_flags { + IBV_TM_CAP_RC = 1 << 0, +}; + +struct ibv_tm_caps { + /* Max size of rendezvous request header */ + uint32_t max_rndv_hdr_size; + /* Max number of tagged buffers in a TM-SRQ matching list */ + uint32_t max_num_tags; + /* From enum ibv_tm_cap_flags */ + uint32_t flags; + /* Max number of outstanding list operations */ + uint32_t max_ops; + /* Max number of SGEs in a tagged buffer */ + uint32_t max_sge; +}; + struct ibv_device_attr_ex { struct ibv_device_attr orig_attr; uint32_t comp_mask; @@ -271,6 +288,7 @@ uint32_t max_wq_type_rq; struct ibv_packet_pacing_caps packet_pacing_caps; uint32_t raw_packet_caps; /* Use ibv_raw_packet_caps */ + struct ibv_tm_caps tm_caps; }; enum ibv_mtu { @@ -652,7 +670,8 @@ enum ibv_srq_type { IBV_SRQT_BASIC, - IBV_SRQT_XRC + IBV_SRQT_XRC, + IBV_SRQT_TM, }; enum ibv_srq_init_attr_mask { @@ -660,7 +679,13 @@ IBV_SRQ_INIT_ATTR_PD = 1 << 1, IBV_SRQ_INIT_ATTR_XRCD = 1 << 2, IBV_SRQ_INIT_ATTR_CQ = 1 << 3, - IBV_SRQ_INIT_ATTR_RESERVED = 1 << 4 + IBV_SRQ_INIT_ATTR_TM = 1 << 4, + IBV_SRQ_INIT_ATTR_RESERVED = 1 << 5, +}; + +struct ibv_tm_cap { + uint32_t max_num_tags; + uint32_t max_ops; }; struct ibv_srq_init_attr_ex { @@ -672,6 +697,7 @@ struct ibv_pd *pd; struct ibv_xrcd *xrcd; struct ibv_cq *cq; + struct ibv_tm_cap tm_cap; }; enum ibv_wq_type { diff --git a/contrib/ofed/libmlx5/mlx5.h b/contrib/ofed/libmlx5/mlx5.h --- a/contrib/ofed/libmlx5/mlx5.h +++ b/contrib/ofed/libmlx5/mlx5.h @@ -360,6 +360,7 @@ __be32 *db; uint16_t counter; int wq_sig; + struct ibv_qp *cmd_qp; }; struct wr_list { diff --git a/contrib/ofed/libmlx5/verbs.c b/contrib/ofed/libmlx5/verbs.c --- a/contrib/ofed/libmlx5/verbs.c +++ b/contrib/ofed/libmlx5/verbs.c @@ -737,6 +737,13 @@ struct mlx5_srq *msrq = to_msrq(srq); struct mlx5_context *ctx = to_mctx(srq->context); + if (msrq->cmd_qp) { + ret = mlx5_destroy_qp(msrq->cmd_qp); + if (ret) + return ret; + msrq->cmd_qp = NULL; + } + ret = ibv_cmd_destroy_srq(srq); if (ret) return ret; @@ -1790,9 +1797,94 @@ return ret; } -static struct ibv_srq * -mlx5_create_xrc_srq(struct ibv_context *context, - struct ibv_srq_init_attr_ex *attr) +static struct ibv_qp * +create_cmd_qp(struct ibv_context *context, + struct ibv_srq_init_attr_ex *srq_attr, + struct ibv_srq *srq) +{ + struct ibv_qp_init_attr_ex init_attr = {}; + FILE *fp = to_mctx(context)->dbg_fp; + struct ibv_port_attr port_attr; + struct ibv_modify_qp qcmd = {}; + struct ibv_qp_attr attr = {}; + struct ibv_query_port pcmd; + struct ibv_qp *qp; + int attr_mask; + int port = 1; + int ret; + + ret = ibv_cmd_query_port(context, port, &port_attr, + &pcmd, sizeof(pcmd)); + if (ret) { + mlx5_dbg(fp, MLX5_DBG_QP, "ret %d\n", ret); + return NULL; + } + + init_attr.qp_type = IBV_QPT_RC; + init_attr.srq = srq; + /* Command QP will be used to pass MLX5_OPCODE_TAG_MATCHING messages + * to add/remove tag matching list entries. + * WQ size is based on max_ops parameter holding max number of + * outstanding list operations. + */ + init_attr.cap.max_send_wr = srq_attr->tm_cap.max_ops; + /* Tag matching list entry will point to a single sge buffer */ + init_attr.cap.max_send_sge = 1; + init_attr.comp_mask = IBV_QP_INIT_ATTR_PD; + init_attr.pd = srq_attr->pd; + init_attr.send_cq = srq_attr->cq; + init_attr.recv_cq = srq_attr->cq; + + qp = create_qp(context, &init_attr); + if (!qp) + return NULL; + + attr.qp_state = IBV_QPS_INIT; + attr.port_num = port; + attr_mask = IBV_QP_STATE | IBV_QP_PKEY_INDEX + | IBV_QP_PORT | IBV_QP_ACCESS_FLAGS; + + ret = ibv_cmd_modify_qp(qp, &attr, attr_mask, &qcmd, sizeof(qcmd)); + if (ret) { + mlx5_dbg(fp, MLX5_DBG_QP, "ret %d\n", ret); + goto err; + } + + attr.qp_state = IBV_QPS_RTR; + attr.path_mtu = IBV_MTU_256; + attr.dest_qp_num = qp->qp_num; /* Loopback */ + attr.ah_attr.dlid = port_attr.lid; + attr.ah_attr.port_num = port; + attr_mask = IBV_QP_STATE | IBV_QP_AV | IBV_QP_PATH_MTU + | IBV_QP_DEST_QPN | IBV_QP_RQ_PSN + | IBV_QP_MAX_DEST_RD_ATOMIC | IBV_QP_MIN_RNR_TIMER; + + ret = ibv_cmd_modify_qp(qp, &attr, attr_mask, &qcmd, sizeof(qcmd)); + if (ret) { + mlx5_dbg(fp, MLX5_DBG_QP, "ret %d\n", ret); + goto err; + } + + attr.qp_state = IBV_QPS_RTS; + attr_mask = IBV_QP_STATE | IBV_QP_TIMEOUT | IBV_QP_RETRY_CNT + | IBV_QP_RNR_RETRY | IBV_QP_SQ_PSN + | IBV_QP_MAX_QP_RD_ATOMIC; + + ret = ibv_cmd_modify_qp(qp, &attr, attr_mask, &qcmd, sizeof(qcmd)); + if (ret) { + mlx5_dbg(fp, MLX5_DBG_QP, "ret %d\n", ret); + goto err; + } + + return qp; + +err: + mlx5_destroy_qp(qp); + return NULL; +} + +struct ibv_srq *mlx5_create_srq_ex(struct ibv_context *context, + struct ibv_srq_init_attr_ex *attr) { int err; struct mlx5_create_srq_ex cmd; @@ -1804,6 +1896,24 @@ int uidx; FILE *fp = ctx->dbg_fp; + if (!(attr->comp_mask & IBV_SRQ_INIT_ATTR_TYPE) || + (attr->srq_type == IBV_SRQT_BASIC)) + return mlx5_create_srq(attr->pd, + (struct ibv_srq_init_attr *)attr); + + if (attr->srq_type != IBV_SRQT_XRC && + attr->srq_type != IBV_SRQT_TM) { + errno = EINVAL; + return NULL; + } + + /* An extended CQ is required to read TM information from */ + if (attr->srq_type == IBV_SRQT_TM && + !(attr->cq && (to_mcq(attr->cq)->flags & MLX5_CQ_FLAGS_EXTENDED))) { + errno = EINVAL; + return NULL; + } + msrq = calloc(1, sizeof(*msrq)); if (!msrq) return NULL; @@ -1882,10 +1992,16 @@ if (err) goto err_free_uidx; + if (attr->srq_type == IBV_SRQT_TM) { + msrq->cmd_qp = create_cmd_qp(context, attr, ibsrq); + if (!msrq->cmd_qp) + goto err_destroy; + } + if (!ctx->cqe_version) { err = mlx5_store_srq(to_mctx(context), resp.srqn, msrq); if (err) - goto err_destroy; + goto err_free_cmd; pthread_mutex_unlock(&ctx->srq_table_mutex); } @@ -1896,6 +2012,9 @@ return ibsrq; +err_free_cmd: + if (msrq->cmd_qp) + mlx5_destroy_qp(msrq->cmd_qp); err_destroy: ibv_cmd_destroy_srq(ibsrq); @@ -1918,19 +2037,6 @@ return NULL; } -struct ibv_srq *mlx5_create_srq_ex(struct ibv_context *context, - struct ibv_srq_init_attr_ex *attr) -{ - if (!(attr->comp_mask & IBV_SRQ_INIT_ATTR_TYPE) || - (attr->srq_type == IBV_SRQT_BASIC)) - return mlx5_create_srq(attr->pd, - (struct ibv_srq_init_attr *)attr); - else if (attr->srq_type == IBV_SRQT_XRC) - return mlx5_create_xrc_srq(context, attr); - - return NULL; -} - int mlx5_query_device_ex(struct ibv_context *context, const struct ibv_query_device_ex_input *input, struct ibv_device_attr_ex *attr,