diff --git a/contrib/ofed/libibverbs/examples/devinfo.c b/contrib/ofed/libibverbs/examples/devinfo.c index 7693cb30739d..866d82718f7d 100644 --- a/contrib/ofed/libibverbs/examples/devinfo.c +++ b/contrib/ofed/libibverbs/examples/devinfo.c @@ -1,698 +1,699 @@ /* * Copyright (c) 2005 Cisco Systems. All rights reserved. * Copyright (c) 2005 Mellanox Technologies Ltd. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include #include #include #include #include #include #include #include #include #include static int verbose; static int null_gid(union ibv_gid *gid) { return !(gid->raw[8] | gid->raw[9] | gid->raw[10] | gid->raw[11] | gid->raw[12] | gid->raw[13] | gid->raw[14] | gid->raw[15]); } static const char *guid_str(__be64 _node_guid, char *str) { uint64_t node_guid = be64toh(_node_guid); sprintf(str, "%04x:%04x:%04x:%04x", (unsigned) (node_guid >> 48) & 0xffff, (unsigned) (node_guid >> 32) & 0xffff, (unsigned) (node_guid >> 16) & 0xffff, (unsigned) (node_guid >> 0) & 0xffff); return str; } static const char *transport_str(enum ibv_transport_type transport) { switch (transport) { case IBV_TRANSPORT_IB: return "InfiniBand"; case IBV_TRANSPORT_IWARP: return "iWARP"; case IBV_TRANSPORT_USNIC: return "usNIC"; case IBV_TRANSPORT_USNIC_UDP: return "usNIC UDP"; default: return "invalid transport"; } } static const char *port_state_str(enum ibv_port_state pstate) { switch (pstate) { case IBV_PORT_DOWN: return "PORT_DOWN"; case IBV_PORT_INIT: return "PORT_INIT"; case IBV_PORT_ARMED: return "PORT_ARMED"; case IBV_PORT_ACTIVE: return "PORT_ACTIVE"; default: return "invalid state"; } } static const char *port_phy_state_str(uint8_t phys_state) { switch (phys_state) { case 1: return "SLEEP"; case 2: return "POLLING"; case 3: return "DISABLED"; case 4: return "PORT_CONFIGURATION TRAINNING"; case 5: return "LINK_UP"; case 6: return "LINK_ERROR_RECOVERY"; case 7: return "PHY TEST"; default: return "invalid physical state"; } } static const char *atomic_cap_str(enum ibv_atomic_cap atom_cap) { switch (atom_cap) { case IBV_ATOMIC_NONE: return "ATOMIC_NONE"; case IBV_ATOMIC_HCA: return "ATOMIC_HCA"; case IBV_ATOMIC_GLOB: return "ATOMIC_GLOB"; default: return "invalid atomic capability"; } } static const char *mtu_str(enum ibv_mtu max_mtu) { switch (max_mtu) { case IBV_MTU_256: return 
"256"; case IBV_MTU_512: return "512"; case IBV_MTU_1024: return "1024"; case IBV_MTU_2048: return "2048"; case IBV_MTU_4096: return "4096"; default: return "invalid MTU"; } } static const char *width_str(uint8_t width) { switch (width) { case 1: return "1"; case 2: return "4"; case 4: return "8"; case 8: return "12"; case 16: return "2"; default: return "invalid width"; } } static const char *speed_str(uint8_t speed) { switch (speed) { case 1: return "2.5 Gbps"; case 2: return "5.0 Gbps"; case 4: /* fall through */ case 8: return "10.0 Gbps"; case 16: return "14.0 Gbps"; case 32: return "25.0 Gbps"; case 64: return "50.0 Gbps"; + case 128: return "100.0 Gbps"; default: return "invalid speed"; } } static const char *vl_str(uint8_t vl_num) { switch (vl_num) { case 1: return "1"; case 2: return "2"; case 3: return "4"; case 4: return "8"; case 5: return "15"; default: return "invalid value"; } } static int print_all_port_gids(struct ibv_context *ctx, uint8_t port_num, int tbl_len) { union ibv_gid gid; int rc = 0; int i; for (i = 0; i < tbl_len; i++) { rc = ibv_query_gid(ctx, port_num, i, &gid); if (rc) { fprintf(stderr, "Failed to query gid to port %d, index %d\n", port_num, i); return rc; } if (!null_gid(&gid)) printf("\t\t\tGID[%3d]:\t\t%02x%02x:%02x%02x:%02x%02x:%02x%02x:%02x%02x:%02x%02x:%02x%02x:%02x%02x\n", i, gid.raw[ 0], gid.raw[ 1], gid.raw[ 2], gid.raw[ 3], gid.raw[ 4], gid.raw[ 5], gid.raw[ 6], gid.raw[ 7], gid.raw[ 8], gid.raw[ 9], gid.raw[10], gid.raw[11], gid.raw[12], gid.raw[13], gid.raw[14], gid.raw[15]); } return rc; } static const char *link_layer_str(uint8_t link_layer) { switch (link_layer) { case IBV_LINK_LAYER_UNSPECIFIED: case IBV_LINK_LAYER_INFINIBAND: return "InfiniBand"; case IBV_LINK_LAYER_ETHERNET: return "Ethernet"; default: return "Unknown"; } } static void print_device_cap_flags(uint32_t dev_cap_flags) { uint32_t unknown_flags = ~(IBV_DEVICE_RESIZE_MAX_WR | IBV_DEVICE_BAD_PKEY_CNTR | IBV_DEVICE_BAD_QKEY_CNTR | IBV_DEVICE_RAW_MULTI | IBV_DEVICE_AUTO_PATH_MIG | IBV_DEVICE_CHANGE_PHY_PORT | IBV_DEVICE_UD_AV_PORT_ENFORCE | IBV_DEVICE_CURR_QP_STATE_MOD | IBV_DEVICE_SHUTDOWN_PORT | IBV_DEVICE_INIT_TYPE | IBV_DEVICE_PORT_ACTIVE_EVENT | IBV_DEVICE_SYS_IMAGE_GUID | IBV_DEVICE_RC_RNR_NAK_GEN | IBV_DEVICE_SRQ_RESIZE | IBV_DEVICE_N_NOTIFY_CQ | IBV_DEVICE_MEM_WINDOW | IBV_DEVICE_UD_IP_CSUM | IBV_DEVICE_XRC | IBV_DEVICE_MEM_MGT_EXTENSIONS | IBV_DEVICE_MEM_WINDOW_TYPE_2A | IBV_DEVICE_MEM_WINDOW_TYPE_2B | IBV_DEVICE_RC_IP_CSUM | IBV_DEVICE_RAW_IP_CSUM | IBV_DEVICE_MANAGED_FLOW_STEERING); if (dev_cap_flags & IBV_DEVICE_RESIZE_MAX_WR) printf("\t\t\t\t\tRESIZE_MAX_WR\n"); if (dev_cap_flags & IBV_DEVICE_BAD_PKEY_CNTR) printf("\t\t\t\t\tBAD_PKEY_CNTR\n"); if (dev_cap_flags & IBV_DEVICE_BAD_QKEY_CNTR) printf("\t\t\t\t\tBAD_QKEY_CNTR\n"); if (dev_cap_flags & IBV_DEVICE_RAW_MULTI) printf("\t\t\t\t\tRAW_MULTI\n"); if (dev_cap_flags & IBV_DEVICE_AUTO_PATH_MIG) printf("\t\t\t\t\tAUTO_PATH_MIG\n"); if (dev_cap_flags & IBV_DEVICE_CHANGE_PHY_PORT) printf("\t\t\t\t\tCHANGE_PHY_PORT\n"); if (dev_cap_flags & IBV_DEVICE_UD_AV_PORT_ENFORCE) printf("\t\t\t\t\tUD_AV_PORT_ENFORCE\n"); if (dev_cap_flags & IBV_DEVICE_CURR_QP_STATE_MOD) printf("\t\t\t\t\tCURR_QP_STATE_MOD\n"); if (dev_cap_flags & IBV_DEVICE_SHUTDOWN_PORT) printf("\t\t\t\t\tSHUTDOWN_PORT\n"); if (dev_cap_flags & IBV_DEVICE_INIT_TYPE) printf("\t\t\t\t\tINIT_TYPE\n"); if (dev_cap_flags & IBV_DEVICE_PORT_ACTIVE_EVENT) printf("\t\t\t\t\tPORT_ACTIVE_EVENT\n"); if (dev_cap_flags & IBV_DEVICE_SYS_IMAGE_GUID) 
printf("\t\t\t\t\tSYS_IMAGE_GUID\n"); if (dev_cap_flags & IBV_DEVICE_RC_RNR_NAK_GEN) printf("\t\t\t\t\tRC_RNR_NAK_GEN\n"); if (dev_cap_flags & IBV_DEVICE_SRQ_RESIZE) printf("\t\t\t\t\tSRQ_RESIZE\n"); if (dev_cap_flags & IBV_DEVICE_N_NOTIFY_CQ) printf("\t\t\t\t\tN_NOTIFY_CQ\n"); if (dev_cap_flags & IBV_DEVICE_MEM_WINDOW) printf("\t\t\t\t\tMEM_WINDOW\n"); if (dev_cap_flags & IBV_DEVICE_UD_IP_CSUM) printf("\t\t\t\t\tUD_IP_CSUM\n"); if (dev_cap_flags & IBV_DEVICE_XRC) printf("\t\t\t\t\tXRC\n"); if (dev_cap_flags & IBV_DEVICE_MEM_MGT_EXTENSIONS) printf("\t\t\t\t\tMEM_MGT_EXTENSIONS\n"); if (dev_cap_flags & IBV_DEVICE_MEM_WINDOW_TYPE_2A) printf("\t\t\t\t\tMEM_WINDOW_TYPE_2A\n"); if (dev_cap_flags & IBV_DEVICE_MEM_WINDOW_TYPE_2B) printf("\t\t\t\t\tMEM_WINDOW_TYPE_2B\n"); if (dev_cap_flags & IBV_DEVICE_RC_IP_CSUM) printf("\t\t\t\t\tRC_IP_CSUM\n"); if (dev_cap_flags & IBV_DEVICE_RAW_IP_CSUM) printf("\t\t\t\t\tRAW_IP_CSUM\n"); if (dev_cap_flags & IBV_DEVICE_MANAGED_FLOW_STEERING) printf("\t\t\t\t\tMANAGED_FLOW_STEERING\n"); if (dev_cap_flags & unknown_flags) printf("\t\t\t\t\tUnknown flags: 0x%" PRIX32 "\n", dev_cap_flags & unknown_flags); } static void print_odp_trans_caps(uint32_t trans) { uint32_t unknown_transport_caps = ~(IBV_ODP_SUPPORT_SEND | IBV_ODP_SUPPORT_RECV | IBV_ODP_SUPPORT_WRITE | IBV_ODP_SUPPORT_READ | IBV_ODP_SUPPORT_ATOMIC); if (!trans) { printf("\t\t\t\t\tNO SUPPORT\n"); } else { if (trans & IBV_ODP_SUPPORT_SEND) printf("\t\t\t\t\tSUPPORT_SEND\n"); if (trans & IBV_ODP_SUPPORT_RECV) printf("\t\t\t\t\tSUPPORT_RECV\n"); if (trans & IBV_ODP_SUPPORT_WRITE) printf("\t\t\t\t\tSUPPORT_WRITE\n"); if (trans & IBV_ODP_SUPPORT_READ) printf("\t\t\t\t\tSUPPORT_READ\n"); if (trans & IBV_ODP_SUPPORT_ATOMIC) printf("\t\t\t\t\tSUPPORT_ATOMIC\n"); if (trans & unknown_transport_caps) printf("\t\t\t\t\tUnknown flags: 0x%" PRIX32 "\n", trans & unknown_transport_caps); } } static void print_odp_caps(const struct ibv_odp_caps *caps) { uint64_t unknown_general_caps = ~(IBV_ODP_SUPPORT); /* general odp caps */ printf("\tgeneral_odp_caps:\n"); if (caps->general_caps & IBV_ODP_SUPPORT) printf("\t\t\t\t\tODP_SUPPORT\n"); if (caps->general_caps & unknown_general_caps) printf("\t\t\t\t\tUnknown flags: 0x%" PRIX64 "\n", caps->general_caps & unknown_general_caps); /* RC transport */ printf("\trc_odp_caps:\n"); print_odp_trans_caps(caps->per_transport_caps.rc_odp_caps); printf("\tuc_odp_caps:\n"); print_odp_trans_caps(caps->per_transport_caps.uc_odp_caps); printf("\tud_odp_caps:\n"); print_odp_trans_caps(caps->per_transport_caps.ud_odp_caps); } static void print_device_cap_flags_ex(uint64_t device_cap_flags_ex) { uint64_t ex_flags = device_cap_flags_ex & 0xffffffff00000000ULL; uint64_t unknown_flags = ~(IBV_DEVICE_RAW_SCATTER_FCS); if (ex_flags & IBV_DEVICE_RAW_SCATTER_FCS) printf("\t\t\t\t\tRAW_SCATTER_FCS\n"); if (ex_flags & unknown_flags) printf("\t\t\t\t\tUnknown flags: 0x%" PRIX64 "\n", ex_flags & unknown_flags); } static void print_tso_caps(const struct ibv_tso_caps *caps) { uint32_t unknown_general_caps = ~(1 << IBV_QPT_RAW_PACKET | 1 << IBV_QPT_UD); printf("\ttso_caps:\n"); printf("\tmax_tso:\t\t\t%d\n", caps->max_tso); if (caps->max_tso) { printf("\tsupported_qp:\n"); if (ibv_is_qpt_supported(caps->supported_qpts, IBV_QPT_RAW_PACKET)) printf("\t\t\t\t\tSUPPORT_RAW_PACKET\n"); if (ibv_is_qpt_supported(caps->supported_qpts, IBV_QPT_UD)) printf("\t\t\t\t\tSUPPORT_UD\n"); if (caps->supported_qpts & unknown_general_caps) printf("\t\t\t\t\tUnknown flags: 0x%" PRIX32 "\n", caps->supported_qpts & 
unknown_general_caps); } } static void print_rss_caps(const struct ibv_rss_caps *caps) { uint32_t unknown_general_caps = ~(1 << IBV_QPT_RAW_PACKET | 1 << IBV_QPT_UD); printf("\trss_caps:\n"); printf("\t\tmax_rwq_indirection_tables:\t\t\t%u\n", caps->max_rwq_indirection_tables); printf("\t\tmax_rwq_indirection_table_size:\t\t\t%u\n", caps->max_rwq_indirection_table_size); printf("\t\trx_hash_function:\t\t\t\t0x%x\n", caps->rx_hash_function); printf("\t\trx_hash_fields_mask:\t\t\t\t0x%" PRIX64 "\n", caps->rx_hash_fields_mask); if (caps->supported_qpts) { printf("\t\tsupported_qp:\n"); if (ibv_is_qpt_supported(caps->supported_qpts, IBV_QPT_RAW_PACKET)) printf("\t\t\t\t\tSUPPORT_RAW_PACKET\n"); if (ibv_is_qpt_supported(caps->supported_qpts, IBV_QPT_UD)) printf("\t\t\t\t\tSUPPORT_UD\n"); if (caps->supported_qpts & unknown_general_caps) printf("\t\t\t\t\tUnknown flags: 0x%" PRIX32 "\n", caps->supported_qpts & unknown_general_caps); } } static void print_packet_pacing_caps(const struct ibv_packet_pacing_caps *caps) { uint32_t unknown_general_caps = ~(1 << IBV_QPT_RAW_PACKET | 1 << IBV_QPT_UD); printf("\tpacket_pacing_caps:\n"); printf("\t\tqp_rate_limit_min:\t%ukbps\n", caps->qp_rate_limit_min); printf("\t\tqp_rate_limit_max:\t%ukbps\n", caps->qp_rate_limit_max); if (caps->qp_rate_limit_max) { printf("\t\tsupported_qp:\n"); if (ibv_is_qpt_supported(caps->supported_qpts, IBV_QPT_RAW_PACKET)) printf("\t\t\t\t\tSUPPORT_RAW_PACKET\n"); if (ibv_is_qpt_supported(caps->supported_qpts, IBV_QPT_UD)) printf("\t\t\t\t\tSUPPORT_UD\n"); if (caps->supported_qpts & unknown_general_caps) printf("\t\t\t\t\tUnknown flags: 0x%" PRIX32 "\n", caps->supported_qpts & unknown_general_caps); } } static void print_raw_packet_caps(uint32_t raw_packet_caps) { printf("\traw packet caps:\n"); if (raw_packet_caps & IBV_RAW_PACKET_CAP_CVLAN_STRIPPING) printf("\t\t\t\t\tC-VLAN stripping offload\n"); if (raw_packet_caps & IBV_RAW_PACKET_CAP_SCATTER_FCS) printf("\t\t\t\t\tScatter FCS offload\n"); if (raw_packet_caps & IBV_RAW_PACKET_CAP_IP_CSUM) printf("\t\t\t\t\tIP csum offload\n"); } static int print_hca_cap(struct ibv_device *ib_dev, uint8_t ib_port) { struct ibv_context *ctx; struct ibv_device_attr_ex device_attr; struct ibv_port_attr port_attr; int rc = 0; uint8_t port; char buf[256]; ctx = ibv_open_device(ib_dev); if (!ctx) { fprintf(stderr, "Failed to open device\n"); rc = 1; goto cleanup; } if (ibv_query_device_ex(ctx, NULL, &device_attr)) { fprintf(stderr, "Failed to query device props\n"); rc = 2; goto cleanup; } if (ib_port && ib_port > device_attr.orig_attr.phys_port_cnt) { fprintf(stderr, "Invalid port requested for device\n"); /* rc = 3 is taken by failure to clean up */ rc = 4; goto cleanup; } printf("hca_id:\t%s\n", ibv_get_device_name(ib_dev)); printf("\ttransport:\t\t\t%s (%d)\n", transport_str(ib_dev->transport_type), ib_dev->transport_type); if (strlen(device_attr.orig_attr.fw_ver)) printf("\tfw_ver:\t\t\t\t%s\n", device_attr.orig_attr.fw_ver); printf("\tnode_guid:\t\t\t%s\n", guid_str(device_attr.orig_attr.node_guid, buf)); printf("\tsys_image_guid:\t\t\t%s\n", guid_str(device_attr.orig_attr.sys_image_guid, buf)); printf("\tvendor_id:\t\t\t0x%04x\n", device_attr.orig_attr.vendor_id); printf("\tvendor_part_id:\t\t\t%d\n", device_attr.orig_attr.vendor_part_id); printf("\thw_ver:\t\t\t\t0x%X\n", device_attr.orig_attr.hw_ver); if (ibv_read_sysfs_file(ib_dev->ibdev_path, "board_id", buf, sizeof buf) > 0) printf("\tboard_id:\t\t\t%s\n", buf); printf("\tphys_port_cnt:\t\t\t%d\n", device_attr.orig_attr.phys_port_cnt); 
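	/* The extended attribute dump below is produced only when devinfo is invoked with -v / --verbose. */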
if (verbose) { printf("\tmax_mr_size:\t\t\t0x%llx\n", (unsigned long long) device_attr.orig_attr.max_mr_size); printf("\tpage_size_cap:\t\t\t0x%llx\n", (unsigned long long) device_attr.orig_attr.page_size_cap); printf("\tmax_qp:\t\t\t\t%d\n", device_attr.orig_attr.max_qp); printf("\tmax_qp_wr:\t\t\t%d\n", device_attr.orig_attr.max_qp_wr); printf("\tdevice_cap_flags:\t\t0x%08x\n", device_attr.orig_attr.device_cap_flags); print_device_cap_flags(device_attr.orig_attr.device_cap_flags); printf("\tmax_sge:\t\t\t%d\n", device_attr.orig_attr.max_sge); printf("\tmax_sge_rd:\t\t\t%d\n", device_attr.orig_attr.max_sge_rd); printf("\tmax_cq:\t\t\t\t%d\n", device_attr.orig_attr.max_cq); printf("\tmax_cqe:\t\t\t%d\n", device_attr.orig_attr.max_cqe); printf("\tmax_mr:\t\t\t\t%d\n", device_attr.orig_attr.max_mr); printf("\tmax_pd:\t\t\t\t%d\n", device_attr.orig_attr.max_pd); printf("\tmax_qp_rd_atom:\t\t\t%d\n", device_attr.orig_attr.max_qp_rd_atom); printf("\tmax_ee_rd_atom:\t\t\t%d\n", device_attr.orig_attr.max_ee_rd_atom); printf("\tmax_res_rd_atom:\t\t%d\n", device_attr.orig_attr.max_res_rd_atom); printf("\tmax_qp_init_rd_atom:\t\t%d\n", device_attr.orig_attr.max_qp_init_rd_atom); printf("\tmax_ee_init_rd_atom:\t\t%d\n", device_attr.orig_attr.max_ee_init_rd_atom); printf("\tatomic_cap:\t\t\t%s (%d)\n", atomic_cap_str(device_attr.orig_attr.atomic_cap), device_attr.orig_attr.atomic_cap); printf("\tmax_ee:\t\t\t\t%d\n", device_attr.orig_attr.max_ee); printf("\tmax_rdd:\t\t\t%d\n", device_attr.orig_attr.max_rdd); printf("\tmax_mw:\t\t\t\t%d\n", device_attr.orig_attr.max_mw); printf("\tmax_raw_ipv6_qp:\t\t%d\n", device_attr.orig_attr.max_raw_ipv6_qp); printf("\tmax_raw_ethy_qp:\t\t%d\n", device_attr.orig_attr.max_raw_ethy_qp); printf("\tmax_mcast_grp:\t\t\t%d\n", device_attr.orig_attr.max_mcast_grp); printf("\tmax_mcast_qp_attach:\t\t%d\n", device_attr.orig_attr.max_mcast_qp_attach); printf("\tmax_total_mcast_qp_attach:\t%d\n", device_attr.orig_attr.max_total_mcast_qp_attach); printf("\tmax_ah:\t\t\t\t%d\n", device_attr.orig_attr.max_ah); printf("\tmax_fmr:\t\t\t%d\n", device_attr.orig_attr.max_fmr); if (device_attr.orig_attr.max_fmr) printf("\tmax_map_per_fmr:\t\t%d\n", device_attr.orig_attr.max_map_per_fmr); printf("\tmax_srq:\t\t\t%d\n", device_attr.orig_attr.max_srq); if (device_attr.orig_attr.max_srq) { printf("\tmax_srq_wr:\t\t\t%d\n", device_attr.orig_attr.max_srq_wr); printf("\tmax_srq_sge:\t\t\t%d\n", device_attr.orig_attr.max_srq_sge); } printf("\tmax_pkeys:\t\t\t%d\n", device_attr.orig_attr.max_pkeys); printf("\tlocal_ca_ack_delay:\t\t%d\n", device_attr.orig_attr.local_ca_ack_delay); print_odp_caps(&device_attr.odp_caps); if (device_attr.completion_timestamp_mask) printf("\tcompletion timestamp_mask:\t\t\t0x%016" PRIx64 "\n", device_attr.completion_timestamp_mask); else printf("\tcompletion_timestamp_mask not supported\n"); if (device_attr.hca_core_clock) printf("\thca_core_clock:\t\t\t%" PRIu64 "kHZ\n", device_attr.hca_core_clock); else printf("\tcore clock not supported\n"); if (device_attr.raw_packet_caps) print_raw_packet_caps(device_attr.raw_packet_caps); printf("\tdevice_cap_flags_ex:\t\t0x%" PRIX64 "\n", device_attr.device_cap_flags_ex); print_device_cap_flags_ex(device_attr.device_cap_flags_ex); print_tso_caps(&device_attr.tso_caps); print_rss_caps(&device_attr.rss_caps); printf("\tmax_wq_type_rq:\t\t\t%u\n", device_attr.max_wq_type_rq); print_packet_pacing_caps(&device_attr.packet_pacing_caps); } for (port = 1; port <= device_attr.orig_attr.phys_port_cnt; ++port) { /* if in the command 
line the user didn't ask for info about this port */ if ((ib_port) && (port != ib_port)) continue; rc = ibv_query_port(ctx, port, &port_attr); if (rc) { fprintf(stderr, "Failed to query port %u props\n", port); goto cleanup; } printf("\t\tport:\t%d\n", port); printf("\t\t\tstate:\t\t\t%s (%d)\n", port_state_str(port_attr.state), port_attr.state); printf("\t\t\tmax_mtu:\t\t%s (%d)\n", mtu_str(port_attr.max_mtu), port_attr.max_mtu); printf("\t\t\tactive_mtu:\t\t%s (%d)\n", mtu_str(port_attr.active_mtu), port_attr.active_mtu); printf("\t\t\tsm_lid:\t\t\t%d\n", port_attr.sm_lid); printf("\t\t\tport_lid:\t\t%d\n", port_attr.lid); printf("\t\t\tport_lmc:\t\t0x%02x\n", port_attr.lmc); printf("\t\t\tlink_layer:\t\t%s\n", link_layer_str(port_attr.link_layer)); if (verbose) { printf("\t\t\tmax_msg_sz:\t\t0x%x\n", port_attr.max_msg_sz); printf("\t\t\tport_cap_flags:\t\t0x%08x\n", port_attr.port_cap_flags); printf("\t\t\tmax_vl_num:\t\t%s (%d)\n", vl_str(port_attr.max_vl_num), port_attr.max_vl_num); printf("\t\t\tbad_pkey_cntr:\t\t0x%x\n", port_attr.bad_pkey_cntr); printf("\t\t\tqkey_viol_cntr:\t\t0x%x\n", port_attr.qkey_viol_cntr); printf("\t\t\tsm_sl:\t\t\t%d\n", port_attr.sm_sl); printf("\t\t\tpkey_tbl_len:\t\t%d\n", port_attr.pkey_tbl_len); printf("\t\t\tgid_tbl_len:\t\t%d\n", port_attr.gid_tbl_len); printf("\t\t\tsubnet_timeout:\t\t%d\n", port_attr.subnet_timeout); printf("\t\t\tinit_type_reply:\t%d\n", port_attr.init_type_reply); printf("\t\t\tactive_width:\t\t%sX (%d)\n", width_str(port_attr.active_width), port_attr.active_width); printf("\t\t\tactive_speed:\t\t%s (%d)\n", speed_str(port_attr.active_speed), port_attr.active_speed); if (ib_dev->transport_type == IBV_TRANSPORT_IB) printf("\t\t\tphys_state:\t\t%s (%d)\n", port_phy_state_str(port_attr.phys_state), port_attr.phys_state); if (print_all_port_gids(ctx, port, port_attr.gid_tbl_len)) goto cleanup; } printf("\n"); } cleanup: if (ctx) if (ibv_close_device(ctx)) { fprintf(stderr, "Failed to close device"); rc = 3; } return rc; } static void usage(const char *argv0) { printf("Usage: %s print the ca attributes\n", argv0); printf("\n"); printf("Options:\n"); printf(" -d, --ib-dev= use IB device (default first device found)\n"); printf(" -i, --ib-port= use port of IB device (default all ports)\n"); printf(" -l, --list print only the IB devices names\n"); printf(" -v, --verbose print all the attributes of the IB device(s)\n"); } int main(int argc, char *argv[]) { char *ib_devname = NULL; int ret = 0; struct ibv_device **dev_list, **orig_dev_list; int num_of_hcas; int ib_port = 0; /* parse command line options */ while (1) { int c; static struct option long_options[] = { { .name = "ib-dev", .has_arg = 1, .val = 'd' }, { .name = "ib-port", .has_arg = 1, .val = 'i' }, { .name = "list", .has_arg = 0, .val = 'l' }, { .name = "verbose", .has_arg = 0, .val = 'v' }, { } }; c = getopt_long(argc, argv, "d:i:lv", long_options, NULL); if (c == -1) break; switch (c) { case 'd': ib_devname = strdup(optarg); break; case 'i': ib_port = strtol(optarg, NULL, 0); if (ib_port <= 0) { usage(argv[0]); return 1; } break; case 'v': verbose = 1; break; case 'l': dev_list = orig_dev_list = ibv_get_device_list(&num_of_hcas); if (!dev_list) { perror("Failed to get IB devices list"); return -1; } printf("%d HCA%s found:\n", num_of_hcas, num_of_hcas != 1 ? 
"s" : ""); while (*dev_list) { printf("\t%s\n", ibv_get_device_name(*dev_list)); ++dev_list; } printf("\n"); ibv_free_device_list(orig_dev_list); return 0; default: usage(argv[0]); return -1; } } dev_list = orig_dev_list = ibv_get_device_list(NULL); if (!dev_list) { perror("Failed to get IB devices list"); return -1; } if (ib_devname) { while (*dev_list) { if (!strcmp(ibv_get_device_name(*dev_list), ib_devname)) break; ++dev_list; } if (!*dev_list) { fprintf(stderr, "IB device '%s' wasn't found\n", ib_devname); return -1; } ret |= print_hca_cap(*dev_list, ib_port); } else { if (!*dev_list) { fprintf(stderr, "No IB devices found\n"); return -1; } while (*dev_list) { ret |= print_hca_cap(*dev_list, ib_port); ++dev_list; } } if (ib_devname) free(ib_devname); ibv_free_device_list(orig_dev_list); return ret; } diff --git a/contrib/ofed/libibverbs/verbs.c b/contrib/ofed/libibverbs/verbs.c index aec8706fd0cc..9c7e3b403f92 100644 --- a/contrib/ofed/libibverbs/verbs.c +++ b/contrib/ofed/libibverbs/verbs.c @@ -1,1037 +1,1045 @@ /* * Copyright (c) 2005 Topspin Communications. All rights reserved. * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #define _GNU_SOURCE #include #include #include #include #include #include #include #include #include #include #include #include "ibverbs.h" #ifndef NRESOLVE_NEIGH #include #include #include "neigh.h" #endif /* Hack to avoid GCC's -Wmissing-prototypes and the similar error from sparse with these prototypes. Symbol versionining requires the goofy names, the prototype must match the version in verbs.h. 
*/ int __ibv_query_device(struct ibv_context *context, struct ibv_device_attr *device_attr); int __ibv_query_port(struct ibv_context *context, uint8_t port_num, struct ibv_port_attr *port_attr); int __ibv_query_gid(struct ibv_context *context, uint8_t port_num, int index, union ibv_gid *gid); int __ibv_query_pkey(struct ibv_context *context, uint8_t port_num, int index, __be16 *pkey); struct ibv_pd *__ibv_alloc_pd(struct ibv_context *context); int __ibv_dealloc_pd(struct ibv_pd *pd); struct ibv_mr *__ibv_reg_mr(struct ibv_pd *pd, void *addr, size_t length, int access); int __ibv_rereg_mr(struct ibv_mr *mr, int flags, struct ibv_pd *pd, void *addr, size_t length, int access); int __ibv_dereg_mr(struct ibv_mr *mr); struct ibv_cq *__ibv_create_cq(struct ibv_context *context, int cqe, void *cq_context, struct ibv_comp_channel *channel, int comp_vector); int __ibv_resize_cq(struct ibv_cq *cq, int cqe); int __ibv_destroy_cq(struct ibv_cq *cq); int __ibv_get_cq_event(struct ibv_comp_channel *channel, struct ibv_cq **cq, void **cq_context); void __ibv_ack_cq_events(struct ibv_cq *cq, unsigned int nevents); struct ibv_srq *__ibv_create_srq(struct ibv_pd *pd, struct ibv_srq_init_attr *srq_init_attr); int __ibv_modify_srq(struct ibv_srq *srq, struct ibv_srq_attr *srq_attr, int srq_attr_mask); int __ibv_query_srq(struct ibv_srq *srq, struct ibv_srq_attr *srq_attr); int __ibv_destroy_srq(struct ibv_srq *srq); struct ibv_qp *__ibv_create_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *qp_init_attr); int __ibv_query_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, int attr_mask, struct ibv_qp_init_attr *init_attr); int __ibv_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, int attr_mask); int __ibv_destroy_qp(struct ibv_qp *qp); struct ibv_ah *__ibv_create_ah(struct ibv_pd *pd, struct ibv_ah_attr *attr); int __ibv_destroy_ah(struct ibv_ah *ah); int __ibv_attach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, uint16_t lid); int __ibv_detach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, uint16_t lid); int __attribute__((const)) ibv_rate_to_mult(enum ibv_rate rate) { switch (rate) { case IBV_RATE_2_5_GBPS: return 1; case IBV_RATE_5_GBPS: return 2; case IBV_RATE_10_GBPS: return 4; case IBV_RATE_20_GBPS: return 8; case IBV_RATE_30_GBPS: return 12; case IBV_RATE_40_GBPS: return 16; case IBV_RATE_60_GBPS: return 24; case IBV_RATE_80_GBPS: return 32; case IBV_RATE_120_GBPS: return 48; case IBV_RATE_28_GBPS: return 11; case IBV_RATE_50_GBPS: return 20; case IBV_RATE_400_GBPS: return 160; case IBV_RATE_600_GBPS: return 240; + case IBV_RATE_800_GBPS: return 320; + case IBV_RATE_1200_GBPS: return 480; default: return -1; } } enum ibv_rate __attribute__((const)) mult_to_ibv_rate(int mult) { switch (mult) { case 1: return IBV_RATE_2_5_GBPS; case 2: return IBV_RATE_5_GBPS; case 4: return IBV_RATE_10_GBPS; case 8: return IBV_RATE_20_GBPS; case 12: return IBV_RATE_30_GBPS; case 16: return IBV_RATE_40_GBPS; case 24: return IBV_RATE_60_GBPS; case 32: return IBV_RATE_80_GBPS; case 48: return IBV_RATE_120_GBPS; case 11: return IBV_RATE_28_GBPS; case 20: return IBV_RATE_50_GBPS; case 160: return IBV_RATE_400_GBPS; case 240: return IBV_RATE_600_GBPS; + case 320: return IBV_RATE_800_GBPS; + case 480: return IBV_RATE_1200_GBPS; default: return IBV_RATE_MAX; } } int __attribute__((const)) ibv_rate_to_mbps(enum ibv_rate rate) { switch (rate) { case IBV_RATE_2_5_GBPS: return 2500; case IBV_RATE_5_GBPS: return 5000; case IBV_RATE_10_GBPS: return 10000; case IBV_RATE_20_GBPS: return 20000; case IBV_RATE_30_GBPS: return 
30000; case IBV_RATE_40_GBPS: return 40000; case IBV_RATE_60_GBPS: return 60000; case IBV_RATE_80_GBPS: return 80000; case IBV_RATE_120_GBPS: return 120000; case IBV_RATE_14_GBPS: return 14062; case IBV_RATE_56_GBPS: return 56250; case IBV_RATE_112_GBPS: return 112500; case IBV_RATE_168_GBPS: return 168750; case IBV_RATE_25_GBPS: return 25781; case IBV_RATE_100_GBPS: return 103125; case IBV_RATE_200_GBPS: return 206250; case IBV_RATE_300_GBPS: return 309375; case IBV_RATE_28_GBPS: return 28125; case IBV_RATE_50_GBPS: return 53125; case IBV_RATE_400_GBPS: return 425000; case IBV_RATE_600_GBPS: return 637500; + case IBV_RATE_800_GBPS: return 850000; + case IBV_RATE_1200_GBPS: return 1275000; default: return -1; } } enum ibv_rate __attribute__((const)) mbps_to_ibv_rate(int mbps) { switch (mbps) { case 2500: return IBV_RATE_2_5_GBPS; case 5000: return IBV_RATE_5_GBPS; case 10000: return IBV_RATE_10_GBPS; case 20000: return IBV_RATE_20_GBPS; case 30000: return IBV_RATE_30_GBPS; case 40000: return IBV_RATE_40_GBPS; case 60000: return IBV_RATE_60_GBPS; case 80000: return IBV_RATE_80_GBPS; case 120000: return IBV_RATE_120_GBPS; case 14062: return IBV_RATE_14_GBPS; case 56250: return IBV_RATE_56_GBPS; case 112500: return IBV_RATE_112_GBPS; case 168750: return IBV_RATE_168_GBPS; case 25781: return IBV_RATE_25_GBPS; case 103125: return IBV_RATE_100_GBPS; case 206250: return IBV_RATE_200_GBPS; case 309375: return IBV_RATE_300_GBPS; case 28125: return IBV_RATE_28_GBPS; case 53125: return IBV_RATE_50_GBPS; case 425000: return IBV_RATE_400_GBPS; case 637500: return IBV_RATE_600_GBPS; + case 850000: return IBV_RATE_800_GBPS; + case 1275000: return IBV_RATE_1200_GBPS; default: return IBV_RATE_MAX; } } int __ibv_query_device(struct ibv_context *context, struct ibv_device_attr *device_attr) { return context->ops.query_device(context, device_attr); } default_symver(__ibv_query_device, ibv_query_device); int __ibv_query_port(struct ibv_context *context, uint8_t port_num, struct ibv_port_attr *port_attr) { return context->ops.query_port(context, port_num, port_attr); } default_symver(__ibv_query_port, ibv_query_port); int __ibv_query_gid(struct ibv_context *context, uint8_t port_num, int index, union ibv_gid *gid) { char name[24]; char attr[41]; uint16_t val; int i; snprintf(name, sizeof name, "ports/%d/gids/%d", port_num, index); if (ibv_read_sysfs_file(context->device->ibdev_path, name, attr, sizeof attr) < 0) return -1; for (i = 0; i < 8; ++i) { if (sscanf(attr + i * 5, "%hx", &val) != 1) return -1; gid->raw[i * 2 ] = val >> 8; gid->raw[i * 2 + 1] = val & 0xff; } return 0; } default_symver(__ibv_query_gid, ibv_query_gid); int __ibv_query_pkey(struct ibv_context *context, uint8_t port_num, int index, __be16 *pkey) { char name[24]; char attr[8]; uint16_t val; snprintf(name, sizeof name, "ports/%d/pkeys/%d", port_num, index); if (ibv_read_sysfs_file(context->device->ibdev_path, name, attr, sizeof attr) < 0) return -1; if (sscanf(attr, "%hx", &val) != 1) return -1; *pkey = htobe16(val); return 0; } default_symver(__ibv_query_pkey, ibv_query_pkey); struct ibv_pd *__ibv_alloc_pd(struct ibv_context *context) { struct ibv_pd *pd; pd = context->ops.alloc_pd(context); if (pd) pd->context = context; return pd; } default_symver(__ibv_alloc_pd, ibv_alloc_pd); int __ibv_dealloc_pd(struct ibv_pd *pd) { return pd->context->ops.dealloc_pd(pd); } default_symver(__ibv_dealloc_pd, ibv_dealloc_pd); struct ibv_mr *__ibv_reg_mr(struct ibv_pd *pd, void *addr, size_t length, int access) { struct ibv_mr *mr; if 
(ibv_dontfork_range(addr, length)) return NULL; mr = pd->context->ops.reg_mr(pd, addr, length, access); if (mr) { mr->context = pd->context; mr->pd = pd; mr->addr = addr; mr->length = length; } else ibv_dofork_range(addr, length); return mr; } default_symver(__ibv_reg_mr, ibv_reg_mr); int __ibv_rereg_mr(struct ibv_mr *mr, int flags, struct ibv_pd *pd, void *addr, size_t length, int access) { int dofork_onfail = 0; int err; void *old_addr; size_t old_len; if (flags & ~IBV_REREG_MR_FLAGS_SUPPORTED) { errno = EINVAL; return IBV_REREG_MR_ERR_INPUT; } if ((flags & IBV_REREG_MR_CHANGE_TRANSLATION) && (!length || !addr)) { errno = EINVAL; return IBV_REREG_MR_ERR_INPUT; } if (access && !(flags & IBV_REREG_MR_CHANGE_ACCESS)) { errno = EINVAL; return IBV_REREG_MR_ERR_INPUT; } if (!mr->context->ops.rereg_mr) { errno = ENOSYS; return IBV_REREG_MR_ERR_INPUT; } if (flags & IBV_REREG_MR_CHANGE_TRANSLATION) { err = ibv_dontfork_range(addr, length); if (err) return IBV_REREG_MR_ERR_DONT_FORK_NEW; dofork_onfail = 1; } old_addr = mr->addr; old_len = mr->length; err = mr->context->ops.rereg_mr(mr, flags, pd, addr, length, access); if (!err) { if (flags & IBV_REREG_MR_CHANGE_PD) mr->pd = pd; if (flags & IBV_REREG_MR_CHANGE_TRANSLATION) { mr->addr = addr; mr->length = length; err = ibv_dofork_range(old_addr, old_len); if (err) return IBV_REREG_MR_ERR_DO_FORK_OLD; } } else { err = IBV_REREG_MR_ERR_CMD; if (dofork_onfail) { if (ibv_dofork_range(addr, length)) err = IBV_REREG_MR_ERR_CMD_AND_DO_FORK_NEW; } } return err; } default_symver(__ibv_rereg_mr, ibv_rereg_mr); int __ibv_dereg_mr(struct ibv_mr *mr) { int ret; void *addr = mr->addr; size_t length = mr->length; ret = mr->context->ops.dereg_mr(mr); if (!ret) ibv_dofork_range(addr, length); return ret; } default_symver(__ibv_dereg_mr, ibv_dereg_mr); static struct ibv_comp_channel *ibv_create_comp_channel_v2(struct ibv_context *context) { struct ibv_abi_compat_v2 *t = context->abi_compat; static int warned; if (!pthread_mutex_trylock(&t->in_use)) return &t->channel; if (!warned) { fprintf(stderr, PFX "Warning: kernel's ABI version %d limits capacity.\n" " Only one completion channel can be created per context.\n", abi_ver); ++warned; } return NULL; } struct ibv_comp_channel *ibv_create_comp_channel(struct ibv_context *context) { struct ibv_comp_channel *channel; struct ibv_create_comp_channel cmd; struct ibv_create_comp_channel_resp resp; if (abi_ver <= 2) return ibv_create_comp_channel_v2(context); channel = malloc(sizeof *channel); if (!channel) return NULL; IBV_INIT_CMD_RESP(&cmd, sizeof cmd, CREATE_COMP_CHANNEL, &resp, sizeof resp); if (write(context->cmd_fd, &cmd, sizeof cmd) != sizeof cmd) { free(channel); return NULL; } (void) VALGRIND_MAKE_MEM_DEFINED(&resp, sizeof resp); channel->context = context; channel->fd = resp.fd; channel->refcnt = 0; return channel; } static int ibv_destroy_comp_channel_v2(struct ibv_comp_channel *channel) { struct ibv_abi_compat_v2 *t = (struct ibv_abi_compat_v2 *) channel; pthread_mutex_unlock(&t->in_use); return 0; } int ibv_destroy_comp_channel(struct ibv_comp_channel *channel) { struct ibv_context *context; int ret; context = channel->context; pthread_mutex_lock(&context->mutex); if (channel->refcnt) { ret = EBUSY; goto out; } if (abi_ver <= 2) { ret = ibv_destroy_comp_channel_v2(channel); goto out; } close(channel->fd); free(channel); ret = 0; out: pthread_mutex_unlock(&context->mutex); return ret; } struct ibv_cq *__ibv_create_cq(struct ibv_context *context, int cqe, void *cq_context, struct ibv_comp_channel *channel, int 
comp_vector) { struct ibv_cq *cq; cq = context->ops.create_cq(context, cqe, channel, comp_vector); if (cq) verbs_init_cq(cq, context, channel, cq_context); return cq; } default_symver(__ibv_create_cq, ibv_create_cq); int __ibv_resize_cq(struct ibv_cq *cq, int cqe) { if (!cq->context->ops.resize_cq) return ENOSYS; return cq->context->ops.resize_cq(cq, cqe); } default_symver(__ibv_resize_cq, ibv_resize_cq); int __ibv_destroy_cq(struct ibv_cq *cq) { struct ibv_comp_channel *channel = cq->channel; int ret; ret = cq->context->ops.destroy_cq(cq); if (channel) { if (!ret) { pthread_mutex_lock(&channel->context->mutex); --channel->refcnt; pthread_mutex_unlock(&channel->context->mutex); } } return ret; } default_symver(__ibv_destroy_cq, ibv_destroy_cq); int __ibv_get_cq_event(struct ibv_comp_channel *channel, struct ibv_cq **cq, void **cq_context) { struct ibv_comp_event ev; if (read(channel->fd, &ev, sizeof ev) != sizeof ev) return -1; *cq = (struct ibv_cq *) (uintptr_t) ev.cq_handle; *cq_context = (*cq)->cq_context; if ((*cq)->context->ops.cq_event) (*cq)->context->ops.cq_event(*cq); return 0; } default_symver(__ibv_get_cq_event, ibv_get_cq_event); void __ibv_ack_cq_events(struct ibv_cq *cq, unsigned int nevents) { pthread_mutex_lock(&cq->mutex); cq->comp_events_completed += nevents; pthread_cond_signal(&cq->cond); pthread_mutex_unlock(&cq->mutex); } default_symver(__ibv_ack_cq_events, ibv_ack_cq_events); struct ibv_srq *__ibv_create_srq(struct ibv_pd *pd, struct ibv_srq_init_attr *srq_init_attr) { struct ibv_srq *srq; if (!pd->context->ops.create_srq) return NULL; srq = pd->context->ops.create_srq(pd, srq_init_attr); if (srq) { srq->context = pd->context; srq->srq_context = srq_init_attr->srq_context; srq->pd = pd; srq->events_completed = 0; pthread_mutex_init(&srq->mutex, NULL); pthread_cond_init(&srq->cond, NULL); } return srq; } default_symver(__ibv_create_srq, ibv_create_srq); int __ibv_modify_srq(struct ibv_srq *srq, struct ibv_srq_attr *srq_attr, int srq_attr_mask) { return srq->context->ops.modify_srq(srq, srq_attr, srq_attr_mask); } default_symver(__ibv_modify_srq, ibv_modify_srq); int __ibv_query_srq(struct ibv_srq *srq, struct ibv_srq_attr *srq_attr) { return srq->context->ops.query_srq(srq, srq_attr); } default_symver(__ibv_query_srq, ibv_query_srq); int __ibv_destroy_srq(struct ibv_srq *srq) { return srq->context->ops.destroy_srq(srq); } default_symver(__ibv_destroy_srq, ibv_destroy_srq); struct ibv_qp *__ibv_create_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *qp_init_attr) { struct ibv_qp *qp = pd->context->ops.create_qp(pd, qp_init_attr); if (qp) { qp->context = pd->context; qp->qp_context = qp_init_attr->qp_context; qp->pd = pd; qp->send_cq = qp_init_attr->send_cq; qp->recv_cq = qp_init_attr->recv_cq; qp->srq = qp_init_attr->srq; qp->qp_type = qp_init_attr->qp_type; qp->state = IBV_QPS_RESET; qp->events_completed = 0; pthread_mutex_init(&qp->mutex, NULL); pthread_cond_init(&qp->cond, NULL); } return qp; } default_symver(__ibv_create_qp, ibv_create_qp); int __ibv_query_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, int attr_mask, struct ibv_qp_init_attr *init_attr) { int ret; ret = qp->context->ops.query_qp(qp, attr, attr_mask, init_attr); if (ret) return ret; if (attr_mask & IBV_QP_STATE) qp->state = attr->qp_state; return 0; } default_symver(__ibv_query_qp, ibv_query_qp); int __ibv_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, int attr_mask) { int ret; ret = qp->context->ops.modify_qp(qp, attr, attr_mask); if (ret) return ret; if (attr_mask & IBV_QP_STATE) qp->state = 
attr->qp_state; return 0; } default_symver(__ibv_modify_qp, ibv_modify_qp); int __ibv_destroy_qp(struct ibv_qp *qp) { return qp->context->ops.destroy_qp(qp); } default_symver(__ibv_destroy_qp, ibv_destroy_qp); struct ibv_ah *__ibv_create_ah(struct ibv_pd *pd, struct ibv_ah_attr *attr) { struct ibv_ah *ah = pd->context->ops.create_ah(pd, attr); if (ah) { ah->context = pd->context; ah->pd = pd; } return ah; } default_symver(__ibv_create_ah, ibv_create_ah); /* GID types as appear in sysfs, no change is expected as of ABI * compatibility. */ #define V1_TYPE "IB/RoCE v1" #define V2_TYPE "RoCE v2" int ibv_query_gid_type(struct ibv_context *context, uint8_t port_num, unsigned int index, enum ibv_gid_type *type) { char name[32]; char buff[11]; snprintf(name, sizeof(name), "ports/%d/gid_attrs/types/%d", port_num, index); /* Reset errno so that we can rely on its value upon any error flow in * ibv_read_sysfs_file. */ errno = 0; if (ibv_read_sysfs_file(context->device->ibdev_path, name, buff, sizeof(buff)) <= 0) { char *dir_path; DIR *dir; if (errno == EINVAL) { /* In IB, this file doesn't exist and the kernel sets * errno to -EINVAL. */ *type = IBV_GID_TYPE_IB_ROCE_V1; return 0; } if (asprintf(&dir_path, "%s/%s/%d/%s/", context->device->ibdev_path, "ports", port_num, "gid_attrs") < 0) return -1; dir = opendir(dir_path); free(dir_path); if (!dir) { if (errno == ENOENT) /* Assuming that if gid_attrs doesn't exist, * we have an old kernel and all GIDs are * IB/RoCE v1 */ *type = IBV_GID_TYPE_IB_ROCE_V1; else return -1; } else { closedir(dir); errno = EFAULT; return -1; } } else { if (!strcmp(buff, V1_TYPE)) { *type = IBV_GID_TYPE_IB_ROCE_V1; } else if (!strcmp(buff, V2_TYPE)) { *type = IBV_GID_TYPE_ROCE_V2; } else { errno = ENOTSUP; return -1; } } return 0; } static int ibv_find_gid_index(struct ibv_context *context, uint8_t port_num, union ibv_gid *gid, enum ibv_gid_type gid_type) { enum ibv_gid_type sgid_type = 0; union ibv_gid sgid; int i = 0, ret; do { ret = ibv_query_gid(context, port_num, i, &sgid); if (!ret) { ret = ibv_query_gid_type(context, port_num, i, &sgid_type); } i++; } while (!ret && (memcmp(&sgid, gid, sizeof(*gid)) || (gid_type != sgid_type))); return ret ? ret : i - 1; } static inline void map_ipv4_addr_to_ipv6(__be32 ipv4, struct in6_addr *ipv6) { ipv6->s6_addr32[0] = 0; ipv6->s6_addr32[1] = 0; ipv6->s6_addr32[2] = htobe32(0x0000FFFF); ipv6->s6_addr32[3] = ipv4; } static inline __sum16 ipv4_calc_hdr_csum(uint16_t *data, unsigned int num_hwords) { unsigned int i = 0; uint32_t sum = 0; for (i = 0; i < num_hwords; i++) sum += *(data++); sum = (sum & 0xffff) + (sum >> 16); return (__sum16)~sum; } static inline int get_grh_header_version(struct ibv_grh *grh) { int ip6h_version = (be32toh(grh->version_tclass_flow) >> 28) & 0xf; struct ip *ip4h = (struct ip *)((void *)grh + 20); struct ip ip4h_checked; if (ip6h_version != 6) { if (ip4h->ip_v == 4) return 4; errno = EPROTONOSUPPORT; return -1; } /* version may be 6 or 4 */ if (ip4h->ip_hl != 5) /* IPv4 header length must be 5 for RoCE v2. */ return 6; /* * Verify checksum. * We can't write on scattered buffers so we have to copy to temp * buffer. */ memcpy(&ip4h_checked, ip4h, sizeof(ip4h_checked)); /* Need to set the checksum field (check) to 0 before re-calculating * the checksum. 
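 * The IPv4 header checksum is the 16-bit ones'-complement of the ones'-complement sum of the header halfwords, so recomputing it over the copy (with ip_sum cleared) must reproduce the received value when the buffer really starts with an IPv4 header.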
*/ ip4h_checked.ip_sum = 0; ip4h_checked.ip_sum = ipv4_calc_hdr_csum((uint16_t *)&ip4h_checked, 10); /* if IPv4 header checksum is OK, believe it */ if (ip4h->ip_sum == ip4h_checked.ip_sum) return 4; return 6; } static inline void set_ah_attr_generic_fields(struct ibv_ah_attr *ah_attr, struct ibv_wc *wc, struct ibv_grh *grh, uint8_t port_num) { uint32_t flow_class; flow_class = be32toh(grh->version_tclass_flow); ah_attr->grh.flow_label = flow_class & 0xFFFFF; ah_attr->dlid = wc->slid; ah_attr->sl = wc->sl; ah_attr->src_path_bits = wc->dlid_path_bits; ah_attr->port_num = port_num; } static inline int set_ah_attr_by_ipv4(struct ibv_context *context, struct ibv_ah_attr *ah_attr, struct ip *ip4h, uint8_t port_num) { union ibv_gid sgid; int ret; /* No point searching multicast GIDs in GID table */ if (IN_CLASSD(be32toh(ip4h->ip_dst.s_addr))) { errno = EINVAL; return -1; } map_ipv4_addr_to_ipv6(ip4h->ip_dst.s_addr, (struct in6_addr *)&sgid); ret = ibv_find_gid_index(context, port_num, &sgid, IBV_GID_TYPE_ROCE_V2); if (ret < 0) return ret; map_ipv4_addr_to_ipv6(ip4h->ip_src.s_addr, (struct in6_addr *)&ah_attr->grh.dgid); ah_attr->grh.sgid_index = (uint8_t) ret; ah_attr->grh.hop_limit = ip4h->ip_ttl; ah_attr->grh.traffic_class = ip4h->ip_tos; return 0; } #define IB_NEXT_HDR 0x1b static inline int set_ah_attr_by_ipv6(struct ibv_context *context, struct ibv_ah_attr *ah_attr, struct ibv_grh *grh, uint8_t port_num) { uint32_t flow_class; uint32_t sgid_type; int ret; /* No point searching multicast GIDs in GID table */ if (grh->dgid.raw[0] == 0xFF) { errno = EINVAL; return -1; } ah_attr->grh.dgid = grh->sgid; if (grh->next_hdr == IPPROTO_UDP) { sgid_type = IBV_GID_TYPE_ROCE_V2; } else if (grh->next_hdr == IB_NEXT_HDR) { sgid_type = IBV_GID_TYPE_IB_ROCE_V1; } else { errno = EPROTONOSUPPORT; return -1; } ret = ibv_find_gid_index(context, port_num, &grh->dgid, sgid_type); if (ret < 0) return ret; ah_attr->grh.sgid_index = (uint8_t) ret; flow_class = be32toh(grh->version_tclass_flow); ah_attr->grh.hop_limit = grh->hop_limit; ah_attr->grh.traffic_class = (flow_class >> 20) & 0xFF; return 0; } int ibv_init_ah_from_wc(struct ibv_context *context, uint8_t port_num, struct ibv_wc *wc, struct ibv_grh *grh, struct ibv_ah_attr *ah_attr) { int version; int ret = 0; memset(ah_attr, 0, sizeof *ah_attr); set_ah_attr_generic_fields(ah_attr, wc, grh, port_num); if (wc->wc_flags & IBV_WC_GRH) { ah_attr->is_global = 1; version = get_grh_header_version(grh); if (version == 4) ret = set_ah_attr_by_ipv4(context, ah_attr, (struct ip *)((void *)grh + 20), port_num); else if (version == 6) ret = set_ah_attr_by_ipv6(context, ah_attr, grh, port_num); else ret = -1; } return ret; } struct ibv_ah *ibv_create_ah_from_wc(struct ibv_pd *pd, struct ibv_wc *wc, struct ibv_grh *grh, uint8_t port_num) { struct ibv_ah_attr ah_attr; int ret; ret = ibv_init_ah_from_wc(pd->context, port_num, wc, grh, &ah_attr); if (ret) return NULL; return ibv_create_ah(pd, &ah_attr); } int __ibv_destroy_ah(struct ibv_ah *ah) { return ah->context->ops.destroy_ah(ah); } default_symver(__ibv_destroy_ah, ibv_destroy_ah); int __ibv_attach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, uint16_t lid) { return qp->context->ops.attach_mcast(qp, gid, lid); } default_symver(__ibv_attach_mcast, ibv_attach_mcast); int __ibv_detach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, uint16_t lid) { return qp->context->ops.detach_mcast(qp, gid, lid); } default_symver(__ibv_detach_mcast, ibv_detach_mcast); static inline int ipv6_addr_v4mapped(const struct in6_addr *a) { 
return IN6_IS_ADDR_V4MAPPED(a) || /* IPv4 encoded multicast addresses */ (a->s6_addr32[0] == htobe32(0xff0e0000) && ((a->s6_addr32[1] | (a->s6_addr32[2] ^ htobe32(0x0000ffff))) == 0UL)); } struct peer_address { void *address; uint32_t size; }; static inline int create_peer_from_gid(int family, void *raw_gid, struct peer_address *peer_address) { switch (family) { case AF_INET: peer_address->address = raw_gid + 12; peer_address->size = 4; break; case AF_INET6: peer_address->address = raw_gid; peer_address->size = 16; break; default: return -1; } return 0; } #define NEIGH_GET_DEFAULT_TIMEOUT_MS 3000 int ibv_resolve_eth_l2_from_gid(struct ibv_context *context, struct ibv_ah_attr *attr, uint8_t eth_mac[ETHERNET_LL_SIZE], uint16_t *vid) { #ifndef NRESOLVE_NEIGH int dst_family; int src_family; int oif; struct get_neigh_handler neigh_handler; union ibv_gid sgid; int ether_len; struct peer_address src; struct peer_address dst; uint16_t ret_vid; int ret = -EINVAL; int err; err = ibv_query_gid(context, attr->port_num, attr->grh.sgid_index, &sgid); if (err) return err; err = neigh_init_resources(&neigh_handler, NEIGH_GET_DEFAULT_TIMEOUT_MS); if (err) return err; dst_family = ipv6_addr_v4mapped((struct in6_addr *)attr->grh.dgid.raw) ? AF_INET : AF_INET6; src_family = ipv6_addr_v4mapped((struct in6_addr *)sgid.raw) ? AF_INET : AF_INET6; if (create_peer_from_gid(dst_family, attr->grh.dgid.raw, &dst)) goto free_resources; if (create_peer_from_gid(src_family, &sgid.raw, &src)) goto free_resources; if (neigh_set_dst(&neigh_handler, dst_family, dst.address, dst.size)) goto free_resources; if (neigh_set_src(&neigh_handler, src_family, src.address, src.size)) goto free_resources; oif = neigh_get_oif_from_src(&neigh_handler); if (oif > 0) neigh_set_oif(&neigh_handler, oif); else goto free_resources; ret = -EHOSTUNREACH; /* blocking call */ if (process_get_neigh(&neigh_handler)) goto free_resources; ret_vid = neigh_get_vlan_id_from_dev(&neigh_handler); if (ret_vid <= 0xfff) neigh_set_vlan_id(&neigh_handler, ret_vid); /* We are using only Ethernet here */ ether_len = neigh_get_ll(&neigh_handler, eth_mac, sizeof(uint8_t) * ETHERNET_LL_SIZE); if (ether_len <= 0) goto free_resources; *vid = ret_vid; ret = 0; free_resources: neigh_free_resources(&neigh_handler); return ret; #else return -ENOSYS; #endif } diff --git a/contrib/ofed/libibverbs/verbs.h b/contrib/ofed/libibverbs/verbs.h index 498275561280..8a76dfdf7fd5 100644 --- a/contrib/ofed/libibverbs/verbs.h +++ b/contrib/ofed/libibverbs/verbs.h @@ -1,2380 +1,2382 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. * Copyright (c) 2004, 2011-2012 Intel Corporation. All rights reserved. * Copyright (c) 2005, 2006, 2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2005 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. 
* * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #ifndef INFINIBAND_VERBS_H #define INFINIBAND_VERBS_H #include #include #include #include #include #include #ifdef __cplusplus # define BEGIN_C_DECLS extern "C" { # define END_C_DECLS } #else /* !__cplusplus */ # define BEGIN_C_DECLS # define END_C_DECLS #endif /* __cplusplus */ #if __GNUC__ >= 3 # define __attribute_const __attribute__((const)) #else # define __attribute_const #endif BEGIN_C_DECLS union ibv_gid { uint8_t raw[16]; struct { __be64 subnet_prefix; __be64 interface_id; } global; }; #ifndef container_of /** * container_of - cast a member of a structure out to the containing structure * @ptr: the pointer to the member. * @type: the type of the container struct this is embedded in. * @member: the name of the member within the struct. * */ #define container_of(ptr, type, member) \ ((type *) ((uint8_t *)(ptr) - offsetof(type, member))) #endif #define vext_field_avail(type, fld, sz) (offsetof(type, fld) < (sz)) static void *__VERBS_ABI_IS_EXTENDED = ((uint8_t *) NULL) - 1; enum ibv_node_type { IBV_NODE_UNKNOWN = -1, IBV_NODE_CA = 1, IBV_NODE_SWITCH, IBV_NODE_ROUTER, IBV_NODE_RNIC, IBV_NODE_USNIC, IBV_NODE_USNIC_UDP, }; enum ibv_transport_type { IBV_TRANSPORT_UNKNOWN = -1, IBV_TRANSPORT_IB = 0, IBV_TRANSPORT_IWARP, IBV_TRANSPORT_USNIC, IBV_TRANSPORT_USNIC_UDP, }; enum ibv_device_cap_flags { IBV_DEVICE_RESIZE_MAX_WR = 1, IBV_DEVICE_BAD_PKEY_CNTR = 1 << 1, IBV_DEVICE_BAD_QKEY_CNTR = 1 << 2, IBV_DEVICE_RAW_MULTI = 1 << 3, IBV_DEVICE_AUTO_PATH_MIG = 1 << 4, IBV_DEVICE_CHANGE_PHY_PORT = 1 << 5, IBV_DEVICE_UD_AV_PORT_ENFORCE = 1 << 6, IBV_DEVICE_CURR_QP_STATE_MOD = 1 << 7, IBV_DEVICE_SHUTDOWN_PORT = 1 << 8, IBV_DEVICE_INIT_TYPE = 1 << 9, IBV_DEVICE_PORT_ACTIVE_EVENT = 1 << 10, IBV_DEVICE_SYS_IMAGE_GUID = 1 << 11, IBV_DEVICE_RC_RNR_NAK_GEN = 1 << 12, IBV_DEVICE_SRQ_RESIZE = 1 << 13, IBV_DEVICE_N_NOTIFY_CQ = 1 << 14, IBV_DEVICE_MEM_WINDOW = 1 << 17, IBV_DEVICE_UD_IP_CSUM = 1 << 18, IBV_DEVICE_XRC = 1 << 20, IBV_DEVICE_MEM_MGT_EXTENSIONS = 1 << 21, IBV_DEVICE_MEM_WINDOW_TYPE_2A = 1 << 23, IBV_DEVICE_MEM_WINDOW_TYPE_2B = 1 << 24, IBV_DEVICE_RC_IP_CSUM = 1 << 25, IBV_DEVICE_RAW_IP_CSUM = 1 << 26, IBV_DEVICE_MANAGED_FLOW_STEERING = 1 << 29 }; /* * Can't extended above ibv_device_cap_flags enum as in some systems/compilers * enum range is limited to 4 bytes. 
*/ #define IBV_DEVICE_RAW_SCATTER_FCS (1ULL << 34) enum ibv_atomic_cap { IBV_ATOMIC_NONE, IBV_ATOMIC_HCA, IBV_ATOMIC_GLOB }; struct ibv_device_attr { char fw_ver[64]; __be64 node_guid; __be64 sys_image_guid; uint64_t max_mr_size; uint64_t page_size_cap; uint32_t vendor_id; uint32_t vendor_part_id; uint32_t hw_ver; int max_qp; int max_qp_wr; int device_cap_flags; int max_sge; int max_sge_rd; int max_cq; int max_cqe; int max_mr; int max_pd; int max_qp_rd_atom; int max_ee_rd_atom; int max_res_rd_atom; int max_qp_init_rd_atom; int max_ee_init_rd_atom; enum ibv_atomic_cap atomic_cap; int max_ee; int max_rdd; int max_mw; int max_raw_ipv6_qp; int max_raw_ethy_qp; int max_mcast_grp; int max_mcast_qp_attach; int max_total_mcast_qp_attach; int max_ah; int max_fmr; int max_map_per_fmr; int max_srq; int max_srq_wr; int max_srq_sge; uint16_t max_pkeys; uint8_t local_ca_ack_delay; uint8_t phys_port_cnt; }; /* An extensible input struct for possible future extensions of the * ibv_query_device_ex verb. */ struct ibv_query_device_ex_input { uint32_t comp_mask; }; enum ibv_odp_transport_cap_bits { IBV_ODP_SUPPORT_SEND = 1 << 0, IBV_ODP_SUPPORT_RECV = 1 << 1, IBV_ODP_SUPPORT_WRITE = 1 << 2, IBV_ODP_SUPPORT_READ = 1 << 3, IBV_ODP_SUPPORT_ATOMIC = 1 << 4, }; struct ibv_odp_caps { uint64_t general_caps; struct { uint32_t rc_odp_caps; uint32_t uc_odp_caps; uint32_t ud_odp_caps; } per_transport_caps; }; enum ibv_odp_general_caps { IBV_ODP_SUPPORT = 1 << 0, }; struct ibv_tso_caps { uint32_t max_tso; uint32_t supported_qpts; }; /* RX Hash function flags */ enum ibv_rx_hash_function_flags { IBV_RX_HASH_FUNC_TOEPLITZ = 1 << 0, }; /* * RX Hash fields enable to set which incoming packet's field should * participates in RX Hash. Each flag represent certain packet's field, * when the flag is set the field that is represented by the flag will * participate in RX Hash calculation. * Note: *IPV4 and *IPV6 flags can't be enabled together on the same QP * and *TCP and *UDP flags can't be enabled together on the same QP. 
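 * For example, a classic 4-tuple hash on TCP/IPv4 traffic would set IBV_RX_HASH_SRC_IPV4 | IBV_RX_HASH_DST_IPV4 | IBV_RX_HASH_SRC_PORT_TCP | IBV_RX_HASH_DST_PORT_TCP.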
*/ enum ibv_rx_hash_fields { IBV_RX_HASH_SRC_IPV4 = 1 << 0, IBV_RX_HASH_DST_IPV4 = 1 << 1, IBV_RX_HASH_SRC_IPV6 = 1 << 2, IBV_RX_HASH_DST_IPV6 = 1 << 3, IBV_RX_HASH_SRC_PORT_TCP = 1 << 4, IBV_RX_HASH_DST_PORT_TCP = 1 << 5, IBV_RX_HASH_SRC_PORT_UDP = 1 << 6, IBV_RX_HASH_DST_PORT_UDP = 1 << 7 }; struct ibv_rss_caps { uint32_t supported_qpts; uint32_t max_rwq_indirection_tables; uint32_t max_rwq_indirection_table_size; uint64_t rx_hash_fields_mask; /* enum ibv_rx_hash_fields */ uint8_t rx_hash_function; /* enum ibv_rx_hash_function_flags */ }; struct ibv_packet_pacing_caps { uint32_t qp_rate_limit_min; uint32_t qp_rate_limit_max; /* In kbps */ uint32_t supported_qpts; }; enum ibv_raw_packet_caps { IBV_RAW_PACKET_CAP_CVLAN_STRIPPING = 1 << 0, IBV_RAW_PACKET_CAP_SCATTER_FCS = 1 << 1, IBV_RAW_PACKET_CAP_IP_CSUM = 1 << 2, }; struct ibv_device_attr_ex { struct ibv_device_attr orig_attr; uint32_t comp_mask; struct ibv_odp_caps odp_caps; uint64_t completion_timestamp_mask; uint64_t hca_core_clock; uint64_t device_cap_flags_ex; struct ibv_tso_caps tso_caps; struct ibv_rss_caps rss_caps; uint32_t max_wq_type_rq; struct ibv_packet_pacing_caps packet_pacing_caps; uint32_t raw_packet_caps; /* Use ibv_raw_packet_caps */ }; enum ibv_mtu { IBV_MTU_256 = 1, IBV_MTU_512 = 2, IBV_MTU_1024 = 3, IBV_MTU_2048 = 4, IBV_MTU_4096 = 5 }; enum ibv_port_state { IBV_PORT_NOP = 0, IBV_PORT_DOWN = 1, IBV_PORT_INIT = 2, IBV_PORT_ARMED = 3, IBV_PORT_ACTIVE = 4, IBV_PORT_ACTIVE_DEFER = 5 }; enum { IBV_LINK_LAYER_UNSPECIFIED, IBV_LINK_LAYER_INFINIBAND, IBV_LINK_LAYER_ETHERNET, }; enum ibv_port_cap_flags { IBV_PORT_SM = 1 << 1, IBV_PORT_NOTICE_SUP = 1 << 2, IBV_PORT_TRAP_SUP = 1 << 3, IBV_PORT_OPT_IPD_SUP = 1 << 4, IBV_PORT_AUTO_MIGR_SUP = 1 << 5, IBV_PORT_SL_MAP_SUP = 1 << 6, IBV_PORT_MKEY_NVRAM = 1 << 7, IBV_PORT_PKEY_NVRAM = 1 << 8, IBV_PORT_LED_INFO_SUP = 1 << 9, IBV_PORT_SYS_IMAGE_GUID_SUP = 1 << 11, IBV_PORT_PKEY_SW_EXT_PORT_TRAP_SUP = 1 << 12, IBV_PORT_EXTENDED_SPEEDS_SUP = 1 << 14, IBV_PORT_CM_SUP = 1 << 16, IBV_PORT_SNMP_TUNNEL_SUP = 1 << 17, IBV_PORT_REINIT_SUP = 1 << 18, IBV_PORT_DEVICE_MGMT_SUP = 1 << 19, IBV_PORT_VENDOR_CLASS_SUP = 1 << 20, IBV_PORT_DR_NOTICE_SUP = 1 << 21, IBV_PORT_CAP_MASK_NOTICE_SUP = 1 << 22, IBV_PORT_BOOT_MGMT_SUP = 1 << 23, IBV_PORT_LINK_LATENCY_SUP = 1 << 24, IBV_PORT_CLIENT_REG_SUP = 1 << 25, IBV_PORT_IP_BASED_GIDS = 1 << 26 }; struct ibv_port_attr { enum ibv_port_state state; enum ibv_mtu max_mtu; enum ibv_mtu active_mtu; int gid_tbl_len; uint32_t port_cap_flags; uint32_t max_msg_sz; uint32_t bad_pkey_cntr; uint32_t qkey_viol_cntr; uint16_t pkey_tbl_len; uint16_t lid; uint16_t sm_lid; uint8_t lmc; uint8_t max_vl_num; uint8_t sm_sl; uint8_t subnet_timeout; uint8_t init_type_reply; uint8_t active_width; uint8_t active_speed; uint8_t phys_state; uint8_t link_layer; uint8_t reserved; }; enum ibv_event_type { IBV_EVENT_CQ_ERR, IBV_EVENT_QP_FATAL, IBV_EVENT_QP_REQ_ERR, IBV_EVENT_QP_ACCESS_ERR, IBV_EVENT_COMM_EST, IBV_EVENT_SQ_DRAINED, IBV_EVENT_PATH_MIG, IBV_EVENT_PATH_MIG_ERR, IBV_EVENT_DEVICE_FATAL, IBV_EVENT_PORT_ACTIVE, IBV_EVENT_PORT_ERR, IBV_EVENT_LID_CHANGE, IBV_EVENT_PKEY_CHANGE, IBV_EVENT_SM_CHANGE, IBV_EVENT_SRQ_ERR, IBV_EVENT_SRQ_LIMIT_REACHED, IBV_EVENT_QP_LAST_WQE_REACHED, IBV_EVENT_CLIENT_REREGISTER, IBV_EVENT_GID_CHANGE, IBV_EVENT_WQ_FATAL, }; struct ibv_async_event { union { struct ibv_cq *cq; struct ibv_qp *qp; struct ibv_srq *srq; struct ibv_wq *wq; int port_num; } element; enum ibv_event_type event_type; }; enum ibv_wc_status { IBV_WC_SUCCESS, IBV_WC_LOC_LEN_ERR, 
IBV_WC_LOC_QP_OP_ERR, IBV_WC_LOC_EEC_OP_ERR, IBV_WC_LOC_PROT_ERR, IBV_WC_WR_FLUSH_ERR, IBV_WC_MW_BIND_ERR, IBV_WC_BAD_RESP_ERR, IBV_WC_LOC_ACCESS_ERR, IBV_WC_REM_INV_REQ_ERR, IBV_WC_REM_ACCESS_ERR, IBV_WC_REM_OP_ERR, IBV_WC_RETRY_EXC_ERR, IBV_WC_RNR_RETRY_EXC_ERR, IBV_WC_LOC_RDD_VIOL_ERR, IBV_WC_REM_INV_RD_REQ_ERR, IBV_WC_REM_ABORT_ERR, IBV_WC_INV_EECN_ERR, IBV_WC_INV_EEC_STATE_ERR, IBV_WC_FATAL_ERR, IBV_WC_RESP_TIMEOUT_ERR, IBV_WC_GENERAL_ERR }; const char *ibv_wc_status_str(enum ibv_wc_status status); enum ibv_wc_opcode { IBV_WC_SEND, IBV_WC_RDMA_WRITE, IBV_WC_RDMA_READ, IBV_WC_COMP_SWAP, IBV_WC_FETCH_ADD, IBV_WC_BIND_MW, IBV_WC_LOCAL_INV, IBV_WC_TSO, /* * Set value of IBV_WC_RECV so consumers can test if a completion is a * receive by testing (opcode & IBV_WC_RECV). */ IBV_WC_RECV = 1 << 7, IBV_WC_RECV_RDMA_WITH_IMM }; enum { IBV_WC_IP_CSUM_OK_SHIFT = 2 }; enum ibv_create_cq_wc_flags { IBV_WC_EX_WITH_BYTE_LEN = 1 << 0, IBV_WC_EX_WITH_IMM = 1 << 1, IBV_WC_EX_WITH_QP_NUM = 1 << 2, IBV_WC_EX_WITH_SRC_QP = 1 << 3, IBV_WC_EX_WITH_SLID = 1 << 4, IBV_WC_EX_WITH_SL = 1 << 5, IBV_WC_EX_WITH_DLID_PATH_BITS = 1 << 6, IBV_WC_EX_WITH_COMPLETION_TIMESTAMP = 1 << 7, IBV_WC_EX_WITH_CVLAN = 1 << 8, IBV_WC_EX_WITH_FLOW_TAG = 1 << 9, }; enum { IBV_WC_STANDARD_FLAGS = IBV_WC_EX_WITH_BYTE_LEN | IBV_WC_EX_WITH_IMM | IBV_WC_EX_WITH_QP_NUM | IBV_WC_EX_WITH_SRC_QP | IBV_WC_EX_WITH_SLID | IBV_WC_EX_WITH_SL | IBV_WC_EX_WITH_DLID_PATH_BITS }; enum { IBV_CREATE_CQ_SUP_WC_FLAGS = IBV_WC_STANDARD_FLAGS | IBV_WC_EX_WITH_COMPLETION_TIMESTAMP | IBV_WC_EX_WITH_CVLAN | IBV_WC_EX_WITH_FLOW_TAG }; enum ibv_wc_flags { IBV_WC_GRH = 1 << 0, IBV_WC_WITH_IMM = 1 << 1, IBV_WC_IP_CSUM_OK = 1 << IBV_WC_IP_CSUM_OK_SHIFT, IBV_WC_WITH_INV = 1 << 3 }; struct ibv_wc { uint64_t wr_id; enum ibv_wc_status status; enum ibv_wc_opcode opcode; uint32_t vendor_err; uint32_t byte_len; /* When (wc_flags & IBV_WC_WITH_IMM): Immediate data in network byte order. * When (wc_flags & IBV_WC_WITH_INV): Stores the invalidated rkey. 
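 *
 * A minimal sketch of how a consumer could pick the right union member after
 * polling ("wc" is a hypothetical struct ibv_wc filled in by ibv_poll_cq):
 *
 *   uint32_t host_imm, old_rkey;
 *
 *   if (wc.wc_flags & IBV_WC_WITH_IMM)
 *           host_imm = be32toh(wc.imm_data);
 *   else if (wc.wc_flags & IBV_WC_WITH_INV)
 *           old_rkey = wc.invalidated_rkey;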
*/ union { __be32 imm_data; uint32_t invalidated_rkey; }; uint32_t qp_num; uint32_t src_qp; int wc_flags; uint16_t pkey_index; uint16_t slid; uint8_t sl; uint8_t dlid_path_bits; }; enum ibv_access_flags { IBV_ACCESS_LOCAL_WRITE = 1, IBV_ACCESS_REMOTE_WRITE = (1<<1), IBV_ACCESS_REMOTE_READ = (1<<2), IBV_ACCESS_REMOTE_ATOMIC = (1<<3), IBV_ACCESS_MW_BIND = (1<<4), IBV_ACCESS_ZERO_BASED = (1<<5), IBV_ACCESS_ON_DEMAND = (1<<6), }; struct ibv_mw_bind_info { struct ibv_mr *mr; uint64_t addr; uint64_t length; int mw_access_flags; /* use ibv_access_flags */ }; struct ibv_pd { struct ibv_context *context; uint32_t handle; }; enum ibv_xrcd_init_attr_mask { IBV_XRCD_INIT_ATTR_FD = 1 << 0, IBV_XRCD_INIT_ATTR_OFLAGS = 1 << 1, IBV_XRCD_INIT_ATTR_RESERVED = 1 << 2 }; struct ibv_xrcd_init_attr { uint32_t comp_mask; int fd; int oflags; }; struct ibv_xrcd { struct ibv_context *context; }; enum ibv_rereg_mr_flags { IBV_REREG_MR_CHANGE_TRANSLATION = (1 << 0), IBV_REREG_MR_CHANGE_PD = (1 << 1), IBV_REREG_MR_CHANGE_ACCESS = (1 << 2), IBV_REREG_MR_KEEP_VALID = (1 << 3), IBV_REREG_MR_FLAGS_SUPPORTED = ((IBV_REREG_MR_KEEP_VALID << 1) - 1) }; struct ibv_mr { struct ibv_context *context; struct ibv_pd *pd; void *addr; size_t length; uint32_t handle; uint32_t lkey; uint32_t rkey; }; enum ibv_mw_type { IBV_MW_TYPE_1 = 1, IBV_MW_TYPE_2 = 2 }; struct ibv_mw { struct ibv_context *context; struct ibv_pd *pd; uint32_t rkey; uint32_t handle; enum ibv_mw_type type; }; struct ibv_global_route { union ibv_gid dgid; uint32_t flow_label; uint8_t sgid_index; uint8_t hop_limit; uint8_t traffic_class; }; struct ibv_grh { __be32 version_tclass_flow; __be16 paylen; uint8_t next_hdr; uint8_t hop_limit; union ibv_gid sgid; union ibv_gid dgid; }; enum ibv_rate { IBV_RATE_MAX = 0, IBV_RATE_2_5_GBPS = 2, IBV_RATE_5_GBPS = 5, IBV_RATE_10_GBPS = 3, IBV_RATE_20_GBPS = 6, IBV_RATE_30_GBPS = 4, IBV_RATE_40_GBPS = 7, IBV_RATE_60_GBPS = 8, IBV_RATE_80_GBPS = 9, IBV_RATE_120_GBPS = 10, IBV_RATE_14_GBPS = 11, IBV_RATE_56_GBPS = 12, IBV_RATE_112_GBPS = 13, IBV_RATE_168_GBPS = 14, IBV_RATE_25_GBPS = 15, IBV_RATE_100_GBPS = 16, IBV_RATE_200_GBPS = 17, IBV_RATE_300_GBPS = 18, IBV_RATE_28_GBPS = 19, IBV_RATE_50_GBPS = 20, IBV_RATE_400_GBPS = 21, IBV_RATE_600_GBPS = 22, + IBV_RATE_800_GBPS = 23, + IBV_RATE_1200_GBPS = 24, }; /** * ibv_rate_to_mult - Convert the IB rate enum to a multiple of the * base rate of 2.5 Gbit/sec. For example, IBV_RATE_5_GBPS will be * converted to 2, since 5 Gbit/sec is 2 * 2.5 Gbit/sec. * @rate: rate to convert. */ int __attribute_const ibv_rate_to_mult(enum ibv_rate rate); /** * mult_to_ibv_rate - Convert a multiple of 2.5 Gbit/sec to an IB rate enum. * @mult: multiple to convert. */ enum ibv_rate __attribute_const mult_to_ibv_rate(int mult); /** * ibv_rate_to_mbps - Convert the IB rate enum to Mbit/sec. * For example, IBV_RATE_5_GBPS will return the value 5000. * @rate: rate to convert. */ int __attribute_const ibv_rate_to_mbps(enum ibv_rate rate); /** * mbps_to_ibv_rate - Convert a Mbit/sec value to an IB rate enum. * @mbps: value to convert. 
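 *
 * Taken together with the conversions documented above, a caller would
 * expect, for example:
 *
 *   ibv_rate_to_mult(IBV_RATE_5_GBPS)  == 2
 *   mult_to_ibv_rate(2)                == IBV_RATE_5_GBPS
 *   ibv_rate_to_mbps(IBV_RATE_5_GBPS)  == 5000
 *   mbps_to_ibv_rate(5000)             == IBV_RATE_5_GBPS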
*/ enum ibv_rate __attribute_const mbps_to_ibv_rate(int mbps) __attribute_const; struct ibv_ah_attr { struct ibv_global_route grh; uint16_t dlid; uint8_t sl; uint8_t src_path_bits; uint8_t static_rate; uint8_t is_global; uint8_t port_num; }; enum ibv_srq_attr_mask { IBV_SRQ_MAX_WR = 1 << 0, IBV_SRQ_LIMIT = 1 << 1 }; struct ibv_srq_attr { uint32_t max_wr; uint32_t max_sge; uint32_t srq_limit; }; struct ibv_srq_init_attr { void *srq_context; struct ibv_srq_attr attr; }; enum ibv_srq_type { IBV_SRQT_BASIC, IBV_SRQT_XRC }; enum ibv_srq_init_attr_mask { IBV_SRQ_INIT_ATTR_TYPE = 1 << 0, IBV_SRQ_INIT_ATTR_PD = 1 << 1, IBV_SRQ_INIT_ATTR_XRCD = 1 << 2, IBV_SRQ_INIT_ATTR_CQ = 1 << 3, IBV_SRQ_INIT_ATTR_RESERVED = 1 << 4 }; struct ibv_srq_init_attr_ex { void *srq_context; struct ibv_srq_attr attr; uint32_t comp_mask; enum ibv_srq_type srq_type; struct ibv_pd *pd; struct ibv_xrcd *xrcd; struct ibv_cq *cq; }; enum ibv_wq_type { IBV_WQT_RQ }; enum ibv_wq_init_attr_mask { IBV_WQ_INIT_ATTR_FLAGS = 1 << 0, IBV_WQ_INIT_ATTR_RESERVED = 1 << 1, }; enum ibv_wq_flags { IBV_WQ_FLAGS_CVLAN_STRIPPING = 1 << 0, IBV_WQ_FLAGS_SCATTER_FCS = 1 << 1, IBV_WQ_FLAGS_RESERVED = 1 << 2, }; struct ibv_wq_init_attr { void *wq_context; enum ibv_wq_type wq_type; uint32_t max_wr; uint32_t max_sge; struct ibv_pd *pd; struct ibv_cq *cq; uint32_t comp_mask; /* Use ibv_wq_init_attr_mask */ uint32_t create_flags; /* use ibv_wq_flags */ }; enum ibv_wq_state { IBV_WQS_RESET, IBV_WQS_RDY, IBV_WQS_ERR, IBV_WQS_UNKNOWN }; enum ibv_wq_attr_mask { IBV_WQ_ATTR_STATE = 1 << 0, IBV_WQ_ATTR_CURR_STATE = 1 << 1, IBV_WQ_ATTR_FLAGS = 1 << 2, IBV_WQ_ATTR_RESERVED = 1 << 3, }; struct ibv_wq_attr { /* enum ibv_wq_attr_mask */ uint32_t attr_mask; /* Move the WQ to this state */ enum ibv_wq_state wq_state; /* Assume this is the current WQ state */ enum ibv_wq_state curr_wq_state; uint32_t flags; /* Use ibv_wq_flags */ uint32_t flags_mask; /* Use ibv_wq_flags */ }; /* * Receive Work Queue Indirection Table. * It's used in order to distribute incoming packets between different * Receive Work Queues. Associating Receive WQs with different CPU cores * allows to workload the traffic between different CPU cores. * The Indirection Table can contain only WQs of type IBV_WQT_RQ. 
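 *
 * A minimal creation sketch ("ctx" and the WQs are hypothetical, error
 * handling omitted); a table of size 2^2 spreads receives over four RQs:
 *
 *   struct ibv_wq *wqs[4] = { wq0, wq1, wq2, wq3 };
 *   struct ibv_rwq_ind_table_init_attr ia = {
 *           .log_ind_tbl_size = 2,
 *           .ind_tbl          = wqs,
 *           .comp_mask        = 0,
 *   };
 *   struct ibv_rwq_ind_table *tbl = ibv_create_rwq_ind_table(ctx, &ia);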
*/ struct ibv_rwq_ind_table { struct ibv_context *context; int ind_tbl_handle; int ind_tbl_num; uint32_t comp_mask; }; enum ibv_ind_table_init_attr_mask { IBV_CREATE_IND_TABLE_RESERVED = (1 << 0) }; /* * Receive Work Queue Indirection Table attributes */ struct ibv_rwq_ind_table_init_attr { uint32_t log_ind_tbl_size; /* Each entry is a pointer to a Receive Work Queue */ struct ibv_wq **ind_tbl; uint32_t comp_mask; }; enum ibv_qp_type { IBV_QPT_RC = 2, IBV_QPT_UC, IBV_QPT_UD, IBV_QPT_RAW_PACKET = 8, IBV_QPT_XRC_SEND = 9, IBV_QPT_XRC_RECV }; struct ibv_qp_cap { uint32_t max_send_wr; uint32_t max_recv_wr; uint32_t max_send_sge; uint32_t max_recv_sge; uint32_t max_inline_data; }; struct ibv_qp_init_attr { void *qp_context; struct ibv_cq *send_cq; struct ibv_cq *recv_cq; struct ibv_srq *srq; struct ibv_qp_cap cap; enum ibv_qp_type qp_type; int sq_sig_all; }; enum ibv_qp_init_attr_mask { IBV_QP_INIT_ATTR_PD = 1 << 0, IBV_QP_INIT_ATTR_XRCD = 1 << 1, IBV_QP_INIT_ATTR_CREATE_FLAGS = 1 << 2, IBV_QP_INIT_ATTR_MAX_TSO_HEADER = 1 << 3, IBV_QP_INIT_ATTR_IND_TABLE = 1 << 4, IBV_QP_INIT_ATTR_RX_HASH = 1 << 5, IBV_QP_INIT_ATTR_RESERVED = 1 << 6 }; enum ibv_qp_create_flags { IBV_QP_CREATE_BLOCK_SELF_MCAST_LB = 1 << 1, IBV_QP_CREATE_SCATTER_FCS = 1 << 8, IBV_QP_CREATE_CVLAN_STRIPPING = 1 << 9, }; struct ibv_rx_hash_conf { /* enum ibv_rx_hash_function_flags */ uint8_t rx_hash_function; uint8_t rx_hash_key_len; uint8_t *rx_hash_key; /* enum ibv_rx_hash_fields */ uint64_t rx_hash_fields_mask; }; struct ibv_qp_init_attr_ex { void *qp_context; struct ibv_cq *send_cq; struct ibv_cq *recv_cq; struct ibv_srq *srq; struct ibv_qp_cap cap; enum ibv_qp_type qp_type; int sq_sig_all; uint32_t comp_mask; struct ibv_pd *pd; struct ibv_xrcd *xrcd; uint32_t create_flags; uint16_t max_tso_header; struct ibv_rwq_ind_table *rwq_ind_tbl; struct ibv_rx_hash_conf rx_hash_conf; }; enum ibv_qp_open_attr_mask { IBV_QP_OPEN_ATTR_NUM = 1 << 0, IBV_QP_OPEN_ATTR_XRCD = 1 << 1, IBV_QP_OPEN_ATTR_CONTEXT = 1 << 2, IBV_QP_OPEN_ATTR_TYPE = 1 << 3, IBV_QP_OPEN_ATTR_RESERVED = 1 << 4 }; struct ibv_qp_open_attr { uint32_t comp_mask; uint32_t qp_num; struct ibv_xrcd *xrcd; void *qp_context; enum ibv_qp_type qp_type; }; enum ibv_qp_attr_mask { IBV_QP_STATE = 1 << 0, IBV_QP_CUR_STATE = 1 << 1, IBV_QP_EN_SQD_ASYNC_NOTIFY = 1 << 2, IBV_QP_ACCESS_FLAGS = 1 << 3, IBV_QP_PKEY_INDEX = 1 << 4, IBV_QP_PORT = 1 << 5, IBV_QP_QKEY = 1 << 6, IBV_QP_AV = 1 << 7, IBV_QP_PATH_MTU = 1 << 8, IBV_QP_TIMEOUT = 1 << 9, IBV_QP_RETRY_CNT = 1 << 10, IBV_QP_RNR_RETRY = 1 << 11, IBV_QP_RQ_PSN = 1 << 12, IBV_QP_MAX_QP_RD_ATOMIC = 1 << 13, IBV_QP_ALT_PATH = 1 << 14, IBV_QP_MIN_RNR_TIMER = 1 << 15, IBV_QP_SQ_PSN = 1 << 16, IBV_QP_MAX_DEST_RD_ATOMIC = 1 << 17, IBV_QP_PATH_MIG_STATE = 1 << 18, IBV_QP_CAP = 1 << 19, IBV_QP_DEST_QPN = 1 << 20, IBV_QP_RATE_LIMIT = 1 << 25, }; enum ibv_qp_state { IBV_QPS_RESET, IBV_QPS_INIT, IBV_QPS_RTR, IBV_QPS_RTS, IBV_QPS_SQD, IBV_QPS_SQE, IBV_QPS_ERR, IBV_QPS_UNKNOWN }; enum ibv_mig_state { IBV_MIG_MIGRATED, IBV_MIG_REARM, IBV_MIG_ARMED }; struct ibv_qp_attr { enum ibv_qp_state qp_state; enum ibv_qp_state cur_qp_state; enum ibv_mtu path_mtu; enum ibv_mig_state path_mig_state; uint32_t qkey; uint32_t rq_psn; uint32_t sq_psn; uint32_t dest_qp_num; int qp_access_flags; struct ibv_qp_cap cap; struct ibv_ah_attr ah_attr; struct ibv_ah_attr alt_ah_attr; uint16_t pkey_index; uint16_t alt_pkey_index; uint8_t en_sqd_async_notify; uint8_t sq_draining; uint8_t max_rd_atomic; uint8_t max_dest_rd_atomic; uint8_t min_rnr_timer; uint8_t port_num; uint8_t 
timeout; uint8_t retry_cnt; uint8_t rnr_retry; uint8_t alt_port_num; uint8_t alt_timeout; uint32_t rate_limit; }; enum ibv_wr_opcode { IBV_WR_RDMA_WRITE, IBV_WR_RDMA_WRITE_WITH_IMM, IBV_WR_SEND, IBV_WR_SEND_WITH_IMM, IBV_WR_RDMA_READ, IBV_WR_ATOMIC_CMP_AND_SWP, IBV_WR_ATOMIC_FETCH_AND_ADD, IBV_WR_LOCAL_INV, IBV_WR_BIND_MW, IBV_WR_SEND_WITH_INV, IBV_WR_TSO, }; enum ibv_send_flags { IBV_SEND_FENCE = 1 << 0, IBV_SEND_SIGNALED = 1 << 1, IBV_SEND_SOLICITED = 1 << 2, IBV_SEND_INLINE = 1 << 3, IBV_SEND_IP_CSUM = 1 << 4 }; struct ibv_sge { uint64_t addr; uint32_t length; uint32_t lkey; }; struct ibv_send_wr { uint64_t wr_id; struct ibv_send_wr *next; struct ibv_sge *sg_list; int num_sge; enum ibv_wr_opcode opcode; int send_flags; __be32 imm_data; union { struct { uint64_t remote_addr; uint32_t rkey; } rdma; struct { uint64_t remote_addr; uint64_t compare_add; uint64_t swap; uint32_t rkey; } atomic; struct { struct ibv_ah *ah; uint32_t remote_qpn; uint32_t remote_qkey; } ud; } wr; union { struct { uint32_t remote_srqn; } xrc; } qp_type; union { struct { struct ibv_mw *mw; uint32_t rkey; struct ibv_mw_bind_info bind_info; } bind_mw; struct { void *hdr; uint16_t hdr_sz; uint16_t mss; } tso; }; }; struct ibv_recv_wr { uint64_t wr_id; struct ibv_recv_wr *next; struct ibv_sge *sg_list; int num_sge; }; struct ibv_mw_bind { uint64_t wr_id; int send_flags; struct ibv_mw_bind_info bind_info; }; struct ibv_srq { struct ibv_context *context; void *srq_context; struct ibv_pd *pd; uint32_t handle; pthread_mutex_t mutex; pthread_cond_t cond; uint32_t events_completed; }; /* * Work Queue. QP can be created without internal WQs "packaged" inside it, * this QP can be configured to use "external" WQ object as its * receive/send queue. * WQ associated (many to one) with Completion Queue it owns WQ properties * (PD, WQ size etc). * WQ of type IBV_WQT_RQ: * - Contains receive WQEs, in this case its PD serves as scatter as well. * - Exposes post receive function to be used to post a list of work * requests (WRs) to its receive queue. 
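 *
 * A minimal post-receive sketch for such a WQ ("wq", "buf", "len" and "lkey"
 * are hypothetical, error handling omitted):
 *
 *   struct ibv_sge sge = { .addr = (uintptr_t)buf, .length = len, .lkey = lkey };
 *   struct ibv_recv_wr wr = { .wr_id = 1, .sg_list = &sge, .num_sge = 1 };
 *   struct ibv_recv_wr *bad_wr;
 *
 *   if (ibv_post_wq_recv(wq, &wr, &bad_wr))
 *           ... bad_wr points at the request that could not be posted ...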
*/ struct ibv_wq { struct ibv_context *context; void *wq_context; struct ibv_pd *pd; struct ibv_cq *cq; uint32_t wq_num; uint32_t handle; enum ibv_wq_state state; enum ibv_wq_type wq_type; int (*post_recv)(struct ibv_wq *current, struct ibv_recv_wr *recv_wr, struct ibv_recv_wr **bad_recv_wr); pthread_mutex_t mutex; pthread_cond_t cond; uint32_t events_completed; uint32_t comp_mask; }; struct ibv_qp { struct ibv_context *context; void *qp_context; struct ibv_pd *pd; struct ibv_cq *send_cq; struct ibv_cq *recv_cq; struct ibv_srq *srq; uint32_t handle; uint32_t qp_num; enum ibv_qp_state state; enum ibv_qp_type qp_type; pthread_mutex_t mutex; pthread_cond_t cond; uint32_t events_completed; }; struct ibv_comp_channel { struct ibv_context *context; int fd; int refcnt; }; struct ibv_cq { struct ibv_context *context; struct ibv_comp_channel *channel; void *cq_context; uint32_t handle; int cqe; pthread_mutex_t mutex; pthread_cond_t cond; uint32_t comp_events_completed; uint32_t async_events_completed; }; struct ibv_poll_cq_attr { uint32_t comp_mask; }; struct ibv_cq_ex { struct ibv_context *context; struct ibv_comp_channel *channel; void *cq_context; uint32_t handle; int cqe; pthread_mutex_t mutex; pthread_cond_t cond; uint32_t comp_events_completed; uint32_t async_events_completed; uint32_t comp_mask; enum ibv_wc_status status; uint64_t wr_id; int (*start_poll)(struct ibv_cq_ex *current, struct ibv_poll_cq_attr *attr); int (*next_poll)(struct ibv_cq_ex *current); void (*end_poll)(struct ibv_cq_ex *current); enum ibv_wc_opcode (*read_opcode)(struct ibv_cq_ex *current); uint32_t (*read_vendor_err)(struct ibv_cq_ex *current); uint32_t (*read_byte_len)(struct ibv_cq_ex *current); uint32_t (*read_imm_data)(struct ibv_cq_ex *current); uint32_t (*read_qp_num)(struct ibv_cq_ex *current); uint32_t (*read_src_qp)(struct ibv_cq_ex *current); int (*read_wc_flags)(struct ibv_cq_ex *current); uint32_t (*read_slid)(struct ibv_cq_ex *current); uint8_t (*read_sl)(struct ibv_cq_ex *current); uint8_t (*read_dlid_path_bits)(struct ibv_cq_ex *current); uint64_t (*read_completion_ts)(struct ibv_cq_ex *current); uint16_t (*read_cvlan)(struct ibv_cq_ex *current); uint32_t (*read_flow_tag)(struct ibv_cq_ex *current); }; static inline struct ibv_cq *ibv_cq_ex_to_cq(struct ibv_cq_ex *cq) { return (struct ibv_cq *)cq; } static inline int ibv_start_poll(struct ibv_cq_ex *cq, struct ibv_poll_cq_attr *attr) { return cq->start_poll(cq, attr); } static inline int ibv_next_poll(struct ibv_cq_ex *cq) { return cq->next_poll(cq); } static inline void ibv_end_poll(struct ibv_cq_ex *cq) { cq->end_poll(cq); } static inline enum ibv_wc_opcode ibv_wc_read_opcode(struct ibv_cq_ex *cq) { return cq->read_opcode(cq); } static inline uint32_t ibv_wc_read_vendor_err(struct ibv_cq_ex *cq) { return cq->read_vendor_err(cq); } static inline uint32_t ibv_wc_read_byte_len(struct ibv_cq_ex *cq) { return cq->read_byte_len(cq); } static inline uint32_t ibv_wc_read_imm_data(struct ibv_cq_ex *cq) { return cq->read_imm_data(cq); } static inline uint32_t ibv_wc_read_qp_num(struct ibv_cq_ex *cq) { return cq->read_qp_num(cq); } static inline uint32_t ibv_wc_read_src_qp(struct ibv_cq_ex *cq) { return cq->read_src_qp(cq); } static inline int ibv_wc_read_wc_flags(struct ibv_cq_ex *cq) { return cq->read_wc_flags(cq); } static inline uint32_t ibv_wc_read_slid(struct ibv_cq_ex *cq) { return cq->read_slid(cq); } static inline uint8_t ibv_wc_read_sl(struct ibv_cq_ex *cq) { return cq->read_sl(cq); } static inline uint8_t ibv_wc_read_dlid_path_bits(struct ibv_cq_ex 
*cq) { return cq->read_dlid_path_bits(cq); } static inline uint64_t ibv_wc_read_completion_ts(struct ibv_cq_ex *cq) { return cq->read_completion_ts(cq); } static inline uint16_t ibv_wc_read_cvlan(struct ibv_cq_ex *cq) { return cq->read_cvlan(cq); } static inline uint32_t ibv_wc_read_flow_tag(struct ibv_cq_ex *cq) { return cq->read_flow_tag(cq); } static inline int ibv_post_wq_recv(struct ibv_wq *wq, struct ibv_recv_wr *recv_wr, struct ibv_recv_wr **bad_recv_wr) { return wq->post_recv(wq, recv_wr, bad_recv_wr); } struct ibv_ah { struct ibv_context *context; struct ibv_pd *pd; uint32_t handle; }; enum ibv_flow_flags { IBV_FLOW_ATTR_FLAGS_ALLOW_LOOP_BACK = 1 << 0, IBV_FLOW_ATTR_FLAGS_DONT_TRAP = 1 << 1, }; enum ibv_flow_attr_type { /* steering according to rule specifications */ IBV_FLOW_ATTR_NORMAL = 0x0, /* default unicast and multicast rule - * receive all Eth traffic which isn't steered to any QP */ IBV_FLOW_ATTR_ALL_DEFAULT = 0x1, /* default multicast rule - * receive all Eth multicast traffic which isn't steered to any QP */ IBV_FLOW_ATTR_MC_DEFAULT = 0x2, /* sniffer rule - receive all port traffic */ IBV_FLOW_ATTR_SNIFFER = 0x3, }; enum ibv_flow_spec_type { IBV_FLOW_SPEC_ETH = 0x20, IBV_FLOW_SPEC_IPV4 = 0x30, IBV_FLOW_SPEC_IPV6 = 0x31, IBV_FLOW_SPEC_IPV4_EXT = 0x32, IBV_FLOW_SPEC_TCP = 0x40, IBV_FLOW_SPEC_UDP = 0x41, IBV_FLOW_SPEC_VXLAN_TUNNEL = 0x50, IBV_FLOW_SPEC_INNER = 0x100, IBV_FLOW_SPEC_ACTION_TAG = 0x1000, IBV_FLOW_SPEC_ACTION_DROP = 0x1001, }; struct ibv_flow_eth_filter { uint8_t dst_mac[6]; uint8_t src_mac[6]; uint16_t ether_type; /* * same layout as 802.1q: prio 3, cfi 1, vlan id 12 */ uint16_t vlan_tag; }; struct ibv_flow_spec_eth { enum ibv_flow_spec_type type; uint16_t size; struct ibv_flow_eth_filter val; struct ibv_flow_eth_filter mask; }; struct ibv_flow_ipv4_filter { uint32_t src_ip; uint32_t dst_ip; }; struct ibv_flow_spec_ipv4 { enum ibv_flow_spec_type type; uint16_t size; struct ibv_flow_ipv4_filter val; struct ibv_flow_ipv4_filter mask; }; struct ibv_flow_ipv4_ext_filter { uint32_t src_ip; uint32_t dst_ip; uint8_t proto; uint8_t tos; uint8_t ttl; uint8_t flags; }; struct ibv_flow_spec_ipv4_ext { enum ibv_flow_spec_type type; uint16_t size; struct ibv_flow_ipv4_ext_filter val; struct ibv_flow_ipv4_ext_filter mask; }; struct ibv_flow_ipv6_filter { uint8_t src_ip[16]; uint8_t dst_ip[16]; uint32_t flow_label; uint8_t next_hdr; uint8_t traffic_class; uint8_t hop_limit; }; struct ibv_flow_spec_ipv6 { enum ibv_flow_spec_type type; uint16_t size; struct ibv_flow_ipv6_filter val; struct ibv_flow_ipv6_filter mask; }; struct ibv_flow_tcp_udp_filter { uint16_t dst_port; uint16_t src_port; }; struct ibv_flow_spec_tcp_udp { enum ibv_flow_spec_type type; uint16_t size; struct ibv_flow_tcp_udp_filter val; struct ibv_flow_tcp_udp_filter mask; }; struct ibv_flow_tunnel_filter { uint32_t tunnel_id; }; struct ibv_flow_spec_tunnel { enum ibv_flow_spec_type type; uint16_t size; struct ibv_flow_tunnel_filter val; struct ibv_flow_tunnel_filter mask; }; struct ibv_flow_spec_action_tag { enum ibv_flow_spec_type type; uint16_t size; uint32_t tag_id; }; struct ibv_flow_spec_action_drop { enum ibv_flow_spec_type type; uint16_t size; }; struct ibv_flow_spec { union { struct { enum ibv_flow_spec_type type; uint16_t size; } hdr; struct ibv_flow_spec_eth eth; struct ibv_flow_spec_ipv4 ipv4; struct ibv_flow_spec_tcp_udp tcp_udp; struct ibv_flow_spec_ipv4_ext ipv4_ext; struct ibv_flow_spec_ipv6 ipv6; struct ibv_flow_spec_tunnel tunnel; struct ibv_flow_spec_action_tag flow_tag; struct 
ibv_flow_spec_action_drop drop; }; }; struct ibv_flow_attr { uint32_t comp_mask; enum ibv_flow_attr_type type; uint16_t size; uint16_t priority; uint8_t num_of_specs; uint8_t port; uint32_t flags; /* Following are the optional layers according to user request * struct ibv_flow_spec_xxx [L2] * struct ibv_flow_spec_yyy [L3/L4] */ }; struct ibv_flow { uint32_t comp_mask; struct ibv_context *context; uint32_t handle; }; struct ibv_device; struct ibv_context; /* Obsolete, never used, do not touch */ struct _ibv_device_ops { struct ibv_context * (*_dummy1)(struct ibv_device *device, int cmd_fd); void (*_dummy2)(struct ibv_context *context); }; enum { IBV_SYSFS_NAME_MAX = 64, IBV_SYSFS_PATH_MAX = 256 }; struct ibv_device { struct _ibv_device_ops _ops; enum ibv_node_type node_type; enum ibv_transport_type transport_type; /* Name of underlying kernel IB device, eg "mthca0" */ char name[IBV_SYSFS_NAME_MAX]; /* Name of uverbs device, eg "uverbs0" */ char dev_name[IBV_SYSFS_NAME_MAX]; /* Path to infiniband_verbs class device in sysfs */ char dev_path[IBV_SYSFS_PATH_MAX]; /* Path to infiniband class device in sysfs */ char ibdev_path[IBV_SYSFS_PATH_MAX]; }; struct ibv_context_ops { int (*query_device)(struct ibv_context *context, struct ibv_device_attr *device_attr); int (*query_port)(struct ibv_context *context, uint8_t port_num, struct ibv_port_attr *port_attr); struct ibv_pd * (*alloc_pd)(struct ibv_context *context); int (*dealloc_pd)(struct ibv_pd *pd); struct ibv_mr * (*reg_mr)(struct ibv_pd *pd, void *addr, size_t length, int access); int (*rereg_mr)(struct ibv_mr *mr, int flags, struct ibv_pd *pd, void *addr, size_t length, int access); int (*dereg_mr)(struct ibv_mr *mr); struct ibv_mw * (*alloc_mw)(struct ibv_pd *pd, enum ibv_mw_type type); int (*bind_mw)(struct ibv_qp *qp, struct ibv_mw *mw, struct ibv_mw_bind *mw_bind); int (*dealloc_mw)(struct ibv_mw *mw); struct ibv_cq * (*create_cq)(struct ibv_context *context, int cqe, struct ibv_comp_channel *channel, int comp_vector); int (*poll_cq)(struct ibv_cq *cq, int num_entries, struct ibv_wc *wc); int (*req_notify_cq)(struct ibv_cq *cq, int solicited_only); void (*cq_event)(struct ibv_cq *cq); int (*resize_cq)(struct ibv_cq *cq, int cqe); int (*destroy_cq)(struct ibv_cq *cq); struct ibv_srq * (*create_srq)(struct ibv_pd *pd, struct ibv_srq_init_attr *srq_init_attr); int (*modify_srq)(struct ibv_srq *srq, struct ibv_srq_attr *srq_attr, int srq_attr_mask); int (*query_srq)(struct ibv_srq *srq, struct ibv_srq_attr *srq_attr); int (*destroy_srq)(struct ibv_srq *srq); int (*post_srq_recv)(struct ibv_srq *srq, struct ibv_recv_wr *recv_wr, struct ibv_recv_wr **bad_recv_wr); struct ibv_qp * (*create_qp)(struct ibv_pd *pd, struct ibv_qp_init_attr *attr); int (*query_qp)(struct ibv_qp *qp, struct ibv_qp_attr *attr, int attr_mask, struct ibv_qp_init_attr *init_attr); int (*modify_qp)(struct ibv_qp *qp, struct ibv_qp_attr *attr, int attr_mask); int (*destroy_qp)(struct ibv_qp *qp); int (*post_send)(struct ibv_qp *qp, struct ibv_send_wr *wr, struct ibv_send_wr **bad_wr); int (*post_recv)(struct ibv_qp *qp, struct ibv_recv_wr *wr, struct ibv_recv_wr **bad_wr); struct ibv_ah * (*create_ah)(struct ibv_pd *pd, struct ibv_ah_attr *attr); int (*destroy_ah)(struct ibv_ah *ah); int (*attach_mcast)(struct ibv_qp *qp, const union ibv_gid *gid, uint16_t lid); int (*detach_mcast)(struct ibv_qp *qp, const union ibv_gid *gid, uint16_t lid); void (*async_event)(struct ibv_async_event *event); }; struct ibv_context { struct ibv_device *device; struct ibv_context_ops ops; 
int cmd_fd; int async_fd; int num_comp_vectors; pthread_mutex_t mutex; void *abi_compat; }; enum ibv_cq_init_attr_mask { IBV_CQ_INIT_ATTR_MASK_FLAGS = 1 << 0, IBV_CQ_INIT_ATTR_MASK_RESERVED = 1 << 1 }; enum ibv_create_cq_attr_flags { IBV_CREATE_CQ_ATTR_SINGLE_THREADED = 1 << 0, IBV_CREATE_CQ_ATTR_RESERVED = 1 << 1, }; struct ibv_cq_init_attr_ex { /* Minimum number of entries required for CQ */ uint32_t cqe; /* Consumer-supplied context returned for completion events */ void *cq_context; /* Completion channel where completion events will be queued. * May be NULL if completion events will not be used. */ struct ibv_comp_channel *channel; /* Completion vector used to signal completion events. * Must be < context->num_comp_vectors. */ uint32_t comp_vector; /* Or'ed bit of enum ibv_create_cq_wc_flags. */ uint64_t wc_flags; /* compatibility mask (extended verb). Or'd flags of * enum ibv_cq_init_attr_mask */ uint32_t comp_mask; /* create cq attr flags - one or more flags from * enum ibv_create_cq_attr_flags */ uint32_t flags; }; enum ibv_values_mask { IBV_VALUES_MASK_RAW_CLOCK = 1 << 0, IBV_VALUES_MASK_RESERVED = 1 << 1 }; struct ibv_values_ex { uint32_t comp_mask; struct timespec raw_clock; }; enum verbs_context_mask { VERBS_CONTEXT_XRCD = 1 << 0, VERBS_CONTEXT_SRQ = 1 << 1, VERBS_CONTEXT_QP = 1 << 2, VERBS_CONTEXT_CREATE_FLOW = 1 << 3, VERBS_CONTEXT_DESTROY_FLOW = 1 << 4, VERBS_CONTEXT_RESERVED = 1 << 5 }; struct verbs_context { /* "grows up" - new fields go here */ int (*destroy_rwq_ind_table)(struct ibv_rwq_ind_table *rwq_ind_table); struct ibv_rwq_ind_table *(*create_rwq_ind_table)(struct ibv_context *context, struct ibv_rwq_ind_table_init_attr *init_attr); int (*destroy_wq)(struct ibv_wq *wq); int (*modify_wq)(struct ibv_wq *wq, struct ibv_wq_attr *wq_attr); struct ibv_wq * (*create_wq)(struct ibv_context *context, struct ibv_wq_init_attr *wq_init_attr); int (*query_rt_values)(struct ibv_context *context, struct ibv_values_ex *values); struct ibv_cq_ex *(*create_cq_ex)(struct ibv_context *context, struct ibv_cq_init_attr_ex *init_attr); struct verbs_ex_private *priv; int (*query_device_ex)(struct ibv_context *context, const struct ibv_query_device_ex_input *input, struct ibv_device_attr_ex *attr, size_t attr_size); int (*ibv_destroy_flow) (struct ibv_flow *flow); void (*ABI_placeholder2) (void); /* DO NOT COPY THIS GARBAGE */ struct ibv_flow * (*ibv_create_flow) (struct ibv_qp *qp, struct ibv_flow_attr *flow_attr); void (*ABI_placeholder1) (void); /* DO NOT COPY THIS GARBAGE */ struct ibv_qp *(*open_qp)(struct ibv_context *context, struct ibv_qp_open_attr *attr); struct ibv_qp *(*create_qp_ex)(struct ibv_context *context, struct ibv_qp_init_attr_ex *qp_init_attr_ex); int (*get_srq_num)(struct ibv_srq *srq, uint32_t *srq_num); struct ibv_srq * (*create_srq_ex)(struct ibv_context *context, struct ibv_srq_init_attr_ex *srq_init_attr_ex); struct ibv_xrcd * (*open_xrcd)(struct ibv_context *context, struct ibv_xrcd_init_attr *xrcd_init_attr); int (*close_xrcd)(struct ibv_xrcd *xrcd); uint64_t has_comp_mask; size_t sz; /* Must be immediately before struct ibv_context */ struct ibv_context context; /* Must be last field in the struct */ }; static inline struct verbs_context *verbs_get_ctx(struct ibv_context *ctx) { return (ctx->abi_compat != __VERBS_ABI_IS_EXTENDED) ? 
NULL : container_of(ctx, struct verbs_context, context); } #define verbs_get_ctx_op(ctx, op) ({ \ struct verbs_context *__vctx = verbs_get_ctx(ctx); \ (!__vctx || (__vctx->sz < sizeof(*__vctx) - offsetof(struct verbs_context, op)) || \ !__vctx->op) ? NULL : __vctx; }) #define verbs_set_ctx_op(_vctx, op, ptr) ({ \ struct verbs_context *vctx = _vctx; \ if (vctx && (vctx->sz >= sizeof(*vctx) - offsetof(struct verbs_context, op))) \ vctx->op = ptr; }) /** * ibv_get_device_list - Get list of IB devices currently available * @num_devices: optional. if non-NULL, set to the number of devices * returned in the array. * * Return a NULL-terminated array of IB devices. The array can be * released with ibv_free_device_list(). */ struct ibv_device **ibv_get_device_list(int *num_devices); /** * ibv_free_device_list - Free list from ibv_get_device_list() * * Free an array of devices returned from ibv_get_device_list(). Once * the array is freed, pointers to devices that were not opened with * ibv_open_device() are no longer valid. Client code must open all * devices it intends to use before calling ibv_free_device_list(). */ void ibv_free_device_list(struct ibv_device **list); /** * ibv_get_device_name - Return kernel device name */ const char *ibv_get_device_name(struct ibv_device *device); /** * ibv_get_device_guid - Return device's node GUID */ __be64 ibv_get_device_guid(struct ibv_device *device); /** * ibv_open_device - Initialize device for use */ struct ibv_context *ibv_open_device(struct ibv_device *device); /** * ibv_close_device - Release device */ int ibv_close_device(struct ibv_context *context); /** * ibv_get_async_event - Get next async event * @event: Pointer to use to return async event * * All async events returned by ibv_get_async_event() must eventually * be acknowledged with ibv_ack_async_event(). */ int ibv_get_async_event(struct ibv_context *context, struct ibv_async_event *event); /** * ibv_ack_async_event - Acknowledge an async event * @event: Event to be acknowledged. * * All async events which are returned by ibv_get_async_event() must * be acknowledged. To avoid races, destroying an object (CQ, SRQ or * QP) will wait for all affiliated events to be acknowledged, so * there should be a one-to-one correspondence between acks and * successful gets. 
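 *
 * A typical get/handle/ack loop therefore looks roughly like this ("ctx" is
 * a hypothetical open device context):
 *
 *   struct ibv_async_event ev;
 *
 *   while (!ibv_get_async_event(ctx, &ev)) {
 *           ... handle ev.event_type / ev.element ...
 *           ibv_ack_async_event(&ev);
 *   }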
*/ void ibv_ack_async_event(struct ibv_async_event *event); /** * ibv_query_device - Get device properties */ int ibv_query_device(struct ibv_context *context, struct ibv_device_attr *device_attr); /** * ibv_query_port - Get port properties */ int ibv_query_port(struct ibv_context *context, uint8_t port_num, struct ibv_port_attr *port_attr); static inline int ___ibv_query_port(struct ibv_context *context, uint8_t port_num, struct ibv_port_attr *port_attr) { /* For compatibility when running with old libibverbs */ port_attr->link_layer = IBV_LINK_LAYER_UNSPECIFIED; port_attr->reserved = 0; return ibv_query_port(context, port_num, port_attr); } #define ibv_query_port(context, port_num, port_attr) \ ___ibv_query_port(context, port_num, port_attr) /** * ibv_query_gid - Get a GID table entry */ int ibv_query_gid(struct ibv_context *context, uint8_t port_num, int index, union ibv_gid *gid); /** * ibv_query_pkey - Get a P_Key table entry */ int ibv_query_pkey(struct ibv_context *context, uint8_t port_num, int index, __be16 *pkey); /** * ibv_alloc_pd - Allocate a protection domain */ struct ibv_pd *ibv_alloc_pd(struct ibv_context *context); /** * ibv_dealloc_pd - Free a protection domain */ int ibv_dealloc_pd(struct ibv_pd *pd); static inline struct ibv_flow *ibv_create_flow(struct ibv_qp *qp, struct ibv_flow_attr *flow) { struct verbs_context *vctx = verbs_get_ctx_op(qp->context, ibv_create_flow); if (!vctx || !vctx->ibv_create_flow) { errno = ENOSYS; return NULL; } return vctx->ibv_create_flow(qp, flow); } static inline int ibv_destroy_flow(struct ibv_flow *flow_id) { struct verbs_context *vctx = verbs_get_ctx_op(flow_id->context, ibv_destroy_flow); if (!vctx || !vctx->ibv_destroy_flow) return -ENOSYS; return vctx->ibv_destroy_flow(flow_id); } /** * ibv_open_xrcd - Open an extended connection domain */ static inline struct ibv_xrcd * ibv_open_xrcd(struct ibv_context *context, struct ibv_xrcd_init_attr *xrcd_init_attr) { struct verbs_context *vctx = verbs_get_ctx_op(context, open_xrcd); if (!vctx) { errno = ENOSYS; return NULL; } return vctx->open_xrcd(context, xrcd_init_attr); } /** * ibv_close_xrcd - Close an extended connection domain */ static inline int ibv_close_xrcd(struct ibv_xrcd *xrcd) { struct verbs_context *vctx = verbs_get_ctx(xrcd->context); return vctx->close_xrcd(xrcd); } /** * ibv_reg_mr - Register a memory region */ struct ibv_mr *ibv_reg_mr(struct ibv_pd *pd, void *addr, size_t length, int access); enum ibv_rereg_mr_err_code { /* Old MR is valid, invalid input */ IBV_REREG_MR_ERR_INPUT = -1, /* Old MR is valid, failed via don't fork on new address range */ IBV_REREG_MR_ERR_DONT_FORK_NEW = -2, /* New MR is valid, failed via do fork on old address range */ IBV_REREG_MR_ERR_DO_FORK_OLD = -3, /* MR shouldn't be used, command error */ IBV_REREG_MR_ERR_CMD = -4, /* MR shouldn't be used, command error, invalid fork state on new address range */ IBV_REREG_MR_ERR_CMD_AND_DO_FORK_NEW = -5, }; /** * ibv_rereg_mr - Re-Register a memory region */ int ibv_rereg_mr(struct ibv_mr *mr, int flags, struct ibv_pd *pd, void *addr, size_t length, int access); /** * ibv_dereg_mr - Deregister a memory region */ int ibv_dereg_mr(struct ibv_mr *mr); /** * ibv_alloc_mw - Allocate a memory window */ static inline struct ibv_mw *ibv_alloc_mw(struct ibv_pd *pd, enum ibv_mw_type type) { struct ibv_mw *mw; if (!pd->context->ops.alloc_mw) { errno = ENOSYS; return NULL; } mw = pd->context->ops.alloc_mw(pd, type); return mw; } /** * ibv_dealloc_mw - Free a memory window */ static inline int ibv_dealloc_mw(struct 
ibv_mw *mw) { return mw->context->ops.dealloc_mw(mw); } /** * ibv_inc_rkey - Increase the 8 lsb in the given rkey */ static inline uint32_t ibv_inc_rkey(uint32_t rkey) { const uint32_t mask = 0x000000ff; uint8_t newtag = (uint8_t)((rkey + 1) & mask); return (rkey & ~mask) | newtag; } /** * ibv_bind_mw - Bind a memory window to a region */ static inline int ibv_bind_mw(struct ibv_qp *qp, struct ibv_mw *mw, struct ibv_mw_bind *mw_bind) { if (mw->type != IBV_MW_TYPE_1) return EINVAL; return mw->context->ops.bind_mw(qp, mw, mw_bind); } /** * ibv_create_comp_channel - Create a completion event channel */ struct ibv_comp_channel *ibv_create_comp_channel(struct ibv_context *context); /** * ibv_destroy_comp_channel - Destroy a completion event channel */ int ibv_destroy_comp_channel(struct ibv_comp_channel *channel); /** * ibv_create_cq - Create a completion queue * @context - Context CQ will be attached to * @cqe - Minimum number of entries required for CQ * @cq_context - Consumer-supplied context returned for completion events * @channel - Completion channel where completion events will be queued. * May be NULL if completion events will not be used. * @comp_vector - Completion vector used to signal completion events. * Must be >= 0 and < context->num_comp_vectors. */ struct ibv_cq *ibv_create_cq(struct ibv_context *context, int cqe, void *cq_context, struct ibv_comp_channel *channel, int comp_vector); /** * ibv_create_cq_ex - Create a completion queue * @context - Context CQ will be attached to * @cq_attr - Attributes to create the CQ with */ static inline struct ibv_cq_ex *ibv_create_cq_ex(struct ibv_context *context, struct ibv_cq_init_attr_ex *cq_attr) { struct verbs_context *vctx = verbs_get_ctx_op(context, create_cq_ex); if (!vctx) { errno = ENOSYS; return NULL; } if (cq_attr->comp_mask & ~(IBV_CQ_INIT_ATTR_MASK_RESERVED - 1)) { errno = EINVAL; return NULL; } return vctx->create_cq_ex(context, cq_attr); } /** * ibv_resize_cq - Modifies the capacity of the CQ. * @cq: The CQ to resize. * @cqe: The minimum size of the CQ. * * Users can examine the cq structure to determine the actual CQ size. */ int ibv_resize_cq(struct ibv_cq *cq, int cqe); /** * ibv_destroy_cq - Destroy a completion queue */ int ibv_destroy_cq(struct ibv_cq *cq); /** * ibv_get_cq_event - Read next CQ event * @channel: Channel to get next event from. * @cq: Used to return pointer to CQ. * @cq_context: Used to return consumer-supplied CQ context. * * All completion events returned by ibv_get_cq_event() must * eventually be acknowledged with ibv_ack_cq_events(). */ int ibv_get_cq_event(struct ibv_comp_channel *channel, struct ibv_cq **cq, void **cq_context); /** * ibv_ack_cq_events - Acknowledge CQ completion events * @cq: CQ to acknowledge events for * @nevents: Number of events to acknowledge. * * All completion events which are returned by ibv_get_cq_event() must * be acknowledged. To avoid races, ibv_destroy_cq() will wait for * all completion events to be acknowledged, so there should be a * one-to-one correspondence between acks and successful gets. An * application may accumulate multiple completion events and * acknowledge them in a single call to ibv_ack_cq_events() by passing * the number of events to ack in @nevents. 
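 *
 * A minimal sketch of the get/ack pairing ("channel" is a hypothetical
 * completion channel, error handling omitted):
 *
 *   struct ibv_cq *ev_cq;
 *   void *ev_ctx;
 *
 *   if (!ibv_get_cq_event(channel, &ev_cq, &ev_ctx)) {
 *           ... poll ev_cq and process its completions ...
 *           ibv_ack_cq_events(ev_cq, 1);
 *   }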
*/ void ibv_ack_cq_events(struct ibv_cq *cq, unsigned int nevents); /** * ibv_poll_cq - Poll a CQ for work completions * @cq:the CQ being polled * @num_entries:maximum number of completions to return * @wc:array of at least @num_entries of &struct ibv_wc where completions * will be returned * * Poll a CQ for (possibly multiple) completions. If the return value * is < 0, an error occurred. If the return value is >= 0, it is the * number of completions returned. If the return value is * non-negative and strictly less than num_entries, then the CQ was * emptied. */ static inline int ibv_poll_cq(struct ibv_cq *cq, int num_entries, struct ibv_wc *wc) { return cq->context->ops.poll_cq(cq, num_entries, wc); } /** * ibv_req_notify_cq - Request completion notification on a CQ. An * event will be added to the completion channel associated with the * CQ when an entry is added to the CQ. * @cq: The completion queue to request notification for. * @solicited_only: If non-zero, an event will be generated only for * the next solicited CQ entry. If zero, any CQ entry, solicited or * not, will generate an event. */ static inline int ibv_req_notify_cq(struct ibv_cq *cq, int solicited_only) { return cq->context->ops.req_notify_cq(cq, solicited_only); } /** * ibv_create_srq - Creates a SRQ associated with the specified protection * domain. * @pd: The protection domain associated with the SRQ. * @srq_init_attr: A list of initial attributes required to create the SRQ. * * srq_attr->max_wr and srq_attr->max_sge are read to determine the * requested size of the SRQ, and are set to the actual values allocated * on return. If ibv_create_srq() succeeds, then max_wr and max_sge * will always be at least as large as the requested values. */ struct ibv_srq *ibv_create_srq(struct ibv_pd *pd, struct ibv_srq_init_attr *srq_init_attr); static inline struct ibv_srq * ibv_create_srq_ex(struct ibv_context *context, struct ibv_srq_init_attr_ex *srq_init_attr_ex) { struct verbs_context *vctx; uint32_t mask = srq_init_attr_ex->comp_mask; if (!(mask & ~(IBV_SRQ_INIT_ATTR_PD | IBV_SRQ_INIT_ATTR_TYPE)) && (mask & IBV_SRQ_INIT_ATTR_PD) && (!(mask & IBV_SRQ_INIT_ATTR_TYPE) || (srq_init_attr_ex->srq_type == IBV_SRQT_BASIC))) return ibv_create_srq(srq_init_attr_ex->pd, (struct ibv_srq_init_attr *)srq_init_attr_ex); vctx = verbs_get_ctx_op(context, create_srq_ex); if (!vctx) { errno = ENOSYS; return NULL; } return vctx->create_srq_ex(context, srq_init_attr_ex); } /** * ibv_modify_srq - Modifies the attributes for the specified SRQ. * @srq: The SRQ to modify. * @srq_attr: On input, specifies the SRQ attributes to modify. On output, * the current values of selected SRQ attributes are returned. * @srq_attr_mask: A bit-mask used to specify which attributes of the SRQ * are being modified. * * The mask may contain IBV_SRQ_MAX_WR to resize the SRQ and/or * IBV_SRQ_LIMIT to set the SRQ's limit and request notification when * the number of receives queued drops below the limit. */ int ibv_modify_srq(struct ibv_srq *srq, struct ibv_srq_attr *srq_attr, int srq_attr_mask); /** * ibv_query_srq - Returns the attribute list and current values for the * specified SRQ. * @srq: The SRQ to query. * @srq_attr: The attributes of the specified SRQ.
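 *
 * For instance, a caller could read back the limit armed earlier with
 * IBV_SRQ_LIMIT (sketch, "srq" is a hypothetical SRQ):
 *
 *   struct ibv_srq_attr attr;
 *
 *   if (!ibv_query_srq(srq, &attr))
 *           ... attr.srq_limit holds the currently armed limit ...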
*/ int ibv_query_srq(struct ibv_srq *srq, struct ibv_srq_attr *srq_attr); static inline int ibv_get_srq_num(struct ibv_srq *srq, uint32_t *srq_num) { struct verbs_context *vctx = verbs_get_ctx_op(srq->context, get_srq_num); if (!vctx) return ENOSYS; return vctx->get_srq_num(srq, srq_num); } /** * ibv_destroy_srq - Destroys the specified SRQ. * @srq: The SRQ to destroy. */ int ibv_destroy_srq(struct ibv_srq *srq); /** * ibv_post_srq_recv - Posts a list of work requests to the specified SRQ. * @srq: The SRQ to post the work request on. * @recv_wr: A list of work requests to post on the receive queue. * @bad_recv_wr: On an immediate failure, this parameter will reference * the work request that failed to be posted on the QP. */ static inline int ibv_post_srq_recv(struct ibv_srq *srq, struct ibv_recv_wr *recv_wr, struct ibv_recv_wr **bad_recv_wr) { return srq->context->ops.post_srq_recv(srq, recv_wr, bad_recv_wr); } /** * ibv_create_qp - Create a queue pair. */ struct ibv_qp *ibv_create_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *qp_init_attr); static inline struct ibv_qp * ibv_create_qp_ex(struct ibv_context *context, struct ibv_qp_init_attr_ex *qp_init_attr_ex) { struct verbs_context *vctx; uint32_t mask = qp_init_attr_ex->comp_mask; if (mask == IBV_QP_INIT_ATTR_PD) return ibv_create_qp(qp_init_attr_ex->pd, (struct ibv_qp_init_attr *)qp_init_attr_ex); vctx = verbs_get_ctx_op(context, create_qp_ex); if (!vctx) { errno = ENOSYS; return NULL; } return vctx->create_qp_ex(context, qp_init_attr_ex); } /** * ibv_query_rt_values_ex - Get current real time @values of a device. * @values - in/out - defines the attributes we need to query/queried. * (Or's bits of enum ibv_values_mask on values->comp_mask field) */ static inline int ibv_query_rt_values_ex(struct ibv_context *context, struct ibv_values_ex *values) { struct verbs_context *vctx; vctx = verbs_get_ctx_op(context, query_rt_values); if (!vctx) return ENOSYS; if (values->comp_mask & ~(IBV_VALUES_MASK_RESERVED - 1)) return EINVAL; return vctx->query_rt_values(context, values); } /** * ibv_query_device_ex - Get extended device properties */ static inline int ibv_query_device_ex(struct ibv_context *context, const struct ibv_query_device_ex_input *input, struct ibv_device_attr_ex *attr) { struct verbs_context *vctx; int ret; vctx = verbs_get_ctx_op(context, query_device_ex); if (!vctx) goto legacy; ret = vctx->query_device_ex(context, input, attr, sizeof(*attr)); if (ret == ENOSYS) goto legacy; return ret; legacy: memset(attr, 0, sizeof(*attr)); ret = ibv_query_device(context, &attr->orig_attr); return ret; } /** * ibv_open_qp - Open a shareable queue pair. */ static inline struct ibv_qp * ibv_open_qp(struct ibv_context *context, struct ibv_qp_open_attr *qp_open_attr) { struct verbs_context *vctx = verbs_get_ctx_op(context, open_qp); if (!vctx) { errno = ENOSYS; return NULL; } return vctx->open_qp(context, qp_open_attr); } /** * ibv_modify_qp - Modify a queue pair. */ int ibv_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, int attr_mask); /** * ibv_query_qp - Returns the attribute list and current values for the * specified QP. * @qp: The QP to query. * @attr: The attributes of the specified QP. * @attr_mask: A bit-mask used to select specific attributes to query. * @init_attr: Additional attributes of the selected QP. * * The qp_attr_mask may be used to limit the query to gathering only the * selected attributes. 
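 *
 * For example, fetching only the current state (sketch, "qp" is a
 * hypothetical QP, error handling omitted):
 *
 *   struct ibv_qp_attr attr;
 *   struct ibv_qp_init_attr init_attr;
 *
 *   if (!ibv_query_qp(qp, &attr, IBV_QP_STATE, &init_attr))
 *           ... attr.qp_state holds the current state ...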
*/ int ibv_query_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, int attr_mask, struct ibv_qp_init_attr *init_attr); /** * ibv_destroy_qp - Destroy a queue pair. */ int ibv_destroy_qp(struct ibv_qp *qp); /* * ibv_create_wq - Creates a WQ associated with the specified protection * domain. * @context: ibv_context. * @wq_init_attr: A list of initial attributes required to create the * WQ. If WQ creation succeeds, then the attributes are updated to * the actual capabilities of the created WQ. * * wq_init_attr->max_wr and wq_init_attr->max_sge determine * the requested size of the WQ, and set to the actual values allocated * on return. * If ibv_create_wq() succeeds, then max_wr and max_sge will always be * at least as large as the requested values. * * Return Value * ibv_create_wq() returns a pointer to the created WQ, or NULL if the request * fails. */ static inline struct ibv_wq *ibv_create_wq(struct ibv_context *context, struct ibv_wq_init_attr *wq_init_attr) { struct verbs_context *vctx = verbs_get_ctx_op(context, create_wq); struct ibv_wq *wq; if (!vctx) { errno = ENOSYS; return NULL; } wq = vctx->create_wq(context, wq_init_attr); if (wq) { wq->events_completed = 0; pthread_mutex_init(&wq->mutex, NULL); pthread_cond_init(&wq->cond, NULL); } return wq; } /* * ibv_modify_wq - Modifies the attributes for the specified WQ. * @wq: The WQ to modify. * @wq_attr: On input, specifies the WQ attributes to modify. * wq_attr->attr_mask: A bit-mask used to specify which attributes of the WQ * are being modified. * On output, the current values of selected WQ attributes are returned. * * Return Value * ibv_modify_wq() returns 0 on success, or the value of errno * on failure (which indicates the failure reason). * */ static inline int ibv_modify_wq(struct ibv_wq *wq, struct ibv_wq_attr *wq_attr) { struct verbs_context *vctx = verbs_get_ctx_op(wq->context, modify_wq); if (!vctx) return ENOSYS; return vctx->modify_wq(wq, wq_attr); } /* * ibv_destroy_wq - Destroys the specified WQ. * @ibv_wq: The WQ to destroy. * Return Value * ibv_destroy_wq() returns 0 on success, or the value of errno * on failure (which indicates the failure reason). */ static inline int ibv_destroy_wq(struct ibv_wq *wq) { struct verbs_context *vctx; vctx = verbs_get_ctx_op(wq->context, destroy_wq); if (!vctx) return ENOSYS; return vctx->destroy_wq(wq); } /* * ibv_create_rwq_ind_table - Creates a receive work queue Indirection Table * @context: ibv_context. * @init_attr: A list of initial attributes required to create the Indirection Table. * Return Value * ibv_create_rwq_ind_table returns a pointer to the created * Indirection Table, or NULL if the request fails. */ static inline struct ibv_rwq_ind_table *ibv_create_rwq_ind_table(struct ibv_context *context, struct ibv_rwq_ind_table_init_attr *init_attr) { struct verbs_context *vctx; vctx = verbs_get_ctx_op(context, create_rwq_ind_table); if (!vctx) { errno = ENOSYS; return NULL; } return vctx->create_rwq_ind_table(context, init_attr); } /* * ibv_destroy_rwq_ind_table - Destroys the specified Indirection Table. * @rwq_ind_table: The Indirection Table to destroy. * Return Value * ibv_destroy_rwq_ind_table() returns 0 on success, or the value of errno * on failure (which indicates the failure reason). 
*/ static inline int ibv_destroy_rwq_ind_table(struct ibv_rwq_ind_table *rwq_ind_table) { struct verbs_context *vctx; vctx = verbs_get_ctx_op(rwq_ind_table->context, destroy_rwq_ind_table); if (!vctx) return ENOSYS; return vctx->destroy_rwq_ind_table(rwq_ind_table); } /** * ibv_post_send - Post a list of work requests to a send queue. * * If IBV_SEND_INLINE flag is set, the data buffers can be reused * immediately after the call returns. */ static inline int ibv_post_send(struct ibv_qp *qp, struct ibv_send_wr *wr, struct ibv_send_wr **bad_wr) { return qp->context->ops.post_send(qp, wr, bad_wr); } /** * ibv_post_recv - Post a list of work requests to a receive queue. */ static inline int ibv_post_recv(struct ibv_qp *qp, struct ibv_recv_wr *wr, struct ibv_recv_wr **bad_wr) { return qp->context->ops.post_recv(qp, wr, bad_wr); } /** * ibv_create_ah - Create an address handle. */ struct ibv_ah *ibv_create_ah(struct ibv_pd *pd, struct ibv_ah_attr *attr); /** * ibv_init_ah_from_wc - Initializes address handle attributes from a * work completion. * @context: Device context on which the received message arrived. * @port_num: Port on which the received message arrived. * @wc: Work completion associated with the received message. * @grh: References the received global route header. This parameter is * ignored unless the work completion indicates that the GRH is valid. * @ah_attr: Returned attributes that can be used when creating an address * handle for replying to the message. */ int ibv_init_ah_from_wc(struct ibv_context *context, uint8_t port_num, struct ibv_wc *wc, struct ibv_grh *grh, struct ibv_ah_attr *ah_attr); /** * ibv_create_ah_from_wc - Creates an address handle associated with the * sender of the specified work completion. * @pd: The protection domain associated with the address handle. * @wc: Work completion information associated with a received message. * @grh: References the received global route header. This parameter is * ignored unless the work completion indicates that the GRH is valid. * @port_num: The outbound port number to associate with the address. * * The address handle is used to reference a local or global destination * in all UD QP post sends. */ struct ibv_ah *ibv_create_ah_from_wc(struct ibv_pd *pd, struct ibv_wc *wc, struct ibv_grh *grh, uint8_t port_num); /** * ibv_destroy_ah - Destroy an address handle. */ int ibv_destroy_ah(struct ibv_ah *ah); /** * ibv_attach_mcast - Attaches the specified QP to a multicast group. * @qp: QP to attach to the multicast group. The QP must be a UD QP. * @gid: Multicast group GID. * @lid: Multicast group LID in host byte order. * * In order to route multicast packets correctly, subnet * administration must have created the multicast group and configured * the fabric appropriately. The port associated with the specified * QP must also be a member of the multicast group. */ int ibv_attach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, uint16_t lid); /** * ibv_detach_mcast - Detaches the specified QP from a multicast group. * @qp: QP to detach from the multicast group. * @gid: Multicast group GID. * @lid: Multicast group LID in host byte order. */ int ibv_detach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, uint16_t lid); /** * ibv_fork_init - Prepare data structures so that fork() may be used * safely. If this function is not called or returns a non-zero * status, then libibverbs data structures are not fork()-safe and the * effect of an application calling fork() is undefined. 
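 *
 * An application that intends to fork() would therefore typically call
 * this before creating any verbs resources (sketch):
 *
 *   if (ibv_fork_init())
 *           ... continue without fork() support, or fail early ...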
*/ int ibv_fork_init(void); /** * ibv_node_type_str - Return string describing node_type enum value */ const char *ibv_node_type_str(enum ibv_node_type node_type); /** * ibv_port_state_str - Return string describing port_state enum value */ const char *ibv_port_state_str(enum ibv_port_state port_state); /** * ibv_event_type_str - Return string describing event_type enum value */ const char *ibv_event_type_str(enum ibv_event_type event); #define ETHERNET_LL_SIZE 6 int ibv_resolve_eth_l2_from_gid(struct ibv_context *context, struct ibv_ah_attr *attr, uint8_t eth_mac[ETHERNET_LL_SIZE], uint16_t *vid); static inline int ibv_is_qpt_supported(uint32_t caps, enum ibv_qp_type qpt) { return !!(caps & (1 << qpt)); } END_C_DECLS # undef __attribute_const #endif /* INFINIBAND_VERBS_H */
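/*
 * A minimal usage sketch tying the declarations above together: list the
 * available devices, open the first one, and print the state of port 1.
 * This is illustrative only (it assumes at least one device is present and
 * abbreviates error handling); it is not part of the verbs API itself.
 *
 *   #include <infiniband/verbs.h>
 *   #include <stdio.h>
 *
 *   int main(void)
 *   {
 *           struct ibv_device **list = ibv_get_device_list(NULL);
 *           struct ibv_context *ctx;
 *           struct ibv_port_attr pattr;
 *
 *           if (!list || !list[0])
 *                   return 1;
 *           ctx = ibv_open_device(list[0]);
 *           if (ctx && !ibv_query_port(ctx, 1, &pattr))
 *                   printf("%s port 1: %s\n", ibv_get_device_name(list[0]),
 *                          ibv_port_state_str(pattr.state));
 *           if (ctx)
 *                   ibv_close_device(ctx);
 *           ibv_free_device_list(list);
 *           return 0;
 *   }
 */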