Index: head/contrib/ofed/libibverbs/examples/rc_pingpong.c =================================================================== --- head/contrib/ofed/libibverbs/examples/rc_pingpong.c (revision 363220) +++ head/contrib/ofed/libibverbs/examples/rc_pingpong.c (revision 363221) @@ -1,1045 +1,1045 @@ /* * Copyright (c) 2005 Topspin Communications. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ #define _GNU_SOURCE #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "pingpong.h" #include enum { PINGPONG_RECV_WRID = 1, PINGPONG_SEND_WRID = 2, }; static int page_size; static int use_odp; static int use_ts; struct pingpong_context { struct ibv_context *context; struct ibv_comp_channel *channel; struct ibv_pd *pd; struct ibv_mr *mr; union { struct ibv_cq *cq; struct ibv_cq_ex *cq_ex; } cq_s; struct ibv_qp *qp; void *buf; int size; int send_flags; int rx_depth; int pending; struct ibv_port_attr portinfo; uint64_t completion_timestamp_mask; }; static struct ibv_cq *pp_cq(struct pingpong_context *ctx) { return use_ts ? ibv_cq_ex_to_cq(ctx->cq_s.cq_ex) : ctx->cq_s.cq; } struct pingpong_dest { int lid; int qpn; int psn; union ibv_gid gid; }; static int pp_connect_ctx(struct pingpong_context *ctx, int port, int my_psn, enum ibv_mtu mtu, int sl, struct pingpong_dest *dest, int sgid_idx) { struct ibv_qp_attr attr = { .qp_state = IBV_QPS_RTR, .path_mtu = mtu, .dest_qp_num = dest->qpn, .rq_psn = dest->psn, .max_dest_rd_atomic = 1, .min_rnr_timer = 12, .ah_attr = { .is_global = 0, .dlid = dest->lid, .sl = sl, .src_path_bits = 0, .port_num = port } }; if (dest->gid.global.interface_id) { attr.ah_attr.is_global = 1; attr.ah_attr.grh.hop_limit = 1; attr.ah_attr.grh.dgid = dest->gid; attr.ah_attr.grh.sgid_index = sgid_idx; } if (ibv_modify_qp(ctx->qp, &attr, IBV_QP_STATE | IBV_QP_AV | IBV_QP_PATH_MTU | IBV_QP_DEST_QPN | IBV_QP_RQ_PSN | IBV_QP_MAX_DEST_RD_ATOMIC | IBV_QP_MIN_RNR_TIMER)) { fprintf(stderr, "Failed to modify QP to RTR\n"); return 1; } attr.qp_state = IBV_QPS_RTS; attr.timeout = 14; attr.retry_cnt = 7; attr.rnr_retry = 7; attr.sq_psn = my_psn; attr.max_rd_atomic = 1; if (ibv_modify_qp(ctx->qp, &attr, IBV_QP_STATE | IBV_QP_TIMEOUT | IBV_QP_RETRY_CNT | IBV_QP_RNR_RETRY | IBV_QP_SQ_PSN | IBV_QP_MAX_QP_RD_ATOMIC)) { fprintf(stderr, "Failed to modify QP to RTS\n"); return 1; } return 
0; }

/*
 * Client half of the out-of-band TCP handshake.
 *
 * Connects to servername:port, sends the local LID/QPN/PSN/GID record,
 * reads the server's record back and acknowledges it with "done".
 *
 * Returns a malloc()ed pingpong_dest describing the remote side (the
 * caller frees it), or NULL on any failure.
 */
static struct pingpong_dest *pp_client_exch_dest(const char *servername, int port,
						 const struct pingpong_dest *my_dest)
{
	struct addrinfo *res, *t;
	struct addrinfo hints = {
		.ai_family   = AF_UNSPEC,
		.ai_socktype = SOCK_STREAM
	};
	char *service;
	char msg[sizeof "0000:000000:000000:00000000000000000000000000000000"];
	int n;
	int sockfd = -1;
	struct pingpong_dest *rem_dest = NULL;
	char gid[33];

	if (asprintf(&service, "%d", port) < 0)
		return NULL;

	n = getaddrinfo(servername, service, &hints, &res);

	/*
	 * getaddrinfo() reports failure with any non-zero value; the sign of
	 * the EAI_* codes is platform specific, so testing "n < 0" can miss
	 * an error and continue with "res" uninitialized.
	 */
	if (n != 0) {
		fprintf(stderr, "%s for %s:%d\n", gai_strerror(n), servername, port);
		free(service);
		return NULL;
	}

	/* Try each returned address until one accepts the connection. */
	for (t = res; t; t = t->ai_next) {
		sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol);
		if (sockfd >= 0) {
			if (!connect(sockfd, t->ai_addr, t->ai_addrlen))
				break;
			close(sockfd);
			sockfd = -1;
		}
	}

	freeaddrinfo_null(res);
	free(service);

	if (sockfd < 0) {
		fprintf(stderr, "Couldn't connect to %s:%d\n", servername, port);
		return NULL;
	}

	gid_to_wire_gid(&my_dest->gid, gid);
	/* Bounded formatting: never write past msg even for oversized fields. */
	snprintf(msg, sizeof msg, "%04x:%06x:%06x:%s", my_dest->lid,
		 my_dest->qpn, my_dest->psn, gid);
	if (write(sockfd, msg, sizeof msg) != sizeof msg) {
		fprintf(stderr, "Couldn't send local address\n");
		goto out;
	}

	if (read(sockfd, msg, sizeof msg) != sizeof msg ||
	    write(sockfd, "done", sizeof "done") != sizeof "done") {
		perror("client read/write");
		fprintf(stderr, "Couldn't read/write remote address\n");
		goto out;
	}

	rem_dest = malloc(sizeof *rem_dest);
	if (!rem_dest)
		goto out;

	/*
	 * The record came from the peer: force termination and bound the GID
	 * field to the size of gid[] before parsing, and reject a record that
	 * does not contain all four fields.
	 */
	msg[sizeof msg - 1] = '\0';
	if (sscanf(msg, "%x:%x:%x:%32s", &rem_dest->lid, &rem_dest->qpn,
		   &rem_dest->psn, gid) != 4) {
		fprintf(stderr, "Couldn't parse remote address\n");
		free(rem_dest);
		rem_dest = NULL;
		goto out;
	}
	wire_gid_to_gid(gid, &rem_dest->gid);

out:
	close(sockfd);
	return rem_dest;
}

static struct pingpong_dest *pp_server_exch_dest(struct pingpong_context *ctx, int ib_port, enum ibv_mtu mtu, int port, int sl, const struct pingpong_dest *my_dest, int sgid_idx) { struct addrinfo *res, *t; struct addrinfo hints = { .ai_flags = AI_PASSIVE, .ai_family = AF_INET, .ai_socktype = SOCK_STREAM }; char *service; char msg[sizeof
"0000:000000:000000:00000000000000000000000000000000"]; int n; int sockfd = -1, connfd; struct pingpong_dest *rem_dest = NULL; char gid[33]; if (asprintf(&service, "%d", port) < 0) return NULL; n = getaddrinfo(NULL, service, &hints, &res); if (n < 0) { fprintf(stderr, "%s for port %d\n", gai_strerror(n), port); free(service); return NULL; } for (t = res; t; t = t->ai_next) { sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol); if (sockfd >= 0) { n = 1; setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &n, sizeof n); if (!bind(sockfd, t->ai_addr, t->ai_addrlen)) break; close(sockfd); sockfd = -1; } } freeaddrinfo_null(res); free(service); if (sockfd < 0) { fprintf(stderr, "Couldn't listen to port %d\n", port); return NULL; } - if (listen(sockfd, 1)) { + if (listen(sockfd, 1) < 0) { perror("listen() failed"); close(sockfd); return NULL; } connfd = accept(sockfd, NULL, NULL); close(sockfd); if (connfd < 0) { fprintf(stderr, "accept() failed\n"); return NULL; } n = read(connfd, msg, sizeof msg); if (n != sizeof msg) { perror("server read"); fprintf(stderr, "%d/%d: Couldn't read remote address\n", n, (int) sizeof msg); goto out; } rem_dest = malloc(sizeof *rem_dest); if (!rem_dest) goto out; sscanf(msg, "%x:%x:%x:%s", &rem_dest->lid, &rem_dest->qpn, &rem_dest->psn, gid); wire_gid_to_gid(gid, &rem_dest->gid); if (pp_connect_ctx(ctx, ib_port, my_dest->psn, mtu, sl, rem_dest, sgid_idx)) { fprintf(stderr, "Couldn't connect to remote QP\n"); free(rem_dest); rem_dest = NULL; goto out; } gid_to_wire_gid(&my_dest->gid, gid); sprintf(msg, "%04x:%06x:%06x:%s", my_dest->lid, my_dest->qpn, my_dest->psn, gid); if (write(connfd, msg, sizeof msg) != sizeof msg || read(connfd, msg, sizeof msg) != sizeof "done") { fprintf(stderr, "Couldn't send/recv local address\n"); free(rem_dest); rem_dest = NULL; goto out; } out: close(connfd); return rem_dest; } static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev, int size, int rx_depth, int port, int use_event) { struct 
pingpong_context *ctx; int access_flags = IBV_ACCESS_LOCAL_WRITE; ctx = calloc(1, sizeof *ctx); if (!ctx) return NULL; ctx->size = size; ctx->send_flags = IBV_SEND_SIGNALED; ctx->rx_depth = rx_depth; ctx->buf = memalign(page_size, size); if (!ctx->buf) { fprintf(stderr, "Couldn't allocate work buf.\n"); goto clean_ctx; } /* FIXME memset(ctx->buf, 0, size); */ memset(ctx->buf, 0x7b, size); ctx->context = ibv_open_device(ib_dev); if (!ctx->context) { fprintf(stderr, "Couldn't get context for %s\n", ibv_get_device_name(ib_dev)); goto clean_buffer; } if (use_event) { ctx->channel = ibv_create_comp_channel(ctx->context); if (!ctx->channel) { fprintf(stderr, "Couldn't create completion channel\n"); goto clean_device; } } else ctx->channel = NULL; ctx->pd = ibv_alloc_pd(ctx->context); if (!ctx->pd) { fprintf(stderr, "Couldn't allocate PD\n"); goto clean_comp_channel; } if (use_odp || use_ts) { const uint32_t rc_caps_mask = IBV_ODP_SUPPORT_SEND | IBV_ODP_SUPPORT_RECV; struct ibv_device_attr_ex attrx; if (ibv_query_device_ex(ctx->context, NULL, &attrx)) { fprintf(stderr, "Couldn't query device for its features\n"); goto clean_comp_channel; } if (use_odp) { if (!(attrx.odp_caps.general_caps & IBV_ODP_SUPPORT) || (attrx.odp_caps.per_transport_caps.rc_odp_caps & rc_caps_mask) != rc_caps_mask) { fprintf(stderr, "The device isn't ODP capable or does not support RC send and receive with ODP\n"); goto clean_comp_channel; } access_flags |= IBV_ACCESS_ON_DEMAND; } if (use_ts) { if (!attrx.completion_timestamp_mask) { fprintf(stderr, "The device isn't completion timestamp capable\n"); goto clean_comp_channel; } ctx->completion_timestamp_mask = attrx.completion_timestamp_mask; } } ctx->mr = ibv_reg_mr(ctx->pd, ctx->buf, size, access_flags); if (!ctx->mr) { fprintf(stderr, "Couldn't register MR\n"); goto clean_pd; } if (use_ts) { struct ibv_cq_init_attr_ex attr_ex = { .cqe = rx_depth + 1, .cq_context = NULL, .channel = ctx->channel, .comp_vector = 0, .wc_flags = 
IBV_WC_EX_WITH_COMPLETION_TIMESTAMP }; ctx->cq_s.cq_ex = ibv_create_cq_ex(ctx->context, &attr_ex); } else { ctx->cq_s.cq = ibv_create_cq(ctx->context, rx_depth + 1, NULL, ctx->channel, 0); } if (!pp_cq(ctx)) { fprintf(stderr, "Couldn't create CQ\n"); goto clean_mr; } { struct ibv_qp_attr attr; struct ibv_qp_init_attr init_attr = { .send_cq = pp_cq(ctx), .recv_cq = pp_cq(ctx), .cap = { .max_send_wr = 1, .max_recv_wr = rx_depth, .max_send_sge = 1, .max_recv_sge = 1 }, .qp_type = IBV_QPT_RC }; ctx->qp = ibv_create_qp(ctx->pd, &init_attr); if (!ctx->qp) { fprintf(stderr, "Couldn't create QP\n"); goto clean_cq; } ibv_query_qp(ctx->qp, &attr, IBV_QP_CAP, &init_attr); if (init_attr.cap.max_inline_data >= size) { ctx->send_flags |= IBV_SEND_INLINE; } } { struct ibv_qp_attr attr = { .qp_state = IBV_QPS_INIT, .pkey_index = 0, .port_num = port, .qp_access_flags = 0 }; if (ibv_modify_qp(ctx->qp, &attr, IBV_QP_STATE | IBV_QP_PKEY_INDEX | IBV_QP_PORT | IBV_QP_ACCESS_FLAGS)) { fprintf(stderr, "Failed to modify QP to INIT\n"); goto clean_qp; } } return ctx; clean_qp: ibv_destroy_qp(ctx->qp); clean_cq: ibv_destroy_cq(pp_cq(ctx)); clean_mr: ibv_dereg_mr(ctx->mr); clean_pd: ibv_dealloc_pd(ctx->pd); clean_comp_channel: if (ctx->channel) ibv_destroy_comp_channel(ctx->channel); clean_device: ibv_close_device(ctx->context); clean_buffer: free(ctx->buf); clean_ctx: free(ctx); return NULL; } static int pp_close_ctx(struct pingpong_context *ctx) { if (ibv_destroy_qp(ctx->qp)) { fprintf(stderr, "Couldn't destroy QP\n"); return 1; } if (ibv_destroy_cq(pp_cq(ctx))) { fprintf(stderr, "Couldn't destroy CQ\n"); return 1; } if (ibv_dereg_mr(ctx->mr)) { fprintf(stderr, "Couldn't deregister MR\n"); return 1; } if (ibv_dealloc_pd(ctx->pd)) { fprintf(stderr, "Couldn't deallocate PD\n"); return 1; } if (ctx->channel) { if (ibv_destroy_comp_channel(ctx->channel)) { fprintf(stderr, "Couldn't destroy completion channel\n"); return 1; } } if (ibv_close_device(ctx->context)) { fprintf(stderr, "Couldn't 
release context\n"); return 1; } free(ctx->buf); free(ctx); return 0; } static int pp_post_recv(struct pingpong_context *ctx, int n) { struct ibv_sge list = { .addr = (uintptr_t) ctx->buf, .length = ctx->size, .lkey = ctx->mr->lkey }; struct ibv_recv_wr wr = { .wr_id = PINGPONG_RECV_WRID, .sg_list = &list, .num_sge = 1, }; struct ibv_recv_wr *bad_wr; int i; for (i = 0; i < n; ++i) if (ibv_post_recv(ctx->qp, &wr, &bad_wr)) break; return i; } static int pp_post_send(struct pingpong_context *ctx) { struct ibv_sge list = { .addr = (uintptr_t) ctx->buf, .length = ctx->size, .lkey = ctx->mr->lkey }; struct ibv_send_wr wr = { .wr_id = PINGPONG_SEND_WRID, .sg_list = &list, .num_sge = 1, .opcode = IBV_WR_SEND, .send_flags = ctx->send_flags, }; struct ibv_send_wr *bad_wr; return ibv_post_send(ctx->qp, &wr, &bad_wr); } struct ts_params { uint64_t comp_recv_max_time_delta; uint64_t comp_recv_min_time_delta; uint64_t comp_recv_total_time_delta; uint64_t comp_recv_prev_time; int last_comp_with_ts; unsigned int comp_with_time_iters; }; static inline int parse_single_wc(struct pingpong_context *ctx, int *scnt, int *rcnt, int *routs, int iters, uint64_t wr_id, enum ibv_wc_status status, uint64_t completion_timestamp, struct ts_params *ts) { if (status != IBV_WC_SUCCESS) { fprintf(stderr, "Failed status %s (%d) for wr_id %d\n", ibv_wc_status_str(status), status, (int)wr_id); return 1; } switch ((int)wr_id) { case PINGPONG_SEND_WRID: ++(*scnt); break; case PINGPONG_RECV_WRID: if (--(*routs) <= 1) { *routs += pp_post_recv(ctx, ctx->rx_depth - *routs); if (*routs < ctx->rx_depth) { fprintf(stderr, "Couldn't post receive (%d)\n", *routs); return 1; } } ++(*rcnt); if (use_ts) { if (ts->last_comp_with_ts) { uint64_t delta; /* checking whether the clock was wrapped around */ if (completion_timestamp >= ts->comp_recv_prev_time) delta = completion_timestamp - ts->comp_recv_prev_time; else delta = ctx->completion_timestamp_mask - ts->comp_recv_prev_time + completion_timestamp + 1; 
ts->comp_recv_max_time_delta = MAX(ts->comp_recv_max_time_delta, delta); ts->comp_recv_min_time_delta = MIN(ts->comp_recv_min_time_delta, delta); ts->comp_recv_total_time_delta += delta; ts->comp_with_time_iters++; } ts->comp_recv_prev_time = completion_timestamp; ts->last_comp_with_ts = 1; } else { ts->last_comp_with_ts = 0; } break; default: fprintf(stderr, "Completion for unknown wr_id %d\n", (int)wr_id); return 1; } ctx->pending &= ~(int)wr_id; if (*scnt < iters && !ctx->pending) { if (pp_post_send(ctx)) { fprintf(stderr, "Couldn't post send\n"); return 1; } ctx->pending = PINGPONG_RECV_WRID | PINGPONG_SEND_WRID; } return 0; } static void usage(const char *argv0) { printf("Usage:\n"); printf(" %s start a server and wait for connection\n", argv0); printf(" %s connect to server at \n", argv0); printf("\n"); printf("Options:\n"); printf(" -p, --port= listen on/connect to port (default 18515)\n"); printf(" -d, --ib-dev= use IB device (default first device found)\n"); printf(" -i, --ib-port= use port of IB device (default 1)\n"); printf(" -s, --size= size of message to exchange (default 4096)\n"); printf(" -m, --mtu= path MTU (default 1024)\n"); printf(" -r, --rx-depth= number of receives to post at a time (default 500)\n"); printf(" -n, --iters= number of exchanges (default 1000)\n"); printf(" -l, --sl= service level value\n"); printf(" -e, --events sleep on CQ events (default poll)\n"); printf(" -g, --gid-idx= local port gid index\n"); printf(" -o, --odp use on demand paging\n"); printf(" -t, --ts get CQE with timestamp\n"); } int main(int argc, char *argv[]) { struct ibv_device **dev_list; struct ibv_device *ib_dev; struct pingpong_context *ctx; struct pingpong_dest my_dest; struct pingpong_dest *rem_dest; struct timeval start, end; char *ib_devname = NULL; char *servername = NULL; unsigned int port = 18515; int ib_port = 1; unsigned int size = 4096; enum ibv_mtu mtu = IBV_MTU_1024; unsigned int rx_depth = 500; unsigned int iters = 1000; int use_event = 0; int 
routs; int rcnt, scnt; int num_cq_events = 0; int sl = 0; int gidx = -1; char gid[33]; struct ts_params ts; srand48(getpid() * time(NULL)); while (1) { int c; static struct option long_options[] = { { .name = "port", .has_arg = 1, .val = 'p' }, { .name = "ib-dev", .has_arg = 1, .val = 'd' }, { .name = "ib-port", .has_arg = 1, .val = 'i' }, { .name = "size", .has_arg = 1, .val = 's' }, { .name = "mtu", .has_arg = 1, .val = 'm' }, { .name = "rx-depth", .has_arg = 1, .val = 'r' }, { .name = "iters", .has_arg = 1, .val = 'n' }, { .name = "sl", .has_arg = 1, .val = 'l' }, { .name = "events", .has_arg = 0, .val = 'e' }, { .name = "gid-idx", .has_arg = 1, .val = 'g' }, { .name = "odp", .has_arg = 0, .val = 'o' }, { .name = "ts", .has_arg = 0, .val = 't' }, {} }; c = getopt_long(argc, argv, "p:d:i:s:m:r:n:l:eg:ot", long_options, NULL); if (c == -1) break; switch (c) { case 'p': port = strtoul(optarg, NULL, 0); if (port > 65535) { usage(argv[0]); return 1; } break; case 'd': ib_devname = strdupa(optarg); break; case 'i': ib_port = strtol(optarg, NULL, 0); if (ib_port < 1) { usage(argv[0]); return 1; } break; case 's': size = strtoul(optarg, NULL, 0); break; case 'm': mtu = pp_mtu_to_enum(strtol(optarg, NULL, 0)); if (mtu == 0) { usage(argv[0]); return 1; } break; case 'r': rx_depth = strtoul(optarg, NULL, 0); break; case 'n': iters = strtoul(optarg, NULL, 0); break; case 'l': sl = strtol(optarg, NULL, 0); break; case 'e': ++use_event; break; case 'g': gidx = strtol(optarg, NULL, 0); break; case 'o': use_odp = 1; break; case 't': use_ts = 1; break; default: usage(argv[0]); return 1; } } if (optind == argc - 1) servername = strdupa(argv[optind]); else if (optind < argc) { usage(argv[0]); return 1; } if (use_ts) { ts.comp_recv_max_time_delta = 0; ts.comp_recv_min_time_delta = 0xffffffff; ts.comp_recv_total_time_delta = 0; ts.comp_recv_prev_time = 0; ts.last_comp_with_ts = 0; ts.comp_with_time_iters = 0; } page_size = sysconf(_SC_PAGESIZE); dev_list = ibv_get_device_list(NULL); 
if (!dev_list) { perror("Failed to get IB devices list"); return 1; } if (!ib_devname) { ib_dev = *dev_list; if (!ib_dev) { fprintf(stderr, "No IB devices found\n"); return 1; } } else { int i; for (i = 0; dev_list[i]; ++i) if (!strcmp(ibv_get_device_name(dev_list[i]), ib_devname)) break; ib_dev = dev_list[i]; if (!ib_dev) { fprintf(stderr, "IB device %s not found\n", ib_devname); return 1; } } ctx = pp_init_ctx(ib_dev, size, rx_depth, ib_port, use_event); if (!ctx) return 1; routs = pp_post_recv(ctx, ctx->rx_depth); if (routs < ctx->rx_depth) { fprintf(stderr, "Couldn't post receive (%d)\n", routs); return 1; } if (use_event) if (ibv_req_notify_cq(pp_cq(ctx), 0)) { fprintf(stderr, "Couldn't request CQ notification\n"); return 1; } if (pp_get_port_info(ctx->context, ib_port, &ctx->portinfo)) { fprintf(stderr, "Couldn't get port info\n"); return 1; } my_dest.lid = ctx->portinfo.lid; if (ctx->portinfo.link_layer != IBV_LINK_LAYER_ETHERNET && !my_dest.lid) { fprintf(stderr, "Couldn't get local LID\n"); return 1; } if (gidx >= 0) { if (ibv_query_gid(ctx->context, ib_port, gidx, &my_dest.gid)) { fprintf(stderr, "can't read sgid of index %d\n", gidx); return 1; } } else memset(&my_dest.gid, 0, sizeof my_dest.gid); my_dest.qpn = ctx->qp->qp_num; my_dest.psn = lrand48() & 0xffffff; inet_ntop(AF_INET6, &my_dest.gid, gid, sizeof gid); printf(" local address: LID 0x%04x, QPN 0x%06x, PSN 0x%06x, GID %s\n", my_dest.lid, my_dest.qpn, my_dest.psn, gid); if (servername) rem_dest = pp_client_exch_dest(servername, port, &my_dest); else rem_dest = pp_server_exch_dest(ctx, ib_port, mtu, port, sl, &my_dest, gidx); if (!rem_dest) return 1; inet_ntop(AF_INET6, &rem_dest->gid, gid, sizeof gid); printf(" remote address: LID 0x%04x, QPN 0x%06x, PSN 0x%06x, GID %s\n", rem_dest->lid, rem_dest->qpn, rem_dest->psn, gid); if (servername) if (pp_connect_ctx(ctx, ib_port, my_dest.psn, mtu, sl, rem_dest, gidx)) return 1; ctx->pending = PINGPONG_RECV_WRID; if (servername) { if (pp_post_send(ctx)) { 
fprintf(stderr, "Couldn't post send\n"); return 1; } ctx->pending |= PINGPONG_SEND_WRID; } if (gettimeofday(&start, NULL)) { perror("gettimeofday"); return 1; } rcnt = scnt = 0; while (rcnt < iters || scnt < iters) { int ret; if (use_event) { struct ibv_cq *ev_cq; void *ev_ctx; if (ibv_get_cq_event(ctx->channel, &ev_cq, &ev_ctx)) { fprintf(stderr, "Failed to get cq_event\n"); return 1; } ++num_cq_events; if (ev_cq != pp_cq(ctx)) { fprintf(stderr, "CQ event for unknown CQ %p\n", ev_cq); return 1; } if (ibv_req_notify_cq(pp_cq(ctx), 0)) { fprintf(stderr, "Couldn't request CQ notification\n"); return 1; } } if (use_ts) { struct ibv_poll_cq_attr attr = {}; do { ret = ibv_start_poll(ctx->cq_s.cq_ex, &attr); } while (!use_event && ret == ENOENT); if (ret) { fprintf(stderr, "poll CQ failed %d\n", ret); return ret; } ret = parse_single_wc(ctx, &scnt, &rcnt, &routs, iters, ctx->cq_s.cq_ex->wr_id, ctx->cq_s.cq_ex->status, ibv_wc_read_completion_ts(ctx->cq_s.cq_ex), &ts); if (ret) { ibv_end_poll(ctx->cq_s.cq_ex); return ret; } ret = ibv_next_poll(ctx->cq_s.cq_ex); if (!ret) ret = parse_single_wc(ctx, &scnt, &rcnt, &routs, iters, ctx->cq_s.cq_ex->wr_id, ctx->cq_s.cq_ex->status, ibv_wc_read_completion_ts(ctx->cq_s.cq_ex), &ts); ibv_end_poll(ctx->cq_s.cq_ex); if (ret && ret != ENOENT) { fprintf(stderr, "poll CQ failed %d\n", ret); return ret; } } else { int ne, i; struct ibv_wc wc[2]; do { ne = ibv_poll_cq(pp_cq(ctx), 2, wc); if (ne < 0) { fprintf(stderr, "poll CQ failed %d\n", ne); return 1; } } while (!use_event && ne < 1); for (i = 0; i < ne; ++i) { ret = parse_single_wc(ctx, &scnt, &rcnt, &routs, iters, wc[i].wr_id, wc[i].status, 0, &ts); if (ret) { fprintf(stderr, "parse WC failed %d\n", ne); return 1; } } } } if (gettimeofday(&end, NULL)) { perror("gettimeofday"); return 1; } { float usec = (end.tv_sec - start.tv_sec) * 1000000 + (end.tv_usec - start.tv_usec); long long bytes = (long long) size * iters * 2; printf("%lld bytes in %.2f seconds = %.2f Mbit/sec\n", bytes, usec 
/ 1000000., bytes * 8. / usec); printf("%d iters in %.2f seconds = %.2f usec/iter\n", iters, usec / 1000000., usec / iters); if (use_ts && ts.comp_with_time_iters) { printf("Max receive completion clock cycles = %" PRIu64 "\n", ts.comp_recv_max_time_delta); printf("Min receive completion clock cycles = %" PRIu64 "\n", ts.comp_recv_min_time_delta); printf("Average receive completion clock cycles = %f\n", (double)ts.comp_recv_total_time_delta / ts.comp_with_time_iters); } } ibv_ack_cq_events(pp_cq(ctx), num_cq_events); if (pp_close_ctx(ctx)) return 1; ibv_free_device_list(dev_list); free(rem_dest); return 0; } Index: head/contrib/ofed/libibverbs/examples/srq_pingpong.c =================================================================== --- head/contrib/ofed/libibverbs/examples/srq_pingpong.c (revision 363220) +++ head/contrib/ofed/libibverbs/examples/srq_pingpong.c (revision 363221) @@ -1,989 +1,989 @@ /* * Copyright (c) 2005 Topspin Communications. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #define _GNU_SOURCE #include #include #include #include #include #include #include #include #include #include #include #include #include #include "pingpong.h" enum { PINGPONG_RECV_WRID = 1, PINGPONG_SEND_WRID = 2, MAX_QP = 256, }; static int page_size; struct pingpong_context { struct ibv_context *context; struct ibv_comp_channel *channel; struct ibv_pd *pd; struct ibv_mr *mr; struct ibv_cq *cq; struct ibv_srq *srq; struct ibv_qp *qp[MAX_QP]; void *buf; int size; int send_flags; int num_qp; int rx_depth; int pending[MAX_QP]; struct ibv_port_attr portinfo; }; struct pingpong_dest { int lid; int qpn; int psn; union ibv_gid gid; }; static int pp_connect_ctx(struct pingpong_context *ctx, int port, enum ibv_mtu mtu, int sl, const struct pingpong_dest *my_dest, const struct pingpong_dest *dest, int sgid_idx) { int i; for (i = 0; i < ctx->num_qp; ++i) { struct ibv_qp_attr attr = { .qp_state = IBV_QPS_RTR, .path_mtu = mtu, .dest_qp_num = dest[i].qpn, .rq_psn = dest[i].psn, .max_dest_rd_atomic = 1, .min_rnr_timer = 12, .ah_attr = { .is_global = 0, .dlid = dest[i].lid, .sl = sl, .src_path_bits = 0, .port_num = port } }; if (dest->gid.global.interface_id) { attr.ah_attr.is_global = 1; attr.ah_attr.grh.hop_limit = 1; attr.ah_attr.grh.dgid = dest->gid; attr.ah_attr.grh.sgid_index = sgid_idx; } if (ibv_modify_qp(ctx->qp[i], &attr, IBV_QP_STATE | IBV_QP_AV | IBV_QP_PATH_MTU | IBV_QP_DEST_QPN | IBV_QP_RQ_PSN | IBV_QP_MAX_DEST_RD_ATOMIC | IBV_QP_MIN_RNR_TIMER)) { fprintf(stderr, "Failed to modify QP[%d] to RTR\n", i); return 1; } attr.qp_state = IBV_QPS_RTS; attr.timeout = 14; attr.retry_cnt = 7; attr.rnr_retry = 7; attr.sq_psn = my_dest[i].psn; attr.max_rd_atomic = 1; if (ibv_modify_qp(ctx->qp[i], &attr, 
IBV_QP_STATE | IBV_QP_TIMEOUT | IBV_QP_RETRY_CNT | IBV_QP_RNR_RETRY | IBV_QP_SQ_PSN | IBV_QP_MAX_QP_RD_ATOMIC)) { fprintf(stderr, "Failed to modify QP[%d] to RTS\n", i); return 1; } } return 0; }

/*
 * Client half of the out-of-band TCP handshake for the SRQ test.
 *
 * Connects to servername:port, sends MAX_QP local LID/QPN/PSN/GID records,
 * reads MAX_QP records back from the server and acknowledges with "done".
 *
 * Returns a malloc()ed array of MAX_QP pingpong_dest entries (the caller
 * frees it), or NULL if any step of the exchange fails.
 */
static struct pingpong_dest *pp_client_exch_dest(const char *servername, int port,
						 const struct pingpong_dest *my_dest)
{
	struct addrinfo *res, *t;
	struct addrinfo hints = {
		.ai_family   = AF_UNSPEC,
		.ai_socktype = SOCK_STREAM
	};
	char *service;
	char msg[sizeof "0000:000000:000000:00000000000000000000000000000000"];
	int n;
	int i;
	int sockfd = -1;
	struct pingpong_dest *rem_dest = NULL;
	char gid[33];

	if (asprintf(&service, "%d", port) < 0)
		return NULL;

	n = getaddrinfo(servername, service, &hints, &res);

	/*
	 * getaddrinfo() reports failure with any non-zero value; the sign of
	 * the EAI_* codes is platform specific, so testing "n < 0" can miss
	 * an error and continue with "res" uninitialized.
	 */
	if (n != 0) {
		fprintf(stderr, "%s for %s:%d\n", gai_strerror(n), servername, port);
		free(service);
		return NULL;
	}

	/* Try each returned address until one accepts the connection. */
	for (t = res; t; t = t->ai_next) {
		sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol);
		if (sockfd >= 0) {
			if (!connect(sockfd, t->ai_addr, t->ai_addrlen))
				break;
			close(sockfd);
			sockfd = -1;
		}
	}

	freeaddrinfo_null(res);
	free(service);

	if (sockfd < 0) {
		fprintf(stderr, "Couldn't connect to %s:%d\n", servername, port);
		return NULL;
	}

	for (i = 0; i < MAX_QP; ++i) {
		gid_to_wire_gid(&my_dest[i].gid, gid);
		/* Bounded formatting: never write past msg even for oversized fields. */
		snprintf(msg, sizeof msg, "%04x:%06x:%06x:%s", my_dest[i].lid,
			 my_dest[i].qpn, my_dest[i].psn, gid);
		if (write(sockfd, msg, sizeof msg) != sizeof msg) {
			fprintf(stderr, "Couldn't send local address\n");
			goto out;
		}
	}

	rem_dest = malloc(MAX_QP * sizeof *rem_dest);
	if (!rem_dest)
		goto out;

	for (i = 0; i < MAX_QP; ++i) {
		size_t got = 0;

		/* A record may arrive in pieces; keep reading until it is whole. */
		while (got < sizeof msg) {
			ssize_t r = read(sockfd, msg + got, sizeof msg - got);

			/*
			 * r == 0 means the peer closed the connection.  Treat
			 * it as a failure too, otherwise this loop would never
			 * make progress and spin forever.
			 */
			if (r <= 0) {
				if (r < 0)
					perror("client read");
				fprintf(stderr, "%d/%d: Couldn't read remote address [%d]\n",
					(int) got, (int) sizeof msg, i);
				goto fail;
			}
			got += (size_t) r;
		}

		/*
		 * The record came from the peer: force termination and bound
		 * the GID field to the size of gid[] before parsing.
		 */
		msg[sizeof msg - 1] = '\0';
		if (sscanf(msg, "%x:%x:%x:%32s", &rem_dest[i].lid, &rem_dest[i].qpn,
			   &rem_dest[i].psn, gid) != 4) {
			fprintf(stderr, "Couldn't parse remote address [%d]\n", i);
			goto fail;
		}
		wire_gid_to_gid(gid, &rem_dest[i].gid);
	}

	if (write(sockfd, "done", sizeof "done") != sizeof "done") {
		perror("client write");
		goto fail;
	}

out:
	close(sockfd);
	return rem_dest;

fail:
	/* Never hand a partially filled table back to the caller as success. */
	free(rem_dest);
	rem_dest = NULL;
	goto out;
}

static struct pingpong_dest *pp_server_exch_dest(struct pingpong_context *ctx, int ib_port, enum ibv_mtu mtu, int port, int sl, const struct pingpong_dest *my_dest, int sgid_idx) { struct addrinfo *res, *t; struct addrinfo hints = { .ai_flags = AI_PASSIVE, .ai_family = AF_INET, .ai_socktype = SOCK_STREAM }; char *service; char msg[sizeof "0000:000000:000000:00000000000000000000000000000000"]; int n; int r; int i; int sockfd = -1, connfd; struct pingpong_dest *rem_dest = NULL; char gid[33]; if (asprintf(&service, "%d", port) < 0) return NULL; n = getaddrinfo(NULL, service, &hints, &res); if (n < 0) { fprintf(stderr, "%s for port %d\n", gai_strerror(n), port); free(service); return NULL; } for (t = res; t; t = t->ai_next) { sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol); if (sockfd >= 0) { n = 1; setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &n, sizeof n); if (!bind(sockfd, t->ai_addr, t->ai_addrlen)) break; close(sockfd); sockfd = -1; } } freeaddrinfo_null(res); free(service); if (sockfd < 0) { fprintf(stderr, "Couldn't listen to port %d\n", port); return NULL; } - if (listen(sockfd, 1)) { + if (listen(sockfd, 1) < 0) { perror("listen() failed"); close(sockfd); return NULL; } connfd = accept(sockfd, NULL, NULL); close(sockfd); if (connfd < 0) { fprintf(stderr, "accept() failed\n"); return NULL; } rem_dest = malloc(MAX_QP * sizeof *rem_dest); if (!rem_dest) goto out; for (i = 0; i < MAX_QP; ++i) { n = 0; while (n < sizeof msg) { r = read(connfd, msg + n, sizeof msg - n); if (r < 0) { perror("server read"); fprintf(stderr, "%d/%d: Couldn't read remote address [%d]\n", n, (int) sizeof msg, i); goto out; } n += r; } sscanf(msg, "%x:%x:%x:%s", &rem_dest[i].lid, &rem_dest[i].qpn, &rem_dest[i].psn, gid); wire_gid_to_gid(gid, &rem_dest[i].gid); } if (pp_connect_ctx(ctx, ib_port, mtu, sl, my_dest, rem_dest, sgid_idx)) { fprintf(stderr, "Couldn't connect to remote QP\n"); free(rem_dest); rem_dest = NULL; goto out; } for (i = 0;
i < MAX_QP; ++i) { gid_to_wire_gid(&my_dest[i].gid, gid); sprintf(msg, "%04x:%06x:%06x:%s", my_dest[i].lid, my_dest[i].qpn, my_dest[i].psn, gid); if (write(connfd, msg, sizeof msg) != sizeof msg) { fprintf(stderr, "Couldn't send local address\n"); free(rem_dest); rem_dest = NULL; goto out; } } if (read(connfd, msg, sizeof msg) != sizeof "done") { perror("client write"); free(rem_dest); rem_dest = NULL; goto out; } out: close(connfd); return rem_dest; } static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev, int size, int num_qp, int rx_depth, int port, int use_event) { struct pingpong_context *ctx; int i; ctx = calloc(1, sizeof *ctx); if (!ctx) return NULL; ctx->size = size; ctx->send_flags = IBV_SEND_SIGNALED; ctx->num_qp = num_qp; ctx->rx_depth = rx_depth; ctx->buf = memalign(page_size, size); if (!ctx->buf) { fprintf(stderr, "Couldn't allocate work buf.\n"); goto clean_ctx; } memset(ctx->buf, 0, size); ctx->context = ibv_open_device(ib_dev); if (!ctx->context) { fprintf(stderr, "Couldn't get context for %s\n", ibv_get_device_name(ib_dev)); goto clean_buffer; } if (use_event) { ctx->channel = ibv_create_comp_channel(ctx->context); if (!ctx->channel) { fprintf(stderr, "Couldn't create completion channel\n"); goto clean_device; } } else ctx->channel = NULL; ctx->pd = ibv_alloc_pd(ctx->context); if (!ctx->pd) { fprintf(stderr, "Couldn't allocate PD\n"); goto clean_comp_channel; } ctx->mr = ibv_reg_mr(ctx->pd, ctx->buf, size, IBV_ACCESS_LOCAL_WRITE); if (!ctx->mr) { fprintf(stderr, "Couldn't register MR\n"); goto clean_pd; } ctx->cq = ibv_create_cq(ctx->context, rx_depth + num_qp, NULL, ctx->channel, 0); if (!ctx->cq) { fprintf(stderr, "Couldn't create CQ\n"); goto clean_mr; } { struct ibv_srq_init_attr attr = { .attr = { .max_wr = rx_depth, .max_sge = 1 } }; ctx->srq = ibv_create_srq(ctx->pd, &attr); if (!ctx->srq) { fprintf(stderr, "Couldn't create SRQ\n"); goto clean_cq; } } for (i = 0; i < num_qp; ++i) { struct ibv_qp_attr attr; struct 
ibv_qp_init_attr init_attr = { .send_cq = ctx->cq, .recv_cq = ctx->cq, .srq = ctx->srq, .cap = { .max_send_wr = 1, .max_send_sge = 1, }, .qp_type = IBV_QPT_RC }; ctx->qp[i] = ibv_create_qp(ctx->pd, &init_attr); if (!ctx->qp[i]) { fprintf(stderr, "Couldn't create QP[%d]\n", i); goto clean_qps; } ibv_query_qp(ctx->qp[i], &attr, IBV_QP_CAP, &init_attr); if (init_attr.cap.max_inline_data >= size) { ctx->send_flags |= IBV_SEND_INLINE; } } for (i = 0; i < num_qp; ++i) { struct ibv_qp_attr attr = { .qp_state = IBV_QPS_INIT, .pkey_index = 0, .port_num = port, .qp_access_flags = 0 }; if (ibv_modify_qp(ctx->qp[i], &attr, IBV_QP_STATE | IBV_QP_PKEY_INDEX | IBV_QP_PORT | IBV_QP_ACCESS_FLAGS)) { fprintf(stderr, "Failed to modify QP[%d] to INIT\n", i); goto clean_qps_full; } } return ctx; clean_qps_full: i = num_qp; clean_qps: for (--i; i >= 0; --i) ibv_destroy_qp(ctx->qp[i]); ibv_destroy_srq(ctx->srq); clean_cq: ibv_destroy_cq(ctx->cq); clean_mr: ibv_dereg_mr(ctx->mr); clean_pd: ibv_dealloc_pd(ctx->pd); clean_comp_channel: if (ctx->channel) ibv_destroy_comp_channel(ctx->channel); clean_device: ibv_close_device(ctx->context); clean_buffer: free(ctx->buf); clean_ctx: free(ctx); return NULL; } static int pp_close_ctx(struct pingpong_context *ctx, int num_qp) { int i; for (i = 0; i < num_qp; ++i) { if (ibv_destroy_qp(ctx->qp[i])) { fprintf(stderr, "Couldn't destroy QP[%d]\n", i); return 1; } } if (ibv_destroy_srq(ctx->srq)) { fprintf(stderr, "Couldn't destroy SRQ\n"); return 1; } if (ibv_destroy_cq(ctx->cq)) { fprintf(stderr, "Couldn't destroy CQ\n"); return 1; } if (ibv_dereg_mr(ctx->mr)) { fprintf(stderr, "Couldn't deregister MR\n"); return 1; } if (ibv_dealloc_pd(ctx->pd)) { fprintf(stderr, "Couldn't deallocate PD\n"); return 1; } if (ctx->channel) { if (ibv_destroy_comp_channel(ctx->channel)) { fprintf(stderr, "Couldn't destroy completion channel\n"); return 1; } } if (ibv_close_device(ctx->context)) { fprintf(stderr, "Couldn't release context\n"); return 1; } free(ctx->buf); 
free(ctx); return 0; } static int pp_post_recv(struct pingpong_context *ctx, int n) { struct ibv_sge list = { .addr = (uintptr_t) ctx->buf, .length = ctx->size, .lkey = ctx->mr->lkey }; struct ibv_recv_wr wr = { .wr_id = PINGPONG_RECV_WRID, .sg_list = &list, .num_sge = 1, }; struct ibv_recv_wr *bad_wr; int i; for (i = 0; i < n; ++i) if (ibv_post_srq_recv(ctx->srq, &wr, &bad_wr)) break; return i; } static int pp_post_send(struct pingpong_context *ctx, int qp_index) { struct ibv_sge list = { .addr = (uintptr_t) ctx->buf, .length = ctx->size, .lkey = ctx->mr->lkey }; struct ibv_send_wr wr = { .wr_id = PINGPONG_SEND_WRID, .sg_list = &list, .num_sge = 1, .opcode = IBV_WR_SEND, .send_flags = ctx->send_flags, }; struct ibv_send_wr *bad_wr; return ibv_post_send(ctx->qp[qp_index], &wr, &bad_wr); } static int find_qp(int qpn, struct pingpong_context *ctx, int num_qp) { int i; for (i = 0; i < num_qp; ++i) if (ctx->qp[i]->qp_num == qpn) return i; return -1; } static void usage(const char *argv0) { printf("Usage:\n"); printf(" %s start a server and wait for connection\n", argv0); printf(" %s connect to server at \n", argv0); printf("\n"); printf("Options:\n"); printf(" -p, --port= listen on/connect to port (default 18515)\n"); printf(" -d, --ib-dev= use IB device (default first device found)\n"); printf(" -i, --ib-port= use port of IB device (default 1)\n"); printf(" -s, --size= size of message to exchange (default 4096)\n"); printf(" -m, --mtu= path MTU (default 1024)\n"); printf(" -q, --num-qp= number of QPs to use (default 16)\n"); printf(" -r, --rx-depth= number of receives to post at a time (default 500)\n"); printf(" -n, --iters= number of exchanges per QP(default 1000)\n"); printf(" -l, --sl= service level value\n"); printf(" -e, --events sleep on CQ events (default poll)\n"); printf(" -g, --gid-idx= local port gid index\n"); } int main(int argc, char *argv[]) { struct ibv_device **dev_list; struct ibv_device *ib_dev; struct ibv_wc *wc; struct pingpong_context *ctx; 
struct pingpong_dest my_dest[MAX_QP]; struct pingpong_dest *rem_dest; struct timeval start, end; char *ib_devname = NULL; char *servername = NULL; unsigned int port = 18515; int ib_port = 1; unsigned int size = 4096; enum ibv_mtu mtu = IBV_MTU_1024; unsigned int num_qp = 16; unsigned int rx_depth = 500; unsigned int iters = 1000; int use_event = 0; int routs; int rcnt, scnt; int num_wc; int i; int num_cq_events = 0; int sl = 0; int gidx = -1; char gid[33]; srand48(getpid() * time(NULL)); while (1) { int c; static struct option long_options[] = { { .name = "port", .has_arg = 1, .val = 'p' }, { .name = "ib-dev", .has_arg = 1, .val = 'd' }, { .name = "ib-port", .has_arg = 1, .val = 'i' }, { .name = "size", .has_arg = 1, .val = 's' }, { .name = "mtu", .has_arg = 1, .val = 'm' }, { .name = "num-qp", .has_arg = 1, .val = 'q' }, { .name = "rx-depth", .has_arg = 1, .val = 'r' }, { .name = "iters", .has_arg = 1, .val = 'n' }, { .name = "sl", .has_arg = 1, .val = 'l' }, { .name = "events", .has_arg = 0, .val = 'e' }, { .name = "gid-idx", .has_arg = 1, .val = 'g' }, {} }; c = getopt_long(argc, argv, "p:d:i:s:m:q:r:n:l:eg:", long_options, NULL); if (c == -1) break; switch (c) { case 'p': port = strtoul(optarg, NULL, 0); if (port > 65535) { usage(argv[0]); return 1; } break; case 'd': ib_devname = strdupa(optarg); break; case 'i': ib_port = strtol(optarg, NULL, 0); if (ib_port < 1) { usage(argv[0]); return 1; } break; case 's': size = strtoul(optarg, NULL, 0); if (size < 1) { usage(argv[0]); return 1; } break; case 'm': mtu = pp_mtu_to_enum(strtol(optarg, NULL, 0)); if (mtu == 0) { usage(argv[0]); return 1; } break; case 'q': num_qp = strtoul(optarg, NULL, 0); break; case 'r': rx_depth = strtoul(optarg, NULL, 0); break; case 'n': iters = strtoul(optarg, NULL, 0); break; case 'l': sl = strtol(optarg, NULL, 0); break; case 'e': ++use_event; break; case 'g': gidx = strtol(optarg, NULL, 0); break; default: usage(argv[0]); return 1; } } if (optind == argc - 1) servername = 
strdupa(argv[optind]); else if (optind < argc) { usage(argv[0]); return 1; } if (num_qp > rx_depth) { fprintf(stderr, "rx_depth %d is too small for %d QPs -- " "must have at least one receive per QP.\n", rx_depth, num_qp); return 1; } num_wc = num_qp + rx_depth; wc = alloca(num_wc * sizeof *wc); page_size = sysconf(_SC_PAGESIZE); dev_list = ibv_get_device_list(NULL); if (!dev_list) { perror("Failed to get IB devices list"); return 1; } if (!ib_devname) { ib_dev = *dev_list; if (!ib_dev) { fprintf(stderr, "No IB devices found\n"); return 1; } } else { for (i = 0; dev_list[i]; ++i) if (!strcmp(ibv_get_device_name(dev_list[i]), ib_devname)) break; ib_dev = dev_list[i]; if (!ib_dev) { fprintf(stderr, "IB device %s not found\n", ib_devname); return 1; } } ctx = pp_init_ctx(ib_dev, size, num_qp, rx_depth, ib_port, use_event); if (!ctx) return 1; routs = pp_post_recv(ctx, ctx->rx_depth); if (routs < ctx->rx_depth) { fprintf(stderr, "Couldn't post receive (%d)\n", routs); return 1; } if (use_event) if (ibv_req_notify_cq(ctx->cq, 0)) { fprintf(stderr, "Couldn't request CQ notification\n"); return 1; } memset(my_dest, 0, sizeof my_dest); if (pp_get_port_info(ctx->context, ib_port, &ctx->portinfo)) { fprintf(stderr, "Couldn't get port info\n"); return 1; } for (i = 0; i < num_qp; ++i) { my_dest[i].qpn = ctx->qp[i]->qp_num; my_dest[i].psn = lrand48() & 0xffffff; my_dest[i].lid = ctx->portinfo.lid; if (ctx->portinfo.link_layer != IBV_LINK_LAYER_ETHERNET && !my_dest[i].lid) { fprintf(stderr, "Couldn't get local LID\n"); return 1; } if (gidx >= 0) { if (ibv_query_gid(ctx->context, ib_port, gidx, &my_dest[i].gid)) { fprintf(stderr, "Could not get local gid for " "gid index %d\n", gidx); return 1; } } else memset(&my_dest[i].gid, 0, sizeof my_dest[i].gid); inet_ntop(AF_INET6, &my_dest[i].gid, gid, sizeof gid); printf(" local address: LID 0x%04x, QPN 0x%06x, PSN 0x%06x, " "GID %s\n", my_dest[i].lid, my_dest[i].qpn, my_dest[i].psn, gid); } if (servername) rem_dest = 
pp_client_exch_dest(servername, port, my_dest); else rem_dest = pp_server_exch_dest(ctx, ib_port, mtu, port, sl, my_dest, gidx); if (!rem_dest) return 1; inet_ntop(AF_INET6, &rem_dest->gid, gid, sizeof gid); for (i = 0; i < num_qp; ++i) { inet_ntop(AF_INET6, &rem_dest[i].gid, gid, sizeof gid); printf(" remote address: LID 0x%04x, QPN 0x%06x, PSN 0x%06x, " "GID %s\n", rem_dest[i].lid, rem_dest[i].qpn, rem_dest[i].psn, gid); } if (servername) if (pp_connect_ctx(ctx, ib_port, mtu, sl, my_dest, rem_dest, gidx)) return 1; if (servername) for (i = 0; i < num_qp; ++i) { if (pp_post_send(ctx, i)) { fprintf(stderr, "Couldn't post send\n"); return 1; } ctx->pending[i] = PINGPONG_SEND_WRID | PINGPONG_RECV_WRID; } else for (i = 0; i < num_qp; ++i) ctx->pending[i] = PINGPONG_RECV_WRID; if (gettimeofday(&start, NULL)) { perror("gettimeofday"); return 1; } rcnt = scnt = 0; while (rcnt < iters || scnt < iters) { if (use_event) { struct ibv_cq *ev_cq; void *ev_ctx; if (ibv_get_cq_event(ctx->channel, &ev_cq, &ev_ctx)) { fprintf(stderr, "Failed to get cq_event\n"); return 1; } ++num_cq_events; if (ev_cq != ctx->cq) { fprintf(stderr, "CQ event for unknown CQ %p\n", ev_cq); return 1; } if (ibv_req_notify_cq(ctx->cq, 0)) { fprintf(stderr, "Couldn't request CQ notification\n"); return 1; } } { int ne, qp_ind; do { ne = ibv_poll_cq(ctx->cq, num_wc, wc); if (ne < 0) { fprintf(stderr, "poll CQ failed %d\n", ne); return 1; } } while (!use_event && ne < 1); for (i = 0; i < ne; ++i) { if (wc[i].status != IBV_WC_SUCCESS) { fprintf(stderr, "Failed status %s (%d) for wr_id %d\n", ibv_wc_status_str(wc[i].status), wc[i].status, (int) wc[i].wr_id); return 1; } qp_ind = find_qp(wc[i].qp_num, ctx, num_qp); if (qp_ind < 0) { fprintf(stderr, "Couldn't find QPN %06x\n", wc[i].qp_num); return 1; } switch ((int) wc[i].wr_id) { case PINGPONG_SEND_WRID: ++scnt; break; case PINGPONG_RECV_WRID: if (--routs <= num_qp) { routs += pp_post_recv(ctx, ctx->rx_depth - routs); if (routs < ctx->rx_depth) { 
fprintf(stderr, "Couldn't post receive (%d)\n", routs); return 1; } } ++rcnt; break; default: fprintf(stderr, "Completion for unknown wr_id %d\n", (int) wc[i].wr_id); return 1; } ctx->pending[qp_ind] &= ~(int) wc[i].wr_id; if (scnt < iters && !ctx->pending[qp_ind]) { if (pp_post_send(ctx, qp_ind)) { fprintf(stderr, "Couldn't post send\n"); return 1; } ctx->pending[qp_ind] = PINGPONG_RECV_WRID | PINGPONG_SEND_WRID; } } } } if (gettimeofday(&end, NULL)) { perror("gettimeofday"); return 1; } { float usec = (end.tv_sec - start.tv_sec) * 1000000 + (end.tv_usec - start.tv_usec); long long bytes = (long long) size * iters * 2; printf("%lld bytes in %.2f seconds = %.2f Mbit/sec\n", bytes, usec / 1000000., bytes * 8. / usec); printf("%d iters in %.2f seconds = %.2f usec/iter\n", iters, usec / 1000000., usec / iters); } ibv_ack_cq_events(ctx->cq, num_cq_events); if (pp_close_ctx(ctx, num_qp)) return 1; ibv_free_device_list(dev_list); free(rem_dest); return 0; } Index: head/contrib/ofed/libibverbs/examples/uc_pingpong.c =================================================================== --- head/contrib/ofed/libibverbs/examples/uc_pingpong.c (revision 363220) +++ head/contrib/ofed/libibverbs/examples/uc_pingpong.c (revision 363221) @@ -1,858 +1,858 @@ /* * Copyright (c) 2005 Topspin Communications. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. 
* * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #define _GNU_SOURCE #include #include #include #include #include #include #include #include #include #include #include #include #include #include "pingpong.h" enum { PINGPONG_RECV_WRID = 1, PINGPONG_SEND_WRID = 2, }; static int page_size; struct pingpong_context { struct ibv_context *context; struct ibv_comp_channel *channel; struct ibv_pd *pd; struct ibv_mr *mr; struct ibv_cq *cq; struct ibv_qp *qp; void *buf; int size; int send_flags; int rx_depth; int pending; struct ibv_port_attr portinfo; }; struct pingpong_dest { int lid; int qpn; int psn; union ibv_gid gid; }; static int pp_connect_ctx(struct pingpong_context *ctx, int port, int my_psn, enum ibv_mtu mtu, int sl, struct pingpong_dest *dest, int sgid_idx) { struct ibv_qp_attr attr = { .qp_state = IBV_QPS_RTR, .path_mtu = mtu, .dest_qp_num = dest->qpn, .rq_psn = dest->psn, .ah_attr = { .is_global = 0, .dlid = dest->lid, .sl = sl, .src_path_bits = 0, .port_num = port } }; if (dest->gid.global.interface_id) { attr.ah_attr.is_global = 1; attr.ah_attr.grh.hop_limit = 1; attr.ah_attr.grh.dgid = dest->gid; attr.ah_attr.grh.sgid_index = sgid_idx; } if (ibv_modify_qp(ctx->qp, &attr, IBV_QP_STATE | IBV_QP_AV | IBV_QP_PATH_MTU | IBV_QP_DEST_QPN | IBV_QP_RQ_PSN)) { fprintf(stderr, "Failed to modify QP to RTR\n"); return 1; } attr.qp_state 
= IBV_QPS_RTS; attr.sq_psn = my_psn; if (ibv_modify_qp(ctx->qp, &attr, IBV_QP_STATE | IBV_QP_SQ_PSN)) { fprintf(stderr, "Failed to modify QP to RTS\n"); return 1; } return 0; } static struct pingpong_dest *pp_client_exch_dest(const char *servername, int port, const struct pingpong_dest *my_dest) { struct addrinfo *res, *t; struct addrinfo hints = { .ai_family = AF_UNSPEC, .ai_socktype = SOCK_STREAM }; char *service; char msg[sizeof "0000:000000:000000:00000000000000000000000000000000"]; int n; int sockfd = -1; struct pingpong_dest *rem_dest = NULL; char gid[33]; if (asprintf(&service, "%d", port) < 0) return NULL; n = getaddrinfo(servername, service, &hints, &res); if (n < 0) { fprintf(stderr, "%s for %s:%d\n", gai_strerror(n), servername, port); free(service); return NULL; } for (t = res; t; t = t->ai_next) { sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol); if (sockfd >= 0) { if (!connect(sockfd, t->ai_addr, t->ai_addrlen)) break; close(sockfd); sockfd = -1; } } freeaddrinfo_null(res); free(service); if (sockfd < 0) { fprintf(stderr, "Couldn't connect to %s:%d\n", servername, port); return NULL; } gid_to_wire_gid(&my_dest->gid, gid); sprintf(msg, "%04x:%06x:%06x:%s", my_dest->lid, my_dest->qpn, my_dest->psn, gid); if (write(sockfd, msg, sizeof msg) != sizeof msg) { fprintf(stderr, "Couldn't send local address\n"); goto out; } if (read(sockfd, msg, sizeof msg) != sizeof msg || write(sockfd, "done", sizeof "done") != sizeof "done") { perror("client read/write"); fprintf(stderr, "Couldn't read/write remote address\n"); goto out; } rem_dest = malloc(sizeof *rem_dest); if (!rem_dest) goto out; sscanf(msg, "%x:%x:%x:%s", &rem_dest->lid, &rem_dest->qpn, &rem_dest->psn, gid); wire_gid_to_gid(gid, &rem_dest->gid); out: close(sockfd); return rem_dest; } static struct pingpong_dest *pp_server_exch_dest(struct pingpong_context *ctx, int ib_port, enum ibv_mtu mtu, int port, int sl, const struct pingpong_dest *my_dest, int sgid_idx) { struct addrinfo *res, *t; 
struct addrinfo hints = { .ai_flags = AI_PASSIVE, .ai_family = AF_INET, .ai_socktype = SOCK_STREAM }; char *service; char msg[sizeof "0000:000000:000000:00000000000000000000000000000000"]; int n; int sockfd = -1, connfd; struct pingpong_dest *rem_dest = NULL; char gid[33]; if (asprintf(&service, "%d", port) < 0) return NULL; n = getaddrinfo(NULL, service, &hints, &res); if (n < 0) { fprintf(stderr, "%s for port %d\n", gai_strerror(n), port); free(service); return NULL; } for (t = res; t; t = t->ai_next) { sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol); if (sockfd >= 0) { n = 1; setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &n, sizeof n); if (!bind(sockfd, t->ai_addr, t->ai_addrlen)) break; close(sockfd); sockfd = -1; } } freeaddrinfo_null(res); free(service); if (sockfd < 0) { fprintf(stderr, "Couldn't listen to port %d\n", port); return NULL; } - if (listen(sockfd, 1)) { + if (listen(sockfd, 1) < 0) { perror("listen() failed"); close(sockfd); return NULL; } connfd = accept(sockfd, NULL, NULL); close(sockfd); if (connfd < 0) { fprintf(stderr, "accept() failed\n"); return NULL; } n = read(connfd, msg, sizeof msg); if (n != sizeof msg) { perror("server read"); fprintf(stderr, "%d/%d: Couldn't read remote address\n", n, (int) sizeof msg); goto out; } rem_dest = malloc(sizeof *rem_dest); if (!rem_dest) goto out; sscanf(msg, "%x:%x:%x:%s", &rem_dest->lid, &rem_dest->qpn, &rem_dest->psn, gid); wire_gid_to_gid(gid, &rem_dest->gid); if (pp_connect_ctx(ctx, ib_port, my_dest->psn, mtu, sl, rem_dest, sgid_idx)) { fprintf(stderr, "Couldn't connect to remote QP\n"); free(rem_dest); rem_dest = NULL; goto out; } gid_to_wire_gid(&my_dest->gid, gid); sprintf(msg, "%04x:%06x:%06x:%s", my_dest->lid, my_dest->qpn, my_dest->psn, gid); if (write(connfd, msg, sizeof msg) != sizeof msg || read(connfd, msg, sizeof msg) != sizeof "done") { fprintf(stderr, "Couldn't send/recv local address\n"); free(rem_dest); rem_dest = NULL; goto out; } out: close(connfd); return rem_dest; } 
static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev, int size, int rx_depth, int port, int use_event) { struct pingpong_context *ctx; ctx = calloc(1, sizeof *ctx); if (!ctx) return NULL; ctx->size = size; ctx->send_flags = IBV_SEND_SIGNALED; ctx->rx_depth = rx_depth; ctx->buf = memalign(page_size, size); if (!ctx->buf) { fprintf(stderr, "Couldn't allocate work buf.\n"); goto clean_ctx; } /* FIXME memset(ctx->buf, 0, size); */ memset(ctx->buf, 0x7b, size); ctx->context = ibv_open_device(ib_dev); if (!ctx->context) { fprintf(stderr, "Couldn't get context for %s\n", ibv_get_device_name(ib_dev)); goto clean_buffer; } if (use_event) { ctx->channel = ibv_create_comp_channel(ctx->context); if (!ctx->channel) { fprintf(stderr, "Couldn't create completion channel\n"); goto clean_device; } } else ctx->channel = NULL; ctx->pd = ibv_alloc_pd(ctx->context); if (!ctx->pd) { fprintf(stderr, "Couldn't allocate PD\n"); goto clean_comp_channel; } ctx->mr = ibv_reg_mr(ctx->pd, ctx->buf, size, IBV_ACCESS_LOCAL_WRITE); if (!ctx->mr) { fprintf(stderr, "Couldn't register MR\n"); goto clean_pd; } ctx->cq = ibv_create_cq(ctx->context, rx_depth + 1, NULL, ctx->channel, 0); if (!ctx->cq) { fprintf(stderr, "Couldn't create CQ\n"); goto clean_mr; } { struct ibv_qp_attr attr; struct ibv_qp_init_attr init_attr = { .send_cq = ctx->cq, .recv_cq = ctx->cq, .cap = { .max_send_wr = 1, .max_recv_wr = rx_depth, .max_send_sge = 1, .max_recv_sge = 1 }, .qp_type = IBV_QPT_UC }; ctx->qp = ibv_create_qp(ctx->pd, &init_attr); if (!ctx->qp) { fprintf(stderr, "Couldn't create QP\n"); goto clean_cq; } ibv_query_qp(ctx->qp, &attr, IBV_QP_CAP, &init_attr); if (init_attr.cap.max_inline_data >= size) { ctx->send_flags |= IBV_SEND_INLINE; } } { struct ibv_qp_attr attr = { .qp_state = IBV_QPS_INIT, .pkey_index = 0, .port_num = port, .qp_access_flags = 0 }; if (ibv_modify_qp(ctx->qp, &attr, IBV_QP_STATE | IBV_QP_PKEY_INDEX | IBV_QP_PORT | IBV_QP_ACCESS_FLAGS)) { fprintf(stderr, "Failed to modify QP 
to INIT\n"); goto clean_qp; } } return ctx; clean_qp: ibv_destroy_qp(ctx->qp); clean_cq: ibv_destroy_cq(ctx->cq); clean_mr: ibv_dereg_mr(ctx->mr); clean_pd: ibv_dealloc_pd(ctx->pd); clean_comp_channel: if (ctx->channel) ibv_destroy_comp_channel(ctx->channel); clean_device: ibv_close_device(ctx->context); clean_buffer: free(ctx->buf); clean_ctx: free(ctx); return NULL; } static int pp_close_ctx(struct pingpong_context *ctx) { if (ibv_destroy_qp(ctx->qp)) { fprintf(stderr, "Couldn't destroy QP\n"); return 1; } if (ibv_destroy_cq(ctx->cq)) { fprintf(stderr, "Couldn't destroy CQ\n"); return 1; } if (ibv_dereg_mr(ctx->mr)) { fprintf(stderr, "Couldn't deregister MR\n"); return 1; } if (ibv_dealloc_pd(ctx->pd)) { fprintf(stderr, "Couldn't deallocate PD\n"); return 1; } if (ctx->channel) { if (ibv_destroy_comp_channel(ctx->channel)) { fprintf(stderr, "Couldn't destroy completion channel\n"); return 1; } } if (ibv_close_device(ctx->context)) { fprintf(stderr, "Couldn't release context\n"); return 1; } free(ctx->buf); free(ctx); return 0; } static int pp_post_recv(struct pingpong_context *ctx, int n) { struct ibv_sge list = { .addr = (uintptr_t) ctx->buf, .length = ctx->size, .lkey = ctx->mr->lkey }; struct ibv_recv_wr wr = { .wr_id = PINGPONG_RECV_WRID, .sg_list = &list, .num_sge = 1, }; struct ibv_recv_wr *bad_wr; int i; for (i = 0; i < n; ++i) if (ibv_post_recv(ctx->qp, &wr, &bad_wr)) break; return i; } static int pp_post_send(struct pingpong_context *ctx) { struct ibv_sge list = { .addr = (uintptr_t) ctx->buf, .length = ctx->size, .lkey = ctx->mr->lkey }; struct ibv_send_wr wr = { .wr_id = PINGPONG_SEND_WRID, .sg_list = &list, .num_sge = 1, .opcode = IBV_WR_SEND, .send_flags = ctx->send_flags, }; struct ibv_send_wr *bad_wr; return ibv_post_send(ctx->qp, &wr, &bad_wr); } static void usage(const char *argv0) { printf("Usage:\n"); printf(" %s start a server and wait for connection\n", argv0); printf(" %s connect to server at \n", argv0); printf("\n"); printf("Options:\n"); 
printf(" -p, --port= listen on/connect to port (default 18515)\n"); printf(" -d, --ib-dev= use IB device (default first device found)\n"); printf(" -i, --ib-port= use port of IB device (default 1)\n"); printf(" -s, --size= size of message to exchange (default 4096)\n"); printf(" -m, --mtu= path MTU (default 1024)\n"); printf(" -r, --rx-depth= number of receives to post at a time (default 500)\n"); printf(" -n, --iters= number of exchanges (default 1000)\n"); printf(" -l, --sl= service level value\n"); printf(" -e, --events sleep on CQ events (default poll)\n"); printf(" -g, --gid-idx= local port gid index\n"); } int main(int argc, char *argv[]) { struct ibv_device **dev_list; struct ibv_device *ib_dev; struct pingpong_context *ctx; struct pingpong_dest my_dest; struct pingpong_dest *rem_dest; struct timeval start, end; char *ib_devname = NULL; char *servername = NULL; unsigned int port = 18515; int ib_port = 1; unsigned int size = 4096; enum ibv_mtu mtu = IBV_MTU_1024; unsigned int rx_depth = 500; unsigned int iters = 1000; int use_event = 0; int routs; int rcnt, scnt; int num_cq_events = 0; int sl = 0; int gidx = -1; char gid[33]; srand48(getpid() * time(NULL)); while (1) { int c; static struct option long_options[] = { { .name = "port", .has_arg = 1, .val = 'p' }, { .name = "ib-dev", .has_arg = 1, .val = 'd' }, { .name = "ib-port", .has_arg = 1, .val = 'i' }, { .name = "size", .has_arg = 1, .val = 's' }, { .name = "mtu", .has_arg = 1, .val = 'm' }, { .name = "rx-depth", .has_arg = 1, .val = 'r' }, { .name = "iters", .has_arg = 1, .val = 'n' }, { .name = "sl", .has_arg = 1, .val = 'l' }, { .name = "events", .has_arg = 0, .val = 'e' }, { .name = "gid-idx", .has_arg = 1, .val = 'g' }, {} }; c = getopt_long(argc, argv, "p:d:i:s:m:r:n:l:eg:", long_options, NULL); if (c == -1) break; switch (c) { case 'p': port = strtoul(optarg, NULL, 0); if (port > 65535) { usage(argv[0]); return 1; } break; case 'd': ib_devname = strdupa(optarg); break; case 'i': ib_port = 
strtol(optarg, NULL, 0); if (ib_port < 1) { usage(argv[0]); return 1; } break; case 's': size = strtoul(optarg, NULL, 0); break; case 'm': mtu = pp_mtu_to_enum(strtol(optarg, NULL, 0)); if (mtu == 0) { usage(argv[0]); return 1; } break; case 'r': rx_depth = strtoul(optarg, NULL, 0); break; case 'n': iters = strtoul(optarg, NULL, 0); break; case 'l': sl = strtol(optarg, NULL, 0); break; case 'e': ++use_event; break; case 'g': gidx = strtol(optarg, NULL, 0); break; default: usage(argv[0]); return 1; } } if (optind == argc - 1) servername = strdupa(argv[optind]); else if (optind < argc) { usage(argv[0]); return 1; } page_size = sysconf(_SC_PAGESIZE); dev_list = ibv_get_device_list(NULL); if (!dev_list) { perror("Failed to get IB devices list"); return 1; } if (!ib_devname) { ib_dev = *dev_list; if (!ib_dev) { fprintf(stderr, "No IB devices found\n"); return 1; } } else { int i; for (i = 0; dev_list[i]; ++i) if (!strcmp(ibv_get_device_name(dev_list[i]), ib_devname)) break; ib_dev = dev_list[i]; if (!ib_dev) { fprintf(stderr, "IB device %s not found\n", ib_devname); return 1; } } ctx = pp_init_ctx(ib_dev, size, rx_depth, ib_port, use_event); if (!ctx) return 1; routs = pp_post_recv(ctx, ctx->rx_depth); if (routs < ctx->rx_depth) { fprintf(stderr, "Couldn't post receive (%d)\n", routs); return 1; } if (use_event) if (ibv_req_notify_cq(ctx->cq, 0)) { fprintf(stderr, "Couldn't request CQ notification\n"); return 1; } if (pp_get_port_info(ctx->context, ib_port, &ctx->portinfo)) { fprintf(stderr, "Couldn't get port info\n"); return 1; } my_dest.lid = ctx->portinfo.lid; if (ctx->portinfo.link_layer != IBV_LINK_LAYER_ETHERNET && !my_dest.lid) { fprintf(stderr, "Couldn't get local LID\n"); return 1; } if (gidx >= 0) { if (ibv_query_gid(ctx->context, ib_port, gidx, &my_dest.gid)) { fprintf(stderr, "can't read sgid of index %d\n", gidx); return 1; } } else memset(&my_dest.gid, 0, sizeof my_dest.gid); my_dest.qpn = ctx->qp->qp_num; my_dest.psn = lrand48() & 0xffffff; 
inet_ntop(AF_INET6, &my_dest.gid, gid, sizeof gid); printf(" local address: LID 0x%04x, QPN 0x%06x, PSN 0x%06x, GID %s\n", my_dest.lid, my_dest.qpn, my_dest.psn, gid); if (servername) rem_dest = pp_client_exch_dest(servername, port, &my_dest); else rem_dest = pp_server_exch_dest(ctx, ib_port, mtu, port, sl, &my_dest, gidx); if (!rem_dest) return 1; inet_ntop(AF_INET6, &rem_dest->gid, gid, sizeof gid); printf(" remote address: LID 0x%04x, QPN 0x%06x, PSN 0x%06x, GID %s\n", rem_dest->lid, rem_dest->qpn, rem_dest->psn, gid); if (servername) if (pp_connect_ctx(ctx, ib_port, my_dest.psn, mtu, sl, rem_dest, gidx)) return 1; ctx->pending = PINGPONG_RECV_WRID; if (servername) { if (pp_post_send(ctx)) { fprintf(stderr, "Couldn't post send\n"); return 1; } ctx->pending |= PINGPONG_SEND_WRID; } if (gettimeofday(&start, NULL)) { perror("gettimeofday"); return 1; } rcnt = scnt = 0; while (rcnt < iters || scnt < iters) { if (use_event) { struct ibv_cq *ev_cq; void *ev_ctx; if (ibv_get_cq_event(ctx->channel, &ev_cq, &ev_ctx)) { fprintf(stderr, "Failed to get cq_event\n"); return 1; } ++num_cq_events; if (ev_cq != ctx->cq) { fprintf(stderr, "CQ event for unknown CQ %p\n", ev_cq); return 1; } if (ibv_req_notify_cq(ctx->cq, 0)) { fprintf(stderr, "Couldn't request CQ notification\n"); return 1; } } { struct ibv_wc wc[2]; int ne, i; do { ne = ibv_poll_cq(ctx->cq, 2, wc); if (ne < 0) { fprintf(stderr, "poll CQ failed %d\n", ne); return 1; } } while (!use_event && ne < 1); for (i = 0; i < ne; ++i) { if (wc[i].status != IBV_WC_SUCCESS) { fprintf(stderr, "Failed status %s (%d) for wr_id %d\n", ibv_wc_status_str(wc[i].status), wc[i].status, (int) wc[i].wr_id); return 1; } switch ((int) wc[i].wr_id) { case PINGPONG_SEND_WRID: ++scnt; break; case PINGPONG_RECV_WRID: if (--routs <= 1) { routs += pp_post_recv(ctx, ctx->rx_depth - routs); if (routs < ctx->rx_depth) { fprintf(stderr, "Couldn't post receive (%d)\n", routs); return 1; } } ++rcnt; break; default: fprintf(stderr, "Completion for 
unknown wr_id %d\n", (int) wc[i].wr_id); return 1; } ctx->pending &= ~(int) wc[i].wr_id; if (scnt < iters && !ctx->pending) { if (pp_post_send(ctx)) { fprintf(stderr, "Couldn't post send\n"); return 1; } ctx->pending = PINGPONG_RECV_WRID | PINGPONG_SEND_WRID; } } } } if (gettimeofday(&end, NULL)) { perror("gettimeofday"); return 1; } { float usec = (end.tv_sec - start.tv_sec) * 1000000 + (end.tv_usec - start.tv_usec); long long bytes = (long long) size * iters * 2; printf("%lld bytes in %.2f seconds = %.2f Mbit/sec\n", bytes, usec / 1000000., bytes * 8. / usec); printf("%d iters in %.2f seconds = %.2f usec/iter\n", iters, usec / 1000000., usec / iters); } ibv_ack_cq_events(ctx->cq, num_cq_events); if (pp_close_ctx(ctx)) return 1; ibv_free_device_list(dev_list); free(rem_dest); return 0; } Index: head/contrib/ofed/libibverbs/examples/ud_pingpong.c =================================================================== --- head/contrib/ofed/libibverbs/examples/ud_pingpong.c (revision 363220) +++ head/contrib/ofed/libibverbs/examples/ud_pingpong.c (revision 363221) @@ -1,864 +1,864 @@ /* * Copyright (c) 2005 Topspin Communications. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #define _GNU_SOURCE #include #include #include #include #include #include #include #include #include #include #include #include #include #include "pingpong.h" enum { PINGPONG_RECV_WRID = 1, PINGPONG_SEND_WRID = 2, }; static int page_size; struct pingpong_context { struct ibv_context *context; struct ibv_comp_channel *channel; struct ibv_pd *pd; struct ibv_mr *mr; struct ibv_cq *cq; struct ibv_qp *qp; struct ibv_ah *ah; void *buf; int size; int send_flags; int rx_depth; int pending; struct ibv_port_attr portinfo; }; struct pingpong_dest { int lid; int qpn; int psn; union ibv_gid gid; }; static int pp_connect_ctx(struct pingpong_context *ctx, int port, int my_psn, int sl, struct pingpong_dest *dest, int sgid_idx) { struct ibv_ah_attr ah_attr = { .is_global = 0, .dlid = dest->lid, .sl = sl, .src_path_bits = 0, .port_num = port }; struct ibv_qp_attr attr = { .qp_state = IBV_QPS_RTR }; if (ibv_modify_qp(ctx->qp, &attr, IBV_QP_STATE)) { fprintf(stderr, "Failed to modify QP to RTR\n"); return 1; } attr.qp_state = IBV_QPS_RTS; attr.sq_psn = my_psn; if (ibv_modify_qp(ctx->qp, &attr, IBV_QP_STATE | IBV_QP_SQ_PSN)) { fprintf(stderr, "Failed to modify QP to RTS\n"); return 1; } if (dest->gid.global.interface_id) { ah_attr.is_global = 1; ah_attr.grh.hop_limit = 1; ah_attr.grh.dgid = dest->gid; ah_attr.grh.sgid_index = sgid_idx; } ctx->ah = ibv_create_ah(ctx->pd, &ah_attr); if (!ctx->ah) { fprintf(stderr, "Failed to create AH\n"); return 1; } return 0; } static struct pingpong_dest 
*pp_client_exch_dest(const char *servername, int port, const struct pingpong_dest *my_dest) { struct addrinfo *res, *t; struct addrinfo hints = { .ai_family = AF_UNSPEC, .ai_socktype = SOCK_STREAM }; char *service; char msg[sizeof "0000:000000:000000:00000000000000000000000000000000"]; int n; int sockfd = -1; struct pingpong_dest *rem_dest = NULL; char gid[33]; if (asprintf(&service, "%d", port) < 0) return NULL; n = getaddrinfo(servername, service, &hints, &res); if (n < 0) { fprintf(stderr, "%s for %s:%d\n", gai_strerror(n), servername, port); free(service); return NULL; } for (t = res; t; t = t->ai_next) { sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol); if (sockfd >= 0) { if (!connect(sockfd, t->ai_addr, t->ai_addrlen)) break; close(sockfd); sockfd = -1; } } freeaddrinfo_null(res); free(service); if (sockfd < 0) { fprintf(stderr, "Couldn't connect to %s:%d\n", servername, port); return NULL; } gid_to_wire_gid(&my_dest->gid, gid); sprintf(msg, "%04x:%06x:%06x:%s", my_dest->lid, my_dest->qpn, my_dest->psn, gid); if (write(sockfd, msg, sizeof msg) != sizeof msg) { fprintf(stderr, "Couldn't send local address\n"); goto out; } if (read(sockfd, msg, sizeof msg) != sizeof msg || write(sockfd, "done", sizeof "done") != sizeof "done") { perror("client read/write"); fprintf(stderr, "Couldn't read/write remote address\n"); goto out; } rem_dest = malloc(sizeof *rem_dest); if (!rem_dest) goto out; sscanf(msg, "%x:%x:%x:%s", &rem_dest->lid, &rem_dest->qpn, &rem_dest->psn, gid); wire_gid_to_gid(gid, &rem_dest->gid); out: close(sockfd); return rem_dest; } static struct pingpong_dest *pp_server_exch_dest(struct pingpong_context *ctx, int ib_port, int port, int sl, const struct pingpong_dest *my_dest, int sgid_idx) { struct addrinfo *res, *t; struct addrinfo hints = { .ai_flags = AI_PASSIVE, .ai_family = AF_INET, .ai_socktype = SOCK_STREAM }; char *service; char msg[sizeof "0000:000000:000000:00000000000000000000000000000000"]; int n; int sockfd = -1, connfd; 
struct pingpong_dest *rem_dest = NULL; char gid[33]; if (asprintf(&service, "%d", port) < 0) return NULL; n = getaddrinfo(NULL, service, &hints, &res); if (n < 0) { fprintf(stderr, "%s for port %d\n", gai_strerror(n), port); free(service); return NULL; } for (t = res; t; t = t->ai_next) { sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol); if (sockfd >= 0) { n = 1; setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &n, sizeof n); if (!bind(sockfd, t->ai_addr, t->ai_addrlen)) break; close(sockfd); sockfd = -1; } } freeaddrinfo_null(res); free(service); if (sockfd < 0) { fprintf(stderr, "Couldn't listen to port %d\n", port); return NULL; } - if (listen(sockfd, 1)) { + if (listen(sockfd, 1) < 0) { perror("listen() failed"); close(sockfd); return NULL; } connfd = accept(sockfd, NULL, NULL); close(sockfd); if (connfd < 0) { fprintf(stderr, "accept() failed\n"); return NULL; } n = read(connfd, msg, sizeof msg); if (n != sizeof msg) { perror("server read"); fprintf(stderr, "%d/%d: Couldn't read remote address\n", n, (int) sizeof msg); goto out; } rem_dest = malloc(sizeof *rem_dest); if (!rem_dest) goto out; sscanf(msg, "%x:%x:%x:%s", &rem_dest->lid, &rem_dest->qpn, &rem_dest->psn, gid); wire_gid_to_gid(gid, &rem_dest->gid); if (pp_connect_ctx(ctx, ib_port, my_dest->psn, sl, rem_dest, sgid_idx)) { fprintf(stderr, "Couldn't connect to remote QP\n"); free(rem_dest); rem_dest = NULL; goto out; } gid_to_wire_gid(&my_dest->gid, gid); sprintf(msg, "%04x:%06x:%06x:%s", my_dest->lid, my_dest->qpn, my_dest->psn, gid); if (write(connfd, msg, sizeof msg) != sizeof msg || read(connfd, msg, sizeof msg) != sizeof "done") { fprintf(stderr, "Couldn't send/recv local address\n"); free(rem_dest); rem_dest = NULL; goto out; } out: close(connfd); return rem_dest; } static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev, int size, int rx_depth, int port, int use_event) { struct pingpong_context *ctx; ctx = malloc(sizeof *ctx); if (!ctx) return NULL; ctx->size = size; 
ctx->send_flags = IBV_SEND_SIGNALED; ctx->rx_depth = rx_depth; ctx->buf = memalign(page_size, size + 40); if (!ctx->buf) { fprintf(stderr, "Couldn't allocate work buf.\n"); goto clean_ctx; } /* FIXME memset(ctx->buf, 0, size + 40); */ memset(ctx->buf, 0x7b, size + 40); ctx->context = ibv_open_device(ib_dev); if (!ctx->context) { fprintf(stderr, "Couldn't get context for %s\n", ibv_get_device_name(ib_dev)); goto clean_buffer; } { struct ibv_port_attr port_info = {}; int mtu; if (ibv_query_port(ctx->context, port, &port_info)) { fprintf(stderr, "Unable to query port info for port %d\n", port); goto clean_device; } mtu = 1 << (port_info.active_mtu + 7); if (size > mtu) { fprintf(stderr, "Requested size larger than port MTU (%d)\n", mtu); goto clean_device; } } if (use_event) { ctx->channel = ibv_create_comp_channel(ctx->context); if (!ctx->channel) { fprintf(stderr, "Couldn't create completion channel\n"); goto clean_device; } } else ctx->channel = NULL; ctx->pd = ibv_alloc_pd(ctx->context); if (!ctx->pd) { fprintf(stderr, "Couldn't allocate PD\n"); goto clean_comp_channel; } ctx->mr = ibv_reg_mr(ctx->pd, ctx->buf, size + 40, IBV_ACCESS_LOCAL_WRITE); if (!ctx->mr) { fprintf(stderr, "Couldn't register MR\n"); goto clean_pd; } ctx->cq = ibv_create_cq(ctx->context, rx_depth + 1, NULL, ctx->channel, 0); if (!ctx->cq) { fprintf(stderr, "Couldn't create CQ\n"); goto clean_mr; } { struct ibv_qp_attr attr; struct ibv_qp_init_attr init_attr = { .send_cq = ctx->cq, .recv_cq = ctx->cq, .cap = { .max_send_wr = 1, .max_recv_wr = rx_depth, .max_send_sge = 1, .max_recv_sge = 1 }, .qp_type = IBV_QPT_UD, }; ctx->qp = ibv_create_qp(ctx->pd, &init_attr); if (!ctx->qp) { fprintf(stderr, "Couldn't create QP\n"); goto clean_cq; } ibv_query_qp(ctx->qp, &attr, IBV_QP_CAP, &init_attr); if (init_attr.cap.max_inline_data >= size) { ctx->send_flags |= IBV_SEND_INLINE; } } { struct ibv_qp_attr attr = { .qp_state = IBV_QPS_INIT, .pkey_index = 0, .port_num = port, .qkey = 0x11111111 }; if 
(ibv_modify_qp(ctx->qp, &attr, IBV_QP_STATE | IBV_QP_PKEY_INDEX | IBV_QP_PORT | IBV_QP_QKEY)) { fprintf(stderr, "Failed to modify QP to INIT\n"); goto clean_qp; } } return ctx; clean_qp: ibv_destroy_qp(ctx->qp); clean_cq: ibv_destroy_cq(ctx->cq); clean_mr: ibv_dereg_mr(ctx->mr); clean_pd: ibv_dealloc_pd(ctx->pd); clean_comp_channel: if (ctx->channel) ibv_destroy_comp_channel(ctx->channel); clean_device: ibv_close_device(ctx->context); clean_buffer: free(ctx->buf); clean_ctx: free(ctx); return NULL; } static int pp_close_ctx(struct pingpong_context *ctx) { if (ibv_destroy_qp(ctx->qp)) { fprintf(stderr, "Couldn't destroy QP\n"); return 1; } if (ibv_destroy_cq(ctx->cq)) { fprintf(stderr, "Couldn't destroy CQ\n"); return 1; } if (ibv_dereg_mr(ctx->mr)) { fprintf(stderr, "Couldn't deregister MR\n"); return 1; } if (ibv_destroy_ah(ctx->ah)) { fprintf(stderr, "Couldn't destroy AH\n"); return 1; } if (ibv_dealloc_pd(ctx->pd)) { fprintf(stderr, "Couldn't deallocate PD\n"); return 1; } if (ctx->channel) { if (ibv_destroy_comp_channel(ctx->channel)) { fprintf(stderr, "Couldn't destroy completion channel\n"); return 1; } } if (ibv_close_device(ctx->context)) { fprintf(stderr, "Couldn't release context\n"); return 1; } free(ctx->buf); free(ctx); return 0; } static int pp_post_recv(struct pingpong_context *ctx, int n) { struct ibv_sge list = { .addr = (uintptr_t) ctx->buf, .length = ctx->size + 40, .lkey = ctx->mr->lkey }; struct ibv_recv_wr wr = { .wr_id = PINGPONG_RECV_WRID, .sg_list = &list, .num_sge = 1, }; struct ibv_recv_wr *bad_wr; int i; for (i = 0; i < n; ++i) if (ibv_post_recv(ctx->qp, &wr, &bad_wr)) break; return i; } static int pp_post_send(struct pingpong_context *ctx, uint32_t qpn) { struct ibv_sge list = { .addr = (uintptr_t) ctx->buf + 40, .length = ctx->size, .lkey = ctx->mr->lkey }; struct ibv_send_wr wr = { .wr_id = PINGPONG_SEND_WRID, .sg_list = &list, .num_sge = 1, .opcode = IBV_WR_SEND, .send_flags = ctx->send_flags, .wr = { .ud = { .ah = ctx->ah, 
.remote_qpn = qpn, .remote_qkey = 0x11111111 } } }; struct ibv_send_wr *bad_wr; return ibv_post_send(ctx->qp, &wr, &bad_wr); } static void usage(const char *argv0) { printf("Usage:\n"); printf(" %s start a server and wait for connection\n", argv0); printf(" %s connect to server at \n", argv0); printf("\n"); printf("Options:\n"); printf(" -p, --port= listen on/connect to port (default 18515)\n"); printf(" -d, --ib-dev= use IB device (default first device found)\n"); printf(" -i, --ib-port= use port of IB device (default 1)\n"); printf(" -s, --size= size of message to exchange (default 2048)\n"); printf(" -r, --rx-depth= number of receives to post at a time (default 500)\n"); printf(" -n, --iters= number of exchanges (default 1000)\n"); printf(" -l, --sl= send messages with service level (default 0)\n"); printf(" -e, --events sleep on CQ events (default poll)\n"); printf(" -g, --gid-idx= local port gid index\n"); } int main(int argc, char *argv[]) { struct ibv_device **dev_list; struct ibv_device *ib_dev; struct pingpong_context *ctx; struct pingpong_dest my_dest; struct pingpong_dest *rem_dest; struct timeval start, end; char *ib_devname = NULL; char *servername = NULL; unsigned int port = 18515; int ib_port = 1; unsigned int size = 2048; unsigned int rx_depth = 500; unsigned int iters = 1000; int use_event = 0; int routs; int rcnt, scnt; int num_cq_events = 0; int sl = 0; int gidx = -1; char gid[33]; srand48(getpid() * time(NULL)); while (1) { int c; static struct option long_options[] = { { .name = "port", .has_arg = 1, .val = 'p' }, { .name = "ib-dev", .has_arg = 1, .val = 'd' }, { .name = "ib-port", .has_arg = 1, .val = 'i' }, { .name = "size", .has_arg = 1, .val = 's' }, { .name = "rx-depth", .has_arg = 1, .val = 'r' }, { .name = "iters", .has_arg = 1, .val = 'n' }, { .name = "sl", .has_arg = 1, .val = 'l' }, { .name = "events", .has_arg = 0, .val = 'e' }, { .name = "gid-idx", .has_arg = 1, .val = 'g' }, {} }; c = getopt_long(argc, argv, "p:d:i:s:r:n:l:eg:", 
long_options, NULL); if (c == -1) break; switch (c) { case 'p': port = strtol(optarg, NULL, 0); if (port > 65535) { usage(argv[0]); return 1; } break; case 'd': ib_devname = strdupa(optarg); break; case 'i': ib_port = strtol(optarg, NULL, 0); if (ib_port < 1) { usage(argv[0]); return 1; } break; case 's': size = strtoul(optarg, NULL, 0); break; case 'r': rx_depth = strtoul(optarg, NULL, 0); break; case 'n': iters = strtoul(optarg, NULL, 0); break; case 'l': sl = strtol(optarg, NULL, 0); break; case 'e': ++use_event; break; case 'g': gidx = strtol(optarg, NULL, 0); break; default: usage(argv[0]); return 1; } } if (optind == argc - 1) servername = strdupa(argv[optind]); else if (optind < argc) { usage(argv[0]); return 1; } page_size = sysconf(_SC_PAGESIZE); dev_list = ibv_get_device_list(NULL); if (!dev_list) { perror("Failed to get IB devices list"); return 1; } if (!ib_devname) { ib_dev = *dev_list; if (!ib_dev) { fprintf(stderr, "No IB devices found\n"); return 1; } } else { int i; for (i = 0; dev_list[i]; ++i) if (!strcmp(ibv_get_device_name(dev_list[i]), ib_devname)) break; ib_dev = dev_list[i]; if (!ib_dev) { fprintf(stderr, "IB device %s not found\n", ib_devname); return 1; } } ctx = pp_init_ctx(ib_dev, size, rx_depth, ib_port, use_event); if (!ctx) return 1; routs = pp_post_recv(ctx, ctx->rx_depth); if (routs < ctx->rx_depth) { fprintf(stderr, "Couldn't post receive (%d)\n", routs); return 1; } if (use_event) if (ibv_req_notify_cq(ctx->cq, 0)) { fprintf(stderr, "Couldn't request CQ notification\n"); return 1; } if (pp_get_port_info(ctx->context, ib_port, &ctx->portinfo)) { fprintf(stderr, "Couldn't get port info\n"); return 1; } my_dest.lid = ctx->portinfo.lid; my_dest.qpn = ctx->qp->qp_num; my_dest.psn = lrand48() & 0xffffff; if (gidx >= 0) { if (ibv_query_gid(ctx->context, ib_port, gidx, &my_dest.gid)) { fprintf(stderr, "Could not get local gid for gid index " "%d\n", gidx); return 1; } } else memset(&my_dest.gid, 0, sizeof my_dest.gid); inet_ntop(AF_INET6, 
&my_dest.gid, gid, sizeof gid); printf(" local address: LID 0x%04x, QPN 0x%06x, PSN 0x%06x: GID %s\n", my_dest.lid, my_dest.qpn, my_dest.psn, gid); if (servername) rem_dest = pp_client_exch_dest(servername, port, &my_dest); else rem_dest = pp_server_exch_dest(ctx, ib_port, port, sl, &my_dest, gidx); if (!rem_dest) return 1; inet_ntop(AF_INET6, &rem_dest->gid, gid, sizeof gid); printf(" remote address: LID 0x%04x, QPN 0x%06x, PSN 0x%06x, GID %s\n", rem_dest->lid, rem_dest->qpn, rem_dest->psn, gid); if (servername) if (pp_connect_ctx(ctx, ib_port, my_dest.psn, sl, rem_dest, gidx)) return 1; ctx->pending = PINGPONG_RECV_WRID; if (servername) { if (pp_post_send(ctx, rem_dest->qpn)) { fprintf(stderr, "Couldn't post send\n"); return 1; } ctx->pending |= PINGPONG_SEND_WRID; } if (gettimeofday(&start, NULL)) { perror("gettimeofday"); return 1; } rcnt = scnt = 0; while (rcnt < iters || scnt < iters) { if (use_event) { struct ibv_cq *ev_cq; void *ev_ctx; if (ibv_get_cq_event(ctx->channel, &ev_cq, &ev_ctx)) { fprintf(stderr, "Failed to get cq_event\n"); return 1; } ++num_cq_events; if (ev_cq != ctx->cq) { fprintf(stderr, "CQ event for unknown CQ %p\n", ev_cq); return 1; } if (ibv_req_notify_cq(ctx->cq, 0)) { fprintf(stderr, "Couldn't request CQ notification\n"); return 1; } } { struct ibv_wc wc[2]; int ne, i; do { ne = ibv_poll_cq(ctx->cq, 2, wc); if (ne < 0) { fprintf(stderr, "poll CQ failed %d\n", ne); return 1; } } while (!use_event && ne < 1); for (i = 0; i < ne; ++i) { if (wc[i].status != IBV_WC_SUCCESS) { fprintf(stderr, "Failed status %s (%d) for wr_id %d\n", ibv_wc_status_str(wc[i].status), wc[i].status, (int) wc[i].wr_id); return 1; } switch ((int) wc[i].wr_id) { case PINGPONG_SEND_WRID: ++scnt; break; case PINGPONG_RECV_WRID: if (--routs <= 1) { routs += pp_post_recv(ctx, ctx->rx_depth - routs); if (routs < ctx->rx_depth) { fprintf(stderr, "Couldn't post receive (%d)\n", routs); return 1; } } ++rcnt; break; default: fprintf(stderr, "Completion for unknown wr_id 
%d\n", (int) wc[i].wr_id); return 1; } ctx->pending &= ~(int) wc[i].wr_id; if (scnt < iters && !ctx->pending) { if (pp_post_send(ctx, rem_dest->qpn)) { fprintf(stderr, "Couldn't post send\n"); return 1; } ctx->pending = PINGPONG_RECV_WRID | PINGPONG_SEND_WRID; } } } } if (gettimeofday(&end, NULL)) { perror("gettimeofday"); return 1; } { float usec = (end.tv_sec - start.tv_sec) * 1000000 + (end.tv_usec - start.tv_usec); long long bytes = (long long) size * iters * 2; printf("%lld bytes in %.2f seconds = %.2f Mbit/sec\n", bytes, usec / 1000000., bytes * 8. / usec); printf("%d iters in %.2f seconds = %.2f usec/iter\n", iters, usec / 1000000., usec / iters); } ibv_ack_cq_events(ctx->cq, num_cq_events); if (pp_close_ctx(ctx)) return 1; ibv_free_device_list(dev_list); free(rem_dest); return 0; } Index: head/contrib/ofed/libibverbs/examples/xsrq_pingpong.c =================================================================== --- head/contrib/ofed/libibverbs/examples/xsrq_pingpong.c (revision 363220) +++ head/contrib/ofed/libibverbs/examples/xsrq_pingpong.c (revision 363221) @@ -1,1026 +1,1026 @@ /* * Copyright (c) 2005 Topspin Communications. All rights reserved. * Copyright (c) 2011 Intel Corporation, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. 
* * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #define _GNU_SOURCE #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "pingpong.h" #define MSG_FORMAT "%04x:%06x:%06x:%06x:%06x:%32s" #define MSG_SIZE 66 #define MSG_SSCAN "%x:%x:%x:%x:%x:%s" #define ADDR_FORMAT \ "%8s: LID %04x, QPN RECV %06x SEND %06x, PSN %06x, SRQN %06x, GID %s\n" #define TERMINATION_FORMAT "%s" #define TERMINATION_MSG_SIZE 4 #define TERMINATION_MSG "END" static int page_size; struct pingpong_dest { union ibv_gid gid; int lid; int recv_qpn; int send_qpn; int recv_psn; int send_psn; int srqn; int pp_cnt; int sockfd; }; struct pingpong_context { struct ibv_context *context; struct ibv_comp_channel *channel; struct ibv_pd *pd; struct ibv_mr *mr; struct ibv_cq *send_cq; struct ibv_cq *recv_cq; struct ibv_srq *srq; struct ibv_xrcd *xrcd; struct ibv_qp **recv_qp; struct ibv_qp **send_qp; struct pingpong_dest *rem_dest; void *buf; int lid; int sl; enum ibv_mtu mtu; int ib_port; int fd; int size; int num_clients; int num_tests; int use_event; int gidx; }; static struct pingpong_context ctx; static int open_device(char *ib_devname) { struct ibv_device **dev_list; int i = 0; dev_list = ibv_get_device_list(NULL); if (!dev_list) { fprintf(stderr, "Failed to get IB devices list"); 
return -1; } if (ib_devname) { for (; dev_list[i]; ++i) { if (!strcmp(ibv_get_device_name(dev_list[i]), ib_devname)) break; } } if (!dev_list[i]) { fprintf(stderr, "IB device %s not found\n", ib_devname ? ib_devname : ""); return -1; } ctx.context = ibv_open_device(dev_list[i]); if (!ctx.context) { fprintf(stderr, "Couldn't get context for %s\n", ibv_get_device_name(dev_list[i])); return -1; } ibv_free_device_list(dev_list); return 0; } static int create_qps(void) { struct ibv_qp_init_attr_ex init; struct ibv_qp_attr mod; int i; for (i = 0; i < ctx.num_clients; ++i) { memset(&init, 0, sizeof init); init.qp_type = IBV_QPT_XRC_RECV; init.comp_mask = IBV_QP_INIT_ATTR_XRCD; init.xrcd = ctx.xrcd; ctx.recv_qp[i] = ibv_create_qp_ex(ctx.context, &init); if (!ctx.recv_qp[i]) { fprintf(stderr, "Couldn't create recv QP[%d] errno %d\n", i, errno); return 1; } mod.qp_state = IBV_QPS_INIT; mod.pkey_index = 0; mod.port_num = ctx.ib_port; mod.qp_access_flags = IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_READ; if (ibv_modify_qp(ctx.recv_qp[i], &mod, IBV_QP_STATE | IBV_QP_PKEY_INDEX | IBV_QP_PORT | IBV_QP_ACCESS_FLAGS)) { fprintf(stderr, "Failed to modify recv QP[%d] to INIT\n", i); return 1; } memset(&init, 0, sizeof init); init.qp_type = IBV_QPT_XRC_SEND; init.send_cq = ctx.send_cq; init.cap.max_send_wr = ctx.num_clients * ctx.num_tests; init.cap.max_send_sge = 1; init.comp_mask = IBV_QP_INIT_ATTR_PD; init.pd = ctx.pd; ctx.send_qp[i] = ibv_create_qp_ex(ctx.context, &init); if (!ctx.send_qp[i]) { fprintf(stderr, "Couldn't create send QP[%d] errno %d\n", i, errno); return 1; } mod.qp_state = IBV_QPS_INIT; mod.pkey_index = 0; mod.port_num = ctx.ib_port; mod.qp_access_flags = 0; if (ibv_modify_qp(ctx.send_qp[i], &mod, IBV_QP_STATE | IBV_QP_PKEY_INDEX | IBV_QP_PORT | IBV_QP_ACCESS_FLAGS)) { fprintf(stderr, "Failed to modify send QP[%d] to INIT\n", i); return 1; } } return 0; } static int pp_init_ctx(char *ib_devname) { struct ibv_srq_init_attr_ex attr; struct ibv_xrcd_init_attr 
xrcd_attr; struct ibv_port_attr port_attr; ctx.recv_qp = calloc(ctx.num_clients, sizeof *ctx.recv_qp); ctx.send_qp = calloc(ctx.num_clients, sizeof *ctx.send_qp); ctx.rem_dest = calloc(ctx.num_clients, sizeof *ctx.rem_dest); if (!ctx.recv_qp || !ctx.send_qp || !ctx.rem_dest) return 1; if (open_device(ib_devname)) { fprintf(stderr, "Failed to open device\n"); return 1; } if (pp_get_port_info(ctx.context, ctx.ib_port, &port_attr)) { fprintf(stderr, "Failed to get port info\n"); return 1; } ctx.lid = port_attr.lid; if (port_attr.link_layer != IBV_LINK_LAYER_ETHERNET && !ctx.lid) { fprintf(stderr, "Couldn't get local LID\n"); return 1; } ctx.buf = memalign(page_size, ctx.size); if (!ctx.buf) { fprintf(stderr, "Couldn't allocate work buf.\n"); return 1; } memset(ctx.buf, 0, ctx.size); if (ctx.use_event) { ctx.channel = ibv_create_comp_channel(ctx.context); if (!ctx.channel) { fprintf(stderr, "Couldn't create completion channel\n"); return 1; } } ctx.pd = ibv_alloc_pd(ctx.context); if (!ctx.pd) { fprintf(stderr, "Couldn't allocate PD\n"); return 1; } ctx.mr = ibv_reg_mr(ctx.pd, ctx.buf, ctx.size, IBV_ACCESS_LOCAL_WRITE); if (!ctx.mr) { fprintf(stderr, "Couldn't register MR\n"); return 1; } ctx.fd = open("/tmp/xrc_domain", O_RDONLY | O_CREAT, S_IRUSR | S_IRGRP); if (ctx.fd < 0) { fprintf(stderr, "Couldn't create the file for the XRC Domain " "but not stopping %d\n", errno); ctx.fd = -1; } memset(&xrcd_attr, 0, sizeof xrcd_attr); xrcd_attr.comp_mask = IBV_XRCD_INIT_ATTR_FD | IBV_XRCD_INIT_ATTR_OFLAGS; xrcd_attr.fd = ctx.fd; xrcd_attr.oflags = O_CREAT; ctx.xrcd = ibv_open_xrcd(ctx.context, &xrcd_attr); if (!ctx.xrcd) { fprintf(stderr, "Couldn't Open the XRC Domain %d\n", errno); return 1; } ctx.recv_cq = ibv_create_cq(ctx.context, ctx.num_clients, &ctx.recv_cq, ctx.channel, 0); if (!ctx.recv_cq) { fprintf(stderr, "Couldn't create recv CQ\n"); return 1; } if (ctx.use_event) { if (ibv_req_notify_cq(ctx.recv_cq, 0)) { fprintf(stderr, "Couldn't request CQ notification\n"); 
return 1; } } ctx.send_cq = ibv_create_cq(ctx.context, ctx.num_clients, NULL, NULL, 0); if (!ctx.send_cq) { fprintf(stderr, "Couldn't create send CQ\n"); return 1; } memset(&attr, 0, sizeof attr); attr.attr.max_wr = ctx.num_clients; attr.attr.max_sge = 1; attr.comp_mask = IBV_SRQ_INIT_ATTR_TYPE | IBV_SRQ_INIT_ATTR_XRCD | IBV_SRQ_INIT_ATTR_CQ | IBV_SRQ_INIT_ATTR_PD; attr.srq_type = IBV_SRQT_XRC; attr.xrcd = ctx.xrcd; attr.cq = ctx.recv_cq; attr.pd = ctx.pd; ctx.srq = ibv_create_srq_ex(ctx.context, &attr); if (!ctx.srq) { fprintf(stderr, "Couldn't create SRQ\n"); return 1; } if (create_qps()) return 1; return 0; } static int recv_termination_ack(int index) { char msg[TERMINATION_MSG_SIZE]; int n = 0, r; int sockfd = ctx.rem_dest[index].sockfd; while (n < TERMINATION_MSG_SIZE) { r = read(sockfd, msg + n, TERMINATION_MSG_SIZE - n); if (r < 0) { perror("client read"); fprintf(stderr, "%d/%d: Couldn't read remote termination ack\n", n, TERMINATION_MSG_SIZE); return 1; } n += r; } if (strcmp(msg, TERMINATION_MSG)) { fprintf(stderr, "Invalid termination ack was accepted\n"); return 1; } return 0; } static int send_termination_ack(int index) { char msg[TERMINATION_MSG_SIZE]; int sockfd = ctx.rem_dest[index].sockfd; sprintf(msg, TERMINATION_FORMAT, TERMINATION_MSG); if (write(sockfd, msg, TERMINATION_MSG_SIZE) != TERMINATION_MSG_SIZE) { fprintf(stderr, "Couldn't send termination ack\n"); return 1; } return 0; } static int pp_client_termination(void) { if (send_termination_ack(0)) return 1; if (recv_termination_ack(0)) return 1; return 0; } static int pp_server_termination(void) { int i; for (i = 0; i < ctx.num_clients; i++) { if (recv_termination_ack(i)) return 1; } for (i = 0; i < ctx.num_clients; i++) { if (send_termination_ack(i)) return 1; } return 0; } static int send_local_dest(int sockfd, int index) { char msg[MSG_SIZE]; char gid[33]; uint32_t srq_num; union ibv_gid local_gid; if (ctx.gidx >= 0) { if (ibv_query_gid(ctx.context, ctx.ib_port, ctx.gidx, &local_gid)) { 
fprintf(stderr, "can't read sgid of index %d\n", ctx.gidx); return -1; } } else { memset(&local_gid, 0, sizeof(local_gid)); } ctx.rem_dest[index].recv_psn = lrand48() & 0xffffff; if (ibv_get_srq_num(ctx.srq, &srq_num)) { fprintf(stderr, "Couldn't get SRQ num\n"); return -1; } inet_ntop(AF_INET6, &local_gid, gid, sizeof(gid)); printf(ADDR_FORMAT, "local", ctx.lid, ctx.recv_qp[index]->qp_num, ctx.send_qp[index]->qp_num, ctx.rem_dest[index].recv_psn, srq_num, gid); gid_to_wire_gid(&local_gid, gid); sprintf(msg, MSG_FORMAT, ctx.lid, ctx.recv_qp[index]->qp_num, ctx.send_qp[index]->qp_num, ctx.rem_dest[index].recv_psn, srq_num, gid); if (write(sockfd, msg, MSG_SIZE) != MSG_SIZE) { fprintf(stderr, "Couldn't send local address\n"); return -1; } return 0; } static int recv_remote_dest(int sockfd, int index) { struct pingpong_dest *rem_dest; char msg[MSG_SIZE]; char gid[33]; int n = 0, r; while (n < MSG_SIZE) { r = read(sockfd, msg + n, MSG_SIZE - n); if (r < 0) { perror("client read"); fprintf(stderr, "%d/%d: Couldn't read remote address [%d]\n", n, MSG_SIZE, index); return -1; } n += r; } rem_dest = &ctx.rem_dest[index]; sscanf(msg, MSG_SSCAN, &rem_dest->lid, &rem_dest->recv_qpn, &rem_dest->send_qpn, &rem_dest->send_psn, &rem_dest->srqn, gid); wire_gid_to_gid(gid, &rem_dest->gid); inet_ntop(AF_INET6, &rem_dest->gid, gid, sizeof(gid)); printf(ADDR_FORMAT, "remote", rem_dest->lid, rem_dest->recv_qpn, rem_dest->send_qpn, rem_dest->send_psn, rem_dest->srqn, gid); rem_dest->sockfd = sockfd; return 0; } static void set_ah_attr(struct ibv_ah_attr *attr, struct pingpong_context *myctx, int index) { attr->is_global = 1; attr->grh.hop_limit = 5; attr->grh.dgid = myctx->rem_dest[index].gid; attr->grh.sgid_index = myctx->gidx; } static int connect_qps(int index) { struct ibv_qp_attr attr; memset(&attr, 0, sizeof attr); attr.qp_state = IBV_QPS_RTR; attr.dest_qp_num = ctx.rem_dest[index].send_qpn; attr.path_mtu = ctx.mtu; attr.rq_psn = ctx.rem_dest[index].send_psn; attr.min_rnr_timer = 
12; attr.ah_attr.dlid = ctx.rem_dest[index].lid; attr.ah_attr.sl = ctx.sl; attr.ah_attr.port_num = ctx.ib_port; if (ctx.rem_dest[index].gid.global.interface_id) set_ah_attr(&attr.ah_attr, &ctx, index); if (ibv_modify_qp(ctx.recv_qp[index], &attr, IBV_QP_STATE | IBV_QP_AV | IBV_QP_PATH_MTU | IBV_QP_DEST_QPN | IBV_QP_RQ_PSN | IBV_QP_MAX_DEST_RD_ATOMIC | IBV_QP_MIN_RNR_TIMER)) { fprintf(stderr, "Failed to modify recv QP[%d] to RTR\n", index); return 1; } memset(&attr, 0, sizeof attr); attr.qp_state = IBV_QPS_RTS; attr.timeout = 14; attr.sq_psn = ctx.rem_dest[index].recv_psn; if (ibv_modify_qp(ctx.recv_qp[index], &attr, IBV_QP_STATE | IBV_QP_TIMEOUT | IBV_QP_SQ_PSN)) { fprintf(stderr, "Failed to modify recv QP[%d] to RTS\n", index); return 1; } memset(&attr, 0, sizeof attr); attr.qp_state = IBV_QPS_RTR; attr.dest_qp_num = ctx.rem_dest[index].recv_qpn; attr.path_mtu = ctx.mtu; attr.rq_psn = ctx.rem_dest[index].send_psn; attr.ah_attr.dlid = ctx.rem_dest[index].lid; attr.ah_attr.sl = ctx.sl; attr.ah_attr.port_num = ctx.ib_port; if (ctx.rem_dest[index].gid.global.interface_id) set_ah_attr(&attr.ah_attr, &ctx, index); if (ibv_modify_qp(ctx.send_qp[index], &attr, IBV_QP_STATE | IBV_QP_AV | IBV_QP_PATH_MTU | IBV_QP_DEST_QPN | IBV_QP_RQ_PSN)) { fprintf(stderr, "Failed to modify send QP[%d] to RTR\n", index); return 1; } memset(&attr, 0, sizeof attr); attr.qp_state = IBV_QPS_RTS; attr.timeout = 14; attr.retry_cnt = 7; attr.rnr_retry = 7; attr.sq_psn = ctx.rem_dest[index].recv_psn; if (ibv_modify_qp(ctx.send_qp[index], &attr, IBV_QP_STATE | IBV_QP_TIMEOUT | IBV_QP_SQ_PSN | IBV_QP_RETRY_CNT | IBV_QP_RNR_RETRY | IBV_QP_MAX_QP_RD_ATOMIC)) { fprintf(stderr, "Failed to modify send QP[%d] to RTS\n", index); return 1; } return 0; } static int pp_client_connect(const char *servername, int port) { struct addrinfo *res, *t; char *service; int ret; int sockfd = -1; struct addrinfo hints = { .ai_family = AF_UNSPEC, .ai_socktype = SOCK_STREAM }; if (asprintf(&service, "%d", port) < 0) return 
1; ret = getaddrinfo(servername, service, &hints, &res); if (ret < 0) { fprintf(stderr, "%s for %s:%d\n", gai_strerror(ret), servername, port); free(service); return 1; } for (t = res; t; t = t->ai_next) { sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol); if (sockfd >= 0) { if (!connect(sockfd, t->ai_addr, t->ai_addrlen)) break; close(sockfd); sockfd = -1; } } freeaddrinfo_null(res); free(service); if (sockfd < 0) { fprintf(stderr, "Couldn't connect to %s:%d\n", servername, port); return 1; } if (send_local_dest(sockfd, 0)) return 1; if (recv_remote_dest(sockfd, 0)) return 1; if (connect_qps(0)) return 1; return 0; } static int pp_server_connect(int port) { struct addrinfo *res, *t; char *service; int ret, i, n; int sockfd = -1, connfd; struct addrinfo hints = { .ai_flags = AI_PASSIVE, .ai_family = AF_INET, .ai_socktype = SOCK_STREAM }; if (asprintf(&service, "%d", port) < 0) return 1; ret = getaddrinfo(NULL, service, &hints, &res); if (ret < 0) { fprintf(stderr, "%s for port %d\n", gai_strerror(ret), port); free(service); return 1; } for (t = res; t; t = t->ai_next) { sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol); if (sockfd >= 0) { n = 1; setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &n, sizeof n); if (!bind(sockfd, t->ai_addr, t->ai_addrlen)) break; close(sockfd); sockfd = -1; } } freeaddrinfo_null(res); free(service); if (sockfd < 0) { fprintf(stderr, "Couldn't listen to port %d\n", port); return 1; } - if (listen(sockfd, ctx.num_clients)) { + if (listen(sockfd, ctx.num_clients) < 0) { perror("listen() failed"); close(sockfd); return 1; } for (i = 0; i < ctx.num_clients; i++) { connfd = accept(sockfd, NULL, NULL); if (connfd < 0) { fprintf(stderr, "accept() failed for client %d\n", i); return 1; } if (recv_remote_dest(connfd, i)) return 1; if (send_local_dest(connfd, i)) return 1; if (connect_qps(i)) return 1; } close(sockfd); return 0; } static int pp_close_ctx(void) { int i; for (i = 0; i < ctx.num_clients; ++i) { if 
(ibv_destroy_qp(ctx.send_qp[i])) { fprintf(stderr, "Couldn't destroy INI QP[%d]\n", i); return 1; } if (ibv_destroy_qp(ctx.recv_qp[i])) { fprintf(stderr, "Couldn't destroy TGT QP[%d]\n", i); return 1; } if (ctx.rem_dest[i].sockfd) close(ctx.rem_dest[i].sockfd); } if (ibv_destroy_srq(ctx.srq)) { fprintf(stderr, "Couldn't destroy SRQ\n"); return 1; } if (ctx.xrcd && ibv_close_xrcd(ctx.xrcd)) { fprintf(stderr, "Couldn't close the XRC Domain\n"); return 1; } if (ctx.fd >= 0 && close(ctx.fd)) { fprintf(stderr, "Couldn't close the file for the XRC Domain\n"); return 1; } if (ibv_destroy_cq(ctx.send_cq)) { fprintf(stderr, "Couldn't destroy send CQ\n"); return 1; } if (ibv_destroy_cq(ctx.recv_cq)) { fprintf(stderr, "Couldn't destroy recv CQ\n"); return 1; } if (ibv_dereg_mr(ctx.mr)) { fprintf(stderr, "Couldn't deregister MR\n"); return 1; } if (ibv_dealloc_pd(ctx.pd)) { fprintf(stderr, "Couldn't deallocate PD\n"); return 1; } if (ctx.channel) { if (ibv_destroy_comp_channel(ctx.channel)) { fprintf(stderr, "Couldn't destroy completion channel\n"); return 1; } } if (ibv_close_device(ctx.context)) { fprintf(stderr, "Couldn't release context\n"); return 1; } free(ctx.buf); free(ctx.rem_dest); free(ctx.send_qp); free(ctx.recv_qp); return 0; } static int pp_post_recv(int cnt) { struct ibv_sge sge; struct ibv_recv_wr wr, *bad_wr; sge.addr = (uintptr_t) ctx.buf; sge.length = ctx.size; sge.lkey = ctx.mr->lkey; wr.next = NULL; wr.wr_id = (uintptr_t) &ctx; wr.sg_list = &sge; wr.num_sge = 1; while (cnt--) { if (ibv_post_srq_recv(ctx.srq, &wr, &bad_wr)) { fprintf(stderr, "Failed to post receive to SRQ\n"); return 1; } } return 0; } /* * Send to each client round robin on each set of xrc send/recv qp. * Generate a completion on the last send. 
*/ static int pp_post_send(int index) { struct ibv_sge sge; struct ibv_send_wr wr, *bad_wr; int qpi; sge.addr = (uintptr_t) ctx.buf; sge.length = ctx.size; sge.lkey = ctx.mr->lkey; wr.wr_id = (uintptr_t) index; wr.next = NULL; wr.sg_list = &sge; wr.num_sge = 1; wr.opcode = IBV_WR_SEND; wr.qp_type.xrc.remote_srqn = ctx.rem_dest[index].srqn; qpi = (index + ctx.rem_dest[index].pp_cnt) % ctx.num_clients; wr.send_flags = (++ctx.rem_dest[index].pp_cnt >= ctx.num_tests) ? IBV_SEND_SIGNALED : 0; return ibv_post_send(ctx.send_qp[qpi], &wr, &bad_wr); } static int find_qp(int qpn) { int i; if (ctx.num_clients == 1) return 0; for (i = 0; i < ctx.num_clients; ++i) if (ctx.recv_qp[i]->qp_num == qpn) return i; fprintf(stderr, "Unable to find qp %x\n", qpn); return 0; } static int get_cq_event(void) { struct ibv_cq *ev_cq; void *ev_ctx; if (ibv_get_cq_event(ctx.channel, &ev_cq, &ev_ctx)) { fprintf(stderr, "Failed to get cq_event\n"); return 1; } if (ev_cq != ctx.recv_cq) { fprintf(stderr, "CQ event for unknown CQ %p\n", ev_cq); return 1; } if (ibv_req_notify_cq(ctx.recv_cq, 0)) { fprintf(stderr, "Couldn't request CQ notification\n"); return 1; } return 0; } static void init(void) { srand48(getpid() * time(NULL)); ctx.size = 4096; ctx.ib_port = 1; ctx.num_clients = 1; ctx.num_tests = 5; ctx.mtu = IBV_MTU_2048; ctx.sl = 0; ctx.gidx = -1; } static void usage(const char *argv0) { printf("Usage:\n"); printf(" %s start a server and wait for connection\n", argv0); printf(" %s connect to server at \n", argv0); printf("\n"); printf("Options:\n"); printf(" -p, --port= listen on/connect to port (default 18515)\n"); printf(" -d, --ib-dev= use IB device (default first device found)\n"); printf(" -i, --ib-port= use port of IB device (default 1)\n"); printf(" -s, --size= size of message to exchange (default 4096)\n"); printf(" -m, --mtu= path MTU (default 2048)\n"); printf(" -c, --clients= number of clients (on server only, default 1)\n"); printf(" -n, --num_tests= number of tests per client 
(default 5)\n"); printf(" -l, --sl= service level value\n"); printf(" -e, --events sleep on CQ events (default poll)\n"); printf(" -g, --gid-idx= local port gid index\n"); } int main(int argc, char *argv[]) { char *ib_devname = NULL; char *servername = NULL; int port = 18515; int i, total, cnt = 0; int ne, qpi, num_cq_events = 0; struct ibv_wc wc; init(); while (1) { int c; static struct option long_options[] = { { .name = "port", .has_arg = 1, .val = 'p' }, { .name = "ib-dev", .has_arg = 1, .val = 'd' }, { .name = "ib-port", .has_arg = 1, .val = 'i' }, { .name = "size", .has_arg = 1, .val = 's' }, { .name = "mtu", .has_arg = 1, .val = 'm' }, { .name = "clients", .has_arg = 1, .val = 'c' }, { .name = "num_tests", .has_arg = 1, .val = 'n' }, { .name = "sl", .has_arg = 1, .val = 'l' }, { .name = "events", .has_arg = 0, .val = 'e' }, { .name = "gid-idx", .has_arg = 1, .val = 'g' }, {} }; c = getopt_long(argc, argv, "p:d:i:s:m:c:n:l:eg:", long_options, NULL); if (c == -1) break; switch (c) { case 'p': port = strtol(optarg, NULL, 0); if (port < 0 || port > 65535) { usage(argv[0]); return 1; } break; case 'd': ib_devname = strdupa(optarg); break; case 'i': ctx.ib_port = strtol(optarg, NULL, 0); if (ctx.ib_port < 0) { usage(argv[0]); return 1; } break; case 's': ctx.size = strtol(optarg, NULL, 0); break; case 'm': ctx.mtu = pp_mtu_to_enum(strtol(optarg, NULL, 0)); if (ctx.mtu == 0) { usage(argv[0]); return 1; } break; case 'c': ctx.num_clients = strtol(optarg, NULL, 0); break; case 'n': ctx.num_tests = strtol(optarg, NULL, 0); break; case 'l': ctx.sl = strtol(optarg, NULL, 0); break; case 'g': ctx.gidx = strtol(optarg, NULL, 0); break; case 'e': ctx.use_event = 1; break; default: usage(argv[0]); return 1; } } if (optind == argc - 1) { servername = strdupa(argv[optind]); ctx.num_clients = 1; } else if (optind < argc) { usage(argv[0]); return 1; } page_size = sysconf(_SC_PAGESIZE); if (pp_init_ctx(ib_devname)) return 1; if (pp_post_recv(ctx.num_clients)) { fprintf(stderr, 
"Couldn't post receives\n"); return 1; } if (servername) { if (pp_client_connect(servername, port)) return 1; } else { if (pp_server_connect(port)) return 1; for (i = 0; i < ctx.num_clients; i++) pp_post_send(i); } total = ctx.num_clients * ctx.num_tests; while (cnt < total) { if (ctx.use_event) { if (get_cq_event()) return 1; ++num_cq_events; } do { ne = ibv_poll_cq(ctx.recv_cq, 1, &wc); if (ne < 0) { fprintf(stderr, "Error polling cq %d\n", ne); return 1; } else if (ne == 0) { break; } if (wc.status) { fprintf(stderr, "Work completion error %d\n", wc.status); return 1; } pp_post_recv(ne); qpi = find_qp(wc.qp_num); if (ctx.rem_dest[qpi].pp_cnt < ctx.num_tests) pp_post_send(qpi); cnt += ne; } while (ne > 0); } for (cnt = 0; cnt < ctx.num_clients; cnt += ne) { ne = ibv_poll_cq(ctx.send_cq, 1, &wc); if (ne < 0) { fprintf(stderr, "Error polling cq %d\n", ne); return 1; } } if (ctx.use_event) ibv_ack_cq_events(ctx.recv_cq, num_cq_events); /* Process should get an ack from the daemon to close its resources to * make sure latest daemon's response sent via its target QP destined * to an XSRQ created by another client won't be lost. * Failure to do so may cause the other client to wait for that sent * message forever. See comment on pp_post_send. */ if (servername) { if (pp_client_termination()) return 1; } else if (pp_server_termination()) { return 1; } if (pp_close_ctx()) return 1; printf("success\n"); return 0; }