Index: head/contrib/ofed/infiniband-diags/src/ibdiag_common.c =================================================================== --- head/contrib/ofed/infiniband-diags/src/ibdiag_common.c (revision 363219) +++ head/contrib/ofed/infiniband-diags/src/ibdiag_common.c (revision 363220) @@ -1,939 +1,947 @@ /* * Copyright (c) 2006-2007 The Regents of the University of California. * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. * Copyright (c) 2002-2010 Mellanox Technologies LTD. All rights reserved. * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. * Copyright (c) 2009 HNR Consulting. All rights reserved. * Copyright (c) 2011 Lawrence Livermore National Security. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* */ /** * Define common functions which can be included in the various C based diags. */ #define _GNU_SOURCE #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include int ibverbose; enum MAD_DEST ibd_dest_type = IB_DEST_LID; ib_portid_t *ibd_sm_id; static ib_portid_t sm_portid = { 0 }; /* general config options */ #define IBDIAG_CONFIG_GENERAL IBDIAG_CONFIG_PATH"/ibdiag.conf" char *ibd_ca = NULL; int ibd_ca_port = 0; int ibd_timeout = 0; uint32_t ibd_ibnetdisc_flags = IBND_CONFIG_MLX_EPI; uint64_t ibd_mkey; uint64_t ibd_sakey = 0; int show_keys = 0; char *ibd_nd_format = NULL; static const char *prog_name; static const char *prog_args; static const char **prog_examples; static struct option *long_opts = NULL; static const struct ibdiag_opt *opts_map[256]; static const char *get_build_version(void) { return "BUILD VERSION: " IBDIAG_VERSION; } static void pretty_print(int start, int width, const char *str) { int len = width - start; const char *p, *e; while (1) { while (isspace(*str)) str++; p = str; do { e = p + 1; p = strchr(e, ' '); } while (p && p - str < len); if (!p) { fprintf(stderr, "%s", str); break; } if (e - str == 1) e = p; fprintf(stderr, "%.*s\n%*s", (int)(e - str), str, start, ""); str = e; } } static inline int val_str_true(const char *val_str) { return ((strncmp(val_str, "TRUE", strlen("TRUE")) == 0) || (strncmp(val_str, "true", strlen("true")) == 0)); } void read_ibdiag_config(const char *file) { char buf[1024]; + char orig_buf[1024]; FILE *config_fd = NULL; char *p_prefix, *p_last; char *name; char *val_str; struct stat statbuf; /* silently ignore missing config file */ if (stat(file, &statbuf)) return; config_fd = fopen(file, "r"); if (!config_fd) return; while (fgets(buf, sizeof buf, config_fd) != NULL) { p_prefix = strtok_r(buf, "\n", &p_last); if (!p_prefix) continue; /* ignore blank lines */ if (*p_prefix == '#') continue; /* ignore comment lines */ 
+ strlcpy(orig_buf, buf, sizeof(orig_buf)); name = strtok_r(p_prefix, "=", &p_last); val_str = strtok_r(NULL, "\n", &p_last); + if (!name || !val_str) { + fprintf(stderr, "%s: malformed line in \"%s\":\n%s\n", + prog_name, file, orig_buf); + continue; + } if (strncmp(name, "CA", strlen("CA")) == 0) { free(ibd_ca); ibd_ca = strdup(val_str); } else if (strncmp(name, "Port", strlen("Port")) == 0) { ibd_ca_port = strtoul(val_str, NULL, 0); } else if (strncmp(name, "timeout", strlen("timeout")) == 0) { ibd_timeout = strtoul(val_str, NULL, 0); } else if (strncmp(name, "MLX_EPI", strlen("MLX_EPI")) == 0) { if (val_str_true(val_str)) { ibd_ibnetdisc_flags |= IBND_CONFIG_MLX_EPI; } else { ibd_ibnetdisc_flags &= ~IBND_CONFIG_MLX_EPI; } } else if (strncmp(name, "m_key", strlen("m_key")) == 0) { ibd_mkey = strtoull(val_str, 0, 0); } else if (strncmp(name, "sa_key", strlen("sa_key")) == 0) { ibd_sakey = strtoull(val_str, 0, 0); } else if (strncmp(name, "nd_format", strlen("nd_format")) == 0) { + free(ibd_nd_format); ibd_nd_format = strdup(val_str); } } fclose(config_fd); } void ibdiag_show_usage() { struct option *o = long_opts; int n; fprintf(stderr, "\nUsage: %s [options] %s\n\n", prog_name, prog_args ? prog_args : ""); if (long_opts[0].name) fprintf(stderr, "Options:\n"); for (o = long_opts; o->name; o++) { const struct ibdiag_opt *io = opts_map[o->val]; n = fprintf(stderr, " --%s", io->name); if (isprint(io->letter)) n += fprintf(stderr, ", -%c", io->letter); if (io->has_arg) n += fprintf(stderr, " %s", io->arg_tmpl ? io->arg_tmpl : ""); if (io->description && *io->description) { n += fprintf(stderr, "%*s ", 24 - n > 0 ? 
24 - n : 0, ""); pretty_print(n, 74, io->description); } fprintf(stderr, "\n"); } if (prog_examples) { const char **p; fprintf(stderr, "\nExamples:\n"); for (p = prog_examples; *p && **p; p++) fprintf(stderr, " %s %s\n", prog_name, *p); } fprintf(stderr, "\n"); exit(2); } static int process_opt(int ch, char *optarg) { char *endp; long val; switch (ch) { case 'z': read_ibdiag_config(optarg); break; case 'h': ibdiag_show_usage(); break; case 'V': fprintf(stderr, "%s %s\n", prog_name, get_build_version()); exit(0); case 'e': madrpc_show_errors(1); break; case 'v': ibverbose++; break; case 'd': ibdebug++; madrpc_show_errors(1); umad_debug(ibdebug - 1); break; case 'C': ibd_ca = optarg; break; case 'P': ibd_ca_port = strtoul(optarg, 0, 0); if (ibd_ca_port < 0) IBEXIT("cannot resolve CA port %d", ibd_ca_port); break; case 'D': ibd_dest_type = IB_DEST_DRPATH; break; case 'L': ibd_dest_type = IB_DEST_LID; break; case 'G': ibd_dest_type = IB_DEST_GUID; break; case 't': errno = 0; val = strtol(optarg, &endp, 0); if (errno || (endp && *endp != '\0') || val <= 0 || val > INT_MAX) IBEXIT("Invalid timeout \"%s\". 
Timeout requires a " "positive integer value < %d.", optarg, INT_MAX); else { madrpc_set_timeout((int)val); ibd_timeout = (int)val; } break; case 's': /* srcport is not required when resolving via IB_DEST_LID */ if (resolve_portid_str(ibd_ca, ibd_ca_port, &sm_portid, optarg, IB_DEST_LID, 0, NULL) < 0) IBEXIT("cannot resolve SM destination port %s", optarg); ibd_sm_id = &sm_portid; break; case 'K': show_keys = 1; break; case 'y': errno = 0; ibd_mkey = strtoull(optarg, &endp, 0); if (errno || *endp != '\0') { errno = 0; ibd_mkey = strtoull(getpass("M_Key: "), &endp, 0); if (errno || *endp != '\0') { IBEXIT("Bad M_Key"); } } break; default: return -1; } return 0; } static const struct ibdiag_opt common_opts[] = { {"config", 'z', 1, "", "use config file, default: " IBDIAG_CONFIG_GENERAL}, {"Ca", 'C', 1, "", "Ca name to use"}, {"Port", 'P', 1, "", "Ca port number to use"}, {"Direct", 'D', 0, NULL, "use Direct address argument"}, {"Lid", 'L', 0, NULL, "use LID address argument"}, {"Guid", 'G', 0, NULL, "use GUID address argument"}, {"timeout", 't', 1, "", "timeout in ms"}, {"sm_port", 's', 1, "", "SM port lid"}, {"show_keys", 'K', 0, NULL, "display security keys in output"}, {"m_key", 'y', 1, "", "M_Key to use in request"}, {"errors", 'e', 0, NULL, "show send and receive errors"}, {"verbose", 'v', 0, NULL, "increase verbosity level"}, {"debug", 'd', 0, NULL, "raise debug level"}, {"help", 'h', 0, NULL, "help message"}, {"version", 'V', 0, NULL, "show version"}, {0} }; static void make_opt(struct option *l, const struct ibdiag_opt *o, const struct ibdiag_opt *map[]) { l->name = o->name; l->has_arg = o->has_arg; l->flag = NULL; l->val = o->letter; if (!map[l->val]) map[l->val] = o; } static struct option *make_long_opts(const char *exclude_str, const struct ibdiag_opt *custom_opts, const struct ibdiag_opt *map[]) { struct option *long_opts, *l; const struct ibdiag_opt *o; unsigned n = 0; if (custom_opts) for (o = custom_opts; o->name; o++) n++; long_opts = 
malloc((sizeof(common_opts) / sizeof(common_opts[0]) + n) * sizeof(*long_opts)); if (!long_opts) return NULL; l = long_opts; if (custom_opts) for (o = custom_opts; o->name; o++) make_opt(l++, o, map); for (o = common_opts; o->name; o++) { if (exclude_str && strchr(exclude_str, o->letter)) continue; make_opt(l++, o, map); } memset(l, 0, sizeof(*l)); return long_opts; } static void make_str_opts(const struct option *o, char *p, unsigned size) { unsigned i, n = 0; for (n = 0; o->name && n + 2 + o->has_arg < size; o++) { p[n++] = (char)o->val; for (i = 0; i < (unsigned)o->has_arg; i++) p[n++] = ':'; } p[n] = '\0'; } int ibdiag_process_opts(int argc, char *const argv[], void *cxt, const char *exclude_common_str, const struct ibdiag_opt custom_opts[], int (*custom_handler) (void *cxt, int val, char *optarg), const char *usage_args, const char *usage_examples[]) { char str_opts[1024]; const struct ibdiag_opt *o; prog_name = argv[0]; prog_args = usage_args; prog_examples = usage_examples; if (long_opts) free(long_opts); long_opts = make_long_opts(exclude_common_str, custom_opts, opts_map); if (!long_opts) return -1; read_ibdiag_config(IBDIAG_CONFIG_GENERAL); make_str_opts(long_opts, str_opts, sizeof(str_opts)); while (1) { int ch = getopt_long(argc, argv, str_opts, long_opts, NULL); if (ch == -1) break; o = opts_map[ch]; if (!o) ibdiag_show_usage(); if (custom_handler) { if (custom_handler(cxt, ch, optarg) && process_opt(ch, optarg)) ibdiag_show_usage(); } else if (process_opt(ch, optarg)) ibdiag_show_usage(); } return 0; } void ibexit(const char *fn, char *msg, ...) { char buf[512]; va_list va; int n; va_start(va, msg); n = vsprintf(buf, msg, va); va_end(va); buf[n] = 0; if (ibdebug) printf("%s: iberror: [pid %d] %s: failed: %s\n", prog_name ? prog_name : "", getpid(), fn, buf); else printf("%s: iberror: failed: %s\n", prog_name ? 
prog_name : "", buf); exit(-1); } char * conv_cnt_human_readable(uint64_t val64, float *val, int data) { uint64_t tmp = val64; int ui = 0; int div = 1; tmp /= 1024; while (tmp) { ui++; tmp /= 1024; div *= 1024; } *val = (float)(val64); if (data) { *val *= 4; if (*val/div > 1024) { ui++; div *= 1024; } } *val /= div; if (data) { switch (ui) { case 0: return ("B"); case 1: return ("KB"); case 2: return ("MB"); case 3: return ("GB"); case 4: return ("TB"); case 5: return ("PB"); case 6: return ("EB"); default: return (""); } } else { switch (ui) { case 0: return (""); case 1: return ("K"); case 2: return ("M"); case 3: return ("G"); case 4: return ("T"); case 5: return ("P"); case 6: return ("E"); default: return (""); } } return (""); } int is_port_info_extended_supported(ib_portid_t * dest, int port, struct ibmad_port *srcport) { uint8_t data[IB_SMP_DATA_SIZE] = { 0 }; uint32_t cap_mask; uint16_t cap_mask2; if (!smp_query_via(data, dest, IB_ATTR_PORT_INFO, port, 0, srcport)) IBEXIT("port info query failed"); mad_decode_field(data, IB_PORT_CAPMASK_F, &cap_mask); if (cap_mask & CL_NTOH32(IB_PORT_CAP_HAS_CAP_MASK2)) { mad_decode_field(data, IB_PORT_CAPMASK2_F, &cap_mask2); if (!(cap_mask2 & CL_NTOH16(IB_PORT_CAP2_IS_PORT_INFO_EXT_SUPPORTED))) { IBWARN("port info capability mask2 = 0x%x doesn't" " indicate PortInfoExtended support", cap_mask2); return 0; } } else { IBWARN("port info capability mask2 not supported"); return 0; } return 1; } int is_mlnx_ext_port_info_supported(uint32_t vendorid, uint16_t devid) { if (ibd_ibnetdisc_flags & IBND_CONFIG_MLX_EPI) { if ((devid >= 0xc738 && devid <= 0xc73b) || devid == 0xcb20 || devid == 0xcf08 || ((vendorid == 0x119f) && /* Bull SwitchX */ (devid == 0x1b02 || devid == 0x1b50 || /* Bull SwitchIB and SwitchIB2 */ devid == 0x1ba0 || (devid >= 0x1bd0 && devid <= 0x1bd5)))) return 1; if ((devid >= 0x1003 && devid <= 0x1017) || ((vendorid == 0x119f) && /* Bull ConnectX3 */ (devid == 0x1b33 || devid == 0x1b73 || devid == 0x1b40 || 
devid == 0x1b41 || devid == 0x1b60 || devid == 0x1b61 || /* Bull ConnectIB */ devid == 0x1b83 || devid == 0x1b93 || devid == 0x1b94 || /* Bull ConnectX4 */ devid == 0x1bb4 || devid == 0x1bb5 || devid == 0x1bc4))) return 1; } return 0; } /** ========================================================================= * Resolve the SM portid using the umad layer rather than using * ib_resolve_smlid_via which requires a PortInfo query on the local port. */ int resolve_sm_portid(char *ca_name, uint8_t portnum, ib_portid_t *sm_id) { umad_port_t port; int rc; if (!sm_id) return (-1); if ((rc = umad_get_port(ca_name, portnum, &port)) < 0) return rc; memset(sm_id, 0, sizeof(*sm_id)); sm_id->lid = port.sm_lid; sm_id->sl = port.sm_sl; umad_release_port(&port); return 0; } /** ========================================================================= * Resolve local CA characteristics using the umad layer rather than using * ib_resolve_self_via which requires SMP queries on the local port. */ int resolve_self(char *ca_name, uint8_t ca_port, ib_portid_t *portid, int *portnum, ibmad_gid_t *gid) { umad_port_t port; uint64_t prefix, guid; int rc; if (!(portid || portnum || gid)) return (-1); if ((rc = umad_get_port(ca_name, ca_port, &port)) < 0) return rc; if (portid) { memset(portid, 0, sizeof(*portid)); portid->lid = port.base_lid; portid->sl = port.sm_sl; } if (portnum) *portnum = port.portnum; if (gid) { memset(gid, 0, sizeof(*gid)); prefix = cl_ntoh64(port.gid_prefix); guid = cl_ntoh64(port.port_guid); mad_encode_field(*gid, IB_GID_PREFIX_F, &prefix); mad_encode_field(*gid, IB_GID_GUID_F, &guid); } umad_release_port(&port); return 0; } int resolve_gid(char *ca_name, uint8_t ca_port, ib_portid_t * portid, ibmad_gid_t gid, ib_portid_t * sm_id, const struct ibmad_port *srcport) { ib_portid_t sm_portid; char buf[IB_SA_DATA_SIZE] = { 0 }; if (!sm_id) { sm_id = &sm_portid; if (resolve_sm_portid(ca_name, ca_port, sm_id) < 0) return -1; } if ((portid->lid = ib_path_query_via(srcport, 
gid, gid, sm_id, buf)) < 0) return -1; return 0; } int resolve_guid(char *ca_name, uint8_t ca_port, ib_portid_t *portid, uint64_t *guid, ib_portid_t *sm_id, const struct ibmad_port *srcport) { ib_portid_t sm_portid; uint8_t buf[IB_SA_DATA_SIZE] = { 0 }; uint64_t prefix; ibmad_gid_t selfgid; if (!sm_id) { sm_id = &sm_portid; if (resolve_sm_portid(ca_name, ca_port, sm_id) < 0) return -1; } if (resolve_self(ca_name, ca_port, NULL, NULL, &selfgid) < 0) return -1; memcpy(&prefix, portid->gid, sizeof(prefix)); if (!prefix) mad_set_field64(portid->gid, 0, IB_GID_PREFIX_F, IB_DEFAULT_SUBN_PREFIX); if (guid) mad_set_field64(portid->gid, 0, IB_GID_GUID_F, *guid); if ((portid->lid = ib_path_query_via(srcport, selfgid, portid->gid, sm_id, buf)) < 0) return -1; mad_decode_field(buf, IB_SA_PR_SL_F, &portid->sl); return 0; } /* * Callers of this function should ensure their ibmad_port has been opened with * IB_SA_CLASS as this function may require the SA to resolve addresses. */ int resolve_portid_str(char *ca_name, uint8_t ca_port, ib_portid_t * portid, char *addr_str, enum MAD_DEST dest_type, ib_portid_t *sm_id, const struct ibmad_port *srcport) { ibmad_gid_t gid; uint64_t guid; int lid; char *routepath; ib_portid_t selfportid = { 0 }; int selfport = 0; memset(portid, 0, sizeof *portid); switch (dest_type) { case IB_DEST_LID: lid = strtol(addr_str, 0, 0); if (!IB_LID_VALID(lid)) return -1; return ib_portid_set(portid, lid, 0, 0); case IB_DEST_DRPATH: if (str2drpath(&portid->drpath, addr_str, 0, 0) < 0) return -1; return 0; case IB_DEST_GUID: if (!(guid = strtoull(addr_str, 0, 0))) return -1; /* keep guid in portid? 
*/ return resolve_guid(ca_name, ca_port, portid, &guid, sm_id, srcport); case IB_DEST_DRSLID: lid = strtol(addr_str, &routepath, 0); routepath++; if (!IB_LID_VALID(lid)) return -1; ib_portid_set(portid, lid, 0, 0); /* handle DR parsing and set DrSLID to local lid */ if (resolve_self(ca_name, ca_port, &selfportid, &selfport, NULL) < 0) return -1; if (str2drpath(&portid->drpath, routepath, selfportid.lid, 0) < 0) return -1; return 0; case IB_DEST_GID: if (inet_pton(AF_INET6, addr_str, &gid) <= 0) return -1; return resolve_gid(ca_name, ca_port, portid, gid, sm_id, srcport); default: IBWARN("bad dest_type %d", dest_type); } return -1; } static unsigned int get_max_width(unsigned int num) { unsigned r = 0; /* 1x */ if (num & 8) r = 3; /* 12x */ else { if (num & 4) r = 2; /* 8x */ else if (num & 2) r = 1; /* 4x */ else if (num & 0x10) r = 4; /* 2x */ } return (1 << r); } static unsigned int get_max(unsigned int num) { unsigned r = 0; // r will be lg(num) while (num >>= 1) // unroll for more speed... r++; return (1 << r); } void get_max_msg(char *width_msg, char *speed_msg, int msg_size, ibnd_port_t * port) { char buf[64]; uint32_t max_speed = 0; uint32_t cap_mask, rem_cap_mask, fdr10; uint8_t *info = NULL; uint32_t max_width = get_max_width(mad_get_field(port->info, 0, IB_PORT_LINK_WIDTH_SUPPORTED_F) & mad_get_field(port->remoteport->info, 0, IB_PORT_LINK_WIDTH_SUPPORTED_F)); if ((max_width & mad_get_field(port->info, 0, IB_PORT_LINK_WIDTH_ACTIVE_F)) == 0) // we are not at the max supported width // print what we could be at. 
snprintf(width_msg, msg_size, "Could be %s", mad_dump_val(IB_PORT_LINK_WIDTH_ACTIVE_F, buf, 64, &max_width)); if (port->node->type == IB_NODE_SWITCH) { if (port->node->ports[0]) info = (uint8_t *)&port->node->ports[0]->info; } else info = (uint8_t *)&port->info; if (info) cap_mask = mad_get_field(info, 0, IB_PORT_CAPMASK_F); else cap_mask = 0; info = NULL; if (port->remoteport->node->type == IB_NODE_SWITCH) { if (port->remoteport->node->ports[0]) info = (uint8_t *)&port->remoteport->node->ports[0]->info; } else info = (uint8_t *)&port->remoteport->info; if (info) rem_cap_mask = mad_get_field(info, 0, IB_PORT_CAPMASK_F); else rem_cap_mask = 0; if (cap_mask & CL_NTOH32(IB_PORT_CAP_HAS_EXT_SPEEDS) && rem_cap_mask & CL_NTOH32(IB_PORT_CAP_HAS_EXT_SPEEDS)) goto check_ext_speed; check_fdr10_supp: fdr10 = (mad_get_field(port->ext_info, 0, IB_MLNX_EXT_PORT_LINK_SPEED_SUPPORTED_F) & FDR10) && (mad_get_field(port->remoteport->ext_info, 0, IB_MLNX_EXT_PORT_LINK_SPEED_SUPPORTED_F) & FDR10); if (fdr10) goto check_fdr10_active; max_speed = get_max(mad_get_field(port->info, 0, IB_PORT_LINK_SPEED_SUPPORTED_F) & mad_get_field(port->remoteport->info, 0, IB_PORT_LINK_SPEED_SUPPORTED_F)); if ((max_speed & mad_get_field(port->info, 0, IB_PORT_LINK_SPEED_ACTIVE_F)) == 0) // we are not at the max supported speed // print what we could be at. snprintf(speed_msg, msg_size, "Could be %s", mad_dump_val(IB_PORT_LINK_SPEED_ACTIVE_F, buf, 64, &max_speed)); return; check_ext_speed: if (mad_get_field(port->info, 0, IB_PORT_LINK_SPEED_EXT_SUPPORTED_F) == 0 || mad_get_field(port->remoteport->info, 0, IB_PORT_LINK_SPEED_EXT_SUPPORTED_F) == 0) goto check_fdr10_supp; max_speed = get_max(mad_get_field(port->info, 0, IB_PORT_LINK_SPEED_EXT_SUPPORTED_F) & mad_get_field(port->remoteport->info, 0, IB_PORT_LINK_SPEED_EXT_SUPPORTED_F)); if ((max_speed & mad_get_field(port->info, 0, IB_PORT_LINK_SPEED_EXT_ACTIVE_F)) == 0) // we are not at the max supported extended speed // print what we could be at. 
snprintf(speed_msg, msg_size, "Could be %s", mad_dump_val(IB_PORT_LINK_SPEED_EXT_ACTIVE_F, buf, 64, &max_speed)); return; check_fdr10_active: if ((mad_get_field(port->ext_info, 0, IB_MLNX_EXT_PORT_LINK_SPEED_ACTIVE_F) & FDR10) == 0) { /* Special case QDR to try to avoid confusion with FDR10 */ if (mad_get_field(port->info, 0, IB_PORT_LINK_SPEED_ACTIVE_F) == 4) /* QDR (10.0 Gbps) */ snprintf(speed_msg, msg_size, "Could be FDR10 (Found link at QDR but expected speed is FDR10)"); else snprintf(speed_msg, msg_size, "Could be FDR10"); } } int vsnprint_field(char *buf, size_t n, enum MAD_FIELDS f, int spacing, const char *format, va_list va_args) { int len, i, ret; len = strlen(mad_field_name(f)); if (len + 2 > n || spacing + 1 > n) return 0; strncpy(buf, mad_field_name(f), n); buf[len] = ':'; for (i = len+1; i < spacing+1; i++) { buf[i] = '.'; } ret = vsnprintf(&buf[spacing+1], n - spacing, format, va_args); if (ret >= n - spacing) buf[n] = '\0'; return ret + spacing; } int snprint_field(char *buf, size_t n, enum MAD_FIELDS f, int spacing, const char *format, ...) { va_list val; int ret; va_start(val, format); ret = vsnprint_field(buf, n, f, spacing, format, val); va_end(val); return ret; } void dump_portinfo(void *pi, int tabs) { int field, i; char val[64]; char buf[1024]; for (field = IB_PORT_FIRST_F; field < IB_PORT_LAST_F; field++) { for (i=0;iname; r++) if (!strcasecmp(r->name, name) || (r->alias && !strcasecmp(r->alias, name))) return r->fn; return NULL; } Index: head/contrib/ofed/infiniband-diags/src/ibdiag_sa.c =================================================================== --- head/contrib/ofed/infiniband-diags/src/ibdiag_sa.c (revision 363219) +++ head/contrib/ofed/infiniband-diags/src/ibdiag_sa.c (revision 363220) @@ -1,256 +1,256 @@ /* * Copyright (c) 2006-2007 The Regents of the University of California. * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. * Copyright (c) 2002-2010 Mellanox Technologies LTD. All rights reserved. 
* Copyright (c) 1996-2003 Intel Corporation. All rights reserved. * Copyright (c) 2009 HNR Consulting. All rights reserved. * Copyright (c) 2011 Lawrence Livermore National Security. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * */ #include #include #include "ibdiag_common.h" #include "ibdiag_sa.h" /* define a common SA query structure * This is by no means optimal but it moves the saquery functionality out of * the saquery tool and provides it to other utilities. */ struct sa_handle * sa_get_handle(void) { struct sa_handle * handle; handle = calloc(1, sizeof(*handle)); if (!handle) IBPANIC("calloc failed"); resolve_sm_portid(ibd_ca, ibd_ca_port, &handle->dport); if (!handle->dport.lid) { IBWARN("No SM/SA found on port %s:%d", ibd_ca ? 
"" : ibd_ca, ibd_ca_port); free(handle); return (NULL); } handle->dport.qp = 1; if (!handle->dport.qkey) handle->dport.qkey = IB_DEFAULT_QP1_QKEY; handle->fd = umad_open_port(ibd_ca, ibd_ca_port); handle->agent = umad_register(handle->fd, IB_SA_CLASS, 2, 1, NULL); return handle; } int sa_set_handle(struct sa_handle * handle, int grh_present, ibmad_gid_t *gid) { if (grh_present) { if (gid == NULL) { return -1; } else { handle->dport.grh_present = 1; memcpy(handle->dport.gid, gid, 16); } } return 0; } void sa_free_handle(struct sa_handle * h) { umad_unregister(h->fd, h->agent); umad_close_port(h->fd); free(h); } int sa_query(struct sa_handle * h, uint8_t method, uint16_t attr, uint32_t mod, uint64_t comp_mask, uint64_t sm_key, void *data, size_t datasz, struct sa_query_result *result) { ib_rpc_t rpc; void *umad, *mad; int ret, offset, len = 256; memset(&rpc, 0, sizeof(rpc)); rpc.mgtclass = IB_SA_CLASS; rpc.method = method; rpc.attr.id = attr; rpc.attr.mod = mod; rpc.mask = comp_mask; rpc.datasz = datasz; rpc.dataoffs = IB_SA_DATA_OFFS; umad = calloc(1, len + umad_size()); if (!umad) IBPANIC("cannot alloc mem for umad: %s\n", strerror(errno)); mad_build_pkt(umad, &rpc, &h->dport, NULL, data); mad_set_field64(umad_get_mad(umad), 0, IB_SA_MKEY_F, sm_key); if (ibdebug > 1) xdump(stdout, "SA Request:\n", umad_get_mad(umad), len); if (h->dport.grh_present) { ib_mad_addr_t *p_mad_addr = umad_get_mad_addr(umad); p_mad_addr->grh_present = 1; p_mad_addr->gid_index = 0; p_mad_addr->hop_limit = 0; p_mad_addr->traffic_class = 0; memcpy(p_mad_addr->gid, h->dport.gid, 16); } ret = umad_send(h->fd, h->agent, umad, len, ibd_timeout, 0); if (ret < 0) { IBWARN("umad_send failed: attr 0x%x: %s\n", attr, strerror(errno)); free(umad); return (-ret); } recv_mad: ret = umad_recv(h->fd, umad, &len, ibd_timeout); if (ret < 0) { if (errno == ENOSPC) { umad = realloc(umad, umad_size() + len); goto recv_mad; } IBWARN("umad_recv failed: attr 0x%x: %s\n", attr, strerror(errno)); free(umad); return 
(-ret); } if ((ret = umad_status(umad))) return ret; mad = umad_get_mad(umad); if (ibdebug > 1) xdump(stdout, "SA Response:\n", mad, len); method = (uint8_t) mad_get_field(mad, 0, IB_MAD_METHOD_F); offset = mad_get_field(mad, 0, IB_SA_ATTROFFS_F); result->status = mad_get_field(mad, 0, IB_MAD_STATUS_F); result->p_result_madw = mad; if (result->status != IB_SA_MAD_STATUS_SUCCESS) result->result_cnt = 0; else if (method != IB_MAD_METHOD_GET_TABLE) result->result_cnt = 1; else if (!offset) result->result_cnt = 0; else result->result_cnt = (len - IB_SA_DATA_OFFS) / (offset << 3); return 0; } void sa_free_result_mad(struct sa_query_result *result) { if (result->p_result_madw) { free((uint8_t *) result->p_result_madw - umad_size()); result->p_result_madw = NULL; } } void *sa_get_query_rec(void *mad, unsigned i) { int offset = mad_get_field(mad, 0, IB_SA_ATTROFFS_F); return (uint8_t *) mad + IB_SA_DATA_OFFS + i * (offset << 3); } static const char *ib_sa_error_str[] = { "SA_NO_ERROR", "SA_ERR_NO_RESOURCES", "SA_ERR_REQ_INVALID", "SA_ERR_NO_RECORDS", "SA_ERR_TOO_MANY_RECORDS", "SA_ERR_REQ_INVALID_GID", "SA_ERR_REQ_INSUFFICIENT_COMPONENTS", "SA_ERR_REQ_DENIED", "SA_ERR_STATUS_PRIO_SUGGESTED", "SA_ERR_UNKNOWN" }; #define ARR_SIZE(a) (sizeof(a)/sizeof((a)[0])) #define SA_ERR_UNKNOWN (ARR_SIZE(ib_sa_error_str) - 1) static inline const char *ib_sa_err_str(IN uint8_t status) { if (status > SA_ERR_UNKNOWN) status = SA_ERR_UNKNOWN; return (ib_sa_error_str[status]); } static const char *ib_mad_inv_field_str[] = { "MAD No invalid fields", "MAD Bad version", "MAD Method specified is not supported", "MAD Method/Attribute combination is not supported", "MAD Reserved", "MAD Reserved", "MAD Reserved", - "MAD Invalid value in Attribute field(s) or Attribute Modifier" + "MAD Invalid value in Attribute field(s) or Attribute Modifier", "MAD UNKNOWN ERROR" }; #define MAD_ERR_UNKNOWN (ARR_SIZE(ib_mad_inv_field_str) - 1) static inline const char *ib_mad_inv_field_err_str(IN uint8_t f) { if (f > 
MAD_ERR_UNKNOWN) f = MAD_ERR_UNKNOWN; return (ib_mad_inv_field_str[f]); } void sa_report_err(int status) { int st = status & 0xff; char mad_err_str[64] = { 0 }; char sa_err_str[64] = { 0 }; if (st) sprintf(mad_err_str, " (%s; %s; %s)", (st & 0x1) ? "BUSY" : "", (st & 0x2) ? "Redirection Required" : "", ib_mad_inv_field_err_str(st>>2)); st = status >> 8; if (st) sprintf(sa_err_str, " SA(%s)", ib_sa_err_str((uint8_t) st)); fprintf(stderr, "ERROR: Query result returned 0x%04x, %s%s\n", status, mad_err_str, sa_err_str); } Index: head/contrib/ofed/infiniband-diags/src/iblinkinfo.c =================================================================== --- head/contrib/ofed/infiniband-diags/src/iblinkinfo.c (revision 363219) +++ head/contrib/ofed/infiniband-diags/src/iblinkinfo.c (revision 363220) @@ -1,768 +1,769 @@ /* * Copyright (c) 2004-2009 Voltaire Inc. All rights reserved. * Copyright (c) 2007 Xsigo Systems Inc. All rights reserved. * Copyright (c) 2008 Lawrence Livermore National Lab. All rights reserved. * Copyright (c) 2010,2011 Mellanox Technologies LTD. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * */ #if HAVE_CONFIG_H # include #endif /* HAVE_CONFIG_H */ #define _GNU_SOURCE #include #include #include #include #include #include #include #include #include #include #include #include "ibdiag_common.h" #define DIFF_FLAG_PORT_CONNECTION 0x01 #define DIFF_FLAG_PORT_STATE 0x02 #define DIFF_FLAG_LID 0x04 #define DIFF_FLAG_NODE_DESCRIPTION 0x08 #define DIFF_FLAG_DEFAULT (DIFF_FLAG_PORT_CONNECTION | DIFF_FLAG_PORT_STATE) static char *node_name_map_file = NULL; static nn_map_t *node_name_map = NULL; static char *load_cache_file = NULL; static char *diff_cache_file = NULL; static unsigned diffcheck_flags = DIFF_FLAG_DEFAULT; static char *filterdownports_cache_file = NULL; static ibnd_fabric_t *filterdownports_fabric = NULL; static uint64_t guid = 0; static char *guid_str = NULL; static char *dr_path = NULL; static int all = 0; static int down_links_only = 0; static int line_mode = 0; static int add_sw_settings = 0; static int only_flag = 0; static int only_type = 0; int filterdownport_check(ibnd_node_t * node, ibnd_port_t * port) { ibnd_node_t *fsw; ibnd_port_t *fport; int fistate; fsw = ibnd_find_node_guid(filterdownports_fabric, node->guid); if (!fsw) return 0; if (port->portnum > fsw->numports) return 0; fport = fsw->ports[port->portnum]; if (!fport) return 0; fistate = mad_get_field(fport->info, 0, IB_PORT_STATE_F); return (fistate == IB_LINK_DOWN) ? 
1 : 0; } void print_port(ibnd_node_t * node, ibnd_port_t * port, char *out_prefix) { char width[64], speed[64], state[64], physstate[64]; char remote_guid_str[256]; char remote_str[256]; char link_str[256]; char width_msg[256]; char speed_msg[256]; char ext_port_str[256]; int iwidth, ispeed, fdr10, espeed, istate, iphystate, cap_mask; int n = 0; uint8_t *info = NULL; if (!port) return; iwidth = mad_get_field(port->info, 0, IB_PORT_LINK_WIDTH_ACTIVE_F); ispeed = mad_get_field(port->info, 0, IB_PORT_LINK_SPEED_ACTIVE_F); fdr10 = mad_get_field(port->ext_info, 0, IB_MLNX_EXT_PORT_LINK_SPEED_ACTIVE_F) & FDR10; if (port->node->type == IB_NODE_SWITCH) { if (port->node->ports[0]) info = (uint8_t *)&port->node->ports[0]->info; } else info = (uint8_t *)&port->info; if (info) { cap_mask = mad_get_field(info, 0, IB_PORT_CAPMASK_F); if (cap_mask & CL_NTOH32(IB_PORT_CAP_HAS_EXT_SPEEDS)) espeed = mad_get_field(port->info, 0, IB_PORT_LINK_SPEED_EXT_ACTIVE_F); else espeed = 0; } else { ispeed = 0; iwidth = 0; espeed = 0; } istate = mad_get_field(port->info, 0, IB_PORT_STATE_F); iphystate = mad_get_field(port->info, 0, IB_PORT_PHYS_STATE_F); remote_guid_str[0] = '\0'; remote_str[0] = '\0'; link_str[0] = '\0'; width_msg[0] = '\0'; speed_msg[0] = '\0'; if (istate == IB_LINK_DOWN && filterdownports_fabric && filterdownport_check(node, port)) return; /* C14-24.2.1 states that a down port allows for invalid data to be * returned for all PortInfo components except PortState and * PortPhysicalState */ if (istate != IB_LINK_DOWN) { if (!espeed) { if (fdr10) sprintf(speed, "10.0 Gbps (FDR10)"); else mad_dump_val(IB_PORT_LINK_SPEED_ACTIVE_F, speed, 64, &ispeed); } else mad_dump_val(IB_PORT_LINK_SPEED_EXT_ACTIVE_F, speed, 64, &espeed); n = snprintf(link_str, 256, "(%3s %18s %6s/%8s)", mad_dump_val(IB_PORT_LINK_WIDTH_ACTIVE_F, width, 64, &iwidth), speed, mad_dump_val(IB_PORT_STATE_F, state, 64, &istate), mad_dump_val(IB_PORT_PHYS_STATE_F, physstate, 64, &iphystate)); } else { n = 
snprintf(link_str, 256, "( %6s/%8s)", mad_dump_val(IB_PORT_STATE_F, state, 64, &istate), mad_dump_val(IB_PORT_PHYS_STATE_F, physstate, 64, &iphystate)); } /* again default values due to C14-24.2.1 */ if (add_sw_settings && istate != IB_LINK_DOWN) { snprintf(link_str + n, 256 - n, " (HOQ:%d VL_Stall:%d)", mad_get_field(port->info, 0, IB_PORT_HOQ_LIFE_F), mad_get_field(port->info, 0, IB_PORT_VL_STALL_COUNT_F)); } if (port->remoteport) { char *remap = remap_node_name(node_name_map, port->remoteport->node->guid, port->remoteport->node->nodedesc); if (port->remoteport->ext_portnum) snprintf(ext_port_str, 256, "%d", port->remoteport->ext_portnum); else ext_port_str[0] = '\0'; get_max_msg(width_msg, speed_msg, 256, port); if (line_mode) { snprintf(remote_guid_str, 256, "0x%016" PRIx64 " ", port->remoteport->guid); } snprintf(remote_str, 256, "%s%6d %4d[%2s] \"%s\" (%s %s)\n", remote_guid_str, port->remoteport->base_lid ? port->remoteport->base_lid : port->remoteport->node->smalid, port->remoteport->portnum, ext_port_str, remap, width_msg, speed_msg); free(remap); } else { if (istate == IB_LINK_DOWN) snprintf(remote_str, 256, " [ ] \"\" ( )\n"); else snprintf(remote_str, 256, " \"Port not available\"\n"); } if (port->ext_portnum) snprintf(ext_port_str, 256, "%d", port->ext_portnum); else ext_port_str[0] = '\0'; if (line_mode) { char *remap = remap_node_name(node_name_map, node->guid, node->nodedesc); printf("%s0x%016" PRIx64 " \"%30s\" ", out_prefix ? out_prefix : "", port->guid, remap); free(remap); } else printf("%s ", out_prefix ? 
/*
 * Print the one-line header that introduces a node's port listing.
 *
 * node            - node whose type and remapped description are printed;
 *                   for switches a port GUID is printed as well.
 * out_header_flag - optional "header already printed" counter.  When it is
 *                   non-NULL and non-zero nothing is printed; when it is
 *                   non-NULL and zero it is incremented after printing.
 *                   NULL means "print unconditionally" and nothing is
 *                   written back.
 * out_prefix      - optional text placed before the header; NULL prints "".
 *
 * Nothing is printed when line_mode is set.
 */
void print_node_header(ibnd_node_t *node, int *out_header_flag,
		       char *out_prefix)
{
	const char *prefix = out_prefix ? out_prefix : "";
	char *remap;

	if (line_mode || (out_header_flag && *out_header_flag))
		return;

	remap = remap_node_name(node_name_map, node->guid, node->nodedesc);

	if (node->type == IB_NODE_SWITCH) {
		uint64_t port_guid;

		/* Use port 0's GUID when the port entry exists, otherwise
		 * read the PortGUID field out of the node's NodeInfo. */
		if (node->ports[0])
			port_guid = node->ports[0]->guid;
		else
			port_guid = mad_get_field64(node->info, 0,
						    IB_NODE_PORT_GUID_F);

		printf("%s%s: 0x%016" PRIx64 " %s:\n",
		       prefix, nodetype_str(node), port_guid, remap);
	} else {
		printf("%s%s: %s:\n", prefix, nodetype_str(node), remap);
	}

	/* diff_node_iter() calls this with a NULL flag pointer, so the
	 * counter may only be bumped when the caller supplied one. */
	if (out_header_flag)
		(*out_header_flag)++;

	free(remap);
}
fabric1_port->remoteport->base_lid != fabric2_port->remoteport->base_lid) output_diff++; if (data->diff_flags & DIFF_FLAG_PORT_CONNECTION && data->diff_flags & DIFF_FLAG_NODE_DESCRIPTION && fabric1_port && fabric2_port && fabric1_port->remoteport && fabric2_port->remoteport && memcmp(fabric1_port->remoteport->node->nodedesc, fabric2_port->remoteport->node->nodedesc, IB_SMP_DATA_SIZE)) output_diff++; if (output_diff && fabric1_port) { print_node_header(fabric1_node, head_print, NULL); print_port(fabric1_node, fabric1_port, data->fabric1_prefix); } if (output_diff && fabric2_port) { - print_node_header(fabric1_node, + print_node_header(fabric2_node, head_print, NULL); print_port(fabric2_node, fabric2_port, data->fabric2_prefix); } } } void diff_node_iter(ibnd_node_t * fabric1_node, void *iter_user_data) { struct iter_diff_data *data = iter_user_data; ibnd_node_t *fabric2_node; int head_print = 0; DEBUG("DEBUG: fabric1_node %p\n", fabric1_node); fabric2_node = ibnd_find_node_guid(data->fabric2, fabric1_node->guid); if (!fabric2_node) print_node(fabric1_node, data->fabric1_prefix); else if (data->diff_flags & (DIFF_FLAG_PORT_CONNECTION | DIFF_FLAG_PORT_STATE | DIFF_FLAG_LID | DIFF_FLAG_NODE_DESCRIPTION)) { if ((fabric1_node->type == IB_NODE_SWITCH && data->diff_flags & DIFF_FLAG_LID && fabric1_node->smalid != fabric2_node->smalid) || (data->diff_flags & DIFF_FLAG_NODE_DESCRIPTION && memcmp(fabric1_node->nodedesc, fabric2_node->nodedesc, IB_SMP_DATA_SIZE))) { print_node_header(fabric1_node, NULL, data->fabric1_prefix); print_node_header(fabric2_node, NULL, data->fabric2_prefix); head_print++; } if (fabric1_node->numports != fabric2_node->numports) { print_node_header(fabric1_node, &head_print, NULL); printf("%snumports = %d\n", data->fabric1_prefix, fabric1_node->numports); printf("%snumports = %d\n", data->fabric2_prefix, fabric2_node->numports); return; } diff_node_ports(fabric1_node, fabric2_node, &head_print, data); } } int diff_node(ibnd_node_t * node, 
ibnd_fabric_t * orig_fabric, ibnd_fabric_t * new_fabric) { struct iter_diff_data iter_diff_data; iter_diff_data.diff_flags = diffcheck_flags; iter_diff_data.fabric1 = orig_fabric; iter_diff_data.fabric2 = new_fabric; iter_diff_data.fabric1_prefix = "< "; iter_diff_data.fabric2_prefix = "> "; if (node) diff_node_iter(node, &iter_diff_data); else { if (only_flag) ibnd_iter_nodes_type(orig_fabric, diff_node_iter, only_type, &iter_diff_data); else ibnd_iter_nodes(orig_fabric, diff_node_iter, &iter_diff_data); } /* Do opposite diff to find existence of node types * in new_fabric but not in orig_fabric. * * In this diff, we don't need to check port connections, * port state, lids, or node descriptions since it has already * been done (i.e. checks are only done when guid exists on both * orig and new). */ iter_diff_data.diff_flags = diffcheck_flags & ~DIFF_FLAG_PORT_CONNECTION; iter_diff_data.diff_flags &= ~DIFF_FLAG_PORT_STATE; iter_diff_data.diff_flags &= ~DIFF_FLAG_LID; iter_diff_data.diff_flags &= ~DIFF_FLAG_NODE_DESCRIPTION; iter_diff_data.fabric1 = new_fabric; iter_diff_data.fabric2 = orig_fabric; iter_diff_data.fabric1_prefix = "> "; iter_diff_data.fabric2_prefix = "< "; if (node) diff_node_iter(node, &iter_diff_data); else { if (only_flag) ibnd_iter_nodes_type(new_fabric, diff_node_iter, only_type, &iter_diff_data); else ibnd_iter_nodes(new_fabric, diff_node_iter, &iter_diff_data); } return 0; } static int process_opt(void *context, int ch, char *optarg) { struct ibnd_config *cfg = context; char *p; switch (ch) { case 1: node_name_map_file = strdup(optarg); break; case 2: load_cache_file = strdup(optarg); break; case 3: diff_cache_file = strdup(optarg); break; case 4: diffcheck_flags = 0; p = strtok(optarg, ","); while (p) { if (!strcasecmp(p, "port")) diffcheck_flags |= DIFF_FLAG_PORT_CONNECTION; else if (!strcasecmp(p, "state")) diffcheck_flags |= DIFF_FLAG_PORT_STATE; else if (!strcasecmp(p, "lid")) diffcheck_flags |= DIFF_FLAG_LID; else if (!strcasecmp(p, 
"nodedesc")) diffcheck_flags |= DIFF_FLAG_NODE_DESCRIPTION; else { fprintf(stderr, "invalid diff check key: %s\n", p); return -1; } p = strtok(NULL, ","); } break; case 5: filterdownports_cache_file = strdup(optarg); break; case 6: only_flag = 1; only_type = IB_NODE_SWITCH; break; case 7: only_flag = 1; only_type = IB_NODE_CA; break; case 'S': case 'G': guid_str = optarg; guid = (uint64_t) strtoull(guid_str, 0, 0); break; case 'D': dr_path = strdup(optarg); break; case 'a': all = 1; break; case 'n': cfg->max_hops = strtoul(optarg, NULL, 0); break; case 'd': down_links_only = 1; break; case 'l': line_mode = 1; break; case 'p': add_sw_settings = 1; break; case 'R': /* nop */ break; case 'o': cfg->max_smps = strtoul(optarg, NULL, 0); break; default: return -1; } return 0; } int main(int argc, char **argv) { struct ibnd_config config = { 0 }; int rc = 0; int resolved = -1; ibnd_fabric_t *fabric = NULL; ibnd_fabric_t *diff_fabric = NULL; struct ibmad_port *ibmad_port; ib_portid_t port_id = { 0 }; uint8_t ni[IB_SMP_DATA_SIZE] = { 0 }; int mgmt_classes[3] = { IB_SMI_CLASS, IB_SMI_DIRECT_CLASS, IB_SA_CLASS }; const struct ibdiag_opt opts[] = { {"node-name-map", 1, 1, "", "node name map file"}, {"switch", 'S', 1, "", "start partial scan at the port specified by (hex format)"}, {"port-guid", 'G', 1, "", "(same as -S)"}, {"Direct", 'D', 1, "", "start partial scan at the port specified by "}, {"all", 'a', 0, NULL, "print all nodes found in a partial fabric scan"}, {"hops", 'n', 1, "", "Number of hops to include away from specified node"}, {"down", 'd', 0, NULL, "print only down links"}, {"line", 'l', 0, NULL, "(line mode) print all information for each link on a single line"}, {"additional", 'p', 0, NULL, "print additional port settings (PktLifeTime, HoqLife, VLStallCount)"}, {"load-cache", 2, 1, "", "filename of ibnetdiscover cache to load"}, {"diff", 3, 1, "", "filename of ibnetdiscover cache to diff"}, {"diffcheck", 4, 1, "", "specify checks to execute for --diff"}, 
{"filterdownports", 5, 1, "", "filename of ibnetdiscover cache to filter downports"}, {"outstanding_smps", 'o', 1, NULL, "specify the number of outstanding SMP's which should be " "issued during the scan"}, {"switches-only", 6, 0, NULL, "Output only switches"}, {"cas-only", 7, 0, NULL, "Output only CAs"}, {0} }; char usage_args[] = ""; ibdiag_process_opts(argc, argv, &config, "aDdGgKLlnpRS", opts, process_opt, usage_args, NULL); argc -= optind; argv += optind; ibmad_port = mad_rpc_open_port(ibd_ca, ibd_ca_port, mgmt_classes, 3); if (!ibmad_port) { fprintf(stderr, "Failed to open %s port %d\n", ibd_ca, ibd_ca_port); exit(1); } smp_mkey_set(ibmad_port, ibd_mkey); if (ibd_timeout) { mad_rpc_set_timeout(ibmad_port, ibd_timeout); config.timeout_ms = ibd_timeout; } config.flags = ibd_ibnetdisc_flags; config.mkey = ibd_mkey; node_name_map = open_node_name_map(node_name_map_file); if (dr_path && load_cache_file) { mad_rpc_close_port(ibmad_port); fprintf(stderr, "Cannot specify cache and direct route path\n"); exit(1); } if (dr_path) { /* only scan part of the fabric */ if ((resolved = resolve_portid_str(ibd_ca, ibd_ca_port, &port_id, dr_path, IB_DEST_DRPATH, NULL, ibmad_port)) < 0) IBWARN("Failed to resolve %s; attempting full scan", dr_path); } else if (guid_str) { if ((resolved = resolve_portid_str(ibd_ca, ibd_ca_port, &port_id, guid_str, IB_DEST_GUID, NULL, ibmad_port)) < 0) IBWARN("Failed to resolve %s; attempting full scan\n", guid_str); } if (!smp_query_via(ni, &port_id, IB_ATTR_NODE_INFO, 0, ibd_timeout, ibmad_port)){ mad_rpc_close_port(ibmad_port); fprintf(stderr, "Failed to get local Node Info\n"); exit(1); } mad_rpc_close_port(ibmad_port); if (diff_cache_file && !(diff_fabric = ibnd_load_fabric(diff_cache_file, 0))) IBEXIT("loading cached fabric for diff failed\n"); if (filterdownports_cache_file && !(filterdownports_fabric = ibnd_load_fabric(filterdownports_cache_file, 0))) IBEXIT("loading cached fabric for filterdownports failed\n"); if (load_cache_file) { if 
((fabric = ibnd_load_fabric(load_cache_file, 0)) == NULL) { fprintf(stderr, "loading cached fabric failed\n"); exit(1); } } else { if (resolved >= 0) { if (!config.max_hops) config.max_hops = 1; if (!(fabric = ibnd_discover_fabric(ibd_ca, ibd_ca_port, &port_id, &config))) IBWARN("Partial fabric scan failed;" " attempting full scan\n"); } if (!fabric && !(fabric = ibnd_discover_fabric(ibd_ca, ibd_ca_port, NULL, &config))) { fprintf(stderr, "discover failed\n"); rc = 1; goto close_port; } } if (!all && guid_str) { ibnd_port_t *p = ibnd_find_port_guid(fabric, guid); if (p && (!only_flag || p->node->type == only_type)) { ibnd_node_t *n = p->node; if (diff_fabric) diff_node(n, diff_fabric, fabric); else print_node(n, NULL); } else fprintf(stderr, "Failed to find port: %s\n", guid_str); } else if (!all && dr_path) { ibnd_port_t *p = NULL; mad_decode_field(ni, IB_NODE_PORT_GUID_F, &(guid)); p = ibnd_find_port_guid(fabric, guid); if (p && (!only_flag || p->node->type == only_type)) { ibnd_node_t *n = p->node; if (diff_fabric) diff_node(n, diff_fabric, fabric); else print_node(n, NULL); } else fprintf(stderr, "Failed to find port: %s\n", dr_path); } else { if (diff_fabric) diff_node(NULL, diff_fabric, fabric); else { if (only_flag) ibnd_iter_nodes_type(fabric, print_node, only_type, NULL); else ibnd_iter_nodes(fabric, print_node, NULL); } } ibnd_destroy_fabric(fabric); if (diff_fabric) ibnd_destroy_fabric(diff_fabric); close_port: close_node_name_map(node_name_map); exit(rc); } Index: head/contrib/ofed/infiniband-diags/src/ibportstate.c =================================================================== --- head/contrib/ofed/infiniband-diags/src/ibportstate.c (revision 363219) +++ head/contrib/ofed/infiniband-diags/src/ibportstate.c (revision 363220) @@ -1,767 +1,768 @@ /* * Copyright (c) 2004-2009 Voltaire Inc. All rights reserved. * Copyright (c) 2010,2011 Mellanox Technologies LTD. All rights reserved. * Copyright (c) 2011,2016 Oracle and/or its affiliates. 
All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* */ #if HAVE_CONFIG_H # include #endif /* HAVE_CONFIG_H */ #include #include #include #include #include #include #include #include #include "ibdiag_common.h" enum port_ops { QUERY, ENABLE, RESET, DISABLE, SPEED, ESPEED, FDR10SPEED, WIDTH, DOWN, ARM, ACTIVE, VLS, MTU, LID, SMLID, LMC, MKEY, MKEYLEASE, MKEYPROT, ON, OFF }; struct ibmad_port *srcport; uint64_t speed = 0; /* no state change */ uint64_t espeed = 0; /* no state change */ uint64_t fdr10 = 0; /* no state change */ uint64_t width = 0; /* no state change */ uint64_t lid; uint64_t smlid; uint64_t lmc; uint64_t mtu; uint64_t vls = 0; /* no state change */ uint64_t mkey; uint64_t mkeylease; uint64_t mkeyprot; struct { const char *name; uint64_t *val; int set; } port_args[] = { {"query", NULL, 0}, /* QUERY */ {"enable", NULL, 0}, /* ENABLE */ {"reset", NULL, 0}, /* RESET */ {"disable", NULL, 0}, /* DISABLE */ {"speed", &speed, 0}, /* SPEED */ {"espeed", &espeed, 0}, /* EXTENDED SPEED */ {"fdr10", &fdr10, 0}, /* FDR10 SPEED */ {"width", &width, 0}, /* WIDTH */ {"down", NULL, 0}, /* DOWN */ {"arm", NULL, 0}, /* ARM */ {"active", NULL, 0}, /* ACTIVE */ {"vls", &vls, 0}, /* VLS */ {"mtu", &mtu, 0}, /* MTU */ {"lid", &lid, 0}, /* LID */ {"smlid", &smlid, 0}, /* SMLID */ {"lmc", &lmc, 0}, /* LMC */ {"mkey", &mkey, 0}, /* MKEY */ {"mkeylease", &mkeylease, 0}, /* MKEY LEASE */ {"mkeyprot", &mkeyprot, 0}, /* MKEY PROTECT BITS */ {"on", NULL, 0}, /* ON */ {"off", NULL, 0}, /* OFF */ }; #define NPORT_ARGS (sizeof(port_args) / sizeof(port_args[0])) /*******************************************/ /* * Return 1 if node is a switch, else zero. */ static int get_node_info(ib_portid_t * dest, uint8_t * data) { int node_type; if (!smp_query_via(data, dest, IB_ATTR_NODE_INFO, 0, 0, srcport)) IBEXIT("smp query nodeinfo failed"); node_type = mad_get_field(data, 0, IB_NODE_TYPE_F); if (node_type == IB_NODE_SWITCH) /* Switch NodeType ? 
/*
 * Fetch PortInfo for portnum of dest into data.
 *
 * For a switch, PortInfo of port 0 is queried first into a scratch buffer
 * and the capability mask is read from there; otherwise the mask is read
 * from the PortInfo just stored in data.  A failed query is reported
 * through IBEXIT() (presumably fatal -- confirm against ibdiag_common.h).
 *
 * Returns IB_PORT_CAPMASK_F masked with
 * CL_NTOH32(IB_PORT_CAP_HAS_EXT_SPEEDS): non-zero when that bit is set.
 */
static int get_port_info(ib_portid_t * dest, uint8_t * data, int portnum,
			 int is_switch)
{
	uint8_t port0_info[IB_SMP_DATA_SIZE];
	uint8_t *capmask_src = data;
	int capmask;

	if (is_switch) {
		if (!smp_query_via(port0_info, dest, IB_ATTR_PORT_INFO, 0, 0,
				   srcport))
			IBEXIT("smp query port 0 portinfo failed");
		capmask_src = port0_info;
	}

	if (!smp_query_via(data, dest, IB_ATTR_PORT_INFO, portnum, 0, srcport))
		IBEXIT("smp query portinfo failed");

	capmask = mad_get_field(capmask_src, 0, IB_PORT_CAPMASK_F);
	return (capmask & CL_NTOH32(IB_PORT_CAP_HAS_EXT_SPEEDS));
}
val); sprintf(buf + strlen(buf), "%s", "\n"); mad_decode_field(data, IB_PORT_LINK_SPEED_ENABLED_F, val); mad_dump_field(IB_PORT_LINK_SPEED_ENABLED_F, buf + strlen(buf), sizeof buf - strlen(buf), val); sprintf(buf + strlen(buf), "%s", "\n"); mad_decode_field(data, IB_PORT_LINK_SPEED_ACTIVE_F, val); mad_dump_field(IB_PORT_LINK_SPEED_ACTIVE_F, buf + strlen(buf), sizeof buf - strlen(buf), val); sprintf(buf + strlen(buf), "%s", "\n"); if (espeed_cap) { mad_decode_field(data, IB_PORT_LINK_SPEED_EXT_SUPPORTED_F, val); mad_dump_field(IB_PORT_LINK_SPEED_EXT_SUPPORTED_F, buf + strlen(buf), sizeof buf - strlen(buf), val); sprintf(buf + strlen(buf), "%s", "\n"); mad_decode_field(data, IB_PORT_LINK_SPEED_EXT_ENABLED_F, val); mad_dump_field(IB_PORT_LINK_SPEED_EXT_ENABLED_F, buf + strlen(buf), sizeof buf - strlen(buf), val); sprintf(buf + strlen(buf), "%s", "\n"); mad_decode_field(data, IB_PORT_LINK_SPEED_EXT_ACTIVE_F, val); mad_dump_field(IB_PORT_LINK_SPEED_EXT_ACTIVE_F, buf + strlen(buf), sizeof buf - strlen(buf), val); sprintf(buf + strlen(buf), "%s", "\n"); } if (!is_switch || portnum == 0) { if (show_keys) { mad_decode_field(data, IB_PORT_MKEY_F, val); mad_dump_field(IB_PORT_MKEY_F, buf + strlen(buf), sizeof buf - strlen(buf), val); } else snprint_field(buf+strlen(buf), sizeof(buf)-strlen(buf), IB_PORT_MKEY_F, 32, NOT_DISPLAYED_STR); sprintf(buf+strlen(buf), "%s", "\n"); mad_decode_field(data, IB_PORT_MKEY_LEASE_F, val); mad_dump_field(IB_PORT_MKEY_LEASE_F, buf + strlen(buf), sizeof buf - strlen(buf), val); sprintf(buf+strlen(buf), "%s", "\n"); mad_decode_field(data, IB_PORT_MKEY_PROT_BITS_F, val); mad_dump_field(IB_PORT_MKEY_PROT_BITS_F, buf + strlen(buf), sizeof buf - strlen(buf), val); sprintf(buf+strlen(buf), "%s", "\n"); } printf("# Port info: %s port %d\n%s", portid2str(dest), portnum, buf); } static void set_port_info(ib_portid_t * dest, uint8_t * data, int portnum, int espeed_cap, int is_switch) { unsigned mod; mod = portnum; if (espeed_cap) mod |= 1<<31; if 
/*
 * Effective link width mask: the enabled value lwe, unless lwe is 255, in
 * which case the supported value lws is used instead.
 * (255 presumably encodes "set to supported" -- confirm against the
 * PortInfo definition.)
 */
static int get_link_width(int lwe, int lws)
{
	return (lwe == 255) ? lws : lwe;
}

/*
 * Effective link speed mask: the enabled value lse, unless lse is 15, in
 * which case the supported value lss is used instead.
 */
static int get_link_speed(int lse, int lss)
{
	return (lse == 15) ? lss : lse;
}

/*
 * Effective extended link speed mask: the enabled value lsee, unless lsee
 * is 31, in which case the supported value lses is used instead.
 */
static int get_link_speed_ext(int lsee, int lses)
{
	return (lsee == 31) ? lses : lsee;
}
/*
 * Warn when the active extended link speed is below the best speed both
 * ends have in common.
 *
 * espeed, peerespeed - extended speed bit masks of the local and peer port.
 * lsea               - active extended speed value.
 *
 * Only the highest common bit is checked: bit 0x2 expects lsea == 2,
 * otherwise bit 0x1 expects lsea == 1.  With no common bit nothing is
 * reported.
 */
static void validate_extended_speed(int espeed, int peerespeed, int lsea)
{
	const int common = espeed & peerespeed;

	if (common & 0x2) {
		if (lsea != 2) {
			IBWARN
			    ("Peer ports operating at active extended speed %d rather than 2 (25.78125 Gbps)",
			     lsea);
		}
		return;
	}

	if ((common & 0x1) && lsea != 1) {
		IBWARN
		    ("Peer ports operating at active extended speed %d rather than 1 (14.0625 Gbps)",
		     lsea);
	}
}
lid", "3 1 width 1\t\t\t# by lid", "-D 0 1 lid 0x1234 arm\t\t# by direct route", NULL }; ibdiag_process_opts(argc, argv, NULL, NULL, NULL, NULL, usage_args, usage_examples); argc -= optind; argv += optind; if (argc < 2) ibdiag_show_usage(); srcport = mad_rpc_open_port(ibd_ca, ibd_ca_port, mgmt_classes, 3); if (!srcport) IBEXIT("Failed to open '%s' port '%d'", ibd_ca, ibd_ca_port); smp_mkey_set(srcport, ibd_mkey); if (resolve_portid_str(ibd_ca, ibd_ca_port, &portid, argv[0], ibd_dest_type, ibd_sm_id, srcport) < 0) IBEXIT("can't resolve destination port %s", argv[0]); if (argc > 1) portnum = strtol(argv[1], 0, 0); for (i = 2; i < argc; i++) { int j; for (j = 0; j < NPORT_ARGS; j++) { if (strcmp(argv[i], port_args[j].name)) continue; port_args[j].set = 1; if (!port_args[j].val) { if (port_op >= 0) IBEXIT("%s only one of: ", "query, enable, disable, " "reset, down, arm, active, " "can be specified", port_args[j].name); port_op = j; break; } if (++i >= argc) IBEXIT("%s requires an additional parameter", port_args[j].name); val = strtoull(argv[i], 0, 0); switch (j) { case SPEED: if (val > 15) IBEXIT("invalid speed value %ld", val); break; case ESPEED: if (val > 31) IBEXIT("invalid extended speed value %ld", val); break; case FDR10SPEED: if (val > 1) IBEXIT("invalid fdr10 speed value %ld", val); break; case WIDTH: if ((val > 31 && val != 255)) IBEXIT("invalid width value %ld", val); break; case VLS: if (val == 0 || val > 5) IBEXIT("invalid vls value %ld", val); break; case MTU: if (val == 0 || val > 5) IBEXIT("invalid mtu value %ld", val); break; case LID: if (val == 0 || val >= 0xC000) IBEXIT("invalid lid value 0x%lx", val); break; case SMLID: if (val == 0 || val >= 0xC000) IBEXIT("invalid smlid value 0x%lx", val); break; case LMC: if (val > 7) IBEXIT("invalid lmc value %ld", val); break; case MKEY: errno = 0; val = strtoull(argv[i], &endp, 0); if (errno || *endp != '\0') { errno = 0; val = strtoull(getpass("New M_Key: "), &endp, 0); if (errno || *endp != '\0') { 
IBEXIT("Bad new M_Key\n"); } } /* All 64-bit values are legal */ break; case MKEYLEASE: if (val > 0xFFFF) IBEXIT("invalid mkey lease time %ld", val); break; case MKEYPROT: if (val > 3) IBEXIT("invalid mkey protection bit setting %ld", val); } *port_args[j].val = val; changed = 1; break; } if (j == NPORT_ARGS) IBEXIT("invalid operation: %s", argv[i]); } if (port_op < 0) port_op = QUERY; is_switch = get_node_info(&portid, data); vendorid = (uint32_t) mad_get_field(data, 0, IB_NODE_VENDORID_F); devid = (uint16_t) mad_get_field(data, 0, IB_NODE_DEVID_F); if ((port_args[MKEY].set || port_args[MKEYLEASE].set || port_args[MKEYPROT].set) && is_switch && portnum != 0) IBEXIT("Can't set M_Key fields on switch port != 0"); if (port_op != QUERY || changed) printf("Initial %s PortInfo:\n", is_switch ? "Switch" : "CA/RT"); else printf("%s PortInfo:\n", is_switch ? "Switch" : "CA/RT"); espeed_cap = get_port_info(&portid, data, portnum, is_switch); show_port_info(&portid, data, portnum, espeed_cap, is_switch); if (is_mlnx_ext_port_info_supported(vendorid, devid)) { get_mlnx_ext_port_info(&portid, data2, portnum); show_mlnx_ext_port_info(&portid, data2, portnum); } if (port_op != QUERY || changed) { /* * If we aren't setting the LID and the LID is the default, * the SMA command will fail due to an invalid LID. * Set it to something unlikely but valid. 
*/ physstate = mad_get_field(data, 0, IB_PORT_PHYS_STATE_F); val = mad_get_field(data, 0, IB_PORT_LID_F); if (!port_args[LID].set && (!val || val == 0xFFFF)) mad_set_field(data, 0, IB_PORT_LID_F, 0x1234); val = mad_get_field(data, 0, IB_PORT_SMLID_F); if (!port_args[SMLID].set && (!val || val == 0xFFFF)) mad_set_field(data, 0, IB_PORT_SMLID_F, 0x1234); mad_set_field(data, 0, IB_PORT_STATE_F, 0); /* NOP */ mad_set_field(data, 0, IB_PORT_PHYS_STATE_F, 0); /* NOP */ switch (port_op) { case ON: /* Enable only if state is Disable */ if(physstate != 3) { printf("Port is already in enable state\n"); goto close_port; } + /* FALLTHROUGH */ case ENABLE: case RESET: /* Polling */ mad_set_field(data, 0, IB_PORT_PHYS_STATE_F, 2); break; case OFF: case DISABLE: printf("Disable may be irreversible\n"); mad_set_field(data, 0, IB_PORT_PHYS_STATE_F, 3); break; case DOWN: mad_set_field(data, 0, IB_PORT_STATE_F, 1); break; case ARM: mad_set_field(data, 0, IB_PORT_STATE_F, 3); break; case ACTIVE: mad_set_field(data, 0, IB_PORT_STATE_F, 4); break; } /* always set enabled speeds/width - defaults to NOP */ mad_set_field(data, 0, IB_PORT_LINK_SPEED_ENABLED_F, speed); mad_set_field(data, 0, IB_PORT_LINK_SPEED_EXT_ENABLED_F, espeed); mad_set_field(data, 0, IB_PORT_LINK_WIDTH_ENABLED_F, width); if (port_args[VLS].set) mad_set_field(data, 0, IB_PORT_OPER_VLS_F, vls); if (port_args[MTU].set) mad_set_field(data, 0, IB_PORT_NEIGHBOR_MTU_F, mtu); if (port_args[LID].set) mad_set_field(data, 0, IB_PORT_LID_F, lid); if (port_args[SMLID].set) mad_set_field(data, 0, IB_PORT_SMLID_F, smlid); if (port_args[LMC].set) mad_set_field(data, 0, IB_PORT_LMC_F, lmc); if (port_args[FDR10SPEED].set) { mad_set_field(data2, 0, IB_MLNX_EXT_PORT_STATE_CHG_ENABLE_F, FDR10); mad_set_field(data2, 0, IB_MLNX_EXT_PORT_LINK_SPEED_ENABLED_F, fdr10); set_mlnx_ext_port_info(&portid, data2, portnum); } if (port_args[MKEY].set) mad_set_field64(data, 0, IB_PORT_MKEY_F, mkey); if (port_args[MKEYLEASE].set) mad_set_field(data, 0, 
IB_PORT_MKEY_LEASE_F, mkeylease); if (port_args[MKEYPROT].set) mad_set_field(data, 0, IB_PORT_MKEY_PROT_BITS_F, mkeyprot); set_port_info(&portid, data, portnum, espeed_cap, is_switch); } else if (is_switch && portnum) { /* Now, make sure PortState is Active */ /* Or is PortPhysicalState LinkUp sufficient ? */ mad_decode_field(data, IB_PORT_STATE_F, &state); mad_decode_field(data, IB_PORT_PHYS_STATE_F, &physstate); if (state == 4) { /* Active */ mad_decode_field(data, IB_PORT_LINK_WIDTH_ENABLED_F, &lwe); mad_decode_field(data, IB_PORT_LINK_WIDTH_SUPPORTED_F, &lws); mad_decode_field(data, IB_PORT_LINK_WIDTH_ACTIVE_F, &lwa); mad_decode_field(data, IB_PORT_LINK_SPEED_SUPPORTED_F, &lss); mad_decode_field(data, IB_PORT_LINK_SPEED_ACTIVE_F, &lsa); mad_decode_field(data, IB_PORT_LINK_SPEED_ENABLED_F, &lse); mad_decode_field(data2, IB_MLNX_EXT_PORT_LINK_SPEED_SUPPORTED_F, &fdr10s); mad_decode_field(data2, IB_MLNX_EXT_PORT_LINK_SPEED_ENABLED_F, &fdr10e); mad_decode_field(data2, IB_MLNX_EXT_PORT_LINK_SPEED_ACTIVE_F, &fdr10a); if (espeed_cap) { mad_decode_field(data, IB_PORT_LINK_SPEED_EXT_SUPPORTED_F, &lses); mad_decode_field(data, IB_PORT_LINK_SPEED_EXT_ACTIVE_F, &lsea); mad_decode_field(data, IB_PORT_LINK_SPEED_EXT_ENABLED_F, &lsee); } /* Setup portid for peer port */ memcpy(&peerportid, &portid, sizeof(peerportid)); if (portid.lid == 0) { peerportid.drpath.cnt++; if (peerportid.drpath.cnt == IB_SUBNET_PATH_HOPS_MAX) { IBEXIT("Too many hops"); } } else { peerportid.drpath.cnt = 1; /* Set DrSLID to local lid */ if (resolve_self(ibd_ca, ibd_ca_port, &selfportid, &selfport, 0) < 0) IBEXIT("could not resolve self"); peerportid.drpath.drslid = (uint16_t) selfportid.lid; peerportid.drpath.drdlid = 0xffff; } peerportid.drpath.p[peerportid.drpath.cnt] = (uint8_t) portnum; /* Get peer port NodeInfo to obtain peer port number */ is_peer_switch = get_node_info(&peerportid, data); rem_vendorid = (uint32_t) mad_get_field(data, 0, IB_NODE_VENDORID_F); rem_devid = (uint16_t) 
mad_get_field(data, 0, IB_NODE_DEVID_F); mad_decode_field(data, IB_NODE_LOCAL_PORT_F, &peerlocalportnum); printf("Peer PortInfo:\n"); /* Get peer port characteristics */ peer_espeed_cap = get_port_info(&peerportid, data, peerlocalportnum, is_peer_switch); if (is_mlnx_ext_port_info_supported(rem_vendorid, rem_devid)) get_mlnx_ext_port_info(&peerportid, data2, peerlocalportnum); show_port_info(&peerportid, data, peerlocalportnum, peer_espeed_cap, is_peer_switch); if (is_mlnx_ext_port_info_supported(rem_vendorid, rem_devid)) show_mlnx_ext_port_info(&peerportid, data2, peerlocalportnum); mad_decode_field(data, IB_PORT_LINK_WIDTH_ENABLED_F, &peerlwe); mad_decode_field(data, IB_PORT_LINK_WIDTH_SUPPORTED_F, &peerlws); mad_decode_field(data, IB_PORT_LINK_WIDTH_ACTIVE_F, &peerlwa); mad_decode_field(data, IB_PORT_LINK_SPEED_SUPPORTED_F, &peerlss); mad_decode_field(data, IB_PORT_LINK_SPEED_ACTIVE_F, &peerlsa); mad_decode_field(data, IB_PORT_LINK_SPEED_ENABLED_F, &peerlse); mad_decode_field(data2, IB_MLNX_EXT_PORT_LINK_SPEED_SUPPORTED_F, &peerfdr10s); mad_decode_field(data2, IB_MLNX_EXT_PORT_LINK_SPEED_ENABLED_F, &peerfdr10e); mad_decode_field(data2, IB_MLNX_EXT_PORT_LINK_SPEED_ACTIVE_F, &peerfdr10a); if (peer_espeed_cap) { mad_decode_field(data, IB_PORT_LINK_SPEED_EXT_SUPPORTED_F, &peerlses); mad_decode_field(data, IB_PORT_LINK_SPEED_EXT_ACTIVE_F, &peerlsea); mad_decode_field(data, IB_PORT_LINK_SPEED_EXT_ENABLED_F, &peerlsee); } /* Now validate peer port characteristics */ /* Examine Link Width */ width = get_link_width(lwe, lws); peerwidth = get_link_width(peerlwe, peerlws); validate_width(width, peerwidth, lwa); /* Examine Link Speeds */ speed = get_link_speed(lse, lss); peerspeed = get_link_speed(peerlse, peerlss); validate_speed(speed, peerspeed, lsa); if (espeed_cap && peer_espeed_cap) { espeed = get_link_speed_ext(lsee, lses); peerespeed = get_link_speed_ext(peerlsee, peerlses); validate_extended_speed(espeed, peerespeed, lsea); } else { if (fdr10e & FDR10 && peerfdr10e 
& FDR10) { if (!(fdr10a & FDR10)) IBWARN("Peer ports operating at active speed %d rather than FDR10", lsa); } } } } close_port: mad_rpc_close_port(srcport); exit(0); } Index: head/contrib/ofed/infiniband-diags/src/ibqueryerrors.c =================================================================== --- head/contrib/ofed/infiniband-diags/src/ibqueryerrors.c (revision 363219) +++ head/contrib/ofed/infiniband-diags/src/ibqueryerrors.c (revision 363220) @@ -1,1126 +1,1133 @@ /* * Copyright (c) 2004-2009 Voltaire Inc. All rights reserved. * Copyright (c) 2007 Xsigo Systems Inc. All rights reserved. * Copyright (c) 2008 Lawrence Livermore National Lab. All rights reserved. * Copyright (c) 2009 HNR Consulting. All rights reserved. * Copyright (c) 2010,2011 Mellanox Technologies LTD. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * */ #if HAVE_CONFIG_H # include #endif /* HAVE_CONFIG_H */ #define _GNU_SOURCE #include #include #include #include #include #include #include #include #include #include #include #include #include "ibdiag_common.h" #include "ibdiag_sa.h" struct ibmad_port *ibmad_port; static char *node_name_map_file = NULL; static nn_map_t *node_name_map = NULL; static char *load_cache_file = NULL; static uint16_t lid2sl_table[sizeof(uint8_t) * 1024 * 48] = { 0 }; static int obtain_sl = 1; int data_counters = 0; int data_counters_only = 0; int port_config = 0; uint64_t port_guid = 0; char *port_guid_str = NULL; #define SUP_MAX 64 int sup_total = 0; enum MAD_FIELDS suppressed_fields[SUP_MAX]; char *dr_path = NULL; uint8_t node_type_to_print = 0; unsigned clear_errors = 0, clear_counts = 0, details = 0; #define PRINT_SWITCH 0x1 #define PRINT_CA 0x2 #define PRINT_ROUTER 0x4 #define PRINT_ALL 0xFF /* all nodes default flag */ #define DEFAULT_HALF_WORLD_PR_TIMEOUT (3000) struct { int nodes_checked; int bad_nodes; int ports_checked; int bad_ports; int pma_query_failures; } summary = { 0 }; #define DEF_THRES_FILE IBDIAG_CONFIG_PATH"/error_thresholds" static char *threshold_file = DEF_THRES_FILE; /* define a "packet" with threshold values in it */ uint8_t thresholds[1204] = { 0 }; char * threshold_str = ""; static unsigned valid_gid(ib_gid_t * gid) { ib_gid_t zero_gid; memset(&zero_gid, 0, sizeof zero_gid); return memcmp(&zero_gid, gid, sizeof(*gid)); } static void set_thres(char *name, uint32_t val) { int f; int n; char tmp[256]; for (f = IB_PC_FIRST_F; f <= IB_PC_LAST_F; f++) { if (strcmp(name, mad_field_name(f)) == 0) { mad_encode_field(thresholds, f, &val); snprintf(tmp, 255, "[%s = %u]", name, val); threshold_str = 
realloc(threshold_str, strlen(threshold_str)+strlen(tmp)+1); if (!threshold_str) { fprintf(stderr, "Failed to allocate memory: " "%s\n", strerror(errno)); exit(1); } n = strlen(threshold_str); strcpy(threshold_str+n, tmp); } } } static void set_thresholds(char *threshold_file) { char buf[1024]; + char orig_buf[1024]; int val = 0; FILE *thresf = fopen(threshold_file, "r"); char *p_prefix, *p_last; char *name; char *val_str; char str[64]; if (!thresf) return; snprintf(str, 63, "Thresholds: "); threshold_str = malloc(strlen(str)+1); if (!threshold_str) { fprintf(stderr, "Failed to allocate memory: %s\n", strerror(errno)); exit(1); } strcpy(threshold_str, str); while (fgets(buf, sizeof buf, thresf) != NULL) { p_prefix = strtok_r(buf, "\n", &p_last); if (!p_prefix) continue; /* ignore blank lines */ if (*p_prefix == '#') continue; /* ignore comment lines */ + strlcpy(orig_buf, buf, sizeof(orig_buf)); name = strtok_r(p_prefix, "=", &p_last); val_str = strtok_r(NULL, "\n", &p_last); + if (!name || !val_str) { + fprintf(stderr, "malformed line in \"%s\":\n%s\n", + threshold_file, orig_buf); + continue; + } val = strtoul(val_str, NULL, 0); set_thres(name, val); } fclose(thresf); } static int exceeds_threshold(int field, unsigned val) { uint32_t thres = 0; mad_decode_field(thresholds, field, &thres); return (val > thres); } static void print_port_config(ibnd_node_t * node, int portnum) { char width[64], speed[64], state[64], physstate[64]; char remote_str[256]; char link_str[256]; char width_msg[256]; char speed_msg[256]; char ext_port_str[256]; int iwidth, ispeed, fdr10, espeed, istate, iphystate, cap_mask; uint8_t *info; ibnd_port_t *port = node->ports[portnum]; if (!port) return; iwidth = mad_get_field(port->info, 0, IB_PORT_LINK_WIDTH_ACTIVE_F); ispeed = mad_get_field(port->info, 0, IB_PORT_LINK_SPEED_ACTIVE_F); fdr10 = mad_get_field(port->ext_info, 0, IB_MLNX_EXT_PORT_LINK_SPEED_ACTIVE_F) & FDR10; if (port->node->type == IB_NODE_SWITCH) info = (uint8_t 
*)&port->node->ports[0]->info; else info = (uint8_t *)&port->info; cap_mask = mad_get_field(info, 0, IB_PORT_CAPMASK_F); if (cap_mask & CL_NTOH32(IB_PORT_CAP_HAS_EXT_SPEEDS)) espeed = mad_get_field(port->info, 0, IB_PORT_LINK_SPEED_EXT_ACTIVE_F); else espeed = 0; istate = mad_get_field(port->info, 0, IB_PORT_STATE_F); iphystate = mad_get_field(port->info, 0, IB_PORT_PHYS_STATE_F); remote_str[0] = '\0'; link_str[0] = '\0'; width_msg[0] = '\0'; speed_msg[0] = '\0'; /* C14-24.2.1 states that a down port allows for invalid data to be * returned for all PortInfo components except PortState and * PortPhysicalState */ if (istate != IB_LINK_DOWN) { if (!espeed) { if (fdr10) sprintf(speed, "10.0 Gbps (FDR10)"); else mad_dump_val(IB_PORT_LINK_SPEED_ACTIVE_F, speed, 64, &ispeed); } else mad_dump_val(IB_PORT_LINK_SPEED_EXT_ACTIVE_F, speed, 64, &espeed); snprintf(link_str, 256, "(%3s %18s %6s/%8s)", mad_dump_val(IB_PORT_LINK_WIDTH_ACTIVE_F, width, 64, &iwidth), speed, mad_dump_val(IB_PORT_STATE_F, state, 64, &istate), mad_dump_val(IB_PORT_PHYS_STATE_F, physstate, 64, &iphystate)); } else { snprintf(link_str, 256, "( %6s/%8s)", mad_dump_val(IB_PORT_STATE_F, state, 64, &istate), mad_dump_val(IB_PORT_PHYS_STATE_F, physstate, 64, &iphystate)); } if (port->remoteport) { char *rem_node_name = NULL; if (port->remoteport->ext_portnum) snprintf(ext_port_str, 256, "%d", port->remoteport->ext_portnum); else ext_port_str[0] = '\0'; get_max_msg(width_msg, speed_msg, 256, port); rem_node_name = remap_node_name(node_name_map, port->remoteport->node->guid, port->remoteport->node-> nodedesc); snprintf(remote_str, 256, "0x%016" PRIx64 " %6d %4d[%2s] \"%s\" (%s %s)\n", port->remoteport->guid, port->remoteport->base_lid ? 
port->remoteport-> base_lid : port->remoteport->node->smalid, port->remoteport->portnum, ext_port_str, rem_node_name, width_msg, speed_msg); free(rem_node_name); } else snprintf(remote_str, 256, " [ ] \"\" ( )\n"); if (port->ext_portnum) snprintf(ext_port_str, 256, "%d", port->ext_portnum); else ext_port_str[0] = '\0'; if (node->type == IB_NODE_SWITCH) printf(" Link info: %6d", node->smalid); else printf(" Link info: %6d", port->base_lid); printf("%4d[%2s] ==%s==> %s", port->portnum, ext_port_str, link_str, remote_str); } static int suppress(enum MAD_FIELDS field) { int i = 0; for (i = 0; i < sup_total; i++) if (field == suppressed_fields[i]) return 1; return 0; } static void report_suppressed(void) { int i = 0; printf("## Suppressed:"); for (i = 0; i < sup_total; i++) printf(" %s", mad_field_name(suppressed_fields[i])); printf("\n"); } static int print_summary(void) { printf("\n## Summary: %d nodes checked, %d bad nodes found\n", summary.nodes_checked, summary.bad_nodes); printf("## %d ports checked, %d ports have errors beyond threshold\n", summary.ports_checked, summary.bad_ports); printf("## %s\n", threshold_str); if (summary.pma_query_failures) printf("## %d PMA query failures\n", summary.pma_query_failures); report_suppressed(); return (summary.bad_ports); } static void insert_lid2sl_table(struct sa_query_result *r) { unsigned int i; for (i = 0; i < r->result_cnt; i++) { ib_path_rec_t *p_pr = (ib_path_rec_t *)sa_get_query_rec(r->p_result_madw, i); lid2sl_table[cl_ntoh16(p_pr->dlid)] = ib_path_rec_sl(p_pr); } } static int path_record_query(ib_gid_t sgid,uint64_t dguid) { ib_path_rec_t pr; ib_net64_t comp_mask = 0; uint8_t reversible = 0; struct sa_handle * h; if (!(h = sa_get_handle())) return -1; ibd_timeout = DEFAULT_HALF_WORLD_PR_TIMEOUT; memset(&pr, 0, sizeof(pr)); CHECK_AND_SET_GID(sgid, pr.sgid, PR, SGID); if(dguid) { mad_encode_field(sgid.raw, IB_GID_GUID_F, &dguid); CHECK_AND_SET_GID(sgid, pr.dgid, PR, DGID); } CHECK_AND_SET_VAL(1, 8, -1, pr.num_path, 
PR, NUMBPATH);/*to get only one PathRecord for each source and destination pair*/ CHECK_AND_SET_VAL(1, 8, -1, reversible, PR, REVERSIBLE);/*for a reversible path*/ pr.num_path |= reversible << 7; struct sa_query_result result; int ret = sa_query(h, IB_MAD_METHOD_GET_TABLE, (uint16_t)IB_SA_ATTR_PATHRECORD,0,cl_ntoh64(comp_mask),ibd_sakey, &pr, sizeof(pr), &result); if (ret) { sa_free_handle(h); fprintf(stderr, "Query SA failed: %s; sa call path_query failed\n", strerror(ret)); return ret; } if (result.status != IB_SA_MAD_STATUS_SUCCESS) { sa_report_err(result.status); ret = EIO; goto Exit; } insert_lid2sl_table(&result); Exit: sa_free_handle(h); sa_free_result_mad(&result); return ret; } static int query_and_dump(char *buf, size_t size, ib_portid_t * portid, char *node_name, int portnum, const char *attr_name, uint16_t attr_id, int start_field, int end_field) { uint8_t pc[1024]; uint32_t val = 0; int i, n; memset(pc, 0, sizeof(pc)); if (!pma_query_via(pc, portid, portnum, ibd_timeout, attr_id, ibmad_port)) { IBWARN("%s query failed on %s, %s port %d", attr_name, node_name, portid2str(portid), portnum); summary.pma_query_failures++; return 0; } for (n = 0, i = start_field; i < end_field; i++) { mad_decode_field(pc, i, (void *)&val); if (val) n += snprintf(buf + n, size - n, " [%s == %u]", mad_field_name(i), val); } return n; } static int print_results(ib_portid_t * portid, char *node_name, ibnd_node_t * node, uint8_t * pc, int portnum, int *header_printed, uint8_t *pce, uint16_t cap_mask) { char buf[1024]; char *str = buf; uint32_t val = 0; int i, n; for (n = 0, i = IB_PC_ERR_SYM_F; i <= IB_PC_VL15_DROPPED_F; i++) { if (suppress(i)) continue; /* this is not a counter, skip it */ if (i == IB_PC_COUNTER_SELECT2_F) continue; mad_decode_field(pc, i, (void *)&val); if (exceeds_threshold(i, val)) { n += snprintf(str + n, 1024 - n, " [%s == %u]", mad_field_name(i), val); /* If there are PortXmitDiscards, get details (if supported) */ if (i == IB_PC_XMT_DISCARDS_F && 
details) { n += query_and_dump(str + n, sizeof(buf) - n, portid, node_name, portnum, "PortXmitDiscardDetails", IB_GSI_PORT_XMIT_DISCARD_DETAILS, IB_PC_RCV_LOCAL_PHY_ERR_F, IB_PC_RCV_ERR_LAST_F); /* If there are PortRcvErrors, get details (if supported) */ } else if (i == IB_PC_ERR_RCV_F && details) { n += query_and_dump(str + n, sizeof(buf) - n, portid, node_name, portnum, "PortRcvErrorDetails", IB_GSI_PORT_RCV_ERROR_DETAILS, IB_PC_XMT_INACT_DISC_F, IB_PC_XMT_DISC_LAST_F); } } } if (!suppress(IB_PC_XMT_WAIT_F)) { mad_decode_field(pc, IB_PC_XMT_WAIT_F, (void *)&val); if (exceeds_threshold(IB_PC_XMT_WAIT_F, val)) n += snprintf(str + n, 1024 - n, " [%s == %u]", mad_field_name(IB_PC_XMT_WAIT_F), val); } /* if we found errors. */ if (n != 0) { if (data_counters) { uint8_t *pkt = pc; int start_field = IB_PC_XMT_BYTES_F; int end_field = IB_PC_RCV_PKTS_F; if (pce) { pkt = pce; start_field = IB_PC_EXT_XMT_BYTES_F; if (cap_mask & IB_PM_EXT_WIDTH_SUPPORTED) end_field = IB_PC_EXT_RCV_MPKTS_F; else end_field = IB_PC_EXT_RCV_PKTS_F; } for (i = start_field; i <= end_field; i++) { uint64_t val64 = 0; float val = 0; char *unit = ""; mad_decode_field(pkt, i, (void *)&val64); if (val64) { int data = 0; if (i == IB_PC_EXT_XMT_BYTES_F || i == IB_PC_EXT_RCV_BYTES_F || i == IB_PC_XMT_BYTES_F || i == IB_PC_RCV_BYTES_F) data = 1; unit = conv_cnt_human_readable(val64, &val, data); n += snprintf(str + n, 1024 - n, " [%s == %" PRIu64 " (%5.3f%s)]", mad_field_name(i), val64, val, unit); } } } if (!*header_printed) { if (node->type == IB_NODE_SWITCH) printf("Errors for 0x%" PRIx64 " \"%s\"\n", node->ports[0]->guid, node_name); else printf("Errors for \"%s\"\n", node_name); *header_printed = 1; summary.bad_nodes++; } if (portnum == 0xFF) { if (node->type == IB_NODE_SWITCH) printf(" GUID 0x%" PRIx64 " port ALL:%s\n", node->ports[0]->guid, str); } else { printf(" GUID 0x%" PRIx64 " port %d:%s\n", node->ports[portnum]->guid, portnum, str); if (port_config) print_port_config(node, portnum); 
summary.bad_ports++; } } return (n); } static int query_cap_mask(ib_portid_t * portid, char *node_name, int portnum, uint16_t * cap_mask) { uint8_t pc[1024] = { 0 }; uint16_t rc_cap_mask; portid->sl = lid2sl_table[portid->lid]; /* PerfMgt ClassPortInfo is a required attribute */ if (!pma_query_via(pc, portid, portnum, ibd_timeout, CLASS_PORT_INFO, ibmad_port)) { IBWARN("classportinfo query failed on %s, %s port %d", node_name, portid2str(portid), portnum); summary.pma_query_failures++; return -1; } /* ClassPortInfo should be supported as part of libibmad */ memcpy(&rc_cap_mask, pc + 2, sizeof(rc_cap_mask)); /* CapabilityMask */ *cap_mask = rc_cap_mask; return 0; } static int print_data_cnts(ib_portid_t * portid, uint16_t cap_mask, char *node_name, ibnd_node_t * node, int portnum, int *header_printed) { uint8_t pc[1024]; int i; int start_field = IB_PC_XMT_BYTES_F; int end_field = IB_PC_RCV_PKTS_F; memset(pc, 0, 1024); portid->sl = lid2sl_table[portid->lid]; if (cap_mask & (IB_PM_EXT_WIDTH_SUPPORTED | IB_PM_EXT_WIDTH_NOIETF_SUP)) { if (!pma_query_via(pc, portid, portnum, ibd_timeout, IB_GSI_PORT_COUNTERS_EXT, ibmad_port)) { IBWARN("IB_GSI_PORT_COUNTERS_EXT query failed on %s, %s port %d", node_name, portid2str(portid), portnum); summary.pma_query_failures++; return (1); } start_field = IB_PC_EXT_XMT_BYTES_F; if (cap_mask & IB_PM_EXT_WIDTH_SUPPORTED) end_field = IB_PC_EXT_RCV_MPKTS_F; else end_field = IB_PC_EXT_RCV_PKTS_F; } else { if (!pma_query_via(pc, portid, portnum, ibd_timeout, IB_GSI_PORT_COUNTERS, ibmad_port)) { IBWARN("IB_GSI_PORT_COUNTERS query failed on %s, %s port %d", node_name, portid2str(portid), portnum); summary.pma_query_failures++; return (1); } start_field = IB_PC_XMT_BYTES_F; end_field = IB_PC_RCV_PKTS_F; } if (!*header_printed) { printf("Data Counters for 0x%" PRIx64 " \"%s\"\n", node->guid, node_name); *header_printed = 1; } if (portnum == 0xFF) printf(" GUID 0x%" PRIx64 " port ALL:", node->guid); else printf(" GUID 0x%" PRIx64 " port %d:", 
node->guid, portnum); for (i = start_field; i <= end_field; i++) { uint64_t val64 = 0; float val = 0; char *unit = ""; int data = 0; mad_decode_field(pc, i, (void *)&val64); if (i == IB_PC_EXT_XMT_BYTES_F || i == IB_PC_EXT_RCV_BYTES_F || i == IB_PC_XMT_BYTES_F || i == IB_PC_RCV_BYTES_F) data = 1; unit = conv_cnt_human_readable(val64, &val, data); printf(" [%s == %" PRIu64 " (%5.3f%s)]", mad_field_name(i), val64, val, unit); } printf("\n"); if (portnum != 0xFF && port_config) print_port_config(node, portnum); return (0); } static int print_errors(ib_portid_t * portid, uint16_t cap_mask, char *node_name, ibnd_node_t * node, int portnum, int *header_printed) { uint8_t pc[1024]; uint8_t pce[1024]; uint8_t *pc_ext = NULL; memset(pc, 0, 1024); memset(pce, 0, 1024); portid->sl = lid2sl_table[portid->lid]; if (!pma_query_via(pc, portid, portnum, ibd_timeout, IB_GSI_PORT_COUNTERS, ibmad_port)) { IBWARN("IB_GSI_PORT_COUNTERS query failed on %s, %s port %d", node_name, portid2str(portid), portnum); summary.pma_query_failures++; return (0); } if (cap_mask & (IB_PM_EXT_WIDTH_SUPPORTED | IB_PM_EXT_WIDTH_NOIETF_SUP)) { if (!pma_query_via(pce, portid, portnum, ibd_timeout, IB_GSI_PORT_COUNTERS_EXT, ibmad_port)) { IBWARN("IB_GSI_PORT_COUNTERS_EXT query failed on %s, %s port %d", node_name, portid2str(portid), portnum); summary.pma_query_failures++; return (0); } pc_ext = pce; } if (!(cap_mask & IB_PM_PC_XMIT_WAIT_SUP)) { /* if PortCounters:PortXmitWait not supported clear this counter */ uint32_t foo = 0; mad_encode_field(pc, IB_PC_XMT_WAIT_F, &foo); } return (print_results(portid, node_name, node, pc, portnum, header_printed, pc_ext, cap_mask)); } uint8_t *reset_pc_ext(void *rcvbuf, ib_portid_t * dest, int port, unsigned mask, unsigned timeout, const struct ibmad_port * srcport) { ib_rpc_t rpc = { 0 }; int lid = dest->lid; DEBUG("lid %u port %d mask 0x%x", lid, port, mask); if (lid == -1) { IBWARN("only lid routed is supported"); return NULL; } if (!mask) mask = ~0; rpc.mgtclass = 
IB_PERFORMANCE_CLASS; rpc.method = IB_MAD_METHOD_SET; rpc.attr.id = IB_GSI_PORT_COUNTERS_EXT; memset(rcvbuf, 0, IB_MAD_SIZE); /* Same for attribute IDs */ mad_set_field(rcvbuf, 0, IB_PC_EXT_PORT_SELECT_F, port); mad_set_field(rcvbuf, 0, IB_PC_EXT_COUNTER_SELECT_F, mask); rpc.attr.mod = 0; rpc.timeout = timeout; rpc.datasz = IB_PC_DATA_SZ; rpc.dataoffs = IB_PC_DATA_OFFS; if (!dest->qp) dest->qp = 1; if (!dest->qkey) dest->qkey = IB_DEFAULT_QP1_QKEY; return mad_rpc(srcport, &rpc, dest, rcvbuf, rcvbuf); } static void clear_port(ib_portid_t * portid, uint16_t cap_mask, char *node_name, int port) { uint8_t pc[1024] = { 0 }; /* bits defined in Table 228 PortCounters CounterSelect and * CounterSelect2 */ uint32_t mask = 0; if (clear_errors) { mask |= 0xFFF; if (cap_mask & IB_PM_PC_XMIT_WAIT_SUP) mask |= 0x10000; } if (clear_counts) mask |= 0xF000; if (mask) if (!performance_reset_via(pc, portid, port, mask, ibd_timeout, IB_GSI_PORT_COUNTERS, ibmad_port)) fprintf(stderr, "Failed to reset errors %s port %d\n", node_name, port); if (clear_errors && details) { memset(pc, 0, 1024); performance_reset_via(pc, portid, port, 0xf, ibd_timeout, IB_GSI_PORT_XMIT_DISCARD_DETAILS, ibmad_port); memset(pc, 0, 1024); performance_reset_via(pc, portid, port, 0x3f, ibd_timeout, IB_GSI_PORT_RCV_ERROR_DETAILS, ibmad_port); } if (clear_counts && (cap_mask & (IB_PM_EXT_WIDTH_SUPPORTED | IB_PM_EXT_WIDTH_NOIETF_SUP))) { if (cap_mask & IB_PM_EXT_WIDTH_SUPPORTED) mask = 0xFF; else mask = 0x0F; if (!reset_pc_ext(pc, portid, port, mask, ibd_timeout, ibmad_port)) fprintf(stderr, "Failed to reset extended data counters %s, " "%s port %d\n", node_name, portid2str(portid), port); } } void print_node(ibnd_node_t * node, void *user_data) { int header_printed = 0; int p = 0; int startport = 1; int type = 0; int all_port_sup = 0; ib_portid_t portid = { 0 }; uint16_t cap_mask = 0; char *node_name = NULL; switch (node->type) { case IB_NODE_SWITCH: type = PRINT_SWITCH; break; case IB_NODE_CA: type = PRINT_CA; 
break; case IB_NODE_ROUTER: type = PRINT_ROUTER; break; } if ((type & node_type_to_print) == 0) return; if (node->type == IB_NODE_SWITCH && node->smaenhsp0) startport = 0; node_name = remap_node_name(node_name_map, node->guid, node->nodedesc); if (node->type == IB_NODE_SWITCH) { ib_portid_set(&portid, node->smalid, 0, 0); p = 0; } else { for (p = 1; p <= node->numports; p++) { if (node->ports[p]) { ib_portid_set(&portid, node->ports[p]->base_lid, 0, 0); break; } } } if ((query_cap_mask(&portid, node_name, p, &cap_mask) == 0) && (cap_mask & IB_PM_ALL_PORT_SELECT)) all_port_sup = 1; if (data_counters_only) { for (p = startport; p <= node->numports; p++) { if (node->ports[p]) { if (node->type == IB_NODE_SWITCH) ib_portid_set(&portid, node->smalid, 0, 0); else ib_portid_set(&portid, node->ports[p]->base_lid, 0, 0); print_data_cnts(&portid, cap_mask, node_name, node, p, &header_printed); summary.ports_checked++; if (!all_port_sup) clear_port(&portid, cap_mask, node_name, p); } } } else { if (all_port_sup) if (!print_errors(&portid, cap_mask, node_name, node, 0xFF, &header_printed)) { summary.ports_checked += node->numports; goto clear; } for (p = startport; p <= node->numports; p++) { if (node->ports[p]) { if (node->type == IB_NODE_SWITCH) ib_portid_set(&portid, node->smalid, 0, 0); else ib_portid_set(&portid, node->ports[p]->base_lid, 0, 0); print_errors(&portid, cap_mask, node_name, node, p, &header_printed); summary.ports_checked++; if (!all_port_sup) clear_port(&portid, cap_mask, node_name, p); } } } clear: summary.nodes_checked++; if (all_port_sup) clear_port(&portid, cap_mask, node_name, 0xFF); free(node_name); } static void add_suppressed(enum MAD_FIELDS field) { if (sup_total >= SUP_MAX) { IBWARN("Maximum (%d) fields have been suppressed; skipping %s", sup_total, mad_field_name(field)); return; } suppressed_fields[sup_total++] = field; } static void calculate_suppressed_fields(char *str) { enum MAD_FIELDS f; char *val, *lasts = NULL; char *tmp = strdup(str); val 
= strtok_r(tmp, ",", &lasts); while (val) { for (f = IB_PC_FIRST_F; f <= IB_PC_LAST_F; f++) if (strcmp(val, mad_field_name(f)) == 0) add_suppressed(f); val = strtok_r(NULL, ",", &lasts); } free(tmp); } static int process_opt(void *context, int ch, char *optarg) { struct ibnd_config *cfg = context; switch (ch) { case 's': calculate_suppressed_fields(optarg); break; case 'c': /* Right now this is the only "common" error */ add_suppressed(IB_PC_ERR_SWITCH_REL_F); break; case 1: node_name_map_file = strdup(optarg); break; case 2: data_counters++; break; case 3: node_type_to_print |= PRINT_SWITCH; break; case 4: node_type_to_print |= PRINT_CA; break; case 5: node_type_to_print |= PRINT_ROUTER; break; case 6: details = 1; break; case 7: load_cache_file = strdup(optarg); break; case 8: threshold_file = strdup(optarg); break; case 9: data_counters_only = 1; break; case 10: obtain_sl = 0; break; case 'G': case 'S': port_guid_str = optarg; port_guid = strtoull(optarg, 0, 0); break; case 'D': dr_path = strdup(optarg); break; case 'r': port_config++; break; case 'R': /* nop */ break; case 'k': clear_errors = 1; break; case 'K': clear_counts = 1; break; case 'o': cfg->max_smps = strtoul(optarg, NULL, 0); break; default: return -1; } return 0; } int main(int argc, char **argv) { struct ibnd_config config = { 0 }; int resolved = -1; ib_portid_t portid = { 0 }; ib_portid_t self_portid = { 0 }; int rc = 0; ibnd_fabric_t *fabric = NULL; ib_gid_t self_gid; int port = 0; int mgmt_classes[4] = { IB_SMI_CLASS, IB_SMI_DIRECT_CLASS, IB_SA_CLASS, IB_PERFORMANCE_CLASS }; const struct ibdiag_opt opts[] = { {"suppress", 's', 1, "", "suppress errors listed"}, {"suppress-common", 'c', 0, NULL, "suppress some of the common counters"}, {"node-name-map", 1, 1, "", "node name map file"}, {"port-guid", 'G', 1, "", "report the node containing the port specified by "}, {"", 'S', 1, "", "Same as \"-G\" for backward compatibility"}, {"Direct", 'D', 1, "", "report the node containing the port specified 
by "}, {"skip-sl", 10, 0, NULL,"don't obtain SL to all destinations"}, {"report-port", 'r', 0, NULL, "report port link information"}, {"threshold-file", 8, 1, NULL, "specify an alternate threshold file, default: " DEF_THRES_FILE}, {"GNDN", 'R', 0, NULL, "(This option is obsolete and does nothing)"}, {"data", 2, 0, NULL, "include data counters for ports with errors"}, {"switch", 3, 0, NULL, "print data for switches only"}, {"ca", 4, 0, NULL, "print data for CA's only"}, {"router", 5, 0, NULL, "print data for routers only"}, {"details", 6, 0, NULL, "include transmit discard details"}, {"counters", 9, 0, NULL, "print data counters only"}, {"clear-errors", 'k', 0, NULL, "Clear error counters after read"}, {"clear-counts", 'K', 0, NULL, "Clear data counters after read"}, {"load-cache", 7, 1, "", "filename of ibnetdiscover cache to load"}, {"outstanding_smps", 'o', 1, NULL, "specify the number of outstanding SMP's which should be " "issued during the scan"}, {0} }; char usage_args[] = ""; memset(suppressed_fields, 0, sizeof suppressed_fields); ibdiag_process_opts(argc, argv, &config, "cDGKLnRrSs", opts, process_opt, usage_args, NULL); argc -= optind; argv += optind; if (!node_type_to_print) node_type_to_print = PRINT_ALL; ibmad_port = mad_rpc_open_port(ibd_ca, ibd_ca_port, mgmt_classes, 4); if (!ibmad_port) IBEXIT("Failed to open port; %s:%d\n", ibd_ca, ibd_ca_port); smp_mkey_set(ibmad_port, ibd_mkey); if (ibd_timeout) { mad_rpc_set_timeout(ibmad_port, ibd_timeout); config.timeout_ms = ibd_timeout; } config.flags = ibd_ibnetdisc_flags; config.mkey = ibd_mkey; if (dr_path && load_cache_file) { mad_rpc_close_port(ibmad_port); fprintf(stderr, "Cannot specify cache and direct route path\n"); exit(-1); } if (resolve_self(ibd_ca, ibd_ca_port, &self_portid, &port, &self_gid.raw) < 0) { mad_rpc_close_port(ibmad_port); IBEXIT("can't resolve self port %s", argv[0]); } node_name_map = open_node_name_map(node_name_map_file); /* limit the scan the fabric around the target */ if 
(dr_path) { if ((resolved = resolve_portid_str(ibd_ca, ibd_ca_port, &portid, dr_path, IB_DEST_DRPATH, NULL, ibmad_port)) < 0) IBWARN("Failed to resolve %s; attempting full scan", dr_path); } else if (port_guid_str) { if ((resolved = resolve_portid_str(ibd_ca, ibd_ca_port, &portid, port_guid_str, IB_DEST_GUID, ibd_sm_id, ibmad_port)) < 0) IBWARN("Failed to resolve %s; attempting full scan", port_guid_str); if(obtain_sl) lid2sl_table[portid.lid] = portid.sl; } mad_rpc_close_port(ibmad_port); if (load_cache_file) { if ((fabric = ibnd_load_fabric(load_cache_file, 0)) == NULL) { fprintf(stderr, "loading cached fabric failed\n"); rc = -1; goto close_port; } } else { if (resolved >= 0) { if (!config.max_hops) config.max_hops = 1; if (!(fabric = ibnd_discover_fabric(ibd_ca, ibd_ca_port, &portid, &config))) IBWARN("Single node discover failed;" " attempting full scan"); } if (!fabric && !(fabric = ibnd_discover_fabric(ibd_ca, ibd_ca_port, NULL, &config))) { fprintf(stderr, "discover failed\n"); rc = -1; goto close_port; } } set_thresholds(threshold_file); /* reopen the global ibmad_port */ ibmad_port = mad_rpc_open_port(ibd_ca, ibd_ca_port, mgmt_classes, 4); if (!ibmad_port) { ibnd_destroy_fabric(fabric); close_node_name_map(node_name_map); IBEXIT("Failed to reopen port: %s:%d\n", ibd_ca, ibd_ca_port); } smp_mkey_set(ibmad_port, ibd_mkey); if (ibd_timeout) mad_rpc_set_timeout(ibmad_port, ibd_timeout); if (port_guid_str) { ibnd_port_t *port = ibnd_find_port_guid(fabric, port_guid); if (port) print_node(port->node, NULL); else fprintf(stderr, "Failed to find node: %s\n", port_guid_str); } else if (dr_path) { ibnd_port_t *port; uint8_t ni[IB_SMP_DATA_SIZE] = { 0 }; if (!smp_query_via(ni, &portid, IB_ATTR_NODE_INFO, 0, ibd_timeout, ibmad_port)) { fprintf(stderr, "Failed to query local Node Info\n"); goto destroy_fabric; } mad_decode_field(ni, IB_NODE_PORT_GUID_F, &(port_guid)); port = ibnd_find_port_guid(fabric, port_guid); if (port) { if(obtain_sl) 
if(path_record_query(self_gid,port->guid)) goto destroy_fabric; print_node(port->node, NULL); } else fprintf(stderr, "Failed to find node: %s\n", dr_path); } else { if(obtain_sl) if(path_record_query(self_gid,0)) goto destroy_fabric; ibnd_iter_nodes(fabric, print_node, NULL); } rc = print_summary(); if (rc) rc = 1; destroy_fabric: mad_rpc_close_port(ibmad_port); ibnd_destroy_fabric(fabric); close_port: close_node_name_map(node_name_map); exit(rc); } Index: head/contrib/ofed/infiniband-diags/src/ibroute.c =================================================================== --- head/contrib/ofed/infiniband-diags/src/ibroute.c (revision 363219) +++ head/contrib/ofed/infiniband-diags/src/ibroute.c (revision 363220) @@ -1,488 +1,489 @@ /* * Copyright (c) 2004-2009 Voltaire Inc. All rights reserved. * Copyright (c) 2009-2011 Mellanox Technologies LTD. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * */ #if HAVE_CONFIG_H # include #endif /* HAVE_CONFIG_H */ #include #include #include #include #include #include #include #include #include #include #include "ibdiag_common.h" struct ibmad_port *srcport; static int brief, dump_all, multicast; static char *node_name_map_file = NULL; static nn_map_t *node_name_map = NULL; /*******************************************/ char *check_switch(ib_portid_t * portid, unsigned int *nports, uint64_t * guid, uint8_t * sw, char *nd) { uint8_t ni[IB_SMP_DATA_SIZE] = { 0 }; int type; DEBUG("checking node type"); if (!smp_query_via(ni, portid, IB_ATTR_NODE_INFO, 0, 0, srcport)) { xdump(stderr, "nodeinfo\n", ni, sizeof ni); return "node info failed: valid addr?"; } if (!smp_query_via(nd, portid, IB_ATTR_NODE_DESC, 0, 0, srcport)) return "node desc failed"; mad_decode_field(ni, IB_NODE_TYPE_F, &type); if (type != IB_NODE_SWITCH) return "not a switch"; DEBUG("Gathering information about switch"); mad_decode_field(ni, IB_NODE_NPORTS_F, nports); mad_decode_field(ni, IB_NODE_GUID_F, guid); if (!smp_query_via(sw, portid, IB_ATTR_SWITCH_INFO, 0, 0, srcport)) return "switch info failed: is a switch node?"; return 0; } #define IB_MLIDS_IN_BLOCK (IB_SMP_DATA_SIZE/2) int dump_mlid(char *str, int strlen, unsigned mlid, unsigned nports, uint16_t mft[16][IB_MLIDS_IN_BLOCK]) { uint16_t mask; unsigned i, chunk, bit, nonzero = 0; if (brief) { int n = 0; unsigned chunks = ALIGN(nports + 1, 16) / 16; for (i = 0; i < chunks; i++) { mask = ntohs(mft[i][mlid % IB_MLIDS_IN_BLOCK]); if (mask) nonzero++; n += snprintf(str + n, strlen - n, "%04hx", mask); if (n >= strlen) { n = strlen; break; } } if (!nonzero && !dump_all) { str[0] = 0; return 0; } return n; } for (i = 0; i <= nports; i++) { chunk 
= i / 16; bit = i % 16; mask = ntohs(mft[chunk][mlid % IB_MLIDS_IN_BLOCK]); if (mask) nonzero++; str[i * 2] = (mask & (1 << bit)) ? 'x' : ' '; str[i * 2 + 1] = ' '; } if (!nonzero && !dump_all) { str[0] = 0; return 0; } str[i * 2] = 0; return i * 2; } uint16_t mft[16][IB_MLIDS_IN_BLOCK] = { { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0}, { 0 }, { 0 } }; char *dump_multicast_tables(ib_portid_t * portid, unsigned startlid, unsigned endlid) { char nd[IB_SMP_DATA_SIZE] = { 0 }; uint8_t sw[IB_SMP_DATA_SIZE] = { 0 }; char str[512]; char *s; uint64_t nodeguid; uint32_t mod; unsigned block, i, j, e, nports, cap, chunks, startblock, lastblock, top; char *mapnd = NULL; int n = 0; if ((s = check_switch(portid, &nports, &nodeguid, sw, nd))) return s; mad_decode_field(sw, IB_SW_MCAST_FDB_CAP_F, &cap); mad_decode_field(sw, IB_SW_MCAST_FDB_TOP_F, &top); if (!endlid || endlid > IB_MIN_MCAST_LID + cap - 1) endlid = IB_MIN_MCAST_LID + cap - 1; if (!dump_all && top && top < endlid) { if (top < IB_MIN_MCAST_LID - 1) IBWARN("illegal top mlid %x", top); else endlid = top; } if (!startlid) startlid = IB_MIN_MCAST_LID; else if (startlid < IB_MIN_MCAST_LID) { IBWARN("illegal start mlid %x, set to %x", startlid, IB_MIN_MCAST_LID); startlid = IB_MIN_MCAST_LID; } if (endlid > IB_MAX_MCAST_LID) { IBWARN("illegal end mlid %x, truncate to %x", endlid, IB_MAX_MCAST_LID); endlid = IB_MAX_MCAST_LID; } mapnd = remap_node_name(node_name_map, nodeguid, nd); printf("Multicast mlids [0x%x-0x%x] of switch %s guid 0x%016" PRIx64 " (%s):\n", startlid, endlid, portid2str(portid), nodeguid, mapnd); if (brief) printf(" MLid Port Mask\n"); else { if (nports > 9) { for (i = 0, s = str; i <= nports; i++) { *s++ = (i % 10) ? 
' ' : '0' + i / 10; *s++ = ' '; } *s = 0; printf(" %s\n", str); } for (i = 0, s = str; i <= nports; i++) s += sprintf(s, "%d ", i % 10); printf(" Ports: %s\n", str); printf(" MLid\n"); } if (ibverbose) printf("Switch multicast mlid capability is %d top is 0x%x\n", cap, top); chunks = ALIGN(nports + 1, 16) / 16; startblock = startlid / IB_MLIDS_IN_BLOCK; lastblock = endlid / IB_MLIDS_IN_BLOCK; for (block = startblock; block <= lastblock; block++) { for (j = 0; j < chunks; j++) { int status; mod = (block - IB_MIN_MCAST_LID / IB_MLIDS_IN_BLOCK) | (j << 28); DEBUG("reading block %x chunk %d mod %x", block, j, mod); if (!smp_query_status_via (mft + j, portid, IB_ATTR_MULTICASTFORWTBL, mod, 0, &status, srcport)) { fprintf(stderr, "SubnGet() failed" "; MAD status 0x%x AM 0x%x\n", status, mod); return NULL; } } i = block * IB_MLIDS_IN_BLOCK; e = i + IB_MLIDS_IN_BLOCK; if (i < startlid) i = startlid; if (e > endlid + 1) e = endlid + 1; for (; i < e; i++) { if (dump_mlid(str, sizeof str, i, nports, mft) == 0) continue; printf("0x%04x %s\n", i, str); n++; } } printf("%d %smlids dumped \n", n, dump_all ? 
"" : "valid "); free(mapnd); return 0; } int dump_lid(char *str, int strlen, int lid, int valid) { char nd[IB_SMP_DATA_SIZE] = { 0 }; uint8_t ni[IB_SMP_DATA_SIZE] = { 0 }; uint8_t pi[IB_SMP_DATA_SIZE] = { 0 }; ib_portid_t lidport = { 0 }; static int last_port_lid, base_port_lid; char ntype[50], sguid[30]; static uint64_t portguid; uint64_t nodeguid; int baselid, lmc, type; char *mapnd = NULL; int rc; if (brief) { str[0] = 0; return 0; } if (lid <= last_port_lid) { if (!valid) return snprintf(str, strlen, ": (path #%d - illegal port)", lid - base_port_lid); else if (!portguid) return snprintf(str, strlen, ": (path #%d out of %d)", lid - base_port_lid + 1, last_port_lid - base_port_lid + 1); else { return snprintf(str, strlen, ": (path #%d out of %d: portguid %s)", lid - base_port_lid + 1, last_port_lid - base_port_lid + 1, mad_dump_val(IB_NODE_PORT_GUID_F, sguid, sizeof sguid, &portguid)); } } if (!valid) return snprintf(str, strlen, ": (illegal port)"); portguid = 0; lidport.lid = lid; if (!smp_query_via(nd, &lidport, IB_ATTR_NODE_DESC, 0, 100, srcport) || !smp_query_via(pi, &lidport, IB_ATTR_PORT_INFO, 0, 100, srcport) || !smp_query_via(ni, &lidport, IB_ATTR_NODE_INFO, 0, 100, srcport)) return snprintf(str, strlen, ": (unknown node and type)"); mad_decode_field(ni, IB_NODE_GUID_F, &nodeguid); mad_decode_field(ni, IB_NODE_PORT_GUID_F, &portguid); mad_decode_field(ni, IB_NODE_TYPE_F, &type); mad_decode_field(pi, IB_PORT_LID_F, &baselid); mad_decode_field(pi, IB_PORT_LMC_F, &lmc); if (lmc > 0) { base_port_lid = baselid; last_port_lid = baselid + (1 << lmc) - 1; } mapnd = remap_node_name(node_name_map, nodeguid, nd); rc = snprintf(str, strlen, ": (%s portguid %s: '%s')", mad_dump_val(IB_NODE_TYPE_F, ntype, sizeof ntype, &type), mad_dump_val(IB_NODE_PORT_GUID_F, sguid, sizeof sguid, &portguid), mapnd); free(mapnd); return rc; } char *dump_unicast_tables(ib_portid_t * portid, int startlid, int endlid) { char lft[IB_SMP_DATA_SIZE] = { 0 }; char nd[IB_SMP_DATA_SIZE] = { 0 
}; uint8_t sw[IB_SMP_DATA_SIZE] = { 0 }; char str[200], *s; uint64_t nodeguid; int block, i, e, top; unsigned nports; int n = 0, startblock, endblock; char *mapnd = NULL; if ((s = check_switch(portid, &nports, &nodeguid, sw, nd))) return s; mad_decode_field(sw, IB_SW_LINEAR_FDB_TOP_F, &top); if (!endlid || endlid > top) endlid = top; if (endlid > IB_MAX_UCAST_LID) { IBWARN("illegal lft top %d, truncate to %d", endlid, IB_MAX_UCAST_LID); endlid = IB_MAX_UCAST_LID; } mapnd = remap_node_name(node_name_map, nodeguid, nd); printf("Unicast lids [0x%x-0x%x] of switch %s guid 0x%016" PRIx64 " (%s):\n", startlid, endlid, portid2str(portid), nodeguid, mapnd); + free(mapnd); + DEBUG("Switch top is 0x%x\n", top); printf(" Lid Out Destination\n"); printf(" Port Info \n"); startblock = startlid / IB_SMP_DATA_SIZE; endblock = ALIGN(endlid, IB_SMP_DATA_SIZE) / IB_SMP_DATA_SIZE; for (block = startblock; block < endblock; block++) { int status; DEBUG("reading block %d", block); if (!smp_query_status_via(lft, portid, IB_ATTR_LINEARFORWTBL, block, 0, &status, srcport)) { fprintf(stderr, "SubnGet() failed" "; MAD status 0x%x AM 0x%x\n", status, block); return NULL; } i = block * IB_SMP_DATA_SIZE; e = i + IB_SMP_DATA_SIZE; if (i < startlid) i = startlid; if (e > endlid + 1) e = endlid + 1; for (; i < e; i++) { unsigned outport = lft[i % IB_SMP_DATA_SIZE]; unsigned valid = (outport <= nports); if (!valid && !dump_all) continue; dump_lid(str, sizeof str, i, valid); printf("0x%04x %03u %s\n", i, outport & 0xff, str); n++; } } printf("%d %slids dumped \n", n, dump_all ? 
"" : "valid "); - free(mapnd); return 0; } static int process_opt(void *context, int ch, char *optarg) { switch (ch) { case 'a': dump_all++; break; case 'M': multicast++; break; case 'n': brief++; break; case 1: node_name_map_file = strdup(optarg); break; default: return -1; } return 0; } int main(int argc, char **argv) { int mgmt_classes[3] = { IB_SMI_CLASS, IB_SMI_DIRECT_CLASS, IB_SA_CLASS }; ib_portid_t portid = { 0 }; unsigned startlid = 0, endlid = 0; char *err; const struct ibdiag_opt opts[] = { {"all", 'a', 0, NULL, "show all lids, even invalid entries"}, {"no_dests", 'n', 0, NULL, "do not try to resolve destinations"}, {"Multicast", 'M', 0, NULL, "show multicast forwarding tables"}, {"node-name-map", 1, 1, "", "node name map file"}, {0} }; char usage_args[] = "[ [ []]]"; const char *usage_examples[] = { " -- Unicast examples:", "4\t# dump all lids with valid out ports of switch with lid 4", "-a 4\t# same, but dump all lids, even with invalid out ports", "-n 4\t# simple dump format - no destination resolving", "4 10\t# dump lids starting from 10", "4 0x10 0x20\t# dump lid range", "-G 0x08f1040023\t# resolve switch by GUID", "-D 0,1\t# resolve switch by direct path", " -- Multicast examples:", "-M 4\t# dump all non empty mlids of switch with lid 4", "-M 4 0xc010 0xc020\t# same, but with range", "-M -n 4\t# simple dump format", NULL, }; ibdiag_process_opts(argc, argv, NULL, "K", opts, process_opt, usage_args, usage_examples); argc -= optind; argv += optind; if (!argc) ibdiag_show_usage(); if (argc > 1) startlid = strtoul(argv[1], 0, 0); if (argc > 2) endlid = strtoul(argv[2], 0, 0); node_name_map = open_node_name_map(node_name_map_file); srcport = mad_rpc_open_port(ibd_ca, ibd_ca_port, mgmt_classes, 3); if (!srcport) IBEXIT("Failed to open '%s' port '%d'", ibd_ca, ibd_ca_port); smp_mkey_set(srcport, ibd_mkey); if (resolve_portid_str(ibd_ca, ibd_ca_port, &portid, argv[0], ibd_dest_type, ibd_sm_id, srcport) < 0) IBEXIT("can't resolve destination port %s", 
argv[0]); if (multicast) err = dump_multicast_tables(&portid, startlid, endlid); else err = dump_unicast_tables(&portid, startlid, endlid); if (err) IBEXIT("dump tables: %s", err); mad_rpc_close_port(srcport); close_node_name_map(node_name_map); exit(0); } Index: head/contrib/ofed/libibumad/umad_str.c =================================================================== --- head/contrib/ofed/libibumad/umad_str.c (revision 363219) +++ head/contrib/ofed/libibumad/umad_str.c (revision 363220) @@ -1,355 +1,352 @@ /* * Copyright (c) 2004, 2005, 2010 Intel Corporation. All rights reserved. * Copyright (c) 2013 Lawrence Livermore National Security. All rights reserved. * Copyright (c) 2014 Mellanox Technologies LTD. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * */ #include #include #include #include #include #include #include #include "umad_str.h" const char * umad_class_str(uint8_t mgmt_class) { switch (mgmt_class) { case UMAD_CLASS_SUBN_LID_ROUTED: case UMAD_CLASS_SUBN_DIRECTED_ROUTE: return("Subn"); case UMAD_CLASS_SUBN_ADM: return("SubnAdm"); case UMAD_CLASS_PERF_MGMT: return("Perf"); case UMAD_CLASS_BM: return("BM"); case UMAD_CLASS_DEVICE_MGMT: return("DevMgt"); case UMAD_CLASS_CM: return("ComMgt"); case UMAD_CLASS_SNMP: return("SNMP"); case UMAD_CLASS_DEVICE_ADM: return("DevAdm"); case UMAD_CLASS_BOOT_MGMT: return("BootMgt"); case UMAD_CLASS_BIS: return("BIS"); case UMAD_CLASS_CONG_MGMT: return("CongestionManagment"); default: break; } if ((UMAD_CLASS_VENDOR_RANGE1_START <= mgmt_class && mgmt_class <= UMAD_CLASS_VENDOR_RANGE1_END) || (UMAD_CLASS_VENDOR_RANGE2_START <= mgmt_class && mgmt_class <= UMAD_CLASS_VENDOR_RANGE2_END)) return("Vendor"); if (UMAD_CLASS_APPLICATION_START <= mgmt_class && mgmt_class <= UMAD_CLASS_APPLICATION_END) { return("Application"); } return (""); } static const char * umad_common_method_str(uint8_t method) { switch(method) { case UMAD_METHOD_GET: return ("Get"); case UMAD_METHOD_SET: return ("Set"); case UMAD_METHOD_GET_RESP: return ("GetResp"); case UMAD_METHOD_SEND: return ("Send"); case UMAD_METHOD_TRAP: return ("Trap"); case UMAD_METHOD_REPORT: return ("Report"); case UMAD_METHOD_REPORT_RESP: return ("ReportResp"); case UMAD_METHOD_TRAP_REPRESS: return ("TrapRepress"); default: return ("> 8) { case UMAD_SA_STATUS_SUCCESS: return ("Success"); case UMAD_SA_STATUS_NO_RESOURCES: return ("No Resources"); case UMAD_SA_STATUS_REQ_INVALID: return ("Request Invalid"); case UMAD_SA_STATUS_NO_RECORDS: return ("No Records"); case 
UMAD_SA_STATUS_TOO_MANY_RECORDS: return ("Too Many Records"); case UMAD_SA_STATUS_INVALID_GID: return ("Invalid GID"); case UMAD_SA_STATUS_INSUF_COMPS: return ("Insufficient Components"); case UMAD_SA_STATUS_REQ_DENIED: return ("Request Denied"); case UMAD_SA_STATUS_PRI_SUGGESTED: return ("Priority Suggested"); } return ("Undefined Error"); } static const char *umad_common_attr_str(__be16 attr_id) { switch(be16toh(attr_id)) { case UMAD_ATTR_CLASS_PORT_INFO: return "Class Port Info"; case UMAD_ATTR_NOTICE: return "Notice"; case UMAD_ATTR_INFORM_INFO: return "Inform Info"; default: return ""; } } static const char * umad_sm_attr_str(__be16 attr_id) { switch(be16toh(attr_id)) { case UMAD_SM_ATTR_NODE_DESC: return ("NodeDescription"); case UMAD_SM_ATTR_NODE_INFO: return ("NodeInfo"); case UMAD_SM_ATTR_SWITCH_INFO: return ("SwitchInfo"); case UMAD_SM_ATTR_GUID_INFO: return ("GUIDInfo"); case UMAD_SM_ATTR_PORT_INFO: return ("PortInfo"); case UMAD_SM_ATTR_PKEY_TABLE: return ("P_KeyTable"); case UMAD_SM_ATTR_SLVL_TABLE: return ("SLtoVLMappingTable"); case UMAD_SM_ATTR_VL_ARB_TABLE: return ("VLArbitrationTable"); case UMAD_SM_ATTR_LINEAR_FT: return ("LinearForwardingTable"); case UMAD_SM_ATTR_RANDOM_FT: return ("RandomForwardingTable"); case UMAD_SM_ATTR_MCAST_FT: return ("MulticastForwardingTable"); case UMAD_SM_ATTR_SM_INFO: return ("SMInfo"); case UMAD_SM_ATTR_VENDOR_DIAG: return ("VendorDiag"); case UMAD_SM_ATTR_LED_INFO: return ("LedInfo"); case UMAD_SM_ATTR_LINK_SPD_WIDTH_TABLE: return ("LinkSpeedWidthPairsTable"); case UMAD_SM_ATTR_VENDOR_MADS_TABLE: return ("VendorSpecificMadsTable"); case UMAD_SM_ATTR_HIERARCHY_INFO: return ("HierarchyInfo"); case UMAD_SM_ATTR_CABLE_INFO: return ("CableInfo"); case UMAD_SM_ATTR_PORT_INFO_EXT: return ("PortInfoExtended"); default: return (umad_common_attr_str(attr_id)); } - return (""); } static const char * umad_sa_attr_str(__be16 attr_id) { switch(be16toh(attr_id)) { case UMAD_SA_ATTR_NODE_REC: return ("NodeRecord"); case 
UMAD_SA_ATTR_PORT_INFO_REC: return ("PortInfoRecord"); case UMAD_SA_ATTR_SLVL_REC: return ("SLtoVLMappingTableRecord"); case UMAD_SA_ATTR_SWITCH_INFO_REC: return ("SwitchInfoRecord"); case UMAD_SA_ATTR_LINEAR_FT_REC: return ("LinearForwardingTableRecord"); case UMAD_SA_ATTR_RANDOM_FT_REC: return ("RandomForwardingTableRecord"); case UMAD_SA_ATTR_MCAST_FT_REC: return ("MulticastForwardingTableRecord"); case UMAD_SA_ATTR_SM_INFO_REC: return ("SMInfoRecord"); case UMAD_SA_ATTR_INFORM_INFO_REC: return ("InformInfoRecord"); case UMAD_SA_ATTR_LINK_REC: return ("LinkRecord"); case UMAD_SA_ATTR_GUID_INFO_REC: return ("GuidInfoRecord"); case UMAD_SA_ATTR_SERVICE_REC: return ("ServiceRecord"); case UMAD_SA_ATTR_PKEY_TABLE_REC: return ("P_KeyTableRecord"); case UMAD_SA_ATTR_PATH_REC: return ("PathRecord"); case UMAD_SA_ATTR_VL_ARB_REC: return ("VLArbitrationTableRecord"); case UMAD_SA_ATTR_MCMEMBER_REC: return ("MCMemberRecord"); case UMAD_SA_ATTR_TRACE_REC: return ("TraceRecord"); case UMAD_SA_ATTR_MULTI_PATH_REC: return ("MultiPathRecord"); case UMAD_SA_ATTR_SERVICE_ASSOC_REC: return ("ServiceAssociationRecord"); case UMAD_SA_ATTR_LINK_SPD_WIDTH_TABLE_REC: return ("LinkSpeedWidthPairsTableRecord"); case UMAD_SA_ATTR_HIERARCHY_INFO_REC: return ("HierarchyInfoRecord"); case UMAD_SA_ATTR_CABLE_INFO_REC: return ("CableInfoRecord"); case UMAD_SA_ATTR_PORT_INFO_EXT_REC: return ("PortInfoExtendedRecord"); default: return (umad_common_attr_str(attr_id)); } - return (""); } static const char * umad_cm_attr_str(__be16 attr_id) { switch(be16toh(attr_id)) { case UMAD_CM_ATTR_REQ: return "ConnectRequest"; case UMAD_CM_ATTR_MRA: return "MsgRcptAck"; case UMAD_CM_ATTR_REJ: return "ConnectReject"; case UMAD_CM_ATTR_REP: return "ConnectReply"; case UMAD_CM_ATTR_RTU: return "ReadyToUse"; case UMAD_CM_ATTR_DREQ: return "DisconnectRequest"; case UMAD_CM_ATTR_DREP: return "DisconnectReply"; case UMAD_CM_ATTR_SIDR_REQ: return "ServiceIDResReq"; case UMAD_CM_ATTR_SIDR_REP: return 
"ServiceIDResReqResp"; case UMAD_CM_ATTR_LAP: return "LoadAlternatePath"; case UMAD_CM_ATTR_APR: return "AlternatePathResponse"; case UMAD_CM_ATTR_SAP: return "SuggestAlternatePath"; case UMAD_CM_ATTR_SPR: return "SuggestPathResponse"; default: return (umad_common_attr_str(attr_id)); } - return (""); } const char * umad_attribute_str(uint8_t mgmt_class, __be16 attr_id) { switch (mgmt_class) { case UMAD_CLASS_SUBN_LID_ROUTED: case UMAD_CLASS_SUBN_DIRECTED_ROUTE: return(umad_sm_attr_str(attr_id)); case UMAD_CLASS_SUBN_ADM: return(umad_sa_attr_str(attr_id)); case UMAD_CLASS_CM: return(umad_cm_attr_str(attr_id)); } return (umad_common_attr_str(attr_id)); } Index: head/contrib/ofed/libibverbs/device.c =================================================================== --- head/contrib/ofed/libibverbs/device.c (revision 363219) +++ head/contrib/ofed/libibverbs/device.c (revision 363220) @@ -1,398 +1,395 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #define _GNU_SOURCE #include #include #include #include #include #include #include #include #include #include #include "ibverbs.h" /* Hack to avoid GCC's -Wmissing-prototypes and the similar error from sparse with these prototypes. Symbol versionining requires the goofy names, the prototype must match the version in verbs.h. */ struct ibv_device **__ibv_get_device_list(int *num_devices); void __ibv_free_device_list(struct ibv_device **list); const char *__ibv_get_device_name(struct ibv_device *device); __be64 __ibv_get_device_guid(struct ibv_device *device); struct ibv_context *__ibv_open_device(struct ibv_device *device); int __ibv_close_device(struct ibv_context *context); int __ibv_get_async_event(struct ibv_context *context, struct ibv_async_event *event); void __ibv_ack_async_event(struct ibv_async_event *event); static pthread_once_t device_list_once = PTHREAD_ONCE_INIT; static int num_devices; static struct ibv_device **device_list; static void count_devices(void) { num_devices = ibverbs_init(&device_list); } struct ibv_device **__ibv_get_device_list(int *num) { struct ibv_device **l; int i; if (num) *num = 0; pthread_once(&device_list_once, count_devices); if (num_devices < 0) { errno = -num_devices; return NULL; } l = calloc(num_devices + 1, sizeof (struct ibv_device *)); if (!l) { errno = ENOMEM; return NULL; } for (i = 0; i < num_devices; ++i) l[i] = device_list[i]; if (num) *num = num_devices; return l; } default_symver(__ibv_get_device_list, ibv_get_device_list); void 
__ibv_free_device_list(struct ibv_device **list) { free(list); } default_symver(__ibv_free_device_list, ibv_free_device_list); const char *__ibv_get_device_name(struct ibv_device *device) { return device->name; } default_symver(__ibv_get_device_name, ibv_get_device_name); __be64 __ibv_get_device_guid(struct ibv_device *device) { char attr[24]; uint64_t guid = 0; uint16_t parts[4]; int i; if (ibv_read_sysfs_file(device->ibdev_path, "node_guid", attr, sizeof attr) < 0) return 0; if (sscanf(attr, "%hx:%hx:%hx:%hx", parts, parts + 1, parts + 2, parts + 3) != 4) return 0; for (i = 0; i < 4; ++i) guid = (guid << 16) | parts[i]; return htobe64(guid); } default_symver(__ibv_get_device_guid, ibv_get_device_guid); void verbs_init_cq(struct ibv_cq *cq, struct ibv_context *context, struct ibv_comp_channel *channel, void *cq_context) { cq->context = context; cq->channel = channel; if (cq->channel) { pthread_mutex_lock(&context->mutex); ++cq->channel->refcnt; pthread_mutex_unlock(&context->mutex); } cq->cq_context = cq_context; cq->comp_events_completed = 0; cq->async_events_completed = 0; pthread_mutex_init(&cq->mutex, NULL); pthread_cond_init(&cq->cond, NULL); } static struct ibv_cq_ex * __lib_ibv_create_cq_ex(struct ibv_context *context, struct ibv_cq_init_attr_ex *cq_attr) { struct verbs_context *vctx = verbs_get_ctx(context); struct ibv_cq_ex *cq; if (cq_attr->wc_flags & ~IBV_CREATE_CQ_SUP_WC_FLAGS) { errno = EOPNOTSUPP; return NULL; } cq = vctx->priv->create_cq_ex(context, cq_attr); if (cq) verbs_init_cq(ibv_cq_ex_to_cq(cq), context, cq_attr->channel, cq_attr->cq_context); return cq; } struct ibv_context *__ibv_open_device(struct ibv_device *device) { struct verbs_device *verbs_device = verbs_get_device(device); char *devpath; int cmd_fd, ret; struct ibv_context *context; struct verbs_context *context_ex; if (asprintf(&devpath, "/dev/%s", device->dev_name) < 0) return NULL; /* * We'll only be doing writes, but we need O_RDWR in case the * provider needs to mmap() the file. 
*/ cmd_fd = open(devpath, O_RDWR | O_CLOEXEC); free(devpath); if (cmd_fd < 0) return NULL; if (!verbs_device->ops->init_context) { context = verbs_device->ops->alloc_context(device, cmd_fd); if (!context) goto err; } else { struct verbs_ex_private *priv; /* Library now allocates the context */ context_ex = calloc(1, sizeof(*context_ex) + verbs_device->size_of_context); if (!context_ex) { errno = ENOMEM; goto err; } priv = calloc(1, sizeof(*priv)); if (!priv) { errno = ENOMEM; free(context_ex); goto err; } context_ex->priv = priv; context_ex->context.abi_compat = __VERBS_ABI_IS_EXTENDED; context_ex->sz = sizeof(*context_ex); context = &context_ex->context; ret = verbs_device->ops->init_context(verbs_device, context, cmd_fd); if (ret) goto verbs_err; /* * In order to maintain backward/forward binary compatibility * with apps compiled against libibverbs-1.1.8 that use the * flow steering addition, we need to set the two * ABI_placeholder entries to match the driver set flow * entries. This is because apps compiled against * libibverbs-1.1.8 use an inline ibv_create_flow and * ibv_destroy_flow function that looks in the placeholder * spots for the proper entry points. For apps compiled * against libibverbs-1.1.9 and later, the inline functions * will be looking in the right place. 
*/ context_ex->ABI_placeholder1 = (void (*)(void)) context_ex->ibv_create_flow; context_ex->ABI_placeholder2 = (void (*)(void)) context_ex->ibv_destroy_flow; if (context_ex->create_cq_ex) { priv->create_cq_ex = context_ex->create_cq_ex; context_ex->create_cq_ex = __lib_ibv_create_cq_ex; } } context->device = device; context->cmd_fd = cmd_fd; pthread_mutex_init(&context->mutex, NULL); return context; verbs_err: free(context_ex->priv); free(context_ex); err: close(cmd_fd); return NULL; } default_symver(__ibv_open_device, ibv_open_device); int __ibv_close_device(struct ibv_context *context) { int async_fd = context->async_fd; int cmd_fd = context->cmd_fd; - int cq_fd = -1; struct verbs_context *context_ex; struct verbs_device *verbs_device = verbs_get_device(context->device); context_ex = verbs_get_ctx(context); if (context_ex) { verbs_device->ops->uninit_context(verbs_device, context); free(context_ex->priv); free(context_ex); } else { verbs_device->ops->free_context(context); } close(async_fd); close(cmd_fd); - if (abi_ver <= 2) - close(cq_fd); return 0; } default_symver(__ibv_close_device, ibv_close_device); int __ibv_get_async_event(struct ibv_context *context, struct ibv_async_event *event) { struct ibv_kern_async_event ev; if (read(context->async_fd, &ev, sizeof ev) != sizeof ev) return -1; event->event_type = ev.event_type; switch (event->event_type) { case IBV_EVENT_CQ_ERR: event->element.cq = (void *) (uintptr_t) ev.element; break; case IBV_EVENT_QP_FATAL: case IBV_EVENT_QP_REQ_ERR: case IBV_EVENT_QP_ACCESS_ERR: case IBV_EVENT_COMM_EST: case IBV_EVENT_SQ_DRAINED: case IBV_EVENT_PATH_MIG: case IBV_EVENT_PATH_MIG_ERR: case IBV_EVENT_QP_LAST_WQE_REACHED: event->element.qp = (void *) (uintptr_t) ev.element; break; case IBV_EVENT_SRQ_ERR: case IBV_EVENT_SRQ_LIMIT_REACHED: event->element.srq = (void *) (uintptr_t) ev.element; break; case IBV_EVENT_WQ_FATAL: event->element.wq = (void *) (uintptr_t) ev.element; break; default: event->element.port_num = ev.element; 
break; } if (context->ops.async_event) context->ops.async_event(event); return 0; } default_symver(__ibv_get_async_event, ibv_get_async_event); void __ibv_ack_async_event(struct ibv_async_event *event) { switch (event->event_type) { case IBV_EVENT_CQ_ERR: { struct ibv_cq *cq = event->element.cq; pthread_mutex_lock(&cq->mutex); ++cq->async_events_completed; pthread_cond_signal(&cq->cond); pthread_mutex_unlock(&cq->mutex); return; } case IBV_EVENT_QP_FATAL: case IBV_EVENT_QP_REQ_ERR: case IBV_EVENT_QP_ACCESS_ERR: case IBV_EVENT_COMM_EST: case IBV_EVENT_SQ_DRAINED: case IBV_EVENT_PATH_MIG: case IBV_EVENT_PATH_MIG_ERR: case IBV_EVENT_QP_LAST_WQE_REACHED: { struct ibv_qp *qp = event->element.qp; pthread_mutex_lock(&qp->mutex); ++qp->events_completed; pthread_cond_signal(&qp->cond); pthread_mutex_unlock(&qp->mutex); return; } case IBV_EVENT_SRQ_ERR: case IBV_EVENT_SRQ_LIMIT_REACHED: { struct ibv_srq *srq = event->element.srq; pthread_mutex_lock(&srq->mutex); ++srq->events_completed; pthread_cond_signal(&srq->cond); pthread_mutex_unlock(&srq->mutex); return; } case IBV_EVENT_WQ_FATAL: { struct ibv_wq *wq = event->element.wq; pthread_mutex_lock(&wq->mutex); ++wq->events_completed; pthread_cond_signal(&wq->cond); pthread_mutex_unlock(&wq->mutex); return; } default: return; } } default_symver(__ibv_ack_async_event, ibv_ack_async_event); Index: head/contrib/ofed/libibverbs/examples/rc_pingpong.c =================================================================== --- head/contrib/ofed/libibverbs/examples/rc_pingpong.c (revision 363219) +++ head/contrib/ofed/libibverbs/examples/rc_pingpong.c (revision 363220) @@ -1,1041 +1,1045 @@ /* * Copyright (c) 2005 Topspin Communications. All rights reserved. * * This software is available to you under a choice of one of two * licenses. 
You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #define _GNU_SOURCE #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "pingpong.h" #include enum { PINGPONG_RECV_WRID = 1, PINGPONG_SEND_WRID = 2, }; static int page_size; static int use_odp; static int use_ts; struct pingpong_context { struct ibv_context *context; struct ibv_comp_channel *channel; struct ibv_pd *pd; struct ibv_mr *mr; union { struct ibv_cq *cq; struct ibv_cq_ex *cq_ex; } cq_s; struct ibv_qp *qp; void *buf; int size; int send_flags; int rx_depth; int pending; struct ibv_port_attr portinfo; uint64_t completion_timestamp_mask; }; static struct ibv_cq *pp_cq(struct pingpong_context *ctx) { return use_ts ? 
ibv_cq_ex_to_cq(ctx->cq_s.cq_ex) : ctx->cq_s.cq; } struct pingpong_dest { int lid; int qpn; int psn; union ibv_gid gid; }; static int pp_connect_ctx(struct pingpong_context *ctx, int port, int my_psn, enum ibv_mtu mtu, int sl, struct pingpong_dest *dest, int sgid_idx) { struct ibv_qp_attr attr = { .qp_state = IBV_QPS_RTR, .path_mtu = mtu, .dest_qp_num = dest->qpn, .rq_psn = dest->psn, .max_dest_rd_atomic = 1, .min_rnr_timer = 12, .ah_attr = { .is_global = 0, .dlid = dest->lid, .sl = sl, .src_path_bits = 0, .port_num = port } }; if (dest->gid.global.interface_id) { attr.ah_attr.is_global = 1; attr.ah_attr.grh.hop_limit = 1; attr.ah_attr.grh.dgid = dest->gid; attr.ah_attr.grh.sgid_index = sgid_idx; } if (ibv_modify_qp(ctx->qp, &attr, IBV_QP_STATE | IBV_QP_AV | IBV_QP_PATH_MTU | IBV_QP_DEST_QPN | IBV_QP_RQ_PSN | IBV_QP_MAX_DEST_RD_ATOMIC | IBV_QP_MIN_RNR_TIMER)) { fprintf(stderr, "Failed to modify QP to RTR\n"); return 1; } attr.qp_state = IBV_QPS_RTS; attr.timeout = 14; attr.retry_cnt = 7; attr.rnr_retry = 7; attr.sq_psn = my_psn; attr.max_rd_atomic = 1; if (ibv_modify_qp(ctx->qp, &attr, IBV_QP_STATE | IBV_QP_TIMEOUT | IBV_QP_RETRY_CNT | IBV_QP_RNR_RETRY | IBV_QP_SQ_PSN | IBV_QP_MAX_QP_RD_ATOMIC)) { fprintf(stderr, "Failed to modify QP to RTS\n"); return 1; } return 0; } static struct pingpong_dest *pp_client_exch_dest(const char *servername, int port, const struct pingpong_dest *my_dest) { struct addrinfo *res, *t; struct addrinfo hints = { .ai_family = AF_UNSPEC, .ai_socktype = SOCK_STREAM }; char *service; char msg[sizeof "0000:000000:000000:00000000000000000000000000000000"]; int n; int sockfd = -1; struct pingpong_dest *rem_dest = NULL; char gid[33]; if (asprintf(&service, "%d", port) < 0) return NULL; n = getaddrinfo(servername, service, &hints, &res); if (n < 0) { fprintf(stderr, "%s for %s:%d\n", gai_strerror(n), servername, port); free(service); return NULL; } for (t = res; t; t = t->ai_next) { sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol); 
if (sockfd >= 0) { if (!connect(sockfd, t->ai_addr, t->ai_addrlen)) break; close(sockfd); sockfd = -1; } } freeaddrinfo_null(res); free(service); if (sockfd < 0) { fprintf(stderr, "Couldn't connect to %s:%d\n", servername, port); return NULL; } gid_to_wire_gid(&my_dest->gid, gid); sprintf(msg, "%04x:%06x:%06x:%s", my_dest->lid, my_dest->qpn, my_dest->psn, gid); if (write(sockfd, msg, sizeof msg) != sizeof msg) { fprintf(stderr, "Couldn't send local address\n"); goto out; } if (read(sockfd, msg, sizeof msg) != sizeof msg || write(sockfd, "done", sizeof "done") != sizeof "done") { perror("client read/write"); fprintf(stderr, "Couldn't read/write remote address\n"); goto out; } rem_dest = malloc(sizeof *rem_dest); if (!rem_dest) goto out; sscanf(msg, "%x:%x:%x:%s", &rem_dest->lid, &rem_dest->qpn, &rem_dest->psn, gid); wire_gid_to_gid(gid, &rem_dest->gid); out: close(sockfd); return rem_dest; } static struct pingpong_dest *pp_server_exch_dest(struct pingpong_context *ctx, int ib_port, enum ibv_mtu mtu, int port, int sl, const struct pingpong_dest *my_dest, int sgid_idx) { struct addrinfo *res, *t; struct addrinfo hints = { .ai_flags = AI_PASSIVE, .ai_family = AF_INET, .ai_socktype = SOCK_STREAM }; char *service; char msg[sizeof "0000:000000:000000:00000000000000000000000000000000"]; int n; int sockfd = -1, connfd; struct pingpong_dest *rem_dest = NULL; char gid[33]; if (asprintf(&service, "%d", port) < 0) return NULL; n = getaddrinfo(NULL, service, &hints, &res); if (n < 0) { fprintf(stderr, "%s for port %d\n", gai_strerror(n), port); free(service); return NULL; } for (t = res; t; t = t->ai_next) { sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol); if (sockfd >= 0) { n = 1; setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &n, sizeof n); if (!bind(sockfd, t->ai_addr, t->ai_addrlen)) break; close(sockfd); sockfd = -1; } } freeaddrinfo_null(res); free(service); if (sockfd < 0) { fprintf(stderr, "Couldn't listen to port %d\n", port); return NULL; } - listen(sockfd, 
1); + if (listen(sockfd, 1)) { + perror("listen() failed"); + close(sockfd); + return NULL; + } connfd = accept(sockfd, NULL, NULL); close(sockfd); if (connfd < 0) { fprintf(stderr, "accept() failed\n"); return NULL; } n = read(connfd, msg, sizeof msg); if (n != sizeof msg) { perror("server read"); fprintf(stderr, "%d/%d: Couldn't read remote address\n", n, (int) sizeof msg); goto out; } rem_dest = malloc(sizeof *rem_dest); if (!rem_dest) goto out; sscanf(msg, "%x:%x:%x:%s", &rem_dest->lid, &rem_dest->qpn, &rem_dest->psn, gid); wire_gid_to_gid(gid, &rem_dest->gid); if (pp_connect_ctx(ctx, ib_port, my_dest->psn, mtu, sl, rem_dest, sgid_idx)) { fprintf(stderr, "Couldn't connect to remote QP\n"); free(rem_dest); rem_dest = NULL; goto out; } gid_to_wire_gid(&my_dest->gid, gid); sprintf(msg, "%04x:%06x:%06x:%s", my_dest->lid, my_dest->qpn, my_dest->psn, gid); if (write(connfd, msg, sizeof msg) != sizeof msg || read(connfd, msg, sizeof msg) != sizeof "done") { fprintf(stderr, "Couldn't send/recv local address\n"); free(rem_dest); rem_dest = NULL; goto out; } out: close(connfd); return rem_dest; } static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev, int size, int rx_depth, int port, int use_event) { struct pingpong_context *ctx; int access_flags = IBV_ACCESS_LOCAL_WRITE; ctx = calloc(1, sizeof *ctx); if (!ctx) return NULL; ctx->size = size; ctx->send_flags = IBV_SEND_SIGNALED; ctx->rx_depth = rx_depth; ctx->buf = memalign(page_size, size); if (!ctx->buf) { fprintf(stderr, "Couldn't allocate work buf.\n"); goto clean_ctx; } /* FIXME memset(ctx->buf, 0, size); */ memset(ctx->buf, 0x7b, size); ctx->context = ibv_open_device(ib_dev); if (!ctx->context) { fprintf(stderr, "Couldn't get context for %s\n", ibv_get_device_name(ib_dev)); goto clean_buffer; } if (use_event) { ctx->channel = ibv_create_comp_channel(ctx->context); if (!ctx->channel) { fprintf(stderr, "Couldn't create completion channel\n"); goto clean_device; } } else ctx->channel = NULL; ctx->pd = 
ibv_alloc_pd(ctx->context); if (!ctx->pd) { fprintf(stderr, "Couldn't allocate PD\n"); goto clean_comp_channel; } if (use_odp || use_ts) { const uint32_t rc_caps_mask = IBV_ODP_SUPPORT_SEND | IBV_ODP_SUPPORT_RECV; struct ibv_device_attr_ex attrx; if (ibv_query_device_ex(ctx->context, NULL, &attrx)) { fprintf(stderr, "Couldn't query device for its features\n"); goto clean_comp_channel; } if (use_odp) { if (!(attrx.odp_caps.general_caps & IBV_ODP_SUPPORT) || (attrx.odp_caps.per_transport_caps.rc_odp_caps & rc_caps_mask) != rc_caps_mask) { fprintf(stderr, "The device isn't ODP capable or does not support RC send and receive with ODP\n"); goto clean_comp_channel; } access_flags |= IBV_ACCESS_ON_DEMAND; } if (use_ts) { if (!attrx.completion_timestamp_mask) { fprintf(stderr, "The device isn't completion timestamp capable\n"); goto clean_comp_channel; } ctx->completion_timestamp_mask = attrx.completion_timestamp_mask; } } ctx->mr = ibv_reg_mr(ctx->pd, ctx->buf, size, access_flags); if (!ctx->mr) { fprintf(stderr, "Couldn't register MR\n"); goto clean_pd; } if (use_ts) { struct ibv_cq_init_attr_ex attr_ex = { .cqe = rx_depth + 1, .cq_context = NULL, .channel = ctx->channel, .comp_vector = 0, .wc_flags = IBV_WC_EX_WITH_COMPLETION_TIMESTAMP }; ctx->cq_s.cq_ex = ibv_create_cq_ex(ctx->context, &attr_ex); } else { ctx->cq_s.cq = ibv_create_cq(ctx->context, rx_depth + 1, NULL, ctx->channel, 0); } if (!pp_cq(ctx)) { fprintf(stderr, "Couldn't create CQ\n"); goto clean_mr; } { struct ibv_qp_attr attr; struct ibv_qp_init_attr init_attr = { .send_cq = pp_cq(ctx), .recv_cq = pp_cq(ctx), .cap = { .max_send_wr = 1, .max_recv_wr = rx_depth, .max_send_sge = 1, .max_recv_sge = 1 }, .qp_type = IBV_QPT_RC }; ctx->qp = ibv_create_qp(ctx->pd, &init_attr); if (!ctx->qp) { fprintf(stderr, "Couldn't create QP\n"); goto clean_cq; } ibv_query_qp(ctx->qp, &attr, IBV_QP_CAP, &init_attr); if (init_attr.cap.max_inline_data >= size) { ctx->send_flags |= IBV_SEND_INLINE; } } { struct ibv_qp_attr attr = { 
.qp_state = IBV_QPS_INIT, .pkey_index = 0, .port_num = port, .qp_access_flags = 0 }; if (ibv_modify_qp(ctx->qp, &attr, IBV_QP_STATE | IBV_QP_PKEY_INDEX | IBV_QP_PORT | IBV_QP_ACCESS_FLAGS)) { fprintf(stderr, "Failed to modify QP to INIT\n"); goto clean_qp; } } return ctx; clean_qp: ibv_destroy_qp(ctx->qp); clean_cq: ibv_destroy_cq(pp_cq(ctx)); clean_mr: ibv_dereg_mr(ctx->mr); clean_pd: ibv_dealloc_pd(ctx->pd); clean_comp_channel: if (ctx->channel) ibv_destroy_comp_channel(ctx->channel); clean_device: ibv_close_device(ctx->context); clean_buffer: free(ctx->buf); clean_ctx: free(ctx); return NULL; } static int pp_close_ctx(struct pingpong_context *ctx) { if (ibv_destroy_qp(ctx->qp)) { fprintf(stderr, "Couldn't destroy QP\n"); return 1; } if (ibv_destroy_cq(pp_cq(ctx))) { fprintf(stderr, "Couldn't destroy CQ\n"); return 1; } if (ibv_dereg_mr(ctx->mr)) { fprintf(stderr, "Couldn't deregister MR\n"); return 1; } if (ibv_dealloc_pd(ctx->pd)) { fprintf(stderr, "Couldn't deallocate PD\n"); return 1; } if (ctx->channel) { if (ibv_destroy_comp_channel(ctx->channel)) { fprintf(stderr, "Couldn't destroy completion channel\n"); return 1; } } if (ibv_close_device(ctx->context)) { fprintf(stderr, "Couldn't release context\n"); return 1; } free(ctx->buf); free(ctx); return 0; } static int pp_post_recv(struct pingpong_context *ctx, int n) { struct ibv_sge list = { .addr = (uintptr_t) ctx->buf, .length = ctx->size, .lkey = ctx->mr->lkey }; struct ibv_recv_wr wr = { .wr_id = PINGPONG_RECV_WRID, .sg_list = &list, .num_sge = 1, }; struct ibv_recv_wr *bad_wr; int i; for (i = 0; i < n; ++i) if (ibv_post_recv(ctx->qp, &wr, &bad_wr)) break; return i; } static int pp_post_send(struct pingpong_context *ctx) { struct ibv_sge list = { .addr = (uintptr_t) ctx->buf, .length = ctx->size, .lkey = ctx->mr->lkey }; struct ibv_send_wr wr = { .wr_id = PINGPONG_SEND_WRID, .sg_list = &list, .num_sge = 1, .opcode = IBV_WR_SEND, .send_flags = ctx->send_flags, }; struct ibv_send_wr *bad_wr; return 
ibv_post_send(ctx->qp, &wr, &bad_wr); } struct ts_params { uint64_t comp_recv_max_time_delta; uint64_t comp_recv_min_time_delta; uint64_t comp_recv_total_time_delta; uint64_t comp_recv_prev_time; int last_comp_with_ts; unsigned int comp_with_time_iters; }; static inline int parse_single_wc(struct pingpong_context *ctx, int *scnt, int *rcnt, int *routs, int iters, uint64_t wr_id, enum ibv_wc_status status, uint64_t completion_timestamp, struct ts_params *ts) { if (status != IBV_WC_SUCCESS) { fprintf(stderr, "Failed status %s (%d) for wr_id %d\n", ibv_wc_status_str(status), status, (int)wr_id); return 1; } switch ((int)wr_id) { case PINGPONG_SEND_WRID: ++(*scnt); break; case PINGPONG_RECV_WRID: if (--(*routs) <= 1) { *routs += pp_post_recv(ctx, ctx->rx_depth - *routs); if (*routs < ctx->rx_depth) { fprintf(stderr, "Couldn't post receive (%d)\n", *routs); return 1; } } ++(*rcnt); if (use_ts) { if (ts->last_comp_with_ts) { uint64_t delta; /* checking whether the clock was wrapped around */ if (completion_timestamp >= ts->comp_recv_prev_time) delta = completion_timestamp - ts->comp_recv_prev_time; else delta = ctx->completion_timestamp_mask - ts->comp_recv_prev_time + completion_timestamp + 1; ts->comp_recv_max_time_delta = MAX(ts->comp_recv_max_time_delta, delta); ts->comp_recv_min_time_delta = MIN(ts->comp_recv_min_time_delta, delta); ts->comp_recv_total_time_delta += delta; ts->comp_with_time_iters++; } ts->comp_recv_prev_time = completion_timestamp; ts->last_comp_with_ts = 1; } else { ts->last_comp_with_ts = 0; } break; default: fprintf(stderr, "Completion for unknown wr_id %d\n", (int)wr_id); return 1; } ctx->pending &= ~(int)wr_id; if (*scnt < iters && !ctx->pending) { if (pp_post_send(ctx)) { fprintf(stderr, "Couldn't post send\n"); return 1; } ctx->pending = PINGPONG_RECV_WRID | PINGPONG_SEND_WRID; } return 0; } static void usage(const char *argv0) { printf("Usage:\n"); printf(" %s start a server and wait for connection\n", argv0); printf(" %s connect to server 
at \n", argv0); printf("\n"); printf("Options:\n"); printf(" -p, --port= listen on/connect to port (default 18515)\n"); printf(" -d, --ib-dev= use IB device (default first device found)\n"); printf(" -i, --ib-port= use port of IB device (default 1)\n"); printf(" -s, --size= size of message to exchange (default 4096)\n"); printf(" -m, --mtu= path MTU (default 1024)\n"); printf(" -r, --rx-depth= number of receives to post at a time (default 500)\n"); printf(" -n, --iters= number of exchanges (default 1000)\n"); printf(" -l, --sl= service level value\n"); printf(" -e, --events sleep on CQ events (default poll)\n"); printf(" -g, --gid-idx= local port gid index\n"); printf(" -o, --odp use on demand paging\n"); printf(" -t, --ts get CQE with timestamp\n"); } int main(int argc, char *argv[]) { struct ibv_device **dev_list; struct ibv_device *ib_dev; struct pingpong_context *ctx; struct pingpong_dest my_dest; struct pingpong_dest *rem_dest; struct timeval start, end; char *ib_devname = NULL; char *servername = NULL; unsigned int port = 18515; int ib_port = 1; unsigned int size = 4096; enum ibv_mtu mtu = IBV_MTU_1024; unsigned int rx_depth = 500; unsigned int iters = 1000; int use_event = 0; int routs; int rcnt, scnt; int num_cq_events = 0; int sl = 0; int gidx = -1; char gid[33]; struct ts_params ts; srand48(getpid() * time(NULL)); while (1) { int c; static struct option long_options[] = { { .name = "port", .has_arg = 1, .val = 'p' }, { .name = "ib-dev", .has_arg = 1, .val = 'd' }, { .name = "ib-port", .has_arg = 1, .val = 'i' }, { .name = "size", .has_arg = 1, .val = 's' }, { .name = "mtu", .has_arg = 1, .val = 'm' }, { .name = "rx-depth", .has_arg = 1, .val = 'r' }, { .name = "iters", .has_arg = 1, .val = 'n' }, { .name = "sl", .has_arg = 1, .val = 'l' }, { .name = "events", .has_arg = 0, .val = 'e' }, { .name = "gid-idx", .has_arg = 1, .val = 'g' }, { .name = "odp", .has_arg = 0, .val = 'o' }, { .name = "ts", .has_arg = 0, .val = 't' }, {} }; c = getopt_long(argc, argv, 
"p:d:i:s:m:r:n:l:eg:ot", long_options, NULL); if (c == -1) break; switch (c) { case 'p': port = strtoul(optarg, NULL, 0); if (port > 65535) { usage(argv[0]); return 1; } break; case 'd': ib_devname = strdupa(optarg); break; case 'i': ib_port = strtol(optarg, NULL, 0); if (ib_port < 1) { usage(argv[0]); return 1; } break; case 's': size = strtoul(optarg, NULL, 0); break; case 'm': mtu = pp_mtu_to_enum(strtol(optarg, NULL, 0)); if (mtu == 0) { usage(argv[0]); return 1; } break; case 'r': rx_depth = strtoul(optarg, NULL, 0); break; case 'n': iters = strtoul(optarg, NULL, 0); break; case 'l': sl = strtol(optarg, NULL, 0); break; case 'e': ++use_event; break; case 'g': gidx = strtol(optarg, NULL, 0); break; case 'o': use_odp = 1; break; case 't': use_ts = 1; break; default: usage(argv[0]); return 1; } } if (optind == argc - 1) servername = strdupa(argv[optind]); else if (optind < argc) { usage(argv[0]); return 1; } if (use_ts) { ts.comp_recv_max_time_delta = 0; ts.comp_recv_min_time_delta = 0xffffffff; ts.comp_recv_total_time_delta = 0; ts.comp_recv_prev_time = 0; ts.last_comp_with_ts = 0; ts.comp_with_time_iters = 0; } page_size = sysconf(_SC_PAGESIZE); dev_list = ibv_get_device_list(NULL); if (!dev_list) { perror("Failed to get IB devices list"); return 1; } if (!ib_devname) { ib_dev = *dev_list; if (!ib_dev) { fprintf(stderr, "No IB devices found\n"); return 1; } } else { int i; for (i = 0; dev_list[i]; ++i) if (!strcmp(ibv_get_device_name(dev_list[i]), ib_devname)) break; ib_dev = dev_list[i]; if (!ib_dev) { fprintf(stderr, "IB device %s not found\n", ib_devname); return 1; } } ctx = pp_init_ctx(ib_dev, size, rx_depth, ib_port, use_event); if (!ctx) return 1; routs = pp_post_recv(ctx, ctx->rx_depth); if (routs < ctx->rx_depth) { fprintf(stderr, "Couldn't post receive (%d)\n", routs); return 1; } if (use_event) if (ibv_req_notify_cq(pp_cq(ctx), 0)) { fprintf(stderr, "Couldn't request CQ notification\n"); return 1; } if (pp_get_port_info(ctx->context, ib_port, 
&ctx->portinfo)) { fprintf(stderr, "Couldn't get port info\n"); return 1; } my_dest.lid = ctx->portinfo.lid; if (ctx->portinfo.link_layer != IBV_LINK_LAYER_ETHERNET && !my_dest.lid) { fprintf(stderr, "Couldn't get local LID\n"); return 1; } if (gidx >= 0) { if (ibv_query_gid(ctx->context, ib_port, gidx, &my_dest.gid)) { fprintf(stderr, "can't read sgid of index %d\n", gidx); return 1; } } else memset(&my_dest.gid, 0, sizeof my_dest.gid); my_dest.qpn = ctx->qp->qp_num; my_dest.psn = lrand48() & 0xffffff; inet_ntop(AF_INET6, &my_dest.gid, gid, sizeof gid); printf(" local address: LID 0x%04x, QPN 0x%06x, PSN 0x%06x, GID %s\n", my_dest.lid, my_dest.qpn, my_dest.psn, gid); if (servername) rem_dest = pp_client_exch_dest(servername, port, &my_dest); else rem_dest = pp_server_exch_dest(ctx, ib_port, mtu, port, sl, &my_dest, gidx); if (!rem_dest) return 1; inet_ntop(AF_INET6, &rem_dest->gid, gid, sizeof gid); printf(" remote address: LID 0x%04x, QPN 0x%06x, PSN 0x%06x, GID %s\n", rem_dest->lid, rem_dest->qpn, rem_dest->psn, gid); if (servername) if (pp_connect_ctx(ctx, ib_port, my_dest.psn, mtu, sl, rem_dest, gidx)) return 1; ctx->pending = PINGPONG_RECV_WRID; if (servername) { if (pp_post_send(ctx)) { fprintf(stderr, "Couldn't post send\n"); return 1; } ctx->pending |= PINGPONG_SEND_WRID; } if (gettimeofday(&start, NULL)) { perror("gettimeofday"); return 1; } rcnt = scnt = 0; while (rcnt < iters || scnt < iters) { int ret; if (use_event) { struct ibv_cq *ev_cq; void *ev_ctx; if (ibv_get_cq_event(ctx->channel, &ev_cq, &ev_ctx)) { fprintf(stderr, "Failed to get cq_event\n"); return 1; } ++num_cq_events; if (ev_cq != pp_cq(ctx)) { fprintf(stderr, "CQ event for unknown CQ %p\n", ev_cq); return 1; } if (ibv_req_notify_cq(pp_cq(ctx), 0)) { fprintf(stderr, "Couldn't request CQ notification\n"); return 1; } } if (use_ts) { struct ibv_poll_cq_attr attr = {}; do { ret = ibv_start_poll(ctx->cq_s.cq_ex, &attr); } while (!use_event && ret == ENOENT); if (ret) { fprintf(stderr, "poll CQ 
failed %d\n", ret); return ret; } ret = parse_single_wc(ctx, &scnt, &rcnt, &routs, iters, ctx->cq_s.cq_ex->wr_id, ctx->cq_s.cq_ex->status, ibv_wc_read_completion_ts(ctx->cq_s.cq_ex), &ts); if (ret) { ibv_end_poll(ctx->cq_s.cq_ex); return ret; } ret = ibv_next_poll(ctx->cq_s.cq_ex); if (!ret) ret = parse_single_wc(ctx, &scnt, &rcnt, &routs, iters, ctx->cq_s.cq_ex->wr_id, ctx->cq_s.cq_ex->status, ibv_wc_read_completion_ts(ctx->cq_s.cq_ex), &ts); ibv_end_poll(ctx->cq_s.cq_ex); if (ret && ret != ENOENT) { fprintf(stderr, "poll CQ failed %d\n", ret); return ret; } } else { int ne, i; struct ibv_wc wc[2]; do { ne = ibv_poll_cq(pp_cq(ctx), 2, wc); if (ne < 0) { fprintf(stderr, "poll CQ failed %d\n", ne); return 1; } } while (!use_event && ne < 1); for (i = 0; i < ne; ++i) { ret = parse_single_wc(ctx, &scnt, &rcnt, &routs, iters, wc[i].wr_id, wc[i].status, 0, &ts); if (ret) { fprintf(stderr, "parse WC failed %d\n", ne); return 1; } } } } if (gettimeofday(&end, NULL)) { perror("gettimeofday"); return 1; } { float usec = (end.tv_sec - start.tv_sec) * 1000000 + (end.tv_usec - start.tv_usec); long long bytes = (long long) size * iters * 2; printf("%lld bytes in %.2f seconds = %.2f Mbit/sec\n", bytes, usec / 1000000., bytes * 8. 
/ usec); printf("%d iters in %.2f seconds = %.2f usec/iter\n", iters, usec / 1000000., usec / iters); if (use_ts && ts.comp_with_time_iters) { printf("Max receive completion clock cycles = %" PRIu64 "\n", ts.comp_recv_max_time_delta); printf("Min receive completion clock cycles = %" PRIu64 "\n", ts.comp_recv_min_time_delta); printf("Average receive completion clock cycles = %f\n", (double)ts.comp_recv_total_time_delta / ts.comp_with_time_iters); } } ibv_ack_cq_events(pp_cq(ctx), num_cq_events); if (pp_close_ctx(ctx)) return 1; ibv_free_device_list(dev_list); free(rem_dest); return 0; } Index: head/contrib/ofed/libibverbs/examples/srq_pingpong.c =================================================================== --- head/contrib/ofed/libibverbs/examples/srq_pingpong.c (revision 363219) +++ head/contrib/ofed/libibverbs/examples/srq_pingpong.c (revision 363220) @@ -1,985 +1,989 @@ /* * Copyright (c) 2005 Topspin Communications. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #define _GNU_SOURCE #include #include #include #include #include #include #include #include #include #include #include #include #include #include "pingpong.h" enum { PINGPONG_RECV_WRID = 1, PINGPONG_SEND_WRID = 2, MAX_QP = 256, }; static int page_size; struct pingpong_context { struct ibv_context *context; struct ibv_comp_channel *channel; struct ibv_pd *pd; struct ibv_mr *mr; struct ibv_cq *cq; struct ibv_srq *srq; struct ibv_qp *qp[MAX_QP]; void *buf; int size; int send_flags; int num_qp; int rx_depth; int pending[MAX_QP]; struct ibv_port_attr portinfo; }; struct pingpong_dest { int lid; int qpn; int psn; union ibv_gid gid; }; static int pp_connect_ctx(struct pingpong_context *ctx, int port, enum ibv_mtu mtu, int sl, const struct pingpong_dest *my_dest, const struct pingpong_dest *dest, int sgid_idx) { int i; for (i = 0; i < ctx->num_qp; ++i) { struct ibv_qp_attr attr = { .qp_state = IBV_QPS_RTR, .path_mtu = mtu, .dest_qp_num = dest[i].qpn, .rq_psn = dest[i].psn, .max_dest_rd_atomic = 1, .min_rnr_timer = 12, .ah_attr = { .is_global = 0, .dlid = dest[i].lid, .sl = sl, .src_path_bits = 0, .port_num = port } }; if (dest->gid.global.interface_id) { attr.ah_attr.is_global = 1; attr.ah_attr.grh.hop_limit = 1; attr.ah_attr.grh.dgid = dest->gid; attr.ah_attr.grh.sgid_index = sgid_idx; } if (ibv_modify_qp(ctx->qp[i], &attr, IBV_QP_STATE | IBV_QP_AV | IBV_QP_PATH_MTU | IBV_QP_DEST_QPN | IBV_QP_RQ_PSN | IBV_QP_MAX_DEST_RD_ATOMIC | IBV_QP_MIN_RNR_TIMER)) { fprintf(stderr, "Failed to modify QP[%d] to RTR\n", i); return 1; } attr.qp_state = IBV_QPS_RTS; attr.timeout = 14; attr.retry_cnt = 7; attr.rnr_retry = 7; attr.sq_psn = my_dest[i].psn; attr.max_rd_atomic = 1; if (ibv_modify_qp(ctx->qp[i], &attr, 
IBV_QP_STATE | IBV_QP_TIMEOUT | IBV_QP_RETRY_CNT | IBV_QP_RNR_RETRY | IBV_QP_SQ_PSN | IBV_QP_MAX_QP_RD_ATOMIC)) { fprintf(stderr, "Failed to modify QP[%d] to RTS\n", i); return 1; } } return 0; } static struct pingpong_dest *pp_client_exch_dest(const char *servername, int port, const struct pingpong_dest *my_dest) { struct addrinfo *res, *t; struct addrinfo hints = { .ai_family = AF_UNSPEC, .ai_socktype = SOCK_STREAM }; char *service; char msg[sizeof "0000:000000:000000:00000000000000000000000000000000"]; int n; int r; int i; int sockfd = -1; struct pingpong_dest *rem_dest = NULL; char gid[33]; if (asprintf(&service, "%d", port) < 0) return NULL; n = getaddrinfo(servername, service, &hints, &res); if (n < 0) { fprintf(stderr, "%s for %s:%d\n", gai_strerror(n), servername, port); free(service); return NULL; } for (t = res; t; t = t->ai_next) { sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol); if (sockfd >= 0) { if (!connect(sockfd, t->ai_addr, t->ai_addrlen)) break; close(sockfd); sockfd = -1; } } freeaddrinfo_null(res); free(service); if (sockfd < 0) { fprintf(stderr, "Couldn't connect to %s:%d\n", servername, port); return NULL; } for (i = 0; i < MAX_QP; ++i) { gid_to_wire_gid(&my_dest[i].gid, gid); sprintf(msg, "%04x:%06x:%06x:%s", my_dest[i].lid, my_dest[i].qpn, my_dest[i].psn, gid); if (write(sockfd, msg, sizeof msg) != sizeof msg) { fprintf(stderr, "Couldn't send local address\n"); goto out; } } rem_dest = malloc(MAX_QP * sizeof *rem_dest); if (!rem_dest) goto out; for (i = 0; i < MAX_QP; ++i) { n = 0; while (n < sizeof msg) { r = read(sockfd, msg + n, sizeof msg - n); if (r < 0) { perror("client read"); fprintf(stderr, "%d/%d: Couldn't read remote address [%d]\n", n, (int) sizeof msg, i); goto out; } n += r; } sscanf(msg, "%x:%x:%x:%s", &rem_dest[i].lid, &rem_dest[i].qpn, &rem_dest[i].psn, gid); wire_gid_to_gid(gid, &rem_dest[i].gid); } if (write(sockfd, "done", sizeof "done") != sizeof "done") { perror("client write"); goto out; } out: 
close(sockfd); return rem_dest; } static struct pingpong_dest *pp_server_exch_dest(struct pingpong_context *ctx, int ib_port, enum ibv_mtu mtu, int port, int sl, const struct pingpong_dest *my_dest, int sgid_idx) { struct addrinfo *res, *t; struct addrinfo hints = { .ai_flags = AI_PASSIVE, .ai_family = AF_INET, .ai_socktype = SOCK_STREAM }; char *service; char msg[sizeof "0000:000000:000000:00000000000000000000000000000000"]; int n; int r; int i; int sockfd = -1, connfd; struct pingpong_dest *rem_dest = NULL; char gid[33]; if (asprintf(&service, "%d", port) < 0) return NULL; n = getaddrinfo(NULL, service, &hints, &res); if (n < 0) { fprintf(stderr, "%s for port %d\n", gai_strerror(n), port); free(service); return NULL; } for (t = res; t; t = t->ai_next) { sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol); if (sockfd >= 0) { n = 1; setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &n, sizeof n); if (!bind(sockfd, t->ai_addr, t->ai_addrlen)) break; close(sockfd); sockfd = -1; } } freeaddrinfo_null(res); free(service); if (sockfd < 0) { fprintf(stderr, "Couldn't listen to port %d\n", port); return NULL; } - listen(sockfd, 1); + if (listen(sockfd, 1)) { + perror("listen() failed"); + close(sockfd); + return NULL; + } connfd = accept(sockfd, NULL, NULL); close(sockfd); if (connfd < 0) { fprintf(stderr, "accept() failed\n"); return NULL; } rem_dest = malloc(MAX_QP * sizeof *rem_dest); if (!rem_dest) goto out; for (i = 0; i < MAX_QP; ++i) { n = 0; while (n < sizeof msg) { r = read(connfd, msg + n, sizeof msg - n); if (r < 0) { perror("server read"); fprintf(stderr, "%d/%d: Couldn't read remote address [%d]\n", n, (int) sizeof msg, i); goto out; } n += r; } sscanf(msg, "%x:%x:%x:%s", &rem_dest[i].lid, &rem_dest[i].qpn, &rem_dest[i].psn, gid); wire_gid_to_gid(gid, &rem_dest[i].gid); } if (pp_connect_ctx(ctx, ib_port, mtu, sl, my_dest, rem_dest, sgid_idx)) { fprintf(stderr, "Couldn't connect to remote QP\n"); free(rem_dest); rem_dest = NULL; goto out; } for (i = 0; i 
< MAX_QP; ++i) { gid_to_wire_gid(&my_dest[i].gid, gid); sprintf(msg, "%04x:%06x:%06x:%s", my_dest[i].lid, my_dest[i].qpn, my_dest[i].psn, gid); if (write(connfd, msg, sizeof msg) != sizeof msg) { fprintf(stderr, "Couldn't send local address\n"); free(rem_dest); rem_dest = NULL; goto out; } } if (read(connfd, msg, sizeof msg) != sizeof "done") { perror("client write"); free(rem_dest); rem_dest = NULL; goto out; } out: close(connfd); return rem_dest; } static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev, int size, int num_qp, int rx_depth, int port, int use_event) { struct pingpong_context *ctx; int i; ctx = calloc(1, sizeof *ctx); if (!ctx) return NULL; ctx->size = size; ctx->send_flags = IBV_SEND_SIGNALED; ctx->num_qp = num_qp; ctx->rx_depth = rx_depth; ctx->buf = memalign(page_size, size); if (!ctx->buf) { fprintf(stderr, "Couldn't allocate work buf.\n"); goto clean_ctx; } memset(ctx->buf, 0, size); ctx->context = ibv_open_device(ib_dev); if (!ctx->context) { fprintf(stderr, "Couldn't get context for %s\n", ibv_get_device_name(ib_dev)); goto clean_buffer; } if (use_event) { ctx->channel = ibv_create_comp_channel(ctx->context); if (!ctx->channel) { fprintf(stderr, "Couldn't create completion channel\n"); goto clean_device; } } else ctx->channel = NULL; ctx->pd = ibv_alloc_pd(ctx->context); if (!ctx->pd) { fprintf(stderr, "Couldn't allocate PD\n"); goto clean_comp_channel; } ctx->mr = ibv_reg_mr(ctx->pd, ctx->buf, size, IBV_ACCESS_LOCAL_WRITE); if (!ctx->mr) { fprintf(stderr, "Couldn't register MR\n"); goto clean_pd; } ctx->cq = ibv_create_cq(ctx->context, rx_depth + num_qp, NULL, ctx->channel, 0); if (!ctx->cq) { fprintf(stderr, "Couldn't create CQ\n"); goto clean_mr; } { struct ibv_srq_init_attr attr = { .attr = { .max_wr = rx_depth, .max_sge = 1 } }; ctx->srq = ibv_create_srq(ctx->pd, &attr); if (!ctx->srq) { fprintf(stderr, "Couldn't create SRQ\n"); goto clean_cq; } } for (i = 0; i < num_qp; ++i) { struct ibv_qp_attr attr; struct 
ibv_qp_init_attr init_attr = { .send_cq = ctx->cq, .recv_cq = ctx->cq, .srq = ctx->srq, .cap = { .max_send_wr = 1, .max_send_sge = 1, }, .qp_type = IBV_QPT_RC }; ctx->qp[i] = ibv_create_qp(ctx->pd, &init_attr); if (!ctx->qp[i]) { fprintf(stderr, "Couldn't create QP[%d]\n", i); goto clean_qps; } ibv_query_qp(ctx->qp[i], &attr, IBV_QP_CAP, &init_attr); if (init_attr.cap.max_inline_data >= size) { ctx->send_flags |= IBV_SEND_INLINE; } } for (i = 0; i < num_qp; ++i) { struct ibv_qp_attr attr = { .qp_state = IBV_QPS_INIT, .pkey_index = 0, .port_num = port, .qp_access_flags = 0 }; if (ibv_modify_qp(ctx->qp[i], &attr, IBV_QP_STATE | IBV_QP_PKEY_INDEX | IBV_QP_PORT | IBV_QP_ACCESS_FLAGS)) { fprintf(stderr, "Failed to modify QP[%d] to INIT\n", i); goto clean_qps_full; } } return ctx; clean_qps_full: i = num_qp; clean_qps: for (--i; i >= 0; --i) ibv_destroy_qp(ctx->qp[i]); ibv_destroy_srq(ctx->srq); clean_cq: ibv_destroy_cq(ctx->cq); clean_mr: ibv_dereg_mr(ctx->mr); clean_pd: ibv_dealloc_pd(ctx->pd); clean_comp_channel: if (ctx->channel) ibv_destroy_comp_channel(ctx->channel); clean_device: ibv_close_device(ctx->context); clean_buffer: free(ctx->buf); clean_ctx: free(ctx); return NULL; } static int pp_close_ctx(struct pingpong_context *ctx, int num_qp) { int i; for (i = 0; i < num_qp; ++i) { if (ibv_destroy_qp(ctx->qp[i])) { fprintf(stderr, "Couldn't destroy QP[%d]\n", i); return 1; } } if (ibv_destroy_srq(ctx->srq)) { fprintf(stderr, "Couldn't destroy SRQ\n"); return 1; } if (ibv_destroy_cq(ctx->cq)) { fprintf(stderr, "Couldn't destroy CQ\n"); return 1; } if (ibv_dereg_mr(ctx->mr)) { fprintf(stderr, "Couldn't deregister MR\n"); return 1; } if (ibv_dealloc_pd(ctx->pd)) { fprintf(stderr, "Couldn't deallocate PD\n"); return 1; } if (ctx->channel) { if (ibv_destroy_comp_channel(ctx->channel)) { fprintf(stderr, "Couldn't destroy completion channel\n"); return 1; } } if (ibv_close_device(ctx->context)) { fprintf(stderr, "Couldn't release context\n"); return 1; } free(ctx->buf); 
free(ctx); return 0; } static int pp_post_recv(struct pingpong_context *ctx, int n) { struct ibv_sge list = { .addr = (uintptr_t) ctx->buf, .length = ctx->size, .lkey = ctx->mr->lkey }; struct ibv_recv_wr wr = { .wr_id = PINGPONG_RECV_WRID, .sg_list = &list, .num_sge = 1, }; struct ibv_recv_wr *bad_wr; int i; for (i = 0; i < n; ++i) if (ibv_post_srq_recv(ctx->srq, &wr, &bad_wr)) break; return i; } static int pp_post_send(struct pingpong_context *ctx, int qp_index) { struct ibv_sge list = { .addr = (uintptr_t) ctx->buf, .length = ctx->size, .lkey = ctx->mr->lkey }; struct ibv_send_wr wr = { .wr_id = PINGPONG_SEND_WRID, .sg_list = &list, .num_sge = 1, .opcode = IBV_WR_SEND, .send_flags = ctx->send_flags, }; struct ibv_send_wr *bad_wr; return ibv_post_send(ctx->qp[qp_index], &wr, &bad_wr); } static int find_qp(int qpn, struct pingpong_context *ctx, int num_qp) { int i; for (i = 0; i < num_qp; ++i) if (ctx->qp[i]->qp_num == qpn) return i; return -1; } static void usage(const char *argv0) { printf("Usage:\n"); printf(" %s start a server and wait for connection\n", argv0); printf(" %s connect to server at \n", argv0); printf("\n"); printf("Options:\n"); printf(" -p, --port= listen on/connect to port (default 18515)\n"); printf(" -d, --ib-dev= use IB device (default first device found)\n"); printf(" -i, --ib-port= use port of IB device (default 1)\n"); printf(" -s, --size= size of message to exchange (default 4096)\n"); printf(" -m, --mtu= path MTU (default 1024)\n"); printf(" -q, --num-qp= number of QPs to use (default 16)\n"); printf(" -r, --rx-depth= number of receives to post at a time (default 500)\n"); printf(" -n, --iters= number of exchanges per QP(default 1000)\n"); printf(" -l, --sl= service level value\n"); printf(" -e, --events sleep on CQ events (default poll)\n"); printf(" -g, --gid-idx= local port gid index\n"); } int main(int argc, char *argv[]) { struct ibv_device **dev_list; struct ibv_device *ib_dev; struct ibv_wc *wc; struct pingpong_context *ctx; 
struct pingpong_dest my_dest[MAX_QP]; struct pingpong_dest *rem_dest; struct timeval start, end; char *ib_devname = NULL; char *servername = NULL; unsigned int port = 18515; int ib_port = 1; unsigned int size = 4096; enum ibv_mtu mtu = IBV_MTU_1024; unsigned int num_qp = 16; unsigned int rx_depth = 500; unsigned int iters = 1000; int use_event = 0; int routs; int rcnt, scnt; int num_wc; int i; int num_cq_events = 0; int sl = 0; int gidx = -1; char gid[33]; srand48(getpid() * time(NULL)); while (1) { int c; static struct option long_options[] = { { .name = "port", .has_arg = 1, .val = 'p' }, { .name = "ib-dev", .has_arg = 1, .val = 'd' }, { .name = "ib-port", .has_arg = 1, .val = 'i' }, { .name = "size", .has_arg = 1, .val = 's' }, { .name = "mtu", .has_arg = 1, .val = 'm' }, { .name = "num-qp", .has_arg = 1, .val = 'q' }, { .name = "rx-depth", .has_arg = 1, .val = 'r' }, { .name = "iters", .has_arg = 1, .val = 'n' }, { .name = "sl", .has_arg = 1, .val = 'l' }, { .name = "events", .has_arg = 0, .val = 'e' }, { .name = "gid-idx", .has_arg = 1, .val = 'g' }, {} }; c = getopt_long(argc, argv, "p:d:i:s:m:q:r:n:l:eg:", long_options, NULL); if (c == -1) break; switch (c) { case 'p': port = strtoul(optarg, NULL, 0); if (port > 65535) { usage(argv[0]); return 1; } break; case 'd': ib_devname = strdupa(optarg); break; case 'i': ib_port = strtol(optarg, NULL, 0); if (ib_port < 1) { usage(argv[0]); return 1; } break; case 's': size = strtoul(optarg, NULL, 0); if (size < 1) { usage(argv[0]); return 1; } break; case 'm': mtu = pp_mtu_to_enum(strtol(optarg, NULL, 0)); if (mtu == 0) { usage(argv[0]); return 1; } break; case 'q': num_qp = strtoul(optarg, NULL, 0); break; case 'r': rx_depth = strtoul(optarg, NULL, 0); break; case 'n': iters = strtoul(optarg, NULL, 0); break; case 'l': sl = strtol(optarg, NULL, 0); break; case 'e': ++use_event; break; case 'g': gidx = strtol(optarg, NULL, 0); break; default: usage(argv[0]); return 1; } } if (optind == argc - 1) servername = 
strdupa(argv[optind]); else if (optind < argc) { usage(argv[0]); return 1; } if (num_qp > rx_depth) { fprintf(stderr, "rx_depth %d is too small for %d QPs -- " "must have at least one receive per QP.\n", rx_depth, num_qp); return 1; } num_wc = num_qp + rx_depth; wc = alloca(num_wc * sizeof *wc); page_size = sysconf(_SC_PAGESIZE); dev_list = ibv_get_device_list(NULL); if (!dev_list) { perror("Failed to get IB devices list"); return 1; } if (!ib_devname) { ib_dev = *dev_list; if (!ib_dev) { fprintf(stderr, "No IB devices found\n"); return 1; } } else { for (i = 0; dev_list[i]; ++i) if (!strcmp(ibv_get_device_name(dev_list[i]), ib_devname)) break; ib_dev = dev_list[i]; if (!ib_dev) { fprintf(stderr, "IB device %s not found\n", ib_devname); return 1; } } ctx = pp_init_ctx(ib_dev, size, num_qp, rx_depth, ib_port, use_event); if (!ctx) return 1; routs = pp_post_recv(ctx, ctx->rx_depth); if (routs < ctx->rx_depth) { fprintf(stderr, "Couldn't post receive (%d)\n", routs); return 1; } if (use_event) if (ibv_req_notify_cq(ctx->cq, 0)) { fprintf(stderr, "Couldn't request CQ notification\n"); return 1; } memset(my_dest, 0, sizeof my_dest); if (pp_get_port_info(ctx->context, ib_port, &ctx->portinfo)) { fprintf(stderr, "Couldn't get port info\n"); return 1; } for (i = 0; i < num_qp; ++i) { my_dest[i].qpn = ctx->qp[i]->qp_num; my_dest[i].psn = lrand48() & 0xffffff; my_dest[i].lid = ctx->portinfo.lid; if (ctx->portinfo.link_layer != IBV_LINK_LAYER_ETHERNET && !my_dest[i].lid) { fprintf(stderr, "Couldn't get local LID\n"); return 1; } if (gidx >= 0) { if (ibv_query_gid(ctx->context, ib_port, gidx, &my_dest[i].gid)) { fprintf(stderr, "Could not get local gid for " "gid index %d\n", gidx); return 1; } } else memset(&my_dest[i].gid, 0, sizeof my_dest[i].gid); inet_ntop(AF_INET6, &my_dest[i].gid, gid, sizeof gid); printf(" local address: LID 0x%04x, QPN 0x%06x, PSN 0x%06x, " "GID %s\n", my_dest[i].lid, my_dest[i].qpn, my_dest[i].psn, gid); } if (servername) rem_dest = 
pp_client_exch_dest(servername, port, my_dest); else rem_dest = pp_server_exch_dest(ctx, ib_port, mtu, port, sl, my_dest, gidx); if (!rem_dest) return 1; inet_ntop(AF_INET6, &rem_dest->gid, gid, sizeof gid); for (i = 0; i < num_qp; ++i) { inet_ntop(AF_INET6, &rem_dest[i].gid, gid, sizeof gid); printf(" remote address: LID 0x%04x, QPN 0x%06x, PSN 0x%06x, " "GID %s\n", rem_dest[i].lid, rem_dest[i].qpn, rem_dest[i].psn, gid); } if (servername) if (pp_connect_ctx(ctx, ib_port, mtu, sl, my_dest, rem_dest, gidx)) return 1; if (servername) for (i = 0; i < num_qp; ++i) { if (pp_post_send(ctx, i)) { fprintf(stderr, "Couldn't post send\n"); return 1; } ctx->pending[i] = PINGPONG_SEND_WRID | PINGPONG_RECV_WRID; } else for (i = 0; i < num_qp; ++i) ctx->pending[i] = PINGPONG_RECV_WRID; if (gettimeofday(&start, NULL)) { perror("gettimeofday"); return 1; } rcnt = scnt = 0; while (rcnt < iters || scnt < iters) { if (use_event) { struct ibv_cq *ev_cq; void *ev_ctx; if (ibv_get_cq_event(ctx->channel, &ev_cq, &ev_ctx)) { fprintf(stderr, "Failed to get cq_event\n"); return 1; } ++num_cq_events; if (ev_cq != ctx->cq) { fprintf(stderr, "CQ event for unknown CQ %p\n", ev_cq); return 1; } if (ibv_req_notify_cq(ctx->cq, 0)) { fprintf(stderr, "Couldn't request CQ notification\n"); return 1; } } { int ne, qp_ind; do { ne = ibv_poll_cq(ctx->cq, num_wc, wc); if (ne < 0) { fprintf(stderr, "poll CQ failed %d\n", ne); return 1; } } while (!use_event && ne < 1); for (i = 0; i < ne; ++i) { if (wc[i].status != IBV_WC_SUCCESS) { fprintf(stderr, "Failed status %s (%d) for wr_id %d\n", ibv_wc_status_str(wc[i].status), wc[i].status, (int) wc[i].wr_id); return 1; } qp_ind = find_qp(wc[i].qp_num, ctx, num_qp); if (qp_ind < 0) { fprintf(stderr, "Couldn't find QPN %06x\n", wc[i].qp_num); return 1; } switch ((int) wc[i].wr_id) { case PINGPONG_SEND_WRID: ++scnt; break; case PINGPONG_RECV_WRID: if (--routs <= num_qp) { routs += pp_post_recv(ctx, ctx->rx_depth - routs); if (routs < ctx->rx_depth) { 
fprintf(stderr, "Couldn't post receive (%d)\n", routs); return 1; } } ++rcnt; break; default: fprintf(stderr, "Completion for unknown wr_id %d\n", (int) wc[i].wr_id); return 1; } ctx->pending[qp_ind] &= ~(int) wc[i].wr_id; if (scnt < iters && !ctx->pending[qp_ind]) { if (pp_post_send(ctx, qp_ind)) { fprintf(stderr, "Couldn't post send\n"); return 1; } ctx->pending[qp_ind] = PINGPONG_RECV_WRID | PINGPONG_SEND_WRID; } } } } if (gettimeofday(&end, NULL)) { perror("gettimeofday"); return 1; } { float usec = (end.tv_sec - start.tv_sec) * 1000000 + (end.tv_usec - start.tv_usec); long long bytes = (long long) size * iters * 2; printf("%lld bytes in %.2f seconds = %.2f Mbit/sec\n", bytes, usec / 1000000., bytes * 8. / usec); printf("%d iters in %.2f seconds = %.2f usec/iter\n", iters, usec / 1000000., usec / iters); } ibv_ack_cq_events(ctx->cq, num_cq_events); if (pp_close_ctx(ctx, num_qp)) return 1; ibv_free_device_list(dev_list); free(rem_dest); return 0; } Index: head/contrib/ofed/libibverbs/examples/uc_pingpong.c =================================================================== --- head/contrib/ofed/libibverbs/examples/uc_pingpong.c (revision 363219) +++ head/contrib/ofed/libibverbs/examples/uc_pingpong.c (revision 363220) @@ -1,854 +1,858 @@ /* * Copyright (c) 2005 Topspin Communications. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. 
* * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #define _GNU_SOURCE #include #include #include #include #include #include #include #include #include #include #include #include #include #include "pingpong.h" enum { PINGPONG_RECV_WRID = 1, PINGPONG_SEND_WRID = 2, }; static int page_size; struct pingpong_context { struct ibv_context *context; struct ibv_comp_channel *channel; struct ibv_pd *pd; struct ibv_mr *mr; struct ibv_cq *cq; struct ibv_qp *qp; void *buf; int size; int send_flags; int rx_depth; int pending; struct ibv_port_attr portinfo; }; struct pingpong_dest { int lid; int qpn; int psn; union ibv_gid gid; }; static int pp_connect_ctx(struct pingpong_context *ctx, int port, int my_psn, enum ibv_mtu mtu, int sl, struct pingpong_dest *dest, int sgid_idx) { struct ibv_qp_attr attr = { .qp_state = IBV_QPS_RTR, .path_mtu = mtu, .dest_qp_num = dest->qpn, .rq_psn = dest->psn, .ah_attr = { .is_global = 0, .dlid = dest->lid, .sl = sl, .src_path_bits = 0, .port_num = port } }; if (dest->gid.global.interface_id) { attr.ah_attr.is_global = 1; attr.ah_attr.grh.hop_limit = 1; attr.ah_attr.grh.dgid = dest->gid; attr.ah_attr.grh.sgid_index = sgid_idx; } if (ibv_modify_qp(ctx->qp, &attr, IBV_QP_STATE | IBV_QP_AV | IBV_QP_PATH_MTU | IBV_QP_DEST_QPN | IBV_QP_RQ_PSN)) { fprintf(stderr, "Failed to modify QP to RTR\n"); return 1; } attr.qp_state 
= IBV_QPS_RTS;
	attr.sq_psn = my_psn;
	if (ibv_modify_qp(ctx->qp, &attr, IBV_QP_STATE | IBV_QP_SQ_PSN)) {
		fprintf(stderr, "Failed to modify QP to RTS\n");
		return 1;
	}

	return 0;
}

/*
 * Client side of the out-of-band address exchange.
 *
 * Connects a TCP socket to servername:port, sends the local destination as
 * a fixed-size "lid:qpn:psn:gid" text record, reads the server's record back
 * and acknowledges it with "done".
 *
 * Returns a malloc()ed pingpong_dest describing the remote side (the caller
 * frees it), or NULL on any failure.
 */
static struct pingpong_dest *pp_client_exch_dest(const char *servername, int port,
						 const struct pingpong_dest *my_dest)
{
	struct addrinfo *res, *t;
	struct addrinfo hints = {
		.ai_family = AF_UNSPEC,
		.ai_socktype = SOCK_STREAM
	};
	char *service;
	char msg[sizeof "0000:000000:000000:00000000000000000000000000000000"];
	int n;
	int sockfd = -1;
	struct pingpong_dest *rem_dest = NULL;
	char gid[33];

	if (asprintf(&service, "%d", port) < 0)
		return NULL;

	n = getaddrinfo(servername, service, &hints, &res);

	/*
	 * getaddrinfo() reports failure with any non-zero value; the EAI_*
	 * codes are not negative on every platform, so "< 0" misses errors
	 * and would walk an uninitialised result list.
	 */
	if (n != 0) {
		fprintf(stderr, "%s for %s:%d\n", gai_strerror(n), servername, port);
		free(service);
		return NULL;
	}

	/* Use the first address that accepts the connection. */
	for (t = res; t; t = t->ai_next) {
		sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol);
		if (sockfd >= 0) {
			if (!connect(sockfd, t->ai_addr, t->ai_addrlen))
				break;
			close(sockfd);
			sockfd = -1;
		}
	}

	freeaddrinfo_null(res);
	free(service);

	if (sockfd < 0) {
		fprintf(stderr, "Couldn't connect to %s:%d\n", servername, port);
		return NULL;
	}

	gid_to_wire_gid(&my_dest->gid, gid);
	snprintf(msg, sizeof msg, "%04x:%06x:%06x:%s", my_dest->lid,
		 my_dest->qpn, my_dest->psn, gid);
	if (write(sockfd, msg, sizeof msg) != sizeof msg) {
		fprintf(stderr, "Couldn't send local address\n");
		goto out;
	}

	if (read(sockfd, msg, sizeof msg) != sizeof msg ||
	    write(sockfd, "done", sizeof "done") != sizeof "done") {
		perror("client read/write");
		fprintf(stderr, "Couldn't read/write remote address\n");
		goto out;
	}

	rem_dest = malloc(sizeof *rem_dest);
	if (!rem_dest)
		goto out;

	/*
	 * msg came from the network: force termination and bound the GID
	 * field to the 32 characters gid[] can hold.
	 */
	msg[sizeof msg - 1] = '\0';
	if (sscanf(msg, "%x:%x:%x:%32s", &rem_dest->lid, &rem_dest->qpn,
		   &rem_dest->psn, gid) != 4) {
		fprintf(stderr, "Couldn't parse remote address\n");
		free(rem_dest);
		rem_dest = NULL;
		goto out;
	}
	wire_gid_to_gid(gid, &rem_dest->gid);

out:
	close(sockfd);
	return rem_dest;
}

/*
 * Server side of the out-of-band address exchange.
 *
 * Binds a TCP socket to the given port, accepts one connection, reads the
 * client's "lid:qpn:psn:gid" record, connects the local QP to it with
 * pp_connect_ctx(), replies with the local record and waits for the
 * client's "done" acknowledgement.
 *
 * Returns a malloc()ed pingpong_dest describing the remote side (the caller
 * frees it), or NULL on any failure.
 */
static struct pingpong_dest *pp_server_exch_dest(struct pingpong_context *ctx,
						 int ib_port, enum ibv_mtu mtu,
						 int port, int sl,
						 const struct pingpong_dest *my_dest,
						 int sgid_idx)
{
	struct addrinfo *res, *t;
	struct addrinfo hints = {
		.ai_flags = AI_PASSIVE,
		.ai_family = AF_INET,
		.ai_socktype = SOCK_STREAM
	};
	char *service;
	char msg[sizeof "0000:000000:000000:00000000000000000000000000000000"];
	int n;
	int sockfd = -1, connfd;
	struct pingpong_dest *rem_dest = NULL;
	char gid[33];

	if (asprintf(&service, "%d", port) < 0)
		return NULL;

	n = getaddrinfo(NULL, service, &hints, &res);

	/* Any non-zero return is an error; EAI_* codes need not be negative. */
	if (n != 0) {
		fprintf(stderr, "%s for port %d\n", gai_strerror(n), port);
		free(service);
		return NULL;
	}

	/* Use the first address the socket can be bound to. */
	for (t = res; t; t = t->ai_next) {
		sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol);
		if (sockfd >= 0) {
			n = 1;

			setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &n, sizeof n);

			if (!bind(sockfd, t->ai_addr, t->ai_addrlen))
				break;
			close(sockfd);
			sockfd = -1;
		}
	}

	freeaddrinfo_null(res);
	free(service);

	if (sockfd < 0) {
		fprintf(stderr, "Couldn't listen to port %d\n", port);
		return NULL;
	}

	if (listen(sockfd, 1)) {
		perror("listen() failed");
		close(sockfd);
		return NULL;
	}
	connfd = accept(sockfd, NULL, NULL);
	close(sockfd);
	if (connfd < 0) {
		fprintf(stderr, "accept() failed\n");
		return NULL;
	}

	n = read(connfd, msg, sizeof msg);
	if (n != sizeof msg) {
		perror("server read");
		fprintf(stderr, "%d/%d: Couldn't read remote address\n", n, (int) sizeof msg);
		goto out;
	}

	rem_dest = malloc(sizeof *rem_dest);
	if (!rem_dest)
		goto out;

	/*
	 * msg came from the network: force termination and bound the GID
	 * field to the 32 characters gid[] can hold.
	 */
	msg[sizeof msg - 1] = '\0';
	if (sscanf(msg, "%x:%x:%x:%32s", &rem_dest->lid, &rem_dest->qpn,
		   &rem_dest->psn, gid) != 4) {
		fprintf(stderr, "Couldn't parse remote address\n");
		free(rem_dest);
		rem_dest = NULL;
		goto out;
	}
	wire_gid_to_gid(gid, &rem_dest->gid);

	if (pp_connect_ctx(ctx, ib_port, my_dest->psn, mtu, sl, rem_dest,
			   sgid_idx)) {
		fprintf(stderr, "Couldn't connect to remote QP\n");
		free(rem_dest);
		rem_dest = NULL;
		goto out;
	}

	gid_to_wire_gid(&my_dest->gid, gid);
	snprintf(msg, sizeof msg, "%04x:%06x:%06x:%s", my_dest->lid,
		 my_dest->qpn, my_dest->psn, gid);
	if (write(connfd, msg, sizeof msg) != sizeof msg ||
	    read(connfd, msg, sizeof msg) != sizeof "done") {
		fprintf(stderr, "Couldn't send/recv local address\n");
		free(rem_dest);
		rem_dest = NULL;
		goto out;
	}

out:
	close(connfd);
	return rem_dest;
}
/*
 * Allocate and set up everything the UC ping-pong needs on one device:
 * work buffer, device context, optional completion channel, protection
 * domain, memory region, completion queue and a single UC queue pair that
 * is moved to the INIT state.
 *
 * size      - length in bytes of the work buffer / message
 * rx_depth  - receive queue depth; the CQ is created with rx_depth + 1 entries
 * port      - device port number stored into the QP attributes
 * use_event - non-zero to create a completion channel for CQ events
 *
 * Returns the new context, or NULL after releasing whatever had already
 * been acquired (the labels at the end unwind in reverse order).
 */
static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev, int size,
					    int rx_depth, int port,
					    int use_event)
{
	struct pingpong_context *ctx;

	ctx = calloc(1, sizeof *ctx);
	if (!ctx)
		return NULL;

	ctx->size = size;
	ctx->send_flags = IBV_SEND_SIGNALED;
	ctx->rx_depth = rx_depth;

	ctx->buf = memalign(page_size, size);
	if (!ctx->buf) {
		fprintf(stderr, "Couldn't allocate work buf.\n");
		goto clean_ctx;
	}

	/* FIXME memset(ctx->buf, 0, size); */
	memset(ctx->buf, 0x7b, size);

	ctx->context = ibv_open_device(ib_dev);
	if (!ctx->context) {
		fprintf(stderr, "Couldn't get context for %s\n",
			ibv_get_device_name(ib_dev));
		goto clean_buffer;
	}

	if (use_event) {
		ctx->channel = ibv_create_comp_channel(ctx->context);
		if (!ctx->channel) {
			fprintf(stderr, "Couldn't create completion channel\n");
			goto clean_device;
		}
	} else
		ctx->channel = NULL;

	ctx->pd = ibv_alloc_pd(ctx->context);
	if (!ctx->pd) {
		fprintf(stderr, "Couldn't allocate PD\n");
		goto clean_comp_channel;
	}

	ctx->mr = ibv_reg_mr(ctx->pd, ctx->buf, size, IBV_ACCESS_LOCAL_WRITE);
	if (!ctx->mr) {
		fprintf(stderr, "Couldn't register MR\n");
		goto clean_pd;
	}

	ctx->cq = ibv_create_cq(ctx->context, rx_depth + 1, NULL,
				ctx->channel, 0);
	if (!ctx->cq) {
		fprintf(stderr, "Couldn't create CQ\n");
		goto clean_mr;
	}

	{
		struct ibv_qp_attr attr;
		struct ibv_qp_init_attr init_attr = {
			.send_cq = ctx->cq,
			.recv_cq = ctx->cq,
			.cap = {
				.max_send_wr = 1,
				.max_recv_wr = rx_depth,
				.max_send_sge = 1,
				.max_recv_sge = 1
			},
			.qp_type = IBV_QPT_UC
		};

		ctx->qp = ibv_create_qp(ctx->pd, &init_attr);
		if (!ctx->qp) {
			fprintf(stderr, "Couldn't create QP\n");
			goto clean_cq;
		}
		/* Send inline when the QP reports room for the whole message. */
		ibv_query_qp(ctx->qp, &attr, IBV_QP_CAP, &init_attr);
		if (init_attr.cap.max_inline_data >= size) {
			ctx->send_flags |= IBV_SEND_INLINE;
		}
	}

	{
		struct ibv_qp_attr attr = {
			.qp_state = IBV_QPS_INIT,
			.pkey_index = 0,
			.port_num = port,
			.qp_access_flags = 0
		};

		if (ibv_modify_qp(ctx->qp, &attr,
				  IBV_QP_STATE |
				  IBV_QP_PKEY_INDEX |
				  IBV_QP_PORT |
				  IBV_QP_ACCESS_FLAGS)) {
			fprintf(stderr, "Failed to modify QP to INIT\n");
			goto clean_qp;
		}
	}

	return ctx;

	/* Error unwinding: each label releases one resource and falls through. */
clean_qp:
	ibv_destroy_qp(ctx->qp);

clean_cq:
	ibv_destroy_cq(ctx->cq);

clean_mr:
	ibv_dereg_mr(ctx->mr);

clean_pd:
	ibv_dealloc_pd(ctx->pd);

clean_comp_channel:
	if (ctx->channel)
		ibv_destroy_comp_channel(ctx->channel);

clean_device:
	ibv_close_device(ctx->context);

clean_buffer:
	free(ctx->buf);

clean_ctx:
	free(ctx);

	return NULL;
}

/*
 * Release everything held by the context: QP, CQ, MR, PD, the completion
 * channel when one exists, the device context, the work buffer and ctx.
 *
 * Returns 0 on success.  On the first failing release it prints a message
 * and returns 1 without releasing the remaining resources.
 */
static int pp_close_ctx(struct pingpong_context *ctx)
{
	if (ibv_destroy_qp(ctx->qp)) {
		fprintf(stderr, "Couldn't destroy QP\n");
		return 1;
	}

	if (ibv_destroy_cq(ctx->cq)) {
		fprintf(stderr, "Couldn't destroy CQ\n");
		return 1;
	}

	if (ibv_dereg_mr(ctx->mr)) {
		fprintf(stderr, "Couldn't deregister MR\n");
		return 1;
	}

	if (ibv_dealloc_pd(ctx->pd)) {
		fprintf(stderr, "Couldn't deallocate PD\n");
		return 1;
	}

	if (ctx->channel) {
		if (ibv_destroy_comp_channel(ctx->channel)) {
			fprintf(stderr, "Couldn't destroy completion channel\n");
			return 1;
		}
	}

	if (ibv_close_device(ctx->context)) {
		fprintf(stderr, "Couldn't release context\n");
		return 1;
	}

	free(ctx->buf);
	free(ctx);

	return 0;
}

/*
 * Post up to n receive work requests on the QP, each covering the whole
 * work buffer.
 *
 * Returns the number of requests posted before the first failure.
 */
static int pp_post_recv(struct pingpong_context *ctx, int n)
{
	struct ibv_sge list = {
		.addr = (uintptr_t) ctx->buf,
		.length = ctx->size,
		.lkey = ctx->mr->lkey
	};
	struct ibv_recv_wr wr = {
		.wr_id = PINGPONG_RECV_WRID,
		.sg_list = &list,
		.num_sge = 1,
	};
	struct ibv_recv_wr *bad_wr;
	int i;

	for (i = 0; i < n; ++i)
		if (ibv_post_recv(ctx->qp, &wr, &bad_wr))
			break;

	return i;
}

/*
 * Post one send of the whole work buffer on the QP using ctx->send_flags.
 *
 * Returns whatever ibv_post_send() returns (callers treat non-zero as failure).
 */
static int pp_post_send(struct pingpong_context *ctx)
{
	struct ibv_sge list = {
		.addr = (uintptr_t) ctx->buf,
		.length = ctx->size,
		.lkey = ctx->mr->lkey
	};
	struct ibv_send_wr wr = {
		.wr_id = PINGPONG_SEND_WRID,
		.sg_list = &list,
		.num_sge = 1,
		.opcode = IBV_WR_SEND,
		.send_flags = ctx->send_flags,
	};
	struct ibv_send_wr *bad_wr;

	return ibv_post_send(ctx->qp, &wr, &bad_wr);
}

/* Print the command-line synopsis and option list to stdout. */
static void usage(const char *argv0)
{
	printf("Usage:\n");
	printf(" %s start a server and wait for connection\n", argv0);
	printf(" %s connect to server at \n", argv0);
	printf("\n");
	printf("Options:\n");
printf(" -p, --port= listen on/connect to port (default 18515)\n"); printf(" -d, --ib-dev= use IB device (default first device found)\n"); printf(" -i, --ib-port= use port of IB device (default 1)\n"); printf(" -s, --size= size of message to exchange (default 4096)\n"); printf(" -m, --mtu= path MTU (default 1024)\n"); printf(" -r, --rx-depth= number of receives to post at a time (default 500)\n"); printf(" -n, --iters= number of exchanges (default 1000)\n"); printf(" -l, --sl= service level value\n"); printf(" -e, --events sleep on CQ events (default poll)\n"); printf(" -g, --gid-idx= local port gid index\n"); } int main(int argc, char *argv[]) { struct ibv_device **dev_list; struct ibv_device *ib_dev; struct pingpong_context *ctx; struct pingpong_dest my_dest; struct pingpong_dest *rem_dest; struct timeval start, end; char *ib_devname = NULL; char *servername = NULL; unsigned int port = 18515; int ib_port = 1; unsigned int size = 4096; enum ibv_mtu mtu = IBV_MTU_1024; unsigned int rx_depth = 500; unsigned int iters = 1000; int use_event = 0; int routs; int rcnt, scnt; int num_cq_events = 0; int sl = 0; int gidx = -1; char gid[33]; srand48(getpid() * time(NULL)); while (1) { int c; static struct option long_options[] = { { .name = "port", .has_arg = 1, .val = 'p' }, { .name = "ib-dev", .has_arg = 1, .val = 'd' }, { .name = "ib-port", .has_arg = 1, .val = 'i' }, { .name = "size", .has_arg = 1, .val = 's' }, { .name = "mtu", .has_arg = 1, .val = 'm' }, { .name = "rx-depth", .has_arg = 1, .val = 'r' }, { .name = "iters", .has_arg = 1, .val = 'n' }, { .name = "sl", .has_arg = 1, .val = 'l' }, { .name = "events", .has_arg = 0, .val = 'e' }, { .name = "gid-idx", .has_arg = 1, .val = 'g' }, {} }; c = getopt_long(argc, argv, "p:d:i:s:m:r:n:l:eg:", long_options, NULL); if (c == -1) break; switch (c) { case 'p': port = strtoul(optarg, NULL, 0); if (port > 65535) { usage(argv[0]); return 1; } break; case 'd': ib_devname = strdupa(optarg); break; case 'i': ib_port = 
strtol(optarg, NULL, 0); if (ib_port < 1) { usage(argv[0]); return 1; } break; case 's': size = strtoul(optarg, NULL, 0); break; case 'm': mtu = pp_mtu_to_enum(strtol(optarg, NULL, 0)); if (mtu == 0) { usage(argv[0]); return 1; } break; case 'r': rx_depth = strtoul(optarg, NULL, 0); break; case 'n': iters = strtoul(optarg, NULL, 0); break; case 'l': sl = strtol(optarg, NULL, 0); break; case 'e': ++use_event; break; case 'g': gidx = strtol(optarg, NULL, 0); break; default: usage(argv[0]); return 1; } } if (optind == argc - 1) servername = strdupa(argv[optind]); else if (optind < argc) { usage(argv[0]); return 1; } page_size = sysconf(_SC_PAGESIZE); dev_list = ibv_get_device_list(NULL); if (!dev_list) { perror("Failed to get IB devices list"); return 1; } if (!ib_devname) { ib_dev = *dev_list; if (!ib_dev) { fprintf(stderr, "No IB devices found\n"); return 1; } } else { int i; for (i = 0; dev_list[i]; ++i) if (!strcmp(ibv_get_device_name(dev_list[i]), ib_devname)) break; ib_dev = dev_list[i]; if (!ib_dev) { fprintf(stderr, "IB device %s not found\n", ib_devname); return 1; } } ctx = pp_init_ctx(ib_dev, size, rx_depth, ib_port, use_event); if (!ctx) return 1; routs = pp_post_recv(ctx, ctx->rx_depth); if (routs < ctx->rx_depth) { fprintf(stderr, "Couldn't post receive (%d)\n", routs); return 1; } if (use_event) if (ibv_req_notify_cq(ctx->cq, 0)) { fprintf(stderr, "Couldn't request CQ notification\n"); return 1; } if (pp_get_port_info(ctx->context, ib_port, &ctx->portinfo)) { fprintf(stderr, "Couldn't get port info\n"); return 1; } my_dest.lid = ctx->portinfo.lid; if (ctx->portinfo.link_layer != IBV_LINK_LAYER_ETHERNET && !my_dest.lid) { fprintf(stderr, "Couldn't get local LID\n"); return 1; } if (gidx >= 0) { if (ibv_query_gid(ctx->context, ib_port, gidx, &my_dest.gid)) { fprintf(stderr, "can't read sgid of index %d\n", gidx); return 1; } } else memset(&my_dest.gid, 0, sizeof my_dest.gid); my_dest.qpn = ctx->qp->qp_num; my_dest.psn = lrand48() & 0xffffff; 
inet_ntop(AF_INET6, &my_dest.gid, gid, sizeof gid); printf(" local address: LID 0x%04x, QPN 0x%06x, PSN 0x%06x, GID %s\n", my_dest.lid, my_dest.qpn, my_dest.psn, gid); if (servername) rem_dest = pp_client_exch_dest(servername, port, &my_dest); else rem_dest = pp_server_exch_dest(ctx, ib_port, mtu, port, sl, &my_dest, gidx); if (!rem_dest) return 1; inet_ntop(AF_INET6, &rem_dest->gid, gid, sizeof gid); printf(" remote address: LID 0x%04x, QPN 0x%06x, PSN 0x%06x, GID %s\n", rem_dest->lid, rem_dest->qpn, rem_dest->psn, gid); if (servername) if (pp_connect_ctx(ctx, ib_port, my_dest.psn, mtu, sl, rem_dest, gidx)) return 1; ctx->pending = PINGPONG_RECV_WRID; if (servername) { if (pp_post_send(ctx)) { fprintf(stderr, "Couldn't post send\n"); return 1; } ctx->pending |= PINGPONG_SEND_WRID; } if (gettimeofday(&start, NULL)) { perror("gettimeofday"); return 1; } rcnt = scnt = 0; while (rcnt < iters || scnt < iters) { if (use_event) { struct ibv_cq *ev_cq; void *ev_ctx; if (ibv_get_cq_event(ctx->channel, &ev_cq, &ev_ctx)) { fprintf(stderr, "Failed to get cq_event\n"); return 1; } ++num_cq_events; if (ev_cq != ctx->cq) { fprintf(stderr, "CQ event for unknown CQ %p\n", ev_cq); return 1; } if (ibv_req_notify_cq(ctx->cq, 0)) { fprintf(stderr, "Couldn't request CQ notification\n"); return 1; } } { struct ibv_wc wc[2]; int ne, i; do { ne = ibv_poll_cq(ctx->cq, 2, wc); if (ne < 0) { fprintf(stderr, "poll CQ failed %d\n", ne); return 1; } } while (!use_event && ne < 1); for (i = 0; i < ne; ++i) { if (wc[i].status != IBV_WC_SUCCESS) { fprintf(stderr, "Failed status %s (%d) for wr_id %d\n", ibv_wc_status_str(wc[i].status), wc[i].status, (int) wc[i].wr_id); return 1; } switch ((int) wc[i].wr_id) { case PINGPONG_SEND_WRID: ++scnt; break; case PINGPONG_RECV_WRID: if (--routs <= 1) { routs += pp_post_recv(ctx, ctx->rx_depth - routs); if (routs < ctx->rx_depth) { fprintf(stderr, "Couldn't post receive (%d)\n", routs); return 1; } } ++rcnt; break; default: fprintf(stderr, "Completion for 
unknown wr_id %d\n", (int) wc[i].wr_id); return 1; } ctx->pending &= ~(int) wc[i].wr_id; if (scnt < iters && !ctx->pending) { if (pp_post_send(ctx)) { fprintf(stderr, "Couldn't post send\n"); return 1; } ctx->pending = PINGPONG_RECV_WRID | PINGPONG_SEND_WRID; } } } } if (gettimeofday(&end, NULL)) { perror("gettimeofday"); return 1; } { float usec = (end.tv_sec - start.tv_sec) * 1000000 + (end.tv_usec - start.tv_usec); long long bytes = (long long) size * iters * 2; printf("%lld bytes in %.2f seconds = %.2f Mbit/sec\n", bytes, usec / 1000000., bytes * 8. / usec); printf("%d iters in %.2f seconds = %.2f usec/iter\n", iters, usec / 1000000., usec / iters); } ibv_ack_cq_events(ctx->cq, num_cq_events); if (pp_close_ctx(ctx)) return 1; ibv_free_device_list(dev_list); free(rem_dest); return 0; } Index: head/contrib/ofed/libibverbs/examples/ud_pingpong.c =================================================================== --- head/contrib/ofed/libibverbs/examples/ud_pingpong.c (revision 363219) +++ head/contrib/ofed/libibverbs/examples/ud_pingpong.c (revision 363220) @@ -1,860 +1,864 @@ /* * Copyright (c) 2005 Topspin Communications. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #define _GNU_SOURCE #include #include #include #include #include #include #include #include #include #include #include #include #include #include "pingpong.h" enum { PINGPONG_RECV_WRID = 1, PINGPONG_SEND_WRID = 2, }; static int page_size; struct pingpong_context { struct ibv_context *context; struct ibv_comp_channel *channel; struct ibv_pd *pd; struct ibv_mr *mr; struct ibv_cq *cq; struct ibv_qp *qp; struct ibv_ah *ah; void *buf; int size; int send_flags; int rx_depth; int pending; struct ibv_port_attr portinfo; }; struct pingpong_dest { int lid; int qpn; int psn; union ibv_gid gid; }; static int pp_connect_ctx(struct pingpong_context *ctx, int port, int my_psn, int sl, struct pingpong_dest *dest, int sgid_idx) { struct ibv_ah_attr ah_attr = { .is_global = 0, .dlid = dest->lid, .sl = sl, .src_path_bits = 0, .port_num = port }; struct ibv_qp_attr attr = { .qp_state = IBV_QPS_RTR }; if (ibv_modify_qp(ctx->qp, &attr, IBV_QP_STATE)) { fprintf(stderr, "Failed to modify QP to RTR\n"); return 1; } attr.qp_state = IBV_QPS_RTS; attr.sq_psn = my_psn; if (ibv_modify_qp(ctx->qp, &attr, IBV_QP_STATE | IBV_QP_SQ_PSN)) { fprintf(stderr, "Failed to modify QP to RTS\n"); return 1; } if (dest->gid.global.interface_id) { ah_attr.is_global = 1; ah_attr.grh.hop_limit = 1; ah_attr.grh.dgid = dest->gid; ah_attr.grh.sgid_index = sgid_idx; } ctx->ah = ibv_create_ah(ctx->pd, &ah_attr); if (!ctx->ah) { fprintf(stderr, "Failed to create AH\n"); return 1; } return 0; } static struct pingpong_dest 
*pp_client_exch_dest(const char *servername, int port,
		     const struct pingpong_dest *my_dest)
{
	/*
	 * Client side of the out-of-band address exchange: connect a TCP
	 * socket to servername:port, send the local destination as a
	 * fixed-size "lid:qpn:psn:gid" text record, read the server's record
	 * back and acknowledge it with "done".
	 *
	 * Returns a malloc()ed pingpong_dest describing the remote side (the
	 * caller frees it), or NULL on any failure.
	 */
	struct addrinfo *res, *t;
	struct addrinfo hints = {
		.ai_family = AF_UNSPEC,
		.ai_socktype = SOCK_STREAM
	};
	char *service;
	char msg[sizeof "0000:000000:000000:00000000000000000000000000000000"];
	int n;
	int sockfd = -1;
	struct pingpong_dest *rem_dest = NULL;
	char gid[33];

	if (asprintf(&service, "%d", port) < 0)
		return NULL;

	n = getaddrinfo(servername, service, &hints, &res);

	/*
	 * getaddrinfo() reports failure with any non-zero value; the EAI_*
	 * codes are not negative on every platform, so "< 0" misses errors
	 * and would walk an uninitialised result list.
	 */
	if (n != 0) {
		fprintf(stderr, "%s for %s:%d\n", gai_strerror(n), servername, port);
		free(service);
		return NULL;
	}

	/* Use the first address that accepts the connection. */
	for (t = res; t; t = t->ai_next) {
		sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol);
		if (sockfd >= 0) {
			if (!connect(sockfd, t->ai_addr, t->ai_addrlen))
				break;
			close(sockfd);
			sockfd = -1;
		}
	}

	freeaddrinfo_null(res);
	free(service);

	if (sockfd < 0) {
		fprintf(stderr, "Couldn't connect to %s:%d\n", servername, port);
		return NULL;
	}

	gid_to_wire_gid(&my_dest->gid, gid);
	snprintf(msg, sizeof msg, "%04x:%06x:%06x:%s", my_dest->lid,
		 my_dest->qpn, my_dest->psn, gid);
	if (write(sockfd, msg, sizeof msg) != sizeof msg) {
		fprintf(stderr, "Couldn't send local address\n");
		goto out;
	}

	if (read(sockfd, msg, sizeof msg) != sizeof msg ||
	    write(sockfd, "done", sizeof "done") != sizeof "done") {
		perror("client read/write");
		fprintf(stderr, "Couldn't read/write remote address\n");
		goto out;
	}

	rem_dest = malloc(sizeof *rem_dest);
	if (!rem_dest)
		goto out;

	/*
	 * msg came from the network: force termination and bound the GID
	 * field to the 32 characters gid[] can hold.
	 */
	msg[sizeof msg - 1] = '\0';
	if (sscanf(msg, "%x:%x:%x:%32s", &rem_dest->lid, &rem_dest->qpn,
		   &rem_dest->psn, gid) != 4) {
		fprintf(stderr, "Couldn't parse remote address\n");
		free(rem_dest);
		rem_dest = NULL;
		goto out;
	}
	wire_gid_to_gid(gid, &rem_dest->gid);

out:
	close(sockfd);
	return rem_dest;
}

/*
 * Server side of the out-of-band address exchange.
 *
 * Binds a TCP socket to the given port, accepts one connection, reads the
 * client's "lid:qpn:psn:gid" record, sets the local QP up towards it with
 * pp_connect_ctx(), replies with the local record and waits for the
 * client's "done" acknowledgement.
 *
 * Returns a malloc()ed pingpong_dest describing the remote side (the caller
 * frees it), or NULL on any failure.
 */
static struct pingpong_dest *pp_server_exch_dest(struct pingpong_context *ctx,
						 int ib_port, int port, int sl,
						 const struct pingpong_dest *my_dest,
						 int sgid_idx)
{
	struct addrinfo *res, *t;
	struct addrinfo hints = {
		.ai_flags = AI_PASSIVE,
		.ai_family = AF_INET,
		.ai_socktype = SOCK_STREAM
	};
	char *service;
	char msg[sizeof "0000:000000:000000:00000000000000000000000000000000"];
	int n;
	int sockfd = -1, connfd;
	struct pingpong_dest *rem_dest = NULL;
	char gid[33];

	if (asprintf(&service, "%d", port) < 0)
		return NULL;

	n = getaddrinfo(NULL, service, &hints, &res);

	/* Any non-zero return is an error; EAI_* codes need not be negative. */
	if (n != 0) {
		fprintf(stderr, "%s for port %d\n", gai_strerror(n), port);
		free(service);
		return NULL;
	}

	/* Use the first address the socket can be bound to. */
	for (t = res; t; t = t->ai_next) {
		sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol);
		if (sockfd >= 0) {
			n = 1;

			setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &n, sizeof n);

			if (!bind(sockfd, t->ai_addr, t->ai_addrlen))
				break;
			close(sockfd);
			sockfd = -1;
		}
	}

	freeaddrinfo_null(res);
	free(service);

	if (sockfd < 0) {
		fprintf(stderr, "Couldn't listen to port %d\n", port);
		return NULL;
	}

	if (listen(sockfd, 1)) {
		perror("listen() failed");
		close(sockfd);
		return NULL;
	}
	connfd = accept(sockfd, NULL, NULL);
	close(sockfd);
	if (connfd < 0) {
		fprintf(stderr, "accept() failed\n");
		return NULL;
	}

	n = read(connfd, msg, sizeof msg);
	if (n != sizeof msg) {
		perror("server read");
		fprintf(stderr, "%d/%d: Couldn't read remote address\n", n, (int) sizeof msg);
		goto out;
	}

	rem_dest = malloc(sizeof *rem_dest);
	if (!rem_dest)
		goto out;

	/*
	 * msg came from the network: force termination and bound the GID
	 * field to the 32 characters gid[] can hold.
	 */
	msg[sizeof msg - 1] = '\0';
	if (sscanf(msg, "%x:%x:%x:%32s", &rem_dest->lid, &rem_dest->qpn,
		   &rem_dest->psn, gid) != 4) {
		fprintf(stderr, "Couldn't parse remote address\n");
		free(rem_dest);
		rem_dest = NULL;
		goto out;
	}
	wire_gid_to_gid(gid, &rem_dest->gid);

	if (pp_connect_ctx(ctx, ib_port, my_dest->psn, sl, rem_dest,
			   sgid_idx)) {
		fprintf(stderr, "Couldn't connect to remote QP\n");
		free(rem_dest);
		rem_dest = NULL;
		goto out;
	}

	gid_to_wire_gid(&my_dest->gid, gid);
	snprintf(msg, sizeof msg, "%04x:%06x:%06x:%s", my_dest->lid,
		 my_dest->qpn, my_dest->psn, gid);
	if (write(connfd, msg, sizeof msg) != sizeof msg ||
	    read(connfd, msg, sizeof msg) != sizeof "done") {
		fprintf(stderr, "Couldn't send/recv local address\n");
		free(rem_dest);
		rem_dest = NULL;
		goto out;
	}

out:
	close(connfd);
	return rem_dest;
}

static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev, int size,
					    int rx_depth, int port,
					    int use_event)
{
	struct pingpong_context *ctx;

	ctx = malloc(sizeof *ctx);
	if (!ctx)
		return NULL;

	ctx->size = size;
ctx->send_flags = IBV_SEND_SIGNALED; ctx->rx_depth = rx_depth; ctx->buf = memalign(page_size, size + 40); if (!ctx->buf) { fprintf(stderr, "Couldn't allocate work buf.\n"); goto clean_ctx; } /* FIXME memset(ctx->buf, 0, size + 40); */ memset(ctx->buf, 0x7b, size + 40); ctx->context = ibv_open_device(ib_dev); if (!ctx->context) { fprintf(stderr, "Couldn't get context for %s\n", ibv_get_device_name(ib_dev)); goto clean_buffer; } { struct ibv_port_attr port_info = {}; int mtu; if (ibv_query_port(ctx->context, port, &port_info)) { fprintf(stderr, "Unable to query port info for port %d\n", port); goto clean_device; } mtu = 1 << (port_info.active_mtu + 7); if (size > mtu) { fprintf(stderr, "Requested size larger than port MTU (%d)\n", mtu); goto clean_device; } } if (use_event) { ctx->channel = ibv_create_comp_channel(ctx->context); if (!ctx->channel) { fprintf(stderr, "Couldn't create completion channel\n"); goto clean_device; } } else ctx->channel = NULL; ctx->pd = ibv_alloc_pd(ctx->context); if (!ctx->pd) { fprintf(stderr, "Couldn't allocate PD\n"); goto clean_comp_channel; } ctx->mr = ibv_reg_mr(ctx->pd, ctx->buf, size + 40, IBV_ACCESS_LOCAL_WRITE); if (!ctx->mr) { fprintf(stderr, "Couldn't register MR\n"); goto clean_pd; } ctx->cq = ibv_create_cq(ctx->context, rx_depth + 1, NULL, ctx->channel, 0); if (!ctx->cq) { fprintf(stderr, "Couldn't create CQ\n"); goto clean_mr; } { struct ibv_qp_attr attr; struct ibv_qp_init_attr init_attr = { .send_cq = ctx->cq, .recv_cq = ctx->cq, .cap = { .max_send_wr = 1, .max_recv_wr = rx_depth, .max_send_sge = 1, .max_recv_sge = 1 }, .qp_type = IBV_QPT_UD, }; ctx->qp = ibv_create_qp(ctx->pd, &init_attr); if (!ctx->qp) { fprintf(stderr, "Couldn't create QP\n"); goto clean_cq; } ibv_query_qp(ctx->qp, &attr, IBV_QP_CAP, &init_attr); if (init_attr.cap.max_inline_data >= size) { ctx->send_flags |= IBV_SEND_INLINE; } } { struct ibv_qp_attr attr = { .qp_state = IBV_QPS_INIT, .pkey_index = 0, .port_num = port, .qkey = 0x11111111 }; if 
(ibv_modify_qp(ctx->qp, &attr, IBV_QP_STATE | IBV_QP_PKEY_INDEX | IBV_QP_PORT | IBV_QP_QKEY)) { fprintf(stderr, "Failed to modify QP to INIT\n"); goto clean_qp; } } return ctx; clean_qp: ibv_destroy_qp(ctx->qp); clean_cq: ibv_destroy_cq(ctx->cq); clean_mr: ibv_dereg_mr(ctx->mr); clean_pd: ibv_dealloc_pd(ctx->pd); clean_comp_channel: if (ctx->channel) ibv_destroy_comp_channel(ctx->channel); clean_device: ibv_close_device(ctx->context); clean_buffer: free(ctx->buf); clean_ctx: free(ctx); return NULL; } static int pp_close_ctx(struct pingpong_context *ctx) { if (ibv_destroy_qp(ctx->qp)) { fprintf(stderr, "Couldn't destroy QP\n"); return 1; } if (ibv_destroy_cq(ctx->cq)) { fprintf(stderr, "Couldn't destroy CQ\n"); return 1; } if (ibv_dereg_mr(ctx->mr)) { fprintf(stderr, "Couldn't deregister MR\n"); return 1; } if (ibv_destroy_ah(ctx->ah)) { fprintf(stderr, "Couldn't destroy AH\n"); return 1; } if (ibv_dealloc_pd(ctx->pd)) { fprintf(stderr, "Couldn't deallocate PD\n"); return 1; } if (ctx->channel) { if (ibv_destroy_comp_channel(ctx->channel)) { fprintf(stderr, "Couldn't destroy completion channel\n"); return 1; } } if (ibv_close_device(ctx->context)) { fprintf(stderr, "Couldn't release context\n"); return 1; } free(ctx->buf); free(ctx); return 0; } static int pp_post_recv(struct pingpong_context *ctx, int n) { struct ibv_sge list = { .addr = (uintptr_t) ctx->buf, .length = ctx->size + 40, .lkey = ctx->mr->lkey }; struct ibv_recv_wr wr = { .wr_id = PINGPONG_RECV_WRID, .sg_list = &list, .num_sge = 1, }; struct ibv_recv_wr *bad_wr; int i; for (i = 0; i < n; ++i) if (ibv_post_recv(ctx->qp, &wr, &bad_wr)) break; return i; } static int pp_post_send(struct pingpong_context *ctx, uint32_t qpn) { struct ibv_sge list = { .addr = (uintptr_t) ctx->buf + 40, .length = ctx->size, .lkey = ctx->mr->lkey }; struct ibv_send_wr wr = { .wr_id = PINGPONG_SEND_WRID, .sg_list = &list, .num_sge = 1, .opcode = IBV_WR_SEND, .send_flags = ctx->send_flags, .wr = { .ud = { .ah = ctx->ah, 
.remote_qpn = qpn, .remote_qkey = 0x11111111 } } }; struct ibv_send_wr *bad_wr; return ibv_post_send(ctx->qp, &wr, &bad_wr); } static void usage(const char *argv0) { printf("Usage:\n"); printf(" %s start a server and wait for connection\n", argv0); printf(" %s connect to server at \n", argv0); printf("\n"); printf("Options:\n"); printf(" -p, --port= listen on/connect to port (default 18515)\n"); printf(" -d, --ib-dev= use IB device (default first device found)\n"); printf(" -i, --ib-port= use port of IB device (default 1)\n"); printf(" -s, --size= size of message to exchange (default 2048)\n"); printf(" -r, --rx-depth= number of receives to post at a time (default 500)\n"); printf(" -n, --iters= number of exchanges (default 1000)\n"); printf(" -l, --sl= send messages with service level (default 0)\n"); printf(" -e, --events sleep on CQ events (default poll)\n"); printf(" -g, --gid-idx= local port gid index\n"); } int main(int argc, char *argv[]) { struct ibv_device **dev_list; struct ibv_device *ib_dev; struct pingpong_context *ctx; struct pingpong_dest my_dest; struct pingpong_dest *rem_dest; struct timeval start, end; char *ib_devname = NULL; char *servername = NULL; unsigned int port = 18515; int ib_port = 1; unsigned int size = 2048; unsigned int rx_depth = 500; unsigned int iters = 1000; int use_event = 0; int routs; int rcnt, scnt; int num_cq_events = 0; int sl = 0; int gidx = -1; char gid[33]; srand48(getpid() * time(NULL)); while (1) { int c; static struct option long_options[] = { { .name = "port", .has_arg = 1, .val = 'p' }, { .name = "ib-dev", .has_arg = 1, .val = 'd' }, { .name = "ib-port", .has_arg = 1, .val = 'i' }, { .name = "size", .has_arg = 1, .val = 's' }, { .name = "rx-depth", .has_arg = 1, .val = 'r' }, { .name = "iters", .has_arg = 1, .val = 'n' }, { .name = "sl", .has_arg = 1, .val = 'l' }, { .name = "events", .has_arg = 0, .val = 'e' }, { .name = "gid-idx", .has_arg = 1, .val = 'g' }, {} }; c = getopt_long(argc, argv, "p:d:i:s:r:n:l:eg:", 
long_options, NULL); if (c == -1) break; switch (c) { case 'p': port = strtol(optarg, NULL, 0); if (port > 65535) { usage(argv[0]); return 1; } break; case 'd': ib_devname = strdupa(optarg); break; case 'i': ib_port = strtol(optarg, NULL, 0); if (ib_port < 1) { usage(argv[0]); return 1; } break; case 's': size = strtoul(optarg, NULL, 0); break; case 'r': rx_depth = strtoul(optarg, NULL, 0); break; case 'n': iters = strtoul(optarg, NULL, 0); break; case 'l': sl = strtol(optarg, NULL, 0); break; case 'e': ++use_event; break; case 'g': gidx = strtol(optarg, NULL, 0); break; default: usage(argv[0]); return 1; } } if (optind == argc - 1) servername = strdupa(argv[optind]); else if (optind < argc) { usage(argv[0]); return 1; } page_size = sysconf(_SC_PAGESIZE); dev_list = ibv_get_device_list(NULL); if (!dev_list) { perror("Failed to get IB devices list"); return 1; } if (!ib_devname) { ib_dev = *dev_list; if (!ib_dev) { fprintf(stderr, "No IB devices found\n"); return 1; } } else { int i; for (i = 0; dev_list[i]; ++i) if (!strcmp(ibv_get_device_name(dev_list[i]), ib_devname)) break; ib_dev = dev_list[i]; if (!ib_dev) { fprintf(stderr, "IB device %s not found\n", ib_devname); return 1; } } ctx = pp_init_ctx(ib_dev, size, rx_depth, ib_port, use_event); if (!ctx) return 1; routs = pp_post_recv(ctx, ctx->rx_depth); if (routs < ctx->rx_depth) { fprintf(stderr, "Couldn't post receive (%d)\n", routs); return 1; } if (use_event) if (ibv_req_notify_cq(ctx->cq, 0)) { fprintf(stderr, "Couldn't request CQ notification\n"); return 1; } if (pp_get_port_info(ctx->context, ib_port, &ctx->portinfo)) { fprintf(stderr, "Couldn't get port info\n"); return 1; } my_dest.lid = ctx->portinfo.lid; my_dest.qpn = ctx->qp->qp_num; my_dest.psn = lrand48() & 0xffffff; if (gidx >= 0) { if (ibv_query_gid(ctx->context, ib_port, gidx, &my_dest.gid)) { fprintf(stderr, "Could not get local gid for gid index " "%d\n", gidx); return 1; } } else memset(&my_dest.gid, 0, sizeof my_dest.gid); inet_ntop(AF_INET6, 
&my_dest.gid, gid, sizeof gid); printf(" local address: LID 0x%04x, QPN 0x%06x, PSN 0x%06x: GID %s\n", my_dest.lid, my_dest.qpn, my_dest.psn, gid); if (servername) rem_dest = pp_client_exch_dest(servername, port, &my_dest); else rem_dest = pp_server_exch_dest(ctx, ib_port, port, sl, &my_dest, gidx); if (!rem_dest) return 1; inet_ntop(AF_INET6, &rem_dest->gid, gid, sizeof gid); printf(" remote address: LID 0x%04x, QPN 0x%06x, PSN 0x%06x, GID %s\n", rem_dest->lid, rem_dest->qpn, rem_dest->psn, gid); if (servername) if (pp_connect_ctx(ctx, ib_port, my_dest.psn, sl, rem_dest, gidx)) return 1; ctx->pending = PINGPONG_RECV_WRID; if (servername) { if (pp_post_send(ctx, rem_dest->qpn)) { fprintf(stderr, "Couldn't post send\n"); return 1; } ctx->pending |= PINGPONG_SEND_WRID; } if (gettimeofday(&start, NULL)) { perror("gettimeofday"); return 1; } rcnt = scnt = 0; while (rcnt < iters || scnt < iters) { if (use_event) { struct ibv_cq *ev_cq; void *ev_ctx; if (ibv_get_cq_event(ctx->channel, &ev_cq, &ev_ctx)) { fprintf(stderr, "Failed to get cq_event\n"); return 1; } ++num_cq_events; if (ev_cq != ctx->cq) { fprintf(stderr, "CQ event for unknown CQ %p\n", ev_cq); return 1; } if (ibv_req_notify_cq(ctx->cq, 0)) { fprintf(stderr, "Couldn't request CQ notification\n"); return 1; } } { struct ibv_wc wc[2]; int ne, i; do { ne = ibv_poll_cq(ctx->cq, 2, wc); if (ne < 0) { fprintf(stderr, "poll CQ failed %d\n", ne); return 1; } } while (!use_event && ne < 1); for (i = 0; i < ne; ++i) { if (wc[i].status != IBV_WC_SUCCESS) { fprintf(stderr, "Failed status %s (%d) for wr_id %d\n", ibv_wc_status_str(wc[i].status), wc[i].status, (int) wc[i].wr_id); return 1; } switch ((int) wc[i].wr_id) { case PINGPONG_SEND_WRID: ++scnt; break; case PINGPONG_RECV_WRID: if (--routs <= 1) { routs += pp_post_recv(ctx, ctx->rx_depth - routs); if (routs < ctx->rx_depth) { fprintf(stderr, "Couldn't post receive (%d)\n", routs); return 1; } } ++rcnt; break; default: fprintf(stderr, "Completion for unknown wr_id 
%d\n", (int) wc[i].wr_id); return 1; } ctx->pending &= ~(int) wc[i].wr_id; if (scnt < iters && !ctx->pending) { if (pp_post_send(ctx, rem_dest->qpn)) { fprintf(stderr, "Couldn't post send\n"); return 1; } ctx->pending = PINGPONG_RECV_WRID | PINGPONG_SEND_WRID; } } } } if (gettimeofday(&end, NULL)) { perror("gettimeofday"); return 1; } { float usec = (end.tv_sec - start.tv_sec) * 1000000 + (end.tv_usec - start.tv_usec); long long bytes = (long long) size * iters * 2; printf("%lld bytes in %.2f seconds = %.2f Mbit/sec\n", bytes, usec / 1000000., bytes * 8. / usec); printf("%d iters in %.2f seconds = %.2f usec/iter\n", iters, usec / 1000000., usec / iters); } ibv_ack_cq_events(ctx->cq, num_cq_events); if (pp_close_ctx(ctx)) return 1; ibv_free_device_list(dev_list); free(rem_dest); return 0; } Index: head/contrib/ofed/libibverbs/examples/xsrq_pingpong.c =================================================================== --- head/contrib/ofed/libibverbs/examples/xsrq_pingpong.c (revision 363219) +++ head/contrib/ofed/libibverbs/examples/xsrq_pingpong.c (revision 363220) @@ -1,1022 +1,1026 @@ /* * Copyright (c) 2005 Topspin Communications. All rights reserved. * Copyright (c) 2011 Intel Corporation, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. 
* * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #define _GNU_SOURCE #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "pingpong.h" #define MSG_FORMAT "%04x:%06x:%06x:%06x:%06x:%32s" #define MSG_SIZE 66 #define MSG_SSCAN "%x:%x:%x:%x:%x:%s" #define ADDR_FORMAT \ "%8s: LID %04x, QPN RECV %06x SEND %06x, PSN %06x, SRQN %06x, GID %s\n" #define TERMINATION_FORMAT "%s" #define TERMINATION_MSG_SIZE 4 #define TERMINATION_MSG "END" static int page_size; struct pingpong_dest { union ibv_gid gid; int lid; int recv_qpn; int send_qpn; int recv_psn; int send_psn; int srqn; int pp_cnt; int sockfd; }; struct pingpong_context { struct ibv_context *context; struct ibv_comp_channel *channel; struct ibv_pd *pd; struct ibv_mr *mr; struct ibv_cq *send_cq; struct ibv_cq *recv_cq; struct ibv_srq *srq; struct ibv_xrcd *xrcd; struct ibv_qp **recv_qp; struct ibv_qp **send_qp; struct pingpong_dest *rem_dest; void *buf; int lid; int sl; enum ibv_mtu mtu; int ib_port; int fd; int size; int num_clients; int num_tests; int use_event; int gidx; }; static struct pingpong_context ctx; static int open_device(char *ib_devname) { struct ibv_device **dev_list; int i = 0; dev_list = ibv_get_device_list(NULL); if (!dev_list) { fprintf(stderr, "Failed to get IB devices list"); 
return -1; } if (ib_devname) { for (; dev_list[i]; ++i) { if (!strcmp(ibv_get_device_name(dev_list[i]), ib_devname)) break; } } if (!dev_list[i]) { fprintf(stderr, "IB device %s not found\n", ib_devname ? ib_devname : ""); return -1; } ctx.context = ibv_open_device(dev_list[i]); if (!ctx.context) { fprintf(stderr, "Couldn't get context for %s\n", ibv_get_device_name(dev_list[i])); return -1; } ibv_free_device_list(dev_list); return 0; } static int create_qps(void) { struct ibv_qp_init_attr_ex init; struct ibv_qp_attr mod; int i; for (i = 0; i < ctx.num_clients; ++i) { memset(&init, 0, sizeof init); init.qp_type = IBV_QPT_XRC_RECV; init.comp_mask = IBV_QP_INIT_ATTR_XRCD; init.xrcd = ctx.xrcd; ctx.recv_qp[i] = ibv_create_qp_ex(ctx.context, &init); if (!ctx.recv_qp[i]) { fprintf(stderr, "Couldn't create recv QP[%d] errno %d\n", i, errno); return 1; } mod.qp_state = IBV_QPS_INIT; mod.pkey_index = 0; mod.port_num = ctx.ib_port; mod.qp_access_flags = IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_READ; if (ibv_modify_qp(ctx.recv_qp[i], &mod, IBV_QP_STATE | IBV_QP_PKEY_INDEX | IBV_QP_PORT | IBV_QP_ACCESS_FLAGS)) { fprintf(stderr, "Failed to modify recv QP[%d] to INIT\n", i); return 1; } memset(&init, 0, sizeof init); init.qp_type = IBV_QPT_XRC_SEND; init.send_cq = ctx.send_cq; init.cap.max_send_wr = ctx.num_clients * ctx.num_tests; init.cap.max_send_sge = 1; init.comp_mask = IBV_QP_INIT_ATTR_PD; init.pd = ctx.pd; ctx.send_qp[i] = ibv_create_qp_ex(ctx.context, &init); if (!ctx.send_qp[i]) { fprintf(stderr, "Couldn't create send QP[%d] errno %d\n", i, errno); return 1; } mod.qp_state = IBV_QPS_INIT; mod.pkey_index = 0; mod.port_num = ctx.ib_port; mod.qp_access_flags = 0; if (ibv_modify_qp(ctx.send_qp[i], &mod, IBV_QP_STATE | IBV_QP_PKEY_INDEX | IBV_QP_PORT | IBV_QP_ACCESS_FLAGS)) { fprintf(stderr, "Failed to modify send QP[%d] to INIT\n", i); return 1; } } return 0; } static int pp_init_ctx(char *ib_devname) { struct ibv_srq_init_attr_ex attr; struct ibv_xrcd_init_attr 
xrcd_attr; struct ibv_port_attr port_attr; ctx.recv_qp = calloc(ctx.num_clients, sizeof *ctx.recv_qp); ctx.send_qp = calloc(ctx.num_clients, sizeof *ctx.send_qp); ctx.rem_dest = calloc(ctx.num_clients, sizeof *ctx.rem_dest); if (!ctx.recv_qp || !ctx.send_qp || !ctx.rem_dest) return 1; if (open_device(ib_devname)) { fprintf(stderr, "Failed to open device\n"); return 1; } if (pp_get_port_info(ctx.context, ctx.ib_port, &port_attr)) { fprintf(stderr, "Failed to get port info\n"); return 1; } ctx.lid = port_attr.lid; if (port_attr.link_layer != IBV_LINK_LAYER_ETHERNET && !ctx.lid) { fprintf(stderr, "Couldn't get local LID\n"); return 1; } ctx.buf = memalign(page_size, ctx.size); if (!ctx.buf) { fprintf(stderr, "Couldn't allocate work buf.\n"); return 1; } memset(ctx.buf, 0, ctx.size); if (ctx.use_event) { ctx.channel = ibv_create_comp_channel(ctx.context); if (!ctx.channel) { fprintf(stderr, "Couldn't create completion channel\n"); return 1; } } ctx.pd = ibv_alloc_pd(ctx.context); if (!ctx.pd) { fprintf(stderr, "Couldn't allocate PD\n"); return 1; } ctx.mr = ibv_reg_mr(ctx.pd, ctx.buf, ctx.size, IBV_ACCESS_LOCAL_WRITE); if (!ctx.mr) { fprintf(stderr, "Couldn't register MR\n"); return 1; } ctx.fd = open("/tmp/xrc_domain", O_RDONLY | O_CREAT, S_IRUSR | S_IRGRP); if (ctx.fd < 0) { fprintf(stderr, "Couldn't create the file for the XRC Domain " "but not stopping %d\n", errno); ctx.fd = -1; } memset(&xrcd_attr, 0, sizeof xrcd_attr); xrcd_attr.comp_mask = IBV_XRCD_INIT_ATTR_FD | IBV_XRCD_INIT_ATTR_OFLAGS; xrcd_attr.fd = ctx.fd; xrcd_attr.oflags = O_CREAT; ctx.xrcd = ibv_open_xrcd(ctx.context, &xrcd_attr); if (!ctx.xrcd) { fprintf(stderr, "Couldn't Open the XRC Domain %d\n", errno); return 1; } ctx.recv_cq = ibv_create_cq(ctx.context, ctx.num_clients, &ctx.recv_cq, ctx.channel, 0); if (!ctx.recv_cq) { fprintf(stderr, "Couldn't create recv CQ\n"); return 1; } if (ctx.use_event) { if (ibv_req_notify_cq(ctx.recv_cq, 0)) { fprintf(stderr, "Couldn't request CQ notification\n"); 
return 1; } } ctx.send_cq = ibv_create_cq(ctx.context, ctx.num_clients, NULL, NULL, 0); if (!ctx.send_cq) { fprintf(stderr, "Couldn't create send CQ\n"); return 1; } memset(&attr, 0, sizeof attr); attr.attr.max_wr = ctx.num_clients; attr.attr.max_sge = 1; attr.comp_mask = IBV_SRQ_INIT_ATTR_TYPE | IBV_SRQ_INIT_ATTR_XRCD | IBV_SRQ_INIT_ATTR_CQ | IBV_SRQ_INIT_ATTR_PD; attr.srq_type = IBV_SRQT_XRC; attr.xrcd = ctx.xrcd; attr.cq = ctx.recv_cq; attr.pd = ctx.pd; ctx.srq = ibv_create_srq_ex(ctx.context, &attr); if (!ctx.srq) { fprintf(stderr, "Couldn't create SRQ\n"); return 1; } if (create_qps()) return 1; return 0; } static int recv_termination_ack(int index) { char msg[TERMINATION_MSG_SIZE]; int n = 0, r; int sockfd = ctx.rem_dest[index].sockfd; while (n < TERMINATION_MSG_SIZE) { r = read(sockfd, msg + n, TERMINATION_MSG_SIZE - n); if (r < 0) { perror("client read"); fprintf(stderr, "%d/%d: Couldn't read remote termination ack\n", n, TERMINATION_MSG_SIZE); return 1; } n += r; } if (strcmp(msg, TERMINATION_MSG)) { fprintf(stderr, "Invalid termination ack was accepted\n"); return 1; } return 0; } static int send_termination_ack(int index) { char msg[TERMINATION_MSG_SIZE]; int sockfd = ctx.rem_dest[index].sockfd; sprintf(msg, TERMINATION_FORMAT, TERMINATION_MSG); if (write(sockfd, msg, TERMINATION_MSG_SIZE) != TERMINATION_MSG_SIZE) { fprintf(stderr, "Couldn't send termination ack\n"); return 1; } return 0; } static int pp_client_termination(void) { if (send_termination_ack(0)) return 1; if (recv_termination_ack(0)) return 1; return 0; } static int pp_server_termination(void) { int i; for (i = 0; i < ctx.num_clients; i++) { if (recv_termination_ack(i)) return 1; } for (i = 0; i < ctx.num_clients; i++) { if (send_termination_ack(i)) return 1; } return 0; } static int send_local_dest(int sockfd, int index) { char msg[MSG_SIZE]; char gid[33]; uint32_t srq_num; union ibv_gid local_gid; if (ctx.gidx >= 0) { if (ibv_query_gid(ctx.context, ctx.ib_port, ctx.gidx, &local_gid)) { 
fprintf(stderr, "can't read sgid of index %d\n", ctx.gidx); return -1; } } else { memset(&local_gid, 0, sizeof(local_gid)); } ctx.rem_dest[index].recv_psn = lrand48() & 0xffffff; if (ibv_get_srq_num(ctx.srq, &srq_num)) { fprintf(stderr, "Couldn't get SRQ num\n"); return -1; } inet_ntop(AF_INET6, &local_gid, gid, sizeof(gid)); printf(ADDR_FORMAT, "local", ctx.lid, ctx.recv_qp[index]->qp_num, ctx.send_qp[index]->qp_num, ctx.rem_dest[index].recv_psn, srq_num, gid); gid_to_wire_gid(&local_gid, gid); sprintf(msg, MSG_FORMAT, ctx.lid, ctx.recv_qp[index]->qp_num, ctx.send_qp[index]->qp_num, ctx.rem_dest[index].recv_psn, srq_num, gid); if (write(sockfd, msg, MSG_SIZE) != MSG_SIZE) { fprintf(stderr, "Couldn't send local address\n"); return -1; } return 0; } static int recv_remote_dest(int sockfd, int index) { struct pingpong_dest *rem_dest; char msg[MSG_SIZE]; char gid[33]; int n = 0, r; while (n < MSG_SIZE) { r = read(sockfd, msg + n, MSG_SIZE - n); if (r < 0) { perror("client read"); fprintf(stderr, "%d/%d: Couldn't read remote address [%d]\n", n, MSG_SIZE, index); return -1; } n += r; } rem_dest = &ctx.rem_dest[index]; sscanf(msg, MSG_SSCAN, &rem_dest->lid, &rem_dest->recv_qpn, &rem_dest->send_qpn, &rem_dest->send_psn, &rem_dest->srqn, gid); wire_gid_to_gid(gid, &rem_dest->gid); inet_ntop(AF_INET6, &rem_dest->gid, gid, sizeof(gid)); printf(ADDR_FORMAT, "remote", rem_dest->lid, rem_dest->recv_qpn, rem_dest->send_qpn, rem_dest->send_psn, rem_dest->srqn, gid); rem_dest->sockfd = sockfd; return 0; } static void set_ah_attr(struct ibv_ah_attr *attr, struct pingpong_context *myctx, int index) { attr->is_global = 1; attr->grh.hop_limit = 5; attr->grh.dgid = myctx->rem_dest[index].gid; attr->grh.sgid_index = myctx->gidx; } static int connect_qps(int index) { struct ibv_qp_attr attr; memset(&attr, 0, sizeof attr); attr.qp_state = IBV_QPS_RTR; attr.dest_qp_num = ctx.rem_dest[index].send_qpn; attr.path_mtu = ctx.mtu; attr.rq_psn = ctx.rem_dest[index].send_psn; attr.min_rnr_timer = 
12; attr.ah_attr.dlid = ctx.rem_dest[index].lid; attr.ah_attr.sl = ctx.sl; attr.ah_attr.port_num = ctx.ib_port; if (ctx.rem_dest[index].gid.global.interface_id) set_ah_attr(&attr.ah_attr, &ctx, index); if (ibv_modify_qp(ctx.recv_qp[index], &attr, IBV_QP_STATE | IBV_QP_AV | IBV_QP_PATH_MTU | IBV_QP_DEST_QPN | IBV_QP_RQ_PSN | IBV_QP_MAX_DEST_RD_ATOMIC | IBV_QP_MIN_RNR_TIMER)) { fprintf(stderr, "Failed to modify recv QP[%d] to RTR\n", index); return 1; } memset(&attr, 0, sizeof attr); attr.qp_state = IBV_QPS_RTS; attr.timeout = 14; attr.sq_psn = ctx.rem_dest[index].recv_psn; if (ibv_modify_qp(ctx.recv_qp[index], &attr, IBV_QP_STATE | IBV_QP_TIMEOUT | IBV_QP_SQ_PSN)) { fprintf(stderr, "Failed to modify recv QP[%d] to RTS\n", index); return 1; } memset(&attr, 0, sizeof attr); attr.qp_state = IBV_QPS_RTR; attr.dest_qp_num = ctx.rem_dest[index].recv_qpn; attr.path_mtu = ctx.mtu; attr.rq_psn = ctx.rem_dest[index].send_psn; attr.ah_attr.dlid = ctx.rem_dest[index].lid; attr.ah_attr.sl = ctx.sl; attr.ah_attr.port_num = ctx.ib_port; if (ctx.rem_dest[index].gid.global.interface_id) set_ah_attr(&attr.ah_attr, &ctx, index); if (ibv_modify_qp(ctx.send_qp[index], &attr, IBV_QP_STATE | IBV_QP_AV | IBV_QP_PATH_MTU | IBV_QP_DEST_QPN | IBV_QP_RQ_PSN)) { fprintf(stderr, "Failed to modify send QP[%d] to RTR\n", index); return 1; } memset(&attr, 0, sizeof attr); attr.qp_state = IBV_QPS_RTS; attr.timeout = 14; attr.retry_cnt = 7; attr.rnr_retry = 7; attr.sq_psn = ctx.rem_dest[index].recv_psn; if (ibv_modify_qp(ctx.send_qp[index], &attr, IBV_QP_STATE | IBV_QP_TIMEOUT | IBV_QP_SQ_PSN | IBV_QP_RETRY_CNT | IBV_QP_RNR_RETRY | IBV_QP_MAX_QP_RD_ATOMIC)) { fprintf(stderr, "Failed to modify send QP[%d] to RTS\n", index); return 1; } return 0; } static int pp_client_connect(const char *servername, int port) { struct addrinfo *res, *t; char *service; int ret; int sockfd = -1; struct addrinfo hints = { .ai_family = AF_UNSPEC, .ai_socktype = SOCK_STREAM }; if (asprintf(&service, "%d", port) < 0) return 
1; ret = getaddrinfo(servername, service, &hints, &res); if (ret < 0) { fprintf(stderr, "%s for %s:%d\n", gai_strerror(ret), servername, port); free(service); return 1; } for (t = res; t; t = t->ai_next) { sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol); if (sockfd >= 0) { if (!connect(sockfd, t->ai_addr, t->ai_addrlen)) break; close(sockfd); sockfd = -1; } } freeaddrinfo_null(res); free(service); if (sockfd < 0) { fprintf(stderr, "Couldn't connect to %s:%d\n", servername, port); return 1; } if (send_local_dest(sockfd, 0)) return 1; if (recv_remote_dest(sockfd, 0)) return 1; if (connect_qps(0)) return 1; return 0; } static int pp_server_connect(int port) { struct addrinfo *res, *t; char *service; int ret, i, n; int sockfd = -1, connfd; struct addrinfo hints = { .ai_flags = AI_PASSIVE, .ai_family = AF_INET, .ai_socktype = SOCK_STREAM }; if (asprintf(&service, "%d", port) < 0) return 1; ret = getaddrinfo(NULL, service, &hints, &res); if (ret < 0) { fprintf(stderr, "%s for port %d\n", gai_strerror(ret), port); free(service); return 1; } for (t = res; t; t = t->ai_next) { sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol); if (sockfd >= 0) { n = 1; setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &n, sizeof n); if (!bind(sockfd, t->ai_addr, t->ai_addrlen)) break; close(sockfd); sockfd = -1; } } freeaddrinfo_null(res); free(service); if (sockfd < 0) { fprintf(stderr, "Couldn't listen to port %d\n", port); return 1; } - listen(sockfd, ctx.num_clients); + if (listen(sockfd, ctx.num_clients)) { + perror("listen() failed"); + close(sockfd); + return 1; + } for (i = 0; i < ctx.num_clients; i++) { connfd = accept(sockfd, NULL, NULL); if (connfd < 0) { fprintf(stderr, "accept() failed for client %d\n", i); return 1; } if (recv_remote_dest(connfd, i)) return 1; if (send_local_dest(connfd, i)) return 1; if (connect_qps(i)) return 1; } close(sockfd); return 0; } static int pp_close_ctx(void) { int i; for (i = 0; i < ctx.num_clients; ++i) { if 
(ibv_destroy_qp(ctx.send_qp[i])) { fprintf(stderr, "Couldn't destroy INI QP[%d]\n", i); return 1; } if (ibv_destroy_qp(ctx.recv_qp[i])) { fprintf(stderr, "Couldn't destroy TGT QP[%d]\n", i); return 1; } if (ctx.rem_dest[i].sockfd) close(ctx.rem_dest[i].sockfd); } if (ibv_destroy_srq(ctx.srq)) { fprintf(stderr, "Couldn't destroy SRQ\n"); return 1; } if (ctx.xrcd && ibv_close_xrcd(ctx.xrcd)) { fprintf(stderr, "Couldn't close the XRC Domain\n"); return 1; } if (ctx.fd >= 0 && close(ctx.fd)) { fprintf(stderr, "Couldn't close the file for the XRC Domain\n"); return 1; } if (ibv_destroy_cq(ctx.send_cq)) { fprintf(stderr, "Couldn't destroy send CQ\n"); return 1; } if (ibv_destroy_cq(ctx.recv_cq)) { fprintf(stderr, "Couldn't destroy recv CQ\n"); return 1; } if (ibv_dereg_mr(ctx.mr)) { fprintf(stderr, "Couldn't deregister MR\n"); return 1; } if (ibv_dealloc_pd(ctx.pd)) { fprintf(stderr, "Couldn't deallocate PD\n"); return 1; } if (ctx.channel) { if (ibv_destroy_comp_channel(ctx.channel)) { fprintf(stderr, "Couldn't destroy completion channel\n"); return 1; } } if (ibv_close_device(ctx.context)) { fprintf(stderr, "Couldn't release context\n"); return 1; } free(ctx.buf); free(ctx.rem_dest); free(ctx.send_qp); free(ctx.recv_qp); return 0; } static int pp_post_recv(int cnt) { struct ibv_sge sge; struct ibv_recv_wr wr, *bad_wr; sge.addr = (uintptr_t) ctx.buf; sge.length = ctx.size; sge.lkey = ctx.mr->lkey; wr.next = NULL; wr.wr_id = (uintptr_t) &ctx; wr.sg_list = &sge; wr.num_sge = 1; while (cnt--) { if (ibv_post_srq_recv(ctx.srq, &wr, &bad_wr)) { fprintf(stderr, "Failed to post receive to SRQ\n"); return 1; } } return 0; } /* * Send to each client round robin on each set of xrc send/recv qp. * Generate a completion on the last send. 
*/ static int pp_post_send(int index) { struct ibv_sge sge; struct ibv_send_wr wr, *bad_wr; int qpi; sge.addr = (uintptr_t) ctx.buf; sge.length = ctx.size; sge.lkey = ctx.mr->lkey; wr.wr_id = (uintptr_t) index; wr.next = NULL; wr.sg_list = &sge; wr.num_sge = 1; wr.opcode = IBV_WR_SEND; wr.qp_type.xrc.remote_srqn = ctx.rem_dest[index].srqn; qpi = (index + ctx.rem_dest[index].pp_cnt) % ctx.num_clients; wr.send_flags = (++ctx.rem_dest[index].pp_cnt >= ctx.num_tests) ? IBV_SEND_SIGNALED : 0; return ibv_post_send(ctx.send_qp[qpi], &wr, &bad_wr); } static int find_qp(int qpn) { int i; if (ctx.num_clients == 1) return 0; for (i = 0; i < ctx.num_clients; ++i) if (ctx.recv_qp[i]->qp_num == qpn) return i; fprintf(stderr, "Unable to find qp %x\n", qpn); return 0; } static int get_cq_event(void) { struct ibv_cq *ev_cq; void *ev_ctx; if (ibv_get_cq_event(ctx.channel, &ev_cq, &ev_ctx)) { fprintf(stderr, "Failed to get cq_event\n"); return 1; } if (ev_cq != ctx.recv_cq) { fprintf(stderr, "CQ event for unknown CQ %p\n", ev_cq); return 1; } if (ibv_req_notify_cq(ctx.recv_cq, 0)) { fprintf(stderr, "Couldn't request CQ notification\n"); return 1; } return 0; } static void init(void) { srand48(getpid() * time(NULL)); ctx.size = 4096; ctx.ib_port = 1; ctx.num_clients = 1; ctx.num_tests = 5; ctx.mtu = IBV_MTU_2048; ctx.sl = 0; ctx.gidx = -1; } static void usage(const char *argv0) { printf("Usage:\n"); printf(" %s start a server and wait for connection\n", argv0); printf(" %s connect to server at \n", argv0); printf("\n"); printf("Options:\n"); printf(" -p, --port= listen on/connect to port (default 18515)\n"); printf(" -d, --ib-dev= use IB device (default first device found)\n"); printf(" -i, --ib-port= use port of IB device (default 1)\n"); printf(" -s, --size= size of message to exchange (default 4096)\n"); printf(" -m, --mtu= path MTU (default 2048)\n"); printf(" -c, --clients= number of clients (on server only, default 1)\n"); printf(" -n, --num_tests= number of tests per client 
(default 5)\n"); printf(" -l, --sl= service level value\n"); printf(" -e, --events sleep on CQ events (default poll)\n"); printf(" -g, --gid-idx= local port gid index\n"); } int main(int argc, char *argv[]) { char *ib_devname = NULL; char *servername = NULL; int port = 18515; int i, total, cnt = 0; int ne, qpi, num_cq_events = 0; struct ibv_wc wc; init(); while (1) { int c; static struct option long_options[] = { { .name = "port", .has_arg = 1, .val = 'p' }, { .name = "ib-dev", .has_arg = 1, .val = 'd' }, { .name = "ib-port", .has_arg = 1, .val = 'i' }, { .name = "size", .has_arg = 1, .val = 's' }, { .name = "mtu", .has_arg = 1, .val = 'm' }, { .name = "clients", .has_arg = 1, .val = 'c' }, { .name = "num_tests", .has_arg = 1, .val = 'n' }, { .name = "sl", .has_arg = 1, .val = 'l' }, { .name = "events", .has_arg = 0, .val = 'e' }, { .name = "gid-idx", .has_arg = 1, .val = 'g' }, {} }; c = getopt_long(argc, argv, "p:d:i:s:m:c:n:l:eg:", long_options, NULL); if (c == -1) break; switch (c) { case 'p': port = strtol(optarg, NULL, 0); if (port < 0 || port > 65535) { usage(argv[0]); return 1; } break; case 'd': ib_devname = strdupa(optarg); break; case 'i': ctx.ib_port = strtol(optarg, NULL, 0); if (ctx.ib_port < 0) { usage(argv[0]); return 1; } break; case 's': ctx.size = strtol(optarg, NULL, 0); break; case 'm': ctx.mtu = pp_mtu_to_enum(strtol(optarg, NULL, 0)); if (ctx.mtu == 0) { usage(argv[0]); return 1; } break; case 'c': ctx.num_clients = strtol(optarg, NULL, 0); break; case 'n': ctx.num_tests = strtol(optarg, NULL, 0); break; case 'l': ctx.sl = strtol(optarg, NULL, 0); break; case 'g': ctx.gidx = strtol(optarg, NULL, 0); break; case 'e': ctx.use_event = 1; break; default: usage(argv[0]); return 1; } } if (optind == argc - 1) { servername = strdupa(argv[optind]); ctx.num_clients = 1; } else if (optind < argc) { usage(argv[0]); return 1; } page_size = sysconf(_SC_PAGESIZE); if (pp_init_ctx(ib_devname)) return 1; if (pp_post_recv(ctx.num_clients)) { fprintf(stderr, 
"Couldn't post receives\n"); return 1; } if (servername) { if (pp_client_connect(servername, port)) return 1; } else { if (pp_server_connect(port)) return 1; for (i = 0; i < ctx.num_clients; i++) pp_post_send(i); } total = ctx.num_clients * ctx.num_tests; while (cnt < total) { if (ctx.use_event) { if (get_cq_event()) return 1; ++num_cq_events; } do { ne = ibv_poll_cq(ctx.recv_cq, 1, &wc); if (ne < 0) { fprintf(stderr, "Error polling cq %d\n", ne); return 1; } else if (ne == 0) { break; } if (wc.status) { fprintf(stderr, "Work completion error %d\n", wc.status); return 1; } pp_post_recv(ne); qpi = find_qp(wc.qp_num); if (ctx.rem_dest[qpi].pp_cnt < ctx.num_tests) pp_post_send(qpi); cnt += ne; } while (ne > 0); } for (cnt = 0; cnt < ctx.num_clients; cnt += ne) { ne = ibv_poll_cq(ctx.send_cq, 1, &wc); if (ne < 0) { fprintf(stderr, "Error polling cq %d\n", ne); return 1; } } if (ctx.use_event) ibv_ack_cq_events(ctx.recv_cq, num_cq_events); /* Process should get an ack from the daemon to close its resources to * make sure latest daemon's response sent via its target QP destined * to an XSRQ created by another client won't be lost. * Failure to do so may cause the other client to wait for that sent * message forever. See comment on pp_post_send. */ if (servername) { if (pp_client_termination()) return 1; } else if (pp_server_termination()) { return 1; } if (pp_close_ctx()) return 1; printf("success\n"); return 0; } Index: head/contrib/ofed/libmlx5/bitmap.h =================================================================== --- head/contrib/ofed/libmlx5/bitmap.h (revision 363219) +++ head/contrib/ofed/libmlx5/bitmap.h (revision 363220) @@ -1,111 +1,111 @@ /* * Copyright (c) 2000, 2011 Mellanox Technology Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. 
You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ #ifndef BITMAP_H #define BITMAP_H #include #include #include #include #include #include #include #include #include #include "mlx5.h" /* Only ia64 requires this */ #ifdef __ia64__ #define MLX5_SHM_ADDR ((void *)0x8000000000000000UL) #define MLX5_SHMAT_FLAGS (SHM_RND) #else #define MLX5_SHM_ADDR NULL #define MLX5_SHMAT_FLAGS 0 #endif #define BITS_PER_LONG (8 * sizeof(long)) #define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_LONG) #ifndef HPAGE_SIZE #define HPAGE_SIZE (2UL * 1024 * 1024) #endif #define MLX5_SHM_LENGTH HPAGE_SIZE #define MLX5_Q_CHUNK_SIZE 32768 #define MLX5_SHM_NUM_REGION 64 static inline unsigned long mlx5_ffz(uint32_t word) { return __builtin_ffs(~word) - 1; } static inline uint32_t mlx5_find_first_zero_bit(const unsigned long *addr, uint32_t size) { const unsigned long *p = addr; uint32_t result = 0; unsigned long tmp; while (size & ~(BITS_PER_LONG - 1)) { tmp = *(p++); if (~tmp) goto found; result += BITS_PER_LONG; size -= BITS_PER_LONG; } if (!size) return result; tmp = (*p) | (~0UL << size); if (tmp == (uint32_t)~0UL) /* Are any bits zero? */ return result + size; /* Nope. 
*/ found: return result + mlx5_ffz(tmp); } static inline void mlx5_set_bit(unsigned int nr, unsigned long *addr) { - addr[(nr / BITS_PER_LONG)] |= (1 << (nr % BITS_PER_LONG)); + addr[(nr / BITS_PER_LONG)] |= (1UL << (nr % BITS_PER_LONG)); } -static inline void mlx5_clear_bit(unsigned int nr, unsigned long *addr) +static inline void mlx5_clear_bit(unsigned int nr, unsigned long *addr) { - addr[(nr / BITS_PER_LONG)] &= ~(1 << (nr % BITS_PER_LONG)); + addr[(nr / BITS_PER_LONG)] &= ~(1UL << (nr % BITS_PER_LONG)); } static inline int mlx5_test_bit(unsigned int nr, const unsigned long *addr) { - return !!(addr[(nr / BITS_PER_LONG)] & (1 << (nr % BITS_PER_LONG))); + return !!(addr[(nr / BITS_PER_LONG)] & (1UL << (nr % BITS_PER_LONG))); } #endif Index: head/contrib/ofed/librdmacm/examples/mckey.c =================================================================== --- head/contrib/ofed/librdmacm/examples/mckey.c (revision 363219) +++ head/contrib/ofed/librdmacm/examples/mckey.c (revision 363220) @@ -1,631 +1,630 @@ /* * Copyright (c) 2005-2007 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * $Id$ */ #include #include #include #include #include #include #include #include #include #include #include #include struct cmatest_node { int id; struct rdma_cm_id *cma_id; int connected; struct ibv_pd *pd; struct ibv_cq *cq; struct ibv_mr *mr; struct ibv_ah *ah; uint32_t remote_qpn; uint32_t remote_qkey; void *mem; }; struct cmatest { struct rdma_event_channel *channel; pthread_t cmathread; struct cmatest_node *nodes; int conn_index; int connects_left; struct sockaddr_storage dst_in; struct sockaddr *dst_addr; struct sockaddr_storage src_in; struct sockaddr *src_addr; }; static struct cmatest test; static int connections = 1; static int message_size = 100; static int message_count = 10; static int is_sender; static int unmapped_addr; static char *dst_addr; static char *src_addr; static enum rdma_port_space port_space = RDMA_PS_UDP; static int create_message(struct cmatest_node *node) { if (!message_size) message_count = 0; if (!message_count) return 0; node->mem = malloc(message_size + sizeof(struct ibv_grh)); if (!node->mem) { printf("failed message allocation\n"); return -1; } node->mr = ibv_reg_mr(node->pd, node->mem, message_size + sizeof(struct ibv_grh), IBV_ACCESS_LOCAL_WRITE); if (!node->mr) { printf("failed to reg MR\n"); goto err; } return 0; err: free(node->mem); return -1; } static int verify_test_params(struct cmatest_node *node) { struct ibv_port_attr port_attr; int ret; ret = ibv_query_port(node->cma_id->verbs, node->cma_id->port_num, &port_attr); if (ret) return ret; if 
(message_count && message_size > (1 << (port_attr.active_mtu + 7))) { printf("mckey: message_size %d is larger than active mtu %d\n", message_size, 1 << (port_attr.active_mtu + 7)); return -EINVAL; } return 0; } static int init_node(struct cmatest_node *node) { struct ibv_qp_init_attr init_qp_attr; int cqe, ret; node->pd = ibv_alloc_pd(node->cma_id->verbs); if (!node->pd) { ret = -ENOMEM; printf("mckey: unable to allocate PD\n"); goto out; } cqe = message_count ? message_count * 2 : 2; node->cq = ibv_create_cq(node->cma_id->verbs, cqe, node, NULL, 0); if (!node->cq) { ret = -ENOMEM; printf("mckey: unable to create CQ\n"); goto out; } memset(&init_qp_attr, 0, sizeof init_qp_attr); init_qp_attr.cap.max_send_wr = message_count ? message_count : 1; init_qp_attr.cap.max_recv_wr = message_count ? message_count : 1; init_qp_attr.cap.max_send_sge = 1; init_qp_attr.cap.max_recv_sge = 1; init_qp_attr.qp_context = node; init_qp_attr.sq_sig_all = 0; init_qp_attr.qp_type = IBV_QPT_UD; init_qp_attr.send_cq = node->cq; init_qp_attr.recv_cq = node->cq; ret = rdma_create_qp(node->cma_id, node->pd, &init_qp_attr); if (ret) { perror("mckey: unable to create QP"); goto out; } ret = create_message(node); if (ret) { printf("mckey: failed to create messages: %d\n", ret); goto out; } out: return ret; } static int post_recvs(struct cmatest_node *node) { struct ibv_recv_wr recv_wr, *recv_failure; struct ibv_sge sge; int i, ret = 0; if (!message_count) return 0; recv_wr.next = NULL; recv_wr.sg_list = &sge; recv_wr.num_sge = 1; recv_wr.wr_id = (uintptr_t) node; sge.length = message_size + sizeof(struct ibv_grh); sge.lkey = node->mr->lkey; sge.addr = (uintptr_t) node->mem; for (i = 0; i < message_count && !ret; i++ ) { ret = ibv_post_recv(node->cma_id->qp, &recv_wr, &recv_failure); if (ret) { printf("failed to post receives: %d\n", ret); break; } } return ret; } static int post_sends(struct cmatest_node *node, int signal_flag) { struct ibv_send_wr send_wr, *bad_send_wr; struct ibv_sge sge; int 
i, ret = 0; if (!node->connected || !message_count) return 0; send_wr.next = NULL; send_wr.sg_list = &sge; send_wr.num_sge = 1; send_wr.opcode = IBV_WR_SEND_WITH_IMM; send_wr.send_flags = signal_flag; send_wr.wr_id = (unsigned long)node; send_wr.imm_data = htobe32(node->cma_id->qp->qp_num); send_wr.wr.ud.ah = node->ah; send_wr.wr.ud.remote_qpn = node->remote_qpn; send_wr.wr.ud.remote_qkey = node->remote_qkey; sge.length = message_size; sge.lkey = node->mr->lkey; sge.addr = (uintptr_t) node->mem; for (i = 0; i < message_count && !ret; i++) { ret = ibv_post_send(node->cma_id->qp, &send_wr, &bad_send_wr); if (ret) printf("failed to post sends: %d\n", ret); } return ret; } static void connect_error(void) { test.connects_left--; } static int addr_handler(struct cmatest_node *node) { int ret; ret = verify_test_params(node); if (ret) goto err; ret = init_node(node); if (ret) goto err; if (!is_sender) { ret = post_recvs(node); if (ret) goto err; } ret = rdma_join_multicast(node->cma_id, test.dst_addr, node); if (ret) { perror("mckey: failure joining"); goto err; } return 0; err: connect_error(); return ret; } static int join_handler(struct cmatest_node *node, struct rdma_ud_param *param) { char buf[40]; inet_ntop(AF_INET6, param->ah_attr.grh.dgid.raw, buf, 40); printf("mckey: joined dgid: %s mlid 0x%x sl %d\n", buf, param->ah_attr.dlid, param->ah_attr.sl); node->remote_qpn = param->qp_num; node->remote_qkey = param->qkey; node->ah = ibv_create_ah(node->pd, ¶m->ah_attr); if (!node->ah) { printf("mckey: failure creating address handle\n"); goto err; } node->connected = 1; test.connects_left--; return 0; err: connect_error(); return -1; } static int cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) { int ret = 0; switch (event->event) { case RDMA_CM_EVENT_ADDR_RESOLVED: ret = addr_handler(cma_id->context); break; case RDMA_CM_EVENT_MULTICAST_JOIN: ret = join_handler(cma_id->context, &event->param.ud); break; case RDMA_CM_EVENT_ADDR_ERROR: case 
RDMA_CM_EVENT_ROUTE_ERROR: case RDMA_CM_EVENT_MULTICAST_ERROR: printf("mckey: event: %s, error: %d\n", rdma_event_str(event->event), event->status); connect_error(); ret = event->status; break; case RDMA_CM_EVENT_DEVICE_REMOVAL: /* Cleanup will occur after test completes. */ break; default: break; } return ret; } static void *cma_thread(void *arg) { struct rdma_cm_event *event; int ret; while (1) { ret = rdma_get_cm_event(test.channel, &event); if (ret) { perror("rdma_get_cm_event"); break; } switch (event->event) { case RDMA_CM_EVENT_MULTICAST_ERROR: case RDMA_CM_EVENT_ADDR_CHANGE: printf("mckey: event: %s, status: %d\n", rdma_event_str(event->event), event->status); break; default: break; } rdma_ack_cm_event(event); } return NULL; } static void destroy_node(struct cmatest_node *node) { if (!node->cma_id) return; if (node->ah) ibv_destroy_ah(node->ah); if (node->cma_id->qp) rdma_destroy_qp(node->cma_id); if (node->cq) ibv_destroy_cq(node->cq); if (node->mem) { ibv_dereg_mr(node->mr); free(node->mem); } if (node->pd) ibv_dealloc_pd(node->pd); /* Destroy the RDMA ID after all device resources */ rdma_destroy_id(node->cma_id); } static int alloc_nodes(void) { int ret, i; test.nodes = malloc(sizeof *test.nodes * connections); if (!test.nodes) { printf("mckey: unable to allocate memory for test nodes\n"); return -ENOMEM; } memset(test.nodes, 0, sizeof *test.nodes * connections); for (i = 0; i < connections; i++) { test.nodes[i].id = i; ret = rdma_create_id(test.channel, &test.nodes[i].cma_id, &test.nodes[i], port_space); if (ret) goto err; } return 0; err: while (--i >= 0) rdma_destroy_id(test.nodes[i].cma_id); free(test.nodes); return ret; } static void destroy_nodes(void) { int i; for (i = 0; i < connections; i++) destroy_node(&test.nodes[i]); free(test.nodes); } static int poll_cqs(void) { struct ibv_wc wc[8]; int done, i, ret; for (i = 0; i < connections; i++) { if (!test.nodes[i].connected) continue; for (done = 0; done < message_count; done += ret) { ret = 
ibv_poll_cq(test.nodes[i].cq, 8, wc); if (ret < 0) { printf("mckey: failed polling CQ: %d\n", ret); return ret; } } } return 0; } static int connect_events(void) { struct rdma_cm_event *event; int ret = 0; while (test.connects_left && !ret) { ret = rdma_get_cm_event(test.channel, &event); if (!ret) { ret = cma_handler(event->id, event); rdma_ack_cm_event(event); } } return ret; } static int get_addr(char *dst, struct sockaddr *addr) { struct addrinfo *res; int ret; ret = getaddrinfo(dst, NULL, NULL, &res); if (ret) { printf("getaddrinfo failed (%s) - invalid hostname or IP address\n", gai_strerror(ret)); return ret; } memcpy(addr, res->ai_addr, res->ai_addrlen); freeaddrinfo(res); return ret; } static int get_dst_addr(char *dst, struct sockaddr *addr) { struct sockaddr_ib *sib; if (!unmapped_addr) return get_addr(dst, addr); sib = (struct sockaddr_ib *) addr; memset(sib, 0, sizeof *sib); sib->sib_family = AF_IB; - inet_pton(AF_INET6, dst, &sib->sib_addr); - return 0; + return inet_pton(AF_INET6, dst, &sib->sib_addr) != 1; } static int run(void) { int i, ret, err; printf("mckey: starting %s\n", is_sender ? "client" : "server"); if (src_addr) { ret = get_addr(src_addr, (struct sockaddr *) &test.src_in); if (ret) return ret; } ret = get_dst_addr(dst_addr, (struct sockaddr *) &test.dst_in); if (ret) return ret; printf("mckey: joining\n"); for (i = 0; i < connections; i++) { if (src_addr) { ret = rdma_bind_addr(test.nodes[i].cma_id, test.src_addr); if (ret) { perror("mckey: addr bind failure"); connect_error(); return ret; } } if (unmapped_addr) ret = addr_handler(&test.nodes[i]); else ret = rdma_resolve_addr(test.nodes[i].cma_id, test.src_addr, test.dst_addr, 2000); if (ret) { perror("mckey: resolve addr failure"); connect_error(); return ret; } } ret = connect_events(); if (ret) goto out; pthread_create(&test.cmathread, NULL, cma_thread, NULL); /* * Pause to give SM chance to configure switches. We don't want to * handle reliability issue in this simple test program. 
*/ sleep(3); if (message_count) { if (is_sender) { printf("initiating data transfers\n"); for (i = 0; i < connections; i++) { ret = post_sends(&test.nodes[i], 0); if (ret) goto out; } } else { printf("receiving data transfers\n"); ret = poll_cqs(); if (ret) goto out; } printf("data transfers complete\n"); } out: for (i = 0; i < connections; i++) { err = rdma_leave_multicast(test.nodes[i].cma_id, test.dst_addr); if (err) { perror("mckey: failure leaving"); ret = err; } } return ret; } int main(int argc, char **argv) { int op, ret; while ((op = getopt(argc, argv, "m:M:sb:c:C:S:p:")) != -1) { switch (op) { case 'm': dst_addr = optarg; break; case 'M': unmapped_addr = 1; dst_addr = optarg; break; case 's': is_sender = 1; break; case 'b': src_addr = optarg; test.src_addr = (struct sockaddr *) &test.src_in; break; case 'c': connections = atoi(optarg); break; case 'C': message_count = atoi(optarg); break; case 'S': message_size = atoi(optarg); break; case 'p': port_space = strtol(optarg, NULL, 0); break; default: printf("usage: %s\n", argv[0]); printf("\t-m multicast_address\n"); printf("\t[-M unmapped_multicast_address]\n" "\t replaces -m and requires -b\n"); printf("\t[-s(ender)]\n"); printf("\t[-b bind_address]\n"); printf("\t[-c connections]\n"); printf("\t[-C message_count]\n"); printf("\t[-S message_size]\n"); printf("\t[-p port_space - %#x for UDP (default), " "%#x for IPOIB]\n", RDMA_PS_UDP, RDMA_PS_IPOIB); exit(1); } } if (unmapped_addr && !src_addr) { printf("unmapped multicast address requires binding " "to source address\n"); exit(1); } test.dst_addr = (struct sockaddr *) &test.dst_in; test.connects_left = connections; test.channel = rdma_create_event_channel(); if (!test.channel) { perror("failed to create event channel"); exit(1); } if (alloc_nodes()) exit(1); ret = run(); printf("test complete\n"); destroy_nodes(); rdma_destroy_event_channel(test.channel); printf("return status %d\n", ret); return ret; } Index: head/contrib/ofed/opensm/opensm/osm_perfmgr.c 
=================================================================== --- head/contrib/ofed/opensm/opensm/osm_perfmgr.c (revision 363219) +++ head/contrib/ofed/opensm/opensm/osm_perfmgr.c (revision 363220) @@ -1,2035 +1,2037 @@ /* * Copyright (c) 2007 The Regents of the University of California. * Copyright (c) 2007-2009 Voltaire, Inc. All rights reserved. * Copyright (c) 2009,2010 HNR Consulting. All rights reserved. * Copyright (c) 2013 Lawrence Livermore National Security. All rights reserved. * Copyright (c) 2011-2014 Mellanox Technologies LTD. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * */ /* * Abstract: * Implementation of osm_perfmgr_t. * This object implements an IBA performance manager. 
* * Author: * Ira Weiny, LLNL */ #if HAVE_CONFIG_H # include #endif /* HAVE_CONFIG_H */ #ifdef ENABLE_OSM_PERF_MGR #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define FILE_ID OSM_FILE_PERFMGR_C #include #include #include #include #include #include #define PERFMGR_INITIAL_TID_VALUE 0xcafe #ifdef ENABLE_OSM_PERF_MGR_PROFILE struct { double fastest_us; double slowest_us; double avg_us; uint64_t num; } perfmgr_mad_stats = { fastest_us: DBL_MAX, slowest_us: DBL_MIN, avg_us: 0, num:0}; /* diff must be something which can fit in a susecond_t */ static inline void update_mad_stats(struct timeval *diff) { double new = (diff->tv_sec * 1000000) + diff->tv_usec; if (new < perfmgr_mad_stats.fastest_us) perfmgr_mad_stats.fastest_us = new; if (new > perfmgr_mad_stats.slowest_us) perfmgr_mad_stats.slowest_us = new; perfmgr_mad_stats.avg_us = ((perfmgr_mad_stats.avg_us * perfmgr_mad_stats.num) + new) / (perfmgr_mad_stats.num + 1); perfmgr_mad_stats.num++; } static inline void clear_mad_stats(void) { perfmgr_mad_stats.fastest_us = DBL_MAX; perfmgr_mad_stats.slowest_us = DBL_MIN; perfmgr_mad_stats.avg_us = 0; perfmgr_mad_stats.num = 0; } /* after and diff can be the same struct */ static inline void diff_time(struct timeval *before, struct timeval *after, struct timeval *diff) { struct timeval tmp = *after; if (tmp.tv_usec < before->tv_usec) { tmp.tv_sec--; tmp.tv_usec += 1000000; } diff->tv_sec = tmp.tv_sec - before->tv_sec; diff->tv_usec = tmp.tv_usec - before->tv_usec; } #endif /********************************************************************** * Internal helper functions **********************************************************************/ static void init_monitored_nodes(osm_perfmgr_t * pm) { cl_qmap_init(&pm->monitored_map); pm->remove_list = NULL; cl_event_construct(&pm->sig_query); cl_event_init(&pm->sig_query, FALSE); } static void mark_for_removal(osm_perfmgr_t * pm, monitored_node_t * 
node) { if (pm->remove_list) { node->next = pm->remove_list; pm->remove_list = node; } else { node->next = NULL; pm->remove_list = node; } } static void remove_marked_nodes(osm_perfmgr_t * pm) { while (pm->remove_list) { monitored_node_t *next = pm->remove_list->next; int port; cl_qmap_remove_item(&pm->monitored_map, (cl_map_item_t *) (pm->remove_list)); if (pm->rm_nodes) perfmgr_db_delete_entry(pm->db, pm->remove_list->guid); else perfmgr_db_mark_active(pm->db, pm->remove_list->guid, FALSE); if (pm->remove_list->name) free(pm->remove_list->name); for (port = pm->remove_list->esp0 ? 0 : 1; port < pm->remove_list->num_ports; port++) { if (pm->remove_list->port[port].remote_name) free(pm->remove_list->port[port].remote_name); } free(pm->remove_list); pm->remove_list = next; } } static inline void decrement_outstanding_queries(osm_perfmgr_t * pm) { cl_atomic_dec(&pm->outstanding_queries); if (!pm->outstanding_queries) { cl_spinlock_acquire(&pm->lock); if (pm->sweep_state == PERFMGR_SWEEP_POST_PROCESSING) { pm->sweep_state = PERFMGR_SWEEP_SLEEP; OSM_LOG(pm->log, OSM_LOG_INFO, "PM sweep state exiting Post Processing\n"); } cl_spinlock_release(&pm->lock); } cl_event_signal(&pm->sig_query); } /********************************************************************** * Receive the MAD from the vendor layer and post it for processing by * the dispatcher **********************************************************************/ static void perfmgr_mad_recv_callback(osm_madw_t * p_madw, void *bind_context, osm_madw_t * p_req_madw) { osm_perfmgr_t *pm = (osm_perfmgr_t *) bind_context; OSM_LOG_ENTER(pm->log); CL_ASSERT(p_madw); CL_ASSERT(p_req_madw != NULL); osm_madw_copy_context(p_madw, p_req_madw); osm_mad_pool_put(pm->mad_pool, p_req_madw); decrement_outstanding_queries(pm); /* post this message for later processing. 
*/ if (cl_disp_post(pm->pc_disp_h, OSM_MSG_MAD_PORT_COUNTERS, p_madw, NULL, NULL) != CL_SUCCESS) { OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5401: " "PerfMgr Dispatcher post failed\n"); osm_mad_pool_put(pm->mad_pool, p_madw); } OSM_LOG_EXIT(pm->log); } /********************************************************************** * Process MAD send errors **********************************************************************/ static void perfmgr_mad_send_err_callback(void *bind_context, osm_madw_t * p_madw) { osm_perfmgr_t *pm = (osm_perfmgr_t *) bind_context; osm_madw_context_t *context = &p_madw->context; uint64_t node_guid = context->perfmgr_context.node_guid; uint8_t port = context->perfmgr_context.port; cl_map_item_t *p_node; monitored_node_t *p_mon_node; ib_net16_t orig_lid; OSM_LOG_ENTER(pm->log); /* * get the monitored node struct to have the printable name * for log messages */ if ((p_node = cl_qmap_get(&pm->monitored_map, node_guid)) == cl_qmap_end(&pm->monitored_map)) { OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5415: GUID 0x%016" PRIx64 " not found in monitored map\n", node_guid); goto Exit; } p_mon_node = (monitored_node_t *) p_node; OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5402: %s (0x%" PRIx64 ") port %u LID %u TID 0x%" PRIx64 "\n", p_mon_node->name, p_mon_node->guid, port, cl_ntoh16(p_madw->mad_addr.dest_lid), cl_ntoh64(p_madw->p_mad->trans_id)); if (pm->subn->opt.perfmgr_redir && p_madw->status == IB_TIMEOUT) { /* First, find the node in the monitored map */ cl_plock_acquire(&pm->osm->lock); /* Now, validate port number */ if (port >= p_mon_node->num_ports) { cl_plock_release(&pm->osm->lock); OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5416: " "Invalid port num %u for %s (GUID 0x%016" PRIx64 ") num ports %u\n", port, p_mon_node->name, p_mon_node->guid, p_mon_node->num_ports); goto Exit; } /* Clear redirection info for this port except orig_lid */ orig_lid = p_mon_node->port[port].orig_lid; memset(&p_mon_node->port[port], 0, sizeof(monitored_port_t)); 
p_mon_node->port[port].orig_lid = orig_lid; p_mon_node->port[port].valid = TRUE; cl_plock_release(&pm->osm->lock); } Exit: osm_mad_pool_put(pm->mad_pool, p_madw); decrement_outstanding_queries(pm); OSM_LOG_EXIT(pm->log); } /********************************************************************** * Bind the PerfMgr to the vendor layer for MAD sends/receives **********************************************************************/ ib_api_status_t osm_perfmgr_bind(osm_perfmgr_t * pm, ib_net64_t port_guid) { osm_bind_info_t bind_info; ib_api_status_t status = IB_SUCCESS; OSM_LOG_ENTER(pm->log); if (pm->bind_handle != OSM_BIND_INVALID_HANDLE) { OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5403: Multiple binds not allowed\n"); status = IB_ERROR; goto Exit; } bind_info.port_guid = pm->port_guid = port_guid; bind_info.mad_class = IB_MCLASS_PERF; bind_info.class_version = 1; bind_info.is_responder = FALSE; bind_info.is_report_processor = FALSE; bind_info.is_trap_processor = FALSE; bind_info.recv_q_size = OSM_PM_DEFAULT_QP1_RCV_SIZE; bind_info.send_q_size = OSM_PM_DEFAULT_QP1_SEND_SIZE; bind_info.timeout = pm->subn->opt.transaction_timeout; bind_info.retries = pm->subn->opt.transaction_retries; OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Binding to port GUID 0x%" PRIx64 "\n", cl_ntoh64(port_guid)); pm->bind_handle = osm_vendor_bind(pm->vendor, &bind_info, pm->mad_pool, perfmgr_mad_recv_callback, perfmgr_mad_send_err_callback, pm); if (pm->bind_handle == OSM_BIND_INVALID_HANDLE) { status = IB_ERROR; OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5404: Vendor specific bind failed (%s)\n", ib_get_err_str(status)); } Exit: OSM_LOG_EXIT(pm->log); return status; } /********************************************************************** * Unbind the PerfMgr from the vendor layer for MAD sends/receives **********************************************************************/ static void perfmgr_mad_unbind(osm_perfmgr_t * pm) { OSM_LOG_ENTER(pm->log); if (pm->bind_handle == OSM_BIND_INVALID_HANDLE) { OSM_LOG(pm->log, 
OSM_LOG_ERROR, "ERR 5405: No previous bind\n"); goto Exit; } osm_vendor_unbind(pm->bind_handle); Exit: OSM_LOG_EXIT(pm->log); } /********************************************************************** * Given a monitored node and a port, return the qp **********************************************************************/ static ib_net32_t get_qp(monitored_node_t * mon_node, uint8_t port) { ib_net32_t qp = IB_QP1; if (mon_node && mon_node->num_ports && port < mon_node->num_ports && mon_node->port[port].redirection && mon_node->port[port].qp) qp = mon_node->port[port].qp; return qp; } static ib_net16_t get_base_lid(osm_node_t * p_node, uint8_t port) { switch (p_node->node_info.node_type) { case IB_NODE_TYPE_CA: case IB_NODE_TYPE_ROUTER: return osm_node_get_base_lid(p_node, port); case IB_NODE_TYPE_SWITCH: return osm_node_get_base_lid(p_node, 0); default: return 0; } } /********************************************************************** * Given a node, a port, and an optional monitored node, * return the lid appropriate to query that port **********************************************************************/ static ib_net16_t get_lid(osm_node_t * p_node, uint8_t port, monitored_node_t * mon_node) { if (mon_node && mon_node->num_ports && port < mon_node->num_ports && mon_node->port[port].lid) return mon_node->port[port].lid; return get_base_lid(p_node, port); } /********************************************************************** * Build a Performance Management class MAD **********************************************************************/ static osm_madw_t *perfmgr_build_mad(osm_perfmgr_t * perfmgr, ib_net16_t dest_lid, uint8_t sl, ib_net32_t dest_qp, uint16_t pkey_ix, uint8_t mad_method, ib_net16_t attr_id, osm_madw_context_t * p_context, ib_perfmgt_mad_t ** p_pm_mad) { ib_perfmgt_mad_t *pm_mad = NULL; osm_madw_t *p_madw = NULL; OSM_LOG_ENTER(perfmgr->log); p_madw = osm_mad_pool_get(perfmgr->mad_pool, perfmgr->bind_handle, MAD_BLOCK_SIZE, NULL); if (p_madw 
== NULL) return NULL; pm_mad = osm_madw_get_perfmgt_mad_ptr(p_madw); /* build the mad */ pm_mad->header.base_ver = 1; pm_mad->header.mgmt_class = IB_MCLASS_PERF; pm_mad->header.class_ver = 1; pm_mad->header.method = mad_method; pm_mad->header.status = 0; pm_mad->header.class_spec = 0; pm_mad->header.trans_id = cl_hton64((uint64_t) cl_atomic_inc(&perfmgr->trans_id) & (uint64_t) (0xFFFFFFFF)); if (perfmgr->trans_id == 0) pm_mad->header.trans_id = cl_hton64((uint64_t) cl_atomic_inc(&perfmgr->trans_id) & (uint64_t) (0xFFFFFFFF)); pm_mad->header.attr_id = attr_id; pm_mad->header.resv = 0; pm_mad->header.attr_mod = 0; p_madw->mad_addr.dest_lid = dest_lid; p_madw->mad_addr.addr_type.gsi.remote_qp = dest_qp; p_madw->mad_addr.addr_type.gsi.remote_qkey = cl_hton32(IB_QP1_WELL_KNOWN_Q_KEY); p_madw->mad_addr.addr_type.gsi.pkey_ix = pkey_ix; p_madw->mad_addr.addr_type.gsi.service_level = sl; p_madw->mad_addr.addr_type.gsi.global_route = FALSE; p_madw->resp_expected = TRUE; if (p_context) p_madw->context = *p_context; if (p_pm_mad) *p_pm_mad = pm_mad; OSM_LOG_EXIT(perfmgr->log); return (p_madw); } /********************************************************************** * Send a Performance Management class MAD **********************************************************************/ static ib_api_status_t perfmgr_send_mad(osm_perfmgr_t *perfmgr, osm_madw_t * const p_madw) { cl_status_t sts; ib_api_status_t status = osm_vendor_send(perfmgr->bind_handle, p_madw, TRUE); if (status == IB_SUCCESS) { /* pause thread if there are too many outstanding requests */ cl_atomic_inc(&(perfmgr->outstanding_queries)); while (perfmgr->outstanding_queries > (int32_t)perfmgr->max_outstanding_queries) { cl_spinlock_acquire(&perfmgr->lock); if (perfmgr->sweep_state == PERFMGR_SWEEP_SLEEP) { perfmgr->sweep_state = PERFMGR_SWEEP_POST_PROCESSING; OSM_LOG(perfmgr->log, OSM_LOG_INFO, "PM sweep state going into Post Processing\n"); } else if (perfmgr->sweep_state == PERFMGR_SWEEP_ACTIVE) perfmgr->sweep_state 
= PERFMGR_SWEEP_SUSPENDED; cl_spinlock_release(&perfmgr->lock); wait: sts = cl_event_wait_on(&perfmgr->sig_query, EVENT_NO_TIMEOUT, TRUE); if (sts != CL_SUCCESS) goto wait; cl_spinlock_acquire(&perfmgr->lock); if (perfmgr->sweep_state == PERFMGR_SWEEP_SUSPENDED) perfmgr->sweep_state = PERFMGR_SWEEP_ACTIVE; cl_spinlock_release(&perfmgr->lock); } } return (status); } /********************************************************************** * Form and send the PortCounters MAD for a single port **********************************************************************/ static ib_api_status_t perfmgr_send_pc_mad(osm_perfmgr_t * perfmgr, ib_net16_t dest_lid, ib_net32_t dest_qp, uint16_t pkey_ix, uint8_t port, uint8_t mad_method, uint16_t counter_select, uint8_t counter_select2, osm_madw_context_t * p_context, uint8_t sl) { ib_api_status_t status = IB_SUCCESS; ib_port_counters_t *port_counter = NULL; ib_perfmgt_mad_t *pm_mad = NULL; osm_madw_t *p_madw = NULL; OSM_LOG_ENTER(perfmgr->log); p_context->perfmgr_context.mad_attr_id = IB_MAD_ATTR_PORT_CNTRS; p_madw = perfmgr_build_mad(perfmgr, dest_lid, sl, dest_qp, pkey_ix, mad_method, IB_MAD_ATTR_PORT_CNTRS, p_context, &pm_mad); if (p_madw == NULL) return IB_INSUFFICIENT_MEMORY; port_counter = (ib_port_counters_t *) & pm_mad->data; memset(port_counter, 0, sizeof(*port_counter)); port_counter->port_select = port; port_counter->counter_select = cl_hton16(counter_select); port_counter->counter_select2 = counter_select2; status = perfmgr_send_mad(perfmgr, p_madw); OSM_LOG_EXIT(perfmgr->log); return status; } /********************************************************************** * sweep the node_guid_tbl and collect the node guids to be tracked **********************************************************************/ static void collect_guids(cl_map_item_t * p_map_item, void *context) { osm_node_t *node = (osm_node_t *) p_map_item; uint64_t node_guid = cl_ntoh64(node->node_info.node_guid); osm_perfmgr_t *pm = (osm_perfmgr_t *) context; 
monitored_node_t *mon_node = NULL; uint32_t num_ports; int port; OSM_LOG_ENTER(pm->log); if (cl_qmap_get(&pm->monitored_map, node_guid) == cl_qmap_end(&pm->monitored_map)) { if (pm->ignore_cas && (node->node_info.node_type == IB_NODE_TYPE_CA)) goto Exit; /* if not already in map add it */ num_ports = osm_node_get_num_physp(node); mon_node = malloc(sizeof(*mon_node) + sizeof(monitored_port_t) * num_ports); if (!mon_node) { OSM_LOG(pm->log, OSM_LOG_ERROR, "PerfMgr: ERR 5406: " "malloc failed: not handling node %s" "(GUID 0x%" PRIx64 ")\n", node->print_desc, node_guid); goto Exit; } memset(mon_node, 0, sizeof(*mon_node) + sizeof(monitored_port_t) * num_ports); mon_node->guid = node_guid; mon_node->name = strdup(node->print_desc); mon_node->num_ports = num_ports; mon_node->node_type = node->node_info.node_type; /* check for enhanced switch port 0 */ mon_node->esp0 = (node->sw && ib_switch_info_is_enhanced_port0(&node->sw-> switch_info)); for (port = mon_node->esp0 ? 0 : 1; port < num_ports; port++) { monitored_port_t *mon_port = &mon_node->port[port]; osm_physp_t *p_physp = &node->physp_table[port]; osm_physp_t *p_remote_physp = p_physp->p_remote_physp; mon_port->orig_lid = 0; mon_port->valid = FALSE; if (osm_physp_is_valid(p_physp)) { mon_port->orig_lid = get_base_lid(node, port); mon_port->valid = TRUE; } mon_port->remote_valid = FALSE; mon_port->remote_name = NULL; if (p_remote_physp && osm_physp_is_valid(p_remote_physp)) { osm_node_t *p_remote_node = p_remote_physp->p_node; mon_port->remote_valid = TRUE; mon_port->remote_guid = p_remote_node->node_info.node_guid; mon_port->remote_name = strdup(p_remote_node->print_desc); mon_port->remote_port = p_remote_physp->port_num; } } cl_qmap_insert(&pm->monitored_map, node_guid, (cl_map_item_t *) mon_node); } Exit: OSM_LOG_EXIT(pm->log); } /********************************************************************** * Form and send the ClassPortInfo MAD for a single port 
**********************************************************************/ static ib_api_status_t perfmgr_send_cpi_mad(osm_perfmgr_t * pm, ib_net16_t dest_lid, ib_net32_t dest_qp, uint16_t pkey_ix, uint8_t port, osm_madw_context_t * p_context, uint8_t sl) { ib_api_status_t status = IB_SUCCESS; osm_madw_t *p_madw = NULL; OSM_LOG_ENTER(pm->log); p_context->perfmgr_context.mad_attr_id = IB_MAD_ATTR_CLASS_PORT_INFO; p_madw = perfmgr_build_mad(pm, dest_lid, sl, dest_qp, pkey_ix, IB_MAD_METHOD_GET, IB_MAD_ATTR_CLASS_PORT_INFO, p_context, NULL); if (p_madw == NULL) return IB_INSUFFICIENT_MEMORY; status = perfmgr_send_mad(pm, p_madw); OSM_LOG_EXIT(pm->log); return status; } /********************************************************************** * return if some form of PortCountersExtended (PCE || PCE NoIETF) are supported **********************************************************************/ static inline boolean_t pce_supported(monitored_node_t *mon_node, uint8_t port) { monitored_port_t *mon_port = &(mon_node->port[port]); return (mon_port->cpi_valid && (mon_port->cap_mask & IB_PM_EXT_WIDTH_SUPPORTED || mon_port->cap_mask & IB_PM_EXT_WIDTH_NOIETF_SUP)); } /********************************************************************** * return if CapMask.PortCountersXmitWaitSupported is set **********************************************************************/ static inline boolean_t xmit_wait_supported(monitored_node_t *mon_node, uint8_t port) { monitored_port_t *mon_port = &(mon_node->port[port]); return (mon_port->cpi_valid && (mon_port->cap_mask & IB_PM_PC_XMIT_WAIT_SUP)); } /********************************************************************** * return if "full" PortCountersExtended (IETF) is indicated **********************************************************************/ static inline boolean_t ietf_supported(monitored_node_t *mon_node, uint8_t port) { monitored_port_t *mon_port = &(mon_node->port[port]); return (mon_port->cpi_valid && (mon_port->cap_mask & 
IB_PM_EXT_WIDTH_SUPPORTED)); } /********************************************************************** * Form and send the PortCountersExtended MAD for a single port **********************************************************************/ static ib_api_status_t perfmgr_send_pce_mad(osm_perfmgr_t * perfmgr, ib_net16_t dest_lid, ib_net32_t dest_qp, uint16_t pkey_ix, uint8_t port, uint8_t mad_method, osm_madw_context_t * p_context, uint8_t sl) { ib_api_status_t status = IB_SUCCESS; ib_port_counters_ext_t *port_counter_ext = NULL; ib_perfmgt_mad_t *pm_mad = NULL; osm_madw_t *p_madw = NULL; OSM_LOG_ENTER(perfmgr->log); p_context->perfmgr_context.mad_attr_id = IB_MAD_ATTR_PORT_CNTRS_EXT; p_madw = perfmgr_build_mad(perfmgr, dest_lid, sl, dest_qp, pkey_ix, mad_method, IB_MAD_ATTR_PORT_CNTRS_EXT, p_context, &pm_mad); if (p_madw == NULL) return IB_INSUFFICIENT_MEMORY; port_counter_ext = (ib_port_counters_ext_t *) & pm_mad->data; memset(port_counter_ext, 0, sizeof(*port_counter_ext)); port_counter_ext->port_select = port; port_counter_ext->counter_select = cl_hton16(0x00FF); status = perfmgr_send_mad(perfmgr, p_madw); OSM_LOG_EXIT(perfmgr->log); return status; } /********************************************************************** * query the Port Counters of all the nodes in the subnet **********************************************************************/ static void perfmgr_query_counters(cl_map_item_t * p_map_item, void *context) { ib_api_status_t status = IB_SUCCESS; osm_perfmgr_t *pm = context; osm_node_t *node = NULL; monitored_node_t *mon_node = (monitored_node_t *) p_map_item; osm_madw_context_t mad_context; uint64_t node_guid = 0; ib_net32_t remote_qp; uint8_t port, num_ports = 0; OSM_LOG_ENTER(pm->log); cl_plock_acquire(&pm->osm->lock); node = osm_get_node_by_guid(pm->subn, cl_hton64(mon_node->guid)); if (!node) { OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5407: Node \"%s\" (guid 0x%" PRIx64 ") no longer exists so removing from PerfMgr monitoring\n", mon_node->name, 
mon_node->guid); mark_for_removal(pm, mon_node); goto Exit; } num_ports = osm_node_get_num_physp(node); node_guid = cl_ntoh64(node->node_info.node_guid); /* make sure there is a database object ready to store this info */ if (perfmgr_db_create_entry(pm->db, node_guid, mon_node->esp0, num_ports, node->print_desc) != PERFMGR_EVENT_DB_SUCCESS) { OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5408: DB create entry failed for 0x%" PRIx64 " (%s) : %s\n", node_guid, node->print_desc, strerror(errno)); goto Exit; } perfmgr_db_mark_active(pm->db, node_guid, TRUE); /* issue the query for each port */ for (port = mon_node->esp0 ? 0 : 1; port < num_ports; port++) { ib_net16_t lid; if (!osm_node_get_physp_ptr(node, port)) continue; if (!mon_node->port[port].valid) continue; lid = get_lid(node, port, mon_node); if (lid == 0) { OSM_LOG(pm->log, OSM_LOG_DEBUG, "WARN: node 0x%" PRIx64 " port %d (%s): port out of range, skipping\n", cl_ntoh64(node->node_info.node_guid), port, node->print_desc); continue; } remote_qp = get_qp(mon_node, port); mad_context.perfmgr_context.node_guid = node_guid; mad_context.perfmgr_context.port = port; mad_context.perfmgr_context.mad_method = IB_MAD_METHOD_GET; if (pm->query_cpi && !mon_node->port[port].cpi_valid) { status = perfmgr_send_cpi_mad(pm, lid, remote_qp, mon_node->port[port].pkey_ix, port, &mad_context, 0); /* FIXME SL != 0 */ if (status != IB_SUCCESS) OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5410: " "Failed to issue ClassPortInfo query " "for node 0x%" PRIx64 " port %d (%s)\n", node->node_info.node_guid, port, node->print_desc); if (mon_node->node_type == IB_NODE_TYPE_SWITCH) goto Exit; /* only need to issue 1 CPI query for switches */ } else { #ifdef ENABLE_OSM_PERF_MGR_PROFILE gettimeofday(&mad_context.perfmgr_context.query_start, NULL); #endif OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Getting stats for node 0x%" PRIx64 " port %d (lid %u) (%s)\n", node_guid, port, cl_ntoh16(lid), node->print_desc); status = perfmgr_send_pc_mad(pm, lid, remote_qp, 
mon_node->port[port].pkey_ix, port, IB_MAD_METHOD_GET, 0xffff, 1, &mad_context, 0); /* FIXME SL != 0 */ if (status != IB_SUCCESS) OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5409: " "Failed to issue port counter query for node 0x%" PRIx64 " port %d (%s)\n", node->node_info.node_guid, port, node->print_desc); if (pce_supported(mon_node, port)) { #if ENABLE_OSM_PERF_MGR_PROFILE gettimeofday(&mad_context.perfmgr_context.query_start, NULL); #endif status = perfmgr_send_pce_mad(pm, lid, remote_qp, mon_node->port[port].pkey_ix, port, IB_MAD_METHOD_GET, &mad_context, 0); /* FIXME SL != 0 */ if (status != IB_SUCCESS) OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5417: Failed to issue " "port counter query for " "node 0x%" PRIx64 " port " "%d (%s)\n", node->node_info.node_guid, port, node->print_desc); } } } Exit: cl_plock_release(&pm->osm->lock); OSM_LOG_EXIT(pm->log); } /********************************************************************** * Discovery stuff * This code should not be here, but merged with main OpenSM **********************************************************************/ extern int wait_for_pending_transactions(osm_stats_t * stats); extern void osm_drop_mgr_process(IN osm_sm_t * sm); static int sweep_hop_1(osm_sm_t * sm) { ib_api_status_t status = IB_SUCCESS; osm_madw_context_t context; osm_node_t *p_node; osm_port_t *p_port; osm_dr_path_t hop_1_path; ib_net64_t port_guid; uint8_t port_num; uint8_t path_array[IB_SUBNET_PATH_HOPS_MAX]; uint8_t num_ports; osm_physp_t *p_ext_physp; port_guid = sm->p_subn->sm_port_guid; p_port = osm_get_port_by_guid(sm->p_subn, port_guid); if (!p_port) { OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 5481: No SM port object\n"); return -1; } p_node = p_port->p_node; port_num = ib_node_info_get_local_port_num(&p_node->node_info); OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Probing hop 1 on local port %u\n", port_num); memset(path_array, 0, sizeof(path_array)); /* the hop_1 operations depend on the type of our node. 
* Currently - legal nodes that can host SM are SW and CA */ switch (osm_node_get_type(p_node)) { case IB_NODE_TYPE_CA: case IB_NODE_TYPE_ROUTER: memset(&context, 0, sizeof(context)); context.ni_context.node_guid = osm_node_get_node_guid(p_node); context.ni_context.port_num = port_num; path_array[1] = port_num; osm_dr_path_init(&hop_1_path, 1, path_array); CL_PLOCK_ACQUIRE(sm->p_lock); status = osm_req_get(sm, &hop_1_path, IB_MAD_ATTR_NODE_INFO, 0, TRUE, 0, CL_DISP_MSGID_NONE, &context); CL_PLOCK_RELEASE(sm->p_lock); if (status != IB_SUCCESS) OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 5482: " "Request for NodeInfo failed\n"); break; case IB_NODE_TYPE_SWITCH: /* Need to go over all the ports of the switch, and send a node_info * from them. This doesn't include the port 0 of the switch, which * hosts the SM. * Note: We'll send another switchInfo on port 0, since if no ports * are connected, we still want to get some response, and have the * subnet come up. */ num_ports = osm_node_get_num_physp(p_node); for (port_num = 0; port_num < num_ports; port_num++) { /* go through the port only if the port is not DOWN */ p_ext_physp = osm_node_get_physp_ptr(p_node, port_num); if (!p_ext_physp || ib_port_info_get_port_state (&p_ext_physp->port_info) <= IB_LINK_DOWN) continue; memset(&context, 0, sizeof(context)); context.ni_context.node_guid = osm_node_get_node_guid(p_node); context.ni_context.port_num = port_num; path_array[1] = port_num; osm_dr_path_init(&hop_1_path, 1, path_array); CL_PLOCK_ACQUIRE(sm->p_lock); status = osm_req_get(sm, &hop_1_path, IB_MAD_ATTR_NODE_INFO, 0, TRUE, 0, CL_DISP_MSGID_NONE, &context); CL_PLOCK_RELEASE(sm->p_lock); if (status != IB_SUCCESS) OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 5484: " "Request for NodeInfo failed\n"); } break; default: OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 5483: Unknown node type %d\n", osm_node_get_type(p_node)); } return status; }

/*
 * Report whether the SM's own port should be treated as down.
 *
 * Returns 1 when no SM port GUID is set, when the GUID cannot be resolved
 * to a port object, or when the port's physical state is IB_LINK_DOWN.
 * Returns 0 for a switch whose port 0 is not an enhanced port 0 (base SP0)
 * and for any port whose state is not IB_LINK_DOWN.
 *
 * The port object is only inspected while sm->p_lock is held, i.e. under
 * the same lock that protects the GUID lookup.
 */
static unsigned is_sm_port_down(osm_sm_t * sm)
{
	ib_net64_t port_guid;
	osm_port_t *p_port;
	unsigned is_down;

	port_guid = sm->p_subn->sm_port_guid;
	if (port_guid == 0)
		return 1;

	CL_PLOCK_ACQUIRE(sm->p_lock);
	p_port = osm_get_port_by_guid(sm->p_subn, port_guid);
	if (!p_port) {
		CL_PLOCK_RELEASE(sm->p_lock);
		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 5485: "
			"SM port with GUID:%016" PRIx64 " is unknown\n",
			cl_ntoh64(port_guid));
		return 1;
	}

	if (p_port->p_node->sw &&
	    !ib_switch_info_is_enhanced_port0(&p_port->p_node->sw->switch_info))
		is_down = 0;	/* base SP0 */
	else
		is_down = osm_physp_get_port_state(p_port->p_physp) ==
		    IB_LINK_DOWN;
	CL_PLOCK_RELEASE(sm->p_lock);

	return is_down;
}

/*
 * Send a hop-0 directed-route NodeInfo request through the SM's bound port.
 * Returns -1 when no port is bound, otherwise the osm_req_get() status
 * (a failure is also logged as ERR 5486).
 */
static int sweep_hop_0(osm_sm_t * sm)
{
	ib_api_status_t status;
	osm_dr_path_t dr_path;
	osm_bind_handle_t h_bind;
	uint8_t path_array[IB_SUBNET_PATH_HOPS_MAX];

	memset(path_array, 0, sizeof(path_array));

	h_bind = osm_sm_mad_ctrl_get_bind_handle(&sm->mad_ctrl);
	if (h_bind == OSM_BIND_INVALID_HANDLE) {
		OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "No bound ports\n");
		return -1;
	}

	osm_dr_path_init(&dr_path, 0, path_array);
	CL_PLOCK_ACQUIRE(sm->p_lock);
	status = osm_req_get(sm, &dr_path, IB_MAD_ATTR_NODE_INFO, 0,
			     TRUE, 0, CL_DISP_MSGID_NONE, NULL);
	CL_PLOCK_RELEASE(sm->p_lock);

	if (status != IB_SUCCESS)
		OSM_LOG(sm->p_log, OSM_LOG_ERROR,
			"ERR 5486: Request for NodeInfo failed\n");

	return status;
}

/* Map callback: clear a node's discovery count and per-port discovered flags. */
static void reset_node_count(cl_map_item_t * p_map_item, void *cxt)
{
	osm_node_t *p_node = (osm_node_t *) p_map_item;

	p_node->discovery_count = 0;
	memset(p_node->physp_discovered, 0,
	       sizeof(uint8_t) * p_node->physp_tbl_size);
}

/* Map callback: clear a port's discovery count. */
static void reset_port_count(cl_map_item_t * p_map_item, void *cxt)
{
	osm_port_t *p_port = (osm_port_t *) p_map_item;

	p_port->discovery_count = 0;
}

/* Map callback: clear a switch's need_update flag. */
static void reset_switch_count(cl_map_item_t * p_map_item, void *cxt)
{
	osm_switch_t *p_sw = (osm_switch_t *) p_map_item;

	p_sw->need_update = 0;
}

/*
 * Run a reduced subnet discovery on behalf of the PerfMgr: reset the
 * per-object discovery state, sweep hop 0, then (unless the SM port is
 * down) hop 1, and finally let the drop manager process the result.
 *
 * Returns the status of the last sweep_hop_*() call that ran.  Note that a
 * non-zero wait_for_pending_transactions() result ends the discovery early
 * without changing that value.
 */
static int perfmgr_discovery(osm_opensm_t * osm)
{
	int ret;

	CL_PLOCK_ACQUIRE(&osm->lock);
	cl_qmap_apply_func(&osm->subn.node_guid_tbl, reset_node_count, NULL);
	cl_qmap_apply_func(&osm->subn.port_guid_tbl, reset_port_count, NULL);
	cl_qmap_apply_func(&osm->subn.sw_guid_tbl, reset_switch_count, NULL);
	CL_PLOCK_RELEASE(&osm->lock);

	osm->subn.in_sweep_hop_0 = TRUE;

	ret = sweep_hop_0(&osm->sm);
	if (ret)
		goto _exit;

	if (wait_for_pending_transactions(&osm->stats))
		goto _exit;

	if (is_sm_port_down(&osm->sm)) {
		OSM_LOG(&osm->log, OSM_LOG_VERBOSE, "SM port is down\n");
		goto _drop;
	}

	osm->subn.in_sweep_hop_0 = FALSE;

	ret = sweep_hop_1(&osm->sm);
	if (ret)
		goto _exit;

	if (wait_for_pending_transactions(&osm->stats))
		goto _exit;

_drop:
	osm_drop_mgr_process(&osm->sm);

_exit:
	return ret;
}

/**********************************************************************
 * Main PerfMgr processor - query the performance counters
 *
 * Does nothing unless the PerfMgr is enabled and no sweep is currently
 * active, suspended or post-processing.  Otherwise marks the sweep active,
 * runs a local discovery when the SM is in STANDBY or NOTACTIVE state,
 * resolves the local port if redirection is enabled, refreshes the
 * monitored-node map, queries every monitored node, removes nodes marked
 * during the sweep and finally puts the sweep state back to SLEEP.
 **********************************************************************/
void osm_perfmgr_process(osm_perfmgr_t * pm)
{
#ifdef ENABLE_OSM_PERF_MGR_PROFILE
	struct timeval before, after;
#endif

	if (pm->state != PERFMGR_STATE_ENABLED)
		return;

	cl_spinlock_acquire(&pm->lock);
	if (pm->sweep_state == PERFMGR_SWEEP_ACTIVE ||
	    pm->sweep_state == PERFMGR_SWEEP_SUSPENDED ||
	    pm->sweep_state == PERFMGR_SWEEP_POST_PROCESSING) {
		/* snapshot under the lock so the log shows the state we tested */
		int busy_state = pm->sweep_state;

		cl_spinlock_release(&pm->lock);
		OSM_LOG(pm->log, OSM_LOG_INFO,
			"PM sweep state %d, skipping sweep\n",
			busy_state);
		return;
	}

	pm->sweep_state = PERFMGR_SWEEP_ACTIVE;
	cl_spinlock_release(&pm->lock);

	if (pm->subn->sm_state == IB_SMINFO_STATE_STANDBY ||
	    pm->subn->sm_state == IB_SMINFO_STATE_NOTACTIVE)
		perfmgr_discovery(pm->subn->p_osm);

	/* if redirection enabled, determine local port */
	if (pm->subn->opt.perfmgr_redir && pm->local_port == -1) {
		osm_node_t *p_node;
		osm_port_t *p_port;

		CL_PLOCK_ACQUIRE(pm->sm->p_lock);
		p_port = osm_get_port_by_guid(pm->subn, pm->port_guid);
		if (p_port) {
			p_node = p_port->p_node;
			CL_ASSERT(p_node);
			pm->local_port =
			    ib_node_info_get_local_port_num(&p_node->node_info);
		} else
			OSM_LOG(pm->log, OSM_LOG_ERROR,
				"ERR 5487: No PerfMgr port object for "
				"port GUID 0x%" PRIx64 "\n",
				cl_ntoh64(pm->port_guid));
		CL_PLOCK_RELEASE(pm->sm->p_lock);
	}

#ifdef ENABLE_OSM_PERF_MGR_PROFILE
	gettimeofday(&before, NULL);
#endif
	/* With the global lock held, collect the node guids */
	/* FIXME we should be able to track SA notices
	 * and not have to sweep the node_guid_tbl each pass
	 */
	OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Gathering PerfMgr stats\n");
	cl_plock_acquire(&pm->osm->lock);
	cl_qmap_apply_func(&pm->subn->node_guid_tbl, collect_guids, pm);
	cl_plock_release(&pm->osm->lock);

	/* then for each node query their counters */
	cl_qmap_apply_func(&pm->monitored_map, perfmgr_query_counters, pm);

	/* clean out any nodes found to be removed during the sweep */
	remove_marked_nodes(pm);

#ifdef ENABLE_OSM_PERF_MGR_PROFILE
	gettimeofday(&after, NULL);
	diff_time(&before, &after, &after);
	osm_log_v2(pm->log, OSM_LOG_INFO, FILE_ID,
		   "PerfMgr total sweep time : %ld.%06ld s\n"
		   " fastest mad : %g us\n"
		   " slowest mad : %g us\n"
		   " average mad : %g us\n",
		   after.tv_sec, after.tv_usec, perfmgr_mad_stats.fastest_us,
		   perfmgr_mad_stats.slowest_us, perfmgr_mad_stats.avg_us);
	clear_mad_stats();
#endif

	cl_spinlock_acquire(&pm->lock);
	pm->sweep_state = PERFMGR_SWEEP_SLEEP;
	cl_spinlock_release(&pm->lock);
}

/**********************************************************************
 * PerfMgr timer - loop continuously and signal SM to run PerfMgr
 * processor if enabled
 *
 * Timer callback: signals OSM_SIGNAL_PERFMGR_SWEEP and re-arms the timer
 * for pm->sweep_time_s seconds (the timer takes milliseconds).
 **********************************************************************/
static void perfmgr_sweep(void *arg)
{
	osm_perfmgr_t *pm = arg;

	osm_sm_signal(pm->sm, OSM_SIGNAL_PERFMGR_SWEEP);
	cl_timer_start(&pm->sweep_timer, pm->sweep_time_s * 1000);
}

/* Stop the sweep timer, unregister the dispatcher handle and unbind the MAD port. */
void osm_perfmgr_shutdown(osm_perfmgr_t * pm)
{
	OSM_LOG_ENTER(pm->log);
	cl_timer_stop(&pm->sweep_timer);
	cl_disp_unregister(pm->pc_disp_h);
	perfmgr_mad_unbind(pm);
	OSM_LOG_EXIT(pm->log);
}

/* Destroy the PerfMgr database and the sweep timer. */
void osm_perfmgr_destroy(osm_perfmgr_t * pm)
{
	OSM_LOG_ENTER(pm->log);
	perfmgr_db_destroy(pm->db);
	cl_timer_destroy(&pm->sweep_timer);
	OSM_LOG_EXIT(pm->log);
}

/**********************************************************************
 * Detect if someone else on the network
could have cleared the counters * without us knowing. This is easy to detect because the counters never * wrap but are "sticky". * * The one time this will not work is if the port is getting errors fast * enough to have the reading overtake the previous reading. In this case, * counters will be missed. **********************************************************************/ static void perfmgr_check_oob_clear(osm_perfmgr_t * pm, monitored_node_t * mon_node, uint8_t port, perfmgr_db_err_reading_t * cr) { perfmgr_db_err_reading_t prev_err; if (perfmgr_db_get_prev_err(pm->db, mon_node->guid, port, &prev_err) != PERFMGR_EVENT_DB_SUCCESS) { OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Failed to find previous " "error reading for %s (guid 0x%" PRIx64 ") port %u\n", mon_node->name, mon_node->guid, port); return; } OSM_LOG(pm->log, OSM_LOG_DEBUG, "Errors vs previous node %s (0x%" PRIx64 ") port %u\n" "SE: %"PRIu64" ?< %"PRIu64"\n" "LE: %"PRIu64" ?< %"PRIu64"\n" "LD: %"PRIu64" ?< %"PRIu64"\n" "RE: %"PRIu64" ?< %"PRIu64"\n" "RPE: %"PRIu64" ?< %"PRIu64"\n" "SRE: %"PRIu64" ?< %"PRIu64"\n" "XD: %"PRIu64" ?< %"PRIu64"\n" "XCE: %"PRIu64" ?< %"PRIu64"\n" "RCE: %"PRIu64" ?< %"PRIu64"\n" "LI: %"PRIu64" ?< %"PRIu64"\n" "BO: %"PRIu64" ?< %"PRIu64"\n" "VL15: %"PRIu64" ?< %"PRIu64"\n" "XW: %"PRIu64" ?< %"PRIu64"\n" , mon_node->name, mon_node->guid, port, cr->symbol_err_cnt, prev_err.symbol_err_cnt, cr->link_err_recover, prev_err.link_err_recover, cr->link_downed, prev_err.link_downed, cr->rcv_err, prev_err.rcv_err, cr->rcv_rem_phys_err, prev_err.rcv_rem_phys_err, cr->rcv_switch_relay_err, prev_err.rcv_switch_relay_err, cr->xmit_discards, prev_err.xmit_discards, cr->xmit_constraint_err, prev_err.xmit_constraint_err, cr->rcv_constraint_err, prev_err.rcv_constraint_err, cr->link_integrity, prev_err.link_integrity, cr->buffer_overrun, prev_err.buffer_overrun, cr->vl15_dropped, prev_err.vl15_dropped, cr->xmit_wait, prev_err.xmit_wait); if (cr->symbol_err_cnt < prev_err.symbol_err_cnt || 
cr->link_err_recover < prev_err.link_err_recover || cr->link_downed < prev_err.link_downed || cr->rcv_err < prev_err.rcv_err || cr->rcv_rem_phys_err < prev_err.rcv_rem_phys_err || cr->rcv_switch_relay_err < prev_err.rcv_switch_relay_err || cr->xmit_discards < prev_err.xmit_discards || cr->xmit_constraint_err < prev_err.xmit_constraint_err || cr->rcv_constraint_err < prev_err.rcv_constraint_err || cr->link_integrity < prev_err.link_integrity || cr->buffer_overrun < prev_err.buffer_overrun || cr->vl15_dropped < prev_err.vl15_dropped || cr->xmit_wait < prev_err.xmit_wait) { OSM_LOG(pm->log, OSM_LOG_ERROR, "PerfMgr: ERR 540A: " "Detected an out of band error clear " "on %s (0x%" PRIx64 ") port %u\n", mon_node->name, mon_node->guid, port); perfmgr_db_clear_prev_err(pm->db, mon_node->guid, port); } } /********************************************************************** * Return 1 if the value is "close" to overflowing * "close" is defined at 25% for now **********************************************************************/ static int counter_overflow_4(uint8_t val) { return (val >= 10); } static int counter_overflow_8(uint8_t val) { return (val >= (UINT8_MAX - (UINT8_MAX / 4))); } static int counter_overflow_16(ib_net16_t val) { return (cl_ntoh16(val) >= (UINT16_MAX - (UINT16_MAX / 4))); } static int counter_overflow_32(ib_net32_t val) { return (cl_ntoh32(val) >= (UINT32_MAX - (UINT32_MAX / 4))); } static int counter_overflow_64(ib_net64_t val) { return (cl_ntoh64(val) >= (UINT64_MAX - (UINT64_MAX / 4))); } /********************************************************************** * Check if the port counters have overflowed and if so issue a clear * MAD to the port **********************************************************************/ static void perfmgr_check_overflow(osm_perfmgr_t * pm, monitored_node_t * mon_node, int16_t pkey_ix, uint8_t port, ib_port_counters_t * pc, boolean_t xmit_wait_sup) { osm_madw_context_t mad_context; ib_api_status_t status; ib_net32_t 
remote_qp; uint16_t counter_select; uint8_t counter_select2; OSM_LOG_ENTER(pm->log); if (counter_overflow_16(pc->symbol_err_cnt) || counter_overflow_8(pc->link_err_recover) || counter_overflow_8(pc->link_downed) || counter_overflow_16(pc->rcv_err) || counter_overflow_16(pc->rcv_rem_phys_err) || counter_overflow_16(pc->rcv_switch_relay_err) || counter_overflow_16(pc->xmit_discards) || counter_overflow_8(pc->xmit_constraint_err) || counter_overflow_8(pc->rcv_constraint_err) || counter_overflow_4(PC_LINK_INT(pc->link_int_buffer_overrun)) || counter_overflow_4(PC_BUF_OVERRUN(pc->link_int_buffer_overrun)) || counter_overflow_16(pc->vl15_dropped) || (xmit_wait_sup && counter_overflow_32(pc->xmit_wait)) || (!pce_supported(mon_node, port) && (counter_overflow_32(pc->xmit_data) || counter_overflow_32(pc->rcv_data) || counter_overflow_32(pc->xmit_pkts) || counter_overflow_32(pc->rcv_pkts)))) { osm_node_t *p_node = NULL; ib_net16_t lid = 0; if (!mon_node->port[port].valid) goto Exit; osm_log_v2(pm->log, OSM_LOG_VERBOSE, FILE_ID, "PerfMgr: Counter overflow: %s (0x%" PRIx64 ") port %d; clearing counters\n", mon_node->name, mon_node->guid, port); cl_plock_acquire(&pm->osm->lock); p_node = osm_get_node_by_guid(pm->subn, cl_hton64(mon_node->guid)); lid = get_lid(p_node, port, mon_node); cl_plock_release(&pm->osm->lock); if (lid == 0) { OSM_LOG(pm->log, OSM_LOG_ERROR, "PerfMgr: ERR 540C: " "Failed to clear counters for %s (0x%" PRIx64 ") port %d; failed to get lid\n", mon_node->name, mon_node->guid, port); goto Exit; } remote_qp = get_qp(NULL, port); mad_context.perfmgr_context.node_guid = mon_node->guid; mad_context.perfmgr_context.port = port; mad_context.perfmgr_context.mad_method = IB_MAD_METHOD_SET; /* apparently some HW uses the same counters for the 32 and 64 * bit versions and a clear of them in the PortCounters * attribute also clears the ExtendedPortCounters equivalant * counters */ if (pce_supported(mon_node, port)) counter_select = 0x0fff; else counter_select = 0xffff; 
if (xmit_wait_sup) counter_select2 = 1; else counter_select2 = 0; status = perfmgr_send_pc_mad(pm, lid, remote_qp, pkey_ix, port, IB_MAD_METHOD_SET, counter_select, counter_select2, &mad_context, 0); /* FIXME SL != 0 */ if (status != IB_SUCCESS) OSM_LOG(pm->log, OSM_LOG_ERROR, "PerfMgr: ERR 5411: " "Failed to send clear counters MAD for %s (0x%" PRIx64 ") port %d\n", mon_node->name, mon_node->guid, port); perfmgr_db_clear_prev_err(pm->db, mon_node->guid, port); if (!pce_supported(mon_node, port)) perfmgr_db_clear_prev_dc(pm->db, mon_node->guid, port); } Exit: OSM_LOG_EXIT(pm->log); } /********************************************************************** * Check if the port counters have overflowed and if so issue a clear * MAD to the port **********************************************************************/ static void perfmgr_check_pce_overflow(osm_perfmgr_t * pm, monitored_node_t * mon_node, int16_t pkey_ix, uint8_t port, ib_port_counters_ext_t * pc) { osm_madw_context_t mad_context; ib_api_status_t status; ib_net32_t remote_qp; OSM_LOG_ENTER(pm->log); if (counter_overflow_64(pc->xmit_data) || counter_overflow_64(pc->rcv_data) || counter_overflow_64(pc->xmit_pkts) || counter_overflow_64(pc->rcv_pkts) || (ietf_supported(mon_node, port) && (counter_overflow_64(pc->unicast_xmit_pkts) || counter_overflow_64(pc->unicast_rcv_pkts) || counter_overflow_64(pc->multicast_xmit_pkts) || counter_overflow_64(pc->multicast_rcv_pkts)))) { osm_node_t *p_node = NULL; ib_net16_t lid = 0; if (!mon_node->port[port].valid) goto Exit; osm_log(pm->log, OSM_LOG_VERBOSE, "PerfMgr: PortCountersExtended overflow: %s (0x%" PRIx64 ") port %d; clearing counters\n", mon_node->name, mon_node->guid, port); cl_plock_acquire(&pm->osm->lock); p_node = osm_get_node_by_guid(pm->subn, cl_hton64(mon_node->guid)); lid = get_lid(p_node, port, mon_node); cl_plock_release(&pm->osm->lock); if (lid == 0) { OSM_LOG(pm->log, OSM_LOG_ERROR, "PerfMgr: ERR 5418: " "Failed to clear counters for %s (0x%" PRIx64 
") port %d; failed to get lid\n", mon_node->name, mon_node->guid, port); goto Exit; } remote_qp = get_qp(NULL, port); mad_context.perfmgr_context.node_guid = mon_node->guid; mad_context.perfmgr_context.port = port; mad_context.perfmgr_context.mad_method = IB_MAD_METHOD_SET; /* clear port counters */ status = perfmgr_send_pce_mad(pm, lid, remote_qp, pkey_ix, port, IB_MAD_METHOD_SET, &mad_context, 0); /* FIXME SL != 0 */ if (status != IB_SUCCESS) OSM_LOG(pm->log, OSM_LOG_ERROR, "PerfMgr: ERR 5419: " "Failed to send clear counters MAD for %s (0x%" PRIx64 ") port %d\n", mon_node->name, mon_node->guid, port); perfmgr_db_clear_prev_dc(pm->db, mon_node->guid, port); } Exit: OSM_LOG_EXIT(pm->log); } /********************************************************************** * Check values for logging of errors **********************************************************************/ static void perfmgr_log_errors(osm_perfmgr_t * pm, monitored_node_t * mon_node, uint8_t port, perfmgr_db_err_reading_t * reading) { perfmgr_db_err_reading_t prev_read; perfmgr_db_err_t err = perfmgr_db_get_prev_err(pm->db, mon_node->guid, port, &prev_read); uint64_t cur, prev; if (err != PERFMGR_EVENT_DB_SUCCESS) { OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Failed to find previous " "reading for %s (0x%" PRIx64 ") port %u\n", mon_node->name, mon_node->guid, port); return; } #define LOG_ERR_CNT(errname, errnum, counter_name) \ if (reading->counter_name > prev_read.counter_name) { \ if (mon_node->port[port].remote_valid == TRUE) \ OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR %s: " \ "%s : %" PRIu64 " : node " \ "\"%s\" (NodeGUID: 0x%" PRIx64 ") : port %u " \ "connected to \"%s\" (NodeGUID: 0x%" PRIx64 ") : port %u\n", \ errnum, errname, \ reading->counter_name - prev_read.counter_name, \ mon_node->name, mon_node->guid, port, \ mon_node->port[port].remote_name, \ mon_node->port[port].remote_guid, \ mon_node->port[port].remote_port); \ else \ OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR %s: " \ "%s : %" PRIu64 " : node " \ 
"\"%s\" (NodeGUID: 0x%" PRIx64 ") : port %u\n", \ errnum, errname, \ reading->counter_name - prev_read.counter_name, \ mon_node->name, mon_node->guid, port); \ } LOG_ERR_CNT("SymbolErrorCounter", "5431", symbol_err_cnt); LOG_ERR_CNT("LinkErrorRecoveryCounter", "5432", link_err_recover); LOG_ERR_CNT("LinkDownedCounter", "5433", link_downed); LOG_ERR_CNT("PortRcvErrors", "5434", rcv_err); LOG_ERR_CNT("PortRcvRemotePhysicalErrors", "5435", rcv_rem_phys_err); LOG_ERR_CNT("PortRcvSwitchRelayErrors", "5436", rcv_switch_relay_err); LOG_ERR_CNT("PortXmitDiscards", "5437", xmit_discards); LOG_ERR_CNT("PortXmitConstraintErrors", "5438", xmit_constraint_err); LOG_ERR_CNT("PortRcvConstraintErrors", "5439", rcv_constraint_err); LOG_ERR_CNT("LocalLinkIntegrityErrors", "543A", link_integrity); LOG_ERR_CNT("ExcessiveBufferOverrunErrors", "543B", buffer_overrun); LOG_ERR_CNT("VL15Dropped", "543C", vl15_dropped); cur = reading->xmit_wait; prev = prev_read.xmit_wait; if (pm->xmit_wait_log && cur > prev && (cur - prev) >= pm->xmit_wait_threshold) { OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 543D: XmitWait : %" PRIu64 " : node \"%s\" (NodeGUID: 0x%" PRIx64 ") : port %u\n", cur - prev, mon_node->name, mon_node->guid, port); } } static int16_t validate_redir_pkey(osm_perfmgr_t *pm, ib_net16_t pkey) { int16_t pkey_ix = -1; osm_port_t *p_port; osm_pkey_tbl_t *p_pkey_tbl; ib_net16_t *p_orig_pkey; uint16_t block; uint8_t index; OSM_LOG_ENTER(pm->log); CL_PLOCK_ACQUIRE(pm->sm->p_lock); p_port = osm_get_port_by_guid(pm->subn, pm->port_guid); if (!p_port) { CL_PLOCK_RELEASE(pm->sm->p_lock); OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 541E: No PerfMgr port object\n"); goto Exit; } if (p_port->p_physp && osm_physp_is_valid(p_port->p_physp)) { p_pkey_tbl = &p_port->p_physp->pkeys; if (!p_pkey_tbl) { CL_PLOCK_RELEASE(pm->sm->p_lock); OSM_LOG(pm->log, OSM_LOG_VERBOSE, "No PKey table found for PerfMgr port\n"); goto Exit; } p_orig_pkey = cl_map_get(&p_pkey_tbl->keys, ib_pkey_get_base(pkey)); if (!p_orig_pkey) { 
CL_PLOCK_RELEASE(pm->sm->p_lock); OSM_LOG(pm->log, OSM_LOG_VERBOSE, "PKey 0x%x not found for PerfMgr port\n", cl_ntoh16(pkey)); goto Exit; } if (osm_pkey_tbl_get_block_and_idx(p_pkey_tbl, p_orig_pkey, &block, &index) == IB_SUCCESS) { CL_PLOCK_RELEASE(pm->sm->p_lock); pkey_ix = block * IB_NUM_PKEY_ELEMENTS_IN_BLOCK + index; } else { CL_PLOCK_RELEASE(pm->sm->p_lock); OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 541F: Failed to obtain P_Key 0x%04x " "block and index for PerfMgr port\n", cl_ntoh16(pkey)); } } else { CL_PLOCK_RELEASE(pm->sm->p_lock); OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5420: Local PerfMgt port physp invalid\n"); } Exit: OSM_LOG_EXIT(pm->log); return pkey_ix; } static boolean_t handle_redirect(osm_perfmgr_t *pm, ib_class_port_info_t *cpi, monitored_node_t *p_mon_node, uint8_t port, osm_madw_context_t *mad_context) { char gid_str[INET6_ADDRSTRLEN]; ib_api_status_t status; boolean_t valid = TRUE; int16_t pkey_ix = 0; uint8_t mad_method; OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Redirection to LID %u GID %s QP 0x%x received\n", cl_ntoh16(cpi->redir_lid), inet_ntop(AF_INET6, cpi->redir_gid.raw, gid_str, sizeof gid_str), cl_ntoh32(cpi->redir_qp)); if (!pm->subn->opt.perfmgr_redir) { OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Redirection requested but disabled\n"); valid = FALSE; } /* valid redirection ? 
*/ if (cpi->redir_lid == 0) { if (!ib_gid_is_notzero(&cpi->redir_gid)) { OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Invalid redirection " "(both redirect LID and GID are zero)\n"); valid = FALSE; } } if (cpi->redir_qp == 0) { OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Invalid RedirectQP\n"); valid = FALSE; } if (cpi->redir_pkey == 0) { OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Invalid RedirectP_Key\n"); valid = FALSE; } if (cpi->redir_qkey != IB_QP1_WELL_KNOWN_Q_KEY) { OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Invalid RedirectQ_Key\n"); valid = FALSE; } pkey_ix = validate_redir_pkey(pm, cpi->redir_pkey); if (pkey_ix == -1) { OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Index for Pkey 0x%x not found\n", cl_ntoh16(cpi->redir_pkey)); valid = FALSE; } if (cpi->redir_lid == 0) { /* GID redirection: get PathRecord information */ OSM_LOG(pm->log, OSM_LOG_VERBOSE, "GID redirection not currently supported\n"); goto Exit; } if (!valid) goto Exit; /* LID redirection support (easier than GID redirection) */ cl_plock_acquire(&pm->osm->lock); p_mon_node->port[port].redirection = TRUE; p_mon_node->port[port].valid = valid; memcpy(&p_mon_node->port[port].gid, &cpi->redir_gid, sizeof(ib_gid_t)); p_mon_node->port[port].lid = cpi->redir_lid; p_mon_node->port[port].qp = cpi->redir_qp; p_mon_node->port[port].pkey = cpi->redir_pkey; if (pkey_ix != -1) p_mon_node->port[port].pkey_ix = pkey_ix; cl_plock_release(&pm->osm->lock); /* either */ if (pm->query_cpi) { /* issue a CPI query to the redirected location */ mad_method = IB_MAD_METHOD_GET; p_mon_node->port[port].cpi_valid = FALSE; status = perfmgr_send_cpi_mad(pm, cpi->redir_lid, cpi->redir_qp, pkey_ix, port, mad_context, 0); /* FIXME SL != 0 */ } else { /* reissue the original query to the redirected location */ uint8_t counter_select2; if (xmit_wait_supported(p_mon_node, port)) counter_select2 = 1; else counter_select2 = 0; mad_method = mad_context->perfmgr_context.mad_method; if (mad_context->perfmgr_context.mad_attr_id == IB_MAD_ATTR_PORT_CNTRS) { status = 
perfmgr_send_pc_mad(pm, cpi->redir_lid, cpi->redir_qp, pkey_ix, port, mad_method, 0xffff, counter_select2, mad_context, 0); /* FIXME SL != 0 */ } else { status = perfmgr_send_pce_mad(pm, cpi->redir_lid, cpi->redir_qp, pkey_ix, port, mad_method, mad_context, 0); /* FIXME SL != 0 */ } } if (status != IB_SUCCESS) OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5414: " "Failed to send redirected MAD " "with method 0x%x for node %s " "(NodeGuid 0x%" PRIx64 ") port %d\n", mad_method, p_mon_node->name, p_mon_node->guid, port); Exit: return (valid); } /********************************************************************** * Detect if someone else on the network could have cleared the counters * without us knowing. This is easy to detect because the counters never * wrap but are "sticky" PortCountersExtended version. * * The one time this will not work is if the port is getting errors fast * enough to have the reading overtake the previous reading. In this case, * counters will be missed. **********************************************************************/ static void perfmgr_check_data_cnt_oob_clear(osm_perfmgr_t * pm, monitored_node_t * mon_node, uint8_t port, perfmgr_db_data_cnt_reading_t * dc) { perfmgr_db_data_cnt_reading_t prev_dc; if (perfmgr_db_get_prev_dc(pm->db, mon_node->guid, port, &prev_dc) != PERFMGR_EVENT_DB_SUCCESS) { OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Failed to find previous data count " "reading for %s (0x%" PRIx64 ") port %u\n", mon_node->name, mon_node->guid, port); return; } OSM_LOG(pm->log, OSM_LOG_DEBUG, "Data vs previous node %s (0x%" PRIx64 ") port %u\n" "TX: %"PRIu64" ?< %"PRIu64"\n" "RX: %"PRIu64" ?< %"PRIu64"\n" "TXP: %"PRIu64" ?< %"PRIu64"\n" "RXP: %"PRIu64" ?< %"PRIu64"\n" "UTXP: %"PRIu64" ?< %"PRIu64"\n" "URXP: %"PRIu64" ?< %"PRIu64"\n" "MTXP: %"PRIu64" ?< %"PRIu64"\n" "MRXP: %"PRIu64" ?< %"PRIu64"\n" , mon_node->name, mon_node->guid, port, dc->xmit_data, prev_dc.xmit_data, dc->rcv_data, prev_dc.rcv_data, dc->xmit_pkts, prev_dc.xmit_pkts, 
dc->rcv_pkts, prev_dc.rcv_pkts, dc->unicast_xmit_pkts, prev_dc.unicast_xmit_pkts, dc->unicast_rcv_pkts, prev_dc.unicast_rcv_pkts, dc->multicast_xmit_pkts, prev_dc.multicast_xmit_pkts, dc->multicast_rcv_pkts, prev_dc.multicast_rcv_pkts); if (dc->xmit_data < prev_dc.xmit_data || dc->rcv_data < prev_dc.rcv_data || dc->xmit_pkts < prev_dc.xmit_pkts || dc->rcv_pkts < prev_dc.rcv_pkts || (ietf_supported(mon_node, port) && (dc->unicast_xmit_pkts < prev_dc.unicast_xmit_pkts || dc->unicast_rcv_pkts < prev_dc.unicast_rcv_pkts || dc->multicast_xmit_pkts < prev_dc.multicast_xmit_pkts || dc->multicast_rcv_pkts < prev_dc.multicast_rcv_pkts))) { OSM_LOG(pm->log, OSM_LOG_ERROR, "PerfMgr: ERR 540B: Detected an out of band data counter " "clear on node %s (0x%" PRIx64 ") port %u\n", mon_node->name, mon_node->guid, port); perfmgr_db_clear_prev_dc(pm->db, mon_node->guid, port); } } /********************************************************************** * The dispatcher uses a thread pool which will call this function when * there is a thread available to process the mad received on the wire **********************************************************************/ static void pc_recv_process(void *context, void *data) { osm_perfmgr_t *pm = context; osm_madw_t *p_madw = data; osm_madw_context_t *mad_context = &p_madw->context; ib_mad_t *p_mad = osm_madw_get_mad_ptr(p_madw); uint64_t node_guid = mad_context->perfmgr_context.node_guid; uint8_t port = mad_context->perfmgr_context.port; perfmgr_db_err_reading_t err_reading; perfmgr_db_data_cnt_reading_t data_reading; cl_map_item_t *p_node; monitored_node_t *p_mon_node; ib_class_port_info_t *cpi = NULL; OSM_LOG_ENTER(pm->log); /* * get the monitored node struct to have the printable name * for log messages */ if ((p_node = cl_qmap_get(&pm->monitored_map, node_guid)) == cl_qmap_end(&pm->monitored_map)) { OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5412: GUID 0x%016" PRIx64 " not found in monitored map\n", node_guid); goto Exit; } p_mon_node = 
(monitored_node_t *) p_node; OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Processing received MAD status 0x%x context 0x%" PRIx64 " port %u\n", cl_ntoh16(p_mad->status), node_guid, port); CL_ASSERT(p_mad->attr_id == IB_MAD_ATTR_PORT_CNTRS || p_mad->attr_id == IB_MAD_ATTR_PORT_CNTRS_EXT || p_mad->attr_id == IB_MAD_ATTR_CLASS_PORT_INFO); cl_plock_acquire(&pm->osm->lock); /* validate port number */ if (port >= p_mon_node->num_ports) { cl_plock_release(&pm->osm->lock); OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5413: " "Invalid port num %d for GUID 0x%016" PRIx64 " num ports %d\n", port, node_guid, p_mon_node->num_ports); goto Exit; } cl_plock_release(&pm->osm->lock); /* capture CLASS_PORT_INFO data */ if (p_mad->attr_id == IB_MAD_ATTR_CLASS_PORT_INFO) { boolean_t cpi_valid = TRUE; cpi = (ib_class_port_info_t *) & (osm_madw_get_perfmgt_mad_ptr(p_madw)->data); /* Response could be redirection (IBM eHCA PMA does this) */ if (p_mad->status & IB_MAD_STATUS_REDIRECT) cpi_valid = handle_redirect(pm, cpi, p_mon_node, port, mad_context); if (pm->query_cpi && cpi_valid) { cl_plock_acquire(&pm->osm->lock); if (p_mon_node->node_type == IB_NODE_TYPE_SWITCH) { int i; for (i = p_mon_node->esp0 ? 
0 : 1; i < p_mon_node->num_ports; i++) { p_mon_node->port[i].cap_mask = cpi->cap_mask; p_mon_node->port[i].cpi_valid = cpi_valid; } } else { p_mon_node->port[port].cap_mask = cpi->cap_mask; p_mon_node->port[port].cpi_valid = cpi_valid; } cl_plock_release(&pm->osm->lock); } goto Exit; } if (p_mad->attr_id == IB_MAD_ATTR_PORT_CNTRS_EXT) { ib_port_counters_ext_t *ext_wire_read = (ib_port_counters_ext_t *) &osm_madw_get_perfmgt_mad_ptr(p_madw)->data; /* convert wire data to perfmgr data counter reading */ perfmgr_db_fill_data_cnt_read_pce(ext_wire_read, &data_reading, ietf_supported(p_mon_node, port)); /* add counter */ if (mad_context->perfmgr_context.mad_method == IB_MAD_METHOD_GET) { /* detect an out of band clear on the port */ perfmgr_check_data_cnt_oob_clear(pm, p_mon_node, port, &data_reading); perfmgr_db_add_dc_reading(pm->db, node_guid, port, &data_reading, ietf_supported(p_mon_node, port)); } else { perfmgr_db_clear_prev_dc(pm->db, node_guid, port); } perfmgr_check_pce_overflow(pm, p_mon_node, p_mon_node->port[port].pkey_ix, port, ext_wire_read); } else { boolean_t pce_sup = pce_supported(p_mon_node, port); boolean_t xmit_wait_sup = xmit_wait_supported(p_mon_node, port); ib_port_counters_t *wire_read = (ib_port_counters_t *) &osm_madw_get_perfmgt_mad_ptr(p_madw)->data; perfmgr_db_fill_err_read(wire_read, &err_reading, xmit_wait_sup); if (!pce_sup) perfmgr_db_fill_data_cnt_read_pc(wire_read, &data_reading); if (mad_context->perfmgr_context.mad_method == IB_MAD_METHOD_GET) { /* detect an out of band clear on the port */ perfmgr_check_oob_clear(pm, p_mon_node, port, &err_reading); if (!pce_sup) perfmgr_check_data_cnt_oob_clear(pm, p_mon_node, port, &data_reading); /* log errors from this reading */ if (pm->subn->opt.perfmgr_log_errors) perfmgr_log_errors(pm, p_mon_node, port, &err_reading); perfmgr_db_add_err_reading(pm->db, node_guid, port, &err_reading); if (!pce_sup) perfmgr_db_add_dc_reading(pm->db, node_guid, port, &data_reading, 0); } else { 
perfmgr_db_clear_prev_err(pm->db, node_guid, port); if (!pce_sup) perfmgr_db_clear_prev_dc(pm->db, node_guid, port); } perfmgr_check_overflow(pm, p_mon_node, p_mon_node->port[port].pkey_ix, port, wire_read, xmit_wait_sup); } #ifdef ENABLE_OSM_PERF_MGR_PROFILE do { struct timeval proc_time; gettimeofday(&proc_time, NULL); diff_time(&p_madw->context.perfmgr_context.query_start, &proc_time, &proc_time); update_mad_stats(&proc_time); } while (0); #endif Exit: osm_mad_pool_put(pm->mad_pool, p_madw); OSM_LOG_EXIT(pm->log); } /********************************************************************** * Initialize the PerfMgr object **********************************************************************/ ib_api_status_t osm_perfmgr_init(osm_perfmgr_t * pm, osm_opensm_t * osm, const osm_subn_opt_t * p_opt) { ib_api_status_t status; OSM_LOG_ENTER(&osm->log); OSM_LOG(&osm->log, OSM_LOG_VERBOSE, "Initializing PerfMgr\n"); memset(pm, 0, sizeof(*pm)); pm->subn = &osm->subn; pm->sm = &osm->sm; pm->log = &osm->log; pm->mad_pool = &osm->mad_pool; pm->vendor = osm->p_vendor; pm->trans_id = PERFMGR_INITIAL_TID_VALUE; pm->state = p_opt->perfmgr ? 
PERFMGR_STATE_ENABLED : PERFMGR_STATE_DISABLE; pm->sweep_state = PERFMGR_SWEEP_SLEEP; - cl_spinlock_init(&pm->lock); + status = cl_spinlock_init(&pm->lock); + if (status != IB_SUCCESS) + goto Exit; pm->sweep_time_s = p_opt->perfmgr_sweep_time_s; pm->max_outstanding_queries = p_opt->perfmgr_max_outstanding_queries; pm->ignore_cas = p_opt->perfmgr_ignore_cas; pm->osm = osm; pm->local_port = -1; status = cl_timer_init(&pm->sweep_timer, perfmgr_sweep, pm); if (status != IB_SUCCESS) goto Exit; status = IB_INSUFFICIENT_RESOURCES; pm->db = perfmgr_db_construct(pm); if (!pm->db) { pm->state = PERFMGR_STATE_NO_DB; goto Exit; } pm->pc_disp_h = cl_disp_register(&osm->disp, OSM_MSG_MAD_PORT_COUNTERS, pc_recv_process, pm); if (pm->pc_disp_h == CL_DISP_INVALID_HANDLE) { perfmgr_db_destroy(pm->db); goto Exit; } init_monitored_nodes(pm); if (pm->state == PERFMGR_STATE_ENABLED) cl_timer_start(&pm->sweep_timer, pm->sweep_time_s * 1000); pm->rm_nodes = p_opt->perfmgr_rm_nodes; pm->query_cpi = p_opt->perfmgr_query_cpi; pm->xmit_wait_log = p_opt->perfmgr_xmit_wait_log; pm->xmit_wait_threshold = p_opt->perfmgr_xmit_wait_threshold; status = IB_SUCCESS; Exit: OSM_LOG_EXIT(pm->log); return status; } /********************************************************************** * Clear the counters from the db **********************************************************************/ void osm_perfmgr_clear_counters(osm_perfmgr_t * pm) { /** * FIXME todo issue clear on the fabric? 
*/ perfmgr_db_clear_counters(pm->db); osm_log_v2(pm->log, OSM_LOG_INFO, FILE_ID, "PerfMgr counters cleared\n"); } /******************************************************************* * Dump the DB information to the file specified *******************************************************************/ void osm_perfmgr_dump_counters(osm_perfmgr_t * pm, perfmgr_db_dump_t dump_type) { char path[256]; char *file_name; if (pm->subn->opt.event_db_dump_file) file_name = pm->subn->opt.event_db_dump_file; else { snprintf(path, sizeof(path), "%s/%s", pm->subn->opt.dump_files_dir, OSM_PERFMGR_DEFAULT_DUMP_FILE); file_name = path; } if (perfmgr_db_dump(pm->db, file_name, dump_type) != 0) OSM_LOG(pm->log, OSM_LOG_ERROR, "Failed to dump file %s : %s", file_name, strerror(errno)); } /******************************************************************* * Print the DB information to the fp specified *******************************************************************/ void osm_perfmgr_print_counters(osm_perfmgr_t * pm, char *nodename, FILE * fp, char *port, int err_only) { if (nodename) { char *end = NULL; uint64_t guid = strtoull(nodename, &end, 0); if (nodename + strlen(nodename) != end) perfmgr_db_print_by_name(pm->db, nodename, fp, port, err_only); else perfmgr_db_print_by_guid(pm->db, guid, fp, port, err_only); } else perfmgr_db_print_all(pm->db, fp, err_only); } void osm_perfmgr_update_nodename(osm_perfmgr_t *pm, uint64_t node_guid, char *nodename) { if (pm->db) perfmgr_db_update_name(pm->db, node_guid, nodename); } #endif /* ENABLE_OSM_PERF_MGR */ Index: head/contrib/ofed/opensm/opensm/osm_port.c =================================================================== --- head/contrib/ofed/opensm/opensm/osm_port.c (revision 363219) +++ head/contrib/ofed/opensm/opensm/osm_port.c (revision 363220) @@ -1,700 +1,702 @@ /* * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. * Copyright (c) 2002-2015 Mellanox Technologies LTD. All rights reserved. 
* Copyright (c) 1996-2003 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * */ /* * Abstract: * Implementation of osm_physp_t. * This object represents an Infiniband Port. * This object is part of the opensm family of objects. 
*/ #if HAVE_CONFIG_H # include #endif /* HAVE_CONFIG_H */ #include #include #include #include #include #define FILE_ID OSM_FILE_PORT_C #include #include #include #include #include #include void osm_physp_construct(IN osm_physp_t * p_physp) { memset(p_physp, 0, sizeof(*p_physp)); osm_dr_path_construct(&p_physp->dr_path); cl_ptr_vector_construct(&p_physp->slvl_by_port); osm_pkey_tbl_construct(&p_physp->pkeys); } void osm_physp_destroy(IN osm_physp_t * p_physp) { size_t num_slvl, i; /* the physp might be uninitialized */ if (p_physp->port_guid) { if (p_physp->p_guids) free(p_physp->p_guids); /* free the SL2VL Tables */ num_slvl = cl_ptr_vector_get_size(&p_physp->slvl_by_port); for (i = 0; i < num_slvl; i++) free(cl_ptr_vector_get(&p_physp->slvl_by_port, i)); cl_ptr_vector_destroy(&p_physp->slvl_by_port); /* free the P_Key Tables */ osm_pkey_tbl_destroy(&p_physp->pkeys); memset(p_physp, 0, sizeof(*p_physp)); osm_dr_path_construct(&p_physp->dr_path); /* clear dr_path */ } } void osm_physp_init(IN osm_physp_t * p_physp, IN ib_net64_t port_guid, IN uint8_t port_num, IN const struct osm_node *p_node, IN osm_bind_handle_t h_bind, IN uint8_t hop_count, IN const uint8_t * p_initial_path) { uint16_t num_slvl, i; ib_slvl_table_t *p_slvl; CL_ASSERT(p_node); osm_physp_construct(p_physp); p_physp->port_guid = port_guid; p_physp->port_num = port_num; p_physp->healthy = TRUE; p_physp->need_update = 2; p_physp->p_node = (struct osm_node *)p_node; osm_dr_path_init(&p_physp->dr_path, hop_count, p_initial_path); /* allocate enough SL2VL tables */ if (osm_node_get_type(p_node) == IB_NODE_TYPE_SWITCH) /* we need node num ports + 1 SL2VL tables */ num_slvl = osm_node_get_num_physp(p_node) + 1; else /* An end node - we need only one SL2VL */ num_slvl = 1; cl_ptr_vector_init(&p_physp->slvl_by_port, num_slvl, 1); for (i = 0; i < num_slvl; i++) { p_slvl = (ib_slvl_table_t *) malloc(sizeof(ib_slvl_table_t)); if (!p_slvl) break; memset(p_slvl, 0, sizeof(ib_slvl_table_t)); 
cl_ptr_vector_set(&p_physp->slvl_by_port, i, p_slvl); } /* initialize the pkey table */ osm_pkey_tbl_init(&p_physp->pkeys); } void osm_port_delete(IN OUT osm_port_t ** pp_port) { free(*pp_port); *pp_port = NULL; } osm_port_t *osm_port_new(IN const ib_node_info_t * p_ni, IN osm_node_t * p_parent_node) { osm_port_t *p_port; ib_net64_t port_guid; osm_physp_t *p_physp; uint8_t port_num; p_port = malloc(sizeof(*p_port)); if (!p_port) return NULL; memset(p_port, 0, sizeof(*p_port)); cl_qlist_init(&p_port->mcm_list); p_port->p_node = (struct osm_node *)p_parent_node; port_guid = p_ni->port_guid; p_port->guid = port_guid; port_num = p_ni->node_type == IB_NODE_TYPE_SWITCH ? 0 : ib_node_info_get_local_port_num(p_ni); /* Get the pointers to the physical node objects "owned" by this logical port GUID. For switches, port '0' is owned; for HCA's and routers, only the singular part that has this GUID is owned. */ p_physp = osm_node_get_physp_ptr(p_parent_node, port_num); - if (!p_physp) + if (!p_physp) { + osm_port_delete(&p_port); return NULL; + } CL_ASSERT(port_guid == osm_physp_get_port_guid(p_physp)); p_port->p_physp = p_physp; return p_port; } void osm_port_get_lid_range_ho(IN const osm_port_t * p_port, IN uint16_t * p_min_lid, IN uint16_t * p_max_lid) { uint8_t lmc; *p_min_lid = cl_ntoh16(osm_port_get_base_lid(p_port)); lmc = osm_port_get_lmc(p_port); *p_max_lid = (uint16_t) (*p_min_lid + (1 << lmc) - 1); } uint8_t osm_physp_calc_link_mtu(IN osm_log_t * p_log, IN const osm_physp_t * p_physp, IN uint8_t current_mtu) { const osm_physp_t *p_remote_physp; uint8_t mtu; uint8_t remote_mtu; OSM_LOG_ENTER(p_log); p_remote_physp = osm_physp_get_remote(p_physp); if (p_remote_physp) { /* use the available MTU */ mtu = ib_port_info_get_mtu_cap(&p_physp->port_info); remote_mtu = ib_port_info_get_mtu_cap(&p_remote_physp->port_info); OSM_LOG(p_log, OSM_LOG_DEBUG, "Remote port 0x%016" PRIx64 " port = %u : " "MTU = %u. 
This Port MTU: %u\n", cl_ntoh64(osm_physp_get_port_guid(p_remote_physp)), osm_physp_get_port_num(p_remote_physp), remote_mtu, mtu); if (mtu != remote_mtu) { if (mtu > remote_mtu) mtu = remote_mtu; if (mtu != current_mtu) OSM_LOG(p_log, OSM_LOG_VERBOSE, "MTU mismatch between ports." "\n\t\t\t\tPort 0x%016" PRIx64 ", port %u" " and port 0x%016" PRIx64 ", port %u." "\n\t\t\t\tUsing lower MTU of %u\n", cl_ntoh64(osm_physp_get_port_guid(p_physp)), osm_physp_get_port_num(p_physp), cl_ntoh64(osm_physp_get_port_guid (p_remote_physp)), osm_physp_get_port_num(p_remote_physp), mtu); } } else mtu = ib_port_info_get_neighbor_mtu(&p_physp->port_info); if (mtu == 0) { OSM_LOG(p_log, OSM_LOG_DEBUG, "ERR 4101: " "Invalid MTU = 0. Forcing correction to 256\n"); mtu = 1; } OSM_LOG_EXIT(p_log); return mtu; } uint8_t osm_physp_calc_link_op_vls(IN osm_log_t * p_log, IN const osm_subn_t * p_subn, IN const osm_physp_t * p_physp, IN uint8_t current_op_vls) { const osm_physp_t *p_remote_physp; uint8_t op_vls; uint8_t remote_op_vls; OSM_LOG_ENTER(p_log); p_remote_physp = osm_physp_get_remote(p_physp); if (p_remote_physp) { /* use the available VLCap */ op_vls = ib_port_info_get_vl_cap(&p_physp->port_info); remote_op_vls = ib_port_info_get_vl_cap(&p_remote_physp->port_info); OSM_LOG(p_log, OSM_LOG_DEBUG, "Remote port 0x%016" PRIx64 " port = 0x%X : " "VL_CAP = %u. This port VL_CAP = %u\n", cl_ntoh64(osm_physp_get_port_guid(p_remote_physp)), osm_physp_get_port_num(p_remote_physp), remote_op_vls, op_vls); if (op_vls != remote_op_vls) { if (op_vls > remote_op_vls) op_vls = remote_op_vls; if (op_vls != current_op_vls) OSM_LOG(p_log, OSM_LOG_VERBOSE, "OP_VLS mismatch between ports." "\n\t\t\t\tPort 0x%016" PRIx64 ", port 0x%X" " and port 0x%016" PRIx64 ", port 0x%X." 
"\n\t\t\t\tUsing lower OP_VLS of %u\n", cl_ntoh64(osm_physp_get_port_guid(p_physp)), osm_physp_get_port_num(p_physp), cl_ntoh64(osm_physp_get_port_guid (p_remote_physp)), osm_physp_get_port_num(p_remote_physp), op_vls); } } else op_vls = ib_port_info_get_op_vls(&p_physp->port_info); if (op_vls == 0) { /* for non compliant implementations */ OSM_LOG(p_log, OSM_LOG_VERBOSE, "Invalid OP_VLS = 0. Forcing correction to 1 (VL0)\n"); op_vls = 1; } /* support user limitation of max_op_vls */ if (op_vls > p_subn->opt.max_op_vls) op_vls = p_subn->opt.max_op_vls; OSM_LOG_EXIT(p_log); return op_vls; } static inline uint64_t ptr_to_key(void const *p) { uint64_t k = 0; memcpy(&k, p, sizeof(void *)); return k; } static inline void *key_to_ptr(uint64_t k) { void *p = 0; memcpy(&p, &k, sizeof(void *)); return p; } /********************************************************************** Traverse the fabric from the SM node following the DR path given and add every phys port traversed to the map. Avoid tracking the first and last phys ports (going into the first switch and into the target port). 
**********************************************************************/ static cl_status_t physp_get_dr_physp_set(IN osm_log_t * p_log, IN osm_subn_t const *p_subn, IN osm_dr_path_t const *p_path, OUT cl_map_t * p_physp_map) { osm_port_t *p_port; osm_physp_t *p_physp; osm_node_t *p_node; uint8_t hop; cl_status_t status = CL_SUCCESS; OSM_LOG_ENTER(p_log); /* find the OSM node */ p_port = osm_get_port_by_guid(p_subn, p_subn->sm_port_guid); if (!p_port) { OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 4103: " "Failed to find the SM own port by guid\n"); status = CL_ERROR; goto Exit; } /* get the node of the SM */ p_node = p_port->p_node; /* traverse the path adding the nodes to the table start after the first dummy hop and stop just before the last one */ for (hop = 1; hop < p_path->hop_count - 1; hop++) { /* go out using the phys port of the path */ p_physp = osm_node_get_physp_ptr(p_node, p_path->path[hop]); /* make sure we got a valid port and it has a remote port */ if (!p_physp) { OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 4104: " "DR Traversal stopped on invalid port at hop:%u\n", hop); status = CL_ERROR; goto Exit; } /* we track the ports we go out along the path */ if (hop > 1) cl_map_insert(p_physp_map, ptr_to_key(p_physp), NULL); OSM_LOG(p_log, OSM_LOG_DEBUG, "Traversed through node: 0x%016" PRIx64 " port:%u\n", cl_ntoh64(p_node->node_info.node_guid), p_path->path[hop]); if (!(p_physp = osm_physp_get_remote(p_physp))) { OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 4106: " "DR Traversal stopped on missing remote physp at hop:%u\n", hop); status = CL_ERROR; goto Exit; } p_node = osm_physp_get_node_ptr(p_physp); } Exit: OSM_LOG_EXIT(p_log); return status; } static void physp_update_new_dr_path(IN osm_physp_t const *p_dest_physp, IN cl_map_t * p_visited_map, IN osm_bind_handle_t * h_bind) { cl_list_t tmpPortsList; osm_physp_t *p_physp, *p_src_physp = NULL; uint8_t path_array[IB_SUBNET_PATH_HOPS_MAX]; uint8_t i = 0; osm_dr_path_t *p_dr_path; cl_list_construct(&tmpPortsList); 
cl_list_init(&tmpPortsList, 10); cl_list_insert_head(&tmpPortsList, p_dest_physp); /* get the output port where we need to come from */ p_physp = (osm_physp_t *) cl_map_get(p_visited_map, ptr_to_key(p_dest_physp)); while (p_physp != NULL) { cl_list_insert_head(&tmpPortsList, p_physp); /* get the input port through where we reached the output port */ p_src_physp = p_physp; p_physp = (osm_physp_t *) cl_map_get(p_visited_map, ptr_to_key(p_physp)); /* if we reached a null p_physp - this means we are at the begining of the path. Break. */ if (p_physp == NULL) break; /* get the output port */ p_physp = (osm_physp_t *) cl_map_get(p_visited_map, ptr_to_key(p_physp)); } memset(path_array, 0, sizeof(path_array)); p_physp = (osm_physp_t *) cl_list_remove_head(&tmpPortsList); while (p_physp != NULL) { i++; path_array[i] = p_physp->port_num; p_physp = (osm_physp_t *) cl_list_remove_head(&tmpPortsList); } if (p_src_physp) { p_dr_path = osm_physp_get_dr_path_ptr(p_src_physp); osm_dr_path_init(p_dr_path, i, path_array); } cl_list_destroy(&tmpPortsList); } void osm_physp_replace_dr_path_with_alternate_dr_path(IN osm_log_t * p_log, IN osm_subn_t const *p_subn, IN osm_physp_t const *p_dest_physp, IN osm_bind_handle_t * h_bind) { cl_map_t physp_map; cl_map_t visited_map; osm_dr_path_t *p_dr_path; cl_list_t *p_currPortsList; cl_list_t *p_nextPortsList; osm_port_t *p_port; osm_physp_t *p_physp, *p_remote_physp; ib_net64_t port_guid; boolean_t next_list_is_full = TRUE, reached_dest = FALSE; uint8_t num_ports, port_num; p_nextPortsList = (cl_list_t *) malloc(sizeof(cl_list_t)); if (!p_nextPortsList) return; /* initialize the map of all port participating in current dr path not including first and last switches */ cl_map_construct(&physp_map); cl_map_init(&physp_map, 4); cl_map_construct(&visited_map); cl_map_init(&visited_map, 4); p_dr_path = osm_physp_get_dr_path_ptr(p_dest_physp); physp_get_dr_physp_set(p_log, p_subn, p_dr_path, &physp_map); /* BFS from OSM port until we find the target 
physp but avoid going through mapped ports */ cl_list_construct(p_nextPortsList); cl_list_init(p_nextPortsList, 10); port_guid = p_subn->sm_port_guid; CL_ASSERT(port_guid); p_port = osm_get_port_by_guid(p_subn, port_guid); if (!p_port) { OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 4105: No SM port object\n"); goto Exit; } /* HACK: We are assuming SM is running on HCA, so when getting the default port we'll get the port connected to the rest of the subnet. If SM is running on SWITCH - we should try to get a dr path from all switch ports. */ p_physp = p_port->p_physp; CL_ASSERT(p_physp); cl_list_insert_tail(p_nextPortsList, p_physp); while (next_list_is_full == TRUE) { next_list_is_full = FALSE; p_currPortsList = p_nextPortsList; p_nextPortsList = (cl_list_t *) malloc(sizeof(cl_list_t)); if (!p_nextPortsList) { p_nextPortsList = p_currPortsList; goto Exit; } cl_list_construct(p_nextPortsList); cl_list_init(p_nextPortsList, 10); p_physp = (osm_physp_t *) cl_list_remove_head(p_currPortsList); while (p_physp != NULL) { /* If we are in a switch - need to go out through all the other physical ports of the switch */ num_ports = osm_node_get_num_physp(p_physp->p_node); for (port_num = 1; port_num < num_ports; port_num++) { if (osm_node_get_type(p_physp->p_node) == IB_NODE_TYPE_SWITCH) p_remote_physp = osm_node_get_physp_ptr(p_physp-> p_node, port_num); else /* this is HCA or router - the remote port is just the port connected on the other side */ p_remote_physp = p_physp->p_remote_physp; /* make sure that all of the following occurred: 1. The port isn't NULL 2. This is not the port we came from 3. The port is not in the physp_map 4. 
This port haven't been visited before */ if (p_remote_physp && p_remote_physp != p_physp && cl_map_get(&physp_map, ptr_to_key(p_remote_physp)) == NULL && cl_map_get(&visited_map, ptr_to_key (p_remote_physp)) == NULL) { /* Insert the port into the visited_map, and save its source port */ cl_map_insert(&visited_map, ptr_to_key (p_remote_physp), p_physp); /* Is this the p_dest_physp? */ if (p_remote_physp == p_dest_physp) { /* update the new dr path */ physp_update_new_dr_path (p_dest_physp, &visited_map, h_bind); reached_dest = TRUE; break; } /* add the p_remote_physp to the nextPortsList */ cl_list_insert_tail(p_nextPortsList, p_remote_physp); next_list_is_full = TRUE; } } p_physp = (osm_physp_t *) cl_list_remove_head(p_currPortsList); if (reached_dest == TRUE) { /* free the rest of the currPortsList */ while (p_physp != NULL) p_physp = (osm_physp_t *) cl_list_remove_head (p_currPortsList); /* free the nextPortsList, if items were added to it */ p_physp = (osm_physp_t *) cl_list_remove_head(p_nextPortsList); while (p_physp != NULL) p_physp = (osm_physp_t *) cl_list_remove_head (p_nextPortsList); next_list_is_full = FALSE; } } cl_list_destroy(p_currPortsList); free(p_currPortsList); } /* cleanup */ Exit: cl_list_destroy(p_nextPortsList); free(p_nextPortsList); cl_map_destroy(&physp_map); cl_map_destroy(&visited_map); } boolean_t osm_link_is_healthy(IN const osm_physp_t * p_physp) { osm_physp_t *p_remote_physp; CL_ASSERT(p_physp); p_remote_physp = p_physp->p_remote_physp; if (p_remote_physp != NULL) return ((p_physp->healthy) & (p_remote_physp->healthy)); /* the other side is not known - consider the link as healthy */ return TRUE; } void osm_physp_set_pkey_tbl(IN osm_log_t * p_log, IN const osm_subn_t * p_subn, IN osm_physp_t * p_physp, IN ib_pkey_table_t * p_pkey_tbl, IN uint16_t block_num, IN boolean_t is_set) { uint16_t max_blocks; CL_ASSERT(p_pkey_tbl); /* (14.2.5.7) - the block number valid values are 0-2047, and are further limited by the size of the P_Key 
table specified by the PartitionCap on the node. */ if (!p_physp->p_node->sw || p_physp->port_num == 0) /* The maximum blocks is defined in the node info: partition cap for CA, router, and switch management ports. */ max_blocks = (cl_ntoh16(p_physp->p_node->node_info.partition_cap) + IB_NUM_PKEY_ELEMENTS_IN_BLOCK - 1) / IB_NUM_PKEY_ELEMENTS_IN_BLOCK; else /* This is a switch, and not a management port. The maximum blocks is defined in the switch info: partition enforcement cap. */ max_blocks = (cl_ntoh16(p_physp->p_node->sw->switch_info.enforce_cap) + IB_NUM_PKEY_ELEMENTS_IN_BLOCK - 1) / IB_NUM_PKEY_ELEMENTS_IN_BLOCK; if (block_num >= max_blocks) { OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 4108: " "Got illegal update for block number:%u max:%u " "for GUID: %" PRIx64 " port number:%u\n", block_num, max_blocks, cl_ntoh64(p_physp->p_node->node_info.node_guid), p_physp->port_num); return; } /* decrement block received counter */ if(!is_set) p_physp->pkeys.rcv_blocks_cnt--; osm_pkey_tbl_set(&p_physp->pkeys, block_num, p_pkey_tbl, p_subn->opt.allow_both_pkeys); } osm_alias_guid_t *osm_alias_guid_new(IN const ib_net64_t alias_guid, IN osm_port_t *p_base_port) { osm_alias_guid_t *p_alias_guid; p_alias_guid = calloc(1, sizeof(*p_alias_guid)); if (p_alias_guid) { p_alias_guid->alias_guid = alias_guid; p_alias_guid->p_base_port = p_base_port; } return p_alias_guid; } void osm_alias_guid_delete(IN OUT osm_alias_guid_t ** pp_alias_guid) { free(*pp_alias_guid); *pp_alias_guid = NULL; } void osm_physp_set_port_info(IN osm_physp_t * p_physp, IN const ib_port_info_t * p_pi, IN const struct osm_sm * p_sm) { CL_ASSERT(p_pi); CL_ASSERT(osm_physp_is_valid(p_physp)); if (ib_port_info_get_port_state(p_pi) == IB_LINK_DOWN) { /* If PortState is down, only copy PortState */ /* and PortPhysicalState per C14-24-2.1 */ ib_port_info_set_port_state(&p_physp->port_info, IB_LINK_DOWN); ib_port_info_set_port_phys_state (ib_port_info_get_port_phys_state(p_pi), &p_physp->port_info); } else { 
p_physp->port_info = *p_pi; /* The MKey in p_pi can only be considered valid if it's * for a HCA/router or switch port 0, and it's either * non-zero or the MKeyProtect bits are also zero. */ if ((osm_node_get_type(p_physp->p_node) != IB_NODE_TYPE_SWITCH || p_physp->port_num == 0) && (p_pi->m_key != 0 || ib_port_info_get_mpb(p_pi) == 0)) osm_db_guid2mkey_set(p_sm->p_subn->p_g2m, cl_ntoh64(p_physp->port_guid), cl_ntoh64(p_pi->m_key)); } } Index: head/contrib/ofed/opensm/opensm/osm_sa_mad_ctrl.c =================================================================== --- head/contrib/ofed/opensm/opensm/osm_sa_mad_ctrl.c (revision 363219) +++ head/contrib/ofed/opensm/opensm/osm_sa_mad_ctrl.c (revision 363220) @@ -1,622 +1,623 @@ /* * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. * Copyright (c) 2009 HNR Consulting. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * */ /* * Abstract: * Implementation of osm_sa_mad_ctrl_t. * This object is part of the SA object. */ #if HAVE_CONFIG_H # include #endif /* HAVE_CONFIG_H */ #include #include #include #include #define FILE_ID OSM_FILE_SA_MAD_CTRL_C #include #include #include #include #include #include /****f* opensm: SA/sa_mad_ctrl_disp_done_callback * NAME * sa_mad_ctrl_disp_done_callback * * DESCRIPTION * This function is the Dispatcher callback that indicates * a received MAD has been processed by the recipient. * * SYNOPSIS */ static void sa_mad_ctrl_disp_done_callback(IN void *context, IN void *p_data) { osm_sa_mad_ctrl_t *p_ctrl = context; osm_madw_t *p_madw = p_data; OSM_LOG_ENTER(p_ctrl->p_log); CL_ASSERT(p_madw); /* Return the MAD & wrapper to the pool. */ osm_mad_pool_put(p_ctrl->p_mad_pool, p_madw); OSM_LOG_EXIT(p_ctrl->p_log); } /************/ /****f* opensm: SA/sa_mad_ctrl_process * NAME * sa_mad_ctrl_process * * DESCRIPTION * This function handles known methods for received MADs. * * SYNOPSIS */ static void sa_mad_ctrl_process(IN osm_sa_mad_ctrl_t * p_ctrl, IN osm_madw_t * p_madw, IN boolean_t is_get_request) { ib_sa_mad_t *p_sa_mad; cl_disp_reg_handle_t h_disp; cl_status_t status; cl_disp_msgid_t msg_id = CL_DISP_MSGID_NONE; uint64_t last_dispatched_msg_queue_time_msec; uint32_t num_messages; OSM_LOG_ENTER(p_ctrl->p_log); p_sa_mad = osm_madw_get_sa_mad_ptr(p_madw); /* If the dispatcher is showing us that it is overloaded there is no point in placing the request in. We should instead provide immediate response - IB_RESOURCE_BUSY But how do we know? The dispatcher reports back the number of outstanding messages and the time the last message stayed in the queue. 
HACK: Actually, we cannot send a mad from within the receive callback; thus - we will just drop it. */ if (!is_get_request && p_ctrl->p_set_disp) { h_disp = p_ctrl->h_set_disp; goto SKIP_QUEUE_CHECK; } h_disp = p_ctrl->h_disp; cl_disp_get_queue_status(h_disp, &num_messages, &last_dispatched_msg_queue_time_msec); if (num_messages > 1 && p_ctrl->p_subn->opt.max_msg_fifo_timeout && last_dispatched_msg_queue_time_msec > p_ctrl->p_subn->opt.max_msg_fifo_timeout) { OSM_LOG(p_ctrl->p_log, OSM_LOG_INFO, /* "Responding BUSY status since the dispatcher is already" */ "Dropping MAD since the dispatcher is already" " overloaded with %u messages and queue time of:" "%" PRIu64 "[msec]\n", num_messages, last_dispatched_msg_queue_time_msec); /* send a busy response */ /* osm_sa_send_error(p_ctrl->p_resp, p_madw, IB_RESOURCE_BUSY); */ /* return the request to the pool */ osm_mad_pool_put(p_ctrl->p_mad_pool, p_madw); goto Exit; } SKIP_QUEUE_CHECK: /* Note that attr_id (like the rest of the MAD) is in network byte order. 
*/ switch (p_sa_mad->attr_id) { case IB_MAD_ATTR_CLASS_PORT_INFO: msg_id = OSM_MSG_MAD_CLASS_PORT_INFO; break; case IB_MAD_ATTR_NODE_RECORD: msg_id = OSM_MSG_MAD_NODE_RECORD; break; case IB_MAD_ATTR_PORTINFO_RECORD: msg_id = OSM_MSG_MAD_PORTINFO_RECORD; break; case IB_MAD_ATTR_LINK_RECORD: msg_id = OSM_MSG_MAD_LINK_RECORD; break; case IB_MAD_ATTR_SMINFO_RECORD: msg_id = OSM_MSG_MAD_SMINFO_RECORD; break; case IB_MAD_ATTR_SERVICE_RECORD: msg_id = OSM_MSG_MAD_SERVICE_RECORD; break; case IB_MAD_ATTR_PATH_RECORD: msg_id = OSM_MSG_MAD_PATH_RECORD; break; case IB_MAD_ATTR_MCMEMBER_RECORD: msg_id = OSM_MSG_MAD_MCMEMBER_RECORD; break; case IB_MAD_ATTR_INFORM_INFO: msg_id = OSM_MSG_MAD_INFORM_INFO; break; case IB_MAD_ATTR_VLARB_RECORD: msg_id = OSM_MSG_MAD_VL_ARB_RECORD; break; case IB_MAD_ATTR_SLVL_RECORD: msg_id = OSM_MSG_MAD_SLVL_TBL_RECORD; break; case IB_MAD_ATTR_PKEY_TBL_RECORD: msg_id = OSM_MSG_MAD_PKEY_TBL_RECORD; break; case IB_MAD_ATTR_LFT_RECORD: msg_id = OSM_MSG_MAD_LFT_RECORD; break; case IB_MAD_ATTR_GUIDINFO_RECORD: msg_id = OSM_MSG_MAD_GUIDINFO_RECORD; break; case IB_MAD_ATTR_INFORM_INFO_RECORD: msg_id = OSM_MSG_MAD_INFORM_INFO_RECORD; break; case IB_MAD_ATTR_SWITCH_INFO_RECORD: msg_id = OSM_MSG_MAD_SWITCH_INFO_RECORD; break; case IB_MAD_ATTR_MFT_RECORD: msg_id = OSM_MSG_MAD_MFT_RECORD; break; #if defined (VENDOR_RMPP_SUPPORT) && defined (DUAL_SIDED_RMPP) case IB_MAD_ATTR_MULTIPATH_RECORD: msg_id = OSM_MSG_MAD_MULTIPATH_RECORD; break; #endif default: OSM_LOG(p_ctrl->p_log, OSM_LOG_ERROR, "ERR 1A01: " "Unsupported attribute 0x%X (%s)\n", cl_ntoh16(p_sa_mad->attr_id), ib_get_sa_attr_str(p_sa_mad->attr_id)); osm_dump_sa_mad_v2(p_ctrl->p_log, p_sa_mad, FILE_ID, OSM_LOG_ERROR); } if (msg_id != CL_DISP_MSGID_NONE) { /* Post this MAD to the dispatcher for asynchronous processing by the appropriate controller. 
*/ OSM_LOG(p_ctrl->p_log, OSM_LOG_DEBUG, "Posting Dispatcher message %s\n", osm_get_disp_msg_str(msg_id)); status = cl_disp_post(h_disp, msg_id, p_madw, sa_mad_ctrl_disp_done_callback, p_ctrl); if (status != CL_SUCCESS) { OSM_LOG(p_ctrl->p_log, OSM_LOG_ERROR, "ERR 1A02: " "Dispatcher post message failed (%s) for attribute 0x%X (%s)\n", CL_STATUS_MSG(status), cl_ntoh16(p_sa_mad->attr_id), ib_get_sa_attr_str(p_sa_mad->attr_id)); osm_mad_pool_put(p_ctrl->p_mad_pool, p_madw); goto Exit; } } else { /* There is an unknown MAD attribute type for which there is no recipient. Simply retire the MAD here. */ cl_atomic_inc(&p_ctrl->p_stats->sa_mads_rcvd_unknown); osm_mad_pool_put(p_ctrl->p_mad_pool, p_madw); } Exit: OSM_LOG_EXIT(p_ctrl->p_log); } /* * PARAMETERS * * RETURN VALUES * * NOTES * * SEE ALSO *********/ /****f* opensm: SA/sa_mad_ctrl_rcv_callback * NAME * sa_mad_ctrl_rcv_callback * * DESCRIPTION * This is the callback from the transport layer for received MADs. * * SYNOPSIS */ static void sa_mad_ctrl_rcv_callback(IN osm_madw_t * p_madw, IN void *context, IN osm_madw_t * p_req_madw) { osm_sa_mad_ctrl_t *p_ctrl = context; ib_sa_mad_t *p_sa_mad; boolean_t is_get_request = FALSE; OSM_LOG_ENTER(p_ctrl->p_log); CL_ASSERT(p_madw); /* A MAD was received from the wire, possibly in response to a request. */ cl_atomic_inc(&p_ctrl->p_stats->sa_mads_rcvd); OSM_LOG(p_ctrl->p_log, OSM_LOG_DEBUG, "%u SA MADs received\n", p_ctrl->p_stats->sa_mads_rcvd); /* * C15-0.1.3 requires not responding to any MAD if the SM is * not in active state! * We will not respond if the sm_state is not MASTER, or if the * first_time_master_sweep flag (of the subnet) is TRUE - this * flag indicates that the master still didn't finish its first * sweep, so the subnet is not up and stable yet. */ if (p_ctrl->p_subn->sm_state != IB_SMINFO_STATE_MASTER) { cl_atomic_inc(&p_ctrl->p_stats->sa_mads_ignored); OSM_LOG(p_ctrl->p_log, OSM_LOG_VERBOSE, "Received SA MAD while SM not MASTER. 
MAD ignored\n"); osm_mad_pool_put(p_ctrl->p_mad_pool, p_madw); goto Exit; } if (p_ctrl->p_subn->first_time_master_sweep == TRUE) { cl_atomic_inc(&p_ctrl->p_stats->sa_mads_ignored); OSM_LOG(p_ctrl->p_log, OSM_LOG_VERBOSE, "Received SA MAD while SM in first sweep. MAD ignored\n"); osm_mad_pool_put(p_ctrl->p_mad_pool, p_madw); goto Exit; } p_sa_mad = osm_madw_get_sa_mad_ptr(p_madw); if (OSM_LOG_IS_ACTIVE_V2(p_ctrl->p_log, OSM_LOG_FRAMES)) osm_dump_sa_mad_v2(p_ctrl->p_log, p_sa_mad, FILE_ID, OSM_LOG_FRAMES); /* * C15-0.1.5 - Table 185: SA Header - p884 * SM_key should be either 0 or match the current SM_Key * otherwise discard the MAD. */ if (p_sa_mad->sm_key != 0 && p_sa_mad->sm_key != p_ctrl->p_subn->opt.sa_key) { OSM_LOG(p_ctrl->p_log, OSM_LOG_ERROR, "ERR 1A04: " "Non-Zero MAD SM_Key: 0x%" PRIx64 " != SM_Key: 0x%" PRIx64 "; SA MAD ignored for method 0x%X attribute 0x%X (%s)\n", cl_ntoh64(p_sa_mad->sm_key), cl_ntoh64(p_ctrl->p_subn->opt.sa_key), p_sa_mad->method, cl_ntoh16(p_sa_mad->attr_id), ib_get_sa_attr_str(p_sa_mad->attr_id)); osm_mad_pool_put(p_ctrl->p_mad_pool, p_madw); goto Exit; } switch (p_sa_mad->method) { case IB_MAD_METHOD_REPORT_RESP: /* we do not really do anything with report responses - just retire the transaction */ OSM_LOG(p_ctrl->p_log, OSM_LOG_DEBUG, "Received Report Response. 
Retiring the transaction\n"); if (p_req_madw) osm_mad_pool_put(p_ctrl->p_mad_pool, p_req_madw); osm_mad_pool_put(p_ctrl->p_mad_pool, p_madw); break; case IB_MAD_METHOD_GET: case IB_MAD_METHOD_GETTABLE: #if defined (VENDOR_RMPP_SUPPORT) && defined (DUAL_SIDED_RMPP) case IB_MAD_METHOD_GETMULTI: #endif is_get_request = TRUE; + /* FALLTHROUGH */ case IB_MAD_METHOD_SET: case IB_MAD_METHOD_DELETE: /* if we are closing down simply do nothing */ if (osm_exit_flag) osm_mad_pool_put(p_ctrl->p_mad_pool, p_madw); else sa_mad_ctrl_process(p_ctrl, p_madw, is_get_request); break; default: cl_atomic_inc(&p_ctrl->p_stats->sa_mads_rcvd_unknown); OSM_LOG(p_ctrl->p_log, OSM_LOG_ERROR, "ERR 1A05: " "Unsupported method = 0x%X\n", p_sa_mad->method); osm_mad_pool_put(p_ctrl->p_mad_pool, p_madw); goto Exit; } Exit: OSM_LOG_EXIT(p_ctrl->p_log); } /* * PARAMETERS * * RETURN VALUES * * NOTES * * SEE ALSO *********/ /****f* opensm: SA/sa_mad_ctrl_send_err_callback * NAME * sa_mad_ctrl_send_err_callback * * DESCRIPTION * This is the callback from the transport layer for send errors * on MADs that were expecting a response. * * SYNOPSIS */ static void sa_mad_ctrl_send_err_callback(IN void *context, IN osm_madw_t * p_madw) { osm_sa_mad_ctrl_t *p_ctrl = context; cl_status_t status; OSM_LOG_ENTER(p_ctrl->p_log); /* We should never be here since the SA never originates a request. Unless we generated a Report(Notice) */ CL_ASSERT(p_madw); OSM_LOG(p_ctrl->p_log, OSM_LOG_ERROR, "ERR 1A06: " "MAD completed in error (%s): " "%s(%s), attr_mod 0x%x, LID %u, TID 0x%" PRIx64 "\n", ib_get_err_str(p_madw->status), ib_get_sa_method_str(p_madw->p_mad->method), ib_get_sa_attr_str(p_madw->p_mad->attr_id), cl_ntoh32(p_madw->p_mad->attr_mod), cl_ntoh16(p_madw->mad_addr.dest_lid), cl_ntoh64(p_madw->p_mad->trans_id)); osm_dump_sa_mad_v2(p_ctrl->p_log, osm_madw_get_sa_mad_ptr(p_madw), FILE_ID, OSM_LOG_ERROR); /* An error occurred. No response was received to a request MAD. Retire the original request MAD. 
*/ if (osm_madw_get_err_msg(p_madw) != CL_DISP_MSGID_NONE) { OSM_LOG(p_ctrl->p_log, OSM_LOG_DEBUG, "Posting Dispatcher message %s\n", osm_get_disp_msg_str(osm_madw_get_err_msg(p_madw))); if (p_ctrl->p_set_disp && (p_madw->p_mad->method == IB_MAD_METHOD_SET || p_madw->p_mad->method == IB_MAD_METHOD_DELETE)) status = cl_disp_post(p_ctrl->h_set_disp, osm_madw_get_err_msg(p_madw), p_madw, sa_mad_ctrl_disp_done_callback, p_ctrl); else status = cl_disp_post(p_ctrl->h_disp, osm_madw_get_err_msg(p_madw), p_madw, sa_mad_ctrl_disp_done_callback, p_ctrl); if (status != CL_SUCCESS) { OSM_LOG(p_ctrl->p_log, OSM_LOG_ERROR, "ERR 1A07: " "Dispatcher post message failed (%s)\n", CL_STATUS_MSG(status)); } } else /* No error message was provided, just retire the MAD. */ osm_mad_pool_put(p_ctrl->p_mad_pool, p_madw); OSM_LOG_EXIT(p_ctrl->p_log); } /* * PARAMETERS * * RETURN VALUES * * NOTES * * SEE ALSO *********/ void osm_sa_mad_ctrl_construct(IN osm_sa_mad_ctrl_t * p_ctrl) { CL_ASSERT(p_ctrl); memset(p_ctrl, 0, sizeof(*p_ctrl)); p_ctrl->h_disp = CL_DISP_INVALID_HANDLE; p_ctrl->h_set_disp = CL_DISP_INVALID_HANDLE; } void osm_sa_mad_ctrl_destroy(IN osm_sa_mad_ctrl_t * p_ctrl) { CL_ASSERT(p_ctrl); cl_disp_unregister(p_ctrl->h_disp); cl_disp_unregister(p_ctrl->h_set_disp); } ib_api_status_t osm_sa_mad_ctrl_init(IN osm_sa_mad_ctrl_t * p_ctrl, IN osm_sa_t * sa, IN osm_mad_pool_t * p_mad_pool, IN osm_vendor_t * p_vendor, IN osm_subn_t * p_subn, IN osm_log_t * p_log, IN osm_stats_t * p_stats, IN cl_dispatcher_t * p_disp, IN cl_dispatcher_t * p_set_disp) { ib_api_status_t status = IB_SUCCESS; OSM_LOG_ENTER(p_log); osm_sa_mad_ctrl_construct(p_ctrl); p_ctrl->sa = sa; p_ctrl->p_log = p_log; p_ctrl->p_disp = p_disp; p_ctrl->p_set_disp = p_set_disp; p_ctrl->p_mad_pool = p_mad_pool; p_ctrl->p_vendor = p_vendor; p_ctrl->p_stats = p_stats; p_ctrl->p_subn = p_subn; p_ctrl->h_disp = cl_disp_register(p_disp, CL_DISP_MSGID_NONE, NULL, p_ctrl); if (p_ctrl->h_disp == CL_DISP_INVALID_HANDLE) { 
OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 1A08: " "Dispatcher registration failed\n"); status = IB_INSUFFICIENT_RESOURCES; goto Exit; } if (p_set_disp) { p_ctrl->h_set_disp = cl_disp_register(p_set_disp, CL_DISP_MSGID_NONE, NULL, p_ctrl); if (p_ctrl->h_set_disp == CL_DISP_INVALID_HANDLE) { OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 1A0A: " "SA set dispatcher registration failed\n"); status = IB_INSUFFICIENT_RESOURCES; goto Exit; } } Exit: OSM_LOG_EXIT(p_log); return status; } ib_api_status_t osm_sa_mad_ctrl_bind(IN osm_sa_mad_ctrl_t * p_ctrl, IN ib_net64_t port_guid) { osm_bind_info_t bind_info; ib_api_status_t status = IB_SUCCESS; OSM_LOG_ENTER(p_ctrl->p_log); if (p_ctrl->h_bind != OSM_BIND_INVALID_HANDLE) { OSM_LOG(p_ctrl->p_log, OSM_LOG_ERROR, "ERR 1A09: " "Multiple binds not allowed\n"); status = IB_ERROR; goto Exit; } bind_info.class_version = 2; bind_info.is_responder = TRUE; bind_info.is_report_processor = FALSE; bind_info.is_trap_processor = FALSE; bind_info.mad_class = IB_MCLASS_SUBN_ADM; bind_info.port_guid = port_guid; bind_info.recv_q_size = OSM_SM_DEFAULT_QP1_RCV_SIZE; bind_info.send_q_size = OSM_SM_DEFAULT_QP1_SEND_SIZE; bind_info.timeout = p_ctrl->sa->p_subn->opt.transaction_timeout; bind_info.retries = p_ctrl->sa->p_subn->opt.transaction_retries; OSM_LOG(p_ctrl->p_log, OSM_LOG_VERBOSE, "Binding to port GUID 0x%" PRIx64 "\n", cl_ntoh64(port_guid)); p_ctrl->h_bind = osm_vendor_bind(p_ctrl->p_vendor, &bind_info, p_ctrl->p_mad_pool, sa_mad_ctrl_rcv_callback, sa_mad_ctrl_send_err_callback, p_ctrl); if (p_ctrl->h_bind == OSM_BIND_INVALID_HANDLE) { status = IB_ERROR; OSM_LOG(p_ctrl->p_log, OSM_LOG_ERROR, "ERR 1A10: " "Vendor specific bind failed (%s)\n", ib_get_err_str(status)); goto Exit; } Exit: OSM_LOG_EXIT(p_ctrl->p_log); return status; } ib_api_status_t osm_sa_mad_ctrl_unbind(IN osm_sa_mad_ctrl_t * p_ctrl) { ib_api_status_t status = IB_SUCCESS; OSM_LOG_ENTER(p_ctrl->p_log); if (p_ctrl->h_bind == OSM_BIND_INVALID_HANDLE) { OSM_LOG(p_ctrl->p_log, OSM_LOG_ERROR, "ERR 
1A11: " "No previous bind\n"); status = IB_ERROR; goto Exit; } osm_vendor_unbind(p_ctrl->h_bind); Exit: OSM_LOG_EXIT(p_ctrl->p_log); return status; }